In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import cv2
import pickle
import tensorflow as tf

  return f(*args, **kwds)


In [2]:
NUM_CLASSES = 50 # limit classes trained

import matplotlib.patches as patches
from collections import defaultdict
from scipy.misc import imread, imresize
from numpy.random import choice

class_ids = set()

ALL_INSTANCES = defaultdict(list) # maps class id to list of images in that class.

im_addr_classes = pickle.load(open('metadata', 'rb'))
for index, row in im_addr_classes.iterrows():
    im_id, im_addr, bb_x, bb_y, bb_w, bb_h, class_id = row
    
    if class_id not in class_ids:
        if len(class_ids) >= NUM_CLASSES:
            break
        class_ids.add(class_id)
    
    img = imread('CUB_200_2011/images/' + im_addr)
    
    bb_x, bb_y, bb_w, bb_h = [int(x) for x in [bb_x, bb_y, bb_w, bb_h]]
    if len(img.shape) == 2:
        img = np.dstack([img, img, img])
    if 0 in img.shape:
        continue

    img = img[bb_y:bb_y + bb_h, bb_x:bb_x + bb_w, :]
    img = imresize(img, (224, 224))
#     plt.imshow(img)
#     plt.show()
    
    ALL_INSTANCES[class_id] += [img]

`imread` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imread`` instead.
`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``skimage.transform.resize`` instead.


In [3]:
# used to help generate training, testing, and validation datasets by randomly sampling all data.
class Datasets(object):
    def __init__(self, ratio, all_instances):
        """
        Ratio: [training, validation, testing] ratios.
        all_instances: dict(class_id -> [images])
        """
        self.sets = {}
        for set_key in ['train', 'val', 'test']:
            self.sets[set_key] = {}
            self.sets[set_key]['imgs'] = []
            self.sets[set_key]['lbls'] = []
        self.spread_data(ratio, all_instances)

    def get_sets_pairs(self):
        return self.sets

    def spread_data(self, ratio, all_instances):
        """
        Forms training, validation, and testing set
        """
        for class_id, imgs in all_instances.items():
            for img in imgs:
                set_choice_idx = choice([0, 1, 2], p=ratio)
                set_type = list(self.sets.keys())[set_choice_idx]
                self.sets[set_type]['imgs'].append(img)
                self.sets[set_type]['lbls'].append(class_id)
        for set_type in self.sets.keys():
            self.sets[set_type]['imgs'] = np.array(self.sets[set_type]['imgs'])
            self.sets[set_type]['lbls'] = np.array(self.sets[set_type]['lbls'])

    def show_dataset_size(self):
        """
        Print length of training, validation, and testing set
        """
        print('Train set size:', self.sets['train']['imgs'].shape)
        print('Validation set size:', self.sets['val']['imgs'].shape)
        print('Test set size:', self.sets['test']['imgs'].shape)

train_test_ratio = [0.5, 0.25, 0.25]

datasets = Datasets(train_test_ratio, dict(ALL_INSTANCES))
datasets.show_dataset_size()

Train set size: (1384, 224, 224, 3)
Validation set size: (762, 224, 224, 3)
Test set size: (743, 224, 224, 3)


In [4]:
# VGGNet training

# all_imgs_processed = np.load('images_normalized.npz')
# all_labels = np.load('images_normalized_labels.npz')
import vgg as vgg_mod
import importlib
importlib.reload(vgg_mod)

sess = tf.Session()

vgg_obj = None
# graph = tf.Graph()
# with graph.as_default():
imgs = tf.placeholder(tf.float32, [None, 224, 224, 3])
vgg_obj = vgg_mod.vgg16(imgs, 'vgg16_weights.npz', sess)

labels_ph = tf.placeholder(tf.int64, (None), name='labels_ph')
labels_ph_oh = tf.one_hot(labels_ph, 50)
#     print(labels_ph_oh)
#     vgg_obj.load_weights(vgg_obj.weights, vgg_obj.sess)

# print(tf.trainable_variables())

# Get trainable variables for the last three custom FC layers (featuring dropout, etc)
fc_stem = 'fully_connected'
trainable_vars_list = []
for i in ['', '_1', '_2']:
    trainable_weight_var = tf.contrib.framework.get_variables(fc_stem + i + '/weights:0')
    trainable_bias_var = tf.contrib.framework.get_variables(fc_stem + i + '/biases:0')
    trainable_vars_list += [trainable_weight_var, trainable_bias_var]

optimizer_warm = tf.train.AdamOptimizer(0.001, 0.9, 0.999)
loss_warm = tf.nn.sigmoid_cross_entropy_with_logits(logits=vgg_obj.logits, labels=labels_ph_oh)
opt_warm = optimizer_warm.minimize(loss_warm, var_list=trainable_vars_list)

optimizer_full = tf.train.AdamOptimizer(0.00001, 0.9, 0.999)
loss_full = tf.nn.sigmoid_cross_entropy_with_logits(logits=vgg_obj.logits, labels=labels_ph_oh)
opt_full = optimizer_full.minimize(loss_full)

correct = tf.equal(tf.argmax(vgg_obj.logits, 1), labels_ph)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
        
# tf.get_default_graph().finalize()
sess.run(tf.global_variables_initializer())
vgg_obj.load_weights(vgg_obj.weights, vgg_obj.sess)



0 conv1_1_W (3, 3, 3, 64)
1 conv1_1_b (64,)
2 conv1_2_W (3, 3, 64, 64)
3 conv1_2_b (64,)
4 conv2_1_W (3, 3, 64, 128)
5 conv2_1_b (128,)
6 conv2_2_W (3, 3, 128, 128)
7 conv2_2_b (128,)
8 conv3_1_W (3, 3, 128, 256)
9 conv3_1_b (256,)
10 conv3_2_W (3, 3, 256, 256)
11 conv3_2_b (256,)
12 conv3_3_W (3, 3, 256, 256)
13 conv3_3_b (256,)
14 conv4_1_W (3, 3, 256, 512)
15 conv4_1_b (512,)
16 conv4_2_W (3, 3, 512, 512)
17 conv4_2_b (512,)
18 conv4_3_W (3, 3, 512, 512)
19 conv4_3_b (512,)
20 conv5_1_W (3, 3, 512, 512)
21 conv5_1_b (512,)
22 conv5_2_W (3, 3, 512, 512)
23 conv5_2_b (512,)
24 conv5_3_W (3, 3, 512, 512)
25 conv5_3_b (512,)


In [5]:
# Training code
BS = 64
NUM_EPOCHS_WARM = 12
NUM_EPOCHS_FULL = 10

train_imgs = datasets.sets['train']['imgs']
train_lbls = datasets.sets['train']['lbls']

val_imgs = datasets.sets['val']['imgs']
val_lbls = datasets.sets['val']['lbls']

training = vgg_obj.training
keep_prob = vgg_obj.keep_prob

def train_for_epochs(optimizer_in, loss_in, epochs, correct_in, accuracy_in):
    for epoch in range(epochs):
        # Let's shuffle the data every epoch
        np.random.seed(epoch)
        np.random.shuffle(train_imgs)
        np.random.seed(epoch)
        np.random.shuffle(train_lbls)
        # Go through the entire dataset once
        accuracy_vals, loss_vals = [], []
        for i in range(0, train_imgs.shape[0]-BS+1, BS):
            # Train a single batch
            batch_images, batch_labels = train_imgs[i:i+BS], train_lbls[i:i+BS]
            accuracy_val, loss_val, _ = sess.run([accuracy_in, loss_in, optimizer_in], feed_dict={imgs: batch_images, labels_ph: batch_labels, training: True, keep_prob: 0.9})
            accuracy_vals.append(accuracy_val)
            loss_vals.append(loss_val)

        val_correct = []
        for i in range(0, val_imgs.shape[0], BS):
            batch_images, batch_labels = val_imgs[i:i+BS], val_lbls[i:i+BS]
            val_correct.extend( sess.run(correct_in, feed_dict={imgs: batch_images, labels_ph: batch_labels, training: False, keep_prob:0.9}) )
        print('[%3d] Accuracy: %0.3f  \t  Loss: %0.3f \t Val acc: %0.3f'%(epoch, np.mean(accuracy_vals), np.mean(loss_vals), np.mean(val_correct)))
train_for_epochs(opt_warm, loss_warm, NUM_EPOCHS_WARM, correct, accuracy)
train_for_epochs(opt_full, loss_full, NUM_EPOCHS_FULL, correct, accuracy)

[  0] Accuracy: 0.158  	  Loss: 0.774 	 Val acc: 0.077
[  1] Accuracy: 0.460  	  Loss: 0.735 	 Val acc: 0.210
[  2] Accuracy: 0.622  	  Loss: 0.711 	 Val acc: 0.249
[  3] Accuracy: 0.692  	  Loss: 0.698 	 Val acc: 0.299
[  4] Accuracy: 0.749  	  Loss: 0.687 	 Val acc: 0.331
[  5] Accuracy: 0.755  	  Loss: 0.683 	 Val acc: 0.228
[  6] Accuracy: 0.757  	  Loss: 0.678 	 Val acc: 0.257
[  7] Accuracy: 0.782  	  Loss: 0.675 	 Val acc: 0.345
[  8] Accuracy: 0.789  	  Loss: 0.673 	 Val acc: 0.378
[  9] Accuracy: 0.767  	  Loss: 0.673 	 Val acc: 0.438
[ 10] Accuracy: 0.788  	  Loss: 0.670 	 Val acc: 0.408
[ 11] Accuracy: 0.766  	  Loss: 0.670 	 Val acc: 0.437
[  0] Accuracy: 0.777  	  Loss: 0.665 	 Val acc: 0.479
[  1] Accuracy: 0.799  	  Loss: 0.667 	 Val acc: 0.499
[  2] Accuracy: 0.789  	  Loss: 0.666 	 Val acc: 0.510
[  3] Accuracy: 0.800  	  Loss: 0.663 	 Val acc: 0.535
[  4] Accuracy: 0.785  	  Loss: 0.670 	 Val acc: 0.542
[  5] Accuracy: 0.793  	  Loss: 0.666 	 Val acc: 0.516
[  6] Accu

## notes about running hyperparameters:

1. Warmup starts to overfit after ~15 epochs. So use 12 instead, and hopefully the full network training will improve generalization.

In [13]:
test_imgs_processed = datasets.sets['test']['imgs']
test_labels = datasets.sets['test']['lbls']

num_test = len(test_imgs_processed)
test_batch_size = 128
total_top_1, total_top_3, total_top_5 = [[] for x in range(3)]

for batch_start in range(0, num_test - test_batch_size + 1, test_batch_size):
    
    img_batch = test_imgs_processed[batch_start:batch_start + test_batch_size, :, :]
    labels_batch = test_labels[batch_start:batch_start + test_batch_size]
    test_probabilities = sess.run(vgg_obj.probs, feed_dict={vgg_obj.imgs: img_batch, training: False, keep_prob:0.9})

    top_1_correct, top_3_correct, top_5_correct = [0] * 3
    for sample_idx, sample in enumerate(test_probabilities):
        top_predictions = (np.argsort(sample)[::-1])[:5]
        test_label = labels_batch[sample_idx]
        if test_label in top_predictions[:1]:
            top_1_correct += 1
        if test_label in top_predictions[:3]:
            top_3_correct += 1
        if test_label in top_predictions:
            top_5_correct += 1

    total_top_1.append(top_1_correct)
    total_top_3.append(top_3_correct)
    total_top_5.append(top_5_correct)

print('Test accuracy:')
print('Top-1:', np.mean(total_top_1))
print('Top-3:', np.mean(total_top_3))
print('Top-5:', np.mean(total_top_5))

Test accuracy:
Top-1: 66.2
Top-3: 75.4
Top-5: 78.4


In [14]:
saver = tf.train.Saver(tf.trainable_variables())
saver.save(sess, './checkpoints/bird_662_test_acc.ckpt')

'./checkpoints/bird_662_test_acc.ckpt'