In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import pickle
import tensorflow as tf

  return f(*args, **kwds)


In [8]:
# NUM_CLASSES = 50 # limit classes trained

from collections import defaultdict
from scipy.misc import imread, imresize
from numpy.random import choice

# used to help generate training, testing, and validation datasets by randomly sampling all data.
class Datasets(object):
    """
    Builds the train/val/test image sets used by this notebook.

    `self.sets` maps each of 'train', 'val' and 'test' to a dict with the keys
    'imgs' (cropped, 224x224 images), 'lbls' (class ids) and 'addrs' (image paths
    relative to CUB_200_2011/images/).
    """

    # Order matters: entry i of `ratio` is the sampling probability of SET_KEYS[i].
    SET_KEYS = ('train', 'val', 'test')

    def __init__(self, ratio, subdir="default", num_classes=50, load_new_data=True):
        """
        Ratio: [training, validation, testing] ratios.
        subdir: subdirectory of `dataset_reconstruct` holding {train, val, test}.txt
        num_classes: maximum number of distinct classes read when creating new datasets
        load_new_data: whether or not to create new train/val/test datasets from scratch
        """
        self.num_classes = num_classes
        self.sets = {}
        # NOTE: unpickling can execute arbitrary code; only load a metadata file you trust.
        with open('metadata', 'rb') as meta_file:
            self.meta = pickle.load(meta_file)

        for set_key in self.SET_KEYS:
            self.sets[set_key] = {'imgs': [], 'lbls': [], 'addrs': []}

        if load_new_data:
            all_instances = self.load_all_instances()
            self.spread_data(ratio, all_instances)
            self.save_addrs(subdir)
        else:
            self.load_from_addrs(subdir)

    def load_single_img(self, bb_coords, im_addr):
        """
        bb_coords = x, y, width, and height of the bounding box (anything `int()` accepts).
        im_addr = filepath of the image relative to CUB_200_2011/images/

        Returns the image cropped to the bounding box and resized to 224x224.
        Raises ValueError if the image or the cropped region contains no pixels.
        """
        img = imread('CUB_200_2011/images/' + im_addr)

        bb_x, bb_y, bb_w, bb_h = [int(x) for x in bb_coords]
        if len(img.shape) == 2:
            # Grayscale image: replicate the single channel so every image has 3 channels.
            img = np.dstack([img, img, img])
        if 0 in img.shape:
            raise ValueError('image has no pixels: ' + im_addr)

        img = img[bb_y:bb_y + bb_h, bb_x:bb_x + bb_w, :]
        if 0 in img.shape:
            raise ValueError('bounding box selects no pixels: ' + im_addr)
        img = imresize(img, (224, 224))

        return img

    def load_all_instances(self):
        """
        Loads in all images and their classes, before creating train/val/test datasets with them.
        Stops at the first row whose class would exceed `self.num_classes` distinct classes
        (this assumes the metadata rows are grouped by class -- TODO confirm).
        Returns: {class_id -> [(processed image, image address), ...]}
        """
        class_ids = set()

        ALL_INSTANCES = defaultdict(list) # maps class id to list of images in that class.

        for index, row in self.meta.iterrows():
            im_id, im_addr, bb_x, bb_y, bb_w, bb_h, class_id = row

            if class_id not in class_ids:
                if len(class_ids) >= self.num_classes:
                    break
                class_ids.add(class_id)

            try:
                img = self.load_single_img([bb_x, bb_y, bb_w, bb_h], im_addr)
            except Exception:
                # Best effort: skip images that cannot be read or cropped.
                continue

            ALL_INSTANCES[class_id].append((img, im_addr))

        return ALL_INSTANCES

    def save_addrs(self, subdir):
        """
        Only run for the case of creating new train/val/test datasets: saves addresses/classes to file, so that
        clearing output and restarting will not change the dataset makeup.
        Writes one "<class_id> <image_addr>" line per image to
        ./dataset_reconstruct/<subdir>/{train, val, test}.txt (the directory must already exist).
        """
        for set_type in self.sets.keys():
            with open('./dataset_reconstruct/' + subdir + '/' + set_type + '.txt', 'w') as outf:
                for lbl, addr in zip(self.sets[set_type]['lbls'], self.sets[set_type]['addrs']):
                    outf.write('{} {}\n'.format(lbl, addr))

    def load_from_addrs(self, subdir):
        """
        subdir = subdirectory under `dataset_reconstruct`
        Loads dataset from lists of image addresses: {train, val, test}.txt
        Lines whose address is missing from the metadata, or whose image cannot be loaded, are skipped.
        """
        for set_type in self.SET_KEYS:
            imgs, lbls, addrs = [], [], []
            with open('./dataset_reconstruct/' + subdir + '/' + set_type + '.txt', 'r') as inf:
                for line in inf:
                    if not line.strip():
                        continue
                    class_id, im_addr = line.split()

                    matches = self.meta.loc[self.meta['image_addr'] == im_addr]
                    if matches.empty:
                        # No bounding box known for this address; do not reuse a stale one.
                        continue
                    _, _, bb_x, bb_y, bb_w, bb_h, _ = matches.iloc[-1]

                    try:
                        img = self.load_single_img([bb_x, bb_y, bb_w, bb_h], im_addr)
                    except Exception:
                        # Best effort: skip images that cannot be read or cropped.
                        continue

                    imgs.append(img)
                    # Store integer labels, matching what spread_data() produces.
                    lbls.append(int(class_id))
                    addrs.append(im_addr)

            self.sets[set_type]['imgs'] = np.array(imgs)
            self.sets[set_type]['lbls'] = np.array(lbls)
            self.sets[set_type]['addrs'] = np.array(addrs)

    def spread_data(self, ratio, all_instances):
        """
        Forms training, validation, and testing set by random sampling.
        ratio: probabilities of an image landing in the train, val and test set (in that order).
        all_instances: {class_id -> [(image, image address), ...]} as returned by load_all_instances().
        """
        for class_id, imgs in all_instances.items():
            # Every image is assigned independently, so each class is split in roughly
            # the requested proportions across the three sets.
            for img, img_addr in imgs:
                set_type = self.SET_KEYS[choice(len(self.SET_KEYS), p=ratio)]
                self.sets[set_type]['imgs'].append(img)
                self.sets[set_type]['lbls'].append(class_id)
                self.sets[set_type]['addrs'].append(img_addr)
        for set_type in self.sets.keys():
            self.sets[set_type]['imgs'] = np.array(self.sets[set_type]['imgs'])
            self.sets[set_type]['lbls'] = np.array(self.sets[set_type]['lbls'])
            self.sets[set_type]['addrs'] = np.array(self.sets[set_type]['addrs'])

    def show_dataset_size(self):
        """
        Print the array shape of the training, validation, and testing image sets
        """
        print('Train set size:', self.sets['train']['imgs'].shape)
        print('Validation set size:', self.sets['val']['imgs'].shape)
        print('Test set size:', self.sets['test']['imgs'].shape)

# Sampling probabilities for the [train, val, test] sets, in that order (see Datasets.__init__).
train_test_ratio = [0.5, 0.25, 0.25]

# Uncomment to draw a fresh random split from scratch (this also rewrites the saved address lists):
# datasets = Datasets(train_test_ratio)
# datasets.show_dataset_size()

In [9]:
# Rebuild the train/validation/test datasets from the image addresses stored under
# ./dataset_reconstruct/default/ (load_new_data=False, so no new random split is drawn).
new_datasets = Datasets(train_test_ratio, load_new_data=False)
new_datasets.show_dataset_size()

`imread` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imread`` instead.
`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``skimage.transform.resize`` instead.


Train set size: (1441, 224, 224, 3)
Validation set size: (695, 224, 224, 3)
Test set size: (753, 224, 224, 3)


In [None]:
import vgg as vgg_mod
import importlib
importlib.reload(vgg_mod)  # pick up local edits to vgg.py without restarting the kernel

# Number of classes; sets the width of the one-hot label encoding.
# NOTE(review): must match the width of vgg_obj.logits -- confirm against vgg.py.
NUM_CLASSES = 50

sess = tf.Session()

imgs = tf.placeholder(tf.float32, [None, 224, 224, 3])
vgg_obj = vgg_mod.vgg16(imgs, 'vgg16_weights.npz', sess)

# One integer class id per image in the batch (rank-1, batch size left open).
labels_ph = tf.placeholder(tf.int64, [None], name='labels_ph')
labels_ph_oh = tf.one_hot(labels_ph, NUM_CLASSES)

# Get trainable variables for the last three custom FC layers (featuring dropout, etc)
fc_stem = 'fully_connected'
trainable_vars_list = []
for i in ['', '_1', '_2']:
    trainable_weight_var = tf.contrib.framework.get_variables(fc_stem + i + '/weights:0')
    trainable_bias_var = tf.contrib.framework.get_variables(fc_stem + i + '/biases:0')
    # get_variables returns a list, so concatenate to keep var_list a flat list of variables.
    trainable_vars_list += trainable_weight_var + trainable_bias_var

# Warm-up phase: only the three FC layers are updated, with a larger learning rate.
# NOTE(review): the loss is an unreduced, element-wise sigmoid cross-entropy on one-hot
# labels; presumably a softmax cross-entropy was intended for single-label data -- confirm.
optimizer_warm = tf.train.AdamOptimizer(0.001, 0.9, 0.999)
loss_warm = tf.nn.sigmoid_cross_entropy_with_logits(logits=vgg_obj.logits, labels=labels_ph_oh)
opt_warm = optimizer_warm.minimize(loss_warm, var_list=trainable_vars_list)

# Full phase: every trainable variable is updated, with a much smaller learning rate.
optimizer_full = tf.train.AdamOptimizer(0.00001, 0.9, 0.999)
loss_full = tf.nn.sigmoid_cross_entropy_with_logits(logits=vgg_obj.logits, labels=labels_ph_oh)
opt_full = optimizer_full.minimize(loss_full)

# Per-sample top-1 hit and its mean over the batch.
correct = tf.equal(tf.argmax(vgg_obj.logits, 1), labels_ph)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

# The initializer resets every variable (including the Adam slots created above), so the
# weights are loaded again afterwards (presumably the pretrained VGG weights -- see vgg.py).
sess.run(tf.global_variables_initializer())
vgg_obj.load_weights(vgg_obj.weights, vgg_obj.sess)



In [None]:
# Training code
BS = 64
# NOTE(review): the hyperparameter notes in this notebook suggest 12 warm-up epochs -- confirm.
NUM_EPOCHS_WARM = 15
NUM_EPOCHS_FULL = 8

# `new_datasets` is the split rebuilt from the saved address lists (the `datasets` variable is
# never created in this notebook). Labels are cast to int64 to match the `labels_ph` placeholder.
train_imgs = new_datasets.sets['train']['imgs']
train_lbls = np.asarray(new_datasets.sets['train']['lbls']).astype(np.int64)

val_imgs = new_datasets.sets['val']['imgs']
val_lbls = np.asarray(new_datasets.sets['val']['lbls']).astype(np.int64)

training = vgg_obj.training
keep_prob = vgg_obj.keep_prob

# Per-epoch history (one entry per trained epoch, across all calls to train_for_epochs):
# mean training accuracy, mean training loss, validation accuracy.
accuracy_vals, loss_vals, val_correct = [], [], []

def train_for_epochs(optimizer_in, loss_in, epochs, correct_in, accuracy_in):
    """
    Train for `epochs` epochs, printing and recording per-epoch metrics.

    optimizer_in: training op to run for each batch (as returned by Optimizer.minimize)
    loss_in: loss tensor evaluated alongside the training op
    epochs: number of passes over the training set
    correct_in: tensor of per-sample correctness, evaluated on the validation set
    accuracy_in: batch accuracy tensor, evaluated on each training batch

    Appends one value per epoch to the module-level `accuracy_vals`, `loss_vals`
    and `val_correct` lists.
    """
    num_train = train_imgs.shape[0]
    for epoch in range(epochs):
        # Shuffle every epoch with a per-epoch seed. Indexing through a permutation keeps
        # images and labels aligned without mutating the dataset or the global RNG state.
        order = np.random.RandomState(epoch).permutation(num_train)

        batch_accs, batch_losses, val_hits = [], [], []
        # Go through the entire dataset once (a trailing partial batch is dropped)
        for i in range(0, num_train - BS + 1, BS):
            # Train a single batch
            batch_idx = order[i:i+BS]
            batch_images, batch_labels = train_imgs[batch_idx], train_lbls[batch_idx]
            accuracy_val, loss_val, _ = sess.run([accuracy_in, loss_in, optimizer_in], feed_dict={imgs: batch_images, labels_ph: batch_labels, training: True, keep_prob: 0.9})
            batch_accs.append(accuracy_val)
            batch_losses.append(np.mean(loss_val))

        # NOTE(review): keep_prob stays at 0.9 during validation; whether dropout is actually
        # disabled by `training: False` depends on vgg.py -- confirm.
        for i in range(0, val_imgs.shape[0], BS):
            batch_images, batch_labels = val_imgs[i:i+BS], val_lbls[i:i+BS]
            val_hits.extend( sess.run(correct_in, feed_dict={imgs: batch_images, labels_ph: batch_labels, training: False, keep_prob:0.9}) )

        epoch_acc, epoch_loss, epoch_val_acc = np.mean(batch_accs), np.mean(batch_losses), np.mean(val_hits)
        accuracy_vals.append(epoch_acc)
        loss_vals.append(epoch_loss)
        val_correct.append(epoch_val_acc)
        print('[%3d] Accuracy: %0.3f  \t  Loss: %0.3f \t Val acc: %0.3f'%(epoch, epoch_acc, epoch_loss, epoch_val_acc))

train_for_epochs(opt_warm, loss_warm, NUM_EPOCHS_WARM, correct, accuracy)
train_for_epochs(opt_full, loss_full, NUM_EPOCHS_FULL, correct, accuracy)

## Notes on training hyperparameters

1. The warm-up phase starts to overfit after ~15 epochs, so use 12 epochs instead; training the full network afterwards will hopefully improve generalization.

In [None]:
# Evaluate top-1 / top-3 / top-5 accuracy on the held-out test set.
# `new_datasets` is the split rebuilt from the saved address lists (the `datasets` variable is
# never created in this notebook). Labels are cast to int64 so they compare equal to the
# integer class indices produced by argsort below.
test_imgs_processed = new_datasets.sets['test']['imgs']
test_labels = np.asarray(new_datasets.sets['test']['lbls']).astype(np.int64)

num_test = len(test_imgs_processed)
test_batch_size = 128
# One True/False entry per test image, so the mean of each list is the accuracy in [0, 1].
total_top_1, total_top_3, total_top_5 = [[] for x in range(3)]

# Step through the whole test set, including the final partial batch.
for batch_start in range(0, num_test, test_batch_size):

    img_batch = test_imgs_processed[batch_start:batch_start + test_batch_size]
    labels_batch = test_labels[batch_start:batch_start + test_batch_size]
    # NOTE(review): keep_prob stays at 0.9 at test time; whether dropout is actually disabled
    # by `training: False` depends on vgg.py -- confirm.
    test_probabilities = sess.run(vgg_obj.probs, feed_dict={vgg_obj.imgs: img_batch, vgg_obj.training: False, vgg_obj.keep_prob:0.9})

    for sample, test_label in zip(test_probabilities, labels_batch):
        # Indices of the five highest-scoring classes, best first.
        top_predictions = (np.argsort(sample)[::-1])[:5]
        total_top_1.append(test_label in top_predictions[:1])
        total_top_3.append(test_label in top_predictions[:3])
        total_top_5.append(test_label in top_predictions)

print('Test accuracy:')
print('Top-1:', np.mean(total_top_1))
print('Top-3:', np.mean(total_top_3))
print('Top-5:', np.mean(total_top_5))

In [None]:
# Checkpoint the current values of the trainable variables only (optimizer state and any
# other non-trainable variables are not passed to the Saver).
saver = tf.train.Saver(tf.trainable_variables())
saver.save(sess, './checkpoints/bird_662_test_acc.ckpt')

In [None]:
# stem is file stem, the next three are arrays, and the last is an array of [top 1, top 3, top 5] accuracies.
def save_train_acc_loss(stem, trainaccs, losses, valaccs, testaccs):
    """
    Save the results of a run as two .npy files under ./run_results/.

    stem: file name prefix
    trainaccs, losses, valaccs: equally shaped sequences; stacked into one array and
        written to <stem>_accuracy_progress.npy
    testaccs: [top 1, top 3, top 5] test results, written to <stem>_test_accs.npy
    """
    np.save('./run_results/' + stem + '_accuracy_progress', np.stack([trainaccs, losses, valaccs]))
    np.save('./run_results/' + stem + '_test_accs', testaccs)

# Top-1/3/5 summary values, computed the same way the evaluation cell prints them.
test_accs = [np.mean(total_top_1), np.mean(total_top_3), np.mean(total_top_5)]
save_train_acc_loss('662testacc', accuracy_vals, loss_vals, val_correct, test_accs)

In [None]:
# Palette of 20 hex colour codes.
color_sequence = [
    '#1f77b4', '#aec7e8', '#ff7f0e', '#ffbb78',
    '#2ca02c', '#98df8a', '#d62728', '#ff9896',
    '#9467bd', '#c5b0d5', '#8c564b', '#c49c94',
    '#e377c2', '#f7b6d2', '#7f7f7f', '#c7c7c7',
    '#bcbd22', '#dbdb8d', '#17becf', '#9edae5',
]

fig, ax = plt.subplots(1, 1, figsize=(10, 7.5))

# Hide the frame: none of the four spines is drawn.
for side in ('top', 'bottom', 'right', 'left'):
    ax.spines[side].set_visible(False)

# Ticks only on the bottom and left edges of the plot.
ax.get_xaxis().tick_bottom()
ax.get_yaxis().tick_left()

ax.set_ylim(0, 0.803)