In [1]:
# Rather than importing everything manually, we'll make things easy
#   and load them all in utils.py, and just import them from there.
%matplotlib inline
import utils; reload(utils)
from utils import *

Using gpu device 0: Tesla K80 (CNMeM is disabled)
Using Theano backend.


In [2]:
%matplotlib inline
from __future__ import division,print_function
import os, json
from glob import glob
import numpy as np
import scipy
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import confusion_matrix
np.set_printoptions(precision=4, linewidth=100)
from matplotlib import pyplot as plt
import utils; reload(utils)
from utils import plots, get_batches, plot_confusion_matrix, get_data

In [3]:
from numpy.random import random, permutation
from scipy import misc, ndimage
from scipy.ndimage.interpolation import zoom

import keras
from keras import backend as K
from keras.utils.data_utils import get_file
from keras.models import Sequential
from keras.layers import Input
from keras.layers.core import Flatten, Dense, Dropout, Lambda
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.optimizers import SGD, RMSprop
from keras.preprocessing import image

In [4]:
#path = "../data/dogsandcats_small/" # we copied a fraction of the full set for tests
path = "../data/dogsandcats/"
model_path = path + "models/"
if not os.path.exists(model_path):
    os.mkdir(model_path)
    print('Done')

In [6]:
from vgg16 import Vgg16

In [7]:
batch_size = 100

In [8]:
def get_batches(dirname, gen=image.ImageDataGenerator(), shuffle=True, 
                batch_size=batch_size, class_mode='categorical'):
    return gen.flow_from_directory(path+dirname, target_size=(224,224), 
                class_mode=class_mode, shuffle=shuffle, batch_size=batch_size)

In [9]:
# Use batch size of 1 since we're just doing preprocessing on the CPU
val_batches = get_batches('valid', shuffle=False, batch_size=batch_size) # no shuffle as we store conv output
trn_batches = get_batches('train', shuffle=False, batch_size=batch_size) # no shuffle as we store conv output

Found 4000 images belonging to 2 classes.
Found 21000 images belonging to 2 classes.


In [10]:
val_batches.filenames[0:10]

['cat/cat.1262.jpg',
 'cat/cat.9495.jpg',
 'cat/cat.3044.jpg',
 'cat/cat.1424.jpg',
 'cat/cat.8210.jpg',
 'cat/cat.8847.jpg',
 'cat/cat.308.jpg',
 'cat/cat.10802.jpg',
 'cat/cat.5060.jpg',
 'cat/cat.10406.jpg']

In [11]:
val_labels = onehot(val_batches.classes)
trn_labels = onehot(trn_batches.classes)

In [12]:
'''try:
    trn = load_array(model_path+'train_data.bc')
except:
    trn = get_data(path+'train')
    save_array(model_path+'train_data.bc', trn)'''

In [13]:
'''try:
    val = load_array(model_path+'valid_data.bc')
except:
    val = get_data(path+'valid')
    save_array(model_path+'valid_data.bc', val)'''

In [14]:
'''gen = image.ImageDataGenerator(rotation_range=10, width_shift_range=0.05, 
                               zoom_range=0.05,
                               #channel_shift_range=10,
                               height_shift_range=0.05, shear_range=0.05, horizontal_flip=False)
trn_batchesRND = gen.flow(trn, trn_labels, batch_size=batch_size)
val_batchesRND = gen.flow(val, val_labels, batch_size=batch_size)'''

In [13]:
if True:
    realvgg = Vgg16()
    conv_layers, fc_layers = split_at(realvgg.model, Flatten)
    #conv_layers, fc_layers = split_at(realvgg.model, Convolution2D)
    conv_model = Sequential(conv_layers)
    conv_model_hash = 'conv_v3'

  .format(self.name, input_shape))


### Will take a few minutes to complete the 1st time

In [14]:
if True:
    try:
        val_convfeatures = load_array(model_path+'valid_'+conv_model_hash+'_features.bc')
        if False: # force update
            raise
    except:
        print('Missing file')
        val_convfeatures = conv_model.predict_generator(val_batches, val_batches.nb_sample)
        save_array(model_path+'valid_'+conv_model_hash+'_features.bc', val_convfeatures)

### Will take a few minutes (maybe 10) to complete the 1st time

In [15]:
if True:
    try:
        trn_convfeatures = load_array(model_path+'train_'+conv_model_hash+'_features.bc')
        if False: # force update
            raise
    except:
        print('Missing file')
        trn_convfeatures = conv_model.predict_generator(trn_batches, trn_batches.nb_sample)
        save_array(model_path+'train_'+conv_model_hash+'_features.bc', trn_convfeatures)

### Ready to train the model
#### We use VGG top layers but we insert BatchNorm layers
#### BatchNorm layers needs to be initialized properly so we first estimate
#### the mean/var of the layers feeding into them

In [18]:
# see : https://github.com/fastai/courses/blob/master/deeplearning1/nbs/lesson3.ipynb

def proc_wgts(layer, ndo):
    # copy the weights from the pre-trained model
    # original weights are for a 50% drop out
    # we infer the corresponding weight for a new drop out (ndo) level
    return [w*0.5/(1.-ndo) for w in layer.get_weights()]

def get_fc_model(ndo):
    model = Sequential([
        Dense(4096, activation='relu', input_shape=conv_layers[-1].output_shape[1:]),
        Dropout(ndo),
        Dense(4096, activation='relu'),
        Dropout(ndo),
        Dense(2, activation='softmax')
        ])

    for l_new, l_orig in zip(model.layers[0:3], fc_layers[0:3]):
        assert (type(l_new) == type(l_orig))
        l_new.set_weights(proc_wgts(l_orig, ndo))
    
    for layer in model.layers[:-1]:
        layer.trainable = False
        
    model.layers[-1].trainable = True
    
    #opt = RMSprop(lr=0.00001, rho=0.7)
    opt = Adam()
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [62]:
def get_bn_model(p):
    dense_model =  get_fc_model(p)

    k_layer_out0 = K.function([dense_model.layers[0].input, K.learning_phase()],
                              [dense_model.layers[0].output])
    d0_out = k_layer_out0([trn_convfeatures, 0])[0]
    mu0, var0 = d0_out.mean(axis=0), d0_out.var(axis=0)


    k_layer_out2 = K.function([dense_model.layers[0].input, K.learning_phase()],
                              [dense_model.layers[2].output])
    d2_out = k_layer_out2([trn_convfeatures, 0])[0]
    mu2, var2 = d2_out.mean(axis=0), d2_out.var(axis=0)

    bn_model = insert_layer(dense_model, BatchNormalization(), 1)
    bn_model = insert_layer(bn_model, BatchNormalization(), 4) # shifted due to insertion

    bnl1 = bn_model.layers[1]
    bnl4 = bn_model.layers[4]

    #After inserting the layers, we can set their weights to the variance and mean we just calculated.
    bnl1.set_weights([var0, mu0, mu0, var0])
    bnl4.set_weights([var2, mu2, mu2, var2])

    bn_model.compile(Adam(1e-3), 'categorical_crossentropy', ['accuracy'])
    
    for layer in bn_model.layers:
        layer.trainable = False
    bn_model.layers[-1].trainable = True
    
    return bn_model

In [64]:
def train_fresh_bn(mdl, top=2, full=5):
    # top
    for layer in mdl.layers:
        layer.trainable = False
    mdl.layers[-1].trainable = True
    mdl.optimizer.lr = 1e-3
    mdl.fit(trn_convfeatures, trn_labels, validation_data=(val_convfeatures, val_labels), nb_epoch=top)
    # full
    for layer in mdl.layers:
        layer.trainable = True
    mdl.optimizer.lr = 0.01*1e-3
    mdl.fit(trn_convfeatures, trn_labels, validation_data=(val_convfeatures, val_labels), nb_epoch=full)

In [None]:
#bn_model = get_bn_model(0.30)

In [72]:
#train_fresh_bn(bn_model, 2, 5)

### Train one or several models (ensembling)

In [66]:
bn_models = []
for i in range(10): # INFO : change here the size of the ensemble
    bn_models.append( get_bn_model(0.30) )
    train_fresh_bn(bn_models[-1], 2, 8)

Train on 21000 samples, validate on 4000 samples
Epoch 1/2
Epoch 2/2
Train on 21000 samples, validate on 4000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Train on 21000 samples, validate on 4000 samples
Epoch 1/2
Epoch 2/2
Train on 21000 samples, validate on 4000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Train on 21000 samples, validate on 4000 samples
Epoch 1/2
Epoch 2/2
Train on 21000 samples, validate on 4000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Train on 21000 samples, validate on 4000 samples
Epoch 1/2
Epoch 2/2
Train on 21000 samples, validate on 4000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Train on 21000 samples, validate on 4000 samples
Epoch 1/2
Epoch 2/2
Train on 21000 samples, validate on 4000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Train on 21000 samples, validate on 4000 samples
Epoch 1/2
Epoch 2/2
Train on 21000 samples, validate on 4000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Ep

In [64]:
'''i = 0

x_conv_model = Sequential(conv_layers)
for layer in x_conv_model.layers:
    layer.trainable = False

for layer in ll_models[i].layers:
    x_conv_model.add(layer)
    
#for l1,l2 in zip(conv_model.layers[last_conv_idx+1:], fc_model.layers): 
#        l1.set_weights(l2.get_weights())
x_conv_model.compile(optimizer=Adam(1e-5), loss='categorical_crossentropy', metrics=['accuracy'])
#x_conv_model.save_weights(model_path+'no_dropout_bn' + i + '.h5')'''

In [65]:
'''for layer in x_conv_model.layers[-5:]:
    layer.trainable = True
x_conv_model.optimizer.lr = 1e-6'''

In [68]:
'''x_conv_model.fit_generator(trn_batchesRND,
                           samples_per_epoch = min(40*batch_size,trn_batchesRND.n),
                           nb_epoch = 1,
                           validation_data = val_batchesRND,
                           nb_val_samples = min(20*batch_size,val_batchesRND.n))'''

Epoch 1/1


<keras.callbacks.History at 0x7f2c6e529410>

In [69]:
'''for mdl in ll_models:
    for k in range(-len(mdl.layers),0):
        print(k)
        #x_conv_model.layers[k].get_weights()
        #mdl.layers[k].set_weights
        mdl.layers[k].set_weights( x_conv_model.layers[k].get_weights() )'''

-5
-4
-3
-2
-1


In [73]:
if False:
    models = [bn_model] # without ensembling
else:
    models = bn_models # with ensembling

all_val_preds = []
for mdl in models:
    these_val_preds = mdl.predict_on_batch(val_convfeatures)
    assert(len(these_val_preds) == 4000)
    all_val_preds.append( these_val_preds )
mean_val_preds = np.stack(all_val_preds).mean(axis=0)
categorical_accuracy(val_labels, mean_val_preds).eval()

array(0.984499990940094, dtype=float32)

In [None]:
# WARNING : should save each model of the ensemble
#ll_model.save_weights(model_path+'llmodel_finetune1.h5')
#ll_model.load_weights(model_path+'llmodel_finetune1.h5')

In [39]:
test_batches = get_batches('test', shuffle=False, batch_size=batch_size, class_mode=None)
testfiles = test_batches.filenames
testfiles[0:10]

Found 12500 images belonging to 1 classes.


['test/10592.jpg',
 'test/7217.jpg',
 'test/3653.jpg',
 'test/4382.jpg',
 'test/2924.jpg',
 'test/10.jpg',
 'test/10916.jpg',
 'test/12374.jpg',
 'test/1871.jpg',
 'test/11645.jpg']

### Will take a few minutes (maybe 5) to complete the 1st time

In [40]:
try:
    test_convfeatures = load_array(model_path+'test_'+conv_model_hash+'_features.bc')
    if False: # force update
        raise
except:
    print('Missing file')
    test_convfeatures = conv_model.predict_generator(test_batches, test_batches.nb_sample)
    save_array(model_path+'test_'+conv_model_hash+'_features.bc', test_convfeatures)

In [74]:
if False:
    models = [bn_model] # without ensembling
else:
    models = bn_models # with ensembling

all_test_preds = []
for mdl in models:
    these_test_preds = mdl.predict_on_batch(test_convfeatures)
    assert(len(these_test_preds) == 12500)
    all_test_preds.append( these_test_preds )
mean_test_preds = np.stack(all_test_preds).mean(axis=0)

In [75]:
mean_test_preds[0:10]

array([[  9.9996e-01,   3.8756e-05],
       [  9.9993e-01,   6.8629e-05],
       [  2.0637e-04,   9.9979e-01],
       [  9.9551e-01,   4.4935e-03],
       [  1.4125e-02,   9.8587e-01],
       [  1.0000e+00,   3.3480e-06],
       [  2.2238e-03,   9.9778e-01],
       [  1.0000e+00,   1.2753e-07],
       [  1.8051e-04,   9.9982e-01],
       [  6.9930e-05,   9.9993e-01]], dtype=float32)

In [76]:
dog_idx = 1
eps = 1e-3 # WARNING : this has significant impact
digits = 3 # WARNING : this has significant impact

cut = lambda x : round(min(max(x,eps),1-eps),digits)

a = sum([p[dog_idx]*math.log(p[dog_idx]) for p in mean_test_preds])/len(mean_test_preds)
b = sum([p[dog_idx]*math.log(cut(p[dog_idx])) for p in mean_test_preds])/len(mean_test_preds)
a, b

(-0.018694336826138514, -0.018949097448258439)

In [77]:
Z1 = [{'id':int(f.split('/')[-1].split('.')[0]), 'label':cut(p[dog_idx])} for f, p in zip(testfiles, mean_test_preds)]
def comp(x,y):
    return int(x['id']) - int(y['id'])
Z1 = sorted(Z1, comp)
Z1[0:18]

[{'id': 1, 'label': 0.999},
 {'id': 2, 'label': 0.999},
 {'id': 3, 'label': 0.999},
 {'id': 4, 'label': 0.999},
 {'id': 5, 'label': 0.001},
 {'id': 6, 'label': 0.001},
 {'id': 7, 'label': 0.001},
 {'id': 8, 'label': 0.001},
 {'id': 9, 'label': 0.001},
 {'id': 10, 'label': 0.001},
 {'id': 11, 'label': 0.001},
 {'id': 12, 'label': 0.999},
 {'id': 13, 'label': 0.001},
 {'id': 14, 'label': 0.003},
 {'id': 15, 'label': 0.001},
 {'id': 16, 'label': 0.001},
 {'id': 17, 'label': 0.998},
 {'id': 18, 'label': 0.999}]

In [79]:
import csv

with open('predictions_v4_9.csv', 'w') as csvfile:
    fieldnames = ['id', 'label']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    for z in Z1:
        writer.writerow(z)