# Import Utilities #

In [1]:
%matplotlib inline
import utils; reload(utils)
from utils import *
from __future__ import division, print_function

Using gpu device 0: Tesla K80 (CNMeM is enabled with initial size: 95.0% of memory, cuDNN 5110)
Using Theano backend.


In [2]:
import resnet50; reload (resnet50)
from resnet50 import Resnet50

**Set image ordering to channels-first**

In [3]:
from keras import backend
backend.set_image_dim_ordering('th')

The dataset we load with the --data option is in /input.

The root directory of Jupyter is /output.

In [8]:
path = '/input/dogscats/'
batch_size=32 # batch_size=64 causes a memory error on the pre-computed data; with fit_generator it is fine.

# ResNet50 Model #

Create a resnet model without final layers, thus we can add new layers for finetuning.

In [5]:
res50 = Resnet50(include_top=False).model

  .format(self.name, input_shape))


Downloading data from http://www.platform.ai/models/resnet_nt.h5
Downloading data from http://www.platform.ai/models/imagenet_class_index.json


The output of the res50 model is 2048 activation maps of size 7-by-7.

In [6]:
res50.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, 3, 224, 224)   0                                            
____________________________________________________________________________________________________
lambda_1 (Lambda)                (None, 3, 224, 224)   0           input_1[0][0]                    
____________________________________________________________________________________________________
zeropadding2d_1 (ZeroPadding2D)  (None, 3, 230, 230)   0           lambda_1[0][0]                   
____________________________________________________________________________________________________
conv1 (Convolution2D)            (None, 64, 112, 112)  9472        zeropadding2d_1[0][0]            
___________________________________________________________________________________________

# get_batches() #

    batches: images without data augmentation

    da_batches: images with data augmentation (parameters are specified as gen_t)

    val_batches: validation batches

In [9]:
# Augmenting generator: produces slightly different versions of each image.
gen_t = image.ImageDataGenerator(
    rotation_range=15,
    height_shift_range=0.05,
    horizontal_flip=True,
    shear_range=0.1,
    channel_shift_range=30,
    width_shift_range=0.1)

# Change this to work with larger images; the default is (224,224).
target_size = (224, 224)

# Options shared by all three iterators below (note shuffle=False).
_iter_kwargs = dict(batch_size=batch_size, target_size=target_size, shuffle=False)

# Training images without augmentation.
batches = get_batches(path + 'train', gen=image.ImageDataGenerator(), **_iter_kwargs)
# Training images with augmentation (gen_t).
da_batches = get_batches(path + 'train', gen_t, **_iter_kwargs)
# Validation images without augmentation.
val_batches = get_batches(path + 'valid', gen=image.ImageDataGenerator(), **_iter_kwargs)
# test_batches is created further down, once the test images sit inside a sub-directory.

Found 23000 images belonging to 2 classes.
Found 23000 images belonging to 2 classes.
Found 2000 images belonging to 2 classes.


**Oops! Here's the problem**

There should be one more directory level inside test1 so that flow_from_directory can work.

We create a new directory /output/test/unknown and copy all test images (which are in /input/dogscats/test1) into it.

In [13]:
test_batches = get_batches(path+'test1', gen=image.ImageDataGenerator(), batch_size=batch_size, shuffle=False, class_mode=None)

Found 0 images belonging to 0 classes.


In [132]:
%mkdir -p test/unknown

In [133]:
%cd /input/dogscats/test1
%cp *.jpg /output/test/unknown

/input/dogscats/test1


We have to cd back to /output after copying completes.

In [134]:
%cd /output

/output


And now we have the right test batch.

In [135]:
test_batches = get_batches('/output/test', gen=image.ImageDataGenerator(), batch_size=batch_size, shuffle=False, class_mode=None)

Found 12500 images belonging to 1 classes.


In [10]:
# Class ids as reported by each batch iterator
# (alternative: unpack everything at once from get_classes(path)).
val_classes = val_batches.classes
trn_classes_wo_da = batches.classes
trn_classes = da_batches.classes

# One-hot encoded versions of the class ids above.
val_labels = to_categorical(val_classes)
trn_labels_wo_da = to_categorical(trn_classes_wo_da)
trn_labels = to_categorical(trn_classes)
# test_filenames = test_batches.filenames  (test_batches is only created later)

In [14]:
filenames = batches.filenames
val_filenames = val_batches.filenames

# Pre-compute Output Features #

Pre-compute output features from res50 model, and call save_array() if we have to reuse them afterward.

    trn_features_wo_da: training features without data augmentation.

In [15]:
# Pre-compute output of ResNet Blocks
val_features = res50.predict_generator(val_batches, val_batches.nb_sample)
print ('val_features complete.')
trn_features_wo_da = res50.predict_generator(batches, batches.nb_sample) # trn_features w/o data augmentation
print ('trn_featutes_wo_da complete.')

val_features complete.
trn_featutes_wo_da complete.


In [136]:
test_features = res50.predict_generator(test_batches, test_batches.nb_sample)
print ('test_featutes complete.')

test_featutes complete.


# Save Pre-computed Data #

**It is strongly recommended to save any pre-computed data, since the kernel dies very often during pre-computing and predicting.**

In [16]:
save_array('trn_features_wo_da.dat', trn_features_wo_da)
save_array('trn_classes_wo_da.dat',trn_classes_wo_da)
save_array('trn_labels_wo_da.dat',trn_labels_wo_da)
save_array('val_features.dat', val_features)
save_array('val_classes.dat',val_classes)
save_array('val_labels.dat',val_labels)

In [137]:
save_array('test_features.dat', test_features)

# Res50 with fully-connected Layers #

We use Sequential() to create this model.

In [49]:
def get_fc_layers(p):
    """Build the list of layers for the fully-connected head placed on top of `res50`.

    The first layer takes its input shape from the output shape of the last
    layer of the module-level `res50` model, with the first entry dropped.

    p: dropout probability of the last Dropout layer; the two earlier Dropout
       layers use p/4 and p/2 respectively.

    Returns a list of layers that ends in a 2-unit softmax Dense layer.
    """
    feature_shape = res50.layers[-1].output_shape[1:]

    # Pooling stage applied to the activation maps, followed by flattening.
    layers = [
        MaxPooling2D(input_shape=feature_shape),
        BatchNormalization(axis=1),
        Dropout(p/4),
        Flatten(),
    ]

    # Two hidden Dense blocks, each followed by batch norm and dropout.
    for units, rate in ((1024, p/2), (512, p)):
        layers.append(Dense(units, activation='relu'))
        layers.append(BatchNormalization())
        layers.append(Dropout(rate))

    # Two-way softmax output.
    layers.append(Dense(2, activation='softmax'))
    return layers

We set dropout probability to 0.8. I tried p=0.5, but the model immediately overfit.

However, I haven't tried setting p=0.5 for all 3 Dropout layers (instead of p/4, p/2, p). Maybe that would give a better result.

In [50]:
p = 0.8
model_res50_fc = Sequential(get_fc_layers(p))
model_res50_fc.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
maxpooling2d_3 (MaxPooling2D)    (None, 2048, 3, 3)    0           maxpooling2d_input_2[0][0]       
____________________________________________________________________________________________________
batchnormalization_4 (BatchNorma (None, 2048, 3, 3)    8192        maxpooling2d_3[0][0]             
____________________________________________________________________________________________________
dropout_4 (Dropout)              (None, 2048, 3, 3)    0           batchnormalization_4[0][0]       
____________________________________________________________________________________________________
flatten_2 (Flatten)              (None, 18432)         0           dropout_4[0][0]                  
___________________________________________________________________________________________

In [51]:
from keras.optimizers import Nadam
model_res50_fc.compile(optimizer=Nadam(), loss='categorical_crossentropy', metrics=['accuracy'])

First, we finetune the fully-connected layers using training data without data augmentation.

During training, we save model weights whenever we get good result.

**It takes 15~20 minutes to run .fit() and .predict() the first time.**

In [52]:
# NOTE(review): this replaces the optimizer's `lr` attribute with a plain Python float.
# Presumably the value is only read when the training function is first built, so the
# later `optimizer.lr = ...` assignments in this notebook may have no effect — confirm,
# and consider backend.set_value(...) on the original variable instead.
model_res50_fc.optimizer.lr = 1e-4
# One epoch on the pre-computed, un-augmented training features, validated on val_features.
model_res50_fc.fit(trn_features_wo_da, trn_labels_wo_da, nb_epoch=1, batch_size=batch_size, validation_data=(val_features,val_labels))

Train on 23000 samples, validate on 2000 samples
Epoch 1/1


<keras.callbacks.History at 0x7f37a3f97910>

In [53]:
model_res50_fc.optimizer.lr = 1e-4
model_res50_fc.fit(trn_features_wo_da, trn_labels_wo_da, nb_epoch=1, batch_size=batch_size, validation_data=(val_features,val_labels))

Train on 23000 samples, validate on 2000 samples
Epoch 1/1


<keras.callbacks.History at 0x7f37a4014190>

In [55]:
model_res50_fc.optimizer.lr = 1e-5
model_res50_fc.fit(trn_features_wo_da, trn_labels_wo_da, nb_epoch=3, batch_size=batch_size, validation_data=(val_features,val_labels))

Train on 23000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f37a3f97f10>

**This result is pretty good: slightly overfit, with high validation accuracy. Let's save this checkpoint.**

In [85]:
model_res50_fc.save_weights('model_res50_fc_4.h5')

In [77]:
model_res50_fc.optimizer.lr = 1e-5
model_res50_fc.fit(trn_features_wo_da, trn_labels_wo_da, nb_epoch=2, batch_size=batch_size, validation_data=(val_features,val_labels))

Train on 23000 samples, validate on 2000 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f37a3f97b10>

**After training for several more epochs, it seems the model has overfitted (loss > val_loss). It is time to use data augmentation.**

Oh, before that we have to load our best weights.

In [88]:
# NOTE(review): no visible cell writes 'model_res50_fc_3.h5' (the checkpoint saved earlier in
# this notebook is 'model_res50_fc_4.h5') — confirm this is the intended file.
model_res50_fc.load_weights('model_res50_fc_3.h5')

Pre-compute output features of res50 as before. But this time our input are images with data augmentation (da_batches).

    trn_features: output features of res50 with data augmentation

In [192]:
# Run the truncated ResNet over the augmented training images. The sample count is taken
# from da_batches itself, i.e. from the same iterator that is being consumed.
trn_features = res50.predict_generator(da_batches, da_batches.nb_sample)
print ('trn_featutes complete.')

trn_featutes complete.


In [193]:
save_array('trn_features.dat', trn_features)

In [114]:
model_res50_fc.optimizer.lr = 1e-4
model_res50_fc.fit(trn_features, trn_labels, nb_epoch=2, batch_size=batch_size, validation_data=(val_features,val_labels))

Train on 23000 samples, validate on 2000 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f37a332b410>

In [115]:
model_res50_fc.optimizer.lr = 1e-5
model_res50_fc.fit(trn_features, trn_labels, nb_epoch=2, batch_size=batch_size, validation_data=(val_features,val_labels))

Train on 23000 samples, validate on 2000 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f37a337bfd0>

**We train the model using augmented data, and obtain val_loss decreasing from 0.0485 to 0.0414. Great, save it.**

In [113]:
model_res50_fc.load_weights('model_res50_fc_3.h5')

# Pseudo-labeling (fully-connected model) #

https://shaoanlu.wordpress.com/2017/04/10/a-simple-pseudo-labeling-function-implementation-in-keras/

In [207]:
pseudo_preds = model_res50_fc.predict(test_features, batch_size=batch_size)

In [240]:
# NOTE(review): lr is assigned as a plain float; whether an already-built training function
# picks this up should be confirmed (backend.set_value on the optimizer variable may be needed).
model_res50_fc.optimizer.lr = 1e-5

# Number of mini-batch updates to run (1200).
num_iter = 600*2
# Each combined mini-batch holds size_trn labelled training rows plus size_test
# pseudo-labelled test rows.
size_trn = 44
size_test = 16
# Whole mini-batches only: rows beyond the last full mini-batch are never visited.
num_batch_per_epoch_trn = trn_features.shape[0] // size_trn
num_batch_per_epoch_test = test_features.shape[0] // size_test
# Random visiting order of the mini-batches of each set.
index_trn = np.random.permutation(num_batch_per_epoch_trn)
index_test = np.random.permutation(num_batch_per_epoch_test)
for i in range(num_iter):
    i_trn = index_trn[i % num_batch_per_epoch_trn]
    i_test = index_test[i % num_batch_per_epoch_test]
    trn_rows = slice(size_trn*i_trn, size_trn*(i_trn+1))
    test_rows = slice(size_test*i_test, size_test*(i_test+1))

    # Training rows keep their labels; test rows use the model's earlier predictions as labels.
    comb_features = np.concatenate((trn_features[trn_rows], test_features[test_rows]), axis=0)
    comb_labels = np.concatenate((trn_labels[trn_rows], pseudo_preds[test_rows]), axis=0)

    model_res50_fc.train_on_batch(comb_features, comb_labels)

    # Draw a new visiting order whenever one of the two sets has been fully traversed.
    if (i+1) % num_batch_per_epoch_trn == 0:
        index_trn = np.random.permutation(num_batch_per_epoch_trn)
    if (i+1) % num_batch_per_epoch_test == 0:
        index_test = np.random.permutation(num_batch_per_epoch_test)

In [242]:
model_res50_fc.optimizer.lr = 1e-9
model_res50_fc.fit(trn_features, trn_labels, nb_epoch=1, batch_size=batch_size, validation_data=(val_features,val_labels))

Train on 23000 samples, validate on 2000 samples
Epoch 1/1


<keras.callbacks.History at 0x7f342d0ffc90>

In [232]:
model_res50_fc.optimizer.lr = 1e-9
model_res50_fc.fit(trn_features_wo_da, trn_labels_wo_da, nb_epoch=1, batch_size=batch_size, validation_data=(val_features,val_labels))

Train on 23000 samples, validate on 2000 samples
Epoch 1/1


<keras.callbacks.History at 0x7f3206be2510>

**Best result after pseudo-labeling:**

    - loss: 0.0317 - acc: 0.9882 - val_loss: 0.0335 - val_acc: 0.9880

In [214]:
model_res50_fc.save_weights('model_res50_fc_8.h5')

In [239]:
model_res50_fc.load_weights('model_res50_fc_8.h5')

# Res50 with Inception #

Here we create another model using inception block via functional API. 

For more information about inception blocks, please see fast.ai lesson 7.

In [155]:
def conv2d_bn(x, nb_filter, nb_row, nb_col, subsample=(1, 1)):
    """Apply a ReLU convolution with 'same' border mode to `x`, then batch-normalise on axis 1.

    x: input tensor.
    nb_filter, nb_row, nb_col: number of filters and kernel size, passed to Convolution2D.
    subsample: stride of the convolution, (1, 1) by default.

    Returns the output tensor of the BatchNormalization layer.
    """
    conv_layer = Convolution2D(nb_filter, nb_row, nb_col,
                               border_mode='same',
                               activation='relu',
                               subsample=subsample)
    activated = conv_layer(x)
    return BatchNormalization(axis=1)(activated)

In [156]:
def incep_block(x):
    """Build one inception-style block on `x` and return the concatenated branches.

    Four parallel branches are computed from `x`, each containing exactly one
    stride-(2, 2) step, and their outputs are concatenated along axis 1:
      * a 1x1 convolution (32 filters),
      * a 1x1 convolution (24) followed by a 5x5 convolution (32),
      * a 1x1 convolution (32) followed by two 3x3 convolutions (48 each),
      * a 3x3 average pooling followed by a 1x1 convolution (16).
    """
    stride = (2, 2)

    # Branch 1: single 1x1 convolution.
    path_1x1 = conv2d_bn(x, 32, 1, 1, subsample=stride)

    # Branch 2: 1x1 reduction, then 5x5 convolution.
    path_5x5 = conv2d_bn(x, 24, 1, 1)
    path_5x5 = conv2d_bn(path_5x5, 32, 5, 5, subsample=stride)

    # Branch 3: 1x1 reduction, then two stacked 3x3 convolutions.
    path_3x3 = conv2d_bn(x, 32, 1, 1)
    path_3x3 = conv2d_bn(path_3x3, 48, 3, 3)
    path_3x3 = conv2d_bn(path_3x3, 48, 3, 3, subsample=stride)

    # Branch 4: average pooling, then 1x1 convolution.
    pooled = AveragePooling2D((3, 3), strides=stride, border_mode='same')(x)
    path_pool = conv2d_bn(pooled, 16, 1, 1)

    branches = [path_1x1, path_5x5, path_3x3, path_pool]
    return merge(branches, mode='concat', concat_axis=1)

In [157]:
# Input shaped like the output of res50's last layer (first entry of the shape dropped).
inp = Input(res50.layers[-1].output_shape[1:])
x = BatchNormalization(axis=1)(inp)

# Three inception blocks stacked one after another.
for _block_no in range(3):
    x = incep_block(x)

x = Dropout(0.7)(x)
# 2-filter 3x3 convolution, globally average-pooled and passed through softmax.
x = Convolution2D(2, 3, 3, border_mode='same')(x)
x = GlobalAveragePooling2D()(x)
outp = Activation('softmax')(x)

In [158]:
model_res50_incep = Model([inp], outp)

In [159]:
model_res50_incep.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

In [161]:
model_res50_incep.optimizer.lr = 1e-4
model_res50_incep.fit(trn_features_wo_da, trn_labels_wo_da, nb_epoch=1, batch_size=batch_size, validation_data=(val_features,val_labels))

Train on 23000 samples, validate on 2000 samples
Epoch 1/1


<keras.callbacks.History at 0x7f34476e6490>

In [162]:
model_res50_incep.optimizer.lr = 1e-5
model_res50_incep.fit(trn_features_wo_da, trn_labels_wo_da, nb_epoch=2, batch_size=batch_size, validation_data=(val_features,val_labels))

Train on 23000 samples, validate on 2000 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f357c5cb8d0>

In [197]:
pseudo_preds = model_res50_incep.predict(test_features, batch_size=batch_size)

# Pseudo-labeling (incep. block model) #

In [201]:
# NOTE(review): lr is assigned as a plain float; whether an already-built training function
# picks this up should be confirmed (backend.set_value on the optimizer variable may be needed).
model_res50_incep.optimizer.lr = 1e-5

# Number of mini-batch updates to run (1200).
num_iter = 600*2
# Each combined mini-batch holds size_trn labelled training rows plus size_test
# pseudo-labelled test rows.
size_trn = 44
size_test = 16
# Whole mini-batches only: rows beyond the last full mini-batch are never visited.
num_batch_per_epoch_trn = trn_features.shape[0] // size_trn
num_batch_per_epoch_test = test_features.shape[0] // size_test
# Random visiting order of the mini-batches of each set.
index_trn = np.random.permutation(num_batch_per_epoch_trn)
index_test = np.random.permutation(num_batch_per_epoch_test)
for i in range(num_iter):
    i_trn = index_trn[i % num_batch_per_epoch_trn]
    i_test = index_test[i % num_batch_per_epoch_test]
    trn_rows = slice(size_trn*i_trn, size_trn*(i_trn+1))
    test_rows = slice(size_test*i_test, size_test*(i_test+1))

    # Training rows keep their labels; test rows use the model's earlier predictions as labels.
    comb_features = np.concatenate((trn_features[trn_rows], test_features[test_rows]), axis=0)
    comb_labels = np.concatenate((trn_labels[trn_rows], pseudo_preds[test_rows]), axis=0)

    model_res50_incep.train_on_batch(comb_features, comb_labels)

    # Draw a new visiting order whenever one of the two sets has been fully traversed.
    if (i+1) % num_batch_per_epoch_trn == 0:
        index_trn = np.random.permutation(num_batch_per_epoch_trn)
    if (i+1) % num_batch_per_epoch_test == 0:
        index_test = np.random.permutation(num_batch_per_epoch_test)

In [202]:
model_res50_incep.optimizer.lr = 1e-9
model_res50_incep.fit(trn_features_wo_da, trn_labels_wo_da, nb_epoch=1, batch_size=batch_size, validation_data=(val_features,val_labels))

Train on 23000 samples, validate on 2000 samples
Epoch 1/1


<keras.callbacks.History at 0x7f34507c4590>

In [203]:
model_res50_incep.load_weights('model_res50_incep_2.h5')

# Submission #

In [206]:
import os

# Predict on the pre-computed test features.
preds = model_res50_fc.predict(test_features, batch_size = batch_size*2)
# Column 1 is taken as the "dog" probability — presumably class index 1 is dogs; verify
# against the training iterator's class indices.
isdog = preds[:,1]
#isdog = isdog.clip(min=0.02, max=0.98)
# NOTE: this rebinds `filenames`, which earlier held the training file names.
filenames = test_batches.filenames
# The numeric id is the file's base name without its extension (e.g. 'unknown/123.jpg' -> 123);
# parsing it with os.path avoids depending on the length of the sub-directory name.
ids = np.array([int(os.path.splitext(os.path.basename(f))[0]) for f in filenames])
# Two columns: id, predicted dog probability.
subm = np.stack([ids,isdog], axis=1)

submission_file_name = 'subm_res50_fc_ver1.csv'
np.savetxt(submission_file_name, subm, fmt='%d,%.5f', header='id,label', comments='')