# Lesson 03 - Under and Over Fitting

The previous model, in which we used transfer learning for the Cats and Dogs Redux Kaggle competition, was underfitting: the training accuracy was lower than the validation accuracy. This is due to the dropout operations in the dense (fully connected) layers of the VGG-16 model. We should somehow remove the dropout operations.

In [1]:
# import the model and utils
import numpy as np
from utils import *
from importlib import reload
import vgg16_for_keras2
reload(vgg16_for_keras2)
from vgg16_for_keras2 import Vgg16

Using TensorFlow backend.


In [2]:
vgg = Vgg16()

In [3]:
%ls data

[0m[01;31mdata.zip[0m               subm_full.csv            [01;34mtest10[0m/
[01;34msample10-fast-ai[0m/      subm_sample.csv          [01;34mtrain[0m/
[01;34msample-fast-ai[0m/        subm_sample_ordered.csv  [01;34mtrain-fast-ai[0m/
sample_submission.csv  [01;34mtest[0m/                    [01;34mvalid-fast-ai[0m/


In [4]:
path = './data/sample-fast-ai/'
batch_size = 5

In [5]:
%ls data/sample-fast-ai/

[0m[01;34mtrain[0m/  [01;34mvalid[0m/


In [6]:
from keras.utils.np_utils import to_categorical

In [7]:
# Build the generators with shuffle=False so that images are yielded in the
# same order as `batches.classes` / `val_batches.classes`. The labels below
# are taken from `.classes`, and these same generators are later passed to
# `conv_model.predict_generator` to pre-compute features; with shuffling
# enabled the feature rows and the label rows would not line up.
# NOTE(review): assumes `vgg.get_batches` forwards `shuffle` to a Keras
# directory iterator -- confirm in vgg16_for_keras2.
batches = vgg.get_batches(path+'train', shuffle=False, batch_size=batch_size)
val_batches = vgg.get_batches(path+'valid', shuffle=False, batch_size=batch_size)

# One-hot encode the integer class ids reported by each generator.
train_labels = to_categorical(batches.classes)
val_labels = to_categorical(val_batches.classes)

Found 200 images belonging to 2 classes.
Found 200 images belonging to 2 classes.


In [8]:
# change the output layer from 1000 nodes to 2 nodes
vgg.finetune(batches)

In [9]:
vgg.model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lambda_1 (Lambda)            (None, 3, 224, 224)       0         
_________________________________________________________________
zero_padding2d_1 (ZeroPaddin (None, 3, 226, 226)       0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 64, 224, 224)      1792      
_________________________________________________________________
zero_padding2d_2 (ZeroPaddin (None, 64, 226, 226)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 64, 224, 224)      36928     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 64, 112, 112)      0         
_________________________________________________________________
zero_padding2d_3 (ZeroPaddin (None, 64, 114, 114)      0         
__________

In [10]:
# get layers
layers = vgg.model.layers
len(layers)

38

In [11]:
from keras.layers import Convolution2D
from keras.models import Sequential

In [12]:
# let's check the layer types
print(type(layers[37]))
print(type(layers[36]))
print(type(layers[35]))
print(type(layers[23]))
print(type(layers[23]) is Convolution2D)

<class 'keras.layers.core.Dense'>
<class 'keras.layers.core.Dropout'>
<class 'keras.layers.core.Dense'>
<class 'keras.layers.convolutional.Conv2D'>
True


In [13]:
# Locate the position of the final convolutional layer in the model.
# Collect every position whose layer is exactly a Convolution2D, then keep
# the last one (raises IndexError if the model has no such layer).
conv_positions = []
for position, candidate in enumerate(layers):
    if type(candidate) is Convolution2D:
        conv_positions.append(position)
last_conv_index = conv_positions[-1]
print(last_conv_index)

30


In [14]:
print(type(layers[30]))
print(type(layers[31]))
print(type(layers[32]))

<class 'keras.layers.convolutional.Conv2D'>
<class 'keras.layers.pooling.MaxPooling2D'>
<class 'keras.layers.core.Flatten'>


In [15]:
conv_layers = layers[:last_conv_index+1]
conv_model = Sequential(conv_layers)

In [16]:
fc_layers = layers[last_conv_index+1:]

In [73]:
# Pre-compute the convolutional features for the validation images.
# Round the number of steps up: plain floor division would skip the leftover
# images whenever the sample count is not a multiple of batch_size.
val_steps = (val_batches.samples + batch_size - 1) // batch_size
val_features = conv_model.predict_generator(val_batches, val_steps)

In [74]:
val_features.shape

(200, 512, 14, 14)

In [75]:
# Pre-compute the convolutional features for the training images.
# Round the number of steps up: plain floor division would skip the leftover
# images whenever the sample count is not a multiple of batch_size.
train_steps = (batches.samples + batch_size - 1) // batch_size
train_features = conv_model.predict_generator(batches, train_steps)

In [76]:
train_features.shape

(200, 512, 14, 14)

In [77]:
type(train_features)

numpy.ndarray

In [17]:
import bcolz

In [18]:
#%rm -r bottleneck_features/*

In [19]:
# save bottleneck features for later use
features_dir ='bottleneck_features'

In [80]:
# save
# Create the directory named by the `features_dir` variable. A shell magic
# such as `%mkdir -p features_dir` only expands Python variables written as
# `$features_dir` or `{features_dir}`, so the bare name would create a
# directory literally called "features_dir" instead.
import os
os.makedirs(features_dir, exist_ok=True)

# Write the validation features to disk; mode='w' overwrites any existing data.
c = bcolz.carray(val_features, rootdir=features_dir+'/val_features', mode='w')
c.flush()

# Write the training features to disk in the same way.
c = bcolz.carray(train_features, rootdir=features_dir+'/train_features', mode='w')
c.flush()

%ls bottleneck_features

[0m[01;34mtrain_features[0m/  [01;34mval_features[0m/


In [20]:
train_features = bcolz.open(features_dir+'/train_features')
val_features = bcolz.open(features_dir+'/val_features')

In [22]:
print(train_features.shape)
print(val_features.shape)

(200, 512, 14, 14)
(200, 512, 14, 14)


In [23]:
from keras.layers import MaxPooling2D, Dense, Flatten, Dropout

In [24]:
conv_layers[-1].output_shape

(None, 512, 14, 14)

In [25]:
# Build the fully connected head that consumes the pre-computed conv features.
# Its input shape is the last conv layer's output shape without the batch axis.
feature_shape = conv_layers[-1].output_shape[1:]

fc_model = Sequential()
fc_model.add(MaxPooling2D(input_shape=feature_shape))
fc_model.add(Flatten())
fc_model.add(Dense(4096, activation='relu'))
fc_model.add(Dropout(0.))  # rate 0: dropout layer kept in place but disabled
fc_model.add(Dense(4096, activation='relu'))
fc_model.add(Dropout(0.))  # rate 0: dropout layer kept in place but disabled
fc_model.add(Dense(2, activation='softmax'))

In [26]:
def get_layer_weights(layer):
    """Return a new list holding the weights reported by ``layer.get_weights()``.

    The result is always a fresh ``list``, so callers can modify it without
    touching the object returned by the layer itself.
    """
    return list(layer.get_weights())

In [27]:
# Copy the weights into fc_model, pairing its layers position by position
# with the layers taken from the original model after the last conv layer.
for target_layer, source_layer in zip(fc_model.layers, fc_layers):
    source_weights = get_layer_weights(source_layer)
    target_layer.set_weights(source_weights)

In [28]:
# just a test on how weights are handled
w = [o for o in fc_model.layers[4].get_weights()]
print(w[0].shape)
print(w[1].shape)

(4096, 4096)
(4096,)


In [29]:
# compile the model
fc_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# train model
fc_model.fit(
    train_features, train_labels, validation_data=(val_features,val_labels),
    batch_size=2, epochs=1)

I was unable to produce results here. I tried to run the model on an Amazon AWS g2x.large instance; however, the data is too large to fit in one of these GPUs. Maybe, as a solution, I will implement the same model in TensorFlow to run in parallel on all 4 GPUs of the g2x.large instance.