In [1]:
import os, sys
from keras import backend as K

%matplotlib inline

Using Theano backend.
Using gpu device 0: Tesla K80 (CNMeM is disabled, cuDNN 5103)


In [2]:
# Directory layout, rooted at the notebook's working directory.
# Paths that are later used as string prefixes end with "/".
homeDir = os.getcwd()
dataDir = homeDir + "/data/"
# Uncomment to run against the small sample set instead of the full data.
#dataDir = homeDir + "/data/sample/"
train_path = dataDir + "train/"
valid_path = dataDir + "valid/"
model_path = homeDir + "/data/models/"
test_path = dataDir + "test"
# dataDir already ends with "/", and results_path is concatenated directly
# with file names further down (results_path + 'test_features.dat'), so it
# needs a trailing separator and no leading one. The directory itself is
# expected to exist before anything is saved into it.
results_path = dataDir + "results/"

In [3]:
from utils import *
from Vgg16 import Vgg16
from vgg16bn import Vgg16BN

Initializing the model and fitting it briefly, just to get familiar with the cats-vs-dogs data.

In [7]:
# Instantiate the Vgg16BN wrapper (imported from vgg16bn); its Keras model is accessed below as vgg.model.
vgg = Vgg16BN()

In [8]:
# Set constants. The batch size can be reduced depending on the memory constraints of the GPU.
# (Epoch counts are passed directly to the individual fit calls further down.)
batch_size=64

In [9]:
# Build batch generators over the training and validation directories.
# NOTE(review): presumably get_batches turns the images into arrays and yields them in batches — confirm in Vgg16BN.
batches = vgg.get_batches(train_path, batch_size=batch_size)
val_batches = vgg.get_batches(valid_path, batch_size=batch_size)

Found 23000 images belonging to 2 classes.
Found 2000 images belonging to 2 classes.


In [10]:
# Remove the final layer, then mark every layer that is left as untrainable.
# Re-running this cell pops one more layer each time, so run it only once.
vgg.model.pop()
for frozen_layer in vgg.model.layers:
    frozen_layer.trainable = False

In [11]:
vgg.model.add(Dense(2, activation='softmax')) 
# Adds a new Dense output layer with only 2 outputs and softmax activation.

In [12]:
# Compile through the wrapper's own compile(), then train for 2 epochs with the validation batches.
vgg.compile() # using the wrapper's default compile settings
#vgg.model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
vgg.fit(batches, val_batches, nb_epoch=2)

Epoch 1/2
Epoch 2/2


In [13]:
# Checkpoint the weights after the initial 2-epoch fit; this file is loaded into final_model later on.
vgg.model.save_weights(model_path+'vggbnStarter.h5')

We observe that we are massively underfitting the model.

Separating out the conv layers and the dense layers. The idea is that most of the conv filters already work for our problem, so we only need to train the dense layers on the cats-vs-dogs dataset.

In [14]:
# Split the network at its last Convolution2D layer: everything up to and
# including that layer becomes the conv part, the remaining layers the head.
model = vgg.model
layers = model.layers
conv_positions = [pos for pos, lyr in enumerate(layers) if type(lyr) is Convolution2D]
lastConvIdx = conv_positions[-1]
split_at = lastConvIdx + 1
convLayers, fcLayers = layers[:split_at], layers[split_at:]
convModel = Sequential(convLayers)

In [15]:
# Rebuild both generators with shuffle=False and derive labels from their
# class indices via onehot().
batches = get_batches(train_path, batch_size=batch_size, shuffle=False)
val_batches = get_batches(valid_path, batch_size=batch_size, shuffle=False)

# Training set: class indices and their one-hot form.
trn_classes = batches.classes
trn_labels = onehot(trn_classes)

# Validation set: class indices and their one-hot form.
val_classes = val_batches.classes
val_labels = onehot(val_classes)

Found 23000 images belonging to 2 classes.
Found 2000 images belonging to 2 classes.


In [None]:
# Not needed every time: skip this cell if the features have already been saved to disk.
# Runs the conv-only model over the training and validation generators.
# NOTE: the training result is bound to `train_features` here, while the loading cell binds `trn_features`.
train_features = convModel.predict_generator(batches,batches.nb_sample)
val_features = convModel.predict_generator(val_batches, val_batches.nb_sample)

In [None]:
# Persist the conv-layer outputs under model_path so they can be reloaded instead of recomputed.
save_array(model_path + 'train_convlayer_features.bc', train_features)
save_array(model_path + 'valid_convlayer_features.bc', val_features)

In [16]:
# Load the saved conv-layer outputs; these are the inputs the dense head is trained on.
trn_features = load_array(model_path+'train_convlayer_features.bc')
val_features = load_array(model_path+'valid_convlayer_features.bc')

In [17]:
def new_weights(layer, prev_p, new_p):
    """Return the layer's weights rescaled for a change in dropout rate.

    Every entry of ``layer.get_weights()`` is multiplied by
    ``(1 - prev_p) / (1 - new_p)``. The layer itself is not modified; the
    caller applies the result with ``set_weights``. A ``new_p`` of 1 makes
    the divisor zero.
    """
    scale = (1 - prev_p) / (1 - new_p)
    rescaled = []
    for weights in layer.get_weights():
        rescaled.append(weights * scale)
    return rescaled
# Module-level optimizer: get_fc_model compiles with whatever `opt` is bound to at the time it is called.
opt = RMSprop(lr=0.00001)
def get_fc_model(p):
    """Build and compile a dense classifier head for the conv features.

    The head accepts input shaped like the output of ``convLayers[-1]`` and
    ends in a 2-way softmax. ``p`` is the rate given to both Dropout layers.
    Weights are copied layer by layer from ``fcLayers`` after ``new_weights``
    rescales them from a dropout rate of 0.5 to ``p``. The model is compiled
    with the module-level ``opt`` as it is bound at call time.
    """
    head_layers = [
        MaxPooling2D(input_shape=convLayers[-1].output_shape[1:]),
        Flatten(),
    ]
    # Two identical Dense -> BatchNormalization -> Dropout stages.
    for _ in range(2):
        head_layers.extend([
            Dense(4096, activation='relu'),
            BatchNormalization(),
            Dropout(p),
        ])
    head_layers.append(Dense(2, activation='softmax'))
    model = Sequential(head_layers)

    # zip stops at the shorter sequence, so only layers that have a
    # counterpart in fcLayers receive weights.
    for target, source in zip(model.layers, fcLayers):
        target.set_weights(new_weights(source, 0.5, p))

    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [18]:
# Dense head with dropout 0.5, the same rate the copied weights are rescaled from, so the scale factor is 1.
fc_model = get_fc_model(0.5)

In [19]:
# Train the dense head on the loaded conv features for 3 epochs, validating on the validation features.
fc_model.fit(trn_features, trn_labels, nb_epoch=3, 
             batch_size=batch_size, validation_data=(val_features, val_labels))

Train on 23000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f02e22e2d10>

Looks like we have overshot; let's decrease the learning rate.

In [20]:
# Create a fresh optimizer and recompile fc_model with it.
# NOTE(review): this lr (0.00001) is identical to the one on the `opt` defined next to get_fc_model,
# so the learning rate is not actually lowered here — confirm which value was intended.
opt = RMSprop(lr=0.00001)
fc_model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

In [21]:
# Train the recompiled dense head for another 3 epochs on the same features.
fc_model.fit(trn_features, trn_labels, nb_epoch=3, 
             batch_size=batch_size, validation_data=(val_features, val_labels))

Train on 23000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f02db90b790>

The learning rate looks fine; let us try a few more epochs.

In [22]:
# Three more epochs with the optimizer unchanged.
fc_model.fit(trn_features, trn_labels, nb_epoch=3, 
             batch_size=batch_size, validation_data=(val_features, val_labels))

Train on 23000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f02db90b990>

### Pseudo labelling

In [23]:
# dataDir already ends with "/", so no extra separator is needed in front of "test".
test_path = dataDir + "test"
# Unshuffled generator over the test directory.
test_batches = get_batches(test_path,shuffle=False, batch_size = batch_size)

Found 12500 images belonging to 1 classes.


In [24]:
# Get the test-set conv features first by running the conv-only model over the test generator.
test_features = convModel.predict_generator(test_batches,test_batches.nb_sample)

In [25]:
# Cache the test-set conv features on disk.
# NOTE(review): results_path is used as a plain string prefix, so it must end with a path separator
# for the file to land inside the results directory.
save_array(results_path + 'test_features.dat', test_features)

In [26]:
# Reload the cached test-set conv features (same path expression as the save above).
test_features = load_array(results_path + 'test_features.dat')

In [27]:
# Assemble an end-to-end model: the conv layers, marked untrainable, followed by the layers of fc_model.
final_model = Sequential(convLayers)
for layer in final_model.layers:
    layer.trainable = False
    
# The head's layer objects are added as-is (not copied), so final_model and fc_model share them.
for layer in fc_model.layers:
    # NOTE(review): presumably clears Keras' record of the layer's previous input so that it can be
    # attached to a second model — confirm against the Keras version in use.
    layer.called_with = None
    final_model.add(layer)

Let us fit it once before trying with pseudo labels.

In [28]:
# Compile the end-to-end model with RMSprop at lr=0.00001 and rho=0.7.
opt = RMSprop(lr=0.00001, rho=0.7)
final_model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

The learning rate is very low, hence running two more epochs.

In [30]:
# Recompile the end-to-end model with a 10x larger learning rate (0.0001); rho stays at 0.7.
opt = RMSprop(lr=0.0001, rho=0.7)
final_model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

In [33]:
# Load the checkpoint that was saved after the initial 2-epoch fit.
# NOTE(review): final_model reuses the layer objects of the fc_model built earlier, so this appears to
# overwrite that head's fine-tuned weights with the starter weights — confirm this is intended.
final_model.load_weights(model_path+'vggbnStarter.h5')

In [35]:
# Predict on the test set with the end-to-end model; the predictions are used below as pseudo-labels.
test_labels = final_model.predict_generator(test_batches,test_batches.nb_sample)

In [37]:
# Append the pseudo-labelled test samples after the training samples, in the
# same order for features and labels so the rows stay aligned.
combined_features = np.concatenate((trn_features, test_features), axis=0)
combined_labels = np.concatenate((trn_labels, test_labels), axis=0)

In [38]:
# Repeat the conv/head split, this time on final_model: layers up to and
# including the last Convolution2D form the conv part, the rest the head.
model = final_model
layers = model.layers
conv_positions = [pos for pos, lyr in enumerate(layers) if type(lyr) is Convolution2D]
lastConvIdx = conv_positions[-1]
split_at = lastConvIdx + 1
convLayers, fcLayers = layers[:split_at], layers[split_at:]
convModel = Sequential(convLayers)

In [39]:
# Fresh dense head; get_fc_model copies its weights from fcLayers, which now come from final_model.
fc_model = get_fc_model(0.5)

In [41]:
# One epoch on the combined training + pseudo-labelled test features, validating on the validation features.
fc_model.fit(combined_features, combined_labels, nb_epoch=1, 
             batch_size=batch_size, validation_data=(val_features, val_labels))

Train on 35500 samples, validate on 2000 samples
Epoch 1/1


<keras.callbacks.History at 0x7f02d0081790>

In [59]:
# Switch to Adam for further training on the combined features.
# The model that is fitted next is fc_model, so that is the one that has to be
# recompiled; compiling final_model leaves fc_model's optimizer untouched.
opt = Adam(lr = 0.00001)
fc_model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

In [60]:
# Continue training the dense head on the combined features for up to 10 epochs.
# fc_model trains with whichever optimizer it was last compiled with.
fc_model.fit(combined_features, combined_labels, nb_epoch=10, 
             batch_size=batch_size, validation_data=(val_features, val_labels))

Train on 35500 samples, validate on 2000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10

KeyboardInterrupt: 

## Predictions

In [None]:
# dataDir already ends with "/", so no extra separator is needed in front of "test".
test_path = dataDir + "test"
# Fresh unshuffled generator over the test directory for the final predictions.
test_batches = get_batches(test_path,shuffle=False, batch_size = batch_size)

In [None]:
# convModel only produces convolutional feature maps. Run them through the
# trained dense head (fc_model) to get the per-class probabilities that the
# submission code indexes as preds[:,1].
conv_test_features = convModel.predict_generator(test_batches, test_batches.nb_sample)
preds = fc_model.predict(conv_test_features, batch_size=batch_size)

In [None]:
# Build the submission file: one row per test image holding its numeric id
# and the clipped value from column 1 of preds.
filenames = test_batches.filenames
isdog = preds[:,1]
# Keep predictions away from exactly 0 and 1 (presumably to bound the
# log-loss penalty for confident mistakes — confirm against the metric).
isdog = isdog.clip(min=0.025, max=0.975)
# The id is the file name without its directory or extension; this makes no
# assumption about the length of the class-directory prefix.
ids = np.array([int(os.path.splitext(os.path.basename(f))[0]) for f in filenames])
subm = np.stack([ids,isdog], axis=1)

submission_file_name = 'submission_ft_denseLayers_droupout6.csv'
# Write straight into dataDir instead of changing the working directory and
# back, so a failed write cannot leave the kernel in the wrong directory.
np.savetxt(os.path.join(dataDir, submission_file_name), subm,
           fmt='%d,%.5f', header='id,label', comments='')

from IPython.display import FileLink
FileLink('data/'+submission_file_name)