### Linear models with CNN features

In [None]:
from utils import *
%matplotlib inline
from __future__ import division,print_function
import os, json
from glob import glob
import numpy as np
import scipy
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import confusion_matrix
np.set_printoptions(precision=4, linewidth=100)
from matplotlib import pyplot as plt
import utils; reload(utils)
from utils import plots, get_batches, plot_confusion_matrix, get_data

from numpy.random import random, permutation
from scipy import misc, ndimage
from scipy.ndimage.interpolation import zoom

import keras
from keras import backend as K
from keras.utils.data_utils import get_file
from keras.models import Sequential
from keras.layers import Input
from keras.layers.core import Flatten, Dense, Dropout, Lambda
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.optimizers import SGD, RMSprop
from keras.preprocessing import image


#### Linear models in keras

In [2]:
x = random((30,2))
x

array([[ 0.0757,  0.4879],
       [ 0.2688,  0.8152],
       [ 0.3069,  0.6867],
       [ 0.2403,  0.8383],
       [ 0.29  ,  0.7524],
       [ 0.7629,  0.7809],
       [ 0.5818,  0.6132],
       [ 0.4597,  0.4712],
       [ 0.1095,  0.7411],
       [ 0.8669,  0.9558],
       [ 0.8582,  0.943 ],
       [ 0.9304,  0.3402],
       [ 0.5427,  0.5043],
       [ 0.6805,  0.2243],
       [ 0.5752,  0.5126],
       [ 0.8578,  0.5433],
       [ 0.1836,  0.092 ],
       [ 0.8416,  0.0233],
       [ 0.9469,  0.2414],
       [ 0.9445,  0.8352],
       [ 0.4676,  0.8094],
       [ 0.1282,  0.0647],
       [ 0.3009,  0.6987],
       [ 0.4157,  0.3149],
       [ 0.5266,  0.3608],
       [ 0.9487,  0.8267],
       [ 0.5142,  0.6453],
       [ 0.826 ,  0.3038],
       [ 0.6361,  0.171 ],
       [ 0.05  ,  0.5461]])

In [3]:
y = np.dot(x,[2,3]) + 1
y

array([ 2.6151,  3.9833,  3.6738,  3.9955,  3.8374,  4.8685,  4.0032,  3.3329,  3.4422,  5.6012,
        5.5452,  3.8814,  3.5983,  3.0339,  3.6882,  4.3455,  1.6432,  2.7531,  3.6179,  5.3946,
        4.3636,  1.4507,  3.6978,  2.7762,  3.1358,  5.3775,  3.9641,  3.5633,  2.7853,  2.7383])

In [6]:
lm = Sequential([ Dense(1, input_shape=(2,)) ])
lm.compile(optimizer=SGD(lr=0.1), loss='mse')

In [7]:
# let's evaluate the model, note that weights aren't learnt yet, 
# this evaluation is based on the initial value of the weights
lm.evaluate(x, y, verbose=0)

10.496160507202148

In [8]:
# train the model for 5 epochs
lm.fit(x, y, nb_epoch=5, batch_size=1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x16108240>

In [9]:
# let's evaluate again
lm.evaluate(x, y, verbose=0)

0.010440172627568245

In [10]:
# compare the weights with the ones we used while synthesizing the data (2, 3, 1)
lm.get_weights()

[array([[ 1.8346],
        [ 2.8365]], dtype=float32), array([ 1.2551], dtype=float32)]

####  Train a linear model on imagenet predictions

In [11]:
# Root of the dataset; switch to the commented-out line to work on the small sample instead.
#path = "data/dogscats/sample/"
path = "data/dogscats/"
# Directory where cached arrays / features are written by later cells.
model_path = path + 'models/'
# os.makedirs creates any missing parent directories as well, whereas os.mkdir
# raises if `path` itself does not exist yet.
if not os.path.isdir(model_path): os.makedirs(model_path)

In [12]:
batch_size = 16

In [13]:
from vgg16 import Vgg16
vgg = Vgg16()
model = vgg.model

1. Get the true label for each image
2. Get the 1000 ImageNet category predictions for each image
3. Feed these predictions to a linear model

In [14]:
# Batch iterators over the validation and training directories.
# In the cells below they are only read for their `.classes` attribute (the true labels).
# NOTE(review): shuffle=False presumably keeps the label order aligned with the arrays
# returned by get_data() — confirm against utils.
val_batches = get_batches(path+'valid', shuffle=False, batch_size=1)
train_batches = get_batches(path+'train', shuffle=False, batch_size=1)

Found 2000 images belonging to 2 classes.
Found 23000 images belonging to 2 classes.


In [15]:
import bcolz
def save_array(fname, arr):
    """Store `arr` as a bcolz carray rooted at `fname` (opened with mode 'w') and flush it."""
    carr = bcolz.carray(arr, rootdir=fname, mode='w')
    carr.flush()
def load_array(fname):
    """Open the bcolz store at `fname` and return its full contents (sliced with `[:]`)."""
    stored = bcolz.open(fname)
    return stored[:]

In [16]:
val_data = get_data(path+'valid')
train_data = get_data(path+'train')

Found 2000 images belonging to 2 classes.
Found 23000 images belonging to 2 classes.


In [None]:
#save_array(model_path+'train_data.bc', train_data)
#save_array(model_path+'valid_data.bc', val_data)

#train_data = load_array(model_path+'train_data.bc')
#val_data = load_array(model_path+'valid_data.bc')

In [17]:
def onehot(x):
    """Return the one-hot encoding of the labels in `x` as a dense NumPy array.

    `x` is reshaped into a single column, run through a freshly fitted
    OneHotEncoder, and the encoder's output is densified before being
    wrapped in an ndarray.
    """
    column = x.reshape(-1, 1)
    encoded = OneHotEncoder().fit_transform(column)
    return np.array(encoded.todense())

In [18]:
val_classes = val_batches.classes
train_classes = train_batches.classes

val_labels = onehot(val_classes)
train_labels = onehot(train_classes)

In [19]:
train_classes[:4]

array([0, 0, 0, 0])

In [20]:
train_labels[:4]

array([[ 1.,  0.],
       [ 1.,  0.],
       [ 1.,  0.],
       [ 1.,  0.]])

In [21]:
train_features = model.predict(train_data, batch_size=batch_size)
val_features = model.predict(val_data, batch_size=batch_size)

In [None]:
# save_array(model_path+'train_lastlayer_features.bc', train_features)
# save_array(model_path+'valid_lastlayer_features.bc', val_features)

# train_features = load_array(model_path+'train_lastlayer_features.bc')
# val_features = load_array(model_path+'valid_lastlayer_features.bc')

In [22]:
# 1000 features, 2 outputs
lm = Sequential([ Dense(2,activation='softmax', input_shape=(1000,)) ])
lm.compile(optimizer=RMSprop(lr=0.1), loss='categorical_crossentropy', metrics=['accuracy'])

In [23]:
lm.fit(train_features, train_labels, nb_epoch=5, batch_size=batch_size, validation_data=(val_features, val_labels))

Train on 23000 samples, validate on 2000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0xd1ab1048>

In [24]:
lm.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
dense_6 (Dense)                  (None, 2)             2002        dense_input_3[0][0]              
Total params: 2,002
Trainable params: 2,002
Non-trainable params: 0
____________________________________________________________________________________________________


####  Retrain last layer of vgg16

In [25]:
# pop the last layer and disable re-training all other layers
# NOTE: model.pop() removes a layer on every execution, so re-running this cell
# strips additional layers from the model.
model.pop()
for layer in model.layers:  layer.trainable = False
# smaller batch size, used by the gen.flow(...) iterators created in a later cell
batch_size = 4

In [26]:
# add the new layer
model.add(Dense(2, activation='softmax'))

In [27]:
gen=image.ImageDataGenerator()
train_batches = gen.flow(train_data, train_labels, batch_size=batch_size, shuffle=True)
val_batches = gen.flow(val_data, val_labels, batch_size=batch_size, shuffle=False)

In [28]:
def fit_model(model, batches, val_batches, nb_epoch=1):
    """Train `model` from the `batches` generator, validating on `val_batches`.

    One epoch consumes `batches.n` samples and validation uses
    `val_batches.n` samples; both iterators must expose an `n` attribute.
    Nothing is returned.
    """
    fit_kwargs = dict(
        samples_per_epoch=batches.n,
        nb_epoch=nb_epoch,
        validation_data=val_batches,
        nb_val_samples=val_batches.n,
    )
    model.fit_generator(batches, **fit_kwargs)

In [29]:
opt = RMSprop(lr=0.1)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

In [30]:
fit_model(model, train_batches, val_batches, nb_epoch=2)

Epoch 1/2
Epoch 2/2


#### Re-training multiple layers

In [31]:
# let's skip the conv layers and unfreeze only the dense part of the network
layers = model.layers
# get the index of the first dense layer and set this and every subsequent layer to be trainable
first_dense_idx = [index for index, layer in enumerate(layers) if type(layer) is Dense][0]
for layer in layers[first_dense_idx:]:
    layer.trainable = True
# A change to `trainable` made after compile() is not picked up by training until the
# model is compiled again, so re-compile with the same optimizer, loss and metrics.
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

In [32]:
K.set_value(opt.lr, 0.01)
fit_model(model, train_batches, val_batches, 3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [34]:
# let's re-train some of the conv layers as well
for layer in layers[12:]:
    layer.trainable = True
# A change to `trainable` made after compile() is not picked up by training until the
# model is compiled again, so re-compile with the same optimizer, loss and metrics.
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
# decrease the learning rate further
K.set_value(opt.lr, 0.001)
fit_model(model, train_batches, val_batches, 4)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
