In [21]:
from __future__ import division, print_function

import os, json
from glob import glob
import numpy as np
from scipy import misc, ndimage
from scipy.ndimage.interpolation import zoom

from keras import backend as K
from keras.layers.normalization import BatchNormalization
from keras.utils.data_utils import get_file
from keras.models import Sequential
from keras.layers.core import Flatten, Dense, Dropout, Lambda
from keras.layers.convolutional import MaxPooling2D, ZeroPadding2D, Conv2D
from keras.layers.pooling import GlobalAveragePooling2D
from keras.optimizers import SGD, RMSprop, Adam
from keras.preprocessing import image
from keras.models import model_from_json
from keras.utils.np_utils import to_categorical

In [2]:
# Root of the dataset tree. The generators further down append
# 'sample/train/' and 'sample/valid/' to this prefix, so it must end with '/'.
path = 'data/'

In [3]:
# Per-channel mean (R, G, B order) of the image set used to train VGG.
# Kept at notebook level only for inspection; vgg_preprocess builds its own copy.
vgg_mean = np.asarray((123.68, 116.779, 103.939), dtype=np.float32)
vgg_mean.shape

(3,)

In [4]:
def vgg_preprocess(x):
    """
        Subtracts the mean RGB value, and reorders the channels from RGB to BGR.
        The mean RGB was computed on the image set used to train the VGG model.

        The channel axis is taken to be the LAST axis (the model in this
        notebook is declared with input_shape=(224, 224, 3)), so the same code
        handles a single image and the batched tensor a Lambda layer receives.
        Args:
            x: Image array or tensor, (height x width x channels) or
               (batch x height x width x channels), channels in RGB order.
        Returns:
            Array/tensor of the same shape, mean-centred, channels in BGR order.
    """
    vgg_mean = np.array([123.68, 116.779, 103.939], dtype=np.float32)
    # The (3,) mean broadcasts against the trailing channel axis.
    x = x - vgg_mean
    # Reverse ONLY the last axis. The previous `x[:, ::-1]` reversed axis 1,
    # which for a batched channels-last tensor is the image height: it flipped
    # every image vertically and left the channels in RGB order.
    return x[..., ::-1]

In [5]:
# VGG16 rebuilt layer by layer (channels-last, 224x224x3 input) so that the
# pretrained weight file can be loaded into it. The layer ORDER matters because
# load_weights() matches weights to layers by position.
model = Sequential()
model.add(Lambda(vgg_preprocess, input_shape = (224, 224, 3), output_shape = (224, 224, 3)))

# (filters, number of stacked 3x3 conv layers) for the five convolutional
# stages; every stage is closed by a 2x2 max-pool with stride 2.
vgg_conv_stages = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]
for stage_filters, stage_depth in vgg_conv_stages:
    for conv_number in range(stage_depth):
        model.add(Conv2D(stage_filters, (3, 3), padding = 'same', activation = 'relu'))
    model.add(MaxPooling2D((2, 2), strides = (2, 2)))

# Classifier: two 4096-unit dense layers (each followed by 50% dropout) and
# the 1000-way softmax.
model.add(Flatten())
for dense_number in range(2):
    model.add(Dense(4096, activation = 'relu'))
    model.add(Dropout(0.5))
model.add(Dense(1000, activation = 'softmax'))

model.compile(optimizer = Adam(lr = 0.001), loss = 'categorical_crossentropy', metrics = ['accuracy'])

# Pretrained weights are read from a local file (path relative to the notebook).
fname = 'weights/vgg16_weights_tf_dim_ordering_tf_kernels.h5'
model.load_weights(fname)

In [6]:
# Remove the 1000-way softmax so a new 2-class head can be attached.
# NOTE: not idempotent - re-running this cell pops one more layer each time.
model.pop()

# Freeze indices 0-19 (the Lambda layer through Flatten); the two 4096-unit
# Dense layers that follow stay trainable.
for layer in model.layers[:20]:
    layer.trainable = False

In [7]:
# New 2-way softmax head; recompile so the changed `trainable` flags and the
# new output layer take effect.
model.add(Dense(2, activation = 'softmax'))
finetune_optimizer = Adam(lr = 0.001)
model.compile(loss = 'categorical_crossentropy',
              optimizer = finetune_optimizer,
              metrics = ['accuracy'])

In [8]:
# Mini-batch size shared by the directory generators and by fc_model.fit below.
batch_size = 8

In [9]:
# Plain generator: no augmentation or rescaling (mean subtraction happens
# inside the model's Lambda layer).
datagen = image.ImageDataGenerator()

# Both directory iterators share the same settings; only the folder differs.
flow_settings = dict(target_size = (224, 224), class_mode = 'categorical',
                     shuffle = True, batch_size = batch_size)

trn_batches = datagen.flow_from_directory(path + 'sample/train/', **flow_settings)
val_batches = datagen.flow_from_directory(path + 'sample/valid/', **flow_settings)

Found 1124 images belonging to 2 classes.
Found 124 images belonging to 2 classes.


In [10]:
# Number of batches needed to see every image once. Derived from `batch_size`
# (the value the generators were built with) instead of a hard-coded 8, and
# rounded up to an integer: with true division `n / 8` was a float
# (e.g. 1124 / 8 = 140.5), while Keras expects an integer step count.
train_steps = int(np.ceil(trn_batches.n / float(batch_size)))
valid_steps = int(np.ceil(val_batches.n / float(batch_size)))

model.fit_generator(trn_batches, steps_per_epoch = train_steps, epochs = 1,
                    validation_data = val_batches, validation_steps = valid_steps)

Epoch 1/1


KeyboardInterrupt: 

In [11]:
# Layer list of the fine-tuning model; split below into a convolutional part
# and a fully connected part.
layers = model.layers

In [14]:
# Positions of all Conv2D layers, in model order; the last entry is the final
# convolution. Exact type match (not isinstance), as in the original check.
conv_positions = [position for position, candidate in enumerate(layers)
                  if type(candidate) is Conv2D]
last_conv_idx = conv_positions[-1]

In [15]:
# Sanity check: show the index of the last convolutional layer.
last_conv_idx

17

In [16]:
# Sanity check: the layer at that index should be a Conv2D instance.
layers[last_conv_idx]

<keras.layers.convolutional.Conv2D at 0x11c1f9d30>

In [17]:
# Split the network right after the last convolution.
split_at = last_conv_idx + 1
conv_layers, fc_layers = layers[:split_at], layers[split_at:]

# Convolutional feature extractor, built from the SAME layer objects as `model`
# (the layers, and therefore their weights, are shared rather than copied).
conv_model = Sequential(conv_layers)

# `fc_layers` holds everything after the last convolution: the final max-pool
# and Flatten, then the dense ("fully connected" / FC) layers.

In [19]:
# Integer class id of every image found by each directory iterator.
val_classes, trn_classes = val_batches.classes, trn_batches.classes

In [20]:
# Inspect the integer class ids (see the class_indices cell below for which
# label each id stands for).
val_classes, trn_classes

(array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int32),
 array([0, 0, 0, ..., 1, 1, 1], dtype=int32))

In [22]:
# One-hot encode the integer class ids so they match the 2-unit softmax output
# and the categorical cross-entropy loss.
val_labels, trn_labels = [to_categorical(class_ids)
                          for class_ids in (val_classes, trn_classes)]

In [23]:
# Displays the whole one-hot validation label array.
# NOTE(review): `val_labels[:5]` or `val_labels.shape` would be a lighter check.
val_labels

array([[1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.

In [24]:
# Displays the one-hot training labels (numpy abbreviates the long array).
trn_labels

array([[1., 0.],
       [1., 0.],
       [1., 0.],
       ...,
       [0., 1.],
       [0., 1.],
       [0., 1.]])

In [26]:
# Confirm both iterators map class-folder names to the same integer ids.
trn_batches.class_indices, val_batches.class_indices

({'cats': 0, 'dogs': 1}, {'cats': 0, 'dogs': 1})

In [27]:
# Precompute convolutional features for the validation images.
#
# Two requirements for the rows to line up with `val_labels`:
#   * the images must be read in directory order, which is the order of the
#     iterator's `.classes` array - so a dedicated, UNSHUFFLED and freshly
#     positioned iterator is used (`val_batches` was built with shuffle=True
#     and has already been partly consumed by fit_generator);
#   * the second argument of Keras 2's predict_generator is the number of
#     BATCHES (`steps`), not the number of samples - passing `n` would make
#     several passes over the data and return about batch_size times too many
#     rows.
val_feature_batches = datagen.flow_from_directory(path + 'sample/valid/', target_size = (224, 224),
            class_mode = 'categorical', shuffle = False, batch_size = batch_size)
val_feature_steps = int(np.ceil(val_feature_batches.n / float(batch_size)))
val_features = conv_model.predict_generator(val_feature_batches, val_feature_steps)

In [None]:
# Precompute convolutional features for the training images.
#
# A dedicated UNSHUFFLED iterator is used so the feature rows come out in
# directory order - the order of the iterator's `.classes` array, and hence of
# `trn_labels` - and the step count is given in BATCHES, which is what the
# second argument of Keras 2's predict_generator means (passing the sample
# count `n` would return about batch_size times too many rows).
trn_feature_batches = datagen.flow_from_directory(path + 'sample/train/', target_size = (224, 224),
            class_mode = 'categorical', shuffle = False, batch_size = batch_size)
trn_feature_steps = int(np.ceil(trn_feature_batches.n / float(batch_size)))
trn_features = conv_model.predict_generator(trn_feature_batches, trn_feature_steps)

In [None]:
def save_array(fname, arr):
    """Persist an array to disk as a bcolz carray.

    Args:
        fname: Root directory of the on-disk carray (passed as `rootdir`);
            opened with mode 'w'.
        arr: Array-like data to store.
    """
    # The notebook's import cell never imports bcolz, so the bare `bcolz` name
    # raised NameError; importing here keeps this helper self-contained.
    import bcolz

    c=bcolz.carray(arr, rootdir=fname, mode='w')
    c.flush()  # make sure everything is written out before returning


def load_array(fname):
    """Load an array previously written with `save_array`.

    Args:
        fname: Root directory of the on-disk bcolz carray.
    Returns:
        The full contents, materialised in memory by the `[:]` slice.
    """
    # The notebook's import cell never imports bcolz, so the bare `bcolz` name
    # raised NameError; importing here keeps this helper self-contained.
    import bcolz

    return bcolz.open(fname)[:]

In [None]:
# Cache the precomputed features so the expensive convolutional pass does not
# have to be repeated on every run.
for feature_dir, feature_array in (('weights/train_convlayer_features.bc', trn_features),
                                   ('weights/valid_convlayer_features.bc', val_features)):
    save_array(feature_dir, feature_array)

In [None]:
# Reload the cached features from the exact locations the save cell writes to.
# The previous version built the paths from `model_path`, a name that is never
# defined in this notebook (NameError on a fresh kernel).
trn_features = load_array('weights/train_convlayer_features.bc')
val_features = load_array('weights/valid_convlayer_features.bc')

In [None]:
# Sanity check: the first dimension should equal the number of training labels.
trn_features.shape

In [None]:
def get_fc_model():
    """Build and compile the small classifier trained on precomputed conv features.

    The input shape is taken from the output of the last layer in the
    notebook-level `conv_layers` list. The network is a max-pool, a Flatten,
    two 1024-unit ReLU layers (each followed by a Dropout with rate 0.) and a
    2-way softmax.

    Returns:
        A compiled Sequential model (Adam, categorical cross-entropy, accuracy).
    """
    feature_shape = conv_layers[-1].output_shape[1:]  # drop the batch dimension

    head = Sequential()
    head.add(MaxPooling2D(input_shape=feature_shape))
    head.add(Flatten())
    for _ in range(2):
        head.add(Dense(1024, activation='relu'))
        head.add(Dropout(0.))
    head.add(Dense(2, activation='softmax'))

    head.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return head

In [None]:
# Fresh, untrained classifier head for the precomputed features.
fc_model = get_fc_model()

In [None]:
# Train the classifier head on the cached convolutional features.
# `epochs` is the Keras 2 keyword (already used by the fit_generator call in
# this notebook); `nb_epoch` is the Keras 1 spelling.
fc_model.fit(trn_features, trn_labels, epochs=8,
             batch_size=batch_size, validation_data=(val_features, val_labels))