In [2]:
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras.callbacks import EarlyStopping
from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.layers import Activation, Dropout, Flatten, Dense, Lambda
from keras import backend as K
from keras.optimizers import SGD
from keras.models import Model
import numpy as np
import theano, os, h5py
import os

Using Theano backend.


### Configure Settings

In [3]:
# Use the 'th' image dimension ordering; the model below is built with a
# channels-first input shape of (3, img_width, img_height).
K.set_image_dim_ordering('th')

# Directories holding the training and validation images
training_data_dir = "../../data/train"
validation_data_dir = "../../data/validation"

# Network weight files
vgg16_weights_path = '../vgg16_weights.h5'  # too large to be kept in GitHub
vgg16_new_top_layers_weights_path = '../vgg16_new_top_layers_weights.h5'

# Image size fed to the network
img_width = 224
img_height = 224

# Dataset sizes and training parameters
number_of_training_samples = 23000
number_of_validation_samples = 2000
number_of_epochs = 20
batch_size = 64

### The VGG16 ConvNet

Define the VGG16 architecture to use (from https://gist.github.com/baraldilorenzo/07d7802847aaad0a35d3), without the final dense layers.

In [4]:
def build_vgg16():
    """Build the convolutional part of VGG16, without the fully connected top.

    The network is five blocks; each block is a run of
    (ZeroPadding2D -> 3x3 Convolution2D with ReLU) pairs followed by a 2x2
    max-pooling layer. Convolution layers are named 'conv<block>_<index>'.
    The input shape is (3, img_width, img_height), taken from the
    notebook-level settings.

    Returns:
        A Sequential model ending at the last max-pooling layer. The final
        Flatten/Dense/Dropout layers of the full VGG16 are deliberately left
        out so that our own classifier can be used instead.
    """
    # (number of filters, number of convolution layers) for each block
    block_specs = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

    model = Sequential()
    is_first_layer = True

    for block_number, (filter_count, conv_count) in enumerate(block_specs, 1):
        for conv_number in range(1, conv_count + 1):
            # Only the very first layer of the network declares the input shape.
            if is_first_layer:
                padding = ZeroPadding2D((1, 1), input_shape=(3, img_width, img_height))
                is_first_layer = False
            else:
                padding = ZeroPadding2D((1, 1))
            model.add(padding)

            layer_name = 'conv{}_{}'.format(block_number, conv_number)
            model.add(Convolution2D(filter_count, 3, 3, activation='relu', name=layer_name))

        # Each block ends by halving the spatial resolution.
        model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    return model

The downloaded weights of the VGG16 network are for the full model, but we're only interested in the convolutional and pooling layers. So only load the weights for those layers.

In [5]:
model = build_vgg16()

# Load the weights only up until the fully connected layers
# (https://gist.github.com/fchollet/f35fbc80e066a49d65f1688a7e99f069)
#
# The weights file is opened explicitly read-only and inside a `with` block:
#  - the handle is always released, even if a layer lookup or set_weights()
#    raises part-way through;
#  - a wrong path fails immediately instead of depending on the default open
#    mode of the installed h5py version.
with h5py.File(vgg16_weights_path, 'r') as f:
    for k in range(f.attrs['nb_layers']):
        if k >= len(model.layers):
            # we don't look at the last (fully-connected) layers in the savefile
            break
        # Each saved layer is a group 'layer_<k>' holding 'param_<p>' datasets.
        g = f['layer_{}'.format(k)]
        weights = [g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])]
        # Must happen while the file is still open, as `weights` refers to
        # datasets inside it.
        model.layers[k].set_weights(weights)

Next, we get the results from this partial VGG16 model for our training and validation data. These results can then be used in turn as our new training and validation data for the next stage.

Getting these results is slow, so they're saved when finished. 

In [None]:
# Run the training images through the partial VGG16 model and save the
# resulting features to disk, since computing them is slow.
training_datagen = ImageDataGenerator(rescale=1./255)
generator = training_datagen.flow_from_directory(
        training_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode=None,   # no labels requested; they are rebuilt later from the read order
        shuffle=False)     # keep the read order fixed so those labels line up
vgg16_features_train = model.predict_generator(generator, number_of_training_samples)

# Write through a context manager so the file is flushed and closed as soon as
# the save finishes, rather than leaving an open handle behind.
with open('vgg16_features_train.npy', 'wb') as features_file:
    np.save(features_file, vgg16_features_train)

In [33]:
# Run the validation images through the partial VGG16 model and save the
# resulting features to disk, since computing them is slow.
validation_datagen = ImageDataGenerator(rescale=1./255)
generator = validation_datagen.flow_from_directory(
        validation_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode=None,   # no labels requested; they are rebuilt later from the read order
        shuffle=False)     # keep the read order fixed so those labels line up
vgg16_features_validation = model.predict_generator(generator, number_of_validation_samples)

# Write through a context manager so the file is flushed and closed as soon as
# the save finishes, rather than leaving an open handle behind.
with open('vgg16_features_validation.npy', 'wb') as features_file:
    np.save(features_file, vgg16_features_validation)

Found 2000 images belonging to 2 classes.


Now we create our own model, consisting only of fully connected final layers to classify the images into just 'dog' or 'cat' (not the 1000 different classes VGG16 used for the ImageNet challenge.)

Training and validation data for this comes from the partial VGG16 results. The relevant labels can be generated from the fact that the images were read in order (shuffle=False), so we know the first half will all be cats and the second half will all be dogs.

Training here is relatively fast, because the final layers in isolation are very simple.

In [6]:
def build_top_layers_model():
    """Train a small fully connected classifier on the saved VGG16 features.

    Reads the feature arrays from 'vgg16_features_train.npy' and
    'vgg16_features_validation.npy', builds 0/1 labels on the assumption that
    the first half of each array belongs to one class and the second half to
    the other, fits a Flatten -> Dense(256) -> Dropout -> Dense(1, sigmoid)
    model, and writes the trained weights to
    vgg16_new_top_layers_weights_path.

    Sample counts, epoch count and batch size come from the notebook-level
    settings. Nothing is returned.
    """
    # Load the saved VGG results, closing each file as soon as it has been read
    with open('vgg16_features_train.npy', 'rb') as train_file:
        train_data = np.load(train_file)
    with open('vgg16_features_validation.npy', 'rb') as validation_file:
        validation_data = np.load(validation_file)

    # Generate labels where the first half are all cats, the second half all dogs.
    # Floor division keeps the counts as integers: with plain `/`, Python 3
    # produces a float and `[0] * float` raises TypeError.
    train_half = number_of_training_samples // 2
    validation_half = number_of_validation_samples // 2
    train_labels = np.array([0] * train_half + [1] * train_half)
    validation_labels = np.array([0] * validation_half + [1] * validation_half)

    # Build the model
    model = Sequential()
    model.add(Flatten(input_shape=train_data.shape[1:]))
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])

    model.fit(train_data, train_labels,
              nb_epoch=number_of_epochs,
              batch_size=batch_size,
              validation_data=(validation_data, validation_labels))

    model.save_weights(vgg16_new_top_layers_weights_path)


build_top_layers_model()


Train on 23000 samples, validate on 2000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
