In [2]:
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras.callbacks import EarlyStopping
from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.layers import Activation, Dropout, Flatten, Dense, Lambda
from keras import backend as K
from keras.optimizers import SGD
from keras import optimizers
from keras.models import Model
import numpy as np
import theano, os, h5py
import os

Using Theano backend.


### Configure Settings

In [3]:
# Use Theano-style dimension ordering (channels come first in image shapes)
K.set_image_dim_ordering('th')

# Image data locations
training_data_dir = '../../data/train'
validation_data_dir = '../../data/validation'

# Network weight files
vgg16_weights_path = '../vgg16_weights.h5'  # pre-trained VGG16 weights; too large to keep in GitHub
vgg16_new_top_layers_weights_path = '../vgg16_new_top_layers_weights.h5'
vgg16_fine_tune_weights_path = '../vgg_fine_tune.h5'

# Image size
img_width = img_height = 224

# Training parameters
number_of_training_samples = 23000
number_of_validation_samples = 2000
number_of_epochs = 20
batch_size = 32

### The VGG16 ConvNet

Define the VGG16 architecture to use (from https://gist.github.com/baraldilorenzo/07d7802847aaad0a35d3), without the final dense layers.

In [4]:
def build_vgg16():
    """Build the convolutional part of the VGG16 network.

    The model is five blocks of zero-padded 3x3 ReLU convolutions, each block
    ending in a 2x2 max pooling layer. Convolution layers are named
    ``conv<block>_<index>`` (conv1_1 ... conv5_3). The final fully connected
    layers of the original architecture (Flatten, two Dense(4096) layers with
    Dropout, and the 1000-way softmax) are deliberately left out so that our
    own top layers can be used instead.

    Returns:
        A Sequential model whose input shape is (3, img_width, img_height).
    """
    # (number of filters, number of convolution layers) for each block
    block_specs = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

    model = Sequential()
    for block_number, (filters, conv_count) in enumerate(block_specs, start=1):
        for conv_number in range(1, conv_count + 1):
            is_first_layer = (block_number == 1 and conv_number == 1)
            if is_first_layer:
                # Only the very first layer declares the input shape
                model.add(ZeroPadding2D((1, 1), input_shape=(3, img_width, img_height)))
            else:
                model.add(ZeroPadding2D((1, 1)))

            conv_name = 'conv{}_{}'.format(block_number, conv_number)
            model.add(Convolution2D(filters, 3, 3, activation='relu', name=conv_name))

        # Each block ends by halving the spatial resolution
        model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    return model

The downloaded weights of the VGG16 network are for the full model, but we're only interested in the convolutional and pooling layers. So only load the weights for those layers.

In [5]:
model = build_vgg16()

# Load the weights only up until the fully connected layers
# (https://gist.github.com/fchollet/f35fbc80e066a49d65f1688a7e99f069)
# The file is opened read-only inside a context manager, so the weights file
# can never be modified and the handle is closed even if set_weights raises.
with h5py.File(vgg16_weights_path, 'r') as f:
    # The savefile also contains the last (fully-connected) layers, which this
    # model does not have, so stop at whichever layer count is smaller.
    number_of_layers_to_load = min(f.attrs['nb_layers'], len(model.layers))
    for k in range(number_of_layers_to_load):
        g = f['layer_{}'.format(k)]
        # Layers without parameters have nb_params == 0 and get an empty list
        weights = [g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])]
        model.layers[k].set_weights(weights)

Create the new top layers we want to use for the model. These layers need to be trained before they are combined with the pre-trained VGG model.

As this model is the same model created and trained in the 'VGG16 new top layers' approach, we can solve this by just loading in those weights.

In [6]:
# New classifier head: flatten the convolutional output, then a small dense
# network with dropout, ending in a single sigmoid unit.
top_layers_model = Sequential([
    Flatten(input_shape=model.output_shape[1:]),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

# Start from the weights saved for these top layers rather than training them here
top_layers_model.load_weights(vgg16_new_top_layers_weights_path)

Combine our new final layers into the VGG16 model

In [7]:
# Append the new top layers model to the end of the VGG16 convolutional model.
# NOTE(review): this mutates `model`; re-running this cell on the same model
# would append the top layers a second time, so run it once per freshly built model.
model.add(top_layers_model)

By fine-tuning, we want to adjust the existing weights based on the extra information gained from our training data.

However, the lower-level layers in a ConvNet represent very general features, such as edges and simple shapes. These are not likely to change with a few thousand more images, given the existing weights were built from the millions of ImageNet images.

The higher up the layer, the more specific the feature it represents. In this case, the highest level layers may well benefit from our extra training data. 

To deal with this, we can 'freeze' the weights of the lower layers, while keeping the highest level layers still trainable. This has the side benefit of also being much faster.

In [8]:
# The first 26 layers run from the input up to and including conv5_1. They keep
# their loaded weights; every layer after them remains trainable.
number_of_frozen_layers = 26
lower_layers = model.layers[:number_of_frozen_layers]
for frozen_layer in lower_layers:
    frozen_layer.trainable = False

Next, prepare the training and validation data.

In [11]:
# Both generators only scale the pixel values by 1/255; no other augmentation
train_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)

# Settings shared by the training and validation generators
flow_settings = dict(target_size=(img_width, img_height),
                     batch_size=batch_size,
                     class_mode='binary')

training_data_generator = train_datagen.flow_from_directory(directory=training_data_dir,
                                                            **flow_settings)
validation_data_generator = val_datagen.flow_from_directory(directory=validation_data_dir,
                                                            **flow_settings)


Found 23000 images belonging to 2 classes.
Found 2000 images belonging to 2 classes.


Finally, compile and run the fine-tune model. 

Using a low learning rate is beneficial here, as the model is already pre-trained and the fine-tuning should only minimally adjust the weights.

In [139]:
# SGD with momentum and a small learning rate, so fine-tuning only nudges the weights
fine_tune_optimizer = SGD(lr=1e-4, momentum=0.9)
model.compile(loss='binary_crossentropy',
              optimizer=fine_tune_optimizer,
              metrics=['accuracy'])

In [9]:
# Train on the full training set each epoch and evaluate on the full validation set.
# Left as the cell's last expression so the returned History object is shown.
model.fit_generator(generator=training_data_generator,
                    samples_per_epoch=number_of_training_samples,
                    nb_epoch=number_of_epochs,
                    validation_data=validation_data_generator,
                    nb_val_samples=number_of_validation_samples)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f699c6bae50>

In [10]:
# Persist the fine-tuned weights so the visualization below can reload them
# without repeating the training run.
model.save_weights(vgg16_fine_tune_weights_path)

## Visualization

To visualize how well the classifier did, let's look at the four possible outcomes.

- Predicted cat, was a cat
- Predicted cat, was a dog
- Predicted dog, was a cat
- Predicted dog, was a dog

To do this, we'll need to get an array of all our predicted labels and compare that against the actual labels.

In [9]:
# Restore the fine-tuned weights saved after training.
# NOTE(review): `model` must already have the combined architecture (VGG16 plus
# top layers) built by the earlier cells before this is run.
model.load_weights(vgg16_fine_tune_weights_path)

In [174]:
# The generator exposes the actual classes in directory order, so the predictions
# have to come out in that same order too. That only holds when shuffle is False.
visualise_validation_data_generator = val_datagen.flow_from_directory(
    directory=validation_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='binary',
    shuffle=False)

# Predicted probabilities for all of the validation data
prediction_probablities = model.predict_generator(
    generator=visualise_validation_data_generator,
    val_samples=number_of_validation_samples)

Found 2000 images belonging to 2 classes.


In [47]:
# The model has a single output unit, so take that one column as a flat array
prediction_probablities_array = prediction_probablities[:, 0]

# Round each probability to a definite 1 or 0
rounded_probabilities = np.round(prediction_probablities_array)
predicted_labels = rounded_probabilities.astype(np.int64)

# Actual labels and filenames, in the generator's (unshuffled) order
all_actual_labels = visualise_validation_data_generator.classes.astype(np.int64)
actual_labels = all_actual_labels[:number_of_validation_samples]
image_filenames = visualise_validation_data_generator.filenames

Having got all the data needed, now we just make a couple of helper methods to use when displaying the images...

In [155]:
from IPython.core.display import display, HTML 

def make_html_img(path):
    """Return an HTML <img> tag for `path`, styled to sit inline in a row.

    The path is inserted into the src attribute as-is (no escaping).
    """
    img_template = '<img src="{}" style="display:inline-block;margin:10px 1px; vertical-align:top; max-width:19%"/>'
    return img_template.format(path)

def show_random_images(image_indexes, limit=5):
    """Display a random selection of validation images inline in the notebook.

    image_indexes: indexes into image_filenames of the candidate images.
    limit: maximum number of images to display.
    """
    shuffled_indexes = np.random.permutation(image_indexes)
    chosen_paths = [os.path.join(validation_data_dir, image_filenames[chosen])
                    for chosen in shuffled_indexes[:limit]]
    # Render all of the image tags together as a single HTML fragment
    display(HTML("".join(make_html_img(image_path) for image_path in chosen_paths)))

### Predicted cat, was a cat

In [168]:
# Indexes where the prediction matches the actual label and the predicted label is 0.
# NOTE(review): assumes class index 0 is "cat" — confirm against the generator's class_indices.
predicted_cat_correctly = np.where((predicted_labels == actual_labels) & (predicted_labels == 0))[0]
# print() with a single argument prints the same on Python 2 and Python 3;
# the bare print statement is a SyntaxError on Python 3.
print("{} images".format(len(predicted_cat_correctly)))
show_random_images(predicted_cat_correctly)

935 images


### Predicted cat, was a dog

In [180]:
# Indexes where the prediction differs from the actual label and the predicted label is 0.
# NOTE(review): assumes class index 0 is "cat" — confirm against the generator's class_indices.
predicted_cat_incorrectly = np.where((predicted_labels != actual_labels) & (predicted_labels == 0))[0]
# print() with a single argument prints the same on Python 2 and Python 3;
# the bare print statement is a SyntaxError on Python 3.
print("{} images".format(len(predicted_cat_incorrectly)))
show_random_images(predicted_cat_incorrectly, 10)

19 images


### Predicted dog, was a dog

In [170]:
# Indexes where the prediction matches the actual label and the predicted label is 1.
# NOTE(review): assumes class index 1 is "dog" — confirm against the generator's class_indices.
predicted_dog_correctly = np.where((predicted_labels == actual_labels) & (predicted_labels == 1))[0]
# print() with a single argument prints the same on Python 2 and Python 3;
# the bare print statement is a SyntaxError on Python 3.
print("{} images".format(len(predicted_dog_correctly)))
show_random_images(predicted_dog_correctly)

981 images


### Predicted dog, was a cat

In [173]:
# Indexes where the prediction differs from the actual label and the predicted label is 1.
# NOTE(review): assumes class index 1 is "dog" — confirm against the generator's class_indices.
predicted_dog_incorrectly = np.where((predicted_labels != actual_labels) & (predicted_labels == 1))[0]
# print() with a single argument prints the same on Python 2 and Python 3;
# the bare print statement is a SyntaxError on Python 3.
print("{} images".format(len(predicted_dog_incorrectly)))
show_random_images(predicted_dog_incorrectly, 10)

65 images
