In [53]:
from keras import backend as K
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Convolution2D
from keras.layers.core import Flatten, Dense, Lambda, Dropout
from keras.layers.pooling import MaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator

import numpy as np

## Load MNIST dataset and add the extra channel dimension

In [2]:
# Load MNIST as two (inputs, labels) pairs: one for training, one for testing.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

Downloading data from https://s3.amazonaws.com/img-datasets/mnist.pkl.gz


In [3]:
# Inspect the shape of the training images as loaded.
x_train.shape

(60000, 28, 28)

In [4]:
# Insert a length-1 axis at position 1 of both image arrays.
x_train, x_test = (np.expand_dims(images, 1) for images in (x_train, x_test))

In [5]:
# Confirm the training images now carry the extra axis.
x_train.shape

(60000, 1, 28, 28)

In [6]:
# Confirm the test images now carry the extra axis.
x_test.shape

(10000, 1, 28, 28)

## Convert to One Hot encoding

The labels are currently encoded as a single digit per example, so we convert them to one-hot encoding.

In [7]:
from keras.utils.np_utils import to_categorical

In [8]:
# Peek at the first five labels before encoding.
y_train[:5]

array([5, 0, 4, 1, 9], dtype=uint8)

In [9]:
# Replace the training labels with their to_categorical encoding.
# NOTE(review): this overwrites y_train, so re-running the cell encodes the already-encoded labels.
y_train = to_categorical(y_train)

In [10]:
# Peek at the first five labels after encoding.
y_train[:5]

array([[ 0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.],
       [ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.]])

In [11]:
# Replace the test labels with their to_categorical encoding.
# NOTE(review): this overwrites y_test, so re-running the cell encodes the already-encoded labels.
y_test = to_categorical(y_test)

## Normalise inputs

Since inputs should generally be normalised for machine learning tasks, we compute the mean and standard deviation of the training pixel values, then subtract the mean from each pixel and divide by the standard deviation.

In [12]:
# Pixel statistics over the whole training array, kept as float32 scalars.
mean_px = np.float32(x_train.mean())
std_px = np.float32(x_train.std())


def norm_input(x):
    """Standardise ``x`` by subtracting ``mean_px`` and dividing by ``std_px``."""
    centred = x - mean_px
    return centred / std_px

## Linear Model

Create a linear model that normalises the input (which has a shape of ``(1, 28, 28)``), flattens it, then applies a dense layer with 10 outputs using a softmax activation function. It then compiles it using the Adam optimiser with categorical_crossentropy as the loss function.

In [13]:
def linear_model():
    """Build and compile a linear softmax classifier.

    The network normalises a ``(1, 28, 28)`` input, flattens it and feeds it
    to a single 10-unit softmax layer. It is compiled with Adam, categorical
    cross-entropy loss and an accuracy metric.
    """
    layers = [
        Lambda(norm_input, input_shape=(1, 28, 28)),
        Flatten(),
        Dense(10, activation='softmax'),
    ]
    net = Sequential(layers)
    net.compile(
        Adam(),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return net

In [139]:
# Instantiate the compiled linear model.
model = linear_model()

Now we create batches of training and validation images and fit the model using ``fit_generator``, starting with a batch size of 64. Note that the default Keras data format is ``channels_last``, which expects the channel axis last, ``(60000, 28, 28, 1)``, whereas our data is channels first: ``(60000, 1, 28, 28)``.

In [22]:
# The arrays are channels-first (N, 1, 28, 28); say so explicitly rather than
# relying on the global Keras image data format.
gen = ImageDataGenerator(data_format='channels_first')
training_batches = gen.flow(x_train, y_train, batch_size=64)
test_batches = gen.flow(x_test, y_test, batch_size=64)

In [21]:
# steps_per_epoch / validation_steps count batches, not samples, so convert
# each sample total into the number of batches needed for one full pass.
model.fit_generator(
    training_batches,
    steps_per_epoch=int(np.ceil(training_batches.n / float(training_batches.batch_size))),
    epochs=1,
    validation_data=test_batches,
    validation_steps=int(np.ceil(test_batches.n / float(test_batches.batch_size))))

NameError: name 'model' is not defined

In [84]:
# Change learning rate for optimiser. The value is written into the
# optimiser's existing learning-rate variable so the compiled training
# function picks it up.
K.set_value(model.optimizer.lr, 0.1)
# steps_per_epoch / validation_steps count batches, not samples.
model.fit_generator(
    training_batches,
    steps_per_epoch=int(np.ceil(training_batches.n / float(training_batches.batch_size))),
    epochs=1,
    validation_data=test_batches,
    validation_steps=int(np.ceil(test_batches.n / float(test_batches.batch_size))))

Epoch 1/1


<keras.callbacks.History at 0x124691cc0>

In [85]:
# Lower the learning rate by writing into the optimiser's learning-rate
# variable, then train for four more epochs.
K.set_value(model.optimizer.lr, 0.01)
# steps_per_epoch / validation_steps count batches, not samples.
model.fit_generator(
    training_batches,
    steps_per_epoch=int(np.ceil(training_batches.n / float(training_batches.batch_size))),
    epochs=4,
    validation_data=test_batches,
    validation_steps=int(np.ceil(test_batches.n / float(test_batches.batch_size))))

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x124691ac8>

## Add a single dense layer

We're going to add a single hidden Dense layer between the input and the output layer (**TODO**: revisit why this helps).

In [86]:
def get_fc_model():
    """Build and compile a fully connected network with one hidden layer.

    The network normalises a ``(1, 28, 28)`` input, flattens it, applies a
    512-unit hidden layer and finishes with a 10-way softmax output.

    The hidden layer uses ``relu``: softmax belongs only on the output layer,
    where the activations are read as class probabilities. On a hidden layer
    it forces the 512 activations to sum to one.

    Returns:
        A ``Sequential`` model compiled with Adam, categorical cross-entropy
        loss and an accuracy metric.
    """
    model = Sequential([
        Lambda(norm_input, input_shape=(1, 28, 28)),
        Flatten(),
        Dense(512, activation='relu'),
        Dense(10, activation='softmax')
    ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [87]:
# Instantiate the compiled fully connected model.
fc_model = get_fc_model()

In [88]:
# steps_per_epoch / validation_steps count batches, not samples, so convert
# each sample total into the number of batches needed for one full pass.
fc_model.fit_generator(
    training_batches,
    steps_per_epoch=int(np.ceil(training_batches.n / float(training_batches.batch_size))),
    epochs=1,
    validation_data=test_batches,
    validation_steps=int(np.ceil(test_batches.n / float(test_batches.batch_size))))

Epoch 1/1


<keras.callbacks.History at 0x116f46978>

In [None]:
fc_model.optimizer.lr = 0.1

In [None]:
fc_model.fit_generator(
    training_batches,
    steps_per_epoch=training_batches.n,
    epochs=1, 
    validation_data=test_batches,
    validation_steps=test_batches.n)

In [None]:
fc_model.optimizer.lr = 0.01

In [None]:
fc_model.fit_generator(
    training_batches,
    steps_per_epoch=training_batches.n,
    epochs=1, 
    validation_data=test_batches,
    validation_steps=test_batches.n)

## Upgrade model to be a VGG-style CNN

We add 2 convolutional layers, each with 32 filters, a 3x3 kernel and a relu activation, then a max pooling layer, followed by 2 more convolutional layers with 64 filters and another max pooling layer.

In [17]:
def get_vgg_model():
    """Build and compile a small VGG-style CNN.

    Layers: input normalisation, two 3x3 convolutions with 32 filters, max
    pooling, two 3x3 convolutions with 64 filters, max pooling, flatten, a
    512-unit relu dense layer and a 10-way softmax output.

    The input is channels-first ``(1, 28, 28)``, so ``data_format`` is passed
    explicitly to the convolution and pooling layers instead of relying on
    the global Keras configuration. Kernel sizes use the Keras 2 tuple form.

    Returns:
        A ``Sequential`` model compiled with Adam, categorical cross-entropy
        loss and an accuracy metric.
    """
    conv_args = dict(activation='relu', data_format='channels_first')
    model = Sequential([
        Lambda(norm_input, input_shape=(1, 28, 28)),
        Convolution2D(32, (3, 3), **conv_args),
        Convolution2D(32, (3, 3), **conv_args),
        MaxPooling2D(data_format='channels_first'),
        Convolution2D(64, (3, 3), **conv_args),
        Convolution2D(64, (3, 3), **conv_args),
        MaxPooling2D(data_format='channels_first'),
        Flatten(),
        Dense(512, activation='relu'),
        Dense(10, activation='softmax')
    ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [18]:
# Instantiate the compiled VGG-style model.
vgg_style_model = get_vgg_model()

In [26]:
# Keras 2 keyword names, matching the earlier cells. steps_per_epoch and
# validation_steps count batches, so one epoch is one pass over the data.
vgg_style_model.fit_generator(
    training_batches,
    steps_per_epoch=int(np.ceil(training_batches.n / float(training_batches.batch_size))),
    epochs=1,
    validation_data=test_batches,
    validation_steps=int(np.ceil(test_batches.n / float(test_batches.batch_size))))

Epoch 1/1


<keras.callbacks.History at 0x10d6658d0>

In [28]:
# Write the new rate into the optimiser's learning-rate variable. Assigning a
# plain float to ``optimizer.lr`` would not affect the training function
# that the previous fit already built.
K.set_value(vgg_style_model.optimizer.lr, 0.1)

In [29]:
# Keras 2 keyword names, matching the earlier cells. steps_per_epoch and
# validation_steps count batches, so one epoch is one pass over the data.
vgg_style_model.fit_generator(
    training_batches,
    steps_per_epoch=int(np.ceil(training_batches.n / float(training_batches.batch_size))),
    epochs=1,
    validation_data=test_batches,
    validation_steps=int(np.ceil(test_batches.n / float(test_batches.batch_size))))

Epoch 1/1


<keras.callbacks.History at 0x10d16fcf8>

## Adding data augmentation

Use the same VGG-style model, but this time the image data generator is passed the following params:

  * rotation_range = 8
  * width_shift_range = 0.08
  * shear_range = 0.3
  * height_shift_range = 0.08
  * zoom_range = 0.08

In [None]:
# The arrays are channels-first (N, 1, 28, 28). Passing data_format explicitly
# makes the augmentations act on the two spatial axes regardless of the
# global Keras image data format.
aug_gen = ImageDataGenerator(
    rotation_range=8, width_shift_range=0.08,
    shear_range=0.3, height_shift_range=0.08,
    zoom_range=0.08, data_format='channels_first')
# Validation data is served without augmentation.
gen = ImageDataGenerator(data_format='channels_first')

training_batches = aug_gen.flow(x_train, y_train, batch_size=64)
test_batches = gen.flow(x_test, y_test, batch_size=64)

In [41]:
# Start from a freshly initialised VGG-style model for the augmented run.
# The builder defined in this notebook is get_vgg_model; get_model does not exist.
vgg_style_model = get_vgg_model()

In [44]:
# Keras 2 keyword names, matching the earlier cells. steps_per_epoch and
# validation_steps count batches, so one epoch is one pass over the data.
vgg_style_model.fit_generator(
    training_batches,
    steps_per_epoch=int(np.ceil(training_batches.n / float(training_batches.batch_size))),
    epochs=1,
    validation_data=test_batches,
    validation_steps=int(np.ceil(test_batches.n / float(test_batches.batch_size))))

Epoch 1/1


<keras.callbacks.History at 0x108b26860>

## Adding batch normalisation

Here we add batch normalisation after every convolutional layer or convolution + max pool layer. We normalise over axis 1 (the channel axis) until the output is flattened, after which the default axis is used.

In [49]:
def get_model_bn():
    """Build and compile the VGG-style CNN with batch normalisation.

    Batch normalisation over axis 1 (the channel axis of the channels-first
    feature maps) is applied between the convolutional stages. After
    flattening, batch normalisation with its default axis is applied before
    and after the 512-unit dense layer.

    The input is channels-first ``(1, 28, 28)``, so ``data_format`` is passed
    explicitly to the convolution and pooling layers instead of relying on
    the global Keras configuration. Kernel sizes use the Keras 2 tuple form.

    Returns:
        A ``Sequential`` model compiled with Adam, categorical cross-entropy
        loss and an accuracy metric.
    """
    conv_args = dict(activation='relu', data_format='channels_first')
    model = Sequential([
        Lambda(norm_input, input_shape=(1, 28, 28)),
        Convolution2D(32, (3, 3), **conv_args),
        BatchNormalization(axis=1),
        Convolution2D(32, (3, 3), **conv_args),
        MaxPooling2D(data_format='channels_first'),
        BatchNormalization(axis=1),
        Convolution2D(64, (3, 3), **conv_args),
        BatchNormalization(axis=1),
        Convolution2D(64, (3, 3), **conv_args),
        MaxPooling2D(data_format='channels_first'),
        Flatten(),
        BatchNormalization(),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dense(10, activation='softmax')
    ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [50]:
# Instantiate the compiled batch-normalised model.
bn_model = get_model_bn()

In [51]:
# Keras 2 keyword names, matching the earlier cells. steps_per_epoch and
# validation_steps count batches, so one epoch is one pass over the data.
bn_model.fit_generator(
    training_batches,
    steps_per_epoch=int(np.ceil(training_batches.n / float(training_batches.batch_size))),
    epochs=1,
    validation_data=test_batches,
    validation_steps=int(np.ceil(test_batches.n / float(test_batches.batch_size))))

Epoch 1/1


<keras.callbacks.History at 0x10ec09550>

## Batchnorm, dropout and data augmentation

We add dropout after the final batch normalisation layer in an attempt to reduce overfitting (though it doesn't seem like we're overfitting much).

In [54]:
def get_model_bn_do():
    """Build and compile the batch-normalised CNN with dropout.

    Same layer stack as the batch-normalised VGG-style model, with a
    ``Dropout(0.5)`` layer inserted after the final batch normalisation and
    before the 10-way softmax output.

    The input is channels-first ``(1, 28, 28)``, so ``data_format`` is passed
    explicitly to the convolution and pooling layers instead of relying on
    the global Keras configuration. Kernel sizes use the Keras 2 tuple form.

    Returns:
        A ``Sequential`` model compiled with Adam, categorical cross-entropy
        loss and an accuracy metric.
    """
    conv_args = dict(activation='relu', data_format='channels_first')
    model = Sequential([
        Lambda(norm_input, input_shape=(1, 28, 28)),
        Convolution2D(32, (3, 3), **conv_args),
        BatchNormalization(axis=1),
        Convolution2D(32, (3, 3), **conv_args),
        MaxPooling2D(data_format='channels_first'),
        BatchNormalization(axis=1),
        Convolution2D(64, (3, 3), **conv_args),
        BatchNormalization(axis=1),
        Convolution2D(64, (3, 3), **conv_args),
        MaxPooling2D(data_format='channels_first'),
        Flatten(),
        BatchNormalization(),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(10, activation='softmax')
    ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [55]:
# Instantiate the compiled batch-norm + dropout model.
bn_do_model = get_model_bn_do()

In [57]:
# Keras 2 keyword names, matching the earlier cells. steps_per_epoch and
# validation_steps count batches, so one epoch is one pass over the data.
bn_do_model.fit_generator(
    training_batches,
    steps_per_epoch=int(np.ceil(training_batches.n / float(training_batches.batch_size))),
    epochs=1,
    validation_data=test_batches,
    validation_steps=int(np.ceil(test_batches.n / float(test_batches.batch_size))))

Epoch 1/1


<keras.callbacks.History at 0x1139c0748>

## Ensembling

Train a model 6 times and make predictions on all 6. Then average the predictions to return a final score.

In [60]:
def fit_model():
    """Train a fresh batch-norm + dropout model on a stepped learning-rate schedule.

    The schedule is: 1 epoch at the optimiser's initial learning rate, then
    4 epochs at 0.1, 12 epochs at 0.01 and 18 epochs at 0.001. Training reads
    the module-level ``training_batches`` and validates on ``test_batches``.

    Each new rate is written into the optimiser's learning-rate variable with
    ``K.set_value``. Assigning a plain float to ``model.optimizer.lr`` would
    not affect the training function built by the first fit.

    Returns:
        The trained model.
    """
    model = get_model_bn_do()

    # fit_generator counts batches, not samples: one epoch = one full pass.
    train_steps = int(np.ceil(training_batches.n / float(training_batches.batch_size)))
    val_steps = int(np.ceil(test_batches.n / float(test_batches.batch_size)))

    # (learning rate, epochs); None keeps the optimiser's initial rate.
    schedule = [(None, 1), (0.1, 4), (0.01, 12), (0.001, 18)]
    for lr, n_epochs in schedule:
        if lr is not None:
            K.set_value(model.optimizer.lr, lr)
        model.fit_generator(training_batches, steps_per_epoch=train_steps,
                            epochs=n_epochs, verbose=0,
                            validation_data=test_batches,
                            validation_steps=val_steps)
    return model

In [None]:
# Train six independent models for the ensemble.
models = [fit_model() for _ in range(6)]