In [1]:
from theano.sandbox import cuda
# Ask Theano's CUDA sandbox to use the device named 'gpu2' for this kernel session.
cuda.use('gpu2')

Using gpu device 0: Tesla K80 (CNMeM is disabled, cuDNN 5103)


In [2]:
%matplotlib inline
import utils; reload(utils)
from utils import *
from __future__ import division, print_function

Using Theano backend.


# Setup

In [3]:
from keras.datasets import mnist
# Load MNIST through Keras as (inputs, labels) pairs for the train and test splits.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [4]:
# Insert a singleton axis at position 1 so every image is (1, rows, cols).
# Guarded on ndim so that re-running this cell does not stack extra axes.
if x_train.ndim == 3:
    x_train = np.expand_dims(x_train, 1)
if x_test.ndim == 3:
    x_test = np.expand_dims(x_test, 1)

In [5]:
# Encode the label vectors with the `onehot` helper pulled in from `utils`.
# Guarded on ndim so that re-running this cell does not re-encode labels
# that were already converted (assumes the raw labels are 1-D — TODO confirm).
if y_train.ndim == 1:
    y_train = onehot(y_train)
if y_test.ndim == 1:
    y_test = onehot(y_test)

In [6]:
# Global mean / standard deviation of the training inputs, kept as float32 scalars.
mean_x = np.float32(x_train.mean())
std_x = np.float32(x_train.std())

In [7]:
def normalise(x):
    """Standardise `x` with the training-set statistics.

    Subtracts the module-level `mean_x` and divides the result by the
    module-level `std_x`; used as the first (Lambda) layer of every model.
    """
    centred = x - mean_x
    return centred / std_x

# Linear Model

In [10]:
# Sanity check: display the training array's shape after the extra axis was added.
x_train.shape

(60000, 1, 28, 28)

In [11]:
def get_lin_model():
    """Build and compile the linear baseline.

    The network normalises its input, flattens it, and feeds it to a single
    10-way softmax layer. It is compiled with a default `Adam` optimizer,
    categorical cross-entropy loss, and accuracy as the reported metric.

    Returns:
        The compiled `Sequential` model.
    """
    model = Sequential()
    model.add(Lambda(normalise, input_shape=x_train.shape[1:]))
    model.add(Flatten())
    model.add(Dense(10, activation='softmax'))
    model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [12]:
# Instantiate a fresh, untrained linear baseline model.
lm = get_lin_model()

In [13]:
# Build batch iterators over the in-memory arrays. The generator is created with
# no arguments, so no augmentation options are requested here.
gen = image.ImageDataGenerator()
# Training and validation streams, both in batches of 64.
batches = gen.flow(x_train, y_train, batch_size=64)
val_batches = gen.flow(x_test, y_test, batch_size=64)

In [14]:
# Train the linear model for 1 epoch with the optimizer's default learning rate,
# validating against `val_batches`.
lm.fit_generator(batches, batches.N, nb_epoch=1, validation_data=val_batches, nb_val_samples=val_batches.N)

Epoch 1/1


<keras.callbacks.History at 0x7f364e954210>

In [15]:
# Request a learning rate of 0.1 for the next training run.
# NOTE(review): rebinding `lr` after the model has already been fit may not reach
# the compiled training function in Keras 1.x — confirm; updating the existing
# variable in place (e.g. `lr.set_value(...)`) would.
lm.optimizer.lr = 0.1

In [16]:
# One more epoch on the linear model after the learning-rate change.
lm.fit_generator(batches, batches.N, nb_epoch=1, validation_data=val_batches, nb_val_samples=val_batches.N)

Epoch 1/1


<keras.callbacks.History at 0x7f364e8b8450>

In [17]:
# Request a learning rate of 0.01 for the remaining epochs.
# NOTE(review): assignment after a fit may be a no-op for the compiled
# training function in Keras 1.x — confirm.
lm.optimizer.lr = 0.01

In [18]:
# Four further epochs on the linear model.
lm.fit_generator(batches, batches.N, nb_epoch=4, validation_data=val_batches, nb_val_samples=val_batches.N)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x7f364e8b8a90>

# Single Dense Layer

In [32]:
def get_fc_model():
    """Build and compile a network with one fully connected hidden layer.

    The network normalises and flattens its input, applies a 512-unit hidden
    layer, and ends in a 10-way softmax classifier. It is compiled with a
    default `Adam` optimizer, categorical cross-entropy loss, and accuracy as
    the reported metric.

    Returns:
        The compiled `Sequential` model.
    """
    model = Sequential([
            Lambda(normalise, input_shape=x_train.shape[1:]),
            Flatten(),
            # Hidden layer uses ReLU: softmax here would force the 512 activations
            # to sum to 1, which starves the output layer of signal. Softmax
            # belongs only on the final classification layer.
            Dense(512, activation='relu'),
            Dense(10, activation='softmax')
        ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [33]:
# Instantiate a fresh, untrained single-hidden-layer model.
fc = get_fc_model()

In [34]:
# Initial epoch for the dense model with the optimizer's default learning rate.
fc.fit_generator(batches, batches.N, nb_epoch=1, validation_data=val_batches, nb_val_samples=val_batches.N)

Epoch 1/1


<keras.callbacks.History at 0x7f364516b7d0>

In [35]:
# Request a learning rate of 0.1 for the next training run.
# NOTE(review): assignment after a fit may be a no-op for the compiled
# training function in Keras 1.x — confirm.
fc.optimizer.lr=0.1

In [36]:
# One more epoch on the dense model after the learning-rate change.
fc.fit_generator(batches, batches.N, nb_epoch=1, validation_data=val_batches, nb_val_samples=val_batches.N)

Epoch 1/1


<keras.callbacks.History at 0x7f3644f84c90>

In [37]:
# Request a learning rate of 0.01 for the remaining epochs.
# NOTE(review): assignment after a fit may be a no-op for the compiled
# training function in Keras 1.x — confirm.
fc.optimizer.lr=0.01

In [38]:
# Four further epochs on the dense model.
fc.fit_generator(batches, batches.N, nb_epoch=4, validation_data=val_batches, nb_val_samples=val_batches.N)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x7f3644f84f50>

# Basic VGG-style CNN

In [8]:
def get_model():
    """Build and compile the basic VGG-style CNN.

    Layout: input normalisation, then two stages of (3x3 conv, 3x3 conv,
    max-pool) with 32 and 64 filters respectively, followed by a 512-unit
    ReLU dense layer and a 10-way softmax classifier. Compiled with a default
    `Adam` optimizer, categorical cross-entropy loss, and the accuracy metric.

    Returns:
        The compiled `Sequential` model.
    """
    def conv(n_filters):
        # 3x3 ReLU convolution, the only conv flavour used in this network.
        return Convolution2D(n_filters, 3, 3, activation='relu')

    model = Sequential()
    model.add(Lambda(normalise, input_shape=x_train.shape[1:]))
    for n_filters in (32, 64):
        model.add(conv(n_filters))
        model.add(conv(n_filters))
        model.add(MaxPooling2D())
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(10, activation='softmax'))
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [45]:
# Instantiate a fresh, untrained CNN.
model = get_model()

In [46]:
# Initial epoch for the CNN with the optimizer's default learning rate.
model.fit_generator(batches, batches.N, nb_epoch=1, validation_data=val_batches, nb_val_samples=val_batches.N)

Epoch 1/1


<keras.callbacks.History at 0x7f363a7d9410>

In [47]:
# Request a learning rate of 0.1 for the next training run.
# NOTE(review): assignment after a fit may be a no-op for the compiled
# training function in Keras 1.x — confirm.
model.optimizer.lr=0.1

In [48]:
# One more epoch on the CNN after the learning-rate change.
model.fit_generator(batches, batches.N, nb_epoch=1, validation_data=val_batches, nb_val_samples=val_batches.N)

Epoch 1/1


<keras.callbacks.History at 0x7f363915cc50>

In [49]:
# Request a learning rate of 0.01 for the remaining epochs.
# NOTE(review): assignment after a fit may be a no-op for the compiled
# training function in Keras 1.x — confirm.
model.optimizer.lr=0.01

In [50]:
# Eight further epochs on the CNN.
model.fit_generator(batches, batches.N, nb_epoch=8, validation_data=val_batches, nb_val_samples=val_batches.N)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x7f363915c890>

# Reducing Overfitting

In [9]:
# Start again from an untrained CNN so this section's training is independent
# of any earlier fit.
model = get_model()

In [15]:
# Augment the training stream only: small random rotations, shifts, shears and zooms.
gen = image.ImageDataGenerator(rotation_range=8, width_shift_range=0.08, shear_range=0.3,
                               height_shift_range=0.08, zoom_range=0.08)
batches = gen.flow(x_train, y_train, batch_size=64)
# Validation goes through a plain generator so that the reported validation
# loss/accuracy are measured on the unmodified test images rather than on
# randomly distorted copies of them.
val_batches = image.ImageDataGenerator().flow(x_test, y_test, batch_size=64)

In [16]:
# Initial epoch on the augmented training stream with the default learning rate.
model.fit_generator(batches, batches.N, nb_epoch=1, validation_data=val_batches, nb_val_samples=val_batches.N)

Epoch 1/1


<keras.callbacks.History at 0x7f447a17a9d0>

In [17]:
# Request a learning rate of 0.1 for the next training run.
# NOTE(review): assignment after a fit may be a no-op for the compiled
# training function in Keras 1.x — confirm.
model.optimizer.lr = 0.1

In [18]:
# Four epochs after the learning-rate change.
model.fit_generator(batches, batches.N, nb_epoch=4, validation_data=val_batches, nb_val_samples=val_batches.N)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x7f446d2e6890>

In [19]:
# Request a learning rate of 0.01.
# NOTE(review): assignment after a fit may be a no-op for the compiled
# training function in Keras 1.x — confirm.
model.optimizer.lr = 0.01

In [20]:
# Eight further epochs.
model.fit_generator(batches, batches.N, nb_epoch=8, validation_data=val_batches, nb_val_samples=val_batches.N)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x7f446d2e6790>

In [21]:
# Request a learning rate of 0.001.
# NOTE(review): assignment after a fit may be a no-op for the compiled
# training function in Keras 1.x — confirm.
model.optimizer.lr = 0.001

In [22]:
# Fourteen further epochs.
model.fit_generator(batches, batches.N, nb_epoch=14, validation_data=val_batches, nb_val_samples=val_batches.N)

Epoch 1/14
Epoch 2/14
Epoch 3/14
Epoch 4/14
Epoch 5/14
Epoch 6/14
Epoch 7/14
Epoch 8/14
Epoch 9/14
Epoch 10/14
Epoch 11/14
Epoch 12/14
Epoch 13/14
Epoch 14/14


<keras.callbacks.History at 0x7f447a17aa10>

In [23]:
# Request a learning rate of 0.0001 for the final fine-tuning run.
# NOTE(review): assignment after a fit may be a no-op for the compiled
# training function in Keras 1.x — confirm.
model.optimizer.lr = 0.0001

In [24]:
# Ten final epochs.
model.fit_generator(batches, batches.N, nb_epoch=10, validation_data=val_batches, nb_val_samples=val_batches.N)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f446d2e6690>

# Batchnorm + data augmentation

In [25]:
def get_model_bn():
    """Build and compile the VGG-style CNN with batch normalisation.

    Same convolutional layout as the basic CNN (32/32 filters, pool, 64/64
    filters, pool, 512-unit dense, 10-way softmax) with `BatchNormalization`
    layers interleaved: `axis=1` for the 4-D convolutional activations and
    the default axis after flattening. Compiled with a default `Adam`
    optimizer, categorical cross-entropy loss, and the accuracy metric.

    Returns:
        The compiled `Sequential` model.
    """
    def conv(n_filters):
        # 3x3 ReLU convolution.
        return Convolution2D(n_filters, 3, 3, activation='relu')

    def conv_bn():
        # Batch norm over axis 1 of the convolutional feature maps.
        return BatchNormalization(axis=1)

    layers = [Lambda(normalise, input_shape=x_train.shape[1:])]
    # First convolutional stage (32 filters).
    layers += [conv(32), conv_bn(), conv(32), MaxPooling2D()]
    # Second convolutional stage (64 filters).
    layers += [conv_bn(), conv(64), conv_bn(), conv(64), MaxPooling2D()]
    # Dense classification head.
    layers += [
        Flatten(),
        BatchNormalization(),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dense(10, activation='softmax'),
    ]

    model = Sequential(layers)
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [26]:
# Instantiate a fresh, untrained batch-normalised CNN.
model = get_model_bn()

In [27]:
# Initial epoch for the batch-normalised CNN with the default learning rate.
model.fit_generator(batches, batches.N, nb_epoch=1, validation_data=val_batches, nb_val_samples=val_batches.N)

Epoch 1/1


<keras.callbacks.History at 0x7f444b28fd90>

In [28]:
# Request a learning rate of 0.1 for the next training run.
# NOTE(review): assignment after a fit may be a no-op for the compiled
# training function in Keras 1.x — confirm.
model.optimizer.lr = 0.1

In [29]:
# Four epochs after the learning-rate change.
model.fit_generator(batches, batches.N, nb_epoch=4, validation_data=val_batches, nb_val_samples=val_batches.N)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x7f444b2897d0>

In [30]:
# Request a learning rate of 0.01.
# NOTE(review): assignment after a fit may be a no-op for the compiled
# training function in Keras 1.x — confirm.
model.optimizer.lr = 0.01

In [31]:
# Twelve further epochs.
model.fit_generator(batches, batches.N, nb_epoch=12, validation_data=val_batches, nb_val_samples=val_batches.N)

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


<keras.callbacks.History at 0x7f444b289350>

In [32]:
# Request a learning rate of 0.001 for the final run.
# NOTE(review): assignment after a fit may be a no-op for the compiled
# training function in Keras 1.x — confirm.
model.optimizer.lr = 0.001

In [33]:
# Twelve final epochs.
model.fit_generator(batches, batches.N, nb_epoch=12, validation_data=val_batches, nb_val_samples=val_batches.N)

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


<keras.callbacks.History at 0x7f447cabfb50>

# Batchnorm + dropout + data augmentation

In [34]:
def get_model_bn_do():
    """Build and compile the batch-normalised CNN with dropout before the classifier.

    Layer-for-layer the same as the batch-normalised CNN, with one addition:
    a `Dropout(0.5)` layer between the last batch norm and the 10-way softmax
    output. Compiled with a default `Adam` optimizer, categorical
    cross-entropy loss, and the accuracy metric.

    Returns:
        The compiled `Sequential` model.
    """
    def conv(n_filters):
        # 3x3 ReLU convolution.
        return Convolution2D(n_filters, 3, 3, activation='relu')

    def conv_bn():
        # Batch norm over axis 1 of the convolutional feature maps.
        return BatchNormalization(axis=1)

    layers = [Lambda(normalise, input_shape=x_train.shape[1:])]
    # First convolutional stage (32 filters).
    layers += [conv(32), conv_bn(), conv(32), MaxPooling2D()]
    # Second convolutional stage (64 filters).
    layers += [conv_bn(), conv(64), conv_bn(), conv(64), MaxPooling2D()]
    # Dense head, regularised with dropout just before the output layer.
    layers += [
        Flatten(),
        BatchNormalization(),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(10, activation='softmax'),
    ]

    model = Sequential(layers)
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [35]:
# Instantiate a fresh, untrained batch-norm + dropout CNN.
model = get_model_bn_do()

In [36]:
# Initial epoch for the batch-norm + dropout CNN with the default learning rate.
model.fit_generator(batches, batches.N, nb_epoch=1, validation_data=val_batches, nb_val_samples=val_batches.N)

Epoch 1/1


<keras.callbacks.History at 0x7f44423702d0>

In [37]:
# Request a learning rate of 0.1 for the next training run.
# NOTE(review): assignment after a fit may be a no-op for the compiled
# training function in Keras 1.x — confirm.
model.optimizer.lr = 0.1

In [38]:
# Four epochs after the learning-rate change.
model.fit_generator(batches, batches.N, nb_epoch=4, validation_data=val_batches, nb_val_samples=val_batches.N)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x7f4442370590>

In [39]:
# Request a learning rate of 0.01.
# NOTE(review): assignment after a fit may be a no-op for the compiled
# training function in Keras 1.x — confirm.
model.optimizer.lr = 0.01

In [40]:
# Twelve further epochs.
model.fit_generator(batches, batches.N, nb_epoch=12, validation_data=val_batches, nb_val_samples=val_batches.N)

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


<keras.callbacks.History at 0x7f4448ce3750>

In [41]:
# Request a learning rate of 0.001 for the final run.
# NOTE(review): assignment after a fit may be a no-op for the compiled
# training function in Keras 1.x — confirm.
model.optimizer.lr = 0.001

In [42]:
# Twelve final epochs.
model.fit_generator(batches, batches.N, nb_epoch=12, validation_data=val_batches, nb_val_samples=val_batches.N)

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


<keras.callbacks.History at 0x7f4448ceb810>

# Ensemble

In [43]:
def fit_model():
    """Train one batch-norm + dropout CNN from scratch and return it.

    Schedule: 1 warm-up epoch with the optimizer's default learning rate, then
    4 epochs after setting `lr` to 0.1, 12 epochs after setting it to 0.01 and
    18 epochs after setting it to 0.001. Every run draws from the module-level
    `batches` and validates on `val_batches`.

    NOTE(review): `lr` is rebound by plain attribute assignment after the
    first fit; in Keras 1.x this may not reach the already-compiled training
    function — confirm that the schedule actually takes effect.

    Returns:
        The trained model.
    """
    model = get_model_bn_do()
    # Validation arguments shared by every fit call below.
    val_kwargs = dict(validation_data=val_batches, nb_val_samples=val_batches.N)

    # Warm-up epoch before touching the learning rate.
    model.fit_generator(batches, batches.N, nb_epoch=1, **val_kwargs)

    # (learning rate, number of epochs) for each subsequent stage.
    schedule = ((0.1, 4), (0.01, 12), (0.001, 18))
    for rate, n_epochs in schedule:
        model.optimizer.lr = rate
        model.fit_generator(batches, batches.N, nb_epoch=n_epochs, **val_kwargs)
    return model

In [45]:
# Train six independent models for the ensemble; each call to fit_model()
# builds and trains a new network from scratch.
models = [fit_model() for i in range(6)]

Epoch 1/1
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
Epoch 1/18
Epoch 2/18
Epoch 3/18
Epoch 4/18
Epoch 5/18
Epoch 6/18
Epoch 7/18
Epoch 8/18
Epoch 9/18
Epoch 10/18
Epoch 11/18
Epoch 12/18
Epoch 13/18
Epoch 14/18
Epoch 15/18
Epoch 16/18
Epoch 17/18
Epoch 18/18
Epoch 1/1
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
Epoch 1/18
Epoch 2/18
Epoch 3/18
Epoch 4/18
Epoch 5/18
Epoch 6/18
Epoch 7/18
Epoch 8/18
Epoch 9/18
Epoch 10/18
Epoch 11/18
Epoch 12/18
Epoch 13/18
Epoch 14/18
Epoch 15/18
Epoch 16/18
Epoch 17/18
Epoch 18/18
Epoch 1/1
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
Epoch 1/18
Epoch 2/18
Epoch 3/1

In [46]:
# Relative directory where the ensemble weights are written.
# NOTE(review): 'mnsit' looks like a transposition of 'mnist' — confirm the
# directory name on disk before renaming anything.
path = 'data/mnsit/'
model_path = path + 'model/'

In [47]:
# Persist each ensemble member's weights to its own file, numbered by its
# position in `models`.
# NOTE(review): presumably `model_path` must already exist, and the '.pkl'
# suffix may not reflect the on-disk format Keras uses — confirm.
for i, m in enumerate(models):
    m.save_weights(model_path+'mnsit_ensemble'+str(i)+'.pkl')

In [48]:
# Evaluate every ensemble member on the raw (un-augmented) test arrays.
evals = [m.evaluate(x_test, y_test, batch_size=256) for m in models]



In [49]:
# Convert the list of per-model results into an array, one row per model.
evals = np.array(evals)

In [50]:
# Average each evaluation column across the models (presumably [loss, accuracy],
# matching the loss and metric the models were compiled with).
evals.mean(axis=0)

array([ 0.0139,  0.9957])

In [51]:
# Collect each member's predictions for the test set.
all_preds = [m.predict(x_test, batch_size=256) for m in models]

In [52]:
# Stack the per-model prediction arrays along a new leading (model) axis.
all_preds = np.stack(all_preds)

In [53]:
# Ensemble prediction: average over the model axis.
avg_preds = all_preds.mean(axis=0)

In [54]:
# Accuracy of the averaged ensemble predictions against the test labels;
# `.eval()` turns the backend expression into a concrete value for display.
keras.metrics.categorical_accuracy(y_test, avg_preds).eval()

array(0.996999979019165, dtype=float32)