In [1]:
from __future__ import division, print_function
%matplotlib inline
from importlib import reload  # Python 3
import utils; reload(utils)
from utils import *

Using cuDNN version 6021 on context None
Mapped name None to device cuda0: GeForce GTX TITAN X (0000:04:00.0)
Using Theano backend.


## Setup

In [2]:
batch_size=64

In [3]:
from keras.datasets import mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()
(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

((60000, 28, 28), (60000,), (10000, 28, 28), (10000,))

In [4]:
X_test = np.expand_dims(X_test,1)
X_train = np.expand_dims(X_train,1)

In [5]:
X_train.shape

(60000, 1, 28, 28)

In [6]:
y_train[:5]

array([5, 0, 4, 1, 9], dtype=uint8)

In [7]:
y_train = onehot(y_train)
y_test = onehot(y_test)

In [8]:
y_train[:5]

array([[ 0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.],
       [ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.]])

In [9]:
mean_px = X_train.mean().astype(np.float32)
std_px = X_train.std().astype(np.float32)

In [10]:
def norm_input(x): return (x-mean_px)/std_px

## Linear model

In [11]:
def get_lin_model():
    model = Sequential([
        Lambda(norm_input, input_shape=(1,28,28)),
        Flatten(),
        Dense(10, activation='softmax')
        ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [12]:
lm = get_lin_model()

  .format(self.name, input_shape))


In [13]:
gen = image.ImageDataGenerator()
batches = gen.flow(X_train, y_train, batch_size=batch_size)
test_batches = gen.flow(X_test, y_test, batch_size=batch_size)
steps_per_epoch = int(np.ceil(batches.n/batch_size))
validation_steps = int(np.ceil(test_batches.n/batch_size))

In [14]:
lm.fit_generator(batches, steps_per_epoch=steps_per_epoch, epochs=1, 
                    validation_data=test_batches, validation_steps=validation_steps)

Epoch 1/1


<keras.callbacks.History at 0x7f97bfab98d0>

In [15]:
lm.optimizer.lr=0.1

In [16]:
lm.fit_generator(batches, steps_per_epoch=steps_per_epoch, epochs=1, 
                    validation_data=test_batches, validation_steps=validation_steps)

Epoch 1/1


<keras.callbacks.History at 0x7f97bf207cf8>

In [17]:
lm.optimizer.lr=0.01

In [18]:
lm.fit_generator(batches, steps_per_epoch=steps_per_epoch, epochs=4, 
                    validation_data=test_batches, validation_steps=validation_steps)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x7f97bf2165f8>

## Single dense layer

In [19]:
def get_fc_model():
    model = Sequential([
        Lambda(norm_input, input_shape=(1,28,28)),
        Flatten(),
        Dense(512, activation='softmax'),
        Dense(10, activation='softmax')
        ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [20]:
fc = get_fc_model()

  .format(self.name, input_shape))


In [21]:
fc.fit_generator(batches, steps_per_epoch=steps_per_epoch, epochs=1, 
                    validation_data=test_batches, validation_steps=validation_steps)

Epoch 1/1


<keras.callbacks.History at 0x7f97fff3e240>

In [22]:
fc.optimizer.lr=0.1

In [23]:
fc.fit_generator(batches, steps_per_epoch=steps_per_epoch, epochs=4, 
                    validation_data=test_batches, validation_steps=validation_steps)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x7f97be2d9908>

In [24]:
fc.optimizer.lr=0.01

In [25]:
fc.fit_generator(batches, steps_per_epoch=steps_per_epoch, epochs=4, 
                    validation_data=test_batches, validation_steps=validation_steps)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x7f97be33dcf8>

## Basic 'VGG-style' CNN

In [26]:
def get_model():
    model = Sequential([
        Lambda(norm_input, input_shape=(1,28,28)),
        Conv2D(32,(3,3), activation='relu'),
        Conv2D(32,(3,3), activation='relu'),
        MaxPooling2D(),
        Conv2D(64,(3,3), activation='relu'),
        Conv2D(64,(3,3), activation='relu'),
        MaxPooling2D(),
        Flatten(),
        Dense(512, activation='relu'),
        Dense(10, activation='softmax')
        ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [27]:
model = get_model()

  .format(self.name, input_shape))


In [28]:
model.fit_generator(batches, steps_per_epoch=steps_per_epoch, epochs=1, 
                    validation_data=test_batches, validation_steps=validation_steps)

Epoch 1/1


<keras.callbacks.History at 0x7f97a4091438>

In [29]:
model.optimizer.lr=0.1

In [30]:
model.fit_generator(batches, steps_per_epoch=steps_per_epoch, epochs=1, 
                    validation_data=test_batches, validation_steps=validation_steps)

Epoch 1/1


<keras.callbacks.History at 0x7f978e6e4c88>

In [31]:
model.optimizer.lr=0.01

In [32]:
model.fit_generator(batches, steps_per_epoch=steps_per_epoch, epochs=8, 
                    validation_data=test_batches, validation_steps=validation_steps)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x7f978e6e1ac8>

## Data augmentation

In [33]:
model = get_model()

  .format(self.name, input_shape))


In [34]:
gen = image.ImageDataGenerator(rotation_range=8, width_shift_range=0.08, shear_range=0.3,
                               height_shift_range=0.08, zoom_range=0.08)
batches = gen.flow(X_train, y_train, batch_size=batch_size)
test_batches = gen.flow(X_test, y_test, batch_size=batch_size)
steps_per_epoch = int(np.ceil(batches.n/batch_size))
validation_steps = int(np.ceil(test_batches.n/batch_size))

In [35]:
model.fit_generator(batches, steps_per_epoch=steps_per_epoch, epochs=1, 
                    validation_data=test_batches, validation_steps=validation_steps)

Epoch 1/1


<keras.callbacks.History at 0x7f97d2f997f0>

In [36]:
model.optimizer.lr=0.1

In [37]:
model.fit_generator(batches, steps_per_epoch=steps_per_epoch, epochs=4, 
                    validation_data=test_batches, validation_steps=validation_steps)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x7f97d2c44b00>

In [38]:
model.optimizer.lr=0.01

In [39]:
model.fit_generator(batches, steps_per_epoch=steps_per_epoch, epochs=8, 
                    validation_data=test_batches, validation_steps=validation_steps)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x7f97d2c44d68>

In [40]:
model.optimizer.lr=0.001

In [41]:
model.fit_generator(batches, steps_per_epoch=steps_per_epoch, epochs=14, 
                    validation_data=test_batches, validation_steps=validation_steps)

Epoch 1/14
Epoch 2/14
Epoch 3/14
Epoch 4/14
Epoch 5/14
Epoch 6/14
Epoch 7/14
Epoch 8/14
Epoch 9/14
Epoch 10/14
Epoch 11/14
Epoch 12/14
Epoch 13/14
Epoch 14/14


<keras.callbacks.History at 0x7f97d2c4e2b0>

In [42]:
model.optimizer.lr=0.0001

In [43]:
model.fit_generator(batches, steps_per_epoch=steps_per_epoch, epochs=10, 
                    validation_data=test_batches, validation_steps=validation_steps)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f97d2c62e80>

## Batchnorm + data augmentation

In [44]:
def get_model_bn():
    model = Sequential([
        Lambda(norm_input, input_shape=(1,28,28)),
        Conv2D(32,(3,3), activation='relu'),
        BatchNormalization(axis=1),
        Conv2D(32,(3,3), activation='relu'),
        MaxPooling2D(),
        BatchNormalization(axis=1),
        Conv2D(64,(3,3), activation='relu'),
        BatchNormalization(axis=1),
        Conv2D(64,(3,3), activation='relu'),
        MaxPooling2D(),
        Flatten(),
        BatchNormalization(),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dense(10, activation='softmax')
        ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [45]:
model = get_model_bn()

  .format(self.name, input_shape))


In [46]:
model.fit_generator(batches, steps_per_epoch=steps_per_epoch, epochs=1, 
                    validation_data=test_batches, validation_steps=validation_steps)

Epoch 1/1


<keras.callbacks.History at 0x7f9728090358>

In [47]:
model.optimizer.lr=0.1

In [48]:
model.fit_generator(batches, steps_per_epoch=steps_per_epoch, epochs=4, 
                    validation_data=test_batches, validation_steps=validation_steps)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x7f97d29854a8>

In [49]:
model.optimizer.lr=0.01

In [50]:
model.fit_generator(batches, steps_per_epoch=steps_per_epoch, epochs=12, 
                    validation_data=test_batches, validation_steps=validation_steps)

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


<keras.callbacks.History at 0x7f971aef8d68>

In [51]:
model.optimizer.lr=0.001

In [52]:
model.fit_generator(batches, steps_per_epoch=steps_per_epoch, epochs=12, 
                    validation_data=test_batches, validation_steps=validation_steps)

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


<keras.callbacks.History at 0x7f97d2c51710>

## Batchnorm + dropout + data augmentation

In [53]:
def get_model_bn_do():
    model = Sequential([
        Lambda(norm_input, input_shape=(1,28,28)),
        Conv2D(32,(3,3), activation='relu'),
        BatchNormalization(axis=1),
        Conv2D(32,(3,3), activation='relu'),
        MaxPooling2D(),
        BatchNormalization(axis=1),
        Conv2D(64,(3,3), activation='relu'),
        BatchNormalization(axis=1),
        Conv2D(64,(3,3), activation='relu'),
        MaxPooling2D(),
        Flatten(),
        BatchNormalization(),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(10, activation='softmax')
        ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [54]:
model = get_model_bn_do()

  .format(self.name, input_shape))


In [55]:
model.fit_generator(batches, steps_per_epoch=steps_per_epoch, epochs=1, 
                    validation_data=test_batches, validation_steps=validation_steps)

Epoch 1/1


<keras.callbacks.History at 0x7f96f80a5518>

In [56]:
model.optimizer.lr=0.1

In [57]:
model.fit_generator(batches, steps_per_epoch=steps_per_epoch, epochs=4, 
                    validation_data=test_batches, validation_steps=validation_steps)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x7f96d40ab390>

In [58]:
model.optimizer.lr=0.01

In [59]:
model.fit_generator(batches, steps_per_epoch=steps_per_epoch, epochs=12, 
                    validation_data=test_batches, validation_steps=validation_steps)

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


<keras.callbacks.History at 0x7f96d40ab6a0>

In [60]:
model.optimizer.lr=0.001

In [61]:
model.fit_generator(batches, steps_per_epoch=steps_per_epoch, epochs=1, 
                    validation_data=test_batches, validation_steps=validation_steps)

Epoch 1/1


<keras.callbacks.History at 0x7f96d40b5160>

## Ensembling

In [62]:
def fit_model():
    model = get_model_bn_do()
    model.fit_generator(batches, steps_per_epoch=steps_per_epoch, epochs=1, verbose=0,
                        validation_data=test_batches, validation_steps=validation_steps)
    model.optimizer.lr=0.1
    model.fit_generator(batches, steps_per_epoch=steps_per_epoch, epochs=4, verbose=0,
                        validation_data=test_batches, validation_steps=validation_steps)
    model.optimizer.lr=0.01
    model.fit_generator(batches, steps_per_epoch=steps_per_epoch, epochs=12, verbose=0,
                        validation_data=test_batches, validation_steps=validation_steps)
    model.optimizer.lr=0.001
    model.fit_generator(batches, steps_per_epoch=steps_per_epoch, epochs=18, verbose=0,
                        validation_data=test_batches, validation_steps=validation_steps)
    return model

In [63]:
models = [fit_model() for i in range(6)]

  .format(self.name, input_shape))
  .format(self.name, input_shape))
  .format(self.name, input_shape))
  .format(self.name, input_shape))
  .format(self.name, input_shape))
  .format(self.name, input_shape))


In [64]:
import os
user_home = os.path.expanduser('~')
path = os.path.join(user_home, "pj/fastai/data/MNIST_data/")
model_path = path + 'models/'

# path = "data/mnist/"
# model_path = path + 'models/'

In [69]:
for i,m in enumerate(models):
    m.save_weights(model_path+'cnn-mnist23-'+str(i)+'.pkl')

In [70]:
eval_batch_size = 256

In [71]:
evals = np.array([m.evaluate(X_test, y_test, batch_size=eval_batch_size) for m in models])



In [72]:
evals.mean(axis=0)

array([ 0.0134,  0.9957])

In [73]:
all_preds = np.stack([m.predict(X_test, batch_size=eval_batch_size) for m in models])

In [74]:
all_preds.shape

(6, 10000, 10)

In [75]:
avg_preds = all_preds.mean(axis=0)

In [76]:
keras.metrics.categorical_accuracy(y_test, avg_preds).eval()

array([ 1.,  1.,  1., ...,  1.,  1.,  1.], dtype=float32)