## MNIST CNN

TODO:
1. [Create validation and sample sets](#Create-validation-and-sample-sets)
2. [Rearrange image files into new directories](#Rearrange-image-files-into-new-directories)
3. [Fine-tuning](#Fine-tuning)
4. [Training](#Training)

In [1]:
import numpy as np
import pandas as pd

from IPython.display import FileLink

from sklearn.cross_validation import train_test_split

from keras import backend as K
from keras.metrics import categorical_accuracy
from keras.models import Sequential
from keras.layers import  Conv2D, Dense, Dropout, Flatten, Lambda, MaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator

Using Theano backend.
Using gpu device 0: Tesla K80 (CNMeM is disabled, cuDNN 5103)


In [4]:
# Load the labelled training CSV; its 'label' column is split off in the next cell.
train = pd.read_csv('./data/mnist/train.csv')

In [5]:
# Separate the target column from the remaining feature columns.
y_train = train['label']
X_train = train.drop('label', axis=1)

In [6]:
# Load the test CSV; it is used as features as-is (no column is dropped from it).
X_test = pd.read_csv('./data/mnist/test.csv')

In [7]:
# Drop the reference to the full training frame now that X_train / y_train have been extracted.
del train

In [8]:
# Reshape the flat pixel rows to (-1, 1, 28, 28), the layout that the model's
# input_shape=(1,28,28) expects.
# np.asarray accepts both the original DataFrame and an already-converted
# ndarray, so re-running this cell does not fail on a missing `.values`.
X_train = np.asarray(X_train).reshape(-1, 1, 28, 28)
X_test = np.asarray(X_test).reshape(-1, 1, 28, 28)

In [9]:
# Sanity-check the shape of the reshaped training tensor.
X_train.shape

(42000, 1, 28, 28)

In [10]:
random_seed = 2
# Apply the seed to NumPy's global RNG so that code drawing from np.random
# later in the notebook starts from a fixed state.
np.random.seed(random_seed)

In [11]:
#X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=random_seed)

In [12]:
def onehot(y, num_classes=None):
    """Converts a class vector (integers) to binary class matrix.
    E.g. for use with categorical_crossentropy.
    # Arguments
        y: class vector to be converted into a matrix
            (integers from 0 to num_classes - 1). Any nesting is flattened.
        num_classes: total number of classes. If falsy (None or 0) it is
            inferred as max(y) + 1.
    # Returns
        A float array of shape (len(y), num_classes) with a single 1 per row.
    # Raises
        ValueError: if any label is negative, or if y is empty and
            num_classes is not given.
    """
    y = np.array(y, dtype='int').ravel()
    n = y.shape[0]
    # NumPy indexing accepts negative indices, so a negative label would
    # otherwise silently mark a column counted from the end.
    if n and y.min() < 0:
        raise ValueError('class labels must be non-negative; got minimum %d' % y.min())
    if not num_classes:
        if n == 0:
            raise ValueError('cannot infer num_classes from an empty class vector')
        num_classes = np.max(y) + 1
    categorical = np.zeros((n, num_classes))
    # One fancy-indexed assignment sets column y[i] of row i for every i.
    categorical[np.arange(n), y] = 1
    return categorical

In [15]:
# One-hot encode the integer labels into 10 columns for the categorical_crossentropy loss.
# Validation labels are not encoded because the train/validation split is commented out.
Y_train = onehot(y_train, num_classes=10)
#Y_val = onehot(y_val, num_classes=10)

In [16]:
# Dataset-wide pixel statistics, held as float32 scalars for use by norm_input.
std_x = np.float32(X_train.std())
mean_x = np.float32(X_train.mean())

In [17]:
def norm_input(x):
    """Standardize `x` with the global training-set statistics.

    Subtracts `mean_x` and divides by `std_x`; both are read from module
    scope when the function is called.
    """
    centered = x - mean_x
    return centered / std_x

In [18]:
def get_model():
    """Build and compile a fresh CNN classifier with a 10-way softmax output.

    The network takes (1, 28, 28) inputs and normalizes them with
    `norm_input`. Two pairs of ReLU convolutions follow (32 then 64 filters),
    interleaved with batch normalization and max pooling, and then a 512-unit
    dense layer with dropout. The model is compiled with a default `Adam`
    optimizer, categorical cross-entropy loss and an accuracy metric.

    # Returns
        The compiled, untrained `Sequential` model.
    """
    # NOTE(review): the positional Conv2D(n, 3, 3) form reads as a 3x3 kernel
    # under the Keras 1-style API this notebook otherwise uses (nb_epoch,
    # batches.N) — confirm before upgrading Keras.
    feature_layers = [
        Lambda(norm_input, input_shape=(1,28,28)),
        Conv2D(32,3,3, activation='relu'),
        BatchNormalization(axis=1),
        Conv2D(32,3,3, activation='relu'),
        MaxPooling2D(),
        BatchNormalization(axis=1),
        Conv2D(64,3,3, activation='relu'),
        BatchNormalization(axis=1),
        Conv2D(64,3,3, activation='relu'),
        MaxPooling2D(),
    ]
    classifier_layers = [
        Flatten(),
        BatchNormalization(),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(10, activation='softmax'),
    ]

    model = Sequential()
    for layer in feature_layers + classifier_layers:
        model.add(layer)

    model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [19]:
# Augmentation settings: random rotation, horizontal/vertical shift, shear and zoom.
gen = ImageDataGenerator(
    rotation_range=12,
    shear_range=0.3,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
)

In [20]:
# Iterator over the training images and their one-hot labels in mini-batches of 64,
# produced through the augmenting generator `gen`.
batches = gen.flow(X_train, Y_train, batch_size=64)
#val_batches = gen.flow(X_val, Y_val, batch_size=64)

In [21]:
def fit_model():
    """Train a freshly built model on the augmented `batches` iterator.

    Runs one epoch at the optimizer's initial learning rate, then three
    further stages at learning rates 0.1 (4 epochs), 0.01 (8 epochs) and
    0.001 (8 epochs).

    To train with a validation set, pass `validation_data=val_batches,
    nb_val_samples=val_batches.N` to each `fit_generator` call; this needs
    the validation cells that are currently commented out.

    # Returns
        The trained model.
    """
    model = get_model()
    model.fit_generator(batches, batches.N, nb_epoch=1)

    # (learning rate, number of epochs) for each subsequent stage.
    schedule = ((0.1, 4), (0.01, 8), (0.001, 8))
    for lr, n_epochs in schedule:
        # Write the new rate into the optimizer's existing backend variable.
        # Rebinding `model.optimizer.lr` to a Python float would not reach
        # the training function already compiled by the first fit call.
        K.set_value(model.optimizer.lr, lr)
        model.fit_generator(batches, batches.N, nb_epoch=n_epochs)
    return model

In [22]:
# Train six independent models; their predictions are combined further down.
models = [fit_model() for _ in range(6)]

Epoch 1/1
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/1
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/1
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/1
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/1
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/8
Epoch 2/8
Epoch 3/8


In [23]:
# Persist each ensemble member's weights under an index-suffixed file name.
# NOTE(review): the '.pkl' suffix may not match the format save_weights writes — confirm.
for idx, net in enumerate(models):
    weights_path = 'data/mnist/cnn-mnist-noval-' + str(idx) + '.pkl'
    net.save_weights(weights_path)

In [24]:
# Per-model class probabilities for the test set, stacked along a leading model axis.
per_model_probs = [net.predict(X_test, batch_size=256) for net in models]
predictions = np.array(per_model_probs)

In [25]:
labels = np.max(predictions, axis=0)
labels.shape

(28000, 10)

In [26]:
labels = np.argmax(labels, axis=1)
labels.shape

(28000,)

In [27]:
# 1-based ids, one per predicted label.
imageIds = np.arange(len(labels)) + 1
imageIds.shape

(28000,)

In [28]:
# Two-column table: image id alongside its predicted label.
subm = np.column_stack((imageIds, labels))
subm[:5]

array([[1, 2],
       [2, 0],
       [3, 9],
       [4, 0],
       [5, 3]])

In [29]:
# Output path of the submission CSV, relative to the notebook's working directory.
subm_filename = 'subm.csv'

In [30]:
# Write both integer columns as CSV with an 'ImageId,Label' header line;
# comments='' keeps that header free of savetxt's default '# ' prefix.
np.savetxt(subm_filename, subm, fmt='%d,%d', header='ImageId,Label', comments='')

In [31]:
# Render a link to the written submission file (FileLink is imported in the top import cell).
FileLink(subm_filename)