In [1]:
import keras
import numpy as np

Using Theano backend.
Using gpu device 0: Tesla K80 (CNMeM is disabled, cuDNN 5103)


In [2]:
np.random.seed(123)

## Preprocessing Data

In [3]:
def get_mnist_data():
    """Load MNIST and shape it for a channels-first convolutional network.

    Each image array gets a singleton channel axis inserted right after the
    sample axis, and each label array is one-hot encoded with
    ``np_utils.to_categorical``.

    Returns:
        A 4-tuple ``(X_train, y_train, X_test, y_test)``.
    """
    from keras.datasets import mnist
    from keras.utils import np_utils

    def _with_channel_axis(images):
        # (samples, rows, cols) -> (samples, 1, rows, cols)
        return images.reshape(images.shape[0], 1, images.shape[1], images.shape[2])

    def _one_hot(labels):
        # Labels are first turned into a column vector, then one-hot encoded.
        column = labels.reshape(labels.shape[0], 1)
        return np_utils.to_categorical(column)

    (train_images, train_labels), (test_images, test_labels) = mnist.load_data()

    return (
        _with_channel_axis(train_images),
        _one_hot(train_labels),
        _with_channel_axis(test_images),
        _one_hot(test_labels),
    )

In [4]:
X_train, y_train, X_test, y_test = get_mnist_data()

In [5]:
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((60000, 1, 28, 28), (60000, 10), (10000, 1, 28, 28), (10000, 10))

In [6]:
# Scalar mean over every value in the training images, kept as float32.
X_mean = np.float32(X_train.mean())

In [7]:
# Scalar standard deviation over every value in the training images, kept as float32.
X_std = np.float32(X_train.std())

In [8]:
def normalizer(x):
    """Standardise ``x`` with the global training-set statistics.

    Subtracts the module-level ``X_mean`` and divides the result by the
    module-level ``X_std``; both are read at call time.
    """
    centered = x - X_mean
    return centered / X_std

## Building the model

In [12]:
from keras.layers import Convolution2D, Dense, Flatten, Lambda, Dropout
from keras.models import Sequential
from keras.optimizers import Adam
from keras.layers.pooling import MaxPooling2D

### CNN Model

In [10]:
# Number of epochs passed to the first `model_1.fit` call.
nb_epoch = 5

In [17]:
# CNN for (1, 28, 28) inputs: normalisation inside the graph, two
# conv-conv-pool stages (32 filters, then 64, all 3x3 kernels), a 512-unit
# dense layer and a 10-way softmax output.
model_1 = Sequential([
    Lambda(normalizer, input_shape=(1, 28, 28)),
    Convolution2D(32, 3, 3, activation='relu'),
    Convolution2D(32, 3, 3, activation='relu'),
    MaxPooling2D(),
    Convolution2D(64, 3, 3, activation='relu'),
    Convolution2D(64, 3, 3, activation='relu'),
    MaxPooling2D(),
    Flatten(),
    Dense(512, activation='relu'),
    Dense(10, activation='softmax'),
])
# Adam with its default settings; accuracy is reported alongside the loss.
model_1.compile(
    optimizer=Adam(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [18]:
# First training run: `nb_epoch` epochs, with the test split used as validation data.
model_1.fit(
    X_train,
    y_train,
    batch_size=64,
    nb_epoch=nb_epoch,
    validation_data=(X_test, y_test),
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7efe0a3a4250>

In [19]:
# Continue training the same `model_1` for 10 more epochs (it is not rebuilt
# in between), again with the test split used as validation data.
model_1.fit(
    X_train,
    y_train,
    batch_size=64,
    nb_epoch=10,
    validation_data=(X_test, y_test),
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7efe0ab29fd0>

### Is learning rate optimal?

The default learning rate in Adam is 0.001.

In [13]:
# Raise the learning rate of the compiled model's optimizer to 0.1.
# The model in this notebook is `model_1`; there is no `model` variable, so
# the previous `model.optimizer.lr = ...` fails on a fresh kernel.
# The value is written into the existing backend variable via `set_value`.
# Rebinding `optimizer.lr` to a plain float would replace the attribute
# instead of updating the variable the optimizer was created with.
keras.backend.set_value(model_1.optimizer.lr, 0.1)

It looks like the learning rate is already good enough at its default value of 0.001.