In [1]:
import keras
import numpy as np

Using Theano backend.
Using gpu device 0: Tesla K80 (CNMeM is disabled, cuDNN 5103)


In [12]:
# Pin NumPy's global random generator so repeated runs draw the same numbers
# (presumably what makes the weight initialisation repeatable -- confirm for
# the backend in use).
np.random.seed(seed=123)

## Preprocessing Data

In [2]:
def get_mnist_data():
    """Load MNIST and prepare it for a model with a leading channel axis.

    Returns:
        A 4-tuple ``(X_train, y_train, X_test, y_test)``. Each image array
        gets a singleton axis inserted at position 1, i.e. ``(n, h, w)``
        becomes ``(n, 1, h, w)``. Each label vector is turned into a column
        and passed through ``np_utils.to_categorical``.
    """
    from keras.datasets import mnist
    from keras.utils import np_utils

    def _prepare(images, labels):
        # Insert the channel axis right after the sample axis.
        images = np.expand_dims(images, axis=1)
        # Labels go in as an (n, 1) column before being expanded per class.
        labels = np_utils.to_categorical(labels.reshape(-1, 1))
        return images, labels

    train_split, test_split = mnist.load_data()
    X_train, y_train = _prepare(*train_split)
    X_test, y_test = _prepare(*test_split)

    return X_train, y_train, X_test, y_test

In [3]:
# Load the four arrays once; every later cell reads these module-level names.
X_train, y_train, X_test, y_test = get_mnist_data()

In [4]:
# Show the shape of every split as a quick sanity check.
tuple(split.shape for split in (X_train, y_train, X_test, y_test))

((60000, 1, 28, 28), (60000, 10), (10000, 1, 28, 28), (10000, 10))

In [5]:
# Scalar mean over every pixel of the training images, stored as float32.
X_mean = np.float32(X_train.mean())

In [6]:
# Scalar standard deviation over every pixel of the training images, as float32.
X_std = np.float32(X_train.std())

In [7]:
def normalizer(x):
    """Standardize `x` with the training-set statistics `X_mean` and `X_std`."""
    centered = x - X_mean
    return centered / X_std

## Building the model

In [8]:
from keras.layers import Convolution2D, Dense, Flatten, Lambda, Dropout
from keras.models import Sequential
from keras.optimizers import Adam

### Linear Model

In [15]:
# Number of epochs used by the fit cells that pass `nb_epoch=nb_epoch`.
# NOTE(review): the recorded outputs of those cells show "Epoch 1/10 ... 10/10",
# so they were presumably produced with a different value -- re-run to refresh.
nb_epoch = 5

In [9]:
# Linear classifier: standardize the input, flatten it, then a single
# 10-unit softmax layer.
model = Sequential([
    Lambda(normalizer, input_shape=(1, 28, 28)),
    Flatten(),
    Dense(10, activation='softmax'),
])

In [10]:
# Adam with its default settings, cross-entropy loss, accuracy reported per epoch.
model.compile(
    optimizer=Adam(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [11]:
# Train for `nb_epoch` epochs, evaluating on the test split after each one.
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    nb_epoch=nb_epoch,
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f92c6d6a6d0>

### Is the learning rate optimal?

The default learning rate in Adam is 0.001.

In [13]:
# Try a much larger learning rate (0.1).
# Assigning a float to `model.optimizer.lr` after the model has been fitted only
# rebinds the Python attribute: the training function built during that fit
# keeps using the original learning-rate variable, so the new value would be
# silently ignored. Recompiling with a fresh optimizer makes the rate take
# effect. The layer weights are kept; Adam's running moment estimates restart.
model.compile(Adam(lr=0.1), loss='categorical_crossentropy', metrics=['accuracy'])

In [14]:
# Fit the same model again for `nb_epoch` epochs, validating on the test split.
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    nb_epoch=nb_epoch,
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f92d2e1ff50>

In [16]:
# Try a smaller learning rate (0.0001).
# A plain attribute assignment on `model.optimizer.lr` does not reach a training
# function that has already been built, so the rate is changed by recompiling
# with a fresh optimizer instead. The layer weights are kept; Adam's running
# moment estimates restart.
model.compile(Adam(lr=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

In [17]:
# Fit the same model for 15 more epochs, validating on the test split.
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    nb_epoch=15,
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7f930dd407d0>

It looks like the default learning rate of 0.001 is already good enough.