# Model: CNN Keras

* Note: Running this on a laptop (without a GPU) is fine (~1 hr).

In [21]:
import os
import numpy as np
import tensorflow as tf
import keras 
from keras.layers import Input, Dense, Conv2D, Reshape, Flatten, MaxPooling2D
from keras.models import Model
from keras.utils import to_categorical
from keras import backend as K
from keras.callbacks import EarlyStopping, ModelCheckpoint

from helper import read_mnist_data

# Record the library versions this notebook was executed with (one per line).
print(tf.__version__, keras.__version__, sep='\n')

1.10.0
2.2.2


In [10]:
# Location of the raw MNIST files. Set the MNIST_DATA_PATH environment variable
# to point somewhere else; the original local path is kept as the fallback so
# existing setups keep working unchanged.
DATA_PATH = os.environ.get('MNIST_DATA_PATH', '/Users/lywang/Data/mnist')

# read_mnist_data comes from the project-local `helper` module. In the recorded
# run it returned flattened images of shape (n, 784) and labels of shape (n, 1).
image_train, label_train, image_test, label_test = read_mnist_data(DATA_PATH)
print(image_train.shape, label_train.shape)
print(image_test.shape, label_test.shape)

# One-hot encode the labels. The class count is pinned instead of being inferred
# from the largest label present, so both splits always get exactly the 10
# columns expected by the 10-way softmax output of the model.
NUM_CLASSES = 10
label_train_onehot = to_categorical(label_train, num_classes=NUM_CLASSES)
label_test_onehot = to_categorical(label_test, num_classes=NUM_CLASSES)
print(label_train_onehot.shape)
print(label_test_onehot.shape)

(60000, 784) (60000, 1)
(10000, 784) (10000, 1)
(60000, 10)
(10000, 10)


In [11]:
# Directory holding the .hdf5 checkpoints: the training cell builds its
# ModelCheckpoint path from it and the prediction section loads a model from it.
MODEL_DIR = 'model_checkpoints_cnn/'

In [27]:
# Restart training from scratch every time this cell is run: drop the previous
# Keras/TensorFlow graph so layer names and weights do not carry over.
K.clear_session()

# ModelCheckpoint does not create missing directories; make sure the target
# exists so the first checkpoint save does not fail on a fresh checkout.
os.makedirs(MODEL_DIR, exist_ok=True)

# Checkpoint file name template; Keras fills in the epoch number and the
# validation metrics when it saves.
model_path = os.path.join(MODEL_DIR, 'keras.{epoch:02d}-{val_loss:.2f}-{val_acc:.2f}.hdf5')

# Construct the network (functional API). Shapes below are per sample.
inputs = Input(shape=(784,))                   # flattened 28x28 image
x = Reshape((28, 28, 1))(inputs)               # keras default: channel last
x = Conv2D(16, (5,5), activation='relu')(x)    # -> 24 x 24 x 16
x = MaxPooling2D((3,3))(x)                     # -> 8 x 8 x 16
x = Conv2D(8, (3,3), activation='relu')(x)     # -> 6 x 6 x 8
x = MaxPooling2D((2,2))(x)                     # -> 3 x 3 x 8
x = Flatten()(x)                               # -> 72
x = Dense(32, activation='relu')(x)
predictions = Dense(10, activation='softmax')(x)  # one probability per digit class
model = Model(inputs=inputs, outputs=predictions)

model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()

# Stop once val_loss has not improved for 5 consecutive epochs, and write a
# checkpoint only for epochs that improve on the best val_loss seen so far.
callback_list = [EarlyStopping(monitor='val_loss', patience=5),
                 ModelCheckpoint(model_path, monitor='val_loss', 
                                 verbose=1, save_best_only=True, 
                                 period=1)
                ]

# Hold out 25% of the training data for validation. epochs=100 is only an upper
# bound; EarlyStopping normally ends training sooner.
model.fit(image_train, label_train_onehot, 
          batch_size=500, 
          epochs=100, 
          shuffle=True,
          validation_split=0.25,
          callbacks=callback_list)  # starts training

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 784)               0         
_________________________________________________________________
reshape_1 (Reshape)          (None, 28, 28, 1)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 24, 24, 16)        416       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 8, 8, 16)          0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 6, 6, 8)           1160      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 3, 3, 8)           0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 72)                0         
__________

<keras.callbacks.History at 0x182a7109e8>

## load model for prediction

In [28]:
# Checkpoint recorded from the original training run. The file name embeds the
# epoch, val_loss and val_acc of that run, so a new training run will most
# likely produce differently named files.
CHECKPOINT_NAME = 'keras.19-0.08-0.98.hdf5'
checkpoint_path = os.path.join(MODEL_DIR, CHECKPOINT_NAME)

if not os.path.isfile(checkpoint_path):
    # Fall back to the most recently written checkpoint. Training saves with
    # save_best_only=True, so the newest file is the best model of the latest run.
    available = os.listdir(MODEL_DIR) if os.path.isdir(MODEL_DIR) else []
    candidates = [os.path.join(MODEL_DIR, name)
                  for name in available
                  if name.endswith('.hdf5')]
    if not candidates:
        raise FileNotFoundError(
            'no .hdf5 checkpoint found in {!r}; run the training cell first'.format(MODEL_DIR))
    checkpoint_path = max(candidates, key=os.path.getmtime)
    print('pinned checkpoint not found, loading newest instead:', checkpoint_path)

loaded_model = keras.models.load_model(checkpoint_path)

In [29]:
# Run the restored model on the test images and keep, for each sample, the
# index of the highest softmax output, i.e. the predicted class id.
prediction = np.argmax(loaded_model.predict(image_test), axis=1)

In [30]:
# Flatten the (n, 1) label column so it lines up element-wise with the 1-D
# predictions; ravel (unlike squeeze) stays 1-D even for a single sample.
true_labels = np.ravel(label_test)

# Fraction of test samples whose predicted class equals the true label,
# computed in one vectorized step instead of a Python-level sum over the array.
test_acc = np.mean(prediction == true_labels)
print('test acc: {} (error rate {:.2f}%) '.format(test_acc, (1-test_acc)*100))

test acc: 0.9796 (error rate 2.04%) 


Record

train acc: 0.9869  

test acc: 0.9796 (error rate 2.04%) 