# MNIST with CNN using Keras

## Setup

In [33]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D

import numpy as np
import boto3
import time

from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('./.data/', one_hot=True)

Extracting ./.data/train-images-idx3-ubyte.gz
Extracting ./.data/train-labels-idx1-ubyte.gz
Extracting ./.data/t10k-images-idx3-ubyte.gz
Extracting ./.data/t10k-labels-idx1-ubyte.gz


In [2]:
X_train = mnist.train.images
X_test = mnist.test.images
X_train = X_train.reshape(X_train.shape[0], 1, 28, 28).astype('float32')/255
X_test = X_test.reshape(X_test.shape[0], 1, 28, 28).astype('float32')/255

y_train = mnist.train.labels
y_test = mnist.test.labels

## Define network

In [3]:
model = Sequential()
model.add(Convolution2D(32, 5, 5, activation='relu', input_shape=(1,28,28), dim_ordering='th'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Convolution2D(64, 5, 5, activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Flatten())
model.add(Dense(1024, activation='relu'))
#model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))

## Train

In [4]:
model.compile(loss='categorical_crossentropy',
              optimizer='adadelta',
              metrics=['accuracy'])

In [5]:
t0 = time.time()
model.fit(X_train, y_train,
          batch_size=50,
          nb_epoch=10
         )
print "training time = {}".format(time.time() - t0)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
training time = 157.253924131


## Evaluate

In [6]:
score = model.evaluate(X_test, y_test, verbose=1)
print score



## Save Model

In [7]:
model.save('./keras.hdf5')

s3 = boto3.client('s3')
s3.upload_file(
    './keras.hdf5',
    'jakechenawstemp',
    'keras.hdf5'
)

In [39]:
model2 = Sequential()
model2.add(Convolution2D(32, 5, 5, activation='relu', input_shape=(1,28,28), dim_ordering='th'))
model2.add(MaxPooling2D(pool_size=(2,2)))
model2.add(Convolution2D(64, 5, 5, activation='relu'))
model2.add(MaxPooling2D(pool_size=(2,2)))
model2.add(Flatten())
model2.add(Dense(1024, activation='relu'))
#model.add(Dropout(0.5))
model2.add(Dense(10, activation='softmax'))

In [41]:
model2.load_weights('./keras.hdf5')

In [42]:
model2.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
convolution2d_3 (Convolution2D)  (None, 32, 24, 24)    832         convolution2d_input_2[0][0]      
____________________________________________________________________________________________________
maxpooling2d_3 (MaxPooling2D)    (None, 16, 12, 24)    0           convolution2d_3[0][0]            
____________________________________________________________________________________________________
convolution2d_4 (Convolution2D)  (None, 12, 8, 64)     38464       maxpooling2d_3[0][0]             
____________________________________________________________________________________________________
maxpooling2d_4 (MaxPooling2D)    (None, 6, 4, 64)      0           convolution2d_4[0][0]            
___________________________________________________________________________________________

In [43]:
model2.pop()

In [44]:
for layer in model2.layers:
    layer.trainable = False

In [47]:
model2.add(Dense(2, activation='softmax'))

In [48]:
model2.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
convolution2d_3 (Convolution2D)  (None, 32, 24, 24)    832         convolution2d_input_2[0][0]      
____________________________________________________________________________________________________
maxpooling2d_3 (MaxPooling2D)    (None, 16, 12, 24)    0           convolution2d_3[0][0]            
____________________________________________________________________________________________________
convolution2d_4 (Convolution2D)  (None, 12, 8, 64)     38464       maxpooling2d_3[0][0]             
____________________________________________________________________________________________________
maxpooling2d_4 (MaxPooling2D)    (None, 6, 4, 64)      0           convolution2d_4[0][0]            
___________________________________________________________________________________________

In [51]:
y_train2 = np.array([[0,1] if y[0]==1 else [1,0] for y in y_train])

In [52]:
model2.compile(loss='categorical_crossentropy',
              optimizer='adadelta',
              metrics=['accuracy'])

In [53]:
t0 = time.time()
model2.fit(X_train, y_train2,
          batch_size=50,
          nb_epoch=10
         )
print "training time = {}".format(time.time() - t0)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
training time = 52.324185133


## Evaluate

In [54]:
y_test2 = np.array([[0,1] if y[0]==1 else [1,0] for y in y_test])

In [55]:
score = model2.evaluate(X_test, y_test2, verbose=1)
print score



## Predict

In [56]:
model2.predict(X_test)

array([[  9.99993682e-01,   6.30177828e-06],
       [  9.99997258e-01,   2.77353433e-06],
       [  9.99998808e-01,   1.18772141e-06],
       ..., 
       [  1.00000000e+00,   2.47176987e-14],
       [  1.00000000e+00,   2.10434639e-08],
       [  1.00000000e+00,   1.75111516e-08]], dtype=float32)