In [1]:
# Load the MNIST dataset. NOTE(review): despite the pyplot import, nothing is
# plotted in this cell, and plt is not used in the cells visible here.
from keras.datasets import mnist
import matplotlib.pyplot as plt
# load (downloaded if needed) the MNIST dataset
(X_train, y_train), (X_test, y_test) = mnist.load_data()


Using TensorFlow backend.


In [2]:
import numpy
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.utils import np_utils
from keras import backend as K
# Use channels-first image tensors, i.e. (channels, rows, cols); the CNN cells
# in this notebook declare input_shape=(1, 28, 28) and depend on this setting.
# set_image_data_format('channels_first') is the Keras 2 spelling of the
# legacy set_image_dim_ordering('th') call.
K.set_image_data_format('channels_first')

In [3]:
# Fix the seed of NumPy's global random generator before any model is built.
# NOTE(review): only numpy is seeded here; whether that alone makes training
# repeatable on the TensorFlow backend is not shown in this notebook - confirm.
seed = 7
numpy.random.seed(seed)

In [4]:
# load data: rebinds X_train, y_train, X_test, y_test from mnist.load_data()
# (the first cell of the notebook already performs the same load)
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [5]:
# flatten 28*28 images to a 784 vector for each image
# num_pixels is the product of the two per-image axes; it is reused later as
# the input width of the perceptron model.
num_pixels = X_train.shape[1] * X_train.shape[2]
# reshape to (n_samples, num_pixels) and cast to float32 ahead of the
# division performed in the next cell
X_train = X_train.reshape(X_train.shape[0], num_pixels).astype('float32')
X_test = X_test.reshape(X_test.shape[0], num_pixels).astype('float32')

In [6]:
# normalize inputs from 0-255 to 0-1
# NOTE: X_train / X_test are rebound to the scaled arrays, so re-running this
# cell on its own divides by 255 a second time; re-run the load and flatten
# cells first.
X_train = X_train / 255
X_test = X_test / 255

In [7]:
# one hot encode outputs
# NOTE: y_train / y_test are rebound to the encoded arrays, so re-running this
# cell on its own would feed already-encoded labels back into to_categorical.
y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)
# number of classes = width of the one-hot encoded test labels
num_classes = y_test.shape[1]

# <font color='red'>Multi-Layer Perceptron</font>

In [8]:
# define baseline model
def baseline_model():
	"""Build and compile the single-hidden-layer perceptron baseline.

	Reads the notebook-level globals ``num_pixels`` (used both as the input
	width and as the hidden-layer width) and ``num_classes`` (output width).

	Returns:
		A compiled ``Sequential`` model: a ReLU ``Dense`` layer of
		``num_pixels`` units followed by a softmax ``Dense`` layer of
		``num_classes`` units, trained with categorical cross-entropy, the
		Adam optimizer and an accuracy metric.
	"""
	hidden = Dense(num_pixels, input_dim=num_pixels, kernel_initializer='normal', activation='relu')
	output = Dense(num_classes, kernel_initializer='normal', activation='softmax')
	# Handing the layers to the constructor stacks them in list order.
	net = Sequential([hidden, output])
	net.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
	return net

In [9]:
# build the model
model = baseline_model()
# Fit the model
# NOTE: the test split doubles as validation data, so the val_* figures logged
# per epoch are computed on the same samples as the final evaluation below.
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=200, verbose=2)
# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=0)
# scores[1] is the accuracy metric requested in compile(); report the
# complementary error rate as a percentage
print("Baseline Error: %.2f%%" % (100-scores[1]*100))

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
 - 2s - loss: 0.2810 - acc: 0.9207 - val_loss: 0.1413 - val_acc: 0.9576
Epoch 2/10
 - 0s - loss: 0.1115 - acc: 0.9677 - val_loss: 0.0911 - val_acc: 0.9715
Epoch 3/10
 - 0s - loss: 0.0712 - acc: 0.9799 - val_loss: 0.0779 - val_acc: 0.9778
Epoch 4/10
 - 0s - loss: 0.0501 - acc: 0.9858 - val_loss: 0.0744 - val_acc: 0.9766
Epoch 5/10
 - 0s - loss: 0.0370 - acc: 0.9895 - val_loss: 0.0678 - val_acc: 0.9789
Epoch 6/10
 - 0s - loss: 0.0267 - acc: 0.9928 - val_loss: 0.0625 - val_acc: 0.9813
Epoch 7/10
 - 0s - loss: 0.0205 - acc: 0.9949 - val_loss: 0.0623 - val_acc: 0.9802
Epoch 8/10
 - 0s - loss: 0.0140 - acc: 0.9971 - val_loss: 0.0617 - val_acc: 0.9805
Epoch 9/10
 - 0s - loss: 0.0107 - acc: 0.9978 - val_loss: 0.0578 - val_acc: 0.9818
Epoch 10/10
 - 0s - loss: 0.0081 - acc: 0.9985 - val_loss: 0.0598 - val_acc: 0.9811
Baseline Error: 1.89%


# <font color='red'>Convolutional Neural Network</font> 

In [10]:
# load data (fresh arrays: the versions flattened and scaled for the
# perceptron section are replaced here)
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# reshape to be [samples][channels][rows][cols]: a single channel of 28x28,
# matching the input_shape=(1, 28, 28) declared by the CNN models
X_train = X_train.reshape(X_train.shape[0], 1, 28, 28).astype('float32')
X_test = X_test.reshape(X_test.shape[0], 1, 28, 28).astype('float32')

In [11]:
# load data
# NOTE(review): this cell repeats the preceding cell verbatim (same load, same
# reshape); one of the two is redundant.
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# reshape to be [samples][channels][rows][cols]: a single channel of 28x28
X_train = X_train.reshape(X_train.shape[0], 1, 28, 28).astype('float32')
X_test = X_test.reshape(X_test.shape[0], 1, 28, 28).astype('float32')

In [12]:
# normalize inputs from 0-255 to 0-1
# (rebinds X_train / X_test, so re-running this cell alone scales the data again)
X_train = X_train / 255
X_test = X_test / 255
# one hot encode outputs
y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)
# number of classes = width of the one-hot encoded test labels
num_classes = y_test.shape[1]

In [13]:
def baseline_model():
	"""Build and compile the simple convolutional network.

	NOTE: this definition reuses the name ``baseline_model`` and therefore
	replaces the perceptron builder defined earlier in the notebook once this
	cell has run.

	The declared ``input_shape=(1, 28, 28)`` is channels-first, relying on the
	image ordering configured through the Keras backend in the imports cell.
	Reads the notebook-level global ``num_classes`` for the output width.

	Returns:
		A compiled ``Sequential`` model: Conv2D(32, 5x5, relu) ->
		MaxPooling2D(2x2) -> Dropout(0.2) -> Flatten -> Dense(128, relu) ->
		Dense(num_classes, softmax), using categorical cross-entropy loss,
		the Adam optimizer and an accuracy metric.
	"""
	stack = [
		Conv2D(32, (5, 5), input_shape=(1, 28, 28), activation='relu'),
		MaxPooling2D(pool_size=(2, 2)),
		Dropout(0.2),
		Flatten(),
		Dense(128, activation='relu'),
		Dense(num_classes, activation='softmax'),
	]
	# Handing the layers to the constructor stacks them in list order.
	cnn = Sequential(stack)
	cnn.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
	return cnn

In [14]:
# build the model (baseline_model now refers to the convolutional definition
# from the preceding cell, not the perceptron one)
model = baseline_model()
# Fit the model
# NOTE: the test split doubles as validation data, so the val_* figures logged
# per epoch are computed on the same samples as the final evaluation below.
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=200, verbose=2)
# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=0)
# scores[1] is the accuracy metric requested in compile(); report the
# complementary error rate as a percentage
print("CNN Error: %.2f%%" % (100-scores[1]*100))

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
 - 2s - loss: 0.2422 - acc: 0.9312 - val_loss: 0.0695 - val_acc: 0.9790
Epoch 2/10
 - 1s - loss: 0.0694 - acc: 0.9795 - val_loss: 0.0467 - val_acc: 0.9854
Epoch 3/10
 - 1s - loss: 0.0495 - acc: 0.9854 - val_loss: 0.0372 - val_acc: 0.9883
Epoch 4/10
 - 1s - loss: 0.0379 - acc: 0.9878 - val_loss: 0.0347 - val_acc: 0.9894
Epoch 5/10
 - 1s - loss: 0.0314 - acc: 0.9907 - val_loss: 0.0450 - val_acc: 0.9863
Epoch 6/10
 - 1s - loss: 0.0253 - acc: 0.9924 - val_loss: 0.0359 - val_acc: 0.9877
Epoch 7/10
 - 1s - loss: 0.0209 - acc: 0.9934 - val_loss: 0.0353 - val_acc: 0.9889
Epoch 8/10
 - 1s - loss: 0.0188 - acc: 0.9940 - val_loss: 0.0320 - val_acc: 0.9890
Epoch 9/10
 - 1s - loss: 0.0161 - acc: 0.9949 - val_loss: 0.0368 - val_acc: 0.9883
Epoch 10/10
 - 1s - loss: 0.0134 - acc: 0.9960 - val_loss: 0.0286 - val_acc: 0.9905
CNN Error: 0.95%


# <font color='red'> Large Convolutional Network </font>

In [15]:
# define the larger model
def larger_model():
	"""Build and compile the deeper convolutional network.

	The declared ``input_shape=(1, 28, 28)`` is channels-first, relying on the
	image ordering configured through the Keras backend in the imports cell.
	Reads the notebook-level global ``num_classes`` for the output width.

	Returns:
		A compiled ``Sequential`` model: Conv2D(30, 5x5, relu) ->
		MaxPooling2D(2x2) -> Conv2D(15, 3x3, relu) -> MaxPooling2D(2x2) ->
		Dropout(0.2) -> Flatten -> Dense(128, relu) -> Dense(50, relu) ->
		Dense(num_classes, softmax), using categorical cross-entropy loss,
		the Adam optimizer and an accuracy metric.
	"""
	stack = [
		# two convolution + pooling stages
		Conv2D(30, (5, 5), input_shape=(1, 28, 28), activation='relu'),
		MaxPooling2D(pool_size=(2, 2)),
		Conv2D(15, (3, 3), activation='relu'),
		MaxPooling2D(pool_size=(2, 2)),
		Dropout(0.2),
		# dense classifier head
		Flatten(),
		Dense(128, activation='relu'),
		Dense(50, activation='relu'),
		Dense(num_classes, activation='softmax'),
	]
	# Handing the layers to the constructor stacks them in list order.
	cnn = Sequential(stack)
	cnn.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
	return cnn

In [16]:
# build the model
model = larger_model()
# Fit the model
# verbose=2 prints one summary line per epoch (loss / acc / val_loss /
# val_acc), matching the other two training cells in this notebook; the
# default progress-bar mode leaves only bare "Epoch N/10" lines in saved output.
# NOTE: the test split doubles as validation data, so the val_* figures are
# computed on the same samples as the final evaluation below.
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=200, verbose=2)
# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=0)
# scores[1] is the accuracy metric requested in compile(); report the
# complementary error rate as a percentage
print("Large CNN Error: %.2f%%" % (100-scores[1]*100))

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Large CNN Error: 0.88%
