In [1]:
#Import Packages

#To build a NN model
from keras.models import Sequential

#To build a densely connected NN layer
from keras.layers import Dense

#To build a 2D convolution layer
from keras.layers import Conv2D

#Regularization Function
from keras.layers import Dropout

#To flatten the inputs
from keras.layers import Flatten

#MaxPooling operations for spatial data
from keras.layers import MaxPooling2D

#Categorical Targets - N dimension vector which contains all zeros except for a 1 at a corresponding sample index
from keras.utils.np_utils import to_categorical

#Optimizer
from keras.optimizers import Adadelta

#MNIST Dataset
from keras.datasets import mnist

Using Theano backend.


In [2]:
#Download (or load from the local cache) the MNIST dataset, already split into train and test sets.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

#Training images: 60000 grayscale images of size 28*28, each showing one of the 10 digits (0-9).
print(x_train.shape)

#Training labels: the digit that corresponds to each training image.
print(y_train.shape)

#Test images: 10000 grayscale images of size 28*28, each showing one of the 10 digits (0-9).
print(x_test.shape)

#Test labels: the digit that corresponds to each test image.
print(y_test.shape)

(60000, 28, 28)
(60000,)
(10000, 28, 28)
(10000,)


In [3]:
#Convert x_train, y_train, x_test and y_test into a format that can be fed into the CNN.

#Images: append a channel axis of size 1 (grayscale) so each sample becomes height*width*1,
#cast to float32 and divide by 255 to rescale the pixel intensities.
#The new shape is derived from the array itself instead of hard-coding the number of samples
#and the image size, so this cell keeps working if the dataset is subsampled or swapped.
X_train = x_train.reshape(x_train.shape + (1,)).astype('float32') / 255
X_test = x_test.reshape(x_test.shape + (1,)).astype('float32') / 255

#Labels: one-hot encode each digit as a vector of length 10 (one position per class).
Y_train = to_categorical(y_train, 10)
Y_test = to_categorical(y_test, 10)

In [4]:
#Show the shapes after conversion: the images now carry a trailing channel axis.
#Note: y_train / y_test printed here are the original integer label arrays;
#the one-hot encoded versions used for training are Y_train / Y_test.
for array in (X_train, y_train, X_test, y_test):
    print(array.shape)

(60000, 28, 28, 1)
(60000,)
(10000, 28, 28, 1)
(10000,)


In [5]:
#Build the NN model by handing the complete layer stack to Sequential.
#The layers are applied in the order in which they are listed.
model = Sequential([
    #1) 2D Convolution Layer followed by Relu
    #   32 = Number of filters/kernels, kernel_size = Filter size which is 3*3
    #   Input volume is 28*28*1 ie) Height = 28, Width = 28 and Depth = 1 (Grayscale images)
    #   Output size per dimension = ((N - F) / Stride) + 1
    #       N = 28 = Input Height, F = 3 = Filter Size Height, Stride = 1
    #       ((28 - 3) / 1) + 1 = 26
    #   Every filter produces one 26*26*1 activation map, so 32 filters give an
    #   output volume of 26*26*32.
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)),

    #2) 2D Convolution Layer followed by Relu
    #   64 = Number of filters/kernels, kernel_size = Filter size which is 3*3
    #   Input volume is the 26*26*32 output of the previous layer.
    #   Using the same formula, ((26 - 3) / 1) + 1 = 24, so the output volume is 24*24*64.
    Conv2D(64, kernel_size=(3, 3), activation='relu'),

    #3) MaxPooling Layer
    #   Input volume is 24*24*64. It is segmented into 2*2 windows and only the maximum
    #   value of each window is kept, so the output volume is 12*12*64.
    MaxPooling2D(pool_size=(2, 2)),

    #4) Dropout helps to prevent overfitting.
    Dropout(0.25),

    #5) Flatten the 12*12*64 output of the MaxPooling Layer into a vector of 9216 values.
    Flatten(),

    #6) Fully Connected hidden layer with 128 hidden units and relu activation.
    #   Its input is the flattened vector of 9216 values.
    Dense(128, activation='relu'),

    #7) Dropout on the hidden layer.
    Dropout(0.5),

    #8) Output layer with 10 output units - one per digit (0-9) - and softmax activation.
    Dense(10, activation='softmax'),
])

#Print the layers, their output shapes and their parameter counts.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 24, 24, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 12, 12, 64)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 12, 12, 64)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 9216)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               1179776   
_________________________________________________________________
dropout_2 (Dropout)          (None, 128)               0         
__________

**Model Parameters:** In our example, the total number of parameters is 1,199,882. Let's see how this is calculated.

Total Params = 1,199,882 = 320 + 18496 + 1179776 + 1290

**320** = 32 * ((3*3) + 1)

where 32 = Number of filters, 3*3 = Filter Size (the input has a depth of 1) and 1 = Bias Term per filter

**18496** = 64 * ((3*3*32) + 1)

where 64 = Number of filters, 3*3*32 = Filter Size (a 3*3 kernel spanning the 32 input channels) and 1 = Bias Term per filter

**1179776** = (128 * 9216) + 128

where 128 = Hidden Units , 9216 = Flattened Input Units and 128 = Bias Units

**1290** = (10 * 128) + 10

where 10 = Output Units, 128 = Hidden Units and 10 = Bias Units

In [6]:
#Optimizer used to update the model parameters during training (default Adadelta settings).
optimizer = Adadelta()

#Configure the learning process: the loss to minimise, the optimizer that minimises it
#and the metric to report alongside the loss.
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

In [7]:
#Train the CNN model for 5 epochs. The training data is processed in batches of
#128 samples, and a parameter update is performed after each batch.
model.fit(
    X_train,
    Y_train,
    batch_size=128,
    epochs=5,
    verbose=1
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f615bb4db50>

In [8]:
#Evaluate the trained model on the test data.
#With metrics=['accuracy'] configured at compile time, evaluate returns the loss
#followed by the accuracy.
score, acc = model.evaluate(X_test, Y_test, verbose=0)

#Print the accuracy as a percentage.
print('\nTest accuracy:%f %%' % (acc * 100))


Test accuracy:98.630000 %


In [9]:
#Let's now perform prediction on the test images.
#model.predict returns one row of 10 softmax probabilities per image; the index of the
#largest probability in a row is the predicted digit. This yields the same class indices
#as Sequential.predict_classes, without depending on that helper, which is deprecated
#and no longer available in newer Keras releases.
predicted_classes = model.predict(X_test, verbose=0).argmax(axis=-1)

#Print the target value and the predicted value for a few test samples.
for sample_index in (0, 99, 9999):
    print('Target Num:%d, Output Num:%d' % (y_test[sample_index], predicted_classes[sample_index]))

Target Num:7, Output Num:7
Target Num:9, Output Num:9
Target Num:6, Output Num:6


Reference:
    1. https://github.com/fchollet/keras/blob/master/examples/mnist_cnn.py