In [15]:
import numpy as np
import keras
import tensorflow as tf
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPool2D
from keras import backend as k


In [16]:
# Show the installed library versions as a (tensorflow, keras) pair.
(tf.__version__, keras.__version__)

('2.3.0', '2.4.3')

## 1. Importing Libraries and the Dataset

In [17]:
# Load the MNIST digits as (images, labels) pairs for the train and test splits.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Print the shape of each array, one per line: train images, train labels,
# test images, test labels.
for split_array in (X_train, y_train, X_test, y_test):
    print(split_array.shape)

(60000, 28, 28)
(60000,)
(10000, 28, 28)
(10000,)


## 2. Data Preparation

#### 2.1 Reshape the Data
First, let's understand the shape in which the network expects the training data. Since we have 60,000 training samples, each of size (28, 28, 1), the training data (`X_train`) needs to be of the shape (60000, 28, 28, 1). If the images were coloured, the shape would have been (60000, 28, 28, 3).

Further, each of the 60,000 images has a 0-9 label, so `y_train` needs to be of the shape (60000, 10), where each image's label is represented as a 10-d one-hot encoded vector.

The shapes of `X_test` and `y_test` follow the same layout as those of `X_train` and `y_train` respectively, but with 10,000 samples instead of 60,000: (10000, 28, 28, 1) and (10000, 10).

In [18]:
# Append a trailing channel axis of size 1 so that each image array goes from
# (samples, rows, cols) to (samples, rows, cols, 1).
train_count, train_rows, train_cols = X_train.shape[:3]
X_train = X_train.reshape((train_count, train_rows, train_cols, 1))

test_count, test_rows, test_cols = X_test.shape[:3]
X_test = X_test.reshape((test_count, test_rows, test_cols, 1))

# Print the new shapes, training set first.
print(X_train.shape, X_test.shape, sep="\n")

(60000, 28, 28, 1)
(10000, 28, 28, 1)


In [19]:
# One-hot encode the digit labels into rows with 10 columns (one per class).
y_train = keras.utils.to_categorical(y_train, num_classes=10)
y_test = keras.utils.to_categorical(y_test, num_classes=10)

# Print the encoded label shapes, training set first.
print(y_train.shape, y_test.shape, sep="\n")

(60000, 10)
(10000, 10)


#### 2.2 Converting Input Data Type: Int to Float
The pixels are originally stored as type `int`, but it is advisable to feed the data to the network as `float`. This is not strictly required, but it is recommended. You can read <a href="https://datascience.stackexchange.com/questions/13636/neural-network-data-type-conversion-float-from-int">why conversion from int to float is helpful here</a>.

In [20]:
# Cast both image arrays to 32-bit floats in a single unpacking assignment.
X_train, X_test = (images.astype('float32') for images in (X_train, X_test))

#### 2.3 Rescaling (Normalisation)
The value of each pixel is between 0 and 255, so we will **rescale each pixel** by dividing by 255 so that the range becomes 0-1. Recall <a href="https://stats.stackexchange.com/questions/185853/why-do-we-need-to-normalize-the-images-before-we-put-them-into-cnn">why normalisation is important for training NNs</a>.

In [21]:
# Inspect the (largest, smallest) pixel value of the training images before rescaling.
(X_train.max(), X_train.min())

(255.0, 0.0)

In [22]:
# Rescale pixel values in place so the training images span 0-1.
#
# The scale factor is derived from the training images only and the same
# factor is applied to both splits. Dividing each split by its own maximum
# would make the test-set scaling depend on the test data and could put the
# two splits on different scales.
#
# Re-running this cell is harmless: once the training data has been rescaled
# its maximum is 1.0, so the division becomes a no-op.
pixel_scale = X_train.max()
X_train /= pixel_scale
X_test /= pixel_scale

## 3. Building the Model

Let's now build the CNN architecture. For the MNIST dataset, we do not need to build a very sophisticated CNN - a simple shallow-ish CNN would suffice. 

We will build a network with:
- two convolutional layers having 32 and 64 filters respectively, 
- followed by a max pooling layer, 
- and then `Flatten` the output of the pooling layer to give us a long vector, 
- then add a fully connected `Dense` layer with 128 neurons, and finally
- add a `softmax` layer with 10 neurons

The generic way to build a model in Keras is to instantiate a `Sequential` model and keep adding `keras.layers` to it. We will also add some dropout layers.

In [23]:
# Per-image input dimensions: rows and columns are read from the training
# array, followed by a single channel.
img_rows, img_cols = X_train.shape[1:3]
input_shape = (img_rows, img_cols, 1)

# Training configuration used by the cells below.
batch_size = 128   # passed to `model.fit`
num_classes = 10   # number of units in the final softmax layer
epochs = 12        # passed to `model.fit`

# Display the input shape the first layer will be built with.
input_shape

(28, 28, 1)

In [24]:
# Build the CNN as a linear stack of layers.
model = Sequential([
    # Only the first layer declares the input shape; every later layer infers
    # its input from the output of the layer before it. (The second Conv2D
    # previously repeated `input_shape`, which had no effect: it consumes the
    # 26x26x32 output of the first convolution, not a 28x28x1 image.)
    Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
    Conv2D(64, (3, 3), activation='relu'),

    # Downsample the feature maps by 2 in each spatial dimension, then apply dropout.
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Flatten the feature maps into one vector for the fully connected layers.
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),

    # One softmax output per class.
    Dense(num_classes, activation='softmax'),
])

# Print the layer-by-layer output shapes and parameter counts.
model.summary()



Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_2 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 24, 24, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 12, 12, 64)        0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 12, 12, 64)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 9216)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 128)               1179776   
_________________________________________________________________
dropout_3 (Dropout)          (None, 128)              

## 4. Fitting and Evaluating the Model

Let's now compile and train the model.

In [25]:
# Configure training: RMSprop with a learning rate of 0.01, categorical
# cross-entropy loss (the labels are one-hot encoded), and categorical
# accuracy as the reported metric.
#
# The optimizer, loss and metric are all taken from the `keras` namespace,
# the same one the model and its layers were imported from, rather than
# mixing `keras.*` and `tf.keras.*` objects in one model.
model.compile(
    optimizer=keras.optimizers.RMSprop(learning_rate=0.01),
    loss=keras.losses.categorical_crossentropy,
    metrics=[keras.metrics.CategoricalAccuracy()],
)

In [26]:
# Train for `epochs` passes over the training data using batches of `batch_size`.
# NOTE(review): the test split is passed as validation data here and is also
# the split scored by the final `model.evaluate` call, so there is no separate
# hold-out set — confirm this is intended.
model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=epochs,
    batch_size=batch_size,
)

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


<tensorflow.python.keras.callbacks.History at 0x7f1b2e3f6f60>

In [27]:
# Score the trained model on the test split. The result lists the loss first,
# followed by the categorical-accuracy metric configured at compile time.
model.evaluate(x=X_test, y=y_test)



[0.05782555043697357, 0.9850000143051147]