In [1]:
### How to train a simple CNN

# Goal: Classify MNIST digits

In [3]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf
from tensorflow.keras import datasets, layers, models

In [6]:
### Download and normalize the data

(X_train, Y_train), (X_test, Y_test) = datasets.mnist.load_data()
print('X_train type: ', type(X_train))
print('X_train shape: ', X_train.shape)

# Reshape the images: append a trailing channel axis of size 1 so that every
# sample has the (height, width, channels) layout the Conv2D layers take.
# The target shape is derived from each array itself instead of hard-coding
# the sample counts and image size, so this also works on a subset of the data.
X_train = X_train.reshape(X_train.shape + (1,))
X_test = X_test.reshape(X_test.shape + (1,))

# Normalize images between 0 and 1 (pixel values are divided by 255)
X_train, X_test = X_train/255.0, X_test/255.0

X_train type:  <class 'numpy.ndarray'>
X_train shape:  (60000, 28, 28)


In [10]:
# Build the 'convolutional base', i.e. the feature-extraction part of the network.
# Here it alternates Conv2D layers with MaxPooling2D layers.

# Each input image has shape (n_h, n_w, n_c); the batch dimension is not part
# of input_shape.

model = models.Sequential([
    # 32 filters of size 3 x 3, applied to the 28 x 28 single-channel input
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    # pooling window of size 2 x 2
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
])

# As the width and height shrink, we can afford to add more output channels in each Conv2D

In [11]:
# Display a summary of the model built so far (the convolutional base):
# one row per layer with its output shape and parameter count
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 13, 13, 32)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 11, 11, 64)        18496     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 5, 5, 64)          0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 3, 3, 64)          36928     
Total params: 55,744
Trainable params: 55,744
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Finish the model by stacking fully connected layers on top of the
# convolutional base; these perform the actual classification.

# INPUT -> EXTRACT FEATURES (CNN) -> CLASSIFY (FC)

for fc_layer in (
    layers.Flatten(),                        # 3D feature map -> 1D vector
    layers.Dense(64, activation='relu'),     # hidden fully connected layer
    layers.Dense(10, activation='softmax'),  # 10 outputs, one per digit class
):
    model.add(fc_layer)

In [13]:
# Display a summary of the final model: the convolutional base followed by
# the Flatten and Dense layers added above
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 13, 13, 32)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 11, 11, 64)        18496     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 5, 5, 64)          0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 3, 3, 64)          36928     
_________________________________________________________________
flatten (Flatten)            (None, 576)               0         
_________________________________________________________________
dense (Dense)                (None, 64)               

In [14]:
# Configure training: Adam as the optimizer, sparse categorical cross-entropy
# as the loss, and accuracy as the reported metric

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit the model on the training set for 5 epochs
model.fit(x=X_train, y=Y_train, epochs=5)

Train on 60000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x125f3b390>

In [16]:
# Evaluate the model on the test set.
# The result is unpacked into the loss followed by the metric passed to
# compile() (accuracy).

test_loss, test_accuracy = model.evaluate(X_test, Y_test)



In [17]:
# Report the loss and the accuracy measured on the test set
print('Cost function of test set: ', test_loss)
print('Accuracy of test set: ', test_accuracy)

Cost function of test set:  0.030467047839540465
Accuracy of test set:  0.9908
