# Train a CNN on MNIST

In [1]:
import tensorflow as tf

# load_data() returns two (images, labels) pairs; the second pair is held out
# and is split into validation and test sets further down.
train_split, rest_split = tf.keras.datasets.mnist.load_data()
x_train, y_train = train_split
x_rest, y_rest = rest_split

## Split the held-out data into validation and test sets

In [2]:
# The first `num_val` held-out samples become the validation set;
# everything after that is kept back as the test set.
num_val = 5000

x_val, x_test = x_rest[:num_val], x_rest[num_val:]
y_val, y_test = y_rest[:num_val], y_rest[num_val:]

## Add the channel dimension

In [3]:
import numpy as np

# Append a trailing length-1 axis to every image array so each sample has an
# explicit channel dimension, as the Conv2D input shape (28, 28, 1) requires.
x_train_reshaped = np.expand_dims(x_train, axis=-1)
x_val_reshaped = np.expand_dims(x_val, axis=-1)
x_test_reshaped = np.expand_dims(x_test, axis=-1)
print(x_train_reshaped.shape)

(60000, 28, 28, 1)


## Scale the data

In [4]:
# Scale all three splits by ONE factor taken from the training images, so the
# training maximum maps to 1. Dividing each split by its own maximum would apply
# a different transformation to validation/test data whenever their maxima
# differ from the training maximum, and would let held-out statistics leak into
# preprocessing.
# Re-running this cell is harmless: after the first run the training maximum is
# 1.0, so every division below becomes a no-op.
pixel_max = x_train_reshaped.max()  # presumably 255 for raw MNIST pixels

x_train_reshaped = x_train_reshaped / pixel_max
x_val_reshaped = x_val_reshaped / pixel_max
x_test_reshaped = x_test_reshaped / pixel_max

## Convert integer outputs to one-hot encoding

In [5]:
from tensorflow.keras.utils import to_categorical

# Fix the encoding width explicitly. Without `num_classes`, to_categorical infers
# the width from the largest label present in the array it is given, so a split
# that happened to lack the highest digit would be encoded with fewer columns
# than the model's 10-unit softmax output expects.
NUM_CLASSES = 10

y_train_one_hot = to_categorical(y_train, num_classes=NUM_CLASSES)
y_val_one_hot = to_categorical(y_val, num_classes=NUM_CLASSES)
y_test_one_hot = to_categorical(y_test, num_classes=NUM_CLASSES)

# Show one label before and after encoding as a sanity check.
print(y_train[0])
print(y_train_one_hot[0])

5
[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]


## Create a CNN model using Keras

In [6]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dropout, Dense

# Build the network layer by layer; the resulting stack is:
#   7x7 conv (64 filters) -> pool
#   two 3x3 convs (128 filters) -> pool
#   two 3x3 convs (256 filters) -> pool
#   flatten -> Dense(128) -> Dropout -> Dense(64) -> Dropout -> Dense(10, softmax)
model = Sequential()

# Stem: a single wide convolution applied to the 28x28x1 input, then downsample.
model.add(Conv2D(64, 7, activation='relu', padding='same', input_shape=(28, 28, 1)))
model.add(MaxPooling2D(2))

# Two convolutional stages; each stage is a pair of 3x3 convolutions followed
# by a 2x2 max-pool.
for stage_filters in ((128, 128), (256, 256)):
    for n_filters in stage_filters:
        model.add(Conv2D(n_filters, 3, activation='relu', padding='same'))
    model.add(MaxPooling2D(2))

# Classifier head: two dense layers, each followed by 50% dropout, and a final
# 10-way softmax.
model.add(Flatten())
for n_units in (128, 64):
    model.add(Dense(n_units, activation='relu'))
    model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 28, 28, 64)        3200      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 64)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 14, 14, 128)       73856     
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 14, 14, 128)       147584    
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 7, 7, 128)         0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 7, 7, 256)         295168    
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 7, 7, 256)         5

In [7]:
# Categorical cross-entropy matches the one-hot encoded targets; accuracy is
# reported alongside the loss during training.
model.compile(
    loss='categorical_crossentropy',
    optimizer='Adam',
    metrics=['accuracy'],
)

In [8]:
# Train for 10 epochs in mini-batches of 64, passing the validation split so
# validation metrics are reported during training. The call is the cell's last
# expression, so the returned History object is displayed as the cell output.
model.fit(
    x=x_train_reshaped,
    y=y_train_one_hot,
    validation_data=(x_val_reshaped, y_val_one_hot),
    epochs=10,
    batch_size=64,
)

Train on 60000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f029800da20>