In [None]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras

## Load Dataset

In [None]:
# Handle to the Keras MNIST dataset module
# (28x28 images of hand-written digits 0-9)
mnist = keras.datasets.mnist

In [None]:
# Load MNIST, then split each (images, labels) pair into its own variables
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [None]:
# Report the dimensions of the training and test image arrays
print(f'x_train shape: {x_train.shape}')
print(f'x_test shape: {x_test.shape}')

In [None]:
# Preview the first 5 training images side by side.
# The digits are single-channel images, so a grayscale colormap is requested
# explicitly; without `cmap` matplotlib renders them with its default
# false-colour map.
fig, axes = plt.subplots(1, 5, figsize=(10, 2.5))
for ax, image in zip(axes, x_train[:5]):
    # squeeze() drops a trailing channel axis of size 1, so the preview also
    # works if this cell is re-run after the preprocessing cell has reshaped
    # the images to (height, width, 1).
    ax.imshow(image.squeeze(), cmap='gray')
    ax.set_xticks([])
    ax.set_yticks([])
fig.suptitle('First 5 training images')
plt.show()

## Data Preprocessing

In [None]:
from tensorflow.keras.utils import to_categorical

In [None]:
# Preprocess the data
IMG_SIZE = 28     # images are IMG_SIZE x IMG_SIZE pixels

num_classes = 10  # 10 digits

# Guard against hidden-state bugs: this cell overwrites its own inputs, so
# running it a second time would silently rescale the pixels again and
# re-encode labels that are already one-hot. Freshly loaded data has 3-D
# image arrays and 1-D label arrays; anything else means the cell already ran.
if x_train.ndim != 3 or x_test.ndim != 3 or y_train.ndim != 1 or y_test.ndim != 1:
    raise RuntimeError(
        'Data is already preprocessed (or has an unexpected shape); '
        're-run the dataset loading cell before running this cell again.'
    )

# Scale the pixel values from the range [0, 255] to floats in [0, 1]
## To improve the convergence and stability of the NN during training
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# Expanding dimensions
## The original MNIST dataset has shape (num_samples, height, width)
## We need to add a dimension to the end of the shape (num_samples, height, width, 1) for the channel
## This is because Keras expects the channel dimension to be present (1 for grayscale, 3 for RGB)
## (the arrays are already NumPy arrays, so they are reshaped directly without an extra copy)
x_train = x_train.reshape(-1, IMG_SIZE, IMG_SIZE, 1)
x_test = x_test.reshape(-1, IMG_SIZE, IMG_SIZE, 1)

# One-hot encoding
## Convert the labels from integers to one-hot vectors
## For example, the label 3 will be converted to [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)

In [None]:
# Report the dimensions of the image arrays again, after preprocessing
print(f'x_train shape: {x_train.shape}')
print(f'x_test shape: {x_test.shape}')

## Build Model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, MaxPooling2D, Dropout

In [None]:
## Create the neural network model
##
## Spatial size after each Conv2D (3x3 kernel) + MaxPooling2D (2x2) stage:
## output_shape = ( (input_shape - filter_size) + 1 ) // max_pooling_size
## so 28x28 inputs shrink as 28 -> 13 -> 5 -> 1 over the three stages below
##
## Relu for non-linearity (0 for negative values, x for positive values)
model = Sequential([
    # The input shape is declared once, with an explicit Input object, instead
    # of the `input_shape=` layer argument (recent Keras versions warn about
    # that argument on Sequential models).
    keras.Input(shape=x_train.shape[1:]),

    ## Convolutional layer - 1
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    ## Convolutional layer - 2
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    ## Convolutional layer - 3
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    ## Fully connected layer - 1
    Flatten(),                      # Flatten the feature maps to a 1D vector
    Dense(64, activation='relu'),   # 64 neurons

    ## Fully connected layer - 2
    Dense(32, activation='relu'),   # 32 neurons

    ## Output layer
    ## Softmax for probability distribution over the classes
    Dense(num_classes, activation='softmax'),  # 10 neurons
])

In [None]:
# Print an overview of the model built above (its layers and their parameters)
model.summary()

In [None]:
# Number of samples available for training (length of the first axis)
print(f'Total training sample: {len(x_train)}')

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss
# (the labels are one-hot encoded), and accuracy as the reported metric
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=['accuracy'],
)

In [None]:
# Training hyperparameters
epochs = 10
batch_size = 128

# Train the (already compiled) model; `history` keeps the per-epoch metrics
# that are plotted further down.
# NOTE(review): the test split is also passed as validation data, so the
# validation curves and the later test evaluation use the same samples.
# Consider a separate validation split if these curves are ever used for
# tuning or early stopping.
history = model.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,  # 0 = silent, 1 = progress bar, 2 = one line per epoch
)

In [None]:
# Score the trained model on the test split; with the compile settings used
# in this notebook the result holds the loss first and the accuracy second
score = model.evaluate(x_test, y_test, verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

In [None]:
# Training vs. validation curves recorded during fit():
# accuracy in the left panel, loss in the right panel
fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(12, 4))

# Left panel: accuracy per epoch
ax_acc.plot(history.history['accuracy'], label='Training Accuracy')
ax_acc.plot(history.history['val_accuracy'], label='Validation Accuracy')
ax_acc.set_title('Accuracy')
ax_acc.set_xlabel('Epochs')
ax_acc.set_ylabel('Accuracy')
ax_acc.legend()

# Right panel: loss per epoch
ax_loss.plot(history.history['loss'], label='Training Loss')
ax_loss.plot(history.history['val_loss'], label='Validation Loss')
ax_loss.set_title('Loss')
ax_loss.set_xlabel('Epochs')
ax_loss.set_ylabel('Loss')
ax_loss.legend()

fig.tight_layout()
plt.show()

In [None]:
# Save the trained model to 'mnist_cnn.h5' in the current working directory.
# NOTE(review): the .h5 suffix selects Keras' legacy HDF5 format; newer Keras
# releases recommend the native `.keras` format. Confirm what downstream
# loaders expect before changing the filename.
model.save('mnist_cnn.h5')