# Convolutional Neural Network (CNN) for MNIST Classification

## Import the relevant packages

In [1]:
import io
import itertools

import numpy as np
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from tensorboard.plugins.hparams import api as hp

## Data

### Load the data from keras.datasets

In [2]:
# Fetch MNIST through Keras; the result is a pair of (inputs, targets) tuples
mnist_dataset = keras.datasets.mnist.load_data()
mnist_train = mnist_dataset[0]  # (train inputs, train targets)
mnist_test = mnist_dataset[1]   # (test inputs, test targets)

### Scaling

In [3]:
# Split each dataset into its inputs and targets
train_images, train_targets = mnist_train
test_images, test_targets = mnist_test

# Rescale the inputs by 1/255 (assuming 8-bit pixel values, this maps them into [0, 1])
scaled_train_inputs = train_images / 255.
scaled_test_inputs = test_images / 255.

### Shuffling

In [4]:
# Fixed seed so the shuffle - and the train/validation split derived from it -
# is identical on every "Restart & Run All".
SHUFFLE_SEED = 42
rng = np.random.default_rng(SHUFFLE_SEED)

# The targets are read from mnist_train / mnist_test rather than from
# train_targets / test_targets: those two names are rebound by the
# validation-split cell, so re-running this cell afterwards would otherwise
# index already-subset (train) or already-shuffled (test) labels.

# train data
shuffled_indices = rng.permutation(scaled_train_inputs.shape[0])
shuffled_train_inputs = scaled_train_inputs[shuffled_indices]
shuffled_train_targets = mnist_train[1][shuffled_indices]

# test data
shuffled_indices = rng.permutation(scaled_test_inputs.shape[0])
shuffled_test_inputs = scaled_test_inputs[shuffled_indices]
shuffled_test_targets = mnist_test[1][shuffled_indices]

### Create validation data 

In [5]:
# Hold out the first 10% of the shuffled training samples as validation data
num_validation_samples = int(0.1 * shuffled_train_inputs.shape[0])

# Add a trailing channel axis so the arrays match the model's input_shape=(28, 28, 1)
image_shape = (-1, 28, 28, 1)

validation_inputs = shuffled_train_inputs[:num_validation_samples].reshape(image_shape)
validation_targets = shuffled_train_targets[:num_validation_samples]

# Everything after the validation slice is used for training
# (note: train_targets and test_targets are rebound here)
train_inputs = shuffled_train_inputs[num_validation_samples:].reshape(image_shape)
train_targets = shuffled_train_targets[num_validation_samples:]

test_inputs = shuffled_test_inputs.reshape(image_shape)
test_targets = shuffled_test_targets

print(train_inputs.shape)

(54000, 28, 28, 1)


## The model 

In [6]:
# Constants/hyperparameters are collected here so they are easy to find and tune

output_size = 10       # units in the final Dense layer (one per class)
batch_size = 256       # a power of 2
max_epochs = 20        # upper bound on epochs; early stopping may end training sooner
steps_per_epoch = 100  # not referenced by the cells visible in this notebook section

In [7]:
# Hyperparameters to tune and the discrete values to try for each:
# the number of filters in the convolutional layers and the kernel size of the first one
HP_FILTER_NUM = hp.HParam('filters_number', hp.Discrete([64, 96, 128]))
HP_FILTER_SIZE = hp.HParam('filter_size', hp.Discrete([5, 7]))

# Register the experiment (tuned hyperparameters + reported metric) for the HParams dashboard.
# Forward slashes are used so the directory is the same one the TensorBoard cell
# reads ("logs/Model 2/hparam_tuning") on every OS; a backslash literal only forms
# nested directories on Windows.
with tf.summary.create_file_writer('logs/Model 2/hparam_tuning/').as_default():
    hp.hparams_config(
        hparams=[HP_FILTER_NUM, HP_FILTER_SIZE],
        metrics=[hp.Metric('accuracy', display_name='Accuracy')]
    )

In [8]:
# Helpers for rendering the confusion matrix, followed by the function that
# wraps building, training and evaluating one model configuration.

def _plot_confusion_matrix(cm, class_names, normalized=True):
    """Return a matplotlib figure containing the plotted confusion matrix.

    Args:
        cm (array, shape = [n, n]): a confusion matrix of integer classes
        class_names (array, shape = [n]): string names of the integer classes
        normalized (bool): True if `cm` is already row-normalized; when False
            each row is divided by its sum before plotting.

    Returns:
        The matplotlib figure that was drawn (left open; the caller closes it).
    """
    cm = np.asarray(cm, dtype='float')

    # Normalize the confusion matrix if the caller passed raw counts.
    # Rows that sum to zero are left as zeros instead of producing NaNs.
    if not normalized:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = np.divide(cm, row_totals, out=np.zeros_like(cm), where=row_totals != 0)

    figure, ax = plt.subplots(figsize=(12, 12))

    # Plot the image
    heatmap = ax.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    ax.set_title("Confusion matrix")
    figure.colorbar(heatmap, ax=ax)

    tick_marks = np.arange(len(class_names))
    ax.set_xticks(tick_marks)
    ax.set_xticklabels(class_names, rotation=45)
    ax.set_yticks(tick_marks)
    ax.set_yticklabels(class_names)

    cell_values = np.around(cm, decimals=2)

    # Use white text if squares are dark; otherwise black.
    threshold = cell_values.max() / 2.

    for i, j in itertools.product(range(cell_values.shape[0]), range(cell_values.shape[1])):
        color = "white" if cell_values[i, j] > threshold else "black"
        ax.text(j, i, cell_values[i, j], horizontalalignment="center", color=color)

    ax.set_ylabel('True label')
    ax.set_xlabel('Predicted label')

    # Adjust the padding only after the labels exist, so they are included in the layout
    figure.tight_layout()

    return figure


def _plot_to_image(figure):
    """Convert the matplotlib plot specified by `figure` to a PNG image tensor.

    The supplied figure is closed and inaccessible after this call.

    Returns:
        A 4-channel image tensor with a leading batch dimension of size 1.
    """
    # Save the given figure (not whatever pyplot considers "current") to a PNG in memory.
    buf = io.BytesIO()
    figure.savefig(buf, format='png')

    # Closing the figure prevents it from being displayed directly inside the notebook.
    plt.close(figure)

    # Convert the PNG buffer to TF image.
    buf.seek(0)
    image = tf.image.decode_png(buf.getvalue(), channels=4)

    # Add the batch dimension.
    return tf.expand_dims(image, 0)


def train_test_model(hparams, session_num):
    """Build, train and evaluate one CNN configuration.

    Args:
        hparams: mapping that provides values for HP_FILTER_NUM and HP_FILTER_SIZE.
        session_num: trial number; used to name the log and saved-model directories.

    Returns:
        The accuracy reported by `model.evaluate` on the validation set.
    """
    # Outlining the model/architecture of our CNN
    model = keras.Sequential([
        keras.layers.Conv2D(hparams[HP_FILTER_NUM], hparams[HP_FILTER_SIZE], activation='relu', input_shape=(28, 28, 1)),
        keras.layers.MaxPooling2D(pool_size=(2, 2)),
        keras.layers.Conv2D(hparams[HP_FILTER_NUM], 3, activation='relu'),
        keras.layers.MaxPooling2D(pool_size=(2, 2)),
        keras.layers.Flatten(),
        keras.layers.Dense(output_size)
    ])

    # The last layer has no activation, so the loss is told to expect logits
    loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)

    # Compiling the model
    model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])

    # Logging directory for this trial. Forward slashes keep it under the same
    # "logs/Model 2/fit" tree that the TensorBoard cell reads, on every OS.
    log_dir = "logs/Model 2/fit/" + "run-{}".format(session_num)

    # Separate writer for the confusion-matrix images
    file_writer_cm = tf.summary.create_file_writer(log_dir + '/cm')
    class_names = [str(class_index) for class_index in range(output_size)]

    # The lambda callback to be called should have the (epoch, logs) parameters
    def log_confusion_matrix(epoch, logs):
        predictions_raw = model.predict(validation_inputs)
        predictions = np.argmax(predictions_raw, axis=1)

        # normalize='true' divides each row by the number of true samples of that class
        cm = confusion_matrix(validation_targets, predictions, normalize='true')

        # Log the confusion matrix as an image.
        figure = _plot_confusion_matrix(cm, class_names, True)
        cm_image = _plot_to_image(figure)

        with file_writer_cm.as_default():
            tf.summary.image('Confusion Matrix', cm_image, step=epoch)

    # Define the Tensorboard and Confusion Matrix callbacks.
    tensorboard_callback = keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1, profile_batch=0)
    cm_callback = keras.callbacks.LambdaCallback(on_epoch_end=log_confusion_matrix)

    # Early stopping: halt once val_loss has not improved for 2 epochs and
    # roll the model back to the best weights seen.
    early_stopping = keras.callbacks.EarlyStopping(
        monitor='val_loss',
        mode='auto',
        min_delta=0,
        patience=2,
        verbose=0,
        restore_best_weights=True
    )

    # Training the model
    try:
        model.fit(
            train_inputs,
            train_targets,
            batch_size=batch_size,
            epochs=max_epochs,
            callbacks=[tensorboard_callback, cm_callback, early_stopping],
            validation_data=(validation_inputs, validation_targets),
            verbose=2
        )
    finally:
        # Flush and release the image writer even if training is interrupted
        file_writer_cm.close()

    # Evaluating the model's performance on the validation set
    _, accuracy = model.evaluate(validation_inputs, validation_targets)

    # Saving the current model for future reference.
    # This path literal is left unchanged because the evaluation cell loads a
    # model from the same literal form (r"saved_models\Model 2\Run-7").
    model.save(r"saved_models\Model 2\Run-{}".format(session_num))

    return accuracy

In [9]:
# Creating a function to log the results
def run(log_dir, hparams, session_num):
    """Train one trial and record its hyperparameters and accuracy for the HParams dashboard.

    Args:
        log_dir: directory the summary writer for this trial writes to.
        hparams: the hyperparameter values used in this trial.
        session_num: trial number, forwarded to train_test_model.

    Returns:
        The accuracy returned by train_test_model, so the caller can compare
        trials without going through TensorBoard.
    """
    with tf.summary.create_file_writer(log_dir).as_default():
        hp.hparams(hparams)  # record the values used in this trial
        accuracy = train_test_model(hparams, session_num)
        tf.summary.scalar('accuracy', accuracy, step=1)

    return accuracy

In [10]:
# Grid search: one trial per (filter size, number of filters) combination
session_num = 1

# itertools.product varies the last factor fastest, i.e. filter_size is the outer loop
for filter_size, filter_num in itertools.product(HP_FILTER_SIZE.domain.values,
                                                 HP_FILTER_NUM.domain.values):

    hparams = {
        HP_FILTER_SIZE: filter_size,
        HP_FILTER_NUM: filter_num
    }

    run_name = "run-%d" % session_num
    print('--- Starting trial: %s' % run_name)
    print({h.name: value for h, value in hparams.items()})

    # Forward slashes so each trial lands under the "logs/Model 2/hparam_tuning"
    # directory that the TensorBoard cell reads, regardless of OS.
    run('logs/Model 2/hparam_tuning/' + run_name, hparams, session_num)

    session_num += 1

--- Starting trial: run-1
{'filter_size': 3, 'filters_number': 32}
Epoch 1/20
422/422 - 18s - loss: 0.3565 - accuracy: 0.9038 - val_loss: 0.1246 - val_accuracy: 0.9638
Epoch 2/20
422/422 - 17s - loss: 0.0921 - accuracy: 0.9722 - val_loss: 0.0917 - val_accuracy: 0.9723
Epoch 3/20
422/422 - 18s - loss: 0.0669 - accuracy: 0.9799 - val_loss: 0.0763 - val_accuracy: 0.9752
Epoch 4/20
422/422 - 18s - loss: 0.0541 - accuracy: 0.9835 - val_loss: 0.0702 - val_accuracy: 0.9785
Epoch 5/20
422/422 - 18s - loss: 0.0452 - accuracy: 0.9865 - val_loss: 0.0651 - val_accuracy: 0.9810
Epoch 6/20
422/422 - 18s - loss: 0.0408 - accuracy: 0.9874 - val_loss: 0.0556 - val_accuracy: 0.9848
Epoch 7/20
422/422 - 18s - loss: 0.0355 - accuracy: 0.9894 - val_loss: 0.0561 - val_accuracy: 0.9842
Epoch 8/20
422/422 - 18s - loss: 0.0327 - accuracy: 0.9901 - val_loss: 0.0572 - val_accuracy: 0.9848
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instruct

INFO:tensorflow:Assets written to: saved_models\Model 2\Run-7\assets
--- Starting trial: run-8
{'filter_size': 5, 'filters_number': 128}
Epoch 1/20
422/422 - 77s - loss: 0.1875 - accuracy: 0.9453 - val_loss: 0.0735 - val_accuracy: 0.9780
Epoch 2/20
422/422 - 74s - loss: 0.0535 - accuracy: 0.9827 - val_loss: 0.0623 - val_accuracy: 0.9818
Epoch 3/20
422/422 - 71s - loss: 0.0369 - accuracy: 0.9885 - val_loss: 0.0527 - val_accuracy: 0.9843
Epoch 4/20
422/422 - 73s - loss: 0.0298 - accuracy: 0.9903 - val_loss: 0.0429 - val_accuracy: 0.9882
Epoch 5/20
422/422 - 70s - loss: 0.0240 - accuracy: 0.9919 - val_loss: 0.0409 - val_accuracy: 0.9893
Epoch 6/20
422/422 - 71s - loss: 0.0190 - accuracy: 0.9936 - val_loss: 0.0400 - val_accuracy: 0.9897
Epoch 7/20
422/422 - 68s - loss: 0.0141 - accuracy: 0.9954 - val_loss: 0.0414 - val_accuracy: 0.9907
Epoch 8/20
422/422 - 69s - loss: 0.0142 - accuracy: 0.9948 - val_loss: 0.0447 - val_accuracy: 0.9882
INFO:tensorflow:Assets written to: saved_models\Model 2

In [7]:
# Load the TensorBoard notebook extension and open a dashboard on the
# hyperparameter-tuning logs.
%load_ext tensorboard
%tensorboard --logdir "logs/Model 2/hparam_tuning"

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 7764), started 0:01:00 ago. (Use '!kill 7764' to kill it.)

In [7]:
# Open a TensorBoard dashboard on the per-run training logs.
# NOTE(review): the extension is already loaded by the time this cell runs
# (see the "already loaded" message in the output), so the %load_ext line is redundant here.
%load_ext tensorboard
%tensorboard --logdir "logs/Model 2/fit"

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 12812), started 0:01:13 ago. (Use '!kill 12812' to kill it.)

## Test the model

In [8]:
# Restore one of the models saved during the hyperparameter search, to evaluate it on the test set.
# NOTE(review): the run number is hard-coded. The search grid defined in this notebook
# (2 filter sizes x 3 filter counts) produces runs 1-6, so confirm that a "Run-7"
# directory actually exists before relying on this cell.
saved_model_path = r"saved_models\Model 2\Run-7"
model = keras.models.load_model(saved_model_path)

In [9]:
# The predicted class of each test sample is the index of the largest model output
test_outputs = model.predict(test_inputs)
test_pred = test_outputs.argmax(axis=1)

# Fraction of test samples whose predicted class equals the target
test_accuracy = (test_targets == test_pred).mean()

In [10]:
# Report the test accuracy as a percentage with two decimals
test_accuracy_percent = test_accuracy * 100.
print('\nTest accuracy: {0:.2f}%'.format(test_accuracy_percent))


Test accuracy: 99.10%
