In [45]:
# Importing the relevant packages
import tensorflow as tf
import tensorflow_datasets as tfds
import datetime

import io
import itertools

import numpy as np
import sklearn.metrics

import matplotlib.pyplot as plt

## Downloading and preprocessing the data

In [46]:
# Defining some constants/hyperparameters
BUFFER_SIZE = 70_000 # for reshuffling
BATCH_SIZE = 128
NUM_EPOCHS = 20

In [47]:
# Downloading the MNIST dataset
mnist_dataset, mnist_info = tfds.load(name='mnist', with_info=True, as_supervised=True)

In [48]:
# Extracting the train and test datasets
mnist_train, mnist_test = mnist_dataset['train'], mnist_dataset['test']

In [49]:
# Creating a function to scale our image data
def scale(image, label):
    image = tf.cast(image, tf.float32)
    image /= 255.

    return image, label

In [50]:
# Scaling the data
train_and_validation_data = mnist_train.map(scale)
test_data = mnist_test.map(scale)

In [51]:
# Defining the size of the validation set
num_validation_samples = 0.1 * mnist_info.splits['train'].num_examples
num_validation_samples = tf.cast(num_validation_samples, tf.int64)

In [52]:
# Defining the size of the test set
num_test_samples = mnist_info.splits['test'].num_examples
num_test_samples = tf.cast(num_test_samples, tf.int64)

In [53]:
# Reshuffling the dataset
train_and_validation_data = train_and_validation_data.shuffle(BUFFER_SIZE)

In [54]:
# Splitting the dataset into training + validation
train_data = train_and_validation_data.skip(num_validation_samples)
validation_data = train_and_validation_data.take(num_validation_samples)

In [55]:
# Batching the data
train_data = train_data.batch(BATCH_SIZE)
validation_data = validation_data.batch(num_validation_samples)
test_data = test_data.batch(num_test_samples)

In [56]:
# Extracting the numpy arrays from the validation data for the calculation of the Confusion Matrix
for images, labels in validation_data:
    images_val = images.numpy()
    labels_val = labels.numpy()

## Creating the model and training it

In [57]:
# Outlining the model/architecture of our CNN
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(50, 5, activation='relu', input_shape=(28, 28, 1)),
    tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
    tf.keras.layers.Conv2D(50, 3, activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(10)
])

In [58]:
# A brief summary of the model and parameters
model.summary(line_length = 75)

Model: "sequential_2"
___________________________________________________________________________
 Layer (type)                    Output Shape                  Param #     
 conv2d_4 (Conv2D)               (None, 24, 24, 50)            1300        
                                                                           
 max_pooling2d_4 (MaxPooling2D)  (None, 12, 12, 50)            0           
                                                                           
 conv2d_5 (Conv2D)               (None, 10, 10, 50)            22550       
                                                                           
 max_pooling2d_5 (MaxPooling2D)  (None, 5, 5, 50)              0           
                                                                           
 flatten_2 (Flatten)             (None, 1250)                  0           
                                                                           
 dense_2 (Dense)                 (None, 10)                    125

In [59]:
# Defining the loss function
# We use a loss calculation that automatically corrects for the missing softmax
# That is the reason for 'from_logits=True'
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [60]:
# Compiling the model with Adam optimizer and the categorical crossentropy as a loss function
model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])

In [61]:
# Logging the training process data to use later in tensorboard
log_dir = "logs\\fit\\" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

In [62]:
def plot_confusion_matrix(cm, class_names):
    """
    Returns a matplotlib figure containing the plotted confusion matrix.

    Args:
    cm (array, shape = [n, n]): a confusion matrix of integer classes
    class_names (array, shape = [n]): String names of the integer classes
    """
    figure = plt.figure(figsize=(12, 12))
    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title("Confusion matrix")
    plt.colorbar()
    tick_marks = np.arange(len(class_names))
    plt.xticks(tick_marks, class_names, rotation=45)
    plt.yticks(tick_marks, class_names)

    # Normalize the confusion matrix.
    cm = np.around(cm.astype('float') / cm.sum(axis=1)[:, np.newaxis], decimals=2)

    # Use white text if squares are dark; otherwise black.
    threshold = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        color = "white" if cm[i, j] > threshold else "black"
        plt.text(j, i, cm[i, j], horizontalalignment="center", color=color)

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

    return figure

In [63]:
def plot_to_image(figure):
    """Converts the matplotlib plot specified by 'figure' to a PNG image and
    returns it. The supplied figure is closed and inaccessible after this call."""

    # Save the plot to a PNG in memory.
    buf = io.BytesIO()
    plt.savefig(buf, format='png')

    # Closing the figure prevents it from being displayed directly inside the notebook.
    plt.close(figure)

    buf.seek(0)

    # Convert PNG buffer to TF image
    image = tf.image.decode_png(buf.getvalue(), channels=4)

    # Add the batch dimension
    image = tf.expand_dims(image, 0)

    return image

In [64]:
# Define a file writer variable for logging purposes
file_writer_cm = tf.summary.create_file_writer(log_dir + '/cm')

def log_confusion_matrix(epoch, logs):
    # Use the model to predict the values from the validation dataset.
    test_pred_raw = model.predict(images_val)
    test_pred = np.argmax(test_pred_raw, axis=1)

    # Calculate the confusion matrix.
    cm = sklearn.metrics.confusion_matrix(labels_val, test_pred)

    # Log the confusion matrix as an image summary.
    figure = plot_confusion_matrix(cm, class_names=['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'])
    cm_image = plot_to_image(figure)

    # Log the confusion matrix as an image summary.
    with file_writer_cm.as_default():
        tf.summary.image("Confusion Matrix", cm_image, step=epoch)

In [65]:
# Defining the callbacks
cm_callback = tf.keras.callbacks.LambdaCallback(on_epoch_end=log_confusion_matrix)
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1, profile_batch=0)

In [66]:
# Defining early stopping to prevent overfitting
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor = 'val_loss',
    mode = 'auto',
    min_delta = 0,
    patience = 2,
    verbose = 0,
    restore_best_weights = True
)

In [67]:
# Train the network
model.fit(
    train_data,
    epochs = NUM_EPOCHS,
    callbacks = [tensorboard_callback, early_stopping],
    validation_data = validation_data,
    verbose = 2
)

Epoch 1/20
422/422 - 76s - loss: 0.2748 - accuracy: 0.9204 - val_loss: 0.0942 - val_accuracy: 0.9720 - 76s/epoch - 179ms/step
Epoch 2/20
422/422 - 68s - loss: 0.0712 - accuracy: 0.9783 - val_loss: 0.0572 - val_accuracy: 0.9845 - 68s/epoch - 160ms/step
Epoch 3/20
422/422 - 67s - loss: 0.0514 - accuracy: 0.9842 - val_loss: 0.0352 - val_accuracy: 0.9895 - 67s/epoch - 159ms/step
Epoch 4/20
422/422 - 72s - loss: 0.0428 - accuracy: 0.9870 - val_loss: 0.0277 - val_accuracy: 0.9918 - 72s/epoch - 170ms/step
Epoch 5/20
422/422 - 65s - loss: 0.0351 - accuracy: 0.9893 - val_loss: 0.0277 - val_accuracy: 0.9913 - 65s/epoch - 155ms/step
Epoch 6/20
422/422 - 67s - loss: 0.0300 - accuracy: 0.9906 - val_loss: 0.0275 - val_accuracy: 0.9907 - 67s/epoch - 160ms/step
Epoch 7/20
422/422 - 66s - loss: 0.0259 - accuracy: 0.9918 - val_loss: 0.0255 - val_accuracy: 0.9917 - 66s/epoch - 157ms/step
Epoch 8/20
422/422 - 66s - loss: 0.0242 - accuracy: 0.9926 - val_loss: 0.0214 - val_accuracy: 0.9922 - 66s/epoch - 156

<keras.src.callbacks.History at 0x7f93b441be80>

## Testing our model

In [68]:
# Testing our model
test_loss, test_accuracy = model.evaluate(test_data)



In [69]:
# Printing the test results
print('Test loss: {0:.4f}. Test accuracy: {1:.2f}%'.format(test_loss, test_accuracy*100.))

Test loss: 0.0292. Test accuracy: 99.05%


## Visualizing in Tensorboard

In [None]:
# Loading the Tensorboard extension
%load_ext tensorboard
%tensorboard --logdir "logs/fit"

Reusing a previously started TensorBoard instance should usually be fine, but just in case the instance has somehow gotten into a bad state. Run in command line these two:

taskkill /im tensorboard.exe /f

del /q %TMP%\.tensorboard-info\*