<h2><center>MNIST classification using <i>LeNet300</i></center></h2>

In [None]:
# As a first step, we may want to switch to a GPU-accelerated VM
# In the menu: Runtime->Change runtime type->Hardware Accelerator->GPU.
#
# This will test if we have a GPU-equipped VM and return some useful system-level information
#!nvidia-smi

# Which GNU/Linux distribution is installed on our VM ?
#!lsb_release -a

# Which version of the Linux kernel our VM has ?
#!uname -a

# How much free memory our VM has ?
#!free -h

# Which storage facilities our VM has ?
#!mount

# Which python version our VM has installed ?
#!python --version

# Importing Keras

In [None]:
# Keras 2.x main module, running on top of the tensorflow 2.x backend
import tensorflow as tf
import keras

# Report which library versions this runtime actually provides
print(f"Using tensorflow version {tf.__version__}")
print(f"Using keras version {keras.__version__}")

# Loading and preparing the MNIST dataset



Load the MNIST dataset via keras.datasets

In [None]:
#@title
# The MNIST dataset ships with Keras and is loaded entirely into RAM
# Warning: this is not feasible for larger databases (e.g., ImageNet)
from keras.datasets import mnist

# load_data() returns one (images, labels) pair per split
train_split, test_split = mnist.load_data()
train_images, train_labels = train_split
test_images, test_labels = test_split

# Show the shape of every array: images first, then labels
for loaded_array in (train_images, test_images, train_labels, test_labels):
    print(loaded_array.shape)
#!free -h

In [None]:
# Let us visualize one training sample using the matplotlib library
from matplotlib import pyplot as plt

# Index of the training sample to display (0 = first sample)
imageIndex = 0

# Both the label and the image are looked up through imageIndex, so changing
# the index above keeps the printed label and the displayed image in sync
print("Label for " + str(imageIndex) + "-th train image is: " + str(train_labels[imageIndex]))
plt.imshow(train_images[imageIndex])

In [None]:
# Labels are encoded in one-hot format
# to_categorical is imported from keras.utils: the keras.utils.np_utils
# submodule is not available in recent Keras releases
from keras.utils import to_categorical

# Number of digit classes; passed explicitly so that train and test label
# matrices always have the same width
num_classes = 10

# Only encode labels that are still a 1-D vector of class indices: this keeps
# the cell idempotent, i.e. re-running it does not one-hot encode the
# already-encoded labels a second time
#print("This is the native " + str(imageIndex) + "-th train label: " + str(train_labels[0]))
if train_labels.ndim == 1:
    train_labels = to_categorical(train_labels, num_classes=num_classes)
#print("This is the one-hot encoding of the " + str(imageIndex) + "-th train label: " + str(train_labels[0]))
if test_labels.ndim == 1:
    test_labels = to_categorical(test_labels, num_classes=num_classes)



In [None]:
# Give every image an explicit trailing axis of size 1 (a single color channel),
# following the backend layout scheme
img_rows, img_cols = train_images.shape[1:3]
train_images = train_images.reshape(len(train_images), img_rows, img_cols, 1)
test_images = test_images.reshape(len(test_images), img_rows, img_cols, 1)
print('train_images shape:', train_images.shape)
print('test_images shape:', test_images.shape)
print(len(train_images), 'train samples')
print(len(test_images), 'test samples')

# Pixels must be cast to float32 before they can be normalized
train_images = train_images.astype('float32')
test_images = test_images.astype('float32')

# Standardize the images to zero mean and unit standard deviation.
# The statistics come from the train set only and are reused for the test set.
train_mean, train_std = train_images.mean(), train_images.std()
train_images = (train_images - train_mean) / train_std
test_images = (test_images - train_mean) / train_std

# Alternatively, we could rescale the pixels to the [-1, 1] range instead
#train_images = ((train_images / 255) * 2) -1
#test_images = ((test_images / 255) * 2) -1

# Defining the neural network architecture (i.e., the network model)

In [None]:
# Sequential is a container that stacks layers one after the other, i.e. it
# describes a loop-less (feed-forward) NN architecture
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten

input_shape = (img_rows, img_cols, 1)
output_shape = 10

# The layers are listed in forward order: the image is flattened, goes through
# two sigmoid hidden layers (300 and 100 units) and ends in a softmax output
model = Sequential([
    Flatten(input_shape=input_shape),
    Dense(300),
    Activation('sigmoid'),
    Dense(100),
    Activation('sigmoid'),
    Dense(output_shape),
    Activation('softmax'),
])

Instantiating an SGD optimizer with LR = 10^-3 and printing the model

In [None]:
# The optimizers module provides a number of optimization algorithms for updating
# a network's parameters according to the computed error gradients

# Defining our SGD optimizer.
# - `learning_rate` is the supported argument name; `lr` is a deprecated alias
#   that recent Keras releases no longer accept.
# - The optimizer is taken from the same `keras` package used to build the
#   model, so that model and optimizer never come from two different Keras
#   implementations.
optimizer = keras.optimizers.SGD(learning_rate=1e-3)

# Compiling a model in Keras amounts to associating the optimizer to a model with an appropriate loss function
# (categorical cross-entropy matches the one-hot labels and the softmax output)
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])

# Let us have a look at the model topology
model.summary()

# Training the network

In [None]:
# This is where the actual training and per-epoch validation happen

# Batch size: number of images over which error gradients are averaged
batch_size = 128
# Number of training epochs
epochs = 10
# Number of training images to use. 60000 should cover the whole MNIST train
# split (check the shapes printed above); lower it, e.g. to 10000, to cut time
n_train_samples = 60000

# Keep only the first n_train_samples images and their labels
train_subset_images = train_images[:n_train_samples]
train_subset_labels = train_labels[:n_train_samples]

# The test set is passed as validation data, so it is evaluated after every epoch
history = model.fit(
    train_subset_images,
    train_subset_labels,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    shuffle=True,
    validation_data=(test_images, test_labels),
)

In [None]:
# We may want to independently test on a different dataset after training the network
#score = model.evaluate(test_images, test_labels, verbose=False)
#print('Test score:', score[0])
#print('Test accuracy:', score[1])

# Visualizing the network performance

In [None]:
# We now want to plot the train and validation loss functions and accuracy curves
#print(history.history.keys())

def plot_history_metric(history, metric, title, ylabel, y_top, legend_loc):
    """Plot the train and validation curves of one metric recorded by model.fit.

    Args:
        history: object returned by model.fit; its `history` dict must hold
            the `metric` and `'val_' + metric` per-epoch value lists.
        metric: key of the train curve in `history.history` (e.g. 'loss').
        title: figure title.
        ylabel: label of the y axis.
        y_top: upper limit of the y axis (the lower limit is always 0).
        legend_loc: matplotlib legend location (e.g. 'upper right').
    """
    # The x axis follows the number of epochs actually trained instead of a
    # hard-coded value, so the plot stays correct when `epochs` is changed
    n_epochs = len(history.history[metric])

    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title(title)
    plt.ylabel(ylabel)
    plt.ylim(bottom=0, top=y_top)
    plt.xlabel('Epoch')
    plt.xlim(left=0, right=n_epochs)
    plt.legend(['train', 'test'], loc=legend_loc)
    plt.show()

# summarize history for loss
plot_history_metric(history, 'loss', 'Model Loss', 'Loss',
                    y_top=3, legend_loc='upper right')

# summarize history for accuracy
plot_history_metric(history, 'accuracy', 'Model Accuracy', 'Accuracy',
                    y_top=1, legend_loc='lower right')


# Computing the confusion matrix
The confusion matrix allows us to analyze the trained network's performance on a per-class basis

In [None]:
# Example of a confusion matrix using sklearn.metrics
from sklearn.metrics import confusion_matrix

# One row of per-class scores for every test image
predictions = model.predict(test_images)

# Mind that confusion_matrix is fed class indices here, not one-hot or score
# vectors: argmax over axis 1 turns each row into the index of its largest entry
true_classes = test_labels.argmax(axis=1)
predicted_classes = predictions.argmax(axis=1)
matrix = confusion_matrix(true_classes, predicted_classes)
print(matrix)