# Machine Learning Project Task 1:
Implementation of LeNet-5

In [None]:
# Load libraries
import math

import h5py
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix,classification_report
from sklearn.preprocessing import OneHotEncoder
from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization, AveragePooling2D
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import LearningRateScheduler
from keras import regularizers
from keras.datasets import mnist
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Fetch the MNIST digits (train and test splits) through keras
rows, cols = 28, 28
input_shape = (rows, cols, 1)

(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

# Give every image an explicit single-channel axis, cast to float32 and
# divide by 255 so the pixel values are rescaled
X_train = X_train.reshape(X_train.shape[0], rows, cols, 1).astype('float32') / 255.0
X_test = X_test.reshape(X_test.shape[0], rows, cols, 1).astype('float32') / 255.0

# Replace the integer digit labels with one-hot rows over 10 classes
Y_train, Y_test = to_categorical(Y_train, 10), to_categorical(Y_test, 10)

In [None]:

# Show the first 30 training digits on a 3 x 10 grid
plt.figure(figsize=(15, 4.5))
for position, digit in enumerate(X_train[:30], start=1):
    plt.subplot(3, 10, position)
    plt.imshow(digit.reshape((28, 28)), cmap=plt.cm.binary)
    plt.axis('off')
# Negative spacing pulls the tiles tightly together
plt.subplots_adjust(wspace=-0.1, hspace=-0.1)
plt.show()

In [None]:
# Building the LeNet-5 CNN model
#
# Layer order follows LeNet-5: C1 -> S2 -> C3 -> S4 -> (flatten) -> C5 -> F6 -> output.
# With 28x28 inputs and Keras' default 'valid' padding the spatial size goes
# 28 -> 24 (C1) -> 12 (S2) -> 8 (C3) -> 4 (S4), so Flatten yields 4*4*16 = 256 features.

# Define a sequential model
model = Sequential()

# C1 Convolution Layer: 6 feature maps, 5x5 kernels
model.add(Conv2D(filters=6, kernel_size=(5,5), strides=(1,1), activation='relu', input_shape=input_shape))

# S2 SubSampling Layer: 2x2 average pooling
model.add(AveragePooling2D(pool_size=(2,2), strides=(2,2)))

# C3 Convolution Layer: 16 feature maps (LeNet-5 uses 16 here, not 6)
model.add(Conv2D(filters=16, kernel_size=(5,5), strides=(1,1), activation='relu'))

# S4 SubSampling Layer: 2x2 average pooling
model.add(AveragePooling2D(pool_size=(2,2), strides=(2,2)))

# Flatten the feature maps into a 1D vector BEFORE the dense layers; a Dense
# layer applied to the 4D tensor would only act on the last (channel) axis
# instead of connecting to every feature.
model.add(Flatten())

# C5 Fully Connected Layer
model.add(Dense(units=120, activation='relu'))

# FC6 Fully Connected Layer
model.add(Dense(units=84, activation='relu'))

# Output Layer: one softmax unit per digit class
model.add(Dense(units=10, activation='softmax'))

# Compile the Model (labels are one-hot encoded, hence categorical cross-entropy)
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

In [None]:

# Decrease learning rate each epoch: start at 1e-3 and multiply by 0.95 per epoch
annealer = LearningRateScheduler(lambda epoch: 1e-3 * 0.95 ** epoch)

# Number of training epochs
epochs = 25

# Fixed seed so both splits are identical on every Restart & Run All
SPLIT_SEED = 42

# Hold out 10% of the MNIST training data for validation
X_train2, X_val2, Y_train2, Y_val2 = train_test_split(
    X_train, Y_train, test_size=0.1, random_state=SPLIT_SEED)

# Hold out a further 1% of the remaining training data as a small extra test split
X_train2, X_test2, Y_train2, Y_test2 = train_test_split(
    X_train2, Y_train2, test_size=0.01, random_state=SPLIT_SEED)

In [None]:
# Fit the network on the training split, scoring the validation split each epoch
history = model.fit(
    x=X_train2,
    y=Y_train2,
    batch_size=64,
    epochs=epochs,
    validation_data=(X_val2, Y_val2),
    callbacks=[annealer],
    verbose=2,
)

# Report the highest per-epoch training and validation accuracy that was recorded
best_train_acc = max(history.history['accuracy'])
best_val_acc = max(history.history['val_accuracy'])
summary_template = "CNN {0:d}: Epochs={1:d}, Train accuracy={2:.5f}, Validation accuracy={3:.5f}"
print(summary_template.format(1, epochs, best_train_acc, best_val_acc))

In [None]:
# Plot training and validation loss/accuracy per epoch
def show_final_history(history):
    """Draw loss and accuracy curves side by side for a finished training run.

    Parameters
    ----------
    history : object returned by ``model.fit``
        Must expose ``epoch`` (x values) and a ``history`` dict holding the
        keys ``loss``, ``val_loss``, ``accuracy`` and ``val_accuracy``.
    """
    fig, ax = plt.subplots(1, 2, figsize=(24, 8))

    # (axis, title, y label, [(history key, legend label), ...])
    # The y labels carry no "(%)": the plotted values are the raw loss and
    # the accuracy as recorded in the history, not percentages.
    panels = [
        (ax[0], 'Loss rate over epochs', 'Loss',
         [("loss", "Training loss"), ("val_loss", "Validation loss")]),
        (ax[1], 'Accuracy over epochs', 'Accuracy',
         [("accuracy", "Training accuracy"), ("val_accuracy", "Validation accuracy")]),
    ]
    for axis, title, ylabel, curves in panels:
        axis.set_title(title)
        for key, label in curves:
            axis.plot(history.epoch, history.history[key], label=label)
        axis.set_xlabel('Epoch')
        axis.set_ylabel(ylabel)
        axis.legend()

    plt.show()

show_final_history(history)


In [None]:
# Use model to make predictions on test data
# (no preallocation needed: model.predict returns a fresh array of class scores)
test_scores = model.predict(X_test)

# Keep the index of the highest-scoring class per image as the predicted label
results = pd.Series(np.argmax(test_scores, axis=1), name="Label")

In [None]:
# Show the first 40 test images on a 4 x 10 grid, each titled with its predicted label
plt.figure(figsize=(15, 6))
for slot in range(1, 41):
    sample = slot - 1
    plt.subplot(4, 10, slot)
    plt.imshow(X_test[sample].reshape((28, 28)), cmap=plt.cm.binary)
    plt.title("Prediction: %d" % results[sample], y=0.9)
    plt.axis('off')
plt.subplots_adjust(wspace=0.3, hspace=-0.1)
plt.show()

In [None]:
# Confusion matrix for the validation split

# Predicted label per validation image: index of the highest model score
results2 = pd.Series(np.argmax(model.predict(X_val2), axis=1), name="Label")

# True label per validation image, recovered from the one-hot rows
Y_val2_labels = np.argmax(Y_val2, axis=1)

# Rows are true labels, columns are predicted labels
cm = confusion_matrix(Y_val2_labels, results2)

# Heatmap with seaborn; the colour scale is capped at 10
digit_ticks = np.arange(10)
sns.heatmap(
    cm,
    annot=True,
    fmt='g',
    cmap='PuRd',
    xticklabels=digit_ticks,
    yticklabels=digit_ticks,
    vmin=0,
    vmax=10,
)
plt.xlabel('Predicted value', fontsize=13)
plt.ylabel('True value', fontsize=13)
plt.title('Confusion Matrix', fontsize=17)
plt.show()

In [None]:
# Showing validation images that were incorrectly classified

# Positions where the predicted label differs from the true label
predicted_val = np.asarray(results2)
error_indices = np.flatnonzero(Y_val2_labels != predicted_val)
num_errors = len(error_indices)

num_cols = 5  # columns for subplots
num_rows = math.ceil(num_errors / num_cols)  # rows needed to fit every error

# Error rate as a fraction; guarded so an empty validation set cannot divide by zero
error_rate = num_errors / len(results2) if len(results2) else 0.0

print('Number of errors: ', num_errors)
# Scale by 100 so the printed values match their "%" labels
print('Accuracy %: ', 100 * (1 - error_rate))
print('Error %: ', 100 * error_rate)

if num_errors > 0:
    # squeeze=False keeps `axs` two-dimensional even when only one row is
    # needed; with zero errors the grid would have no rows, so it is skipped.
    fig, axs = plt.subplots(num_rows, num_cols, figsize=(15, 3 * num_rows), squeeze=False)
    flat_axes = axs.ravel()  # row-major order: slot k sits at (k // num_cols, k % num_cols)

    # Hide every frame up front so unused trailing slots stay blank
    for axis in flat_axes:
        axis.axis('off')

    # plot erroneous images
    for axis, idx in zip(flat_axes, error_indices):
        axis.imshow(X_val2[idx].reshape((28, 28)), cmap=plt.cm.binary)
        axis.set_title("Prediction: %d, Actual: %d" % (predicted_val[idx], Y_val2_labels[idx]), y=0.9)

    plt.subplots_adjust(wspace=0.3, hspace=-0.1)
    plt.show()

In [None]:
# Confusion matrix for the small held-out MNIST split

# X_test2 / Y_test2 are the 1% slice set aside from the training data by
# train_test_split; they are not passed to model.fit.
results3 = model.predict(X_test2)
results3 = np.argmax(results3, axis=1)
results3 = pd.Series(results3, name="Label")

# True labels recovered from the one-hot rows
Y_test2_labels = np.argmax(Y_test2, axis=1)

# Pin the label set so the matrix is always 10x10 and lines up with the tick
# labels below, even if a digit is missing from this small split.
digit_labels = np.arange(10)
cm = confusion_matrix(Y_test2_labels, results3, labels=digit_labels)

# plot confusion matrix with seaborn; the colour scale is capped at 4
sns.heatmap(cm, annot=True, fmt='g', cmap='PuRd',
            xticklabels=digit_labels, yticklabels=digit_labels, vmin=0, vmax=4)
plt.xlabel('Predicted value', fontsize=13)
plt.ylabel('True value', fontsize=13)
plt.title('Confusion Matrix', fontsize=17)
plt.show()

# Evaluating model performance using USPS dataset

In [None]:
# Load the USPS dataset from a local HDF5 file

# Path to the USPS file, relative to the notebook's working directory
USPS_PATH = 'usps.h5'

# Index the groups directly: a missing 'train'/'test'/'data'/'target' entry
# raises a KeyError naming the key, rather than failing later on None.
# The trailing [:] copies each dataset into memory before the file is closed.
with h5py.File(USPS_PATH, 'r') as hf:
    train = hf['train']
    X_train_usps = train['data'][:]
    Y_train_usps = train['target'][:]
    test = hf['test']
    X_test_usps = test['data'][:]
    Y_test_usps = test['target'][:]

#Process USPS images to be in correct format for model

# Pad images with zeros to resize them to 28x28
def pad_images(images, source_size=16, target_size=28):
    """Zero-pad a batch of square images up to ``target_size`` x ``target_size``.

    Parameters
    ----------
    images : array-like
        Batch of images; each one must hold ``source_size * source_size``
        values (flat or already square).
    source_size : int, default 16
        Side length of the incoming images.
    target_size : int, default 28
        Side length of the padded output.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_images, target_size, target_size, 1)`` with the
        input dtype. An empty batch gives shape ``(0, target_size, target_size, 1)``.

    Raises
    ------
    ValueError
        If ``target_size`` is smaller than ``source_size``.
    """
    margin = target_size - source_size
    if margin < 0:
        raise ValueError("target_size must be >= source_size")

    # Split the margin between both sides (6 + 6 for the 16 -> 28 default);
    # any odd leftover pixel goes to the bottom/right edge.
    before = margin // 2
    after = margin - before

    batch = np.asarray(images).reshape(-1, source_size, source_size)

    # Pad only the two spatial axes of the whole batch in a single call
    padded = np.pad(batch, ((0, 0), (before, after), (before, after)), mode='constant')

    # Append the single channel axis
    return padded[..., np.newaxis]

X_train_usps_p = pad_images(X_train_usps)
X_test_usps_p = pad_images(X_test_usps)

# Make sure both padded sets are 4D (samples, 28, 28, 1). The test result is
# stored back into X_test_usps_p so the raw X_test_usps array is not overwritten.
X_train_usps_p = X_train_usps_p.reshape(-1, 28, 28, 1)
X_test_usps_p = X_test_usps_p.reshape(-1, 28, 28, 1)

# NOTE(review): the MNIST inputs were scaled by 1/255 before training; confirm
# that usps.h5 stores pixels in a comparable range and polarity.

# One-hot encode labels. The encoder is left at its default (sparse) output and
# densified with .toarray(), so no `sparse` keyword is needed.
enc = OneHotEncoder(categories='auto')
Y_train_usps = enc.fit_transform(Y_train_usps.reshape(-1, 1)).toarray()
Y_test_usps = enc.transform(Y_test_usps.reshape(-1, 1)).toarray()


In [None]:
# Predictions for USPS
# (no preallocation needed: model.predict returns a fresh array of class scores)
usps_scores = model.predict(X_test_usps_p)
results_usps = pd.Series(np.argmax(usps_scores, axis=1), name="Label")

# Preview USPS predictions
# Show the padded TEST images, i.e. the images the predictions were made for;
# the raw 16x16 training vectors cannot be reshaped to 28x28.
plt.figure(figsize=(15,6))
for i in range(40):
    plt.subplot(4, 10, i+1)
    plt.imshow(X_test_usps_p[i].reshape((28,28)), cmap=plt.cm.binary)
    plt.title("Prediction: %d" % results_usps[i], y=0.9)
    plt.axis('off')
plt.subplots_adjust(wspace=0.3, hspace=-0.1)
plt.show()

In [None]:
# Calculate accuracy on USPS test data

# True labels recovered from the one-hot rows
Y_test_usps_labels = np.argmax(Y_test_usps, axis=1)

# Count the positions where the prediction differs from the true label
error_usps = int((Y_test_usps_labels != np.asarray(results_usps)).sum())

total_usps = len(Y_test_usps_labels)
# Error rate as a fraction; guarded so an empty test set cannot divide by zero
error_rate_usps = error_usps / total_usps if total_usps else 0.0

print('Number of errors: ', error_usps)
print('Total classifications: ', total_usps)
# Scale by 100 so the printed values match their "%" labels
print('Accuracy %: ', 100 * (1 - error_rate_usps))
print('Error %: ', 100 * error_rate_usps)

In [None]:
# Confusion matrix for the USPS test set: rows are true labels, columns are predictions
cm = confusion_matrix(Y_test_usps_labels, results_usps)

# Heatmap with seaborn; the colour scale is capped at 25
usps_ticks = np.arange(10)
sns.heatmap(
    cm,
    annot=True,
    fmt='g',
    cmap='PuRd',
    xticklabels=usps_ticks,
    yticklabels=usps_ticks,
    vmin=0,
    vmax=25,
)
plt.xlabel('Predicted value', fontsize=13)
plt.ylabel('True value', fontsize=13)
plt.title('Confusion Matrix', fontsize=17)
plt.show()