#### INM702 Programming and Mathematics for Artificial Intelligence Coursework

**Importing Libraries** 

In [None]:
# Importing libraries

import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import mnist

**Loading and Preprocessing the MNIST Dataset**

In [None]:
# Loading the MNIST dataset and preprocessing it (normalize pixel values, reshape, etc.)

# Defining function
def load_mnist_and_print():
    """Load MNIST, flatten and normalise the images, print a sample and return the data.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test)``. Each ``X_*`` array has one row
        per image (flattened with ``reshape(n, -1)``) and its pixel values
        divided by 255.0; each ``y_*`` array holds the labels exactly as
        returned by ``mnist.load_data()``.
    """
    # mnist.load_data() already returns the data split into training and testing sets
    (X_train, y_train), (X_test, y_test) = mnist.load_data()

    # Flattening every image into a single row and scaling the pixel values by 1/255
    X_train = X_train.reshape(X_train.shape[0], -1) / 255.0
    X_test = X_test.reshape(X_test.shape[0], -1) / 255.0

    # Printing a small part of the dataset
    print("X_train sample shape:", X_train.shape)
    print("X_train sample:", X_train[0])
    print("y_train sample:", y_train[0])
    print("X_test sample shape:", X_test.shape)
    print("X_test sample:", X_test[0])
    print("y_test sample:", y_test[0])

    # Returning the arrays so that later cells do not rely on hidden kernel state
    return X_train, y_train, X_test, y_test

# Calling the function and keeping the preprocessed data for the following cells
X_train, y_train, X_test, y_test = load_mnist_and_print()
    

In [None]:
# The output indicates:

# X_train sample shape: This shows that we have 60,000 samples in the training set, each flattened into a vector of 784 elements 
#(as the original 28x28 pixel images are flattened).

# X_train sample: This is the first image in the training set, represented as a 1D array of normalised pixel values (ranging from 0 to 1).

# y_train sample: This indicates the label of the first image in the training dataset, which is "5" in this case.

# X_test sample shape: This shows the shape of the test dataset with 10,000 samples, also flattened into vectors of 784 elements.

# X_test sample: This represents the first image in the test set, similar to the training set.

# y_test sample: The label for the first image in the test set, which is "7".

In [None]:
# Defining a function to plot a single image from the dataset:

def plot_image(data, index, labels=None):
    """Plot a single image from the dataset with its label as the title.

    Parameters
    ----------
    data : array-like
        Collection of images. ``data[index]`` may be a 2D image or a flattened
        1D vector whose length is a perfect square (it is reshaped to a square
        image before plotting).
    index : int
        Position of the image to display.
    labels : array-like, optional
        Labels aligned with ``data``. When omitted, the notebook-level
        ``y_train`` is used (the original behaviour), which is only correct
        when ``data`` is the training set; pass ``labels`` explicitly for any
        other dataset.

    Raises
    ------
    ValueError
        If the selected image is 1D and its length is not a perfect square.
    """
    if labels is None:
        # Backward-compatible fallback for callers that pass only (data, index)
        labels = y_train

    image = np.asarray(data[index])
    if image.ndim == 1:
        # imshow needs a 2D array, so a flattened image is restored to a square
        side = int(round(np.sqrt(image.size)))
        if side * side != image.size:
            raise ValueError(
                f"Cannot reshape a flattened image of {image.size} pixels into a square"
            )
        image = image.reshape(side, side)

    plt.figure(figsize=(4, 4))
    plt.imshow(image, cmap='gray')
    plt.title(f"Label: {labels[index]}")
    plt.show()

# Visualising the image at a specific index
plot_image(X_train, 0, y_train)  # This is index 0, we can see other images by changing the index

In [None]:
# Defining a function to plot multiple images from the dataset:

def plot_images(data, labels, start_index, num_images):
    """Plot ``num_images`` consecutive images in a grid, each with its label below it.

    Parameters
    ----------
    data : array-like
        Collection of images. Each entry may be a 2D image or a flattened 1D
        vector whose length is a perfect square (it is reshaped to a square
        image before plotting).
    labels : array-like
        Labels aligned with ``data``.
    start_index : int
        Index of the first image to display.
    num_images : int
        Number of images to display. Up to 25 images use a 5x5 grid on an
        8x8 figure; larger values add rows and scale the figure height.

    Raises
    ------
    ValueError
        If an image is 1D and its length is not a perfect square.
    """
    num_cols = 5
    # At least 5 rows (the original 5x5 layout); more rows only when needed,
    # so that plt.subplot never receives an index outside the grid
    num_rows = max(5, -(-num_images // num_cols))  # ceiling division
    plt.figure(figsize=(8, 8 * num_rows / 5))  # 8x8 for the default 5 rows

    # Looping through the specified number of images
    for i in range(num_images):
        image = np.asarray(data[start_index + i])
        if image.ndim == 1:
            # imshow needs a 2D array, so a flattened image is restored to a square
            side = int(round(np.sqrt(image.size)))
            if side * side != image.size:
                raise ValueError(
                    f"Cannot reshape a flattened image of {image.size} pixels into a square"
                )
            image = image.reshape(side, side)

        plt.subplot(num_rows, num_cols, i + 1)  # Placing each image in sequence within the grid
        plt.xticks([])  # Removing the x-axis tick marks
        plt.yticks([])  # Removing the y-axis tick marks
        plt.imshow(image, cmap=plt.cm.binary)  # Displaying the image with the binary colour map
        plt.xlabel(f"Label: {labels[start_index + i]}")  # Adding the corresponding label below the image
    plt.show()  # Displaying the entire set of subplots

# Plotting the first 25 images from the dataset
plot_images(X_train, y_train, 0, 25)

In [None]:
# Neural Network Class - Implement a neural network class with:
# Sigmoid and ReLU activation functions
# Softmax layer for classification
# Dropout for regularization
# Configurable architecture and parameters

# Forward and Backward Pass:
# Implement the forward pass to compute predictions
# Implement the backward pass to compute gradients using backpropagation.

**Implementing sigmoid and ReLU layers**

In [None]:
# For this sub-task, you should implement forward and backward pass for sigmoid and ReLU
# You should consider presenting these activation functions in the report, along with their pros and cons, if any

**Implementing softmax layer**

In [None]:
# Implement softmax with both forward and backward pass
# Present the softmax in the report along with any numerical issues when calculating the softmax function

**Implementing dropout**

In [None]:
# Present dropout in the report
# Implement inverted dropout
# Forward and backward pass should be implemented
# Note: Since the test performance is critical, it is preferable to leave the forward pass unchanged at test time
# Therefore, in most implementations inverted dropout is employed to overcome the undesirable property of the original dropout

# Loss Function:
# Use a suitable loss function (e.g., cross-entropy) for classification tasks

**Implementing a fully parametrizable neural network class**

In [None]:
# Implement a fully-connected NN class where the number of hidden layers, units, and activation functions can be changed
# In addition, add dropout or regularizer (L1 or L2)
# Report the parameters used (update rule, learning rate, decay, epochs, batch size) and include the plots in your report

**Implementing optimizer**

In [None]:
# Implement any two optimizers of your choice
# Briefly present the optimizers in the report
# The optimizers can be flavours of gradient descent. For instance: Stochastic gradient descent (SGD) and SGD with momentum
# SGD and mini-batch gradient descent, etc


# Optimizer:
# Implement an optimizer (e.g., SGD, Adam) to update weights based on gradients

# Training Loop:
# Train your neural network using the training set
# Monitor performance on the validation set

**Evaluating different neural network architectures/parameters, presenting and discussing the results**

In [None]:
# Be creative in the analysis and discussion.
# Evaluate different hyperparameters.
# For instance: different network architectures, activation functions, comparison of optimizers, L1/L2 performance comparison with dropout, etc. 
# Support your results with plots/graphs and discussion


# Evaluation:
# Evaluate different architectures and parameters:
# Vary the number of layers, neurons per layer, learning rate, dropout rates, etc
# Use a validation set to choose the best-performing model

# Testing:
# Assess the final model on the test set to estimate its generalization performance

# Analysis:
# Analyze the results, understand the impact of different parameters on performance, and draw conclusions

# Documentation:
# Keep track of configurations, results, and any observations during the experimentation

In [None]:
# Implement any two optimizers of your choice
# Briefly present the optimizers in the report
# The optimizers can be flavours of gradient descent. For instance: Stochastic gradient descent (SGD) and SGD with momentum
# SGD and mini-batch gradient descent, etc


# Optimizer:
# Implement an optimizer (e.g., SGD, Adam) to update weights based on gradients

# Training Loop:
# Train your neural network using the training set
# Monitor performance on the validation set

**Evaluating different neural network architectures/parameters, presenting and discussing the results**

In [None]:
# Be creative in the analysis and discussion.
# Evaluate different hyperparameters.
# For instance: different network architectures, activation functions, comparison of optimizers, L1/L2 performance comparison with dropout, etc. 
# Support your results with plots/graphs and discussion


# Evaluation:
# Evaluate different architectures and parameters:
# Vary the number of layers, neurons per layer, learning rate, dropout rates, etc
# Use a validation set to choose the best-performing model

# Testing:
# Assess the final model on the test set to estimate its generalization performance

# Analysis:
# Analyze the results, understand the impact of different parameters on performance, and draw conclusions

# Documentation:
# Keep track of configurations, results, and any observations during the experimentation