In [2]:
import numpy as np 
import struct
from array import array
import os
import random
import matplotlib.pyplot as plt
from mlp import MultilayerPerceptron, Layer, CrossEntropy, Relu, Softmax

# -------------------- MNIST Data Loader Class -------------------- #

class MnistDataloader(object):
    """
    Load MNIST-style image and label sets stored as IDX files.

    The four file paths given at construction time are only stored;
    nothing is read from disk until `load_data` (or
    `read_images_labels`) is called.
    """
    def __init__(self, training_images_filepath, training_labels_filepath,
                 test_images_filepath, test_labels_filepath):
        self.training_images_filepath = training_images_filepath
        self.training_labels_filepath = training_labels_filepath
        self.test_images_filepath = test_images_filepath
        self.test_labels_filepath = test_labels_filepath

    def read_images_labels(self, images_filepath, labels_filepath):
        """
        Read one image file and its matching label file.

        Args:
            images_filepath: Path to an IDX image file (magic number 2051).
            labels_filepath: Path to an IDX label file (magic number 2049).

        Returns:
            A tuple `(images, labels)`: `images` is a list of 2-D NumPy
            arrays, one per image, shaped by the row/column counts in the
            image file header; `labels` is an `array("B")` of the label
            bytes.

        Raises:
            ValueError: If either file starts with an unexpected magic number.
        """
        # Label file: 8-byte big-endian header (magic, count), then one byte per label
        with open(labels_filepath, 'rb') as file:
            magic, size = struct.unpack(">II", file.read(8))
            if magic != 2049:
                raise ValueError(f'Magic number mismatch, expected 2049, got {magic}')
            labels = array("B", file.read())

        # Image file: 16-byte big-endian header (magic, count, rows, cols), then raw pixels
        with open(images_filepath, 'rb') as file:
            magic, size, rows, cols = struct.unpack(">IIII", file.read(16))
            if magic != 2051:
                raise ValueError(f'Magic number mismatch, expected 2051, got {magic}')
            image_data = array("B", file.read())

        # Use the dimensions declared in the header rather than assuming
        # 28x28, so IDX files with other image sizes load correctly.
        pixels_per_image = rows * cols
        images = [
            np.array(
                image_data[i * pixels_per_image:(i + 1) * pixels_per_image]
            ).reshape(rows, cols)
            for i in range(size)
        ]

        return images, labels

    def load_data(self):
        """
        Load the training and test sets from the configured file paths.

        Returns:
            `((x_train, y_train), (x_test, y_test))`, where each pair is
            the result of `read_images_labels`.
        """
        x_train, y_train = self.read_images_labels(self.training_images_filepath, self.training_labels_filepath)
        x_test, y_test = self.read_images_labels(self.test_images_filepath, self.test_labels_filepath)
        return (x_train, y_train), (x_test, y_test)

# -------------------- Reading Dataset via MnistDataloader class -------------------- #

data_folder = 'MNIST-data/'  # Directory that holds the four IDX files

# Resolve each MNIST file path relative to the dataset directory
(training_images_filepath,
 training_labels_filepath,
 test_images_filepath,
 test_labels_filepath) = (
    os.path.join(data_folder, filename)
    for filename in (
        'train-images.idx3-ubyte',
        'train-labels.idx1-ubyte',
        't10k-images.idx3-ubyte',
        't10k-labels.idx1-ubyte',
    )
)

# Read the raw training and test sets from disk
mnist_dataloader = MnistDataloader(
    training_images_filepath,
    training_labels_filepath,
    test_images_filepath,
    test_labels_filepath,
)
(x_train, y_train), (x_test, y_test) = mnist_dataloader.load_data()

# Flatten every image into a 784-element row, then scale pixels by 1/255
x_train = np.array(x_train).reshape(-1, 784)
x_train = x_train / 255.0
x_test = np.array(x_test).reshape(-1, 784)
x_test = x_test / 255.0

# Labels become NumPy arrays so they can be sliced alongside the images
y_train = np.array(y_train)
y_test = np.array(y_test)

# Ordered (unshuffled) hold-out: first 80% for training, last 20% for validation.
# The validation slices are taken first, while x_train/y_train are still whole.
split_idx = int(len(x_train) * 0.8)
x_val, y_val = x_train[split_idx:], y_train[split_idx:]
x_train, y_train = x_train[:split_idx], y_train[:split_idx]

# Function to one-hot encode labels
def one_hot_encode(labels, num_classes=10):
    """
    Convert a sequence of integer class labels into one-hot rows.

    Args:
        labels: Sequence of integer class indices, one per sample.
        num_classes: Width of each encoded row.

    Returns:
        A float array of shape `(len(labels), num_classes)` that is zero
        everywhere except for a 1 at column `labels[i]` of row `i`.
    """
    sample_count = len(labels)
    encoded = np.zeros((sample_count, num_classes))
    # Pair every row index with its label and set all of those cells at once
    encoded[np.arange(sample_count), labels] = 1
    return encoded

# Converting labels to one-hot encoding
y_train, y_val, y_test = (
    one_hot_encode(split_labels) for split_labels in (y_train, y_val, y_test)
)

# Function to display sample images
def show_images(images, title_texts):
    """
    Draw images on a 5-column grid of subplots, each with its own title.

    Args:
        images: Sequence of images accepted by `plt.imshow`.
        title_texts: Sequence of titles, paired with `images` in order.
            Pairing stops at the shorter of the two sequences.

    A new 30x20 figure is created on every call; nothing is returned.
    """
    cols = 5
    # Ceiling division, so a count that is an exact multiple of `cols` does
    # not reserve an extra empty row; keep at least one row for empty input.
    rows = max(1, -(-len(images) // cols))
    plt.figure(figsize=(30, 20))
    # Subplot positions are 1-based
    for index, (img, title) in enumerate(zip(images, title_texts), start=1):
        plt.subplot(rows, cols, index)
        plt.imshow(img, cmap=plt.cm.gray)
        plt.title(title, fontsize=15)

# -------------------- MLP Architecture for MNIST Dataset -------------------- #

# Define different learning rates to test
learning_rates = [0.0001, 0.0005, 0.001, 0.005, 0.01]


def build_fixed_layers():
    """Return newly constructed layers for the fixed comparison architecture."""
    return [
        Layer(784, 128, Relu(), dropout_rate=0.2),
        Layer(128, 128, Relu(), dropout_rate=0.2),
        Layer(128, 10, Softmax())
    ]


# Store results of test accuracies, keyed by position in `learning_rates`
results = {}

# Loop through different learning rates
for idx, lr in enumerate(learning_rates):
    print(f"\nTraining with Learning Rate: {lr}")

    # Build new Layer objects for every run. Reusing one shared list would
    # hand each model the very same Layer instances, so a run would
    # presumably start from the weights left by the previous learning rate
    # (assumes Layer holds its own parameters -- confirm in the mlp module).
    fixed_layers = build_fixed_layers()
    mlp = MultilayerPerceptron(fixed_layers)

    # Define loss function
    loss_function = CrossEntropy()

    # Train the model
    # NOTE(review): `momentum` is passed together with optimizer='vanilla';
    # presumably it is unused by that optimizer -- verify in the mlp module.
    train_losses, val_losses = mlp.train(
        x_train, y_train, x_val, y_val,
        loss_function, learning_rate=lr,
        batch_size=64, epochs=50, optimizer='vanilla', momentum=0.7
    )

    # Evaluating model on test set: a prediction counts as correct when the
    # highest-scoring class matches the one-hot target's class
    y_pred = mlp.forward(x_test)
    test_accuracy = np.mean(np.argmax(y_pred, axis=1) == np.argmax(y_test, axis=1))

    # Store the result
    results[idx] = {
        "learning_rate": lr,
        "test_accuracy": test_accuracy
    }

    print(f"Learning Rate {lr}: Test Accuracy: {test_accuracy * 100:.2f}%")

# Find the best learning rate based on test accuracy
# NOTE(review): choosing a hyperparameter on the test set makes the reported
# test accuracy optimistic; consider selecting on the validation split.
best_lr_idx = max(results, key=lambda x: results[x]["test_accuracy"])
best_learning_rate = results[best_lr_idx]

print("\nBest Learning Rate Configuration:")
print(f"Learning Rate: {best_learning_rate['learning_rate']}")
print(f"Test Accuracy: {best_learning_rate['test_accuracy'] * 100:.2f}%")


Training with Learning Rate: 0.0001
Epoch 1/50 - Train Loss: 0.9361, Val Loss: 0.4699
Epoch 2/50 - Train Loss: 0.4278, Val Loss: 0.3471
Epoch 3/50 - Train Loss: 0.3444, Val Loss: 0.2991
Epoch 4/50 - Train Loss: 0.2950, Val Loss: 0.2621
Epoch 5/50 - Train Loss: 0.2658, Val Loss: 0.2422
Epoch 6/50 - Train Loss: 0.2439, Val Loss: 0.2223
Epoch 7/50 - Train Loss: 0.2213, Val Loss: 0.2098
Epoch 8/50 - Train Loss: 0.2049, Val Loss: 0.1991
Epoch 9/50 - Train Loss: 0.1909, Val Loss: 0.1874
Epoch 10/50 - Train Loss: 0.1787, Val Loss: 0.1811
Epoch 11/50 - Train Loss: 0.1704, Val Loss: 0.1704
Epoch 12/50 - Train Loss: 0.1600, Val Loss: 0.1681
Epoch 13/50 - Train Loss: 0.1526, Val Loss: 0.1610
Epoch 14/50 - Train Loss: 0.1462, Val Loss: 0.1554
Epoch 15/50 - Train Loss: 0.1402, Val Loss: 0.1484
Epoch 16/50 - Train Loss: 0.1322, Val Loss: 0.1470
Epoch 17/50 - Train Loss: 0.1274, Val Loss: 0.1406
Epoch 18/50 - Train Loss: 0.1235, Val Loss: 0.1388
Epoch 19/50 - Train Loss: 0.1197, Val Loss: 0.1367
Epo