In [2]:
import numpy as np 
import struct
from array import array
import os
import random
import matplotlib.pyplot as plt
from mlp import MultilayerPerceptron, Layer, CrossEntropy, Relu, Softmax

# -------------------- MNIST Data Loader Class -------------------- #

class MnistDataloader(object):
    """
    A class to load and preprocess the MNIST dataset from IDX files.
    """
    def __init__(self, training_images_filepath, training_labels_filepath,
                 test_images_filepath, test_labels_filepath):
        self.training_images_filepath = training_images_filepath
        self.training_labels_filepath = training_labels_filepath
        self.test_images_filepath = test_images_filepath
        self.test_labels_filepath = test_labels_filepath
    
    def read_images_labels(self, images_filepath, labels_filepath):        
        """
        Reads image and label data from IDX files.
        """
        # Read labels
        labels = []
        with open(labels_filepath, 'rb') as file:
            magic, size = struct.unpack(">II", file.read(8))  # Read magic number and size
            if magic != 2049:
                raise ValueError(f'Magic number mismatch, expected 2049, got {magic}')
            labels = array("B", file.read())        
        
        # Read images
        with open(images_filepath, 'rb') as file:
            magic, size, rows, cols = struct.unpack(">IIII", file.read(16))  # Read metadata
            if magic != 2051:
                raise ValueError(f'Magic number mismatch, expected 2051, got {magic}')
            image_data = array("B", file.read())  # Read pixel data        
        
        # Reshape image data into (size, 28, 28)
        images = [np.array(image_data[i * rows * cols:(i + 1) * rows * cols]).reshape(28, 28) for i in range(size)]
        
        return images, labels
            
    def load_data(self):
        """
        Loads training and test datasets.
        """
        x_train, y_train = self.read_images_labels(self.training_images_filepath, self.training_labels_filepath)
        x_test, y_test = self.read_images_labels(self.test_images_filepath, self.test_labels_filepath)
        return (x_train, y_train), (x_test, y_test)  

# -------------------- Reading Dataset via MnistDataloader class -------------------- #

data_folder = 'MNIST-data/'  # Dataset directory

# File paths for MNIST dataset
training_images_filepath = os.path.join(data_folder, 'train-images.idx3-ubyte')
training_labels_filepath = os.path.join(data_folder, 'train-labels.idx1-ubyte')
test_images_filepath = os.path.join(data_folder, 't10k-images.idx3-ubyte')
test_labels_filepath = os.path.join(data_folder, 't10k-labels.idx1-ubyte')

# Loading MNIST dataset
mnist_dataloader = MnistDataloader(training_images_filepath, training_labels_filepath, test_images_filepath, test_labels_filepath)
(x_train, y_train), (x_test, y_test) = mnist_dataloader.load_data()

# Converting lists to NumPy arrays and normalize pixel values
x_train = np.array(x_train).reshape(-1, 784) / 255.0  # Normalize & flatten
x_test = np.array(x_test).reshape(-1, 784) / 255.0
y_train = np.array(y_train)
y_test = np.array(y_test)

# Manual Train-Validation Split (80% train, 20% validation)
split_idx = int(len(x_train) * 0.8)
x_train, x_val = x_train[:split_idx], x_train[split_idx:]
y_train, y_val = y_train[:split_idx], y_train[split_idx:]

# Function to one-hot encode labels
def one_hot_encode(labels, num_classes=10):
    one_hot = np.zeros((len(labels), num_classes))
    for i, label in enumerate(labels):
        one_hot[i][label] = 1
    return one_hot

# Converting labels to one-hot encoding
y_train = one_hot_encode(y_train)
y_val = one_hot_encode(y_val)
y_test = one_hot_encode(y_test)

# Function to display sample images
def show_images(images, title_texts):
    cols = 5
    rows = int(len(images) / cols) + 1
    plt.figure(figsize=(30, 20))
    index = 1
    for img, title in zip(images, title_texts):
        plt.subplot(rows, cols, index)
        plt.imshow(img, cmap=plt.cm.gray)
        plt.title(title, fontsize=15)
        index += 1

# -------------------- MLP Architecture for MNIST Dataset -------------------- #

# Define different epoch values to test
epoch_values = [10, 20, 50, 100, 200]

# Fixed layer architecture for comparison
fixed_layers = [
    Layer(784, 128, Relu(), dropout_rate=0.0),
    Layer(128, 128, Relu(), dropout_rate=0.0),
    Layer(128, 10, Softmax())
]

# Store results of test accuracies
results = {}

# Loop through different epoch values
for idx, epochs in enumerate(epoch_values):
    print(f"\nTraining with {epochs} Epochs")

    # Define the MLP model with fixed layers
    mlp = MultilayerPerceptron(fixed_layers)

    # Define loss function
    loss_function = CrossEntropy()

    # Train the model
    train_losses, val_losses = mlp.train(
        x_train, y_train, x_val, y_val,
        loss_function, learning_rate=0.001,  # Keeping learning rate constant
        batch_size=64, epochs=epochs, optimizer='vanilla', momentum=0.7  # Keeping momentum constant
    )

    # Evaluating model on test set
    y_pred = mlp.forward(x_test)
    test_accuracy = np.mean(np.argmax(y_pred, axis=1) == np.argmax(y_test, axis=1))

    # Store the result
    results[idx] = {
        "epochs": epochs,
        "test_accuracy": test_accuracy
    }

    print(f"Epochs {epochs}: Test Accuracy: {test_accuracy * 100:.2f}%")

# Find the best epoch value based on test accuracy
best_epoch_idx = max(results, key=lambda x: results[x]["test_accuracy"])
best_epoch = results[best_epoch_idx]

print("\nBest Epoch Configuration:")
print(f"Epochs: {best_epoch['epochs']}")
print(f"Test Accuracy: {best_epoch['test_accuracy'] * 100:.2f}%")




Training with 10 Epochs
Epoch 1/10 - Train Loss: 0.3045, Val Loss: 0.1739
Epoch 2/10 - Train Loss: 0.1273, Val Loss: 0.1185
Epoch 3/10 - Train Loss: 0.0887, Val Loss: 0.1203
Epoch 4/10 - Train Loss: 0.0665, Val Loss: 0.1027
Epoch 5/10 - Train Loss: 0.0515, Val Loss: 0.0925
Epoch 6/10 - Train Loss: 0.0402, Val Loss: 0.0900
Epoch 7/10 - Train Loss: 0.0317, Val Loss: 0.0988
Epoch 8/10 - Train Loss: 0.0263, Val Loss: 0.1004
Epoch 9/10 - Train Loss: 0.0196, Val Loss: 0.0918
Epoch 10/10 - Train Loss: 0.0161, Val Loss: 0.1014
Epochs 10: Test Accuracy: 97.52%

Training with 20 Epochs
Epoch 1/20 - Train Loss: 0.0130, Val Loss: 0.0910
Epoch 2/20 - Train Loss: 0.0086, Val Loss: 0.0861
Epoch 3/20 - Train Loss: 0.0064, Val Loss: 0.0873
Epoch 4/20 - Train Loss: 0.0038, Val Loss: 0.0878
Epoch 5/20 - Train Loss: 0.0022, Val Loss: 0.0891
Epoch 6/20 - Train Loss: 0.0015, Val Loss: 0.0900
Epoch 7/20 - Train Loss: 0.0012, Val Loss: 0.0928
Epoch 8/20 - Train Loss: 0.0010, Val Loss: 0.0930
Epoch 9/20 - Tra