In [3]:
import numpy as np
from functions import *
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from mlp import MLP
import pickle

# Set to True to persist the trained weights and biases to a pickle file
# once training has finished.
save_model = True

# Hyperparameters
batch_size = 240  # Number of samples per mini-batch
learning_rate = 0.13  # Learning rate handed to the MLP for its weight updates
epochs = 20  # Number of full passes over the training set
input_size = 784  # Length of one flattened 28x28 MNIST image
output_size = 10  # One output neuron per digit class

# Preprocessing applied to every image: convert the PIL image to a tensor,
# then normalize the single channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# MNIST training split, downloaded into ./data when it is not already there.
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    transform=transform,
    download=True,
)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Initialize the MLP from the sizes declared above so that editing the
# hyperparameters actually changes the network; previously input_size and
# output_size were defined but never passed to the constructor.
mlp = MLP(
    input_size=input_size,
    output_size=output_size,
    learning_rate=learning_rate,
)
# Training loop: one forward pass, one backward pass and one parameter update
# per mini-batch, repeated for `epochs` passes over the training set.
for epoch in range(epochs):
    total_loss = 0.0  # Running sum of the per-batch losses for this epoch

    for batch_images, batch_labels in train_loader:
        # Flatten every image to a 1D vector (28x28 = 784). The row count is
        # taken from the batch itself rather than from `batch_size`, so a
        # smaller final batch (dataset size not divisible by batch_size) does
        # not make the reshape fail.
        inputs = batch_images.view(batch_images.shape[0], -1).numpy()

        # One-hot encode the integer labels by selecting rows of the identity
        # matrix; convert to a NumPy array first so the lookup is plain
        # integer-array indexing.
        targets = np.eye(output_size)[batch_labels.numpy()]

        # Forward pass: get the model predictions for this batch
        predictions = mlp.forward(inputs)

        # Compute the loss (cross-entropy between one-hot targets and predictions)
        loss = cross_entropy_loss(targets, predictions)
        total_loss += loss

        # Backpropagation: gradients for the weights and for the biases
        gradients_w, gradients_b = mlp.back_propagation(inputs, targets)

        # Update weights and biases
        mlp.update_weights(gradients_w, gradients_b)

    # Report the mean per-batch loss after every epoch
    print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss / len(train_loader)}")

# Save the model weights, biases and the constructor arguments needed to
# rebuild the network to a pickle file. Everything, including the file write
# and the confirmation message, happens only when save_model is True;
# previously only the dictionary was guarded, so disabling the flag raised a
# NameError on `model_parameters`.
if save_model:
    model_parameters = {
        "weights": mlp.weights,
        "biases": mlp.biases,
        "input_size": mlp.input_size,
        "hidden_layer_sizes": mlp.hidden_layer_sizes,
        "output_size": mlp.output_size,
        "learning_rate": mlp.learning_rate,
    }

    with open("mlp_model.pkl", "wb") as f:
        pickle.dump(model_parameters, f)

    print("Model saved successfully!")

Epoch 1/20, Loss: 2.3004975194180894
Epoch 2/20, Loss: 1.633078737325429
Epoch 3/20, Loss: 0.7761643164429516
Epoch 4/20, Loss: 0.4515405988337476
Epoch 5/20, Loss: 0.3656242952618376
Epoch 6/20, Loss: 0.40921484882612413
Epoch 7/20, Loss: 0.30383835069896387
Epoch 8/20, Loss: 0.28153205595546393
Epoch 9/20, Loss: 0.2546363619474503
Epoch 10/20, Loss: 0.24497405802247454
Epoch 11/20, Loss: 0.23278668916116232
Epoch 12/20, Loss: 0.22752159506571876
Epoch 13/20, Loss: 0.21269900686305265
Epoch 14/20, Loss: 0.2086051564184576
Epoch 15/20, Loss: 0.19765534169146967
Epoch 16/20, Loss: 0.1934451556674699
Epoch 17/20, Loss: 0.1965139794985459
Epoch 18/20, Loss: 0.22345550313905008
Epoch 19/20, Loss: 0.20712893996016848
Epoch 20/20, Loss: 0.1937831592786381
Model saved successfully!


In [2]:
# Read the saved parameter dictionary back from mlp_model.pkl.
# NOTE: pickle.load can execute arbitrary code; only open files you created.
with open("mlp_model.pkl", "rb") as f:
    model_parameters = pickle.load(f)

# Rebuild an MLP with the stored architecture by forwarding the saved
# constructor arguments as keywords.
mlp_eval = MLP(**{
    key: model_parameters[key]
    for key in ("input_size", "hidden_layer_sizes", "output_size", "learning_rate")
})

# Replace the freshly constructed parameters with the saved ones
mlp_eval.weights = model_parameters["weights"]
mlp_eval.biases = model_parameters["biases"]

# Preprocessing for the test images: tensor conversion followed by
# normalization with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# MNIST test split, downloaded into ./data when it is not already there.
test_dataset = datasets.MNIST(
    root='./data',
    train=False,
    transform=transform,
    download=True,
)

# Fixed order, 1000 images per batch.
test_loader = DataLoader(
    test_dataset,
    batch_size=1000,
    shuffle=False,
)

# Evaluate the model batch by batch and combine the per-batch accuracies into
# one figure for the whole test set.
all_accuracies = []  # Value returned by mlp_eval.evaluate for each batch
batch_sizes = []  # Number of samples in each batch, used as averaging weights
for batch_images, batch_labels in test_loader:
    # Flatten each image to a 1D vector and hand NumPy arrays to the model
    batch_images = batch_images.view(batch_images.shape[0], -1).numpy()
    batch_labels = batch_labels.numpy()
    # Presumably the fraction of correct predictions in the batch (it is
    # scaled by 100 and printed as a percentage below) - confirm against
    # MLP.evaluate.
    acc = mlp_eval.evaluate(batch_images, batch_labels)
    all_accuracies.append(acc)
    batch_sizes.append(batch_images.shape[0])

# Weight every batch by its size: a plain mean of per-batch accuracies is only
# correct when all batches are equally large, and over-weights a smaller final
# batch otherwise.
final_accuracy = np.average(all_accuracies, weights=batch_sizes)
print("Test Accuracy:", round(final_accuracy*100,2), "%")

Test Accuracy: 94.27 %
