In [10]:
import os
import numpy as np
from functions import *
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from mlp import MLP
import pickle

# Whether to persist the trained weights and biases to a pickle file
save_model = True

# Hyperparameters
batch_size = 240  # Batch size for mini-batch gradient descent
learning_rate = 0.1  # Learning rate for weight updates
epochs = 2  # Number of epochs to train
input_size = 784  # Input size (28x28 flattened images)
output_size = 10  # Number of output neurons (10 classes for MNIST)
activation_function = 'relu'  # Activation passed to the MLP; stored with the checkpoint so it can be rebuilt identically

# Average training loss per epoch
train_losses = []

# Load MNIST dataset and apply transformations (convert to tensor, normalize)
transform = transforms.Compose([
    transforms.ToTensor(),  # Convert PIL image to tensor
    transforms.Normalize((0.5,), (0.5,))  # Normalize with mean 0.5 and std 0.5
])

train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    transform=transform,
    download=True
)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Initialize MLP model
mlp = MLP(learning_rate=learning_rate, activation_function=activation_function)

# Training loop
for epoch in range(epochs):
    total_loss = 0  # Accumulated loss over all batches of this epoch

    for batch_images, batch_labels in train_loader:
        # Flatten each image to a 1D vector. Use the actual batch length rather
        # than `batch_size`: the last batch is smaller whenever batch_size does
        # not evenly divide the dataset, and view(batch_size, -1) would then fail.
        inputs = batch_images.view(batch_images.shape[0], -1).numpy()
        # One-hot encode the integer labels by selecting rows of the identity matrix
        targets = np.eye(output_size)[batch_labels.numpy()]

        # Forward pass: get model predictions
        predictions = mlp.forward(inputs)

        # Compute the cross-entropy loss for this batch
        loss = cross_entropy_loss(targets, predictions)
        total_loss += loss

        # Backpropagation: gradients of the loss w.r.t. weights and biases
        gradients_w, gradients_b = mlp.back_propagation(inputs, targets)

        # Update weights and biases
        mlp.update_weights(gradients_w, gradients_b)

    # Average the per-batch losses over the number of batches in the epoch
    avg_train_loss = total_loss / len(train_loader)
    train_losses.append(avg_train_loss)

    # Print loss after every epoch
    print(f"Epoch {epoch}, Loss: {avg_train_loss}")

print(train_losses)

# Save the model weights and biases to a pickle file
if save_model:
    # Everything needed to rebuild the network later
    model_parameters = {
        "weights": mlp.weights,
        "biases": mlp.biases,
        "input_size": mlp.input_size,
        "hidden_layer_sizes": mlp.hidden_layer_sizes,
        "output_size": mlp.output_size,
        "learning_rate": mlp.learning_rate,
        # Extra key (ignored by older loaders) so the forward pass can be
        # reproduced with the same activation that was used for training.
        "activation_function": activation_function,
    }

    # Define the directory where the model will be saved
    save_dir = "models_self_implementation"

    # Create the directory if it doesn't exist
    os.makedirs(save_dir, exist_ok=True)

    # Define the full path for the pickle file
    save_path = os.path.join(save_dir, "mlp_model.pkl")

    # Save the model parameters using pickle
    with open(save_path, "wb") as f:
        pickle.dump(model_parameters, f)

    print(f"Model saved successfully in '{save_path}'")

Epoch 0, Loss: 2.299649317812557
Epoch 1, Loss: 1.7599660963725636
[2.299649317812557, 1.7599660963725636]
Model saved successfully in 'models_self_implementation/mlp_model.pkl'


In [12]:
# Load Model
save_dir = "models_self_implementation"
model_filename = "mlp_model.pkl"
save_path = os.path.join(save_dir, model_filename)

# Fail fast when the checkpoint is missing. Silently skipping the load would
# either raise a confusing NameError below or, worse, reuse a stale
# `model_parameters` left in the kernel by an earlier cell.
if not os.path.exists(save_path):
    raise FileNotFoundError(
        f"No saved model found at '{save_path}'; run the training cell first."
    )

# NOTE: pickle.load can execute arbitrary code. Only load checkpoints that
# were produced by this notebook / a trusted source.
with open(save_path, "rb") as f:
    model_parameters = pickle.load(f)

# Recreate the MLP with the same structure
mlp_imp = MLP(
    input_size=model_parameters["input_size"],
    hidden_layer_sizes=model_parameters["hidden_layer_sizes"],
    output_size=model_parameters["output_size"],
    learning_rate=model_parameters["learning_rate"],
    # The training cell builds the network with activation_function='relu';
    # fall back to that value for checkpoints that do not record it, instead
    # of relying on whatever the MLP constructor default happens to be.
    activation_function=model_parameters.get("activation_function", "relu"),
)

# Assign saved weights and biases
mlp_imp.weights = model_parameters["weights"]
mlp_imp.biases = model_parameters["biases"]

# Load MNIST test dataset (same preprocessing as used for training)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)
test_loader = DataLoader(test_dataset, batch_size=1000, shuffle=False)

# Evaluate model batch by batch
all_accuracies = []  # accuracy (fraction correct) of each batch
batch_sizes = []     # number of samples in each batch, used as averaging weights
for batch_images, batch_labels in test_loader:
    # Flatten each image to a 1D vector and hand plain numpy arrays to the MLP
    batch_images = batch_images.view(batch_images.shape[0], -1).numpy()
    batch_labels = batch_labels.numpy()
    acc = mlp_imp.evaluate(batch_images, batch_labels)
    all_accuracies.append(acc)
    batch_sizes.append(len(batch_labels))

print(all_accuracies)
# Weight each batch by its size so the result is the true dataset accuracy
# even if the last batch is smaller than the others.
final_accuracy = np.average(all_accuracies, weights=batch_sizes)
print("Test Accuracy:", round(final_accuracy*100,2), "%")

[0.431, 0.421, 0.412, 0.435, 0.442, 0.49, 0.469, 0.474, 0.5, 0.494]
Test Accuracy: 45.68 %


In [3]:
# Load Model
save_dir = "models_self_implementation"
model_filename = "mlp_model.pkl"
save_path = os.path.join(save_dir, model_filename)

# Fail fast when the checkpoint is missing. Silently skipping the load would
# either raise a confusing NameError below or, worse, reuse a stale
# `model_parameters` left in the kernel by an earlier cell.
if not os.path.exists(save_path):
    raise FileNotFoundError(
        f"No saved model found at '{save_path}'; run the training cell first."
    )

# NOTE: pickle.load can execute arbitrary code. Only load checkpoints that
# were produced by this notebook / a trusted source.
with open(save_path, "rb") as f:
    model_parameters = pickle.load(f)

# Recreate the MLP with the same structure
mlp_imp = MLP(
    input_size=model_parameters["input_size"],
    hidden_layer_sizes=model_parameters["hidden_layer_sizes"],
    output_size=model_parameters["output_size"],
    learning_rate=model_parameters["learning_rate"],
    # The training cell builds the network with activation_function='relu';
    # fall back to that value for checkpoints that do not record it, instead
    # of relying on whatever the MLP constructor default happens to be.
    activation_function=model_parameters.get("activation_function", "relu"),
)

# Assign saved weights and biases
mlp_imp.weights = model_parameters["weights"]
mlp_imp.biases = model_parameters["biases"]

# Load MNIST test dataset (same preprocessing as used for training)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)

# Number of test images to inspect (indices 0 .. n-1)
n = 10  # Change this value as needed

# Loop over the specified range of indices
for idx in range(n):
    # Get the image and label from the dataset
    image, label = test_dataset[idx]

    # Flatten the image and add a leading batch axis of length 1,
    # since the network is fed 2D (batch, features) arrays
    image_flat = image.view(-1).numpy().reshape(1, -1)

    # Run forward pass through the network
    output = mlp_imp.forward(image_flat)

    # The predicted class is the index of the largest output
    predicted_label = np.argmax(output, axis=1)[0]

    # Print the result
    print(f"Index: {idx}, Predicted Label: {predicted_label}, Actual Label: {label}")

Index: 0, Predicted Label: 7, Actual Label: 7
Index: 1, Predicted Label: 2, Actual Label: 2
Index: 2, Predicted Label: 1, Actual Label: 1
Index: 3, Predicted Label: 0, Actual Label: 0
Index: 4, Predicted Label: 9, Actual Label: 4
Index: 5, Predicted Label: 1, Actual Label: 1
Index: 6, Predicted Label: 9, Actual Label: 4
Index: 7, Predicted Label: 4, Actual Label: 9
Index: 8, Predicted Label: 8, Actual Label: 5
Index: 9, Predicted Label: 7, Actual Label: 9
