In [1]:
import os
import numpy as np
from functions import *
from mlp import MLP
import pickle

# Toggle: when True, the trained weights and biases are pickled to disk
# once training has finished.
save_model = False

# ---- Hyperparameters ----
batch_size = 200                  # samples per mini-batch
learning_rate = 0.1               # step size used for the weight updates
epochs = 2                        # full passes over the training set
input_size = 784                  # a 28x28 image flattened to one vector
output_size = 10                  # one output neuron per MNIST class
hidden_layer_sizes = [128, 128]   # neurons in each hidden layer

# Average training loss of every epoch, appended by the training loop.
train_losses = []

# Data loaders that yield (images, labels) mini-batches.
train_loader, test_loader = load_mnist(batch_size=batch_size)

# Build the network that will be trained below.
mlp = MLP(
    learning_rate=learning_rate,
    input_size=input_size,
    output_size=output_size,
    hidden_layer_sizes=hidden_layer_sizes,
    activation_function='relu',
)

# Training loop: one pass over `train_loader` per epoch, updating the model
# after every mini-batch and recording the mean batch loss of the epoch.
for epoch in range(epochs):
    total_loss = 0  # running sum of the per-batch losses for this epoch

    for batch_images, batch_labels in train_loader:
        # Flatten every image to a 1-D vector (28x28 = 784) and convert to
        # NumPy once; the forward and the backward pass use the same array.
        inputs = batch_images.view(batch_images.size(0), -1).numpy()
        # One-hot encode the integer class labels.
        targets = np.eye(output_size)[batch_labels]

        # Forward pass: get the model predictions for this batch.
        predictions = mlp.forward(inputs)

        # Accumulate the cross-entropy loss of the batch.
        loss = cross_entropy_loss(targets, predictions)
        total_loss += loss

        # Backward pass: gradients for the weights and for the biases.
        gradients_w, gradients_b = mlp.back_propagation(inputs, targets)

        # Apply the gradients to the model parameters.
        mlp.update_weights(gradients_w, gradients_b)

    # Mean loss over all batches of the epoch.
    avg_train_loss = total_loss / len(train_loader)
    train_losses.append(avg_train_loss)

    # Report progress once per epoch.
    print(f"Epoch {epoch}, Loss: {avg_train_loss}")

# Optionally persist the trained network so a later cell can reload it.
if save_model:
    # Attributes of `mlp` that are written to disk, in the order they are
    # stored; each one becomes a key of the saved dictionary.
    saved_attributes = (
        "weights",
        "biases",
        "input_size",
        "hidden_layer_sizes",
        "output_size",
        "learning_rate",
    )
    model_parameters = {attr: getattr(mlp, attr) for attr in saved_attributes}

    # Target folder for the pickle file; created on demand.
    save_dir = "models_self_implementation"
    os.makedirs(save_dir, exist_ok=True)

    # Full path of the pickle file inside that folder.
    save_path = os.path.join(save_dir, "mlp_model.pkl")

    # Serialize the parameter dictionary with pickle.
    with open(save_path, "wb") as f:
        pickle.dump(model_parameters, f)

    print(f"Model saved successfully in '{save_path}'")

  Referenced from: <CAF361F5-1CAC-3EBE-9FC4-4B823D275CAA> /opt/anaconda3/envs/computer_vision/lib/python3.8/site-packages/torchvision/image.so
  warn(


Epoch 0, Loss: 0.5406262880871573


KeyboardInterrupt: 

In [2]:
# Aliases for the data loaders created in the training cell.
TRAIN = train_loader
TEST = test_loader

# Location of the pickled model parameters.
save_dir = "models_self_implementation"
model_filename = "mlp_model.pkl"
save_path = os.path.join(save_dir, model_filename)

# Fail fast with a clear message. Without this guard a missing file would
# only surface further down as a NameError on `model_parameters`, or the
# cell would silently reuse a stale `model_parameters` left in the kernel.
if not os.path.exists(save_path):
    raise FileNotFoundError(
        f"No saved model found at '{save_path}'. "
        "Run the training cell with save_model=True first."
    )

# NOTE: pickle.load can execute arbitrary code, so only load files that
# were produced by this notebook or come from a trusted source.
with open(save_path, "rb") as f:
    model_parameters = pickle.load(f)

# Recreate the MLP with the saved architecture and learning rate.
# NOTE(review): the training cell builds its MLP with
# activation_function='relu', but the activation is not among the keys read
# here, so this relies on the MLP default matching; confirm.
mlp_imp = MLP(
    input_size=model_parameters["input_size"],
    hidden_layer_sizes=model_parameters["hidden_layer_sizes"],
    output_size=model_parameters["output_size"],
    learning_rate=model_parameters["learning_rate"]
)

# Replace the freshly initialised parameters with the saved ones.
mlp_imp.weights = model_parameters["weights"]
mlp_imp.biases = model_parameters["biases"]

# Evaluate the restored model on every test batch.
all_accuracies = []  # accuracy reported by the model for each batch
batch_sizes = []     # number of samples in each batch, used as weights
for batch_images, batch_labels in TEST:
    # Flatten the images and hand NumPy arrays to the model.
    batch_images = batch_images.view(batch_images.shape[0], -1).numpy()
    batch_labels = batch_labels.numpy()
    acc = mlp_imp.evaluate(batch_images, batch_labels)
    all_accuracies.append(acc)
    batch_sizes.append(len(batch_labels))

# Number of evaluated batches.
counter = len(all_accuracies)
print(counter)

# Weight each batch by its size so that a smaller final batch does not skew
# the overall accuracy; identical to the plain mean when all batches are
# equally large.
final_accuracy = np.average(all_accuracies, weights=batch_sizes)
print(" Accuracy:", round(final_accuracy*100,2), "%")

50
 Accuracy: 97.4 %


In [3]:
# Ask the restored model for its parameter count and show it.
parameters = mlp_imp.calculate_parameters()
print(parameters)

118282
