In [28]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from mlp import Layer, MultilayerPerceptron, SquaredError, CrossEntropy, Relu, Tanh, Sigmoid, Softplus, Mish, Linear
from sklearn.metrics import r2_score
import itertools

# -------------------- Loading MPG Dataset -------------------- #
class MPGDataLoader:
    """Read a whitespace-delimited Auto MPG data file into feature/target arrays.

    The file is parsed without a header row; ``COLUMNS`` supplies the column
    names in file order. A literal ``?`` in any field is treated as missing.
    """

    # Column names in the order they appear in the data file.
    COLUMNS = ["mpg", "cylinders", "displacement", "horsepower", "weight",
               "acceleration", "model year", "origin", "car name"]

    def __init__(self, dataset_path):
        """Remember the location of the data file.

        Args:
            dataset_path: Path (or any object ``pandas.read_csv`` accepts)
                pointing at the data file.
        """
        self.dataset_path = dataset_path

    def load_data(self):
        """Parse the data file and split it into features and target.

        Rows containing any missing value are discarded, the ``car name``
        column is dropped, and ``mpg`` is separated out as the target.

        Returns:
            tuple: ``(X, y)`` where ``X`` is a 2-D array holding every
            remaining column except ``mpg`` and ``y`` is the ``mpg`` column
            reshaped to ``(n_rows, 1)``.
        """
        # Raw string: '\s' in a normal literal is an invalid escape sequence
        # and triggers a warning on current Python versions.
        df = pd.read_csv(self.dataset_path, sep=r'\s+', names=self.COLUMNS,
                         na_values="?")
        df.dropna(inplace=True)
        df.drop(columns=["car name"], inplace=True, errors='ignore')

        # Defensive: coerce any leftover non-numeric horsepower entries to NaN
        # and drop those rows as well.
        df["horsepower"] = pd.to_numeric(df["horsepower"], errors='coerce')
        df.dropna(inplace=True)

        y = df["mpg"].values.reshape(-1, 1)
        X = df.drop(columns=["mpg"]).values
        return X, y

# Read the Auto MPG file into a feature matrix and a target column
dataset_path = 'mpg-data/auto-mpg.data'
dataloader = MPGDataLoader(dataset_path)
X, y = dataloader.load_data()

# -------------------- Data Preprocessing -------------------- #
# First carve off 30% of the rows, then halve that remainder into
# validation and test sets (seeded so the split is reproducible).
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, random_state=42, shuffle=True
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42, shuffle=True
)

# Scaling statistics come from the training split only; the same fitted
# scalers are then applied to every split.
scaler_x = StandardScaler().fit(X_train)
scaler_y = StandardScaler().fit(y_train)

X_train, X_val, X_test = (
    scaler_x.transform(split) for split in (X_train, X_val, X_test)
)
y_train, y_val, y_test = (
    scaler_y.transform(split) for split in (y_train, y_val, y_test)
)

# -------------------- Hyperparameter Tuning: Single Hidden Layer Activation, Optimizer, and Loss -------------------- #
# Trains one single-hidden-layer MLP per (activation, optimizer, loss)
# combination. Each model is scored on both the validation and the test split;
# the winning configuration is chosen by *validation* R² so that the test R²
# reported for it is not biased by the selection itself.
input_dim = X_train.shape[1]

# Define different activation functions for the hidden layer
# NOTE(review): each activation instance is shared by the two models built for
# the two optimizers; confirm the activation classes keep no state that should
# be reset between models.
activation_functions = [Relu(), Sigmoid(), Tanh(), Mish(), Softplus()]

# Define different optimizers
optimizers = ['vanilla', 'rmsprop']

# Define different loss functions
loss_functions = [SquaredError()]

# Store results, keyed by the configuration's index in `combinations`
results = {}

# Generate all possible combinations of (activation function, optimizer, loss function)
combinations = list(itertools.product(activation_functions, optimizers, loss_functions))

# The un-scaled targets do not depend on the model, so compute them once
# instead of on every iteration.
y_val_original = scaler_y.inverse_transform(y_val)
y_test_original = scaler_y.inverse_transform(y_test)

# Iterate through each combination
for idx, (activation, optimizer, loss_fn) in enumerate(combinations):
    print(f"\nTesting Configuration {idx+1}: Activation: {activation.__class__.__name__}, Optimizer: {optimizer}, Loss: {loss_fn.__class__.__name__}")

    layers = [
        Layer(input_dim, 64, activation),
        Layer(64, 1, Linear())  # Linear activation for regression
    ]

    mlp = MultilayerPerceptron(layers)

    # Train the model
    train_losses, val_losses = mlp.train(
        X_train, y_train, X_val, y_val,
        loss_fn, learning_rate=0.01, batch_size=32, epochs=150, optimizer=optimizer, momentum=0.9
    )

    # Score on the validation split (used only for picking the best config)
    y_val_pred = mlp.forward(X_val, training=False)
    y_val_pred = scaler_y.inverse_transform(y_val_pred.reshape(-1, 1))
    val_r2 = r2_score(y_val_original, y_val_pred)

    # Evaluate on test set, in the original mpg units
    y_pred = mlp.forward(X_test, training=False)
    y_pred = scaler_y.inverse_transform(y_pred.reshape(-1, 1))

    # Compute R² Score
    r2 = r2_score(y_test_original, y_pred)

    # Store results (scores are R² multiplied by 100, rounded to 2 decimals)
    results[idx] = {
        "activation": activation.__class__.__name__,
        "optimizer": optimizer,
        "loss_function": loss_fn.__class__.__name__,
        "r2_score": round(r2 * 100, 2),
        "val_r2_score": round(val_r2 * 100, 2)
    }

    print(f"R² Score: {results[idx]['r2_score']}%")

# Identify Best Configuration: select on validation R², never on test R²
best_config_idx = max(results, key=lambda x: results[x]["val_r2_score"])
best_config = results[best_config_idx]

# -------------------- Summary of Results -------------------- #
print("\n===== Summary of Runs =====")
for res in results.values():
    print(f"Input and Hidden Layers: {res['activation']}, Optimizer: {res['optimizer']}, Loss: {res['loss_function']}, R²: {res['r2_score']}%")

print(f"\nBest Configuration: Input and Hidden Layers: {best_config['activation']}, Optimizer: {best_config['optimizer']}, Loss: {best_config['loss_function']}, with R² = {best_config['r2_score']}%")



Testing Configuration 1: Activation: Relu, Optimizer: vanilla, Loss: SquaredError
Epoch 1/150 - Train Loss: 0.4765, Val Loss: 0.3683
Epoch 2/150 - Train Loss: 0.2563, Val Loss: 0.1620
Epoch 3/150 - Train Loss: 0.2013, Val Loss: 0.1267
Epoch 4/150 - Train Loss: 0.1537, Val Loss: 0.1034
Epoch 5/150 - Train Loss: 0.1297, Val Loss: 0.1000
Epoch 6/150 - Train Loss: 0.1359, Val Loss: 0.1008
Epoch 7/150 - Train Loss: 0.1268, Val Loss: 0.1015
Epoch 8/150 - Train Loss: 0.1145, Val Loss: 0.1033
Epoch 9/150 - Train Loss: 0.1162, Val Loss: 0.1040
Epoch 10/150 - Train Loss: 0.1129, Val Loss: 0.1015
Epoch 11/150 - Train Loss: 0.1044, Val Loss: 0.0988
Epoch 12/150 - Train Loss: 0.1066, Val Loss: 0.1008
Epoch 13/150 - Train Loss: 0.1050, Val Loss: 0.0983
Epoch 14/150 - Train Loss: 0.1061, Val Loss: 0.0998
Epoch 15/150 - Train Loss: 0.1034, Val Loss: 0.1003
Epoch 16/150 - Train Loss: 0.1049, Val Loss: 0.1011
Epoch 17/150 - Train Loss: 0.0972, Val Loss: 0.1020
Epoch 18/150 - Train Loss: 0.1040, Val Los