In [None]:
#Multilayer Perceptron and Hyper-parameter Tuning

In [1]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score
from itertools import product
import time

# Load and preprocess the MNIST dataset
mnist = fetch_openml('mnist_784', version=1, parser='auto')  # Load dataset
X = mnist.data.astype(np.float32) / 255.0  # Normalize pixel values to [0, 1]
y = OneHotEncoder(sparse_output=False).fit_transform(mnist.target.to_numpy().reshape(-1, 1))  # One-hot encode target

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define activation function and its derivative
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)

# Multilayer Perceptron Class
class MLP:
    def __init__(self, input_size, hidden_size, output_size, learning_rate):
        np.random.seed(42)
        self.weights_input_hidden = np.random.randn(input_size, hidden_size) * 0.01
        self.bias_hidden = np.zeros((1, hidden_size))
        self.weights_hidden_output = np.random.randn(hidden_size, output_size) * 0.01
        self.bias_output = np.zeros((1, output_size))
        self.learning_rate = learning_rate

    def forward(self, X):
        self.hidden_input = np.dot(X, self.weights_input_hidden) + self.bias_hidden
        self.hidden_output = sigmoid(self.hidden_input)
        self.final_input = np.dot(self.hidden_output, self.weights_hidden_output) + self.bias_output
        self.final_output = sigmoid(self.final_input)
        return self.final_output

    def backward(self, X, y, output):
        error = y - output
        d_output = error * sigmoid_derivative(output)
        
        error_hidden = d_output.dot(self.weights_hidden_output.T)
        d_hidden = error_hidden * sigmoid_derivative(self.hidden_output)

        # Update weights and biases
        self.weights_hidden_output += self.hidden_output.T.dot(d_output) * self.learning_rate
        self.bias_output += np.sum(d_output, axis=0, keepdims=True) * self.learning_rate
        self.weights_input_hidden += X.T.dot(d_hidden) * self.learning_rate
        self.bias_hidden += np.sum(d_hidden, axis=0, keepdims=True) * self.learning_rate

    def train(self, X, y, epochs, batch_size):
        num_batches = X.shape[0] // batch_size
        for epoch in range(epochs):
            for batch in range(num_batches):
                start = batch * batch_size
                end = start + batch_size
                X_batch = X[start:end]
                y_batch = y[start:end]

                output = self.forward(X_batch)
                self.backward(X_batch, y_batch, output)

            # Calculate loss at the end of the epoch
            train_loss = np.mean(np.square(y - self.forward(X)))
            print(f'Epoch {epoch + 1}/{epochs}, Loss: {train_loss:.4f}')

    def predict(self, X):
        output = self.forward(X)
        return np.argmax(output, axis=1)

# Define input size, output size, and other parameters
input_size = X_train.shape[1]  # Number of input neurons (784 for MNIST)
output_size = y_train.shape[1]  # Number of output neurons (10 for MNIST classes)

# Hyperparameter ranges
hidden_layer_sizes = [64, 128, 256, 512]  # Different hidden layer sizes to test
learning_rates = [0.001, 0.01, 0.1]       # Learning rates to explore
batch_sizes = [32, 64, 128]               # Batch sizes to experiment with
epochs = 10                               # Number of epochs for training


best_accuracy = 0
best_params = {}

# Grid search over hyper-parameters
for hidden_size, learning_rate, batch_size in product(hidden_layer_sizes, learning_rates, batch_sizes):
    print(f'Training with Hidden Size: {hidden_size}, Learning Rate: {learning_rate}, Batch Size: {batch_size}')
    start_time = time.time()
    
    # Initialize the model
    mlp = MLP(input_size, hidden_size, output_size, learning_rate)
    
    # Train the model
    mlp.train(X_train, y_train, epochs, batch_size)
    
    # Evaluate on test set
    y_pred = mlp.predict(X_test)
    accuracy = accuracy_score(np.argmax(y_test, axis=1), y_pred)
    end_time = time.time()
    print(f'Test Accuracy: {accuracy:.4f}, Time Taken: {end_time - start_time:.2f} seconds\n')
    
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_params = {'hidden_size': hidden_size, 'learning_rate': learning_rate, 'batch_size': batch_size}

# Best Hyperparameters and Accuracy
print(f"Best Hyper-parameters: {best_params}")
print(f"Best Accuracy: {best_accuracy:.4f}")

Training with Hidden Size: 64, Learning Rate: 0.001, Batch Size: 32
Epoch 1/10, Loss: 0.0899
Epoch 2/10, Loss: 0.0896
Epoch 3/10, Loss: 0.0882
Epoch 4/10, Loss: 0.0817
Epoch 5/10, Loss: 0.0719
Epoch 6/10, Loss: 0.0620
Epoch 7/10, Loss: 0.0525
Epoch 8/10, Loss: 0.0460
Epoch 9/10, Loss: 0.0412
Epoch 10/10, Loss: 0.0371
Test Accuracy: 0.8215, Time Taken: 72.19 seconds

Training with Hidden Size: 64, Learning Rate: 0.001, Batch Size: 64
Epoch 1/10, Loss: 0.0899
Epoch 2/10, Loss: 0.0896
Epoch 3/10, Loss: 0.0882
Epoch 4/10, Loss: 0.0817
Epoch 5/10, Loss: 0.0719
Epoch 6/10, Loss: 0.0620
Epoch 7/10, Loss: 0.0525
Epoch 8/10, Loss: 0.0460
Epoch 9/10, Loss: 0.0412
Epoch 10/10, Loss: 0.0371
Test Accuracy: 0.8215, Time Taken: 44.95 seconds

Training with Hidden Size: 64, Learning Rate: 0.001, Batch Size: 128
Epoch 1/10, Loss: 0.0899
Epoch 2/10, Loss: 0.0896
Epoch 3/10, Loss: 0.0882
Epoch 4/10, Loss: 0.0818
Epoch 5/10, Loss: 0.0719
Epoch 6/10, Loss: 0.0621
Epoch 7/10, Loss: 0.0526
Epoch 8/10, Loss: 

Epoch 2/10, Loss: 0.0208
Epoch 3/10, Loss: 0.0179
Epoch 4/10, Loss: 0.0162
Epoch 5/10, Loss: 0.0149
Epoch 6/10, Loss: 0.0139
Epoch 7/10, Loss: 0.0130
Epoch 8/10, Loss: 0.0123
Epoch 9/10, Loss: 0.0117
Epoch 10/10, Loss: 0.0111
Test Accuracy: 0.9342, Time Taken: 102.18 seconds

Training with Hidden Size: 256, Learning Rate: 0.01, Batch Size: 128
Epoch 1/10, Loss: 0.0811
Epoch 2/10, Loss: 0.0320
Epoch 3/10, Loss: 0.0206
Epoch 4/10, Loss: 0.0177
Epoch 5/10, Loss: 0.0160
Epoch 6/10, Loss: 0.0147
Epoch 7/10, Loss: 0.0137
Epoch 8/10, Loss: 0.0129
Epoch 9/10, Loss: 0.0122
Epoch 10/10, Loss: 0.0116
Test Accuracy: 0.9311, Time Taken: 69.29 seconds

Training with Hidden Size: 256, Learning Rate: 0.1, Batch Size: 32
Epoch 1/10, Loss: 0.1000
Epoch 2/10, Loss: 0.1000
Epoch 3/10, Loss: 0.1000
Epoch 4/10, Loss: 0.1000
Epoch 5/10, Loss: 0.1000
Epoch 6/10, Loss: 0.1000
Epoch 7/10, Loss: 0.1000
Epoch 8/10, Loss: 0.1000
Epoch 9/10, Loss: 0.1000
Epoch 10/10, Loss: 0.1000
Test Accuracy: 0.1024, Time Taken: 