# MLP from Scratch

### Libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import random
import os
import random
from sklearn.model_selection import train_test_split
import cv2
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

### MLP with two layers and batches

In [None]:
class MLP_batches:
    def __init__(self, input_size, hidden_layer1, hidden_layer2, output_size, learning_rate, batch_size):
        self.input_size = input_size
        self.hidden_layer1 = hidden_layer1
        self.hidden_layer2 = hidden_layer2
        self.output_size = output_size
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.loss = []

        # Initialize weights and biases
        self.weights = [
            np.random.uniform(-1, 1, size=(input_size, hidden_layer1)),
            np.random.uniform(-1, 1, size=(hidden_layer1, hidden_layer2)),
            np.random.uniform(-1, 1, size=(hidden_layer2, output_size))
        ]
        self.biases = [
            np.zeros((1, hidden_layer1)),
            np.zeros((1, hidden_layer2)),
            np.zeros((1, output_size))
        ]

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def sigmoid_prime(self, x):
        s = self.sigmoid(x)
        return s * (1 - s)
    
    def ReLU(self, x):
        return np.maximum(0, x)
    
    def ReLU_prime(self, x):
        return (x > 0) * 1

    def softmax(self, x):
        x_shifted = x - np.max(x, axis=1, keepdims=True)
        exp = np.exp(x_shifted)
        return exp / np.sum(exp, axis=1, keepdims=True)

    def cross_entropy_loss(self, y_pred, y_true):
        y_pred = np.clip(y_pred, 1e-9, 1 - 1e-9)
        return -np.mean(np.sum(y_true * np.log(y_pred), axis=1))

    def train(self, X, y, epochs):
        self.loss = np.zeros(epochs)
        num_samples = X.shape[0]
        num_batches = num_samples // self.batch_size

        for epoch in range(epochs):
            epoch_loss = 0
            for i in range(0, num_samples, self.batch_size):
                # Get batch
                X_batch = X[i:i + self.batch_size]
                y_batch = y[i:i + self.batch_size]

                # Forward pass
                z = [None] * 3
                a = [X_batch]  # Input layer
                for layer in range(3):
                    z[layer] = a[layer].dot(self.weights[layer]) + self.biases[layer]
                    a.append(self.ReLU(z[layer]) if layer < 2 else self.softmax(z[layer]))

                # Loss calculation
                batch_loss = self.cross_entropy_loss(a[-1], y_batch)
                epoch_loss += batch_loss

                # Backward pass
                error = [None] * 3
                error[-1] = a[-1] - y_batch
                for layer in reversed(range(2)):
                    error[layer] = error[layer + 1].dot(self.weights[layer + 1].T) * self.ReLU_prime(z[layer])

                # Update weights and biases
                for layer in range(3):
                    grad_weights = a[layer].T.dot(error[layer]) / self.batch_size
                    grad_biases = np.sum(error[layer], axis=0, keepdims=True) / self.batch_size
                    self.weights[layer] -= self.learning_rate * grad_weights
                    self.biases[layer] -= self.learning_rate * grad_biases

            self.loss[epoch] = epoch_loss / num_batches
            if epoch % 10 == 0:
                print(f"Epoch {epoch + 1}/{epochs}, Loss: {self.loss[epoch]:.4f}")

    def predict(self, X):
        # Forward pass
        a = X
        for layer in range(3):
            z = a.dot(self.weights[layer]) + self.biases[layer]
            a = self.ReLU(z) if layer < 2 else self.softmax(z)
        return np.argmax(a, axis=1)

In [None]:
class MLP_batches_hinge:
    def __init__(self, input_size, hidden_layer, output_size, learning_rate, batch_size):
        self.input_size = input_size
        self.hidden_layer = hidden_layer
        self.output_size = output_size
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.loss = []

        # Initialize weights and biases
        self.weights = [
            np.random.uniform(-1, 1, size=(input_size, hidden_layer)),
            np.random.uniform(-1, 1, size=(hidden_layer, output_size))
        ]
        self.biases = [
            np.zeros((1, hidden_layer)),
            np.zeros((1, output_size))
        ]

    # Activation functions
    def ReLU(self, x):
        return np.maximum(0, x)

    def ReLU_prime(self, x):
        return (x > 0) * 1

    def softmax(self, x):
        x_shifted = x - np.max(x, axis=1, keepdims=True)
        exp = np.exp(x_shifted)
        return exp / np.sum(exp, axis=1, keepdims=True)

    # Loss functions
    def cross_entropy_loss(self, y_pred, y_true):
        y_pred = np.clip(y_pred, 1e-9, 1 - 1e-9)
        return -np.mean(np.sum(y_true * np.log(y_pred), axis=1))

    def hinge_loss(self, y_pred, y_true):
        """
        Hinge loss for multi-class classification
        y_pred: Predicted logits (shape: batch_size x num_classes)
        y_true: One-hot encoded true labels (shape: batch_size x num_classes)
        """
        batch_size = y_true.shape[0]
        correct_class_scores = np.sum(y_pred * y_true, axis=1, keepdims=True)  # Predicted scores for correct classes
        margins = np.maximum(0, y_pred - correct_class_scores + 1)  # Hinge loss margin
        margins[y_true == 1] = 0  # Ignore correct classes
        return np.sum(margins) / batch_size

    # Training function
    def train(self, X, y, epochs, loss_function='cross_entropy'):
        self.loss = np.zeros(epochs)
        num_samples = X.shape[0]
        num_batches = num_samples // self.batch_size

        for epoch in range(epochs):
            epoch_loss = 0
            for i in range(0, num_samples, self.batch_size):
                # Get batch
                X_batch = X[i:i + self.batch_size]
                y_batch = y[i:i + self.batch_size]

                # Forward pass
                z = [None] * 2
                a = [X_batch]  # Input layer
                for layer in range(2):
                    z[layer] = a[layer].dot(self.weights[layer]) + self.biases[layer]
                    a.append(self.ReLU(z[layer]) if layer < 1 else self.softmax(z[layer]))

                # Loss calculation
                if loss_function == 'cross_entropy':
                    batch_loss = self.cross_entropy_loss(a[-1], y_batch)
                elif loss_function == 'hinge':
                    batch_loss = self.hinge_loss(z[-1], y_batch)  # Use logits directly for hinge loss
                else:
                    raise ValueError("Unsupported loss function. Choose 'cross_entropy' or 'hinge'.")

                epoch_loss += batch_loss

                # Backward pass
                if loss_function == 'cross_entropy':
                    error_out = a[-1] - y_batch
                elif loss_function == 'hinge':
                    correct_class_scores = np.sum(z[-1] * y_batch, axis=1, keepdims=True)
                    margins = (z[-1] - correct_class_scores + 1 > 0).astype(float)
                    margins[y_batch == 1] = 0
                    error_out = margins / self.batch_size

                # Compute gradients
                grad_weights = [None] * 2
                grad_biases = [None] * 2
                error = error_out
                for layer in reversed(range(2)):
                    grad_weights[layer] = a[layer].T.dot(error) / self.batch_size
                    grad_biases[layer] = np.sum(error, axis=0, keepdims=True) / self.batch_size
                    if layer > 0:
                        error = error.dot(self.weights[layer].T) * self.ReLU_prime(z[layer - 1])

                # Update weights and biases
                for layer in range(2):
                    self.weights[layer] -= self.learning_rate * grad_weights[layer]
                    self.biases[layer] -= self.learning_rate * grad_biases[layer]

            self.loss[epoch] = epoch_loss / num_batches
            if epoch % 10 == 0:
                print(f"Epoch {epoch + 1}/{epochs}, Loss: {self.loss[epoch]:.4f}")

    def predict(self, X):
        # Forward pass
        a = X
        for layer in range(2):
            z = a.dot(self.weights[layer]) + self.biases[layer]
            a = self.ReLU(z) if layer < 1 else self.softmax(z)
        return np.argmax(a, axis=1)

### MLP with three layers and batches

In [None]:
class MLPThreeLayers:
    def __init__(self, input_size, hidden_sizes, output_size, learning_rate, batch_size):
        self.input_size = input_size
        self.hidden_sizes = hidden_sizes  # List of hidden layer sizes
        self.output_size = output_size
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.loss = []

        # Initialize weights and biases
        layer_sizes = [input_size] + hidden_sizes + [output_size]
        self.weights = [np.random.uniform(-1, 1, (layer_sizes[i], layer_sizes[i + 1])) for i in range(len(layer_sizes) - 1)]
        self.biases = [np.zeros((1, size)) for size in layer_sizes[1:]]

    @staticmethod
    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    @staticmethod
    def sigmoid_prime(x):
        s = MLPThreeLayers.sigmoid(x)
        return s * (1 - s)

    @staticmethod
    def relu(x):
        return np.maximum(0, x)

    @staticmethod
    def relu_prime(x):
        return (x > 0).astype(float)

    @staticmethod
    def softmax(x):
        x_shifted = x - np.max(x, axis=1, keepdims=True)
        exp = np.exp(x_shifted)
        return exp / np.sum(exp, axis=1, keepdims=True)

    @staticmethod
    def cross_entropy_loss(y_pred, y_true):
        y_pred = np.clip(y_pred, 1e-9, 1 - 1e-9)
        return -np.mean(np.sum(y_true * np.log(y_pred), axis=1))

    def forward_pass(self, X):
        activations = [X]
        pre_activations = []
        
        for i in range(len(self.weights) - 1):
            z = activations[-1].dot(self.weights[i]) + self.biases[i]
            pre_activations.append(z)
            activations.append(self.relu(z))

        # Output layer (softmax activation)
        z = activations[-1].dot(self.weights[-1]) + self.biases[-1]
        pre_activations.append(z)
        activations.append(self.softmax(z))

        return activations, pre_activations

    def backward_pass(self, activations, pre_activations, y_batch):
        grads_w = [None] * len(self.weights)
        grads_b = [None] * len(self.biases)

        # Compute output error
        error = activations[-1] - y_batch
        grads_w[-1] = activations[-2].T.dot(error) / self.batch_size
        grads_b[-1] = np.sum(error, axis=0, keepdims=True) / self.batch_size

        # Backpropagate through hidden layers
        for i in range(len(self.weights) - 2, -1, -1):
            error = error.dot(self.weights[i + 1].T) * self.relu_prime(pre_activations[i])
            grads_w[i] = activations[i].T.dot(error) / self.batch_size
            grads_b[i] = np.sum(error, axis=0, keepdims=True) / self.batch_size

        return grads_w, grads_b

    def update_weights(self, grads_w, grads_b):
        for i in range(len(self.weights)):
            self.weights[i] -= self.learning_rate * grads_w[i]
            self.biases[i] -= self.learning_rate * grads_b[i]

    def train(self, X, y, epochs):
        num_samples = X.shape[0]
        self.loss = np.zeros(epochs)

        for epoch in range(epochs):
            epoch_loss = 0
            for i in range(0, num_samples, self.batch_size):
                X_batch = X[i:i + self.batch_size]
                y_batch = y[i:i + self.batch_size]

                # Forward pass
                activations, pre_activations = self.forward_pass(X_batch)

                # Compute loss
                batch_loss = self.cross_entropy_loss(activations[-1], y_batch)
                epoch_loss += batch_loss

                # Backward pass
                grads_w, grads_b = self.backward_pass(activations, pre_activations, y_batch)

                # Update weights
                self.update_weights(grads_w, grads_b)

            self.loss[epoch] = epoch_loss / (num_samples / self.batch_size)
            if epoch % 10 == 0:
                print(f"Epoch {epoch + 1}/{epochs}, Loss: {self.loss[epoch]:.4f}")

    def predict(self, X):
        activations, _ = self.forward_pass(X)
        return np.argmax(activations[-1], axis=1)


### Loading data

In [5]:
data_file = "C:/Users/afrod/Documents/Neural_Networks/MergedDataset"
classes = ["NonDemented", "VeryMildDemented", "MildDemented", "ModerateDemented"]
training_data = []


def create_training_data():
    for dementia_level in classes:
        path = os.path.join(data_file, dementia_level)
        class_num = classes.index(dementia_level)
        for img in os.listdir(path):
            # Convert to grayscale for smaller array dimensions
            img_array = cv2.imread(os.path.join(path,img), cv2.IMREAD_GRAYSCALE)
            final_array = cv2.resize(img_array, (100,95))
            training_data.append([final_array, class_num])

create_training_data()

### Preprocessing and Splitting

In [6]:
random.shuffle(training_data)

# Separating features and labels
# Images are also flattened to be used as input in the knn algorithm
X = np.array([features for features, _ in training_data]).reshape(-1, 100*95)
y = np.array([label for _, label in training_data])

# Rescaling
X = (X-X.min())/(X.max() - X.min())

# One-hot encoding
y_onehot = np.zeros((y.size, int(y.max()) + 1))
y_onehot[np.arange(y.size),y.astype(int)] = 1.0

# Splitting data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_onehot, test_size=0.4, random_state=42)
print(X_train.shape[0])

24230


### Defining model

In [7]:
mlp = MLP_batches_hinge(9500, 500, 4, 0.0001, 1000) #2350

### Training model

In [8]:
mlp.train(X_train, y_train, 50)

Epoch 1/50, Loss: 14.7627
Epoch 11/50, Loss: 14.0318
Epoch 21/50, Loss: 13.6307
Epoch 31/50, Loss: 13.2797
Epoch 41/50, Loss: 13.0062


### Saving loss vector

In [30]:
loss_mat = np.matrix(mlp.loss)

with open('100x95-2000;500-0.001-100-ReLUloss.txt','wb') as f:
    for line in loss_mat:
        np.savetxt(f, line, fmt='%.5f')

### Evaluating on test set

In [None]:
# Evaluate accuracy on test set
predictions = mlp.predict(X_test)
y_test_labels = np.argmax(y_test, axis=1)
accuracy = np.mean(predictions == y_test_labels)
print("\nMLP with ReLU and layers [9500, 500, 4], lr = 0.0001, epochs = 50:")
print(f'Test Accuracy: {accuracy * 100:.2f}%')
print("Accuracy:", accuracy_score(y_test_labels, predictions))
print("Classification Report:\n", classification_report(y_test_labels, predictions))
print("Confusion Matrix:\n", confusion_matrix(y_test_labels, predictions))

### Evaluating on train set

In [None]:
# Evaluate accuracy on test set
predictions = mlp.predict(X_train)
y_train_labels = np.argmax(y_train, axis=1)
accuracy = np.mean(predictions == y_train_labels)
print("\nMLP with ReLU and layers [9500, 500, 4], lr = 0.0001, epochs = 50:")
print(f'Train Accuracy: {accuracy * 100:.2f}%')
print("Accuracy:", accuracy_score(y_train_labels, predictions))
print("Classification Report:\n", classification_report(y_train_labels, predictions))
print("Confusion Matrix:\n", confusion_matrix(y_train_labels, predictions))

### Plotting function

In [33]:
def plot_vector(vector, title="Vector Plot", xlabel="Index", ylabel="Value"):
    # Convert to numpy array for consistent handling
    vector = np.array(vector)
    
    # Generate indices for x-axis
    indices = np.arange(len(vector))
    
    # Create the plot
    plt.figure(figsize=(8, 5))
    plt.plot(indices, vector, marker='o', linestyle='-', color='b', label="Vector Values")
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.legend()
    plt.show()