# MLP from Scratch

### Libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import random
import os
import random
from sklearn.model_selection import train_test_split
import cv2
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

### MLP with two hidden layers and mini-batch training

In [None]:
class MLP_batches:
    """Fully connected classifier with two ReLU hidden layers and a softmax output.

    The network is trained with mini-batch gradient descent on the
    cross-entropy loss. ``X`` is expected as (num_samples, input_size) and
    ``y`` as a one-hot matrix of shape (num_samples, output_size).
    """

    def __init__(self, input_size, hidden_layer1, hidden_layer2, output_size, learning_rate, batch_size):
        """Store the hyperparameters and draw the initial parameters.

        Args:
            input_size: Number of features per sample.
            hidden_layer1: Width of the first hidden layer.
            hidden_layer2: Width of the second hidden layer.
            output_size: Number of classes.
            learning_rate: Step size of the gradient-descent update.
            batch_size: Maximum number of samples per mini-batch.
        """
        self.input_size = input_size
        self.hidden_layer1 = hidden_layer1
        self.hidden_layer2 = hidden_layer2
        self.output_size = output_size
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.loss = []  # replaced by an (epochs, 1) array in train()

        # Weights are drawn uniformly from [-1, 1); biases start at zero
        self.weights1 = np.random.uniform(-1, 1, size=(input_size, hidden_layer1))
        self.weights2 = np.random.uniform(-1, 1, size=(hidden_layer1, hidden_layer2))
        self.weights3 = np.random.uniform(-1, 1, size=(hidden_layer2, output_size))

        self.biases1 = np.zeros((1, hidden_layer1))
        self.biases2 = np.zeros((1, hidden_layer2))
        self.biases3 = np.zeros((1, output_size))

    # Activation functions
    def sigmoid(self, x):
        """Logistic function; not used by train/predict of this class."""
        return 1 / (1 + np.exp(-x))

    def sigmoid_prime(self, x):
        """Derivative of the logistic function evaluated at ``x``."""
        s = self.sigmoid(x)
        return s * (1 - s)

    def ReLU(self, x):
        """Element-wise max(0, x)."""
        return np.maximum(0, x)

    def ReLU_prime(self, x):
        """Derivative of ReLU: 1 where ``x > 0``, otherwise 0."""
        return (x > 0) * 1

    def softmax(self, x):
        """Row-wise softmax; the row maximum is subtracted first to avoid overflow."""
        x_shifted = x - np.max(x, axis=1, keepdims=True)
        exp = np.exp(x_shifted)
        return exp / np.sum(exp, axis=1, keepdims=True)

    # Loss functions
    def cross_entropy_loss(self, y_pred, y_true):
        """Mean cross-entropy over the rows of ``y_pred`` against one-hot ``y_true``."""
        # Clipping keeps log() away from 0 and therefore from -inf
        y_pred = np.clip(y_pred, 1e-9, 1 - 1e-9)
        return -np.mean(np.sum(y_true * np.log(y_pred), axis=1))

    def hinge_loss(self, y_pred, y_true):
        """
        Hinge loss for multi-class classification
        y_pred: Predicted logits (shape: batch_size x num_classes)
        y_true: One-hot encoded true labels (shape: batch_size x num_classes)
        """
        batch_size = y_true.shape[0]
        correct_class_scores = np.sum(y_pred * y_true, axis=1, keepdims=True)  # Predicted scores for correct classes
        margins = np.maximum(0, y_pred - correct_class_scores + 1)  # Hinge loss margin
        margins[y_true == 1] = 0  # Ignore correct classes
        return np.sum(margins) / batch_size

    def _forward(self, X):
        """Run the forward pass and return ``(z1, a1, z2, a2, a3)``.

        ``z*`` are pre-activations, ``a1``/``a2`` the ReLU outputs and ``a3``
        the softmax class probabilities.
        """
        z1 = X.dot(self.weights1) + self.biases1
        a1 = self.ReLU(z1)

        z2 = a1.dot(self.weights2) + self.biases2
        a2 = self.ReLU(z2)

        z3 = a2.dot(self.weights3) + self.biases3
        a3 = self.softmax(z3)
        return z1, a1, z2, a2, a3

    def train(self, X, y, epochs):
        """Train with mini-batch gradient descent on the cross-entropy loss.

        After the call ``self.loss`` is an (epochs, 1) array holding the mean
        per-sample loss of every epoch (measured while the weights are being
        updated). Progress is printed every 10th epoch.

        Args:
            X: Feature matrix, one sample per row.
            y: One-hot labels aligned with the rows of ``X``.
            epochs: Number of passes over the data.

        Raises:
            ZeroDivisionError: If ``X`` has no rows and ``epochs > 0``.
        """
        self.loss = np.zeros([epochs, 1])
        num_samples = X.shape[0]
        for epoch in range(epochs):
            loss_sum = 0.0  # sum of per-sample losses seen this epoch
            for start in range(0, num_samples, self.batch_size):
                X_batch = X[start:start + self.batch_size]
                y_batch = y[start:start + self.batch_size]
                # The last batch may be shorter than self.batch_size, so every
                # average below uses the real number of rows in this batch.
                n = X_batch.shape[0]

                z1, a1, z2, a2, a3 = self._forward(X_batch)

                # cross_entropy_loss is a batch mean; weight it by the batch
                # length so short batches do not distort the epoch average.
                loss_sum += self.cross_entropy_loss(a3, y_batch) * n

                # Backward pass, written out layer by layer to keep every
                # step explicit. For softmax + cross-entropy the error at the
                # output pre-activation is simply (probabilities - targets).
                error_out = a3 - y_batch
                grad_weights3 = a2.T.dot(error_out) / n
                grad_biases3 = np.sum(error_out, axis=0, keepdims=True) / n

                error_hidden2 = error_out.dot(self.weights3.T) * self.ReLU_prime(z2)
                grad_weights2 = a1.T.dot(error_hidden2) / n
                grad_biases2 = np.sum(error_hidden2, axis=0, keepdims=True) / n

                error_hidden1 = error_hidden2.dot(self.weights2.T) * self.ReLU_prime(z1)
                grad_weights1 = X_batch.T.dot(error_hidden1) / n
                grad_biases1 = np.sum(error_hidden1, axis=0, keepdims=True) / n

                # Gradient-descent update
                self.weights3 -= self.learning_rate * grad_weights3
                self.biases3 -= self.learning_rate * grad_biases3

                self.weights2 -= self.learning_rate * grad_weights2
                self.biases2 -= self.learning_rate * grad_biases2

                self.weights1 -= self.learning_rate * grad_weights1
                self.biases1 -= self.learning_rate * grad_biases1

            epoch_loss = loss_sum / num_samples
            self.loss[epoch] = epoch_loss

            if epoch % 10 == 0:
                print(f"Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss:.4f}")

    def predict(self, X):
        """Return the index of the most probable class for every row of ``X``."""
        a3 = self._forward(X)[-1]
        return np.argmax(a3, axis=1)

In [4]:
class MLP_batches_hinge:
    """One-hidden-layer ReLU classifier trainable with cross-entropy or hinge loss.

    ``X`` is expected as (num_samples, input_size) and ``y`` as a one-hot
    matrix of shape (num_samples, output_size). Training uses mini-batch
    gradient descent.
    """

    def __init__(self, input_size, hidden_layer, output_size, learning_rate, batch_size):
        """Store the hyperparameters and draw the initial parameters.

        Args:
            input_size: Number of features per sample.
            hidden_layer: Width of the hidden layer.
            output_size: Number of classes.
            learning_rate: Step size of the gradient-descent update.
            batch_size: Maximum number of samples per mini-batch.
        """
        self.input_size = input_size
        self.hidden_layer = hidden_layer
        self.output_size = output_size
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.loss = []  # replaced by an (epochs, 1) array in train()

        # Weights are drawn uniformly from [-1, 1); biases start at zero
        self.weights1 = np.random.uniform(-1, 1, size=(input_size, hidden_layer))
        self.weights2 = np.random.uniform(-1, 1, size=(hidden_layer, output_size))

        self.biases1 = np.zeros((1, hidden_layer))
        self.biases2 = np.zeros((1, output_size))

    # Activation functions
    def ReLU(self, x):
        """Element-wise max(0, x)."""
        return np.maximum(0, x)

    def ReLU_prime(self, x):
        """Derivative of ReLU: 1 where ``x > 0``, otherwise 0."""
        return (x > 0) * 1

    def softmax(self, x):
        """Row-wise softmax; the row maximum is subtracted first to avoid overflow."""
        x_shifted = x - np.max(x, axis=1, keepdims=True)
        exp = np.exp(x_shifted)
        return exp / np.sum(exp, axis=1, keepdims=True)

    # Loss functions
    def cross_entropy_loss(self, y_pred, y_true):
        """Mean cross-entropy over the rows of ``y_pred`` against one-hot ``y_true``."""
        # Clipping keeps log() away from 0 and therefore from -inf
        y_pred = np.clip(y_pred, 1e-9, 1 - 1e-9)
        return -np.mean(np.sum(y_true * np.log(y_pred), axis=1))

    def hinge_loss(self, y_pred, y_true):
        """
        Hinge loss for multi-class classification
        y_pred: Predicted logits (shape: batch_size x num_classes)
        y_true: One-hot encoded true labels (shape: batch_size x num_classes)
        """
        batch_size = y_true.shape[0]
        correct_class_scores = np.sum(y_pred * y_true, axis=1, keepdims=True)  # Predicted scores for correct classes
        margins = np.maximum(0, y_pred - correct_class_scores + 1)  # Hinge loss margin
        margins[y_true == 1] = 0  # Ignore correct classes
        return np.sum(margins) / batch_size

    def _forward(self, X):
        """Run the forward pass and return ``(z1, a1, z2, a2)``.

        ``z2`` holds the output logits and ``a2`` their softmax.
        """
        z1 = X.dot(self.weights1) + self.biases1
        a1 = self.ReLU(z1)

        z2 = a1.dot(self.weights2) + self.biases2
        a2 = self.softmax(z2)
        return z1, a1, z2, a2

    def _output_error(self, z2, a2, y_batch, loss_function):
        """Per-sample gradient of the chosen loss with respect to the logits.

        The result is NOT yet averaged over the batch; the caller divides by
        the batch length exactly once.
        """
        if loss_function == 'cross_entropy':
            # Softmax + cross-entropy collapses to (probabilities - targets)
            return a2 - y_batch

        # Multi-class hinge: every wrong class whose margin is violated gets
        # +1, and the correct class gets minus the number of violations.
        correct_class_scores = np.sum(z2 * y_batch, axis=1, keepdims=True)
        violations = (z2 - correct_class_scores + 1 > 0).astype(float)
        violations[y_batch == 1] = 0
        return violations - y_batch * np.sum(violations, axis=1, keepdims=True)

    # Training function
    def train(self, X, y, epochs, loss_function='cross_entropy'):
        """Train with mini-batch gradient descent.

        After the call ``self.loss`` is an (epochs, 1) array holding the mean
        per-sample loss of every epoch (measured while the weights are being
        updated). Progress is printed every 10th epoch.

        Args:
            X: Feature matrix, one sample per row.
            y: One-hot labels aligned with the rows of ``X``.
            epochs: Number of passes over the data.
            loss_function: ``'cross_entropy'`` (computed on the softmax
                output) or ``'hinge'`` (computed on the raw logits).

        Raises:
            ValueError: If ``loss_function`` is not one of the two names.
            ZeroDivisionError: If ``X`` has no rows and ``epochs > 0``.
        """
        # Reject a bad name up front instead of in the middle of the first batch
        if loss_function not in ('cross_entropy', 'hinge'):
            raise ValueError("Unsupported loss function. Choose 'cross_entropy' or 'hinge'.")

        self.loss = np.zeros([epochs, 1])
        num_samples = X.shape[0]
        for epoch in range(epochs):
            loss_sum = 0.0  # sum of per-sample losses seen this epoch
            for start in range(0, num_samples, self.batch_size):
                X_batch = X[start:start + self.batch_size]
                y_batch = y[start:start + self.batch_size]
                # The last batch may be shorter than self.batch_size, so every
                # average below uses the real number of rows in this batch.
                n = X_batch.shape[0]

                z1, a1, z2, a2 = self._forward(X_batch)

                if loss_function == 'cross_entropy':
                    batch_loss = self.cross_entropy_loss(a2, y_batch)
                else:
                    batch_loss = self.hinge_loss(z2, y_batch)  # Use logits directly for hinge loss

                # Both losses are batch means; weight by the batch length so
                # short batches do not distort the epoch average.
                loss_sum += batch_loss * n

                # Backward pass
                error_out = self._output_error(z2, a2, y_batch, loss_function)
                grad_weights2 = a1.T.dot(error_out) / n
                grad_biases2 = np.sum(error_out, axis=0, keepdims=True) / n

                error_hidden = error_out.dot(self.weights2.T) * self.ReLU_prime(z1)
                grad_weights1 = X_batch.T.dot(error_hidden) / n
                grad_biases1 = np.sum(error_hidden, axis=0, keepdims=True) / n

                # Gradient-descent update
                self.weights2 -= self.learning_rate * grad_weights2
                self.biases2 -= self.learning_rate * grad_biases2

                self.weights1 -= self.learning_rate * grad_weights1
                self.biases1 -= self.learning_rate * grad_biases1

            epoch_loss = loss_sum / num_samples
            self.loss[epoch] = epoch_loss

            if epoch % 10 == 0:
                print(f"Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss:.4f}")

    def predict(self, X):
        """Return the index of the most probable class for every row of ``X``."""
        a2 = self._forward(X)[-1]
        return np.argmax(a2, axis=1)

### MLP with three hidden layers and mini-batch training

In [None]:
class MLP_three_layers:
    """Fully connected classifier with three ReLU hidden layers and a softmax output.

    The network is trained with mini-batch gradient descent on the
    cross-entropy loss. ``X`` is expected as (num_samples, input_size) and
    ``y`` as a one-hot matrix of shape (num_samples, output_size).
    """

    def __init__(self, input_size, hidden_layer1, hidden_layer2, hidden_layer3, output_size, learning_rate, batch_size):
        """Store the hyperparameters and draw the initial parameters.

        Args:
            input_size: Number of features per sample.
            hidden_layer1: Width of the first hidden layer.
            hidden_layer2: Width of the second hidden layer.
            hidden_layer3: Width of the third hidden layer.
            output_size: Number of classes.
            learning_rate: Step size of the gradient-descent update.
            batch_size: Maximum number of samples per mini-batch.
        """
        self.input_size = input_size
        self.hidden_layer1 = hidden_layer1
        self.hidden_layer2 = hidden_layer2
        self.hidden_layer3 = hidden_layer3
        self.output_size = output_size
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.loss = []  # replaced by an (epochs, 1) array in train()

        # Weights are drawn uniformly from [-1, 1); biases start at zero
        self.weights1 = np.random.uniform(-1, 1, size=(input_size, hidden_layer1))
        self.weights2 = np.random.uniform(-1, 1, size=(hidden_layer1, hidden_layer2))
        self.weights3 = np.random.uniform(-1, 1, size=(hidden_layer2, hidden_layer3))
        self.weights4 = np.random.uniform(-1, 1, size=(hidden_layer3, output_size))

        self.biases1 = np.zeros((1, hidden_layer1))
        self.biases2 = np.zeros((1, hidden_layer2))
        self.biases3 = np.zeros((1, hidden_layer3))
        self.biases4 = np.zeros((1, output_size))

    # Activation functions
    def sigmoid(self, x):
        """Logistic function; not used by train/predict of this class."""
        return 1 / (1 + np.exp(-x))

    def sigmoid_prime(self, x):
        """Derivative of the logistic function evaluated at ``x``."""
        s = self.sigmoid(x)
        return s * (1 - s)

    def ReLU(self, x):
        """Element-wise max(0, x)."""
        return np.maximum(0, x)

    def ReLU_prime(self, x):
        """Derivative of ReLU: 1 where ``x > 0``, otherwise 0."""
        return (x > 0) * 1

    def softmax(self, x):
        """Row-wise softmax; the row maximum is subtracted first to avoid overflow."""
        x_shifted = x - np.max(x, axis=1, keepdims=True)
        exp = np.exp(x_shifted)
        return exp / np.sum(exp, axis=1, keepdims=True)

    def cross_entropy_loss(self, y_pred, y_true):
        """Mean cross-entropy over the rows of ``y_pred`` against one-hot ``y_true``."""
        # Clipping keeps log() away from 0 and therefore from -inf
        y_pred = np.clip(y_pred, 1e-9, 1 - 1e-9)
        return -np.mean(np.sum(y_true * np.log(y_pred), axis=1))

    def _forward(self, X):
        """Run the forward pass and return ``(z1, a1, z2, a2, z3, a3, a4)``.

        ``z*`` are pre-activations, ``a1``..``a3`` the ReLU outputs and ``a4``
        the softmax class probabilities.
        """
        z1 = X.dot(self.weights1) + self.biases1
        a1 = self.ReLU(z1)

        z2 = a1.dot(self.weights2) + self.biases2
        a2 = self.ReLU(z2)

        z3 = a2.dot(self.weights3) + self.biases3
        a3 = self.ReLU(z3)

        z4 = a3.dot(self.weights4) + self.biases4
        a4 = self.softmax(z4)
        return z1, a1, z2, a2, z3, a3, a4

    def train(self, X, y, epochs):
        """Train with mini-batch gradient descent on the cross-entropy loss.

        After the call ``self.loss`` is an (epochs, 1) array holding the mean
        per-sample loss of every epoch (measured while the weights are being
        updated). Progress is printed every 10th epoch.

        Args:
            X: Feature matrix, one sample per row.
            y: One-hot labels aligned with the rows of ``X``.
            epochs: Number of passes over the data.

        Raises:
            ZeroDivisionError: If ``X`` has no rows and ``epochs > 0``.
        """
        self.loss = np.zeros([epochs, 1])
        num_samples = X.shape[0]
        for epoch in range(epochs):
            loss_sum = 0.0  # sum of per-sample losses seen this epoch
            for start in range(0, num_samples, self.batch_size):
                X_batch = X[start:start + self.batch_size]
                y_batch = y[start:start + self.batch_size]
                # The last batch may be shorter than self.batch_size, so every
                # average below uses the real number of rows in this batch.
                n = X_batch.shape[0]

                z1, a1, z2, a2, z3, a3, a4 = self._forward(X_batch)

                # cross_entropy_loss is a batch mean; weight it by the batch
                # length so short batches do not distort the epoch average.
                loss_sum += self.cross_entropy_loss(a4, y_batch) * n

                # Backward pass. For softmax + cross-entropy the error at the
                # output pre-activation is simply (probabilities - targets).
                error_out = a4 - y_batch
                grad_weights4 = a3.T.dot(error_out) / n
                grad_biases4 = np.sum(error_out, axis=0, keepdims=True) / n

                error_hidden3 = error_out.dot(self.weights4.T) * self.ReLU_prime(z3)
                grad_weights3 = a2.T.dot(error_hidden3) / n
                grad_biases3 = np.sum(error_hidden3, axis=0, keepdims=True) / n

                error_hidden2 = error_hidden3.dot(self.weights3.T) * self.ReLU_prime(z2)
                grad_weights2 = a1.T.dot(error_hidden2) / n
                grad_biases2 = np.sum(error_hidden2, axis=0, keepdims=True) / n

                error_hidden1 = error_hidden2.dot(self.weights2.T) * self.ReLU_prime(z1)
                grad_weights1 = X_batch.T.dot(error_hidden1) / n
                grad_biases1 = np.sum(error_hidden1, axis=0, keepdims=True) / n

                # Gradient-descent update
                self.weights4 -= self.learning_rate * grad_weights4
                self.biases4 -= self.learning_rate * grad_biases4

                self.weights3 -= self.learning_rate * grad_weights3
                self.biases3 -= self.learning_rate * grad_biases3

                self.weights2 -= self.learning_rate * grad_weights2
                self.biases2 -= self.learning_rate * grad_biases2

                self.weights1 -= self.learning_rate * grad_weights1
                self.biases1 -= self.learning_rate * grad_biases1

            epoch_loss = loss_sum / num_samples
            self.loss[epoch] = epoch_loss

            if epoch % 10 == 0:
                print(f"Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss:.4f}")

    def predict(self, X):
        """Return the index of the most probable class for every row of ``X``."""
        a4 = self._forward(X)[-1]
        return np.argmax(a4, axis=1)


### Loading data

In [5]:
# Root folder of the dataset; create_training_data() reads one sub-folder per entry in `classes`
data_file = "C:/Users/afrod/Documents/Neural_Networks/MergedDataset"
# Sub-folder names; a name's position in this list is used as its integer class label
classes = ["NonDemented", "VeryMildDemented", "MildDemented", "ModerateDemented"]
# Filled by create_training_data() with [image_array, class_index] pairs
training_data = []


def create_training_data():
    """Load every image under ``data_file/<class name>`` into ``training_data``.

    Each readable file is decoded as a grayscale image, resized with
    ``cv2.resize(..., (100, 95))`` and appended to the module-level
    ``training_data`` list as ``[pixels, class_index]``, where ``class_index``
    is the position of the folder name in ``classes``. Files that OpenCV
    cannot decode are reported and skipped.
    """
    for class_num, dementia_level in enumerate(classes):
        path = os.path.join(data_file, dementia_level)
        for img in os.listdir(path):
            img_path = os.path.join(path, img)
            # Convert to grayscale for smaller array dimensions
            img_array = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img_array is None:
                # cv2.imread returns None instead of raising when a file is
                # missing, unreadable or not an image; cv2.resize would then
                # fail, so skip the entry.
                print(f"Skipping unreadable file: {img_path}")
                continue
            # OpenCV's dsize is (width, height): the result has 95 rows and 100 columns
            final_array = cv2.resize(img_array, (100, 95))
            training_data.append([final_array, class_num])

# Fill training_data from disk; running this again appends the same images a second time
create_training_data()

### Preprocessing and Splitting

In [6]:
# Randomise the sample order in place before building the arrays
random.shuffle(training_data)

# Pull images and labels apart; every image is flattened into one row of
# 100*95 pixels so it can be fed to the MLP input layer
X = np.array([image for image, _ in training_data]).reshape(-1, 100*95)
y = np.array([class_index for _, class_index in training_data])

# Min-max rescaling using the global pixel extremes
pixel_min = X.min()
pixel_range = X.max() - pixel_min
X = (X - pixel_min) / pixel_range

# One-hot encoding: row i of the identity matrix is the code for class i
y_onehot = np.eye(int(y.max()) + 1)[y.astype(int)]

# Hold out 40% of the samples for testing (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X, y_onehot, test_size=0.4, random_state=42
)
print(len(X_train))

24230


### Defining model

In [7]:
# Positional arguments: input_size=9500 (100*95 flattened pixels), hidden_layer=500,
# output_size=4 (one per entry in `classes`), learning_rate=0.0001, batch_size=1000
mlp = MLP_batches_hinge(9500, 500, 4, 0.0001, 1000) #2350

### Training model

In [8]:
# Train for 50 epochs; loss_function is left at its default, 'cross_entropy'
mlp.train(X_train, y_train, 50)

Epoch 1/50, Loss: 14.7627
Epoch 11/50, Loss: 14.0318
Epoch 21/50, Loss: 13.6307
Epoch 31/50, Loss: 13.2797
Epoch 41/50, Loss: 13.0062


### Saving loss vector

In [30]:
# Write the recorded loss as text with 5 decimals, one line per row of loss_mat.
# np.atleast_2d replaces the discouraged np.matrix, and a single savetxt call
# writes the same rows that the former per-row loop did.
# NOTE(review): the file name encodes hyperparameters that differ from the ones
# passed to the model constructor in this notebook — confirm it is still intended.
loss_mat = np.atleast_2d(mlp.loss)

with open('100x95-2000;500-0.001-100-ReLUloss.txt', 'wb') as f:
    np.savetxt(f, loss_mat, fmt='%.5f')

### Evaluating on test set

In [9]:
# Test-set evaluation: turn the one-hot targets back into class indices and
# compare them with the class indices predicted by the model
y_test_labels = y_test.argmax(axis=1)
predictions = mlp.predict(X_test)
accuracy = (predictions == y_test_labels).mean()

print("\nMLP with ReLU and layers [9500, 500, 4], lr = 0.0001, epochs = 50:")
print(f'Test Accuracy: {accuracy * 100:.2f}%')
print("Accuracy:", accuracy_score(y_test_labels, predictions))
print(
    "Classification Report:\n",
    classification_report(y_test_labels, predictions),
)
print(
    "Confusion Matrix:\n",
    confusion_matrix(y_test_labels, predictions),
)


MLP with ReLU and layers [9500, 500, 4], lr = 0.0001, epochs = 50:
Test Accuracy: 32.82%
Accuracy: 0.32815401758078494
Classification Report:
               precision    recall  f1-score   support

           0       0.38      0.40      0.39      5092
           1       0.29      0.28      0.28      4472
           2       0.31      0.29      0.30      3976
           3       0.32      0.32      0.32      2614

    accuracy                           0.33     16154
   macro avg       0.32      0.32      0.32     16154
weighted avg       0.33      0.33      0.33     16154

Confusion Matrix:
 [[2062 1518  935  577]
 [1463 1251 1145  613]
 [1066 1131 1139  640]
 [ 828  444  493  849]]


### Evaluating on train set

In [10]:
# Train-set evaluation: turn the one-hot targets back into class indices and
# compare them with the class indices predicted by the model
y_train_labels = y_train.argmax(axis=1)
predictions = mlp.predict(X_train)
accuracy = (predictions == y_train_labels).mean()

print("\nMLP with ReLU and layers [9500, 500, 4], lr = 0.0001, epochs = 50:")
print(f'Train Accuracy: {accuracy * 100:.2f}%')
print("Accuracy:", accuracy_score(y_train_labels, predictions))
print(
    "Classification Report:\n",
    classification_report(y_train_labels, predictions),
)
print(
    "Confusion Matrix:\n",
    confusion_matrix(y_train_labels, predictions),
)


MLP with ReLU and layers [9500, 500, 4], lr = 0.0001, epochs = 50:
Train Accuracy: 33.52%
Accuracy: 0.3352042921997524
Classification Report:
               precision    recall  f1-score   support

           0       0.40      0.42      0.41      7708
           1       0.30      0.29      0.29      6728
           2       0.30      0.29      0.30      5880
           3       0.31      0.32      0.32      3914

    accuracy                           0.34     24230
   macro avg       0.33      0.33      0.33     24230
weighted avg       0.33      0.34      0.33     24230

Confusion Matrix:
 [[3208 2197 1487  816]
 [2089 1934 1752  953]
 [1510 1677 1722  971]
 [1224  687  745 1258]]


### Plotting function

In [33]:
def plot_vector(vector, title="Vector Plot", xlabel="Index", ylabel="Value"):
    """Plot ``vector`` against its positional index and display the figure.

    Args:
        vector: Sequence of values to draw; it is converted to a numpy array.
        title: Figure title.
        xlabel: Label of the horizontal axis.
        ylabel: Label of the vertical axis.
    """
    values = np.array(vector)
    positions = np.arange(len(values))

    # Styling is collected up front so the drawing calls below stay short
    line_style = dict(marker='o', linestyle='-', color='b', label="Vector Values")
    grid_style = dict(linestyle='--', alpha=0.7)

    plt.figure(figsize=(8, 5))
    plt.plot(positions, values, **line_style)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(title)
    plt.grid(True, **grid_style)
    plt.legend()
    plt.show()