# AI 201 Programming Assignment 3
## Multi-Layer Perceptron with Backpropagation

Submitted by: 
Jan Lendl R. Uy, 2019-00312

In [1]:
import numpy as np
import csv
import time
from imblearn.over_sampling import SMOTE

In [2]:
# Dataset Paths
path_to_train_set_features = "data.csv"
path_to_train_set_labels = "data_labels.csv"
path_to_test_set_features = "test_set.csv"

In [3]:
def read_csv(path):
    data = []

    with open(path, mode="r") as file:
        csv_reader = csv.reader(file)  
        for row in csv_reader:
            data.append(row)
    return np.array(data, dtype=float)

X = read_csv(path_to_train_set_features)
Y = read_csv(path_to_train_set_labels)
# X_test = read_csv(path_to_test_set_features)

In [4]:
def one_hot_encoding(x, length):
    encoding = np.zeros(length)
    encoding[int(x)-1] = 1
    return encoding

Y = Y.tolist()
for i in range(len(Y)):
    Y[i] = one_hot_encoding(Y[i][0], 8)
Y = np.array(Y)

In [5]:
smote = SMOTE()
X, Y = smote.fit_resample(X, Y)

print(X.shape)
print(Y.shape)

(13000, 354)
(13000, 8)


In [6]:
def custom_train_test_split(X, Y, val_size, random_state=None):    
    # Pair each document with its label
    paired = list(zip(X, Y))
    
    # Shuffle the paired documents and labels
    np.random.shuffle(paired)
    
    # Calculate the number of samples in the test set
    if isinstance(val_size, float):
        val_size = int(len(X)-800)
    
    # Split the paired list into training and testing sets
    train_pairs = paired[:-val_size]
    val_pairs = paired[-val_size:]
    
    # Unzip the pairs back into separate lists
    train_features, train_labels = zip(*train_pairs)
    val_features, val_labels = zip(*val_pairs)
    
    return np.array(train_features), np.array(val_features), np.array(train_labels), np.array(val_labels)

X_train, X_val, Y_train, Y_val = custom_train_test_split(X, Y, val_size=800, random_state=62)

print(X_train.shape)
print(X_val.shape)
print(Y_train.shape)
print(Y_val.shape)

(12200, 354)
(800, 354)
(12200, 8)
(800, 8)


In [7]:
class MultiLayerPerceptron:
    
    def __init__(self, input_size, hidden_sizes, output_size, 
                activation_functions=["tanh", "tanh", "logistic"], 
                batch_size=8, 
                learning_rate=0.01,
                momentum=0.9, 
                seed=0):
        
        # Network architecture parameters
        self.input_size = input_size
        self.hidden_sizes = hidden_sizes
        self.output_size = output_size
        self.activation_functions = activation_functions

        self.layer_sizes = [input_size] + hidden_sizes + [output_size]
        self.depth = len(self.layer_sizes)
        print(f"layer_sizes = {self.layer_sizes}")

        # Backpropagation parameters
        self.learning_rate = learning_rate
        self.momentum = momentum

        # Learnable parameters
        self.layers = []
        self.weights = self.__initialize_weights()
        self.biases = self.__initialize_biases()
        self.velocity = self.__initialize_velocity()
        
        # Gradients of each layer to be used for learning
        self.weight_gradients = []
        self.bias_gradients = []

        np.random.seed(seed)

    def __initialize_weights(self):
        weights = []
        # print(f"Weights shapes")
        for i in range(self.depth - 1):
            w = np.random.randn(self.layer_sizes[i], self.layer_sizes[i+1]) * np.sqrt(2.0/(self.layer_sizes[i]))
            weights.append(w)
            # print(w.shape)
        return weights
    
    def __initialize_biases(self):
        biases = []
        # print(f"Biases shapes")
        for i in range(self.depth - 1):
            b = np.zeros((1, self.layer_sizes[i+1]))
            biases.append(b)
            # print(b.shape)
        return biases

    def __initialize_velocity(self):
        velocity = []
        for i in range(self.depth - 1):
            velocity_layer = {
                "weights": np.zeros_like(self.weights[i]),
                "biases": np.zeros_like(self.biases[i])
            }
            velocity.append(velocity_layer)
        return velocity
    
    def logistic(self, x, a=2.0):
        return 1 / (1 + np.exp(-a * x))

    def logistic_derivative(self, x, a=2.0):
        sigma = self.logistic(x)
        return a * sigma * (1 - sigma)
    
    def tanh(self, x, a=1.716, b=2/3):
        return a * np.tanh(x * b)

    def tanh_derivative(self, x, a=1.716, b=2/3):
        return a*b * (1/np.cosh(b*x))**2
    
    def leaky_relu(x, alpha=0.01):
        return np.where(x > 0, x, alpha * x)

    def leaky_relu_derivative(x, alpha=0.01):
        return np.where(x > 0, 1, alpha)
    
    def activation(self, X, function):
        
        if function == "logistic":
            return self.logistic(X)
        elif function == "tanh":
            return self.tanh(X)
        elif function == "leaky_relu":
            return self.leaky_relu(X)
        else:
            raise ValueError(f"{function} is an unsupported activation function!")
        
    def activation_derivative(self, X, function):
        
        if function == "logistic":
            return self.logistic_derivative(X)
        elif function == "tanh":
            return self.tanh_derivative(X)
        elif function == "leaky_relu":
            return self.leaky_relu_derivative(X)
        else:
            raise ValueError(f"{function} is an unsupported activation function!")
    
    def sum_squared_errors(self, Y_true, Y_pred):
        
        E = 1/2 * np.sum((Y_true - Y_pred)**2)
        return E
    
    def __forward(self, x):

        # Insert the input as the first layer of the MLP
        self.layers.append(x)
        
        # Forward pass from input layer to output layer
        # print(f"{X.shape} x {self.weights[0].shape} x {self.biases[0].shape}")
        for i in range(self.depth - 1):
            z = x @ self.weights[i] + self.biases[i]
            phi = self.activation(z, self.activation_functions[i])
            self.layers.append(phi)
            x = phi

        return self.layers[-1]
    
    def __update_parameters(self):
        # Update weights and biases with momentum
        for i in range(self.depth - 1):
            # Update the velocity for weights and biases
            self.velocity[i]["weights"] = self.momentum * self.velocity[i]["weights"] - self.learning_rate * self.weight_gradients[i]
            self.velocity[i]["biases"] = self.momentum * self.velocity[i]["biases"] - self.learning_rate * self.bias_gradients[i]

            # Update the weights and biases using the new velocity
            self.weights[i] += self.velocity[i]['weights']
            self.biases[i] += self.velocity[i]['biases']

    def __backpropagation(self, Y_true):

        # Compute the sum of errors
        Y_pred = self.layers[-1]
        e = Y_true - Y_pred
        
        # Compute gradients in the output layer
        phi_prime = self.activation_derivative(self.layers[-1], self.activation_functions[-1])
        delta = -e * phi_prime
        weight_gradient = self.layers[-2].T @ delta
        bias_gradient = np.sum(delta, axis=0, keepdims=True)
        self.weight_gradients.append(weight_gradient)
        self.bias_gradients.append(bias_gradient)
        
        # Compute gradients in the hidden layers
        for i in range(-2, -self.depth, -1):
            phi_prime = self.activation_derivative(self.layers[i], self.activation_functions[i])
            delta = delta @ self.weights[i+1].T * phi_prime
            weight_gradient = self.layers[i-1].T @ delta
            bias_gradient = np.sum(delta, axis=0, keepdims=True)
            self.weight_gradients.append(weight_gradient)
            self.bias_gradients.append(bias_gradient)
            
        self.weight_gradients.reverse()
        self.bias_gradients.reverse()
        
        self.__update_parameters()
        
    def train(self, X_train, Y_train, epochs):
        
        for i in range(epochs):
            Y_pred = self.__forward(X_train)
            self.__backpropagation(Y_train)
            if (i % 100 == 0):
                print(f"Epoch {i}, Sum of Squared Errors: {self.sum_squared_errors(Y_train, Y_pred)}")

In [None]:
EPOCHS = 5000
input_size, output_size = X_train.shape[1], Y_train.shape[1]
hidden_layer_sizes = [100, 100]
activation_functions = ["tanh", "tanh", "logistic"]

mlp = MultiLayerPerceptron(input_size=input_size, hidden_sizes=hidden_layer_sizes, output_size=output_size, activation_functions=activation_functions)
mlp.train(X_train, Y_train, EPOCHS)

layer_sizes = [354, 100, 100, 8]
Epoch 0, Sum of Squared Errors: 13990.63921920437


  return 1 / (1 + np.exp(-a * x))


Epoch 100, Sum of Squared Errors: 14533.930007921286
Epoch 200, Sum of Squared Errors: 5275.29250658966
Epoch 300, Sum of Squared Errors: 2471.6040415599364
Epoch 400, Sum of Squared Errors: 3313.718653901646
Epoch 500, Sum of Squared Errors: 2787.490310970903
Epoch 600, Sum of Squared Errors: 1623.478027207096
Epoch 700, Sum of Squared Errors: 1961.5917019722701
Epoch 800, Sum of Squared Errors: 2034.537117706747
Epoch 900, Sum of Squared Errors: 2081.12965243935
Epoch 1000, Sum of Squared Errors: 1908.9296299654006
Epoch 1100, Sum of Squared Errors: 2082.429257670056
Epoch 1200, Sum of Squared Errors: 1552.260509444332
Epoch 1300, Sum of Squared Errors: 1493.0697426208317
Epoch 1400, Sum of Squared Errors: 1438.7337914690177
Epoch 1500, Sum of Squared Errors: 1392.9464657462968
Epoch 1600, Sum of Squared Errors: 1854.658767315432
Epoch 1700, Sum of Squared Errors: 2002.2088053956916
Epoch 1800, Sum of Squared Errors: 2647.9278931647864
Epoch 1900, Sum of Squared Errors: 2299.48568108