# Question 3

Construct a multi layer perceptron with 3 layers for the same dataset, train it, and see
whether the result improves or not.

In [1]:
## Import necessary libraries
import os 
import numpy as np 
import pandas as pd

import matplotlib.pyplot as plt

### Utilitty Functions

In [2]:
# Create a sigmoid activation function 
def sigmoid(x):
    return 1/(1 + np.exp(-x))

In [3]:
# Create a ReLu function 
def relu(x):
    # Return x of x > 0 or 0 if x <= 0
    return x * (x > 0)

In [4]:
# Create a sigmoid derivative function
def der_sigmoid(x): 
    sigmoid(x)*(1-sigmoid(x))

In [5]:
# Define a cost function 
def cost_function1(pred, Y):
    
    # Number of samples is m
    m = Y.shape[1]
    
    # Compute the cross-entropy cost
    logprobs = Y * np.log(pred) + (1-Y) * np.log(1-pred)
    cost = -1/m * np.sum(logprobs)
    
    return cost

In [6]:
def cost_function(pred, Y):
    
    cost = np.mean(np.power(Y.T - pred.T, 2));
    
    return cost

### Read the data

In [7]:
train_data = pd.read_csv('../Datasets/final_data.csv', index_col=0)
test_data = pd.read_csv('../Datasets/test_data.csv', index_col=0)

train_data.shape, test_data.shape

((190, 2049), (20, 2049))

In [8]:
# Assign classes as 0s and 1s 
train_data['class'] = train_data['class'].map({ "cat":0, "dog":1 })
test_data['class'] = test_data['class'].map({ "cat":0, "dog":1 })

### Define the MLP

1. Initialize the weights and biases. Weights are randomly initialized while biases are given 0s. Save these 

In [9]:
class MLP:
    
    # Initialize the instance variables 
    def __init__(self, input_size, hidden_units, output_size):
        
        # Initialize the weights and the biases for the layers 
        W1 = np.random.randn(hidden_units, input_size)
        b1 = np.zeros(shape=(hidden_units, 1))
        
        W2 = np.random.randn(output_size, hidden_units)
        b2 = np.zeros(shape=(output_size, 1))
        
        # Show details of the matrices 
        print(f"Dimensions of the hidden layer weights: {W1.shape} and dimensions of the bias vector: {b1.shape}")
        print(f"\nDimensions of the output layer weights: {W2.shape} and dimensions of the bias vector: {b2.shape}")
        
        # We can save them in a parameters dictionary 
        self.params = {"W1": W1,
                       "b1": b1,
                       "W2": W2,
                       "b2": b2}
    
    # Forward propagation function 
    def forward_prop(self, X):
        
        # Perform forward prop for both the layers
        
        # Extract the layer parameters
        W1 = self.params['W1']
        b1 = self.params['b1']
        W2 = self.params['W2']
        b2 = self.params['b2']
        
        # Forward prop for layer 1
        z1 = np.dot(W1, X) + b1
        a1 = relu(z1)
        print(a1)
        print("Shape of the feature matrix after the first pass", a1.shape)
        
        # Forward prop for layer 2
        z2 = np.dot(W2, a1) + b2
        a2 = sigmoid(z2)
        print(a2)
        print("Shape of the feature matrix after the second pass", a2.shape)
        
        # Store the propagation outputs
        self.forward_outputs = {"z1": z1,
                           "a1": a1,
                           "z2": z2,
                           "a2": a2}
    
    # Define a backward propagation function 
    def backward_prop(self, X, Y):
        
        # Total units 
        m = X.shape[1]
        
        # Collect the relevant parameters 
        W1 = self.params['W1']
        W2 = self.params['W2']
        a1 = self.forward_outputs['a1']
        a2 = self.forward_outputs['a2']
        
        # Start backprop from the output unit 
        dZ2 = a2 - Y
        dW2 = 1/m * dZ2 @ a1.T
        db2 = 1/m * np.sum(dZ2, axis=1, keepdims=True)
        dZ1 = W2.T @ dZ2 * (1 - a1**2) 
        dW1 = 1/m * dZ1 @ X.T
        db1 = 1/m * np.sum(dZ1, axis=1, keepdims=True)
        
        # collect gradients
        self.grads = {"dW1": dW1,
                      "db1": db1,
                      "dW2": dW2,
                      "db2": db2}
    
    # Function to update the parameters 
    def update_params(self, learning_rate=1):
        
        # Get grads and params 
        W1 = self.params['W1']
        b1 = self.params['b1']
        W2 = self.params['W2']
        b2 = self.params['b2']
        
        dW1 = self.grads['dW1']
        db1 = self.grads['db1']
        dW2 = self.grads['dW2']
        db2 = self.grads['db2']
        
        # Update step 
        W1 = W1 - learning_rate * dW1
        b1 = b1 - learning_rate * db1
        W2 = W2 - learning_rate * dW2
        b2 = b2 - learning_rate * db2
        
        # Reset parameter values 
        self.params = {"W1": W1,
                       "b1": b1,
                       "W2": W2,
                       "b2": b2}

    
    # Create a function to train the neural network 
    def fit(self, X, y, epochs=5, learning_rate=0.1):
        
        print("\nTraining for ", epochs, " epochs")
        
        # Iterate through all epochs
        for e in range(epochs):
            print("Training Epoch: ", e+1)
            
            # Perform the forward prop step 
            self.forward_prop(X)
            
            # Compute the cost 
            preds = self.forward_outputs['a2']
            cost = cost_function(pred=preds, Y=y)
            print("The cost after this step:", cost)
            
            # Backprop step 
            self.backward_prop(X, y)
            
            # Update the parameters
            self.update_params(learning_rate=1)
       
    # Prediction function 
    def predict(self, X):
        
        # Perform a forward prop step
        self.forward_prop(X)
        a2 = self.forward_outputs['a2']
        
        # 1 if a2 > 0.5 else 0
        preds = (a2 > 0.5) * 1
        
        return preds
        

## Training Phase

In [11]:
# Get the training and testing data 
X_train = train_data.drop(['class'], axis=1).values.T
X_test = test_data.drop(['class'], axis=1).values.T

y_train = train_data[['class']].to_numpy().T
y_test = test_data[['class']].to_numpy().T
print(X_train.shape, X_test.shape, y_train.shape, y_train.shape)
print("\nNumber of training samples", X_train.shape[1])

(2048, 190) (2048, 20) (1, 190) (1, 190)

Number of training samples 190


In [12]:
# Set the initial params
input_size = X_train.shape[0]
hidden_units = 128
output_size = y_train.shape[0]

# Hyperparameters
epochs=5
learning_rate=0.01

In [13]:
# Initialize the MLP 
mlp = MLP(input_size, hidden_units, output_size)

Dimensions of the hidden layer weights: (128, 2048) and dimensions of the bias vector: (128, 1)

Dimensions of the output layer weights: (1, 128) and dimensions of the bias vector: (1, 1)


In [14]:
# Train on the data
mlp.fit(X=X_train, y=y_train, epochs=epochs, learning_rate=1)


Training for  5  epochs
Training Epoch:  1
[[   -0.            -0.            -0.         ... 45723.79184968
     -0.         10568.13838534]
 [   -0.         31576.09552567    -0.         ... 12853.26702586
   3156.24074705 34725.39641673]
 [ 5415.25054025 18460.9627581     -0.         ...    -0.
     -0.            -0.        ]
 ...
 [26299.44833916    -0.          9818.91906582 ...  9402.36689914
   6445.61613976    -0.        ]
 [   -0.         10580.65453939    -0.         ...    -0.
   5589.02548293  9719.93809793]
 [  884.95850326  4284.52023817    -0.         ...    -0.
     -0.          4411.36135293]]
Shape of the feature matrix after the first pass (128, 190)
[[0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1.
  1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.
  1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1.
  1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
  1. 1. 1. 1. 1. 1. 1. 1

  This is separate from the ipykernel package so we can avoid doing imports until


In [15]:
mlp.predict(X_test)

[[-0. -0. -0. ... -0. -0. -0.]
 [-0. -0. -0. ... -0. -0. -0.]
 [-0. -0. -0. ... -0. -0. -0.]
 ...
 [nan nan nan ... nan nan nan]
 [-0. -0. -0. ... -0. -0. -0.]
 [nan nan nan ... nan nan nan]]
Shape of the feature matrix after the first pass (128, 20)
[[nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
  nan nan]]
Shape of the feature matrix after the second pass (1, 20)


  after removing the cwd from sys.path.


array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])