# Libraries

In [107]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Import Data

In [108]:
features = np.load('clean_data/features.npy')
labels = np.load('clean_data/labels.npy')

## Spilt Train, Cross Validation and Test Data

In [109]:
x_train, x_test_full, y_train, y_test_full = train_test_split(features, labels, train_size=0.6, random_state=1)
x_test, x_cv, y_test, y_cv = train_test_split(x_test_full, y_test_full, train_size=0.5, random_state=1)
print(x_train.shape)
print(x_cv.shape)
print(x_test.shape)
print(y_train.shape)

(33195, 33)
(11065, 33)
(11065, 33)
(33195,)


# Back Propogation, R  Back Propogation

# Activation Function

In [110]:
def sigmoid(Z): 
    return 1/(1+np.exp(-Z))

## Neural Network

In [111]:

class NeuralNetwork:
    def __init__(self, x, y,vocabulary_size=21361, nodes_in_layer1=60, nodes_in_layer2=30, nodes_in_layer3=1, l_rate=0.1):
        embedding_dim =50
        self.x=x
        vocab_size = vocabulary_size +1 # Add 1 for the out-of-vocabulary token
        self.embedding_weights = np.random.randn(vocab_size, embedding_dim)
        # define x, y
        self.inputs_in_layer0 =self.embedding(self.x)
        self.y = y.reshape(-1, 1)  # reshape y to be a column vector
        
        self.l_rate = l_rate  # learning rate
        
        # define and set the number of neurons in each layer
        self.nodes_in_layer1 = nodes_in_layer1
        self.nodes_in_layer2 = nodes_in_layer2
        self.nodes_in_layer3 = nodes_in_layer3
        
        # initialize the weights (theta) matrices
        self.thetas_layer0 = np.random.rand(self.inputs_in_layer0.shape[1] + 1, self.nodes_in_layer1) 
        self.thetas_layer1 = np.random.rand(self.nodes_in_layer1 + 1, self.nodes_in_layer2) 
        self.thetas_layer2 = np.random.rand(self.nodes_in_layer2 + 1, self.nodes_in_layer3)  
    def feedforward(self):      
        #compute all the nodes (a1, a2, a3, a4) in layer1
        n = self.inputs_in_layer0.shape[0]

        self.Z1 = self.thetas_layer0[0] + np.dot(self.inputs_in_layer0, self.thetas_layer0[1:])
        self.layer1 = sigmoid(self.Z1)  #values of a1, a2, a3, a4 in layer 1
        
        #compute all the nodes (a1, a2, a3) in layer2
        self.Z2 = self.thetas_layer1[0] + np.dot(self.layer1, self.thetas_layer1[1:])
        self.layer2 = sigmoid(self.Z2)  #values of a1, a2, a3 in layer 2
        
        #compute all the nodes (a1) in layer3
        self.Z3 = self.thetas_layer2[0] + np.dot(self.layer2, self.thetas_layer2[1:])
        self.layer3 = sigmoid(self.Z3)  #output layer      
        
        return self.layer3
    
    def cost_func(self):
        self.n = self.inputs_in_layer0.shape[0] #number of training examples
        self.cost = (1/self.n) * np.sum(-self.y * np.log(self.layer3) - (1 - self.y) * np.log(1 - self.layer3)) #cross entropy
        return self.cost 
    def embedding(self, x):
            self.embedded_input = self.embedding_weights[x]
            pooled_embeddings = np.mean(self.embedded_input, axis=1)
            return pooled_embeddings
    def calculate_accuracy(self ):
        actual_output=self.y
        predicted_output=self.layer3
        # Convert predicted probabilities to binary predictions (0 or 1)
        predicted_classes = (predicted_output >= 0.5).astype(int)

        # Compare predicted classes with actual classes
        correct_predictions = (predicted_classes == actual_output).sum()

        # Calculate accuracy
        accuracy = correct_predictions / len(actual_output)

        return accuracy

    def Rbackprop(self):
        # Define RProp parameters
        delta0 = 0.01  # Initial update value
        delta_max = 20  # Maximum update value
        delta_min = 1e-6  # Minimum update value
        eta_plus = 1.5  # Increase factor
        eta_minus = 0.5  # Decrease factor

        # dervative of E with respect to theta and bias in layer2
        self.dE_dlayer3 = (1 / self.n) * (self.layer3 - self.y) / (self.layer3 * (1 - self.layer3))
        self.dE_dZ3 = np.multiply(self.dE_dlayer3, sigmoid(self.Z3) * (1 - sigmoid(self.Z3)))
        self.dE_dtheta2 = np.dot(self.layer2.T, self.dE_dZ3)
        self.dE_dbias2 = np.dot(np.ones(self.n), self.dE_dZ3)

        # dervative of E with respect to theta and bias in layer1
        self.dE_dlayer2 = np.dot(self.dE_dZ3, self.thetas_layer2[1:].T)
        self.dE_dZ2 = np.multiply(self.dE_dlayer2, sigmoid(self.Z2) * (1 - sigmoid(self.Z2)))
        # self.dE_dZ1 = np.multiply(self.dE_dlayer2, dervative_relu(self.Z2))
        self.dE_dtheta1 = np.dot(self.layer1.T, self.dE_dZ2)
        self.dE_dbias1 = np.dot(np.ones(self.n), self.dE_dZ2)

        # dervative of E with respect to theta and bias in layer0
        self.dE_dlayer1 = np.dot(self.dE_dZ2, self.thetas_layer1[1:].T)
        self.dE_dZ1 = np.multiply(self.dE_dlayer1, sigmoid(self.Z1) * (1 - sigmoid(self.Z1)))
        # self.dE_dZ1 = np.multiply(self.dE_dlayer1, dervative_relu(self.Z1))
        self.dE_dtheta0 = np.dot(self.inputs_in_layer0.T, self.dE_dZ1)
        self.dE_dbias0 = np.dot(np.ones(self.n), self.dE_dZ1)

        # Initialize RProp update values
        if not hasattr(self, 'prev_dE_dtheta2'):
            self.prev_dE_dtheta2 = np.zeros_like(self.dE_dtheta2)
            self.delta_theta2 = np.full_like(self.dE_dtheta2, delta0)
        else:
            self.delta_theta2 = np.where(self.dE_dtheta2 * self.prev_dE_dtheta2 > 0,
                                        np.minimum(self.delta_theta2 * eta_plus, delta_max),
                                        np.maximum(self.delta_theta2 * eta_minus, delta_min))
        self.prev_dE_dtheta2 = self.dE_dtheta2

        if not hasattr(self, 'prev_dE_dtheta1'):
            self.prev_dE_dtheta1 = np.zeros_like(self.dE_dtheta1)
            self.delta_theta1 = np.full_like(self.dE_dtheta1, delta0)
        else:
            self.delta_theta1 = np.where(self.dE_dtheta1 * self.prev_dE_dtheta1 > 0,
                                        np.minimum(self.delta_theta1 * eta_plus, delta_max),
                                        np.maximum(self.delta_theta1 * eta_minus, delta_min))
        self.prev_dE_dtheta1 = self.dE_dtheta1

        if not hasattr(self, 'prev_dE_dtheta0'):
            self.prev_dE_dtheta0 = np.zeros_like(self.dE_dtheta0)
            self.delta_theta0 = np.full_like(self.dE_dtheta0, delta0)
        else:
            self.delta_theta0 = np.where(self.dE_dtheta0 * self.prev_dE_dtheta0 > 0,
                                        np.minimum(self.delta_theta0 * eta_plus, delta_max),
                                        np.maximum(self.delta_theta0 * eta_minus, delta_min))
        self.prev_dE_dtheta0 = self.dE_dtheta0

        # Updating theta using RProp in layers 2, 1, and 0
        self.thetas_layer2[1:] -= np.sign(self.dE_dtheta2) * self.delta_theta2
        self.thetas_layer1[1:] -= np.sign(self.dE_dtheta1) * self.delta_theta1
        self.thetas_layer0[1:] -= np.sign(self.dE_dtheta0) * self.delta_theta0

        # Updating bias using RProp in layers 2, 1, and 0
        self.thetas_layer2[0] -= np.sign(self.dE_dbias2) * self.delta_theta2[0]
        self.thetas_layer1[0] -= np.sign(self.dE_dbias1) * self.delta_theta1[0]
        self.thetas_layer0[0] -= np.sign(self.dE_dbias0) * self.delta_theta0[0]
        return self 
    
    def backprop(self):
        #dervative of E with respect to theta and bias in layer2
        self.dE_dlayer3 = (1/self.n) * (self.layer3-self.y)/(self.layer3*(1-self.layer3))
        self.dE_dZ3 = np.multiply(self.dE_dlayer3, (sigmoid(self.Z3)* (1-sigmoid(self.Z3))))
        self.dE_dtheta2 = np.dot(self.layer2.T, self.dE_dZ3)
        self.dE_dbias2 = np.dot(np.ones(self.n), self.dE_dZ3)
        
        #dervative of E with respect to theta and bias in layer1
        self.dE_dlayer2 = np.dot(self.dE_dZ3, self.thetas_layer2[1:].T)
        self.dE_dZ2 = np.multiply(self.dE_dlayer2, sigmoid(self.Z2)* (1-sigmoid(self.Z2)))
        self.dE_dtheta1 = np.dot(self.layer1.T, self.dE_dZ2)
        self.dE_dbias1 = np.dot(np.ones(self.n), self.dE_dZ2)
        
        #dervative of E with respect to theta and bias in layer0
        self.dE_dlayer1 = np.dot(self.dE_dZ2, self.thetas_layer1[1:].T)
        self.dE_dZ1 = np.multiply(self.dE_dlayer1, sigmoid(self.Z1)* (1-sigmoid(self.Z1)))
        self.dE_dtheta0 = np.dot(self.inputs_in_layer0.T, self.dE_dZ1)
        self.dE_dbias0 = np.dot(np.ones(self.n), self.dE_dZ1)
        #updating theta using gradient descent in layers 2, 1, and 0
        self.thetas_layer2[1:] = self.thetas_layer2[1:] - self.l_rate * self.dE_dtheta2
        self.thetas_layer1[1:] = self.thetas_layer1[1:] - self.l_rate * self.dE_dtheta1
        self.thetas_layer0[1:] = self.thetas_layer0[1:] - self.l_rate * self.dE_dtheta0
        # self.de_wegihts = np.dot(self.embedded_input.T, self.dE_dZ1)
        # self.embedding_weights = -self.embedding_weights - self.l_rate * self.de_wegihts

        
        #updating bias using gradient descent in layers 2, 1, and 0
        self.thetas_layer2[0] = self.thetas_layer2[0] - self.l_rate * self.dE_dbias2
        self.thetas_layer1[0] = self.thetas_layer1[0] - self.l_rate * self.dE_dbias1
        self.thetas_layer0[0] = self.thetas_layer0[0] - self.l_rate * self.dE_dbias0
        return self
    def fit(self,epochs,Backpropagate):
        losses=[]
        for i in range(epochs):
            self.feedforward()
            error=self.cost_func()
            losses.append(error)
            if Backpropagate==True:
                self.backprop()
            else:
                self.Rbackprop()
            print("iteration #",i+1)
            print('accuracy: ',self.calculate_accuracy())
            print("Cost: \n",error,"\n")
    def evaluate(self, x,y):
        inputs_layer0 = self.embedding(x)
        Z1 = self.thetas_layer0[0] + np.dot(inputs_layer0, self.thetas_layer0[1:])
        layer1 = sigmoid(Z1)

        Z2 = self.thetas_layer1[0] + np.dot(layer1, self.thetas_layer1[1:])
        layer2 = sigmoid(Z2)

        Z3 = self.thetas_layer2[0] + np.dot(layer2, self.thetas_layer2[1:])
        layer3 = sigmoid(Z3)
        loss= (1/inputs_layer0.shape[0]) * np.sum(-y * np.log(layer3) - (1 - y) * np.log(1 - layer3)) #cross entropy
        actual_output=y
        predicted_output=layer3
        # Convert predicted probabilities to binary predictions (0 or 1)
        predicted_classes = (predicted_output >= 0.5).astype(int)

        # Compare predicted classes with actual classes
        correct_predictions = (predicted_classes == actual_output).sum()

        # Calculate accuracy
        accuracy = correct_predictions / len(actual_output)
        return loss,accuracy,layer3          

# Train

In [112]:
nn=NeuralNetwork(x_train,y_train)
nn.fit(epochs=100,Backpropagate=True)

iteration # 1
accuracy:  0.4561229100768188
Cost: 
 7.219087715414043 

iteration # 2
accuracy:  0.4561229100768188
Cost: 
 6.302535098676623 

iteration # 3
accuracy:  0.4561229100768188
Cost: 
 5.386030158871479 

iteration # 4
accuracy:  0.4561229100768188
Cost: 
 4.469774396744802 

iteration # 5
accuracy:  0.4561229100768188
Cost: 
 3.554853540012603 

iteration # 6
accuracy:  0.4561229100768188
Cost: 
 2.64706626317619 

iteration # 7
accuracy:  0.4561229100768188
Cost: 
 1.7760735513282853 

iteration # 8
accuracy:  0.4561229100768188
Cost: 
 1.0649191795423756 

iteration # 9
accuracy:  0.4561229100768188
Cost: 
 0.7382407153018583 

iteration # 10
accuracy:  0.5438770899231812
Cost: 
 0.6920024432624904 

iteration # 11
accuracy:  0.5438770899231812
Cost: 
 0.6894312419396587 

iteration # 12
accuracy:  0.5438770899231812
Cost: 
 0.6892984218946997 

iteration # 13
accuracy:  0.5438770899231812
Cost: 
 0.6892913875175786 

iteration # 14
accuracy:  0.5438770899231812
Cost: 
 0

In [121]:
test=nn.evaluate(x_cv,y_cv.reshape(-1,1))
print('Loss :',test[0])
print('Acc :',test[1])

Loss : 0.6905621394269598
Acc : 0.5366470854044284


In [122]:
Test=nn.evaluate(x_test,y_test.reshape(-1,1))
print('Loss :',Test[0])
print('Acc :',Test[1])

Loss : 0.6899574235317435
Acc : 0.540081337550836


In [115]:
nn1=NeuralNetwork(x_train,y_train)
nn1.fit(epochs=100,Backpropagate=False)

iteration # 1
accuracy:  0.4561229100768188
Cost: 
 8.02056278901174 

iteration # 2
accuracy:  0.4561229100768188
Cost: 
 7.76371028061254 

iteration # 3
accuracy:  0.4561229100768188
Cost: 
 7.28279283794215 

iteration # 4
accuracy:  0.4561229100768188
Cost: 
 6.378419936982094 

iteration # 5
accuracy:  0.4561229100768188
Cost: 
 5.031855074525351 

iteration # 6
accuracy:  0.4561229100768188
Cost: 
 3.7442144790670175 

iteration # 7
accuracy:  0.4561229100768188
Cost: 
 2.6975413658964267 

iteration # 8
accuracy:  0.4561229100768188
Cost: 
 1.533843501897184 

iteration # 9
accuracy:  0.5438770899231812
Cost: 
 0.6952624809154464 

iteration # 10
accuracy:  0.4561229100768188
Cost: 
 0.8932826332841098 

iteration # 11
accuracy:  0.4560626600391625
Cost: 
 0.7126756155431136 

iteration # 12
accuracy:  0.5438770899231812
Cost: 
 0.7768232774196911 

iteration # 13
accuracy:  0.548666967916855
Cost: 
 0.6883778256845222 

iteration # 14
accuracy:  0.4561229100768188
Cost: 
 0.89

  return 1/(1+np.exp(-Z))


iteration # 26
accuracy:  0.5438770899231812
Cost: 
 1.2197521163421619 

iteration # 27
accuracy:  0.5427925892453683
Cost: 
 0.6974427081861422 

iteration # 28
accuracy:  0.4561229100768188
Cost: 
 0.8113514283535478 

iteration # 29
accuracy:  0.4561229100768188
Cost: 
 0.7699235952029498 

iteration # 30
accuracy:  0.4561229100768188
Cost: 
 0.9640498224001851 

iteration # 31
accuracy:  0.5439674649796656
Cost: 
 0.8402194942969355 

iteration # 32
accuracy:  0.45609278505799067
Cost: 
 0.6976372064994109 

iteration # 33
accuracy:  0.4561229100768188
Cost: 
 0.702001646643478 

iteration # 34
accuracy:  0.4561229100768188
Cost: 
 0.9255961000308928 

iteration # 35
accuracy:  0.5462569664106041
Cost: 
 0.6898086333745131 

iteration # 36
accuracy:  0.5438770899231812
Cost: 
 1.5157409741250725 

iteration # 37
accuracy:  0.5439975899984938
Cost: 
 0.9275988777002518 

iteration # 38
accuracy:  0.4561229100768188
Cost: 
 0.8917900033872441 

iteration # 39
accuracy:  0.4561229100

In [120]:
Test_1=nn1.evaluate(x_cv,y_cv.reshape(-1,1))
print('Loss :',Test_1[0])
print('Acc :',Test_1[1])

Loss : 0.6715898270471344
Acc : 0.5906009941256213


  return 1/(1+np.exp(-Z))


In [119]:
Test_2=nn1.evaluate(x_test,y_test.reshape(-1,1))
print('Loss :',Test_2[0])
print('Acc :',Test_2[1])

Loss : 0.6712509884497972
Acc : 0.5923181201988251


  return 1/(1+np.exp(-Z))
