In [1]:
'''
Design of a Neural Network from scratch

*************<IMP>*************
Mention hyperparameters used and describe functionality in detail in this space
- carries 1 mark
'''
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
class NN: 
    parameters = list()
    
    def init_params(self,neuron_count_per_layer):
        num_layers = len(neuron_count_per_layer)
        params = [None for i in range(2*(num_layers-1))]
        for i in range(1,num_layers):
            params[2*i-2]=np.random.randn(neuron_count_per_layer[i], neuron_count_per_layer[i-1]) * 0.01
            params[2*i-1]=np.zeros((neuron_count_per_layer[i], 1))
        return params
    
    #Clean the data by replacing the null values with the mean of the column
    def data_clean(self,df):
        df.columns = df.columns.str.strip()
        for column in df.columns:
            if column in ['Weight','HB','BP']:
                df[column].fillna(value=df[column].mean(), inplace=True)
            elif column in ['Community','Delivery phase','IFA','Education']:
                df[column].fillna(value=df[column].mode()[0], inplace=True)
            else:
                df[column].fillna(value=df[column].median(), inplace=True)
        return df
    
    def relu(self,Z):
        return np.maximum(0,Z),Z
    
    def sigmoid(self,Z):
        return 1/(1+np.exp(-Z)),Z
    
    def compute_activation(self,A,weight,bias,activation):
        Z = weight@A + bias
        cache1 = (A,weight,bias)
        if activation=='relu':
            A1,cache2 = self.relu(Z)
        else:
            A1,cache2 = self.sigmoid(Z)
        return A1,(cache1,cache2)
     
    def compute_gradients(self,dA,vals,activation):
        cache1,cache2 = vals
        if activation=='relu':
            dZ = np.array(dA,copy=True)
            #print(cache2.shape,dA.shape)
            dZ[cache2<=0]=0
        if activation=='sigmoid':
            sig = 1/(1+np.exp(-cache2))
            dZ = dA * sig * (1-sig)
        A_prev, W, b = cache1
        x = A_prev.shape[1]
        dW = 1 / x * dZ @ A_prev.T
        db = 1 / x * np.sum(dZ, axis=1, keepdims=True)
        dA_prev = W.T @ dZ
        return dA_prev,dW,db
            
    
    def forward_propogation(self,X,parameters):
        num_layers = len(parameters)//2
        back_prop_values = []
        A = X
        
        for i in range(1,num_layers):
            prev_act = A
            weight = parameters[2*i-2]
            bias = parameters[2*i-1]
            A,back_prop_value = self.compute_activation(prev_act,weight,bias,activation='relu')
            back_prop_values.append(back_prop_value)
        
    
        #For Last layer i.e sigmoid 
        A,back_prop_value = self.compute_activation(A,parameters[-2],parameters[-1],activation='sigmoid')
        back_prop_values.append(back_prop_value) 
        #print(len(back_prop_values))
        return A,back_prop_values
    
    def back_propogation(self,Y,activations,parameters,back_prop_values,alpha):
        #Computing the necessary derivatives
        gradients = {}
        num_layers = len(parameters)//2
        Y.reshape(activations.shape)
        dA = - (np.divide(Y, activations) - np.divide(1 - Y, 1 - activations))
        #print(dA.shape)
        vals = back_prop_values[num_layers-1]
        gradients["dA" + str(num_layers-1)], gradients["dW" + str(num_layers)], gradients["db" + str(num_layers)] = self.compute_gradients(dA,vals,'sigmoid')
        for layer in reversed(range(num_layers-1)):
            vals = back_prop_values[layer]
            gradients["dA" + str(layer)], gradients["dW" + str(layer + 1)], gradients["db" + str(layer + 1)] = self.compute_gradients(gradients['dA'+str(layer+1)], vals, 'relu')
        
        #Updating the parameters
        for i in range(1,num_layers+1):
            parameters[2*i-2]=parameters[2*i-2] - alpha * gradients['dW'+str(i)]
            parameters[2*i-1]=parameters[2*i-1] - alpha * gradients['db'+str(i)]
        
        return parameters
        

    def calc_cost(self,A,Y):
        return np.squeeze(-1 / len(Y) * np.sum(Y * np.log(A) + (1-Y) * np.log(1-A)))

    ''' X and Y are dataframes '''

    def fit(self,X,Y):
        '''
        Function that trains the neural network by taking x_train and y_train samples as input
        '''

        #Clean the data
        X = self.data_clean(X)
        #Set hyperparameters
        num_itertations = 45000
        alpha = 0.02
        
    
        #Init parameters
        neuron_count_per_layer = [9,30,30,25,1]
        self.parameters = self.init_params(neuron_count_per_layer)
        
        #Making necessary changes to dimensions
        X = np.transpose(np.array(X))
        Y = np.array(Y)
        Y = np.reshape(Y,(1,Y.shape[0]))
        
        for i in range(1,num_itertations+1):
            #Fp
            activations,back_prop_values = self.forward_propogation(X,self.parameters)
            #Bp
            self.parameters = self.back_propogation(Y,activations,self.parameters,back_prop_values,alpha)
            #Print Cost after every 10000 iters
            if i%500==0:
                print('Cost after iter '+str(i)+ ':' + str(self.calc_cost(activations,Y)/100))
 
        
        
        
    
    def predict(self,X):

        """
        The predict function performs a simple feed forward of weights
        and outputs yhat values 

        yhat is a list of the predicted value for df X
        """
        yhat = []
        X = self.data_clean(X)
        X = np.transpose(np.array(X))
        prob,_ = self.forward_propogation(X,self.parameters)
        for ans in prob[0]:
            if ans>0.5:
                yhat.append(1)
            else:
                yhat.append(0)
        return yhat

    def CM(self,y_test,y_test_obs):
        '''
        Prints confusion matrix 
        y_test is list of y values in the test dataset
        y_test_obs is list of y values predicted by the model

        '''

        for i in range(len(y_test_obs)):
            if(y_test_obs[i]>0.6):
                y_test_obs[i]=1
            else:
                y_test_obs[i]=0

        cm=[[0,0],[0,0]]
        fp=0
        fn=0
        tp=0
        tn=0

        for i in range(len(y_test)):
            if(y_test[i]==1 and y_test_obs[i]==1):
                tp=tp+1
            if(y_test[i]==0 and y_test_obs[i]==0):
                tn=tn+1
            if(y_test[i]==1 and y_test_obs[i]==0):
                fp=fp+1
            if(y_test[i]==0 and y_test_obs[i]==1):
                fn=fn+1
        cm[0][0]=tn
        cm[0][1]=fp
        cm[1][0]=fn
        cm[1][1]=tp

        p= tp/(tp+fp)
        r=tp/(tp+fn)
        f1=(2*p*r)/(p+r)
        acc = (tp+tn)/(tp+tn+fp+fn)
        print("Confusion Matrix : ")
        print(cm)
        print("\n")
        print(f"Precision : {p}")
        print(f"Recall : {r}")
        print(f"F1 SCORE : {f1}")
        print(f"Accuracy : {acc}")

   






In [19]:
import pandas as pd
df = pd.read_csv('LBW_Dataset.csv')
X = df.iloc[:, :-1]
y = df.iloc[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 0)

In [3]:
nn = NN()

In [4]:
nn.fit(X_train,y_train)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._update_inplace(new_data)


Cost after iter 500:0.4160335016745384
Cost after iter 1000:0.41603347417266323
Cost after iter 1500:0.4160334740459443
Cost after iter 2000:0.416033473934557
Cost after iter 2500:0.41603347382463307
Cost after iter 3000:0.41603347369585053
Cost after iter 3500:0.4160334735844113
Cost after iter 4000:0.4160334734794684
Cost after iter 4500:0.4160334733707217
Cost after iter 5000:0.41603347326083956
Cost after iter 5500:0.4160334731493321
Cost after iter 6000:0.41603347303711985
Cost after iter 6500:0.4160334729282978
Cost after iter 7000:0.41603347281903214
Cost after iter 7500:0.41603347270800517
Cost after iter 8000:0.41603347259548085
Cost after iter 8500:0.41603347248182354
Cost after iter 9000:0.4160334723669667
Cost after iter 9500:0.4160334722495346
Cost after iter 10000:0.4160334721318543
Cost after iter 10500:0.4160334720149107
Cost after iter 11000:0.4160334719003345
Cost after iter 11500:0.41603347178474737
Cost after iter 12000:0.4160334716697549
Cost after iter 12500:0.416

In [5]:
y_hat = nn.predict(X_test)

In [6]:
nn.CM(y_test.tolist(),y_hat)

Confusion Matrix : 
[[0, 0], [6, 14]]


Precision : 1.0
Recall : 0.7
F1 SCORE : 0.8235294117647058
Accuracy : 0.7


In [279]:
X_train

Unnamed: 0,Community,Age,Weight,Delivery phase,HB,IFA,BP,Education,Residence
75,1,24.0,43.0,1.0,9.000000,1,1.666667,5.0,2.0
60,1,21.0,40.0,1.0,7.900000,0,1.666667,5.0,1.0
79,3,24.0,60.0,1.0,9.200000,1,1.714286,5.0,1.0
85,1,25.0,52.0,1.0,9.139286,0,1.804353,5.0,1.0
91,3,21.0,55.0,1.0,9.000000,0,1.375000,5.0,1.0
...,...,...,...,...,...,...,...,...,...
92,3,24.0,39.0,2.0,8.400000,0,1.500000,5.0,1.0
67,1,21.0,43.0,1.0,10.200000,1,1.571429,5.0,1.0
64,1,21.0,52.0,1.0,8.800000,1,13.875000,5.0,2.0
47,1,26.0,35.0,1.0,9.200000,1,1.571000,5.0,1.0


In [22]:
nn1 = NN()
nn1.fit(X_train,y_train)
y_hat1 = nn1.predict(X_test)

Cost after iter 500:0.3697503329495725
Cost after iter 1000:0.36833216036809424
Cost after iter 1500:0.3682983110880379
Cost after iter 2000:0.36829728810047707
Cost after iter 2500:0.368297091951979
Cost after iter 3000:0.36829691122891717
Cost after iter 3500:0.36829671007761255
Cost after iter 4000:0.3682964837717205
Cost after iter 4500:0.368296227763276
Cost after iter 5000:0.36829593635238916
Cost after iter 5500:0.3682956033342288
Cost after iter 6000:0.3682952203526847
Cost after iter 6500:0.36829477614278167
Cost after iter 7000:0.36829425413251904
Cost after iter 7500:0.3682936203894517
Cost after iter 8000:0.36829284514929844
Cost after iter 8500:0.3682919209413844
Cost after iter 9000:0.3682907793019294
Cost after iter 9500:0.368289381521606
Cost after iter 10000:0.3682876423572414
Cost after iter 10500:0.3682854205741322
Cost after iter 11000:0.36828251493211694
Cost after iter 11500:0.3682786322885097
Cost after iter 12000:0.36827326512423025
Cost after iter 12500:0.36826

In [23]:
nn1.CM(y_test.tolist(),y_hat1)

Confusion Matrix : 
[[4, 0], [4, 21]]


Precision : 1.0
Recall : 0.84
F1 SCORE : 0.9130434782608696
Accuracy : 0.8620689655172413
