<a href="https://colab.research.google.com/github/nssn96/ML_Neural-networks/blob/main/ML_nn_new.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ML Assignment 2 - Neural Networks


1.   Author : Surya Narayanan Nadhamuni Suresh
2.   UTA ID : 1001877873


In [323]:
#import lines
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

In [324]:
#References Used
#https://towardsdatascience.com/https-medium-com-piotr-skalski92-deep-dive-into-deep-networks-math-17660bc376ba
#https://towardsdatascience.com/lets-code-a-neural-network-in-plain-numpy-ae7e74410795
#https://towardsdatascience.com/an-introduction-to-neural-networks-with-implementation-from-scratch-using-python-da4b6a45c05b

# This is the Layer class

class Layer:

  def initial_w_bias(self,nn):
    param = {}
    #creating the values for the w and bias matrix
    for i in range(1,len(nn)):
      param['w' + str(i)] = np.random.randn(nn[i], nn[i-1])*0.01
      param['b' + str(i)] = np.random.randn(nn[i],1)*0.01
    return param


In [325]:
#This is the class for the hyperbolic tangent functions

#class htangent_activation(Layer):

def tanh_forwardprop(z):
  #2/(1 + e-2x) - 1
  return (2/(1+np.exp(-2*z)) -1)

def tanh_backprop(dA,z):
  value = tanh_forwardprop(z)
  return dA * value * (1-value)


In [326]:
#This is the class for the sigmoid function

#class sigmoid_activation(Layer):

def sigmoid_forwardprop(Z):
  return 1/(1+np.exp(-Z))
  
def sigmoid_backprop(dA,z):
  value = sigmoid_forwardprop(z)
  return dA * value * (1-value)

In [327]:
#This is the class for the softmax function

class softmax_activation(Layer):

  def softmax_forwardprop(self,z):
    return np.exp(z) / sum(np.exp(z))
  

  #derivative of softmax
  def softmax_backprop(self,z):
    exp=np.exp(z-z.max())
    return exp/np.sum(exp,axis=0)*(1-exp/np.sum(exp,axis=0))
    



  
  #Yet to edit this--- DONT FORGET+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  # def softmax_backprop(probs, bp_err):
  #   dim = probs.shape[1]
  #   output = np.empty(probs.shape)
  #   for j in range(dim):
  #       d_prob_over_xj = - (probs * probs[:,[j]])  # i.e. prob_k * prob_j, no matter k==j or not
  #       d_prob_over_xj[:,j] += probs[:,j]   # i.e. when k==j, +prob_j
  #       output[:,j] = np.sum(bp_err * d_prob_over_xj, axis=1)
  #   return output
  

In [328]:
# This is the Linear layer class

class LinearLayer(Layer):


  def forward(self,data,param):
    
    #Z_mat = np.dot(w,prev_out)
    
    layer_num = len(param)//2
    backup = {} # to store info required for the backward calculation

    #traversing over the layers in the neural network
    #this is the same steps that we used to assign the weights and bias for network
    for i in range(1,layer_num+1):
      #if it is the first iteration the input will be the input ie the train or the test data
      w = param['w'+str(i)]
      bias = param['b'+str(i)]
      if i==1:
        #Z_mat--> the input matrix calculation which will be passed in the activation function
        backup['Z' + str(i)] = np.dot(w, data) + bias
        backup['A' + str(i)] = tanh_forwardprop(backup['Z' + str(i)])
      else:
        backup['Z' + str(i)] = np.dot(w, backup['A' + str(i-1)]) + bias
        if i==layer_num:
          backup['A' + str(i)] = backup['Z' + str(i)]
        else:
          backup['A' + str(i)] = tanh_forwardprop(backup['Z' + str(i)])

      #storing the values for backward pass call
      #backup['Z' + str(i)] = Z_mat

    
    # for i in backup.keys():
    #   print(i)
    return backup
  
  def compute_loss(self,backup,target):
    layer_num = len(backup)//2
    y_pred = backup['A' + str(layer_num)]
    loss = cost = 1/(2*len(target)) * np.sum(np.square(y_pred - target))
    return loss
  
  def backward(self,data,target,param,backup):
    deriv_values= {}  #dictionary to store the derivative values
    layer_num = len(param)//2
    n = len(target)
     #since we need to go backward in backward propagation
    for i in range(layer_num,0,-1):

      # print(i)
      # print(backup['A'+str(i)])
      A_previous = backup['A'+str(i)]
      z_current = backup['Z'+str(i)]
      w = param['w'+str(i)]
      bias = param['b'+str(i)]

      if i == layer_num:
        dA_current = 1/n * (A_previous - target)
        dz = dA_current
      else:
        dA_current = np.dot(param['w' + str(i+1)].T, dz)
        dz = np.multiply(dA_current, np.where(A_previous>=0, 1, 0))
      if i==1:
        #dw = 1/n * np.dot(dz, data.T)
        dw = 1/n * np.dot(dz, data)
        dbias = 1/n * np.sum(dz, axis=1, keepdims=True)
      else:
        dw = 1/n * np.dot(dz,backup['A' + str(i-1)].T)
        dbias = 1/n * np.sum(dz, axis=1, keepdims=True)
      
      deriv_values['dw' + str(i)] = dw
      deriv_values['db' + str(i)] = dbias

    # for i in deriv_values.keys():
    #   print(i)
    return deriv_values
  
  def update_param(self,param,deriv_values,alpha):
    layer_num = len(param)//2
    updated_param = {}
    for i in range(1,layer_num+1):
      updated_param['w'+str(i)] = param['w' + str(i)] - alpha * deriv_values['dw' + str(i)]
      updated_param['b'+str(i)] = param['b' + str(i)] - alpha * deriv_values['db' + str(i)]
    return updated_param





In [329]:
# This is the class for entropy loss

class entropy_loss(LinearLayer):

  def cross_entropy_forward(self,y_pred,y):
    return -y * np.log(y_pred)
  
  def cross_entropy_backward(self,y_pred,y):
    return y-y_pred


In [330]:
#This part of the code is for sequential class

class Sequential(LinearLayer):

  def train(self,data,target,nn,steps,alpha):
    param = self.initial_w_bias(nn)
    for i in range(steps):
      backup = self.forward(data.T, param)
      loss = self.compute_loss(backup,target.T)
      deriv_values = self.backward(data,target,param,backup)
      param = self.update_param(param,deriv_values,alpha)
      print('Iteration ' + str(i+1) + ' , Loss = ' + str(loss) + '\n')
    return param
  
  def accuracy_calc(self,x_train,x_test,y_train,y_test,param,nn):
    backup_train = self.forward(x_train.T,param)
    backup_test = self.forward(x_test.T,param)
    accuracy_train = np.sqrt(mean_squared_error(y_train, backup_train['A' + str(len(nn)-1)].T))
    test_acc = np.sqrt(mean_squared_error(y_test, backup_test['A' + str(len(nn)-1)].T))
  
  def predict(self,data,param):
    backup = self.forward(data.T,param)
    pred = backup['A' + str(len(backup)//2)].T
    return pred


In [331]:
data = np.array([[0, 0],
                    [0, 1],
                    [1, 0],
                    [1, 1]])
target = np.array([0, 1, 1, 0])

In [332]:
# data = np.array([[[0,0]], [[0,1]], [[1,0]], [[1,1]]])
# target = np.array([[[0]], [[1]], [[1]], [[0]]])

In [333]:
# net = Sequential()
# data = load_boston()                                                              #load dataset
# X,Y = data["data"], data["target"]                                               #separate data into input and output features
# X_train,X_test,Y_train,Y_test = train_test_split(X, Y, test_size = 0.2)           #split data into train and test sets in 80-20 ratio
# layer_sizes = [13, 5, 5, 1]                                                       #set layer sizes, do not change the size of the first and last layer 
# num_iters = 1000                                                                  #set number of iterations over the training set(also known as epochs in batch gradient descent context)
# learning_rate = 0.03                                                              #set learning rate for gradient descent
# params = net.train(X_train, Y_train, layer_sizes, num_iters, learning_rate)           #train the model
# train_acc, test_acc = net.accuracy_calc(X_train, X_test, Y_train, Y_test, params)  #get training and test accuracy
# print('Root Mean Squared Error on Training Data = ' + str(train_acc))
# print('Root Mean Squared Error on Test Data = ' + str(test_acc))

In [334]:
#training part
net = Sequential()
layers = [2,2,2,1]                                                        
steps = 5000                                                                 
alpha = 0.01                                                        
params = net.train(data, target, layers, steps, alpha)          
# train_acc, test_acc = compute_accuracy(X_train, X_test, Y_train, Y_test, params)  
# print('Root Mean Squared Error on Training Data = ' + str(train_acc))
# print('Root Mean Squared Error on Test Data = ' + str(test_acc))

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Iteration 2501 , Loss = 0.12500047846593576

Iteration 2502 , Loss = 0.1250004760744909

Iteration 2503 , Loss = 0.12500047369499928

Iteration 2504 , Loss = 0.12500047132740105

Iteration 2505 , Loss = 0.12500046897163686

Iteration 2506 , Loss = 0.12500046662764755

Iteration 2507 , Loss = 0.1250004642953742

Iteration 2508 , Loss = 0.12500046197475834

Iteration 2509 , Loss = 0.12500045966574164

Iteration 2510 , Loss = 0.12500045736826612

Iteration 2511 , Loss = 0.12500045508227414

Iteration 2512 , Loss = 0.12500045280770825

Iteration 2513 , Loss = 0.1250004505445114

Iteration 2514 , Loss = 0.1250004482926267

Iteration 2515 , Loss = 0.12500044605199762

Iteration 2516 , Loss = 0.12500044382256797

Iteration 2517 , Loss = 0.12500044160428167

Iteration 2518 , Loss = 0.1250004393970831

Iteration 2519 , Loss = 0.12500043720091683

Iteration 2520 , Loss = 0.1250004350157277

Iteration 2521 , Loss = 0.125000432841460