<a href="https://colab.research.google.com/github/kshitijdesai99/my_data_science_projects/blob/main/NN_from_scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# import libraries

import numpy as np
import torch
import sys
import torch.nn as nn

## Stochastic gradient descent from scratch

In [None]:
# parameters

# Seed NumPy's global RNG so the synthetic data and the initial weights are
# the same on every Restart & Run All.
np.random.seed(42)

# Features: 1000 samples (rows) x 10 features (columns), uniform over [0, 1)
X = np.random.rand(1000, 10)

# Labels: 1000 random binary targets (randint's upper bound 2 is exclusive)
y = np.random.randint(0,2,1000)

# W1, W2, W3 are the weights of the 1st hidden layer, 2nd hidden layer and
# output layer. np.random.uniform returns float64, so the tensors are
# torch.float64; requires_grad=True lets autograd track them.

# 2x10 matrix: maps the 10 input features to 2 hidden units
W1 = torch.tensor(np.random.uniform(0,1,(2,10)),requires_grad=True)

# 3x2 matrix: maps the 2 hidden units to 3 hidden units
W2 = torch.tensor(np.random.uniform(0,1,(3,2)),requires_grad=True)

# length-3 vector: maps the 3 hidden units to a single output
W3 = torch.tensor(np.random.uniform(0,1,3),requires_grad=True)

# Weights in layer order, as consumed by the forward pass
W_list = [W1, W2, W3]

# Number of epochs
nepochs = 100

# Learning rate
lr = 0.0001

# Binary cross-entropy on probabilities (the network ends in a sigmoid)
loss_fn = nn.BCELoss()


In [None]:
# Helper functions

# Activation function
def activate(X):
  """Apply the logistic sigmoid element-wise.

  torch.sigmoid is used instead of the hand-written 1/(1+exp(-X)): for very
  negative inputs exp(-X) overflows to inf, and although the forward value
  is still 0, the backward pass multiplies 0 by inf and yields NaN
  gradients. torch.sigmoid has a stable forward and backward pass.

  Args:
    X: torch tensor of pre-activation values.

  Returns:
    Tensor of the same shape as X with values in [0, 1].
  """
  return torch.sigmoid(X)




# Moving forward
def forwardStep(X, W_list):
  """Run one sample through every layer and return the final activation.

  X is wrapped as a torch tensor with torch.from_numpy, then each weight W
  in W_list is applied in order as activate(W @ h). There are no bias terms.

  With the weights defined in this notebook the shapes are
  (2,10)@(10,) -> (2,), (3,2)@(2,) -> (3,), (3,)@(3,) -> scalar.

  Args:
    X: NumPy array holding the input features of one sample.
    W_list: weight tensors in layer order.

  Returns:
    The activation produced by the last layer.
  """
  activation = torch.from_numpy(X)

  for weight in W_list:
    # linear map followed by the non-linearity
    activation = activate(torch.matmul(weight, activation))

  return activation




#Update params - Based on gradients and learning rates weights are updated
def updateParams(W_list, dW_list, lr):
  """Take one gradient-descent step on every weight: W <- W - lr * dW.

  The subtraction runs inside torch.no_grad() so autograd does not record
  it, and it is done in place, so the entries of W_list are modified.

  Args:
    W_list: weights to update.
    dW_list: gradients, dW_list[i] belonging to W_list[i].
    lr: learning rate (step size).

  Returns:
    The same W_list object that was passed in.
  """
  with torch.no_grad():
    for idx, _ in enumerate(W_list):
      W_list[idx] -= lr * dW_list[idx]
  return W_list


In [None]:
def trainNN_sgd(X, y, W_list, loss_fn, lr, nepochs):
  """Train the network with per-sample (stochastic) gradient descent.

  Each sample triggers one forward pass, one backward pass and one weight
  update. Samples are visited in their stored order (no shuffling). After
  every epoch the mean of the per-sample losses is printed.

  Args:
    X: 2-D NumPy array, one row per sample (indexed as X[i, :]).
    y: targets; y[i] belongs to X[i, :].
    W_list: weight tensors created with requires_grad=True. They are
      updated in place.
    loss_fn: callable (prediction, target) -> scalar tensor loss.
    lr: learning rate.
    nepochs: number of passes over the data.

  Returns:
    W_list with the trained weights.
  """
  for epoch in range(nepochs):

    # per-sample losses of this epoch, used only for the progress report
    sample_losses = []

    for i in range(len(y)):

      # forward pass for a single sample
      y_hat = forwardStep(X[i, :], W_list)

      # target as a float64 scalar so it matches the prediction's dtype
      target = torch.tensor(y[i], dtype=torch.double)
      loss = loss_fn(y_hat, target)

      # accumulate d(loss)/dW into W.grad for every weight
      loss.backward()
      sample_losses.append(loss.item())

      # step on this sample's gradients
      dW_list = [W.grad for W in W_list]
      W_list = updateParams(W_list, dW_list, lr)

      # backward() accumulates, so reset the gradients before the next sample
      for W in W_list:
        W.grad.zero_()

    # report the epoch's mean loss, then flush once so it shows immediately
    print("Loss after epoch = %d: %f"%(epoch, np.mean(np.array(sample_losses))))
    sys.stdout.flush()

  # finally return the last weights
  return W_list

In [None]:
# Run per-sample SGD training. The call is the cell's last expression, so the
# returned weight list is displayed as the cell output. updateParams modifies
# the tensors in place, so W_list itself holds the trained weights afterwards.
trainNN_sgd(X,y,W_list, loss_fn, lr, nepochs)

Loss after epoch = 0: 0.957474
Loss after epoch = 1: 0.939600
Loss after epoch = 2: 0.922793
Loss after epoch = 3: 0.907008
Loss after epoch = 4: 0.892200
Loss after epoch = 5: 0.878321
Loss after epoch = 6: 0.865326
Loss after epoch = 7: 0.853170
Loss after epoch = 8: 0.841809
Loss after epoch = 9: 0.831199
Loss after epoch = 10: 0.821298
Loss after epoch = 11: 0.812065
Loss after epoch = 12: 0.803461
Loss after epoch = 13: 0.795448
Loss after epoch = 14: 0.787990
Loss after epoch = 15: 0.781051
Loss after epoch = 16: 0.774600
Loss after epoch = 17: 0.768604
Loss after epoch = 18: 0.763034
Loss after epoch = 19: 0.757862
Loss after epoch = 20: 0.753061
Loss after epoch = 21: 0.748605
Loss after epoch = 22: 0.744472
Loss after epoch = 23: 0.740638
Loss after epoch = 24: 0.737084
Loss after epoch = 25: 0.733790
Loss after epoch = 26: 0.730736
Loss after epoch = 27: 0.727907
Loss after epoch = 28: 0.725286
Loss after epoch = 29: 0.722859
Loss after epoch = 30: 0.720611
Loss after epoch =

[tensor([[0.2994, 0.0401, 0.9599, 0.4630, 0.2344, 0.6865, 0.0861, 0.9256, 0.9877,
          0.0893],
         [0.8692, 0.5110, 0.9354, 0.3875, 0.1877, 0.8678, 0.6743, 0.5333, 0.5718,
          0.3638]], dtype=torch.float64, requires_grad=True),
 tensor([[0.4095, 0.9779],
         [0.6838, 0.4340],
         [0.2007, 0.3007]], dtype=torch.float64, requires_grad=True),
 tensor([-0.4641,  0.2157,  0.2898], dtype=torch.float64, requires_grad=True)]

## Batch Gradient Descent

In [None]:
# parameters

# Seed NumPy's global RNG so the synthetic data and the initial weights are
# the same on every Restart & Run All.
np.random.seed(42)

# Features: 1000 samples (rows) x 10 features (columns), uniform over [0, 1)
X = np.random.rand(1000, 10)

# Labels: 1000 random binary targets (randint's upper bound 2 is exclusive)
y = np.random.randint(0,2,1000)

# W1, W2, W3 are the weights of the 1st hidden layer, 2nd hidden layer and
# output layer. np.random.uniform returns float64, so the tensors are
# torch.float64; requires_grad=True lets autograd track them.

# 2x10 matrix: maps the 10 input features to 2 hidden units
W1 = torch.tensor(np.random.uniform(0,1,(2,10)),requires_grad=True)

# 3x2 matrix: maps the 2 hidden units to 3 hidden units
W2 = torch.tensor(np.random.uniform(0,1,(3,2)),requires_grad=True)

# length-3 vector: maps the 3 hidden units to a single output
W3 = torch.tensor(np.random.uniform(0,1,3),requires_grad=True)

# Weights in layer order, as consumed by the forward pass
W_list = [W1, W2, W3]

# Number of epochs
nepochs = 100

# Learning rate
lr = 0.0001

# Binary cross-entropy on probabilities (the network ends in a sigmoid)
loss_fn = nn.BCELoss()

In [None]:
def trainNN_batch(X, y, W_list, loss_fn, lr, nepochs):
  """Train the network with full-batch gradient descent.

  In every epoch the per-sample losses of all samples are averaged, a single
  backward pass is run on that mean, and the weights are updated once. The
  mean loss (computed before the update) is printed after every epoch.

  Args:
    X: 2-D NumPy array, one row per sample (indexed as X[i, :]).
    y: targets; y[i] belongs to X[i, :].
    W_list: weight tensors created with requires_grad=True. They are
      updated in place.
    loss_fn: callable (prediction, target) -> scalar tensor loss.
    lr: learning rate.
    nepochs: number of epochs, i.e. number of weight updates.

  Returns:
    W_list with the trained weights.

  Raises:
    ValueError: if y is empty (the mean loss would be undefined).
  """
  n = len(y)
  if n == 0:
    raise ValueError("trainNN_batch requires at least one training sample")

  for epoch in range(nepochs):

    # running sum of the per-sample losses; becomes a tensor on the first add
    loss = 0

    for i in range(n):
      y_hat = forwardStep(X[i, :], W_list)
      loss += loss_fn(y_hat, torch.tensor(y[i], dtype=torch.double))

    # mean loss over the whole data set
    loss = loss/n

    # one backward pass through all n forward passes
    loss.backward()

    # single update per epoch
    dW_list = [W.grad for W in W_list]
    W_list = updateParams(W_list, dW_list, lr)

    # backward() accumulates, so reset the gradients before the next epoch
    for W in W_list:
      W.grad.zero_()

    # report the loss as a plain float, then flush so it shows immediately
    print("Loss after epoch = %d: %f"%(epoch, loss.item()))
    sys.stdout.flush()

  # finally return the last weights
  return W_list

In [None]:
# Run full-batch training (one weight update per epoch). The call is the
# cell's last expression, so the returned weight list is displayed as output.
# updateParams modifies the tensors in place, so W_list holds the trained
# weights afterwards.
trainNN_batch(X,y,W_list, loss_fn, lr, nepochs)

Loss after epoch = 0: 0.692841
Loss after epoch = 1: 0.692841
Loss after epoch = 2: 0.692841
Loss after epoch = 3: 0.692841
Loss after epoch = 4: 0.692841
Loss after epoch = 5: 0.692841
Loss after epoch = 6: 0.692841
Loss after epoch = 7: 0.692841
Loss after epoch = 8: 0.692841
Loss after epoch = 9: 0.692841
Loss after epoch = 10: 0.692841
Loss after epoch = 11: 0.692841
Loss after epoch = 12: 0.692841
Loss after epoch = 13: 0.692841
Loss after epoch = 14: 0.692841
Loss after epoch = 15: 0.692841
Loss after epoch = 16: 0.692841
Loss after epoch = 17: 0.692841
Loss after epoch = 18: 0.692841
Loss after epoch = 19: 0.692841
Loss after epoch = 20: 0.692841
Loss after epoch = 21: 0.692841
Loss after epoch = 22: 0.692841
Loss after epoch = 23: 0.692841
Loss after epoch = 24: 0.692841
Loss after epoch = 25: 0.692841
Loss after epoch = 26: 0.692841
Loss after epoch = 27: 0.692841
Loss after epoch = 28: 0.692841
Loss after epoch = 29: 0.692841
Loss after epoch = 30: 0.692841
Loss after epoch =

[tensor([[0.2994, 0.0401, 0.9599, 0.4630, 0.2344, 0.6865, 0.0861, 0.9256, 0.9877,
          0.0893],
         [0.8692, 0.5110, 0.9354, 0.3875, 0.1877, 0.8678, 0.6743, 0.5333, 0.5718,
          0.3638]], dtype=torch.float64, requires_grad=True),
 tensor([[0.4095, 0.9779],
         [0.6838, 0.4340],
         [0.2007, 0.3007]], dtype=torch.float64, requires_grad=True),
 tensor([-0.4641,  0.2156,  0.2898], dtype=torch.float64, requires_grad=True)]

## Mini Batch Gradient Descent non vectorized

In [None]:
# parameters

# Fix NumPy's global seed so the data and initial weights below are repeatable
np.random.seed(42)

# Features: 1000 rows x 10 columns drawn uniformly from [0, 1)
X = np.random.rand(1000, 10)

# Targets: 1000 random integers, each either 0 or 1
y = np.random.randint(0, 2, 1000)

# One uniform(0, 1) weight tensor per layer, generated in layer order:
# (2, 10) first hidden layer, (3, 2) second hidden layer, (3,) output layer
W_list = [
    torch.tensor(np.random.uniform(0, 1, shape), requires_grad=True)
    for shape in ((2, 10), (3, 2), 3)
]

# Individual names for the three layers (same tensor objects as in W_list)
W1, W2, W3 = W_list

# Number of epochs
nepochs = 100

# Learning rate
lr = 0.0001

# Loss function: binary cross-entropy
loss_fn = nn.BCELoss()

In [None]:
# Mini-batch size: number of samples that share one gradient update
batch_size = 16

In [None]:
def trainNN_batch_non_vectorized(X, y, W_list, loss_fn, lr, nepochs, batch_size):
  """Train with mini-batch gradient descent, one forward pass per sample.

  The data is walked in consecutive slices of batch_size samples (no
  shuffling). When len(y) is not a multiple of batch_size the final, shorter
  slice is trained on as well, so no sample is skipped. For each mini-batch
  the per-sample losses are averaged, one backward pass is run and the
  weights are updated once. After every epoch the sample-weighted mean of
  the mini-batch losses is printed.

  Args:
    X: 2-D NumPy array, one row per sample.
    y: targets; y[i] belongs to X[i, :].
    W_list: weight tensors created with requires_grad=True. They are
      updated in place.
    loss_fn: callable (prediction, target) -> scalar tensor loss.
    lr: learning rate.
    nepochs: number of passes over the data.
    batch_size: maximum number of samples per weight update (>= 1).

  Returns:
    W_list with the trained weights.

  Raises:
    ValueError: if y is empty or batch_size is smaller than 1.
  """
  n = len(y)
  if n == 0:
    raise ValueError("trainNN_batch_non_vectorized requires at least one training sample")
  if batch_size < 1:
    raise ValueError("batch_size must be at least 1")

  for epoch in range(nepochs):

      # sum over the epoch of (mini-batch mean loss * mini-batch size)
      epoch_loss_sum = 0.0

      # stepping by batch_size also yields the trailing partial batch
      for start in range(0, n, batch_size):
        X_batch = X[start : start + batch_size, :]
        y_batch = y[start : start + batch_size]
        m = len(y_batch)

        # running sum of the per-sample losses of this mini-batch
        loss = 0

        for i in range(m):
          y_hat = forwardStep(X_batch[i, :], W_list)
          loss += loss_fn(y_hat, torch.tensor(y_batch[i], dtype=torch.double))

        # mean loss of the mini-batch
        loss = loss/m

        loss.backward()

        # one update per mini-batch
        dW_list = [W.grad for W in W_list]
        W_list = updateParams(W_list, dW_list, lr)

        # backward() accumulates, so reset before the next mini-batch
        for W in W_list:
          W.grad.zero_()

        epoch_loss_sum += loss.item() * m

      # mean loss over all samples seen this epoch
      print("Loss after epoch = %d: %f"%(epoch, epoch_loss_sum/n))
      sys.stdout.flush()

  # finally return the last weights
  return W_list

In [None]:
# Run mini-batch training with one forward pass per sample. The call is the
# cell's last expression, so the returned weight list is displayed as output.
# updateParams modifies the tensors in place, so W_list holds the trained
# weights afterwards.
trainNN_batch_non_vectorized(X,y,W_list, loss_fn, lr, nepochs, batch_size)

Loss after epoch = 0: 0.778973
Loss after epoch = 1: 0.778440
Loss after epoch = 2: 0.777909
Loss after epoch = 3: 0.777381
Loss after epoch = 4: 0.776856
Loss after epoch = 5: 0.776332
Loss after epoch = 6: 0.775812
Loss after epoch = 7: 0.775294
Loss after epoch = 8: 0.774778
Loss after epoch = 9: 0.774264
Loss after epoch = 10: 0.773754
Loss after epoch = 11: 0.773245
Loss after epoch = 12: 0.772739
Loss after epoch = 13: 0.772236
Loss after epoch = 14: 0.771734
Loss after epoch = 15: 0.771235
Loss after epoch = 16: 0.770739
Loss after epoch = 17: 0.770245
Loss after epoch = 18: 0.769753
Loss after epoch = 19: 0.769264
Loss after epoch = 20: 0.768777
Loss after epoch = 21: 0.768292
Loss after epoch = 22: 0.767810
Loss after epoch = 23: 0.767330
Loss after epoch = 24: 0.766852
Loss after epoch = 25: 0.766377
Loss after epoch = 26: 0.765904
Loss after epoch = 27: 0.765433
Loss after epoch = 28: 0.764965
Loss after epoch = 29: 0.764499
Loss after epoch = 30: 0.764035
Loss after epoch =

[tensor([[0.0365, 0.0255, 0.7383, 0.3173, 0.7956, 0.4310, 0.4523, 0.5121, 0.7123,
          0.2727],
         [0.1208, 0.5552, 0.3260, 0.4416, 0.6462, 0.4933, 0.6195, 0.5433, 0.7390,
          0.7051]], dtype=torch.float64, requires_grad=True),
 tensor([[0.5220, 0.6662],
         [0.8732, 0.4319],
         [0.3930, 0.7437]], dtype=torch.float64, requires_grad=True),
 tensor([ 0.4793, -0.0344,  0.7898], dtype=torch.float64, requires_grad=True)]

## Mini Batch Gradient Descent vectorized [Faster]

In [None]:
# parameters

# Same seed as the non-vectorized section, so both start from identical
# data and identical initial weights
np.random.seed(42)

# Inputs: 1000 samples with 10 features each, uniform over [0, 1)
X = np.random.rand(1000, 10)

# Labels: one random 0/1 value per sample
y = np.random.randint(0, 2, 1000)

# Layer weights, drawn from uniform(0, 1) in layer order:
#   (2, 10) -> 1st hidden layer, (3, 2) -> 2nd hidden layer, (3,) -> output
W_list = [
    torch.tensor(np.random.uniform(0, 1, size), requires_grad=True)
    for size in [(2, 10), (3, 2), 3]
]

# Per-layer aliases; these are the same tensor objects stored in W_list
W1, W2, W3 = W_list

# Number of epochs
nepochs = 100

# Learning rate
lr = 0.0001

# Binary cross-entropy loss
loss_fn = nn.BCELoss()


In [None]:
# Helper functions

# Activation function
def activate(X):
  """Apply the logistic sigmoid element-wise.

  torch.sigmoid is used instead of the hand-written 1/(1+exp(-X)): for very
  negative inputs exp(-X) overflows to inf, and although the forward value
  is still 0, the backward pass multiplies 0 by inf and yields NaN
  gradients. torch.sigmoid has a stable forward and backward pass.

  Args:
    X: torch tensor of pre-activation values.

  Returns:
    Tensor of the same shape as X with values in [0, 1].
  """
  return torch.sigmoid(X)




# Moving forward
def forwardStep_batch(X, W_list):
  """Run a whole mini-batch through every layer in one go.

  X is transposed so that samples become columns, wrapped as a torch tensor
  with torch.from_numpy, and each weight W in W_list is then applied in
  order as activate(W @ h). There are no bias terms.

  With the weights defined in this notebook and a batch of B samples the
  shapes are (2,10)@(10,B) -> (2,B), (3,2)@(2,B) -> (3,B),
  (3,)@(3,B) -> (B,), i.e. one output per sample.

  Args:
    X: NumPy array of inputs, one row per sample.
    W_list: weight tensors in layer order.

  Returns:
    The activation produced by the last layer.
  """
  activation = torch.from_numpy(X.T)

  for weight in W_list:
    # linear map followed by the non-linearity
    activation = activate(torch.matmul(weight, activation))

  return activation




#Update params - Based on gradients and learning rates weights are updated
def updateParams(W_list, dW_list, lr):
  """Move every weight one step against its gradient.

  Each entry is updated in place as W_list[k] -= lr * dW_list[k]. The loop
  runs under torch.no_grad() so the update is not recorded by autograd.

  Args:
    W_list: weights to update.
    dW_list: gradients, dW_list[k] belonging to W_list[k].
    lr: learning rate (step size).

  Returns:
    The same W_list object that was passed in.
  """
  with torch.no_grad():
    for layer in range(len(W_list)):
      step = lr * dW_list[layer]
      W_list[layer] -= step
  return W_list

In [None]:
def trainNN_batch_vectorized(X, y, W_list, loss_fn, lr, nepochs, batch_size):
  """Train with mini-batch gradient descent, one forward pass per mini-batch.

  The data is walked in consecutive slices of batch_size samples (no
  shuffling). When len(y) is not a multiple of batch_size the final, shorter
  slice is trained on as well, so no sample is skipped. Each mini-batch is
  pushed through the network in a single forwardStep_batch call, followed by
  one backward pass and one weight update. After every epoch the
  sample-weighted mean of the mini-batch losses is printed.

  Args:
    X: 2-D NumPy array, one row per sample.
    y: NumPy array of targets; y[i] belongs to X[i, :].
    W_list: weight tensors created with requires_grad=True. They are
      updated in place.
    loss_fn: callable (predictions, targets) -> tensor loss. The epoch
      report assumes it returns the batch mean, as nn.BCELoss does by
      default.
    lr: learning rate.
    nepochs: number of passes over the data.
    batch_size: maximum number of samples per weight update (>= 1).

  Returns:
    W_list with the trained weights.

  Raises:
    ValueError: if y is empty or batch_size is smaller than 1.
  """
  n = len(y)
  if n == 0:
    raise ValueError("trainNN_batch_vectorized requires at least one training sample")
  if batch_size < 1:
    raise ValueError("batch_size must be at least 1")

  for epoch in range(nepochs):

      # sum over the epoch of (mini-batch loss * mini-batch size)
      epoch_loss_sum = 0.0

      # stepping by batch_size also yields the trailing partial batch
      for start in range(0, n, batch_size):
        X_batch = X[start : start + batch_size, :]
        y_batch = y[start : start + batch_size]
        m = len(y_batch)

        # one forward pass for the whole mini-batch
        y_hat = forwardStep_batch(X_batch, W_list)

        # torch.sum leaves an already-reduced scalar loss unchanged and
        # collapses an unreduced (per-element) loss to a scalar for backward()
        loss = torch.sum(loss_fn(y_hat, torch.tensor(y_batch, dtype=torch.double)))

        loss.backward()

        # one update per mini-batch
        dW_list = [W.grad for W in W_list]
        W_list = updateParams(W_list, dW_list, lr)

        # backward() accumulates, so reset before the next mini-batch
        for W in W_list:
          W.grad.zero_()

        epoch_loss_sum += loss.item() * m

      # mean loss over all samples seen this epoch
      print("Loss after epoch = %d: %f"%(epoch, epoch_loss_sum/n))
      sys.stdout.flush()

  # finally return the last weights
  return W_list

In [None]:
# Run vectorized mini-batch training. The literals 100 and 16 are the epoch
# count and batch size; they equal the nepochs and batch_size values defined
# earlier in the notebook. The call is the cell's last expression, so the
# returned weight list is displayed as output.
trainNN_batch_vectorized(X,y,W_list, loss_fn, lr, 100, 16)

Loss after epoch = 0: 0.778973
Loss after epoch = 1: 0.778440
Loss after epoch = 2: 0.777909
Loss after epoch = 3: 0.777381
Loss after epoch = 4: 0.776856
Loss after epoch = 5: 0.776332
Loss after epoch = 6: 0.775812
Loss after epoch = 7: 0.775294
Loss after epoch = 8: 0.774778
Loss after epoch = 9: 0.774264
Loss after epoch = 10: 0.773754
Loss after epoch = 11: 0.773245
Loss after epoch = 12: 0.772739
Loss after epoch = 13: 0.772236
Loss after epoch = 14: 0.771734
Loss after epoch = 15: 0.771235
Loss after epoch = 16: 0.770739
Loss after epoch = 17: 0.770245
Loss after epoch = 18: 0.769753
Loss after epoch = 19: 0.769264
Loss after epoch = 20: 0.768777
Loss after epoch = 21: 0.768292
Loss after epoch = 22: 0.767810
Loss after epoch = 23: 0.767330
Loss after epoch = 24: 0.766852
Loss after epoch = 25: 0.766377
Loss after epoch = 26: 0.765904
Loss after epoch = 27: 0.765433
Loss after epoch = 28: 0.764965
Loss after epoch = 29: 0.764499
Loss after epoch = 30: 0.764035
Loss after epoch =

[tensor([[0.0365, 0.0255, 0.7383, 0.3173, 0.7956, 0.4310, 0.4523, 0.5121, 0.7123,
          0.2727],
         [0.1208, 0.5552, 0.3260, 0.4416, 0.6462, 0.4933, 0.6195, 0.5433, 0.7390,
          0.7051]], dtype=torch.float64, requires_grad=True),
 tensor([[0.5220, 0.6662],
         [0.8732, 0.4319],
         [0.3930, 0.7437]], dtype=torch.float64, requires_grad=True),
 tensor([ 0.4793, -0.0344,  0.7898], dtype=torch.float64, requires_grad=True)]