# Q1 Backpropagation

In [1]:
import torch
# Setting up device as CUDA if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Creating class Net for easy use

In [2]:
class Net:
  def __init__(self, x):
    # Input
    self.x = x

    # Initializing first set of weights and bias as Torch tensors
    self.Wh = torch.tensor([[2, 4], [3, -5]], dtype=torch.int32)
    self.bh = torch.tensor([[-2], [1]], dtype=torch.int32)

    # Initializing second set of weights and bias as Torch tensors
    self.Wo = torch.tensor([4, 6], dtype=torch.int32)
    self.bo = torch.tensor([-3.5], dtype=torch.float32)

    # Initializing all computations as 0
    self.zh = 0
    self.uh = 0
    self.zo = 0
    self.y = 1

  def sigmoid(self, x):
    return 1 / (1 + torch.exp(-x))

  def ReLU(self, x):
    return torch.maximum(torch.zeros_like(x), x)
    
  def cross_entropy_loss(self, yHat, y):
    return -torch.sum(y * torch.log(yHat))
  
  def forward_pass(self):
    # Linear Map 1
    self.zh = torch.matmul(self.Wh, self.x) + self.bh
    
    # Activation ReLU
    self.uh = self.ReLU(self.zh)

    # Linear Map 2
    self.zo = torch.matmul(self.Wo, self.uh) + self.bo
    # Activation Sigmoid
    yHat = self.sigmoid(self.zo)

    # Calculate Cross-Entropy Loss
    loss = self.cross_entropy_loss(yHat, self.y)

    return loss, yHat

  def backward_pass(self, yHat):
    dLoss = -(self.y/yHat) + ((1 - self.y) / (1 - yHat))
    dyHat = self.sigmoid(self.zo) * (1 - self.sigmoid(self.zo))
    dzo_Wo = self.uh
    dzo_bo = 1
    dzo_uh = self.Wo
    duh = torch.tensor([[1 if self.zh[0]>=0 else 0], [1 if self.zh[1]>=0 else 0]], dtype=torch.int32)
    dzh_Wh = self.x
    dzh_bh = 1

    '''
    # FOR DEBUG PURPOSES
    print(dLoss, dLoss.shape, '\n')
    print(dyHat, dyHat.shape, '\n')
    print(dzo_Wo, dzo_Wo.shape, '\n')
    print(dzo_bo, '\n')
    print(dzo_uh, dzo_uh.shape, '\n')
    print(duh, duh.shape, '\n')
    print(dzh_Wh, dzh_Wh.shape, '\n')
    print(dzh_bh, '\n')
    '''

    # Calculating the gradients
    grad_L_Wo = dLoss * dyHat * dzo_Wo
    grad_L_bo = dLoss * dyHat * dzo_bo
    grad_L_Wh = dLoss * dyHat * dzo_uh * duh * dzh_Wh
    # grad_L_Wh = dLoss * dyHat * torch.matmul(dzo_uh, duh) * dzh_Wh
    # grad_L_bh = dLoss * dyHat * dzo_uh * duh * dzh_bh
    grad_L_bh = dLoss * dyHat * torch.matmul(dzo_uh, duh) * dzh_bh

    return grad_L_Wo, grad_L_bo, grad_L_Wh, grad_L_bh

## Computing the Forward phase and Backward phase

In [3]:
x = torch.tensor([[1], [1]], dtype=torch.int32)

# Forward phase
n = Net(x)
loss, yHat = n.forward_pass()

# Backward phase
grad_L_Wo, grad_L_bo, grad_L_Wh, grad_L_bh = n.backward_pass(yHat)

## Results

In [4]:
print("Prediction = ", yHat.numpy().item())
print("Loss = ", loss.numpy())

Prediction =  0.9999963045120239
Loss =  3.6954948e-06


In [5]:
print("dL/dWo = {} \n".format(grad_L_Wo.numpy()))
print("dL/dbo = {} \n".format(grad_L_bo.numpy().item()))
print("dL/dWh = {} \n".format(grad_L_Wh.numpy()))
print("dL/dbh = {} \n".format(grad_L_bh.numpy().item()))

dL/dWo = [[-1.4781952e-05]
 [-0.0000000e+00]] 

dL/dbo = -3.6954879760742188e-06 

dL/dWh = [[-1.4781952e-05 -2.2172928e-05]
 [-0.0000000e+00 -0.0000000e+00]] 

dL/dbh = -1.4781951904296875e-05 



# End of Notebook