In [None]:
import torch

# Backpropagation using PyTorch

In [None]:
# requires_grad=True asks autograd to record every operation applied to x in a
# dynamically built computation graph, so that gradients with respect to x can
# be computed later by calling backward().
x = torch.tensor(4.0, requires_grad=True)
x

tensor(4., requires_grad=True)

In [None]:
# Square x: with x == 4 the result is 16. The result carries a grad_fn because
# it was computed from a tensor that requires gradients.
y = x.pow(2)
y

tensor(16., grad_fn=<PowBackward0>)

In [None]:
#BACK PROPAGATION
# y = x**2, so the derivative is dy/dx = 2x

#Computes the gradient of current tensor w.r.t. graph leaves.
#Here the only leaf is x, so the result is stored in x.grad.
y.backward()

In [None]:
# x.grad holds dy/dx evaluated at x = 4, i.e. 2 * 4 = 8.
print(x.grad)
# The gradient is itself a tensor; printing its type reproduces the second
# line of this cell's recorded output.
print(type(x.grad))

tensor(8.)
<class 'torch.Tensor'>


In [None]:
# Build a 3x3 double-precision input tensor whose gradient will be tracked.
lst = [
    [2, 3, 1],
    [4, 5, 3],
    [7, 6, 4],
]
torch_input = torch.tensor(lst, dtype=torch.float64, requires_grad=True)
torch_input

tensor([[2., 3., 1.],
        [4., 5., 3.],
        [7., 6., 4.]], dtype=torch.float64, requires_grad=True)

In [None]:
# Element-wise polynomial y = x**3 + x**2 applied to every entry of the input.
y = torch_input.pow(3) + torch_input.pow(2)

In [None]:
y
#Worked example for the top-left entry (input value 2): (2**3) + (2**2) = 8 + 4 = 12

tensor([[ 12.,  36.,   2.],
        [ 80., 150.,  36.],
        [392., 252.,  80.]], dtype=torch.float64, grad_fn=<AddBackward0>)

In [None]:
# Collapse y into a single scalar by summing all of its entries; backward()
# without arguments can only be called on a scalar.
z = torch.sum(y)
z

tensor(1040., dtype=torch.float64, grad_fn=<SumBackward0>)

In [None]:
# Actually invoke backward(): a bare `z.backward` only references the bound
# method and never runs autograd, which would leave torch_input.grad as None.
z.backward()

<bound method Tensor.backward of tensor(1040., dtype=torch.float64, grad_fn=<SumBackward0>)>

In [None]:
#Displays the gradient of z w.r.t. the input: d/dx (x**3 + x**2) = 3*x**2 + 2*x
#e.g. for the entry 2: 3*(2**2) + 2*2 = 16
torch_input.grad

tensor([[ 16.,  33.,   5.],
        [ 56.,  85.,  33.],
        [161., 120.,  56.]], dtype=torch.float64)

# Implementing Backpropagation on the Iris Dataset

In [None]:
import pandas as pd
import numpy as np
import seaborn as sb

In [None]:
# NOTE(review): load_iris is imported here but is not used in the visible
# cells; the data below is loaded through seaborn (imported as `sb`) instead.
from sklearn.datasets import load_iris
iris = sb.load_dataset('iris')
# Preview the first rows of the DataFrame.
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [None]:
# One-hot encode the species labels (one column per class). The dtype is
# pinned to float so the targets are numeric regardless of the pandas version
# (recent pandas versions return boolean dummy columns by default).
y = pd.get_dummies(iris.species, dtype=float).values

# Total number of target entries (rows * classes); used further down as the
# normaliser for the weight updates and for the error.
N = y.size

# Feature matrix: every column except the label.
x = iris.drop(["species"], axis=1).values

# Show the shapes and a short preview instead of dumping every row.
print(y.shape, x.shape)
print(y[:5])
print(x[:5])

[[1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 

In [None]:
#Activation Function
def sigmoid(x):
    """Logistic sigmoid, applied element-wise.

    Args:
        x: a scalar or NumPy array of pre-activation values.

    Returns:
        1 / (1 + exp(-x)); for array input the result has the same shape.
    """
    exp_neg = np.exp(-x)
    return 1 / (1 + exp_neg)

In [None]:
#Hyperparameters
# Step size applied to the gradient when the weights are updated.
learning_rate = 0.1

# Layer sizes: neurons in the input, hidden and output layer respectively.
n_input, n_hidden, n_output = 4, 2, 3

In [None]:
# Fix the seed so the random initial weights are reproducible.
np.random.seed(10)

# Initial weights drawn from a zero-mean normal distribution with std 0.5:
# weights_1 connects input -> hidden, weights_2 connects hidden -> output.
weights_1 = np.random.normal(loc=0.0, scale=0.5, size=(n_input, n_hidden))
weights_2 = np.random.normal(loc=0.0, scale=0.5, size=(n_hidden, n_output))

# Show the starting values.
print("weights_1", weights_1)
print("weights_2", weights_2)

weights_1 [[ 0.66579325  0.35763949]
 [-0.77270015 -0.00419192]
 [ 0.31066799 -0.36004278]
 [ 0.13275579  0.05427426]]
weights_2 [[ 0.00214572 -0.08730011  0.21651309]
 [ 0.60151869 -0.48253284  0.51413704]]


In [None]:
#Forward Propagation

# Input layer -> hidden layer: weighted sum of the features (the input to the
# activation function), followed by the sigmoid activation.
hidden_layer_inputs = x.dot(weights_1)
hidden_layer_outputs = sigmoid(hidden_layer_inputs)

# Hidden layer -> output layer: weighted sum of the hidden activations,
# followed by the sigmoid activation.
output_layer_inputs = hidden_layer_outputs.dot(weights_2)
output_layer_outputs = sigmoid(output_layer_inputs)

In [None]:
#BackPropagation
# Output layer: prediction error, then scaled by the sigmoid derivative
# s * (1 - s) evaluated at the output activations.
output_layer_error = output_layer_outputs - y
output_layer_delta = (output_layer_error * output_layer_outputs) * (1 - output_layer_outputs)

# Hidden layer: push the output deltas back through weights_2, then scale by
# the sigmoid derivative evaluated at the hidden activations.
hidden_layer_error = output_layer_delta.dot(weights_2.T)
hidden_layer_delta = (hidden_layer_error * hidden_layer_outputs) * (1 - hidden_layer_outputs)

In [None]:
#Weights Update
# Gradient for each weight matrix: layer input (transposed) times the layer
# delta, divided by N.
weights_2_update = hidden_layer_outputs.T.dot(output_layer_delta) / N
weights_1_update = x.T.dot(hidden_layer_delta) / N

# One gradient-descent step: move each matrix against its gradient.
weights_2 = weights_2 - learning_rate * weights_2_update
weights_1 = weights_1 - learning_rate * weights_1_update

In [None]:
# Squared error summed over all target entries and divided by 2 * N. The extra
# factor 2 in the denominator cancels the 2 that appears when the square is
# differentiated. The outputs used here come from the forward pass that ran
# before the weight update.
mse = np.sum(np.square(output_layer_outputs - y)) / (2 * N)
mse

0.13761957836254335

In [None]:
#Forward Propagation - second time
# Same forward pass as before, now using the updated weights.
hidden_layer_inputs = x.dot(weights_1)
hidden_layer_outputs = sigmoid(hidden_layer_inputs)

output_layer_inputs = hidden_layer_outputs.dot(weights_2)
output_layer_outputs = sigmoid(output_layer_inputs)

In [None]:
# Error after the second forward pass, computed with the same formula:
# squared error summed over all target entries, divided by 2 * N.
mse = np.sum(np.square(output_layer_outputs - y)) / (2 * N)
mse

0.13730616116287855