# ***Backpropagation ***

3 steps

1) Forward pass : Compute Loss

2) Compute Local Gradients 

3) Backward pass : Compute dLoss/dWeights using the chain rule

***Now Let's see how can we use it on pytorch***

In [33]:
import torch 
import numpy as np
import torch.nn as nn

In [34]:
x = torch.tensor(1.0)

y = torch.tensor(2.0)

w = torch.tensor(1.0, requires_grad=True)


***Forward pass and compute the loss***

In [35]:
y_hat = w*x
loss = (y_hat - y)**2

print(loss)

tensor(1., grad_fn=<PowBackward0>)


***Backwardpass***

In [36]:
# Pytorch will calculate local gradients for us and will also computes the backward pass automatically

loss.backward() # This is the whole gradient calculation 
print(w.grad) 

tensor(-2.)


***In Step 1 :***

1.   Prediction : Manually
2.   Gradient Computation : Manually
3.   Loss Computation : Manually
4.   Parameter Updates : Manually

***In Step 2 :***

1.   Prediction : Manually
2.   Gradient Computation : Autograd
3.   Loss Computation : Manually
4.   Parameter Updates : Manually

***In Step 3 :***

1.   Prediction : Manually
2.   Gradient Computation : Autograd
3.   Loss Computation : PyTorch Loss
4.   Parameter Updates : PyTorch Optimizer


***In Step 4 :***

1.   Prediction : PyTorch Model
2.   Gradient Computation : Autograd
3.   Loss Computation : PyTorch Loss
4.   Parameter Updates : PyTorch Optimizer







In [37]:
# f = w * x

# f = 2 * x

# some training examples 

x = np.array([1,2,3,4], dtype=np.float32)
y = np.array([2,4,6,8], dtype=np.float32)

# Initializing our weights 

w = 0.0

# model prediction 

def forward(x):
  return w*x


# Loss = MSE

def loss(y,y_predicted):
  return ((y_predicted-y)**2).mean()

# gradient
# MSE = 1/N * (w*x-y)**2
# dJ/dw = 1/N 2x (w*x-y)

def gradient(x,y,y_predicted):
  return np.dot(2*x, y_predicted-y).mean()

print(f'Prediction before training : f(5) = {forward(5):.3f}')

# Training 

learning_rate = 0.01
n_iters = 20

for epoch in range(n_iters):

  # prediction = forward pass 
  y_pred = forward(x)

  # loss
  l = loss(y,y_pred)

  # gradients
  dw = gradient(x,y,y_pred)

  # update weights 
  w -= learning_rate * dw
 
  if epoch % 2 == 0:
    print(f'epoch {epoch+1}: w={w:.3f},loss = {l:.8f}')

print(f'Prediction after training : f(5) = {forward(5):.3f}')

Prediction before training : f(5) = 0.000
epoch 1: w=1.200,loss = 30.00000000
epoch 3: w=1.872,loss = 0.76800019
epoch 5: w=1.980,loss = 0.01966083
epoch 7: w=1.997,loss = 0.00050331
epoch 9: w=1.999,loss = 0.00001288
epoch 11: w=2.000,loss = 0.00000033
epoch 13: w=2.000,loss = 0.00000001
epoch 15: w=2.000,loss = 0.00000000
epoch 17: w=2.000,loss = 0.00000000
epoch 19: w=2.000,loss = 0.00000000
Prediction after training : f(5) = 10.000


 ***Everything with pyTorch and get rid of gradient(Manually)***

In [38]:
# f = w * x

# f = 2 * x

# some training examples 

x = torch.tensor([1,2,3,4], dtype=torch.float32)
y = torch.tensor([2,4,6,8], dtype=torch.float32)

# Initializing our weights 

w = torch.tensor(0.0, dtype=torch.float32,requires_grad=True)

# model prediction 

def forward(x):
  return w*x


# Loss = MSE

def loss(y,y_predicted):
  return ((y_predicted-y)**2).mean()

# gradient
# MSE = 1/N * (w*x-y)**2
# dJ/dw = 1/N 2x (w*x-y)



print(f'Prediction before training : f(5) = {forward(5):.3f}')

# Training 

learning_rate = 0.01
n_iters = 100

for epoch in range(n_iters):

  # prediction = forward pass 
  y_pred = forward(x)

  # loss
  l = loss(y,y_pred)

  # gradients = backward pass
  l.backward() # dl/dw

  # update weights 
  with torch.no_grad():
    w -= learning_rate * w.grad

  # Zero gradients
  w.grad.zero_()
 
  if epoch % 10 == 0:
    print(f'epoch {epoch+1}: w={w:.3f},loss = {l:.8f}')

print(f'Prediction after training : f(5) = {forward(5):.3f}')

Prediction before training : f(5) = 0.000
epoch 1: w=0.300,loss = 30.00000000
epoch 11: w=1.665,loss = 1.16278565
epoch 21: w=1.934,loss = 0.04506890
epoch 31: w=1.987,loss = 0.00174685
epoch 41: w=1.997,loss = 0.00006770
epoch 51: w=1.999,loss = 0.00000262
epoch 61: w=2.000,loss = 0.00000010
epoch 71: w=2.000,loss = 0.00000000
epoch 81: w=2.000,loss = 0.00000000
epoch 91: w=2.000,loss = 0.00000000
Prediction after training : f(5) = 10.000


***Setp 3 & 4***

# Training Pipeline: Model, Loss, and Optimizer

***General training pipleine in pyTorch:***

3 steps :

1.   Design Model (input,output size,forward pass)

2.   Construct the loss and optimizer
3.   Training loop
       -forward pass: compute prediction
       -backward pass : gradients
       -update weights 





In [39]:
# f = w * x

# f = 2 * x

# some training examples 

x = torch.tensor([1,2,3,4], dtype=torch.float32)
y = torch.tensor([2,4,6,8], dtype=torch.float32)

# Initializing our weights 

w = torch.tensor(0.0, dtype=torch.float32,requires_grad=True)

# model prediction 

def forward(x):
  return w*x




print(f'Prediction before training : f(5) = {forward(5):.3f}')

# Training 

learning_rate = 0.01
n_iters = 100

loss = nn.MSELoss()
optimizer = torch.optim.SGD([w],lr=learning_rate)

for epoch in range(n_iters):

  # prediction = forward pass 
  y_pred = forward(x)

  # loss
  l = loss(y,y_pred)

  # gradients = backward pass
  l.backward() # dl/dw

  # update weights 
  optimizer.step()

  # Zero gradients
  optimizer.zero_grad()
 
  if epoch % 10 == 0:
    print(f'epoch {epoch+1}: w={w:.3f},loss = {l:.8f}')

print(f'Prediction after training : f(5) = {forward(5):.3f}')

Prediction before training : f(5) = 0.000
epoch 1: w=0.300,loss = 30.00000000
epoch 11: w=1.665,loss = 1.16278565
epoch 21: w=1.934,loss = 0.04506890
epoch 31: w=1.987,loss = 0.00174685
epoch 41: w=1.997,loss = 0.00006770
epoch 51: w=1.999,loss = 0.00000262
epoch 61: w=2.000,loss = 0.00000010
epoch 71: w=2.000,loss = 0.00000000
epoch 81: w=2.000,loss = 0.00000000
epoch 91: w=2.000,loss = 0.00000000
Prediction after training : f(5) = 10.000


***Step 4***

In [41]:
# f = w * x

# f = 2 * x

# some training examples 

# this must be 2D array now

x = torch.tensor([[1],[2],[3],[4]], dtype=torch.float32) 
y = torch.tensor([[2],[4],[6],[8]], dtype=torch.float32)

x_test = torch.tensor([5],dtype=torch.float32)


n_samples, n_features= x.shape
print(n_samples,n_features)

input_size = n_features
output_size = n_features

#model = nn.Linear(input_size,output_size)

# Custom model

class LinearRegression(nn.Module):
  def __init__(self,input_dim,output_dim):
    super(LinearRegression,self).__init__()
    #define our layers
    self.lin = nn.Linear(input_dim,output_dim)

  def forward(self,x):
    return self.lin(x)

model=LinearRegression(input_size,output_size)


print(f'Prediction before training : f(5) = {model(x_test).item():.3f}')

# Training 

learning_rate = 0.01
n_iters = 100

loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(n_iters):

  # prediction = forward pass 
  y_pred = model(x)

  # loss
  l = loss(y,y_pred)

  # gradients = backward pass
  l.backward() # dl/dw

  # update weights 
  optimizer.step()

  # Zero gradients
  optimizer.zero_grad()
 
  if epoch % 10 == 0:
    [w,b] = model.parameters()
    print(f'epoch {epoch+1}: w={w[0][0].item():.3f},loss = {l:.8f}')

print(f'Prediction before training : f(5) = {model(x_test).item():.3f}')

4 1
Prediction before training : f(5) = -1.293
epoch 1: w=0.237,loss = 41.62461090
epoch 11: w=1.714,loss = 1.07693768
epoch 21: w=1.952,loss = 0.02787174
epoch 31: w=1.990,loss = 0.00072936
epoch 41: w=1.996,loss = 0.00002662
epoch 51: w=1.998,loss = 0.00000799
epoch 61: w=1.998,loss = 0.00000709
epoch 71: w=1.998,loss = 0.00000666
epoch 81: w=1.998,loss = 0.00000627
epoch 91: w=1.998,loss = 0.00000591
Prediction before training : f(5) = 9.996
