TUTORIAL 4

BACKPROPAGATION

In [1]:
import torch
x = torch.tensor(1.0)  #input
y = torch.tensor(2.0)  #ground truth

w = torch.tensor(1.0,requires_grad=True)

#forward pass and compute the loss
y_hat = w*x #predicted output
loss = (y_hat -y)**2

print(loss)

#backward pass
loss.backward() #pytorch calculates local gradients automatically for us and also uses chain rule automatically for us
print(w.grad)

#next update weights
#do next forward and backward pass for couple of iterations

tensor(1., grad_fn=<PowBackward0>)
tensor(-2.)


TUTORIAL 5

1.Prediction: Manually 
2.Gradients Computation: Manually
3.Loss Computation: Manually
4.Parameters Updates: Manually

In [3]:
#Linear Regression
import numpy as np

# f = w*x
# let's say our original function is: f = 2*x

X = np.array([1,2,3,4],dtype=np.float32)
Y = np.array([2,4,6,8],dtype=np.float32)  #ground truth, we took it as 2*X as our original function

w = 0.0 #initial value

# model prediction
def forward(x):
    return w*x

#loss = MSE
def loss(y,y_hat):
    return ((y_hat-y)**2).mean()

#gradient of loss w.r.t. our parameters here w
#MSE = 1/N * (w*x-y)**2
#dloss/dw = 1/N * 2x * (wx-y)

def gradient(x,y,y_hat):
    return np.dot(2*x,y_hat-y).mean()

#Ground Truth is 20
print(f'Prediction before training on unseen test data: f(10) = {forward(10):.3f}')

#Training
learning_rate = 0.01
epochs = 15

for epoch in range(epochs):
    #prediction = forward pass
    y_hat = forward(X)
    
    #loss
    l = loss(Y,y_hat)
    
    #gradients
    dw = gradient(X,Y,y_hat)
    
    #update weights
    w = w - learning_rate*dw
    
    print(f'epoch {epoch+1}: w = {w:.3f}, loss = {l:.8f}')

print(f'Prediction after training on unseen test data: f(10) = {forward(10):.3f}')

 

    


Prediction before training on unseen test data: f(10) = 0.000
epoch 1: w = 1.200, loss = 30.00000000
epoch 2: w = 1.680, loss = 4.79999924
epoch 3: w = 1.872, loss = 0.76800019
epoch 4: w = 1.949, loss = 0.12288000
epoch 5: w = 1.980, loss = 0.01966083
epoch 6: w = 1.992, loss = 0.00314570
epoch 7: w = 1.997, loss = 0.00050332
epoch 8: w = 1.999, loss = 0.00008053
epoch 9: w = 1.999, loss = 0.00001288
epoch 10: w = 2.000, loss = 0.00000206
epoch 11: w = 2.000, loss = 0.00000033
epoch 12: w = 2.000, loss = 0.00000005
epoch 13: w = 2.000, loss = 0.00000001
epoch 14: w = 2.000, loss = 0.00000000
epoch 15: w = 2.000, loss = 0.00000000
Prediction after training on unseen test data: f(10) = 20.000


1.Prediction: Manually 
2.Gradients Computation: Using Autograd Package of Pytorch
3.Loss Computation: Manually
4.Parameters Updates: Manually

In [16]:
import torch

X = torch.tensor([1,2,3,4],dtype = torch.float32)
Y = torch.tensor([2,4,6,8],dtype = torch.float32)

w = torch.tensor(0.0,dtype = torch.float32, requires_grad=True)


# model prediction
def forward(x):
    return w*x

#loss = MSE
def loss(y,y_hat):
    return ((y_hat-y)**2).mean()

#Ground Truth is 20
print(f'Prediction before training on unseen test data: f(10) = {forward(10):.3f}')

#Training
learning_rate = 0.01
epochs = 80

for epoch in range(epochs):
    #prediction = forward pass
    y_hat = forward(X)
    
    #loss
    l = loss(Y,y_hat)
    
    #gradients = backward pass
    l.backward() #dl/dw
    
    #update weights & this operation should not be part of gradient tracking or computational graph
    with torch.no_grad():
        w = w - learning_rate*w.grad
        #w -= learning_rate * w.grad
    
    #empty the gradients, reason mentioned in tutorial 3
    w.grad.zero_()
    
    if(epoch%10)==0:
        print(f'epoch {epoch+1}: w = {w:.3f}, loss = {l:.8f}')

print(f'Prediction after training on unseen test data: f(10) = {forward(10):.3f}')



Prediction before training on unseen test data: f(10) = 0.000


AttributeError: 'NoneType' object has no attribute 'zero_'

The error we're encountering is because we're reassigning the w variable inside the with torch.no_grad() block. When we use the assignment w = w - learning_rate*w.grad, it creates a new tensor and breaks the gradient tracking.

To fix the issue, you should use the in-place subtraction operation -= to update the w tensor without breaking the gradient tracking.

In [17]:
import torch

X = torch.tensor([1,2,3,4],dtype = torch.float32)
Y = torch.tensor([2,4,6,8],dtype = torch.float32)

w = torch.tensor(0.0,dtype = torch.float32, requires_grad=True)


# model prediction
def forward(x):
    return w*x

#loss = MSE
def loss(y,y_hat):
    return ((y_hat-y)**2).mean()

#Ground Truth is 20
print(f'Prediction before training on unseen test data: f(10) = {forward(10):.3f}')

#Training
learning_rate = 0.01
epochs = 80

for epoch in range(epochs):
    #prediction = forward pass
    y_hat = forward(X)
    
    #loss
    l = loss(Y,y_hat)
    
    #gradients = backward pass
    l.backward() #dl/dw
    
    #update weights & this operation should not be part of gradient tracking or computational graph
    with torch.no_grad():
        #w = w - learning_rate*w.grad
        w -= learning_rate * w.grad
    
    #empty the gradients, reason mentioned in tutorial 3
    w.grad.zero_()
    
    if(epoch%10)==0:
        print(f'epoch {epoch+1}: w = {w:.3f}, loss = {l:.8f}')

print(f'Prediction after training on unseen test data: f(10) = {forward(10):.3f}')

Prediction before training on unseen test data: f(10) = 0.000
epoch 1: w = 0.300, loss = 30.00000000
epoch 11: w = 1.665, loss = 1.16278565
epoch 21: w = 1.934, loss = 0.04506890
epoch 31: w = 1.987, loss = 0.00174685
epoch 41: w = 1.997, loss = 0.00006770
epoch 51: w = 1.999, loss = 0.00000262
epoch 61: w = 2.000, loss = 0.00000010
epoch 71: w = 2.000, loss = 0.00000000
Prediction after training on unseen test data: f(10) = 20.000
