In [None]:
import torch
import numpy as np

In [None]:
# requires_grad=True asks autograd to record every operation applied to this
# tensor, building the computational graph that backpropagation later walks
# to obtain the gradients used to optimise the variable.
w = torch.rand((3,), requires_grad=True)
print(w)

In [None]:
# Add a constant to w.
# This is the forward pass: autograd records the operation as it runs.
# y carries a grad_fn attribute that points to the gradient function 'AddBackward0'.
y = w+2
print(y)

In [None]:
# A different operation on the tensor:
# compare the grad_fn printed here with the one from the addition.
z = y*y*2
print(z)

In [None]:
# Calling .backward() computes the gradients.
# Under the hood it evaluates a vector-Jacobian product, so when z is not a
# scalar we must supply the vector to multiply with.

# v has one entry per element of z; it weights each component's contribution.
v = torch.tensor([0.1, 1.0, 0.001], dtype=torch.float32)
z.backward(gradient=v)   # accumulates v^T * (dz/dw) into w.grad
print(w.grad)            # gradients are stored on the leaf tensor

## Stop tracking gradient history
* How to stop PyTorch from tracking gradient history:
* Option 1: `w.requires_grad_(False)` (modifies `w` in place)
* Option 2: `w.detach()` (returns a new tensor that does not require gradients)
* Option 3: wrap the code in a `with torch.no_grad():` block

In [None]:
# Option 1: switch gradient tracking off in place with requires_grad_().
print(w)                    # before: still tracked by autograd
w.requires_grad_(False)
print(w)                    # after: the same tensor, no longer tracked

# requires_grad_() mutates w itself. Turn tracking back on so that the cells
# for the other options start from a tensor that still requires gradients;
# otherwise detach() and no_grad() would have nothing to switch off.
w.requires_grad_(True)

In [None]:
# Option 2: detach() hands back a new tensor that is cut off from the graph;
# w itself is left untouched.
y = w.detach()
print(w, y, sep="\n")


In [None]:
# Option 3: operations executed inside torch.no_grad() are not recorded.
with torch.no_grad():
    z = w + 2
print(z)

## Dummy training example
* Key takeaway: gradients are accumulated (summed) into the `.grad` attribute on every `backward()` call, which can lead to incorrect weight updates.
* Always reset `.grad` to zero before the next backward pass.

In [None]:
# Leaf tensor holding four trainable weights, all initialised to 1.
weights = torch.ones(4, requires_grad=True)

for epoch in range(3):
    # Forward pass: a toy "model output" reduced to a single scalar.
    y = weights.mul(3).sum()

    # Backward pass: dy/dweights is added to whatever weights.grad already holds.
    y.backward()
    print(weights.grad)

    # Clear the accumulated gradient so the next iteration starts from zero.
    weights.grad.zero_()