In [30]:
import torch
import numpy as np

In [31]:
# Passing requires_grad=True asks autograd to record the operations applied to
# this tensor, building the computational graph that back-propagation needs in
# order to compute gradients with respect to it.
w = torch.rand((3,), requires_grad=True)
print(w)

tensor([0.0077, 0.4261, 0.5286], requires_grad=True)


In [32]:
# Forward pass: add a constant to every element of w.
# Since w requires gradients, the result carries a grad_fn attribute that
# points to the backward function of the addition (printed as AddBackward0).
y = w + 2
print(y)

tensor([2.0077, 2.4261, 2.5286], grad_fn=<AddBackward0>)


In [33]:
# Apply a different operation to the tensor (element-wise 2 * y * y).
# Compare the grad_fn with the previous cell: it now refers to the
# multiplication (printed as MulBackward0).
z = (y * y) * 2
print(z)

tensor([ 8.0619, 11.7717, 12.7876], grad_fn=<MulBackward0>)


In [34]:
# To compute gradients, call .backward() on the output.
# .backward() evaluates a vector-Jacobian product, so when the output z is not
# a scalar a vector has to be passed in to multiply with the Jacobian.
v = torch.tensor([0.1, 1.0, 0.001], dtype=torch.float32)

z.backward(gradient=v)  # back-propagate: vector-Jacobian product of v with dz/dw
print(w.grad)           # the resulting gradients are stored in w.grad

tensor([0.8031, 9.7043, 0.0101])


## Stop tracking gradient history
* How to prevent PyTorch from tracking gradient history:
* option 1: `w.requires_grad_(False)`
* option 2: `w.detach()`
* option 3: wrap the code in a `with torch.no_grad():` block

In [28]:
# Option 1: switch gradient tracking off in place with requires_grad_(False).
# The trailing underscore marks an in-place operation. It is demonstrated on a
# fresh tensor rather than on `w`: flipping the flag on `w` itself would change
# what the following cells (which still use `w`) produce when the notebook is
# run from top to bottom.
w_frozen = torch.rand(3, requires_grad=True)
print(w_frozen)                  # printed with requires_grad=True
w_frozen.requires_grad_(False)
print(w_frozen)                  # the requires_grad flag is no longer shown

In [29]:
# Option 2: detach() returns a new tensor with the same values as w that is
# not tracked by autograd; w itself keeps requires_grad=True.
# NOTE: this rebinds y, replacing the tensor created in the forward-pass cell.
y = w.detach()
print(w, y, sep="\n")


tensor([0.8518, 0.1273, 0.6178], requires_grad=True)
tensor([0.8518, 0.1273, 0.6178])


In [36]:
# Option 3: run the operation inside a torch.no_grad() block.
# Nothing computed inside the block is recorded, so the printed result has no
# grad_fn. NOTE: this rebinds z, replacing the tensor created earlier.
with torch.no_grad():
    z = w + 2
    print(z)

tensor([2.0077, 2.4261, 2.5286])
