In [1]:
import torch

In [2]:
# Random 1-D tensor with 3 elements. No requires_grad flag is passed,
# so autograd does not track operations on this tensor.
x = torch.rand(3)
x

tensor([0.1613, 0.6244, 0.4868])

>### Later we want to compute the gradient of some function with respect to x.
>### For that we must specify ```requires_grad=True``` when creating the tensor.

In [3]:
# Same kind of random tensor, but flagged with requires_grad=True so that
# operations on it are recorded for later gradient computation.
x = torch.rand(3, requires_grad=True)
x

tensor([0.6785, 0.9504, 0.1064], requires_grad=True)

>### Every time we perform an operation on ```x```, ```PyTorch``` builds a computational graph so that it can compute gradients later

In [4]:
# Because x requires grad, the result carries a grad_fn (AddBackward0)
# that links y back to x in the computational graph.
y = x + 2
y

tensor([2.6785, 2.9504, 2.1064], grad_fn=<AddBackward0>)

In [5]:
# Element-wise 2 * y**2; z is still a 3-element tensor at this point.
z = y*y*2
z

tensor([14.3483, 17.4100,  8.8741], grad_fn=<MulBackward0>)

In [6]:
# Reduce z to a single scalar so that backward() can be called without
# arguments. NOTE: this rebinds z, so the 3-element z from the previous
# cell is no longer reachable under this name.
z = z.mean()
z

tensor(13.5441, grad_fn=<MeanBackward1>)

>### To compute the gradient, call ```z.backward()```; it will compute $\frac{\partial z}{\partial x}$

In [7]:
# Backpropagate from the scalar z through the recorded graph; the result
# is written to x.grad rather than returned.
z.backward()

>### $\frac{\partial z}{\partial x}$ is stored in ```x.grad```

In [8]:
# z = mean(2 * (x + 2)**2) over 3 elements, so dz/dx_i = 4 * (x_i + 2) / 3.
x.grad

tensor([3.5713, 3.9339, 2.8086])

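>## If the last value (z in this case) isn't a scalar, calling ```z.backward()``` without arguments will not work
>## In the background it is a vector-Jacobian product, so we must pass a vector with the same shape as z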

In [9]:
# Rebuild the same graph on a fresh x, but stop before any reduction:
# z stays a 3-element tensor instead of a scalar.
x = torch.rand(3, requires_grad=True)
y = x + 2
z = y*y*2

In [10]:
# Vector handed to backward() below; it has one entry per element of z.
v = torch.tensor([.01, .4, .6])

In [11]:
# z is not a scalar, so backward() is given v as its gradient argument;
# x.grad then holds the vector-Jacobian product of v with dz/dx.
z.backward(v)

In [12]:
# z_i = 2 * (x_i + 2)**2 depends only on x_i, so x.grad_i = v_i * 4 * (x_i + 2).
x.grad

tensor([0.1047, 4.2861, 6.1152])

## Prevent PyTorch from tracking gradient calculation/computational graph

In [13]:
# 1st method: switch tracking off in place with requires_grad_()
x = torch.rand(3, requires_grad=True)
print(x)

# requires_grad_() modifies x itself and returns it, so the updated
# tensor can be printed straight from the call.
print(x.requires_grad_(False))

tensor([0.5787, 0.9365, 0.2022], requires_grad=True)
tensor([0.5787, 0.9365, 0.2022])


In [14]:
# 2nd method: detach()

x = torch.rand(3, requires_grad=True)

# detach() leaves x untouched and returns a new tensor with
# requires_grad=False that shares x's data.
y = x.detach()

# One tensor per line: tracked original first, detached view second.
print(x, y, sep="\n")

tensor([0.6508, 0.8021, 0.4315], requires_grad=True)
tensor([0.6508, 0.8021, 0.4315])


In [15]:
# 3rd method: torch.no_grad() context manager

x = torch.rand(3, requires_grad=True)
print(x)

# Operations executed inside the block are not recorded, so y gets no
# grad_fn even though x itself still requires grad.
with torch.no_grad():
    y = x + 2

print(y)

tensor([0.8626, 0.9047, 0.6699], requires_grad=True)
tensor([2.8626, 2.9047, 2.6699])


>## ```.grad``` keeps the previously computed gradient, and every new gradient is added to it
>## when ```.backward()``` is called repeatedly, e.g. in a loop

In [16]:
weights = torch.ones(3, requires_grad=True)

# Nothing resets weights.grad between passes, so every backward() call
# adds its gradient on top of the values already stored there.
for epoch in range(3):
    model_output = torch.sum(weights + 3)
    model_output.backward()
    print(weights.grad)

tensor([1., 1., 1.])
tensor([2., 2., 2.])
tensor([3., 3., 3.])


>## We must empty the gradients before we go to the next iteration

In [17]:
weights = torch.ones(3, requires_grad=True)

for epoch in range(3):
    model_output = torch.sum(weights + 3)
    model_output.backward()
    print(weights.grad)
    # Clear the stored gradient in place so the next pass starts from
    # zero instead of adding onto this one.
    weights.grad.zero_()

tensor([1., 1., 1.])
tensor([1., 1., 1.])
tensor([1., 1., 1.])
