In [1]:
import torch

In [7]:
# Build a tiny computation graph. x is a leaf tensor; requires_grad=True tells
# autograd to record every operation applied to it.
x = torch.randn(3, requires_grad=True)
y = x * 2
# Elementwise 3 * y^2, reduced to a single scalar by taking the mean.
z = (y * y * 3).mean()

# Report each stage; grad_fn names the backward node that produced the tensor.
print(f"x = {x}")
print(f"y = x * 2 = {y}")
print(f"z.mean() = {z}")
print(f"z.grad_fn = {z.grad_fn}")

x = tensor([ 0.7953, -1.5040,  0.2418], requires_grad=True)
y = x * 2 = tensor([ 1.5906, -3.0080,  0.4835], grad_fn=<MulBackward0>)
z.mean() = 11.811741828918457
z.grad_fn = <MeanBackward0 object at 0x00000250AFCF9B20>


In [8]:
# z is a scalar (the mean computed above), so .backward() needs no explicit
# gradient argument. It backpropagates through the recorded graph and stores
# the result in the .grad attribute of the leaf tensor x.
z.backward() # populates x.grad with dz/dx
print(f"x.grad = {x.grad}")

x.grad = tensor([  6.3626, -12.0318,   1.9341])


In [10]:
# There are three ways to stop autograd from tracking history on tensors that have requires_grad=True:
# 1. x.requires_grad_(False)
# 2. x.detach()
# 3. wrap the code in a `with torch.no_grad():` block
print(f"x.requires_grad = {x.requires_grad}")
with torch.no_grad():
    # Results of operations run inside this block are not attached to the
    # graph, even though the input x itself still has requires_grad=True.
    squared = x ** 2
    print(squared.requires_grad)

x.requires_grad = True
False


In [13]:
# Gradients accumulate (add up) in the .grad attribute, so zero them out before the next call to .backward().

w = torch.ones(3, requires_grad=True)

for _ in range(3):
    # d(sum(2 * w))/dw is 2 for every element, so a fresh gradient is [2., 2., 2.].
    out = torch.sum(w * 2)
    out.backward()
    print(w.grad)

    # Reset in place; otherwise the next backward() would add onto these values.
    w.grad.zero_()

tensor([2., 2., 2.])
tensor([2., 2., 2.])
tensor([2., 2., 2.])
