In [1]:
import torch

In [8]:
# Sample a random 3-element tensor and mark it so autograd records every
# operation that is later applied to it.
x = torch.randn((3,), requires_grad=True)
x

tensor([ 2.1693,  0.7000, -0.0987], requires_grad=True)

In [9]:
# Adding a constant to a tracked tensor produces a result that carries a
# grad_fn (see the repr below), i.e. it remembers how it was computed from x.
y = torch.add(x, 2)
y

tensor([4.1693, 2.7000, 1.9013], grad_fn=<AddBackward0>)

In [11]:
# Element-wise 2 * y**2, expressed as two multiplications so the last recorded
# operation is a multiply.
z = torch.mul(y * y, 2)
z

tensor([34.7658, 14.5799,  7.2295], grad_fn=<MulBackward0>)

In [15]:
# Reduce z to a single scalar so backward() can be called without arguments.
# Note: this rebinds `z`; the element-wise tensor is no longer reachable by name.
z = torch.mean(z)

In [None]:
# backward() with no arguments only works when z is a scalar (single value).
# Otherwise, pass backward() a `gradient` tensor of the same shape as z; it is
# the vector used in the vector-Jacobian product.
z.backward()

In [18]:
# Gradient of the scalar z with respect to x. With z = mean(2 * (x + 2)**2)
# over 3 elements, each entry equals 4 * (x_i + 2) / 3.
x.grad

tensor([5.5590, 3.6000, 2.5350])

How to change the `requires_grad` property of a tensor to stop tracking gradient history

In [21]:
# Option 1: detach() gives a tensor with the same values that is not tracked
# by autograd (the repr below no longer shows requires_grad=True).
# Note: this rebinds `y`, which previously held x + 2 with its grad history.
y = x.detach()
y

tensor([ 2.1693,  0.7000, -0.0987])

In [23]:
# Alternatively, switch tracking off in place (note the trailing underscore):
# x.requires_grad_(False)
# or run the code inside a torch.no_grad() context manager:
# with torch.no_grad(): ...

In [27]:
# Leaf tensor standing in for a model's trainable parameters.
weights = torch.ones(4, requires_grad=True)

for epoch in range(3):
    # Toy forward pass: scale every weight by 3 and reduce to a scalar.
    model_output = torch.sum(weights * 3)

    # backward() adds the new gradient onto whatever weights.grad already holds.
    model_output.backward()
    print(weights.grad)

    # At the end of each training step, clear the gradient in place with
    # zero_() so it does not accumulate across iterations.
    weights.grad.zero_()

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
