In [19]:
import torch

In [20]:
# Leaf tensor of three standard-normal samples; requires_grad=True asks autograd
# to record every operation that uses it.
x = torch.randn((3,), requires_grad=True)
x

tensor([0.4639, 0.3339, 0.8020], requires_grad=True)

In [21]:
# Every operation on a tensor that requires grad adds a node to the computational
# graph; the result carries a grad_fn pointing back at the operation that made it.

y = torch.add(x, 2)
y

tensor([2.4639, 2.3339, 2.8020], grad_fn=<AddBackward0>)

In [22]:
# Rebind y to a different graph node: element-wise doubling of x.
y = torch.mul(x, 2)
y

tensor([0.9278, 0.6677, 1.6040], grad_fn=<MulBackward0>)

In [23]:
# z = 2 * y^2, built from two chained element-wise multiplications.
z = y.mul(y).mul(2)
z

tensor([1.7217, 0.8917, 5.1458], grad_fn=<MulBackward0>)

In [None]:
# Optional: reduce z to a scalar so that z.backward() can be called without an argument.
# z = z.mean()
# z

In [None]:
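# z.backward()  # dz/dx; works without an argument only when z is a scalar
# x.grad        # the gradient is accumulated into the leaf tensor x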

In [None]:
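# z.grad  # z is a non-leaf tensor: its .grad stays None (with a warning) unless z.retain_grad() is called before backward()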

In [None]:
# backward() with no argument is only defined for a scalar output. While z is
# still a vector, autograd raises a RuntimeError; catch it and show the message
# so that the demonstration does not abort a "Restart & Run All".

try:
    z.backward()
except RuntimeError as err:
    print(f"RuntimeError: {err}")

In [24]:
# z is a vector, so backward() needs a `gradient` argument of the same shape:
# autograd then computes the vector-Jacobian product v^T . J instead of a plain derivative.

v = torch.tensor([0.1, 1.0, 0.001], dtype=torch.float32)
z.backward(gradient=v)  # accumulates v^T . (dz/dx) into x.grad
x.grad

tensor([0.7423, 5.3417, 0.0128])

## How to prevent PyTorch from tracking history (from tracking nodes)

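Some operations (for example, updating the weights inside a training loop, or running inference) should not become part of the computational graph, so we need a way to tell PyTorch not to track them.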

In [None]:
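# Three ways to stop autograd from tracking operations on a tensor:
# 1. x.requires_grad_(False) - the trailing underscore means the tensor is modified in place
# 2. x.detach()              - returns a new tensor with the same values that is detached from the graph
# 3. with torch.no_grad():   - operations inside the block are not tracked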

In [28]:
# Option 1: switch tracking off in place (note the trailing underscore).
x = torch.randn((3,), requires_grad=True)
x.requires_grad_(requires_grad=False)
x

tensor([-1.3081, -0.5533, -0.9817])

In [29]:
# Option 2: detach() hands back a new tensor with the same values but no
# gradient tracking; x itself keeps requires_grad=True.
x = torch.randn((3,), requires_grad=True)
y = x.detach()
print(x, y, sep="\n")

tensor([ 0.7483, -1.8355,  1.5224], requires_grad=True)
tensor([ 0.7483, -1.8355,  1.5224])


In [31]:
# Option 3: inside a no_grad() block nothing is recorded, so y has no grad_fn
# even though x still requires grad.
x = torch.randn((3,), requires_grad=True)
print(x)
with torch.no_grad():
    y = torch.add(x, 2)
    print(y)

tensor([-0.1801,  0.0269, -2.2201], requires_grad=True)
tensor([ 1.8199,  2.0269, -0.2201])


## Accumulation of gradient

In [34]:
weights = torch.ones(4, requires_grad=True)

# Single pass: the sum reduces the output to a scalar, so backward() needs no argument.
for epoch in range(1):
    model_output = torch.sum(weights * 3)
    model_output.backward()
    print(weights.grad)

tensor([3., 3., 3., 3.])


In [35]:
x = torch.ones(4, requires_grad=True)

# Same experiment with the x / z names used earlier: d(sum(3x))/dx is 3 for every element.
for epoch in range(1):
    z = x.mul(3).sum()
    z.backward()
    print(x.grad)

tensor([3., 3., 3., 3.])


In [37]:
weights = torch.ones(4, requires_grad=True)

# backward() ADDS to weights.grad on every call, so without a reset the printed
# gradient grows by 3 each epoch.
for epoch in range(3):
    model_output = torch.sum(weights * 3)  # scalar, so backward() needs no argument
    model_output.backward()
    print(weights.grad)

tensor([3., 3., 3., 3.])
tensor([6., 6., 6., 6.])
tensor([9., 9., 9., 9.])


In [39]:
weights = torch.ones(4, requires_grad=True)

# Resetting the gradient at the end of each epoch keeps the passes independent:
# every epoch prints the same value.
for epoch in range(3):
    model_output = torch.sum(weights * 3)  # scalar, so backward() needs no argument
    model_output.backward()
    print(weights.grad)
    weights.grad.zero_()  # in-place reset of the accumulated gradient

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
