In [42]:
import torch

In [43]:
x = torch.randn(3, requires_grad=True)
print(x)

tensor([-1.3231, -0.5983,  2.0973], requires_grad=True)


In [44]:
y = x+2
print(y) #keeps track of operation
y = x*x*2
print(y)
y = x.mean()
print(y)

tensor([0.6769, 1.4017, 4.0973], grad_fn=<AddBackward0>)
tensor([3.5010, 0.7158, 8.7969], grad_fn=<MulBackward0>)
tensor(0.0586, grad_fn=<MeanBackward0>)


In [45]:
#calculate gradient

y.backward() #dy/dx - no argument required as y is scalar (mean)
print(x.grad)
y = x*x*2
v = torch.tensor([0.1,1.0,0.001], dtype=torch.float32)
y.backward(v) #pass tensor of same size
print(x.grad)

tensor([0.3333, 0.3333, 0.3333])
tensor([-0.1959, -2.0597,  0.3417])


In [46]:
#prevent pytorch from tracking gradient (eg. not needed 
#when updating weights during training)

x = torch.randn(3, requires_grad=True)
print(x)
x.requires_grad_(False) #method 1
print(x)
x = torch.randn(3, requires_grad=True)
y = x.detach() #method 2
print(y)
with torch.no_grad(): #method 3
    y = x+2
    print(y)

tensor([ 0.6122,  0.6284, -0.5484], requires_grad=True)
tensor([ 0.6122,  0.6284, -0.5484])
tensor([-0.0534,  1.2201, -0.1765])
tensor([1.9466, 3.2201, 1.8235])


In [53]:
#preventing gradients from accumulating

weights = torch.ones(4, requires_grad=True)
for epoch in range(2):
    model_output = (weights*3).sum()
    
    model_output.backward()
    
    print(weights.grad) #accumulating

print()
weights = torch.ones(4, requires_grad=True)
for epoch in range(2):
    model_output = (weights*3).sum()
    
    model_output.backward()
    
    print(weights.grad)
    
    weights.grad.zero_() #empty gradient
    

tensor([3., 3., 3., 3.])
tensor([6., 6., 6., 6.])

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
