In [1]:

import torch
# The autograd package provides automatic differentiation 
# for all operations on Tensors



In [2]:
# Setting requires_grad=True makes autograd record every operation applied to x.
x = torch.randn(3, requires_grad=True)
y = torch.add(x, 2)

# x was created directly by the user, so its grad_fn is None.
# y is the result of an operation, so it carries a grad_fn attribute that
# references the Function which produced it.
print(x, y, y.grad_fn, sep="\n")

# Chain further operations onto y, then reduce the result to a scalar.
z = torch.mul(y * y, 3)
print(z)
z = torch.mean(z)
print(z)

tensor([ 1.8102,  0.5468, -0.2160], requires_grad=True)
tensor([3.8102, 2.5468, 1.7840], grad_fn=<AddBackward0>)
<AddBackward0 object at 0x7feed0985600>
tensor([43.5525, 19.4582,  9.5483], grad_fn=<MulBackward0>)
tensor(24.1863, grad_fn=<MeanBackward0>)


In [3]:

# z is a scalar here (the previous cell reduced it with .mean()), so
# backward() can be called without a gradient argument.
z.backward()
# The gradient is accumulated into the .grad attribute of the leaf tensor x.
print(x.grad) # dz/dx

tensor([7.6204, 5.0936, 3.5681])


In [4]:
x = torch.randn(3, requires_grad=True)

# Double x eleven times in total (once up front, ten more times in the loop),
# so y == 2**11 * x and y stays a non-scalar tensor of shape (3,).
y = torch.mul(x, 2)
for _ in range(10):
    y = torch.mul(y, 2)

print(y, y.shape, sep="\n")

tensor([2639.2017, 3363.9797,  738.1857], grad_fn=<MulBackward0>)
torch.Size([3])


In [5]:
# y is not a scalar (shape [3]), so backward() needs an explicit gradient
# argument of matching shape; autograd then computes the vector-Jacobian
# product of v with dy/dx and stores it in x.grad.
v = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float32)
y.backward(gradient=v)
print(x.grad)

tensor([2.0480e+02, 2.0480e+03, 2.0480e-01])


In [6]:
# Gradient tracking can be switched off locally: a result computed inside a
# torch.no_grad() block does not require grad even when its input does.
a = torch.randn(2, 2, requires_grad=True)
print(a.requires_grad)
with torch.no_grad():
    # Operate on `a`, the tensor created in this cell, instead of a leftover
    # `x` from an earlier cell, so the cell is self-contained and the two
    # printed flags refer to the same tensor.
    print((a ** 2).requires_grad)

True
False


In [7]:
weights = torch.ones(4, requires_grad=True)

for epoch in range(3):
    # Dummy "forward pass": a scalar that depends linearly on the weights.
    model_output = torch.sum(weights * 3)
    model_output.backward()

    print(weights.grad)

    # Gradient-descent step. It runs under no_grad so that the in-place
    # update of the leaf tensor is not itself tracked by autograd.
    with torch.no_grad():
        weights.sub_(0.1 * weights.grad)

    # Important: backward() accumulates into .grad, so it must be reset each
    # iteration; otherwise the final weights and output would be different.
    weights.grad.zero_()

print(weights, model_output, sep="\n")

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([0.1000, 0.1000, 0.1000, 0.1000], requires_grad=True)
tensor(4.8000, grad_fn=<SumBackward0>)
