# PyTorch - autograd package

In [3]:
import torch

### autograd enabled - the basics

In [25]:
# requires_grad=True tells autograd to track operations on the tensor, so that
# gradients with respect to it can be computed for functions that use it
x, y = (torch.ones(5, requires_grad=True) for _ in range(2))
x, y

(tensor([1., 1., 1., 1., 1.], requires_grad=True),
 tensor([1., 1., 1., 1., 1.], requires_grad=True))

In [26]:
# multiplying a tracked tensor gives a result that carries a grad_fn (see the output below)
x2, y2 = x * 2, y * 2
x2, y2

(tensor([2., 2., 2., 2., 2.], grad_fn=<MulBackward0>),
 tensor([2., 2., 2., 2., 2.], grad_fn=<MulBackward0>))

In [27]:
# gradients are computed with the backward() method; called without arguments
# it only works on a scalar, so each vector is first reduced to a single value
# NOTE(review): the name `sum` shadows Python's built-in sum() from here on
sum, avg = x2.sum(), y2.mean()

sum, avg



(tensor(10., grad_fn=<SumBackward0>), tensor(2., grad_fn=<MeanBackward0>))

In [28]:
# the tensors above carry a grad_fn, which is what is needed to run backward() on them
# backward() stores the result in the .grad attribute of the leaf tensors x and y:
#   d(sum(2 * x)) / dx  = 2        for every element
#   d(mean(2 * y)) / dy = 2 / 5    = 0.4 for every element
sum.backward() 
avg.backward()
sum, x.grad, avg, y.grad

(tensor(10., grad_fn=<SumBackward0>),
 tensor([2., 2., 2., 2., 2.]),
 tensor(2., grad_fn=<MeanBackward0>),
 tensor([0.4000, 0.4000, 0.4000, 0.4000, 0.4000]))

In [31]:
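# calling backward() a second time on the same graph raises a RuntimeError, because the
# intermediate buffers are freed after the first call (unless it was run with retain_graph=True)
# sum.backward() 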

### Calling backward on a vector

In [32]:

# fresh leaf tensors for this section, each doubled to get a non-scalar result
x = torch.ones(5, requires_grad=True)
y = torch.ones(5, requires_grad=True)
x2, y2 = (t * 2 for t in (x, y))
x2, y2

(tensor([2., 2., 2., 2., 2.], grad_fn=<MulBackward0>),
 tensor([2., 2., 2., 2., 2.], grad_fn=<MulBackward0>))

In [33]:
# one weight per element (powers of ten: 1, 10, ..., 10000); passed to backward() in the next cell
base = torch.tensor([10.0 ** k for k in range(5)], dtype=torch.float32)
base

tensor([1.0000e+00, 1.0000e+01, 1.0000e+02, 1.0000e+03, 1.0000e+04])

In [34]:
# x2 and y2 are vectors, so backward() is given a `gradient` tensor of the same shape.
# autograd then computes the vector-Jacobian product base^T @ J, where J = d(x2)/dx.
# Here x2 = 2 * x, so J = 2 * I and the result is x.grad = 2 * base (likewise for y).
x2.backward(base) # each element's gradient is weighted by the matching entry of base
y2.backward(base)

x.grad, y.grad

(tensor([2.0000e+00, 2.0000e+01, 2.0000e+02, 2.0000e+03, 2.0000e+04]),
 tensor([2.0000e+00, 2.0000e+01, 2.0000e+02, 2.0000e+03, 2.0000e+04]))

### Gradient accumulation across repeated backward() calls

In [46]:
# x is the tracked leaf tensor; base is the plain (untracked) tensor handed to backward() below
x, base = torch.ones(3, requires_grad=True), torch.ones(3)

In [47]:
# every iteration builds a new graph (x2 = x * 2) and backpropagates through it;
# backward() adds its result to x.grad instead of overwriting it, so the printed
# gradient grows by 2 on each pass
for i in range(3):
    x2 = x * 2
    x2.backward(base)
    print(x.grad)

tensor([2., 2., 2.])
tensor([4., 4., 4.])
tensor([6., 6., 6.])


In [49]:
# so gradients accumulate: every backward() call adds its result to x.grad instead of overwriting it

### Zero grad

In [50]:
# same setup as in the accumulation example: an untracked tensor for backward() and a tracked leaf
base = torch.ones(3)
x = torch.ones(3, requires_grad=True)

In [51]:
# same loop as before, but the gradient is cleared after each pass,
# so every iteration prints the gradient of a single backward() call
for i in range(3):
    x2 = x * 2
    x2.backward(base)
    print(x.grad)
    x.grad.zero_()  # reset x.grad to zeros in place so the next backward() does not add to it

tensor([2., 2., 2.])
tensor([2., 2., 2.])
tensor([2., 2., 2.])


### Turning off gradient calculation

In [40]:
# option 1: switch tracking off on the tensor itself with requires_grad_(False)
# (the trailing underscore marks a method that modifies the tensor in place)
x = torch.ones(5, requires_grad=True) 
x.requires_grad_(False)
x

tensor([1., 1., 1., 1., 1.])

In [41]:
# option 2: detach() separates a tensor from the current computational graph
# and returns a new tensor that does not require gradients
x = torch.ones(5, requires_grad=True) 
y = x.detach()
y

tensor([1., 1., 1., 1., 1.])

In [None]:
# option 3: the torch.no_grad() context manager (see the next cell)

In [44]:
# torch.no_grad() is a context manager that disables gradient calculation for the code inside it;
# the printed result shows no grad_fn even though it is computed from x
with torch.no_grad():
    y = x * 2
    print(y)

tensor([2., 2., 2., 2., 2.])
