### Autograd Package 

The `torch.autograd` package provides automatic differentiation for all operations on tensors.

In [1]:
import torch

In [2]:
# Leaf tensor holding three standard-normal samples; requires_grad=True asks
# autograd to record every operation that is applied to it.
x = torch.randn(3, requires_grad=True)
print(x)

tensor([ 1.1896, -0.2289,  0.0616], requires_grad=True)


In [3]:
# Compute the gradient of a scalar function with respect to x.
# x was created with requires_grad=True, so every operation applied to it is
# recorded in a computational graph that backward() later walks in reverse.

# Forward pass
y = x + 1  # graph node: addition of x and 1, output y (grad_fn=AddBackward0)
print(y)
c = y * y * 2  # element-wise, still one value per element of x (grad_fn=MulBackward0)
print(c)
c = c.mean()  # reduce to a scalar so backward() needs no gradient argument
print(c)

# Backward pass: computes dc/dx = 4 * (x + 1) / 3 and accumulates it into x.grad.
# backward() raises a RuntimeError if nothing in the graph has requires_grad=True.
c.backward()
print(x.grad)

tensor([2.1896, 0.7711, 1.0616], grad_fn=<AddBackward0>)
tensor([9.5885, 1.1892, 2.2540], grad_fn=<MulBackward0>)
tensor(4.3439, grad_fn=<MeanBackward0>)
tensor([2.9194, 1.0281, 1.4155])


In [4]:
# Stop PyTorch from tracking history, e.g. so that updating the weights inside
# a training loop does not become part of the gradient computation.
# Options:
#   - x.requires_grad_(False)
#   - x.detach()
#   - with torch.no_grad():

# Option 1: flip the flag in place (the trailing underscore marks an in-place op).
x.requires_grad_(False)
print(x)  # the printed tensor no longer shows requires_grad=True

# Option 2: detach() returns a new tensor with the same values and requires_grad=False.
y = x.detach()
print(y)

# Option 3: operations executed inside the no_grad() context are not recorded.
with torch.no_grad():
    y = torch.add(x, 2)
    print(y)
#########################################################################

tensor([ 1.1896, -0.2289,  0.0616])
tensor([ 1.1896, -0.2289,  0.0616])
tensor([3.1896, 1.7711, 2.0616])


In [5]:
# Dummy example: backward() accumulates into .grad, so the gradient has to be
# cleared after every iteration to keep each step's value correct.
weights = torch.ones(4, requires_grad=True)
for epoch in range(4):
    model_output = torch.sum(weights * 3)
    # Fills weights.grad with d(model_output)/d(weights), i.e. 3 for every element.
    model_output.backward()
    print(weights.grad)
    # Reset in place before the next step; otherwise the printed gradient would
    # grow to 6, 9, 12, ... instead of staying at 3.
    weights.grad.zero_()

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])


In [13]:
# Taken from the repo below, a highly recommended tutorial:
# https://github.com/python-engineer/pytorchTutorial/blob/master/03_autograd.py

# -------------
# Model with non-scalar output:
# If a tensor is non-scalar (more than one element), backward() needs a
# gradient argument: a tensor of matching shape that serves as the vector in
# the vector-Jacobian product.

x = torch.randn(3, requires_grad=True)

y = x * 2
for _ in range(10):
    y = y * 2  # after the loop y == x * 2**11

print(y)
print(y.shape)

v = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float32)
y.backward(v)  # x.grad = v * dy/dx = v * 2048
print(x.grad)

# -------------
# Stop a tensor from tracking history:
# For example during our training loop when we want to update our weights
# then this update operation should not be part of the gradient computation
# - x.requires_grad_(False)
# - x.detach()
# - wrap in 'with torch.no_grad():'

# .requires_grad_(...) changes an existing flag in-place.
a = torch.randn(2, 2)
print('For tensor a:' , a.requires_grad)
b = ((a * 3) / (a - 1))
# b was built from a tensor that does not require grad, so it has no grad_fn.
print('grad_fn called on b tensor:' ,b.grad_fn)
a.requires_grad_(True)
print('in place requires_grad:', a.requires_grad)
b = (a * a).sum()
# Now that a requires grad, the result carries a grad_fn (SumBackward0).
print('No none value anymore on tensor b:',b.grad_fn)

# .detach(): get a new Tensor with the same content but no gradient computation:
a = torch.randn(2, 2, requires_grad=True)
print(a.requires_grad)
b = a.detach()
print('detached tensor from grad requires no grad:' , b.requires_grad)

# wrap in 'with torch.no_grad():'
a = torch.randn(2, 2, requires_grad=True)
print(a.requires_grad)
with torch.no_grad():
    # Use the tensor this demo just created (a), not the unrelated x from the
    # top of the cell: a requires grad, yet the result computed here does not.
    a_squared = a ** 2
    print(a_squared.requires_grad)

# -------------

tensor([-156.9311, 2464.8921,  800.6077], grad_fn=<MulBackward0>)
torch.Size([3])
tensor([2.0480e+02, 2.0480e+03, 2.0480e-01])
For tensor a: False
grad_fn called on b tensor: None
in place requires_grad: True
No none value anymore on tensor b: <SumBackward0 object at 0x1233f9a10>
True
detached tensor from grad requires no grad: False
True
False


In [16]:
# Taken from the repo below, a highly recommended tutorial:
# https://github.com/python-engineer/pytorchTutorial/blob/master/03_autograd.py

# -------------
# backward() adds the newly computed gradient onto whatever .grad already holds.
# !!! Be careful during optimization !!!
# Call .zero_() on the gradient before starting a new optimization step.
weights = torch.ones(4, requires_grad=True)
learning_rate = 0.1

for epoch in range(3):
    # Dummy forward pass standing in for a real model.
    model_output = torch.sum(weights * 3)
    model_output.backward()

    print(weights.grad)

    # Gradient-descent update; wrapped in no_grad() so the in-place change to
    # the weights is not recorded in the computational graph.
    with torch.no_grad():
        weights.sub_(learning_rate * weights.grad)

    # Important: skipping this reset changes the final weights and output,
    # because the next backward() would add onto the stale gradient.
    weights.grad.zero_()

print(weights)
print(model_output)

# An optimizer offers the same reset through its zero_grad() method:
# optimizer = torch.optim.SGD([weights], lr=0.1)
# During training:
# optimizer.step()
# optimizer.zero_grad()

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([0.1000, 0.1000, 0.1000, 0.1000], requires_grad=True)
tensor(4.8000, grad_fn=<SumBackward0>)
