In [5]:
import torch

In [6]:
# Create a leaf tensor of three standard-normal samples and switch on gradient
# tracking so that autograd records every operation applied to it.
x = torch.randn(3).requires_grad_()
print(x)

tensor([-0.6560, -0.0331,  0.4846], requires_grad=True)


In [7]:
# Let's say we need to calculate the gradients of some function with respect to x.

In [8]:
# Shift every element of x by 2. Because x tracks gradients, the result
# carries a grad_fn that links it back to x.
y = torch.add(x, 2)
print(y)

tensor([1.3440, 1.9669, 2.4846], grad_fn=<AddBackward0>)


In [9]:
# Square y element-wise and double it: z = 2 * y^2.
z = torch.mul(y * y, 2)
print(z)

tensor([ 3.6124,  7.7377, 12.3464], grad_fn=<MulBackward0>)


In [10]:
# Reduce z to the mean of its elements so that it becomes a scalar.
z = torch.mean(z)

In [11]:
# Show the reduced z; its grad_fn now records the mean operation.
print(z)

tensor(7.8988, grad_fn=<MeanBackward0>)


In [12]:
# To calculate the gradients, we only need to call backward() on the scalar z.
z.backward()  # computes dz/dx

In [13]:
# After the backward() call, x.grad holds the gradient dz/dx.
print(x.grad)

tensor([1.7919, 2.6226, 3.3128])


In [14]:
# If we do not set requires_grad=True, then we won't be able to call
# the backward function.

In [15]:
# What if z is a vector instead of a scalar?
# Same computation as before (z = 2 * (x + 2)^2), but without the final mean.
x = torch.randn(3).requires_grad_()
y = torch.add(x, 2)
z = torch.mul(y * y, 2)
print(z)

tensor([ 5.2056, 18.4464,  3.1373], grad_fn=<MulBackward0>)


In [16]:
# z.backward() with no arguments will throw an error, because z is not a scalar.
# We have to create a vector of the same shape as z and pass it to the backward function.
# What backward is doing: it computes a vector-Jacobian product.


In [17]:
# z is not a scalar, so backward() needs a vector v with the same shape as z.
# The call accumulates the vector-Jacobian product v^T * (dz/dx) into x.grad,
# i.e. each dz_i/dx_i is weighted by the matching entry of v.
v = torch.tensor((0.1, 1.0, 0.001), dtype=torch.float32)
z.backward(gradient=v)
print(x.grad)

tensor([6.4533e-01, 1.2148e+01, 5.0098e-03])


In [18]:
# How do we stop PyTorch from tracking history and computing the grad_fn
# attribute?

In [19]:
# Three ways to stop autograd from tracking a tensor:
#   1. Set requires_grad = False
#   2. x.detach()
#   3. with torch.no_grad():
x = torch.randn(3).requires_grad_()
print(x)
# Option 1: switch tracking off in place on the leaf tensor.
x.requires_grad = False
print(x)



tensor([ 0.9093,  0.0928, -1.7218], requires_grad=True)
tensor([ 0.9093,  0.0928, -1.7218])


In [20]:
# Option 2: detach() returns a new tensor with the same values as x that is
# excluded from gradient tracking; x itself keeps requires_grad=True.
x = torch.randn(3).requires_grad_()
y = x.detach()
print(x, y, sep="\n")

tensor([0.2229, 0.0358, 0.4064], requires_grad=True)
tensor([0.2229, 0.0358, 0.4064])


In [21]:
# Option 3: operations executed inside torch.no_grad() are not recorded,
# so y comes out without a grad_fn even though x still requires gradients.
print(x)
with torch.no_grad():
    y = torch.add(x, 2)
print(y)

tensor([0.2229, 0.0358, 0.4064], requires_grad=True)
tensor([2.2229, 2.0358, 2.4064])


In [22]:
# Without a reset, every backward() call adds to weights.grad, so the
# printed gradient grows with each epoch.
weights = torch.ones(4).requires_grad_()
print(weights)
for epoch in range(3):
    model_output = torch.sum(weights * 3)
    model_output.backward()
    print(weights.grad)

tensor([1., 1., 1., 1.], requires_grad=True)
tensor([3., 3., 3., 3.])
tensor([6., 6., 6., 6.])
tensor([9., 9., 9., 9.])


In [23]:
# Gradients accumulate across backward() calls, so clear them after each epoch.
weights = torch.ones(4).requires_grad_()
print(weights)
for epoch in range(3):
    model_output = torch.sum(weights * 3)
    model_output.backward()
    print(weights.grad)
    # In-place reset so that the next epoch starts from a zero gradient.
    weights.grad.zero_()
    print(weights.grad)

tensor([1., 1., 1., 1.], requires_grad=True)
tensor([3., 3., 3., 3.])
tensor([0., 0., 0., 0.])
tensor([3., 3., 3., 3.])
tensor([0., 0., 0., 0.])
tensor([3., 3., 3., 3.])
tensor([0., 0., 0., 0.])


In [24]:
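##  Using optimizers, example
# weights = torch.ones(4, requires_grad=True)
# print(weights)
# optimizer = torch.optim.SGD([weights], lr=0.01)  # params must be an iterable of tensors
# optimizer.step()       # apply the update using the gradients stored in weights.grad
# optimizer.zero_grad()  # reset the gradients before the next backward pass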