In [1]:
import torch

In [5]:
# Leaf tensor of three standard-normal samples; requires_grad=True tells
# autograd to record every operation applied to it.
x = torch.randn((3,), requires_grad=True)
print(x)

tensor([ 1.1137, -0.6097, -0.7631], requires_grad=True)


In [6]:
# Let's say we need to calculate the gradients of some function with respect to x.

In [7]:
# Element-wise shift; because x requires grad, the result carries a grad_fn
# node (AddBackward0) that links it back to x in the computation graph.
y = torch.add(x, 2)
print(y)

tensor([3.1137, 1.3903, 1.2369], grad_fn=<AddBackward0>)


In [8]:
# z = 2 * y^2, computed element-wise; the last recorded op is a multiplication.
z = 2 * (y * y)
print(z)

tensor([19.3897,  3.8659,  3.0601], grad_fn=<MulBackward0>)


In [9]:
# Reduce z to a single scalar so that backward() can later be called without arguments.
z = z.mean()

In [10]:
# z is now a 0-dim tensor produced by the mean reduction.
print(z)

tensor(8.7719, grad_fn=<MeanBackward0>)


In [11]:
# To calculate the gradients, we only need to call backward() on the scalar z:
z.backward()  # computes dz/dx

In [12]:
# After backward(), the gradient dz/dx is available in x.grad:
print(x.grad)

tensor([4.1515, 1.8537, 1.6493])


In [13]:
# If we do not set requires_grad=True, then we won't be able to call the backward function.

In [14]:
# What if z were a vector instead of a scalar?
# Same pipeline as before, but without the final mean() reduction.
x = torch.randn((3,), requires_grad=True)
y = x + 2
z = 2 * (y * y)
print(z)

tensor([19.3960,  6.6440, 10.5337], grad_fn=<MulBackward0>)


In [19]:
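# Calling z.backward() with no arguments raises a RuntimeError here, because
# gradients can be created implicitly only for scalar outputs.
# For a non-scalar z we have to create a vector v with the same shape as z and pass it to backward().
# What backward(v) is doing: it computes the vector-Jacobian product v^T * J, where J = dz/dx.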


In [18]:
# Weight vector for the vector-Jacobian product; it must match z's shape.
v = torch.tensor([0.1, 1.0, 0.001], dtype=torch.float32)
# For a non-scalar z, backward() needs the `gradient` argument.
z.backward(gradient=v)  # x.grad receives v^T * (dz/dx), not the full Jacobian
print(x.grad)

tensor([1.2457, 7.2905, 0.0092])


In [20]:
# How do we stop PyTorch from tracking history and computing the grad_fn attribute?

In [21]:
# Three ways to stop autograd from tracking a tensor:
#   1. x.requires_grad_(False)   (in-place flag change)
#   2. x.detach()                (new tensor that is not tracked)
#   3. with torch.no_grad(): ... (tracking disabled inside the block)
x = torch.randn((3,), requires_grad=True)
print(x)

# Option 1: the trailing underscore means x itself is modified in place.
x.requires_grad_(False)
print(x)



tensor([ 0.7484, -0.6431, -0.9996], requires_grad=True)
tensor([ 0.7484, -0.6431, -0.9996])


In [23]:
# Option 2: detach() returns a tensor with the same values that is not
# tracked by autograd; x itself keeps requires_grad=True.
x = torch.randn((3,), requires_grad=True)
y = x.detach()
print(x, y, sep="\n")

tensor([-0.1643, -0.0217,  1.2844], requires_grad=True)
tensor([-0.1643, -0.0217,  1.2844])


In [25]:
# Option 3: operations executed inside torch.no_grad() are not recorded,
# so y has no grad_fn even though x still requires grad.
print(x)
with torch.no_grad():
    y = x + 2
print(y)

tensor([-0.1643, -0.0217,  1.2844], requires_grad=True)
tensor([1.8357, 1.9783, 3.2844])


tensor([1., 1., 1., 1.], requires_grad=True)


In [30]:
# Dummy training loop: backward() ADDS to weights.grad on every call, so the
# printed gradient grows by 3 each epoch instead of staying at 3.
weights = torch.ones(4, requires_grad=True)
print(weights)
for epoch in range(3):
    model_output = torch.sum(weights * 3)
    model_output.backward()
    print(weights.grad)

tensor([1., 1., 1., 1.], requires_grad=True)
tensor([3., 3., 3., 3.])
tensor([6., 6., 6., 6.])
tensor([9., 9., 9., 9.])


In [31]:
# Gradients accumulate across backward() calls, so they have to be reset
# after each iteration before the next backward pass.
weights = torch.ones(4, requires_grad=True)
print(weights)
for epoch in range(3):
    model_output = torch.sum(weights * 3)
    model_output.backward()
    print(weights.grad)
    # In-place reset: zero_() clears the accumulated gradient buffer.
    weights.grad.zero_()
    print(weights.grad)

tensor([1., 1., 1., 1.], requires_grad=True)
tensor([3., 3., 3., 3.])
tensor([0., 0., 0., 0.])
tensor([3., 3., 3., 3.])
tensor([0., 0., 0., 0.])
tensor([3., 3., 3., 3.])
tensor([0., 0., 0., 0.])


In [34]:
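##  Using optimizers, example
# weights = torch.ones(4, requires_grad=True)
# print(weights)
# optimizer = torch.optim.SGD([weights], lr=0.01)  # params must be an iterable of tensors, not a bare tensor
# optimizer.step()
# optimizer.zero_grad()  # resets the accumulated gradients, like weights.grad.zero_()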