In [146]:
import torch

In [147]:
# Draw three samples from a standard normal distribution.
# No requires_grad flag is passed, so autograd does not track this tensor.
x = torch.randn((3,))
print(x)

tensor([ 1.3606, -1.3497, -1.9065])


In [148]:
# requires_grad is False by default; switch it on so that autograd
# records every operation performed on x.
x = torch.randn(3).requires_grad_(True)
print(x)

tensor([ 0.2546, -2.8109,  1.5368], requires_grad=True)


In [149]:
# x requires grad, so autograd records this addition; the result y
# carries a grad_fn that points back to it.
y = x + 2

In [150]:
#Computation Graph
#  x
#    \
#     \
#      (+)----(y)
#     /
#    /
#  2

In [151]:
print(y)
# The printed tensor shows grad_fn=<AddBackward0>: y was produced by an
# addition that autograd tracked, and this is the function used to
# back-propagate through it.

tensor([ 2.2546, -0.8109,  3.5368], grad_fn=<AddBackward0>)


In [152]:
# Element-wise 2 * y^2. The result is still tracked by autograd; its
# grad_fn is the last operation applied (a multiplication).
z = y*y*2
print(z)

tensor([10.1665,  1.3150, 25.0184], grad_fn=<MulBackward0>)


In [153]:
# Reduce z to a single value (note that the name z is rebound here).
# A scalar result is what lets backward() be called without arguments.
z = z.mean()
print(z)

tensor(12.1666, grad_fn=<MeanBackward0>)


In [154]:
# z is a scalar, so backward() needs no argument. It computes dz/dx and
# stores the result in x.grad (here dz/dx = 4 * (x + 2) / 3).
z.backward()
print(x.grad)
# Under the hood, autograd multiplies the Jacobian matrix by a vector
# (a vector-Jacobian product).

tensor([ 3.0061, -1.0811,  4.7158])


In [155]:
# The earlier backward() call worked because z held just one value (it was a mean).
z = y*y*2
# z.backward()
# print(x.grad)
# The two commented-out lines above would raise an error: z is no longer a
# scalar, so backward() must be given a gradient argument.

# v has the same shape as z; it is the vector used in the vector-Jacobian product.
v = torch.tensor([0.1, 1.0, 0.001], dtype=torch.float32)
z.backward(v)
# NOTE: x.grad was not reset after the earlier backward() call, so the values
# printed below are the earlier gradient PLUS the gradient of this call
# (gradients accumulate in .grad).
print(x.grad)

tensor([ 3.9080, -4.3246,  4.7299])


In [156]:
# Preventing gradient history:
# during the training loop, the weight-update step
# must not be recorded as part of the gradient computation.
# There are three ways to stop autograd from tracking a tensor:

# x.requires_grad_(False)
# x.detach()
# with torch.no_grad():

In [157]:
# Inspect x before changing anything: it is still tracked by autograd
# (the output shows requires_grad=True).
x

tensor([ 0.2546, -2.8109,  1.5368], requires_grad=True)

In [158]:
# The trailing underscore marks an in-place operation: x itself stops being
# tracked, which also affects every later cell that uses x.
x.requires_grad_(False)

tensor([ 0.2546, -2.8109,  1.5368])

In [159]:
# x.requires_grad_(False) was called on x in place earlier, so x is no longer
# tracked at this point. Re-enable tracking first; otherwise detach() would
# have nothing to detach from and this cell would demonstrate nothing.
x.requires_grad_(True)
# detach() returns a new tensor with the same values that is excluded from
# gradient tracking, so the printed tensor shows neither requires_grad nor
# a grad_fn. x itself is left untouched.
y = x.detach()
print(y)

tensor([ 0.2546, -2.8109,  1.5368])


In [160]:
# x.requires_grad_(False) was called on x in place earlier, so x is no longer
# tracked at this point. Re-enable tracking first; otherwise x + 2 would have
# no grad_fn even without no_grad() and this cell would demonstrate nothing.
x.requires_grad_(True)
# Inside the no_grad() block autograd records nothing, so y has no grad_fn
# even though x requires grad.
with torch.no_grad():
    y = x + 2
    print(y)

tensor([ 2.2546, -0.8109,  3.5368])


With each of the previous three approaches, the printed tensor no longer has a `grad_fn`.<br><br>

Note: Every call to `backward()` accumulates the gradient into the tensor's `.grad` attribute, i.e. the new values are added to whatever is already stored there, so `.grad` must be reset between calls when a fresh gradient is wanted.

### Training Examples

In [161]:
# Four weights, all ones, tracked by autograd. The gradient is deliberately
# NOT reset between iterations to show what goes wrong.
weights = torch.ones(4, requires_grad=True)
for epoch in range(4):
    # Toy forward pass: scale every weight by 3 and reduce to a scalar.
    model_output = torch.sum(3 * weights)
    # backward() adds this pass's gradient (3 per weight) onto weights.grad.
    model_output.backward()
    print(weights.grad)
# The printed gradients are wrong: each pass is summed onto the previous ones.

tensor([3., 3., 3., 3.])
tensor([6., 6., 6., 6.])
tensor([9., 9., 9., 9.])
tensor([12., 12., 12., 12.])


In [162]:
# Same toy loop as before, but the gradient is cleared after every pass.
weights = torch.ones(4, requires_grad=True)
for epoch in range(4):
    # Toy forward pass: scale every weight by 3 and reduce to a scalar.
    model_output = torch.sum(3 * weights)
    model_output.backward()
    print(weights.grad)
    # Zero the stored gradient in place so the next pass starts from zero
    # instead of adding onto this one.
    weights.grad.zero_()

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])


In [163]:
# optimizer = torch.optim.SGD([weights], lr=0.01)
# # SGD: Stochastic Gradient Descent (the optimizer expects an iterable of parameters)
# optimizer.step()
# optimizer.zero_grad()