In [1]:
import torch

In [None]:
## Computing the gradient of a function with respect to x

In [27]:
## Seed the global RNG first so that Restart Kernel -> Run All always draws the same x.
## The outputs stored in this notebook came from earlier, unseeded runs; re-run to refresh them.
torch.manual_seed(0)
## Leaf tensor with 3 random normal entries; requires_grad=True makes autograd track ops on it.
x = torch.randn(3, requires_grad=True)

In [25]:
## Show the leaf tensor; its repr includes requires_grad=True.
print(x)

tensor([-0.3706, -0.1904,  0.0520], requires_grad=True)


In [28]:
## Any op on a tensor that requires grad is recorded in the computational graph.
y = x + 2

In [9]:
## y carries a grad_fn attribute: the function autograd uses to propagate gradients
## through this op during backprop. For an addition it is AddBackward0.
print(y)

tensor([2.8474, 2.5126, 1.6623], grad_fn=<AddBackward0>)


In [29]:
## Element-wise 2*y^2; z is still a vector with one entry per element of y.
z = y*y*2

In [31]:
## The last op was a multiplication, so grad_fn is MulBackward0.
print(z)

tensor([28.0146,  2.5764, 24.0387], grad_fn=<MulBackward0>)


In [32]:
## Reduce z to a scalar (mean over its elements); z now refers to that scalar.
z = z.mean()

In [15]:
## After the reduction, grad_fn is MeanBackward0.
print(z)

tensor(11.4560, grad_fn=<MeanBackward0>)


In [33]:
## Backpropagate from the scalar z; this fills x.grad with dz/dx.
z.backward()

In [34]:
## Populated because x is a leaf tensor created with requires_grad=True.
## Here dz/dx = 4*(x + 2)/3, since z = mean(2*(x + 2)^2) over 3 elements.
print(x.grad)

tensor([4.9902, 1.5133, 4.6225])


In [35]:
## None: y is not a leaf tensor, so autograd does not keep its .grad by default
## (PyTorch also emits a UserWarning when .grad of a non-leaf tensor is accessed).
print(y.grad)

None


  print(y.grad)


In [37]:
## Ask autograd to keep .grad for the non-leaf tensor y on subsequent backward passes.
y.retain_grad()

In [38]:
## Display y; it is still part of the graph (grad_fn=AddBackward0).
y

tensor([3.7426, 1.1350, 3.4669], grad_fn=<AddBackward0>)

In [39]:
## Rebuild the scalar loss from y in a single expression and backpropagate again,
## now that y has been told to retain its gradient.
z = (y * y * 2).mean()
z.backward()

In [40]:
## .grad is now stored for y (a non-leaf tensor) because retain_grad() was called
## on it before the backward pass.
y.grad

tensor([4.9902, 1.5133, 4.6225])

In [None]:
## .backward syntax

In [43]:
## Demonstrates gradient accumulation: .grad is never reset between iterations,
## so each backward() call adds onto the gradient left by the previous one.
weights = torch.ones(4, requires_grad=True)
for epoch in range(2):
    print("epoch", epoch)
    scaled = 3 * weights
    model_output = scaled.sum()
    print(model_output)
    model_output.backward()
    print(weights.grad)

epoch 0
tensor(12., grad_fn=<SumBackward0>)
tensor([3., 3., 3., 3.])
epoch 1
tensor(12., grad_fn=<SumBackward0>)
tensor([6., 6., 6., 6.])


In [44]:
## Note that the gradient of the weights keeps (incorrectly) increasing above. It should stay at 3, but it becomes 6,
## and so on as the epochs increase, because backward() accumulates into .grad and the previous gradient is never cleared.

## To clear it, call tensor.grad.zero_() after each backward pass, as below.

In [45]:
## Same loop, but the gradient buffer is reset in place at the end of every
## iteration, so each epoch reports the gradient of that epoch only.
weights = torch.ones(4, requires_grad=True)
for epoch in range(2):
    print("epoch", epoch)
    scaled = 3 * weights
    model_output = scaled.sum()
    print(model_output)
    model_output.backward()
    print(weights.grad)
    weights.grad.zero_()  ## clear the accumulated gradient before the next pass

epoch 0
tensor(12., grad_fn=<SumBackward0>)
tensor([3., 3., 3., 3.])
epoch 1
tensor(12., grad_fn=<SumBackward0>)
tensor([3., 3., 3., 3.])


In [46]:
## Fresh leaf tensor used below to show the ways of switching gradient tracking off.
x = torch.randn((3,), requires_grad=True)
print(x)

tensor([ 1.4410, -0.8621, -1.3491], requires_grad=True)


In [48]:
## Option 1: flip the requires_grad flag in place (trailing underscore = in-place op).
x.requires_grad_(False)

tensor([ 1.4410, -0.8621, -1.3491])

In [50]:
## requires_grad no longer appears in the repr: tracking has been turned off.
print(x)

tensor([ 1.4410, -0.8621, -1.3491])


In [51]:
## Turn gradient tracking back on for the next demonstration.
x.requires_grad_(True)

tensor([ 1.4410, -0.8621, -1.3491], requires_grad=True)

In [52]:
## Option 2: detach x from the graph in place; this also leaves requires_grad=False.
x.detach_()

tensor([ 1.4410, -0.8621, -1.3491])

In [53]:
## requires_grad is off again after detach_().
print(x)

tensor([ 1.4410, -0.8621, -1.3491])


In [54]:
## Re-enable tracking so the torch.no_grad() demo starts from a tensor that requires grad.
x.requires_grad_(True)

tensor([ 1.4410, -0.8621, -1.3491], requires_grad=True)

In [55]:
## Nothing is displayed: no backward pass has been run on this x, so .grad is None.
x.grad

In [56]:
## Option 3: ops executed inside torch.no_grad() are not recorded by autograd,
## so the result y does not require grad even though x still does.
with torch.no_grad():
    y = torch.add(x, 2)
    print("x", x)
    print("y", y)
    

x tensor([ 1.4410, -0.8621, -1.3491], requires_grad=True)
y tensor([3.4410, 1.1379, 0.6509])


In [None]:
## y does not have requires_grad=True because it was computed inside torch.no_grad()

## Coding backpropagation
Simple linear regression (a single weight, no bias)

In [59]:
import torch

## One training sample for the regression: input x and target y (plain scalar
## tensors, no gradient tracking needed on the data).
x, y = torch.tensor(1.0), torch.tensor(2.0)

In [61]:
## Initial weight; requires_grad=True because this is the parameter we differentiate the loss against.
w = torch.tensor(1.0, requires_grad=True)

In [62]:
## Forward pass: prediction of the one-weight model (bias ignored), then the
## squared-error loss against the target y.
y_hat = w * x
residual = y_hat - y
loss = residual ** 2


In [63]:
## The loss is a scalar that is still attached to the graph (grad_fn=PowBackward0).
print(loss)

tensor(1., grad_fn=<PowBackward0>)


In [64]:
## Backward pass: PyTorch computes the local gradients and chains them together
## (backpropagation) automatically, storing d(loss)/dw in w.grad.

loss.backward()

In [65]:
## Expect -2: dl/dw = dl/dy_hat * dy_hat/dw = 2*(y_hat - y) * x = 2*(1 - 2)*1
print(w.grad)

tensor(-2.)


In [67]:
## Next steps (not implemented in this notebook): update the weight using w.grad,
## then run the next forward and backward pass.

None


  print(y_hat.grad)


## References

https://www.youtube.com/watch?v=DbeIqrwb_dE&list=PLqnslRFeH2UrcDBWF5mfPGpqQDSta6VK4&index=3

https://www.youtube.com/watch?v=MswxJw-8PvE

https://www.youtube.com/watch?v=3Kb0QS6z7WA&list=PLqnslRFeH2UrcDBWF5mfPGpqQDSta6VK4&index=4
