In [55]:
# Gradients are essential for optimization, so autograd comes first.
import torch

# requires_grad=True asks PyTorch to track operations on x, so that the
# gradient of some function with respect to x can be calculated later on.
x = torch.rand((3,), requires_grad=True)
print(x)

tensor([0.1381, 0.6793, 0.3053], requires_grad=True)


In [56]:
# Whenever an operation involves a tensor that requires gradients, PyTorch
# records it in a computational graph. Computing x + 2 is the forward pass;
# alongside it, autograd attaches a backward function to y (visible as
# grad_fn=<AddBackward0> in the output below) that is used during
# backpropagation to compute the gradient dy/dx.
y = x + 2
print(y)

tensor([2.1381, 2.6793, 2.3053], grad_fn=<AddBackward0>)


In [57]:
# Element-wise 2 * y^2. The result is still a vector, and its grad_fn reflects
# the last operation performed (a multiplication).
z = y * y * 2
print(z)

tensor([ 9.1426, 14.3570, 10.6292], grad_fn=<MulBackward0>)


In [58]:
# Collapse z to a single scalar by averaging its elements, so that backward()
# can be called on it without any argument.
z = torch.mean(z)
print(z)

tensor(11.3763, grad_fn=<MeanBackward0>)


In [59]:
# backward() runs backpropagation and stores dz/dx in x.grad.
# Calling it with no argument is only possible because z is a scalar here.
z.backward()
print(x.grad)

tensor([2.8507, 3.5724, 3.0738])


In [60]:
# backward() relies on the chain rule: under the hood, autograd computes vector-Jacobian products.

In [61]:
# Rebuild z as a non-scalar tensor. For a non-scalar output, backward() must be
# given a vector with the same shape as z; it is multiplied with the Jacobian
# (a vector-Jacobian product).
# Note: x.grad still holds the result of the earlier backward() call, and this
# call adds to it rather than replacing it.
z = y * y * 2
v = torch.tensor([0.1, 1.0, 0.001], dtype=torch.float32)
z.backward(gradient=v)
print(x.grad)

tensor([ 3.7060, 14.2895,  3.0830])


In [62]:
# Way 1: switch off gradient tracking for x in place (the trailing underscore
# marks an in-place operation). The displayed tensor no longer shows
# requires_grad=True.
x.requires_grad_(requires_grad=False)

tensor([0.1381, 0.6793, 0.3053])

In [64]:
x = torch.rand((3,), requires_grad=True)
# Way 2: detach() gives back a tensor with the same values as x that is cut
# off from the computational graph, so it does not require gradients.
y = x.detach()
print(y)

tensor([0.8612, 0.1660, 0.1731])


In [65]:
x = torch.rand((3,), requires_grad=True)
# Way 3: operations executed inside a no_grad() block are not tracked, so y
# gets no grad_fn even though x itself requires gradients.
with torch.no_grad():
    y = x + 2
print(y)

tensor([2.9802, 2.1664, 2.6959])


In [66]:
# The three cells above show the three ways to stop PyTorch from creating gradient functions and tracking the computational graph: requires_grad_(False), detach(), and `with torch.no_grad():`.

In [67]:
# Toy parameter vector: four ones, tracked by autograd.
weights = torch.ones((4,), requires_grad=True)

In [68]:
# A single "epoch": the forward pass produces a scalar, backward() then fills
# weights.grad with d(output)/d(weights).
for epoch in range(1):
    model_ouput = torch.sum(weights + 3)
    model_ouput.backward()
    print(weights.grad)

tensor([1., 1., 1., 1.])


In [69]:
# Two epochs without any reset: every backward() call adds its result to
# weights.grad instead of overwriting it, so the printed values keep growing
# (they also include the gradient left over from the previous cell). In a
# training loop this is wrong -- each step should only see its own gradient.
for epoch in range(2):
    model_ouput = torch.sum(weights + 3)
    model_ouput.backward()
    print(weights.grad)

tensor([2., 2., 2., 2.])
tensor([3., 3., 3., 3.])


In [71]:
# Fix for the accumulation problem: clear weights.grad in place -- once up
# front to discard what earlier cells left behind, and again at the end of
# every epoch so the next backward() starts from zero.
weights.grad.zero_()
for epoch in range(3):
    model_ouput = torch.sum(weights + 3)
    model_ouput.backward()
    print(weights.grad)
    weights.grad.zero_()

tensor([1., 1., 1., 1.])
tensor([1., 1., 1., 1.])
tensor([1., 1., 1., 1.])


In [None]:
# Built-in optimizers expect an iterable of parameters (hence the list around
# `weights`) and receive the learning rate through the `lr` keyword.
optimizer = torch.optim.SGD([weights], lr=0.01)
# step() updates every registered parameter using the value in its .grad.
optimizer.step()
# zero_grad() clears the gradients before the next iteration; it serves the
# same purpose as the manual weights.grad.zero_() call used earlier.
optimizer.zero_grad()