# Autograd

Autograd is a core component of PyTorch that provides automatic differentiation for tensor operations. it enables gradient computation, which is essential for training machine learning models using optimization algorithms like gradient descent.

In [1]:
import torch

In [2]:
x=torch.tensor(3.0, requires_grad=True)

In [3]:
y=x**2 + 2*x + 1

In [4]:
x

tensor(3., requires_grad=True)

In [5]:
y

tensor(16., grad_fn=<AddBackward0>)

In [6]:
y.backward()

In [7]:
x.grad

tensor(8.)

In [8]:
t=torch.tensor(3.0, requires_grad=True)

In [9]:
y=t**3

In [10]:
z=torch.sin(y)

In [11]:
z

tensor(0.9564, grad_fn=<SinBackward>)

In [12]:
z.backward()

In [14]:
t.grad

tensor(-7.8877)

One thing to note here is if we try to get y.grad, we will not get an output, because y was not a leaf node, it was a variable being used for chaining.

In [15]:
y.grad

  y.grad


In [16]:
x=torch.tensor(6.7)
y=torch.tensor(0.0)

w=torch.tensor(1.0)
b=torch.tensor(0.0)

In [17]:
def binary_cross_entropy(y_pred, y):
    epsilon=1e-8
    y_pred=torch.clamp(y_pred, epsilon, 1.0-epsilon)
    return -(y*torch.log(y_pred) + (1-y)*torch.log(1-y_pred))

In [18]:
z=w*x+b
y_pred=torch.sigmoid(z)

In [19]:
loss=binary_cross_entropy(y_pred, y)

In [20]:
loss

tensor(6.7012)

In [21]:
## Manual Backpropagation
dloss_dypred=(y-y_pred)/(y_pred*(1-y_pred))

dypred_dz=y_pred*(1-y_pred)

dz_dw=x
dz_db=1

dL_dw=dloss_dypred*dypred_dz*dz_dw
dL_db=dloss_dypred*dypred_dz*dz_db

In [22]:
print(f"manual gradient of loss w.r.t w: {dL_dw}")
print(f"manual gradient of loss w.r.t b: {dL_db}")

manual gradient of loss w.r.t w: -6.691762447357178
manual gradient of loss w.r.t b: -0.998770534992218


In [29]:
x=torch.tensor(6.7)
y=torch.tensor(0.0)
w=torch.tensor(1.0, requires_grad=True)
b=torch.tensor(0.0, requires_grad=True)

In [30]:
z=w*x+b
y_pred=torch.sigmoid(z)

In [31]:
y_pred

tensor(0.9988, grad_fn=<SigmoidBackward>)

In [32]:
loss=binary_cross_entropy(y_pred, y)

In [33]:
loss

tensor(6.7012, grad_fn=<NegBackward>)

In [34]:
loss.backward()

In [35]:
w.grad

tensor(6.6918)

In [37]:
b.grad

tensor(0.9988)

In [38]:
x=torch.tensor([1.0,2.0,3.0], requires_grad=True)

In [39]:
x

tensor([1., 2., 3.], requires_grad=True)

In [45]:
y=(x**2).mean()

In [46]:
y

tensor(4.6667, grad_fn=<MeanBackward0>)

In [47]:
y.backward()

In [48]:
x.grad

tensor([0.6667, 1.3333, 2.0000])

In [44]:
# clearing the grads - if forward pass is done again, then gradients will be accumulated
x.grad.zero_()

tensor([0., 0., 0.])

In [49]:
# stopping the gradient tracking
x.requires_grad_(False)

tensor([1., 2., 3.])

In [50]:
z=x.detach()

In [51]:
z

tensor([1., 2., 3.])

In [52]:
with torch.no_grad():
    y=x**2