#### AutoGrad is PyTorch's built-in tool for computing gradients automatically

In [1]:
import torch

In [3]:
# Input feature: plain data, so gradient tracking is explicitly left off
x = torch.tensor(6.7, requires_grad=False)
x

tensor(6.7000)

In [4]:
# Target label that is later passed as `y` to binary_cross_entropy; plain data, not a trainable parameter
y = torch.tensor(0.0)

In [5]:
# Trainable parameters: requires_grad=True asks autograd to record the operations
# applied to them so that backward() can fill in w.grad and b.grad
w = torch.tensor(2.0, requires_grad=True)
b = torch.tensor(1.0, requires_grad=True)

In [7]:
# Forward pass: linear combination of the input, then a sigmoid to get the prediction
z = w * x + b
y_pred = z.sigmoid()

In [8]:
def binary_cross_entropy(y_pred, y, epsilon=1e-7):
    """Compute the element-wise binary cross-entropy loss.

    Args:
        y_pred: Tensor of predicted probabilities (e.g. sigmoid outputs).
        y: Tensor of target labels, broadcastable against ``y_pred``.
        epsilon: Clamp margin. Predictions are limited to
            ``[epsilon, 1 - epsilon]`` so that neither logarithm receives 0.
            Defaults to 1e-7, the value that used to be hard-coded.
            NOTE: for float32 inputs an epsilon much smaller than ~1e-7
            rounds ``1 - epsilon`` to 1.0, and the upper clamp then no
            longer protects ``log(1 - y_pred)``.

    Returns:
        Tensor of per-element losses; no reduction (mean/sum) is applied.
    """
    # Keep predictions strictly inside (0, 1) so torch.log never returns -inf.
    y_pred = torch.clamp(y_pred, epsilon, 1 - epsilon)
    return -(y * torch.log(y_pred) + (1 - y) * torch.log(1 - y_pred))

In [9]:
# Keyword arguments make the (prediction, target) order explicit
loss = binary_cross_entropy(y_pred=y_pred, y=y)

In [None]:
loss.backward() # backpropagate from the loss; fills in w.grad and b.grad

In [12]:
# The grad_fn in the repr shows that the loss is attached to the autograd graph
loss

tensor(14.3329, grad_fn=<NegBackward0>)

In [None]:
# d(loss)/dw and d(loss)/db, as filled in by loss.backward()
(w.grad, b.grad)

(tensor(6.7000), tensor(1.0000))

##### So, in this way AutoGrad keeps track of the operations and, when asked, computes the gradients of the parameters automatically, sparing us the immensely complex work of deriving and computing the gradients by hand.

### AutoGrad for a vector of inputs

In [15]:
# Vector input; requires_grad=True so a gradient is tracked for every element
x1 = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0], requires_grad=True)

In [22]:
# The repr confirms that gradient tracking is enabled on x1
x1

tensor([1., 2., 3., 4., 5.], requires_grad=True)

In [23]:
# Scalar objective: mean of the squared elements of x1
y = x1.pow(2).mean()
y

tensor(11., grad_fn=<MeanBackward0>)

In [24]:
# Backpropagate from the scalar y; the gradient is stored in x1.grad, y itself is unchanged
y.backward()
y

tensor(11., grad_fn=<MeanBackward0>)

In [None]:
x1.grad ## gradient of y with respect to each element of x1 (dy/dx_i = 2 * x_i / 5)

tensor([0.4000, 0.8000, 1.2000, 1.6000, 2.0000])

## Clearing out gradients 

##### Gradients accumulate in `.grad` across repeated `backward()` calls, so they must be cleared after each iteration (for example with `x.grad.zero_()`). In addition, gradient tracking itself can be switched off when it is not needed. There are 3 methods of doing that:

#####                    1. requires_grad = False
#####                    2. detach()
#####                    3. no_grad()

#### 1. Using requires_grad = False

In [26]:
# Fresh tensor with gradient tracking switched on
x = torch.tensor(1.0, requires_grad=True)
x

tensor(1., requires_grad=True)

In [27]:
# In-place switch: x itself stops being tracked (its repr no longer shows requires_grad=True)
x.requires_grad_(False)

tensor(1.)

#### 2. Using detach()

In [28]:
# Fresh tensor with gradient tracking switched on
x = torch.tensor(1.0, requires_grad=True)
x

tensor(1., requires_grad=True)

In [None]:
y = x.detach()
y ## detach() returns a new tensor with the same value as x but no gradient tracking

tensor(1.)

#### 3. Using no_grad()

In [30]:
# Fresh tensor with gradient tracking switched on
x = torch.tensor(1.0, requires_grad=True)
x

tensor(1., requires_grad=True)

In [None]:
# Operations executed inside the no_grad() block are not recorded by autograd
with torch.no_grad():
    y = x ** 2 ## y is computed without gradient tracking