In [1]:
import torch

In [3]:
a = torch.tensor(3)

In [5]:
# The tensor must be created with requires_grad=True; otherwise backward() fails.
# The error is the point of this cell, so catch it and display it rather than
# leaving an unhandled exception that would stop a "Run All".
try:
    a.backward()
except RuntimeError as err:
    print(f"{type(err).__name__}: {err}")

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [28]:
# Using autograd to take derivative
a = torch.tensor(3.0, requires_grad=True)

In [29]:
a.backward()

In [30]:
a.grad

tensor(1.)

### Taking derivative for two variables

In [31]:
x = torch.tensor(5.0, requires_grad=True)

In [32]:
x

tensor(5., requires_grad=True)

In [33]:
y = x**2

In [34]:
y

tensor(25., grad_fn=<PowBackward0>)

- When we take the gradient of y = x^2 with respect to x, it becomes dy/dx = 2x, so at x = 5 the gradient is 10

In [35]:
y.backward()

In [37]:
x.grad

tensor(10.)

### Taking Partial derivatives with three variables

- Suppose we want the derivative of z with respect to x
    - by the chain rule, we take the derivative of z with respect to y and multiply it by the derivative of y with respect to x
        - x = 2, y = x^2, z = sin(y), so dz/dx = dz/dy * dy/dx = cos(y) * 2x

In [39]:
x = torch.tensor(2.0, requires_grad=True)
x

tensor(2., requires_grad=True)

In [41]:
y = x**2
y

tensor(4., grad_fn=<PowBackward0>)

In [42]:
z = torch.sin(y)
z

tensor(-0.7568, grad_fn=<SinBackward0>)

- We can see how each tensor keeps track (through its `grad_fn`) of the operation that produced it from the previous variable; autograd follows this chain to compute the derivative

In [43]:
z.backward()

In [44]:
x.grad

tensor(-2.6146)

### Computing gradients for a perceptron

In [50]:
x = torch.tensor(3.5) # input feature
y = torch.tensor(0.0) # target label (the expected output, passed to the loss as `target`)


w = torch.tensor(1.0, requires_grad=True) # weight; requires_grad=True so that w.grad gets populated by backward()
b = torch.tensor(0.0, requires_grad=True) # bias; requires_grad=True so that b.grad gets populated by backward()

In [54]:
def binary_cross_entropy_loss(prediction, target):
    """Element-wise binary cross-entropy between predicted probabilities and targets.

    Args:
        prediction: Tensor of predicted probabilities in [0, 1]
            (for example the output of a sigmoid).
        target: Tensor of ground-truth labels, broadcastable against ``prediction``.

    Returns:
        Tensor of un-reduced losses ``-(t * log(p) + (1 - t) * log(1 - p))``.
    """
    # Integer inputs have no machine epsilon; promote them to the default float dtype.
    if not torch.is_floating_point(prediction):
        prediction = prediction.to(torch.get_default_dtype())

    # To prevent log(0). A fixed 1e-8 is smaller than float32 resolution near 1.0,
    # so `1 - 1e-8` rounds to exactly 1.0 and the upper clamp would do nothing
    # (giving inf/nan for a saturated prediction). Never go below the dtype's
    # machine epsilon so that `1 - epsilon` is strictly less than 1.
    epsilon = max(1e-8, torch.finfo(prediction.dtype).eps)
    prediction = torch.clamp(prediction, epsilon, 1 - epsilon)
    return -(target * torch.log(prediction) + (1 - target) * torch.log(1 - prediction))

In [51]:
w

tensor(1., requires_grad=True)

In [52]:
z = w*x + b
z

tensor(3.5000, grad_fn=<AddBackward0>)

In [53]:
y_pred = torch.sigmoid(z)
y_pred

tensor(0.9707, grad_fn=<SigmoidBackward0>)

In [55]:
loss = binary_cross_entropy_loss(y_pred,y)

In [56]:
loss

tensor(3.5297, grad_fn=<NegBackward0>)

In [57]:
loss.backward()

In [59]:
w.grad

tensor(3.3974)

In [60]:
b.grad

tensor(0.9707)

### Computing gradients for vector tensors (not just scalars)

In [64]:
x = torch.tensor([2.0,3.0,5.0], requires_grad=True)
x

tensor([2., 3., 5.], requires_grad=True)

In [69]:
# Reduce the squared vector to a single scalar before calling backward().
# For this 3-element tensor, d(mean(x^2))/dx_i = 2 * x_i / 3.
y = (x**2).mean()
y

tensor(12.6667, grad_fn=<MeanBackward0>)

In [70]:
y.backward()

In [71]:
x.grad

tensor([1.3333, 2.0000, 3.3333])

### Clearing grad
- It is important to clear the gradients: otherwise every call to `backward()` adds the new gradient to the value already stored in `.grad`, so we get the sum over all previous `backward()` calls instead of the current gradient

In [89]:
x = torch.tensor(2.0, requires_grad=True)
x

tensor(2., requires_grad=True)

In [98]:
y = x**2
y

tensor(4., grad_fn=<PowBackward0>)

In [99]:
y.backward()

In [100]:
x.grad

tensor(4.)

In [102]:
# zero_() resets the stored gradient to 0 in place (the trailing underscore marks
# an in-place op), so the next backward() does not add to the old value
x.grad.zero_()

tensor(0.)