### Automatic Differentiation in PyTorch
Ref: https://lightning.ai/courses/deep-learning-fundamentals/3-0-overview-model-training-in-pytorch/3-4-automatic-differentiation-in-pytorch/

In [28]:
import torch
import torch.nn.functional as F # sigmoid, tanh, relu, etc.
from torch.autograd import grad

#### Init weights

In [29]:
# Training example: one input feature and its binary target.
# Neither is created with requires_grad, so autograd treats them as constants.
x1 = torch.tensor([1.23])
y = torch.tensor([1.0])

# Trainable parameters (weight and bias). requires_grad=True asks autograd
# to record the operations applied to them so gradients can be computed later.
w1 = torch.tensor([0.23], requires_grad=True)
b = torch.tensor([0.1], requires_grad=True)

#### Weighted Sum

In [30]:
# Linear part of the neuron, kept as two named steps so that each
# intermediate node of the computation graph can be inspected on its own.
u = torch.mul(w1, x1)  # weighted input
z = torch.add(u, b)    # net input: weighted input plus bias
z

tensor([0.3829], grad_fn=<AddBackward0>)

#### Activation Function

In [31]:
# Pass the net input z through the logistic sigmoid to get the activation.
a = z.sigmoid()
a

tensor([0.5946], grad_fn=<SigmoidBackward0>)

#### Loss Function

In [32]:
# Binary cross-entropy between the previously computed activation `a`
# and the target label `y`.
l = F.binary_cross_entropy(input=a, target=y)
l

tensor(0.5199, grad_fn=<BinaryCrossEntropyBackward0>)

In [33]:
# Same loss, computed directly from the net input z: the "with_logits"
# variant does not need the activation to be calculated first.
l2 = F.binary_cross_entropy_with_logits(input=z, target=y)
l2

tensor(0.5199, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)

#### Compute Gradient (of Loss Function)

In [34]:
# Gradient of the loss `l` with respect to the weight `w1`, obtained through
# the functional autograd API. The result is a tuple of gradients.
# retain_graph=True keeps the computation graph in memory after this call.
grad_L_w1 = grad(outputs=l, inputs=w1, retain_graph=True)
grad_L_w1

(tensor([-0.4987]),)

#### Compute Gradient using backward

In [36]:
# Alternative to grad(): call backward() on the loss. No inputs are named in
# this call and retain_graph is left at its default, unlike the grad() call
# in the previous cell.
l.backward()
# The gradient is read back from the tensor's `.grad` attribute; the value
# shown matches the one returned by grad() above.
w1.grad
# `b.grad` can be inspected the same way (b was also created with
# requires_grad=True).

tensor([-0.4987])