### Automatic Differentiation in PyTorch
Ref: https://lightning.ai/courses/deep-learning-fundamentals/3-0-overview-model-training-in-pytorch/3-4-automatic-differentiation-in-pytorch/

In [19]:
import torch
import torch.nn.functional as F # sigmoid, tanh, relu, etc.
from torch import autograd

#### Init weights

In [28]:
# weight & bias
w = torch.tensor([0.23], requires_grad=True)
b = torch.tensor([0.1], requires_grad=True)

# feature & label
x_feature = torch.tensor([1.])
y_label = torch.tensor([2.])

#### Weighted Sum

In [29]:
z = w * x_feature + b
z

tensor([0.3300], grad_fn=<AddBackward0>)

#### Activation Function

In [32]:
y_predict = torch.sigmoid(z)
y_predict

tensor([0.5818], grad_fn=<SigmoidBackward0>)

#### Loss Function

In [33]:
# calculate the loss with the prev computed activation
loss = F.binary_cross_entropy(y_predict, y_label)
loss
# calculate the loss with without needed to calculate the activation
# l2 = F.binary_cross_entropy_with_logits(z, y)

tensor(0.2117, grad_fn=<BinaryCrossEntropyBackward0>)

#### Compute Gradient (of Loss Function)

In [24]:
w_grad = autograd.grad(loss, w, retain_graph=True) # keep computation graph in memory
w_grad
# b_grad = grad(l, b, retain_graph=True)

(tensor([-1.4182]),)

#### Compute Gradient using backward

In [26]:
loss.backward()
w.grad
# b.grad

tensor([-1.4182])