<a href="https://colab.research.google.com/github/olcaykursun/ML/blob/main/Spring25/illustrating_backward_pass.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
from torch import log

# Walk-through of one training step done by hand: forward pass, backward pass
# (autograd vs. manually derived gradients), a plain gradient-descent update,
# and a re-evaluation of the prediction with the updated weights.

# Step 1: Fixed input (no gradient needed) and trainable weights.
# Only `weights` has requires_grad=True, so it is the only tensor autograd
# accumulates a .grad for.
x = torch.tensor([1.0, 2.0])
weights = torch.tensor([0.9, 0.1], requires_grad=True)
expected_output = 1

# Step 2: Forward pass with some "even strange" operations
y = x ** 2
# weights[0] enters squared, weights[1] linearly, so the two partial
# derivatives below have different forms.
z = y[0] * weights[0]**2 + y[1] * weights[1]
s = torch.sigmoid(z)

# Binary cross-entropy for a single sample with target `expected_output`.
if expected_output == 1:
    bce_loss = -log(s)
else:
    bce_loss = -log(1-s)

# Step 3: Backpropagation figures out how each weight affects the loss.
bce_loss.backward()

# Print values and grads
print("x:", x.detach().numpy())
print("y = x ** 2:", y.detach().numpy())
print("z = weighted sum of y values:", z.item())
print("s = sigmoid(z):", s.item())
print("bce_loss", bce_loss.item())
print("weights.grad (dLoss/dw):", weights.grad.numpy())
# We could also calculate the grads manually via the chain rule:
#   dLoss/dz = s - expected_output        (sigmoid + BCE, holds for target 0 or 1)
#   dz/dw0   = y[0] * 2 * w0,   dz/dw1 = y[1]
with torch.no_grad():
    grads = torch.empty_like(weights)
    grads[0] = (s - expected_output) * y[0] * 2 * weights[0]
    grads[1] = (s - expected_output) * y[1]
    print("Manual gradients:", grads.numpy())

# Step 4: Optimizer step
learning_rate = 0.1
# Gradient descent step uses the gradients to adjust the weights, scaled by the learning rate.
# torch.no_grad() keeps PyTorch from building a computation graph around the update itself.
with torch.no_grad():
    # The update must be in-place. Writing `weights = weights - ...` here would
    # rebind the name to a brand-new tensor with requires_grad=False and
    # .grad=None, which breaks the zero_() call below and any later training step.
    weights -= learning_rate * weights.grad

# Gradients accumulate across backward() calls, so they have to be reset before
# the next step. With an optimizer, optimizer.zero_grad() does this for every
# parameter it manages; when managing tensors manually we clear .grad ourselves.
weights.grad.zero_()

# Recheck prediction after weight update
with torch.no_grad():
    y = x ** 2
    z = y[0] * weights[0]**2 + y[1] * weights[1]
    s = torch.sigmoid(z)
    print(f"Updated prediction after gradient step: {s.item():.4f}")