<a href="https://colab.research.google.com/github/olcaykursun/ML/blob/main/Spring25/illustrating_backward_pass.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#Dr. Kursun's - AUM Machine Learning Course, Spring 2025
import torch
from torch import log

# Walk through one forward pass, one backward pass and one gradient-descent
# step on a two-weight model, checking autograd against hand-derived gradients.
learning_rate = 0.1

# Step 1: Fixed input and trainable weights.
# Only `weights` tracks gradients; `x` is plain data.
x = torch.tensor([1.0, 2.0])
weights = torch.tensor([0.9, 0.1], requires_grad=True)
expected_output = 1

# Step 2: Forward pass with some operations
y = x ** 2
# Note that weights[0] enters squared, so dz/dw0 = 2 * w0 * y0 while dz/dw1 = y1.
z = y[0] * weights[0]**2 + y[1] * weights[1]
s = torch.sigmoid(z)

# Binary cross-entropy for a single sample with target `expected_output`.
if expected_output == 1:
    bce_loss = -log(s)
else:
    bce_loss = -log(1-s)

# Step 3: Backpropagation figures out how each weight affects the loss.
bce_loss.backward()

# Step 4: Print all values and gradients
print("x:", x.detach().numpy())
print("y = x ** 2:", y.detach().numpy())
print("z = weighted sum of y values:", z.item())
print("s = sigmoid(z):", s.item())
print("bce_loss", bce_loss.item())
print("weights.grad (dLoss/dw):", weights.grad.numpy())

# Step 5: Re-derive the gradients by hand with the chain rule.
# For sigmoid + BCE, dLoss/dz = s - expected_output; multiply by dz/dw.
with torch.no_grad():
    grads = torch.empty_like(weights)
    grads[0] = (s - expected_output) * y[0] * 2 * weights[0]
    grads[1] = (s - expected_output) * y[1]
    print("Manual gradients:", grads.numpy())

# Step 6: Optimizer step. Gradient descent moves the weights against the
# gradient, scaled by the learning rate. The update is done in place under
# no_grad so it is not recorded by autograd, and it is applied exactly once.
with torch.no_grad():
    weights -= learning_rate * weights.grad

# Step 7: Recheck prediction after weight update
with torch.no_grad():
    # x is unchanged, so y = x ** 2 from the forward pass is reused.
    z = y[0] * weights[0]**2 + y[1] * weights[1]
    s = torch.sigmoid(z)
    print(f"Updated prediction after gradient step: {s.item():.4f}")

x: [1. 2.]
y = x ** 2: [1. 4.]
z = weighted sum of y values: 1.209999918937683
s = sigmoid(z): 0.770298957824707
bce_loss 0.2609765827655792
weights.grad (dLoss/dw): [-0.41346186 -0.91880417]
Manual gradients: [-0.41346186 -0.91880417]
Updated prediction after gradient step: 0.8394
