# Goal:
Implement a complete training loop using only raw tensors and autograd -- no higher-level PyTorch abstractions such as `nn.Module` or `torch.optim`.
This exercise helps you understand every step involved before those steps are hidden behind `nn.Module` in upcoming sessions.

In [1]:
import torch

# Fix the RNG so the synthetic data (and therefore the whole run) is reproducible.
torch.manual_seed(42)

# Experiment settings.
N_MOLECULES = 50   # number of synthetic data points
N_EPOCHS = 50      # number of full-batch gradient-descent steps
LOG_EVERY = 10     # print progress every LOG_EVERY epochs
learning_rate = 0.1

# Synthetic data: one dummy descriptor per molecule, and a "binding affinity"
# target that is linear in the descriptor (slope 0.5, intercept 0.1) plus
# standard-normal noise scaled by 0.1.
x = torch.randn(N_MOLECULES)
noise = torch.randn(N_MOLECULES) * 0.1
y_true = 0.5 * x + 0.1 + noise

# Hand-made scalar parameters, both starting at zero and tracked by autograd.
w = torch.zeros((), requires_grad=True)
b = torch.zeros((), requires_grad=True)

for epoch in range(N_EPOCHS):
    # Forward pass: apply the linear model to every molecule at once.
    y_pred = w * x + b

    # Mean squared error between predictions and targets.
    residual = y_pred - y_true
    loss = residual.pow(2).mean()

    # Backward pass: autograd computes one partial derivative per tensor
    # created with requires_grad=True, i.e. dloss/dw and dloss/db.
    loss.backward()

    # Gradient-descent step. The in-place update runs under no_grad so it is
    # not recorded by autograd; the gradient is then cleared so the next
    # backward() starts from zero.
    with torch.no_grad():
        for param in (w, b):
            param.sub_(learning_rate * param.grad)
            param.grad.zero_()

    # The reported loss was computed before this epoch's update, while w and b
    # are the values after it.
    if epoch % LOG_EVERY == 0:
        print(f"Epoch {epoch}: loss={loss.item():.4f}, w={w.item():.4f}, b={b.item():.4f}")

Epoch 0: loss=0.3047, w=0.1109, b=0.0330
Epoch 10: loss=0.0103, w=0.4670, b=0.1172
Epoch 20: loss=0.0085, w=0.4966, b=0.1181
Epoch 30: loss=0.0084, w=0.4992, b=0.1175
Epoch 40: loss=0.0084, w=0.4995, b=0.1174


# A couple of things worth noting from the output
- The loss plateaus after about epoch 20. It does not reach 0 because of the noise term added to `y_true`; it settles near the variance of that noise ($0.1^2 = 0.01$).
- Even a perfect model cannot predict the noise.
- So, in the context of drug discovery, a model will have an irreducible error floor set by the quality (noise level) of the experimental data.

## Refactor: Raw Tensors -> nn.Module + torch.optim

In [None]:
import torch.nn as nn
# Re-seed so the layer's random initial weight and bias are reproducible.
torch.manual_seed(42)

# One input feature (the descriptor) mapped to one output (the prediction).
model = nn.Linear(in_features=1, out_features=1)
