<b><u><span style="font-size:18px">Week 2 Review Questions</span></u></b>

<span style="font-size:14px">Implement one manual gradient in numpy and verify it with a finite difference check</span>

In [45]:
import numpy as np

def loss(w, x, t):
    """Squared-error loss of a linear model: 0.5 * (w.x - t)^2.

    `w` and `x` are combined with `np.dot`; `t` is the target the
    prediction is compared against.
    """
    residual = np.dot(w, x) - t  # prediction minus target
    return 0.5 * residual ** 2

def manual_grad(w, x, t):
    """Closed-form gradient of 0.5 * (w.x - t)^2 with respect to w.

    By the chain rule this is the residual (w.x - t) scaled by x.
    """
    residual = np.dot(w, x) - t
    return residual * x

def finite_diff_grad(w, x, t, eps=1e-6, func=None):
    """Central finite-difference approximation to ∇_w L.

    Parameters
    ----------
    w : array_like
        Point at which the gradient is estimated. Any shape is accepted;
        non-floating input (e.g. an integer array) is promoted to float64 so
        the +/- eps perturbation is not truncated away.
    x, t :
        Passed through unchanged to the objective.
    eps : float
        Half-width of the central-difference step.
    func : callable, optional
        Scalar objective called as ``func(w, x, t)``. When omitted, the
        module-level ``loss`` is looked up at call time.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``w`` holding dL/dw for every entry.
        The caller's ``w`` is never modified.
    """
    if func is None:
        func = loss  # default objective: the loss defined next to this function

    w = np.asarray(w)
    if not np.issubdtype(w.dtype, np.inexact):
        # Integer/bool storage would silently drop the tiny eps step and
        # yield an all-zero "gradient"; work in floating point instead.
        w = w.astype(np.float64)

    grad_fd = np.zeros_like(w)
    for idx in np.ndindex(w.shape):  # visits every coordinate, whatever the rank
        w_pos = w.copy()
        w_neg = w.copy()
        w_pos[idx] += eps
        w_neg[idx] -= eps
        L_pos = func(w_pos, x, t)
        L_neg = func(w_neg, x, t)
        grad_fd[idx] = (L_pos - L_neg) / (2.0 * eps)  # central difference
    return grad_fd

# example values
w = np.array([5.0, -0.3, 0.1])
x = np.array([1.0, 2.0, -1.0])
t = 0.7

# analytic gradient vs. its numerical estimate at the same point
g_manual = manual_grad(w, x, t)
g_fd = finite_diff_grad(w, x, t)

print("Manual gradient:      ",g_manual)
print("Finite-diff gradient: ",g_fd)
print("Close?", np.allclose(g_manual, g_fd, rtol=1e-5, atol=1e-7))

# Enforce the check instead of only printing it: a wrong manual gradient now
# stops a Restart & Run All with a readable diff rather than a lone "False".
# Same tolerances and argument order as the np.allclose call above.
np.testing.assert_allclose(g_manual, g_fd, rtol=1e-5, atol=1e-7)

Manual gradient:       [ 3.6  7.2 -3.6]
Finite-diff gradient:  [ 3.6  7.2 -3.6]
Close? True


<span style="font-size:14px">Build a minimal nn.Module; run one epoch over a synthetic dataset</span>

In [46]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

# reproducibility: seed torch's global RNG before anything random is drawn
torch.manual_seed(42)

# synthetic dataset
# X holds N points in R^2; y is 1.0 where the squared radius exceeds 1.0, else 0.0
N = 512
X = torch.randn(N, 2)
r2 = X[:, 0].pow(2) + X[:, 1].pow(2)            # squared distance from the origin
y = (r2 > 1.0).to(torch.float32).unsqueeze(-1)  # shape (N, 1), labels 0/1

# pair features with labels and serve them in shuffled mini-batches of 32
dataset = TensorDataset(X, y)
dataloader = DataLoader(dataset, shuffle=True, batch_size=32)

# minimal MLP as nn.Module
class TinyMLP(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear, returning raw logits."""

    def __init__(self, in_dim=2, hidden_dim=16, out_dim=1):
        super().__init__()
        # Assemble the stack as a list first, then wrap it; the layers keep
        # their positions 0, 1, 2 inside `self.net`.
        layers = [
            nn.Linear(in_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, out_dim),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Map a batch `x` to un-normalised outputs (no sigmoid applied)."""
        logits = self.net(x)
        return logits

model = TinyMLP()
loss_fn = nn.BCEWithLogitsLoss()
optimizer = optim.SGD(model.parameters(), lr = 0.1)

# one training epoch
model.train() # training mode (important for dropout/BatchNorm; here just good habit)

running_loss = 0.0   # sum of per-sample losses seen so far
n_seen = 0           # number of samples seen so far
for xb, yb in dataloader:
    optimizer.zero_grad()              # clear old gradients

    logits = model(xb)                 # forward
    # Deliberately not called `loss`: that name is the numpy loss function
    # from the gradient-check cell, which finite_diff_grad looks up when it
    # is called. Rebinding it to a tensor here would break that function.
    batch_loss = loss_fn(logits, yb)   # scalar: mean loss over this batch

    batch_loss.backward()              # backward: compute grads
    optimizer.step()                   # update weights

    # Weight each batch mean by its size so a smaller final batch cannot
    # skew the epoch average.
    running_loss += batch_loss.item() * xb.size(0)
    n_seen += xb.size(0)

avg_loss = running_loss / n_seen
print(f"Average training loss after one epoch: {avg_loss:.4f}")


        

Average training loss after one epoch: 0.6693


In [47]:
import math
import random
from pathlib import Path

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, random_split

# ----------------------------------------
# Reproducibility - seed every RNG this cell imports
# ----------------------------------------
SEED = 42

torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

# ----------------------------------------
# Synthetic dataset: X are points in R^2, y is 1.0 where the squared radius
# exceeds 1.0 and 0.0 otherwise
# ----------------------------------------
N = 512
X = torch.randn(N, 2)
r2 = X[:, 0].pow(2) + X[:, 1].pow(2)
y = (r2 > 1.0).to(torch.float32).unsqueeze(-1)  # shape (N, 1), labels 0/1

dataset = TensorDataset(X, y)

# Train/validation split (80/20); the validation size is rounded down
val_fraction = 0.2
n_val = math.floor(N * val_fraction)
n_train = N - n_val

# dedicated, seeded generator that drives the split
g = torch.Generator()
g.manual_seed(SEED)
train_ds, val_ds = random_split(dataset, lengths=[n_train, n_val], generator=g)

# DataLoaders: shuffle the training batches, keep validation order fixed
batch_size = 32
train_loader = DataLoader(train_ds, shuffle=True, batch_size=batch_size)
val_loader = DataLoader(val_ds, shuffle=False, batch_size=batch_size)

# ----------------------------------------
# minimal MLP model
# ----------------------------------------
class TinyMLP(nn.Module):
    """Small fully connected classifier body: Linear, ReLU, Linear."""

    def __init__(self, in_dim=2, hidden_dim=16, out_dim=1):
        super().__init__()
        # Build the pieces in forward order; only `self.net` is registered,
        # so parameters live under net.0 and net.2.
        hidden = nn.Linear(in_dim, hidden_dim)
        activation = nn.ReLU()
        head = nn.Linear(hidden_dim, out_dim)
        self.net = nn.Sequential(hidden, activation, head)

    def forward(self, x):
        """Run `x` through the stack and return the raw outputs."""
        out = self.net(x)
        return out

model = TinyMLP()
loss_fn = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr = 1e-2, weight_decay = 1e-4)

# ------------------------------------------------
# training + validation loop with metrics and checkpointing
# ------------------------------------------------
num_epochs = 20
checkpoint_path = Path("best_model.pt")

best_val_loss = float("inf")
best_epoch = None  # stays None until a checkpoint has actually been written

for epoch in range(1, num_epochs + 1):
    # ----------- TRAIN -------------------------
    model.train()
    running_loss = 0.0      # sum of per-sample training losses this epoch
    running_correct = 0
    running_total = 0

    for xb, yb in train_loader:
        optimizer.zero_grad()
        logits = model(xb)
        # Not named `loss`: that would overwrite the numpy `loss` function
        # from the gradient-check cell, which finite_diff_grad looks up when
        # it is called.
        batch_loss = loss_fn(logits, yb)

        batch_loss.backward()
        optimizer.step()

        # batch mean * batch size = summed loss, so the epoch average is
        # per-sample even when the last batch is smaller
        running_loss += batch_loss.item() * xb.size(0)

        # train accuracy for this batch (logits come from before this step's update)
        with torch.no_grad():
            preds = (torch.sigmoid(logits) > 0.5).float()
            running_correct += (preds == yb).sum().item()
            running_total += yb.size(0)

    train_loss = running_loss / running_total
    train_acc = running_correct / running_total

    # ---------- VALIDATION ----------------------
    model.eval()
    val_loss_sum = 0.0
    val_correct = 0
    val_total = 0

    with torch.no_grad():  # no gradients needed while evaluating
        for xb, yb in val_loader:
            logits = model(xb)
            batch_loss = loss_fn(logits, yb)
            val_loss_sum += batch_loss.item() * xb.size(0)

            preds = (torch.sigmoid(logits) > 0.5).float()
            val_correct += (preds == yb).sum().item()
            val_total += yb.size(0)

    val_loss = val_loss_sum / val_total
    val_acc = val_correct / val_total

    # --------- CHECKPOINTING -------------------
    # Keep only the best-so-far model, judged by validation loss. A NaN
    # validation loss compares False here and therefore never saves.
    improved = val_loss < best_val_loss
    if improved:
        best_val_loss = val_loss
        best_epoch = epoch
        torch.save(
            {
                "epoch": epoch,
                "model_state": model.state_dict(),
                "optimizer_state": optimizer.state_dict(),
                "val_loss": val_loss,
                "val_acc": val_acc,
                "seed": SEED,
            },
            checkpoint_path,
        )

    print(
        f"Epoch {epoch:02d} | "
        f"train_loss={train_loss:.4f}, train_acc={train_acc:.3f} | "
        f"val_loss={val_loss:.4f}, val_acc={val_acc:.3f} "
        f"{'<-- best' if improved else ''}"
    )
print(f"\nBest validation loss: {best_val_loss:.4f}")
if best_epoch is None:
    # Do not claim a save that never happened (no epoch improved on inf).
    print("No checkpoint was saved: validation loss never improved.")
else:
    print(f"Best checkpoint saved to: {checkpoint_path.resolve()}")
        
            
            
                    
        
        

Epoch 01 | train_loss=0.6507, train_acc=0.617 | val_loss=0.6598, val_acc=0.529 <-- best
Epoch 02 | train_loss=0.6076, train_acc=0.620 | val_loss=0.6346, val_acc=0.529 <-- best
Epoch 03 | train_loss=0.5603, train_acc=0.620 | val_loss=0.5706, val_acc=0.559 <-- best
Epoch 04 | train_loss=0.5058, train_acc=0.688 | val_loss=0.5148, val_acc=0.657 <-- best
Epoch 05 | train_loss=0.4466, train_acc=0.763 | val_loss=0.4528, val_acc=0.735 <-- best
Epoch 06 | train_loss=0.3914, train_acc=0.878 | val_loss=0.3924, val_acc=0.863 <-- best
Epoch 07 | train_loss=0.3359, train_acc=0.920 | val_loss=0.3417, val_acc=0.902 <-- best
Epoch 08 | train_loss=0.2913, train_acc=0.959 | val_loss=0.2956, val_acc=0.961 <-- best
Epoch 09 | train_loss=0.2573, train_acc=0.978 | val_loss=0.2633, val_acc=0.941 <-- best
Epoch 10 | train_loss=0.2285, train_acc=0.978 | val_loss=0.2405, val_acc=0.931 <-- best
Epoch 11 | train_loss=0.2067, train_acc=0.980 | val_loss=0.2176, val_acc=0.951 <-- best
Epoch 12 | train_loss=0.1905, tr