# **Mount your Google Drive. Make sure your workspace is uploaded to the drive.**
Before running any cell, navigate to Runtime → Change runtime type and set the hardware accelerator to GPU.

In [None]:
# Mount Google Drive at /content/drive so files stored on the drive are
# reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# **Run the cell below to verify that the workspace folder exists on the drive**

In [None]:
# Change the working directory to the model_generation folder on the mounted
# drive, then list its contents.
%cd /content/drive/MyDrive/hw10_workspace/src/model_generation
!ls

/content/drive/MyDrive/hw10_workspace/src/model_generation
dataset.py  eval.py  model.py  __pycache__  train.ipynb  util


# **Install dependencies and import the necessary libraries**



In [None]:
import os, math, argparse, torch, random
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.cuda.amp import GradScaler, autocast
from torch.optim import AdamW
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms as T
from PIL import Image

from dataset import make_loaders   # from dataset.py
from model import ImageOnlySteerNet

# Per-channel mean / std handed to T.Normalize by make_eval_tf and make_train_tf.
# NOTE(review): these look like the usual ImageNet RGB statistics — confirm
# they match whatever the backbone in model.py was pretrained with.
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD  = (0.229, 0.224, 0.225)

:
# **Q2.a Data Augmentation AND Preprocessing Pipeline**
In this section, you define how the input images are transformed
before being fed into the neural network.
There are two parts:
(1) `make_eval_tf()` - transformations for validation/test (NO augmentation)
(2) `make_train_tf()` - transformations for training (WITH augmentation)

The idea is to help the model generalize better to unseen lighting
conditions, camera shifts, or image distortions by applying random
transformations during training.

You are expected to:
→ Experiment with parameters inside `make_train_tf`
such as brightness, contrast, rotation, translation, and scale.
→ Leave `make_eval_tf()` unchanged.
→ Understand how these augmentations affect model performance.

Hints:
- Increase brightness/contrast jitter slightly if dataset lighting varies.
- Small random affine transforms simulate the TurtleBot camera moving.
- `TopCrop` removes the top part of the frame (e.g., ceiling or wall).

In [None]:
# ------------------ Augmentations ------------------
class TopCrop(torch.nn.Module):
    """Remove the top ``frac`` of an image's height, keeping its full width.

    Args:
        frac: Fraction of the height to discard from the top. Values outside
            ``[0, 1]`` are clamped into that range.
    """

    def __init__(self, frac: float):
        super().__init__()
        self.frac = max(0.0, min(1.0, float(frac)))

    def forward(self, im: "Image.Image"):
        """Return ``im`` with its top rows cropped away.

        ``im`` only needs a ``.size`` of ``(width, height)`` and a
        ``.crop((left, top, right, bottom))`` method, as used below.
        The input is returned untouched when ``frac`` is 0.
        """
        if self.frac <= 0:
            return im
        w, h = im.size
        # Always leave at least one row: with frac == 1.0 the raw cut equals
        # h, which would yield a zero-height image and break later resizing.
        cut = min(int(h * self.frac), max(h - 1, 0))
        return im.crop((0, cut, w, h))

def make_eval_tf(short_side: int, top_crop: float):
    """Build the deterministic (no augmentation) image pipeline.

    Steps, in order: crop the top ``top_crop`` fraction, resize so the short
    side equals ``short_side``, centre-crop to a ``short_side`` square,
    convert to a tensor and normalize with IMAGENET_MEAN / IMAGENET_STD.
    """
    geometry = [
        TopCrop(top_crop),
        T.Resize(short_side),      # aspect ratio preserved; short side -> short_side
        T.CenterCrop(short_side),  # square input for ResNet
    ]
    to_model_input = [
        T.ToTensor(),
        T.Normalize(IMAGENET_MEAN, IMAGENET_STD),
    ]
    return T.Compose(geometry + to_model_input)

def make_train_tf(args):
    """Build the training image pipeline (same preprocessing plus augmentation).

    ``args`` must expose: ``top_crop``, ``short_side``, ``jitter_b``,
    ``jitter_c``, ``jitter_s``, ``jitter_h``, ``affine_deg``,
    ``affine_trans``, ``affine_scale_min`` and ``affine_scale_max``.

    After the top crop / resize / centre crop, a random colour jitter and a
    random affine warp are applied before tensor conversion and normalization.
    """
    side = args.short_side
    shift = args.affine_trans

    steps = [TopCrop(args.top_crop), T.Resize(side), T.CenterCrop(side)]

    # Photometric augmentation.
    steps.append(
        T.ColorJitter(
            brightness=args.jitter_b,
            contrast=args.jitter_c,
            saturation=args.jitter_s,
            hue=args.jitter_h,
        )
    )

    # Geometric augmentation; pixels exposed by the warp are filled with 0.
    steps.append(
        T.RandomAffine(
            degrees=args.affine_deg,
            translate=(shift, shift),
            scale=(args.affine_scale_min, args.affine_scale_max),
            interpolation=T.InterpolationMode.BILINEAR,
            fill=0,
        )
    )

    steps.extend([T.ToTensor(), T.Normalize(IMAGENET_MEAN, IMAGENET_STD)])
    return T.Compose(steps)
# ---------------------------------------------------

# **Q2.b Training Pipeline Set-Up**
# MODEL EVALUATION FUNCTION
This helper function evaluates the trained model on a dataset.
It computes two metrics:
- Mean Absolute Error (MAE)
- Root Mean Squared Error (RMSE)

You do NOT need to modify this function.

============================================================
# Helper: get_lr()
Returns the current learning rate from the optimizer.
Used for TensorBoard logging.
You do NOT need to modify this function.

In [None]:
@torch.no_grad()
def evaluate(model, loader, device, mu, sigma):
    """Compute MAE and RMSE of ``model`` over ``loader`` in raw label units.

    Args:
        model: Callable module; ``model(x)`` gives standardized predictions.
        loader: Iterable of 5-tuples ``(x, _, y_std, y_raw, _)``. Only ``x``
            and ``y_raw`` are read here.
        device: Device the inputs are moved to before the forward pass.
        mu, sigma: Label statistics used to de-standardize predictions
            (``yhat_raw = yhat_std * sigma + mu``).

    Returns:
        ``(mae, rmse)`` as Python floats; both are 0.0 for an empty loader.

    Side effects:
        Switches ``model`` to eval mode and leaves it there.
    """
    model.eval()
    abs_err_sum = 0.0
    sq_err_sum = 0.0
    n = 0
    # The standardized target is not needed for the raw-unit metrics, so it is
    # neither unpacked nor copied to the device.
    for x, _, _, y_raw, _ in loader:
        x = x.to(device, non_blocking=True)
        yhat_raw = model(x) * sigma + mu
        # Compare on the CPU, where y_raw is left as yielded by the loader.
        diff = yhat_raw.float().cpu() - y_raw
        abs_err_sum += diff.abs().sum().item()
        sq_err_sum += (diff ** 2).sum().item()
        n += y_raw.shape[0]
    denom = max(1, n)  # avoid division by zero on an empty loader
    return abs_err_sum / denom, math.sqrt(sq_err_sum / denom)

def get_lr(optimizer):
    """Return the ``"lr"`` entry of the optimizer's first parameter group.

    Yields ``None`` when there are no parameter groups or the first group
    has no ``"lr"`` key.
    """
    missing = object()
    first_group = next(iter(optimizer.param_groups), missing)
    if first_group is missing:
        return None
    return first_group.get("lr", None)

# **Q2.c EXPERIMENT SETUP**

The main training script below is where we:
- Define hyperparameters
- Prepare dataloaders
- Initialize model, optimizer, and loss function
- Configure TensorBoard logging

For this question, you will define these variables manually. You will have to modify the main function to match your setup (for example, the dataset index and root paths). Fine-tuning the model will give you better results. Please study the function carefully and experiment with the parameters to get good results.

============================================================

# **Q2.d TRAINING LOOP**
In this section, you will complete the model training loop.
The code below iterates through multiple epochs and performs:
 1️⃣ Forward pass (model prediction)
 2️⃣ Loss computation
 3️⃣ Backward pass (gradient computation)
 4️⃣ Optimizer step (parameter update)
 5️⃣ Logging metrics to TensorBoard

You are expected to:
- Implement the forward pass and loss calculation (TODOs below)
- Understand how mixed precision (autocast) and GradScaler work
- Track and average MAE and loss across batches

 ============================================================

# **Q2.e VALIDATION, CHECKPOINTING, AND TEST EVALUATION**
After each epoch of training, we evaluate model performance
on the validation (or training) set, log results to TensorBoard,
and save the best model checkpoint based on lowest MAE.
Finally, once training completes, we evaluate the final model
on the test dataset to estimate its generalization performance.
You must:
• Understand how evaluate() is used to measure MAE/RMSE
• Observe when and why checkpoints are saved
• Record test metrics and analyze how well the model learned

In [None]:
def main():
    """Train ImageOnlySteerNet, checkpoint the best epoch and report test metrics.

    Stages:
      1. Define hyperparameters and seed the RNGs.
      2. Build train/val/test loaders with ``make_loaders`` and attach the
         image transforms to each dataset via its ``img_tf`` attribute.
      3. Build the model, AdamW optimizer, loss and AMP gradient scaler.
      4. For every epoch: train, log to TensorBoard, evaluate on the
         validation loader (falling back to the training loader when there is
         no validation loader) and save a checkpoint whenever MAE improves.
      5. Evaluate the final model on the test loader, if one exists.

    Returns:
        None. Results are printed, logged under ``logdir`` and the best
        checkpoint is written to ``ckpt_out``.
    """
    # ------------------ TODO: Define Hyperparameters ------------------
    epochs = 20
    bs = 64
    lr = 1e-4
    wd = 1e-4
    dropout = 0.1
    use_aug = True
    fp16 = True                 # Use mixed precision if supported
    pretrained = True
    freeze_backbone = False
    seed = 42
    short_side = 224            # side length of the square network input
    top_crop = 0.2              # fraction of the frame removed from the top
    logdir = "runs/image_only"
    ckpt_out = "ckpt_best.pt"
    # -----------------------------------------------------------------

    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Device: {device}")

    # ============================================================
    # Dataset Setup
    # ============================================================
    omega_sign = +1.0
    train_dl, val_dl, test_dl, stats = make_loaders(
        index="path/to/your/index_smooth.json",
        root="path/to/your/merged_dataset",
        bs=bs,
        hist_len=0,
        omega_sign=omega_sign,
        short_side=short_side,
        top_crop_frac=top_crop
    )
    mu, sigma = stats["mu"], stats["sigma"]
    print(f"Label standardization: mu={mu:.6f}, sigma={sigma:.6f}")

    # ============================================================
    # Data Augmentations
    # ============================================================
    eval_tf  = make_eval_tf(short_side, top_crop)
    train_tf = make_train_tf(argparse.Namespace(
        top_crop=top_crop, short_side=short_side,
        jitter_b=0.12, jitter_c=0.12, jitter_s=0.10, jitter_h=0.02,
        affine_deg=3.0, affine_trans=0.02,
        affine_scale_min=0.95, affine_scale_max=1.05
    )) if use_aug else eval_tf

    train_dl.dataset.img_tf = train_tf
    if val_dl:
        val_dl.dataset.img_tf = eval_tf
    if test_dl:
        test_dl.dataset.img_tf = eval_tf

    print(f"Augmentations: {'ON' if use_aug else 'OFF'} | top_crop={top_crop:.0%} | short_side={short_side}")

    # ============================================================
    # Model / Optimizer / Loss / AMP
    # ============================================================
    model = ImageOnlySteerNet(
        out_len=1,
        pretrained=pretrained,
        freeze_backbone=freeze_backbone,
        dropout=dropout,
    ).to(device)

    # Only hand trainable parameters to the optimizer so a frozen backbone
    # is left out of the update.
    trainable_params = [p for p in model.parameters() if p.requires_grad]
    opt = AdamW(trainable_params, lr=lr, weight_decay=wd)
    # Huber-style regression loss on the standardized target; swap in
    # nn.MSELoss() to optimise squared error instead.
    loss_fn = nn.SmoothL1Loss()
    # Mixed precision is only meaningful on CUDA; on CPU the scaler and
    # autocast are created disabled and become no-ops.
    use_amp = fp16 and device.type == "cuda"
    scaler = GradScaler(enabled=use_amp)

    writer = SummaryWriter(logdir)
    try:
        writer.add_text("hparams", str({
            "lr": lr, "bs": bs, "epochs": epochs,
            "use_aug": use_aug, "dropout": dropout
        }))
        writer.add_scalar("data/mu", mu, 0)
        writer.add_scalar("data/sigma", sigma, 0)

        best_mae = float("inf")
        global_step = 0

        # Validation loader when available, otherwise fall back to the
        # training loader (which keeps its training transform).
        eval_dl = val_dl if val_dl else train_dl
        eval_tag = "val" if val_dl else "train_eval"

        # ============================================================
        # Training Loop
        # ============================================================
        for epoch in range(1, epochs + 1):
            model.train()
            running_loss = 0.0
            running_mae  = 0.0
            n_seen = 0

            # Iterate over all batches
            for x, _, y_std, y_raw, _ in train_dl:
                x = x.to(device, non_blocking=True)
                y_std = y_std.to(device, non_blocking=True)

                # Reset gradients
                opt.zero_grad(set_to_none=True)

                # Forward pass (reduced precision inside autocast when AMP is on)
                with autocast(enabled=use_amp):
                    yhat_std = model(x)
                    # Compute the loss in float32 regardless of autocast dtype.
                    # NOTE(review): assumes model output and y_std have
                    # matching shapes — confirm against dataset.py / model.py.
                    loss = loss_fn(yhat_std.float(), y_std.float())

                # Backward pass + optimizer update; the scaler rescales the
                # loss and skips the step if gradients overflowed.
                scaler.scale(loss).backward()
                scaler.step(opt)
                scaler.update()

                # Log current batch loss and learning rate
                loss_value = loss.item()
                writer.add_scalar("train/step_loss", loss_value, global_step)
                lr_now = get_lr(opt)
                if lr_now is not None:
                    writer.add_scalar("train/lr", lr_now, global_step)

                # Compute metrics (MAE in raw units)
                with torch.no_grad():
                    yhat_raw = (yhat_std.float() * sigma + mu).cpu()
                    mae_batch = (yhat_raw - y_raw).abs().sum().item()
                    running_mae  += mae_batch
                    running_loss += loss_value * y_std.shape[0]
                    n_seen += y_std.shape[0]

                global_step += 1

            # ============================================================
            # Compute epoch averages
            # ============================================================
            train_loss = running_loss / max(1, n_seen)
            train_mae  = running_mae  / max(1, n_seen)

            writer.add_scalar("train/epoch_loss", train_loss, epoch)
            writer.add_scalar("train/epoch_MAE",  train_mae,  epoch)

            print(f"Epoch {epoch:03d} | Train Loss: {train_loss:.4f} | Train MAE: {train_mae:.4f}")

            # ============================================================
            # Validation + checkpointing
            # ============================================================
            eval_mae, eval_rmse = evaluate(model, eval_dl, device, mu, sigma)
            writer.add_scalar(f"{eval_tag}/MAE",  eval_mae,  epoch)
            writer.add_scalar(f"{eval_tag}/RMSE", eval_rmse, epoch)
            print(f"Epoch {epoch:03d} | {eval_tag} MAE: {eval_mae:.4f} | {eval_tag} RMSE: {eval_rmse:.4f}")

            if eval_mae < best_mae:
                best_mae = eval_mae
                # NOTE(review): checkpoint layout is an assumption; confirm
                # the keys eval.py expects before relying on this file.
                torch.save({
                    "model": model.state_dict(),
                    "mu": float(mu),
                    "sigma": float(sigma),
                    "epoch": epoch,
                    "mae": eval_mae,
                    "rmse": eval_rmse,
                }, ckpt_out)
                print(f"  Saved new best checkpoint to {ckpt_out} (MAE={best_mae:.4f})")

        # ============================================================
        # Final test evaluation (model as it stands after the last epoch)
        # ============================================================
        if test_dl:
            test_mae, test_rmse = evaluate(model, test_dl, device, mu, sigma)
            writer.add_scalar("test/MAE",  test_mae,  epochs)
            writer.add_scalar("test/RMSE", test_rmse, epochs)
            print(f"Test MAE: {test_mae:.4f} | Test RMSE: {test_rmse:.4f}")
    finally:
        # Flush and release the event file even if training is interrupted.
        writer.close()


# **Run the cell below to test the model.**
Q2.f What does the graph indicate? Is the loss function decreasing?

In [None]:
python3 eval.py --index data/processed/merged_dataset/index_smooth.json --root  data/processed/merged_dataset --ckpt  ckpt_best.pt --split test --outdir eval_out --save-overlays --flip-sign --short-side 224 --top-crop 0.2