In [7]:
!pip install torch torchvision timm

import os
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from timm import create_model

Defaulting to user installation because normal site-packages is not writeable
[0m

In [8]:
from pathlib import Path
from torch.utils.data import Dataset
from PIL import Image
import torchvision.transforms as T

class UECFood100Detection(Dataset):
    def __init__(self, root_dir, bb_info_path, transforms=None):
        self.root = Path(root_dir)
        ann_path = Path(bb_info_path)

        self.samples   = {}
        self.img_paths = {}

        with ann_path.open() as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) < 5 or parts[0].lower() == "img_name":
                    continue

                if len(parts) == 6:
                    cls_str, img_name, *coords = parts
                else:
                    img_name, *coords = parts
                    cls_str = ann_path.parent.name

                try:
                    lbl       = int(cls_str)
                    x1, y1, x2, y2 = map(int, coords)
                except ValueError:
                    continue

                if img_name not in self.img_paths:
                    class_folder = self.root / cls_str
                    candidate = class_folder / img_name
                    if not candidate.exists():
                        matches = list(class_folder.glob(f"{Path(img_name).stem}.*"))
                        if not matches:
                            raise FileNotFoundError(f"No file matches {img_name} in {class_folder}")
                        candidate = matches[0]
                    self.img_paths[img_name] = candidate

                self.samples.setdefault(img_name, []).append(([x1, y1, x2, y2], lbl))

        self.imgs       = sorted(self.samples.keys())
        self.transforms = transforms or T.Compose([T.ToTensor()])

    def __len__(self):
        return len(self.imgs)

    def __getitem__(self, idx):
        img_name = self.imgs[idx]
        img_path = self.img_paths[img_name]

        img = Image.open(img_path).convert("RGB")
        w, h = img.size

        boxes, labels = [], []
        for (x1, y1, x2, y2), lbl in self.samples[img_name]:
            boxes.append([max(0,x1), max(0,y1), min(w,x2), min(h,y2)])
            labels.append(lbl)

        target = {
            "boxes":  torch.tensor(boxes, dtype=torch.float32),
            "labels": torch.tensor(labels, dtype=torch.int64),
            "image_id": torch.tensor([idx]),
        }

        return self.transforms(img), target

In [9]:
BASE_DIR = Path("UECFOOD100") / "UECFOOD100"
all_ann = BASE_DIR / "all_bb_info.txt"

with open(all_ann, "w") as fout:
    for ann in sorted(BASE_DIR.rglob("bb_info.txt")):
        cls = ann.parent.name  
        with open(ann) as fin:
            for line in fin:
                parts = line.strip().split()
                if len(parts) != 5:
                    continue
                if parts[0] == "img_name" and parts[1] == "x1":
                    continue
                fout.write(f"{cls} {' '.join(parts)}\n")

print(f"Created merged annotation file at {all_ann}")

Created merged annotation file at UECFOOD100/UECFOOD100/all_bb_info.txt


In [11]:
dataset = UECFood100Detection(
    root_dir=str(BASE_DIR),
    bb_info_path=str(all_ann)
)
print("Total images with boxes:", len(dataset))

Total images with boxes: 12740


In [12]:
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

def collate_fn(batch):
    return tuple(zip(*batch))

idxs = list(range(len(dataset)))
train_idxs, val_idxs = train_test_split(idxs, test_size=0.2, random_state=42)

train_ds = torch.utils.data.Subset(dataset, train_idxs)
val_ds   = torch.utils.data.Subset(dataset, val_idxs)

train_loader = DataLoader(
    train_ds,
    batch_size=4,
    shuffle=True,
    num_workers=4,
    collate_fn=collate_fn
)
val_loader = DataLoader(
    val_ds,
    batch_size=4,
    shuffle=False,
    num_workers=4,
    collate_fn=collate_fn
)

print(f"Train images: {len(train_ds)}, Val images: {len(val_ds)}")

Train images: 10192, Val images: 2548


In [15]:
#Baseline Faster R‑CNN (ResNet‑50 FPN)

from torchvision.models import ResNet50_Weights
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torch.optim import SGD
from tqdm import tqdm 

baseline_model = fasterrcnn_resnet50_fpn(
    weights=None,
    weights_backbone=ResNet50_Weights.IMAGENET1K_V1,
    num_classes=NUM_CLASSES
).to(device)

baseline_optimizer = SGD(
    baseline_model.parameters(),
    lr=0.005,     
    momentum=0.9,
    weight_decay=1e-4
)

def train_one_epoch(model, optimizer, data_loader, device, print_freq=100):
    model.train()
    for i, (images, targets) in enumerate(tqdm(data_loader, desc="Baseline Train"), 1):
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        loss_dict = model(images, targets)
        loss = sum(loss for loss in loss_dict.values())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if i % print_freq == 0:
            print(f"  iter {i}/{len(data_loader)} — loss: {loss.item():.4f}") 

def evaluate_loss(model, data_loader, device):
    model.train()
    losses = []
    with torch.no_grad():
        for images, targets in tqdm(data_loader, desc="Baseline Val Loss"):
            images = [img.to(device) for img in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            loss_dict = model(images, targets)
            total_loss = sum(loss for loss in loss_dict.values()).item()
            losses.append(total_loss)
    model.eval()
    return sum(losses) / len(losses)

num_epochs_baseline = 3
for epoch in range(1, num_epochs_baseline + 1):
    print(f"\n[Baseline] Epoch {epoch}/{num_epochs_baseline}")
    train_one_epoch(baseline_model, baseline_optimizer, train_loader, device)
    val_loss = evaluate_loss(baseline_model, val_loader, device)
    print(f"[Baseline] Validation Loss: {val_loss:.4f}")


[Baseline] Epoch 1/3


Baseline Train:   4%|▍         | 101/2548 [00:37<15:17,  2.67it/s]

  iter 100/2548 — loss: 0.3401


Baseline Train:   8%|▊         | 200/2548 [01:15<17:23,  2.25it/s]

  iter 200/2548 — loss: 0.2911


Baseline Train:  12%|█▏        | 300/2548 [01:54<16:25,  2.28it/s]

  iter 300/2548 — loss: 0.5499


Baseline Train:  16%|█▌        | 400/2548 [02:32<15:19,  2.34it/s]

  iter 400/2548 — loss: 0.3713


Baseline Train:  20%|█▉        | 500/2548 [03:10<15:48,  2.16it/s]

  iter 500/2548 — loss: 0.2939


Baseline Train:  24%|██▎       | 600/2548 [03:49<14:49,  2.19it/s]

  iter 600/2548 — loss: 0.3178


Baseline Train:  27%|██▋       | 700/2548 [04:29<13:46,  2.24it/s]

  iter 700/2548 — loss: 0.2936


Baseline Train:  31%|███▏      | 800/2548 [05:07<12:21,  2.36it/s]

  iter 800/2548 — loss: 0.3489


Baseline Train:  35%|███▌      | 900/2548 [05:45<11:02,  2.49it/s]

  iter 900/2548 — loss: 0.2406


Baseline Train:  39%|███▉      | 1000/2548 [06:23<10:49,  2.38it/s]

  iter 1000/2548 — loss: 0.2692


Baseline Train:  43%|████▎     | 1100/2548 [07:03<09:48,  2.46it/s]

  iter 1100/2548 — loss: 0.5063


Baseline Train:  47%|████▋     | 1200/2548 [07:41<10:05,  2.23it/s]

  iter 1200/2548 — loss: 0.3221


Baseline Train:  51%|█████     | 1300/2548 [08:19<08:50,  2.35it/s]

  iter 1300/2548 — loss: 0.3352


Baseline Train:  55%|█████▍    | 1400/2548 [08:58<07:50,  2.44it/s]

  iter 1400/2548 — loss: 0.2631


Baseline Train:  59%|█████▉    | 1500/2548 [09:36<06:59,  2.50it/s]

  iter 1500/2548 — loss: 0.3078


Baseline Train:  63%|██████▎   | 1600/2548 [10:14<06:57,  2.27it/s]

  iter 1600/2548 — loss: 0.3271


Baseline Train:  67%|██████▋   | 1700/2548 [10:52<05:46,  2.44it/s]

  iter 1700/2548 — loss: 0.3049


Baseline Train:  71%|███████   | 1800/2548 [11:30<05:21,  2.33it/s]

  iter 1800/2548 — loss: 0.2645


Baseline Train:  75%|███████▍  | 1900/2548 [12:08<04:33,  2.37it/s]

  iter 1900/2548 — loss: 0.4077


Baseline Train:  78%|███████▊  | 2000/2548 [12:46<03:59,  2.28it/s]

  iter 2000/2548 — loss: 0.3048


Baseline Train:  82%|████████▏ | 2100/2548 [13:25<03:21,  2.23it/s]

  iter 2100/2548 — loss: 0.2203


Baseline Train:  86%|████████▋ | 2200/2548 [14:03<02:26,  2.37it/s]

  iter 2200/2548 — loss: 0.3792


Baseline Train:  90%|█████████ | 2300/2548 [14:43<01:41,  2.44it/s]

  iter 2300/2548 — loss: 1.2609


Baseline Train:  94%|█████████▍| 2400/2548 [15:21<01:00,  2.44it/s]

  iter 2400/2548 — loss: 0.3511


Baseline Train:  98%|█████████▊| 2500/2548 [16:00<00:20,  2.33it/s]

  iter 2500/2548 — loss: 0.2431


Baseline Train: 100%|██████████| 2548/2548 [16:18<00:00,  2.60it/s]
Baseline Val Loss: 100%|██████████| 637/637 [01:59<00:00,  5.34it/s]


[Baseline] Validation Loss: 0.2938

[Baseline] Epoch 2/3


Baseline Train:   4%|▍         | 100/2548 [00:38<18:47,  2.17it/s]

  iter 100/2548 — loss: 0.4690


Baseline Train:   8%|▊         | 200/2548 [01:17<17:55,  2.18it/s]

  iter 200/2548 — loss: 0.2787


Baseline Train:  12%|█▏        | 300/2548 [01:55<16:10,  2.32it/s]

  iter 300/2548 — loss: 0.2513


Baseline Train:  16%|█▌        | 400/2548 [02:34<16:57,  2.11it/s]

  iter 400/2548 — loss: 0.3239


Baseline Train:  20%|█▉        | 500/2548 [03:11<14:13,  2.40it/s]

  iter 500/2548 — loss: 0.3723


Baseline Train:  24%|██▎       | 600/2548 [03:50<14:23,  2.25it/s]

  iter 600/2548 — loss: 0.2140


Baseline Train:  27%|██▋       | 700/2548 [04:28<12:36,  2.44it/s]

  iter 700/2548 — loss: 0.3772


Baseline Train:  31%|███▏      | 800/2548 [05:07<12:03,  2.42it/s]

  iter 800/2548 — loss: 0.2200


Baseline Train:  35%|███▌      | 900/2548 [05:46<12:12,  2.25it/s]

  iter 900/2548 — loss: 0.2532


Baseline Train:  39%|███▉      | 1000/2548 [06:25<13:19,  1.94it/s]

  iter 1000/2548 — loss: 0.2288


Baseline Train:  43%|████▎     | 1100/2548 [07:05<12:01,  2.01it/s]

  iter 1100/2548 — loss: 0.2579


Baseline Train:  47%|████▋     | 1200/2548 [07:43<08:35,  2.62it/s]

  iter 1200/2548 — loss: 0.3099


Baseline Train:  51%|█████     | 1300/2548 [08:21<08:55,  2.33it/s]

  iter 1300/2548 — loss: 0.4583


Baseline Train:  55%|█████▍    | 1400/2548 [08:59<08:03,  2.37it/s]

  iter 1400/2548 — loss: 0.2563


Baseline Train:  59%|█████▉    | 1500/2548 [09:37<07:39,  2.28it/s]

  iter 1500/2548 — loss: 0.2620


Baseline Train:  63%|██████▎   | 1600/2548 [10:14<06:52,  2.30it/s]

  iter 1600/2548 — loss: 0.4135


Baseline Train:  67%|██████▋   | 1700/2548 [10:51<05:19,  2.66it/s]

  iter 1700/2548 — loss: 0.2541


Baseline Train:  71%|███████   | 1800/2548 [11:30<05:52,  2.12it/s]

  iter 1800/2548 — loss: 0.1851


Baseline Train:  75%|███████▍  | 1900/2548 [12:09<04:58,  2.17it/s]

  iter 1900/2548 — loss: 0.3828


Baseline Train:  78%|███████▊  | 2000/2548 [12:47<03:33,  2.57it/s]

  iter 2000/2548 — loss: 0.1794


Baseline Train:  82%|████████▏ | 2100/2548 [13:25<03:07,  2.39it/s]

  iter 2100/2548 — loss: 0.2375


Baseline Train:  86%|████████▋ | 2200/2548 [14:04<02:36,  2.23it/s]

  iter 2200/2548 — loss: 0.3814


Baseline Train:  90%|█████████ | 2300/2548 [14:43<01:58,  2.09it/s]

  iter 2300/2548 — loss: 0.2157


Baseline Train:  94%|█████████▍| 2400/2548 [15:23<01:09,  2.13it/s]

  iter 2400/2548 — loss: 0.2360


Baseline Train:  98%|█████████▊| 2500/2548 [16:02<00:21,  2.28it/s]

  iter 2500/2548 — loss: 0.1999


Baseline Train: 100%|██████████| 2548/2548 [16:21<00:00,  2.60it/s]
Baseline Val Loss: 100%|██████████| 637/637 [01:58<00:00,  5.37it/s]


[Baseline] Validation Loss: 0.2722

[Baseline] Epoch 3/3


Baseline Train:   4%|▍         | 100/2548 [00:38<17:41,  2.31it/s]

  iter 100/2548 — loss: 0.1649


Baseline Train:   8%|▊         | 200/2548 [01:17<16:38,  2.35it/s]

  iter 200/2548 — loss: 0.2911


Baseline Train:  12%|█▏        | 300/2548 [01:56<16:16,  2.30it/s]

  iter 300/2548 — loss: 0.1979


Baseline Train:  16%|█▌        | 400/2548 [02:34<15:40,  2.28it/s]

  iter 400/2548 — loss: 0.1747


Baseline Train:  20%|█▉        | 500/2548 [03:12<13:37,  2.51it/s]

  iter 500/2548 — loss: 0.3705


Baseline Train:  24%|██▎       | 600/2548 [03:51<13:44,  2.36it/s]

  iter 600/2548 — loss: 0.2049


Baseline Train:  27%|██▋       | 700/2548 [04:29<12:50,  2.40it/s]

  iter 700/2548 — loss: 0.2283


Baseline Train:  31%|███▏      | 800/2548 [05:08<13:37,  2.14it/s]

  iter 800/2548 — loss: 0.1894


Baseline Train:  35%|███▌      | 900/2548 [05:47<11:10,  2.46it/s]

  iter 900/2548 — loss: 0.2088


Baseline Train:  39%|███▉      | 1000/2548 [06:26<11:25,  2.26it/s]

  iter 1000/2548 — loss: 0.1755


Baseline Train:  43%|████▎     | 1101/2548 [07:05<08:28,  2.84it/s]

  iter 1100/2548 — loss: 0.1664


Baseline Train:  47%|████▋     | 1200/2548 [07:42<09:51,  2.28it/s]

  iter 1200/2548 — loss: 0.2253


Baseline Train:  51%|█████     | 1300/2548 [08:21<08:05,  2.57it/s]

  iter 1300/2548 — loss: 0.1689


Baseline Train:  55%|█████▍    | 1400/2548 [09:00<08:14,  2.32it/s]

  iter 1400/2548 — loss: 0.2140


Baseline Train:  59%|█████▉    | 1500/2548 [09:38<07:50,  2.23it/s]

  iter 1500/2548 — loss: 0.4457


Baseline Train:  63%|██████▎   | 1600/2548 [10:16<06:42,  2.36it/s]

  iter 1600/2548 — loss: 0.2405


Baseline Train:  67%|██████▋   | 1700/2548 [10:55<05:46,  2.45it/s]

  iter 1700/2548 — loss: 0.1805


Baseline Train:  71%|███████   | 1800/2548 [11:34<05:20,  2.34it/s]

  iter 1800/2548 — loss: 0.2104


Baseline Train:  75%|███████▍  | 1900/2548 [12:12<04:58,  2.17it/s]

  iter 1900/2548 — loss: 0.2019


Baseline Train:  78%|███████▊  | 2000/2548 [12:49<03:55,  2.32it/s]

  iter 2000/2548 — loss: 0.2733


Baseline Train:  82%|████████▏ | 2100/2548 [13:28<03:33,  2.10it/s]

  iter 2100/2548 — loss: 0.1814


Baseline Train:  86%|████████▋ | 2200/2548 [14:06<02:35,  2.23it/s]

  iter 2200/2548 — loss: 0.1569


Baseline Train:  90%|█████████ | 2300/2548 [14:44<01:46,  2.33it/s]

  iter 2300/2548 — loss: 0.2163


Baseline Train:  94%|█████████▍| 2400/2548 [15:22<01:02,  2.37it/s]

  iter 2400/2548 — loss: 0.2013


Baseline Train:  98%|█████████▊| 2500/2548 [16:01<00:19,  2.44it/s]

  iter 2500/2548 — loss: 0.1828


Baseline Train: 100%|██████████| 2548/2548 [16:19<00:00,  2.60it/s]
Baseline Val Loss: 100%|██████████| 637/637 [01:59<00:00,  5.32it/s]

[Baseline] Validation Loss: 0.2448





In [18]:
from collections import OrderedDict
import torch.nn as nn

raw_backbone = create_model(
    'efficientnet_b0',
    pretrained=True,
    features_only=True,
    out_indices=(4,)   
)

class EfficientNetBackbone(nn.Module):
    def __init__(self, timm_backbone):
        super().__init__()
        self.body = timm_backbone
        self.out_channels = self.body.feature_info.channels()[-1] 

    def forward(self, x):
        feats = self.body(x)               
        return OrderedDict([("0", feats[0])])

backbone = EfficientNetBackbone(raw_backbone)

Unexpected keys (bn2.bias, bn2.num_batches_tracked, bn2.running_mean, bn2.running_var, bn2.weight, classifier.bias, classifier.weight, conv_head.weight) found while loading pretrained weights. This may be expected if model is being adapted.


In [20]:
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.models.detection import FasterRCNN

anchor_generator = AnchorGenerator(
    sizes=((32, 64, 128, 256, 512),),
    aspect_ratios=((0.5, 1.0, 2.0),)
)

model = FasterRCNN(
    backbone,
    num_classes=101,
    rpn_anchor_generator=anchor_generator
).cuda()

In [21]:
model.train()
img, target = dataset[0]
loss_dict = model([img.cuda()], [{k: v.cuda() for k,v in target.items()}])
print({k: float(v) for k,v in loss_dict.items()})

{'loss_classifier': 4.770044326782227, 'loss_box_reg': 0.2090047001838684, 'loss_objectness': 0.6847230792045593, 'loss_rpn_box_reg': 0.04593349248170853}


In [22]:
print(f" train_loader batches: {len(train_loader)}")
print(f" val_loader   batches: {len(val_loader)}")

for epoch in range(1, 2):   
    print(f"--- Starting epoch {epoch} ---")
    model.train()
    for batch_idx, (imgs, targets) in enumerate(train_loader):
        if batch_idx == 0:
            print(f"  Got batch 0: {len(imgs)} images, {len(targets)} target dicts")
        break
    model.eval()
    for batch_idx, (imgs, targets) in enumerate(val_loader):
        if batch_idx == 0:
            print(f"  Got val batch 0: {len(imgs)} images, {len(targets)} target dicts")
        break
print("Debug done.")

 train_loader batches: 2548
 val_loader   batches: 637
--- Starting epoch 1 ---
  Got batch 0: 4 images, 4 target dicts
  Got val batch 0: 4 images, 4 target dicts
Debug done.


In [33]:
num_epochs = 10

for epoch in range(1, num_epochs + 1):
    print(f"Starting Epoch {epoch}/{num_epochs}")

    model.train()
    total_train_loss = 0.0
    for batch_idx, (imgs, targets) in enumerate(train_loader, 1):
        imgs    = [img.cuda() for img in imgs]
        targets = [{k: v.cuda() for k, v in t.items()} for t in targets]

        loss_dict = model(imgs, targets)
        loss = sum(loss_dict.values())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        if batch_idx % 500 == 0:
            print(f"   • Batch {batch_idx}/{len(train_loader)} — curr loss: {loss.item():.4f}")

    lr_scheduler.step()
    avg_train = total_train_loss / len(train_loader)

    total_val_loss = 0.0
    with torch.no_grad():
        model.train()  
        for imgs, targets in val_loader:
            imgs    = [img.cuda() for img in imgs]
            targets = [{k: v.cuda() for k, v in t.items()} for t in targets]

            loss_dict = model(imgs, targets)
            total_val_loss += sum(loss_dict.values()).item()

    avg_val = total_val_loss / len(val_loader)
    print(f" Epoch {epoch} complete — Train loss: {avg_train:.6f} — Val loss: {avg_val:.6f}\n")

Starting Epoch 1/10
   • Batch 500/2548 — curr loss: 5.4668
   • Batch 1000/2548 — curr loss: 5.5973
   • Batch 1500/2548 — curr loss: 5.7372
   • Batch 2000/2548 — curr loss: 5.6331
   • Batch 2500/2548 — curr loss: 5.6053
 Epoch 1 complete — Train loss: 5.636133 — Val loss: 5.632444

Starting Epoch 2/10
   • Batch 500/2548 — curr loss: 5.8262
   • Batch 1000/2548 — curr loss: 5.5411
   • Batch 1500/2548 — curr loss: 5.5906
   • Batch 2000/2548 — curr loss: 5.4399
   • Batch 2500/2548 — curr loss: 5.6176
 Epoch 2 complete — Train loss: 5.636046 — Val loss: 5.631989

Starting Epoch 3/10
   • Batch 500/2548 — curr loss: 5.6407
   • Batch 1000/2548 — curr loss: 5.7055
   • Batch 1500/2548 — curr loss: 5.6234
   • Batch 2000/2548 — curr loss: 5.5711
   • Batch 2500/2548 — curr loss: 5.7668
 Epoch 3 complete — Train loss: 5.635589 — Val loss: 5.632226

Starting Epoch 4/10
   • Batch 500/2548 — curr loss: 5.6427
   • Batch 1000/2548 — curr loss: 5.7461
   • Batch 1500/2548 — curr loss: 5.75

In [27]:
#Hyperparameter Tuning
from collections import OrderedDict
import torch.nn as nn

class EfficientNetBackbone(nn.Module):
    def __init__(self, raw_backbone):
        super().__init__()
        self.body = raw_backbone
        self.out_channels = raw_backbone.feature_info.info[-1]['num_chs']

    def forward(self, x):
        feats = self.body(x)
        return OrderedDict([("0", feats[-1])])
    
import itertools
import pandas as pd
from timm import create_model
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.ops import MultiScaleRoIAlign
from torch.optim import SGD

def train_and_eval(lr, wd, num_epochs=5):
    raw = create_model("efficientnet_b0", pretrained=True, features_only=True)
    backbone = EfficientNetBackbone(raw)

    anchor_gen = AnchorGenerator(
        sizes=((32, 64, 128, 256, 512),),
        aspect_ratios=((0.5, 1.0, 2.0),)
    )
    roi_pooler = MultiScaleRoIAlign(
        featmap_names=["0"],
        output_size=7,
        sampling_ratio=2
    )
    
    model = FasterRCNN(
        backbone,
        num_classes=NUM_CLASSES,
        rpn_anchor_generator=anchor_gen,
        box_roi_pool=roi_pooler
    ).to(device)
    optim = SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=wd)

    for _ in range(num_epochs):
        train_one_epoch(model, optim, train_loader, device)
    return evaluate_loss(model, val_loader, device)

lrs = [1e-3, 5e-4, 1e-4]
wds = [1e-4, 1e-5]
results = []
for lr, wd in itertools.product(lrs, wds):
    loss = train_and_eval(lr, wd, num_epochs=1)
    print(f"lr={lr:.0e}, wd={wd:.0e} → val_loss={loss:.4f}")
    results.append({"lr": lr, "weight_decay": wd, "val_loss": loss})

df = pd.DataFrame(results).sort_values("val_loss").reset_index(drop=True)
print(df.head())

Unexpected keys (bn2.bias, bn2.num_batches_tracked, bn2.running_mean, bn2.running_var, bn2.weight, classifier.bias, classifier.weight, conv_head.weight) found while loading pretrained weights. This may be expected if model is being adapted.
Baseline Train:   4%|▍         | 101/2548 [00:18<07:05,  5.75it/s]

  iter 100/2548 — loss: 0.9036


Baseline Train:   8%|▊         | 201/2548 [00:35<06:26,  6.07it/s]

  iter 200/2548 — loss: 0.6728


Baseline Train:  12%|█▏        | 301/2548 [00:53<06:37,  5.66it/s]

  iter 300/2548 — loss: 0.6589


Baseline Train:  16%|█▌        | 401/2548 [01:11<06:12,  5.76it/s]

  iter 400/2548 — loss: 0.6161


Baseline Train:  20%|█▉        | 501/2548 [01:29<06:05,  5.60it/s]

  iter 500/2548 — loss: 0.6017


Baseline Train:  24%|██▎       | 601/2548 [01:47<05:23,  6.02it/s]

  iter 600/2548 — loss: 0.7006


Baseline Train:  28%|██▊       | 701/2548 [02:05<05:23,  5.70it/s]

  iter 700/2548 — loss: 0.4822


Baseline Train:  31%|███▏      | 801/2548 [02:23<05:27,  5.33it/s]

  iter 800/2548 — loss: 0.9139


Baseline Train:  35%|███▌      | 901/2548 [02:42<04:50,  5.67it/s]

  iter 900/2548 — loss: 0.5827


Baseline Train:  39%|███▉      | 1001/2548 [03:00<04:54,  5.25it/s]

  iter 1000/2548 — loss: 0.4982


Baseline Train:  43%|████▎     | 1101/2548 [03:18<04:10,  5.78it/s]

  iter 1100/2548 — loss: 0.4442


Baseline Train:  47%|████▋     | 1201/2548 [03:36<03:45,  5.97it/s]

  iter 1200/2548 — loss: 0.5358


Baseline Train:  51%|█████     | 1301/2548 [03:54<03:39,  5.68it/s]

  iter 1300/2548 — loss: 0.5677


Baseline Train:  55%|█████▍    | 1401/2548 [04:13<03:36,  5.31it/s]

  iter 1400/2548 — loss: 0.5137


Baseline Train:  59%|█████▉    | 1501/2548 [04:31<03:27,  5.03it/s]

  iter 1500/2548 — loss: 0.6581


Baseline Train:  63%|██████▎   | 1601/2548 [04:49<02:48,  5.61it/s]

  iter 1600/2548 — loss: 0.5203


Baseline Train:  67%|██████▋   | 1701/2548 [05:08<02:30,  5.64it/s]

  iter 1700/2548 — loss: 0.4932


Baseline Train:  71%|███████   | 1801/2548 [05:27<02:12,  5.63it/s]

  iter 1800/2548 — loss: 0.4396


Baseline Train:  75%|███████▍  | 1901/2548 [05:45<02:03,  5.24it/s]

  iter 1900/2548 — loss: 0.6475


Baseline Train:  79%|███████▊  | 2001/2548 [06:03<01:33,  5.83it/s]

  iter 2000/2548 — loss: 0.5407


Baseline Train:  82%|████████▏ | 2101/2548 [06:21<01:19,  5.62it/s]

  iter 2100/2548 — loss: 0.4351


Baseline Train:  86%|████████▋ | 2201/2548 [06:39<01:00,  5.77it/s]

  iter 2200/2548 — loss: 0.5445


Baseline Train:  90%|█████████ | 2301/2548 [06:58<00:43,  5.68it/s]

  iter 2300/2548 — loss: 0.3199


Baseline Train:  94%|█████████▍| 2401/2548 [07:15<00:27,  5.34it/s]

  iter 2400/2548 — loss: 0.5671


Baseline Train:  98%|█████████▊| 2501/2548 [07:34<00:08,  5.41it/s]

  iter 2500/2548 — loss: 0.4115


Baseline Train: 100%|██████████| 2548/2548 [07:42<00:00,  5.51it/s]
Baseline Val Loss: 100%|██████████| 637/637 [00:44<00:00, 14.33it/s]


lr=1e-03, wd=1e-04 → val_loss=0.4796


Unexpected keys (bn2.bias, bn2.num_batches_tracked, bn2.running_mean, bn2.running_var, bn2.weight, classifier.bias, classifier.weight, conv_head.weight) found while loading pretrained weights. This may be expected if model is being adapted.
Baseline Train:   4%|▍         | 101/2548 [00:18<07:14,  5.63it/s]

  iter 100/2548 — loss: 0.6300


Baseline Train:   8%|▊         | 201/2548 [00:36<06:28,  6.04it/s]

  iter 200/2548 — loss: 0.6903


Baseline Train:  12%|█▏        | 301/2548 [00:54<07:14,  5.18it/s]

  iter 300/2548 — loss: 1.0979


Baseline Train:  16%|█▌        | 401/2548 [01:12<06:10,  5.79it/s]

  iter 400/2548 — loss: 0.4546


Baseline Train:  20%|█▉        | 501/2548 [01:30<06:07,  5.57it/s]

  iter 500/2548 — loss: 0.5573


Baseline Train:  24%|██▎       | 601/2548 [01:48<06:10,  5.25it/s]

  iter 600/2548 — loss: 0.5816


Baseline Train:  28%|██▊       | 701/2548 [02:06<05:16,  5.84it/s]

  iter 700/2548 — loss: 0.5249


Baseline Train:  31%|███▏      | 801/2548 [02:24<05:22,  5.41it/s]

  iter 800/2548 — loss: 0.6478


Baseline Train:  35%|███▌      | 901/2548 [02:42<04:57,  5.53it/s]

  iter 900/2548 — loss: 0.6078


Baseline Train:  39%|███▉      | 1001/2548 [03:00<05:09,  4.99it/s]

  iter 1000/2548 — loss: 0.5080


Baseline Train:  43%|████▎     | 1101/2548 [03:18<04:01,  5.99it/s]

  iter 1100/2548 — loss: 0.4056


Baseline Train:  47%|████▋     | 1201/2548 [03:36<04:04,  5.50it/s]

  iter 1200/2548 — loss: 0.5189


Baseline Train:  51%|█████     | 1301/2548 [03:55<03:51,  5.39it/s]

  iter 1300/2548 — loss: 0.5859


Baseline Train:  55%|█████▍    | 1401/2548 [04:13<03:33,  5.37it/s]

  iter 1400/2548 — loss: 0.5996


Baseline Train:  59%|█████▉    | 1501/2548 [04:31<03:10,  5.51it/s]

  iter 1500/2548 — loss: 0.4695


Baseline Train:  63%|██████▎   | 1601/2548 [04:49<02:48,  5.60it/s]

  iter 1600/2548 — loss: 0.5039


Baseline Train:  67%|██████▋   | 1701/2548 [05:08<02:45,  5.11it/s]

  iter 1700/2548 — loss: 0.6180


Baseline Train:  71%|███████   | 1801/2548 [05:26<02:10,  5.74it/s]

  iter 1800/2548 — loss: 0.6374


Baseline Train:  75%|███████▍  | 1901/2548 [05:44<02:04,  5.20it/s]

  iter 1900/2548 — loss: 0.4681


Baseline Train:  79%|███████▊  | 2001/2548 [06:02<01:32,  5.91it/s]

  iter 2000/2548 — loss: 0.4045


Baseline Train:  82%|████████▏ | 2101/2548 [06:21<01:23,  5.36it/s]

  iter 2100/2548 — loss: 0.5398


Baseline Train:  86%|████████▋ | 2201/2548 [06:39<00:58,  5.92it/s]

  iter 2200/2548 — loss: 0.3896


Baseline Train:  90%|█████████ | 2301/2548 [06:57<00:47,  5.24it/s]

  iter 2300/2548 — loss: 0.4464


Baseline Train:  94%|█████████▍| 2401/2548 [07:15<00:24,  5.91it/s]

  iter 2400/2548 — loss: 0.3403


Baseline Train:  98%|█████████▊| 2501/2548 [07:34<00:08,  5.43it/s]

  iter 2500/2548 — loss: 0.4097


Baseline Train: 100%|██████████| 2548/2548 [07:42<00:00,  5.51it/s]
Baseline Val Loss: 100%|██████████| 637/637 [00:44<00:00, 14.37it/s]


lr=1e-03, wd=1e-05 → val_loss=0.4925


Unexpected keys (bn2.bias, bn2.num_batches_tracked, bn2.running_mean, bn2.running_var, bn2.weight, classifier.bias, classifier.weight, conv_head.weight) found while loading pretrained weights. This may be expected if model is being adapted.
Baseline Train:   4%|▍         | 101/2548 [00:18<06:57,  5.87it/s]

  iter 100/2548 — loss: 0.8706


Baseline Train:   8%|▊         | 201/2548 [00:36<07:15,  5.39it/s]

  iter 200/2548 — loss: 0.6870


Baseline Train:  12%|█▏        | 301/2548 [00:54<06:12,  6.04it/s]

  iter 300/2548 — loss: 0.6011


Baseline Train:  16%|█▌        | 401/2548 [01:12<06:25,  5.56it/s]

  iter 400/2548 — loss: 0.6780


Baseline Train:  20%|█▉        | 501/2548 [01:30<06:05,  5.61it/s]

  iter 500/2548 — loss: 0.7436


Baseline Train:  24%|██▎       | 601/2548 [01:48<05:51,  5.55it/s]

  iter 600/2548 — loss: 0.6192


Baseline Train:  28%|██▊       | 701/2548 [02:07<05:50,  5.27it/s]

  iter 700/2548 — loss: 0.6304


Baseline Train:  31%|███▏      | 801/2548 [02:25<05:12,  5.60it/s]

  iter 800/2548 — loss: 0.6504


Baseline Train:  35%|███▌      | 901/2548 [02:42<04:59,  5.51it/s]

  iter 900/2548 — loss: 0.5152


Baseline Train:  39%|███▉      | 1001/2548 [03:00<05:00,  5.14it/s]

  iter 1000/2548 — loss: 0.6135


Baseline Train:  43%|████▎     | 1101/2548 [03:19<04:10,  5.78it/s]

  iter 1100/2548 — loss: 0.4428


Baseline Train:  47%|████▋     | 1201/2548 [03:37<04:05,  5.50it/s]

  iter 1200/2548 — loss: 0.5627


Baseline Train:  51%|█████     | 1301/2548 [03:55<03:41,  5.63it/s]

  iter 1300/2548 — loss: 0.5354


Baseline Train:  55%|█████▍    | 1401/2548 [04:13<03:25,  5.58it/s]

  iter 1400/2548 — loss: 0.5459


Baseline Train:  59%|█████▉    | 1501/2548 [04:31<03:15,  5.34it/s]

  iter 1500/2548 — loss: 0.6093


Baseline Train:  63%|██████▎   | 1601/2548 [04:50<02:56,  5.35it/s]

  iter 1600/2548 — loss: 0.4644


Baseline Train:  67%|██████▋   | 1701/2548 [05:08<02:23,  5.90it/s]

  iter 1700/2548 — loss: 0.6246


Baseline Train:  71%|███████   | 1801/2548 [05:26<02:15,  5.52it/s]

  iter 1800/2548 — loss: 0.5154


Baseline Train:  75%|███████▍  | 1901/2548 [05:44<02:00,  5.36it/s]

  iter 1900/2548 — loss: 0.5513


Baseline Train:  79%|███████▊  | 2001/2548 [06:02<01:32,  5.89it/s]

  iter 2000/2548 — loss: 0.4606


Baseline Train:  82%|████████▏ | 2101/2548 [06:20<01:19,  5.62it/s]

  iter 2100/2548 — loss: 0.6533


Baseline Train:  86%|████████▋ | 2201/2548 [06:38<01:01,  5.61it/s]

  iter 2200/2548 — loss: 0.6439


Baseline Train:  90%|█████████ | 2301/2548 [06:56<00:47,  5.23it/s]

  iter 2300/2548 — loss: 0.6530


Baseline Train:  94%|█████████▍| 2401/2548 [07:14<00:27,  5.35it/s]

  iter 2400/2548 — loss: 0.5122


Baseline Train:  98%|█████████▊| 2501/2548 [07:32<00:07,  5.96it/s]

  iter 2500/2548 — loss: 0.4436


Baseline Train: 100%|██████████| 2548/2548 [07:41<00:00,  5.52it/s]
Baseline Val Loss: 100%|██████████| 637/637 [00:44<00:00, 14.45it/s]


lr=5e-04, wd=1e-04 → val_loss=0.5535


Unexpected keys (bn2.bias, bn2.num_batches_tracked, bn2.running_mean, bn2.running_var, bn2.weight, classifier.bias, classifier.weight, conv_head.weight) found while loading pretrained weights. This may be expected if model is being adapted.
Baseline Train:   4%|▍         | 101/2548 [00:18<07:05,  5.76it/s]

  iter 100/2548 — loss: 1.4430


Baseline Train:   8%|▊         | 201/2548 [00:36<06:41,  5.85it/s]

  iter 200/2548 — loss: 1.1982


Baseline Train:  12%|█▏        | 301/2548 [00:53<06:42,  5.58it/s]

  iter 300/2548 — loss: 1.2519


Baseline Train:  16%|█▌        | 401/2548 [01:11<06:05,  5.88it/s]

  iter 400/2548 — loss: 0.9030


Baseline Train:  20%|█▉        | 501/2548 [01:29<06:03,  5.63it/s]

  iter 500/2548 — loss: 0.4763


Baseline Train:  24%|██▎       | 601/2548 [01:47<05:50,  5.56it/s]

  iter 600/2548 — loss: 0.5989


Baseline Train:  28%|██▊       | 701/2548 [02:05<05:59,  5.13it/s]

  iter 700/2548 — loss: 0.6743


Baseline Train:  31%|███▏      | 801/2548 [02:23<05:19,  5.46it/s]

  iter 800/2548 — loss: 0.5138


Baseline Train:  35%|███▌      | 901/2548 [02:42<05:06,  5.38it/s]

  iter 900/2548 — loss: 0.6087


Baseline Train:  39%|███▉      | 1001/2548 [03:00<04:46,  5.40it/s]

  iter 1000/2548 — loss: 0.6015


Baseline Train:  43%|████▎     | 1101/2548 [03:18<04:26,  5.44it/s]

  iter 1100/2548 — loss: 0.7834


Baseline Train:  47%|████▋     | 1201/2548 [03:36<03:56,  5.70it/s]

  iter 1200/2548 — loss: 0.5388


Baseline Train:  51%|█████     | 1301/2548 [03:54<03:33,  5.84it/s]

  iter 1300/2548 — loss: 0.5109


Baseline Train:  55%|█████▍    | 1401/2548 [04:13<03:37,  5.28it/s]

  iter 1400/2548 — loss: 0.6543


Baseline Train:  59%|█████▉    | 1501/2548 [04:31<03:06,  5.62it/s]

  iter 1500/2548 — loss: 0.7091


Baseline Train:  63%|██████▎   | 1601/2548 [04:49<03:03,  5.15it/s]

  iter 1600/2548 — loss: 0.5167


Baseline Train:  67%|██████▋   | 1701/2548 [05:07<02:31,  5.58it/s]

  iter 1700/2548 — loss: 0.6616


Baseline Train:  71%|███████   | 1801/2548 [05:25<02:15,  5.51it/s]

  iter 1800/2548 — loss: 0.6938


Baseline Train:  75%|███████▍  | 1901/2548 [05:43<01:52,  5.75it/s]

  iter 1900/2548 — loss: 0.7326


Baseline Train:  79%|███████▊  | 2001/2548 [06:01<01:38,  5.57it/s]

  iter 2000/2548 — loss: 0.8027


Baseline Train:  82%|████████▏ | 2101/2548 [06:19<01:19,  5.62it/s]

  iter 2100/2548 — loss: 0.4411


Baseline Train:  86%|████████▋ | 2201/2548 [06:37<00:59,  5.86it/s]

  iter 2200/2548 — loss: 0.5165


Baseline Train:  90%|█████████ | 2301/2548 [06:55<00:42,  5.75it/s]

  iter 2300/2548 — loss: 0.4667


Baseline Train:  94%|█████████▍| 2401/2548 [07:13<00:26,  5.45it/s]

  iter 2400/2548 — loss: 0.4851


Baseline Train:  98%|█████████▊| 2501/2548 [07:31<00:08,  5.67it/s]

  iter 2500/2548 — loss: 0.6609


Baseline Train: 100%|██████████| 2548/2548 [07:40<00:00,  5.54it/s]
Baseline Val Loss: 100%|██████████| 637/637 [00:44<00:00, 14.43it/s]


lr=5e-04, wd=1e-05 → val_loss=0.5456


Unexpected keys (bn2.bias, bn2.num_batches_tracked, bn2.running_mean, bn2.running_var, bn2.weight, classifier.bias, classifier.weight, conv_head.weight) found while loading pretrained weights. This may be expected if model is being adapted.
Baseline Train:   4%|▍         | 101/2548 [00:19<07:16,  5.61it/s]

  iter 100/2548 — loss: 1.0473


Baseline Train:   8%|▊         | 201/2548 [00:37<06:56,  5.63it/s]

  iter 200/2548 — loss: 1.1575


Baseline Train:  12%|█▏        | 301/2548 [00:55<06:21,  5.89it/s]

  iter 300/2548 — loss: 1.0463


Baseline Train:  16%|█▌        | 401/2548 [01:12<06:12,  5.76it/s]

  iter 400/2548 — loss: 1.0980


Baseline Train:  20%|█▉        | 501/2548 [01:30<05:30,  6.20it/s]

  iter 500/2548 — loss: 0.9422


Baseline Train:  24%|██▎       | 601/2548 [01:47<05:41,  5.69it/s]

  iter 600/2548 — loss: 1.0952


Baseline Train:  28%|██▊       | 701/2548 [02:05<05:17,  5.81it/s]

  iter 700/2548 — loss: 0.7765


Baseline Train:  31%|███▏      | 801/2548 [02:22<04:51,  5.99it/s]

  iter 800/2548 — loss: 0.6784


Baseline Train:  35%|███▌      | 901/2548 [02:40<04:43,  5.81it/s]

  iter 900/2548 — loss: 0.8913


Baseline Train:  39%|███▉      | 1001/2548 [02:57<04:30,  5.73it/s]

  iter 1000/2548 — loss: 1.0259


Baseline Train:  43%|████▎     | 1101/2548 [03:14<04:01,  6.00it/s]

  iter 1100/2548 — loss: 0.7429


Baseline Train:  47%|████▋     | 1201/2548 [03:32<03:55,  5.73it/s]

  iter 1200/2548 — loss: 0.7866


Baseline Train:  51%|█████     | 1301/2548 [03:50<03:28,  5.97it/s]

  iter 1300/2548 — loss: 0.9048


Baseline Train:  55%|█████▍    | 1401/2548 [04:07<03:15,  5.87it/s]

  iter 1400/2548 — loss: 1.0313


Baseline Train:  59%|█████▉    | 1501/2548 [04:24<02:56,  5.93it/s]

  iter 1500/2548 — loss: 1.2455


Baseline Train:  63%|██████▎   | 1601/2548 [04:41<02:47,  5.65it/s]

  iter 1600/2548 — loss: 1.1888


Baseline Train:  67%|██████▋   | 1701/2548 [04:59<02:31,  5.58it/s]

  iter 1700/2548 — loss: 0.4656


Baseline Train:  71%|███████   | 1801/2548 [05:17<02:08,  5.80it/s]

  iter 1800/2548 — loss: 1.2285


Baseline Train:  75%|███████▍  | 1901/2548 [05:34<01:58,  5.47it/s]

  iter 1900/2548 — loss: 1.0141


Baseline Train:  79%|███████▊  | 2001/2548 [05:52<01:35,  5.74it/s]

  iter 2000/2548 — loss: 0.6315


Baseline Train:  82%|████████▏ | 2101/2548 [06:10<01:20,  5.57it/s]

  iter 2100/2548 — loss: 0.8674


Baseline Train:  86%|████████▋ | 2201/2548 [06:27<00:58,  5.94it/s]

  iter 2200/2548 — loss: 0.8633


Baseline Train:  90%|█████████ | 2301/2548 [06:45<00:41,  5.93it/s]

  iter 2300/2548 — loss: 0.5049


Baseline Train:  94%|█████████▍| 2401/2548 [07:03<00:25,  5.86it/s]

  iter 2400/2548 — loss: 0.4999


Baseline Train:  98%|█████████▊| 2501/2548 [07:21<00:08,  5.53it/s]

  iter 2500/2548 — loss: 0.5968


Baseline Train: 100%|██████████| 2548/2548 [07:29<00:00,  5.66it/s]
Baseline Val Loss: 100%|██████████| 637/637 [00:43<00:00, 14.67it/s]


lr=1e-04, wd=1e-04 → val_loss=0.6840


Unexpected keys (bn2.bias, bn2.num_batches_tracked, bn2.running_mean, bn2.running_var, bn2.weight, classifier.bias, classifier.weight, conv_head.weight) found while loading pretrained weights. This may be expected if model is being adapted.
Baseline Train:   4%|▍         | 101/2548 [00:19<07:25,  5.49it/s]

  iter 100/2548 — loss: 1.1664


Baseline Train:   8%|▊         | 201/2548 [00:37<06:50,  5.71it/s]

  iter 200/2548 — loss: 1.0305


Baseline Train:  12%|█▏        | 301/2548 [00:56<06:29,  5.77it/s]

  iter 300/2548 — loss: 1.0341


Baseline Train:  16%|█▌        | 401/2548 [01:14<06:12,  5.76it/s]

  iter 400/2548 — loss: 0.8606


Baseline Train:  20%|█▉        | 501/2548 [01:32<05:54,  5.77it/s]

  iter 500/2548 — loss: 0.7314


Baseline Train:  24%|██▎       | 601/2548 [01:50<05:24,  6.00it/s]

  iter 600/2548 — loss: 0.7016


Baseline Train:  28%|██▊       | 701/2548 [02:07<05:09,  5.97it/s]

  iter 700/2548 — loss: 0.8954


Baseline Train:  31%|███▏      | 801/2548 [02:25<05:07,  5.69it/s]

  iter 800/2548 — loss: 0.6582


Baseline Train:  35%|███▌      | 901/2548 [02:42<04:57,  5.53it/s]

  iter 900/2548 — loss: 0.6309


Baseline Train:  39%|███▉      | 1001/2548 [02:59<04:31,  5.70it/s]

  iter 1000/2548 — loss: 1.0161


Baseline Train:  43%|████▎     | 1101/2548 [03:17<04:25,  5.45it/s]

  iter 1100/2548 — loss: 1.0293


Baseline Train:  47%|████▋     | 1201/2548 [03:35<03:52,  5.78it/s]

  iter 1200/2548 — loss: 0.5333


Baseline Train:  51%|█████     | 1301/2548 [03:52<03:36,  5.77it/s]

  iter 1300/2548 — loss: 1.0948


Baseline Train:  55%|█████▍    | 1401/2548 [04:10<03:11,  5.98it/s]

  iter 1400/2548 — loss: 0.5773


Baseline Train:  59%|█████▉    | 1501/2548 [04:27<02:51,  6.11it/s]

  iter 1500/2548 — loss: 0.6319


Baseline Train:  63%|██████▎   | 1601/2548 [04:45<03:04,  5.13it/s]

  iter 1600/2548 — loss: 1.1800


Baseline Train:  67%|██████▋   | 1701/2548 [05:02<02:23,  5.89it/s]

  iter 1700/2548 — loss: 0.7002


Baseline Train:  71%|███████   | 1801/2548 [05:20<02:09,  5.75it/s]

  iter 1800/2548 — loss: 0.8603


Baseline Train:  75%|███████▍  | 1901/2548 [05:38<01:54,  5.63it/s]

  iter 1900/2548 — loss: 0.8693


Baseline Train:  79%|███████▊  | 2001/2548 [05:56<01:33,  5.84it/s]

  iter 2000/2548 — loss: 0.5822


Baseline Train:  82%|████████▏ | 2101/2548 [06:13<01:17,  5.77it/s]

  iter 2100/2548 — loss: 0.5343


Baseline Train:  86%|████████▋ | 2201/2548 [06:31<01:00,  5.77it/s]

  iter 2200/2548 — loss: 0.8546


Baseline Train:  90%|█████████ | 2301/2548 [06:49<00:45,  5.42it/s]

  iter 2300/2548 — loss: 1.1404


Baseline Train:  94%|█████████▍| 2401/2548 [07:07<00:25,  5.67it/s]

  iter 2400/2548 — loss: 0.5418


Baseline Train:  98%|█████████▊| 2501/2548 [07:25<00:08,  5.59it/s]

  iter 2500/2548 — loss: 0.6119


Baseline Train: 100%|██████████| 2548/2548 [07:33<00:00,  5.62it/s]
Baseline Val Loss: 100%|██████████| 637/637 [00:43<00:00, 14.62it/s]

lr=1e-04, wd=1e-05 → val_loss=0.6672
       lr  weight_decay  val_loss
0  0.0010       0.00010  0.479602
1  0.0010       0.00001  0.492483
2  0.0005       0.00001  0.545558
3  0.0005       0.00010  0.553480
4  0.0001       0.00001  0.667174





In [28]:
from timm import create_model
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.ops import MultiScaleRoIAlign
from torch.optim import SGD
from collections import OrderedDict
import torch.nn as nn

class EfficientNetBackbone(nn.Module):
    def __init__(self, raw):
        super().__init__()
        self.body = raw
        self.out_channels = raw.feature_info.info[-1]['num_chs']
    def forward(self, x):
        feats = self.body(x)
        return OrderedDict([("0", feats[-1])])

raw = create_model('efficientnet_b0', pretrained=True, features_only=True)
backbone = EfficientNetBackbone(raw)
anchor_gen = AnchorGenerator(sizes=((32,64,128,256,512),), aspect_ratios=((0.5,1.0,2.0),))
roi_pooler = MultiScaleRoIAlign(featmap_names=['0'], output_size=7, sampling_ratio=2)

model = FasterRCNN(
    backbone, 
    num_classes=NUM_CLASSES,
    rpn_anchor_generator=anchor_gen,
    box_roi_pool=roi_pooler
).to(device)

optimizer = SGD(model.parameters(), lr=1e-3, momentum=0.9, weight_decay=1e-4)

num_epochs_final = 2
for epoch in range(1, num_epochs_final + 1):
    print(f"\n— Final Train Epoch {epoch}/{num_epochs_final} —")
    train_one_epoch(model, optimizer, train_loader, device)
    val_loss = evaluate_loss(model, val_loader, device)
    print(f"Validation Loss: {val_loss:.4f}")


Unexpected keys (bn2.bias, bn2.num_batches_tracked, bn2.running_mean, bn2.running_var, bn2.weight, classifier.bias, classifier.weight, conv_head.weight) found while loading pretrained weights. This may be expected if model is being adapted.



— Final Train Epoch 1/2 —


Baseline Train:   4%|▍         | 101/2548 [00:18<07:04,  5.77it/s]

  iter 100/2548 — loss: 0.7562


Baseline Train:   8%|▊         | 201/2548 [00:36<06:47,  5.76it/s]

  iter 200/2548 — loss: 0.8222


Baseline Train:  12%|█▏        | 301/2548 [00:53<06:34,  5.70it/s]

  iter 300/2548 — loss: 0.7716


Baseline Train:  16%|█▌        | 401/2548 [01:11<06:55,  5.16it/s]

  iter 400/2548 — loss: 0.5998


Baseline Train:  20%|█▉        | 501/2548 [01:30<05:56,  5.74it/s]

  iter 500/2548 — loss: 0.8789


Baseline Train:  24%|██▎       | 601/2548 [01:48<05:39,  5.74it/s]

  iter 600/2548 — loss: 0.6460


Baseline Train:  28%|██▊       | 701/2548 [02:06<05:26,  5.65it/s]

  iter 700/2548 — loss: 0.6169


Baseline Train:  31%|███▏      | 801/2548 [02:24<05:10,  5.63it/s]

  iter 800/2548 — loss: 0.6961


Baseline Train:  35%|███▌      | 901/2548 [02:42<05:16,  5.20it/s]

  iter 900/2548 — loss: 0.4517


Baseline Train:  39%|███▉      | 1001/2548 [03:00<04:51,  5.30it/s]

  iter 1000/2548 — loss: 0.6362


Baseline Train:  43%|████▎     | 1101/2548 [03:18<04:28,  5.39it/s]

  iter 1100/2548 — loss: 0.6463


Baseline Train:  47%|████▋     | 1201/2548 [03:36<04:05,  5.50it/s]

  iter 1200/2548 — loss: 0.4751


Baseline Train:  51%|█████     | 1301/2548 [03:55<03:46,  5.50it/s]

  iter 1300/2548 — loss: 0.4333


Baseline Train:  55%|█████▍    | 1401/2548 [04:13<03:10,  6.03it/s]

  iter 1400/2548 — loss: 0.4696


Baseline Train:  59%|█████▉    | 1501/2548 [04:31<03:04,  5.68it/s]

  iter 1500/2548 — loss: 0.6184


Baseline Train:  63%|██████▎   | 1601/2548 [04:49<02:58,  5.30it/s]

  iter 1600/2548 — loss: 0.5993


Baseline Train:  67%|██████▋   | 1701/2548 [05:07<02:32,  5.57it/s]

  iter 1700/2548 — loss: 0.4890


Baseline Train:  71%|███████   | 1801/2548 [05:25<02:21,  5.29it/s]

  iter 1800/2548 — loss: 0.6131


Baseline Train:  75%|███████▍  | 1901/2548 [05:43<02:00,  5.37it/s]

  iter 1900/2548 — loss: 0.4387


Baseline Train:  79%|███████▊  | 2001/2548 [06:02<01:38,  5.54it/s]

  iter 2000/2548 — loss: 0.4491


Baseline Train:  82%|████████▏ | 2101/2548 [06:20<01:21,  5.50it/s]

  iter 2100/2548 — loss: 0.5266


Baseline Train:  86%|████████▋ | 2201/2548 [06:38<01:08,  5.07it/s]

  iter 2200/2548 — loss: 0.5762


Baseline Train:  90%|█████████ | 2301/2548 [06:57<00:43,  5.68it/s]

  iter 2300/2548 — loss: 0.5151


Baseline Train:  94%|█████████▍| 2401/2548 [07:16<00:26,  5.58it/s]

  iter 2400/2548 — loss: 0.5083


Baseline Train:  98%|█████████▊| 2501/2548 [07:34<00:08,  5.40it/s]

  iter 2500/2548 — loss: 0.4614


Baseline Train: 100%|██████████| 2548/2548 [07:43<00:00,  5.50it/s]
Baseline Val Loss: 100%|██████████| 637/637 [00:44<00:00, 14.31it/s]


Validation Loss: 0.4719

— Final Train Epoch 2/2 —


Baseline Train:   4%|▍         | 101/2548 [00:19<07:37,  5.35it/s]

  iter 100/2548 — loss: 0.4356


Baseline Train:   8%|▊         | 201/2548 [00:37<07:23,  5.30it/s]

  iter 200/2548 — loss: 0.4905


Baseline Train:  12%|█▏        | 301/2548 [00:55<06:25,  5.83it/s]

  iter 300/2548 — loss: 0.4518


Baseline Train:  16%|█▌        | 401/2548 [01:14<06:38,  5.39it/s]

  iter 400/2548 — loss: 0.3723


Baseline Train:  20%|█▉        | 501/2548 [01:32<06:38,  5.13it/s]

  iter 500/2548 — loss: 0.5447


Baseline Train:  24%|██▎       | 601/2548 [01:51<06:11,  5.25it/s]

  iter 600/2548 — loss: 0.5875


Baseline Train:  28%|██▊       | 701/2548 [02:09<05:26,  5.66it/s]

  iter 700/2548 — loss: 0.4388


Baseline Train:  31%|███▏      | 801/2548 [02:27<05:02,  5.77it/s]

  iter 800/2548 — loss: 0.5237


Baseline Train:  35%|███▌      | 901/2548 [02:46<04:54,  5.59it/s]

  iter 900/2548 — loss: 0.6204


Baseline Train:  39%|███▉      | 1001/2548 [03:04<05:05,  5.07it/s]

  iter 1000/2548 — loss: 0.4213


Baseline Train:  43%|████▎     | 1101/2548 [03:23<04:32,  5.31it/s]

  iter 1100/2548 — loss: 0.3968


Baseline Train:  47%|████▋     | 1201/2548 [03:41<04:12,  5.34it/s]

  iter 1200/2548 — loss: 0.3570


Baseline Train:  51%|█████     | 1301/2548 [03:59<03:55,  5.30it/s]

  iter 1300/2548 — loss: 0.4118


Baseline Train:  55%|█████▍    | 1401/2548 [04:18<03:43,  5.14it/s]

  iter 1400/2548 — loss: 0.4207


Baseline Train:  59%|█████▉    | 1501/2548 [04:37<03:15,  5.36it/s]

  iter 1500/2548 — loss: 0.3160


Baseline Train:  63%|██████▎   | 1601/2548 [04:55<02:57,  5.33it/s]

  iter 1600/2548 — loss: 0.5669


Baseline Train:  67%|██████▋   | 1701/2548 [05:14<02:24,  5.85it/s]

  iter 1700/2548 — loss: 0.3438


Baseline Train:  71%|███████   | 1801/2548 [05:32<02:10,  5.71it/s]

  iter 1800/2548 — loss: 0.4083


Baseline Train:  75%|███████▍  | 1901/2548 [05:50<02:05,  5.18it/s]

  iter 1900/2548 — loss: 0.5748


Baseline Train:  79%|███████▊  | 2001/2548 [06:09<01:40,  5.46it/s]

  iter 2000/2548 — loss: 0.3065


Baseline Train:  82%|████████▏ | 2101/2548 [06:27<01:21,  5.49it/s]

  iter 2100/2548 — loss: 0.3593


Baseline Train:  86%|████████▋ | 2201/2548 [06:46<01:05,  5.32it/s]

  iter 2200/2548 — loss: 0.3397


Baseline Train:  90%|█████████ | 2301/2548 [07:05<00:43,  5.64it/s]

  iter 2300/2548 — loss: 0.2677


Baseline Train:  94%|█████████▍| 2401/2548 [07:23<00:28,  5.19it/s]

  iter 2400/2548 — loss: 0.5234


Baseline Train:  98%|█████████▊| 2501/2548 [07:42<00:08,  5.80it/s]

  iter 2500/2548 — loss: 0.3807


Baseline Train: 100%|██████████| 2548/2548 [07:50<00:00,  5.41it/s]
Baseline Val Loss: 100%|██████████| 637/637 [00:44<00:00, 14.20it/s]

Validation Loss: 0.3860





In [29]:
from torchvision.ops import box_iou

model.eval()
ious = []

with torch.no_grad():
    for images, targets in val_loader:
        images = [img.to(device) for img in images]
        outputs = model(images) 
        for out, tgt in zip(outputs, targets):
            gt_boxes = tgt['boxes'].to(device)
            pred_boxes = out['boxes']

            if len(pred_boxes) == 0 or len(gt_boxes) == 0:
                continue

            iou_mat = box_iou(pred_boxes, gt_boxes)
            best_iou_per_pred, _ = iou_mat.max(dim=1)
            ious += best_iou_per_pred.cpu().tolist()

mean_iou = sum(ious) / len(ious)
print(f"Mean IoU over all predictions: {mean_iou:.3f}")

Mean IoU over all predictions: 0.559


In [30]:
model = baseline_model
model.eval()

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=1e-05)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=1e-05)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=1e-05)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=1e-05)
          (relu