This file contains single frame and sequence data. It is using the data found under detection2_clean/yolo_split2. The augmentations for this file mimic those found in polyp_alb352_cos.yaml. (Note: There is no cosine learning rate here.)

In [1]:
%%capture
!pip install torch torchvision albumentations pycocotools opencv-python ultralytics

In [2]:
import torch, ultralytics
#!/usr/bin/env python3
import os, sys, json, random, shutil
from pathlib import Path
import re
import cv2
import yaml
from skimage.measure import label, regionprops
from ultralytics import YOLO

from typing import Tuple, Dict, List
from PIL import Image
import torch
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

from PIL import Image, ImageDraw

import albumentations as A
from albumentations.pytorch import ToTensorV2
from albumentations.core.serialization import from_dict

from torch.utils.data import Dataset, DataLoader

import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

from torchvision.models.detection import fasterrcnn_resnet50_fpn, FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.ops import box_iou, nms

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [3]:
# mount google drive
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [4]:
ROOT = Path("/content/drive/MyDrive/ErdosFall25/")

In [5]:
# Fix the seed
def set_seed(s=42):
    random.seed(s); np.random.seed(s); torch.manual_seed(s); torch.cuda.manual_seed_all(s)
set_seed(42)

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Convert YOLO Labels to COCO JSON files. The files are already created and the code is commented out.

In [6]:
from pathlib import Path
import json
from PIL import Image

def yolo_to_coco(images_dir, labels_dir, out_json, class_name="polyp"):
    """
    Convert YOLO .txt (class cx cy w h normalized) -> COCO JSON (for detection).
    """
    images, annotations = [], []
    categories = [{"id": 1, "name": class_name}]
    ann_id, img_id = 1, 1

    images_dir, labels_dir = Path(images_dir), Path(labels_dir)
    img_exts = {".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff"}

    for img_path in sorted(images_dir.rglob("*")):
        if img_path.suffix.lower() not in img_exts:
            continue
        W, H = Image.open(img_path).size
        images.append({
            "id": img_id,
            "file_name": str(img_path.resolve()),
            "width": W,
            "height": H
        })

        label_file = labels_dir / f"{img_path.stem}.txt"
        if label_file.exists():
            with open(label_file) as f:
                for line in f:
                    parts = line.strip().split()
                    if len(parts) != 5:
                        continue
                    cls, xc, yc, w, h = map(float, parts)
                    x = (xc - w/2) * W
                    y = (yc - h/2) * H
                    w *= W
                    h *= H
                    annotations.append({
                        "id": ann_id,
                        "image_id": img_id,
                        "category_id": 1,
                        "bbox": [x, y, w, h],
                        "area": float(w * h),
                        "iscrowd": 0
                    })
                    ann_id += 1
        img_id += 1

    coco = {"images": images, "annotations": annotations, "categories": categories}
    out_json = Path(out_json)
    out_json.parent.mkdir(parents=True, exist_ok=True)
    with open(out_json, "w") as f:
        json.dump(coco, f)
    print(f"Wrote {out_json} ({len(images)} images, {len(annotations)} boxes).")


In [7]:
# # train json
yolo_to_coco(ROOT/"data/detection2/yolo_split2/train/images", ROOT/"data/detection2/yolo_split2/train/labels", ROOT/"data/detection2/yolo_split2/train/annotations.json")


KeyboardInterrupt: 

In [8]:
# # valid json
yolo_to_coco(ROOT/"data/detection2/yolo_split2/val/images", ROOT/"data/detection2/yolo_split2/val/labels", ROOT/"data/detection2/yolo_split2/val/annotations.json")

Wrote /content/drive/MyDrive/ErdosFall25/data/detection2/yolo_split2/val/annotations.json (1666 images, 1446 boxes).


Load in JSON Files Annotations + Betul's Aug YAML File + Visualizer for Sanity Check

In [None]:
ANNOTATIONS_ROOT  = Path("/content/drive/MyDrive/ErdosFall25/data/detection2_clean/yolo_split2")
TRAIN_JSON        = ANNOTATIONS_ROOT / "train" / "annotations.json"
VAL_JSON          = ANNOTATIONS_ROOT / "val"   / "annotations.json"

In [None]:
# function to load in yaml file with option to normalize to tensor
# that is mean 0 and std 1 with max pixel value of 255.0

def load_albu_yaml(yaml_path, section="albumentations",
                   add_normalize_to_tensor=True,
                   use_masks=False,               # set True only if you'll pass masks to tfm(...)
                   min_visibility=0.001):          # drop near-vanishing boxes on train

    with open(yaml_path, "r") as f:
        cfg = yaml.safe_load(f)
    if section not in cfg:
        raise KeyError(f"YAML has keys {list(cfg.keys())}, but not '{section}'")

    compose_dict = cfg[section]

    # --- tiny compatibility tweaks on the raw dict (no re-writes to your file) ---
    def _fix_inplace(node):
        if isinstance(node, dict):
            cf = node.get("__class_fullname__", "")
            name = cf.split(".")[-1] if cf else ""

            # 1) ShiftScaleRotate => Affine (albu now prefers Affine)
            if name == "ShiftScaleRotate":
                # Replace class
                node["__class_fullname__"] = "albumentations.augmentations.geometric.transforms.Affine"
                # Map limits to Affine ranges
                shift = float(node.pop("shift_limit", 0.0))
                scale = float(node.pop("scale_limit", 0.0))
                rot   = float(node.pop("rotate_limit", 0.0))
                node["translate_percent"] = (-abs(shift), abs(shift))
                node["scale"]             = (1-abs(scale), 1+abs(scale))
                node["rotate"]            = (-abs(rot), abs(rot))
                # Keep border/interp if present
                # Albumentations uses 'mode' for border in Affine; reuse existing if given
                if "border_mode" in node:
                    node["mode"] = node.pop("border_mode")

            # 2) GaussNoise: some versions reject 'mean'
            if name == "GaussNoise":
                if "var_limit" in node and isinstance(node["var_limit"], list):
                    node["var_limit"] = tuple(node["var_limit"])
                # Drop mean if your installed albu doesn't accept it
                node.pop("mean", None)

            # 3) MaskDropout: if you are NOT passing masks, swap to CoarseDropout
            if name == "MaskDropout" and not use_masks:
                node["__class_fullname__"] = "albumentations.augmentations.dropout.coarse_dropout.CoarseDropout"
                # Map a couple of sensible defaults
                node["max_holes"]  = node.get("max_objects", 1)
                node["min_holes"]  = 1
                node["max_height"] = node.get("max_height", 64)
                node["max_width"]  = node.get("max_width", 64)
                node["min_height"] = node.get("min_height", 32)
                node["min_width"]  = node.get("min_width", 32)
                # CoarseDropout uses 'fill_value' only; keep if present

            # Recurse to children
            for k, v in list(node.items()):
                _fix_inplace(v)

        elif isinstance(node, list):
            for v in node:
                _fix_inplace(v)

    _fix_inplace(compose_dict)

    # Build the Compose from dict (now version-compatible)
    base = from_dict(compose_dict)  # -> A.Compose

    # Add Normalize + ToTensorV2 at the end if you want
    if add_normalize_to_tensor:
        tail = [
            A.Normalize(mean=(0,0,0), std=(1,1,1), max_pixel_value=255.0),
            A.pytorch.ToTensorV2(),
        ]
        base = A.Compose(base.transforms + tail, p=base.p)

    # Wrap with bbox params so boxes are transformed with the image
    wrapped = A.Compose(
        base.transforms,
        p=base.p,
        bbox_params=A.BboxParams(
            format="pascal_voc",
            label_fields=["labels"],
            min_visibility=min_visibility
        ),
        strict=False
    )
    return wrapped


In [None]:
# function to read in annotation file and make a dictionary containing
# image information (id, etc.)
def load_coco_state(ann_json, images_root=None):
  with open(ann_json, "r") as f:
    coco = json.load(f)

  images = coco['images']
  anns = coco['annotations']
  cats = coco['categories']

  anns_by_img = {}
  for a in anns:
    anns_by_img.setdefault(a['image_id'], []).append(a)

  cat_id_to_idx = {c['id']: i+1 for i,c in enumerate(cats)}
  idx_to_cat_id = {i + 1: c['id'] for i,c in enumerate(cats)}
  num_classes = len(cats) + 1

  state = {
      "images": images,
      "anns_by_img": anns_by_img,
      "cat_id_to_idx": cat_id_to_idx,
      "idx_to_cat_id" : idx_to_cat_id,
      "num_classes" : num_classes,
      "images_root": Path(images_root) if images_root else None
  }

  return state

In [None]:
def _compose_with_bboxes(compose: A.Compose):
  return A.Compose(
      compose.transforms,
      p=compose.p,
      bbox_params = A.BboxParams(
          format='pascal_voc',
          label_fields = ['labels'],
          min_visibility=0.001
      ),
      strict = False
  )

In [None]:
def get_example(i, state, tfm: A.Compose):

  imrec = state['images'][i]
  p = Path(imrec['file_name'])

  if state['images_root'] and not p.is_absolute():
    p = state['images_root'] / p

  img = cv2.imread(str(p))
  if img is None:
    raise FileNotFoundError(str(p))

  img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

  H, W = img.shape[:2]

  boxes, labels = [], []
  for a in state['anns_by_img'].get(imrec['id'],[]):
    x, y, w, h = a['bbox']
    boxes.append([x, y , x + w , y + h])
    labels.append(state['cat_id_to_idx'][a['category_id']])

  boxes  = np.asarray(boxes,  dtype=np.float32) if boxes else np.zeros((0,4), np.float32)
  labels = np.asarray(labels, dtype=np.int64)   if labels else np.zeros((0,),  np.int64)

  if len(boxes) > 0:
    boxes[:, [0,2]] = np.clip(boxes[:, [0,2]], 0, W)
    boxes[:, [1,3]] = np.clip(boxes[:, [1,3]], 0, H)
    eps = 1e-6
    keep = (boxes[:,2]-boxes[:,0] > eps) & (boxes[:,3]-boxes[:,1] > eps)
    boxes, labels = boxes[keep], labels[keep]

  kwargs = dict(image=img, bboxes=boxes.tolist(), labels=labels.tolist())

  out = tfm(**kwargs)

  bxs, lbs = out["bboxes"], out["labels"]
  boxes  = np.asarray(bxs, dtype=np.float32) if bxs else np.zeros((0,4), np.float32)
  labels = np.asarray(lbs, dtype=np.int64)   if lbs else np.zeros((0,),  np.int64)

  target = {
      "boxes":    torch.as_tensor(boxes,  dtype=torch.float32),
      "labels":   torch.as_tensor(labels, dtype=torch.int64),
      "image_id": torch.tensor([imrec["id"]], dtype=torch.int64),
  }

  return out["image"], target

Begin Transfer Learning Setup

In [None]:
def make_collate_fn(state, tfm):

  tfm_with_boxes = _compose_with_bboxes(tfm)

  def collate_fn(batch_indices):
    imgs, targs = [], []
    for idx in batch_indices:
      img, targ = get_example(idx, state, tfm_with_boxes)
      imgs.append(img)
      targs.append(targ)
    return imgs, targs

  return collate_fn


In [None]:
def make_dataloader(state, tfm, batch_size = 1, shuffle = True, num_workers = 2, worker_seed = 42):
  N = len(state['images'])
  indices = list(range(N))

  generator = None
  if shuffle:
    g = torch.Generator()
    g.manual_seed(worker_seed)
    generator = g

  def _worker_init_fn(worker_id):
    import random
    base = worker_seed + worker_id * 9973
    np.random.seed(base % (2**32 - 1))
    random.seed(base)

  return DataLoader(
      indices,
      batch_size=batch_size,
      shuffle=shuffle,
      num_workers=num_workers,
      worker_init_fn=_worker_init_fn if num_workers > 0 else None,
      generator=generator,
      collate_fn=make_collate_fn(state, tfm)
  )

In [None]:
# load in coco states

train_state = load_coco_state(
    ann_json = TRAIN_JSON,
    images_root = ANNOTATIONS_ROOT / "train" / "images"
)

val_state   = load_coco_state(
    ann_json = VAL_JSON,
    images_root = ANNOTATIONS_ROOT / "val"   / "images"
)

In [None]:
aug_yaml_path = "/content/drive/.shortcut-targets-by-id/1sNBAVjJw_53OufvKWiqpnmt9aFuzBIrw/configs/polyp_alb352_cos.yaml"
train_tfm = load_albu_yaml(aug_yaml_path, add_normalize_to_tensor=True)

  return cls(**args)
  return cls(**args)
  return cls(**args)


In [None]:
train_tfm = A.Compose(
    [
        A.PadIfNeeded(min_height=352, min_width=352,
                      border_mode=cv2.BORDER_CONSTANT, value=0, p=1.0)
    ] + train_tfm.transforms,
    p=train_tfm.p,
    bbox_params=A.BboxParams(format="pascal_voc",
                             label_fields=["labels"],
                             min_visibility=0.001),
    strict=False
)

  A.PadIfNeeded(min_height=352, min_width=352,


In [None]:
val_tfm = A.Compose([
    A.LongestMaxSize(max_size=352, p=1.0),
    A.PadIfNeeded(min_height=352, min_width=352,
                  border_mode=cv2.BORDER_CONSTANT, value=0, p=1.0),
    A.Normalize(mean=(0,0,0), std=(1,1,1), max_pixel_value=255.0),
    A.pytorch.ToTensorV2(),
], bbox_params=A.BboxParams(format="pascal_voc", label_fields=["labels"], min_visibility=0.0))

  A.PadIfNeeded(min_height=352, min_width=352,


In [None]:
train_loader = make_dataloader(
    state=train_state,
    tfm=train_tfm,
    batch_size=1,
    shuffle=True,
    num_workers=2
)

val_loader = make_dataloader(
    state=val_state,
    tfm=val_tfm,
    batch_size=1,
    shuffle=True, # do we want shuffle on val?
    num_workers=2
)

In [None]:
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# a) load COCO-pretrained model
model = fasterrcnn_resnet50_fpn(weights="DEFAULT")

# b) replace detection head (num_classes = background + polyp = 2)
in_feats = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_feats, num_classes=2)

# c) (transfer learning) freeze backbone params → only train the head
for p in model.backbone.parameters():
    p.requires_grad = False

trainable_param_names = []
for name, p in model.named_parameters():
    if p.requires_grad:
        trainable_param_names.append(name)

model = model.to(DEVICE)

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth


100%|██████████| 160M/160M [00:00<00:00, 192MB/s]


In [None]:
# train only trainable params (the head)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.AdamW(params, lr=1e-4, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)  # optional
scaler = torch.cuda.amp.GradScaler(enabled=(DEVICE.type=="cuda"))


  scaler = torch.cuda.amp.GradScaler(enabled=(DEVICE.type=="cuda"))


In [None]:
import math, time
from tqdm.auto import tqdm

def train_one_epoch(model, loader, optimizer, scaler, max_norm=2.0):
    model.train()
    running = 0.0
    for images, targets in tqdm(loader, desc="Train", leave=False):
        images  = [im.to(DEVICE) for im in images]
        targets = [{k: v.to(DEVICE) for k, v in t.items()} for t in targets]

        optimizer.zero_grad(set_to_none=True)
        with torch.amp.autocast('cuda', enabled=(DEVICE.type == 'cuda')):
            loss_dict = model(images, targets)
            loss = sum(loss_dict.values())

        scaler.scale(loss).backward()
        # gradient clipping (just in case)
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_( [p for p in model.parameters() if p.requires_grad], max_norm )
        scaler.step(optimizer)
        scaler.update()

        running += loss.item()
    return running / max(1, len(loader))



In [None]:
EPOCHS = 5
ART = Path("./artifacts"); ART.mkdir(exist_ok=True, parents=True)

with open(ART/"idx_to_cat_id.json","w") as f: json.dump(train_state["idx_to_cat_id"], f)

best_path = ART/"model_best.pth"
last_path = ART/"model_last.pth"
best_loss = float("inf")

for ep in range(1, EPOCHS+1):
    t0 = time.time()
    tr_loss = train_one_epoch(model, train_loader, optimizer, scaler)
    scheduler.step()
    torch.save(model.state_dict(), last_path)  # checkpoint each epoch
    if tr_loss < best_loss:
        best_loss = tr_loss
        torch.save(model.state_dict(), best_path)
    print(f"Epoch {ep}/{EPOCHS} | train_loss={tr_loss:.4f} | time={time.time()-t0:.1f}s")

print("Saved:", best_path, last_path)

Train:   0%|          | 0/4169 [00:00<?, ?it/s]

Epoch 1/5 | train_loss=0.0730 | time=1395.4s


Train:   0%|          | 0/4169 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7839bc264c20>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1664, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1647, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.12/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7839bc264c20>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1664, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 16

Epoch 2/5 | train_loss=0.0631 | time=324.8s


Train:   0%|          | 0/4169 [00:00<?, ?it/s]

  visibility_ratios = remaining_areas / box_areas


Epoch 3/5 | train_loss=0.0666 | time=314.0s


Train:   0%|          | 0/4169 [00:00<?, ?it/s]

Epoch 4/5 | train_loss=0.0739 | time=311.6s


Train:   0%|          | 0/4169 [00:00<?, ?it/s]

Epoch 5/5 | train_loss=0.0730 | time=309.6s
Saved: artifacts/model_best.pth artifacts/model_last.pth


In [None]:
ART = Path("./artifacts")
with open(ART/"idx_to_cat_id.json") as f: IDX_TO_CAT = {int(k): int(v) for k,v in json.load(f).items()}
WEIGHTS = ART/"model_best.pth"   # or model_last.pth

from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2
model = fasterrcnn_resnet50_fpn_v2(num_classes=val_state["num_classes"]).to(DEVICE)
model.load_state_dict(torch.load(WEIGHTS, map_location=DEVICE))
model.eval()

@torch.no_grad()
def run_inference(model, loader, idx_to_cat_id, score_thr=0.001):
    results = []
    for images, targets in tqdm(loader, desc="Infer", leave=False):
        images = [im.to(DEVICE, non_blocking=True) for im in images]
        outputs = model(images)
        for out, targ in zip(outputs, targets):
            img_id = int(targ["image_id"].item())
            boxes  = out["boxes"].detach().cpu().numpy()
            scores = out["scores"].detach().cpu().numpy()
            labels = out["labels"].detach().cpu().numpy()
            for (x1,y1,x2,y2), s, lb in zip(boxes, scores, labels):
                if s < score_thr:
                    continue
                results.append({
                    "image_id": img_id,
                    "category_id": int(idx_to_cat_id[int(lb)]),
                    "bbox": [float(x1), float(y1), float(x2-x1), float(y2-y1)],
                    "score": float(s),
                })
    return results

# run + mAP
val_results = run_inference(model, val_loader, IDX_TO_CAT, score_thr=0.001)
with tempfile.NamedTemporaryFile(mode="w+", suffix=".json", delete=False) as f:
    json.dump(val_results, f); res_json_path = f.name

coco_gt = COCO(str(VAL_JSON))
coco_dt = coco_gt.loadRes(res_json_path)
coco_eval = COCOeval(coco_gt, coco_dt, iouType="bbox")
coco_eval.evaluate(); coco_eval.accumulate(); coco_eval.summarize()
print("Primary mAP@[.50:.95]:", float(coco_eval.stats[0]))

RuntimeError: Error(s) in loading state_dict for FasterRCNN:
	Missing key(s) in state_dict: "backbone.fpn.inner_blocks.0.1.weight", "backbone.fpn.inner_blocks.0.1.bias", "backbone.fpn.inner_blocks.0.1.running_mean", "backbone.fpn.inner_blocks.0.1.running_var", "backbone.fpn.inner_blocks.1.1.weight", "backbone.fpn.inner_blocks.1.1.bias", "backbone.fpn.inner_blocks.1.1.running_mean", "backbone.fpn.inner_blocks.1.1.running_var", "backbone.fpn.inner_blocks.2.1.weight", "backbone.fpn.inner_blocks.2.1.bias", "backbone.fpn.inner_blocks.2.1.running_mean", "backbone.fpn.inner_blocks.2.1.running_var", "backbone.fpn.inner_blocks.3.1.weight", "backbone.fpn.inner_blocks.3.1.bias", "backbone.fpn.inner_blocks.3.1.running_mean", "backbone.fpn.inner_blocks.3.1.running_var", "backbone.fpn.layer_blocks.0.1.weight", "backbone.fpn.layer_blocks.0.1.bias", "backbone.fpn.layer_blocks.0.1.running_mean", "backbone.fpn.layer_blocks.0.1.running_var", "backbone.fpn.layer_blocks.1.1.weight", "backbone.fpn.layer_blocks.1.1.bias", "backbone.fpn.layer_blocks.1.1.running_mean", "backbone.fpn.layer_blocks.1.1.running_var", "backbone.fpn.layer_blocks.2.1.weight", "backbone.fpn.layer_blocks.2.1.bias", "backbone.fpn.layer_blocks.2.1.running_mean", "backbone.fpn.layer_blocks.2.1.running_var", "backbone.fpn.layer_blocks.3.1.weight", "backbone.fpn.layer_blocks.3.1.bias", "backbone.fpn.layer_blocks.3.1.running_mean", "backbone.fpn.layer_blocks.3.1.running_var", "rpn.head.conv.1.0.weight", "rpn.head.conv.1.0.bias", "roi_heads.box_head.0.0.weight", "roi_heads.box_head.0.1.weight", "roi_heads.box_head.0.1.bias", "roi_heads.box_head.0.1.running_mean", "roi_heads.box_head.0.1.running_var", "roi_heads.box_head.1.0.weight", "roi_heads.box_head.1.1.weight", "roi_heads.box_head.1.1.bias", "roi_heads.box_head.1.1.running_mean", "roi_heads.box_head.1.1.running_var", "roi_heads.box_head.2.0.weight", "roi_heads.box_head.2.1.weight", "roi_heads.box_head.2.1.bias", "roi_heads.box_head.2.1.running_mean", "roi_heads.box_head.2.1.running_var", "roi_heads.box_head.3.0.weight", "roi_heads.box_head.3.1.weight", "roi_heads.box_head.3.1.bias", "roi_heads.box_head.3.1.running_mean", "roi_heads.box_head.3.1.running_var", "roi_heads.box_head.5.weight", "roi_heads.box_head.5.bias". 
	Unexpected key(s) in state_dict: "backbone.fpn.inner_blocks.0.0.bias", "backbone.fpn.inner_blocks.1.0.bias", "backbone.fpn.inner_blocks.2.0.bias", "backbone.fpn.inner_blocks.3.0.bias", "backbone.fpn.layer_blocks.0.0.bias", "backbone.fpn.layer_blocks.1.0.bias", "backbone.fpn.layer_blocks.2.0.bias", "backbone.fpn.layer_blocks.3.0.bias", "roi_heads.box_head.fc6.weight", "roi_heads.box_head.fc6.bias", "roi_heads.box_head.fc7.weight", "roi_heads.box_head.fc7.bias". 

In [None]:
# EPOCHS = 5  # full epochs on heads; adjust as you like
# best_map = -1.0
# best_path = "fasterrcnn_heads_only_best.pth"

# IDX_TO_CAT = train_state['idx_to_cat_id']

# for epoch in range(1, EPOCHS+1):
#     t0 = time.time()
#     train_loss = train_one_epoch(model, train_loader, optimizer, scaler)
#     scheduler.step()

#     # Validation → COCO mAP
#     import json, tempfile
#     from pycocotools.coco import COCO
#     from pycocotools.cocoeval import COCOeval

#     val_results = run_inference(model, val_loader, IDX_TO_CAT, score_thr=0.001)

#     with tempfile.NamedTemporaryFile(mode="w+", suffix=".json", delete=False) as f:
#         json.dump(val_results, f)
#         res_json_path = f.name

#     coco_gt = COCO(VAL_JSON)
#     coco_dt = coco_gt.loadRes(res_json_path)
#     coco_eval = COCOeval(coco_gt, coco_dt, iouType="bbox")
#     coco_eval.evaluate(); coco_eval.accumulate()
#     # capture COCO summary (coco_eval.stats: AP@[.50:.95], AP50, AP75, APS, APM, APL, AR1, AR10, AR100, ARS, ARM, ARL)
#     coco_eval.summarize()
#     ap = float(coco_eval.stats[0])  # primary mAP@[.50:.95]

#     if ap > best_map:
#         best_map = ap
#         torch.save(model.state_dict(), best_path)

#     dt = time.time() - t0
#     print(f"Epoch {epoch}/{EPOCHS} | train_loss={train_loss:.4f} | mAP={ap:.4f} | best_mAP={best_map:.4f} | {dt:.1f}s")
