In [None]:
!pip install -q pycocotools

import os, random, zipfile, json
from pycocotools.coco import COCO

DATASET_DIR = "mini_coco_sample"
TOTAL_IMAGES = 50
COCO_URL_IMAGES = "http://images.cocodataset.org/val2017/"
COCO_URL_ANN = "http://images.cocodataset.org/annotations/annotations_trainval2017.zip"

if not os.path.exists(f"{DATASET_DIR}/images"):
    if not os.path.exists("annotations"):
        !wget -q {COCO_URL_ANN}
        !unzip -q annotations_trainval2017.zip
    coco = COCO("annotations/instances_val2017.json")
    img_ids = coco.getImgIds()
    selected_ids = random.sample(img_ids, TOTAL_IMAGES)

    os.makedirs(f"{DATASET_DIR}/images", exist_ok=True)
    os.makedirs(f"{DATASET_DIR}/annotations", exist_ok=True)

    # download images
    for img_id in selected_ids:
        info = coco.loadImgs(img_id)[0]
        url = COCO_URL_IMAGES + info["file_name"]
        !wget -q -P {DATASET_DIR}/images {url}

    # subset annotations
    ann_ids = coco.getAnnIds(imgIds=selected_ids)
    anns = coco.loadAnns(ann_ids)
    mini_coco = {
        "images": [coco.loadImgs(i)[0] for i in selected_ids],
        "annotations": anns,
        "categories": coco.loadCats(coco.getCatIds())
    }
    with open(f"{DATASET_DIR}/annotations/instances.json","w") as f:
        json.dump(mini_coco,f,indent=2)

print("✅ mini_coco_sample is ready.")


loading annotations into memory...
Done (t=0.51s)
creating index...
index created!
✅ mini_coco_sample is ready.


In [None]:
import os, json, random, shutil

BASE = "mini_coco_sample"
random.seed(42)  # fixed seed: the shuffle below gives the same split on every run

# Read the subset annotation file
with open(f"{BASE}/annotations/instances.json", "r") as f:
    data = json.load(f)

# Shuffle the subset's image IDs, then cut them 80% / 20% into train / val
image_ids = [entry["id"] for entry in data["images"]]
random.shuffle(image_ids)
split = int(0.8 * len(image_ids))
train_ids = set(image_ids[:split])
val_ids = set(image_ids[split:])

def build_split(ids_set):
    """Return the slice of the module-level ``data`` dict belonging to the given image IDs.

    Args:
        ids_set: Container of image IDs; membership is tested with ``in``.

    Returns:
        A dict with keys "images", "annotations" and "categories". Images are
        kept when their "id" is in ``ids_set``, annotations when their
        "image_id" is; the category list is passed through unchanged.
    """
    kept_images = []
    for image in data["images"]:
        if image["id"] in ids_set:
            kept_images.append(image)

    kept_anns = []
    for ann in data["annotations"]:
        if ann["image_id"] in ids_set:
            kept_anns.append(ann)

    return {
        "images": kept_images,
        "annotations": kept_anns,
        "categories": data["categories"],
    }

# Build both partitions up front; dict order (train, then val) drives the loop below
splits = {"train": build_split(train_ids), "val": build_split(val_ids)}

for sp, subset in splits.items():
    image_dir = f"{BASE}/{sp}/images"
    ann_dir = f"{BASE}/{sp}/annotations"
    os.makedirs(image_dir, exist_ok=True)
    os.makedirs(ann_dir, exist_ok=True)

    # Write this split's annotation file
    with open(f"{ann_dir}/instances.json", "w") as f:
        json.dump(subset, f, indent=2)

    # Copy this split's images, skipping any already in place
    present = {im["file_name"] for im in subset["images"]}
    for fname in present:
        src = f"{BASE}/images/{fname}"
        dst = f"{image_dir}/{fname}"
        if os.path.exists(dst):
            continue
        shutil.copy(src, dst)

print("✅ Train/Val split created.")


✅ Train/Val split created.


In [None]:
!pip install -q pycocotools

import torch, torchvision, json, os, numpy as np
from PIL import Image, ImageDraw, ImageFont
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms.functional import to_tensor

# Use the GPU when PyTorch reports CUDA as available, otherwise the CPU
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")
DEVICE


device(type='cuda')

In [None]:
import collections

class COCOSubset(Dataset):
    """Detection dataset backed by a COCO-format JSON file and a directory of images.

    Each item is an ``(image_tensor, target)`` pair. ``target`` is a dict with
    "boxes" (as x1, y1, x2, y2), "labels", "image_id", "area" and "iscrowd".
    """

    def __init__(self, images_dir, ann_json):
        """Load the annotation file and index its annotations by image ID.

        Args:
            images_dir: Directory that each image's "file_name" is joined onto.
            ann_json: Path to a JSON file with "images", "annotations" and
                "categories" entries.
        """
        self.images_dir = images_dir
        with open(ann_json, "r") as f:
            payload = json.load(f)

        self.categories = payload["categories"]
        # image id -> full image record from the JSON
        self.images = {record["id"]: record for record in payload["images"]}

        # image id -> list of its annotations, dropping boxes whose width or
        # height is zero or negative
        grouped = collections.defaultdict(list)
        for ann in payload["annotations"]:
            _, _, w, h = ann["bbox"]
            degenerate = w <= 0 or h <= 0
            if not degenerate:
                grouped[ann["image_id"]].append(ann)
        self.ann_per_img = grouped
        self.ids = list(self.images)

    def __len__(self):
        """Return the number of images listed in the annotation file."""
        return len(self.ids)

    def __getitem__(self, idx):
        """Return the image at position ``idx`` as a tensor together with its target dict."""
        img_id = self.ids[idx]
        record = self.images[img_id]
        img = Image.open(os.path.join(self.images_dir, record["file_name"])).convert("RGB")

        boxes, labels, areas, iscrowd = [], [], [], []
        for ann in self.ann_per_img.get(img_id, []):
            x, y, w, h = ann["bbox"]
            boxes.append([x, y, x + w, y + h])  # xywh -> xyxy
            labels.append(ann["category_id"])  # raw category_id, no remapping
            areas.append(ann.get("area", w * h))
            iscrowd.append(ann.get("iscrowd", 0))

        # reshape(-1, 4) keeps the boxes array at shape (0, 4) when the image
        # has no annotations; the 1-D fields are naturally shape (0,) then
        boxes_np = np.array(boxes, dtype=np.float32).reshape(-1, 4)
        labels_np = np.array(labels, dtype=np.int64)
        areas_np = np.array(areas, dtype=np.float32)
        iscrowd_np = np.array(iscrowd, dtype=np.int64)

        target = {
            "boxes": torch.as_tensor(boxes_np, dtype=torch.float32),
            "labels": torch.as_tensor(labels_np, dtype=torch.int64),
            "image_id": torch.tensor([img_id]),
            "area": torch.as_tensor(areas_np, dtype=torch.float32),
            "iscrowd": torch.as_tensor(iscrowd_np, dtype=torch.int64),
        }
        # to_tensor produces a float32 tensor with values in [0, 1]
        return to_tensor(img), target

def collate_fn(batch):
    """Transpose a batch of samples into one tuple per field.

    ``[(img0, tgt0), (img1, tgt1)]`` becomes ``((img0, img1), (tgt0, tgt1))``.
    An empty batch yields an empty tuple.
    """
    columns = zip(*batch)
    return tuple(columns)

# One dataset per split, each pointing at that split's images and annotation file
train_ds = COCOSubset(
    images_dir="mini_coco_sample/train/images",
    ann_json="mini_coco_sample/train/annotations/instances.json",
)
val_ds = COCOSubset(
    images_dir="mini_coco_sample/val/images",
    ann_json="mini_coco_sample/val/annotations/instances.json",
)

# The two loaders share every setting except shuffling
loader_opts = dict(batch_size=2, num_workers=2, collate_fn=collate_fn)
train_dl = DataLoader(train_ds, shuffle=True, **loader_opts)
val_dl = DataLoader(val_ds, shuffle=False, **loader_opts)

len(train_ds), len(val_ds)


(40, 10)

In [None]:
from torchvision.models.detection import fasterrcnn_resnet50_fpn

# Faster R-CNN (ResNet-50 FPN) with torchvision's default pretrained weights
# (91-class COCO head), moved onto the selected device
model = fasterrcnn_resnet50_fpn(weights="DEFAULT")
model.to(DEVICE)

# SGD over the parameters that require gradients; StepLR multiplies the
# learning rate by 0.1 every 2 scheduler steps
params = []
for p in model.parameters():
    if p.requires_grad:
        params.append(p)
optimizer = torch.optim.SGD(params, lr=0.002, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1)

print("✅ Model ready on", DEVICE)


✅ Model ready on cuda


In [None]:
import os, torch
from PIL import Image, ImageDraw, ImageFont
import torchvision

os.makedirs("predictions", exist_ok=True)

# Prefer a scalable TrueType font for the labels and fall back to Pillow's
# built-in font when it cannot be loaded. Only the failures expected from
# truetype() are caught (unreadable/missing font file, or FreeType support
# missing); a bare `except:` would also swallow KeyboardInterrupt/SystemExit.
try:
    font = ImageFont.truetype("DejaVuSans.ttf", 16)
except (OSError, ImportError):
    font = ImageFont.load_default()

# Inference mode for the model; detections scoring below score_thresh are not drawn
model.eval()
score_thresh = 0.5

# COCO category id -> name
with open("mini_coco_sample/train/annotations/instances.json","r") as f:
    cats = {c["id"]: c["name"] for c in json.load(f)["categories"]}

def draw_predictions(img_pil, boxes, labels, scores):
    """Draw detections onto ``img_pil`` in place and return the same image.

    Reads the module-level ``score_thresh`` (minimum score to draw), ``cats``
    (category id -> name) and ``font``.

    Args:
        img_pil: PIL image to draw on; it is modified in place.
        boxes: Iterable of ``(x1, y1, x2, y2)`` boxes.
        labels: Iterable of category ids, parallel to ``boxes``.
        scores: Iterable of confidence scores, parallel to ``boxes``.

    Returns:
        ``img_pil`` itself, with a green rectangle and a "name score" label
        for every detection whose score is at least ``score_thresh``.
    """
    draw = ImageDraw.Draw(img_pil)
    for box, lab, sc in zip(boxes, labels, scores):
        if sc < score_thresh:
            continue
        x1, y1, x2, y2 = box
        draw.rectangle([x1, y1, x2, y2], outline=(0, 255, 0), width=3)

        # Fall back to the raw id when the category is unknown
        text = f"{cats.get(int(lab), lab)} {sc:.2f}"
        left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
        tw, th = right - left, bottom - top

        # The label strip normally sits directly above the box. For boxes near
        # the top edge that position is off-canvas and the label would be
        # clipped away, so place the strip just inside the box instead.
        label_top = y1 - th - 4
        if label_top < 0:
            label_top = max(y1, 0)

        draw.rectangle([x1, label_top, x1 + tw + 4, label_top + th + 4], fill=(0, 255, 0))
        draw.text((x1 + 2, label_top + 2), text, fill=(0, 0, 0), font=font)
    return img_pil

# Run on all val images
# Read the file list through a context manager so the JSON file handle is
# closed deterministically rather than left to the garbage collector.
with open("mini_coco_sample/val/annotations/instances.json", "r") as f:
    val_images = [im["file_name"] for im in json.load(f)["images"]]

for fname in val_images:
    path = os.path.join("mini_coco_sample/val/images", fname)
    # Release the image file as soon as the pixels are converted to RGB
    with Image.open(path) as src_img:
        img = src_img.convert("RGB")
    # Single-image batch on the model's device
    img_t = to_tensor(img).to(DEVICE).unsqueeze(0)
    with torch.no_grad():
        out = model(img_t)[0]
    boxes = out["boxes"].cpu().numpy()
    labels = out["labels"].cpu().numpy()
    scores = out["scores"].cpu().numpy()
    # Draw on a copy so the decoded source image is left untouched
    vis = draw_predictions(img.copy(), boxes, labels, scores)
    vis.save(os.path.join("predictions", fname))

print("✅ Saved visualized predictions in /predictions")


✅ Saved visualized predictions in /predictions


In [None]:
import zipfile, os
from google.colab import files

# Pack every entry of predictions/ into a flat, deflate-compressed archive
zip_name = "predictions.zip"
with zipfile.ZipFile(zip_name, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
    for f in os.listdir("predictions"):
        # Store under the bare file name, without the "predictions/" prefix
        zf.write(os.path.join("predictions", f), arcname=f)

print(f"✅ {zip_name} ready.")
files.download(zip_name)


✅ predictions.zip ready.


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Serialize the model's state dict, then hand the file to the Colab browser download.
# NOTE(review): no training step is visible in the cells shown, so this presumably
# stores the pretrained weights unchanged -- confirm before treating it as fine-tuned.
weights_path = "fasterrcnn_resnet50fpn_subset.pth"
torch.save(model.state_dict(), weights_path)
from google.colab import files
files.download(weights_path)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>