In [2]:
import os
import requests
from io import BytesIO
from PIL import Image
import torch
import torchvision.transforms as T
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# Install pycocotools before running (uncomment when needed)
# !pip install pycocotools

from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import numpy as np

In [3]:
import os
import requests
from io import BytesIO
from PIL import Image

# --- 1. SETUP AND DATA ACQUISITION ---

import os
import requests
from io import BytesIO
from PIL import Image

hf_token = "#"  ##your hugging face token 
api_url = "https://datasets-server.huggingface.co/rows?dataset=rafaelpadilla%2Fcoco2017&config=default&split=train&offset=0&length=100"

headers = {"Authorization": f"Bearer {hf_token}"}

# -----------------------------
# 1. Fetch metadata from HF
# -----------------------------
try:
    response = requests.get(api_url, headers=headers)
    response.raise_for_status()
    data = response.json()
except requests.exceptions.RequestException as e:
    print(f"Error fetching data: {e}")
    exit()

images_data = data.get("rows", [])

# folder for storing dataset images
os.makedirs("coco_images", exist_ok=True)

dataset = []

print("Processing images and annotations...")

# -----------------------------
# 2. Loop through HF metadata
# -----------------------------
for item in images_data:
    row = item["row"]

    img_url = row["image"]["src"]
    img_id = row["image_id"]
    img_path = f"coco_images/{img_id}.jpg"

    # ------------------------------------------
    # NEW: Option 1 → use local image if present
    # ------------------------------------------
    if os.path.exists(img_path):
        print(f"Found local image {img_id}. Using existing file.")
    else:
        # ------------------------------------------
        # Option 2 → download from Hugging Face
        # ------------------------------------------
        print(f"Local image not found for {img_id}. Downloading from HF...")
        try:
            img_response = requests.get(img_url, timeout=10)
            img_response.raise_for_status()

            img = Image.open(BytesIO(img_response.content)).convert("RGB")
            img.save(img_path)

            print(f"Downloaded and saved image {img_id}")
        except Exception as e:
            print(f"Failed to download image {img_id}: {e}")
            continue

    # Extract annotations
    objects = row.get("objects", {})
    boxes = objects.get("bbox", [])
    labels = objects.get("label", [])

    dataset.append({
        "image_path": img_path,
        "boxes": boxes,
        "labels": labels
    })

print(f"Total images processed: {len(dataset)}")

# Keep only samples with valid boxes
valid_dataset = [item for item in dataset if item.get("boxes")]
print(f"Valid samples with annotations: {len(valid_dataset)}")


Processing images and annotations...
Local image not found for 147328. Downloading from HF...
Downloaded and saved image 147328
Local image not found for 414738. Downloading from HF...
Downloaded and saved image 414738
Local image not found for 281563. Downloading from HF...
Downloaded and saved image 281563
Local image not found for 63879. Downloading from HF...
Downloaded and saved image 63879
Local image not found for 531349. Downloading from HF...
Downloaded and saved image 531349
Local image not found for 340329. Downloading from HF...
Downloaded and saved image 340329
Local image not found for 182236. Downloading from HF...
Downloaded and saved image 182236
Local image not found for 326820. Downloading from HF...
Downloaded and saved image 326820
Local image not found for 149364. Downloading from HF...
Downloaded and saved image 149364
Local image not found for 166598. Downloading from HF...
Downloaded and saved image 166598
Local image not found for 424792. Downloading from HF..

In [4]:
# --- 2. CUSTOM DATASET CLASS ---

class CustomCocoDataset(Dataset):
    def __init__(self, dataset_list, transforms=None):
        self.dataset_list = dataset_list
        self.transforms = transforms if transforms is not None else T.ToTensor()

    def __len__(self):
        return len(self.dataset_list)

    def __getitem__(self, idx):
        data = self.dataset_list[idx]
        img = Image.open(data['image_path']).convert("RGB")

        boxes = []
        labels = []
        for box, label in zip(data['boxes'], data['labels']):
            x, y, w, h = box
            x_min, y_min = x, y
            x_max, y_max = x + w, y + h

            if w > 0 and h > 0 and x_max > x_min and y_max > y_min:
                boxes.append([x_min, y_min, x_max, y_max])
                labels.append(label)

        boxes = torch.tensor(boxes, dtype=torch.float32)
        labels = torch.tensor(labels, dtype=torch.int64)

        target = {"boxes": boxes, "labels": labels}
        img = self.transforms(img)

        return img, target

In [5]:
# --- 3. DATASET SPLIT AND DATALOADER SETUP ---

transform = T.Compose([
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225])
])

val_size = int(0.2 * len(valid_dataset))
train_size = len(valid_dataset) - val_size
train_subset, val_subset = random_split(valid_dataset, [train_size, val_size])

train_dataset = CustomCocoDataset(train_subset, transforms=transform)
val_dataset = CustomCocoDataset(val_subset, transforms=transform)

def collate_fn(batch):
    return tuple(zip(*batch))

train_loader = DataLoader(
    train_dataset,
    batch_size=4,
    shuffle=True,
    collate_fn=collate_fn
)

val_loader = DataLoader(
    val_dataset,
    batch_size=4,
    shuffle=False,
    collate_fn=collate_fn
)


In [None]:
# --- 4. MODEL SETUP ---

model = fasterrcnn_resnet50_fpn(weights="DEFAULT")

num_classes = max([label for d in valid_dataset for label in d['labels']]) + 1

in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

print(f"✅ Model modified for {num_classes} classes and moved to {device}")

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to C:\Users\navee/.cache\torch\hub\checkpoints\fasterrcnn_resnet50_fpn_coco-258fb6c6.pth


 15%|█▍        | 23.9M/160M [00:40<02:03, 1.15MB/s]

In [None]:
# --- 5. EVALUATION FUNCTION ---

def evaluate_coco(model, data_loader, device):
    model.eval()
    gt_annotations = []
    dt_annotations = []
    image_ids = []
    ann_id = 1
    img_id_counter = 0

    with torch.no_grad():
        for images, targets in data_loader:
            images = list(img.to(device) for img in images)
            outputs = model(images)

            for i, output in enumerate(outputs):
                gt_boxes = targets[i]['boxes'].cpu().numpy()
                gt_labels = targets[i]['labels'].cpu().numpy()
                img_id = img_id_counter

                for box, label in zip(gt_boxes, gt_labels):
                    x_min, y_min, x_max, y_max = box
                    width = x_max - x_min
                    height = y_max - y_min
                    area = width * height
                    gt_annotations.append({
                        "image_id": img_id,
                        "category_id": int(label),
                        "bbox": [float(x_min), float(y_min), float(width), float(height)],
                        "area": float(area),
                        "id": ann_id,
                        "iscrowd": 0
                    })
                    ann_id += 1

                pred_boxes = output['boxes'].cpu().numpy()
                pred_scores = output['scores'].cpu().numpy()
                pred_labels = output['labels'].cpu().numpy()
                for p_box, p_score, p_label in zip(pred_boxes, pred_scores, pred_labels):
                    x_min, y_min, x_max, y_max = p_box
                    width = x_max - x_min
                    height = y_max - y_min
                    dt_annotations.append({
                        "image_id": img_id,
                        "category_id": int(p_label),
                        "bbox": [float(x_min), float(y_min), float(width), float(height)],
                        "score": float(p_score)
                    })

                image_ids.append(img_id)
                img_id_counter += 1

    coco_gt = COCO()
    coco_gt.dataset = {
     "info": {},   # Add empty info dictionary
     "licenses": [],  # Add empty licenses list
      "images": [{"id": img_id} for img_id in image_ids],
     "annotations": gt_annotations,
      "categories": [{"id": i} for i in range(num_classes)]
    }
    coco_gt.createIndex()

    coco_gt.createIndex()

    coco_dt = coco_gt.loadRes(dt_annotations)

    coco_eval = COCOeval(coco_gt, coco_dt, iouType='bbox')
    coco_eval.params.imgIds = image_ids
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()

    return coco_eval.stats


In [None]:
import os

# --- 6. TRAINING LOOP WITH CHECKPOINT SAVING AND RESUME ---

optimizer = torch.optim.SGD(model.parameters(), lr=0.005, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
num_epochs = 12

checkpoint_dir = "checkpoints"
os.makedirs(checkpoint_dir, exist_ok=True)

# Check for existing checkpoints and load the latest if available
checkpoint_files = [f for f in os.listdir(checkpoint_dir) if f.endswith('.pth')]
if checkpoint_files:
    checkpoint_files.sort(key=lambda f: int(f.split('_')[-1].split('.')[0]))
    latest_checkpoint_path = os.path.join(checkpoint_dir, checkpoint_files[-1])
    checkpoint = torch.load(latest_checkpoint_path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    lr_scheduler.load_state_dict(checkpoint['lr_scheduler_state_dict'])
    start_epoch = checkpoint['epoch']
    print(f"Resuming training from epoch {start_epoch}")
else:
    start_epoch = 0
    print("Starting training from scratch")

print("Starting training...")

for epoch in range(start_epoch, num_epochs):
    model.train()
    epoch_loss = 0

    for i, (images, targets) in enumerate(train_loader):
        images = list(img.to(device) for img in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        epoch_loss += losses.item()

    lr_scheduler.step()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss/len(train_loader):.4f}")

    # Save checkpoint for each epoch
    checkpoint_path = os.path.join(checkpoint_dir, f"fasterrcnn_epoch_{epoch+1}.pth")
    torch.save({
        'epoch': epoch + 1,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'lr_scheduler_state_dict': lr_scheduler.state_dict(),
        'loss': epoch_loss / len(train_loader)
    }, checkpoint_path)
    print(f"✅ Saved checkpoint: {checkpoint_path}")

print("✅ Training complete!")


Resuming training from epoch 12
Starting training...
✅ Training complete!


In [None]:
# Load the best or final checkpoint
checkpoint_path = os.path.join(checkpoint_dir, f"fasterrcnn_epoch_{2}.pth")
checkpoint = torch.load(checkpoint_path, map_location=device)


In [None]:

model.load_state_dict(checkpoint['model_state_dict'])
model.to(device)
model.eval()

# Print the saved loss from this checkpoint
saved_loss = checkpoint.get('loss', None)
if saved_loss is not None:
    print(f"Training loss saved in checkpoint for epoch {checkpoint['epoch']}: {saved_loss:.4f}")

print("Starting final evaluation on validation set:")
metrics = evaluate_coco(model, val_loader, device)
print(f"Final Validation mAP (AP@[IoU=0.50:0.95]): {metrics[0]:.4f}")


Training loss saved in checkpoint for epoch 2: 1.0425
Starting final evaluation on validation set:
creating index...
index created!
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.17s).
Accumulating evaluation results...
DONE (t=0.38s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.001
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.005
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.001
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.001
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.001
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.004
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.003
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDe