In [None]:
from pathlib import Path
import time
import numpy as np
import pandas as pd
import torch
import torchvision.transforms.functional as F
import torchvision.models.detection as detection
from torchvision.models.detection.faster_rcnn import FasterRCNN_ResNet50_FPN_Weights
from torchvision.ops import nms
from ultralytics import YOLO
from PIL import Image
import matplotlib.pyplot as plt

In [None]:
#configure device, paths, and confidence threshold
# Score cut-off shared by both detectors' helper functions.
CONF_THRESHOLD = 0.25

# Prefer the GPU when torch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Images are read from DATA_DIR; CSV reports are written to RESULTS_DIR.
DATA_DIR = Path("data/images")
RESULTS_DIR = Path("outputs")

# Create both folders up front (a no-op when they already exist).
DATA_DIR.mkdir(parents=True, exist_ok=True)
RESULTS_DIR.mkdir(parents=True, exist_ok=True)

In [None]:
#load class names and models (YOLOv8n and Faster R-CNN)
# The category list is taken from the Faster R-CNN weights metadata; it is printed
# once so the placeholder entries ('__background__', 'N/A') are visible.
weights = FasterRCNN_ResNet50_FPN_Weights.DEFAULT
COCO_CLASSES = weights.meta["categories"]
print(COCO_CLASSES)
yolo_model = YOLO("yolov8n.pt").to(DEVICE)
# nn.Module.eval() returns the module itself, so chaining it into the assignment
# switches the detector to inference mode without leaving a bare expression at the
# end of the cell that would echo the entire architecture as output.
frcnn_model = detection.fasterrcnn_resnet50_fpn(weights=weights).to(DEVICE).eval()

['__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table', 'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']


FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [None]:
#collect image list and basic sanity check
# Keep only regular files with a known image extension. A bare glob("*") also
# returns sub-folders and stray files (Thumbs.db, .DS_Store, notes, ...), which
# would pass the count check below and then crash the PIL-based helpers.
IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff", ".webp"}
image_paths = sorted(
    path
    for path in DATA_DIR.glob("*")
    if path.is_file() and path.suffix.lower() in IMAGE_EXTENSIONS
)
assert len(image_paths) >= 10, f"Found {len(image_paths)} images in {DATA_DIR}. Please add at least 10."
len(image_paths), image_paths[:2]

(10,
 [WindowsPath('data/images/image_1.jpg'),
  WindowsPath('data/images/image_10.jpg')])

In [None]:
#YOLOv8n inference helper
def run_yolo(image_path):
    """Run the YOLO model on one image and time the prediction call.

    The image is decoded with PIL *before* the timer starts, mirroring
    ``run_frcnn``, so both helpers exclude disk I/O from the measurement and
    both models receive the same decoded RGB pixels.

    Args:
        image_path: Path to the image file.

    Returns:
        ``(preds, elapsed)`` where ``preds`` is a list of
        ``{"label": str, "score": float}`` dicts (one per detected box) and
        ``elapsed`` is the wall-clock duration of ``predict`` in seconds.
    """
    image = Image.open(image_path).convert("RGB")

    # CUDA kernels are launched asynchronously; synchronise around the timed
    # region so the clock covers the actual GPU work.
    on_cuda = str(DEVICE).startswith("cuda")
    if on_cuda:
        torch.cuda.synchronize()
    start = time.perf_counter()
    results = yolo_model.predict(image, conf=CONF_THRESHOLD, verbose=False, device=DEVICE)
    if on_cuda:
        torch.cuda.synchronize()
    elapsed = time.perf_counter() - start

    result = results[0]
    # Pull all class ids / confidences to the host in one transfer each instead
    # of one .item() call per box.
    class_ids = result.boxes.cls.tolist()
    scores = result.boxes.conf.tolist()
    preds = [
        {"label": result.names[int(class_id)], "score": float(score)}
        for class_id, score in zip(class_ids, scores)
    ]
    return preds, elapsed

In [None]:
#faster R-CNN inference helper
def run_frcnn(image_path, iou_threshold=0.4):
    """Run Faster R-CNN on one image and time the forward pass.

    Args:
        image_path: Path to the image file.
        iou_threshold: IoU above which the lower-scoring of two boxes is
            dropped by the extra NMS pass (default 0.4, the value previously
            hard-coded here).

    Returns:
        ``(preds, elapsed)`` where ``preds`` is a list of
        ``{"label": str, "score": float}`` dicts for the boxes that survive NMS
        and score at least ``CONF_THRESHOLD`` (highest score first), and
        ``elapsed`` is the wall-clock duration of the forward pass in seconds.
    """
    image = Image.open(image_path).convert("RGB")
    tensor = F.to_tensor(image).to(DEVICE)

    # CUDA work is asynchronous: synchronise before starting the clock (so the
    # host-to-device copy is not billed to the model) and again before stopping
    # it (so the forward pass has really finished).
    on_cuda = str(DEVICE).startswith("cuda")
    if on_cuda:
        torch.cuda.synchronize()
    start = time.perf_counter()
    with torch.no_grad():
        output = frcnn_model([tensor])[0]
    if on_cuda:
        torch.cuda.synchronize()
    elapsed = time.perf_counter() - start

    # NOTE(review): this NMS pass is class-agnostic, so overlapping boxes of
    # different classes can suppress each other. torchvision's detector
    # presumably already applies its own per-class NMS -- confirm the extra pass
    # is intended.
    keep = nms(output["boxes"], output["scores"], iou_threshold)

    # Filter on the kept subset in one vectorised step rather than indexing the
    # tensors element by element; nms returns indices in decreasing score order.
    kept_scores = output["scores"][keep]
    kept_labels = output["labels"][keep]
    confident = kept_scores >= CONF_THRESHOLD
    preds = [
        {"label": COCO_CLASSES[label_id], "score": score}
        for label_id, score in zip(kept_labels[confident].tolist(), kept_scores[confident].tolist())
    ]
    return preds, elapsed

In [None]:
#run both models on each image and save comparison table
def summarize_preds(preds, top_k=3):
    """Condense a prediction list into a count and a short text summary.

    Args:
        preds: Iterable of dicts with ``"label"`` and ``"score"`` keys.
        top_k: How many of the highest-scoring predictions to list.

    Returns:
        ``(count, text)`` where ``count`` is the total number of predictions and
        ``text`` lists the ``top_k`` best as ``"label (score)"`` joined by
        commas, or a placeholder string when there is nothing to list.
    """
    ranked = sorted(preds, key=lambda pred: pred["score"], reverse=True)
    best = [f"{pred['label']} ({pred['score']:.2f})" for pred in ranked[:top_k]]
    if not best:
        return len(ranked), "(none above threshold)"
    return len(ranked), ", ".join(best)
# Untimed warm-up pass: the first call to each model pays one-off start-up costs
# (lazy initialisation, CUDA context / kernel selection) that would otherwise be
# billed to the first image only and skew the comparison.
if image_paths:
    run_yolo(image_paths[0])
    run_frcnn(image_paths[0])

# One (display name, helper) pair per model so the per-image bookkeeping is
# written once instead of being copy-pasted for each detector.
MODEL_RUNNERS = (("YOLOv8n", run_yolo), ("Faster R-CNN", run_frcnn))

records = []
for img in image_paths:
    for model_name, run_model in MODEL_RUNNERS:
        preds, elapsed = run_model(img)
        count, top = summarize_preds(preds)
        records.append({
            "image": img.name,
            "model": model_name,
            "inference_time_sec": elapsed,
            "objects_detected": count,
            "top_predictions": top,
        })

# Build the frame once from the collected rows, persist it, then display it.
results_df = pd.DataFrame(records)
results_df.to_csv(RESULTS_DIR / "detection_comparison.csv", index=False)
results_df

Unnamed: 0,image,model,inference_time_sec,objects_detected,top_predictions
0,image_1.jpg,YOLOv8n,0.614021,11,"person (0.85), sports ball (0.76), person (0.74)"
1,image_1.jpg,Faster R-CNN,3.353831,21,"person (1.00), person (0.99), person (0.99)"
2,image_10.jpg,YOLOv8n,0.127034,1,umbrella (0.59)
3,image_10.jpg,Faster R-CNN,2.373355,1,mouse (0.49)
4,image_2.jpg,YOLOv8n,0.094814,1,cat (0.84)
5,image_2.jpg,Faster R-CNN,3.119037,1,cat (0.99)
6,image_3.jpg,YOLOv8n,0.127125,1,dog (0.83)
7,image_3.jpg,Faster R-CNN,2.870529,2,"dog (1.00), bird (0.49)"
8,image_4.jpg,YOLOv8n,0.101453,1,orange (0.64)
9,image_4.jpg,Faster R-CNN,2.171902,3,"banana (0.53), sports ball (0.51), dining tabl..."


In [None]:
#compute simple color/size stats and save
def color_stats(image_path):
    """Summarise the size and average colour of one image.

    Args:
        image_path: Path to the image file.

    Returns:
        Dict with the file name, pixel width and height, the mean of each RGB
        channel (``avg_r``, ``avg_g``, ``avg_b``) and ``mean_brightness``, the
        mean over every channel value in the image.
    """
    rgb_image = Image.open(image_path).convert("RGB")
    width, height = rgb_image.size
    pixels = np.array(rgb_image)
    # One row per pixel, one column per channel -> column means are channel means.
    mean_r, mean_g, mean_b = pixels.reshape(-1, 3).mean(axis=0)
    return {
        "image": image_path.name,
        "width": width,
        "height": height,
        "avg_r": mean_r,
        "avg_g": mean_g,
        "avg_b": mean_b,
        "mean_brightness": pixels.mean(),
    }
# One stats row per image, in the same order as image_paths.
color_df = pd.DataFrame(list(map(color_stats, image_paths)))
# Persist next to the other reports, then show the table as the cell output.
color_df.to_csv(RESULTS_DIR / "image_color_stats.csv", index=False)
color_df

Unnamed: 0,image,width,height,avg_r,avg_g,avg_b,mean_brightness
0,image_1.jpg,204,247,131.124295,118.184349,105.818647,118.375764
1,image_10.jpg,225,225,164.24804,202.882331,176.935881,181.355417
2,image_2.jpg,275,183,48.291108,37.358549,29.746627,38.465428
3,image_3.jpg,194,259,121.91259,117.740278,111.183597,116.945488
4,image_4.jpg,180,180,201.59284,190.698519,149.572562,180.621307
5,image_5.jpg,226,223,229.955812,195.843149,195.486428,207.095129
6,image_6.jpg,275,183,136.254903,107.05852,75.033184,106.115536
7,image_7.jpg,1080,720,64.773942,79.515728,85.614201,76.634624
8,image_8.jpg,500,750,114.109237,117.610512,88.109677,106.609809
9,image_9.jpg,1200,900,98.664273,104.595762,68.036463,90.432166
