In [2]:
!pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
!pip install ultralytics pillow pandas tqdm opencv-python pytesseract


Looking in indexes: https://download.pytorch.org/whl/cpu
Collecting torch
  Downloading https://download.pytorch.org/whl/cpu/torch-2.9.1-cp313-none-macosx_11_0_arm64.whl.metadata (29 kB)
Collecting torchvision
  Downloading https://download.pytorch.org/whl/cpu/torchvision-0.24.1-cp313-cp313-macosx_12_0_arm64.whl.metadata (5.9 kB)
Downloading https://download.pytorch.org/whl/cpu/torch-2.9.1-cp313-none-macosx_11_0_arm64.whl (74.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m74.4/74.4 MB[0m [31m37.4 MB/s[0m  [33m0:00:02[0mm0:00:01[0m00:01[0m
Downloading https://download.pytorch.org/whl/cpu/torchvision-0.24.1-cp313-cp313-macosx_12_0_arm64.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m37.3 MB/s[0m  [33m0:00:00[0m
Installing collected packages: torch, torchvision
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2/2[0m [torchvision][0m [torchvision]
Successfully installed torch-2.9.1 torchvision-0.2

In [6]:
# -----------------------------------------
# OBJECT DETECTOR COMPARISON 
# -----------------------------------------

import os
import time
from pathlib import Path
from PIL import Image
import torch
import torchvision
import torchvision.transforms as T
import pandas as pd
import numpy as np
from tqdm import tqdm
from IPython.display import display
import warnings

warnings.filterwarnings("ignore")

# ------------------------------
# CONFIGURATION
# ------------------------------
INPUT_DIR = "images"       # folder with your images
OUTPUT_DIR = "output"      # folder to save results
DEVICE = "cpu"             # change to "cuda" if GPU available
MIN_CONF = 0.4             # minimum confidence threshold

os.makedirs(OUTPUT_DIR, exist_ok=True)

transform = T.Compose([T.ToTensor()])

# ------------------------------
# HELPER FUNCTION: COLOR HISTOGRAM
# ------------------------------
def simple_color_histogram(img):
    arr = np.array(img)
    hist_r = np.histogram(arr[:, :, 0], bins=8, range=(0, 255))[0]
    hist_g = np.histogram(arr[:, :, 1], bins=8, range=(0, 255))[0]
    hist_b = np.histogram(arr[:, :, 2], bins=8, range=(0, 255))[0]
    return np.concatenate([hist_r, hist_g, hist_b]).tolist()

# ------------------------------
# BASE DETECTOR CLASS
# ------------------------------
class BaseDetector:
    def __init__(self, device="cpu", threshold=0.4):
        self.device = device
        self.threshold = threshold
        self.model = None
        self.name = "base"

    def predict(self, pil_image):
        return []

# ------------------------------
# FASTER RCNN DETECTOR
# ------------------------------
class FasterRCNNDetector(BaseDetector):
    def __init__(self, device="cpu", threshold=0.4):
        super().__init__(device, threshold)
        self.name = "fasterrcnn"
        self.model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")
        self.model.eval().to(self.device)

    def predict(self, pil_image):
        img_t = transform(pil_image).to(self.device)
        with torch.no_grad():
            preds = self.model([img_t])[0]

        outputs = []
        for box, score, label in zip(preds["boxes"], preds["scores"], preds["labels"]):
            if score >= self.threshold:
                outputs.append({
                    "x1": float(box[0]), "y1": float(box[1]),
                    "x2": float(box[2]), "y2": float(box[3]),
                    "score": float(score),
                    "label": int(label)
                })
        return outputs

# ------------------------------
# MASK RCNN DETECTOR
# ------------------------------
class MaskRCNNDetector(BaseDetector):
    def __init__(self, device="cpu", threshold=0.4):
        super().__init__(device, threshold)
        self.name = "maskrcnn"
        self.model = torchvision.models.detection.maskrcnn_resnet50_fpn(weights="DEFAULT")
        self.model.eval().to(self.device)

    def predict(self, pil_image):
        img_t = transform(pil_image).to(self.device)
        with torch.no_grad():
            preds = self.model([img_t])[0]

        outputs = []
        for box, score, label in zip(preds["boxes"], preds["scores"], preds["labels"]):
            if score >= self.threshold:
                outputs.append({
                    "x1": float(box[0]), "y1": float(box[1]),
                    "x2": float(box[2]), "y2": float(box[3]),
                    "score": float(score),
                    "label": int(label)
                })
        return outputs

# ------------------------------
# YOLOv8 DETECTOR (OPTIONAL)
# ------------------------------
try:
    from ultralytics import YOLO

    class YOLODetector(BaseDetector):
        def __init__(self, device="cpu", threshold=0.4):
            super().__init__(device, threshold)
            self.name = "yolov8"
            self.model = YOLO("yolov8n.pt")  # small pretrained model

        def predict(self, pil_image):
            results = self.model.predict(pil_image, conf=self.threshold, verbose=False)
            outputs = []
            for r in results:
                if r.boxes is None:
                    continue
                for b in r.boxes:
                    outputs.append({
                        "x1": float(b.xyxy[0][0]),
                        "y1": float(b.xyxy[0][1]),
                        "x2": float(b.xyxy[0][2]),
                        "y2": float(b.xyxy[0][3]),
                        "score": float(b.conf[0]),
                        "label": int(b.cls[0])
                    })
            return outputs

except Exception:
    YOLODetector = None
    print("YOLOv8 not installed. Skipping YOLO.")

# ------------------------------
# PROCESS IMAGES
# ------------------------------
def process_images(input_dir, output_dir, models, min_confidence, device):
    input_dir = Path(input_dir)
    output_dir = Path(output_dir)
    image_paths = [p for p in input_dir.glob("*") if p.suffix.lower() in [".jpg", ".png", ".jpeg"]]

    if not image_paths:
        raise FileNotFoundError("No images found in your images folder.")

    rows = []
    summary = []

    for img_path in tqdm(image_paths, desc="Processing Images"):
        img = Image.open(img_path).convert("RGB")

        row = {
            "image": img_path.name,
            "color_histogram": simple_color_histogram(img)
        }

        for model_name, model_obj in models.items():
            start = time.time()
            preds = model_obj.predict(img)
            elapsed = time.time() - start

            row[f"{model_name}_detections"] = len(preds)
            row[f"{model_name}_time"] = elapsed
            row[f"{model_name}_avg_conf"] = np.mean([d["score"] for d in preds]) if preds else 0

        rows.append(row)

    df = pd.DataFrame(rows)

    for model_name in models.keys():
        summary.append({
            "model": model_name,
            "avg_detections": df[f"{model_name}_detections"].mean(),
            "avg_time_sec": df[f"{model_name}_time"].mean(),
            "avg_probability": df[f"{model_name}_avg_conf"].mean()
        })

    summary_df = pd.DataFrame(summary)

    # Save CSVs
    df.to_csv(output_dir / "results.csv", index=False)
    summary_df.to_csv(output_dir / "summary.csv", index=False)

    return df, summary_df

# ------------------------------
# INITIALIZE MODELS
# ------------------------------
models = {
    "fasterrcnn": FasterRCNNDetector(device=DEVICE, threshold=MIN_CONF),
    "maskrcnn": MaskRCNNDetector(device=DEVICE, threshold=MIN_CONF)
}

if YOLODetector:
    models["yolov8"] = YOLODetector(device=DEVICE, threshold=MIN_CONF)

# ------------------------------
# RUN PROCESSING
# ------------------------------
df, summary_df = process_images(
    input_dir=INPUT_DIR,
    output_dir=OUTPUT_DIR,
    models=models,
    min_confidence=MIN_CONF,
    device=DEVICE
)

# ------------------------------
# DISPLAY RESULTS IN NOTEBOOK
# ------------------------------
print("=== PER-IMAGE RESULTS ===")
display(df)

print("\n=== SUMMARY RESULTS ===")
display(summary_df)


Processing Images: 100%|████████████████████████| 12/12 [00:37<00:00,  3.14s/it]

=== PER-IMAGE RESULTS ===





Unnamed: 0,image,color_histogram,fasterrcnn_detections,fasterrcnn_time,fasterrcnn_avg_conf,maskrcnn_detections,maskrcnn_time,maskrcnn_avg_conf,yolov8_detections,yolov8_time,yolov8_avg_conf
0,image 14.jpg,"[314337, 738245, 739431, 1194152, 2084353, 116...",21,1.011581,0.80731,22,3.402952,0.813044,9,0.123433,0.79614
1,image 12.jpg,"[5748, 174413, 205566, 151865, 106577, 82056, ...",1,0.950009,0.998521,1,1.135054,0.998521,1,0.05319,0.911223
2,image 1.jpeg,"[1287030, 464420, 358403, 508010, 1353713, 125...",59,1.034521,0.853328,62,3.983706,0.836618,24,0.095013,0.625284
3,image 15.jpeg,"[436844, 297436, 468250, 440156, 1181948, 7199...",9,0.8823,0.901201,10,1.321941,0.833684,5,0.071335,0.518509
4,image 7.jpg,"[222339, 167626, 415241, 557590, 207638, 11742...",7,0.681083,0.673714,7,1.598382,0.723467,2,0.078434,0.834865
5,image 4.jpeg,"[418, 1657, 5913, 32300, 39901, 31121, 22577, ...",8,0.683935,0.757394,6,0.935445,0.858369,3,0.052875,0.712629
6,image 6.jpg,"[11796, 85232, 250895, 223962, 155317, 111052,...",12,1.054994,0.848556,13,1.657268,0.844065,7,0.051558,0.658
7,image 8.jpeg,"[13159, 13651, 9235, 6595, 4001, 3126, 540, 93...",21,1.186975,0.686701,23,1.790099,0.698814,2,0.041197,0.678849
8,image 5.jpg,"[37146, 63343, 40875, 95974, 169598, 264136, 2...",14,0.543432,0.867119,17,1.263422,0.765085,7,0.063573,0.646149
9,image 13.jpeg,"[470025, 1040570, 1019865, 1386690, 1304560, 5...",5,0.913659,0.87833,5,0.927317,0.912885,4,0.064715,0.815367



=== SUMMARY RESULTS ===


Unnamed: 0,model,avg_detections,avg_time_sec,avg_probability
0,fasterrcnn,17.166667,0.906421,0.836927
1,maskrcnn,18.333333,2.02366,0.8249
2,yolov8,7.666667,0.07268,0.705141
