In [None]:
#Task 4 - cell 1
from google.colab import files
import zipfile, os, glob

# Upload labels_my-project-name_....zip from your Downloads
uploaded = files.upload()
if not uploaded:
    # Nothing came back from the upload widget (e.g. the dialog was dismissed).
    # Fail with a clear message instead of an IndexError on the lookup below.
    raise RuntimeError(
        "No file was uploaded - re-run this cell and select the labels .zip archive."
    )

# Only the first uploaded file is used; any additional uploads are ignored.
zip_name = next(iter(uploaded))
print("Uploaded:", zip_name)

# Extract into gt_yolo/ (created on first run, reused afterwards)
os.makedirs("gt_yolo", exist_ok=True)
with zipfile.ZipFile(zip_name, "r") as z:
    z.extractall("gt_yolo")

# Find images inside the extracted folder (searched recursively)
img_exts = ("*.jpg", "*.jpeg", "*.png")
image_paths = []
for ext in img_exts:
    image_paths.extend(glob.glob(os.path.join("gt_yolo", "**", ext), recursive=True))

# glob returns paths in filesystem-dependent order; sort so that the preview
# below and anything iterating over image_paths is reproducible across runs.
image_paths.sort()

print("Found images:", len(image_paths))
image_paths[:5]


In [None]:
#cell 2
def load_gt_boxes_yolo(image_paths, root_dir="gt_yolo"):
    """
    Load YOLO-format ground-truth boxes for class 0 (person) for each image.

    The label file for an image is looked up next to the image, using the
    same file stem with a ``.txt`` extension. Each label line must consist of
    exactly five whitespace-separated numeric fields
    ``class x_center y_center width height`` with the box values normalized
    by the image width/height. Lines with a different field count or with
    non-numeric fields are skipped instead of aborting the whole load.

    Args:
        image_paths: iterable of image file paths.
        root_dir: currently unused; kept so existing calls that pass it
            continue to work.

    Returns dict:
      gt_boxes[img_path] = float32 array of shape (k, 4) whose rows are
      [x1, y1, x2, y2] (absolute pixel coords)

    Images that cannot be read, have no label file, or have no class-0 box
    are omitted from the result.

    Note: relies on ``cv2``, ``np`` and ``os`` already being imported in the
    notebook session.
    """
    gt_boxes = {}

    for img_path in image_paths:
        # The image is only decoded to obtain its pixel dimensions.
        img = cv2.imread(img_path)
        if img is None:
            continue
        h, w = img.shape[:2]

        # Same directory, same stem, ".txt" extension.
        txt_path = os.path.splitext(img_path)[0] + ".txt"

        if not os.path.exists(txt_path):
            # no labels for this frame -> skip
            continue

        boxes = []
        with open(txt_path, "r") as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) != 5:
                    continue
                try:
                    cls_id, xc, yc, bw, bh = map(float, parts)
                    cls_id = int(cls_id)
                except (ValueError, OverflowError):
                    # Non-numeric (or nan/inf class id) token: treat it like
                    # any other malformed line and skip it.
                    continue

                # Only keep class 0 = person
                if cls_id != 0:
                    continue

                # YOLO normalized -> absolute xyxy
                x_center = xc * w
                y_center = yc * h
                half_w = bw * w / 2
                half_h = bh * h / 2

                boxes.append([
                    x_center - half_w,
                    y_center - half_h,
                    x_center + half_w,
                    y_center + half_h,
                ])

        if boxes:
            gt_boxes[img_path] = np.array(boxes, dtype=np.float32)

    return gt_boxes

# Parse the label files for every discovered image, report how many images
# ended up with ground-truth boxes, and preview the first few of their paths.
gt_boxes = load_gt_boxes_yolo(image_paths=image_paths)
print("Images with GT boxes:", len(gt_boxes))
list(gt_boxes)[:5]


In [None]:
#cell 3
import numpy as np
import cv2
import os

# --- Helper: IoU between two boxes (x1,y1,x2,y2) ---
def iou(boxA, boxB):
    """
    Intersection-over-union of two axis-aligned boxes.

    Each box is an indexable of at least four numbers (x1, y1, x2, y2).
    Returns 0.0 when the boxes do not overlap (touching edges count as no
    overlap); otherwise intersection area divided by union area, with a
    1e-9 term added to the denominator.
    """
    # Corners of the overlap rectangle.
    left, top = max(boxA[0], boxB[0]), max(boxA[1], boxB[1])
    right, bottom = min(boxA[2], boxB[2]), min(boxA[3], boxB[3])

    # Negative extents mean no overlap along that axis -> clamp to zero.
    overlap = max(0, right - left) * max(0, bottom - top)
    if overlap == 0:
        return 0.0

    areas = [max(0, b[2] - b[0]) * max(0, b[3] - b[1]) for b in (boxA, boxB)]
    union = areas[0] + areas[1] - overlap + 1e-9
    return overlap / float(union)

# --- Run YOLO on each GT image and collect stats ---
# Every image in gt_boxes is run through the detector; person detections are
# matched one-to-one against the ground-truth boxes, and TP/FP/FN counts are
# accumulated per image and globally. Relies on `model` (a detector exposing
# .predict(...) whose result has .boxes with .cls/.conf/.xyxy), `gt_boxes`
# and `iou` being defined earlier in the notebook.
conf_thresh = 0.25   # same-ish as before
iou_thresh  = 0.5    # standard mAP/PR IoU threshold

TP = FP = FN = 0
per_image_stats = []

for img_path, boxes_gt in gt_boxes.items():
    img = cv2.imread(img_path)
    if img is None:
        print("Could not read:", img_path)
        continue

    # YOLO inference
    results = model.predict(source=img, conf=conf_thresh, verbose=False)[0]

    # collect person detections (class 0) together with their confidence
    scored_preds = []
    for b in results.boxes:
        if int(b.cls[0]) != 0:
            continue  # only 'person'
        x1, y1, x2, y2 = b.xyxy[0].cpu().numpy().tolist()
        scored_preds.append((float(b.conf[0]), [x1, y1, x2, y2]))

    # Greedy matching depends on the order in which predictions claim GT
    # boxes, so process the most confident detections first (stable sort:
    # equal-confidence detections keep the detector's order).
    scored_preds.sort(key=lambda scored: scored[0], reverse=True)
    preds = [box for _, box in scored_preds]

    preds = np.array(preds, dtype=np.float32) if len(preds) > 0 else np.zeros((0,4), dtype=np.float32)
    gts   = boxes_gt.astype(np.float32)

    matched_gt = set()
    matched_pr = set()

    # greedy matching: for each prediction, match to best still-unmatched GT
    # if IoU >= thresh; each GT box can be claimed by at most one prediction
    for pi, p_box in enumerate(preds):
        best_iou = 0.0
        best_gi  = -1
        for gi, g_box in enumerate(gts):
            if gi in matched_gt:
                continue
            overlap = iou(p_box, g_box)
            if overlap > best_iou:
                best_iou = overlap
                best_gi  = gi
        if best_iou >= iou_thresh and best_gi >= 0:
            matched_pr.add(pi)
            matched_gt.add(best_gi)

    tp_i = len(matched_pr)       # predictions matched to a GT box
    fp_i = len(preds) - tp_i     # predictions left without a GT match
    fn_i = len(gts) - tp_i       # GT boxes no prediction matched

    TP += tp_i
    FP += fp_i
    FN += fn_i

    per_image_stats.append((
        os.path.basename(img_path),
        tp_i, fp_i, fn_i,
        len(preds), len(gts)
    ))

# --- Print per-image stats ---
print("Per-image stats (name, TP, FP, FN, #preds, #GT):\n")
for name, tp_i, fp_i, fn_i, n_pred, n_gt in per_image_stats:
    print(f"{name}: TP={tp_i}, FP={fp_i}, FN={fn_i}, preds={n_pred}, GT={n_gt}")

# --- Global metrics ---
# The explicit > 0 guards return 0.0 for empty denominators; the 1e-9 term is
# kept from the original formulation.
precision = TP / (TP + FP + 1e-9) if (TP + FP) > 0 else 0.0
recall    = TP / (TP + FN + 1e-9) if (TP + FN) > 0 else 0.0
f1        = 2 * precision * recall / (precision + recall + 1e-9) if (precision + recall) > 0 else 0.0

# Report the threshold actually used rather than a hard-coded value, so the
# header stays correct if iou_thresh is changed above.
print(f"\n==== Global detection metrics (IoU >= {iou_thresh}, class=person) ====")
print(f"TP: {TP}, FP: {FP}, FN: {FN}")
print(f"Precision: {precision:.3f}")
print(f"Recall:    {recall:.3f}")
print(f"F1-score:  {f1:.3f}")

