# Implementing the Fundamental Functions

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import random
from shapely.geometry import box
from sklearn.metrics import auc
import supervision as sv

%matplotlib inline

## 1. IoU (Intersection over Union) between two axis-aligned bounding boxes specified in the Ultralytics YOLO format

### Implementing IoU with Shapely and Supervision

In [2]:
def yolo_to_xyxy(bbox):
    """Convert a centre-format box to corner format.

    Args:
        bbox: Four numbers ``(x_center, y_center, width, height)``.

    Returns:
        list: ``[x_min, y_min, x_max, y_max]``.
    """
    center_x, center_y, width, height = bbox
    half_width = width / 2
    half_height = height / 2
    return [
        center_x - half_width,
        center_y - half_height,
        center_x + half_width,
        center_y + half_height,
    ]

# calculating IoU with shapely
def iou_shapely(box1, box2):
    """Intersection over Union of two YOLO-format boxes, via Shapely.

    Args:
        box1: First box as ``(x_center, y_center, width, height)``.
        box2: Second box in the same format.

    Returns:
        Intersection area divided by union area, or ``0`` when the union
        has no area.
    """
    # shapely's box() wants (minx, miny, maxx, maxy), so convert and unpack
    rect_a, rect_b = (box(*yolo_to_xyxy(yolo_box)) for yolo_box in (box1, box2))

    overlap_area = rect_a.intersection(rect_b).area
    combined_area = rect_a.union(rect_b).area

    if combined_area > 0:
        return overlap_area / combined_area
    return 0

# calculating IoU with supervision
def iou_supervision(box1, box2):
    """Intersection over Union of two YOLO-format boxes, via Supervision.

    Args:
        box1: First box as ``(x_center, y_center, width, height)``.
        box2: Second box in the same format.

    Returns:
        The ``[0][0]`` entry of ``sv.box_iou_batch`` applied to the two
        boxes, each wrapped as a one-row batch.
    """
    # box_iou_batch operates on batches, so each box becomes a 1x4 array
    batch_a = np.array(yolo_to_xyxy(box1)).reshape(1, 4)
    batch_b = np.array(yolo_to_xyxy(box2)).reshape(1, 4)

    pairwise_iou = sv.box_iou_batch(batch_a, batch_b)
    return pairwise_iou[0][0]

### Showing that IoU with Shapely and Supervision are equivalent

In [None]:
# function to generate random boxes with varying sizes
def generate_random_yolo_bbox(img_size=412, min_size=150, max_size=200):
    """Sample a random centre-format box that lies fully inside a square image.

    Width and height are drawn independently from ``[min_size, max_size]``;
    the integer centre is then drawn so that the box does not cross the
    image border.

    Args:
        img_size: Side length of the (square) image.
        min_size: Smallest allowed box width/height (inclusive).
        max_size: Largest allowed box width/height (inclusive).

    Returns:
        tuple: ``(x_center, y_center, width, height)`` of ints.

    Raises:
        ValueError: Propagated from ``random.randint`` when
            ``min_size > max_size`` or the sampled box cannot fit in the image.
    """
    w = random.randint(min_size, max_size)
    h = random.randint(min_size, max_size)
    # Round the half-extent UP: with plain ``w // 2`` an odd-sized box whose
    # centre sits on the boundary value overhangs the image by half a pixel.
    half_w = (w + 1) // 2
    half_h = (h + 1) // 2
    x = random.randint(half_w, img_size - half_w)
    y = random.randint(half_h, img_size - half_h)
    return (x, y, w, h)

# function that compares IoUs of both methods
def compare_iou(n_samples=10):
    """Print the Shapely and Supervision IoU for random pairs of boxes.

    For each of ``n_samples`` iterations two random YOLO-format boxes are
    generated, their IoU is computed with both implementations, and the two
    values plus their absolute difference are printed.

    Args:
        n_samples: Number of random box pairs to compare.
    """
    divider = "-" * 50
    for _sample in range(n_samples):
        first_box = generate_random_yolo_bbox()
        second_box = generate_random_yolo_bbox()

        shapely_iou = iou_shapely(first_box, second_box)
        supervision_iou = iou_supervision(first_box, second_box)
        gap = abs(shapely_iou - supervision_iou)

        print(f"YOLO Box 1: {first_box}, YOLO Box 2: {second_box}")
        print(f"IoU (Shapely): {shapely_iou:.4f}, IoU (Supervision): {supervision_iou:.4f}")
        print(f"Difference: {gap:.6f}")
        print(divider)

compare_iou()

YOLO Box 1: (280, 257, 184, 180), YOLO Box 2: (288, 97, 150, 163)
IoU (Shapely): 0.0309, IoU (Supervision): 0.0309
Difference: 0.000000
--------------------------------------------------
YOLO Box 1: (277, 113, 152, 170), YOLO Box 2: (140, 311, 170, 172)
IoU (Shapely): 0.0000, IoU (Supervision): 0.0000
Difference: 0.000000
--------------------------------------------------
YOLO Box 1: (292, 270, 183, 188), YOLO Box 2: (299, 163, 198, 186)
IoU (Shapely): 0.2587, IoU (Supervision): 0.2587
Difference: 0.000000
--------------------------------------------------
YOLO Box 1: (200, 157, 172, 180), YOLO Box 2: (141, 290, 198, 161)
IoU (Shapely): 0.0813, IoU (Supervision): 0.0813
Difference: 0.000000
--------------------------------------------------
YOLO Box 1: (176, 326, 187, 156), YOLO Box 2: (333, 109, 154, 188)
IoU (Shapely): 0.0000, IoU (Supervision): 0.0000
Difference: 0.000000
--------------------------------------------------
YOLO Box 1: (210, 150, 179, 177), YOLO Box 2: (324, 177, 167,

As the output above shows, the two IoU values differ by zero (to the six decimal places printed) for every sampled pair of boxes. On these samples, the Shapely and Supervision implementations of IoU therefore agree.

## 2. Write a function to compute Average Precision (AP) 

### a. Use Pascal VOC 11 point interpolation method to implement the function 

In [4]:
def pascal_voc_ap(recalls, precisions):
    """Average Precision with the Pascal VOC 11-point interpolation.

    For each recall threshold ``t`` in ``{0, 0.1, ..., 1}`` the interpolated
    precision is the maximum precision among points whose recall is at least
    ``t`` (``0`` when no point reaches ``t``). AP is the mean of those 11
    values.

    Args:
        recalls: Recall values; any array-like (list, tuple or numpy array).
        precisions: Precision values, same length as ``recalls``.

    Returns:
        float: The 11-point interpolated AP; ``0.0`` for empty input.
    """
    # Coerce so that plain lists work with the boolean mask below.
    recalls = np.asarray(recalls, dtype=float)
    precisions = np.asarray(precisions, dtype=float)

    # Build the grid as k / 10 rather than np.linspace (k * 0.1): the product
    # can land just above the decimal (e.g. 3 * 0.1 > 0.3), which would
    # exclude a recall of exactly 0.3 from its own threshold.
    thresholds = np.arange(11) / 10

    total = 0.0
    for t in thresholds:
        reachable = precisions[recalls >= t]
        if reachable.size > 0:
            total += reachable.max()
    return float(total / len(thresholds))

### b. Use COCO 101-point interpolation method to implement the function

In [5]:
def coco_ap(recalls, precisions):
    """Average Precision with the COCO 101-point interpolation.

    For each recall threshold ``t`` in ``{0, 0.01, ..., 1}`` the interpolated
    precision is the maximum precision among points whose recall is at least
    ``t`` (``0`` when no point reaches ``t``). AP is the mean of those 101
    values.

    Args:
        recalls: Recall values; any array-like (list, tuple or numpy array).
        precisions: Precision values, same length as ``recalls``.

    Returns:
        float: The 101-point interpolated AP; ``0.0`` for empty input.
    """
    # Coerce so that plain lists work with the boolean mask below.
    recalls = np.asarray(recalls, dtype=float)
    precisions = np.asarray(precisions, dtype=float)

    # Build the grid as k / 100 rather than np.linspace (k * 0.01): the
    # product can land just above the decimal it represents, which would
    # exclude a recall sitting exactly on that threshold.
    thresholds = np.arange(101) / 100

    total = 0.0
    for t in thresholds:
        reachable = precisions[recalls >= t]
        if reachable.size > 0:
            total += reachable.max()
    return float(total / len(thresholds))

### c. Use Area under Precision-Recall Curve (AP) method to implement the function 

In [6]:
def area_under_curve(recalls, precisions):
    """Area under the precision-recall points, delegated to ``sklearn.metrics.auc``.

    Args:
        recalls: x-coordinates of the curve (recall values).
        precisions: y-coordinates of the curve (precision values).

    Returns:
        Whatever ``auc(recalls, precisions)`` returns.
    """
    area = auc(recalls, precisions)
    return area

### d. Randomly generate 10 images of size 100x100. Randomly generate 10 ground truth boxes of size 20x20 and 10 predicted boxes of size 20x20 in each image. Assume there is only one class of objects. Compare the AP50 (Average Precision at IoU 0.5) computed by 3 of your methods

In [None]:
# fixed parameters
img_cnt = 10
boxes_per_img = 10
img_size = 100
box_size = 20

# Seed the global RNG so the randomly generated boxes, and therefore the AP
# values reported below, are the same on every Restart & Run All.
SEED = 42
random.seed(SEED)

# one entry per image, filled in by the generation loop
ground_truth_boxes = []
predicted_boxes = []

# function that generates a random box of fixed size as specified
def generate_fixed_yolo_bbox(img_size=100, box_size=20):
    """Sample a square centre-format box of fixed size inside a square image.

    Args:
        img_size: Side length of the (square) image.
        box_size: Width and height of the generated box.

    Returns:
        tuple: ``(x_center, y_center, box_size, box_size)`` with an integer
        centre chosen so the box does not cross the image border.

    Raises:
        ValueError: Propagated from ``random.randint`` when the box cannot
            fit in the image.
    """
    # Round the half-extent UP: with plain ``box_size // 2`` an odd-sized box
    # centred on the boundary value overhangs the image by half a pixel.
    # For even sizes (including the default 20) this equals ``box_size // 2``.
    margin = (box_size + 1) // 2
    x = random.randint(margin, img_size - margin)
    y = random.randint(margin, img_size - margin)
    return (x, y, box_size, box_size)

# generate ground truths and predictions with random confidence for each image
# Per image: first draw all ground-truth boxes, then all predictions.
# Each prediction is a box followed by a random number that stands in for
# the detector's confidence.
for _ in range(img_cnt):
    ground_truth_boxes.append(
        [generate_fixed_yolo_bbox(img_size, box_size) for _ in range(boxes_per_img)]
    )
    predicted_boxes.append(
        [
            (*generate_fixed_yolo_bbox(img_size, box_size), random.random())
            for _ in range(boxes_per_img)
        ]
    )

def compute_pr(ground_truth_boxes, predicted_boxes, iou_threshold=0.5):
    """Build the precision-recall points for single-class detections.

    Within each image, predictions are visited in decreasing confidence and
    each one is greedily paired with the not-yet-matched ground-truth box of
    highest IoU. The prediction counts as a true positive when that IoU is at
    least ``iou_threshold`` (the ground truth is then marked as used);
    otherwise it is a false positive. All predictions are finally ranked by
    confidence across images and cumulative precision and recall are computed
    along that ranking.

    Args:
        ground_truth_boxes: One list per image of YOLO-format boxes
            ``(x_center, y_center, width, height)``.
        predicted_boxes: One list per image of
            ``(x_center, y_center, width, height, confidence)`` tuples,
            paired with ``ground_truth_boxes`` by position.
        iou_threshold: Minimum IoU for a prediction to match a ground truth.

    Returns:
        tuple: ``(recalls, precisions)`` numpy arrays with one entry per
        prediction, ordered by decreasing confidence. When there are no
        ground-truth boxes at all, every recall is ``0``.
    """
    ans = []  # one [confidence, is_true_positive] pair per prediction
    gt_cnt = 0

    for ground_truth, predictions in zip(ground_truth_boxes, predicted_boxes):
        gt_cnt += len(ground_truth)
        matched = [False] * len(ground_truth)
        # sort all predictions in decreasing order of confidence
        sorted_predictions = sorted(predictions, key=lambda x: x[4], reverse=True)
        for prediction in sorted_predictions:
            pred = prediction[:4]
            max_iou, best_gt = -1, -1
            for i, gt in enumerate(ground_truth):
                if matched[i]:
                    continue  # skip if already matched
                # iou_supervision converts from YOLO format itself; converting
                # here as well would feed it doubly transformed boxes.
                iou = iou_supervision(pred, gt)
                if iou > max_iou:
                    max_iou = iou
                    best_gt = i
            if max_iou >= iou_threshold and best_gt != -1:  # match if best IoU >= threshold
                matched[best_gt] = True
                ans.append([prediction[4], 1])  # 1 means true positive
            else:
                ans.append([prediction[4], 0])  # 0 means false positive

    ans = sorted(ans, key=lambda x: x[0], reverse=True)

    tp = np.array([x[1] for x in ans], dtype=int)

    cum_tp = np.cumsum(tp)
    cum_fp = np.cumsum(1 - tp)
    # recall = TP / (TP + FN) = TP / total ground truths
    if gt_cnt > 0:
        recalls = cum_tp / gt_cnt
    else:
        # no ground truth to recover: avoid dividing by zero
        recalls = np.zeros(len(cum_tp), dtype=float)
    # precision = TP / (TP + FP) = TP / predictions seen so far (never zero)
    precisions = cum_tp / (cum_tp + cum_fp)

    return recalls, precisions

recalls, precisions = compute_pr(ground_truth_boxes, predicted_boxes, iou_threshold=0.5)

# raw precision-recall points, one line per array
print(recalls, precisions, sep="\n")

# summarise the same PR points with each of the three AP definitions
ap_voc, ap_coco, ap_auc = (
    ap_method(recalls, precisions)
    for ap_method in (pascal_voc_ap, coco_ap, area_under_curve)
)

print("AP50 (VOC 11-point interpolation):", ap_voc)
print("AP50 (COCO 101-point interpolation):", ap_coco)
print("AP50 (Area Under PR Curve):", ap_auc)

[[(82, 56, 20, 20), (60, 22, 20, 20), (40, 17, 20, 20), (17, 87, 20, 20), (56, 38, 20, 20), (78, 86, 20, 20), (12, 45, 20, 20), (15, 83, 20, 20), (35, 88, 20, 20), (18, 59, 20, 20)], [(66, 83, 20, 20), (67, 16, 20, 20), (78, 48, 20, 20), (75, 38, 20, 20), (74, 84, 20, 20), (47, 80, 20, 20), (15, 51, 20, 20), (43, 88, 20, 20), (61, 11, 20, 20), (89, 17, 20, 20)], [(79, 20, 20, 20), (11, 82, 20, 20), (25, 68, 20, 20), (66, 31, 20, 20), (53, 18, 20, 20), (41, 10, 20, 20), (39, 79, 20, 20), (69, 55, 20, 20), (68, 81, 20, 20), (14, 78, 20, 20)], [(56, 12, 20, 20), (18, 74, 20, 20), (76, 75, 20, 20), (48, 44, 20, 20), (25, 60, 20, 20), (80, 86, 20, 20), (21, 75, 20, 20), (15, 62, 20, 20), (46, 38, 20, 20), (49, 10, 20, 20)], [(66, 89, 20, 20), (73, 12, 20, 20), (82, 32, 20, 20), (64, 42, 20, 20), (66, 71, 20, 20), (17, 67, 20, 20), (73, 34, 20, 20), (83, 63, 20, 20), (73, 32, 20, 20), (11, 47, 20, 20)], [(53, 30, 20, 20), (16, 36, 20, 20), (49, 53, 20, 20), (36, 53, 20, 20), (46, 39, 20, 20)