In [2]:
import numpy as np
import torch
from collections import Counter

In [3]:
def IoU(boxes_preds, boxes_labels, box_format="midpoint", verbose=True):
    """Compute the intersection-over-union of one predicted and one target box.

    Args:
        boxes_preds: tensor whose last dimension holds the 4 box coordinates.
        boxes_labels: tensor with the same layout as ``boxes_preds``.
        box_format: ``"midpoint"`` for ``[x, y, w, h]`` (centre and size) or
            ``"corners"`` for ``[x1, y1, x2, y2]``.
        verbose: when True (the default), print the intersection rectangle,
            the intersection area and the union area.

    Returns:
        The IoU as a Python float. Because the result is extracted with
        ``.item()``, the inputs must describe exactly one pair of boxes.

    Raises:
        ValueError: if ``box_format`` is neither ``"midpoint"`` nor ``"corners"``.
    """
    if box_format == "midpoint":
        # [x, y, w, h]: each corner is the centre minus/plus half the extent.
        box1_x1 = boxes_preds[..., 0:1] - boxes_preds[..., 2:3] / 2
        box1_y1 = boxes_preds[..., 1:2] - boxes_preds[..., 3:4] / 2
        box1_x2 = boxes_preds[..., 0:1] + boxes_preds[..., 2:3] / 2
        box1_y2 = boxes_preds[..., 1:2] + boxes_preds[..., 3:4] / 2
        box2_x1 = boxes_labels[..., 0:1] - boxes_labels[..., 2:3] / 2
        box2_y1 = boxes_labels[..., 1:2] - boxes_labels[..., 3:4] / 2
        box2_x2 = boxes_labels[..., 0:1] + boxes_labels[..., 2:3] / 2
        box2_y2 = boxes_labels[..., 1:2] + boxes_labels[..., 3:4] / 2
    elif box_format == "corners":
        # [x1, y1, x2, y2]: the coordinates are used as given.
        box1_x1 = boxes_preds[..., 0:1]
        box1_y1 = boxes_preds[..., 1:2]
        box1_x2 = boxes_preds[..., 2:3]
        box1_y2 = boxes_preds[..., 3:4]
        box2_x1 = boxes_labels[..., 0:1]
        box2_y1 = boxes_labels[..., 1:2]
        box2_x2 = boxes_labels[..., 2:3]
        box2_y2 = boxes_labels[..., 3:4]
    else:
        raise ValueError(
            "box_format must be 'midpoint' or 'corners', got {!r}".format(box_format)
        )

    # Corners of the overlap rectangle (degenerate if the boxes are disjoint).
    x1 = torch.max(box1_x1, box2_x1)
    y1 = torch.max(box1_y1, box2_y1)
    x2 = torch.min(box1_x2, box2_x2)
    y2 = torch.min(box1_y2, box2_y2)

    # clamp(0) turns a negative extent (no overlap on that axis) into 0.
    intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)

    box1_area = abs((box1_x2 - box1_x1) * (box1_y2 - box1_y1))
    box2_area = abs((box2_x2 - box2_x1) * (box2_y2 - box2_y1))
    union = box1_area + box2_area - intersection

    if verbose:
        print("intersection coordinates: [",x1.item(),y1.item(),x2.item(),y2.item(),"]")
        print("intersection:",intersection.item())
        print("union:",union.item())

    # The small constant guards against division by zero for zero-area boxes.
    return (intersection / (union + 1e-6)).item()

In [7]:
def mAP(pred_boxses, true_boxes, iou_threshold=0.49, box_format="corners", num_classes=20):
    """Compute mean average precision over classes at a single IoU threshold.

    Args:
        pred_boxses (list): predicted boxes, each
            ``[train_idx, class_pred, prob_score, x1, y1, x2, y2]``.
        true_boxes (list): target boxes; index 0 is read as the image index,
            index 1 as the class and indices 3 onward as the box coordinates.
        iou_threshold (float): a detection counts as a match only when its
            best IoU is strictly greater than this value.
        box_format (str): forwarded to ``IoU`` (``"midpoint"`` or ``"corners"``).
        num_classes (int): classes ``0 .. num_classes - 1`` are evaluated.

    Returns:
        A 0-dim tensor holding the mean of the per-class average precisions,
        taken over the classes that have at least one target box. If no class
        has a target box the result is ``torch.tensor(0.0)``.
    """
    # The misspelled parameter name is kept so existing keyword callers still work.
    pred_boxes = pred_boxses

    average_precisions = []
    epsilon = 1e-6  # avoids 0/0 in the precision and recall ratios

    for c in range(num_classes):
        detections = [det for det in pred_boxes if det[1] == c]
        ground_truths = [gt for gt in true_boxes if gt[1] == c]
        total_true_bboxes = len(ground_truths)

        # AP is undefined for a class without targets; skipping it keeps such
        # classes from dragging the mean towards zero.
        if total_true_bboxes == 0:
            continue

        # Group this class's targets by image so each detection only looks at
        # the targets of its own image.
        gts_by_image = {}
        for gt in ground_truths:
            gts_by_image.setdefault(gt[0], []).append(gt)

        # One flag per target: 0 = not yet matched, 1 = already claimed.
        # e.g. {0: tensor([0, 0, 0]), 1: tensor([0, 0, 0, 0, 0])}
        matched = {img: torch.zeros(len(gts)) for img, gts in gts_by_image.items()}

        # Highest-confidence detections get first pick of the targets.
        detections.sort(key=lambda det: det[2], reverse=True)
        TP = torch.zeros(len(detections))
        FP = torch.zeros(len(detections))

        for detection_idx, detection in enumerate(detections):
            image_gts = gts_by_image.get(detection[0], [])

            best_iou = 0
            best_gt_idx = None  # reset per detection so a stale index is never reused

            for idx, gt in enumerate(image_gts):
                iou = IoU(torch.tensor(detection[3:]), torch.tensor(gt[3:]), box_format=box_format)
                if iou > best_iou:
                    best_iou = iou
                    best_gt_idx = idx

            is_match = (
                best_gt_idx is not None
                and best_iou > iou_threshold
                and matched[detection[0]][best_gt_idx] == 0
            )
            if is_match:
                TP[detection_idx] = 1
                matched[detection[0]][best_gt_idx] = 1  # target is now claimed
            else:
                # Low overlap, no target in this image, or target already claimed.
                FP[detection_idx] = 1

        # Prefix sums, e.g. [1, 1, 0, 1, 0] -> [1, 2, 2, 3, 3]
        TP_cumsum = torch.cumsum(TP, dim=0)
        FP_cumsum = torch.cumsum(FP, dim=0)
        recalls = TP_cumsum / (total_true_bboxes + epsilon)
        precisions = TP_cumsum / (TP_cumsum + FP_cumsum + epsilon)

        # Prepend the (recall=0, precision=1) point so the integration starts
        # at the left edge; float literals keep the dtypes consistent for cat.
        precisions = torch.cat((torch.tensor([1.0]), precisions))
        recalls = torch.cat((torch.tensor([0.0]), recalls))

        # Area under the precision-recall curve; trapz takes (y, x).
        average_precisions.append(torch.trapz(precisions, recalls))

    if not average_precisions:
        return torch.tensor(0.0)

    return sum(average_precisions) / len(average_precisions)
            
                    