<a href="https://colab.research.google.com/github/nicholaskuo/Mean_Average_Precision/blob/main/Mean_Average_Precision.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
from collections import Counter
import torchvision

**NOTE**: mAP is usually averaged over multiple IoU thresholds (.5, .55, .6, ..., .95), as described in step 7, so we can call mAP once per iou_threshold and then average the returned values if needed :)

**mAP notes:**

Step 1: Get all bounding box predictions on our test set

Step 2: Sort by descending confidence score
*   Precision: (# detected ground truths) / (total # detected boxes) 
*   Recall: (# detected ground truths) / (total # ground truths)

Step 3: Calculate the Precision and Recall as we go through all output bounding boxes for a given class

Step 4: Plot the Precision-Recall graph (Recall on x axis, Precision on y axis)

Step 5: Calculate Area under curve to find average precision for given class

Step 6: Repeat process for all classes, take total average

Step 7: This mAP will be mAP@.5:.05:.95 (meaning we calculate mAP at each of the IoU thresholds .5, .55, .6, ..., .95, then average all of these values to find the final result)





In [None]:
def _iou_one_to_many(box, boxes):
  '''
  IoU between one corner-format box and a stack of corner-format boxes.

  inputs:
    box (tensor): shape [4], (x1, y1, x2, y2)
    boxes (tensor): shape [N, 4], each row (x1, y1, x2, y2)

  outputs:
    tensor of shape [N] holding intersection / union for every row of boxes
  '''
  top_left = torch.max(box[:2], boxes[:, :2])
  bottom_right = torch.min(box[2:], boxes[:, 2:])
  # boxes that do not overlap get a negative extent; clamp so their area is 0
  overlap = (bottom_right - top_left).clamp(min = 0)
  intersection = overlap[:, 0] * overlap[:, 1]
  box_area = (box[2] - box[0]) * (box[3] - box[1])
  boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
  return intersection / (box_area + boxes_area - intersection)


def mAP(
    predicted_boxes, gt_boxes, iou_threshold = .5, box_format = "corners", 
    num_classes = 21):
  '''
  Mean average precision at a single IoU threshold.

  inputs: 
    predicted_boxes (list): [[train_index, class_prediction, prob_score, x1, y1, x2, y2],[],...[]]
      train_index: index of image that the specific bbox comes from
      class_prediction: integer value representing class prediction
      prob_score: outputed objectiveness score for a bbox 
      x1,y1,x2,y2: (x1, y1) and (x2,y2) bbox coordinates
    gt_boxes (list): [[train_index, class_prediction, prob_score, x1, y1, x2, y2],[],...[]]
      (prob_score of a ground truth row is never read)
    iou_threshold: a detection matches a ground truth only if IoU is strictly
      greater than this value
    box_format: accepted for interface compatibility but not used; boxes are
      always interpreted as corners (x1, y1, x2, y2)
    num_classes: number of classes in dataset; classes 0..num_classes-1 are evaluated

  outputs:
    averageMeanPrecision: mean average precision (tensor), averaged over the
      classes that have at least one ground truth box; tensor(0.) if no class
      has any ground truth box
  '''

  average_precisions = []
  epsilon = 1e-6

  #looping through each class
  for c in range(num_classes):
    # detections / ground_truths: the rows whose class (index 1) is c
    detections = [detection for detection in predicted_boxes if detection[1] == c]
    ground_truths = [gt_box for gt_box in gt_boxes if gt_box[1] == c]

    total_gt_bboxes = len(ground_truths)
    # recall is undefined without ground truths, so a class that never occurs
    # must not contribute an AP of 0 to the mean
    if total_gt_bboxes == 0:
      continue

    # group the gt boxes by image once, instead of re-filtering per detection
    gt_by_image = {}
    for gt in ground_truths:
      gt_by_image.setdefault(gt[0], []).append(
          torch.as_tensor(gt[3:7], dtype = torch.float32))
    gt_tensors = {image: torch.stack(boxes) for image, boxes in gt_by_image.items()}

    # goal: keep track of the gt bboxes we have already "detected" with prior predicted bboxes
    # key: image #
    # value: one flag per gt bbox in that image (False until matched)
    already_matched = {image: [False] * len(boxes) for image, boxes in gt_by_image.items()}

    # sort over the probability scores of the detections (highest first)
    detections.sort(key = lambda x: x[2], reverse = True)

    true_Positives = torch.zeros(len(detections))
    false_Positives = torch.zeros(len(detections))

    # iterate through all detections in given class c
    for detection_index, detection in enumerate(detections):
      image = detection[0]
      image_gt = gt_tensors.get(image)

      # no gt bbox of this class in the detection's image -> false positive
      if image_gt is None:
        false_Positives[detection_index] = 1
        continue

      detection_box = torch.as_tensor(detection[3:7], dtype = torch.float32)
      ious = _iou_one_to_many(detection_box, image_gt).tolist()

      # first gt bbox with the strictly largest IoU wins
      best_iou = 0
      best_gt_index = 0
      for index, iou in enumerate(ious):
        if iou > best_iou:
          best_iou = iou
          best_gt_index = index

      # a gt bbox may be claimed only once; later (lower confidence)
      # detections of the same gt bbox are false positives
      if best_iou > iou_threshold and not already_matched[image][best_gt_index]:
        true_Positives[detection_index] = 1
        already_matched[image][best_gt_index] = True
      else:
        false_Positives[detection_index] = 1

    # tensor ex: [1, 0, 0, 1] -> [1, 1, 1, 2]
    true_pos_cumulative_sum = torch.cumsum(true_Positives, dim = 0)
    false_pos_cumulative_sum = torch.cumsum(false_Positives, dim = 0)
 
    # calculate recall and precision for given class
    recalls = true_pos_cumulative_sum / (total_gt_bboxes + epsilon)
    precisions = torch.divide(true_pos_cumulative_sum, (true_pos_cumulative_sum + false_pos_cumulative_sum + epsilon))
    
    # prepend the point (recall 0, precision 1) so integration starts at (0,1)
    precisions = torch.cat((torch.tensor([1.0]), precisions))
    recalls = torch.cat((torch.tensor([0.0]), recalls))

    # torch.trapz calculates area under a curve
    # torch.trapz(y, x, *, dim=-1) → Tensor
    average_precisions.append(torch.trapz(precisions, recalls))
  
  # nothing to average when no class had a ground truth box
  if not average_precisions:
    return torch.tensor(0.)

  averageMeanPrecision = sum(average_precisions) / len(average_precisions)
  return averageMeanPrecision

Testing

In [None]:
# Sanity checks for mAP on a single-class dataset: every case below is a
# (detections, ground truths) pair that is evaluated and printed in order.
numClass = 1

test_cases = [
    # same bounding box, same class (should be 1)
    ([[0, 0, 1, 10, 10, 40, 40]],
     [[0, 0, 1, 10, 10, 40, 40]]),

    # iou is exactly .5, which is not above the threshold, so the
    # detection counts as a false positive (should be 0)
    ([[0, 0, 1, 0, 0, 40, 40]],
     [[0, 0, 1, 0, 0, 40, 20]]),

    # expected value should be .533 based on test info
    ([[0, 0, .3, 60, 10, 75, 25],
      [0, 0, .6, 20, 35, 40, 45],
      [0, 0, .7, 50, 35, 68, 50],
      [1, 0, .5, 25, 5, 90, 50],
      [2, 0, .2, 0, 10, 20, 25],
      [2, 0, .8, 25, 10, 50, 23],
      [2, 0, .9, 10, 28, 48, 95]],
     [[0, 0, 1, 20, 15, 40, 43],
      [0, 0, 1, 52, 35, 68, 50],
      [1, 0, 1, 30, 5, 85, 50],
      [2, 0, .9, 5, 28, 42, 91]]),
]

for detected_bbox, gt_bbox in test_cases:
  value = mAP(detected_bbox, gt_bbox, num_classes = numClass)
  print(value)

tensor(1.0000)
tensor(0.)
tensor(0.5333)
