# mAP Implementation with details
- We will implement Mean Average Precision (mAP) with pretrained YOLOv3 model (which publicly available on pytorch model)

In [36]:
import torch
import numpy as np

# output = torch.zeros(2, 30, 7, 7)
output = torch.unsqueeze(torch.unsqueeze(torch.from_numpy(np.array([
    # x, y, w, h, c, CIDs
    0.5, 0.5, 1.0, 1.0, 1.0,
    0.5, 0.5, 1.0, 1.0, 0.1,
    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
    0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0,
])).repeat(2, 1), 2), 3).repeat(1, 1, 7, 7).float()

# label = torch.zeros(2, 25, 7, 7)
label = torch.unsqueeze(torch.unsqueeze(torch.from_numpy(np.array([
    # x, y, w, h, c, CIDs
    0.6, 0.4, 0.9, 0.9, 1.0,
    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
    0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0,
])).repeat(2, 1), 2), 3).repeat(1, 1, 7, 7).float()

print("output", output.shape)
print("label", label.shape)

output torch.Size([2, 30, 7, 7])
label torch.Size([2, 25, 7, 7])


In [42]:
from torch import Tensor
from torch.nn import Module
import torchvision

class MeanAvgPrecision(Module):
    def __init__(self):
        super().__init__()
        self.confidence_threshold = 0.3
        
    def forward(self, output, label):
        classes = 20
        cells = output.shape[2]  # 2, 3 indicates cell count
        bboxes = (output.shape[1] - classes) // 5
        assert((output.shape[1] - classes) % 5 == 0)
        
        # do NMS over bboxes
        for b in range(output.shape[0]):
            # calculate per-class mAP
            for c in range(classes):
                # Organize bboxes for NMS algorithm
                bbox_cell_indicies = []
                bbox_coordinates = []
                bbox_scores = []
                bbox_classes = []
                for cell_idx_y in range(cells):
                    for cell_idx_x in range(cells):
                        for bbox_idx in range(bboxes):
                            current_predictor = output[b, 5 * (bbox_idx):5 * (bbox_idx + 1), cell_idx_y, cell_idx_x]
                            (cell_pos_x, cell_pos_y, width, height, confidence) = torch.sigmoid(current_predictor)

                            class_prob = torch.sigmoid(output[b, -20:, cell_idx_y, cell_idx_x])
                            class_id = torch.argmax(class_prob, dim=0)

                            if confidence < self.confidence_threshold:
                                continue
                            
                            if class_id != c:
                                continue

                            obj_center_x = (cell_idx_x + cell_pos_x) / cells
                            obj_center_y = (cell_idx_y + cell_pos_y) / cells
                            oxmin, oxmax = obj_center_x - (width / 2), obj_center_x + (width / 2)
                            oymin, oymax = obj_center_y - (height / 2), obj_center_y + (height / 2)

                            bbox_cell_indicies.append([cell_idx_x, cell_idx_y])
                            bbox_coordinates.append(torch.stack([oxmin, oymin, oxmax, oymax]))
                            bbox_scores.append(class_prob[class_id] * confidence)
                            bbox_classes.append(class_id)
                            
                # Skip empty class id (would result on NO CURVE)
                if len(bbox_cell_indicies) == 0:
                    continue

                bbox_cell_indicies = torch.from_numpy(np.array(bbox_cell_indicies))
                bbox_coordinates = torch.stack(bbox_coordinates, dim=0).float()
                bbox_scores = torch.stack(bbox_scores, dim=0).float()
                bbox_classes = torch.stack(bbox_classes, dim=0)

                # print(bbox_coordinates.shape, bbox_scores.shape, bbox_classes.shape)
                nms_indicies = torchvision.ops.nms(boxes=bbox_coordinates, scores=bbox_scores, iou_threshold=0.8)
                # print(nms_indicies.shape)

                bbox_filtered_cell_indicies = bbox_cell_indicies.index_select(0, nms_indicies)
                bbox_filtered_coordinates = bbox_coordinates.index_select(0, nms_indicies)
                bbox_filtered_scores = bbox_scores.index_select(0, nms_indicies)
                bbox_filtered_classes = bbox_classes.index_select(0, nms_indicies).int()
                tp_fp_metric = self.calculate_iou_from_gt(bbox_filtered_coordinates, bbox_filtered_cell_indicies, label[b, :5, :, :])

                # print(bbox_filtered_cell_indicies.shape, bbox_filtered_coordinates.shape, bbox_filtered_scores.shape, bbox_filtered_classes.shape)
                # torch.Size([49, 2]) torch.Size([49, 4]) torch.Size([49]) torch.Size([49])
                bbox_merged_result = torch.cat([
                    bbox_filtered_cell_indicies,
                    bbox_filtered_coordinates,
                    torch.unsqueeze(bbox_filtered_scores, 1),
                    torch.unsqueeze(bbox_filtered_classes, 1),
                ], dim=1)  # [49, 6] -> 6 [cell_idx_x(int), celL_idx_y(int), xmin, ymin, xmax, ymax, classprob, classid(int)]

                condfidence_sort_indicies = torch.argsort(bbox_filtered_scores, 0)
        
        return 0
    
    def calculate_iou_from_gt(self, out_bboxes, out_cell_indicies, label_bboxes):
        bboxes = out_bboxes.shape[0]
        cells = label_bboxes.shape[2]
        
        for b in range(bboxes):
            cix, ciy = out_cell_indicies[b]
            lcx, lcy, lw, lh, _ = label_bboxes[:, cix, ciy]
            lxc, lyc = (lcx + cix) / cells, (lcy + ciy) / cells
            oxmin, oxmax, oymin, oymax = out_bboxes[b]
            print(ocx, ocy, ow, oh)
        return None
    
#     @staticmethod
#     def xywhc_to_xyminmaxc(xywhc, index_table):  # using index_table with args minimizing unnecessary overhead
#         # xywhc -> [B, 5, C, C]
#         cell_size = xywhc.shape[2]
#         cxcy = (xywhc[:, :2, :, :] + index_table) / cell_size
        
#         xmin = torch.maximum(cxcy[:, 0, :, :] - (xywhc[:, 2, :, :] / 2), torch.Tensor([0]))
#         xmax = torch.minimum(cxcy[:, 0, :, :] + (xywhc[:, 2, :, :] / 2), torch.Tensor([1]))
#         ymin = torch.maximum(cxcy[:, 1, :, :] - (xywhc[:, 3, :, :] / 2), torch.Tensor([0]))
#         ymax = torch.minimum(cxcy[:, 1, :, :] + (xywhc[:, 3, :, :] / 2), torch.Tensor([1]))
#         return torch.stack([xmin, ymin, xmax, ymax, xywhc[:, 4, :, :]], dim=1).view(xywhc.shape[0], 5, -1)
        
    
#     @staticmethod
#     def generate_cell_index_table_order_xy(output):
#         index_map_x = torch.arange(0, 7, device=output.device).repeat(7)
#         index_map_y = torch.repeat_interleave(torch.arange(0, 7, device=output.device), 7)
#         index_map = torch.unsqueeze(torch.stack([index_map_x, index_map_y], dim=0).view(2, 7, 7), 0)
#         return index_map
#         # index_map -> [1, 2, 7, 7]
        
#     @staticmethod
#     def get_iou_between(xywhcp1: Tensor, xywhcp2: Tensor) -> Tensor:
        
#         return None
    
#     @staticmethod
#     def get_iou_xywh(input_xywh: Tensor, label_xywh: Tensor) -> Tensor:
#         index_map_x = torch.arange(0, 7, device=input_xywh.device).repeat(7)
#         index_map_y = torch.repeat_interleave(torch.arange(0, 7, device=input_xywh.device), 7)
#         index_map = torch.unsqueeze(torch.stack([index_map_y, index_map_x], dim=0).view(2, 7, 7), 0)
#         input_xy_global = (input_xywh[:, :2, :, :] + index_map) / 7
#         input_width_half, input_height_half = (input_xywh[:, 2, :, :] / 2), (input_xywh[:, 3, :, :] / 2)
#         input_xmin = input_xy_global[:, 0, :, :] - input_width_half  # x_center - width / 2
#         input_xmax = input_xy_global[:, 0, :, :] + input_width_half
#         input_ymin = input_xy_global[:, 1, :, :] - input_height_half
#         input_ymax = input_xy_global[:, 1, :, :] + input_height_half

#         label_xy_global = (label_xywh[:, :2, :, :] + index_map) / 7
#         label_width_half, label_height_half = (label_xywh[:, 2, :, :] / 2), (label_xywh[:, 3, :, :] / 2)
#         label_xmin = label_xy_global[:, 0, :, :] - label_width_half  # x_center - width / 2
#         label_xmax = label_xy_global[:, 0, :, :] + label_width_half
#         label_ymin = label_xy_global[:, 1, :, :] - label_height_half
#         label_ymax = label_xy_global[:, 1, :, :] + label_height_half

#         input_volume = input_xywh[:, 2, :, :] * input_xywh[:, 3, :, :]
#         label_volume = label_xywh[:, 2, :, :] * label_xywh[:, 3, :, :]
#         intersect_width = torch.minimum(input_xmax, label_xmax) - torch.maximum(input_xmin, label_xmin)
#         intersect_height = torch.minimum(input_ymax, label_ymax) - torch.maximum(input_ymin, label_ymin)
#         intersect_volume = intersect_width * intersect_height
#         union_volume = input_volume + label_volume - intersect_volume

#         return intersect_volume / union_volume
        
mean_avg_precision = MeanAvgPrecision()
print("mean_avg_precision(output, label):", mean_avg_precision(output, label))

tensor(-0.2766) tensor(-0.2766) tensor(0.4545) tensor(0.4545)
tensor(-0.2766) tensor(-0.2766) tensor(0.4545) tensor(0.4545)
tensor(-0.1337) tensor(-0.2766) tensor(0.5973) tensor(0.4545)
tensor(-0.1337) tensor(-0.2766) tensor(0.5973) tensor(0.4545)
tensor(0.0091) tensor(-0.2766) tensor(0.7402) tensor(0.4545)
tensor(0.0091) tensor(-0.2766) tensor(0.7402) tensor(0.4545)
tensor(0.1520) tensor(-0.2766) tensor(0.8830) tensor(0.4545)
tensor(0.1520) tensor(-0.2766) tensor(0.8830) tensor(0.4545)
tensor(0.2948) tensor(-0.2766) tensor(1.0259) tensor(0.4545)
tensor(0.2948) tensor(-0.2766) tensor(1.0259) tensor(0.4545)
tensor(0.4377) tensor(-0.2766) tensor(1.1687) tensor(0.4545)
tensor(0.4377) tensor(-0.2766) tensor(1.1687) tensor(0.4545)
tensor(0.5805) tensor(-0.2766) tensor(1.3116) tensor(0.4545)
tensor(0.5805) tensor(-0.2766) tensor(1.3116) tensor(0.4545)
tensor(-0.2766) tensor(-0.1337) tensor(0.4545) tensor(0.5973)
tensor(-0.2766) tensor(-0.1337) tensor(0.4545) tensor(0.5973)
tensor(-0.1337) te

tensor(-0.2766) tensor(-0.2766) tensor(0.4545) tensor(0.4545)
tensor(-0.2766) tensor(-0.2766) tensor(0.4545) tensor(0.4545)
tensor(-0.1337) tensor(-0.2766) tensor(0.5973) tensor(0.4545)
tensor(-0.1337) tensor(-0.2766) tensor(0.5973) tensor(0.4545)
tensor(0.0091) tensor(-0.2766) tensor(0.7402) tensor(0.4545)
tensor(0.0091) tensor(-0.2766) tensor(0.7402) tensor(0.4545)
tensor(0.1520) tensor(-0.2766) tensor(0.8830) tensor(0.4545)
tensor(0.1520) tensor(-0.2766) tensor(0.8830) tensor(0.4545)
tensor(0.2948) tensor(-0.2766) tensor(1.0259) tensor(0.4545)
tensor(0.2948) tensor(-0.2766) tensor(1.0259) tensor(0.4545)
tensor(0.4377) tensor(-0.2766) tensor(1.1687) tensor(0.4545)
tensor(0.4377) tensor(-0.2766) tensor(1.1687) tensor(0.4545)
tensor(0.5805) tensor(-0.2766) tensor(1.3116) tensor(0.4545)
tensor(0.5805) tensor(-0.2766) tensor(1.3116) tensor(0.4545)
tensor(-0.2766) tensor(-0.1337) tensor(0.4545) tensor(0.5973)
tensor(-0.2766) tensor(-0.1337) tensor(0.4545) tensor(0.5973)
tensor(-0.1337) te

mean_avg_precision(output, label): 0


In [4]:
torch.repeat_interleave(torch.Tensor([[1], [2], [3]]), 4, 1)

tensor([[1., 1., 1., 1.],
        [2., 2., 2., 2.],
        [3., 3., 3., 3.]])