In [1]:
import torch
import numpy as np
import cv2
import os
os.chdir("../")  # 注意这个cell,只能运行一次，不然工作路径会往上跳多级（../）

In [2]:
from model.fcos import FCOSDetector
from dataset.VOC_dataset import VOCDataset
from tqdm import tqdm

## 评估标准mAP

In [3]:
def collect_evaluate_data(data_loader):
    # 标签数据的容器
    gt_boxes = []
    gt_classes = []
    # 预测数据的容器
    pred_boxes = []
    pred_classes = []
    pred_scores = []

    # 往两类容器中填值
    for _, (img, boxes, classes) in enumerate(tqdm(data_loader)):
    # for img, boxes, classes in data_loader:
        with torch.no_grad():
            out = model(img.cuda())
            pred_boxes.append(out[2][0].cpu().numpy())
            pred_classes.append(out[1][0].cpu().numpy())
            pred_scores.append(out[0][0].cpu().numpy())
        gt_boxes.append(boxes[0].numpy())
        gt_classes.append(classes[0].numpy())

    return gt_boxes, gt_classes, pred_boxes, pred_classes, pred_scores

In [4]:
def sort_by_score(pred_boxes, pred_labels, pred_scores):
    score_seq = [(-score).argsort() for index, score in enumerate(pred_scores)]
    pred_boxes = [sample_boxes[mask] for sample_boxes, mask in zip(pred_boxes, score_seq)]
    pred_labels = [sample_boxes[mask] for sample_boxes, mask in zip(pred_labels, score_seq)]
    pred_scores = [sample_boxes[mask] for sample_boxes, mask in zip(pred_scores, score_seq)]
    return pred_boxes, pred_labels, pred_scores

In [5]:
def iou_2d(cubes_a, cubes_b):
    """
    numpy 计算IoU
    :param cubes_a: [N,(x1,y1,x2,y2)]
    :param cubes_b: [M,(x1,y1,x2,y2)]
    :return:  IoU [N,M]
    """
    # expands dim
    cubes_a = np.expand_dims(cubes_a, axis=1)  # [N,1,4]
    cubes_b = np.expand_dims(cubes_b, axis=0)  # [1,M,4]
    overlap = np.maximum(0.0,
                         np.minimum(cubes_a[..., 2:], cubes_b[..., 2:]) -
                         np.maximum(cubes_a[..., :2], cubes_b[..., :2]))  # [N,M,(w,h)]

    # overlap
    overlap = np.prod(overlap, axis=-1)  # [N,M]

    # compute area
    area_a = np.prod(cubes_a[..., 2:] - cubes_a[..., :2], axis=-1)
    area_b = np.prod(cubes_b[..., 2:] - cubes_b[..., :2], axis=-1)

    # compute iou
    iou = overlap / (area_a + area_b - overlap)
    return iou

In [6]:
def _compute_ap(recall, precision):
    """ Compute the average precision, given the recall and precision curves.
    Code originally from https://github.com/rbgirshick/py-faster-rcnn.
    # Arguments
        recall:    The recall curve (list).
        precision: The precision curve (list).
    # Returns
        The average precision as computed in py-faster-rcnn.
    """
    # correct AP calculation
    # first append sentinel values at the end
    mrec = np.concatenate(([0.], recall, [1.]))
    mpre = np.concatenate(([0.], precision, [0.]))

    # compute the precision envelope
    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

    # to calculate area under PR curve, look for points
    # where X axis (recall) changes value
    i = np.where(mrec[1:] != mrec[:-1])[0]

    # and sum (\Delta recall) * prec
    ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap

In [7]:
def eval_ap_2d(gt_boxes, gt_labels, pred_boxes, pred_labels, pred_scores, iou_thread, num_cls):
    """
    :param gt_boxes: list of 2d array,shape[(a,(x1,y1,x2,y2)),(b,(x1,y1,x2,y2))...]
    :param gt_labels: list of 1d array,shape[(a),(b)...],value is sparse label index
    :param pred_boxes: list of 2d array, shape[(m,(x1,y1,x2,y2)),(n,(x1,y1,x2,y2))...]
    :param pred_labels: list of 1d array,shape[(m),(n)...],value is sparse label index
    :param pred_scores: list of 1d array,shape[(m),(n)...]
    :param iou_thread: eg. 0.5
    :param num_cls: eg. 4, total number of class including background which is equal to 0
    :return: a dict containing average precision for each cls
    """
    all_ap = {}
    for label in range(num_cls)[1:]:
        # get samples with specific label
        true_label_loc = [sample_labels == label for sample_labels in gt_labels]
        gt_single_cls = [sample_boxes[mask] for sample_boxes, mask in zip(gt_boxes, true_label_loc)]

        pred_label_loc = [sample_labels == label for sample_labels in pred_labels]
        bbox_single_cls = [sample_boxes[mask] for sample_boxes, mask in zip(pred_boxes, pred_label_loc)]
        scores_single_cls = [sample_scores[mask] for sample_scores, mask in zip(pred_scores, pred_label_loc)]

        fp = np.zeros((0,))
        tp = np.zeros((0,))
        scores = np.zeros((0,))
        total_gts = 0
        # loop for each sample
        for sample_gts, sample_pred_box, sample_scores in zip(gt_single_cls, bbox_single_cls, scores_single_cls):
            total_gts = total_gts + len(sample_gts)
            assigned_gt = []  # one gt can only be assigned to one predicted bbox
            # loop for each predicted bbox
            for index in range(len(sample_pred_box)):
                scores = np.append(scores, sample_scores[index])
                if len(sample_gts) == 0:  # if no gts found for the predicted bbox, assign the bbox to fp
                    fp = np.append(fp, 1)
                    tp = np.append(tp, 0)
                    continue
                pred_box = np.expand_dims(sample_pred_box[index], axis=0)
                iou = iou_2d(sample_gts, pred_box)
                gt_for_box = np.argmax(iou, axis=0)
                max_overlap = iou[gt_for_box, 0]
                if max_overlap >= iou_thread and gt_for_box not in assigned_gt:
                    fp = np.append(fp, 0)
                    tp = np.append(tp, 1)
                    assigned_gt.append(gt_for_box)
                else:
                    fp = np.append(fp, 1)
                    tp = np.append(tp, 0)
        # sort by score
        indices = np.argsort(-scores)
        fp = fp[indices]
        tp = tp[indices]
        # compute cumulative false positives and true positives
        fp = np.cumsum(fp)
        tp = np.cumsum(tp)
        # compute recall and precision
        recall = tp / total_gts
        precision = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
        ap = _compute_ap(recall, precision)
        all_ap[label] = ap
        # print(recall, precision)
    return all_ap

## 准备数据和模型

In [8]:
eval_dataset = VOCDataset(root_dir='./notebooks/dataset/VOCdevkit/VOC2012', resize_size=[800, 1333],
                          split='trainval_demoData', use_difficult=False, is_train=False, augment=None)
print("INFO===>eval dataset has %d imgs" % len(eval_dataset))
eval_loader = torch.utils.data.DataLoader(eval_dataset, batch_size=1, shuffle=False,
                                          collate_fn=eval_dataset.collate_fn)

INFO=====>voc dataset init finished  ! !
INFO===>eval dataset has 10 imgs


In [9]:
model = FCOSDetector(mode="inference")
model = torch.nn.DataParallel(model) 
model.load_state_dict(torch.load("./checkpoint/voc_77.8.pth", map_location=torch.device('cpu')))
model = model.cuda().eval()
print("===>success loading model")

INFO===>success frozen BN
INFO===>success frozen backbone stage1
===>success loading model


## 开始评估

In [12]:
# 收集数据
gt_boxes, gt_classes, pred_boxes, pred_classes, pred_scores = collect_evaluate_data(eval_loader)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:07<00:00,  1.42it/s]


In [17]:
gt_boxes, gt_classes

([array([[ 70.4     ,  21.333334, 953.60004 , 622.93335 ]], dtype=float32),
  array([[ 108.10811 ,   24.024025, 1198.7988  ,  797.5976  ],
         [ 146.54654 ,  454.05405 ,  196.997   ,  581.3814  ]],
        dtype=float32),
  array([[  0.     , 488.53336, 910.9334 , 622.93335]], dtype=float32),
  array([[ 94.117645, 155.65611 , 850.6787  , 758.37103 ],
         [284.1629  ,  77.82806 , 521.26697 , 300.45248 ]], dtype=float32),
  array([[ 460.80002,  341.33334,  625.0667 ,  469.33334],
         [ 989.8667 ,  354.13336, 1064.5334 ,  462.93335]], dtype=float32),
  array([[  27.733335,  313.6     , 1011.2001  ,  612.2667  ]],
        dtype=float32),
  array([[ 12.0120125,   0.       , 751.95197  , 627.02704  ],
         [ 93.693695 , 230.63063  , 288.2883   , 984.985    ],
         [326.7267   ,  84.08408  , 403.6036   , 259.45947  ],
         [430.03003  ,  84.08408  , 516.51654  , 247.44745  ],
         [228.22823  ,  91.29129  , 293.09308  , 245.04504  ]],
        dtype=float32),
  a

In [27]:
pred_boxes[0].shape

(238, 4)

In [28]:
pred_classes[0].shape

(238,)

In [29]:
pred_scores[0].shape

(238,)

In [13]:
# 调整预测数据
pred_boxes, pred_classes, pred_scores = sort_by_score(pred_boxes, pred_classes, pred_scores)

In [14]:
# 拿到数据后开始计算AP
all_AP = eval_ap_2d(gt_boxes, gt_classes, pred_boxes, pred_classes, pred_scores, 
                    0.5, len(eval_dataset.CLASSES_NAME))



Your code is trying to "divide by zero" or "divide by NaN". If you are aware of that and don't want it to bother you, then you can try:
>np.seterr(divide='ignore', invalid='ignore')

In [15]:
# 计算每一类的AP
print("all classes AP=====>\n")
for key, value in all_AP.items():
    print('ap for {} is {}'.format(eval_dataset.id2name[int(key)], value))
    
# 计算mAP    
mAP = 0.
for class_id, class_mAP in all_AP.items():
    mAP += float(class_mAP)
    
mAP /= (len(eval_dataset.CLASSES_NAME) - 1)
print("mAP=====>%.3f\n" % mAP)

all classes AP=====>

ap for aeroplane is 1.0
ap for bicycle is nan
ap for bird is nan
ap for boat is 1.0
ap for bottle is 1.0
ap for bus is nan
ap for car is 1.0
ap for cat is nan
ap for chair is nan
ap for cow is 1.0
ap for diningtable is nan
ap for dog is 1.0
ap for horse is 1.0
ap for motorbike is nan
ap for person is 0.8472222222222223
ap for pottedplant is nan
ap for sheep is nan
ap for sofa is nan
ap for train is 1.0
ap for tvmonitor is 1.0
mAP=====>nan



In [10]:
# voc2007 test
eval_dataset = VOCDataset(root_dir='../datasets/VOCdevkit/VOC2007', resize_size=[800, 1333],
                          split='test', use_difficult=False, is_train=False, augment=None)
print("INFO===>eval dataset has %d imgs" % len(eval_dataset))
eval_loader = torch.utils.data.DataLoader(eval_dataset, batch_size=1, shuffle=False,
                                          collate_fn=eval_dataset.collate_fn)

model = FCOSDetector(mode="inference")
model = torch.nn.DataParallel(model) 
model.load_state_dict(torch.load("./checkpoint/voc_77.8.pth", map_location=torch.device('cpu')))
model = model.cuda().eval()
print("===>success loading model")


# 标签数据的容器
gt_boxes = []
gt_classes = []

# 预测数据的容器
pred_boxes = []
pred_classes = []
pred_scores = []

# 往两类容器中填值
num = 0
for img, boxes, classes in eval_loader:
    with torch.no_grad():
        out = model(img.cuda())
        pred_boxes.append(out[2][0].cpu().numpy())
        pred_classes.append(out[1][0].cpu().numpy())
        pred_scores.append(out[0][0].cpu().numpy())
    gt_boxes.append(boxes[0].numpy())
    gt_classes.append(classes[0].numpy())
    num += 1
    print(num, end='\r')
    
# 拿到数据后开始计算AP
pred_boxes, pred_classes, pred_scores = sort_by_score(pred_boxes, pred_classes, pred_scores)

all_AP = eval_ap_2d(gt_boxes, gt_classes, pred_boxes, pred_classes, pred_scores, 0.5,
                    len(eval_dataset.CLASSES_NAME))


# 计算每一类的AP
print("all classes AP=====>\n")
for key, value in all_AP.items():
    print('ap for {} is {}'.format(eval_dataset.id2name[int(key)], value))
    
# 计算mAP    
mAP = 0.
for class_id, class_mAP in all_AP.items():
    mAP += float(class_mAP)
    
mAP /= (len(eval_dataset.CLASSES_NAME) - 1)
print("mAP=====>%.3f\n" % mAP)

all classes AP=====>

ap for aeroplane is 0.8467718402495026
ap for bicycle is 0.8574527632509155
ap for bird is 0.8424980411647369
ap for boat is 0.6914078119084456
ap for bottle is 0.7095268316636022
ap for bus is 0.8592892408461792
ap for car is 0.902261583538996
ap for cat is 0.923486914964034
ap for chair is 0.5606274429578793
ap for cow is 0.8475735368616513
ap for diningtable is 0.662367861880638
ap for dog is 0.8932658016045054
ap for horse is 0.848708053056753
ap for motorbike is 0.8132639327493589
ap for person is 0.8577320539239368
ap for pottedplant is 0.4828017808821626
ap for sheep is 0.8291090740890208
ap for sofa is 0.7029149826989491
ap for train is 0.8702907780568689
ap for tvmonitor is 0.8216938510961964
mAP=====>0.791

