# Inference mmdetection model from other format

## 1. Inference mmdetection pytorch model

In [1]:
from mmdet.apis import init_detector, inference_detector, async_inference_detector
from mmdet.utils import register_all_modules
import torch
import torchvision.transforms as transforms
from PIL import Image
import mmcv
import numpy as np
import os
import glob
import xmltodict
from mmengine.fileio import list_from_file

# 指定模型的配置文件和 checkpoint 文件路径
config_file = 'configs/yolo/yolov3_mobilenetv2_8xb24-ms-416-300e_coco.py'
checkpoint_file = 'work_dirs/yolov3_mobilenetv2_pretrained/best_pascal_voc_mAP_epoch_27.pth'
class_dict = {
    'aeroplane': 0,
    'bicycle': 1,
    'bird': 2,
    'boat': 3,
    'bottle': 4,
    'bus': 5,
    'car': 6,
    'cat': 7,
    'chair': 8,
    'cow': 9,
    'diningtable': 10,
    'dog': 11,
    'horse': 12,
    'motorbike': 13,
    'person': 14,
    'pottedplant': 15,
    'sheep': 16,
    'sofa': 17,
    'train': 18,
    'tvmonitor': 19
}

#Register all modules in mmdet into the registries
register_all_modules()
# 若检测到有GPU则使用GPU
#device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device("cpu")

# 根据配置文件和 checkpoint 文件构建模型
model = init_detector(config_file, checkpoint_file, device=device)
model.eval()

# 指定要遍历的文件夹路径
img_ids = list_from_file("../data/VOCdevkit/VOC2007/ImageSets/Main/test.txt")

det_results, annotations = [], []
# 遍历所有的jpg文件
for img_id in img_ids:

    img_path = os.path.join('../data/VOCdevkit/VOC2007/JPEGImages/', img_id + '.jpg')
    # 获取预测结果
    img = mmcv.imread(img_path)
    output2 = inference_detector(model, img)

    pred_bboxes = output2.pred_instances.bboxes.cpu().numpy()
    pred_scores = output2.pred_instances.scores.cpu().numpy()
    pred_labels = output2.pred_instances.labels.cpu().numpy()
    dets = []
    for label in range(len(class_dict)):
        index = np.where(pred_labels == label)[0]
        pred_bbox_scores = np.hstack(
            [pred_bboxes[index], pred_scores[index].reshape((-1, 1))])
        dets.append(pred_bbox_scores)
    
    det_results.append(dets)
    
    # 解析XML文件
    ann_path = os.path.join('../data/VOCdevkit/VOC2007/Annotations/', img_id + '.xml')
    with open(ann_path) as f:
        xml_data = xmltodict.parse(f.read())
    bboxes, labels = [], []
    bboxes_ignore, labels_ignore = [], []
    obj = xml_data['annotation']['object']
    if type(obj) == list:
        for i in range(len(obj)):
            if obj[i]['difficult'] == '0':
                bboxes.append([int(obj[i]['bndbox']['xmin'])-1, int(obj[i]['bndbox']['ymin'])-1, 
                            int(obj[i]['bndbox']['xmax'])-1, int(obj[i]['bndbox']['ymax'])-1])
                labels.append(class_dict[obj[i]['name']])
            else:
                bboxes_ignore.append([int(obj[i]['bndbox']['xmin'])-1, int(obj[i]['bndbox']['ymin'])-1, 
                            int(obj[i]['bndbox']['xmax'])-1, int(obj[i]['bndbox']['ymax'])-1])
                labels_ignore.append(class_dict[obj[i]['name']])
    else:
        if obj['difficult'] == '0':
            bboxes.append([int(obj['bndbox']['xmin'])-1, int(obj['bndbox']['ymin'])-1, 
                        int(obj['bndbox']['xmax'])-1, int(obj['bndbox']['ymax'])-1])
            labels.append(class_dict[obj['name']])
        else:
            bboxes_ignore.append([int(obj['bndbox']['xmin'])-1, int(obj['bndbox']['ymin'])-1, 
                        int(obj['bndbox']['xmax'])-1, int(obj['bndbox']['ymax'])-1])
            labels_ignore.append(class_dict[obj['name']])

    bboxes = torch.tensor(bboxes).cpu().numpy().astype(np.float32)
    labels = torch.tensor(labels).cpu().numpy()
    bboxes_ignore = torch.tensor(bboxes_ignore).cpu().numpy().astype(np.float32)
    labels_ignore = torch.tensor(labels_ignore).cpu().numpy()

    ann = {'bboxes': bboxes,
           'labels': labels,
           'bboxes_ignore': torch.empty(size=(0,4)).cpu().numpy() if len(bboxes_ignore) == 0 else bboxes_ignore,
           'labels_ignore': torch.empty(dtype=torch.int64, size=(0,)).cpu().numpy() if len(labels_ignore) == 0 else labels_ignore}
    annotations.append(ann)

print(det_results)
print(annotations)


Loads checkpoint by local backend from path: work_dirs/yolov3_mobilenetv2_pretrained/best_pascal_voc_mAP_epoch_27.pth
[[array([], shape=(0, 5), dtype=float32), array([], shape=(0, 5), dtype=float32), array([], shape=(0, 5), dtype=float32), array([], shape=(0, 5), dtype=float32), array([], shape=(0, 5), dtype=float32), array([], shape=(0, 5), dtype=float32), array([], shape=(0, 5), dtype=float32), array([[6.5432304e+01, 2.2208360e+02, 1.9658977e+02, 3.7054263e+02,
        2.4731791e-01]], dtype=float32), array([[4.8686481e+01, 1.7183488e+02, 2.1795456e+02, 3.6908371e+02,
        1.2959308e-03]], dtype=float32), array([], shape=(0, 5), dtype=float32), array([], shape=(0, 5), dtype=float32), array([[5.7897236e+01, 2.3479834e+02, 2.0540417e+02, 3.8921899e+02,
        2.0908432e-01]], dtype=float32), array([], shape=(0, 5), dtype=float32), array([], shape=(0, 5), dtype=float32), array([[ 12.824857  ,  -1.6313791 , 340.32996   , 516.36884   ,
          0.83057475]], dtype=float32), array([],

In [8]:
annotations[0]

{'bboxes': array([[ 47., 239., 194., 370.],
        [  7.,  11., 351., 497.]], dtype=float32),
 'labels': array([11, 14]),
 'bboxes_ignore': array([], shape=(0, 4), dtype=float32),
 'labels_ignore': array([], dtype=int64)}

## 2. Evaluation preds with ground truth

In [2]:
from mmdet.evaluation.functional.mean_ap import eval_map

mean_ap, eval_results = eval_map(det_results, annotations, eval_mode='11points', use_legacy_coordinate=True)



+-------+------+------+--------+-------+
| class | gts  | dets | recall | ap    |
+-------+------+------+--------+-------+
| 0     | 285  | 479  | 0.628  | 0.583 |
| 1     | 337  | 1403 | 0.810  | 0.696 |
| 2     | 459  | 652  | 0.573  | 0.490 |
| 3     | 263  | 996  | 0.574  | 0.439 |
| 4     | 469  | 1359 | 0.358  | 0.247 |
| 5     | 213  | 665  | 0.789  | 0.646 |
| 6     | 1201 | 2220 | 0.733  | 0.669 |
| 7     | 358  | 594  | 0.765  | 0.679 |
| 8     | 756  | 3621 | 0.589  | 0.365 |
| 9     | 244  | 737  | 0.762  | 0.524 |
| 10    | 206  | 1379 | 0.801  | 0.584 |
| 11    | 489  | 1138 | 0.802  | 0.672 |
| 12    | 348  | 1146 | 0.851  | 0.716 |
| 13    | 325  | 1123 | 0.834  | 0.709 |
| 14    | 4528 | 8963 | 0.700  | 0.586 |
| 15    | 480  | 1508 | 0.500  | 0.339 |
| 16    | 242  | 548  | 0.661  | 0.513 |
| 17    | 239  | 957  | 0.795  | 0.578 |
| 18    | 282  | 634  | 0.787  | 0.688 |
| 19    | 308  | 936  | 0.701  | 0.597 |
+-------+------+------+--------+-------+
| mAP   |      