# Inference with mmdetection models in other formats

## 1. Inference with the mmdetection PyTorch model

In [1]:
from mmdet.apis import init_detector, inference_detector, async_inference_detector
from mmdet.utils import register_all_modules
import torch
import torchvision.transforms as transforms
from PIL import Image
import mmcv
import numpy as np
import os
import glob
import xmltodict
from mmengine.fileio import list_from_file

# Paths of the model's config file and checkpoint file.
config_file = 'configs/yolo/yolov3_mobilenetv2_8xb24-ms-416-300e_coco.py'
checkpoint_file = 'work_dirs/yolov3_mobilenetv2_pretrained/best_pascal_voc_mAP_epoch_27.pth'

# Class name -> integer label. The label is the position of the name in the
# tuple below, so the order of the names must not change.
class_dict = {
    class_name: class_index
    for class_index, class_name in enumerate((
        'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
        'bus', 'car', 'cat', 'chair', 'cow',
        'diningtable', 'dog', 'horse', 'motorbike', 'person',
        'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
    ))
}

# Register all modules in mmdet into the registries. This must run before the
# detector is built from the config below.
register_all_modules()
# Inference is pinned to the CPU. The automatic "use CUDA when available"
# selection is kept here, disabled, for reference:
#device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device("cpu")

# Build the model from the config file and the checkpoint file, then put it
# in evaluation mode.
model = init_detector(config_file, checkpoint_file, device=device)
model.eval()

# Load the image ids to iterate over from the VOC2007 test split file.
img_ids = list_from_file("../data/VOCdevkit/VOC2007/ImageSets/Main/test.txt")

def _group_detections_by_class(pred_instances, num_classes):
    """Split one image's predictions into a per-class list of arrays.

    Args:
        pred_instances: object exposing ``bboxes``, ``scores`` and ``labels``
            tensors for a single image (the ``pred_instances`` field of the
            inference result).
        num_classes: number of class labels; labels are ``0..num_classes-1``.

    Returns:
        A list of length ``num_classes``. Entry ``c`` is an array whose rows are
        the bbox columns followed by the score of every prediction labelled
        ``c``; it has zero rows when the class has no prediction.
    """
    pred_bboxes = pred_instances.bboxes.cpu().numpy()
    pred_scores = pred_instances.scores.cpu().numpy()
    pred_labels = pred_instances.labels.cpu().numpy()
    dets = []
    for label in range(num_classes):
        mask = pred_labels == label
        dets.append(
            np.hstack([pred_bboxes[mask], pred_scores[mask].reshape((-1, 1))]))
    return dets


def _parse_voc_annotation(xml_data, name_to_label):
    """Convert a parsed VOC annotation into the ground-truth dict used for evaluation.

    Args:
        xml_data: mapping produced by ``xmltodict.parse`` for one annotation file.
        name_to_label: mapping from class name to integer label.

    Returns:
        A dict with ``bboxes`` (float32, shape (n, 4)), ``labels`` (int64,
        shape (n,)), ``bboxes_ignore`` and ``labels_ignore`` (same layout).
        Objects whose ``difficult`` field is not ``'0'`` go to the ``*_ignore``
        entries. All four arrays keep their shape and dtype when empty.

    Raises:
        KeyError: if an object's class name is not in ``name_to_label``.
    """
    # xmltodict yields a single mapping for one <object> and a list for several;
    # normalise both (and a missing <object>) to a list.
    objects = xml_data['annotation'].get('object', [])
    if not isinstance(objects, list):
        objects = [objects]

    bboxes, labels = [], []
    bboxes_ignore, labels_ignore = [], []
    for obj in objects:
        bndbox = obj['bndbox']
        # Every coordinate is shifted by -1, as in the original notebook code.
        box = [int(bndbox[key]) - 1 for key in ('xmin', 'ymin', 'xmax', 'ymax')]
        label = name_to_label[obj['name']]
        if obj['difficult'] == '0':
            bboxes.append(box)
            labels.append(label)
        else:
            bboxes_ignore.append(box)
            labels_ignore.append(label)

    # reshape(-1, 4) and the explicit dtypes keep empty results at shape (0, 4)
    # float32 / (0,) int64 instead of a 1-D float array.
    return {
        'bboxes': np.array(bboxes, dtype=np.float32).reshape(-1, 4),
        'labels': np.array(labels, dtype=np.int64),
        'bboxes_ignore': np.array(bboxes_ignore, dtype=np.float32).reshape(-1, 4),
        'labels_ignore': np.array(labels_ignore, dtype=np.int64),
    }


image_dir = '../data/VOCdevkit/VOC2007/JPEGImages/'
annotation_dir = '../data/VOCdevkit/VOC2007/Annotations/'

det_results, annotations = [], []
# Run inference and load the ground truth for every image id of the split.
for img_id in img_ids:
    img_path = os.path.join(image_dir, img_id + '.jpg')
    # NOTE(review): the image is loaded in RGB order; confirm this matches the
    # channel order the model's data preprocessor expects for ndarray inputs.
    img = mmcv.imread(img_path, channel_order='rgb')
    output2 = inference_detector(model, img)
    det_results.append(
        _group_detections_by_class(output2.pred_instances, len(class_dict)))

    # Parse the XML annotation that belongs to the same image id.
    ann_path = os.path.join(annotation_dir, img_id + '.xml')
    with open(ann_path) as f:
        xml_data = xmltodict.parse(f.read())
    annotations.append(_parse_voc_annotation(xml_data, class_dict))

# Print a short summary instead of dumping every array of every image.
print(f'Collected detections for {len(det_results)} images '
      f'and {len(annotations)} annotation records')


Loads checkpoint by local backend from path: work_dirs/yolov3_mobilenetv2_pretrained/best_pascal_voc_mAP_epoch_27.pth
<DetDataSample(

    META INFORMATION
    ori_shape: (500, 353)
    batch_input_shape: (224, 160)
    scale_factor: (0.4475920679886686, 0.448)
    img_id: 0
    img_shape: (224, 158)
    pad_shape: (224, 160)
    img_path: None

    DATA FIELDS
    gt_instances: <InstanceData(
        
            META INFORMATION
        
            DATA FIELDS
            bboxes: tensor([], size=(0, 4))
            labels: tensor([], dtype=torch.int64)
        ) at 0x7f7c64b7f700>
    pred_instances: <InstanceData(
        
            META INFORMATION
        
            DATA FIELDS
            bboxes: tensor([[  1.4124,   2.3510, 347.7869, 496.0396],
                        [ 63.7587, 245.1837, 171.3031, 365.1945],
                        [ 63.7587, 245.1837, 171.3031, 365.1945]])
            scores: tensor([0.9992, 0.6674, 0.3348])
            labels: tensor([14, 11,  7])
      

In [8]:
# Spot-check the ground-truth record built for the first image id.
annotations[0]

{'bboxes': array([[ 47., 239., 194., 370.],
        [  7.,  11., 351., 497.]], dtype=float32),
 'labels': array([11, 14]),
 'bboxes_ignore': array([], shape=(0, 4), dtype=float32),
 'labels_ignore': array([], dtype=int64)}

## 2. Evaluating the predictions against the ground truth

In [9]:
from mmdet.evaluation.functional.mean_ap import eval_map

# Score the per-image, per-class detections against the parsed annotations.
# NOTE(review): eval_mode='11points' and use_legacy_coordinate=True are
# presumably chosen to match the PASCAL VOC2007 protocol — confirm against the
# eval_map documentation.
mean_ap, eval_results = eval_map(det_results, annotations, eval_mode='11points', use_legacy_coordinate=True)



+-------+------+------+--------+-------+
| class | gts  | dets | recall | ap    |
+-------+------+------+--------+-------+
| 0     | 285  | 705  | 0.582  | 0.496 |
| 1     | 337  | 1842 | 0.748  | 0.613 |
| 2     | 459  | 720  | 0.497  | 0.399 |
| 3     | 263  | 1654 | 0.517  | 0.319 |
| 4     | 469  | 1205 | 0.260  | 0.208 |
| 5     | 213  | 712  | 0.732  | 0.591 |
| 6     | 1201 | 2840 | 0.649  | 0.566 |
| 7     | 358  | 773  | 0.696  | 0.587 |
| 8     | 756  | 3625 | 0.549  | 0.311 |
| 9     | 244  | 477  | 0.537  | 0.404 |
| 10    | 206  | 1486 | 0.718  | 0.491 |
| 11    | 489  | 1325 | 0.714  | 0.577 |
| 12    | 348  | 899  | 0.721  | 0.591 |
| 13    | 325  | 1460 | 0.806  | 0.690 |
| 14    | 4528 | 8503 | 0.624  | 0.533 |
| 15    | 480  | 1324 | 0.448  | 0.315 |
| 16    | 242  | 511  | 0.550  | 0.435 |
| 17    | 239  | 1156 | 0.770  | 0.521 |
| 18    | 282  | 750  | 0.716  | 0.638 |
| 19    | 308  | 1147 | 0.630  | 0.482 |
+-------+------+------+--------+-------+
| mAP   |      