# Inference with mmdetection models in other formats

## 1. Inference with the mmdetection PyTorch model

In [3]:
from mmdet.apis import init_detector, inference_detector, async_inference_detector
from mmdet.utils import register_all_modules
import torch
import torchvision.transforms as transforms
from PIL import Image
import mmcv
import numpy as np
import os
import glob
import xmltodict

# Paths to the model's config file and to the trained checkpoint
config_file = 'configs/yolo/yolov3_mobilenetv2_8xb24-ms-416-300e_coco.py'
checkpoint_file = 'work_dirs/yolov3_mobilenetv2_pretrained/best_pascal_voc_mAP_epoch_27.pth'

# Class name -> integer label. A name's label is its position in the list
# below, so the list order must not be changed.
class_dict = {
    name: label
    for label, name in enumerate([
        'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
        'bus', 'car', 'cat', 'chair', 'cow',
        'diningtable', 'dog', 'horse', 'motorbike', 'person',
        'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
    ])
}

# Register all modules in mmdet into the registries
register_all_modules()
# Inference is pinned to the CPU here. To use a GPU when one is detected,
# swap in the commented-out line below instead.
#device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device("cpu")

# Build the model from the config file and the checkpoint file
model = init_detector(config_file, checkpoint_file, device=device)
# Put the model into evaluation mode before running inference
model.eval()


# Preprocessing for the raw forward pass below: resize the image to a fixed
# 224x224 size and convert it to a tensor.
preprocess = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# Load a single test image and preprocess it. The file is opened in a
# context manager so its handle is released as soon as the tensor exists.
sample_image_path = '../data/VOCdevkit/VOC2007/test_img/000001.jpg'
with Image.open(sample_image_path) as image:
    image_tensor = preprocess(image)
# Add a leading batch dimension to match the model's expected input
image_tensor = image_tensor.unsqueeze(0)

# Inference only: skip autograd bookkeeping for the forward pass and the
# box decoding.
with torch.no_grad():
    output = model(image_tensor)
    # NOTE(review): the image path is passed as the per-image meta entry,
    # exactly as before — confirm the head does not need a richer meta dict.
    batch_img_metas = [sample_image_path]
    results = model.bbox_head.predict_by_feat(*output, batch_img_metas=batch_img_metas)
print(len(output))
# A stray top-level `break` used to follow here. `break` outside a loop is a
# SyntaxError, so it prevented the whole cell from compiling; it is removed.
def parse_voc_objects(annotation, name_to_label):
    """Extract ground-truth boxes and labels from one parsed VOC annotation.

    Args:
        annotation: Mapping stored under the ``'annotation'`` key of the
            parsed XML document.
        name_to_label: Mapping from class name to integer label.

    Returns:
        A ``(bboxes, labels)`` pair of lists with one entry per object.
        Each box is ``[xmin, ymin, xmax, ymax]`` as ints. Both lists are
        empty when the annotation contains no ``object`` entry.

    Raises:
        KeyError: If an object lacks ``name``/``bndbox`` fields or its name
            is not present in ``name_to_label``.
    """
    objects = annotation.get('object') or []
    # A lone <object> is parsed as a mapping and several as a list;
    # normalise to a list so both cases share one code path.
    if not isinstance(objects, list):
        objects = [objects]

    bboxes, labels = [], []
    for obj in objects:
        box = obj['bndbox']
        bboxes.append([int(box[key]) for key in ('xmin', 'ymin', 'xmax', 'ymax')])
        labels.append(name_to_label[obj['name']])
    return bboxes, labels


# Folder holding the test images, and the folder holding their annotations
folder_path = "../data/VOCdevkit/VOC2007/test_img/"
annotation_dir = '../data/VOCdevkit/VOC2007/Annotations/'
# Collect every .jpg in the folder; sorted so the sample order is reproducible
jpg_files = sorted(glob.glob(os.path.join(folder_path, "*.jpg")))

data_samples = []
# One data sample (predictions + ground truth) per image
for file_path in jpg_files:
    # Run the detector on the image.
    # NOTE(review): the array is loaded in RGB order — confirm this matches
    # the channel order the config's data preprocessor expects.
    img = mmcv.imread(file_path, channel_order='rgb')
    output2 = inference_detector(model, img)
    pred_instances = output2.pred_instances

    # Locate and parse the annotation XML that shares the image's base name
    stem = os.path.splitext(os.path.basename(file_path))[0]
    xml_file = os.path.join(annotation_dir, stem + '.xml')
    with open(xml_file) as f:
        xml_data = xmltodict.parse(f.read())

    bboxes, labels = parse_voc_objects(xml_data['annotation'], class_dict)

    data_sample = {
        'pred_instances': {
            'bboxes': pred_instances.bboxes,
            'labels': pred_instances.labels,
            'scores': pred_instances.scores
        },
        'gt_instances': {
            # Explicit dtype/reshape keep the (N, 4) / (N,) int64 layout even
            # when the image has no annotated objects.
            'bboxes': torch.tensor(bboxes, dtype=torch.int64).reshape(-1, 4),
            'labels': torch.tensor(labels, dtype=torch.int64)
        },
        # No instances are marked as ignored
        'ignored_instances': {
            'bboxes': torch.empty(size=(0,4)),
            'labels': torch.empty(dtype=torch.int64, size=(0,))
        }
    }
    data_samples.append(data_sample)

print(len(data_samples))


Loads checkpoint by local backend from path: work_dirs/yolov3_mobilenetv2_pretrained/best_pascal_voc_mAP_epoch_27.pth
<class 'numpy.ndarray'>


TypeError: Unexpected type <class 'numpy.ndarray'>

: 

## 2. Evaluating predictions against ground truth

In [8]:
from mmengine.evaluator import Evaluator
from mmengine.fileio import load

# Dataset meta info handed to the evaluator: the class names, listed in
# label order.
a = {
    'classes': (
        'aeroplane bicycle bird boat bottle bus car cat chair cow diningtable '
        'dog horse motorbike person pottedplant sheep sofa train tvmonitor'
    ).split()
}

# Build the evaluator; `metrics` is the configuration of the metric to compute
metric_cfg = {'type': 'VOCMetric', 'metric': 'mAP', 'eval_mode': '11points'}
evaluator = Evaluator(metrics=metric_cfg)
evaluator.dataset_meta = a
print(evaluator.dataset_meta)

# Run the evaluator's offline evaluation interface to obtain the results.
# chunk_size is the number of samples processed per step; tune it to the
# available memory.
results = evaluator.offline_evaluate(data_samples, chunk_size=64)


{'classes': ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']}
05/25 10:58:47 - mmengine - INFO - 
---------------iou_thr: 0.5---------------
05/25 10:58:48 - mmengine - INFO - 
+-------------+------+------+--------+-------+
| class       | gts  | dets | recall | ap    |
+-------------+------+------+--------+-------+
| aeroplane   | 311  | 705  | 0.537  | 0.475 |
| bicycle     | 389  | 1842 | 0.663  | 0.537 |
| bird        | 576  | 720  | 0.398  | 0.321 |
| boat        | 393  | 1654 | 0.361  | 0.233 |
| bottle      | 657  | 1205 | 0.192  | 0.156 |
| bus         | 254  | 712  | 0.630  | 0.516 |
| car         | 1541 | 2840 | 0.517  | 0.466 |
| cat         | 370  | 773  | 0.678  | 0.583 |
| chair       | 1374 | 3625 | 0.370  | 0.222 |
| cow         | 329  | 477  | 0.422  | 0.325 |
| diningtable | 299  | 1486 | 0.572  | 0.385 |
| dog         | 53