# 使用预训练模型、基于单目图像检测场景中的物体

## 使用 Python API

预先下载一个配置文件和预训练模型，保存到 checkpoints 文件夹

这里给出的是 SMOKE 和 FCOS3D 各一个示例模型

In [None]:
# Use mim to fetch the SMOKE model named by --config into the checkpoints/ folder
!mim download mmdet3d --config smoke_dla34_pytorch_dlaneck_gn-all_8x4_6x_kitti-mono3d --dest checkpoints
# Download the FCOS3D checkpoint with curl (-s silent, -L follow redirects, -O keep the remote file name)
!curl -sLO https://download.openmmlab.com/mmdetection3d/v0.1.0_models/fcos3d/fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d/fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d_20210715_235813-4bed5239.pth
# Move the downloaded FCOS3D checkpoint into checkpoints/ alongside the SMOKE files
!mv fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d_20210715_235813-4bed5239.pth checkpoints/

准备一幅包含汽车、行人的街景图像，为了方便我们使用 `mmdet3d/demo/data` 下提供的一个 nuscenes 数据集中的图像文件

如果希望尝试更多数据，我们还从 KITTI 数据集中裁剪了一个子集用于展示。下载解压后，点云文件储存在 `data/kitti/training/velodyne/` 和 `data/kitti/testing/velodyne/` 目录下，本文后面 demo 用到的测试图像位于 `data/kitti/testing/image_2/` 目录下

In [None]:
# Optional: uncomment the two lines below to download kitti_tiny_3D.zip and unpack it into data/kitti
# !curl -sL -o kitti_tiny_3D.zip "https://onedrive.live.com/download?resid=CB1C03091115D5EA%21119&authkey=!AO57a1ru2Tz2jHQ"
# !unzip -d data/kitti kitti_tiny_3D.zip

初始化模型并执行推理

In [None]:
from mmdet3d.apis import init_model

# Alternative: FCOS3D config/checkpoint pair (uncomment these and comment out the SMOKE pair to switch)
# config_file = 'configs/fcos3d/fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d.py'
# checkpoint_file = 'checkpoints/fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d_20210715_235813-4bed5239.pth'

# Active choice: SMOKE config and its checkpoint file under checkpoints/
config_file = 'configs/smoke/smoke_dla34_pytorch_dlaneck_gn-all_8x4_6x_kitti-mono3d.py'
checkpoint_file = 'checkpoints/smoke_dla34_pytorch_dlaneck_gn-all_8x4_6x_kitti-mono3d_20210929_015553-d46d9bb0.pth'

# Build the model from the config and checkpoint on the first CUDA device;
# `model` is reused by the inference cells that follow
model = init_model(config_file, checkpoint_file, device='cuda:0')

In [None]:
# test a single sample

from mmdet3d.apis import inference_mono_3d_detector
# Sample image shipped under demo/data/nuscenes and its companion *_mono3d.coco.json annotation file
# NOTE(review): if `model` is the KITTI-trained SMOKE model initialised in the previous cell,
# confirm that pairing it with a nuScenes sample is intended.
image = 'demo/data/nuscenes/n015-2018-07-24-11-22-45+0800__CAM_BACK__1532402927637525.jpg'
ann = 'demo/data/nuscenes/n015-2018-07-24-11-22-45+0800__CAM_BACK__1532402927637525_mono3d.coco.json'
# `result` and `data` are consumed by show_result_meshlab in the next cell
result, data = inference_mono_3d_detector(model, image, ann)

In [None]:
# show the results

from mmdet3d.apis import show_result_meshlab
# Output directory passed to show_result_meshlab (the current working directory)
out_dir = './'
# show=True requests on-screen display; score_thr=0.1 is presumably the minimum detection
# score that gets drawn — confirm against the mmdet3d API docs
show_result_meshlab(data, result, out_dir, show=True, score_thr=0.1, task='mono-det')

## 使用 demo 程序

Demo 程序命令行参数

```
python demo/mono_det_demo.py ${IMAGE_FILE} ${ANNOTATION_FILE} ${CONFIG_FILE} ${CHECKPOINT_FILE} [--device ${GPU_ID}] [--out-dir ${OUT_DIR}] [--show]
```

In [None]:
# Run the demo script with, in order: image file, annotation file, config file, checkpoint file.
# Here: a KITTI test image with the SMOKE config/checkpoint, with --show enabled.
!python demo/mono_det_demo.py \
    data/kitti/testing/image_2/000002.png \
    data/kitti/kitti_infos_test_mono3d.coco.json \
    configs/smoke/smoke_dla34_pytorch_dlaneck_gn-all_8x4_6x_kitti-mono3d.py \
    checkpoints/smoke_dla34_pytorch_dlaneck_gn-all_8x4_6x_kitti-mono3d_20210929_015553-d46d9bb0.pth \
    --show

In [None]:
!python demo/mono_det_demo.py \
    demo/data/nuscenes/n015-2018-07-24-11-22-45+0800__CAM_BACK__1532402927637525.jpg \
    demo/data/nuscenes/n015-2018-07-24-11-22-45+0800__CAM_BACK__1532402927637525_mono3d.coco.json \
    configs/fcos3d/fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d_finetune.py \
    .\checkpoints\fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d_finetune_20210717_095645-8d806dc2.pth \
    --show

### 推理自己的图像

In [1]:
from mmdet3d.apis import init_model

# Active choice: FCOS3D config and its checkpoint file under checkpoints/
config_file = 'configs/fcos3d/fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d.py'
checkpoint_file = 'checkpoints/fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d_20210715_235813-4bed5239.pth'

# Alternative: SMOKE config/checkpoint pair (uncomment these and comment out the FCOS3D pair to switch)
# config_file = 'configs/smoke/smoke_dla34_pytorch_dlaneck_gn-all_8x4_6x_kitti-mono3d.py'
# checkpoint_file = 'checkpoints/smoke_dla34_pytorch_dlaneck_gn-all_8x4_6x_kitti-mono3d_20210929_015553-d46d9bb0.pth'

# Build the model on the first CUDA device; the checkpoint is loaded from the local path given above.
# `model` is reused by the image and video inference cells that follow.
model = init_model(config_file, checkpoint_file, device='cuda:0')

  warn(f"Failed to load image Python extension: {e}")


load checkpoint from local path: checkpoints/fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d_20210715_235813-4bed5239.pth


In [None]:
# test a single sample

from mmdet3d.apis import inference_mono_3d_detector
# User-supplied image and its annotation file, both expected in the current working directory
image = 'game.png'
ann = 'game.json'
# `result` and `data` are consumed by show_result_meshlab in the next cell
result, data = inference_mono_3d_detector(model, image, ann)

In [None]:
from mmdet3d.apis import show_result_meshlab
# Output directory passed to show_result_meshlab (the current working directory)
out_dir = './'
# show=True requests on-screen display; score_thr=0.15 is presumably the minimum detection
# score that gets drawn — confirm against the mmdet3d API docs
show_result_meshlab(data, result, out_dir, show=True, score_thr=0.15, task='mono-det')

### 在视频上推理

In [2]:
import mmcv 
from mmdet3d.apis import inference_mono_3d_detector, show_result_meshlab

# Run the mono-3D detector on every frame of a video and collect the paths of the
# rendered prediction images in `frames` (used by the video-writing cell below).
video = mmcv.VideoReader('game.mp4')
ann_tmpl = 'game.json'
tmp_out = 'tmp'
frames = []

# The annotation template is the same for every frame, so read it from disk once;
# only its image file name is rewritten per frame.
ann = mmcv.load(ann_tmpl)

# iterate over all frames
for i, frame in enumerate(video):
    stem = f'{i:04d}'  # zero-padded frame index shared by all per-frame file names

    # Dump the frame to disk because the detector is called with a file path
    imfn = f'game/{stem}.jpg'
    mmcv.imwrite(frame, imfn)

    # Write a per-frame copy of the annotation that points at this frame's image
    annfn = f'game/{stem}.json'
    ann['images'][0]['file_name'] = imfn
    mmcv.dump(ann, annfn)

    result, data = inference_mono_3d_detector(model, imfn, annfn)
    show_result_meshlab(data, result, tmp_out, show=False, score_thr=0.15, task='mono-det')

    # Path where the rendered prediction for this frame is expected to be saved
    # (assumes show_result_meshlab writes <out_dir>/<stem>/<stem>_pred.png — confirm)
    frames.append(f'{tmp_out}/{stem}/{stem}_pred.png')

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [5]:
import cv2

# Assemble the rendered prediction images listed in `frames` into game-out.mp4
# (mp4v codec, 20 fps).
# NOTE(review): the frame size is hard-coded to 1920x1080; images of a different size
# may not be encoded correctly — confirm it matches the rendered predictions.
vwriter = cv2.VideoWriter('game-out.mp4', cv2.VideoWriter_fourcc(*'mp4v'), 20.0, (1920, 1080))

try:
    for frame in frames:
        img = cv2.imread(frame)
        if img is None:
            # cv2.imread returns None for a missing or unreadable file; skip it
            # instead of passing None to the writer
            print(f'skipping unreadable frame: {frame}')
            continue
        vwriter.write(img)
finally:
    # Release the writer so the output file is flushed and finalized
    vwriter.release()