# 数据集准备（MOT17_tiny）

## 下载

In [1]:
!wget https://download.openmmlab.com/mmtracking/data/MOT17_tiny.zip -P ./datasets
!unzip -q ./datasets/MOT17_tiny.zip -d ./datasets

--2022-10-28 04:22:30--  https://download.openmmlab.com/mmtracking/data/MOT17_tiny.zip
Resolving download.openmmlab.com (download.openmmlab.com)... 47.102.71.233
Connecting to download.openmmlab.com (download.openmmlab.com)|47.102.71.233|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 344566302 (329M) [application/zip]
Saving to: ‘./datasets/MOT17_tiny.zip’


2022-10-28 04:23:12 (7.92 MB/s) - ‘./datasets/MOT17_tiny.zip’ saved [344566302/344566302]



## 转换为 COCO 格式

In [5]:
# Convert the MOT17_tiny annotations to COCO format; results are written to
# ./datasets/MOT17_tiny/annotations.
# The recorded run of this cell produced train/test/half-train/half-val
# *_cocoformat.json and *_detections.pkl files; presumably --split-train yields
# the half-* splits and --convert-det the detection pickles — confirm against
# mot2coco.py.
!python ./tools/convert_datasets/mot/mot2coco.py -i ./datasets/MOT17_tiny/ -o ./datasets/MOT17_tiny/annotations --split-train --convert-det
# Crop pedestrian patches from the original dataset for training the ReID model. It may take a few minutes.
# Any previous reid output directory is deleted first so the crops are
# regenerated from scratch when the cell is re-run.
!rm -rf ./datasets/MOT17_tiny/reid
# NOTE(review): --val-split / --vis-threshold semantics are defined by
# mot2reid.py and are not visible here — confirm before changing the values.
!python ./tools/convert_datasets/mot/mot2reid.py -i ./datasets/MOT17_tiny/ -o ./datasets/MOT17_tiny/reid --val-split 0.9 --vis-threshold 0.8

Converting train set to COCO format
100%|█████████████████████████████████████████████| 2/2 [00:01<00:00,  1.78it/s]
train has 145 instances.
Done! Saved as ./datasets/MOT17_tiny/annotations/train_cocoformat.json and ./datasets/MOT17_tiny/annotations/train_detections.pkl
Converting test set to COCO format
0it [00:00, ?it/s]
test has 0 instances.
Done! Saved as ./datasets/MOT17_tiny/annotations/test_cocoformat.json and ./datasets/MOT17_tiny/annotations/test_detections.pkl
Converting half-train set to COCO format
100%|█████████████████████████████████████████████| 2/2 [00:02<00:00,  1.05s/it]
half-train has 104 instances.
Done! Saved as ./datasets/MOT17_tiny/annotations/half-train_cocoformat.json and ./datasets/MOT17_tiny/annotations/half-train_detections.pkl
Converting half-val set to COCO format
100%|█████████████████████████████████████████████| 2/2 [00:02<00:00,  1.06s/it]
half-val has 122 instances.
Done! Saved as ./datasets/MOT17_tiny/annotations/half-val_cocoformat.json and ./data

# 训练

## 训练detector

In [1]:
import mmcv
from mmdet.apis import set_random_seed

# Start from the stock MOT17 half-split Faster R-CNN config and retarget it at
# the MOT17_tiny sample dataset.
cfg = mmcv.Config.fromfile('./configs/det/faster-rcnn_r50_fpn_4e_mot17-half.py')

# The download cell of this notebook extracts the dataset under ./datasets, so
# every path that the stock config resolves under data/MOT17/ is redirected
# there (same root as the ReID and validation cells use).
MOT17_ROOT = 'data/MOT17/'
MOT17_TINY_ROOT = 'datasets/MOT17_tiny/'
cfg.data_root = MOT17_TINY_ROOT
for split in ('train', 'val', 'test'):
    split_cfg = cfg.data[split]
    split_cfg.ann_file = split_cfg.ann_file.replace(MOT17_ROOT, MOT17_TINY_ROOT)
    split_cfg.img_prefix = split_cfg.img_prefix.replace(MOT17_ROOT, MOT17_TINY_ROOT)

# Checkpoints and logs of the detector run go here; the validation and
# inference cells load ./tutorial_exps/detector/epoch_4.pth from this folder.
cfg.work_dir = './tutorial_exps/detector'

# Fix the seed so repeated runs are comparable.
cfg.seed = 0
set_random_seed(cfg.seed, deterministic=False)

cfg.device = "cuda"
# NOTE(review): GPU index 1 requires at least two visible GPUs; on a
# single-GPU machine switch to the commented-out alternative (GPU 0).
# cfg.gpu_ids = range(1)
cfg.gpu_ids = [1]
print(f'Config:\n{cfg.pretty_text}')

  'On January 1, 2023, MMCV will release v2.0.0, in which it will remove '


Config:
model = dict(
    detector=dict(
        type='FasterRCNN',
        backbone=dict(
            type='ResNet',
            depth=50,
            num_stages=4,
            out_indices=(0, 1, 2, 3),
            frozen_stages=1,
            norm_cfg=dict(type='BN', requires_grad=True),
            norm_eval=True,
            style='pytorch',
            init_cfg=dict(
                type='Pretrained', checkpoint='torchvision://resnet50')),
        neck=dict(
            type='FPN',
            in_channels=[256, 512, 1024, 2048],
            out_channels=256,
            num_outs=5),
        rpn_head=dict(
            type='RPNHead',
            in_channels=256,
            feat_channels=256,
            anchor_generator=dict(
                type='AnchorGenerator',
                scales=[8],
                ratios=[0.5, 1.0, 2.0],
                strides=[4, 8, 16, 32, 64]),
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0.0,

In [2]:
import os.path as osp

from mmdet.apis import train_detector as train_model
from mmdet.models import build_detector as build_model
from mmtrack.datasets import build_dataset

# Make sure the output directory for checkpoints and logs exists.
work_dir = osp.abspath(cfg.work_dir)
mmcv.mkdir_or_exist(work_dir)

# Build only the detector sub-model of the config and initialise its weights.
model = build_model(cfg.model.detector)
model.init_weights()

# train_model expects a list of datasets; the class names of the training set
# are attached to the model.
train_set = build_dataset(cfg.data.train)
datasets = [train_set]
model.CLASSES = train_set.CLASSES

# validate=True runs an evaluation pass during training (see the recorded log).
train_model(model, datasets, cfg, validate=True)

2022-11-08 14:16:39,394 - mmcv - INFO - initialize FasterRCNN with init_cfg {'type': 'Pretrained', 'checkpoint': 'http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_2x_coco/faster_rcnn_r50_fpn_2x_coco_bbox_mAP-0.384_20200504_210434-a5d8aa15.pth'}
2022-11-08 14:16:39,395 - mmcv - INFO - load model from: http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_2x_coco/faster_rcnn_r50_fpn_2x_coco_bbox_mAP-0.384_20200504_210434-a5d8aa15.pth
2022-11-08 14:16:39,396 - mmcv - INFO - load checkpoint from http path: http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_2x_coco/faster_rcnn_r50_fpn_2x_coco_bbox_mAP-0.384_20200504_210434-a5d8aa15.pth
Downloading: "http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_2x_coco/faster_rcnn_r50_fpn_2x_coco_bbox_mAP-0.384_20200504_210434-a5d8aa15.pth" to /root/.cache/torch/hub/checkpoints/faster_rcnn_r50_fpn_2x_coco_bbox_mAP-0.384_20200504_210434-a5d8aa1

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=167290877.0), HTML(value='')))


size mismatch for roi_head.bbox_head.fc_cls.weight: copying a param with shape torch.Size([81, 1024]) from checkpoint, the shape in current model is torch.Size([2, 1024]).
size mismatch for roi_head.bbox_head.fc_cls.bias: copying a param with shape torch.Size([81]) from checkpoint, the shape in current model is torch.Size([2]).
size mismatch for roi_head.bbox_head.fc_reg.weight: copying a param with shape torch.Size([320, 1024]) from checkpoint, the shape in current model is torch.Size([4, 1024]).
size mismatch for roi_head.bbox_head.fc_reg.bias: copying a param with shape torch.Size([320]) from checkpoint, the shape in current model is torch.Size([4]).



loading annotations into memory...
Done (t=0.17s)
creating index...
index created!


2022-11-08 14:16:57,397 - mmdet - INFO - Automatic scaling of learning rate (LR) has been disabled.
2022-11-08 14:16:57,540 - mmdet - INFO - Start running, host: root@5f2d80d40d9d, work_dir: /workdir/tutorial_exps/detector
2022-11-08 14:16:57,541 - mmdet - INFO - Hooks will be executed in the following order:
before_run:
(VERY_HIGH   ) StepLrUpdaterHook                  
(NORMAL      ) CheckpointHook                     
(LOW         ) EvalHook                           
(VERY_LOW    ) TextLoggerHook                     
 -------------------- 
before_train_epoch:
(VERY_HIGH   ) StepLrUpdaterHook                  
(LOW         ) IterTimerHook                      
(LOW         ) EvalHook                           
(VERY_LOW    ) TextLoggerHook                     
 -------------------- 
before_train_iter:
(VERY_HIGH   ) StepLrUpdaterHook                  
(LOW         ) IterTimerHook                      
(LOW         ) EvalHook                           
 -------------------- 
after_tr

loading annotations into memory...
Done (t=0.11s)
creating index...
index created!


  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
2022-11-08 14:17:17,293 - mmdet - INFO - Epoch [1][50/414]	lr: 9.902e-03, eta: 0:10:31, time: 0.393, data_time: 0.047, memory: 2948, loss_rpn_cls: 0.0944, loss_rpn_bbox: 0.1177, loss_cls: 0.4038, acc: 80.9512, loss_bbox: 0.3394, loss: 0.9553
2022-11-08 14:17:34,604 - mmdet - INFO - Epoch [1][100/414]	lr: 1.980e-02, eta: 0:09:35, time: 0.346, data_time: 0.005, memory: 2948, loss_rpn_cls: 0.0491, loss_rpn_bbox: 0.1251, loss_cls: 0.3192, acc: 86.2480, loss_bbox: 0.2416, loss: 0.7351
2022-11-08 14:17:52,177 - mmdet - INFO - Epoch [1][150/414]	lr: 2.000e-02, eta: 0:09:07, time: 0.352, data_time: 0.005, memory: 2948, loss_rpn_cls: 0.0363, loss_rpn_bbox: 0.1061, loss_cls: 0.3055, acc: 87.0977, loss_bbox: 0.2132, loss: 0.6611
2022-11-08 14:18:09,658 - mmdet - INFO - Epoch [1][200/414]	lr: 2.000e-02, eta: 0:08:44, time: 0.349, data_time: 0.005, memory: 2948, loss_rpn_cls: 0.0234, loss_rpn_bbox: 0.0851, loss_cls:

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 823/823, 14.3 task/s, elapsed: 57s, ETA:     0s

2022-11-08 14:20:23,062 - mmdet - INFO - Evaluating bbox...


Loading and preparing results...
DONE (t=0.15s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=55.96s).
Accumulating evaluation results...


2022-11-08 14:21:20,755 - mmdet - INFO - 
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.481
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=1000 ] = 0.804
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=1000 ] = 0.532
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=1000 ] = 0.074
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=1000 ] = 0.407
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=1000 ] = 0.625
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.529
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=300 ] = 0.529
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=1000 ] = 0.529
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=1000 ] = 0.073
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=1000 ] = 0.465
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=1000

DONE (t=1.33s).


2022-11-08 14:21:39,803 - mmdet - INFO - Epoch [2][50/414]	lr: 2.000e-02, eta: 0:06:52, time: 0.378, data_time: 0.047, memory: 2948, loss_rpn_cls: 0.0130, loss_rpn_bbox: 0.0610, loss_cls: 0.2159, acc: 90.6836, loss_bbox: 0.1471, loss: 0.4371
2022-11-08 14:21:56,736 - mmdet - INFO - Epoch [2][100/414]	lr: 2.000e-02, eta: 0:06:34, time: 0.339, data_time: 0.005, memory: 2948, loss_rpn_cls: 0.0123, loss_rpn_bbox: 0.0575, loss_cls: 0.2060, acc: 91.2012, loss_bbox: 0.1405, loss: 0.4163
2022-11-08 14:22:13,867 - mmdet - INFO - Epoch [2][150/414]	lr: 2.000e-02, eta: 0:06:16, time: 0.343, data_time: 0.005, memory: 2948, loss_rpn_cls: 0.0126, loss_rpn_bbox: 0.0518, loss_cls: 0.2002, acc: 91.3574, loss_bbox: 0.1325, loss: 0.3971
2022-11-08 14:22:31,002 - mmdet - INFO - Epoch [2][200/414]	lr: 2.000e-02, eta: 0:05:59, time: 0.342, data_time: 0.005, memory: 2948, loss_rpn_cls: 0.0129, loss_rpn_bbox: 0.0613, loss_cls: 0.1994, acc: 91.3672, loss_bbox: 0.1333, loss: 0.4069
2022-11-08 14:22:48,168 - mmd

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 823/823, 14.5 task/s, elapsed: 57s, ETA:     0s

2022-11-08 14:24:42,072 - mmdet - INFO - Evaluating bbox...


Loading and preparing results...
DONE (t=0.06s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=59.98s).
Accumulating evaluation results...


2022-11-08 14:25:43,649 - mmdet - INFO - 
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.476
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=1000 ] = 0.809
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=1000 ] = 0.505
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=1000 ] = 0.081
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=1000 ] = 0.409
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=1000 ] = 0.614
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.528
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=300 ] = 0.528
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=1000 ] = 0.528
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=1000 ] = 0.093
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=1000 ] = 0.468
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=1000

DONE (t=1.30s).


2022-11-08 14:26:02,806 - mmdet - INFO - Epoch [3][50/414]	lr: 2.000e-02, eta: 0:04:24, time: 0.381, data_time: 0.047, memory: 2948, loss_rpn_cls: 0.0115, loss_rpn_bbox: 0.0468, loss_cls: 0.1680, acc: 92.7695, loss_bbox: 0.1098, loss: 0.3361
2022-11-08 14:26:19,763 - mmdet - INFO - Epoch [3][100/414]	lr: 2.000e-02, eta: 0:04:07, time: 0.339, data_time: 0.005, memory: 2948, loss_rpn_cls: 0.0096, loss_rpn_bbox: 0.0476, loss_cls: 0.1770, acc: 92.5254, loss_bbox: 0.1169, loss: 0.3511
2022-11-08 14:26:36,935 - mmdet - INFO - Epoch [3][150/414]	lr: 2.000e-02, eta: 0:03:50, time: 0.344, data_time: 0.005, memory: 2948, loss_rpn_cls: 0.0111, loss_rpn_bbox: 0.0580, loss_cls: 0.1772, acc: 92.5449, loss_bbox: 0.1170, loss: 0.3633
2022-11-08 14:26:54,046 - mmdet - INFO - Epoch [3][200/414]	lr: 2.000e-02, eta: 0:03:33, time: 0.342, data_time: 0.005, memory: 2948, loss_rpn_cls: 0.0102, loss_rpn_bbox: 0.0456, loss_cls: 0.1711, acc: 92.7285, loss_bbox: 0.1140, loss: 0.3409
2022-11-08 14:27:11,161 - mmd

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 823/823, 14.6 task/s, elapsed: 56s, ETA:     0s

2022-11-08 14:29:04,846 - mmdet - INFO - Evaluating bbox...


Loading and preparing results...
DONE (t=0.06s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=48.78s).
Accumulating evaluation results...


2022-11-08 14:29:55,119 - mmdet - INFO - 
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.491
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=1000 ] = 0.800
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=1000 ] = 0.545
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=1000 ] = 0.060
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=1000 ] = 0.411
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=1000 ] = 0.639
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.534
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=300 ] = 0.534
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=1000 ] = 0.534
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=1000 ] = 0.166
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=1000 ] = 0.468
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=1000

DONE (t=1.21s).


2022-11-08 14:30:14,282 - mmdet - INFO - Epoch [4][50/414]	lr: 2.000e-03, eta: 0:02:03, time: 0.381, data_time: 0.047, memory: 2948, loss_rpn_cls: 0.0059, loss_rpn_bbox: 0.0321, loss_cls: 0.1391, acc: 94.1230, loss_bbox: 0.0916, loss: 0.2688
2022-11-08 14:30:31,305 - mmdet - INFO - Epoch [4][100/414]	lr: 2.000e-03, eta: 0:01:46, time: 0.340, data_time: 0.005, memory: 2948, loss_rpn_cls: 0.0052, loss_rpn_bbox: 0.0259, loss_cls: 0.1295, acc: 94.5742, loss_bbox: 0.0816, loss: 0.2422
2022-11-08 14:30:48,483 - mmdet - INFO - Epoch [4][150/414]	lr: 2.000e-03, eta: 0:01:29, time: 0.344, data_time: 0.005, memory: 2948, loss_rpn_cls: 0.0049, loss_rpn_bbox: 0.0269, loss_cls: 0.1293, acc: 94.5957, loss_bbox: 0.0831, loss: 0.2443
2022-11-08 14:31:05,551 - mmdet - INFO - Epoch [4][200/414]	lr: 2.000e-03, eta: 0:01:12, time: 0.341, data_time: 0.005, memory: 2948, loss_rpn_cls: 0.0053, loss_rpn_bbox: 0.0281, loss_cls: 0.1359, acc: 94.1719, loss_bbox: 0.0884, loss: 0.2576
2022-11-08 14:31:22,757 - mmd

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 823/823, 14.5 task/s, elapsed: 57s, ETA:     0s

2022-11-08 14:33:16,816 - mmdet - INFO - Evaluating bbox...


Loading and preparing results...
DONE (t=0.05s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=44.30s).
Accumulating evaluation results...


2022-11-08 14:34:02,438 - mmdet - INFO - 
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.511
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=1000 ] = 0.798
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=1000 ] = 0.576
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=1000 ] = 0.094
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=1000 ] = 0.429
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=1000 ] = 0.670
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.549
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=300 ] = 0.549
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=1000 ] = 0.549
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=1000 ] = 0.138
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=1000 ] = 0.475
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=1000

DONE (t=1.05s).


## 训练ReID

### 数据集设置及模型设置

In [5]:
import mmcv
from mmdet.apis import set_random_seed

# Start from the stock MOT17 ReID config and retarget it at the MOT17_tiny
# crops generated by mot2reid.py in the dataset-preparation section.
cfg = mmcv.Config.fromfile('./configs/reid/resnet50_b32x8_MOT17.py')

# Keep data_root in agreement with the paths actually used below (the dataset
# lives under ./datasets, not ./data).
MOT17_ROOT = 'data/MOT17/'
MOT17_TINY_ROOT = 'datasets/MOT17_tiny/'
cfg.data_root = MOT17_TINY_ROOT

# val/test annotation files keep their stock file names under the new root;
# the training split uses the explicit train_9.txt meta file.
for split in ('val', 'test'):
    cfg.data[split].ann_file = cfg.data[split].ann_file.replace(
        MOT17_ROOT, MOT17_TINY_ROOT)
cfg.data.train.ann_file = 'datasets/MOT17_tiny/reid/meta/train_9.txt'

for split in ('train', 'val', 'test'):
    cfg.data[split].data_prefix = cfg.data[split].data_prefix.replace(
        MOT17_ROOT, MOT17_TINY_ROOT)

# learning policy: linear warm-up over the first 200 iterations, LR step after
# epoch 1, two epochs in total (the later cells load epoch_2.pth).
cfg.lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=200,
    warmup_ratio=1.0 / 200,
    step=[1])
cfg.total_epochs = 2

cfg.work_dir = './tutorial_exps/reid'

# Fix the seed so repeated runs are comparable.
cfg.seed = 0
set_random_seed(cfg.seed, deterministic=False)

cfg.device = "cuda"
# NOTE(review): GPU index 1 requires at least two visible GPUs; on a
# single-GPU machine switch to the commented-out alternative (GPU 0).
# cfg.gpu_ids = range(1)
cfg.gpu_ids = [1]
print(f'Config:\n{cfg.pretty_text}')

Config:
dataset_type = 'ReIDDataset'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadMultiImagesFromFile', to_float32=True),
    dict(
        type='SeqResize',
        img_scale=(128, 256),
        share_params=False,
        keep_ratio=False,
        bbox_clip_border=False,
        override=False),
    dict(
        type='SeqRandomFlip',
        share_params=False,
        flip_ratio=0.5,
        direction='horizontal'),
    dict(
        type='SeqNormalize',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_rgb=True),
    dict(type='VideoCollect', keys=['img', 'gt_label']),
    dict(type='ReIDFormatBundle')
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', img_scale=(128, 256), keep_ratio=False),
    dict(
        type='Normalize',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_rgb=True),

### 模型训练

In [6]:
from mmdet.apis import train_detector as train_model
from mmtrack.datasets import build_dataset
from mmtrack.models import build_reid as build_model

# Build only the ReID sub-model of the config and initialise its weights.
model = build_model(cfg.model.reid)
model.init_weights()

# train_model expects a list of datasets; the class names of the training set
# are attached to the model.
train_set = build_dataset(cfg.data.train)
datasets = [train_set]
model.CLASSES = train_set.CLASSES

# The mmdet training entry point is reused for the ReID model; no validation
# pass is requested here.
train_model(model, datasets, cfg)

2022-10-28 08:56:43,296 - mmcv - INFO - initialize BaseReID with init_cfg {'type': 'Pretrained', 'checkpoint': 'https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_batch256_imagenet_20200708-cfb998bf.pth'}
2022-10-28 08:56:43,297 - mmcv - INFO - load model from: https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_batch256_imagenet_20200708-cfb998bf.pth
2022-10-28 08:56:43,297 - mmcv - INFO - load checkpoint from http path: https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_batch256_imagenet_20200708-cfb998bf.pth

unexpected key in source state_dict: head.fc.weight, head.fc.bias

missing keys in source state_dict: head.fcs.0.fc.weight, head.fcs.0.fc.bias, head.fcs.0.bn.weight, head.fcs.0.bn.bias, head.fcs.0.bn.running_mean, head.fcs.0.bn.running_var, head.fc_out.weight, head.fc_out.bias, head.bn.weight, head.bn.bias, head.bn.running_mean, head.bn.running_var, head.classifier.weight, head.classifier.bias

2022-10-28 08:56:43,441 - mmdet - INF

2022-10-28 09:01:19,807 - mmdet - INFO - Epoch [1][1350/1576]	lr: 1.000e-01, eta: 0:06:08, time: 0.224, data_time: 0.005, memory: 2097, triplet_loss: 0.0000, ce_loss: 0.0002, top-1: 100.0000, loss: 0.0002
2022-10-28 09:01:31,111 - mmdet - INFO - Epoch [1][1400/1576]	lr: 1.000e-01, eta: 0:05:59, time: 0.226, data_time: 0.004, memory: 2097, triplet_loss: 0.0000, ce_loss: 0.0002, top-1: 100.0000, loss: 0.0002
2022-10-28 09:01:42,500 - mmdet - INFO - Epoch [1][1450/1576]	lr: 1.000e-01, eta: 0:05:50, time: 0.228, data_time: 0.005, memory: 2097, triplet_loss: 0.0000, ce_loss: 0.0002, top-1: 100.0000, loss: 0.0002
2022-10-28 09:01:53,810 - mmdet - INFO - Epoch [1][1500/1576]	lr: 1.000e-01, eta: 0:05:41, time: 0.226, data_time: 0.004, memory: 2097, triplet_loss: 0.0000, ce_loss: 0.0002, top-1: 100.0000, loss: 0.0002
2022-10-28 09:02:05,089 - mmdet - INFO - Epoch [1][1550/1576]	lr: 1.000e-01, eta: 0:05:32, time: 0.225, data_time: 0.004, memory: 2097, triplet_loss: 0.0000, ce_loss: 0.0002, top-1

# 验证模型

In [1]:
import mmcv
from mmdet.apis import set_random_seed

# Start from the stock DeepSORT config and retarget it at MOT17_tiny and at
# the detector / ReID checkpoints trained earlier in this notebook.
cfg = mmcv.Config.fromfile('./configs/mot/deepsort/deepsort_faster-rcnn_fpn_4e_mot17-private-half.py')

# Keep data_root in agreement with the paths actually used below (the dataset
# lives under ./datasets, not ./data).
MOT17_ROOT = 'data/MOT17/'
MOT17_TINY_ROOT = 'datasets/MOT17_tiny/'
cfg.data_root = MOT17_TINY_ROOT

# Each split rewrites its OWN ann_file / img_prefix; the train split must not
# be derived from the test split's annotation file.
for split in ('train', 'val', 'test'):
    split_cfg = cfg.data[split]
    split_cfg.ann_file = split_cfg.ann_file.replace(MOT17_ROOT, MOT17_TINY_ROOT)
    split_cfg.img_prefix = split_cfg.img_prefix.replace(MOT17_ROOT, MOT17_TINY_ROOT)

# Use the weights produced by the two training sections above.
cfg.model.detector.init_cfg.checkpoint = './tutorial_exps/detector/epoch_4.pth'
cfg.model.reid.init_cfg.checkpoint = './tutorial_exps/reid/epoch_2.pth'

cfg.work_dir = './tutorial_exps'

# Fix the seed so repeated runs are comparable.
cfg.seed = 0
set_random_seed(cfg.seed, deterministic=False)

cfg.device = 'cuda'
# cfg.gpu_ids = range(1)
cfg.gpu_ids = [0]
cfg.data.test.test_mode = True
print(f'Config:\n{cfg.pretty_text}')

Config:
model = dict(
    detector=dict(
        type='FasterRCNN',
        backbone=dict(
            type='ResNet',
            depth=50,
            num_stages=4,
            out_indices=(0, 1, 2, 3),
            frozen_stages=1,
            norm_cfg=dict(type='BN', requires_grad=True),
            norm_eval=True,
            style='pytorch',
            init_cfg=dict(
                type='Pretrained', checkpoint='torchvision://resnet50')),
        neck=dict(
            type='FPN',
            in_channels=[256, 512, 1024, 2048],
            out_channels=256,
            num_outs=5),
        rpn_head=dict(
            type='RPNHead',
            in_channels=256,
            feat_channels=256,
            anchor_generator=dict(
                type='AnchorGenerator',
                scales=[8],
                ratios=[0.5, 1.0, 2.0],
                strides=[4, 8, 16, 32, 64]),
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0.0,

In [2]:
from mmcv.parallel import MMDataParallel
from mmtrack.apis import init_model, single_gpu_test
from mmtrack.datasets import build_dataloader, build_dataset

# Test split, served one sample at a time and in order.
dataset = build_dataset(cfg.data.test)
data_loader = build_dataloader(
    dataset,
    samples_per_gpu=1,
    workers_per_gpu=cfg.data.workers_per_gpu,
    dist=False,
    shuffle=False)

# build the model and load checkpoint
model = init_model(cfg)
model = MMDataParallel(model, device_ids=cfg.gpu_ids)

# Run the tracker over the whole test split.
outputs = single_gpu_test(model, data_loader)

# Options that configure the EvalHook itself are not accepted by
# dataset.evaluate(), so they are filtered out of the config's evaluation
# settings (hard-coded list).
eval_hook_args = [
    'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best',
    'rule', 'by_epoch'
]
eval_kwargs = {
    name: value
    for name, value in cfg.get('evaluation', {}).items()
    if name not in eval_hook_args
}
eval_kwargs['metric'] = ['track']

metric = dataset.evaluate(outputs, **eval_kwargs)
print(metric)

loading annotations into memory...
Done (t=0.12s)
creating index...
index created!


2022-10-28 10:42:48,070 - mmtrack - INFO - initialize FasterRCNN with init_cfg {'type': 'Pretrained', 'checkpoint': './tutorial_exps/detector/epoch_4.pth'}
2022-10-28 10:42:48,071 - mmcv - INFO - load model from: ./tutorial_exps/detector/epoch_4.pth
2022-10-28 10:42:48,072 - mmcv - INFO - load checkpoint from local path: ./tutorial_exps/detector/epoch_4.pth
2022-10-28 10:42:49,852 - mmtrack - INFO - initialize BaseReID with init_cfg {'type': 'Pretrained', 'checkpoint': './tutorial_exps/reid/epoch_2.pth'}
2022-10-28 10:42:49,853 - mmcv - INFO - load model from: ./tutorial_exps/reid/epoch_2.pth
2022-10-28 10:42:49,854 - mmcv - INFO - load checkpoint from local path: ./tutorial_exps/reid/epoch_2.pth


[                                 ] 1/823, 1.9 task/s, elapsed: 1s, ETA:   430s

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 823/823, 8.4 task/s, elapsed: 98s, ETA:     0sEvaluate CLEAR MOT results.

Eval Config:
USE_PARALLEL         : False                         
NUM_PARALLEL_CORES   : 8                             
BREAK_ON_ERROR       : True                          
RETURN_ON_ERROR      : False                         
LOG_ON_ERROR         : /opt/conda/lib/python3.7/site-packages/error_log.txt
PRINT_RESULTS        : True                          
PRINT_ONLY_COMBINED  : False                         
PRINT_CONFIG         : True                          
TIME_PROGRESS        : True                          
DISPLAY_LESS_PROGRESS : True                          
OUTPUT_SUMMARY       : True                          
OUTPUT_EMPTY_CLASSES : True                          
OUTPUT_DETAILED      : True                          
PLOT_CURVES          : True                          

MotChallenge2DBox Config:
GT_FOLDER            : /tmp/tmpni87iqvn              
TRACKERS_FOLDER    

<Figure size 432x288 with 0 Axes>

# 推理

In [4]:
# run mot demo
import mmcv
import tempfile
from mmtrack.apis import inference_mot, init_model

# DeepSORT config pointed at the detector / ReID checkpoints trained above.
cfg = mmcv.Config.fromfile('./configs/mot/deepsort/deepsort_faster-rcnn_fpn_4e_mot17-private-half.py')
cfg.model.detector.init_cfg.checkpoint = './tutorial_exps/detector/epoch_4.pth'
cfg.model.reid.init_cfg.checkpoint = './tutorial_exps/reid/epoch_2.pth'

input_video = './demo/demo.mp4'
imgs = mmcv.VideoReader(input_video)
# build the model from a config file
mot_model = init_model(cfg)
prog_bar = mmcv.ProgressBar(len(imgs))

output = './demo/mot.mp4'
# The rendered frames are staged in a temporary directory. The context manager
# removes it even when inference or video encoding raises, so a failed run no
# longer leaves frame images behind.
with tempfile.TemporaryDirectory() as out_path:
    # test and show/save the images
    for i, img in enumerate(imgs):
        result = inference_mot(mot_model, img, frame_id=i)
        mot_model.show_result(
            img,
            result,
            show=False,
            wait_time=int(1000. / imgs.fps),
            out_file=f'{out_path}/{i:06d}.jpg')
        prog_bar.update()

    # Stitch the saved frames back into a video at the source frame rate.
    print(f'\n making the output video at {output} with a FPS of {imgs.fps}')
    mmcv.frames2video(out_path, output, fps=imgs.fps, fourcc='mp4v')

2022-11-08 14:47:20,573 - mmcv - INFO - initialize FasterRCNN with init_cfg {'type': 'Pretrained', 'checkpoint': './tutorial_exps/detector/epoch_4.pth'}
2022-11-08 14:47:20,573 - mmcv - INFO - load model from: ./tutorial_exps/detector/epoch_4.pth
2022-11-08 14:47:20,574 - mmcv - INFO - load checkpoint from local path: ./tutorial_exps/detector/epoch_4.pth
2022-11-08 14:47:20,775 - mmcv - INFO - initialize BaseReID with init_cfg {'type': 'Pretrained', 'checkpoint': './tutorial_exps/reid/epoch_2.pth'}
2022-11-08 14:47:20,775 - mmcv - INFO - load model from: ./tutorial_exps/reid/epoch_2.pth
2022-11-08 14:47:20,776 - mmcv - INFO - load checkpoint from local path: ./tutorial_exps/reid/epoch_2.pth
2022-11-08 14:47:22,703 - mmcv - INFO - 
detector.backbone.conv1.weight - torch.Size([64, 3, 7, 7]): 
PretrainedInit: load from ./tutorial_exps/detector/epoch_4.pth 
 
2022-11-08 14:47:22,704 - mmcv - INFO - 
detector.backbone.bn1.weight - torch.Size([64]): 
PretrainedInit: load from ./tutorial_exps

2022-11-08 14:47:22,722 - mmcv - INFO - 
detector.backbone.layer2.0.downsample.1.bias - torch.Size([512]): 
PretrainedInit: load from ./tutorial_exps/detector/epoch_4.pth 
 
2022-11-08 14:47:22,722 - mmcv - INFO - 
detector.backbone.layer2.1.conv1.weight - torch.Size([128, 512, 1, 1]): 
PretrainedInit: load from ./tutorial_exps/detector/epoch_4.pth 
 
2022-11-08 14:47:22,723 - mmcv - INFO - 
detector.backbone.layer2.1.bn1.weight - torch.Size([128]): 
PretrainedInit: load from ./tutorial_exps/detector/epoch_4.pth 
 
2022-11-08 14:47:22,723 - mmcv - INFO - 
detector.backbone.layer2.1.bn1.bias - torch.Size([128]): 
PretrainedInit: load from ./tutorial_exps/detector/epoch_4.pth 
 
2022-11-08 14:47:22,724 - mmcv - INFO - 
detector.backbone.layer2.1.conv2.weight - torch.Size([128, 128, 3, 3]): 
PretrainedInit: load from ./tutorial_exps/detector/epoch_4.pth 
 
2022-11-08 14:47:22,724 - mmcv - INFO - 
detector.backbone.layer2.1.bn2.weight - torch.Size([128]): 
PretrainedInit: load from ./tutor

2022-11-08 14:47:22,749 - mmcv - INFO - 
detector.backbone.layer3.1.bn3.bias - torch.Size([1024]): 
PretrainedInit: load from ./tutorial_exps/detector/epoch_4.pth 
 
2022-11-08 14:47:22,749 - mmcv - INFO - 
detector.backbone.layer3.2.conv1.weight - torch.Size([256, 1024, 1, 1]): 
PretrainedInit: load from ./tutorial_exps/detector/epoch_4.pth 
 
2022-11-08 14:47:22,750 - mmcv - INFO - 
detector.backbone.layer3.2.bn1.weight - torch.Size([256]): 
PretrainedInit: load from ./tutorial_exps/detector/epoch_4.pth 
 
2022-11-08 14:47:22,750 - mmcv - INFO - 
detector.backbone.layer3.2.bn1.bias - torch.Size([256]): 
PretrainedInit: load from ./tutorial_exps/detector/epoch_4.pth 
 
2022-11-08 14:47:22,751 - mmcv - INFO - 
detector.backbone.layer3.2.conv2.weight - torch.Size([256, 256, 3, 3]): 
PretrainedInit: load from ./tutorial_exps/detector/epoch_4.pth 
 
2022-11-08 14:47:22,751 - mmcv - INFO - 
detector.backbone.layer3.2.bn2.weight - torch.Size([256]): 
PretrainedInit: load from ./tutorial_exp

2022-11-08 14:47:22,778 - mmcv - INFO - 
detector.backbone.layer4.0.downsample.1.bias - torch.Size([2048]): 
PretrainedInit: load from ./tutorial_exps/detector/epoch_4.pth 
 
2022-11-08 14:47:22,779 - mmcv - INFO - 
detector.backbone.layer4.1.conv1.weight - torch.Size([512, 2048, 1, 1]): 
PretrainedInit: load from ./tutorial_exps/detector/epoch_4.pth 
 
2022-11-08 14:47:22,779 - mmcv - INFO - 
detector.backbone.layer4.1.bn1.weight - torch.Size([512]): 
PretrainedInit: load from ./tutorial_exps/detector/epoch_4.pth 
 
2022-11-08 14:47:22,780 - mmcv - INFO - 
detector.backbone.layer4.1.bn1.bias - torch.Size([512]): 
PretrainedInit: load from ./tutorial_exps/detector/epoch_4.pth 
 
2022-11-08 14:47:22,780 - mmcv - INFO - 
detector.backbone.layer4.1.conv2.weight - torch.Size([512, 512, 3, 3]): 
PretrainedInit: load from ./tutorial_exps/detector/epoch_4.pth 
 
2022-11-08 14:47:22,780 - mmcv - INFO - 
detector.backbone.layer4.1.bn2.weight - torch.Size([512]): 
PretrainedInit: load from ./tut

2022-11-08 14:47:22,801 - mmcv - INFO - 
detector.roi_head.bbox_head.shared_fcs.1.bias - torch.Size([1024]): 
PretrainedInit: load from ./tutorial_exps/detector/epoch_4.pth 
 
2022-11-08 14:47:22,801 - mmcv - INFO - 
reid.backbone.conv1.weight - torch.Size([64, 3, 7, 7]): 
PretrainedInit: load from ./tutorial_exps/reid/epoch_2.pth 
 
2022-11-08 14:47:22,802 - mmcv - INFO - 
reid.backbone.bn1.weight - torch.Size([64]): 
PretrainedInit: load from ./tutorial_exps/reid/epoch_2.pth 
 
2022-11-08 14:47:22,802 - mmcv - INFO - 
reid.backbone.bn1.bias - torch.Size([64]): 
PretrainedInit: load from ./tutorial_exps/reid/epoch_2.pth 
 
2022-11-08 14:47:22,802 - mmcv - INFO - 
reid.backbone.layer1.0.conv1.weight - torch.Size([64, 64, 1, 1]): 
PretrainedInit: load from ./tutorial_exps/reid/epoch_2.pth 
 
2022-11-08 14:47:22,803 - mmcv - INFO - 
reid.backbone.layer1.0.bn1.weight - torch.Size([64]): 
PretrainedInit: load from ./tutorial_exps/reid/epoch_2.pth 
 
2022-11-08 14:47:22,803 - mmcv - INFO - 

2022-11-08 14:47:22,825 - mmcv - INFO - 
reid.backbone.layer2.1.bn2.bias - torch.Size([128]): 
PretrainedInit: load from ./tutorial_exps/reid/epoch_2.pth 
 
2022-11-08 14:47:22,825 - mmcv - INFO - 
reid.backbone.layer2.1.conv3.weight - torch.Size([512, 128, 1, 1]): 
PretrainedInit: load from ./tutorial_exps/reid/epoch_2.pth 
 
2022-11-08 14:47:22,826 - mmcv - INFO - 
reid.backbone.layer2.1.bn3.weight - torch.Size([512]): 
PretrainedInit: load from ./tutorial_exps/reid/epoch_2.pth 
 
2022-11-08 14:47:22,826 - mmcv - INFO - 
reid.backbone.layer2.1.bn3.bias - torch.Size([512]): 
PretrainedInit: load from ./tutorial_exps/reid/epoch_2.pth 
 
2022-11-08 14:47:22,827 - mmcv - INFO - 
reid.backbone.layer2.2.conv1.weight - torch.Size([128, 512, 1, 1]): 
PretrainedInit: load from ./tutorial_exps/reid/epoch_2.pth 
 
2022-11-08 14:47:22,827 - mmcv - INFO - 
reid.backbone.layer2.2.bn1.weight - torch.Size([128]): 
PretrainedInit: load from ./tutorial_exps/reid/epoch_2.pth 
 
2022-11-08 14:47:22,827 

2022-11-08 14:47:22,860 - mmcv - INFO - 
reid.backbone.layer3.2.bn3.bias - torch.Size([1024]): 
PretrainedInit: load from ./tutorial_exps/reid/epoch_2.pth 
 
2022-11-08 14:47:22,860 - mmcv - INFO - 
reid.backbone.layer3.3.conv1.weight - torch.Size([256, 1024, 1, 1]): 
PretrainedInit: load from ./tutorial_exps/reid/epoch_2.pth 
 
2022-11-08 14:47:22,861 - mmcv - INFO - 
reid.backbone.layer3.3.bn1.weight - torch.Size([256]): 
PretrainedInit: load from ./tutorial_exps/reid/epoch_2.pth 
 
2022-11-08 14:47:22,861 - mmcv - INFO - 
reid.backbone.layer3.3.bn1.bias - torch.Size([256]): 
PretrainedInit: load from ./tutorial_exps/reid/epoch_2.pth 
 
2022-11-08 14:47:22,861 - mmcv - INFO - 
reid.backbone.layer3.3.conv2.weight - torch.Size([256, 256, 3, 3]): 
PretrainedInit: load from ./tutorial_exps/reid/epoch_2.pth 
 
2022-11-08 14:47:22,862 - mmcv - INFO - 
reid.backbone.layer3.3.bn2.weight - torch.Size([256]): 
PretrainedInit: load from ./tutorial_exps/reid/epoch_2.pth 
 
2022-11-08 14:47:22,86

2022-11-08 14:47:22,883 - mmcv - INFO - 
reid.backbone.layer4.2.bn1.bias - torch.Size([512]): 
PretrainedInit: load from ./tutorial_exps/reid/epoch_2.pth 
 
2022-11-08 14:47:22,884 - mmcv - INFO - 
reid.backbone.layer4.2.conv2.weight - torch.Size([512, 512, 3, 3]): 
PretrainedInit: load from ./tutorial_exps/reid/epoch_2.pth 
 
2022-11-08 14:47:22,884 - mmcv - INFO - 
reid.backbone.layer4.2.bn2.weight - torch.Size([512]): 
PretrainedInit: load from ./tutorial_exps/reid/epoch_2.pth 
 
2022-11-08 14:47:22,885 - mmcv - INFO - 
reid.backbone.layer4.2.bn2.bias - torch.Size([512]): 
PretrainedInit: load from ./tutorial_exps/reid/epoch_2.pth 
 
2022-11-08 14:47:22,885 - mmcv - INFO - 
reid.backbone.layer4.2.conv3.weight - torch.Size([2048, 512, 1, 1]): 
PretrainedInit: load from ./tutorial_exps/reid/epoch_2.pth 
 
2022-11-08 14:47:22,886 - mmcv - INFO - 
reid.backbone.layer4.2.bn3.weight - torch.Size([2048]): 
PretrainedInit: load from ./tutorial_exps/reid/epoch_2.pth 
 
2022-11-08 14:47:22,88

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 8/8, 5.9 task/s, elapsed: 1s, ETA:     0s
 making the output video at ./demo/mot.mp4 with a FPS of 3.0
[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 8/8, 36.5 task/s, elapsed: 0s, ETA:     0s
