### Train an MMDetection Network
- See [tutorial](https://github.com/open-mmlab/mmdetection/blob/main/demo/MMDet_Tutorial.ipynb)

In [1]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [2]:
from pathlib import Path
import sys
from datetime import datetime

### Download checkpoint for a pretrained model (if desired)
Alternatively, use a previous mouse model as a pretrained model

In [3]:
pretrained_model_directory = Path("/n/groups/datta/tim_sainburg/datasets/scratch/pretrained_mm_models")
pretrained_model_directory.mkdir(parents=True, exist_ok=True)

In [4]:
# find models here: https://github.com/open-mmlab/mmdetection/tree/main/configs/rtmdet
pretrain_model = "rtmdet_s_8xb32-300e_coco"

In [5]:
# command = f"source activate {Path(sys.executable).parents[1]}; mim download mmdet --config {pretrain_model} --dest {pretrained_model_directory.as_posix()}"
# print(command)

In [6]:
# !{command}

In [7]:
!ls {pretrained_model_directory.as_posix()}

rtmdet_s_8xb32-300e_coco_20220905_161602-387a891e.pth
rtmdet_s_8xb32-300e_coco.py
rtmpose-m_8xb64-210e_ap10k-256x256.py
rtmpose-m_simcc-ap10k_pt-aic-coco_210e-256x256-7a041aa1_20230206.pth


In [8]:
pretrained_model_directory

PosixPath('/n/groups/datta/tim_sainburg/datasets/scratch/pretrained_mm_models')

### Parameters and dataset

In [4]:
model_name = 'rtmdet_small_8xb32-300e_coco_chronic'

# Where the COCO format dataset is located (created in the previous notebook)
# dataset_directory = Path("/n/groups/datta/tim_sainburg/projects/24-04-02-neuropixels-chronic/data/keypoints/coco-trainingsets/240408-mmpose-multianimal-chronic_v3/")
dataset_directory = Path("/n/groups/datta/6cam_keypoint_networks/training_data/JP_CW_scale_annos/COCO_format/")

# which config to use (this is what we base the config off of). Should be in the mmdeteciton repo. 
config_loc = Path('/n/groups/datta/tim_sainburg/projects/mmdetection/configs/rtmdet/rtmdet_s_8xb32-300e_coco.py')

# which pretrained model to use (point to .pth file). Pretrained model should be the same model architecture. 
# pretrained_model = pretrained_model_directory / "rtmdet_s_8xb32-300e_coco_20220905_161602-387a891e.pth"
# pretrained_model = Path('/n/groups/datta/tim_sainburg/projects/24-01-05-multicamera_keypoints_mm2d/models/rtmdet/rtmdet_tiny_8xb32-300e_coco_24-01-05-11-25-00_102726/epoch_300.pth')
pretrained_model = Path("/n/groups/datta/6cam_keypoint_networks/mm_pose/Jonah/20241030_v1/rtmdet/rtmdet_small_8xb32-300e_coco_chronic_24-10-31-10-59-24/epoch_3.pth")
use_pretrained_model = True

# working directory (where model output is saved)
# output_directory = Path("/n/groups/datta/tim_sainburg/datasets/scratch/mm_training")
output_directory = Path("/n/groups/datta/6cam_keypoint_networks/mm_pose/Jonah/20241030_v1")
formatted_datetime = datetime.now().strftime("%y-%m-%d-%H-%M-%S")
working_directory = (output_directory / 'rtmdet' / f"{model_name}_{formatted_datetime}")
working_directory.mkdir(parents=True, exist_ok=True)

# You shouldn't need to change anything below here

### Display compute / environment info (for future reference)

In [5]:
# Check nvcc version
!nvcc -V
# Check GCC version
!gcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Mon_Apr__3_17:16:06_PDT_2023
Cuda compilation tools, release 12.1, V12.1.105
Build cuda_12.1.r12.1/compiler.32688072_0
gcc (GCC) 6.2.0
Copyright (C) 2016 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.



In [6]:
from mmengine.utils import get_git_hash
from mmengine.utils.dl_utils import collect_env as collect_base_env
import sys
import mmdet
import torch, torchvision
import mmpose
from mmcv.ops import get_compiling_cuda_version, get_compiler_version

def collect_env():
    """Collect the information of the running environments."""
    env_info = collect_base_env()
    env_info['MMDetection'] = f'{mmdet.__version__}+{get_git_hash()[:7]}'
    return env_info

print(f"Environment: {sys.executable}")
for name, val in collect_env().items():
    print(f'{name}: {val}')
# Check Pytorch installation
print('cuda version:', get_compiling_cuda_version())
print('compiler information:', get_compiler_version())
print('torch version:', torch.__version__, torch.cuda.is_available())
print('torchvision version:', torchvision.__version__)
print('mmpose version:', mmpose.__version__) 

Environment: /n/groups/datta/tim_sainburg/conda_envs/mmdeploy/bin/python
sys.platform: linux
Python: 3.10.13 (main, Sep 11 2023, 13:44:35) [GCC 11.2.0]
CUDA available: True
numpy_random_seed: 2147483648
GPU 0: Tesla V100S-PCIE-32GB
CUDA_HOME: /n/groups/datta/tim_sainburg/conda_envs/mmdeploy
NVCC: Cuda compilation tools, release 12.1, V12.1.105
GCC: gcc (GCC) 6.2.0
PyTorch: 2.1.1
PyTorch compiling details: PyTorch built with:
  - GCC 9.3
  - C++ Version: 201703
  - Intel(R) oneAPI Math Kernel Library Version 2023.1-Product Build 20230303 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v3.1.1 (Git Hash 64f6bcbcbab628e96f33a62c3e975f8535a7bde4)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is enabled (usually provided by MKL)
  - NNPACK is enabled
  - CPU capability usage: AVX512
  - CUDA Runtime 12.1
  - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70

### Create the config file

In [7]:
from mmengine import Config
from pathlib import Path

In [8]:
cfg = Config.fromfile(config_loc.as_posix())

# set the dataset directory
cfg.data_root = dataset_directory.as_posix()

# set the working directory
cfg.work_dir = working_directory.as_posix()

# set head to only care about the mouse class
cfg.model.bbox_head.num_classes = 1

# set the metainfo
cfg.metainfo = {
    'classes': ('Mouse', ),
    'palette': [
        (220, 20, 60),
    ]
}

# specify the dataset
cfg.dataset_type = 'CocoDataset'

# load COCO pre-trained weight
if use_pretrained_model:
    cfg.load_from = pretrained_model.as_posix()

In [9]:
cfg.data_root

'/n/groups/datta/6cam_keypoint_networks/training_data/JP_CW_scale_annos/COCO_format'

In [10]:
cfg.train_dataloader.dataset.data_root = cfg.data_root
cfg.train_dataloader.dataset.metainfo = cfg.metainfo
cfg.train_dataloader.dataset.data_prefix = dict(img='train/')
cfg.train_dataloader.dataset.ann_file = 'annotations/instances_train.json'

cfg.val_dataloader.dataset.data_root = cfg.data_root
cfg.val_dataloader.dataset.metainfo = cfg.metainfo
cfg.val_dataloader.dataset.data_prefix = dict(img='val/')
cfg.val_dataloader.dataset.ann_file = 'annotations/instances_val.json'

cfg.train_dataloader.dataset.type = cfg.dataset_type
cfg.val_dataloader.dataset.type = cfg.dataset_type

cfg.val_evaluator.ann_file= cfg.data_root + '/annotations/instances_val.json'
cfg.test_evaluator.ann_file= cfg.data_root + '/annotations/instances_val.json'

cfg.default_hooks.checkpoint.max_keep_ckpts = 15
# cfg.default_hooks.checkpoint.interval = 50
cfg.default_hooks.checkpoint.interval = 3  # save more often for larger datasets bc it's slower

cfg.max_epochs = 2000
cfg.train_cfg.max_epochs = 2000

In [11]:
print(cfg.model)

{'type': 'RTMDet', 'data_preprocessor': {'type': 'DetDataPreprocessor', 'mean': [103.53, 116.28, 123.675], 'std': [57.375, 57.12, 58.395], 'bgr_to_rgb': False, 'batch_augments': None}, 'backbone': {'type': 'CSPNeXt', 'arch': 'P5', 'expand_ratio': 0.5, 'deepen_factor': 0.33, 'widen_factor': 0.5, 'channel_attention': True, 'norm_cfg': {'type': 'SyncBN'}, 'act_cfg': {'type': 'SiLU', 'inplace': True}, 'init_cfg': {'type': 'Pretrained', 'prefix': 'backbone.', 'checkpoint': 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-s_imagenet_600e.pth'}}, 'neck': {'type': 'CSPNeXtPAFPN', 'in_channels': [128, 256, 512], 'out_channels': 128, 'num_csp_blocks': 1, 'expand_ratio': 0.5, 'norm_cfg': {'type': 'SyncBN'}, 'act_cfg': {'type': 'SiLU', 'inplace': True}}, 'bbox_head': {'type': 'RTMDetSepBNHead', 'num_classes': 1, 'in_channels': 128, 'stacked_convs': 2, 'feat_channels': 128, 'anchor_generator': {'type': 'MlvlPointGenerator', 'offset': 0, 'strides': [8, 16, 32]}, '

In [12]:
# save configuration file for future reference
cfg.dump(working_directory / 'config.py')

In [13]:
print(working_directory)

/n/groups/datta/6cam_keypoint_networks/mm_pose/Jonah/20241030_v1/rtmdet/rtmdet_small_8xb32-300e_coco_chronic_24-10-31-12-48-26


### Train

In [14]:
from mmengine.config import Config, DictAction
from mmengine.runner import Runner

In [15]:
# build the runner from config
runner = Runner.from_cfg(cfg)

10/31 12:50:23 - mmengine - [4m[97mINFO[0m - 
------------------------------------------------------------
System environment:
    sys.platform: linux
    Python: 3.10.13 (main, Sep 11 2023, 13:44:35) [GCC 11.2.0]
    CUDA available: True
    numpy_random_seed: 420280809
    GPU 0: Tesla V100S-PCIE-32GB
    CUDA_HOME: /n/groups/datta/tim_sainburg/conda_envs/mmdeploy
    NVCC: Cuda compilation tools, release 12.1, V12.1.105
    GCC: gcc (GCC) 6.2.0
    PyTorch: 2.1.1
    PyTorch compiling details: PyTorch built with:
  - GCC 9.3
  - C++ Version: 201703
  - Intel(R) oneAPI Math Kernel Library Version 2023.1-Product Build 20230303 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v3.1.1 (Git Hash 64f6bcbcbab628e96f33a62c3e975f8535a7bde4)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is enabled (usually provided by MKL)
  - NNPACK is enabled
  - CPU capability usage: AVX512
  - CUDA Runtime 12.1
  - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;ar

  check_for_updates()


10/31 12:50:25 - mmengine - [4m[97mINFO[0m - Config:
auto_scale_lr = dict(base_batch_size=16, enable=False)
backend_args = None
base_lr = 0.004
checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-s_imagenet_600e.pth'
custom_hooks = [
    dict(
        ema_type='ExpMomentumEMA',
        momentum=0.0002,
        priority=49,
        type='EMAHook',
        update_buffers=True),
    dict(
        switch_epoch=280,
        switch_pipeline=[
            dict(backend_args=None, type='LoadImageFromFile'),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(
                keep_ratio=True,
                ratio_range=(
                    0.5,
                    2.0,
                ),
                scale=(
                    640,
                    640,
                ),
                type='RandomResize'),
            dict(crop_size=(
                640,
                640,
            ), type='RandomCrop'),
  

In [16]:
# start training
runner.train()

loading annotations into memory...
Done (t=0.46s)
creating index...
index created!
10/31 12:50:34 - mmengine - [4m[97mINFO[0m - paramwise_options -- backbone.stem.0.bn.weight:weight_decay=0.0
10/31 12:50:34 - mmengine - [4m[97mINFO[0m - paramwise_options -- backbone.stem.0.bn.bias:weight_decay=0.0
10/31 12:50:34 - mmengine - [4m[97mINFO[0m - paramwise_options -- backbone.stem.1.bn.weight:weight_decay=0.0
10/31 12:50:34 - mmengine - [4m[97mINFO[0m - paramwise_options -- backbone.stem.1.bn.bias:weight_decay=0.0
10/31 12:50:34 - mmengine - [4m[97mINFO[0m - paramwise_options -- backbone.stem.2.bn.weight:weight_decay=0.0
10/31 12:50:34 - mmengine - [4m[97mINFO[0m - paramwise_options -- backbone.stem.2.bn.bias:weight_decay=0.0
10/31 12:50:34 - mmengine - [4m[97mINFO[0m - paramwise_options -- backbone.stage1.0.bn.weight:weight_decay=0.0
10/31 12:50:34 - mmengine - [4m[97mINFO[0m - paramwise_options -- backbone.stage1.0.bn.bias:weight_decay=0.0
10/31 12:50:34 - mmengine

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


10/31 12:51:12 - mmengine - [4m[97mINFO[0m - Epoch(train)    [1][ 50/482]  base_lr: 1.9623e-04 lr: 1.9623e-04  eta: 7 days, 17:51:53  time: 0.6941  data_time: 0.0803  memory: 14040  loss: 0.6760  loss_cls: 0.3319  loss_bbox: 0.3441
10/31 12:51:39 - mmengine - [4m[97mINFO[0m - Epoch(train)    [1][100/482]  base_lr: 3.9643e-04 lr: 3.9643e-04  eta: 6 days, 23:23:46  time: 0.5563  data_time: 0.0041  memory: 14040  loss: 0.6158  loss_cls: 0.2971  loss_bbox: 0.3187
10/31 12:52:07 - mmengine - [4m[97mINFO[0m - Epoch(train)    [1][150/482]  base_lr: 5.9663e-04 lr: 5.9663e-04  eta: 6 days, 17:38:31  time: 0.5608  data_time: 0.0038  memory: 14040  loss: 0.5815  loss_cls: 0.2785  loss_bbox: 0.3030
10/31 12:52:35 - mmengine - [4m[97mINFO[0m - Epoch(train)    [1][200/482]  base_lr: 7.9683e-04 lr: 7.9683e-04  eta: 6 days, 14:20:47  time: 0.5546  data_time: 0.0037  memory: 14040  loss: 0.5681  loss_cls: 0.2682  loss_bbox: 0.3000
10/31 12:53:03 - mmengine - [4m[97mINFO[0m - Epoch(train)

### The config and path for running inference will be in the working directory