In [None]:
# We need an older NumPy version (<1.24); otherwise an error occurs, because newer NumPy releases removed the deprecated aliases such as "np.int" and "np.float"
# %pip install opencv-python numpy==1.23.5 torchvision
# %pip install opencv-python numpy torchvision # TODO try this instead

In [None]:
# %pip install -U openmim
# !mim install "mmengine>=0.3.1"
# !mim install "mmcv>=2.0.0rc1,<2.1.0"

# !mim install "mmdet>=3.0.0rc5,<3.1.0"

# We can install mmdet from repository, to which we can make changes
# %cd $paths.mmdetection_path/..
# !git clone https://github.com/open-mmlab/mmdetection.git
# %cd mmdetection
# %pip install -e .
# %cd $paths.proj_path

In [None]:
from copy import deepcopy
from pprint import pprint
from time import time
import os
import traceback

import process_dataset.common as common

### Additional paths

In [None]:
import paths

# Print the paths.py file without its comment lines, so the active settings
# are visible in the notebook output
with open("./paths.py", encoding="utf-8") as f:
    # Keep every line that does not start with "#" (only comments starting in
    # the first column are dropped)
    lines = [line for line in f if not line.startswith("#")]
# Collapse the runs of blank lines left behind by the removed comments
print("".join(lines).replace("\n\n\n", "\n"))

# Assert everything is in the right place; the message names the missing path
assert os.path.exists(paths.proj_path), \
    f"Project directory not found: {paths.proj_path}"
assert os.path.exists(paths.process_dataset_dirpath), \
    f"process_dataset directory not found: {paths.process_dataset_dirpath}"

# Download YOLOX-s config and checkpoint (pre-trained)

In [None]:
# TODO don't download it here. Leave it to the user
# Memory requirements for YOLOX models:
# YOLOX-s	7.6 GB
# YOLOX-l	19.9 GB
# YOLOX-x	28.1 GB

# url = "https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_x_8x8_300e_coco/" + model_checkpoint_filename
# url = "https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_s_8x8_300e_coco/" + paths.model_checkpoint_filename
# !wget -c $url -O $paths.model_checkpoint_filepath

# The model config and the pre-trained checkpoint are not downloaded here, so
# they must already be on disk. Fail early and name the file that is missing
assert os.path.exists(paths.model_config_filepath), \
    f"Model config not found: {paths.model_config_filepath}"
assert os.path.exists(paths.model_checkpoint_filepath), \
    f"Model checkpoint not found (download it first): {paths.model_checkpoint_filepath}"

# Model config

In [None]:
# Based on the MMDetection Colab tutorial:
# https://colab.research.google.com/github/ZwwWayne/mmdetection/blob/update-colab/demo/MMDet_Tutorial.ipynb#scrollTo=hamZrlnH-YDD
# (older alternative: from mmcv import Config)
from mmengine.config import Config

cfg = Config.fromfile(paths.model_config_filepath)

# Choose the checkpoint to start from: resume from the last checkpoint when
# one is configured, otherwise load the pre-trained model checkpoint
# TODO works?
if not paths.last_checkpoint_filepath:
    cfg.load_from = paths.model_checkpoint_filepath
else:
    cfg.load_from = paths.last_checkpoint_filepath
    cfg.resume = True

# Root directory of the datasets and the directory for the training outputs
data_root = common.datasets_dirpath
cfg.data_root = data_root
cfg.work_dir = paths.working_dirpath

# Class names, taken from the keys of common.classes_ids
cfg["metainfo"] = {"classes": tuple(common.classes_ids.keys())}

# Batch size (default 8). Values tried so far:
#   32 - CUDA out of memory on 7400
#   16 - CUDA out of memory on 7400
#   12 - seems to work (11111 of 11178MiB used), but not on P52
#   11 - works on P52
batch_size = 11

# Workers per GPU (default 4)
# Tested 8, 12 and 16 on P52 and higher numbers actually made the training (ETA) longer
# (8 doesn't seem to do much). With 12, ETA was about 10% longer than at default.
# Using 2, speed is slightly improved (~2%)
num_workers = 2

# Not needed
# img_norm_cfg = {
#     # "mean": [103.530, 116.280, 123.675], # Taken from yolof
#     "mean": [114.0, 114.0, 114.0], 
#     "std": [1.0, 1.0, 1.0], 
#     "to_rgb": False
# }

# Pipeline steps that are applied inside every individual dataset, which
# allows different augmentation settings for each dataset
# + Mosaic? Probably not very useful here
# + MixUp? Doesn't seem useful either
per_dataset_train_pipeline = [
    {"type": "LoadImageFromFile"},
    {"type": "LoadAnnotations", "with_bbox": True},
    {"type": "Resize", "scale": cfg.img_scale, "keep_ratio": True},
    {
        "type": "Pad",
        "pad_to_square": True,
        "pad_val": {"img": (114.0, 114.0, 114.0)},
    },
    {
        "type": "RandomAffine",
        # min_bbox_size=8, # No need. Done in FilterAnnotations
        # border=(-cfg.img_scale[0] // 2, -cfg.img_scale[1] // 2), # This was a problem. No idea why I added it. Shouldn't exist
        "scaling_ratio_range": (0, 0),  # Placeholder; adjusted per dataset later below
        "max_rotate_degree": 10,
        "max_shear_degree": 5,
    },
]

# RandomAffine scaling ratio range (min, max), one entry per training dataset
train_datasets_scaling_ratios = {
    "mio-tcd": (0.7, 1.1),
    "aau": (0.8, 1.1),
    "ndis": (0.9, 3),
    "mtid": (0.9, 2),
    "visdrone_det": (1.5, 3),
    "detrac": (0.8, 1.2),
}

# Pipeline steps shared by all training datasets
train_pipeline = [
    {"type": "YOLOXHSVRandomAug"},
    {
        "type": "RandomFlip",
        "prob": 0.5,
        "direction": "horizontal",  # (horizontal is implicit)
    },
    {"type": "PhotoMetricDistortion"},
    {
        "type": "FilterAnnotations",
        # Should be okay, I think 16x16 causes small objects (even 64x64) to be undetected
        "min_gt_bbox_wh": (8, 8),
        "keep_empty": False,
    },
    #  Is this OK? Heard that YOLOX does not need normalization or something...
    #  Nope, it definitely does not. I tried one epoch with and one without, and
    #  the results (loss plots) were actually the same
    # dict(type='Normalize', **img_norm_cfg), # img_norm_cfg taken from yolof
    {"type": "PackDetInputs"},
]

# How many times each training dataset is repeated
train_datasets_repeats = {
    "mio-tcd": 1,
    "aau": 3,  # There are some misannotations so don't make it too frequent
    "ndis": 25,
    "mtid": 6,  # It's a video, so already a lot repeats, but it's a great dataset
    "visdrone_det": 4,  # Good dataset, but not very important in this project
    "detrac": 2,
}

# Class names have to be handed to every dataset explicitly. A CocoDataset
# without `metainfo` uses its built-in COCO class list, and the top-level
# cfg["metainfo"] entry is not read by the datasets on its own
metainfo = dict(classes=tuple(cfg["metainfo"]["classes"]))

# One RepeatDataset(CocoDataset) per source dataset, each with its own copy of
# the per-dataset pipeline
train_datasets = []
for dataset_name in list(common.datasets.keys()):
    pipeline = deepcopy(per_dataset_train_pipeline)

    # Set RandomAffine scaling range individually for each dataset. The step is
    # looked up by its type, so reordering the pipeline cannot silently change
    # the wrong step
    affine_steps = [step for step in pipeline if step["type"] == "RandomAffine"]
    assert len(affine_steps) == 1, \
        "Expected exactly one RandomAffine step in per_dataset_train_pipeline"
    affine_steps[0]["scaling_ratio_range"] = train_datasets_scaling_ratios[dataset_name]

    train_datasets.append(dict(
        type = "RepeatDataset",
        times = train_datasets_repeats[dataset_name],
        dataset = dict(
            type = "CocoDataset",
            metainfo = metainfo,
            ann_file = os.path.join(common.datasets_dirpath, common.datasets[dataset_name]["path"], common.gt_filename),
            data_prefix = dict(img=data_root),
            data_root = data_root,
            pipeline = pipeline,
        )
    ))

cfg.train_dataloader = dict(
    batch_size = batch_size,

    num_workers = num_workers,

    persistent_workers = True,

    sampler=dict(type="DefaultSampler", shuffle=True),

    dataset = dict(
        # Tried removing MultiImageMixDataset and using ClassBalancedDataset
        # directly, but got an error about tensor sizes - MultiImageMixDataset
        # is necessary
        type = "MultiImageMixDataset",

        # TODO restore this and use class balanced dataset (was getting an exception when used)
        # "The dataset needs to instantiate self.get_cat_ids() to support ClassBalancedDataset."
        # So if I have ConcatDataset in ClassBalancedDataset, the ConcatDataset must have get_cat_ids()
        # dataset = dict(
        #     type = 'ClassBalancedDataset',
        #     # oversample_thr = 1e-3, # Default
        #     oversample_thr = 0.1, # Seems good
        #     dataset = dict(
        #         type = "ConcatDataset",
        #         datasets = train_datasets
        #     )
        # ),

        # This works (omitting class balanced dataset)
        dataset = dict(
            type = "ConcatDataset",
            datasets = train_datasets
        ),

        pipeline = train_pipeline,
    ),
)


def _make_eval_dataloader(ann_filepath, pipeline):
    """Build the dataloader config shared by validation and testing.

    Args:
        ann_filepath: Path of the COCO annotation file. Only its basename is
            passed on, together with `data_root` as the dataset root.
        pipeline: Data pipeline config to use for the dataset.

    Returns:
        dict: Dataloader config for a non-shuffled CocoDataset in test mode.
    """
    return dict(
        batch_size = batch_size,
        num_workers = num_workers,
        persistent_workers = True,
        drop_last = False,
        sampler = dict(type="DefaultSampler", shuffle=False),
        dataset = dict(
            type = "CocoDataset",
            metainfo = metainfo,
            data_root = data_root,
            ann_file = os.path.basename(ann_filepath),
            data_prefix = dict(img=""),
            test_mode = True,
            pipeline = pipeline,
        )
    )


# The default pipelines are read from the loaded config before the
# dataloaders are replaced
cfg.val_dataloader = _make_eval_dataloader(
    common.dataset_val_filepath,
    cfg.val_dataloader.dataset.pipeline, # Default
)

cfg.test_dataloader = _make_eval_dataloader(
    common.dataset_test_filepath,
    cfg.test_dataloader.dataset.pipeline, # Default
)

# Bounding-box evaluation with CocoMetric against the validation and the test
# annotation file
cfg.val_evaluator = {
    "type": "CocoMetric",
    "ann_file": common.dataset_val_filepath,
    "metric": "bbox",
}

cfg.test_evaluator = {
    "type": "CocoMetric",
    "ann_file": common.dataset_test_filepath,
    "metric": "bbox",
}

# NOTE(review): gpu_ids / device look like MMDetection 2.x-style keys;
# presumably the MMEngine Runner does not read them - confirm before relying
# on them
cfg.gpu_ids = [0]
cfg.device = "cuda"
# cfg.device = "cpu"

# Time-based random seed. The MMEngine Runner takes the seed from
# cfg.randomness, so a top-level cfg.seed alone is not applied. cfg.seed is
# kept so the value stays available under its old key as well
cfg.seed = int(time())
cfg.randomness = dict(seed=cfg.seed)

# The original learning rate (LR) is set for 8-GPU training.
# We divide it by 8 since we only use one GPU.
# cfg.optimizer.lr = 0.02 # This instead of 0.02 / 8 - nope, that's too much
# cfg.optimizer.lr = 0.001 # Not better than 0.00125
cfg.optim_wrapper.optimizer.lr /= 8 # 0.00125 # As seen on the internet, seems to be good

# Orig yolox-s config says: USER SHOULD NOT CHANGE ITS VALUES
# ! But that, hopefully, means that user should not change LR, but can change
# the auto_scale_lr setting...
# The base batch size is set to the batch size actually used for training
# cfg.auto_scale_lr = {}
cfg.auto_scale_lr = {
    "enable": True, 
    "base_batch_size": cfg.train_dataloader.batch_size
    }

# Set to log every Nth batch (N = 1); applies to every default hook of type
# LoggerHook
for hook_name in list(cfg.default_hooks.keys()):
    if cfg.default_hooks[hook_name].type == "LoggerHook":
        cfg.default_hooks[hook_name].interval = 1

# We can set the checkpoint saving interval to reduce the storage cost
cfg.default_hooks.checkpoint.interval = 1
cfg.default_hooks.checkpoint.max_keep_ckpts = 10

# Number of training epochs. The value has to be written into train_cfg to
# take effect; the top-level max_epochs key is removed again further below,
# so setting only that key changes nothing
# NOTE(review): when changing this from 300, check whether the learning rate
# schedule of the loaded config has to be adjusted too
cfg.max_epochs = 300
cfg.train_cfg.max_epochs = cfg.max_epochs

# We can also use tensorboard to log the training process
cfg.visualizer.vis_backends = [
    dict(type='LocalVisBackend'),
    dict(type='TensorboardVisBackend')
]

# TODO validation sometimes?
# cfg.workflow = [('train', 1), ('val', 1)]

# These top-level keys are useless from here on; remove them so they don't
# confuse
for useless_key in (
    "data_root",
    "dataset_type",
    "train_pipeline",
    "train_dataset",
    "test_pipeline",
    "max_epochs",
    "num_last_epochs",
    "interval",
    "base_lr",
    "vis_backends",
):
    cfg.pop(useless_key)

# Set number of classes of the bbox head to the number of our classes
cfg.model.bbox_head.num_classes = len(common.classes_ids)

In [None]:
# Print the complete config as text for a visual check before training
print(cfg.pretty_text)

In [None]:
# Based on the MMDetection Colab tutorial:
# https://colab.research.google.com/github/ZwwWayne/mmdetection/blob/update-colab/demo/MMDet_Tutorial.ipynb#scrollTo=hamZrlnH-YDD
from mmengine.runner import Runner

# Build the runner from the config and start the training. On any failure
# (including an interrupt) the traceback is printed explicitly before the
# exception is re-raised
try:
    runner = Runner.from_cfg(cfg)
    runner.train()
except BaseException:
    traceback.print_exc()
    raise

In [None]:
# from mmdet.apis import single_gpu_test
# from mmdet.datasets import build_dataloader, build_dataset
# from mmdet.utils import build_dp

# data_loader = build_dataloader(build_dataset(cfg.data.test), samples_per_gpu=64, workers_per_gpu=1)
# dp = build_dp(model, cfg.device, device_ids=cfg.gpu_ids)
# outputs = single_gpu_test(dp, data_loader, out_dir=paths.working_dirpath)