In [None]:
# Enable IPython's autoreload extension in mode 2, so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# specify substep parameters for interactive run
# this cell will be replaced during job run with the parameters from json within params subfolder
# (the dict is later handed to NotebookSubstep together with the pipeline and step parameters)
substep_params={}

In [None]:
# load pipeline and step parameters - do not edit
# pipeline_params and step_params are passed to NotebookSubstep below;
# step_params is also the source of the "train_params" section used for training constants.
# NOTE(review): pprint=True presumably also prints the loaded parameters — confirm in sinara docs.
from sinara.substep import get_pipeline_params, get_step_params
pipeline_params = get_pipeline_params(pprint=True)
step_params = get_step_params(pprint=True)

In [None]:
# define substep interface
from sinara.substep import NotebookSubstep, ENV_NAME, PIPELINE_NAME, ZONE_NAME, STEP_NAME, RUN_ID, ENTITY_NAME, ENTITY_PATH, SUBSTEP_NAME

substep = NotebookSubstep(pipeline_params, step_params, substep_params)

# Declare what this substep reads from the "data_prep" step and which
# temporary entities it hands over to the next substep.
substep.interface(
    inputs=[
        {STEP_NAME: "data_prep", ENTITY_NAME: "coco_train_dataset"},  # train coco dataset from data_prep step
        {STEP_NAME: "data_prep", ENTITY_NAME: "coco_eval_dataset"},   # eval coco dataset from data_prep step
    ],
    tmp_outputs=[
        {ENTITY_NAME: "coco_train_eval_dataset"},    # temporary coco datasets for train and eval on next substep
        {ENTITY_NAME: "obj_detect_train_work_dir"},  # temporary working dir for next substep
    ],
)
substep.print_interface_info()

substep.exit_in_visualize_mode()

In [None]:
# specify all notebook wide libraries imports here
# Sinara lib imports is left in the place of their usage
import os.path as osp
import os
from pathlib import Path

import torch
import copy
import time
import warnings
import math

import mmcv
from mmengine.config import Config as MmConfig

import mmdet
from mmengine.runner import set_random_seed as mm_set_random_seed

import json
import io, re

# Record the versions of the core libraries in the cell output
# (the `=` f-string specifier prints both the expression and its value).
print(f"{mmcv.__version__=}")
print(f"{mmdet.__version__=}")
print(f"{torch.__version__=}")

In [None]:
# Fail early when no CUDA device is visible, otherwise report the active GPU.
cuda_ready = torch.cuda.is_available()
assert cuda_ready, "Cuda not available"

# The guard is kept so the cell stays quiet (instead of failing inside torch)
# when assertions are disabled.
if cuda_ready:
    device_id = torch.cuda.current_device()
    device_name = torch.cuda.get_device_name(device_id)
    print(f"{device_name=}")
    print(f"{torch.cuda.device_count()=}")

In [None]:
# run spark
# The Spark session is only used here to construct SinaraArchive, which unpacks
# the input datasets in the next cell; the session is stopped in the last cell.
from sinara.spark import SinaraSpark
from sinara.archive import SinaraArchive

# NOTE(review): the meaning of the positional argument 0 is not visible here — confirm in sinara docs.
spark = SinaraSpark.run_session(0)
archive = SinaraArchive(spark)
# Last expression of the cell: its value (the Spark UI URL, judging by the name) is displayed as cell output.
SinaraSpark.ui_url()

In [None]:
# Resolve the declared inputs / temporary outputs of this substep.
data_prep_inputs = substep.inputs(step_name = "data_prep")
tmp_outputs = substep.tmp_outputs()

# Both the train and the eval archives are unpacked into the same temporary
# directory, train first.
for dataset_store_path in (data_prep_inputs.coco_train_dataset, data_prep_inputs.coco_eval_dataset):
    archive.unpack_files_from_store_to_tmp(
        store_path=dataset_store_path,
        tmp_entity_dir=tmp_outputs.coco_train_eval_dataset,
    )

## Setting up the training model

### Defining basic variables for training

In [None]:
# Training hyper-parameters are read from the "train_params" section of the step parameters.
train_params = step_params["train_params"]

# NOTE(review): seeding is currently disabled, so runs are not reproducible.
#mm_set_random_seed(train_params['SEED'], deterministic=False)

EPOCH_COUNT = train_params['EPOCH_COUNT']
BATCH       = train_params['BATCH']
# Use half of the available CPUs for data loading, but never fewer than one worker:
# os.cpu_count() may return None, and on a single-core host `cpu_count() // 2` is 0,
# which the dataloaders configured with persistent_workers=True do not accept.
WORKERS     = max(1, (os.cpu_count() or 1) // 2)

#MODEL_NAME   = train_params['MODEL_NAME']
OPTIMIZER_LR = train_params['OPTIMIZER_LR']
WEIGHT_DECAY = train_params['WEIGHT_DECAY']

# Validate and save a checkpoint every CHECKPOINT_INTERVAL epochs.
CHECKPOINT_INTERVAL = 1
############################################
# Side length used for the square (MAX_SIZE, MAX_SIZE) image size in the pipelines.
MAX_SIZE       = train_params['MAX_SIZE']

### Setting up the basic mmengine config parameters for model training

In [None]:
# Load the stock RTMDet-l config shipped inside the installed mmdet package.
# It is only inspected in this notebook; the training config itself is built
# from the YOLOX config below.
mmdet_configs_dir = osp.join(osp.dirname(mmdet.__file__), '.mim', 'configs')
rtmdet_cfg_path = osp.join(mmdet_configs_dir, "rtmdet", "rtmdet_l_8xb32-300e_coco.py")
rtmdet_mmengine_cfg = MmConfig.fromfile(rtmdet_cfg_path)

# print(rtmdet_mmengine_cfg.pretty_text)
# Show the top-level keys of the loaded config as the cell output.
rtmdet_mmengine_cfg.keys()

In [None]:
# Display the default hooks of the RTMDet reference config (inspection only, nothing is modified).
rtmdet_mmengine_cfg.default_hooks

In [None]:
# Start from the stock YOLOX-s COCO config shipped inside the installed mmdet
# package; everything below adapts it to the unpacked dataset.
yolox_cfg_path = osp.join(osp.dirname(mmdet.__file__), '.mim', 'configs', "yolox", "yolox_s_8xb8-300e_coco.py")
mmengine_cfg = MmConfig.fromfile(yolox_cfg_path)

# Read class names of objects from train_coco_annotations.json.
# The file is decoded as UTF-8 explicitly so that non-ASCII category names do
# not depend on the platform's default encoding.
train_ann_path = osp.join(tmp_outputs.coco_train_eval_dataset, "train_coco_annotations.json")
with open(train_ann_path, encoding="utf-8") as f:
    train_coco = json.load(f)
train_coco_categories = sorted(train_coco["categories"], key=lambda x: x["id"])  # sorted class names of objects by id
mmcv_classes = [cat_info.get("name") for cat_info in train_coco_categories] # get class names of objects

mmengine_cfg.evaluation = {'metric': ['bbox'], 'save_best' : 'bbox_mAP'}

#### set directory for saving model
mmengine_cfg.work_dir = tmp_outputs.obj_detect_train_work_dir
# The detection head must predict exactly as many classes as the dataset defines.
mmengine_cfg.model.bbox_head.num_classes = len(mmcv_classes)
mmengine_cfg.model.test_cfg.nms.iou_threshold=0.5

#### Set pretrain_weights
#mmengine_cfg.load_from = tmp_outputs.yolox_pth_pretrain_weights

#### Set frozen backbone
# mmengine_cfg.model.backbone.frozen_stages = 4

# NOTE(review): `evaluation`, `workflow` and `img_norm_cfg` look like legacy
# top-level keys; confirm whether the training substep actually reads them.
workflow = [('train', 1), ('val', 1)]
mmengine_cfg.workflow = workflow

mmengine_cfg.img_norm_cfg = train_params["MMCV_NORMALIZE"]
# Empty data root: the dataloaders are configured with paths built from the tmp dataset dir.
mmengine_cfg.data_root = ''

### Configuring mmengine pipelines for training and validation

In [None]:
# Dataset meta information (class names) and the square target image size.
mmengine_cfg.metainfo = dict(classes=mmcv_classes)
mmengine_cfg.img_size = (MAX_SIZE, MAX_SIZE)

# Transform specs handed to the 'Albu' step of the train pipeline.
albu_train_transforms = [
    dict(type='ChannelShuffle'),
    dict(type='RandomGamma'),
    dict(type='RandomBrightnessContrast'),
    dict(type='HueSaturationValue'),
    dict(type='Resize', height=MAX_SIZE, width=MAX_SIZE, p=0.5),
    dict(type='JpegCompression', quality_lower=85, quality_upper=95, p=0.2),
]

# Train-time pipeline: load image and boxes, apply geometric / colour
# augmentations, run the Albu transforms, then resize to the fixed square size
# and pack the sample.
train_pipeline = [
    {'type': 'LoadImageFromFile'},
    {'type': 'LoadAnnotations', 'with_bbox': True},
    {'type': 'RandomResize',
     'scale': (MAX_SIZE, MAX_SIZE),
     'ratio_range': (0.5, 2.0),
     'keep_ratio': False},
    {'type': 'RandomCrop', 'crop_size': (MAX_SIZE, MAX_SIZE)},
    {'type': 'YOLOXHSVRandomAug'},
    {'type': 'RandomFlip', 'prob': 0.5},
    {'type': 'Albu',
     'transforms': albu_train_transforms,
     'bbox_params': {'type': 'BboxParams',
                     'format': 'pascal_voc',
                     'label_fields': ['gt_bboxes_labels'],
                     'min_visibility': 0.0,
                     'filter_lost_elements': False}},
    {'type': 'Resize', 'scale': (MAX_SIZE, MAX_SIZE), 'keep_ratio': False},
    {'type': 'PackDetInputs'},
]

# Validation / test pipeline: no augmentation, only resize and pad to the
# fixed square size.
test_pipeline = [
    {'type': 'LoadImageFromFile'},
    {'type': 'LoadAnnotations', 'with_bbox': True},
    {'type': 'Resize', 'scale': (MAX_SIZE, MAX_SIZE), 'keep_ratio': False},
    {'type': 'Pad', 'size': (MAX_SIZE, MAX_SIZE), 'pad_val': {'img': (114, 114, 114)}},
    {'type': 'PackDetInputs'},
]

# Train dataloader: shuffled samples from the train annotations in the
# unpacked tmp dataset dir, batched by AspectRatioBatchSampler.
train_dataloader = {
    'batch_size': BATCH,
    'num_workers': WORKERS,
    'persistent_workers': True,
    'sampler': {'type': 'DefaultSampler', 'shuffle': True},
    'batch_sampler': {'type': 'AspectRatioBatchSampler'},
    'dataset': {
        'type': 'CocoDataset',
        'data_root': mmengine_cfg.data_root,
        'ann_file': osp.join(tmp_outputs.coco_train_eval_dataset, "train_coco_annotations.json"),
        'data_prefix': {'img': tmp_outputs.coco_train_eval_dataset},
        # filter_cfg=dict(filter_empty_gt=False, min_size=32),
        'pipeline': train_pipeline,
        'backend_args': None,
        'metainfo': mmengine_cfg.metainfo,
    },
}

# Validation dataloader: same directory, validation annotations, fixed order,
# test-mode dataset with the non-augmenting pipeline.
val_dataloader = {
    'batch_size': BATCH,
    'num_workers': WORKERS,
    'persistent_workers': True,
    'drop_last': False,
    'sampler': {'type': 'DefaultSampler', 'shuffle': False},
    'dataset': {
        'type': 'CocoDataset',
        'data_root': mmengine_cfg.data_root,
        'ann_file': osp.join(tmp_outputs.coco_train_eval_dataset, "val_coco_annotations.json"),
        'data_prefix': {'img': tmp_outputs.coco_train_eval_dataset},
        'test_mode': True,
        'backend_args': None,
        'pipeline': test_pipeline,
        'metainfo': mmengine_cfg.metainfo,
    },
}

# Visualizer override: keep local visualisation data under the work dir and
# additionally write TensorBoard logs into a per-run directory.
new_visualizer_cfg = {
    'visualizer': {
        'type': "DetLocalVisualizer",  # the LocalVisualizer is specific to each mm* framework
        'vis_backends': [
            {'type': "LocalVisBackend"},  # store data locally
            {
                'type': "TensorboardVisBackend",
                'save_dir': "./tmp/tensorboard/{}".format(substep.run_id),
            },
        ],
        'name': "visualizer",
        'save_dir': osp.join(mmengine_cfg.work_dir, "visualizer"),
    },
}

# Optimizer wrapper and LR schedule overrides.
# NOTE(review): the optim_wrapper merged from here is replaced as a whole by the
# assignment to mmengine_cfg.optim_wrapper in the optimizer cell further down,
# so the AdamW settings (lr=0.001, weight_decay=0.0005) do not reach the dumped config.
# The schedule is one cosine period of 10% of the epochs followed by four
# periods of 15% each (70% of EPOCH_COUNT in total).
new_optim_cfg = {
    'optim_wrapper': {
        'type': "OptimWrapper",
        'optimizer': {'type': "AdamW", 'lr': 0.001, 'weight_decay': 0.0005},
        # clip_grad=dict(max_norm=1, norm_type=2),
    },
    'param_scheduler': [
        {
            'type': "CosineRestartLR",
            'by_epoch': True,
            'periods': [int(EPOCH_COUNT * 0.1)] + [int(EPOCH_COUNT * 0.15)] * 4,
            'restart_weights': [1, 1, 1, 1, 1],
            'eta_min': 1e-7,
        },
    ],
}

# Custom hooks: an EMA of the model weights plus a switch of the train pipeline
# at a fixed epoch.
# NOTE(review): switch_epoch is hard-coded to 280 and does not follow
# EPOCH_COUNT — for shorter runs the switch presumably never happens; confirm this is intended.
custom_hooks = [
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=0.0002,
        update_buffers=True,
        priority=49,
    ),
    dict(
        type='PipelineSwitchHook',
        switch_epoch=280,
        switch_pipeline=[
            dict(type='LoadImageFromFile', backend_args=None),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(
                type='RandomResize',
                scale=(MAX_SIZE, MAX_SIZE),
                ratio_range=(0.1, 2.0),
                keep_ratio=True,
            ),
            dict(type='RandomCrop', crop_size=(MAX_SIZE, MAX_SIZE)),
            dict(type='YOLOXHSVRandomAug'),
            dict(type='RandomFlip', prob=0.5),
            dict(type='Pad', size=(MAX_SIZE, MAX_SIZE), pad_val=dict(img=(114, 114, 114))),
            dict(type='PackDetInputs'),
        ],
    ),
]

# Merge the visualizer and optimizer/scheduler overrides into the base config
# (visualizer first, then optimizer).
for cfg_patch in (new_visualizer_cfg, new_optim_cfg):
    mmengine_cfg.merge_from_dict(cfg_patch)

# Replace hooks, pipelines and dataloaders with the ones defined above.
mmengine_cfg.custom_hooks = custom_hooks
mmengine_cfg.train_pipeline = train_pipeline
mmengine_cfg.test_pipeline = test_pipeline
mmengine_cfg.train_dataloader = train_dataloader
mmengine_cfg.val_dataloader = val_dataloader
mmengine_cfg.train_dataset = mmengine_cfg.train_dataloader["dataset"]

# The validation evaluator must score against the same annotation file that
# the validation dataloader reads.
mmengine_cfg.val_evaluator.ann_file = val_dataloader["dataset"]["ann_file"]

# NOTE(review): only the pipeline of the test dataloader is overridden; its
# dataset paths stay as defined in the base config.
mmengine_cfg.test_dataloader.dataset.pipeline = test_pipeline

### Setting up the optimizer, training schedule and checkpoint configuration

In [None]:
# Optimizer: Adam with the learning rate / weight decay from the step parameters.
# NOTE(review): the top-level `optimizer` entry omits weight decay and is
# presumably not what mmengine reads (optim_wrapper is) — confirm before relying on it.
mmengine_cfg.optimizer = {'type': 'Adam', 'lr': OPTIMIZER_LR}

# This assignment replaces any optim_wrapper already present in the config.
optim_wrapper = {
    'type': 'OptimWrapper',
    'optimizer': {
        'type': 'Adam',
        'lr': OPTIMIZER_LR,            # base learning rate
        'weight_decay': WEIGHT_DECAY,  # weight decay
    },
}
mmengine_cfg.optim_wrapper = optim_wrapper

# Training length and validation frequency (validate every CHECKPOINT_INTERVAL epochs).
mmengine_cfg.max_epochs = EPOCH_COUNT
mmengine_cfg.train_cfg["max_epochs"] = EPOCH_COUNT
mmengine_cfg.train_cfg["val_interval"] = CHECKPOINT_INTERVAL

# Checkpointing: save at the same interval and let the hook pick the best-metric key ('auto').
mmengine_cfg.default_hooks.checkpoint.interval = CHECKPOINT_INTERVAL
mmengine_cfg.default_hooks.checkpoint.save_best='auto'
# Always start a fresh run instead of resuming from the latest checkpoint.
mmengine_cfg.resume  = False

### Saving prepared config

In [None]:
# dump parameters for train substep
# Nothing has been written into the work dir before this point in the notebook,
# so make sure it exists before the config file is written into it (no-op when
# the directory is already there).
os.makedirs(tmp_outputs.obj_detect_train_work_dir, exist_ok=True)
mmengine_cfg_file = osp.join(tmp_outputs.obj_detect_train_work_dir, "last_cfg.py")
mmengine_cfg.dump(file=mmengine_cfg_file)

In [None]:
# stop spark
# Counterpart of SinaraSpark.run_session() called earlier in this notebook.
SinaraSpark.stop_session()