## Reference 

1. MMDetection Inspired Notebooks:
    1. https://www.kaggle.com/awsaf49/sartorius-mmdetection-train
    1. https://www.kaggle.com/its7171/mmdetection-for-segmentation-training
    1. https://www.kaggle.com/vexxingbanana/sartorius-mmdetection-training
    1. https://www.kaggle.com/remekkinas/yolox-training-pipeline-cots-dataset-lb-0-507
    1. https://www.kaggle.com/superkevingit/faster-rcnn-with-mmdetection-without-internet
1. YOLOX COCO Dataset: [Efficient COCO dataset generator](https://www.kaggle.com/coldfir3/simple-yolox-dataset-generator-coco-json)
1. Torch Dataset
    1. https://www.kaggle.com/ttjccc/pytorch-190

# LOGS
1. [Inference Notebook](https://www.kaggle.com/mlneo07/mmdetection-swin-transfomer-frcnn-inference)
1. Minor bug fixes
1. Changed backbone: Swin-Tiny --> Swin-Small
1. New `loss_bbox=dict(type='GIoULoss', loss_weight=10.0)`; IoULoss --> GIoULoss
1. FasterRCNN --> CascadeRCNN
1. `train_cfg` sampler settings (config snippet):

    ```python
    train_cfg=dict(
        rpn=dict(sampler=dict(neg_pos_ub=5), allowed_border=-1),
        rcnn=dict(
            sampler=dict(
                _delete_=True,
                type='CombinedSampler',
                num=512,
                pos_fraction=0.25,
                add_gt_as_proposals=True,
                pos_sampler=dict(type='InstanceBalancedPosSampler'),
                neg_sampler=dict(
                    type='IoUBalancedNegSampler',
                    floor_thr=-1,
                    floor_fraction=0,
                    num_bins=3)))))
    ```

# **Install MMDetection and MMDetection-Compatible Torch**

In [None]:
# Install the torch 1.9.0 (+cu111, cp37) wheel from the attached `pytorch-190`
# input dataset. `--no-deps` tells pip not to install the wheel's dependencies.
!pip install '../input/pytorch-190/torch-1.9.0+cu111-cp37-cp37m-linux_x86_64.whl' --no-deps
# The torchvision 0.10.0 wheel from the same dataset is left disabled:
# !pip install '../input/pytorch-190/torchvision-0.10.0+cu111-cp37-cp37m-linux_x86_64.whl' --no-deps

In [None]:
# Remove any earlier checkout, then clone MMDetection; later cells write and
# read config files under ./mmdetection/configs.
# NOTE(review): `rm` uses a relative path while the clone target is absolute --
# they refer to the same directory only when the working directory is
# /kaggle/working; confirm.
# NOTE(review): the clone is not pinned to a tag or commit, so the config tree
# may not match the separately installed mmdet package -- confirm.
!rm -rf mmdetection
!git clone https://github.com/open-mmlab/mmdetection.git /kaggle/working/mmdetection

In [None]:
# Install the OpenMIM package manager, then use it to install `mmdet`.
# NOTE(review): neither package is version-pinned, so the installed release
# depends on when the notebook is run -- the imports below (`build_dataset`,
# `train_detector`, `mmcv.Config`) presumably need a 2.x-era release; confirm.
!pip install openmim
!mim install mmdet

# **Import Libraries** 

In [None]:
# import sys
# sys.path.append('./mmdetection')

import numpy as np
import os
import pandas as pd

import mmdet
import mmcv

from mmdet.datasets import build_dataset
from mmdet.models import build_detector
from mmdet.apis import train_detector
from mmcv import Config
from mmdet.apis import inference_detector, init_detector, set_random_seed

In [None]:
# One seed for the whole run; the same value is stored on the config later
# (`cfg.seed = seed`).
seed = 3047
set_random_seed(seed=seed, deterministic=False)

# **Extract Images and Write Class Labels**

In [None]:
import zipfile

# Both image archives from the dataset-generator notebook are unpacked into
# the same local folder, which the dataset configs use as `img_prefix`.
_IMAGE_DIR = './images'
_ARCHIVE_DIR = '../input/simple-yolox-dataset-generator-coco-json'

# exist_ok=True keeps the cell re-runnable: a plain makedirs raises
# FileExistsError as soon as ./images exists from a previous run.
os.makedirs(_IMAGE_DIR, exist_ok=True)

for _archive_name in ('train2017.zip', 'val2017.zip'):
    with zipfile.ZipFile(os.path.join(_ARCHIVE_DIR, _archive_name), 'r') as zip_ref:
        zip_ref.extractall(_IMAGE_DIR)

In [None]:
%%writefile labels.txt 
cots

## Model Config

In [None]:
%%writefile ./mmdetection/configs/swin/TFGBR_swin_base_faster_rcnn_fp16.py

# Detector config written into the cloned MMDetection config tree.
# NOTE(review): the file name says "swin_base_faster_rcnn_fp16", but the
# content below inherits the Cascade R-CNN base model, points at a
# "swin_small" checkpoint and sets no fp16 option in this file.
#
# Inherited pieces: Cascade R-CNN (R50-FPN) model, COCO detection dataset,
# the 1x schedule and the default runtime.
_base_ = [
    '../_base_/models/cascade_rcnn_r50_fpn.py',
    '../_base_/datasets/coco_detection.py',
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
# Checkpoint handed to the backbone's `init_cfg` below.
pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pth'
model = dict(
    # `_delete_=True` replaces the inherited backbone dict instead of merging
    # these keys into it (see the MMCV config documentation).
    backbone=dict(
        _delete_=True,
        type='SwinTransformer',
        embed_dims=96,
        depths=[2, 2, 18, 2],
        num_heads=[3, 6, 12, 24],
        window_size=7,
        mlp_ratio=4,
        qkv_bias=True,
        qk_scale=None,
        drop_rate=0.,
        attn_drop_rate=0.,
        drop_path_rate=0.3,
        patch_norm=True,
        out_indices=(0, 1, 2, 3),
        with_cp=False,
        convert_weights=True,
        init_cfg=dict(type='Pretrained', checkpoint=pretrained)),
    # 96 * (1, 2, 4, 8) -- presumably the channel widths of the four backbone
    # outputs selected by out_indices; confirm against the backbone.
    neck=dict(in_channels=[96, 192, 384, 768]),
    # Three cascade stages. The three bbox heads below are identical except
    # for `target_stds`, which shrink from stage to stage. Every head uses a
    # single class (num_classes=1) and GIoULoss with loss_weight=10.0.
    roi_head=dict(
        type='CascadeRoIHead',
        num_stages=3,
        stage_loss_weights=[1, 0.5, 0.25],
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        bbox_head=[
            # Stage 1
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=1,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                reg_decoded_bbox=True,
                loss_bbox=dict(type='GIoULoss', loss_weight=10.0)),
            # Stage 2
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=1,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                reg_decoded_bbox=True,
                loss_bbox=dict(type='GIoULoss', loss_weight=10.0)),
            # Stage 3
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=1,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                reg_decoded_bbox=True,
                loss_bbox=dict(type='GIoULoss', loss_weight=10.0))
        ]))

# AdamW replaces the inherited optimizer settings outright (`_delete_=True`).
# The three custom keys below get decay_mult=0., i.e. a zero weight-decay
# multiplier.
optimizer = dict(
    _delete_=True,
    type='AdamW',
    lr=0.0004,
    betas=(0.9, 0.999),
    weight_decay=0.05,
    paramwise_cfg=dict(
        custom_keys={
            'absolute_pos_embed': dict(decay_mult=0.),
            'relative_position_bias_table': dict(decay_mult=0.),
            'norm': dict(decay_mult=0.)
        }))
# NOTE(review): a later notebook cell assigns a new `cfg.lr_config`, so these
# warmup/step values are presumably not the ones used for training; confirm.
lr_config = dict(warmup_iters=500, step=[8, 11])
runner = dict(max_epochs=14)

In [None]:
# Load the config file written by the previous cell; the following cells
# override individual fields on `cfg` before training.
cfg = Config.fromfile('./mmdetection/configs/swin/TFGBR_swin_base_faster_rcnn_fp16.py')

In [None]:
# Normalisation settings shared by the train and test `Normalize` steps.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    to_rgb=True,
)

# AutoAugment policy 1: a single multi-scale resize.
# The scale list is (480, 1333), (512, 1333), ..., (800, 1333) in steps of 32.
_policy_resize_only = [
    dict(
        type='Resize',
        img_scale=[(side, 1333) for side in range(480, 801, 32)],
        multiscale_mode='value',
        keep_ratio=True,
    ),
]

# AutoAugment policy 2: resize -> random crop -> resize again (override=True),
# followed by photometric distortion, an IoU-constrained crop and CutOut.
_policy_crop_and_distort = [
    dict(
        type='Resize',
        img_scale=[(400, 1333), (500, 1333), (600, 1333)],
        multiscale_mode='value',
        keep_ratio=True,
    ),
    dict(
        type='RandomCrop',
        crop_type='absolute_range',
        crop_size=(384, 600),
        allow_negative_crop=True,
    ),
    dict(
        type='Resize',
        img_scale=[(side, 1333) for side in range(480, 801, 32)],
        multiscale_mode='value',
        override=True,
        keep_ratio=True,
    ),
    dict(
        type='PhotoMetricDistortion',
        brightness_delta=32,
        contrast_range=(0.5, 1.5),
        saturation_range=(0.5, 1.5),
        hue_delta=18,
    ),
    dict(
        type='MinIoURandomCrop',
        min_ious=(0.4, 0.5, 0.6, 0.7, 0.8, 0.9),
        min_crop_size=0.3,
    ),
    dict(
        type='CutOut',
        n_holes=(5, 10),
        cutout_shape=[
            (4, 4), (4, 8), (8, 4), (8, 8),
            (16, 32), (32, 16), (32, 32),
            (32, 48), (48, 32), (48, 48),
        ],
    ),
]

# Training pipeline: load image (as float32) and boxes, apply one of the two
# AutoAugment policies, then flip / normalise / pad / bundle / collect.
train_pipeline = [
    dict(type='LoadImageFromFile', to_float32=True),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='AutoAugment',
        policies=[_policy_resize_only, _policy_crop_and_distort],
    ),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]

# Transforms applied inside the single-scale, no-flip test-time wrapper.
_test_transforms = [
    dict(type='Resize', keep_ratio=True),
    dict(type='RandomFlip'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img']),
]

# Validation / test pipeline.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=_test_transforms,
    ),
]

In [None]:
# ---- Paths ------------------------------------------------------------------
cfg.classes = '/kaggle/working/labels.txt'
cfg.work_dir = '/kaggle/working/model_output'
cfg.data_root = '/kaggle/working'

# Every bbox head in the config file already sets num_classes=1, so no
# per-head override is needed here.

# ---- Datasets ---------------------------------------------------------------
# All three splits are COCO-format and share the extracted ./images folder and
# the one-line labels.txt class file; val and test use the same annotations.
cfg.data.test.type = 'CocoDataset'
cfg.data.test.classes = 'labels.txt'
cfg.data.test.ann_file = '../input/simple-yolox-dataset-generator-coco-json/annotations_valid.json'
cfg.data.test.img_prefix = './images'

cfg.data.train.type = 'CocoDataset'
cfg.data.train.ann_file = '../input/simple-yolox-dataset-generator-coco-json/annotations_train.json'
cfg.data.train.img_prefix = './images'
cfg.data.train.classes = 'labels.txt'

cfg.data.val.type = 'CocoDataset'
cfg.data.val.ann_file = '../input/simple-yolox-dataset-generator-coco-json/annotations_valid.json'
cfg.data.val.img_prefix = './images'
cfg.data.val.classes = 'labels.txt'

cfg.data.samples_per_gpu = 2
cfg.data.workers_per_gpu = 2

# ---- Pipelines --------------------------------------------------------------
# Validation reuses the test pipeline.
cfg.train_pipeline = train_pipeline
cfg.val_pipeline = test_pipeline
cfg.test_pipeline = test_pipeline

cfg.data.train.pipeline = cfg.train_pipeline
cfg.data.val.pipeline = cfg.val_pipeline
cfg.data.test.pipeline = cfg.test_pipeline

# ---- Learning-rate schedule -------------------------------------------------
# Replaces the step schedule from the config file with iteration-based cosine
# annealing and a 1000-iteration linear warmup.
cfg.lr_config = dict(
    policy='CosineAnnealing',
    by_epoch=False,
    warmup='linear',
    warmup_iters=1000,
    warmup_ratio=1 / 10,
    min_lr=1e-07)

# ---- Evaluation -------------------------------------------------------------
cfg.evaluation.interval = 2
cfg.evaluation.save_best = 'auto'

# ---- Runtime ----------------------------------------------------------------
cfg.seed = seed
cfg.gpu_ids = range(1)
# This notebook calls `train_detector` directly instead of going through
# tools/train.py. Newer MMDetection 2.x releases read `cfg.device` inside
# `train_detector` and raise AttributeError when it is missing; releases that
# do not know the key ignore it. The run assumes a CUDA GPU (cu111 torch wheel,
# fp16 training).
cfg.device = 'cuda'

# Mixed-precision training with an initial loss scale of 512.
cfg.fp16 = dict(loss_scale=dict(init_scale=512.))

cfg.log_config = dict(
    interval=100,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook')
    ])

# Snapshot of the final config text, passed to `train_detector` as `meta`.
meta = dict()
meta['config'] = cfg.pretty_text

In [None]:
# Build the training dataset; `train_detector` is given it wrapped in a list.
_train_dataset = build_dataset(cfg.data.train)
datasets = [_train_dataset]

# Build the detector from the model section of the config and initialise its
# weights.
model = build_detector(
    cfg.model,
    train_cfg=cfg.get('train_cfg'),
    test_cfg=cfg.get('test_cfg'),
)
model.init_weights()

# Copy the dataset's class names onto the model.
model.CLASSES = _train_dataset.CLASSES

In [None]:
# Make sure the output directory exists before training starts.
_work_dir = os.path.abspath(cfg.work_dir)
mmcv.mkdir_or_exist(_work_dir)

# Single-process (non-distributed) training with validation enabled.
train_detector(
    model,
    datasets,
    cfg,
    distributed=False,
    validate=True,
    meta=meta,
)

In [None]:
import shutil

# Remove the MMDetection clone and the extracted images, in that order --
# presumably so they do not end up in the notebook's saved output.
for _scratch_dir in ('./mmdetection', './images'):
    shutil.rmtree(_scratch_dir)