In [1]:
import argparse
import copy
import os
import os.path as osp
import time
import warnings

import mmcv
import torch
from mmcv import Config, DictAction
from mmcv.runner import get_dist_info, init_dist, load_checkpoint
from mmcv.utils import get_git_hash

from mmdet import __version__
from mmdet.apis import set_random_seed, train_detector
from mmdet.datasets import build_dataset
from mmdet.models import build_detector
from mmdet.utils import collect_env, get_root_logger
import torch.distributed as dist
os.environ['MASTER_ADDR'] = 'localhost'
os.environ['MASTER_PORT'] = '6008'
dist.init_process_group('gloo', rank=0, world_size=1)

In [2]:
classes = ("UNKNOWN", "General trash", "Paper", "Paper pack", "Metal", "Glass", 
           "Plastic", "Styrofoam", "Plastic bag", "Battery", "Clothing")
# config file 들고오기
cfg = Config.fromfile('./configs/swin/cascade_mask_rcnn_swin_base_patch4_window7_mstrain_480-800_giou_4conv1f_adamw_3x_coco.py')

PREFIX = '../../input/data/'


# dataset 바꾸기
cfg.data.train.classes = classes
cfg.data.train.img_prefix = PREFIX
cfg.data.train.ann_file = PREFIX + 'train.json'
# cfg.data.train.pipeline[2]['img_scale'] = (512, 512)

cfg.data.val.classes = classes
cfg.data.val.img_prefix = PREFIX
cfg.data.val.ann_file = PREFIX + 'val.json'
# cfg.data.val.pipeline[1]['img_scale'] = (512, 512)

cfg.data.test.classes = classes
cfg.data.test.img_prefix = PREFIX
cfg.data.test.ann_file = PREFIX + 'test.json'
# cfg.data.test.pipeline[1]['img_scale'] = (512, 512)

cfg.data.samples_per_gpu = 2

cfg.seed=2020
cfg.gpu_ids = [0]
cfg.work_dir = './work_dirs/swin'

for i in range(len(cfg.model.roi_head.bbox_head)):
    cfg.model.roi_head.bbox_head[i].num_classes = 11
cfg.model.roi_head.mask_head.num_classes = 11
cfg.optimizer_config.grad_clip = dict(max_norm=35, norm_type=2)

cfg.optimizer.lr = 1e-3
cfg.checkpoint_config.interval = 5
# cfg.model.backbone.use_checkpoint = True

In [3]:
meta = dict()
env_info_dict = collect_env()
env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()])
dash_line = '-' * 60 + '\n'
meta['env_info'] = env_info
meta['config'] = cfg.pretty_text
meta['seed'] = cfg.seed
meta['exp_name'] = 'swin'

In [4]:
for k in cfg.keys():
    print("key = ",k)
    print(cfg[k])

key =  model
{'type': 'CascadeRCNN', 'pretrained': None, 'backbone': {'type': 'SwinTransformer', 'embed_dim': 128, 'depths': [2, 2, 18, 2], 'num_heads': [4, 8, 16, 32], 'window_size': 7, 'mlp_ratio': 4.0, 'qkv_bias': True, 'qk_scale': None, 'drop_rate': 0.0, 'attn_drop_rate': 0.0, 'drop_path_rate': 0.3, 'ape': False, 'patch_norm': True, 'out_indices': (0, 1, 2, 3), 'use_checkpoint': False}, 'neck': {'type': 'FPN', 'in_channels': [128, 256, 512, 1024], 'out_channels': 256, 'num_outs': 5}, 'rpn_head': {'type': 'RPNHead', 'in_channels': 256, 'feat_channels': 256, 'anchor_generator': {'type': 'AnchorGenerator', 'scales': [8], 'ratios': [0.5, 1.0, 2.0], 'strides': [4, 8, 16, 32, 64]}, 'bbox_coder': {'type': 'DeltaXYWHBBoxCoder', 'target_means': [0.0, 0.0, 0.0, 0.0], 'target_stds': [1.0, 1.0, 1.0, 1.0]}, 'loss_cls': {'type': 'CrossEntropyLoss', 'use_sigmoid': True, 'loss_weight': 1.0}, 'loss_bbox': {'type': 'SmoothL1Loss', 'beta': 0.1111111111111111, 'loss_weight': 1.0}}, 'roi_head': {'type'

In [5]:
datasets = [build_dataset(cfg.data.train)]

in
build
loading annotations into memory...
Done (t=3.63s)
creating index...
index created!


In [6]:

def load_swin(cfg,ck_path):
    # make temp_cfg
    temp_cfg = copy.deepcopy(cfg)
    
    for i in range(len(temp_cfg.model.roi_head.bbox_head)):
        temp_cfg.model.roi_head.bbox_head[i].num_classes = 80

    # make original swin
    pre_trained = build_detector(temp_cfg.model,train_cfg=None,
        test_cfg=None)
    load_checkpoint(pre_trained,ck_path, map_location='cpu')

    model = build_detector(cfg.model,train_cfg=None,
        test_cfg=None)

    # make pretrained state dict
    pretrained_dict = pre_trained.backbone.state_dict()
    model_dict = model.backbone.state_dict()
    pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
    model_dict.update(pretrained_dict) 
    del pre_trained
    torch.cuda.empty_cache()
    model.backbone.load_state_dict(model_dict)
    return model

def get_model(network,cfg,train=True):
    repo_root = ''
    
    if network[-1] == 's':
        ck_path = os.path.join(repo_root,'pretrained/cascade_mask_rcnn_swin_small_patch4_window7.pth')
    elif network[-1] == 'b':
        ck_path = os.path.join(repo_root,'pretrained/cascade_mask_rcnn_swin_base_patch4_window7.pth')
    else:
        ck_path = os.path.join(repo_root,'pretrained/cascade_mask_rcnn_swin_tiny_patch4_window7.pth')        
    
    if train:
        return load_swin(cfg,ck_path)
    else:
        return make_swin_model(cfg)

In [7]:
model = get_model('swin_b',cfg)

Use load_from_local loader
The model and loaded state dict do not match exactly

size mismatch for roi_head.mask_head.0.conv_logits.weight: copying a param with shape torch.Size([80, 256, 1, 1]) from checkpoint, the shape in current model is torch.Size([11, 256, 1, 1]).
size mismatch for roi_head.mask_head.0.conv_logits.bias: copying a param with shape torch.Size([80]) from checkpoint, the shape in current model is torch.Size([11]).
size mismatch for roi_head.mask_head.1.conv_logits.weight: copying a param with shape torch.Size([80, 256, 1, 1]) from checkpoint, the shape in current model is torch.Size([11, 256, 1, 1]).
size mismatch for roi_head.mask_head.1.conv_logits.bias: copying a param with shape torch.Size([80]) from checkpoint, the shape in current model is torch.Size([11]).
size mismatch for roi_head.mask_head.2.conv_logits.weight: copying a param with shape torch.Size([80, 256, 1, 1]) from checkpoint, the shape in current model is torch.Size([11, 256, 1, 1]).
size mismatch for

In [8]:
cfg.lr_config

{'policy': 'step',
 'warmup': 'linear',
 'warmup_iters': 500,
 'warmup_ratio': 0.001,
 'step': [27, 33]}

In [None]:
train_detector(
        model,
        datasets,
        cfg,
        distributed=False,
        validate=True,
        meta=meta)

in
build
loading annotations into memory...


2021-05-13 09:46:08,960 - mmdet - INFO - Start running, host: root@28e81ee99bc4, work_dir: /opt/ml/code/Swin-Transformer-Object-Detection/work_dirs/swin
2021-05-13 09:46:08,962 - mmdet - INFO - workflow: [('train', 1)], max: 36 epochs


Done (t=0.90s)
creating index...
index created!


2021-05-13 09:47:09,252 - mmdet - INFO - Epoch [1][50/1309]	lr: 9.890e-05, eta: 15:45:52, time: 1.206, data_time: 0.064, memory: 13427, loss_rpn_cls: 0.4589, loss_rpn_bbox: 0.0631, s0.loss_cls: 0.7412, s0.acc: 88.2012, s0.loss_bbox: 0.1936, s0.loss_mask: 0.8713, s1.loss_cls: 0.3209, s1.acc: 88.3301, s1.loss_bbox: 0.0253, s1.loss_mask: 0.4995, s2.loss_cls: 0.1452, s2.acc: 90.6406, s2.loss_bbox: 0.0027, s2.loss_mask: 0.2546, loss: 3.5762, grad_norm: 11.4270
2021-05-13 09:48:09,492 - mmdet - INFO - Epoch [1][100/1309]	lr: 1.988e-04, eta: 15:44:33, time: 1.205, data_time: 0.019, memory: 13816, loss_rpn_cls: 0.1857, loss_rpn_bbox: 0.0654, s0.loss_cls: 0.4911, s0.acc: 87.5469, s0.loss_bbox: 0.4911, s0.loss_mask: 0.6400, s1.loss_cls: 0.1360, s1.acc: 94.4473, s1.loss_bbox: 0.0683, s1.loss_mask: 0.3237, s2.loss_cls: 0.0403, s2.acc: 97.2324, s2.loss_bbox: 0.0076, s2.loss_mask: 0.1646, loss: 2.6138, grad_norm: 6.3144
2021-05-13 09:49:09,344 - mmdet - INFO - Epoch [1][150/1309]	lr: 2.987e-04, eta: