# Transfer Learning DOTA 학습(Train) 코드 

In [15]:
# Load the libraries used by this notebook.
# NOTE(review): `mmrotate` is never referenced by name in the visible cells;
# presumably it is imported for its side effect of registering the rotated
# datasets/models (e.g. 'DOTADataset') — confirm before removing.
import mmrotate
import torch
import os 
# Print the installed PyTorch version as an environment sanity check.
print(torch.__version__)

1.10.1+cu111


In [16]:
# The .py config below can be found in the `configs` folder under the
# downloaded 00.Reproduction_Test directory (path is relative to the
# notebook's working directory).
config_file = './configs/kfiou/roi_trans_kfiou_ln_r50_fpn_1x_dota_ms_rr_le90.py'

In [17]:
# Load the config file and print its full contents for inspection.
from mmcv import Config

# `cfg` is the config object that the following cells modify in place.
cfg = Config.fromfile(config_file)
print(cfg.pretty_text)

dataset_type = 'DOTADataset'
data_root = 'data/split_ms_dota1_0/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='RResize', img_scale=(1024, 1024)),
    dict(
        type='RRandomFlip',
        flip_ratio=[0.25, 0.25, 0.25],
        direction=['horizontal', 'vertical', 'diagonal'],
        version='le90'),
    dict(
        type='PolyRandomRotate',
        rotate_ratio=0.5,
        angles_range=180,
        auto_bound=False,
        rect_classes=[9, 11],
        version='le90'),
    dict(
        type='Normalize',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_rgb=True),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
 

In [18]:
#-------------------------------------
# The pre-trained weights have to be edited before they can be reused here.
# Weights trained during the competition period are stored under
# 00.Weights/trained_weights_competition_period.

# Cut the classification (fc) layer off the pre-trained ResNet-50 checkpoint.

# map_location='cpu' restores every tensor on the CPU, so this cell does not
# depend on (or allocate memory on) the GPU the checkpoint was saved from.
res_a = torch.load(
    '/mnt/hdd/eric/.tmp_ipy/00.Weights/trained_weights_competition_period/resnet50_epoch_538.pth',
    map_location='cpu')
# The weights are stored under the 'model' key of the checkpoint.
res_a_model = res_a['model']

# Loading the unmodified weights reports:
#   unexpected key in source state_dict: fc.weight, fc.bias
# so both entries are dropped. pop(..., None) tolerates a checkpoint in which
# they are already absent instead of raising KeyError.
for fc_key in ('fc.weight', 'fc.bias'):
    res_a_model.pop(fc_key, None)

In [19]:
# Save the modified (fc-stripped) weights back into
# 00.Weights/trained_weights_competition_period.
# The file name must be the one the backbone `init_cfg.checkpoint` setting in
# the config-override cell loads ('resnet50_epoch_538_del_fc.pth'); saving
# under a different name means the stripped weights are never picked up.
torch.save(
    res_a_model,
    '/mnt/hdd/eric/.tmp_ipy/00.Weights/trained_weights_competition_period/resnet50_epoch_538_del_fc.pth')


In [20]:
#--------------------------------------
# ResNet-50 backbone
# Initialise the backbone from the ResNet-50 weights stored under
# 00.Weights/trained_weights_competition_period.
# NOTE(review): the cell that strips and saves the weights must write to
# exactly this path — verify the two file names agree.
cfg.model.backbone.init_cfg.checkpoint = '/mnt/hdd/eric/.tmp_ipy/00.Weights/trained_weights_competition_period/resnet50_epoch_538_del_fc.pth'

# Training result files are written to the 00.Checkpoint folder.
cfg.work_dir = '/mnt/hdd/eric/.tmp_ipy/00.Checkpoint/Dota'

# Train dataset
cfg.data_root = "/mnt/hdd/eric/.tmp_ipy/00.Data/DOTA_dataset/dota1_ms_train"
cfg.data.train.data_root = "/mnt/hdd/eric/.tmp_ipy/00.Data/DOTA_dataset/dota1_ms_train"
cfg.data.train.ann_file = "/mnt/hdd/eric/.tmp_ipy/00.Data/DOTA_dataset/dota1_ms_train/annfiles"
cfg.data.train.img_prefix = "/mnt/hdd/eric/.tmp_ipy/00.Data/DOTA_dataset/dota1_ms_train/images"

# Validation dataset
cfg.data.val.data_root = "/mnt/hdd/eric/.tmp_ipy/00.Data/DOTA_dataset/dota1_origin"
cfg.data.val.ann_file = "/mnt/hdd/eric/.tmp_ipy/00.Data/DOTA_dataset/dota1_origin/val/annfiles"
cfg.data.val.img_prefix = "/mnt/hdd/eric/.tmp_ipy/00.Data/DOTA_dataset/dota1_origin/val/images"


# Classes: both bbox heads are sized to the 16 names listed below.
cfg.model.roi_head.bbox_head[0].num_classes = 16
cfg.model.roi_head.bbox_head[1].num_classes = 16


DOTA_ORIGIN_CLASSES = (
    'plane', 'baseball-diamond', 'bridge', 'ground-track-field',
    'small-vehicle', 'large-vehicle', 'ship', 'tennis-court',
    'basketball-court', 'storage-tank', 'soccer-ball-field',
    'roundabout', 'harbor', 'swimming-pool', 'helicopter', 'container-crane',
)


# The train and validation datasets share the same class list.
cfg.data.train.classes = DOTA_ORIGIN_CLASSES
cfg.data.val.classes = DOTA_ORIGIN_CLASSES

# Epochs
cfg.runner.max_epochs = 12

# Batch size and data-loading workers (both are per-GPU settings)
cfg.data.samples_per_gpu = 2
cfg.data.workers_per_gpu = 32

# Evaluation / checkpoint / logging intervals
# (presumably epochs for the first two and iterations for logging —
# confirm against the runner type in the base config)
cfg.evaluation.interval = 1
cfg.checkpoint_config.interval = 1
cfg.log_config.interval = 50

# Use 4 GPUs
cfg.gpu_ids = [0, 1, 2, 3]
cfg.device = 'cuda'

# Fix the random seed
cfg.seed = 22


In [21]:
# Display the output directory to confirm it was set as intended.
cfg.work_dir 

'/mnt/hdd/eric/.tmp_ipy/00.Checkpoint/Dota'

In [22]:
# Write the config modified above into the output directory; the training
# launch command later in this notebook is given this file's path.
import os

config_save_path = os.path.join(cfg.work_dir, "config_save_res50.py")

# Create the output directory if needed so the dump below cannot fail on a
# missing folder.
os.makedirs(cfg.work_dir, exist_ok=True)

# Remove a stale copy only when one actually exists. Testing the joined path
# string itself is always truthy and made os.remove raise FileNotFoundError
# on the first run.
if os.path.exists(config_save_path):
    os.remove(config_save_path)
cfg.dump(config_save_path)

In [23]:
# Build the training dataset from the config so it can be inspected.

from mmdet.datasets import build_dataset
from mmdet.models import build_detector
from mmdet.apis import train_detector

# Train dataset check: the dataset is wrapped in a one-element list and
# accessed as datasets[0] in the following cells.

datasets = [build_dataset(cfg.data.train)] 

In [24]:
# Inspect the training dataset that was just built: print its summary
# and list the attributes stored on the dataset object.

print(datasets[0])
datasets[0].__dict__.keys()


DOTADataset Train dataset with number of images 58580, and instance counts: 
+------------------------+--------+----------------------+--------+------------------+-------+------------------------+-------+-------------------+---------+
| category               | count  | category             | count  | category         | count | category               | count | category          | count   |
+------------------------+--------+----------------------+--------+------------------+-------+------------------------+-------+-------------------+---------+
| 0 [plane]              | 77319  | 1 [baseball-diamond] | 3185   | 2 [bridge]       | 18441 | 3 [ground-track-field] | 2705  | 4 [small-vehicle] | 1062404 |
| 5 [large-vehicle]      | 173164 | 6 [ship]             | 303096 | 7 [tennis-court] | 14479 | 8 [basketball-court]   | 3974  | 9 [storage-tank]  | 53588   |
| 10 [soccer-ball-field] | 2700   | 11 [roundabout]      | 3895   | 12 [harbor]      | 49736 | 13 [swimming-pool]     | 19307 | 14 [

dict_keys(['version', 'difficulty', 'ann_file', 'data_root', 'img_prefix', 'seg_prefix', 'seg_suffix', 'proposal_file', 'test_mode', 'filter_empty_gt', 'file_client', 'CLASSES', 'img_ids', 'data_infos', 'proposals', 'flag', 'pipeline'])

In [25]:
# Fetch the first training sample to inspect what the dataset returns.
datasets[0][0]

{'img_metas': DataContainer({'filename': '/mnt/hdd/eric/.tmp_ipy/00.Data/DOTA_dataset/dota1_ms_train/images/P1158__682__2443___3839.png', 'ori_filename': 'P1158__682__2443___3839.png', 'ori_shape': (682, 682, 3), 'img_shape': (1024, 1024, 3), 'pad_shape': (1024, 1024, 3), 'scale_factor': array([1.5015, 1.5015, 1.5015, 1.5015], dtype=float32), 'flip': False, 'flip_direction': None, 'img_norm_cfg': {'mean': array([123.675, 116.28 , 103.53 ], dtype=float32), 'std': array([58.395, 57.12 , 57.375], dtype=float32), 'to_rgb': True}}),
 'img': DataContainer(tensor([[[ 0.2624,  0.2624,  0.2796,  ...,  0.4679,  0.4679,  0.4679],
          [ 0.2624,  0.2796,  0.2796,  ...,  0.4679,  0.4679,  0.4679],
          [ 0.2796,  0.2796,  0.2796,  ...,  0.4679,  0.4679,  0.4679],
          ...,
          [ 0.2453,  0.1254,  0.0056,  ..., -0.6965, -0.6794, -0.6623],
          [ 0.3138,  0.1939,  0.0741,  ..., -0.6623, -0.6452, -0.6452],
          [ 0.3652,  0.2453,  0.1254,  ..., -0.6452, -0.6452, -0.6281]

In [26]:
# Validation dataset check: build it from the config, print its summary,
# and list the attributes stored on the dataset object.


val_datasets = [build_dataset(cfg.data.val)] 
print(val_datasets[0])
val_datasets[0].__dict__.keys()



DOTADataset Train dataset with number of images 458, and instance counts: 
+------------------------+-------+----------------------+-------+------------------+-------+------------------------+-------+-------------------+-------+
| category               | count | category             | count | category         | count | category               | count | category          | count |
+------------------------+-------+----------------------+-------+------------------+-------+------------------------+-------+-------------------+-------+
| 0 [plane]              | 2550  | 1 [baseball-diamond] | 213   | 2 [bridge]       | 465   | 3 [ground-track-field] | 145   | 4 [small-vehicle] | 43323 |
| 5 [large-vehicle]      | 5139  | 6 [ship]             | 10764 | 7 [tennis-court] | 763   | 8 [basketball-court]   | 143   | 9 [storage-tank]  | 2940  |
| 10 [soccer-ball-field] | 149   | 11 [roundabout]      | 185   | 12 [harbor]      | 2102  | 13 [swimming-pool]     | 576   | 14 [helicopter]   | 78    |


dict_keys(['version', 'difficulty', 'ann_file', 'data_root', 'img_prefix', 'seg_prefix', 'seg_suffix', 'proposal_file', 'test_mode', 'filter_empty_gt', 'file_client', 'CLASSES', 'img_ids', 'data_infos', 'proposals', 'flag', 'pipeline'])

In [27]:
# NOTE(review): this indexes `datasets` (the training dataset), not
# `val_datasets`. If the intent was to inspect a validation sample after the
# validation check, this should probably be val_datasets[0][0] — confirm.
datasets[0][0]

{'img_metas': DataContainer({'filename': '/mnt/hdd/eric/.tmp_ipy/00.Data/DOTA_dataset/dota1_ms_train/images/P1158__682__2443___3839.png', 'ori_filename': 'P1158__682__2443___3839.png', 'ori_shape': (682, 682, 3), 'img_shape': (1024, 1024, 3), 'pad_shape': (1024, 1024, 3), 'scale_factor': array([1.5015, 1.5015, 1.5015, 1.5015], dtype=float32), 'flip': False, 'flip_direction': None, 'img_norm_cfg': {'mean': array([123.675, 116.28 , 103.53 ], dtype=float32), 'std': array([58.395, 57.12 , 57.375], dtype=float32), 'to_rgb': True}}),
 'img': DataContainer(tensor([[[-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
          [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
          [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
          ...,
          [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
          [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
          [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179]

# Train

- 아래의 bash 를 통해서 train 실행. 
- 4개 gpu 사용 (RTX 3090)
- 평균 학습시간 6시간~8시간 

In [29]:
%%bash

# Allocator setting for the PyTorch CUDA caching allocator
# (presumably chosen to limit memory fragmentation — confirm).
export PYTORCH_CUDA_ALLOC_CONF="max_split_size_mb=256"
# Launch distributed training with the config dumped earlier in this notebook.
# The positional `4` is presumably the GPU count expected by dist_train.sh
# (the notes above say 4 GPUs are used); --seed 22 matches cfg.seed.
bash tools/dist_train.sh /mnt/hdd/eric/.tmp_ipy/00.Checkpoint/Dota/config_save_res50.py 4 --seed 22

and will be removed in future. Use torchrun.
Note that --use_env is set by default in torchrun.
If your script expects `--local_rank` argument to be set, please
change it to read from `os.environ['LOCAL_RANK']` instead. See 
https://pytorch.org/docs/stable/distributed.html#launch-utility for 
further instructions

*****************************************
Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. 
*****************************************
fatal: not a git repository (or any parent up to mount point /mnt)
Stopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).
fatal: not a git repository (or any parent up to mount point /mnt)
Stopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).
fatal: not a git repository (or any parent up to mount point /mnt)
Stopping at filesystem boundary (GIT_DI

Error while terminating subprocess (pid=742657): 


2023-07-27 14:39:29,692 - mmrotate - INFO - Epoch [1][250/7323]	lr: 3.327e-03, eta: 7:04:26, time: 0.253, data_time: 0.008, memory: 12186, loss_rpn_cls: 0.1132, loss_rpn_bbox: 0.0621, s0.loss_cls: 0.3609, s0.acc: 89.0830, s0.loss_bbox: 0.4868, s1.loss_cls: 0.2403, s1.acc: 93.1218, s1.loss_bbox: 0.1487, loss: 1.4120, grad_norm: 5.2903
