# Install mmdetection
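Before we install `mmdetection`, we need to patch `mmdetection/mmdet/datasets/pipelines/loading.py`. The function `_load_bboxes` in `LoadAnnotations` has a bug: it always registers `gt_bboxes_ignore` in `bbox_fields`, even when the annotation has no `bboxes_ignore` entry and the stored value is therefore `None`. The patch below registers `gt_bboxes_ignore` only when it is actually present.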

```python
<<<<<<<<<<<<<<<<<<<<
67 results['gt_bboxes_ignore'] = ann_info.get('bboxes_ignore', None)
68 results['bbox_fields'].extend(['gt_bboxes', 'gt_bboxes_ignore'])
====================
67 tmp = ann_info.get('bboxes_ignore', None)
68 if tmp is None:
69     results['bbox_fields'].append('gt_bboxes')
70 else:
71     results['gt_bboxes_ignore'] = tmp
72     results['bbox_fields'].extend(['gt_bboxes', 'gt_bboxes_ignore'])
>>>>>>>>>>>>>>>>>>>>
```

In [0]:
# Mount Google Drive at /gdrive; the patched loading.py copied below lives there.
from google.colab import drive
drive.mount('/gdrive')

# Clone mmdetection into the current working directory.
# NOTE(review): no tag or commit is pinned, so the clone follows the default
# branch and the patched loading.py may not match it -- consider pinning.
! git clone https://github.com/open-mmlab/mmdetection.git
# Overwrite the stock loading.py with the patched copy kept on Drive. This is
# done before the install step so the installed package contains the fix.
! cp /gdrive/'My Drive'/'DLCV Homework'/'HW03'/'mmdetection_patch'/loading.py \
./mmdetection/mmdet/datasets/pipelines/
# Install supporting packages (versions are not pinned).
! pip install -q mmcv terminaltables
# Install mmdetection from the patched checkout, then its listed requirements.
# NOTE(review): assumes the clone above landed in /content -- confirm.
%cd /content/mmdetection
! python setup.py install
! pip install -r requirements.txt
# Return to /content, which the later cells use as the workspace root.
%cd /content

# Prepare SVHN data
I put the SVHN data and annotations on Google Drive. Copy them to the workspace.

In [0]:
# Extract the test images (zip) and training images (tar.gz) from Drive
# into ./data/. Requires Drive to be mounted at /gdrive.
! mkdir -p data
! unzip -q /gdrive/'My Drive'/'DLCV Homework'/'HW03'/'data'/test.zip -d ./data/
! tar zxf /gdrive/'My Drive'/'DLCV Homework'/'HW03'/'data'/train.tar.gz -C ./data/
# Copy the pickled annotation file next to the images.
! cp /gdrive/'My Drive'/'DLCV Homework'/'HW03'/'data'/SVHN.pkl ./data/
# Copy the model/training configuration file to ./config/.
! mkdir -p config
! cp /gdrive/'My Drive'/'DLCV Homework'/'HW03'/'config'/retinanet_r50_fpn_1x.py ./config/
# Copy all helper modules to ./src/ (imported later as `src.*`).
! mkdir -p src
! cp /gdrive/'My Drive'/'DLCV Homework'/'HW03'/'src'/* ./src/
# Copy the checkpoints to ./train_logs/ so training can resume from
# latest.pth and inference can load it.
! mkdir -p train_logs
! cp /gdrive/'My Drive'/'DLCV Homework'/'HW03'/'checkpoints'/retinanet_resnet101/epoch_26.pth ./train_logs/
! cp /gdrive/'My Drive'/'DLCV Homework'/'HW03'/'checkpoints'/retinanet_resnet101/latest.pth ./train_logs/

Calculate the mean and std of the dataset.

In [0]:
# # Calculate mean and std of the dataset.
# import cv2
# import numpy as np

# mean = np.array([0.0, 0.0, 0.0])
# std = np.array([0.0, 0.0, 0.0])
# for i in range(1, 33403):
#     img = cv2.imread('./data/train/'+str(i)+'.png')
#     mean += np.array([np.mean(img[:, :, j]) for j in range(img.ndim)])
#     std += np.array([np.std(img[:, :, j]) for j in range(img.ndim)])
# mean /= 33402
# std /= 33402
# print(mean, std)

# # Calculate mean and std of the dataset.
# import cv2
# import numpy as np

# mean = 0.0
# std = 0.0
# for i in range(1, 33403):
#     img = cv2.imread('./data/train/'+str(i)+'.png', cv2.IMREAD_GRAYSCALE)
#     mean += np.array(np.mean(img))
#     std += np.array(np.std(img))
# mean /= 33402
# std /= 33402
# print(mean, std)

Create a dataset object used in `mmdetection`.

In [0]:
from mmcv import Config
from mmdet.datasets import build_dataset
import numpy as np

# Load the experiment configuration; later cells reuse `cfg`.
cfg = Config.fromfile('./config/retinanet_r50_fpn_1x.py')

# Build the training split with `test_mode` forced on; it is switched back
# off at the end of this cell.
# NOTE(review): presumably this bypasses train-only setup in the dataset
# constructor -- confirm against the installed mmdetection version.
dataset = build_dataset(cfg.data.train, dict(test_mode=True))

# Give every sample the same flag value (1), one uint8 entry per sample.
dataset.flag = np.full(len(dataset), 1, dtype=np.uint8)

# Class names in class-index order; note that '0' is the last entry.
dataset.CLASSES = ('1', '2', '3', '4', '5', '6', '7', '8', '9', '0')

# Switch the dataset object back to training behaviour.
dataset.test_mode = False

Show the distribution of the dataset.

In [0]:
# import matplotlib.pyplot as plt

# w = [0 for _ in range(1000)]; h = [0 for _ in range(600)]
# ratio = [0 for _ in range(20)]
# for data in dataset.img_infos:
#     img_w = int(data['width'])
#     img_h = int(data['height'])
#     for bbox in data['ann']['bboxes']:
#         tmp_w = bbox[2] - bbox[0]
#         tmp_h = bbox[3] - bbox[1]
#         # if tmp_w <= 79:
#         #     w[int(tmp_w)] += 1;
#         # # if tmp_h <= 99:
#         # #     h[int(tmp_h)] += 1
#         # if round(tmp_h*4.5) < 599.5:
#         #     h[int(round(tmp_h*4.5))] += 1
#         if img_w <= 999:
#             w[int(img_w)] += 1
#         if img_h <= 599:
#             h[int(img_h)] += 1
#         if round(tmp_h*4.5/tmp_w) < 19.5:
#             ratio[int(round(tmp_h*4.5/tmp_w))] += 1
#         # if round(img_h*10/img_w) < 9.5:
#         #     ratio[int(round(img_h*10/img_w))] += 1
# plt.plot(w)
# plt.show()
# plt.plot(h)
# plt.show()
# plt.plot(ratio)
# plt.show()

Visualize the bounding boxes to check whether we parse the annotation data correctly.

In [0]:
from src.show_bbox import imshow_bboxes

# Sanity check of the parsed annotations: print the first record and draw its
# boxes on ./data/train/1.png.
# NOTE(review): assumes the first record describes 1.png -- confirm.
first_record = dataset.img_infos[0]
print(first_record)
bboxes = first_record['ann']['bboxes']
imshow_bboxes('./data/train/1.png', bboxes)

# Build RetinaNet

In [0]:
from mmdet import __version__
from mmdet.apis import (get_root_logger, init_dist, set_random_seed,
                        train_detector)
from mmdet.models import build_detector

# Single-process training: no distributed environment is initialised in this
# notebook (init_dist is imported but never called). The flag is still
# passed to the logger message and to train_detector below.
distributed = False

# Create the root logger at the level requested by the config before any
# other step, so everything below can log.
logger = get_root_logger(cfg.log_level)
logger.info('Distributed training: {}'.format(distributed))

# Seed the RNGs when the config provides a seed.
# NOTE(review): assumes the config file defines `seed` -- confirm.
if cfg.seed is not None:
    logger.info('Set random seed to {}'.format(cfg.seed))
    set_random_seed(cfg.seed)

# Build the detector from the config and attach the dataset's class names.
model = build_detector(
    cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
model.CLASSES = dataset.CLASSES

if cfg.checkpoint_config is not None:
    # Save the mmdet version, the config file content and the class names
    # in checkpoints as metadata.
    cfg.checkpoint_config.meta = dict(
        mmdet_version=__version__,
        config=cfg.text,
        CLASSES=dataset.CLASSES)

# Train one epoch per loop iteration and back the artefacts up to Drive
# after each one. As written, range(26, 27) runs a single iteration with
# epoch == 26; edit the range to train other epochs.
for epoch in range(26, 27):
    # From the second epoch on, continue from the most recent checkpoint.
    if epoch >= 1:
        cfg.resume_from = './train_logs/latest.pth'
    # Sets the epoch budget to epoch + 1.
    # NOTE(review): presumably training stops once this count is reached, so
    # resuming from latest.pth trains exactly one more epoch -- confirm.
    cfg.total_epochs = epoch + 1
    train_detector(
        model,
        dataset,
        cfg,
        distributed=distributed,
        logger=logger)
    # Re-mount Drive before copying.
    # NOTE(review): presumably guards against the mount dropping during a
    # long training run -- confirm.
    from google.colab import drive
    drive.mount('/gdrive')
    # Back up the latest checkpoint, the checkpoint of the epoch just finished
    # (epoch_<epoch+1>.pth) and the log files to Drive.
    ! cp ./train_logs/latest.pth /gdrive/'My Drive'/'DLCV Homework'/HW03/checkpoints/retinanet_resnet101/
    ! cp {'./train_logs/epoch_'+str(epoch+1)+'.pth'} /gdrive/'My Drive'/'DLCV Homework'/HW03/checkpoints/retinanet_resnet101/
    ! cp ./train_logs/*.log* /gdrive/'My Drive'/'DLCV Homework'/HW03/train_logs/retinanet_resnet101/
    # Delete the local logs after they have been copied to Drive.
    ! rm ./train_logs/*.log*

# Results
Result on a single image.

In [0]:
from mmcv.runner import load_checkpoint
from mmdet.apis import inference_detector, show_result_pyplot, init_detector

score_thr = 0.5
checkpoint_file = './train_logs/latest.pth'

# Create the detector from the config file and load the checkpoint weights.
test_model = init_detector('./config/retinanet_r50_fpn_1x.py', checkpoint_file)

img = './data/test/30.png'
result = inference_detector(test_model, img)

# `result` is indexed by class; each entry is an array of detections whose
# last element is the score. Every detection is listed here (no score
# filtering), with labels printed 1-based.
for class_idx, detections in enumerate(result):
    for det in detections:
        print('label: %2d, score: %.3f' % (class_idx + 1, det[-1]))

# Plot the detections on the image, passing `score_thr` to the plotting helper.
show_result_pyplot(img, result, test_model.CLASSES, score_thr=score_thr)

Time performance.

In [33]:
%%timeit
# Time one `inference_detector` call on a single test image path, using the
# `test_model` created in the previous cell.
result = inference_detector(test_model, './data/test/1.png')

10 loops, best of 3: 128 ms per loop


Dump the results to JSON format.

In [0]:
from mmcv.runner import load_checkpoint
from mmdet.apis import inference_detector, show_result_pyplot, init_detector
import json

checkpoint_file = './train_logs/latest.pth'
score_thr = 0.5

# Create the detector from the config file and load the checkpoint weights.
test_model = init_detector('./config/retinanet_r50_fpn_1x.py', checkpoint_file)

# Run the detector on test images 1.png .. 13068.png and collect one record
# per image, in image-id order.
ret = []
for img_id in range(1, 13069):
    img = './data/test/' + str(img_id) + '.png'
    result = inference_detector(test_model, img)

    # Flatten the per-class arrays into (1-based label, detection) pairs,
    # keeping class order and, within a class, detection order.
    labelled = [
        (class_idx + 1, det)
        for class_idx, detections in enumerate(result)
        for det in detections
    ]

    ret.append({
        # The first four values of a detection are written with each pair
        # swapped, i.e. in the order (1, 0, 3, 2).
        'bbox': [
            (float(det[1]), float(det[0]), float(det[3]), float(det[2]))
            for _, det in labelled
        ],
        # The last value of a detection is its score. No score threshold is
        # applied: every detection is exported.
        'score': [float(det[-1]) for _, det in labelled],
        'label': [int(label) for label, _ in labelled],
    })

# Write all records to a single JSON file.
with open('0856030.json', 'w') as outfile:
    json.dump(ret, outfile)

# Modify learning rate in the checkpoint

In [0]:
import torch
from mmcv.runner import load_checkpoint

# Throwaway detector: `load_checkpoint` needs a model to load the weights into.
tmp_model = build_detector(
    cfg.model, test_cfg=cfg.test_cfg, train_cfg=cfg.train_cfg)

# Load the checkpoint onto the currently selected CUDA device (requires a GPU).
device_id = torch.cuda.current_device()
checkpoint = load_checkpoint(
    tmp_model,
    'train_logs/epoch_25.pth',
    map_location=lambda storage, loc: storage.cuda(device_id))

optimizer_state = checkpoint['optimizer']
first_group = optimizer_state['param_groups'][0]

# Inspect the stored optimizer state before editing it.
print(optimizer_state.keys())
print(first_group['initial_lr'])
print(first_group['lr'])

# Overwrite the initial learning rate of the first parameter group only, then
# write the edited checkpoint to a new file (the source file is not modified).
first_group['initial_lr'] = 0.001
torch.save(checkpoint, 'train_logs/test.pth')