# What is Object Detection?

# How ancestors did?

# How done recently?

### Single stage 

### Two stage

### What is the difference?

# MMDetection

# MS COCO Dataset

# What is Anchor?

# How to train object detector?

# Import Module

In [1]:
import numpy as np
import cv2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as plticker
import torch
import torch.nn as nn
import torch.nn.functional as F

from matplotlib.lines import Line2D
from matplotlib.patches import Patch

from src.anchor.anchor_generator import (gen_base_anchors, get_anchors, 
                              grid_anchors, meshgrid)
from src.anchor.assigner import assign_wrt_overlaps, bbox_overlaps
from src.anchor.loss import binary_cross_entropy, smooth_l1_loss
from src.anchor.prediction import predict_anchors
from src.anchor.transforms import bbox2delta, delta2bbox
from src.anchor.visualize import (draw_anchor_gt_overlaps, draw_anchor_samples_on_image, 
                       draw_base_anchor_on_grid, draw_pos_assigned_bboxes)
from src.datasets.loader.build_loader import build_dataloader
from src.models.builder import build_backbone, build_neck, build_head
from mmcv.runner import obj_from_dict
from mmcv.utils.config import Config
from src.core import multi_apply, weighted_smoothl1, weighted_sigmoid_focal_loss
from src.core.anchor import anchor_target_single, images_to_levels


## build data loader

In [2]:
cfg = Config.fromfile('config/retinanet_r50_fpn_1x.py')

train_cfg = cfg.train_cfg
dataset_cfg = cfg.data.train

loader = iter(build_dataloader(dataset_cfg))

loading annotations into memory...
Done (t=8.80s)
creating index...
index created!


## extract feature

In [3]:
class FeatureExtractor(nn.Module):
    def __init__(self):
        super(FeatureExtractor, self).__init__()
        backbone_cfg = dict(
                type='ResNet',
                depth=50,
                num_stages=4,
                out_indices=(0, 1, 2, 3),
                frozen_stages=1,
                style='pytorch')

        neck_cfg =dict(
                type='FPN',
                in_channels=[256, 512, 1024, 2048],
                out_channels=256,
                start_level=1,
                add_extra_convs=True,
                num_outs=5)
        self.resnet_backbone = build_backbone(backbone_cfg)
        self.fpn_neck = build_neck(neck_cfg)
    
    def forward(self, x):
        print(x.shape)
        feature = self.resnet_backbone(x)
        multi_level_feature = self.fpn_neck(feature)
        print(len(feature))
        print(len(multi_level_feature))
        print([x.shape for x in feature])
        print([x.shape for x in multi_level_feature])
        return multi_level_feature

# Todo : sample dict의 key목록 확인

In [4]:
sample = next(loader)
img = sample['img'].data[0]
img_metas = sample['img_meta'].data[0]
gt_bboxes = sample['gt_bboxes'].data[0]
gt_labels = sample['gt_labels'].data[0]

feature_extractor = FeatureExtractor()
feature = feature_extractor(img)

torch.Size([1, 3, 768, 1344])
4
5
[torch.Size([1, 256, 192, 336]), torch.Size([1, 512, 96, 168]), torch.Size([1, 1024, 48, 84]), torch.Size([1, 2048, 24, 42])]
[torch.Size([1, 256, 96, 168]), torch.Size([1, 256, 48, 84]), torch.Size([1, 256, 24, 42]), torch.Size([1, 256, 12, 21]), torch.Size([1, 256, 6, 11])]


## build rpn head

In [5]:
rpn_config = cfg.model.bbox_head

"""
dict(
    type='RetinaHead',
    num_classes=81,
    in_channels=256,
    stacked_convs=4,
    feat_channels=256,
    octave_base_scale=4,
    scales_per_octave=3,
    anchor_ratios=[0.5, 1.0, 2.0],
    anchor_strides=[8, 16, 32, 64, 128],
    target_means=[.0, .0, .0, .0],
    target_stds=[1.0, 1.0, 1.0, 1.0])
"""
rpn_head = build_head(rpn_config)
cls_score, bbox_pred = rpn_head(feature)

In [6]:
print([x.shape for x in cls_score])
print([x.shape for x in bbox_pred])

[torch.Size([1, 720, 96, 168]), torch.Size([1, 720, 48, 84]), torch.Size([1, 720, 24, 42]), torch.Size([1, 720, 12, 21]), torch.Size([1, 720, 6, 11])]
[torch.Size([1, 36, 96, 168]), torch.Size([1, 36, 48, 84]), torch.Size([1, 36, 24, 42]), torch.Size([1, 36, 12, 21]), torch.Size([1, 36, 6, 11])]


## get losses

### define anchor_generators

In [7]:
rpn_head.init_anchor_generator()

### get anchors

In [8]:
def get_anchors(anchor_generators, anchor_strides, featmap_sizes, img_metas):
    """Get anchors according to feature map sizes.

    Args:
        featmap_sizes (list[tuple]): Multi-level feature map sizes.
        img_metas (list[dict]): Image meta info.

    Returns:
        tuple: anchors of each image, valid flags of each image
    """
    num_imgs = len(img_metas)
    num_levels = len(featmap_sizes)

    # since feature map sizes of all images are the same, we only compute
    # anchors for one time
    multi_level_anchors = []
    for i in range(num_levels):
        anchors = anchor_generators[i].grid_anchors(
            featmap_sizes[i], anchor_strides[i], device='cpu')
        multi_level_anchors.append(anchors)
    anchor_list = [multi_level_anchors for _ in range(num_imgs)]

    # for each image, we compute valid flags of multi level anchors
    valid_flag_list = []
    for img_id, img_meta in enumerate(img_metas):
        multi_level_flags = []
        for i in range(num_levels):
            anchor_stride = anchor_strides[i]
            feat_h, feat_w = featmap_sizes[i]
            h, w, _ = img_meta['pad_shape']
            valid_feat_h = min(int(np.ceil(h / anchor_stride)), feat_h)
            valid_feat_w = min(int(np.ceil(w / anchor_stride)), feat_w)
            flags = anchor_generators[i].valid_flags(
                (feat_h, feat_w), (valid_feat_h, valid_feat_w))
            multi_level_flags.append(flags)
        valid_flag_list.append(multi_level_flags)

    return anchor_list, valid_flag_list

In [9]:
featmap_sizes = [featmap.size()[-2:] for featmap in cls_score]
anchor_list, valid_flag_list = get_anchors(rpn_head.anchor_generators, rpn_head.anchor_strides, featmap_sizes, img_metas)

### make target

#### anchor를 통해 target 후보를 만들고
#### sample을 통해 유의미한 target을 골라낸다

# Todo : anchor_target 해체, anchor_target_single 가져오고 문제거리 만들기

In [10]:
num_imgs = len(img_metas)
assert len(anchor_list) == len(valid_flag_list) == num_imgs

num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]

for i in range(num_imgs):
    assert len(anchor_list[i]) == len(valid_flag_list[i])
    anchor_list[i] = torch.cat(anchor_list[i])
    valid_flag_list[i] = torch.cat(valid_flag_list[i])
    
gt_bboxes_ignore_list = [None for _ in range(num_imgs)]
gt_labels_list = [None for _ in range(num_imgs)]

In [42]:
anchor_target_variable = (anchor_list, valid_flag_list, gt_bboxes, gt_bboxes_ignore_list, gt_labels_list, img_metas, train_cfg)
%store anchor_target_variable

Stored 'anchor_target_variable' (tuple)


In [11]:
(all_labels, all_label_weights, all_bbox_targets, all_bbox_weights,
pos_inds_list, neg_inds_list) = multi_apply(
   anchor_target_single,
    anchor_list,
    valid_flag_list,
    gt_bboxes,
    gt_bboxes_ignore_list,
    gt_labels_list,
    img_metas,
    target_means=[.0, .0, .0, .0],
    target_stds=[1.0, 1.0, 1.0, 1.0],
    cfg=train_cfg,
    label_channels=rpn_head.cls_out_channels,
    sampling=False,
    unmap_outputs=True)

# no valid anchors
if any([labels is None for labels in all_labels]):
    assert False

# sampled anchors of all images
num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])
num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list])
# split targets to a list w.r.t. multiple levels
labels_list = images_to_levels(all_labels, num_level_anchors)
label_weights_list = images_to_levels(all_label_weights, num_level_anchors)
bbox_targets_list = images_to_levels(all_bbox_targets, num_level_anchors)
bbox_weights_list = images_to_levels(all_bbox_weights, num_level_anchors)


# Todo : assign & sample용 notebook 따로 만들기

# Todo : loss_single 가져오고 문제로 만들기

### get loss

In [12]:
def loss_single(cls_score, bbox_pred, labels, label_weights,
                bbox_targets, bbox_weights, num_total_samples, cfg, cls_out_channels):
    # classification loss
    labels = labels.reshape(-1, cls_out_channels)
    label_weights = label_weights.reshape(-1, cls_out_channels)
    cls_score = cls_score.permute(0, 2, 3, 1).reshape(
        -1, cls_out_channels)
    loss_cls = weighted_sigmoid_focal_loss(
        cls_score,
        labels,
        label_weights,
        gamma=cfg.gamma,
        alpha=cfg.alpha,
        avg_factor=num_total_samples)

    # regression loss
    bbox_targets = bbox_targets.reshape(-1, 4)
    bbox_weights = bbox_weights.reshape(-1, 4)
    bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(-1, 4)
    loss_reg = weighted_smoothl1(
        bbox_pred,
        bbox_targets,
        bbox_weights,
        beta=cfg.smoothl1_beta,
        avg_factor=num_total_samples)
    return loss_cls, loss_reg

In [13]:
losses_cls, losses_reg = multi_apply(
    loss_single,
    cls_score,
    bbox_pred,
    labels_list,
    label_weights_list,
    bbox_targets_list,
    bbox_weights_list,
    num_total_samples=num_total_pos,
    cfg=cfg.train_cfg,
    cls_out_channels=rpn_head.cls_out_channels)

In [14]:
losses_cls

[tensor([12832.7061], grad_fn=<DivBackward0>),
 tensor([3182.1497], grad_fn=<DivBackward0>),
 tensor([793.8104], grad_fn=<DivBackward0>),
 tensor([194.2783], grad_fn=<DivBackward0>),
 tensor([50.7037], grad_fn=<DivBackward0>)]

In [15]:
losses_reg

[tensor([0.2658], grad_fn=<DivBackward0>),
 tensor([0.4994], grad_fn=<DivBackward0>),
 tensor([0.], grad_fn=<DivBackward0>),
 tensor([0.1951], grad_fn=<DivBackward0>),
 tensor([0.0294], grad_fn=<DivBackward0>)]

## get results

In [36]:
def get_bboxes(cls_scores, bbox_preds, img_metas, cfg,
                anchor_generators, anchor_strides, cls_out_channels):
    assert len(cls_scores) == len(bbox_preds)
    num_levels = len(cls_scores)

    mlvl_anchors = [
        anchor_generators[i].grid_anchors(cls_scores[i].size()[-2:],
                                            anchor_strides[i], device='cpu')
        for i in range(num_levels)
    ]
    result_list = []
    for img_id in range(len(img_metas)):
        cls_score_list = [
            cls_scores[i][img_id].detach() for i in range(num_levels)
        ]
        bbox_pred_list = [
            bbox_preds[i][img_id].detach() for i in range(num_levels)
        ]
        img_shape = img_metas[img_id]['img_shape']
        scale_factor = img_metas[img_id]['scale_factor']
        proposals = get_bboxes_single(cls_score_list, bbox_pred_list,
                                        mlvl_anchors, img_shape,
                                        scale_factor, cfg, cls_out_channels)
        result_list.append(proposals)
    return result_list

def get_bboxes_single(cls_scores,
                      bbox_preds,
                      mlvl_anchors,
                      img_shape,
                      scale_factor,
                      cfg,
                      cls_out_channels,
                      target_means=[.0, .0, .0, .0],
                      target_stds=[1.0, 1.0, 1.0, 1.0]):
    assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
    mlvl_bboxes = []
    mlvl_scores = []
    for cls_score, bbox_pred, anchors in zip(cls_scores, bbox_preds,
                                                mlvl_anchors):
        assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
        cls_score = cls_score.permute(1, 2, 0).reshape(
            -1, cls_out_channels)

        scores = cls_score.sigmoid()

        bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
        nms_pre = cfg.get('nms_pre', -1)
        if nms_pre > 0 and scores.shape[0] > nms_pre:

            max_scores, _ = scores.max(dim=1)
            _, topk_inds = max_scores.topk(nms_pre)
            anchors = anchors[topk_inds, :]
            bbox_pred = bbox_pred[topk_inds, :]
            scores = scores[topk_inds, :]
        bboxes = delta2bbox(anchors, bbox_pred, target_means,
                            target_stds, img_shape)
        mlvl_bboxes.append(bboxes)
        mlvl_scores.append(scores)
    mlvl_bboxes = torch.cat(mlvl_bboxes)
    mlvl_scores = torch.cat(mlvl_scores)

    padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
    mlvl_scores = torch.cat([padding, mlvl_scores], dim=1)
    det_bboxes, det_labels = multiclass_nms(
        mlvl_bboxes, mlvl_scores, cfg.score_thr, cfg.nms, cfg.max_per_img)
    return det_bboxes, det_labels

### get bboxes

In [37]:
bbox_list = get_bboxes(cls_score, bbox_pred, img_metas, test_cfg,
                       rpn_head.anchor_generators, rpn_head.anchor_strides, rpn_head.cls_out_channels)

NameError: name 'multiclass_nms' is not defined

### get result

In [38]:
bbox_results = [
    bbox2result(det_bboxes, det_labels, rpn_head.num_classes)
    for det_bboxes, det_labels in bbox_list
][0]

NameError: name 'bbox_list' is not defined