In [0]:
!pip3 uninstall pytorch-hrvvi-ext
!pip3 install -U --no-cache-dir --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple pytorch-hrvvi-ext

In [0]:
import sys
import os

import torch
import hutil
import matplotlib.pyplot as plt
print(hutil.__version__)

1.4.5


In [0]:
%load_ext autoreload
%autoreload 2

In [0]:
# Mount Google Drive inside the Colab VM; `mydrive` points at the "My Drive" folder
# under the mount point and is the root used by `gpath` below.
gdrive = "/gdrive"
from google.colab import drive
drive.mount(gdrive, force_remount=True)
mydrive = os.path.join(gdrive, "My Drive")
# List the Drive root as a quick check that the mount succeeded
# (the path is hard-coded and must stay in sync with `mydrive`).
!ls /gdrive/My\ Drive

def gpath(p):
    """Return `p` joined onto the mounted Drive folder stored in `mydrive`."""
    drive_path = os.path.join(mydrive, p)
    return drive_path

Mounted at /gdrive
'Colab Notebooks'   eng-fra.pt	 images   repo	   weixin.pkl
 datasets	    fonts	 models   result


In [0]:
from math import sqrt
import random

from toolz import curry

import torch
import torch.nn as nn
import torch.nn.functional as F

from PIL import Image
from torch.optim import Adam, SGD
from torch.optim.lr_scheduler import LambdaLR, MultiStepLR
from torch.utils.data import DataLoader
from torch.utils.data.dataloader import default_collate
from torchvision.transforms import ToTensor

from hutil import cuda
from hutil.train import init_weights, Trainer, Args
from hutil.data import train_test_split, CachedDataset
from hutil.transformers import Compose, Resize, InputTransform, CenterCrop
from hutil.datasets import SVHNDetection
from hutil.train.metrics import TrainLoss, MeanAveragePrecision
from hutil.ext.summary import summary
from hutil.detection import BoundingBox, BoundingBoxFormat, transform_bbox, transform_bboxes, iou_1m
from hutil.transformers import Compose, Resize, ToTensor, ToPercentCoords, CenterCrop

█

In [0]:
def iou_1m_centers(box, boxes):
    """IoU of one box against many, for boxes given in `BoundingBoxFormat.XYWH`.

    Both arguments are converted from XYWH to LTRB with `transform_bboxes`
    and then handed to `iou_1m`. Callers in this notebook pass
    (cx, cy, w, h) boxes, so XYWH is presumably the center-based format
    in `hutil` -- TODO confirm.
    """
    conversion = dict(format=BoundingBoxFormat.XYWH, to=BoundingBoxFormat.LTRB)
    single_ltrb = transform_bboxes(box, **conversion)
    many_ltrb = transform_bboxes(boxes, **conversion)
    return iou_1m(single_ltrb, many_ltrb)

def non_max_suppression(boxes, confidences, max_boxes, iou_threshold, inplace=False):
    r"""Greedy non-maximum suppression.

    Repeatedly picks the highest-confidence remaining box, then zeroes out
    that box and every box whose IoU with it exceeds `iou_threshold`.

    Args:
        boxes:       (N, 4) tensor; passed to `iou_1m`, so it must be in the
                     format that helper expects (callers here pass LTRB).
        confidences: (N,) tensor of scores. A score of 0 marks a box as
                     suppressed, so live scores are expected to be > 0.
        max_boxes (int): maximum number of indices to return.
        iou_threshold (float): boxes overlapping a selected box by more than
                     this are suppressed.
        inplace (bool): if False (default) the inputs are cloned first;
                     if True `boxes` and `confidences` are overwritten.
    Returns:
        list[int]: indices into the input of the kept boxes, in selection
        order; at most `max_boxes` entries (empty for empty input or
        `max_boxes <= 0`).
    """
    if len(boxes) == 0 or max_boxes <= 0:
        return []
    if not inplace:
        boxes = boxes.clone()
        confidences = confidences.clone()
    boxes = boxes.view(-1, 4)
    confidences = confidences.view(-1)
    indices = []
    while True:
        ind = confidences.argmax()
        indices.append(ind.item())
        boxes_iou = iou_1m(boxes[ind], boxes)
        mask = boxes_iou > iou_threshold
        # Always retire the selected box itself. Relying on its self-IoU
        # exceeding the threshold fails for degenerate (zero-area) boxes or
        # iou_threshold >= 1, and would return the same index repeatedly.
        mask[ind] = True
        boxes.masked_fill_(mask.unsqueeze(-1), 0)
        confidences.masked_fill_(mask, 0)
        if len(indices) >= max_boxes or confidences.sum() == 0:
            return indices


In [0]:
def compute_default_boxes(lx, ly, scale, ars):
    """Build the grid of default (anchor) boxes for one feature map.

    Args:
        lx (int): number of grid cells along x.
        ly (int): number of grid cells along y.
        scale (float): box scale relative to the image for this feature map.
        ars: aspect ratios, either a 1-D tensor or any sequence of numbers.

    Returns:
        Tensor of shape (lx, ly, len(ars), 4) holding (cx, cy, w, h) per box,
        with centers at the cell midpoints ((i + 0.5) / lx, (j + 0.5) / ly),
        w = scale * sqrt(ar) and h = scale / sqrt(ar).
    """
    # Accept plain lists/tuples as well; tensors are used untouched so the
    # result for tensor input is unchanged.
    if not torch.is_tensor(ars):
        ars = torch.tensor(ars, dtype=torch.float)
    sqrt_ars = torch.sqrt(ars)

    centers_x = (torch.arange(lx, dtype=torch.float) + 0.5) / lx
    centers_y = (torch.arange(ly, dtype=torch.float) + 0.5) / ly

    default_boxes = torch.zeros(lx, ly, len(ars), 4)
    # Broadcasting fills every (y, aspect-ratio) / (x, aspect-ratio) slot.
    default_boxes[..., 0] = centers_x.view(lx, 1, 1)
    default_boxes[..., 1] = centers_y.view(1, ly, 1)
    default_boxes[..., 2] = scale * sqrt_ars
    default_boxes[..., 3] = scale / sqrt_ars
    return default_boxes


def compute_scales(num_feature_maps, s_min, s_max):
    """Linearly spaced default-box scales, one per feature map.

    Scale k is `s_min + (s_max - s_min) * k / (num_feature_maps - 1)`, so the
    first map gets `s_min` and the last gets `s_max`.

    Args:
        num_feature_maps (int): number of prediction feature maps.
        s_min (float): scale of the first feature map.
        s_max (float): scale of the last feature map.

    Returns:
        list of `num_feature_maps` floats; `[]` for a non-positive count and
        `[s_min]` for a single feature map (the formula would divide by zero).
    """
    if num_feature_maps <= 0:
        return []
    if num_feature_maps == 1:
        return [s_min]
    intervals = num_feature_maps - 1
    return [
        s_min + (s_max - s_min) * k / intervals
        for k in range(num_feature_maps)
    ]


def compute_loc_target(gt_box, default_boxes):
    """Encode a ground-truth box relative to default boxes.

    Args:
        gt_box: tensor of 4 values (cx, cy, w, h).
        default_boxes: tensor (..., 4) of default boxes in the same layout.

    Returns:
        Tensor (..., 4): center offsets divided by the default box size,
        followed by the log of the size ratios.
    """
    anchor_centers = default_boxes[..., :2]
    anchor_sizes = default_boxes[..., 2:]
    center_offsets = (gt_box[:2] - anchor_centers) / anchor_sizes
    log_size_ratios = torch.log(gt_box[2:] / anchor_sizes)
    return torch.cat((center_offsets, log_size_ratios), dim=-1)


class SSDTransform:
    """Encode annotations into per-feature-map SSD training targets.

    For every feature map a class tensor (initialised to the background
    index `num_classes - 1`) and a box-offset tensor (initialised to zero)
    are created. Each annotation is then assigned to:

    * every default box whose IoU with it exceeds `iou_threshold`, and
    * the single default box with the highest IoU across all feature maps,
      so that every annotation gets at least one positive.

    Later annotations overwrite earlier ones on shared default boxes.

    Args:
        scales: per-feature-map scales (stored as `f_scales`, not used here).
        default_boxes: list of (lx, ly, n_ars, 4) tensors in (cx, cy, w, h).
        num_classes (int): number of classes including background.
        label_field (str): annotation key holding the class label.
        bbox_field (str): annotation key holding (x, y, w, h) with (x, y)
            the top-left corner. Coordinates must be in the same units as
            `default_boxes` (presumably normalised -- confirm with pipeline).
        iou_threshold (float): IoU above which a default box is a positive.
    """

    def __init__(self, scales, default_boxes, num_classes, label_field="category_id", bbox_field="bbox",
                 iou_threshold=0.5):
        self.f_scales = scales
        self.f_default_boxes = default_boxes
        self.num_classes = num_classes
        self.label_field = label_field
        self.bbox_field = bbox_field
        self.iou_threshold = iou_threshold

    def __call__(self, img, anns):
        """Return `(img, [f_classes, f_boxes])` with one tensor per feature map."""
        f_default_boxes = self.f_default_boxes
        background = self.num_classes - 1
        f_classes = []
        f_boxes = []
        for d_boxes in f_default_boxes:
            shape = d_boxes.shape[:3]
            f_classes.append(torch.full(shape, background, dtype=torch.long))
            f_boxes.append(torch.zeros(*shape, 4))

        for ann in anns:
            label = ann[self.label_field]
            x, y, w, h = ann[self.bbox_field]
            # Convert top-left (x, y, w, h) to center form to match the default boxes.
            cx = x + w / 2
            cy = y + h / 2
            bbox = torch.tensor([cx, cy, w, h])

            max_ious = []
            for default_boxes, classes, boxes in zip(f_default_boxes, f_classes, f_boxes):
                ious = iou_1m_centers(bbox, default_boxes)

                iou_mask = ious > self.iou_threshold
                if iou_mask.any():
                    classes[iou_mask] = label
                    boxes[iou_mask] = compute_loc_target(
                        bbox, default_boxes[iou_mask])

                # Remember this feature map's best match for the fallback below.
                max_iou, max_i = ious.view(-1).max(dim=0)
                max_ious.append((max_iou, max_i))

            # Force-match the best default box over all feature maps, even if
            # its IoU is below the threshold.
            f_i, (max_iou, max_i) = max(
                enumerate(max_ious), key=lambda t: t[1][0])
            f_classes[f_i].view(-1)[max_i] = label
            best_default_box = f_default_boxes[f_i].view(-1, 4)[max_i]
            f_boxes[f_i].view(-1, 4)[max_i] = \
                compute_loc_target(bbox, best_default_box)
        return img, [f_classes, f_boxes]


class SSDLoss(nn.Module):
    """SSD-style loss: smooth-L1 localisation + cross-entropy with hard negatives.

    The background class is index `num_classes - 1`. For each feature map,
    positives contribute a summed cross-entropy and smooth-L1 term; negatives
    contribute only their `neg_pos_ratio * num_pos` largest background losses
    (hard negative mining, done per feature map). Feature maps without any
    positive are skipped entirely. The total is divided by the number of
    positives over all feature maps.

    Args:
        num_classes (int): number of classes including background.
        neg_pos_ratio (int): negatives kept per positive.
    """

    def __init__(self, num_classes, neg_pos_ratio=3):
        super().__init__()
        self.num_classes = num_classes
        self.neg_pos_ratio = neg_pos_ratio

    def forward(self, fs, f_classes, f_boxes):
        """Compute the loss.

        Args:
            fs: list of predictions, one per feature map, each
                (B, lx, ly, n_ars * (4 + num_classes)).
            f_classes: list of (B, lx, ly, n_ars) long tensors of class targets.
            f_boxes: list of (B, lx, ly, n_ars, 4) tensors of box targets.

        Returns:
            Scalar tensor. When the batch contains no positive box at all the
            result is a zero that is still connected to `fs`, so `backward()`
            works and yields zero gradients.
        """
        background = self.num_classes - 1
        total_loc_loss = 0
        total_conf_loss_neg = 0
        total_conf_loss_pos = 0
        total_pos = 0
        for f, classes, boxes in zip(fs, f_classes, f_boxes):
            n_ars = boxes.size(3)
            # Split the flat channel axis into (aspect ratio, 4 offsets + logits).
            f = f.view(*f.size()[:3], n_ars, -1)
            loc_pred = f[..., :4]
            logits_pred = f[..., 4:]
            pos = classes != background
            num_pos = pos.sum().item()
            total_pos += num_pos
            if num_pos == 0:
                continue
            conf_loss_pos = F.cross_entropy(
                logits_pred[pos], classes[pos], reduction='sum')

            # Per-box background loss of the negatives; keep only the hardest ones.
            conf_loss_neg = -F.log_softmax(
                logits_pred[~pos], dim=1)[..., background]
            num_neg = min(self.neg_pos_ratio * num_pos, len(conf_loss_neg))
            if num_neg != 0:
                conf_loss_neg = torch.topk(
                    conf_loss_neg, num_neg, sorted=False)[0].sum()
            else:
                conf_loss_neg = torch.zeros_like(conf_loss_pos)
            loc_loss = F.smooth_l1_loss(
                loc_pred[pos], boxes[pos], reduction='sum')
            total_loc_loss = total_loc_loss + loc_loss
            total_conf_loss_pos = total_conf_loss_pos + conf_loss_pos
            total_conf_loss_neg = total_conf_loss_neg + conf_loss_neg

        if total_pos == 0:
            # No positives anywhere: the normaliser would be zero and the
            # running totals are still plain ints. Return a graph-connected zero.
            return sum(f.sum() for f in fs) * 0.0

        # Occasional (about 1% of calls) debug print of the loss components.
        if random.random() < 0.01:
            print("loc: %.4f  conf_neg: %.4f conf_pos: %.4f" %
                  (total_loc_loss.item() / total_pos,
                   total_conf_loss_neg.item() / total_pos,
                   total_conf_loss_pos.item() / total_pos))
        return (total_loc_loss + total_conf_loss_neg +
                total_conf_loss_pos) / total_pos


class SSDInference:
    """Decode raw SSD predictions into a flat list of `BoundingBox` detections.

    Args:
        width (int): image width used to scale x coordinates and box widths.
        height (int): image height used to scale y coordinates and box heights.
        f_default_boxes: list of (lx, ly, n_ars, 4) default-box tensors in
            (cx, cy, w, h); must live on the same device as the predictions.
        num_classes (int): number of classes including background (the last
            index, which is never reported).
        confidence_threshold (float): softmax scores at or below this are dropped.
        max_boxes (int): maximum boxes kept by NMS per call (see note below).
        iou_threshold (float): NMS IoU threshold.

    NOTE(review): NMS runs separately for every (feature map, image, class),
    so `max_boxes` is a per-feature-map limit and overlapping detections coming
    from different feature maps are not suppressed against each other.
    """

    def __init__(self, width, height, f_default_boxes, num_classes, confidence_threshold=0.01, max_boxes=10, iou_threshold=0.45):
        self.width = width
        self.height = height
        self.f_default_boxes = f_default_boxes
        self.confidence_threshold = confidence_threshold
        self.max_boxes = max_boxes
        self.iou_threshold = iou_threshold
        self.num_classes = num_classes

    def __call__(self, fs):
        """Return detections for a batch.

        Args:
            fs: list of prediction tensors, one per feature map, each viewable
                as (B, lx, ly, n_ars, 4 + num_classes).

        Returns:
            list of `BoundingBox` in LTRB format; `image_name` is the index of
            the image within the batch. The input tensors are left unmodified.
        """
        detections = []
        for f, default_boxes in zip(fs, self.f_default_boxes):
            batch_size = f.size(0)
            lx, ly, num_ars = default_boxes.size()[:3]
            preds = f.view(batch_size, lx, ly, num_ars, -1)

            # Invert the training encoding out of place, so the caller's
            # network outputs are not overwritten through the view.
            anchor_cxcy = default_boxes[..., :2]
            anchor_wh = default_boxes[..., 2:]
            boxes_cxcy = preds[..., 0:2] * anchor_wh + anchor_cxcy
            boxes_wh = preds[..., 2:4].exp() * anchor_wh

            # Scale x/width by image width and y/height by image height.
            image_size = boxes_cxcy.new_tensor([self.width, self.height])
            boxes = torch.cat(
                (boxes_cxcy * image_size, boxes_wh * image_size), dim=-1)
            # `boxes` is a fresh tensor, so the in-place conversion is safe.
            boxes = transform_bboxes(
                boxes, format=BoundingBoxFormat.XYWH, to=BoundingBoxFormat.LTRB, inplace=True)

            confidences = torch.softmax(preds[..., 4:], dim=-1)
            mask = confidences > self.confidence_threshold

            for i in range(batch_size):
                # The last class index is background and is skipped.
                for c in range(self.num_classes - 1):
                    bc_mask = mask[i, ..., c]
                    bc_confidences = confidences[i, ..., c][bc_mask]
                    bc_boxes = boxes[i][bc_mask]
                    indices = non_max_suppression(
                        bc_boxes, bc_confidences, self.max_boxes, self.iou_threshold)
                    for ind in indices:
                        detections.append(
                            BoundingBox(
                                image_name=i,
                                class_id=c,
                                box=bc_boxes[ind].tolist(),
                                confidence=bc_confidences[ind].item(),
                                box_format=BoundingBoxFormat.LTRB,
                            )
                        )
        return detections


In [0]:

class SELayer(nn.Module):
    """Channel gate: global average pool -> two linear layers -> sigmoid scaling.

    Args:
        in_channels (int): channels of the input feature map.
        reduction (int): the hidden width is `in_channels // reduction`.
    """

    def __init__(self, in_channels, reduction=8):
        super().__init__()
        hidden = in_channels // reduction
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.layers = nn.Sequential(
            nn.Linear(in_channels, hidden),
            nn.ReLU(True),
            nn.Linear(hidden, in_channels),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Scale each channel of `x` (B, C, H, W) by a learned weight in (0, 1)."""
        batch, n_channels = x.size(0), x.size(1)
        pooled = self.avgpool(x).view(batch, n_channels)
        gate = self.layers(pooled)
        return x * gate.view(batch, n_channels, 1, 1)


class PredTransition(nn.Module):
    """BN-ReLU-1x1 conv followed by BN-ReLU-3x3 conv between prediction maps.

    The 1x1 conv maps to `out_channels // 2` channels. The 3x3 conv uses
    stride 2 / padding 1 by default; with `last=True` it uses no padding
    and stride 1 instead.
    """

    def __init__(self, in_channels, out_channels, last=False):
        super().__init__()
        mid_channels = out_channels // 2
        self.bn1 = nn.BatchNorm2d(in_channels)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(in_channels, mid_channels, kernel_size=1)
        self.bn2 = nn.BatchNorm2d(mid_channels)
        self.relu2 = nn.ReLU(inplace=True)
        # Only the non-final transition strides and pads.
        conv2_options = {} if last else {"stride": 2, "padding": 1}
        self.conv2 = nn.Conv2d(mid_channels, out_channels,
                               kernel_size=3, **conv2_options)

    def forward(self, x):
        """Apply the six stages in order."""
        stages = (self.bn1, self.relu1, self.conv1,
                  self.bn2, self.relu2, self.conv2)
        for stage in stages:
            x = stage(x)
        return x


class Bottleneck(nn.Module):
    """Dense layer: BN-ReLU-1x1 conv, BN-ReLU-3x3 conv, concatenated onto the input.

    The 1x1 conv widens to `4 * growth_rate` channels and the 3x3 conv
    produces `growth_rate` new channels, so the output has
    `in_channels + growth_rate` channels. With `with_se=True` the new
    channels pass through an `SELayer` before concatenation.
    """

    def __init__(self, in_channels, growth_rate, with_se=False):
        super().__init__()
        wide = 4 * growth_rate
        self.bn1 = nn.BatchNorm2d(in_channels)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(in_channels, wide, kernel_size=1)
        self.bn2 = nn.BatchNorm2d(wide)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(wide, growth_rate,
                               kernel_size=3, stride=1, padding=1)
        self.se = SELayer(growth_rate) if with_se else None

    def forward(self, x):
        """Return `x` with the newly computed feature channels appended on dim 1."""
        new_features = self.conv1(self.relu1(self.bn1(x)))
        new_features = self.conv2(self.relu2(self.bn2(new_features)))
        if self.se is not None:
            new_features = self.se(new_features)
        return torch.cat((x, new_features), dim=1)


class DenseBlock(nn.Module):
    """Stack of `n` `Bottleneck` layers; each adds `growth_rate` channels.

    The output has `in_channels + n * growth_rate` channels.
    """

    def __init__(self, in_channels, growth_rate, n, with_se=False):
        super().__init__()
        # Layer `idx` sees the block input plus everything appended so far.
        bottlenecks = [
            Bottleneck(in_channels + idx * growth_rate,
                       growth_rate, with_se=with_se)
            for idx in range(n)
        ]
        self.layers = nn.Sequential(*bottlenecks)

    def forward(self, x):
        """Run the bottleneck layers in sequence."""
        return self.layers(x)


class Transition(nn.Module):
    """BN-ReLU-1x1 conv, optionally followed by 2x2 max pooling (ceil mode).

    Args:
        in_channels (int): input channels.
        out_channels (int): channels produced by the 1x1 conv.
        with_pool (bool): halve the spatial size with max pooling when True.
    """

    def __init__(self, in_channels, out_channels, with_pool=True):
        super().__init__()
        self.with_pool = with_pool
        self.bn = nn.BatchNorm2d(in_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        """Apply the transition; spatial size is kept when `with_pool` is False."""
        out = self.conv(self.relu(self.bn(x)))
        if not self.with_pool:
            return out
        return F.max_pool2d(out, kernel_size=2, stride=2, ceil_mode=True)


class DSOD(nn.Module):
    """Dense-block backbone with four per-location prediction heads.

    Structure: a 3-conv stem with max pooling, four `DenseBlock`s separated
    by `Transition`s (the last two without pooling), then two
    `PredTransition`s. Linear heads are applied to the channel vector at
    every spatial location of four feature maps: after block2, after
    transition4, after `t1` and after `t2`.

    Args:
        layers: number of bottleneck layers in each of the 4 dense blocks.
        growth_rate (int): channels added by every bottleneck layer.
        in_channels (int): channels of the input image.
        out_channels: output width of each of the 4 prediction heads
            (required despite the `None` default).
        reduction (float): channel multiplier applied by each transition.
        with_se (bool): add an `SELayer` to every bottleneck layer.

    Raises:
        ValueError: if `layers` or `out_channels` has fewer than 4 entries
            or `out_channels` is None.
    """

    def __init__(self, layers, growth_rate, in_channels=3, out_channels=None, reduction=0.5, with_se=False):
        super().__init__()
        if len(layers) < 4:
            raise ValueError(
                "`layers` must give the depth of each of the 4 dense blocks")
        if out_channels is None or len(out_channels) < 4:
            raise ValueError(
                "`out_channels` must give the width of each of the 4 prediction heads")

        channels = 32
        self.stem = nn.Sequential(
            nn.Conv2d(in_channels, channels,
                      kernel_size=3, stride=2, padding=1),
            nn.Conv2d(channels, channels, kernel_size=3, stride=1, padding=1),
            nn.Conv2d(channels, 2 * channels,
                      kernel_size=3, stride=1, padding=1),
            nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)
        )
        channels = 2 * channels
        self.block1 = DenseBlock(
            channels, growth_rate, layers[0], with_se=with_se)
        channels += layers[0] * growth_rate
        self.transition1 = Transition(channels, int(channels * reduction))
        channels = int(channels * reduction)

        self.block2 = DenseBlock(
            channels, growth_rate, layers[1], with_se=with_se)
        channels += layers[1] * growth_rate
        # First prediction head reads the block2 output (before transition2).
        self.pred1 = nn.Linear(channels, out_channels[0])
        self.transition2 = Transition(channels, int(channels * reduction))
        channels = int(channels * reduction)

        self.block3 = DenseBlock(
            channels, growth_rate, layers[2], with_se=with_se)
        channels += layers[2] * growth_rate
        self.transition3 = Transition(channels, int(
            channels * reduction), with_pool=False)
        channels = int(channels * reduction)

        self.block4 = DenseBlock(
            channels, growth_rate, layers[3], with_se=with_se)
        channels += layers[3] * growth_rate
        self.transition4 = Transition(channels, int(
            channels * reduction), with_pool=False)
        channels = int(channels * reduction)

        self.pred2 = nn.Linear(channels, out_channels[1])
        self.t1 = PredTransition(channels, 128)
        self.pred3 = nn.Linear(128, out_channels[2])
        self.t2 = PredTransition(128, 128, last=True)
        self.pred4 = nn.Linear(128, out_channels[3])

    @staticmethod
    def _predict(head, x):
        """Apply a linear head per location: (B, C, H, W) -> (B, W, H, out)."""
        return head(x.permute(0, 3, 2, 1).contiguous())

    def forward(self, x):
        """Return `[[f1, f2, f3, f4]]`, the four prediction maps in a nested list.

        Each map has shape (B, W_k, H_k, out_channels[k]). The extra list
        level is kept exactly as before -- presumably the trainer unpacks the
        outer list into arguments; verify against `hutil.train.Trainer`.
        """
        x = self.stem(x)

        x = self.block1(x)
        x = self.transition1(x)

        x = self.block2(x)
        f1 = self._predict(self.pred1, x)
        x = self.transition2(x)

        x = self.block3(x)
        x = self.transition3(x)

        x = self.block4(x)
        x = self.transition4(x)

        f2 = self._predict(self.pred2, x)
        x = self.t1(x)
        f3 = self._predict(self.pred3, x)
        x = self.t2(x)
        f4 = self._predict(self.pred4, x)

        return [[f1, f2, f3, f4]]


In [0]:
def val_collate_fn(batch):
    """Collate validation samples into a stacked image batch plus ground-truth boxes.

    Args:
        batch: iterable of `(image, annotations)` pairs; every annotation is
            a mapping with "label" and "bbox" (LTWH) entries.

    Returns:
        `(images, Args(ground_truths))` where `images` comes from
        `default_collate` and `ground_truths` is a flat list of `BoundingBox`
        whose `image_name` is the sample's position within the batch.
    """
    images, annotations = zip(*batch)
    ground_truths = [
        BoundingBox(
            image_name=sample_idx,
            class_id=ann["label"],
            box=ann["bbox"],
            box_format=BoundingBoxFormat.LTWH,
        )
        for sample_idx, sample_anns in enumerate(annotations)
        for ann in sample_anns
    ]
    return default_collate(images), Args(ground_truths)


# Network input size in pixels; also used to scale decoded boxes at inference.
WIDTH = 75
HEIGHT = 75
# Grid size (lx, ly) of each prediction feature map. For a 75x75 input these
# match the spatial sizes of the four DSOD outputs (10, 5, 3, 1).
LOCATIONS = [
    (10, 10),
    (5, 5),
    (3, 3),
    (1, 1),
]
# Aspect ratios of the default boxes placed at every cell of each feature map.
ASPECT_RATIOS = [
    (1, 2, 1/2),
    (1, 2, 3, 1/2, 1/3),
    (1, 2, 3, 1/2, 1/3),
    (1, 2, 1/2),
]
# Rebound to tensors because compute_default_boxes applies torch.sqrt to them.
ASPECT_RATIOS = [torch.tensor(ars) for ars in ASPECT_RATIOS]
NUM_FEATURE_MAPS = len(ASPECT_RATIOS)
# One scale per feature map, linearly spaced from 0.2 to 0.9.
SCALES = compute_scales(NUM_FEATURE_MAPS, 0.2, 0.9)
# One (lx, ly, n_ars, 4) tensor of (cx, cy, w, h) default boxes per feature map.
DEFAULT_BOXES = [
    compute_default_boxes(lx, ly, scale, ars)
    for (lx, ly), scale, ars in zip(LOCATIONS, SCALES, ASPECT_RATIOS)
]

# 10 object classes plus background; SSDTransform/SSDLoss treat the last
# index (NUM_CLASSES - 1) as background.
NUM_CLASSES = 10 + 1


In [0]:

# Training pipeline. `ToTensor` here is the hutil.transformers version: that
# import comes after the torchvision one and shadows it.
# ToPercentCoords presumably normalises box coordinates to [0, 1] so they are
# comparable with the normalised default boxes -- TODO confirm.
# SSDTransform (last step) encodes the annotations into per-feature-map targets.
train_transform = Compose([
    Resize(HEIGHT),
    CenterCrop(HEIGHT),
    ToPercentCoords(),
    ToTensor(),
    SSDTransform(SCALES, DEFAULT_BOXES, NUM_CLASSES, label_field='label'),
])

# Validation pipeline: same resize/crop, but annotations are not encoded
# because val_collate_fn turns them into BoundingBox ground truths.
test_transform = Compose([
    Resize(HEIGHT),
    CenterCrop(HEIGHT),
    ToTensor(),
])

# Load the SVHN detection training split and hold out 5% of it for validation.
data_home = "./SVHN"
ds = SVHNDetection(data_home, split='train', download=True)
ds_train, ds_val = train_test_split(
    ds, test_ratio=0.05,
    transform=train_transform,
    test_transform=test_transform)
# Wrap the validation split in hutil's CachedDataset.
ds_val = CachedDataset(ds_val)

Dataset found. Skip download or extract


In [0]:
# Width of each prediction head: every default box at a location needs
# 4 box offsets + NUM_CLASSES class logits.
out_channels = [
    (NUM_CLASSES + 4) * len(ars)
    for ars in ASPECT_RATIOS
]
# Dense blocks of 3/4/4/4 layers with growth rate 24; reduction=1 means the
# transitions keep the channel count.
net = DSOD([3, 4, 4, 4], 24, out_channels=out_channels, reduction=1)
# Apply hutil's init_weights initialiser to every submodule.
net = net.apply(init_weights(nonlinearity='relu'))

In [0]:
# Loss, optimiser and step-wise learning-rate schedule (lr x0.2 at each milestone).
criterion = SSDLoss(NUM_CLASSES)
optimizer = SGD(net.parameters(), lr=0.06, momentum=0.9, dampening=0, nesterov=True, weight_decay=1e-4)
lr_scheduler = MultiStepLR(optimizer, [40, 70, 110, 140], gamma=0.2)
# Alternative optimiser/schedule kept for reference:
# optimizer = Adam(net.parameters(), lr=1e-3, weight_decay=5e-4)
# lr_scheduler = LambdaLR(optimizer, lambda x: 0.96 ** x)


# Metric tracked during training.
metrics = {
    'loss': TrainLoss(),
}
# Metric computed by trainer.evaluate: mAP over detections decoded by
# SSDInference. The default boxes are passed through hutil's `cuda` helper,
# presumably to place them on the GPU alongside the network outputs.
test_metrics = {
    'mAP': MeanAveragePrecision(
        SSDInference(
            width=WIDTH, height=HEIGHT,
            f_default_boxes=[ cuda(d) for d in DEFAULT_BOXES ],
            num_classes=NUM_CLASSES,
        )
    )
}

# `save_path` points into the mounted Google Drive (see gpath).
trainer = Trainer(net, criterion, optimizer, lr_scheduler,
                  metrics=metrics, evaluate_metrics=test_metrics,
                  save_path=gpath("models"), name="DSOD-SVHN")

In [0]:
# Override the milestones of the already-constructed scheduler (was [40, 70, 110, 140]).
# NOTE(review): this pokes at MultiStepLR internals and is hidden state on re-runs;
# newer PyTorch releases appear to store milestones in a different structure than a
# plain list -- confirm against the installed version, or pass the final milestones
# to the MultiStepLR constructor instead.
lr_scheduler.milestones = [20, 40, 60]

In [0]:
# Print a summary of the network for a single 3 x HEIGHT x WIDTH input.
summary(net, (3,HEIGHT, WIDTH))

In [0]:
# Training batches use the default collate (targets are already tensors
# produced by SSDTransform).
train_loader = DataLoader(
    ds_train, batch_size=32, shuffle=True, num_workers=1, pin_memory=True)
# Validation batches keep the raw annotations, so val_collate_fn converts them
# into BoundingBox ground truths.
val_loader = DataLoader(
    ds_val, batch_size=128, collate_fn=val_collate_fn)


In [0]:
# Train for 10 more epochs. The saved output below starts at "Epoch 41/50", so
# this cell had been run repeatedly on the same trainer -- a fresh
# "Restart & Run All" will not reproduce that state.
trainer.fit(train_loader, 10)

Epoch 41/50
loc: 0.0248  conf_neg: 0.1721 conf_pos: 0.2618
loc: 0.0189  conf_neg: 0.1093 conf_pos: 0.1845
loc: 0.0227  conf_neg: 0.1108 conf_pos: 0.1931
loc: 0.0170  conf_neg: 0.1314 conf_pos: 0.1832
loc: 0.0228  conf_neg: 0.2574 conf_pos: 0.1149
loc: 0.0271  conf_neg: 0.1293 conf_pos: 0.2098
loc: 0.0216  conf_neg: 0.1595 conf_pos: 0.2095
loc: 0.0206  conf_neg: 0.1704 conf_pos: 0.1339
elapsed: 218s	loss: 0.3764	
Epoch 42/50
loc: 0.0242  conf_neg: 0.0905 conf_pos: 0.1880
loc: 0.0256  conf_neg: 0.1344 conf_pos: 0.1803
loc: 0.0196  conf_neg: 0.0963 conf_pos: 0.1748
loc: 0.0196  conf_neg: 0.0775 conf_pos: 0.1825
loc: 0.0203  conf_neg: 0.0678 conf_pos: 0.1455
loc: 0.0222  conf_neg: 0.0928 conf_pos: 0.1251
loc: 0.0164  conf_neg: 0.0595 conf_pos: 0.1160
loc: 0.0243  conf_neg: 0.1685 conf_pos: 0.1827
elapsed: 222s	loss: 0.2823	
Epoch 43/50
loc: 0.0235  conf_neg: 0.1420 conf_pos: 0.1715
loc: 0.0224  conf_neg: 0.0647 conf_pos: 0.1643
loc: 0.0257  conf_neg: 0.0522 conf_pos: 0.0958
loc: 0.0235  co

In [0]:
# Compute the evaluation metrics (mAP, see `test_metrics`) on the held-out split.
trainer.evaluate(val_loader)

{'mAP': 0.742937477071926}