In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.ops import boxes as box_ops
from torchvision.models.resnet import resnet50
from torch.utils.data import Dataset, DataLoader
import math

In [None]:
class RetinaFace(nn.Module):
    """Single-scale face-detection network built on a truncated ResNet-50.

    The ResNet-50 is cut before its last two child modules, and the
    2048-channel feature map it produces is reduced to 512 channels by a 1x1
    convolution followed by ReLU. Three 1x1 convolution heads then emit, for
    every spatial location, ``num_anchors * 2`` classification values,
    ``num_anchors * 4`` box values and ``num_anchors * 10`` landmark values.

    Args:
        pretrained: Forwarded unchanged to ``resnet50(pretrained=...)``.
        num_anchors: Number of anchors predicted per feature-map location.
            Defaults to 6, the value that used to be hard-coded in the heads.

    Raises:
        ValueError: If ``num_anchors`` is smaller than 1.
    """

    def __init__(self, pretrained=True, num_anchors=6):
        super().__init__()
        # Validate before building the backbone so a bad argument fails fast.
        if num_anchors < 1:
            raise ValueError(f"num_anchors must be >= 1, got {num_anchors}")
        self.num_anchors = num_anchors

        # NOTE(review): newer torchvision releases prefer a `weights=` argument
        # over `pretrained=` -- confirm against the installed version.
        backbone = resnet50(pretrained=pretrained)
        # Drop the last two children of the backbone (up to conv5_x).
        self.body = nn.Sequential(*list(backbone.children())[:-2])

        # 1x1 channel reduction shared by all three prediction heads.
        self.ssh = nn.Conv2d(2048, 512, kernel_size=1)
        # Per anchor: 2 class values.
        self.cls_head = nn.Conv2d(512, num_anchors * 2, kernel_size=1)
        # Per anchor: 4 bbox coords.
        self.bbox_head = nn.Conv2d(512, num_anchors * 4, kernel_size=1)
        # Per anchor: 5 landmarks * 2 coords = 10 values.
        self.lmk_head = nn.Conv2d(512, num_anchors * 10, kernel_size=1)

    def forward(self, x):
        """Run the backbone and the three prediction heads.

        Args:
            x: Input tensor fed to the backbone (presumably a batched image
                tensor -- TODO confirm against the caller).

        Returns:
            A 4-tuple ``(cls, bbox, lmk, size)``: the raw, un-reshaped outputs
            of the classification, box and landmark convolutions, followed by
            ``feat.shape[2]`` of the shared feature map (the height for
            batched NCHW input).
        """
        feat = self.body(x)
        feat = F.relu(self.ssh(feat))

        cls = self.cls_head(feat)
        bbox = self.bbox_head(feat)
        lmk = self.lmk_head(feat)

        return cls, bbox, lmk, feat.shape[2]

In [None]:
class RetinaFaceLoss(nn.Module):
    """Toy RetinaFace objective that pairs every anchor with the first target.

    No anchor-to-ground-truth matching is performed: element 0 of each target
    field is broadcast across all predictions, and the three resulting loss
    terms are added without weighting.
    """

    def __init__(self):
        super().__init__()

    def forward(self, pred_cls, pred_bbox, pred_lmk, anchors, targets):
        """Return classification + box + landmark loss as a single tensor.

        ``targets`` is indexed with the keys ``'bbox'``, ``'cls'`` and
        ``'lmk'``, and only element 0 of each entry is used. ``anchors`` is
        consulted solely for its shape, which the broadcast box target takes.
        """
        # Broadcast the first ground-truth entry to the prediction shapes.
        box_goal = targets['bbox'][0][None].expand_as(anchors)
        label_goal = targets['cls'][0].expand(pred_cls.size(0))
        point_goal = targets['lmk'][0].expand(pred_lmk.size(0), 10)

        # Accumulate in the order: classification, box, landmarks.
        total = F.cross_entropy(pred_cls, label_goal)
        total = total + F.smooth_l1_loss(pred_bbox, box_goal)
        total = total + F.smooth_l1_loss(pred_lmk, point_goal)
        return total

In [1]:
from zipfile import ZipFile

In [2]:
# Peek at the first entry of the training archive without extracting it.
# The handle is deliberately not named `zip`: in a notebook that name would
# stay bound at global scope and shadow the builtin `zip` for later cells.
with ZipFile("../WIDER_train.zip") as archive:
    # infolist() returns a list, so slicing yields at most one entry and
    # prints nothing for an empty archive.
    for entry in archive.infolist()[:1]:
        print(entry)

<ZipInfo filename='WIDER_train/' external_attr=0x10>
