In [1]:
import torch
import torch.nn.functional as F
import cv2
import numpy as np
from dataset import create_datasets
from model import Net
from anchor import generate_anchors, mark_anchors

## 创建Datasets && 随机看一些图片和真值

In [2]:
train_dataset, val_dataset = create_datasets('/home/louis/datasets/wider_face')
# TODO: show some images and ground truth bbox here

## 创建模型并随机用某张图片正向获得Predictions

In [3]:
image, annotation = train_dataset[0]
height, width = image.shape[:2]
image = cv2.resize(image, (640, 640))

height_scale = 640.0 / height
width_scale, height_scale = 640.0 / width, 640.0 / height

_input = torch.tensor(image).permute(2, 0, 1).unsqueeze(0).float()
_inputs = torch.cat( [_input, _input] )
net = Net()
predictions = list(net(_inputs))

In [7]:
print(len(predictions))

6


In [4]:
for index, prediction in enumerate(predictions):
    predictions[index] = prediction.view(1, 6, -1) # 1 should be replaced by batch size

### 每个feature map的预测数量

In [5]:
prediction_count = list(map(lambda x: x.size()[2], predictions))
print("predictions(4 + 2) of every feature map(from bottom to up) is {}".format( list(prediction_count) ) )

predictions(4 + 2) of every feature map(from bottom to up) is [25600, 6400, 1600, 400, 100, 25]


## 生成的Anchor

In [6]:
anchors = generate_anchors() # 使用默认设置

### 把predictions和anchor转换成二维矩阵   ( batch_size x anchor_size x 6 or 4)

In [7]:
predictions_concated = torch.cat(predictions, dim=2).permute(0, 2, 1)
print(predictions_concated.size())

torch.Size([1, 34125, 6])


In [8]:
anchors = np.vstack(list(map(lambda x: np.array(x), anchors)))
print(anchors.shape)

(34125, 4)


### 使用Ground Truth来标记Anchor


In [9]:
gt_bboxes_scaled = list(map(lambda x: [  x[0] * height_scale, x[1] * width_scale, x[2] * height_scale, x[3] * width_scale ] , annotation))
gt_bboxes_scaled = np.array(gt_bboxes_scaled)
positive_anchor_indices, matched_gt_box_indices, negative_anchor_indices = mark_anchors(anchors, gt_bboxes_scaled)

print(positive_anchor_indices)
print(matched_gt_box_indices)
print(negative_anchor_indices)

(array([32378, 32417, 32418, 32419]),)
[0 0 0 0]
(array([    0,     1,     2, ..., 34122, 34123, 34124]),)


## 计算Loss

针对每一个Anchor，我们有一个大小为6的预测向量（position delta + score），现在我们又根据真值标记出了正Anchor和负Anchor，以及每个正Anchor所对应的真值bbox，则可以进行Loss的计算了。

这里有两个loss，回归（Smooth L1 loss）和分类（cross entropy loss）

### 回归Loss，参考Faster RCNN 公式2

In [10]:
# 预测bbox 和 真值bbox 同时对 Anchor 计算 t 和 t'  ， loss = L1 loss of  t and t'， 注意 x y对应的是center，而不是left top
def change_coordinate(coordinates):
    """change top left bottom right to center x center y, width, height"""
    width = (coordinates[:, 3] - coordinates[:, 1])[:, np.newaxis]
    height = (coordinates[: ,2] - coordinates[: ,0])[:, np.newaxis]
    center_x = ((coordinates[: ,3] + coordinates[:, 1]) / 2)[:, np.newaxis]
    center_y = ((coordinates[:, 2] + coordinates[: ,0]) / 2)[:, np.newaxis]
    return np.concatenate( [center_x, center_y, width, height] , axis=1  )

In [11]:
# 选出参与计算回归Loss的Anchor和predictions，包括对应的ground truth
anchors_positive = torch.tensor(change_coordinate(anchors[positive_anchor_indices])).float() / 640
matched_bboxes = torch.tensor(change_coordinate(gt_bboxes_scaled[matched_gt_box_indices])).float() / 640
positive_predictions = torch.squeeze(predictions_concated)[positive_anchor_indices][:, :4]

epsilon = 0.0001
# 计算RPN公式5的8个变量
tx = positive_predictions[:, 0] - anchors_positive[:, 0]
ty = positive_predictions[:, 1] - anchors_positive[:, 1]
tw = torch.log( (positive_predictions[:, 2] + epsilon) / anchors_positive[:, 2])
th = torch.log( (positive_predictions[:, 3] + epsilon) / anchors_positive[:, 3])
t = torch.stack( (tx, ty, tw, th) )

gtx = matched_bboxes[:, 0] - anchors_positive[:, 0]
gty = matched_bboxes[:, 1] - anchors_positive[:, 1]
gtw = torch.log(matched_bboxes[:, 2] / anchors_positive[:, 2])
gth = torch.log(matched_bboxes[:, 3] / anchors_positive[:, 3])
gt = torch.stack( (gtx, gty, gtw, gth) )

In [12]:
F.smooth_l1_loss(t, gt)

tensor(2.5198)

In [27]:
# 选出所有被标记为正/负的anchor，以及对应的predictions
positive_predictions = torch.squeeze(predictions_concated)[positive_anchor_indices][:, 4:]
negative_predictions = torch.squeeze(predictions_concated)[negative_anchor_indices][:, 4:]

In [46]:
positive_target = torch.zeros_like( positive_predictions )
positive_target[:, 1] = 1.0
negative_target = torch.zeros_like(  negative_predictions )
negative_target[:, 0] = 1.0


effective_predictions = torch.cat( (positive_predictions, negative_predictions))
targets = torch.cat( (positive_target, negative_target) )

shuffle_indexes = torch.randperm(effective_predictions.size()[0])
effective_predictions = effective_predictions[shuffle_indexes]
targets = targets[shuffle_indexes]


In [47]:
F.binary_cross_entropy(F.sigmoid(effective_predictions), targets)

tensor(0.7147)