-
Notifications
You must be signed in to change notification settings - Fork 466
/
anchor_target_layer.py
309 lines (263 loc) · 12.1 KB
/
anchor_target_layer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick and Sean Bell
# --------------------------------------------------------
import os
import yaml
import numpy as np
import numpy.random as npr
from .generate_anchors import generate_anchors
from ..utils.cython_bbox import bbox_overlaps, bbox_intersections
# TODO: make fast_rcnn irrelevant
# >>>> obsolete, because it depends on sth outside of this project
from ..fast_rcnn.config import cfg
from ..fast_rcnn.bbox_transform import bbox_transform
# <<<< obsolete
DEBUG = False
def anchor_target_layer(rpn_cls_score, gt_boxes, gt_ishard, dontcare_areas, im_info, _feat_stride=[16, ],
anchor_scales=[4, 8, 16, 32]):
"""
Assign anchors to ground-truth targets. Produces anchor classification
labels and bounding-box regression targets.
Parameters
----------
rpn_cls_score: for pytorch (1, Ax2, H, W) bg/fg scores of previous conv layer
gt_boxes: (G, 5) vstack of [x1, y1, x2, y2, class]
gt_ishard: (G, 1), 1 or 0 indicates difficult or not
dontcare_areas: (D, 4), some areas may contains small objs but no labelling. D may be 0
im_info: a list of [image_height, image_width, scale_ratios]
_feat_stride: the downsampling ratio of feature map to the original input image
anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16])
----------
Returns
----------
rpn_labels : (HxWxA, 1), for each anchor, 0 denotes bg, 1 fg, -1 dontcare
rpn_bbox_targets: (HxWxA, 4), distances of the anchors to the gt_boxes(may contains some transform)
that are the regression objectives
rpn_bbox_inside_weights: (HxWxA, 4) weights of each boxes, mainly accepts hyper param in cfg
rpn_bbox_outside_weights: (HxWxA, 4) used to balance the fg/bg,
beacuse the numbers of bgs and fgs mays significiantly different
"""
_anchors = generate_anchors(scales=np.array(anchor_scales))
_num_anchors = _anchors.shape[0]
if DEBUG:
print 'anchors:'
print _anchors
print 'anchor shapes:'
print np.hstack((
_anchors[:, 2::4] - _anchors[:, 0::4],
_anchors[:, 3::4] - _anchors[:, 1::4],
))
_counts = cfg.EPS
_sums = np.zeros((1, 4))
_squared_sums = np.zeros((1, 4))
_fg_sum = 0
_bg_sum = 0
_count = 0
# allow boxes to sit over the edge by a small amount
_allowed_border = 0
# map of shape (..., H, W)
# height, width = rpn_cls_score.shape[1:3]
im_info = im_info[0]
# Algorithm:
#
# for each (H, W) location i
# generate 9 anchor boxes centered on cell i
# apply predicted bbox deltas at cell i to each of the 9 anchors
# filter out-of-image anchors
# measure GT overlap
assert rpn_cls_score.shape[0] == 1, \
'Only single item batches are supported'
# map of shape (..., H, W)
# pytorch (bs, c, h, w)
height, width = rpn_cls_score.shape[2:4]
if DEBUG:
print 'AnchorTargetLayer: height', height, 'width', width
print ''
print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
print 'scale: {}'.format(im_info[2])
print 'height, width: ({}, {})'.format(height, width)
print 'rpn: gt_boxes.shape', gt_boxes.shape
print 'rpn: gt_boxes', gt_boxes
# 1. Generate proposals from bbox deltas and shifted anchors
shift_x = np.arange(0, width) * _feat_stride
shift_y = np.arange(0, height) * _feat_stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y) # in W H order
# K is H x W
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# reshape to (K*A, 4) shifted anchors
A = _num_anchors
K = shifts.shape[0]
all_anchors = (_anchors.reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
all_anchors = all_anchors.reshape((K * A, 4))
total_anchors = int(K * A)
# only keep anchors inside the image
inds_inside = np.where(
(all_anchors[:, 0] >= -_allowed_border) &
(all_anchors[:, 1] >= -_allowed_border) &
(all_anchors[:, 2] < im_info[1] + _allowed_border) & # width
(all_anchors[:, 3] < im_info[0] + _allowed_border) # height
)[0]
if DEBUG:
print 'total_anchors', total_anchors
print 'inds_inside', len(inds_inside)
# keep only inside anchors
anchors = all_anchors[inds_inside, :]
if DEBUG:
print 'anchors.shape', anchors.shape
# label: 1 is positive, 0 is negative, -1 is dont care
# (A)
labels = np.empty((len(inds_inside),), dtype=np.float32)
labels.fill(-1)
# overlaps between the anchors and the gt boxes
# overlaps (ex, gt), shape is A x G
overlaps = bbox_overlaps(
np.ascontiguousarray(anchors, dtype=np.float),
np.ascontiguousarray(gt_boxes, dtype=np.float))
argmax_overlaps = overlaps.argmax(axis=1) # (A)
max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
gt_argmax_overlaps = overlaps.argmax(axis=0) # G
gt_max_overlaps = overlaps[gt_argmax_overlaps,
np.arange(overlaps.shape[1])]
gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
# assign bg labels first so that positive labels can clobber them
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# fg label: for each gt, anchor with highest overlap
labels[gt_argmax_overlaps] = 1
# fg label: above threshold IOU
labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
# assign bg labels last so that negative labels can clobber positives
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# preclude dontcare areas
if dontcare_areas is not None and dontcare_areas.shape[0] > 0:
# intersec shape is D x A
intersecs = bbox_intersections(
np.ascontiguousarray(dontcare_areas, dtype=np.float), # D x 4
np.ascontiguousarray(anchors, dtype=np.float) # A x 4
)
intersecs_ = intersecs.sum(axis=0) # A x 1
labels[intersecs_ > cfg.TRAIN.DONTCARE_AREA_INTERSECTION_HI] = -1
# preclude hard samples that are highly occlusioned, truncated or difficult to see
if cfg.TRAIN.PRECLUDE_HARD_SAMPLES and gt_ishard is not None and gt_ishard.shape[0] > 0:
assert gt_ishard.shape[0] == gt_boxes.shape[0]
gt_ishard = gt_ishard.astype(int)
gt_hardboxes = gt_boxes[gt_ishard == 1, :]
if gt_hardboxes.shape[0] > 0:
# H x A
hard_overlaps = bbox_overlaps(
np.ascontiguousarray(gt_hardboxes, dtype=np.float), # H x 4
np.ascontiguousarray(anchors, dtype=np.float)) # A x 4
hard_max_overlaps = hard_overlaps.max(axis=0) # (A)
labels[hard_max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = -1
max_intersec_label_inds = hard_overlaps.argmax(axis=1) # H x 1
labels[max_intersec_label_inds] = -1 #
# subsample positive labels if we have too many
num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg:
disable_inds = npr.choice(
fg_inds, size=(len(fg_inds) - num_fg), replace=False)
labels[disable_inds] = -1
# subsample negative labels if we have too many
num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
bg_inds = np.where(labels == 0)[0]
if len(bg_inds) > num_bg:
disable_inds = npr.choice(
bg_inds, size=(len(bg_inds) - num_bg), replace=False)
labels[disable_inds] = -1
# print "was %s inds, disabling %s, now %s inds" % (
# len(bg_inds), len(disable_inds), np.sum(labels == 0))
# bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])
bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)
bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
# uniform weighting of examples (given non-uniform sampling)
# num_examples = np.sum(labels >= 0) + 1
# positive_weights = np.ones((1, 4)) * 1.0 / num_examples
# negative_weights = np.ones((1, 4)) * 1.0 / num_examples
positive_weights = np.ones((1, 4))
negative_weights = np.zeros((1, 4))
else:
assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
(cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
(np.sum(labels == 1)) + 1)
negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
(np.sum(labels == 0)) + 1)
bbox_outside_weights[labels == 1, :] = positive_weights
bbox_outside_weights[labels == 0, :] = negative_weights
if DEBUG:
_sums += bbox_targets[labels == 1, :].sum(axis=0)
_squared_sums += (bbox_targets[labels == 1, :] ** 2).sum(axis=0)
_counts += np.sum(labels == 1)
means = _sums / _counts
stds = np.sqrt(_squared_sums / _counts - means ** 2)
print 'means:'
print means
print 'stdevs:'
print stds
# map up to original set of anchors
labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)
if DEBUG:
print 'rpn: max max_overlap', np.max(max_overlaps)
print 'rpn: num_positive', np.sum(labels == 1)
print 'rpn: num_negative', np.sum(labels == 0)
_fg_sum += np.sum(labels == 1)
_bg_sum += np.sum(labels == 0)
_count += 1
print 'rpn: num_positive avg', _fg_sum / _count
print 'rpn: num_negative avg', _bg_sum / _count
# labels
# pdb.set_trace()
labels = labels.reshape((1, height, width, A))
labels = labels.transpose(0, 3, 1, 2)
rpn_labels = labels.reshape((1, 1, A * height, width)).transpose(0, 2, 3, 1)
# bbox_targets
bbox_targets = bbox_targets \
.reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
rpn_bbox_targets = bbox_targets
# bbox_inside_weights
bbox_inside_weights = bbox_inside_weights \
.reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
# assert bbox_inside_weights.shape[2] == height
# assert bbox_inside_weights.shape[3] == width
rpn_bbox_inside_weights = bbox_inside_weights
# bbox_outside_weights
bbox_outside_weights = bbox_outside_weights \
.reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
# assert bbox_outside_weights.shape[2] == height
# assert bbox_outside_weights.shape[3] == width
rpn_bbox_outside_weights = bbox_outside_weights
return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
def _unmap(data, count, inds, fill=0):
""" Unmap a subset of item (data) back to the original set of items (of
size count) """
if len(data.shape) == 1:
ret = np.empty((count,), dtype=np.float32)
ret.fill(fill)
ret[inds] = data
else:
ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
ret.fill(fill)
ret[inds, :] = data
return ret
def _compute_targets(ex_rois, gt_rois):
"""Compute bounding-box regression targets for an image."""
assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 5
return bbox_transform(ex_rois, gt_rois[:, :4]).astype(np.float32, copy=False)