In [1]:
import numpy as np

In [3]:
# Broadcasting demo: each size-1 axis is stretched to match the other operand,
# so adding a (1, 9, 4) array to a (1900, 1, 4) array gives (1900, 9, 4).
a = np.random.random(size=(1, 9, 4))
b = np.random.random(size=(1900, 1, 4))
c = b + a
c.shape

(1900, 9, 4)

In [17]:
# Settings for anchor generation.
base_size = 16                      # side length of the reference window
ratios = [0.5, 1, 2]                # aspect ratios to enumerate
scales = 2 ** np.arange(3, 6)       # array([ 8, 16, 32])
# Reference window as [x1, y1, x2, y2] with inclusive corners:
# array([ 0,  0, 15, 15])
base_anchor = np.array([0, 0, base_size - 1, base_size - 1])

In [18]:
def _mkanchors(ws, hs, x_ctr, y_ctr):
    """
    Given a vector of widths (ws) and heights (hs) around a center
    (x_ctr, y_ctr), output a set of anchors (windows).
    """

    ws = ws[:, np.newaxis]
    hs = hs[:, np.newaxis]
    anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
                         y_ctr - 0.5 * (hs - 1),
                         x_ctr + 0.5 * (ws - 1),
                         y_ctr + 0.5 * (hs - 1)))
    return anchors

def _scale_enum(anchor, scales):
    """
    Enumerate one anchor per scale, all sharing the center of `anchor`.

    The width and height of `anchor` are each multiplied by `scales`, and the
    resulting windows are built around the original center by `_mkanchors`.
    """
    width, height, cx, cy = _whctrs(anchor)
    return _mkanchors(width * scales, height * scales, cx, cy)

def _whctrs(anchor):
    """
    Return width, height, x center, and y center for an anchor (window).
    """

    w = anchor[2] - anchor[0] + 1
    h = anchor[3] - anchor[1] + 1
    x_ctr = anchor[0] + 0.5 * (w - 1)
    y_ctr = anchor[1] + 0.5 * (h - 1)
    return w, h, x_ctr, y_ctr

def _ratio_enum(anchor, ratios):
    """
    Enumerate a set of anchors for each aspect ratio wrt an anchor.

    anchor -- window indexed as [x1, y1, x2, y2].
    ratios -- sequence (list, tuple or array) of aspect ratios.

    For every ratio the width is ``round(sqrt(w * h / ratio))`` and the height
    is ``round(width * ratio)``, where ``w`` and ``h`` are the extents of
    `anchor`; the windows are built around the center of `anchor` by
    `_mkanchors`.
    """
    # Coerce once so the arithmetic below is elementwise for any sequence.
    # Without this, a plain list only works when `_whctrs` happens to return
    # NumPy scalars; with plain Python numbers `size / ratios` raises TypeError.
    ratios = np.asarray(ratios, dtype=float)

    w, h, x_ctr, y_ctr = _whctrs(anchor)
    size = w * h
    size_ratios = size / ratios
    ws = np.round(np.sqrt(size_ratios))
    hs = np.round(ws * ratios)
    anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
    return anchors

In [23]:
# Expand the base anchor into one window per aspect ratio; the bare name on
# the last line displays the resulting array.
ratio_anchors = _ratio_enum(base_anchor, ratios)
ratio_anchors

array([[-3.5,  2. , 18.5, 13. ],
       [ 0. ,  0. , 15. , 15. ],
       [ 2.5, -3. , 12.5, 18. ]])

In [21]:
# For every ratio anchor, enumerate all scales, then stack the per-ratio
# results vertically into a single anchor table.
anchors = np.vstack([_scale_enum(ratio_anchor, scales) for ratio_anchor in ratio_anchors])

In [22]:
# Display the stacked anchor table (rows are [x1, y1, x2, y2]).
anchors

array([[ -84.,  -40.,   99.,   55.],
       [-176.,  -88.,  191.,  103.],
       [-360., -184.,  375.,  199.],
       [ -56.,  -56.,   71.,   71.],
       [-120., -120.,  135.,  135.],
       [-248., -248.,  263.,  263.],
       [ -36.,  -80.,   51.,   95.],
       [ -80., -168.,   95.,  183.],
       [-168., -344.,  183.,  359.]])

In [24]:
# Grid of offsets spaced 16 apart: 50 positions along x, 38 along y.
# np.meshgrid expands the two 1-D ranges into matching (38, 50) coordinate grids.
shift_x, shift_y = np.meshgrid(np.arange(0, 50) * 16, np.arange(0, 38) * 16)

# The grids are flattened into a 4-column shift table and converted to a
# torch tensor in later cells.

In [34]:
# Inspect the two coordinate grids produced by np.meshgrid and the shape of
# the first one.
# NOTE(review): the recorded output shows all three values, so the notebook
# presumably displays every bare expression rather than only the last — confirm.
shift_x
shift_x.shape
shift_y

array([[  0,  16,  32, ..., 752, 768, 784],
       [  0,  16,  32, ..., 752, 768, 784],
       [  0,  16,  32, ..., 752, 768, 784],
       ...,
       [  0,  16,  32, ..., 752, 768, 784],
       [  0,  16,  32, ..., 752, 768, 784],
       [  0,  16,  32, ..., 752, 768, 784]])

(38, 50)

array([[  0,   0,   0, ...,   0,   0,   0],
       [ 16,  16,  16, ...,  16,  16,  16],
       [ 32,  32,  32, ...,  32,  32,  32],
       ...,
       [560, 560, 560, ..., 560, 560, 560],
       [576, 576, 576, ..., 576, 576, 576],
       [592, 592, 592, ..., 592, 592, 592]])

In [48]:
# Flatten both grids and stack them as four rows (x, y, x, y); displayed as-is
# first, then transposed so that each row is one (x, y, x, y) shift.
shifts = np.vstack([shift_x.ravel(), shift_y.ravel()] * 2)
shifts
shifts = shifts.T
shifts

array([[  0,  16,  32, ..., 752, 768, 784],
       [  0,   0,   0, ..., 592, 592, 592],
       [  0,  16,  32, ..., 752, 768, 784],
       [  0,   0,   0, ..., 592, 592, 592]])

array([[  0,   0,   0,   0],
       [ 16,   0,  16,   0],
       [ 32,   0,  32,   0],
       ...,
       [752, 592, 752, 592],
       [768, 592, 768, 592],
       [784, 592, 784, 592]])

In [49]:
import torch

# Convert the shift table to a contiguous float32 tensor.
# torch.as_tensor accepts both an ndarray and a tensor, so this cell can be
# re-run safely even though it rebinds `shifts` from ndarray to tensor
# (torch.from_numpy would raise TypeError on the second run).
shifts = torch.as_tensor(shifts).contiguous().float()
shifts

tensor([[  0.,   0.,   0.,   0.],
        [ 16.,   0.,  16.,   0.],
        [ 32.,   0.,  32.,   0.],
        ...,
        [752., 592., 752., 592.],
        [768., 592., 768., 592.],
        [784., 592., 784., 592.]])

In [52]:
# Shape check on the shift table. `.size()` is called as a method, so `shifts`
# must already be a torch tensor here (on a NumPy array `.size` is an int
# attribute, not a callable).
shifts.size()

torch.Size([1900, 4])

In [None]:

# bbox_transform_inv(anchors, bbox_deltas, batch_size)
def bbox_transform_inv(boxes, deltas, batch_size):
    """
    Apply box regression deltas to reference boxes.

    boxes      -- tensor indexed as boxes[:, :, k] with k = 0..3 read as
                  x1, y1, x2, y2 (inclusive corners, hence the ``+ 1.0``).
    deltas     -- tensor whose last axis holds repeating groups of four values
                  (dx, dy, dw, dh), picked out with stride-4 slices.
    batch_size -- not used by this function; kept for call compatibility.

    Returns a new tensor shaped like `deltas` (it starts as a clone, so
    `deltas` itself is not modified) holding x1, y1, x2, y2 in each group of
    four.
    """
    x1, y1, x2, y2 = (boxes[:, :, k] for k in range(4))

    widths = x2 - x1 + 1.0
    heights = y2 - y1 + 1.0
    ctr_x = x1 + 0.5 * widths
    ctr_y = y1 + 0.5 * heights

    # Add a trailing axis so the per-box values broadcast over every group.
    widths_col = widths.unsqueeze(2)
    heights_col = heights.unsqueeze(2)

    # Centers move by a fraction of the box size; sizes scale exponentially.
    pred_ctr_x = deltas[:, :, 0::4] * widths_col + ctr_x.unsqueeze(2)
    pred_ctr_y = deltas[:, :, 1::4] * heights_col + ctr_y.unsqueeze(2)
    pred_w = torch.exp(deltas[:, :, 2::4]) * widths_col
    pred_h = torch.exp(deltas[:, :, 3::4]) * heights_col

    half_w = 0.5 * pred_w
    half_h = 0.5 * pred_h

    pred_boxes = deltas.clone()
    corners = (
        pred_ctr_x - half_w,  # x1
        pred_ctr_y - half_h,  # y1
        pred_ctr_x + half_w,  # x2
        pred_ctr_y + half_h,  # y2
    )
    for offset, values in enumerate(corners):
        pred_boxes[:, :, offset::4] = values

    return pred_boxes