<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Region-proposal-network" data-toc-modified-id="Region-proposal-network-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Region proposal network</a></span></li></ul></div>

This notebook aims to provide functions that produce anchor boxes as decribed in the paper.

A box will be describe either as a numpy array $[y^-, x^-, y^+, x^+]$  or as a numpy array $[c_y, c_x, h,w]$

In [1]:
import numpy as np

In [2]:
def vertice_to_yxhw(anchor):
    res = (np.mean((anchor[0],anchor[2])),np.mean((anchor[1],anchor[3])), anchor[2] - anchor[0], anchor[3] - anchor[1])
    return np.array(res)

In [30]:
def yxhw_to_vertice(anchor):
    res = (anchor[0] - anchor[2]/2, anchor[1] - anchor[3]/2, anchor[0] + anchor[2]/2, anchor[1] + anchor[3]/2)
    return np.array(res)

In [199]:
image_test = np.random.randn(800,800)
image_test_feature = np.random.randn(50,50)
ratio = [0.5, 1, 2]
anchor_scales = [8, 16, 32]
gt_box = [np.array([20, 30, 400, 500]), np.array([300, 400, 500, 600])]
labels_gt_box = np.array(("chien","chat"))

In [5]:
def anchor_box(center, ratio, scale, shape_initial, shape_featured):
    sub_width = shape_initial[0]/shape_featured[0]
    sub_height = shape_initial[1]/shape_featured[1]
    anchor_width = sub_width*scale*np.sqrt(ratio)
    anchor_height = sub_height*scale/np.sqrt(ratio)
    
    ym = center[1] - anchor_height/2
    yp = center[1] + anchor_height/2
    xm = center[0] - anchor_width/2
    xp = center[0] + anchor_width/2
    
    anchor = np.array((ym,xm,yp,xp))
    return(anchor)

In [6]:
def list_centers(shape_initial, shape_featured):
    ratio_h = shape_initial[1]/shape_featured[1]
    ratio_w = shape_initial[0]/shape_featured[0]
    #intiail center is the center at the left top corner
    all_centers = [np.array((ratio_w/2, ratio_h/2),dtype=float) + np.array((ratio_w*i, ratio_h*j),dtype=float) for i in range(int(shape_featured[0])) for j in range(int(shape_featured[1]))]
    return(all_centers)

In [7]:
def anchor_boxes(list_ratios, list_scales, shape_initial, shape_featured):
    list_center = list_centers(shape_initial, shape_featured)
    all_anchors = [anchor_box(center, ratio, scale,shape_initial,shape_featured) for center in list_center for ratio in list_ratios
                   for scale in list_scales]
    return(all_anchors)

In [8]:
def check_anchor_inside(anchor_box, shape_initial):
    ym = anchor_box[0]
    yp = anchor_box[2]
    xm = anchor_box[1]
    xp = anchor_box[3]
    is_inside = (min(xm,xp)>0) & (max(xm,xp)<shape_initial[0]) & (max(yp,ym) < shape_initial[1]) & (min(ym,yp) > 0) 
    return(is_inside)

In [70]:
def iou(box1,box2):
    xm = max(box1[1], box2[1])
    xp = min(box1[3], box2[3])
    ym = max(box1[0], box2[0])
    yp = min(box1[2], box2[2])
    
    intersection = 0
    
    if((xm < xp) &(ym < yp)):
        intersection = (xp - xm)*(yp-ym)
    
    union = (box1[3]-box1[1])*(box1[2] - box1[0]) + (box2[3]-box2[1])*(box2[2] - box2[0]) - intersection
    return(intersection/union)

In [10]:
iou(np.array([1,1,3,3]),np.array([2,2,4,4]))

0.14285714285714285

In [11]:
list_anchors = anchor_boxes(ratio,anchor_scales,(800,800),(50,50))

In [12]:
def iou_anchors_vs_gtbox(list_anchors, list_gt_box):
    res = np.transpose([[iou(anchor, gt_box) for anchor in list_anchors] for gt_box in list_gt_box])
    return(np.array(res))

**TODO** : changer la forme de cette fonction en utilisant que des *arrays*.

In [13]:
#Return an array with :
#for all ground truth box, the anchors which maximize the IOU with it
#for all anchor, the max of the IOU

#the first column of the array is the index and the last the IOU 
def best_anchors_from_iou(dt_anchors_vs_gtbox):
    #index highest by gtbox (cond a)
    dt_anchors_vs_gtbox.argmax(axis = 0)
    ind_argmax = np.where(dt_anchors_vs_gtbox == dt_anchors_vs_gtbox.max(axis = 0))[0]
    cond_a = dt_anchors_vs_gtbox[ind_argmax,:].max(axis = 1)
    
    #highest by anchors box (cond b)
    index = dt_anchors_vs_gtbox.argmax(axis = 1)
    iou_max = dt_anchors_vs_gtbox.max(axis = 1)
    cond_b = dt_anchors_vs_gtbox[np.arange(dt_anchors_vs_gtbox.shape[0]),index]
    
    index_res = np.concatenate((ind_argmax,np.arange(dt_anchors_vs_gtbox.shape[0])))
    res = np.concatenate((cond_a, cond_b), axis=0)
    res = np.column_stack((index_res,res))
    return(res)

In [14]:
#label_from_iou returns a np.array containing for each anchor its label. (+1 if foreground, 0 if background and -1 if not used
#during the learning phase)
#The default thresholds are defined according the original paper about Fatest RCNN.

def label_from_iou(dt_anchors_vs_gtbox,pos_threshold = 0.7, neg_threshold = 0.3):
    label = np.full(dt_anchors_vs_gtbox.shape[0],-1)
    iou_max = dt_anchors_vs_gtbox.max(axis = 1)
    #positive labels : 1
    label[iou_max > pos_threshold] = 1
    #negative labels : 0
    label[iou_max < neg_threshold] = 0
    #for anchors whose maximize IOU for a given object : +1
    dt_anchors_vs_gtbox.argmax(axis = 0)
    ind_argmax = np.where(dt_anchors_vs_gtbox == dt_anchors_vs_gtbox.max(axis = 0))[0]
    label[ind_argmax] = 1
    return(label)

In [16]:
sum([check_anchor_inside(anchor, (800,800)) for anchor in list_anchors])

8940

In [19]:
def loc(anchor_box, gt_box):
    anchor_box = vertice_to_yxhw(anchor_box)
    gt_box = vertice_to_yxhw(gt_box)
    
    y = (gt_box[0] - anchor_box[0])/anchor_box[2]
    x = (gt_box[1] - anchor_box[1])/anchor_box[3]
    w = np.log(gt_box[3]/anchor_box[3])
    h = np.log(gt_box[2]/anchor_box[2])
    
    return np.array((y,x,h,w))

In [20]:
def deloc(anchor_box, reparam_box):
    anchor_box = vertice_to_yxhw(anchor_box)
    y = anchor_box[0] + (reparam_box[0] * anchor_box[2])
    x = anchor_box[1] + (reparam_box[1] * anchor_box[3])
    h = np.exp(reparam_box[2])*anchor_box[2]
    w = np.exp(reparam_box[3])*anchor_box[3]
    return np.array((y,x,h,w))

In [42]:
def reparam_all_anchors(list_anchors, list_gt_box):
    iou = iou_anchors_vs_gtbox(list_anchors, gt_box)
    index_max_gtbox = iou.argmax(axis = 1)
    gt_box_by_anchors = [list_gt_box[i] for i in index_max_gtbox]
    #suboptimal
    res = [loc(anchor, gt_box) for anchor,gt_box in zip(list_anchors, gt_box_by_anchors)]
    
    #compute labels
    labels = label_from_iou(iou)
    return res,labels

In [43]:
res_reparam = reparam_all_anchors(list_anchors,gt_box)
res_reparam[0]

[array([1.11590289, 2.83947567, 0.7415674 , 1.64727602]),
 array([0.55795144, 1.41973783, 0.04842022, 0.95412884]),
 array([ 0.27897572,  0.70986892, -0.64472696,  0.26098166]),
 array([1.578125  , 2.0078125 , 1.08814099, 1.30070243]),
 array([0.7890625 , 1.00390625, 0.39499381, 0.60755525]),
 array([ 0.39453125,  0.50195312, -0.29815337, -0.08559193]),
 array([2.23180578, 1.41973783, 1.43471458, 0.95412884]),
 array([1.11590289, 0.70986892, 0.7415674 , 0.26098166]),
 array([ 0.55795144,  0.35493446,  0.04842022, -0.43216552]),
 array([1.02751454, 2.83947567, 0.7415674 , 1.64727602]),
 array([0.51375727, 1.41973783, 0.04842022, 0.95412884]),
 array([ 0.25687864,  0.70986892, -0.64472696,  0.26098166]),
 array([1.453125  , 2.0078125 , 1.08814099, 1.30070243]),
 array([0.7265625 , 1.00390625, 0.39499381, 0.60755525]),
 array([ 0.36328125,  0.50195312, -0.29815337, -0.08559193]),
 array([2.05502908, 1.41973783, 1.43471458, 0.95412884]),
 array([1.02751454, 0.70986892, 0.7415674 , 0.260981

In [52]:
def deparam_all_anchors(list_anchors, list_box_param):
    res = [(deloc(anchor, box_param)) for anchor,box_param in zip(list_anchors, list_box_param)]
    return res

In [55]:
np.array(deparam_all_anchors(list_anchors, res_reparam[0]))

array([[210., 265., 380., 470.],
       [210., 265., 380., 470.],
       [210., 265., 380., 470.],
       ...,
       [210., 265., 380., 470.],
       [210., 265., 380., 470.],
       [210., 265., 380., 470.]])

In [18]:
#TODO : heck how to fill when

def batch_training_proposal(dt_anchors_vs_gtbox, nsize = 256, pos_ratio = 0.5):
    #number of positive units we need to reach in the training sample (we want a balanced sample)
    nb_pos_to_drawn = round(nsize*pos_ratio)
    lab = label_from_iou(dt_anchors_vs_gtbox)
    pos_lab = np.where(lab == 1)[0]
    neg_lab = np.where(lab == 0)[0]
    pos = len(pos_lab)
    neg = len(neg_lab)
    
    if (pos > nb_pos_to_drawn):
        disabled_index_pos = np.random.choice(pos_lab, size=(pos - nb_pos_to_drawn), replace = False)
        lab[disabled_index_pos] = -1
    
    if (neg > nsize - nb_pos_to_drawn):
        if(pos < nb_pos_to_drawn):
            disabled_index_neg = np.random.choice(neg_lab, size=(neg - nsize + pos), replace = False)
        else:
            disabled_index_neg = np.random.choice(neg_lab, size=(neg + nb_pos_to_drawn - nsize), replace = False)
        
        lab[disabled_index_neg] = -1    
    
    res = np.where((lab == 0) | (lab == 1))[0]
    return res

TO DO A FUNCTION THAT KEEPS ONLY THOSE WITH 0 OR 1 après reparam
on obtient label avec la sortie de batch, il faut garder un vecteur de taille nb anchors mais avec 256 + ou -

In [149]:
def clip_predicted_boxes(list_box, th_min, th_max):
    list_box = np.array(list_box)
    return list(np.clip(list_box,th_min,th_max))

In [156]:
#remove all boxes with at least the width or the height less that 16
def boxes_hw_min(list_box, list_score, min_size = 16):
    boxes = np.array(list_box)
    height = boxes[:, 2] - boxes[:, 0]
    width = boxes[:, 3] - boxes[:, 1]
    box_kept = np.where((height > min_size) & (width > min_size))[0]
    list_box_kept = [list_box[j] for j in box_kept]
    list_score = [list_score[j] for j in box_kept]
    return list_box_kept, list_score

In [155]:
a = [3,4,5,6,7]
b = [2,4]
a[slice(b)]

TypeError: slice indices must be integers or None or have an __index__ method

In [186]:
np.where((np.array([3,2,1]) == 1))[0]+1

array([3], dtype=int64)

In [189]:
def nms(list_box, list_score, top_pre, top_post, thresold):
    list_score = np.array(list_score)
    order = list_score.argsort()[::-1]
    order = order[:top_pre]
    keep = []
    list_box = np.array(list_box)
    
    ym = list_box[:,0]
    xm = list_box[:,1]
    yp = list_box[:,2]
    xp = list_box[:,3]
    areas = (xp - xm + 1) * (yp - ym + 1)

    while len(order)>0:
        i = order[0]
        yym = np.maximum(ym[i], ym[order[1:]])
        xxm = np.maximum(xm[i], xm[order[1:]])
        yyp = np.minimum(yp[i], yp[order[1:]])
        xxp = np.minimum(xp[i], xp[order[1:]])
        
        width = np.maximum(0.0, xxp - xxm + 1)
        height = np.maximum(0.0, yyp - yym + 1)
        intersection = width*height
        ovr = intersection/(areas[i] + areas[order[1:]] - intersection)
        
        ind_to_keep = np.where(ovr <= thresold)[0]
        order = order[ind_to_keep + 1]
        keep.append(i)
    
    keep = keep[:top_post]
    return(list_box[keep,:], list_score[keep])

In [None]:
list_score = np.random.uniform(size = len(list_anchors))
list_box = np.array(list_anchors)

In [101]:
def nms(list_box, list_score, top_pre, top_post, thresold):
    order = list_score.argsort()[::-1]
    order = order[:top_pre]
    keep = []
    while len(order)>0:
        i = order[0]
        iou_between = [iou(list_box[i], list_box[j]) for j in order]
        ind_to_keep = np.where((np.array(iou_between) <= thresold))[0]
        order = order[ind_to_keep]
        keep.append(i)
    return(np.array(list_box)[keep,:])

In [118]:
len(nms(list_anchors, list_score, 12000,2000, 0.7))

2000

In [77]:
test = [3,4,5,6,7]
print(np.where((np.array(test) > 5)))

(array([3, 4], dtype=int64),)


# Region proposal network

In [27]:
import torch
import torch.nn as nn
mid_channels = 512
in_channels = 512 
n_anchor = 9 #nb_weight * nb_height
conv1 = nn.Conv2d(in_channels, mid_channels, 3, 1, 1)
reg_layer = nn.Conv2d(mid_channels, n_anchor *4, 1, 1, 0)
cls_layer = nn.Conv2d(mid_channels, n_anchor *2, 1, 1, 0) 

# conv sliding layer
conv1.weight.data.normal_(0, 0.01)
conv1.bias.data.zero_()
# Regression layer
reg_layer.weight.data.normal_(0, 0.01)
reg_layer.bias.data.zero_()
# classification layer
cls_layer.weight.data.normal_(0, 0.01)
cls_layer.bias.data.zero_()

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [123]:
x = torch.rand(1,512,50,50)
pred_anchor_locs = reg_layer(x)
pred_cls_scores = cls_layer(x)

In [124]:
pred_anchor_locs = pred_anchor_locs.permute(0, 2, 3, 1).contiguous().view(1, -1, 4)
print(pred_anchor_locs.shape)
#Out: torch.Size([1, 22500, 4])
pred_cls_scores = pred_cls_scores.permute(0, 2, 3, 1).contiguous()
print(pred_cls_scores)
#Out torch.Size([1, 50, 50, 18])
objectness_score = pred_cls_scores.view(1, 50, 50, 9, 2)[:, :, :, :, 1].contiguous().view(1, -1)
print(objectness_score.shape)
#Out torch.Size([1, 22500])
pred_cls_scores  = pred_cls_scores.view(1, -1, 2)
print(pred_cls_scores.shape)
# Out torch.size([1, 22500, 2])

torch.Size([1, 22500, 4])
tensor([[[[ 0.1054, -0.0132, -0.0183,  ..., -0.0489,  0.0355,  0.1080],
          [ 0.1714, -0.0209, -0.0059,  ...,  0.0098,  0.0295,  0.1754],
          [ 0.1429, -0.0469, -0.0882,  ..., -0.0068, -0.0256,  0.0923],
          ...,
          [-0.0278,  0.0678, -0.0948,  ...,  0.0360,  0.0110,  0.1779],
          [ 0.0831,  0.0731,  0.0817,  ...,  0.1672, -0.1304, -0.0011],
          [ 0.1170,  0.0021,  0.0164,  ...,  0.1509,  0.0567, -0.0171]],

         [[ 0.0534,  0.0927, -0.1854,  ...,  0.0551,  0.0442,  0.0947],
          [ 0.0979,  0.0076, -0.1124,  ...,  0.1187, -0.0626,  0.1494],
          [-0.0010,  0.0596, -0.0278,  ...,  0.0131, -0.0701,  0.3052],
          ...,
          [ 0.0973, -0.0371, -0.0537,  ...,  0.1661, -0.0345,  0.0211],
          [ 0.1140, -0.0071, -0.1287,  ...,  0.0974, -0.0092,  0.2186],
          [ 0.2481,  0.0673,  0.0411,  ...,  0.0756,  0.0148,  0.2545]],

         [[ 0.1035,  0.0543, -0.1031,  ...,  0.1707, -0.0143,  0.1191],
    

tensor([[[-0.1044, -0.1174, -0.1226, -0.0288],
         [-0.0800,  0.1410, -0.1113, -0.1160],
         [-0.0528, -0.1964, -0.0551,  0.0069],
         ...,
         [ 0.1836,  0.1449,  0.0459, -0.0474],
         [ 0.1467,  0.1143,  0.0113, -0.1079],
         [-0.1453,  0.0871,  0.1216, -0.0816]]], grad_fn=<SqueezeBackward1>)

In [234]:
#0 in labels_gt_box must be the background
def batch_training_proposal_FastRCNN(list_box,list_gt_box,labels_gt_box, nsize = 128, pos_ratio = 0.25, pos_iou_threshold = 0.5,
                                    neg_iou_threshold_p = 0.5, neg_iou_threshold_n = 0.0):
    #compute iou between each pair
    dt_anchors_vs_gtbox = iou_anchors_vs_gtbox(list_box,list_gt_box)
    
    #number of positive units we need to reach in the training sample (we want a balanced sample)
    nb_pos_to_drawn = round(nsize*pos_ratio)
    iou = iou_anchors_vs_gtbox(roi_pred, gt_box)
    #compute the maximum for each anchor
    gt_roi_label = np.argmax(iou, axis = 1)
    gt_roi_max = np.max(iou, axis = 1)
    labels = labels_gt_box[gt_roi_label]
    
    #assign the label if greater that pos_iou_threshold
    #assign background if between the two negative thresholds
    gt_pos = np.where((gt_roi_max > pos_iou_threshold))[0]
    gt_neg = np.where((gt_roi_max < neg_iou_threshold_p) & (gt_roi_max > neg_iou_threshold_n))[0] #background -- 0

    #Nb of positives and negatives boxes get using the thresholds
    pos = len(gt_pos)
    neg = len(gt_neg)
    
    #Subsampling from it
    if (pos > nb_pos_to_drawn):
        disabled_index_pos = np.random.choice(range(len(gt_pos)), size=(pos - nb_pos_to_drawn), replace = False)
        gt_pos = np.delete(gt_pos, disabled_index_pos)
    
    if (neg > nsize - nb_pos_to_drawn):
        if(pos < nb_pos_to_drawn):
            disabled_index_neg = np.random.choice(range(len(gt_neg)), size=(neg - nsize + pos), replace = False)
            gt_neg = np.delete(gt_neg, disabled_index_neg)
        else:
            disabled_index_neg = np.random.choice(range(len(gt_neg)), size=(neg + nb_pos_to_drawn - nsize), replace = False)
            gt_neg = np.delete(gt_neg, disabled_index_neg)
        
    
    #if negative : assign background labels with it's "0"
    labels[gt_neg] = "0"
    final_index = np.append(gt_pos,gt_neg)
    
    #Reparams
    reparam = [loc(box,gt_box) for box,gt_box in zip(list(np.array(list_box)[final_index,:]), list(np.array(list_gt_box)[gt_roi_label,:]))]
    
    return reparam,labels[final_index]

In [207]:
#RPN
image = (800,800)

pred_anchor = torch.squeeze(pred_anchor_locs,0).detach().numpy()
pred_score = torch.squeeze(objectness_score,0).detach().numpy()

deparam_pred = deparam_all_anchors(list_anchors, pred_anchor)
deparam_pred = clip_predicted_boxes(deparam_pred, 0, image[1])
box_pred, score_pred = boxes_hw_min(deparam_pred, pred_score,16)
roi_pred, score_pred = nms(box_pred, score_pred, 12000, 2000, 0.7)

In [235]:
batch_training_proposal_FastRCNN(roi_pred,gt_box,labels_gt_box, nsize = 128, pos_ratio = 0.25, pos_iou_threshold = 0.5,
                                    neg_iou_threshold_p = 0.5, neg_iou_threshold_n = 0.0)

([array([-0.11650544, -0.07619625, -0.04326101,  0.29366306]),
  array([-0.26851319,  0.02044298, -0.17726915,  0.03043412]),
  array([-0.11129657,  0.08881933, -0.19695847,  0.04336698]),
  array([-0.01576858, -0.29441941, -0.03116676, -0.01573826]),
  array([ 0.00085513, -0.12903704, -0.03481304,  0.54004987]),
  array([-0.12771864, -0.02820597,  0.12351517,  0.05061924]),
  array([ 0.33993493,  0.40872029, -0.78865692, -0.86363078]),
  array([ 0.3434266 ,  0.71636546, -0.5089896 , -0.72042345]),
  array([-1.1931376 , -0.97378316,  1.03722163,  0.7409745 ]),
  array([-0.15144037, -0.20131316,  0.04562208, -0.09068628]),
  array([-0.19168329,  0.07854732, -0.58356402,  0.18093502]),
  array([ 0.48742756,  0.48004078, -0.42844377, -0.42630529]),
  array([-1.10030642,  0.08059591,  1.03233346,  0.08285884]),
  array([-0.5957268 ,  0.02758616,  0.45619459, -0.06644023]),
  array([-2.76127331,  0.70851118,  2.29492636, -0.72690156]),
  array([-0.12418029, -1.63178097, -0.08622592,  1.6443