In [0]:
def generate_anchor(input_tensor, fmap_tensor):
    """
    Build the anchor grid for a feature map as a symbolic tensor.

    Args:
        input_tensor : Tensor, the network input. Axes 1 and 2 of its dynamic
            shape are read as height and width.
        fmap_tensor : Tensor, the backbone feature map. Axes 1 and 2 of its
            dynamic shape are read as height and width.

    Return:
        anchor_grid : Tensor (float64), shape (fmap_h, fmap_w, n_anchors * 4).
            Each cell is laid out as [cx1, cy1, w1, h1, cx2, cy2, w2, h2, ...]
            in input-image pixel units.
    """
    # input shape
    # Use the arguments, not notebook globals, so the function works for any
    # (input, feature map) pair that is passed in.
    input_h = K.shape(input_tensor)[1]

    # backbone shape
    backbone_h = K.shape(fmap_tensor)[1]
    backbone_w = K.shape(fmap_tensor)[2]

    # distance (in input pixels) between neighbouring feature-map pixels.
    # Only the height ratio is used; the same gap is applied to both axes.
    pixel_gap = tf.ceil(input_h / backbone_h)

    # generate anchor sizes: for every base size -> (w, h) of 1:1, 1:2 and 2:1
    anchor_default_sizes = [32., 64., 128.]
    anchor_sizes = []
    for size in anchor_default_sizes:
        anchor_sizes.extend([[size, size], [size, size*2], [size*2, size]])
    anchor_sizes = np.asarray(anchor_sizes)
    # Count the anchors that were actually generated.
    n_anchor_sizes = len(anchor_sizes)

    # generate anchor grid
    # 4 => cx, cy, w, h
    fmap_grid = tf.ones(shape=[backbone_h, backbone_w], dtype=tf.float64)

    # generate coordinate center_x, center_y
    range_h = tf.range(backbone_h)
    range_w = tf.range(backbone_w)
    cx, cy = tf.meshgrid(range_w, range_h)
    cx = tf.cast(cx, tf.float64)
    cy = tf.cast(cy, tf.float64)

    # shift cx, cy
    # Feature-map pixels are one stride (pixel_gap) apart in the input image;
    # the pixel_gap//2 offset moves each centre to the middle of its stride cell.
    cx = cx * pixel_gap + pixel_gap//2
    cy = cy * pixel_gap + pixel_gap//2

    # every anchor at a location shares the same centre, so repeat cx / cy
    # once per anchor
    grid_cx = tf.stack([cx]*n_anchor_sizes, axis=-1)
    grid_cy = tf.stack([cy]*n_anchor_sizes, axis=-1)

    # mapping ws, hs to anchor grid (broadcast the per-anchor sizes over the grid)
    anchor_ws = anchor_sizes[:, 0]
    anchor_hs = anchor_sizes[:, 1]
    grid_ws = tf.expand_dims(fmap_grid, axis=-1) * anchor_ws
    grid_hs = tf.expand_dims(fmap_grid, axis=-1) * anchor_hs

    # grid_cx, grid_cy, grid_ws, grid_hs each have shape (fmap_h, fmap_w, n_anchors):
    #   grid_cx[0, 0, :] => [x1, x2, x3, ...]
    #   grid_cy[0, 0, :] => [y1, y2, y3, ...]
    #   grid_ws[0, 0, :] => [w1, w2, w3, ...]
    #   grid_hs[0, 0, :] => [h1, h2, h3, ...]
    # Stacking on a new last axis gives (fmap_h, fmap_w, n_anchors, 4).
    anchor_grid = tf.stack([grid_cx, grid_cy, grid_ws, grid_hs], axis=-1)

    # Flatten the last two axes:
    #   [[x1, y1, w1, h1], [x2, y2, w2, h2], ...] => [x1,y1,w1,h1, x2,y2,w2,h2, ...]
    anchor_grid = tf.reshape(anchor_grid, [backbone_h, backbone_w, -1])
    return anchor_grid

# Test Setup Code

In [0]:
import cv2
import glob
import numpy as np 
from PIL import Image
import tensorflow as tf 
import matplotlib.pyplot as plt 

from tensorflow.python.keras.applications import ResNet50
from tensorflow.python.keras.layers import Layer
from tensorflow.python.keras.layers import Conv2D
from tensorflow.python.keras.layers import ZeroPadding2D
from tensorflow.python.keras.layers import Input
from tensorflow.python.keras.layers import UpSampling2D
from tensorflow.python.keras.layers import BatchNormalization
from tensorflow.python.keras.layers import Flatten
from tensorflow.python.keras.layers import Dense
from tensorflow.python.keras.layers import Dropout
from tensorflow.python.keras import optimizers
from tensorflow.keras.models import Model
from keras.utils import np_utils
from keras import backend as K
import urllib.request


# Sample image download
# Fetch one image and its bounding-box label file into the working directory.
sample_name = '342.jpg'

image_url = "https://pai-datasets.s3.ap-northeast-2.amazonaws.com/pascal/images/{}".format(sample_name)
urllib.request.urlretrieve(image_url, "sample_image.jpg")

# The label file name is the image file name with ".npy" appended.
mask_url = "https://pai-datasets.s3.ap-northeast-2.amazonaws.com/pascal/roidb/{}.npy".format(sample_name)
urllib.request.urlretrieve(mask_url, "sample_label.npy")


# Local paths of the files downloaded above
image_path = "sample_image.jpg"
label_path = "sample_label.npy"


# load sample image as RGB, scale it by 1/255 and add a leading batch axis
sample_img = np.asarray(Image.open(image_path).convert('RGB'))
sample_img=sample_img/255.
sample_tensor = np.expand_dims(sample_img, axis=0)

# load label (cast to int32 so the values can be used as pixel coordinates)
sample_bboxes = np.load(label_path).astype(np.int32)

# draw bounding boxes
# Copy the image once, before the loop, so every box accumulates on the same
# canvas. Copying inside the loop discarded all but the last box and left
# patched_image undefined when there were no boxes.
patched_image = sample_tensor[0].copy()
for bbox in sample_bboxes:
    print(bbox)
    # Plain Python ints keep cv2 happy regardless of the NumPy integer type.
    # The image is float in [0, 1], so red is 1.0 rather than 255.
    cv2.rectangle(patched_image,
                  (int(bbox[0]), int(bbox[1])),
                  (int(bbox[2]), int(bbox[3])),
                  (1.0, 0.0, 0.0),
                  10)
plt.imshow(patched_image)
plt.show()


# Sample CNN Model Setup
# Variable-size 3-channel input fed through ResNet50 (ImageNet weights, no top).
inputs = Input(shape=(None, None, 3), name='images')
backbone_layer = ResNet50(weights='imagenet', input_shape=(None,None,3), 
                 include_top=False)(inputs)

# object detection layer
# The last convolution has n_anchor * n_reg output channels.
# NOTE(review): the last layer also uses relu, so its outputs are non-negative
# — confirm this is intended for a regression head.
n_anchor = 9 
n_reg = 4 
layer = Conv2D(filters = 128, activation='relu', kernel_size=3, padding='same')(backbone_layer)
layer = Conv2D(filters = 256, activation='relu', kernel_size=3 ,padding='same')(layer)
layer = Conv2D(filters = n_anchor * n_reg , activation='relu', kernel_size=3, padding='same')(layer)

# Build the model and run one forward pass on the sample image.
model = Model(inputs, layer)
top_conv = model.predict(sample_tensor)


def calculate_iou(sample_bboxes, gt_bboxes):
    """
    Compute the pairwise IoU between two sets of corner-format boxes.

    Args:
        sample_bboxes : Ndarray, flat 1D array [x1, y1, x2, y2, x1, y1, x2, y2, ... ]
            (anything reshapeable to (N, 4))
        gt_bboxes : Ndarray, flat 1D array [x1, y1, x2, y2, x1, y1, x2, y2, ... ]
            (anything reshapeable to (M, 4))

    Return:
        iou : Ndarray, 2D float array of shape (N, M);
            iou[i, j] is the IoU of sample box i with ground-truth box j.
            Pairs that do not overlap (or whose union area is 0) get 0.
    """
    # 1D array to 2D array
    # [x1, y1, x2, y2, x1, y1, x2, y2]
    # >>>
    # [[x1, y1, x2, y2],
    #  [x1, y1, x2, y2]]
    res_sample_bboxes = np.asarray(sample_bboxes, dtype=np.float64).reshape([-1, 4])
    gt_sample_bboxes = np.asarray(gt_bboxes, dtype=np.float64).reshape([-1, 4])

    # Get Area: (x2 - x1) * (y2 - y1)
    area_sample = ((res_sample_bboxes[:, 2] - res_sample_bboxes[:, 0])
                   * (res_sample_bboxes[:, 3] - res_sample_bboxes[:, 1]))
    area_gt = ((gt_sample_bboxes[:, 2] - gt_sample_bboxes[:, 0])
               * (gt_sample_bboxes[:, 3] - gt_sample_bboxes[:, 1]))

    # expand dims for using broadcasting
    # (N, 4) -> (N, 1, 4)
    expand_sample = np.expand_dims(res_sample_bboxes, axis=1)
    # (M, 4) -> (1, M, 4)
    expand_gt = np.expand_dims(gt_sample_bboxes, axis=0)

    # top-left corner of the intersection: element-wise maximum
    x1y1 = np.maximum(expand_sample[:, :, :2], expand_gt[:, :, :2])
    # bottom-right corner of the intersection: element-wise minimum
    x2y2 = np.minimum(expand_sample[:, :, 2:], expand_gt[:, :, 2:])

    # get overlay area
    # Clamp width and height at 0 before multiplying. Without the clamp, two
    # negative extents (boxes disjoint on both axes) multiply into a positive
    # "overlap", and a single negative extent gives a negative IoU.
    overlay_wh = np.clip(x2y2 - x1y1, 0, None)
    overlay_area = np.prod(overlay_wh, axis=-1)

    # expand dimension for broadcasting: (N,) -> (N, 1)
    expand_area_sample = np.expand_dims(area_sample, axis=-1)
    union_area = expand_area_sample + area_gt - overlay_area

    # Avoid dividing by zero for degenerate (zero-area) pairs; they stay 0.
    iou = np.zeros_like(overlay_area)
    np.divide(overlay_area, union_area, out=iou, where=union_area > 0)

    return iou

def matching_policy(iou_matrix, iou_threshold=0.7):
    """
    Flag the anchors that count as positive matches.

    Args:
        iou_matrix : Ndarray, 2D array with one row per anchor and one
            column per ground-truth box

            [[anchor1_gt1, anchor1_gt2, anchor1_gt3],
             [anchor2_gt1, anchor2_gt2, anchor2_gt3],
                             ...
             [anchorN_gt1, anchorN_gt2, anchorN_gt3]]

        iou_threshold : float, an anchor whose IoU with any ground-truth box
            is strictly greater than this value is flagged

    Return:
        anchor_flag : Ndarray, 1D float array with one entry per anchor;
            1 for flagged anchors, -1 for all others
    """
    # anchors exceeding the threshold for at least one ground-truth box
    above_threshold = np.any(iou_matrix > iou_threshold, axis=-1)
    # row index of the highest-IoU anchor for every ground-truth column
    best_anchor_per_gt = iou_matrix.argmax(axis=0)

    anchor_count = len(iou_matrix)
    # start with everything marked -1 (unused)
    anchor_flag = np.full([anchor_count], -1.0)

    # mark the per-gt best anchors and the above-threshold anchors
    for selector in (best_anchor_per_gt, above_threshold):
        anchor_flag[selector] = 1

    assert anchor_flag.shape[0] == anchor_count
    return anchor_flag

## Test Code 

In [0]:
# Build the symbolic anchor grid for the model input and the backbone feature map.
anchor_grid = generate_anchor(inputs, backbone_layer)

In [0]:
# Evaluate the anchor grid on the sample image through the Keras backend session.
sess = K.get_session()
# NOTE(review): this initializes every global variable, which may overwrite
# the ImageNet weights loaded into ResNet50 — confirm this is acceptable.
sess.run(tf.global_variables_initializer())
anchor_grid_ = sess.run(anchor_grid, {inputs:sample_tensor})

In [0]:
# Split the last axis into (9 anchors, 4 values) so each anchor's
# cx, cy, w, h can be indexed separately.
res_anchor_grid = anchor_grid_.reshape(anchor_grid_.shape[0],anchor_grid_.shape[1], 9, 4)

In [0]:
# CX CY W H => X1 Y1 X2 Y2
# Rebuild the corner-format grid from the raw session output (anchor_grid_)
# instead of overwriting res_anchor_grid in place. The in-place version wrote
# into the array it read from, so re-running the cell converted values that
# were already converted. This version gives the same result on every run.
cxcywh_grid = anchor_grid_.reshape(anchor_grid_.shape[0], anchor_grid_.shape[1], -1, 4)

half_w_grid = cxcywh_grid[:, :, :, 2] / 2
half_h_grid = cxcywh_grid[:, :, :, 3] / 2

# x1
x1_grid = cxcywh_grid[:, :, :, 0] - half_w_grid

# x2
x2_grid = cxcywh_grid[:, :, :, 0] + half_w_grid

# y1
y1_grid = cxcywh_grid[:, :, :, 1] - half_h_grid

# y2
y2_grid = cxcywh_grid[:, :, :, 1] + half_h_grid

# (fmap_h, fmap_w, n_anchors, 4) with the last axis ordered x1, y1, x2, y2
res_anchor_grid = np.stack([x1_grid, y1_grid, x2_grid, y2_grid], axis=-1)

In [0]:
# Flatten the anchors to the 1D layout calculate_iou reshapes to (-1, 4).
sample_anchor = res_anchor_grid.reshape(-1)
ground_truth = sample_bboxes
# One row per anchor, one column per ground-truth box.
iou_matrix = calculate_iou(sample_anchor, ground_truth)
# Per-anchor flag: 1 for matched anchors, -1 otherwise.
matching_matrix = matching_policy(iou_matrix)
matching_indices = np.where(matching_matrix == 1)

# Repeat each flag 4 times so it lines up with the 4 values of every anchor
# in the flattened array, then keep only the flagged anchors.
repeat_matching_indices = np.repeat(matching_matrix, 4)
pos_anchors = sample_anchor[np.where(repeat_matching_indices == 1)]
pos_anchors = pos_anchors.reshape([-1, 4])

In [0]:
print(pos_anchors.shape)

# Copy the image once, before the loop, so every positive anchor accumulates
# on the same canvas. Copying and calling imshow inside the loop left only the
# last anchor visible.
patched_image = sample_tensor[0].copy()
for bbox in pos_anchors.astype(np.int32):
    # Plain Python ints keep cv2 happy regardless of the NumPy integer type.
    # The image is float in [0, 1], so red is 1.0 rather than 255.
    cv2.rectangle(patched_image,
                  (int(bbox[0]), int(bbox[1])),
                  (int(bbox[2]), int(bbox[3])),
                  (1.0, 0.0, 0.0),
                  10)
plt.imshow(patched_image)
plt.show()

In [0]:
# NOTE(review): `x` is not defined anywhere in the visible notebook, so this
# cell presumably raises NameError on a fresh kernel — confirm and remove.
x