In [1]:
import cv2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tqdm import tqdm
from itertools import product

import os

In [2]:
df = pd.read_csv('./res/train_df.csv')
image = cv2.imread('./res/train_imgs/001-1-1-01-Z17_A-0000001.jpg', cv2.COLOR_BGR2RGB)
inputs = tf.expand_dims(image, 0)

In [512]:
# plt.imshow(image)
# plt.axis('off')
# plt.show()

In [5]:
df['x_min'] = df.iloc[:, 1:49:2].apply(lambda x: int(min(x)), axis=1)
df['x_max'] = df.iloc[:, 1:49:2].apply(lambda x: int(max(x)), axis=1)
df['y_min'] = df.iloc[:, 2:49:2].apply(lambda x: int(min(x)), axis=1)
df['y_max'] = df.iloc[:, 2:49:2].apply(lambda x: int(max(x)), axis=1)

In [6]:
df.head()

Unnamed: 0,image,nose_x,nose_y,left_eye_x,left_eye_y,right_eye_x,right_eye_y,left_ear_x,left_ear_y,right_ear_x,...,spine1(waist)_x,spine1(waist)_y,left_instep_x,left_instep_y,right_instep_x,right_instep_y,x_min,x_max,y_min,y_max
0,001-1-1-01-Z17_A-0000001.jpg,1046.389631,344.757881,1041.655294,329.820225,1059.429507,334.48423,1020.117796,338.890539,1048.0,...,1026.51577,514.05473,998.578836,826.718013,1063.204067,838.827465,956,1134,316,838
1,001-1-1-01-Z17_A-0000003.jpg,1069.850679,340.711494,1058.608552,324.59369,1075.242111,325.59369,1041.422997,331.694815,1065.593682,...,1058.766231,508.797029,1002.265676,699.062706,1066.376234,841.499445,974,1144,323,841
2,001-1-1-01-Z17_A-0000005.jpg,1084.475902,337.000008,1078.717997,323.757889,1095.648412,325.242119,1061.039884,329.351571,1086.461032,...,1052.844144,495.890539,989.437847,808.757889,1066.071417,841.749554,984,1163,319,841
3,001-1-1-01-Z17_A-0000007.jpg,1042.320047,361.452689,1037.907194,344.117804,1050.328382,353.913729,1016.844144,340.913737,1042.164191,...,990.375124,507.624866,1001.305177,829.233767,1159.516499,599.389997,941,1159,328,829
4,001-1-1-01-Z17_A-0000009.jpg,1058.046395,343.164191,1046.717997,331.703163,1058.13265,331.781079,1031.258806,338.59369,1049.81262,...,1034.391088,510.843791,998.625231,805.218921,1059.625956,839.765102,961,1132,331,839


# Faster R-CNN

## backbone

### ResNet

In [7]:
# resnet = tf.keras.applications.resnet

### VGG

In [8]:
def get_base(img_size, model='vgg'):
    if model=='vgg':
        base_model = tf.keras.applications.VGG16(include_top=False, input_shape=img_size)
    elif model == 'resnet':
        pass
    else:
        raise Exception('vgg, resnet')
        
    base_model = tf.keras.applications.VGG16(include_top=False, weights='imagenet', input_shape=img_size)
    feature_extractor = base_model.get_layer("block5_conv3")
    base_model = tf.keras.models.Model(inputs=base_model.input, outputs=feature_extractor.output)
    return base_model

In [9]:
# base_model = get_base(image.shape, model='vgg')

# inputs = tf.expand_dims(image, 0)

# output_map = base_model(inputs)
# imgArray = output_map.numpy().squeeze(0)
# # output_map

## Region Proposal Network

### Image resize

In [10]:
df_new = df.copy()

In [11]:
Rx = 800/1920
Ry = 800/1080

df_new.iloc[:, 1:49:2] = df_new.iloc[:, 1:49:2] * Rx
df_new.iloc[:, 2:49:2] = df_new.iloc[:, 2:49:2] * Ry

In [12]:
df_new['x_min'] = df_new.iloc[:, 1:49:2].apply(lambda x: int(min(x)), axis=1)
df_new['x_max'] = df_new.iloc[:, 1:49:2].apply(lambda x: int(max(x)), axis=1)
df_new['y_min'] = df_new.iloc[:, 2:49:2].apply(lambda x: int(min(x)), axis=1)
df_new['y_max'] = df_new.iloc[:, 2:49:2].apply(lambda x: int(max(x)), axis=1)

In [13]:
image = np.copy(cv2.resize(image, (800, 800)))
inputs = tf.expand_dims(image, 0)

### Anchor boxes

In [513]:
# ratio = 2**4

# w=image.shape[1]//ratio
# h=image.shape[0]//ratio

# img_ = np.copy(image)
# plt.figure(figsize=(10, 10))
# for x in range(img_.shape[1]//w//2, img_.shape[1], img_.shape[1]//w):
#     for y in range(img_.shape[0]//h//2, img_.shape[0], img_.shape[0]//h):
#             cv2.circle(img_, (x, y), radius=1, color=(255, 0, 0), thickness=3)
            
# plt.imshow(img_)
# plt.axis('off')
# plt.show()

In [15]:
def anchor_box_generator(x, y):
    ratio = [(1/np.sqrt(2), np.sqrt(2)), (1, 1), (np.sqrt(2), 1/np.sqrt(2))]
    scales = [128, 256, 512]
    anchor_boxes = []
    for scale in scales:
        for w, h in ratio:
            w *= scale
            h *= scale
            anchor_boxes.append([x, y, w, h])
    return anchor_boxes

In [16]:
def Anchor_Boxes(img_shape, model='vgg'):
    '''
    input
    img_shape : image shape
    output 
    numpy array shape (w * h * 9, 4)
    '''
    if model == 'vgg':
        ratio = 2**4
        
    w=image.shape[1]//ratio
    h=image.shape[0]//ratio
    
    anchor_boxes = []
    for x in range(image.shape[1]//w//2, image.shape[1], image.shape[1]//w):
        for y in range(image.shape[0]//h//2, image.shape[0], image.shape[0]//h):
            anchor_boxes.append(anchor_box_generator(x, y))
    return np.array(anchor_boxes).reshape(-1, 4)

In [17]:
anchor_boxes = Anchor_Boxes(img_shape=image.shape, model='vgg')

### Ground Truth Generating 

In [18]:
# ground_truth = df.iloc[:,:1].copy() 
# ground_truth['x_min'] = df['x_min'] - (df['x_max'] - df['x_min'])*.1
# ground_truth['x_max'] = df['x_max'] + (df['x_max'] - df['x_min'])*.1
# ground_truth['y_min'] = df['y_min'] - (df['y_max'] - df['y_min'])*.1
# ground_truth['y_max'] = df['y_max'] + (df['y_max'] - df['y_min'])*.07

In [19]:
ground_truth = df_new.iloc[:,:1].copy() 
ground_truth['x_min'] = df_new['x_min'] - (df_new['x_max'] - df_new['x_min'])*.1
ground_truth['x_max'] = df_new['x_max'] + (df_new['x_max'] - df_new['x_min'])*.1
ground_truth['y_min'] = df_new['y_min'] - (df_new['y_max'] - df_new['y_min'])*.1
ground_truth['y_max'] = df_new['y_max'] + (df_new['y_max'] - df_new['y_min'])*.07

In [20]:
ground_truth['w'] = ground_truth['x_max'] - ground_truth['x_min']
ground_truth['h'] = ground_truth['y_max'] - ground_truth['y_min']
ground_truth['x'] = ground_truth['w']/2 + ground_truth['x_min']
ground_truth['y'] = ground_truth['h']/2 + ground_truth['y_min']

In [21]:
GT = np.array(ground_truth[['x', 'y', 'w', 'h']])

In [22]:
GT = GT[:10]

### IoU

In [23]:
def anchor_to_coordinate(box):    
    x1 = box[0] - box[2]/2
    x2 = box[0] + box[2]/2
    y1 = box[1] - box[3]/2
    y2 = box[1] + box[3]/2
    return (x1, x2, y1, y2)

In [24]:
def coordinate_to_anxhor(box):
    w = box[1] - box[0]
    h = box[3] - box[2]
    x = box[0] + w/2
    y = box[2] + h/2
    return (x, y, w, h)

In [25]:
def IoU(box1, box2):
    '''
    anchor ver
    '''
    box1_area = box1[2] * box1[3]
    box2_area = box2[2] * box2[3]
    
    x1 = max(box1[0] - box1[2]/2, box2[0] - box2[2]/2)
    x2 = min(box1[0] + box1[2]/2, box2[0] + box2[2]/2)
    
    y1 = max(box1[1] - box1[3]/2, box2[1] - box2[3]/2)
    y2 = min(box1[1] + box1[3]/2, box2[1] + box2[3]/2)
    
    h = max(0.0, y2 - y1 + 1)
    w = max(0.0, x2 - x1 + 1)
    
    if (w <= 0) or (h <= 0):
        return 0.0
    
    intersect = h * w
    union = box1_area + box2_area - intersect
    return intersect / union 

In [26]:
# def IoU(box1, box2):
#     '''
#     coordinate ver
#     '''
#     box1_area = (box1[1] - box1[0] + 1) * (box1[3] - box1[2] + 1)
#     box2_area = (box2[1] - box2[0] + 1) * (box2[3] - box2[2] + 1)
    
#     x1 = max(box1[0], box2[0])
#     x2 = min(box1[1], box2[1])
    
#     y1 = max(box1[2], box2[2])
#     y2 = min(box1[3], box2[3])    
    
#     h = max(0.0, y2 - y1 + 1)
#     w = max(0.0, x2 - x1 + 1)
    
#     if (w <= 0) or (h <= 0):
#         return 0.0
    
#     intersect = h * w
#     union = box1_area + box2_area - intersect
    
#     iou = intersect / union
#     return iou

### Label Generating

In [257]:
def label_generator(GT, anchor_boxes):
    cls_label = -np.ones(shape=(len(GT), anchor_boxes.shape[0]))
    reg_label = np.zeros(shape=(len(GT), anchor_boxes.shape[0], 4))
    
    pos_iou_threshold = 0.7
    neg_iou_threshold = 0.3
    
    n_sample = 256
    pos_ratio = 0.5
    n_pos = int(pos_ratio * n_sample)
    
    for i in tqdm(range(cls_label.shape[0])):
        gt = GT[i]
        max_iou = 0
        
        x = gt[0]
        y = gt[1]
        w = gt[2]
        h = gt[3]
        
        for j in range(cls_label.shape[1]):
            iou = IoU(gt, anchor_boxes[j])
            xa = anchor_boxes[j][0]
            ya = anchor_boxes[j][1]
            wa = anchor_boxes[j][2]
            ha = anchor_boxes[j][3]
            
            if iou >= pos_iou_threshold:
                cls_label[i][j] = 1
                reg_label[i][j][0] = (x - xa)/wa
                reg_label[i][j][1] = (y - ya)/ha
                reg_label[i][j][2] = np.log(w/wa)
                reg_label[i][j][3] = np.log(h/ha)
                
            elif iou < neg_iou_threshold:
                cls_label[i][j] = 0
                
            if iou > max_iou:
                max_iou = iou
                tmp_i, tmp_j = i, j
                tmp_tx = (x - xa)/wa
                tmp_ty = (y - ya)/ha
                tmp_tw = np.log(w/wa)
                tmp_th = np.log(h/ha)

        cls_label[tmp_i][tmp_j] = 1
        reg_label[tmp_i][tmp_j][0] = tmp_tx
        reg_label[tmp_i][tmp_j][1] = tmp_ty
        reg_label[tmp_i][tmp_j][2] = tmp_tw
        reg_label[tmp_i][tmp_j][3] = tmp_th
        
        pos_index = np.where(cls_label[i] == 1)[0]
        
        if len(pos_index) > n_pos:
            disable_index = np.random.choice(
                pos_index,
                size = (len(pos_index) - n_pos)
            )
            cls_label[i][disable_index] = -1
            reg_label[i][disable_index] = 0
            
    return cls_label, reg_label

In [258]:
cls_label, reg_label = label_generator(GT, anchor_boxes)

100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:04<00:00,  2.06it/s]


In [283]:
np.where(reg_label[1] != 0)

(array([11469, 11469, 11469, 11469], dtype=int64),
 array([0, 1, 2, 3], dtype=int64))

In [465]:
def label_generator(GT, anchor_boxes):
    cls_label = -np.ones(shape=(len(GT), anchor_boxes.shape[0]))
    pos_iou_threshold = 0.5
    neg_iou_threshold = 0.3
    n_sample = 256
    pos_ratio = 0.5
    n_pos = int(pos_ratio * n_sample)
    
    for i in tqdm(range(cls_label.shape[0])):
        gt = GT[i]
        max_iou = 0
        
        for j in range(cls_label.shape[1]):
            iou = IoU(gt, anchor_boxes[j])
            
            if iou >= pos_iou_threshold:
                cls_label[i][j] = 1
            elif iou < neg_iou_threshold:
                cls_label[i][j] = 0
                
            if iou > max_iou:
                max_iou = iou
                tmp_i, tmp_j = i, j
                
        cls_label[tmp_i][tmp_j] = 1
        
        pos_index = np.where(cls_label[i] == 1)[0]
        if len(pos_index) > n_pos:
            disable_index = np.random.choice(
                pos_index,
                size = (len(pos_index) - n_pos)
            )
            cls_label[i][disable_index] = -1
            
    reg_label = np.zeros(shape=(len(GT), anchor_boxes.shape[0], 4))
#     for i in range(len(cls_label)):
#         reg_label[i] = anchor_boxes * np.broadcast_to(tf.cast(cls_label[i] > 0, tf.int32), (4, len(cls_label[i]))).T
#         indices = np.where(reg_label[i] != 0)[0]
#         print(indices)
#         x, y, w, h = GT[i][0], GT[i][1], GT[i][2], GT[i][3]
        
#         tx = (x - reg_label[i][indices][0]) / (reg_label[i][indices][2])
#         ty = (y - reg_label[i][indices][1]) / (reg_label[i][indices][3])
#         tw = np.log(w / reg_label[i][indices][2]) 
#         th = np.log(h / reg_label[i][indices][3]) 
#         reg_label[i][indices] = np.stack([tx, ty, tw, th])
        
    return cls_label, reg_label

In [466]:
cls_label, reg_label = label_generator(GT, anchor_boxes)

100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:04<00:00,  2.42it/s]


In [491]:
for i in range(len(cls_label)):
    print(i)
    reg_label[i] = anchor_boxes * np.broadcast_to(tf.cast(cls_label[i] > 0, tf.int32), (4, len(cls_label[i]))).T
    indices = np.where(reg_label[i] != 0)[0]
    x, y, w, h = GT[i][0], GT[i][1], GT[i][2], GT[i][3]

    tx = (x - reg_label[i][indices][0]) / (reg_label[i][indices][2])
    ty = (y - reg_label[i][indices][1]) / (reg_label[i][indices][3])
    tw = np.log(w / reg_label[i][indices][2]) 
    th = np.log(h / reg_label[i][indices][3]) 
    reg_label[i][indices] = np.stack([tx, ty, tw, th])

0
1
2
3


ValueError: shape mismatch: value array of shape (4,4) could not be broadcast to indexing result of shape (140,4)

In [496]:
i=3
reg_label[i] = anchor_boxes * np.broadcast_to(tf.cast(cls_label[i] > 0, tf.int32), (4, len(cls_label[i]))).T

In [497]:
reg_label[i]

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       ...,
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [498]:
indices = np.where(reg_label[i] != 0)
indices

(array([11460, 11460, 11460, 11460, 11469, 11469, 11469, 11469, 11478,
        11478, 11478, 11478, 11487, 11487, 11487, 11487, 11496, 11496,
        11496, 11496, 11505, 11505, 11505, 11505, 11514, 11514, 11514,
        11514, 11910, 11910, 11910, 11910, 11919, 11919, 11919, 11919,
        11928, 11928, 11928, 11928, 11937, 11937, 11937, 11937, 11946,
        11946, 11946, 11946, 11955, 11955, 11955, 11955, 11964, 11964,
        11964, 11964, 12360, 12360, 12360, 12360, 12369, 12369, 12369,
        12369, 12378, 12378, 12378, 12378, 12387, 12387, 12387, 12387,
        12396, 12396, 12396, 12396, 12405, 12405, 12405, 12405, 12414,
        12414, 12414, 12414, 12810, 12810, 12810, 12810, 12819, 12819,
        12819, 12819, 12828, 12828, 12828, 12828, 12837, 12837, 12837,
        12837, 12846, 12846, 12846, 12846, 12855, 12855, 12855, 12855,
        12864, 12864, 12864, 12864, 13260, 13260, 13260, 13260, 13269,
        13269, 13269, 13269, 13278, 13278, 13278, 13278, 13287, 13287,
      

In [509]:
reg_label[i][indices]

array([408.        , 376.        , 181.01933598, 362.03867197,
       408.        , 392.        , 181.01933598, 362.03867197,
       408.        , 408.        , 181.01933598, 362.03867197,
       408.        , 424.        , 181.01933598, 362.03867197,
       408.        , 440.        , 181.01933598, 362.03867197,
       408.        , 456.        , 181.01933598, 362.03867197,
       408.        , 472.        , 181.01933598, 362.03867197,
       424.        , 376.        , 181.01933598, 362.03867197,
       424.        , 392.        , 181.01933598, 362.03867197,
       424.        , 408.        , 181.01933598, 362.03867197,
       424.        , 424.        , 181.01933598, 362.03867197,
       424.        , 440.        , 181.01933598, 362.03867197,
       424.        , 456.        , 181.01933598, 362.03867197,
       424.        , 472.        , 181.01933598, 362.03867197,
       440.        , 376.        , 181.01933598, 362.03867197,
       440.        , 392.        , 181.01933598, 362.03

In [499]:
x, y, w, h = GT[i][0], GT[i][1], GT[i][2], GT[i][3]

In [502]:
tx = (x - reg_label[i][indices][0]) / (reg_label[i][indices][2])
ty = (y - reg_label[i][indices][1]) / (reg_label[i][indices][3])
tw = np.log(w / reg_label[i][indices][2]) 
th = np.log(h / reg_label[i][indices][3]) 

In [503]:
np.stack([tx, ty, tw, th])

array([ 0.16296602,  0.12964085, -0.50542279,  0.18145478])

In [489]:
reg_label[i][indices]

array([ 0.23754368,  0.08202162, -0.7122172 ,  0.22367741])

In [441]:
x = GT[ii][0]
y = GT[ii][1]
w = GT[ii][2]
h = GT[ii][3]

In [442]:
tx = (x - reg_label[ii][idc][0]) / (reg_label[ii][idc][2])
ty = (y - reg_label[ii][idc][1]) / (reg_label[ii][idc][3])
tw = np.log(w / reg_label[ii][idc][2]) 
th = np.log(h / reg_label[ii][idc][3]) 

In [443]:
tx

0.23754368430485584

In [444]:
np.stack([tx, ty, tw, th])

array([ 0.23754368,  0.08202162, -0.7122172 ,  0.22367741])

In [352]:
reg_label[0][idc] = np.stack([tx, ty, tw, th])

In [298]:
np.where(indices == True)

(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int64),
 array([11019, 11469, 11469, 11469, 11019, 11934, 11469, 12384, 11019,
        11469], dtype=int64))

In [31]:
# index_inside = np.where(
#     (anchor_boxes[:, 0] - anchor_boxes[:, 2]/2 >= 0) &
#     (anchor_boxes[:, 0] + anchor_boxes[:, 2]/2 <= image.shape[1]) &
#     (anchor_boxes[:, 1] - anchor_boxes[:, 3]/2 >= 0) &
#     (anchor_boxes[:, 1] + anchor_boxes[:, 3]/2 <= image.shape[0]),
# )[0]

# valid_anchor_boxes = anchor_boxes[index_inside]

# valid_anchor_boxes

### Region Proposal Network

In [238]:
class RPN(tf.keras.models.Model):
    def __init__(self, img_size, anchor_boxes, k=9, backbone='vgg',**kwargs):
        super(RPN, self).__init__(**kwargs)
        self.backbone = backbone
        self.img_size = img_size
        self.anchor_boxes = anchor_boxes
        self.k = k
        self.base_model = get_base(self.img_size, model=self.backbone)
        self.window = tf.keras.layers.Conv2D(filters=256, kernel_size=3, strides=1, padding='same')
        self.bbox_reg = tf.keras.layers.Conv2D(filters=self.k*4, kernel_size=1, activation='relu')
        self.bbox_reg_reshape = tf.keras.layers.Reshape((-1, 4), name='reg_out')
        self.cls = tf.keras.layers.Conv2D(filters=self.k, kernel_size=1, activation='sigmoid')
        self.cls_reshape = tf.keras.layers.Reshape((-1, 1), name='cls_out')
    
    @tf.function
    def boundingbox_regression(self, gt_pred):
        pass
    
#     # 256개 sampling
#     def positive_label_selector(self, label):
#         pass
    
    
    def compile(self, optimizer):
        super(RPN, self).compile()
        self.optimizer = optimizer
        self.loss_tracker = tf.keras.metrics.Mean(name='loss')
    
    def Cls_Loss(self, y_true, y_pred):
        indices = tf.where(tf.not_equal(y_true, tf.constant(-1.0, dtype=tf.float64)))
        target = tf.gather_nd(y_true, indices)
        output = tf.gather_nd(y_pred, indices)
        return tf.losses.BinaryCrossentropy()(target, output)

    def Reg_Loss(self, y_true, y_pred):
        indices = tf.reduce_any(tf.not_equal(y_true, 0), axis=-1)
        loss_fn = tf.losses.Huber(reduction=tf.losses.Reduction.NONE)
        loss_for_all = loss_fn(y_true[indices], y_pred[indices])
        loss_for_all = tf.reduce_mean(loss_for_all, axis=-1)
        return loss_for_all
    
    def train_step(self, data):
        x, y = data
        y_cls = y[0]
        y_reg = y[1]
        rpn_lambda = 10
        
        with tf.GradientTape() as tape:
            cls, bbox_reg = self(x, training=True)
            cls_loss = self.Cls_Loss(y_cls, cls)
            reg_loss = self.Reg_Loss(y_reg, bbox_reg)
            losses = cls_loss + rpn_lambda * reg_loss
            
        trainable_vars = self.trainable_variables
        grad = tape.gradient(losses, trainable_vars)
        self.optimizer.apply_gradients(zip(grad, trainable_vars))
        self.loss_tracker.update_state(losses)
        return {'rpn_loss': self.loss_tracker.result()}

    def call(self, inputs):
        feature_extractor = self.base_model(inputs)
        intermediate = self.window(feature_extractor)
        cls_ = self.cls(intermediate)
        cls = self.cls_reshape(cls_)
        bbox_reg_ = self.bbox_reg(intermediate)
        bbox_reg = self.bbox_reg_reshape(bbox_reg_)
        return cls, bbox_reg

In [33]:
tf.keras.backend.clear_session()
rpn = RPN(img_size=image.shape, k=9, backbone='vgg')
cls, bounding = rpn(inputs)

In [34]:
rpn.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001)
)

In [35]:
rpn.fit(
    x=tf.expand_dims(image, 0), 
    y=(tf.expand_dims(cls_label[0], 0), tf.expand_dims(reg_label[0], 0))
)



<tensorflow.python.keras.callbacks.History at 0x25d8a992dc8>

In [47]:
score, rp = rpn(tf.expand_dims(image, 0))

In [37]:
score = tf.squeeze(score, 0)
score = tf.squeeze(score, -1)

In [38]:
rp = tf.squeeze(rp, 0)

In [78]:
rp

<tf.Tensor: shape=(1, 22500, 4), dtype=float32, numpy=
array([[[ 27.072367, -17.757364,  46.20609 ,  56.99929 ],
        [  9.043662, -68.60224 ,  23.83387 , -48.317142],
        [-47.323994, -65.72677 ,   8.107268,  14.995268],
        ...,
        [-66.37635 , -16.582926, -31.908205, -57.88652 ],
        [-43.274048,  25.766932, -27.276676,  29.072243],
        [-11.111572, -56.162544,  41.466   , -37.205826]]], dtype=float32)>

In [76]:
x = rp[:, :, 0]
y = rp[:, :, 1]
w = rp[:, :, 2]
h = rp[:, :, 3]

In [77]:
x

<tf.Tensor: shape=(1, 22500), dtype=float32, numpy=
array([[ 27.072367,   9.043662, -47.323994, ..., -66.37635 , -43.274048,
        -11.111572]], dtype=float32)>

In [81]:
xa = tf.expand_dims(reg_label[0], 0)[:, :, 0]

<tf.Tensor: shape=(1, 22500), dtype=float64, numpy=array([[0., 0., 0., ..., 0., 0., 0.]])>

In [44]:
selected_indices = tf.image.non_max_suppression(rp, score, max_output_size=128, score_threshold=0.01)
rpbox = tf.gather(rp, selected_indices)

In [45]:
rpbox

<tf.Tensor: shape=(128, 4), dtype=float32, numpy=
array([[-1.53232965e+01,  1.85822353e+01, -6.47718735e+01,
         3.01725159e+01],
       [ 4.49958992e+00,  1.61456413e+01,  1.20101337e+01,
        -2.58533592e+01],
       [ 5.27767992e+00,  2.28491039e+01,  9.12208557e+00,
        -1.71123066e+01],
       [-6.59737885e-01,  1.40798702e+01,  8.64930725e+00,
        -2.41604137e+01],
       [ 8.02763224e-01,  2.25022888e+01,  5.21410036e+00,
        -1.14673538e+01],
       [ 4.57173347e+00,  2.12132034e+01,  1.06718044e+01,
        -1.56425714e+00],
       [-4.58380604e+00,  1.25502472e+01,  4.99760437e+00,
        -1.61065998e+01],
       [-2.20352411e+00,  2.22810440e+01,  1.22726977e+00,
         1.36037910e+00],
       [ 5.03216696e+00,  2.12393665e+01,  6.82959270e+00,
         1.11996050e+01],
       [-3.98395920e+00,  2.25362444e+00,  9.38191891e+00,
        -1.06109552e+01],
       [-1.00711288e+01,  1.14771023e+01,  4.24704170e+00,
        -1.60390425e+00],
       [-7.6752

In [219]:
def draw_nms(ground_truth_row, anchor_boxes, score, max_output_size=5):
    colors = {k: tuple(map(int, np.random.randint(0, 255, 3))) for k in range(max_output_size)}
    x1 = int(ground_truth_row['x_min'])
    x2 = int(ground_truth_row['x_max'])
    y1 = int(ground_truth_row['y_min'])
    y2 = int(ground_truth_row['y_max'])
    
    image = cv2.imread(f'./res/train_imgs/{ground_truth_row["image"]}', cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (800, 800))
    cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), thickness=2)

    GT = ground_truth_row[['x', 'y', 'w', 'h']]
    
    selected_indices = tf.image.non_max_suppression(anchor_boxes, score, max_output_size=max_output_size, score_threshold=0.01)
    anchor_boxes = tf.gather(anchor_boxes, selected_indices)
    
    for i, anchor_box in enumerate(anchor_boxes):
        anchor_box = anchor_to_coordinate(anchor_box.numpy())
        cv2.rectangle(
            image, 
            (int(anchor_box[0]), int(anchor_box[2])), (int(anchor_box[1]), int(anchor_box[3])), 
            colors.get(i), 
            thickness=2
        )

    fig, ax = plt.subplots(dpi=200)
    ax.imshow(image)
    ax.axis('off')
    plt.show()

In [510]:
# draw_nms(ground_truth.iloc[0], rp, score, max_output_size=10000)

## Detector

### Non-Maximum Suppression

In [452]:
ious.shape

(50, 44210)

In [118]:
iou = []
qq = np.array(GT)[0]
for box in valid_anchor_boxes:
    iou.append(IoU(qq, box))
iou = np.array(iou)

In [123]:
np.array(GT)[0]

array([1045.  ,  569.17,  213.6 ,  610.74])

In [119]:
selected_indices = tf.image.non_max_suppression(valid_anchor_boxes, iou, max_output_size=128, score_threshold=0.01)
# tf.gather(valid_anchor_boxes, selected_indices)

In [120]:
for i, j in zip(iou[np.array(selected_indices)][:5], tf.gather(valid_anchor_boxes, selected_indices)[:5]):
    print(f'{i}, \t{j}{i}')

0.5086602644910649, 	[1032.          456.          181.01933598  362.03867197]0.5086602644910649
0.5086602644910649, 	[1032.          552.          181.01933598  362.03867197]0.5086602644910649
0.5023728653744052, 	[984.         520.         362.03867197 724.07734394]0.5023728653744052
0.5023728653744052, 	[1064.          616.          362.03867197  724.07734394]0.5023728653744052
0.4567209359713382, 	[984.         664.         362.03867197 724.07734394]0.4567209359713382


In [115]:
def draw_nms(ground_truth_row, anchor_boxes, score, max_output_size=5):
    colors = {k: tuple(map(int, np.random.randint(0, 255, 3))) for k in range(max_output_size)}
    x1 = int(ground_truth_row['x_min'])
    x2 = int(ground_truth_row['x_max'])
    y1 = int(ground_truth_row['y_min'])
    y2 = int(ground_truth_row['y_max'])
    
    image = cv2.imread(f'./res/train_imgs/{ground_truth_row["image"]}', cv2.COLOR_BGR2RGB)
    cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), thickness=4)

    GT = ground_truth_row[['x', 'y', 'w', 'h']]
    iou = []
    for box in anchor_boxes:
        iou.append(IoU(GT, box))
    iou = np.array(iou)
    
    selected_indices = tf.image.non_max_suppression(anchor_boxes, iou, max_output_size=max_output_size, score_threshold=0.01)
    anchor_boxes = tf.gather(anchor_boxes, selected_indices)
    
    for i, anchor_box in enumerate(anchor_boxes):
        anchor_box = anchor_to_coordinate(anchor_box.numpy())
        cv2.rectangle(
            image, 
            (int(anchor_box[0]), int(anchor_box[2])), (int(anchor_box[1]), int(anchor_box[3])), 
            colors.get(i), 
            thickness=2
        )

    fig, ax = plt.subplots(dpi=200)
    ax.imshow(image)
    ax.axis('off')
    plt.show()

In [511]:
# GT = ground_truth.iloc[0][['x', 'y', 'w', 'h']]
# iou = []
# for box in anchor_boxes:
#     iou.append(IoU(GT, box))
# iou = np.array(iou)

# draw_nms(ground_truth.iloc[0], valid_anchor_boxes, max_output_size=5)

### Regional Interest Projection

In [None]:
class Regional_Interest_Projection(tf.keras.layers.Lyaer):
    def __init__(self, base_layer, regional_interest, **kwargs):
        super(Regional_Interest_Projection, self).__init__(**kwargs)
        self.base_layer = base_layer
        self.regional_interest = regional_interest
        
    def projection(self, x):
        pass
        
    def call(self, anchor_box):
        x = projection(anchor_box)
        return x

In [None]:
def projection(inputs):
    pass

def Regional_Interest_Projection(feature_map, regional_interest):
    return projection(feature_map, regional_interest)

### RoI pooling

In [106]:
class RoIPooling(tf.keras.layers.Layer):
    def __init__(self, pool_size, num_rois, **kwargs):
        super(RoIPooling, self).__init__(**kwargs)
        self.pool_size = pool_size
        self.num_rois = num_rois

    def build(self, input_shape):
        self.nb_channels = input_shape[0][3]   

    def compute_output_shape(self, input_shape):
        return (None, self.num_rois, self.pool_size, self.pool_size, self.nb_channels)

    def call(self, x):
        
        assert(len(x) == 2)

        # x[0] is image with shape (rows, cols, channels)
        img = x[0]

        # x[1] is roi with shape (num_rois,4) with ordering (x,y,w,h)
        rois = x[1]

        input_shape = tf.shape(img)

        outputs = []
        for roi_idx in range(self.num_rois):

            x = rois[0, roi_idx, 0]
            y = rois[0, roi_idx, 1]
            w = rois[0, roi_idx, 2]
            h = rois[0, roi_idx, 3]

            x = tf.keras.backend.cast(x, 'int32')
            y = tf.keras.backend.cast(y, 'int32')
            w = tf.keras.backend.cast(w, 'int32')
            h = tf.keras.backend.cast(h, 'int32')

            # Resized roi of the image to pooling size (7x7)
            rs = tf.image.resize(img[:, y:y+h, x:x+w, :], (self.pool_size, self.pool_size))
            outputs.append(rs)
                

        final_output = tf.keras.backend.concatenate(outputs, axis=0)

        # Reshape to (1, num_rois, pool_size, pool_size, nb_channels)
        final_output = tf.keras.backend.reshape(final_output, (1, self.num_rois, self.pool_size, self.pool_size, self.nb_channels))

        # permute_dimensions is similar to transpose
        final_output = tf.keras.backend.permute_dimensions(final_output, (0, 1, 2, 3, 4))

        return final_output
    
    
    def get_config(self):
        config = {'pool_size': self.pool_size,
                  'num_rois': self.num_rois}
        base_config = super(RoiPoolingConv, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

In [27]:
class ROIPoolingLayer(tf.keras.layers.Layer):
    def __init__(self, h, w, **kwargs):
        super(RoIPooling, self).__init__(**kwargs)
        self.h = h
        self.w = w
        
    @staticmethod
    def _pool_rois(feature_map, rois, pooled_height, pooled_width):
        def curried_pool_roi(roi):
            return ROIPoolingLayer._pool_roi(feature_map, roi,
                                             pooled_height, pooled_width)

        pooled_areas = tf.map_fn(curried_pool_roi, rois, dtype=tf.float32)
        return pooled_areas

    @staticmethod
    def _pool_roi(feature_map, roi, pooled_height, pooled_width):

        feature_map_height = int(feature_map.shape[0])
        feature_map_width = int(feature_map.shape[1])

        h_start = tf.cast(feature_map_height * roi[0], 'int32')
        w_start = tf.cast(feature_map_width * roi[1], 'int32')
        h_end = tf.cast(feature_map_height * roi[2], 'int32')
        w_end = tf.cast(feature_map_width * roi[3], 'int32')

        region = feature_map[h_start:h_end, w_start:w_end, :]

        region_height = h_end - h_start
        region_width = w_end - w_start
        h_step = tf.cast(region_height / pooled_height, 'int32')
        w_step = tf.cast(region_width / pooled_width, 'int32')

        areas = [[(
            i*h_step,
            j*w_step,
            (i+1)*h_step if i+1 < pooled_height else region_height,
            (j+1)*w_step if j+1 < pooled_width else region_width
        )
            for j in range(pooled_width)]
            for i in range(pooled_height)]

        def pool_area(x):
            return tf.math.reduce_max(region[x[0]:x[2], x[1]:x[3], :], axis=[0, 1])

        pooled_features = tf.stack([[pool_area(x) for x in row] for row in areas])
        return pooled_features
        
    def call(self, x):
        def curried_pool_rois(x):
            return ROIPoolingLayer._pool_rois(x[0], x[1],
                                              self.pooled_height,
                                              self.pooled_width)

        pooled_areas = tf.map_fn(curried_pool_rois, x, dtype=tf.float32)
        return pooled_areas
        
    def compute_output_shape(self, input_shape):
        feature_map_shape, rois_shape = input_shape
        assert feature_map_shape[0] == rois_shape[0]
        batch_size = feature_map_shape[0]
        n_rois = rois_shape[1]
        n_channels = feature_map_shape[3]
        return (batch_size, n_rois, self.pooled_height,
                self.pooled_width, n_channels)

###  Classifier

In [None]:
class Classifie(tf.keras.layers.Layer):
    def __init__(self, base_layers, input_rois, **kwargs):
        super(Classifie, self).__init__(**kwargs)
        self.flatten = tf.keras.layers.Flatten()
        self.dense1 = tf.keras.layers.Dense()
        self.dense2 = tf.kersa.layers.Dense()
        
        self.dense3 = tf.keras.layers.Dense()
        
        self.dense4a = tf.keras.layers.Dense(activation='softmax')
        self.dense4b = tf.keras.layers.Dense()
        
    def call(self, inputs):
        x = self.flatten(inputs)
        x = self.dense1(x)
        x = self.dense2(x)
        x = self.dense3(x)
        
        softmax = self.dense4a(x)
        bbox_regressor = self.dense4b(x)

## Faster R-CNN

In [None]:
class Faster_RCNN(tf.kersa.Model):
    def __init__(self, train_step=0, **kwargs):
        super(Faster_RCNN, self).__init__(*kwargs)
        self.rpn = RPN()
        self.rois = RoI()
        self.detector = Detector()
        self.train_step = train_step

    def compile(self, optimizer, ...):
        super(Faster_RCNN, self).compile()
        self.optimizer = optimizer
        ...
        
        self.rpn_loss_tracker = tf.keras.metrics.Mean(name='rpn_loss')
        self.detector_loss_tracker = tf.keras.metrics.Mean(name='detector_loss')
        
    def RPN_Loss(self, z):
        pass

    def Detector_Loss(self, x, z):
        pass
        
    def train_step(self, data):
        batch_size = tf.shape(data)[0]

        with tf.GradientTape() as tape:
            generated_image = self.Generating(num=batch_size)
            discriminator_loss = self.Discriminator_Loss(data, generated_image)
        grad = tape.gradient(discriminator_loss, self.discriminator.trainable_weights)
        self.discriminator_optimizer.apply_gradients(zip(grad, self.discriminator.trainable_weights))

        with tf.GradientTape() as tape:
            generated_image = self.Generating(num=batch_size)
            generator_loss = self.Generator_Loss(generated_image)
        grad = tape.gradient(generator_loss, self.generator.trainable_weights)
        self.generator_optimizer.apply_gradients(zip(grad, self.generator.trainable_weights))

        self.generator_loss_tracker.update_state(generator_loss)
        self.discriminator_loss_tracker.update_state(discriminator_loss)

        return {
            'discriminator_loss': self.discriminator_loss_tracker.result(),
            'generator_loss' : self.generator_loss_tracker.result()
        }

## Loss

### classification Loss

### bounding box regression Loss

In [260]:
def rpn_loss_regr_fixed_num(y_true, y_pred):

        # x is the difference between true value and predicted vaue
        x = y_true[:, :, :, 4 * num_anchors:] - y_pred

        # absolute value of x
        x_abs = K.abs(x)

        # If x_abs &lt;= 1.0, x_bool = 1
        x_bool = K.cast(K.less_equal(x_abs, 1.0), tf.float32)

        return lambda_rpn_regr * K.sum(
            y_true[:, :, :, :4 * num_anchors] * (x_bool * (0.5 * x * x) + (1 - x_bool) * (x_abs - 0.5))) / K.sum(epsilon + y_true[:, :, :, :4 * num_anchors])


def rpn_loss_regr(num_anchors):
    """Loss function for rpn regression
    Args:
        num_anchors: number of anchors (9 in here)
    Returns:
        Smooth L1 loss function 
                           0.5*x*x (if x_abs &lt; 1)
                           x_abx - 0.5 (otherwise)
    """
    return rpn_loss_regr_fixed_num
  
def rpn_loss_cls_fixed_num(y_true, y_pred):

            return lambda_rpn_class * K.sum(y_true[:, :, :, :num_anchors] * K.binary_crossentropy(y_pred[:, :, :, :], y_true[:, :, :, num_anchors:])) / K.sum(epsilon + y_true[:, :, :, :num_anchors])


def rpn_loss_cls(num_anchors):
    """Loss function for rpn classification
    Args:
        num_anchors: number of anchors (9 in here)
        y_true[:, :, :, :9]: [0,1,0,0,0,0,0,1,0] means only the second and the eighth box is valid which contains pos or neg anchor =&gt; isValid
        y_true[:, :, :, 9:]: [0,1,0,0,0,0,0,0,0] means the second box is pos and eighth box is negative
    Returns:
        lambda * sum((binary_crossentropy(isValid*y_pred,y_true))) / N
    """
    

    return rpn_loss_cls_fixed_num

tqdm.std.tqdm