In [1]:
import cv2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tqdm import tqdm
from itertools import product

import os

In [2]:
# Keypoint annotations; the preview below shows one row per image file.
df = pd.read_csv('./res/train_df.csv')

# The second argument of cv2.imread is an IMREAD_* read flag, not a colour
# conversion code, so passing cv2.COLOR_BGR2RGB there does not reorder the
# channels. Read the file first (OpenCV returns BGR) and convert explicitly.
sample_image_path = './res/train_imgs/001-1-1-01-Z17_A-0000001.jpg'
image_bgr = cv2.imread(sample_image_path)
if image_bgr is None:
    # cv2.imread signals a missing or unreadable file by returning None.
    raise FileNotFoundError(sample_image_path)
image = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

In [3]:
# Image dimensions as (rows, cols, channels).
image.shape

(1080, 1920, 3)

In [4]:
# Tight bounding box around the keypoints of every row.
# Column positions 1..48 hold the keypoint coordinates interleaved as
# x, y, x, y, ...: odd positions are x values, even positions are y values.
keypoints_x = df.iloc[:, 1:49:2]
keypoints_y = df.iloc[:, 2:49:2]

# Vectorised row-wise reductions replace the per-row Python lambdas;
# astype(int) truncates towards zero exactly like int() did.
# Note: unlike the builtin min()/max(), the pandas reductions skip NaN values.
df['x_min'] = keypoints_x.min(axis=1).astype(int)
df['x_max'] = keypoints_x.max(axis=1).astype(int)
df['y_min'] = keypoints_y.min(axis=1).astype(int)
df['y_max'] = keypoints_y.max(axis=1).astype(int)

In [5]:
# Preview the first rows, including the new x_min / x_max / y_min / y_max columns.
df.head()

Unnamed: 0,image,nose_x,nose_y,left_eye_x,left_eye_y,right_eye_x,right_eye_y,left_ear_x,left_ear_y,right_ear_x,...,spine1(waist)_x,spine1(waist)_y,left_instep_x,left_instep_y,right_instep_x,right_instep_y,x_min,x_max,y_min,y_max
0,001-1-1-01-Z17_A-0000001.jpg,1046.389631,344.757881,1041.655294,329.820225,1059.429507,334.48423,1020.117796,338.890539,1048.0,...,1026.51577,514.05473,998.578836,826.718013,1063.204067,838.827465,956,1134,316,838
1,001-1-1-01-Z17_A-0000003.jpg,1069.850679,340.711494,1058.608552,324.59369,1075.242111,325.59369,1041.422997,331.694815,1065.593682,...,1058.766231,508.797029,1002.265676,699.062706,1066.376234,841.499445,974,1144,323,841
2,001-1-1-01-Z17_A-0000005.jpg,1084.475902,337.000008,1078.717997,323.757889,1095.648412,325.242119,1061.039884,329.351571,1086.461032,...,1052.844144,495.890539,989.437847,808.757889,1066.071417,841.749554,984,1163,319,841
3,001-1-1-01-Z17_A-0000007.jpg,1042.320047,361.452689,1037.907194,344.117804,1050.328382,353.913729,1016.844144,340.913737,1042.164191,...,990.375124,507.624866,1001.305177,829.233767,1159.516499,599.389997,941,1159,328,829
4,001-1-1-01-Z17_A-0000009.jpg,1058.046395,343.164191,1046.717997,331.703163,1058.13265,331.781079,1031.258806,338.59369,1049.81262,...,1034.391088,510.843791,998.625231,805.218921,1059.625956,839.765102,961,1132,331,839


# Faster R-CNN

## Backbone

### ResNet

In [6]:
# resnet = tf.keras.applications.resnet

### VGG

In [7]:
# Backbone: VGG19 with ImageNet weights and without its dense classifier head,
# built on an input tensor that has the same shape as the sample image.
inputs = tf.keras.layers.Input(shape=image.shape)
vgg = tf.keras.applications.VGG19(
    input_tensor=inputs,
    weights='imagenet',
    include_top=False,
)

In [8]:
# Push a dummy batch of one all-ones image through the backbone to obtain a
# feature map to experiment with.
feature_map = vgg(tf.expand_dims(tf.ones_like(image), 0))

In [9]:
# Layer-by-layer overview of the backbone (output shapes and parameter counts).
vgg.summary()

Model: "vgg19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 1080, 1920, 3)]   0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 1080, 1920, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 1080, 1920, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 540, 960, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 540, 960, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 540, 960, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 270, 480, 128)     0     

## Region Proposal Network

### Anchor boxes

In [10]:
def anchor_box_generator(x, y):
    """Build the anchor boxes centred on a single point.

    One box is produced for every (scale, multiplier) pair: three scales
    (64, 128, 256) combined with three (width, height) multipliers
    ((1, 1), (2, 1), (1, 2)), iterated scale-major.

    Args:
        x: Centre coordinate on the axis described by the first two box entries.
        y: Centre coordinate on the axis described by the last two box entries.

    Returns:
        A list of nine ``[x1, x2, y1, y2]`` lists.
    """
    side_lengths = (64, 128, 256)
    multipliers = ((1, 1), (2, 1), (1, 2))

    boxes = []
    for side, (w_mult, h_mult) in product(side_lengths, multipliers):
        half_w = (w_mult * side) / 2
        half_h = (h_mult * side) / 2
        boxes.append([x - half_w, x + half_w, y - half_h, y + half_h])
    return boxes

In [11]:
def Anchor_Boxes(w=33, h=60, img_shape=(1080, 1920)):
    """Generate anchor boxes on a regular grid covering the image.

    Fixes over the previous version:
      * the grid is derived from ``img_shape`` instead of the global ``image``;
      * centres along image axis 0 (rows) are passed as the *y* coordinate and
        centres along axis 1 (columns) as the *x* coordinate, so the boxes use
        the same ``[x1, x2, y1, y2]`` convention as the ground-truth boxes.

    Args:
        w: Number of grid cells along image axis 0 (rows).
        h: Number of grid cells along image axis 1 (columns).
        img_shape: Image shape; only the first two entries (rows, cols) are read.

    Returns:
        ``np.ndarray`` of shape ``(-1, 4)`` holding ``[x1, x2, y1, y2]`` boxes,
        ordered row-major over the grid (rows outer, columns inner).

    Raises:
        ValueError: If the grid is finer than the image (stride would be zero).
    """
    n_rows, n_cols = img_shape[0], img_shape[1]
    row_stride = n_rows // w
    col_stride = n_cols // h
    if row_stride <= 0 or col_stride <= 0:
        raise ValueError('grid size (w, h) must not exceed the image size')

    anchor_boxes = []
    # Centres sit in the middle of each grid cell, hence the stride // 2 offset.
    for center_y in range(row_stride // 2, n_rows, row_stride):
        for center_x in range(col_stride // 2, n_cols, col_stride):
            anchor_boxes.append(anchor_box_generator(center_x, center_y))
    return np.array(anchor_boxes).reshape(-1, 4)

In [12]:
# Anchor grid with the number of cells per axis set to size // 16 (2**4).
# NOTE(review): the RPN output recorded further down is 33 x 60 locations,
# i.e. 33 * 60 * 9 = 17820 predictions, which does not match the anchor count
# of this finer grid - confirm which stride is intended.
anchor_boxes = Anchor_Boxes(w=image.shape[0]//2**4, h=image.shape[1]//2**4, img_shape=image.shape)

In [13]:
# Ground-truth boxes: the keypoint bounding box grown on every side by 5 % of
# its own width (x) or height (y). Only the first column of df is carried over.
ground_truth = df.iloc[:, :1].assign(
    x_min=df['x_min'] - (df['x_max'] - df['x_min'])*.05,
    x_max=df['x_max'] + (df['x_max'] - df['x_min'])*.05,
    y_min=df['y_min'] - (df['y_max'] - df['y_min'])*.05,
    y_max=df['y_max'] + (df['y_max'] - df['y_min'])*.05,
)

In [14]:
# Last rows of the padded ground-truth boxes.
ground_truth.tail()

Unnamed: 0,image,x_min,x_max,y_min,y_max
4190,642-2-4-31-Z148_E-0000023.jpg,590.95,1142.05,157.1,924.9
4191,642-2-4-31-Z148_E-0000025.jpg,590.95,1142.05,296.75,918.25
4192,642-2-4-31-Z148_E-0000027.jpg,590.95,1142.05,296.75,918.25
4193,642-2-4-31-Z148_E-0000029.jpg,590.95,1142.05,296.75,918.25
4194,642-2-4-31-Z148_E-0000031.jpg,589.9,1164.1,296.75,918.25


### IoU

In [15]:
def IoU(box1, box2, eps=1e-5):
    """Intersection-over-union of two ``[x1, x2, y1, y2]`` boxes.

    Extents are measured inclusively (``end - start + 1``).

    Args:
        box1: First box, indexable as ``[x1, x2, y1, y2]``.
        box2: Second box, same layout.
        eps: Small constant added to the union to avoid division by zero.

    Returns:
        ``0.0`` when the boxes do not overlap, otherwise
        ``intersection / (union + eps)``.
    """
    # Overlap extent on each axis, clamped at zero.
    overlap_h = max(0.0, min(box1[3], box2[3]) - max(box1[2], box2[2]) + 1)
    overlap_w = max(0.0, min(box1[1], box2[1]) - max(box1[0], box2[0]) + 1)
    if overlap_w <= 0 or overlap_h <= 0:
        return 0.0

    def _area(box):
        return (box[1] - box[0] + 1) * (box[3] - box[2] + 1)

    intersect = overlap_h * overlap_w
    union = _area(box1) + _area(box2) - intersect
    return intersect / (union + eps)

In [17]:
# from joblib import Parallel, delayed

# def label_generator(gt, anchor_boxes):
#     label = np.empty(shape=(1, anchor_boxes.shape[0]))
#     max_iou = 0
#     for j in range(label.shape[1]):
#         iou = IoU(gt, anchor_boxes[j])
#         if iou >= 0.7:
#             label[0][j] = 1
#         elif iou < 0.3:
#             label[0][j] = -1
#         else:
#             label[0][j] = 0

#         if iou > max_iou:
#             tmp = j
#     label[0][tmp] = 1
#     return label.tolist()

# n_jobs=2
# parallel = Parallel(n_jobs=n_jobs)
# ground_truths = np.array(ground_truth.iloc[:,1:])

# label = parallel(
#     delayed(label_generator)(gt=gt, anchor_boxes=anchor_boxes) 
#     for gt in tqdm(ground_truths)
# )

# label = np.array(label).reshape(-1, 72360)

In [21]:
def label_generator(ground_truth, anchor_boxes):
    """Assign a training label to every (ground-truth box, anchor) pair.

    Label values:
        1  - IoU >= 0.7, or the anchor with the highest IoU for that box;
        0  - IoU < 0.3;
        -1 - everything in between.

    Fix over the previous version: the running maximum IoU is now actually
    updated, so the forced positive is the best-overlapping anchor rather
    than the last anchor with any overlap. A box that overlaps no anchor no
    longer reuses a stale index from an earlier row (or raises NameError).

    Args:
        ground_truth: DataFrame whose columns from position 1 onwards are the
            box coordinates in the layout expected by ``IoU``.
        anchor_boxes: Array of shape ``(num_anchors, 4)``.

    Returns:
        ``np.ndarray`` of shape ``(len(ground_truth), num_anchors)``.
    """
    gt_boxes = np.array(ground_truth.iloc[:, 1:])
    label = np.empty(shape=(len(gt_boxes), anchor_boxes.shape[0]))
    for i in tqdm(range(label.shape[0])):
        gt = gt_boxes[i]
        max_iou = 0
        best_j = None
        for j in range(label.shape[1]):
            iou = IoU(gt, anchor_boxes[j])
            if iou >= 0.7:
                label[i, j] = 1
            elif iou < 0.3:
                label[i, j] = 0
            else:
                label[i, j] = -1

            if iou > max_iou:
                max_iou = iou
                best_j = j

        # Guarantee at least one positive per box when any anchor overlaps it.
        if best_j is not None:
            label[i, best_j] = 1
    return label

In [22]:
# Label every anchor against every ground-truth box.
# This is slow: label_generator runs a nested Python loop over all pairs.
labels = label_generator(ground_truth, anchor_boxes)

100%|██████████████████████████████████████████████████████████████████████████████████| 50/50 [00:40<00:00,  1.23it/s]


In [24]:
# Inspect the label matrix (rows: ground-truth boxes, columns: anchors).
labels

array([[-1., -1., -1., ..., -1., -1., -1.],
       [-1., -1., -1., ..., -1., -1., -1.],
       [-1., -1., -1., ..., -1., -1., -1.],
       ...,
       [-1., -1., -1., ..., -1., -1., -1.],
       [-1., -1., -1., ..., -1., -1., -1.],
       [-1., -1., -1., ..., -1., -1., -1.]])

### Region Proposal Network

In [33]:
class RPN(tf.keras.layers.Layer):
    """Region Proposal Network head applied to a backbone feature map.

    A 3x3 convolution slides over the feature map and feeds two sibling 1x1
    convolutions: one objectness score per anchor and four box-regression
    values per anchor.

    Fixes over the previous version:
      * the objectness head used ReLU, which yields unbounded non-negative
        values; it now uses a sigmoid so each output is a probability that
        can be trained against the 1/0 anchor labels;
      * the box-regression head used a sigmoid, which confines every output
        to (0, 1); it is now linear so offsets can take any sign or size.

    Args:
        k: Number of anchors per feature-map location.
    """

    def __init__(self, k=9, **kwargs):
        super(RPN, self).__init__(**kwargs)
        self.k = k
        # NOTE(review): this intermediate convolution has no activation;
        # confirm whether a ReLU was intended here.
        self.window = tf.keras.layers.Conv2D(filters=256, kernel_size=3, strides=1, padding='same')
        self.bounding = tf.keras.layers.Conv2D(filters=self.k*4, kernel_size=1, activation=None)
        self.cls = tf.keras.layers.Conv2D(filters=self.k, kernel_size=1, activation='sigmoid')

    def call(self, inputs):
        """Return ``(objectness, box_regression)`` for a feature map.

        The outputs have ``k`` and ``4 * k`` channels respectively.
        """
        intermediate = self.window(inputs)
        cls_ = self.cls(intermediate)
        bounding_ = self.bounding(intermediate)
        return cls_, bounding_

In [34]:
# RPN head with the default k=9 anchors per location.
rpn = RPN()

In [35]:
# Run the RPN head on the backbone feature map; it returns the pair
# (class scores, box outputs).
cls, bounding = rpn(feature_map)

In [39]:
# NOTE(review): the recorded output below has 18 channels, but RPN as written
# in this notebook builds the cls convolution with filters=k (9 by default).
# The output appears to come from a different version of the class - re-run.
cls.shape

TensorShape([1, 33, 60, 18])

In [40]:
# The box head has k*4 filters, i.e. 36 channels per location for k=9.
bounding.shape

TensorShape([1, 33, 60, 36])

In [25]:
# Total RPN predictions: 33 x 60 feature-map locations times 9 anchors each.
33* 60 * 9

17820

## Detector

### RoI pooling

In [105]:
# Leftover inspection cell: only displays the module repr of tf.keras.backend.
tf.keras.backend

<module 'tensorflow.keras.backend' from 'C:\\Users\\kwon\\anaconda3\\lib\\site-packages\\tensorflow\\keras\\backend\\__init__.py'>

In [106]:
class RoIPooling(tf.keras.layers.Layer):
    """Crop-and-resize RoI pooling.

    Each region of interest is cut out of the feature map and resized to
    ``pool_size x pool_size`` with ``tf.image.resize``.

    Fixes over the previous version:
      * ``get_config`` called ``super(RoiPoolingConv, self)``, a name that
        does not exist, and therefore raised ``NameError``;
      * removed a ``permute_dimensions`` call with the identity permutation
        and an unused ``tf.shape`` local;
      * the per-RoI coordinates no longer reuse the name of the ``x`` argument.

    Args:
        pool_size: Side length of the square output produced for every RoI.
        num_rois: Number of RoIs read from the ``rois`` input on each call.

    Call input:
        A pair ``[feature_map, rois]``. ``feature_map`` is indexed as
        ``(batch, rows, cols, channels)`` and ``rois`` as
        ``(batch, num_rois, 4)`` with per-RoI ordering ``(x, y, w, h)``.
        Only ``rois[0]`` is read and the result is reshaped to a leading
        dimension of 1, so the layer effectively supports batch size 1.
    """

    def __init__(self, pool_size, num_rois, **kwargs):
        super(RoIPooling, self).__init__(**kwargs)
        self.pool_size = pool_size
        self.num_rois = num_rois

    def build(self, input_shape):
        # input_shape is the pair (feature_map_shape, rois_shape); channels last.
        self.nb_channels = input_shape[0][3]

    def compute_output_shape(self, input_shape):
        return (None, self.num_rois, self.pool_size, self.pool_size, self.nb_channels)

    def call(self, x):
        assert len(x) == 2

        img = x[0]    # feature map, indexed (batch, rows, cols, channels)
        rois = x[1]   # indexed (batch, num_rois, 4), ordering (x, y, w, h)

        outputs = []
        for roi_idx in range(self.num_rois):
            roi_x = tf.cast(rois[0, roi_idx, 0], 'int32')
            roi_y = tf.cast(rois[0, roi_idx, 1], 'int32')
            roi_w = tf.cast(rois[0, roi_idx, 2], 'int32')
            roi_h = tf.cast(rois[0, roi_idx, 3], 'int32')

            # Crop the RoI and resize it to the fixed pooling size.
            crop = img[:, roi_y:roi_y + roi_h, roi_x:roi_x + roi_w, :]
            outputs.append(tf.image.resize(crop, (self.pool_size, self.pool_size)))

        final_output = tf.concat(outputs, axis=0)

        # Reshape to (1, num_rois, pool_size, pool_size, nb_channels).
        final_output = tf.reshape(
            final_output,
            (1, self.num_rois, self.pool_size, self.pool_size, self.nb_channels),
        )
        return final_output

    def get_config(self):
        config = {'pool_size': self.pool_size,
                  'num_rois': self.num_rois}
        base_config = super(RoIPooling, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

In [27]:
class ROIPoolingLayer(tf.keras.layers.Layer):
    """Max-pooling RoI layer producing a fixed ``h x w`` grid per RoI.

    Fixes over the previous version:
      * ``__init__`` called ``super(RoIPooling, self)``, naming a different
        class, which fails because ``self`` is not an instance of it;
      * ``call`` and ``compute_output_shape`` read ``self.pooled_height`` and
        ``self.pooled_width``, which were never assigned. They are now set
        from ``h`` and ``w`` (``self.h`` / ``self.w`` are kept as well).

    Args:
        h: Number of pooled rows per RoI.
        w: Number of pooled columns per RoI.

    Call input:
        A pair ``(feature_maps, rois)`` mapped over its leading (batch) axis.
        Every RoI is ``(h_start, w_start, h_end, w_end)``; each entry is
        multiplied by the feature-map height/width, so the values are
        presumably fractions in [0, 1] - confirm against the caller.
    """

    def __init__(self, h, w, **kwargs):
        super(ROIPoolingLayer, self).__init__(**kwargs)
        self.h = h
        self.w = w
        self.pooled_height = h
        self.pooled_width = w

    @staticmethod
    def _pool_rois(feature_map, rois, pooled_height, pooled_width):
        """Pool every RoI of a single feature map."""
        def curried_pool_roi(roi):
            return ROIPoolingLayer._pool_roi(feature_map, roi,
                                             pooled_height, pooled_width)

        pooled_areas = tf.map_fn(curried_pool_roi, rois, dtype=tf.float32)
        return pooled_areas

    @staticmethod
    def _pool_roi(feature_map, roi, pooled_height, pooled_width):
        """Max-pool one RoI of one feature map into a fixed grid."""
        feature_map_height = int(feature_map.shape[0])
        feature_map_width = int(feature_map.shape[1])

        # Scale the RoI corners to feature-map indices.
        h_start = tf.cast(feature_map_height * roi[0], 'int32')
        w_start = tf.cast(feature_map_width * roi[1], 'int32')
        h_end = tf.cast(feature_map_height * roi[2], 'int32')
        w_end = tf.cast(feature_map_width * roi[3], 'int32')

        region = feature_map[h_start:h_end, w_start:w_end, :]

        region_height = h_end - h_start
        region_width = w_end - w_start
        h_step = tf.cast(region_height / pooled_height, 'int32')
        w_step = tf.cast(region_width / pooled_width, 'int32')

        # Cell bounds inside the region; the last row/column of cells absorbs
        # whatever is left over after the integer division.
        areas = [[(
            i*h_step,
            j*w_step,
            (i+1)*h_step if i+1 < pooled_height else region_height,
            (j+1)*w_step if j+1 < pooled_width else region_width
        )
            for j in range(pooled_width)]
            for i in range(pooled_height)]

        def pool_area(x):
            return tf.math.reduce_max(region[x[0]:x[2], x[1]:x[3], :], axis=[0, 1])

        pooled_features = tf.stack([[pool_area(x) for x in row] for row in areas])
        return pooled_features

    def call(self, x):
        def curried_pool_rois(x):
            return ROIPoolingLayer._pool_rois(x[0], x[1],
                                              self.pooled_height,
                                              self.pooled_width)

        pooled_areas = tf.map_fn(curried_pool_rois, x, dtype=tf.float32)
        return pooled_areas

    def compute_output_shape(self, input_shape):
        feature_map_shape, rois_shape = input_shape
        assert feature_map_shape[0] == rois_shape[0]
        batch_size = feature_map_shape[0]
        n_rois = rois_shape[1]
        n_channels = feature_map_shape[3]
        return (batch_size, n_rois, self.pooled_height,
                self.pooled_width, n_channels)

### Non-Maximum Suppression

In [77]:
scores = [1.8, 0.4, 2, 5]

# `ground_truths` was only assigned inside commented-out code earlier in the
# notebook; define it here so the cell runs on a fresh kernel.
# Column order: x_min, x_max, y_min, y_max.
ground_truths = np.array(ground_truth.iloc[:, 1:])

# tf.image.non_max_suppression expects every box as [y1, x1, y2, x2], so
# reorder the columns (and use float32) before handing the boxes over.
nms_boxes = ground_truths[:4][:, [2, 0, 3, 1]].astype(np.float32)
selected_indices = tf.image.non_max_suppression(nms_boxes, scores, max_output_size=100)

# Display the surviving boxes in the notebook's own x/y column order.
tf.gather(ground_truths[:4], selected_indices)

<tf.Tensor: shape=(1, 4), dtype=float64, numpy=array([[ 930.1 , 1169.9 ,  302.95,  854.05]])>

In [81]:
# NOTE(review): Non_Maximum_Suppression is not defined in any cell visible in
# this notebook; it must be defined (or restored) before this cell can run on
# a fresh kernel.
Non_Maximum_Suppression(ground_truths[:4], scores, iou_threshold=1)

array([[ 930, 1169,  302,  854],
       [ 975, 1171,  292,  867],
       [ 947, 1142,  289,  864],
       [ 965, 1152,  297,  866]])

### RoI Sampling

### Classifier

In [None]:
# NOTE(review): `classifier_layer` is not assigned in any visible cell; this
# unexecuted cell looks like a leftover placeholder and raises NameError as is.
classifier_layer

In [None]:
# inputs : RoI pooled fixed size feature map
# output : 

class Classifie(tf.keras.layers.Layer):
    """Detection head on top of RoI-pooled features.

    The input is flattened, passed through three dense layers and then split
    into a softmax class head and a box-regression head.

    Fixes over the previous version:
      * ``tf.kersa`` typo corrected to ``tf.keras``;
      * every ``Dense`` layer now receives its required ``units`` argument;
      * ``call`` returns its two outputs instead of discarding them.

    Args:
        base_layers: Accepted for interface compatibility; currently unused.
        input_rois: Accepted for interface compatibility; currently unused.
        hidden_units: Width of the three shared dense layers. The default is
            a placeholder - tune for the task.
        num_classes: Number of outputs of the softmax head; the box head emits
            four values per class. The default is a placeholder.
    """

    def __init__(self, base_layers, input_rois, hidden_units=4096, num_classes=2, **kwargs):
        super(Classifie, self).__init__(**kwargs)
        self.flatten = tf.keras.layers.Flatten()
        # NOTE(review): these layers keep the default (linear) activation as
        # in the original sketch; confirm whether ReLU was intended.
        self.dense1 = tf.keras.layers.Dense(hidden_units)
        self.dense2 = tf.keras.layers.Dense(hidden_units)

        self.dense3 = tf.keras.layers.Dense(hidden_units)

        self.dense4a = tf.keras.layers.Dense(num_classes, activation='softmax')
        self.dense4b = tf.keras.layers.Dense(num_classes * 4)

    def call(self, inputs):
        """Return ``(class_probabilities, bbox_regressor)`` for the inputs."""
        x = self.flatten(inputs)
        x = self.dense1(x)
        x = self.dense2(x)
        x = self.dense3(x)

        softmax = self.dense4a(x)
        bbox_regressor = self.dense4b(x)
        return softmax, bbox_regressor

## Faster R-CNN

In [None]:
class Faster_RCNN(tf.keras.Model):
    """Skeleton of the end-to-end Faster R-CNN model (work in progress).

    Fixes over the previous version:
      * ``tf.kersa`` typo corrected to ``tf.keras``;
      * keyword arguments are forwarded with ``**kwargs`` (``*kwargs`` would
        have passed the keyword *names* as positional arguments);
      * ``compile`` had a literal ``...`` in its parameter list, which is a
        syntax error; extra arguments are now accepted as ``**kwargs`` and
        forwarded to ``tf.keras.Model.compile``.

    NOTE(review): ``RoI`` and ``Detector`` are not defined in the visible part
    of this notebook, and both loss methods are still empty stubs.
    """

    def __init__(self, **kwargs):
        super(Faster_RCNN, self).__init__(**kwargs)
        self.rpn = RPN()
        self.rois = RoI()
        self.detector = Detector()

    def compile(self, optimizer, **kwargs):
        super(Faster_RCNN, self).compile(**kwargs)
        self.optimizer = optimizer
        # TODO: store any additional compile-time settings here.

        self.rpn_loss_tracker = tf.keras.metrics.Mean(name='rpn_loss')
        self.detector_loss_tracker = tf.keras.metrics.Mean(name='detector_loss')

    def RPN_Loss(self, z):
        # TODO: not implemented yet.
        pass

    def Detector_Loss(self, x, z):
        # TODO: not implemented yet.
        pass

    def train_step(self, data):
        # NOTE(review): this body is GAN training code. It uses attributes
        # that this class never sets (Generating, Discriminator_Loss,
        # Generator_Loss, discriminator, generator, *_optimizer and the
        # generator/discriminator loss trackers) and will raise AttributeError.
        # It needs to be rewritten around self.rpn / self.detector and the
        # rpn/detector loss trackers once the loss methods exist.
        batch_size = tf.shape(data)[0]

        with tf.GradientTape() as tape:
            generated_image = self.Generating(num=batch_size)
            discriminator_loss = self.Discriminator_Loss(data, generated_image)
        grad = tape.gradient(discriminator_loss, self.discriminator.trainable_weights)
        self.discriminator_optimizer.apply_gradients(zip(grad, self.discriminator.trainable_weights))

        with tf.GradientTape() as tape:
            generated_image = self.Generating(num=batch_size)
            generator_loss = self.Generator_Loss(generated_image)
        grad = tape.gradient(generator_loss, self.generator.trainable_weights)
        self.generator_optimizer.apply_gradients(zip(grad, self.generator.trainable_weights))

        self.generator_loss_tracker.update_state(generator_loss)
        self.discriminator_loss_tracker.update_state(discriminator_loss)

        return {
            'discriminator_loss': self.discriminator_loss_tracker.result(),
            'generator_loss' : self.generator_loss_tracker.result()
        }

## Loss

### Classification loss

### Bounding box regression loss

In [260]:
# Leftover inspection cell: only displays the tqdm class.
tqdm

tqdm.std.tqdm