# FASTER R-CNN

In [8]:
# imports
import numpy as np
import tensorflow as tf

In [11]:
# Feature extraction: dummy input image
# Blank (all-zero, not random) 800x800 RGB placeholder, allocated directly as
# float32 so no separate cast is needed.
# Layout is channels-last (batch, height, width, channels) so that it matches
# the Input(shape=[800, 800, 3]) the VGG16 backbone is built with further down;
# a channels-first (1, 3, 800, 800) array cannot be fed to that model.
image = np.zeros((1, 800, 800, 3), dtype=np.float32)
# convert to tensor
image = tf.convert_to_tensor(image)

In [28]:
# Generic sample data, built with TensorFlow only.
# Blank float32 image of shape (1, 800, 800, 3).
image = tf.zeros(shape=(1, 800, 800, 3), dtype=tf.float32)
# Two sample bounding boxes, four coordinates per row.
bbox = tf.constant([
    [20, 30, 400, 500],
    [300, 400, 500, 600],
])
# One label per bounding box (same order as the rows of bbox).
labels = tf.constant([6, 8])
# Sub-sampling factor between the input image and the feature map.
sub_sample = 16

The VGG16 network is used as the feature extraction module here; it acts as the backbone for both the RPN and the Fast R-CNN networks. We need to make a few changes to the VGG network in order to make this work. Since the input to the network is 800 px wide, the output of the feature extraction module should have a feature map size of 800 // 16 = 50. So we need to find the point at which VGG16 reaches this feature map size and trim the network there. This can be done in the following way.

#### PyTorch implementation:
https://medium.com/@fractaldle/guide-to-build-faster-rcnn-in-pytorch-95b10c273439

## VGG16  import

In [63]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
import tensorflow.keras.layers as layers

In [23]:
# VGG16 defaults to a 224x224 input, so an explicit 800x800x3 input tensor is
# supplied. include_top=False leaves out the classifier head, keeping only the
# convolutional part, initialised with the ImageNet weights.
vgg = VGG16(
    include_top=False,
    weights='imagenet',
    input_tensor=layers.Input(shape=(800, 800, 3)),
)

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [24]:
# Freeze the pretrained VGG16 weights: the backbone is used as a fixed feature
# extractor, so its layers are marked non-trainable.
vgg.trainable = False

In [59]:
# Display the list of layer objects that make up the VGG16 model
# (bare last expression, so the notebook renders it as the cell output).
vgg.layers
# Alternative: uncomment the next line to print the model summary instead.
#vgg.summary()

[<tensorflow.python.keras.engine.input_layer.InputLayer at 0x1ee5787d388>,
 <tensorflow.python.keras.layers.convolutional.Conv2D at 0x1ee5787dd08>,
 <tensorflow.python.keras.layers.convolutional.Conv2D at 0x1ee57848dc8>,
 <tensorflow.python.keras.layers.pooling.MaxPooling2D at 0x1ee579ea1c8>,
 <tensorflow.python.keras.layers.convolutional.Conv2D at 0x1ee579df608>,
 <tensorflow.python.keras.layers.convolutional.Conv2D at 0x1ee579f3148>,
 <tensorflow.python.keras.layers.pooling.MaxPooling2D at 0x1ee579e6ac8>,
 <tensorflow.python.keras.layers.convolutional.Conv2D at 0x1ee57a06408>,
 <tensorflow.python.keras.layers.convolutional.Conv2D at 0x1ee57a15d48>,
 <tensorflow.python.keras.layers.convolutional.Conv2D at 0x1ee57a17a48>,
 <tensorflow.python.keras.layers.pooling.MaxPooling2D at 0x1ee57a21248>,
 <tensorflow.python.keras.layers.convolutional.Conv2D at 0x1ee57a2b388>,
 <tensorflow.python.keras.layers.convolutional.Conv2D at 0x1ee57a34c88>,
 <tensorflow.python.keras.layers.convolutional.Co

In [46]:
# Find how many VGG16 layers are needed for the backbone.
# Keras layers are callable on eager tensors, so the test image can be pushed
# through the network one layer at a time without compiling the model.
# Layers are collected until the spatial size of the activation would drop
# below the target feature-map size (image size // sub_sample).
im = image
fe_layers = vgg.layers
# Target feature-map size, derived from the data instead of a literal 800//16.
min_fmap_size = image.shape[1] // sub_sample
req_layers = []
# Bound up front so the report below never hits an undefined (or stale) name
# if the loop stops on its very first layer. Holds the full activation shape.
out_channels = im.shape
print("size of vgg feature layers: ", len(fe_layers))
# Use CPU to test the convolutions
with tf.device('CPU:0'):
    for layer in fe_layers:
        im = layer(im)
        if im.shape[1] < min_fmap_size:
            # This layer shrinks the map too far: stop and do not keep it.
            print('brake')
            break
        req_layers.append(layer)
        out_channels = im.shape
# Number of layers kept (includes the leading InputLayer).
l_c = len(req_layers)
print('shape of ouput: ', out_channels)
print('number of layers needed: ', l_c)

size of vgg feature layers:  19
brake
shape of ouput:  (1, 50, 50, 512)
number of layers needed:  18


In [47]:
# Print the layers collected in req_layers, i.e. the ones kept for the backbone.
print(req_layers)

[<tensorflow.python.keras.engine.input_layer.InputLayer object at 0x000001EE5787D388>, <tensorflow.python.keras.layers.convolutional.Conv2D object at 0x000001EE5787DD08>, <tensorflow.python.keras.layers.convolutional.Conv2D object at 0x000001EE57848DC8>, <tensorflow.python.keras.layers.pooling.MaxPooling2D object at 0x000001EE579EA1C8>, <tensorflow.python.keras.layers.convolutional.Conv2D object at 0x000001EE579DF608>, <tensorflow.python.keras.layers.convolutional.Conv2D object at 0x000001EE579F3148>, <tensorflow.python.keras.layers.pooling.MaxPooling2D object at 0x000001EE579E6AC8>, <tensorflow.python.keras.layers.convolutional.Conv2D object at 0x000001EE57A06408>, <tensorflow.python.keras.layers.convolutional.Conv2D object at 0x000001EE57A15D48>, <tensorflow.python.keras.layers.convolutional.Conv2D object at 0x000001EE57A17A48>, <tensorflow.python.keras.layers.pooling.MaxPooling2D object at 0x000001EE57A21248>, <tensorflow.python.keras.layers.convolutional.Conv2D object at 0x000001EE

In [60]:
# Create the backbone for Faster R-CNN.
# A fresh symbolic input is chained through the retained VGG16 layers.
# req_layers[0] is VGG16's own InputLayer, so it is skipped and replaced by
# input_fe; slicing (instead of indexing req_layers[1]) also keeps this cell
# from raising IndexError when fewer than two layers were retained.
input_fe = layers.Input(shape=(800, 800, 3))
fe_extractor = input_fe
for backbone_layer in req_layers[1:]:
    fe_extractor = backbone_layer(fe_extractor)

In [70]:
# Smoke-test the feature extractor: wrap the symbolic graph in a temporary
# Model, run the sample image through it on the CPU and report the result shape.
fe_model = Model(inputs=input_fe, outputs=fe_extractor)
with tf.device('CPU:0'):
    im = fe_model(image)
print("shape of output: ", im.shape)
# The Model wrapper was only needed for this check; drop the name again.
del fe_model

# What the following cells build on:
#   input of the backbone  -> input_fe
#   output of the backbone -> fe_extractor

shape of output:  (1, 50, 50, 512)


## Anchor boxes

We will use anchor_scales of 8, 16, 32, ratios of 0.5, 1, 2 and a sub-sampling factor of 16 (since we have pooled our image from 800 px down to 50 px). Every pixel in the output feature map therefore corresponds to a 16 × 16 pixel region in the image.

At each pixel location on the feature map, we need to generate 9 anchor boxes (number of anchor_scales × number of ratios), and each anchor box is described by ‘y1’, ‘x1’, ‘y2’, ‘x2’. So the anchors at each location have a shape of (9, 4).

In [75]:
# Example: reserve space for the anchor boxes of a single location.
# One row per (ratio, scale) combination, four columns: 'y1', 'x1', 'y2', 'x2'.
ratios = [0.5, 1, 2]
anchor_scales = [8, 16, 32]
anchor_base = np.zeros(
    shape=(len(anchor_scales) * len(ratios), 4),
    dtype=np.float32,
)

print(anchor_base)

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]


In [74]:
# fill the anchor axis values for the 9 anchor boxes of the respective size and ratio
sub_sample = 16
ctr_y = sub_sample / 2.
ctr_x = sub_sample / 2.

print('center of the sample anchor boxes: yc = {}, xc = {}'.format(ctr_y, ctr_x))
for i in range(len(ratios)):
    for j in range(len(anchor_scales)):
        h = sub_sample * anchor_scales[j] * np.sqrt(ratios[i])
        



center: yc = 8.0, xc = 8.0
