In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.initializers import RandomNormal, Constant
from tensorflow.keras.layers import (Input,
                                     Conv2D, 
                                     BatchNormalization, 
                                     ReLU, 
                                     UpSampling2D,
                                     Add)

In [2]:
def conv_block(input_tensor=None,
               filters=None,
               kernel_size=None,
               strides=1,
               padding='same',
               kernel_init='he_normal',
               bias_init='zeros',
               bn_act=True,
               name_prefix=None):
    '''
        Applies a Conv2D layer to `input_tensor`, optionally followed by
        BatchNormalization and ReLU.

        Args:
            input_tensor: tensor the convolution is applied to.
            filters: number of output filters, forwarded to Conv2D.
            kernel_size: int, or tuple/list of ints, forwarded to Conv2D.
            strides: forwarded to Conv2D.
            padding: forwarded to Conv2D.
            kernel_init: kernel initializer, forwarded to Conv2D.
            bias_init: bias initializer, forwarded to Conv2D.
            bn_act: when True, BatchNormalization and ReLU are appended.
            name_prefix: prefix for the layer names. Layers are named
                '<prefix>_conv_<k>x<k>', '<prefix>_bn' and '<prefix>_relu'.
                When None, no explicit names are passed and Keras picks
                unique names itself.

        Returns:
            The output tensor of the last layer that was applied.
    '''
    def _layer_name(suffix):
        # Without a prefix the old code produced literal 'None_...' names,
        # which repeat on a second call; defer to Keras auto-naming instead.
        if name_prefix is None:
            return None
        return '{}_{}'.format(name_prefix, suffix)

    # Conv2D accepts a tuple/list kernel size; render it as e.g. '1x3' so the
    # layer name never contains brackets, commas or spaces.
    if isinstance(kernel_size, (tuple, list)):
        kernel_tag = 'x'.join(str(dim) for dim in kernel_size)
    else:
        kernel_tag = '{}x{}'.format(kernel_size, kernel_size)

    _x = Conv2D(filters=filters, kernel_size=kernel_size,
                padding=padding, strides=strides,
                kernel_initializer=kernel_init,
                bias_initializer=bias_init,
                name=_layer_name('conv_{}'.format(kernel_tag)))(input_tensor)
    if bn_act:
        _x = BatchNormalization(name=_layer_name('bn'))(_x)
        _x = ReLU(name=_layer_name('relu'))(_x)
    return _x

In [6]:
class FCOS:
    '''
        FCOS-style detector assembled from a ResNet50V2 backbone, a feature
        pyramid (P3 to P7) and two convolutional heads that are reused on
        every pyramid level.

        Every key of `config` is copied onto the instance as an attribute,
        after which the pyramid and the full model are built. The resulting
        `tf.keras.Model` is available as `self.model`.
    '''

    def __init__(self, config):
        '''
            Args:
                config: dict holding at least the keys checked by
                    `_validate_config`.

            Raises:
                AssertionError: if a required key is missing.
        '''
        self._validate_config(config)
        for attr, value in config.items():
            setattr(self, attr, value)
        self._build_fpn()
        self._build_model()

    def _validate_config(self, config):
        '''
            Checks that `config` contains every required key.

            The check is an explicit `raise` rather than an `assert`
            statement so that it still runs when Python is started with
            `-O`. `AssertionError` is kept as the exception type so callers
            see the same error as before.

            Raises:
                AssertionError: naming the first required key that is missing.
        '''
        attr_list = [
            'mode',
            'distribute_strategy',
            'image_height',
            'image_width',
            'num_classes',
            'dataset',
            'epochs',
            'learning_rate',
            'model_dir',
            'tensorboard_log_dir'
        ]
        for attr in attr_list:
            if attr not in config:
                raise AssertionError('Missing {} in config'.format(attr))

    def _build_fpn(self):
        '''
            Builds the backbone and the feature pyramid and stores the
            pyramid tensors in `self._pyramid_features` (keys 'P3' to 'P7').

            From the FPN paper, "To start the iteration, we simply attach a
            1×1 convolutional layer on C5 to produce the coarsest resolution map.
            Finally, we append a 3×3 convolution on each merged map to generate
            the final feature map, which is to reduce the aliasing effect of
            upsampling. This final set of feature maps is called
            {P2, P3, P4, P5}, corresponding to {C2, C3, C4, C5} that are
            respectively of the same spatial sizes".
            From the FCOS paper, "P6 and P7 are produced by applying one
            convolutional layer with the stride being 2 on P5 and P6, respectively".
        '''
        with self.distribute_strategy.scope():
            print('****Building FPN')
            self._backbone = tf.keras.applications.ResNet50V2(
                input_shape=[self.image_height, self.image_width, 3],
                weights='imagenet',
                include_top=False)
            # Backbone activations that feed the lateral connections.
            C5 = self._backbone.get_layer('post_relu').output
            C4 = self._backbone.get_layer('conv4_block6_1_relu').output
            C3 = self._backbone.get_layer('conv3_block4_1_relu').output

            # Top-down pathway: 1x1 lateral conv (M*), merge with the
            # upsampled coarser map, then a 3x3 conv to get the output (P*).
            M5 = conv_block(C5, 256, 1, bn_act=False, name_prefix='C5')
            P5 = conv_block(M5, 256, 3, bn_act=False, name_prefix='P5')
            M5_upsampled = UpSampling2D(size=(2, 2),
                                        interpolation='nearest',
                                        name='M5_upsampled')(M5)

            M4 = conv_block(C4, 256, 1, bn_act=False, name_prefix='C4')
            M4 = Add(name='M4_M5_add')([M4, M5_upsampled])
            P4 = conv_block(M4, 256, 3, bn_act=False, name_prefix='P4')
            M4_upsampled = UpSampling2D(size=(2, 2),
                                        interpolation='nearest',
                                        name='M4_upsampled')(M4)

            M3 = conv_block(C3, 256, 1, bn_act=False, name_prefix='C3')
            M3 = Add(name='M3_M4_add')([M3, M4_upsampled])
            P3 = conv_block(M3, 256, 3, bn_act=False, name_prefix='P3')

            # Extra coarse levels: stride-2 convs on P5 and on ReLU(P6).
            # The stored P6 is the pre-ReLU tensor.
            P6 = conv_block(P5, 256, 3, 2, bn_act=False, name_prefix='P6')
            P6_relu = ReLU(name='P6_relu')(P6)
            P7 = conv_block(P6_relu, 256, 3, 2, bn_act=False, name_prefix='P7')

            self._pyramid_features = {
                'P3': P3,
                'P4': P4,
                'P5': P5,
                'P6': P6,
                'P7': P7
            }

    def _get_classification_head(self, p=0.01):
        '''
            Builds the head that predicts class logits and centerness logits.

            Four conv + BN + ReLU blocks form a trunk; two separate 3x3
            convolutions on top of it produce `self.num_classes` class logits
            and a single centerness logit.

            Args:
                p: prior used to initialise the bias of the class logits to
                    -log((1 - p) / p), so a sigmoid applied to the initial
                    logits would give roughly `p`.

            Returns:
                A `tf.keras.Model` named 'classification_head' with two
                outputs: [classification_logits, centerness_logits].
        '''
        kernel_init = RandomNormal(0.0, 0.01)
        bias_init = Constant(-np.log((1 - p) / p))

        input_layer = Input(shape=[None, None, 256])
        x = input_layer

        for i in range(4):
            x = conv_block(x, 256, 3, kernel_init=kernel_init,
                           name_prefix='c_head_{}'.format(i))
        classification_logits = conv_block(x, self.num_classes,
                                           3, kernel_init=kernel_init,
                                           bias_init=bias_init, bn_act=False,
                                           name_prefix='cls_logits')
        centerness_logits = conv_block(x, 1, 3,
                                       kernel_init=kernel_init, bn_act=False,
                                       name_prefix='ctr_logits')
        # A flat two-element output list: calling the head returns
        # [class_logits, centerness_logits] with no extra nesting.
        return tf.keras.Model(inputs=[input_layer],
                              outputs=[classification_logits,
                                       centerness_logits],
                              name='classification_head')

    def _get_regression_head(self):
        '''
            Builds the head that predicts four regression values per
            location: four conv + BN + ReLU blocks followed by a 3x3
            convolution with 4 filters.

            Returns:
                A `tf.keras.Model` named 'regression_head' with one output.
        '''
        kernel_init = RandomNormal(0.0, 0.01)
        input_layer = Input(shape=[None, None, 256])
        x = input_layer

        for i in range(4):
            x = conv_block(x, 256, 3, kernel_init=kernel_init,
                           name_prefix='r_head_{}'.format(i))
        regression_logits = conv_block(x, 4, 3, kernel_init=kernel_init,
                                       bn_act=False, name_prefix='reg_logits')
        return tf.keras.Model(inputs=[input_layer],
                              outputs=[regression_logits],
                              name='regression_head')

    def _build_model(self):
        '''
            Applies the two heads to every pyramid level (P3 to P7) and wraps
            everything in `self.model`.

            The same head instances are called on each level. The per-level
            outputs are kept in `self._classification_logits`,
            `self._centerness_logits` and `self._regression_logits`, and are
            passed in that order as the model outputs.
        '''
        with self.distribute_strategy.scope():
            print('****Building FCOS')
            self._classification_head = self._get_classification_head()
            self._regression_head = self._get_regression_head()

            self._classification_logits = []
            self._centerness_logits = []
            self._regression_logits = []

            for level in range(3, 8):
                feature = self._pyramid_features['P{}'.format(level)]
                cls_logits, ctr_logits = self._classification_head(feature)
                self._classification_logits.append(cls_logits)
                self._centerness_logits.append(ctr_logits)
                self._regression_logits.append(self._regression_head(feature))

            _image_input = self._backbone.input
            outputs = [self._classification_logits,
                       self._centerness_logits,
                       self._regression_logits]
            self.model = tf.keras.Model(inputs=[_image_input],
                                        outputs=outputs,
                                        name='FCOS')

In [7]:
# Settings handed to FCOS(config); each key below is one that
# FCOS._validate_config requires to be present.
config = dict(
    mode='train',
    distribute_strategy=tf.distribute.OneDeviceStrategy(device='/cpu:0'),
    image_height=800,
    image_width=1024,
    num_classes=80,
    dataset=None,
    epochs=250,
    learning_rate=1e-4,
    model_dir='model_files',
    tensorboard_log_dir='logs',
)

In [8]:
# Build the full network. The constructor prints one progress line while the
# FPN is assembled and one while the FCOS heads are attached.
fcos = FCOS(config)

****Building FPN
****Building FCOS


In [10]:
# Output tensors of the assembled model: classification logits for the five
# pyramid levels first, then centerness logits, then regression logits.
fcos.model.outputs

[<tf.Tensor 'classification_head/Identity:0' shape=(None, 100, 128, 80) dtype=float32>,
 <tf.Tensor 'classification_head_1/Identity:0' shape=(None, 50, 64, 80) dtype=float32>,
 <tf.Tensor 'classification_head_2/Identity:0' shape=(None, 25, 32, 80) dtype=float32>,
 <tf.Tensor 'classification_head_3/Identity:0' shape=(None, 13, 16, 80) dtype=float32>,
 <tf.Tensor 'classification_head_4/Identity:0' shape=(None, 7, 8, 80) dtype=float32>,
 <tf.Tensor 'classification_head/Identity_1:0' shape=(None, 100, 128, 1) dtype=float32>,
 <tf.Tensor 'classification_head_1/Identity_1:0' shape=(None, 50, 64, 1) dtype=float32>,
 <tf.Tensor 'classification_head_2/Identity_1:0' shape=(None, 25, 32, 1) dtype=float32>,
 <tf.Tensor 'classification_head_3/Identity_1:0' shape=(None, 13, 16, 1) dtype=float32>,
 <tf.Tensor 'classification_head_4/Identity_1:0' shape=(None, 7, 8, 1) dtype=float32>,
 <tf.Tensor 'regression_head/Identity:0' shape=(None, 100, 128, 4) dtype=float32>,
 <tf.Tensor 'regression_head_1/Ident

In [12]:
# summary() prints the table and returns None, so each call is its own
# statement; as a tuple expression the cell also displayed `(None, None)`.
fcos._regression_head.summary()
fcos._classification_head.summary()

Model: "regression_head"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, None, None, 256)] 0         
_________________________________________________________________
r_head_0_conv_3x3 (Conv2D)   multiple                  590080    
_________________________________________________________________
r_head_0_bn (BatchNormalizat multiple                  1024      
_________________________________________________________________
r_head_0_relu (ReLU)         multiple                  0         
_________________________________________________________________
r_head_1_conv_3x3 (Conv2D)   multiple                  590080    
_________________________________________________________________
r_head_1_bn (BatchNormalizat multiple                  1024      
_________________________________________________________________
r_head_1_relu (ReLU)         multiple              

(None, None)

In [13]:
# Class logits, one tensor per pyramid level in P3 to P7 order.
fcos._classification_logits

[<tf.Tensor 'classification_head/Identity:0' shape=(None, 100, 128, 80) dtype=float32>,
 <tf.Tensor 'classification_head_1/Identity:0' shape=(None, 50, 64, 80) dtype=float32>,
 <tf.Tensor 'classification_head_2/Identity:0' shape=(None, 25, 32, 80) dtype=float32>,
 <tf.Tensor 'classification_head_3/Identity:0' shape=(None, 13, 16, 80) dtype=float32>,
 <tf.Tensor 'classification_head_4/Identity:0' shape=(None, 7, 8, 80) dtype=float32>]

In [14]:
# Centerness logits, one single-channel tensor per pyramid level (P3 to P7).
fcos._centerness_logits

[<tf.Tensor 'classification_head/Identity_1:0' shape=(None, 100, 128, 1) dtype=float32>,
 <tf.Tensor 'classification_head_1/Identity_1:0' shape=(None, 50, 64, 1) dtype=float32>,
 <tf.Tensor 'classification_head_2/Identity_1:0' shape=(None, 25, 32, 1) dtype=float32>,
 <tf.Tensor 'classification_head_3/Identity_1:0' shape=(None, 13, 16, 1) dtype=float32>,
 <tf.Tensor 'classification_head_4/Identity_1:0' shape=(None, 7, 8, 1) dtype=float32>]

In [15]:
# Regression outputs, one four-channel tensor per pyramid level (P3 to P7).
fcos._regression_logits

[<tf.Tensor 'regression_head/Identity:0' shape=(None, 100, 128, 4) dtype=float32>,
 <tf.Tensor 'regression_head_1/Identity:0' shape=(None, 50, 64, 4) dtype=float32>,
 <tf.Tensor 'regression_head_2/Identity:0' shape=(None, 25, 32, 4) dtype=float32>,
 <tf.Tensor 'regression_head_3/Identity:0' shape=(None, 13, 16, 4) dtype=float32>,
 <tf.Tensor 'regression_head_4/Identity:0' shape=(None, 7, 8, 4) dtype=float32>]

In [10]:
# Pyramid feature maps keyed by level name. The instance built in this
# notebook is `fcos`; no variable called `model` is ever defined here.
fcos._pyramid_features

{'P3': <tf.Tensor 'P3_conv_3x3/Identity:0' shape=(None, 100, 128, 256) dtype=float32>,
 'P4': <tf.Tensor 'P4_conv_3x3/Identity:0' shape=(None, 50, 64, 256) dtype=float32>,
 'P5': <tf.Tensor 'P5_conv_3x3/Identity:0' shape=(None, 25, 32, 256) dtype=float32>,
 'P6': <tf.Tensor 'P6_conv_3x3/Identity:0' shape=(None, 13, 16, 256) dtype=float32>,
 'P7': <tf.Tensor 'P7_conv_3x3/Identity:0' shape=(None, 7, 8, 256) dtype=float32>}