In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.initializers import RandomNormal, Constant
from tensorflow.keras.layers import (Input,
                                     Conv2D, 
                                     Concatenate,
                                     BatchNormalization,
                                     Lambda,
                                     ReLU,
                                     Reshape,
                                     Add)

In [2]:
def conv_block(input_tensor=None,
               filters=None,
               kernel_size=None,
               strides=1,
               padding='same',
               kernel_init='he_normal',
               bias_init='zeros',
               bn_act=True,
               name_prefix=None):
    '''
        Applies a Conv2D layer to `input_tensor`, optionally followed by
        BatchNormalization and ReLU.

        Args:
            input_tensor: tensor the convolution is applied to.
            filters: number of output filters, forwarded to Conv2D.
            kernel_size: an int, or a (height, width) pair; forwarded to
                Conv2D unchanged.
            strides: forwarded to Conv2D.
            padding: forwarded to Conv2D.
            kernel_init: kernel initializer for the Conv2D layer.
            bias_init: bias initializer for the Conv2D layer.
            bn_act: when True, BatchNormalization and ReLU layers are
                applied after the convolution.
            name_prefix: prefix for the layer names. The layers are named
                '<prefix>_conv_<kh>x<kw>', '<prefix>_bn' and
                '<prefix>_relu'. When None, no names are passed so Keras
                assigns its own.

        Returns:
            The output tensor of the last layer that was applied.
    '''
    if name_prefix is None:
        # Formatting None into the names would give every unnamed block
        # the same 'None_...' layer names, so leave naming to Keras.
        conv_name = bn_name = relu_name = None
    else:
        # A (height, width) kernel must contribute two numbers to the name
        # rather than the tuple's repr twice.
        if isinstance(kernel_size, (list, tuple)):
            kernel_h, kernel_w = kernel_size
        else:
            kernel_h = kernel_w = kernel_size
        conv_name = '{}_conv_{}x{}'.format(name_prefix, kernel_h, kernel_w)
        bn_name = '{}_bn'.format(name_prefix)
        relu_name = '{}_relu'.format(name_prefix)

    _x = Conv2D(filters=filters, kernel_size=kernel_size,
                padding=padding, strides=strides,
                kernel_initializer=kernel_init,
                bias_initializer=bias_init,
                name=conv_name)(input_tensor)
    if bn_act:
        _x = BatchNormalization(name=bn_name)(_x)
        _x = ReLU(name=relu_name)(_x)
    return _x


def upsample_like(input_tensor, target_tensor, name=None):
    '''
        Resizes `input_tensor` with nearest-neighbour interpolation to the
        spatial size read from `target_tensor`.

        Args:
            input_tensor: tensor passed to `tf.image.resize`.
            target_tensor: tensor whose `.shape` unpacks into four entries;
                the second and third are used as the new size
                (presumably height and width of an NHWC tensor).
            name: optional name forwarded to `tf.image.resize`.

        Returns:
            The resized tensor.
    '''
    # Only the two middle entries of the target shape are needed.
    _batch, target_h, target_w, _channels = target_tensor.shape
    return tf.image.resize(input_tensor,
                           size=[target_h, target_w],
                           method='nearest',
                           name=name)

In [3]:
class FCOS:
    '''
        FCOS-style detector assembled from a ResNet50V2 backbone, a feature
        pyramid with levels P3-P7 and two heads that are shared by every
        pyramid level.

        Every key of `config` is copied onto the instance as an attribute.
        After construction `self.model` is a `tf.keras.Model` mapping an
        image batch to `[classification, centerness, regression]` outputs,
        each concatenated over all pyramid levels along axis 1.
    '''

    def __init__(self, config):
        '''
            Validates `config`, copies its entries onto the instance and
            builds the feature pyramid and the full model.

            Args:
                config: mapping holding at least the keys checked by
                    `_validate_config`.
        '''
        self._validate_config(config)
        for attr, value in config.items():
            setattr(self, attr, value)
        self._build_fpn()
        self._build_model()

    def _validate_config(self, config):
        '''
            Checks that every required key is present in `config`.

            Raises:
                AssertionError: naming the first required key that is
                    missing.
        '''
        attr_list = [
            'mode',
            'distribute_strategy',
            'image_height',
            'image_width',
            'num_classes',
            'dataset',
            'epochs',
            'learning_rate',
            'model_dir',
            'tensorboard_log_dir'
        ]
        for attr in attr_list:
            if attr not in config:
                # Raised explicitly (instead of via `assert`) so the check
                # is not stripped when Python runs with -O; the exception
                # type and message are unchanged.
                raise AssertionError('Missing {} in config'.format(attr))

    def _build_fpn(self):
        '''
            Builds the backbone and the pyramid features P3-P7, storing them
            in `self._backbone` and `self._pyramid_features`.

            From the FPN paper, "To start the iteration, we simply attach a
            1×1 convolutional layer on C5 to produce the coarsest resolution map.
            Finally, we append a 3×3 convolution on each merged map to generate
            the final feature map, which is to reduce the aliasing effect of
            upsampling. This final set of feature maps is called
            {P2, P3, P4, P5}, corresponding to {C2, C3, C4, C5} that are
            respectively of the same spatial sizes".
            From the FCOS paper, "P6 and P7 are produced by applying one
            convolutional layer with the stride being 2 on P5 and P6, respectively".
        '''
        with self.distribute_strategy.scope():
            print('****Building FPN')
            self._backbone = tf.keras.applications.ResNet50V2(
                input_shape=[self.image_height, self.image_width, 3],
                weights='imagenet',
                include_top=False)
            # Backbone activations that feed the pyramid, coarsest first.
            C5 = self._backbone.get_layer('post_relu').output
            C4 = self._backbone.get_layer('conv4_block6_1_relu').output
            C3 = self._backbone.get_layer('conv3_block4_1_relu').output

            # Top-down pathway: a 1x1 lateral conv on each C level, merged
            # with the upsampled coarser map, then a 3x3 smoothing conv.
            M5 = conv_block(C5, 256, 1, bn_act=False, name_prefix='C5')
            P5 = conv_block(M5, 256, 3, bn_act=False, name_prefix='P5')
            M5_upsampled = upsample_like(M5, C4, name='M5_upsampled')

            M4 = conv_block(C4, 256, 1, bn_act=False, name_prefix='C4')
            M4 = Add(name='M4_M5_add')([M4, M5_upsampled])
            P4 = conv_block(M4, 256, 3, bn_act=False, name_prefix='P4')
            M4_upsampled = upsample_like(M4, C3, name='M4_upsampled')

            M3 = conv_block(C3, 256, 1, bn_act=False, name_prefix='C3')
            M3 = Add(name='M3_M4_add')([M3, M4_upsampled])
            P3 = conv_block(M3, 256, 3, bn_act=False, name_prefix='P3')

            # Extra levels: stride-2 convs on P5 and on ReLU(P6).
            P6 = conv_block(P5, 256, 3, strides=2, bn_act=False,
                            name_prefix='P6')
            P6_relu = ReLU(name='P6_relu')(P6)
            P7 = conv_block(P6_relu, 256, 3, strides=2, bn_act=False,
                            name_prefix='P7')

            self._pyramid_features = {
                'P3': P3,
                'P4': P4,
                'P5': P5,
                'P6': P6,
                'P7': P7
            }

    def _get_classification_head(self, p=0.01):
        '''
            Builds the head model that produces classification and
            centerness logits from a 256-channel feature map.

            Args:
                p: value used to derive the constant bias
                    -log((1 - p) / p) of the classification conv
                    (presumably a prior probability so that a sigmoid over
                    the initial logits is about p - confirm against the
                    loss once implemented).

            Returns:
                A `tf.keras.Model` named 'classification_head' whose output
                is the flat list `[classification_logits, centerness_logits]`,
                reshaped to (-1, num_classes) and (-1, 1) per sample.
        '''
        kernel_init = RandomNormal(0.0, 0.01)
        bias_init = Constant(-np.log((1 - p) / p))

        input_layer = Input(shape=[None, None, 256])
        x = input_layer

        for i in range(4):
            x = conv_block(x, 256, 3, kernel_init=kernel_init,
                           name_prefix='c_head_{}'.format(i))
        classification_logits = conv_block(x, self.num_classes,
                                           3, kernel_init=kernel_init,
                                           bias_init=bias_init, bn_act=False,
                                           name_prefix='cls_logits')
        centerness_logits = conv_block(x, 1, 3,
                                       kernel_init=kernel_init, bn_act=False,
                                       name_prefix='ctr_logits')
        classification_logits = Reshape(
            target_shape=[-1, self.num_classes])(classification_logits)
        centerness_logits = Reshape(target_shape=[-1, 1])(centerness_logits)

        # A flat two-element list: calling the head yields the pair
        # directly, without an extra level of nesting to index through.
        outputs = [classification_logits, centerness_logits]
        return tf.keras.Model(inputs=input_layer,
                              outputs=outputs,
                              name='classification_head')

    def _get_regression_head(self):
        '''
            Builds the head model that produces the four box regression
            values per location from a 256-channel feature map.

            From the FCOS paper, "since the regression targets are always positive
            we employ exp(x) to map any real number to (0, ∞) on the top of the
            regression branch"

            Returns:
                A `tf.keras.Model` named 'regression_head' with a single
                output reshaped to (-1, 4) per sample.
        '''
        kernel_init = RandomNormal(0.0, 0.01)
        input_layer = Input(shape=[None, None, 256])
        x = input_layer

        for i in range(4):
            x = conv_block(x, 256, 3, kernel_init=kernel_init,
                           name_prefix='r_head_{}'.format(i))
        regression_logits = conv_block(x, 4, 3, kernel_init=kernel_init,
                                       bn_act=False, name_prefix='reg_logits')
        regression_logits = Lambda(
            tf.exp, name='reg_logits')(regression_logits)
        regression_logits = Reshape(target_shape=[-1, 4])(regression_logits)
        # Single tensor in, single tensor out (no one-element lists), so
        # calling the head returns a tensor that can be appended as-is.
        return tf.keras.Model(inputs=input_layer,
                              outputs=regression_logits,
                              name='regression_head')

    def _build_model(self):
        '''
            Applies both heads to every pyramid level, concatenates the
            per-level results along axis 1 and stores the resulting
            functional model in `self.model`.
        '''
        with self.distribute_strategy.scope():
            print('****Building FCOS')
            self._classification_head = self._get_classification_head()
            self._regression_head = self._get_regression_head()

            self._classification_logits = []
            self._centerness_logits = []
            self._regression_logits = []

            # The same two head models are reused for P3 through P7.
            for level in range(3, 8):
                feature = self._pyramid_features['P{}'.format(level)]
                cls_logits, ctr_logits = self._classification_head(feature)
                reg_logits = self._regression_head(feature)
                self._classification_logits.append(cls_logits)
                self._centerness_logits.append(ctr_logits)
                self._regression_logits.append(reg_logits)

            self._classification_logits = Concatenate(
                axis=1,
                name='classification_outputs')(self._classification_logits)
            self._centerness_logits = Concatenate(
                axis=1, name='centerness_outputs')(self._centerness_logits)
            self._regression_logits = Concatenate(
                axis=1, name='regression_outputs')(self._regression_logits)

            _image_input = self._backbone.input
            outputs = [self._classification_logits,
                       self._centerness_logits,
                       self._regression_logits]
            self.model = tf.keras.Model(
                inputs=[_image_input], outputs=outputs, name='FCOS')
            # The shape handed to build() includes the batch dimension.
            self.model.build(
                [None, self.image_height, self.image_width, 3])

    @staticmethod
    def _classification_loss(labels, logits):
        '''Placeholder for the classification loss; currently returns None.'''
        # TODO
        pass

    @staticmethod
    def _centerness_loss(labels, logits):
        '''Placeholder for the centerness loss; currently returns None.'''
        # TODO
        pass

    @staticmethod
    def _regression_loss(labels, logits):
        '''Placeholder for the regression loss; currently returns None.'''
        # TODO
        pass

    @staticmethod
    def _compute_total_loss(labels, logits):
        '''Placeholder for the combined loss; currently returns None.'''
        # TODO
        pass


In [4]:
# Configuration handed to FCOS; every key below is required by
# FCOS._validate_config and is copied onto the instance as an attribute.
config = {
    'mode': 'train',
    # Strategy whose scope() wraps backbone, FPN and head construction.
    'distribute_strategy': tf.distribute.OneDeviceStrategy(device='/cpu:0'),
    # Input image size used for the backbone's input_shape.
    'image_height': 720,
    'image_width': 1280,
    # Number of channels of the classification output.
    'num_classes': 80,
    'dataset': None,
    'epochs': 250,
    'learning_rate': 1e-4,
    'model_dir': 'model_files',
    'tensorboard_log_dir': 'logs',
}

In [5]:
# Build the detector, then run one random image batch through it in
# inference mode as a smoke test of the output shapes.
fcos = FCOS(config)
# Take the spatial size from the built model so the dummy batch always
# matches the configured input size instead of repeating the numbers.
dummy_tensor = tf.random.normal(
    shape=[1, fcos.image_height, fcos.image_width, 3])
dummy_output = fcos.model(dummy_tensor, training=False)

****Building FPN
****Building FCOS


In [6]:
# Show the three outputs of the forward pass, in model order:
# classification, centerness, regression. Their second dimension (19220)
# equals the number of locations summed over P3-P7:
# 90*160 + 45*80 + 23*40 + 12*20 + 6*10.
dummy_output

[<tf.Tensor: id=12616, shape=(1, 19220, 80), dtype=float32, numpy=
 array([[[-4.600041 , -4.5943484, -4.5941334, ..., -4.601539 ,
          -4.60411  , -4.5999866],
         [-4.600617 , -4.591286 , -4.587539 , ..., -4.605673 ,
          -4.594533 , -4.5912547],
         [-4.595271 , -4.585014 , -4.5905824, ..., -4.6086802,
          -4.5917597, -4.5997343],
         ...,
         [-4.5870996, -4.581433 , -4.61062  , ..., -4.6127114,
          -4.5829835, -4.584749 ],
         [-4.587243 , -4.592647 , -4.60513  , ..., -4.6059704,
          -4.58951  , -4.586633 ],
         [-4.588526 , -4.5929556, -4.6019373, ..., -4.5983734,
          -4.5911183, -4.586005 ]]], dtype=float32)>,
 <tf.Tensor: id=12614, shape=(1, 19220, 1), dtype=float32, numpy=
 array([[[0.00471219],
         [0.00884129],
         [0.00691961],
         ...,
         [0.00733578],
         [0.00571065],
         [0.00074757]]], dtype=float32)>,
 <tf.Tensor: id=12612, shape=(1, 19220, 4), dtype=float32, numpy=
 array([[

In [7]:
# Symbolic output tensors of the functional model; the concatenated
# location dimension is reported as None here.
fcos.model.outputs

[<tf.Tensor 'classification_outputs/Identity:0' shape=(None, None, 80) dtype=float32>,
 <tf.Tensor 'centerness_outputs/Identity:0' shape=(None, None, 1) dtype=float32>,
 <tf.Tensor 'regression_outputs/Identity:0' shape=(None, None, 4) dtype=float32>]

In [8]:
# Print the layer tables of both shared heads. Each summary() call prints
# its table; the cell's displayed value is the tuple of the two return
# values, which shows up as (None, None).
fcos._regression_head.summary(), fcos._classification_head.summary()

Model: "regression_head"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, None, None, 256)] 0         
_________________________________________________________________
r_head_0_conv_3x3 (Conv2D)   multiple                  590080    
_________________________________________________________________
r_head_0_bn (BatchNormalizat multiple                  1024      
_________________________________________________________________
r_head_0_relu (ReLU)         multiple                  0         
_________________________________________________________________
r_head_1_conv_3x3 (Conv2D)   multiple                  590080    
_________________________________________________________________
r_head_1_bn (BatchNormalizat multiple                  1024      
_________________________________________________________________
r_head_1_relu (ReLU)         multiple              

(None, None)

In [12]:
# Inspect the pyramid feature tensors P3-P7 and their static shapes.
fcos._pyramid_features

{'P3': <tf.Tensor 'P3_conv_3x3/Identity:0' shape=(None, 90, 160, 256) dtype=float32>,
 'P4': <tf.Tensor 'P4_conv_3x3/Identity:0' shape=(None, 45, 80, 256) dtype=float32>,
 'P5': <tf.Tensor 'P5_conv_3x3/Identity:0' shape=(None, 23, 40, 256) dtype=float32>,
 'P6': <tf.Tensor 'P6_conv_3x3/Identity:0' shape=(None, 12, 20, 256) dtype=float32>,
 'P7': <tf.Tensor 'P7_conv_3x3/Identity:0' shape=(None, 6, 10, 256) dtype=float32>}

In [12]:
# Print the layer-by-layer table of the complete FCOS model.
fcos.model.summary()

Model: "FCOS"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 720, 1280, 3 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 726, 1286, 3) 0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 360, 640, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
pool1_pad (ZeroPadding2D)       (None, 362, 642, 64) 0           conv1_conv[0][0]                 
_______________________________________________________________________________________________