In [1]:
import numpy as np
from pprint import pprint
import tensorflow as tf
from tensorflow.keras.initializers import RandomNormal, Constant
from tensorflow.keras.layers import (Input,
                                     Conv2D, 
                                     Concatenate,
                                     BatchNormalization,
                                     Lambda,
                                     ReLU,
                                     Reshape,
                                     Add)
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint
from tensorflow.keras import backend as K
from tqdm.notebook import tqdm

print('TensorFlow:', tf.__version__)

TensorFlow: 2.0.0


In [2]:
def conv_block(input_tensor=None,
               filters=None,
               kernel_size=None,
               strides=1,
               padding='same',
               kernel_init='he_normal',
               bias_init='zeros',
               bn_act=True,
               name_prefix=None):
    '''
        Apply a Conv2D layer, optionally followed by BatchNormalization
        and ReLU, to `input_tensor`.

        Args:
            input_tensor: Tensor the convolution is applied to.
            filters: Number of output channels of the convolution.
            kernel_size: Kernel size; also embedded in the layer name as
                `<name_prefix>_conv_<k>x<k>`.
            strides: Convolution stride.
            padding: Padding mode forwarded to Conv2D.
            kernel_init: Kernel initializer forwarded to Conv2D.
            bias_init: Bias initializer forwarded to Conv2D.
            bn_act: When True, append `<name_prefix>_bn` and
                `<name_prefix>_relu` layers after the convolution.
            name_prefix: Prefix used for every layer name created here.

        Returns:
            The output tensor of the last layer that was applied.
    '''
    conv_name = '{}_conv_{}x{}'.format(name_prefix, kernel_size, kernel_size)
    conv_layer = Conv2D(filters=filters,
                        kernel_size=kernel_size,
                        strides=strides,
                        padding=padding,
                        kernel_initializer=kernel_init,
                        bias_initializer=bias_init,
                        name=conv_name)
    features = conv_layer(input_tensor)

    # Plain convolution requested: skip normalisation and activation.
    if not bn_act:
        return features

    normalised = BatchNormalization(
        name='{}_bn'.format(name_prefix))(features)
    return ReLU(name='{}_relu'.format(name_prefix))(normalised)


def upsample_like(input_tensor, target_tensor, name=None):
    '''
        Resize `input_tensor` with nearest-neighbour interpolation so its
        height and width match those of `target_tensor`.

        The target size is read from the static shape of `target_tensor`
        (dimensions 1 and 2 of a 4-D shape), so those dimensions must be
        known when this is called.
    '''
    target_shape = target_tensor.shape
    # Unpacking enforces, like a 4-way tuple unpack, that the shape is 4-D.
    _, target_h, target_w, _ = target_shape
    return tf.image.resize(input_tensor,
                           size=[target_h, target_w],
                           method='nearest',
                           name=name)



class Scale(tf.keras.layers.Layer):
    '''
        Layer with a single trainable scalar that computes
        `exp(scale * x)`.

        In this notebook it is applied to the regression head output of
        each pyramid level (one independent `Scale` instance per level).

        Args:
            init_value: Initial value of the trainable scalar.
            **kwargs: Forwarded to `tf.keras.layers.Layer`.
    '''
    def __init__(self, init_value=1.0, **kwargs):
        super(Scale, self).__init__(**kwargs)
        self.init_value = init_value

    def build(self, input_shape):
        '''Create the trainable scalar weight (shape [1]).'''
        self.scale = \
            self.add_weight(name='scale',
                            shape=[1],
                            dtype=K.floatx(),
                            trainable=True,
                            initializer=Constant(value=self.init_value))

    def call(self, x):
        '''Return `exp(scale * x)`; the output has the shape of `x`.'''
        scaled_inputs = tf.multiply(self.scale, x)
        return tf.exp(scaled_inputs)

    def compute_output_shape(self, input_shape):
        '''The layer is element-wise, so the shape is unchanged.'''
        return input_shape

    def get_config(self):
        '''
            Return the layer config including `init_value`, so that a
            layer rebuilt from this config is constructed with the same
            initial scale instead of silently falling back to the default.
        '''
        config = super(Scale, self).get_config()
        config.update({'init_value': self.init_value})
        return config

In [3]:
class FCOS:
    '''
        FCOS detector assembled from a ResNet50V2 backbone, a feature
        pyramid (P3-P7) and classification / centerness / regression
        heads that are shared across the pyramid levels.

        Constructing the object validates the config, copies every config
        entry onto the instance as an attribute of the same name and then
        builds, in order: the FPN, the Keras model, the datasets, the
        optimizer and the callbacks. Call `train()` to compile and fit.
    '''
    def __init__(self, config):
        '''
            Args:
                config (dict): Must contain every key listed in
                    `_validate_config`.
        '''
        self._validate_config(config)
        for attr in config:
            setattr(self, attr, config[attr])
        self._build_fpn()
        self._build_model()
        self._build_datasets()
        self._build_optimizer()
        self._build_callbacks()

    def _validate_config(self, config):
        '''
            Assert that every required key is present in `config` and
            pretty-print the config.

            Raises:
                AssertionError: if a required key is missing.
        '''
        attr_list = [
            'mode',
            'distribute_strategy',
            'image_height',
            'image_width',
            'num_classes',
            'data_dir',
            'dataset_fn',
            'batch_size',
            'epochs',
            'learning_rate',
            'model_dir',
            'tensorboard_log_dir'
        ]
        for attr in attr_list:
            assert attr in config, 'Missing {} in config'.format(attr)
        pprint('****Initializing FCOS with the following config')
        pprint(config)

    def _build_fpn(self):
        '''
            From the FPN paper, "To start the iteration, we simply attach a
            1×1 convolutional layer on C5 to produce the coarsest resolution
            map. Finally, we append a 3×3 convolution on each merged map to
            generate the final feature map, which is to reduce the aliasing
            effect of upsampling. This final set of feature maps is called
            {P2, P3, P4, P5}, corresponding to {C2, C3, C4, C5} that are
            respectively of the same spatial sizes".
            From the FCOS paper, "P6 and P7 are produced by applying one
            convolutional layer with the stride being 2 on P5 and P6,
            respectively".

            Stores the backbone in `self._backbone` and the five pyramid
            tensors in `self._pyramid_features` (keys 'P3' .. 'P7').
        '''
        with self.distribute_strategy.scope():
            pprint('****Building FPN')
            self._backbone = tf.keras.applications.ResNet50V2(
                input_shape=[self.image_height, self.image_width, 3],
                weights='imagenet',
                include_top=False)
            C5 = self._backbone.get_layer('post_relu').output
            C4 = self._backbone.get_layer('conv4_block6_1_relu').output
            C3 = self._backbone.get_layer('conv3_block4_1_relu').output

            # Top-down pathway: 1x1 lateral conv, add the upsampled coarser
            # map, then a 3x3 conv to produce the output level.
            M5 = conv_block(C5, 256, 1, bn_act=False, name_prefix='C5')
            P5 = conv_block(M5, 256, 3, bn_act=False, name_prefix='P5')
            M5_upsampled = upsample_like(M5, C4, name='M5_upsampled')

            M4 = conv_block(C4, 256, 1, bn_act=False, name_prefix='C4')
            M4 = tf.keras.layers.Add(name='M4_M5_add')([M4, M5_upsampled])
            P4 = conv_block(M4, 256, 3, bn_act=False, name_prefix='P4')
            M4_upsampled = upsample_like(M4, C3, name='M4_upsampled')

            M3 = conv_block(C3, 256, 1, bn_act=False, name_prefix='C3')
            P3 = Add(name='M3_M4_add')([M3, M4_upsampled])
            P3 = conv_block(P3, 256, 3, bn_act=False, name_prefix='P3')

            # Extra coarse levels: stride-2 convs on P5 and on ReLU(P6).
            P6 = conv_block(P5, 256, 3, 2, bn_act=False, name_prefix='P6')
            P6_relu = ReLU(name='P6_relu')(P6)
            P7 = conv_block(P6_relu, 256, 3, 2, bn_act=False, name_prefix='P7')

            self._pyramid_features = {
                'P3': P3,
                'P4': P4,
                'P5': P5,
                'P6': P6,
                'P7': P7
            }

    def _get_classification_head(self, p=0.01):
        '''
            Build the head that predicts class logits and centerness
            logits from a 256-channel feature map.

            Args:
                p: Value used to initialise the class-logit bias as
                    `-log((1 - p) / p)`.

            Returns:
                A `tf.keras.Model` whose outputs are nested as
                `[[classification_logits, centerness_logits]]`, reshaped
                to `[-1, num_classes]` and `[-1, 1]` per image.
        '''
        kernel_init = RandomNormal(0.0, 0.01)
        bias_init = Constant(-np.log((1 - p) / p))

        input_layer = Input(shape=[None, None, 256])
        x = input_layer

        for i in range(4):
            x = conv_block(x, 256, 3, kernel_init=kernel_init,
                           bn_act=False, name_prefix='c_head_{}'.format(i))
        classification_logits = conv_block(x, self.num_classes,
                                           3, kernel_init=kernel_init,
                                           bias_init=bias_init, bn_act=False,
                                           name_prefix='cls_logits')
        centerness_logits = conv_block(x, 1, 3,
                                       kernel_init=kernel_init, bn_act=False,
                                       name_prefix='ctr_logits')
        classification_logits = Reshape(
            target_shape=[-1, self.num_classes])(classification_logits)
        centerness_logits = Reshape(target_shape=[-1, 1])(centerness_logits)

        outputs = [classification_logits, centerness_logits]
        # The extra list nesting is relied upon by `_build_model`, which
        # indexes the head output as [0][0] and [0][1].
        return tf.keras.Model(inputs=[input_layer],
                              outputs=[outputs],
                              name='classification_head')

    def _get_regression_head(self):
        '''
            Build the head that predicts 4 regression values per location
            from a 256-channel feature map.

            Returns:
                A `tf.keras.Model` whose output is reshaped to `[-1, 4]`
                per image.
        '''
        kernel_init = RandomNormal(0.0, 0.01)
        input_layer = Input(shape=[None, None, 256])
        x = input_layer

        for i in range(4):
            x = conv_block(x, 256, 3, kernel_init=kernel_init,
                           bn_act=False, name_prefix='r_head_{}'.format(i))
        regression_logits = conv_block(x, 4, 3, kernel_init=kernel_init,
                                       bn_act=False, name_prefix='reg_logits')
        regression_logits = Reshape(target_shape=[-1, 4])(regression_logits)
        return tf.keras.Model(inputs=[input_layer],
                              outputs=[regression_logits],
                              name='regression_head')

    def _build_model(self):
        '''
            Apply the shared heads to every pyramid level (P3 .. P7),
            concatenate the per-level predictions along the location axis
            and assemble `self.model` with three outputs named
            'classification_outputs', 'centerness_outputs' and
            'regression_outputs'.
        '''
        with self.distribute_strategy.scope():
            pprint('****Building FCOS')
            self._classification_head = self._get_classification_head()
            self._regression_head = self._get_regression_head()

            self._classification_logits = []
            self._centerness_logits = []
            self._regression_logits = []

            for i in range(3, 8):
                feature = self._pyramid_features['P{}'.format(i)]
                _cls_head_logits = self._classification_head(feature)
                _reg_head_logits = self._regression_head(feature)
                # One independent trainable scale per pyramid level.
                _reg_head_logits = \
                    Scale(init_value=1.0,
                          name='P{}_reg_outputs'.format(i))(_reg_head_logits)

                # The classification head returns [[cls, ctr]].
                self._classification_logits.append(_cls_head_logits[0][0])
                self._centerness_logits.append(_cls_head_logits[0][1])
                self._regression_logits.append(_reg_head_logits)

            self._classification_logits = Concatenate(
                axis=1,
                name='classification_outputs')(self._classification_logits)
            self._centerness_logits = Concatenate(
                axis=1, name='centerness_outputs')(self._centerness_logits)
            self._regression_logits = Concatenate(
                axis=1, name='regression_outputs')(self._regression_logits)

            _image_input = self._backbone.input
            outputs = [self._classification_logits,
                       self._centerness_logits,
                       self._regression_logits]
            self.model = tf.keras.Model(
                inputs=[_image_input], outputs=outputs, name='FCOS')
            self.model.build([self.image_height, self.image_width, 3])

    def _build_datasets(self):
        '''
            Call the user supplied `dataset_fn` and derive the number of
            steps per epoch from the image counts it reports.
        '''
        pprint('****Building Datasets')
        with self.distribute_strategy.scope():
            self.train_dataset, self.val_dataset, \
                num_train_images, num_val_images =  \
                self.dataset_fn(self.image_height,
                                self.image_width,
                                self.data_dir,
                                self.batch_size)

            self.train_steps = num_train_images // self.batch_size
            self.val_steps = num_val_images // self.batch_size

    def _build_callbacks(self):
        '''Create the TensorBoard and checkpoint callbacks used by fit.'''
        pprint('****Setting Up Callbacks')
        self.callbacks = [
            TensorBoard(log_dir=self.tensorboard_log_dir),
            ModelCheckpoint(filepath=self.model_dir + '/ckpt-{epoch:02d}',
                            monitor='val_loss',
                            save_weights_only=True,
                            save_best_only=True)
        ]

    def _build_optimizer(self):
        '''Create the Adam optimizer with the configured learning rate.'''
        pprint('****Setting Up Optimizer')
        # `learning_rate` is the documented argument name; `lr` is only a
        # legacy alias.
        self.optimizer = tf.keras.optimizers.Adam(
            learning_rate=self.learning_rate)

    def _classification_loss(self, alpha=0.25, gamma=2):
        '''
            Build the focal loss used for the classification output.

            Args:
                alpha: Weight applied to positive entries (negatives get
                    `1 - alpha`).
                gamma: Exponent of the `(1 - pt)` modulating factor.

            Returns:
                A callable `focal_loss(y_true, y_pred)` that returns one
                value per image (shape `[batch, 1]`).
        '''
        # TODO
        #   a) Double check if tf.keras.Model.fit is handling
        #      loss scaling for distributed training if not
        #      use tf.nn.compute_average_loss fn
        @tf.function
        def focal_loss(y_true, y_pred):
            # y_true holds class ids with 0 meaning background; it is
            # one-hot encoded with an extra background column that is
            # dropped again, so background rows become all zeros.
            fg_mask = tf.cast(y_true != 0, dtype=tf.float32)
            y_true = tf.one_hot(
                tf.cast(y_true, dtype=tf.int32), depth=self.num_classes + 1)
            y_true = y_true[:, :, 1:]
            y_pred_ = tf.sigmoid(y_pred)

            at = alpha * y_true + (1 - y_true) * (1 - alpha)
            pt = y_true * y_pred_ + (1 - y_true) * (1 - y_pred_)
            f_loss = at * \
                tf.pow(1 - pt, gamma) * \
                tf.nn.sigmoid_cross_entropy_with_logits(
                    labels=y_true, logits=y_pred)
            f_loss = tf.reduce_mean(f_loss, axis=2)
            # NOTE(review): background locations are masked out here, so
            # only foreground locations contribute to the classification
            # loss - confirm this is intended.
            f_loss = f_loss * fg_mask
            f_loss = tf.reduce_sum(f_loss, axis=1, keepdims=True)
            normalizer_value = tf.reduce_sum(fg_mask, axis=1, keepdims=True)
            # An image without any foreground location would otherwise
            # produce 0 / 0 = NaN; divide_no_nan yields 0 in that case.
            f_loss = tf.math.divide_no_nan(f_loss, normalizer_value)
            return f_loss
        return focal_loss

    @tf.function
    def _centerness_loss(self, labels, logits):
        '''
            Sigmoid cross entropy between centerness targets and logits,
            summed over foreground locations (non-zero target) and divided
            by the number of foreground locations of each image.
        '''
        # TODO
        #   a) Double check if tf.keras.Model.fit is handling
        #      loss scaling for distributed training if not
        #      use tf.nn.compute_average_loss fn
        fg_mask = tf.cast(labels != 0, dtype=tf.float32)
        bce_loss = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=labels, logits=logits)
        bce_loss = bce_loss * fg_mask
        bce_loss = tf.reduce_sum(bce_loss, axis=1)
        normalizer_value = tf.reduce_sum(fg_mask, axis=1)
        # Guard against 0 / 0 = NaN for images without foreground.
        bce_loss = tf.math.divide_no_nan(bce_loss, normalizer_value)
        return bce_loss

    @tf.function
    def _regression_loss(self, labels, logits):
        '''
            IoU loss (`-log(IoU)`) between target and predicted boxes,
            averaged over the foreground locations of each image.

            `labels` and `logits` hold 4 distances per location that are
            subtracted from / added to `self._centers` to form corner
            boxes. `self._centers` is set by `train()` and must broadcast
            against the first two / last two channels of `labels`.

            Returns:
                One loss value per image (shape `[batch, 1]`).
        '''
        # TODO
        #   a) Double check if tf.keras.Model.fit is handling
        #      loss scaling for distributed training if not
        #      use tf.nn.compute_average_loss fn
        fg_mask = tf.cast(labels != 0, dtype=tf.float32)
        boxes_true = tf.concat([
            self._centers - labels[:, :, :2],
            self._centers + labels[:, :, 2:]], axis=-1) * fg_mask

        boxes_pred = tf.concat([
            self._centers - logits[:, :, :2],
            self._centers + logits[:, :, 2:]], axis=-1) * fg_mask

        lu = tf.maximum(boxes_true[:, :, :2], boxes_pred[:, :, :2])
        rd = tf.minimum(boxes_true[:, :, 2:], boxes_pred[:, :, 2:])
        intersection = tf.maximum(0.0, rd - lu)
        intersection_area = intersection[:, :, 0] * intersection[:, :, 1]
        boxes_true_area = tf.reduce_prod(
            boxes_true[:, :, 2:] - boxes_true[:, :, :2], axis=2)
        boxes_pred_area = tf.reduce_prod(
            boxes_pred[:, :, 2:] - boxes_pred[:, :, :2], axis=2)
        union_area = tf.maximum(
            boxes_true_area + boxes_pred_area - intersection_area, 1e-10)
        iou = tf.clip_by_value(intersection_area / union_area, 0.0, 1.0)

        # Collapse the per-coordinate mask into one flag per location.
        fg_mask = tf.reduce_sum(fg_mask, axis=2)
        fg_mask = tf.cast(fg_mask != 0, dtype=tf.float32)
        normalizer_value = tf.reduce_sum(fg_mask, axis=1, keepdims=True)

        # Background IoU is 0; the small offset keeps log() finite there
        # before those entries are masked out again.
        bg_mask = (1 - fg_mask) * 1e-7
        iou_loss = iou + bg_mask
        iou_loss = -1 * tf.math.log(iou_loss)
        iou_loss = iou_loss * fg_mask
        iou_loss = tf.reduce_sum(iou_loss, axis=1, keepdims=True)
        # Guard against 0 / 0 = NaN for images without foreground.
        iou_loss = tf.math.divide_no_nan(iou_loss, normalizer_value)
        return iou_loss

    def train(self):
        '''
            Compile the model with one loss per named output and run
            `fit` on the training dataset, validating every 2nd epoch.
        '''
        loss_dict = {
            'classification_outputs': self._classification_loss(alpha=0.25,
                                                                gamma=2),
            'centerness_outputs': self._centerness_loss,
            'regression_outputs': self._regression_loss
        }
        # `get_all_centers` returns one tensor per pyramid level; the
        # regression loss needs a single tensor covering all locations in
        # the same order as the concatenated model outputs.
        self._centers = tf.concat(
            get_all_centers(self.image_height, self.image_width), axis=0)
        with self.distribute_strategy.scope():
            self.model.compile(optimizer=self.optimizer,
                               loss=loss_dict)
            # `_build_datasets` stores the step count as `train_steps`.
            self.model.fit(self.train_dataset,
                           epochs=self.epochs,
                           steps_per_epoch=self.train_steps,
                           validation_data=self.val_dataset,
                           validation_steps=self.val_steps,
                           validation_freq=2,
                           callbacks=self.callbacks)

In [4]:
@tf.function
def flip_data(image, boxes, w):
    '''
        With probability 0.5, flip the image left-right and mirror the
        boxes accordingly.

        Args:
            image: Image tensor to flip.
            boxes: Boxes with columns [x_min, y_min, x_max, y_max].
            w: Image width used to mirror the x coordinates.

        Returns:
            The (possibly flipped) image and boxes.
    '''
    x_min = boxes[:, 0]
    y_min = boxes[:, 1]
    x_max = boxes[:, 2]
    y_max = boxes[:, 3]
    if tf.random.uniform(()) > 0.5:
        image = tf.image.flip_left_right(image)
        # Mirroring swaps the roles of the left and right edges.
        mirrored = [w - x_max, y_min, w - x_min, y_max]
        boxes = tf.stack(mirrored, axis=-1)
    return image, boxes


@tf.function
def random_jitter(image):
    '''
        Placeholder for a random jitter augmentation.

        Not implemented yet: the image is ignored and None is returned.
    '''
    # ToDo
    return None


def compute_area(boxes):
    '''
        Return the area of every box.

        Args:
            boxes: 2-D array-like whose first four columns are two pairs
                of opposite corner coordinates (columns 0/2 and 1/3).

        Returns:
            1-D array of `(col2 - col0) * (col3 - col1)` per row.
    '''
    extent_a = boxes[:, 2] - boxes[:, 0]
    extent_b = boxes[:, 3] - boxes[:, 1]
    return extent_a * extent_b


def compute_feature_sizes(H, W):
    '''
        Return `[ceil(H / stride), ceil(W / stride), stride]` for the
        strides 2**3 .. 2**7 (one entry per level, finest first).
    '''
    strides = [2.**level for level in range(3, 8)]
    return [
        [tf.math.ceil(H / stride), tf.math.ceil(W / stride), stride]
        for stride in strides
    ]


def get_centers(fm_h, fm_w, stride=None):
    '''
        Return the (x, y) center of every cell of an `fm_h` x `fm_w`
        feature map in input-image coordinates, i.e.
        `(index + 0.5) * stride` along each axis.

        Returns:
            Tensor of shape (fm_h, fm_w, 2) with x in channel 0 and y in
            channel 1.
    '''
    col_offsets = tf.range(fm_w) + 0.5
    row_offsets = tf.range(fm_h) + 0.5
    grid_x, grid_y = tf.meshgrid(col_offsets * (stride),
                                 row_offsets * (stride))
    return tf.stack([grid_x, grid_y], axis=-1)


def get_all_centers(H, W):
    '''
        Return a list with one (num_locations, 2) tensor of (x, y)
        centers per feature level, in the order produced by
        `compute_feature_sizes`.
    '''
    return [
        tf.reshape(get_centers(level_h, level_w, level_stride),
                   shape=[-1, 2])
        for level_h, level_w, level_stride in compute_feature_sizes(H, W)
    ]


@tf.function(input_signature=[
    tf.TensorSpec(shape=[None, 2], dtype=tf.float32),
    tf.TensorSpec(shape=[None, 5], dtype=tf.float32),
    tf.TensorSpec(shape=[], dtype=tf.float32),
    tf.TensorSpec(shape=[], dtype=tf.float32)
])
def compute_targets_(centers, labels, low, high):
    '''
        Compute classification, centerness and regression targets for
        the locations of a single feature level.

        From the FCOS paper, "Specifically, location (x, y) is
        considered as a positive sample if it falls into any
        ground-truth box and the class label c* of the location is
        the class label of the ground-truth box. Otherwise it is a
        negative sample and class* = 0 (background class)
        Besides the label for classification, we also have a 4D
        real vector t* = (l*, t*, r*, b*) being the regression
        targets for the location. Here l*, t*, r* and b* are the
        distances from the location to the four sides of the bounding
        box ...If a location falls into multiple bounding boxes, it is
        considered as an ambiguous sample. We simply choose the
        bounding box with minimal area as its regression target.
        ...we firstly compute the regression targets l*, t*, r* and b*
        for each location on all feature levels. Next, if a location
        satisfies max(l*, t*, r*, b*) > mi or max(l*, t*, r*, b*) < mi−1,
        it is set as a negative sample and is thus not required to
        regress a bounding box anymore. Here mi is the maximum distance
        that feature level i needs to regress. In this work, m2, m3, m4,
        m5, m6 and m7 are set as 0, 64, 128, 256, 512 and ∞, respectively"
        Args:
            centers (M, 2): Centers for the current feature level
            labels (N, 5):  All labels for the current image; columns
                            0-3 are the box, column 4 is the class id
            low: Lower limit for ltrb value for the current feature level
            high: Upper limit for ltrb value for the current feature level
        Returns:
            classification_target (M,): class id + 1 for positive
                locations, 0 for negative locations
            centerness_target (M,): centerness for positive locations,
                0 for negative locations
            regression_target (M, 4): [l, t, r, b] for positive
                locations, zeros for negative locations
    '''
    boxes_ = labels[:, :4]
    class_ids_ = labels[:, 4]

    # Sort the boxes by area in ascending order so that
    # we pick the smallest box when computing ltrb values
    areas = compute_area(boxes_)
    sorted_indices = tf.argsort(areas)
    boxes = tf.gather(boxes_, indices=sorted_indices)
    class_ids = tf.gather(class_ids_, indices=sorted_indices)

    # Broadcast every center against every box:
    # (M, 1, 2) with (N, 2) -> (M, N, 2)
    xy_min_ = boxes[:, :2]
    xy_max_ = boxes[:, 2:]
    lt_ = centers[:, None] - xy_min_
    rb_ = xy_max_ - centers[:, None]
    ltrb_ = tf.concat([lt_, rb_], axis=2)  # (M, N, 4)

    # check if max(ltrb) lies in the valid_range for this
    # feature level
    max_ltrb_ = tf.reduce_max(ltrb_, axis=2)  # (M, N)
    mask_ltrb_size = tf.logical_and(max_ltrb_ > low, max_ltrb_ < high)

    # A center lies strictly inside a box when all four distances
    # are positive
    mask_lt = tf.logical_and(ltrb_[:, :, 0] > 0, ltrb_[:, :, 1] > 0)
    mask_rb = tf.logical_and(ltrb_[:, :, 2] > 0, ltrb_[:, :, 3] > 0)
    mask = tf.logical_and(mask_lt, mask_rb)
    mask = tf.logical_and(mask, mask_ltrb_size)  # (M, N)

    # A location is foreground when at least one box matched it
    mask = tf.cast(mask, dtype=tf.float32)
    fg_mask = tf.reduce_sum(mask, axis=1) != 0  # (M,)
    fg_mask = tf.cast(fg_mask, dtype=tf.float32)
    fg_mask = tf.tile(fg_mask[:, None], multiples=[1, 4])

    # NOTE(review): picking the smallest matching box relies on argmax
    # returning the first maximal index along the area-sorted axis -
    # TODO confirm the tie-breaking behaviour of tf.argmax.
    # Rows without any match get index 0 here and are zeroed out by
    # fg_mask below.
    valid_indices = tf.argmax(mask, axis=1)  # (M, )
    matched_boxes = tf.gather(boxes, valid_indices)
    # Shift class ids by one so that 0 can denote background
    matched_class_ids = tf.gather(class_ids, valid_indices) + 1

    x_min, y_min, x_max, y_max = tf.split(matched_boxes,
                                          num_or_size_splits=4,
                                          axis=1)
    l = tf.abs(centers[:, 0] - x_min[:, 0])
    t = tf.abs(centers[:, 1] - y_min[:, 0])
    r = tf.abs(x_max[:, 0] - centers[:, 0])
    b = tf.abs(y_max[:, 0] - centers[:, 1])
    lr = tf.stack([l, r], axis=1)
    tb = tf.stack([t, b], axis=1)

    min_lr = tf.reduce_min(lr, axis=1)
    max_lr = tf.reduce_max(lr, axis=1)
    min_tb = tf.reduce_min(tb, axis=1)
    max_tb = tf.reduce_max(tb, axis=1)

    classification_target = matched_class_ids * fg_mask[:, 0]
    # centerness = sqrt(min(l, r) / max(l, r) * min(t, b) / max(t, b))
    # NOTE(review): if max_lr or max_tb is 0 for a location this
    # evaluates to NaN, and NaN * 0 stays NaN even for background
    # locations - verify this cannot happen with the input data.
    centerness_target = tf.sqrt(
        (min_lr / max_lr) * (min_tb / max_tb)) * fg_mask[:, 0]
    regression_target = tf.stack([l, t, r, b], axis=1) * fg_mask

    return classification_target, centerness_target, regression_target


def compute_targets(H, W, labels):
    '''
        Compute the targets of one image for all five feature levels and
        concatenate them along the location axis.

        Args:
            H, W: Image height and width used to generate the centers.
            labels (N, 5): Boxes and class ids of the image.

        Returns:
            classification_target (num_locations,),
            centerness_target (num_locations, 1),
            regression_target (num_locations, 4).
    '''
    # (low, high) limits of max(l, t, r, b) per feature level.
    level_ranges = (
        (0.0, 64.0),
        (64.0, 128.0),
        (128.0, 256.0),
        (256.0, 512.0),
        (512.0, 1e8),
    )
    cls_parts, ctr_parts, reg_parts = [], [], []
    level_centers = get_all_centers(H, W)
    for centers, (low, high) in zip(level_centers, level_ranges):
        cls_part, ctr_part, reg_part = compute_targets_(
            centers, labels, low, high)
        cls_parts.append(cls_part)
        ctr_parts.append(ctr_part)
        reg_parts.append(reg_part)

    classification_target = tf.concat(cls_parts, axis=0)
    # Centerness gets a trailing channel axis.
    centerness_target = tf.expand_dims(
        tf.concat(ctr_parts, axis=0), axis=-1)
    regression_target = tf.concat(reg_parts, axis=0)
    return classification_target, centerness_target, regression_target


# Schema used by `parse_example` to decode one serialized example: a single
# encoded image string plus variable-length float lists for the box corners
# and the class labels.
feature_description = {'image': tf.io.FixedLenFeature([], tf.string)}
feature_description.update(
    (key, tf.io.VarLenFeature(tf.float32))
    for key in ('xmins', 'ymins', 'xmaxs', 'ymaxs', 'labels'))


@tf.function
def parse_example(example_proto):
    '''
        Decode one serialized example according to `feature_description`.

        Returns:
            image: Decoded 3-channel JPEG image.
            bboxes (N, 4): Columns [xmin, ymin, xmax, ymax].
            class_ids (N, 1): Class labels as a column vector.
    '''
    features = tf.io.parse_single_example(
        example_proto, feature_description)
    image = tf.image.decode_jpeg(features['image'], channels=3)

    corner_keys = ('xmins', 'ymins', 'xmaxs', 'ymaxs')
    corners = [tf.sparse.to_dense(features[key]) for key in corner_keys]
    bboxes = tf.stack(corners, axis=-1)

    dense_labels = tf.sparse.to_dense(features['labels'])
    class_ids = tf.reshape(dense_labels, [-1, 1])
    return image, bboxes, class_ids



def load_data(h, w):
    '''
        Build the per-example mapping function used by the input
        pipeline for images resized to `h` x `w`.

        The returned function parses an example, resizes the image,
        scales the boxes by `w` / `h` and clips them to the image,
        applies `flip_data` and computes the training targets.
    '''
    @tf.function
    def load_data_(example_proto):
        image, raw_boxes, class_ids = parse_example(example_proto)
        image.set_shape([None, None, 3])
        image = tf.image.resize(image, size=[h, w])

        # presumably the stored boxes are normalised to [0, 1] since they
        # are scaled by the image size here - verify against the writer
        x_min = tf.clip_by_value(raw_boxes[:, 0] * w, 0, w)
        y_min = tf.clip_by_value(raw_boxes[:, 1] * h, 0, h)
        x_max = tf.clip_by_value(raw_boxes[:, 2] * w, 0, w)
        y_max = tf.clip_by_value(raw_boxes[:, 3] * h, 0, h)
        boxes = tf.stack([x_min, y_min, x_max, y_max], axis=-1)

        image, boxes = flip_data(image, boxes, w)
        label = tf.concat([boxes, class_ids], axis=-1)
        cls_target, ctr_target, reg_target = compute_targets(h, w, label)
        return image, (cls_target, ctr_target, reg_target)
    return load_data_

In [5]:
def create_dataset(H, W, tf_records_pattern, batch_size):
    '''
        Build the input pipeline for all TFRecord files matching
        `tf_records_pattern`.

        Files are interleaved, every example is mapped through
        `load_data(H, W)`, and the result is shuffled (buffer of 512),
        batched with incomplete batches dropped, repeated indefinitely
        and prefetched. Deterministic ordering is disabled.
    '''
    autotune = tf.data.experimental.AUTOTUNE

    pipeline_options = tf.data.Options()
    pipeline_options.experimental_deterministic = False

    return (
        tf.data.Dataset.list_files(tf_records_pattern)
        .interleave(tf.data.TFRecordDataset,
                    cycle_length=16,
                    block_length=16,
                    num_parallel_calls=autotune)
        .map(load_data(H, W), num_parallel_calls=autotune)
        .shuffle(512)
        .batch(batch_size, drop_remainder=True)
        .repeat()
        .prefetch(autotune)
        .with_options(pipeline_options)
    )


def dataset_fn(H, W, data_dir, batch_size):
    '''
        Create the training and validation datasets from the TFRecord
        files in `data_dir` (patterns `train*` and `val*`).

        Returns:
            train_dataset, val_dataset, num_train_images, num_val_images.
            NOTE(review): the image counts are hard-coded (70000 / 10000)
            rather than derived from the files - confirm they match the
            data in `data_dir`.
    '''
    train_dataset, val_dataset = (
        create_dataset(H, W, data_dir + suffix, batch_size)
        for suffix in ('/train*', '/val*'))
    num_train_images, num_val_images = 70000, 10000
    return train_dataset, val_dataset, num_train_images, num_val_images

In [6]:
# Run configuration; every key listed here is required by
# FCOS._validate_config and becomes an attribute of the FCOS instance.
config = dict(
    mode='train',
    distribute_strategy=tf.distribute.MirroredStrategy(),
    image_height=720,
    image_width=1280,
    num_classes=10,
    dataset_fn=dataset_fn,
    data_dir='../tfrecords',
    batch_size=4,
    epochs=250,
    learning_rate=1e-4,
    model_dir='model_files',
    tensorboard_log_dir='logs',
)

In [7]:
# Builds the FPN, the model, the datasets, the optimizer and the callbacks.
fcos = FCOS(config)

'****Initializing FCOS with the following config'
{'batch_size': 4,
 'data_dir': '../tfrecords',
 'dataset_fn': <function dataset_fn at 0x7f8e662260d0>,
 'distribute_strategy': <tensorflow.python.distribute.mirrored_strategy.MirroredStrategy object at 0x7f8e6eb62128>,
 'epochs': 250,
 'image_height': 720,
 'image_width': 1280,
 'learning_rate': 0.0001,
 'mode': 'train',
 'model_dir': 'model_files',
 'num_classes': 10,
 'tensorboard_log_dir': 'logs'}
'****Building FPN'
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/r

In [8]:
# Smoke test: run 200 inference-mode forward passes on one random batch
# of four 720x1280 images (the output of the last pass is kept).
dummy_tensor = tf.random.normal(shape=[4, 720, 1280, 3])
for i in tqdm(range(200)):
    dummy_output = fcos.model(dummy_tensor, training=False)

HBox(children=(IntProgress(value=0, max=200), HTML(value='')))




In [9]:
# Settings for the interactive experiments in the cells below.
# NOTE(review): these duplicate values that also live in `config`
# (data directory, image size) and BATCH_SIZE differs from
# config['batch_size'] - confirm that is intended.
tfrecords_base_dir = '../tfrecords'
BATCH_SIZE = 2
H, W = tf.constant([720., 1280.])
autotune = tf.data.experimental.AUTOTUNE

# Class name -> integer id, in the listed order.
class_names = ('bus',
               'traffic light',
               'traffic sign',
               'person',
               'bike',
               'truck',
               'motor',
               'car',
               'train',
               'rider')
class_map = dict(zip(class_names, range(len(class_names))))
num_classes = len(class_map)

In [10]:
# Build the training pipeline through the shared `create_dataset` helper
# (interleave -> map -> shuffle -> batch -> repeat -> prefetch) instead of
# repeating its body inline, so both code paths cannot drift apart.
train_dataset = create_dataset(
    H, W, '{}/train*'.format(tfrecords_base_dir), BATCH_SIZE)
# Show the element structure (image, (cls, ctr, reg) targets).
tf.data.experimental.get_structure(train_dataset)

(TensorSpec(shape=(2, 720, 1280, 3), dtype=tf.float32, name=None),
 (TensorSpec(shape=(2, 19220), dtype=tf.float32, name=None),
  TensorSpec(shape=(2, 19220, 1), dtype=tf.float32, name=None),
  TensorSpec(shape=(2, 19220, 4), dtype=tf.float32, name=None)))

In [139]:
# Flatten the per-level centers into a single (num_locations, 2) tensor so
# that it broadcasts against the (batch, num_locations, 4) targets.
centers_list = get_all_centers(H, W)
centers = tf.concat(centers_list, axis=0)

def _regression_loss(labels, logits):
    '''
        IoU loss (`-log(IoU)`) between target and predicted boxes,
        averaged over the foreground locations of each image.

        `labels` and `logits` hold 4 distances per location which are
        subtracted from / added to the module-level `centers` to form
        corner boxes. Locations whose targets are all zero are treated as
        background and do not contribute.

        Returns:
            One loss value per image (shape `[batch, 1]`); 0 for an image
            without any foreground location.
    '''
    fg_mask = tf.cast(labels != 0, dtype=tf.float32)
    boxes_true = tf.concat([
        centers - labels[:, :, :2],
        centers + labels[:, :, 2:]], axis=-1) * fg_mask

    boxes_pred = tf.concat([
        centers - logits[:, :, :2],
        centers + logits[:, :, 2:]], axis=-1) * fg_mask

    lu = tf.maximum(boxes_true[:, :, :2], boxes_pred[:, :, :2])
    rd = tf.minimum(boxes_true[:, :, 2:], boxes_pred[:, :, 2:])

    intersection = tf.maximum(0.0, rd - lu)
    intersection_area = intersection[:, :, 0] * intersection[:, :, 1]

    boxes_true_area = tf.reduce_prod(
        boxes_true[:, :, 2:] - boxes_true[:, :, :2], axis=2)
    boxes_pred_area = tf.reduce_prod(
        boxes_pred[:, :, 2:] - boxes_pred[:, :, :2], axis=2)

    union_area = tf.maximum(
        boxes_true_area + boxes_pred_area - intersection_area, 1e-10)
    iou = tf.clip_by_value(intersection_area / union_area, 0.0, 1.0)

    # Collapse the per-coordinate mask into one flag per location.
    fg_mask = tf.reduce_sum(fg_mask, axis=2)
    fg_mask = tf.cast(fg_mask != 0, dtype=tf.float32)
    normalizer_value = tf.reduce_sum(fg_mask, axis=1, keepdims=True)
    # Background IoU is 0; the small offset keeps log() finite there
    # before those entries are masked out again.
    bg_mask = (1 - fg_mask) * 1e-7
    iou_loss = iou + bg_mask
    iou_loss = -1 * tf.math.log(iou_loss)
    iou_loss = iou_loss * fg_mask
    iou_loss = tf.reduce_sum(iou_loss, axis=1, keepdims=True)
    # Guard against 0 / 0 = NaN for images without foreground.
    iou_loss = tf.math.divide_no_nan(iou_loss, normalizer_value)
    return iou_loss
    
    
# Pull a single batch from the pipeline, count its positive (non-background)
# locations and run the model on it in inference mode.
for batch in tqdm(train_dataset.take(1)):
    image, (cls_target, ctr_target, reg_target) = batch
    cls_target = tf.cast(cls_target, dtype=tf.int32)
    # Class id 0 denotes background, so non-zero entries are positives.
    positive_centers = tf.reduce_sum(tf.cast(cls_target != 0,
                                             dtype=tf.float32))
print('Number of positive centers: {}'.format(positive_centers.numpy()))
cls_out, ctr_out, reg_out = fcos.model(image, training=False)
# Display the shapes of the image batch and of the three targets.
image.shape, cls_target.shape, ctr_target.shape, reg_target.shape

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Number of positive centers: 504.0


(TensorShape([2, 720, 1280, 3]),
 TensorShape([2, 19220]),
 TensorShape([2, 19220, 1]),
 TensorShape([2, 19220, 4]))

In [140]:
# Regression (IoU) loss of the untrained model on the sampled batch.
l = _regression_loss(reg_target, reg_out)


In [141]:
# Display the per-image loss values.
l

<tf.Tensor: id=300574, shape=(2, 1), dtype=float32, numpy=
array([[20.546144],
       [15.958465]], dtype=float32)>

In [None]:
# Focal loss for one location (first image, first location) of the
# sampled batch. The one-hot depth is `num_classes + 1` because class id 0
# is background; the background column is dropped afterwards.
y_true = tf.one_hot(cls_target[0][0], num_classes + 1)[1:]
y_pred = cls_out[0][0]
y_pred_ = tf.sigmoid(y_pred)
alpha = 0.25
gamma = 2.0

# alpha weighting and probability assigned to the true label per class.
at = alpha * y_true + (1 - y_true) * (1 - alpha)
pt = y_true * y_pred_ + (1 - y_true) * (1 - y_pred_)
f_loss = at * \
    tf.pow(1 - pt, gamma) * \
    tf.nn.sigmoid_cross_entropy_with_logits(
        labels=y_true, logits=y_pred)
f_loss

In [22]:
# Sanity check of the focal loss on hand-made values: two positive and two
# negative entries, all predicted with the same strongly negative logit.
alpha = 0.25
gamma = 2.0
y_true = tf.constant([1., 0., 0., 1.])
y_pred = tf.constant([-4.59511985013459] * 4)
y_pred_ = tf.sigmoid(y_pred)

# Probability assigned to the true label and the matching alpha weight.
pt = y_true * y_pred_ + (1 - y_true) * (1 - y_pred_)
at = alpha * y_true + (1 - y_true) * (1 - alpha)
bce = tf.nn.sigmoid_cross_entropy_with_logits(
    labels=y_true, logits=y_pred)
f_loss = at * tf.pow(1 - pt, gamma) * bce
f_loss

<tf.Tensor: id=256418, shape=(4,), dtype=float32, numpy=
array([1.1283818e+00, 7.5377369e-07, 7.5377369e-07, 1.1283818e+00],
      dtype=float32)>

<tf.Tensor: id=256355, shape=(10,), dtype=float32, numpy=
array([0.01023436, 0.00952402, 0.01098686, 0.01150885, 0.00954002,
       0.01119524, 0.01038414, 0.00972429, 0.00950115, 0.01010294],
      dtype=float32)>