In [1]:
import json
import tensorflow as tf
import numpy as np

In [2]:
import PIL
from PIL import Image

In [3]:
import random

In [4]:
import os
os.chdir("../")

In [5]:
class_map = {"Dog": 1}

In [6]:
def read_label_json(filepath):
    """
    Will contain
    """
    with open(filepath, "r") as filereader:
        labelled_data = json.load(filereader)
    return labelled_data


def annotation_generator(annotations, image_width, image_height, batch_size):
    random.shuffle(annotations)
    annotation_length = len(annotations)
    for annotation_ndx in range(0, annotation_length, batch_size):
        batched_images_as_list = []
        batched_labels_as_list = []
        for batch_index, batch_num in enumerate(range(annotation_ndx, 
                                                      min(annotation_ndx + batch_size, annotation_length))):
            image = Image.open(annotations[batch_num]["image_path"])
            image = image.resize(size=(image_width, image_height), resample=PIL.Image.LANCZOS)
            bboxes = np.array(
                [
                    [
                        bbox[0] / image_width,
                        bbox[1] / image_height,
                        bbox[2] / image_width,
                        bbox[3] / image_height,
                    ] 
                    for bbox in annotations[batch_num]["bboxes"]
                ]
            )
            classes = np.array(
                [[class_map[class_label]] for class_label in annotations[batch_num]["classes"]]
            )
            label = np.concatenate([bboxes, classes], axis=-1)
            batched_labels_as_list.append(
                np.concatenate(
                    [
                        np.expand_dims(np.ones(label.shape[0]) * batch_index, axis=-1),
                        label
                    ],
                    axis=-1
                )
            )
            batched_images_as_list.append(image)
        
        # combine them
        batch_images = np.stack(batched_images_as_list, axis=0)
        batch_labels = np.concatenate(batched_labels_as_list, axis=0)
        yield batch_images, batch_labels


#     images, labels = [], []
#     for annotation in annotations:
#         image = Image.open(annotation["image_path"])  # read image
#         image = image.resize(
#             size=(image_width, image_height), resample=PIL.Image.LANCZOS
#         )
#         bboxes = np.array(
#             [
#                 [
#                     bbox[0] / image_width,
#                     bbox[1] / image_height,
#                     bbox[2] / image_width,
#                     bbox[3] / image_height,
#                 ]
#                 # [bbox[0]/640, bbox[1]/640, bbox[2]/640, bbox[3]/640]
#                 for bbox in annotation["bboxes"]
#             ]
#         )
#         classes = np.array(
#             [[class_map[class_label]] for class_label in annotation["classes"]]
#         )
#         label = np.concatenate([bboxes, classes], axis=-1)
#         images.append(image)
#         labels.append(label)
#     images = images * 10
#     labels = labels * 10
    
#     # batch generator
#     l = len(images)
#     for ndx in range(0, l, batch_size):
#         batch_images = np.stack(images[ndx:min(ndx + batch_size, l)], axis=0)
#         labels_stacked = []
#         for batch_index, label in enumerate(labels[ndx:min(ndx + batch_size, l)]):
#             labels_stacked.append(
#                 np.concatenate(
#                     [
#                         np.expand_dims(np.ones(label.shape[0]) * batch_index, axis=-1),
#                         label
#                     ],
#                     axis=-1
#                 )
#             )
#         batch_labels = np.concatenate(labels_stacked, axis=0)
#         yield np.cast[np.float32](batch_images), batch_labels


        
def combine(images, labels):
    images = tf.stack(images, axis=0) / 255.0
    labels_list_with_batch = []
    for batch_index, label in enumerate(labels):
        labels_list_with_batch.append(
            tf.concat(
                [tf.expand_dims(tf.ones(label.shape[0]) * batch_index, axis=-1), label],
                axis=-1,
            )   
        )
    return images, tf.concat(labels_list_with_batch, axis=0)


def batch(dataset, batch_size):
    batch_size = batch_size
    batch = []
    for num, (i, l) in enumerate(dataset):
        batch.append(num)
        if len(batch) == batch_size:
            yield batch
            batch = []
    if batch:
        yield batch        


def tf_dataloader(config_filepath):
    # Get Values from the config
    input_height = config_filepath["INPUT_H"]
    input_width = config_filepath["INPUT_W"]
    annotation_filename = config_filepath["annotation_path"]
    annotations = read_label_json(annotation_filename)
    batch_size = 2
    train_dataset = tf.data.Dataset.from_generator(
        lambda: annotation_generator(
            annotations["annotations"]["train"] * 100, input_width, input_height, batch_size
        ),  
        (tf.float32, tf.float32),
    )   
#     train_dataset = train_dataset.repeat(10)
    validation_dataset = tf.data.Dataset.from_generator(
        lambda: annotation_generator(
            annotations["annotations"]["validation"]*10, input_width, input_height, batch_size
        ),  
        (tf.float32, tf.float32),
    )   
#     validation_dataset = validation_dataset.repeat(10)

    return train_dataset, validation_dataset


def tf_dataloader_v2(label_file_path, image_height, image_width):
    if label_file_path:
        annotations = read_label_json(label_file_path)
        train_dataset = tf.data.Dataset.from_generator(
            lambda: annotation_generator(annotations, input_width, input_height),
            (tf.float32, tf.float32),
        )   
        return train_dataset
    else:
        return None


In [7]:
with open('cfg/config.json', 'r') as f:
    cfg = json.load(f)

In [8]:
t, v = tf_dataloader(cfg)

### Creating a TFRecord Dataset, it's respective parser and then modifying the loss fn accordingly

In [9]:
train_json = "data/sample/train.json"
validation_json = "data/sample/validation.json"

train_label = read_label_json(train_json)
validation_label = read_label_json(validation_json)

In [10]:
im_w = 416
im_h = 416

In [11]:
for image_label in train_label:
    filepath = image_label['image_path']
    
    xmins, ymins, xmaxs, ymaxs = [], [], [], []
    for bbox in image_label['bboxes']:
        xmins.append(bbox[0])

In [12]:
def _bytes_feature(value):
  """Returns a bytes_list from a string / byte."""
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))


def _bytes_feature_list(value):
  """Returns a bytes_list from a string / byte."""
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))


def _float_feature(value):
  """Returns a float_list from a float / double."""
  return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))


def _float_feature_list(value):
  """Returns a float_list from a float / double."""
  return tf.train.Feature(float_list=tf.train.FloatList(value=value))


def _int64_feature(value):
  """Returns an int64_list from a bool / enum / int / uint."""
  return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))


def _int64_feature_list(value):
  """Returns an int64_list from a bool / enum / int / uint."""
  return tf.train.Feature(int64_list=tf.train.Int64List(value=value))


def get_serialized_example(filename, annotation, input_width, input_height):
    # image stuff
    image = Image.open(filename)
    image = image.resize(size=(input_width, input_height),
                         resample=PIL.Image.LANCZOS)
    image_string = np.array(image).tostring()

    # bounding boxes
    x_list, y_list, w_list, h_list = [], [], [], []
    labels = []

    num_labels = len(annotation['classes'])
    for label_index in range(num_labels):
        bbox = annotation['bboxes'][label_index]
        class_label = int(annotation['classes'][label_index])
        x, y, w, h = bbox
        x_list.append(x)
        y_list.append(y)
        w_list.append(w)
        h_list.append(h)
        labels.append(class_label)
        

    return tf.train.Example(features=tf.train.Features(feature={
        'image': _bytes_feature(image_string),
        'x': _float_feature_list(x_list),
        'y': _float_feature_list(y_list),
        'w': _float_feature_list(w_list),
        'h': _float_feature_list(h_list),
        'label': _int64_feature_list(labels)
    }))


def write_tfrecords(record_path, annotation_dict, input_width, input_height):
    """
    Processes the dictionary containing annotation information
    and returns classes dictionary, as well as the annotation
    """
    with tf.io.TFRecordWriter(record_path) as record_writer:        
        for annotation_object in annotation_dict:
            current_filename = annotation_object["image_path"]
            current_annotation = {
                "bboxes": [
                    [
                        box[0] / input_height,
                        box[1] / input_width,
                        box[2] / input_height,
                        box[3] / input_width,
                    ]
                    for box in annotation_object["bboxes"]
                ],
                "classes": [
                    class_map[class_label]
                    for class_label in annotation_object["classes"]
                ],
            }
            example = get_serialized_example(current_filename,
                                             current_annotation,
                                             input_width,
                                             input_height)
            record_writer.write(example.SerializeToString())
        print("Completed Writing TFRecord {}".format(record_path))
    return

In [13]:
# write train record
train_record = "data/sample/train.record"
write_tfrecords(train_record, train_label, im_w, im_h)

# write validation record
validation_record = "data/sample/validation.record"
write_tfrecords(validation_record, validation_label, im_w, im_h)

Completed Writing TFRecord data/sample/train.record
Completed Writing TFRecord data/sample/validation.record


 Read in the TFRecord paths.

In [14]:
# create dataset
# make it better with parallel calls

dataset = tf.data.TFRecordDataset([train_record, validation_record])

In [15]:
cn = 0
for d in dataset.take(2):
    if cn == 0:
        d1 = d
        cn+=1
    else:
        d2 = d
        cn+=1
        break

In [16]:
def parser(example):
    features = {
        'image': tf.io.FixedLenFeature([], tf.string),
        'x': tf.io.VarLenFeature(tf.float32),
        'y': tf.io.VarLenFeature(tf.float32),
        'w': tf.io.VarLenFeature(tf.float32),
        'h': tf.io.VarLenFeature(tf.float32),
        'label': tf.io.VarLenFeature(tf.int64)
    }
    return tf.io.parse_single_example(example, features)

In [17]:
# single example being decoded
d1_parsed = parser(d1)
d2_parsed = parser(d2)

In [18]:
# converting single decoded image into tensor i.e. sparse to dense
image = tf.io.decode_raw(input_bytes=d1_parsed['image'], out_type=tf.uint8)
image_tensor = tf.expand_dims(tf.reshape(image, (416, 416, 3)), axis=0)

In [19]:
# parsing labels, x, y, w, h
labels = tf.cast(tf.sparse.to_dense(d1_parsed['label']), dtype=tf.float32)
x = tf.sparse.to_dense(d1_parsed['x'])
y = tf.sparse.to_dense(d1_parsed['y'])
w = tf.sparse.to_dense(d1_parsed['w'])
h = tf.sparse.to_dense(d1_parsed['h'])
stacked = tf.stack([x, y, w, h, labels], axis=1)
paddings = [[0, 100-tf.shape(stacked)[0]], [0, 0]]
l1 = tf.pad(stacked, paddings)

In [20]:
image = tf.io.decode_raw(input_bytes=d2_parsed['image'], out_type=tf.uint8)
image_tensor = tf.expand_dims(tf.reshape(image, (416, 416, 3)), axis=0)

In [21]:
labels = tf.cast(tf.sparse.to_dense(d2_parsed['label']), dtype=tf.float32)
x = tf.sparse.to_dense(d2_parsed['x'])
y = tf.sparse.to_dense(d2_parsed['y'])
w = tf.sparse.to_dense(d2_parsed['w'])
h = tf.sparse.to_dense(d2_parsed['h'])
stacked = tf.stack([x, y, w, h, labels], axis=1)
paddings = [[0, 100-tf.shape(stacked)[0]], [0, 0]]
l2 = tf.pad(stacked, paddings)

In [22]:
l1, l2

(<tf.Tensor: shape=(100, 5), dtype=float32, numpy=
 array([[0.50961536, 0.5168269 , 0.46875   , 0.5769231 , 1.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.    

In [25]:
image_tensor/255

<tf.Tensor: shape=(1, 416, 416, 3), dtype=float32, numpy=
array([[[[0.23137255, 0.36862746, 0.14901961],
         [0.21960784, 0.34901962, 0.14117648],
         [0.19215687, 0.31764707, 0.1254902 ],
         ...,
         [0.09803922, 0.19607843, 0.07843138],
         [0.09411765, 0.19215687, 0.07843138],
         [0.09411765, 0.19215687, 0.07843138]],

        [[0.23529412, 0.37254903, 0.14509805],
         [0.21960784, 0.34901962, 0.14117648],
         [0.19215687, 0.31764707, 0.1254902 ],
         ...,
         [0.09803922, 0.19607843, 0.07843138],
         [0.09803922, 0.19607843, 0.08235294],
         [0.09411765, 0.19215687, 0.07843138]],

        [[0.23921569, 0.3764706 , 0.14901961],
         [0.22745098, 0.35686275, 0.14901961],
         [0.2       , 0.3254902 , 0.13333334],
         ...,
         [0.10196079, 0.2       , 0.08235294],
         [0.09803922, 0.19607843, 0.08235294],
         [0.09411765, 0.19215687, 0.07843138]],

        ...,

        [[0.7176471 , 0.73333335, 

### Replicating the loss fn for the new labels

In [23]:
batch_size = 2

In [24]:
top_k = 100

In [25]:
anchors_list = [[[116,90], [156,198], [373,326]],
               [[30,61], [62,45], [59,119]],
               [[10,13], [16,30], [33,23]]]

In [26]:
tf.constant(anchors_list[0])

<tf.Tensor: shape=(3, 2), dtype=int32, numpy=
array([[116,  90],
       [156, 198],
       [373, 326]], dtype=int32)>

In [27]:
out_0 = np.load('/home/karan/knapanda_local/playground/yolo_outs/out_0.npy')
out_1 = np.load('/home/karan/knapanda_local/playground/yolo_outs/out_1.npy')
out_2 = np.load('/home/karan/knapanda_local/playground/yolo_outs/out_2.npy')

In [28]:
tf.compat.v1.reset_default_graph()

In [29]:
output_0 = tf.concat([out_0, out_0], axis=0)
output_1 = tf.concat([out_1, out_1], axis=0)
output_2 = tf.concat([out_2, out_2], axis=0)

In [30]:
output_0_reshaped = tf.reshape(output_0, [2, 3, 13, 13, 6])
output_1_reshaped = tf.reshape(output_1, [2, 3, 26, 26, 6])
output_2_reshaped = tf.reshape(output_2, [2, 3, 52, 52, 6])

In [31]:
label = tf.concat([tf.expand_dims(l1, axis=0), tf.expand_dims(l2, axis=0)], axis=0)

In [32]:
def overlap_tf(x1, w1, x2, w2):
    num_1 = x1.shape[0]
    num_2 = x2.shape[0]
    x1 = tf.broadcast_to(x1, [num_2, num_1])
    w1 = tf.broadcast_to(w1, [num_2, num_1])
    x2 = tf.transpose(tf.broadcast_to(x2, [num_1, num_2]))
    w2 = tf.transpose(tf.broadcast_to(w2, [num_1, num_2]))
    
    l1 = x1 - (w1/2)
    l2 = x2 - (w2/2)
    l = tf.math.maximum(l1, l2)
    r1 = x1 + (w1/2)
    r2 = x2 + (w2/2)
    r = tf.math.minimum(r1, r2)
    return r - l

def box_intersection_tf(a, b):
    w = overlap_tf(a[:, 0], a[:, 2], b[:, 0], b[:, 2])
    w = tf.where(tf.math.greater(w, 0.0), w, 0.0)
    h = overlap_tf(a[:, 1], a[:, 3], b[:, 1], b[:, 3])
    h = tf.where(tf.math.greater(h, 0.0), h, 0.0)
    area = w * h
    return area

def box_union_tf(a, b):
    a_num = a.shape[0]
    b_num = b.shape[0]
    intersection = box_intersection_tf(a, b)
    a_w = a[:, 2]
    a_h = a[:, 3]
    b_w = b[:, 2]
    b_h = b[:, 3]
    a_w = tf.broadcast_to(a_w, [b_num, a_num])
    a_h = tf.broadcast_to(a_h, [b_num, a_num])
    b_w = tf.transpose(tf.broadcast_to(b_w, [a_num, b_num]))
    b_h = tf.transpose(tf.broadcast_to(b_h, [a_num, b_num]))
    union = (a_w * a_h) + (b_w * b_h) - intersection
    return union

def box_iou_tf(a, b):
    return box_intersection_tf(a, b)/box_union_tf(a, b)


In [33]:
# let's consider only output0
grid_y_num, grid_x_num = tf.shape(output_0)[1:3]
# x - offset
intermediate_center_x_offset = tf.broadcast_to(
    tf.range(grid_x_num, dtype=tf.float32),
    [grid_y_num, grid_x_num]
)
# center_x_offsets = tf.broadcast_to(
#     intermediate_center_x_offset,
#     [batch_size, grid_y_num, grid_x_num]
# )

# y - offsets
intermediate_center_y_offset = tf.broadcast_to(
    tf.range(grid_y_num, dtype=tf.float32),
    [grid_x_num, grid_y_num]
)
intermediate_center_y_offset = tf.transpose(intermediate_center_y_offset)
# center_y_offsets = tf.broadcast_to(
#     intermediate_center_y_offset,
#     [batch_size, grid_y_num, grid_x_num]
# )

In [34]:
is_gt = tf.where(label[:, :, -1] == 0.0, False, True)

In [35]:
# anchors as tensor
current_anchor_index = 0
anchor_tensor = tf.constant(anchors_list[0], dtype=tf.float32)
batch_size = output_0.shape[0]
num_anchors = anchor_tensor.shape[0]

# iou computation
im_wh = tf.broadcast_to([[416.0, 416.0]], [3, 2]) # image width, height broadcasted to anchor size
anchors_norm = anchor_tensor/im_wh # normalize by width,height

# wh only
anchors_wh = tf.concat(
    [tf.zeros_like(anchors_norm[:, :2], dtype=tf.float32), # zeros
    anchors_norm[:, :2]], # anchor wh
    axis=1
)
#anchors_wh = tf.broadcast_to(tf.expand_dims(anchors_wh, axis=0), [batch_size, 3, 4])
gt_wh = tf.concat(
    [
        tf.zeros_like(label[:, :, -3:-1]),
        label[:, :, -3:-1]
    ], 
    axis=-1
)
anchors_wh_norm_flattened = tf.reshape(anchors_wh, [-1, 4])
gt_wh_norm_flattened = tf.reshape(gt_wh, [-1, 4])

In [36]:
iou = box_iou_tf(anchors_wh_norm_flattened, gt_wh_norm_flattened)

In [37]:
iou_reshape = tf.reshape(iou, [batch_size, top_k, num_anchors])

In [38]:
iou_argmax_per_anchor = tf.argmax(iou_reshape, axis=-1, output_type=tf.int32)

In [39]:
expanded_anchors = tf.broadcast_to(anchor_tensor, [2, 100, 3, 2])

In [40]:
# MSE Loss (Localization Loss)
label_noclass = label[:, :, :-1] # only x, y, w, h
i_gt = tf.cast(label_noclass[:, :, 0] * tf.cast(grid_x_num, dtype=tf.float32), 
               dtype=tf.int32)
j_gt = tf.cast(label_noclass[:, :, 1] * tf.cast(grid_y_num, dtype=tf.float32), 
               dtype=tf.int32)
batch_broadcasted = tf.broadcast_to(tf.expand_dims(tf.range(2), axis=1), [batch_size, top_k])

In [41]:
best_anchor_indices_per_gt = tf.stack(
    [batch_broadcasted, 
     tf.stack([tf.range(100), tf.range(100)]),
     iou_argmax_per_anchor
    ], axis=-1
)

In [42]:
# stores the anchor w/h selected from the best indices calculated from the iou
# size (2, 100, 2)?
best_anchors_per_gt = tf.gather_nd(expanded_anchors, best_anchor_indices_per_gt)

In [43]:
# indices to gather from the outputs
indices_to_subset = tf.stack([batch_broadcasted, iou_argmax_per_anchor, j_gt, i_gt], axis=-1)

In [44]:
# boolean mask to only consider gts that are valid and not all 100
valid_label_indices = tf.boolean_mask(indices_to_subset, is_gt)

In [45]:
# BCE Objectness Loss
predicted_bboxes = output_0_reshaped[..., :4]
predicted_objectness = tf.math.sigmoid(output_0_reshaped[..., 4])
predicted_classes = tf.math.sigmoid(output_0_reshaped[..., 5:])

In [46]:
predicted_classes

<tf.Tensor: shape=(2, 3, 13, 13, 1), dtype=float32, numpy=
array([[[[[0.47926983],
          [0.51614344],
          [0.4992844 ],
          ...,
          [0.5359981 ],
          [0.52558875],
          [0.4790221 ]],

         [[0.53981775],
          [0.5264706 ],
          [0.47973907],
          ...,
          [0.52938604],
          [0.47787607],
          [0.5358088 ]],

         [[0.52761084],
          [0.48422247],
          [0.53768736],
          ...,
          [0.5018741 ],
          [0.5229705 ],
          [0.51987016]],

         ...,

         [[0.49683288],
          [0.5177637 ],
          [0.48948792],
          ...,
          [0.51220363],
          [0.48012853],
          [0.49885696]],

         [[0.5163    ],
          [0.48162195],
          [0.49557665],
          ...,
          [0.49975002],
          [0.5144084 ],
          [0.49003065]],

         [[0.48536378],
          [0.4932135 ],
          [0.5224819 ],
          ...,
          [0.49316624],
          

In [47]:
w_index = 0
h_index = 1
w_offsets = tf.broadcast_to(tf.expand_dims(tf.broadcast_to(tf.expand_dims(tf.broadcast_to(tf.expand_dims(anchor_tensor[:, w_index], axis=-1), [num_anchors, grid_y_num]), axis=-1), [num_anchors, grid_y_num, grid_x_num]), axis=0), [batch_size, num_anchors, grid_y_num, grid_x_num])
h_offsets = tf.broadcast_to(tf.expand_dims(tf.broadcast_to(tf.expand_dims(tf.broadcast_to(tf.expand_dims(anchor_tensor[:, h_index], axis=-1), [num_anchors, grid_y_num]), axis=-1), [num_anchors, grid_y_num, grid_x_num]), axis=0), [batch_size, num_anchors, grid_y_num, grid_x_num])

In [48]:
center_x_offsets = tf.broadcast_to(
    intermediate_center_x_offset,
    [batch_size, num_anchors, grid_y_num, grid_x_num]
)
center_y_offsets = tf.broadcast_to(
    intermediate_center_y_offset,
    [batch_size, num_anchors, grid_y_num, grid_x_num]
)

In [49]:
pred_x = tf.math.sigmoid(predicted_bboxes[..., 0])
pred_y = tf.math.sigmoid(predicted_bboxes[..., 1])
pred_w = predicted_bboxes[..., 2]
pred_h = predicted_bboxes[..., 3]

In [50]:
predicted_x = (pred_x + center_x_offsets)/tf.cast(grid_x_num, dtype=tf.float32)
predicted_y = (pred_y + center_y_offsets)/tf.cast(grid_y_num, dtype=tf.float32)
predicted_w = tf.exp(pred_w) * (w_offsets/416.0)
predicted_h = tf.exp(pred_h) * (h_offsets/416.0)

In [51]:
predictions_bb = tf.stack([
    predicted_x,
    predicted_y,
    predicted_w,
    predicted_h
],
    axis=-1
)

In [52]:
gathered_outputs = tf.gather_nd(predictions_bb, valid_label_indices)

In [53]:
# lets create gt predictions
gt_tx = (label_noclass[..., 0] * tf.cast(grid_x_num, tf.float32)) - tf.cast(i_gt, tf.float32)
gt_ty = (label_noclass[..., 1] * tf.cast(grid_y_num, tf.float32)) - tf.cast(j_gt, tf.float32)
gt_tw = tf.math.log(label_noclass[..., 2] * 416.0/best_anchors_per_gt[...,0])
gt_th = tf.math.log(label_noclass[..., 2] * 416.0/best_anchors_per_gt[...,1])
gt_all = tf.stack([
    gt_tx,
    gt_ty,
    gt_tw,
    gt_th
], axis=-1)

In [54]:
# the ground truth bounding boxes for valid bb
gt_boundingboxes = tf.boolean_mask(gt_all, is_gt)

In [55]:
mse_loss = tf.keras.losses.MeanSquaredError(reduction=tf.keras.losses.Reduction.SUM)
mse_loss(gt_boundingboxes, gathered_outputs)

<tf.Tensor: shape=(), dtype=float32, numpy=0.54535985>

In [56]:
# gt_preds_iou = box_iou_tf(
#     label_noclass_flattened,
#     flattened_predictions_bb
# )

# @why
# converting the above calculation into a map_fn to reduce redundancy from for loops.
# tf.map_fn internally parallelizes the map call.
# it's definitely not as fast as a vectorized operation
# but reduces the creation of intermediate, non-reusable tensors

# predictions_bb is of shape (batch_size, anchor_size, grid_y, grid_x, ... (4))
# hence, the reshape is required and it's reshaped on output

# gt_preds_iou = tf.map_fn(
#     lambda x: tf.reshape(box_iou_tf(x[0], x[1]),
#                         [num_anchors, grid_y_num, grid_x_num, top_k]),
#     elems=(label_noclass,
#            tf.reshape(predictions_bb, [2, -1, 4])),
#     dtype=(tf.float32, tf.float32),
#     fn_output_signature=tf.float32
# )

# WE DON'T NEED TO DO THIS. REFERRING TO DetectionLoss in yolo code.

In [57]:
tf.boolean_mask(tf.reshape(iou, [2, 100, 3]), is_gt)

<tf.Tensor: shape=(3, 3), dtype=float32, numpy=
array([[0.22307692, 0.66      , 0.38487476],
       [0.26363638, 0.78000003, 0.3256632 ],
       [0.2652439 , 0.78475606, 0.32368952]], dtype=float32)>

In [58]:
candidate_pos = tf.where(
    iou_reshape > 0.5,
)

# the output of this gives [batch_index, gt_index, anchor_index]
# batch_index, gt_index is the indexing for the labels

In [59]:
# indices to get candidate positives
batch_index = tf.cast(candidate_pos[:, 0], dtype=tf.int32)
gt_index_per_batch = tf.cast(candidate_pos[:, 1], dtype=tf.int32)
anchor_index_per_candidate = tf.cast(candidate_pos[:, 2], dtype=tf.int32)

batch_gt_per_candidate = tf.cast(candidate_pos[:, :2], dtype=tf.int32)

In [60]:
candidate_pos_x = tf.gather_nd(i_gt, batch_gt_per_candidate)
candidate_pos_y = tf.gather_nd(j_gt, batch_gt_per_candidate)

In [61]:
# candidate positive indices
candidate_positives = tf.stack(
    [
        batch_index,
        anchor_index_per_candidate,
        candidate_pos_y,
        candidate_pos_x
    ],
    axis=-1
)

In [62]:
# the above list is appeneded with the ground truth one 
# since they are objects too (you know :')). 
# And this combined list will have some duplicates which will have
# values greater than 1 when applied with scatter_nd.
# this is rectified to 1
pos_concat = tf.concat([candidate_positives, valid_label_indices], axis=0)
obj_gt_vals = tf.ones_like(pos_concat[:, 0], dtype=tf.int32)

In [63]:
objectness_mask = tf.scatter_nd(
    pos_concat,
    obj_gt_vals,
    [2, 3, 13, 13]
)
objectness_mask = tf.where(objectness_mask != 0, 1, 0)

In [64]:
objectness_indices = tf.where(objectness_mask != 0)

In [65]:
objectness_float = tf.cast(objectness_mask, dtype=tf.float32)

In [66]:
pred_objectness = tf.gather_nd(predicted_objectness, objectness_indices)
gt_objectness = tf.boolean_mask(objectness_float, objectness_mask)

In [67]:
bce = tf.keras.losses.BinaryCrossentropy()
objectness_loss = bce(gt_objectness, pred_objectness)

In [68]:
# BCE Classification Loss
noobj_mask = tf.cast(tf.logical_not(tf.cast(objectness_mask, dtype=tf.bool)), dtype=tf.int32)

In [69]:
noobj_indices = tf.where(noobj_mask != 0)

In [70]:
pred_noobj = tf.gather_nd(predicted_objectness, noobj_indices)
gt_noobj = tf.boolean_mask(objectness_float, noobj_mask)

In [84]:
gt_noobj

<tf.Tensor: shape=(1011,), dtype=float32, numpy=array([0., 0., 0., ..., 0., 0., 0.], dtype=float32)>

In [71]:
noobjectness_loss = bce(gt_noobj, pred_noobj)

In [72]:
noobjectness_loss

<tf.Tensor: shape=(), dtype=float32, numpy=0.6643688>

In [73]:
tf.cast(tf.boolean_mask(label, is_gt)[:, -1], dtype=tf.int32)

<tf.Tensor: shape=(3,), dtype=int32, numpy=array([1, 1, 1], dtype=int32)>

In [86]:
dummy_gt_class_indices = tf.concat([valid_label_indices, tf.expand_dims(tf.cast(tf.boolean_mask(label, is_gt)[:, -1], dtype=tf.int32), axis=-1)], axis=-1)

In [75]:
dummy_preds_classes = tf.concat([predicted_classes, predicted_classes], axis=-1)

In [76]:
dummy_gt_class_indices

<tf.Tensor: shape=(3, 5), dtype=int32, numpy=
array([[0, 1, 6, 6, 0],
       [1, 1, 6, 4, 1],
       [1, 1, 6, 9, 0]], dtype=int32)>

In [78]:
preds_classes = tf.gather_nd(dummy_preds_classes, dummy_gt_class_indices)

In [79]:
gt_classes = tf.ones_like(preds_classes)

In [80]:
bce(gt_classes, preds_classes)

<tf.Tensor: shape=(), dtype=float32, numpy=0.6844391>

#### Looking at batch 6 and batch 11 data

In [233]:
with open('/home/karan/batch6_tl_data.json', 'r') as f:
    batch6 = json.load(f)

In [235]:
with open('/home/karan/batch12_tl_data.json', 'r') as f:
    batch12 = json.load(f)

In [240]:
with open('data/sample/label.json', 'r')as f:
    yolov3_sample_json = json.load(f)

In [253]:
print(batch12[0])

{'TrafficLight-NotVisible': [{'p1': {'x': 282.013351051628, 'y': 348.38909748027027}, 'p2': {'x': 294.94974646119493, 'y': 380.0345544323436}}, {'p1': {'x': 1213.0695722013738, 'y': 95.67767034449548}, 'p2': {'x': 1248.7721122263304, 'y': 186.5342678826389}}, {'p1': {'x': 1173.2873455978736, 'y': 173.32215153121493}, 'p2': {'x': 1201.8337740357058, 'y': 245.12734416547283}}, {'p1': {'x': 456.37739981428615, 'y': 435.4329556136607}, 'p2': {'x': 467.3436508346806, 'y': 443.9970547837003}}], 'filename': 'runs_data/northEnd291Ray/TLImages_1599844978/Pedestrian_Light_00000000-0000-0000-0000-000000009350/1599849483.png', 'full_path': '/mnt/KiwiFTP/kiwi_transfer/hive_data/images_sent/batch_12/runs_data/northEnd291Ray/TLImages_1599844978/Pedestrian_Light_00000000-0000-0000-0000-000000009350/1599849483.png', 'TrafficLight-Off': [], 'TrafficLight-Red': [{'p1': {'x': 110.12370594621925, 'y': 304.745441860671}, 'p2': {'x': 123.76888660763292, 'y': 338.74308435489}}, {'p1': {'x': 184.07765365908034

In [255]:
print(yolov3_sample_json)

{'classes': {'Dog': 0}, 'annotations': {'train': [{'image_path': 'data/sample/dog.jpg', 'bboxes': [[212, 215, 195, 240]], 'classes': ['Dog']}, {'image_path': 'data/sample/dog2.jpg', 'bboxes': [[157, 215, 165, 240], [297, 222, 160, 246]], 'classes': ['Dog', 'Dog']}], 'validation': [{'image_path': 'data/sample/dog.jpg', 'bboxes': [[212, 215, 195, 240]], 'classes': ['Dog']}, {'image_path': 'data/sample/dog2.jpg', 'bboxes': [[157, 215, 165, 240], [297, 222, 160, 246]], 'classes': ['Dog', 'Dog']}]}}


creating a new train dictionary

In [259]:
new_label = {}
new_train = []
new_validation = []
new_classmap = {}

Getting all classes

In [246]:
unnecessary_keys = ['filename', 'full_path']

In [250]:
traffic_light_labels = list(filter(lambda x: x not in unnecessary_keys, batch12[0].keys()))

In [251]:
traffic_light_class_integer_map = {
    label: label_index + 1 for label_index, label in enumerate(traffic_light_labels)
}

In [262]:
for annotation in batch12:
    img_path = annotation['full_path']
    classes = []
    bboxes = []
    converted_annotation = {}
    for class_label in traffic_light_labels:
        class_label_labels = annotation[class_label]
        for bbox in class_label_labels:
            x1 = (bbox['p1']['x']/1280)*416
            y1 = (bbox['p1']['y']/800)*416
            x2 = (bbox['p2']['x']/1280)*416
            y2 = (bbox['p2']['y']/800)*416
            x = int((x1 + x2)/2)
            y = int((y1 + y2)/2)
            w = int(x2 - x1)
            h = int(y2 - y1)
            bboxes.append([x, y, w, h])
            classes.append(class_label)
    converted_annotation['img_path'] = img_path
    converted_annotation['classes'] = classes
    converted_annotation['bboxes'] = bboxes
    
    new_train.append(converted_annotation)

In [264]:
# spit to train and validation
import random
random.shuffle(new_train)

In [267]:
new_validation = new_train[-int(len(new_train)*0.2):]

In [269]:
new_train = new_train[:-int(len(new_train)*0.2)]

In [270]:
print(len(new_train))
print(len(new_validation))

35410
8852


In [271]:
new_label = {
    'classes': traffic_light_class_integer_map,
    'annotations': {
        'train': new_train,
        'validation': new_validation
    }
}

In [272]:
with open('../yolov3_data/batch12_yolov3-tf2.json', 'w') as f:
    json.dump(new_label, f)

In [273]:
with open('../yolov3_data/batch12_yolov3-tf2.json', 'r') as f:
    ll = json.load(f)