#### Architecture YOLO_V1


In [2]:
import tensorflow as tf 
from tensorflow import keras 
from tensorflow.keras import layers
import numpy as np

In [3]:
architecture_config = [
    (7, 64, 2, 3),
    "M",
    (3, 192, 1, 1),
    "M",
    (1, 128, 1, 0),
    (3, 256, 1, 1),
    (1, 256, 1, 0),
    (3, 512, 1, 1),
    "M",
    [(1, 256, 1, 0), (3, 512, 1, 1), 4],
    (1, 512, 1, 0),
    (3, 1024, 1, 1),
    "M",
    [(1, 512, 1, 0), (3, 1024, 1, 1), 2],
    (3, 1024, 1, 1),
    (3, 1024, 2, 1),
    (3, 1024, 1, 1),
    (3, 1024, 1, 1),
]

In [4]:
#block_model = CNNBlock(in_channels=2)
#x = tf.random.uniform(shape=(2,224,224,3))
#y = block_model(x)

In [5]:
#block_model.summary()

In [6]:
class YOLO1(keras.Model):
  def __init__(self,split_size,num_boxes,num_classes, in_channels=3):
    super(YOLO1,self).__init__()
    self.S = split_size
    self.B = num_boxes
    self.C = num_classes
    output = (self.S)*(self.S)*((self.C)+(self.B)*5)
    self.architecture=architecture_config
    self.in_channels = in_channels
    self.darknet =self.create_conv_layers(self.architecture)
    self.fl = layers.Flatten()
    self.linear1 = layers.Dense(49)
    self.drp=layers.Dropout(0.1)
    self.lr = layers.LeakyReLU(0.1)
    self.linear2 = layers.Dense(output)

  def call(self,input_tensor):
    input_tensor1=tf.convert_to_tensor(input_tensor, dtype=tf.float32)
    input_tensor1 = self.darknet(input_tensor1) 
    input_tensor1 = self.fl(input_tensor1)
    input_tensor1 = self.linear1(input_tensor1)
    input_tensor1 = self.drp(input_tensor1)
    input_tensor1 = self.lr(input_tensor1)
    input_tensor1 = self.linear2(input_tensor1)    

    return input_tensor1


  def create_conv_layers(self,architecture):
    lyr =[]
    in_channels = self.in_channels

    for x in (architecture):
      if type(x)==tuple:
        lyr.append(layers.Conv2D(in_channels,kernel_size=(x[0],x[0]),
                                strides=(x[2],x[2]),
                                activation=tf.keras.layers.LeakyReLU(alpha=0.1)))
        lyr.append(layers.BatchNormalization())
        lyr.append(layers.ZeroPadding2D(padding=(x[3],x[3])))

        in_channels = x[1]
#        print(len(lyr))
      if type(x)==str:
        lyr.append(layers.MaxPooling2D(pool_size=2, strides=(1,1)))
#        print(len(lyr))
      if type(x)==list:
        conv1 = x[0]
        conv2 = x[1]
        num_rep = x[2]
        for _ in range(num_rep):
          lyr.append(layers.Conv2D(in_channels, kernel_size=conv1[0],strides=(conv1[2],conv1[2]),
                                   activation=tf.keras.layers.LeakyReLU(alpha=0.1)))
          lyr.append(layers.BatchNormalization())

          in_channels = conv1[1]
          lyr.append(layers.Conv2D(conv1[1],kernel_size=conv2[0], strides=(conv2[2],conv1[2]),
                                   activation=tf.keras.layers.LeakyReLU(alpha=0.1)))
          lyr.append(layers.BatchNormalization())
          lyr.append(layers.ZeroPadding2D(padding=(conv2[3],conv2[3])))

          in_channels = conv2[1]
#          print(len(lyr))
    return keras.Sequential(lyr)

In [7]:
S=7
B=2
C=20
model = YOLO1(in_channels=3, split_size=S,num_boxes=B,num_classes=C)
inputs = tf.random.uniform(shape=(2,448,448,3))
inputs=tf.convert_to_tensor(inputs, dtype=tf.float32)
y = model(inputs)
print(y.shape)
# S*S*(C+(B*5)

(2, 1470)


In [8]:
#We can also use this block instead of using Conv2D with LeakyRelu Activation function and the Normalization
class CNNBlock(keras.Model):
  def __init__(self,in_channels,pad=(0,0), **kwargs):
    super(CNNBlock,self).__init__()
    self.conv =layers.Conv2D(in_channels,use_bias=False,**kwargs)
    self.batchnorm= layers.BatchNormalization()
    self.padding = layers.ZeroPadding2D(padding=(pad,pad)) 
    self.rl = layers.LeakyReLU(0.1)

  def call(self,x):
    x = self.padding(self.conv(x))
    x = self.rl(x)
    x = self.batchnorm(x)
    return 

In [9]:
model.summary()

Model: "yol_o1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sequential (Sequential)      (2, 113, 113, 1024)       36408840  
_________________________________________________________________
flatten (Flatten)            multiple                  0         
_________________________________________________________________
dense (Dense)                multiple                  640697393 
_________________________________________________________________
dropout (Dropout)            multiple                  0         
_________________________________________________________________
leaky_re_lu_12 (LeakyReLU)   multiple                  0         
_________________________________________________________________
dense_1 (Dense)              multiple                  73500     
Total params: 677,179,733
Trainable params: 677,167,695
Non-trainable params: 12,038
_________________________________________

#### IntersectionOverUnion

In [10]:
import tensorflow as tf
def Intersection_Over_Union(boxes_preds, boxes_labels, box_format="mid_point"):
#  if box_format =="mid_point":
  if box_format =="mid_point":
    box1_x1 = boxes_preds[..., 0:1] - boxes_preds[..., 2:3] / 2
    box1_y1 = boxes_preds[..., 1:2] - boxes_preds[..., 3:4] / 2
    box1_x2 = boxes_preds[..., 0:1] + boxes_preds[..., 2:3] / 2
    box1_y2 = boxes_preds[..., 1:2] + boxes_preds[..., 3:4] / 2
    box2_x1 = boxes_labels[..., 0:1] - boxes_labels[..., 2:3] / 2
    box2_y1 = boxes_labels[..., 1:2] - boxes_labels[..., 3:4] / 2
    box2_x2 = boxes_labels[..., 0:1] + boxes_labels[..., 2:3] / 2
    box2_y2 = boxes_labels[..., 1:2] + boxes_labels[..., 3:4] / 2

  if box_format =="corners":
    # shape(N,4)
    
    box1_x1 = boxes_preds[...,0:1]
    box1_y1 = boxes_preds[...,1:2]
    box1_x2 = boxes_preds[...,2:3]
    box1_y2 = boxes_preds[...,3:4]

    box2_x1 = boxes_labels[...,0:1]
    box2_y1 = boxes_labels[...,1:2]
    box2_x2 = boxes_labels[...,2:3]
    box2_y2 = boxes_labels[...,3:4]

  
  x1=tf.math.maximum(box1_x1,box2_x1)
  y1=tf.math.maximum(box1_y1,box2_y1)
  x2=tf.math.minimum(box1_x2,box2_x2)
  y2=tf.math.minimum(box1_y2,box2_y2)

  Inter = (tf.clip_by_value((x2-x1),clip_value_min=0,clip_value_max=512)) *(tf.clip_by_value((y2-y1),clip_value_min=0,clip_value_max=512))

  box1_area = (abs(box1_x2 -box1_x1))*(abs(box1_y2-box1_y1))
  box2_area = (abs(box2_x2 -box2_x1))*(abs(box2_y2-box2_y1))
  Union = (box1_area + box2_area) - Inter

  return (Inter / (Union + 1e-6))  


In [11]:
boxes_preds=[[39, 63, 203, 112], 
             [49, 75, 203, 125], 
             [31, 69, 201, 125], 
             [50, 72, 197, 121], 
             [35, 51, 196, 110]],

boxes_labels = [[54, 66, 198, 114],
                [42, 78, 186, 126],
                [18, 63, 235, 135],
                [54, 72, 198, 120],
                [36, 60, 180, 108]]

In [12]:
boxes_preds =tf.convert_to_tensor(boxes_preds, dtype=tf.float32)
boxes_labels =tf.convert_to_tensor(boxes_labels, dtype=tf.float32)


In [13]:
Intersection_Over_Union(boxes_preds,boxes_labels)

<tf.Tensor: shape=(1, 5, 1), dtype=float32, numpy=
array([[[0.8193792 ],
        [0.8754707 ],
        [0.7806803 ],
        [0.95253724],
        [0.7826087 ]]], dtype=float32)>

#### LOSS Function

In [65]:
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import metrics
# tf.keras.metrics.MeanIoU

In [119]:
class YoloLoss(keras.Model):
    def __init__(self, S=7, B=2, C=20):
        super(YoloLoss, self).__init__()
        self.mse = tf.keras.losses.MeanSquaredError(reduction="sum")
        self.S = S
        self.B = B
        self.C = C
        # These are from Yolo paper, signifying how much we should
        # pay loss for no object (noobj) and the box coordinates (coord)
        self.lambda_noobj = 0.5
        self.lambda_coord = 5

    def call(self, predictions, target):
        # predictions are shaped (BATCH_SIZE, S*S(C+B*5) when inputted
        predictions = tf.reshape(predictions,(-1, self.S, self.S, self.C + self.B * 5))

        # Calculate IoU for the two predicted bounding boxes with target bbox
        iou_b1 = Intersection_Over_Union(predictions[..., 21:25], target[..., 21:25])
        iou_b2 = Intersection_Over_Union(predictions[..., 26:30], target[..., 21:25])
        ious = tf.concat([tf.expand_dims(iou_b1, 0), tf.expand_dims(iou_b2, 0)], axis=0)

        iou_maxes = tf.reduce_max(ious)
        bestbox = tf.argmax(ious, 0)
        #print("BEST-BOX:  ", bestbox.shape)
        exists_box = tf.squeeze(target[..., 20])  # in paper this is Iobj_i
        bestbox = tf.cast(bestbox, tf.float32)

        #print("BEST-BOX:  ", bestbox.shape)
        #print("predictions[26:30]:  ", predictions[..., 26:30].shape)
        #print("predictions[21:25]:  ", predictions[..., 21:25].shape)

        # ======================== #
        #   FOR BOX COORDINATES    #
        # ======================== #

        #print("bestbox:  ", bestbox.shape)
        #print("exists_box:  ", exists_box.shape)
        exists_box1 = tf.expand_dims(exists_box,-1)   
        #print("exists_box:  ", exists_box1.shape)
        #print("bestbox1:  ", bestbox1.shape)
        # predictions, which is the one with highest Iou calculated previously.
        #pred1 = predictions[..., 26:30]
        #pred2 = predictions[..., 21:25]

        box_predictions = exists_box1 * ((bestbox * predictions[..., 26:30] + (1 - bestbox) * predictions[..., 21:25]))
        box_targets = exists_box1 * target[..., 21:25]
        #box11 = tf.math.sign(box_predictions[..., 2:4])
        #box12 = tf.math.abs(box_predictions[..., 2:4])
        #box21 = tf.math.sqrt(box12 + 1e-6)
        #tensor1 = box_predictions    # tf.rank(tensor) == 1
        #indices1 = [[[[2],[4]]]]      
        # num_updates == 4, index_depth == 1
        #updates1 = (tf.math.sign(box_predictions[..., 2:4])) * (tf.math.abs(box_predictions[..., 2:4]))           
        # num_updates == 4
        #tf.tensor_scatter_nd_update(tensor1, indices1, updates1) 
        box_predictions_np = box_predictions.numpy()
        box_predictions_np[..., 2:4] = (tf.math.sign(box_predictions[..., 2:4])) * (tf.math.abs(box_predictions[..., 2:4]))
        box_predictions = tf.convert_to_tensor(box_predictions_np, dtype=tf.float32)
        #tensor2 = box_targets
        #indices2 = [..., 2:4]       # num_updates == 4, index_depth == 1
        #updates1 = tf.math.sqrt(box_targets[..., 2:4])            # num_updates == 4
        #tf.tensor_scatter_nd_update(tensor2, indices2, updates2) 
        box_targets_np = box_targets.numpy()
        box_targets_np[..., 2:4] = tf.math.sqrt(box_targets[..., 2:4])
        box_targets = tf.convert_to_tensor(box_targets_np, dtype=tf.float32)
        
        N = box_predictions.shape[0]
        box_loss = self.mse(
            tf.reshape(box_predictions, shape=(N*S*S,4)),
            tf.reshape(box_targets, shape=(N*S*S,4)),)

        # ==================== #
        #   FOR OBJECT LOSS    #
        # ==================== #

        # pred_box is the confidence score for the bbox with highest IoU
        #pred3 = predictions[..., 25:26]
        #pred4 = predictions[..., 20:21]
        pred_box = (bestbox * predictions[..., 25:26] + (1 - bestbox) * predictions[..., 20:21])

        f00 = (exists_box1 * pred_box)
        f01 = (exists_box1 * target[..., 20:21])
        object_loss = self.mse(
            tf.reshape(f00,shape=(N*S*S,1)),
            tf.reshape(f01,shape=(N*S*S,1)),
        )

        # ======================= #
        #   FOR NO OBJECT LOSS    #
        # ======================= #

        #max_no_obj = torch.max(predictions[..., 20:21], predictions[..., 25:26])
        #no_object_loss = self.mse(
        #    torch.flatten((1 - exists_box1) * max_no_obj, start_dim=1),
        #    torch.flatten((1 - exists_box1) * target[..., 20:21], start_dim=1),
        #)
        fl1 = (1 - exists_box1) * predictions[..., 20:21]
        fl2 = (1 - exists_box1) * target[..., 20:21]
        B1 = fl1.shape[3]        
        B2 = fl2.shape[3]
        no_object_loss = self.mse(
            tf.reshape(fl1, shape=(N*S*S,B1)),
            tf.reshape(fl2, shape=(N*S*S,B2)),
        )
        box01 = (1 - exists_box1) * predictions[..., 25:26]
        box11 = (1 - exists_box1) * target[..., 20:21]
        no_object_loss += self.mse(
            tf.reshape(box01,shape=(N*S*S,B1)),
            tf.reshape(box11,shape=(N*S*S,B2))
        )

        # ================== #
        #   FOR CLASS LOSS   #
        # ================== #
        fl3 = exists_box1 * predictions[..., :20]
        fl4 = exists_box1 * target[..., :20]
        B3 = fl3.shape[3]        
        B4 = fl4.shape[3]
        class_loss = self.mse(
            tf.reshape(fl3, shape=(N*S*S,B3)),
            tf.reshape(fl4, shape=(N*S*S,B4),
        ))

        loss = (
            self.lambda_coord * box_loss  # first two rows in paper
            + 
            object_loss  # third row in paper
            + self.lambda_noobj * no_object_loss  # forth row
            + class_loss  # fifth row
        )

        return loss

In [120]:
predictions = y

In [121]:
target = tf.random.uniform((2,7,7,25))

In [122]:
target[...,2:4].shape

TensorShape([2, 7, 7, 2])

In [123]:
target.shape

TensorShape([2, 7, 7, 25])

In [124]:
LossF = YoloLoss()

In [125]:
target = tf.cast(target, tf.float32)
predictions = tf.cast(predictions, tf.float32)


In [126]:
c = LossF(predictions, target)

In [127]:
print(c)

tf.Tensor(119.14648, shape=(), dtype=float32)
