<a href="https://colab.research.google.com/github/mralamdari/CV-Yolo/blob/main/YOLO_v3_ObjectDetection_TensorFlow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import PIL
import cv2
import numpy as pd
import pandas as pd
import tensorflow as tf
from seaborn import color_palette
from IPython.display import display

In [None]:
# Point the Kaggle CLI at the directory below for its configuration.
# NOTE(review): presumably this is where kaggle.json lives on a mounted Google
# Drive; the mount step is not part of this cell - confirm before running.
os.environ['KAGGLE_CONFIG_DIR'] = '/content/drive/MyDrive'
# Download the dataset archive into the current working directory.
!kaggle datasets download -d aruchomu/data-for-yolo-v3-kernel
# Unpack every .zip here; the archives are deleted only if unzip succeeds (&&).
!unzip \*.zip && rm *.zip

Downloading data-for-yolo-v3-kernel.zip to /content
 99% 265M/267M [00:04<00:00, 96.9MB/s]
100% 267M/267M [00:04<00:00, 64.7MB/s]
Archive:  data-for-yolo-v3-kernel.zip
  inflating: coco.names              
  inflating: detections.gif          
  inflating: dog.jpg                 
  inflating: futur.ttf               
  inflating: office.jpg              
  inflating: yolov3.weights          


In [None]:
# Settings shared by the layer-building helpers defined below.
_BATCH_NORM_DECAY = 0.9      # passed as `momentum` to batch normalization
_BATCH_NORM_EPSILON = 1e-5   # passed as `epsilon` to batch normalization
_LEAKY_RELU = 0.1            # passed as `alpha` to tf.nn.leaky_relu

# Anchor pairs; yolo_layer multiplies them into the predicted box shapes.
_ANCHORS = [
    (10, 13), (16, 30), (33, 23),
    (30, 61), (62, 45), (59, 119),
    (116, 90), (156, 198), (373, 326),
]

# Two-element size of the network input.
_MODEL_SIZE = (416, 416)

In [None]:
def batch_norm(inputs, training, data_format):
    """Apply batch normalization over the channel axis of `inputs`.

    Args:
        inputs: tensor to normalize.
        training: forwarded unchanged as the layer's `training` argument.
        data_format: 'channels_first' selects axis 1; any other value
            selects axis 3.

    Returns:
        Whatever `tf.layers.batch_normalization` returns for these settings.
    """
    if data_format == 'channels_first':
        channel_axis = 1
    else:
        channel_axis = 3

    layer_kwargs = dict(
        inputs=inputs,
        axis=channel_axis,
        momentum=_BATCH_NORM_DECAY,
        epsilon=_BATCH_NORM_EPSILON,
        scale=True,
        training=training,
    )
    return tf.layers.batch_normalization(**layer_kwargs)

In [None]:
def fixed_padding(inputs, kernel_size, data_format):
    """Pad the two spatial axes of `inputs` by an amount set by `kernel_size`.

    The total padding per axis is `kernel_size - 1`, split so that any odd
    remainder goes to the end of the axis.

    Args:
        inputs: tensor handed to `tf.pad`.
        kernel_size: kernel size the padding is computed from.
        data_format: 'channels_first' pads axes 2 and 3; any other value
            pads axes 1 and 2.

    Returns:
        The result of `tf.pad` with the computed paddings.
    """
    total = kernel_size - 1
    before = total // 2
    after = total - before

    if data_format == 'channels_first':
        paddings = [[0, 0], [0, 0], [before, after], [before, after]]
    else:
        paddings = [[0, 0], [before, after], [before, after], [0, 0]]

    return tf.pad(inputs, paddings)

In [None]:
def conv2d_fixed_padding(inputs, filters, kernel_size, data_format, strides=1):
    """Bias-free 2-D convolution with explicit padding for strided calls.

    When `strides > 1` the input is first padded with `fixed_padding` and the
    convolution uses 'VALID' padding; with `strides == 1` the convolution
    uses 'SAME' padding directly.

    Args:
        inputs: tensor to convolve.
        filters: number of output filters.
        kernel_size: convolution kernel size.
        data_format: 'channels_first' or 'channels_last'.
        strides: convolution stride (default 1).

    Returns:
        The output of `tf.layers.conv2d`.
    """
    if strides > 1:
        inputs = fixed_padding(inputs, kernel_size, data_format)

    # The convolution must run for every stride value; only the explicit
    # padding step above is conditional.
    return tf.layers.conv2d(inputs=inputs,
                            filters=filters,
                            kernel_size=kernel_size,
                            strides=strides,
                            padding=('SAME' if strides == 1 else 'VALID'),
                            use_bias=False,
                            data_format=data_format)

# Feature extraction: Darknet-53


In [None]:
def darknet53_residual_block(inputs, filters, training, data_format, strides=1):
  """Build one residual block: 1x1 conv, 3x3 conv, then add the block input.

  Each convolution is followed by batch normalization and a leaky ReLU.

  Args:
    inputs: tensor entering the block; also used as the shortcut.
    filters: filter count of the 1x1 convolution; the 3x3 convolution uses
      `2 * filters`.
    training: forwarded to `batch_norm`.
    data_format: 'channels_first' or 'channels_last'.
    strides: stride forwarded to both convolutions (default 1).
      NOTE(review): the shortcut is added unchanged, so values other than 1
      presumably break the shape match - confirm before using.

  Returns:
    The block output with the shortcut added.
  """
  shortcut = inputs

  inputs = conv2d_fixed_padding(inputs,
                                filters=filters,
                                kernel_size=1,
                                strides=strides,
                                data_format=data_format)
  inputs = batch_norm(inputs, training=training, data_format=data_format)
  inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)

  inputs = conv2d_fixed_padding(inputs,
                                filters=2 * filters,
                                kernel_size=3,
                                strides=strides,
                                data_format=data_format)
  inputs = batch_norm(inputs, training=training, data_format=data_format)
  inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)

  return inputs + shortcut

In [None]:
def _darknet53_conv_unit(inputs, filters, training, data_format, strides=1):
  """3x3 convolution followed by batch normalization and leaky ReLU."""
  inputs = conv2d_fixed_padding(inputs,
                                filters=filters,
                                kernel_size=3,
                                strides=strides,
                                data_format=data_format)
  inputs = batch_norm(inputs, training=training, data_format=data_format)
  return tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)


def darknet53(inputs, training, data_format):
  """Build the Darknet-53 feature extractor.

  The network is a stem convolution followed by five stages. Each stage is
  one stride-2 convolution and then 1, 2, 8, 8 and 4 residual blocks
  respectively.

  Args:
    inputs: input tensor laid out according to `data_format`.
    training: forwarded to every batch-normalization layer.
    data_format: 'channels_first' or 'channels_last'.

  Returns:
    Tuple `(route1, route2, inputs)`: the tensor after the eight residual
    blocks built with filters=128, the tensor after the eight residual
    blocks built with filters=256, and the final output.
  """
  inputs = _darknet53_conv_unit(inputs, 32, training, data_format)

  # Each stage down-samples exactly once; only the residual blocks repeat.
  inputs = _darknet53_conv_unit(inputs, 64, training, data_format, strides=2)
  inputs = darknet53_residual_block(inputs,
                                    filters=32,
                                    training=training,
                                    data_format=data_format)

  inputs = _darknet53_conv_unit(inputs, 128, training, data_format, strides=2)
  for _ in range(2):
    inputs = darknet53_residual_block(inputs,
                                      filters=64,
                                      training=training,
                                      data_format=data_format)

  inputs = _darknet53_conv_unit(inputs, 256, training, data_format, strides=2)
  for _ in range(8):
    inputs = darknet53_residual_block(inputs,
                                      filters=128,
                                      training=training,
                                      data_format=data_format)
  # Captured after the whole stage, before the next down-sampling.
  route1 = inputs

  inputs = _darknet53_conv_unit(inputs, 512, training, data_format, strides=2)
  for _ in range(8):
    inputs = darknet53_residual_block(inputs,
                                      filters=256,
                                      training=training,
                                      data_format=data_format)
  route2 = inputs

  inputs = _darknet53_conv_unit(inputs, 1024, training, data_format, strides=2)
  for _ in range(4):
    inputs = darknet53_residual_block(inputs,
                                      filters=512,
                                      training=training,
                                      data_format=data_format)

  return route1, route2, inputs

# Convolution layers


In [None]:
def yolo_convolution_block(inputs,  filters, training, data_format):
  """Build the convolution stack that precedes a detection layer.

  Five conv units alternate 1x1 (`filters`) and 3x3 (`2 * filters`) kernels,
  each followed by batch normalization and leaky ReLU. The output of the
  fifth unit is kept as `route`, then one more 3x3 unit is applied.

  Args:
    inputs: input tensor laid out according to `data_format`.
    filters: filter count of the 1x1 convolutions; the 3x3 convolutions use
      `2 * filters`.
    training: forwarded to every batch-normalization layer.
    data_format: 'channels_first' or 'channels_last'.

  Returns:
    Tuple `(route, inputs)`: the tensor after the fifth conv unit and the
    tensor after the final 3x3 conv unit.
  """
  conv_specs = ((filters, 1),
                (2 * filters, 3),
                (filters, 1),
                (2 * filters, 3),
                (filters, 1))

  # Every batch norm / activation pair is preceded by its own convolution.
  for n_filters, kernel_size in conv_specs:
    inputs = conv2d_fixed_padding(inputs,
                                  filters=n_filters,
                                  kernel_size=kernel_size,
                                  data_format=data_format)
    inputs = batch_norm(inputs, training=training, data_format=data_format)
    inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)

  route = inputs

  inputs = conv2d_fixed_padding(inputs,
                                filters=2 * filters,
                                kernel_size=3,
                                data_format=data_format)
  inputs = batch_norm(inputs, training=training, data_format=data_format)
  inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)

  return route, inputs

# Detection layers


In [1]:
def yolo_layer(inputs, n_classes, anchors, img_size, data_format):
  """Build a detection head and decode its raw output into box predictions.

  Args:
    inputs: 4-D feature map laid out according to `data_format`.
    n_classes: number of class scores predicted per box.
    anchors: sequence of anchor pairs; one box is predicted per anchor in
      every grid cell.
    img_size: two-element size of the network input; divided by the grid
      shape to obtain the stride of one grid cell.
    data_format: 'channels_first' or 'channels_last'.

  Returns:
    Tensor reshaped to `[-1, n_anchors * grid_cells, 5 + n_classes]` whose
    last axis holds box centers (2), box shapes (2), confidence (1) and
    class scores (n_classes).
  """
  n_anchors = len(anchors)
  inputs = tf.layers.conv2d(inputs,
                            filters=n_anchors * (5 + n_classes),
                            kernel_size=1,
                            strides=1,
                            use_bias=True,
                            data_format=data_format)

  shape = inputs.get_shape().as_list()
  grid_shape = shape[2:4] if data_format == 'channels_first' else shape[1:3]

  # Move channels last so the reshape groups values per predicted box.
  if data_format == 'channels_first':
    inputs = tf.transpose(inputs, [0, 2, 3, 1])
  inputs = tf.reshape(inputs,
                      [-1, n_anchors * grid_shape[0] * grid_shape[1], 5 + n_classes])

  strides = (img_size[0] // grid_shape[0], img_size[1] // grid_shape[1])

  box_centers, box_shapes, confidence, classes = tf.split(
      inputs, [2, 2, 1, n_classes], axis=-1)

  # Per-cell offsets, repeated once per anchor.
  # NOTE(review): x is paired with grid_shape[0]; this looks consistent only
  # for square grids - confirm before using non-square inputs.
  x = tf.range(grid_shape[0], dtype=tf.float32)
  y = tf.range(grid_shape[1], dtype=tf.float32)
  x_offset, y_offset = tf.meshgrid(x, y)
  x_offset = tf.reshape(x_offset, (-1, 1))
  y_offset = tf.reshape(y_offset, (-1, 1))
  x_y_offset = tf.concat([x_offset, y_offset], axis=-1)
  x_y_offset = tf.tile(x_y_offset, [1, n_anchors])
  x_y_offset = tf.reshape(x_y_offset, [1, -1, 2])

  box_centers = tf.nn.sigmoid(box_centers)
  box_centers = (box_centers + x_y_offset) * strides

  anchors = tf.tile(anchors, [grid_shape[0] * grid_shape[1], 1])
  box_shapes = tf.exp(box_shapes) * tf.to_float(anchors)

  # Confidence and class scores are each squashed from their own logits.
  confidence = tf.nn.sigmoid(confidence)
  classes = tf.nn.sigmoid(classes)

  inputs = tf.concat([box_centers, box_shapes, confidence, classes], axis=-1)
  return inputs

# Upsample layer

In [2]:
def upsample(inputs, out_shape, data_format):
  """Resize `inputs` to the spatial size in `out_shape` (nearest neighbor).

  Args:
    inputs: 4-D tensor laid out according to `data_format`.
    out_shape: 4-element shape to match.
      NOTE(review): presumably the static shape of another tensor in the
      same `data_format` layout - confirm against the caller.
    data_format: 'channels_first' or 'channels_last'.

  Returns:
    The resized tensor, in the same layout as `inputs`.
  """
  channels_first = data_format == 'channels_first'

  if channels_first:
    # The resize below runs on a channels-last tensor.
    inputs = tf.transpose(inputs, [0, 2, 3, 1])
    new_height, new_width = out_shape[2], out_shape[3]
  else:
    new_height, new_width = out_shape[1], out_shape[2]

  # `size` is ordered (new_height, new_width).
  inputs = tf.image.resize_nearest_neighbor(inputs, (new_height, new_width))

  if channels_first:
    inputs = tf.transpose(inputs, [0, 3, 1, 2])

  return inputs

# Non-max suppression

In [4]:
def build_boxes(inputs):
  center_x, center_y, width, height, confidence, classes = tf.split(inputs, [1,1,1,1,1,-1], axis=-1)
  top_left_x = center_x - width / 2
  top_left_y = center_y - height / 2
  bottom_right_x = cneter_x + width / 2
  bottom_right_y = center_y + height / 2

  boxes = tf.concat([top_left_x, top_left_y, bottom_right_x, bottom_right_y, confidence, classes], axis=-1)