<a href="https://colab.research.google.com/github/mralamdari/CV-Yolo/blob/main/YOLO_v3_ObjectDetection_TensorFlow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import os
import PIL
import cv2
import numpy as pd
import pandas as pd
import tensorflow as tf
from seaborn import color_palette
from IPython.display import display

In [6]:
# Tell the Kaggle CLI which directory to read its configuration from.
# NOTE(review): presumably kaggle.json is stored there and Google Drive is
# already mounted at /content/drive — confirm before running on a fresh runtime.
os.environ['KAGGLE_CONFIG_DIR'] = '/content/drive/MyDrive'
# Download the dataset archive (per the cell output it holds coco.names,
# yolov3.weights, futur.ttf, dog.jpg, office.jpg and detections.gif).
!kaggle datasets download -d aruchomu/data-for-yolo-v3-kernel
# Extract every zip in the working directory; the archives are removed only
# if unzip succeeds (`&&`).
!unzip \*.zip && rm *.zip

Downloading data-for-yolo-v3-kernel.zip to /content
 99% 265M/267M [00:04<00:00, 96.9MB/s]
100% 267M/267M [00:04<00:00, 64.7MB/s]
Archive:  data-for-yolo-v3-kernel.zip
  inflating: coco.names              
  inflating: detections.gif          
  inflating: dog.jpg                 
  inflating: futur.ttf               
  inflating: office.jpg              
  inflating: yolov3.weights          


In [9]:
# Passed as `momentum` to the batch-normalization layer in batch_norm().
_BATCH_NORM_DECAY = 0.9
# Passed as `epsilon` to the batch-normalization layer in batch_norm().
_BATCH_NORM_EPSILON = 1e-5
# `alpha` (negative-side slope) handed to tf.nn.leaky_relu.
_LEAKY_RELU = 0.1
# Nine anchor pairs, listed from smallest to largest.
# NOTE(review): presumably (width, height) in pixels relative to _MODEL_SIZE,
# three per detection scale — confirm against the code that consumes them.
_ANCHORS = [(10, 13), (16, 30), (33, 23),
            (30, 61), (62, 45), (59, 119),
            (116, 90), (156, 198), (373, 326)]

# NOTE(review): presumably the network input resolution — confirm the
# (width, height) vs (height, width) order at the point of use.
_MODEL_SIZE = (416, 416)

In [17]:
def batch_norm(inputs, training, data_format):
    """Apply batch normalization over the channel axis of `inputs`.

    Args:
        inputs: Tensor to normalize.
        training: Forwarded as the layer's `training` argument.
        data_format: 'channels_first' normalizes over axis 1; any other
            value normalizes over axis 3.

    Returns:
        Whatever `tf.layers.batch_normalization` returns for these arguments.

    NOTE(review): `tf.layers` is the TensorFlow 1.x layers API — confirm the
    runtime's TensorFlow version still exposes it.
    """
    if data_format == 'channels_first':
        channel_axis = 1
    else:
        channel_axis = 3

    bn_kwargs = dict(
        inputs=inputs,
        axis=channel_axis,
        momentum=_BATCH_NORM_DECAY,
        epsilon=_BATCH_NORM_EPSILON,
        scale=True,
        training=training,
    )
    return tf.layers.batch_normalization(**bn_kwargs)

In [16]:
def fixed_padding(inputs, kernel_size, data_format):
    """Pad two axes of a rank-4 tensor by an amount derived from `kernel_size`.

    The total padding per axis is `kernel_size - 1`, split so that the
    leading side gets the floor half and the trailing side gets the rest.

    Args:
        inputs: Tensor handed to `tf.pad`; the 4-entry padding spec implies
            rank 4.
        kernel_size: Integer kernel size used to compute the padding.
        data_format: 'channels_first' pads axes 2 and 3; any other value
            pads axes 1 and 2.

    Returns:
        The result of `tf.pad` with the computed paddings.
    """
    before, leftover = divmod(kernel_size - 1, 2)
    after = before + leftover

    if data_format == 'channels_first':
        paddings = [[0, 0], [0, 0], [before, after], [before, after]]
    else:
        paddings = [[0, 0], [before, after], [before, after], [0, 0]]

    return tf.pad(inputs, paddings)

In [15]:
def conv2d_fixed_padding(inputs, filters, kernel_size, data_format, strides=1):
    """Bias-free 2-D convolution with explicit padding for strided calls.

    For `strides > 1` the input is first padded by `fixed_padding` and the
    convolution runs with 'VALID' padding; for `strides == 1` the input is
    used as-is and the convolution runs with 'SAME' padding.

    Fix: the convolution used to live inside the `if strides > 1` branch, so
    the default `strides=1` call returned None. It now runs for every stride.

    Args:
        inputs: Input tensor.
        filters: Number of output filters.
        kernel_size: Convolution kernel size.
        data_format: Forwarded to `fixed_padding` and `tf.layers.conv2d`.
        strides: Convolution stride (default 1).

    Returns:
        The output of `tf.layers.conv2d`.

    NOTE(review): `tf.layers` is the TensorFlow 1.x layers API — confirm the
    runtime's TensorFlow version still exposes it.
    """
    if strides > 1:
        # Pad explicitly so the strided convolution can use 'VALID' below.
        inputs = fixed_padding(inputs, kernel_size, data_format)

    return tf.layers.conv2d(inputs=inputs,
                            filters=filters,
                            kernel_size=kernel_size,
                            strides=strides,
                            padding=('SAME' if strides == 1 else 'VALID'),
                            use_bias=False,
                            data_format=data_format)

# Feature extraction: Darknet-53


In [14]:
def darknet53_residual_block(inputs, filters, training, data_format, strides=1):
    """Build one residual block: 1x1 conv -> 3x3 conv, plus a skip connection.

    Each convolution is followed by batch normalization and a leaky ReLU
    with slope `_LEAKY_RELU`. The block's input is added to the result.

    Fix: the second convolution was called with the misspelled keyword
    `strids`, which raised TypeError; it now passes `strides`.

    Args:
        inputs: Input tensor; also used as the shortcut branch.
        filters: Filter count of the 1x1 convolution; the 3x3 convolution
            uses `2 * filters`.
        training: Forwarded to `batch_norm`.
        data_format: Forwarded to the convolution and batch-norm helpers.
        strides: Stride forwarded to both convolutions (default 1).

    Returns:
        The sum of the block's input and the second activation.
    """
    shortcut = inputs

    # 1x1 convolution with `filters` output channels.
    inputs = conv2d_fixed_padding(inputs,
                                  filters=filters,
                                  kernel_size=1,
                                  strides=strides,
                                  data_format=data_format)
    inputs = batch_norm(inputs, training=training, data_format=data_format)
    inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)

    # 3x3 convolution with twice as many output channels.
    inputs = conv2d_fixed_padding(inputs,
                                  filters=2 * filters,
                                  kernel_size=3,
                                  strides=strides,
                                  data_format=data_format)
    inputs = batch_norm(inputs, training=training, data_format=data_format)
    inputs = tf.nn.leaky_relu(inputs, alpha=_LEAKY_RELU)

    # Residual (skip) connection.
    return inputs + shortcut