<a href="https://colab.research.google.com/github/mralamdari/CV-Yolo/blob/main/YOLO_v3_ObjectDetection_TensorFlow2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import cv2
import PIL
import colorsys
import scipy.io
import scipy.misc
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from google.colab.patches import cv2_imshow
from tensorflow.python.saved_model import tag_constants

In [None]:
# Tell the Kaggle CLI where to look for its credentials file
# (assumes Google Drive is already mounted at /content/drive — TODO confirm).
os.environ['KAGGLE_CONFIG_DIR'] = '/content/drive/MyDrive'
# Download the Kaggle dataset archive into the current working directory.
!kaggle datasets download -d aruchomu/data-for-yolo-v3-kernel
# Fetch the yolov3.weights file into ./model_data.
!wget -P model_data https://pjreddie.com/media/files/yolov3.weights
# Extract every zip archive in the current directory, then delete the archives.
!unzip \*.zip && rm *.zip

In [None]:
def load_yolo_model(input_size, input_classes, class_names):
    """Build a YOLOv3 model and restore its weights from the fixed checkpoint.

    Args:
        input_size: Side length of the square network input.
        input_classes: Forwarded to ``create_yolo_model`` as ``classes``.
        class_names: Collection of class names; forwarded to
            ``create_yolo_model``.

    Returns:
        The model with weights loaded from ``./checkpoints/yoloV3_custom``.
    """
    model = create_yolo_model(
        class_names,
        input_size=input_size,
        classes=input_classes,
    )
    model.load_weights("./checkpoints/yoloV3_custom")
    return model

In [None]:
def create_yolo_model(class_names, classes, input_size=416, channels=3, training=False):
    """Assemble the YOLOv3 Keras model with decoded prediction outputs.

    Args:
        class_names: Sized collection of class names; only its length is used.
        classes: Not used by this function.
        input_size: Height and width of the square input image.
        channels: Number of input image channels.
        training: When true, each raw convolutional output is emitted
            immediately before its decoded counterpart.

    Returns:
        A ``tf.keras.Model`` mapping the input layer to the list of outputs.

    Note:
        ``decode`` is not defined in the visible part of this notebook; it is
        presumably provided by another cell — verify before running.
    """
    class_count = len(class_names)
    inputs = tf.keras.layers.Input([input_size, input_size, channels])

    outputs = []
    for scale_index, raw_output in enumerate(YOLOv3(inputs, class_count)):
        decoded = decode(raw_output, class_count, scale_index)
        # Raw feature map (training only) always precedes its decoded tensor.
        outputs.extend((raw_output, decoded) if training else (decoded,))

    return tf.keras.Model(inputs, outputs)

In [None]:
def upsample(input_layer):
    """Return ``input_layer`` passed through a Keras ``UpSampling2D(2)`` layer."""
    doubler = tf.keras.layers.UpSampling2D(2)
    return doubler(input_layer)

In [1]:
def convolutional(input_layer, input_dim, output_dim=None, kernel_size=None, downsample=False, activate=True, bn=True, activate_type='leaky'):
    """Conv2D -> optional BatchNormalization -> optional LeakyReLU.

    Two calling conventions are accepted so that every caller in this
    notebook works:

    * explicit: ``convolutional(x, input_dim, output_dim, kernel_size)``
    * packed:   ``convolutional(x, (kernel_h, kernel_w, input_dim, output_dim))``

    Args:
        input_layer: Tensor to convolve.
        input_dim: Input channel count, or the packed 4-item filter shape.
            The channel count itself is not passed to ``Conv2D``.
        output_dim: Number of convolution filters (explicit form).
        kernel_size: Convolution kernel size (explicit form).
        downsample: When true, pad one row/column on the top/left and apply
            a stride-2 ``valid`` convolution; otherwise stride 1 with
            ``same`` padding.
        activate: Apply ``LeakyReLU(alpha=0.1)`` after the convolution.
        bn: Apply batch normalization; the convolution bias is disabled
            when this is true.
        activate_type: Accepted for interface compatibility; not consulted,
            the activation is always LeakyReLU.

    Returns:
        The output tensor of the last applied layer.

    Raises:
        TypeError: If neither a complete explicit specification nor a
            4-item packed filter shape is supplied.
    """
    if output_dim is None and kernel_size is None:
        # Packed form: everything is carried by the second positional argument.
        if not isinstance(input_dim, (tuple, list)) or len(input_dim) != 4:
            raise TypeError(
                "convolutional() needs either (input_dim, output_dim, kernel_size) "
                "or a single 4-item filter shape (kernel_h, kernel_w, input_dim, output_dim)"
            )
        kernel_h, kernel_w, _, output_dim = input_dim
        kernel_size = (kernel_h, kernel_w)
    elif output_dim is None or kernel_size is None:
        raise TypeError("convolutional() requires output_dim and kernel_size to be given together")

    if downsample:
        # Asymmetric top/left padding so the stride-2 'valid' conv halves the size.
        input_layer = tf.keras.layers.ZeroPadding2D(((1, 0), (1, 0)))(input_layer)
        padding = 'valid'
        strides = 2
    else:
        strides = 1
        padding = 'same'

    conv = tf.keras.layers.Conv2D(filters=output_dim, kernel_size=kernel_size, strides=strides,
                  padding=padding, use_bias=not bn, kernel_regularizer=tf.keras.regularizers.L2(0.0005),
                  kernel_initializer=tf.random_normal_initializer(stddev=0.01),
                  bias_initializer=tf.constant_initializer(0.))(input_layer)
    if bn:
        conv = tf.keras.layers.BatchNormalization()(conv)
    if activate:
        conv = tf.keras.layers.LeakyReLU(alpha=0.1)(conv)
    return conv

In [None]:
def residual_block(x, channels, filter1, filter2, activation='leaky'):
    """Two-convolution residual unit: 1x1 conv, 3x3 conv, then skip addition.

    Args:
        x: Input tensor; also used as the shortcut branch.
        channels: Channel count of ``x``, forwarded as ``input_dim``.
        filter1: Filter count of the 1x1 convolution.
        filter2: Filter count of the 3x3 convolution. It should match the
            channel count of ``x`` so the addition with the shortcut is valid.
        activation: Forwarded to ``convolutional`` as ``activate_type``.

    Returns:
        The sum of the shortcut and the convolved branch.
    """
    shortcut = x
    x = convolutional(input_layer=x, input_dim=channels, output_dim=filter1, kernel_size=1, activate_type=activation)
    # Honour the caller-supplied filter2 instead of assuming it equals 2 * filter1.
    x = convolutional(input_layer=x, input_dim=filter1, output_dim=filter2, kernel_size=3, activate_type=activation)
    return shortcut + x

In [None]:
# File listing the class names, one per line.
input_classes = "/content/coco.names"

# Map class index -> class name, in file order.
class_names = {}
with open(input_classes, 'r') as data:
    for ID, name in enumerate(data):
        class_names[ID] = name.strip('\n')

# The loader is defined as `load_yolo_model` (lower-case). Building the model
# calls YOLOv3/darknet53, so the cells defining them must be run first.
yolo = load_yolo_model(input_size=416, input_classes=input_classes, class_names=class_names)

# The last assignment wins; reorder these lines to pick a different image.
image_path = '/content/dog.jpg'
image_path = '/content/office.jpg'

In [None]:
def darknet53(input_data):
    """Darknet-53 style backbone returning three feature maps.

    The network is a 3x3 stem convolution (32 filters) followed by five
    stages. Each stage is one stride-2 3x3 convolution that changes the
    channel count, followed by a number of residual blocks.

    Args:
        input_data: Input image tensor.

    Returns:
        Tuple ``(route_1, route_2, output)``: the outputs of the 256-channel
        stage, the 512-channel stage and the final 1024-channel stage
        (downsampled 3, 4 and 5 times by stride 2 respectively).
    """
    # (output channels, number of residual blocks) for each downsampling stage.
    stages = ((64, 1), (128, 2), (256, 8), (512, 8), (1024, 4))

    in_channels = 32
    input_data = convolutional(input_data, input_dim=3, output_dim=in_channels, kernel_size=3)

    stage_outputs = []
    for out_channels, num_blocks in stages:
        input_data = convolutional(input_data, input_dim=in_channels, output_dim=out_channels,
                                   kernel_size=3, downsample=True)
        for _ in range(num_blocks):
            input_data = residual_block(input_data, out_channels, out_channels // 2, out_channels)
        stage_outputs.append(input_data)
        in_channels = out_channels

    # Skip connections are taken after the 256- and 512-channel stages.
    route_1, route_2 = stage_outputs[2], stage_outputs[3]
    return route_1, route_2, input_data

In [None]:
def _yolo_conv_stack(conv, in_channels, channels):
    """Apply five alternating 1x1 / 3x3 convolutions, ending with `channels` filters."""
    conv = convolutional(conv, in_channels, channels, 1)
    conv = convolutional(conv, channels, 2 * channels, 3)
    conv = convolutional(conv, 2 * channels, channels, 1)
    conv = convolutional(conv, channels, 2 * channels, 3)
    conv = convolutional(conv, 2 * channels, channels, 1)
    return conv


def _yolo_head(conv, channels, num_outputs):
    """Apply a 3x3 conv, then a 1x1 conv without batch norm or activation."""
    branch = convolutional(conv, channels, 2 * channels, 3)
    return convolutional(branch, 2 * channels, num_outputs, 1, activate=False, bn=False)


def _upsample_and_merge(conv, channels, route):
    """Halve the channels with a 1x1 conv, upsample 2x and concatenate with `route`."""
    conv = convolutional(conv, channels, channels // 2, 1)
    # Upsampling has no trainable parameters.
    conv = upsample(conv)
    # Use a Keras layer rather than tf.concat so the merge also works on
    # symbolic Keras tensors that cannot be fed to raw TensorFlow ops.
    return tf.keras.layers.Concatenate(axis=-1)([conv, route])


def YOLOv3(input_layer, NUM_CLASS):
    """Build the YOLOv3 detection network on top of the Darknet-53 backbone.

    Args:
        input_layer: Input image tensor.
        NUM_CLASS: Number of object classes; each scale predicts
            ``3 * (NUM_CLASS + 5)`` channels.

    Returns:
        ``[conv_sbbox, conv_mbbox, conv_lbbox]``: raw prediction tensors for
        the small-, medium- and large-object scales. For a 416x416 input and
        80 classes these are 52x52x255, 26x26x255 and 13x13x255.
    """
    # The backbone yields three feature maps at decreasing resolution.
    route_1, route_2, conv = darknet53(input_layer)
    num_outputs = 3 * (NUM_CLASS + 5)

    # Coarsest scale: predicts large objects.
    conv = _yolo_conv_stack(conv, 1024, 512)
    conv_lbbox = _yolo_head(conv, 512, num_outputs)

    # Middle scale: 256 upsampled channels + 512 from route_2 = 768.
    conv = _upsample_and_merge(conv, 512, route_2)
    conv = _yolo_conv_stack(conv, 768, 256)
    conv_mbbox = _yolo_head(conv, 256, num_outputs)

    # Finest scale: 128 upsampled channels + 256 from route_1 = 384.
    conv = _upsample_and_merge(conv, 256, route_1)
    conv = _yolo_conv_stack(conv, 384, 128)
    conv_sbbox = _yolo_head(conv, 128, num_outputs)

    return [conv_sbbox, conv_mbbox, conv_lbbox]