<a href="https://colab.research.google.com/github/mralamdari/CV-Yolo/blob/main/Yolo_TensorFlow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import cv2
import PIL
import colorsys
import scipy.io
import scipy.misc
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from google.colab.patches import cv2_imshow
from tensorflow.python.saved_model import tag_constants

In [None]:
# Set KAGGLE_CONFIG_DIR so the Kaggle CLI below looks for its configuration there.
# NOTE(review): the path looks like a mounted Google Drive folder — confirm it is mounted first.
os.environ['KAGGLE_CONFIG_DIR'] = '/content/drive/MyDrive'
# Download the dataset archive into the current working directory.
!kaggle datasets download -d aruchomu/data-for-yolo-v3-kernel
# Extract every zip archive here; the archives are removed only if unzip succeeds (&&).
!unzip \*.zip && rm *.zip

# Load The Yolo Model

Choose the YOLO framework: `tf` (TensorFlow) or `trt` (TensorRT),


the YOLO type, one of:
yolov3
yolov3-tiny
yolov4
yolov4-tiny


and decide whether you need custom weights.

In [None]:
def load_yolo_model(yolo_framework, yolo_type, yolo_costom_weights, input_size, input_classes, class_names):
    """Build or load a YOLO model ready for inference.

    Args:
        yolo_framework: "tf" to build a Keras model and load weights into it,
            or "trt" to load an exported SavedModel.
        yolo_type: model variant name; it is used to pick the architecture and
            to build the weights path (``model_data/{yolo_type}.weights`` or
            ``./checkpoints/{yolo_type}_custom``).
        yolo_costom_weights: for "tf", a truthy value selects the custom
            checkpoint and a falsy value selects the Darknet weights file.
            For "trt", this is the SavedModel directory passed to
            ``tf.saved_model.load``.
        input_size: input height/width forwarded to ``create_yolo_model``.
        input_classes: forwarded to ``create_yolo_model`` as ``classes``.
        class_names: collection of class names forwarded to
            ``create_yolo_model``.

    Returns:
        The Keras model ("tf") or the ``serving_default`` signature of the
        loaded SavedModel ("trt").

    Raises:
        ValueError: if ``yolo_framework`` is neither "tf" nor "trt".
    """
    # Best effort: let TensorFlow grow GPU memory on demand instead of
    # reserving it all. Skipped when there is no GPU.
    physical_devices = tf.config.list_physical_devices('GPU')
    if physical_devices:
        try:
            tf.config.experimental.set_memory_growth(physical_devices[0], True)
        except (RuntimeError, ValueError) as err:
            # Typically raised when the device was already initialised;
            # the model can still be loaded, so only report it.
            print(f"Could not enable GPU memory growth: {err}")

    if yolo_framework == "tf": # TensorFlow detection framework

        if yolo_costom_weights:
            checkpoint = f"./checkpoints/{yolo_type}_custom"
            print(f"Loading custom weights from: {checkpoint}")
            yolo = create_yolo_model(yolo_type, class_names, input_size=input_size, classes=input_classes)
            yolo.load_weights(checkpoint)
        else:
            Darknet_weights = f'model_data/{yolo_type}.weights'
            print(f"Loading Darknet_weights from: {Darknet_weights}")
            yolo = create_yolo_model(yolo_type, class_names, input_size=input_size, classes=input_classes)
            load_yolo_weights(yolo, Darknet_weights) # use Darknet weights

    elif yolo_framework == "trt": # TensorRT detection framework
        saved_model_loaded = tf.saved_model.load(yolo_costom_weights, tags=[tag_constants.SERVING])
        yolo = saved_model_loaded.signatures['serving_default']

    else:
        # Previously this fell through to `return yolo` with `yolo` unbound.
        raise ValueError(
            f"Unknown yolo_framework {yolo_framework!r}; expected 'tf' or 'trt'"
        )

    return yolo

# Create Yolo Model


In [None]:
def create_yolo_model(yolo_type, class_names, classes, input_size=416, channels=3, training=False):
    """Assemble a Keras YOLO model for the requested variant.

    Args:
        yolo_type: one of "yolov3", "yolov3-tiny", "yolov4", "yolov4-tiny".
        class_names: collection of class names; only its length is used, to
            size the detection heads.
        classes: not used by this function; kept so existing callers that pass
            it keep working.
        input_size: height and width of the square input.
        channels: number of input channels.
        training: when true, each raw head output is emitted right before its
            decoded prediction, so the model has two outputs per scale.

    Returns:
        A ``tf.keras.Model`` mapping the input layer to the list of outputs.

    Raises:
        ValueError: if ``yolo_type`` is not a supported variant.
    """
    num_classes = len(class_names)
    input_layer = tf.keras.layers.Input([input_size, input_size, channels])

    # The tiny variants are named "<base>-tiny", so the full name has to be
    # matched; comparing against the bare base name never selects them.
    if yolo_type == "yolov4-tiny":
        convolutional_layers = YOLOv4_tiny(input_layer, num_classes)
    elif yolo_type == "yolov3-tiny":
        convolutional_layers = YOLOv3_tiny(input_layer, num_classes)
    elif yolo_type == "yolov4":
        convolutional_layers = YOLOv4(input_layer, num_classes)
    elif yolo_type == "yolov3":
        convolutional_layers = YOLOv3(input_layer, num_classes)
    else:
        raise ValueError(
            f"Unsupported yolo_type {yolo_type!r}; expected one of "
            "'yolov3', 'yolov3-tiny', 'yolov4', 'yolov4-tiny'"
        )

    output_tensors = []
    for i, conv_layer in enumerate(convolutional_layers):
        pred_tensor = decode(conv_layer, num_classes, i)

        if training:
            output_tensors.append(conv_layer)

        output_tensors.append(pred_tensor)

    return tf.keras.Model(input_layer, output_tensors)

# Up Sample

Resize a batch of feature maps by doubling their height and width, for example:

    # shape=(None, 13, 13, 256)    ===>   (None, 26, 26, 256)
    # shape=(None, 26, 26, 128)    ===>   (None, 52, 52, 128)

In [None]:
def upsample(input_layer):
    """Return `input_layer` upsampled by a factor of 2 along both spatial axes."""
    doubling_layer = tf.keras.layers.UpSampling2D(2)
    return doubling_layer(input_layer)

# Convolutional Layer

In [None]:
def convolutional(input_layer, input_dim, output_dim, kernel_size, downsample=False, activate=True, bn=True, activate_type='leaky'):
    """Apply a Conv2D layer with optional batch normalisation and activation.

    Args:
        input_layer: tensor to convolve.
        input_dim: number of input channels. Not used by the layer itself;
            callers pass it to describe the expected input.
        output_dim: number of filters (output channels).
        kernel_size: convolution kernel size.
        downsample: when true, zero-pad one row on top and one column on the
            left, then convolve with stride 2 and 'valid' padding; otherwise
            use stride 1 with 'same' padding.
        activate: whether to apply an activation after the convolution.
        bn: whether to apply batch normalisation. The convolution has a bias
            only when this is false.
        activate_type: "leaky" (LeakyReLU with slope 0.1) or "mish". Any other
            value leaves the tensor without activation.

    Returns:
        The resulting tensor.
    """
    if downsample:
        input_layer = tf.keras.layers.ZeroPadding2D(((1, 0), (1, 0)))(input_layer)
        padding = 'valid'
        strides = 2
    else:
        strides = 1
        padding = 'same'

    # The Keras class is `Conv2D`; `Conv2d` does not exist and raises AttributeError.
    conv = tf.keras.layers.Conv2D(filters=output_dim,
                                  kernel_size=kernel_size,
                                  strides=strides,
                                  padding=padding,
                                  use_bias=not bn,
                                  kernel_regularizer=tf.keras.regularizers.L2(0.0005),
                                  kernel_initializer=tf.random_normal_initializer(stddev=0.01),
                                  bias_initializer=tf.constant_initializer(0.))(input_layer)

    if bn: # BatchNormalization
        conv = tf.keras.layers.BatchNormalization()(conv)
    if activate: # Activation
        if activate_type == "leaky":
            conv = tf.keras.layers.LeakyReLU(alpha=0.1)(conv)
        elif activate_type == "mish":
            # mish(x) = x * tanh(softplus(x)). The input must be kept for the
            # outer multiplication, so softplus must not overwrite it.
            conv = conv * tf.math.tanh(tf.math.softplus(conv))

    return conv

# Residual Block

This block applies two convolutional layers with different kernel sizes and filter counts. Its output has the same dimensions as its input, so the two are added element-wise (a skip connection), which helps prevent the model from losing the details captured by earlier layers.


In [None]:
def residual_block(x, channels, filter1, filter2, activation='leaky'):
    """Return `x` plus the result of a 1x1 then a 3x3 convolution of `x`.

    `channels`/`filter1` and `filter1`/`filter2` are passed to `convolutional`
    as the input/output channel counts of the first and second convolution;
    `activation` is forwarded as their `activate_type`.
    """
    squeezed = convolutional(x, channels, filter1, 1, activate_type=activation)
    expanded = convolutional(squeezed, filter1, filter2, 3, activate_type=activation)
    # Skip connection: element-wise sum of the block input and the conv path.
    return x + expanded

# Yolo V3

In [None]:
# Download the YOLOv3 Darknet weights into model_data/, where load_yolo_model
# looks for model_data/{yolo_type}.weights.
!wget -P model_data https://pjreddie.com/media/files/yolov3.weights

## DarkNet 53

it returns 3 branches to the yolo model

In [None]:
def darknet53(input_data):
    """Run the Darknet-53 backbone and return three feature maps.

    After an initial 3x3 convolution, the network runs five stages. Each stage
    is a stride-2 (downsampling) 3x3 convolution followed by a number of
    residual blocks.

    Returns:
        (route_1, route_2, output): the outputs of the third stage, the fourth
        stage and the final stage, in that order.
    """
    # (input channels, output channels, number of residual blocks) per stage.
    stage_specs = (
        (32, 64, 1),
        (64, 128, 2),
        (128, 256, 8),
        (256, 512, 8),
        (512, 1024, 4),
    )

    features = convolutional(input_data, 3, 32, 3)

    stage_outputs = []
    for in_channels, out_channels, num_blocks in stage_specs:
        features = convolutional(features, in_channels, out_channels, 3, downsample=True)
        for _ in range(num_blocks):
            features = residual_block(features, out_channels, in_channels, out_channels)
        stage_outputs.append(features)

    route_1 = stage_outputs[2]  # output of the 256-filter stage
    route_2 = stage_outputs[3]  # output of the 512-filter stage
    return route_1, route_2, features

## Yolov3 model

It takes the three outputs of the Darknet-53 block and makes predictions at 3 scales.

In [None]:
def YOLOv3(input_layer, classes_count):
    """Build the YOLOv3 detection heads on top of the Darknet-53 backbone.

    Each head ends in a 1x1 convolution without batch normalisation or
    activation, producing ``3 * (classes_count + 5)`` channels.

    Returns:
        ``[small, medium, large]``: the raw head outputs. `large` is computed
        from the deepest backbone output. `medium` and `small` come from
        upsampled features concatenated with the backbone's `route_2` and
        `route_1` outputs respectively.
    """
    def five_conv_stack(tensor, in_channels, narrow):
        # Alternating 1x1 (narrow) and 3x3 (2 * narrow) convolutions, ending narrow.
        wide = 2 * narrow
        tensor = convolutional(tensor, in_channels, narrow, 1)
        tensor = convolutional(tensor, narrow, wide, 3)
        tensor = convolutional(tensor, wide, narrow, 1)
        tensor = convolutional(tensor, narrow, wide, 3)
        return convolutional(tensor, wide, narrow, 1)

    head_filters = 3 * (classes_count + 5)
    shallow_route, middle_route, features = darknet53(input_layer)

    # Head for the deepest (coarsest) feature map.
    features = five_conv_stack(features, 1024, 512)
    large_branch = convolutional(features, 512, 1024, 3)
    large_pred = convolutional(large_branch, 1024, head_filters, 1, activate=False, bn=False)

    # Upsampling has no trainable parameters; merge with the middle backbone route.
    features = convolutional(features, 512, 256, 1)
    features = upsample(features)
    features = tf.concat([features, middle_route], axis=-1)

    features = five_conv_stack(features, 768, 256)
    medium_branch = convolutional(features, 256, 512, 3)
    medium_pred = convolutional(medium_branch, 512, head_filters, 1, activate=False, bn=False)

    # Same again, this time merging with the shallowest backbone route.
    features = convolutional(features, 256, 128, 1)
    features = upsample(features)
    features = tf.concat([features, shallow_route], axis=-1)

    features = five_conv_stack(features, 384, 128)
    small_branch = convolutional(features, 128, 256, 3)
    small_pred = convolutional(small_branch, 256, head_filters, 1, activate=False, bn=False)

    return [small_pred, medium_pred, large_pred]

# Yolo V3 Tiny

In [None]:
# Download the YOLOv3-tiny Darknet weights into model_data/, where load_yolo_model
# looks for model_data/{yolo_type}.weights.
!wget -P model_data https://pjreddie.com/media/files/yolov3-tiny.weights

## DarkNet19_tiny

In [None]:
def darknet19_tiny(input_data):
    """Run the tiny backbone: 3x3 convolutions interleaved with max pooling.

    Returns:
        (route_1, output): the 256-filter feature map taken before its pooling
        layer, and the final 1024-filter feature map.
    """
    features = input_data

    # Four conv + stride-2 pool pairs, doubling the filter count each time.
    for in_channels, out_channels in ((3, 16), (16, 32), (32, 64), (64, 128)):
        features = convolutional(features, in_channels, out_channels, 3)
        features = tf.keras.layers.MaxPool2D(2, 2, 'same')(features)

    features = convolutional(features, 128, 256, 3)
    route_1 = features

    features = tf.keras.layers.MaxPool2D(2, 2, 'same')(features)
    features = convolutional(features, 256, 512, 3)
    # This pool uses stride 1, so it does not reduce the spatial size.
    features = tf.keras.layers.MaxPool2D(2, 1, 'same')(features)
    features = convolutional(features, 512, 1024, 3)

    return route_1, features

## Yolov3-Tiny model


In [None]:
def YOLOv3_tiny(input_layer, NUM_CLASS):
    """Build the two YOLOv3-tiny detection heads on top of `darknet19_tiny`.

    Each head ends in a 1x1 convolution without batch normalisation or
    activation, producing ``3 * (NUM_CLASS + 5)`` channels.

    Returns:
        ``[conv_mbbox, conv_lbbox]``: the head fed by the upsampled features
        concatenated with the backbone's `route_1`, followed by the head fed
        directly by the backbone's final output.
    """
    head_filters = 3 * (NUM_CLASS + 5)

    # The backbone yields an intermediate route and its final feature map.
    shallow_route, features = darknet19_tiny(input_layer)

    features = convolutional(features, 1024, 256, 1)
    coarse_branch = convolutional(features, 256, 512, 3)
    # Head computed at the backbone's final (coarsest) resolution.
    coarse_pred = convolutional(coarse_branch, 512, head_filters, 1, activate=False, bn=False)

    # Upsampling has no trainable parameters; afterwards the features are
    # merged with the shallower backbone route.
    features = convolutional(features, 256, 128, 1)
    features = upsample(features)
    features = tf.concat([features, shallow_route], axis=-1)

    fine_branch = convolutional(features, 128, 256, 3)
    # Head computed at the upsampled (finer) resolution.
    fine_pred = convolutional(fine_branch, 256, head_filters, 1, activate=False, bn=False)

    return [fine_pred, coarse_pred]

# Yolo V4

In [None]:
# Download the YOLOv4 Darknet weights into model_data/, where load_yolo_model
# looks for model_data/{yolo_type}.weights.
!wget -P model_data https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights

In [None]:
def cspdarknet53(input_data):
    """Run the CSPDarknet-53 backbone used by YOLOv4 and return three feature maps.

    Each stage downsamples with a stride-2 3x3 convolution, then splits into a
    bypass branch (`route`) and a branch of residual blocks; the two are
    concatenated and fused with a 1x1 convolution. The backbone convolutions
    use the "mish" activation. After the last stage, three stride-1 max pools
    (13, 9, 5) are concatenated with their input, surrounded by convolutions
    that use the default activation.

    Returns:
        (route_1, route_2, output): the fused 256-filter stage output, the
        fused 512-filter stage output, and the final 512-filter feature map.
    """
    input_data = convolutional(input_data, 3,  32, 3, activate_type="mish")
    input_data = convolutional(input_data, 32, 64, 3, downsample=True, activate_type="mish")

    route = input_data
    route = convolutional(route, 64, 64, 1, activate_type="mish")
    input_data = convolutional(input_data, 64, 64, 1, activate_type="mish")

    # residual_block names its activation parameter `activation`; passing
    # `activate_type=` to it raises TypeError.
    for _ in range(1):
        input_data = residual_block(input_data,  64,  32, 64, activation="mish")

    input_data = convolutional(input_data, 64, 64, 1, activate_type="mish")

    input_data = tf.concat([input_data, route], axis=-1)
    input_data = convolutional(input_data, 128, 64, 1, activate_type="mish")
    input_data = convolutional(input_data, 64, 128, 3, downsample=True, activate_type="mish")
    route = input_data

    route = convolutional(route, 128, 64, 1, activate_type="mish")
    input_data = convolutional(input_data, 128, 64, 1, activate_type="mish")

    for _ in range(2):
        input_data = residual_block(input_data, 64,  64, 64, activation="mish")

    input_data = convolutional(input_data, 64, 64, 1, activate_type="mish")
    input_data = tf.concat([input_data, route], axis=-1)

    input_data = convolutional(input_data, 128, 128, 1, activate_type="mish")
    input_data = convolutional(input_data, 128, 256, 3, downsample=True, activate_type="mish")
    route = input_data

    route = convolutional(route, 256, 128, 1, activate_type="mish")
    input_data = convolutional(input_data, 256, 128, 1, activate_type="mish")

    for _ in range(8):
        input_data = residual_block(input_data, 128, 128, 128, activation="mish")

    input_data = convolutional(input_data, 128, 128, 1, activate_type="mish")
    input_data = tf.concat([input_data, route], axis=-1)

    input_data = convolutional(input_data, 256, 256, 1, activate_type="mish")
    route_1 = input_data

    input_data = convolutional(input_data, 256, 512, 3, downsample=True, activate_type="mish")
    route = input_data

    route = convolutional(route, 512, 256, 1, activate_type="mish")
    input_data = convolutional(input_data, 512, 256, 1, activate_type="mish")

    for _ in range(8):
        input_data = residual_block(input_data, 256, 256, 256, activation="mish")

    input_data = convolutional(input_data, 256, 256, 1, activate_type="mish")
    input_data = tf.concat([input_data, route], axis=-1)

    input_data = convolutional(input_data, 512, 512, 1, activate_type="mish")
    route_2 = input_data

    input_data = convolutional(input_data, 512, 1024, 3, downsample=True, activate_type="mish")
    route = input_data

    route = convolutional(route, 1024, 512, 1, activate_type="mish")
    input_data = convolutional(input_data, 1024, 512, 1, activate_type="mish")

    for _ in range(4):
        input_data = residual_block(input_data, 512, 512, 512, activation="mish")

    input_data = convolutional(input_data, 512, 512, 1, activate_type="mish")
    input_data = tf.concat([input_data, route], axis=-1)

    input_data = convolutional(input_data, 1024, 1024, 1, activate_type="mish")
    input_data = convolutional(input_data, 1024, 512, 1)
    input_data = convolutional(input_data, 512, 1024, 3)
    input_data = convolutional(input_data, 1024, 512, 1)

    # Pool the same tensor at three window sizes (stride 1, so the spatial
    # size is kept) and stack the results with the unpooled tensor.
    max_pooling_1 = tf.keras.layers.MaxPool2D(pool_size=13, padding='SAME', strides=1)(input_data)
    max_pooling_2 = tf.keras.layers.MaxPool2D(pool_size=9, padding='SAME', strides=1)(input_data)
    max_pooling_3 = tf.keras.layers.MaxPool2D(pool_size=5, padding='SAME', strides=1)(input_data)
    input_data = tf.concat([max_pooling_1, max_pooling_2, max_pooling_3, input_data], axis=-1)

    input_data = convolutional(input_data, 2048, 512, 1)
    input_data = convolutional(input_data, 512, 1024, 3)
    input_data = convolutional(input_data, 1024, 512, 1)

    return route_1, route_2, input_data

In [None]:
def YOLOv4(input_layer, NUM_CLASS):
    """Build the YOLOv4 detection heads on top of the CSPDarknet-53 backbone.

    The deepest backbone output is upsampled twice and merged with the two
    shallower routes (top-down path). The result is then downsampled twice and
    merged back (bottom-up path). One head is attached at each of the three
    resolutions; each head ends in a 1x1 convolution without batch
    normalisation or activation, producing ``3 * (NUM_CLASS + 5)`` channels.

    Returns:
        ``[conv_sbbox, conv_mbbox, conv_lbbox]``: the raw head outputs, from
        the finest resolution to the coarsest.
    """
    route_1, route_2, conv = cspdarknet53(input_layer)

    # Top-down path: upsample and merge with the middle backbone route.
    route = conv
    conv = convolutional(conv, 512, 256, 1)
    conv = upsample(conv)
    route_2 = convolutional(route_2, 512, 256, 1)
    conv = tf.concat([route_2, conv], axis=-1)

    # The 256 -> 512 convolutions use a 3x3 kernel, matching the 128 -> 256 and
    # 512 -> 1024 stacks below and the layer shapes in the Darknet weights.
    conv = convolutional(conv, 512, 256, 1)
    conv = convolutional(conv, 256, 512, 3)
    conv = convolutional(conv, 512, 256, 1)
    conv = convolutional(conv, 256, 512, 3)
    conv = convolutional(conv, 512, 256, 1)

    # Upsample again and merge with the shallowest backbone route.
    route_2 = conv
    conv = convolutional(conv, 256, 128, 1)
    conv = upsample(conv)
    route_1 = convolutional(route_1, 256, 128, 1)
    conv = tf.concat([route_1, conv], axis=-1)

    conv = convolutional(conv, 256, 128, 1)
    conv = convolutional(conv, 128, 256, 3)
    conv = convolutional(conv, 256, 128, 1)
    conv = convolutional(conv, 128, 256, 3)
    conv = convolutional(conv, 256, 128, 1)

    route_1 = conv
    conv = convolutional(conv, 128, 256, 3)
    conv_sbbox = convolutional(conv, 256, 3 * (NUM_CLASS + 5), 1, activate=False, bn=False)

    # Bottom-up path. `convolutional` takes input_dim, output_dim and
    # kernel_size as separate arguments, not a (k, k, in, out) tuple.
    conv = convolutional(route_1, 128, 256, 3, downsample=True)
    conv = tf.concat([conv, route_2], axis=-1)

    conv = convolutional(conv, 512, 256, 1)
    conv = convolutional(conv, 256, 512, 3)
    conv = convolutional(conv, 512, 256, 1)
    conv = convolutional(conv, 256, 512, 3)
    conv = convolutional(conv, 512, 256, 1)

    route_2 = conv
    conv = convolutional(conv, 256, 512, 3)
    conv_mbbox = convolutional(conv, 512, 3 * (NUM_CLASS + 5), 1, activate=False, bn=False)

    conv = convolutional(route_2, 256, 512, 3, downsample=True)
    conv = tf.concat([conv, route], axis=-1)

    conv = convolutional(conv, 1024, 512, 1)
    conv = convolutional(conv, 512, 1024, 3)
    conv = convolutional(conv, 1024, 512, 1)
    conv = convolutional(conv, 512, 1024, 3)
    conv = convolutional(conv, 1024, 512, 1)

    conv = convolutional(conv, 512, 1024, 3)
    conv_lbbox = convolutional(conv, 1024, 3 * (NUM_CLASS + 5), 1, activate=False, bn=False)

    return [conv_sbbox, conv_mbbox, conv_lbbox]

# Yolo V4 Tiny

In [None]:
# Download the YOLOv4-tiny Darknet weights into model_data/, where load_yolo_model
# looks for model_data/{yolo_type}.weights.
!wget -P model_data https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.weights

In [None]:
def route_group(input_layer, groups, group_id):
    """Split `input_layer` into `groups` pieces along the last axis and return piece `group_id`."""
    channel_groups = tf.split(input_layer, num_or_size_splits=groups, axis=-1)
    selected_group = channel_groups[group_id]
    return selected_group

In [None]:
def cspdarknet53_tiny(input_data):
    """Run the tiny CSP backbone used by YOLOv4-tiny.

    After two downsampling convolutions, the network runs three similar
    stages. Each stage applies a 3x3 convolution, takes the second half of its
    channels (`route_group(..., 2, 1)`), runs two 3x3 convolutions on that
    half, concatenates and fuses them with a 1x1 convolution, concatenates the
    result with the stage input and max-pools with stride 2.

    Returns:
        (route_1, output): the fused 1x1 convolution output of the third
        stage, and the final 512-filter feature map.
    """
    # `convolutional` takes (input_dim, output_dim, kernel_size) as separate
    # arguments; passing a (k, k, in, out) tuple raises TypeError.
    input_data = convolutional(input_data, 3, 32, 3, downsample=True)
    input_data = convolutional(input_data, 32, 64, 3, downsample=True)
    input_data = convolutional(input_data, 64, 64, 3)

    route = input_data
    input_data = route_group(input_data, 2, 1)
    input_data = convolutional(input_data, 32, 32, 3)
    route_1 = input_data
    input_data = convolutional(input_data, 32, 32, 3)
    input_data = tf.concat([input_data, route_1], axis=-1)
    input_data = convolutional(input_data, 32, 64, 1)
    input_data = tf.concat([route, input_data], axis=-1)
    input_data = tf.keras.layers.MaxPool2D(2, 2, 'same')(input_data)

    input_data = convolutional(input_data, 64, 128, 3)
    route = input_data
    input_data = route_group(input_data, 2, 1)
    input_data = convolutional(input_data, 64, 64, 3)
    route_1 = input_data
    input_data = convolutional(input_data, 64, 64, 3)
    input_data = tf.concat([input_data, route_1], axis=-1)
    input_data = convolutional(input_data, 64, 128, 1)
    input_data = tf.concat([route, input_data], axis=-1)
    input_data = tf.keras.layers.MaxPool2D(2, 2, 'same')(input_data)

    input_data = convolutional(input_data, 128, 256, 3)
    route = input_data
    input_data = route_group(input_data, 2, 1)
    input_data = convolutional(input_data, 128, 128, 3)
    route_1 = input_data
    input_data = convolutional(input_data, 128, 128, 3)
    input_data = tf.concat([input_data, route_1], axis=-1)
    input_data = convolutional(input_data, 128, 256, 1)
    # This fused map is returned to the caller as the shallower route.
    route_1 = input_data
    input_data = tf.concat([route, input_data], axis=-1)
    input_data = tf.keras.layers.MaxPool2D(2, 2, 'same')(input_data)

    input_data = convolutional(input_data, 512, 512, 3)

    return route_1, input_data

In [None]:
def YOLOv4_tiny(input_layer, NUM_CLASS):
    """Build the two YOLOv4-tiny detection heads on top of `cspdarknet53_tiny`.

    Each head ends in a 1x1 convolution without batch normalisation or
    activation, producing ``3 * (NUM_CLASS + 5)`` channels.

    Returns:
        ``[conv_mbbox, conv_lbbox]``: the head fed by the upsampled features
        concatenated with the backbone's `route_1`, followed by the head fed
        directly by the backbone's final output.
    """
    route_1, conv = cspdarknet53_tiny(input_layer)

    # `convolutional` takes (input_dim, output_dim, kernel_size) as separate
    # arguments; passing a (k, k, in, out) tuple raises TypeError.
    conv = convolutional(conv, 512, 256, 1)

    conv_lobj_branch = convolutional(conv, 256, 512, 3)
    conv_lbbox = convolutional(conv_lobj_branch, 512, 3 * (NUM_CLASS + 5), 1, activate=False, bn=False)

    conv = convolutional(conv, 256, 128, 1)
    conv = upsample(conv)
    conv = tf.concat([conv, route_1], axis=-1)

    conv_mobj_branch = convolutional(conv, 128, 256, 3)
    conv_mbbox = convolutional(conv_mobj_branch, 256, 3 * (NUM_CLASS + 5), 1, activate=False, bn=False)

    return [conv_mbbox, conv_lbbox]

#Boxes

In [None]:
# yolo_framework    ====> 'tf', 'trt'   
# yolo_type         ====> yolov3, yolov4, yolov3-tiny, yolov4-tiny
# input_classes     ====> "mnist/mnist.names", "/content/coco.names"

input_classes="/content/coco.names"

# Map each line number of the names file to the class name on that line.
class_names = {}
with open(input_classes, 'r') as data:
    for ID, name in enumerate(data):
        class_names[ID] = name.strip('\n')

# The loader is defined as `load_yolo_model` (all lower case).
yolo = load_yolo_model(yolo_framework='tf',
                       yolo_type='yolov3',
                       yolo_costom_weights=False,
                       input_size=416,
                       input_classes=input_classes, 
                       class_names=class_names)

# Only the last assignment takes effect; reorder these lines to switch images.
image_path = '/content/dog.jpg'
image_path = '/content/office.jpg'