<a href="https://colab.research.google.com/github/mralamdari/ComputerVision_with_ImageProcessing/blob/main/YoloV3_with_TensorFlow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import cv2
import PIL
import colorsys
import scipy.io
import scipy.misc
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from google.colab.patches import cv2_imshow
from tensorflow.python.saved_model import tag_constants

In [None]:
# Tell the Kaggle CLI where to look for its configuration.
# NOTE(review): presumably kaggle.json lives in this Drive folder and Drive is mounted first - confirm.
os.environ['KAGGLE_CONFIG_DIR'] = '/content/drive/MyDrive'
# Download the dataset archive into the current working directory.
!kaggle datasets download -d aruchomu/data-for-yolo-v3-kernel
# Extract every downloaded archive, then delete the zip files.
!unzip \*.zip && rm *.zip

# Load The Yolo Model

Choose the YOLO framework: `tf` or `trt`.


Choose the YOLO type:
yolov3
yolov3-tiny
yolov4
yolov4-tiny


Then decide whether you need custom weights.

In [None]:
def load_yolo_model(yolo_costom_weights, input_size, input_classes, class_names):
    """Build a YOLOv3 model and load either custom or Darknet weights into it.

    Args:
        yolo_costom_weights: If truthy, load the checkpoint at
            ``./checkpoints/yolov3_custom`` via ``model.load_weights``;
            otherwise load ``model_data/yolov3.weights`` via
            ``load_yolo_weights``.
        input_size: Side length of the square network input, forwarded to
            ``create_yolo_model``.
        input_classes: Forwarded to ``create_yolo_model`` as ``classes``.
        class_names: Forwarded to ``create_yolo_model`` as ``class_names``.

    Returns:
        The model returned by ``create_yolo_model`` with weights loaded.
    """
    # Best effort: let TensorFlow grow GPU memory on demand. A failure here
    # (e.g. the runtime is already initialised) is reported but must not stop
    # the model from loading, so only the documented error types are caught.
    gpus = tf.config.list_physical_devices('GPU')
    if gpus:
        try:
            tf.config.experimental.set_memory_growth(gpus[0], True)
        except (RuntimeError, ValueError) as err:
            print(f"Could not enable GPU memory growth: {err}")

    if yolo_costom_weights:
        checkpoint = "./checkpoints/yolov3_custom"
        print(f"Loading custom weights from: {checkpoint}")
    else:
        Darknet_weights = 'model_data/yolov3.weights'
        print(f"Loading Darknet_weights from: {Darknet_weights}")

    # The architecture is the same in both cases; only the weight source differs.
    yolo = create_yolo_model(class_names, input_size=input_size, classes=input_classes)

    if yolo_costom_weights:
        yolo.load_weights(checkpoint)
    else:
        load_yolo_weights(yolo, Darknet_weights)  # use Darknet weights

    return yolo

# Load The Yolo Weights

In [None]:
def load_yolo_weights(model, weights_file):
    """Copy Darknet YOLOv3 weights from a binary file into a Keras model.

    The file is read sequentially: a 5 x int32 header, then for each of the 75
    convolution layers either batch-norm parameters or a bias vector, followed
    by the convolution kernel. Layers are looked up by Keras' default names
    (``conv2d``, ``conv2d_1``, ... and ``batch_normalization``, ...), so the
    model must have been built with a fresh layer-name counter.

    Args:
        model: Object exposing ``get_layer(name)``; the returned layers must
            provide ``filters``, ``kernel_size``, ``input_shape`` and
            ``set_weights``.
        weights_file: Path to the Darknet ``.weights`` file.

    Raises:
        AssertionError: If data remains in the file after all layers are read.
    """
    tf.keras.backend.clear_session() # used to reset layer names
    range1 = 75                # number of convolution layers to load
    range2 = [58, 66, 74]      # conv indices that carry a bias and no batch norm

    with open(weights_file, 'rb') as wf:
        # Header (five int32 values); it is skipped, none of it is used here.
        np.fromfile(wf, dtype=np.int32, count=5)

        j = 0  # index of the next batch-normalization layer
        for i in range(range1):
            conv_layer_name = 'conv2d' if i == 0 else f'conv2d_{i}'
            conv_layer = model.get_layer(conv_layer_name)

            filters = conv_layer.filters
            k_size = conv_layer.kernel_size[0]
            in_dim = conv_layer.input_shape[-1]

            has_batch_norm = i not in range2
            if has_batch_norm:
                bn_layer_name = 'batch_normalization' if j == 0 else f'batch_normalization_{j}'
                # darknet weights: [beta, gamma, mean, variance]
                bn_weights = np.fromfile(wf, dtype=np.float32, count=4 * filters)
                # tf weights: [gamma, beta, mean, variance]
                bn_weights = bn_weights.reshape((4, filters))[[1, 0, 2, 3]]
                bn_layer = model.get_layer(bn_layer_name)
                j += 1
            else:
                conv_bias = np.fromfile(wf, dtype=np.float32, count=filters)

            # darknet shape (out_dim, in_dim, height, width)
            conv_shape = (filters, in_dim, k_size, k_size)
            # filters * in_dim * k_size * k_size
            # (np.prod: the np.product alias no longer exists in NumPy 2.x)
            conv_count = int(np.prod(conv_shape))
            conv_weights = np.fromfile(wf, dtype=np.float32, count=conv_count)
            # tf shape (height, width, in_dim, out_dim)
            # A plain reshape to the reversed shape would only reinterpret the
            # flat buffer; the axes have to be permuted with transpose.
            conv_weights = conv_weights.reshape(conv_shape).transpose([2, 3, 1, 0])

            if has_batch_norm:
                conv_layer.set_weights([conv_weights])
                bn_layer.set_weights(bn_weights)
            else:
                conv_layer.set_weights([conv_weights, conv_bias])

        assert len(wf.read()) == 0, 'failed to read all data'


# Create Yolo Model


In [None]:
def create_yolo_model(class_names, classes, input_size=416, channels=3, training=False):
    """Assemble the YOLOv3 Keras model with decoded prediction outputs.

    Args:
        class_names: Collection of class names; only its length is used, as
            the number of classes.
        classes: Accepted for interface compatibility; not used here.
        input_size: Side length of the square input image.
        channels: Number of input channels.
        training: If True, each raw convolution output is emitted immediately
            before its decoded counterpart.

    Returns:
        A ``tf.keras.Model`` mapping the input layer to the list of outputs,
        one (or two when ``training``) per detection scale.
    """
    num_classes = len(class_names)
    input_layer = tf.keras.layers.Input([input_size, input_size, channels])

    # Downsampling factor of each scale, in the order YOLOv3() returns them
    # (small, medium, large objects): 416 / 52, 416 / 26 and 416 / 13.
    # Defined locally so the function does not rely on a notebook global.
    strides = np.array([8, 16, 32])
    yolo_anchors = [[[10,  13], [16,   30], [33,   23]],
                    [[30,  61], [62,   45], [59,  119]],
                    [[116, 90], [156, 198], [373, 326]]]
    # Express each scale's anchors in grid-cell units (pixels / stride).
    anchors = (np.array(yolo_anchors).T / strides).T

    convolutional_layers = YOLOv3(input_layer, num_classes)

    output_tensors = []
    for i, conv_layer in enumerate(convolutional_layers):
        pred_tensor = decode(conv_layer, num_classes, i, strides, anchors)
        if training:
            output_tensors.append(conv_layer)
        output_tensors.append(pred_tensor)

    return tf.keras.Model(input_layer, output_tensors)

# Up Sample

Resize the height and width of a batch of feature maps (both are doubled).

    # shape=(None, 13, 13, 256)    ===>   (None, 26, 26, 256)
    # shape=(None, 26, 26, 128)    ===>   (None, 52, 52, 128)

In [None]:
def upsample(input_layer):
    """Apply a ``tf.keras.layers.UpSampling2D`` layer with factor 2 to ``input_layer``."""
    doubling_layer = tf.keras.layers.UpSampling2D(2)
    return doubling_layer(input_layer)

# Convolutional Layer

In [None]:
def convolutional(input_layer, input_dim, output_dim, kernel_size, downsample=False, activate=True, bn=True, activate_type='leaky'):
    """Conv2D block with optional downsampling, batch norm and activation.

    Args:
        input_layer: Tensor the block is applied to.
        input_dim: Number of input channels; kept for readability at call
            sites, not used by the layer construction.
        output_dim: Number of convolution filters.
        kernel_size: Convolution kernel size.
        downsample: If True, zero-pad the top/left edge by one pixel and use a
            stride-2 'valid' convolution; otherwise stride 1 with 'same'.
        activate: If truthy, apply the activation selected by ``activate_type``.
        bn: If True, follow the convolution with BatchNormalization and
            disable the convolution bias.
        activate_type: ``'leaky'`` (LeakyReLU, alpha 0.1) or ``'mish'``. Any
            other value applies no activation.

    Returns:
        The output tensor of the block.
    """
    if downsample:
        input_layer = tf.keras.layers.ZeroPadding2D(((1, 0), (1, 0)))(input_layer)
        padding, strides = 'valid', 2
    else:
        padding, strides = 'same', 1

    conv = tf.keras.layers.Conv2D(filters=output_dim,
                                  kernel_size=kernel_size,
                                  strides=strides,
                                  padding=padding,
                                  use_bias=not bn,
                                  kernel_regularizer=tf.keras.regularizers.L2(0.0005),
                                  kernel_initializer=tf.random_normal_initializer(stddev=0.01),
                                  bias_initializer=tf.constant_initializer(0.))(input_layer)

    if bn:  # BatchNormalization
        conv = tf.keras.layers.BatchNormalization()(conv)

    if activate:  # Activation
        if activate_type == "leaky":
            conv = tf.keras.layers.LeakyReLU(alpha=0.1)(conv)
        elif activate_type == "mish":
            # mish(x) = x * tanh(softplus(x)). The pre-activation x must be
            # kept as the left factor; overwriting it with softplus(x) first
            # yields softplus(x) * tanh(softplus(x)) instead.
            conv = conv * tf.math.tanh(tf.math.softplus(conv))

    return conv

# Residual Block

This block applies two convolutional layers with different kernel sizes and filter counts. Because its output has the same dimensions as its input, the two can be added element-wise (a skip connection), which keeps the model from losing the details captured in earlier layers.


In [None]:
def residual_block(x, channels, filter1, filter2, activate_type='leaky'):
    """Residual unit: a 1x1 then a 3x3 ``convolutional`` block, added to the input.

    Args:
        x: Input tensor; also used as the skip connection.
        channels: Channel count passed to the first convolution as its input size.
        filter1: Filter count of the 1x1 convolution.
        filter2: Filter count of the 3x3 convolution.
        activate_type: Activation name forwarded to both convolutions.

    Returns:
        ``x`` plus the output of the second convolution.
    """
    bottleneck = convolutional(x, channels, filter1, 1, activate_type=activate_type)
    expanded = convolutional(bottleneck, filter1, filter2, 3, activate_type=activate_type)
    return x + expanded

# Yolo V3

In [None]:
# Download the pretrained Darknet YOLOv3 weights into model_data/, the path load_yolo_model reads when custom weights are off.
!wget -P model_data https://pjreddie.com/media/files/yolov3.weights

## DarkNet 53

it returns 3 branches to the yolo model

In [None]:
def darknet53(input_data):
    """Darknet-53 backbone built from ``convolutional`` and ``residual_block``.

    After a stem convolution, five stages follow; each stage is a stride-2
    convolution and then a number of residual blocks. Layers are created in
    strictly sequential order.

    Args:
        input_data: Input tensor.

    Returns:
        Tuple ``(route_1, route_2, output)``: the outputs of the 256-channel
        stage, the 512-channel stage and the final 1024-channel stage.
    """
    # (input channels, output channels, residual blocks) per stage
    stage_specs = (
        (32, 64, 1),
        (64, 128, 2),
        (128, 256, 8),
        (256, 512, 8),
        (512, 1024, 4),
    )

    features = convolutional(input_data, 3, 32, 3)

    stage_outputs = []
    for in_channels, out_channels, num_blocks in stage_specs:
        features = convolutional(features, in_channels, out_channels, 3, downsample=True)
        for _ in range(num_blocks):
            features = residual_block(features, out_channels, out_channels // 2, out_channels)
        stage_outputs.append(features)

    route_1 = stage_outputs[2]  # after the 256-channel stage
    route_2 = stage_outputs[3]  # after the 512-channel stage
    return route_1, route_2, features

#### YOLOv3 model: it takes the three outputs of the Darknet-53 block and makes predictions at 3 scales

In [None]:
def _yolo_scale_head(conv, in_dim, mid_dim, classes_count):
    """Five alternating 1x1/3x3 convolutions followed by one detection head.

    Returns ``(features, bbox)``: ``features`` is the output of the five-layer
    stack (it feeds the next, finer scale) and ``bbox`` is the raw prediction
    tensor with ``3 * (classes_count + 5)`` channels.
    """
    wide_dim = 2 * mid_dim
    conv = convolutional(conv, in_dim, mid_dim, 1)
    conv = convolutional(conv, mid_dim, wide_dim, 3)
    conv = convolutional(conv, wide_dim, mid_dim, 1)
    conv = convolutional(conv, mid_dim, wide_dim, 3)
    conv = convolutional(conv, wide_dim, mid_dim, 1)

    branch = convolutional(conv, mid_dim, wide_dim, 3)
    # Raw prediction layer: no batch norm and no activation. `activate=False`
    # states this explicitly rather than passing a non-matching activation name.
    bbox = convolutional(branch, wide_dim, 3 * (classes_count + 5), 1, activate=False, bn=False)
    return conv, bbox


def YOLOv3(input_layer, classes_count):
    """Build the YOLOv3 detection network on top of ``darknet53``.

    The layer creation order is significant: ``load_yolo_weights`` addresses
    convolutions by their sequential default names and treats indices 58, 66
    and 74 as the bias-carrying detection heads.

    Args:
        input_layer: Input tensor of the network.
        classes_count: Number of object classes.

    Returns:
        ``[conv_sbbox, convolution_mbbox, convolution_lbbox]`` - raw
        predictions for small, medium and large objects.
    """
    route_1, route_2, conv = darknet53(input_layer)

    # convolution_lbbox is used to predict large-sized objects, shape = [None, 13, 13, 255]
    conv, convolution_lbbox = _yolo_scale_head(conv, 1024, 512, classes_count)

    conv = convolutional(conv, 512, 256, 1)
    # upsample here uses the "nearest neighbor interpolation" method, which has the advantage that the
    # upsampling process does not need to learn, thereby reducing the network parameters
    conv = upsample(conv)
    conv = tf.concat([conv, route_2], axis=-1)

    # convolution_mbbox is used to predict medium-sized objects, shape = [None, 26, 26, 255]
    conv, convolution_mbbox = _yolo_scale_head(conv, 768, 256, classes_count)

    conv = convolutional(conv, 256, 128, 1)
    conv = upsample(conv)
    conv = tf.concat([conv, route_1], axis=-1)

    # conv_sbbox is used to predict small size objects, shape = [None, 52, 52, 255]
    conv, conv_sbbox = _yolo_scale_head(conv, 384, 128, classes_count)

    return [conv_sbbox, convolution_mbbox, convolution_lbbox]

# Decoder

In [None]:
def decode(conv_output, classes_count, i, strides, anchors):
    """Turn one scale's raw network output into boxes, confidence and class scores.

    Args:
        conv_output: Raw output tensor of one detection head; it is reshaped to
            ``(batch, size, size, 3, 5 + classes_count)``.
        classes_count: Number of object classes.
        i: Scale index (0, 1 or 2) used to select ``strides[i]`` and ``anchors[i]``.
        strides: Indexable collection of per-scale strides.
        anchors: Indexable collection of per-scale anchor sizes.

    Returns:
        Tensor whose last axis is ``[x, y, w, h, confidence, class probabilities]``.
    """
    dims = tf.shape(conv_output)
    n_batch, grid = dims[0], dims[1]

    per_anchor = tf.reshape(conv_output, (n_batch, grid, grid, 3, 5 + classes_count))
    raw_xy, raw_wh, raw_conf, raw_prob = tf.split(per_anchor, (2, 2, 1, classes_count), axis=-1)

    # Grid of (x, y) cell offsets, broadcast over batch and the 3 anchors.
    cells = tf.range(grid)
    offsets = tf.stack(tf.meshgrid(cells, cells), axis=-1)
    offsets = tf.expand_dims(offsets, axis=2)   # [gx, gy, 1, 2]
    offsets = tf.expand_dims(offsets, axis=0)
    offsets = tf.tile(offsets, [n_batch, 1, 1, 3, 1])
    offsets = tf.cast(offsets, tf.float32)

    stride = strides[i]
    # Box centre: sigmoid offset inside the cell plus the cell index, scaled by the stride.
    box_xy = (tf.sigmoid(raw_xy) + offsets) * stride
    # Box size: exponential of the raw value times the anchor, scaled by the stride.
    box_wh = (tf.exp(raw_wh) * anchors[i]) * stride

    box_xywh = tf.concat([box_xy, box_wh], axis=-1)
    confidence = tf.sigmoid(raw_conf)    # objectness score
    class_prob = tf.sigmoid(raw_prob)    # per-class score

    return tf.concat([box_xywh, confidence, class_prob], axis=-1)

In [None]:
def _detect_frame(Yolo, frame_bgr, class_names, classes, input_size, score_threshold,
                  iou_threshold, rectangle_colors, yolo_framework):
    """Run detection on one BGR frame and return that frame with boxes drawn on it."""
    # OpenCV delivers BGR; the network input is converted to RGB exactly once.
    # Drawing happens on the BGR frame so that imwrite / VideoWriter / imshow
    # keep the right colours.
    frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    image_data = image_preprocess(np.copy(frame_rgb), [input_size, input_size])
    image_data = np.expand_dims(image_data, 0).astype(np.float32)

    if yolo_framework == "tf":
        pred_bbox = Yolo.predict(image_data)
    elif yolo_framework == "trt":
        result = Yolo(tf.constant(image_data))
        pred_bbox = [value.numpy() for value in result.values()]
    else:
        raise ValueError(f"Unsupported yolo_framework: {yolo_framework!r} (expected 'tf' or 'trt')")

    # Flatten every scale to (N, 5 + num_classes) and stack them.
    pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
    pred_bbox = tf.concat(pred_bbox, axis=0)

    bboxes = postprocess_boxes(pred_bbox, frame_bgr, input_size, score_threshold)
    bboxes = nms(bboxes, iou_threshold, method='nms')

    return draw_bbox(frame_bgr, bboxes, class_names, classes=classes, rectangle_colors=rectangle_colors)


def _detect_stream(source, output_path, Yolo, class_names, classes, input_size,
                   score_threshold, iou_threshold, rectangle_colors, yolo_framework):
    """Detect on every frame of a capture source, showing and recording the result."""
    cap = cv2.VideoCapture(source)
    out = None
    try:
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        fourcc = cv2.VideoWriter_fourcc(*'X264')
        out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            image = _detect_frame(Yolo, frame, class_names, classes, input_size, score_threshold,
                                  iou_threshold, rectangle_colors, yolo_framework)
            cv2.imshow('', image)
            out.write(image)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the window, writer and capture, even if detection fails.
        cv2.destroyAllWindows()
        if out is not None:
            out.release()
        cap.release()


def detect_image(input_type, Yolo, image_path, class_names,classes, input_size=416, show=False, score_threshold=0.3, iou_threshold=0.45, rectangle_colors='', yolo_framework='tf'):
    """Run YOLO detection on an image file, a video file or a webcam.

    Args:
        input_type: ``'image'``, ``'video'`` or ``'webcam'``.
        Yolo: Model; ``Yolo.predict`` is used for ``yolo_framework='tf'`` and a
            direct call returning a mapping of tensors for ``'trt'``.
        image_path: Path of the image or video. Ignored for ``'webcam'``.
        class_names: Class names forwarded to ``draw_bbox``.
        classes: Forwarded to ``draw_bbox``.
        input_size: Side length of the square network input.
        show: For ``'image'`` only, display the result and wait for a key.
        score_threshold: Minimum score kept by ``postprocess_boxes``.
        iou_threshold: IoU threshold passed to ``nms``.
        rectangle_colors: Fixed box colour, or ``''`` for per-class colours.
        yolo_framework: ``'tf'`` or ``'trt'``.

    Raises:
        FileNotFoundError: If the image cannot be read.
        ValueError: If ``input_type`` or ``yolo_framework`` is not supported.

    Side effects:
        ``'image'`` writes ``<image_path without extension>_pred.jpg``;
        ``'video'`` writes ``/results.gif``; ``'webcam'`` writes
        ``/webcam_results.mp4``. Stream modes open an OpenCV window and stop
        when ``q`` is pressed or the source ends.
    """
    if input_type == 'image':
        original_image = cv2.imread(image_path)
        if original_image is None:
            raise FileNotFoundError(f"Could not read image: {image_path}")

        image = _detect_frame(Yolo, original_image, class_names, classes, input_size, score_threshold,
                              iou_threshold, rectangle_colors, yolo_framework)

        # splitext also handles extensions that are not exactly three characters long.
        output_path = f'{os.path.splitext(image_path)[0]}_pred.jpg'
        cv2.imwrite(output_path, image)
        if show:
            cv2.imshow('', image)
            cv2.waitKey(0)
            cv2.destroyAllWindows()

    elif input_type == 'video':
        # NOTE(review): an X264 stream in a '.gif' file at the filesystem root
        # is unlikely to be writable/playable - confirm the intended path.
        _detect_stream(image_path, '/results.gif', Yolo, class_names, classes, input_size,
                       score_threshold, iou_threshold, rectangle_colors, yolo_framework)

    elif input_type == 'webcam':
        # Camera index 1 is kept from the original code - TODO confirm it is not meant to be 0.
        _detect_stream(1, '/webcam_results.mp4', Yolo, class_names, classes, input_size,
                       score_threshold, iou_threshold, rectangle_colors, yolo_framework)

    else:
        raise ValueError(f"Unsupported input_type: {input_type!r} (expected 'image', 'video' or 'webcam')")

In [None]:
def image_preprocess(image, target_size, gt_boxes=None):
    """Letterbox ``image`` into ``target_size`` and scale pixel values by 1/255.

    The image is resized with its aspect ratio preserved, centred on a canvas
    filled with 127.5, and the canvas is divided by 255.

    Args:
        image: Array of shape ``(height, width, channels)``.
        target_size: ``(target_height, target_width)``.
        gt_boxes: Optional box array; columns 0 and 2 are treated as x values
            and columns 1 and 3 as y values. It is updated in place.

    Returns:
        The padded image, or ``(padded image, gt_boxes)`` when boxes are given.
    """
    target_height, target_width = target_size
    src_height, src_width, _ = image.shape

    ratio = min(target_width / src_width, target_height / src_height)
    new_width = int(ratio * src_width)
    new_height = int(ratio * src_height)
    resized = cv2.resize(image, (new_width, new_height))

    # Grey canvas: fill_value = 255/2 == 127.5
    canvas = np.full(shape=[target_height, target_width, 3], fill_value=127.5)

    pad_x = (target_width - new_width) // 2
    pad_y = (target_height - new_height) // 2
    canvas[pad_y : pad_y + new_height, pad_x : pad_x + new_width, :] = resized

    canvas = canvas / 255.

    if gt_boxes is None:
        return canvas

    # Move the boxes into the letterboxed coordinate frame.
    gt_boxes[:, [0, 2]] = gt_boxes[:, [0, 2]] * ratio + pad_x
    gt_boxes[:, [1, 3]] = gt_boxes[:, [1, 3]] * ratio + pad_y
    return canvas, gt_boxes

In [None]:
def postprocess_boxes(pred_bbox, original_image, input_size, score_threshold):
    """Map raw predictions back to the original image and filter them.

    Args:
        pred_bbox: Array-like of rows ``[x, y, w, h, confidence, class probabilities...]``
            in letterboxed network-input coordinates.
        original_image: Image whose ``shape[:2]`` gives the original height and width.
        input_size: Side length of the square network input.
        score_threshold: Boxes whose score is not above this value are dropped.

    Returns:
        Array of rows ``[xmin, ymin, xmax, ymax, score, class index]``.
    """
    predictions = np.array(pred_bbox)
    centers = predictions[:, 0:2]
    sizes = predictions[:, 2:4]
    objectness = predictions[:, 4]
    class_probs = predictions[:, 5:]

    # 1. (x, y, w, h) --> (xmin, ymin, xmax, ymax)
    corners = np.concatenate([centers - sizes * 0.5,
                              centers + sizes * 0.5], axis=-1)

    # 2. Undo the letterbox: remove the padding, then divide by the resize ratio.
    img_h, img_w = original_image.shape[:2]
    ratio = min(input_size / img_w, input_size / img_h)
    pad_x = (input_size - ratio * img_w) / 2
    pad_y = (input_size - ratio * img_h) / 2

    # Columns 0 and 2 are x coordinates, columns 1 and 3 are y coordinates.
    corners[:, 0::2] = (corners[:, 0::2] - pad_x) / ratio
    corners[:, 1::2] = (corners[:, 1::2] - pad_y) / ratio

    # 3. Clip to the image and zero out boxes whose min exceeds their max.
    top_left = np.maximum(corners[:, :2], [0, 0])
    bottom_right = np.minimum(corners[:, 2:], [img_w - 1, img_h - 1])
    corners = np.concatenate([top_left, bottom_right], axis=-1)
    degenerate = (corners[:, 0] > corners[:, 2]) | (corners[:, 1] > corners[:, 3])
    corners[degenerate] = 0

    # 4. Keep boxes whose scale (sqrt of area) lies strictly inside (0, inf).
    min_scale, max_scale = 0, np.inf
    extent = corners[:, 2:4] - corners[:, 0:2]
    box_scale = np.sqrt(np.multiply.reduce(extent, axis=-1))
    scale_ok = (min_scale < box_scale) & (box_scale < max_scale)

    # 5. Score = confidence * probability of the most likely class.
    best_class = np.argmax(class_probs, axis=-1)
    scores = objectness * class_probs[np.arange(len(corners)), best_class]
    keep = scale_ok & (scores > score_threshold)

    return np.concatenate([corners[keep],
                           scores[keep][:, np.newaxis],
                           best_class[keep][:, np.newaxis]], axis=-1)

In [None]:
def draw_bbox(image, bboxes, class_names, classes, show_label=True, show_confidence = True, Text_colors=(255,255,0), rectangle_colors='', tracking=False):   
    """Draw detection boxes and optional labels on ``image`` in place.

    Args:
        image: Array of shape ``(height, width, channels)``; modified in place.
        bboxes: Iterable of rows ``[xmin, ymin, xmax, ymax, score, class index]``.
        class_names: Indexable collection mapping class index to name; its
            length sets the number of per-class colours.
        classes: Accepted for interface compatibility; not used here.
        show_label: Draw a text label above each box.
        show_confidence: Append the score (3 decimals) to the label.
        Text_colors: Colour of the label text.
        rectangle_colors: Fixed box colour, or ``''`` to use per-class colours.
        tracking: If True, the label suffix is ``str(score)`` instead.

    Returns:
        The same ``image`` object, with boxes drawn.
    """
    num_classes = len(class_names)
    image_h, image_w, _ = image.shape

    # One evenly spaced hue per class, converted to 0-255 RGB tuples.
    hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)]
    colors = [tuple(int(channel * 255) for channel in colorsys.hsv_to_rgb(*hsv)) for hsv in hsv_tuples]

    # Deterministic shuffle (seed 0) through a private generator, so the
    # caller's global NumPy random state is left untouched.
    np.random.RandomState(0).shuffle(colors)

    # Line thickness and font scale depend only on the image size.
    bbox_thick = max(1, int(0.6 * (image_h + image_w) / 1000))
    fontScale = 0.75 * bbox_thick

    for bbox in bboxes:
        # Plain Python ints for the OpenCV point arguments.
        x1, y1, x2, y2 = (int(v) for v in np.array(bbox[:4], dtype=np.int32))
        score = bbox[4]
        class_ind = int(bbox[5])
        bbox_color = rectangle_colors if rectangle_colors != '' else colors[class_ind]

        # put object rectangle
        cv2.rectangle(image, (x1, y1), (x2, y2), bbox_color, bbox_thick*2)

        if show_label:
            # get text label
            score_str = " {:.3f}".format(score) if show_confidence else ""

            if tracking: score_str = " "+str(score)

            try:
                class_label = class_names[class_ind]
            except (KeyError, IndexError):
                print("""You received KeyError, this might be that you are trying to use yolo original weights,
                while using custom classes, if using custom model in configs.py set YOLO_CUSTOM_WEIGHTS = True""")
                # Fall back to the numeric class index so drawing can continue
                # instead of failing on an undefined label.
                class_label = class_ind
            label = "{}".format(class_label) + score_str

            # get text size
            (text_width, text_height), baseline = cv2.getTextSize(label, cv2.FONT_HERSHEY_COMPLEX_SMALL, fontScale, thickness=bbox_thick)
            # put filled text rectangle
            cv2.rectangle(image, (x1, y1), (x1 + text_width, y1 - text_height - baseline), bbox_color, thickness=cv2.FILLED)

            # put text above rectangle
            cv2.putText(image, label, (x1, y1-4), cv2.FONT_HERSHEY_COMPLEX_SMALL,
                        fontScale, Text_colors, bbox_thick, lineType=cv2.LINE_AA)

    return image

In [None]:
def bboxes_iou(boxes1, boxes2):
    """Intersection-over-union of ``(xmin, ymin, xmax, ymax)`` boxes.

    Both inputs are converted to arrays and combined with NumPy broadcasting.
    The result is floored at the float32 machine epsilon.
    """
    first = np.array(boxes1)
    second = np.array(boxes2)

    def _area(boxes):
        return (boxes[..., 2] - boxes[..., 0]) * (boxes[..., 3] - boxes[..., 1])

    # Corners of the overlap rectangle; negative extents mean no overlap.
    overlap_min = np.maximum(first[..., :2], second[..., :2])
    overlap_max = np.minimum(first[..., 2:], second[..., 2:])
    overlap_wh = np.maximum(overlap_max - overlap_min, 0.0)

    overlap_area = overlap_wh[..., 0] * overlap_wh[..., 1]
    union_area = _area(first) + _area(second) - overlap_area

    return np.maximum(1.0 * overlap_area / union_area, np.finfo(np.float32).eps)

In [None]:
def nms(bboxes, iou_threshold, sigma=0.3, method='nms'):
    """Per-class non-maximum suppression.

    Args:
        bboxes: Array of rows ``(xmin, ymin, xmax, ymax, score, class)``.
        iou_threshold: For ``'nms'``, boxes whose IoU with the selected box
            exceeds this value are removed.
        sigma: For ``'soft-nms'``, the decay parameter of the score weight.
        method: ``'nms'`` or ``'soft-nms'``.

    Returns:
        List of the selected box rows.

    Note: soft-nms, https://arxiv.org/pdf/1704.04503.pdf
          https://github.com/bharatsingh430/soft-nms
    """
    selected = []
    for cls in list(set(bboxes[:, 5])):
        candidates = bboxes[bboxes[:, 5] == cls]

        # Repeat until no candidate of this class is left.
        while len(candidates) > 0:
            # Take the highest-scoring candidate and remove it from the pool.
            top = np.argmax(candidates[:, 4])
            winner = candidates[top]
            selected.append(winner)
            candidates = np.delete(candidates, top, axis=0)

            # Overlap of the winner with every remaining candidate.
            iou = bboxes_iou(winner[np.newaxis, :4], candidates[:, :4])

            assert method in ['nms', 'soft-nms']

            if method == 'nms':
                # Hard suppression: weight 0 above the threshold, 1 otherwise.
                weight = np.ones((len(iou),), dtype=np.float32)
                weight[iou > iou_threshold] = 0.0
            else:
                # Soft suppression: decay the score with the overlap.
                weight = np.exp(-(1.0 * iou ** 2 / sigma))

            # Re-weight the scores and drop candidates whose score reached zero.
            candidates[:, 4] = candidates[:, 4] * weight
            candidates = candidates[candidates[:, 4] > 0.]

    return selected

# Boxes

In [None]:
# Download the sample GIF used as the video input further down (saved as giphy.gif in the working directory).
!wget https://media.giphy.com/media/oO5a8cQSmxb3b1TBRu/giphy.gif

--2022-08-22 13:49:27--  https://media.giphy.com/media/oO5a8cQSmxb3b1TBRu/giphy.gif
Resolving media.giphy.com (media.giphy.com)... 199.232.194.2, 199.232.198.2
Connecting to media.giphy.com (media.giphy.com)|199.232.194.2|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1349105 (1.3M) [image/gif]
Saving to: ‘giphy.gif’


2022-08-22 13:49:27 (24.0 MB/s) - ‘giphy.gif’ saved [1349105/1349105]



In [None]:
# input_classes     ====> 'mnist/mnist.names', '/content/coco.names'

# Path of the class-names file; it is read line by line below.
input_classes="/content/coco.names"

# Map line index -> class name, with the trailing newline removed.
class_names = {}
with open(input_classes, 'r') as data:
    for ID, name in enumerate(data):
        class_names[ID] = name.strip('\n')

# Build the model and load the Darknet weights (custom weights are off).
yolo = load_yolo_model(yolo_costom_weights=False,
                       input_size=416,
                       input_classes=input_classes, 
                       class_names=class_names)

# NOTE(review): the first image_path value is overwritten immediately, and image_path is not passed to the call below.
image_path = '/content/dog.jpg'
image_path = '/content/office.jpg'
gif_path   = '/content/giphy.gif' 

# input_type = 'image', 'webcam', 'video'
# NOTE(review): with 'webcam', detect_image opens camera index 1 and ignores gif_path; 'video' is presumably intended for the GIF - confirm.
input_type = 'webcam'


# Run detection; `classes` receives the names-file path and boxes are drawn with the fixed colour (255, 0, 0).
detect_image(input_type,yolo, gif_path, class_names=class_names, classes = input_classes ,input_size=416, show=True, rectangle_colors=(255,0,0))

Loading Darknet_weights from: model_data/yolov3.weights
