# Drive

In [None]:
# Mount Google Drive at /content/drive so that the cells below can read and
# write files under '/content/drive/My Drive/...'.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Helper

In [None]:

import random
import colorsys
import numpy as np, cv2
# from architecture import ANCHORS, CLASS
import wget


# Download weights file to working directory
def download_weights():
    """Fetch the pretrained YOLO weights file with ``wget.download``.

    The URL is hard-coded; a progress message is printed before and after
    the transfer. Nothing is returned.
    """
    weights_url = 'https://pjreddie.com/media/files/yolo.weights'
    print('Downloading weights file...')
    wget.download(weights_url)
    print('Download complete.')


# Sigmoid
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``, element-wise."""
    exp_neg = np.exp(-x)
    return 1. / (1. + exp_neg)


# Softmax
def softmax(x):
    """Return the softmax of ``x`` along its last axis.

    The per-row maximum is subtracted before exponentiating. This leaves the
    result mathematically unchanged but keeps ``np.exp`` from overflowing to
    ``inf`` (which would turn the whole row into NaN) for large logits.

    Args:
        x: array-like of logits; normalisation runs over the last axis.

    Returns:
        Array of the same shape as ``x`` whose last axis sums to 1.
    """
    x = np.asarray(x)
    if x.ndim and x.shape[-1] == 0:
        # Nothing to normalise; np.max would raise on an empty axis.
        return np.exp(x)
    shifted = x - np.max(x, axis=-1, keepdims=True)
    exp_shifted = np.exp(shifted)  # computed once and reused for the sum
    return exp_shifted / np.sum(exp_shifted, axis=-1, keepdims=True)


# Load weights from file (after model is created)
def load_weights(model, weights_file):
    """Copy the values stored in ``weights_file`` into the layers of ``model``.

    Layers are visited as ``conv_1``, ``conv_2``, ... until the model reports
    that no layer of that name exists. For every index below ``nb_conv`` the
    matching ``norm_<i>`` layer is filled first, then the convolution itself.

    Args:
        model: object exposing ``get_layer(name)``; the returned layers must
            expose ``get_weights()`` and ``set_weights(list)``.
        weights_file: path (or file object) readable by ``np.fromfile`` as a
            flat sequence of float32 values.

    Only the ``ValueError`` raised for an unknown layer name ends the loop;
    any other exception propagates instead of being reported as success.
    """

    class WeightReader:
        """Sequential cursor over the float32 values of the weights file."""

        def __init__(self, weight_file):
            # The first 4 values are skipped (presumably the file header).
            self.offset = 4
            self.all_weights = np.fromfile(weight_file, dtype='float32')

        def read_bytes(self, size):
            # Despite the name, ``size`` counts float32 values, not bytes.
            self.offset = self.offset + size
            return self.all_weights[self.offset - size:self.offset]

        def reset(self):
            self.offset = 4

    weight_reader = WeightReader(weights_file)
    nb_conv = 23  # conv layers with a smaller index have a norm_<i> companion
    i = 1

    while True:

        try:
            conv_layer = model.get_layer('conv_' + str(i))
        except ValueError:
            # No layer with this name: every convolution has been processed.
            break

        if i < nb_conv:
            norm_layer = model.get_layer('norm_' + str(i))

            size = np.prod(norm_layer.get_weights()[0].shape)

            # The file stores beta, gamma, mean, var in that order ...
            beta = weight_reader.read_bytes(size)
            gamma = weight_reader.read_bytes(size)
            mean = weight_reader.read_bytes(size)
            var = weight_reader.read_bytes(size)

            # ... while the layer is given gamma first.
            norm_layer.set_weights([gamma, beta, mean, var])

        # Fetch the current weights once; they are only needed for shapes.
        conv_weights = conv_layer.get_weights()
        kernel_shape = conv_weights[0].shape
        has_bias = len(conv_weights) > 1

        # When present, the bias precedes the kernel in the file.
        if has_bias:
            bias = weight_reader.read_bytes(np.prod(conv_weights[1].shape))

        kernel = weight_reader.read_bytes(np.prod(kernel_shape))
        # Values are laid out with the axes reversed relative to the layer's
        # kernel; reshape to the reversed shape, then permute axes back.
        kernel = kernel.reshape(list(reversed(kernel_shape)))
        kernel = kernel.transpose([2, 3, 1, 0])

        conv_layer.set_weights([kernel, bias] if has_bias else [kernel])

        i += 1

    print('Weights loaded.')


# Compute the overlap length of two 1-D intervals
def interval_overlap(interval_a, interval_b):
    """Return the length shared by two 1-D intervals given as ``(start, end)``.

    The result is ``0`` when the intervals are disjoint; otherwise it is the
    smaller end minus the larger start.
    """
    a_lo, a_hi = interval_a
    b_lo, b_hi = interval_b

    # The overlap begins at whichever interval starts later; the intervals are
    # disjoint when the other one has already ended by then.
    if b_lo < a_lo:
        start, disjoint = a_lo, b_hi < a_lo
    else:
        start, disjoint = b_lo, a_hi < b_lo

    if disjoint:
        return 0
    return min(a_hi, b_hi) - start


# Compute intersection over union (IoU) of two boxes
def iou(box1, box2):
    """Return the intersection-over-union of two centre-format boxes.

    Args:
        box1, box2: mappings with keys ``'x'``, ``'y'`` (box centre) and
            ``'w'``, ``'h'`` (box size), all in the same units.

    Returns:
        ``intersection / union`` of the two rectangles, or ``0.0`` when the
        union has no area (e.g. two zero-sized boxes). Without that guard the
        division would raise ``ZeroDivisionError`` or yield NaN.
    """

    def extent(box, centre_key, size_key):
        # [min, max] of the box along one axis.
        half = box[size_key] / 2
        return [box[centre_key] - half, box[centre_key] + half]

    intersect_w = interval_overlap(extent(box1, 'x', 'w'), extent(box2, 'x', 'w'))
    intersect_h = interval_overlap(extent(box1, 'y', 'h'), extent(box2, 'y', 'h'))

    intersect = intersect_w * intersect_h

    union = box1['w'] * box1['h'] + box2['w'] * box2['h'] - intersect

    if union <= 0:
        return 0.0

    return float(intersect) / union


# Filter boxes by score threshold and put them to list
def filter_boxes(yolo_output, obj_threshold, anchors=None):
    """Decode raw network output into a list of candidate boxes.

    Args:
        yolo_output: array of shape ``(grid_h, grid_w, nb_box, 5 + nb_class)``
            whose last axis holds ``x, y, w, h, objectness`` followed by the
            class logits. It is not modified.
        obj_threshold: class scores not strictly above this value are zeroed.
        anchors: array-like of shape ``(nb_box, 2)`` with the anchor
            ``(width, height)`` of each box slot. Defaults to the module-level
            ``ANCHORS``, looked up when the function is called so that this
            definition does not require ``ANCHORS`` to exist yet.

    Returns:
        A list of dicts with keys ``'x'``, ``'y'``, ``'w'``, ``'h'`` (centre
        and size as fractions of the image), ``'classes'`` (thresholded class
        scores), ``'label'`` (index of the best class), ``'score'`` (its
        score) and ``'iou'`` (initialised to ``True``). Cells whose class
        scores are all zero after thresholding produce no box.
    """
    if anchors is None:
        anchors = ANCHORS
    anchors = np.asarray(anchors)

    grid_h, grid_w, nb_box = yolo_output.shape[:3]

    # score = sigmoid(objectness) * softmax(class logits); written to a new
    # array so the caller's yolo_output is left untouched
    scores = softmax(yolo_output[..., 5:]) * sigmoid(yolo_output[..., 4][..., np.newaxis])

    # set score to 0, if score <= obj_threshold
    scores *= scores > obj_threshold

    boxes = []

    for row in range(grid_h):
        for col in range(grid_w):
            for b in range(nb_box):
                # thresholded per-class scores of this box slot
                classes = scores[row, col, b]

                if np.sum(classes) > 0:
                    # first 4 elements are x, y, w, and h
                    x, y, w, h = yolo_output[row, col, b, :4]

                    x = (col + sigmoid(x)) / grid_w  # center position, unit: image width
                    y = (row + sigmoid(y)) / grid_h  # center position, unit: image height
                    w = anchors[b, 0] * np.exp(w) / grid_w  # unit: image width
                    h = anchors[b, 1] * np.exp(h) / grid_h  # unit: image height
                    label = np.argmax(classes)
                    score = np.max(classes)

                    box = {'x':x, 'y':y, 'w':w, 'h':h, 'classes':classes, 'label':label, 'score':score, 'iou':True}

                    boxes.append(box)

    return boxes


# Apply non-max suppression and filter boxes again
def non_max_suppress(boxes, nms_threshold, nb_class=None):
    """Apply greedy per-class non-max suppression to ``boxes``.

    For each class, the boxes that have a positive score for that class are
    visited from highest to lowest score. A box that is still alive clears the
    class score of every lower-ranked box overlapping it by more than
    ``nms_threshold``. Boxes that carry no score for the class are never
    touched, so overlapping objects of different classes do not delete each
    other, and a box that has already been suppressed no longer suppresses
    others.

    Args:
        boxes: list of dicts as produced by ``filter_boxes`` (keys ``'x'``,
            ``'y'``, ``'w'``, ``'h'``, ``'classes'``, ``'label'``,
            ``'score'``, ``'iou'``).
        nms_threshold: IoU above which the weaker box loses its class score.
        nb_class: number of classes to process. Defaults to the module-level
            ``CLASS``, looked up at call time so that this definition does not
            require ``CLASS`` to exist yet.

    Returns:
        The boxes whose ``'iou'`` flag is still ``True``, in their original
        order. For every box that lost a class score, ``'classes'``,
        ``'label'`` and ``'score'`` are refreshed, and ``'iou'`` becomes
        ``False`` when no class score is left.
    """
    if nb_class is None:
        nb_class = CLASS

    # Work on private copies so that clearing a score never writes through a
    # view into an array owned by the caller.
    class_scores = [np.array(box['classes']) for box in boxes]
    changed = set()

    for c in range(nb_class):
        # indices of the boxes predicting class c, best score first
        ranked = sorted(
            (idx for idx in range(len(boxes)) if class_scores[idx][c] > 0),
            key=lambda idx: class_scores[idx][c],
            reverse=True,
        )

        for pos, index_i in enumerate(ranked):
            if class_scores[index_i][c] == 0:
                continue  # already suppressed by a stronger box

            for index_j in ranked[pos + 1:]:
                if class_scores[index_j][c] == 0:
                    continue

                if iou(boxes[index_i], boxes[index_j]) > nms_threshold:
                    class_scores[index_j][c] = 0
                    changed.add(index_j)

    # refresh the boxes that lost at least one class score
    for idx in changed:
        box = boxes[idx]
        box['classes'] = class_scores[idx]
        box['label'] = np.argmax(class_scores[idx])
        box['score'] = np.max(class_scores[idx])
        box['iou'] = bool(box['score'] > 0)

    # remove the boxes which have too high iou
    return [box for box in boxes if box['iou'] is True]


# Generate list of colours for drawing boxes
def generate_colors(class_names):
    """Return one RGB colour per entry of ``class_names``.

    Hues are spread evenly around the colour wheel at full saturation and
    value, converted to 0-255 integer triples, then shuffled with a fixed
    seed so that the result is the same on every call and adjacent classes
    get unrelated colours.

    The shuffle uses a private ``random.Random(10101)`` generator. It yields
    the same order as seeding the module-level generator with 10101, but
    leaves the global ``random`` state (and any seed the caller set) intact.

    Args:
        class_names: sequence whose length decides how many colours to build.

    Returns:
        List of ``(r, g, b)`` integer tuples, ``len(class_names)`` long.
    """
    nb_colors = len(class_names)
    colors = []
    for i in range(nb_colors):
        red, green, blue = colorsys.hsv_to_rgb(i / nb_colors, 1., 1.)
        colors.append((int(red * 255), int(green * 255), int(blue * 255)))

    random.Random(10101).shuffle(colors)  # fixed seed, private generator
    return colors


# Draw boxes on image
def draw_boxes(image, boxes, labels, colours):
    """Draw every box and its caption onto ``image`` and return ``image``.

    Args:
        image: array indexed as ``(height, width, ...)``; drawn on in place.
        boxes: iterable of dicts with ``'x'``, ``'y'``, ``'w'``, ``'h'`` given
            as fractions of the image size, plus ``'label'`` and ``'score'``
            (``'score'`` must provide a ``round`` method).
        labels: sequence of class names indexed by ``box['label']``.
        colours: sequence of colours indexed by ``box['label']``.
    """
    for det in boxes:
        img_h, img_w = image.shape[0], image.shape[1]

        # centre/size fractions -> integer pixel corners
        half_w = det['w'] / 2
        half_h = det['h'] / 2
        left = int((det['x'] - half_w) * img_w)
        right = int((det['x'] + half_w) * img_w)
        top = int((det['y'] - half_h) * img_h)
        bottom = int((det['y'] + half_h) * img_h)

        class_id = det['label']

        cv2.rectangle(image, (left, top), (right, bottom), colours[class_id], 2)

        # caption: "<class name> <score rounded to 2 decimals>", 13 px above
        # the box, with a font scale proportional to the image height
        caption = labels[class_id] + ' ' + str(det['score'].round(2))
        cv2.putText(image, caption, (left, top - 13), cv2.FONT_HERSHEY_SIMPLEX,
                    2e-3 * img_h, colours[class_id], 2)

    return image


# Add text
def add_text(image):
    """Write the hint "Press 'q' to quit." near the top-left corner of ``image``.

    The text is drawn in place in white with ``FONT_HERSHEY_SIMPLEX`` at scale
    0.6, its bottom-left corner at pixel (2, 14). The same ``image`` object is
    returned.
    """
    # The final positional argument (1) is the seventh one passed to
    # cv2.putText, which OpenCV documents as the stroke thickness.
    cv2.putText(image, "Press 'q' to quit.", (2, 14),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)

    return image

In [None]:
pip install wget

Collecting wget
  Downloading https://files.pythonhosted.org/packages/47/6a/62e288da7bcda82b935ff0c6cfe542970f04e29c756b0e147251b2fb251f/wget-3.2.zip
Building wheels for collected packages: wget
  Building wheel for wget (setup.py) ... [?25l[?25hdone
  Created wheel for wget: filename=wget-3.2-cp36-none-any.whl size=9682 sha256=7b95b2135157b284d708ed549a4a3813c027453e2a4916af8365a00d7b7a7a1b
  Stored in directory: /root/.cache/pip/wheels/40/15/30/7d8f7cea2902b4db79e3fea550d7d7b85ecb27ef992b618f3f
Successfully built wget
Installing collected packages: wget
Successfully installed wget-3.2


# Architecture

In [None]:
from keras.layers import *
from keras.models import Model
import tensorflow.compat.v1 as tf

In [None]:

from keras.layers import *
from keras.models import Model
import tensorflow.compat.v1 as tf


# Height and width of the network input; create_model builds its Input layer
# from these and make_yolo resizes every frame to them.
IMAGE_H, IMAGE_W = 416, 416
# Rows and columns of the prediction grid that the model output is reshaped to.
GRID_H,  GRID_W  = 13 , 13
# Number of boxes predicted per grid cell (one per row of ANCHORS).
BOX = 5
# Number of object classes (one per entry of LABELS).
CLASS = 80


# Full model of yolo v2
def create_model():
    """Build and return the full YOLO v2 Keras model.

    The network takes an ``(IMAGE_H, IMAGE_W, 3)`` input and consists of 22
    bias-free convolutions named ``conv_1`` .. ``conv_22``, each followed by a
    batch normalisation named ``norm_<i>`` and a LeakyReLU(0.1), plus a final
    biased 1x1 convolution ``conv_23``. The output of layer 13 is also routed
    through layer 21 and a space-to-depth step, then concatenated with the
    output of layer 20. The result is reshaped to
    ``(GRID_H, GRID_W, BOX, 4 + 1 + CLASS)``.
    """

    # the function to implement the reorg layer (thanks to github.com/allanzelener/YAD2K)
    def space_to_depth_x2(x):
        return tf.space_to_depth(x, block_size=2)

    def conv_bn_leaky(tensor, index, filters, kernel):
        """conv_<index> (no bias) -> norm_<index> -> LeakyReLU(0.1)."""
        tensor = Conv2D(filters, (kernel, kernel), strides=(1, 1), padding='same',
                        name='conv_' + str(index), use_bias=False)(tensor)
        tensor = BatchNormalization(name='norm_' + str(index))(tensor)
        return LeakyReLU(alpha=0.1)(tensor)

    # Define input
    x_input = Input([IMAGE_H, IMAGE_W, 3])

    # Layers 1-13: (filters, kernel size, followed by a 2x2 max-pool?)
    backbone = [
        (32, 3, True),
        (64, 3, True),
        (128, 3, False),
        (64, 1, False),
        (128, 3, True),
        (256, 3, False),
        (128, 1, False),
        (256, 3, True),
        (512, 3, False),
        (256, 1, False),
        (512, 3, False),
        (256, 1, False),
        (512, 3, False),
    ]

    x = x_input
    for index, (filters, kernel, pooled) in enumerate(backbone, start=1):
        x = conv_bn_leaky(x, index, filters, kernel)
        if pooled:
            x = MaxPooling2D(pool_size=(2, 2))(x)

    # Layer 13's activation feeds both the main path and the skip branch.
    skip_connection = x

    x = MaxPooling2D(pool_size=(2, 2))(x)

    # Layers 14-20: (filters, kernel size)
    head = [(1024, 3), (512, 1), (1024, 3), (512, 1), (1024, 3), (1024, 3), (1024, 3)]
    for index, (filters, kernel) in enumerate(head, start=14):
        x = conv_bn_leaky(x, index, filters, kernel)

    # Layer 21: skip branch, then rearranged by space_to_depth so that it can
    # be concatenated with the main path
    skip_connection = conv_bn_leaky(skip_connection, 21, 64, 1)
    skip_connection = Lambda(space_to_depth_x2)(skip_connection)

    x = concatenate([skip_connection, x])

    # Layer 22
    x = conv_bn_leaky(x, 22, 1024, 3)

    # Layer 23: the only convolution with a bias and without normalisation
    x = Conv2D(BOX * (4 + 1 + CLASS), (1, 1), strides=(1, 1), padding='same', name='conv_23')(x)

    output = Reshape([GRID_H, GRID_W, BOX, 4 + 1 + CLASS])(x)

    model = Model(x_input, output)

    print('Model created.')

    return model



# Class names indexed by class id (80 entries, matching CLASS); draw_boxes
# looks up the caption of a box with LABELS[box['label']].
LABELS = ['person', 'bicycle', 'car', 'motorcycle', 'airplane',
          'bus', 'train', 'truck', 'boat', 'traffic light',
          'fire hydrant', 'stop sign', 'parking meter', 'bench',
          'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
          'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
          'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
          'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
          'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
          'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
          'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli',
          'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
          'couch', 'potted plant', 'bed', 'dining table', 'toilet',
          'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
          'microwave', 'oven', 'toaster', 'sink', 'refrigerator',
          'book', 'clock', 'vase', 'scissors', 'teddy bear',
          'hair drier', 'toothbrush']


# Anchor (width, height) for each of the BOX box slots. filter_boxes scales
# row b by exp() of the predicted size and divides by the grid size, so the
# values are expressed in grid cells.
ANCHORS = np.array([[0.57273, 0.677385],
                    [1.87446, 2.06253],
                    [3.33843, 5.47434],
                    [7.88282, 3.52778],
                    [9.77052, 9.16828]])

# YOLO

In [None]:
# from helpers import load_weights, filter_boxes, non_max_suppress, generate_colors, draw_boxes, download_weights, add_text
# from architecture import create_model, LABELS, IMAGE_H, IMAGE_W
import numpy as np
from tqdm import tqdm
import cv2
import sys
import os
# import argparse


# Thresholds for confidence score and non-max suppression
OBJ_THRESHOLD = 0.6
NMS_THRESHOLD = 0.5


# All yolo actions from input to output
def make_yolo(original_image, model):
    """Run the whole detection pipeline on one image.

    The image is resized to the network input size, scaled to [0, 1], its
    channel order reversed, and passed through ``model``. The raw output is
    then thresholded, non-max suppressed, and the surviving boxes are drawn.

    Args:
        original_image: image array as accepted by ``cv2.resize``; the boxes
            are drawn onto this array in place.
        model: object whose ``predict`` accepts a batch of one
            ``(IMAGE_H, IMAGE_W, 3)`` image.

    Returns:
        ``original_image`` with the detections drawn on it.
    """
    # cv2.resize expects the target size as (width, height)
    input_image = cv2.resize(original_image, (IMAGE_W, IMAGE_H)) / 255.
    # reverse the channel axis (presumably BGR -> RGB; verify against the
    # colour order the weights were trained with)
    input_image = input_image[:, :, ::-1]
    input_image = np.expand_dims(input_image, 0)  # add the batch dimension
    yolo_output = np.squeeze(model.predict(input_image))
    boxes = filter_boxes(yolo_output, OBJ_THRESHOLD)
    boxes = non_max_suppress(boxes, NMS_THRESHOLD)
    colours = generate_colors(LABELS)
    output_image = draw_boxes(original_image, boxes, LABELS, colours)

    return output_image



################### TEST YOLO ON VIDEO ###################

# Objects detection from video
def yolo_video(video_path, model, faster_times=1,
               video_out='/content/drive/My Drive/CV_Final_Proj/yolo2_.mp4'):
    """Run detection on every frame of a video and write the annotated result.

    Args:
        video_path: path of the input video.
        model: detection model handed to ``make_yolo`` for each frame.
        faster_times: factor applied to the source frame rate when writing.
        video_out: path of the output video; defaults to the location this
            notebook has always written to.

    Reading stops early when the reader returns no frame, since the reported
    frame count is not guaranteed to match what can actually be decoded. The
    reader and writer are released even if processing a frame raises.
    """
    print("making YOLO")

    # Set video reader and writer
    video_reader = cv2.VideoCapture(video_path)
    video_writer = None
    try:
        nb_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_h = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_w = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))
        fps = video_reader.get(cv2.CAP_PROP_FPS)
        video_writer = cv2.VideoWriter(video_out, cv2.VideoWriter_fourcc(*'MP4V'),
                                       fps * faster_times, (frame_w, frame_h))
        # Iterate over all frames
        for _ in tqdm(range(nb_frames)):

            ret, original_image = video_reader.read()
            if not ret:
                # no frame could be read; stop instead of processing None
                break
            image = make_yolo(original_image, model)
            video_writer.write(np.uint8(image))
    finally:
        video_reader.release()
        if video_writer is not None:
            video_writer.release()

    print("Output file saved to:", video_out)

In [None]:

# Run yolo
if __name__ == '__main__':

    path_to_file = '/content/drive/My Drive/CV_Final_Proj/7rings_video.mp4'
    if os.path.isfile(path_to_file):
        # Create model and load weights
        yolo = create_model()
        # download_weights()
        load_weights(yolo, 'yolo.weights')
        # yolo.save('/content/drive/My Drive/CV_Final_Proj/model.h5')
        yolo_video(path_to_file, yolo, faster_times=1)
    else:
        # Say why nothing happened; later cells rely on `yolo`, which is only
        # created when the input video exists.
        print('Input video not found:', path_to_file)


Model created.
Weights loaded.
making YOLO


[1;30;43mStreaming output truncated to the last 5000 lines.[0m

 44%|████▎     | 1934/4433 [22:18<28:31,  1.46it/s][A[A

 44%|████▎     | 1935/4433 [22:18<28:36,  1.46it/s][A[A

 44%|████▎     | 1936/4433 [22:19<28:35,  1.46it/s][A[A

 44%|████▎     | 1937/4433 [22:20<28:28,  1.46it/s][A[A

 44%|████▎     | 1938/4433 [22:20<28:24,  1.46it/s][A[A

 44%|████▎     | 1939/4433 [22:21<28:29,  1.46it/s][A[A

 44%|████▍     | 1940/4433 [22:22<28:23,  1.46it/s][A[A

 44%|████▍     | 1941/4433 [22:23<28:21,  1.46it/s][A[A

 44%|████▍     | 1942/4433 [22:23<28:25,  1.46it/s][A[A

 44%|████▍     | 1943/4433 [22:24<28:13,  1.47it/s][A[A

 44%|████▍     | 1944/4433 [22:25<28:34,  1.45it/s][A[A

 44%|████▍     | 1945/4433 [22:25<28:25,  1.46it/s][A[A

 44%|████▍     | 1946/4433 [22:26<28:16,  1.47it/s][A[A

 44%|████▍     | 1947/4433 [22:27<28:24,  1.46it/s][A[A

 44%|████▍     | 1948/4433 [22:27<28:22,  1.46it/s][A[A

 44%|████▍     | 1949/4433 [22:28<28:16,  1.46it/

Output file saved to: /content/drive/My Drive/CV_Final_Proj/yolo2_.mp4





In [None]:
# Re-run detection; relies on `path_to_file` and `yolo` from the "Run yolo" cell.
yolo_video(path_to_file, yolo, faster_times=1)



  0%|          | 0/4222 [00:00<?, ?it/s][A[A

making YOLO


[1;30;43mStreaming output truncated to the last 5000 lines.[0m

 41%|████      | 1723/4222 [19:49<29:11,  1.43it/s][A[A

 41%|████      | 1724/4222 [19:49<28:57,  1.44it/s][A[A

 41%|████      | 1725/4222 [19:50<28:31,  1.46it/s][A[A

 41%|████      | 1726/4222 [19:51<28:22,  1.47it/s][A[A

 41%|████      | 1727/4222 [19:51<28:11,  1.48it/s][A[A

 41%|████      | 1728/4222 [19:52<27:56,  1.49it/s][A[A

 41%|████      | 1729/4222 [19:53<27:53,  1.49it/s][A[A

 41%|████      | 1730/4222 [19:53<27:56,  1.49it/s][A[A

 41%|████      | 1731/4222 [19:54<27:45,  1.50it/s][A[A

 41%|████      | 1732/4222 [19:55<27:52,  1.49it/s][A[A

 41%|████      | 1733/4222 [19:55<27:52,  1.49it/s][A[A

 41%|████      | 1734/4222 [19:56<27:55,  1.48it/s][A[A

 41%|████      | 1735/4222 [19:57<27:56,  1.48it/s][A[A

 41%|████      | 1736/4222 [19:57<28:07,  1.47it/s][A[A

 41%|████      | 1737/4222 [19:58<27:52,  1.49it/s][A[A

 41%|████      | 1738/4222 [19:59<27:46,  1.49it/

Output file saved to: /content/drive/My Drive/CV_Final_Proj/yolo_.mp4





In [None]:
# Save the model built in the "Run yolo" cell to Google Drive.
yolo.save('/content/drive/My Drive/CV_Final_Proj/model.h5')

In [None]:
# Reload the model saved above and run the video pipeline with it.
# NOTE(review): the recorded output of this cell is a NameError, and the
# missing name is not shown. Candidates: `path_to_file` is only defined by the
# "Run yolo" cell, and the saved model contains a Lambda layer whose function
# refers to `tf` - TODO confirm which one applies.
from keras.models import load_model
model_yolo = load_model('/content/drive/My Drive/CV_Final_Proj/model.h5')
yolo_video(path_to_file, model_yolo, faster_times=1)

NameError: ignored