In [3]:
import numpy as np
import cv2
from google.colab.patches import cv2_imshow ## im_show was disabled in colab

In [4]:
## Global Variables

# Input video and the YOLOv3 model artefacts.
VIDEO_PATH = '/content/yolo_test.mp4'
CONFIG_FILE = '/content/yolov3.cfg'
WEIGTS_FILE = '/content/yolov3.weights'
CLASSES_FILE = '/content/COCO_Class_Codes.txt'

# Arguments forwarded to cv2.dnn.blobFromImage by the processing loop.
SCALE = 1.0 / 255  ## converts color code range from 0-255 to 0-1
SIZE = (320, 320)
MEAN = (0, 0, 0)

# Class names are the single-quoted strings in CLASSES_FILE: after splitting
# the text on "'", every odd-positioned piece is the content of one quoted
# string, in file order.
CLASSES = []
with open(CLASSES_FILE, 'r') as f:
  read_data = f.read().split("'")
  CLASSES = read_data[1::2]

# Indices into CLASSES that the detector is allowed to report.
wanted_classes_idx = [0, 2, 5]  # human, car, and bus

In [6]:
def find_obejct(outputs, frame_size=None, conf_threshold=0.5):
  """Collect detections of the wanted classes from the network outputs.

  Each row of every array in `outputs` is read as
  [center_x, center_y, w, h, <unused>, score_0, score_1, ...]: the first four
  values are multiplied by the frame size to obtain pixel units, and the
  values from index 5 on are indexed by class id. Only the classes listed in
  the module-level `wanted_classes_idx` compete; the best of them is kept
  when its score exceeds `conf_threshold`.

  Args:
    outputs: iterable of 2-D arrays, one per output layer.
    frame_size: optional (width, height) of the frame in pixels. When
      omitted, the module-level `width` and `height` are used, which is how
      the frame-processing loop has always supplied them.
    conf_threshold: minimum score (exclusive) for a detection to be kept.

  Returns:
    (class_ids, confidences, boxes): three parallel lists. `class_ids` holds
    entries of `wanted_classes_idx`, `confidences` holds floats, and each box
    is [x, y, w, h] in integer pixels with (x, y) the top-left corner.
  """
  if frame_size is None:
    # Backward-compatible path: fall back to the globals set per frame.
    frame_w, frame_h = width, height
  else:
    frame_w, frame_h = frame_size

  # Loop-invariant: scales (center_x, center_y, w, h) to pixel units.
  box_scale = np.array([frame_w, frame_h, frame_w, frame_h])

  class_ids, confidences, boxes = [], [], []

  for output in outputs:
    output = np.asarray(output)
    if output.size == 0:
      continue

    # Score every row at once, restricted to the wanted classes.
    wanted_scores = output[:, 5:][:, wanted_classes_idx]
    best_pos = wanted_scores.argmax(axis=1)
    best_conf = wanted_scores[np.arange(len(output)), best_pos]

    # Only the rows that pass the threshold need the per-box conversion.
    for row in np.flatnonzero(best_conf > conf_threshold):
      center_x, center_y, w, h = (output[row, 0:4] * box_scale).astype("int")
      # Convert from box centre to top-left corner.
      x = int(center_x - w / 2)
      y = int(center_y - h / 2)
      boxes.append([x, y, int(w), int(h)])
      confidences.append(float(best_conf[row]))
      class_ids.append(wanted_classes_idx[best_pos[row]])

  return class_ids, confidences, boxes



def show_detected_object(frame, class_ids, confidences, boxes):
  """Draw a labelled bounding box on `frame` for every detection.

  Args:
    frame: image handed to cv2.rectangle / cv2.putText for drawing.
    class_ids: per-detection indices into the module-level CLASSES list.
    confidences: per-detection scores, rendered with two decimals.
    boxes: per-detection [x, y, w, h], with (x, y) the top-left corner.

  Returns:
    The `frame` object that was passed in, after drawing.
  """
  # One colour per class. A fixed seed keeps each class's colour identical
  # across calls (i.e. across video frames) and leaves the global NumPy
  # random state untouched.
  colors = np.random.RandomState(0).uniform(0, 255, size=(len(CLASSES), 3))

  for i, box in enumerate(boxes):
    x, y, w, h = box
    class_id = class_ids[i]
    label_and_confidence = "{} {:.2f}".format(CLASSES[class_id], confidences[i])
    # Pick the colour by class id, not by detection index: the palette has
    # len(CLASSES) rows, so indexing by detection index mislabels colours and
    # overflows once a frame has more detections than classes.
    color = tuple(colors[class_id].tolist())
    cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
    # Label sits 10 px above the box's top-left corner.
    cv2.putText(frame, label_and_confidence, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

  return frame

In [8]:
# Load the YOLOv3 network from its weights and configuration files.
net = cv2.dnn.readNet(WEIGTS_FILE, CONFIG_FILE)

layer_names = net.getLayerNames()
# getUnconnectedOutLayers() returns 1-based layer indices. Depending on the
# OpenCV release they come back flat or as an (N, 1) array, so flatten before
# indexing to support both.
output_layers = [layer_names[i - 1] for i in np.asarray(net.getUnconnectedOutLayers()).flatten()]

vid = cv2.VideoCapture(VIDEO_PATH)
if not vid.isOpened():
  vid.release()
  raise IOError("Could not open video: {}".format(VIDEO_PATH))

# Size the writer from the source video instead of assuming 1280x720: frames
# whose size differs from the writer's declared size are not encoded properly.
frame_width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))

# NOTE(review): output frame rate kept at the original fixed value; use
# vid.get(cv2.CAP_PROP_FPS) instead if the source playback speed should be kept.
OUTPUT_FPS = 5

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
output_video = cv2.VideoWriter('output.mp4', fourcc, OUTPUT_FPS, (frame_width, frame_height))

try:
  while True:

    ret, frame = vid.read()
    if not ret:
      # End of stream (or read failure): stop processing.
      break

    # find_obejct reads these module-level names to scale boxes to pixels.
    height, width, _ = frame.shape
    blob = cv2.dnn.blobFromImage(frame, SCALE, SIZE, MEAN, swapRB=True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)
    # NOTE(review): no non-maximum suppression is applied, so overlapping
    # boxes for the same object are all drawn.
    class_ids, confidences, boxes = find_obejct(outs)
    frame = show_detected_object(frame, class_ids, confidences, boxes)

    output_video.write(frame)

    # No cv2.waitKey / cv2.destroyAllWindows here: this script never opens a
    # window, so key polling could not receive input and only added a delay.
finally:
  # Always release the capture and finalise the output file, even when a
  # frame fails to process.
  vid.release()
  output_video.release()