# **YOLO v3 TO DETECT OBJECTS IN A VIDEO**

In [1]:
import cv2
from google.colab.patches import cv2_imshow
import numpy as np

In [2]:
# Mount Google Drive at /content/drive so the model files and videos stored
# under MyDrive/DeepLearning can be opened by path in the cells below.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Loading YOLO weights and config file

In [3]:
# Locations on the mounted Drive of the YOLOv3 weights file and its network config.
yolo_weights = '/content/drive/MyDrive/DeepLearning/yolov3.weights'
yolo_config = '/content/drive/MyDrive/DeepLearning/yolov3.cfg'

# Build the detection network from those two files.
net = cv2.dnn.readNet(yolo_weights, yolo_config)

# The class labels used in training YOLO

In [4]:
# Class labels, one per line of coco.names; a detection's class id is used
# later as an index into this list.
classes = list()
with open('/content/drive/MyDrive/DeepLearning/coco.names', 'r') as names_file:
  classes.extend(names_file.read().splitlines())

# Show the labels so the loaded list can be checked by eye.
print(classes)

['person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']


# Testing with a traffic video from the internet

In [7]:
# Open the traffic clip stored on Drive. VideoCapture does not raise when the
# file is missing or cannot be decoded, so check explicitly and fail here with
# a clear message instead of failing later on the first frame read.
cap = cv2.VideoCapture('/content/drive/MyDrive/DeepLearning/test.mp4')
if not cap.isOpened():
  raise OSError("Could not open video: /content/drive/MyDrive/DeepLearning/test.mp4")

In [8]:
 # img = cv2.imread('image.jpg')

# Run the YOLO network on every frame of `cap`, draw the detections that
# survive non-maximum suppression, and display each annotated frame inline.
CONF_THRESHOLD = 0.5  # minimum class score for a detection to be kept
NMS_THRESHOLD = 0.4   # overlap threshold handed to NMSBoxes

# These do not change between frames, so look them up once.
output_layers_names = net.getUnconnectedOutLayersNames()
font = cv2.FONT_HERSHEY_PLAIN

try:
  while True:
    ret, img = cap.read()
    # read() reports failure (ret False, no frame) once the video is exhausted
    # or if it could not be opened; stop instead of crashing on img.shape.
    if not ret or img is None:
      break
    height, width, _ = img.shape

    # Scale pixels to [0, 1], resize to the 416x416 network input, and swap
    # the R and B channels.
    blob = cv2.dnn.blobFromImage(img, 1/255, (416,416), (0,0,0), swapRB=True, crop=False)
    net.setInput(blob)
    layerOutputs = net.forward(output_layers_names)

    boxes = []
    confidences = []
    class_ids = []

    for output in layerOutputs:
      for detection in output:
        # Entries from index 5 onward are treated as per-class scores; the
        # best-scoring class is the prediction for this detection.
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]

        if confidence > CONF_THRESHOLD:
          # The first four entries are used as box centre and size relative
          # to the frame, so scale them to pixels.
          center_x = int(detection[0]*width)
          center_y = int(detection[1]*height)
          w = int(detection[2]*width)
          h = int(detection[3]*height)

          # Convert the centre point to the top-left corner for drawing.
          x = int(center_x - w/2)
          y = int(center_y - h/2)

          boxes.append([x, y, w, h])
          confidences.append(float(confidence))
          class_ids.append(class_id)

    indexes = cv2.dnn.NMSBoxes(boxes, confidences, CONF_THRESHOLD, NMS_THRESHOLD)
    # NMSBoxes can return an empty tuple when nothing is kept, and the shape
    # of a non-empty result differs between OpenCV versions; normalise both
    # cases to a flat integer array so frames without detections are fine.
    kept = np.array(indexes, dtype=int).flatten()

    # One random colour per candidate box (re-drawn every frame).
    colors = np.random.uniform(0, 255, size=(len(boxes), 3))

    for i in kept:
      x, y, w, h = boxes[i]
      label = str(classes[class_ids[i]])
      confidence = str(round(confidences[i], 2))
      color = colors[i]
      cv2.rectangle(img, (x,y), (x+w, y+h), color, 2)
      cv2.putText(img, label + " " + confidence, (x,y+20), font, 2, (255,255,255), 2)

    cv2_imshow(img)
    # NOTE(review): cv2_imshow renders inline rather than in a HighGUI window,
    # so this Esc check probably never fires in Colab — confirm. The loop
    # normally ends when the video runs out.
    key = cv2.waitKey(1)
    if key == 27:
      break
finally:
  # Release the capture even if a frame raised part-way through.
  cap.release()

cv2.destroyAllWindows()
  

Output hidden; open in https://colab.research.google.com to view.

  # Testing with a sample video I recorded myself

In [5]:
# Open the self-recorded clip stored on Drive. VideoCapture does not raise
# when the file is missing or cannot be decoded, so check explicitly and fail
# here with a clear message instead of failing later on the first frame read.
cap = cv2.VideoCapture('/content/drive/MyDrive/DeepLearning/test1.mp4')
if not cap.isOpened():
  raise OSError("Could not open video: /content/drive/MyDrive/DeepLearning/test1.mp4")

In [6]:
 # img = cv2.imread('image.jpg')

# Run the YOLO network on every frame of `cap`, draw the detections that
# survive non-maximum suppression, and display each annotated frame inline.
CONF_THRESHOLD = 0.5  # minimum class score for a detection to be kept
NMS_THRESHOLD = 0.4   # overlap threshold handed to NMSBoxes

# These do not change between frames, so look them up once.
output_layers_names = net.getUnconnectedOutLayersNames()
font = cv2.FONT_HERSHEY_PLAIN

try:
  while True:
    ret, img = cap.read()
    # read() reports failure (ret False, no frame) once the video is exhausted
    # or if it could not be opened; stop instead of crashing on img.shape.
    if not ret or img is None:
      break
    height, width, _ = img.shape

    # Scale pixels to [0, 1], resize to the 416x416 network input, and swap
    # the R and B channels.
    blob = cv2.dnn.blobFromImage(img, 1/255, (416,416), (0,0,0), swapRB=True, crop=False)
    net.setInput(blob)
    layerOutputs = net.forward(output_layers_names)

    boxes = []
    confidences = []
    class_ids = []

    for output in layerOutputs:
      for detection in output:
        # Entries from index 5 onward are treated as per-class scores; the
        # best-scoring class is the prediction for this detection.
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]

        if confidence > CONF_THRESHOLD:
          # The first four entries are used as box centre and size relative
          # to the frame, so scale them to pixels.
          center_x = int(detection[0]*width)
          center_y = int(detection[1]*height)
          w = int(detection[2]*width)
          h = int(detection[3]*height)

          # Convert the centre point to the top-left corner for drawing.
          x = int(center_x - w/2)
          y = int(center_y - h/2)

          boxes.append([x, y, w, h])
          confidences.append(float(confidence))
          class_ids.append(class_id)

    indexes = cv2.dnn.NMSBoxes(boxes, confidences, CONF_THRESHOLD, NMS_THRESHOLD)
    # NMSBoxes can return an empty tuple when nothing is kept, and the shape
    # of a non-empty result differs between OpenCV versions; normalise both
    # cases to a flat integer array so frames without detections are fine.
    kept = np.array(indexes, dtype=int).flatten()

    # One random colour per candidate box (re-drawn every frame).
    colors = np.random.uniform(0, 255, size=(len(boxes), 3))

    for i in kept:
      x, y, w, h = boxes[i]
      label = str(classes[class_ids[i]])
      confidence = str(round(confidences[i], 2))
      color = colors[i]
      cv2.rectangle(img, (x,y), (x+w, y+h), color, 2)
      cv2.putText(img, label + " " + confidence, (x,y+20), font, 2, (255,255,255), 2)

    cv2_imshow(img)
    # NOTE(review): cv2_imshow renders inline rather than in a HighGUI window,
    # so this Esc check probably never fires in Colab — confirm. The loop
    # normally ends when the video runs out.
    key = cv2.waitKey(1)
    if key == 27:
      break
finally:
  # Release the capture even if a frame raised part-way through.
  cap.release()

cv2.destroyAllWindows()
  

Output hidden; open in https://colab.research.google.com to view.