In [1]:
import torch
import torchvision
import numpy as np
import cv2 as cv2
from deep_sort_realtime.deepsort_tracker import DeepSort

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# --- Input / output configuration -------------------------------------------
video_path = 'traffic-1.mp4'
output_path = 'output_video.mp4'

cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Without this check a missing/undecodable file would only show up later
    # as fps/width/height of 0 and an empty frame list.
    raise OSError(f"Could not open video file: {video_path!r}")

# Stream metadata; reused later when the annotated video is written.
# NOTE(review): int() truncates fractional frame rates (e.g. 29.97 -> 29).
fps = int(cap.get(cv2.CAP_PROP_FPS))
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Pixel scale factor handed to cv2.dnn.blobFromImage (approximately 1/255).
scale = 0.00392

# One class name per line; the line index is used as the class id.
with open('yolov3.txt', 'r') as f:
    classes = [line.strip() for line in f]

In [3]:
# Sanity check: frame rate and frame dimensions read from the capture.
print(fps, width, height)

30 2400 1080


In [4]:
# One colour triple per class, indexed by class id. A seeded generator keeps
# the palette identical across kernel restarts so rendered videos are
# reproducible.
COLORS = np.random.default_rng(42).uniform(0, 255, size=(len(classes), 3))

In [5]:
def get_output_layers(net):
    """Return the names of the network's unconnected (output) layers.

    Args:
        net: Object exposing ``getLayerNames()`` and
            ``getUnconnectedOutLayers()`` (e.g. a ``cv2.dnn`` network).

    Returns:
        list: Layer names, in the order reported by
        ``getUnconnectedOutLayers()``.
    """
    layer_names = net.getLayerNames()

    # The indices are 1-based and may arrive either as plain scalars or as
    # one-element sequences; flattening handles both layouts without relying
    # on a bare ``except`` that would also hide unrelated errors.
    out_indices = np.asarray(net.getUnconnectedOutLayers()).flatten()

    return [layer_names[int(idx) - 1] for idx in out_indices]

In [6]:
def draw_prediction(img, class_id, confidence, x, y, x_plus_w, y_plus_h):
    """Draw a labelled bounding box for one detection onto ``img``.

    Args:
        img: Image to draw on; it is modified by the OpenCV drawing calls.
        class_id: Index into the module-level ``classes`` / ``COLORS``.
        confidence: Detection score. Currently unused; kept so existing
            callers do not need to change.
        x, y: Top-left corner of the box, in pixels.
        x_plus_w, y_plus_h: Bottom-right corner of the box, in pixels.

    Returns:
        The same ``img`` object that was passed in.
    """
    label = str(classes[class_id])
    color = COLORS[class_id]

    cv2.rectangle(img, (x, y), (x_plus_w, y_plus_h), color, 2)

    # Stroke thickness 2 (matching the box outline). A thickness of 30 at
    # font scale 1 smears the glyphs into an unreadable blob.
    cv2.putText(img, label, (x - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)
    return img

In [7]:
# NOTE(review): the tracker is instantiated but never fed detections in this
# cell; kept in case other cells rely on it.
tracker = DeepSort()
frames = []

# Detection thresholds: minimum class score to keep a box, and the
# threshold handed to non-maximum suppression.
conf_threshold = 0.5
nms_threshold = 0.4

# Load the network once; re-reading the weights for every frame is pure
# overhead because the model does not change between frames.
net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')
output_layers = get_output_layers(net)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    blob = cv2.dnn.blobFromImage(frame, scale, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    class_ids = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            # Entries 0-3 are used as box centre/size, entries 5+ as
            # per-class scores.
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > conf_threshold:
                # Box values are multiplied by the frame size to get pixels
                # (presumably they are normalised to [0, 1]; confirm).
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                x = center_x - w / 2
                y = center_y - h / 2
                class_ids.append(class_id)
                confidences.append(float(confidence))
                boxes.append([x, y, w, h])

    indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)

    # Start from the raw frame so a frame with no surviving detections is
    # still stored as itself (rather than raising NameError on the first
    # frame or silently repeating the previous annotated frame).
    modified_frame = frame

    # Flatten so both flat and nested index layouts are handled without a
    # bare ``except``.
    for i in np.asarray(indices).flatten():
        i = int(i)
        x, y, w, h = boxes[i]
        modified_frame = draw_prediction(
            modified_frame, class_ids[i], confidences[i],
            round(x), round(y), round(x + w), round(y + h),
        )
    frames.append(modified_frame)

# All frames have been read; free the underlying video handle.
cap.release()



In [8]:

# Encode the annotated frames to ``output_path`` at the source frame rate
# and resolution.
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

# A writer that failed to open would otherwise drop every frame silently.
if not out.isOpened():
    raise RuntimeError(f"Could not open video writer for {output_path!r}")

try:
    for frame in frames:
        out.write(frame)
finally:
    # Always finalise the file, even if a write fails part-way through.
    out.release()