In [None]:
import numpy as np
import imutils
import time
import cv2
import os
from imutils.video import FPS
import dlib

In [None]:
# Class names, one per line; a detection's class index is used to look up its name here.
# `with` guarantees the file handle is closed once the names are read.
with open("coco.names") as names_file:
    LABELS = names_file.read().strip().split("\n")

# Fixed seed so the per-class colours are the same on every run.
np.random.seed(42)
COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8")

weights = "yolov3.weights"
configFile = "yolov3.cfg"

print("[INFO] loading YOLO from disk...")
net = cv2.dnn.readNetFromDarknet(configFile, weights)
layer_names = net.getLayerNames()
# getUnconnectedOutLayers() yields 1-based layer indices. Depending on the
# OpenCV version they arrive as an (N, 1) or a flat (N,) array; flattening
# first makes the lookup work with both shapes.
ln = [layer_names[i - 1] for i in np.array(net.getUnconnectedOutLayers()).flatten()]

# NOTE(review): absolute, machine-specific path -- point this at your own copy of the video.
VIDEO_PATH = "C:/Users/shubh/Downloads/project_video.mp4"

vs = cv2.VideoCapture(VIDEO_PATH)
if not vs.isOpened():
    # Fail here with a clear message instead of silently processing zero frames later.
    raise IOError("[ERROR] could not open video: {}".format(VIDEO_PATH))

try:
    # imutils.is_cv2() selects the legacy constant name used by OpenCV 2.x;
    # every later version exposes cv2.CAP_PROP_FRAME_COUNT.
    prop = cv2.cv.CV_CAP_PROP_FRAME_COUNT if imutils.is_cv2() \
        else cv2.CAP_PROP_FRAME_COUNT
    total = int(vs.get(prop))
    print("[INFO] {} total frames in video".format(total))
except Exception:
    # Best effort only: the frame count is informational, so fall back to -1.
    # `Exception` (not a bare except) keeps Ctrl-C / kernel interrupts working.
    print("[INFO] couldn't determine frames in video")
    total = -1

In [None]:
# --- Tunable parameters -------------------------------------------------------
CONF_THRESHOLD = 0.5    # minimum class score for a YOLO detection to be kept
NMS_THRESHOLD = 0.3     # overlap threshold handed to non-maximum suppression
TARGET_LABEL = "car"    # only detections of this class are tracked
REDETECT_EVERY = 30     # re-run YOLO every N frames so newly appearing cars get picked up
BOX_COLOR = (0, 255, 0)


def _detect_boxes(frame, target_label=TARGET_LABEL):
    """Run YOLO on one BGR frame and return the NMS-filtered boxes for `target_label`.

    Returns a list of (x, y, w, h) integer tuples in pixel coordinates, where
    (x, y) is the top-left corner. The list is empty when nothing qualifies.
    """
    (H, W) = frame.shape[:2]

    blob = cv2.dnn.blobFromImage(frame, 1/255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    layerOutputs = net.forward(ln)

    boxes = []
    confidences = []

    for output in layerOutputs:
        for detection in output:
            # Entries from index 5 onwards are the per-class scores.
            scores = detection[5:]
            classID = int(np.argmax(scores))
            confidence = float(scores[classID])

            if confidence <= CONF_THRESHOLD or LABELS[classID] != target_label:
                continue

            # The first four values are scaled by the frame size and read as
            # centre-x, centre-y, width, height; convert to a top-left corner.
            box = detection[0:4] * np.array([W, H, W, H])
            (centerX, centerY, width, height) = box.astype("int")
            x = int(centerX - (width/2))
            y = int(centerY - (height/2))

            boxes.append([x, y, int(width), int(height)])
            confidences.append(confidence)

    if not boxes:
        return []

    # Suppress overlapping boxes ONCE per frame, after all candidates are collected.
    idxs = cv2.dnn.NMSBoxes(boxes, confidences, CONF_THRESHOLD, NMS_THRESHOLD)
    if len(idxs) == 0:
        return []
    return [tuple(boxes[i]) for i in np.array(idxs).flatten()]


def _draw_box(frame, startX, startY, endX, endY, text):
    """Draw a labelled rectangle on `frame` in place."""
    cv2.rectangle(frame, (startX, startY), (endX, endY), BOX_COLOR, 2)
    cv2.putText(frame, text, (startX, startY - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, BOX_COLOR, 2)


trackers = []
labels = []
frame_index = 0

fps = FPS().start()

while True:
    (ret, frame) = vs.read()
    if not ret:
        break

    # OpenCV decodes frames as BGR, whereas dlib treats numpy images as RGB.
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    if not trackers or frame_index % REDETECT_EVERY == 0:
        # Detection frame: rebuild the tracker list from fresh detections so it
        # never accumulates duplicates of the same object.
        trackers = []
        labels = []
        for (x, y, w, h) in _detect_boxes(frame):
            t = dlib.correlation_tracker()
            t.start_track(rgb, dlib.rectangle(x, y, x + w, y + h))

            trackers.append(t)
            labels.append(TARGET_LABEL)

            _draw_box(frame, x, y, x + w, y + h, TARGET_LABEL)
    else:
        # Tracking frame: advance every tracker and draw its current position.
        for (t, l) in zip(trackers, labels):
            t.update(rgb)
            pos = t.get_position()

            startX = int(pos.left())
            startY = int(pos.top())
            endX = int(pos.right())
            endY = int(pos.bottom())

            _draw_box(frame, startX, startY, endX, endY, l)

    cv2.imshow("Frame", frame)
    key = cv2.waitKey(1) & 0xFF

    # Pressing "q" in the display window stops processing early.
    if key == ord("q"):
        break

    fps.update()
    frame_index += 1

    
# Stop the FPS counter and report throughput for the processed frames.
fps.stop()
print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))

# Close the display window and release the video handle. The capture object is
# named `vs`; there is no `cap` in this notebook.
cv2.destroyAllWindows()
vs.release()
    