Standard Imports

In [17]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import cv2
import numpy as np
import tensorflow as tf
from yolov3.utils import Load_Yolo_model, image_preprocess, postprocess_boxes, nms, draw_bbox, read_class_names
from yolov3.configs import *
import time
from deep_sort import nn_matching
from deep_sort.detection import Detection
from deep_sort.tracker import Tracker
from deep_sort import generate_detections as gdet

In [18]:
path_to_video   = "video3.mp4"

def Car_tracking(Yolo, path_to_video, output_path, input_size=400, show=False, CLASSES=YOLO_COCO_CLASSES, score_threshold=0.2, iou_threshold=0.4, rectangle_colors='', Track_only = []):
    
    max_distance = 0.6
    
    #initialize deep sort object
    model_filename = 'model_data/coco/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    eval_metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_distance,None)
    tracker = Tracker(eval_metric)

    time1 = []
    time2 = []

    if path_to_video:
        vid = cv2.VideoCapture(path_to_video) # detect on video
    else:
        vid = cv2.VideoCapture(0) # detect from webcam

    # by default VideoCapture returns float instead of int
    w = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, codec, fps, (w, h)) # output_path must be .mp4

    NUM_CLASS = read_class_names(CLASSES)
    key_list = list(NUM_CLASS.keys()) 
    val_list = list(NUM_CLASS.values())
    while True:
        _, frame = vid.read()

        try: #converting each frame (image) of the video into RGB format
            original_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        except:
            break
        
        image_data = image_preprocess(np.copy(original_frame), [input_size, input_size]) #changing the image dimentions. 
        # image_data = image_data[np.newaxis, ...].astype(np.float32)
        image_data = np.float32(image_data[np.newaxis, ...])

        t1 = time.time()
        if YOLO_FRAMEWORK == "tf":
            pred_boundingbox = Yolo.predict(image_data)
        elif YOLO_FRAMEWORK == "trt":
            batched_input = tf.constant(image_data)
            result = Yolo(batched_input)
            pred_boundingbox = []
            for key, value in result.items():
                value = value.numpy()
                pred_boundingbox.append(value)
        
        t2 = time.time()
        
        pred_boundingbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_boundingbox]
        pred_boundingbox = tf.concat(pred_boundingbox, axis=0)

        boundingboxes = postprocess_boxes(pred_boundingbox, original_frame, input_size, score_threshold)
        boundingboxes = nms(boundingboxes, iou_threshold, method='nms')

        # extract bboxes to boxes (x, y, width, height), scores and names
        # boxes, scores, names = [], [], []
        boxes = []
        scores = []
        names = []

        for boundingbox in boundingboxes:
            if len(Track_only) !=0 and NUM_CLASS[int(boundingbox[5])] in Track_only or len(Track_only) == 0:
                boxes.append([boundingbox[0].astype(int), boundingbox[1].astype(int), boundingbox[2].astype(int)-boundingbox[0].astype(int), boundingbox[3].astype(int)-boundingbox[1].astype(int)])
                scores.append(boundingbox[4])
                names.append(NUM_CLASS[int(boundingbox[5])])

        #Obtaining all the detections for a frame of he video
        boxes = np.array(boxes) 
        names = np.array(names)
        scores = np.array(scores)
        features = np.array(encoder(original_frame, boxes))
        detections = [Detection(boundingbox, score, class_name, feature) for boundingbox, score, class_name, feature in zip(boxes, scores, names, features)]

        # Obtaining track information after passing the detections to the deepsort object
        tracker.predict()
        tracker.update(detections)

        # Obtain info from the tracks
        tracked_boundingboxes = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 5:
                continue 
            boundingbox = track.to_tlbr() # Get the corrected/predicted bounding box
            class_name = track.get_class() #Get the class name of particular object
            tracking_id = track.track_id # Get the ID for the particular track
            index = key_list[val_list.index(class_name)] # Get predicted object index by object name
            tracked_boundingboxes.append(boundingbox.tolist() + [tracking_id, index]) # Structure data, that we could use it with our draw_bbox function

        # drawing the detections on a frame
        image = draw_bbox(original_frame, tracked_boundingboxes, CLASSES=CLASSES, tracking=True)

        t3 = time.time()
        time1.append(t2-t1)
        time2.append(t3-t1)
        
        time1 = time1[-20:]
        time2 = time2[-20:]

        ms = sum(time1)/len(time1)*1000
        fps = 1000 / ms
        fps2 = 1000 / (sum(time2)/len(time2)*1000)
        
        image = cv2.putText(image, "Time: {:.1f} FPS".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), 2)


        print("Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format(ms, fps, fps2))
        if output_path != '': out.write(image)
        if show:
            cv2.imshow('output', image)
            
            if cv2.waitKey(25) & 0xFF == ord("q"):
                cv2.destroyAllWindows()
                break
            
    cv2.destroyAllWindows()


yolo_v3 = Load_Yolo_model()
Car_tracking(yolo_v3, path_to_video, "output.mp4", input_size=YOLO_INPUT_SIZE, show=False, iou_threshold=0.1, rectangle_colors=(0,0, 255), Track_only = ["car"])


OpenCV: FFMPEG: tag 0x44495658/'XVID' is not supported with codec id 12 and format 'mp4 / MP4 (MPEG-4 Part 14)'
OpenCV: FFMPEG: fallback to use tag 0x7634706d/'mp4v'


Time: 8249.89ms, Detection FPS: 0.1, total FPS: 0.1
Time: 5469.31ms, Detection FPS: 0.2, total FPS: 0.1
Time: 4304.00ms, Detection FPS: 0.2, total FPS: 0.2
Time: 3522.58ms, Detection FPS: 0.3, total FPS: 0.2
Time: 2996.95ms, Detection FPS: 0.3, total FPS: 0.2
Time: 2663.25ms, Detection FPS: 0.4, total FPS: 0.3
Time: 2397.12ms, Detection FPS: 0.4, total FPS: 0.3
Time: 2197.15ms, Detection FPS: 0.5, total FPS: 0.3
Time: 2041.15ms, Detection FPS: 0.5, total FPS: 0.4
Time: 1912.94ms, Detection FPS: 0.5, total FPS: 0.4
Time: 1810.58ms, Detection FPS: 0.6, total FPS: 0.4
Time: 1723.13ms, Detection FPS: 0.6, total FPS: 0.4
Time: 1648.45ms, Detection FPS: 0.6, total FPS: 0.5
Time: 1582.88ms, Detection FPS: 0.6, total FPS: 0.5
Time: 1528.03ms, Detection FPS: 0.7, total FPS: 0.5
Time: 1478.97ms, Detection FPS: 0.7, total FPS: 0.5
Time: 1433.39ms, Detection FPS: 0.7, total FPS: 0.5
Time: 1394.03ms, Detection FPS: 0.7, total FPS: 0.5
Time: 1358.47ms, Detection FPS: 0.7, total FPS: 0.6
Time: 1328.7