In [2]:
import cv2
import torch
import numpy as np
from speed_funcs import *

# Load the 'yolov5m' detection model from the ultralytics/yolov5 hub repo
# with pretrained weights; the resulting callable is used by the cells below.
yolov5 = torch.hub.load('ultralytics/yolov5', 'yolov5m', pretrained =True)

# Path of the video that the tracker run later in this notebook processes.
video_path = "data/session5_center/video.avi"

Using cache found in /home/kasvoy/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2023-3-26 Python-3.11.2 torch-2.0.0+cu117 CUDA:0 (NVIDIA GeForce GTX 1060 6GB, 6070MiB)

Fusing layers... 
YOLOv5m summary: 290 layers, 21172173 parameters, 0 gradients
Adding AutoShape... 


In [3]:
"""
Original points (leftmost points starting from up):

p1: (774, 85)
p2: (706, 163)
p3: (601, 284)
p4: (423, 491)
p5: (79, 909)


Transformed:

p1: (845, 60)
p2: (845, 277)
p3: (845, 502)
p4: (845, 731)
p5: (851, 956)
"""

'\nOriginal points (leftmost points starting from up):\n\np1: (774, 85)\np2: (706, 163)\np3: (601, 284)\np4: (423, 491)\np5: (79, 909)\n\n\nTransformed:\n\np1: (845, 60)\np2: (845, 277)\np3: (845, 502)\np4: (845, 731)\np5: (851, 956)\n'

In [4]:
def xyxy_to_bb(result_tensor):
    """Convert YOLOv5 xyxy detections to the ltwh tuples given to DeepSort.

    Each row of ``result_tensor`` is read as
    ``[x_min, y_min, x_max, y_max, confidence, class]``. Rows whose class is
    2, 3, 5 or 7 (the classes this notebook treats as vehicles) are returned
    as ``([left, top, width, height], confidence, class)``; all other rows
    are dropped.
    """
    vehicle_classes = {2, 3, 5, 7}
    detections = []

    for row in result_tensor:
        values = row.tolist()
        left, top, right, bottom = values[0], values[1], values[2], values[3]
        score, label = values[4], values[5]

        # consider only vehicles
        if label not in vehicle_classes:
            continue

        detections.append(([left, top, right - left, bottom - top], score, label))

    return detections

In [5]:
def play_transformed_vid(video_path, transform_matrix, output_size=(1920, 1080)):
    """Play a video with every frame warped by a perspective transform.

    Parameters
    ----------
    video_path : path of the video opened with ``cv2.VideoCapture``.
    transform_matrix : matrix handed to ``cv2.warpPerspective`` for each frame.
    output_size : ``(width, height)`` of the warped image. Defaults to
        ``(1920, 1080)``, the size that used to be hard-coded here.

    Playback ends when no further frame can be read or when 'q' is pressed.
    The capture and the display window are released even if warping or
    displaying a frame raises.
    """
    cap = cv2.VideoCapture(video_path)

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            cv2.imshow('t', cv2.warpPerspective(frame, transform_matrix, output_size))

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
    finally:
        # Always free the capture handle and close the window, also on errors.
        cap.release()
        cv2.destroyAllWindows()

In [6]:
#play_transformed_vid(video_path, transform_matrix)

In [7]:
def show_dets(video_path):
    """Show the detection model's rendered output for each frame of a video.

    Every frame is passed through ``extract_roi`` and then through the
    module-level ``yolov5`` model; the rendered result is shown in a
    resizable window. Playback ends when no further frame can be read or
    when 'q' is pressed. The capture and the window are released even if
    detection or display raises.
    """
    window_name = "Resized_Window"
    video = cv2.VideoCapture(video_path)

    try:
        # Window setup does not depend on the frame, so do it once up front
        # instead of on every loop iteration.
        cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
        cv2.resizeWindow(window_name, 1920, 1080)

        while video.isOpened():
            ret, frame = video.read()
            if not ret:
                break

            result = yolov5(extract_roi(frame))
            cv2.imshow(window_name, np.squeeze(result.render()))

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
    finally:
        video.release()
        cv2.destroyAllWindows()

In [8]:
def show_nth_frame(video_path, n, transformed=True):
    """Display frame number ``n`` of a video until a key is pressed.

    Parameters
    ----------
    video_path : path of the video opened with ``cv2.VideoCapture``.
    n : frame index the capture is positioned at before reading.
    transformed : when true, the frame is warped with the module-level
        ``TRANSFORM_MATRIX`` to 1920x1080 before being shown.
        NOTE(review): ``TRANSFORM_MATRIX`` is not defined in this notebook;
        presumably it comes from the ``speed_funcs`` star import - confirm.

    Raises
    ------
    ValueError : if no frame could be read at that position (for example a
        wrong path or an index past the end of the video).
    """
    cap = cv2.VideoCapture(video_path)
    try:
        cap.set(cv2.CAP_PROP_POS_FRAMES, n)
        ret, frame = cap.read()
    finally:
        # The decoded frame no longer needs the capture, so free it right away.
        cap.release()

    if not ret:
        raise ValueError(f"Could not read frame {n} from {video_path!r}")

    if transformed:
        frame = cv2.warpPerspective(frame, TRANSFORM_MATRIX, (1920,1080))

    try:
        cv2.imshow("frame", frame)
        cv2.waitKey(0)
    finally:
        cv2.destroyAllWindows()

In [9]:
def show_time_frame(video_path, video_time = 25, transformed=True):
    """Display the frame at ``video_time`` seconds until a key is pressed.

    Parameters
    ----------
    video_path : path of the video opened with ``cv2.VideoCapture``.
    video_time : position in seconds; it is multiplied by 1000 and set as
        ``cv2.CAP_PROP_POS_MSEC`` before reading.
    transformed : when true, the frame is warped with the module-level
        ``TRANSFORM_MATRIX`` to 1920x1080 before being shown.
        NOTE(review): ``TRANSFORM_MATRIX`` is not defined in this notebook;
        presumably it comes from the ``speed_funcs`` star import - confirm.

    Raises
    ------
    ValueError : if no frame could be read at that position (for example a
        wrong path or a time past the end of the video).
    """
    cap = cv2.VideoCapture(video_path)
    try:
        cap.set(cv2.CAP_PROP_POS_MSEC, video_time*1000)
        ret, frame = cap.read()
    finally:
        # The decoded frame no longer needs the capture, so free it right away.
        cap.release()

    if not ret:
        raise ValueError(f"Could not read a frame at {video_time}s from {video_path!r}")

    if transformed:
        frame = cv2.warpPerspective(frame, TRANSFORM_MATRIX, (1920,1080))

    try:
        cv2.imshow("frame", frame)
        cv2.waitKey(0)
    finally:
        cv2.destroyAllWindows()

In [16]:
from deep_sort_realtime.deepsort_tracker import DeepSort


def play_tracker_video(detection_model, video_path, transform_matrix=None, video_time=30, speed_limit=80):
    """Detect, track and time vehicles in a video while displaying the result.

    For every frame the region returned by ``extract_roi`` is run through
    ``detection_model``, the vehicle detections are handed to a DeepSort
    tracker, and the bottom-right corner of each confirmed track is mapped
    with ``get_point_under_transform``. ``assign_point_time`` then records
    the video time at which the track reaches each of the five marked lines.
    Section speeds stored by that helper and the overall average speed are
    drawn on the frame; values above ``speed_limit`` are drawn in red.

    Parameters
    ----------
    detection_model : callable returning a result with an ``xyxy`` attribute
        (the notebook passes the YOLOv5 hub model).
    video_path : path of the video opened with ``cv2.VideoCapture``.
    transform_matrix : forwarded to ``get_point_under_transform``.
        NOTE(review): the default ``None`` is forwarded unchanged - confirm
        the helper accepts it.
    video_time : start position in seconds (set as ``CAP_PROP_POS_MSEC``).
    speed_limit : threshold in km/h used only for colouring the overlay.

    Returns
    -------
    dict mapping each track id to a dict with the keys ``P1``..``P5``,
    ``delta12``..``delta45``, ``total_delta``, ``speed12``..``speed45`` and
    ``avg_speed``. A value of 0 means "not recorded yet".

    Press 'q' to stop and 'p' to pause until the next key press. The capture
    and the window are released even if processing a frame raises.
    """
    cap = cv2.VideoCapture(video_path)

    #coordinates of the 5 marked lines in transformed space
    #points are roughly equidistant - 7 meters real world space
    points = {
        'P1': [845, 60],
        'P2': [845, 277],
        'P3': [845, 502],
        'P4': [845, 731],
        'P5': [851, 956]
    }

    section_length = 7
    # distance between the first and the last marked line (4 sections -> 28)
    total_length = section_length * (len(points) - 1)

    # Fields are read by name rather than by position in the record dict.
    point_keys = ('P1', 'P2', 'P3', 'P4', 'P5')
    section_speed_keys = ('speed12', 'speed23', 'speed34', 'speed45')

    speeds_dict = dict()

    try:
        cap.set(cv2.CAP_PROP_POS_MSEC, video_time*1000)
        tracker = DeepSort(max_age=1, n_init=2, nms_max_overlap=1.0, embedder_gpu=True)

        while cap.isOpened():
            ret, frame = cap.read()
            # Check the read result before touching the frame: at the end of
            # the video ``frame`` is None and must not reach extract_roi.
            if not ret:
                break

            frame = extract_roi(frame)
            current_time = float('{:.2f}'.format(cap.get(cv2.CAP_PROP_POS_MSEC)/1000))

            results = detection_model(frame)
            bbs = xyxy_to_bb(results.xyxy[0])

            tracks = tracker.update_tracks(bbs, frame=frame)
            for track in tracks:
                if not track.is_confirmed():
                    continue
                track_id = track.track_id

                #(left, top) <-- TOP LEFT, (right, bottom) <-- BOTTOM RIGHT
                bbox = list(track.to_ltrb())
                left, top, right, bottom = (int(value) for value in bbox[:4])
                br = np.array([right, bottom], dtype=np.float32)

                br_transformed = get_point_under_transform(br, transform_matrix)

                if track_id not in speeds_dict:
                    speeds_dict[track_id] = {
                        'P1': 0, 'P2': 0, 'P3': 0, 'P4': 0, 'P5': 0,
                        'delta12': 0, 'delta23': 0, 'delta34': 0, 'delta45': 0, 'total_delta': 0,
                        'speed12': 0,'speed23': 0, 'speed34': 0, 'speed45': 0, 'avg_speed': 0}

                assign_point_time(speeds_dict, points, br_transformed, track_id, current_time, tolerance=10)

                record = speeds_dict[track_id]
                point_times = [record[key] for key in point_keys]
                section_speeds = [record[key] for key in section_speed_keys]

                current_speed = section_speeds[0]

                #print current section speed when new one is calculated
                if section_speeds[1] != 0 and section_speeds[2] == 0:
                    current_speed = section_speeds[1]

                elif section_speeds[2] != 0 and section_speeds[3] == 0:
                    current_speed = section_speeds[2]

                elif section_speeds[3] != 0:
                    current_speed = section_speeds[3]

                #average speed calculation
                if point_times[0] != 0 and point_times[4] != 0:
                    total_delta = point_times[4] - point_times[0]
                    record['total_delta'] = total_delta

                    # Identical first/last timestamps would divide by zero;
                    # skip the average for this frame in that case.
                    if total_delta != 0:
                        # metres per second -> km/h
                        record['avg_speed'] = round((total_length/total_delta) * 3.6, 3)

                        text = f"average speed: {record['avg_speed']}km/h"
                        color_avg_speed = (255,255,255)

                        if record['avg_speed'] > speed_limit:
                            color_avg_speed = (0,0,255)

                        cv2.putText(frame, text, (left, top-60), cv2.FONT_HERSHEY_SIMPLEX, 1, color_avg_speed, 2)

                txt = f"{track_id}, speed: {current_speed}km/h"

                # Only the baseline is needed to place the label above the box.
                _, baseline = cv2.getTextSize(txt, cv2.FONT_HERSHEY_SIMPLEX, 1, 1)
                org = (left, int(top - baseline))

                bbox_color = (0,255,0)
                if current_speed > speed_limit:
                    bbox_color = (0,0,255)

                cv2.rectangle(frame, (left, top), (right, bottom), bbox_color, 1)
                cv2.putText(frame, txt, org, cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 1)

            cv2.putText(frame, f"SPEED LIMIT: {speed_limit}km/h", (200,250), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2)
            cv2.imshow('Video', frame)
            key = cv2.waitKey(1)

            if key & 0xFF == ord('q'):
                break
            if key & 0xFF == ord('p'):
                # pause until any key is pressed
                cv2.waitKey(-1)
    finally:
        cap.release()
        cv2.destroyAllWindows()

    return speeds_dict

In [18]:
# Run the tracker from the start of the video (video_time=0) with an 80 km/h
# limit and keep the per-track timing records it returns.
# NOTE(review): TRANSFORM_MATRIX is not defined in this notebook; presumably
# it is provided by the `speed_funcs` star import - confirm.
speeds_dict = play_tracker_video(yolov5, video_path, TRANSFORM_MATRIX, video_time=0, speed_limit=80)

In [12]:
# Print the average speed stored for every tracked id (0 = not computed).
for track_id, stats in speeds_dict.items():
    print(f"CAR: {track_id}, average_speed: {stats['avg_speed']}")

CAR: 1, average_speed: 96.923
CAR: 2, average_speed: 0
CAR: 3, average_speed: 0
CAR: 4, average_speed: 0


In [13]:
# Print the full record of each track for which an average speed exists.
for stats in speeds_dict.values():
    if stats['avg_speed'] == 0:
        continue
    print(stats)

{'P1': 94.12, 'P2': 94.4, 'P3': 94.66, 'P4': 94.92, 'P5': 95.16, 'delta12': 0.28, 'delta23': 0.26, 'delta34': 0.26, 'delta45': 0.24, 'total_delta': 1.039999999999992, 'speed12': 90.0, 'speed23': 96.923, 'speed34': 96.923, 'speed45': 105.0, 'avg_speed': 96.923}
