In [3]:
import cv2
import torch
import numpy as np

# Video clip that the rest of the notebook works on.
video_path = "data/session5_center/video.avi"

# Pretrained YOLOv5-medium detector, fetched (or read from the local cache) via torch.hub.
model = torch.hub.load('ultralytics/yolov5', 'yolov5m', pretrained=True)

Using cache found in /home/kasvoy/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2023-3-26 Python-3.11.2 torch-2.0.0+cu117 CUDA:0 (NVIDIA GeForce GTX 1060 6GB, 6070MiB)

Fusing layers... 
YOLOv5m summary: 290 layers, 21172173 parameters, 0 gradients
Adding AutoShape... 


In [4]:
# Corners of the source quadrilateral, as [x, y] pixel coordinates
# (tl/tr/bl/br = top-left, top-right, bottom-left, bottom-right).
tl, tr = [911, 85], [1273, 85]
bl, br = [470, 924], [1598, 967]

# Corners of the axis-aligned rectangle the quadrilateral is mapped onto.
tl_new, tr_new = [1000, 60], [1400, 60]
bl_new, br_new = [1000, 959], [1400, 959]

# Both arrays list the corners in the same order: tl, tr, br, bl.
src = np.array([tl, tr, br, bl], dtype=np.float32)
dst = np.array([tl_new, tr_new, br_new, bl_new], dtype=np.float32)

# 3x3 perspective matrix taking each src corner to the matching dst corner.
transform_matrix = cv2.getPerspectiveTransform(src, dst)

In [5]:
transform_matrix

array([[     1.5617,      4.1251,     -435.51],
       [  0.0035754,      4.6042,     -314.34],
       [  5.959e-05,   0.0033356,           1]])

In [6]:
"""
Original points (leftmost points starting from up):

p1: (774, 85)
p2: (706, 163)
p3: (601, 284)
p4: (423, 491)
p5: (79, 909)


Transformed:

p1: (845, 60)
p2: (845, 277)
p3: (845, 502)
p4: (845, 731)
p5: (851, 956)
"""

'\nOriginal points (leftmost points starting from up):\n\np1: (774, 85)\np2: (706, 163)\np3: (601, 284)\np4: (423, 491)\np5: (79, 909)\n\n\nTransformed:\n\np1: (845, 60)\np2: (845, 277)\np3: (845, 502)\np4: (845, 731)\np5: (851, 956)\n'

In [7]:
#ROI includes only the lanes going in direction of the camera
def extract_roi(frame):
    """Return the region of interest: every row, columns 0 up to (not including) 1450."""
    roi_right_edge = 1450
    return frame[:, :roi_right_edge]

In [8]:
def get_point_under_transform(pt, transform_matrix):
    """Map a single 2-D point through a perspective transform.

    pt has to be an np.array with dtype=np.float32 holding one (x, y) pair.
    Returns the transformed point reshaped to shape (2,).
    """
    # The point is passed to cv2.perspectiveTransform as an (N, 1, 2) array.
    points_in = pt.reshape(-1, 1, 2)
    points_out = cv2.perspectiveTransform(points_in, transform_matrix)
    return points_out.reshape(2,)

In [9]:
#function that converts yolov5's bounding box format to ltwh format for deepsort
def xyxy_to_bb(result_tensor, vehicle_classes=(2, 3, 5, 7)):
    """Convert YOLOv5 xyxy detections into the tuples passed to the tracker.

    Args:
        result_tensor: iterable of detection rows; each row must provide
            ``.tolist()`` yielding [x_min, y_min, x_max, y_max, conf, class].
        vehicle_classes: class ids to keep. The default (2, 3, 5, 7) is the
            original hard-coded "vehicles only" set; in the COCO label order
            these are car, motorcycle, bus and truck.

    Returns:
        A list of ``([left, top, width, height], conf, det_class)`` tuples,
        one per kept detection, in input order.
    """
    wanted = frozenset(vehicle_classes)
    bbs = []

    for det_tensor in result_tensor:
        x_min, y_min, x_max, y_max, conf, det_class = det_tensor.tolist()[:6]

        # Filter first so width/height are only computed for kept detections.
        if det_class not in wanted:
            continue

        bbs.append(([x_min, y_min, x_max - x_min, y_max - y_min], conf, det_class))
    return bbs

In [10]:
def play_transformed_vid(video_path, transform_matrix, output_size=(1920, 1080)):
    """Play a video with every frame warped by a perspective transform.

    Args:
        video_path: path handed to cv2.VideoCapture.
        transform_matrix: perspective matrix passed to cv2.warpPerspective.
        output_size: (width, height) of the warped image; defaults to the
            previously hard-coded (1920, 1080).

    Playback stops at the end of the video or when 'q' is pressed.
    """
    cap = cv2.VideoCapture(video_path)

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            cv2.imshow('t', cv2.warpPerspective(frame, transform_matrix, output_size))

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
    finally:
        # Release the capture and close the window even if a frame fails.
        cap.release()
        cv2.destroyAllWindows()
    

In [11]:
#play_transformed_vid(video_path, transform_matrix)

In [12]:
#show model detections
def show_dets(video_path):
    """Play a video with the detector's rendered detections drawn on the ROI.

    Uses the notebook-level ``model`` and ``extract_roi``. Playback stops at
    the end of the video or when 'q' is pressed.
    """
    window_name = "Resized_Window"
    video = cv2.VideoCapture(video_path)

    try:
        # Window setup does not depend on the frame, so do it once up front.
        cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
        cv2.resizeWindow(window_name, 1920, 1080)

        while video.isOpened():
            ret, frame = video.read()
            if not ret:
                break

            result = model(extract_roi(frame))
            cv2.imshow(window_name, np.squeeze(result.render()))

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
    finally:
        # Release the capture and close the window even if inference fails.
        video.release()
        cv2.destroyAllWindows()

In [13]:
def show_nth_frame(video_path, n, transformed=True):
    """Display frame number ``n`` of a video until a key is pressed.

    Args:
        video_path: path handed to cv2.VideoCapture.
        n: frame index to seek to (CAP_PROP_POS_FRAMES).
        transformed: when True, warp the frame with the notebook-level
            ``transform_matrix`` to 1920x1080 before showing it.

    Raises:
        ValueError: if no frame could be read at that position.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        cap.set(cv2.CAP_PROP_POS_FRAMES, n)
        ret, frame = cap.read()
    finally:
        # Only one frame is needed, so the capture can be released right away.
        cap.release()

    # Without this check a failed read would pass None on to OpenCV.
    if not ret:
        raise ValueError(f"Could not read frame {n} from {video_path!r}")

    if transformed:
        frame = cv2.warpPerspective(frame, transform_matrix, (1920, 1080))

    try:
        cv2.imshow("frame", frame)
        cv2.waitKey(0)
    finally:
        cv2.destroyAllWindows()

In [14]:
def show_time_frame(video_path, video_time = 25, transformed=True):
    """Display the frame at ``video_time`` seconds until a key is pressed.

    Args:
        video_path: path handed to cv2.VideoCapture.
        video_time: position in seconds; converted to milliseconds for
            CAP_PROP_POS_MSEC.
        transformed: when True, warp the frame with the notebook-level
            ``transform_matrix`` to 1920x1080 before showing it.

    Raises:
        ValueError: if no frame could be read at that position.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        cap.set(cv2.CAP_PROP_POS_MSEC, video_time*1000)
        ret, frame = cap.read()
    finally:
        # Only one frame is needed, so the capture can be released right away.
        cap.release()

    # Without this check a failed read would pass None on to OpenCV.
    if not ret:
        raise ValueError(f"Could not read a frame at {video_time} s from {video_path!r}")

    if transformed:
        frame = cv2.warpPerspective(frame, transform_matrix, (1920, 1080))

    try:
        cv2.imshow("frame", frame)
        cv2.waitKey(0)
    finally:
        cv2.destroyAllWindows()

In [15]:
def display_video_time(current_time, frame):
    """Write "Video time: <t>" onto ``frame`` at pixel (200, 200).

    ``current_time`` is rounded to two decimals first. Returns nothing; the
    text is drawn through cv2.putText on the frame that was passed in.
    """
    rounded_time = float(format(current_time, '.2f'))
    label = "Video time: " + str(rounded_time)

    font_scale = 1
    white = (255, 255, 255)
    # Per the OpenCV signature, the two trailing 2s are thickness and lineType.
    cv2.putText(frame, label, (200, 200), cv2.FONT_HERSHEY_SIMPLEX, font_scale, white, 2, 2)

In [84]:
#add an entry to the dictionary which cars crossed which points at what times - changes state of dictionary
def assign_point_time(speeds_dict, points, br_transformed, track_id, cap, tolerance, section_length=7):
    """Record when a track reaches each marked line and derive section speeds.

    Mutates ``speeds_dict[track_id]`` in place and returns nothing.

    Args:
        speeds_dict: maps track id -> dict with keys 'P1'..'P5' (crossing
            times, 0 meaning "not recorded yet") and 'deltaXY' / 'speedXY'.
        points: maps point name ('P1', 'P2', ...) -> [x, y]; only y is used.
        br_transformed: the track's reference point; only index 1 (y) is used.
        track_id: key of the track inside ``speeds_dict``.
        cap: capture object; its CAP_PROP_POS_MSEC position is the timestamp.
        tolerance: half-height in pixels of the band around each line.
        section_length: distance between neighbouring lines, used for the
            section speed (defaults to the previously hard-coded 7).

    The first time P3, P4 or P5 is recorded and the preceding point already
    has a time, 'delta<prev><cur>' gets the elapsed time and
    'speed<prev><cur>' gets ``section_length / delta``. That is length units
    per second, not converted to km/h.
    """
    crossing_time = float('{:.2f}'.format(cap.get(cv2.CAP_PROP_POS_MSEC)/1000))
    y = br_transformed[1]

    for point_name, point_coords in points.items():
        point_y = point_coords[1]

        # P1 gets 10 extra pixels of slack on the low-y side of its band.
        upper_slack = tolerance + (10 if point_name == 'P1' else 0)
        if not (point_y - upper_slack <= y <= point_y + tolerance):
            continue

        track_stats = speeds_dict[track_id]
        if track_stats[point_name] != 0:
            # Already recorded on an earlier frame; keep the first time.
            continue
        track_stats[point_name] = crossing_time

        point_index = int(point_name[1])
        previous_index = point_index - 1
        # Section figures are only kept for P2->P3, P3->P4 and P4->P5.
        if previous_index not in (2, 3, 4):
            continue

        time_at_prev = track_stats['P' + str(previous_index)]
        if time_at_prev == 0:
            continue

        delta = round(crossing_time - time_at_prev, 3)
        # Skip non-positive deltas: zero would divide by zero, and a negative
        # one would give a meaningless negative speed.
        if delta <= 0:
            continue

        track_stats[f'delta{previous_index}{point_index}'] = delta
        track_stats[f'speed{previous_index}{point_index}'] = section_length / delta

In [76]:
from deep_sort_realtime.deepsort_tracker import DeepSort

def play_tracker_video(video_path, transform_matrix=None, video_time=30, speed_limit=80):
    """Detect and track vehicles in a video and time them across marked lines.

    Relies on the notebook-level ``model``, ``extract_roi``,
    ``display_video_time``, ``xyxy_to_bb``, ``get_point_under_transform`` and
    ``assign_point_time``.

    Args:
        video_path: path handed to cv2.VideoCapture.
        transform_matrix: perspective matrix used to map each track's
            bottom-right box corner into transformed space. Despite the None
            default it is required as soon as a confirmed track appears.
        video_time: start position in seconds.
        speed_limit: tracks whose average speed exceeds this are boxed in
            (0, 0, 255) instead of (0, 255, 0).

    Returns:
        dict mapping track id -> dict with 'P1'..'P5' crossing times,
        'delta23/34/45', 'total_delta', 'speed23/34/45' and 'avg_speed'
        (0 meaning "not set").

    Press 'q' to stop and 'p' to pause until the next key press.
    """
    cap = cv2.VideoCapture(video_path)
    cap.set(cv2.CAP_PROP_POS_MSEC, video_time*1000)

    tracker = DeepSort(max_age=1, n_init=2, nms_max_overlap=1.0, embedder_gpu=True)

    #coordinates of the 5 marked lines in transformed space
    #points are roughly equidistant - 7 meters real world space
    points = {
        'P1': [845, 60],
        'P2': [845, 277],
        'P3': [845, 502],
        'P4': [845, 731],
        'P5': [851, 956]
    }

    # P1 -> P5 spans four 7 m sections.
    total_length = 28

    speeds_dict = dict()

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            # Check ret before touching the frame: at end of video it is None
            # and cropping it would raise.
            if not ret:
                break
            frame = extract_roi(frame)

            current_time = float('{:.2f}'.format(cap.get(cv2.CAP_PROP_POS_MSEC)/1000))
            display_video_time(current_time, frame)

            results = model(frame)
            bbs = xyxy_to_bb(results.xyxy[0])

            tracks = tracker.update_tracks(bbs, frame=frame)
            for track in tracks:
                if not track.is_confirmed():
                    continue
                track_id = track.track_id

                #[bbox[0], bbox[1]] <-- TOP LEFT, [bbox[2], bbox[3]] <-- BOTTOM RIGHT
                bbox = list(track.to_ltrb())
                br = np.array([int(bbox[2]), int(bbox[3])], dtype=np.float32)
                br_transformed = get_point_under_transform(br, transform_matrix)

                if track_id not in speeds_dict:
                    speeds_dict[track_id] = {
                        'P1': 0, 'P2': 0, 'P3': 0, 'P4': 0, 'P5': 0,
                        'delta23': 0, 'delta34': 0, 'delta45': 0, 'total_delta': 0,
                        'speed23': 0, 'speed34': 0, 'speed45': 0, 'avg_speed': 0}
                track_stats = speeds_dict[track_id]

                assign_point_time(speeds_dict, points, br_transformed, track_id, cap, tolerance=10)

                #average speed calculation
                # Read the times by key instead of slicing dict.values() by position.
                have_both_ends = track_stats['P1'] != 0 and track_stats['P5'] != 0
                if have_both_ends:
                    total_delta = track_stats['P5'] - track_stats['P1']
                    track_stats['total_delta'] = total_delta
                    # Guard against a zero or negative interval.
                    if total_delta > 0:
                        # length / seconds * 3.6 -> km/h
                        track_stats['avg_speed'] = (total_length/total_delta) * 3.6

                # Read after the update so the overlay shows this frame's value.
                avg_speed_kph = track_stats['avg_speed']

                if have_both_ends:
                    text = f"CAR {track_id}: average speed: {avg_speed_kph}"
                    cv2.putText(frame, text, (400,400), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2)

                txt = f"id: {str(track.track_id)}, current speed: {avg_speed_kph}"

                # Only the baseline is needed, to lift the label above the box.
                _, baseline = cv2.getTextSize(txt, cv2.FONT_HERSHEY_SIMPLEX, 1, 1)
                org = (int(bbox[0]), int(bbox[1]) - baseline)

                bbox_color = (0,255,0)
                if avg_speed_kph > speed_limit:
                    bbox_color = (0,0,255)

                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), bbox_color, 1)
                cv2.putText(frame, txt, org, cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 1)

            cv2.imshow('Video', frame)
            key = cv2.waitKey(1) & 0xFF

            if key == ord('q'):
                break
            if key == ord('p'):
                # Block until any key is pressed.
                cv2.waitKey(-1)
    finally:
        # Release the capture and close the window even if a frame fails.
        cap.release()
        cv2.destroyAllWindows()

    return speeds_dict

In [77]:
# Start at 85 s, shortly before ground-truth car id 23 crosses the
# measurement lines (about 86.3 s to 87.6 s in gt_data).
speeds_dict = play_tracker_video(
    video_path,
    transform_matrix,
    video_time=85,
    speed_limit=80,
)

In [78]:
# Print only the tracks for which an average speed was computed.
for val in speeds_dict.values():
    if val['avg_speed'] == 0:
        continue
    print(val)

{'P1': 86.34, 'P2': 86.68, 'P3': 86.98, 'P4': 87.3, 'P5': 87.6, 'delta23': 0.3, 'delta34': 0.32, 'delta45': 0.3, 'total_delta': 1.259999999999991, 'speed23': 0, 'speed34': 0, 'speed45': 0, 'avg_speed': 80.00000000000057}


In [62]:
87.6-86.34

1.259999999999991

In [63]:
0.3+0.32+0.3+(86.68-86.34)

1.2600000000000033

In [53]:
round(88.08-87.76,3)

0.32

In [None]:
87.6-8

In [30]:
speeds_dict['2']

{'P1': 86.34,
 'P2': 86.68,
 'P3': 86.98,
 'P4': 87.3,
 'P5': 87.6,
 'delta23': 0.32,
 'delta34': 0.32,
 'delta45': 0.3,
 'total_delta': 1.259999999999991,
 'speed23': 0,
 'speed34': 0,
 'speed45': 0,
 'avg_speed': 80.00000000000057}

In [40]:
list(speeds_dict['2'].values())[9:12]

[0, 0, 0]

In [20]:
import pickle

# Load the gt_data pickle for this session into speed_data.
# NOTE(review): pickle.load can execute arbitrary code while unpickling —
# only open this file if it comes from a trusted source.
# NOTE(review): encoding='latin' suggests the file was pickled under
# Python 2 — confirm against the dataset's documentation.
with open('data/session5_center/gt_data.pkl', 'rb') as pkl_file:
    speed_data = pickle.load(pkl_file, encoding='latin')

In [21]:
speed_data['cars'][22:50]

[{'acceleration': 0.369,
  'intersections': [{'measurementLineId': 1, 'videoTime': 86.304486},
   {'measurementLineId': 0, 'videoTime': 87.58067}],
  'carId': 23,
  'timeIntersectionLastShifted': 87.58067,
  'valid': True,
  'laneIndex': {0},
  'speed': 79.0344},
 {'acceleration': 0.243,
  'intersections': [{'measurementLineId': 1, 'videoTime': 87.39473799999999},
   {'measurementLineId': 0, 'videoTime': 88.68946199999999}],
  'carId': 24,
  'timeIntersectionLastShifted': 88.68946199999999,
  'valid': True,
  'laneIndex': {0},
  'speed': 77.9004},
 {'acceleration': -0.004,
  'intersections': [{'measurementLineId': 1, 'videoTime': 88.575238},
   {'measurementLineId': 0, 'videoTime': 89.924822}],
  'carId': 25,
  'timeIntersectionLastShifted': 89.924822,
  'valid': True,
  'laneIndex': {0},
  'speed': 74.736},
 {'acceleration': -0.241,
  'intersections': [{'measurementLineId': 1, 'videoTime': 91.408238},
   {'measurementLineId': 0, 'videoTime': 92.81025}],
  'carId': 26,
  'timeIntersect