In [2]:
import cv2
import torch
import numpy as np

# Input clip that the viewer / tracker cells below read from.
video_path = "data/session5_center/video.avi"

# Medium-size YOLOv5 detector with pretrained weights, obtained through torch.hub
# (the hub checkout is reused from the local cache once it has been downloaded).
model = torch.hub.load('ultralytics/yolov5', 'yolov5m', pretrained=True)

Using cache found in /home/kasvoy/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2023-3-26 Python-3.11.2 torch-2.0.0+cu117 CUDA:0 (NVIDIA GeForce GTX 1060 6GB, 6070MiB)

Fusing layers... 
YOLOv5m summary: 290 layers, 21172173 parameters, 0 gradients
Adding AutoShape... 


In [3]:
# Corners (pixel x, y) of the source quadrilateral in the original frame.
tl = [911, 85]
tr = [1273, 85]
br = [1598, 967]
bl = [470, 924]

# Target position of each corner after warping: an axis-aligned rectangle.
tl_new = [1000, 60]
tr_new = [1400, 60]
br_new = [1400, 959]
bl_new = [1000, 959]

# Both arrays list the corners in the same order (tl, tr, br, bl) so that
# src[i] is paired with dst[i].
src = np.array([tl, tr, br, bl], dtype=np.float32)
dst = np.array([tl_new, tr_new, br_new, bl_new], dtype=np.float32)

# Perspective transform that carries the src quadrilateral onto dst.
transform_matrix = cv2.getPerspectiveTransform(src, dst)

In [4]:
# Display the computed 3x3 perspective-transform matrix as a sanity check.
transform_matrix

array([[     1.5617,      4.1251,     -435.51],
       [  0.0035754,      4.6042,     -314.34],
       [  5.959e-05,   0.0033356,           1]])

In [5]:
"""
Original points (leftmost points starting from up):

p1: (774, 85)
p2: (706, 163)
p3: (601, 284)
p4: (423, 491)
p5: (79, 909)


Transformed:

p1: (845, 60)
p2: (845, 277)
p3: (845, 502)
p4: (845, 731)
p5: (851, 956)
"""

'\nOriginal points (leftmost points starting from up):\n\np1: (774, 85)\np2: (706, 163)\np3: (601, 284)\np4: (423, 491)\np5: (79, 909)\n\n\nTransformed:\n\np1: (845, 60)\np2: (845, 277)\np3: (845, 502)\np4: (845, 731)\np5: (851, 956)\n'

In [6]:
def get_point_under_transform(pt, transform_matrix):
    """Map a single 2-D point through a perspective transform.

    Args:
        pt: np.ndarray holding exactly one (x, y) point. It has to be an
            np.array with dtype=np.float32.
        transform_matrix: Perspective matrix handed to
            ``cv2.perspectiveTransform``.

    Returns:
        np.ndarray of shape (2,) with the transformed (x, y) coordinates.
    """
    # The point is reshaped to (N, 1, 2) before being passed to OpenCV.
    as_batch = pt.reshape(-1, 1, 2)
    warped = cv2.perspectiveTransform(as_batch, transform_matrix)
    # Flatten back to a plain (x, y) pair; this only works for a single point.
    return warped.reshape(2,)

In [7]:
def xyxy_to_bb(result_tensor, classes=(2, 3, 5, 7)):
    """Convert ``xyxy`` detection rows into (ltwh, confidence, class) tuples.

    Args:
        result_tensor: Iterable of detection rows. Each row must expose
            ``tolist()`` returning
            ``[x_min, y_min, x_max, y_max, confidence, class_id, ...]``.
        classes: Iterable of class ids to keep. The default ``(2, 3, 5, 7)``
            restricts the output to vehicles (presumably the COCO ids for
            car, motorcycle, bus and truck -- TODO confirm against the model's
            class names). Pass ``None`` to keep every detection.

    Returns:
        list of ``([left, top, width, height], confidence, class_id)`` tuples,
        in the same order as the input rows. ``class_id`` is returned exactly
        as produced by ``tolist()`` (a float for float tensors).
    """
    keep = None if classes is None else set(classes)
    bbs = []

    for det_tensor in result_tensor:
        x_min, y_min, x_max, y_max, conf, det_class = det_tensor.tolist()[:6]

        # Filter first so no box maths is done for rejected detections.
        # A float id such as 2.0 matches the int 2 because they hash equal.
        if keep is not None and det_class not in keep:
            continue

        bbs.append(([x_min, y_min, x_max - x_min, y_max - y_min], conf, det_class))

    return bbs

In [8]:
def play_transformed_vid(video_path, transform_matrix, output_size=(1920, 1080)):
    """Play a video with every frame warped by a perspective transform.

    Frames are shown in an OpenCV window titled 't'. Playback stops at the
    end of the stream or when 'q' is pressed in the window.

    Args:
        video_path: Path of the video file to open.
        transform_matrix: Perspective matrix passed to ``cv2.warpPerspective``.
        output_size: ``(width, height)`` of the warped image; defaults to the
            previously hard-coded ``(1920, 1080)``.
    """
    cap = cv2.VideoCapture(video_path)

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of stream or read failure.
                break

            cv2.imshow('t', cv2.warpPerspective(frame, transform_matrix, output_size))

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
    finally:
        # Always free the capture and close the window, even if a call above
        # raises, so a failed run does not leave a stale window or file handle.
        cap.release()
        cv2.destroyAllWindows()
    

In [9]:
#play_transformed_vid(video_path, transform_matrix)

In [10]:
def show_dets(video_path):
    """Play a video with the detector's rendered detections drawn on each frame.

    Uses the notebook-level ``model``. Frames are shown in an OpenCV window
    titled "v"; playback stops at the end of the stream or when 'q' is pressed.

    Args:
        video_path: Path of the video file to open.
    """
    video = cv2.VideoCapture(video_path)

    try:
        while video.isOpened():
            ret, frame = video.read()
            if not ret:
                # End of stream or read failure.
                break

            # NOTE(review): OpenCV frames are BGR; the YOLOv5 hub examples
            # convert OpenCV images to RGB before inference -- confirm whether
            # that conversion is wanted here.
            result = model(frame)

            # render() output is squeezed down to the single annotated image.
            cv2.imshow("v", np.squeeze(result.render()))

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
    finally:
        # Guarantee cleanup even when inference or display raises.
        video.release()
        cv2.destroyAllWindows()

In [11]:
# Preview the detector's rendered output on the clip (press 'q' in the window to stop).
show_dets(video_path)

In [16]:
from deep_sort_realtime.deepsort_tracker import DeepSort

def play_tracker_video2(video_path, transform_matrix=None, output_size=(1920, 1080)):
    """Run detection + DeepSort tracking on a video and display the result.

    Each frame is passed to the notebook-level ``model``; the detections are
    converted with ``xyxy_to_bb`` and fed to a ``DeepSort`` tracker. Confirmed
    tracks are drawn onto the frame (green box plus an ``id:<track_id>``
    label) and the frame is shown in a window titled 'Video'.

    Keys: 'q' quits, 'p' pauses until another key is pressed.

    Args:
        video_path: Path of the video file to open.
        transform_matrix: Optional perspective matrix. When given, the
            annotated frame is warped with ``cv2.warpPerspective`` before
            display; when ``None`` the annotated frame is shown as-is
            (previously the ``None`` default crashed inside OpenCV).
        output_size: ``(width, height)`` of the warped image; defaults to the
            previously hard-coded ``(1920, 1080)``.
    """
    cap = cv2.VideoCapture(video_path)
    tracker = DeepSort(max_age=1, n_init=2, nms_max_overlap=1.0, embedder_gpu=True)

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of stream or read failure.
                break

            results = model(frame)
            bbs = xyxy_to_bb(results.xyxy[0])

            tracks = tracker.update_tracks(bbs, frame=frame)
            for track in tracks:
                if not track.is_confirmed():
                    continue

                # Box corners as integer pixel coordinates for drawing.
                left, top, right, bottom = (int(v) for v in track.to_ltrb())

                # Transformed reference points recorded in the notes cell
                # earlier in the notebook:
                #   p1: (845, 60)
                #   p2: (845, 277)
                #   p3: (845, 502)
                #   p4: (845, 731)
                #   p5: (851, 956)

                txt = 'id:' + str(track.track_id)

                # Only the baseline is needed, to lift the label above the box.
                _, baseline = cv2.getTextSize(txt, cv2.FONT_HERSHEY_SIMPLEX, 1, 1)

                cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 1)
                cv2.putText(frame, txt, (left, top - baseline),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 1)

            if transform_matrix is None:
                shown = frame
            else:
                shown = cv2.warpPerspective(frame, transform_matrix, output_size)
            cv2.imshow('Video', shown)

            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break
            if key == ord('p'):
                # Block until any key is pressed, i.e. pause playback.
                cv2.waitKey(-1)
    finally:
        # Always free the capture and close the window, even on errors.
        cap.release()
        cv2.destroyAllWindows()

In [17]:
# Run detection + DeepSort tracking on the clip and show the perspective-warped result.
play_tracker_video2(video_path, transform_matrix)

In [None]:
def show_nth_frame(video_path, n):
    """Display frame number ``n`` of a video until a key is pressed.

    Args:
        video_path: Path of the video file to open.
        n: Frame index to seek to (set via ``cv2.CAP_PROP_POS_FRAMES``).

    Raises:
        ValueError: If the frame cannot be read (e.g. the file could not be
            opened or ``n`` is past the end of the video).
    """
    cap = cv2.VideoCapture(video_path)
    try:
        cap.set(cv2.CAP_PROP_POS_FRAMES, n)
        res, nframe = cap.read()
    finally:
        # The decoded frame is all that is needed; free the capture right away.
        cap.release()

    if not res:
        # Previously the failed read was ignored and cv2.imshow received None.
        raise ValueError(f"Could not read frame {n} from {video_path!r}")

    try:
        cv2.imshow("Frame", nframe)
        cv2.waitKey(0)
    finally:
        cv2.destroyAllWindows()

In [None]:
#show_nth_frame(video_path, 1000)