In [1]:
from ultralytics import YOLO
from PIL import Image
import cv2
from tqdm import tqdm
import numpy as np
from deep_sort.deep_sort import DeepSort
import yaml
import os
import glob
from data_utils import extract_rectangles_from_xml

In [2]:
def calculate_speed(tracking_history, fps, pixel_to_meter_ratio, window=10, default_ratio=0.05):
    """Estimate an object's current speed in km/h from its recent positions.

    Args:
        tracking_history: Sequence of ``(x, y)`` pixel positions, oldest first,
            one entry per processed frame.
        fps: Frame rate used to convert a number of frames into seconds.
        pixel_to_meter_ratio: Mapping ``(upper_y, lower_y) -> ratio``. The ratio
            of the first band satisfying ``lower_y <= y < upper_y`` for the most
            recent ``y`` is used to convert the pixel distance into meters.
        window: Maximum number of most recent positions taken into account
            (default 10, the value that was previously hard-coded).
        default_ratio: Ratio used when no band contains the most recent ``y``
            (default 0.05, the value that was previously hard-coded).

    Returns:
        The speed in km/h, or ``None`` when it cannot be estimated: fewer than
        three positions, a non-positive ``fps``, or a ``window`` smaller than 2.
    """
    # A non-positive fps (e.g. 0 reported for an unreadable video) would
    # otherwise end in a ZeroDivisionError further down.
    if len(tracking_history) < 3 or fps <= 0 or window < 2:
        return None

    # Keep the most recent positions only; the oldest entry of the history is
    # never part of the window (same rule as min(10, len - 1) before).
    num_positions = min(window, len(tracking_history) - 1)
    recent = np.asarray(tracking_history[-num_positions:], dtype=float)

    # Length of the travelled path: sum of the Euclidean distances between
    # consecutive positions.
    steps = np.diff(recent, axis=0)
    total_distance = np.linalg.norm(steps, axis=1).sum()

    # Pick the ratio of the band containing the latest y-coordinate.
    current_y = tracking_history[-1][1]
    current_pixel_to_meter_ratio = next(
        (
            value
            for (upper_y, lower_y), value in pixel_to_meter_ratio.items()
            if lower_y <= current_y < upper_y
        ),
        default_ratio,
    )

    total_distance_meters = total_distance * current_pixel_to_meter_ratio
    total_time_seconds = (len(recent) - 1) / fps
    speed_mps = total_distance_meters / total_time_seconds
    return speed_mps * 3.6  # m/s -> km/h


"""def calculate_speed(tracking_history, fps, pixel_to_meter_ratio):

    if len(tracking_history) < 2:
        speed = None  # Not enough data to calculate speed
    else:
        total_distance = 0
        for i in range(1, len(tracking_history)):
            # Calculate Euclidean distance between consecutive positions
            distance = np.linalg.norm(np.array(tracking_history[i]) - np.array(tracking_history[i - 1]))
            total_distance += distance
        # Calculate speed (meters per second)
        total_distance_meters = total_distance * pixel_to_meter_ratio
        total_time_seconds = (len(tracking_history) - 1) / fps  # Time in seconds
        speed_mps = total_distance_meters / total_time_seconds
        speed = speed_mps * 3.6  # Convert to km/h
    return speed"""

def perform_tracking(source, config, save_path, annotation, pixel_to_meter_ratio):
    """Draw annotated tracks and speed estimates on a video and collect the boxes.

    For every frame of ``source`` the annotated boxes are drawn together with
    the object id, the trajectory of the box centre and the estimated speed.
    The rendered frames are written to ``save_path + '.mp4'``.

    Args:
        source: Path (or anything ``cv2.VideoCapture`` accepts) of the input video.
        config: Not read by this function; kept so existing callers keep working.
        save_path: Output path without extension.
        annotation: Container indexed by the zero-based frame number. Each entry
            is a sequence of boxes whose first four values are
            ``left, top, right, bottom`` and whose fifth value is the object id.
        pixel_to_meter_ratio: Forwarded unchanged to ``calculate_speed``.

    Returns:
        dict mapping frame number -> list of ``[left, top, right, bottom, obj_id]``
        (all ints) for every frame that had at least one annotated box.
    """
    cap = cv2.VideoCapture(source)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))

    cap.set(cv2.CAP_PROP_POS_FRAMES, 0)  # always start from the first frame
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video = cv2.VideoWriter(save_path+'.mp4', fourcc, fps, (width, height), True)

    frame_count = -1
    tracking = {}
    colors = {}
    tracking_history = {}

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            frame_count += 1
            if not ret:
                print("Finish!")
                break

            # A frame without an annotation entry is treated like a frame
            # with no boxes instead of aborting the whole run.
            try:
                bboxes = annotation[frame_count]
            except (KeyError, IndexError):
                bboxes = []

            # NOTE(review): frames without boxes are not written to the output
            # video (behaviour kept from the previous version) - confirm that
            # this is intended.
            if len(bboxes) == 0:
                continue

            frame_records = []
            for bbox in bboxes:
                obj_id = int(bbox[4])
                left, top, right, bottom = (int(v) for v in bbox[:4])
                frame_records.append([left, top, right, bottom, obj_id])

                # Assign a random colour the first time an object is seen
                if obj_id not in colors:
                    colors[obj_id] = (np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255))
                color = colors[obj_id]

                # Draw the bounding box and the object id
                start_point = (left, top)
                end_point = (right, bottom)
                frame = cv2.rectangle(frame, start_point, end_point, color, 2)
                frame = cv2.putText(frame, str(obj_id), start_point, cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2, cv2.LINE_AA)

                # Update the trajectory with the centre of the box
                center_position = ((left + right) // 2, (top + bottom) // 2)
                history = tracking_history.setdefault(obj_id, [])
                history.append(center_position)

                # Draw the trajectory as a chain of line segments
                for previous_point, current_point in zip(history, history[1:]):
                    cv2.line(frame, previous_point, current_point, color, 2)

                # Estimate the speed in km/h (None while there is not enough history)
                speed = calculate_speed(history, fps, pixel_to_meter_ratio)
                if speed is not None:
                    frame = cv2.putText(frame, f"{speed:.2f} km/h", (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2, cv2.LINE_AA)

            # Record the boxes so the caller can export them; previously the
            # returned dict was never filled.
            tracking[frame_count] = frame_records
            video.write(frame)
    finally:
        # Release both handles even when the loop is interrupted, otherwise the
        # output file is left unfinalised and the capture stays open.
        cap.release()
        video.release()

    return tracking

In [3]:
def export_results(save_path, tracking):
    """Write the tracked boxes to ``save_path + '.txt'`` in the gt.txt layout.

    Args:
        save_path: Output path without extension.
        tracking: Mapping frame -> iterable of boxes, each box being
            ``(left, top, right, bottom, obj_id)``; values are cast to int.

    Every box becomes one line
    ``frame, id, left, top, width, height, conf, x, y, z``. Lines are joined
    with newlines and no trailing newline is written.
    """
    # <conf>, <x>, <y>, <z> are not provided: conf is fixed to 1 and -1 marks
    # the unknown / not applicable world coordinates.
    conf, x, y, z = 1, -1, -1, -1

    def as_row(frame, bbox):
        # Corner representation -> left/top/width/height representation
        bb_left, bb_top, bb_right, bb_bottom, obj_id = (int(v) for v in bbox)
        bb_width, bb_height = bb_right - bb_left, bb_bottom - bb_top
        return f"{frame}, {obj_id}, {bb_left}, {bb_top}, {bb_width}, {bb_height}, {conf}, {x}, {y}, {z}"

    rows = [as_row(frame, bbox) for frame, bboxes in tracking.items() for bbox in bboxes]

    with open(save_path+'.txt', 'w') as out_file:
        out_file.write("\n".join(rows))

In [4]:
# Calibration marks measured on the reference frame: each pair holds the two
# y-coordinates (in pixels, larger value first) of one reference segment.
# The segment's pixel length is inverted to get a meters-per-pixel ratio, i.e.
# every segment is treated as one unit long (presumably 1 m - TODO confirm).
measurements = [(851, 794), (671,631), (545, 518), (454, 435), (393, 379), (349, 338), (306, 297), (275, 268), (249, 244)]


image = cv2.imread("frame.jpg")
if image is None:
    # cv2.imread does not raise on a missing/unreadable file, it returns None
    raise FileNotFoundError("Could not read calibration image 'frame.jpg'")
y, x, _ = image.shape

# Build horizontal bands keyed by (upper_y, lower_y). The first band starts at
# the image height, every band ends at the larger y of its measurement, and
# the last band is extended to y = 0 so the bands cover the whole frame.
pixel_to_meter_ratio = {}
band_upper = y
for i, (mark_lower_edge, mark_upper_edge) in enumerate(measurements):
    is_last = i + 1 == len(measurements)
    band_lower = 0 if is_last else mark_lower_edge
    distance_pixels = mark_lower_edge - mark_upper_edge
    pixel_to_meter_ratio[(band_upper, band_lower)] = 1 / distance_pixels
    # The next band starts where this measurement begins
    band_upper = mark_lower_edge

{(1080, 851): 0.017543859649122806, (851, 671): 0.025, (671, 545): 0.037037037037037035, (545, 454): 0.05263157894736842, (454, 393): 0.07142857142857142, (393, 349): 0.09090909090909091, (349, 306): 0.1111111111111111, (306, 275): 0.14285714285714285, (275, 0): 0.2}


In [5]:
# Glob pattern covering every training video of the dataset
root = '../../data/aic19-track1-mtmc-train/train/*/*/*.avi'
model = YOLO("yolov8n.pt")
config_path = "./config.yaml"

# Keep the path and the parsed settings under different names
with open(config_path) as file:
    config = yaml.safe_load(file)

tracker_cfg = config['deepsort_tracker']
deepsort = DeepSort(model_path=tracker_cfg['model_path'],
                max_dist=tracker_cfg['max_dist'],
                min_confidence=tracker_cfg['min_confidence'],
                nms_max_overlap=tracker_cfg['nms_max_overlap'],
                max_iou_distance=tracker_cfg['max_iou_distance'],
                max_age=tracker_cfg['max_age'],
                n_init=tracker_cfg['n_init'],
                nn_budget=tracker_cfg['nn_budget'],
                use_cuda=tracker_cfg['use_cuda'])


source = '../../data/aic19-track1-mtmc-train/train/S03/c010/vdo.avi'
# Alternative input for quick experiments: source = 'aux.mp4'
save_path = "./results/out"

# Create the output folder up front: cv2.VideoWriter fails silently on a
# missing directory and the text export would only fail after the whole run.
output_dir = os.path.dirname(save_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

annotation = extract_rectangles_from_xml('../../data/ai_challenge_s03_c010-full_annotation.xml', add_track_id=True)

tracking = perform_tracking(source, config, save_path, annotation, pixel_to_meter_ratio)
export_results(save_path, tracking)

KeyboardInterrupt: 