In [1]:
from ultralytics import YOLO
from PIL import Image
import cv2
from tqdm import tqdm
import numpy as np
from deep_sort.deep_sort import DeepSort
import yaml
import os
import glob

In [2]:
def _draw_tracks(canvas, track_result, scale, colors, tracking_history):
    """Draw every track's box, id label and trajectory onto ``canvas`` in place.

    Args:
        canvas: Output image (already resized by ``scale``) that is drawn on.
        track_result: 2-D array whose first four columns are used as the
            top-left / bottom-right box corners and whose last column is the
            track id, in original-frame pixels.
        scale: Factor mapping original-frame coordinates onto ``canvas``.
        colors: Dict ``track id -> BGR tuple``; new ids get a random color.
        tracking_history: Dict ``track id -> list of box centers`` (canvas
            coordinates); the current center is appended for each track.
    """
    track_box, track_id = track_result[:, :4], track_result[:, -1]
    for bbox, raw_id in zip(track_box, track_id):
        obj_id = int(raw_id)

        # Assign a unique color the first time an id is seen
        if obj_id not in colors:
            colors[obj_id] = tuple(np.random.randint(0, 255, size=3).tolist())
        color = colors[obj_id]

        # Box corners are scaled because the canvas may be smaller/larger
        # than the frame the tracker worked on.
        start_point = (int(bbox[0] * scale), int(bbox[1] * scale))
        end_point = (int(bbox[2] * scale), int(bbox[3] * scale))
        cv2.rectangle(canvas, start_point, end_point, color, 2)
        cv2.putText(canvas, str(obj_id), start_point, cv2.FONT_HERSHEY_SIMPLEX,
                    1, color, 2, cv2.LINE_AA)

        # Update tracking history with the box center
        center_position = ((start_point[0] + end_point[0]) // 2,
                           (start_point[1] + end_point[1]) // 2)
        history = tracking_history.setdefault(obj_id, [])
        history.append(center_position)

        # Draw the trajectory as consecutive segments through all past centers
        for prev_point, next_point in zip(history, history[1:]):
            cv2.line(canvas, prev_point, next_point, color, 2)


def perform_tracking(source, config, save_path, model, deepsort):
    """Detect class-2 objects per frame, track them and save an annotated video.

    Args:
        source: Video path (anything ``cv2.VideoCapture`` accepts).
        config: Parsed configuration; ``config["video"]["scale_video_size"]``
            is the factor applied to the frame size of the rendered video.
        save_path: Output path without extension; the annotated video is
            written to ``save_path + '.mp4'``.
        model: Detector called as ``model(frame, verbose=False)``. The first
            result must expose ``.boxes`` whose items provide ``xywh``,
            ``conf`` and ``cls``. Only boxes with ``cls == 2`` are kept
            (presumably "car" in the COCO label set - confirm for the
            weights in use).
        deepsort: Tracker exposing ``update(bbox_xywh, confidences, frame)``.

    Returns:
        Dict mapping the 1-based frame index to the value returned by
        ``deepsort.update`` for that frame. Detections and the frame passed to
        the tracker are both in original (unscaled) pixels. Frames without any
        class-2 detection have no entry, and the tracker is not updated on
        them.
    """
    scale = config["video"]['scale_video_size']

    cap = cv2.VideoCapture(source)
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Keep the frame rate as a float so e.g. 29.97 fps is not truncated.
    fps = cap.get(cv2.CAP_PROP_FPS)
    size = (int(frame_width * scale), int(frame_height * scale))

    # The writer's frame size must equal the size of the frames passed to
    # write(), so it is opened with the scaled size.
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video = cv2.VideoWriter(save_path+'.mp4', fourcc, fps, size, True)

    frame_count = 0
    tracking = {}
    colors = {}
    tracking_history = {}
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print("Finish!")
                break
            frame_count += 1

            # Run model on current frame
            results = model(frame, verbose=False)

            # Keep [x, y, w, h, conf] for class-2 detections only
            filtered_pred = [box.xywh[0].to(int).tolist() + box.conf.tolist()
                             for box in results[0].boxes if box.cls == 2]

            # Rendering happens on a resized copy; detection and tracking use
            # the original frame so boxes and image share one coordinate system.
            canvas = cv2.resize(frame, size, interpolation=cv2.INTER_AREA)

            if filtered_pred:
                dets = np.array(filtered_pred)
                track_result = deepsort.update(dets[:, :4], dets[:, -1:], frame)
                tracking[frame_count] = track_result

                if len(track_result) > 0:
                    _draw_tracks(canvas, track_result, scale, colors, tracking_history)

            # Write every frame (also those without detections) so the output
            # video keeps the timing of the input.
            video.write(canvas)
    finally:
        # Release both handles even if detection or tracking raised.
        cap.release()
        video.release()

    return tracking

In [3]:
def export_results(save_path, tracking):
    """Write tracking results to ``save_path + '.txt'`` in a gt.txt-like layout.

    Each output line has the form
    ``frame, id, left, top, width, height, conf, x, y, z`` (separated by
    ``", "``), lines are joined with newlines and no trailing newline is added.

    Args:
        save_path: Output path without the ``.txt`` extension.
        tracking: Mapping ``frame -> iterable of 5-element boxes``
            ``(left, top, right, bottom, id)``; every element is cast to int.
    """
    # The tracker provides neither a confidence nor world coordinates, so
    # fixed placeholders are written (-1 meaning unknown / not applicable).
    confidence = 1
    world_x = world_y = world_z = -1

    def _format_row(frame_idx, track):
        left, top, right, bottom, track_id = (int(value) for value in track)
        box_w = right - left
        box_h = bottom - top
        return (f"{frame_idx}, {track_id}, {left}, {top}, {box_w}, {box_h}, "
                f"{confidence}, {world_x}, {world_y}, {world_z}")

    # Build all rows before touching the file system
    rows = [_format_row(frame_idx, track)
            for frame_idx, tracks in tracking.items()
            for track in tracks]

    with open(save_path+'.txt', 'w') as out_file:
        out_file.write("\n".join(rows))

In [4]:
# Glob pattern matching every training video: <sequence>/<camera>/<file>.avi
root = '../../data/aic19-track1-mtmc-train/train/*/*/*.avi'
model = YOLO("yolov8n.pt")
config_path = "./config.yaml"

with open(config_path) as file:
    config = yaml.safe_load(file)

# Keyword arguments forwarded to DeepSort from the `deepsort_tracker` section
DEEPSORT_KEYS = ('model_path', 'max_dist', 'min_confidence', 'nms_max_overlap',
                 'max_iou_distance', 'max_age', 'n_init', 'nn_budget', 'use_cuda')

# Sorted so the processing order is deterministic across runs
for source in sorted(glob.glob(root)):
    # Name results after the two directories above the video, e.g. "S03_c011".
    # Derived from the path components so it does not depend on the exact
    # length of the file or directory names.
    camera_dir = os.path.dirname(source)
    sequence_dir = os.path.dirname(camera_dir)
    name = os.path.basename(sequence_dir) + '_' + os.path.basename(camera_dir)

    path = os.path.join('./results', name)
    os.makedirs(path, exist_ok=True)
    save_path = f"{path}/week3"

    # A fresh tracker per video: reusing one instance would carry tracks and
    # ids from the previous video over into the next one.
    deepsort = DeepSort(**{key: config['deepsort_tracker'][key] for key in DEEPSORT_KEYS})

    tracking = perform_tracking(source, config, save_path, model, deepsort)
    export_results(save_path, tracking)

Finish!
