In [1]:
import os

import cv2
import numpy as np
from PIL import Image
from tqdm import tqdm
from ultralytics import YOLO

from data_utils import extract_rectangles_from_xml
from eval_utils import mAP
from sort import Sort

In [2]:
# Toggle between our fine-tuned checkpoint ("last.pt") and the stock
# pretrained YOLOv8-nano weights ("yolov8n.pt").
finetuned = True

# Load the detector from whichever weights file the flag selects.
model = YOLO("last.pt" if finetuned else "yolov8n.pt")

In [3]:
# Class index kept when running the stock (non-finetuned) model.
# NOTE(review): index 2 is "car" in the COCO label set used by yolov8n.pt -- confirm.
COCO_CAR_CLASS_ID = 2
# Number of consecutive frames to run the detector on.
n_inference_frames = 100

cap = cv2.VideoCapture('data/S03/c010/vdo.avi')
width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
fps = int(cap.get(cv2.CAP_PROP_FPS))

# Skip the first 25% of the video; inference starts at this frame and covers
# only the next `n_inference_frames` frames (not the whole remaining 75%).
starting_frame = int(n_frames // 4)
cap.set(cv2.CAP_PROP_POS_FRAMES, starting_frame)

frame_detections = []
try:
    for _ in tqdm(range(starting_frame, starting_frame + n_inference_frames)):
        ret, frame = cap.read()
        # `ret` is False when the capture is not open or the video has ended;
        # stop instead of passing a None frame to the model.
        if not ret:
            break

        # Run model on current frame
        results = model(frame, verbose=False)

        # Save frame detections
        frame_detections.append(results[0].boxes)
finally:
    # Always free the decoder handle, even if inference raises.
    cap.release()

# One list per frame; each detection is [x1, y1, x2, y2, confidence].
# The fine-tuned model keeps every box; the stock model keeps only
# boxes whose class is COCO_CAR_CLASS_ID.
filtered_predictions = [
    [
        box.xyxy[0].to(int).tolist() + box.conf.tolist()
        for box in frame_boxes
        if finetuned or box.cls == COCO_CAR_CLASS_ID
    ]
    for frame_boxes in frame_detections
]

100%|██████████| 100/100 [00:32<00:00,  3.09it/s]


In [7]:
# Initialize SORT tracker
mot_tracker = Sort()

# Track objects across frames.
# `tracking` maps a frame number (offset by starting_frame, plus 1) to the
# tracker output for that frame; `tracking_video` holds the same outputs in
# processing order so they can be indexed by position.
tracking = dict()
tracking_video = []
for i, dets in enumerate(filtered_predictions):
    # Force an (N, 5) array so a frame with no detections becomes shape (0, 5)
    # rather than (0,). Assuming `sort.Sort` follows the reference SORT
    # implementation, `update` must be called on every frame and expects
    # np.empty((0, 5)) when there is nothing to associate.
    dets_array = np.array(dets, dtype=float).reshape(-1, 5)
    tracked_detections = mot_tracker.update(dets_array)
    tracking[i+starting_frame+1] = tracked_detections
    tracking_video.append(tracked_detections)

In [None]:
# Placeholder values for <conf>, <x>, <y>, <z> since these are not provided
conf, x, y, z = 1, -1, -1, -1  # Using -1 to indicate unknown or not applicable

# Convert the tracker output to one text line per tracked box:
#   frame, id, bb_left, bb_top, bb_width, bb_height, conf, x, y, z
# (Despite the `gt_` variable names, these are tracker results, not ground truth.)
gt_content = []
for frame, bboxes in tracking.items():
    for bbox in bboxes:
        # Each row is unpacked as corner coordinates followed by the track id.
        bb_left, bb_top, bb_right, bb_bottom, obj_id = map(int, bbox)
        bb_width = bb_right - bb_left
        bb_height = bb_bottom - bb_top
        gt_content.append(f"{frame}, {obj_id}, {bb_left}, {bb_top}, {bb_width}, {bb_height}, {conf}, {x}, {y}, {z}")

# Join all entries to form the final content of the results file
gt_text = "\n".join(gt_content)

file_path = 'TrackEval/data/trackers/mot_challenge/week2-train/kalmantrackerft/data/week2-01.txt'  # Define the file path
# Create the tracker's output folder on first run; open(..., 'w') does not
# create missing parent directories.
os.makedirs(os.path.dirname(file_path), exist_ok=True)
with open(file_path, 'w') as f:
    f.write(gt_text)

In [None]:
# Render the tracked frames into 100-frame video chunks, drawing each object's
# bounding box, id and the trail of its past box centres.
cap = cv2.VideoCapture('data/S03/c010/vdo.avi')

# Store tracking history (list of box centres) for each object
tracking_history = {}
# Store colors for each object ID
colors = {}

# cv2.VideoWriter does not create missing directories, so make sure the
# output folder exists before opening any writer.
os.makedirs('tracking/kalmantrackerft', exist_ok=True)

# Only frames that went through the tracker can be drawn: tracking_video[k]
# belongs to frame starting_frame + k, so stop at the end of that list.
end_frame = min(n_frames, starting_frame + len(tracking_video))

for start in tqdm(range(starting_frame, end_frame, 100)):

    cap.set(cv2.CAP_PROP_POS_FRAMES, start)
    # NOTE(review): fourcc=-1 asks OpenCV to pick/prompt for a codec, which is
    # platform dependent -- consider an explicit cv2.VideoWriter_fourcc(...).
    video = cv2.VideoWriter(f'tracking/kalmantrackerft/tracking_{start}.mp4', -1, fps, (width, height), True)

    try:
        for i in range(start, min(start + 100, end_frame)):
            ret, frame = cap.read()
            if not ret:
                break

            # Draw detected bounding boxes and tracking lines
            for track in tracking_video[i - starting_frame]:
                bbox = list(map(int, track[:4]))
                obj_id = int(track[4])
                # Assign a unique color if new object
                if obj_id not in colors:
                    colors[obj_id] = (np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255))

                # Draw the bounding box and the object id at its top-left corner
                start_point = (bbox[0], bbox[1])
                end_point = (bbox[2], bbox[3])
                frame = cv2.rectangle(frame, start_point, end_point, colors[obj_id], 2)
                frame = cv2.putText(frame, str(obj_id), start_point, cv2.FONT_HERSHEY_SIMPLEX, 1, colors[obj_id], 2, cv2.LINE_AA)

                # Update tracking history with the centre of the current box
                center_position = ((start_point[0] + end_point[0]) // 2, (start_point[1] + end_point[1]) // 2)
                history = tracking_history.setdefault(obj_id, [])
                history.append(center_position)

                # Draw the trail: one segment between each pair of consecutive centres
                for prev_center, next_center in zip(history, history[1:]):
                    cv2.line(frame, prev_center, next_center, colors[obj_id], 2)

            video.write(frame)
    finally:
        # Release every chunk's writer (not just the last one) so each file
        # is finalized on disk.
        video.release()

cap.release()