In [3]:
from ultralytics import YOLO
from PIL import Image
from data_utils import extract_rectangles_from_xml
from eval_utils import mAP
import cv2
from tqdm import tqdm
import numpy as np
from typing import List, Tuple

In [44]:
# Switch between the fine-tuned checkpoint and the `yolov8n.pt` weights.
finetuned = True

# Load a model: "last.pt" when `finetuned` is set, "yolov8n.pt" otherwise.
model = YOLO("last.pt" if finetuned else "yolov8n.pt")

In [45]:
# Run the detector on every frame of the video and keep the boxes per frame.
# Also records the video geometry / frame count / fps for later cells.
cap = cv2.VideoCapture('data/S03/c010/vdo.avi')
width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
fps = int(cap.get(cv2.CAP_PROP_FPS))

frame_detections = []
try:
    for _ in tqdm(range(n_frames)):
        ret, frame = cap.read()
        # The reported frame count can exceed what is actually decodable, and an
        # unopened capture also fails here; stop instead of feeding an invalid
        # frame to the model.
        if not ret:
            break

        # Run model on current frame
        results = model(frame, verbose=False)

        # Save frame detections
        frame_detections.append(results[0].boxes)
finally:
    # Always free the capture handle, even if inference raises or is interrupted.
    cap.release()

# Convert each box to an integer [x1, y1, x2, y2] list.
if not finetuned:
    # Keep only class index 2 for the non-finetuned model.
    # NOTE(review): index 2 is presumably "car" in the pretrained label set - confirm.
    filtered_predictions = [[box.xyxy[0].to(int).tolist() for box in frame_boxes if box.cls == 2] for frame_boxes in frame_detections]
else:
    filtered_predictions = [[box.xyxy[0].to(int).tolist() for box in frame_boxes] for frame_boxes in frame_detections]

  0%|          | 0/2141 [00:00<?, ?it/s]

 52%|█████▏    | 1107/2141 [05:52<05:04,  3.39it/s]

In [39]:
# Load the annotated boxes; each box is unpacked below as
# (left, top, right, bottom, track id).
annotation = extract_rectangles_from_xml('data/ai_challenge_s03_c010-full_annotation.xml', add_track_id=True)

# Build the ground-truth file: one text row per annotated box.
# <conf>, <x>, <y>, <z> are not provided, so fixed placeholders are written
# (-1 marks "unknown / not applicable").
conf, x, y, z = 1, -1, -1, -1


def _gt_row(frame, bbox):
    """Format one annotated box as a row; the frame number is written 1-based."""
    bb_left, bb_top, bb_right, bb_bottom, obj_id = map(int, bbox)
    bb_width = bb_right - bb_left
    bb_height = bb_bottom - bb_top
    return f"{frame+1}, {obj_id}, {bb_left}, {bb_top}, {bb_width}, {bb_height}, {conf}, {x}, {y}, {z}"


gt_content = [
    _gt_row(frame_idx, box)
    for frame_idx, frame_boxes in annotation.items()
    for box in frame_boxes
]

# One row per line, no trailing newline.
gt_text = "\n".join(gt_content)

file_path = 'TrackEval/data/gt/mot_challenge/week2-train/week2-01/gt/gt.txt'  # Destination of the ground-truth file
with open(file_path, 'w') as gt_file:
    gt_file.write(gt_text)

In [33]:
def voc_iou(pred: List[int], gt: np.ndarray):
    """
    Calculate the IoU between one predicted box and one or more ground-truth boxes.

    Box extents are inclusive: a box spanning x1..x2 is (x2 - x1 + 1) wide.

    :param pred: Predicted bounding box coordinates [x1, y1, x2, y2].
    :param gt: Either a single box [x1, y1, x2, y2] or an array of boxes
        [[x1, y1, x2, y2], ...] with the coordinates on the last axis.
    :return: A scalar IoU when `gt` is a single box, otherwise an array with one
        IoU per ground-truth box. Pairs whose union is not positive
        (degenerate boxes) yield 0.0 instead of NaN.
    """
    pred = np.asarray(pred, dtype=float)
    gt = np.asarray(gt, dtype=float)

    # intersection (indexing the last axis works for both a single box and a batch)
    ixmin = np.maximum(gt[..., 0], pred[0])
    iymin = np.maximum(gt[..., 1], pred[1])
    ixmax = np.minimum(gt[..., 2], pred[2])
    iymax = np.minimum(gt[..., 3], pred[3])
    iw = np.maximum(ixmax - ixmin + 1.0, 0.0)
    ih = np.maximum(iymax - iymin + 1.0, 0.0)
    inters = iw * ih

    # union
    pred_area = (pred[2] - pred[0] + 1.0) * (pred[3] - pred[1] + 1.0)
    gt_area = (gt[..., 2] - gt[..., 0] + 1.0) * (gt[..., 3] - gt[..., 1] + 1.0)
    uni = pred_area + gt_area - inters

    # Avoid dividing by a non-positive union; those pairs are reported as 0.0.
    valid = uni > 0
    iou = np.where(valid, inters / np.where(valid, uni, 1.0), 0.0)

    # `[()]` turns a 0-d result back into a scalar and leaves batched results as arrays.
    return iou[()]

In [42]:
# Track by overlap: link each detection to the previous frame's object it
# overlaps most, provided the IoU exceeds `iou_threshold`; otherwise start a new track.
active_objects = {}  # Maps object ID to the bounding box it had in the previous frame
next_object_id = 0
iou_threshold = 0.3  # Minimum IoU to consider a match

# Maps 1-based frame number -> list of [x1, y1, x2, y2, object_id]
tracking = dict()

for i, detection in tqdm(enumerate(filtered_predictions), total=len(filtered_predictions)):
    # Score every (detection, previous object) pair that clears the threshold.
    candidates = []
    for det_idx, bbox_curr in enumerate(detection):
        for obj_id, bbox_prev in active_objects.items():
            iou = voc_iou(bbox_curr, bbox_prev)
            if iou > iou_threshold:
                candidates.append((iou, det_idx, obj_id))

    # Greedy one-to-one matching, best overlap first. Each previous ID may be
    # claimed by at most one detection; otherwise two detections sharing the
    # same best ID would overwrite each other and one box would be lost.
    # The sort is stable, so ties keep detection / object insertion order.
    candidates.sort(key=lambda cand: cand[0], reverse=True)
    assigned = {}        # detection index -> matched object ID
    claimed_ids = set()  # object IDs already taken in this frame
    for _, det_idx, obj_id in candidates:
        if det_idx in assigned or obj_id in claimed_ids:
            continue
        assigned[det_idx] = obj_id
        claimed_ids.add(obj_id)

    # Unmatched detections open new tracks, in detection order.
    current_objects = {}
    for det_idx, bbox_curr in enumerate(detection):
        if det_idx in assigned:
            current_objects[assigned[det_idx]] = bbox_curr
        else:
            current_objects[next_object_id] = bbox_curr
            next_object_id += 1

    # Update tracking data for the next frame; objects not seen in this frame are dropped.
    active_objects = current_objects
    tracking[i + 1] = [bbox + [obj_id] for obj_id, bbox in active_objects.items()]


  0%|          | 0/2141 [00:00<?, ?it/s]

100%|██████████| 2141/2141 [00:03<00:00, 574.72it/s]


In [43]:
# Write the tracker results: one text row per tracked box, using the same
# column layout as the ground-truth file.
# <conf>, <x>, <y>, <z> are not provided, so fixed placeholders are written
# (-1 marks "unknown / not applicable").
conf, x, y, z = 1, -1, -1, -1


def _tracker_row(frame, bbox):
    """Format one tracked box [left, top, right, bottom, id] as a row."""
    bb_left, bb_top, bb_right, bb_bottom, obj_id = map(int, bbox)
    bb_width = bb_right - bb_left
    bb_height = bb_bottom - bb_top
    return f"{frame}, {obj_id}, {bb_left}, {bb_top}, {bb_width}, {bb_height}, {conf}, {x}, {y}, {z}"


# The keys of `tracking` are written as the frame number unchanged.
gt_content = [
    _tracker_row(frame_no, box)
    for frame_no, frame_boxes in tracking.items()
    for box in frame_boxes
]

# One row per line, no trailing newline.
gt_text = "\n".join(gt_content)

file_path = 'TrackEval/data/trackers/mot_challenge/week2-train/yolotrackerft/data/week2-01.txt'  # Destination of the tracker output
with open(file_path, 'w') as tracker_file:
    tracker_file.write(gt_text)

In [6]:
# Render the tracking result as a series of 100-frame clips with boxes, IDs and
# the trail of each object's box centre.
cap = cv2.VideoCapture('data/S03/c010/vdo.avi')

# Store tracking history (list of box centres) for each object
tracking_history = {}
# Store colors for each object ID
colors = {}

for start in tqdm(range(0, n_frames, 100)):

    cap.set(cv2.CAP_PROP_POS_FRAMES, start)
    # NOTE(review): fourcc=-1 leaves the codec choice to the backend/platform -
    # consider an explicit cv2.VideoWriter_fourcc(...) for reproducible output.
    video = cv2.VideoWriter(f'tracking/yolotrackerft/tracking_{start}.mp4', -1, fps, (width, height), True)

    for i in range(start, start + 100):
        ret, frame = cap.read()
        if not ret:
            break

        # `tracking` is keyed by 1-based frame number and holds
        # [x1, y1, x2, y2, object_id] lists, so 0-based video frame i maps to
        # key i + 1; frames without an entry are drawn without boxes.
        for *bbox, obj_id in tracking.get(i + 1, []):
            # Assign a unique color if new object
            if obj_id not in colors:
                colors[obj_id] = (np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255))

            # Draw the bounding box
            start_point = (int(bbox[0]), int(bbox[1]))
            end_point = (int(bbox[2]), int(bbox[3]))
            frame = cv2.rectangle(frame, start_point, end_point, colors[obj_id], 2)
            frame = cv2.putText(frame, str(obj_id), start_point, cv2.FONT_HERSHEY_SIMPLEX, 1, colors[obj_id], 2, cv2.LINE_AA)

            # Update tracking history
            center_position = ((start_point[0] + end_point[0]) // 2, (start_point[1] + end_point[1]) // 2)
            history = tracking_history.setdefault(obj_id, [])
            history.append(center_position)

            # Draw tracking line (one segment per pair of consecutive positions)
            for prev_pos, next_pos in zip(history, history[1:]):
                cv2.line(frame, prev_pos, next_pos, colors[obj_id], 2)

        video.write(frame)

    # Close every clip as soon as it is complete; releasing only after the
    # outer loop would leave all but the last writer open.
    video.release()

cap.release()

  0%|          | 0/22 [00:00<?, ?it/s]

100%|██████████| 22/22 [01:44<00:00,  4.74s/it]
