SOULIER\
François\
SCIA - 2024

# TP2 - Multi-object IOU Tracker (Bounding-Box Tracker)

In [1]:
import os
import shutil
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2

1. Load detections (det) stored in a MOT-challenge like formatted text file. Each line represents one object instance and contains 10 values:
* frame = frame number
* id = number identifies that object as belonging to a trajectory by assigning a unique ID (set to
−1 in a detection file, as no ID is assigned yet).
* bb_left, bb_top, bb_width, bb_height: bounding box position in 2D image coordinates i.e. the
top-left corner as well as width and height
* conf: detection confidence score
* x,y,z: the world coordinates are ignored for the 2D challenge and can be filled with -1.

In [2]:
column_names = ['frame', 'id', 'bb_left', 'bb_top', 'bb_width', 'bb_height', 'conf', 'x', 'y', 'z']

det_df = pd.read_csv('data/det.txt', sep=',', header=None)
det_df.columns = column_names
det_df.head()

Unnamed: 0,frame,id,bb_left,bb_top,bb_width,bb_height,conf,x,y,z
0,1,-1,1689,385,146.62,332.71,67.567,-1,-1,-1
1,1,-1,1303,503,61.514,139.59,29.439,-1,-1,-1
2,1,-1,1258,569,40.123,91.049,19.601,-1,-1,-1
3,1,-1,31,525,113.37,257.27,17.013,-1,-1,-1
4,1,-1,1800,483,94.66,214.81,11.949,-1,-1,-1


2. Implement IoU for tracking
* Compute similarity score using the Jaccard index (intersection-over-union) for each pair of
bounding boxes

In [4]:
def IoU(bbox_1: np.ndarray, bbox_2: np.ndarray) -> float:
    """
    Calculates the Intersection over Union (IoU) of two bounding boxes.
    Args:
        bbox_1: A numpy array of shape (4,) representing the left, top, width, and height of the first bounding box.
        bbox_2: A numpy array of shape (4,) representing the left, top, width, and height of the second bounding box.
    Returns:
        The IoU of the two bounding boxes.
    """
    x1, y1, w1, h1 = bbox_1
    x2, y2, w2, h2 = bbox_2
    
    xA, yA = max(x1, x2), max(y1, y2)
    xB, yB = min(x1 + w1, x2 + w2), min(y1 + h1, y2 + h2)
    
    if xB <= xA or yB <= yA:
        return 0.0
    
    intersection_area = (xB - xA) * (yB - yA)
    
    bbox_1_area = w1 * h1
    bbox_2_area = w2 * h2
    
    iou = intersection_area / float(bbox_1_area + bbox_2_area - intersection_area)
    
    if iou < 0.0:
        iou = 0.0
    elif iou > 1.0:
        iou = 1.0

    return iou

* Create a similarity matrix that stores the IoU for all boxes

In [5]:
def similarity_matrix(bbox_list_left: np.ndarray, bbox_list_right: np.ndarray) -> np.ndarray:
    """
    Calculates the similarity matrix two list of bounding boxes.
    Args:
        det_df: A pandas DataFrame containing the detections.
    Returns:
        A numpy array of shape (num_detections, num_detections) containing the IoU of each detection pair.
    """
    sim_matrix = np.zeros((bbox_list_left.shape[0], bbox_list_right.shape[0]))
    for (i, bbox_left) in enumerate(bbox_list_left):
        for (j, bbox_right) in enumerate(bbox_list_right):
            sim_matrix[i, j] = IoU(bbox_left, bbox_right)
    return sim_matrix

3. Associate the detections to tracks in a greedy manner using IoU/ threshold sigma_iou. A track gets the detection with the highest intersection-over-union to its last known object position (i.e. the previous detection of the track) assigned.

In [6]:
def match_tracks_to_detections(df: pd.DataFrame) -> pd.DataFrame:
    """
    Matches tracks to detections.
    Args:
        det_df: A pandas DataFrame containing the detections.
    Returns:
        A pandas DataFrame containing the matches.
    """
    frames = df['frame'].unique()
    list_columns = ['bb_left', 'bb_top', 'bb_width', 'bb_height']
    for frame in range(1, frames.shape[0]):
        # Assign IDs to the first frame
        if frame == 1:
            df.loc[df['frame'] == frame, 'id'] = np.arange(df[df['frame'] == frame].shape[0])

        n_frame = df[df['frame'] == frame]
        n_plus_1_frame = df[df['frame'] == frame + 1]
        sim_matrix = similarity_matrix(n_frame[list_columns].values, n_plus_1_frame[list_columns].values)
        sim_matrix[sim_matrix < 0.5] = 0.0
        max_indices = np.argmax(sim_matrix, axis=1)

        for i, max_index in enumerate(max_indices):
            if sim_matrix[i, max_index] > 0.0:
                n_plus_1_frame.iloc[max_index, 1] = n_frame.iloc[i, 1]
        
        for i, row in n_plus_1_frame.iterrows():
            if row['id'] == -1:
                n_plus_1_frame.loc[i, 'id'] = n_plus_1_frame['id'].max() + 1
        
        df.loc[df['frame'] == frame + 1, 'id'] = n_plus_1_frame['id'].values

    return df

new_df = match_tracks_to_detections(det_df.copy())
new_df.head(20)

Unnamed: 0,frame,id,bb_left,bb_top,bb_width,bb_height,conf,x,y,z
0,1,0,1689,385,146.62,332.71,67.567,-1,-1,-1
1,1,1,1303,503,61.514,139.59,29.439,-1,-1,-1
2,1,2,1258,569,40.123,91.049,19.601,-1,-1,-1
3,1,3,31,525,113.37,257.27,17.013,-1,-1,-1
4,1,4,1800,483,94.66,214.81,11.949,-1,-1,-1
5,2,0,1689,385,146.62,332.71,66.725,-1,-1,-1
6,2,1,1312,503,61.514,139.59,36.614,-1,-1,-1
7,2,4,1744,476,123.42,280.06,16.976,-1,-1,-1
8,2,2,1254,537,52.0,118.0,15.979,-1,-1,-1
9,2,3,55,542,94.66,214.81,9.3326,-1,-1,-1


5. Develop an interface for tracking results check to see if the tracker properly keeps track of objects by associating the correct IDs in the video stream
* Draw rectangular bounding box around the detected object in images
* Draw attributed ID to each tracked objects
* Draw the trajectory (tracking path ) in an image

In [7]:
if os.path.exists('outputs/'):
    shutil.rmtree('outputs/')
os.mkdir('outputs/')

In [8]:
def export_video_with_tracking(df: pd.DataFrame, folder_path: str, output: str, fps: int, frame_size: tuple) -> None:
    """
    Exports a video with tracking.
    Args:
        df: A pandas DataFrame containing the detections.
        folder_path: The path to the folder containing the images.
        output: The path to the output video.
        fps: The FPS of the output video.
        frame_size: The frame size of the output video.
    """
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    video_writer = cv2.VideoWriter(output, fourcc, fps, frame_size)

    color = (0, 0, 255)

    for i, filename in enumerate(sorted(os.listdir(folder_path))):
        if filename.endswith(".jpg") or filename.endswith(".png"):
            image_path = os.path.join(folder_path, filename)
            image = cv2.imread(image_path)

            image = cv2.resize(image, frame_size)

            for _, row in df[df['frame'] == i + 1].iterrows():
                bbox = row[['bb_left', 'bb_top', 'bb_width', 'bb_height']].values.astype(np.int32)
                cv2.rectangle(image, (bbox[0], bbox[1]), (bbox[0] + bbox[2], bbox[1] + bbox[3]), color, 2)
                cv2.putText(image, f"{int(row['id'])}", (bbox[0], bbox[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)

            video_writer.write(image)

    video_writer.release()

In [9]:
export_video_with_tracking(new_df, 'data/video_iou/', 'outputs/greedy_video.avi', 30, (1920, 1080))

The video can be found at path `output_video.avi`