In [35]:
import cv2 
import numpy as np  
import json
from glob import glob 
from tqdm import tqdm
from utils import *
from optical_filter_methods import *
import random
from scipy.optimize import linear_sum_assignment

In [67]:

all_frames = glob('/Users/dianatat/Documents/Master/C6 Video Analysis/project/extracted_frames/*')
assert len(all_frames) > 0, 'No frames found'

# Sort the frames
all_frames.sort(key=lambda x: int(x.split('/')[-1].split('.')[0].replace('frame_', '')))

# Check the length of all_frames
print("Number of frames:", len(all_frames))

# Ensure that the list is not empty before accessing the first frame
if len(all_frames) > 0:
    prev_frame = cv2.imread(all_frames[0])
    prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
    current_gray = prev_gray
else:
    print("No frames available.")

Number of frames: 2141


In [68]:
# load annotation 
with open('/Users/dianatat/Documents/Master/C6 Video Analysis/project/frames_with_movement.json', 'r') as f:
    annotation = json.load(f)
total_frames = len(all_frames)

In [27]:
#taken from last week

class Tracker:
    def __init__(self, id, kf, bbox=None):
        self.id = id
        self.kf = kf
        self.lost = 0  
        self.trail = []  
        self.bbox = bbox 
        self.trail_color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))

In [28]:
#taken from last week

trackers = []  
next_id = 0  
max_lost = 5  
max_consecutive_absence = 1  

dt = 1  # Time step
process_noise = 1e-5
measurement_noise = 1e-1
initial_uncertainty = 1.0
all_gt_bboxes = {}  
tracked_bbox_annotation = {}
idf1_scores = []
hota_scores = []

prev_frame = None
prev_gray = None
current_gray = None 

In [29]:
#taken from last week

class KalmanFilter:
    def __init__(self, dt, process_noise, measurement_noise, initial_state, initial_uncertainty):
        """
        Initialize the Kalman Filter.
        :param dt: Time step
        :param process_noise: Process noise covariance
        :param measurement_noise: Measurement noise covariance
        :param initial_state: Initial state vector
        :param initial_uncertainty: Initial uncertainty covariance
        """
        self.dt = dt  
        self.A = np.array([[1, 0, dt, 0],  
                           [0, 1, 0, dt],
                           [0, 0, 1, 0],
                           [0, 0, 0, 1]])
        self.B = np.eye(4)  
        self.H = np.array([[1, 0, 0, 0],  
                           [0, 1, 0, 0]])
        self.Q = process_noise * np.eye(4)  
        self.R = measurement_noise * np.eye(2)  
        self.P = initial_uncertainty  
        self.x = initial_state  

    def predict(self):
        """
        Predict the next state.
        """
        self.x = np.dot(self.A, self.x)
        self.P = np.dot(np.dot(self.A, self.P), self.A.T) + self.Q

    def update(self, z):
        """
        Update the state based on measurement.
        :param z: Measurement
        """
        y = z - np.dot(self.H, self.x)  # measurement pre-fit residual
        S = np.dot(self.H, np.dot(self.P, self.H.T)) + self.R  # residual covariance
        K = np.dot(np.dot(self.P, self.H.T), np.linalg.inv(S))  # Kalman gain
        self.x = self.x + np.dot(K, y)
        self.P = self.P - np.dot(K, np.dot(self.H, self.P))


### Farneback Optical Flow

In [69]:
if len(all_frames) > 0:
    prev_frame = cv2.imread(all_frames[0])
    prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)

    # Initialize variables for tracking
    tracked_bbox_annotation_mean_of = {}
    idf1_scores_mean_of = []
    hota_scores_mean_of = []

    for frame_path in all_frames:
        frame = cv2.imread(frame_path)
        frame_idx = frame_path.split('/')[-1].split('.')[0].replace('frame_', '')
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        
        frame_annotations = annotation.get(frame_idx, [])  # Assuming this is a list of detections
        current_measurements = []
        current_bboxes = []
        tracked_bbox_annotation_mean_of[frame_idx] = []
        for bbox_ann in frame_annotations:
            
            # sample bbox_ann:[{'label': 'bike', 'xtl': 475.92, 'ytl': 212.16, 'xbr': 522.36, 'ybr': 357.15}]
            if bbox_ann['label'] != 'car':
                continue
            detection = [bbox_ann['xtl'], bbox_ann['ytl'], bbox_ann['xbr'], bbox_ann['ybr']]
            x_center = (detection[0] + detection[2]) / 2
            y_center = (detection[1] + detection[3]) / 2
            
            # calculate mean of within the bounding box
            x_min, y_min, x_max, y_max = map(int, detection)
            x_min = max(0, x_min)
            y_min = max(0, y_min)
            x_max = min(frame.shape[1], x_max)
            y_max = min(frame.shape[0], y_max)
            
            if prev_frame is not None and flow is not None:
                flow_roi = flow[y_min:y_max, x_min:x_max]
                mean_flow = np.mean(flow_roi, axis=(0, 1))
                x_center += mean_flow[0]
                y_center += mean_flow[1]
            
            current_measurements.append(np.array([x_center, y_center]))
            current_bboxes.append(detection)
        
        if not trackers:  #create a tracker for each detection
            for measurement, bbox in zip(current_measurements, current_bboxes):
                kf = KalmanFilter(dt, process_noise, measurement_noise, np.array([measurement[0], measurement[1], 0, 0]), initial_uncertainty)
                trackers.append(Tracker(next_id, kf, bbox))
                next_id += 1
        else:
            for tracker in trackers:
                tracker.kf.predict()
            
            num_trackers = len(trackers)
            num_measurements = len(current_measurements)
            cost_matrix = np.zeros((num_trackers, num_measurements))
            for i, tracker in enumerate(trackers):
                for j, measurement in enumerate(current_measurements):
                    cost_matrix[i, j] = np.linalg.norm(measurement - tracker.kf.x[:2])
            tracker_indices, measurement_indices = linear_sum_assignment(cost_matrix)
            
            for t_idx, m_idx in zip(tracker_indices, measurement_indices):
                trackers[t_idx].kf.update(current_measurements[m_idx])
                trackers[t_idx].bbox = current_bboxes[m_idx]
                trackers[t_idx].lost = 0 
            
            unmatched_detections = set(range(num_measurements)) - set(measurement_indices)
            for m_idx in unmatched_detections:
                measurement = current_measurements[m_idx]
                bbox = current_bboxes[m_idx]
                kf = KalmanFilter(dt, process_noise, measurement_noise, np.array([measurement[0], measurement[1], 0, 0]), initial_uncertainty)
                trackers.append(Tracker(next_id, kf, bbox))
                next_id += 1
        
        for tracker in trackers:
            if tracker.id not in {tracker.id for tracker in trackers}:
                tracker.lost += 1
        
        removed_trackers = []
        for tracker in trackers:
            if tracker.lost > max_lost:
                removed_trackers.append(tracker)
            else:
                tracker.lost += 1
                if tracker.id not in {tracker.id for tracker in trackers}:
                    tracker.lost = 1
                if tracker.lost > max_consecutive_absence:
                    removed_trackers.append(tracker)
        
        for tracker in removed_trackers:
            if tracker in trackers:
                trackers.remove(tracker)
                # Reset trail
                tracker.trail = []
        
        frame_with_tracking = frame.copy()

        tracked_bbox_annotation_mean_of[frame_idx] = []
        for tracker in trackers:
            if tracker.bbox:
                x_min, y_min, x_max, y_max = map(int, tracker.bbox)
                x_min = int(x_min)
                y_min = int(y_min)
                x_max = int(x_max)
                y_max = int(y_max)
                
                cv2.rectangle(frame_with_tracking, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
                cv2.putText(frame_with_tracking, f'ID: {tracker.id}', (x_min, y_min - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)

                tracked_bbox_annotation_mean_of[frame_idx].append(
                    { "tracked_id": tracker.id,
                "xtl": int(x_min),
                "ytl": int(y_min),
                "xbr":int(x_max),
                "ybr": int(y_max)}
                )

            if tracker in trackers:
                x_estimated, y_estimated = int(tracker.kf.x[0]), int(tracker.kf.x[1])
                tracker.trail.append((x_estimated, y_estimated))
                for i in range(1, len(tracker.trail)):
                    cv2.line(frame_with_tracking, tracker.trail[i - 1], tracker.trail[i], tracker.trail_color, 10)
        
            
        tracked_bboxes = [list(map(int, tracker.bbox)) for tracker in trackers if tracker.bbox is not None]

        gt_boxes = [[int(bbox_ann['xtl']), int(bbox_ann['ytl']),int( bbox_ann['xbr']), int(bbox_ann['ybr'])] for bbox_ann in frame_annotations]

        idf1 = calculate_idf1(gt_boxes, tracked_bboxes)
        idf1_scores_mean_of.append(idf1)
        
        idtp = calculate_idtp(tracked_bboxes, gt_boxes)
        idfp = calculate_idfp(tracked_bboxes, gt_boxes)
        idfn = calculate_idfn(tracked_bboxes, gt_boxes)

        try:
            hota = idtp / (idtp + 0.5 * (idfp + idfn))
        except ZeroDivisionError:
            hota = 0.0
        hota_scores_mean_of.append(hota)
        
        prev_frame = frame.copy()
        prev_gray = current_gray

    # Save the tracked bbox 
    with open("tracked_data_farneback_of.json", "w") as file:
        json.dump(tracked_bbox_annotation_mean_of, file)

else:
    print("No frames available.")

### Mean Optical Flow

In [62]:
if len(all_frames) > 0:
    prev_frame = cv2.imread(all_frames[0])
    prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)

    tracked_bbox_annotation = {}
    all_gt_bboxes = {}
    idf1_scores = []
    hota_scores = []

    for frame_path in all_frames:
        frame = cv2.imread(frame_path)
        frame_idx = frame_path.split('/')[-1].split('.')[0].replace('frame_', '')
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        
        frame_annotations = annotation.get(frame_idx, [])  
        current_measurements = []
        current_bboxes = []
        tracked_bbox_annotation[frame_idx] = []
        for bbox_ann in frame_annotations:
            if bbox_ann['label'] != 'car':
                continue
            detection = [bbox_ann['xtl'], bbox_ann['ytl'], bbox_ann['xbr'], bbox_ann['ybr']]
            x_center = (detection[0] + detection[2]) / 2
            y_center = (detection[1] + detection[3]) / 2
            current_measurements.append(np.array([x_center, y_center]))
            current_bboxes.append(detection)
        
        if not trackers:  
            for measurement, bbox in zip(current_measurements, current_bboxes):
                kf = KalmanFilter(dt, process_noise, measurement_noise, np.array([measurement[0], measurement[1], 0, 0]), initial_uncertainty)
                trackers.append(Tracker(next_id, kf, bbox))
                next_id += 1
        else:
            for tracker in trackers:
                tracker.kf.predict()
            
            num_trackers = len(trackers)
            num_measurements = len(current_measurements)
            cost_matrix = np.zeros((num_trackers, num_measurements))
            for i, tracker in enumerate(trackers):
                for j, measurement in enumerate(current_measurements):
                    cost_matrix[i, j] = np.linalg.norm(measurement - tracker.kf.x[:2])
            tracker_indices, measurement_indices = linear_sum_assignment(cost_matrix)
            
            for t_idx, m_idx in zip(tracker_indices, measurement_indices):
                trackers[t_idx].kf.update(current_measurements[m_idx])
                trackers[t_idx].bbox = current_bboxes[m_idx]
                trackers[t_idx].lost = 0  
            
            unmatched_detections = set(range(num_measurements)) - set(measurement_indices)
            for m_idx in unmatched_detections:
                measurement = current_measurements[m_idx]
                bbox = current_bboxes[m_idx]
                kf = KalmanFilter(dt, process_noise, measurement_noise, np.array([measurement[0], measurement[1], 0, 0]), initial_uncertainty)
                trackers.append(Tracker(next_id, kf, bbox))
                next_id += 1
        
        for tracker in trackers:
            if tracker.id not in {tracker.id for tracker in trackers}:
                tracker.lost += 1
        
        removed_trackers = []
        for tracker in trackers:
            if tracker.lost > max_lost:
                removed_trackers.append(tracker)
            else:
                tracker.lost += 1
                if tracker.id not in {tracker.id for tracker in trackers}:
                    tracker.lost = 1
                if tracker.lost > max_consecutive_absence:
                    removed_trackers.append(tracker)
        
        for tracker in removed_trackers:
            if tracker in trackers:
                trackers.remove(tracker)
                tracker.trail = []
        
        frame_with_tracking = frame.copy()

        tracked_bbox_annotation[frame_idx] = []
        for tracker in trackers:
            if tracker.bbox:
                x_min, y_min, x_max, y_max = map(int, tracker.bbox)
                x_min = int(x_min)
                y_min = int(y_min)
                x_max = int(x_max)
                y_max = int(y_max)
                
                cv2.rectangle(frame_with_tracking, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
                cv2.putText(frame_with_tracking, f'ID: {tracker.id}', (x_min, y_min - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)


                tracked_bbox_annotation[frame_idx].append(
                    { "tracked_id": tracker.id,
                "xtl": int(x_min),
                "ytl": int(y_min),
                "xbr":int(x_max),
                "ybr": int(y_max)}
                )
            if tracker in trackers:
                x_estimated, y_estimated = int(tracker.kf.x[0]), int(tracker.kf.x[1])
                tracker.trail.append((x_estimated, y_estimated))
                for i in range(1, len(tracker.trail)):
                    cv2.line(frame_with_tracking, tracker.trail[i - 1], tracker.trail[i], tracker.trail_color, 10)
        
            
        tracked_bboxes = [list(map(int, tracker.bbox)) for tracker in trackers if tracker.bbox is not None]
        
        gt_boxes = [[int(bbox_ann['xtl']), int(bbox_ann['ytl']),int( bbox_ann['xbr']), int(bbox_ann['ybr'])] for bbox_ann in frame_annotations]

        idf1 = calculate_idf1(gt_boxes, tracked_bboxes)
        idf1_scores.append(idf1)
        
        cv2.imshow('Frame with Tracking', frame_with_tracking)
        if cv2.waitKey(30) & 0xFF == ord('q'):
            break
        
        print(gt_boxes, tracked_bboxes)
        
        idtp = calculate_idtp(tracked_bboxes, gt_boxes)
        idfp = calculate_idfp(tracked_bboxes, gt_boxes)
        idfn = calculate_idfn(tracked_bboxes, gt_boxes)

        # Compute HOTA score for the current frame
        try:
            hota = idtp / (idtp + 0.5 * (idfp + idfn))
        except ZeroDivisionError:
            hota = 0.0
        hota_scores.append(hota)
        

    cv2.destroyAllWindows()

    # Save the tracked bbox 
    with open("tracked_data_mean_of.json", "w") as file:
        json.dump(tracked_bbox_annotation, file)

else:
    print("No frames available.")


Number of frames: 2141
[[558, 94, 663, 169], [573, 72, 661, 145], [913, 93, 972, 144], [894, 95, 944, 144], [878, 107, 927, 145], [1176, 82, 1253, 174], [1285, 363, 1516, 546], [931, 78, 1013, 146]] [[558, 94, 663, 169], [573, 72, 661, 145], [913, 93, 972, 144], [894, 95, 944, 144], [878, 107, 927, 145], [1176, 82, 1253, 174], [1285, 363, 1516, 546], [931, 78, 1013, 146], [910, 119, 1018, 203]]
[[558, 94, 663, 169], [573, 72, 661, 145], [913, 93, 972, 144], [894, 95, 944, 144], [878, 107, 927, 145], [1176, 82, 1253, 174], [1285, 363, 1516, 546], [931, 78, 1013, 146]] [[558, 94, 663, 169], [573, 72, 661, 145], [913, 93, 972, 144], [894, 95, 944, 144], [878, 107, 927, 145], [1176, 82, 1253, 174], [1285, 363, 1516, 546], [931, 78, 1013, 146], [910, 119, 1018, 203]]
[[558, 94, 663, 169], [573, 72, 661, 145], [913, 93, 972, 144], [894, 95, 944, 144], [878, 107, 927, 145], [1176, 82, 1253, 174], [1285, 363, 1516, 546], [931, 78, 1013, 146]] [[558, 94, 663, 169], [573, 72, 661, 145], [913, 93

### Median Optical Flow

In [63]:
if len(all_frames) > 0:
    prev_frame = cv2.imread(all_frames[0])
    prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)

    # Initialize variables for tracking
    tracked_bbox_annotation_median_of = {}
    idf1_scores_median_of = []
    hota_scores_median_of = []

    for frame_path in all_frames:
        frame = cv2.imread(frame_path)
        frame_idx = frame_path.split('/')[-1].split('.')[0].replace('frame_', '')
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        
        frame_annotations = annotation.get(frame_idx, [])  
        current_bboxes = []
        tracked_bbox_annotation_median_of[frame_idx] = []
        
        for bbox_ann in frame_annotations:
            if bbox_ann['label'] != 'car':
                continue
            detection = [bbox_ann['xtl'], bbox_ann['ytl'], bbox_ann['xbr'], bbox_ann['ybr']]
            x_center = (detection[0] + detection[2]) / 2
            y_center = (detection[1] + detection[3]) / 2
            
            # calculate median of within the bounding box
            x_min, y_min, x_max, y_max = map(int, detection)
            x_min = max(0, x_min)
            y_min = max(0, y_min)
            x_max = min(frame.shape[1], x_max)
            y_max = min(frame.shape[0], y_max)
            
            if prev_frame is not None and flow is not None:
                flow_roi = flow[y_min:y_max, x_min:x_max]
                median_flow = np.median(flow_roi, axis=(0, 1))
                x_center += median_flow[0]
                y_center += median_flow[1]
            
            current_measurements.append(np.array([x_center, y_center]))
            current_bboxes.append(detection)
        
        if not trackers:
            for measurement, bbox in zip(current_measurements, current_bboxes):
                kf = KalmanFilter(dt, process_noise, measurement_noise, np.array([measurement[0], measurement[1], 0, 0]), initial_uncertainty)
                trackers.append(Tracker(next_id, kf, bbox))
                next_id += 1
        else:
            for tracker in trackers:
                tracker.kf.predict()
            
            num_trackers = len(trackers)
            num_measurements = len(current_measurements)
            cost_matrix = np.zeros((num_trackers, num_measurements))
            for i, tracker in enumerate(trackers):
                for j, measurement in enumerate(current_measurements):
                    cost_matrix[i, j] = np.linalg.norm(measurement - tracker.kf.x[:2])
            tracker_indices, measurement_indices = linear_sum_assignment(cost_matrix)
            
            for t_idx, m_idx in zip(tracker_indices, measurement_indices):
                trackers[t_idx].kf.update(current_measurements[m_idx])
                trackers[t_idx].bbox = current_bboxes[m_idx]
                trackers[t_idx].lost = 0 
            
            unmatched_detections = set(range(num_measurements)) - set(measurement_indices)
            for m_idx in unmatched_detections:
                measurement = current_measurements[m_idx]
                bbox = current_bboxes[m_idx]
                kf = KalmanFilter(dt, process_noise, measurement_noise, np.array([measurement[0], measurement[1], 0, 0]), initial_uncertainty)
                trackers.append(Tracker(next_id, kf, bbox))
                next_id += 1
        
        for tracker in trackers:
            if tracker.id not in {tracker.id for tracker in trackers}:
                tracker.lost += 1
        
        removed_trackers = []
        for tracker in trackers:
            if tracker.lost > max_lost:
                removed_trackers.append(tracker)
            else:
                tracker.lost += 1
                if tracker.id not in {tracker.id for tracker in trackers}:
                    tracker.lost = 1
                if tracker.lost > max_consecutive_absence:
                    removed_trackers.append(tracker)
        
        for tracker in removed_trackers:
            if tracker in trackers:
                trackers.remove(tracker)
                tracker.trail = []
        
        frame_with_tracking = frame.copy()

        tracked_bbox_annotation_median_of[frame_idx] = []
        for tracker in trackers:
            if tracker.bbox:
                x_min, y_min, x_max, y_max = map(int, tracker.bbox)
                x_min = int(x_min)
                y_min = int(y_min)
                x_max = int(x_max)
                y_max = int(y_max)
                
                cv2.rectangle(frame_with_tracking, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
                cv2.putText(frame_with_tracking, f'ID: {tracker.id}', (x_min, y_min - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)

                tracked_bbox_annotation_median_of[frame_idx].append(
                    { "tracked_id": tracker.id,
                "xtl": int(x_min),
                "ytl": int(y_min),
                "xbr":int(x_max),
                "ybr": int(y_max)}
                )

            if tracker in trackers:
                x_estimated, y_estimated = int(tracker.kf.x[0]), int(tracker.kf.x[1])
                tracker.trail.append((x_estimated, y_estimated))
                for i in range(1, len(tracker.trail)):
                    cv2.line(frame_with_tracking, tracker.trail[i - 1], tracker.trail[i], tracker.trail_color, 10)
        
            
        tracked_bboxes = [list(map(int, tracker.bbox)) for tracker in trackers if tracker.bbox is not None]

        gt_boxes = [[int(bbox_ann['xtl']), int(bbox_ann['ytl']),int( bbox_ann['xbr']), int(bbox_ann['ybr'])] for bbox_ann in frame_annotations]

        idf1 = calculate_idf1(gt_boxes, tracked_bboxes)
        idf1_scores_median_of.append(idf1)
        
        idtp = calculate_idtp(tracked_bboxes, gt_boxes)
        idfp = calculate_idfp(tracked_bboxes, gt_boxes)
        idfn = calculate_idfn(tracked_bboxes, gt_boxes)

        try:
            hota = idtp / (idtp + 0.5 * (idfp + idfn))
        except ZeroDivisionError:
            hota = 0.0
        hota_scores_median_of.append(hota)
        
        prev_frame = frame.copy()
        prev_gray = current_gray

    cv2.destroyAllWindows()

    # Save the tracked bbox 
    with open("tracked_data_median_of.json", "w") as file:
        json.dump(tracked_bbox_annotation_median_of, file)

else:
    print("No frames available.")


Number of frames: 2141


### Max Optical Flow

In [66]:
import cv2
import json
import numpy as np
from glob import glob
from scipy.optimize import linear_sum_assignment

# Load annotation data
with open('/Users/dianatat/Documents/Master/C6 Video Analysis/project/frame_with_movement_data.json', 'r') as f:
    annotation = json.load(f)

# Replace the placeholder all_frames list with your actual list
all_frames = glob('/Users/dianatat/Documents/Master/C6 Video Analysis/project/extracted_frames/*')
assert len(all_frames) > 0, 'No frames found'

# Sort the frames
all_frames.sort(key=lambda x: int(x.split('/')[-1].split('.')[0].replace('frame_', '')))

# Check the length of all_frames
print("Number of frames:", len(all_frames))

# Ensure that the list is not empty before accessing the first frame
if len(all_frames) > 0:
    prev_frame = cv2.imread(all_frames[0])
    prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)

    # Initialize variables for tracking
    tracked_bbox_annotation_max_of = {}
    idf1_scores_max_of = []
    hota_scores_max_of = []

    for frame_path in all_frames:
        frame = cv2.imread(frame_path)
        frame_idx = frame_path.split('/')[-1].split('.')[0].replace('frame_', '')
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        
        frame_annotations = annotation.get(frame_idx, [])  # Assuming this is a list of detections
        current_measurements = []
        current_bboxes = []
        tracked_bbox_annotation_max_of[frame_idx] = []
        for bbox_ann in frame_annotations:
            
            # sample bbox_ann:[{'label': 'bike', 'xtl': 475.92, 'ytl': 212.16, 'xbr': 522.36, 'ybr': 357.15}]
            if bbox_ann['label'] != 'car':
                continue
            detection = [bbox_ann['xtl'], bbox_ann['ytl'], bbox_ann['xbr'], bbox_ann['ybr']]
            x_center = (detection[0] + detection[2]) / 2
            y_center = (detection[1] + detection[3]) / 2
            
            # calculate maximum of within the bounding box
            x_min, y_min, x_max, y_max = map(int, detection)
            x_min = max(0, x_min)
            y_min = max(0, y_min)
            x_max = min(frame.shape[1], x_max)
            y_max = min(frame.shape[0], y_max)
            
            if prev_frame is not None and flow is not None:
                flow_roi = flow[y_min:y_max, x_min:x_max]
                max_flow = np.max(flow_roi, axis=(0, 1))
                x_center += max_flow[0]
                y_center += max_flow[1]
            
            current_measurements.append(np.array([x_center, y_center]))
            current_bboxes.append(detection)
        
        if not trackers:  #create a tracker for each detection
            for measurement, bbox in zip(current_measurements, current_bboxes):
                kf = KalmanFilter(dt, process_noise, measurement_noise, np.array([measurement[0], measurement[1], 0, 0]), initial_uncertainty)
                trackers.append(Tracker(next_id, kf, bbox))
                next_id += 1
        else:
            for tracker in trackers:
                tracker.kf.predict()
            
            num_trackers = len(trackers)
            num_measurements = len(current_measurements)
            cost_matrix = np.zeros((num_trackers, num_measurements))
            for i, tracker in enumerate(trackers):
                for j, measurement in enumerate(current_measurements):
                    cost_matrix[i, j] = np.linalg.norm(measurement - tracker.kf.x[:2])
            tracker_indices, measurement_indices = linear_sum_assignment(cost_matrix)
            
            for t_idx, m_idx in zip(tracker_indices, measurement_indices):
                trackers[t_idx].kf.update(current_measurements[m_idx])
                trackers[t_idx].bbox = current_bboxes[m_idx]
                trackers[t_idx].lost = 0 
            
            unmatched_detections = set(range(num_measurements)) - set(measurement_indices)
            for m_idx in unmatched_detections:
                measurement = current_measurements[m_idx]
                bbox = current_bboxes[m_idx]
                kf = KalmanFilter(dt, process_noise, measurement_noise, np.array([measurement[0], measurement[1], 0, 0]), initial_uncertainty)
                trackers.append(Tracker(next_id, kf, bbox))
                next_id += 1
        
        for tracker in trackers:
            if tracker.id not in {tracker.id for tracker in trackers}:
                tracker.lost += 1
        
        removed_trackers = []
        for tracker in trackers:
            if tracker.lost > max_lost:
                removed_trackers.append(tracker)
            else:
                tracker.lost += 1
                if tracker.id not in {tracker.id for tracker in trackers}:
                    tracker.lost = 1
                if tracker.lost > max_consecutive_absence:
                    removed_trackers.append(tracker)
        
        for tracker in removed_trackers:
            if tracker in trackers:
                trackers.remove(tracker)
                # Reset trail
                tracker.trail = []
        
        frame_with_tracking = frame.copy()

        tracked_bbox_annotation_max_of[frame_idx] = []
        for tracker in trackers:
            if tracker.bbox:
                x_min, y_min, x_max, y_max = map(int, tracker.bbox)
                x_min = int(x_min)
                y_min = int(y_min)
                x_max = int(x_max)
                y_max = int(y_max)
                
                cv2.rectangle(frame_with_tracking, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
                cv2.putText(frame_with_tracking, f'ID: {tracker.id}', (x_min, y_min - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)

                tracked_bbox_annotation_max_of[frame_idx].append(
                    { "tracked_id": tracker.id,
                "xtl": int(x_min),
                "ytl": int(y_min),
                "xbr":int(x_max),
                "ybr": int(y_max)}
                )

            if tracker in trackers:
                x_estimated, y_estimated = int(tracker.kf.x[0]), int(tracker.kf.x[1])
                tracker.trail.append((x_estimated, y_estimated))
                for i in range(1, len(tracker.trail)):
                    cv2.line(frame_with_tracking, tracker.trail[i - 1], tracker.trail[i], tracker.trail_color, 10)
        
            
        tracked_bboxes = [list(map(int, tracker.bbox)) for tracker in trackers if tracker.bbox is not None]

        gt_boxes = [[int(bbox_ann['xtl']), int(bbox_ann['ytl']),int( bbox_ann['xbr']), int(bbox_ann['ybr'])] for bbox_ann in frame_annotations]

        idf1 = calculate_idf1(gt_boxes, tracked_bboxes)
        idf1_scores_max_of.append(idf1)
        
        idtp = calculate_idtp(tracked_bboxes, gt_boxes)
        idfp = calculate_idfp(tracked_bboxes, gt_boxes)
        idfn = calculate_idfn(tracked_bboxes, gt_boxes)

        try:
            hota = idtp / (idtp + 0.5 * (idfp + idfn))
        except ZeroDivisionError:
            hota = 0.0
        hota_scores_max_of.append(hota)
        
        prev_frame = frame.copy()
        prev_gray = current_gray

    cv2.destroyAllWindows()

    # Save the tracked bbox 
    with open("tracked_data_max_of.json", "w") as file:
        json.dump(tracked_bbox_annotation_max_of, file)

else:
    print("No frames available.")


Number of frames: 2141
