### Import libraries

In [10]:
import cv2
import os
import numpy as np
import tqdm
from utils import calculate_iou, extract_frames, parse_xml_annotations, read_data

### Paths

In [21]:
video_path = "../AICity_data/train/S03/c010/vdo.avi"
frame_output_folder = "output/frames"
xml_annotation_path = "../ai_challenge_s03_c010-full_annotation.xml"
output_base_folder = "output"

### Extract frames

In [19]:
num_frames_extracted = extract_frames.extract_frames(video_path, frame_output_folder)

Finished extracting frames.


### Extract annotation

In [8]:
frame_with_movement_data , frame_without_movement_data = parse_xml_annotations(xml_file=xml_annotation_path)

In [None]:
for frame, annotations in frame_with_movement_data.items():
    print(f"Frame {frame} - Annotations with movement:")
    for annotation in annotations:
        print(f"  Label: {annotation['label']}, Coordinates: ({annotation['xtl']}, {annotation['ytl']}) to ({annotation['xbr']}, {annotation['ybr']})")

for frame, annotations in frame_without_movement_data.items():
    print(f"Frame {frame} - Annotations without movement:")
    for annotation in annotations:
        print(f"  Label: {annotation['label']}, Coordinates: ({annotation['xtl']}, {annotation['ytl']}) to ({annotation['xbr']}, {annotation['ybr']})")

In [None]:
annotations, sorted_frames = read_data.parse_annotations(path="../ai_challenge_s03_c010-full_annotation.xml", isGT=False)

for annotation in annotations:
    print(f"Frame {annotation['frame']} - Bounding Boxes:")
    for bbox in annotation['bbox']:
        print(f"  Label: {bbox[2]}, Coordinates: ({bbox[0]}, {bbox[1]}) to ({bbox[2]}, {bbox[3]})")

## Class Methods

In [4]:
class MOGBackgroundSubtractor:
    def __init__(self, history=100, var_threshold=16, detect_shadows=True):
        self.history = history
        self.var_threshold = var_threshold
        self.detect_shadows = detect_shadows
        self.bg_model = None

    def initialize(self, frame):
        self.bg_model = cv2.createBackgroundSubtractorMOG2(
            history=self.history,
            varThreshold=self.var_threshold,
            detectShadows=self.detect_shadows
        )
        _ = self.bg_model.apply(frame)  # Initialize the model

    def apply(self, frame):
        if self.bg_model is None:
            self.initialize(frame)
        return self.bg_model.apply(frame)

class LSBPBackgroundSubtractor:
    def __init__(self, threshold=30):
        self.threshold = threshold
        self.prev_frame = None
        self.frames_processed = 0

    def apply(self, frame):
        if self.prev_frame is None:
            self.prev_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            self.frames_processed += 1
            return None  # Skip background subtraction during initialization

        gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # calculate absolute difference between frames
        diff = cv2.absdiff(self.prev_frame, gray_frame)

        # apply threshold to create a binary mask
        _, binary_mask = cv2.threshold(diff, self.threshold, 255, cv2.THRESH_BINARY)

        self.prev_frame = gray_frame
        return binary_mask

class GMGBackgroundSubtractor:
    def __init__(self, initialization_frames=120):
        self.learning_rate = 0.02
        self.initialization_frames = initialization_frames
        self.gmg_model = None
        self.frames_processed = 0

    def initialize(self, frame):
        self.gmg_model = frame.copy().astype(np.float32)
        self.frames_processed += 1

    def apply(self, frame):
        if self.gmg_model is None:
            self.initialize(frame)
            return None

        # Convert the frame to float32 for GMG
        frame_float32 = frame.astype(np.float32)

        # Update the GMG model
        cv2.accumulateWeighted(frame_float32, self.gmg_model, self.learning_rate)

        # Calculate the foreground mask
        fg_mask = cv2.absdiff(frame_float32, self.gmg_model).astype(np.uint8)
        fg_mask = cv2.threshold(fg_mask, 30, 255, cv2.THRESH_BINARY)[1]

        return fg_mask

class GSOCBackgroundSubtractor:
    def __init__(self):
        self.learning_rate = 0.01
        self.gsoc_model = None

    def initialize(self, frame):
        self.gsoc_model = frame.copy().astype(np.float32)

    def apply(self, frame):
        if self.gsoc_model is None:
            self.initialize(frame)
            return None

        # Convert the frame to float32 for GSOC
        frame_float32 = frame.astype(np.float32)

        # Update the GSOC model
        cv2.accumulateWeighted(frame_float32, self.gsoc_model, self.learning_rate)

        # Calculate the foreground mask
        fg_mask = cv2.absdiff(frame_float32, self.gsoc_model).astype(np.uint8)
        fg_mask = cv2.threshold(fg_mask, 30, 255, cv2.THRESH_BINARY)[1]

        return fg_mask

class CNTBackgroundSubtractor:
    def __init__(self):
        self.frames_processed = 0

    def apply(self, frame):
        if self.frames_processed == 0:
            self.prev_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            self.frames_processed += 1
            return None  # Skip background subtraction during initialization

        gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # calculate absolute difference between frames
        diff = cv2.absdiff(self.prev_frame, gray_frame)

        # apply threshold to create a binary mask
        _, binary_mask = cv2.threshold(diff, 30, 255, cv2.THRESH_BINARY)

        self.prev_frame = gray_frame
        return binary_mask

### Evaluate SOTA methods, such as MOG, MOG2, KNN, GMG, CNT, LSBP and GSOC and calculate AP50 considering all the annotations

In [12]:
def apply_background_subtraction(video_path, output_base_folder, xml_annotation_path, method=''):
    cap = cv2.VideoCapture(video_path)

    if not cap.isOpened():
        print("Error: Could not open video.")
        return

    frame_with_movement_data, _ = parse_xml_annotations(xml_file=xml_annotation_path)

    print(f"Frames with annotations: {frame_with_movement_data.keys()}")

    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    iou_threshold = 0.5
    true_positives = 0
    false_positives = 0
    annotated_frames = 0

    output_folder_masks = os.path.join(output_base_folder, method.lower() + "_masks") 
    if not os.path.exists(output_folder_masks):
        os.makedirs(output_folder_masks)

    if method == 'MOG':
        bg_subtractor = MOGBackgroundSubtractor()
    elif method == 'MOG2':
        bg_subtractor = cv2.createBackgroundSubtractorMOG2()
    elif method == 'LSBP':
        bg_subtractor = LSBPBackgroundSubtractor()
    elif method == 'KNN':
        bg_subtractor = cv2.createBackgroundSubtractorKNN()
    elif method == 'CNT':
        bg_subtractor = CNTBackgroundSubtractor()
    elif method == 'GSOC':
        bg_subtractor = GSOCBackgroundSubtractor()
    elif method == 'GMG':
        bg_subtractor = GMGBackgroundSubtractor()
    else:
        print(f"Unsupported method: {method}")
        return

    with tqdm(total=total_frames, desc=f"Processing frames with {method} background subtraction") as pbar:
        for current_frame in range(total_frames):
            ret, frame = cap.read()

            if not ret:
                break

            # check if ground truth annotations are available for the current frame
            if current_frame in frame_with_movement_data:
                annotated_frames += 1

                fg_mask = bg_subtractor.apply(frame)

                if fg_mask is None:
                    print(f"Foreground mask is empty for frame {current_frame}. Skipping...")
                    continue  

                if method == 'MOG2':
                    _, fg_mask_binary = cv2.threshold(fg_mask, 1, 255, cv2.THRESH_BINARY)
                elif method == 'GMG':
                    _, fg_mask_binary = cv2.threshold(fg_mask, 128, 255, cv2.THRESH_BINARY)
                else:
                    fg_mask_binary = fg_mask

                if len(fg_mask_binary.shape) > 2:
                    fg_mask_binary = cv2.cvtColor(fg_mask_binary, cv2.COLOR_BGR2GRAY)

                output_folder = os.path.join(output_base_folder, method.lower(), f"frames_{current_frame:04d}")  
                if not os.path.exists(output_folder):
                    os.makedirs(output_folder)

                mask_name = os.path.join(output_folder_masks, f"mask_{current_frame:04d}.jpg")
                cv2.imwrite(mask_name, fg_mask_binary)

                # connected components analysis to obtain detections from the masks
                num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(
                    fg_mask_binary,  
                    64, cv2.CV_32S
                )
                detections = []

                for i in range(1, num_labels):  
                    x, y, w, h, area = stats[i]
                    if area < 100:
                        continue
                    detections.append({
                        "label": "bike",
                        "xtl": x,
                        "ytl": y,
                        "xbr": x + w,
                        "ybr": y + h,
                        "confidence": 1,
                    })

                # match detections with ground truth
                ground_truth = frame_with_movement_data[current_frame]

                for det in detections:
                    matched = False
                    for ann in ground_truth:
                        iou = calculate_iou.calculate_iou(
                            (det["xtl"], det["ytl"], det["xbr"] - det["xtl"], det["ybr"] - det["ytl"]),
                            (ann["xtl"], ann["ytl"], ann["xbr"] - ann["xtl"], ann["ybr"] - ann["ytl"])
                        )
                        if iou >= iou_threshold:
                            true_positives += 1
                            matched = True
                            break

                    if not matched:
                        false_positives += 1

                pbar.update(1)

    cap.release()

    # calculate precision only if there are annotated frames
    if annotated_frames > 0:
        precision = true_positives / (true_positives + false_positives)
        print(f"Finished applying {method} background subtraction.")
        print(f"Average Precision: {precision}")
    else:
        print("No annotated frames found.")


### Check the results

In [39]:
apply_background_subtraction(video_path, frame_output_folder, xml_annotation_path, method="MOG")

Frames with annotations: dict_keys([86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345

Processing frames with MOG background subtraction:  98%|█████████▊| 2107/2141 [01:16<00:01, 27.70it/s]

Finished applying MOG background subtraction.
Average Precision: 0.4202364799379725





In [35]:
apply_background_subtraction(video_path, output_base_folder, xml_annotation_path, method="MOG2")

Frames with annotations: dict_keys([86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345

Processing frames with MOG2 background subtraction:  98%|█████████▊| 2107/2141 [01:37<00:01, 21.65it/s]

Finished applying MOG2 background subtraction.
Average Precision: 0.2180253164556962





In [40]:
apply_background_subtraction(video_path, output_base_folder, xml_annotation_path, method="GMG")

Frames with annotations: dict_keys([86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345

Processing frames with GMG background subtraction:   0%|          | 0/2141 [00:00<?, ?it/s]

Foreground mask is empty for frame 0. Skipping...


Processing frames with GMG background subtraction:  98%|█████████▊| 2106/2141 [00:57<00:00, 36.59it/s]

Finished applying GMG background subtraction.
Average Precision: 0.30575692963752665





In [41]:
apply_background_subtraction(video_path, output_base_folder, xml_annotation_path, method="LSBP")

Frames with annotations: dict_keys([86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345

Processing frames with LSBP background subtraction:   0%|          | 0/2141 [00:00<?, ?it/s]

Foreground mask is empty for frame 0. Skipping...


Processing frames with LSBP background subtraction:  98%|█████████▊| 2106/2141 [00:24<00:00, 84.97it/s] 

Finished applying LSBP background subtraction.
Average Precision: 0.822399420079739





In [31]:
apply_background_subtraction(video_path, output_base_folder, xml_annotation_path, method="KNN")

Frames with annotations: dict_keys([86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345

Processing frames with KNN background subtraction:  98%|█████████▊| 2107/2141 [01:32<00:01, 22.85it/s]

Finished applying KNN background subtraction.
Average Precision: 0.188483456664115





In [38]:
apply_background_subtraction(video_path, output_base_folder, xml_annotation_path, method="KNN")

Frames with annotations: dict_keys([86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345

Processing frames with KNN background subtraction:  98%|█████████▊| 2107/2141 [01:36<00:01, 21.92it/s]

Finished applying KNN background subtraction.
Average Precision: 0.18829732947780234





In [42]:
apply_background_subtraction(video_path, output_base_folder, xml_annotation_path, method="CNT")

Frames with annotations: dict_keys([86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345

Processing frames with CNT background subtraction:   0%|          | 0/2141 [00:00<?, ?it/s]

Foreground mask is empty for frame 0. Skipping...


Processing frames with CNT background subtraction:  98%|█████████▊| 2106/2141 [00:24<00:00, 85.99it/s] 

Finished applying CNT background subtraction.
Average Precision: 0.822399420079739





In [33]:
apply_background_subtraction(video_path, output_base_folder, xml_annotation_path, method="GSOC")

Frames with annotations: dict_keys([86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345

Processing frames with GSOC background subtraction:   0%|          | 4/2141 [00:00<01:00, 35.36it/s]

Foreground mask is empty for frame 0. Skipping...


Processing frames with GSOC background subtraction:  98%|█████████▊| 2106/2141 [01:00<00:01, 34.75it/s]

Finished applying GSOC background subtraction.
Average Precision: 0.26045221843003413





### Evaluate SOTA methods, such as MOG, MOG2, KNN, GMG, CNT, LSBP and GSOC and calculate AP50 removing "bike" from annotation and considering the bounding boxes which are considerably bigger 

In [22]:
def apply_background_subtraction(video_path, output_base_folder, xml_annotation_path, method=''):
    cap = cv2.VideoCapture(video_path)

    if not cap.isOpened():
        print("Error: Could not open video.")
        return

    frame_with_movement_data, _ = parse_xml_annotations(xml_file=xml_annotation_path)

    print(f"Frames with annotations: {frame_with_movement_data.keys()}")

    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    iou_threshold = 0.5
    true_positives = 0
    false_positives = 0
    annotated_frames = 0

    output_folder_masks = os.path.join(output_base_folder, method.lower() + "_masks")  
    if not os.path.exists(output_folder_masks):
        os.makedirs(output_folder_masks)

    if method == 'MOG':
        bg_subtractor = MOGBackgroundSubtractor()
    elif method == 'MOG2':
        bg_subtractor = cv2.createBackgroundSubtractorMOG2()
    elif method == 'LSBP':
        bg_subtractor = LSBPBackgroundSubtractor()
    elif method == 'KNN':
        bg_subtractor = cv2.createBackgroundSubtractorKNN()
    elif method == 'CNT':
        bg_subtractor = CNTBackgroundSubtractor()
    elif method == 'GSOC':
        bg_subtractor = GSOCBackgroundSubtractor()
    elif method == 'GMG':
        bg_subtractor = GMGBackgroundSubtractor()
    else:
        print(f"Unsupported method: {method}")
        return

    with tqdm(total=total_frames, desc=f"Processing frames with {method} background subtraction") as pbar:
        for current_frame in range(total_frames):
            ret, frame = cap.read()

            if not ret:
                break

            if current_frame in frame_with_movement_data:
                annotated_frames += 1

                fg_mask = bg_subtractor.apply(frame)

                if fg_mask is None:
                    print(f"Foreground mask is empty for frame {current_frame}. Skipping...")
                    continue  

                if method == 'MOG2':
                    _, fg_mask_binary = cv2.threshold(fg_mask, 1, 255, cv2.THRESH_BINARY)
                elif method == 'GMG':
                    _, fg_mask_binary = cv2.threshold(fg_mask, 128, 255, cv2.THRESH_BINARY)
                else:
                    fg_mask_binary = fg_mask

                # check if the mask is a single-channel binary image
                if len(fg_mask_binary.shape) > 2:
                    fg_mask_binary = cv2.cvtColor(fg_mask_binary, cv2.COLOR_BGR2GRAY)

                output_folder = os.path.join(output_base_folder, method.lower(), f"frames_{current_frame:04d}")  # Modify this line
                if not os.path.exists(output_folder):
                    os.makedirs(output_folder)

                mask_name = os.path.join(output_folder_masks, f"mask_{current_frame:04d}.jpg")
                cv2.imwrite(mask_name, fg_mask_binary)

                # Ccnnected components analysis to obtain detections from the masks
                num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(
                    fg_mask_binary, 
                    64, cv2.CV_32S
                )
                detections = []

                min_detection_area = 100

                ground_truth = frame_with_movement_data[current_frame]

                # check if the area of the detection is considerably bigger than the corresponding annotation
                for i in range(1, num_labels): 
                    x, y, w, h, area = stats[i]
                    if area < min_detection_area:
                        continue

                    for ann in ground_truth:  
                        if w * h > 2 * (ann["xbr"] - ann["xtl"]) * (ann["ybr"] - ann["ytl"]):
                            detections.append({
                                "label": "bike",
                                "xtl": x,
                                "ytl": y,
                                "xbr": x + w,
                                "ybr": y + h,
                                "confidence": 1,
                            })

                # match with ground truth
                for det in detections:
                    matched = False

                    for ann in ground_truth:
                        iou = calculate_iou.calculate_iou(
                            (det["xtl"], det["ytl"], det["xbr"] - det["xtl"], det["ybr"] - det["ytl"]),
                            (ann["xtl"], ann["ytl"], ann["xbr"] - ann["xtl"], ann["ybr"] - ann["ytl"])
                        )
                        if iou >= iou_threshold:
                            true_positives += 1
                            matched = True
                            break

                    if not matched:
                        false_positives += 1

                pbar.update(1)


        cap.release()

    # calculate precision only if there are annotated frames
    if annotated_frames > 0:
        precision = true_positives / (true_positives + false_positives)

        print(f"Finished applying {method} background subtraction.")
        print(f"Average Precision: {precision}")
    else:
        print("No annotated frames found.")


### Check the results with the new annotation consideration

In [24]:
apply_background_subtraction(video_path, output_base_folder, xml_annotation_path, method="MOG")

Frames with annotations: dict_keys([86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345

Processing frames with MOG background subtraction:  98%|█████████▊| 2107/2141 [01:10<00:01, 29.70it/s]

Finished applying MOG background subtraction.
Average Precision: 0.4202364799379725





In [25]:
apply_background_subtraction(video_path, output_base_folder, xml_annotation_path, method="MOG2")

Frames with annotations: dict_keys([86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345

Processing frames with MOG2 background subtraction:  98%|█████████▊| 2107/2141 [01:24<00:01, 24.87it/s]

Finished applying MOG2 background subtraction.
Average Precision: 0.2180253164556962





In [26]:
apply_background_subtraction(video_path, output_base_folder, xml_annotation_path, method="LSBP")

Frames with annotations: dict_keys([86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345

Processing frames with LSBP background subtraction:   0%|          | 0/2141 [00:00<?, ?it/s]

Foreground mask is empty for frame 0. Skipping...


Processing frames with LSBP background subtraction:  98%|█████████▊| 2106/2141 [00:23<00:00, 90.81it/s] 

Finished applying LSBP background subtraction.
Average Precision: 0.822399420079739





In [23]:
apply_background_subtraction(video_path, output_base_folder, xml_annotation_path, method="GMG")

Frames with annotations: dict_keys([86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345

Processing frames with GMG background subtraction:   0%|          | 4/2141 [00:00<01:02, 34.22it/s]

Foreground mask is empty for frame 0. Skipping...


Processing frames with GMG background subtraction:  98%|█████████▊| 2106/2141 [00:52<00:00, 39.77it/s]

Finished applying GMG background subtraction.
Average Precision: 0.30575692963752665





In [27]:
apply_background_subtraction(video_path, output_base_folder, xml_annotation_path, method="CNT")

Frames with annotations: dict_keys([86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345

Processing frames with CNT background subtraction:   0%|          | 0/2141 [00:00<?, ?it/s]

Foreground mask is empty for frame 0. Skipping...


Processing frames with CNT background subtraction:  98%|█████████▊| 2106/2141 [00:22<00:00, 92.06it/s] 

Finished applying CNT background subtraction.
Average Precision: 0.822399420079739





In [28]:
apply_background_subtraction(video_path, output_base_folder, xml_annotation_path, method="KNN")

Frames with annotations: dict_keys([86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345

Processing frames with KNN background subtraction:  98%|█████████▊| 2107/2141 [01:27<00:01, 24.12it/s]

Finished applying KNN background subtraction.
Average Precision: 0.18617961451056309





In [29]:
apply_background_subtraction(video_path, output_base_folder, xml_annotation_path, method="GSOC")

Frames with annotations: dict_keys([86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345

Processing frames with GSOC background subtraction:   0%|          | 0/2141 [00:00<?, ?it/s]

Foreground mask is empty for frame 0. Skipping...


Processing frames with GSOC background subtraction:  98%|█████████▊| 2106/2141 [00:58<00:00, 36.22it/s]

Finished applying GSOC background subtraction.
Average Precision: 0.26045221843003413



