# 1. Dataset Preparation

## Mount Google Drive

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# 2. Person Detection

## Install YOLOv8 (ultralytics package)

In [3]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.75-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  Downloading nv

## Import required libraries

In [4]:
from IPython.display import clear_output
import cv2
import os
import json
import numpy as np
from ultralytics import YOLO
from google.colab.patches import cv2_imshow
import gc

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


## Load the YOLOv8 model (nano version for speed; adjust as needed)

In [5]:
model = YOLO("yolov8n.pt")  # This downloads the model if not already present

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt'...


100%|██████████| 6.25M/6.25M [00:00<00:00, 48.9MB/s]


## Define paths for each camera's images

In [6]:
# Adjust the base_path to match the location of your dataset on Drive.
base_path = "/content/drive/MyDrive/dataset/Wildtrack/Image_subsets"
camera_dirs = {
    "Camera_1": os.path.join(base_path, "C1"),
    "Camera_2": os.path.join(base_path, "C2"),
    "Camera_3": os.path.join(base_path, "C3"),
    "Camera_4": os.path.join(base_path, "C4"),
    "Camera_5": os.path.join(base_path, "C5"),
    "Camera_6": os.path.join(base_path, "C6"),
    "Camera_7": os.path.join(base_path, "C7")
}

## Directory to save intermediate detection results

In [7]:
output_dir = "/content/drive/MyDrive"
os.makedirs(output_dir, exist_ok=True)

In [8]:
if not os.path.exists(output_dir):
    print("Creating output directory at:", output_dir)
    os.makedirs(output_dir, exist_ok=True)
else:
    print("Output directory exists:", output_dir)

Output directory exists: /content/drive/MyDrive


## Set detection confidence threshold

In [9]:
conf_thresh = 0.5

## Process images for each camera folder

In [11]:
for camera_id, camera_path in camera_dirs.items():
    print(f"Processing images for {camera_id} in {camera_path}")
    detections_storage = {}

    # List and sort image files (supports .jpg, .jpeg, .png)
    image_files = sorted([f for f in os.listdir(camera_path) if f.lower().endswith(('.jpg', '.jpeg', '.png'))])

    batch_size = 15
    for i in range(0, len(image_files), batch_size):
        batch_files = image_files[i:i+batch_size]
        for image_name in batch_files:
            image_path = os.path.join(camera_path, image_name)
            frame = cv2.imread(image_path)
            if frame is None:
                continue

            # Run YOLOv8 on the image
            results = model(frame, verbose=False)
            person_detections = []
            for box in results[0].boxes.data.cpu().numpy():
                # Each detection box: [x1, y1, x2, y2, confidence, class]
                x1, y1, x2, y2, conf, cls = box
                # Filter for 'person' detections (COCO class id 0)
                if int(cls) == 0 and conf >= conf_thresh:
                    x = int(x1)
                    y = int(y1)
                    w = int(x2 - x1)
                    h = int(y2 - y1)
                    person_detections.append((x, y, w, h, float(conf)))
            detections_storage[image_name] = person_detections

            # (Optional) Visualize only the first image in each batch to monitor progress
            if batch_files.index(image_name) == 0:
                for (x, y, w, h, conf) in person_detections:
                    cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
                    cv2.putText(frame, f"{conf:.2f}", (x, y-10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
                cv2_imshow(frame)
                cv2.waitKey(10)

        # Save intermediate results to disk after processing each batch
        intermediate_file = os.path.join(output_dir, f"{camera_id}_detections_{i}_{i+batch_size}.json")
        with open(intermediate_file, 'w') as f:
            json.dump(detections_storage, f)
        print(f"Saved detections for images {i} to {i+batch_size} of {camera_id}")

        # Clear the storage and output to prevent disconnection
        detections_storage.clear()
        gc.collect()
        clear_output(wait=True)

    print(f"Finished processing {camera_id}")

Finished processing Camera_3
Processing images for Camera_4 in /content/drive/MyDrive/dataset/Wildtrack/Image_subsets/C4


FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/dataset/Wildtrack/Image_subsets/C4'

# 3. Parsing Annotations & Recovering 3D Coordinates

In [None]:
import os
import json
import re

In [None]:
# Function to compute 3D coordinates given a positionID

def compute_3d_coordinates(position_id):
    X = -3.0 + 0.025 * (position_id % 480)
    Y = -9.0 + 0.025 * (position_id // 480)
    return X, Y

## Step 1: Load Annotations & Compute 3D Coordinates

In [None]:
# Path to the annotations folder (update with your Drive path)
annotations_dir = "/content/drive/MyDrive/Wildtrack/annotations_positions"

# Dictionary to store processed annotations (keyed by JSON filename)
annotations = {}

# Loop through each JSON file in the annotations folder
for filename in sorted(os.listdir(annotations_dir)):
    if filename.endswith(".json"):
        filepath = os.path.join(annotations_dir, filename)
        with open(filepath, 'r') as f:
            data = json.load(f)

        # List to store all annotations for this frame/file
        frame_annotations = []
        for ann in data:
            # Extract metadata (adjust key names if necessary)
            position_id = ann.get("positionID")
            rectangle_id = ann.get("rectangleID")
            camera_id = ann.get("cameraID")

            # Compute 3D coordinates using the provided formulas
            X, Y = compute_3d_coordinates(position_id)

            # Store the annotation information
            annotation_info = {
                "positionID": position_id,
                "rectangleID": rectangle_id,
                "cameraID": camera_id,
                "X": X,
                "Y": Y
            }
            frame_annotations.append(annotation_info)

        # Use the filename as the key (or extract a frame number if needed)
        annotations[filename] = frame_annotations

## Step 2: Parse rectangles.pom to Build a Mapping

In [None]:
# Path to the rectangles.pom file (update with your Drive path)
rectangles_pom_file = "/content/drive/MyDrive/Wildtrack/rectangles.pom"

# Dictionary to store rectangle mapping: key=(cameraID, rectangleID), value=(x, y, w, h)
rect_map = {}
with open(rectangles_pom_file, 'r') as f:
    for line in f:
        line = line.strip()
        if line.startswith("Rectangle"):
            # Expected format: "Rectangle ID=178 Cam=1 X=531 Y=192 W=32 H=88"
            m = re.search(r"Rectangle ID=(\d+)\s+Cam=(\d+)\s+X=([\d\.]+)\s+Y=([\d\.]+)\s+W=([\d\.]+)\s+H=([\d\.]+)", line)
            if m:
                rect_id = int(m.group(1))
                cam_id = int(m.group(2))
                x_val = float(m.group(3))
                y_val = float(m.group(4))
                w_val = float(m.group(5))
                h_val = float(m.group(6))
                rect_map[(cam_id, rect_id)] = (x_val, y_val, w_val, h_val)

## Step 3: Match Annotations with 2D Detections Using the Rectangle Mapping

In [None]:
for frame_file, ann_list in annotations.items():
    for ann in ann_list:
        cam_id = ann.get("cameraID")
        rect_id = ann.get("rectangleID")
        # Look up the 2D bounding box from the rectangles mapping
        bbox_2d = rect_map.get((cam_id, rect_id))
        ann["bbox_2d"] = bbox_2d  # This will be None if no match is found

## Print Sample Output for Verification

In [None]:
for frame_file, ann_list in annotations.items():
    print("Frame:", frame_file)
    for ann in ann_list:
        print(ann)
    print("-" * 40)

In [None]:
import os
import json
import re

# -------------------------------
# Helper Function: Compute 3D Coordinates
# -------------------------------
def compute_3d_coordinates(position_id):
    # Formulas:
    #   X = -3.0 + 0.025 * (positionID % 480)
    #   Y = -9.0 + 0.025 * (positionID // 480)
    X = -3.0 + 0.025 * (position_id % 480)
    Y = -9.0 + 0.025 * (position_id // 480)
    return X, Y

# -------------------------------
# Step 1: Load Annotations & Compute 3D Coordinates
# -------------------------------
annotations_dir = "/content/drive/MyDrive/Wildtrack/annotations_positions"
output_annotations = {}  # Will store: {frame_filename: [annotation, ...], ...}

for filename in sorted(os.listdir(annotations_dir)):
    if filename.endswith(".json"):
        filepath = os.path.join(annotations_dir, filename)
        with open(filepath, "r") as f:
            data = json.load(f)

        frame_annotations = []
        for ann in data:
            # Extract metadata using the available keys
            personID = ann.get("personID")
            positionID = ann.get("positionID")
            views = ann.get("views", [])

            # Skip if no positionID
            if positionID is None:
                continue

            # Compute 3D coordinates
            X, Y = compute_3d_coordinates(positionID)

            # Build a list of view annotations (each view provides 2D bounding box info)
            view_annotations = []
            for v in views:
                viewNum = v.get("viewNum")  # This will be used as the camera identifier
                xmin = v.get("xmin")
                xmax = v.get("xmax")
                ymin = v.get("ymin")
                ymax = v.get("ymax")
                view_info = {
                    "viewNum": viewNum,
                    "xmin": xmin,
                    "xmax": xmax,
                    "ymin": ymin,
                    "ymax": ymax,
                    "bbox_2d": None  # Placeholder; will be computed in optional matching below
                }
                view_annotations.append(view_info)

            annotation_info = {
                "personID": personID,
                "positionID": positionID,
                "X": X,
                "Y": Y,
                "views": view_annotations  # List of per-view annotations
            }
            frame_annotations.append(annotation_info)

        output_annotations[filename] = frame_annotations

# -------------------------------
# Step 2: Parse rectangles.pom to Build a Rectangle Mapping (Optional)
# -------------------------------
# This mapping is built for reference. Your annotations don't include rectangleID,
# so we won't use it directly, but it's available for further association if needed.
rectangles_pom_file = "/content/drive/MyDrive/Wildtrack/rectangles.pom"
rect_map = {}  # Mapping: {(cameraID, rectangleID): (x, y, w, h)}

with open(rectangles_pom_file, "r") as f:
    for line in f:
        line = line.strip()
        if line.startswith("Rectangle"):
            # Expected format: "Rectangle ID=178 Cam=1 X=531 Y=192 W=32 H=88"
            match = re.search(r"Rectangle\s+ID=(\d+)\s+Cam=(\d+)\s+X=([\d\.]+)\s+Y=([\d\.]+)\s+W=([\d\.]+)\s+H=([\d\.]+)", line)
            if match:
                rect_id = int(match.group(1))
                cam_id = int(match.group(2))
                x_val = float(match.group(3))
                y_val = float(match.group(4))
                w_val = float(match.group(5))
                h_val = float(match.group(6))
                rect_map[(cam_id, rect_id)] = (x_val, y_val, w_val, h_val)

# -------------------------------
# Step 3: Optional Matching Using "viewNum" from Annotations
# -------------------------------
# Since your annotations don't provide explicit rectangleID or cameraID,
# we'll use the "viewNum" in each view as the camera identifier.
# And we'll compute a 2D bounding box from the annotated xmin, xmax, ymin, ymax values.
for frame_file, ann_list in output_annotations.items():
    for ann in ann_list:
        for view in ann.get("views", []):
            xmin = view.get("xmin")
            xmax = view.get("xmax")
            ymin = view.get("ymin")
            ymax = view.get("ymax")
            # Check if the values are valid (not -1)
            if (xmin is not None and xmax is not None and ymin is not None and ymax is not None and
                xmin != -1 and xmax != -1 and ymin != -1 and ymax != -1):
                width = xmax - xmin
                height = ymax - ymin
                view["bbox_2d"] = (xmin, ymin, width, height)
            else:
                view["bbox_2d"] = None
            # Optionally, set the annotation's cameraID if it's not set, using viewNum.
            if ann.get("cameraID") is None:
                ann["cameraID"] = view.get("viewNum")

# -------------------------------
# Step 4: Print Sample Output for Verification
# -------------------------------
for frame_file, ann_list in output_annotations.items():
    print("Frame:", frame_file)
    for ann in ann_list:
        print(ann)
    print("-" * 40)


In [None]:
import os
import json
import re
import cv2

# -------------------------------
# Dummy Placeholder for DeepOcclusionRefiner
# -------------------------------
try:
    from deep_occlusion import DeepOcclusionRefiner  # Production module
except ModuleNotFoundError:
    print("Warning: deep_occlusion module not found. Using dummy occlusion refiner.")
    class DeepOcclusionRefiner:
        def __init__(self, model_path, config_path):
            self.model_path = model_path
            self.config_path = config_path
            print(f"Initialized dummy DeepOcclusionRefiner with model: {model_path} and config: {config_path}")
        def refine_bbox(self, image, bbox):
            # In production, apply occlusion handling here.
            # For now, simply return the original bounding box.
            return bbox

# -------------------------------
# Helper Function: Compute 3D Coordinates
# -------------------------------
def compute_3d_coordinates(position_id):
    # Using formulas:
    #   X = -3.0 + 0.025 * (positionID % 480)
    #   Y = -9.0 + 0.025 * (positionID // 480)
    X = -3.0 + 0.025 * (position_id % 480)
    Y = -9.0 + 0.025 * (position_id // 480)
    return X, Y

# -------------------------------
# Step 1: Load Annotations & Compute 3D Coordinates
# -------------------------------
annotations_dir = "/content/drive/MyDrive/Wildtrack/annotations_positions"
output_annotations = {}  # {frame_filename: [annotation, ...], ...}

for filename in sorted(os.listdir(annotations_dir)):
    if filename.endswith(".json"):
        filepath = os.path.join(annotations_dir, filename)
        with open(filepath, "r") as f:
            data = json.load(f)

        frame_annotations = []
        for ann in data:
            # Extract available metadata from annotations
            personID = ann.get("personID")
            positionID = ann.get("positionID")
            views = ann.get("views", [])
            if positionID is None:
                continue  # Skip if no positionID

            # Compute 3D coordinates from positionID
            X, Y = compute_3d_coordinates(positionID)

            # Process each view to extract the 2D bounding box
            view_annotations = []
            for v in views:
                viewNum = v.get("viewNum")  # Using this as camera identifier
                xmin = v.get("xmin")
                xmax = v.get("xmax")
                ymin = v.get("ymin")
                ymax = v.get("ymax")
                bbox_2d = None
                if xmin != -1 and xmax != -1 and ymin != -1 and ymax != -1:
                    width = xmax - xmin
                    height = ymax - ymin
                    bbox_2d = (xmin, ymin, width, height)
                view_annotations.append({
                    "viewNum": viewNum,
                    "xmin": xmin,
                    "xmax": xmax,
                    "ymin": ymin,
                    "ymax": ymax,
                    "bbox_2d": bbox_2d
                })

            annotation_info = {
                "personID": personID,
                "positionID": positionID,
                "X": X,
                "Y": Y,
                "views": view_annotations
            }
            frame_annotations.append(annotation_info)

        output_annotations[filename] = frame_annotations

# -------------------------------
# Step 2: Parse rectangles.pom to Build a Rectangle Mapping (Optional)
# -------------------------------
rectangles_pom_file = "/content/drive/MyDrive/Wildtrack/rectangles.pom"
rect_map = {}  # Mapping: {(cameraID, rectangleID): (x, y, w, h)}
with open(rectangles_pom_file, "r") as f:
    for line in f:
        line = line.strip()
        if line.startswith("Rectangle"):
            match = re.search(r"Rectangle\s+ID=(\d+)\s+Cam=(\d+)\s+X=([\d\.]+)\s+Y=([\d\.]+)\s+W=([\d\.]+)\s+H=([\d\.]+)", line)
            if match:
                rect_id = int(match.group(1))
                cam_id = int(match.group(2))
                x_val = float(match.group(3))
                y_val = float(match.group(4))
                w_val = float(match.group(5))
                h_val = float(match.group(6))
                rect_map[(cam_id, rect_id)] = (x_val, y_val, w_val, h_val)

# -------------------------------
# Step 3: (Optional) Matching using "viewNum"
# -------------------------------
# In this implementation, each view's bbox_2d is directly computed from annotation.
# Additional matching using rect_map can be implemented if needed.
# For now, we simply assume the annotation's bbox_2d is our ground truth.

# -------------------------------
# Step 4: Integrate Occlusion Handling using DeepOcclusion (Production-Level)
# -------------------------------
# Initialize the occlusion refiner with your production model and configuration.
occlusion_refiner = DeepOcclusionRefiner(
    model_path="/content/drive/MyDrive/deep_occlusion_model.pth",
    config_path="/content/drive/MyDrive/deep_occlusion_config.yaml"
)

# For production, process each frame to refine each valid bounding box using the occlusion handler.
# In a real project, you would load the correct image from the corresponding camera folder.
# Here, we assume that the annotation filename "00001020.json" corresponds to an image "00001020.jpg".
# Adjust 'base_images_dirs' to map viewNum to the correct folder.
base_images_dirs = {
    # Assuming viewNum 0 corresponds to camera folder "C1", viewNum 1 -> "C2", etc.
    0: "/content/drive/MyDrive/Wildtrack/Image_subsets/C1",
    1: "/content/drive/MyDrive/Wildtrack/Image_subsets/C2",
    2: "/content/drive/MyDrive/Wildtrack/Image_subsets/C3",
    3: "/content/drive/MyDrive/Wildtrack/Image_subsets/C4",
    4: "/content/drive/MyDrive/Wildtrack/Image_subsets/C5",
    5: "/content/drive/MyDrive/Wildtrack/Image_subsets/C6",
    6: "/content/drive/MyDrive/Wildtrack/Image_subsets/C7"
}

# Iterate over all annotations and refine bboxes for each view using occlusion handling.
for frame_file, ann_list in output_annotations.items():
    # Derive image filename from frame_file (e.g., "00001020.json" -> "00001020.jpg")
    image_filename = frame_file.replace(".json", ".jpg")
    for ann in ann_list:
        for view in ann.get("views", []):
            if view.get("bbox_2d") is not None:
                view_num = view.get("viewNum")
                # Get the base image directory for this viewNum
                base_dir = base_images_dirs.get(view_num)
                if base_dir is None:
                    continue
                image_path = os.path.join(base_dir, image_filename)
                if not os.path.exists(image_path):
                    continue
                image = cv2.imread(image_path)
                if image is None:
                    continue
                # Refine the bounding box using the occlusion refiner
                original_bbox = view["bbox_2d"]
                refined_bbox = occlusion_refiner.refine_bbox(image, original_bbox)
                view["bbox_2d"] = refined_bbox

# -------------------------------
# Step 5: Save the Final Refined Annotations
# -------------------------------
final_output_file = "/content/drive/MyDrive/Wildtrack/refined_annotations.json"
with open(final_output_file, "w") as f:
    json.dump(output_annotations, f, indent=2)

# -------------------------------
# (Optional) Print Sample Output for Verification
# -------------------------------
for frame_file, ann_list in output_annotations.items():
    print("Frame:", frame_file)
    for ann in ann_list:
        print(ann)
    print("-" * 40)


# 5. Cross-Camera & Temporal Matching

In [None]:
import os
import json
import re
import cv2
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as T
import numpy as np
import torch.nn.functional as F
from PIL import Image

# -------------------------------
# Define Re-ID Model (ResNet50-based)
# -------------------------------
class ReIDModel(nn.Module):
    def __init__(self, output_dim=512):
        super(ReIDModel, self).__init__()
        resnet = torchvision.models.resnet50(pretrained=True)
        modules = list(resnet.children())[:-1]  # Remove the FC layer
        self.backbone = nn.Sequential(*modules)
        self.fc = nn.Linear(2048, output_dim)

    def forward(self, x):
        x = self.backbone(x)  # shape: (B, 2048, 1, 1)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        x = F.normalize(x, p=2, dim=1)
        return x

# Instantiate the model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
reid_model = ReIDModel(output_dim=512).to(device)
reid_model.eval()

# Define image transformation for Re-ID model
transform = T.Compose([
    T.Resize((256, 128)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225])
])

def extract_feature_from_crop(crop_image):
    """
    crop_image: a PIL image crop.
    Returns: L2 normalized feature vector as a numpy array.
    """
    img_tensor = transform(crop_image).unsqueeze(0).to(device)
    with torch.no_grad():
        feature = reid_model(img_tensor)
    return feature.cpu().numpy().flatten()

# -------------------------------
# Assume `output_annotations` from previous steps is available.
# Each annotation has a "views" list with "bbox_2d" computed.
# Also assume a mapping from viewNum to the corresponding image folder.
# -------------------------------
base_images_dirs = {
    0: "/content/drive/MyDrive/Wildtrack/Image_subsets/C1",
    1: "/content/drive/MyDrive/Wildtrack/Image_subsets/C2",
    2: "/content/drive/MyDrive/Wildtrack/Image_subsets/C3",
    3: "/content/drive/MyDrive/Wildtrack/Image_subsets/C4",
    4: "/content/drive/MyDrive/Wildtrack/Image_subsets/C5",
    5: "/content/drive/MyDrive/Wildtrack/Image_subsets/C6",
    6: "/content/drive/MyDrive/Wildtrack/Image_subsets/C7"
}

# Get list of frame annotation files (assumed to be keys in output_annotations)
frame_files = sorted(output_annotations.keys())

# -------------------------------
# Cross-Camera & Temporal Matching
# -------------------------------
# We'll use a simple incremental matching strategy.
# "tracks" is a dict mapping track_id to track info.
tracks = {}  # track_id -> dict with keys: 'detections' (list), 'last_feature', 'last_frame', 'last_camera'
next_track_id = 0
similarity_threshold = 0.8

def cosine_similarity(vec1, vec2):
    # Normalize and compute dot product
    return np.dot(vec1 / np.linalg.norm(vec1), vec2 / np.linalg.norm(vec2))

# Process each frame in temporal order.
for frame_file in frame_files:
    ann_list = output_annotations[frame_file]
    # Derive image filename from frame_file (e.g., "00001020.json" -> "00001020.jpg")
    image_filename = frame_file.replace(".json", ".jpg")
    for ann in ann_list:
        for view in ann["views"]:
            bbox = view.get("bbox_2d")
            if bbox is None:
                continue
            view_num = view.get("viewNum")
            base_dir = base_images_dirs.get(view_num)
            if base_dir is None:
                continue
            image_path = os.path.join(base_dir, image_filename)
            if not os.path.exists(image_path):
                continue
            try:
                pil_image = Image.open(image_path).convert("RGB")
            except Exception as e:
                continue
            xmin, ymin, width, height = bbox
            crop = pil_image.crop((xmin, ymin, xmin + width, ymin + height))

            # Extract re-id feature
            feature = extract_feature_from_crop(crop)

            # Matching: compare with existing tracks' last feature
            best_track_id = None
            best_similarity = -1
            for track_id, track_data in tracks.items():
                sim = cosine_similarity(feature, track_data["last_feature"])
                if sim > best_similarity:
                    best_similarity = sim
                    best_track_id = track_id
            if best_similarity >= similarity_threshold:
                # Assign existing track
                tracks[best_track_id]["detections"].append({
                    "frame": frame_file,
                    "viewNum": view_num,
                    "bbox": bbox,
                    "feature": feature.tolist()
                })
                tracks[best_track_id]["last_feature"] = feature
                tracks[best_track_id]["last_frame"] = frame_file
                tracks[best_track_id]["last_camera"] = view_num
                view["track_id"] = best_track_id
            else:
                # Create a new track
                new_track_id = next_track_id
                next_track_id += 1
                tracks[new_track_id] = {
                    "detections": [{
                        "frame": frame_file,
                        "viewNum": view_num,
                        "bbox": bbox,
                        "feature": feature.tolist()
                    }],
                    "last_feature": feature,
                    "last_frame": frame_file,
                    "last_camera": view_num
                }
                view["track_id"] = new_track_id

# -------------------------------
# Output Matching Results
# -------------------------------
# Print the assembled tracks, including last seen information.
for track_id, track_data in tracks.items():
    print(f"Track ID: {track_id}")
    print(f"Last seen in frame: {track_data['last_frame']} at camera: {track_data['last_camera']}")
    print("Detections:")
    for d in track_data["detections"]:
        print(d)
    print("-" * 40)