# Post-Processing Only Notebook

This notebook runs post-processing on already-tracked data without re-running tracking.
It assumes **bidirectional (forward + backward) tracking** was enabled and the following files exist:

- `*_forward.csv` - Raw forward tracking output
- `*_backward.csv` - Raw backward tracking output

The notebook will:
1. Load forward and backward CSV files
2. Apply post-processing (break trajectories at velocity/distance jumps)
3. Merge forward and backward trajectories into consensus trajectories
4. Apply interpolation
5. Scale coordinates back to original video space
6. Save the final merged output

In [1]:
# === IMPORTS ===
import logging
import os
import sys
from collections import Counter

import cv2
import numpy as np
import pandas as pd

# Make the project's `src` directory importable when the notebook is run from
# the notebooks folder. Notebooks do not define `__file__`, so the project root
# is taken to be the parent of the current working directory.
project_root = os.path.dirname(os.getcwd())
src_path = os.path.join(project_root, "src")
if src_path not in sys.path:
    sys.path.insert(0, src_path)

# Import post-processing functions
from multi_tracker.core.post_processing import (
    interpolate_trajectories,
    process_trajectories_from_csv,
    resolve_trajectories,
)

# Configure logging at INFO level so log messages emitted while the
# post-processing functions run show up in the cell outputs.
log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
logging.basicConfig(level=logging.INFO, format=log_format)
logger = logging.getLogger(__name__)

print("‚úì Imports successful!")

‚úì Imports successful!


## Configuration

Set your file paths and parameters here. You need to provide:
1. Path to the forward tracking CSV
2. Path to the backward tracking CSV
3. Path to the original video file (to get total frame count and verify resize factor)

In [2]:
# ===================================================================
# FILE PATHS - UPDATE THESE!
# ===================================================================

# Tracking output CSV path *without* the _forward/_backward suffix.
# E.g. for "video_forward.csv" and "video_backward.csv" use "video.csv".
BASE_CSV_PATH = "emi_short_tracking.csv"

# Original video file (used to read the total frame count).
VIDEO_PATH = "emi_short.mp4"

# Destination for the final merged trajectories.
# Leave as None to derive "<base>_final<ext>" from BASE_CSV_PATH.
OUTPUT_CSV_PATH = None

# ===================================================================
# DERIVED PATHS (auto-generated)
# ===================================================================
base, ext = os.path.splitext(BASE_CSV_PATH)
FORWARD_CSV_PATH = f"{base}_forward{ext}"
BACKWARD_CSV_PATH = f"{base}_backward{ext}"

# Only fall back to the derived name when no explicit output path was given.
OUTPUT_CSV_PATH = f"{base}_final{ext}" if OUTPUT_CSV_PATH is None else OUTPUT_CSV_PATH

print(f"Forward CSV:  {FORWARD_CSV_PATH}")
print(f"Backward CSV: {BACKWARD_CSV_PATH}")
print(f"Output CSV:   {OUTPUT_CSV_PATH}")
print(f"Video file:   {VIDEO_PATH}")

Forward CSV:  emi_short_tracking_forward.csv
Backward CSV: emi_short_tracking_backward.csv
Output CSV:   emi_short_tracking_final.csv
Video file:   emi_short.mp4


In [3]:
# ===================================================================
# POST-PROCESSING PARAMETERS
# ===================================================================
# These should match the values used during tracking, or adjust as needed

# Resize factor used during tracking (1.0 = no resize); needed later to scale
# coordinates back to original video space.
RESIZE_FACTOR = 1.0

# Typical size of the tracked animal in pixels (reference for the thresholds).
REFERENCE_BODY_SIZE = 77.0

# Body size expressed in the (possibly resized) tracking coordinate space.
scaled_body_size = REFERENCE_BODY_SIZE * RESIZE_FACTOR

# Trajectory post-processing parameters
params = {
    # Trajectories shorter than this many frames are removed
    "MIN_TRAJECTORY_LENGTH": 10,
    # Velocity (pixels/frame) above which a trajectory is broken; faster jumps
    # indicate tracking errors.
    # Note: MAX_DISTANCE_BREAK is now computed dynamically as MAX_VELOCITY_BREAK * frame_diff
    "MAX_VELOCITY_BREAK": 1.0 * scaled_body_size,
    # Number of consecutive occluded frames tolerated before breaking
    "MAX_OCCLUSION_GAP": 5,
    # Conservative merge parameters for forward/backward trajectory resolution
    # AGREEMENT_DISTANCE: max distance (px) for frames to count as "agreeing";
    # agreeing frames are merged, the others create separate trajectories
    "AGREEMENT_DISTANCE": scaled_body_size * 0.25,
    # MIN_OVERLAP_FRAMES: minimum number of agreeing frames required to consider merging
    "MIN_OVERLAP_FRAMES": 2,
}

# Interpolation settings
INTERPOLATION_METHOD = "linear"  # Options: "none", "linear", "cubic", "spline"
INTERPOLATION_MAX_GAP = 5  # Maximum gap size to interpolate (frames)

print("Parameters configured:")
for param_name, param_value in params.items():
    print(f"  {param_name}: {param_value}")
print(f"\nInterpolation: {INTERPOLATION_METHOD} (max_gap={INTERPOLATION_MAX_GAP})")

Parameters configured:
  MIN_TRAJECTORY_LENGTH: 10
  MAX_VELOCITY_BREAK: 77.0
  MAX_OCCLUSION_GAP: 5
  AGREEMENT_DISTANCE: 19.25
  MIN_OVERLAP_FRAMES: 2

Interpolation: linear (max_gap=5)


## Validate Input Files

In [4]:
# Check that all required input files exist before doing any work.
files_to_check = [
    ("Forward CSV", FORWARD_CSV_PATH),
    ("Backward CSV", BACKWARD_CSV_PATH),
    ("Video file", VIDEO_PATH),
]

# `isfile` (rather than `exists`) so that a directory with a matching name is
# reported here instead of failing later inside the CSV/video readers.
missing_files = []
for name, path in files_to_check:
    if os.path.isfile(path):
        print(f"✓ {name}: {path}")
    else:
        print(f"✗ {name} NOT FOUND: {path}")
        missing_files.append(path)

all_ok = not missing_files
if not all_ok:
    # Name the missing paths in the exception so the traceback is self-contained.
    raise FileNotFoundError(
        "One or more required files are missing. Please check the paths above. "
        f"Missing: {', '.join(missing_files)}"
    )

print("\nAll files found!")

‚úì Forward CSV: emi_short_tracking_forward.csv
‚úì Backward CSV: emi_short_tracking_backward.csv
‚úì Video file: emi_short.mp4

All files found!


In [5]:
# Read basic metadata from the video. TOTAL_FRAMES is later passed to
# resolve_trajectories() as the video length.
cap = cv2.VideoCapture(VIDEO_PATH)
try:
    if not cap.isOpened():
        raise ValueError(f"Could not open video: {VIDEO_PATH}")

    TOTAL_FRAMES = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    FPS = cap.get(cv2.CAP_PROP_FPS)
    WIDTH = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    HEIGHT = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
finally:
    # Release the capture handle even when opening or reading metadata fails.
    cap.release()

print("Video info:")
print(f"  Total frames: {TOTAL_FRAMES}")
print(f"  FPS: {FPS}")
print(f"  Resolution: {WIDTH} x {HEIGHT}")
if FPS > 0:
    print(f"  Duration: {TOTAL_FRAMES/FPS:.2f} seconds")
else:
    # Avoid a ZeroDivisionError when the container does not report a frame rate.
    print("  Duration: unknown (video does not report a valid FPS)")

if TOTAL_FRAMES <= 0:
    print(
        "WARNING: video reports a non-positive frame count; "
        "the forward/backward merge uses this value as the video length."
    )

Video info:
  Total frames: 750
  FPS: 25.0
  Resolution: 4512 x 4512
  Duration: 30.00 seconds


## Step 1: Load and Process Forward Trajectories

In [6]:
# Read the raw forward-tracking CSV and print a short summary; the final
# expression renders the first rows as the cell output.
forward_raw = pd.read_csv(FORWARD_CSV_PATH)

print(f"Forward raw trajectories:")
print(f"  Rows: {forward_raw.shape[0]}")
forward_traj_ids = forward_raw["TrajectoryID"]
print(f"  Unique trajectories: {forward_traj_ids.nunique()}")
print(f"  Columns: {forward_raw.columns.tolist()}")
forward_frame_ids = forward_raw["FrameID"]
print(f"  Frame range: {forward_frame_ids.min()} - {forward_frame_ids.max()}")
forward_raw.head()

Forward raw trajectories:
  Rows: 18750
  Unique trajectories: 83
  Columns: ['TrackID', 'TrajectoryID', 'Index', 'X', 'Y', 'Theta', 'FrameID', 'State', 'DetectionConfidence', 'AssignmentConfidence', 'PositionUncertainty']
  Frame range: 1 - 750


Unnamed: 0,TrackID,TrajectoryID,Index,X,Y,Theta,FrameID,State,DetectionConfidence,AssignmentConfidence,PositionUncertainty
0,0,0,0,,,,1,occluded,0.0,0.0,20.109999
1,1,1,0,,,,1,occluded,0.0,0.0,20.109999
2,2,2,0,,,,1,occluded,0.0,0.0,20.109999
3,3,3,0,,,,1,occluded,0.0,0.0,20.109999
4,4,4,0,,,,1,occluded,0.0,0.0,20.109999


In [7]:
# Run post-processing on the forward CSV. The call returns the processed
# trajectories and a dictionary of statistics, which is printed below.
print("Processing forward trajectories...")
forward_processed, forward_stats = process_trajectories_from_csv(
    FORWARD_CSV_PATH,
    params,
)

print(f"\nForward processing stats:")
for stat_name, stat_value in forward_stats.items():
    print(f"  {stat_name}: {stat_value}")

# Warn loudly when nothing survived processing; otherwise report the count.
if forward_processed is None or forward_processed.empty:
    print("WARNING: No forward trajectories after processing!")
else:
    forward_traj_count = forward_processed["TrajectoryID"].nunique()
    print(f"\nProcessed forward trajectories: {forward_traj_count}")

2026-02-03 13:44:36,495 - multi_tracker.core.post_processing - INFO - Loaded 18750 rows from emi_short_tracking_forward.csv with columns: ['TrackID', 'TrajectoryID', 'Index', 'X', 'Y', 'Theta', 'FrameID', 'State', 'DetectionConfidence', 'AssignmentConfidence', 'PositionUncertainty']


2026-02-03 13:44:36,495 - multi_tracker.core.post_processing - INFO - Dropped columns: []
2026-02-03 13:44:36,496 - multi_tracker.core.post_processing - INFO - Setting X, Y, Theta to NaN for 4802 occluded/lost detections


Processing forward trajectories...


2026-02-03 13:44:37,231 - multi_tracker.core.post_processing - INFO - Post-processing stats: {'original_count': 83, 'removed_short': 0, 'broken_velocity': 53, 'broken_occlusion': 153, 'broken_spatial_gap': 13, 'final_count': 131}



Forward processing stats:
  original_count: 83
  removed_short: 0
  broken_velocity: 53
  broken_occlusion: 153
  broken_spatial_gap: 13
  final_count: 131

Processed forward trajectories: 131


## Step 2: Load and Process Backward Trajectories

In [8]:
# Read the raw backward-tracking CSV and print a short summary; the final
# expression renders the first rows as the cell output.
backward_raw = pd.read_csv(BACKWARD_CSV_PATH)

print(f"Backward raw trajectories:")
print(f"  Rows: {backward_raw.shape[0]}")
backward_traj_ids = backward_raw["TrajectoryID"]
print(f"  Unique trajectories: {backward_traj_ids.nunique()}")
print(f"  Columns: {backward_raw.columns.tolist()}")
backward_frame_ids = backward_raw["FrameID"]
print(
    f"  Frame range (before transform): {backward_frame_ids.min()} - {backward_frame_ids.max()}"
)
backward_raw.head()

Backward raw trajectories:
  Rows: 18750
  Unique trajectories: 102
  Columns: ['TrackID', 'TrajectoryID', 'Index', 'X', 'Y', 'Theta', 'FrameID', 'State', 'DetectionConfidence', 'AssignmentConfidence', 'PositionUncertainty']
  Frame range (before transform): 1 - 750


Unnamed: 0,TrackID,TrajectoryID,Index,X,Y,Theta,FrameID,State,DetectionConfidence,AssignmentConfidence,PositionUncertainty
0,0,0,0,,,,1,occluded,0.0,0.0,20.109999
1,1,1,0,,,,1,occluded,0.0,0.0,20.109999
2,2,2,0,,,,1,occluded,0.0,0.0,20.109999
3,3,3,0,,,,1,occluded,0.0,0.0,20.109999
4,4,4,0,,,,1,occluded,0.0,0.0,20.109999


In [9]:
# Run post-processing on the backward CSV. The call returns the processed
# trajectories and a dictionary of statistics, which is printed below.
print("Processing backward trajectories...")
backward_processed, backward_stats = process_trajectories_from_csv(
    BACKWARD_CSV_PATH,
    params,
)

print(f"\nBackward processing stats:")
for stat_name, stat_value in backward_stats.items():
    print(f"  {stat_name}: {stat_value}")

# Warn loudly when nothing survived processing; otherwise report the count.
if backward_processed is None or backward_processed.empty:
    print("WARNING: No backward trajectories after processing!")
else:
    backward_traj_count = backward_processed["TrajectoryID"].nunique()
    print(f"\nProcessed backward trajectories: {backward_traj_count}")

2026-02-03 13:44:37,479 - multi_tracker.core.post_processing - INFO - Loaded 18750 rows from emi_short_tracking_backward.csv with columns: ['TrackID', 'TrajectoryID', 'Index', 'X', 'Y', 'Theta', 'FrameID', 'State', 'DetectionConfidence', 'AssignmentConfidence', 'PositionUncertainty']
2026-02-03 13:44:37,480 - multi_tracker.core.post_processing - INFO - Dropped columns: []
2026-02-03 13:44:37,480 - multi_tracker.core.post_processing - INFO - Setting X, Y, Theta to NaN for 4885 occluded/lost detections


Processing backward trajectories...


2026-02-03 13:44:38,221 - multi_tracker.core.post_processing - INFO - Post-processing stats: {'original_count': 102, 'removed_short': 1, 'broken_velocity': 55, 'broken_occlusion': 144, 'broken_spatial_gap': 15, 'final_count': 134}



Backward processing stats:
  original_count: 102
  removed_short: 1
  broken_velocity: 55
  broken_occlusion: 144
  broken_spatial_gap: 15
  final_count: 134

Processed backward trajectories: 134


In [10]:
# ===================================================================
# DIAGNOSTIC: Check for jumps in RAW tracking data (before processing)
# ===================================================================
raw_banner = "=" * 60
print(raw_banner)
print("JUMP ANALYSIS IN RAW DATA (before any post-processing)")
print(raw_banner)

# Per-frame speed above which a step in the raw data is reported as a jump.
MAX_RAW_JUMP = REFERENCE_BODY_SIZE * RESIZE_FACTOR * 2  # 2x body size


def analyze_raw_jumps(df, name, max_jump):
    """Find steps within a trajectory whose per-frame speed exceeds ``max_jump``.

    Rows are grouped by ``TrajectoryID`` and ordered by ``FrameID``; every pair
    of consecutive rows is compared. The distance between the two positions is
    divided by the frame gap (at least 1) to obtain a velocity in px/frame.
    Pairs where a position is missing (NaN) are skipped.

    Args:
        df: DataFrame with ``TrajectoryID``, ``FrameID``, ``X`` and ``Y``
            columns. ``None`` or an empty frame yields an empty result.
        name: Label of the data set. Currently unused; kept so existing calls
            keep working.
        max_jump: Velocity threshold in pixels per frame.

    Returns:
        list[dict]: One record per flagged step with the keys ``TrajectoryID``,
        ``FromFrame``, ``ToFrame``, ``FrameGap``, ``Distance`` and ``Velocity``.
        Trajectories appear in order of first appearance in ``df``; steps
        within a trajectory are in frame order.
    """
    jumps = []
    if df is None or df.empty:
        return jumps

    # A single groupby pass replaces re-filtering the whole frame once per
    # trajectory; sort=False keeps trajectories in order of first appearance.
    for traj_id, traj in df.groupby("TrajectoryID", sort=False):
        if len(traj) < 2:
            continue
        traj = traj.sort_values("FrameID")

        # Plain arrays avoid the per-row overhead of repeated .iloc lookups.
        frames = traj["FrameID"].to_numpy()
        xs = traj["X"].to_numpy(dtype=float, na_value=np.nan)
        ys = traj["Y"].to_numpy(dtype=float, na_value=np.nan)

        frame_gaps = np.diff(frames)
        distances = np.sqrt(np.diff(xs) ** 2 + np.diff(ys) ** 2)
        # Clamp the gap to 1 so duplicated frame numbers cannot divide by zero.
        velocities = distances / np.maximum(frame_gaps, 1)

        # Comparisons with NaN are False, so steps touching a missing position
        # are never flagged.
        with np.errstate(invalid="ignore"):
            flagged = np.flatnonzero(velocities > max_jump)

        for i in flagged:
            jumps.append(
                {
                    "TrajectoryID": traj_id,
                    "FromFrame": int(frames[i]),
                    "ToFrame": int(frames[i + 1]),
                    "FrameGap": frame_gaps[i],
                    "Distance": distances[i],
                    "Velocity": velocities[i],
                }
            )
    return jumps


# Scan both raw data sets with the same threshold and report the worst offenders.
forward_raw_jumps = analyze_raw_jumps(forward_raw, "Forward", MAX_RAW_JUMP)
backward_raw_jumps = analyze_raw_jumps(backward_raw, "Backward", MAX_RAW_JUMP)

n_forward_raw_jumps = len(forward_raw_jumps)
n_backward_raw_jumps = len(backward_raw_jumps)
print(
    f"\nForward raw data: {n_forward_raw_jumps} large jumps (vel > {MAX_RAW_JUMP:.1f} px/frame)"
)
print(
    f"Backward raw data: {n_backward_raw_jumps} large jumps (vel > {MAX_RAW_JUMP:.1f} px/frame)"
)

if n_forward_raw_jumps > 0:
    print(f"\nTop 5 forward raw jumps:")
    fwd_jump_df = pd.DataFrame(forward_raw_jumps)
    top_forward_jumps = fwd_jump_df.nlargest(5, "Velocity")
    print(top_forward_jumps.to_string())

if n_backward_raw_jumps > 0:
    print(f"\nTop 5 backward raw jumps:")
    bwd_jump_df = pd.DataFrame(backward_raw_jumps)
    top_backward_jumps = bwd_jump_df.nlargest(5, "Velocity")
    print(top_backward_jumps.to_string())

print(
    f"\nIf there are many jumps in raw data, the issue is in TRACKING, not post-processing!"
)

JUMP ANALYSIS IN RAW DATA (before any post-processing)

Forward raw data: 95 large jumps (vel > 154.0 px/frame)
Backward raw data: 100 large jumps (vel > 154.0 px/frame)

Top 5 forward raw jumps:
    TrajectoryID  FromFrame  ToFrame  FrameGap     Distance     Velocity
91            78        708      709         1  4418.730700  4418.730700
77            54        409      410         1  2650.672368  2650.672368
81            60        493      494         1  2629.846573  2629.846573
80            58        473      474         1  2626.634729  2626.634729
79            57        447      448         1  2617.483715  2617.483715

Top 5 backward raw jumps:
    TrajectoryID  FromFrame  ToFrame  FrameGap     Distance     Velocity
48            43         29       30         1  3825.281297  3825.281297
46            42         26       27         1  3286.583180  3286.583180
74            65        247      248         1  1291.410469  1291.410469
79            67        273      274         1 

## Step 3: Merge Forward and Backward Trajectories (Conservative Strategy)

This step resolves conflicts between forward and backward tracking using a **conservative consensus-based approach**:

1. **Adjust backward data**: Frame numbers are flipped (they were stored in reverse), and theta is rotated by 180°
2. **Find merge candidates**: Pairs must have at least `MIN_OVERLAP_FRAMES` frames where positions agree (within `AGREEMENT_DISTANCE`)
3. **Conservative merge**: 
   - **Agreeing frames** (both exist within threshold): Merge into average position
   - **Disagreeing frames** (both exist but too far apart): Split into separate trajectory segments
   - **Unique frames** (only one direction has data): Keep as-is

This prioritizes **identity confidence** over trajectory completeness - you may get more trajectory fragments, but each fragment has higher confidence in identity.

In [11]:
# Helper function to convert DataFrame to list of DataFrames (one per trajectory)


def prepare_trajs_for_merge(trajs_df):
    """Split a trajectory table into one independent DataFrame per trajectory.

    Args:
        trajs_df: DataFrame with a ``TrajectoryID`` column, or ``None``.

    Returns:
        list: A copy of each ``TrajectoryID`` group, in groupby key order.
        An empty list when ``trajs_df`` is ``None`` or has no rows.
    """
    per_trajectory = []
    if trajs_df is not None and not trajs_df.empty:
        for _traj_id, rows in trajs_df.groupby("TrajectoryID"):
            # Copy so later edits to a piece never touch the source table.
            per_trajectory.append(rows.copy())
    return per_trajectory


# Split both processed tables into per-trajectory lists for the merge step.
forward_prepared, backward_prepared = (
    prepare_trajs_for_merge(processed)
    for processed in (forward_processed, backward_processed)
)

print(f"Forward trajectories ready for merge: {len(forward_prepared)}")
print(f"Backward trajectories ready for merge: {len(backward_prepared)}")

Forward trajectories ready for merge: 131
Backward trajectories ready for merge: 134


In [12]:
# Merge the forward and backward trajectory lists; the video length and the
# shared parameter dictionary are passed through to resolve_trajectories().
resolve_banner = "=" * 60
print("Resolving forward and backward trajectories...")
print(resolve_banner)

resolved_trajectories = resolve_trajectories(
    forward_prepared, backward_prepared, video_length=TOTAL_FRAMES, params=params
)

print(resolve_banner)
n_resolved = len(resolved_trajectories)
print(f"\nResolution complete! Got {n_resolved} merged trajectories.")

2026-02-03 13:44:58,607 - multi_tracker.core.post_processing - INFO - Starting conservative trajectory resolution with 131 forward and 134 backward trajectories
2026-02-03 13:44:58,607 - multi_tracker.core.post_processing - INFO - Parameters: AGREEMENT_DISTANCE=19.25px, MIN_OVERLAP_FRAMES=2, MIN_LENGTH=10
2026-02-03 13:44:58,720 - multi_tracker.core.post_processing - INFO - After cleaning: 131 forward, 134 backward


Resolving forward and backward trajectories...


2026-02-03 13:44:58,966 - multi_tracker.core.post_processing - INFO - Found 333 merge candidates
2026-02-03 13:45:00,379 - multi_tracker.core.post_processing - INFO - Removed 80 spatially redundant trajectories
2026-02-03 13:45:12,211 - multi_tracker.core.post_processing - INFO - Processed overlapping trajectories in 5 iterations
2026-02-03 13:45:12,236 - multi_tracker.core.post_processing - INFO - Final result: 191 trajectories



Resolution complete! Got 191 merged trajectories.


In [13]:
# Convert the list of resolved trajectories back into a single DataFrame.

# Columns guaranteed to exist on the result, even when there is no data, so
# that later cells can index merged_df["TrajectoryID"] without a KeyError.
MERGED_COLUMNS = ["TrajectoryID", "X", "Y", "Theta", "FrameID"]

if isinstance(resolved_trajectories, list) and resolved_trajectories:
    if isinstance(resolved_trajectories[0], pd.DataFrame):
        # Renumber so every merged trajectory has a unique ID. `assign` returns
        # a new frame, leaving the objects in `resolved_trajectories` untouched
        # so this cell can be re-run safely.
        merged_df = pd.concat(
            [
                traj_df.assign(TrajectoryID=new_id)
                for new_id, traj_df in enumerate(resolved_trajectories)
            ],
            ignore_index=True,
        )
    else:
        # Fallback for the old format: each trajectory is an iterable of
        # (x, y, theta, frame) tuples.
        all_data = [
            {
                "TrajectoryID": traj_id,
                "X": x,
                "Y": y,
                "Theta": theta,
                "FrameID": frame,
            }
            for traj_id, traj in enumerate(resolved_trajectories)
            for x, y, theta, frame in traj
        ]
        merged_df = pd.DataFrame(all_data, columns=MERGED_COLUMNS)
else:
    merged_df = pd.DataFrame(columns=MERGED_COLUMNS)

print("Merged DataFrame:")
print(f"  Rows: {len(merged_df)}")
print(f"  Unique trajectories: {merged_df['TrajectoryID'].nunique()}")
if not merged_df.empty:
    print(f"  Frame range: {merged_df['FrameID'].min()} - {merged_df['FrameID'].max()}")

Merged DataFrame:
  Rows: 13200
  Unique trajectories: 191
  Frame range: 1 - 750


In [14]:
# ===================================================================
# DIAGNOSTIC: Check for jumps in merged data (after merge, before interpolation)
# ===================================================================
# A "jump" is a step between consecutive rows of one trajectory whose distance,
# normalised by the frame gap, exceeds MAX_ALLOWED_JUMP.
MAX_ALLOWED_JUMP = REFERENCE_BODY_SIZE * RESIZE_FACTOR * 2  # 2x body size threshold

merged_jump_details = []

# An empty merge result can be a DataFrame without any columns, so only scan
# when there is data (indexing "TrajectoryID" would raise KeyError otherwise).
if not merged_df.empty:
    # groupby yields the trajectories in ascending TrajectoryID order.
    for traj_id, traj_df in merged_df.groupby("TrajectoryID"):
        traj_df = traj_df.sort_values("FrameID")
        # Plain arrays avoid the per-row overhead of repeated .iloc lookups.
        frame_ids = traj_df["FrameID"].to_numpy()
        xs = traj_df["X"].to_numpy(dtype=float, na_value=np.nan)
        ys = traj_df["Y"].to_numpy(dtype=float, na_value=np.nan)

        for i in range(1, len(traj_df)):
            # Skip if either position is NaN
            if np.isnan(xs[i]) or np.isnan(xs[i - 1]):
                continue

            frame_gap = frame_ids[i] - frame_ids[i - 1]
            distance = np.sqrt((xs[i] - xs[i - 1]) ** 2 + (ys[i] - ys[i - 1]) ** 2)

            # Normalize distance by frame gap (distance per frame)
            effective_jump = distance / frame_gap if frame_gap > 0 else distance

            if effective_jump > MAX_ALLOWED_JUMP:
                merged_jump_details.append(
                    {
                        "TrajectoryID": traj_id,
                        "FromFrame": frame_ids[i - 1],
                        "ToFrame": frame_ids[i],
                        "FrameGap": frame_gap,
                        "Distance": distance,
                        "EffectiveJump": effective_jump,
                        "FromPos": (xs[i - 1], ys[i - 1]),
                        "ToPos": (xs[i], ys[i]),
                    }
                )

# Also read by the post-interpolation diagnostic cell.
merged_jump_count = len(merged_jump_details)

print(f"\n{'='*70}")
print(f"MERGED DATA JUMP ANALYSIS (threshold: {MAX_ALLOWED_JUMP:.1f} px/frame)")
print(f"{'='*70}")
print(f"Total jumps exceeding threshold: {merged_jump_count}")

if merged_jump_count > 0:
    # Sort by effective jump distance
    merged_jump_details.sort(key=lambda x: x["EffectiveJump"], reverse=True)

    print("\nTop 10 largest jumps:")
    for jump in merged_jump_details[:10]:
        print(
            f"  Traj {jump['TrajectoryID']:3d}: Frame {jump['FromFrame']:5.0f}→{jump['ToFrame']:5.0f} "
            f"(gap={jump['FrameGap']:2.0f}): {jump['EffectiveJump']:6.1f} px/frame "
            f"({jump['Distance']:6.1f} px total)"
        )

    # Count by trajectory
    traj_counts = Counter([j["TrajectoryID"] for j in merged_jump_details])
    print("\nTrajectories with most jumps:")
    for traj_id, count in traj_counts.most_common(10):
        print(f"  Trajectory {traj_id}: {count} jumps")
else:
    print("✓ No significant jumps found in merged data!")


MERGED DATA JUMP ANALYSIS (threshold: 154.0 px/frame)
Total jumps exceeding threshold: 0
‚úì No significant jumps found in merged data!


In [15]:
# ===================================================================
# DIAGNOSTIC: Check for "HIDDEN JUMPS" - spatial discontinuities across NaN gaps
# These will be revealed by interpolation!
# ===================================================================
# Only rows with a valid position are compared, and only pairs separated by
# more than one frame, i.e. steps that span a gap in the trajectory.
MAX_ALLOWED_VELOCITY = REFERENCE_BODY_SIZE * RESIZE_FACTOR * 2  # 2x body size per frame

hidden_jump_details = []

print(f"\n{'='*70}")
print("HIDDEN JUMP ANALYSIS (spatial discontinuities across NaN gaps)")
print(f"{'='*70}")
print(f"Threshold: {MAX_ALLOWED_VELOCITY:.1f} px/frame")

# An empty merge result can be a DataFrame without any columns, so only scan
# when there is data (indexing "TrajectoryID" would raise KeyError otherwise).
if not merged_df.empty:
    # groupby yields the trajectories in ascending TrajectoryID order.
    for traj_id, traj_df in merged_df.groupby("TrajectoryID"):
        traj_df = traj_df.sort_values("FrameID")

        # Find all valid (non-NaN) positions
        valid_positions = traj_df[traj_df["X"].notna()]
        if len(valid_positions) < 2:
            continue

        frame_ids = valid_positions["FrameID"].to_numpy()
        xs = valid_positions["X"].to_numpy(dtype=float, na_value=np.nan)
        ys = valid_positions["Y"].to_numpy(dtype=float, na_value=np.nan)

        # Check jumps between consecutive valid positions (skipping NaN gaps)
        for i in range(1, len(valid_positions)):
            frame_gap = frame_ids[i] - frame_ids[i - 1]

            # Only check if there's a gap (frame_gap > 1 means frames without a
            # valid position lie in between)
            if frame_gap > 1:
                distance = np.sqrt(
                    (xs[i] - xs[i - 1]) ** 2 + (ys[i] - ys[i - 1]) ** 2
                )
                velocity = distance / frame_gap  # Average velocity across gap

                if velocity > MAX_ALLOWED_VELOCITY:
                    hidden_jump_details.append(
                        {
                            "TrajectoryID": traj_id,
                            "FromFrame": frame_ids[i - 1],
                            "ToFrame": frame_ids[i],
                            "FrameGap": frame_gap,
                            "Distance": distance,
                            "Velocity": velocity,
                            "FromPos": (xs[i - 1], ys[i - 1]),
                            "ToPos": (xs[i], ys[i]),
                            "NaNFrames": frame_gap - 1,
                        }
                    )

hidden_jump_count = len(hidden_jump_details)
print(f"Hidden jumps found: {hidden_jump_count}")

if hidden_jump_count > 0:
    # Sort by velocity
    hidden_jump_details.sort(key=lambda x: x["Velocity"], reverse=True)

    print("\nTop 10 largest hidden jumps:")
    for jump in hidden_jump_details[:10]:
        print(
            f"  Traj {jump['TrajectoryID']:3d}: Frame {jump['FromFrame']:5.0f}→{jump['ToFrame']:5.0f} "
            f"(gap={jump['FrameGap']:2.0f}, NaN={jump['NaNFrames']:2.0f}): "
            f"{jump['Velocity']:6.1f} px/frame ({jump['Distance']:6.1f} px total)"
        )

    # Count by trajectory
    traj_counts_hidden = Counter([j["TrajectoryID"] for j in hidden_jump_details])
    print("\nTrajectories with most hidden jumps:")
    for traj_id, count in traj_counts_hidden.most_common(10):
        print(f"  Trajectory {traj_id}: {count} hidden jumps")

    print("\n⚠️  These hidden jumps will be REVEALED by interpolation!")
    print(
        f"    Interpolation will create {sum(j['NaNFrames'] for j in hidden_jump_details)} new frames"
    )
    print(
        "    across these gaps, making the jumps visible as consecutive-frame velocity."
    )
else:
    print("✓ No hidden jumps found! Interpolation should not create new jumps.")


HIDDEN JUMP ANALYSIS (spatial discontinuities across NaN gaps)
Threshold: 154.0 px/frame
Hidden jumps found: 0
‚úì No hidden jumps found! Interpolation should not create new jumps.


In [16]:
# ===================================================================
# DIAGNOSTIC: Analyze jumps in merged data BEFORE interpolation
# ===================================================================
# Tabular variant of the merged-data jump check: flagged steps are collected
# as records (including from/to coordinates) and summarised with pandas.
print("=" * 60)
print("JUMP ANALYSIS (before interpolation)")
print("=" * 60)

# Calculate jumps for each trajectory
MAX_ALLOWED_JUMP = REFERENCE_BODY_SIZE * RESIZE_FACTOR * 2  # 2x body size
print(f"Max allowed jump threshold: {MAX_ALLOWED_JUMP:.1f} px")

jump_analysis = []

# An empty merge result can be a DataFrame without any columns, so only scan
# when there is data (indexing "TrajectoryID" would raise KeyError otherwise).
if not merged_df.empty:
    # sort=False keeps the trajectories in order of first appearance.
    for traj_id, traj in merged_df.groupby("TrajectoryID", sort=False):
        if len(traj) < 2:
            continue
        traj = traj.sort_values("FrameID")

        # Plain arrays avoid the per-row overhead of repeated .iloc lookups.
        frame_ids = traj["FrameID"].to_numpy()
        xs = traj["X"].to_numpy(dtype=float, na_value=np.nan)
        ys = traj["Y"].to_numpy(dtype=float, na_value=np.nan)

        # Calculate frame-to-frame jumps
        for i in range(1, len(traj)):
            # Skip if either position is NaN
            if np.isnan(xs[i - 1]) or np.isnan(xs[i]):
                continue

            dx = xs[i] - xs[i - 1]
            dy = ys[i] - ys[i - 1]
            dist = np.sqrt(dx**2 + dy**2)
            frame_gap = frame_ids[i] - frame_ids[i - 1]

            # Normalize by frame gap (for multi-frame gaps)
            velocity = dist / max(frame_gap, 1)

            if velocity > MAX_ALLOWED_JUMP:
                jump_analysis.append(
                    {
                        "TrajectoryID": traj_id,
                        "FromFrame": int(frame_ids[i - 1]),
                        "ToFrame": int(frame_ids[i]),
                        "FrameGap": frame_gap,
                        "Distance": dist,
                        "Velocity": velocity,
                        "FromX": xs[i - 1],
                        "FromY": ys[i - 1],
                        "ToX": xs[i],
                        "ToY": ys[i],
                    }
                )

print(
    f"\nFound {len(jump_analysis)} large jumps (velocity > {MAX_ALLOWED_JUMP:.1f} px/frame)"
)

if jump_analysis:
    jump_df = pd.DataFrame(jump_analysis)
    print("\nTop 10 largest jumps:")
    print(
        jump_df.nlargest(10, "Velocity")[
            ["TrajectoryID", "FromFrame", "ToFrame", "FrameGap", "Distance", "Velocity"]
        ].to_string()
    )

    # Which trajectories have the most jumps?
    print("\nTrajectories with most jumps:")
    print(jump_df.groupby("TrajectoryID").size().nlargest(10).to_string())
else:
    print("No large jumps found!")

JUMP ANALYSIS (before interpolation)
Max allowed jump threshold: 154.0 px

Found 0 large jumps (velocity > 154.0 px/frame)
No large jumps found!


## Step 4: Apply Interpolation

In [17]:
# Interpolate the merged trajectories unless the method is "none" or there is
# no data, and report how many missing X/Y values were filled.
interpolation_disabled = INTERPOLATION_METHOD.lower() == "none"
if interpolation_disabled or merged_df.empty:
    print("Skipping interpolation (disabled or no data)")
else:
    print(
        f"Applying {INTERPOLATION_METHOD} interpolation (max_gap={INTERPOLATION_MAX_GAP})..."
    )

    # Missing X/Y cells before interpolation
    nan_before = merged_df[["X", "Y"]].isna().sum().sum()

    merged_df = interpolate_trajectories(
        merged_df, method=INTERPOLATION_METHOD, max_gap=INTERPOLATION_MAX_GAP
    )

    # Missing X/Y cells after interpolation
    nan_after = merged_df[["X", "Y"]].isna().sum().sum()

    nan_filled = nan_before - nan_after
    # max(..., 1) keeps the percentage defined when nothing was missing.
    filled_percent = 100 * nan_filled / max(nan_before, 1)

    print(f"Interpolation complete!")
    print(f"  NaN values before: {nan_before}")
    print(f"  NaN values after: {nan_after}")
    print(f"  Filled: {nan_filled} ({filled_percent:.1f}%)")

2026-02-03 13:49:37,461 - multi_tracker.core.post_processing - INFO - Interpolating trajectories using linear method (max_gap=5)


Applying linear interpolation (max_gap=5)...


2026-02-03 13:49:37,769 - multi_tracker.core.post_processing - INFO - Interpolation complete


Interpolation complete!
  NaN values before: 1368
  NaN values after: 0
  Filled: 1368 (100.0%)


In [18]:
# ===================================================================
# DIAGNOSTIC: Check for jumps AFTER interpolation (before scaling)
# ===================================================================
# Uses the same per-frame velocity definition as the pre-interpolation check
# so that the two jump counts can be compared directly.
MAX_ALLOWED_JUMP_POST_INTERP = (
    REFERENCE_BODY_SIZE * RESIZE_FACTOR * 2
)  # 2x body size threshold

post_interp_jump_details = []

# An empty merge result can be a DataFrame without any columns, so only scan
# when there is data (indexing "TrajectoryID" would raise KeyError otherwise).
if not merged_df.empty:
    # groupby yields the trajectories in ascending TrajectoryID order.
    for traj_id, traj_df in merged_df.groupby("TrajectoryID"):
        traj_df = traj_df.sort_values("FrameID")
        # Plain arrays avoid the per-row overhead of repeated .iloc lookups.
        frame_ids = traj_df["FrameID"].to_numpy()
        xs = traj_df["X"].to_numpy(dtype=float, na_value=np.nan)
        ys = traj_df["Y"].to_numpy(dtype=float, na_value=np.nan)

        for i in range(1, len(traj_df)):
            # Skip if either position is NaN (though there should be fewer after interpolation)
            if np.isnan(xs[i]) or np.isnan(xs[i - 1]):
                continue

            frame_gap = frame_ids[i] - frame_ids[i - 1]
            distance = np.sqrt((xs[i] - xs[i - 1]) ** 2 + (ys[i] - ys[i - 1]) ** 2)

            # Normalize by the frame gap; a zero gap (duplicated frame number)
            # falls back to the raw distance instead of dividing by zero,
            # matching the pre-interpolation check.
            velocity = distance / frame_gap if frame_gap > 0 else distance

            if velocity > MAX_ALLOWED_JUMP_POST_INTERP:
                post_interp_jump_details.append(
                    {
                        "TrajectoryID": traj_id,
                        "FromFrame": frame_ids[i - 1],
                        "ToFrame": frame_ids[i],
                        "FrameGap": frame_gap,
                        "Distance": distance,
                        "Velocity": velocity,
                        "FromPos": (xs[i - 1], ys[i - 1]),
                        "ToPos": (xs[i], ys[i]),
                    }
                )

post_interp_jump_count = len(post_interp_jump_details)

print(f"\n{'='*70}")
print(
    f"POST-INTERPOLATION JUMP ANALYSIS (threshold: {MAX_ALLOWED_JUMP_POST_INTERP:.1f} px/frame)"
)
print(f"{'='*70}")
print(f"Total jumps exceeding threshold: {post_interp_jump_count}")

if post_interp_jump_count > 0:
    # Sort by velocity
    post_interp_jump_details.sort(key=lambda x: x["Velocity"], reverse=True)

    print("\nTop 10 largest jumps (after interpolation):")
    for jump in post_interp_jump_details[:10]:
        print(
            f"  Traj {jump['TrajectoryID']:3d}: Frame {jump['FromFrame']:5.0f}→{jump['ToFrame']:5.0f} "
            f"(gap={jump['FrameGap']:2.0f}): {jump['Velocity']:6.1f} px/frame "
            f"({jump['Distance']:6.1f} px total)"
        )

    # Count by trajectory
    traj_counts_post = Counter([j["TrajectoryID"] for j in post_interp_jump_details])
    print("\nTrajectories with most jumps (after interpolation):")
    for traj_id, count in traj_counts_post.most_common(10):
        print(f"  Trajectory {traj_id}: {count} jumps")

    # Compare with pre-interpolation (merged_jump_count comes from the
    # merged-data jump diagnostic cell)
    if merged_jump_count > 0:
        reduction = (
            (merged_jump_count - post_interp_jump_count) / merged_jump_count
        ) * 100
        print(
            f"\nJump reduction from interpolation: {merged_jump_count} → {post_interp_jump_count} "
            f"({reduction:+.1f}% change)"
        )
else:
    print("✓ No significant jumps found after interpolation!")
    if merged_jump_count > 0:
        print(f"  Interpolation successfully eliminated all {merged_jump_count} jumps!")


POST-INTERPOLATION JUMP ANALYSIS (threshold: 154.0 px/frame)
Total jumps exceeding threshold: 0
‚úì No significant jumps found after interpolation!


## Step 5: Scale to Original Video Space

In [19]:
# Scale coordinates back to original video space.
#
# merged_df is modified in place, so running this cell twice would divide the
# coordinates twice. A marker stored in DataFrame.attrs records that the
# conversion has been applied to *this* DataFrame object; a freshly rebuilt
# merged_df starts without the marker and is scaled again as expected.
_SCALED_MARKER = "scaled_to_original_space"

if merged_df.empty:
    # Report the real reason instead of claiming the resize factor is 1.0
    print("No trajectory data to scale")
elif merged_df.attrs.get(_SCALED_MARKER, False):
    print("Coordinates are already in original video space - skipping rescale")
elif RESIZE_FACTOR != 1.0:
    print(
        f"Scaling coordinates from resized space (factor={RESIZE_FACTOR}) to original space..."
    )

    # X and Y are required; Width/Height are scaled only when present
    _scaled_columns = ["X", "Y"] + [
        col for col in ("Width", "Height") if col in merged_df.columns
    ]
    merged_df[_scaled_columns] = merged_df[_scaled_columns] / RESIZE_FACTOR
    merged_df.attrs[_SCALED_MARKER] = True

    print("‚úì Coordinates scaled to original video space")
else:
    print("No scaling needed (resize_factor=1.0)")

No scaling needed (resize_factor=1.0)


## Step 6: Save Final Output

In [20]:
# Quick look at the table that is about to be written to disk
print("Final merged trajectories:")
print(f"  Rows: {merged_df.shape[0]}")
print(f"  Unique trajectories: {merged_df['TrajectoryID'].nunique()}")
print(f"  Columns: {merged_df.columns.tolist()}")

# Value ranges are only meaningful when there is data
if not merged_df.empty:
    frame_ids = merged_df["FrameID"]
    print(f"  Frame range: {frame_ids.min()} - {frame_ids.max()}")
    for axis in ("X", "Y"):
        print(
            f"  {axis} range: {merged_df[axis].min():.1f} - {merged_df[axis].max():.1f}"
        )

# Last expression -> rendered as the cell's rich output
merged_df.head(10)

Final merged trajectories:
  Rows: 13734
  Unique trajectories: 191
  Columns: ['TrajectoryID', 'X', 'Y', 'Theta', 'FrameID', 'State', 'DetectionConfidence', 'AssignmentConfidence', 'PositionUncertainty']
  Frame range: 1 - 750
  X range: 456.0 - 4046.0
  Y range: 328.0 - 3862.0


Unnamed: 0,TrajectoryID,X,Y,Theta,FrameID,State,DetectionConfidence,AssignmentConfidence,PositionUncertainty
0,0,732.0,398.0,3.604497,19,active,0.960815,0.972963,0.55872
1,0,718.0,401.0,3.305332,20,active,0.9745,0.853365,0.575555
2,0,712.0,405.0,3.207548,21,active,0.977804,0.80933,0.5885
3,0,695.0,409.0,2.936751,22,active,0.956972,0.749061,0.593665
4,0,676.0,412.0,3.036397,23,active,0.956708,0.804415,0.567792
5,0,665.0,413.0,3.090848,24,active,0.937172,0.817859,0.609486
6,0,657.0,413.0,3.250555,25,active,0.950802,0.511432,1.533771
7,0,646.5,393.5,3.121234,26,occluded,,,
8,0,636.0,374.0,2.991913,27,active,0.837836,0.828887,0.575455
9,0,645.0,370.0,2.992872,28,active,0.91643,0.538257,0.577364


In [21]:
# Write the merged trajectories to OUTPUT_CSV_PATH (skipped when there is nothing to write)
if merged_df.empty:
    print("WARNING: No data to save!")
else:
    merged_df.to_csv(OUTPUT_CSV_PATH, index=False)
    print(f"‚úì Final trajectories saved to: {OUTPUT_CSV_PATH}")
    # Size on disk, reported in kibibytes
    print(f"  File size: {os.stat(OUTPUT_CSV_PATH).st_size / 1024:.1f} KB")

‚úì Final trajectories saved to: emi_short_tracking_final.csv
  File size: 1226.7 KB


## Summary Statistics

In [22]:
# Trajectory counts at each stage of the pipeline, plus the output location
print("=" * 60, "POST-PROCESSING SUMMARY", "=" * 60, sep="\n")

# Raw tracker output, before any post-processing
print("\nüìÅ Input files:")
for label, raw_df in (("Forward: ", forward_raw), ("Backward:", backward_raw)):
    print(f"   {label} {raw_df['TrajectoryID'].nunique()} trajectories")

# Counts reported by the per-direction post-processing step (0 if missing)
print("\nüîß After individual post-processing:")
for label, stats in (("Forward: ", forward_stats), ("Backward:", backward_stats)):
    print(f"   {label} {stats.get('final_count', 0)} trajectories")

print("\nüîÄ After merging:")
print(f"   Final: {merged_df['TrajectoryID'].nunique()} trajectories")

print("\nüíæ Output saved to:")
print(f"   {OUTPUT_CSV_PATH}")

print("\n" + "=" * 60)

POST-PROCESSING SUMMARY

üìÅ Input files:
   Forward:  83 trajectories
   Backward: 102 trajectories

üîß After individual post-processing:
   Forward:  131 trajectories
   Backward: 134 trajectories

üîÄ After merging:
   Final: 191 trajectories

üíæ Output saved to:
   emi_short_tracking_final.csv



## Optional: Generate Annotated Video

Generate a video with trajectory overlays similar to the main tracker output.

In [23]:
# ===================================================================
# VIDEO OUTPUT SETTINGS
# ===================================================================

# Master switch for the (slow) annotated-video step
GENERATE_VIDEO = True

# Destination file; None means "<video name>_annotated.mp4" next to VIDEO_PATH
VIDEO_OUTPUT_PATH = None

# What to overlay on each frame
SHOW_LABELS = True  # trajectory ID text next to each marker
SHOW_ORIENTATION = True  # heading arrow
SHOW_TRAILS = True  # recent-path trail
TRAIL_DURATION_SEC = 5.0  # how far back the trail reaches, in seconds

# Overlay geometry, expressed relative to the reference body size
MARKER_SIZE = 0.1  # circle radius / body size
ARROW_LENGTH = 0.25  # arrow length / body size
TEXT_SCALE = 3.0  # multiplier for label text size

# Derive the output path from the input video when none was given
if VIDEO_OUTPUT_PATH is None:
    VIDEO_OUTPUT_PATH = os.path.splitext(VIDEO_PATH)[0] + "_annotated.mp4"

print(f"Video output: {VIDEO_OUTPUT_PATH}")
print(
    f"Options: labels={SHOW_LABELS}, "
    f"orientation={SHOW_ORIENTATION}, "
    f"trails={SHOW_TRAILS}"
)

Video output: emi_short_annotated.mp4
Options: labels=True, orientation=True, trails=True


In [24]:
def _build_trajectory_lookups(trajectories_df):
    """Index trajectory rows by frame (for drawing) and by track (for trails).

    Returns:
        (detections_by_frame, trails_by_track) where
        - detections_by_frame maps FrameID -> list of (track_id, x, y, theta)
          in table order (theta is None when there is no Theta column);
        - trails_by_track maps track_id -> (frame_ids, points) with both lists
          sorted by FrameID; points holds (frame_id, int(x), int(y)) and rows
          with a missing X or Y are left out.
    """
    detections_by_frame = {}
    trail_points = {}
    if trajectories_df.empty:
        return detections_by_frame, {}

    has_theta = "Theta" in trajectories_df.columns
    columns = ["FrameID", "TrajectoryID", "X", "Y"] + (["Theta"] if has_theta else [])

    # itertuples(name=None) yields plain tuples, avoiding one Series per row
    for record in trajectories_df[columns].itertuples(index=False, name=None):
        frame_id, track_id = int(record[0]), int(record[1])
        x, y = record[2], record[3]
        theta = record[4] if has_theta else None

        detections_by_frame.setdefault(frame_id, []).append((track_id, x, y, theta))
        if not pd.isna(x) and not pd.isna(y):
            trail_points.setdefault(track_id, []).append((frame_id, int(x), int(y)))

    trails_by_track = {}
    for track_id, points in trail_points.items():
        points.sort(key=lambda p: p[0])  # stable: ties keep table order
        trails_by_track[track_id] = ([p[0] for p in points], points)

    return detections_by_frame, trails_by_track


def generate_annotated_video(
    video_path,
    output_path,
    trajectories_df,
    reference_body_size=77.0,
    show_labels=True,
    show_orientation=True,
    show_trails=True,
    trail_duration_sec=2.0,
    marker_size=0.1,
    arrow_length=0.25,
    text_scale=3.0,
    frame_id_offset=None,
):
    """
    Generate annotated video with trajectory overlays.

    Every frame of the input video is read, overlaid with the detections whose
    FrameID matches that frame, and written to the output file (mp4v codec,
    same resolution and FPS as the input).

    Args:
        video_path: Path to input video
        output_path: Path to output video
        trajectories_df: DataFrame with columns TrajectoryID, FrameID, X, Y
            and optionally Theta (radians, used for the orientation arrow)
        reference_body_size: Reference body size in pixels for scaling
        show_labels: Show trajectory ID labels
        show_orientation: Show orientation arrows
        show_trails: Show trajectory trails
        trail_duration_sec: Duration of trails in seconds
        marker_size: Circle radius as fraction of body size
        arrow_length: Arrow length as fraction of body size
        text_scale: Text size scale factor
        frame_id_offset: FrameID that corresponds to the first video frame.
            None (default) infers it from the data: 1 when the largest FrameID
            is >= the video's reported frame count (such an ID cannot be a
            0-based index), otherwise 0. Pass 0 or 1 explicitly to override.

    Raises:
        ValueError: If the input video cannot be opened or the output video
            writer cannot be created.
    """
    from bisect import bisect_left

    import cv2
    import numpy as np
    from tqdm.notebook import tqdm

    # Open video
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise ValueError(f"Could not open video: {video_path}")

    out = None
    try:
        # Get video properties
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        print(
            f"Input video: {frame_width}x{frame_height} @ {fps:.1f} FPS, {total_frames} frames"
        )

        # Create video writer
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))
        if not out.isOpened():
            raise ValueError(f"Could not create output video: {output_path}")

        # Work out how FrameID maps onto the 0-based read index. Looking up
        # 1-based FrameIDs with a 0-based index would draw every overlay one
        # frame late and never draw the last FrameID.
        if frame_id_offset is None:
            frame_id_offset = 0
            if total_frames > 0 and not trajectories_df.empty:
                if trajectories_df["FrameID"].max() >= total_frames:
                    frame_id_offset = 1
            print(f"FrameID {frame_id_offset} is treated as the first video frame")

        # Calculate trail duration in frames
        trail_duration_frames = int(trail_duration_sec * fps)

        # Scale drawing parameters by body size
        marker_radius = int(marker_size * reference_body_size)
        arrow_len = int(arrow_length * reference_body_size)
        text_size = 0.5 * text_scale
        marker_thickness = max(2, int(0.15 * reference_body_size))
        trail_thickness = max(1, marker_thickness // 2)
        label_offset = int(marker_radius + 5)
        label_thickness = max(1, int(text_scale * 2))

        # Default colors (BGR format for OpenCV)
        default_colors = [
            (0, 255, 0),  # Green
            (255, 0, 0),  # Blue
            (0, 0, 255),  # Red
            (255, 255, 0),  # Cyan
            (255, 0, 255),  # Magenta
            (0, 255, 255),  # Yellow
            (128, 0, 255),  # Rose / pink
            (255, 128, 0),  # Light blue
            (0, 128, 255),  # Orange
            (128, 255, 0),  # Spring green
        ]

        # Build lookup for trajectories by frame and by track
        print("Building trajectory lookup...")
        detections_by_frame, trails_by_track = _build_trajectory_lookups(
            trajectories_df
        )

        # Process video frame by frame
        print(f"Generating video: {output_path}")

        for frame_idx in tqdm(range(total_frames), desc="Processing frames"):
            ret, frame = cap.read()
            if not ret:
                # Reported frame count can exceed what is actually decodable
                break

            # FrameID of the frame just read
            frame_id = frame_idx + frame_id_offset
            detections = detections_by_frame.get(frame_id, [])

            # Draw trails first (underneath current positions)
            if show_trails:
                for track_id, _, _, _ in detections:
                    color = default_colors[track_id % len(default_colors)]
                    frame_ids, points = trails_by_track.get(track_id, ((), ()))

                    # Points with frame_id - duration <= FrameID < frame_id;
                    # two binary searches replace a scan of the whole track.
                    start = bisect_left(frame_ids, frame_id - trail_duration_frames)
                    stop = bisect_left(frame_ids, frame_id)
                    window = points[start:stop]

                    # Fading line segments; older segments are darker. With
                    # fewer than two points the loop body (and the division
                    # by trail_duration_frames) never runs.
                    for (seg_frame, x1, y1), (_, x2, y2) in zip(window, window[1:]):
                        age = frame_id - seg_frame
                        alpha = 1.0 - (age / trail_duration_frames)
                        faded_color = tuple(int(c * alpha) for c in color)
                        cv2.line(
                            frame, (x1, y1), (x2, y2), faded_color, trail_thickness
                        )

            # Draw current positions
            for track_id, x, y, theta in detections:
                # Skip if NaN
                if pd.isna(x) or pd.isna(y):
                    continue

                cx, cy = int(x), int(y)
                color = default_colors[track_id % len(default_colors)]

                # Draw circle at position
                cv2.circle(frame, (cx, cy), marker_radius, color, marker_thickness)

                # Draw label
                if show_labels:
                    cv2.putText(
                        frame,
                        f"ID{track_id}",
                        (cx + label_offset, cy - label_offset),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        text_size,
                        color,
                        label_thickness,
                    )

                # Draw orientation arrow (theta is None/NaN when unavailable)
                if show_orientation and not pd.isna(theta):
                    end_x = int(cx + arrow_len * np.cos(theta))
                    end_y = int(cy + arrow_len * np.sin(theta))
                    cv2.arrowedLine(
                        frame,
                        (cx, cy),
                        (end_x, end_y),
                        color,
                        marker_thickness,
                        tipLength=0.3,
                    )

            # Write frame
            out.write(frame)
    finally:
        # Cleanup - runs on success and on any error so handles never leak
        cap.release()
        if out is not None:
            out.release()

    print(f"‚úì Video saved to: {output_path}")
    print(f"  File size: {os.path.getsize(output_path) / (1024*1024):.1f} MB")


print("Video generation function defined.")

Video generation function defined.


In [25]:
# Render the annotated video unless it is switched off or there is no data
if not GENERATE_VIDEO:
    print("Video generation disabled (GENERATE_VIDEO=False)")
elif merged_df.empty:
    print("No trajectory data available for video generation")
else:
    generate_annotated_video(
        video_path=VIDEO_PATH,
        output_path=VIDEO_OUTPUT_PATH,
        trajectories_df=merged_df,
        # Original (unresized) body size: coordinates were already scaled back
        reference_body_size=REFERENCE_BODY_SIZE,
        show_labels=SHOW_LABELS,
        show_orientation=SHOW_ORIENTATION,
        show_trails=SHOW_TRAILS,
        trail_duration_sec=TRAIL_DURATION_SEC,
        marker_size=MARKER_SIZE,
        arrow_length=ARROW_LENGTH,
        text_scale=TEXT_SCALE,
    )

Input video: 4512x4512 @ 25.0 FPS, 750 frames
Building trajectory lookup...
Generating video: emi_short_annotated.mp4


Processing frames:   0%|          | 0/750 [00:00<?, ?it/s]

‚úì Video saved to: emi_short_annotated.mp4
  File size: 115.9 MB


## Optional: Quick Static Plots

Generate static plots for quick overview (useful if video generation is slow).

In [26]:
# Optional: Plot trajectory overview
# Left panel: every trajectory as a thin line in image coordinates.
# Right panel: histogram of trajectory lengths (rows per TrajectoryID).
import matplotlib.pyplot as plt

if not merged_df.empty:
    fig, axes = plt.subplots(1, 2, figsize=(14, 6))

    # Plot 1: Spatial trajectories
    # One groupby pass splits the table per trajectory; the previous boolean
    # filter rescanned the whole table once per ID. sort=False keeps groups in
    # order of first appearance, so the colour assigned to each line is the
    # same as when iterating over .unique().
    ax1 = axes[0]
    for traj_id, traj in merged_df.groupby("TrajectoryID", sort=False):
        ax1.plot(traj["X"], traj["Y"], alpha=0.7, linewidth=0.5)
    ax1.set_xlabel("X (pixels)")
    ax1.set_ylabel("Y (pixels)")
    ax1.set_title(f'All Trajectories ({merged_df["TrajectoryID"].nunique()} total)')
    ax1.set_aspect("equal")
    ax1.invert_yaxis()  # Flip Y axis to match image coordinates

    # Plot 2: Trajectory lengths
    ax2 = axes[1]
    traj_lengths = merged_df.groupby("TrajectoryID").size()
    # Computed once and reused for the title, the marker lines and the legend
    mean_length = traj_lengths.mean()
    median_length = traj_lengths.median()

    ax2.hist(traj_lengths, bins=50, edgecolor="black", alpha=0.7)
    ax2.set_xlabel("Trajectory Length (frames)")
    ax2.set_ylabel("Count")
    ax2.set_title(
        f"Trajectory Length Distribution\nMean: {mean_length:.1f}, Median: {median_length:.1f}"
    )
    ax2.axvline(
        mean_length,
        color="red",
        linestyle="--",
        label=f"Mean ({mean_length:.1f})",
    )
    ax2.axvline(
        median_length,
        color="orange",
        linestyle="--",
        label=f"Median ({median_length:.1f})",
    )
    ax2.legend()

    plt.tight_layout()
    plt.show()
else:
    print("No data to visualize!")

In [27]:
# Optional: Per-trajectory statistics
# One row per TrajectoryID: first/last frame, number of rows, and the mean and
# standard deviation of the X and Y positions (all rounded to 2 decimals).
if not merged_df.empty:
    traj_stats = (
        merged_df.groupby("TrajectoryID")
        .agg(
            # Named aggregation: output column -> (source column, function)
            **{
                "Start Frame": ("FrameID", "min"),
                "End Frame": ("FrameID", "max"),
                "Length": ("FrameID", "count"),
                "X Mean": ("X", "mean"),
                "X Std": ("X", "std"),
                "Y Mean": ("Y", "mean"),
                "Y Std": ("Y", "std"),
            }
        )
        .round(2)
    )

    # Elapsed time between the first and last frame of each trajectory
    frame_span = traj_stats["End Frame"] - traj_stats["Start Frame"]
    traj_stats["Duration (s)"] = frame_span / FPS

    print("Per-trajectory statistics:")
    display(traj_stats)

Per-trajectory statistics:


Unnamed: 0_level_0,Start Frame,End Frame,Length,X Mean,X Std,Y Mean,Y Std,Duration (s)
TrajectoryID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,19,29,11,674.95,34.08,399.95,15.27,0.40
1,1,34,34,927.44,146.10,481.78,142.06,1.32
2,54,750,697,848.59,223.69,740.06,280.11,27.84
3,48,601,554,2150.99,399.18,2505.56,670.96,22.12
4,1,685,685,1500.03,540.88,1575.47,936.63,27.36
...,...,...,...,...,...,...,...,...
186,124,153,30,856.55,43.96,1139.62,48.65,1.16
187,477,487,11,1165.05,1.58,1141.23,3.54,0.40
188,197,207,11,1285.23,62.41,1247.36,32.60,0.40
189,3,64,62,824.22,88.90,978.10,62.32,2.44
