In [None]:
%pip install -q --upgrade ultralytics opencv-python numpy imutils tqdm


In [1]:
from __future__ import annotations

import os
import time
from dataclasses import dataclass
from typing import Dict, List, Tuple

import cv2
import numpy as np


@dataclass
class Track:
    object_id: int
    centroid: Tuple[int, int]
    bbox: Tuple[int, int, int, int]
    disappeared: int = 0


class CentroidTracker:
    def __init__(self, max_disappeared: int = 30, max_distance: float = 80.0):
        self.next_object_id: int = 0
        self.tracks: Dict[int, Track] = {}
        self.max_disappeared = max_disappeared
        self.max_distance = max_distance

    def register(self, centroid: Tuple[int, int], bbox: Tuple[int, int, int, int]):
        self.tracks[self.next_object_id] = Track(
            object_id=self.next_object_id,
            centroid=centroid,
            bbox=bbox,
            disappeared=0,
        )
        self.next_object_id += 1

    def deregister(self, object_id: int):
        if object_id in self.tracks:
            del self.tracks[object_id]

    def update(self, rects: List[Tuple[int, int, int, int]]) -> Dict[int, Track]:
        # No detections: mark disappearances and remove stale tracks
        if len(rects) == 0:
            for object_id in list(self.tracks.keys()):
                self.tracks[object_id].disappeared += 1
                if self.tracks[object_id].disappeared > self.max_disappeared:
                    self.deregister(object_id)
            return dict(self.tracks)

        # Compute input centroids
        input_centroids = np.zeros((len(rects), 2), dtype=np.float32)
        for i, (x1, y1, x2, y2) in enumerate(rects):
            cX = int((x1 + x2) * 0.5)
            cY = int((y1 + y2) * 0.5)
            input_centroids[i] = (cX, cY)

        # If no existing tracks, register all detections
        if len(self.tracks) == 0:
            for i, box in enumerate(rects):
                self.register((int(input_centroids[i][0]), int(input_centroids[i][1])), box)
            return dict(self.tracks)

        # Build arrays of existing track centroids and ids
        object_ids = list(self.tracks.keys())
        object_centroids = np.array([self.tracks[oid].centroid for oid in object_ids], dtype=np.float32)

        # Compute pairwise distances
        D = np.linalg.norm(object_centroids[:, None, :] - input_centroids[None, :, :], axis=2)

        # Greedy matching by minimum distance
        rows = D.min(axis=1).argsort()
        cols = D.argmin(axis=1)[rows]

        used_rows = set()
        used_cols = set()

        for row, col in zip(rows, cols):
            if row in used_rows or col in used_cols:
                continue
            if D[row, col] > self.max_distance:
                continue

            object_id = object_ids[row]
            centroid = (int(input_centroids[col][0]), int(input_centroids[col][1]))
            self.tracks[object_id].centroid = centroid
            self.tracks[object_id].bbox = rects[col]
            self.tracks[object_id].disappeared = 0

            used_rows.add(row)
            used_cols.add(col)

        # Determine unused rows and columns
        unused_rows = set(range(0, D.shape[0])).difference(used_rows)
        unused_cols = set(range(0, D.shape[1])).difference(used_cols)

        # For tracks that didn't get matched, mark disappeared
        for row in unused_rows:
            object_id = object_ids[row]
            self.tracks[object_id].disappeared += 1
            if self.tracks[object_id].disappeared > self.max_disappeared:
                self.deregister(object_id)

        # Register new detections for unmatched columns
        for col in unused_cols:
            centroid = (int(input_centroids[col][0]), int(input_centroids[col][1]))
            self.register(centroid, rects[col])

        return dict(self.tracks)


In [2]:
from pathlib import Path
from datetime import datetime

# Paths
VIDEO_PATH = Path("data/videos/onikuru_cropped_mini.mp4")  # change if needed

# Prefer YOLOv12 first, with graceful fallbacks
MODEL_CANDIDATES = []
for name in ["yolo12m.pt", "yolo12s.pt", "yolo12n.pt"]:
    p = Path(name)
    MODEL_CANDIDATES.append(p if p.exists() else name)
for name in ["yolo11m.pt", "yolov8s.pt", "yolo11n.pt"]:
    p = Path(name)
    MODEL_CANDIDATES.append(p if p.exists() else name)

STAMP = datetime.now().strftime("%Y%m%d_%H%M%S")
OUT_DIR = Path("output") / f"people_track_{STAMP}"
OUT_DIR.mkdir(parents=True, exist_ok=True)
OUTPUT_PATH = OUT_DIR / "_tmp_center_crop.mp4"

# Detection/tracking parameters - tuned for small people
CONF_THRES = 0.15
IOU_THRES = 0.55
IMG_SIZE = 1536           # larger resolution improves small-object recall
MAX_DET = 1000
PERSON_CLASS_ID = 0       # COCO person

# Tracker params
MAX_DISAPPEARED = 30
MAX_DISTANCE = 90.0       # allow looser matches when subjects are tiny

# Optional counting across a virtual line
ENABLE_COUNTING = False
LINE_Y_FRACTION = 0.55    # relative Y for line (e.g., 0.55 * frame_height)

print(f"VIDEO_PATH: {VIDEO_PATH}")
print(f"MODEL_CANDIDATES: {MODEL_CANDIDATES}")
print(f"OUTPUT_PATH: {OUTPUT_PATH}")


VIDEO_PATH: data\videos\onikuru_cropped_mini.mp4
MODEL_CANDIDATES: [WindowsPath('yolo12m.pt'), 'yolo12s.pt', 'yolo12n.pt', WindowsPath('yolo11m.pt'), WindowsPath('yolov8s.pt'), 'yolo11n.pt']
OUTPUT_PATH: output\people_track_20251204_212637\_tmp_center_crop.mp4


In [8]:
# 4K-focused overrides (tune for small people in 3840x2160)
CONF_THRES = 0.06
IOU_THRES = 0.4
IMG_SIZE = 1920
MAX_DET = 3000
AGNOSTIC_NMS = True
DEVICE = 0  # 0 for CUDA GPU, or "cpu"

# Tracker tuning
MAX_DISTANCE = 150.0

# Tiling settings
TILING_2X2 = True
TILE_OVERLAP = 160  # pixels

print({
    "CONF_THRES": CONF_THRES,
    "IOU_THRES": IOU_THRES,
    "IMG_SIZE": IMG_SIZE,
    "MAX_DET": MAX_DET,
    "AGNOSTIC_NMS": AGNOSTIC_NMS,
    "DEVICE": DEVICE,
    "TILING_2X2": TILING_2X2,
    "TILE_OVERLAP": TILE_OVERLAP,
})


{'CONF_THRES': 0.06, 'IOU_THRES': 0.4, 'IMG_SIZE': 1920, 'MAX_DET': 3000, 'AGNOSTIC_NMS': True, 'DEVICE': 0, 'TILING_2X2': True, 'TILE_OVERLAP': 160}


In [9]:
from ultralytics import YOLO
from tqdm import tqdm

# Initialize model (prefer YOLOv12)
last_error = None
model = None
for candidate in MODEL_CANDIDATES:
    try:
        model = YOLO(str(candidate))
        print(f"Loaded model: {candidate}")
        break
    except Exception as e:
        last_error = e
        print(f"Failed to load {candidate}: {e}")
assert model is not None, f"Could not load any model from {MODEL_CANDIDATES}. Last error: {last_error}"

# Open video
cap = cv2.VideoCapture(str(VIDEO_PATH))
assert cap.isOpened(), f"Cannot open video: {VIDEO_PATH}"

fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Total frames (may be 0 for some codecs)
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
if total_frames <= 0:
    total_frames = None

# Writer
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
writer = cv2.VideoWriter(str(OUTPUT_PATH), fourcc, fps, (width, height))

# Tracker and counts
tracker = CentroidTracker(max_disappeared=MAX_DISAPPEARED, max_distance=MAX_DISTANCE)
count_in, count_out = 0, 0
line_y = int(LINE_Y_FRACTION * height)

# Inference options tuned for small people
predict_kwargs = dict(
    conf=CONF_THRES,
    iou=IOU_THRES,
    imgsz=IMG_SIZE,
    classes=[PERSON_CLASS_ID],  # person only
    agnostic_nms=False,
    max_det=MAX_DET,
    augment=True,
    verbose=False,
)

prev_centroids: Dict[int, Tuple[int, int]] = {}

start_time = time.time()
frame_count = 0

pbar = tqdm(total=total_frames, desc="Processing frames", unit="frame")

while True:
    ret, frame = cap.read()
    if not ret:
        break
    frame_count += 1
    pbar.update(1)

    # Run YOLO inference
    results = model.predict(frame, **predict_kwargs)
    boxes_xyxy: List[Tuple[int, int, int, int]] = []
    if len(results) > 0:
        r = results[0]
        if r.boxes is not None and len(r.boxes) > 0:
            xyxy = r.boxes.xyxy.cpu().numpy().astype(int)
            confs = r.boxes.conf.cpu().numpy()
            clss = r.boxes.cls.cpu().numpy().astype(int)
            for (x1, y1, x2, y2), c, k in zip(xyxy, confs, clss):
                if k == PERSON_CLASS_ID and c >= CONF_THRES:
                    # clamp to frame
                    x1, y1 = max(0, x1), max(0, y1)
                    x2, y2 = min(width - 1, x2), min(height - 1, y2)
                    if x2 > x1 and y2 > y1:
                        boxes_xyxy.append((x1, y1, x2, y2))

    # Update tracker
    tracks = tracker.update(boxes_xyxy)

    # Optional counting (line-crossing)
    if ENABLE_COUNTING:
        for oid, tr in tracks.items():
            cX, cY = tr.centroid
            if oid in prev_centroids:
                prevY = prev_centroids[oid][1]
                # Upwards vs downwards crossing
                if prevY < line_y <= cY:
                    count_in += 1
                elif prevY > line_y >= cY:
                    count_out += 1
            prev_centroids[oid] = (cX, cY)

    # Draw
    if ENABLE_COUNTING:
        cv2.line(frame, (0, line_y), (width, line_y), (0, 255, 255), 2)
        cv2.putText(frame, f"IN: {count_in}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 200, 0), 2)
        cv2.putText(frame, f"OUT: {count_out}", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 200), 2)

    for oid, tr in tracks.items():
        x1, y1, x2, y2 = tr.bbox
        cX, cY = tr.centroid
        # draw bbox
        cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 140, 0), 2)
        # draw centroid
        cv2.circle(frame, (cX, cY), 3, (0, 255, 255), -1)
        # id label
        cv2.putText(frame, f"ID {oid}", (x1, max(0, y1 - 5)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 140, 0), 2)

    # FPS + progress overlay
    elapsed = time.time() - start_time
    fps_text = f"FPS: {frame_count / elapsed:.1f}" if elapsed > 0 else "FPS: --"
    cv2.putText(frame, fps_text, (width - 180, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (50, 220, 50), 2)
    prog_text = f"{frame_count}/{total_frames}" if total_frames else f"{frame_count}"
    cv2.putText(frame, prog_text, (width - 180, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (200, 200, 50), 2)

    writer.write(frame)

pbar.close()
cap.release()
writer.release()

print(f"Saved: {OUTPUT_PATH}")


Loaded model: yolo12m.pt


Processing frames: 100%|██████████| 615/615 [01:42<00:00,  6.02frame/s]


Saved: output\people_track_20251204_212637\_tmp_center_crop.mp4


In [10]:
# Center-crop run (writes cropped annotated video)
from ultralytics import YOLO
from tqdm import tqdm

# Ensure model is loaded
try:
    _ = model
except NameError:
    last_error = None
    model = None
    for candidate in MODEL_CANDIDATES:
        try:
            model = YOLO(str(candidate))
            print(f"Loaded model: {candidate}")
            break
        except Exception as e:
            last_error = e
            print(f"Failed to load {candidate}: {e}")
    assert model is not None, f"Could not load any model from {MODEL_CANDIDATES}. Last error: {last_error}"

# Open video
cap = cv2.VideoCapture(str(VIDEO_PATH))
assert cap.isOpened(), f"Cannot open video: {VIDEO_PATH}"

fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
src_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
src_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Choose crop size: default to 1920x1080; change if you want
CROP_W = min(1920, src_w)
CROP_H = min(1080, src_h)

# Center crop rectangle in source frame
x0 = max(0, (src_w - CROP_W) // 2)
y0 = max(0, (src_h - CROP_H) // 2)
x1 = min(src_w, x0 + CROP_W)
y1 = min(src_h, y0 + CROP_H)

out_w, out_h = (x1 - x0), (y1 - y0)

# Writer (cropped size)
out_path = OUT_DIR / "_tmp_center_crop.mp4"
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
writer = cv2.VideoWriter(str(out_path), fourcc, fps, (out_w, out_h))

# Tracker and counts
tracker = CentroidTracker(max_disappeared=MAX_DISAPPEARED, max_distance=MAX_DISTANCE)
count_in, count_out = 0, 0
line_y = int(LINE_Y_FRACTION * out_h)

predict_kwargs = dict(
    conf=CONF_THRES,
    iou=IOU_THRES,
    imgsz=IMG_SIZE,
    classes=[PERSON_CLASS_ID],
    agnostic_nms=globals().get("AGNOSTIC_NMS", False),
    max_det=MAX_DET,
    augment=True,
    verbose=False,
    device=globals().get("DEVICE", 0),
    half=True,
    amp=True,
)

prev_centroids: Dict[int, Tuple[int, int]] = {}

# Progress bar
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
if total_frames <= 0:
    total_frames = None
pbar = tqdm(total=total_frames, desc="Center-crop processing", unit="frame")

start_time = time.time()
frame_count = 0

while True:
    ret, frame = cap.read()
    if not ret:
        break
    frame_count += 1
    pbar.update(1)

    crop = frame[y0:y1, x0:x1]

    # Run YOLO inference on crop
    results = model.predict(crop, **predict_kwargs)
    boxes_xyxy: List[Tuple[int, int, int, int]] = []
    if len(results) > 0:
        r = results[0]
        if r.boxes is not None and len(r.boxes) > 0:
            xyxy = r.boxes.xyxy.cpu().numpy().astype(int)
            confs = r.boxes.conf.cpu().numpy()
            clss = r.boxes.cls.cpu().numpy().astype(int)
            for (x1b, y1b, x2b, y2b), c, k in zip(xyxy, confs, clss):
                if k == PERSON_CLASS_ID and c >= CONF_THRES:
                    x1b, y1b = max(0, x1b), max(0, y1b)
                    x2b, y2b = min(out_w - 1, x2b), min(out_h - 1, y2b)
                    if x2b > x1b and y2b > y1b:
                        boxes_xyxy.append((x1b, y1b, x2b, y2b))

    # Update tracker with crop-space boxes
    tracks = tracker.update(boxes_xyxy)

    # Optional counting
    if ENABLE_COUNTING:
        for oid, tr in tracks.items():
            cX, cY = tr.centroid
            if oid in prev_centroids:
                prevY = prev_centroids[oid][1]
                if prevY < line_y <= cY:
                    count_in += 1
                elif prevY > line_y >= cY:
                    count_out += 1
            prev_centroids[oid] = (cX, cY)

    # Draw overlays onto crop
    if ENABLE_COUNTING:
        cv2.line(crop, (0, line_y), (out_w, line_y), (0, 255, 255), 2)
        cv2.putText(crop, f"IN: {count_in}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 200, 0), 2)
        cv2.putText(crop, f"OUT: {count_out}", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 200), 2)

    for oid, tr in tracks.items():
        x1b, y1b, x2b, y2b = tr.bbox
        cX, cY = tr.centroid
        cv2.rectangle(crop, (x1b, y1b), (x2b, y2b), (255, 140, 0), 2)
        cv2.circle(crop, (cX, cY), 3, (0, 255, 255), -1)
        cv2.putText(crop, f"ID {oid}", (x1b, max(0, y1b - 5)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 140, 0), 2)

    # FPS + progress
    elapsed = time.time() - start_time
    fps_text = f"FPS: {frame_count / elapsed:.1f}" if elapsed > 0 else "FPS: --"
    cv2.putText(crop, fps_text, (out_w - 180, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (50, 220, 50), 2)
    prog_text = f"{frame_count}/{total_frames}" if total_frames else f"{frame_count}"
    cv2.putText(crop, prog_text, (out_w - 180, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (200, 200, 50), 2)

    writer.write(crop)

pbar.close()
cap.release()
writer.release()

print(f"Saved center-crop video: {out_path}")


Center-crop processing: 100%|██████████| 615/615 [01:22<00:00,  7.48frame/s]

Saved center-crop video: output\people_track_20251204_212637\_tmp_center_crop.mp4





In [None]:
# Full-frame run with trajectory visualization
from ultralytics import YOLO
from tqdm import tqdm
from collections import deque

# Ensure model is loaded
try:
    _ = model
except NameError:
    last_error = None
    model = None
    for candidate in MODEL_CANDIDATES:
        try:
            model = YOLO(str(candidate))
            print(f"Loaded model: {candidate}")
            break
        except Exception as e:
            last_error = e
            print(f"Failed to load {candidate}: {e}")
    assert model is not None, f"Could not load any model from {MODEL_CANDIDATES}. Last error: {last_error}"

# Open video
cap = cv2.VideoCapture(str(VIDEO_PATH))
assert cap.isOpened(), f"Cannot open video: {VIDEO_PATH}"

fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Writer
out_path_traj = OUT_DIR / "_tmp_with_traj.mp4"
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
writer = cv2.VideoWriter(str(out_path_traj), fourcc, fps, (width, height))

# Tracker and counts
tracker = CentroidTracker(max_disappeared=MAX_DISAPPEARED, max_distance=MAX_DISTANCE)
count_in, count_out = 0, 0
line_y = int(LINE_Y_FRACTION * height)

# Trajectory store: per-id deque of centroids
TRAJ_MAX_POINTS = 40
TRAJ_THICKNESS = 2
tracks_history: Dict[int, deque] = {}

def id_color(oid: int) -> Tuple[int, int, int]:
    # Deterministic color per id
    r = (37 * oid) % 255
    g = (17 * oid + 85) % 255
    b = (97 * oid + 170) % 255
    return int(b), int(g), int(r)

# Inference options
predict_kwargs = dict(
    conf=CONF_THRES,
    iou=IOU_THRES,
    imgsz=IMG_SIZE,
    classes=[PERSON_CLASS_ID],
    agnostic_nms=globals().get("AGNOSTIC_NMS", False),
    max_det=MAX_DET,
    augment=True,
    verbose=False,
    device=globals().get("DEVICE", 0),
    half=True,
    amp=True,
)

prev_centroids: Dict[int, Tuple[int, int]] = {}

# Progress bar
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
if total_frames <= 0:
    total_frames = None
pbar = tqdm(total=total_frames, desc="Full-frame w/ traj", unit="frame")

start_time = time.time()
frame_count = 0

while True:
    ret, frame = cap.read()
    if not ret:
        break
    frame_count += 1
    pbar.update(1)

    # YOLO inference
    results = model.predict(frame, **predict_kwargs)
    boxes_xyxy: List[Tuple[int, int, int, int]] = []
    if len(results) > 0:
        r = results[0]
        if r.boxes is not None and len(r.boxes) > 0:
            xyxy = r.boxes.xyxy.cpu().numpy().astype(int)
            confs = r.boxes.conf.cpu().numpy()
            clss = r.boxes.cls.cpu().numpy().astype(int)
            for (x1, y1, x2, y2), c, k in zip(xyxy, confs, clss):
                if k == PERSON_CLASS_ID and c >= CONF_THRES:
                    x1, y1 = max(0, x1), max(0, y1)
                    x2, y2 = min(width - 1, x2), min(height - 1, y2)
                    if x2 > x1 and y2 > y1:
                        boxes_xyxy.append((x1, y1, x2, y2))

    # Update tracker
    tracks = tracker.update(boxes_xyxy)

    # Maintain trajectory
    for oid, tr in tracks.items():
        cX, cY = tr.centroid
        if oid not in tracks_history:
            tracks_history[oid] = deque(maxlen=TRAJ_MAX_POINTS)
        tracks_history[oid].append((cX, cY))

    # Optional counting
    if ENABLE_COUNTING:
        for oid, tr in tracks.items():
            cX, cY = tr.centroid
            if oid in prev_centroids:
                prevY = prev_centroids[oid][1]
                if prevY < line_y <= cY:
                    count_in += 1
                elif prevY > line_y >= cY:
                    count_out += 1
            prev_centroids[oid] = (cX, cY)

    # Draw overlays
    if ENABLE_COUNTING:
        cv2.line(frame, (0, line_y), (width, line_y), (0, 255, 255), 2)
        cv2.putText(frame, f"IN: {count_in}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 200, 0), 2)
        cv2.putText(frame, f"OUT: {count_out}", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 200), 2)

    for oid, tr in tracks.items():
        x1, y1, x2, y2 = tr.bbox
        cX, cY = tr.centroid
        color = id_color(oid)
        # bbox and id
        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
        cv2.circle(frame, (cX, cY), 3, (0, 255, 255), -1)
        cv2.putText(frame, f"ID {oid}", (x1, max(0, y1 - 5)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
        # trajectory polyline
        pts = tracks_history.get(oid, None)
        if pts and len(pts) > 1:
            for i in range(1, len(pts)):
                pt1 = pts[i - 1]
                pt2 = pts[i]
                cv2.line(frame, pt1, pt2, color, TRAJ_THICKNESS)

    # FPS + progress
    elapsed = time.time() - start_time
    fps_text = f"FPS: {frame_count / elapsed:.1f}" if elapsed > 0 else "FPS: --"
    cv2.putText(frame, fps_text, (width - 200, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (50, 220, 50), 2)
    prog_text = f"{frame_count}/{total_frames}" if total_frames else f"{frame_count}"
    cv2.putText(frame, prog_text, (width - 200, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (200, 200, 50), 2)

    writer.write(frame)

pbar.close()
cap.release()
writer.release()

print(f"Saved full-frame with trajectories: {out_path_traj}")


Full-frame w/ traj:  29%|██▉       | 179/615 [00:30<01:12,  6.00frame/s]

In [None]:
# Center-crop run with trajectory visualization
from ultralytics import YOLO
from tqdm import tqdm
from collections import deque

# Ensure model is loaded
try:
    _ = model
except NameError:
    last_error = None
    model = None
    for candidate in MODEL_CANDIDATES:
        try:
            model = YOLO(str(candidate))
            print(f"Loaded model: {candidate}")
            break
        except Exception as e:
            last_error = e
            print(f"Failed to load {candidate}: {e}")
    assert model is not None, f"Could not load any model from {MODEL_CANDIDATES}. Last error: {last_error}"

# Open video
cap = cv2.VideoCapture(str(VIDEO_PATH))
assert cap.isOpened(), f"Cannot open video: {VIDEO_PATH}"

fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
src_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
src_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Crop size (adjust as needed)
CROP_W = min(1920, src_w)
CROP_H = min(1080, src_h)

x0 = max(0, (src_w - CROP_W) // 2)
y0 = max(0, (src_h - CROP_H) // 2)
x1 = min(src_w, x0 + CROP_W)
y1 = min(src_h, y0 + CROP_H)

out_w, out_h = (x1 - x0), (y1 - y0)

out_path_traj_crop = OUT_DIR / "_tmp_center_crop_traj.mp4"
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
writer = cv2.VideoWriter(str(out_path_traj_crop), fourcc, fps, (out_w, out_h))

# Tracker and trajectory store
tracker = CentroidTracker(max_disappeared=MAX_DISAPPEARED, max_distance=MAX_DISTANCE)
tracks_history: Dict[int, deque] = {}
TRAJ_MAX_POINTS = 40
TRAJ_THICKNESS = 2

count_in, count_out = 0, 0
line_y = int(LINE_Y_FRACTION * out_h)


def id_color(oid: int) -> Tuple[int, int, int]:
    r = (37 * oid) % 255
    g = (17 * oid + 85) % 255
    b = (97 * oid + 170) % 255
    return int(b), int(g), int(r)

predict_kwargs = dict(
    conf=CONF_THRES,
    iou=IOU_THRES,
    imgsz=IMG_SIZE,
    classes=[PERSON_CLASS_ID],
    agnostic_nms=globals().get("AGNOSTIC_NMS", False),
    max_det=MAX_DET,
    augment=True,
    verbose=False,
    device=globals().get("DEVICE", 0),
    half=True,
    amp=True,
)

prev_centroids: Dict[int, Tuple[int, int]] = {}

# Progress bar
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
if total_frames <= 0:
    total_frames = None
pbar = tqdm(total=total_frames, desc="Center-crop w/ traj", unit="frame")

start_time = time.time()
frame_count = 0

while True:
    ret, frame = cap.read()
    if not ret:
        break
    frame_count += 1
    pbar.update(1)

    crop = frame[y0:y1, x0:x1]

    results = model.predict(crop, **predict_kwargs)
    boxes_xyxy: List[Tuple[int, int, int, int]] = []
    if len(results) > 0:
        r = results[0]
        if r.boxes is not None and len(r.boxes) > 0:
            xyxy = r.boxes.xyxy.cpu().numpy().astype(int)
            confs = r.boxes.conf.cpu().numpy()
            clss = r.boxes.cls.cpu().numpy().astype(int)
            for (x1b, y1b, x2b, y2b), c, k in zip(xyxy, confs, clss):
                if k == PERSON_CLASS_ID and c >= CONF_THRES:
                    x1b, y1b = max(0, x1b), max(0, y1b)
                    x2b, y2b = min(out_w - 1, x2b), min(out_h - 1, y2b)
                    if x2b > x1b and y2b > y1b:
                        boxes_xyxy.append((x1b, y1b, x2b, y2b))

    tracks = tracker.update(boxes_xyxy)

    # Update trajectory deque
    for oid, tr in tracks.items():
        cX, cY = tr.centroid
        if oid not in tracks_history:
            tracks_history[oid] = deque(maxlen=TRAJ_MAX_POINTS)
        tracks_history[oid].append((cX, cY))

    if ENABLE_COUNTING:
        for oid, tr in tracks.items():
            cX, cY = tr.centroid
            if oid in prev_centroids:
                prevY = prev_centroids[oid][1]
                if prevY < line_y <= cY:
                    count_in += 1
                elif prevY > line_y >= cY:
                    count_out += 1
            prev_centroids[oid] = (cX, cY)

    if ENABLE_COUNTING:
        cv2.line(crop, (0, line_y), (out_w, line_y), (0, 255, 255), 2)
        cv2.putText(crop, f"IN: {count_in}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 200, 0), 2)
        cv2.putText(crop, f"OUT: {count_out}", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 200), 2)

    for oid, tr in tracks.items():
        x1b, y1b, x2b, y2b = tr.bbox
        cX, cY = tr.centroid
        color = id_color(oid)
        cv2.rectangle(crop, (x1b, y1b), (x2b, y2b), color, 2)
        cv2.circle(crop, (cX, cY), 3, (0, 255, 255), -1)
        cv2.putText(crop, f"ID {oid}", (x1b, max(0, y1b - 5)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
        pts = tracks_history.get(oid, None)
        if pts and len(pts) > 1:
            for i in range(1, len(pts)):
                cv2.line(crop, pts[i-1], pts[i], color, TRAJ_THICKNESS)

    elapsed = time.time() - start_time
    fps_text = f"FPS: {frame_count / elapsed:.1f}" if elapsed > 0 else "FPS: --"
    cv2.putText(crop, fps_text, (out_w - 200, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (50, 220, 50), 2)
    prog_text = f"{frame_count}/{total_frames}" if total_frames else f"{frame_count}"
    cv2.putText(crop, prog_text, (out_w - 200, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (200, 200, 50), 2)

    writer.write(crop)

pbar.close()
cap.release()
writer.release()

print(f"Saved center-crop with trajectories: {out_path_traj_crop}")
