In [None]:
# This notebook runs a four-step pipeline, one step per cell.
# Step 1: Export video frames and person bounding boxes with YOLO:

from pathlib import Path
import numpy as np
import cv2
from tqdm.notebook import tqdm
import torch
from ultralytics import YOLO
import shutil

# Minimum YOLO confidence a person detection needs in order to be exported.
CONFIDENCE_THRESHOLD = 0.15

# === Paths ===
# `video_folder` holds the input videos; `output_base` receives one sub-folder
# per video (created by the detection loop below).
video_folder = Path("/Users/Christian/Downloads/Microadaptive Teaching Dritter Teil - LAs/Marlon/Erste Sitzung/YOLO")
output_base = Path("/Users/Christian/Downloads/Microadaptive Teaching Dritter Teil - LAs/Marlon/Erste Sitzung/YOLO/DLC")

# Discover the input videos BEFORE touching the output folder, so that a wrong
# `video_folder` cannot wipe the results of an earlier run and then fail.
# The four patterns cover every upper/lower-case spelling of ".mp4";
# dict.fromkeys() drops duplicates (possible when glob matching is
# case-insensitive) while keeping the discovery order.
video_files = list(dict.fromkeys(
    video
    for pattern in ("*.mp4", "*.MP4", "*.Mp4", "*.mP4")
    for video in video_folder.glob(pattern)
))

print(f"🎥 Found {len(video_files)} video(s) to process.")
if not video_files:
    raise FileNotFoundError("No video files found!")

# Start from a clean output folder.
# WARNING: this removes everything below `output_base`. Step 2 writes its
# "<video>_predictions.csv" files into this same folder, so re-running this
# cell deletes them as well.
if output_base.exists():
    shutil.rmtree(output_base)
output_base.mkdir(parents=True)

# === Load YOLO11 model
# Prefer Apple-Silicon MPS when PyTorch reports it as available, else use the CPU.
device = "mps" if torch.backends.mps.is_available() else "cpu"
print(f"⚡ Using device: {device}")
model = YOLO("yolo11l.pt")

# === Detection Loop ===
# For every video: dump each frame as JPEG and save the person bounding boxes
# of that frame as an (N, 4) float32 array in [x, y, width, height] format.
PERSON_CLASS_ID = 0          # class 0 = person
MAX_PERSONS_PER_FRAME = 20   # Step 2 builds its pose runner with max_individuals=20

for i, video_path in enumerate(tqdm(video_files, desc="YOLO Detection", position=0)):
    print(f"\n📹 [{i+1}/{len(video_files)}] Processing: {video_path.name}")

    cap = cv2.VideoCapture(str(video_path))
    try:
        if not cap.isOpened():
            print(f"⚠️ Could not open video: {video_path}")
            continue

        # The reported frame count is only used to size the progress bar;
        # fall back to an open-ended bar when OpenCV cannot report it.
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        video_name = video_path.stem
        frame_output_dir = output_base / video_name / "frames"
        bbox_output_dir = output_base / video_name / "bboxes"
        frame_output_dir.mkdir(parents=True, exist_ok=True)
        bbox_output_dir.mkdir(parents=True, exist_ok=True)

        frame_idx = 0

        with tqdm(total=total_frames if total_frames > 0 else None,
                  desc="Processing frames", leave=False, position=1) as pbar:
            while True:
                ret, frame = cap.read()
                if not ret or frame is None:
                    break

                # Save frame
                frame_path = frame_output_dir / f"{frame_idx:05d}.jpg"
                cv2.imwrite(str(frame_path), frame)

                # Run YOLO11 detection (no classes filter here!).
                # `conf` must be passed explicitly: Ultralytics' predict()
                # applies its own confidence cut-off (0.25 by default), which
                # would otherwise discard every detection between
                # CONFIDENCE_THRESHOLD and 0.25 before the filter below sees it.
                results = model.predict(
                    frame,
                    verbose=False,
                    device=device,
                    imgsz=640,
                    conf=CONFIDENCE_THRESHOLD,
                )

                # Manually filter for persons above the confidence threshold
                person_boxes_xyxy = []
                for result in results:
                    boxes = result.boxes
                    if boxes is None or boxes.xyxy is None:
                        continue
                    xyxy = boxes.xyxy.cpu().numpy()
                    cls = boxes.cls.cpu().numpy()
                    conf = boxes.conf.cpu().numpy()
                    keep = (cls.astype(int) == PERSON_CLASS_ID) & (conf >= CONFIDENCE_THRESHOLD)
                    person_boxes_xyxy.append(xyxy[keep])

                if person_boxes_xyxy:
                    # NOTE(review): keeps the first MAX_PERSONS_PER_FRAME boxes in the
                    # order YOLO returned them — presumably highest confidence
                    # first; confirm against the Ultralytics documentation.
                    bboxes_xywh = np.concatenate(person_boxes_xyxy, axis=0)[:MAX_PERSONS_PER_FRAME]
                    bboxes_xywh = bboxes_xywh.astype(np.float32)  # astype() returns a copy
                    # Convert corner format (x1, y1, x2, y2) to (x, y, w, h) in place
                    bboxes_xywh[:, 2:] -= bboxes_xywh[:, :2]
                else:
                    # Always store a well-formed (0, 4) array for frames without persons
                    bboxes_xywh = np.empty((0, 4), dtype=np.float32)

                bbox_path = bbox_output_dir / f"{frame_idx:05d}.npy"
                np.save(str(bbox_path), bboxes_xywh)

                frame_idx += 1
                pbar.update(1)
    finally:
        # Release the capture handle even when detection or disk I/O raises
        cap.release()

    print(f"✅ Finished: {video_name} with {frame_idx} frames processed.")

In [None]:
# Step 2: RTM pose with DLC:

import deeplabcut.pose_estimation_pytorch as dlc_torch
from deeplabcut.utils.video_processor import VideoProcessorCV
from deeplabcut.utils.make_labeled_video import CreateVideo
import numpy as np
import torch
import cv2
import gc
from tqdm.notebook import tqdm
from pathlib import Path
import sys
import os
from contextlib import contextmanager
import deeplabcut.utils
deeplabcut.utils.tqdm = tqdm
import shutil

@contextmanager
def suppress_stdout():
    """Temporarily send everything written to ``sys.stdout`` to ``os.devnull``.

    On exit (normal or via an exception) ``sys.stdout`` is set back to the
    stream that was active when the context was entered. Only the devnull
    handle opened here is closed, so a stream that the wrapped code installed
    as ``sys.stdout`` in the meantime is never closed by accident.
    """
    original_stdout = sys.stdout
    # The `with` block owns the devnull handle and closes exactly that handle.
    with open(os.devnull, 'w') as devnull:
        sys.stdout = devnull
        try:
            yield
        finally:
            sys.stdout = original_stdout



# === Model Configuration Paths ===
path_model_config = Path("/Users/Christian/rtm_pose/rtmpose-x_simcc-body7_pytorch_config.yaml")
path_snapshot = Path("/Users/Christian/rtm_pose/rtmpose-x_simcc-body7.pt")
# Folder written by Step 1: one sub-folder per video with "frames" and "bboxes".
input_folder = Path("/Users/Christian/Downloads/Microadaptive Teaching Dritter Teil - LAs/Marlon/Erste Sitzung/YOLO/DLC") # Change the folder here!!!

# === Pose Model Settings ===
# Prefer Apple-Silicon MPS, but fall back to the CPU when it is unavailable
# (same selection rule as the YOLO cell in Step 1).
device = "mps" if torch.backends.mps.is_available() else "cpu"
pose_cfg = dlc_torch.config.read_config_as_dict(path_model_config)
runner = dlc_torch.get_pose_inference_runner(
    pose_cfg,
    snapshot_path=path_snapshot,
    batch_size=4,
    max_individuals=20,  # Step 1 stores at most 20 boxes per frame
    device=device
)

# === Load video directories ===
# Sorted so the processing order is reproducible between runs.
video_dirs = sorted(d for d in input_folder.iterdir() if d.is_dir())
print(f"📂 Found {len(video_dirs)} videos to process.")

# === Pose Estimation Loop ===
# Settings for this loop
NUM_INDIVIDUAL_SLOTS = 20     # individuals "idv_0" .. "idv_19" in the output table
CHECKPOINT_EVERY = 100        # rewrite the predictions CSV every N frames
create_labeled_video = False  # Set this to False if you DON'T want labeled videos!!!


def _numbered_file_key(path):
    """Sort key that orders zero-padded numeric stems numerically.

    Shorter stems sort first, so "100000" comes after "99999" even though it
    precedes it lexicographically.
    """
    return (len(path.stem), path.stem)


def _build_predictions_df(predictions):
    """Turn the {frame index: prediction} dict into DeepLabCut's predictions DataFrame."""
    return dlc_torch.build_predictions_dataframe(
        scorer="rtmpose-body7",
        predictions=predictions,
        parameters=dlc_torch.PoseDatasetParameters(
            bodyparts=pose_cfg["metadata"]["bodyparts"],
            unique_bpts=pose_cfg["metadata"]["unique_bodyparts"],
            individuals=[f"idv_{i}" for i in range(NUM_INDIVIDUAL_SLOTS)]
        )
    )


for video_dir in tqdm(video_dirs, desc="Pose Estimation", position=0):
    print(f"\n🧍‍♂️ Processing: {video_dir.name}")
    frame_dir = video_dir / "frames"
    bbox_dir = video_dir / "bboxes"

    # Frames and boxes are paired by position, so both lists must be ordered
    # the same way and by frame number.
    frame_files = sorted(frame_dir.glob("*.jpg"), key=_numbered_file_key)
    bbox_files = sorted(bbox_dir.glob("*.npy"), key=_numbered_file_key)

    assert len(frame_files) == len(bbox_files), "Mismatch between frames and bbox files."

    output_csv_path = input_folder / f"{video_dir.name}_predictions.csv"
    partial_predictions = {}

    with tqdm(total=len(frame_files), desc="Pose estimation frames", leave=False, position=1) as pbar:
        for idx, (frame_file, bbox_file) in enumerate(zip(frame_files, bbox_files)):
            frame = cv2.imread(str(frame_file))
            if frame is None:
                # An unreadable frame gets no prediction, but it must still
                # advance the progress bar and may still trigger a checkpoint.
                # NOTE(review): Step 3 numbers frames by row position, so a
                # skipped frame may shift later rows — confirm.
                print(f"⚠️ Failed to load frame: {frame_file}")
            else:
                # NOTE(review): allow_pickle=True can execute code from a crafted
                # .npy file. The arrays written by Step 1 are plain float32, so
                # allow_pickle=False should suffice — confirm before changing.
                bboxes = np.load(str(bbox_file), allow_pickle=True)
                frame_context = {"bboxes": bboxes}

                # Run inference on single frame
                pred = runner.inference([(frame, frame_context)])[0]
                partial_predictions[idx] = pred

            # Save every CHECKPOINT_EVERY frames and after the last frame
            is_checkpoint = (idx + 1) % CHECKPOINT_EVERY == 0 or (idx + 1) == len(frame_files)
            if is_checkpoint and partial_predictions:
                df_partial = _build_predictions_df(partial_predictions)
                df_partial.to_csv(output_csv_path)
                print(f"💾 Saved intermediate predictions at frame {idx+1}")

            pbar.update(1)

    print(f"✅ Finished pose estimation: {video_dir.name}")

    # === Optional: Create labeled video IF NEEDED
    if create_labeled_video:
        # "XXX" is a placeholder for the folder that holds the original videos
        original_video_path = Path("XXX") / f"{video_dir.name}.mp4"
        output_video_path = input_folder / f"{video_dir.name}_labeled.mp4"

        if original_video_path.exists():
            clip = VideoProcessorCV(str(original_video_path), sname=str(output_video_path), codec="mp4v")
            df_final = _build_predictions_df(partial_predictions)

            print(f"🎬 Creating labeled video: {output_video_path.name}", end="", flush=True)

            with suppress_stdout():
                CreateVideo(
                    clip,
                    df_final,
                    pcutoff=0.4,
                    dotsize=5,
                    colormap="rainbow",
                    bodyparts2plot=pose_cfg["metadata"]["bodyparts"],
                    trailpoints=0,
                    cropping=False,
                    x1=0,
                    x2=clip.w,
                    y1=0,
                    y2=clip.h,
                    # Pairs of body-part indices to connect with skeleton lines
                    bodyparts2connect=[
                        [15, 13], [13, 11], [16, 14], [14, 12], [11, 12],
                        [5, 11], [6, 12], [5, 6], [5, 7], [6, 8],
                        [7, 9], [8, 10], [1, 2], [0, 1], [0, 2],
                        [1, 3], [2, 4], [3, 5], [4, 6]
                    ],
                    skeleton_color="k",
                    draw_skeleton=True,
                    displaycropped=False,
                    color_by="bodypart",
                )
            print(f"🎬 Labeled video saved: {output_video_path.name}")
        else:
            print(f"⚠️ Original video {original_video_path.name} not found, skipping labeled video.")

    # Continue cleanup regardless of the flag
    del partial_predictions
    if torch.backends.mps.is_available():
        # Only touch the MPS allocator when an MPS device actually exists
        torch.mps.empty_cache()
    gc.collect()

print("\n🎉 All pose estimations complete!")

# === REMOVE ALL DLC SUBFOLDERS (after all pose estimations are complete) ===
# Every directory directly below `input_folder` is deleted recursively; plain
# files in `input_folder` are not touched. A directory that cannot be removed
# is reported and the loop moves on to the next one.
for subfolder in filter(Path.is_dir, input_folder.iterdir()):
    try:
        shutil.rmtree(subfolder)
        print(f"🗑️ Deleted DLC subfolder: {subfolder}")
    except Exception as e:
        print(f"⚠️ Error deleting {subfolder}: {e}")

In [None]:
# Step 3: Merge skeletons with YOLO Person detection

# For every frame, attach the nearest pose skeleton to each tracked person bounding box.
# NOTE: person IDs are copied unchanged from the tracking CSV (New_ID == Old_ID); this cell performs no ID reassignment.

# === IMPORTS ===
import pandas as pd
import numpy as np
from pathlib import Path
from tqdm.notebook import tqdm

# === SETTINGS ===
MATCH_THRESHOLD_FRAME = 150          # Pixel-based threshold if not normalizing
USE_NORMALIZED_DISTANCE = True       # Switch for normalized matching, otherwise fixed distance matching
NORMALIZED_MATCH_THRESHOLD = 0.3     # Threshold if normalizing distances

# === LOAD VIDEO FILES ===
base_folder = Path("/Users/Christian/Downloads/Microadaptive Teaching Dritter Teil - LAs/Marlon/Erste Sitzung/YOLO")
# Accept every upper/lower-case spelling of ".mp4", exactly like Step 1 does;
# otherwise a video processed there (e.g. "clip.Mp4") would be silently skipped
# here. The set removes duplicates and sorted() makes the order reproducible.
video_files = sorted({
    video
    for pattern in ("*.mp4", "*.MP4", "*.Mp4", "*.mP4")
    for video in base_folder.glob(pattern)
})

# === MAIN LOOP: PROCESS EACH VIDEO ===
# The distance threshold depends only on the settings above, so resolve it once.
match_threshold = NORMALIZED_MATCH_THRESHOLD if USE_NORMALIZED_DISTANCE else MATCH_THRESHOLD_FRAME

# Values of the "bodypart" column level that are header artefacts rather than
# body parts: the label cell of the CSV's first column ("bodyparts") and the
# empty level of the "Frame" helper column added below.
NON_BODYPART_LABELS = {"Frame", "scorer", "bodyparts", ""}

for video_path in video_files:
    print(f"\n🎥 Processing video: {video_path.name}")

    INPUT_PATH = video_path
    tracking_csv = INPUT_PATH.parent / "YOLO" / (INPUT_PATH.stem + ".csv")
    pose_csv = INPUT_PATH.parent / "DLC" / (INPUT_PATH.stem + "_predictions.csv")

    # Skip this video instead of aborting the whole batch when an input is missing
    missing_inputs = [str(p) for p in (tracking_csv, pose_csv) if not p.exists()]
    if missing_inputs:
        print(f"Skipping {INPUT_PATH.stem} — missing input file(s): {', '.join(missing_inputs)}")
        continue

    # === LOAD TRACKING DATA (Bounding Boxes) ===
    # Columns used below: Frame, Person_ID, X1, Y1, X2, Y2
    tracking_df = pd.read_csv(tracking_csv)

    # === LOAD POSE ESTIMATION DATA (Skeletons) ===
    pose_df = pd.read_csv(pose_csv, low_memory=False)

    # === PREPARE POSE DATAFRAME ===
    # The first three data rows hold the remaining header levels
    # (individual / bodypart / coord); everything after them is frame data.
    multi_index = pd.MultiIndex.from_arrays(pose_df.iloc[0:3].values, names=["individual", "bodypart", "coord"])
    pose_data_cleaned = pose_df.iloc[3:].copy()
    pose_data_cleaned.columns = multi_index
    pose_data_cleaned.reset_index(drop=True, inplace=True)
    # Frame number = row position. NOTE(review): assumes the CSV has one row
    # per frame, starting at frame 0 with no gaps — confirm.
    pose_data_cleaned["Frame"] = pose_data_cleaned.index.astype(int)

    # === DETECT BODY PARTS (skip non-body columns) ===
    bodyparts = [
        bp
        for bp in pose_data_cleaned.columns.get_level_values("bodypart").unique()
        if bp not in NON_BODYPART_LABELS
    ]

    # Individuals are the same for every row, so determine them once per video
    individuals = [
        ind
        for ind in pose_data_cleaned.columns.get_level_values("individual").unique()
        if isinstance(ind, str) and ind.startswith("idv_")
    ]

    # Group the boxes by frame once instead of scanning the table for every frame
    boxes_by_frame = {frame: group for frame, group in tracking_df.groupby("Frame", sort=False)}

    # === FINAL DATA COLLECTOR ===
    final_data = []

    # === FRAME-BY-FRAME PROCESSING ===
    for _row_idx, pose_row in tqdm(pose_data_cleaned.iterrows(), total=len(pose_data_cleaned), desc=f"Processing {INPUT_PATH.stem}"):
        frame_value = pose_row["Frame"]
        frame_num = int(frame_value.iloc[0] if isinstance(frame_value, pd.Series) else frame_value)

        frame_boxes = boxes_by_frame.get(frame_num)
        if frame_boxes is None:
            # No tracked person in this frame -> nothing to output for it
            continue

        # === EXTRACT ALL SKELETON CENTERS IN THIS FRAME ===
        skeleton_centers = {}
        for ind in individuals:
            keypoints = []
            for bp in bodyparts:
                try:
                    x = float(pose_row[(ind, bp, "x")])
                    y = float(pose_row[(ind, bp, "y")])
                except (KeyError, TypeError, ValueError):
                    # Column missing for this individual, or value not numeric
                    continue
                if pd.notna(x) and pd.notna(y):
                    keypoints.append((x, y))
            if keypoints:
                cx, cy = np.mean(keypoints, axis=0)  # Compute mean x, mean y = skeleton center
                skeleton_centers[ind] = (cx, cy)

        # === MATCH EACH BOUNDING BOX TO NEAREST SKELETON ===
        # Each box is matched on its own, so two boxes can pick the same skeleton.
        for _, box in frame_boxes.iterrows():
            x_center = (box.X1 + box.X2) / 2
            y_center = (box.Y1 + box.Y2) / 2
            pid = int(box.Person_ID)

            # Mean of box width and height, used to normalize the distance
            avg_bbox_size = ((box.X2 - box.X1) + (box.Y2 - box.Y1)) / 2

            best_match = None
            min_distance = float('inf')

            for skel_id, (cx, cy) in skeleton_centers.items():
                center_distance = np.linalg.norm(np.array([cx, cy]) - np.array([x_center, y_center]))

                # === OPTIONAL: Normalize distance by bounding box size ===
                # (degenerate boxes with non-positive size are left unnormalized)
                if USE_NORMALIZED_DISTANCE and avg_bbox_size > 0:
                    center_distance /= avg_bbox_size

                # === KEEP CLOSEST SKELETON UNDER THRESHOLD ===
                if center_distance < min_distance and center_distance < match_threshold:
                    best_match = skel_id
                    min_distance = center_distance

            # === SAVE MATCH INFORMATION ===
            # New_ID currently mirrors Old_ID; no ID reassignment happens here.
            data = {
                "Frame": frame_num,
                "Old_ID": pid,
                "New_ID": pid,
                "X1": box.X1,
                "Y1": box.Y1,
                "X2": box.X2,
                "Y2": box.Y2,
            }

            if best_match is not None:
                for bp in bodyparts:
                    data[f"{bp}_x"] = pose_row.get((best_match, bp, "x"), np.nan)
                    data[f"{bp}_y"] = pose_row.get((best_match, bp, "y"), np.nan)
                    data[f"{bp}_conf"] = pose_row.get((best_match, bp, "likelihood"), np.nan)

            final_data.append(data)

    # === SAVE FINAL CORRECTED CSV ===
    if not final_data:
        # Without rows there is no "Frame" column to sort by
        print(f"⚠️ No bounding boxes found for any pose frame of {video_path.name}; nothing saved.")
        continue

    final_df = pd.DataFrame(final_data).sort_values(["Frame", "New_ID"])
    # Safety net: drop columns that header artefacts would have produced
    final_df = final_df.drop(columns=["bodyparts_x", "bodyparts_y", "bodyparts_conf", "_x", "_y", "_conf"], errors="ignore")

    OUTPUT_FOLDER = INPUT_PATH.parent / "corrected"
    OUTPUT_FOLDER.mkdir(parents=True, exist_ok=True)

    output_csv_name = f"{INPUT_PATH.stem}_corrected.csv"
    final_df.to_csv(OUTPUT_FOLDER / output_csv_name, index=False)

    print("\u2705 Final corrected CSV saved for:", video_path.name)

print("\n🎉 All sessions processed!")

In [None]:
# Step 4: Merge IDs, skeletons and object detection: 

import pandas as pd
from pathlib import Path

# === PATHS ===
# NOTE(review): Step 3 writes its "*_corrected.csv" files to
# `base_folder / "corrected"`, which is under a different project path than
# `main_folder` below — confirm these paths point at the data you intend to merge.
main_folder = Path("/Users/Christian/Downloads/Javelin training/analysis/corrected")
object_folder = Path("/Users/Christian/Downloads/Javelin training/analysis/YOLO_Object")
merged_folder = main_folder / "with_objects"
merged_folder.mkdir(parents=True, exist_ok=True)

CORRECTED_SUFFIX = "_corrected"

# === PROCESSING LOOP ===
# sorted() makes the processing order reproducible between runs.
for corrected_file in sorted(main_folder.glob(f"*{CORRECTED_SUFFIX}.csv")):
    # Strip only the trailing suffix; str.replace() would also remove a
    # "_corrected" that appears earlier in the video name.
    base_name = corrected_file.stem[: -len(CORRECTED_SUFFIX)]
    object_file = object_folder / f"{base_name}.csv"

    if not object_file.exists():
        print(f"Skipping {base_name} — no object file found.")
        continue

    # Load both dataframes
    df_people = pd.read_csv(corrected_file)
    df_object = pd.read_csv(object_file)

    # Add the box center points and keep only the columns needed for the pivot
    df_object_slim = df_object.assign(
        center_x=(df_object["X1"] + df_object["X2"]) / 2,
        center_y=(df_object["Y1"] + df_object["Y2"]) / 2,
    )[["Frame", "Label", "X1", "Y1", "X2", "Y2", "center_x", "center_y"]]

    # Pivot: one row per frame, columns like Tip_X1, Tip_center_x, etc.
    # aggfunc="first" keeps only the first detection when a label occurs
    # several times in the same frame.
    df_object_pivot = df_object_slim.pivot_table(
        index="Frame",
        columns="Label",
        aggfunc="first"
    )

    # Flatten multi-index columns: (coord, label) -> "label_coord"
    df_object_pivot.columns = [f"{label}_{coord}" for coord, label in df_object_pivot.columns]
    df_object_pivot = df_object_pivot.reset_index()

    # Merge person + object info; frames without objects keep NaN object columns
    df_merged = df_people.merge(df_object_pivot, on="Frame", how="left")

    # Save to new folder
    out_path = merged_folder / f"{base_name}_merged.csv"
    df_merged.to_csv(out_path, index=False)
    print(f"✅ Merged and saved: {out_path.name}")

print("\n🎉 All sessions processed!")