In [6]:
import cv2
import mediapipe as mp
import numpy as np
import os
import argparse
from tqdm import tqdm
import math
import time
from dataclasses import dataclass
from typing import List, Dict, Tuple, Optional

# MediaPipe setup
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils

@dataclass
class EmptyLandmark:
    """Simple class to substitute for MediaPipe landmarks when needed"""
    x: float = 0.0
    y: float = 0.0
    z: float = 0.0
    visibility: float = 0.0

class Joint:
    """Class representing a joint in the BVH skeleton"""
    def __init__(self, name, parent=None):
        self.name = name
        self.parent = parent
        self.children = []
        self.offset = np.zeros(3)
        self.channels = []
        self.rotation_order = 'XYZ'  # Using XYZ order for better Blender compatibility
        
    def add_child(self, child):
        self.children.append(child)
        
class MotionData:
    """Class to store motion data for the BVH file"""
    def __init__(self, num_frames):
        self.positions = {}  # Dict to store positions for each joint
        self.rotations = {}  # Dict to store rotations for each joint
        self.num_frames = num_frames
        
    def set_joint_position(self, frame, joint_name, position):
        if joint_name not in self.positions:
            self.positions[joint_name] = [np.zeros(3) for _ in range(self.num_frames)]
        self.positions[joint_name][frame] = position
        
    def set_joint_rotation(self, frame, joint_name, rotation):
        if joint_name not in self.rotations:
            self.rotations[joint_name] = [np.zeros(3) for _ in range(self.num_frames)]
        self.rotations[joint_name][frame] = rotation
        
    def get_joint_position(self, frame, joint_name):
        return self.positions.get(joint_name, [np.zeros(3)])[frame]
        
    def get_joint_rotation(self, frame, joint_name):
        return self.rotations.get(joint_name, [np.zeros(3)])[frame]

def create_skeleton():
    """Create a skeleton structure that matches MediaPipe's pose landmarks."""
    # Root joint
    hips = Joint("Hips")
    
    # Spine
    spine = Joint("Spine", hips)
    hips.add_child(spine)
    
    chest = Joint("Chest", spine)
    spine.add_child(chest)
    
    neck = Joint("Neck", chest)
    chest.add_child(neck)
    
    head = Joint("Head", neck)
    neck.add_child(head)
    
    # Left arm
    left_shoulder = Joint("LeftShoulder", chest)
    chest.add_child(left_shoulder)
    
    left_arm = Joint("LeftArm", left_shoulder)
    left_shoulder.add_child(left_arm)
    
    left_forearm = Joint("LeftForeArm", left_arm)
    left_arm.add_child(left_forearm)
    
    left_hand = Joint("LeftHand", left_forearm)
    left_forearm.add_child(left_hand)
    
    # Right arm
    right_shoulder = Joint("RightShoulder", chest)
    chest.add_child(right_shoulder)
    
    right_arm = Joint("RightArm", right_shoulder)
    right_shoulder.add_child(right_arm)
    
    right_forearm = Joint("RightForeArm", right_arm)
    right_arm.add_child(right_forearm)
    
    right_hand = Joint("RightHand", right_forearm)
    right_forearm.add_child(right_hand)
    
    # Left leg
    left_up_leg = Joint("LeftUpLeg", hips)
    hips.add_child(left_up_leg)
    
    left_leg = Joint("LeftLeg", left_up_leg)
    left_up_leg.add_child(left_leg)
    
    left_foot = Joint("LeftFoot", left_leg)
    left_leg.add_child(left_foot)
    
    left_toe = Joint("LeftToeBase", left_foot)
    left_foot.add_child(left_toe)
    
    # Right leg
    right_up_leg = Joint("RightUpLeg", hips)
    hips.add_child(right_up_leg)
    
    right_leg = Joint("RightLeg", right_up_leg)
    right_up_leg.add_child(right_leg)
    
    right_foot = Joint("RightFoot", right_leg)
    right_leg.add_child(right_foot)
    
    right_toe = Joint("RightToeBase", right_foot)
    right_foot.add_child(right_toe)
    
    return hips

def get_mediapipe_landmark_indices():
    """Define the landmark indices in MediaPipe format"""
    return {
        "hip_center": [mp_pose.PoseLandmark.LEFT_HIP, mp_pose.PoseLandmark.RIGHT_HIP],
        "spine": [mp_pose.PoseLandmark.LEFT_HIP, mp_pose.PoseLandmark.RIGHT_HIP, 
                  mp_pose.PoseLandmark.LEFT_SHOULDER, mp_pose.PoseLandmark.RIGHT_SHOULDER],
        "chest": [mp_pose.PoseLandmark.LEFT_SHOULDER, mp_pose.PoseLandmark.RIGHT_SHOULDER],
        "neck": [mp_pose.PoseLandmark.LEFT_EAR, mp_pose.PoseLandmark.RIGHT_EAR, 
                mp_pose.PoseLandmark.LEFT_SHOULDER, mp_pose.PoseLandmark.RIGHT_SHOULDER],
        "head": [mp_pose.PoseLandmark.NOSE, mp_pose.PoseLandmark.LEFT_EAR, mp_pose.PoseLandmark.RIGHT_EAR],
        
        "left_shoulder": [mp_pose.PoseLandmark.LEFT_SHOULDER],
        "left_elbow": [mp_pose.PoseLandmark.LEFT_ELBOW],
        "left_wrist": [mp_pose.PoseLandmark.LEFT_WRIST],
        "left_hand": [mp_pose.PoseLandmark.LEFT_PINKY, mp_pose.PoseLandmark.LEFT_INDEX, 
                     mp_pose.PoseLandmark.LEFT_THUMB],
        
        "right_shoulder": [mp_pose.PoseLandmark.RIGHT_SHOULDER],
        "right_elbow": [mp_pose.PoseLandmark.RIGHT_ELBOW],
        "right_wrist": [mp_pose.PoseLandmark.RIGHT_WRIST],
        "right_hand": [mp_pose.PoseLandmark.RIGHT_PINKY, mp_pose.PoseLandmark.RIGHT_INDEX, 
                      mp_pose.PoseLandmark.RIGHT_THUMB],
        
        "left_hip": [mp_pose.PoseLandmark.LEFT_HIP],
        "left_knee": [mp_pose.PoseLandmark.LEFT_KNEE],
        "left_ankle": [mp_pose.PoseLandmark.LEFT_ANKLE],
        "left_foot": [mp_pose.PoseLandmark.LEFT_FOOT_INDEX, mp_pose.PoseLandmark.LEFT_HEEL],
        
        "right_hip": [mp_pose.PoseLandmark.RIGHT_HIP],
        "right_knee": [mp_pose.PoseLandmark.RIGHT_KNEE],
        "right_ankle": [mp_pose.PoseLandmark.RIGHT_ANKLE],
        "right_foot": [mp_pose.PoseLandmark.RIGHT_FOOT_INDEX, mp_pose.PoseLandmark.RIGHT_HEEL]
    }

def get_joint_mapping():
    """Map MediaPipe landmarks to BVH skeleton joints"""
    mediapipe_indices = get_mediapipe_landmark_indices()
    
    return {
        "Hips": mediapipe_indices["hip_center"],
        "Spine": mediapipe_indices["spine"],
        "Chest": mediapipe_indices["chest"],
        "Neck": mediapipe_indices["neck"],
        "Head": mediapipe_indices["head"],
        
        "LeftShoulder": mediapipe_indices["left_shoulder"],
        "LeftArm": mediapipe_indices["left_elbow"],
        "LeftForeArm": mediapipe_indices["left_wrist"],
        "LeftHand": mediapipe_indices["left_hand"],
        
        "RightShoulder": mediapipe_indices["right_shoulder"],
        "RightArm": mediapipe_indices["right_elbow"],
        "RightForeArm": mediapipe_indices["right_wrist"],
        "RightHand": mediapipe_indices["right_hand"],
        
        "LeftUpLeg": mediapipe_indices["left_hip"],
        "LeftLeg": mediapipe_indices["left_knee"],
        "LeftFoot": mediapipe_indices["left_ankle"],
        "LeftToeBase": mediapipe_indices["left_foot"],
        
        "RightUpLeg": mediapipe_indices["right_hip"],
        "RightLeg": mediapipe_indices["right_knee"],
        "RightFoot": mediapipe_indices["right_ankle"],
        "RightToeBase": mediapipe_indices["right_foot"]
    }

def get_joint_connections():
    """Define parent-child connections for calculating rotations"""
    return {
        "Hips": ["Spine", "LeftUpLeg", "RightUpLeg"],
        "Spine": ["Chest"],
        "Chest": ["Neck", "LeftShoulder", "RightShoulder"],
        "Neck": ["Head"],
        "Head": [],
        
        "LeftShoulder": ["LeftArm"],
        "LeftArm": ["LeftForeArm"],
        "LeftForeArm": ["LeftHand"],
        "LeftHand": [],
        
        "RightShoulder": ["RightArm"],
        "RightArm": ["RightForeArm"],
        "RightForeArm": ["RightHand"],
        "RightHand": [],
        
        "LeftUpLeg": ["LeftLeg"],
        "LeftLeg": ["LeftFoot"],
        "LeftFoot": ["LeftToeBase"],
        "LeftToeBase": [],
        
        "RightUpLeg": ["RightLeg"],
        "RightLeg": ["RightFoot"],
        "RightFoot": ["RightToeBase"],
        "RightToeBase": []
    }

def get_parent_joint(joint_name):
    """Get parent joint name for given joint"""
    connections = get_joint_connections()
    
    for parent, children in connections.items():
        if joint_name in children:
            return parent
    
    return None  # No parent (root)

def get_landmark_position(landmarks, idx):
    """Safely get the position of a landmark by index"""
    if landmarks and idx < len(landmarks):
        lm = landmarks[idx]
        if hasattr(lm, 'x') and hasattr(lm, 'y') and hasattr(lm, 'z'):
            if not (np.isnan(lm.x) or np.isnan(lm.y) or np.isnan(lm.z)):
                return np.array([lm.x, lm.y, lm.z])
    return None

def get_average_landmark_position(landmarks, indices):
    """Get average position from multiple landmark indices"""
    positions = []
    
    for idx in indices:
        pos = get_landmark_position(landmarks, idx)
        if pos is not None:
            positions.append(pos)
    
    if positions:
        return np.mean(positions, axis=0)
    else:
        return None

def get_joint_positions(frame_landmarks, joint_mapping):
    """Extract joint positions from landmarks for the current frame"""
    joint_positions = {}
    
    for joint_name, indices in joint_mapping.items():
        pos = get_average_landmark_position(frame_landmarks, indices)
        if pos is not None:
            # MediaPipe coordinate system: X right, Y down, Z forward (from camera)
            # Convert to BVH coordinate system: X right, Y up, Z forward
            pos_bvh = np.array([pos[0], -pos[1], pos[2]])
            joint_positions[joint_name] = pos_bvh
    
    return joint_positions

def interpolate_missing_joints(joint_positions, connections):
    """Fill in missing joints using interpolation from parent to child"""
    all_joints = list(connections.keys())
    filled_positions = joint_positions.copy()
    
    # Multiple passes to handle multi-level interpolation
    for _ in range(3):
        for joint in all_joints:
            if joint not in filled_positions:
                parent = get_parent_joint(joint)
                children = connections.get(joint, [])
                
                # If parent and at least one child are available, interpolate
                if parent and parent in filled_positions:
                    child_positions = []
                    for child in children:
                        if child in filled_positions:
                            child_positions.append(filled_positions[child])
                    
                    if child_positions:
                        # Average position of all available children
                        avg_child = np.mean(child_positions, axis=0)
                        # Interpolate halfway between parent and average child
                        filled_positions[joint] = (filled_positions[parent] + avg_child) / 2
                
                # Or if just parent is available, estimate
                elif parent and parent in filled_positions:
                    # Estimate based on parent and joint type
                    parent_pos = filled_positions[parent]
                    
                    # Custom offsets based on joint types
                    if "UpLeg" in joint:
                        # Hips to upper leg: down and out
                        direction = np.array([0.1 if "Right" in joint else -0.1, -0.1, 0])
                    elif "Leg" in joint:
                        # Upper leg to lower leg: down
                        direction = np.array([0, -0.15, 0])
                    elif "Foot" in joint:
                        # Leg to foot: down and forward
                        direction = np.array([0, -0.1, 0.05])
                    elif "Toe" in joint:
                        # Foot to toe: forward
                        direction = np.array([0, -0.05, 0.1])
                    elif "Shoulder" in joint:
                        # Chest to shoulder: out
                        direction = np.array([0.1 if "Right" in joint else -0.1, 0, 0])
                    elif "Arm" in joint and not "Fore" in joint:
                        # Shoulder to arm: out
                        direction = np.array([0.1 if "Right" in joint else -0.1, -0.02, 0])
                    elif "ForeArm" in joint:
                        # Arm to forearm: out and down
                        direction = np.array([0.1 if "Right" in joint else -0.1, -0.02, 0])
                    elif "Hand" in joint:
                        # Forearm to hand: out
                        direction = np.array([0.08 if "Right" in joint else -0.08, 0, 0])
                    elif joint == "Head":
                        # Neck to head: up
                        direction = np.array([0, 0.1, 0])
                    elif joint == "Neck":
                        # Chest to neck: up
                        direction = np.array([0, 0.1, 0])
                    elif joint == "Chest":
                        # Spine to chest: up
                        direction = np.array([0, 0.15, 0])
                    elif joint == "Spine":
                        # Hips to spine: up
                        direction = np.array([0, 0.1, 0])
                    else:
                        # Default: small upward direction
                        direction = np.array([0, 0.1, 0])
                    
                    filled_positions[joint] = parent_pos + direction
    
    return filled_positions

def calculate_joint_offsets(skeleton, reference_frame_positions):
    """Calculate joint offsets for the skeleton based on a reference frame"""
    def process_joint(joint):
        if joint.parent:
            # If both this joint and parent are in the reference frame
            if joint.name in reference_frame_positions and joint.parent.name in reference_frame_positions:
                parent_pos = reference_frame_positions[joint.parent.name]
                joint_pos = reference_frame_positions[joint.name]
                joint.offset = joint_pos - parent_pos
            else:
                # Use default offsets if not in reference frame
                if "Left" in joint.name:
                    joint.offset = np.array([-0.1, 0, 0])
                elif "Right" in joint.name:
                    joint.offset = np.array([0.1, 0, 0])
                elif "Head" in joint.name:
                    joint.offset = np.array([0, 0.1, 0])
                elif "Neck" in joint.name:
                    joint.offset = np.array([0, 0.08, 0])
                elif "Chest" in joint.name:
                    joint.offset = np.array([0, 0.15, 0])
                elif "Spine" in joint.name:
                    joint.offset = np.array([0, 0.15, 0])
                elif "Toe" in joint.name:
                    joint.offset = np.array([0, 0, 0.1])
                else:
                    joint.offset = np.array([0, 0.1, 0])
            
            # Ensure non-zero offset (critical for Blender)
            if np.linalg.norm(joint.offset) < 0.01:
                if "Left" in joint.name:
                    joint.offset = np.array([-0.1, 0, 0])
                elif "Right" in joint.name:
                    joint.offset = np.array([0.1, 0, 0])
                elif "Head" in joint.name:
                    joint.offset = np.array([0, 0.1, 0])
                elif "Hand" in joint.name:
                    if "Left" in joint.name:
                        joint.offset = np.array([-0.1, 0, 0])
                    else:
                        joint.offset = np.array([0.1, 0, 0])
                elif "Foot" in joint.name:
                    joint.offset = np.array([0, -0.1, 0.05])
                else:
                    joint.offset = np.array([0, 0.1, 0])
        else:
            # Root joint has no offset
            joint.offset = np.array([0, 0, 0])
        
        # Scale the offset to reasonable size
        joint.offset *= 10.0  # Scale factor can be adjusted
        
        for child in joint.children:
            process_joint(child)
    
    process_joint(skeleton)

def vector_to_euler(forward, up, order='XYZ'):
    """Convert forward and up vectors to Euler angles"""
    # Make sure vectors are normalized
    forward = forward / np.linalg.norm(forward)
    up = up / np.linalg.norm(up)
    
    # Get the third perpendicular axis (right vector)
    right = np.cross(forward, up)
    right = right / np.linalg.norm(right)
    
    # Rebuild the up vector to ensure orthogonality
    up = np.cross(right, forward)
    
    # Create rotation matrix
    rotation_matrix = np.array([
        [right[0], up[0], forward[0]],
        [right[1], up[1], forward[1]],
        [right[2], up[2], forward[2]]
    ])
    
    # Convert rotation matrix to Euler angles
    if order == 'XYZ':
        # Extract Euler angles
        sy = math.sqrt(rotation_matrix[0, 0] * rotation_matrix[0, 0] + rotation_matrix[1, 0] * rotation_matrix[1, 0])
        
        if sy > 1e-6:
            x = math.atan2(rotation_matrix[2, 1], rotation_matrix[2, 2])
            y = math.atan2(-rotation_matrix[2, 0], sy)
            z = math.atan2(rotation_matrix[1, 0], rotation_matrix[0, 0])
        else:
            x = math.atan2(-rotation_matrix[1, 2], rotation_matrix[1, 1])
            y = math.atan2(-rotation_matrix[2, 0], sy)
            z = 0
    else:
        # Handle other rotation orders if needed
        x, y, z = 0, 0, 0
    
    return np.array([math.degrees(x), math.degrees(y), math.degrees(z)])

def calculate_bone_direction(joint_positions, joint_name, child_name):
    """Calculate normalized direction vector from joint to child"""
    if joint_name in joint_positions and child_name in joint_positions:
        start = joint_positions[joint_name]
        end = joint_positions[child_name]
        direction = end - start
        length = np.linalg.norm(direction)
        
        if length > 1e-10:
            return direction / length
    
    return None

def calculate_joint_rotations(joint_positions, connections):
    """Calculate joint rotations based on the positions of connected joints"""
    joint_rotations = {}
    
    for joint_name, children in connections.items():
        if joint_name not in joint_positions or not children:
            continue
        
        # Find at least one connected child
        child_directions = []
        for child in children:
            if child in joint_positions:
                direction = calculate_bone_direction(joint_positions, joint_name, child)
                if direction is not None:
                    child_directions.append((child, direction))
        
        if not child_directions:
            continue
        
        # Use the first child for primary direction
        primary_child, forward = child_directions[0]
        
        # Determine up vector based on joint context
        up = np.array([0, 1, 0])  # Default up
        
        # For limbs, use a more anatomically appropriate up vector
        if "Arm" in joint_name or "ForeArm" in joint_name:
            # For arms, "up" depends on left/right
            if "Left" in joint_name:
                up = np.array([0, 0, -1])  # Left arm: up is backward
            else:
                up = np.array([0, 0, -1])  # Right arm: up is backward
        elif "UpLeg" in joint_name or "Leg" in joint_name:
            # For legs, "up" points forward
            up = np.array([0, 0, 1])
        elif joint_name in ["Spine", "Chest", "Neck"]:
            # For spine/chest/neck, "up" depends on context
            # Try to use left/right directions to determine orientation
            if "LeftShoulder" in joint_positions and "RightShoulder" in joint_positions:
                left = joint_positions["LeftShoulder"] - joint_positions[joint_name]
                right = joint_positions["RightShoulder"] - joint_positions[joint_name]
                front_cross = np.cross(right, left)
                if np.linalg.norm(front_cross) > 1e-10:
                    up = front_cross / np.linalg.norm(front_cross)
        
        # Make sure up is perpendicular to forward
        up = up - np.dot(up, forward) * forward
        if np.linalg.norm(up) > 1e-10:
            up = up / np.linalg.norm(up)
        else:
            # If up became zero, find a new perpendicular vector
            if abs(forward[0]) < abs(forward[1]):
                up = np.cross(forward, np.array([1, 0, 0]))
            else:
                up = np.cross(forward, np.array([0, 1, 0]))
            
            if np.linalg.norm(up) > 1e-10:
                up = up / np.linalg.norm(up)
        
        # Convert to Euler angles
        euler = vector_to_euler(forward, up)
        joint_rotations[joint_name] = euler
    
    return joint_rotations

def smooth_rotations(all_frame_rotations, window=5):
    """Apply a moving average filter to smooth rotations"""
    joints = all_frame_rotations[0].keys()
    num_frames = len(all_frame_rotations)
    
    smoothed_rotations = [{} for _ in range(num_frames)]
    
    for joint in joints:
        # Extract this joint's rotations across all frames
        joint_rots = np.array([frame.get(joint, np.zeros(3)) for frame in all_frame_rotations])
        
        # Apply moving average filter
        smoothed_joint_rots = np.copy(joint_rots)
        half_window = window // 2
        
        for i in range(num_frames):
            start = max(0, i - half_window)
            end = min(num_frames, i + half_window + 1)
            
            # Use weighted average (center frame has more weight)
            weights = np.ones(end - start)
            center_idx = i - start
            if center_idx >= 0 and center_idx < len(weights):
                weights[center_idx] = 2.0  # Higher weight for center frame
            
            weights = weights / np.sum(weights)
            
            # Weighted average for each axis
            for axis in range(3):
                smoothed_joint_rots[i, axis] = np.sum(joint_rots[start:end, axis] * weights)
        
        # Store smoothed rotations
        for i in range(num_frames):
            smoothed_rotations[i][joint] = smoothed_joint_rots[i]
    
    return smoothed_rotations

def write_bvh_file(skeleton, all_frame_rotations, frame_time, output_file):
    """Write the BVH file with motion data"""
    print(f"Writing BVH file to {output_file}...")
    try:
        with open(output_file, 'w') as f:
            # Write header
            f.write("HIERARCHY\n")
            
            # Write joint hierarchy recursively
            write_joint_hierarchy(f, skeleton, 0)
            
            # Write motion data
            num_frames = len(all_frame_rotations)
            f.write("MOTION\n")
            f.write(f"Frames: {num_frames}\n")
            f.write(f"Frame Time: {frame_time:.6f}\n")
            
            # Extract hip positions and rotations for all frames
            hip_positions = np.zeros((num_frames, 3))
            
            # For each frame, write data
            for frame_idx in tqdm(range(num_frames), desc="Writing animation data"):
                frame_data = []
                frame_rotations = all_frame_rotations[frame_idx]
                
                # Root position (track position changes)
                frame_data.extend(hip_positions[frame_idx])
                
                # Write rotations for all joints in depth-first order
                write_joint_rotations(skeleton, frame_rotations, frame_data)
                
                f.write(" ".join([f"{val:.6f}" for val in frame_data]) + "\n")
                
        print(f"BVH file created successfully: {output_file}")
    except Exception as e:
        print(f"Error writing BVH file: {e}")

def write_joint_hierarchy(f, joint, indent_level):
    """Write the joint hierarchy recursively to the BVH file"""
    indent = "  " * indent_level
    
    if joint.parent is None:
        # Root joint
        f.write(f"{indent}ROOT {joint.name}\n")
    else:
        # Child joint
        f.write(f"{indent}JOINT {joint.name}\n")
    
    f.write(f"{indent}" + "{\n")
    
    # Write offset
    f.write(f"{indent}  OFFSET {joint.offset[0]:.6f} {joint.offset[1]:.6f} {joint.offset[2]:.6f}\n")
    
    # Write channels
    if joint.parent is None:
        # Root has 6 channels: position and rotation
        f.write(f"{indent}  CHANNELS 6 Xposition Yposition Zposition {joint.rotation_order[0]}rotation {joint.rotation_order[1]}rotation {joint.rotation_order[2]}rotation\n")
    else:
        # Other joints have 3 channels: rotation only
        f.write(f"{indent}  CHANNELS 3 {joint.rotation_order[0]}rotation {joint.rotation_order[1]}rotation {joint.rotation_order[2]}rotation\n")
    
    # Process children
    for child in joint.children:
        write_joint_hierarchy(f, child, indent_level + 1)
    
    # If no children, write end site
    if not joint.children:
        f.write(f"{indent}  End Site\n")
        f.write(f"{indent}  " + "{\n")
        
        # Use a non-zero offset for end sites
        if "Hand" in joint.name:
            # Extend in the appropriate direction
            if "Left" in joint.name:
                f.write(f"{indent}    OFFSET -5.0 0.0 0.0\n")
            else:
                f.write(f"{indent}    OFFSET 5.0 0.0 0.0\n")
        elif "Toe" in joint.name:
            f.write(f"{indent}    OFFSET 0.0 0.0 5.0\n")  # Forward
        elif "Head" in joint.name:
            f.write(f"{indent}    OFFSET 0.0 5.0 0.0\n")  # Upward
        else:
            f.write(f"{indent}    OFFSET 0.0 3.0 0.0\n")  # Default upward
        
        f.write(f"{indent}  " + "}\n")
    
    f.write(f"{indent}" + "}\n")

def write_joint_rotations(joint, frame_rotations, frame_data):
    """Write rotations for a joint and its children recursively"""
    # Add this joint's rotation
    if joint.name in frame_rotations:
        frame_data.extend(frame_rotations[joint.name])
    else:
        # Default to zero rotation if not found
        frame_data.extend([0.0, 0.0, 0.0])
    
    # Process all children in order
    for child in joint.children:
        write_joint_rotations(child, frame_rotations, frame_data)

def get_all_joints(skeleton):
    """Get a list of all joints in the skeleton"""
    joints = []
    
    def collect_joints(joint):
        joints.append(joint)
        for child in joint.children:
            collect_joints(child)
    
    collect_joints(skeleton)
    return joints

def render_skeleton_preview(frame_landmarks, frame, output_dir, frame_idx, joint_positions=None):
    """Render a preview image of the detected skeleton"""
    mp_drawing = mp.solutions.drawing_utils
    mp_drawing_styles = mp.solutions.drawing_styles
    
    # Draw the pose annotation on the image
    annotated_image = frame.copy()
    
    # First, draw the MediaPipe skeleton
    mp_drawing.draw_landmarks(
        annotated_image,
        frame_landmarks,
        mp_pose.POSE_CONNECTIONS,
        landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())
    
    # If we have joint positions, draw our calculated bones
    if joint_positions:
        connections = get_joint_connections()
        for joint_name, children in connections.items():
            if joint_name in joint_positions:
                start_pos = joint_positions[joint_name]
                
                # Convert to image coordinates (estimated)
                start_x = int((start_pos[0] + 0.5) * annotated_image.shape[1])
                start_y = int((-start_pos[1] + 0.5) * annotated_image.shape[0])
                
                for child in children:
                    if child in joint_positions:
                        end_pos = joint_positions[child]
                        
                        # Convert to image coordinates
                        end_x = int((end_pos[0] + 0.5) * annotated_image.shape[1])
                        end_y = int((-end_pos[1] +.5) * annotated_image.shape[0])
                        
                        # Draw this bone with a different color
                        cv2.line(annotated_image, (start_x, start_y), (end_x, end_y), (0, 255, 0), 2)
    
    # Save the annotated image
    os.makedirs(output_dir, exist_ok=True)
    cv2.imwrite(os.path.join(output_dir, f"frame_{frame_idx:04d}.png"), annotated_image)

def process_video(video_path, output_bvh, confidence_threshold=0.5, sample_rate=1, preview=False):
    """Process video and create BVH file"""
    print(f"Opening video file: {video_path}")
    cap = cv2.VideoCapture(video_path)
    
    if not cap.isOpened():
        print(f"Error: Could not open video file {video_path}")
        return
    
    # Get video properties
    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    
    print(f"Video properties: {width}x{height}, {fps} FPS, {frame_count} frames")
    print(f"Sampling every {sample_rate} frames, resulting in approximately {frame_count//sample_rate} animation frames")
    
    # Calculate frame time based on original FPS and sampling rate
    frame_time = 1.0 / (fps / sample_rate)
    
    # Create output dirs if needed
    preview_dir = os.path.splitext(output_bvh)[0] + "_preview"
    if preview:
        os.makedirs(preview_dir, exist_ok=True)
    
    # Create pose detector
    print("Initializing MediaPipe Pose detector...")
    with mp_pose.Pose(
        static_image_mode=False,          # Video mode
        model_complexity=1,               # Balanced accuracy and speed
        smooth_landmarks=True,            # Enable temporal smoothing
        enable_segmentation=False,        # No need for segmentation
        smooth_segmentation=False,
        min_detection_confidence=0.5,     # Initial detection confidence
        min_tracking_confidence=0.5       # Tracking confidence between frames
    ) as pose:
        
        # Process frames
        all_landmarks = []
        all_frames = []  # Store frames for preview if needed
        frame_idx = 0
        sampled_frames = 0
        
        print(f"Processing video frames (sampling every {sample_rate} frames)...")
        
        with tqdm(total=frame_count) as pbar:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                
                # Process only every sample_rate frames
                if frame_idx % sample_rate == 0:
                    # Convert BGR to RGB
                    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    
                    # Process the frame
                    results = pose.process(frame_rgb)
                    
                    if results.pose_landmarks:
                        # Store landmarks and original frame (if preview enabled)
                        all_landmarks.append(results.pose_landmarks)
                        if preview:
                            all_frames.append(frame)
                        sampled_frames += 1
                    else:
                        # If no landmarks detected, use empty landmarks
                        empty_landmarks = mp.solutions.pose.PoseLandmark(33)  # Create empty landmarks
                        all_landmarks.append(empty_landmarks)
                        if preview:
                            all_frames.append(frame)
                        print(f"Warning: No pose detected in frame {frame_idx}. Using empty landmarks.")
                
                frame_idx += 1
                pbar.update(1)
        
        cap.release()
        
        if not all_landmarks:
            print("Error: No frames with detected poses found in the video.")
            return
        
        print(f"Video processing complete. Collected {len(all_landmarks)} frames of pose data.")
        
        # Extract joint mapping
        joint_mapping = get_joint_mapping()
        connections = get_joint_connections()
        
        # Find a good reference frame for the skeleton structure
        print("Finding a good reference frame for skeletal structure...")
        ref_frame_idx = 0
        best_detection_score = 0
        
        for i in range(min(len(all_landmarks), 30)):  # Check first 30 frames at most
            # Count how many key landmarks are detected
            landmarks = all_landmarks[i]
            if not hasattr(landmarks, 'landmark'):
                continue
                
            detection_score = sum(lm.visibility > 0.5 for lm in landmarks.landmark if hasattr(lm, 'visibility'))
            
            if detection_score > best_detection_score:
                best_detection_score = detection_score
                ref_frame_idx = i
                
                # If all key points detected well, break early
                if detection_score > 25:  # MediaPipe has 33 landmarks total
                    break
        
        print(f"Using frame {ref_frame_idx} for skeletal structure (detection score: {best_detection_score}/33)")
        
        # Process all frames to extract positions
        all_frame_positions = []
        all_joint_rotations = []
        
        for i, landmarks in enumerate(tqdm(all_landmarks, desc="Processing motion data")):
            # Skip frames with bad detection
            if not hasattr(landmarks, 'landmark'):
                # If this is the first frame, we can't continue
                if i == 0:
                    print("Error: First frame has no valid landmarks. Cannot continue.")
                    return
                
                # Otherwise, reuse the previous frame
                all_frame_positions.append(all_frame_positions[-1])
                all_joint_rotations.append(all_joint_rotations[-1])
                continue
            
            # Extract joint positions from landmarks
            joint_positions = get_joint_positions(landmarks.landmark, joint_mapping)
            
            # Interpolate missing joints
            joint_positions = interpolate_missing_joints(joint_positions, connections)
            
            # Calculate joint rotations
            joint_rotations = calculate_joint_rotations(joint_positions, connections)
            
            # Save for this frame
            all_frame_positions.append(joint_positions)
            all_joint_rotations.append(joint_rotations)
            
            # Generate preview frame if enabled
            if preview:
                render_skeleton_preview(landmarks, all_frames[i], preview_dir, i, joint_positions)
        
        # Create skeleton structure
        skeleton = create_skeleton()
        
        # Calculate joint offsets from the reference frame
        reference_positions = all_frame_positions[ref_frame_idx]
        calculate_joint_offsets(skeleton, reference_positions)
        
        # Smooth rotations to reduce jitter
        print("Smoothing rotations...")
        smoothed_rotations = smooth_rotations(all_joint_rotations, window=5)
        
        # Write BVH file
        write_bvh_file(skeleton, smoothed_rotations, frame_time, output_bvh)

# def main():
#     parser = argparse.ArgumentParser(description="Convert video to BVH using MediaPipe")
#     parser.add_argument("--video", required=True, help="Path to input video file")
#     parser.add_argument("--output", required=True, help="Path to output BVH file")
#     parser.add_argument("--confidence", type=float, default=0.5, help="Confidence threshold for pose detection")
#     parser.add_argument("--sample-rate", type=int, default=2, help="Process every Nth frame")
#     parser.add_argument("--preview", action="store_true", help="Generate preview images")
    
#     args = parser.parse_args()
    
#     print("Starting MediaPipe to BVH conversion...")
#     start_time = time.time()
    
#     process_video(args.video, args.output, args.confidence, args.sample_rate, args.preview)
    
#     end_time = time.time()
#     print(f"Conversion completed in {end_time - start_time:.2f} seconds")

# if __name__ == "__main__":
#     main()

In [7]:
filename = "cxk"
process_video(f"{filename}.mp4", f"{filename}.bvh", 0.5, 2, True)
# process_video(args.video, args.output, args.confidence, args.sample_rate)

I0000 00:00:1740686747.616388   33277 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5


Opening video file: cxk.mp4
Video properties: 1920x1080, 60.0 FPS, 900 frames
Sampling every 2 frames, resulting in approximately 450 animation frames
Initializing MediaPipe Pose detector...
Processing video frames (sampling every 2 frames)...


I0000 00:00:1740686747.617148   35745 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 23.2.1-1ubuntu3.1~22.04.3), renderer: Mesa Intel(R) UHD Graphics (CML GT2)
  0%|                                                   | 0/900 [00:00<?, ?it/s]

W0000 00:00:1740686747.707506   35736 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1740686747.749877   35735 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
100%|█████████████████████████████████████████| 900/900 [00:16<00:00, 54.16it/s]


Video processing complete. Collected 450 frames of pose data.
Finding a good reference frame for skeletal structure...
Using frame 0 for skeletal structure (detection score: 33/33)


Processing motion data: 100%|█████████████████| 450/450 [00:26<00:00, 17.09it/s]


UFuncTypeError: Cannot cast ufunc 'multiply' output from dtype('float64') to dtype('int64') with casting rule 'same_kind'

In [4]:
%cd myVideoToBvh/

/home/nlarion/Desktop/nlp_html_ads/myVideoToBvh


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]
