In [1]:
import cv2
import mediapipe as mp
import numpy as np
import argparse
from pathlib import Path
import math

# BVH writing utilities
class BVHJoint:
    def __init__(self, name, offset=(0, 0, 0), children=None):
        self.name = name
        self.offset = offset
        self.children = children or []
        self.channels = []
        self.is_end_site = False

    def add_child(self, child):
        self.children.append(child)
        return child

class BVHWriter:
    def __init__(self, root_joint, frame_time=0.033333):
        self.root_joint = root_joint
        self.frame_time = frame_time
        self.motion_data = []
        
    def add_frame(self, frame_data):
        self.motion_data.append(frame_data)
        
    def write_to_file(self, file_path):
        with open(file_path, 'w') as f:
            f.write("HIERARCHY\n")
            self._write_joint(f, self.root_joint, 0)
            
            f.write("MOTION\n")
            f.write(f"Frames: {len(self.motion_data)}\n")
            f.write(f"Frame Time: {self.frame_time}\n")
            
            for frame in self.motion_data:
                f.write(" ".join(map(str, frame)) + "\n")
                
    def _write_joint(self, file, joint, indent_level):
        indent = "  " * indent_level
        
        if indent_level == 0:
            file.write(f"{indent}ROOT {joint.name}\n")
        elif joint.is_end_site:
            file.write(f"{indent}End Site\n")
        else:
            file.write(f"{indent}JOINT {joint.name}\n")
            
        file.write(f"{indent}{{\n")
        file.write(f"{indent}  OFFSET {joint.offset[0]} {joint.offset[1]} {joint.offset[2]}\n")
        
        if not joint.is_end_site:
            if indent_level == 0:
                # Root joint typically has 6 channels: position and rotation
                file.write(f"{indent}  CHANNELS 6 Xposition Yposition Zposition Zrotation Xrotation Yrotation\n")
                joint.channels = ["Xposition", "Yposition", "Zposition", "Zrotation", "Xrotation", "Yrotation"]
            else:
                # Other joints typically have 3 channels for rotation only
                file.write(f"{indent}  CHANNELS 3 Zrotation Xrotation Yrotation\n")
                joint.channels = ["Zrotation", "Xrotation", "Yrotation"]
        
        for child in joint.children:
            self._write_joint(file, child, indent_level + 1)
            
        file.write(f"{indent}}}\n")

# MediaPipe pose detection setup
def setup_mediapipe():
    mp_pose = mp.solutions.pose
    return mp_pose.Pose(
        static_image_mode=False,
        model_complexity=2,
        enable_segmentation=False,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5
    )

# Create a simplified skeleton hierarchy for BVH
def create_skeleton():
    root = BVHJoint("Hips", offset=(0, 0, 0))
    
    spine = root.add_child(BVHJoint("Spine", offset=(0, 10, 0)))
    neck = spine.add_child(BVHJoint("Neck", offset=(0, 15, 0)))
    head = neck.add_child(BVHJoint("Head", offset=(0, 5, 0)))
    head_end = head.add_child(BVHJoint("HeadEnd", offset=(0, 3, 0)))
    head_end.is_end_site = True
    
    left_shoulder = spine.add_child(BVHJoint("LeftShoulder", offset=(5, 12, 0)))
    left_elbow = left_shoulder.add_child(BVHJoint("LeftElbow", offset=(10, 0, 0)))
    left_wrist = left_elbow.add_child(BVHJoint("LeftWrist", offset=(8, 0, 0)))
    left_hand = left_wrist.add_child(BVHJoint("LeftHand", offset=(4, 0, 0)))
    left_hand.is_end_site = True
    
    right_shoulder = spine.add_child(BVHJoint("RightShoulder", offset=(-5, 12, 0)))
    right_elbow = right_shoulder.add_child(BVHJoint("RightElbow", offset=(-10, 0, 0)))
    right_wrist = right_elbow.add_child(BVHJoint("RightWrist", offset=(-8, 0, 0)))
    right_hand = right_wrist.add_child(BVHJoint("RightHand", offset=(-4, 0, 0)))
    right_hand.is_end_site = True
    
    left_hip = root.add_child(BVHJoint("LeftHip", offset=(3.5, 0, 0)))
    left_knee = left_hip.add_child(BVHJoint("LeftKnee", offset=(0, -15, 0)))
    left_ankle = left_knee.add_child(BVHJoint("LeftAnkle", offset=(0, -15, 0)))
    left_foot = left_ankle.add_child(BVHJoint("LeftFoot", offset=(0, -3, 5)))
    left_foot.is_end_site = True
    
    right_hip = root.add_child(BVHJoint("RightHip", offset=(-3.5, 0, 0)))
    right_knee = right_hip.add_child(BVHJoint("RightKnee", offset=(0, -15, 0)))
    right_ankle = right_knee.add_child(BVHJoint("RightAnkle", offset=(0, -15, 0)))
    right_foot = right_ankle.add_child(BVHJoint("RightFoot", offset=(0, -3, 5)))
    right_foot.is_end_site = True
    
    return root

# Extract joint position from MediaPipe landmarks
def get_joint_position(landmarks, idx):
    landmark = landmarks[idx]
    # Scale and transform coordinates
    return np.array([
        landmark.x * 100,
        -landmark.y * 100 + 100, # Convert from top-left to bottom-left origin
        landmark.z * 100
    ])

# Calculate angle between two vectors
def angle_between(v1, v2):
    v1_u = v1 / np.linalg.norm(v1)
    v2_u = v2 / np.linalg.norm(v2)
    return np.arccos(np.clip(np.dot(v1_u, v2_u), -1.0, 1.0))

# Simple rotation calculation
def calculate_joint_rotation(joint_pos, child_pos, ref_vector=None):
    if ref_vector is None:
        # Default reference vectors
        refs = {
            "x": np.array([1, 0, 0]),
            "y": np.array([0, 1, 0]),
            "z": np.array([0, 0, 1])
        }
    else:
        refs = ref_vector
    
    # Calculate direction vector from joint to child
    direction = child_pos - joint_pos
    if np.linalg.norm(direction) < 1e-6:
        return [0, 0, 0]  # No rotation if points are too close
    
    direction = direction / np.linalg.norm(direction)
    
    # Calculate rotation around each axis
    # Note: This is a simplified calculation that may not be fully accurate
    # for complex rotations, but often works well in practice for visualization
    
    # Project to planes and calculate angles
    xy_proj = np.array([direction[0], direction[1], 0])
    if np.linalg.norm(xy_proj) > 1e-6:
        xy_proj = xy_proj / np.linalg.norm(xy_proj)
        z_rot = np.degrees(angle_between(np.array([1, 0, 0]), xy_proj))
        if xy_proj[1] < 0:
            z_rot = -z_rot
    else:
        z_rot = 0
    
    xz_proj = np.array([direction[0], 0, direction[2]])
    if np.linalg.norm(xz_proj) > 1e-6:
        xz_proj = xz_proj / np.linalg.norm(xz_proj)
        y_rot = np.degrees(angle_between(np.array([1, 0, 0]), xz_proj))
        if xz_proj[2] < 0:
            y_rot = -y_rot
    else:
        y_rot = 0
    
    yz_proj = np.array([0, direction[1], direction[2]])
    if np.linalg.norm(yz_proj) > 1e-6:
        yz_proj = yz_proj / np.linalg.norm(yz_proj)
        x_rot = np.degrees(angle_between(np.array([0, 1, 0]), yz_proj))
        if yz_proj[2] < 0:
            x_rot = -x_rot
    else:
        x_rot = 0
    
    # BVH rotation order is ZXY
    return [z_rot, x_rot, y_rot]

# Process video and generate BVH
def process_video(video_path, output_path, fps_target=30):
    cap = cv2.VideoCapture(video_path)
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    video_fps = cap.get(cv2.CAP_PROP_FPS)
    
    # Determine frame sampling to match target FPS
    frame_skip = max(1, round(video_fps / fps_target))
    frame_time = 1.0 / fps_target
    
    # Initialize MediaPipe
    pose_detector = setup_mediapipe()
    
    # Create BVH skeleton and writer
    root_joint = create_skeleton()
    bvh_writer = BVHWriter(root_joint, frame_time=frame_time)
    
    frame_count = 0
    processed_count = 0
    
    mp_pose = mp.solutions.pose
    
    # Mapping from MediaPipe landmarks to our skeleton
    landmarks_map = {
        "Hips": [mp_pose.PoseLandmark.LEFT_HIP.value, mp_pose.PoseLandmark.RIGHT_HIP.value],
        "Spine": [mp_pose.PoseLandmark.LEFT_SHOULDER.value, mp_pose.PoseLandmark.RIGHT_SHOULDER.value],
        "Neck": [mp_pose.PoseLandmark.LEFT_SHOULDER.value, mp_pose.PoseLandmark.RIGHT_SHOULDER.value],
        "Head": [mp_pose.PoseLandmark.NOSE.value],
        "LeftShoulder": [mp_pose.PoseLandmark.LEFT_SHOULDER.value],
        "LeftElbow": [mp_pose.PoseLandmark.LEFT_ELBOW.value],
        "LeftWrist": [mp_pose.PoseLandmark.LEFT_WRIST.value],
        "RightShoulder": [mp_pose.PoseLandmark.RIGHT_SHOULDER.value],
        "RightElbow": [mp_pose.PoseLandmark.RIGHT_ELBOW.value],
        "RightWrist": [mp_pose.PoseLandmark.RIGHT_WRIST.value],
        "LeftHip": [mp_pose.PoseLandmark.LEFT_HIP.value],
        "LeftKnee": [mp_pose.PoseLandmark.LEFT_KNEE.value],
        "LeftAnkle": [mp_pose.PoseLandmark.LEFT_ANKLE.value],
        "RightHip": [mp_pose.PoseLandmark.RIGHT_HIP.value],
        "RightKnee": [mp_pose.PoseLandmark.RIGHT_KNEE.value],
        "RightAnkle": [mp_pose.PoseLandmark.RIGHT_ANKLE.value]
    }
    
    # Joint connections for calculating rotations
    joint_connections = {
        "Hips": "Spine",
        "Spine": "Neck",
        "Neck": "Head",
        "Head": None,  # No further connection
        "LeftShoulder": "LeftElbow",
        "LeftElbow": "LeftWrist",
        "LeftWrist": None,  # End joint
        "RightShoulder": "RightElbow",
        "RightElbow": "RightWrist",
        "RightWrist": None,  # End joint
        "LeftHip": "LeftKnee",
        "LeftKnee": "LeftAnkle",
        "LeftAnkle": None,  # End joint
        "RightHip": "RightKnee",
        "RightKnee": "RightAnkle",
        "RightAnkle": None  # End joint
    }
    
    # For simple rotation smoothing
    prev_rotations = {}
    
    print(f"Processing video: {video_path}")
    print(f"Target FPS: {fps_target}, Frame time: {frame_time}")
    
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        
        frame_count += 1
        
        # Process only selected frames to match target FPS
        if (frame_count - 1) % frame_skip != 0:
            continue
        
        processed_count += 1
        
        # Convert to RGB for MediaPipe
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        
        # Detect pose
        results = pose_detector.process(rgb_frame)
        
        if results.pose_landmarks:
            landmarks = results.pose_landmarks.landmark
            
            # Extract positions for all landmarks
            joint_positions = {}
            for joint_name, landmark_indices in landmarks_map.items():
                if landmark_indices:
                    # For joints that are defined by multiple landmarks (e.g., Hips), take the average
                    positions = [get_joint_position(landmarks, idx) for idx in landmark_indices]
                    joint_positions[joint_name] = np.mean(positions, axis=0)
            
            # Generate frame data for BVH
            frame_data = []
            
            # Root position (translation)
            if "Hips" in joint_positions:
                hips_pos = joint_positions["Hips"]
                frame_data.extend([hips_pos[0], hips_pos[1], hips_pos[2]])
            else:
                # Default position if hips not detected
                frame_data.extend([0, 0, 0])
            
            # Calculate rotations for all joints
            current_rotations = {}
            
            for joint_name, next_joint_name in joint_connections.items():
                if joint_name in joint_positions:
                    if next_joint_name and next_joint_name in joint_positions:
                        # Calculate rotation based on the direction to the next joint
                        rotation = calculate_joint_rotation(
                            joint_positions[joint_name],
                            joint_positions[next_joint_name]
                        )
                        current_rotations[joint_name] = rotation
                    else:
                        # For end joints, use default rotation or propagate from parent
                        current_rotations[joint_name] = [0, 0, 0]
                else:
                    # If joint is not detected, use default rotation
                    current_rotations[joint_name] = [0, 0, 0]
            
            # Apply simple temporal smoothing (helps with jitter)
            smoothing_factor = 0.5  # Lower = less smoothing
            smoothed_rotations = {}
            
            for joint_name, rotation in current_rotations.items():
                if joint_name in prev_rotations:
                    smoothed_rotations[joint_name] = [
                        prev_rotations[joint_name][0] * smoothing_factor + rotation[0] * (1 - smoothing_factor),
                        prev_rotations[joint_name][1] * smoothing_factor + rotation[1] * (1 - smoothing_factor),
                        prev_rotations[joint_name][2] * smoothing_factor + rotation[2] * (1 - smoothing_factor)
                    ]
                else:
                    smoothed_rotations[joint_name] = rotation
            
            prev_rotations = smoothed_rotations
            
            # Add rotations to frame data in correct hierarchical order
            def add_rotations_recursively(joint):
                if joint.name in smoothed_rotations:
                    frame_data.extend(smoothed_rotations[joint.name])
                else:
                    frame_data.extend([0, 0, 0])  # Default rotation
                
                for child in joint.children:
                    if not child.is_end_site:
                        add_rotations_recursively(child)
            
            # Start with root
            add_rotations_recursively(root_joint)
            
            # Add frame to BVH
            bvh_writer.add_frame(frame_data)
            
            if processed_count % 10 == 0:
                print(f"Processed {processed_count} frames...")
    
    cap.release()
    
    # Write BVH file
    bvh_writer.write_to_file(output_path)
    print(f"BVH file created: {output_path}")
    print(f"Total frames processed: {processed_count}")


2025-02-25 17:16:02.661575: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1740532562.678734   36185 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1740532562.684707   36185 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-25 17:16:02.702125: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:

# parser = argparse.ArgumentParser(description="Convert video to BVH motion capture file")
# parser.add_argument("video_path", help="Path to input video file")
# parser.add_argument("--output", help="Path to output BVH file")
# parser.add_argument("--fps", type=int, default=30, help="Target frames per second for BVH file")

# args = parser.parse_args()

# video_path = args.video_path
# output_path = args.output

# if not output_path:
#     # Create output path if not specified
#     video_name = Path(video_path).stem
#     output_path = f"{video_name}.bvh"
filename = "dance5"
process_video(f"{filename}.mp4", f"{filename}.bvh", fps_target=30)

Processing video: dance5.mp4
Target FPS: 30, Frame time: 0.03333333333333333
BVH file created: dance5.bvh
Total frames processed: 0


I0000 00:00:1740532616.427119   36185 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1740532616.428166   36336 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 23.2.1-1ubuntu3.1~22.04.3), renderer: Mesa Intel(R) UHD Graphics (CML GT2)
W0000 00:00:1740532616.517583   36322 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1740532616.625148   36331 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
