In [1]:
import cv2
import mediapipe as mp
import numpy as np
import os
import argparse
from bvhtoolbox import BvhTree, BvhNode
from bvhtoolbox.bvh_writer import BvhWriter
import math

class MediapipeToBvh:
    def __init__(self):
        # Initialize MediaPipe Pose
        self.mp_pose = mp.solutions.pose
        self.pose = self.mp_pose.Pose(
            static_image_mode=False,
            model_complexity=2,  # Use the most accurate model
            enable_segmentation=False,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5
        )
        
        # Define joint hierarchy for BVH
        self.joint_hierarchy = {
            'Hips': ['LeftUpLeg', 'RightUpLeg', 'Spine'],
            'LeftUpLeg': ['LeftLeg'],
            'LeftLeg': ['LeftFoot'],
            'LeftFoot': ['LeftToeBase'],
            'LeftToeBase': [],
            'RightUpLeg': ['RightLeg'],
            'RightLeg': ['RightFoot'],
            'RightFoot': ['RightToeBase'],
            'RightToeBase': [],
            'Spine': ['Spine1'],
            'Spine1': ['Spine2'],
            'Spine2': ['Neck', 'LeftShoulder', 'RightShoulder'],
            'Neck': ['Head'],
            'Head': [],
            'LeftShoulder': ['LeftArm'],
            'LeftArm': ['LeftForeArm'],
            'LeftForeArm': ['LeftHand'],
            'LeftHand': [],
            'RightShoulder': ['RightArm'],
            'RightArm': ['RightForeArm'],
            'RightForeArm': ['RightHand'],
            'RightHand': []
        }
        
        # Mapping from MediaPipe landmarks to BVH joints
        self.landmark_to_joint = {
            # Torso and head
            self.mp_pose.PoseLandmark.NOSE.value: 'Head',
            self.mp_pose.PoseLandmark.LEFT_SHOULDER.value: 'LeftShoulder',
            self.mp_pose.PoseLandmark.RIGHT_SHOULDER.value: 'RightShoulder',
            self.mp_pose.PoseLandmark.LEFT_HIP.value: 'LeftUpLeg',
            self.mp_pose.PoseLandmark.RIGHT_HIP.value: 'RightUpLeg',
            
            # Left arm
            self.mp_pose.PoseLandmark.LEFT_ELBOW.value: 'LeftArm',
            self.mp_pose.PoseLandmark.LEFT_WRIST.value: 'LeftForeArm',
            self.mp_pose.PoseLandmark.LEFT_PINKY.value: 'LeftHand',
            
            # Right arm
            self.mp_pose.PoseLandmark.RIGHT_ELBOW.value: 'RightArm',
            self.mp_pose.PoseLandmark.RIGHT_WRIST.value: 'RightForeArm',
            self.mp_pose.PoseLandmark.RIGHT_PINKY.value: 'RightHand',
            
            # Left leg
            self.mp_pose.PoseLandmark.LEFT_KNEE.value: 'LeftLeg',
            self.mp_pose.PoseLandmark.LEFT_ANKLE.value: 'LeftFoot',
            self.mp_pose.PoseLandmark.LEFT_HEEL.value: 'LeftToeBase',
            
            # Right leg
            self.mp_pose.PoseLandmark.RIGHT_KNEE.value: 'RightLeg',
            self.mp_pose.PoseLandmark.RIGHT_ANKLE.value: 'RightFoot',
            self.mp_pose.PoseLandmark.RIGHT_HEEL.value: 'RightToeBase',
        }
        
        # Define additional joints that need to be calculated
        self.midhip_index = "MIDHIP"
        self.neck_index = "NECK"
        self.spine1_index = "SPINE1"
        self.spine2_index = "SPINE2"
        
        # Store motion data for BVH
        self.frames = []
        self.joint_offsets = {}  # Store the initial offsets for each joint
        self.fps = 30  # Default fps

    def extract_frames(self, video_path, output_dir=None):
        """Extract frames from video"""
        if output_dir and not os.path.exists(output_dir):
            os.makedirs(output_dir)
            
        # Open the video file
        video = cv2.VideoCapture(video_path)
        if not video.isOpened():
            raise ValueError(f"Could not open video file: {video_path}")
        
        # Get video properties
        self.fps = video.get(cv2.CAP_PROP_FPS)
        frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        
        print(f"Processing video: {video_path}")
        print(f"FPS: {self.fps}, Total frames: {frame_count}")
        
        frame_number = 0
        all_landmarks = []
        
        while video.isOpened():
            ret, frame = video.read()
            if not ret:
                break
                
            # Convert color to RGB for MediaPipe
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            
            # Process frame with MediaPipe
            results = self.pose.process(frame_rgb)
            
            if results.pose_world_landmarks:
                # Store landmarks
                landmarks = results.pose_world_landmarks.landmark
                all_landmarks.append([
                    [landmark.x, landmark.y, landmark.z] for landmark in landmarks
                ])
                
                # Save frame if output directory is specified
                if output_dir:
                    # Draw pose landmarks on the frame
                    annotated_frame = frame.copy()
                    mp.solutions.drawing_utils.draw_landmarks(
                        annotated_frame, 
                        results.pose_landmarks, 
                        self.mp_pose.POSE_CONNECTIONS
                    )
                    
                    # Save the frame
                    frame_path = os.path.join(output_dir, f"frame_{frame_number:04d}.jpg")
                    cv2.imwrite(frame_path, annotated_frame)
            
            frame_number += 1
            if frame_number % 100 == 0:
                print(f"Processed {frame_number}/{frame_count} frames")
        
        video.release()
        print(f"Extracted {len(all_landmarks)} valid frames with pose data")
        
        return all_landmarks

    def calculate_additional_joints(self, landmarks):
        """Calculate additional joints not provided by MediaPipe"""
        # Mid hip (root) position - between left and right hip
        left_hip = landmarks[self.mp_pose.PoseLandmark.LEFT_HIP.value]
        right_hip = landmarks[self.mp_pose.PoseLandmark.RIGHT_HIP.value]
        mid_hip = [
            (left_hip[0] + right_hip[0]) / 2,
            (left_hip[1] + right_hip[1]) / 2,
            (left_hip[2] + right_hip[2]) / 2
        ]
        
        # Neck position - between shoulders
        left_shoulder = landmarks[self.mp_pose.PoseLandmark.LEFT_SHOULDER.value]
        right_shoulder = landmarks[self.mp_pose.PoseLandmark.RIGHT_SHOULDER.value]
        neck = [
            (left_shoulder[0] + right_shoulder[0]) / 2,
            (left_shoulder[1] + right_shoulder[1]) / 2,
            (left_shoulder[2] + right_shoulder[2]) / 2
        ]
        
        # Spine1 and Spine2 - interpolated positions between hips and neck
        spine1 = [
            mid_hip[0] + (neck[0] - mid_hip[0]) * 0.33,
            mid_hip[1] + (neck[1] - mid_hip[1]) * 0.33,
            mid_hip[2] + (neck[2] - mid_hip[2]) * 0.33
        ]
        
        spine2 = [
            mid_hip[0] + (neck[0] - mid_hip[0]) * 0.66,
            mid_hip[1] + (neck[1] - mid_hip[1]) * 0.66,
            mid_hip[2] + (neck[2] - mid_hip[2]) * 0.66
        ]
        
        # Add to landmarks
        landmarks_extended = landmarks.copy()
        landmarks_extended.append(mid_hip)  # Add as the last index
        landmarks_extended.append(neck)
        landmarks_extended.append(spine1)
        landmarks_extended.append(spine2)
        
        return landmarks_extended, {
            self.midhip_index: len(landmarks),
            self.neck_index: len(landmarks) + 1,
            self.spine1_index: len(landmarks) + 2,
            self.spine2_index: len(landmarks) + 3
        }

    def process_landmark_frames(self, all_landmarks):
        """Process all landmark frames for BVH conversion"""
        all_extended_landmarks = []
        
        # Process first frame to get joint offsets
        ext_landmarks, ext_indices = self.calculate_additional_joints(all_landmarks[0])
        all_extended_landmarks.append(ext_landmarks)
        
        # Calculate joint offsets from the first frame
        self.calculate_joint_offsets(ext_landmarks, ext_indices)
        
        # Process the rest of the frames
        for i in range(1, len(all_landmarks)):
            ext_landmarks, _ = self.calculate_additional_joints(all_landmarks[i])
            all_extended_landmarks.append(ext_landmarks)
        
        # Convert landmarks to motion data
        self.convert_to_motion_data(all_extended_landmarks, ext_indices)
        
        return all_extended_landmarks

    def calculate_joint_offsets(self, landmarks, ext_indices):
        """Calculate joint offsets for BVH skeleton"""
        # Define the mapping from extended landmarks to joints
        landmark_map = self.landmark_to_joint.copy()
        landmark_map[ext_indices[self.midhip_index]] = 'Hips'
        landmark_map[ext_indices[self.neck_index]] = 'Neck'
        landmark_map[ext_indices[self.spine1_index]] = 'Spine1'
        landmark_map[ext_indices[self.spine2_index]] = 'Spine2'
        
        # Get parent-child relationships
        for joint_name in self.joint_hierarchy:
            parent_idx = None
            parent_pos = None
            
            # Find parent position
            for idx, name in landmark_map.items():
                if name == joint_name:
                    for i, pos in enumerate(landmarks):
                        if i == idx:
                            parent_pos = pos
                            break
                    break
            
            if parent_pos is None:
                # Some joints might not have direct mapping to landmarks
                continue
            
            # Calculate offsets for all children
            for child_name in self.joint_hierarchy[joint_name]:
                child_pos = None
                
                # Find child position
                for idx, name in landmark_map.items():
                    if name == child_name:
                        for i, pos in enumerate(landmarks):
                            if i == idx:
                                child_pos = pos
                                break
                        break
                
                if child_pos is not None:
                    # Calculate offset from parent to child
                    offset = [
                        child_pos[0] - parent_pos[0],
                        child_pos[1] - parent_pos[1],
                        child_pos[2] - parent_pos[2]
                    ]
                    
                    # Scale for better visualization
                    scale = 100  # Adjust scale as needed
                    offset = [x * scale for x in offset]
                    
                    self.joint_offsets[child_name] = offset

    def calculate_joint_rotations(self, joint_name, child_name, parent_pos, child_pos):
        """Calculate joint rotations in XYZ Euler angles"""
        # Vector from parent to child
        direction = [
            child_pos[0] - parent_pos[0],
            child_pos[1] - parent_pos[1],
            child_pos[2] - parent_pos[2]
        ]
        
        # Calculate rotations (simple approach)
        # In a proper implementation, you would use quaternions and full FK/IK
        length = math.sqrt(sum(x*x for x in direction))
        if length < 0.0001:
            return [0, 0, 0]  # Avoid division by zero
            
        # Normalize direction vector
        direction = [x / length for x in direction]
        
        # Calculate Euler angles (approximate)
        # Note: This is a simplified calculation
        roll = math.atan2(direction[1], direction[2]) * 180.0 / math.pi
        pitch = math.atan2(direction[0], math.sqrt(direction[1]**2 + direction[2]**2)) * 180.0 / math.pi
        yaw = math.atan2(direction[0], direction[2]) * 180.0 / math.pi
        
        return [roll, pitch, yaw]

    def convert_to_motion_data(self, all_landmarks, ext_indices):
        """Convert landmarks to motion data for BVH"""
        landmark_map = self.landmark_to_joint.copy()
        landmark_map[ext_indices[self.midhip_index]] = 'Hips'
        landmark_map[ext_indices[self.neck_index]] = 'Neck'
        landmark_map[ext_indices[self.spine1_index]] = 'Spine1'
        landmark_map[ext_indices[self.spine2_index]] = 'Spine2'
        
        inverse_map = {}
        for idx, name in landmark_map.items():
            inverse_map[name] = idx
        
        for frame_idx, landmarks in enumerate(all_landmarks):
            frame_data = {}
            
            # Root position (Hips)
            hips_idx = ext_indices[self.midhip_index]
            hips_pos = landmarks[hips_idx]
            
            # Scale position for better visualization
            scale = 100  # Adjust as needed
            root_pos = [x * scale for x in hips_pos]
            
            # Add root position to frame data
            frame_data['Hips'] = {
                'position': root_pos,
                'rotation': [0, 0, 0]  # Initialize with zero rotation
            }
            
            # Calculate rotations for all joints
            for joint_name, children in self.joint_hierarchy.items():
                if joint_name not in inverse_map:
                    continue
                    
                parent_idx = inverse_map[joint_name]
                parent_pos = landmarks[parent_idx]
                
                for child_name in children:
                    if child_name not in inverse_map:
                        continue
                        
                    child_idx = inverse_map[child_name]
                    child_pos = landmarks[child_idx]
                    
                    # Calculate rotation
                    rotation = self.calculate_joint_rotations(joint_name, child_name, parent_pos, child_pos)
                    
                    # Add to frame data
                    if child_name not in frame_data:
                        frame_data[child_name] = {'rotation': rotation}
            
            self.frames.append(frame_data)

    def create_bvh_skeleton(self):
        """Create BVH skeleton using bvhtoolbox"""
        # Create the root node (Hips)
        root = BvhNode('Hips', [0, 0, 0], 'ROOT')
        nodes = {'Hips': root}
        
        # Build the skeleton recursively
        self._build_skeleton_recursive('Hips', nodes, root)
        
        # Create the BVH tree
        tree = BvhTree()
        tree.root = root
        
        return tree

    def _build_skeleton_recursive(self, joint_name, nodes, parent_node):
        """Recursively build the BVH skeleton"""
        for child_name in self.joint_hierarchy[joint_name]:
            # Get offset for the child joint
            offset = self.joint_offsets.get(child_name, [0, 0, 0])
            
            # Create a new node
            if len(self.joint_hierarchy[child_name]) > 0:
                # This is a joint with children
                node_type = 'JOINT'
            else:
                # This is an end site
                node_type = 'END'
            
            child_node = BvhNode(child_name, offset, node_type)
            parent_node.add_child(child_node)
            nodes[child_name] = child_node
            
            # Recursively add children
            self._build_skeleton_recursive(child_name, nodes, child_node)

    def create_bvh_file(self, output_file):
        """Create and save the BVH file"""
        # Create the skeleton
        tree = self.create_bvh_skeleton()
        
        # Create the motion data
        # For each frame, we need to provide:
        # - Root position (x, y, z)
        # - All joint rotations in the hierarchy (rx, ry, rz)
        motion_data = []
        
        for frame in self.frames:
            frame_motion = []
            
            # Add root position
            root_pos = frame['Hips']['position']
            frame_motion.extend(root_pos)
            
            # Add all joint rotations in the correct order (depth-first traversal)
            self._add_joint_rotations('Hips', frame, frame_motion)
            
            motion_data.append(frame_motion)
        
        # Set the motion data in the BVH tree
        tree.nframes = len(motion_data)
        tree.frame_time = 1.0 / self.fps
        tree.frames = motion_data
        
        # Write the BVH file
        with open(output_file, 'w') as f:
            writer = BvhWriter(f)
            writer.write(tree)
        
        print(f"BVH file saved to: {output_file}")

    def _add_joint_rotations(self, joint_name, frame_data, frame_motion):
        """Add joint rotations to the frame motion data in the correct order"""
        # Add current joint rotation
        if joint_name in frame_data:
            rotation = frame_data[joint_name].get('rotation', [0, 0, 0])
            frame_motion.extend(rotation)
        else:
            # Default to zero rotation if not found
            frame_motion.extend([0, 0, 0])
        
        # Add children rotations
        for child_name in self.joint_hierarchy[joint_name]:
            self._add_joint_rotations(child_name, frame_data, frame_motion)

    def process_video(self, video_path, output_bvh, extract_frames_dir=None):
        """Process a video file and convert to BVH"""
        # Extract frames and pose data
        all_landmarks = self.extract_frames(video_path, extract_frames_dir)
        
        # Process landmarks
        all_extended_landmarks = self.process_landmark_frames(all_landmarks)
        
        # Create BVH file
        self.create_bvh_file(output_bvh)
        
        print(f"Successfully processed video to BVH: {output_bvh}")
        print(f"Total frames in BVH: {len(self.frames)}")

# def main():
#     parser = argparse.ArgumentParser(description='Convert video to BVH animation using MediaPipe')
#     parser.add_argument('video_path', help='Path to the input video file')
#     parser.add_argument('output_bvh', help='Path to the output BVH file')
#     parser.add_argument('--extract-frames', help='Directory to save extracted frames (optional)')
    
#     args = parser.parse_args()
    
#     converter = MediapipeToBvh()
#     converter.process_video(args.video_path, args.output_bvh, args.extract_frames)

# if __name__ == "__main__":
#     main()

ModuleNotFoundError: No module named 'bvhtoolbox.bvh_writer'

In [2]:

converter = VideoToBvhConverter()
filename = "fight2"
converter.process_video(f"videos/{filename}.mp4", f"bvh/{filename}.bvh", 30)

NameError: name 'VideoToBvhConverter' is not defined