In [1]:
import json
from pathlib import Path

# Load the motion mapping. Later cells iterate it with .items(), treating each
# key as a text description and each value as a collection of file ids.
# JSON is read explicitly as UTF-8 so the result does not depend on the
# platform's default locale encoding (the notebook also writes its JSON as UTF-8).
with open("motion_mapping.json", "r", encoding="utf-8") as f:
    mocap_data = json.load(f)



In [2]:

# Keywords whose presence in a lower-cased description assigns it to a category
walk_keywords = ['walk', 'walking', 'wander', 'stride', 'pacing', 'march', 'limp', 'mope']
run_keywords = ['run', 'running', 'jog', 'jogging', 'scramble']
jump_keywords = ['jump', 'jumping', 'hop', 'hopping', 'leap', 'jete', 'skip', 'bound']

# One {description: ids} dict per category; a description may land in several
walk_dataset = {}
run_dataset = {}
jump_dataset = {}

# Pair each keyword list with the dict it feeds so one loop handles all categories
category_buckets = (
    (walk_keywords, walk_dataset),
    (run_keywords, run_dataset),
    (jump_keywords, jump_dataset),
)

# NOTE(review): matching is plain substring containment, so a keyword such as
# 'run' or 'hop' also matches inside longer, unrelated words — confirm intended.
for description, ids in mocap_data.items():
    desc_lower = description.lower()
    for keywords, bucket in category_buckets:
        for keyword in keywords:
            if keyword in desc_lower:
                bucket[description] = ids
                break  # one hit is enough for this category

# Report how many descriptions ended up in each category
print(f"Walk samples: {len(walk_dataset)}")
print(f"Run samples: {len(run_dataset)}")
print(f"Jump samples: {len(jump_dataset)}")

Walk samples: 258
Run samples: 75
Jump samples: 102


In [3]:
import os
import csv
import numpy as np
from typing import Optional, List
from bvh import Bvh

def convert(
    bvh_path: str,
    out_csv_path: Optional[str] = None,
    *,
    drop_tpose_frame0: bool = True,      # CMU/cgspeed release adds a T-pose at frame 0
    downsample: int = 1,                 # e.g., 4 keeps every 4th frame
    rotations_to_radians: bool = False,  # BVH rotations are in degrees
    dtype=np.float32
) -> str:
    """
    Load a BVH file, apply basic preprocessing, and save the motion data as CSV.

    Preprocessing (applied in this order):
      1. Optional removal of frame 0 (T-pose) commonly added by the CMU/cgspeed conversion.
         Skipped when the clip has only one frame.
      2. Optional temporal downsampling (keep every ``downsample``-th frame).
      3. Optional degree->radian conversion for rotation channels (positions unchanged).

    The CSV columns are ordered as returned by the parser: joints in
    ``get_joints_names()`` order, each followed by its channels.
    Column names are generated as "{JointName}_{ChannelName}" (e.g., "Hips_Xposition").
    If the generated names do not match the data width, generic names
    "dim_0", "dim_1", ... are written instead.

    Args:
        bvh_path: Path of the BVH file to read.
        out_csv_path: Destination CSV path. When None, the CSV is written next to
            the BVH file, with the extension replaced by ".csv".
        drop_tpose_frame0: Drop the first frame when more than one frame exists.
        downsample: Stride for temporal downsampling; values <= 1 (or None) keep all frames.
        rotations_to_radians: Convert columns ending in "_Xrotation", "_Yrotation"
            or "_Zrotation" from degrees to radians.
        dtype: NumPy dtype used for the frame array.

    Returns:
        The path of the CSV file that was written.

    Raises:
        ValueError: If the BVH file contains no motion frames.

    Warns:
        RuntimeWarning: If ``rotations_to_radians`` is requested but the column
            names had to fall back to generic ones, so rotation channels cannot
            be identified and the values are left untouched.
    """
    # Previously a missing out_csv_path crashed in os.path.dirname(None);
    # derive a sensible default instead.
    if out_csv_path is None:
        out_csv_path = os.path.splitext(bvh_path)[0] + ".csv"

    # --- Parse BVH ---
    with open(bvh_path, "r", encoding="utf-8", errors="ignore") as f:
        mocap = Bvh(f.read())

    # One sequence of channel values per frame
    frames = mocap.frames
    if frames is None or len(frames) == 0:
        raise ValueError(f"No MOTION frames found in: {bvh_path}")

    data = np.asarray(frames, dtype=dtype)  # shape: (T, D)

    # --- Build column names: every joint followed by each of its channels ---
    col_names: List[str] = [
        f"{joint}_{channel}"
        for joint in mocap.get_joints_names()
        for channel in mocap.joint_channels(joint)
    ]

    # Sanity check: columns must match data width.
    # Fallback: still save, but with generic names if a mismatch occurs.
    used_generic_names = len(col_names) != data.shape[1]
    if used_generic_names:
        col_names = [f"dim_{i}" for i in range(data.shape[1])]

    # --- Preprocess ---
    # 1) Drop frame 0 T-pose if desired (CMU/cgspeed: "T pose is in frame 0")
    if drop_tpose_frame0 and data.shape[0] > 1:
        data = data[1:, :]

    # 2) Downsample
    if downsample is not None and int(downsample) > 1:
        data = data[::int(downsample), :]

    # 3) Convert rotation channels to radians (positions unchanged)
    if rotations_to_radians:
        if used_generic_names:
            # Generic names carry no channel information, so the conversion
            # would silently do nothing; make that visible to the caller.
            import warnings
            warnings.warn(
                f"Cannot identify rotation channels in {bvh_path}: column names "
                "do not match the data width, rotations were left in degrees.",
                RuntimeWarning,
                stacklevel=2,
            )
        else:
            rot_mask = np.array(
                [name.endswith(("_Xrotation", "_Yrotation", "_Zrotation")) for name in col_names],
                dtype=bool,
            )
            data[:, rot_mask] = np.deg2rad(data[:, rot_mask])

    # --- Save CSV ---
    # dirname is '' for a bare filename; fall back to the current directory
    os.makedirs(os.path.dirname(out_csv_path) or ".", exist_ok=True)

    with open(out_csv_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(col_names)
        writer.writerows(data.tolist())

    return out_csv_path


In [4]:
# Convert every clip in the walk dataset to CSV under the 'walk/' directory
import csv
from tqdm import tqdm

# Convert each clip referenced by the walk category into walk/<file_id>.csv.
for file_ids in tqdm(walk_dataset.values(), desc="Converting walking"):
    for file_id in file_ids:
        # The part before the first underscore selects the source sub-directory;
        # it is left-padded with zeros to three characters when 1 or 2 long.
        subject = file_id.split('_')[0]
        if 0 < len(subject) < 3:
            subject = subject.rjust(3, "0")

        convert(
            f"data/{subject}/{file_id}.bvh",
            out_csv_path=f"walk/{file_id}.csv",
            downsample=4,
            drop_tpose_frame0=True,
            rotations_to_radians=False,
        )

Converting walking: 100%|██████████| 258/258 [01:20<00:00,  3.19it/s]


In [5]:
# Convert every clip in the run dataset to CSV under the 'run/' directory
import csv
from tqdm import tqdm

# Convert each clip referenced by the run category into run/<file_id>.csv.
# The progress label previously read "Converting walking" (copy-paste slip).
for key, file_ids in tqdm(run_dataset.items(), desc="Converting running"):
    for file_id in file_ids:
        # The part before the first underscore selects the source sub-directory;
        # it is left-padded with zeros to three characters when 1 or 2 long.
        dir_number = file_id.split('_')[0]
        if 0 < len(dir_number) < 3:
            dir_number = dir_number.rjust(3, "0")

        bvh_file = f"data/{dir_number}/{file_id}.bvh"
        out_csv_path = f"run/{file_id}.csv"
        convert(
            bvh_file,
            out_csv_path=out_csv_path,
            downsample=4,
            drop_tpose_frame0=True,
            rotations_to_radians=False,
        )

Converting walking: 100%|██████████| 75/75 [00:09<00:00,  7.59it/s]


In [6]:
# Convert every clip in the jump dataset to CSV under the 'jump/' directory
import csv
from tqdm import tqdm

# Convert each clip referenced by the jump category into jump/<file_id>.csv.
# The progress label previously read "Converting walking" (copy-paste slip).
for key, file_ids in tqdm(jump_dataset.items(), desc="Converting jumping"):
    for file_id in file_ids:
        # The part before the first underscore selects the source sub-directory;
        # it is left-padded with zeros to three characters when 1 or 2 long.
        dir_number = file_id.split('_')[0]
        if 0 < len(dir_number) < 3:
            dir_number = dir_number.rjust(3, "0")

        bvh_file = f"data/{dir_number}/{file_id}.bvh"
        out_csv_path = f"jump/{file_id}.csv"
        convert(
            bvh_file,
            out_csv_path=out_csv_path,
            downsample=4,
            drop_tpose_frame0=True,
            rotations_to_radians=False,
        )

Converting walking: 100%|██████████| 102/102 [00:20<00:00,  5.09it/s]


In [8]:
import json

# Bundle the three category dicts under their category names
combined_data = dict(walk=walk_dataset, run=run_dataset, jump=jump_dataset)

# Serialize once, then write the text out as UTF-8 (non-ASCII kept verbatim)
output_path = "walk_run_jump_datasets.json"
serialized = json.dumps(combined_data, indent=4, ensure_ascii=False)
with open(output_path, "w", encoding="utf-8") as out_file:
    out_file.write(serialized)

print(f"Datasets saved to {output_path}")

Datasets saved to walk_run_jump_datasets.json
