## Extract Poses from Amass Dataset

In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib notebook
%matplotlib inline

import sys, os
import zipfile
import torch
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from tqdm import tqdm



from human_body_prior.tools.omni_tools import copy2cpu as c2c

os.environ['PYOPENGL_PLATFORM'] = 'egl'

In [2]:
# Choose the device to run the body model on.
comp_device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")

In [3]:
from human_body_prior.body_model.body_model import BodyModel

neutral_bm_path = './body_models/smplx/SMPLX_NEUTRAL_2020.npz'
neutral_bm = BodyModel(bm_fname=neutral_bm_path, num_betas=300, num_expressions=100).to(comp_device)
faces = c2c(neutral_bm.f)

In [4]:
import pathlib

beat2_d = pathlib.Path('beat2_data')
motion_fs = [pathlib.Path(f'{rt}/{f}')
             for rt,ds,fs in os.walk(beat2_d)
             for f in fs
             if f.endswith('.npz')]
motion_fs.sort()
print('num files:',len(motion_fs))

num files: 2048


In [6]:
vis = {3,11,22,28}
def beat2_to_pose(motion_f, joints_f, i_motion):
    bdata = np.load(motion_f, allow_pickle=True)
    assert bdata['mocap_frame_rate'].item() == 30
    assert bdata['model'].item() == 'smplx2020'
    assert bdata['gender'].item() == 'neutral'
    bm = neutral_bm
    T = bdata['poses'].shape[0]
    B = 1024 # batch size
    results = list()
    for i_beg in range(0,T,B):
        bdata_poses = bdata['poses'][i_beg:i_beg+B]
        bdata_trans = bdata['trans'][i_beg:i_beg+B]
        bdata_expression = bdata['expressions'][i_beg:i_beg+B]
        body_parms = {
                'root_orient': torch.Tensor(bdata_poses[:,:3]).to(comp_device),
                'pose_body'  : torch.Tensor(bdata_poses[:,3:66]).to(comp_device),
                'pose_hand'  : torch.Tensor(bdata_poses[:,75:]).to(comp_device),
                'trans'      : torch.Tensor(bdata_trans).to(comp_device),
                'betas'      : torch.Tensor(np.repeat(bdata['betas'][np.newaxis], repeats=len(bdata_trans), axis=0)).to(comp_device),
                'expression' : torch.Tensor(bdata_expression).to(comp_device)
            }    
        with torch.no_grad():
            body = bm(**body_parms)
        pose_seq_np = body.Jtr.detach().cpu().numpy()
        # Unlike AMASS datasets, here the XZ plane is already the ground plane.
        # So no need for `trans_matrix`.
        results.append(pose_seq_np)

        ############################################################################
        global vis
        if i_motion in vis and i_beg == 0:
            import trimesh
            vertices_all = body.v.detach().cpu().numpy()
            faces = body.f.detach().cpu().numpy()
            src_f = pathlib.Path(motion_f).relative_to('beat2_data')
            ply_d = (pathlib.Path('/vision/vision_data_2/VGGSound_shards_fixed/shrinidhi/meshes_beat2_pose_data') / 
                    src_f.parent / src_f.stem)
            ply_d.mkdir(parents=True,exist_ok=True)
            for f,vertices in enumerate(vertices_all):
                obj_f = ply_d / f'{f+i_beg:05}.ply'
                mesh = trimesh.Trimesh(vertices=vertices,
                                    faces=faces,
                                    process=False)
                mesh.export(obj_f)
            print(ply_d)
        ############################################################################
    results = np.concatenate(results,axis=0)
    assert results.shape[0] == T
    # np.save(joints_f,results)

In [7]:
pose_data_d = pathlib.Path('pose_data_beat2')
bad_zip_files = list()
for i_motion,motion_f in enumerate(tqdm(motion_fs,desc='beat2-to-pose',ncols=150)):
    joints_f = pose_data_d / motion_f.relative_to(beat2_d).with_suffix('.npy')
    # if joints_f.is_file():
    #     joints_f.unlink()
    # joints_f.parent.mkdir(parents=True,exist_ok=True)
    try:
        beat2_to_pose(motion_f,joints_f,i_motion)
    except zipfile.BadZipFile:
        bad_zip_files.append(motion_f)
print('bad zip files:',len(bad_zip_files))

beat2-to-pose:   0%|▏                                                                                              | 4/2048 [00:11<2:12:40,  3.89s/it]

/vision/vision_data_2/VGGSound_shards_fixed/shrinidhi/meshes_beat2_pose_data/beat_chinese_v2.0.0/smplxflame_30/12_zhao_2_103_103


beat2-to-pose:   1%|▌                                                                                             | 12/2048 [00:20<1:15:40,  2.23s/it]

/vision/vision_data_2/VGGSound_shards_fixed/shrinidhi/meshes_beat2_pose_data/beat_chinese_v2.0.0/smplxflame_30/12_zhao_2_111_111


beat2-to-pose:   1%|█                                                                                             | 23/2048 [00:31<1:37:12,  2.88s/it]

/vision/vision_data_2/VGGSound_shards_fixed/shrinidhi/meshes_beat2_pose_data/beat_chinese_v2.0.0/smplxflame_30/12_zhao_2_1_1


beat2-to-pose:   1%|█▍                                                                                            | 30/2048 [00:41<1:13:57,  2.20s/it]

/vision/vision_data_2/VGGSound_shards_fixed/shrinidhi/meshes_beat2_pose_data/beat_chinese_v2.0.0/smplxflame_30/12_zhao_2_2_2


beat2-to-pose: 100%|██████████████████████████████████████████████████████████████████████████████████████████████| 2048/2048 [09:51<00:00,  3.46it/s]


bad zip files: 0


## Segment, Mirror and Relocate Motions

In [7]:
from collections import defaultdict
import csv
import os
import pathlib

from tqdm import tqdm
import numpy as np

bm_params_f = pathlib.Path('body_models/smplx/SMPLX_NEUTRAL_2020.npz')
index_f = pathlib.Path('index.csv')
pose_data_d  = pathlib.Path('pose_data_beat2')
joints_d = pathlib.Path('joints_beat2')

Find the corresponding left/right joints from model npy file. We will mirror left/right joints to augment data.

In [8]:
bm_params = np.load(bm_params_f,allow_pickle=True)
joint2ind = bm_params['joint2num'].item()
ind2joint = {v:k
             for k,v in joint2ind.items()}
l_joints,r_joints = list(),list()
for j in joint2ind:
    if j.startswith('L_'):
        l_j = j
        r_j = j.replace('L_','R_')
        l_joints.append(joint2ind[l_j])
        r_joints.append(joint2ind[r_j])
joints_to_drop = [joint2ind['Jaw'],
                  joint2ind['L_Eye'],
                  joint2ind['R_Eye']]

print('num joints to swap:',len(l_joints))
print('left joints:',l_joints)
print('right joints:',r_joints)
print('joints to drop:',joints_to_drop)
for l,r in sorted(zip(l_joints,r_joints)):
    print(f'{ind2joint[l]:10} ({l:2}) <--> ({r:2}) {ind2joint[r]}')

num joints to swap: 24
left joints: [30, 23, 13, 34, 35, 36, 39, 38, 37, 27, 26, 25, 10, 4, 18, 31, 32, 33, 28, 29, 20, 16, 1, 7]
right joints: [45, 24, 14, 49, 50, 51, 54, 53, 52, 42, 41, 40, 11, 5, 19, 46, 47, 48, 43, 44, 21, 17, 2, 8]
joints to drop: [22, 23, 24]
L_Hip      ( 1) <--> ( 2) R_Hip
L_Knee     ( 4) <--> ( 5) R_Knee
L_Ankle    ( 7) <--> ( 8) R_Ankle
L_Foot     (10) <--> (11) R_Foot
L_Collar   (13) <--> (14) R_Collar
L_Shoulder (16) <--> (17) R_Shoulder
L_Elbow    (18) <--> (19) R_Elbow
L_Wrist    (20) <--> (21) R_Wrist
L_Eye      (23) <--> (24) R_Eye
L_Index1   (25) <--> (40) R_Index1
L_Index2   (26) <--> (41) R_Index2
L_Index3   (27) <--> (42) R_Index3
L_Middle1  (28) <--> (43) R_Middle1
L_Middle2  (29) <--> (44) R_Middle2
L_Middle3  (30) <--> (45) R_Middle3
L_Pinky1   (31) <--> (46) R_Pinky1
L_Pinky2   (32) <--> (47) R_Pinky2
L_Pinky3   (33) <--> (48) R_Pinky3
L_Ring1    (34) <--> (49) R_Ring1
L_Ring2    (35) <--> (50) R_Ring2
L_Ring3    (36) <--> (51) R_Ring3
L_Thumb1 

Mirror each file and split to max sequence length of 200 each because MotionGPT uses these limits.  
Following code is explained in [this issue](https://github.com/EricGuo5513/HumanML3D/issues/20).
```
data_m[...,0] *= -1
```

In [11]:
data_fs = [pathlib.Path(f'{rt}/{f}')
           for rt,ds,fs in os.walk(pose_data_d)
           for f in fs
           if f.endswith('.npy')]
data_fs.sort()
max_seq_len = 200
pose_data_to_joints_map = ['file,id']
pbar = tqdm(data_fs,desc='mirror & split files',ncols=150)
for i,f in enumerate(pbar):
    data = np.load(f)
    data_m = data.copy()
    data_m[:,l_joints] = data[:,r_joints]
    data_m[:,r_joints] = data[:,l_joints]
    # Unlike AMASS datasets, we have flip the mirrored version.
    data_m[...,0] *= -1
    data = np.delete(data,joints_to_drop,axis=1)
    data_m = np.delete(data_m,joints_to_drop,axis=1)
    for j,beg in enumerate(range(0,data.shape[0],max_seq_len)):
        id = f'{i:06}_{j:03}'
        id_m = f'M{i:06}_{j:03}'
        out_f = joints_d / f'{id}.npy'
        out_m_f = joints_d / f'{id_m}.npy'
        if out_f.is_file():
            out_f.unlink()
        if out_m_f.is_file():
            out_m_f.unlink()
        np.save(out_f,data[beg:beg+max_seq_len])
        np.save(out_m_f,data_m[beg:beg+max_seq_len])
        pose_data_to_joints_map.append(f'{f},{id}')
        pbar.set_postfix({'samples':2*len(pose_data_to_joints_map)})
_ = open('pose_data_to_joints_map_beat2.txt','w').write('\n'.join(pose_data_to_joints_map) + '\n')
pbar.close()

mirror & split files:   0%|                                                                                       | 0/2048 [00:00<?, ?it/s, samples=6]

mirror & split files: 100%|████████████████████████████████████████████████████████████████████████| 2048/2048 [08:08<00:00,  4.19it/s, samples=73548]


Write fake text files for each example. Copy a single text file from AMASS dataset to all the examples.

In [13]:
!git checkout main -- HumanML3D/texts.zip
!rm -rf HumanML3D/texts
!unzip -q HumanML3D/texts.zip -d HumanML3D
!rm HumanML3D/texts.zip
!mv HumanML3D/texts HumanML3D/texts_orig


EnvironmentNameNotFound: Could not find conda environment: avjoint
You can list all discoverable environments with `conda info --envs`.



EnvironmentNameNotFound: Could not find conda environment: avjoint
You can list all discoverable environments with `conda info --envs`.



EnvironmentNameNotFound: Could not find conda environment: avjoint
You can list all discoverable environments with `conda info --envs`.



EnvironmentNameNotFound: Could not find conda environment: avjoint
You can list all discoverable environments with `conda info --envs`.



EnvironmentNameNotFound: Could not find conda environment: avjoint
You can list all discoverable environments with `conda info --envs`.




In [15]:
import shutil

src_f = pathlib.Path('HumanML3D/texts_orig/000000.txt')
texts_d = pathlib.Path('HumanML3D_beat2/texts')
texts_d.mkdir(parents=True,exist_ok=True)
data_fs = list(joints_d.iterdir())
data_fs.sort()

for f in tqdm(data_fs,'texts',ncols=150):
    txt_f = f.with_suffix('.txt').name
    txt_f = texts_d / txt_f
    shutil.copyfile(src_f,txt_f)

texts: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 73546/73546 [04:03<00:00, 302.38it/s]


In [16]:
!rm -r HumanML3D/texts_orig


EnvironmentNameNotFound: Could not find conda environment: avjoint
You can list all discoverable environments with `conda info --envs`.


