## Extract Poses from Amass Dataset

In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib notebook
%matplotlib inline

import sys, os
import zipfile
import torch
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from tqdm import tqdm



from human_body_prior.tools.omni_tools import copy2cpu as c2c

os.environ['PYOPENGL_PLATFORM'] = 'egl'

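### Please remember to download the following sub-datasets from the AMASS website: https://amass.is.tue.mpg.de/download.php. Note: the code below only accepts files whose `surface_model_type` is `smplx` (or `smplx_locked_head`) and whose `gender` is `neutral`, so download the gender-neutral <u>SMPL-X</u> data rather than the <u>SMPL+H G</u> data.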
* ACCAD (ACCAD)
* HDM05 (MPI_HDM05)
* TCDHands (TCD_handMocap)
* SFU (SFU)
* BMLmovi (BMLmovi)
* CMU (CMU)
* Mosh (MPI_mosh)
* EKUT (EKUT)
* KIT  (KIT)
* Eyes_Japan_Dataset (Eyes_Japan_Dataset)
* BMLhandball (BMLhandball)
* Transitions (Transitions_mocap)
* PosePrior (MPI_Limits)
* HumanEva (HumanEva)
* SSM (SSM_synced)
* DFaust (DFaust_67)
* TotalCapture (TotalCapture)
* BMLrub (BioMotionLab_NTroje)

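### Unzip all datasets. The name in parentheses is the expected name of the unzipped folder; rename yours to match if it differs. Note that the segmentation step further down looks several datasets up under different folder names (`BMLrub`, `DFaust`, `MoSh`, `HDM05`, `PosePrior`, `SSM`, `TCDHands`, `Transitions`), so keep those names if your download already uses them.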

### Place all files under the directory **./amass_data/**. The directory structure should look like the following:  
./amass_data/  
./amass_data/ACCAD/  
./amass_data/BioMotionLab_NTroje/  
./amass_data/BMLhandball/  
./amass_data/BMLmovi/   
./amass_data/CMU/  
./amass_data/DFaust_67/  
./amass_data/EKUT/  
./amass_data/Eyes_Japan_Dataset/  
./amass_data/HumanEva/  
./amass_data/KIT/  
./amass_data/MPI_HDM05/  
./amass_data/MPI_Limits/  
./amass_data/MPI_mosh/  
./amass_data/SFU/  
./amass_data/SSM_synced/  
./amass_data/TCD_handMocap/  
./amass_data/TotalCapture/  
./amass_data/Transitions_mocap/  

**Please make sure the file paths are correct; otherwise the extraction will not succeed.**

In [None]:
# Run the body model on the first CUDA GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    comp_device = torch.device("cuda:0")
else:
    comp_device = torch.device("cpu")

In [None]:
from human_body_prior.body_model.body_model import BodyModel

# Neutral body model shared by every sequence processed below.
num_betas = 16  # number of shape coefficients handed to the body model
neutral_bm_path = 'body_models/smplx/SMPLX_NEUTRAL_2020.npz'
neutral_bm = BodyModel(
    bm_fname=neutral_bm_path,
    num_betas=num_betas,
    num_expressions=0,
)
neutral_bm = neutral_bm.to(comp_device)
# The model's `f` attribute, copied to host memory with copy2cpu (kept as `faces`).
faces = c2c(neutral_bm.f)

In [None]:
import pathlib
from collections import defaultdict
from rich import print

amass_data_d = pathlib.Path('./amass_data/')

# Every .npz file below the AMASS root (symlinked directories are followed).
data_fs = [pathlib.Path(f'{rt}/{f}')
           for rt,ds,fs in os.walk(amass_data_d,followlinks=True)
           for f in fs
           if f.endswith('.npz')]

# Each file lands in exactly one of the buckets below; the assertion at the end checks that.
new_data_fs = list()
files_not_readable = list()
files_model_type_not_usable = list()
files_mocap_frame_rate_key_missing = list()
files_frame_rate_not_multiple = list()
# Histograms, reported for information only.
all_model_types = defaultdict(int)
all_fps = defaultdict(int)
for f in tqdm(data_fs,ncols=150):
    try:
        # NOTE: allow_pickle=True can execute code embedded in a file; only use on trusted downloads.
        data = np.load(f,allow_pickle=True)
    except zipfile.BadZipFile:
        files_not_readable.append(f)
        continue
    # An .npz archive stays open until it is closed; close each one before moving on
    # so scanning a large dataset does not accumulate open file handles.
    with data:
        model_type = data['surface_model_type'].item()
        all_model_types[model_type] += 1
        if model_type not in {'smplx','smplx_locked_head'}:
            files_model_type_not_usable.append(f)
            continue
        if 'mocap_frame_rate' not in data:
            files_mocap_frame_rate_key_missing.append(f)
            continue
        fps = int(data['mocap_frame_rate'].item())
        all_fps[fps] += 1
        # Only frame rates that are a whole multiple of 30 are kept.
        if fps % 30 != 0:
            files_frame_rate_not_multiple.append(f)
            continue
        new_data_fs.append(f)
print('total files:',len(data_fs))
print('usable_files:',len(new_data_fs))
print('files not readable:',len(files_not_readable))
print('files model type not usable:',len(files_model_type_not_usable))
print('files missing mocap_frame_rate:',len(files_mocap_frame_rate_key_missing))
print('files with frame rante not multiple of 30fps:',len(files_frame_rate_not_multiple))
print('model types:',*sorted(all_model_types.items()))
print('mocap frame rate:',*sorted(all_fps.items()))
assert len(data_fs) == len(new_data_fs) + len(files_not_readable) + len(files_model_type_not_usable) + len(files_mocap_frame_rate_key_missing) + len(files_frame_rate_not_multiple)
# From here on `data_fs` holds only the usable files.
data_fs = new_data_fs

In [None]:
# Right-multiplying joint coordinates by this matrix swaps their 2nd and 3rd components.
trans_matrix = np.array([[1.0, 0.0, 0.0],
                         [0.0, 0.0, 1.0],
                         [0.0, 1.0, 0.0]])
# Frame rate of the saved poses; source frame rates must be a whole multiple of it.
ex_fps = 30
def amass_to_pose(src_path, save_path):
    """Run the neutral body model on one AMASS sequence and save its joints.

    The sequence is down-sampled to `ex_fps`, passed through `neutral_bm`, and the
    resulting `Jtr` array is multiplied by `trans_matrix` and written with `np.save`.

    Args:
        src_path: path of the source `.npz` file.
        save_path: path of the `.npy` file to write; its parent directory is
            created if it does not exist.

    Raises:
        AssertionError: if the frame rate is not a multiple of `ex_fps`, the surface
            model type is not one of the accepted SMPL-X types, or the gender is
            not 'neutral'.
    """
    # Read everything needed inside the `with` so the archive is closed afterwards.
    with np.load(src_path, allow_pickle=True) as bdata:
        fps = int(bdata['mocap_frame_rate'].item())
        assert fps % ex_fps == 0, (src_path, fps)
        # Accept the same model types the file filter admits; asserting on 'smplx'
        # alone would abort the run on the first 'smplx_locked_head' file.
        model_type = bdata['surface_model_type'].item()
        assert model_type in {'smplx','smplx_locked_head'}, (src_path, model_type)
        assert bdata['gender'] == 'neutral', src_path
        down_sample = fps // ex_fps
        bdata_poses = bdata['poses'][::down_sample,...]
        bdata_trans = bdata['trans'][::down_sample,...]
        betas = bdata['betas'][:num_betas]
    bm = neutral_bm
    # Column ranges follow the original code: 0:3 root orientation, 3:66 body pose,
    # 75: hand pose (columns 66:75 are not passed to the model).
    body_parms = {
            'root_orient': torch.Tensor(bdata_poses[:, :3]).to(comp_device),
            'pose_body': torch.Tensor(bdata_poses[:, 3:66]).to(comp_device),
            'pose_hand': torch.Tensor(bdata_poses[:, 75:]).to(comp_device),
            'trans': torch.Tensor(bdata_trans).to(comp_device),
            # The same shape coefficients are repeated for every frame.
            'betas': torch.Tensor(np.repeat(betas[np.newaxis], repeats=len(bdata_trans), axis=0)).to(comp_device),
        }
    with torch.no_grad():
        body = bm(**body_parms)
    pose_seq_np = body.Jtr.detach().cpu().numpy()
    pose_seq_np_n = np.dot(pose_seq_np, trans_matrix)
    # np.save does not create missing directories.
    os.makedirs(os.path.dirname(save_path) or '.', exist_ok=True)
    np.save(save_path, pose_seq_np_n)
    return

In [None]:
pose_data_d = pathlib.Path('pose_data')
for f in tqdm(data_fs,ncols=150):
    # Mirror the layout of amass_data/ under pose_data/, swapping .npz for .npy.
    out_f = pose_data_d / f.relative_to(amass_data_d).with_suffix('.npy')
    # np.save does not create missing directories, so a fresh run would fail without this.
    out_f.parent.mkdir(parents=True,exist_ok=True)
    # Always regenerate: remove any stale output first.
    if out_f.is_file():
        out_f.unlink()
    amass_to_pose(f,out_f)

In [None]:
# Count the .npy files under pose_data/ (shell command; needs `find` and `wc`).
!find pose_data/ -iname '*.npy' | wc -l

## Segment, Mirror and Relocate Motions

In [None]:
from collections import defaultdict
import csv
import os
import pathlib

from tqdm import tqdm
import numpy as np

# Body-model parameter file; read below for its joint-name -> index table.
bm_params_f = pathlib.Path('./body_models/smplx/SMPLX_NEUTRAL_2020.npz')
# CSV with one row per sample: source_path, start_frame, end_frame, new_name.
index_f = pathlib.Path('./index.csv')
# Directory holding the per-sequence joint arrays (.npy) that get segmented.
pose_data_d  = pathlib.Path('./pose_data')
# Output directory for the segmented samples and their mirrored versions.
joints_d = pathlib.Path('./joints')
# Frames-per-second value used to turn the per-dataset leading trims into frame counts.
# NOTE(review): the extraction step above saves poses at ex_fps = 30 — confirm 20 is intended here.
fps = 20

Find the corresponding left/right joints from the body model `.npz` file. We will mirror left/right joints to augment the data.

In [None]:
bm_params = np.load(bm_params_f,allow_pickle=True)
# Joint name -> index table stored in the model file, and its inverse.
joint2ind = bm_params['joint2num'].item()
ind2joint = dict((ind,name) for name,ind in joint2ind.items())
# Pair every 'L_*' joint with its 'R_*' counterpart; the two lists are index-aligned.
left_names = [name for name in joint2ind if name.startswith('L_')]
l_joints = [joint2ind[name] for name in left_names]
r_joints = [joint2ind[name.replace('L_','R_')] for name in left_names]
# To inspect the pairs:
# print('num joints to swap:',len(l_joints))
# for l,r in sorted(zip(l_joints,r_joints)):
#     print(ind2joint[l],ind2joint[r])
# Joints removed from every saved sample.
joints_to_drop = [joint2ind[name] for name in ('Jaw','L_Eye','R_Eye')]

To sample frames according to HumanML3D, create the dictionary of files to sample, their start/end frames, their original ids.

In [None]:
# Maps a pose file to the list of (start_frame, end_frame, original id) segments to cut from it.
to_sample = defaultdict(list)
n_dropped = 0
n_missing = 0
# Directory listings are cached so each directory is read once, not once per index row.
dir_listing = dict()
# Open index.csv in a `with` block so the handle is closed when the loop finishes.
with open(index_f,newline='') as index_fh:
    for row in csv.DictReader(index_fh):
        data_f = row['source_path']
        # Not using `humanact12` dataset. Discard those entries in val set.
        if 'humanact12' in data_f:
            n_dropped += 1
            continue
        # SMPL-X version of AMASS is missing the following. Ignore them.
        if ('BMLhandball' in data_f or
            'DanceDB' in data_f or
            'HUMAN4D' in data_f or
            'CMU/22_23_Rory' in data_f or
            'CMU/18_19_rory' in data_f or
            'CMU/18_19_Justin' in data_f or
            'CMU/20_21_rory1' in data_f or
            'CMU/20_21_Justin1' in data_f or
            'CMU/22_23_justin' in data_f):
            n_dropped += 1
            continue
        # SMPL-X version of AMASS has renamed many files. Map the old names to new.
        data_f = (data_f
                  .replace('/BioMotionLab_NTroje/','/BMLrub/')
                  .replace('/DFaust_67/','/DFaust/')
                  .replace('/MPI_mosh/','/MoSh/')
                  .replace('/MPI_HDM05/','/HDM05/')
                  .replace('/MPI_Limits/','/PosePrior/')
                  .replace('/SSM_synced/','/SSM/')
                  .replace('/TCD_handMocap/','/TCDHands/')
                  .replace('/Transitions_mocap/','/Transitions/')
                  .replace('.npz','')
                  .replace('.npy','')
                  .replace('_poses','')
                  .replace(' ','_'))
        data_f = pathlib.Path(data_f)
        data_d = data_f.parent
        assert data_d.is_dir(),data_d
        if data_d not in dir_listing:
            dir_listing[data_d] = list(data_d.iterdir())
        # The index only gives a name prefix; collect every file that starts with it.
        candidates = [f
                      for f in dir_listing[data_d]
                      if f.name.startswith(data_f.name)]
        if not candidates:
            n_missing += 1
            continue
        # iterdir() order is arbitrary, so taking "the first hit" is not reproducible when
        # one name is a prefix of another. Pick the shortest name (ties broken
        # alphabetically), i.e. the closest match to the prefix.
        match_f = min(candidates,key=lambda f: (len(f.name),f.name))
        to_sample[match_f].append((int(row['start_frame']),
                                   int(row['end_frame']),
                                   row['new_name'].replace('.npy','')))

Many files need to be sampled multiple times because there are multiple entries in `index.csv` for those files.

In [None]:
# Summarise how the index.csv rows were matched to pose files.
samples_per_file = [len(v) for v in to_sample.values()]
print('samples dropped:',n_dropped)
print('samples missing:',n_missing)
print('files to sample:',len(to_sample))
print('files with >1 sample:',
      len([n for n in samples_per_file if n > 1]))
print('total samples:',
      sum(samples_per_file))

Gather all file names.

In [None]:
# Collect every .npy file below pose_data_d (following symlinks), in sorted order.
data_fs = list()
for rt,ds,fs in os.walk(pose_data_d,followlinks=True):
    for f in fs:
        if f.endswith('.npy'):
            data_fs.append(pathlib.Path(f'{rt}/{f}'))
data_fs.sort()
print('num files:',len(data_fs))

Sample each file as per HumanML3D, create a mirrored version of the sample, save both.

In [None]:
n_dropped = 0
# CSV lines (header first) linking each pose file to the new sample id and the original id.
pose_data_to_joints_map = ['file,new_id,orig_id']
# np.save does not create missing directories.
joints_d.mkdir(parents=True,exist_ok=True)
pbar = tqdm(data_fs,desc='mirroring/pruning files',ncols=150)
for i,f in enumerate(pbar):
    if f not in to_sample:
        n_dropped += 1
        continue
    f_str = str(f)
    for j,(i_beg,i_end,orig_id) in enumerate(sorted(to_sample[f])):
        # New ids are <file index>_<segment index>; the mirrored twin gets an 'M' prefix.
        new_id = f'{i:06}_{j:02}'
        out_f = joints_d / f'{new_id}.npy'
        out_m_f = joints_d / f'M{new_id}.npy'
        pose_data_to_joints_map.append(f'{f},{new_id},{orig_id}')
        # Both outputs already exist: skip before paying for the load.
        if out_f.is_file() and out_m_f.is_file():
            continue
        # Reload per segment: the in-place sign flip below would otherwise leak into
        # later segments cut from the same array.
        data = np.load(f)
        # Per-dataset leading trims.
        # NOTE(review): the extraction step saves poses at ex_fps = 30 while `fps` is 20
        # here — confirm the trims and the index.csv frame indices match the pose frame rate.
        if 'humanact12' not in f_str:
            if 'Eyes_Japan_Dataset' in f_str:
                data = data[3*fps:]
            if 'HDM05' in f_str:
                data = data[3*fps:]
            if 'TotalCapture' in f_str:
                data = data[1*fps:]
            if 'PosePrior' in f_str:
                data = data[1*fps:]
            if 'Transitions' in f_str:
                data = data[int(0.5*fps):]
        data = data[i_beg:i_end]
        data[...,0] *= -1
        # A mirror image needs BOTH a reflection across x = 0 and a left/right label swap.
        # Swapping labels alone keeps every point in place and only relabels it.
        reflected = data.copy()
        reflected[...,0] *= -1
        data_m = reflected.copy()
        data_m[:,l_joints] = reflected[:,r_joints]
        data_m[:,r_joints] = reflected[:,l_joints]
        data = np.delete(data,joints_to_drop,axis=1)
        data_m = np.delete(data_m,joints_to_drop,axis=1)
        np.save(out_f,data)
        np.save(out_m_f,data_m)
        # Exclude the header line; each entry stands for a sample plus its mirrored twin.
        pbar.set_postfix({'samples':2*(len(pose_data_to_joints_map) - 1),
                          'dropped':n_dropped})
pbar.close()
with open('pose_data_to_joints_map.txt','w') as map_fh:
    map_fh.write('\n'.join(pose_data_to_joints_map) + '\n')

Checkout original train/val/test sets and texts. Just to make sure we have the original versions.

In [None]:
# Restore the split files and the text archive as they are on the `main` branch.
!git checkout main -- HumanML3D/train.txt HumanML3D/val.txt HumanML3D/test.txt HumanML3D/texts.zip
# Delete the *_orig files and text directories that the cells below create, so a re-run starts clean.
!rm -rf HumanML3D/train_orig.txt HumanML3D/val_orig.txt HumanML3D/test_orig.txt HumanML3D/texts_orig HumanML3D/texts

In [None]:
import shutil

# Keep the original split files under an `_orig` suffix; new ones are written in their place later.
for split_name in ('train','val','test'):
    _ = shutil.move(f'HumanML3D/{split_name}.txt',f'HumanML3D/{split_name}_orig.txt')

Make new test set from original test set with missing samples removed. SMPL-X version of AMASS doesn't seem to have all the original samples from SMPL-H version.

In [None]:
# Ids of the original test split; blank lines are skipped so they cannot trigger a KeyError below.
with open('./HumanML3D/test_orig.txt') as split_fh:
    orig_test = [l.strip()
                 for l in split_fh
                 if l.strip()]
print('orig test set:',len(orig_test))

# Original (unmirrored) id -> new id, as recorded when the samples were written.
pose_data_to_joints_map_f = pathlib.Path('pose_data_to_joints_map.txt')
with open(pose_data_to_joints_map_f,newline='') as map_fh:
    orig_id_to_new_id = {row['orig_id']:row['new_id']
                         for row in csv.DictReader(map_fh)}

# Original (unmirrored) id -> source path listed in index.csv.
index_f = pathlib.Path('./index.csv')
with open(index_f,newline='') as index_fh:
    orig_id_to_orig_file = {row['new_name'].replace('.npy',''):row['source_path']
                            for row in csv.DictReader(index_fh)}

new_test = list()
for orig_id in orig_test:
    # Both lookup tables are keyed by the unmirrored id, so strip a leading 'M' for
    # the lookups and put it back on the new id afterwards.
    is_m_id = orig_id.startswith('M')
    if is_m_id:
        orig_id = orig_id[1:]
    data_f = orig_id_to_orig_file[orig_id]
    # Not using `humanact12` dataset. Discard those entries in val set.
    if 'humanact12' in data_f:
        continue
    # SMPL-X version of AMASS is missing the following. Ignore them.
    if ('BMLhandball' in data_f or
        'DanceDB' in data_f or
        'HUMAN4D' in data_f or
        'CMU/22_23_Rory' in data_f or
        'CMU/18_19_rory' in data_f or
        'CMU/18_19_Justin' in data_f or
        'CMU/20_21_rory1' in data_f or
        'CMU/20_21_Justin1' in data_f or
        'CMU/22_23_justin' in data_f):
        continue
    if orig_id not in orig_id_to_new_id:
        continue
    new_id = orig_id_to_new_id[orig_id]
    # A mirrored original id maps to the mirrored new sample ('M' + new id). Mapping it to
    # the unmirrored id would duplicate that entry and leave the mirrored twin of a test
    # sample out of the test set, where the train split would then pick it up.
    new_test.append(f'M{new_id}' if is_m_id else new_id)
# Guard against repeated ids in the original split (keeps first occurrence order).
new_test = list(dict.fromkeys(new_test))
with open('HumanML3D/test.txt','w') as split_fh:
    split_fh.write('\n'.join(sorted(new_test))+'\n')
print('new test set:',len(new_test))

Use the remaining samples as the train set.

In [None]:
# The original train ids are read only to report how many there were.
orig_train = [l.strip()
             for l in open('./HumanML3D/train_orig.txt')]
print('orig train set:',len(orig_train))

# Set membership makes the per-sample test lookup cheap.
new_test = set(new_test)
data_d = pathlib.Path('./joints')
joint_fs = sorted(data_d.iterdir())
# Every saved sample whose id is not in the test set goes to the train set.
new_train = [p.with_suffix('').name
             for p in tqdm(joint_fs,desc='sample',ncols=150)
             if p.with_suffix('').name not in new_test]
_ = open('HumanML3D/train.txt','w').write('\n'.join(sorted(new_train)) + '\n')
print('new train set:',len(new_train))

Find the corresponding text file for each example in the new train/test set.

In [None]:
# Extract the text archive quietly into HumanML3D/ (shell command; needs `unzip`).
!unzip -q HumanML3D/texts.zip -d HumanML3D

In [None]:
import shutil
import csv

# Keep the extracted texts aside; the new, renamed copies are written to HumanML3D/texts.
shutil.move('HumanML3D/texts','HumanML3D/texts_orig')

texts_orig_d = pathlib.Path('HumanML3D/texts_orig')
texts_d = pathlib.Path('HumanML3D/texts')
texts_d.mkdir(parents=True,exist_ok=True)

# New (unmirrored) id -> original id.
pose_data_to_joints_map_f = pathlib.Path('pose_data_to_joints_map.txt')
with open(pose_data_to_joints_map_f,newline='') as map_fh:
    new_id_to_orig_id = {row['new_id']:row['orig_id']
                         for row in csv.DictReader(map_fh)}
n_mirrored_text_missing = 0
# `data_d` is the joints directory set by the train-split cell.
for f in tqdm(list(sorted(data_d.iterdir())),desc='sample',ncols=150):
    sample_id = f.with_suffix('').name
    text_f = texts_d / f'{sample_id}.txt'
    is_mirrored = sample_id.startswith('M')
    base_id = sample_id[1:] if is_mirrored else sample_id
    orig_id = new_id_to_orig_id[base_id]
    text_orig_f = texts_orig_d / f'{orig_id}.txt'
    if is_mirrored:
        # A mirrored motion needs the mirrored caption, not the unmirrored one.
        # NOTE(review): assumes texts.zip ships an 'M<id>.txt' with left/right wording
        # swapped for each '<id>.txt' — confirm against the archive.
        text_orig_m_f = texts_orig_d / f'M{orig_id}.txt'
        if text_orig_m_f.is_file():
            text_orig_f = text_orig_m_f
        else:
            # Fall back to the unmirrored caption rather than failing, and report it below.
            n_mirrored_text_missing += 1
    shutil.copyfile(text_orig_f,text_f)
if n_mirrored_text_missing:
    print('mirrored samples that fell back to the unmirrored text:',n_mirrored_text_missing)

shutil.rmtree('HumanML3D/texts_orig')

In [1]:
from itertools import chain
import pathlib
import shutil
from tqdm import tqdm

# NOTE(review): machine-specific absolute paths; adjust them before running elsewhere.
texts_1_d = pathlib.Path('/vision/changan/shrinidhi/data2/HumanML3D_amass30_smplx_2020/texts')
texts_2_d = pathlib.Path('/vision/changan/shrinidhi/data2/HumanML3D_beat2_smplx_2020/texts')
out_d = pathlib.Path('/vision/changan/shrinidhi/data2/HumanML3D_amass30_beat2_smplx_2020/texts')

# shutil.copyfile does not create the destination directory.
out_d.mkdir(parents=True,exist_ok=True)

texts_1_fs = list(texts_1_d.iterdir())
texts_2_fs = list(texts_2_d.iterdir())
# Files are merged by name, so a name present in both sources is overwritten by the
# second one. Report such clashes instead of losing a file silently.
texts_1_names = {f.name for f in texts_1_fs}
n_clashes = sum(1 for f in texts_2_fs if f.name in texts_1_names)
if n_clashes:
    print('WARNING: file names present in both sources (second source wins):',n_clashes)

for f in tqdm(list(chain(texts_1_fs,texts_2_fs)),ncols=150):
    shutil.copyfile(f, out_d / f.name)

100%|██████████| 75090/75090 [04:51<00:00, 257.23it/s]
