In [23]:
import sys
import numpy as np
import torch
import os, glob
import smplx
import json

In [24]:
from GRAB.tools.objectmodel import ObjectModel
from GRAB.tools.utils import parse_npz, prepare_params, params2torch, to_cpu, append2dict
from GRAB.tools.meshviewer import Mesh
import trimesh

from manopth.manolayer import ManoLayer

In [25]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# data input dirs
root_dir = '/media/erik/DATA/grab'
grab_dir = os.path.abspath(f'{root_dir}/grab_extracted/grab')
grab_amass_dir = os.path.abspath(f'{root_dir}/grab_amass')
model_path = os.path.abspath('./body_models/')

# out dir for reorganized SMPL and object data for the HumanML3D pipeline
out_dir = os.path.abspath(f'{root_dir}/grab_preprocessed')

# texts
text_annotation_path = os.path.abspath('./grab_annotations.csv')
text_annotation_out_dir = os.path.abspath('/media/erik/DATA/texts/')

# objects
object_mesh_dir = os.path.abspath(f'{root_dir}/grab_extracted/tools/object_meshes/contact_meshes')
object_mesh_out_dir = os.path.abspath('/media/erik/DATA/object_mesh')
object_sample_out_dir = os.path.abspath('/media/erik/DATA/object_sample')

# hand output directory
hand_out_dir = os.path.abspath('/media/erik/DATA/hands')


os.makedirs(out_dir, exist_ok=True)
os.makedirs(text_annotation_out_dir, exist_ok=True)
os.makedirs(object_mesh_out_dir, exist_ok=True)
os.makedirs(object_sample_out_dir, exist_ok=True)
os.makedirs(hand_out_dir, exist_ok=True)

print('Using device:', device)
print('Data directory:', grab_dir)
print('AMASS directory:', grab_amass_dir)
print('Output directory:', out_dir)
print('SMPLX/MANO Model directory:', model_path)
print('Text annotation file:', text_annotation_path)
print('Text annotation output directory:', text_annotation_out_dir)
print('Object mesh directory:', object_mesh_dir)
print('Object mesh output directory:', object_mesh_out_dir)
print('Object sample output directory:', object_sample_out_dir)
print('Hand output directory:', hand_out_dir)

Using device: cuda
Data directory: /media/erik/DATA/grab/grab_extracted/grab
AMASS directory: /media/erik/DATA/grab/grab_amass
Output directory: /media/erik/DATA/grab/grab_preprocessed
SMPLX/MANO Model directory: /home/erik/ethz/digital-humans/dex-hoi/data_preprocessing/grab_preprocessing/body_models
Text annotation file: /home/erik/ethz/digital-humans/dex-hoi/data_preprocessing/grab_preprocessing/grab_annotations.csv
Text annotation output directory: /media/erik/DATA/texts
Object mesh directory: /media/erik/DATA/grab/grab_extracted/tools/object_meshes/contact_meshes
Object mesh output directory: /media/erik/DATA/object_mesh
Object sample output directory: /media/erik/DATA/object_sample
Hand output directory: /media/erik/DATA/hands


In [26]:
# map directory names to lists of files in them
data_dict = {}

for root, dirs, files in os.walk(grab_dir):
    if len(files) > 0:
        print(f'Adding directory {root} with {len(files)} files, creating output folder structure...')
        subject_id = os.path.basename(root)
        data_dict[subject_id] = {}

        for file in files:
            task_description = os.path.splitext(file)[0]
            file_in = os.path.join(root, file)
            file_out_dir = os.path.join(out_dir, f'{subject_id}_{task_description}')
            os.makedirs(file_out_dir, exist_ok=True)
            data_dict[subject_id][task_description] = {'full_info': file_in, 'preprocessed_out': file_out_dir}

for root, dirs, files in os.walk(grab_amass_dir):
    subject_id = os.path.basename(root)
    if subject_id in data_dict:
        for file in files:
            # all processed files should have 'stageii' in their name
            if 'stageii' not in file:
                continue

            task_description = os.path.splitext(file)[0].replace('_stageii', '')
            if task_description in data_dict[subject_id]:
                data_dict[subject_id][task_description]['amass_info'] = os.path.join(root, file)
            elif 'pick_all' in task_description:
                task_description = task_description.replace('pick_all', 'lift')
                if task_description in data_dict[subject_id]:
                    data_dict[subject_id][task_description]['amass_info'] = os.path.join(root, file)
                else:
                    print(f'No corresponding task for {task_description} in {subject_id} found')
            else:
                print(f'No corresponding task for {task_description} in {subject_id} found')



Adding directory /media/erik/DATA/grab/grab_extracted/grab/s2 with 93 files, creating output folder structure...
Adding directory /media/erik/DATA/grab/grab_extracted/grab/s6 with 148 files, creating output folder structure...
Adding directory /media/erik/DATA/grab/grab_extracted/grab/s9 with 125 files, creating output folder structure...
Adding directory /media/erik/DATA/grab/grab_extracted/grab/s1 with 198 files, creating output folder structure...
Adding directory /media/erik/DATA/grab/grab_extracted/grab/s10 with 145 files, creating output folder structure...
Adding directory /media/erik/DATA/grab/grab_extracted/grab/s5 with 106 files, creating output folder structure...
Adding directory /media/erik/DATA/grab/grab_extracted/grab/s8 with 162 files, creating output folder structure...
Adding directory /media/erik/DATA/grab/grab_extracted/grab/s3 with 125 files, creating output folder structure...
Adding directory /media/erik/DATA/grab/grab_extracted/grab/s4 with 113 files, creating o

In [27]:
# get all object mesh files (.ply)
object_mesh_files = glob.glob(os.path.join(object_mesh_dir, '*.ply'))

# create a folder inside object_mesh_out_dir for each object and copy the mesh file there
for obj_mesh_file in object_mesh_files:
    obj_name = os.path.basename(obj_mesh_file).replace('.ply', '')
    obj_out_dir = os.path.join(object_mesh_out_dir, obj_name)
    os.makedirs(obj_out_dir, exist_ok=True)
    os.system(f'cp {obj_mesh_file} {obj_out_dir}')

In [28]:
from text_preprocessing import process_grab
import pandas as pd

# load csv dataset as dataframe
df = pd.read_csv(text_annotation_path, header=None, names=['seq_name', 'caption'])
os.makedirs(text_annotation_out_dir, exist_ok=True)

process_grab(df, out_path=text_annotation_out_dir)

100%|██████████| 1333/1333 [00:06<00:00, 219.08it/s]


In [42]:
def load_sbj_verts(seq_data):
        mesh_path = os.path.join(grab_dir, '..',seq_data.body.vtemp)
        sbj_vtemp = np.array(Mesh(filename=mesh_path).vertices)
        return sbj_vtemp

def load_obj_verts(obj_name, seq_data, n_verts_sample=512):
    mesh_path = os.path.join(grab_dir, '..',seq_data.object.object_mesh)
    np.random.seed(100)
    obj_mesh = Mesh(filename=mesh_path)
    verts_obj = np.array(obj_mesh.vertices)
    faces_obj = np.array(obj_mesh.faces)

    if verts_obj.shape[0] > n_verts_sample:
        verts_sample_id = np.random.choice(verts_obj.shape[0], n_verts_sample, replace=False)
    else:
        verts_sample_id = np.arange(verts_obj.shape[0])

    verts_sampled = verts_obj[verts_sample_id]
    obj_info = {'verts': verts_obj,
                'faces': faces_obj,
                'verts_sample_id': verts_sample_id,
                'verts_sample': verts_sampled,
                'obj_mesh_file': mesh_path}

    return obj_info

save_body_verts = False
save_lhand_verts = True
save_rhand_verts = True
save_object_verts = True
save_contact = True
n_verts_sample = 512
n_comps_hands = 24


def process_data_entry(in_file, amass_file, out_dir):
    body_data = {
        'global_orient': [],'body_pose': [],'transl': [],
        'right_hand_pose': [],'left_hand_pose': [],
        'jaw_pose': [],'leye_pose': [],'reye_pose': [],
        'expression': [],'fullpose': [],
        'contact':[], 'verts' :[]
    }

    object_data = {'verts': [], 'global_orient': [], 'transl': [], 'contact': []}
    lhand_data = {'verts': [], 'global_orient': [], 'hand_pose': [], 'transl': [], 'fullpose': [], 'joints': []}
    rhand_data = {'verts': [], 'global_orient': [], 'hand_pose': [], 'transl': [], 'fullpose': [], 'joints': []}
    print(f'Processing {in_file} and {amass_file}')
    seq_data = parse_npz(in_file)
    amass_data = np.load(amass_file, allow_pickle=True)
    smplh_data = {
         'poses': amass_data['poses'], # T x 156
         'betas': amass_data['betas'], # 16 -> need to reshape to T x 10
         'trans': amass_data['trans'], # T x 3
        #  'root_orient': amass_data['root_orient'], # T x 3, don't need this for BEHAVE format
    }

    # reshape betas to T x 10 with np.tile
    smplh_data['betas'] = np.tile(smplh_data['betas'][:10], (smplh_data['poses'].shape[0], 1))
    num_amass_timesteps = smplh_data['poses'].shape[0]
    
    obj_name = seq_data.obj_name
    sbj_id   = seq_data.sbj_id
    n_comps  = seq_data.n_comps
    gender   = seq_data.gender

    # need this for other methods from GRAB, this should not filter out any frames
    frame_mask = (seq_data['contact']['object']>-1).any(axis=1)
    T = frame_mask.sum()
    
    # make sure AMASS data has the same number of timesteps as GRAB data, otherwise something is wrong
    assert num_amass_timesteps == T, f'Number of timesteps in GRAB and AMASS data do not match: {num_amass_timesteps} vs {T}'

    sbj_params = prepare_params(seq_data.body.params, frame_mask)
    rh_params  = prepare_params(seq_data.rhand.params, frame_mask)
    lh_params  = prepare_params(seq_data.lhand.params, frame_mask)
    obj_params = prepare_params(seq_data.object.params, frame_mask)

    print(f'lhand params: {lh_params.keys()}')
    append2dict(body_data, sbj_params)
    append2dict(rhand_data, rh_params)
    append2dict(lhand_data, lh_params)
    append2dict(object_data, obj_params)

    sbj_vtemp = load_sbj_verts(seq_data)

    if save_body_verts:

        sbj_m = smplx.create(model_path=model_path,
                                model_type='smplx',
                                gender=gender,
                                num_pca_comps=n_comps,
                                v_template=sbj_vtemp,
                                batch_size=T)

        sbj_parms = params2torch(sbj_params)
        verts_sbj = to_cpu(sbj_m(**sbj_parms).vertices)
        body_data['verts'].append(verts_sbj)

    if save_lhand_verts:
        lh_mesh = os.path.join(grab_dir, '..', seq_data.lhand.vtemp)
        lh_vtemp = np.array(Mesh(filename=lh_mesh).vertices)

        lh_m = smplx.create(model_path=model_path,
                            model_type='mano',
                            is_rhand=False,
                            v_template=lh_vtemp,
                            num_pca_comps=n_comps_hands,
                            flat_hand_mean=True,
                            batch_size=T)

        print(f'lh params: {lh_params["hand_pose"].shape}')
        lh_parms = params2torch(lh_params)
        model_output = lh_m(**lh_parms)
        # verts_lh = to_cpu(model_output.vertices)

        hand_pose = model_output.hand_pose # T x 45-dim
        betas = model_output.betas # T x 10-dim

        # only use n_comps_hands PCA components
        lhand_data['pca_pose'] = to_cpu(hand_pose)

        # lhand_data['verts'].append(verts_lh)

    if save_rhand_verts:
        rh_mesh = os.path.join(grab_dir, '..', seq_data.rhand.vtemp)
        rh_vtemp = np.array(Mesh(filename=rh_mesh).vertices)

        rh_m = smplx.create(model_path=model_path,
                            model_type='mano',
                            is_rhand=True,
                            v_template=rh_vtemp,
                            num_pca_comps=n_comps_hands,
                            flat_hand_mean=True,
                            batch_size=T)

        rh_parms = params2torch(rh_params)
        model_output = rh_m(**rh_parms)
        # verts_rh = to_cpu(model_output.vertices)
        
        hand_pose = model_output.hand_pose # T x 45-dim
        betas = model_output.betas # T x 10-dim

        # only use n_comps_hands PCA components
        rhand_data['pca_pose'] = to_cpu(hand_pose)

        # print(f'HAND DATA PCA SHAPE: {rhand_data["pca_pose"].shape}')
        # rhand_data['verts'].append(verts_rh)

    ### for objects

    obj_info = load_obj_verts(obj_name, seq_data, n_verts_sample)

    if save_object_verts:

        obj_m = ObjectModel(v_template=obj_info['verts_sample'],
                            batch_size=T)
        obj_parms = params2torch(obj_params)
        verts_obj = to_cpu(obj_m(**obj_parms).vertices)
        object_data['verts'].append(verts_obj)

    if save_contact:
        body_data['contact'].append(seq_data.contact.body[frame_mask])
        object_data['contact'].append(seq_data.contact.object[frame_mask][:,obj_info['verts_sample_id']])

    
    behave_format_object_data = {
        'angles': object_data['global_orient'][0], # T x 3
        'trans': object_data['transl'][0] # T x 3
    }

    # save left hand, right hand data in hand_out_dir
    # lhand_out_file = os.path.join(hand_out_dir, f'{os.path.basename(out_dir)}_lhand.npy')
    # rhand_out_file = os.path.join(hand_out_dir, f'{os.path.basename(out_dir)}_rhand.npy')
    # np.save(lhand_out_file, lhand_data)
    # np.save(rhand_out_file, rhand_data)

    out_data = [body_data, rhand_data, lhand_data, object_data, smplh_data, behave_format_object_data]
    out_data_name = ['body_data', 'rhand_data', 'lhand_data','object_data', 'smpl_fit_all', 'object_fit_all']
    # save with numpy npz
    for i in range(len(out_data)):
        out_file = os.path.join(out_dir, f'{out_data_name[i]}.npz')
        np.savez_compressed(out_file, **out_data[i])

    info_dict = {
        'gender': gender,
    }
    json_str = json.dumps(info_dict)
    with open(os.path.join(out_dir, 'info.json'), 'w') as f:
        f.write(json_str)

    # save object sample ids in object_sample_out_dir
    obj_sample_out_file = os.path.join(object_sample_out_dir, f'{os.path.basename(out_dir)}.npy')
    np.save(obj_sample_out_file, obj_info['verts_sample_id'])

    print(f'Processed {in_file}, saved motion info to {out_dir} and object sample to {obj_sample_out_file}')


In [43]:
from tqdm.auto import tqdm
# iterate over all files and process them, show tqdm progress bar
for subject_id, task_description in data_dict.items():
    for task_dict in tqdm(task_description.values()):
        try:
            in_file = task_dict['full_info']
            amass_file = task_dict['amass_info']
            task_out_dir = task_dict['preprocessed_out']
        except KeyError:
            print(f'No file found for {subject_id} - {task_description}')
            continue
        process_data_entry(in_file, amass_file, task_out_dir)

  0%|          | 0/93 [00:00<?, ?it/s]

Processing /media/erik/DATA/grab/grab_extracted/grab/s2/flashlight_on_1.npz and /media/erik/DATA/grab/grab_amass/GRAB/s2/flashlight_on_1_stageii.npz
lhand params: dict_keys(['global_orient', 'hand_pose', 'transl', 'fullpose'])
lh params: (1041, 24)


RuntimeError: The size of tensor a (27) must match the size of tensor b (48) at non-singleton dimension 1

In [None]:
# generate a file called split.json in parent directory of out_dir, which has a list for training and test splits that contain sequence names

# get all sequence names - these are subdirectories in out_dir
seq_names = [os.path.basename(x) for x in glob.glob(os.path.join(out_dir, '*'))]
# print(seq_names[:10])
train_test_ratio = 0.8
# randomly select train_test_ratio of the sequences for training
np.random.seed(100)
np.random.shuffle(seq_names)
split_idx = int(len(seq_names) * train_test_ratio)
train_seq_names = seq_names[:split_idx]
test_seq_names = seq_names[split_idx:]

split_dict = {
    'train': train_seq_names,
    'test': test_seq_names
}

split_file = os.path.join(out_dir, '..', 'split.json')
json_str = json.dumps(split_dict, indent=4)

print(f'Saving split file to {split_file}')
with open(split_file, 'w') as f:
    f.write(json_str)

# also save 2 test files called train.txt and test.txt in the same directory, each line containing a direcotry name
train_file = os.path.join(out_dir, '..', 'train.txt')
test_file = os.path.join(out_dir, '..', 'test.txt')

with open(train_file, 'w') as f:
    for seq_name in train_seq_names:
        f.write(f'{seq_name}\n')

with open(test_file, 'w') as f:
    for seq_name in test_seq_names:
        f.write(f'{seq_name}\n')

print(f'Saved train and test files to {train_file} and {test_file}')

Saving split file to /media/erik/DATA/grab/grab_preprocessed/../split.json
Saved train and test files to /media/erik/DATA/grab/grab_preprocessed/../train.txt and /media/erik/DATA/grab/grab_preprocessed/../test.txt
