In [2]:
import os
import json
import h5py
import numpy as np

import robomimic
import robomimic.utils.file_utils as FileUtils
import robomimic.utils.env_utils as EnvUtils
import robomimic.utils.obs_utils as ObsUtils
import imageio
import tqdm
from robomimic.utils.file_utils import create_hdf5_filter_key

    No private macro file found!
    It is recommended to use a private macro file
    To setup, run: python /home/ns1254/mimicgen/envs/robomimic/robomimic/scripts/setup_macros.py
)


In [2]:
# Location of the source demonstration file.
dataset_path = "/home/ns1254/data_franka/bellpepper_oma/ola_114.hdf5"

# Read-only handle; it is left open because later cells keep reading from it.
f_org = h5py.File(dataset_path, mode="r")
# Names of every member stored under the top-level "data" group.
demos = [name for name in f_org["data"]]

In [3]:
# One entry per demo, taken from each demo group's 'num_samples' attribute.
lengths = np.array(
    [f_org['data'][name].attrs['num_samples'] for name in demos]
)

print('Number of demos: ', len(demos))
print('Max length: ', lengths.max())
print('Min length: ', lengths.min())
print('Mean length: ', lengths.mean())

Number of demos:  114
Max length:  761
Min length:  106
Mean length:  161.50877192982455


In [4]:
# Display the names of the entries stored under the top-level 'mask' group.
f_org['mask'].keys()

<KeysViewHDF5 ['bad', 'good']>

In [12]:
def copy_group(src_group, dest_group):
    """Copy the attributes and every member of ``src_group`` into ``dest_group``.

    ``dest_group`` must already exist. Its attributes are set key-by-key from
    those of ``src_group``; each member (sub-group or dataset) is then
    duplicated under the same name with ``h5py.Group.copy``, which copies
    recursively and carries member attributes along.

    Delegating to ``Group.copy`` instead of re-creating datasets from
    ``item[...]`` avoids loading every array into Python memory and keeps each
    dataset's storage settings (chunking, compression, ...) instead of
    silently falling back to the defaults.

    Args:
        src_group: open h5py group to read from.
        dest_group: open, writable h5py group to fill; it may live in a
            different file than ``src_group``.
    """
    # Group.copy only handles the members, so the attributes of the group
    # itself still have to be transferred by hand.
    for attr_name, attr_value in src_group.attrs.items():
        dest_group.attrs[attr_name] = attr_value

    for key in src_group.keys():
        # expand_soft / expand_external materialise linked objects in the
        # destination, matching the previous behaviour of copying whatever
        # ``src_group.items()`` resolved to rather than the link itself.
        src_group.copy(
            key,
            dest_group,
            name=key,
            expand_soft=True,
            expand_external=True,
        )


def copy_demos(f_src, f_dest, demos2copy, next_id):
    """Copy selected demos from one file to another under fresh sequential names.

    Each name in ``demos2copy`` is looked up in ``f_src['data']`` and copied
    with ``copy_group`` into a newly created ``f_dest['data']['demo_<k>']``
    group, where ``k`` starts at ``next_id`` and grows by one per demo.

    Args:
        f_src: source container exposing a 'data' group.
        f_dest: destination container exposing a writable 'data' group.
        demos2copy: iterable of demo names to copy, in the desired order.
        next_id: numeric suffix to use for the first copied demo.

    Returns:
        A ``(next_free_id, new_names)`` tuple: the suffix following the last
        one used, and the list of names created in the destination.
    """
    created = []
    for source_name in tqdm.tqdm(demos2copy):
        # The running suffix is the start offset plus how many were copied so far.
        target_name = f"demo_{next_id + len(created)}"
        copy_group(
            f_src['data'][source_name],
            f_dest["data"].create_group(target_name),
        )
        created.append(target_name)

    return next_id + len(created), created

### Create the sub-dataset

In [9]:
# Which entry of the 'mask' group to extract.
group_name = "bad"

# Entries are decoded from bytes, then ordered by their numeric suffix
# (the part after the last underscore) rather than alphabetically.
demos2copy = sorted(
    (raw.decode('utf-8') for raw in f_org['mask'][group_name]),
    key=lambda name: int(name.rsplit('_', 1)[-1]),
)
len(demos2copy), demos2copy

(20,
 ['demo_10',
  'demo_12',
  'demo_13',
  'demo_20',
  'demo_40',
  'demo_44',
  'demo_46',
  'demo_63',
  'demo_64',
  'demo_65',
  'demo_66',
  'demo_71',
  'demo_86',
  'demo_87',
  'demo_92',
  'demo_93',
  'demo_97',
  'demo_102',
  'demo_104',
  'demo_106'])

In [8]:
# Output path: the source path with its extension swapped for "_sub.hdf5".
# os.path.splitext only removes the final extension, whereas
# str.replace(".hdf5", "") would also strip that text from anywhere else in
# the path (for example a directory whose name contains ".hdf5").
dataset_path_sub = os.path.splitext(dataset_path)[0] + "_sub.hdf5"
dataset_path_sub

'/home/ns1254/data_franka/bellpepper_oma/ola_114_sub.hdf5'

In [10]:
# Open the output file for writing (mode "w" replaces any existing file at
# this path) and lay out the two empty top-level groups.
f_sub = h5py.File(name=dataset_path_sub, mode="w")
f_sub.create_group(name="data")
f_sub.create_group(name="mask")

<HDF5 group "/mask" (0 members)>

In [13]:
print('copying demos...')
# Copy the selected demos into the new file, renumbering them from demo_0.
# NOTE(review): only the per-demo groups are copied here; attributes on
# f_org['data'] itself and the 'mask' contents are not written to f_sub.
next_id, new_demo_names = copy_demos(f_org, f_sub, demos2copy, 0)

copying demos...


100%|██████████| 20/20 [00:02<00:00,  9.29it/s]


In [14]:
# Copying is finished: release the output handle and the source handle.
f_sub.close()
f_org.close()

### Verify the sub-dataset

In [3]:
# Re-open the freshly written sub-dataset and report the same statistics that
# were printed for the source file.
dataset_path = '/home/ns1254/data_franka/bellpepper_oma/ola_114_sub.hdf5'

# The context manager closes the file even if one of the reads below fails
# (for example a demo group without a 'num_samples' attribute).
with h5py.File(dataset_path, "r") as f_org:
    demos = list(f_org["data"].keys())

    lengths = []
    for demo_name in demos:
        demo = f_org['data'][demo_name]
        num_samples = demo.attrs['num_samples']
        lengths.append(num_samples)

    # Materialise the key names while the file is still open; a bare
    # `f_org['mask'].keys()` in the middle of a cell is evaluated and then
    # discarded, so it never showed anything.
    mask_keys = list(f_org['mask'].keys())

lengths = np.array(lengths)

print('Number of demos: ', len(demos))
print('Max length: ', np.max(lengths))
print('Min length: ', np.min(lengths))
print('Mean length: ', np.mean(lengths))
print('Mask keys: ', mask_keys)

Number of demos:  20
Max length:  761
Min length:  125
Mean length:  225.4
