In [1]:
import os
import json
import h5py
import numpy as np

import robomimic
import robomimic.utils.file_utils as FileUtils
import robomimic.utils.env_utils as EnvUtils
import robomimic.utils.obs_utils as ObsUtils
import imageio
import tqdm
from create_groups import create_hdf5_filter_key

In [2]:
# Path of the base HDF5 dataset that the following cells open and modify.
# Exactly one assignment should be active; the commented lines are alternative datasets.
# NOTE(review): absolute, machine-specific paths — adjust before running elsewhere.
# dataset_path_base="/home/ns/collect_robomimic_demos/Lift_01_30_2024_05_03PM_sojib/lift_mixed_v0.hdf5"
dataset_path_base = "/home/ns/collect_robomimic_demos/sojib_can_expert/PickPlaceCan_01_30_2024_01_45PM_sojib/can_mix_v0.hdf5"
# dataset_path_base = "/home/ns/collect_robomimic_demos/sojib_square_expert/NutAssemblySquare_02_01_2024_06_38PM_sojib/square_mix_v0.hdf5"

In [3]:
# Open the base dataset in read/write mode ("r+": the file must already exist)
# and collect the names of all groups stored under "data".
f = h5py.File(dataset_path_base, mode="r+")
demos = [name for name in f["data"]]

In [4]:
# Gather, for every demo, its `num_samples` attribute and the per-column
# (min, max) of its `actions` dataset, then print summary statistics of the lengths.
demos_minmax = {}
lengths = []
for demo_name in demos:
    demo = f['data'][demo_name]
    num_samples = demo.attrs['num_samples']
    lengths.append(num_samples)

    # Load the whole actions dataset into memory as an ndarray.
    action = np.array(demo['actions'])
    demos_minmax[demo_name] = (action.min(axis=0), action.max(axis=0))

lengths = np.array(lengths)

print('Number of demos: ', len(demos))
for label, reducer in (
    ('Max length: ', np.max),
    ('Min length: ', np.min),
    ('Mean length: ', np.mean),
    ('Median length: ', np.median),
):
    print(label, reducer(lengths))

Number of demos:  200
Max length:  704
Min length:  204
Mean length:  352.405
Median length:  326.0


In [5]:
# Show every filter key stored under "mask" together with its dataset summary.
for key, mask_dataset in f['mask'].items():
    print(key, mask_dataset)

expert <HDF5 dataset "expert": shape (100,), type "|S16">
train <HDF5 dataset "train": shape (63,), type "|S12">
valid <HDF5 dataset "valid": shape (7,), type "|S12">


In [6]:
# The numeric id is the text after the first "_" in each demo name.
# `next_id` is the first id not yet used in the base dataset.
demo_ids = [int(name.split('_')[1]) for name in demos]
max_id = np.max(demo_ids)
next_id = max_id + 1

print(f'max_id: {max_id}')
print(f'next_id: {next_id}')

max_id: 200
next_id: 201


In [7]:
# Close the base dataset handle `f`; later cells that use `f` need it reopened.
f.close()

In [10]:
# demos

In [7]:
# List the filter keys currently stored under the "mask" group.
# NOTE(review): in file order this cell follows an `f.close()` cell, so it relies
# on `f` having been reopened in the kernel — confirm the intended run order.
f['mask'].keys()
# del f['mask']['expert']

<KeysViewHDF5 ['expert']>

In [7]:
# Build the two name lists by id range: demo_1..demo_100 and demo_101..demo_200.
demos_expert = [f'demo_{n}' for n in range(1, 101)]
demos_good = [f'demo_{n}' for n in range(101, 201)]
len(demos_expert)

100

In [8]:
# Sanity check: first and last three names of the expert list.
demos_expert[:3], demos_expert[-3:]

(['demo_1', 'demo_2', 'demo_3'], ['demo_98', 'demo_99', 'demo_100'])

In [9]:
# Sanity check: first and last three names of the good list.
demos_good[:3], demos_good[-3:]

(['demo_101', 'demo_102', 'demo_103'], ['demo_198', 'demo_199', 'demo_200'])

In [10]:
# Re-encode both name lists as fixed-width 16-byte string arrays (dtype 'S16').
demos_expert, demos_good = (
    np.array(name_list, dtype='S16') for name_list in (demos_expert, demos_good)
)
demos_expert.shape, demos_good.shape

((100,), (100,))

In [11]:
# Register the "good" filter key on the base dataset via create_hdf5_filter_key.
# NOTE(review): presumably this writes the names under mask/<key_name> — confirm
# against create_groups.create_hdf5_filter_key.
hdf5_path = dataset_path_base

# To register the "expert" key instead, run the same call with
# demo_keys=sorted(demos_expert) and key_name="expert".
filter_name = "good"
filter_keys = sorted(demos_good)
filter_lengths = create_hdf5_filter_key(
    hdf5_path=hdf5_path,
    demo_keys=filter_keys,
    key_name=filter_name,
)

In [12]:
# Close the base dataset handle `f`.
f.close()

### rename

In [13]:
# Select every demo whose name contains "True" (e.g. 'demo-True_1'); these get renamed below.
demos2rename = [name for name in demos if 'True' in name]
len(demos2rename)

70

In [11]:
# Peek at the first few demo names to see the naming scheme in use.
demos[:5]

['demo-True_1', 'demo-True_10', 'demo-True_11', 'demo-True_12', 'demo-True_13']

In [14]:
# Rename each selected group in f['data'] from '<prefix>_<n>' to 'demo_<n>'.
# Requires `f` to be open in a writable mode.
for demo_name in demos2rename:
    # Parse the numeric suffix with int() rather than eval(): the value is only
    # ever used as a number, and eval() would execute whatever text a group
    # name happens to contain. This also avoids shadowing the builtin `id`.
    demo_id = int(demo_name.split("_")[1])
    # Group.move renames the link in one step, so the group is never left
    # without a name between a delete and a re-link.
    f['data'].move(demo_name, f'demo_{demo_id}')

In [10]:
# Inspect whichever demo group handle is currently bound to the notebook-global `demo`.
demo

<HDF5 group "/data/demo-True_1" (6 members)>

In [11]:
# f['data']['test_demo'] = demo

In [8]:
# del f['data']['test_demo']

In [11]:
# Close the base dataset handle `f` once the renaming work is done.
f.close()

In [7]:
def copy_demos(f1, f2, demos2copy, next_id):
    """Copy selected demos from ``f2`` into ``f1`` under fresh ``demo_<id>`` names.

    For every name in ``demos2copy`` a new group ``data/demo_<i>`` is created in
    ``f1`` (``i`` starts at ``next_id`` and grows by one per demo). The group
    receives the source demo's ``actions``, ``dones``, ``rewards`` and
    ``states`` datasets, every dataset under ``obs`` and ``next_obs``, and the
    ``num_samples`` and ``model_file`` attributes.

    Args:
        f1: Destination file, open for writing, containing a ``data`` group.
        f2: Source file containing ``data/<demo_name>`` groups. It is closed
            by this function before returning.
        demos2copy: Iterable of demo group names to read from ``f2['data']``.
        next_id: First numeric id to assign in the destination.

    Returns:
        Tuple ``(i, new_demos)``: the next unused id and the list of group
        names created in ``f1``, in copy order.
    """
    new_demos = []
    i = next_id
    for demo_name in tqdm.tqdm(demos2copy):
        demo2 = f2['data'][demo_name]
        # Read the attributes first so a malformed source demo fails before
        # anything is written to the destination.
        num_samples = demo2.attrs['num_samples']
        model_file = demo2.attrs['model_file']

        bdemo_name = "demo_" + str(i)
        new_demos.append(bdemo_name)
        i = i + 1

        new_demo = f1["data"].create_group(bdemo_name)

        for k in ('actions', 'dones', 'rewards', 'states'):
            new_demo.create_dataset(k, data=demo2[k])

        # Both observation groups are enumerated from the source demo itself.
        # (Previously the next_obs keys came from an unrelated notebook-global
        # `demo`, which could list the wrong keys or raise NameError.)
        for obs_group in ('obs', 'next_obs'):
            for k in demo2[obs_group].keys():
                new_demo.create_dataset(f'{obs_group}/{k}', data=demo2[obs_group][k])

        new_demo.attrs["num_samples"] = num_samples
        new_demo.attrs["model_file"] = model_file

    # The destination is left open for the caller; only the source is closed here.
    f2.close()
    return i, new_demos

In [8]:
# Source HDF5 files whose demos are merged into the base dataset by the loop below.
# NOTE(review): absolute, machine-specific paths — adjust before running elsewhere.
dataset_paths =[
    "/home/ns/collect_robomimic_demos/Alyssa/PickPlaceCan_01_27_2024_04_35PM_AlyssaColandreo/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/Benjamin/PickPlaceCan_01_29_2024_02_44PM_Benjamin/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/Elbert/PickPlaceCan_01_26_2024_06_40PM_Elbert/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/Lancaster/PickPlaceCan_01_29_2024_01_39PM_lancaster/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/Nalene/PickPlaceCan_01_26_2024_05_17PM_nalene/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/PickPlaceCan_02_02_2024_07_14PM_sojib/demo_image.hdf5"
]

In [9]:
# Merge the demos listed under mask/good of every source file into the base
# dataset `f`, numbering them consecutively from `next_id`.
# Requires `f` to be open in a writable mode and `next_id` to be set.
for dataset_path in dataset_paths:
    print(dataset_path)
    # The context manager guarantees the source file is closed even if the
    # copy raises part-way through. copy_demos also closes it, which is harmless.
    with h5py.File(dataset_path, "r") as f2:
        demos = list(f2["data"].keys())

        # Per-demo lengths; only consumed by the disabled summary prints below.
        lengths = []
        for demo_name in demos:
            demo = f2['data'][demo_name]
            num_samples = demo.attrs['num_samples']
            lengths.append(num_samples)

        lengths = np.array(lengths)

        # print('Number of demos: ', len(demos))
        # print('Max length: ', np.max(lengths))
        # print('Min length: ', np.min(lengths))
        # print('Mean length: ', np.mean(lengths))
        # print('Median length: ', np.median(lengths))

        key = 'good'
        print(key, f2['mask'][key])

        # Mask entries are stored as byte strings; decode them to group names.
        demos2copy = [name.decode('utf-8') for name in f2['mask'][key]]
        print('to copy: ', len(demos2copy))

        next_id, new_demos = copy_demos(f, f2, demos2copy, next_id)

f.close()

/home/ns/collect_robomimic_demos/Alyssa/PickPlaceCan_01_27_2024_04_35PM_AlyssaColandreo/demo_image.hdf5
good <HDF5 dataset "good": shape (16,), type "|S16">
to copy:  16


  0%|          | 0/16 [00:00<?, ?it/s]

100%|██████████| 16/16 [00:12<00:00,  1.27it/s]


/home/ns/collect_robomimic_demos/Benjamin/PickPlaceCan_01_29_2024_02_44PM_Benjamin/demo_image.hdf5
good <HDF5 dataset "good": shape (6,), type "|S16">
to copy:  6


100%|██████████| 6/6 [00:05<00:00,  1.09it/s]


/home/ns/collect_robomimic_demos/Elbert/PickPlaceCan_01_26_2024_06_40PM_Elbert/demo_image.hdf5
good <HDF5 dataset "good": shape (21,), type "|S16">
to copy:  21


100%|██████████| 21/21 [00:09<00:00,  2.25it/s]


/home/ns/collect_robomimic_demos/Lancaster/PickPlaceCan_01_29_2024_01_39PM_lancaster/demo_image.hdf5
good <HDF5 dataset "good": shape (12,), type "|S16">
to copy:  12


100%|██████████| 12/12 [00:07<00:00,  1.70it/s]


/home/ns/collect_robomimic_demos/Nalene/PickPlaceCan_01_26_2024_05_17PM_nalene/demo_image.hdf5
good <HDF5 dataset "good": shape (14,), type "|S16">
to copy:  14


100%|██████████| 14/14 [00:06<00:00,  2.16it/s]


/home/ns/collect_robomimic_demos/PickPlaceCan_02_02_2024_07_14PM_sojib/demo_image.hdf5
good <HDF5 dataset "good": shape (31,), type "|S16">
to copy:  31


100%|██████████| 31/31 [00:02<00:00, 14.58it/s]


In [12]:
# Re-read and decode the names stored under mask/<key> of the current `f2`.
# NOTE(review): same expression as in the merge loop; needs `f2` to be open — confirm run order.
demos2copy=[demo.decode('utf-8') for demo in f2['mask'][key]] 

In [13]:
# Display the full list of names selected for copying (can be long; consider slicing).
demos2copy

['demo-True_1',
 'demo-True_10',
 'demo-True_11',
 'demo-True_12',
 'demo-True_13',
 'demo-True_14',
 'demo-True_15',
 'demo-True_16',
 'demo-True_17',
 'demo-True_18',
 'demo-True_19',
 'demo-True_2',
 'demo-True_20',
 'demo-True_21',
 'demo-True_22',
 'demo-True_24',
 'demo-True_25',
 'demo-True_26',
 'demo-True_27',
 'demo-True_28',
 'demo-True_29',
 'demo-True_3',
 'demo-True_30',
 'demo-True_31',
 'demo-True_32',
 'demo-True_33',
 'demo-True_34',
 'demo-True_35',
 'demo-True_36',
 'demo-True_37',
 'demo-True_38',
 'demo-True_39',
 'demo-True_4',
 'demo-True_40',
 'demo-True_41',
 'demo-True_42',
 'demo-True_43',
 'demo-True_44',
 'demo-True_45',
 'demo-True_46',
 'demo-True_47',
 'demo-True_48',
 'demo-True_49',
 'demo-True_5',
 'demo-True_50',
 'demo-True_51',
 'demo-True_52',
 'demo-True_53',
 'demo-True_54',
 'demo-True_55',
 'demo-True_56',
 'demo-True_57',
 'demo-True_58',
 'demo-True_59',
 'demo-True_6',
 'demo-True_60',
 'demo-True_61',
 'demo-True_62',
 'demo-True_63',
 'd

In [11]:
# Close the base dataset handle `f`.
f.close()

### delete groups

In [8]:
# Path of the single source dataset whose masks and groups are inspected below.
# NOTE(review): `df` holds a file path here, not a DataFrame.
df="/home/ns/collect_robomimic_demos/Alyssa/PickPlaceCan_01_27_2024_04_35PM_AlyssaColandreo/demo_image.hdf5"

In [9]:
# Open the source dataset read/write ("r+") and list its filter keys.
f2 = h5py.File(df, "r+")
f2['mask'].keys()

<KeysViewHDF5 ['train', 'valid']>

In [4]:
# Decode the byte-string demo names stored under mask/good into Python strings.
names=[b.decode('utf-8') for b in f2['mask']['good']]
names[:5]

['demo-True_1', 'demo-True_12', 'demo-True_15', 'demo-True_19', 'demo-True_2']

In [5]:
# Number of demos listed in the decoded mask.
len(names)

14

In [6]:
# del f2['mask']['expert']
del f2['mask']['good']
# del f2['mask']['slow_success']
# del f2['mask']['try_good']

In [22]:
#delete demos not in expert
demos=f2['data'].keys()
demos

<KeysViewHDF5 ['demo-True_1', 'demo-True_10', 'demo-True_12', 'demo-True_13', 'demo-True_14', 'demo-True_15', 'demo-True_16', 'demo-True_17', 'demo-True_18', 'demo-True_2', 'demo-True_20', 'demo-True_21', 'demo-True_23', 'demo-True_24', 'demo-True_25', 'demo-True_26', 'demo-True_27', 'demo-True_28', 'demo-True_29', 'demo-True_30', 'demo-True_31', 'demo-True_33', 'demo-True_34', 'demo-True_35', 'demo-True_36', 'demo-True_39', 'demo-True_4', 'demo-True_40', 'demo-True_6', 'demo-True_7', 'demo-True_8']>

In [23]:
# Number of demo groups currently present under "data" in the source file.
len(demos)

31

In [17]:
# Dry run: report every demo group that is not listed in `names`.
# The actual deletion is left disabled.
# NOTE(review): if deletion is re-enabled, iterate over list(demos) so the
# group is not modified while its keys are being iterated.
for demo_name in demos:
    if demo_name in names:
        continue
    print('demo_name=', demo_name)
    # del f2['data'][demo_name]

In [10]:
# Close the source dataset handle `f2`.
f2.close()