In [30]:
import os
import json
import h5py
import numpy as np

import robomimic
import robomimic.utils.file_utils as FileUtils
import robomimic.utils.env_utils as EnvUtils
import robomimic.utils.obs_utils as ObsUtils
import imageio
import tqdm
from create_groups import create_hdf5_filter_key

In [31]:
# Base HDF5 dataset: opened read/write as `f` below and passed to
# create_hdf5_filter_key as `hdf5_path` in later cells.
dataset_path_base = "/home/ns1254/data_robomimic/carl_dataset/mixed_human_original/spoon_pick_129_int3.hdf5" 

In [32]:
# Open the base dataset in read/write mode and collect the names of the
# demo groups stored under "data".
f = h5py.File(dataset_path_base, mode="r+")
demos = [name for name in f["data"]]

In [33]:
# Gather, for every demo, its recorded sample count and the per-column
# minimum / maximum of its action array.
demos_minmax = {}
lengths = []
for demo_name in demos:
    demo = f['data'][demo_name]
    num_samples = demo.attrs['num_samples']
    lengths.append(num_samples)

    # Materialise the HDF5 dataset as an in-memory array before reducing.
    action = np.array(demo['actions'])
    demos_minmax[demo_name] = (action.min(axis=0), action.max(axis=0))

lengths = np.array(lengths)

# Summary statistics over the demo lengths.
print('Number of demos: ', len(demos))
print('Max length: ', lengths.max())
print('Min length: ', lengths.min())
print('Mean length: ', lengths.mean())
print('Median length: ', np.median(lengths))

Number of demos:  129
Max length:  567
Min length:  52
Mean length:  131.62790697674419
Median length:  71.0


In [5]:
# f.create_group("mask")

In [34]:
f.keys()

<KeysViewHDF5 ['data', 'mask']>

In [40]:
# Show every filter key stored under "mask" together with its dataset summary.
# NOTE: the loop variable `key` stays bound to the last mask name afterwards.
for key, mask_entry in f['mask'].items():
    print(key, mask_entry)

expert <HDF5 dataset "expert": shape (100,), type "|S16">
g56b14 <HDF5 dataset "g56b14": shape (70,), type "|S16">
g64i16 <HDF5 dataset "g64i16": shape (80,), type "|S16">
g80 <HDF5 dataset "g80": shape (80,), type "|S16">
g80b20 <HDF5 dataset "g80b20": shape (100,), type "|S16">
intentional <HDF5 dataset "intentional": shape (29,), type "|S16">
p20i <HDF5 dataset "p20i": shape (100,), type "|S16">


In [36]:
# Largest numeric suffix among the demo names (the text after the first "_"),
# and the first id after it.
demo_ids = [int(name.split('_')[1]) for name in demos]
max_id = np.max(demo_ids)
next_id = max_id + 1

print(f'max_id: {max_id}')
print(f'next_id: {next_id}')

max_id: 128
next_id: 129


In [29]:
f.close()

In [10]:
# demos

In [23]:
# One-off cleanup of the "mask" group in the open base file.
# NOTE: the bare keys() expression below is not displayed because it is not
# the last expression of the cell.
f['mask'].keys()
# Earlier one-off deletions, kept for reference:
# del f['mask']['expert']
# del f['mask']['train']
# Remove the 'g54b16' filter key; this raises if the key is not present.
del f['mask']['g54b16']

In [37]:
# demo_0 .. demo_99 form the expert name list; the other names are decoded
# from the byte strings stored in the "intentional" filter key.
demos_expert = [f'demo_{idx}' for idx in range(100)]
demo_other = [raw.decode('utf-8') for raw in f['mask']['intentional']]
len(demos_expert), len(demo_other)

(100, 29)

In [38]:
# Earlier split, kept for reference:
# p20i= demos_expert[44:] + demo_other[15:]
# Current split: expert demos from index 20 onward, followed by the
# "other" demos from index 9 onward.
p20i= demos_expert[20:] + demo_other[9:]
len(p20i)

100

In [39]:
# Register the selected demo names under the filter name "g80b20" in the
# base file (create_hdf5_filter_key is a project helper; presumably it writes
# mask/<key_name> -- confirm in create_groups).
hdf5_path = dataset_path_base
filter_name = "g80b20"

# Fixed-width byte strings, sorted lexicographically.
p20i = np.array(p20i, dtype='S16')
filter_keys = sorted(p20i)

filter_lengths = create_hdf5_filter_key(
    hdf5_path=hdf5_path,
    demo_keys=filter_keys,
    key_name=filter_name,
)


In [41]:
p20i

array([b'demo_20', b'demo_21', b'demo_22', b'demo_23', b'demo_24',
       b'demo_25', b'demo_26', b'demo_27', b'demo_28', b'demo_29',
       b'demo_30', b'demo_31', b'demo_32', b'demo_33', b'demo_34',
       b'demo_35', b'demo_36', b'demo_37', b'demo_38', b'demo_39',
       b'demo_40', b'demo_41', b'demo_42', b'demo_43', b'demo_44',
       b'demo_45', b'demo_46', b'demo_47', b'demo_48', b'demo_49',
       b'demo_50', b'demo_51', b'demo_52', b'demo_53', b'demo_54',
       b'demo_55', b'demo_56', b'demo_57', b'demo_58', b'demo_59',
       b'demo_60', b'demo_61', b'demo_62', b'demo_63', b'demo_64',
       b'demo_65', b'demo_66', b'demo_67', b'demo_68', b'demo_69',
       b'demo_70', b'demo_71', b'demo_72', b'demo_73', b'demo_74',
       b'demo_75', b'demo_76', b'demo_77', b'demo_78', b'demo_79',
       b'demo_80', b'demo_81', b'demo_82', b'demo_83', b'demo_84',
       b'demo_85', b'demo_86', b'demo_87', b'demo_88', b'demo_89',
       b'demo_90', b'demo_91', b'demo_92', b'demo_93', b'demo_

In [16]:
# Name lists by id range: demo_1 .. demo_99 (expert) and demo_122 .. demo_144 (good).
expert_ids = range(1, 100)
good_ids = range(122, 145)
demos_expert = [f'demo_{idx}' for idx in expert_ids]
demos_good = [f'demo_{idx}' for idx in good_ids]
len(demos_expert), len(demos_good)

(99, 23)

In [10]:
# First 64 expert names plus first 16 "good" names, stored as fixed-width
# byte strings and registered under the filter name "g64b16i2".
hdf5_path = dataset_path_base
filter_name = "g64b16i2"

p20i = np.array(demos_expert[:64] + demos_good[:16], dtype='S16')
filter_keys = sorted(p20i)

filter_lengths = create_hdf5_filter_key(
    hdf5_path=hdf5_path,
    demo_keys=filter_keys,
    key_name=filter_name,
)

In [10]:
demos_expert[:3], demos_expert[-3:]

(['demo_0', 'demo_1', 'demo_2'], ['demo_97', 'demo_98', 'demo_99'])

In [11]:
demos_good[:3], demos_good[-3:]

(['demo_100', 'demo_101', 'demo_102'], ['demo_119', 'demo_120', 'demo_121'])

In [12]:
# Convert both name lists to NumPy arrays of fixed-width byte strings
# (dtype 'S16'), then show their shapes as a sanity check.
demos_expert = np.array(demos_expert, dtype='S16') 
demos_good = np.array(demos_good, dtype='S16') 
demos_expert.shape, demos_good.shape

((100,), (22,))

In [13]:
# Register two filter keys in the base file: "expert" from demos_expert and
# "intentional" from demos_good. Names are sorted before being written.
hdf5_path = dataset_path_base

for filter_name, group_demos in (("expert", demos_expert), ("intentional", demos_good)):
    filter_keys = sorted(group_demos)
    filter_lengths = create_hdf5_filter_key(
        hdf5_path=hdf5_path,
        demo_keys=filter_keys,
        key_name=filter_name,
    )

In [12]:
f.close()

### Rename `demo-True_<n>` groups to `demo_<n>`

In [13]:
# Select the demo names that contain the substring 'True'.
demos2rename = [name for name in demos if 'True' in name]
len(demos2rename)

70

In [11]:
demos[:5]

['demo-True_1', 'demo-True_10', 'demo-True_11', 'demo-True_12', 'demo-True_13']

In [14]:
# Rename every selected group under "data" to demo_<n>, where <n> is the
# numeric text after the first "_" in the old name.
data_group = f['data']
for demo_name in demos2rename:
    # int() rather than eval(): the suffix comes from names stored in the
    # file and must never be executed as code. The local is not called `id`
    # so the builtin stays usable.
    demo_id = int(demo_name.split("_")[1])
    # Group.move relinks the object in a single step. The previous
    # delete-then-assign sequence unlinked the source first, so a failure when
    # creating the new name left the demo without any link in the file.
    data_group.move(demo_name, f'demo_{demo_id}')

In [10]:
demo

<HDF5 group "/data/demo-True_1" (6 members)>

In [11]:
# f['data']['test_demo'] = demo

In [8]:
# del f['data']['test_demo']

In [8]:
f.close()

In [17]:
f['data']['demo_1'].keys()

<KeysViewHDF5 ['actions', 'obs', 'times']>

In [11]:
def copy_demos_sawyer(f1, f2, demos2copy, next_id):
    """Copy selected demos from ``f2`` into ``f1`` under new sequential names.

    For each name in ``demos2copy`` a group ``data/demo_<i>`` is created in
    ``f1``; ``i`` starts at ``next_id`` and grows by one per demo. The
    ``actions`` and ``times`` datasets, every dataset under ``obs`` and the
    ``num_samples`` attribute are copied.

    Args:
        f1: destination file object; must already contain a ``data`` group.
        f2: source file object. It is closed before this function returns.
        demos2copy: iterable of demo names present under ``f2['data']``.
        next_id: numeric suffix used for the first copied demo.

    Returns:
        Tuple ``(i, new_demos)``: the next unused id and the list of group
        names created in ``f1``, in copy order.
    """
    created = []
    i = next_id
    top_level_keys = ('actions', 'times')

    for src_name in tqdm.tqdm(demos2copy):
        src = f2['data'][src_name]
        n_samples = src.attrs['num_samples']

        dst_name = "demo_" + str(i)
        created.append(dst_name)
        i += 1

        f1["data"].create_group(dst_name)
        dst = f1["data"][dst_name]

        # Per-step datasets stored directly on the demo group.
        for k in top_level_keys:
            dst.create_dataset(k, data=src[k])

        # Observation datasets keep their names under an "obs/" prefix.
        for k in src['obs'].keys():
            dst.create_dataset(f'obs/{k}', data=src['obs'][k])

        dst.attrs["num_samples"] = n_samples

    # The source handle is closed here; callers closing it again is harmless
    # only if the file object tolerates repeated close().
    f2.close()
    return i, created

In [13]:
# expert: source demo_image.hdf5 files for the PickPlaceCan task
# (two collection directories under sojib_can_expert).
dataset_paths_expert_can=[
    "/home/ns/collect_robomimic_demos/sojib_can_expert/PickPlaceCan_01_30_2024_01_19PM_sojib/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/sojib_can_expert/PickPlaceCan_01_30_2024_02_33PM_sojib/demo_image.hdf5" 
]

In [7]:
# good: source demo_image.hdf5 files for the PickPlaceCan task,
# one per collection directory.
dataset_paths_good_can =[
    "/home/ns/collect_robomimic_demos/Alyssa/PickPlaceCan_01_27_2024_04_35PM_AlyssaColandreo/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/Benjamin/PickPlaceCan_01_29_2024_02_44PM_Benjamin/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/Elbert/PickPlaceCan_01_26_2024_06_40PM_Elbert/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/Lancaster/PickPlaceCan_01_29_2024_01_39PM_lancaster/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/Nalene/PickPlaceCan_01_26_2024_05_17PM_nalene/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/PickPlaceCan_02_02_2024_07_14PM_sojib/demo_image.hdf5"
]

In [2]:
# lift good: source demo_image.hdf5 files for the Lift task,
# one per collection directory.
dataset_paths_good_lift=[
    "/home/ns/collect_robomimic_demos/Alyssa/Lift_01_27_2024_04_29PM_AlyssaColandreo/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/Benjamin/Lift_01_29_2024_02_36PM_Benjamin/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/Elbert/Lift_01_26_2024_06_34PM_Elbert/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/Emma/Lift_01_27_2024_10_34AM_Emma/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/James/Lift_01_27_2024_03_31PM_JamesonDunsford/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/Lancaster/Lift_01_29_2024_01_28PM_Lancaster/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/Mia/Lift_01_28_2024_10_00AM_Mia/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/Nalene/Lift_01_26_2024_05_12PM_nalene/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/Nusrat/Lift_01_29_2024_11_53AM_Nusrat/demo_image.hdf5" 
]

In [12]:
# Second source file whose demos are appended to the base dataset
# (assigned to `dataset_paths` before the copy loop runs).
dataset_paths_second=[
    "/home/ns1254/data_robomimic/carl_dataset/mixed_human_original/spoon_pick_int3_29.hdf5"
]

In [13]:
next_id

100

In [14]:
# Inputs for the copy loops: the list of source files, and the selection key.
# key == 'all' selects demos from the source file's "data" group listing;
# any other value is looked up as a filter key under the source file's "mask" group.
dataset_paths=dataset_paths_second
key='all'

In [15]:
# sawyer: append demos from every source file to the open base file `f`,
# numbering the new groups from `next_id` onward.
for dataset_path in dataset_paths:
    print(dataset_path)
    # The context manager closes the source file even when the copy raises
    # part-way; previously the handle stayed open on any error.
    with h5py.File(dataset_path, "r") as f2:
        # NOTE: this rebinds the notebook-level `demos` to the source file's names.
        demos = list(f2["data"].keys())

        lengths=[]
        for demo_name in demos:
            demo=f2['data'][demo_name]
            num_samples=demo.attrs['num_samples']
            lengths.append(num_samples)

        lengths=np.array(lengths)

        # print('Number of demos: ', len(demos))
        # print('Max length: ', np.max(lengths))
        # print('Min length: ', np.min(lengths))
        # print('Mean length: ', np.mean(lengths))
        # print('Median length: ', np.median(lengths))

        if key=='all':
            demos2copy=demos
        else:
            # Any other key is read as a filter key holding byte-string demo names.
            print(key, f2['mask'][key])
            demos2copy=[demo.decode('utf-8') for demo in f2['mask'][key]]

        # demos2copy=demos2copy[:50] #special case.
        print('to copy: ', len(demos2copy))

        # copy_demos_sawyer also closes f2 before returning; leaving the
        # `with` block afterwards is a no-op on an already closed file.
        next_id, new_demos=copy_demos_sawyer(f, f2, demos2copy, next_id)

# f.close()

/home/ns1254/data_robomimic/carl_dataset/mixed_human_original/spoon_pick_int3_29.hdf5
to copy:  29


100%|██████████| 29/29 [00:02<00:00, 11.54it/s]


In [11]:
def copy_demos(f1, f2, demos2copy, next_id):
    """Copy selected demos from ``f2`` into ``f1`` under new sequential names.

    For each name in ``demos2copy`` a group ``data/demo_<i>`` is created in
    ``f1``; ``i`` starts at ``next_id`` and grows by one per demo. The
    ``actions``, ``dones``, ``rewards`` and ``states`` datasets, every dataset
    under ``obs`` and ``next_obs``, and the ``num_samples`` / ``model_file``
    attributes are copied.

    Args:
        f1: destination file object; must already contain a ``data`` group.
        f2: source file object. It is closed before this function returns.
        demos2copy: iterable of demo names present under ``f2['data']``.
        next_id: numeric suffix used for the first copied demo.

    Returns:
        Tuple ``(i, new_demos)``: the next unused id and the list of group
        names created in ``f1``, in copy order.

    Raises:
        KeyError: if a source demo lacks one of the expected datasets or
            attributes (as raised by the underlying file object).
    """
    new_demos=[]
    i=next_id
    for demo_name in tqdm.tqdm(demos2copy):
        demo2=f2['data'][demo_name]
        # Read attributes first so a malformed source demo fails before
        # anything is created in the destination.
        num_samples= demo2.attrs['num_samples']
        model_file= demo2.attrs['model_file']

        bdemo_name="demo_"+str(i)
        new_demos.append(bdemo_name)
        i = i+1

        f1["data"].create_group(bdemo_name)
        target = f1["data"][bdemo_name]

        ks=['actions', 'dones', 'rewards', 'states']
        for k in ks:
            target.create_dataset(k, data=demo2[k])

        # Both observation sections are read from the demo being copied.
        # The next_obs keys used to come from a notebook-level `demo`
        # variable, i.e. from whichever demo another cell touched last.
        for section in ('obs', 'next_obs'):
            for k in demo2[section].keys():
                target.create_dataset(f'{section}/{k}', data=demo2[section][k])

        target.attrs["num_samples"] = num_samples
        target.attrs["model_file"] = model_file

    # f.close()
    # Kept for backward compatibility: callers rely on f2 being closed here.
    f2.close()
    return i , new_demos

In [12]:
# Append demos from every source file to the open base file `f`, numbering
# the new groups from `next_id` onward.
for dataset_path in dataset_paths:
    print(dataset_path)
    # The context manager closes the source file even when the copy raises
    # part-way; previously the handle stayed open on any error.
    with h5py.File(dataset_path, "r") as f2:
        # NOTE: this rebinds the notebook-level `demos` to the source file's names.
        demos = list(f2["data"].keys())

        lengths=[]
        for demo_name in demos:
            demo=f2['data'][demo_name]
            num_samples=demo.attrs['num_samples']
            lengths.append(num_samples)

        lengths=np.array(lengths)

        # print('Number of demos: ', len(demos))
        # print('Max length: ', np.max(lengths))
        # print('Min length: ', np.min(lengths))
        # print('Mean length: ', np.mean(lengths))
        # print('Median length: ', np.median(lengths))

        if key=='all':
            # Only the first 20 names of the data-group listing are taken.
            demos2copy=demos[:20]
        else:
            # Any other key is read as a filter key holding byte-string demo names.
            print(key, f2['mask'][key])
            demos2copy=[demo.decode('utf-8') for demo in f2['mask'][key]]

        # demos2copy=demos2copy[:50] #special case.
        print('to copy: ', len(demos2copy))

        # copy_demos also closes f2 before returning; leaving the `with`
        # block afterwards is a no-op on an already closed file.
        next_id, new_demos=copy_demos(f, f2, demos2copy, next_id)

# f.close()

/home/ns1254/data_robomimic/carl_dataset/mixed_human_original/lift_try_good_21.hdf5
to copy:  20


100%|██████████| 20/20 [00:00<00:00, 20.51it/s]


In [19]:
f.close()

In [16]:
# Names of the demos created by the last copy call, as fixed-width byte
# strings (dtype 'S16'); displayed as a check before writing the filter key.
demos_group= np.array(new_demos, dtype='S16') 
demos_group

array([b'demo_100', b'demo_101', b'demo_102', b'demo_103', b'demo_104',
       b'demo_105', b'demo_106', b'demo_107', b'demo_108', b'demo_109',
       b'demo_110', b'demo_111', b'demo_112', b'demo_113', b'demo_114',
       b'demo_115', b'demo_116', b'demo_117', b'demo_118', b'demo_119',
       b'demo_120', b'demo_121', b'demo_122', b'demo_123', b'demo_124',
       b'demo_125', b'demo_126', b'demo_127', b'demo_128'], dtype='|S16')

In [17]:
# Register the newly copied demos under the filter name "intentional" in the
# base file. The "expert" key is not rewritten here.
hdf5_path = dataset_path_base
filter_name = "intentional"

filter_keys = sorted(demos_group)
filter_lengths = create_hdf5_filter_key(
    hdf5_path=hdf5_path,
    demo_keys=filter_keys,
    key_name=filter_name,
)

In [12]:
demos2copy=[demo.decode('utf-8') for demo in f2['mask'][key]] 

In [13]:
demos2copy

['demo-True_1',
 'demo-True_10',
 'demo-True_11',
 'demo-True_12',
 'demo-True_13',
 'demo-True_14',
 'demo-True_15',
 'demo-True_16',
 'demo-True_17',
 'demo-True_18',
 'demo-True_19',
 'demo-True_2',
 'demo-True_20',
 'demo-True_21',
 'demo-True_22',
 'demo-True_24',
 'demo-True_25',
 'demo-True_26',
 'demo-True_27',
 'demo-True_28',
 'demo-True_29',
 'demo-True_3',
 'demo-True_30',
 'demo-True_31',
 'demo-True_32',
 'demo-True_33',
 'demo-True_34',
 'demo-True_35',
 'demo-True_36',
 'demo-True_37',
 'demo-True_38',
 'demo-True_39',
 'demo-True_4',
 'demo-True_40',
 'demo-True_41',
 'demo-True_42',
 'demo-True_43',
 'demo-True_44',
 'demo-True_45',
 'demo-True_46',
 'demo-True_47',
 'demo-True_48',
 'demo-True_49',
 'demo-True_5',
 'demo-True_50',
 'demo-True_51',
 'demo-True_52',
 'demo-True_53',
 'demo-True_54',
 'demo-True_55',
 'demo-True_56',
 'demo-True_57',
 'demo-True_58',
 'demo-True_59',
 'demo-True_6',
 'demo-True_60',
 'demo-True_61',
 'demo-True_62',
 'demo-True_63',
 'd

In [15]:
f.close()

### Delete mask filter keys and unwanted demos

In [8]:
df="/home/ns/collect_robomimic_demos/Alyssa/PickPlaceCan_01_27_2024_04_35PM_AlyssaColandreo/demo_image.hdf5"

In [9]:
# Open the file at `df` read/write (needed for the deletions below) and
# list the filter keys currently stored under "mask".
f2 = h5py.File(df, "r+")
f2['mask'].keys()

<KeysViewHDF5 ['train', 'valid']>

In [4]:
# Decode the byte-string demo names stored in the 'good' filter key and
# preview the first five.
names = [raw.decode('utf-8') for raw in f2['mask']['good']]
names[:5]

['demo-True_1', 'demo-True_12', 'demo-True_15', 'demo-True_19', 'demo-True_2']

In [5]:
len(names)

14

In [6]:
# Remove one filter key from the "mask" group of f2. Only the 'good' key is
# deleted; the commented lines are alternative one-off deletions.
# del f2['mask']['expert']
del f2['mask']['good']
# del f2['mask']['slow_success']
# del f2['mask']['try_good']

In [22]:
#delete demos not in expert
demos=f2['data'].keys()
demos

<KeysViewHDF5 ['demo-True_1', 'demo-True_10', 'demo-True_12', 'demo-True_13', 'demo-True_14', 'demo-True_15', 'demo-True_16', 'demo-True_17', 'demo-True_18', 'demo-True_2', 'demo-True_20', 'demo-True_21', 'demo-True_23', 'demo-True_24', 'demo-True_25', 'demo-True_26', 'demo-True_27', 'demo-True_28', 'demo-True_29', 'demo-True_30', 'demo-True_31', 'demo-True_33', 'demo-True_34', 'demo-True_35', 'demo-True_36', 'demo-True_39', 'demo-True_4', 'demo-True_40', 'demo-True_6', 'demo-True_7', 'demo-True_8']>

In [23]:
len(demos)

31

In [17]:
# Report every demo whose name is absent from `names`; the actual deletion
# stays commented out so this is a dry run.
for demo_name in demos:
    if demo_name in names:
        continue
    print('demo_name=', demo_name)
    # del f2['data'][demo_name]

In [10]:
f2.close()