In [1]:
import os
import json
import h5py
import numpy as np

import robomimic
import robomimic.utils.file_utils as FileUtils
import robomimic.utils.env_utils as EnvUtils
import robomimic.utils.obs_utils as ObsUtils
import imageio
import tqdm 
import glob
import argparse
from create_groups import create_hdf5_filter_key

In [2]:
# dataset_path_base="/home/ns/collect_robomimic_demos/sojib_can_expert/PickPlaceCan_01_30_2024_01_45PM_sojib/can_mix_v0.hdf5"
dataset_path_base_lift = "/home/ns/collect_robomimic_demos/processed/demo_image_sub1.hdf5"

In [3]:
dataset_path_base=dataset_path_base_lift

In [4]:
f_base = h5py.File(dataset_path_base, "r+")
demos = list(f_base["data"].keys())

lengths=[]
demos_minmax={}
for demo_name in demos:
    demo=f_base['data'][demo_name]
    num_samples=demo.attrs['num_samples']
    lengths.append(num_samples)

    action=f_base['data'][demo_name]['actions']
    action=np.array(action) 
    demos_minmax[demo_name] = (np.min(action, axis=0), np.max(action, axis=0))


lengths=np.array(lengths)

print('Number of demos: ', len(demos))
print('Max length: ', np.max(lengths))
print('Min length: ', np.min(lengths))
print('Mean length: ', np.mean(lengths))
print('Median length: ', np.median(lengths))
print('')

for key in f_base['mask'].keys():
    print(key, f_base['mask'][key])

print('')
max_id=np.max([int(demo.split('_')[1]) for demo in demos])
print(f'max_id: {max_id}')

next_id=max_id+1
print(f'next_id: {next_id}')

Number of demos:  200
Max length:  800
Min length:  116
Mean length:  459.275
Median length:  255.0

bad_hover <HDF5 dataset "bad_hover": shape (78,), type "|S16">
bad_touch <HDF5 dataset "bad_touch": shape (22,), type "|S16">
expert <HDF5 dataset "expert": shape (100,), type "|S16">
failed <HDF5 dataset "failed": shape (100,), type "|S16">

max_id: 200
next_id: 201


In [6]:
# demos_expert=[f'demo_{id}' for id in range(1, 100+1)]
# demos_expert = np.array(demos_expert, dtype='S16') 

# hdf5_path=dataset_path_base 

# filter_keys=sorted([elem for elem in demos_expert])
# filter_name="expert"
# filter_lengths = create_hdf5_filter_key(hdf5_path=hdf5_path, demo_keys=filter_keys, key_name=filter_name)

In [5]:
expert_names=[b.decode('utf-8') for b in f_base['mask']['expert'] ]
expert_names[:5]

['demo_1', 'demo_10', 'demo_100', 'demo_11', 'demo_12']

In [5]:
f_base.close()

In [5]:
# del f_base['mask']['train']
# del f_base['mask']['valid']

### create group from text file (no data copy)

In [10]:
demos_txt=[]
with open('tmp_txts/bad_touch.txt') as f:
    lines=f.readlines()
    for line in lines:
        demos_txt.append(line.strip().split('.')[0])
len(demos_txt)

22

In [11]:
demos_txt[:5]

['demo_104', 'demo_109', 'demo_110', 'demo_112', 'demo_113']

In [12]:
demos_txt = np.array(demos_txt, dtype='S16') 

hdf5_path=dataset_path_base 

filter_keys=sorted([elem for elem in demos_txt])
filter_name="bad_touch"
filter_lengths = create_hdf5_filter_key(hdf5_path=hdf5_path, demo_keys=filter_keys, key_name=filter_name)

In [13]:
f_base.close()

In [6]:
def copy_demos(f1, f2, demos2copy, next_id):
    new_demos=[]
    i=next_id
    for demo_name in tqdm.tqdm(demos2copy):
        demo2=f2['data'][demo_name]
        num_samples= demo2.attrs['num_samples']
        model_file= demo2.attrs['model_file']

        bdemo_name="demo_"+str(i)
        new_demos.append(bdemo_name)
        i = i+1

        f1["data"].create_group(bdemo_name)


        ks=['actions', 'dones', 'rewards', 'states']
        for k in ks:
            kdata = demo2[k]
            f1["data"][bdemo_name].create_dataset(k, data=kdata)

        obs_keys =demo2['obs'].keys()
        next_obs_keys= demo['next_obs'].keys()
        for k in obs_keys:
            kdata = demo2['obs'][k]
            f1["data"][bdemo_name].create_dataset(f'obs/{k}', data=kdata)
        for k in next_obs_keys:
            kdata = demo2['next_obs'][k]
            f1["data"][bdemo_name].create_dataset(f'next_obs/{k}', data=kdata)

        f1["data"][bdemo_name].attrs["num_samples"] = num_samples
        f1["data"][bdemo_name].attrs["model_file"] = model_file

    # f.close()
    f2.close()
    return i , new_demos

In [7]:
dataset_paths_can=[
    "/home/ns/collect_robomimic_demos/Alyssa/PickPlaceCan_01_27_2024_04_35PM_AlyssaColandreo/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/Benjamin/PickPlaceCan_01_29_2024_02_44PM_Benjamin/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/Elbert/PickPlaceCan_01_26_2024_06_40PM_Elbert/demo_image.hdf5", 
    "/home/ns/collect_robomimic_demos/Emma/PickPlaceCan_01_27_2024_10_41AM_Emma/demo_image.hdf5", 
    "/home/ns/collect_robomimic_demos/James/PickPlaceCan_01_27_2024_03_37PM_JaJamesonDunsford/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/Lancaster/PickPlaceCan_01_29_2024_01_39PM_lancaster/demo_image.hdf5", 
    "/home/ns/collect_robomimic_demos/Mia/PickPlaceCan_01_28_2024_10_12AM_Mia/demo_image.hdf5", 
    "/home/ns/collect_robomimic_demos/Nalene/PickPlaceCan_01_26_2024_05_17PM_nalene/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/Nusrat/PickPlaceCan_01_29_2024_12_03PM_Nusrat/demo_image.hdf5", 
    "/home/ns/collect_robomimic_demos/sumaya/PickPlaceCan_01_27_2024_01_30PM_sumayaakter/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/tazul/PickPlaceCan_01_27_2024_12_40PM_tazulislam/demo_image.hdf5"
]

In [7]:
dataset_paths_lift=[
    "/home/ns/collect_robomimic_demos/Alyssa/Lift_01_27_2024_04_29PM_AlyssaColandreo/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/Benjamin/Lift_01_29_2024_02_36PM_Benjamin/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/Elbert/Lift_01_26_2024_06_34PM_Elbert/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/Emma/Lift_01_27_2024_10_34AM_Emma/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/James/Lift_01_27_2024_03_31PM_JamesonDunsford/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/Lancaster/Lift_01_29_2024_01_28PM_Lancaster/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/Mia/Lift_01_28_2024_10_00AM_Mia/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/Nalene/Lift_01_26_2024_05_12PM_nalene/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/Nusrat/Lift_01_29_2024_11_53AM_Nusrat/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/ola/Lift_01_25_2024_05_25PM_ola/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/sumaya/Lift_01_27_2024_01_20PM_sumayaakter/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/tazul/Lift_01_27_2024_12_28PM_tazul/demo_image.hdf5"
]

In [8]:
group="try_failed" 
# group="hl"
# group="slow_timeout"

In [9]:
dataset_paths=dataset_paths_lift

In [10]:
new_demoss=[]
for dataset_path in dataset_paths:
    path=os.path.dirname(dataset_path)
    video_group=path+"/videos/"+group
    files=glob.glob(video_group+"/*.mp4")
    demos2copy = [os.path.basename(file).replace(".mp4", "") for file in files]
    

    # print(video_group, len(demos2copy))

    if len(demos2copy)==0: 
        continue 

    print(video_group, len(demos2copy))
    f2 = h5py.File(dataset_path, "r")
    next_id, new_demos=copy_demos(f_base, f2, demos2copy, next_id)
    new_demoss.extend(new_demos) 
    
f_base.close()

/home/ns/collect_robomimic_demos/Benjamin/Lift_01_29_2024_02_36PM_Benjamin/videos/try_failed 2


  0%|          | 0/2 [00:00<?, ?it/s]

100%|██████████| 2/2 [00:00<00:00,  7.18it/s]


/home/ns/collect_robomimic_demos/Emma/Lift_01_27_2024_10_34AM_Emma/videos/try_failed 2


100%|██████████| 2/2 [00:00<00:00,  8.54it/s]


/home/ns/collect_robomimic_demos/Lancaster/Lift_01_29_2024_01_28PM_Lancaster/videos/try_failed 7


100%|██████████| 7/7 [00:00<00:00,  9.28it/s]


/home/ns/collect_robomimic_demos/Mia/Lift_01_28_2024_10_00AM_Mia/videos/try_failed 18


100%|██████████| 18/18 [00:03<00:00,  5.76it/s]


/home/ns/collect_robomimic_demos/Nusrat/Lift_01_29_2024_11_53AM_Nusrat/videos/try_failed 15


100%|██████████| 15/15 [00:01<00:00,  9.11it/s]


/home/ns/collect_robomimic_demos/ola/Lift_01_25_2024_05_25PM_ola/videos/try_failed 9


100%|██████████| 9/9 [00:00<00:00, 12.39it/s]


/home/ns/collect_robomimic_demos/sumaya/Lift_01_27_2024_01_20PM_sumayaakter/videos/try_failed 22


100%|██████████| 22/22 [00:05<00:00,  4.15it/s]


/home/ns/collect_robomimic_demos/tazul/Lift_01_27_2024_12_28PM_tazul/videos/try_failed 25


100%|██████████| 25/25 [00:14<00:00,  1.68it/s]


In [11]:
dataset_path

'/home/ns/collect_robomimic_demos/tazul/Lift_01_27_2024_12_28PM_tazul/demo_image.hdf5'

In [12]:
demos2copy[:4]

['demo-False_8', 'demo-False_16', 'demo-False_1', 'demo-False_21']

In [13]:
new_demoss

['demo_101',
 'demo_102',
 'demo_103',
 'demo_104',
 'demo_105',
 'demo_106',
 'demo_107',
 'demo_108',
 'demo_109',
 'demo_110',
 'demo_111',
 'demo_112',
 'demo_113',
 'demo_114',
 'demo_115',
 'demo_116',
 'demo_117',
 'demo_118',
 'demo_119',
 'demo_120',
 'demo_121',
 'demo_122',
 'demo_123',
 'demo_124',
 'demo_125',
 'demo_126',
 'demo_127',
 'demo_128',
 'demo_129',
 'demo_130',
 'demo_131',
 'demo_132',
 'demo_133',
 'demo_134',
 'demo_135',
 'demo_136',
 'demo_137',
 'demo_138',
 'demo_139',
 'demo_140',
 'demo_141',
 'demo_142',
 'demo_143',
 'demo_144',
 'demo_145',
 'demo_146',
 'demo_147',
 'demo_148',
 'demo_149',
 'demo_150',
 'demo_151',
 'demo_152',
 'demo_153',
 'demo_154',
 'demo_155',
 'demo_156',
 'demo_157',
 'demo_158',
 'demo_159',
 'demo_160',
 'demo_161',
 'demo_162',
 'demo_163',
 'demo_164',
 'demo_165',
 'demo_166',
 'demo_167',
 'demo_168',
 'demo_169',
 'demo_170',
 'demo_171',
 'demo_172',
 'demo_173',
 'demo_174',
 'demo_175',
 'demo_176',
 'demo_177',

In [8]:
f_base.close()

In [14]:
demos_group= np.array(new_demoss, dtype='S16') 
demos_group

array([b'demo_101', b'demo_102', b'demo_103', b'demo_104', b'demo_105',
       b'demo_106', b'demo_107', b'demo_108', b'demo_109', b'demo_110',
       b'demo_111', b'demo_112', b'demo_113', b'demo_114', b'demo_115',
       b'demo_116', b'demo_117', b'demo_118', b'demo_119', b'demo_120',
       b'demo_121', b'demo_122', b'demo_123', b'demo_124', b'demo_125',
       b'demo_126', b'demo_127', b'demo_128', b'demo_129', b'demo_130',
       b'demo_131', b'demo_132', b'demo_133', b'demo_134', b'demo_135',
       b'demo_136', b'demo_137', b'demo_138', b'demo_139', b'demo_140',
       b'demo_141', b'demo_142', b'demo_143', b'demo_144', b'demo_145',
       b'demo_146', b'demo_147', b'demo_148', b'demo_149', b'demo_150',
       b'demo_151', b'demo_152', b'demo_153', b'demo_154', b'demo_155',
       b'demo_156', b'demo_157', b'demo_158', b'demo_159', b'demo_160',
       b'demo_161', b'demo_162', b'demo_163', b'demo_164', b'demo_165',
       b'demo_166', b'demo_167', b'demo_168', b'demo_169', b'dem

In [15]:
hdf5_path=dataset_path_base 

# filter_keys=sorted([elem for elem in demos_expert])
# filter_name="expert"
# filter_lengths = create_hdf5_filter_key(hdf5_path=hdf5_path, demo_keys=filter_keys, key_name=filter_name)

filter_keys=sorted([elem for elem in demos_group])
filter_name="failed"
filter_lengths = create_hdf5_filter_key(hdf5_path=hdf5_path, demo_keys=filter_keys, key_name=filter_name)

In [16]:
f_base.close()

In [4]:
demos2copy

NameError: name 'demos2copy' is not defined