In [1]:
import os
import json
import h5py
import numpy as np

import robomimic
import robomimic.utils.file_utils as FileUtils
import robomimic.utils.env_utils as EnvUtils
import robomimic.utils.obs_utils as ObsUtils
import imageio
import tqdm 
import glob
import argparse
from create_groups import create_hdf5_filter_key

In [2]:
dataset_path_base="/home/ns/collect_robomimic_demos/sojib_can_expert/PickPlaceCan_01_30_2024_01_45PM_sojib/can_mix_v0.hdf5"

In [3]:
f_base = h5py.File(dataset_path_base, "r+")
demos = list(f_base["data"].keys())

lengths=[]
demos_minmax={}
for demo_name in demos:
    demo=f_base['data'][demo_name]
    num_samples=demo.attrs['num_samples']
    lengths.append(num_samples)

    action=f_base['data'][demo_name]['actions']
    action=np.array(action) 
    demos_minmax[demo_name] = (np.min(action, axis=0), np.max(action, axis=0))


lengths=np.array(lengths)

print('Number of demos: ', len(demos))
print('Max length: ', np.max(lengths))
print('Min length: ', np.min(lengths))
print('Mean length: ', np.mean(lengths))
print('Median length: ', np.median(lengths))
print('')

for key in f_base['mask'].keys():
    print(key, f_base['mask'][key])

print('')
max_id=np.max([int(demo.split('_')[1]) for demo in demos])
print(f'max_id: {max_id}')

next_id=max_id+1
print(f'next_id: {next_id}')

Number of demos:  200
Max length:  704
Min length:  204
Mean length:  352.405
Median length:  326.0

expert <HDF5 dataset "expert": shape (100,), type "|S16">
good <HDF5 dataset "good": shape (100,), type "|S16">

max_id: 200
next_id: 201


In [4]:
f_base.close()

In [5]:
# del f_base['mask']['train']
# del f_base['mask']['valid']

In [4]:
def copy_demos(f1, f2, demos2copy, next_id):
    new_demos=[]
    i=next_id
    for demo_name in tqdm.tqdm(demos2copy):
        demo2=f2['data'][demo_name]
        num_samples= demo2.attrs['num_samples']
        model_file= demo2.attrs['model_file']

        bdemo_name="demo_"+str(i)
        new_demos.append(bdemo_name)
        i = i+1

        f1["data"].create_group(bdemo_name)


        ks=['actions', 'dones', 'rewards', 'states']
        for k in ks:
            kdata = demo2[k]
            f1["data"][bdemo_name].create_dataset(k, data=kdata)

        obs_keys =demo2['obs'].keys()
        next_obs_keys= demo['next_obs'].keys()
        for k in obs_keys:
            kdata = demo2['obs'][k]
            f1["data"][bdemo_name].create_dataset(f'obs/{k}', data=kdata)
        for k in next_obs_keys:
            kdata = demo2['next_obs'][k]
            f1["data"][bdemo_name].create_dataset(f'next_obs/{k}', data=kdata)

        f1["data"][bdemo_name].attrs["num_samples"] = num_samples
        f1["data"][bdemo_name].attrs["model_file"] = model_file

    # f.close()
    f2.close()
    return i , new_demos

In [5]:
dataset_paths=[
    "/home/ns/collect_robomimic_demos/Alyssa/PickPlaceCan_01_27_2024_04_35PM_AlyssaColandreo/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/Benjamin/PickPlaceCan_01_29_2024_02_44PM_Benjamin/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/Elbert/PickPlaceCan_01_26_2024_06_40PM_Elbert/demo_image.hdf5", 
    "/home/ns/collect_robomimic_demos/Emma/PickPlaceCan_01_27_2024_10_41AM_Emma/demo_image.hdf5", 
    "/home/ns/collect_robomimic_demos/James/PickPlaceCan_01_27_2024_03_37PM_JaJamesonDunsford/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/Lancaster/PickPlaceCan_01_29_2024_01_39PM_lancaster/demo_image.hdf5", 
    "/home/ns/collect_robomimic_demos/Mia/PickPlaceCan_01_28_2024_10_12AM_Mia/demo_image.hdf5", 
    "/home/ns/collect_robomimic_demos/Nalene/PickPlaceCan_01_26_2024_05_17PM_nalene/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/Nusrat/PickPlaceCan_01_29_2024_12_03PM_Nusrat/demo_image.hdf5", 
    "/home/ns/collect_robomimic_demos/sumaya/PickPlaceCan_01_27_2024_01_30PM_sumayaakter/demo_image.hdf5",
    "/home/ns/collect_robomimic_demos/tazul/PickPlaceCan_01_27_2024_12_40PM_tazulislam/demo_image.hdf5"
]

In [6]:
group="try_failed" 
# group="hl"
# group="slow_timeout"

In [7]:
for dataset_path in dataset_paths:
    path=os.path.dirname(dataset_path)
    video_group=path+"/videos/"+group
    files=glob.glob(video_group+"/*.mp4")
    demos2copy = [os.path.basename(file).replace(".mp4", "") for file in files]
    

    print(video_group, len(demos2copy))
    
    f2 = h5py.File(dataset_path, "r")
    next_id, new_demos=copy_demos(f_base, f2, demos2copy, next_id)
    f2.close()
    
f_base.close()

/home/ns/collect_robomimic_demos/Alyssa/PickPlaceCan_01_27_2024_04_35PM_AlyssaColandreo/videos/try_failed 1


100%|██████████| 1/1 [00:01<00:00,  1.88s/it]


/home/ns/collect_robomimic_demos/Benjamin/PickPlaceCan_01_29_2024_02_44PM_Benjamin/videos/try_failed 1


100%|██████████| 1/1 [00:01<00:00,  1.45s/it]


/home/ns/collect_robomimic_demos/Elbert/PickPlaceCan_01_26_2024_06_40PM_Elbert/videos/try_failed 0


0it [00:00, ?it/s]


/home/ns/collect_robomimic_demos/Emma/PickPlaceCan_01_27_2024_10_41AM_Emma/videos/try_failed 5


100%|██████████| 5/5 [00:06<00:00,  1.37s/it]


/home/ns/collect_robomimic_demos/James/PickPlaceCan_01_27_2024_03_37PM_JaJamesonDunsford/videos/try_failed 1


100%|██████████| 1/1 [00:05<00:00,  5.23s/it]


/home/ns/collect_robomimic_demos/Lancaster/PickPlaceCan_01_29_2024_01_39PM_lancaster/videos/try_failed 1


100%|██████████| 1/1 [00:07<00:00,  7.18s/it]


/home/ns/collect_robomimic_demos/Mia/PickPlaceCan_01_28_2024_10_12AM_Mia/videos/try_failed 8


100%|██████████| 8/8 [00:11<00:00,  1.45s/it]


/home/ns/collect_robomimic_demos/Nalene/PickPlaceCan_01_26_2024_05_17PM_nalene/videos/try_failed 0


0it [00:00, ?it/s]


/home/ns/collect_robomimic_demos/Nusrat/PickPlaceCan_01_29_2024_12_03PM_Nusrat/videos/try_failed 9


100%|██████████| 9/9 [00:05<00:00,  1.76it/s]


/home/ns/collect_robomimic_demos/sumaya/PickPlaceCan_01_27_2024_01_30PM_sumayaakter/videos/try_failed 15


 27%|██▋       | 4/15 [00:06<00:16,  1.51s/it]


OSError: [Errno 28] Can't write data (file write failed: time = Sat Feb  3 12:10:02 2024
, filename = '/home/ns/collect_robomimic_demos/sojib_can_expert/PickPlaceCan_01_30_2024_01_45PM_sojib/can_mix_v0.hdf5', file descriptor = 69, errno = 28, error message = 'No space left on device', buf = 0x8e1e4e4, total write size = 21870940, bytes this sub-write = 21870940, bytes actually written = 18446744073709551615, offset = 0)

In [34]:
demos2copy

['demo-False_16',
 'demo-False_1',
 'demo-False_12',
 'demo-False_24',
 'demo-False_17',
 'demo-False_9',
 'demo-False_13',
 'demo-False_6',
 'demo-False_2',
 'demo-False_3',
 'demo-False_11',
 'demo-False_18',
 'demo-False_15',
 'demo-False_4',
 'demo-False_10',
 'demo-False_22',
 'demo-False_19']

In [8]:
f_base.close()

In [None]:
for dataset_path in dataset_paths:
    print(dataset_path)
    f2 = h5py.File(dataset_path, "r")
    demos = list(f2["data"].keys())

    lengths=[]
    for demo_name in demos:
        demo=f2['data'][demo_name]
        num_samples=demo.attrs['num_samples']
        lengths.append(num_samples)

    lengths=np.array(lengths)

    # print('Number of demos: ', len(demos))
    # print('Max length: ', np.max(lengths))
    # print('Min length: ', np.min(lengths))
    # print('Mean length: ', np.mean(lengths))
    # print('Median length: ', np.median(lengths))

    key='good'
    print(key, f2['mask'][key])

    demos2copy=[demo.decode('utf-8') for demo in f2['mask'][key]] 
    print('to copy: ', len(demos2copy))


    next_id, new_demos=copy_demos(f, f2, demos2copy, next_id)

    f2.close()

f.close()