In [1]:
import os
import json
import h5py
import numpy as np

import robomimic
import robomimic.utils.file_utils as FileUtils
import robomimic.utils.env_utils as EnvUtils
import robomimic.utils.obs_utils as ObsUtils
import imageio
import tqdm
from robomimic.utils.file_utils import create_hdf5_filter_key
import shutil

In [2]:
dataset_path = "/home/ns1254/data_robomimic/sawyer/drawyer_push86v4.h5" 

In [3]:
# f.close()

In [4]:
f = h5py.File(dataset_path, "r")
demos = list(f["data"].keys())

In [5]:
lengths=[]
for demo_name in demos:
    demo=f['data'][demo_name]
    num_samples=demo.attrs['num_samples']
    lengths.append(num_samples)

lengths=np.array(lengths)

print('Number of demos: ', len(demos))
print('Max length: ', np.max(lengths))
print('Min length: ', np.min(lengths))
print('Mean length: ', np.mean(lengths))

Number of demos:  172
Max length:  234
Min length:  14
Mean length:  59.22093023255814


In [6]:
for key in f['mask'].keys():
    print(key, f['mask'][key])

50g10i <HDF5 dataset "50g10i": shape (60,), type "|S8">
50g10i_val <HDF5 dataset "50g10i_val": shape (60,), type "|S8">
50g10p <HDF5 dataset "50g10p": shape (60,), type "|S8">
50g10p_val <HDF5 dataset "50g10p_val": shape (60,), type "|S8">
50g10s <HDF5 dataset "50g10s": shape (60,), type "|S8">
50g10s_val <HDF5 dataset "50g10s_val": shape (60,), type "|S8">
good <HDF5 dataset "good": shape (50,), type "|S8">
inefficient <HDF5 dataset "inefficient": shape (12,), type "|S8">
push <HDF5 dataset "push": shape (11,), type "|S8">
short <HDF5 dataset "short": shape (13,), type "|S8">


In [7]:
gp=[b.decode('utf-8') for b in f['mask']['50g10p'] ]
gp[-5:], len(gp)

(['demo_69', 'demo_70', 'demo_71', 'demo_72', 'demo_73'], 60)

In [8]:
demo_name='demo_72'
action=f['data'][demo_name]['actions']
action=np.array(action)
np.min(action), np.max(action)

(-5.892637205834761, 1.0)

In [9]:
mins=[]
maxs=[]
for demo_name in demos:
    action=f['data'][demo_name]['actions']
    action=np.array(action) 
    mins.append(np.min(action))
    maxs.append(np.max(action))
np.min(mins), np.max(maxs)

(-5.897678637081707, 1.0)

In [10]:
data=f['data'][demo_name]
data

<HDF5 group "/data/demo_9" (3 members)>

In [11]:
data['obs'].keys()

<KeysViewHDF5 ['robot0_eef_pos', 'robot0_eye_in_hand_image', 'robot0_joint_pos', 'robot0_joint_vel']>

In [12]:
v=data['obs']['robot0_joint_vel']
v=np.array(v)
np.min(v), np.max(v)

(-0.152, 0.232)

In [13]:
mins=[]
maxs=[]
for demo_name in demos:
    data=f['data'][demo_name]
    v=data['obs']['robot0_joint_vel']
    v=np.array(v)
    mins.append(np.min(v))
    maxs.append(np.max(v))

np.min(mins), np.max(maxs)

(-1.666, 1.402)

In [14]:
def normalize_vel(data, min_val=-1.7, max_val=1.7):
    v_norm = 2 * ((data - min_val) / (max_val - min_val)) - 1
    return v_norm

def unnormalize_vel(data, min_val=-1.7, max_val=1.7):
    v_unnorm = ((data + 1) / 2) * (max_val - min_val) + min_val
    return v_unnorm

In [15]:
demo=f['data'][demo_name]
demo.keys(), demo['obs'].keys()

(<KeysViewHDF5 ['actions', 'obs', 'time']>,
 <KeysViewHDF5 ['robot0_eef_pos', 'robot0_eye_in_hand_image', 'robot0_joint_pos', 'robot0_joint_vel']>)

In [16]:
jvels=demo['obs']['robot0_joint_vel']
jvels=np.array(jvels)
jvels_norm=normalize_vel(jvels)

jvels.min(), jvels.max()

(-0.152, 0.232)

In [17]:
jvels_norm.min(), jvels_norm.max()

(-0.0894117647058823, 0.13647058823529412)

In [18]:
jvels_unnorm=unnormalize_vel(jvels_norm)
jvels_unnorm.min(), jvels_unnorm.max()  

(-0.1519999999999999, 0.23199999999999998)

In [19]:
jvels.shape

(61, 7)

In [22]:
len(demos)

172

In [11]:
demos=[b.decode('utf-8') for b in f['mask']['50g10p_val'] ]

In [12]:
mins=[]
maxs=[]
for demo_name in tqdm.tqdm(demos):
    demo_id=int(demo_name.split('_')[1])
    if demo_id<100:
        continue
    b_demo=f['data'][demo_name]
    actions=b_demo['actions']
    actions=np.array(actions)
    mins.append(np.min(actions))
    maxs.append(np.max(actions))

np.min(mins), np.max(maxs)

100%|██████████| 60/60 [00:00<00:00, 5844.63it/s]


(-0.8500000000000001, 0.5229411764705882)

In [13]:
f.close()

In [21]:
i=100
for demo_name in tqdm.tqdm(demos):
    b_demo=f['data'][demo_name]
    num_samples=b_demo.attrs['num_samples']
    # model_file=b_demo.attrs['model_file']

    jvels=b_demo['obs']['robot0_joint_vel']
    jvels=np.array(jvels)
    jvels_norm=normalize_vel(jvels)           #use as action. 


    bdemo_name="demo_"+str(i)
    i = i+1

    f["data"].create_group(bdemo_name) 
    
    f["data"][bdemo_name].create_dataset('actions', data=jvels_norm) 
    f["data"][bdemo_name].create_dataset('time', data=b_demo['time'])

  
    obs_keys =b_demo['obs'].keys() 
    for k in obs_keys:
        kdata = b_demo['obs'][k]
        f["data"][bdemo_name].create_dataset(f'obs/{k}', data=kdata)
     
    f["data"][bdemo_name].attrs["num_samples"] = num_samples
    # f["data"][bdemo_name].attrs["model_file"] = model_file

f.close() 

100%|██████████| 86/86 [00:01<00:00, 73.15it/s] 


### create groups

In [22]:
f = h5py.File(dataset_path, "r+")
demos = list(f["data"].keys())

In [23]:
lengths=[]
for demo_name in demos:
    demo=f['data'][demo_name]
    num_samples=demo.attrs['num_samples']
    lengths.append(num_samples)

lengths=np.array(lengths)

print('Number of demos: ', len(demos))
print('Max length: ', np.max(lengths))
print('Min length: ', np.min(lengths))
print('Mean length: ', np.mean(lengths))

Number of demos:  172
Max length:  234
Min length:  14
Mean length:  59.22093023255814


In [24]:
# f.close()
f['mask'].keys()

<KeysViewHDF5 ['50g10i', '50g10p', '50g10s', 'good', 'inefficient', 'push', 'short']>

In [33]:
group_name='50g10s'
group_name='50g10p'
group_name='50g10i'

gp=[b.decode('utf-8') for b in f['mask'][group_name] ]
gp[-5:], len(gp)

(['demo_80', 'demo_81', 'demo_82', 'demo_83', 'demo_84'], 60)

In [34]:
vgp=[]
for demo_name in gp:
    demo_no=int(demo_name.split('_')[1])
    vgp.append(f'demo_{100+demo_no}')

vgp[-5:], len(vgp)

(['demo_180', 'demo_181', 'demo_182', 'demo_183', 'demo_184'], 60)

In [35]:
keys=np.array(vgp, dtype='S8')

In [36]:
hdf5_path=dataset_path
filter_keys=vgp
filter_name=f"{group_name}_val"
filter_lengths = create_hdf5_filter_key(hdf5_path=hdf5_path, demo_keys=filter_keys, key_name=filter_name)

In [37]:
f.close()

### verify

In [38]:
f = h5py.File(dataset_path, "r")
demos = list(f["data"].keys())

f['mask'].keys()

<KeysViewHDF5 ['50g10i', '50g10i_val', '50g10p', '50g10p_val', '50g10s', '50g10s_val', 'good', 'inefficient', 'push', 'short']>

In [39]:
len(demos)

172

In [40]:
demo

<Closed HDF5 group>

In [41]:
gp=[b.decode('utf-8') for b in f['mask']['50g10s'] ]
vgp=[b.decode('utf-8') for b in f['mask']['50g10s_val'] ]

gp[-5:], len(gp), vgp[-5:], len(vgp)

(['demo_56', 'demo_57', 'demo_58', 'demo_59', 'demo_60'],
 60,
 ['demo_156', 'demo_157', 'demo_158', 'demo_159', 'demo_160'],
 60)

In [42]:
demo_name='demo_156'
data=f['data'][demo_name]

In [43]:
v=data['actions'] 
v=np.array(v)
np.min(v), np.max(v)

(-0.5247058823529411, 0.13411764705882345)

In [44]:
len(demos)

172

In [45]:
demos=demos[86:]
len(demos)

86

In [46]:
mins=[]
maxs=[]
for demo_name in demos:
    data=f['data'][demo_name]
    v=data['actions'] 
    v=np.array(v)
    mins.append(np.min(v))
    maxs.append(np.max(v))

np.min(mins), np.max(maxs)

(-5.897678637081707, 1.0)

In [47]:
mins=[]
maxs=[]
for demo_name in demos:
    data=f['data'][demo_name]
    v=data['actions'] 
    v=np.array(v)
    v=unnormalize_vel(v)
    mins.append(np.min(v))
    maxs.append(np.max(v))

np.min(mins), np.max(maxs)

(-10.026053683038901, 1.7)