In [15]:
import h5py
import sys

def explore_hdf5(file_path):
    def print_attrs(name, obj):
        print(f"\nName: {name}")
        if isinstance(obj, h5py.Dataset):
            print(f"  Type: Dataset")
            print(f"  Shape: {obj.shape}")
            print(f"  Dtype: {obj.dtype}")
        elif isinstance(obj, h5py.Group):
            print(f"  Type: Group")
        # Print attributes if any
        if obj.attrs:
            print("  Attributes:")
            for key, val in obj.attrs.items():
                print(f"    {key}: {val}")

    with h5py.File(file_path, 'r') as f:
        print(f"Exploring file: {file_path}")
        f.visititems(print_attrs)

explore_hdf5(file_path="./desmontrations/chopsticks/episode_30.h5")

Exploring file: ./desmontrations/chopsticks/episode_30.h5

Name: action
  Type: Dataset
  Shape: (400, 7)
  Dtype: float32

Name: observations
  Type: Group

Name: observations/images
  Type: Group

Name: observations/images/cam_0
  Type: Dataset
  Shape: (396, 480, 640, 3)
  Dtype: uint8

Name: observations/images/cam_1
  Type: Dataset
  Shape: (396, 480, 640, 3)
  Dtype: uint8

Name: observations/qangl
  Type: Dataset
  Shape: (400, 7)
  Dtype: float32

Name: observations/qpos
  Type: Dataset
  Shape: (400, 7)
  Dtype: float32


In [None]:
import h5py
import os

folder_path = "./desmontrations/chopsticks"

# Loop through all files in the folder
for filename in os.listdir(folder_path):
    if filename.endswith(".hdf5"):
        file_path = os.path.join(folder_path, filename)
        try:
            with h5py.File(file_path, "a") as f:
                f.attrs['sim'] = False
                print(f"Updated: {filename}")
        except Exception as e:
            print(f"Failed to update {filename}: {e}")


Updated: episode_24.hdf5
Updated: episode_0.hdf5
Updated: episode_1.hdf5
Updated: episode_10.hdf5
Updated: episode_11.hdf5
Updated: episode_12.hdf5
Updated: episode_13.hdf5
Updated: episode_14.hdf5
Updated: episode_15.hdf5
Updated: episode_16.hdf5
Updated: episode_17.hdf5
Updated: episode_18.hdf5
Updated: episode_19.hdf5
Updated: episode_2.hdf5
Updated: episode_20.hdf5
Updated: episode_21.hdf5
Updated: episode_22.hdf5
Updated: episode_23.hdf5
Updated: episode_25.hdf5
Updated: episode_26.hdf5
Updated: episode_27.hdf5
Updated: episode_28.hdf5
Updated: episode_29.hdf5
Updated: episode_3.hdf5
Updated: episode_30.hdf5
Updated: episode_31.hdf5
Updated: episode_32.hdf5
Updated: episode_33.hdf5
Updated: episode_34.hdf5
Updated: episode_35.hdf5
Updated: episode_36.hdf5
Updated: episode_37.hdf5
Updated: episode_38.hdf5
Updated: episode_39.hdf5
Updated: episode_4.hdf5
Updated: episode_40.hdf5
Updated: episode_41.hdf5
Updated: episode_42.hdf5
Updated: episode_43.hdf5
Updated: episode_44.hdf5
Updat

In [18]:
import os
import h5py
import numpy as np

folder_path = './desmontrations/chopsticks'  # change to your actual path
target_len = 400

# Non-image datasets to modify
datasets_to_process = {
    "action": (target_len, 7),
    "observations/qangl": (target_len, 7),
    "observations/qpos": (target_len, 6),
}

def pad_or_truncate(data, target_shape):
    """Pad or truncate 2D (or 4D for images) data to target length."""
    current_len = data.shape[0]
    if current_len == target_shape[0]:
        return data  # Already correct length
    elif current_len > target_shape[0]:
        return data[:target_shape[0]]  # Truncate
    else:
        pad_len = target_shape[0] - current_len
        # Repeat last element along time axis
        padding = np.repeat(data[-1:], pad_len, axis=0)
        return np.concatenate([data, padding], axis=0)

for filename in os.listdir(folder_path):
    if filename.endswith(".hdf5"):
        filepath = os.path.join(folder_path, filename)
        try:
            with h5py.File(filepath, 'a') as f:
                # Handle state/action datasets
                for dset_path, shape in datasets_to_process.items():
                    if dset_path in f:
                        d = f[dset_path][()]
                        new_d = pad_or_truncate(d, shape)
                        del f[dset_path]  # Replace dataset
                        f.create_dataset(dset_path, data=new_d)
                        print(f"{filename}: Updated {dset_path}")
                    else:
                        print(f"{filename}: Missing dataset {dset_path}")

                # Handle image datasets
                if "observations/images" in f:
                    for cam_name in f["observations/images"]:
                        img_path = f"observations/images/{cam_name}"
                        d = f[img_path][()]
                        target_shape = (target_len,) + d.shape[1:]  # Preserve HWC
                        new_d = pad_or_truncate(d, target_shape)
                        del f[img_path]
                        f.create_dataset(img_path, data=new_d)
                        print(f"{filename}: Updated {img_path}")

                # Add attribute if needed
                f.attrs['sim'] = False
        except Exception as e:
            print(f"Error processing {filename}: {e}")


episode_24.hdf5: Updated action
episode_24.hdf5: Updated observations/qangl
episode_24.hdf5: Updated observations/qpos
episode_24.hdf5: Updated observations/images/cam_0
episode_24.hdf5: Updated observations/images/cam_1
episode_0.hdf5: Updated action
episode_0.hdf5: Updated observations/qangl
episode_0.hdf5: Updated observations/qpos
episode_0.hdf5: Updated observations/images/cam_0
episode_0.hdf5: Updated observations/images/cam_1
episode_1.hdf5: Updated action
episode_1.hdf5: Updated observations/qangl
episode_1.hdf5: Updated observations/qpos
episode_1.hdf5: Updated observations/images/cam_0
episode_1.hdf5: Updated observations/images/cam_1
episode_10.hdf5: Updated action
episode_10.hdf5: Updated observations/qangl
episode_10.hdf5: Updated observations/qpos
episode_10.hdf5: Updated observations/images/cam_0
episode_10.hdf5: Updated observations/images/cam_1
episode_11.hdf5: Updated action
episode_11.hdf5: Updated observations/qangl
episode_11.hdf5: Updated observations/qpos
episode

In [14]:
import numpy as np
import h5py

def expand_qpos_to_7(file_path):
    """
    Modifies the 'observations/qpos' dataset in the given .h5 file,
    expanding from shape (N, 6) to (N, 7) by appending 0s.
    """
    with h5py.File(file_path, 'a') as f:
        qpos_path = 'observations/qpos'
        
        if qpos_path not in f:
            print(f"{file_path}: 'observations/qpos' not found.")
            return
        
        qpos = f[qpos_path][()]  # Read the existing data
        if qpos.shape[1] == 7:
            print(f"{file_path}: 'qpos' already has shape (N, 7).")
            return
        
        if qpos.shape[1] != 6:
            print(f"{file_path}: 'qpos' shape is unexpected: {qpos.shape}")
            return
        
        # Append a column of zeros
        new_qpos = np.hstack([qpos, np.zeros((qpos.shape[0], 1), dtype=qpos.dtype)])
        
        # Delete and recreate the dataset
        del f[qpos_path]
        f.create_dataset(qpos_path, data=new_qpos)
        print(f"{file_path}: 'qpos' expanded to shape (N, 7)")

import os

folder_path = './desmontrations/chopsticks'

for fname in os.listdir(folder_path):
    if fname.endswith('.h5'):
        expand_qpos_to_7(os.path.join(folder_path, fname))

./desmontrations/chopsticks/episode_24.h5: 'qpos' expanded to shape (N, 7)
./desmontrations/chopsticks/episode_1.h5: 'qpos' expanded to shape (N, 7)
./desmontrations/chopsticks/episode_10.h5: 'qpos' expanded to shape (N, 7)
./desmontrations/chopsticks/episode_11.h5: 'qpos' expanded to shape (N, 7)
./desmontrations/chopsticks/episode_12.h5: 'qpos' expanded to shape (N, 7)
./desmontrations/chopsticks/episode_13.h5: 'qpos' expanded to shape (N, 7)
./desmontrations/chopsticks/episode_14.h5: 'qpos' expanded to shape (N, 7)
./desmontrations/chopsticks/episode_15.h5: 'qpos' expanded to shape (N, 7)
./desmontrations/chopsticks/episode_16.h5: 'qpos' expanded to shape (N, 7)
./desmontrations/chopsticks/episode_17.h5: 'qpos' expanded to shape (N, 7)
./desmontrations/chopsticks/episode_18.h5: 'qpos' expanded to shape (N, 7)
./desmontrations/chopsticks/episode_19.h5: 'qpos' expanded to shape (N, 7)
./desmontrations/chopsticks/episode_2.h5: 'qpos' expanded to shape (N, 7)
./desmontrations/chopsticks