In [4]:
import h5py
import sys

def explore_hdf5(file_path):
    """Print the structure of an HDF5 file: groups, datasets and attributes.

    For every object in the file the name and kind (dataset or group) are
    printed; datasets additionally show their shape and dtype, and any object
    carrying attributes has them listed.

    Args:
        file_path: Path of the HDF5 file; it is opened read-only.
    """
    def print_attrs(name, obj):
        """Print a description of one object (callback for ``visititems``)."""
        print(f"\nName: {name}")
        if isinstance(obj, h5py.Dataset):
            print("  Type: Dataset")
            print(f"  Shape: {obj.shape}")
            print(f"  Dtype: {obj.dtype}")
        elif isinstance(obj, h5py.Group):
            print("  Type: Group")
        # Print attributes if any
        if obj.attrs:
            print("  Attributes:")
            for key, val in obj.attrs.items():
                print(f"    {key}: {val}")

    with h5py.File(file_path, 'r') as f:
        print(f"Exploring file: {file_path}")
        # visititems() only walks the members of the root group, never the
        # root itself, so file-level attributes would otherwise stay hidden.
        # Files without root attributes produce exactly the same output as before.
        if f.attrs:
            print_attrs("/", f)
        f.visititems(print_attrs)

explore_hdf5(file_path="./desmontrations/sortpen/episode_5.h5")

Exploring file: ./desmontrations/sortpen/episode_5.h5

Name: action
  Type: Dataset
  Shape: (464, 7)
  Dtype: float32

Name: observations
  Type: Group

Name: observations/images
  Type: Group

Name: observations/images/cam_0
  Type: Dataset
  Shape: (464, 480, 640, 3)
  Dtype: uint8

Name: observations/images/cam_1
  Type: Dataset
  Shape: (464, 480, 640, 3)
  Dtype: uint8

Name: observations/images/cam_2
  Type: Dataset
  Shape: (464, 480, 640, 3)
  Dtype: uint8

Name: observations/qangl
  Type: Dataset
  Shape: (464, 7)
  Dtype: float32

Name: observations/qpos
  Type: Dataset
  Shape: (464, 6)
  Dtype: float32


In [5]:
def explore_hdf5(file_path):
    """Print column index 6 of the 'action' dataset stored in an HDF5 file.

    NOTE(review): this reuses the name of the structure-printing
    ``explore_hdf5`` defined in an earlier cell and replaces it in the kernel
    once this cell has run.

    Args:
        file_path: Path of the HDF5 file; it is opened read-only.
    """
    with h5py.File(file_path, mode='r') as h5_file:
        # Select index 6 on the second axis for every row.
        seventh_column = h5_file["action"][:, 6]
        print(seventh_column)

explore_hdf5(file_path="./desmontrations/sortpen/episode_40.h5")

[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1.

In [6]:
import os
import h5py
import numpy as np

# Directory holding the episode files, and the common length (first axis)
# every dataset is padded or truncated to.
folder_path = './desmontrations/sortpen'
target_len = 500

# Non-image datasets that need padding/truncation, mapped to their target
# (rows, columns) shape.
datasets_to_process = {
    dataset_path: (target_len, n_columns)
    for dataset_path, n_columns in (
        ("action", 7),
        ("observations/qangl", 7),
        ("observations/qpos", 6),  # widened to 7 columns afterwards
    )
}

def pad_or_truncate(data, target_shape):
    """Pad or truncate ``data`` along its first axis to ``target_shape[0]`` rows.

    Works for arrays of any rank (e.g. 2D signals or 4D image stacks); only
    the first entry of ``target_shape`` is used, the remaining dimensions of
    ``data`` are left as they are.

    Args:
        data: NumPy array whose first axis is the one to resize.
        target_shape: Sequence whose first element is the wanted length.

    Returns:
        ``data`` itself when the length already matches, a leading slice of
        it when it is too long, or a new array with the last row repeated at
        the end when it is too short.

    Raises:
        ValueError: If ``data`` is empty but has to be padded; there is no
            last row to repeat, and the result would silently come back with
            the wrong length.
    """
    wanted_len = target_shape[0]
    current_len = data.shape[0]

    if current_len == wanted_len:
        return data
    if current_len > wanted_len:
        return data[:wanted_len]
    if current_len == 0:
        raise ValueError(
            f"pad_or_truncate: cannot pad empty data to length {wanted_len} "
            "(no last row to repeat)"
        )

    # Too short: extend by repeating the final row (slice keeps the leading axis).
    pad_len = wanted_len - current_len
    padding = np.repeat(data[-1:], pad_len, axis=0)
    return np.concatenate([data, padding], axis=0)

def expand_qpos_to_7(file_path):
    """
    Widen 'observations/qpos' from (N,6) to (N,7) inside an HDF5 file.

    The appended seventh column is a copy of the seventh column of 'action'
    (action[:,6]). The file is opened in append mode; the qpos dataset is
    deleted and re-created under the same name with the widened data.

    Nothing is written, and only a message is printed, when either dataset
    is missing, when qpos already has 7 columns, or when qpos does not have
    6 columns or action has fewer than 7.
    """
    qpos_key, action_key = 'observations/qpos', 'action'

    with h5py.File(file_path, 'a') as h5_file:
        both_present = qpos_key in h5_file and action_key in h5_file
        if not both_present:
            print(f"{file_path}: Thiếu qpos hoặc action.")
            return

        # Load both datasets fully into memory before inspecting their shapes.
        qpos = h5_file[qpos_key][()]
        action = h5_file[action_key][()]
        qpos_width = qpos.shape[1]

        if qpos_width == 7:
            print(f"{file_path}: 'qpos' đã có dạng (N,7).")
            return

        shapes_ok = qpos_width == 6 and action.shape[1] >= 7
        if not shapes_ok:
            print(f"{file_path}: Dạng qpos hoặc action không hợp lệ: {qpos.shape}, {action.shape}")
            return

        # Slicing with 6:7 keeps the column axis, giving an (N,1) block
        # that can be joined to qpos along axis 1.
        seventh_column = action[:, 6:7]
        widened_qpos = np.concatenate([qpos, seventh_column], axis=1)

        # Replace the stored dataset with the widened one.
        del h5_file[qpos_key]
        h5_file.create_dataset(qpos_key, data=widened_qpos)
        print(f"{file_path}: 'qpos' đã mở rộng (N,7) với qpos[:,6] = action[:,6]")

# --- Rename every .h5 file to .hdf5 ---
for fname in os.listdir(folder_path):
    if not fname.endswith('.h5'):
        continue

    # Swap the trailing '.h5' for '.hdf5'; the rest of the name is kept.
    hdf5_name = fname[:-len('.h5')] + '.hdf5'
    old_path, new_path = (
        os.path.join(folder_path, name) for name in (fname, hdf5_name)
    )
    os.rename(old_path, new_path)
    print(f"Renamed: {fname} -> {hdf5_name}")

def _resize_dataset_in_file(h5_file, dset_path, target_shape):
    """Rewrite ``h5_file[dset_path]`` padded/truncated to ``target_shape[0]`` rows.

    The dataset is read fully into memory, resized with ``pad_or_truncate``,
    deleted, and re-created under the same name from the resized array with
    default creation options, so attributes attached to the old dataset are
    not carried over.
    """
    data = h5_file[dset_path][()]
    resized = pad_or_truncate(data, target_shape)
    del h5_file[dset_path]
    h5_file.create_dataset(dset_path, data=resized)


# --- Process every .hdf5 file ---
for filename in os.listdir(folder_path):
    if not filename.endswith('.hdf5'):
        continue

    filepath = os.path.join(folder_path, filename)
    try:
        with h5py.File(filepath, 'a') as f:
            # Pad/truncate the main (non-image) datasets.
            for dset_path, shape in datasets_to_process.items():
                if dset_path in f:
                    _resize_dataset_in_file(f, dset_path, shape)
                    print(f"{filename}: Updated {dset_path}")
                else:
                    print(f"{filename}: Missing dataset {dset_path}")

            # Pad/truncate the image datasets.
            if "observations/images" in f:
                # Snapshot the member names first: the loop body deletes and
                # re-creates members of the very group being iterated.
                for cam_name in list(f["observations/images"]):
                    img_path = f"observations/images/{cam_name}"
                    # Keep the trailing dimensions of the stored dataset and
                    # change only the length of the first axis.
                    target_shape = (target_len,) + f[img_path].shape[1:]
                    _resize_dataset_in_file(f, img_path, target_shape)
                    print(f"{filename}: Updated {img_path}")

            # Add the file-level attribute sim=False.
            f.attrs['sim'] = False

        # Once padding is done (and the file is closed), widen qpos to 7
        # columns with qpos[:,6] = action[:,6].
        expand_qpos_to_7(filepath)

    except Exception as e:
        # Best effort per file: report the failure and continue with the next one.
        print(f"Error processing {filename}: {e}")


Renamed: episode_24.h5 -> episode_24.hdf5
Renamed: episode_0.h5 -> episode_0.hdf5
Renamed: episode_1.h5 -> episode_1.hdf5
Renamed: episode_10.h5 -> episode_10.hdf5
Renamed: episode_11.h5 -> episode_11.hdf5
Renamed: episode_12.h5 -> episode_12.hdf5
Renamed: episode_13.h5 -> episode_13.hdf5
Renamed: episode_14.h5 -> episode_14.hdf5
Renamed: episode_15.h5 -> episode_15.hdf5
Renamed: episode_16.h5 -> episode_16.hdf5
Renamed: episode_17.h5 -> episode_17.hdf5
Renamed: episode_18.h5 -> episode_18.hdf5
Renamed: episode_19.h5 -> episode_19.hdf5
Renamed: episode_2.h5 -> episode_2.hdf5
Renamed: episode_20.h5 -> episode_20.hdf5
Renamed: episode_21.h5 -> episode_21.hdf5
Renamed: episode_22.h5 -> episode_22.hdf5
Renamed: episode_23.h5 -> episode_23.hdf5
Renamed: episode_25.h5 -> episode_25.hdf5
Renamed: episode_26.h5 -> episode_26.hdf5
Renamed: episode_27.h5 -> episode_27.hdf5
Renamed: episode_28.h5 -> episode_28.hdf5
Renamed: episode_29.h5 -> episode_29.hdf5
Renamed: episode_3.h5 -> episode_3.hdf5
