In [1]:
import os

# Point the HF_LEROBOT_HOME environment variable at a local cache directory.
# NOTE(review): this is deliberately placed ahead of the lerobot imports below —
# presumably lerobot resolves HF_LEROBOT_HOME from the environment at import
# time, so moving this line after those imports would have no effect. TODO confirm.
os.environ["HF_LEROBOT_HOME"] = "/mnt/sda/lwh/openpi/.cache"

import shutil

from lerobot.common.datasets.lerobot_dataset import HF_LEROBOT_HOME
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
import tensorflow_datasets as tfds

# Name of the output dataset; the same string is used as the repo id on the Hugging Face Hub
REPO_NAME = "tmp/libero"

# For simplicity, several Libero suites are combined into one training dataset.
# Every raw dataset name follows the pattern `libero_<suite>_no_noops`.
RAW_DATASET_NAMES = [
    f"libero_{suite}_no_noops" for suite in ("10", "goal", "object", "spatial")
]

In [2]:
# Start from a clean slate: delete whatever a previous run left at the output location.
output_path = HF_LEROBOT_HOME / REPO_NAME
if output_path.exists():
    shutil.rmtree(output_path)

# Declare the LeRobot dataset together with the per-frame features it will store.
# - Image streams use dtype `image` (LeRobot's convention for image data).
# - Proprioception is stored under `state`, the action vector under `actions`.
# NOTE(review): the earlier version of this comment said OpenPi expects the
# action key to be `action`, while the feature is registered as `actions` —
# confirm which name the training config reads.
dataset = LeRobotDataset.create(
    repo_id=REPO_NAME,
    robot_type="panda",
    fps=10,
    features={
        # Both cameras share the same 256x256 RGB layout; the comprehension
        # builds a separate spec dict for each so nothing is aliased.
        **{
            camera: dict(
                dtype="image",
                shape=(256, 256, 3),
                names=["height", "width", "channel"],
            )
            for camera in ("image", "wrist_image")
        },
        "state": dict(dtype="float32", shape=(8,), names=["state"]),
        "actions": dict(dtype="float32", shape=(7,), names=["actions"]),
    },
    image_writer_threads=10,
    image_writer_processes=5,
)

In [None]:
# Walk through every raw Libero dataset (loaded via TFDS) and copy its episodes,
# frame by frame, into the LeRobot dataset created above.
# Adapt the field mapping below if your own data uses a different format.
data_dir = "/mnt/sda/datasets/modified_libero_rlds"
for raw_dataset_name in RAW_DATASET_NAMES:
    raw_dataset = tfds.load(raw_dataset_name, data_dir=data_dir, split="train")
    for episode in raw_dataset:
        for step in episode["steps"].as_numpy_iterator():
            observation = step["observation"]
            frame = {
                "image": observation["image"],
                "wrist_image": observation["wrist_image"],
                "state": observation["state"],
                "actions": step["action"],
                # The instruction is decoded before being stored as the task string
                "task": step["language_instruction"].decode(),
            }
            dataset.add_frame(frame)
        # One save_episode() call per raw episode, once all of its frames were added
        dataset.save_episode()