## Converting Existing LeRobot Datasets from v2.1 to v3.0

In [50]:
# Pre-release build with v3 support.
# The archive URL is pinned to a single commit hash, so every run installs
# the same lerobot source snapshot.
%pip install "https://github.com/huggingface/lerobot/archive/33cad37054c2b594ceba57463e8f11ee374fa93c.zip"

[0mCollecting https://github.com/huggingface/lerobot/archive/33cad37054c2b594ceba57463e8f11ee374fa93c.zip
  Using cached https://github.com/huggingface/lerobot/archive/33cad37054c2b594ceba57463e8f11ee374fa93c.zip
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[0m

In [2]:
# Step 1: Login to Hugging Face
# Never paste a token into the notebook: it would be saved with the file and
# leak when the notebook is shared. The token is read from the HF_TOKEN
# environment variable instead; when the variable is unset or empty,
# token=None makes login() fall back to its interactive prompt.
import os

import huggingface_hub

huggingface_hub.login(token=os.environ.get("HF_TOKEN") or None)

In [36]:
# Step 2: Fetch a local copy of the dataset
from pathlib import Path

from huggingface_hub import snapshot_download

# TODO: Set repo_id to the dataset repo id you want to convert
repo_id = "youliangtan/so101-table-cleanup"

# Every dataset is stored under ./datasets/<repo_id>
local_dataset_dir = Path("./datasets")
local_dataset_dir.mkdir(exist_ok=True)

# Pull the files of the v2.1 revision into the local directory
local_path = snapshot_download(
    repo_type="dataset",
    repo_id=repo_id,
    revision="v2.1",
    local_dir=local_dataset_dir.joinpath(repo_id),
)

print(f"Dataset downloaded to: {local_path}")

Fetching 246 files:   0%|          | 0/246 [00:00<?, ?it/s]

Dataset downloaded to: /content/datasets/youliangtan/so101-table-cleanup


In [37]:
# Step 3: Run the conversion script
# Calls convert_dataset on the local v2.1 copy downloaded in Step 2.

from lerobot.datasets.v30.convert_dataset_v21_to_v30 import convert_dataset

# Convert the local copy in place. Nothing is pushed from this call
# (push_to_hub=False); the upload to the Hub is done by the
# huggingface-cli cell that follows.
convert_dataset(
    repo_id=repo_id,
    branch=None,  # Use main branch
    data_file_size_in_mb=None,  # Use defaults (100 MB for data)
    video_file_size_in_mb=None,  # Use defaults (500 MB for videos)
    root=str(local_dataset_dir),  # Use the local directory where we downloaded
    push_to_hub=False,  # Do not push from here; the upload is a separate step
    force_conversion=False,  # Don't force if v3.0 already exists
)

print("Conversion complete!")

Using local dataset at datasets/youliangtan/so101-table-cleanup


convert data files: 100%|██████████| 80/80 [00:00<00:00, 1997.09it/s]
convert videos of observation.images.front: 100%|██████████| 80/80 [00:01<00:00, 50.45it/s]
convert videos of observation.images.wrist: 100%|██████████| 80/80 [00:01<00:00, 46.44it/s]
convert videos: 100%|██████████| 80/80 [00:00<00:00, 157310.98it/s]


Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Conversion complete!


In [None]:
# NOTE: Replace dillondesilva/so101-table-cleanup with the 
# repo id you want to push to. Ensure you have logged in
# to huggingface using the command:
# huggingface-cli login
!huggingface-cli repo create dillondesilva/so101-table-cleanup --type dataset
!huggingface-cli upload dillondesilva/so101-table-cleanup /content/datasets/youliangtan/so101-table-cleanup --repo-type dataset --revision v3.0

[33mThe --type argument is deprecated and will be removed in a future version. Use --repo-type instead.[0m
Successfully created [1mdillondesilva/so101-table-cleanup[0m on the Hub.
Your repo is now available at [1mhttps://huggingface.co/datasets/dillondesilva/so101-table-cleanup[0m
Branch 'v3.0' not found. Creating it...
Start hashing 9 files.
Finished hashing 9 files.
Processing Files (0 / 0)      : |          |  0.00B /  0.00B            
New Data Upload               : |          |  0.00B /  0.00B            [A

  ...hunk-000/file-000.parquet: 100% 90.1k/90.1k [00:00<?, ?B/s][A[A


  ...hunk-000/file-000.parquet: 100% 1.41M/1.41M [00:00<?, ?B/s][A[A[A



  ...leanup/meta/tasks.parquet: 100% 2.27k/2.27k [00:00<?, ?B/s][A[A[A[A




  ...st/chunk-000/file-001.mp4:  51% 16.8M/33.1M [00:00<?, ?B/s][A[A[A[A[A





  ...nt/chunk-000/file-000.mp4:   8% 16.7M/209M [00:00<?, ?B/s][A[A[A[A[A[A






  ...st/chunk-000/file-000.mp4:   4% 8.38M/208M [00:00<?, ?B/s][A[A

In [None]:
# Step 4: Verify the conversion from your repo
from lerobot.datasets.lerobot_dataset import LeRobotDataset

# Load the converted dataset from your repo to verify
# Note: force_cache_sync=True is passed so the load is synced against the
# HuggingFace Hub copy instead of relying only on the local cache
# NOTE(review): confirm the exact semantics in the LeRobotDataset docstring
your_repo_id = "dillondesilva/so101-table-cleanup"
dataset = LeRobotDataset(
    repo_id=your_repo_id,
    force_cache_sync=True
)

print(f"Dataset loaded successfully from {your_repo_id}!")
print(f"Dataset metadata: {dataset.meta}")

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

Fetching 10 files:   0%|          | 0/10 [00:00<?, ?it/s]

Dataset loaded successfully from dillondesilva/so101-table-cleanup!
Dataset Metadata: LeRobotDatasetMetadata({
    Repository ID: 'dillondesilva/so101-table-cleanup',
    Total episodes: '80',
    Total frames: '47513',
    Features: '['action', 'observation.state', 'observation.images.front', 'observation.images.wrist', 'timestamp', 'frame_index', 'episode_index', 'index', 'task_index']',
})',

