In [23]:
import os

# Configure the Hugging Face mirror endpoint and the cache location.
# NOTE(review): presumably this has to run before any library that reads these
# variables is imported — confirm. The cache path is an absolute, machine-specific path.
os.environ.update(
    {
        "HF_ENDPOINT": "https://hf-mirror.com",
        "HF_HOME": "/zhouzhili/liber",
    }
)

In [24]:
import torch 
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.streaming_dataset import StreamingLeRobotDataset

### delta_timestamps

1. 定义：delta_timestamps 用来为指定键取一段时间窗口的帧（过去/当前/未来），单位为秒；它会把样本从“单帧”变成“时间序列”（增加时间维度 T），并在请求的时间戳越界（例如超出所在 episode 的首尾）时额外提供一个掩码 `<键名>_is_pad`（例如 `observation.images.up_is_pad`），标出哪些帧是填充的，便于时序建模。
2. 为某些键指定“相对当前帧的时间偏移”（单位=秒）。负数=过去，0=当前，正数=未来。
3. 作用：让每个样本自带一段时间窗口的数据，给模型提供时序上下文（例如看过去几帧的图像/状态，以便预测当前或未来的动作）
4. 对张量形状的影响：
   * 不设置时：键的张量是常规形状，如图像为 (C, H, W)。
   * 设置后：会在最前面多一个时间维度 T，如图像为 (T, C, H, W)，T 等于你给的偏移数量。

In [25]:
# Time offsets (in seconds, relative to the current frame) to fetch for the "up" camera:
# 0.2 s ago, 0.1 s ago, and the current frame. At the 30 fps this dataset reports,
# that is 6 frames back, 3 frames back, and the frame itself.
delta_timestamps = {"observation.images.up": [-0.2, -0.1, 0.0]}

## Batch

In [26]:
# Open the `lerobot/svla_so101_pickplace` dataset, passing the offset spec so that
# the keys listed in `delta_timestamps` come back as a window of frames.
dataset = LeRobotDataset(
    delta_timestamps=delta_timestamps,
    repo_id="lerobot/svla_so101_pickplace",
)

In [32]:
# Print the dataset's `fps` attribute (its frame rate).
print(dataset.fps)

30


In [27]:
# Fetch frame 100 from the dataset, i.e. the item at index 100.
sample = dataset[100]

In [28]:
# Overview of the sample: its container type, its keys, then type and shape per entry.
print(type(sample))
print("keys:", list(sample))

for name, value in sample.items():
    # Entries without a `.shape` attribute are reported as shape=None.
    dims = getattr(value, "shape", None)
    print(f"{name:40s}  type={type(value).__name__}  shape={dims}")

<class 'dict'>
keys: ['observation.images.up', 'observation.images.side', 'action', 'observation.state', 'timestamp', 'frame_index', 'episode_index', 'index', 'task_index', 'observation.images.up_is_pad', 'task']
observation.images.up                     type=Tensor  shape=torch.Size([3, 3, 480, 640])
observation.images.side                   type=Tensor  shape=torch.Size([3, 480, 640])
action                                    type=Tensor  shape=torch.Size([6])
observation.state                         type=Tensor  shape=torch.Size([6])
timestamp                                 type=Tensor  shape=torch.Size([])
frame_index                               type=Tensor  shape=torch.Size([])
episode_index                             type=Tensor  shape=torch.Size([])
index                                     type=Tensor  shape=torch.Size([])
task_index                                type=Tensor  shape=torch.Size([])
observation.images.up_is_pad              type=Tensor  shape=torch.Size([3])

In [29]:
# Dump the raw sample dict.
# NOTE(review): this prints the image tensors as well, so the output is long;
# the per-key type/shape listing printed by the preceding cell is the compact view.
print(sample)

{'observation.images.up': tensor([[[[0.4941, 0.4941, 0.4941,  ..., 0.6745, 0.6745, 0.6745],
          [0.4980, 0.4980, 0.4980,  ..., 0.6745, 0.6745, 0.6745],
          [0.5098, 0.5098, 0.5098,  ..., 0.6745, 0.6745, 0.6745],
          ...,
          [0.6039, 0.6039, 0.6039,  ..., 0.5216, 0.5216, 0.5216],
          [0.6039, 0.6039, 0.6039,  ..., 0.5216, 0.5216, 0.5216],
          [0.6039, 0.6039, 0.6039,  ..., 0.5216, 0.5216, 0.5216]],

         [[0.5176, 0.5176, 0.5176,  ..., 0.6824, 0.6824, 0.6824],
          [0.5216, 0.5216, 0.5216,  ..., 0.6824, 0.6824, 0.6824],
          [0.5333, 0.5333, 0.5333,  ..., 0.6824, 0.6824, 0.6824],
          ...,
          [0.6196, 0.6196, 0.6196,  ..., 0.5216, 0.5216, 0.5216],
          [0.6196, 0.6196, 0.6196,  ..., 0.5216, 0.5216, 0.5216],
          [0.6196, 0.6196, 0.6196,  ..., 0.5216, 0.5216, 0.5216]],

         [[0.5216, 0.5216, 0.5216,  ..., 0.6824, 0.6824, 0.6824],
          [0.5255, 0.5255, 0.5255,  ..., 0.6824, 0.6824, 0.6824],
          [0.537

In [None]:
# Number of samples per batch.
batch_size = 16
# Wrap the dataset in a DataLoader so its samples are batched for training.
data_loader = torch.utils.data.DataLoader(dataset=dataset, batch_size=batch_size)

In [None]:
# Iterate over the DataLoader the way a training loop would.
num_epochs = 1
# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

for epoch in range(num_epochs):
    for batch in data_loader:
        # Move the data onto the selected device (e.g. the GPU).
        observations = batch["observation.state"].to(device)
        actions = batch["action"].to(device)

        # Placeholder for the model call: model.forward(batch)
        print(f"batch keys: {batch.keys()}")
        print(f"observations: {observations.shape}")
        print(f"actions: {actions.shape}")
        # Stop after the first batch; this only exits the inner loop.
        break

## Streaming

In [33]:
# Use StreamingLeRobotDataset so the dataset does not have to be downloaded first:
# frames are streamed from the Hugging Face Hub instead of being loaded into memory.
streaming_dataset = StreamingLeRobotDataset(
    repo_id="lerobot/svla_so101_pickplace",
    delta_timestamps=delta_timestamps,
)

In [34]:
# Print the streaming dataset's `fps` attribute (its frame rate).
print(streaming_dataset.fps)

30


In [35]:
# Pull the first sample the streaming dataset yields and list its keys.
streaming_sample = next(iter(streaming_dataset))
print("streaming_dataset keys:", streaming_sample.keys())

streaming_dataset keys: dict_keys(['action', 'observation.state', 'timestamp', 'frame_index', 'episode_index', 'index', 'task_index', 'observation.images.up', 'observation.images.side', 'observation.images.up_is_pad', 'task'])


In [22]:
# Take the "up" camera entry from the streamed sample and display its shape.
# With three requested offsets the tensor has a leading time dimension: (T, C, H, W).
streaming_img = streaming_sample["observation.images.up"]
streaming_img.shape

torch.Size([3, 3, 480, 640])