In [3]:
import logging
from pathlib import Path

import torch

from lerobot.common.utils.utils import init_logging
from lerobot.configs.train import TrainPipelineConfig
from lerobot.configs.default import DatasetConfig, WandBConfig, EvalConfig
from lerobot.configs.types import PolicyFeature, FeatureType, NormalizationMode
from lerobot.common.policies.diffusion.configuration_diffusion import DiffusionConfig
from lerobot.common.policies.diffusion.modeling_diffusion import DiffusionPolicy
from lerobot.scripts.train import train

from lerobot.common.envs.factory import make_env_config # make_env

In [9]:
# Configure logging before emitting anything. Python's root logger defaults to
# the WARNING level, so logging.info() calls made before configuration are
# dropped, and the first module-level logging call implicitly installs a
# default handler that can interfere with later configuration.
init_logging()
logging.info("Starting training for Diffusion Policy with keypoints")

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
logging.info(f"Using device: {device}")

# Root directory of this training run. Later cells read the latest checkpoint
# from `output_dir / "checkpoints/last/pretrained_model"`.
# NOTE(review): the directory is deliberately not created here; presumably the
# training pipeline creates it itself -- confirm before re-enabling the line below.
output_dir = Path("../../outputs/train/diffusion_pusht_keypoints")
# output_dir.mkdir(parents=True, exist_ok=True)

In [10]:
# --- Policy I/O -------------------------------------------------------------
# The policy consumes a 2-element state vector and a 16-element environment
# state vector, and produces a 2-element action.
input_features = {
    "observation.state": PolicyFeature(shape=(2,), type=FeatureType.STATE),
    "observation.environment_state": PolicyFeature(shape=(16,), type=FeatureType.ENV),
}
output_features = {"action": PolicyFeature(shape=(2,), type=FeatureType.ACTION)}

# --- Normalization ----------------------------------------------------------
# Min-max scaling for state, environment state and action; visual features are
# passed through unchanged.
normalization_mapping = {
    **dict.fromkeys(("STATE", "ENV", "ACTION"), NormalizationMode.MIN_MAX),
    "VISUAL": NormalizationMode.IDENTITY,
}

# --- Diffusion policy -------------------------------------------------------
policy_config = DiffusionConfig(
    device=device,
    # Features and normalization
    input_features=input_features,
    output_features=output_features,
    normalization_mapping=normalization_mapping,
    # Observation / action windowing
    n_obs_steps=2,
    horizon=16,
    n_action_steps=8,
    # Noise schedule
    num_train_timesteps=100,
    beta_schedule="squaredcos_cap_v2",
    beta_start=1e-4,
    beta_end=0.02,
    prediction_type="epsilon",
    # State encoder
    state_backbone="MLP",
    state_encoder_block_channels=[64, 256],
    state_encoder_feature_dim=256,
    state_encoder_use_layernorm=True,
    # Optimizer
    optimizer_lr=1e-4,
    optimizer_betas=(0.95, 0.999),
    optimizer_eps=1e-8,
    optimizer_weight_decay=1e-6,
    # Learning-rate scheduler
    scheduler_name="cosine",
    scheduler_warmup_steps=500,
)

# --- Evaluation environment ---------------------------------------------------
env_config = make_env_config(env_type="pusht", obs_type="environment_state_agent_pos")

# --- Dataset ------------------------------------------------------------------
dataset_config = DatasetConfig(repo_id="lerobot/pusht_keypoints")

# --- Weights & Biases ---------------------------------------------------------
wandb_config = WandBConfig(
    entity="fiatlux",  # WandB username or team name
    project="diffusion-pusht-keypoints",
    enable=True,  # switch to False to run without WandB
)

# --- Evaluation ---------------------------------------------------------------
eval_config = EvalConfig(batch_size=4, n_episodes=10)

In [5]:
# Configure logging before the first INFO message of this cell; otherwise the
# message is filtered out by the root logger's default WARNING level.
init_logging()

# One-step smoke test of the whole pipeline (train -> eval -> log -> checkpoint)
# versus the full training run. The default keeps the smoke-test behaviour.
SMOKE_TEST = True  # set to False for the full 50k-step run

# Assemble the full training pipeline configuration from the pieces defined in
# the previous cells.
train_config = TrainPipelineConfig(
    dataset=dataset_config,
    env=env_config,
    policy=policy_config,
    output_dir=output_dir,
    job_name="diffusion_pusht_keypoints",
    seed=1000,
    num_workers=4,
    batch_size=64,
    steps=1 if SMOKE_TEST else 50_000,
    eval_freq=1 if SMOKE_TEST else 5_000,
    log_freq=1 if SMOKE_TEST else 100,
    save_checkpoint=True,
    save_freq=1 if SMOKE_TEST else 5_000,
    use_policy_training_preset=True,
    eval=eval_config,
    wandb=wandb_config,
)

# Actually include the config in the log line that announces it.
logging.info("Starting training with config: %s", train_config)

train(train_config)


The dataset you requested (lerobot/pusht_keypoints) is in 2.0 format.
While current version of LeRobot is backward-compatible with it, the version of your dataset still uses global
stats instead of per-episode stats. Update your dataset stats to the new format using this command:
```
python lerobot/common/datasets/v21/convert_dataset_v20_to_v21.py --repo-id=lerobot/pusht_keypoints
```

If you encounter a problem, contact LeRobot maintainers on [Discord](https://discord.com/invite/s3KuuzsPFb)
or open an [issue on GitHub](https://github.com/huggingface/lerobot/issues/new/choose).

The dataset you requested (lerobot/pusht_keypoints) is in 2.0 format.
While current version of LeRobot is backward-compatible with it, the version of your dataset still uses global
stats instead of per-episode stats. Update your dataset stats to the new format using this command:
```
python lerobot/common/datasets/v21/convert_dataset_v20_to_v21.py --repo-id=lerobot/pusht_keypoints
```

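If you encounter a problem, contact LeRobot maintainers on [Discord](https://discord.com/invite/s3KuuzsPFb)
or open an [issue on GitHub](https://github.com/huggingface/lerobot/issues/new/choose).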

[1m[34mLogs will be synced with wandb.[0m


Resolving data files:   0%|          | 0/206 [00:00<?, ?it/s]

  logger.deprecation(
  logger.deprecation(
Stepping through eval batches: 100%|██████████| 3/3 [03:41<00:00, 73.79s/it, running_success_rate=0.0%]


In [13]:
# Location of the most recently saved model inside this run's output directory.
last_checkpoint = str(output_dir) + "/checkpoints/last/pretrained_model"

# Reload the policy from that checkpoint, reusing the in-memory policy config.
trained_policy = DiffusionPolicy.from_pretrained(
    last_checkpoint,
    config=policy_config,
)

# Upload the reloaded policy to the Hub as a public (non-private) repository.
trained_policy.push_to_hub(
    "the-future-dev/diffusion_pusht_keypoints",
    private=False,
    commit_message="First epoch of training",
)

Loading weights from local directory


model.safetensors:   0%|          | 0.00/1.05G [00:00<?, ?B/s]

KeyboardInterrupt: 

In [4]:
import gc

# Free GPU memory held by the reloaded policy.
# empty_cache() only returns cached blocks that are no longer in use, so the
# notebook's reference to the policy has to be dropped first. pop() is used
# instead of `del` so the cell also works when `trained_policy` was never
# created (e.g. when this cell is run before the checkpoint-loading cell).
globals().pop("trained_policy", None)
gc.collect()  # collect any reference cycles still keeping tensors alive
torch.cuda.empty_cache()