In [1]:
import gc
import json
import tempfile
from pathlib import Path

import gym_pusht  # noqa: F401
import gymnasium as gym
import imageio
import matplotlib.pyplot as plt
import numpy as np
import torch

from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
from lerobot.common.envs.factory import make_env, make_env_config
from lerobot.common.policies.diffusion.configuration_diffusion import DiffusionConfig
from lerobot.common.policies.diffusion.modeling_diffusion import DiffusionPolicy
from lerobot.configs.types import FeatureType, NormalizationMode, PolicyFeature
from lerobot.scripts.eval import eval_policy


In [2]:
# Notebook-wide configuration: compute device and output locations.
device = "cpu"

# Evaluation artifacts (rollout video, plots, JSON metrics) are written here.
output_directory = Path("../../outputs/eval/diffusion_pusht_keypoints")

# Episode videos get their own sub-folder. Creating it with parents=True
# also creates `output_directory` itself, so a single call covers both.
videos_dir = output_directory / "videos"
videos_dir.mkdir(parents=True, exist_ok=True)

In [3]:
# Load the PushT keypoints dataset.
# NOTE(review): none of the later cells visible in this notebook read `dataset`.
dataset = LeRobotDataset(repo_id="lerobot/pusht_keypoints")

The dataset you requested (lerobot/pusht_keypoints) is in 2.0 format.
While current version of LeRobot is backward-compatible with it, the version of your dataset still uses global
stats instead of per-episode stats. Update your dataset stats to the new format using this command:
```
python lerobot/common/datasets/v21/convert_dataset_v20_to_v21.py --repo-id=lerobot/pusht_keypoints
```

If you encounter a problem, contact LeRobot maintainers on [Discord](https://discord.com/invite/s3KuuzsPFb)
or open an [issue on GitHub](https://github.com/huggingface/lerobot/issues/new/choose).



Resolving data files:   0%|          | 0/206 [00:00<?, ?it/s]

In [4]:
# Root of the training run whose last checkpoint is evaluated below.
model_dir = Path("../..") / "outputs" / "train" / "diffusion_pusht_keypoints"

In [5]:
# Load the trained diffusion policy from the last checkpoint of the training
# run and move it to the evaluation device.
checkpoint_path = f"{model_dir}/checkpoints/last/pretrained_model"
policy = DiffusionPolicy.from_pretrained(checkpoint_path)
policy.to(device)

# Summarise the parts of the policy config that determine how observations
# must be shaped when calling the policy.
policy_summary = (
    ("Input features", policy.config.input_features),
    ("Output features", policy.config.output_features),
    ("Image features", policy.config.image_features),
    ("Observation steps", policy.config.n_obs_steps),
    ("Action steps", policy.config.n_action_steps),
    ("Horizon", policy.config.horizon),
)
print("\n=== Policy Configuration ===")
for label, value in policy_summary:
    print(f"{label}: {value}")

Loading weights from local directory

=== Policy Configuration ===
Input features: {'observation.state': PolicyFeature(type=<FeatureType.STATE: 'STATE'>, shape=(2,)), 'observation.environment_state': PolicyFeature(type=<FeatureType.ENV: 'ENV'>, shape=(16,))}
Output features: {'action': PolicyFeature(type=<FeatureType.ACTION: 'ACTION'>, shape=(2,))}
Image features: {}
Observation steps: 2
Action steps: 8
Horizon: 16


In [6]:
# Single (non-vectorized) PushT environment that reports keypoints and the
# agent position instead of pixels, capped at 300 steps per episode.
single_env_kwargs = {
    "obs_type": "environment_state_agent_pos",
    "max_episode_steps": 300,
}
env = gym.make("gym_pusht/PushT-v0", **single_env_kwargs)

# Show the spaces so they can be compared with the policy features above.
for space_name, space in (
    ("observation", env.observation_space),
    ("action", env.action_space),
):
    print(f"Environment {space_name} space:", space)

  logger.deprecation(


Environment observation space: Dict('agent_pos': Box(0.0, 512.0, (2,), float64), 'environment_state': Box(0.0, 512.0, (16,), float64), 'goal_state': Box(0.0, 512.0, (16,), float64))
Environment action space: Box(0.0, 512.0, (2,), float32)


### PushT-v0 - keypoints env

**Env outputs**: env.observation_space
- "environment_state" => shape (16,)
- "goal_state" => shape (16,)
- "agent_pos" => shape (2,)

**Env inputs**: env.action_space
- shape (2,)

### Diffusion Policy

**Policy inputs**: policy.config.input_features
- "observation.environment_state" => shape (16,)
- "observation.state" => shape (2,)

**Policy outputs**: policy.config.output_features
- "action" => shape (2,)



In [7]:
# SINGLE ENVIRONMENT ROLLOUT #################################################################
# Runs one seeded episode with the policy in the single environment, collects
# every reward and rendered frame, reports success/failure and writes the
# frames to an mp4 under `output_directory`.

ROLLOUT_SEED = 10000  # fixed seed so the same episode is replayed on every run

# Reset the policy before starting a new episode.
policy.reset()
numpy_observation, info = env.reset(seed=ROLLOUT_SEED)

# Collect every reward and all frames of the episode, from the initial state
# (rendered right here) to the final state.
rewards = []
frames = [env.render()]

done = False
while not done:
    # The env returns float64 numpy arrays; the policy is fed float32 tensors
    # on `device` with a leading batch dimension of 1. Only "agent_pos" and
    # "environment_state" are passed on; "goal_state" is not a policy input.
    policy_input = {
        "observation.state": torch.as_tensor(
            numpy_observation["agent_pos"], dtype=torch.float32, device=device
        ).unsqueeze(0),
        "observation.environment_state": torch.as_tensor(
            numpy_observation["environment_state"], dtype=torch.float32, device=device
        ).unsqueeze(0),
    }

    with torch.inference_mode():
        action = policy.select_action(policy_input)

    # Drop the batch dimension and hand the action back to the env as numpy.
    numpy_action = action.squeeze(0).to("cpu").numpy()

    numpy_observation, reward, terminated, truncated, info = env.step(numpy_action)

    rewards.append(reward)
    frames.append(env.render())

    # The episode ends when the env terminates or when the step limit
    # truncates it.
    done = terminated or truncated

# `terminated` is treated as success; ending by truncation counts as failure.
print("Success!" if terminated else "Failure!")

# Get the speed of environment (i.e. its number of frames per second).
fps = env.metadata["render_fps"]

# Encode all frames into a mp4 video.
video_path = output_directory / "single_eval_rollout.mp4"
imageio.mimsave(str(video_path), np.stack(frames), fps=fps)
print(f"Video of the evaluation is available in '{video_path}'.")


Success!




Video of the evaluation is available in '../../outputs/eval/diffusion_pusht_keypoints/single_eval_rollout.mp4'.


In [None]:
# BATCHED ROLLOUT for policy evaluation #####################################################################
# Builds a vectorized PushT environment for `eval_policy`. Defines `n_envs`,
# `n_episodes`, `start_seed` and rebinds `env`, all of which the evaluation
# cell below reads.

# Configure the number of environments and episodes
n_envs = 3  # Number of parallel environments
n_episodes = 3  # Total number of episodes to evaluate
start_seed = 50  # Starting seed

# The policy loaded above consumes keypoints ("observation.environment_state")
# and the agent position ("observation.state") and has no image features, so
# the evaluation env must use the keypoint observation type, matching the
# single-env rollout, rather than "pixels_agent_pos".
env_config = make_env_config(
    env_type="pusht",
    obs_type="environment_state_agent_pos",
    render_mode="rgb_array",
)

# Create the vectorized environment
env = make_env(
    env_config,
    n_envs=n_envs,
    use_async_envs=True,  # Using AsyncVectorEnv for better performance
)

print(f"Created vectorized environment with {n_envs} parallel environments")
print(f"Running evaluation for {n_episodes} episodes starting from seed {start_seed}")


In [None]:
# Evaluate the policy over `n_episodes` episodes in the vectorized env and
# keep the returned metrics in `eval_results` for the reporting cell below.
try:
    eval_results = eval_policy(
        env=env,
        policy=policy,
        n_episodes=n_episodes,
        # Cap on rendered episodes; it exceeds n_episodes here, so presumably
        # every evaluated episode gets a video.
        max_episodes_rendered=10,
        videos_dir=videos_dir,
        return_episode_data=False,
        start_seed=start_seed,
    )
finally:
    # Always close the environment, even when evaluation raises, so the
    # vectorized env's resources are not left behind.
    env.close()

In [None]:
# Report the evaluation: aggregated metrics, per-episode plots, a JSON dump
# of the full results, and the location of the first rendered video.
aggregated = eval_results["aggregated"]
print("\n=== Evaluation Results ===")
print(f"Average sum reward: {aggregated['avg_sum_reward']:.4f}")
print(f"Average max reward: {aggregated['avg_max_reward']:.4f}")
print(f"Success rate: {aggregated['pc_success']:.2f}%")
print(f"Total evaluation time: {aggregated['eval_s']:.2f} seconds")
print(f"Average time per episode: {aggregated['eval_ep_s']:.2f} seconds")

# Per-episode series used by the two panels below.
per_episode = eval_results["per_episode"]
success_by_episode = [episode["success"] for episode in per_episode]
rewards_by_episode = [episode["sum_reward"] for episode in per_episode]

fig, (reward_ax, success_ax) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: summed reward of each episode.
reward_ax.plot(rewards_by_episode)
reward_ax.set_title('Rewards by Episode')
reward_ax.set_xlabel('Episode')
reward_ax.set_ylabel('Sum Reward')

# Right panel: success flag of each episode drawn as 0/1, with a little
# vertical padding so both levels stay visible.
success_ax.plot([int(flag) for flag in success_by_episode])
success_ax.set_title('Success by Episode')
success_ax.set_xlabel('Episode')
success_ax.set_ylabel('Success Rate')
success_ax.set_ylim(-0.1, 1.1)

fig.tight_layout()
fig.savefig(output_directory / "evaluation_results.png")
plt.show()

# Persist the complete results next to the figure.
results_path = output_directory / "eval_results.json"
with open(results_path, "w") as results_file:
    json.dump(eval_results, results_file, indent=2)

# Point at the first video when at least one was produced.
video_paths = eval_results.get("video_paths")
if video_paths:
    print(f"\nGenerated video is available at: {video_paths[0]}")
else:
    print("No videos generated")


In [17]:
# Authenticate with the Hugging Face Hub through the interactive notebook
# widget; needed before the upload in the next cell.
import huggingface_hub

huggingface_hub.notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [18]:
# Publish the evaluated policy to the Hugging Face Hub.
#
# The first positional argument of `save_pretrained` is a local save
# directory, not a repo id. Passing the repo id there writes a stray
# "the-future-dev/diffusion-pusht-keypoints/" checkpoint copy into the working
# directory and leaves the target repo implicit. Export into a throw-away
# directory instead and name the destination repo explicitly.
HUB_REPO_ID = "the-future-dev/diffusion-pusht-keypoints"

with tempfile.TemporaryDirectory() as export_dir:
    commit_info = policy.save_pretrained(
        export_dir,
        repo_id=HUB_REPO_ID,
        push_to_hub=True,
        commit_message="DiffusionPolicy for Pusht trained with keypoints of the current T position and the end T position",
    )

# Display the resulting commit information as the cell output.
commit_info

model.safetensors:   0%|          | 0.00/1.05G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/the-future-dev/diffusion-pusht-keypoints/commit/2294bc4519685ceebe9ae97f8576e56d9d9db65f', commit_message='DiffusionPolicy for Pusht trained with keypoints of the current T position and the end T position', commit_description='', oid='2294bc4519685ceebe9ae97f8576e56d9d9db65f', pr_url=None, repo_url=RepoUrl('https://huggingface.co/the-future-dev/diffusion-pusht-keypoints', endpoint='https://huggingface.co', repo_type='model', repo_id='the-future-dev/diffusion-pusht-keypoints'), pr_revision=None, pr_num=None)

In [None]:
# Drop the notebook's reference to the policy and reclaim memory.
# `pop` with a default keeps the cell re-runnable: a plain `del policy`
# raises NameError when the cell is executed a second time.
globals().pop("policy", None)

# Collect unreachable objects now rather than waiting for the next automatic
# collection; this is what matters on the CPU device used in this notebook.
gc.collect()

# Releasing cached CUDA memory only applies when a GPU is present.
if torch.cuda.is_available():
    torch.cuda.empty_cache()