## Evaluating the vision-based diffusion policy on a hard episode: 250 runs for each of 5 random seeds

In [1]:
# Mount Google Drive; later cells read the project code and the checkpoint
# from paths under /content/drive/MyDrive.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# setup vulkan
!mkdir -p /usr/share/vulkan/icd.d
!wget -q https://raw.githubusercontent.com/haosulab/ManiSkill/main/docker/nvidia_icd.json
!wget -q https://raw.githubusercontent.com/haosulab/ManiSkill/main/docker/10_nvidia.json
!mv nvidia_icd.json /usr/share/vulkan/icd.d
!mv 10_nvidia.json /usr/share/glvnd/egl_vendor.d/10_nvidia.json
!apt-get install -y --no-install-recommends libvulkan-dev
# dependencies
!pip install --upgrade mani_skill tyro

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
libvulkan-dev is already the newest version (1.3.204.1-2).
0 upgraded, 0 newly installed, 0 to remove and 34 not upgraded.


In [3]:
# Import required packages
import gymnasium as gym
from tqdm.notebook import tqdm
import numpy as np
import mani_skill.envs
import matplotlib.pyplot as plt

In [4]:
# Make the project directory on Drive importable; the `diffusion_policy` and
# `train_rgbd_2b` imports in the next cell presumably resolve from here.
import sys

DIFFUSION_POLICY_DIR = '/content/drive/MyDrive/diffusionpolicy'
# Guard so that re-running this cell does not stack duplicate entries on sys.path.
if DIFFUSION_POLICY_DIR not in sys.path:
    sys.path.append(DIFFUSION_POLICY_DIR)

In [5]:
from diffusion_policy.evaluate import evaluate
from diffusion_policy.make_env import make_eval_envs
from train_rgbd_2b import Agent, Args
import torch
import numpy as np

In [6]:
# List the checkpoint files saved for this run, to confirm the file loaded in the next cell exists.
!ls /content/drive/MyDrive/diffusionpolicy/runs/diffusion_policy-PushT-v1-rgbd-2b-00_rl_demos-/checkpoints

best_eval_success_once.pt


In [17]:
# Path of the checkpoint to evaluate.
ckpt_path = "/content/drive/MyDrive/diffusionpolicy/runs/diffusion_policy-PushT-v1-rgbd-2b-00_rl_demos-/checkpoints/best_eval_success_once.pt"
# Map the stored tensors onto the GPU when one is available and onto the CPU
# otherwise, instead of hard-coding 'cuda'; this mirrors the device fallback
# used when the agent is moved to its device.
# NOTE(review): torch.load relies on pickle; only load checkpoints from a trusted source.
ckpt = torch.load(ckpt_path, map_location="cuda" if torch.cuda.is_available() else "cpu")

In [18]:
# Evaluation configuration, collected in one mapping and expanded into Args.
# NOTE(review): these values presumably need to match the settings the
# checkpoint was trained with — confirm against the training run.
eval_settings = {
    "env_id": "PushT-v1",
    "obs_mode": "rgbd",
    "control_mode": "pd_ee_delta_pos",
    "sim_backend": "physx_cuda",
    "num_eval_envs": 10,
    "max_episode_steps": 100,
    "obs_horizon": 2,
}
args = Args(**eval_settings)

In [19]:
# Keyword arguments handed to make_eval_envs as `env_kwargs`
# (presumably forwarded to the environment constructor — confirm).
env_kwargs = dict(
    control_mode=args.control_mode,
    reward_mode="sparse",
    obs_mode=args.obs_mode,
    render_mode="rgb_array",
    human_render_camera_configs=dict(shader_pack="default"),
    # Forward the episode limit configured on `args`; previously it was set
    # there but never reached the environment.
    max_episode_steps=args.max_episode_steps,
)

# Additional settings handed to make_eval_envs as `other_kwargs`.
other_kwargs = dict(obs_horizon=args.obs_horizon)

In [21]:
from mani_skill.utils.wrappers.flatten import FlattenRGBDObservationWrapper

# Build the evaluation environments from the settings on `args`.
# video_dir=None presumably disables video recording — confirm in make_eval_envs.
eval_wrappers = [FlattenRGBDObservationWrapper]
envs = make_eval_envs(
    args.env_id, args.num_eval_envs, args.sim_backend,
    env_kwargs, other_kwargs,
    video_dir=None, wrappers=eval_wrappers,
)

In [22]:
# Rebuild the policy, restore the weights stored under the checkpoint's
# "ema_agent" key, switch to evaluation mode and move it to the GPU if present.
target_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
agent = Agent(envs, args)
agent.load_state_dict(ckpt["ema_agent"])
agent.eval()
agent = agent.to(target_device)

number of parameters: 6.07M


In [24]:
# Pick the GPU when available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
agent = agent.to(device)
# Record the device on the agent so later cells can read it back as `agent.device`.
agent.device = device

In [25]:
# Evaluate the policy for N_EPISODES_PER_SEED episodes under each seed, keep the
# per-seed metrics returned by `evaluate`, and report a cross-seed summary.
N_SEEDS = 5
N_EPISODES_PER_SEED = 250

# Prefer the device recorded on the agent; otherwise use the device its
# parameters actually live on. A fixed CPU fallback could disagree with where
# the model is, so the fallback is derived from the model itself. Resolved once,
# outside the loop, because it does not change between seeds.
eval_device = getattr(agent, "device", None)
if eval_device is None:
    eval_device = next(agent.parameters()).device

results = []
for seed in range(N_SEEDS):
    # Seed NumPy, PyTorch and the environments so each run is repeatable.
    np.random.seed(seed)
    torch.manual_seed(seed)
    envs.reset(seed=seed)
    metrics = evaluate(
        n=N_EPISODES_PER_SEED,
        agent=agent,
        eval_envs=envs,
        device=eval_device,
        sim_backend=args.sim_backend,
        progress_bar=True,
    )
    results.append(metrics)
    print(f"Seed {seed}: success_once mean={np.mean(metrics['success_once']):.4f}, "
          f"success_at_end mean={np.mean(metrics['success_at_end']):.4f}")

# Summarise the per-seed means so the headline number does not have to be
# assembled by hand from the per-seed lines above.
success_once_by_seed = [np.mean(m['success_once']) for m in results]
success_at_end_by_seed = [np.mean(m['success_at_end']) for m in results]
print(f"Across {N_SEEDS} seeds: "
      f"success_once mean={np.mean(success_once_by_seed):.4f} (std={np.std(success_once_by_seed):.4f}), "
      f"success_at_end mean={np.mean(success_at_end_by_seed):.4f} (std={np.std(success_at_end_by_seed):.4f})")


  0%|          | 0/250 [00:00<?, ?it/s][A
  4%|▍         | 10/250 [00:20<08:05,  2.02s/it][A
  8%|▊         | 20/250 [00:37<07:09,  1.87s/it][A
 12%|█▏        | 30/250 [00:55<06:39,  1.82s/it][A
 16%|█▌        | 40/250 [01:14<06:26,  1.84s/it][A
 20%|██        | 50/250 [01:32<06:09,  1.85s/it][A
 24%|██▍       | 60/250 [01:51<05:55,  1.87s/it][A
 28%|██▊       | 70/250 [02:10<05:35,  1.86s/it][A
 32%|███▏      | 80/250 [02:29<05:17,  1.87s/it][A
 36%|███▌      | 90/250 [02:47<04:59,  1.87s/it][A
 40%|████      | 100/250 [03:05<04:34,  1.83s/it][A
 44%|████▍     | 110/250 [03:23<04:17,  1.84s/it][A
 48%|████▊     | 120/250 [03:41<03:56,  1.82s/it][A
 52%|█████▏    | 130/250 [03:59<03:37,  1.81s/it][A
 56%|█████▌    | 140/250 [04:18<03:21,  1.83s/it][A
 60%|██████    | 150/250 [04:36<03:04,  1.84s/it][A
 64%|██████▍   | 160/250 [04:56<02:48,  1.87s/it][A
 68%|██████▊   | 170/250 [05:15<02:30,  1.88s/it][A
 72%|███████▏  | 180/250 [05:34<02:12,  1.89s/it][A
 76%|██████

Seed 0: success_once mean=0.0000, success_at_end mean=0.0000



  0%|          | 0/250 [00:00<?, ?it/s][A
  4%|▍         | 10/250 [00:19<07:39,  1.92s/it][A
  8%|▊         | 20/250 [00:36<06:59,  1.82s/it][A
 12%|█▏        | 30/250 [00:54<06:37,  1.81s/it][A
 16%|█▌        | 40/250 [01:12<06:19,  1.81s/it][A
 20%|██        | 50/250 [01:30<05:58,  1.79s/it][A
 24%|██▍       | 60/250 [01:48<05:42,  1.80s/it][A
 28%|██▊       | 70/250 [02:07<05:28,  1.83s/it][A
 32%|███▏      | 80/250 [02:26<05:15,  1.85s/it][A
 36%|███▌      | 90/250 [02:45<05:00,  1.88s/it][A
 40%|████      | 100/250 [03:04<04:42,  1.88s/it][A
 44%|████▍     | 110/250 [03:23<04:23,  1.88s/it][A
 48%|████▊     | 120/250 [03:40<03:59,  1.84s/it][A
 52%|█████▏    | 130/250 [03:58<03:39,  1.83s/it][A
 56%|█████▌    | 140/250 [04:17<03:21,  1.83s/it][A
 60%|██████    | 150/250 [04:35<03:03,  1.83s/it][A
 64%|██████▍   | 160/250 [04:54<02:47,  1.86s/it][A
 68%|██████▊   | 170/250 [05:13<02:28,  1.86s/it][A
 72%|███████▏  | 180/250 [05:32<02:11,  1.88s/it][A
 76%|██████

Seed 1: success_once mean=0.0000, success_at_end mean=0.0000



  0%|          | 0/250 [00:00<?, ?it/s][A
  4%|▍         | 10/250 [00:19<07:53,  1.97s/it][A
  8%|▊         | 20/250 [00:38<07:21,  1.92s/it][A
 12%|█▏        | 30/250 [00:57<06:58,  1.90s/it][A
 16%|█▌        | 40/250 [01:15<06:30,  1.86s/it][A
 20%|██        | 50/250 [01:33<06:09,  1.85s/it][A
 24%|██▍       | 60/250 [01:50<05:44,  1.81s/it][A
 28%|██▊       | 70/250 [02:08<05:23,  1.80s/it][A
 32%|███▏      | 80/250 [02:27<05:08,  1.81s/it][A
 36%|███▌      | 90/250 [02:44<04:47,  1.80s/it][A
 40%|████      | 100/250 [03:02<04:29,  1.80s/it][A
 44%|████▍     | 110/250 [03:20<04:11,  1.80s/it][A
 48%|████▊     | 120/250 [03:38<03:52,  1.79s/it][A
 52%|█████▏    | 130/250 [03:56<03:36,  1.80s/it][A
 56%|█████▌    | 140/250 [04:14<03:17,  1.79s/it][A
 60%|██████    | 150/250 [04:32<02:59,  1.79s/it][A
 64%|██████▍   | 160/250 [04:50<02:42,  1.81s/it][A
 68%|██████▊   | 170/250 [05:08<02:23,  1.80s/it][A
 72%|███████▏  | 180/250 [05:27<02:06,  1.81s/it][A
 76%|██████

Seed 2: success_once mean=0.0000, success_at_end mean=0.0000



  0%|          | 0/250 [00:00<?, ?it/s][A
  4%|▍         | 10/250 [00:19<07:50,  1.96s/it][A
  8%|▊         | 20/250 [00:37<07:09,  1.87s/it][A
 12%|█▏        | 30/250 [00:56<06:56,  1.89s/it][A
 16%|█▌        | 40/250 [01:14<06:28,  1.85s/it][A
 20%|██        | 50/250 [01:32<06:04,  1.82s/it][A
 24%|██▍       | 60/250 [01:51<05:49,  1.84s/it][A
 28%|██▊       | 70/250 [02:08<05:27,  1.82s/it][A
 32%|███▏      | 80/250 [02:27<05:09,  1.82s/it][A
 36%|███▌      | 90/250 [02:45<04:50,  1.81s/it][A
 40%|████      | 100/250 [03:03<04:31,  1.81s/it][A
 44%|████▍     | 110/250 [03:21<04:16,  1.83s/it][A
 48%|████▊     | 120/250 [03:39<03:55,  1.81s/it][A
 52%|█████▏    | 130/250 [03:57<03:36,  1.81s/it][A
 56%|█████▌    | 140/250 [04:15<03:19,  1.81s/it][A
 60%|██████    | 150/250 [04:33<02:59,  1.80s/it][A
 64%|██████▍   | 160/250 [04:51<02:43,  1.81s/it][A
 68%|██████▊   | 170/250 [05:09<02:24,  1.81s/it][A
 72%|███████▏  | 180/250 [05:27<02:06,  1.80s/it][A
 76%|██████

Seed 3: success_once mean=0.0000, success_at_end mean=0.0000



  0%|          | 0/250 [00:00<?, ?it/s][A
  4%|▍         | 10/250 [00:19<07:42,  1.93s/it][A
  8%|▊         | 20/250 [00:37<07:04,  1.85s/it][A
 12%|█▏        | 30/250 [00:54<06:39,  1.82s/it][A
 16%|█▌        | 40/250 [01:13<06:26,  1.84s/it][A
 20%|██        | 50/250 [01:31<06:03,  1.82s/it][A
 24%|██▍       | 60/250 [01:49<05:45,  1.82s/it][A
 28%|██▊       | 70/250 [02:07<05:27,  1.82s/it][A
 32%|███▏      | 80/250 [02:25<05:07,  1.81s/it][A
 36%|███▌      | 90/250 [02:44<04:51,  1.82s/it][A
 40%|████      | 100/250 [03:02<04:32,  1.82s/it][A
 44%|████▍     | 110/250 [03:20<04:12,  1.80s/it][A
 48%|████▊     | 120/250 [03:38<03:55,  1.81s/it][A
 52%|█████▏    | 130/250 [03:56<03:36,  1.80s/it][A
 56%|█████▌    | 140/250 [04:14<03:18,  1.80s/it][A
 60%|██████    | 150/250 [04:32<03:00,  1.81s/it][A
 64%|██████▍   | 160/250 [04:50<02:41,  1.80s/it][A
 68%|██████▊   | 170/250 [05:08<02:25,  1.81s/it][A
 72%|███████▏  | 180/250 [05:26<02:06,  1.80s/it][A
 76%|██████

Seed 4: success_once mean=0.0000, success_at_end mean=0.0000



