## Steering the visual-based diffusion policy

In [1]:
# Mount Google Drive at /content/drive so that files kept on Drive are reachable
# from this Colab session (a later cell changes into a project folder under it).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Before running anything, switch the Colab runtime to a GPU (Runtime → Change runtime type). Only then run the cell below: it sets up Vulkan and installs the dependencies used by the rest of the notebook.

In [2]:
# setup vulkan
!mkdir -p /usr/share/vulkan/icd.d
!wget -q https://raw.githubusercontent.com/haosulab/ManiSkill/main/docker/nvidia_icd.json
!wget -q https://raw.githubusercontent.com/haosulab/ManiSkill/main/docker/10_nvidia.json
!mv nvidia_icd.json /usr/share/vulkan/icd.d
!mv 10_nvidia.json /usr/share/glvnd/egl_vendor.d/10_nvidia.json
!apt-get install -y --no-install-recommends libvulkan-dev
# dependencies
!pip install --upgrade mani_skill tyro

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
libvulkan-dev is already the newest version (1.3.204.1-2).
0 upgraded, 0 newly installed, 0 to remove and 34 not upgraded.


In [3]:
# Import required packages
import gymnasium as gym
from tqdm.notebook import tqdm
import numpy as np
import mani_skill.envs
import matplotlib.pyplot as plt

Download the demos.

In [4]:
# Download the PushT-v1 demonstration bundle with ManiSkill's download_demo utility
# (the recorded run stored it under /root/.maniskill/demos).
!python -m mani_skill.utils.download_demo PushT-v1

Downloading demonstrations to /root/.maniskill/demos - 1/1, PushT-v1
35.7Mit [00:00, 42.2Mit/s]              


In [5]:
# List the downloaded PushT-v1 "rl" demo folder to see which checkpoints, sample
# videos and trajectory files it contains.
!ls /root/.maniskill/demos/PushT-v1/rl

ppo_pd_ee_delta_pos_ckpt.pt
ppo_pd_ee_delta_pose_ckpt.pt
ppo_pd_joint_delta_pos_ckpt.pt
sample_pd_ee_delta_pose.mp4
sample_pd_ee_delta_pos.mp4
sample_pd_joint_delta_pos.mp4
trajectory.none.pd_ee_delta_pose.physx_cuda.h5
trajectory.none.pd_ee_delta_pose.physx_cuda.json
trajectory.none.pd_ee_delta_pos.physx_cuda.h5
trajectory.none.pd_ee_delta_pos.physx_cuda.json
trajectory.none.pd_joint_delta_pos.physx_cuda.h5
trajectory.none.pd_joint_delta_pos.physx_cuda.json


In [6]:
# Change the working directory to the project folder on the mounted Drive.
# NOTE(review): the training cell later calls train_rgbd_steering.py by relative path,
# so that script presumably lives in this folder — confirm.
%cd /content/drive/MyDrive/diffusionpolicy/

/content/drive/MyDrive/diffusionpolicy


In [7]:
# Replay the pd_ee_delta_pos demonstrations and save a new trajectory file.
# Flag meanings, as reflected in the metadata JSON inspected further down:
#   -c pd_ee_delta_pos -> control_mode, -o rgbd -> obs_mode,
#   --num-envs 256 -> num_envs, -b physx_cuda -> sim_backend.
# The result shows up as trajectory.rgbd.pd_ee_delta_pos.physx_cuda.{h5,json} in the next listing.
# NOTE(review): the recorded run took about 3 hours for 888 episodes and saved 799 of them.
!python -m mani_skill.trajectory.replay_trajectory \
  --traj-path ~/.maniskill/demos/PushT-v1/rl/trajectory.none.pd_ee_delta_pos.physx_cuda.h5 \
  --use-env-states -c pd_ee_delta_pos -o rgbd \
  --save-traj --num-envs 256 -b physx_cuda

  0% 0/888 [00:00<?, ?step/s]0step [00:00, ?step/s]Downloading PhysX GPU library to /root/.sapien/physx/105.1-physx-5.3.1.patch0 from Github. This can take several minutes. If it fails to download, please manually download fhttps://github.com/sapien-sim/physx-precompiled/releases/download/105.1-physx-5.3.1.patch0/linux-so.zip and unzip at /root/.sapien/physx/105.1-physx-5.3.1.patch0.
Download complete.
100% 888/888 [3:01:46<00:00, 12.28s/step]
  0% 0/888 [3:01:59<?, ?step/s]
Replayed 888 episodes, 799/888=89.98% demos saved


In [8]:
# List the demo folder again to confirm the replay wrote the trajectory.rgbd.* files.
!ls /root/.maniskill/demos/PushT-v1/rl

ppo_pd_ee_delta_pos_ckpt.pt
ppo_pd_ee_delta_pose_ckpt.pt
ppo_pd_joint_delta_pos_ckpt.pt
sample_pd_ee_delta_pose.mp4
sample_pd_ee_delta_pos.mp4
sample_pd_joint_delta_pos.mp4
trajectory.none.pd_ee_delta_pose.physx_cuda.h5
trajectory.none.pd_ee_delta_pose.physx_cuda.json
trajectory.none.pd_ee_delta_pos.physx_cuda.h5
trajectory.none.pd_ee_delta_pos.physx_cuda.json
trajectory.none.pd_joint_delta_pos.physx_cuda.h5
trajectory.none.pd_joint_delta_pos.physx_cuda.json
trajectory.rgbd.pd_ee_delta_pos.physx_cuda.h5
trajectory.rgbd.pd_ee_delta_pos.physx_cuda.json


In [None]:
# Bundle the RGB-D trajectory (.h5) and its metadata (.json) into one archive under /content.
# With -j the entries are stored by file name only, as the "adding:" lines in the output show.
!zip -j /content/trajectory_rgbd_bundle.zip \
  /root/.maniskill/demos/PushT-v1/rl/trajectory.rgbd.pd_ee_delta_pos.physx_cuda.h5 \
  /root/.maniskill/demos/PushT-v1/rl/trajectory.rgbd.pd_ee_delta_pos.physx_cuda.json

  adding: trajectory.rgbd.pd_ee_delta_pos.physx_cuda.h5 (deflated 30%)
  adding: trajectory.rgbd.pd_ee_delta_pos.physx_cuda.json (deflated 94%)


In [None]:
# Hand the zipped trajectory bundle to Colab's file-download helper so a copy can be
# kept outside the runtime (presumably via a browser download — confirm).
from google.colab import files
files.download("/content/trajectory_rgbd_bundle.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import os

# Sanity check: report (True/False) whether each replay artifact is on disk,
# metadata JSON first, then the HDF5 trajectory file.
for artifact_path in (
    '/root/.maniskill/demos/PushT-v1/rl/trajectory.rgbd.pd_ee_delta_pos.physx_cuda.json',
    '/root/.maniskill/demos/PushT-v1/rl/trajectory.rgbd.pd_ee_delta_pos.physx_cuda.h5',
):
    print(os.path.exists(artifact_path))

True
True


In [None]:
import json

# Peek at the metadata JSON written next to the RGB-D trajectory file:
# show its top-level keys and a 300-character preview of the first three values.
json_path = '/root/.maniskill/demos/PushT-v1/rl/trajectory.rgbd.pd_ee_delta_pos.physx_cuda.json'
with open(json_path, 'r') as meta_file:
    data = json.load(meta_file)

top_level_keys = list(data)
print('Top-level keys:', top_level_keys)
for key in top_level_keys[:3]:
    preview = str(data[key])[:300]
    print(f'Key: {key}, Value (truncated):', preview)

Top-level keys: ['env_info', 'commit_info', 'episodes']
Key: env_info, Value (truncated): {'env_id': 'PushT-v1', 'env_kwargs': {'num_envs': 256, 'reconfiguration_freq': 1, 'human_render_camera_configs': {'shader_pack': 'default'}, 'obs_mode': 'rgbd', 'render_mode': 'rgb_array', 'sim_backend': 'physx_cuda', 'control_mode': 'pd_ee_delta_pos', 'reward_mode': None}, 'max_episode_steps': 100}
Key: commit_info, Value (truncated): None
Key: episodes, Value (truncated): [{'episode_id': 0, 'episode_seed': 144360722, 'control_mode': 'pd_ee_delta_pos', 'elapsed_steps': 40, 'reset_kwargs': {}, 'success': True}, {'episode_id': 1, 'episode_seed': 821489836, 'control_mode': 'pd_ee_delta_pos', 'elapsed_steps': 52, 'reset_kwargs': {}, 'success': True}, {'episode_id': 2, 'ep


In [None]:
import h5py

# Open the RGB-D trajectory file read-only and show the names of its top-level
# entries (one "traj_<i>" entry per saved episode in the recorded output).
h5_path = '/root/.maniskill/demos/PushT-v1/rl/trajectory.rgbd.pd_ee_delta_pos.physx_cuda.h5'
with h5py.File(h5_path, 'r') as h5_file:
    top_level_names = list(h5_file.keys())
    print('Top-level keys:', top_level_names)

Top-level keys: ['traj_0', 'traj_1', 'traj_10', 'traj_100', 'traj_101', 'traj_102', 'traj_103', 'traj_104', 'traj_105', 'traj_106', 'traj_107', 'traj_108', 'traj_109', 'traj_11', 'traj_110', 'traj_111', 'traj_112', 'traj_113', 'traj_114', 'traj_115', 'traj_116', 'traj_117', 'traj_118', 'traj_119', 'traj_12', 'traj_120', 'traj_121', 'traj_122', 'traj_123', 'traj_124', 'traj_125', 'traj_126', 'traj_127', 'traj_128', 'traj_129', 'traj_13', 'traj_130', 'traj_131', 'traj_132', 'traj_133', 'traj_134', 'traj_135', 'traj_136', 'traj_137', 'traj_138', 'traj_139', 'traj_14', 'traj_140', 'traj_141', 'traj_142', 'traj_143', 'traj_144', 'traj_145', 'traj_146', 'traj_147', 'traj_148', 'traj_149', 'traj_15', 'traj_150', 'traj_151', 'traj_152', 'traj_153', 'traj_154', 'traj_155', 'traj_156', 'traj_157', 'traj_158', 'traj_159', 'traj_16', 'traj_160', 'traj_161', 'traj_162', 'traj_163', 'traj_164', 'traj_165', 'traj_166', 'traj_167', 'traj_168', 'traj_169', 'traj_17', 'traj_170', 'traj_171', 'traj_172',

Once you're done with the first run, describe the training process in detail (e.g. training loss, hyperparameters, training time, VRAM usage), and document any challenges you ran into.

And now you'll need to tweak the hyperparameters. The two levers you can pull are the prediction horizon and acting horizon of the diffusion policy.

In [None]:
seed=1
demos=100

# add your tensorboard code above

!python train_rgbd_steering.py --env-id PushT-v1 \
  --demo-path /root/.maniskill/demos/PushT-v1/rl/trajectory.rgbd.pd_ee_delta_pos.physx_cuda.h5 \
  --control-mode "pd_ee_delta_pos" --sim-backend "physx_cuda" --num-demos ${demos} --max_episode_steps 100 \
  --total_iters 30000 --num_demos 600 \
  --exp-name diffusion_policy-PushT-v1-rgbd-steer-${demos}_rl_demos-${seed} \
  --track # track training on wandb

2025-04-26 03:43:15.131249: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745638995.430292   62001 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745638995.506923   62001 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-04-26 03:43:16.138009: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
