<a href="https://colab.research.google.com/github/simpler-env/SimplerEnv/blob/main/example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# SimplerEnv: Simulated Manipulation Policy Evaluation for Real-World Robots

- Project page: <https://simpler-env.github.io/>
- Code: <https://github.com/simpler-env/SimplerEnv>

## Installation


## Create a Simulated Environment and Take Random Actions

In [9]:
import simpler_env
from simpler_env.utils.env.observation_utils import get_image_from_maniskill2_obs_dict
import mediapy
import sapien.core as sapien

task_name = "google_robot_open_drawer"  # @param ["google_robot_pick_coke_can", "google_robot_move_near", "google_robot_open_drawer", "google_robot_close_drawer", "widowx_spoon_on_towel", "widowx_carrot_on_plate", "widowx_stack_cube", "widowx_put_eggplant_in_basket"]

# When this cell is re-run, release the environment left over from the previous
# run before building a new one.
if "env" in locals():
  print("Closing existing env")
  env.close()
  del env

env = simpler_env.make(task_name)
# Colab GPU does not support the denoiser
sapien.render_config.rt_use_denoiser = False

obs, reset_info = env.reset()
instruction = env.get_language_instruction()
print("Reset info", reset_info)
print("Instruction", instruction)

# Layout of the action vector passed to env.step:
#   action[:3]  -> delta xyz
#   action[3:6] -> delta rotation in axis-angle representation
#   action[6:7] -> gripper (the meaning of open / close depends on robot URDF)
frames = []
done = truncated = False
while not done and not truncated:
  # Keep the frame the action is chosen from, then advance the simulation.
  image = get_image_from_maniskill2_obs_dict(env, obs)
  frames.append(image)
  action = env.action_space.sample()  # replace this with your policy inference
  obs, reward, done, truncated, info = env.step(action)

# `info` comes from the last step of the rollout.
episode_stats = info.get('episode_stats', {})
print("Episode stats", episode_stats)
mediapy.show_video(frames, fps=10)

Closing existing env
Reset info {'scene_name': 'dummy_drawer', 'scene_offset': None, 'scene_pose': None, 'scene_table_height': 0.87, 'urdf_version': 'recolor_tabletop_visual_matching_2', 'rgb_overlay_path': '/home/lakomchik/projects/smiles_2024/SimplerEnv/ManiSkill2_real2sim/data/real_inpainting/open_drawer_a0.png', 'rgb_overlay_cameras': ['overhead_camera'], 'rgb_overlay_mode': 'background', 'disable_bad_material': True, 'drawer_pose_wrt_robot_base': Pose([0.947, 0.00899999, 0.36095], [0, 3.72529e-09, 0, -1]), 'cabinet_pose_wrt_robot_base': Pose([0.947, 0.00899999, -0.0620503], [0, 0, 0, -1]), 'station_name': 'mk_station_recolor', 'light_mode': 'simple'}
Instruction open middle drawer
Episode stats OrderedDict([('qpos', '0.000')])


0
This browser does not support the video tag.


## Run Inference on Simulated Environments

In [10]:
# @title Setup

import os
import numpy as np
import simpler_env
from simpler_env.utils.env.observation_utils import get_image_from_maniskill2_obs_dict
import mediapy

# Default download location used by get_rt_1_checkpoint.
os.makedirs('checkpoints', exist_ok=True)


# Short model name selectable in the notebook -> name of the checkpoint as it
# is stored in the download bucket (and on disk after download).
RT_1_CHECKPOINTS = dict(
    rt_1_x="rt_1_x_tf_trained_for_002272480_step",
    rt_1_400k="rt_1_tf_trained_for_000400120",
    rt_1_58k="rt_1_tf_trained_for_000058240",
    rt_1_1k="rt_1_tf_trained_for_000001120",
)


def get_rt_1_checkpoint(name, ckpt_dir="./checkpoints"):
  assert name in RT_1_CHECKPOINTS, name
  ckpt_name = RT_1_CHECKPOINTS[name]
  ckpt_path = os.path.join(ckpt_dir, ckpt_name)
  if not os.path.exists(ckpt_path):
    if name == "rt_1_x":
      !gsutil -m cp -r gs://gdm-robotics-open-x-embodiment/open_x_embodiment_and_rt_x_oss/{ckpt_name}.zip {ckpt_dir}
      !unzip {ckpt_dir}/{ckpt_name}.zip -d {ckpt_dir}
    else:
      !gsutil -m cp -r gs://gdm-robotics-open-x-embodiment/open_x_embodiment_and_rt_x_oss/{ckpt_name} {ckpt_dir}
  return ckpt_path

In [15]:
# @title Select your environment

# Imported here so this section runs on a fresh kernel; it is otherwise only
# imported by the random-action demo cell.
import sapien.core as sapien

task_name = "google_robot_open_drawer"  # @param ["google_robot_pick_coke_can", "google_robot_move_near", "google_robot_open_drawer", "google_robot_close_drawer", "widowx_spoon_on_towel", "widowx_carrot_on_plate", "widowx_stack_cube", "widowx_put_eggplant_in_basket"]

# When this cell is re-run, release the previous environment before creating
# a new one.
if 'env' in locals():
  print("Closing existing env")
  env.close()
  del env
env = simpler_env.make(task_name)

# Note: we turned off the denoiser as the colab kernel will crash if it's turned on
# To use the denoiser, please git clone our SIMPLER environments
# and perform evaluations locally.
sapien.render_config.rt_use_denoiser = False

obs, reset_info = env.reset()
instruction = env.get_language_instruction()
print("Reset info", reset_info)
print("Instruction", instruction)

# The policy wrappers are configured per robot; the robot is derived from the
# task name prefix.
if "google" in task_name:
  policy_setup = "google_robot"
else:
  policy_setup = "widowx_bridge"

Closing existing env
Reset info {'scene_name': 'dummy_drawer', 'scene_offset': None, 'scene_pose': None, 'scene_table_height': 0.87, 'urdf_version': 'recolor_tabletop_visual_matching_2', 'rgb_overlay_path': '/home/lakomchik/projects/smiles_2024/SimplerEnv/ManiSkill2_real2sim/data/real_inpainting/open_drawer_a0.png', 'rgb_overlay_cameras': ['overhead_camera'], 'rgb_overlay_mode': 'background', 'disable_bad_material': True, 'drawer_pose_wrt_robot_base': Pose([0.947, 0.00899999, 0.36095], [0, 3.72529e-09, 0, -1]), 'cabinet_pose_wrt_robot_base': Pose([0.947, 0.00899999, -0.0620503], [0, 0, 0, -1]), 'station_name': 'mk_station_recolor', 'light_mode': 'simple'}
Instruction open middle drawer


In [16]:
# @title Select your model and environment

model_name = "rt_1_x" # @param ["rt_1_x", "rt_1_400k", "rt_1_58k", "rt_1_1k", "octo-base", "octo-small"]

# Classify the selection once; anything that is neither family is rejected
# before any policy module is imported.
uses_rt1 = "rt_1" in model_name
uses_octo = "octo" in model_name
if not (uses_rt1 or uses_octo):
  raise ValueError(model_name)

# Each policy module is imported only when its model family is selected.
if uses_rt1:
  from simpler_env.policies.rt1.rt1_model import RT1Inference

  ckpt_path = get_rt_1_checkpoint(model_name)
  model = RT1Inference(saved_model_path=ckpt_path, policy_setup=policy_setup)
else:
  from simpler_env.policies.octo.octo_model import OctoInference

  model = OctoInference(model_type=model_name, policy_setup=policy_setup, init_rng=0)




In [17]:
#@title Run inference

# Start a fresh episode and give the policy the language instruction for it.
obs, reset_info = env.reset()
instruction = env.get_language_instruction()
model.reset(instruction)
print(instruction)

image = get_image_from_maniskill2_obs_dict(env, obs)  # np.ndarray of shape (H, W, 3), uint8
images = [image]
predicted_terminated, success, truncated = False, False, False
timestep = 0
# The rollout ends when the policy predicts termination or the env truncates
# the episode; `success` holds the value reported by the last step.
while not (predicted_terminated or truncated):
    # step the model; "raw_action" is raw model action output; "action" is the processed action to be sent into maniskill env
    raw_action, action = model.step(image)
    predicted_terminated = bool(action["terminate_episode"][0] > 0)
    obs, reward, success, truncated, info = env.step(
        np.concatenate([action["world_vector"], action["rot_axangle"], action["gripper"]])
    )
    print(timestep, info)
    # update image observation
    image = get_image_from_maniskill2_obs_dict(env, obs)
    images.append(image)
    timestep += 1

# Report the final episode statistics alongside the success flag, as the
# random-action demo cell does.
episode_stats = info.get("episode_stats", {})
print(f"Episode success: {success}")
print("Episode stats", episode_stats)

open top drawer
0 {'elapsed_steps': 1, 'success': False, 'qpos': 4.321821e-15, 'episode_stats': OrderedDict([('qpos', '0.000')])}
1 {'elapsed_steps': 2, 'success': False, 'qpos': 5.2316766e-15, 'episode_stats': OrderedDict([('qpos', '0.000')])}
2 {'elapsed_steps': 3, 'success': False, 'qpos': 5.2316766e-15, 'episode_stats': OrderedDict([('qpos', '0.000')])}
3 {'elapsed_steps': 4, 'success': False, 'qpos': 5.2316766e-15, 'episode_stats': OrderedDict([('qpos', '0.000')])}
4 {'elapsed_steps': 5, 'success': False, 'qpos': 5.2316766e-15, 'episode_stats': OrderedDict([('qpos', '0.000')])}
5 {'elapsed_steps': 6, 'success': False, 'qpos': 5.2316766e-15, 'episode_stats': OrderedDict([('qpos', '0.000')])}
6 {'elapsed_steps': 7, 'success': False, 'qpos': 5.2316766e-15, 'episode_stats': OrderedDict([('qpos', '0.000')])}
7 {'elapsed_steps': 8, 'success': False, 'qpos': 5.2316766e-15, 'episode_stats': OrderedDict([('qpos', '0.000')])}
8 {'elapsed_steps': 9, 'success': False, 'qpos': 5.2316766e-15, '

In [18]:
# Label the video with the task and model currently selected in the kernel,
# then play back the frames collected in `images` at 10 frames per second.
print(task_name, model_name)
mediapy.show_video(images, fps=10)

google_robot_open_drawer rt_1_x


0
This browser does not support the video tag.


## Gallery

In [7]:
# @markdown RT-1-X close drawer
# NOTE(review): this cell replays whatever `images`, `task_name` and
# `model_name` currently hold in the kernel. The saved output of this cell was
# printed for google_robot_pick_coke_can, so confirm the label above matches
# the run being shown.
print(task_name, model_name)
mediapy.show_video(images, fps=10)
# Note: the denoiser is turned off when the environment is created, because the
# colab kernel will crash if it's turned on. To use the denoiser, please git
# clone our SIMPLER environments and perform evaluations locally.

google_robot_pick_coke_can rt_1_x


0
This browser does not support the video tag.
