# Run a trained policy

This notebook will provide examples on how to run a trained policy and visualize the rollout.

In [2]:
import isaacgym
import isaacgymenvs
from hydra import compose, initialize_config_dir
from omegaconf import OmegaConf
from pathlib import Path

plt_root = Path("../../../policy_learning_toolkit/").expanduser()
igenvs_root = Path("~/diff_manip/external/IsaacGymEnvs").expanduser()

import argparse
import json
import h5py
import isaacgym
import imageio
import numpy as np
import os
from copy import deepcopy

import torch

import robomimic
import robomimic.utils.file_utils as FileUtils
import robomimic.utils.torch_utils as TorchUtils
import robomimic.utils.tensor_utils as TensorUtils
import robomimic.utils.obs_utils as ObsUtils
from robomimic.envs.env_base import EnvBase
from robomimic.algo import RolloutPolicy

import urllib.request


Importing module 'gym_38' (/home/krishnans/carbgym/python/isaacgym/_bindings/linux-x86_64/gym_38.so)
Setting GYM_USD_PLUG_INFO_PATH to /home/krishnans/carbgym/python/isaacgym/_bindings/linux-x86_64/usd/plugInfo.json
    No private macro file found!
    It is recommended to use a private macro file
    To setup, run: python /home/krishnans/ngc/robomimic/robomimic/scripts/setup_macros.py
)[0m


### Download policy checkpoint
First, let's try downloading a pretrained model from our model zoo.

In [2]:
# Get pretrained checkpooint from the model zoo

ckpt_path = "lift_ph_low_dim_epoch_1000_succ_100.pth"
# Lift (Proficient Human)
urllib.request.urlretrieve(
    "http://downloads.cs.stanford.edu/downloads/rt_benchmark/model_zoo/lift/bc_rnn/lift_ph_low_dim_epoch_1000_succ_100.pth",
    filename=ckpt_path
)

assert os.path.exists(ckpt_path)

In [3]:
ckpt_path = "../../bc_trained_models/test/20240403143734/models/model_epoch_2000.pth"

### Loading trained policy
We have a convenient function called `policy_from_checkpoint` that takes care of building the correct model from the checkpoint and load the trained weights. Of course you could also load the checkpoint manually.

In [4]:
device = TorchUtils.get_torch_device(try_to_use_cuda=True)

# restore policy
policy, ckpt_dict = FileUtils.policy_from_checkpoint(ckpt_path=ckpt_path, device=device, verbose=True)

{
    "algo_name": "bc",
    "experiment": {
        "name": "test",
        "validate": false,
        "logging": {
            "terminal_output_to_txt": true,
            "log_tb": true,
            "log_wandb": false,
            "wandb_proj_name": "debug"
        },
        "mse": {
            "enabled": false,
            "every_n_epochs": 50,
            "on_save_ckpt": true,
            "num_samples": 20,
            "visualize": true
        },
        "save": {
            "enabled": true,
            "every_n_seconds": null,
            "every_n_epochs": 50,
            "epochs": [],
            "on_best_validation": false,
            "on_best_rollout_return": false,
            "on_best_rollout_success_rate": true
        },
        "epoch_every_n_steps": 100,
        "validation_epoch_every_n_steps": 10,
        "env": null,
        "additional_envs": null,
        "render": false,
        "render_video": true,
        "keep_all_videos": false,
        "video_skip": 5,
  



### Creating rollout envionment
The policy checkpoint also contains sufficient information to recreate the environment that it's trained with. Again, you may manually create the environment.

In [5]:
# create environment from saved checkpoint
env, _ = FileUtils.env_from_checkpoint(
    ckpt_dict=ckpt_dict, 
    render=False, # we won't do on-screen rendering in the notebook
    render_offscreen=True, # render to RGB images for video
    verbose=True,
)

KeyError: 'type'

In [6]:
config_dir = str(igenvs_root / "isaacgymenvs" / "cfg")
overrides = ["task=ArticulateTaskSprayScissorsCamera", "test=true", "num_envs=100", 
             "train=ArticulateTaskPPONew"]
with initialize_config_dir(config_dir=config_dir, version_base="1.3"):
    cfg_expert = compose(config_name="config.yaml", overrides=overrides)
# task_cfg = OmegaConf.load(igenvs_root / "task" / "ArticulateTaskSprayScissors.yaml")

In [7]:
# OmegaConf.load("../../bc_trained_models/test/20240403143734/config.yaml")
# ("runs/articulate_scissors1_relac_expert/config.yaml")
env = isaacgymenvs.make(
            cfg_expert.seed,
            cfg_expert.task_name,
            cfg_expert.task.env.numEnvs,
            cfg_expert.sim_device,
            cfg_expert.rl_device,
            cfg_expert.graphics_device_id,
            cfg_expert.headless,
            cfg_expert.multi_gpu,
            cfg_expert.capture_video,
            cfg_expert.force_render,
            cfg_expert,
            # **kwargs,
        )


PyTorch version 2.0.1+cu118
Device count 1
/home/krishnans/carbgym/python/isaacgym/_bindings/src/gymtorch


Using /home/krishnans/.cache/torch_extensions/py38_cu118 as PyTorch extensions root...
Emitting ninja build file /home/krishnans/.cache/torch_extensions/py38_cu118/gymtorch/build.ninja...
Building extension module gymtorch...
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
Loading extension module gymtorch...
2024-04-04 05:34:39,337 - INFO - logger - logger initialized


ninja: no work to do.


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  def get_axis_params(value, axis_idx, x_value=0., dtype=np.float, n_dims=3):


Error: FBX library failed to load - importing FBX data will not succeed. Message: No module named 'fbx'
FBX tools must be installed from https://help.autodesk.com/view/FBX/2020/ENU/?guid=FBX_Developer_Help_scripting_with_python_fbx_installing_python_fbx_html


  from collections import Mapping
  from collections import Mapping, Set, Iterable
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  (np.int, "int"), (np.int8, "int"),
  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
  asset_options.default_dof_drive_mode = gymapi.DOF_MODE_POS


Not connected to PVD
+++ Using GPU PhysX
Physics Engine: PhysX
Physics Device: cuda:0
GPU Pipeline: enabled
Num hand dofs:  22
Using VHACD cache directory '/home/krishnans/.isaacgym/vhacd'
Found existing convex decomposition for mesh '/home/krishnans/diff_manip/external/IsaacGymEnvs/isaacgymenvs/tasks/../../assets/urdf/objects/spray_bottle/textured_objs/original-10.obj'
Found existing convex decomposition for mesh '/home/krishnans/diff_manip/external/IsaacGymEnvs/isaacgymenvs/tasks/../../assets/urdf/objects/spray_bottle/textured_objs/original-11.obj'
Found existing convex decomposition for mesh '/home/krishnans/diff_manip/external/IsaacGymEnvs/isaacgymenvs/tasks/../../assets/urdf/objects/spray_bottle/textured_objs/original-12.obj'
Found existing convex decomposition for mesh '/home/krishnans/diff_manip/external/IsaacGymEnvs/isaacgymenvs/tasks/../../assets/urdf/objects/spray_bottle/textured_objs/original-13.obj'
Found existing convex decomposition for mesh '/home/krishnans/diff_manip/ex

  return torch.tensor(x, dtype=dtype, device=device, requires_grad=requires_grad)
  return torch.tensor(x, dtype=dtype, device=device, requires_grad=requires_grad)


Obs dictionary: 
{'hand_joint_pos': [22], 'hand_joint_vel': [22], 'object_pos': [3], 'object_quat': [4], 'goal_pos': [3], 'goal_quat': [4], 'object_lin_vel': [3], 'object_ang_vel': [3], 'object_dof_pos': [1], 'goal_dof_pos': [1], 'hand_palm_pos': [3], 'hand_palm_quat': [4], 'object_type_one_hot': [7], 'object_instance_one_hot': [5], 'actions': [22], 'hand_camera': [64, 64, 3]}
hand_joint_pos: [22]
hand_joint_pos: torch.Size([100, 22])
hand_joint_vel: [22]
hand_joint_vel: torch.Size([100, 22])
object_pos: [3]
object_pos: torch.Size([100, 3])
object_quat: [4]
object_quat: torch.Size([100, 4])
goal_pos: [3]
goal_pos: torch.Size([100, 3])
goal_quat: [4]
goal_quat: torch.Size([100, 4])
object_lin_vel: [3]
object_lin_vel: torch.Size([100, 3])
object_ang_vel: [3]
object_ang_vel: torch.Size([100, 3])
object_dof_pos: [1]
object_dof_pos: torch.Size([100, 1])
goal_dof_pos: [1]
goal_dof_pos: torch.Size([100, 1])
hand_palm_pos: [3]
hand_palm_pos: torch.Size([100, 3])
hand_palm_quat: [4]
hand_palm_q

### Define the rollout loop
Now let's define the main rollout loop. The loop runs the policy to a target `horizon` and optionally writes the rollout to a video.

In [9]:
obs = env.reset()
obs['obs'].keys()
env.rollout_exceptions = ()
set(policy.policy.nets.policy.obs_shapes.keys()) - set(obs['obs'].keys())

{'goal_dof_pos_scaled', 'object_dof_pos_scaled', 'object_type'}

In [41]:
def rollout(policy, env, horizon, render=False, video_writer=None, video_skip=5, camera_names=None):
    """
    Helper function to carry out rollouts. Supports on-screen rendering, off-screen rendering to a video, 
    and returns the rollout trajectory.
    Args:
        policy (instance of RolloutPolicy): policy loaded from a checkpoint
        env (instance of EnvBase): env loaded from a checkpoint or demonstration metadata
        horizon (int): maximum horizon for the rollout
        render (bool): whether to render rollout on-screen
        video_writer (imageio writer): if provided, use to write rollout to video
        video_skip (int): how often to write video frames
        camera_names (list): determines which camera(s) are used for rendering. Pass more than
            one to output a video with multiple camera views concatenated horizontally.
    Returns:
        stats (dict): some statistics for the rollout - such as return, horizon, and task success
    """
    # assert isinstance(env, EnvBase)
    assert isinstance(policy, RolloutPolicy)
    assert not (render and (video_writer is not None))

    policy.start_episode()
    obs = env.reset()
    obs_keys = set(policy.policy.nets.policy.obs_shapes.keys())
    # state_dict = env.get_state()

    # hack that is necessary for robosuite tasks for deterministic action playback
    # obs = env.reset_to(state_dict)

    results = {}
    video_count = 0  # video frame counter
    total_reward = 0.
    try:
        for step_i in range(horizon):
            obs_dict = {}
            for k in obs_keys:
                if "camera" in k:
                    obs_dict[k] = env.obs_dict[k].permute(0, 3, 1, 2)
                    print(obs_dict[k].shape)
                else:
                    obs_dict[k] = env.obs_dict[k]

            # get action from policy
            act = policy(ob=obs_dict, batched=True)

            # play action
            next_obs, r, done, info = env.step(torch.tensor(act,device=env.device, dtype=torch.float))

            # compute reward
            total_reward += r
            success = info["success"] # env.is_success()["task"]

            # visualization
            if render:
                env.render(mode="human", camera_name=camera_names[0])
            if video_writer is not None:
                if video_count % video_skip == 0:
                    video_img = []
                    for cam_name in camera_names:
                        video_img.append(obs_dict[cam_name].cpu().numpy()[0])
                    video_img = np.concatenate(video_img, axis=1) # concatenate horizontally
                    video_writer.append_data(video_img)
                video_count += 1

            # break if done or if success
            if done or success:
                break

            # update for next iter
            obs = deepcopy(next_obs)
            state_dict = env.get_state()

    except env.rollout_exceptions as e:
        print("WARNING: got rollout exception {}".format(e))

    stats = dict(Return=total_reward, Horizon=(step_i + 1), Success_Rate=float(success))

    return stats


### Run the policy
Now let's rollout the policy!

In [42]:
rollout_horizon = 400
np.random.seed(0)
torch.manual_seed(0)
video_path = "rollout.mp4"
video_writer = imageio.get_writer(video_path, fps=20)

In [43]:
stats = rollout(
    policy=policy, 
    env=env, 
    horizon=rollout_horizon, 
    render=False, 
    video_writer=video_writer, 
    video_skip=5, 
    camera_names=["hand_camera"]
)
print(stats)
video_writer.close()

torch.Size([100, 3, 64, 64])


  return torch.tensor(x, dtype=dtype, device=device, requires_grad=requires_grad)


ValueError: Image must have 1, 2, 3 or 4 channels

### Visualize the rollout

In [None]:
from IPython.display import Video
Video(video_path)