# Deploy Trained Policy

<img src="./media/rollout.gif" width="480" height="360">

Deploy trained policy in simulation.

In [1]:
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
import numpy as np
from lerobot.common.datasets.utils import write_json, serialize_dict
from lerobot.common.policies.act.configuration_act import ACTConfig
from lerobot.common.policies.act.modeling_act import ACTPolicy
from lerobot.configs.types import FeatureType
from lerobot.common.datasets.factory import resolve_delta_timestamps
from lerobot.common.datasets.utils import dataset_to_policy_features
import torch
from PIL import Image
import torchvision

## Load Policy

In [2]:
device = 'cuda'

In [3]:
dataset_metadata = LeRobotDatasetMetadata("omy_pnp", root='./demo_data')
features = dataset_to_policy_features(dataset_metadata.features)
output_features = {key: ft for key, ft in features.items() if ft.type is FeatureType.ACTION}
input_features = {key: ft for key, ft in features.items() if key not in output_features}
input_features.pop("observation.wrist_image")
# Policies are initialized with a configuration class, in this case `DiffusionConfig`. For this example,
# we'll just use the defaults and so no arguments other than input/output features need to be passed.
# Temporal ensemble to make smoother trajectory predictions
cfg = ACTConfig(input_features=input_features, output_features=output_features, chunk_size= 10, n_action_steps=1, temporal_ensemble_coeff = 0.9)
delta_timestamps = resolve_delta_timestamps(cfg, dataset_metadata)
# We can now instantiate our policy with this config and the dataset stats.
policy = ACTPolicy.from_pretrained('./ckpt/act_y', config = cfg, dataset_stats=dataset_metadata.stats)
policy.to(device)



Loading weights from local directory


ACTPolicy(
  (normalize_inputs): Normalize(
    (buffer_observation_image): ParameterDict(
        (mean): Parameter containing: [torch.cuda.FloatTensor of size 3x1x1 (cuda:0)]
        (std): Parameter containing: [torch.cuda.FloatTensor of size 3x1x1 (cuda:0)]
    )
    (buffer_observation_state): ParameterDict(
        (mean): Parameter containing: [torch.cuda.FloatTensor of size 6 (cuda:0)]
        (std): Parameter containing: [torch.cuda.FloatTensor of size 6 (cuda:0)]
    )
  )
  (normalize_targets): Normalize(
    (buffer_action): ParameterDict(
        (mean): Parameter containing: [torch.cuda.FloatTensor of size 7 (cuda:0)]
        (std): Parameter containing: [torch.cuda.FloatTensor of size 7 (cuda:0)]
    )
  )
  (unnormalize_outputs): Unnormalize(
    (buffer_action): ParameterDict(
        (mean): Parameter containing: [torch.cuda.FloatTensor of size 7 (cuda:0)]
        (std): Parameter containing: [torch.cuda.FloatTensor of size 7 (cuda:0)]
    )
  )
  (model): ACT(
    (v

## Load Environment

In [4]:
from mujoco_env.y_env import SimpleEnv
xml_path = './asset/example_scene_y.xml'
PnPEnv = SimpleEnv(xml_path, action_type='joint_angle')

/home/ubuntu/projects/lerobot-mujoco-tutorial/asset/example_scene_y.xml
['agentview', 'topview', 'sideview', 'egocentric']
name:[Tabletop] dt:[0.002] HZ:[500]
n_qpos:[24] n_qvel:[22] n_qacc:[22] n_ctrl:[10]

n_body:[21]
 [0/21] [world] mass:[0.00]kg
 [1/21] [front_object_table] mass:[1.00]kg
 [2/21] [camera] mass:[0.00]kg
 [3/21] [camera2] mass:[0.00]kg
 [4/21] [camera3] mass:[0.00]kg
 [5/21] [link1] mass:[2.06]kg
 [6/21] [link2] mass:[3.68]kg
 [7/21] [link3] mass:[2.39]kg
 [8/21] [link4] mass:[1.40]kg
 [9/21] [link5] mass:[1.40]kg
 [10/21] [link6] mass:[0.65]kg
 [11/21] [camera_center] mass:[0.00]kg
 [12/21] [tcp_link] mass:[0.32]kg
 [13/21] [rh_p12_rn_r1] mass:[0.07]kg
 [14/21] [rh_p12_rn_r2] mass:[0.02]kg
 [15/21] [rh_p12_rn_l1] mass:[0.07]kg
 [16/21] [rh_p12_rn_l2] mass:[0.02]kg
 [17/21] [body_obj_mug_5] mass:[0.00]kg
 [18/21] [object_mug_5] mass:[0.08]kg
 [19/21] [body_obj_plate_11] mass:[0.00]kg
 [20/21] [object_plate_11] mass:[0.10]kg
body_total_mass:[13.27]kg

n_geom:[83]
geom_

## Roll-Out Your Policy

In [6]:
step = 0
PnPEnv.reset(seed=0)
policy.reset()
policy.eval()
save_image = True
img_transform = torchvision.transforms.ToTensor()
while PnPEnv.env.is_viewer_alive():
    PnPEnv.step_env()
    if PnPEnv.env.loop_every(HZ=20):
        # Check if the task is completed
        success = PnPEnv.check_success()
        if success:
            print('Success')
            # Reset the environment and action queue
            policy.reset()
            PnPEnv.reset(seed=0)
            step = 0
            save_image = False
        # Get the current state of the environment
        state = PnPEnv.get_ee_pose()
        # Get the current image from the environment
        image, wirst_image = PnPEnv.grab_image()
        image = Image.fromarray(image)
        image = image.resize((256, 256))
        image = img_transform(image)
        wrist_image = Image.fromarray(wirst_image)
        wrist_image = wrist_image.resize((256, 256))
        wrist_image = img_transform(wrist_image)
        data = {
            'observation.state': torch.tensor([state]).to(device),
            'observation.image': image.unsqueeze(0).to(device),
            'observation.wrist_image': wrist_image.unsqueeze(0).to(device),
            'task': ['Put mug cup on the plate'],
            'timestamp': torch.tensor([step/20]).to(device)
        }
        # Select an action
        action = policy.select_action(data)
        action = action[0].cpu().detach().numpy()
        # Take a step in the environment
        _ = PnPEnv.step(action)
        PnPEnv.render()
        step += 1
        success = PnPEnv.check_success()
        if success:
            print('Success')
            break

DONE INITIALIZATION


  'observation.state': torch.tensor([state]).to(device),
