# Deploy Trained SmolVLA Policy

<img src="./media/rollout3.gif" width="480" height="360">

Deploy the trained policy in simulation.
# ========================================
# 8.smolvla.ipynb - SmolVLA策略部署与测试
# ========================================
# 功能：加载训练好的SmolVLA模型，并在MuJoCo仿真环境中进行实际部署测试
# SmolVLA = Small Vision-Language-Action Model（小型视觉-语言-动作模型）

In [None]:
# Cell 1 - configure environment variables (must be the first cell that runs)
import os

# 1. Point X11 clients at display :0 and at the user's Xauthority file.
os.environ.update({
    'DISPLAY': ':0',
    'XAUTHORITY': os.path.expanduser('~/.Xauthority'),
})
print(f"✓ DISPLAY设置为: {os.environ['DISPLAY']}")

# 2. Force GPU rendering (the key setting): select the EGL backend for MuJoCo.
os.environ.update({'MUJOCO_GL': 'egl'})
print("✓ MUJOCO_GL: egl (GPU硬件加速)")

# 3. NVIDIA GPU tuning: turn off vertical sync and reduce CPU waiting.
os.environ.update({
    '__GL_SYNC_TO_VBLANK': '0',
    '__GL_YIELD': 'NOTHING',
})
print("✓ NVIDIA GPU优化已启用")

# 4. OpenGL performance tuning: turn off anti-aliasing and anisotropic filtering.
os.environ.update({
    '__GL_FSAA_MODE': '0',
    '__GL_LOG_MAX_ANISO': '0',
})
print("✓ OpenGL性能优化已启用")

In [None]:
!pip install transformers==4.50.3
!pip install num2words
!pip install accelerate
!pip install safetensors>=0.4.3

### [Optional] Download Dataset

In [None]:
'''
If you want to use the collected dataset, please download it from Hugging Face.
'''
#!git clone https://huggingface.co/datasets/Jeongeun/omy_pnp_language

In [None]:
# Download the dataset repository into ./demo_data_language, the first
# location the metadata-loading cell in Step 3 tries.
!huggingface-cli download Jeongeun/omy_pnp_language --repo-type dataset --local-dir ./demo_data_language


## Step 2. Train Model

In [None]:
# Alternative: launch training as a standalone script from the shell.
#!python train_model.py --config_path smolvla_omy.yaml
# Run training through the `train_with_monitor` helper with the same config file.
from train_with_monitor_cell import train_with_monitor
train_with_monitor("smolvla_omy.yaml")

## Step 3. Deploy

In [None]:
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
import numpy as np
from lerobot.common.datasets.utils import write_json, serialize_dict
from lerobot.common.policies.smolvla.configuration_smolvla import SmolVLAConfig
from lerobot.common.policies.smolvla.modeling_smolvla import SmolVLAPolicy
from lerobot.configs.types import FeatureType
from lerobot.common.datasets.factory import resolve_delta_timestamps
from lerobot.common.datasets.utils import dataset_to_policy_features
import torch
from PIL import Image
import torchvision

In [None]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs on
# machines where CUDA is unavailable.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
# Load the dataset metadata. Try the directory used by the
# `huggingface-cli download` cell first, then fall back to `./omy_pnp_language`
# (presumably a local `git clone` of the dataset repository).
try:
    dataset_metadata = LeRobotDatasetMetadata("omy_pnp_language", root='./demo_data_language')
except Exception:
    # `Exception` instead of a bare `except` so that KeyboardInterrupt and
    # SystemExit still propagate while any loading failure triggers the fallback.
    dataset_metadata = LeRobotDatasetMetadata("omy_pnp_language", root='./omy_pnp_language')
features = dataset_to_policy_features(dataset_metadata.features)
# Features typed as ACTION are the policy outputs; every other feature is an input.
output_features = {key: ft for key, ft in features.items() if ft.type is FeatureType.ACTION}
input_features = {key: ft for key, ft in features.items() if key not in output_features}
# Policies are initialized with a configuration class, in this case `SmolVLAConfig`.
# Besides the input/output features, only `chunk_size` and `n_action_steps`
# are overridden (both set to 5); everything else keeps its default.
cfg = SmolVLAConfig(
    input_features=input_features,
    output_features=output_features,
    chunk_size=5,
    n_action_steps=5,
)
delta_timestamps = resolve_delta_timestamps(cfg, dataset_metadata)

In [None]:
# We can now instantiate our policy with this config and the dataset stats.
policy = SmolVLAPolicy.from_pretrained('ckpt/smolvla_omy_20251103_140619/checkpoints/last/pretrained_model', dataset_stats=dataset_metadata.stats)
# You can load the trained policy from hub if you don't have the resources to train it.
# policy = SmolVLAPolicy.from_pretrained("Jeongeun/omy_pnp_pi0", config=cfg, dataset_stats=dataset_metadata.stats)
policy.to(device)



In [None]:
# Build the simulation environment from the scene XML; actions are given
# using the 'joint_angle' action type.
from mujoco_env.y_env2 import SimpleEnv2
xml_path = './asset/example_scene_y2.xml'
PnPEnv = SimpleEnv2(xml_path, action_type='joint_angle')

In [None]:
from torchvision import transforms
# Approach 1: Using torchvision.transforms
def get_default_transform(image_size: int = 224):
    """Build the image preprocessing pipeline applied before policy inference.

    The pipeline holds a single ``ToTensor`` step, which converts a PIL image
    with pixel values in [0, 255] into a FloatTensor in [0.0, 1.0] with
    shape C×H×W.

    Args:
        image_size: Kept for interface compatibility. The body never reads
            it, so no resizing is performed by the returned transform.

    Returns:
        A ``transforms.Compose`` wrapping the single ``ToTensor`` step.
    """
    steps = [transforms.ToTensor()]
    return transforms.Compose(steps)

In [None]:
def _frame_to_tensor(frame, size=(256, 256)):
    """Resize one camera frame and convert it with ``IMG_TRANSFORM``.

    Args:
        frame: Image array as returned by ``PnPEnv.grab_image()``.
        size: Target size handed to ``PIL.Image.resize``.

    Returns:
        The result of applying ``IMG_TRANSFORM`` to the resized PIL image.
    """
    return IMG_TRANSFORM(Image.fromarray(frame).resize(size))


step = 0
PnPEnv.reset(seed=0)
# Clear the policy's internal state and switch it to evaluation mode.
policy.reset()
policy.eval()
save_image = True
IMG_TRANSFORM = get_default_transform()
# Keep simulating for as long as the viewer window is open.
while PnPEnv.env.is_viewer_alive():
    PnPEnv.step_env()
    # Run the control step only when `loop_every(HZ=20)` fires
    # (presumably a 20 Hz rate gate — confirm against the env implementation).
    if PnPEnv.env.loop_every(HZ=20):
        # Check if the task is completed
        success = PnPEnv.check_success()
        if success:
            print('Success')
            # Reset the environment and action queue
            policy.reset()
            PnPEnv.reset()
            step = 0
            save_image = False
        # Get the current state of the environment (first six joint-state entries)
        state = PnPEnv.get_joint_state()[:6]
        # Get the current images from the environment and preprocess both the same way
        rgb_frame, wrist_frame = PnPEnv.grab_image()
        image = _frame_to_tensor(rgb_frame)
        wrist_image = _frame_to_tensor(wrist_frame)
        # Assemble a batch of size one on the target device
        data = {
            'observation.state': torch.tensor([state]).to(device),
            'observation.image': image.unsqueeze(0).to(device),
            'observation.wrist_image': wrist_image.unsqueeze(0).to(device),
            'task': [PnPEnv.instruction],
        }
        # Select an action
        action = policy.select_action(data)
        # Keep the first batch row and its first seven entries as a NumPy array
        action = action[0, :7].cpu().detach().numpy()
        # Take a step in the environment
        _ = PnPEnv.step(action)
        PnPEnv.render()
        step += 1
        # Stop the rollout as soon as the task succeeds after this step
        success = PnPEnv.check_success()
        if success:
            print('Success')
            break

In [None]:
# policy.push_to_hub(
#     repo_id='Jeongeun/omy_pnp_smolvla',
#     commit_message='Add trained policy for PnP task',
# )

In [None]:
# Close the simulation viewer once the rollout is finished.
PnPEnv.env.close_viewer()