In [1]:
import argparse
import json
import h5py
import imageio
import numpy as np
import os
from copy import deepcopy
import pprint

import torch

import robomimic
import robomimic.utils.file_utils as FileUtils
import robomimic.utils.torch_utils as TorchUtils
import robomimic.utils.tensor_utils as TensorUtils
import robomimic.utils.obs_utils as ObsUtils
from robomimic.envs.env_base import EnvBase
from robomimic.algo import RolloutPolicy

from robomimic.envs.env_gym import EnvGym
import robosuite as suite

from robosuite.wrappers import GymWrapper
from robosuite.controllers import load_controller_config
import matplotlib.pyplot as plt

import urllib.request

from diffusion_policy.policy.diffusion_transformer_lowdim_policy import DiffusionTransformerLowdimPolicy
import hydra
from omegaconf import OmegaConf

from diffusion_policy.policy.diffusion_transformer_hybrid_image_policy import DiffusionTransformerHybridImagePolicy
from diffusers.schedulers.scheduling_ddpm import DDPMScheduler
from diffusion_policy.model.diffusion.ema_model import EMAModel

from diffusion_policy.env_runner.base_image_runner import BaseImageRunner
import os
import json
import cv2


# Select the compute device: CUDA when PyTorch reports it available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


  from .autonotebook import tqdm as notebook_tqdm




### Load checkpoint

In [2]:
cd ..

/home/sen/Desktop/awe


In [3]:
# Checkpoint location, given relative to the current working directory.
ckpt_path = "ckpts/diffusion_transformer.ckpt"

# Fail early with a clear message if the file is missing, before any attempt
# to deserialize it (a check placed after the load could never trigger first).
if not os.path.exists(ckpt_path):
    raise FileNotFoundError(f"Checkpoint not found: {os.path.abspath(ckpt_path)}")

# NOTE: torch.load unpickles the file — only load checkpoints from a trusted source.
# map_location remaps the stored tensors onto the selected device.
ckpt = torch.load(ckpt_path, map_location=device)

### Load config

In [4]:

# Read the experiment configuration (YAML) with OmegaConf.
# Two configurations are available:
#   a) AWE + diffusion policy transformer:
#      "config/waypoint_image_can_ph_diffusion_policy_transformer.yaml"
#   b) diffusion policy transformer without AWE (the one loaded here):
#      "config/baseline_image_can_ph_diffusion_policy_transformer.yaml"
raw_cfg = OmegaConf.load("config/baseline_image_can_ph_diffusion_policy_transformer.yaml")

# NOTE(review): the "eval" resolver hands config expressions to Python's eval —
# only load configuration files from a trusted source.
OmegaConf.register_new_resolver("eval", eval, replace=True)

# Convert to plain Python containers; resolve=True substitutes interpolations
# such as ${horizon} with their actual values.
cfg = OmegaConf.to_container(raw_cfg, resolve=True)
policy_cfg = cfg["policy"]

# --- Noise scheduler -------------------------------------------------------
# Every scheduler argument is read verbatim from the "noise_scheduler" section.
scheduler_cfg = policy_cfg["noise_scheduler"]
scheduler_fields = (
    "num_train_timesteps",
    "beta_start",
    "beta_end",
    "beta_schedule",
    "variance_type",
    "clip_sample",
    "prediction_type",
)
noise_scheduler = DDPMScheduler(
    **{field: scheduler_cfg[field] for field in scheduler_fields}
)

# --- Policy ----------------------------------------------------------------
# Group the policy arguments by the conversion applied to the config value.
verbatim_fields = (
    "shape_meta",
    "obs_encoder_group_norm",
    "eval_fixed_crop",
    "causal_attn",
    "time_as_cond",
    "obs_as_cond",
)
int_fields = (
    "horizon",
    "n_action_steps",
    "n_obs_steps",
    "num_inference_steps",
    "n_layer",
    "n_cond_layers",
    "n_head",
    "n_emb",
)
float_fields = ("p_drop_emb", "p_drop_attn")

policy_kwargs = {name: policy_cfg[name] for name in verbatim_fields}
policy_kwargs.update({name: int(policy_cfg[name]) for name in int_fields})
policy_kwargs.update({name: float(policy_cfg[name]) for name in float_fields})
# crop_shape is passed as a tuple of ints
policy_kwargs["crop_shape"] = tuple(int(side) for side in policy_cfg["crop_shape"])

policy = DiffusionTransformerHybridImagePolicy(
    noise_scheduler=noise_scheduler,
    **policy_kwargs,
)

# Switch the module to evaluation mode
policy.eval()

# Restore the weights stored under the checkpoint's "ema_model" entry
policy.load_state_dict(ckpt["state_dicts"]["ema_model"])

# Place the policy on the GPU when one is available, otherwise on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
policy.to(device)

print("Model is initialized and ready for evaluation.")




using obs modality: low_dim with keys: ['robot0_eef_quat', 'robot0_gripper_qpos', 'robot0_eef_pos']
using obs modality: rgb with keys: ['agentview_image', 'robot0_eye_in_hand_image']
using obs modality: depth with keys: []
using obs modality: scan with keys: []




Model is initialized and ready for evaluation.


### Visualise Rollout
Note: this rollout relies on local modifications made in `robomimic_image_runner.py`.
For example, the controller values are overwritten there.

In [5]:
# Shrink the evaluation to a single rollout: one environment, one test episode,
# and one visualised test episode.
runner_cfg = cfg["task"]["env_runner"]
runner_cfg.update(n_envs=1, n_test_vis=1, n_test=1)

# Build the env runner from its config section; outputs go to "rollout_outputs"
env_runner = hydra.utils.instantiate(runner_cfg, output_dir="rollout_outputs")

# Run the rollout with the loaded policy (this also saves the video)
env_runner.run(policy)

env_meta {'env_name': 'PickPlaceCan', 'type': 1, 'env_kwargs': {'has_renderer': False, 'has_offscreen_renderer': True, 'ignore_done': True, 'use_object_obs': False, 'use_camera_obs': True, 'control_freq': 20, 'controller_configs': {'type': 'OSC_POSE', 'input_max': 1, 'input_min': -1, 'output_max': [0.05, 0.05, 0.05, 0.5, 0.5, 0.5], 'output_min': [-0.05, -0.05, -0.05, -0.5, -0.5, -0.5], 'kp': 150, 'damping': 1, 'impedance_mode': 'fixed', 'kp_limits': [0, 300], 'damping_limits': [0, 10], 'position_limits': None, 'orientation_limits': None, 'uncouple_pos_ori': True, 'control_delta': False, 'interpolation': None, 'ramp_ratio': 0.2}, 'robots': ['Panda'], 'camera_depths': False, 'camera_heights': 84, 'camera_widths': 84, 'reward_shaping': False, 'camera_names': ['agentview', 'robot0_eye_in_hand'], 'render_gpu_device_id': 0}}
Created environment with name PickPlaceCan
Action size is 7
Found 2 GPUs for rendering. Using device 1.
Created environment with name PickPlaceCan
Action size is 7


Eval PickPlaceCanImage 1/1:   4%|▍         | 8/200 [00:01<00:47,  4.06it/s]

KeyboardInterrupt: 