In [6]:
# Imports and process-level configuration for the evaluation notebook.
# The os.environ assignments are deliberately placed before the disrep4rl
# imports so they are already set when those modules are loaded.
import torch
import os
from pathlib import Path
# NOTE(review): presumably a workaround for MKL threading-layer conflicts — confirm it is still needed.
os.environ["MKL_SERVICE_FORCE_INTEL"] = "1"
# NOTE(review): presumably selects the headless EGL rendering backend for MuJoCo — confirm.
os.environ["MUJOCO_GL"] = "egl"
# Expose only the GPU with index 1 to this process.
# NOTE(review): torch is imported above this line; this is assumed to still take
# effect because no CUDA call has been made yet — confirm.
os.environ["CUDA_VISIBLE_DEVICES"]="1"
from disrep4rl.environments.metaworld_dm_env import make_metaworld
from disrep4rl import utils
from disrep4rl.video import VideoRecorder, FrameRecorder
from termcolor import colored
from disrep4rl.logger import _format

In [7]:
# Columns of the per-episode summary line, in display order.
# Each entry is (key looked up in the stats dict, short label, type tag
# handed to `_format`).
PRINT_FORMAT = [
    ("episode",        "E", "int"),
    ("episode_length", "L", "int"),
    ("episode_reward", "R", "float"),
    ("success",        "S", "int"),
]


def print_episode_stats(data):
    """Print a one-line, pipe-separated summary of a single episode.

    Args:
        data: Mapping of statistic name to value. Every key listed in
            ``PRINT_FORMAT`` is looked up; a missing key is shown as 0.

    The line starts with a blue "Data Generation" tag followed by one
    ``_format``-rendered field per ``PRINT_FORMAT`` entry.
    """
    header = colored("Data Generation", "blue")
    fields = [
        _format(label, data.get(stat_key, 0), type_tag)
        for stat_key, label, type_tag in PRINT_FORMAT
    ]
    print(" | ".join([f"| {header: <14}", *fields]))

In [10]:
# --- Experiment selection -------------------------------------------------
seed = 1
exp = "144_10"
snapshot = f"../snapshots/{exp}.pt"

# --- Restore the snapshot ---------------------------------------------------
# The payload is a dict holding the agent together with the settings that are
# read back below to rebuild the environment.
payload = torch.load(snapshot)
agent = payload['agent']

# Manual overrides, kept for reference (presumably for snapshots that do not
# store these entries — confirm before enabling):
# payload['task_name'] = "metaworld_mt10"
# payload['frame_stack'] = 3
# payload['action_repeat'] = 2
# payload['discount'] = 0.99
# payload['camera_name'] = 'corner'
# payload['add_segmentation_to_obs'] = True

# --- Evaluation environment -------------------------------------------------
# The environment name is the second "_"-separated token of the stored task
# name (e.g. "metaworld_mt10" -> "mt10").
task_variant = payload['task_name'].split("_")[1]
camera_name = payload['camera_name']
eval_env = make_metaworld(
    task_variant,
    payload['frame_stack'],
    payload['action_repeat'],
    payload['discount'],
    seed,
    camera_name,
    payload['add_segmentation_to_obs'],
)

# --- Recorders, both rooted at the current working directory ---------------
output_root = Path.cwd()
video_recorder = VideoRecorder(output_root, camera_name)
frame_recorder = FrameRecorder(output_root)

  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [8]:
# Roll out the loaded agent for a fixed number of evaluation episodes,
# printing a summary line and saving the recorders' output for each episode.

# Number of evaluation episodes to run.
num_eval_episodes = 30

# makedirs(exist_ok=True) also creates a missing "eval_video" parent and does
# not fail if the directory already exists (the previous exists()/mkdir() pair
# raised when the parent was absent and was racy between check and create).
# NOTE(review): assumes the recorders write beneath ./eval_video — confirm.
os.makedirs(f"eval_video/{exp}", exist_ok=True)

for episode in range(0, num_eval_episodes):
    time_step = eval_env.reset()
    total_reward = 0
    max_success = 0
    step = 0
    video_recorder.init(eval_env)
    frame_recorder.init(agent, time_step.observation)
    while not time_step.last():
        # Act without gradient tracking and with the agent in eval mode; the
        # snapshot's stored global step is passed as the agent's step argument.
        with torch.no_grad(), utils.eval_mode(agent):
            action = agent.act(time_step.observation,
                               payload['_global_step'],
                               eval_mode=True)
        time_step = eval_env.step(action)
        # The reward is a mapping carrying both the scalar reward and a
        # success flag for this step.
        total_reward += time_step.reward["reward"]
        success = int(time_step.reward["success"])
        # An episode counts as successful if any step reported success.
        max_success = max(max_success, success)
        step += 1
        video_recorder.record(eval_env)
        frame_recorder.record(agent, time_step.observation)

    episode_stats = {
        "episode": episode,
        "episode_length": step,
        "episode_reward": total_reward,
        "success": max_success,
    }
    print_episode_stats(episode_stats)

    # NOTE(review): the meaning of the trailing -1 argument is defined by the
    # recorders' save() methods, which are not visible here — confirm.
    video_recorder.save(f"{exp}/{exp}_{episode}_high_res.mp4", -1)
    frame_recorder.save(f"{exp}/{exp}_{episode}", -1)

| [34mData Generation[0m | E: 30 | L: 250 | R: 20.0187 | S: 0
| [34mData Generation[0m | E: 31 | L: 250 | R: 4556.3322 | S: 1
| [34mData Generation[0m | E: 32 | L: 250 | R: 4769.5744 | S: 1
| [34mData Generation[0m | E: 33 | L: 250 | R: 5.4771 | S: 0
| [34mData Generation[0m | E: 34 | L: 250 | R: 3804.1858 | S: 1
| [34mData Generation[0m | E: 35 | L: 250 | R: 21.0344 | S: 0
| [34mData Generation[0m | E: 36 | L: 250 | R: 3.5950 | S: 0
| [34mData Generation[0m | E: 37 | L: 250 | R: 3458.3160 | S: 1
| [34mData Generation[0m | E: 38 | L: 250 | R: 16.5466 | S: 0
| [34mData Generation[0m | E: 39 | L: 250 | R: 4748.8660 | S: 1
