## Dependencies

In [1]:
import random

import torch

import context_changers
import ct_model
import dmc
import drqv2
import utils
import numpy as np
import rl_model

import imageio
from matplotlib import pyplot as plt

## Hyperparameters

In [2]:
# Configuration shared by every cell below. Names are kept as-is because the
# later cells reference them directly.

# Task / simulation settings (passed to dmc.make).
task_name = 'reacher_hard'
xml_path = 'domain_xmls/reacher.xml'
episode_len = 100
action_repeat = 2
seed = 6

# Frame stacking used for the expert's environment.
expert_frame_stack = 3

# Cameras and rendered frame size (also used when rendering the agent video).
context_camera_ids = [0]
learner_camera_id = 0
im_w = 64
im_h = 64

# Passed to dmc.EncodeStackWrapper as the state dimension.
state_dim = 1024

# Draw the context camera from a generator seeded with `seed` instead of the
# global `random` module: this cell runs before the global seed is set, so
# `random.choice` would give a different camera on every kernel restart as soon
# as `context_camera_ids` holds more than one id.
cam_id = random.Random(seed).choice(context_camera_ids)

  and should_run_async(code)


In [3]:
# Apply the configured seed before any model or environment is created.
# NOTE(review): which RNGs get seeded is decided inside
# utils.set_seed_everywhere — confirm it covers `random`, NumPy and torch.
utils.set_seed_everywhere(seed)

  and should_run_async(code)


## Loading of the trained models

In [4]:
# Pretrained expert agent, taken out of training mode.
expert: drqv2.DrQV2Agent = drqv2.DrQV2Agent.load('experts/reacher_hard.pt')
expert.train(training=False)

# Context translator, moved to the active device and put in eval mode.
# CTNet contains BatchNorm2d layers (see the printed module), so eval mode is
# needed for them to use their running statistics during inference.
context_translator: ct_model.CTNet = ct_model.CTNet.load('ct/reacher_hard.pt').to(utils.device())
context_translator.eval()

# Second context translator loaded from the "mlp" checkpoint. It must be put in
# eval mode too; previously `context_translator.eval()` was called twice and
# this model was left in training mode.
mlp_context_translator: ct_model.CTNet = ct_model.CTNet.load('ct/reacher_hard_mlp.pt').to(utils.device())
mlp_context_translator.eval()

  and should_run_async(code)


CTNet(
  (enc1): EncoderNet(
    (leaky_relu): LeakyReLU(negative_slope=0.2)
    (flatten): Flatten(start_dim=1, end_dim=-1)
    (conv_1): Conv2d(3, 64, kernel_size=(5, 5), stride=(2, 2))
    (b_norm_1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv_2): Conv2d(64, 128, kernel_size=(5, 5), stride=(2, 2))
    (b_norm_2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv_3): Conv2d(128, 256, kernel_size=(5, 5), stride=(2, 2))
    (b_norm_3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv_4): Conv2d(256, 512, kernel_size=(5, 5), stride=(2, 2))
    (b_norm_4): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (fc1): Conv2d(512, 1024, kernel_size=(1, 1), stride=(1, 1))
    (b_norm_fc_1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (fc2): Conv2d(1024, 1024, kernel_size=(1, 1), stride=(1, 1)

## Loading and wrapping of the environment

In [5]:
# Settings local to the learner's evaluation environment.
frame_stack = 3
n_video = 32
agent_file = 'rl_exp_local/reacher_hard/na_fs_3_discount_0_7/snapshot.pt'

# Environment used on the expert side.
expert_env = dmc.make(
    task_name,
    expert_frame_stack,
    action_repeat,
    seed,
    xml_path,
    episode_len=episode_len,
)

# Trained learner agent restored from its snapshot.
agent = rl_model.RLAgent.load(agent_file)

# Learner environment: built with a different seed (seed + 1), the learner
# camera and its own context changer instance.
eval_env = dmc.make(
    task_name,
    frame_stack,
    action_repeat,
    seed + 1,
    xml_path,
    learner_camera_id,
    im_w,
    im_h,
    context_changers.ReacherHardContextChanger(),
    episode_len,
)

# Wrap the learner environment with the expert, the context translator and the
# expert environment. A second, separate context changer instance is passed.
eval_env = dmc.EncodeStackWrapper(
    eval_env,
    expert,
    context_translator,
    expert_env,
    context_camera_ids,
    n_video,
    im_w,
    im_h,
    state_dim,
    frame_stack,
    context_changers.ReacherHardContextChanger(),
    dist_reward=False,
)

  and should_run_async(code)


## Building of the agent video

In [6]:
def _render_learner_frame():
    """Render the current simulator state from the learner camera at im_w x im_h."""
    return eval_env.physics.render(im_w, im_h, camera_id=learner_camera_id)


# Start the episode and record the initial frame.
time_step = eval_env.reset()
agent_video = [_render_learner_frame()]

# Roll the agent out until the episode ends, recording one frame per step.
while not time_step.last():
    # Gradients are not needed and the agent is held in eval mode while acting.
    with torch.no_grad(), utils.eval_mode(agent):
        state = torch.tensor(
            time_step.observation,
            device=utils.device(),
            dtype=torch.float,
        )
        # NOTE(review): the literal 1 is the second positional argument of
        # agent.act; its meaning is defined in rl_model — confirm.
        action = agent.act(state, 1, eval_mode=True)

    time_step = eval_env.step(action)
    agent_video.append(_render_learner_frame())

  and should_run_async(code)


In [7]:
# Axis permutation that moves axis 1 to the end
# (presumably channels-first -> channels-last; confirm against the wrapper).
axis_order = (0, 2, 3, 1)

# Stack the rendered frames into a single array.
agent_video = np.array(agent_video)

# First expert video and the wrapper's `avg_frames`, both permuted the same way.
source_video = eval_env.expert_videos[0].transpose(axis_order)
predicted_video = eval_env.avg_frames.transpose(axis_order)

  and should_run_async(code)


In [8]:
# Lay the three clips side by side along the width axis:
# source | predicted | agent.
# The panel width comes from the data rather than hard-coded 64/128 offsets, so
# the cell keeps working when im_w is changed in the hyperparameters.
n_frames, frame_h, frame_w, n_channels = source_video.shape

# NOTE(review): np.zeros defaults to float64, as in the original cell — confirm
# the video writer treats float frames the way you expect.
all_video = np.zeros((n_frames, frame_h, frame_w * 3, n_channels))

for panel_idx, clip in enumerate((source_video, predicted_video, agent_video)):
    left = panel_idx * frame_w
    all_video[:, :, left:left + frame_w, :] = clip

  and should_run_async(code)


## Generation of the final video

The video is written to `demo/demo_ifo.mp4`.

In [9]:
# Write the side-by-side frames to an MP4 file at 24 frames per second.
video_path = 'demo/demo_ifo.mp4'
imageio.mimwrite(video_path, all_video, format='mp4', fps=24)

  and should_run_async(code)
