In [None]:
import argparse
import os
import time

import gym
import numpy as np
from stable_baselines.common import set_global_seeds

from stable_baselines.common.vec_env import DummyVecEnv

from config import ENV_ID

from vae.controller import VAEController
from donkey_gym.envs.donkey_env import DonkeyVAEHistoryEnv

from algos.custom_sac import SACWithVAE, CustomSACPolicy

In [None]:
# display in notebook

import matplotlib.pyplot as plt
%matplotlib inline
from IPython import display

def show_state(episode, step, obs, reward, info):
    """Redraw the current environment frame inline in the notebook.

    Figure number 3 is reused and cleared on each call, and the cell
    output is cleared before the figure is displayed again, so
    successive calls replace the previous frame instead of stacking
    new outputs.

    :param episode: episode index, formatted with ``%d`` in the title
    :param step: step index within the episode, formatted with ``%d``
    :param obs: image data handed directly to ``plt.imshow``
    :param reward: reward for this step, formatted with ``%f``
    :param info: extra step information, formatted with ``%s``
    """
    title = "Episode : %d.%d | Reward: %f | Info: %s" % (episode, step, reward, info)

    # Select (or create) the dedicated figure and wipe its previous contents.
    fig = plt.figure(3)
    fig.clf()

    plt.imshow(obs)
    plt.title(title)
    plt.axis('off')

    # wait=True defers clearing until the new output is ready to be shown.
    display.clear_output(wait=True)
    display.display(fig)

In [None]:
# Restore the VAE weights from disk; the controller is handed to the
# environment below.
vae = VAEController()
vae.load("vae-level-0-dim-32.pkl")

# Build the environment around the loaded VAE.
env = DonkeyVAEHistoryEnv(
    level=0,
    frame_skip=2,
    # NOTE(review): presumably the cross-track-error limit — confirm.
    max_cte_error=3.0,
    vae=vae,
    n_command_history=20,
    # Steering bounds
    min_steering=-1,
    max_steering=1,
    # Throttle bounds
    min_throttle=0.4,
    max_throttle=0.6,
)

In [None]:
# Run the trained SAC agent for a single episode, rendering every step inline
# and printing the cumulative reward and the episode length at the end.
model = SACWithVAE.load("logs/sac/0/model.pkl")

obs = env.reset()

# Force deterministic for SAC and DDPG
deterministic = True

print("Deterministic actions: {}".format(deterministic))

running_reward = 0.0
ep_len = 0
done = False
while not done:
    action, _ = model.predict(obs, deterministic=deterministic)
    # Clip the action to avoid out-of-bound errors
    if isinstance(env.action_space, gym.spaces.Box):
        action = np.clip(action, env.action_space.low, env.action_space.high)
    obs, reward, done, infos = env.step(action)
    running_reward += reward
    ep_len += 1
    # Show env: only one episode is played, so the episode index stays 0,
    # while the step shown in the title follows the running step counter
    # (1 for the first step).
    show_state(0, ep_len, env.render('rgb_array'), reward, infos)

print("Episode Reward: {:.2f}".format(running_reward))
print("Episode Length", ep_len)

# Leave the counters zeroed once the episode summary has been printed.
running_reward = 0.0
ep_len = 0

In [None]:
# Close the underlying (unwrapped) environment once evaluation is finished.
# NOTE(review): presumably this releases the simulator connection held by
# DonkeyVAEHistoryEnv — confirm.
env.unwrapped.close()