In [1]:
import os

import gym
import matplotlib.pyplot as plt
import numpy as np
import torch

from dreamerv2.training.config import MinAtarConfig
from dreamerv2.training.evaluator import Evaluator
from dreamerv2.utils.wrapper import GymMinAtar, OneHotAction, freewayPOMDP

# --- Experiment selection -------------------------------------------------
env_name = "freeway"
exp_id = "0_pomdp"

# Prefer the first CUDA device, but fall back to CPU so the notebook still
# runs on hosts without a GPU instead of failing on a hard-coded "cuda:0".
# NOTE(review): loading a checkpoint that was saved on GPU may additionally
# need `map_location` inside the Evaluator -- confirm against its loader.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Single seed shared by the environment and torch below.
SEED = 42

# Human-readable labels keyed by integer action index. Not referenced in the
# cells visible here; presumably used when inspecting rollouts -- verify.
ACTIONS = {
    0: "STAY",
    1: "UP",
    2: "DOWN",
}

# --- Environment ----------------------------------------------------------
# Wrapper stack, innermost first: GymMinAtar -> OneHotAction -> freewayPOMDP.
env = freewayPOMDP(OneHotAction(GymMinAtar(env_name)))
# `.env.env` steps through the two outer wrappers so that seed() is called
# on the innermost GymMinAtar instance.
env.env.env.seed(SEED)
torch.manual_seed(SEED)

# --- Paths ----------------------------------------------------------------
# Results are looked up relative to the notebook's working directory.
result_dir = os.path.join("../results_official", f"{env_name}_{exp_id}")
model_dir = os.path.join(result_dir, "models")
f = "models_best.pth"  # checkpoint file name inside `model_dir`

# --- Evaluation configuration ---------------------------------------------
# Shapes are read from the wrapped environment so they stay in sync with it.
config = MinAtarConfig(
    env=env_name,
    obs_shape=env.observation_space.shape,
    action_size=env.action_space.shape[0],
    obs_dtype=bool,
    action_dtype=np.float32,
    model_dir=model_dir,
    eval_episode=10,
    eval_render=False,
)

In [2]:
# Full path of the checkpoint to evaluate: `model_dir` joined with file name `f`.
checkpoint_path = os.path.join(model_dir, f)

# Build the evaluator for the configured device, then score the saved agent
# on `env`; the returned value is reported below as the mean score.
evaluator = Evaluator(config, device)
eval_score = evaluator.eval_saved_agent(env, checkpoint_path)

print("mean evaluation score:", eval_score)

average evaluation score for model at ../results_official/freeway_0_pomdp/models/models_best.pth = 14.5
mean evaluation score: 14.5
