In [None]:
# Output locations (Windows-style separators) and the device string handed to
# the environment, the agents and the evaluator further down.
base_experiment_path = "data\\plays"
experiment_path = base_experiment_path + "\\multiple_plays"  # sub-folder for this multi-model run
device = "cpu"

In [None]:
# How many copies of the base environment are built; the same value is reused
# as the evaluator's "number_of_evaluations".
NUMBER_OF_ENVIRONMENTS: int = 3

## Load Models / Policies

In [None]:
import os

# NOTE(review): absolute, machine-specific path — consider deriving it from a
# configurable base directory so the notebook runs on other machines.
models_folder = 'C:\\rgoncalo\\ricardo-goncalo-thesis-project\\project\\examples\\simple_rl\\data\\models'

# Entry names (not full paths) found in the models folder. os.listdir returns
# them in arbitrary order, so sort to keep the index -> model mapping (and the
# "model_{index}" output folders) stable across runs and machines.
model_paths = sorted(os.listdir(models_folder))

In [None]:
def load_model(index):
    """Return the full path of the stored model at position ``index``.

    The entry name is taken from ``model_paths`` (the listing of
    ``models_folder``) and joined with ``models_folder``. Previously this
    indexed the folder *string* itself, which yielded a single character of
    the path instead of a model.

    Args:
        index: Position of the model inside ``model_paths``.

    Returns:
        The path ``models_folder/<entry name>`` as a string. The model is not
        deserialized here.

    Raises:
        IndexError: If ``index`` is outside the range of ``model_paths``.
    """
    return os.path.join(models_folder, model_paths[index])

In [None]:
from project.automl.rl.policy.qpolicy import QPolicy

def load_policy(index):
    """Build the policy definition for the model at position ``index``.

    Returns:
        A ``(QPolicy, model)`` pair, where ``model`` is whatever
        ``load_model(index)`` returns.
    """
    # NOTE(review): QPolicy is imported from ``project.automl...`` while the
    # other cells import from ``automl...`` — confirm both resolve to the same
    # package.
    model = load_model(index)
    policy_definition = (QPolicy, model)
    return policy_definition

## Setup Environment

In [None]:
from automl.rl.environment.gymnasium_env import GymnasiumEnvironmentWrapper

# Input for the Gymnasium wrapper: MountainCar rendered in a window ("human").
env_input = {
    "environment_input": {
        "environment": "MountainCar-v0",
        "render_mode": "human",
        "device": device,
    }
}

# (component class, input dict) pair describing one environment.
base_env_definition = (GymnasiumEnvironmentWrapper, env_input)


In [None]:
# One entry per environment. Every entry is the *same* definition tuple (and
# therefore the same input dict), not a copy.
env_list = [base_env_definition] * NUMBER_OF_ENVIRONMENTS

In [None]:
from automl.rl.environment.environment_sampler import EnvironmentCycler


# Create the cycler, then hand it the environment definitions built above.
env = EnvironmentCycler()
env.pass_input({"environments": env_list, "generate_name": True})

## Setup Single Agent

In [None]:
from automl.rl.agent.agent_components import AgentSchema

def single_agent(index):
    """Build the agent definition that plays with the model at ``index``.

    Args:
        index: Position of the model, forwarded to ``load_policy``.

    Returns:
        An ``(AgentSchema, agent_input)`` pair, where ``agent_input`` holds
        the policy definition under the ``"policy"`` key.
    """
    # The unused ``policy_input`` local from the previous version was dropped.
    agent_input = {
        "policy": load_policy(index),
    }

    return (AgentSchema, agent_input)

## Setup All Agents

In [None]:
# Input shared by all agents (passed to the player as "agents_input").
all_agents_input = {"device": device}

In [None]:
def agents(index):
    """Return the agent definition(s) for the model at ``index``.

    Currently this is exactly the single definition produced by
    ``single_agent(index)``.
    """
    # NOTE(review): returns one (class, input) pair rather than a collection —
    # confirm this is the shape the player's "agents" input expects.
    agent_definition = single_agent(index)
    return agent_definition

## Setup Player

In [None]:
from automl.rl.rl_player.rl_player import RLPlayer

def rl_player(index):
    """Build the player definition for the model at ``index``.

    ``experiment_path`` and ``all_agents_input`` are read from the notebook
    globals at call time.

    Returns:
        An ``(RLPlayer, rl_player_input)`` pair.
    """
    player_settings = dict()
    player_settings["base_directory"] = experiment_path
    player_settings["artifact_relative_directory"] = "play"
    player_settings["create_new_directory"] = True
    player_settings["agents"] = agents(index)
    player_settings["agents_input"] = all_agents_input
    player_settings["num_episodes"] = 5
    player_settings["store_env_at_end"] = True

    return (RLPlayer, player_settings)

In [None]:
from automl.rl.evaluators.rl_evaluator_player import EvaluatorWithPlayer

def eval_with_player(index):
    """Create an ``EvaluatorWithPlayer`` for the model at ``index``.

    Returns:
        The evaluator instance, constructed from the player definition of
        ``rl_player(index)`` plus the episode / evaluation counts below.
    """
    evaluator_input = {
        "rl_player_definition": rl_player(index),
        # number of episodes per environment
        "number_of_episodes": 3,
        # number of sampled environments essentially
        "number_of_evaluations": NUMBER_OF_ENVIRONMENTS,
    }
    return EvaluatorWithPlayer(evaluator_input)


## Play

In [None]:
#rl_player.run()
from automl.utils.files_utils import open_or_create_folder
from automl.utils.json_component_utils import save_configuration


# Create the folder for this run and rebind ``experiment_path`` to the result.
# ``rl_player`` reads this global at call time, so it sees the rebound value.
# NOTE(review): re-running this cell feeds the rebound value back into
# open_or_create_folder — confirm that is the intended behaviour.
experiment_path = open_or_create_folder(experiment_path, create_new=True)

# Fixed: the second placeholder used to print experiment_path again instead of
# the base path it is labelled with.
print(f"Using experiment path {experiment_path} from base experiment path {base_experiment_path}")

# Evaluate every stored model in its own "model_<index>" sub-folder.
for index in range(len(model_paths)):

    experiment_path_index = open_or_create_folder(f"{experiment_path}\\model_{index}", create_new=True)

    evaluator_with_player = eval_with_player(index)

    # NOTE(review): ``agents`` is passed here as the function object, not as
    # ``agents(index)`` — confirm this is what ``evaluate`` expects.
    evaluator_with_player.evaluate((agents, device, experiment_path_index, env))

    # Persist the evaluator configuration next to this model's results.
    save_configuration(evaluator_with_player, experiment_path_index, "config.json", save_exposed_values=True, ignore_defaults=False)

## Store results