In [None]:
import os

# Root folder under which the play runs are stored. Joined with os.path.join so
# the separator matches the host OS instead of being a hard-coded backslash.
base_experiment_path = os.path.join("data", "plays")

# Folder requested for this particular run, located inside the root above.
experiment_path = os.path.join(base_experiment_path, "play")

# Device identifier forwarded to the model, environment and agents inputs.
device = "cpu"

## Load Model / Policy

In [None]:
# Selects which of the model cells below defines `model`:
#   'BY_CLASS_INPUT' -> model given as a (class, input dict) pair
#   'BY_PATH'        -> model given as a path string
model_definition_strategy = 'BY_PATH'

# Fail fast: with any other value neither model cell assigns `model`, and the
# notebook would only break later with a NameError when the policy is built.
if model_definition_strategy not in ('BY_CLASS_INPUT', 'BY_PATH'):
    raise ValueError(
        f"Unknown model_definition_strategy {model_definition_strategy!r}; "
        "expected 'BY_CLASS_INPUT' or 'BY_PATH'"
    )

### Model by class and input definition

In [None]:
if model_definition_strategy == 'BY_CLASS_INPUT':

    # The wrapper class is only imported when this strategy is selected.
    from automl.external_support.sb3.sb3_model_wrapper import SB3WrapperTorch as model_class

    # Input for the wrapper: which SB3 model to use and the device to use.
    model_input = dict(sb3_model="ppo-CartPole-v1-actor", device=device)

    # The model is described as a (class, input dict) pair.
    model = model_class, model_input

### Model by path

In [None]:
if model_definition_strategy == 'BY_PATH':

    import os

    # The model is given as a path string. It is joined with os.path.join so
    # the separators match the host OS instead of being hard-coded backslashes
    # (the resulting string is unchanged on Windows).
    model = os.path.join("data", "models", "sb3_CartPole_ppo", "sb3_CartPole_ppo")

## Setup Model / Policy

In [None]:
from automl.rl.policy.stochastic_policy import StochasticPolicy as policy_class


# The policy is described as a (class, input dict) pair wrapping the model chosen above.
policy = policy_class, dict(model=model)

## Setup Environment

In [None]:


# Environment settings: environment id, render mode and device.
# NOTE(review): `env_input` is assigned again in the following code cell before
# anything reads it, so this value does not reach the environment definition.
env_input = dict(
    # environment="MountainCar-v0",
    environment="CartPole-v1",
    render_mode="human",
    device=device,
)

In [None]:
from automl.rl.environment.gymnasium_env import GymnasiumEnvironmentWrapperSampler as env_class


# Input dict that gets paired with `env_class` to describe the environment.
env_input = dict(environment_input="CartPole-v1")

In [None]:
# The environment is described as a (class, input dict) pair.
env = env_class, env_input

## Setup Single Agent

In [None]:
from automl.rl.agent.agent_components import AgentSchema

# Empty policy input; not referenced by any of the visible cells.
policy_input = dict()

# The agent input only carries the policy definition.
agent_input = dict(policy=policy)

# The agent is described as a (class, input dict) pair.
agent = AgentSchema, agent_input

## Setup all Agents

In [None]:
# Input shared by all agents; handed to the player as "agents_input".
all_agents_input = dict(device=device)

In [None]:
# The single agent definition is used directly as the "agents" value given to
# the player input and to the evaluator's evaluate() call.
agents = agent

## Setup Player

In [None]:
from automl.rl.rl_player.rl_player import RLPlayer

rl_player_class = RLPlayer

# Input for the player: where to write artifacts, which agents to use and how
# many episodes to run.
rl_player_input = dict(
    base_directory=experiment_path,
    artifact_relative_directory="play",
    create_new_directory=True,
    agents=agents,
    agents_input=all_agents_input,
    num_episodes=5,
    store_env_at_end=True,
)

# The player is described as a (class, input dict) pair.
rl_player = rl_player_class, rl_player_input

In [None]:
from automl.rl.evaluators.rl_evaluator_player import EvaluatorWithPlayer

# Build the evaluator from the player definition and the environment definition.
evaluator_with_player = EvaluatorWithPlayer(
    dict(
        rl_player_definition=rl_player,
        number_of_episodes=2,     # number of episodes per environment
        number_of_evaluations=2,  # number of sampled environments essentially
        environment=env,
    )
)


## Play

In [None]:
#rl_player.run()
from automl.utils.files_utils import open_or_create_folder

# Ask the helper for the run folder; the path it returns replaces the requested
# one and is what the evaluation and the later cells use.
experiment_path = open_or_create_folder(experiment_path, create_new=True)

# Report the run folder next to the base folder it was derived from. The
# message previously interpolated `experiment_path` in both places.
print(f"Using experiment path {experiment_path} from base experiment path {base_experiment_path}")

# Run the evaluation with the agents, device, run folder and environment definition.
evaluator_with_player.evaluate((agents, device, experiment_path, env))

## Store results

In [None]:
from automl.utils.configuration_component_utils import save_configuration


# Save the evaluator's configuration, passing the run folder and the file name
# "config.json"; exposed values are included and defaults are not skipped.
save_configuration(
    evaluator_with_player,
    experiment_path,
    "config.json",
    save_exposed_values=True,
    ignore_defaults=False,
)