In [None]:
import sys
from pathlib import Path

# Put the parent of the current working directory on the import path.
# NOTE(review): presumably this is the project root that holds the
# `experiments`, `environments`, `agents` and `metrics` packages imported
# in the next cell — confirm against the repository layout.
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path each time.
_parent_dir = str(Path.cwd().resolve().parent)
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

In [None]:
from experiments.run_experiment import (
    TrainingConfig,
    run_training,
    EvaluateConfig,
    run_evaluation,
    generate_training_plots,
)
from environments.fronzenlake import FrozenLakeConfig, get_frozenlake_env
from agents.sarsa_td0 import SarsaTD0Agent, SarsaTD0Config
from agents.sarsa_td0_entropic import SarsaTD0EntropicAgent, SarsaTD0EntropicConfig
from agents.q_td0 import QLearningTD0Agent, QLearningTD0Config
from metrics.learning_mertrics import total_reward_per_episode, episode_won
from metrics.frustration_metrics import (
    total_td_error_per_episode,
    frustration_rate_per_episode,
    tail_frustration_per_episode,
    cvar_tail_frustration_per_episode,
)

In [None]:
# Environment settings handed to the env factory through `env_kwargs`.
# Non-slippery alternative kept for reference:
#   env_config = FrozenLakeConfig(map_name="4x4", is_slippery=False)
env_config = FrozenLakeConfig(
    map_name="4x4",
    is_slippery=True,
    # NOTE(review): presumably (goal, hole, frozen-step) rewards, mirroring
    # Gymnasium's FrozenLake `reward_schedule` — confirm in FrozenLakeConfig.
    reward_schedule=(1.0, -1.0, -0.1),
    success_rate=0.90,
)

In [None]:
# Q-learning TD(0) agent: agent class plus its configuration.
agent_factory = QLearningTD0Agent

# Callables registered under the config's `reward_metrics`, keyed by name.
q_reward_metrics = {
    "total_reward_per_episode": total_reward_per_episode,
    "episode_won": episode_won,
}

# Callables registered under the config's `td_error_metrics`, keyed by name.
# The two tail metrics are wrapped so that `percentile` is fixed at 0.90 and
# the wrapper takes a single argument.
q_td_error_metrics = {
    "total_td_error_per_episode": total_td_error_per_episode,
    "frustration_rate_per_episode": frustration_rate_per_episode,
    "tail_frustration_per_episode": (
        lambda td: tail_frustration_per_episode(td, percentile=0.90)
    ),
    "cvar_tail_frustration_per_episode": (
        lambda td: cvar_tail_frustration_per_episode(td, percentile=0.90)
    ),
}

q_td0_config = QLearningTD0Config(
    alpha=0.05,
    gamma=0.99,
    epsilon=0.3,
    reward_metrics=q_reward_metrics,
    td_error_metrics=q_td_error_metrics,
)

In [None]:
# SARSA TD(0) agent: agent class plus its configuration.
agent_factory = SarsaTD0Agent

# Callables registered under the config's `reward_metrics`, keyed by name.
sarsa_reward_metrics = {
    "total_reward_per_episode": total_reward_per_episode,
    "episode_won": episode_won,
}

# Callables registered under the config's `td_error_metrics`, keyed by name.
# The two tail metrics are wrapped so that `percentile` is fixed at 0.90 and
# the wrapper takes a single argument.
sarsa_td_error_metrics = {
    "total_td_error_per_episode": total_td_error_per_episode,
    "frustration_rate_per_episode": frustration_rate_per_episode,
    "tail_frustration_per_episode": (
        lambda td: tail_frustration_per_episode(td, percentile=0.90)
    ),
    "cvar_tail_frustration_per_episode": (
        lambda td: cvar_tail_frustration_per_episode(td, percentile=0.90)
    ),
}

sarsa_td0_config = SarsaTD0Config(
    alpha=0.05,
    gamma=0.99,
    epsilon=0.3,
    reward_metrics=sarsa_reward_metrics,
    td_error_metrics=sarsa_td_error_metrics,
)

In [None]:
# Train a SARSA TD(0) agent using the FrozenLake env factory and `env_config`.
#
# The agent class and its config are passed as an explicit, matching pair.
# Before, this cell combined the mutable `agent_factory` global with
# `q_td0_config`. On a top-to-bottom run `agent_factory` is `SarsaTD0Agent`
# (the SARSA cell rebinds it after the Q-learning cell), so a SARSA agent was
# being constructed from a Q-learning config under the run name
# "sarsa_frozenlake".
# To train Q-learning instead, switch BOTH values below to
# `QLearningTD0Agent` / `q_td0_config` (and consider renaming the run).
training_config = TrainingConfig(
    name="sarsa_frozenlake",
    num_train_episodes=300_000,
    # seed=1,  # NOTE(review): left disabled as in the original; enabling it
    #          # would presumably make runs reproducible — confirm and enable.
    env_kwargs={"config": env_config},
    agent_kwargs={"config": sarsa_td0_config},
)

# `agent` and `training_metrics` are consumed by the evaluation and plotting
# cells that follow.
agent, training_metrics = run_training(
    config=training_config,
    env_factory=get_frozenlake_env,
    agent_factory=SarsaTD0Agent,
)

In [None]:
# Evaluate the agent returned by the training cell, using the same
# environment configuration that was used for training.
eval_config = EvaluateConfig(
    env_kwargs={"config": env_config},
    num_eval_episodes=4_000,
    name="sarsa_frozenlake",
)

evaluation_metrics = run_evaluation(
    agent=agent,
    env_factory=get_frozenlake_env,
    config=eval_config,
)

# Bare last expression so the notebook renders the result as cell output.
evaluation_metrics

In [None]:
# Plot the metrics returned by `run_training`.
# NOTE(review): `window_size` is presumably a smoothing window measured in
# episodes — confirm in `generate_training_plots`.
generate_training_plots(
    training_metrics,
    window_size=500,
)

In [None]:
# Entropic SARSA TD(0) agent: agent class plus its configuration.
# NOTE(review): this cell rebinds both `agent_factory` and `sarsa_td0_config`,
# replacing the plain SARSA objects assigned in an earlier cell. Anything run
# after this cell sees the entropic versions — consider a dedicated name.
agent_factory = SarsaTD0EntropicAgent

# Callables registered under the config's `reward_metrics`, keyed by name.
entropic_reward_metrics = {
    "total_reward_per_episode": total_reward_per_episode,
    "episode_won": episode_won,
}

# Callables registered under the config's `td_error_metrics`, keyed by name.
# The two tail metrics are wrapped so that `percentile` is fixed at 0.90 and
# the wrapper takes a single argument.
entropic_td_error_metrics = {
    "total_td_error_per_episode": total_td_error_per_episode,
    "frustration_rate_per_episode": frustration_rate_per_episode,
    "tail_frustration_per_episode": (
        lambda td: tail_frustration_per_episode(td, percentile=0.90)
    ),
    "cvar_tail_frustration_per_episode": (
        lambda td: cvar_tail_frustration_per_episode(td, percentile=0.90)
    ),
}

sarsa_td0_config = SarsaTD0EntropicConfig(
    alpha=0.1,
    gamma=0.99,
    epsilon=0.1,
    beta=0.2,  # only this config takes `beta`; the other two cells do not
    reward_metrics=entropic_reward_metrics,
    td_error_metrics=entropic_td_error_metrics,
)