In [1]:
import sys
from pathlib import Path

# Put the parent of the current working directory on the import path so the
# project-local packages imported in the next cell can be found (presumably
# the notebook lives one level below the project root — confirm).
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path each time.
_project_root = str(Path.cwd().resolve().parent)
if _project_root not in sys.path:
    sys.path.append(_project_root)

In [2]:
from experiments.run_experiment import (
    TrainingConfig,
    EvaluateConfig,
)
from experiments.sweep_utils import run_sweep
from environments.fronzenlake import FrozenLakeConfig, get_frozenlake_env
from agents.sarsa_td0 import SarsaTD0Agent, SarsaTD0Config
from agents.sarsa_td0_entropic import SarsaTD0EntropicAgent, SarsaTD0EntropicConfig
from agents.q_td0 import QLearningTD0Agent, QLearningTD0Config
from metrics.learning_mertrics import total_reward_per_episode, episode_won
from metrics.frustration_metrics import (
    total_td_error_per_episode,
    frustration_rate_per_episode,
    tail_frustration_per_episode,
    cvar_tail_frustration_per_episode,
)

In [3]:
# Base environment: slippery 4x4 FrozenLake, handed to both the training and
# evaluation configs through their env_kwargs.
# Deterministic alternative: FrozenLakeConfig(map_name="4x4", is_slippery=False)
env_config = FrozenLakeConfig(
    map_name="4x4",
    is_slippery=True,
    # presumably the probability that a move goes in the intended direction — confirm
    success_rate=0.90,
    # presumably (goal, hole, step) rewards — confirm against FrozenLakeConfig
    reward_schedule=(1.0, -1.0, -0.1),
)

In [4]:
# Agent class that is later passed to run_sweep as `agent_factory`.
agent_factory = SarsaTD0Agent


def _tail_frustration_p90(td):
    """Call tail_frustration_per_episode on `td` with percentile=0.90."""
    return tail_frustration_per_episode(td, percentile=0.90)


def _cvar_tail_frustration_p90(td):
    """Call cvar_tail_frustration_per_episode on `td` with percentile=0.90."""
    return cvar_tail_frustration_per_episode(td, percentile=0.90)


# Metric callables keyed by the name they are registered under.
_reward_metrics = {
    "total_reward_per_episode": total_reward_per_episode,
    "episode_won": episode_won,
}
_td_error_metrics = {
    "total_td_error_per_episode": total_td_error_per_episode,
    "frustration_rate_per_episode": frustration_rate_per_episode,
    "tail_frustration_per_episode": _tail_frustration_p90,
    "cvar_tail_frustration_per_episode": _cvar_tail_frustration_p90,
}

# Baseline SARSA TD(0) hyperparameters plus the metrics defined above.
sarsa_td0_config = SarsaTD0Config(
    alpha=0.05,
    gamma=0.99,
    epsilon=0.3,
    reward_metrics=_reward_metrics,
    td_error_metrics=_td_error_metrics,
)

In [5]:
# Baseline training setup; given to run_sweep as `base_training`.
base_train = TrainingConfig(
    name="sarsa_frozenlake",
    num_train_episodes=30_000,
    env_kwargs=dict(config=env_config),
    agent_kwargs=dict(config=sarsa_td0_config),
)

# Baseline evaluation setup; given to run_sweep as `base_evaluation`.
# It uses the same environment config as training but its own kwargs dict.
base_eval = EvaluateConfig(
    name="sarsa_frozenlake",
    num_eval_episodes=4_000,
    env_kwargs=dict(config=env_config),
)

In [6]:
# Override grid handed to run_sweep: three learning rates (epsilon and seed
# held fixed) and two environment success rates.
# NOTE(review): the base configs pass {"config": ...} under these same
# "agent_kwargs" / "env_kwargs" keys — confirm how run_sweep merges these
# flat overrides into the config objects.
sweep = {
    "agent_kwargs": [
        {"alpha": alpha, "epsilon": 0.2, "seed": 1} for alpha in (0.05, 0.1, 0.2)
    ],
    "env_kwargs": [{"success_rate": rate} for rate in (0.9, 0.95)],
}

In [7]:
# Launch the sweep with the env/agent factories, the override grid and the
# two baseline configs; whatever run_sweep returns is kept in `results`.
results = run_sweep(
    env_factory=get_frozenlake_env,
    agent_factory=agent_factory,
    sweep=sweep,
    base_training=base_train,
    base_evaluation=base_eval,
)

In [8]:
# NOTE(review): this echoes the complete nested `results` structure, including
# every per-episode training value (see the recorded output below), so the
# cell output is very large. Consider displaying a compact per-run summary
# (e.g. params plus aggregate statistics) instead of the raw object.
results

[{'params': {'agent_kwargs': {'alpha': 0.05, 'epsilon': 0.2, 'seed': 1},
   'env_kwargs': {'success_rate': 0.9}},
  'training': {'reward': {'total_reward_per_episode': [-2.2,
     -1.1,
     -1.2,
     -1.7,
     -1.3,
     -1.3,
     -3.0,
     -2.6,
     -2.2,
     -0.10000000000000006,
     -2.0,
     -2.1,
     -3.6,
     -1.7,
     -1.5,
     -1.8,
     -2.2,
     -1.5,
     -2.3000000000000003,
     -2.6,
     -2.1,
     -1.7000000000000002,
     -2.8000000000000003,
     -2.3000000000000003,
     -4.6000000000000005,
     -6.300000000000001,
     -1.7,
     -1.3,
     -5.551115123125783e-17,
     -3.3000000000000003,
     -1.8,
     -2.3000000000000003,
     -0.9000000000000001,
     -2.2,
     0.29999999999999993,
     -2.2,
     0.29999999999999993,
     -0.8,
     0.5,
     -2.5,
     -0.5000000000000001,
     -1.1,
     -0.9000000000000001,
     0.29999999999999993,
     -1.5,
     -3.4000000000000004,
     -1.9000000000000001,
     -2.2,
     -1.4,
     -0.10000000000000006