In [1]:
import os
import optuna

import numpy as np

from algorithms import DoubleDQN
from environment import CreditPayerEnv
from pipeline import MetricsStudy

# Make sure the output directory exists before the plotting cells write
# their HTML/PNG exports into figures/; exist_ok keeps re-runs from failing.
os.makedirs("figures", exist_ok=True)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def eval_trajectory(env: CreditPayerEnv, agent: DoubleDQN, trajectory_max_len: int):
    """Play a single episode with ``agent`` and return the summed reward.

    The environment is reset once, then stepped with the actions chosen by
    ``agent.get_action`` until it reports ``done`` or ``trajectory_max_len``
    steps have been taken, whichever comes first.

    Args:
        env: Environment exposing ``reset()`` and a ``step(action)`` that
            returns a ``(state, reward, done, info)`` 4-tuple.
        agent: Policy exposing ``get_action(state)``.
        trajectory_max_len: Upper bound on the number of steps in the episode.

    Returns:
        The sum of the per-step rewards (``0`` if no step was taken).
    """
    observation = env.reset()
    episode_return = 0

    for _ in range(trajectory_max_len):
        chosen_action = agent.get_action(observation)
        observation, step_reward, finished, _info = env.step(chosen_action)
        episode_return += step_reward

        if finished:
            # The environment ended the episode before the step budget ran out.
            break

    return episode_return

In [3]:
def eval_model(
    env: CreditPayerEnv, agent: DoubleDQN, trajectory_max_len: int, repeat_count: int
):
    """Estimate the agent's quality as its mean episode reward.

    Runs ``eval_trajectory`` ``repeat_count`` times on the same environment
    and agent and averages the resulting total rewards.

    Args:
        env: Environment passed through to ``eval_trajectory``.
        agent: Agent passed through to ``eval_trajectory``.
        trajectory_max_len: Step cap applied to every evaluation episode.
        repeat_count: Number of evaluation episodes to average over.

    Returns:
        ``np.mean`` of the per-episode total rewards.
    """
    episode_returns = [
        eval_trajectory(env, agent, trajectory_max_len) for _ in range(repeat_count)
    ]
    return np.mean(episode_returns)

In [4]:
def double_objective(trial: optuna.Trial):
    """Optuna objective: train a DoubleDQN with sampled hyperparameters and score it.

    A fresh ``CreditPayerEnv`` and ``DoubleDQN`` are created for every trial.
    The agent is trained through ``MetricsStudy.study_agent`` and then
    evaluated with exploration switched off.

    Sampled hyperparameters:
        gamma: float in [0.95, 1].
        tau: float in [0.001, 0.05].
        batch_size: int in [64, 128].

    Args:
        trial: The Optuna trial used to sample the hyperparameters.

    Returns:
        Mean total reward over 10 evaluation episodes (the value to maximise).
    """
    env = CreditPayerEnv()
    state_dim = env.state_dim
    action_n = env.action_n

    episode_n = 50
    trajectory_max_len = 100

    # Per-step epsilon decrement sized to the full training budget of
    # episode_n * trajectory_max_len steps.
    # NOTE(review): presumably epsilon starts at 1 and is decremented once per
    # environment step, so it reaches 0 at the end of training -- confirm in DoubleDQN.
    epsilon_decrease = 1 / episode_n / trajectory_max_len

    gamma = trial.suggest_float("gamma", 0.95, 1)
    tau = trial.suggest_float("tau", 0.001, 0.05)

    batch_size = trial.suggest_int('batch_size', 64, 128)

    agent = DoubleDQN(
        state_dim,
        action_n,
        gamma=gamma,
        lr=0.001,
        tau=tau,
        batch_size=batch_size,
        epsilon_decrease=epsilon_decrease,
        epsilon_min=0,
    )
    study = MetricsStudy(env, agent, trajectory_max_len)

    # Train for exactly the number of episodes the epsilon schedule above was
    # derived from; a separate literal here would let the two drift apart.
    study.study_agent(episode_n=episode_n)

    # Turn exploration off so the evaluation measures the learned policy only.
    agent.epsilon_min = 0
    agent.epsilon = 0

    return eval_model(env, agent, trajectory_max_len, repeat_count=10)

In [5]:
# Hyperparameter search: maximise the mean evaluation reward returned by
# double_objective over 100 trials (the study is kept in memory only).
double_study = optuna.create_study(direction="maximize")
double_study.optimize(double_objective, n_trials=100)

# Report the best trial's objective value and the hyperparameters behind it.
trial = double_study.best_trial
print(
    "Reward: {}".format(trial.value),
    "Best hyperparameters: {}".format(trial.params),
    sep="\n",
)

[I 2024-01-03 10:39:28,610] A new study created in memory with name: no-name-d916820f-3b14-42f1-9f1d-8ccb7de2dd51
[I 2024-01-03 10:39:50,043] Trial 0 finished with value: -96.9 and parameters: {'gamma': 0.930864622296778, 'tau': 0.03242799563442478, 'batch_size': 65}. Best is trial 0 with value: -96.9.
[I 2024-01-03 10:40:11,715] Trial 1 finished with value: -439.7 and parameters: {'gamma': 0.9147129969432642, 'tau': 0.02051491453381674, 'batch_size': 64}. Best is trial 0 with value: -96.9.
[I 2024-01-03 10:40:33,529] Trial 2 finished with value: -22.6 and parameters: {'gamma': 0.917928679913425, 'tau': 0.027950888740676333, 'batch_size': 68}. Best is trial 2 with value: -22.6.
[I 2024-01-03 10:40:57,942] Trial 3 finished with value: -60.5 and parameters: {'gamma': 0.9906986834242006, 'tau': 0.08425960082978606, 'batch_size': 66}. Best is trial 2 with value: -22.6.
[I 2024-01-03 10:41:20,792] Trial 4 finished with value: -8.4 and parameters: {'gamma': 0.9025759187773467, 'tau': 0.02867

Reward: 2.8
Best hyperparameters: {'gamma': 0.9171561919787107, 'tau': 0.046132622379041546, 'batch_size': 66}


In [9]:
# Plot the optimization history of `double_study` and export it under
# figures/ as both an HTML file and a PNG image.
fig = optuna.visualization.plot_optimization_history(double_study)
fig.write_html('figures/optimization_history.html')
# NOTE(review): static image export presumably needs an extra image backend
# (e.g. kaleido) installed -- confirm in the environment.
fig.write_image('figures/optimization_history.png')
# Bare trailing expression so the notebook renders the figure inline.
fig

In [10]:
# Plot the slice view of `double_study` and export it under figures/ as
# both an HTML file and a PNG image. Note that this rebinds `fig`.
fig = optuna.visualization.plot_slice(double_study)
fig.write_html('figures/slice.html')
# NOTE(review): static image export presumably needs an extra image backend
# (e.g. kaleido) installed -- confirm in the environment.
fig.write_image('figures/slice.png')
# Bare trailing expression so the notebook renders the figure inline.
fig