In [1]:
from DQN import DQN_Agent
from QR_DQN import QR_DQN_Agent
from IQN import IQN_Agent
from NAF import NAF_Agent
from TD3 import TD3_Agent
from PPO import PPO_Agent
from SAC import SAC_Agent

import optuna
from optuna import create_study
from optuna.samplers import TPESampler
from optuna.pruners import PatientPruner, MedianPruner

import json
import torch

  from .autonotebook import tqdm as notebook_tqdm


# DEFINITION OF THE TRIAL OBJECT

Below, one optimizer (objective) function is defined for each agent. Choose the one you prefer.

In [9]:
def DQN_optim(trial):
    """Optuna objective for the DQN agent on LunarLander-v2.

    Draws one hyperparameter configuration from ``trial``, builds a
    ``DQN_Agent`` with it (``N_EPISODES=400``, plotting enabled), runs its
    training and reports the resulting metric to the study.

    Args:
        trial: Optuna trial object used to sample the hyperparameters.

    Returns:
        float: ``return_metric(4)`` of the trained agent; this is the trial's
        objective value.
    """
    # Hyperparameters are sampled while the dict is built, top to bottom.
    hyperparams = {
        "BATCH_SIZE": int(trial.suggest_categorical("batch_size", [64, 128])),
        "GAMMA": trial.suggest_float("gamma", 0.90, 0.99),
        "EPS_START": trial.suggest_float("eps_start", 0.95, 0.99),
        "EPS_DECAY": int(trial.suggest_categorical("eps_decay", [500, 750, 1000, 1250])),
        "EPS_END": trial.suggest_float("eps_end", 0.025, 0.1),
        "TAU": trial.suggest_float("tau", 0.0025, 0.0075),
        "LR": trial.suggest_float("lr", 1e-5, 1e-3),
    }

    # Build the agent, train it, and hand its metric back to Optuna.
    agent = DQN_Agent(
        ENV_NAME="LunarLander-v2",
        **hyperparams,
        N_EPISODES=400,
        PRINT_PLOT=True,
    )
    agent.training()
    score = agent.return_metric(4)
    return float(score)


In [None]:
def TD3_optim(trial):
    """Optuna objective for the TD3 agent on CarRacing-v3.

    Samples one hyperparameter configuration from ``trial``, builds a
    ``TD3_Agent`` with it (``N_EPISODES=400``, plotting disabled), runs its
    training and reports the resulting metric to the study.

    Args:
        trial: Optuna trial object used to sample the hyperparameters.

    Returns:
        float: ``return_metric(4)`` of the trained agent; this is the trial's
        objective value.
    """
    # Define the space of hyperparameters to run the search for optimization
    batch_size = trial.suggest_categorical("batch_size", [64, 128])
    gamma = trial.suggest_float("gamma", 0.90, 0.99)
    sd_noise = trial.suggest_float("sd_noise", 0.3, 1)
    sd_noise_decay = trial.suggest_float("sd_noise_decay", 0.1, 0.99)
    steps_decay_sd = trial.suggest_categorical("steps_decay_sd", [5, 10, 15, 20])
    cp_value = trial.suggest_float("cp_value", 0.01, 0.2)
    steps_update_policy = trial.suggest_categorical(
        "steps_update_policy", [2, 4, 6, 8, 10, 12, 14, 16, 18, 20]
    )
    tau = trial.suggest_float("tau", 0.0025, 0.0075)
    lr = trial.suggest_float("lr", 1e-5, 1e-3)
    repetition = trial.suggest_categorical("repetition", [2, 4, 6, 8, 10])

    # init the agent
    model = TD3_Agent(
        ENV_NAME="CarRacing-v3",
        BATCH_SIZE=int(batch_size),
        GAMMA=gamma,
        SD_NOISE=sd_noise,
        # The decay factor is a float in [0.1, 0.99]; it must not be cast to
        # int, which would turn every sampled value into 0. The re-train cell
        # passes best_param["sd_noise_decay"] unconverted as well.
        SD_DECAY=sd_noise_decay,
        STEPS_DECAY_SD=steps_decay_sd,
        CP_VALUE=cp_value,
        STEP_UPT_POLICY=int(steps_update_policy),
        TAU=tau,
        LR=lr,
        REPETITION=int(repetition),
        N_EPISODES=400,
        PRINT_PLOT=False,
    )
    # run the training
    model.training()
    # return the metric Optuna uses to compare hyperparameter configurations
    return float(model.return_metric(4))


In [None]:
def NAF_optim(trial):
    """Optuna objective for the NAF agent on CarRacing-v3.

    Draws one hyperparameter configuration from ``trial``, builds a
    ``NAF_Agent`` with it (``N_EPISODES=200``, plotting disabled), runs its
    training and reports the resulting metric to the study.

    Args:
        trial: Optuna trial object used to sample the hyperparameters.

    Returns:
        float: ``return_metric(4)`` of the trained agent; this is the trial's
        objective value.
    """
    # Hyperparameters are sampled while the dict is built, top to bottom.
    hyperparams = {
        "BATCH_SIZE": int(trial.suggest_categorical("batch_size", [64, 128, 256, 512])),
        "GAMMA": trial.suggest_float("gamma", 0.90, 0.99),
        "EPSILON": trial.suggest_float("eps", 0.5, 1),
        "EPSILON_DECAY": trial.suggest_float("eps_decay", 0.025, 0.1),
        "STEPS_DECAY": trial.suggest_categorical("steps_decay", [5, 10, 15, 20, 40, 60]),
        "TAU": trial.suggest_float("tau", 0.0025, 0.0075),
        "LR": trial.suggest_float("lr", 1e-5, 1e-3),
        "REPETITION": int(trial.suggest_categorical("repetition", [2, 4, 6, 8, 10])),
    }

    # Build the agent, train it, and hand its metric back to Optuna.
    agent = NAF_Agent(
        ENV_NAME="CarRacing-v3",
        **hyperparams,
        N_EPISODES=200,
        PRINT_PLOT=False,
    )
    agent.training()
    score = agent.return_metric(4)
    return float(score)

In [7]:
def QR_DQN_optim(trial):
    """Optuna objective for the QR-DQN agent on LunarLander-v2.

    Draws one hyperparameter configuration from ``trial`` (including the
    number of quantiles), builds a ``QR_DQN_Agent`` with it
    (``N_EPISODES=400``, plotting disabled), runs its training and reports
    the resulting metric to the study.

    Args:
        trial: Optuna trial object used to sample the hyperparameters.

    Returns:
        float: ``return_metric(4)`` of the trained agent; this is the trial's
        objective value.
    """
    # Hyperparameters are sampled while the dict is built, top to bottom.
    hyperparams = {
        "BATCH_SIZE": int(trial.suggest_categorical("batch_size", [64, 128])),
        "GAMMA": trial.suggest_float("gamma", 0.90, 0.99),
        "EPS_START": trial.suggest_float("eps_start", 0.95, 0.99),
        "EPS_DECAY": int(trial.suggest_categorical("eps_decay", [500, 750, 1000, 1250])),
        "EPS_END": trial.suggest_float("eps_end", 0.025, 0.1),
        "TAU": trial.suggest_float("tau", 0.0025, 0.0075),
        "LR": trial.suggest_float("lr", 1e-5, 1e-3),
        "N_QUANTILES": trial.suggest_int("n_quantiles", 40, 70),
    }

    # Build the agent, train it, and hand its metric back to Optuna.
    agent = QR_DQN_Agent(
        ENV_NAME="LunarLander-v2",
        **hyperparams,
        N_EPISODES=400,
        PRINT_PLOT=False,
    )
    agent.training()
    score = agent.return_metric(4)
    return float(score)

In [None]:
def SAC_optim(trial):
    """Optuna objective for the SAC agent on CarRacing-v3.

    Draws one hyperparameter configuration from ``trial``, builds a
    ``SAC_Agent`` with it (``N_EPISODES=400``, plotting disabled), runs its
    training and reports the resulting metric to the study.

    Args:
        trial: Optuna trial object used to sample the hyperparameters.

    Returns:
        float: ``return_metric(4)`` of the trained agent; this is the trial's
        objective value.
    """
    # Hyperparameters are sampled while the dict is built, top to bottom.
    hyperparams = {
        "BATCH_SIZE": int(trial.suggest_categorical("batch_size", [64, 128, 256, 512])),
        "GAMMA": trial.suggest_float("gamma", 0.90, 0.99),
        "ENTROPY_PARAM": trial.suggest_float("entropy", 0.05, 0.2),
        "K_EPOCHS": trial.suggest_int("epochs", 4, 30),
        "STEPS_UPDATE": trial.suggest_categorical("steps_update", [10, 20, 30]),
        "TAU": trial.suggest_float("tau", 0.0025, 0.0075),
        "LR": trial.suggest_float("lr", 1e-5, 1e-3),
        "REPETITION": int(trial.suggest_categorical("repetition", [2, 4, 6, 8, 10])),
    }

    # Build the agent, train it, and hand its metric back to Optuna.
    agent = SAC_Agent(
        ENV_NAME="CarRacing-v3",
        **hyperparams,
        N_EPISODES=400,
        PRINT_PLOT=False,
    )
    agent.training()
    score = agent.return_metric(4)
    return float(score)


In [None]:
def IQN_optim(trial):
    """Optuna objective for the IQN agent on LunarLander-v2.

    Samples one hyperparameter configuration from ``trial``, builds an
    ``IQN_Agent`` with it (``N_EPISODES=400``, plotting disabled), runs its
    training and reports the resulting metric to the study.

    Args:
        trial: Optuna trial object used to sample the hyperparameters.

    Returns:
        float: ``return_metric(4)`` of the trained agent; this is the trial's
        objective value.
    """
    # Define the space of hyperparameters to run the search for optimization
    batch_size = trial.suggest_categorical("batch_size", [64, 128, 256, 512])
    gamma = trial.suggest_float("gamma", 0.90, 0.99)
    eps_start = trial.suggest_float("eps_start", 0.95, 0.99)
    eps_decay = trial.suggest_categorical("eps_decay", [500, 750, 1000, 1250])
    eps_end = trial.suggest_float("eps_end", 0.025, 0.1)
    tau = trial.suggest_float("tau", 0.0025, 0.0075)
    lr = trial.suggest_float("lr", 1e-5, 1e-3)
    sub_agents = trial.suggest_int("sub_agents", 2, 10)

    # init the agent (the comma after SUB_AGENTS was missing, which made the
    # whole cell a SyntaxError)
    model = IQN_Agent(
        ENV_NAME="LunarLander-v2",
        BATCH_SIZE=int(batch_size),
        GAMMA=gamma,
        EPS_START=eps_start,
        EPS_DECAY=int(eps_decay),
        EPS_END=eps_end,
        TAU=tau,
        LR=lr,
        SUB_AGENTS=sub_agents,
        N_EPISODES=400,
        PRINT_PLOT=False,
    )
    # run the training
    model.training()
    # return the metric Optuna uses to compare hyperparameter configurations
    return float(model.return_metric(4))

In [None]:
def PPO_optim(trial):
    """Optuna objective for the PPO agent on CarRacing-v3.

    Samples one hyperparameter configuration from ``trial``, builds a
    ``PPO_Agent`` with it (``N_EPISODES=400``, plotting disabled), runs its
    training and reports the resulting metric to the study.

    Args:
        trial: Optuna trial object used to sample the hyperparameters.

    Returns:
        float: ``return_metric(4)`` of the trained agent; this is the trial's
        objective value.
    """
    # Define the space of hyperparameters to run the search for optimization
    batch_size = trial.suggest_categorical("batch_size", [64, 128, 256, 512])
    num_batch_max = trial.suggest_int("max_n_batch", 2, 10)
    gamma = trial.suggest_float("gamma", 0.90, 0.99)
    trunc_param = trial.suggest_int("trunc_param", 2, 6)
    gae_lambda = trial.suggest_float("lambda", 0.01, 0.99)
    # Must be sampled from the trial; the original called the builtin
    # ``int.suggest_int``, which raises AttributeError.
    max_len_traj = trial.suggest_int("len_max_traj", 500, 2000)
    n_actors = trial.suggest_int("n_actors", 2, 6)
    epochs = trial.suggest_int("epochs", 4, 30)
    clip_value = trial.suggest_float("clip_value", 0.01, 0.3)
    entropy_coef = trial.suggest_float("entropy_coef", 0.1, 0.6)
    lr = trial.suggest_float("lr", 1e-5, 1e-3)
    repetition = trial.suggest_categorical("repetition", [2, 4, 6, 8, 10])

    # init the agent
    model = PPO_Agent(
        ENV_NAME="CarRacing-v3",
        BATCH_SIZE=int(batch_size),
        NUM_BATCH_MAX=num_batch_max,
        GAMMA=gamma,
        TRUNC_PARAM=trunc_param,
        LAMBDA=gae_lambda,
        MAX_LEN_TRAJ=max_len_traj,
        N_ACTORS=n_actors,
        K_EPOCHS=epochs,
        CLIP_VALUE=clip_value,
        COEF_H=entropy_coef,
        LR=lr,
        REPETITION=int(repetition),
        N_EPISODES=400,
        PRINT_PLOT=False,
    )
    # run the training
    model.training()
    # return the metric Optuna uses to compare hyperparameter configurations
    return float(model.return_metric(4))


# INITIALIZATION OF THE HYPERPARAMETER OPTIMIZER AND RUN

Here, pass the objective function of the agent you selected above to `study.optimize`: the cell below currently uses `QR_DQN_optim`, so replace it with the matching `*_optim` function.

In [8]:
# Create a study that maximizes the objective's return value, using a TPE
# sampler and a median pruner wrapped in a PatientPruner (patience=3).
# NOTE(review): none of the *_optim objectives in this notebook call
# trial.report(), so the pruner has no intermediate values to act on.
study = create_study(
    direction="maximize",
    sampler=TPESampler(),
    pruner=PatientPruner(MedianPruner(), patience=3),
)

# Run 60 trials, one at a time, with the selected objective function.
study.optimize(
    QR_DQN_optim,
    n_trials=60,
    n_jobs=1,
)

  study = create_study(direction="maximize", sampler=TPESampler(), pruner=PatientPruner(MedianPruner(), patience=3))
[I 2024-12-01 15:33:08,088] A new study created in memory with name: no-name-ac1335e0-974b-454b-82ce-16c04f21d430


Moviepy - Building video c:\Users\matte\Documents\REI LEA\Progetto Esame\Deep-RL-Algo\videos\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\matte\Documents\REI LEA\Progetto Esame\Deep-RL-Algo\videos\rl-video-episode-0.mp4



[W 2024-12-01 15:33:11,096] Trial 0 failed with parameters: {'batch_size': 128, 'gamma': 0.964727910854953, 'eps_start': 0.9646448664481089, 'eps_decay': 1000, 'eps_end': 0.06021138714889231, 'tau': 0.006188506403533536, 'lr': 0.00030694694214773233, 'n_quantiles': 57} because of the following error: RuntimeError('shape mismatch: value tensor of shape [127, 51] cannot be broadcast to indexing result of shape [127, 57]').
Traceback (most recent call last):
  File "c:\Users\matte\myenv_RL\Lib\site-packages\optuna\study\_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\matte\AppData\Local\Temp\ipykernel_1840\138376655.py", line 25, in QR_DQN_optim
    model.training()
  File "c:\Users\matte\Documents\REI LEA\Progetto Esame\Deep-RL-Algo\QR_DQN.py", line 144, in training
    self.optimize_model()
  File "c:\Users\matte\Documents\REI LEA\Progetto Esame\Deep-RL-Algo\QR_DQN.py", line 103, in optimize_model
    next_state

Moviepy - Done !
Moviepy - video ready c:\Users\matte\Documents\REI LEA\Progetto Esame\Deep-RL-Algo\videos\rl-video-episode-0.mp4


RuntimeError: shape mismatch: value tensor of shape [127, 51] cannot be broadcast to indexing result of shape [127, 57]

GET HYPERPARAMETERS AND RE-TRAIN

In [11]:
# Hyperparameters of the best trial found by the study, as a dict keyed by
# the names given to the trial.suggest_* calls of the objective that was run.
best_param = study.best_trial.params

In [12]:
# Show the selected hyperparameters as the cell output.
best_param

{'batch_size': 128,
 'gamma': 0.9870468671594415,
 'eps_start': 0.9689218973923628,
 'eps_decay': 1000,
 'eps_end': 0.0841014572098897,
 'tau': 0.003998980031293827,
 'lr': 0.00037088923945978876}

In [13]:
# Re-create a DQN agent from the best hyperparameters of the study.
# Run this cell only if the study was optimized with DQN_optim: the keys read
# from best_param are the ones that objective registers. Every re-train cell
# assigns the same `model` variable, so the last one executed wins.
# N_EPISODES is 600 here, versus 400 during the search.
model = DQN_Agent(ENV_NAME="LunarLander-v2",
                  BATCH_SIZE=best_param["batch_size"],
                  GAMMA=best_param["gamma"],
                  EPS_START=best_param["eps_start"],
                  EPS_DECAY=best_param["eps_decay"],
                  EPS_END=best_param["eps_end"],
                  TAU=best_param["tau"],
                  LR=best_param["lr"],
                  N_EPISODES=600,
                  PRINT_PLOT=True)

  logger.warn(


In [None]:
# Re-create a TD3 agent from the best hyperparameters of the study.
# Run this cell only if the study was optimized with TD3_optim: the keys read
# from best_param are the ones that objective registers. Every re-train cell
# assigns the same `model` variable, so the last one executed wins.
model = TD3_Agent(ENV_NAME="CarRacing-v3",
                  BATCH_SIZE=best_param["batch_size"],
                  GAMMA=best_param["gamma"],
                  SD_NOISE=best_param["sd_noise"],
                  SD_DECAY=best_param["sd_noise_decay"],
                  STEPS_DECAY_SD=best_param["steps_decay_sd"],
                  CP_VALUE=best_param["cp_value"],
                  STEP_UPT_POLICY=best_param["steps_update_policy"],
                  TAU=best_param["tau"],
                  LR=best_param["lr"],
                  REPETITION=best_param["repetition"],
                  N_EPISODES=400,
                  PRINT_PLOT=True)

In [None]:
# Re-create a NAF agent from the best hyperparameters of the study.
# Run this cell only if the study was optimized with NAF_optim: the keys read
# from best_param are the ones that objective registers. Every re-train cell
# assigns the same `model` variable, so the last one executed wins.
# Unlike most other re-train cells, plotting stays disabled here.
model = NAF_Agent(ENV_NAME="CarRacing-v3",
                  BATCH_SIZE=best_param["batch_size"],
                  GAMMA=best_param["gamma"],
                  EPSILON = best_param["eps"],
                  EPSILON_DECAY=best_param["eps_decay"],
                  STEPS_DECAY=best_param["steps_decay"],
                  TAU=best_param["tau"],
                  LR=best_param["lr"],
                  REPETITION=best_param["repetition"],
                  N_EPISODES=200,
                  PRINT_PLOT=False)

In [None]:
# Re-create a QR-DQN agent from the best hyperparameters of the study.
# Run this cell only if the study was optimized with QR_DQN_optim: the keys
# read from best_param (including "n_quantiles") are the ones that objective
# registers. Every re-train cell assigns the same `model` variable, so the
# last one executed wins.
model = QR_DQN_Agent(ENV_NAME="LunarLander-v2",
                     BATCH_SIZE=best_param["batch_size"],
                     GAMMA=best_param["gamma"],
                     EPS_START=best_param["eps_start"],
                     EPS_DECAY=best_param["eps_decay"],
                     EPS_END=best_param["eps_end"],
                     TAU=best_param["tau"],
                     LR=best_param["lr"],
                     N_QUANTILES=best_param["n_quantiles"],
                     N_EPISODES=400,
                     PRINT_PLOT=True)

In [None]:
# Re-create a SAC agent from the best hyperparameters of the study.
# Run this cell only if the study was optimized with SAC_optim: the keys read
# from best_param are the ones that objective registers. Every re-train cell
# assigns the same `model` variable, so the last one executed wins.
# Plotting stays disabled here.
model = SAC_Agent(ENV_NAME="CarRacing-v3",
                  BATCH_SIZE=best_param["batch_size"],
                  GAMMA=best_param["gamma"],
                  ENTROPY_PARAM=best_param["entropy"],
                  K_EPOCHS=best_param["epochs"],
                  STEPS_UPDATE=best_param["steps_update"],
                  TAU=best_param["tau"],
                  LR=best_param["lr"],
                  REPETITION=best_param["repetition"],
                  N_EPISODES=400,
                  PRINT_PLOT=False)

In [None]:
# Re-create an IQN agent from the best hyperparameters of the study.
# Run this cell only if the study was optimized with IQN_optim: the keys read
# from best_param (including "sub_agents") are the ones that objective
# registers. Every re-train cell assigns the same `model` variable, so the
# last one executed wins.
model = IQN_Agent(ENV_NAME="LunarLander-v2",
                     BATCH_SIZE=best_param["batch_size"],
                     GAMMA=best_param["gamma"],
                     EPS_START=best_param["eps_start"],
                     EPS_DECAY=best_param["eps_decay"],
                     EPS_END=best_param["eps_end"],
                     TAU=best_param["tau"],
                     LR=best_param["lr"],
                     SUB_AGENTS=best_param["sub_agents"],
                     N_EPISODES=400,
                     PRINT_PLOT=True)

In [None]:
# Re-create a PPO agent from the best hyperparameters of the study.
# Run this cell only if the study was optimized with PPO_optim: the keys read
# from best_param are the ones that objective registers. Every re-train cell
# assigns the same `model` variable, so the last one executed wins.
# Plotting stays disabled here.
model = PPO_Agent(ENV_NAME="CarRacing-v3",            
                 BATCH_SIZE=best_param["batch_size"],
                 NUM_BATCH_MAX=best_param["max_n_batch"],
                 GAMMA=best_param["gamma"],
                 TRUNC_PARAM=best_param["trunc_param"],
                 LAMBDA=best_param["lambda"],
                 MAX_LEN_TRAJ=best_param["len_max_traj"],
                 N_ACTORS=best_param["n_actors"],
                 K_EPOCHS=best_param["epochs"],
                 CLIP_VALUE=best_param["clip_value"],
                 COEF_H=best_param["entropy_coef"],
                 LR=best_param["lr"],
                 REPETITION=best_param["repetition"],
                 N_EPISODES=400,
                 PRINT_PLOT=False)

In [14]:
# Train whichever agent is currently bound to `model`, i.e. the one built by
# the last re-train cell that was executed.
model.training()

<Figure size 640x480 with 0 Axes>

SAVE BEST COMBINATION

In [15]:
# Output files, written relative to the current working directory.
# NOTE(review): both names are hard-coded with a "DQN_" prefix; rename them
# when a different agent was selected, or results will be saved under a
# misleading name.
FILE_PATH1 = "DQN_best_hyperparameters.json"
FILE_PATH2 = "DQN_best_parameters.pt"

# Store the best hyperparameters as indented JSON.
with open(FILE_PATH1, 'w') as json_file:
    json.dump(best_param, json_file, indent=4)

# Store whatever model.return_weights() returns with torch.save
# (presumably the trained network weights; confirm against the agent class).
torch.save(model.return_weights(), FILE_PATH2)
