In [75]:
from DQN import DQN_Agent
from QR_DQN import QR_DQN_Agent
from NAF import NAF_Agent
from TD3 import TD3_Agent

import optuna
from optuna import create_study
from optuna.samplers import TPESampler

import json
import torch

# DEFINITION OF THE TRIAL OBJECT

In [None]:
def DQN_optim(trial):
      # Define the space of hyperparameters to run the search for optimization
      int_batch_size = trial.suggest_categorical("batch_size", [64, 128, 256, 512])
      int_gamma = trial.suggest_float("gamma", 0.90, 0.99)
      int_eps_start = trial.suggest_float("eps_start", 0.95, 0.99)
      int_eps_decay = trial.suggest_categorical("eps_decay", [500, 750, 1000, 1250])
      int_eps_end = trial.suggest_float("eps_end", 0.025, 0.1)
      int_tau = trial.suggest_float("tau", 0.0025, 0.0075)
      int_lr = trial.suggest_float("lr", 1e-5, 1e-3)

      # init the agent
      model = DQN_Agent(ENV_NAME="LunarLander-v2",
            BATCH_SIZE=int(int_batch_size),
            GAMMA=int_gamma,
            EPS_START=int_eps_start,
            EPS_DECAY=int(int_eps_decay),
            EPS_END=int_eps_end,
            TAU=int_tau,
            LR=int_lr,
            N_EPISODES=400,
            PRINT_PLOT=False)
      # run the training
      model.training()
      # return the loss to choose the hyper parameters
      return  float(model.return_metric(1))


# INITIALIZATION OF THE HYPERPARAMETER OPTIMIZER AND RUN

In [49]:
DQN_study = create_study(direction="maximize", sampler=TPESampler())
DQN_study.optimize(DQN_optim, n_trials=10, n_jobs=1)

[I 2024-11-30 09:41:25,164] A new study created in memory with name: no-name-fcdbb753-5acb-4bc3-9660-ebe03a5d84c3


Moviepy - Building video c:\Users\matte\Documents\REI LEA\Progetto Esame\Deep-RL-Algo\videos\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\matte\Documents\REI LEA\Progetto Esame\Deep-RL-Algo\videos\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\matte\Documents\REI LEA\Progetto Esame\Deep-RL-Algo\videos\rl-video-episode-0.mp4


[I 2024-11-30 09:41:26,134] Trial 0 finished with value: -3.0469532994282646 and parameters: {'batch_size': 128, 'gamma': 0.9729239384216913, 'eps_start': 0.9527577276834668, 'eps_decay': 750, 'eps_end': 0.09924963592760644, 'tau': 0.00577146659897232, 'lr': 0.0003204551366322824}. Best is trial 0 with value: -3.0469532994282646.


Moviepy - Building video c:\Users\matte\Documents\REI LEA\Progetto Esame\Deep-RL-Algo\videos\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\matte\Documents\REI LEA\Progetto Esame\Deep-RL-Algo\videos\rl-video-episode-0.mp4



                                                              

Moviepy - Done !
Moviepy - video ready c:\Users\matte\Documents\REI LEA\Progetto Esame\Deep-RL-Algo\videos\rl-video-episode-0.mp4


[I 2024-11-30 09:41:27,134] Trial 1 finished with value: -1.7553530009979976 and parameters: {'batch_size': 256, 'gamma': 0.981532021376052, 'eps_start': 0.9558646616856268, 'eps_decay': 1000, 'eps_end': 0.091831265814524, 'tau': 0.0054325782837228095, 'lr': 0.0003664640317912199}. Best is trial 1 with value: -1.7553530009979976.


Moviepy - Building video c:\Users\matte\Documents\REI LEA\Progetto Esame\Deep-RL-Algo\videos\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\matte\Documents\REI LEA\Progetto Esame\Deep-RL-Algo\videos\rl-video-episode-0.mp4



                                                               

Moviepy - Done !
Moviepy - video ready c:\Users\matte\Documents\REI LEA\Progetto Esame\Deep-RL-Algo\videos\rl-video-episode-0.mp4


[I 2024-11-30 09:41:29,038] Trial 2 finished with value: -0.2025026485973885 and parameters: {'batch_size': 128, 'gamma': 0.9356548302004652, 'eps_start': 0.9642786183061381, 'eps_decay': 500, 'eps_end': 0.057820353276921796, 'tau': 0.004776918193639891, 'lr': 0.000988582081818452}. Best is trial 2 with value: -0.2025026485973885.


Moviepy - Building video c:\Users\matte\Documents\REI LEA\Progetto Esame\Deep-RL-Algo\videos\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\matte\Documents\REI LEA\Progetto Esame\Deep-RL-Algo\videos\rl-video-episode-0.mp4



[I 2024-11-30 09:41:29,567] Trial 3 finished with value: -3.4211748126987116 and parameters: {'batch_size': 512, 'gamma': 0.9625606879964652, 'eps_start': 0.9769628869713171, 'eps_decay': 750, 'eps_end': 0.052275301302460796, 'tau': 0.006116951981727238, 'lr': 0.00027524489479032854}. Best is trial 2 with value: -0.2025026485973885.


Moviepy - Done !
Moviepy - video ready c:\Users\matte\Documents\REI LEA\Progetto Esame\Deep-RL-Algo\videos\rl-video-episode-0.mp4
Moviepy - Building video c:\Users\matte\Documents\REI LEA\Progetto Esame\Deep-RL-Algo\videos\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\matte\Documents\REI LEA\Progetto Esame\Deep-RL-Algo\videos\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\matte\Documents\REI LEA\Progetto Esame\Deep-RL-Algo\videos\rl-video-episode-0.mp4


[I 2024-11-30 09:41:31,054] Trial 4 finished with value: -1.4791237774016628 and parameters: {'batch_size': 128, 'gamma': 0.9638072019294385, 'eps_start': 0.9777715712863785, 'eps_decay': 500, 'eps_end': 0.053315681432813504, 'tau': 0.006958893075061347, 'lr': 0.0006991884632456242}. Best is trial 2 with value: -0.2025026485973885.


Moviepy - Building video c:\Users\matte\Documents\REI LEA\Progetto Esame\Deep-RL-Algo\videos\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\matte\Documents\REI LEA\Progetto Esame\Deep-RL-Algo\videos\rl-video-episode-0.mp4



                                                               

Moviepy - Done !
Moviepy - video ready c:\Users\matte\Documents\REI LEA\Progetto Esame\Deep-RL-Algo\videos\rl-video-episode-0.mp4


[I 2024-11-30 09:41:31,799] Trial 5 finished with value: -2.4291298051960513 and parameters: {'batch_size': 512, 'gamma': 0.9246083012006939, 'eps_start': 0.9604490041033112, 'eps_decay': 1000, 'eps_end': 0.031508327875970826, 'tau': 0.005662565054163085, 'lr': 0.0006850129532703021}. Best is trial 2 with value: -0.2025026485973885.


Moviepy - Building video c:\Users\matte\Documents\REI LEA\Progetto Esame\Deep-RL-Algo\videos\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\matte\Documents\REI LEA\Progetto Esame\Deep-RL-Algo\videos\rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready c:\Users\matte\Documents\REI LEA\Progetto Esame\Deep-RL-Algo\videos\rl-video-episode-0.mp4


[I 2024-11-30 09:41:32,880] Trial 6 finished with value: -1.8805982664291845 and parameters: {'batch_size': 64, 'gamma': 0.9099562658453141, 'eps_start': 0.9886475392242859, 'eps_decay': 750, 'eps_end': 0.09104016626595965, 'tau': 0.002786301447803551, 'lr': 0.0001449014194939433}. Best is trial 2 with value: -0.2025026485973885.


Moviepy - Building video c:\Users\matte\Documents\REI LEA\Progetto Esame\Deep-RL-Algo\videos\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\matte\Documents\REI LEA\Progetto Esame\Deep-RL-Algo\videos\rl-video-episode-0.mp4



[I 2024-11-30 09:41:33,421] Trial 7 finished with value: -2.8932671409256114 and parameters: {'batch_size': 512, 'gamma': 0.9432649998406702, 'eps_start': 0.9679360768346319, 'eps_decay': 1000, 'eps_end': 0.06063498494174532, 'tau': 0.003950399476396582, 'lr': 0.0005751515757696072}. Best is trial 2 with value: -0.2025026485973885.


Moviepy - Done !
Moviepy - video ready c:\Users\matte\Documents\REI LEA\Progetto Esame\Deep-RL-Algo\videos\rl-video-episode-0.mp4
Moviepy - Building video c:\Users\matte\Documents\REI LEA\Progetto Esame\Deep-RL-Algo\videos\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\matte\Documents\REI LEA\Progetto Esame\Deep-RL-Algo\videos\rl-video-episode-0.mp4



[I 2024-11-30 09:41:33,988] Trial 8 finished with value: -4.489769823357037 and parameters: {'batch_size': 512, 'gamma': 0.9008438348888954, 'eps_start': 0.9876712776474785, 'eps_decay': 1250, 'eps_end': 0.06565488550882637, 'tau': 0.0029445323304938525, 'lr': 0.0008953975455513059}. Best is trial 2 with value: -0.2025026485973885.


Moviepy - Done !
Moviepy - video ready c:\Users\matte\Documents\REI LEA\Progetto Esame\Deep-RL-Algo\videos\rl-video-episode-0.mp4
Moviepy - Building video c:\Users\matte\Documents\REI LEA\Progetto Esame\Deep-RL-Algo\videos\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\matte\Documents\REI LEA\Progetto Esame\Deep-RL-Algo\videos\rl-video-episode-0.mp4



[I 2024-11-30 09:41:34,566] Trial 9 finished with value: -1.7899175286925961 and parameters: {'batch_size': 256, 'gamma': 0.9577831222591969, 'eps_start': 0.9671232986383895, 'eps_decay': 1250, 'eps_end': 0.05712204588977512, 'tau': 0.00526323107358285, 'lr': 0.0009172623741736855}. Best is trial 2 with value: -0.2025026485973885.


Moviepy - Done !
Moviepy - video ready c:\Users\matte\Documents\REI LEA\Progetto Esame\Deep-RL-Algo\videos\rl-video-episode-0.mp4


GET HYPERPARAMETERS AND RE-TRAIN

In [None]:
best_param = DQN_study.best_trial.params

{'batch_size': 128,
 'gamma': 0.9356548302004652,
 'eps_start': 0.9642786183061381,
 'eps_decay': 500,
 'eps_end': 0.057820353276921796,
 'tau': 0.004776918193639891,
 'lr': 0.000988582081818452}

In [None]:
model = DQN_Agent(ENV_NAME="LunarLander-v2",
          BATCH_SIZE=best_param["batch_size"],
          GAMMA=best_param["gamma"],
          EPS_START=best_param["eps_start"],
          EPS_DECAY=best_param["eps_decay"],
          EPS_END=best_param["eps_end"],
          TAU=best_param["tau"],
          LR=best_param["lr"],
          N_EPISODES=400,
          PRINT_PLOT=True)

In [60]:
model.training()

<Figure size 640x480 with 0 Axes>

SAVE BEST COMBINATION

In [78]:
FILE_PATH1 = "best_hyperparameters.json"
FILE_PATH2 = "best_parameters.pt"

with open(FILE_PATH1, 'w') as json_file:
    json.dump(best_param, json_file, indent=4)

torch.save(model.return_weights(), FILE_PATH2)
