In [4]:
import warnings
import logging


warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

In [5]:
from utils import make_environments
from utils import pearl_utils
from configs import defaults
from utils.reward_functions import log_reward_function,cumulative_reward_function,sharpe_reward_function
from utils. utils import make_hidden_dims
import optuna
from optuna.samplers import TPESampler

from neuralforecast.core import NeuralForecast
from Pearl.pearl.utils.instantiations.environments.gym_environment import GymEnvironment
from Pearl.pearl.utils.functional_utils.train_and_eval.online_learning import \
    online_learning
import datetime

In [6]:
reward_functions=[log_reward_function,cumulative_reward_function,sharpe_reward_function]
train_env,test_env=make_environments.make_envs(reward_function=log_reward_function)


Seed set to 20
Seed set to 17
Seed set to 17


['data/binanceus-DOGEUSDT-1h.pkl']


100%|██████████| 78/78 [00:00<00:00, 11337.14it/s]
1it [00:00, 17.78it/s]
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

2024-10-20 16:00:00 2024-11-19 16:00:00


In [7]:
sp=train_env.positions
train_env.action_space.n
train_env.name

'DOGEUSDT_train'

In [8]:
make_hidden_dims(n_layers=3, n_units=64)

[64, 64, 64]

In [9]:
agent=pearl_utils.create_ddqn_model(

        observation_space_dim=train_env.observation_space.shape[0], 
        action_space_dim=train_env.action_space.n,
        hidden_dims=[64,64, 64], 
        training_rounds=20,
        learning_rate = 0.001,
        discount_factor = 0.99,
        batch_size = 128,
        target_update_freq = 10,
        soft_update_tau = 0.75,  # a value of 1 indicates no soft updates
        is_conservative = False,
        conservative_alpha = False,
        replay_buffer_size = 10_000,
        lstm=False)

In [10]:
test_env.observation_space.shape,train_env.action_space.n

((30,), np.int64(2))

In [11]:
env=GymEnvironment(train_env)

obs,action_space=env.reset()
agent.reset(   obs, action_space)

In [12]:
# done = False
# while not done:
#     action = agent.act(exploit=False)
#     action_result = env.step(action)
    
#     agent.observe(action_result)
#     loss=agent.learn()

#     done = action_result.done

In [13]:
info = online_learning(
        agent=agent,
        env=env,
        # number_of_episodes=10,
        number_of_steps=168,
        print_every_x_episodes=2,   # print returns after every 10 episdoes
        print_every_x_steps=1,   # print returns after every 10 episdoes
        learn_every_k_steps=20,   # print returns after every 10 episdoes
        learn_after_episode=False,
        record_period=169,   # instead of updating after every environment interaction, Q networks are updates at the end of each episode
        seed=0
    )

  0%|          | 0/168 [00:00<?, ?it/s]

In [14]:
33//1

33

In [15]:


def objective_function(trial):
 
    reward_id=trial.suggest_categorical('reward_function', [0,1,2])
    algo=trial.suggest_categorical('algorithm', ['dqn','ddqn'])    

    # reward_id=0
    
    reward_func=reward_functions[reward_id]
    train_env.reward_func=reward_func
    test_env.reward_func=reward_func
    
    observation_space_dim=train_env.observation_space.shape[0]
    action_space_dim=len(train_env.positions)
    n_layers=trial.suggest_int('n_layers', 1, 3)
    n_units=trial.suggest_categorical('n_units', [64,128,256,512])
    
    hidden_dims=make_hidden_dims(n_layers= n_layers, n_units=n_units)
    
    search_space={
                'observation_space_dim': observation_space_dim,
                'action_space_dim': action_space_dim,
                'hidden_dims': hidden_dims,
                'training_rounds': trial.suggest_int('training_rounds', 5, 30),
                'learning_rate': trial.suggest_float('learning_rate', 1e-6, 1e-4),
                'discount_factor': trial.suggest_float('discount_factor', 0.8, 0.99), # gamma (greediness)
                'batch_size': trial.suggest_categorical('batch_size', [64, 128]),
                'target_update_freq': trial.suggest_categorical('target_update_freq', [1, 5, 10, 24]),
                'soft_update_tau': trial.suggest_float('soft_update_tau', 0.1, .99), 
                'is_conservative': trial.suggest_categorical('is_conservative', [True, False]),
                'lstm': trial.suggest_categorical('lstm', [True, False]),
                'conservative_alpha': trial.suggest_float('conservative_alpha', 0.5, 1.0),
                }

    learning_space={'learn_after_episode':trial.suggest_categorical('learn_after_episode', [True, False]),
                    'learning_steps':trial.suggest_int('learning_steps', 10, 89),
                    'n_epochs':trial.suggest_categorical('n_epochs',[100,500]),
                    }
    #
    # print('n_epochs',n_epochs)
    if algo=='dqn':
        agent=pearl_utils.create_dqn_model(**search_space)
    elif algo=='ddqn':
        agent=pearl_utils.create_ddqn_model(**search_space)

        
    agent=pearl_utils.train_pearl_model(agent,train_env,**learning_space)
    profit,n_trades=pearl_utils.test_pearl_model(agent,test_env)
    objectives={'profit':profit,'n_trades':n_trades}

    print('profit',profit,'n_trades',n_trades)

    return profit,n_trades

In [23]:
model_name=defaults.model_name
model_name

study_name=f"{defaults.model_name}"
storage_name="sqlite:///PearlHPTuning.sqlite3"

In [24]:
from optuna import create_study

In [27]:

study=create_study(study_name=study_name, 
             storage=storage_name, 
             load_if_exists=True,
             directions=['maximize','maximize'],
             sampler=TPESampler()
             )

[I 2024-11-19 15:52:15,194] Using an existing study with name 'DOGEUSDTSPOT' instead of creating a new one.


In [28]:
study.optimize(objective_function, n_trials=30)


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [04:58<?, ?it/s]
[I 2024-11-19 15:59:22,367] Trial 0 finished with values: [1423.0532380485854, 0.46] and parameters: {'reward_function': 0, 'algorithm': 'ddqn', 'n_layers': 3, 'n_units': 256, 'training_rounds': 8, 'learning_rate': 3.757902105533474e-05, 'discount_factor': 0.9259513955826991, 'batch_size': 128, 'target_update_freq': 10, 'soft_update_tau': 0.5443969708496746, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.9096580075756356, 'learn_after_episode': False, 'learning_steps': 74, 'n_epochs': 100}.


profit 1423.0532380485854 n_trades 0.46


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [52:03<?, ?it/s]
[I 2024-11-19 16:53:47,299] Trial 1 finished with values: [1044.8420718893076, 70.84] and parameters: {'reward_function': 2, 'algorithm': 'dqn', 'n_layers': 2, 'n_units': 512, 'training_rounds': 20, 'learning_rate': 5.351602134066019e-05, 'discount_factor': 0.8254778989193878, 'batch_size': 128, 'target_update_freq': 24, 'soft_update_tau': 0.6142957062398715, 'is_conservative': True, 'lstm': True, 'conservative_alpha': 0.6642486751622055, 'learn_after_episode': False, 'learning_steps': 75, 'n_epochs': 500}.


profit 1044.8420718893076 n_trades 70.84


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [06:08<?, ?it/s]
[I 2024-11-19 17:02:14,735] Trial 2 finished with values: [1024.4878989919725, 5.57] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 3, 'n_units': 256, 'training_rounds': 26, 'learning_rate': 9.253466472219328e-05, 'discount_factor': 0.8462934098248838, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.6713495949030748, 'is_conservative': True, 'lstm': True, 'conservative_alpha': 0.6952673480450033, 'learn_after_episode': True, 'learning_steps': 65, 'n_epochs': 100}.


profit 1024.4878989919725 n_trades 5.57


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [00:35<?, ?it/s]
[I 2024-11-19 17:02:52,871] Trial 3 finished with values: [1093.871802437584, 40.24] and parameters: {'reward_function': 0, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 256, 'training_rounds': 21, 'learning_rate': 3.2507681034328363e-05, 'discount_factor': 0.9255955810614617, 'batch_size': 128, 'target_update_freq': 24, 'soft_update_tau': 0.6723693771058151, 'is_conservative': False, 'lstm': False, 'conservative_alpha': 0.7479824879039227, 'learn_after_episode': False, 'learning_steps': 29, 'n_epochs': 100}.


profit 1093.871802437584 n_trades 40.24


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:38<?, ?it/s]
[I 2024-11-19 17:03:33,384] Trial 4 finished with values: [1165.4043903121283, 27.56] and parameters: {'reward_function': 1, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 29, 'learning_rate': 5.722490854629807e-05, 'discount_factor': 0.9818046811053813, 'batch_size': 128, 'target_update_freq': 5, 'soft_update_tau': 0.21833797964764654, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.6992223728177329, 'learn_after_episode': True, 'learning_steps': 10, 'n_epochs': 500}.


profit 1165.4043903121283 n_trades 27.56


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [00:30<?, ?it/s]
[I 2024-11-19 17:04:06,578] Trial 5 finished with values: [1082.2632626335967, 59.14] and parameters: {'reward_function': 2, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 256, 'training_rounds': 21, 'learning_rate': 7.783108001276111e-05, 'discount_factor': 0.8812978598926988, 'batch_size': 128, 'target_update_freq': 10, 'soft_update_tau': 0.30570536884395816, 'is_conservative': False, 'lstm': False, 'conservative_alpha': 0.7481089744378919, 'learn_after_episode': False, 'learning_steps': 23, 'n_epochs': 100}.


profit 1082.2632626335967 n_trades 59.14


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [09:32<?, ?it/s]
[I 2024-11-19 17:13:43,343] Trial 6 finished with values: [1183.9876748505383, 61.05] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 3, 'n_units': 512, 'training_rounds': 29, 'learning_rate': 4.9043254065469676e-05, 'discount_factor': 0.8448209410243246, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.6880326022560119, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.52273543943938, 'learn_after_episode': False, 'learning_steps': 24, 'n_epochs': 500}.


profit 1183.9876748505383 n_trades 61.05


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [01:33<?, ?it/s]
[I 2024-11-19 17:15:20,101] Trial 7 finished with values: [1166.0221371255036, 45.1] and parameters: {'reward_function': 2, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 512, 'training_rounds': 15, 'learning_rate': 7.622343360638977e-05, 'discount_factor': 0.875473211887314, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.39089679827155976, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.7521636961518408, 'learn_after_episode': False, 'learning_steps': 80, 'n_epochs': 500}.


profit 1166.0221371255036 n_trades 45.1


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:04<?, ?it/s]
[I 2024-11-19 17:15:27,686] Trial 8 finished with values: [1042.3869070368567, 66.95] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 3, 'n_units': 128, 'training_rounds': 11, 'learning_rate': 2.394721427170762e-05, 'discount_factor': 0.9528450420131132, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.36652749574330545, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.9299078413804187, 'learn_after_episode': True, 'learning_steps': 40, 'n_epochs': 100}.


profit 1042.3869070368567 n_trades 66.95


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [00:05<?, ?it/s]
[I 2024-11-19 17:15:35,026] Trial 9 finished with values: [1080.1497512621145, 88.97] and parameters: {'reward_function': 0, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 64, 'training_rounds': 11, 'learning_rate': 9.537722608965365e-05, 'discount_factor': 0.9458174345070368, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.9870547152023663, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.8693676725350628, 'learn_after_episode': False, 'learning_steps': 82, 'n_epochs': 100}.


profit 1080.1497512621145 n_trades 88.97


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [30:49<?, ?it/s]
[I 2024-11-19 17:48:58,398] Trial 10 finished with values: [1065.0813876069421, 4.46] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 512, 'training_rounds': 26, 'learning_rate': 2.9731165265082507e-06, 'discount_factor': 0.8071178099647397, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.9117615015257374, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5066230461006795, 'learn_after_episode': True, 'learning_steps': 56, 'n_epochs': 500}.


profit 1065.0813876069421 n_trades 4.46


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [00:03<?, ?it/s]
[I 2024-11-19 17:49:03,895] Trial 11 finished with values: [1199.141291531122, 16.3] and parameters: {'reward_function': 0, 'algorithm': 'ddqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 5, 'learning_rate': 9.864504483163224e-05, 'discount_factor': 0.9891893876758102, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.9691028953452985, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.8487880382937174, 'learn_after_episode': False, 'learning_steps': 85, 'n_epochs': 100}.


profit 1199.141291531122 n_trades 16.3


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [00:07<?, ?it/s]
[I 2024-11-19 17:49:13,508] Trial 12 finished with values: [1185.9230533919385, 37.08] and parameters: {'reward_function': 0, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 64, 'training_rounds': 14, 'learning_rate': 7.186735171315688e-05, 'discount_factor': 0.9208608914497881, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.8421308278542015, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.9801401499189992, 'learn_after_episode': False, 'learning_steps': 48, 'n_epochs': 100}.


profit 1185.9230533919385 n_trades 37.08


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [00:06<?, ?it/s]
[I 2024-11-19 17:49:22,346] Trial 13 finished with values: [1002.653571003455, 62.07] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 3, 'n_units': 64, 'training_rounds': 12, 'learning_rate': 6.413776758799948e-05, 'discount_factor': 0.9555366409178272, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.8172748043342444, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.8408321820104281, 'learn_after_episode': False, 'learning_steps': 65, 'n_epochs': 100}.


profit 1002.653571003455 n_trades 62.07


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [00:30<?, ?it/s]
[I 2024-11-19 17:49:54,977] Trial 14 finished with values: [1023.5906522572063, 27.27] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 128, 'training_rounds': 8, 'learning_rate': 8.652403474824905e-05, 'discount_factor': 0.8995347922292788, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.7779104200455872, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.5839633723681885, 'learn_after_episode': False, 'learning_steps': 88, 'n_epochs': 500}.


profit 1023.5906522572063 n_trades 27.27


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [00:10<?, ?it/s]
[I 2024-11-19 17:50:07,223] Trial 15 finished with values: [1346.793678570731, 1.27] and parameters: {'reward_function': 0, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 64, 'training_rounds': 17, 'learning_rate': 7.063019511583738e-06, 'discount_factor': 0.9515703787427277, 'batch_size': 64, 'target_update_freq': 10, 'soft_update_tau': 0.49589638340411674, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.8473691946647696, 'learn_after_episode': False, 'learning_steps': 37, 'n_epochs': 100}.


profit 1346.793678570731 n_trades 1.27


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [41:55<?, ?it/s]
[I 2024-11-19 18:43:14,944] Trial 16 finished with values: [993.4068317418579, 9.4] and parameters: {'reward_function': 1, 'algorithm': 'dqn', 'n_layers': 3, 'n_units': 64, 'training_rounds': 9, 'learning_rate': 4.283202053181672e-05, 'discount_factor': 0.8548069105534023, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.9799243672226613, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.9851693464643849, 'learn_after_episode': True, 'learning_steps': 61, 'n_epochs': 500}.


profit 993.4068317418579 n_trades 9.4


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [00:27<?, ?it/s]
[I 2024-11-19 18:43:44,728] Trial 17 finished with values: [1084.1208438465244, 62.79] and parameters: {'reward_function': 2, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 128, 'training_rounds': 16, 'learning_rate': 8.608404109402027e-05, 'discount_factor': 0.905484414818783, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.10811990246677433, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.7962721485322966, 'learn_after_episode': False, 'learning_steps': 15, 'n_epochs': 100}.


profit 1084.1208438465244 n_trades 62.79


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [00:10<?, ?it/s]
[I 2024-11-19 18:43:58,586] Trial 18 finished with values: [1047.0246470901593, 58.27] and parameters: {'reward_function': 0, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 512, 'training_rounds': 5, 'learning_rate': 1.5735626769690555e-05, 'discount_factor': 0.9635275141207652, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.7473368620916079, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.9079975010218981, 'learn_after_episode': False, 'learning_steps': 46, 'n_epochs': 100}.


profit 1047.0246470901593 n_trades 58.27


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [1:08:08<?, ?it/s]
[I 2024-11-19 19:54:04,505] Trial 19 finished with values: [1066.381803381373, 0.69] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 12, 'learning_rate': 6.432829813945095e-05, 'discount_factor': 0.937254959805319, 'batch_size': 128, 'target_update_freq': 24, 'soft_update_tau': 0.8957363830381931, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.6281314664045141, 'learn_after_episode': True, 'learning_steps': 72, 'n_epochs': 500}.


profit 1066.381803381373 n_trades 0.69


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [00:09<?, ?it/s]
[I 2024-11-19 19:54:16,821] Trial 20 finished with values: [1196.3365560571085, 44.86] and parameters: {'reward_function': 0, 'algorithm': 'ddqn', 'n_layers': 3, 'n_units': 64, 'training_rounds': 19, 'learning_rate': 4.671577796086594e-05, 'discount_factor': 0.8689602407740552, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.46471978696204874, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.5718231368869076, 'learn_after_episode': False, 'learning_steps': 57, 'n_epochs': 100}.


profit 1196.3365560571085 n_trades 44.86


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [04:07<?, ?it/s]
[I 2024-11-19 19:58:28,375] Trial 21 finished with values: [1052.31127842453, 66.97] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 3, 'n_units': 512, 'training_rounds': 30, 'learning_rate': 4.966286095852234e-05, 'discount_factor': 0.8677280365985045, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.7042853356512264, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.5228885209594548, 'learn_after_episode': False, 'learning_steps': 56, 'n_epochs': 500}.


profit 1052.31127842453 n_trades 66.97


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [03:57<?, ?it/s]
[I 2024-11-19 20:02:29,627] Trial 22 finished with values: [1180.8020672187827, 81.11] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 2, 'n_units': 512, 'training_rounds': 24, 'learning_rate': 4.403085106429863e-05, 'discount_factor': 0.8348577340944441, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.507281071521004, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.5685225047523212, 'learn_after_episode': False, 'learning_steps': 32, 'n_epochs': 500}.


profit 1180.8020672187827 n_trades 81.11


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [03:58<?, ?it/s]
[I 2024-11-19 20:06:32,651] Trial 23 finished with values: [1065.1831652154779, 66.57] and parameters: {'reward_function': 2, 'algorithm': 'dqn', 'n_layers': 2, 'n_units': 512, 'training_rounds': 24, 'learning_rate': 2.9896398653929366e-05, 'discount_factor': 0.8027791445182954, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.43352138948368835, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.5692993155464433, 'learn_after_episode': False, 'learning_steps': 33, 'n_epochs': 500}.


profit 1065.1831652154779 n_trades 66.57


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [38:45<?, ?it/s]
[I 2024-11-19 20:47:23,404] Trial 24 finished with values: [1089.6385707965628, 1.27] and parameters: {'reward_function': 1, 'algorithm': 'dqn', 'n_layers': 2, 'n_units': 512, 'training_rounds': 24, 'learning_rate': 2.3320327577625975e-05, 'discount_factor': 0.8275103606272973, 'batch_size': 128, 'target_update_freq': 1, 'soft_update_tau': 0.5539504725706653, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5901692244540463, 'learn_after_episode': True, 'learning_steps': 20, 'n_epochs': 500}.


profit 1089.6385707965628 n_trades 1.27


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [02:32<?, ?it/s]
[I 2024-11-19 20:49:59,671] Trial 25 finished with values: [1183.8729654735441, 106.54] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 2, 'n_units': 512, 'training_rounds': 19, 'learning_rate': 4.242483171059622e-05, 'discount_factor': 0.8318353760943185, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.49169390605199403, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.6258847103375103, 'learn_after_episode': False, 'learning_steps': 41, 'n_epochs': 500}.


profit 1183.8729654735441 n_trades 106.54


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [26:08<?, ?it/s]
[I 2024-11-19 21:18:07,596] Trial 26 finished with values: [1011.6010159956437, 100.4] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 128, 'training_rounds': 18, 'learning_rate': 8.16475393118449e-06, 'discount_factor': 0.8172805307946088, 'batch_size': 128, 'target_update_freq': 10, 'soft_update_tau': 0.49203632385812635, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.6421570584037625, 'learn_after_episode': True, 'learning_steps': 38, 'n_epochs': 500}.


profit 1011.6010159956437 n_trades 100.4


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [02:10<?, ?it/s]
[I 2024-11-19 21:20:22,222] Trial 27 finished with values: [974.927443120815, 84.96] and parameters: {'reward_function': 1, 'algorithm': 'dqn', 'n_layers': 2, 'n_units': 512, 'training_rounds': 18, 'learning_rate': 1.669515422838089e-05, 'discount_factor': 0.8619084584280354, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.28465611983492817, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.6167848019804593, 'learn_after_episode': False, 'learning_steps': 42, 'n_epochs': 500}.


profit 974.927443120815 n_trades 84.96


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [01:57<?, ?it/s]
[I 2024-11-19 21:22:23,040] Trial 28 finished with values: [1108.355859301906, 51.76] and parameters: {'reward_function': 2, 'algorithm': 'dqn', 'n_layers': 2, 'n_units': 512, 'training_rounds': 19, 'learning_rate': 6.225399058859204e-05, 'discount_factor': 0.8822318739602012, 'batch_size': 64, 'target_update_freq': 10, 'soft_update_tau': 0.46161826363872105, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.6887439020791601, 'learn_after_episode': False, 'learning_steps': 55, 'n_epochs': 500}.


profit 1108.355859301906 n_trades 51.76


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [1:05:02<?, ?it/s]
[I 2024-11-19 22:29:59,747] Trial 29 finished with values: [1103.6676968356815, 44.92] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 256, 'training_rounds': 16, 'learning_rate': 3.681561941184994e-05, 'discount_factor': 0.8167826783794293, 'batch_size': 128, 'target_update_freq': 10, 'soft_update_tau': 0.5780146554966249, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.7906856638651517, 'learn_after_episode': False, 'learning_steps': 51, 'n_epochs': 500}.


profit 1103.6676968356815 n_trades 44.92


In [29]:
# print(f"Best value: {study.best_value} (params: {study.best_params})")
best_trials=study.best_trials
best_trials

[FrozenTrial(number=0, state=TrialState.COMPLETE, values=[1423.0532380485854, 0.46], datetime_start=datetime.datetime(2024, 11, 19, 15, 52, 15, 747219), datetime_complete=datetime.datetime(2024, 11, 19, 15, 59, 22, 364972), params={'reward_function': 0, 'algorithm': 'ddqn', 'n_layers': 3, 'n_units': 256, 'training_rounds': 8, 'learning_rate': 3.757902105533474e-05, 'discount_factor': 0.9259513955826991, 'batch_size': 128, 'target_update_freq': 10, 'soft_update_tau': 0.5443969708496746, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.9096580075756356, 'learn_after_episode': False, 'learning_steps': 74, 'n_epochs': 100}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'reward_function': CategoricalDistribution(choices=(0, 1, 2)), 'algorithm': CategoricalDistribution(choices=('dqn', 'ddqn')), 'n_layers': IntDistribution(high=3, log=False, low=1, step=1), 'n_units': CategoricalDistribution(choices=(64, 128, 256, 512)), 'training_rounds': IntDistributi

In [30]:
best_trials=study.best_trials
best_trail=best_trials[3]
best_params=best_trail.params
reward_func=reward_functions[best_params.pop('reward_function')]
train_env.reward_func=reward_func
test_env.reward_func=reward_func

In [31]:
algo=best_params.pop('algorithm')

learning_params={'learn_after_episode':best_params.pop('learn_after_episode'),
                    'learning_steps':best_params.pop('learning_steps'),
                    'n_epochs':best_params.pop('n_epochs'),
                    }
best_params['hidden_dims']=make_hidden_dims(n_layers=best_params.pop('n_layers'),n_units=best_params.pop('n_units'))
best_params['lstm']=best_params.pop('lstm')
if algo=='dqn':
    agent=pearl_utils.create_dqn_model(**best_params)
elif algo=='ddqn':
    agent=pearl_utils.create_ddqn_model(**best_params)

best_params,learning_params

({'training_rounds': 17,
  'learning_rate': 7.063019511583738e-06,
  'discount_factor': 0.9515703787427277,
  'batch_size': 64,
  'target_update_freq': 10,
  'soft_update_tau': 0.49589638340411674,
  'is_conservative': True,
  'conservative_alpha': 0.8473691946647696,
  'hidden_dims': [64, 64],
  'lstm': False},
 {'learn_after_episode': False, 'learning_steps': 37, 'n_epochs': 100})

In [32]:


agent=pearl_utils.train_pearl_model(agent,train_env,**learning_params)

profit,n_trades=pearl_utils.test_pearl_model(agent,test_env)

agent=pearl_utils.train_pearl_model(agent,test_env,**learning_params)

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

RuntimeError: mat1 and mat2 shapes cannot be multiplied (2x32 and 27x64)

In [None]:
%%run ./Pearl train_agent.py
