In [1]:
import warnings
import logging


# Silence warnings for the rest of the session.
# NOTE(review): the second call ignores every warning category, which makes the
# FutureWarning-specific filter above it redundant.
warnings.filterwarnings(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore')

In [2]:
from utils import make_environments
from utils import pearl_utils
from configs import defaults
from utils.reward_functions import log_reward_function,cumulative_reward_function,sharpe_reward_function
from utils. utils import make_hidden_dims
import optuna
from optuna.samplers import TPESampler

from neuralforecast.core import NeuralForecast
from Pearl.pearl.utils.instantiations.environments.gym_environment import GymEnvironment
from Pearl.pearl.utils.functional_utils.train_and_eval.online_learning import \
    online_learning
import datetime

In [3]:
# Candidate reward functions. The tuning objective picks one by list index,
# so the order of this list must stay stable.
reward_functions = [
    log_reward_function,
    cumulative_reward_function,
    sharpe_reward_function,
]

# Build the train/test environments, initially configured with the log reward.
train_env, test_env = make_environments.make_envs(
    reward_function=log_reward_function,
)


FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.

In [7]:
# Keep a handle on the training environment's `positions` attribute.
sp = train_env.positions

# Quick look at the training environment: action count, name, observation shape.
(
    train_env.action_space.n,
    train_env.name,
    train_env.observation_space.shape,
)

(np.int64(2), 'USDJPY_train', (24,))

In [8]:
# Sanity check of the hidden-layer helper for 3 layers of 64 units.
make_hidden_dims(n_units=64, n_layers=3)

[64, 64, 64]

In [9]:
# Baseline DDQN agent with hand-picked settings, used for the smoke-test run
# of the training loop before hyperparameter tuning.
agent = pearl_utils.create_ddqn_model(
    observation_space_dim=train_env.observation_space.shape[0],
    action_space_dim=train_env.action_space.n,
    hidden_dims=[64, 64, 64],
    training_rounds=20,
    learning_rate=0.001,
    discount_factor=0.99,
    batch_size=128,
    target_update_freq=10,
    soft_update_tau=0.75,  # a value of 1 indicates no soft updates
    is_conservative=False,
    # NOTE(review): a bool is passed here, whereas the tuning objective samples
    # a float in [0.5, 1.0]; presumably unused while is_conservative is False
    # -- confirm against pearl_utils.
    conservative_alpha=False,
    replay_buffer_size=10_000,
    lstm=False,
)

In [10]:
# Show the test environment's observation shape next to the training
# environment's action count.
(test_env.observation_space.shape, train_env.action_space.n)

((24,), np.int64(2))

In [11]:
# Wrap the training environment in Pearl's GymEnvironment adapter.
env = GymEnvironment(train_env)

# Reset the environment and hand the resulting observation and action space
# to the agent.
obs, action_space = env.reset()
agent.reset(obs, action_space)

In [12]:
# Manual act/step/observe/learn loop, currently disabled and kept for reference:
# done = False
# while not done:
#     action = agent.act(exploit=False)
#     action_result = env.step(action)
    
#     agent.observe(action_result)
#     loss=agent.learn()

#     done = action_result.done

In [13]:
# Short smoke-test run of Pearl's online learning loop on the training env.
# NOTE(review): the per-argument notes below are inferred from the argument
# names -- confirm against Pearl's `online_learning` documentation.
info = online_learning(
    agent=agent,
    env=env,
    # number_of_episodes=10,
    number_of_steps=168,         # run for 168 environment steps in total
    print_every_x_episodes=2,    # print returns every 2 episodes
    print_every_x_steps=1,       # print returns every step
    learn_every_k_steps=20,      # presumably: learn once every 20 steps
    learn_after_episode=False,   # presumably: do not defer learning to episode end
    record_period=169,           # recording period; larger than number_of_steps
    seed=0,
)

  0%|          | 0/168 [00:00<?, ?it/s]

In [14]:


def objective_function(trial):
    """Optuna objective: sample, train and evaluate one Pearl agent configuration.

    Hyperparameters (reward function, algorithm, network shape, optimiser and
    training-loop settings) are drawn from ``trial``. The sampled reward
    function is assigned to the notebook-level ``train_env`` and ``test_env``
    (a side effect that persists after the call). The agent is then trained
    on ``train_env`` and evaluated on ``test_env``.

    The order and definition of the ``trial.suggest_*`` calls is kept stable
    on purpose: the study is persisted in a database and resumed, so the
    parameter distributions must not change between runs.

    Args:
        trial: Optuna trial used to draw the hyperparameters.

    Returns:
        The ``(profit, n_trades)`` pair produced by
        ``pearl_utils.test_pearl_model``, reported as the trial's two
        objective values.

    Raises:
        ValueError: If the sampled algorithm has no matching model factory.
    """
    reward_id = trial.suggest_categorical('reward_function', [0, 1, 2])
    algo = trial.suggest_categorical('algorithm', ['dqn', 'ddqn'])

    # The reward function is sampled by its index into `reward_functions` and
    # applied to both shared environments.
    reward_func = reward_functions[reward_id]
    train_env.reward_func = reward_func
    test_env.reward_func = reward_func

    observation_space_dim = train_env.observation_space.shape[0]
    action_space_dim = len(train_env.positions)

    n_layers = trial.suggest_int('n_layers', 1, 3)
    n_units = trial.suggest_categorical('n_units', [64, 128, 256, 512])
    hidden_dims = make_hidden_dims(n_layers=n_layers, n_units=n_units)

    # Keyword arguments forwarded to the model factory.
    search_space = {
        'observation_space_dim': observation_space_dim,
        'action_space_dim': action_space_dim,
        'hidden_dims': hidden_dims,
        'training_rounds': trial.suggest_int('training_rounds', 5, 30),
        'learning_rate': trial.suggest_float('learning_rate', 1e-6, 1e-4),
        'discount_factor': trial.suggest_float('discount_factor', 0.8, 0.99),  # gamma (greediness)
        'batch_size': trial.suggest_categorical('batch_size', [64, 128]),
        'target_update_freq': trial.suggest_categorical('target_update_freq', [1, 5, 10, 24]),
        'soft_update_tau': trial.suggest_float('soft_update_tau', 0.1, .99),
        'is_conservative': trial.suggest_categorical('is_conservative', [True, False]),
        'lstm': trial.suggest_categorical('lstm', [True, False]),
        # Sampled for every trial, including those with is_conservative=False.
        'conservative_alpha': trial.suggest_float('conservative_alpha', 0.5, 1.0),
    }

    # Keyword arguments forwarded to the training helper.
    learning_space = {
        'learn_after_episode': trial.suggest_categorical('learn_after_episode', [True, False]),
        'learning_steps': trial.suggest_int('learning_steps', 10, 89),
        'n_epochs': trial.suggest_categorical('n_epochs', [100, 500]),
    }

    if algo == 'dqn':
        agent = pearl_utils.create_dqn_model(**search_space)
    elif algo == 'ddqn':
        agent = pearl_utils.create_ddqn_model(**search_space)
    else:
        # Fail loudly instead of hitting an unbound `agent` further down if a
        # new algorithm choice is added without a matching factory.
        raise ValueError(f"Unsupported algorithm: {algo!r}")

    agent = pearl_utils.train_pearl_model(agent, train_env, **learning_space)
    profit, n_trades = pearl_utils.test_pearl_model(agent, test_env)

    print('profit', profit, 'n_trades', n_trades)

    return profit, n_trades

In [15]:
# Name of the model being tuned, taken from the project defaults.
model_name = defaults.model_name
model_name  # no visible effect here: only a cell's final expression is displayed

# The Optuna study is named after the model and persisted in a local SQLite file.
study_name = f"{defaults.model_name}"
storage_name = "sqlite:///PearlHPTuning.sqlite3"

In [16]:
from optuna import create_study,delete_study

In [17]:
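# Uncomment to delete the stored study, e.g. to restart tuning from scratch.
# NOTE(review): delete_study is not expected to return a study object, so the
# assignment to `study` looks unnecessary -- confirm against the Optuna docs.
# study=delete_study(study_name=study_name, 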
#              storage=storage_name, )

In [18]:

# Create the two-objective study, or load it from storage if a study with
# this name already exists.
# NOTE(review): the TPE sampler is created without a seed, so sampled
# hyperparameters will differ between runs.
study = create_study(
    study_name=study_name,
    storage=storage_name,
    load_if_exists=True,
    directions=['maximize', 'maximize'],
    sampler=TPESampler(),
)

[I 2024-11-23 19:56:02,061] A new study created in RDB with name: USDJPY


In [19]:
# Run 100 tuning trials; each trial trains and evaluates one agent.
study.optimize(func=objective_function, n_trials=100)


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [01:14<?, ?it/s]
[I 2024-11-23 19:57:54,668] Trial 0 finished with values: [1150.577383967398, 21.2] and parameters: {'reward_function': 2, 'algorithm': 'dqn', 'n_layers': 3, 'n_units': 512, 'training_rounds': 17, 'learning_rate': 7.212852791520198e-05, 'discount_factor': 0.8848020344519296, 'batch_size': 128, 'target_update_freq': 10, 'soft_update_tau': 0.43112552405359356, 'is_conservative': False, 'lstm': False, 'conservative_alpha': 0.7953738699846757, 'learn_after_episode': False, 'learning_steps': 67, 'n_epochs': 100}.


profit 1150.577383967398 n_trades 21.2


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [04:55<?, ?it/s]
[I 2024-11-23 20:03:26,404] Trial 1 finished with values: [823.6518861108352, 22.4] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 6, 'learning_rate': 6.431503168114821e-06, 'discount_factor': 0.9332381259579979, 'batch_size': 128, 'target_update_freq': 10, 'soft_update_tau': 0.12739617807938075, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.7110381502186368, 'learn_after_episode': False, 'learning_steps': 37, 'n_epochs': 500}.


profit 823.6518861108352 n_trades 22.4


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [04:47<?, ?it/s]
[I 2024-11-23 20:10:25,742] Trial 2 finished with values: [1119.2086158452692, 16.0] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 256, 'training_rounds': 23, 'learning_rate': 9.746722744797855e-05, 'discount_factor': 0.8867237684857592, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.7704066493531193, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.7146415497038774, 'learn_after_episode': False, 'learning_steps': 61, 'n_epochs': 100}.


profit 1119.2086158452692 n_trades 16.0


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [1:21:35<?, ?it/s]
[I 2024-11-23 21:34:20,343] Trial 3 finished with values: [1147.2389618542688, 17.2] and parameters: {'reward_function': 0, 'algorithm': 'ddqn', 'n_layers': 3, 'n_units': 512, 'training_rounds': 22, 'learning_rate': 2.335341688158766e-06, 'discount_factor': 0.9613964367426219, 'batch_size': 128, 'target_update_freq': 1, 'soft_update_tau': 0.4138012926568424, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.8294413998756902, 'learn_after_episode': False, 'learning_steps': 12, 'n_epochs': 500}.


profit 1147.2389618542688 n_trades 17.2


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [05:10<?, ?it/s]
[I 2024-11-23 21:40:09,067] Trial 4 finished with values: [998.7725525552643, 0.0] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 128, 'training_rounds': 6, 'learning_rate': 3.405753331088445e-05, 'discount_factor': 0.947098762745797, 'batch_size': 128, 'target_update_freq': 1, 'soft_update_tau': 0.2647221245432291, 'is_conservative': False, 'lstm': False, 'conservative_alpha': 0.9860983714664537, 'learn_after_episode': False, 'learning_steps': 40, 'n_epochs': 500}.


profit 998.7725525552643 n_trades 0.0


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [07:59<?, ?it/s]
[I 2024-11-23 21:48:48,421] Trial 5 finished with values: [987.4254664339622, 0.4] and parameters: {'reward_function': 2, 'algorithm': 'dqn', 'n_layers': 3, 'n_units': 64, 'training_rounds': 10, 'learning_rate': 7.867350296414014e-05, 'discount_factor': 0.9489340283852856, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.564864486376604, 'is_conservative': False, 'lstm': False, 'conservative_alpha': 0.9215986721485452, 'learn_after_episode': False, 'learning_steps': 25, 'n_epochs': 500}.


profit 987.4254664339622 n_trades 0.4


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:03<?, ?it/s]
[I 2024-11-23 21:54:11,351] Trial 6 finished with values: [825.533330785481, 18.4] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 128, 'training_rounds': 14, 'learning_rate': 2.4313962026239784e-06, 'discount_factor': 0.8490402938089269, 'batch_size': 128, 'target_update_freq': 24, 'soft_update_tau': 0.2734286605609143, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.6149816292950374, 'learn_after_episode': True, 'learning_steps': 81, 'n_epochs': 100}.


profit 825.533330785481 n_trades 18.4


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:41<?, ?it/s]
[I 2024-11-23 21:59:12,401] Trial 7 finished with values: [855.2870473648263, 19.6] and parameters: {'reward_function': 2, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 128, 'training_rounds': 6, 'learning_rate': 9.792883570483368e-05, 'discount_factor': 0.9390311651504282, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.6213288701830008, 'is_conservative': True, 'lstm': True, 'conservative_alpha': 0.5285399812721583, 'learn_after_episode': True, 'learning_steps': 53, 'n_epochs': 100}.


profit 855.2870473648263 n_trades 19.6


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [01:26<?, ?it/s]
[I 2024-11-23 22:01:16,282] Trial 8 finished with values: [1134.4989352616928, 20.4] and parameters: {'reward_function': 2, 'algorithm': 'dqn', 'n_layers': 2, 'n_units': 64, 'training_rounds': 11, 'learning_rate': 5.8626490888630076e-05, 'discount_factor': 0.9763851229297112, 'batch_size': 128, 'target_update_freq': 5, 'soft_update_tau': 0.4656098458850117, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.5151645950907742, 'learn_after_episode': False, 'learning_steps': 37, 'n_epochs': 100}.


profit 1134.4989352616928 n_trades 20.4


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [05:47<?, ?it/s]
[I 2024-11-23 22:09:22,852] Trial 9 finished with values: [1147.266811890151, 16.0] and parameters: {'reward_function': 1, 'algorithm': 'dqn', 'n_layers': 3, 'n_units': 512, 'training_rounds': 14, 'learning_rate': 8.853273553920737e-05, 'discount_factor': 0.835812620386413, 'batch_size': 128, 'target_update_freq': 24, 'soft_update_tau': 0.3233761418634699, 'is_conservative': True, 'lstm': True, 'conservative_alpha': 0.8318866727868123, 'learn_after_episode': False, 'learning_steps': 35, 'n_epochs': 100}.


profit 1147.266811890151 n_trades 16.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [01:04<?, ?it/s]
[I 2024-11-23 22:11:06,742] Trial 10 finished with values: [1139.6201936810044, 19.2] and parameters: {'reward_function': 2, 'algorithm': 'dqn', 'n_layers': 3, 'n_units': 512, 'training_rounds': 30, 'learning_rate': 6.207137386498402e-05, 'discount_factor': 0.8072012194698892, 'batch_size': 64, 'target_update_freq': 10, 'soft_update_tau': 0.9714301443927552, 'is_conservative': False, 'lstm': False, 'conservative_alpha': 0.8172500058974149, 'learn_after_episode': True, 'learning_steps': 82, 'n_epochs': 100}.


profit 1139.6201936810044 n_trades 19.2


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [06:22<?, ?it/s]
[I 2024-11-23 22:18:05,616] Trial 11 finished with values: [824.8498767221784, 21.2] and parameters: {'reward_function': 2, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 512, 'training_rounds': 20, 'learning_rate': 3.722812440741113e-05, 'discount_factor': 0.892317533506221, 'batch_size': 128, 'target_update_freq': 10, 'soft_update_tau': 0.10434712260035761, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.6966797243420619, 'learn_after_episode': False, 'learning_steps': 68, 'n_epochs': 500}.


profit 824.8498767221784 n_trades 21.2


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [01:17<?, ?it/s]
[I 2024-11-23 22:19:59,418] Trial 12 finished with values: [952.2835776342862, 0.0] and parameters: {'reward_function': 2, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 256, 'training_rounds': 27, 'learning_rate': 7.240510672160744e-05, 'discount_factor': 0.9149147645752799, 'batch_size': 128, 'target_update_freq': 10, 'soft_update_tau': 0.7050123470089736, 'is_conservative': False, 'lstm': False, 'conservative_alpha': 0.7674335814386197, 'learn_after_episode': False, 'learning_steps': 70, 'n_epochs': 100}.


profit 952.2835776342862 n_trades 0.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:55<?, ?it/s]
[I 2024-11-23 22:21:35,606] Trial 13 finished with values: [1014.2212208030152, 2.0] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 3, 'n_units': 512, 'training_rounds': 17, 'learning_rate': 4.471003498417102e-05, 'discount_factor': 0.8682123405208111, 'batch_size': 128, 'target_update_freq': 10, 'soft_update_tau': 0.9726864235137104, 'is_conservative': False, 'lstm': False, 'conservative_alpha': 0.6243089183554602, 'learn_after_episode': True, 'learning_steps': 54, 'n_epochs': 100}.


profit 1014.2212208030152 n_trades 2.0


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [08:55<?, ?it/s]
[I 2024-11-23 22:31:10,032] Trial 14 finished with values: [1161.6547184906776, 20.4] and parameters: {'reward_function': 2, 'algorithm': 'ddqn', 'n_layers': 3, 'n_units': 64, 'training_rounds': 26, 'learning_rate': 2.254031146960984e-05, 'discount_factor': 0.9179945978996923, 'batch_size': 128, 'target_update_freq': 10, 'soft_update_tau': 0.11637243318024865, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.8833455214823647, 'learn_after_episode': False, 'learning_steps': 72, 'n_epochs': 500}.


profit 1161.6547184906776 n_trades 20.4


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [01:25<?, ?it/s]
[I 2024-11-23 22:33:12,888] Trial 15 finished with values: [1176.322353536598, 20.8] and parameters: {'reward_function': 1, 'algorithm': 'dqn', 'n_layers': 2, 'n_units': 512, 'training_rounds': 17, 'learning_rate': 7.219892302276428e-05, 'discount_factor': 0.8639135932949485, 'batch_size': 128, 'target_update_freq': 10, 'soft_update_tau': 0.8185818093870334, 'is_conservative': False, 'lstm': False, 'conservative_alpha': 0.6605343064276803, 'learn_after_episode': False, 'learning_steps': 47, 'n_epochs': 100}.


profit 1176.322353536598 n_trades 20.8


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:44<?, ?it/s]
[I 2024-11-23 22:34:34,374] Trial 16 finished with values: [820.4380288545931, 21.6] and parameters: {'reward_function': 2, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 256, 'training_rounds': 10, 'learning_rate': 5.65647442765632e-05, 'discount_factor': 0.9185568304623171, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.4269547246541, 'is_conservative': False, 'lstm': False, 'conservative_alpha': 0.7648258068768808, 'learn_after_episode': True, 'learning_steps': 85, 'n_epochs': 100}.


profit 820.4380288545931 n_trades 21.6


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [09:00<?, ?it/s]
[I 2024-11-23 22:44:15,590] Trial 17 finished with values: [948.9689978531844, 4.8] and parameters: {'reward_function': 0, 'algorithm': 'ddqn', 'n_layers': 3, 'n_units': 512, 'training_rounds': 20, 'learning_rate': 8.315670827042214e-05, 'discount_factor': 0.8218544687255038, 'batch_size': 128, 'target_update_freq': 10, 'soft_update_tau': 0.22644810078697583, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.9997055165089352, 'learn_after_episode': False, 'learning_steps': 58, 'n_epochs': 500}.


profit 948.9689978531844 n_trades 4.8


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [01:07<?, ?it/s]
[I 2024-11-23 22:46:01,657] Trial 18 finished with values: [1010.1543286052844, 0.0] and parameters: {'reward_function': 1, 'algorithm': 'dqn', 'n_layers': 2, 'n_units': 64, 'training_rounds': 14, 'learning_rate': 6.613273619471184e-05, 'discount_factor': 0.8709202710887631, 'batch_size': 128, 'target_update_freq': 10, 'soft_update_tau': 0.5049460177953787, 'is_conservative': False, 'lstm': False, 'conservative_alpha': 0.940082808009347, 'learn_after_episode': False, 'learning_steps': 64, 'n_epochs': 100}.


profit 1010.1543286052844 n_trades 0.0


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [18:51<?, ?it/s]
[I 2024-11-23 23:07:10,374] Trial 19 finished with values: [1145.9078644641602, 17.6] and parameters: {'reward_function': 2, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 512, 'training_rounds': 30, 'learning_rate': 2.2726013168534462e-05, 'discount_factor': 0.9039986729307371, 'batch_size': 64, 'target_update_freq': 10, 'soft_update_tau': 0.35779497193123927, 'is_conservative': True, 'lstm': True, 'conservative_alpha': 0.8717405894428403, 'learn_after_episode': True, 'learning_steps': 76, 'n_epochs': 500}.


profit 1145.9078644641602 n_trades 17.6


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [01:43<?, ?it/s]
[I 2024-11-23 23:09:33,848] Trial 20 finished with values: [1138.5868984260844, 19.6] and parameters: {'reward_function': 2, 'algorithm': 'dqn', 'n_layers': 3, 'n_units': 128, 'training_rounds': 8, 'learning_rate': 8.899439244540036e-05, 'discount_factor': 0.9888308128127742, 'batch_size': 128, 'target_update_freq': 5, 'soft_update_tau': 0.6073483021849313, 'is_conservative': False, 'lstm': False, 'conservative_alpha': 0.7901165685354554, 'learn_after_episode': False, 'learning_steps': 19, 'n_epochs': 100}.


profit 1138.5868984260844 n_trades 19.6


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [07:37<?, ?it/s]
[I 2024-11-23 23:17:49,239] Trial 21 finished with values: [1169.2671265934455, 20.0] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 17, 'learning_rate': 7.114675328835598e-05, 'discount_factor': 0.864505234349191, 'batch_size': 128, 'target_update_freq': 10, 'soft_update_tau': 0.8273487045901502, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.6696474046396556, 'learn_after_episode': False, 'learning_steps': 45, 'n_epochs': 500}.


profit 1169.2671265934455 n_trades 20.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [01:26<?, ?it/s]
[I 2024-11-23 23:19:53,231] Trial 22 finished with values: [877.9944390538083, 20.0] and parameters: {'reward_function': 1, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 512, 'training_rounds': 13, 'learning_rate': 4.613368840174158e-05, 'discount_factor': 0.8842359005141189, 'batch_size': 128, 'target_update_freq': 10, 'soft_update_tau': 0.1908535506495564, 'is_conservative': False, 'lstm': False, 'conservative_alpha': 0.6248923631311181, 'learn_after_episode': False, 'learning_steps': 29, 'n_epochs': 100}.


profit 877.9944390538083 n_trades 20.0


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [09:31<?, ?it/s]
[I 2024-11-23 23:30:02,043] Trial 23 finished with values: [1151.4461423757537, 21.2] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 64, 'training_rounds': 19, 'learning_rate': 2.214400148114003e-05, 'discount_factor': 0.8482862274923698, 'batch_size': 128, 'target_update_freq': 10, 'soft_update_tau': 0.8679370819391803, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.7352389857942464, 'learn_after_episode': False, 'learning_steps': 42, 'n_epochs': 500}.


profit 1151.4461423757537 n_trades 21.2


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [09:13<?, ?it/s]
[I 2024-11-23 23:39:54,276] Trial 24 finished with values: [823.5933314587056, 19.6] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 64, 'training_rounds': 20, 'learning_rate': 1.3146225204719469e-05, 'discount_factor': 0.8388726917753868, 'batch_size': 128, 'target_update_freq': 10, 'soft_update_tau': 0.8742959809271075, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.7222469731856025, 'learn_after_episode': False, 'learning_steps': 46, 'n_epochs': 500}.


profit 823.5933314587056 n_trades 19.6


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [13:54<?, ?it/s]
[I 2024-11-23 23:54:26,506] Trial 25 finished with values: [1145.7915708272872, 19.2] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 64, 'training_rounds': 24, 'learning_rate': 1.479402850130407e-05, 'discount_factor': 0.852930813049577, 'batch_size': 128, 'target_update_freq': 24, 'soft_update_tau': 0.8863133417250506, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.6792918104367991, 'learn_after_episode': False, 'learning_steps': 30, 'n_epochs': 500}.


profit 1145.7915708272872 n_trades 19.2


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [09:17<?, ?it/s]
[I 2024-11-24 00:04:20,580] Trial 26 finished with values: [1166.0569638480713, 14.0] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 64, 'training_rounds': 19, 'learning_rate': 1.1738434219781345e-05, 'discount_factor': 0.8037991856230602, 'batch_size': 128, 'target_update_freq': 1, 'soft_update_tau': 0.7092640401977147, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.5710011646600395, 'learn_after_episode': False, 'learning_steps': 46, 'n_epochs': 500}.


profit 1166.0569638480713 n_trades 14.0


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [04:25<?, ?it/s]
[I 2024-11-24 00:09:20,903] Trial 27 finished with values: [972.5572229833299, 0.0] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 5, 'learning_rate': 2.6052982801343885e-05, 'discount_factor': 0.8253986951419221, 'batch_size': 128, 'target_update_freq': 10, 'soft_update_tau': 0.7094042863765414, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.730757422780555, 'learn_after_episode': False, 'learning_steps': 41, 'n_epochs': 500}.


profit 972.5572229833299 n_trades 0.0


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [15:11<?, ?it/s]
[I 2024-11-24 00:26:44,831] Trial 28 finished with values: [830.6362184196511, 20.0] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 64, 'training_rounds': 16, 'learning_rate': 3.371096222490144e-05, 'discount_factor': 0.9284560843243087, 'batch_size': 64, 'target_update_freq': 10, 'soft_update_tau': 0.799153887926304, 'is_conservative': True, 'lstm': True, 'conservative_alpha': 0.6621346147794545, 'learn_after_episode': True, 'learning_steps': 33, 'n_epochs': 500}.


profit 830.6362184196511 n_trades 20.0


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [09:20<?, ?it/s]
[I 2024-11-24 00:36:40,463] Trial 29 finished with values: [1163.9696799980275, 20.8] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 12, 'learning_rate': 6.645917277028515e-06, 'discount_factor': 0.8563083949145203, 'batch_size': 128, 'target_update_freq': 10, 'soft_update_tau': 0.9000107372073323, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.5850860201985745, 'learn_after_episode': False, 'learning_steps': 21, 'n_epochs': 500}.


profit 1163.9696799980275 n_trades 20.8


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [05:20<?, ?it/s]
[I 2024-11-24 00:42:37,986] Trial 30 finished with values: [1030.200724856985, 0.0] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 256, 'training_rounds': 8, 'learning_rate': 1.8999168301197887e-05, 'discount_factor': 0.9021519484908285, 'batch_size': 128, 'target_update_freq': 10, 'soft_update_tau': 0.6330990020882405, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.7477341270872846, 'learn_after_episode': False, 'learning_steps': 52, 'n_epochs': 500}.


profit 1030.200724856985 n_trades 0.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [04:13<?, ?it/s]
[I 2024-11-24 00:49:03,440] Trial 31 finished with values: [1137.6710034036155, 22.0] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 256, 'training_rounds': 16, 'learning_rate': 7.492851989544916e-06, 'discount_factor': 0.8839625332390543, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.1498660640526482, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.7062381386730303, 'learn_after_episode': False, 'learning_steps': 58, 'n_epochs': 100}.


profit 1137.6710034036155 n_trades 22.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:12<?, ?it/s]
[I 2024-11-24 00:54:28,350] Trial 32 finished with values: [1160.2159945272608, 17.2] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 256, 'training_rounds': 22, 'learning_rate': 7.431429261275185e-06, 'discount_factor': 0.8780070248048075, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.1379444104457507, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.6535737235510508, 'learn_after_episode': True, 'learning_steps': 57, 'n_epochs': 100}.


profit 1160.2159945272608 n_trades 17.2


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [05:06<?, ?it/s]
[I 2024-11-24 01:01:47,349] Trial 33 finished with values: [843.2805673815127, 20.4] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 256, 'training_rounds': 25, 'learning_rate': 1.2013494746327314e-06, 'discount_factor': 0.8987197130437025, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.3431273381612787, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.7050194602448171, 'learn_after_episode': False, 'learning_steps': 62, 'n_epochs': 100}.


profit 843.2805673815127 n_trades 20.4


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [03:33<?, ?it/s]
[I 2024-11-24 01:07:33,953] Trial 34 finished with values: [1145.0939711295189, 19.6] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 256, 'training_rounds': 16, 'learning_rate': 8.631763265221376e-06, 'discount_factor': 0.883245256331291, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.17589132952304004, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.7886957305368806, 'learn_after_episode': False, 'learning_steps': 89, 'n_epochs': 100}.


profit 1145.0939711295189 n_trades 19.6


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [06:18<?, ?it/s]
[I 2024-11-24 01:16:03,953] Trial 35 finished with values: [1160.0049132091665, 23.6] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 256, 'training_rounds': 28, 'learning_rate': 2.8910577690490594e-05, 'discount_factor': 0.9089192007949853, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.518370843001148, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5883062686658616, 'learn_after_episode': False, 'learning_steps': 49, 'n_epochs': 100}.


profit 1160.0049132091665 n_trades 23.6


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:28<?, ?it/s]
[I 2024-11-24 01:21:48,575] Trial 36 finished with values: [861.3932557900239, 19.6] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 256, 'training_rounds': 27, 'learning_rate': 5.011506973241142e-05, 'discount_factor': 0.95629951447968, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.4798442427972439, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5656320697556525, 'learn_after_episode': True, 'learning_steps': 10, 'n_epochs': 100}.


profit 861.3932557900239 n_trades 19.6


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [06:31<?, ?it/s]
[I 2024-11-24 01:30:35,885] Trial 37 finished with values: [835.2434384828301, 16.4] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 256, 'training_rounds': 28, 'learning_rate': 3.720221852093302e-05, 'discount_factor': 0.9326149015403249, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.560810894497345, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5814225295850044, 'learn_after_episode': False, 'learning_steps': 49, 'n_epochs': 100}.


profit 835.2434384828301 n_trades 16.4


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:22<?, ?it/s]
[I 2024-11-24 01:36:16,379] Trial 38 finished with values: [1128.226043162254, 24.8] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 256, 'training_rounds': 23, 'learning_rate': 3.100937751155472e-05, 'discount_factor': 0.9099280118053934, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.39144059536315134, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5439606108724765, 'learn_after_episode': True, 'learning_steps': 59, 'n_epochs': 100}.


profit 1128.226043162254 n_trades 24.8


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:20<?, ?it/s]
[I 2024-11-24 01:41:51,390] Trial 39 finished with values: [1135.3930925844654, 61.6] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 256, 'training_rounds': 23, 'learning_rate': 2.9937978950544717e-05, 'discount_factor': 0.909368529386332, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.3988690261380955, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5442295152904463, 'learn_after_episode': True, 'learning_steps': 57, 'n_epochs': 100}.


profit 1135.3930925844654 n_trades 61.6


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:18<?, ?it/s]
[I 2024-11-24 01:47:23,590] Trial 40 finished with values: [842.8466807869823, 20.0] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 128, 'training_rounds': 23, 'learning_rate': 3.143188392145338e-05, 'discount_factor': 0.9084578393946072, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.3938467976390005, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5389966941580625, 'learn_after_episode': True, 'learning_steps': 65, 'n_epochs': 100}.


profit 842.8466807869823 n_trades 20.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:13<?, ?it/s]
[I 2024-11-24 01:52:50,596] Trial 41 finished with values: [1158.822975403994, 18.4] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 256, 'training_rounds': 22, 'learning_rate': 2.8973175134143057e-05, 'discount_factor': 0.9227008573443123, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.2920331635039296, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5036214835279261, 'learn_after_episode': True, 'learning_steps': 16, 'n_epochs': 100}.


profit 1158.822975403994 n_trades 18.4


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:20<?, ?it/s]
[I 2024-11-24 01:58:24,270] Trial 42 finished with values: [1162.4870672633447, 17.6] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 2, 'n_units': 256, 'training_rounds': 24, 'learning_rate': 4.050525096581591e-05, 'discount_factor': 0.9413408533497125, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.38869233864077246, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5499058702302541, 'learn_after_episode': True, 'learning_steps': 60, 'n_epochs': 100}.


profit 1162.4870672633447 n_trades 17.6


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:08<?, ?it/s]
[I 2024-11-24 02:03:44,566] Trial 43 finished with values: [1179.238596251898, 17.2] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 256, 'training_rounds': 21, 'learning_rate': 2.86342674763936e-05, 'discount_factor': 0.9706399481867257, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.5249900696899144, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5954259074343959, 'learn_after_episode': True, 'learning_steps': 74, 'n_epochs': 100}.


profit 1179.238596251898 n_trades 17.2


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:30<?, ?it/s]
[I 2024-11-24 02:09:29,246] Trial 44 finished with values: [1172.7930798396785, 20.4] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 128, 'training_rounds': 29, 'learning_rate': 4.296022334069244e-05, 'discount_factor': 0.9699675878874114, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.5187668161942369, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.6011665624419039, 'learn_after_episode': True, 'learning_steps': 77, 'n_epochs': 100}.


profit 1172.7930798396785 n_trades 20.4


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:12<?, ?it/s]
[I 2024-11-24 02:14:53,617] Trial 45 finished with values: [1191.6558529692093, 20.4] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 256, 'training_rounds': 21, 'learning_rate': 2.8764743562927777e-05, 'discount_factor': 0.911339518183664, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.30917867193935245, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5255327383263214, 'learn_after_episode': True, 'learning_steps': 51, 'n_epochs': 100}.


profit 1191.6558529692093 n_trades 20.4


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:18<?, ?it/s]
[I 2024-11-24 02:20:23,257] Trial 46 finished with values: [837.283432349426, 16.0] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 256, 'training_rounds': 25, 'learning_rate': 5.108949394283532e-05, 'discount_factor': 0.8929739215180355, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.4485181388952948, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5167582562430107, 'learn_after_episode': True, 'learning_steps': 55, 'n_epochs': 100}.


profit 837.283432349426 n_trades 16.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:15<?, ?it/s]
[I 2024-11-24 02:25:49,057] Trial 47 finished with values: [1139.9044513387148, 20.8] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 256, 'training_rounds': 23, 'learning_rate': 3.7557404891456605e-05, 'discount_factor': 0.9105971367471114, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.3012024733636614, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5591519901065704, 'learn_after_episode': True, 'learning_steps': 49, 'n_epochs': 100}.


profit 1139.9044513387148 n_trades 20.8


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:20<?, ?it/s]
[I 2024-11-24 02:31:19,745] Trial 48 finished with values: [836.1609955080716, 20.0] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 128, 'training_rounds': 26, 'learning_rate': 1.6887170508175158e-05, 'discount_factor': 0.9259513900170151, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.232972356384604, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5283231655949049, 'learn_after_episode': True, 'learning_steps': 67, 'n_epochs': 100}.


profit 836.1609955080716 n_trades 20.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:00<?, ?it/s]
[I 2024-11-24 02:36:29,397] Trial 49 finished with values: [843.9450029874564, 19.2] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 256, 'training_rounds': 18, 'learning_rate': 9.428816903429062e-05, 'discount_factor': 0.9510515561800338, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.4287675090297867, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5007470200789644, 'learn_after_episode': True, 'learning_steps': 51, 'n_epochs': 100}.


profit 843.9450029874564 n_trades 19.2


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:55<?, ?it/s]
[I 2024-11-24 02:43:19,575] Trial 50 finished with values: [1163.2791182203055, 19.6] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 3, 'n_units': 256, 'training_rounds': 21, 'learning_rate': 2.6144784039123744e-05, 'discount_factor': 0.8914339650817914, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.37084527486247987, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.633566627563901, 'learn_after_episode': True, 'learning_steps': 62, 'n_epochs': 100}.


profit 1163.2791182203055 n_trades 19.6


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [04:09<?, ?it/s]
[I 2024-11-24 02:50:10,549] Trial 51 finished with values: [1091.4864788100776, 511.6] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 128, 'training_rounds': 28, 'learning_rate': 4.2165519977742505e-05, 'discount_factor': 0.9373318151729744, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.49543218166677083, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5943095939411217, 'learn_after_episode': True, 'learning_steps': 55, 'n_epochs': 100}.


profit 1091.4864788100776 n_trades 511.6


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:30<?, ?it/s]
[I 2024-11-24 02:56:11,955] Trial 52 finished with values: [1177.7666122307216, 24.0] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 2, 'n_units': 128, 'training_rounds': 28, 'learning_rate': 5.470619520085431e-05, 'discount_factor': 0.9406018035079231, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.6064443254173235, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.9569190311571425, 'learn_after_episode': True, 'learning_steps': 56, 'n_epochs': 100}.


profit 1177.7666122307216 n_trades 24.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [04:26<?, ?it/s]
[I 2024-11-24 03:03:02,829] Trial 53 finished with values: [1187.7147489399895, 21.2] and parameters: {'reward_function': 2, 'algorithm': 'dqn', 'n_layers': 3, 'n_units': 128, 'training_rounds': 28, 'learning_rate': 5.34304347683492e-05, 'discount_factor': 0.9406775246087808, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.5805151989304606, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.941728114103827, 'learn_after_episode': True, 'learning_steps': 55, 'n_epochs': 100}.


profit 1187.7147489399895 n_trades 21.2


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:58<?, ?it/s]
[I 2024-11-24 03:09:59,070] Trial 54 finished with values: [1167.7588379210677, 19.2] and parameters: {'reward_function': 2, 'algorithm': 'dqn', 'n_layers': 3, 'n_units': 128, 'training_rounds': 28, 'learning_rate': 5.262032472025306e-05, 'discount_factor': 0.9407256791921621, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.5856327456215659, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.8670153230873563, 'learn_after_episode': True, 'learning_steps': 54, 'n_epochs': 100}.


profit 1167.7588379210677 n_trades 19.2


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [04:18<?, ?it/s]
[I 2024-11-24 03:16:37,464] Trial 55 finished with values: [837.2924894835328, 16.8] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 2, 'n_units': 128, 'training_rounds': 30, 'learning_rate': 4.817398052041672e-05, 'discount_factor': 0.9622869412426481, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.6486596169640005, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.9606800286575872, 'learn_after_episode': True, 'learning_steps': 38, 'n_epochs': 100}.


profit 837.2924894835328 n_trades 16.8


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:56<?, ?it/s]
[I 2024-11-24 03:22:54,569] Trial 56 finished with values: [1151.8898058578268, 22.0] and parameters: {'reward_function': 2, 'algorithm': 'dqn', 'n_layers': 3, 'n_units': 128, 'training_rounds': 28, 'learning_rate': 5.592925900979323e-05, 'discount_factor': 0.9474495694373005, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.48188217841070047, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.902683038803259, 'learn_after_episode': True, 'learning_steps': 43, 'n_epochs': 100}.


profit 1151.8898058578268 n_trades 22.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:55<?, ?it/s]
[I 2024-11-24 03:29:24,301] Trial 57 finished with values: [1176.6617422497266, 17.6] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 2, 'n_units': 128, 'training_rounds': 29, 'learning_rate': 6.381061332794396e-05, 'discount_factor': 0.9351238703656154, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.5400164694999838, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.8405934912565669, 'learn_after_episode': True, 'learning_steps': 69, 'n_epochs': 100}.


profit 1176.6617422497266 n_trades 17.6


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:52<?, ?it/s]
[I 2024-11-24 03:35:36,772] Trial 58 finished with values: [1177.5750665117725, 19.6] and parameters: {'reward_function': 2, 'algorithm': 'dqn', 'n_layers': 3, 'n_units': 128, 'training_rounds': 26, 'learning_rate': 4.4039611069387815e-05, 'discount_factor': 0.9825654227624299, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.6794394183737981, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.8138688239599907, 'learn_after_episode': True, 'learning_steps': 56, 'n_epochs': 100}.


profit 1177.5750665117725 n_trades 19.6


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [21:39<?, ?it/s]
[I 2024-11-24 03:59:38,608] Trial 59 finished with values: [1161.8154769910302, 18.8] and parameters: {'reward_function': 0, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 128, 'training_rounds': 27, 'learning_rate': 3.973884572780095e-05, 'discount_factor': 0.9590347143720362, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.5785775006476803, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.6360016447741732, 'learn_after_episode': True, 'learning_steps': 65, 'n_epochs': 500}.


profit 1161.8154769910302 n_trades 18.8


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [04:44<?, ?it/s]
[I 2024-11-24 04:06:35,073] Trial 60 finished with values: [1142.6864554684303, 24.4] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 2, 'n_units': 128, 'training_rounds': 29, 'learning_rate': 6.013187669657129e-05, 'discount_factor': 0.9452169996954417, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.5026880327571716, 'is_conservative': True, 'lstm': True, 'conservative_alpha': 0.6831973576075341, 'learn_after_episode': True, 'learning_steps': 25, 'n_epochs': 100}.


profit 1142.6864554684303 n_trades 24.4


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [04:01<?, ?it/s]
[I 2024-11-24 04:13:10,978] Trial 61 finished with values: [1163.665800898656, 20.0] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 2, 'n_units': 128, 'training_rounds': 29, 'learning_rate': 5.778137069497209e-05, 'discount_factor': 0.8122672110635073, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.4983775779408373, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.6848760506836826, 'learn_after_episode': True, 'learning_steps': 51, 'n_epochs': 100}.


profit 1163.665800898656 n_trades 20.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:54<?, ?it/s]
[I 2024-11-24 04:20:00,501] Trial 62 finished with values: [833.5381808313716, 20.8] and parameters: {'reward_function': 2, 'algorithm': 'dqn', 'n_layers': 2, 'n_units': 128, 'training_rounds': 27, 'learning_rate': 7.868350466081661e-05, 'discount_factor': 0.9520648613208751, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.4607973625135947, 'is_conservative': True, 'lstm': True, 'conservative_alpha': 0.6114700487544459, 'learn_after_episode': True, 'learning_steps': 80, 'n_epochs': 100}.


profit 833.5381808313716 n_trades 20.8


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [21:04<?, ?it/s]
[I 2024-11-24 04:43:14,967] Trial 63 finished with values: [852.8808497933377, 22.0] and parameters: {'reward_function': 0, 'algorithm': 'ddqn', 'n_layers': 3, 'n_units': 128, 'training_rounds': 29, 'learning_rate': 5.3994402945733446e-05, 'discount_factor': 0.9649676181623641, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.44696443541609304, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.9664188483660098, 'learn_after_episode': True, 'learning_steps': 24, 'n_epochs': 500}.


profit 852.8808497933377 n_trades 22.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [04:04<?, ?it/s]
[I 2024-11-24 04:49:44,362] Trial 64 finished with values: [823.7993104248608, 19.2] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 2, 'n_units': 128, 'training_rounds': 25, 'learning_rate': 6.147845569789206e-05, 'discount_factor': 0.9202570858318964, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.9323165167137539, 'is_conservative': True, 'lstm': True, 'conservative_alpha': 0.61221167393525, 'learn_after_episode': True, 'learning_steps': 48, 'n_epochs': 100}.


profit 823.7993104248608 n_trades 19.2


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [22:55<?, ?it/s]
[I 2024-11-24 05:15:02,442] Trial 65 finished with values: [1168.4195301913246, 19.6] and parameters: {'reward_function': 2, 'algorithm': 'ddqn', 'n_layers': 3, 'n_units': 512, 'training_rounds': 30, 'learning_rate': 4.6540099094478633e-05, 'discount_factor': 0.9337145604613526, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.5516680508032002, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5786014494526321, 'learn_after_episode': True, 'learning_steps': 53, 'n_epochs': 500}.


profit 1168.4195301913246 n_trades 19.6


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [04:36<?, ?it/s]
[I 2024-11-24 05:21:56,852] Trial 66 finished with values: [1157.2716904159802, 64.8] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 2, 'n_units': 128, 'training_rounds': 28, 'learning_rate': 3.502195725430407e-05, 'discount_factor': 0.9447058427910774, 'batch_size': 128, 'target_update_freq': 24, 'soft_update_tau': 0.5996751853354005, 'is_conservative': True, 'lstm': True, 'conservative_alpha': 0.6505399516357236, 'learn_after_episode': True, 'learning_steps': 72, 'n_epochs': 100}.


profit 1157.2716904159802 n_trades 64.8


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [04:20<?, ?it/s]
[I 2024-11-24 05:27:00,610] Trial 67 finished with values: [996.9691319692625, 0.0] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 512, 'training_rounds': 9, 'learning_rate': 6.840494896068387e-05, 'discount_factor': 0.9898248091790173, 'batch_size': 128, 'target_update_freq': 24, 'soft_update_tau': 0.5979312040841411, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.6444628637435001, 'learn_after_episode': True, 'learning_steps': 71, 'n_epochs': 500}.


profit 996.9691319692625 n_trades 0.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:37<?, ?it/s]
[I 2024-11-24 05:28:39,252] Trial 68 finished with values: [1161.1871888667129, 24.4] and parameters: {'reward_function': 2, 'algorithm': 'dqn', 'n_layers': 2, 'n_units': 128, 'training_rounds': 11, 'learning_rate': 4.2541924530315144e-05, 'discount_factor': 0.980740879682152, 'batch_size': 128, 'target_update_freq': 5, 'soft_update_tau': 0.750459198623217, 'is_conservative': False, 'lstm': False, 'conservative_alpha': 0.6027098510715061, 'learn_after_episode': True, 'learning_steps': 62, 'n_epochs': 100}.


profit 1161.1871888667129 n_trades 24.4


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [01:20<?, ?it/s]
[I 2024-11-24 05:30:32,165] Trial 69 finished with values: [1157.9245719110224, 24.0] and parameters: {'reward_function': 2, 'algorithm': 'dqn', 'n_layers': 3, 'n_units': 128, 'training_rounds': 7, 'learning_rate': 4.173219499327548e-05, 'discount_factor': 0.9145406009512792, 'batch_size': 128, 'target_update_freq': 5, 'soft_update_tau': 0.667415263444139, 'is_conservative': False, 'lstm': False, 'conservative_alpha': 0.6042798559660101, 'learn_after_episode': True, 'learning_steps': 44, 'n_epochs': 100}.


profit 1157.9245719110224 n_trades 24.0


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [33:33<?, ?it/s]
[I 2024-11-24 06:21:31,873] Trial 70 finished with values: [1124.2038012419157, 57.6] and parameters: {'reward_function': 0, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 128, 'training_rounds': 12, 'learning_rate': 9.994938475858597e-05, 'discount_factor': 0.8336864743967694, 'batch_size': 128, 'target_update_freq': 24, 'soft_update_tau': 0.7395977171467266, 'is_conservative': True, 'lstm': True, 'conservative_alpha': 0.5572288352325703, 'learn_after_episode': True, 'learning_steps': 62, 'n_epochs': 500}.


profit 1124.2038012419157 n_trades 57.6


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:40<?, ?it/s]
[I 2024-11-24 06:22:42,131] Trial 71 finished with values: [844.8074385939465, 19.6] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 128, 'training_rounds': 24, 'learning_rate': 3.647967918398868e-05, 'discount_factor': 0.9311649166700507, 'batch_size': 128, 'target_update_freq': 24, 'soft_update_tau': 0.4243275697105597, 'is_conservative': False, 'lstm': False, 'conservative_alpha': 0.6184682623400215, 'learn_after_episode': True, 'learning_steps': 56, 'n_epochs': 100}.


profit 844.8074385939465 n_trades 19.6


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:45<?, ?it/s]
[I 2024-11-24 06:27:32,136] Trial 72 finished with values: [861.0456446515778, 18.4] and parameters: {'reward_function': 1, 'algorithm': 'dqn', 'n_layers': 2, 'n_units': 512, 'training_rounds': 15, 'learning_rate': 3.511835597144526e-05, 'discount_factor': 0.9538211749113432, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.8218150304503872, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.6552470502666714, 'learn_after_episode': True, 'learning_steps': 33, 'n_epochs': 100}.


profit 861.0456446515778 n_trades 18.4


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [03:59<?, ?it/s]
[I 2024-11-24 06:32:11,077] Trial 73 finished with values: [1137.6165801162842, 21.6] and parameters: {'reward_function': 0, 'algorithm': 'ddqn', 'n_layers': 1, 'n_units': 128, 'training_rounds': 26, 'learning_rate': 4.740803615109907e-05, 'discount_factor': 0.9735310566590454, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.6072879827650348, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.5738994642463033, 'learn_after_episode': True, 'learning_steps': 59, 'n_epochs': 500}.


profit 1137.6165801162842 n_trades 21.6


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [04:36<?, ?it/s]
[I 2024-11-24 06:39:01,719] Trial 74 finished with values: [1131.034873053959, 20.0] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 128, 'training_rounds': 5, 'learning_rate': 7.580132844491146e-05, 'discount_factor': 0.9249109300624732, 'batch_size': 128, 'target_update_freq': 5, 'soft_update_tau': 0.33869346779465365, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.9357036923693849, 'learn_after_episode': False, 'learning_steps': 16, 'n_epochs': 100}.


profit 1131.034873053959 n_trades 20.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:43<?, ?it/s]
[I 2024-11-24 06:40:16,617] Trial 75 finished with values: [1176.1821558347476, 16.0] and parameters: {'reward_function': 1, 'algorithm': 'dqn', 'n_layers': 3, 'n_units': 512, 'training_rounds': 18, 'learning_rate': 9.200852179707517e-05, 'discount_factor': 0.8622515055316256, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.9853435253913974, 'is_conservative': False, 'lstm': False, 'conservative_alpha': 0.6236916418342227, 'learn_after_episode': True, 'learning_steps': 85, 'n_epochs': 100}.


profit 1176.1821558347476 n_trades 16.0


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [15:18<?, ?it/s]
[I 2024-11-24 06:58:46,222] Trial 76 finished with values: [828.8791341466228, 18.4] and parameters: {'reward_function': 2, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 128, 'training_rounds': 10, 'learning_rate': 3.2794831219527655e-05, 'discount_factor': 0.9388516552757977, 'batch_size': 128, 'target_update_freq': 24, 'soft_update_tau': 0.47387648520756015, 'is_conservative': True, 'lstm': True, 'conservative_alpha': 0.5938840153795173, 'learn_after_episode': True, 'learning_steps': 53, 'n_epochs': 500}.


profit 828.8791341466228 n_trades 18.4


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [06:55<?, ?it/s]


profit 842.0800852959787 n_trades 23.2


[I 2024-11-24 07:07:55,393] Trial 77 finished with values: [842.0800852959787, 23.2] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 19, 'learning_rate': 8.548654702642195e-05, 'discount_factor': 0.8767245013507583, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.5422001033996847, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.7612426496029132, 'learn_after_episode': False, 'learning_steps': 40, 'n_epochs': 100}.
  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [01:21<?, ?it/s]
[I 2024-11-24 07:10:15,329] Trial 78 finished with values: [847.6689342457254, 15.2] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 2, 'n_units': 128, 'training_rounds': 21, 'learning_rate': 3.969501075928516e-05, 'discount_factor': 0.9812429615052772, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.6304376045606621, 'is_conservative': False, 'lstm': False, 'conservative_alpha': 0.6415879073870578, 'learn_after_episode': True, 'learning_steps': 67, 'n_epochs': 100}.


profit 847.6689342457254 n_trades 15.2


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [08:58<?, ?it/s]
[I 2024-11-24 07:21:59,229] Trial 79 finished with values: [1149.223883487678, 21.2] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 1, 'n_units': 128, 'training_rounds': 25, 'learning_rate': 4.8704191448933915e-05, 'discount_factor': 0.9014233324332547, 'batch_size': 128, 'target_update_freq': 24, 'soft_update_tau': 0.3994033508480812, 'is_conservative': True, 'lstm': True, 'conservative_alpha': 0.5317191650054844, 'learn_after_episode': False, 'learning_steps': 46, 'n_epochs': 100}.


profit 1149.223883487678 n_trades 21.2


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [19:30<?, ?it/s]
[I 2024-11-24 07:43:47,575] Trial 80 finished with values: [838.0745651912723, 21.6] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 2, 'n_units': 512, 'training_rounds': 22, 'learning_rate': 4.4623964286277254e-05, 'discount_factor': 0.8138692914200744, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.765533425500144, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.6622127712436382, 'learn_after_episode': True, 'learning_steps': 89, 'n_epochs': 500}.


profit 838.0745651912723 n_trades 21.6


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [01:09<?, ?it/s]
[I 2024-11-24 07:45:50,901] Trial 81 finished with values: [834.7923652044037, 20.0] and parameters: {'reward_function': 2, 'algorithm': 'dqn', 'n_layers': 3, 'n_units': 128, 'training_rounds': 24, 'learning_rate': 3.997582441484944e-05, 'discount_factor': 0.9140218245152117, 'batch_size': 128, 'target_update_freq': 5, 'soft_update_tau': 0.23834133924011308, 'is_conservative': False, 'lstm': False, 'conservative_alpha': 0.5988456791968896, 'learn_after_episode': True, 'learning_steps': 57, 'n_epochs': 100}.


profit 834.7923652044037 n_trades 20.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:36<?, ?it/s]
[I 2024-11-24 07:52:13,718] Trial 82 finished with values: [530.6757656729593, 6261.6] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 8, 'learning_rate': 4.232514535603427e-05, 'discount_factor': 0.9654166988566071, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.9350127594607229, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.6036362569632845, 'learn_after_episode': True, 'learning_steps': 44, 'n_epochs': 100}.


profit 530.6757656729593 n_trades 6261.6


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:58<?, ?it/s]
[I 2024-11-24 07:58:33,086] Trial 83 finished with values: [1133.7867315883516, 18.8] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 8, 'learning_rate': 4.196934364397506e-05, 'discount_factor': 0.9669267581683755, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.9425498213889303, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5668177567138764, 'learn_after_episode': True, 'learning_steps': 39, 'n_epochs': 100}.


profit 1133.7867315883516 n_trades 18.8


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:49<?, ?it/s]
[I 2024-11-24 08:04:03,128] Trial 84 finished with values: [1179.7477552120324, 19.6] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 7, 'learning_rate': 4.533902901442668e-05, 'discount_factor': 0.9752485886467858, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.8396774403684117, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.630593458478254, 'learn_after_episode': True, 'learning_steps': 35, 'n_epochs': 100}.


profit 1179.7477552120324 n_trades 19.6


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:14<?, ?it/s]
[I 2024-11-24 08:10:19,329] Trial 85 finished with values: [851.3090442488007, 20.8] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 11, 'learning_rate': 5.064582368911989e-05, 'discount_factor': 0.9814295392042842, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.9237998919319803, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5931937305547624, 'learn_after_episode': True, 'learning_steps': 43, 'n_epochs': 100}.


profit 851.3090442488007 n_trades 20.8


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:13<?, ?it/s]
[I 2024-11-24 08:16:11,337] Trial 86 finished with values: [834.3263327890281, 15.6] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 6, 'learning_rate': 3.7563225053149144e-05, 'discount_factor': 0.9597662217302834, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.8467414679888213, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5884633007996691, 'learn_after_episode': True, 'learning_steps': 50, 'n_epochs': 100}.


profit 834.3263327890281 n_trades 15.6


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:33<?, ?it/s]
[I 2024-11-24 08:21:34,434] Trial 87 finished with values: [1135.18265872526, 57.2] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 9, 'learning_rate': 5.397794359237387e-05, 'discount_factor': 0.9466278117182205, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.9513807501961099, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.6126406115563512, 'learn_after_episode': True, 'learning_steps': 36, 'n_epochs': 100}.


profit 1135.18265872526 n_trades 57.2


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:01<?, ?it/s]
[I 2024-11-24 08:26:31,090] Trial 88 finished with values: [1165.9391403939555, 18.0] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 7, 'learning_rate': 3.39137747294214e-05, 'discount_factor': 0.957427593497891, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.7999902601571462, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5502656235712716, 'learn_after_episode': True, 'learning_steps': 48, 'n_epochs': 100}.


profit 1165.9391403939555 n_trades 18.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:47<?, ?it/s]
[I 2024-11-24 08:32:40,369] Trial 89 finished with values: [1132.3990127322081, 60.0] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 10, 'learning_rate': 4.31679010825245e-05, 'discount_factor': 0.9650541746754889, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.9060128398415831, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5840190035130799, 'learn_after_episode': True, 'learning_steps': 30, 'n_epochs': 100}.


profit 1132.3990127322081 n_trades 60.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:01<?, ?it/s]
[I 2024-11-24 08:38:20,024] Trial 90 finished with values: [854.2150966387155, 20.8] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 9, 'learning_rate': 4.013529716415057e-06, 'discount_factor': 0.9791922918438685, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.7695629340391269, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.6433695423748748, 'learn_after_episode': True, 'learning_steps': 41, 'n_epochs': 100}.


profit 854.2150966387155 n_trades 20.8


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:51<?, ?it/s]
[I 2024-11-24 08:44:05,982] Trial 91 finished with values: [1182.6541635566464, 18.4] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 13, 'learning_rate': 2.93832347926601e-05, 'discount_factor': 0.968924876519623, 'batch_size': 64, 'target_update_freq': 10, 'soft_update_tau': 0.8530206744048681, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5740750669061936, 'learn_after_episode': True, 'learning_steps': 45, 'n_epochs': 100}.


profit 1182.6541635566464 n_trades 18.4


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:17<?, ?it/s]
[I 2024-11-24 08:49:58,603] Trial 92 finished with values: [1127.3446501981523, 23.6] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 7, 'learning_rate': 4.145904719192985e-05, 'discount_factor': 0.9428190085442331, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.8777806957009291, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.6083655751007767, 'learn_after_episode': True, 'learning_steps': 47, 'n_epochs': 100}.


profit 1127.3446501981523 n_trades 23.6


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:45<?, ?it/s]
[I 2024-11-24 08:55:38,326] Trial 93 finished with values: [857.6622557070536, 23.2] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 11, 'learning_rate': 4.875713334448356e-05, 'discount_factor': 0.985171012522802, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.967621089068484, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.6054860878426441, 'learn_after_episode': True, 'learning_steps': 44, 'n_epochs': 100}.


profit 857.6622557070536 n_trades 23.2


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:44<?, ?it/s]
[I 2024-11-24 09:01:52,400] Trial 94 finished with values: [866.4597002787149, 19.2] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 5, 'learning_rate': 3.84249869639019e-05, 'discount_factor': 0.9371910470999358, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.9155472669272477, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.6245396477818033, 'learn_after_episode': True, 'learning_steps': 42, 'n_epochs': 100}.


profit 866.4597002787149 n_trades 19.2


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [04:25<?, ?it/s]
[I 2024-11-24 09:08:47,239] Trial 95 finished with values: [814.8582713025042, 22.0] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 8, 'learning_rate': 4.1870610511246216e-05, 'discount_factor': 0.949333351280633, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.7381892605107907, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.523679519269425, 'learn_after_episode': False, 'learning_steps': 52, 'n_epochs': 100}.


profit 814.8582713025042 n_trades 22.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:20<?, ?it/s]
[I 2024-11-24 09:15:08,955] Trial 96 finished with values: [846.9806559721806, 24.0] and parameters: {'reward_function': 2, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 6, 'learning_rate': 3.6502577303012514e-05, 'discount_factor': 0.954240073451845, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.8909980545200563, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5627735530323585, 'learn_after_episode': True, 'learning_steps': 37, 'n_epochs': 100}.


profit 846.9806559721806 n_trades 24.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:40<?, ?it/s]
[I 2024-11-24 09:16:28,865] Trial 97 finished with values: [782.9800587938261, 20.0] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 6, 'learning_rate': 4.603219513884768e-05, 'discount_factor': 0.9287826081277953, 'batch_size': 64, 'target_update_freq': 10, 'soft_update_tau': 0.9868767125087745, 'is_conservative': False, 'lstm': False, 'conservative_alpha': 0.6771239691137769, 'learn_after_episode': True, 'learning_steps': 44, 'n_epochs': 100}.


profit 782.9800587938261 n_trades 20.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:14<?, ?it/s]
[I 2024-11-24 09:21:59,704] Trial 98 finished with values: [1187.58085077912, 60.4] and parameters: {'reward_function': 1, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 12, 'learning_rate': 5.2188235268080546e-05, 'discount_factor': 0.9623992040133226, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.6710603668114904, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.8978216863599142, 'learn_after_episode': True, 'learning_steps': 55, 'n_epochs': 100}.


profit 1187.58085077912 n_trades 60.4


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:39<?, ?it/s]
[I 2024-11-24 09:27:48,544] Trial 99 finished with values: [1168.7038778158458, 19.2] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 12, 'learning_rate': 3.102028195688836e-05, 'discount_factor': 0.9713548826679157, 'batch_size': 64, 'target_update_freq': 10, 'soft_update_tau': 0.8639359700460348, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5753386109889657, 'learn_after_episode': True, 'learning_steps': 55, 'n_epochs': 100}.


profit 1168.7038778158458 n_trades 19.2


In [20]:
# Every trial in this study reports two objective values (profit, n_trades),
# so inspect the study's list of best trials instead of a single
# best value / best params pair.
best_trials = study.best_trials
best_trials

[FrozenTrial(number=45, state=TrialState.COMPLETE, values=[1191.6558529692093, 20.4], datetime_start=datetime.datetime(2024, 11, 24, 2, 9, 29, 249544), datetime_complete=datetime.datetime(2024, 11, 24, 2, 14, 53, 613837), params={'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 256, 'training_rounds': 21, 'learning_rate': 2.8764743562927777e-05, 'discount_factor': 0.911339518183664, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.30917867193935245, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5255327383263214, 'learn_after_episode': True, 'learning_steps': 51, 'n_epochs': 100}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'reward_function': CategoricalDistribution(choices=(0, 1, 2)), 'algorithm': CategoricalDistribution(choices=('dqn', 'ddqn')), 'n_layers': IntDistribution(high=3, log=False, low=1, step=1), 'n_units': CategoricalDistribution(choices=(64, 128, 256, 512)), 'training_rounds': IntDistribution(h

In [21]:
# Position of the hand-picked trial inside `study.best_trials`.
# NOTE(review): this index is only meaningful for the study results it was
# chosen from; re-check it whenever the search is re-run.
BEST_TRIAL_INDEX = 3

best_trials = study.best_trials
best_trail = best_trials[BEST_TRIAL_INDEX]

# Work on a copy of the trial's parameters: this cell and the following one
# `pop` keys out of `best_params`, and popping from the trial's own dict would
# alter the trial object and could make this cell fail with KeyError when it
# is executed a second time.
best_params = dict(best_trail.params)

# The 'reward_function' parameter is an index into `reward_functions`;
# both environments are switched to the selected reward function.
reward_func = reward_functions[best_params.pop('reward_function')]
train_env.reward_func = reward_func
test_env.reward_func = reward_func

In [22]:
# Split the selected trial's parameters into three groups:
#   * `algo`            - which model factory to call ('dqn' or 'ddqn'),
#   * `learning_params` - keyword arguments later passed to
#                         `pearl_utils.train_pearl_model`,
#   * `best_params`     - everything that remains, passed as keyword arguments
#                         to the model factory.
algo = best_params.pop('algorithm')

learning_params = {
    'learn_after_episode': best_params.pop('learn_after_episode'),
    'learning_steps': best_params.pop('learning_steps'),
    'n_epochs': best_params.pop('n_epochs'),
}

# Replace the two layer-shape parameters by the explicit list of layer widths.
best_params['hidden_dims'] = make_hidden_dims(
    n_layers=best_params.pop('n_layers'),
    n_units=best_params.pop('n_units'),
)
# Pop-and-reinsert keeps the value and only moves 'lstm' behind 'hidden_dims'
# in the dict's display order.
best_params['lstm'] = best_params.pop('lstm')

# Network input/output sizes come from the environment, not from the trial.
best_params['action_space_dim'] = len(train_env.positions)
best_params['observation_space_dim'] = train_env.observation_space.shape[0]

if algo == 'dqn':
    agent = pearl_utils.create_dqn_model(**best_params)
elif algo == 'ddqn':
    agent = pearl_utils.create_ddqn_model(**best_params)
else:
    # Without this branch `agent` would silently keep whatever model an
    # earlier cell had assigned to it.
    raise ValueError(f"Unsupported algorithm {algo!r}; expected 'dqn' or 'ddqn'")

best_params, learning_params

({'training_rounds': 28,
  'learning_rate': 3.502195725430407e-05,
  'discount_factor': 0.9447058427910774,
  'batch_size': 128,
  'target_update_freq': 24,
  'soft_update_tau': 0.5996751853354005,
  'is_conservative': True,
  'conservative_alpha': 0.6505399516357236,
  'hidden_dims': [128, 128],
  'lstm': True,
  'action_space_dim': 2,
  'observation_space_dim': 24},
 {'learn_after_episode': True, 'learning_steps': 72, 'n_epochs': 100})

In [23]:


# 1) Run the training helper on the training environment.
agent = pearl_utils.train_pearl_model(agent, train_env, **learning_params)

# 2) Run the test helper on the test environment; it yields the two values
#    unpacked here as profit and number of trades.
profit, n_trades = pearl_utils.test_pearl_model(agent, test_env)

# 3) Run the training helper again, this time on the test environment.
# NOTE(review): presumably a final refit on all available data; after this
# step `test_env` is no longer unseen data for `agent` - confirm intent.
agent = pearl_utils.train_pearl_model(agent, test_env, **learning_params)

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [05:14<?, ?it/s]
  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [05:42<?, ?it/s]


In [24]:
import torch

In [25]:
# Bare expression: shows the `torch.load` function object (and thereby its
# signature, including the `weights_only` keyword) as the cell output.
# NOTE(review): looks like leftover interactive inspection - consider removing.
torch.load

<function torch.serialization.load(f: Union[str, os.PathLike, BinaryIO, IO[bytes]], map_location: Union[Callable[[torch.types.Storage, str], torch.types.Storage], torch.device, str, Dict[str, str], NoneType] = None, pickle_module: Any = None, *, weights_only: Optional[bool] = None, mmap: Optional[bool] = None, **pickle_load_args: Any) -> Any>