In [4]:
import warnings
import logging


warnings.filterwarnings('ignore',category=FutureWarning)
warnings.simplefilter('ignore')

In [5]:
from utils import make_environments
from utils import pearl_utils
from configs import defaults
from utils.reward_functions import log_reward_function,cumulative_reward_function,sharpe_reward_function
from utils. utils import make_hidden_dims
import optuna
from optuna.samplers import TPESampler

from neuralforecast.core import NeuralForecast
from Pearl.pearl.utils.instantiations.environments.gym_environment import GymEnvironment
from Pearl.pearl.utils.functional_utils.train_and_eval.online_learning import \
    online_learning
import datetime

In [6]:
reward_functions=[log_reward_function,cumulative_reward_function,sharpe_reward_function]
train_env,test_env=make_environments.make_envs(reward_function=log_reward_function)


Seed set to 12
Seed set to 5


['data_forex/oanda-USDJPY-1h.pkl']


100%|██████████| 78/78 [00:00<00:00, 12336.19it/s]
1it [00:00,  9.37it/s]
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

2024-11-04 16:00:00 2024-12-04 16:00:00


In [7]:
sp=train_env.positions
train_env.action_space.n,train_env.name,train_env.observation_space.shape,

(np.int64(2), 'USDJPY_train', (24,))

In [8]:
make_hidden_dims(n_layers=3, n_units=64)

[64, 64, 64]

In [9]:
agent=pearl_utils.create_ddqn_model(

        observation_space_dim=train_env.observation_space.shape[0], 
        action_space_dim=train_env.action_space.n,
        hidden_dims=[64,64, 64], 
        training_rounds=20,
        learning_rate = 0.001,
        discount_factor = 0.99,
        batch_size = 128,
        target_update_freq = 10,
        soft_update_tau = 0.75,  # a value of 1 indicates no soft updates
        is_conservative = False,
        conservative_alpha = False,
        replay_buffer_size = 10_000,
        lstm=False)

In [10]:
test_env.observation_space.shape,train_env.action_space.n

((24,), np.int64(2))

In [11]:
env=GymEnvironment(train_env)

obs,action_space=env.reset()
agent.reset(   obs, action_space)

In [12]:
# done = False
# while not done:
#     action = agent.act(exploit=False)
#     action_result = env.step(action)
    
#     agent.observe(action_result)
#     loss=agent.learn()

#     done = action_result.done

In [13]:
info = online_learning(
        agent=agent,
        env=env,
        # number_of_episodes=10,
        number_of_steps=168,
        print_every_x_episodes=2,   # print returns after every 10 episdoes
        print_every_x_steps=1,   # print returns after every 10 episdoes
        learn_every_k_steps=20,   # print returns after every 10 episdoes
        learn_after_episode=False,
        record_period=169,   # instead of updating after every environment interaction, Q networks are updates at the end of each episode
        seed=0
    )

  0%|          | 0/168 [00:00<?, ?it/s]

In [20]:


def objective_function(trial):
 
    reward_id=trial.suggest_categorical('reward_function', [0,1,2])
    algo=trial.suggest_categorical('algorithm', ['dqn','ddqn'])    

    # reward_id=0
    
    reward_func=reward_functions[reward_id]
    train_env.reward_func=reward_func
    test_env.reward_func=reward_func
    
    observation_space_dim=train_env.observation_space.shape[0]
    action_space_dim=len(train_env.positions)
    n_layers=trial.suggest_int('n_layers', 1, 3)
    n_units=trial.suggest_categorical('n_units', [64,128,256,512])
    
    hidden_dims=make_hidden_dims(n_layers= n_layers, n_units=n_units)
    
    search_space={
                'observation_space_dim': observation_space_dim,
                'action_space_dim': action_space_dim,
                'hidden_dims': hidden_dims,
                'training_rounds': trial.suggest_int('training_rounds', 5, 30),
                'learning_rate': trial.suggest_float('learning_rate', 1e-6, 1e-4),
                'discount_factor': trial.suggest_float('discount_factor', 0.8, 0.99), # gamma (greediness)
                'batch_size': trial.suggest_categorical('batch_size', [64, 128]),
                'target_update_freq': trial.suggest_categorical('target_update_freq', [1, 5, 10, 24]),
                'soft_update_tau': trial.suggest_float('soft_update_tau', 0.1, .99), 
                'is_conservative': trial.suggest_categorical('is_conservative', [True, False]),
                'lstm': trial.suggest_categorical('lstm', [True, False]),
                'conservative_alpha': trial.suggest_float('conservative_alpha', 0.5, 1.0),
                'replay_buffer_size': trial.suggest_categorical('replay_buffer_size', [1_000,10_000,50_000,100_000,200_000]),

                }

    learning_space={'learn_after_episode':trial.suggest_categorical('learn_after_episode', [True, False]),
                    'learning_steps':trial.suggest_int('learning_steps', 10, 89),
                    'n_epochs':trial.suggest_categorical('n_epochs',[100,500]),
                    }
    #
    # print('n_epochs',n_epochs)
    if algo=='dqn':
        agent=pearl_utils.create_dqn_model(**search_space)
    elif algo=='ddqn':
        agent=pearl_utils.create_ddqn_model(**search_space)

        
    agent=pearl_utils.train_pearl_model(agent,train_env,**learning_space)
    profit,n_trades=pearl_utils.test_pearl_model(agent,test_env)
    objectives={'profit':profit,'n_trades':n_trades}

    print('profit',profit,'n_trades',n_trades)

    return profit,n_trades

In [21]:
model_name=defaults.model_name
model_name

study_name=f"{defaults.model_name}"
storage_name="sqlite:///PearlHPTuning.sqlite3"

In [22]:
from optuna import create_study,delete_study

In [23]:
# study=delete_study(study_name=study_name, 
#              storage=storage_name, )

In [24]:

study=create_study(study_name=study_name, 
             storage=storage_name, 
             load_if_exists=True,
             directions=['maximize','maximize'],
             sampler=TPESampler()
             )

[I 2024-12-04 15:53:06,449] A new study created in RDB with name: USDJPY


In [25]:
study.optimize(objective_function, n_trials=100)


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [08:23<?, ?it/s]
Profit: 1150.5957618769667, Number of Trades: 18.4: 100%|██████████| 100/100 [00:35<00:00,  2.82it/s]             
[I 2024-12-04 16:02:05,680] Trial 0 finished with values: [1150.5957618769667, 18.4] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 512, 'training_rounds': 27, 'learning_rate': 6.64070527729143e-05, 'discount_factor': 0.9561062047837683, 'batch_size': 128, 'target_update_freq': 10, 'soft_update_tau': 0.9516759849416108, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.9487301482144148, 'replay_buffer_size': 2851, 'learn_after_episode': False, 'learning_steps': 54, 'n_epochs': 500}.


profit 1150.5957618769667 n_trades 18.4


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [01:24<?, ?it/s]
Profit: 998.5133724109462, Number of Trades: 0.0: 100%|██████████| 100/100 [00:35<00:00,  2.84it/s]
[I 2024-12-04 16:04:05,860] Trial 1 finished with values: [998.5133724109462, 0.0] and parameters: {'reward_function': 2, 'algorithm': 'dqn', 'n_layers': 2, 'n_units': 256, 'training_rounds': 15, 'learning_rate': 2.1156008464906055e-05, 'discount_factor': 0.8875167453148376, 'batch_size': 128, 'target_update_freq': 10, 'soft_update_tau': 0.3554511744918401, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.524259092314554, 'replay_buffer_size': 4999, 'learn_after_episode': False, 'learning_steps': 37, 'n_epochs': 100}.


profit 998.5133724109462 n_trades 0.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [12:32<?, ?it/s]
Profit: 801.1383947704479, Number of Trades: 17.6: 100%|██████████| 100/100 [00:36<00:00,  2.72it/s]             
[I 2024-12-04 16:17:15,529] Trial 2 finished with values: [801.1383947704479, 17.6] and parameters: {'reward_function': 2, 'algorithm': 'ddqn', 'n_layers': 3, 'n_units': 128, 'training_rounds': 16, 'learning_rate': 9.507445554614118e-05, 'discount_factor': 0.9674711379208872, 'batch_size': 64, 'target_update_freq': 10, 'soft_update_tau': 0.49286068735237626, 'is_conservative': False, 'lstm': False, 'conservative_alpha': 0.7740092775390208, 'replay_buffer_size': 2790, 'learn_after_episode': True, 'learning_steps': 74, 'n_epochs': 100}.


profit 801.1383947704479 n_trades 17.6


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [1:37:20<?, ?it/s]
Profit: 853.5659968934291, Number of Trades: 18.8: 100%|██████████| 100/100 [02:07<00:00,  1.27s/it]             
[I 2024-12-04 17:56:43,971] Trial 3 finished with values: [853.5659968934291, 18.8] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 128, 'training_rounds': 25, 'learning_rate': 7.120630556183288e-06, 'discount_factor': 0.9851102320276353, 'batch_size': 128, 'target_update_freq': 5, 'soft_update_tau': 0.7210799177334739, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.7026410914481382, 'replay_buffer_size': 2483, 'learn_after_episode': False, 'learning_steps': 10, 'n_epochs': 500}.


profit 853.5659968934291 n_trades 18.8


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [13:44<?, ?it/s]
Profit: 1176.5414771339817, Number of Trades: 16.8: 100%|██████████| 100/100 [02:06<00:00,  1.26s/it]             
[I 2024-12-04 18:12:34,344] Trial 4 finished with values: [1176.5414771339817, 16.8] and parameters: {'reward_function': 2, 'algorithm': 'ddqn', 'n_layers': 3, 'n_units': 128, 'training_rounds': 11, 'learning_rate': 6.5825695929946e-05, 'discount_factor': 0.8045411014719126, 'batch_size': 128, 'target_update_freq': 24, 'soft_update_tau': 0.15455216049908868, 'is_conservative': True, 'lstm': True, 'conservative_alpha': 0.9963697750584546, 'replay_buffer_size': 717, 'learn_after_episode': True, 'learning_steps': 88, 'n_epochs': 500}.


profit 1176.5414771339817 n_trades 16.8


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [01:33<?, ?it/s]
Profit: 989.4892346207859, Number of Trades: 0.0: 100%|██████████| 100/100 [00:36<00:00,  2.77it/s]
[I 2024-12-04 18:14:44,197] Trial 5 finished with values: [989.4892346207859, 0.0] and parameters: {'reward_function': 1, 'algorithm': 'dqn', 'n_layers': 2, 'n_units': 512, 'training_rounds': 12, 'learning_rate': 3.549057737579175e-05, 'discount_factor': 0.8099464454489417, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.7682689402298634, 'is_conservative': False, 'lstm': False, 'conservative_alpha': 0.8330276502550737, 'replay_buffer_size': 4545, 'learn_after_episode': False, 'learning_steps': 19, 'n_epochs': 100}.


profit 989.4892346207859 n_trades 0.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [01:00<?, ?it/s]
Profit: 841.5092663862898, Number of Trades: 18.0: 100%|██████████| 100/100 [00:34<00:00,  2.89it/s]             
[I 2024-12-04 18:16:19,788] Trial 6 finished with values: [841.5092663862898, 18.0] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 1, 'n_units': 128, 'training_rounds': 21, 'learning_rate': 6.886310828844766e-05, 'discount_factor': 0.8202405625077188, 'batch_size': 64, 'target_update_freq': 10, 'soft_update_tau': 0.801943345283525, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.8567301349885983, 'replay_buffer_size': 4544, 'learn_after_episode': False, 'learning_steps': 85, 'n_epochs': 100}.


profit 841.5092663862898 n_trades 18.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [04:22<?, ?it/s]
Profit: 769.9717173252246, Number of Trades: 22.4: 100%|██████████| 100/100 [00:38<00:00,  2.62it/s]             
[I 2024-12-04 18:21:20,751] Trial 7 finished with values: [769.9717173252246, 22.4] and parameters: {'reward_function': 0, 'algorithm': 'ddqn', 'n_layers': 3, 'n_units': 256, 'training_rounds': 20, 'learning_rate': 4.913771212298313e-05, 'discount_factor': 0.955876443729569, 'batch_size': 128, 'target_update_freq': 5, 'soft_update_tau': 0.42248655622452674, 'is_conservative': False, 'lstm': False, 'conservative_alpha': 0.8274630603516491, 'replay_buffer_size': 628, 'learn_after_episode': False, 'learning_steps': 11, 'n_epochs': 100}.


profit 769.9717173252246 n_trades 22.4


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [14:44<?, ?it/s]
Profit: 828.906255203153, Number of Trades: 20.4: 100%|██████████| 100/100 [02:01<00:00,  1.22s/it]              
[I 2024-12-04 18:38:06,911] Trial 8 finished with values: [828.906255203153, 20.4] and parameters: {'reward_function': 2, 'algorithm': 'ddqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 19, 'learning_rate': 5.727937759999454e-06, 'discount_factor': 0.8405487713895176, 'batch_size': 128, 'target_update_freq': 10, 'soft_update_tau': 0.9704222411587171, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5636202595773241, 'replay_buffer_size': 4009, 'learn_after_episode': True, 'learning_steps': 85, 'n_epochs': 500}.


profit 828.906255203153 n_trades 20.4


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [17:53<?, ?it/s]
Profit: 819.9485406062594, Number of Trades: 20.4: 100%|██████████| 100/100 [02:05<00:00,  1.25s/it]             
[I 2024-12-04 18:58:05,920] Trial 9 finished with values: [819.9485406062594, 20.4] and parameters: {'reward_function': 1, 'algorithm': 'dqn', 'n_layers': 2, 'n_units': 256, 'training_rounds': 30, 'learning_rate': 1.2725154685834103e-05, 'discount_factor': 0.9262485802025231, 'batch_size': 128, 'target_update_freq': 24, 'soft_update_tau': 0.4375419393408185, 'is_conservative': True, 'lstm': True, 'conservative_alpha': 0.7868957575284544, 'replay_buffer_size': 3684, 'learn_after_episode': True, 'learning_steps': 66, 'n_epochs': 500}.


profit 819.9485406062594 n_trades 20.4


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [09:35<?, ?it/s]
Profit: 1172.3098362436463, Number of Trades: 19.6: 100%|██████████| 100/100 [00:36<00:00,  2.77it/s]             
[I 2024-12-04 19:08:17,445] Trial 10 finished with values: [1172.3098362436463, 19.6] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 512, 'training_rounds': 30, 'learning_rate': 9.651147115345701e-05, 'discount_factor': 0.8999156527488265, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.9638377206292701, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.9883264133469607, 'replay_buffer_size': 1825, 'learn_after_episode': False, 'learning_steps': 54, 'n_epochs': 500}.


profit 1172.3098362436463 n_trades 19.6


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [05:29<?, ?it/s]
Profit: 839.4908255046281, Number of Trades: 22.0: 100%|██████████| 100/100 [00:37<00:00,  2.65it/s]             
[I 2024-12-04 19:14:25,076] Trial 11 finished with values: [839.4908255046281, 22.0] and parameters: {'reward_function': 0, 'algorithm': 'ddqn', 'n_layers': 3, 'n_units': 512, 'training_rounds': 6, 'learning_rate': 9.03511425099215e-05, 'discount_factor': 0.8902641008104727, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.6512115151505163, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.9080255302828331, 'replay_buffer_size': 1012, 'learn_after_episode': False, 'learning_steps': 37, 'n_epochs': 500}.


profit 839.4908255046281 n_trades 22.0


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [05:20<?, ?it/s]
Profit: 1158.6009410501379, Number of Trades: 19.2: 100%|██████████| 100/100 [00:37<00:00,  2.67it/s]             
[I 2024-12-04 19:20:22,927] Trial 12 finished with values: [1158.6009410501379, 19.2] and parameters: {'reward_function': 0, 'algorithm': 'ddqn', 'n_layers': 3, 'n_units': 512, 'training_rounds': 6, 'learning_rate': 9.460649153790751e-05, 'discount_factor': 0.8801588605175205, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.6342548006860199, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.9350665987254563, 'replay_buffer_size': 1468, 'learn_after_episode': False, 'learning_steps': 39, 'n_epochs': 500}.


profit 1158.6009410501379 n_trades 19.2


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [04:23<?, ?it/s]
Profit: 1213.0898530705067, Number of Trades: 18.4: 100%|██████████| 100/100 [00:36<00:00,  2.75it/s]             
[I 2024-12-04 19:25:22,840] Trial 13 finished with values: [1213.0898530705067, 18.4] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 512, 'training_rounds': 6, 'learning_rate': 8.089977724562876e-05, 'discount_factor': 0.9127903672493933, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.8890254491500227, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.9065113385368218, 'replay_buffer_size': 1706, 'learn_after_episode': False, 'learning_steps': 52, 'n_epochs': 500}.


profit 1213.0898530705067 n_trades 18.4


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [11:21<?, ?it/s]
Profit: 1171.8025648528414, Number of Trades: 17.2: 100%|██████████| 100/100 [00:37<00:00,  2.66it/s]             
[I 2024-12-04 19:37:22,259] Trial 14 finished with values: [1171.8025648528414, 17.2] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 24, 'learning_rate': 8.275359110580089e-05, 'discount_factor': 0.8563168174668079, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.6294745269790548, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.9993104898758396, 'replay_buffer_size': 1725, 'learn_after_episode': False, 'learning_steps': 33, 'n_epochs': 500}.


profit 1171.8025648528414 n_trades 17.2


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [09:43<?, ?it/s]
Profit: 1162.6876258961036, Number of Trades: 19.2: 100%|██████████| 100/100 [00:43<00:00,  2.28it/s]             
[I 2024-12-04 19:47:49,382] Trial 15 finished with values: [1162.6876258961036, 19.2] and parameters: {'reward_function': 0, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 512, 'training_rounds': 29, 'learning_rate': 8.308257779690094e-05, 'discount_factor': 0.9202028582250369, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.24867282732129525, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.6738727449522297, 'replay_buffer_size': 251, 'learn_after_episode': False, 'learning_steps': 65, 'n_epochs': 500}.


profit 1162.6876258961036 n_trades 19.2


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [14:13<?, ?it/s]
Profit: 838.2296508571137, Number of Trades: 18.4: 100%|██████████| 100/100 [02:11<00:00,  1.31s/it]             
[I 2024-12-04 20:04:13,918] Trial 16 finished with values: [838.2296508571137, 18.4] and parameters: {'reward_function': 1, 'algorithm': 'dqn', 'n_layers': 3, 'n_units': 512, 'training_rounds': 11, 'learning_rate': 9.828778658047002e-05, 'discount_factor': 0.8652510875774795, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.8429469975428692, 'is_conservative': True, 'lstm': True, 'conservative_alpha': 0.8898956816745849, 'replay_buffer_size': 2298, 'learn_after_episode': True, 'learning_steps': 44, 'n_epochs': 500}.


profit 838.2296508571137 n_trades 18.4


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [12:28<?, ?it/s]
Profit: 1193.0121812748591, Number of Trades: 19.6: 100%|██████████| 100/100 [00:35<00:00,  2.82it/s]             
[I 2024-12-04 20:17:17,991] Trial 17 finished with values: [1193.0121812748591, 19.6] and parameters: {'reward_function': 0, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 512, 'training_rounds': 24, 'learning_rate': 5.282761316402105e-05, 'discount_factor': 0.9019578704429577, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.6156146138601478, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.6131158464420793, 'replay_buffer_size': 1168, 'learn_after_episode': False, 'learning_steps': 30, 'n_epochs': 500}.


profit 1193.0121812748591 n_trades 19.6


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [11:03<?, ?it/s]
Profit: 817.6282212388323, Number of Trades: 24.4: 100%|██████████| 100/100 [00:34<00:00,  2.89it/s]             
[I 2024-12-04 20:28:55,700] Trial 18 finished with values: [817.6282212388323, 24.4] and parameters: {'reward_function': 0, 'algorithm': 'ddqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 23, 'learning_rate': 4.43332219575096e-05, 'discount_factor': 0.9315971574852383, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.6076725536601836, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.6115864438131092, 'replay_buffer_size': 1037, 'learn_after_episode': False, 'learning_steps': 27, 'n_epochs': 500}.


profit 817.6282212388323 n_trades 24.4


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [11:06<?, ?it/s]
Profit: 1019.5896706366425, Number of Trades: 0.0: 100%|██████████| 100/100 [00:35<00:00,  2.83it/s]
[I 2024-12-04 20:40:37,143] Trial 19 finished with values: [1019.5896706366425, 0.0] and parameters: {'reward_function': 0, 'algorithm': 'ddqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 23, 'learning_rate': 4.514618491027097e-05, 'discount_factor': 0.935899603333052, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.5529011094665981, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.6045515901376467, 'replay_buffer_size': 1139, 'learn_after_episode': False, 'learning_steps': 26, 'n_epochs': 500}.


profit 1019.5896706366425 n_trades 0.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:09<?, ?it/s]
Profit: 676.7714633162718, Number of Trades: 2196.0: 100%|██████████| 100/100 [02:08<00:00,  1.29s/it]           
[I 2024-12-04 20:45:54,919] Trial 20 finished with values: [676.7714633162718, 2196.0] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 26, 'learning_rate': 3.318937794355847e-05, 'discount_factor': 0.943156240955047, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.5544717341157553, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.6405695182036979, 'replay_buffer_size': 273, 'learn_after_episode': True, 'learning_steps': 26, 'n_epochs': 100}.


profit 676.7714633162718 n_trades 2196.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [04:03<?, ?it/s]
Profit: 802.3218935652501, Number of Trades: 17.6: 100%|██████████| 100/100 [02:06<00:00,  1.26s/it]             
[I 2024-12-04 20:52:04,938] Trial 21 finished with values: [802.3218935652501, 17.6] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 27, 'learning_rate': 3.0482719711352446e-05, 'discount_factor': 0.9503356366908214, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.31237610689945505, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.6443221930697802, 'replay_buffer_size': 293, 'learn_after_episode': True, 'learning_steps': 25, 'n_epochs': 100}.


profit 802.3218935652501 n_trades 17.6


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:59<?, ?it/s]
Profit: 1111.956900163713, Number of Trades: 57.2: 100%|██████████| 100/100 [02:06<00:00,  1.26s/it]              
[I 2024-12-04 20:58:10,385] Trial 22 finished with values: [1111.956900163713, 57.2] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 26, 'learning_rate': 3.540572491596491e-05, 'discount_factor': 0.9840201384599606, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.558492721714105, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.7319424774838178, 'replay_buffer_size': 674, 'learn_after_episode': True, 'learning_steps': 22, 'n_epochs': 100}.


profit 1111.956900163713 n_trades 57.2


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [04:03<?, ?it/s]
Profit: 1175.5662732639043, Number of Trades: 22.0: 100%|██████████| 100/100 [02:05<00:00,  1.25s/it]             
[I 2024-12-04 21:04:19,215] Trial 23 finished with values: [1175.5662732639043, 22.0] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 27, 'learning_rate': 2.833393055485657e-05, 'discount_factor': 0.9816107484154667, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.5179765810584458, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.7232981831437858, 'replay_buffer_size': 222, 'learn_after_episode': True, 'learning_steps': 18, 'n_epochs': 100}.


profit 1175.5662732639043 n_trades 22.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [04:03<?, ?it/s]
Profit: 1155.4950017267067, Number of Trades: 60.4: 100%|██████████| 100/100 [02:05<00:00,  1.25s/it]             
[I 2024-12-04 21:10:28,135] Trial 24 finished with values: [1155.4950017267067, 60.4] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 27, 'learning_rate': 2.4762447589798305e-05, 'discount_factor': 0.986225305621527, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.5042013854773669, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.7217810056026264, 'replay_buffer_size': 589, 'learn_after_episode': True, 'learning_steps': 18, 'n_epochs': 100}.


profit 1155.4950017267067 n_trades 60.4


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:38<?, ?it/s]
Profit: 1129.0502962636851, Number of Trades: 59.2: 100%|██████████| 100/100 [02:05<00:00,  1.26s/it]             
[I 2024-12-04 21:16:12,641] Trial 25 finished with values: [1129.0502962636851, 59.2] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 22, 'learning_rate': 1.7379949750062583e-05, 'discount_factor': 0.9423632090058889, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.704334934673816, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5015634113021012, 'replay_buffer_size': 3319, 'learn_after_episode': True, 'learning_steps': 32, 'n_epochs': 100}.


profit 1129.0502962636851 n_trades 59.2


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [04:05<?, ?it/s]
Profit: 1184.9763305563604, Number of Trades: 18.8: 100%|██████████| 100/100 [02:05<00:00,  1.26s/it]             
[I 2024-12-04 21:22:24,180] Trial 26 finished with values: [1184.9763305563604, 18.8] and parameters: {'reward_function': 2, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 28, 'learning_rate': 5.945387378938856e-05, 'discount_factor': 0.904559971991918, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.4145733201776095, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.6586518666979742, 'replay_buffer_size': 1226, 'learn_after_episode': True, 'learning_steps': 16, 'n_epochs': 100}.


profit 1184.9763305563604 n_trades 18.8


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:55<?, ?it/s]
Profit: 1189.9814472863907, Number of Trades: 22.0: 100%|██████████| 100/100 [02:10<00:00,  1.30s/it]             
[I 2024-12-04 21:27:30,194] Trial 27 finished with values: [1189.9814472863907, 22.0] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 18, 'learning_rate': 3.8026665405734146e-05, 'discount_factor': 0.971347866986459, 'batch_size': 128, 'target_update_freq': 24, 'soft_update_tau': 0.1025736153224488, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5879522364923437, 'replay_buffer_size': 613, 'learn_after_episode': True, 'learning_steps': 45, 'n_epochs': 100}.


profit 1189.9814472863907 n_trades 22.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:29<?, ?it/s]
Profit: 840.2583670773945, Number of Trades: 61.6: 100%|██████████| 100/100 [02:08<00:00,  1.28s/it]             
[I 2024-12-04 21:33:07,898] Trial 28 finished with values: [840.2583670773945, 61.6] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 17, 'learning_rate': 2.435766121250477e-05, 'discount_factor': 0.9730967187165045, 'batch_size': 128, 'target_update_freq': 24, 'soft_update_tau': 0.3196302446305792, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5546305911875637, 'replay_buffer_size': 546, 'learn_after_episode': True, 'learning_steps': 47, 'n_epochs': 100}.


profit 840.2583670773945 n_trades 61.6


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:42<?, ?it/s]
Profit: 843.1172645080338, Number of Trades: 20.0: 100%|██████████| 100/100 [02:07<00:00,  1.27s/it]             
[I 2024-12-04 21:37:58,272] Trial 29 finished with values: [843.1172645080338, 20.0] and parameters: {'reward_function': 2, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 256, 'training_rounds': 8, 'learning_rate': 2.381214159483307e-05, 'discount_factor': 0.9614550540439175, 'batch_size': 128, 'target_update_freq': 5, 'soft_update_tau': 0.2604905034588782, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.6850111170726298, 'replay_buffer_size': 2043, 'learn_after_episode': True, 'learning_steps': 61, 'n_epochs': 100}.


profit 843.1172645080338 n_trades 20.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:46<?, ?it/s]
Profit: 1189.5226266630648, Number of Trades: 17.2: 100%|██████████| 100/100 [02:08<00:00,  1.29s/it]             
[I 2024-12-04 21:42:53,556] Trial 30 finished with values: [1189.5226266630648, 17.2] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 15, 'learning_rate': 5.6278642146666304e-05, 'discount_factor': 0.8358040756450258, 'batch_size': 128, 'target_update_freq': 10, 'soft_update_tau': 0.3706012566060488, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5683352575804606, 'replay_buffer_size': 3217, 'learn_after_episode': True, 'learning_steps': 47, 'n_epochs': 100}.


profit 1189.5226266630648 n_trades 17.2


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:49<?, ?it/s]
Profit: 1160.1952523604582, Number of Trades: 16.4: 100%|██████████| 100/100 [02:08<00:00,  1.28s/it]             
[I 2024-12-04 21:47:51,487] Trial 31 finished with values: [1160.1952523604582, 16.4] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 17, 'learning_rate': 3.861958094853983e-05, 'discount_factor': 0.9697687981221642, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.48310365874110106, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.6356954767480933, 'replay_buffer_size': 604, 'learn_after_episode': True, 'learning_steps': 45, 'n_epochs': 100}.


profit 1160.1952523604582 n_trades 16.4


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:58<?, ?it/s]
Profit: 799.3107673245111, Number of Trades: 19.2: 100%|██████████| 100/100 [02:09<00:00,  1.29s/it]             
[I 2024-12-04 21:52:59,303] Trial 32 finished with values: [799.3107673245111, 19.2] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 19, 'learning_rate': 2.67701490252057e-05, 'discount_factor': 0.8749212858144607, 'batch_size': 128, 'target_update_freq': 1, 'soft_update_tau': 0.6980241797704638, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5637677470609574, 'replay_buffer_size': 459, 'learn_after_episode': True, 'learning_steps': 40, 'n_epochs': 100}.


profit 799.3107673245111 n_trades 19.2


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:08<?, ?it/s]
Profit: 1230.796288138875, Number of Trades: 20.4: 100%|██████████| 100/100 [02:07<00:00,  1.28s/it]              
[I 2024-12-04 21:58:15,483] Trial 33 finished with values: [1230.796288138875, 20.4] and parameters: {'reward_function': 2, 'algorithm': 'dqn', 'n_layers': 2, 'n_units': 256, 'training_rounds': 14, 'learning_rate': 1.6445163278510933e-05, 'discount_factor': 0.9726877965533602, 'batch_size': 64, 'target_update_freq': 10, 'soft_update_tau': 0.13448361852886265, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.7588554781068245, 'replay_buffer_size': 913, 'learn_after_episode': True, 'learning_steps': 73, 'n_epochs': 100}.


profit 1230.796288138875 n_trades 20.4


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:01<?, ?it/s]
Profit: 794.4140161426453, Number of Trades: 19.6: 100%|██████████| 100/100 [02:06<00:00,  1.27s/it]             
[I 2024-12-04 22:03:24,188] Trial 34 finished with values: [794.4140161426453, 19.6] and parameters: {'reward_function': 2, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 256, 'training_rounds': 13, 'learning_rate': 1.7426077357097418e-05, 'discount_factor': 0.9457711037012931, 'batch_size': 64, 'target_update_freq': 10, 'soft_update_tau': 0.4730792577208504, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.7669355243441842, 'replay_buffer_size': 870, 'learn_after_episode': True, 'learning_steps': 74, 'n_epochs': 100}.


profit 794.4140161426453 n_trades 19.6


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:07<?, ?it/s]
Profit: 833.5369837238903, Number of Trades: 20.4: 100%|██████████| 100/100 [02:07<00:00,  1.28s/it]             
[I 2024-12-04 22:08:39,585] Trial 35 finished with values: [833.5369837238903, 20.4] and parameters: {'reward_function': 2, 'algorithm': 'dqn', 'n_layers': 2, 'n_units': 128, 'training_rounds': 14, 'learning_rate': 4.3452569381339085e-05, 'discount_factor': 0.9120546711599813, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.5414924387388023, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5372224209633674, 'replay_buffer_size': 1507, 'learn_after_episode': True, 'learning_steps': 58, 'n_epochs': 100}.


profit 833.5369837238903 n_trades 20.4


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:43<?, ?it/s]
Profit: 1153.3566740212302, Number of Trades: 18.0: 100%|██████████| 100/100 [02:09<00:00,  1.30s/it]             
[I 2024-12-04 22:13:33,314] Trial 36 finished with values: [1153.3566740212302, 18.0] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 2, 'n_units': 128, 'training_rounds': 9, 'learning_rate': 1.1588466069053073e-05, 'discount_factor': 0.9746141932942525, 'batch_size': 128, 'target_update_freq': 5, 'soft_update_tau': 0.7597115931591706, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.7017687891253915, 'replay_buffer_size': 4961, 'learn_after_episode': True, 'learning_steps': 15, 'n_epochs': 100}.


profit 1153.3566740212302 n_trades 18.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:57<?, ?it/s]
Profit: 1147.468762389757, Number of Trades: 58.8: 100%|██████████| 100/100 [02:06<00:00,  1.27s/it]              
[I 2024-12-04 22:19:37,676] Trial 37 finished with values: [1147.468762389757, 58.8] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 256, 'training_rounds': 26, 'learning_rate': 6.902051211220322e-05, 'discount_factor': 0.9594753016000518, 'batch_size': 64, 'target_update_freq': 10, 'soft_update_tau': 0.5742507700734716, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.6391618025684621, 'replay_buffer_size': 2855, 'learn_after_episode': True, 'learning_steps': 21, 'n_epochs': 100}.


profit 1147.468762389757 n_trades 58.8


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:15<?, ?it/s]
Profit: 806.6238683643476, Number of Trades: 20.8: 100%|██████████| 100/100 [02:06<00:00,  1.26s/it]             
[I 2024-12-04 22:24:59,479] Trial 38 finished with values: [806.6238683643476, 20.8] and parameters: {'reward_function': 2, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 128, 'training_rounds': 16, 'learning_rate': 3.2567741331150155e-05, 'discount_factor': 0.9391656462179565, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.8914173735359956, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.7458005732861294, 'replay_buffer_size': 417, 'learn_after_episode': True, 'learning_steps': 77, 'n_epochs': 100}.


profit 806.6238683643476 n_trades 20.8


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:50<?, ?it/s]
Profit: 1190.676702335382, Number of Trades: 20.0: 100%|██████████| 100/100 [02:08<00:00,  1.28s/it]              
[I 2024-12-04 22:30:58,458] Trial 39 finished with values: [1190.676702335382, 20.0] and parameters: {'reward_function': 1, 'algorithm': 'dqn', 'n_layers': 3, 'n_units': 64, 'training_rounds': 21, 'learning_rate': 2.0418414792594776e-05, 'discount_factor': 0.8441163411385325, 'batch_size': 128, 'target_update_freq': 1, 'soft_update_tau': 0.33927709184805266, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.7972801389904007, 'replay_buffer_size': 838, 'learn_after_episode': True, 'learning_steps': 13, 'n_epochs': 100}.


profit 1190.676702335382 n_trades 20.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:53<?, ?it/s]
Profit: 807.0533903635168, Number of Trades: 17.6: 100%|██████████| 100/100 [02:06<00:00,  1.27s/it]             
[I 2024-12-04 22:36:58,922] Trial 40 finished with values: [807.0533903635168, 17.6] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 2, 'n_units': 64, 'training_rounds': 25, 'learning_rate': 6.343716834064964e-05, 'discount_factor': 0.822368606042356, 'batch_size': 64, 'target_update_freq': 10, 'soft_update_tau': 0.19809962043523466, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.7003073453122883, 'replay_buffer_size': 4032, 'learn_after_episode': True, 'learning_steps': 34, 'n_epochs': 100}.


profit 807.0533903635168 n_trades 17.6


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [04:17<?, ?it/s]
Profit: 793.1110582362178, Number of Trades: 20.8: 100%|██████████| 100/100 [02:04<00:00,  1.24s/it]             
[I 2024-12-04 22:43:20,978] Trial 41 finished with values: [793.1110582362178, 20.8] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 28, 'learning_rate': 3.8241150666393744e-05, 'discount_factor': 0.9884162013076776, 'batch_size': 128, 'target_update_freq': 24, 'soft_update_tau': 0.1106103311779521, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5839353791674655, 'replay_buffer_size': 469, 'learn_after_episode': True, 'learning_steps': 42, 'n_epochs': 100}.


profit 793.1110582362178 n_trades 20.8


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:25<?, ?it/s]
Profit: 1187.3073357217638, Number of Trades: 19.2: 100%|██████████| 100/100 [02:05<00:00,  1.26s/it]             
[I 2024-12-04 22:48:52,566] Trial 42 finished with values: [1187.3073357217638, 19.2] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 128, 'training_rounds': 19, 'learning_rate': 2.414391445052139e-05, 'discount_factor': 0.9202166644433489, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.46186899290951533, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.6748774401522325, 'replay_buffer_size': 726, 'learn_after_episode': True, 'learning_steps': 50, 'n_epochs': 100}.


profit 1187.3073357217638 n_trades 19.2


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [04:26<?, ?it/s]
Profit: 1183.5169587297605, Number of Trades: 19.2: 100%|██████████| 100/100 [02:04<00:00,  1.24s/it]             
[I 2024-12-04 22:55:23,724] Trial 43 finished with values: [1183.5169587297605, 19.2] and parameters: {'reward_function': 2, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 256, 'training_rounds': 30, 'learning_rate': 7.362405741972098e-05, 'discount_factor': 0.800825736940172, 'batch_size': 128, 'target_update_freq': 1, 'soft_update_tau': 0.5090570669658702, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.8119472950622035, 'replay_buffer_size': 216, 'learn_after_episode': True, 'learning_steps': 29, 'n_epochs': 100}.


profit 1183.5169587297605 n_trades 19.2


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:24<?, ?it/s]
Profit: 828.3060785804465, Number of Trades: 23.2: 100%|██████████| 100/100 [02:07<00:00,  1.27s/it]             
[I 2024-12-04 23:00:56,149] Trial 44 finished with values: [828.3060785804465, 23.2] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 3, 'n_units': 64, 'training_rounds': 18, 'learning_rate': 4.030499707395994e-05, 'discount_factor': 0.8811731657224059, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.6719495042155054, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5954113130746119, 'replay_buffer_size': 545, 'learn_after_episode': True, 'learning_steps': 10, 'n_epochs': 100}.


profit 828.3060785804465 n_trades 23.2


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:31<?, ?it/s]
Profit: 829.4109380240317, Number of Trades: 20.0: 100%|██████████| 100/100 [02:07<00:00,  1.28s/it]             
[I 2024-12-04 23:05:35,670] Trial 45 finished with values: [829.4109380240317, 20.0] and parameters: {'reward_function': 1, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 9, 'learning_rate': 4.7461128489099757e-05, 'discount_factor': 0.9728689799956185, 'batch_size': 128, 'target_update_freq': 1, 'soft_update_tau': 0.3909701005821091, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5376473330028846, 'replay_buffer_size': 831, 'learn_after_episode': True, 'learning_steps': 36, 'n_epochs': 100}.


profit 829.4109380240317 n_trades 20.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [01:51<?, ?it/s]
Profit: 813.765462915384, Number of Trades: 16.4: 100%|██████████| 100/100 [00:35<00:00,  2.81it/s]              
[I 2024-12-04 23:08:03,084] Trial 46 finished with values: [813.765462915384, 16.4] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 128, 'training_rounds': 22, 'learning_rate': 3.001935679770176e-06, 'discount_factor': 0.8930572832968131, 'batch_size': 64, 'target_update_freq': 10, 'soft_update_tau': 0.5957348894876235, 'is_conservative': False, 'lstm': False, 'conservative_alpha': 0.857557456088728, 'replay_buffer_size': 1439, 'learn_after_episode': False, 'learning_steps': 23, 'n_epochs': 100}.


profit 813.765462915384 n_trades 16.4


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:03<?, ?it/s]
Profit: 1158.0619355628507, Number of Trades: 18.4: 100%|██████████| 100/100 [02:09<00:00,  1.30s/it]             
[I 2024-12-04 23:13:16,465] Trial 47 finished with values: [1158.0619355628507, 18.4] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 256, 'training_rounds': 21, 'learning_rate': 3.351488757935667e-05, 'discount_factor': 0.9279933429315902, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.6691803538116929, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.763252063089754, 'replay_buffer_size': 4438, 'learn_after_episode': True, 'learning_steps': 89, 'n_epochs': 100}.


profit 1158.0619355628507 n_trades 18.4


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [04:14<?, ?it/s]
Profit: 1002.8065761743893, Number of Trades: 22.0: 100%|██████████| 100/100 [00:36<00:00,  2.78it/s]             
[I 2024-12-04 23:18:07,289] Trial 48 finished with values: [1002.8065761743893, 22.0] and parameters: {'reward_function': 2, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 11, 'learning_rate': 1.0300484899151762e-05, 'discount_factor': 0.8127457890099576, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.7413069870839563, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.9688729478891558, 'replay_buffer_size': 2408, 'learn_after_episode': False, 'learning_steps': 76, 'n_epochs': 500}.


profit 1002.8065761743893 n_trades 22.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:28<?, ?it/s]
Profit: 1185.1127617823513, Number of Trades: 20.4: 100%|██████████| 100/100 [02:12<00:00,  1.32s/it]             
[I 2024-12-04 23:22:48,139] Trial 49 finished with values: [1185.1127617823513, 20.4] and parameters: {'reward_function': 0, 'algorithm': 'ddqn', 'n_layers': 3, 'n_units': 64, 'training_rounds': 5, 'learning_rate': 5.1255643072352316e-05, 'discount_factor': 0.9505103985776159, 'batch_size': 128, 'target_update_freq': 1, 'soft_update_tau': 0.4410058579398935, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.6281636465447223, 'replay_buffer_size': 2810, 'learn_after_episode': True, 'learning_steps': 56, 'n_epochs': 100}.


profit 1185.1127617823513 n_trades 20.4


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [07:17<?, ?it/s]
Profit: 1191.2542122057437, Number of Trades: 19.6: 100%|██████████| 100/100 [00:36<00:00,  2.72it/s]             
[I 2024-12-04 23:30:42,830] Trial 50 finished with values: [1191.2542122057437, 19.6] and parameters: {'reward_function': 1, 'algorithm': 'dqn', 'n_layers': 2, 'n_units': 512, 'training_rounds': 25, 'learning_rate': 1.9186182512767976e-05, 'discount_factor': 0.8632965234806507, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.5068762528423971, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.6593666153292552, 'replay_buffer_size': 2055, 'learn_after_episode': False, 'learning_steps': 65, 'n_epochs': 500}.


profit 1191.2542122057437 n_trades 19.6


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:19<?, ?it/s]
Profit: 851.5268997417332, Number of Trades: 21.2: 100%|██████████| 100/100 [02:10<00:00,  1.30s/it]             
[I 2024-12-04 23:36:12,455] Trial 51 finished with values: [851.5268997417332, 21.2] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 256, 'training_rounds': 29, 'learning_rate': 3.0073453951094364e-05, 'discount_factor': 0.9106241110385765, 'batch_size': 64, 'target_update_freq': 10, 'soft_update_tau': 0.5756867700652036, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5030011797671627, 'replay_buffer_size': 3054, 'learn_after_episode': True, 'learning_steps': 20, 'n_epochs': 100}.


profit 851.5268997417332 n_trades 21.2


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:53<?, ?it/s]
Profit: 1155.1718294816542, Number of Trades: 23.2: 100%|██████████| 100/100 [02:09<00:00,  1.30s/it]             
[I 2024-12-04 23:41:16,057] Trial 52 finished with values: [1155.1718294816542, 23.2] and parameters: {'reward_function': 0, 'algorithm': 'ddqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 17, 'learning_rate': 4.1488522372720404e-05, 'discount_factor': 0.9592561430104574, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.8033134187741919, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.7173881511016225, 'replay_buffer_size': 430, 'learn_after_episode': True, 'learning_steps': 83, 'n_epochs': 100}.


profit 1155.1718294816542 n_trades 23.2


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [08:58<?, ?it/s]
Profit: 811.8123113739945, Number of Trades: 19.2: 100%|██████████| 100/100 [00:35<00:00,  2.81it/s]             
[I 2024-12-04 23:50:49,932] Trial 53 finished with values: [811.8123113739945, 19.2] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 128, 'training_rounds': 24, 'learning_rate': 9.025904473500675e-05, 'discount_factor': 0.9197695005060099, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.927616585130872, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.9446818586259873, 'replay_buffer_size': 3620, 'learn_after_episode': False, 'learning_steps': 38, 'n_epochs': 500}.


profit 811.8123113739945 n_trades 19.2


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:14<?, ?it/s]
Profit: 822.5923978735201, Number of Trades: 20.0: 100%|██████████| 100/100 [02:13<00:00,  1.33s/it]             
[I 2024-12-04 23:56:18,397] Trial 54 finished with values: [822.5923978735201, 20.0] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 3, 'n_units': 512, 'training_rounds': 20, 'learning_rate': 7.137162459805037e-05, 'discount_factor': 0.96620550273123, 'batch_size': 128, 'target_update_freq': 1, 'soft_update_tau': 0.5786174798318635, 'is_conservative': True, 'lstm': True, 'conservative_alpha': 0.6301073664352143, 'replay_buffer_size': 2607, 'learn_after_episode': True, 'learning_steps': 69, 'n_epochs': 100}.


profit 822.5923978735201 n_trades 20.0


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [08:06<?, ?it/s]
Profit: 1166.1652580994325, Number of Trades: 16.8: 100%|██████████| 100/100 [00:37<00:00,  2.68it/s]             
[I 2024-12-05 00:05:02,088] Trial 55 finished with values: [1166.1652580994325, 16.8] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 2, 'n_units': 64, 'training_rounds': 26, 'learning_rate': 3.651129953634232e-05, 'discount_factor': 0.8500506399519863, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.653676315721221, 'is_conservative': False, 'lstm': False, 'conservative_alpha': 0.5853132538601251, 'replay_buffer_size': 942, 'learn_after_episode': False, 'learning_steps': 48, 'n_epochs': 500}.


profit 1166.1652580994325 n_trades 16.8


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:48<?, ?it/s]
Profit: 1194.6745218565795, Number of Trades: 17.6: 100%|██████████| 100/100 [02:03<00:00,  1.24s/it]             
[I 2024-12-05 00:09:54,479] Trial 56 finished with values: [1194.6745218565795, 17.6] and parameters: {'reward_function': 0, 'algorithm': 'ddqn', 'n_layers': 1, 'n_units': 256, 'training_rounds': 15, 'learning_rate': 2.5321024416595794e-05, 'discount_factor': 0.884674448493043, 'batch_size': 64, 'target_update_freq': 10, 'soft_update_tau': 0.5343617939846729, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.8536084546881477, 'replay_buffer_size': 364, 'learn_after_episode': True, 'learning_steps': 22, 'n_epochs': 100}.


profit 1194.6745218565795 n_trades 17.6


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [16:34<?, ?it/s]
Profit: 839.8370610984156, Number of Trades: 18.4: 100%|██████████| 100/100 [02:10<00:00,  1.31s/it]             
[I 2024-12-05 00:28:40,290] Trial 57 finished with values: [839.8370610984156, 18.4] and parameters: {'reward_function': 2, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 26, 'learning_rate': 4.733148540015008e-05, 'discount_factor': 0.8738959674802742, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.5986388448073573, 'is_conservative': True, 'lstm': True, 'conservative_alpha': 0.6536359941304517, 'replay_buffer_size': 1284, 'learn_after_episode': True, 'learning_steps': 29, 'n_epochs': 500}.


profit 839.8370610984156 n_trades 18.4


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [01:04<?, ?it/s]
Profit: 1209.4643140255464, Number of Trades: 19.6: 100%|██████████| 100/100 [00:37<00:00,  2.67it/s]             
[I 2024-12-05 00:30:22,644] Trial 58 finished with values: [1209.4643140255464, 19.6] and parameters: {'reward_function': 0, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 512, 'training_rounds': 12, 'learning_rate': 3.287088487151749e-05, 'discount_factor': 0.8967498907136265, 'batch_size': 128, 'target_update_freq': 5, 'soft_update_tau': 0.8346836244988638, 'is_conservative': False, 'lstm': False, 'conservative_alpha': 0.6869661310818248, 'replay_buffer_size': 659, 'learn_after_episode': False, 'learning_steps': 51, 'n_epochs': 100}.


profit 1209.4643140255464 n_trades 19.6


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [16:39<?, ?it/s]
Profit: 1219.21651193597, Number of Trades: 20.0: 100%|██████████| 100/100 [02:07<00:00,  1.28s/it]               
[I 2024-12-05 00:49:10,338] Trial 59 finished with values: [1219.21651193597, 20.0] and parameters: {'reward_function': 1, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 28, 'learning_rate': 5.619621129959668e-05, 'discount_factor': 0.8279923767996897, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.9887585901401134, 'is_conservative': True, 'lstm': True, 'conservative_alpha': 0.7436787757317993, 'replay_buffer_size': 4101, 'learn_after_episode': True, 'learning_steps': 18, 'n_epochs': 500}.


profit 1219.21651193597 n_trades 20.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [01:52<?, ?it/s]
Profit: 1221.8935314538205, Number of Trades: 24.0: 100%|██████████| 100/100 [00:35<00:00,  2.79it/s]             
[I 2024-12-05 00:51:38,222] Trial 60 finished with values: [1221.8935314538205, 24.0] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 29, 'learning_rate': 2.9145745030038324e-05, 'discount_factor': 0.9790066725314294, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.6316007340740714, 'is_conservative': False, 'lstm': False, 'conservative_alpha': 0.5432270918199467, 'replay_buffer_size': 215, 'learn_after_episode': False, 'learning_steps': 34, 'n_epochs': 100}.


profit 1221.8935314538205 n_trades 24.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:19<?, ?it/s]
Profit: 793.806599520599, Number of Trades: 19.2: 100%|██████████| 100/100 [02:10<00:00,  1.31s/it]              
[I 2024-12-05 00:57:08,219] Trial 61 finished with values: [793.806599520599, 19.2] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 29, 'learning_rate': 7.681133374127554e-05, 'discount_factor': 0.9336967653875641, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.27945232894948313, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.542676641992013, 'replay_buffer_size': 4865, 'learn_after_episode': True, 'learning_steps': 25, 'n_epochs': 100}.


profit 793.806599520599 n_trades 19.2


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [02:26<?, ?it/s]
Profit: 831.8968161092488, Number of Trades: 23.2: 100%|██████████| 100/100 [00:38<00:00,  2.60it/s]             
[I 2024-12-05 01:00:13,594] Trial 62 finished with values: [831.8968161092488, 23.2] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 3, 'n_units': 64, 'training_rounds': 23, 'learning_rate': 1.526830402148448e-05, 'discount_factor': 0.9492784489775175, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.19595450621083377, 'is_conservative': False, 'lstm': False, 'conservative_alpha': 0.8909157594158932, 'replay_buffer_size': 553, 'learn_after_episode': False, 'learning_steps': 28, 'n_epochs': 100}.


profit 831.8968161092488 n_trades 23.2


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [03:04<?, ?it/s]
Profit: 1176.2418780225871, Number of Trades: 14.8: 100%|██████████| 100/100 [02:10<00:00,  1.31s/it]             
[I 2024-12-05 01:05:29,594] Trial 63 finished with values: [1176.2418780225871, 14.8] and parameters: {'reward_function': 0, 'algorithm': 'ddqn', 'n_layers': 1, 'n_units': 256, 'training_rounds': 20, 'learning_rate': 2.8624596772086404e-05, 'discount_factor': 0.9554919506728521, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.5662117284337365, 'is_conservative': True, 'lstm': True, 'conservative_alpha': 0.6170333870871364, 'replay_buffer_size': 297, 'learn_after_episode': True, 'learning_steps': 33, 'n_epochs': 100}.


profit 1176.2418780225871 n_trades 14.8


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [21:28<?, ?it/s]
Profit: 1127.1861511656784, Number of Trades: 18.0: 100%|██████████| 100/100 [02:07<00:00,  1.27s/it]             
[I 2024-12-05 01:29:04,980] Trial 64 finished with values: [1127.1861511656784, 18.0] and parameters: {'reward_function': 2, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 512, 'training_rounds': 27, 'learning_rate': 2.1624629098910198e-05, 'discount_factor': 0.9053030038600383, 'batch_size': 128, 'target_update_freq': 1, 'soft_update_tau': 0.41391259737722297, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.7138268239777175, 'replay_buffer_size': 212, 'learn_after_episode': True, 'learning_steps': 21, 'n_epochs': 500}.


profit 1127.1861511656784 n_trades 18.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [01:35<?, ?it/s]
Profit: 1168.9742732694194, Number of Trades: 18.0: 100%|██████████| 100/100 [00:36<00:00,  2.77it/s]             
[I 2024-12-05 01:31:16,499] Trial 65 finished with values: [1168.9742732694194, 18.0] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 64, 'training_rounds': 22, 'learning_rate': 3.548970006203571e-05, 'discount_factor': 0.9776180205284095, 'batch_size': 64, 'target_update_freq': 10, 'soft_update_tau': 0.6263849470192318, 'is_conservative': False, 'lstm': False, 'conservative_alpha': 0.7796578512087796, 'replay_buffer_size': 3618, 'learn_after_episode': False, 'learning_steps': 31, 'n_epochs': 100}.


profit 1168.9742732694194 n_trades 18.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:31<?, ?it/s]
Profit: 599.5733886184219, Number of Trades: 4823.2: 100%|██████████| 100/100 [02:11<00:00,  1.31s/it]           
[I 2024-12-05 01:35:59,340] Trial 66 finished with values: [599.5733886184219, 4823.2] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 64, 'training_rounds': 7, 'learning_rate': 8.605080476004897e-05, 'discount_factor': 0.9650053539413979, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.5305402601411661, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.9694389654443387, 'replay_buffer_size': 1063, 'learn_after_episode': True, 'learning_steps': 24, 'n_epochs': 100}.


profit 599.5733886184219 n_trades 4823.2


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:34<?, ?it/s]
Profit: 802.7503063553709, Number of Trades: 20.8: 100%|██████████| 100/100 [02:12<00:00,  1.32s/it]             
[I 2024-12-05 01:40:45,631] Trial 67 finished with values: [802.7503063553709, 20.8] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 64, 'training_rounds': 8, 'learning_rate': 8.717187782426316e-05, 'discount_factor': 0.9652202835666682, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.4497972370851765, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.9164037719854629, 'replay_buffer_size': 2003, 'learn_after_episode': True, 'learning_steps': 25, 'n_epochs': 100}.


profit 802.7503063553709 n_trades 20.8


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:34<?, ?it/s]
Profit: 1180.9281430961184, Number of Trades: 19.6: 100%|██████████| 100/100 [02:11<00:00,  1.32s/it]             
[I 2024-12-05 01:45:31,729] Trial 68 finished with values: [1180.9281430961184, 19.6] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 64, 'training_rounds': 8, 'learning_rate': 7.90141633496575e-05, 'discount_factor': 0.9824826225732313, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.4946003916638922, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.9779956148760391, 'replay_buffer_size': 1045, 'learn_after_episode': True, 'learning_steps': 17, 'n_epochs': 100}.


profit 1180.9281430961184 n_trades 19.6


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:27<?, ?it/s]
Profit: 705.9613748571536, Number of Trades: 2659.6: 100%|██████████| 100/100 [02:11<00:00,  1.32s/it]           
[I 2024-12-05 01:50:11,611] Trial 69 finished with values: [705.9613748571536, 2659.6] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 64, 'training_rounds': 5, 'learning_rate': 9.984694210560337e-05, 'discount_factor': 0.9535291120934012, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.5389722659260651, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.8331369664064073, 'replay_buffer_size': 2662, 'learn_after_episode': True, 'learning_steps': 24, 'n_epochs': 100}.


profit 705.9613748571536 n_trades 2659.6


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:27<?, ?it/s]
Profit: 1181.0069850291493, Number of Trades: 19.6: 100%|██████████| 100/100 [02:11<00:00,  1.31s/it]             
[I 2024-12-05 01:54:50,892] Trial 70 finished with values: [1181.0069850291493, 19.6] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 64, 'training_rounds': 5, 'learning_rate': 9.935347343295635e-05, 'discount_factor': 0.9547044292673217, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.5339057904864263, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.924861938270016, 'replay_buffer_size': 2608, 'learn_after_episode': True, 'learning_steps': 13, 'n_epochs': 100}.


profit 1181.0069850291493 n_trades 19.6


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:42<?, ?it/s]
Profit: 1170.4465932651367, Number of Trades: 20.8: 100%|██████████| 100/100 [02:11<00:00,  1.31s/it]             
[I 2024-12-05 01:59:45,010] Trial 71 finished with values: [1170.4465932651367, 20.8] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 64, 'training_rounds': 7, 'learning_rate': 9.361412891902177e-05, 'discount_factor': 0.9612105790751374, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.4792339580563748, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.8747303223101527, 'replay_buffer_size': 2957, 'learn_after_episode': True, 'learning_steps': 23, 'n_epochs': 100}.


profit 1170.4465932651367 n_trades 20.8


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:28<?, ?it/s]
Profit: 1182.976278789479, Number of Trades: 20.0: 100%|██████████| 100/100 [02:11<00:00,  1.32s/it]              
[I 2024-12-05 02:04:25,757] Trial 72 finished with values: [1182.976278789479, 20.0] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 64, 'training_rounds': 6, 'learning_rate': 8.609981305967939e-05, 'discount_factor': 0.9425572569459586, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.5209276883606261, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.9652241108662082, 'replay_buffer_size': 2207, 'learn_after_episode': True, 'learning_steps': 27, 'n_epochs': 100}.


profit 1182.976278789479 n_trades 20.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:32<?, ?it/s]
Profit: 1173.4202837924263, Number of Trades: 26.0: 100%|██████████| 100/100 [02:12<00:00,  1.32s/it]             
[I 2024-12-05 02:09:10,211] Trial 73 finished with values: [1173.4202837924263, 26.0] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 64, 'training_rounds': 7, 'learning_rate': 9.372506329692902e-05, 'discount_factor': 0.9661475981761707, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.5579189619133833, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.8293888965733964, 'replay_buffer_size': 2707, 'learn_after_episode': True, 'learning_steps': 14, 'n_epochs': 100}.


profit 1173.4202837924263 n_trades 26.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:42<?, ?it/s]
Profit: 1190.071208761528, Number of Trades: 18.4: 100%|██████████| 100/100 [02:11<00:00,  1.31s/it]              
[I 2024-12-05 02:14:03,793] Trial 74 finished with values: [1190.071208761528, 18.4] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 64, 'training_rounds': 7, 'learning_rate': 9.420052915090067e-05, 'discount_factor': 0.9776252284740502, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.5489732691856007, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.8299364320617432, 'replay_buffer_size': 3389, 'learn_after_episode': True, 'learning_steps': 19, 'n_epochs': 100}.


profit 1190.071208761528 n_trades 18.4


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:26<?, ?it/s]
Profit: 859.6064960019232, Number of Trades: 20.8: 100%|██████████| 100/100 [02:05<00:00,  1.26s/it]             
[I 2024-12-05 02:18:36,026] Trial 75 finished with values: [859.6064960019232, 20.8] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 64, 'training_rounds': 5, 'learning_rate': 9.779147552594467e-05, 'discount_factor': 0.9894398872353979, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.3968727979758382, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.8130497240979427, 'replay_buffer_size': 2483, 'learn_after_episode': True, 'learning_steps': 35, 'n_epochs': 100}.


profit 859.6064960019232 n_trades 20.8


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:37<?, ?it/s]
Profit: 835.3209293664981, Number of Trades: 21.6: 100%|██████████| 100/100 [02:08<00:00,  1.29s/it]             
[I 2024-12-05 02:23:22,143] Trial 76 finished with values: [835.3209293664981, 21.6] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 64, 'training_rounds': 7, 'learning_rate': 9.069864139718323e-05, 'discount_factor': 0.9689536883342142, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.6053972726321288, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.8957171133752951, 'replay_buffer_size': 2683, 'learn_after_episode': True, 'learning_steps': 14, 'n_epochs': 100}.


profit 835.3209293664981 n_trades 21.6


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:49<?, ?it/s]
Profit: 1154.416160662522, Number of Trades: 20.8: 100%|██████████| 100/100 [02:09<00:00,  1.30s/it]              
[I 2024-12-05 02:28:21,588] Trial 77 finished with values: [1154.416160662522, 20.8] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 128, 'training_rounds': 9, 'learning_rate': 9.992828714620023e-05, 'discount_factor': 0.950776940598714, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.46272132875708544, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.8519779442181787, 'replay_buffer_size': 1798, 'learn_after_episode': True, 'learning_steps': 31, 'n_epochs': 100}.


profit 1154.416160662522 n_trades 20.8


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:36<?, ?it/s]
Profit: 1170.6427954050052, Number of Trades: 18.4: 100%|██████████| 100/100 [02:10<00:00,  1.30s/it]             
[I 2024-12-05 02:33:09,156] Trial 78 finished with values: [1170.6427954050052, 18.4] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 64, 'training_rounds': 6, 'learning_rate': 6.329478265208871e-05, 'discount_factor': 0.9442133416176007, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.4905911137927037, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.872812170359341, 'replay_buffer_size': 2281, 'learn_after_episode': True, 'learning_steps': 23, 'n_epochs': 100}.


profit 1170.6427954050052 n_trades 18.4


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:54<?, ?it/s]
Profit: 841.2214900923377, Number of Trades: 19.6: 100%|██████████| 100/100 [02:12<00:00,  1.33s/it]             
[I 2024-12-05 02:38:16,430] Trial 79 finished with values: [841.2214900923377, 19.6] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 64, 'training_rounds': 10, 'learning_rate': 9.52020710761829e-05, 'discount_factor': 0.966162445847306, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.3676796442769496, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.9490175159256735, 'replay_buffer_size': 3195, 'learn_after_episode': True, 'learning_steps': 25, 'n_epochs': 100}.


profit 841.2214900923377 n_trades 19.6


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:33<?, ?it/s]
Profit: 830.5395105016868, Number of Trades: 19.2: 100%|██████████| 100/100 [02:10<00:00,  1.31s/it]             
[I 2024-12-05 02:43:01,211] Trial 80 finished with values: [830.5395105016868, 19.2] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 64, 'training_rounds': 7, 'learning_rate': 9.188033109872281e-05, 'discount_factor': 0.9393103904340235, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.333020672488995, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.9865421100375856, 'replay_buffer_size': 1610, 'learn_after_episode': True, 'learning_steps': 17, 'n_epochs': 100}.


profit 830.5395105016868 n_trades 19.2


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:30<?, ?it/s]
Profit: 1136.1688375933734, Number of Trades: 20.0: 100%|██████████| 100/100 [02:12<00:00,  1.32s/it]             
[I 2024-12-05 02:47:43,764] Trial 81 finished with values: [1136.1688375933734, 20.0] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 64, 'training_rounds': 6, 'learning_rate': 8.686814149558845e-05, 'discount_factor': 0.9584041623038373, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.5791658433583102, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.7895509722033242, 'replay_buffer_size': 2707, 'learn_after_episode': True, 'learning_steps': 21, 'n_epochs': 100}.


profit 1136.1688375933734 n_trades 20.0


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [12:17<?, ?it/s]
Profit: 798.7229310431923, Number of Trades: 22.0: 100%|██████████| 100/100 [02:10<00:00,  1.31s/it]             
[I 2024-12-05 03:02:11,664] Trial 82 finished with values: [798.7229310431923, 22.0] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 128, 'training_rounds': 5, 'learning_rate': 8.303160129507912e-05, 'discount_factor': 0.9639239086148944, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.5212749195418178, 'is_conservative': True, 'lstm': True, 'conservative_alpha': 0.8421632986364335, 'replay_buffer_size': 3028, 'learn_after_episode': True, 'learning_steps': 27, 'n_epochs': 500}.


profit 798.7229310431923 n_trades 22.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:49<?, ?it/s]
Profit: 1200.6779776127592, Number of Trades: 18.4: 100%|██████████| 100/100 [02:11<00:00,  1.32s/it]             
[I 2024-12-05 03:07:12,803] Trial 83 finished with values: [1200.6779776127592, 18.4] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 64, 'training_rounds': 8, 'learning_rate': 9.639290489923216e-05, 'discount_factor': 0.9546443547090826, 'batch_size': 128, 'target_update_freq': 5, 'soft_update_tau': 0.5583203532802637, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.9986636941738133, 'replay_buffer_size': 2494, 'learn_after_episode': True, 'learning_steps': 82, 'n_epochs': 100}.


profit 1200.6779776127592 n_trades 18.4


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [00:59<?, ?it/s]
Profit: 816.4455286929017, Number of Trades: 25.2: 100%|██████████| 100/100 [00:37<00:00,  2.68it/s]             
[I 2024-12-05 03:08:49,899] Trial 84 finished with values: [816.4455286929017, 25.2] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 512, 'training_rounds': 6, 'learning_rate': 8.883134419411386e-05, 'discount_factor': 0.9754366056892795, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.627421058865239, 'is_conservative': False, 'lstm': False, 'conservative_alpha': 0.8754726574609105, 'replay_buffer_size': 2902, 'learn_after_episode': False, 'learning_steps': 30, 'n_epochs': 100}.


profit 816.4455286929017 n_trades 25.2


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:42<?, ?it/s]
Profit: 814.3755407982285, Number of Trades: 19.2: 100%|██████████| 100/100 [02:12<00:00,  1.33s/it]             
[I 2024-12-05 03:13:44,999] Trial 85 finished with values: [814.3755407982285, 19.2] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 256, 'training_rounds': 10, 'learning_rate': 7.683872416200331e-05, 'discount_factor': 0.9485801727970181, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.4243368658145225, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.8168475521681114, 'replay_buffer_size': 2141, 'learn_after_episode': True, 'learning_steps': 20, 'n_epochs': 100}.


profit 814.3755407982285 n_trades 19.2


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:31<?, ?it/s]
Profit: 817.5128916701968, Number of Trades: 15.6: 100%|██████████| 100/100 [02:05<00:00,  1.25s/it]             
[I 2024-12-05 03:18:21,877] Trial 86 finished with values: [817.5128916701968, 15.6] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 64, 'training_rounds': 5, 'learning_rate': 6.8225809538563e-05, 'discount_factor': 0.9390750652284544, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.7048868394162238, 'is_conservative': True, 'lstm': True, 'conservative_alpha': 0.9332014072993264, 'replay_buffer_size': 1907, 'learn_after_episode': True, 'learning_steps': 24, 'n_epochs': 100}.


profit 817.5128916701968 n_trades 15.6


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [13:01<?, ?it/s]
Profit: 1203.0299847159772, Number of Trades: 20.4: 100%|██████████| 100/100 [02:12<00:00,  1.32s/it]             
[I 2024-12-05 03:33:35,476] Trial 87 finished with values: [1203.0299847159772, 20.4] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 64, 'training_rounds': 7, 'learning_rate': 9.276919738840473e-05, 'discount_factor': 0.970974044407069, 'batch_size': 128, 'target_update_freq': 5, 'soft_update_tau': 0.5047638771070968, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.9580410150707483, 'replay_buffer_size': 2372, 'learn_after_episode': True, 'learning_steps': 27, 'n_epochs': 500}.


profit 1203.0299847159772 n_trades 20.4


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [00:54<?, ?it/s]
Profit: 812.9799689525812, Number of Trades: 20.8: 100%|██████████| 100/100 [00:38<00:00,  2.59it/s]             
[I 2024-12-05 03:35:08,633] Trial 88 finished with values: [812.9799689525812, 20.8] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 64, 'training_rounds': 10, 'learning_rate': 9.783003979465587e-05, 'discount_factor': 0.9297058264983096, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.5337236497560218, 'is_conservative': False, 'lstm': False, 'conservative_alpha': 0.7646037088888741, 'replay_buffer_size': 1371, 'learn_after_episode': False, 'learning_steps': 70, 'n_epochs': 100}.


profit 812.9799689525812 n_trades 20.8


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:34<?, ?it/s]
Profit: 1187.996163667209, Number of Trades: 20.4: 100%|██████████| 100/100 [02:12<00:00,  1.32s/it]              
[I 2024-12-05 03:39:55,535] Trial 89 finished with values: [1187.996163667209, 20.4] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 128, 'training_rounds': 8, 'learning_rate': 8.485758549877413e-05, 'discount_factor': 0.923538506327653, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.6495945519385797, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.7996666588512544, 'replay_buffer_size': 3801, 'learn_after_episode': True, 'learning_steps': 40, 'n_epochs': 100}.


profit 1187.996163667209 n_trades 20.4


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [13:53<?, ?it/s]
Profit: 787.3007700821362, Number of Trades: 14.8: 100%|██████████| 100/100 [02:12<00:00,  1.32s/it]             
[I 2024-12-05 03:56:00,993] Trial 90 finished with values: [787.3007700821362, 14.8] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 512, 'training_rounds': 9, 'learning_rate': 9.674086419165565e-05, 'discount_factor': 0.9806388148793448, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.58693750871637, 'is_conservative': True, 'lstm': True, 'conservative_alpha': 0.916145516159839, 'replay_buffer_size': 4273, 'learn_after_episode': True, 'learning_steps': 62, 'n_epochs': 500}.


profit 787.3007700821362 n_trades 14.8


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:38<?, ?it/s]
Profit: 752.4177092676207, Number of Trades: 18.0: 100%|██████████| 100/100 [02:11<00:00,  1.31s/it]             
[I 2024-12-05 04:00:50,834] Trial 91 finished with values: [752.4177092676207, 18.0] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 64, 'training_rounds': 6, 'learning_rate': 7.337711186435687e-05, 'discount_factor': 0.9613766494266842, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.5596521392849144, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.9031748811053627, 'replay_buffer_size': 1579, 'learn_after_episode': True, 'learning_steps': 33, 'n_epochs': 100}.


profit 752.4177092676207 n_trades 18.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:34<?, ?it/s]
Profit: 1170.2512248167443, Number of Trades: 20.8: 100%|██████████| 100/100 [02:11<00:00,  1.32s/it]             
[I 2024-12-05 04:05:37,407] Trial 92 finished with values: [1170.2512248167443, 20.8] and parameters: {'reward_function': 2, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 64, 'training_rounds': 5, 'learning_rate': 6.43702441380284e-05, 'discount_factor': 0.9581294212556224, 'batch_size': 128, 'target_update_freq': 5, 'soft_update_tau': 0.468580576241294, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.7391000785519188, 'replay_buffer_size': 2778, 'learn_after_episode': True, 'learning_steps': 19, 'n_epochs': 100}.


profit 1170.2512248167443 n_trades 20.8


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:43<?, ?it/s]
Profit: 1175.1053231826088, Number of Trades: 18.4: 100%|██████████| 100/100 [00:37<00:00,  2.68it/s]             
[I 2024-12-05 04:06:57,872] Trial 93 finished with values: [1175.1053231826088, 18.4] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 256, 'training_rounds': 7, 'learning_rate': 8.072512612038066e-05, 'discount_factor': 0.9527442682821589, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.15058852593757044, 'is_conservative': False, 'lstm': False, 'conservative_alpha': 0.8421529245416435, 'replay_buffer_size': 2492, 'learn_after_episode': True, 'learning_steps': 12, 'n_epochs': 100}.


profit 1175.1053231826088 n_trades 18.4


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [04:21<?, ?it/s]
Profit: 1196.8400803723307, Number of Trades: 17.6: 100%|██████████| 100/100 [02:12<00:00,  1.33s/it]             
[I 2024-12-05 04:13:32,415] Trial 94 finished with values: [1196.8400803723307, 17.6] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 3, 'n_units': 64, 'training_rounds': 6, 'learning_rate': 8.835897402351238e-05, 'discount_factor': 0.8125299770502071, 'batch_size': 64, 'target_update_freq': 10, 'soft_update_tau': 0.49323673687206127, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.7552471901934543, 'replay_buffer_size': 3366, 'learn_after_episode': False, 'learning_steps': 22, 'n_epochs': 100}.


profit 1196.8400803723307 n_trades 17.6


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:38<?, ?it/s]
Profit: 1186.941032043179, Number of Trades: 22.0: 100%|██████████| 100/100 [02:13<00:00,  1.33s/it]              
[I 2024-12-05 04:18:23,707] Trial 95 finished with values: [1186.941032043179, 22.0] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 64, 'training_rounds': 5, 'learning_rate': 9.529521093189394e-05, 'discount_factor': 0.945902857580424, 'batch_size': 128, 'target_update_freq': 5, 'soft_update_tau': 0.9348816772288298, 'is_conservative': True, 'lstm': True, 'conservative_alpha': 0.9417332990257385, 'replay_buffer_size': 3171, 'learn_after_episode': True, 'learning_steps': 16, 'n_epochs': 100}.


profit 1186.941032043179 n_trades 22.0


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [13:26<?, ?it/s]
Profit: 780.7667125910808, Number of Trades: 22.8: 100%|██████████| 100/100 [02:11<00:00,  1.31s/it]             
[I 2024-12-05 04:34:01,732] Trial 96 finished with values: [780.7667125910808, 22.8] and parameters: {'reward_function': 2, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 64, 'training_rounds': 8, 'learning_rate': 5.819635119790756e-05, 'discount_factor': 0.962374921517076, 'batch_size': 64, 'target_update_freq': 24, 'soft_update_tau': 0.5450854010577009, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.9822530392838572, 'replay_buffer_size': 2847, 'learn_after_episode': True, 'learning_steps': 29, 'n_epochs': 500}.


profit 780.7667125910808 n_trades 22.8


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [01:05<?, ?it/s]
Profit: 1242.2589504212876, Number of Trades: 21.2: 100%|██████████| 100/100 [00:37<00:00,  2.67it/s]             
[I 2024-12-05 04:35:44,516] Trial 97 finished with values: [1242.2589504212876, 21.2] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 128, 'training_rounds': 6, 'learning_rate': 9.988529643519465e-05, 'discount_factor': 0.9683804196048948, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.4365939156988392, 'is_conservative': False, 'lstm': False, 'conservative_alpha': 0.9694228166974908, 'replay_buffer_size': 1128, 'learn_after_episode': False, 'learning_steps': 26, 'n_epochs': 100}.


profit 1242.2589504212876 n_trades 21.2


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [01:09<?, ?it/s]
Profit: 1183.7035614485405, Number of Trades: 20.0: 100%|██████████| 100/100 [00:38<00:00,  2.58it/s]             
[I 2024-12-05 04:37:32,811] Trial 98 finished with values: [1183.7035614485405, 20.0] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 3, 'n_units': 128, 'training_rounds': 6, 'learning_rate': 9.112552909818727e-05, 'discount_factor': 0.830955192785385, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.6131430129854597, 'is_conservative': False, 'lstm': False, 'conservative_alpha': 0.9704634595302919, 'replay_buffer_size': 1122, 'learn_after_episode': False, 'learning_steps': 26, 'n_epochs': 100}.


profit 1183.7035614485405 n_trades 20.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [02:37<?, ?it/s]
Profit: 826.0222054107403, Number of Trades: 21.2: 100%|██████████| 100/100 [02:06<00:00,  1.27s/it]             
[I 2024-12-05 04:42:17,568] Trial 99 finished with values: [826.0222054107403, 21.2] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 3, 'n_units': 64, 'training_rounds': 9, 'learning_rate': 9.987000521254206e-05, 'discount_factor': 0.9687608589096867, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.4358153156186866, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.9609207212374272, 'replay_buffer_size': 1247, 'learn_after_episode': True, 'learning_steps': 24, 'n_epochs': 100}.


profit 826.0222054107403 n_trades 21.2


In [20]:
# print(f"Best value: {study.best_value} (params: {study.best_params})")
best_trials=study.best_trials
best_trials

[FrozenTrial(number=45, state=TrialState.COMPLETE, values=[1191.6558529692093, 20.4], datetime_start=datetime.datetime(2024, 11, 24, 2, 9, 29, 249544), datetime_complete=datetime.datetime(2024, 11, 24, 2, 14, 53, 613837), params={'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 1, 'n_units': 256, 'training_rounds': 21, 'learning_rate': 2.8764743562927777e-05, 'discount_factor': 0.911339518183664, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.30917867193935245, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.5255327383263214, 'learn_after_episode': True, 'learning_steps': 51, 'n_epochs': 100}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'reward_function': CategoricalDistribution(choices=(0, 1, 2)), 'algorithm': CategoricalDistribution(choices=('dqn', 'ddqn')), 'n_layers': IntDistribution(high=3, log=False, low=1, step=1), 'n_units': CategoricalDistribution(choices=(64, 128, 256, 512)), 'training_rounds': IntDistribution(h

In [21]:
best_trials=study.best_trials
best_trail=best_trials[3]
best_params=best_trail.params
reward_func=reward_functions[best_params.pop('reward_function')]
train_env.reward_func=reward_func
test_env.reward_func=reward_func

In [22]:
algo=best_params.pop('algorithm')

learning_params={'learn_after_episode':best_params.pop('learn_after_episode'),
                    'learning_steps':best_params.pop('learning_steps'),
                    'n_epochs':best_params.pop('n_epochs'),
                    }
best_params['hidden_dims']=make_hidden_dims(n_layers=best_params.pop('n_layers'),n_units=best_params.pop('n_units'))
best_params['lstm']=best_params.pop('lstm')
best_params['action_space_dim']=len(train_env.positions)
best_params['observation_space_dim']=train_env.observation_space.shape[0]
if algo=='dqn':
    agent=pearl_utils.create_dqn_model(**best_params)
elif algo=='ddqn':
    agent=pearl_utils.create_ddqn_model(**best_params)

best_params,learning_params

({'training_rounds': 28,
  'learning_rate': 3.502195725430407e-05,
  'discount_factor': 0.9447058427910774,
  'batch_size': 128,
  'target_update_freq': 24,
  'soft_update_tau': 0.5996751853354005,
  'is_conservative': True,
  'conservative_alpha': 0.6505399516357236,
  'hidden_dims': [128, 128],
  'lstm': True,
  'action_space_dim': 2,
  'observation_space_dim': 24},
 {'learn_after_episode': True, 'learning_steps': 72, 'n_epochs': 100})

In [23]:


agent=pearl_utils.train_pearl_model(agent,train_env,**learning_params)

profit,n_trades=pearl_utils.test_pearl_model(agent,test_env)

agent=pearl_utils.train_pearl_model(agent,test_env,**learning_params)

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [05:14<?, ?it/s]
  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [05:42<?, ?it/s]


In [24]:
import torch

In [25]:
torch.load

<function torch.serialization.load(f: Union[str, os.PathLike, BinaryIO, IO[bytes]], map_location: Union[Callable[[torch.types.Storage, str], torch.types.Storage], torch.device, str, Dict[str, str], NoneType] = None, pickle_module: Any = None, *, weights_only: Optional[bool] = None, mmap: Optional[bool] = None, **pickle_load_args: Any) -> Any>