In [6]:
import warnings
import logging


warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

In [7]:
from utils import make_environments
from utils import pearl_utils
from configs import defaults
from utils.reward_functions import log_reward_function,cumulative_reward_function,sharpe_reward_function
from utils. utils import make_hidden_dims
import optuna
from optuna.samplers import TPESampler

from neuralforecast.core import NeuralForecast
from Pearl.pearl.utils.instantiations.environments.gym_environment import GymEnvironment
from Pearl.pearl.utils.functional_utils.train_and_eval.online_learning import \
    online_learning
import datetime

In [8]:
# Candidate reward functions. The tuning objective further down selects one
# of them by its index in this list, so the order matters.
reward_functions = [
    log_reward_function,
    cumulative_reward_function,
    sharpe_reward_function,
]

# Build the train/test environment pair, starting with the log reward.
train_env, test_env = make_environments.make_envs(
    reward_function=log_reward_function,
)


Seed set to 4
Seed set to 3


['data/binanceus-DOGEUSDT-1h.pkl']


100%|██████████| 78/78 [00:00<00:00, 1923.12it/s]
1it [00:00,  9.41it/s]
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

2024-10-22 11:00:00 2024-11-21 11:00:00


In [9]:
# Keep a handle on the environment's positions, then display the basic facts
# of the training environment as one tuple:
# (number of actions, environment name, observation shape).
sp = train_env.positions
(
    train_env.action_space.n,
    train_env.name,
    train_env.observation_space.shape,
)

(np.int64(2), 'DOGEUSDT_train', (24,))

In [10]:
# Sanity check of the helper: request 3 hidden layers of 64 units and display
# the list of layer sizes it returns.
make_hidden_dims(n_layers=3, n_units=64)

[64, 64, 64]

In [11]:
# Baseline DDQN agent with hand-picked (untuned) settings.
agent = pearl_utils.create_ddqn_model(
    # Network input/output sizes come from the training environment.
    observation_space_dim=train_env.observation_space.shape[0],
    action_space_dim=train_env.action_space.n,
    hidden_dims=[64, 64, 64],
    # Optimisation settings.
    training_rounds=20,
    learning_rate=0.001,
    discount_factor=0.99,
    batch_size=128,
    # Target-network settings.
    target_update_freq=10,
    soft_update_tau=0.75,  # a value of 1 indicates no soft updates
    # Conservative mode is switched off for the baseline.
    # NOTE(review): conservative_alpha is passed as a bool here, while the
    # tuning objective samples it as a float in [0.5, 1.0] -- confirm that a
    # bool is acceptable when is_conservative is False.
    is_conservative=False,
    conservative_alpha=False,
    replay_buffer_size=10_000,
    lstm=False,
)

In [12]:
# Display the test observation shape next to the training action count.
(test_env.observation_space.shape, train_env.action_space.n)

((24,), np.int64(2))

In [13]:
# Wrap the training environment in Pearl's Gym adapter, then hand the agent
# the initial observation and action space returned by the reset.
env = GymEnvironment(train_env)
obs, action_space = env.reset()
agent.reset(obs, action_space)

In [14]:
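# Manual agent/environment interaction loop, kept for reference but disabled;
# the following cell runs the agent through online_learning(...) instead.
# done = False
# while not done:
#     action = agent.act(exploit=False)
#     action_result = env.step(action)

#     agent.observe(action_result)
#     loss=agent.learn()

#     done = action_result.done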

In [15]:
# Short online-learning run of the baseline agent on the wrapped training
# environment. The settings are gathered in a dict first so each one can be
# annotated, then unpacked into the call.
_online_learning_settings = dict(
    agent=agent,
    env=env,
    # number_of_episodes=10,  # left disabled; the run length is given in steps
    number_of_steps=168,
    print_every_x_episodes=2,
    print_every_x_steps=1,
    learn_every_k_steps=20,
    learn_after_episode=False,
    record_period=169,  # one more than number_of_steps
    seed=0,
)
info = online_learning(**_online_learning_settings)

  0%|          | 0/168 [00:00<?, ?it/s]

In [17]:


def objective_function(trial):
    """Optuna objective: build, train and evaluate one sampled agent.

    Samples a reward function, an algorithm ('dqn' or 'ddqn'), the network
    shape, the model hyper-parameters and the training settings from
    ``trial``; creates the agent through ``pearl_utils``; trains it on the
    module-level ``train_env`` and evaluates it on ``test_env``.

    Side effect: ``reward_func`` is reassigned on the module-level
    ``train_env`` and ``test_env`` objects, so the last trial's reward
    function stays set on them after the call.

    Args:
        trial: Object providing ``suggest_categorical``, ``suggest_int`` and
            ``suggest_float`` (an Optuna trial).

    Returns:
        The ``(profit, n_trades)`` pair returned by
        ``pearl_utils.test_pearl_model`` -- one value per study direction.

    Raises:
        ValueError: If the sampled algorithm is neither 'dqn' nor 'ddqn'.
    """
    # NOTE: the suggest_* calls are kept in their original order so sampling
    # behaves the same as before.
    reward_id = trial.suggest_categorical('reward_function', [0, 1, 2])
    algo = trial.suggest_categorical('algorithm', ['dqn', 'ddqn'])

    # The sampled id indexes the module-level list of reward functions.
    reward_func = reward_functions[reward_id]
    train_env.reward_func = reward_func
    test_env.reward_func = reward_func

    observation_space_dim = train_env.observation_space.shape[0]
    action_space_dim = len(train_env.positions)
    n_layers = trial.suggest_int('n_layers', 1, 3)
    n_units = trial.suggest_categorical('n_units', [64, 128, 256, 512])

    hidden_dims = make_hidden_dims(n_layers=n_layers, n_units=n_units)

    # Keyword arguments forwarded to the model factory.
    search_space = {
        'observation_space_dim': observation_space_dim,
        'action_space_dim': action_space_dim,
        'hidden_dims': hidden_dims,
        'training_rounds': trial.suggest_int('training_rounds', 5, 30),
        'learning_rate': trial.suggest_float('learning_rate', 1e-6, 1e-4),
        'discount_factor': trial.suggest_float('discount_factor', 0.8, 0.99),  # gamma
        'batch_size': trial.suggest_categorical('batch_size', [64, 128]),
        'target_update_freq': trial.suggest_categorical('target_update_freq', [1, 5, 10, 24]),
        'soft_update_tau': trial.suggest_float('soft_update_tau', 0.1, .99),
        'is_conservative': trial.suggest_categorical('is_conservative', [True, False]),
        'lstm': trial.suggest_categorical('lstm', [True, False]),
        'conservative_alpha': trial.suggest_float('conservative_alpha', 0.5, 1.0),
    }

    # Keyword arguments forwarded to the training helper.
    learning_space = {
        'learn_after_episode': trial.suggest_categorical('learn_after_episode', [True, False]),
        'learning_steps': trial.suggest_int('learning_steps', 10, 89),
        'n_epochs': trial.suggest_categorical('n_epochs', [100, 500]),
    }

    if algo == 'dqn':
        agent = pearl_utils.create_dqn_model(**search_space)
    elif algo == 'ddqn':
        agent = pearl_utils.create_ddqn_model(**search_space)
    else:
        # Fail with a clear message instead of an UnboundLocalError on `agent`.
        raise ValueError(f"Unsupported algorithm: {algo!r}")

    agent = pearl_utils.train_pearl_model(agent, train_env, **learning_space)
    profit, n_trades = pearl_utils.test_pearl_model(agent, test_env)

    print('profit',profit,'n_trades',n_trades)

    return profit, n_trades

In [18]:
# Name the Optuna study after the configured model and keep its trials in a
# local SQLite file. (The bare `model_name` expression that used to sit in
# the middle of this cell displayed nothing and has been dropped.)
model_name = defaults.model_name
study_name = f"{model_name}"
storage_name = "sqlite:///PearlHPTuning.sqlite3"

In [19]:
from optuna import create_study

In [20]:

# Create the study, or reopen it if one with this name already exists in the
# storage. The two directions line up with the objective's return value
# (profit, n_trades); both are maximised.
# NOTE(review): the sampler is constructed without a seed, so the suggested
# parameters differ from run to run.
study = create_study(
    study_name=study_name,
    storage=storage_name,
    load_if_exists=True,
    directions=['maximize', 'maximize'],
    sampler=TPESampler(),
)

[I 2024-11-21 10:58:57,021] A new study created in RDB with name: DOGEUSDTSPOT


In [None]:
# Run 30 trials of the tuning objective; each finished trial is recorded in
# the study created above.
study.optimize(objective_function, n_trials=30)


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [06:36<?, ?it/s]
[I 2024-11-21 11:07:31,641] Trial 0 finished with values: [1349.6927628289131, 9.5] and parameters: {'reward_function': 0, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 512, 'training_rounds': 30, 'learning_rate': 2.376432896972149e-05, 'discount_factor': 0.9107871524024563, 'batch_size': 128, 'target_update_freq': 5, 'soft_update_tau': 0.3947699267260204, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.7817686062689246, 'learn_after_episode': True, 'learning_steps': 38, 'n_epochs': 100}.


profit 1349.6927628289131 n_trades 9.5


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:04<?, ?it/s]
[I 2024-11-21 11:07:38,921] Trial 1 finished with values: [1249.879254161998, 26.23] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 1, 'n_units': 512, 'training_rounds': 8, 'learning_rate': 4.991467349701537e-05, 'discount_factor': 0.8492022330712309, 'batch_size': 128, 'target_update_freq': 1, 'soft_update_tau': 0.22638392482024414, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.6038566154450107, 'learn_after_episode': True, 'learning_steps': 82, 'n_epochs': 100}.


profit 1249.879254161998 n_trades 26.23


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/84000 [00:00<?, ?it/s]

  0%|          | 0/500 [02:34<?, ?it/s]
[I 2024-11-21 11:10:15,929] Trial 2 finished with values: [1017.6293333516358, 39.0] and parameters: {'reward_function': 1, 'algorithm': 'ddqn', 'n_layers': 2, 'n_units': 256, 'training_rounds': 26, 'learning_rate': 3.130842465977884e-05, 'discount_factor': 0.8037685663490365, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.7434565141159406, 'is_conservative': False, 'lstm': False, 'conservative_alpha': 0.750487864149691, 'learn_after_episode': False, 'learning_steps': 26, 'n_epochs': 500}.


profit 1017.6293333516358 n_trades 39.0


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:21<?, ?it/s]
[I 2024-11-21 11:10:41,693] Trial 3 finished with values: [1256.0506563400577, 47.25] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 3, 'n_units': 512, 'training_rounds': 30, 'learning_rate': 8.789531456770244e-06, 'discount_factor': 0.8320350932322402, 'batch_size': 64, 'target_update_freq': 1, 'soft_update_tau': 0.24535496052088956, 'is_conservative': True, 'lstm': False, 'conservative_alpha': 0.6585274668438362, 'learn_after_episode': True, 'learning_steps': 52, 'n_epochs': 100}.


profit 1256.0506563400577 n_trades 47.25


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

  0%|          | 0/100 [13:43<?, ?it/s]
[I 2024-11-21 11:26:25,069] Trial 4 finished with values: [1041.7199005367222, 17.13] and parameters: {'reward_function': 0, 'algorithm': 'dqn', 'n_layers': 2, 'n_units': 64, 'training_rounds': 29, 'learning_rate': 5.6252634117492825e-05, 'discount_factor': 0.9757698667891089, 'batch_size': 64, 'target_update_freq': 5, 'soft_update_tau': 0.17698776324061644, 'is_conservative': False, 'lstm': True, 'conservative_alpha': 0.6190512426046135, 'learn_after_episode': False, 'learning_steps': 47, 'n_epochs': 100}.


profit 1041.7199005367222 n_trades 17.13


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/16800 [00:00<?, ?it/s]

In [None]:
# The study has two directions, so the single-objective accessors
# (study.best_value / study.best_params) do not apply; inspect the list of
# best trials instead.
best_trials = study.best_trials
best_trials

In [None]:
# Pick one trial from the study's best trials and apply its reward function
# to both environments.
# NOTE(review): index 3 is hard-coded; it raises IndexError when fewer than
# four trials are returned, so check len(best_trials) first.
best_trials = study.best_trials
best_trail = best_trials[3]

# Work on a copy: this cell and the next one pop() keys out of the dict, and
# popping from the trial's own params dict would mutate the trial object.
best_params = dict(best_trail.params)

reward_func = reward_functions[best_params.pop('reward_function')]
train_env.reward_func = reward_func
test_env.reward_func = reward_func

In [None]:
# Split the selected trial's parameters into training settings and model
# settings, then build the agent.
# This cell consumes keys from best_params via pop(), so re-running it without
# first re-running the cell that creates best_params raises KeyError.
algo = best_params.pop('algorithm')

learning_params = {
    'learn_after_episode': best_params.pop('learn_after_episode'),
    'learning_steps': best_params.pop('learning_steps'),
    'n_epochs': best_params.pop('n_epochs'),
}

# Replace the sampled (n_layers, n_units) pair with the explicit layer list
# and add the environment-derived sizes. 'lstm' stays in best_params as-is.
best_params['hidden_dims'] = make_hidden_dims(
    n_layers=best_params.pop('n_layers'),
    n_units=best_params.pop('n_units'),
)
best_params['action_space_dim'] = len(train_env.positions)
best_params['observation_space_dim'] = train_env.observation_space.shape[0]

if algo == 'dqn':
    agent = pearl_utils.create_dqn_model(**best_params)
elif algo == 'ddqn':
    agent = pearl_utils.create_ddqn_model(**best_params)
else:
    # Without this branch an unexpected value would silently leave the
    # `agent` from an earlier cell in place.
    raise ValueError(f"Unsupported algorithm: {algo!r}")

best_params, learning_params

In [None]:


# 1) Train the selected agent on the training environment.
agent = pearl_utils.train_pearl_model(agent, train_env, **learning_params)

# 2) Evaluate it on the test environment.
profit, n_trades = pearl_utils.test_pearl_model(agent, test_env)

# 3) Continue training on the test environment.
# NOTE(review): this trains on the data that was just used for evaluation --
# presumably to fit on the most recent period before the follow-up notebook;
# confirm this is intended and do not reuse `profit` as an out-of-sample
# figure for the resulting agent.
agent = pearl_utils.train_pearl_model(agent, test_env, **learning_params)

In [None]:
# Execute the follow-up notebook. `run` is a line magic, so it takes a single
# `%`; the `%%run` form is looked up as a cell magic and fails with a
# UsageError.
%run Pearl_Fit_Best_Agent.ipynb
