In [1]:
import os
import sys

import pandas as pd

from datetime import datetime, timedelta
from pathlib import Path


# Treat the parent of the current working directory as the project root and
# make it importable, so the project-local imports below can be resolved.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
already_importable = any(entry == project_root for entry in sys.path)
if not already_importable:
    # Append (rather than prepend) so existing entries keep their priority.
    sys.path.append(project_root)
from stable_baselines3 import PPO, A2C
from stable_baselines3.common.vec_env import VecNormalize, DummyVecEnv
from trading.environments.forex_env2_flat import ForexTradingEnv
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.monitor import Monitor
from data_management.dataset_manager import DatasetManager
from sb3_contrib import RecurrentPPO


Logging configuration loaded from /Users/floriankockler/Code/GitHub.nosync/ai6-gcp-bot/forex_trading_system/config/logging_config.yaml


In [None]:
# Import our custom modules
from trading.agents.training_agent_lstm import FXTradingTrainer
from trading.environments.forex_env2_flat import ForexTradingEnv as FX_ENV_FLAT
from sb3_contrib import RecurrentPPO

# Settings for the trainer that drives both the hyperparameter search and the
# final training run further down in this cell.
trainer_config = dict(
    env_class=FX_ENV_FLAT,
    data_path='./EUR_USD_5min_1D_all_indic_norm_unbiased.parquet',
    pair='EUR_USD',
    sequence_length=5,
    model_class=RecurrentPPO,
    use_sequences=True,
    tensorboard_log="logs/optuna_ppo_LSTM__08dec_new_data_v2",
)
trainer = FXTradingTrainer(**trainer_config)

# Hyperparameter search: the result is kept in `best_params` and fed to the
# final training run.
search_settings = {
    "total_timesteps": 1_000_000,  # Steps per trial
    "n_trials": 40,                # Number of trials to run
    "n_startup_trials": 10,        # Number of random trials before TPE
    "n_evaluations": 2,            # Episodes per evaluation
    "eval_freq": 50_000,           # How often to evaluate
}
best_params = trainer.optimize(**search_settings)

# Final training run, using the hyperparameters returned by the search above.
final_run_settings = {
    "total_timesteps": 1_000_000,
    "eval_freq": 10_000,
    "save_freq": 10_000,
    "hyperparams": best_params,
}
model = trainer.train(**final_run_settings)

2024-12-08 20:37:26 - oandapyV20.oandapyV20 - INFO - oandapyV20.py:207 - setting up API-client for environment practice
Dataset split sizes:
Training: 1215220 samples (70.0%)
Validation: 260404 samples (15.0%)
Test: 260405 samples (15.0%)


[I 2024-12-08 20:37:27,461] A new study created in RDB with name: lstm_ppo_08_dec


2024-12-08 20:39:05 - ForexEnv2_flat - INFO - forex_env2_flat.py:320 - 
Episode Summary:
2024-12-08 20:39:05 - ForexEnv2_flat - INFO - forex_env2_flat.py:321 - Final Return: -2.02%
2024-12-08 20:39:05 - ForexEnv2_flat - INFO - forex_env2_flat.py:322 - Total PnL: -9753.43
2024-12-08 20:39:05 - ForexEnv2_flat - INFO - forex_env2_flat.py:323 - Total Trades: 352
2024-12-08 20:39:05 - ForexEnv2_flat - INFO - forex_env2_flat.py:324 - Winning Trades: 178
2024-12-08 20:39:05 - ForexEnv2_flat - INFO - forex_env2_flat.py:325 - Win Rate: 50.57%
2024-12-08 20:39:05 - ForexEnv2_flat - INFO - forex_env2_flat.py:326 - Initial Balance: 1000000.00
2024-12-08 20:39:05 - ForexEnv2_flat - INFO - forex_env2_flat.py:327 - Final Balance: 979785.73
2024-12-08 20:39:05 - ForexEnv2_flat - INFO - forex_env2_flat.py:328 - Trade_size: 94510.00
2024-12-08 20:39:05 - ForexEnv2_flat - INFO - forex_env2_flat.py:329 - --------------------------------------------------
2024-12-08 20:39:18 - ForexEnv2_flat - INFO - forex

[I 2024-12-08 21:15:37,501] Trial 0 finished with value: -0.008043999999999999 and parameters: {'batch_size': 64, 'n_steps': 1024, 'gamma': 0.995, 'learning_rate': 0.42879638509673906, 'ent_coef': 0.0008033020749661096, 'clip_range': 0.3, 'n_epochs': 1, 'gae_lambda': 0.99, 'max_grad_norm': 0.3, 'vf_coef': 0.7979795532545555, 'net_arch': 'medium', 'enable_critic_lstm': False, 'lstm_hidden_size': 64}. Best is trial 0 with value: -0.008043999999999999.


2024-12-08 21:19:41 - ForexEnv2_flat - INFO - forex_env2_flat.py:320 - 
Episode Summary:
2024-12-08 21:19:41 - ForexEnv2_flat - INFO - forex_env2_flat.py:321 - Final Return: -31.30%
2024-12-08 21:19:41 - ForexEnv2_flat - INFO - forex_env2_flat.py:322 - Total PnL: -1001.81
2024-12-08 21:19:41 - ForexEnv2_flat - INFO - forex_env2_flat.py:323 - Total Trades: 10515
2024-12-08 21:19:41 - ForexEnv2_flat - INFO - forex_env2_flat.py:324 - Winning Trades: 3841
2024-12-08 21:19:41 - ForexEnv2_flat - INFO - forex_env2_flat.py:325 - Win Rate: 36.53%
2024-12-08 21:19:41 - ForexEnv2_flat - INFO - forex_env2_flat.py:326 - Initial Balance: 1000000.00
2024-12-08 21:19:41 - ForexEnv2_flat - INFO - forex_env2_flat.py:327 - Final Balance: 686953.58
2024-12-08 21:19:41 - ForexEnv2_flat - INFO - forex_env2_flat.py:328 - Trade_size: 94510.00
2024-12-08 21:19:41 - ForexEnv2_flat - INFO - forex_env2_flat.py:329 - --------------------------------------------------
2024-12-08 21:20:38 - ForexEnv2_flat - INFO - f

[I 2024-12-08 22:50:49,524] Trial 1 finished with value: 0.009654 and parameters: {'batch_size': 8, 'n_steps': 256, 'gamma': 0.9999, 'learning_rate': 0.10941782181836622, 'ent_coef': 0.02964331587776015, 'clip_range': 0.1, 'n_epochs': 5, 'gae_lambda': 0.99, 'max_grad_norm': 5, 'vf_coef': 0.9635921497334217, 'net_arch': 'tiny', 'enable_critic_lstm': False, 'lstm_hidden_size': 256}. Best is trial 1 with value: 0.009654.


2024-12-08 22:57:04 - ForexEnv2_flat - INFO - forex_env2_flat.py:320 - 
Episode Summary:
2024-12-08 22:57:04 - ForexEnv2_flat - INFO - forex_env2_flat.py:321 - Final Return: -0.80%
2024-12-08 22:57:04 - ForexEnv2_flat - INFO - forex_env2_flat.py:322 - Total PnL: -4385.26
2024-12-08 22:57:04 - ForexEnv2_flat - INFO - forex_env2_flat.py:323 - Total Trades: 122
2024-12-08 22:57:04 - ForexEnv2_flat - INFO - forex_env2_flat.py:324 - Winning Trades: 68
2024-12-08 22:57:04 - ForexEnv2_flat - INFO - forex_env2_flat.py:325 - Win Rate: 55.74%
2024-12-08 22:57:04 - ForexEnv2_flat - INFO - forex_env2_flat.py:326 - Initial Balance: 1000000.00
2024-12-08 22:57:04 - ForexEnv2_flat - INFO - forex_env2_flat.py:327 - Final Balance: 991979.41
2024-12-08 22:57:04 - ForexEnv2_flat - INFO - forex_env2_flat.py:328 - Trade_size: 94510.00
2024-12-08 22:57:04 - ForexEnv2_flat - INFO - forex_env2_flat.py:329 - --------------------------------------------------
2024-12-08 22:57:54 - ForexEnv2_flat - INFO - forex_

[I 2024-12-09 00:59:09,278] Trial 2 finished with value: 4.2e-05 and parameters: {'batch_size': 16, 'n_steps': 32, 'gamma': 0.995, 'learning_rate': 0.11513777156640563, 'ent_coef': 0.0023711424142244916, 'clip_range': 0.4, 'n_epochs': 10, 'gae_lambda': 0.8, 'max_grad_norm': 5, 'vf_coef': 0.9488116936002668, 'net_arch': 'small', 'enable_critic_lstm': False, 'lstm_hidden_size': 256}. Best is trial 1 with value: 0.009654.


Eval num_timesteps=1000000, episode_reward=0.00 +/- 0.00
Episode length: 85559.00 +/- 70610.00
2024-12-09 01:02:45 - ForexEnv2_flat - INFO - forex_env2_flat.py:320 - 
Episode Summary:
2024-12-09 01:02:45 - ForexEnv2_flat - INFO - forex_env2_flat.py:321 - Final Return: -1.71%
2024-12-09 01:02:45 - ForexEnv2_flat - INFO - forex_env2_flat.py:322 - Total PnL: -5500.48
2024-12-09 01:02:45 - ForexEnv2_flat - INFO - forex_env2_flat.py:323 - Total Trades: 390
2024-12-09 01:02:45 - ForexEnv2_flat - INFO - forex_env2_flat.py:324 - Winning Trades: 146
2024-12-09 01:02:45 - ForexEnv2_flat - INFO - forex_env2_flat.py:325 - Win Rate: 37.44%
2024-12-09 01:02:45 - ForexEnv2_flat - INFO - forex_env2_flat.py:326 - Initial Balance: 1000000.00
2024-12-09 01:02:45 - ForexEnv2_flat - INFO - forex_env2_flat.py:327 - Final Balance: 982910.99
2024-12-09 01:02:45 - ForexEnv2_flat - INFO - forex_env2_flat.py:328 - Trade_size: 94510.00
2024-12-09 01:02:45 - ForexEnv2_flat - INFO - forex_env2_flat.py:329 - -------

[I 2024-12-09 02:13:53,878] Trial 3 finished with value: 0.0 and parameters: {'batch_size': 64, 'n_steps': 8, 'gamma': 0.99, 'learning_rate': 0.7494657263819388, 'ent_coef': 3.1566805956539367e-07, 'clip_range': 0.2, 'n_epochs': 10, 'gae_lambda': 0.9, 'max_grad_norm': 0.9, 'vf_coef': 0.5776907045621613, 'net_arch': 'tiny', 'enable_critic_lstm': False, 'lstm_hidden_size': 64}. Best is trial 1 with value: 0.009654.


Eval num_timesteps=1000000, episode_reward=-0.02 +/- 0.01
Episode length: 85657.50 +/- 49373.50
2024-12-09 02:18:11 - ForexEnv2_flat - INFO - forex_env2_flat.py:320 - 
Episode Summary:
2024-12-09 02:18:11 - ForexEnv2_flat - INFO - forex_env2_flat.py:321 - Final Return: -1.55%
2024-12-09 02:18:11 - ForexEnv2_flat - INFO - forex_env2_flat.py:322 - Total PnL: -2476.16
2024-12-09 02:18:11 - ForexEnv2_flat - INFO - forex_env2_flat.py:323 - Total Trades: 440
2024-12-09 02:18:11 - ForexEnv2_flat - INFO - forex_env2_flat.py:324 - Winning Trades: 205
2024-12-09 02:18:11 - ForexEnv2_flat - INFO - forex_env2_flat.py:325 - Win Rate: 46.59%
2024-12-09 02:18:11 - ForexEnv2_flat - INFO - forex_env2_flat.py:326 - Initial Balance: 1000000.00
2024-12-09 02:18:11 - ForexEnv2_flat - INFO - forex_env2_flat.py:327 - Final Balance: 984451.50
2024-12-09 02:18:11 - ForexEnv2_flat - INFO - forex_env2_flat.py:328 - Trade_size: 94510.00
2024-12-09 02:18:11 - ForexEnv2_flat - INFO - forex_env2_flat.py:329 - ------

[I 2024-12-09 04:34:47,076] Trial 4 finished with value: -0.0018340000000000001 and parameters: {'batch_size': 512, 'n_steps': 16, 'gamma': 0.99, 'learning_rate': 0.000323707657325567, 'ent_coef': 1.9442858804013083e-06, 'clip_range': 0.3, 'n_epochs': 20, 'gae_lambda': 1.0, 'max_grad_norm': 0.7, 'vf_coef': 0.8920860468424349, 'net_arch': 'medium', 'enable_critic_lstm': True, 'lstm_hidden_size': 16}. Best is trial 1 with value: 0.009654.


Eval num_timesteps=1000000, episode_reward=-0.00 +/- 0.00
Episode length: 21711.50 +/- 1598.50
New best mean reward!
2024-12-09 04:41:31 - ForexEnv2_flat - INFO - forex_env2_flat.py:320 - 
Episode Summary:
2024-12-09 04:41:31 - ForexEnv2_flat - INFO - forex_env2_flat.py:321 - Final Return: -0.08%
2024-12-09 04:41:31 - ForexEnv2_flat - INFO - forex_env2_flat.py:322 - Total PnL: -378.04
2024-12-09 04:41:31 - ForexEnv2_flat - INFO - forex_env2_flat.py:323 - Total Trades: 14
2024-12-09 04:41:31 - ForexEnv2_flat - INFO - forex_env2_flat.py:324 - Winning Trades: 4
2024-12-09 04:41:31 - ForexEnv2_flat - INFO - forex_env2_flat.py:325 - Win Rate: 28.57%
2024-12-09 04:41:31 - ForexEnv2_flat - INFO - forex_env2_flat.py:326 - Initial Balance: 1000000.00
2024-12-09 04:41:31 - ForexEnv2_flat - INFO - forex_env2_flat.py:327 - Final Balance: 999206.49
2024-12-09 04:41:31 - ForexEnv2_flat - INFO - forex_env2_flat.py:328 - Trade_size: 94510.00
2024-12-09 04:41:31 - ForexEnv2_flat - INFO - forex_env2_fla

[I 2024-12-09 07:02:52,977] Trial 5 finished with value: 0.0 and parameters: {'batch_size': 32, 'n_steps': 512, 'gamma': 0.999, 'learning_rate': 0.026078840602620806, 'ent_coef': 9.12841594890008e-06, 'clip_range': 0.3, 'n_epochs': 10, 'gae_lambda': 0.99, 'max_grad_norm': 1, 'vf_coef': 0.10943009945584248, 'net_arch': 'small', 'enable_critic_lstm': True, 'lstm_hidden_size': 256}. Best is trial 1 with value: 0.009654.


2024-12-09 07:03:16 - ForexEnv2_flat - INFO - forex_env2_flat.py:320 - 
Episode Summary:
2024-12-09 07:03:16 - ForexEnv2_flat - INFO - forex_env2_flat.py:321 - Final Return: -7.78%
2024-12-09 07:03:16 - ForexEnv2_flat - INFO - forex_env2_flat.py:322 - Total PnL: 1351.49
2024-12-09 07:03:16 - ForexEnv2_flat - INFO - forex_env2_flat.py:323 - Total Trades: 2667
2024-12-09 07:03:16 - ForexEnv2_flat - INFO - forex_env2_flat.py:324 - Winning Trades: 1028
2024-12-09 07:03:16 - ForexEnv2_flat - INFO - forex_env2_flat.py:325 - Win Rate: 38.55%
2024-12-09 07:03:16 - ForexEnv2_flat - INFO - forex_env2_flat.py:326 - Initial Balance: 1000000.00
2024-12-09 07:03:16 - ForexEnv2_flat - INFO - forex_env2_flat.py:327 - Final Balance: 922205.23
2024-12-09 07:03:16 - ForexEnv2_flat - INFO - forex_env2_flat.py:328 - Trade_size: 94510.00
2024-12-09 07:03:16 - ForexEnv2_flat - INFO - forex_env2_flat.py:329 - --------------------------------------------------
2024-12-09 07:04:39 - ForexEnv2_flat - INFO - fore

[I 2024-12-09 07:44:44,478] Trial 6 finished with value: 0.0035925 and parameters: {'batch_size': 32, 'n_steps': 1024, 'gamma': 0.98, 'learning_rate': 0.00030176497295331985, 'ent_coef': 4.390355604009315e-06, 'clip_range': 0.4, 'n_epochs': 1, 'gae_lambda': 0.99, 'max_grad_norm': 5, 'vf_coef': 0.5812146350708727, 'net_arch': 'small', 'enable_critic_lstm': False, 'lstm_hidden_size': 256}. Best is trial 1 with value: 0.009654.


2024-12-09 07:52:24 - ForexEnv2_flat - INFO - forex_env2_flat.py:320 - 
Episode Summary:
2024-12-09 07:52:24 - ForexEnv2_flat - INFO - forex_env2_flat.py:321 - Final Return: -0.72%
2024-12-09 07:52:24 - ForexEnv2_flat - INFO - forex_env2_flat.py:322 - Total PnL: -4649.89
2024-12-09 07:52:24 - ForexEnv2_flat - INFO - forex_env2_flat.py:323 - Total Trades: 85
2024-12-09 07:52:24 - ForexEnv2_flat - INFO - forex_env2_flat.py:324 - Winning Trades: 48
2024-12-09 07:52:24 - ForexEnv2_flat - INFO - forex_env2_flat.py:325 - Win Rate: 56.47%
2024-12-09 07:52:24 - ForexEnv2_flat - INFO - forex_env2_flat.py:326 - Initial Balance: 1000000.00
2024-12-09 07:52:24 - ForexEnv2_flat - INFO - forex_env2_flat.py:327 - Final Balance: 992812.80
2024-12-09 07:52:24 - ForexEnv2_flat - INFO - forex_env2_flat.py:328 - Trade_size: 94510.00
2024-12-09 07:52:24 - ForexEnv2_flat - INFO - forex_env2_flat.py:329 - --------------------------------------------------
2024-12-09 07:52:29 - ForexEnv2_flat - INFO - forex_e

In [1]:
import os
import sys

import pandas as pd

from datetime import datetime, timedelta
from pathlib import Path


# Treat the parent of the current working directory as the project root and
# make it importable, so the project-local imports below can be resolved.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
already_importable = any(entry == project_root for entry in sys.path)
if not already_importable:
    # Append (rather than prepend) so existing entries keep their priority.
    sys.path.append(project_root)
from stable_baselines3 import PPO, A2C
from stable_baselines3.common.vec_env import VecNormalize, DummyVecEnv
from trading.environments.forex_env2_flat import ForexTradingEnv
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.monitor import Monitor
from data_management.dataset_manager import DatasetManager
from sb3_contrib import RecurrentPPO





# Input data and the environment settings shared by the train/eval factories.
train_set = './EUR_USD_5min_1D_indic_not_norm_unbiased_full.parquet'
df = pd.read_parquet(train_set)
sequence = 5
pair = 'EUR_USD'

# Split the frame into train / validation / test parts (70% / 15% / 15%).
split_ratios = dict(train_ratio=0.7, val_ratio=0.15, test_ratio=0.15)
dataset_manager = DatasetManager()
train_df, val_df, test_df = dataset_manager.split_dataset(df, **split_ratios)

# Output directory for models, evaluation logs and TensorBoard events.
# The trailing slash matters: later paths are built by plain string
# concatenation onto `saving_path`.
# NOTE(review): this is a machine-specific absolute path — consider deriving
# it from the notebook location or a configurable base directory.
saving_path = '/Users/floriankockler/Code/GitHub.nosync/ai6-gcp-bot/forex_trading_system/notebooks/no_lookahead_bias/'
os.makedirs(saving_path, exist_ok=True)

def make_train_env():
    """Build the vectorized, normalized training environment.

    Creates a ``ForexTradingEnv`` over the notebook-level ``train_df`` (using
    the notebook-level ``pair`` and ``sequence``), wraps it in ``Monitor``,
    places it in a single-environment ``DummyVecEnv`` and finally wraps that in
    ``VecNormalize`` with both observation and reward normalization enabled.

    Returns:
        The ``VecNormalize`` wrapper around the training environment.
    """
    monitored_env = Monitor(
        ForexTradingEnv(df=train_df, pair=pair, sequence_length=sequence)
    )
    # DummyVecEnv expects a list of factories; hand back the already-built env.
    vectorized_env = DummyVecEnv([lambda: monitored_env])
    return VecNormalize(vectorized_env, norm_obs=True, norm_reward=True)

def make_eval_env():
    """Build the vectorized, normalized evaluation environment.

    Creates a ``ForexTradingEnv`` over the notebook-level ``val_df`` (using the
    notebook-level ``pair`` and ``sequence``), wraps it in ``Monitor``, places
    it in a single-environment ``DummyVecEnv`` and wraps that in
    ``VecNormalize`` with observation normalization on and reward
    normalization off. The wrapper's ``training`` flag is then set to
    ``False``.

    Returns:
        The ``VecNormalize`` wrapper around the evaluation environment.
    """
    # A `resample_interval='1h'` argument was left commented out in the
    # original call; it stays disabled here.
    monitored_env = Monitor(
        ForexTradingEnv(df=val_df, pair=pair, sequence_length=sequence)
    )
    # DummyVecEnv expects a list of factories; hand back the already-built env.
    vectorized_env = DummyVecEnv([lambda: monitored_env])
    normalized_env = VecNormalize(vectorized_env, norm_obs=True, norm_reward=False)
    # Evaluation mode for the normalizer (see the SB3 VecNormalize docs).
    normalized_env.training = False
    return normalized_env

# Instantiate both environments once; they are shared by the callback and the
# model below.
train_env = make_train_env()
eval_env = make_eval_env()

# Callback options: the best-model checkpoint and the evaluation log both go
# to `saving_path`.
eval_callback_options = {
    "best_model_save_path": saving_path,
    "log_path": saving_path,
    "eval_freq": 50_000,  # Adjust as needed
    "n_eval_episodes": 5,
    "deterministic": True,
    "render": False,
}
eval_callback = EvalCallback(eval_env, **eval_callback_options)

# Keyword arguments forwarded to the recurrent (LSTM) policy.
policy_kwargs = {
    # Separate layer lists for the actor (pi) and the critic (vf) heads.
    "net_arch": {
        "pi": [64, 64],
        "vf": [64, 64],
    },
    # LSTM settings.
    "lstm_hidden_size": 128,     # Size of LSTM hidden states
    "n_lstm_layers": 2,          # Number of LSTM layers
    "enable_critic_lstm": True,  # Use LSTM for critic as well
    "lstm_kwargs": {
        "dropout": 0.1,          # Dropout rate for LSTM layers
    },
}

# Build the recurrent PPO agent on the normalized training environment.
tensorboard_dir = f'{saving_path}sequence_{sequence}_1day_RecurrentPPO/'
model = RecurrentPPO(
    'MlpLstmPolicy',
    train_env,
    policy_kwargs=policy_kwargs,
    tensorboard_log=tensorboard_dir,
    verbose=0,
)

# Train, with the evaluation callback attached.
model.learn(
    callback=eval_callback,
    total_timesteps=2_000_000,  # Adjust as needed
)

# Persist the trained model and the training env's normalization wrapper.
# NOTE(review): despite the "_best_model" file name, this is the model as it
# stands when training finishes; the EvalCallback is configured separately
# with best_model_save_path=saving_path.
model_path = f'{saving_path}{sequence}_best_model.zip'
normalizer_path = f'{saving_path}{sequence}_vec_normalize.pkl'
model.save(model_path)
train_env.save(normalizer_path)

Logging configuration loaded from /Users/floriankockler/Code/GitHub.nosync/ai6-gcp-bot/forex_trading_system/config/logging_config.yaml
2024-12-08 13:53:28 - oandapyV20.oandapyV20 - INFO - oandapyV20.py:207 - setting up API-client for environment practice
Dataset split sizes:
Training: 1215220 samples (70.0%)
Validation: 260404 samples (15.0%)
Test: 260405 samples (15.0%)
2024-12-08 13:59:13 - ForexEnv2_flat - INFO - forex_env2_flat.py:320 - 
Episode Summary:
2024-12-08 13:59:13 - ForexEnv2_flat - INFO - forex_env2_flat.py:321 - Final Return: -0.00%
2024-12-08 13:59:13 - ForexEnv2_flat - INFO - forex_env2_flat.py:322 - Total PnL: 0.00
2024-12-08 13:59:13 - ForexEnv2_flat - INFO - forex_env2_flat.py:323 - Total Trades: 0
2024-12-08 13:59:13 - ForexEnv2_flat - INFO - forex_env2_flat.py:324 - Winning Trades: 0
2024-12-08 13:59:13 - ForexEnv2_flat - INFO - forex_env2_flat.py:325 - Win Rate: 0.00%
2024-12-08 13:59:13 - ForexEnv2_flat - INFO - forex_env2_flat.py:326 - Initial Balance: 1000000

ResourceExhaustedError: /Users/floriankockler/Code/GitHub.nosync/ai6-gcp-bot/forex_trading_system/notebooks/no_lookahead_bias/sequence_5_1day_RecurrentPPO/RecurrentPPO_1/events.out.tfevents.1733662409.Florians-MacBook-Pro.local.43945.0; No space left on device