In [5]:
import os
import sys

import pandas as pd
from datetime import datetime, timedelta
from pathlib import Path
from stable_baselines3 import PPO

# Put the parent of the current working directory on sys.path (at most once)
# so the project-local packages imported below can be resolved.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)

# Import our custom modules
from trading.agents.training_agent_lstm import FXTradingTrainer
from trading.environments.forex_env2_flat import ForexTradingEnv as FX_ENV_FLAT
from sb3_contrib import RecurrentPPO

In [None]:


# Build the trainer: RecurrentPPO on the flat forex environment for EUR_USD,
# reading the normalized 5-minute parquet file from the working directory.
trainer = FXTradingTrainer(
    model_class=RecurrentPPO,
    env_class=FX_ENV_FLAT,
    pair='EUR_USD',
    data_path='./EUR_USD_5min_1D_norm_unbiased_full.parquet',
    use_sequences=True,
    sequence_length=5,
    tensorboard_log="logs/fx_trading",
)

# Run training for 100k timesteps; eval_freq and save_freq are both 10k steps.
steps_between_checkpoints = 10000
model = trainer.train(
    total_timesteps=100000,
    eval_freq=steps_between_checkpoints,
    save_freq=steps_between_checkpoints,
)

# Score the trained model through evaluate_on_test over 20 episodes and report
# mean ± standard deviation of the reward.
test_stats = trainer.evaluate_on_test(n_eval_episodes=20)
mean_reward, std_reward = test_stats
print(f"Final test reward: {mean_reward:.2f} ± {std_reward:.2f}")

# Persist the final model.
trainer.save("models/final_model")

Logging configuration loaded from /Users/floriankockler/Code/GitHub.nosync/ai6-gcp-bot/forex_trading_system/config/logging_config.yaml
2024-12-08 13:55:17 - oandapyV20.oandapyV20 - INFO - oandapyV20.py:207 - setting up API-client for environment practice
Dataset split sizes:
Training: 1215220 samples (70.0%)
Validation: 260404 samples (15.0%)
Test: 260405 samples (15.0%)




In [None]:
# Trainer for the hyperparameter search: plain PPO without sequences on the
# flat forex environment, logging to a dedicated TensorBoard directory.
trainer = FXTradingTrainer(
    model_class=PPO,
    env_class=FX_ENV_FLAT,
    pair='EUR_USD',
    data_path='./EUR_USD_5min_1D_norm_unbiased_full.parquet',
    use_sequences=False,
    sequence_length=5,
    tensorboard_log="logs/optuna_ppo_08dec",
)

# Hyperparameter search settings.
# NOTE(review): in the output recorded for this cell, Optuna reports trial 1
# (value -0.00154) as best over trial 0 (value 4.85e-05), i.e. the study
# appears to minimize the objective. Confirm that FXTradingTrainer.optimize
# creates the study with the intended direction before trusting best_params.
search_settings = {
    "total_timesteps": 500_000,  # steps per trial
    "n_trials": 50,              # number of trials to run
    "n_startup_trials": 10,      # random trials before TPE takes over
    "n_evaluations": 2,          # episodes per evaluation
    "eval_freq": 50_000,         # how often to evaluate
}
best_params = trainer.optimize(**search_settings)

# Train the final model for 1M timesteps with the parameters returned by the
# search; eval_freq and save_freq are both 10k steps.
model = trainer.train(
    hyperparams=best_params,
    total_timesteps=1_000_000,
    save_freq=10000,
    eval_freq=10000,
)

2024-12-08 14:07:09 - oandapyV20.oandapyV20 - INFO - oandapyV20.py:207 - setting up API-client for environment practice


[I 2024-12-08 14:07:10,314] A new study created in memory with name: no-name-1b74779b-f263-4145-9388-adb35fb2eda1


Dataset split sizes:
Training: 1215220 samples (70.0%)
Validation: 260404 samples (15.0%)
Test: 260405 samples (15.0%)


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=16 and n_envs=1)


2024-12-08 14:11:35 - ForexEnv2_flat - INFO - forex_env2_flat.py:320 - 
Episode Summary:
2024-12-08 14:11:35 - ForexEnv2_flat - INFO - forex_env2_flat.py:321 - Final Return: -6.92%
2024-12-08 14:11:35 - ForexEnv2_flat - INFO - forex_env2_flat.py:322 - Total PnL: -1171.92
2024-12-08 14:11:35 - ForexEnv2_flat - INFO - forex_env2_flat.py:323 - Total Trades: 2291
2024-12-08 14:11:35 - ForexEnv2_flat - INFO - forex_env2_flat.py:324 - Winning Trades: 934
2024-12-08 14:11:35 - ForexEnv2_flat - INFO - forex_env2_flat.py:325 - Win Rate: 40.77%
2024-12-08 14:11:35 - ForexEnv2_flat - INFO - forex_env2_flat.py:326 - Initial Balance: 1000000.00
2024-12-08 14:11:35 - ForexEnv2_flat - INFO - forex_env2_flat.py:327 - Final Balance: 930825.20
2024-12-08 14:11:35 - ForexEnv2_flat - INFO - forex_env2_flat.py:328 - Trade_size: 94510.00
2024-12-08 14:11:35 - ForexEnv2_flat - INFO - forex_env2_flat.py:329 - --------------------------------------------------
2024-12-08 14:12:21 - ForexEnv2_flat - INFO - fore

[I 2024-12-08 15:07:09,582] Trial 0 finished with value: 4.8499999999999986e-05 and parameters: {'batch_size': 128, 'n_steps': 16, 'gamma': 0.9, 'learning_rate': 0.00010992652005655445, 'ent_coef': 0.004275089305284775, 'clip_range': 0.1, 'n_epochs': 20, 'gae_lambda': 0.95, 'max_grad_norm': 2, 'vf_coef': 0.2507005564697896, 'net_arch': 'medium'}. Best is trial 0 with value: 4.8499999999999986e-05.


Eval num_timesteps=500000, episode_reward=-0.01 +/- 0.00
Episode length: 211565.50 +/- 11958.50
2024-12-08 15:08:09 - ForexEnv2_flat - INFO - forex_env2_flat.py:320 - 
Episode Summary:
2024-12-08 15:08:09 - ForexEnv2_flat - INFO - forex_env2_flat.py:321 - Final Return: -4.04%
2024-12-08 15:08:09 - ForexEnv2_flat - INFO - forex_env2_flat.py:322 - Total PnL: -1493.26
2024-12-08 15:08:09 - ForexEnv2_flat - INFO - forex_env2_flat.py:323 - Total Trades: 1311
2024-12-08 15:08:09 - ForexEnv2_flat - INFO - forex_env2_flat.py:324 - Winning Trades: 588
2024-12-08 15:08:09 - ForexEnv2_flat - INFO - forex_env2_flat.py:325 - Win Rate: 44.85%
2024-12-08 15:08:09 - ForexEnv2_flat - INFO - forex_env2_flat.py:326 - Initial Balance: 1000000.00
2024-12-08 15:08:09 - ForexEnv2_flat - INFO - forex_env2_flat.py:327 - Final Balance: 959586.48
2024-12-08 15:08:09 - ForexEnv2_flat - INFO - forex_env2_flat.py:328 - Trade_size: 94510.00
2024-12-08 15:08:09 - ForexEnv2_flat - INFO - forex_env2_flat.py:329 - -----

[I 2024-12-08 15:23:23,024] Trial 1 finished with value: -0.0015400000000000001 and parameters: {'batch_size': 16, 'n_steps': 64, 'gamma': 0.95, 'learning_rate': 0.007441290180423046, 'ent_coef': 1.0265598385611179e-06, 'clip_range': 0.4, 'n_epochs': 1, 'gae_lambda': 0.99, 'max_grad_norm': 0.3, 'vf_coef': 0.33518626556006836, 'net_arch': 'medium'}. Best is trial 1 with value: -0.0015400000000000001.


Eval num_timesteps=500000, episode_reward=-0.02 +/- 0.02
Episode length: 69620.50 +/- 54201.50


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=8 and n_envs=1)


2024-12-08 15:25:42 - ForexEnv2_flat - INFO - forex_env2_flat.py:320 - 
Episode Summary:
2024-12-08 15:25:42 - ForexEnv2_flat - INFO - forex_env2_flat.py:321 - Final Return: 0.22%
2024-12-08 15:25:42 - ForexEnv2_flat - INFO - forex_env2_flat.py:322 - Total PnL: 3090.48
2024-12-08 15:25:42 - ForexEnv2_flat - INFO - forex_env2_flat.py:323 - Total Trades: 30
2024-12-08 15:25:42 - ForexEnv2_flat - INFO - forex_env2_flat.py:324 - Winning Trades: 15
2024-12-08 15:25:42 - ForexEnv2_flat - INFO - forex_env2_flat.py:325 - Win Rate: 50.00%
2024-12-08 15:25:42 - ForexEnv2_flat - INFO - forex_env2_flat.py:326 - Initial Balance: 1000000.00
2024-12-08 15:25:42 - ForexEnv2_flat - INFO - forex_env2_flat.py:327 - Final Balance: 1002185.35
2024-12-08 15:25:42 - ForexEnv2_flat - INFO - forex_env2_flat.py:328 - Trade_size: 94510.00
2024-12-08 15:25:42 - ForexEnv2_flat - INFO - forex_env2_flat.py:329 - --------------------------------------------------
2024-12-08 15:26:42 - ForexEnv2_flat - INFO - forex_en

In [None]:
import numpy as np
import pandas as pd

# Load the normalized EUR_USD parquet file (the same file name the trainer
# cells pass as data_path) and sanity-check it for non-finite values.
df = pd.read_parquet('EUR_USD_5min_1D_norm_unbiased_full.parquet')

# The recorded output of this cell reports inf/NaN checks, so the checks are
# part of the cell again; without them the output cannot be reproduced.
# Only numeric columns can hold +/-inf; NaN is checked across every column.
numeric_values = df.select_dtypes(include='number').to_numpy(dtype=float, na_value=np.nan)
print(f"Contains inf values: {np.isinf(numeric_values).any()}")
print(f"Contains NaN values: {df.isna().to_numpy().any()}")


Contains inf values: False
Contains NaN values: False


BEST RESULTS SO FAR

In [None]:
import os
import sys

import pandas as pd

from datetime import datetime, timedelta
from pathlib import Path


# Put the parent of the current working directory on sys.path (at most once)
# so the project-local packages imported below can be resolved.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)
from stable_baselines3 import PPO, A2C
from stable_baselines3.common.vec_env import VecNormalize, DummyVecEnv
from trading.environments.forex_env2_flat import ForexTradingEnv
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.monitor import Monitor
from data_management.dataset_manager import DatasetManager
from sb3_contrib import RecurrentPPO



# Other currency pairs kept for reference; nothing in this cell reads the list.
all_tickers = ['GBP_CHF', 'GBP_JPY', 'EUR_CHF', 'EUR_JPY', 'USD_CHF']

# Sequence lengths to train with; the loop below runs once per entry.
sequence_length = [5]

# Pair passed to the trading environment.
eur_usd = 'EUR_USD'

def _build_vec_env(frame, seq_len, *, training):
    """Wrap a ForexTradingEnv over ``frame`` for Stable-Baselines3.

    The raw environment is wrapped in ``Monitor``, then in a single-env
    ``DummyVecEnv``, then in ``VecNormalize`` with observation normalization
    enabled. The traded pair is read from the module-level ``eur_usd``.

    Args:
        frame: DataFrame handed to ``ForexTradingEnv`` as ``df``.
        seq_len: Value forwarded as ``sequence_length``.
        training: ``True`` builds the training variant (reward normalization
            on, ``VecNormalize.training`` True); ``False`` builds the
            evaluation variant (raw rewards, ``VecNormalize.training`` False).

    Returns:
        The ``VecNormalize``-wrapped vectorized environment.
    """
    monitored = Monitor(
        ForexTradingEnv(df=frame, pair=eur_usd, sequence_length=seq_len)
    )
    vec_env = DummyVecEnv([lambda: monitored])
    # Reward normalization is only wanted while training, so it follows the
    # same flag as VecNormalize.training.
    return VecNormalize(vec_env, training=training, norm_obs=True, norm_reward=training)


# Loop-invariant setup, done once instead of on every iteration: load the
# dataset and split it 70/15/15 into train / validation / test frames.
train_set = './EUR_USD_5min_1D_indic_not_norm_unbiased_full.parquet'
df = pd.read_parquet(train_set)
dataset_manager = DatasetManager()
train_df, val_df, test_df = dataset_manager.split_dataset(
    df, train_ratio=0.7, val_ratio=0.15, test_ratio=0.15
)

# Artifacts go to ./no_lookahead_bias/ under the working directory rather than
# a user-specific absolute path, so the notebook also runs on other machines.
# The trailing separator is kept because file names are appended directly.
saving_path = os.path.join(os.getcwd(), 'no_lookahead_bias', '')
os.makedirs(saving_path, exist_ok=True)

# NOTE(review): no random seed is set anywhere, so runs are not reproducible.

for sequence in sequence_length:

    def make_train_env():
        """Return the normalized training env for the current ``sequence``."""
        return _build_vec_env(train_df, sequence, training=True)

    def make_eval_env():
        """Return the evaluation env (validation frame) for the current ``sequence``."""
        return _build_vec_env(val_df, sequence, training=False)

    train_env = make_train_env()
    eval_env = make_eval_env()

    # Evaluate every 50k steps on 5 deterministic episodes; the callback
    # writes its best model and evaluation logs into saving_path.
    eval_callback = EvalCallback(
        eval_env,
        best_model_save_path=saving_path,
        log_path=saving_path,
        eval_freq=50_000,  # Adjust as needed
        n_eval_episodes=5,
        deterministic=True,
        render=False,
    )

    # NOTE(review): the TensorBoard run name says "A2C" although the model is
    # PPO; the string is left unchanged so existing log directories line up.
    model = PPO(
        'MlpPolicy',
        train_env,
        verbose=0,
        tensorboard_log=f'{saving_path}sequence_{sequence}_1day_A2C_indic_full/',
    )

    model.learn(
        total_timesteps=2_000_000,  # Adjust as needed
        callback=eval_callback,
    )

    # NOTE(review): despite the file name, this saves the model as it is after
    # the last update; the best-scoring checkpoint is the one EvalCallback
    # writes under best_model_save_path.
    model.save(f'{saving_path}{sequence}_best_model.zip')
    # Normalization statistics must be saved with the model to reuse it later.
    train_env.save(f'{saving_path}{sequence}_vec_normalize.pkl')

In [None]:
import os
import sys

import pandas as pd

from datetime import datetime, timedelta
from pathlib import Path


# Put the parent of the current working directory on sys.path (at most once)
# so the project-local packages imported below can be resolved.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)
from stable_baselines3 import PPO, A2C
from stable_baselines3.common.vec_env import VecNormalize, DummyVecEnv
from trading.environments.forex_env2_flat import ForexTradingEnv
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.monitor import Monitor
from data_management.dataset_manager import DatasetManager



# Other currency pairs kept for reference; nothing in this cell reads the list.
all_tickers = ['GBP_CHF', 'GBP_JPY', 'EUR_CHF', 'EUR_JPY', 'USD_CHF']

# Sequence lengths to train with; the loop below runs once per entry.
sequence_length = [5]

# Pair passed to the trading environment.
eur_usd = 'EUR_USD'

# NOTE(review): this cell repeats the preceding training cell and writes to the
# same files under saving_path, so running both overwrites the earlier run's
# saved model, normalization statistics and evaluation logs. Confirm whether
# it was meant to be a different experiment.


def _build_vec_env(frame, seq_len, *, training):
    """Wrap a ForexTradingEnv over ``frame`` for Stable-Baselines3.

    The raw environment is wrapped in ``Monitor``, then in a single-env
    ``DummyVecEnv``, then in ``VecNormalize`` with observation normalization
    enabled. The traded pair is read from the module-level ``eur_usd``.

    Args:
        frame: DataFrame handed to ``ForexTradingEnv`` as ``df``.
        seq_len: Value forwarded as ``sequence_length``.
        training: ``True`` builds the training variant (reward normalization
            on, ``VecNormalize.training`` True); ``False`` builds the
            evaluation variant (raw rewards, ``VecNormalize.training`` False).

    Returns:
        The ``VecNormalize``-wrapped vectorized environment.
    """
    monitored = Monitor(
        ForexTradingEnv(df=frame, pair=eur_usd, sequence_length=seq_len)
    )
    vec_env = DummyVecEnv([lambda: monitored])
    # Reward normalization is only wanted while training, so it follows the
    # same flag as VecNormalize.training.
    return VecNormalize(vec_env, training=training, norm_obs=True, norm_reward=training)


# Loop-invariant setup, done once instead of on every iteration: load the
# dataset and split it 70/15/15 into train / validation / test frames.
train_set = './EUR_USD_5min_1D_indic_not_norm_unbiased_full.parquet'
df = pd.read_parquet(train_set)
dataset_manager = DatasetManager()
train_df, val_df, test_df = dataset_manager.split_dataset(
    df, train_ratio=0.7, val_ratio=0.15, test_ratio=0.15
)

# Artifacts go to ./no_lookahead_bias/ under the working directory rather than
# a user-specific absolute path, so the notebook also runs on other machines.
# The trailing separator is kept because file names are appended directly.
saving_path = os.path.join(os.getcwd(), 'no_lookahead_bias', '')
os.makedirs(saving_path, exist_ok=True)

# NOTE(review): no random seed is set anywhere, so runs are not reproducible.

for sequence in sequence_length:

    def make_train_env():
        """Return the normalized training env for the current ``sequence``."""
        return _build_vec_env(train_df, sequence, training=True)

    def make_eval_env():
        """Return the evaluation env (validation frame) for the current ``sequence``."""
        return _build_vec_env(val_df, sequence, training=False)

    train_env = make_train_env()
    eval_env = make_eval_env()

    # Evaluate every 50k steps on 5 deterministic episodes; the callback
    # writes its best model and evaluation logs into saving_path.
    eval_callback = EvalCallback(
        eval_env,
        best_model_save_path=saving_path,
        log_path=saving_path,
        eval_freq=50_000,  # Adjust as needed
        n_eval_episodes=5,
        deterministic=True,
        render=False,
    )

    # NOTE(review): the TensorBoard run name says "A2C" although the model is
    # PPO; the string is left unchanged so existing log directories line up.
    model = PPO(
        'MlpPolicy',
        train_env,
        verbose=0,
        tensorboard_log=f'{saving_path}sequence_{sequence}_1day_A2C_indic_full/',
    )

    model.learn(
        total_timesteps=2_000_000,  # Adjust as needed
        callback=eval_callback,
    )

    # NOTE(review): despite the file name, this saves the model as it is after
    # the last update; the best-scoring checkpoint is the one EvalCallback
    # writes under best_model_save_path.
    model.save(f'{saving_path}{sequence}_best_model.zip')
    # Normalization statistics must be saved with the model to reuse it later.
    train_env.save(f'{saving_path}{sequence}_vec_normalize.pkl')