In [1]:
import os
import sys
from datetime import datetime, timedelta
from pathlib import Path

import pandas as pd
from sb3_contrib import RecurrentPPO
from stable_baselines3 import PPO, A2C
from stable_baselines3.common.callbacks import BaseCallback, EvalCallback
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import VecNormalize, DummyVecEnv

# Add the project root to the Python path.
# This must run before the project-local imports below, which are resolved through it.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)

from trading.environments.forex_env2_flat import ForexTradingEnv
from data_management.dataset_manager import DatasetManager





# --- Data and run configuration -------------------------------------------
# Source data, read relative to the current working directory.
train_set = './EUR_USD_5min_1D_indic_not_norm_unbiased_full.parquet'
df = pd.read_parquet(train_set)
sequence = 5      # forwarded to ForexTradingEnv as `sequence_length`
pair = 'EUR_USD'  # forwarded to ForexTradingEnv as `pair`

# Split the full frame into train / validation / test partitions (70% / 15% / 15%).
dataset_manager = DatasetManager()
train_df, val_df, test_df = dataset_manager.split_dataset(df, train_ratio=0.7, val_ratio=0.15, test_ratio=0.15)

# Output directory for checkpoints, evaluation logs and TensorBoard event files.
# The default is the original location; set FOREX_SAVING_PATH to redirect the
# run elsewhere (another machine, or a volume with more free space) without
# editing the notebook. `os.path.join(..., '')` guarantees the trailing
# separator that the f-string path concatenations further down rely on.
saving_path = os.path.join(
    os.environ.get('FOREX_SAVING_PATH')
    or '/Users/floriankockler/Code/GitHub.nosync/ai6-gcp-bot/forex_trading_system/notebooks/no_lookahead_bias/',
    '',
)
os.makedirs(saving_path, exist_ok=True)

def make_train_env():
    """Build the vectorized, normalized environment used for training.

    A ``ForexTradingEnv`` over ``train_df`` is wrapped in ``Monitor``, placed
    in a single-environment ``DummyVecEnv`` and finally wrapped in
    ``VecNormalize`` with both observation and reward normalization enabled.

    Returns:
        The ``VecNormalize``-wrapped training environment.
    """
    def _monitored_env():
        # Factory handed to DummyVecEnv: one monitored trading env on the training split.
        base_env = ForexTradingEnv(df=train_df, pair=pair, sequence_length=sequence)
        return Monitor(base_env)

    vec_env = DummyVecEnv([_monitored_env])
    return VecNormalize(vec_env, norm_obs=True, norm_reward=True)

def make_eval_env():
    """Build the vectorized environment used for periodic evaluation.

    A ``ForexTradingEnv`` over ``val_df`` is wrapped in ``Monitor``, placed in
    a single-environment ``DummyVecEnv`` and wrapped in ``VecNormalize`` with
    observation normalization only (``norm_reward=False``). The wrapper's
    ``training`` flag is switched off before it is returned.

    Returns:
        The ``VecNormalize``-wrapped evaluation environment.
    """
    def _monitored_env():
        # Factory handed to DummyVecEnv: one monitored trading env on the validation split.
        # (Disabled option kept from the original call: resample_interval='1h')
        return Monitor(ForexTradingEnv(df=val_df, pair=pair, sequence_length=sequence))

    eval_vec_env = VecNormalize(
        DummyVecEnv([_monitored_env]),
        norm_obs=True,
        norm_reward=False,
    )
    eval_vec_env.training = False
    return eval_vec_env

class SaveVecNormalizeOnBest(BaseCallback):
    """Save the training ``VecNormalize`` statistics when a new best model is found.

    Intended for ``EvalCallback(callback_on_new_best=...)``. ``EvalCallback``
    stores only the policy weights of the best checkpoint; without this hook
    the only normalization statistics written to disk are the final ones saved
    after training, which need not match the statistics the best checkpoint
    was trained and evaluated with.

    Args:
        save_path: File the ``VecNormalize`` wrapper is pickled to. It is
            overwritten each time a new best model is recorded.
        verbose: Verbosity level forwarded to ``BaseCallback``.
    """

    def __init__(self, save_path: str, verbose: int = 0):
        super().__init__(verbose)
        self.save_path = save_path

    def _on_step(self) -> bool:
        vec_normalize = self.model.get_vec_normalize_env()
        if vec_normalize is not None:
            vec_normalize.save(self.save_path)
        # Returning True tells the parent callback to continue training.
        return True


train_env = make_train_env()
eval_env = make_eval_env()

# Evaluate on the validation environment every 50k steps; the best-scoring
# policy and the evaluation logs are both written under `saving_path`.
eval_callback = EvalCallback(
    eval_env,
    # Keep the normalization statistics that belong to the best checkpoint.
    callback_on_new_best=SaveVecNormalizeOnBest(
        os.path.join(saving_path, 'best_model_vec_normalize.pkl')
    ),
    best_model_save_path=saving_path,
    log_path=saving_path,
    eval_freq=50_000,  # Adjust as needed
    n_eval_episodes=5,
    deterministic=True,
    render=False
)

# Policy configuration for the LSTM-based actor-critic.
policy_kwargs = {
    # Fully connected layer sizes for the two heads.
    'net_arch': {
        'pi': [64, 64],  # actor
        'vf': [64, 64],  # critic
    },
    # Recurrent part of the policy.
    'lstm_hidden_size': 128,     # width of the LSTM hidden state
    'n_lstm_layers': 2,          # number of stacked LSTM layers
    'enable_critic_lstm': True,  # the critic uses an LSTM as well
    'lstm_kwargs': {
        'dropout': 0.1,          # dropout rate passed to the LSTM layers
    },
}

# Recurrent PPO agent with an MLP+LSTM policy, trained on the normalized
# training environment. TensorBoard event files go under `saving_path`.
model = RecurrentPPO(
    'MlpLstmPolicy',
    train_env,
    verbose=0,
    tensorboard_log=f'{saving_path}sequence_{sequence}_1day_RecurrentPPO/',
    policy_kwargs=policy_kwargs,
)

try:
    model.learn(
        total_timesteps=2_000_000,  # Adjust as needed
        callback=eval_callback
    )
except BaseException:
    # A multi-hour run can die part-way (kernel interrupt, full disk, ...).
    # Keep what was learned so far under a separate name, so that the result
    # of an earlier complete run is never overwritten by a partial one, then
    # let the original error propagate unchanged.
    try:
        model.save(f'{saving_path}{sequence}_interrupted_model.zip')
        train_env.save(f'{saving_path}{sequence}_interrupted_vec_normalize.pkl')
    except OSError as save_error:
        # Report, but do not mask, the failure that stopped training.
        print(f'Could not save interrupted checkpoint: {save_error}')
    raise

# Persist the policy as it is at the end of training, together with the
# VecNormalize statistics of the training environment at that point.
# NOTE(review): despite the file name this is the final model, not the
# best-scoring one selected by the evaluation callback.
model.save(f'{saving_path}{sequence}_best_model.zip')
train_env.save(f'{saving_path}{sequence}_vec_normalize.pkl')

Logging configuration loaded from /Users/floriankockler/Code/GitHub.nosync/ai6-gcp-bot/forex_trading_system/config/logging_config.yaml
2024-12-08 13:53:28 - oandapyV20.oandapyV20 - INFO - oandapyV20.py:207 - setting up API-client for environment practice
Dataset split sizes:
Training: 1215220 samples (70.0%)
Validation: 260404 samples (15.0%)
Test: 260405 samples (15.0%)
2024-12-08 13:59:13 - ForexEnv2_flat - INFO - forex_env2_flat.py:320 - 
Episode Summary:
2024-12-08 13:59:13 - ForexEnv2_flat - INFO - forex_env2_flat.py:321 - Final Return: -0.00%
2024-12-08 13:59:13 - ForexEnv2_flat - INFO - forex_env2_flat.py:322 - Total PnL: 0.00
2024-12-08 13:59:13 - ForexEnv2_flat - INFO - forex_env2_flat.py:323 - Total Trades: 0
2024-12-08 13:59:13 - ForexEnv2_flat - INFO - forex_env2_flat.py:324 - Winning Trades: 0
2024-12-08 13:59:13 - ForexEnv2_flat - INFO - forex_env2_flat.py:325 - Win Rate: 0.00%
2024-12-08 13:59:13 - ForexEnv2_flat - INFO - forex_env2_flat.py:326 - Initial Balance: 1000000

ResourceExhaustedError: /Users/floriankockler/Code/GitHub.nosync/ai6-gcp-bot/forex_trading_system/notebooks/no_lookahead_bias/sequence_5_1day_RecurrentPPO/RecurrentPPO_1/events.out.tfevents.1733662409.Florians-MacBook-Pro.local.43945.0; No space left on device