In [None]:
import pandas as pd
# Hourly EUR_GBP validation split, loaded here only for a quick look at its contents.
source = '/Volumes/ssd_fat2/ai6_trading_bot/datasets/1h/unbiased/not_norm/train2/EUR_GBP_validate.parquet'
df = pd.read_parquet(path=source)
# Bare last expression so the notebook renders the frame with its rich repr.
df

In [None]:
import os
import sys

import pandas as pd

from datetime import datetime, timedelta
from pathlib import Path


# Make the parent of the current working directory importable; the comment in
# the original calls this the project root, and the project-local imports
# below presumably resolve through it.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if sys.path.count(project_root) == 0:
    # Append only once so re-running the cell does not duplicate the entry.
    sys.path.append(project_root)
from stable_baselines3 import PPO, A2C, SAC
from stable_baselines3.common.vec_env import VecNormalize, DummyVecEnv
from trading.environments.forex_env2_flat_simple import ForexTradingEnv
# from trading.environments.forex_env_flat_multi_pair import MultipairForexTradingEnv

from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.monitor import Monitor
from data_management.dataset_manager import DatasetManager
from sb3_contrib import RecurrentPPO

# --- Dataset locations and run configuration ---------------------------------
# Directory holding the hourly training parquet files.
hourly_dir = "/Volumes/ssd_fat2/ai6_trading_bot/datasets/1h/unbiased/not_norm/train/"
# 5-minute dataset directory; not read by any code in this cell.
source_path = '/Volumes/ssd_fat2/ai6_trading_bot/datasets/5min/df_with_all_indics_unbiased/not_norm/train2/'

# Training files: every non-hidden *.parquet file in hourly_dir whose name does
# not contain 'validate'. os.listdir() returns entries in arbitrary order, so
# the list is sorted to give the environment the same file order on every run.
source_dfs = sorted(
    os.path.join(hourly_dir, name)
    for name in os.listdir(hourly_dir)
    if name.endswith('.parquet') and not name.startswith('.') and 'validate' not in name
)

# NOTE(review): the training files come from the 1h dataset while this
# evaluation file lives under the 5min dataset - confirm the mismatch is intended.
eval_path = '/Volumes/ssd_fat2/ai6_trading_bot/datasets/5min/df_with_all_indics_unbiased/not_norm/train2/EUR_GBP_validate.parquet'
# Passed to the environments as `sequence_length` and embedded in output file names.
sequence = 5
# Output directory for evaluation logs, TensorBoard runs and saved models.
saving_path = '/Volumes/ssd_fat2/ai6_trading_bot/datasets/5min/df_with_all_indics_unbiased/not_norm/train/results/'
os.makedirs(saving_path, exist_ok=True)



def make_train_env():
    """Build the training environment.

    Creates a ``ForexTradingEnv`` over the module-level ``source_dfs`` files
    with ``eval_mode=False`` and ``sequence_length=sequence``, then wraps it in
    ``Monitor``, a single-environment ``DummyVecEnv`` and finally
    ``VecNormalize`` with both observation and reward normalisation enabled.

    Returns:
        The ``VecNormalize``-wrapped vectorised environment.
    """
    monitored_env = Monitor(
        ForexTradingEnv(
            df_paths=source_dfs,
            eval_mode=False,
            sequence_length=sequence,
        )
    )
    # DummyVecEnv expects a list of factories; hand back the instance built above.
    vectorised_env = DummyVecEnv([lambda: monitored_env])
    return VecNormalize(vectorised_env, norm_obs=True, norm_reward=True)

def make_eval_env():
    """Build the evaluation environment for the EUR_GBP pair.

    Creates a ``ForexTradingEnv`` with ``eval_mode=True``, pointing at the
    module-level ``eval_path`` (and still receiving ``source_dfs`` as
    ``df_paths``), then wraps it in ``Monitor``, a single-environment
    ``DummyVecEnv`` and ``VecNormalize``. Observations are normalised, rewards
    are left raw, and the wrapper is put in non-training mode.

    Returns:
        The ``VecNormalize``-wrapped vectorised environment with
        ``training`` set to ``False``.
    """
    monitored_env = Monitor(
        ForexTradingEnv(
            df_paths=source_dfs,
            eval_path=eval_path,
            eval_mode=True,
            pair='EUR_GBP',
            sequence_length=sequence,
        )
    )
    # DummyVecEnv expects a list of factories; hand back the instance built above.
    vectorised_env = DummyVecEnv([lambda: monitored_env])
    # training=False is passed at construction instead of being assigned afterwards.
    return VecNormalize(
        vectorised_env,
        training=False,
        norm_obs=True,
        norm_reward=False,
    )

# Instantiate the training environment first, then the evaluation environment.
train_env = make_train_env()
eval_env = make_eval_env()

# Periodic evaluation on eval_env; both the evaluation log and the best model
# found so far are written under saving_path.
eval_callback = EvalCallback(
    eval_env=eval_env,
    n_eval_episodes=5,
    eval_freq=500_000,  # Adjust as needed
    deterministic=True,
    render=False,
    log_path=saving_path,
    best_model_save_path=saving_path,
)

# PPO agent with the default MLP policy; TensorBoard runs go to a
# sequence-specific sub-directory of saving_path.
model = PPO(
    policy='MlpPolicy',
    env=train_env,
    tensorboard_log=f'{saving_path}sequence_{sequence}_cont_space_PPO_20m_core_feat_v2_500k/',
    verbose=0,
)
# LSTM policy configurations. In this cell they are only referenced by the
# commented-out RecurrentPPO constructor; the PPO model above does not use them.

# Baseline recurrent configuration.
policy_kwargs = {
    # Separate actor (pi) and critic (vf) heads with matching layer sizes.
    'net_arch': {
        'pi': [256, 128],
        'vf': [256, 128],
    },
    # Recurrent core: two stacked LSTM layers of width 256, with the critic
    # LSTM enabled as well.
    'lstm_hidden_size': 256,
    'n_lstm_layers': 2,
    'enable_critic_lstm': True,
    # Extra keyword arguments for the LSTM itself (dropout for regularisation).
    'lstm_kwargs': {
        'dropout': 0.2,
    },
}

# Larger variant: deeper heads, wider three-layer LSTM, slightly more dropout.
policy_kwargs_complex = {
    'net_arch': {
        'pi': [512, 256, 128],
        'vf': [512, 256, 128],
    },
    'lstm_hidden_size': 512,
    'n_lstm_layers': 3,
    'enable_critic_lstm': True,
    'lstm_kwargs': {
        'dropout': 0.25,
    },
}

# Disabled alternative: a recurrent agent that would consume policy_kwargs.
# Uncommenting it replaces the PPO model created earlier in this cell.
# model = RecurrentPPO(
#     'MlpLstmPolicy',
#     train_env,
#     verbose=0,
#     tensorboard_log=f'{saving_path}sequence_{sequence}_RecurrentPPO/',
#     policy_kwargs=policy_kwargs,
# )

# Run training with periodic evaluation through eval_callback.
model.learn(
    callback=eval_callback,
    total_timesteps=20_000_000,  # Adjust as needed
)

# Persist the model as it stands at the end of training, together with the
# training environment's VecNormalize state.
# NOTE(review): despite the "best_model" file name this is the final model,
# not the best one tracked by eval_callback - confirm the naming is intended.
model.save(f'{saving_path}{sequence}_best_model_core.zip')
train_env.save(f'{saving_path}{sequence}_vec_normalize_core.pkl')


In [None]:
# List the 5-minute training parquet files for inspection.
# WARNING: this rebinds the globals `source_path` and `source_dfs`. The
# environment factories defined earlier read `source_dfs` at call time, so
# calling them after running this cell would pick up this 5-minute list.
# Unlike the earlier listing, files with 'validate' in their name are not
# filtered out here.
source_path = '/Volumes/ssd_fat2/ai6_trading_bot/datasets/5min/df_with_all_indics_unbiased/not_norm/train/'
# os.listdir() returns entries in arbitrary order; sort so the listing is
# stable across runs and filesystems.
source_dfs = sorted(
    os.path.join(source_path, name)
    for name in os.listdir(source_path)
    if name.endswith('.parquet') and not name.startswith('.')
)
source_dfs

In [None]:
# Load one 5-minute training file (NZD_JPY) and show which columns it contains.
test = pd.read_parquet(
    path='/Volumes/ssd_fat2/ai6_trading_bot/datasets/5min/df_with_all_indics_unbiased/not_norm/train/NZD_JPY_train.parquet'
)
test.columns