In [None]:

import optuna
from optuna.samplers import TPESampler
from optuna.pruners import MedianPruner
import numpy as np
import pandas as pd
from typing import Dict
from dataclasses import dataclass
import sqlite3
from datetime import datetime

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import VecNormalize, DummyVecEnv
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.vec_env import SubprocVecEnv

from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.evaluation import evaluate_policy
from optuna.integration.tensorboard import TensorBoardCallback
from stable_baselines3.common.callbacks import BaseCallback



import os, sys

from datetime import datetime, timedelta
from pathlib import Path

import logging


# Make the project packages importable: the parent of the current working
# directory is appended to sys.path, so the imports below only resolve when the
# notebook is started from a direct sub-directory of the project root.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)

# Project-local imports; they must stay below the sys.path tweak above.
from data_management.dataset_manager import DatasetManager
from trading.environments.forex_env2_flat import ForexTradingEnv

# Configure logging first, then fetch the logger used throughout this file.
from utils.logging_utils import setup_logging, get_logger
setup_logging()
logger = get_logger('optuna_optimize_window_size')

# Path to the input parquet file (despite the variable name, this is a path
# string, not a DataFrame).
# NOTE(review): the file name says "no_norm" while the variable says "norm" —
# confirm which dataset is intended.
df_norm_5min = '/Volumes/ssd_fat2/trial_datasets/EUR_USD_5T_indics_no_norm.parquet'


# Output directory prefix for per-trial artefacts; later code concatenates file
# names directly onto it, so the trailing slash is required.
best_model_path = './logs/optuna/'
os.makedirs(best_model_path, exist_ok=True)


# Load the full dataset and split it 70% / 15% / 15% into train, validation
# and test frames. objective() reads train_df and val_df as module globals;
# test_df is not used in the visible part of this file.
df = pd.read_parquet(df_norm_5min)
dataset_manager = DatasetManager()
train_df, val_df, test_df = dataset_manager.split_dataset(df, train_ratio=0.7, val_ratio=0.15, test_ratio=0.15)



class TrialEvalCallback(BaseCallback):
    """
    Callback used for evaluating and reporting a trial to Optuna.

    Every ``eval_freq`` callback calls the current policy is evaluated on
    ``eval_env``, the mean episode reward is reported to ``trial`` as an
    intermediate value, and the trial is aborted with
    ``optuna.exceptions.TrialPruned`` when the study's pruner asks for it.

    Args:
        eval_env: Environment the policy is evaluated on.
        trial: Optuna trial that receives the intermediate values.
        n_eval_episodes: Number of episodes per evaluation.
        eval_freq: Evaluate every ``eval_freq`` calls of the callback; a value
            <= 0 disables the periodic evaluation.
        deterministic: Whether actions are chosen deterministically during
            evaluation.
        verbose: Verbosity level forwarded to ``BaseCallback``.
    """
    def __init__(self, eval_env, trial, n_eval_episodes=5, eval_freq=50_000, deterministic=True, verbose=0):
        super().__init__(verbose)
        self.eval_env = eval_env
        self.trial = trial
        self.n_eval_episodes = n_eval_episodes
        self.eval_freq = eval_freq
        self.deterministic = deterministic
        # Defined here as well so the flag can be read before training starts.
        self.is_pruned = False

    def _init_callback(self):
        # Reset the flag each time the callback is attached to a model.
        self.is_pruned = False

    def _on_step(self):
        """Evaluate, report and possibly prune; return True to keep training."""
        # Guard against eval_freq <= 0, which would otherwise divide by zero.
        if self.eval_freq <= 0 or self.n_calls % self.eval_freq != 0:
            return True

        # The evaluation env keeps its own VecNormalize statistics. Without
        # copying the training statistics over, the policy would be evaluated
        # on observations scaled differently from the ones it was trained on.
        if self.model.get_vec_normalize_env() is not None:
            from stable_baselines3.common.vec_env import sync_envs_normalization
            sync_envs_normalization(self.training_env, self.eval_env)

        mean_reward, _ = evaluate_policy(
            self.model,
            self.eval_env,
            n_eval_episodes=self.n_eval_episodes,
            deterministic=self.deterministic,
            return_episode_rewards=False
        )

        # Report the intermediate result; the step is the callback call count.
        self.trial.report(mean_reward, self.n_calls)

        if self.trial.should_prune():
            self.is_pruned = True
            # Propagates out of model.learn() so Optuna marks the trial pruned.
            raise optuna.exceptions.TrialPruned()

        return True  # Continue training


def objective(trial):
    """
    Optuna objective: train PPO on the training split and score it on validation.

    Samples ``sequence_length``, ``learning_rate`` and ``batch_size`` from the
    trial, trains a PPO agent for 1,000,000 timesteps with periodic
    evaluation/pruning through ``TrialEvalCallback``, saves the training
    ``VecNormalize`` statistics under ``best_model_path`` and returns the mean
    reward over 5 validation episodes.

    Args:
        trial: The ``optuna`` trial supplying the hyper-parameters.

    Returns:
        Mean episode reward on the validation environment (to be maximised).

    Raises:
        optuna.exceptions.TrialPruned: Raised by the evaluation callback when
            the pruner stops the trial early.
    """
    # Function-scope import so this block does not depend on file-level edits.
    from stable_baselines3.common.vec_env import sync_envs_normalization

    sequence_length = trial.suggest_int('sequence_length', 1, 50)
    # suggest_loguniform is deprecated; suggest_float(log=True) samples the
    # same log-uniform range.
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-3, log=True)
    batch_size = trial.suggest_categorical('batch_size', [64, 128, 256])

    def build_env(data, *, norm_reward, training):
        """Wrap a ForexTradingEnv over ``data`` in Monitor/DummyVecEnv/VecNormalize."""
        monitored = Monitor(
            ForexTradingEnv(
                df=data,
                pair='EUR_USD',
                sequence_length=sequence_length,
            )
        )
        # A dedicated name avoids relying on late binding of a rebound variable.
        vec_env = DummyVecEnv([lambda: monitored])
        vec_env = VecNormalize(vec_env, norm_obs=True, norm_reward=norm_reward, epsilon=1e-08)
        vec_env.training = training
        return vec_env

    train_env = build_env(train_df, norm_reward=True, training=True)
    # Validation env: raw rewards and frozen normalisation statistics.
    eval_env = build_env(val_df, norm_reward=False, training=False)

    try:
        eval_callback = TrialEvalCallback(
            eval_env=eval_env,
            trial=trial,
            n_eval_episodes=5,
            eval_freq=50_000,  # Adjust based on your needs
            deterministic=True,
            verbose=0
        )

        # Initialize the model
        model = PPO(
            'MlpPolicy',
            train_env,
            learning_rate=learning_rate,
            batch_size=batch_size,
            verbose=0,
            tensorboard_log=f'./logs/optuna/tensorboard_logs/trial_{trial.number}'
        )

        # Train the model
        model.learn(
            total_timesteps=1_000_000,
            callback=eval_callback
        )
        train_env.save(f'{best_model_path}vecnormalize_{trial.number}.pkl')

        # Evaluate with the final training statistics; otherwise the policy
        # would see observations normalised differently from training.
        sync_envs_normalization(train_env, eval_env)
        mean_reward, _ = evaluate_policy(model, eval_env, n_eval_episodes=5)
    finally:
        # Release the environments even when the trial is pruned or fails.
        train_env.close()
        eval_env.close()

    return mean_reward


def print_status(trial):
    """
    Log the value of a finished trial and flag it when it is the study's best.

    Reads the module-level ``study``. The best trial is identified by its
    trial number rather than by comparing whole trial objects.

    Args:
        trial: Trial object exposing ``number`` and ``value``.
    """
    logger.info(f"Trial {trial.number} completed with value: {trial.value}")
    try:
        best_number = study.best_trial.number
    except ValueError:
        # Optuna raises ValueError while no trial has completed yet.
        return
    if best_number == trial.number:
        logger.info(f"New best trial: {trial.number} with value: {trial.value}")


# Writes each finished trial's objective value to TensorBoard under ./optuna_logs/.
tensorboard_callback = TensorBoardCallback(
    "./optuna_logs/", metric_name="mean_reward")

# Echo Optuna's own log records to stdout so they show up in the notebook.
optuna.logging.get_logger("optuna").addHandler(
    logging.StreamHandler(sys.stdout))

# Persistent study: re-running the cell resumes the existing 'sequence_length'
# study stored in db.sqlite3 instead of starting over.
# NOTE(review): n_warmup_steps is measured in the `step` passed to
# trial.report(); TrialEvalCallback reports at multiples of 50_000, so a
# warm-up of 5 never delays pruning — raise it if a warm-up is intended.
study = optuna.create_study(
    direction='maximize',
    storage="sqlite:///db.sqlite3",
    study_name='sequence_length',
    load_if_exists=True,
    sampler=TPESampler(),
    pruner=MedianPruner(n_warmup_steps=5)
)


# NOTE(review): four concurrent workers share the SQLite file; confirm this
# storage copes with the parallel writes or switch to a server database.
study.optimize(objective, n_trials=50, show_progress_bar=False, n_jobs=4,
               callbacks=[tensorboard_callback])

# Logging calls need a placeholder for every extra argument; without one the
# logging module reports a formatting error instead of printing the value.
logger.info("Number of finished trials: %d", len(study.trials))
logger.info("Best trial:")
trial = study.best_trial

logger.info("  Value: %s", trial.value)
logger.info("  Params: ")
for key, value in trial.params.items():
    logger.info("    %s: %s", key, value)


Logging configuration loaded from /Users/floriankockler/Code/GitHub.nosync/ai6-gcp-bot/forex_trading_system/config/logging_config.yaml
Logging configuration loaded from /Users/floriankockler/Code/GitHub.nosync/ai6-gcp-bot/forex_trading_system/config/logging_config.yaml
2024-11-29 05:31:14 - oandapyV20.oandapyV20 - INFO - oandapyV20.py:207 - setting up API-client for environment practice
Dataset split sizes:
Training: 5961956 samples (70.0%)
Validation: 1277562 samples (15.0%)
Test: 1277563 samples (15.0%)


  tensorboard_callback = TensorBoardCallback(
[I 2024-11-29 05:31:18,570] A new study created in RDB with name: sequence_length


A new study created in RDB with name: sequence_length
2024-11-29 05:31:18 - optuna_optimize_window_size - INFO - 1290728506.py:193 - Notebook initialized


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-3)


2024-11-29 05:31:18 - ForexEnv2_flat - INFO - forex_env2_flat.py:261 - Selected features for observation space: ['close', 'sma_20', 'sma_50', 'rsi', 'macd', 'macd_signal', 'macd_hist', 'bb_upper', 'bb_middle', 'bb_lower', 'bb_bandwidth', 'bb_percent', 'atr', 'plus_di', 'minus_di', 'adx', 'senkou_span_a', 'senkou_span_b', 'tenkan_sen', 'kijun_sen']
2024-11-29 05:31:18 - ForexEnv2_flat - INFO - forex_env2_flat.py:261 - Selected features for observation space: ['close', 'sma_20', 'sma_50', 'rsi', 'macd', 'macd_signal', 'macd_hist', 'bb_upper', 'bb_middle', 'bb_lower', 'bb_bandwidth', 'bb_percent', 'atr', 'plus_di', 'minus_di', 'adx', 'senkou_span_a', 'senkou_span_b', 'tenkan_sen', 'kijun_sen']
2024-11-29 05:31:18 - ForexEnv2_flat - INFO - forex_env2_flat.py:261 - Selected features for observation space: ['close', 'sma_20', 'sma_50', 'rsi', 'macd', 'macd_signal', 'macd_hist', 'bb_upper', 'bb_middle', 'bb_lower', 'bb_bandwidth', 'bb_percent', 'atr', 'plus_di', 'minus_di', 'adx', 'senkou_spa

  learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-3)


2024-11-29 05:31:19 - ForexEnv2_flat - INFO - forex_env2_flat.py:261 - Selected features for observation space: ['close', 'sma_20', 'sma_50', 'rsi', 'macd', 'macd_signal', 'macd_hist', 'bb_upper', 'bb_middle', 'bb_lower', 'bb_bandwidth', 'bb_percent', 'atr', 'plus_di', 'minus_di', 'adx', 'senkou_span_a', 'senkou_span_b', 'tenkan_sen', 'kijun_sen']
