# In [None]:
import optuna
from optuna.samplers import TPESampler
from optuna.pruners import MedianPruner
import numpy as np
import pandas as pd
from typing import Dict
from dataclasses import dataclass
import sqlite3
from datetime import datetime

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import VecNormalize, DummyVecEnv
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.vec_env import SubprocVecEnv
from stable_baselines3.common.monitor import Monitor

import os, sys

from datetime import datetime, timedelta
from pathlib import Path

import logging

# Root-logger configuration: every record at INFO or above is written both to
# the log file below and to the console.
log_file = "optuna_trials4.log"
logging.basicConfig(
    format="%(asctime)s - %(message)s",
    level=logging.INFO,
    handlers=[logging.FileHandler(log_file), logging.StreamHandler()],
)

# Put the parent of the current working directory on sys.path (once) so the
# project packages imported further down can be resolved.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)

from data_management.dataset_manager import DatasetManager
from trading.environments.forex_env2 import ForexTradingEnv

# Currency pair for this run and the location of its parquet file under
# datasets/1h.
pair = "EUR_USD"
parquet_path = Path("/Volumes/ssd_fat2/ai6_trading_bot/datasets/1h").joinpath(f"{pair}.parquet")

# The frame that is actually loaded is the normalized file below; parquet_path
# above is not read anywhere in this section.
# NOTE(review): the file name ends in "robut" - presumably it matches the file
# on disk; confirm before renaming anything.
norm_robust_path = Path('/Volumes/ssd_fat2/ai6_trading_bot/datasets/1h/normalized/eur_norm_robut.parquet')
df = pd.read_parquet(norm_robust_path)

# Ask the project's DatasetManager for the three partitions, passing
# 70 / 15 / 15 as the train / validation / test ratios.
dataset_manager = DatasetManager()
train_df, val_df, test_df = dataset_manager.split_dataset(
    df,
    train_ratio=0.7,
    val_ratio=0.15,
    test_ratio=0.15,
)


@dataclass
class RewardParams:
    """Coefficients that shape the trading environment's reward.

    This class only declares the knobs and their defaults. How each one enters
    the reward is decided by the environment that receives the instance, which
    is not part of this file - the per-field notes below are read off the
    field names and should be confirmed against that environment.
    """

    # Presumably scales the profit/loss of closed positions.
    realized_pnl_weight: float = 1.1
    # Presumably scales the profit/loss of positions that are still open.
    unrealized_pnl_weight: float = 0.8
    # Unit: hours (7 * 12).
    holding_time_threshold: int = 84
    # Negative by default, i.e. a penalty (presumably tied to the threshold above).
    holding_penalty_factor: float = -0.00001
    max_trades_per_day: int = 6
    # Negative by default (presumably tied to max_trades_per_day).
    overtrading_penalty_factor: float = -0.0001
    win_rate_threshold: float = 0.4
    # Positive by default (presumably tied to win_rate_threshold).
    win_rate_bonus_factor: float = 0.0005
    # Negative by default, i.e. a penalty.
    drawdown_penalty_factor: float = -0.0001

@dataclass
class OptimizationResult:
    """Record describing the outcome of one optimization trial.

    All fields are required and positional in the order declared below.
    """

    # Index of the trial this record belongs to.
    trial_number: int
    # Parameter mapping associated with the trial.
    params: Dict
    final_balance: float
    total_trades: int
    win_rate: float
    max_drawdown: float
    # Duration of the trial; the unit is not fixed by this class.
    training_time: float

class RewardOptimizer:
    """Tune reward-shaping weights with Optuna by training PPO agents.

    Every trial samples a set of ``RewardParams`` overrides, trains a PPO agent
    on ``train_df`` inside a normalized vectorized environment, evaluates it
    periodically on ``val_df`` through an ``EvalCallback`` and is finally
    scored by the ``balance`` attribute read from the evaluation environment.
    The study maximizes that score.
    """

    def __init__(
        self,
        train_df: pd.DataFrame,
        val_df: pd.DataFrame,
        study_name: str = "forex_reward_optimization_optimized_only_2_params_robust_norm",
        n_timesteps: int = 500_000,
        n_envs: int = 3,
        eval_freq: int = 100_000,
    ):
        """Create (or re-open) the Optuna study backing this optimizer.

        Args:
            train_df: Data used to build the training environments.
            val_df: Data used to build the evaluation environment.
            study_name: Name of the study inside ``optuna_trials.db``; an
                existing study with this name is re-used.
            n_timesteps: Total environment timesteps PPO trains for per trial.
            n_envs: Number of training environments run in subprocesses.
            eval_freq: Evaluation period expressed in environment timesteps.

        Raises:
            ValueError: If ``n_envs`` or ``eval_freq`` is smaller than 1.
        """
        if n_envs < 1:
            raise ValueError(f"n_envs must be >= 1, got {n_envs}")
        if eval_freq < 1:
            raise ValueError(f"eval_freq must be >= 1, got {eval_freq}")

        self.train_df = train_df
        self.val_df = val_df
        self.study_name = study_name
        self.n_timesteps = n_timesteps
        self.n_envs = n_envs
        self.eval_freq = eval_freq

        # Study with a seeded TPE sampler and a median pruner, persisted in SQLite.
        # NOTE(review): objective() never calls trial.report() or
        # trial.should_prune(), so the pruner configured here currently has no
        # effect. Reporting EvalCallback's mean reward would not be a fair
        # intermediate value, because the reward weights are the very
        # parameters being tuned; an intermediate balance would be needed.
        self.study = optuna.create_study(
            study_name=study_name,
            storage="sqlite:///optuna_trials.db",
            load_if_exists=True,
            sampler=TPESampler(seed=42),
            pruner=MedianPruner(
                n_startup_trials=5,
                n_warmup_steps=100_000,
                interval_steps=50_000
            ),
            direction="maximize"
        )

    def _create_env(self, df: pd.DataFrame, params: Dict, is_eval: bool = False, n_envs: int = 3) -> VecNormalize:
        """Build a ``VecNormalize``-wrapped vectorized ``ForexTradingEnv``.

        Args:
            df: Market data; every environment instance receives its own copy.
            params: Keyword arguments for ``RewardParams``; keys that are not
                given keep the dataclass defaults.
            is_eval: When True, a single in-process environment is built, its
                normalization statistics are frozen and rewards are left
                un-normalized. Otherwise ``n_envs`` subprocess environments
                are built and rewards are normalized.
            n_envs: Number of training subprocesses; ignored when ``is_eval``
                is True.

        Returns:
            The normalized vectorized environment.

        Raises:
            Exception: Whatever environment construction raised, after the
                error has been printed.
        """
        def make_env():
            """Return a zero-argument factory, as the vectorized env classes expect."""
            def _init():
                try:
                    env = ForexTradingEnv(
                        df=df.copy(),
                        pair='EUR_USD',
                        initial_balance=1_000_000,
                        trade_size=100_000,
                        reward_params=RewardParams(**params),
                        sequence_length=10,
                        random_start=False
                    )
                    # Monitor wraps the env before it is handed to the VecEnv.
                    return Monitor(env)
                except Exception as e:
                    print(f"Error creating environment: {str(e)}")
                    raise
            return _init

        vec_env = None
        try:
            # No per-environment seed is set here; all copies are built identically.
            if is_eval:
                vec_env = DummyVecEnv([make_env()])  # Single env for evaluation
            else:
                vec_env = SubprocVecEnv([make_env() for _ in range(n_envs)])

            # Apply normalization
            env = VecNormalize(
                vec_env,
                norm_obs=True,
                norm_reward=not is_eval,
                clip_obs=10.0,
                clip_reward=10.0,
                gamma=1.0,
                epsilon=1e-08
            )

            # Evaluation must not update the running statistics or rescale rewards.
            if is_eval:
                env.training = False
                env.norm_reward = False

            return env

        except Exception as e:
            print(f"Error in environment creation: {str(e)}")
            # Do not leave worker processes behind if wrapping failed after
            # the vectorized env had already been started.
            if vec_env is not None:
                vec_env.close()
            raise

    def objective(self, trial: optuna.Trial) -> float:
        """Train and evaluate one PPO agent for ``trial``.

        Args:
            trial: The Optuna trial supplying the reward parameters.

        Returns:
            The ``balance`` attribute of the evaluation environment after
            training, or ``float('-inf')`` when the trial fails at any stage
            (the failure is logged with its traceback).
        """
        # Initialised up-front so the cleanup in ``finally`` is well defined
        # even when environment creation itself fails.
        train_env = None
        eval_env = None
        try:
            # Sample parameters
            params = self._sample_parameters(trial)

            # Create environments
            train_env = self._create_env(self.train_df, params, is_eval=False, n_envs=self.n_envs)
            eval_env = self._create_env(self.val_df, params, is_eval=True, n_envs=1)

            start_time = datetime.now()

            # Create model
            model = PPO(
                "MultiInputPolicy",
                train_env,
                verbose=0,
                tensorboard_log=f"./tensorboard4/short_trial_{trial.number}"
            )

            # EvalCallback counts callback invocations, and every invocation
            # advances ``n_envs`` timesteps, so the timestep-based period is
            # converted to a per-call period here.
            eval_callback = EvalCallback(
                eval_env,
                best_model_save_path=f"./models/short_trial_{trial.number}",
                log_path=f"./logs/trial_{trial.number}",
                eval_freq=max(self.eval_freq // self.n_envs, 1),
                deterministic=True,
                render=False
            )

            # Train model
            model.learn(
                total_timesteps=self.n_timesteps,
                callback=eval_callback
            )

            # Persist the training normalization statistics; a failure here
            # fails the whole trial (handled by the outer ``except``).
            try:
                save_path = f'./optuna2/best_model_trial_{trial.number}/'
                os.makedirs(save_path, exist_ok=True)
                train_env.save(os.path.join(save_path, 'vecnormalize.pkl'))
                logging.info(f"Saved VecNormalize to {os.path.join(save_path, 'vecnormalize.pkl')}")
            except Exception as e:
                logging.error(f"Failed to save VecNormalize: {e}")
                raise

            # NOTE(review): these attributes are read after training has ended.
            # SB3 vectorized environments reset automatically when an episode
            # finishes, so confirm that ForexTradingEnv still exposes the
            # evaluated episode's balance and trade counters at this point.
            try:
                final_balance = eval_env.get_attr('balance')[0]
                total_trades = eval_env.get_attr('total_trades')[0]
                win_rate = eval_env.get_attr('winning_trades')[0] / max(1, total_trades)

                # Log results
                training_time = (datetime.now() - start_time).total_seconds()
                print(f"\nTrial {trial.number} completed:")
                print(f"Final Balance: ${final_balance:,.2f}")
                print(f"Total Trades: {total_trades}")
                print(f"Win Rate: {win_rate:.2%}")
                print(f"Training Time: {training_time:.1f}s")
                print("-" * 80)

                return final_balance

            except Exception as e:
                # logging.exception keeps the traceback that print() dropped.
                logging.exception("Error getting evaluation metrics: %s", e)
                return float('-inf')

        except Exception as e:
            # Best effort: a broken trial is scored as the worst possible
            # value instead of aborting the whole study.
            logging.exception("Trial %s failed: %s", trial.number, e)
            return float('-inf')
        finally:
            for env in (train_env, eval_env):
                if env is not None:
                    env.close()

    def _sample_parameters(self, trial: optuna.Trial) -> Dict:
        """Sample the ``RewardParams`` overrides for ``trial``.

        Only the two PnL weights are searched. Every other ``RewardParams``
        field keeps its dataclass default because it is not part of the
        returned mapping; the commented entries record search ranges that are
        currently switched off.
        """
        return {
            'realized_pnl_weight': trial.suggest_float('realized_pnl_weight', 1.0, 1.5),
            'unrealized_pnl_weight': trial.suggest_float('unrealized_pnl_weight', 0.5, 1.0),
            # 'holding_time_threshold': trial.suggest_int('holding_time_threshold', 24, 96),
            # 'holding_penalty_factor': trial.suggest_float('holding_penalty_factor', -0.0001, 0.0),
            # 'max_trades_per_day': trial.suggest_int('max_trades_per_day', 3, 12),
            # 'overtrading_penalty_factor': trial.suggest_float('overtrading_penalty_factor', -0.0001, 0.0),
            # 'win_rate_threshold': trial.suggest_float('win_rate_threshold', 0.3, 0.5),
            # 'win_rate_bonus_factor': trial.suggest_float('win_rate_bonus_factor', 0.0001, 0.001, log=True),
            # 'drawdown_penalty_factor': trial.suggest_float('drawdown_penalty_factor', -0.001, 0.0)
        }

    def optimize(self, n_trials: int = 100, n_jobs: int = 6) -> None:
        """Run the study and print the best trial.

        Args:
            n_trials: Number of trials to run.
            n_jobs: Number of parallel jobs handed to ``Study.optimize``; each
                running trial additionally starts its own training
                subprocesses.
        """
        self.study.optimize(
            self.objective,
            n_trials=n_trials,
            n_jobs=n_jobs,  # Number of parallel jobs
            show_progress_bar=False
        )

        # Print best trial after completion
        print("\nOptimization completed!")
        print("\nBest trial:")
        trial = self.study.best_trial
        print(f"Value: ${trial.value:,.2f}")
        print("Best parameters:")
        for key, value in trial.params.items():
            print(f"    {key}: {value}")


# Build the optimizer on the train / validation partitions; each trial trains
# for one million timesteps instead of the constructor default.
optimizer = RewardOptimizer(train_df=train_df, val_df=val_df, n_timesteps=1_000_000)

# Launch the study: 10 trials with n_jobs=2.
optimizer.optimize(n_trials=10, n_jobs=2)