In [None]:

import optuna
from optuna.samplers import TPESampler
from optuna.pruners import MedianPruner
import numpy as np
import pandas as pd
from typing import Dict
from dataclasses import dataclass
import sqlite3
from datetime import datetime

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import VecNormalize, DummyVecEnv
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.vec_env import SubprocVecEnv

from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.evaluation import evaluate_policy
from optuna.integration.tensorboard import TensorBoardCallback
from stable_baselines3.common.callbacks import BaseCallback



import os, sys

from datetime import datetime, timedelta
from pathlib import Path

import logging


# Make the project packages importable: the parent of the current working
# directory is treated as the project root and appended to sys.path. This must
# run before the project-local imports below.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)

# Project-local modules (resolved through the sys.path entry added above).
from data_management.dataset_manager import DatasetManager
from trading.environments.forex_env2_flat import ForexTradingEnv

from utils.logging_utils import setup_logging, get_logger
# Project logging helpers; presumably these load the project's logging
# configuration -- confirm in utils.logging_utils.
setup_logging()
logger = get_logger('optuna_optimize_window_size')

# NOTE(review): `pair` is not referenced by the code visible in this cell; the
# environments below pass the literal 'EUR_USD' instead.
pair = "EUR_USD"
# Source dataset, read fully into memory as a DataFrame.
best_eur_df_so_far = '/Volumes/ssd_fat2/ai6_trading_bot/datasets/5min/best_dataframes/EUR_USD_5T_indics_1H_norm.parquet'
df = pd.read_parquet(best_eur_df_so_far)


# Output directory for this study's artifacts. The string has no trailing path
# separator, so build file names with os.path.join rather than concatenation.
best_model_path = '/Volumes/ssd_fat2/ai6_trading_bot/optuna/window_size'
os.makedirs(best_model_path, exist_ok=True)



# Split the dataset 70% / 15% / 15% into train / validation / test frames.
dataset_manager = DatasetManager()
train_df, val_df, test_df = dataset_manager.split_dataset(df, train_ratio=0.7, val_ratio=0.15, test_ratio=0.15)



class TrialEvalCallback(BaseCallback):
    """
    Callback used for evaluating and reporting a trial to Optuna.

    Every ``eval_freq`` callback calls the current policy is evaluated on
    ``eval_env``; the mean episode reward is reported to ``trial`` with
    ``n_calls`` as the step, and ``optuna.exceptions.TrialPruned`` is raised
    when the pruner asks to stop the trial.

    Parameters:
        eval_env: vectorized environment used for evaluation.
        trial: the Optuna trial that receives the intermediate values.
        n_eval_episodes: number of episodes per evaluation.
        eval_freq: evaluate every this many callback calls; 0 disables
            evaluation.
        deterministic: whether the policy acts deterministically during
            evaluation.
        verbose: verbosity level forwarded to ``BaseCallback``.
    """
    def __init__(self, eval_env, trial, n_eval_episodes=5, eval_freq=50_000, deterministic=True, verbose=0):
        super().__init__(verbose)
        self.eval_env = eval_env
        self.trial = trial
        self.n_eval_episodes = n_eval_episodes
        self.eval_freq = eval_freq
        self.deterministic = deterministic
        # Defined here as well so the attribute exists even before the
        # callback is attached to a model.
        self.is_pruned = False

    def _init_callback(self):
        # Reset the pruning flag each time the callback is (re)attached.
        self.is_pruned = False

    def _sync_normalization(self):
        """Copy the training env's VecNormalize statistics into the eval env.

        The evaluation environment is frozen (``training = False``), so its
        running mean/variance never update on their own. Without this copy
        the policy would be evaluated on observations scaled differently
        from the ones it was trained on.
        """
        # Local import keeps the class self-contained within this notebook cell.
        from stable_baselines3.common.vec_env import sync_envs_normalization

        if self.model.get_vec_normalize_env() is None:
            return
        try:
            sync_envs_normalization(self.training_env, self.eval_env)
        except AttributeError as err:
            raise AssertionError(
                "Training and eval env are not wrapped the same way; "
                "cannot synchronize VecNormalize statistics."
            ) from err

    def _on_step(self):
        # Not an evaluation step (or evaluation disabled): keep training.
        if not self.eval_freq or self.n_calls % self.eval_freq != 0:
            return True

        self._sync_normalization()

        mean_reward, _ = evaluate_policy(
            self.model,
            self.eval_env,
            n_eval_episodes=self.n_eval_episodes,
            deterministic=self.deterministic,
            return_episode_rewards=False
        )

        # Report the intermediate result; the step is the callback call count,
        # so pruner settings expressed in "steps" use the same unit.
        self.trial.report(mean_reward, self.n_calls)

        # Abort the trial when the pruner says so.
        if self.trial.should_prune():
            self.is_pruned = True
            raise optuna.exceptions.TrialPruned()

        return True  # Continue training


def objective(trial):
    """Optuna objective: train PPO for one sampled ``sequence_length``.

    A PPO agent is trained on ``train_df`` for 1,000,000 timesteps while
    ``TrialEvalCallback`` evaluates it on ``val_df`` every 50,000 steps and
    reports to ``trial`` (which may raise ``optuna.exceptions.TrialPruned``).
    After training, the VecNormalize statistics of the training environment
    are saved to ``<best_model_path>/vecnormalize_<trial.number>.pkl``.

    Parameters:
        trial: the Optuna trial supplying ``sequence_length`` (1..50).

    Returns:
        Mean reward over 5 deterministic evaluation episodes on the
        validation environment.
    """
    # Local import: this helper is not among the cell's top-level imports.
    from stable_baselines3.common.vec_env import sync_envs_normalization

    sequence_length = trial.suggest_int('sequence_length', 1, 50)

    def _build_env(data, norm_reward, training):
        """Wrap ForexTradingEnv as Monitor -> DummyVecEnv -> VecNormalize."""
        base_env = Monitor(
            ForexTradingEnv(
                df=data,
                pair='EUR_USD',
                sequence_length=sequence_length,
            )
        )
        # The lambda closes over a name that is never rebound, so it always
        # returns the monitored env created above.
        vec_env = DummyVecEnv([lambda: base_env])
        vec_env = VecNormalize(vec_env, norm_obs=True, norm_reward=norm_reward, epsilon=1e-08)
        vec_env.training = training
        return vec_env

    train_env = None
    eval_env = None
    try:
        train_env = _build_env(train_df, norm_reward=True, training=True)
        # Validation env: raw rewards, frozen normalization statistics.
        eval_env = _build_env(val_df, norm_reward=False, training=False)

        eval_callback = TrialEvalCallback(
            eval_env=eval_env,
            trial=trial,
            n_eval_episodes=5,
            eval_freq=50_000,  # Adjust based on your needs
            deterministic=True,
            verbose=0
        )

        # Initialize the model
        model = PPO(
            'MlpPolicy',
            train_env,
            verbose=0,
            tensorboard_log=f'./logs/optuna/tensorboard_logs/trial_{trial.number}'
        )

        # Train the model
        model.learn(
            total_timesteps=1_000_000,
            callback=eval_callback
        )

        # best_model_path has no trailing separator, so join explicitly;
        # plain concatenation wrote the file next to the directory instead
        # of inside it.
        train_env.save(os.path.join(best_model_path, f'vecnormalize_{trial.number}.pkl'))

        # The eval env never updates its own statistics; copy the trained
        # ones so the final score uses the same observation scaling as
        # training.
        sync_envs_normalization(train_env, eval_env)

        mean_reward, _ = evaluate_policy(model, eval_env, n_eval_episodes=5, deterministic=True)

        return mean_reward
    finally:
        # Release the environments on success, failure and pruning alike.
        for env in (train_env, eval_env):
            if env is not None:
                env.close()


def print_status(trial):
    """Log the value of a finished trial and flag it when it is the study's best.

    Reads the module-level ``study`` and ``logger``.
    """
    number = trial.number
    value = trial.value
    logger.info(f"Trial {number} completed with value: {value}")

    is_new_best = study.best_trial == trial
    if not is_new_best:
        return
    logger.info(f"New best trial: {number} with value: {value}")


# NOTE(review): this callback is created but not passed to study.optimize();
# add callbacks=[tensorboard_callback] there if TensorBoard logging of trial
# values is wanted.
tensorboard_callback = TensorBoardCallback(
    "./optuna_logs/", metric_name="mean_reward")

# Mirror Optuna's log records to stdout. Guarded so that re-running this cell
# does not stack another identical handler and multiply every message.
_optuna_logger = optuna.logging.get_logger("optuna")
if not any(
    isinstance(handler, logging.StreamHandler)
    and getattr(handler, "stream", None) is sys.stdout
    for handler in _optuna_logger.handlers
):
    _optuna_logger.addHandler(logging.StreamHandler(sys.stdout))

study = optuna.create_study(
    direction='maximize',
    storage="sqlite:///db.sqlite3",
    study_name='sequence_length_3Dec1',
    load_if_exists=True,
    sampler=TPESampler(n_startup_trials=10),
    pruner=MedianPruner(
        n_startup_trials=5,    # Wait for 5 trials before pruning
        # Warm-up is measured in the step value passed to trial.report().
        # The objective reports the timestep count every 50_000 steps, so a
        # value of 2 never delayed pruning. With 2 * 50_000 the evaluation at
        # 50_000 cannot be pruned and the one at 100_000 is the first that can.
        n_warmup_steps=2 * 50_000,
        interval_steps=1
    )
)


study.optimize(objective, n_trials=50, show_progress_bar=False, n_jobs=6,
               )

# The values are formatted into the message; passing them as extra positional
# arguments without a placeholder meant they were never logged.
logger.info(f"Number of finished trials: {len(study.trials)}")
logger.info("Best trial:")
trial = study.best_trial

logger.info(f"  Value: {trial.value}")
logger.info("  Params: ")
for key, value in trial.params.items():
    logger.info(f"    {key}: {value}")



Logging configuration loaded from /Users/floriankockler/Code/GitHub.nosync/ai6-gcp-bot/forex_trading_system/config/logging_config.yaml
2024-12-03 18:35:11 - oandapyV20.oandapyV20 - INFO - oandapyV20.py:207 - setting up API-client for environment practice
Dataset split sizes:
Training: 1227053 samples (70.0%)
Validation: 262940 samples (15.0%)
Test: 262940 samples (15.0%)


  tensorboard_callback = TensorBoardCallback(
[I 2024-12-03 18:35:11,659] A new study created in RDB with name: sequence_length_3Dec1


A new study created in RDB with name: sequence_length_3Dec1
A new study created in RDB with name: sequence_length_3Dec1
2024-12-03 18:41:19 - ForexEnv2_flat - INFO - forex_env2_flat.py:320 - 
Episode Summary:
2024-12-03 18:41:19 - ForexEnv2_flat - INFO - forex_env2_flat.py:321 - Final Return: -0.00%
2024-12-03 18:41:19 - ForexEnv2_flat - INFO - forex_env2_flat.py:322 - Total PnL: 0.00
2024-12-03 18:41:19 - ForexEnv2_flat - INFO - forex_env2_flat.py:323 - Total Trades: 0
2024-12-03 18:41:19 - ForexEnv2_flat - INFO - forex_env2_flat.py:324 - Winning Trades: 0
2024-12-03 18:41:19 - ForexEnv2_flat - INFO - forex_env2_flat.py:325 - Win Rate: 0.00%
2024-12-03 18:41:19 - ForexEnv2_flat - INFO - forex_env2_flat.py:326 - Initial Balance: 1000000.00
2024-12-03 18:41:19 - ForexEnv2_flat - INFO - forex_env2_flat.py:327 - Final Balance: 999985.16
2024-12-03 18:41:19 - ForexEnv2_flat - INFO - forex_env2_flat.py:328 - Trade_size: 94510.00
2024-12-03 18:41:19 - ForexEnv2_flat - INFO - forex_env2_flat.

In [None]:
import optuna
from optuna.samplers import TPESampler
from optuna.pruners import MedianPruner
import numpy as np
import pandas as pd
from typing import Dict
from dataclasses import dataclass
import sqlite3
from datetime import datetime

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import VecNormalize, DummyVecEnv
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.vec_env import SubprocVecEnv

from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.evaluation import evaluate_policy
from optuna.integration.tensorboard import TensorBoardCallback
from stable_baselines3.common.callbacks import BaseCallback



import os, sys

from datetime import datetime, timedelta
from pathlib import Path

import logging


# Make the project packages importable: the parent of the current working
# directory is treated as the project root and appended to sys.path. This must
# run before the project-local imports below.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)

# Project-local modules (resolved through the sys.path entry added above).
from data_management.dataset_manager import DatasetManager
from trading.environments.forex_env2_flat import ForexTradingEnv


# NOTE(review): `pair` is not referenced by the code visible in this cell; the
# environments below pass the literal 'EUR_USD' instead.
pair = "EUR_USD"
# Source dataset, read fully into memory as a DataFrame.
best_eur_df_so_far = '/Volumes/ssd_fat2/ai6_trading_bot/datasets/5min/best_dataframes/EUR_USD_5T_indics_1H_norm.parquet'
df = pd.read_parquet(best_eur_df_so_far)


# Output directory for this study's artifacts; created if missing. The code
# visible in this cell does not write into it.
best_model_path = '/Volumes/ssd_fat2/ai6_trading_bot/optuna/window_size'
os.makedirs(best_model_path, exist_ok=True)



# Split the dataset 70% / 15% / 15% into train / validation / test frames.
dataset_manager = DatasetManager()
train_df, val_df, test_df = dataset_manager.split_dataset(df, train_ratio=0.7, val_ratio=0.15, test_ratio=0.15)



def objective(trial):
    """Optuna objective: train PPO for one sampled ``sequence_length``.

    A PPO agent is trained on ``train_df`` for ``total_timesteps`` steps with
    Stable-Baselines3's ``EvalCallback`` evaluating on ``val_df`` every
    ``eval_freq`` steps. ``EvalCallback`` does not report to Optuna, so this
    objective produces no intermediate values and trials are never pruned.

    Parameters:
        trial: the Optuna trial supplying ``sequence_length`` (1..50).

    Returns:
        Mean reward over 5 deterministic evaluation episodes on the
        validation environment.
    """
    sequence_length = trial.suggest_int('sequence_length', 1, 50)
    total_timesteps = 1_000_000  # training budget per trial
    eval_freq = 50_000           # evaluate every 50k training steps

    def make_env():
        """Training env: Monitor -> DummyVecEnv -> VecNormalize (obs + reward)."""
        base_env = Monitor(
            ForexTradingEnv(
                df=train_df,
                pair='EUR_USD',
                sequence_length=sequence_length,
            )
        )
        # The lambda closes over a name that is never rebound afterwards.
        vec_env = DummyVecEnv([lambda: base_env])
        return VecNormalize(vec_env, norm_obs=True, norm_reward=True)

    def make_eval_env():
        """Validation env: same wrappers, raw rewards, frozen statistics."""
        base_env = Monitor(
            ForexTradingEnv(
                df=val_df,
                pair='EUR_USD',
                sequence_length=sequence_length,
            )
        )
        vec_env = DummyVecEnv([lambda: base_env])
        vec_env = VecNormalize(vec_env, norm_obs=True, norm_reward=False)
        vec_env.training = False
        return vec_env

    train_env = None
    eval_env = None
    try:
        train_env = make_env()
        eval_env = make_eval_env()

        # Built-in periodic evaluation; nothing is written to disk.
        eval_callback = EvalCallback(
            eval_env,
            best_model_save_path=None,  # Disable saving the best model
            log_path=None,
            eval_freq=eval_freq,
            n_eval_episodes=5,
            deterministic=True,
            render=False,
            verbose=0
        )

        # Passed as a list so further callbacks can be appended later.
        callback = [eval_callback]

        model = PPO(
            'MlpPolicy',
            train_env,
            verbose=0,
            tensorboard_log=f'./logs/optuna/tensorboard_logs/trial_{trial.number}',
        )

        model.learn(
            total_timesteps=total_timesteps,
            callback=callback
        )

        mean_reward, _ = evaluate_policy(model, eval_env, n_eval_episodes=5, deterministic=True)

        return mean_reward
    finally:
        # Release the environments whether the trial finished or failed, so
        # they do not accumulate across the trials of the study.
        for env in (train_env, eval_env):
            if env is not None:
                env.close()

# Study settings: maximize the objective and, when a study named
# 'sequence_length' already exists in the local SQLite file, resume it.
_study_settings = {
    'direction': 'maximize',
    'storage': "sqlite:///db.sqlite3",
    'study_name': 'sequence_length',
    'load_if_exists': True,
}
study = optuna.create_study(**_study_settings)

# Run 50 trials with n_jobs=6 and no progress bar; tune n_jobs to the
# resources of the machine.
study.optimize(objective, n_trials=50, n_jobs=6, show_progress_bar=False)

2024-12-03 19:06:07 - oandapyV20.oandapyV20 - INFO - oandapyV20.py:207 - setting up API-client for environment practice


[I 2024-12-03 19:06:07,840] A new study created in RDB with name: sequence_length


Dataset split sizes:
Training: 1227053 samples (70.0%)
Validation: 262940 samples (15.0%)
Test: 262940 samples (15.0%)
2024-12-03 19:14:33 - ForexEnv2_flat - INFO - forex_env2_flat.py:320 - 
Episode Summary:
2024-12-03 19:14:33 - ForexEnv2_flat - INFO - forex_env2_flat.py:321 - Final Return: 11.63%
2024-12-03 19:14:33 - ForexEnv2_flat - INFO - forex_env2_flat.py:322 - Total PnL: 140942.76
2024-12-03 19:14:33 - ForexEnv2_flat - INFO - forex_env2_flat.py:323 - Total Trades: 831
2024-12-03 19:14:33 - ForexEnv2_flat - INFO - forex_env2_flat.py:324 - Winning Trades: 673
2024-12-03 19:14:33 - ForexEnv2_flat - INFO - forex_env2_flat.py:325 - Win Rate: 80.99%
2024-12-03 19:14:33 - ForexEnv2_flat - INFO - forex_env2_flat.py:326 - Initial Balance: 1000000.00
2024-12-03 19:14:33 - ForexEnv2_flat - INFO - forex_env2_flat.py:327 - Final Balance: 1116267.05
2024-12-03 19:14:33 - ForexEnv2_flat - INFO - forex_env2_flat.py:328 - Trade_size: 94510.00
2024-12-03 19:14:33 - ForexEnv2_flat - INFO - forex_

Complete code from Claude.
This callback apparently does feed (report) intermediate results to Optuna.

In [None]:
import os
import sys
# Make the project packages importable: the parent of the current working
# directory is treated as the project root and appended to sys.path. This must
# run before the project-local imports further down.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)
import optuna
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import BaseCallback
import numpy as np
import logging
import pandas as pd
# Project-local modules (resolved through the sys.path entry added above).
from trading.environments.forex_env2_flat import ForexTradingEnv
from data_management.dataset_manager import DatasetManager

# Output directory for models, VecNormalize statistics and TensorBoard logs.
# The trailing '/' matters: code below builds file names by appending
# directly to this string.
best_model_path = '/Volumes/ssd_fat2/ai6_trading_bot/datasets/5min/best_dataframes/optuna/'
os.makedirs(best_model_path, exist_ok=True)

# NOTE(review): `pair` is not referenced by the code visible in this cell;
# make_env() passes the literal 'EUR_USD' instead.
pair = "EUR_USD"
# Source dataset, read fully into memory as a DataFrame.
best_eur_df_so_far = '/Volumes/ssd_fat2/ai6_trading_bot/datasets/5min/best_dataframes/EUR_USD_5T_indics_1H_norm.parquet'
df = pd.read_parquet(best_eur_df_so_far)

# Split the dataset 70% / 15% / 15% into train / validation / test frames.
dataset_manager = DatasetManager()
train_df, val_df, test_df = dataset_manager.split_dataset(df, train_ratio=0.7, val_ratio=0.15, test_ratio=0.15)

class ForexTrialEvalCallback(BaseCallback):
    """Callback for evaluating and reporting trial results to Optuna.

    Every ``eval_freq`` callback calls the policy is evaluated, the mean
    reward is reported to ``trial`` (step = ``n_calls``), the best model seen
    by this callback is saved under ``best_model_path``, and
    ``optuna.exceptions.TrialPruned`` is raised when the pruner requests it.

    Args:
        eval_env: Vectorized environment to evaluate on. When ``None`` the
            training environment itself is used, temporarily switched to
            evaluation mode.
        trial: Optuna trial that receives the intermediate values.
        eval_freq: Evaluate every this many callback calls.
        deterministic: Whether the policy acts deterministically during
            evaluation.
        verbose: Verbosity level forwarded to ``BaseCallback``.
        n_eval_episodes: Number of episodes per evaluation.
    """

    def __init__(
        self,
        eval_env,
        trial: optuna.Trial,
        eval_freq: int = 50_000,
        deterministic: bool = True,
        verbose: int = 0,
        n_eval_episodes: int = 5
    ):
        super().__init__(verbose)
        self.eval_env = eval_env
        self.trial = trial
        self.eval_freq = eval_freq
        self.deterministic = deterministic
        self.n_eval_episodes = n_eval_episodes
        self.best_mean_reward = -np.inf
        self.is_pruned = False

    def _init_callback(self) -> None:
        self.is_pruned = False

    def _on_step(self) -> bool:
        """Called after each training step; evaluates every ``eval_freq`` calls."""
        if self.n_calls % self.eval_freq != 0:
            return True

        use_training_env = self.eval_env is None
        if use_training_env:
            # Fallback: evaluate on the training env with statistics frozen
            # and raw rewards. NOTE(review): evaluate_policy() resets and
            # steps this env, so the rollout in progress is disturbed; pass a
            # dedicated eval_env to avoid that.
            env = self.training_env
            previous_flags = (
                getattr(env, "training", True),
                getattr(env, "norm_reward", True),
            )
            env.training = False
            env.norm_reward = False
        else:
            # Dedicated eval env: it never updates its own statistics, so
            # copy the current ones over from the training env.
            from stable_baselines3.common.vec_env import sync_envs_normalization

            env = self.eval_env
            if self.model.get_vec_normalize_env() is not None:
                sync_envs_normalization(self.training_env, env)

        try:
            mean_reward, _ = evaluate_policy(
                self.model,
                env,
                n_eval_episodes=self.n_eval_episodes,
                deterministic=self.deterministic
            )

            # Report the result to Optuna
            self.trial.report(mean_reward, self.n_calls)

            # Save the best model seen so far by this callback instance.
            # NOTE(review): the file names are the same for every trial, so a
            # later trial overwrites an earlier trial's files even when its
            # score is lower.
            if mean_reward > self.best_mean_reward:
                self.best_mean_reward = mean_reward
                self.model.save(os.path.join(best_model_path, "best_model"))
                # Save VecNormalize statistics alongside the model
                env.save(os.path.join(best_model_path, "vec_normalize.pkl"))

            # Check if we should prune the trial
            if self.trial.should_prune():
                self.is_pruned = True
                raise optuna.exceptions.TrialPruned()
        finally:
            # Put the training env back into its previous mode even when the
            # trial is pruned or evaluation fails.
            if use_training_env:
                env.training, env.norm_reward = previous_flags

        return True

def make_env(df, is_training: bool = True):
    """Build a normalized, single-environment vectorized forex env.

    The environment is created from a copy of ``df`` for the 'EUR_USD' pair,
    wrapped in ``Monitor``, ``DummyVecEnv`` and ``VecNormalize``. Observations
    are always normalized; rewards are normalized only when ``is_training``
    is truthy. For a non-training env the ``training`` flag is also turned
    off.
    """
    def _make_monitored_env():
        return Monitor(ForexTradingEnv(df=df.copy(), pair='EUR_USD'))

    normalized_env = VecNormalize(
        DummyVecEnv([_make_monitored_env]),
        norm_obs=True,
        norm_reward=is_training,
        clip_obs=10.0,
        clip_reward=10.0,
    )

    if is_training:
        return normalized_env

    # Evaluation mode: no statistic updates, raw rewards.
    normalized_env.training = False
    normalized_env.norm_reward = False
    return normalized_env

def objective(trial: optuna.Trial) -> float:
    """Optuna objective function for optimizing the PPO learning rate.

    Trains PPO on ``train_df`` for 500,000 timesteps with the sampled
    learning rate, evaluating through ``ForexTrialEvalCallback`` every 50,000
    steps, then scores the policy on a validation environment built from
    ``val_df``. The training VecNormalize statistics are saved to
    ``<best_model_path>vec_normalize.pkl``.

    Args:
        trial: Optuna trial supplying ``learning_rate`` (log-uniform in
            [1e-5, 1e-4]).

    Returns:
        Mean reward over 10 deterministic validation episodes, or ``-inf``
        when the trial fails with an unexpected exception.

    Raises:
        optuna.exceptions.TrialPruned: propagated from the callback.
    """
    # Local import: this helper is not among the cell's top-level imports.
    from stable_baselines3.common.vec_env import sync_envs_normalization

    # Suggest learning rate (narrowed range)
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-4, log=True)

    total_timesteps = 500_000  # Adjust based on your dataset size
    eval_freq = 50_000         # Evaluate every 50k steps

    train_env = None
    eval_env = None
    try:
        train_env = make_env(train_df, is_training=True)
        # Created before training so the callback can be given validation
        # data instead of no evaluation environment at all.
        eval_env = make_env(val_df, is_training=False)

        model = PPO(
            "MlpPolicy",
            train_env,
            learning_rate=learning_rate,
            verbose=0,
            tensorboard_log=f"{best_model_path}tensorboard_logs/trial_{trial.number}"
        )

        eval_callback = ForexTrialEvalCallback(
            eval_env=eval_env,
            trial=trial,
            eval_freq=eval_freq,
            deterministic=True
        )

        model.learn(
            total_timesteps=total_timesteps,
            callback=eval_callback
        )

        # Persist the VecNormalize statistics of the training env.
        vec_normalize_path = f"{best_model_path}vec_normalize.pkl"
        train_env.save(vec_normalize_path)

        # Copy the trained statistics into the validation env. The previous
        # VecNormalize.load(path, eval_env) wrapped an env that was already a
        # VecNormalize, so observations were normalized and clipped twice.
        sync_envs_normalization(train_env, eval_env)
        eval_env.training = False
        eval_env.norm_reward = False

        mean_reward, _ = evaluate_policy(
            model,
            eval_env,
            n_eval_episodes=10,
            deterministic=True
        )

        return mean_reward

    except optuna.exceptions.TrialPruned:
        raise
    except Exception:
        # Deliberate best-effort: keep the study running, but log the full
        # traceback instead of only the message.
        logging.exception("Trial %s failed with error", trial.number)
        return float('-inf')
    finally:
        # Release the environments on success, failure and pruning alike.
        for env in (train_env, eval_env):
            if env is not None:
                env.close()

# Setup logging
logging.basicConfig(level=logging.INFO)

# Create (or resume) the study in the local SQLite file.
study = optuna.create_study(
    study_name="forex_learning_rate_optimization6",
    storage="sqlite:///db.sqlite3",
    direction="maximize",
    load_if_exists=True,
    sampler=optuna.samplers.TPESampler(seed=42),
    pruner=optuna.pruners.MedianPruner(
        n_startup_trials=10,  # Wait for 10 trials before pruning
        # Warm-up is measured in the step value passed to trial.report().
        # The callback reports the timestep count every 50_000 steps, so a
        # value of 5 never delayed pruning. With 5 * 50_000 no evaluation
        # before step 250_000 can be pruned.
        n_warmup_steps=5 * 50_000
    )
)

# Run optimization
study.optimize(
    objective,
    n_trials=10,
    n_jobs=1,
    show_progress_bar=True
)

# Print results
print("\nBest trial:")
trial = study.best_trial
print(f"  Value: {trial.value}")
print("  Params: ")
print(f"    learning_rate: {trial.params['learning_rate']}")
