In [None]:
import optuna
from optuna.samplers import TPESampler
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from security_env import SecurityEnv

def objective(trial):
    """
    Objective function for Optuna optimization.

    Samples one set of PPO and SecurityEnv hyperparameters from ``trial``,
    trains a PPO agent for 10000 timesteps on a freshly built environment,
    and scores it on a second, separately built environment.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        The first element of the value returned by ``evaluate_model``
        (unpacked here as the mean reward).
    """
    # Define hyperparameter ranges.
    # suggest_float(..., log=True) replaces the deprecated
    # suggest_loguniform and samples from the same log-uniform range.
    params = {
        'learning_rate': trial.suggest_float('learning_rate', 1e-5, 1e-3, log=True),
        'n_steps': trial.suggest_categorical('n_steps', [2048, 4096, 8192]),
        'batch_size': trial.suggest_categorical('batch_size', [32, 64, 128]),
        'n_epochs': trial.suggest_categorical('n_epochs', [5, 10, 20]),
        'gamma': trial.suggest_categorical('gamma', [0.95, 0.99, 0.995]),
        'gae_lambda': trial.suggest_categorical('gae_lambda', [0.9, 0.95, 0.98]),
        'clip_range': trial.suggest_categorical('clip_range', [0.1, 0.2, 0.3]),
        'ent_coef': trial.suggest_float('ent_coef', 1e-5, 1e-2, log=True),
        'alpha': trial.suggest_categorical('alpha', [0.3, 0.5, 0.7]),
        'beta': trial.suggest_categorical('beta', [0.3, 0.5, 0.7]),
        's_min': trial.suggest_categorical('s_min', [5.0, 8.0, 10.0]),
    }

    def make_env():
        # Explicit factory instead of `lambda: env`: the lambda closed over a
        # name that was rebound to the DummyVecEnv on the same line, which
        # only worked because DummyVecEnv calls the factory immediately.
        return SecurityEnv(
            rf_model_path="fatigue_model.joblib",
            alpha=params['alpha'],
            beta=params['beta'],
            s_min=params['s_min'],
        )

    # Create training environment
    env = DummyVecEnv([make_env])
    try:
        # Create and train model
        model = PPO(
            "MlpPolicy",
            env,
            learning_rate=params['learning_rate'],
            n_steps=params['n_steps'],
            batch_size=params['batch_size'],
            n_epochs=params['n_epochs'],
            gamma=params['gamma'],
            gae_lambda=params['gae_lambda'],
            clip_range=params['clip_range'],
            ent_coef=params['ent_coef'],
            verbose=0,
        )
        model.learn(total_timesteps=10000)

        # Evaluate on a separate environment built with the same settings.
        eval_env = DummyVecEnv([make_env])
        try:
            # NOTE(review): evaluate_model is not defined or imported in the
            # visible part of this file -- confirm it is provided by another
            # notebook cell before running.
            mean_reward, _ = evaluate_model(model, eval_env)
        finally:
            # Release the evaluation environment even if evaluation fails.
            eval_env.close()
    finally:
        # Release the training environment so repeated trials do not
        # accumulate open environments.
        env.close()

    return mean_reward

def run_bayesian_optimization(n_trials=50):
    """
    Run a hyperparameter search with Optuna and report the outcome.

    A maximizing study is created with a TPE sampler seeded at 42 and
    ``objective`` is optimized for ``n_trials`` trials. The best trial's
    value and parameters are printed, and the full trial table is written
    to ``bayesian_optimization_results.csv``.

    Args:
        n_trials: Number of trials passed to ``study.optimize``.

    Returns:
        The Optuna study after optimization.
    """
    # Fixed seed so the sampler's suggestions are repeatable.
    tpe_sampler = TPESampler(seed=42)
    study = optuna.create_study(direction='maximize', sampler=tpe_sampler)

    study.optimize(objective, n_trials=n_trials)

    # Report the winning trial.
    best = study.best_trial
    print("Best trial:")
    print("  Value: ", best.value)
    print("  Params: ")
    for name, setting in best.params.items():
        print(f"    {name}: {setting}")

    # Persist every trial for later inspection.
    study.trials_dataframe().to_csv(
        'bayesian_optimization_results.csv', index=False
    )

    return study

In [None]:
# Launch the hyperparameter search with 50 trials and keep the returned
# study object for further inspection in later cells.
study = run_bayesian_optimization(n_trials=50)