In [6]:
import numpy as np
import optuna
from optuna.samplers import TPESampler
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from security_env import SecurityEnv

def _make_security_env(params):
    """Build a one-environment ``DummyVecEnv`` around a fresh ``SecurityEnv``.

    The ``SecurityEnv`` is constructed inside the factory callable, so the
    closure does not rely on a local name that is rebound afterwards.

    Args:
        params: Mapping providing the ``alpha``, ``beta`` and ``s_min``
            values forwarded to ``SecurityEnv``.

    Returns:
        A ``DummyVecEnv`` wrapping a single ``SecurityEnv`` instance.
    """
    return DummyVecEnv([
        lambda: SecurityEnv(
            rf_model_path="fatigue_model.joblib",
            alpha=params['alpha'],
            beta=params['beta'],
            s_min=params['s_min'],
        )
    ])


def objective(trial):
    """
    Objective function for Optuna optimization.

    Samples PPO hyperparameters together with the ``SecurityEnv`` settings
    (``alpha``, ``beta``, ``s_min``), trains a PPO agent for 10000 timesteps,
    and scores it with ``evaluate_model`` on a freshly built environment.

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        The mean episode reward reported by ``evaluate_model`` as a plain
        ``float``.
    """
    # Define hyperparameter ranges. ``suggest_float(..., log=True)`` is the
    # supported replacement for the deprecated ``suggest_loguniform``; the
    # parameter names and bounds are unchanged.
    params = {
        'learning_rate': trial.suggest_float('learning_rate', 1e-5, 1e-3, log=True),
        'n_steps': trial.suggest_categorical('n_steps', [2048, 4096, 8192]),
        'batch_size': trial.suggest_categorical('batch_size', [32, 64, 128]),
        'n_epochs': trial.suggest_categorical('n_epochs', [5, 10, 20]),
        'gamma': trial.suggest_categorical('gamma', [0.95, 0.99, 0.995]),
        'gae_lambda': trial.suggest_categorical('gae_lambda', [0.9, 0.95, 0.98]),
        'clip_range': trial.suggest_categorical('clip_range', [0.1, 0.2, 0.3]),
        'ent_coef': trial.suggest_float('ent_coef', 1e-5, 1e-2, log=True),
        'alpha': trial.suggest_categorical('alpha', [0.3, 0.5, 0.7]),
        'beta': trial.suggest_categorical('beta', [0.3, 0.5, 0.7]),
        's_min': trial.suggest_categorical('s_min', [5.0, 8.0, 10.0]),
    }

    # Create the training environment, then create and train the model.
    train_env = _make_security_env(params)
    try:
        model = PPO(
            "MlpPolicy",
            train_env,
            learning_rate=params['learning_rate'],
            n_steps=params['n_steps'],
            batch_size=params['batch_size'],
            n_epochs=params['n_epochs'],
            gamma=params['gamma'],
            gae_lambda=params['gae_lambda'],
            clip_range=params['clip_range'],
            ent_coef=params['ent_coef'],
            verbose=0,
        )
        model.learn(total_timesteps=10000)
    finally:
        # Release the environment even when training fails, so repeated
        # trials do not accumulate open environments.
        train_env.close()

    # Evaluate the model on a separate environment with the same settings.
    eval_env = _make_security_env(params)
    try:
        mean_reward, _ = evaluate_model(model, eval_env)
    finally:
        eval_env.close()

    # Hand Optuna a plain Python float rather than a NumPy scalar.
    return float(mean_reward)

def run_bayesian_optimization(
    n_trials=50,
    *,
    results_path='bayesian_optimization_results.csv',
    seed=42,
):
    """
    Run Bayesian Optimization using Optuna.

    Creates a maximizing study with a seeded ``TPESampler``, optimizes
    ``objective`` for ``n_trials`` trials, writes the trial table to CSV and
    prints the best trial.

    Args:
        n_trials: Number of trials passed to ``study.optimize``.
        results_path: Destination of the CSV export of all trials.
        seed: Seed given to ``TPESampler``.

    Returns:
        The Optuna study after optimization.
    """
    # Create study
    study = optuna.create_study(
        direction='maximize',
        sampler=TPESampler(seed=seed),
    )

    # Run optimization
    study.optimize(objective, n_trials=n_trials)

    # Save results before touching ``best_trial``: that property raises
    # ValueError when no trial completed, and the trial history should still
    # be written out in that case.
    results_df = study.trials_dataframe()
    results_df.to_csv(results_path, index=False)

    try:
        trial = study.best_trial
    except ValueError:
        print("No completed trials; no best trial to report.")
        return study

    # Print best results
    print("Best trial:")
    print("  Value: ", trial.value)
    print("  Params: ")
    for key, value in trial.params.items():
        print(f"    {key}: {value}")

    return study

def evaluate_model(model, env, n_episodes=5):
    """Roll out ``model`` on ``env`` and summarize the episode returns.

    Each episode starts with ``env.reset()`` and steps the environment with
    deterministic predictions from ``model`` until the step reports done.

    Args:
        model: Object exposing ``predict(obs, deterministic=True)`` that
            returns an ``(action, state)`` pair.
        env: Object exposing ``reset()`` and a ``step(action)`` that returns
            ``(obs, reward, done, info)``.
        n_episodes: Number of episodes to run.

    Returns:
        A ``(mean, std)`` tuple computed with NumPy over the episode returns.
    """

    def _play_episode():
        # Accumulate the reward of a single episode, from reset until done.
        observation = env.reset()
        total = 0
        finished = False
        while not finished:
            action, _state = model.predict(observation, deterministic=True)
            observation, step_reward, finished, _info = env.step(action)
            total += step_reward
        return total

    episode_returns = [_play_episode() for _ in range(n_episodes)]
    return np.mean(episode_returns), np.std(episode_returns)

In [7]:
# Launch the hyperparameter search with 50 trials and keep the returned study.
study = run_bayesian_optimization(n_trials=50)

[I 2025-03-19 02:22:59,001] A new study created in memory with name: no-name-9aa40d30-0c50-46d5-a3fa-97c218b1e983
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),
  'ent_coef': trial.suggest_loguniform('ent_coef', 1e-5, 1e-2),
[I 2025-03-19 02:28:13,099] Trial 0 finished with value: -40.458885192871094 and parameters: {'learning_rate': 5.6115164153345e-05, 'n_steps': 2048, 'batch_size': 32, 'n_epochs': 5, 'gamma': 0.99, 'gae_lambda': 0.9, 'clip_range': 0.2, 'ent_coef': 7.476312062252303e-05, 'alpha': 0.3, 'beta': 0.7, 's_min': 10.0}. Best is trial 0 with value: -40.458885192871094.
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),
  'ent_coef': trial.suggest_loguniform('ent_coef', 1e-5, 1e-2),
[I 2025-03-19 02:34:33,525] Trial 1 finished with value: 588.4727783203125 and parameters: {'learning_rate': 1.2385137298860926e-05, 'n_steps': 2048, 'batch_size': 64, 'n_epochs': 20, 'gamma': 0.995, 'gae_lambda': 0.95, 'clip_range': 0.1, 'ent_coef

Best trial:
  Value:  859.52294921875
  Params: 
    learning_rate: 1.0108877212854842e-05
    n_steps: 8192
    batch_size: 64
    n_epochs: 20
    gamma: 0.95
    gae_lambda: 0.98
    clip_range: 0.2
    ent_coef: 0.004448235953062759
    alpha: 0.7
    beta: 0.3
    s_min: 5.0
