In [1]:
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from security_env import SecurityEnv
import pandas as pd

def _make_security_vec_env(params):
    """Build a single-environment ``DummyVecEnv`` around a fresh ``SecurityEnv``.

    The environment is configured from the ``alpha``, ``beta`` and ``s_min``
    entries of ``params``.
    """
    base_env = SecurityEnv(
        rf_model_path="fatigue_model.joblib",
        alpha=params['alpha'],
        beta=params['beta'],
        s_min=params['s_min'],
    )
    # ``base_env`` is never rebound in this scope, so the closure always
    # returns the SecurityEnv instance regardless of when it is called.
    return DummyVecEnv([lambda: base_env])


def random_search(n_trials=50, n_steps=10000):
    """
    Perform random search over hyperparameters.

    For every trial one value per hyperparameter is drawn with
    ``np.random.choice``; a ``SecurityEnv`` is built from the sampled
    ``alpha``/``beta``/``s_min``, a PPO agent is trained on it, and the agent
    is scored with ``evaluate_model`` on a second, freshly constructed
    environment that uses the same settings.

    Args:
        n_trials: Number of hyperparameter configurations to sample.
        n_steps: Total timesteps passed to ``model.learn`` for each trial.
            This is distinct from the sampled ``'n_steps'`` hyperparameter
            that is forwarded to the ``PPO`` constructor.

    Returns:
        A ``pandas.DataFrame`` with one row per trial and the columns
        ``trial``, ``params``, ``mean_reward`` and ``std_reward``. The same
        table is also written to ``random_search_results.csv``.
    """
    # Define hyperparameter ranges
    param_ranges = {
        'learning_rate': np.logspace(-5, -3, 100),
        'n_steps': [2048, 4096, 8192],
        'batch_size': [32, 64, 128],
        'n_epochs': [5, 10, 20],
        'gamma': [0.95, 0.99, 0.995],
        'gae_lambda': [0.9, 0.95, 0.98],
        'clip_range': [0.1, 0.2, 0.3],
        'ent_coef': [0.0, 0.01, 0.005],
        'alpha': [0.3, 0.5, 0.7],
        'beta': [0.3, 0.5, 0.7],
        's_min': [5.0, 8.0, 10.0],
    }

    results = []

    for trial in range(n_trials):
        # Randomly sample hyperparameters. ``.item()`` unwraps the NumPy
        # scalar returned by ``np.random.choice`` into a plain Python
        # int/float so the stored ``params`` serialise cleanly.
        params = {
            name: np.random.choice(candidates).item()
            for name, candidates in param_ranges.items()
        }

        # Create training environment
        train_env = _make_security_vec_env(params)

        # Create and train model
        model = PPO(
            "MlpPolicy",
            train_env,
            learning_rate=params['learning_rate'],
            n_steps=params['n_steps'],
            batch_size=params['batch_size'],
            n_epochs=params['n_epochs'],
            gamma=params['gamma'],
            gae_lambda=params['gae_lambda'],
            clip_range=params['clip_range'],
            ent_coef=params['ent_coef'],
            verbose=0,
        )
        model.learn(total_timesteps=n_steps)

        # Evaluate on a separate environment instance built with the same
        # settings as the training environment.
        eval_env = _make_security_vec_env(params)
        mean_reward, std_reward = evaluate_model(model, eval_env)

        # Store results
        results.append({
            'trial': trial,
            'params': params,
            'mean_reward': mean_reward,
            'std_reward': std_reward,
        })

        print(f"Trial {trial + 1}/{n_trials}")
        print(f"Mean Reward: {mean_reward:.2f} ± {std_reward:.2f}")
        print("-" * 50)

    # Convert results to DataFrame and save
    results_df = pd.DataFrame(results)
    results_df.to_csv('random_search_results.csv', index=False)
    return results_df

def evaluate_model(model, env, n_episodes=5):
    """Roll out ``model`` on ``env`` and summarise the per-episode returns.

    Each episode starts with ``env.reset()`` and repeatedly feeds the
    observation to ``model.predict(..., deterministic=True)``, stepping the
    environment with the chosen action until the ``done`` value returned by
    ``env.step`` is truthy. The rewards of an episode are summed.

    Args:
        model: Object exposing ``predict(obs, deterministic=True)`` that
            returns an ``(action, state)`` pair.
        env: Object whose ``reset()`` returns an observation and whose
            ``step(action)`` returns ``(obs, reward, done, info)``.
        n_episodes: Number of episodes to run.

    Returns:
        Tuple ``(mean, std)`` computed with ``np.mean`` / ``np.std`` over the
        episode returns.
    """
    episode_returns = []
    for _episode in range(n_episodes):
        observation = env.reset()
        total = 0
        # At least one step is always taken; stop once the env reports done.
        while True:
            chosen_action, _state = model.predict(observation, deterministic=True)
            observation, step_reward, finished, _info = env.step(chosen_action)
            total += step_reward
            if finished:
                break
        episode_returns.append(total)
    return np.mean(episode_returns), np.std(episode_returns)

In [None]:
# Run the search: 50 sampled configurations, each trained for 10,000 total
# timesteps. The returned DataFrame is kept in ``results``; random_search also
# writes it to random_search_results.csv.
results = random_search(n_trials=50, n_steps=10000)