# In [19]:
import numpy as np
import gymnasium as gym

from stable_baselines3 import PPO

from security_env import SecurityEnv

def suggest_config_for_user(user_config: np.ndarray,
                            model_path: str = r'C:\Users\Tuan Anh HSLU\OneDrive - Hochschule Luzern\Desktop\HSLU22\Bachelor Thesis\ML Models\models\security_model_5000_steps.zip',
                            n_steps: int = 100,
                            debug: bool = False):
    """
    Roll out the pre-trained PPO policy starting from the user's configuration.

    1. Builds a SecurityEnv and loads the PPO model from ``model_path``.
    2. Resets the environment to ``user_config``.
    3. Steps the environment with the (stochastic) policy for at most
       ``n_steps`` steps, stopping early when the episode ends.
    4. Reads the proposed configuration and its scores from the last
       observation.

    Args:
        user_config: Starting configuration handed to
            ``env.reset_with_user_config``.
        model_path: Location of the saved PPO model archive.
        n_steps: Maximum number of policy steps to take.
        debug: When True, print an index-to-value mapping check and add the
            mapped values to the result under ``"feature_values"``.

    Returns:
        A dict with ``"config"`` (list of rounded integer indices),
        ``"fatigue"``, ``"security"`` (floats) and ``"steps"`` (number of
        steps actually taken); plus ``"feature_values"`` when ``debug`` is set.

    Raises:
        RuntimeError: If ``n_steps`` allows no step at all, so that no final
            state exists.
    """
    # Fail before the (expensive) environment and model are built.
    if n_steps < 1:
        raise RuntimeError("Model did not perform any step; final state is undefined.")

    # Create the environment
    env = SecurityEnv(
        rf_model_path="fatigue_model.joblib",
        alpha=0.7,  # or your chosen alpha
        beta=0.3,   # or your chosen beta
        s_min=5.0
    )

    # Load the trained PPO agent and start from the user's configuration
    model = PPO.load(model_path, env=env)
    obs, _info = env.reset_with_user_config(user_config)

    # Count steps explicitly instead of relying on the leaked loop variable.
    steps_taken = 0
    for _ in range(n_steps):
        action, _states = model.predict(obs, deterministic=False)
        obs, _reward, done, truncated, _info = env.step(action)
        steps_taken += 1
        if done or truncated:
            break

    # The last two observation entries are read as (fatigue, security);
    # everything before them is the configuration.
    final_config = obs[:-2]
    predicted_fatigue = obs[-2]
    security_score = obs[-1]

    # Round before casting so that e.g. 0.9999 becomes 1 rather than 0.
    final_config_int = np.round(final_config).astype(np.int64).tolist()

    # Compact response with just the essential information
    suggestion = {
        "config": final_config_int,  # The configuration as integers
        "fatigue": float(predicted_fatigue),
        "security": float(security_score),
        "steps": steps_taken
    }

    if debug:
        # Use the same rounded indices for the lookup and for the check, so
        # a mismatch reflects the environment's mapping and not a cast quirk.
        final_config_int_arr = np.asarray(final_config_int, dtype=np.int64)
        feature_values = env._get_feature_values(final_config_int_arr)

        print("\n🔎 Mapping Check: Index → Value")
        for feature, idx in zip(env.feature_names, final_config_int):
            expected = env.feature_ranges[feature][idx]
            actual = feature_values[feature]
            print(f"{feature}: Index = {idx}, Mapped = {expected}, FeatureValue = {actual}, Match = {expected == actual}")

        suggestion["feature_values"] = feature_values

    return suggestion

# Model archive used as the default `model_path` of suggest_config_for_user1
# and passed explicitly to suggest_config_with_constraints in the __main__ demo.
# NOTE(review): machine-specific absolute path; adjust it when running elsewhere.
path = r"C:\Users\Tuan Anh HSLU\Downloads\models-20250317T125637Z-001\models\best_model\best_model.zip"

def suggest_config_for_user1(user_config: np.ndarray,
                            model_path: str = path,
                            n_steps: int = 100):
    """
    Run the PPO policy from an ensemble of starting points and keep the best.

    The ensemble is the user's configuration plus nine randomly perturbed
    copies; copy ``i`` changes each feature with probability ``0.1 * (i + 1)``.
    Every start is rolled out for at most ``n_steps`` policy steps and scored
    as ``security - 0.5 * fatigue`` on its last observation.

    Args:
        user_config: Starting configuration (feature indices).
        model_path: Location of the saved PPO model archive.
        n_steps: Maximum number of policy steps per rollout. With a value
            below 1 no step is taken and the reset observation is scored.

    Returns:
        A dict for the best-scoring rollout with ``"optimized_config"``
        (list of rounded integer indices), ``"fatigue"``, ``"security"``
        (floats) and ``"steps"`` (steps actually taken in that rollout).
    """
    env = SecurityEnv(
        rf_model_path="fatigue_model.joblib",
        alpha=0.7,
        beta=0.3,
        s_min=5.0
    )
    model = PPO.load(model_path, env=env)

    # Ensemble of starting points: the user config plus 9 variations.
    configs_to_try = [user_config.copy()]
    for i in range(9):
        # Higher i means more deviation from the original.
        variation_rate = 0.1 * (i + 1)
        new_config = user_config.copy()

        for j, current in enumerate(new_config):
            if np.random.random() >= variation_rate:
                continue  # this feature stays as the user set it

            feature_name = env.feature_names[j]
            if env.feature_types[feature_name] == 'binary':
                # Flip binary values
                new_config[j] = 1 - current
            else:
                # Any index of the feature's range except the current one.
                n_values = len(env.feature_ranges[feature_name])
                options = [x for x in range(n_values) if x != current]
                if options:
                    new_config[j] = np.random.choice(options)

        configs_to_try.append(new_config)

    # Roll out each start and keep the best-scoring result.
    best_result = None
    best_score = -float('inf')

    for start_config in configs_to_try:
        # The environment is given integer indices.
        obs, _ = env.reset_with_user_config(start_config.astype(np.int64))

        # Explicit counter: the loop variable would be unbound (and the
        # "steps" entry would raise) if n_steps allowed no iteration.
        steps_taken = 0
        for _ in range(n_steps):
            action, _state = model.predict(obs, deterministic=False)
            obs, _reward, done, truncated, _info = env.step(action)
            steps_taken += 1
            if done or truncated:
                break

        # The last two observation entries are read as (fatigue, security).
        fatigue = obs[-2]
        security = obs[-1]
        score = security - 0.5 * fatigue

        if best_result is None or score > best_score:
            best_score = score
            best_result = {
                "optimized_config": np.round(obs[:-2]).astype(np.int64).tolist(),
                "fatigue": float(fatigue),
                "security": float(security),
                "steps": steps_taken
            }

    return best_result

def suggest_config_with_constraints(user_config: np.ndarray,
                                  model_path: str = r'C:\Users\Tuan Anh HSLU\OneDrive - Hochschule Luzern\Desktop\HSLU22\Bachelor Thesis\ML Models\models\best_model\best_model.zip',
                                  max_allowed_fatigue: float = None,
                                  min_required_security: float = None,
                                  *,
                                  n_rollouts: int = 5,
                                  rollout_steps: int = 50,
                                  n_mutations: int = 10):
    """
    Generates a configuration that respects user constraints on maximum fatigue
    or minimum security requirements.

    Candidates come from three sources: the user's own configuration,
    ``n_rollouts`` stochastic PPO rollouts started from it, and
    ``n_mutations`` random mutations of it (each changing roughly 20-40% of
    the features). Among the candidates that satisfy both constraints the one
    with the highest security is returned. If none qualifies, a best-effort
    candidate is returned with ``"meets_constraints": False``.

    Args:
        user_config: Starting configuration (feature indices).
        model_path: Location of the saved PPO model archive.
        max_allowed_fatigue: Upper bound on fatigue. Defaults to 1.2 times
            the fatigue of ``user_config``.
        min_required_security: Lower bound on security. Defaults to 1.2 times
            the security of ``user_config``.
        n_rollouts: Number of policy rollouts (keyword-only).
        rollout_steps: Maximum policy steps per rollout (keyword-only).
        n_mutations: Number of random mutations to evaluate (keyword-only).

    Returns:
        A dict with ``"optimized_config"`` (list), ``"fatigue"``,
        ``"security"`` (floats) and ``"meets_constraints"`` (bool).
    """
    env = SecurityEnv(
        rf_model_path="fatigue_model.joblib",
        alpha=0.7,
        beta=0.3,
        s_min=5.0
    )

    # One evaluation of the user's config serves both as the baseline for
    # the default constraints and as the first candidate.
    _, info = env.reset_with_user_config(user_config)
    initial_fatigue = float(info['fatigue_score'])
    initial_security = float(info['security_score'])

    if max_allowed_fatigue is None:
        # Default: Allow up to 20% more fatigue than initial
        max_allowed_fatigue = initial_fatigue * 1.2
    if min_required_security is None:
        # Default: Require at least 20% more security than initial
        min_required_security = initial_security * 1.2

    def make_candidate(config, fatigue, security):
        """Bundle one evaluated config with its constraint verdict."""
        # Plain Python floats/bools keep the result free of NumPy scalars.
        fatigue = float(fatigue)
        security = float(security)
        return {
            "config": config,
            "fatigue": fatigue,
            "security": security,
            "valid": bool(security >= min_required_security
                          and fatigue <= max_allowed_fatigue),
        }

    candidates = [
        make_candidate(user_config.tolist(), initial_fatigue, initial_security)
    ]

    # Candidates from the learned policy.
    model = PPO.load(model_path, env=env)
    for seed in range(n_rollouts):
        # NOTE(review): this seeds the environment only; the policy's action
        # sampling presumably uses its own RNG - confirm whether the model
        # should be seeded as well if reproducible rollouts are wanted.
        env.reset(seed=seed)
        obs, _ = env.reset_with_user_config(user_config)

        # Short rollouts to get more diverse solutions.
        for _ in range(rollout_steps):
            action, _state = model.predict(obs, deterministic=False)
            obs, _reward, done, truncated, _info = env.step(action)
            # Do not keep stepping an episode that has already ended.
            if done or truncated:
                break

        # Last two observation entries are read as (fatigue, security).
        # Round before casting so that e.g. 0.9999 becomes 1 rather than 0.
        rollout_config = np.round(obs[:-2]).astype(np.int64).tolist()
        candidates.append(make_candidate(rollout_config, obs[-2], obs[-1]))

    # Candidates from random mutations of the user config.
    n_features = len(user_config)
    for _ in range(n_mutations):
        mutated = user_config.copy()
        # Randomly change 20-40% of features
        n_to_change = np.random.randint(int(0.2 * n_features),
                                        int(0.4 * n_features) + 1)
        indices = np.random.choice(n_features, n_to_change, replace=False)

        for i in indices:
            feature_name = env.feature_names[i]
            if env.feature_types[feature_name] == 'binary':
                mutated[i] = 1 - mutated[i]  # Flip
            else:
                # Any index of the feature's range except the current one.
                n_values = len(env.feature_ranges[feature_name])
                options = [x for x in range(n_values) if x != mutated[i]]
                if options:
                    mutated[i] = np.random.choice(options)

        _, info = env.reset_with_user_config(mutated)
        candidates.append(make_candidate(mutated.tolist(),
                                         info['fatigue_score'],
                                         info['security_score']))

    valid_candidates = [c for c in candidates if c["valid"]]

    if valid_candidates:
        # Highest security among the candidates that satisfy both constraints.
        best = max(valid_candidates, key=lambda c: c["security"])
        meets_constraints = True
    else:
        # Best effort: highest security/fatigue ratio. If no candidate has
        # positive fatigue the ratio is undefined, so fall back to the
        # highest security instead of failing.
        with_fatigue = [c for c in candidates if c["fatigue"] > 0]
        if with_fatigue:
            best = max(with_fatigue, key=lambda c: c["security"] / c["fatigue"])
        else:
            best = max(candidates, key=lambda c: c["security"])
        meets_constraints = False

    return {
        "optimized_config": best["config"],
        "fatigue": best["fatigue"],
        "security": best["security"],
        "meets_constraints": meets_constraints
    }

if __name__ == "__main__":
    # Demo input as (feature label, index) pairs. Each index selects a
    # position in that feature's range as defined by the environment.
    labelled_indices = (
        ("Level of familiarity", 1),
        ("Frequency of Password Changes", 1),
        ("Difficulty Level", 1),
        ("Effort Required", 1),
        ("Perceived Importance", 1),
        ("Frequency of MFA prompts", 1),
        ("Difficulty Level MFA", 1),
        ("Effort Required MFA", 1),
        ("Perceived Importance of MFA", 1),
        ("Frequency of Security Warnings", 1),
        ("Difficulty Level Security Warnings", 1),
        ("Effort Required Security Warnings", 1),
        ("Perceived Importance of Security Warnings", 1),
        ("MFA - Auth app", 0),
        ("MFA - Biometric", 0),
        ("MFA - I do not use MFA", 0),
        ("MFA - OTP via SMS", 1),
        ("MFA - Security key", 0),
        ("Security Warnings - Antivirus", 1),
        ("Security Warnings - None", 0),
        ("Security Warnings - Phishing", 1),
        ("Security Warnings - System update", 1),
        ("Security Warnings - Unauthorized access", 0),
    )
    # Only the indices, in the order listed, go into the configuration vector.
    example_user_config = np.array([index for _, index in labelled_indices])

    suggestion_output = suggest_config_with_constraints(
        example_user_config,
        path,
    )
    print("\nSuggestion Output:")
    print(suggestion_output)



# Suggestion Output:
# {'optimized_config': [0, 0, 0, 0, 4, 0, 0, 0, 4, 1, 0, 0, 2, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1], 'fatigue': 4.109097003936768, 'security': 20.0, 'meets_constraints': True}
