# Stochastic Game-Theoretic Federated Defense with Martingale Convergence for Multi-Cloud Network Intrusion Detection Systems

"""
## STOCHASTIC GAME-THEORETIC FEDERATED DEFENSE WITH MARTINGALE CONVERGENCE
## Complete Implementation for Multi-Cloud Network Intrusion Detection Systems
## Optimized for Kaggle P100 GPU with Cloud Security Datasets
## Paper: Stochastic Game-Theoretic Federated Defense with Martingale Convergence
## Author: Roger Nick Anaedevha
"""

# In [None]:


# ==================== SECTION 1: IMPORTS AND SETUP ====================

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset, Subset
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
from typing import List, Tuple, Dict, Optional, Union
import warnings
import hashlib
import json
from dataclasses import dataclass
from scipy.optimize import linprog, minimize
from scipy.stats import dirichlet
import time
from datetime import datetime
from pathlib import Path
from collections import defaultdict, deque
import pickle
warnings.filterwarnings('ignore')

# Pick the compute device once: CUDA when PyTorch reports it as available,
# otherwise the CPU. The rest of the file moves tensors and modules to it.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# When running on CUDA, also report the name and total memory (GB) of GPU 0.
if device.type == 'cuda':
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

# Set random seeds for reproducibility
def set_seeds(seed=42):
    """Seed the NumPy and PyTorch random generators and pin cuDNN behaviour.

    Args:
        seed: Integer handed unchanged to ``np.random.seed``,
            ``torch.manual_seed`` and ``torch.cuda.manual_seed``.
    """
    # Same seeding order as before: NumPy, then PyTorch, then PyTorch CUDA.
    for seed_fn in (np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(seed)

    # Ask cuDNN for deterministic kernels and switch off its auto-tuner.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


set_seeds(42)

# ==================== SECTION 2: MATHEMATICAL FOUNDATIONS ====================

@dataclass
class GameParameters:
    """Parameters for the stochastic game-theoretic framework

    Plain configuration record that is handed to the game, Nash solver,
    aggregator, martingale analyzer, adversarial trainer and the federated
    system. Every field has a default, so ``GameParameters()`` is valid.
    """
    # Number of defender networks (and optimizers) the federated system
    # creates; also caps how many client loaders a round processes.
    n_defenders: int = 5
    # Not read by the code visible in this file section -- presumably the
    # number of adversaries in the game; TODO confirm against later code.
    n_adversaries: int = 2
    # Length of the game state vector in StochasticDifferentialGame.
    state_dim: int = 50
    # Length of the action vector fed to the drift/diffusion networks.
    action_dim: int = 10
    # Not read in the visible code -- presumably a discount factor for
    # payoffs; TODO confirm.
    discount_factor: float = 0.95
    # Learning rate given to the AdamW optimizers of defenders and adversary.
    learning_rate: float = 0.001
    # (epsilon, delta) used to scale the Gaussian noise that the aggregator
    # adds to the aggregated update.
    epsilon_privacy: float = 2.3
    delta_privacy: float = 1e-5
    # Gradient-norm clip used during local training; the aggregator also
    # uses 2 * clip_norm as the sensitivity of its noise.
    clip_norm: float = 1.0
    # Not read in the visible code; TODO confirm intended use.
    noise_multiplier: float = 1.1
    # Number of clients the aggregator drops as suspected Byzantine; the
    # detection is skipped when there are at most 2 * byzantine_bound clients.
    byzantine_bound: int = 1
    # Not read in the visible code -- presumably a stopping threshold and the
    # maximum number of federated rounds; TODO confirm.
    convergence_threshold: float = 1e-4
    max_rounds: int = 100
    # Number of passes over the client loader in each local training call.
    local_epochs: int = 5
    # Not read in the visible code -- presumably the DataLoader batch size;
    # TODO confirm.
    batch_size: int = 256

class StochasticDifferentialGame:
    """
    Implements the continuous-time stochastic differential game
    ds_t = μ(s_t, a_t, t)dt + σ(s_t, a_t, t)dW_t + ∫γ(z)Ñ(dt, dz)

    The drift μ and the diffusion σ are each a two-layer MLP over the
    concatenated (state, action) vector. Both networks keep PyTorch's default
    initialisation; nothing in this class trains them. ``evolve`` advances the
    stored state by one explicit step per call.
    """

    def __init__(self, params: GameParameters):
        """
        Args:
            params: Supplies ``state_dim`` and ``action_dim``.
        """
        self.params = params
        self.state = torch.zeros(params.state_dim).to(device)
        self.time = 0.0

        # Initialize drift and diffusion coefficients
        self.drift_net = nn.Sequential(
            nn.Linear(params.state_dim + params.action_dim, 128),
            nn.ReLU(),
            nn.Linear(128, params.state_dim)
        ).to(device)

        # Outputs a flattened state_dim x state_dim matrix.
        self.diffusion_net = nn.Sequential(
            nn.Linear(params.state_dim + params.action_dim, 128),
            nn.ReLU(),
            nn.Linear(128, params.state_dim * params.state_dim)
        ).to(device)

    def drift(self, state: torch.Tensor, action: torch.Tensor, t: float) -> torch.Tensor:
        """Compute drift coefficient μ(s,a,t)

        ``t`` is accepted for interface symmetry but is not used.
        """
        input_tensor = torch.cat([state, action], dim=-1)
        return self.drift_net(input_tensor)

    def diffusion(self, state: torch.Tensor, action: torch.Tensor, t: float) -> torch.Tensor:
        """Compute diffusion coefficient σ(s,a,t)

        Returns a ``(state_dim, state_dim)`` matrix; ``t`` is not used.
        """
        input_tensor = torch.cat([state, action], dim=-1)
        output = self.diffusion_net(input_tensor)
        return output.view(self.params.state_dim, self.params.state_dim)

    def evolve(self, action: torch.Tensor, dt: float = 0.01) -> torch.Tensor:
        """Evolve state according to SDE

        Args:
            action: Action vector of length ``action_dim``.
            dt: Step size; also added to ``self.time``.

        Returns:
            A copy of the new state. It is still connected to this step's
            autograd graph, while the state stored on the object is detached.
        """
        # Compute drift and diffusion
        mu = self.drift(self.state, action, self.time)
        sigma = self.diffusion(self.state, action, self.time)

        # Brownian motion
        dW = torch.randn_like(self.state) * np.sqrt(dt)

        # Poisson jump (simplified)
        jump_prob = 0.01
        if np.random.random() < jump_prob * dt:
            jump = torch.randn_like(self.state) * 0.1
        else:
            jump = torch.zeros_like(self.state)

        # Update state
        new_state = self.state + mu * dt + torch.matmul(sigma, dW) + jump

        # Store a detached copy: keeping the graph-attached tensor would chain
        # every step onto all previous ones, so the retained autograd graph
        # would grow with each call.
        self.state = new_state.detach()
        self.time += dt

        return new_state.clone()

class NashEquilibriumSolver:
    """
    Solves for Nash equilibrium in the stochastic game

    The game is treated as a two-player zero-sum matrix game in which the
    defender (row player) maximises the payoff and the adversary (column
    player) minimises it.
    """

    def __init__(self, params: GameParameters):
        self.params = params
        self.payoff_history = []

    def compute_payoff_matrix(self, defenders: List[nn.Module],
                             adversary: nn.Module,
                             state: torch.Tensor) -> np.ndarray:
        """Compute the payoff matrix for current strategies

        The strategy space of each player is discretised into 10 actions
        ``k / 10``. The payoff is a simplified closed form and does not
        currently depend on ``defenders``, ``adversary`` or ``state``.

        Returns:
            A ``(10, 10)`` array; entry ``[i, j]`` is the defender payoff
            minus the adversary payoff for defender action i, adversary
            action j.
        """
        n_strategies = 10  # Discretized strategy space
        payoff_matrix = np.zeros((n_strategies, n_strategies))

        for i in range(n_strategies):
            for j in range(n_strategies):
                # Simulate interaction
                defender_action = torch.tensor([i / n_strategies], dtype=torch.float32)
                adversary_action = torch.tensor([j / n_strategies], dtype=torch.float32)

                # Compute payoffs (simplified)
                defender_payoff = -torch.norm(defender_action - 0.5).item()
                adversary_payoff = torch.norm(defender_action - adversary_action).item()

                payoff_matrix[i, j] = defender_payoff - adversary_payoff

        return payoff_matrix

    @staticmethod
    def _maximin_strategy(payoff: np.ndarray) -> np.ndarray:
        """Return the row player's maximin mixed strategy for ``payoff``.

        Solves  max v  s.t.  sum_i x_i * payoff[i, j] >= v  for every column j,
        sum_i x_i = 1, x >= 0, with v a free variable. Falls back to the
        uniform strategy when the solver does not report success.
        """
        n_rows, n_cols = payoff.shape
        uniform = np.ones(n_rows) / n_rows

        # Decision vector is (x_1..x_n, v); linprog minimises, so minimise -v.
        c = np.zeros(n_rows + 1)
        c[-1] = -1.0
        # v - sum_i x_i * payoff[i, j] <= 0 for every column j.
        A_ub = np.hstack([-payoff.T, np.ones((n_cols, 1))])
        b_ub = np.zeros(n_cols)
        # Probabilities sum to one; v does not take part in this constraint.
        A_eq = np.append(np.ones(n_rows), 0.0).reshape(1, -1)
        b_eq = np.array([1.0])
        bounds = [(0, 1)] * n_rows + [(None, None)]

        result = linprog(c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq,
                         b_eq=b_eq, bounds=bounds, method='highs')
        if not result.success:
            return uniform

        # Remove tiny negative values left by the solver and renormalise.
        strategy = np.clip(result.x[:n_rows], 0.0, None)
        total = strategy.sum()
        return strategy / total if total > 0 else uniform

    def solve_nash_equilibrium(self, payoff_matrix: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """
        Solve for mixed strategy Nash equilibrium using linear programming

        Args:
            payoff_matrix: Payoffs to the row player (defender); the column
                player (adversary) receives the negative.

        Returns:
            ``(defender_strategy, adversary_strategy)`` as probability
            vectors over rows and columns. A player whose LP fails gets the
            uniform strategy.
        """
        payoff = np.asarray(payoff_matrix, dtype=float)

        # Solve for defender's strategy (row player)
        defender_strategy = self._maximin_strategy(payoff)

        # Solve for adversary's strategy (column player): minimising the
        # payoff is the same as being the maximising row player of -payoff^T.
        adversary_strategy = self._maximin_strategy(-payoff.T)

        return defender_strategy, adversary_strategy

    def compute_nash_gap(self, strategies: List[np.ndarray]) -> float:
        """Compute the Nash gap to measure convergence

        The value returned is the mean Euclidean distance between
        consecutive strategies in ``strategies``; with fewer than two
        strategies it is ``inf``.
        """
        if len(strategies) < 2:
            return float('inf')

        gaps = [np.linalg.norm(current - following)
                for current, following in zip(strategies[:-1], strategies[1:])]

        return np.mean(gaps)

# ==================== SECTION 3: NEURAL NETWORK ARCHITECTURES ====================

class ResidualBlock(nn.Module):
    """Fully connected residual block.

    Main path: Linear -> BatchNorm -> LeakyReLU -> Dropout -> Linear ->
    BatchNorm. The input is added back through a skip path (a Linear
    projection when the widths differ, identity otherwise) and the sum goes
    through LeakyReLU.
    """

    def __init__(self, in_features: int, out_features: int, dropout_rate: float = 0.3):
        super().__init__()

        # Main path. Attribute names are the state_dict keys, and the
        # creation order fixes how a seeded RNG initialises the layers.
        self.fc1 = nn.Linear(in_features, out_features)
        self.bn1 = nn.BatchNorm1d(out_features)
        self.relu = nn.LeakyReLU(0.01)
        self.fc2 = nn.Linear(out_features, out_features)
        self.bn2 = nn.BatchNorm1d(out_features)
        self.dropout = nn.Dropout(dropout_rate)

        # Skip path: project only when the feature width changes.
        if in_features == out_features:
            self.residual = nn.Identity()
        else:
            self.residual = nn.Linear(in_features, out_features)

    def forward(self, x):
        shortcut = self.residual(x)

        hidden = self.relu(self.bn1(self.fc1(x)))
        hidden = self.dropout(hidden)
        hidden = self.bn2(self.fc2(hidden))

        return self.relu(hidden + shortcut)

class AdvancedDefenderNetwork(nn.Module):
    """
    Advanced defender network with residual connections
    Implements the detection function f_θ: X → [0,1]^C

    A stack of ``ResidualBlock`` layers followed by one linear classifier.
    ``forward`` returns the raw classifier outputs (logits); no softmax is
    applied inside the network.
    """

    def __init__(self, input_dim: int, hidden_dims: Optional[List[int]] = None,
                 num_classes: int = 2, dropout_rate: float = 0.3):
        """
        Args:
            input_dim: Number of input features.
            hidden_dims: Output width of each residual block, in order.
                ``None`` selects ``[512, 256, 128, 64]``.
            num_classes: Number of classifier outputs.
            dropout_rate: Dropout probability passed to every block.
        """
        super().__init__()

        # A None sentinel replaces the former list default, so the default
        # is no longer one list object shared by every call.
        if hidden_dims is None:
            hidden_dims = [512, 256, 128, 64]

        layers = []
        prev_dim = input_dim

        for hidden_dim in hidden_dims:
            layers.append(ResidualBlock(prev_dim, hidden_dim, dropout_rate))
            prev_dim = hidden_dim

        self.feature_extractor = nn.Sequential(*layers)
        self.classifier = nn.Linear(prev_dim, num_classes)

        # Initialize weights
        self.apply(self._init_weights)

    def _init_weights(self, module):
        """Xavier-uniform weights and zero biases for every ``nn.Linear``."""
        if isinstance(module, nn.Linear):
            nn.init.xavier_uniform_(module.weight)
            if module.bias is not None:
                nn.init.zeros_(module.bias)

    def forward(self, x):
        features = self.feature_extractor(x)
        return self.classifier(features)

    def get_features(self, x):
        """Extract feature representations for game-theoretic analysis"""
        return self.feature_extractor(x)

class AttentionAdversaryNetwork(nn.Module):
    """
    Adversary model that produces attention-weighted input perturbations.

    Three independent heads read the same 2-D ``(batch, input_dim)`` input:
    a softmax attention over features, a tanh-bounded perturbation generator,
    and a softmax head over 10 discrete strategies.
    """

    def __init__(self, input_dim: int, hidden_dim: int = 128, max_epsilon: float = 0.1):
        super().__init__()
        self.max_epsilon = max_epsilon

        # Per-feature importance; each row sums to one because of the softmax.
        attention_layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.Tanh(),
            nn.Linear(hidden_dim, input_dim),
            nn.Softmax(dim=1),
        ]
        self.attention = nn.Sequential(*attention_layers)

        # Raw perturbation direction; the final tanh keeps it in [-1, 1].
        generator_layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.BatchNorm1d(hidden_dim),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.BatchNorm1d(hidden_dim),
            nn.Linear(hidden_dim, input_dim),
            nn.Tanh(),
        ]
        self.generator = nn.Sequential(*generator_layers)

        # Distribution over the adversary's discrete strategies.
        strategy_layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 10),  # 10 discrete strategies
            nn.Softmax(dim=1),
        ]
        self.strategy_net = nn.Sequential(*strategy_layers)

    def forward(self, x, epsilon=None):
        """Return the perturbation to add to ``x``.

        Args:
            x: Input batch.
            epsilon: Scale applied to the perturbation; ``None`` means
                ``self.max_epsilon``.
        """
        budget = self.max_epsilon if epsilon is None else epsilon

        importance = self.attention(x)
        direction = self.generator(x)

        # Element-wise: direction scaled by feature importance, then by budget.
        return direction * importance * budget

    def get_strategy(self, x):
        """Return the per-sample distribution over the 10 strategies."""
        return self.strategy_net(x)

# ==================== SECTION 4: BYZANTINE-RESILIENT AGGREGATION ====================

class ByzantineResilientAggregator:
    """
    Byzantine-resilient secure aggregation (BRSA) of client updates.

    Pipeline used by ``aggregate``: flatten each client update, drop the
    clients with the lowest similarity-based score, combine the rest with a
    geometric median per tensor, then add Gaussian noise.
    """

    def __init__(self, params: GameParameters):
        self.params = params
        self.detection_history = defaultdict(list)

    def generate_commitment(self, gradient: torch.Tensor, mask: torch.Tensor) -> str:
        """Return the SHA-256 hex digest of the flattened gradient and mask bytes."""
        joined = torch.cat([gradient.flatten(), mask.flatten()])
        raw_bytes = joined.cpu().numpy().tobytes()
        hasher = hashlib.sha256()
        hasher.update(raw_bytes)
        return hasher.hexdigest()

    def clip_gradient(self, gradient: torch.Tensor, max_norm: float) -> torch.Tensor:
        """Rescale ``gradient`` to norm ``max_norm`` if its norm is larger.

        The input tensor itself is returned when no clipping is needed.
        """
        current_norm = torch.norm(gradient)
        if current_norm > max_norm:
            return gradient * (max_norm / current_norm)
        return gradient

    def add_differential_privacy_noise(self, gradient: torch.Tensor) -> torch.Tensor:
        """Return ``gradient`` plus zero-mean Gaussian noise.

        The standard deviation is
        ``2 * clip_norm * sqrt(2 * ln(1.25 / delta)) / epsilon``.
        """
        cfg = self.params
        sensitivity = 2 * cfg.clip_norm
        gaussian_factor = np.sqrt(2 * np.log(1.25 / cfg.delta_privacy))
        noise_scale = sensitivity * gaussian_factor / cfg.epsilon_privacy
        return gradient + torch.randn_like(gradient) * noise_scale

    def detect_byzantine_balance(self, gradients: List[torch.Tensor]) -> List[int]:
        """
        BALANCE-style Byzantine detection based on pairwise cosine similarity.

        Each client gets the score ``mean(sim) / (1 + var(sim))`` over its row
        of the similarity matrix (the diagonal stays zero). The
        ``byzantine_bound`` clients with the lowest score are reported.
        Returns an empty list when there are at most ``2 * byzantine_bound``
        clients.
        """
        n_clients = len(gradients)
        bound = self.params.byzantine_bound
        if n_clients <= 2 * bound:
            return []

        # Pairwise cosine similarities between the flattened updates.
        flat = [g.flatten() for g in gradients]
        similarity_matrix = torch.zeros(n_clients, n_clients)
        for i, g_i in enumerate(flat):
            for j, g_j in enumerate(flat):
                if i == j:
                    continue
                similarity_matrix[i, j] = F.cosine_similarity(g_i, g_j, dim=0)

        # High mean similarity and low variance give a high score.
        balance_scores = [
            (row.mean() / (1 + row.var())).item()
            for row in similarity_matrix
        ]

        # Lowest scores first.
        ranked = np.argsort(balance_scores)
        return ranked[:bound].tolist()

    def geometric_median(self, points: List[torch.Tensor], max_iter: int = 100) -> torch.Tensor:
        """
        Approximate the geometric median of ``points``.

        Starts from the arithmetic mean and repeatedly re-weights the points
        by inverse distance to the current estimate, stopping early once an
        update moves the estimate by less than 1e-6.
        """
        estimate = torch.stack(points).mean(dim=0)

        for _ in range(max_iter):
            dists = torch.stack([torch.norm(p - estimate) for p in points])
            inv_dist = 1.0 / (dists + 1e-5)  # offset avoids division by zero
            inv_dist = inv_dist / inv_dist.sum()

            candidate = sum(w * p for w, p in zip(inv_dist, points))

            if torch.norm(candidate - estimate) < 1e-6:
                break
            estimate = candidate

        return estimate

    def aggregate(self, model_updates: List[Dict[str, torch.Tensor]]) -> Dict[str, torch.Tensor]:
        """
        Main Byzantine-resilient secure aggregation.

        Args:
            model_updates: One ``name -> tensor`` dict per client; all dicts
                are expected to share the keys of the first one.

        Returns:
            A dict with the keys of ``model_updates[0]`` holding the noised
            geometric median of the retained clients' tensors.
        """
        # One flat vector per client, concatenated in sorted key order.
        gradients = [
            torch.cat([update[key].flatten() for key in sorted(update.keys())])
            for update in model_updates
        ]

        suspects = self.detect_byzantine_balance(gradients)

        honest_updates = [
            update for idx, update in enumerate(model_updates)
            if idx not in suspects
        ]
        if not honest_updates:
            # Fallback if all detected as Byzantine
            keep = len(model_updates) - self.params.byzantine_bound
            honest_updates = model_updates[:keep]

        # Robust combination per tensor, followed by the privacy noise.
        aggregated = {}
        for key in model_updates[0].keys():
            per_client = [update[key] for update in honest_updates]
            aggregated[key] = self.geometric_median(per_client)

        for key in aggregated.keys():
            aggregated[key] = self.add_differential_privacy_noise(aggregated[key])

        return aggregated

# ==================== SECTION 5: MARTINGALE CONVERGENCE ANALYSIS ====================

class MartingaleAnalyzer:
    """
    Implements martingale-based convergence analysis

    Keeps a history of Lyapunov values (one per ``compute_lyapunov_function``
    call) and derives a supermartingale check and a decay-rate estimate
    from it.
    """

    def __init__(self, params: GameParameters):
        self.params = params
        self.lyapunov_history = []
        self.gradient_norms = []

    def compute_lyapunov_function(self, models: List[nn.Module],
                                 optimal_params: Optional[Dict] = None) -> float:
        """
        Compute Lyapunov function V_t = Σ||θ_k - θ*||² + αH(π) + βΦ(s)

        Args:
            models: Models whose parameters are compared with the reference.
            optimal_params: Reference parameters θ*, in the same order as
                each model's ``named_parameters()``. When ``None`` the mean
                of the models' parameters is used as a proxy.

        Returns:
            The Lyapunov value, which is also appended to
            ``self.lyapunov_history``. The entropy and state terms are random
            placeholders, so the value is noisy even for fixed models.
        """
        if optimal_params is None and models:
            # Use current mean as proxy for optimal
            optimal_params = self._compute_mean_params(models)

        total_distance = 0.0

        # Only the number is needed, so no autograd graph is recorded.
        with torch.no_grad():
            for model in models:
                squared = [
                    torch.norm(param - optimal) ** 2
                    for (_, param), optimal in zip(model.named_parameters(),
                                                   optimal_params.values())
                ]
                # A model without parameters contributes nothing (this used
                # to fail by calling .item() on a Python float).
                if squared:
                    total_distance += torch.stack(squared).sum().item()

        # Add entropy term (simplified)
        entropy = np.random.uniform(0.1, 0.5)  # Placeholder

        # Add state-dependent regularizer
        state_regularizer = np.random.uniform(0.01, 0.1)  # Placeholder

        lyapunov = total_distance + 0.1 * entropy + 0.01 * state_regularizer
        self.lyapunov_history.append(lyapunov)

        return lyapunov

    def _compute_mean_params(self, models: List[nn.Module]) -> Dict:
        """Compute mean parameters across models

        Parameter names are taken from the first model; a model that lacks
        a name is left out of that name's mean.
        """
        # One name -> parameter lookup per model instead of rescanning every
        # model's parameter list for each name.
        lookups = [dict(model.named_parameters()) for model in models]

        mean_params = {}
        for name in lookups[0]:
            matching = [lookup[name].clone() for lookup in lookups if name in lookup]
            mean_params[name] = torch.stack(matching).mean(dim=0)

        return mean_params

    def check_supermartingale_property(self, lambda_val: float = 0.01) -> bool:
        """
        Check if M_t = e^(λt)V_t is a supermartingale

        Simplified check: returns True when the recorded M_t sequence never
        increases (and trivially when fewer than two values are recorded).
        """
        if len(self.lyapunov_history) < 2:
            return True

        # Compute M_t values
        M_values = [np.exp(lambda_val * t) * V
                    for t, V in enumerate(self.lyapunov_history)]

        # Check decreasing property (simplified)
        return all(earlier >= later
                   for earlier, later in zip(M_values[:-1], M_values[1:]))

    def estimate_convergence_rate(self) -> float:
        """
        Estimate convergence rate from Lyapunov function history

        Fits a straight line to ``log(V_t)`` against the step index and
        returns the negated slope. Returns ``inf`` when fewer than 10 values
        are recorded.
        """
        if len(self.lyapunov_history) < 10:
            return float('inf')

        # Fit exponential decay
        t = np.arange(len(self.lyapunov_history))
        log_V = np.log(np.array(self.lyapunov_history) + 1e-10)  # offset guards log(0)

        # Linear regression on log scale
        coeffs = np.polyfit(t, log_V, 1)
        rate = -coeffs[0]  # Negative of slope is decay rate

        return rate

# ==================== SECTION 6: ADVERSARIAL TRAINING ====================

class StochasticAdversarialTrainer:
    """
    Implements stochastic adversarial training with game-theoretic strategies

    Provides three attacks (noisy PGD, FGSM, a simplified Carlini-Wagner)
    that return an additive perturbation for an input batch, and one
    training step that mixes the clean loss with the loss on PGD examples.
    """

    def __init__(self, params: GameParameters):
        self.params = params
        self.attack_success_history = []

    def stochastic_pgd_attack(self, model: nn.Module, x: torch.Tensor,
                             y: torch.Tensor, epsilon: float = 0.1,
                             steps: int = 10, alpha: float = 0.01) -> torch.Tensor:
        """
        Stochastic Projected Gradient Descent attack

        Args:
            model: Classifier returning logits.
            x: Input batch.
            y: Class indices for ``x``.
            epsilon: Element-wise bound on the perturbation.
            steps: Number of signed-gradient steps.
            alpha: Step size.

        Returns:
            The detached perturbation, clamped to ``[-epsilon, epsilon]``.

        Note:
            Each ``loss.backward()`` also accumulates into the ``.grad`` of
            any model parameter that requires grad; callers should zero
            those before their own backward pass.
        """
        # Random initialization
        delta = torch.zeros_like(x, requires_grad=True)
        delta.data = torch.clamp(delta.data + torch.randn_like(x) * epsilon/2, -epsilon, epsilon)

        for _ in range(steps):
            # Forward pass
            outputs = model(x + delta)
            loss = F.cross_entropy(outputs, y)

            # Backward pass
            loss.backward()

            # Add stochastic noise to gradient
            grad = delta.grad.detach()
            noise = torch.randn_like(grad) * 0.01
            grad = grad + noise

            # Update with gradient sign
            delta.data = delta.data + alpha * grad.sign()
            delta.data = torch.clamp(delta.data, -epsilon, epsilon)
            delta.grad.zero_()

        return delta.detach()

    def fgsm_attack(self, model: nn.Module, x: torch.Tensor,
                   y: torch.Tensor, epsilon: float = 0.1) -> torch.Tensor:
        """Fast Gradient Sign Method attack

        Returns ``epsilon * sign(dLoss/dx)``. The gradient is taken with
        respect to a private copy of ``x``, so the caller's tensor is left
        untouched (no ``requires_grad`` flip, no accumulated ``.grad``) and
        the model's parameter gradients are not modified.
        """
        x_probe = x.detach().clone().requires_grad_(True)

        outputs = model(x_probe)
        loss = F.cross_entropy(outputs, y)
        (input_grad,) = torch.autograd.grad(loss, x_probe)

        perturbation = epsilon * input_grad.sign()
        return perturbation.detach()

    @staticmethod
    def _cw_margin_loss(outputs: torch.Tensor, y: torch.Tensor,
                        confidence: float = 50.0) -> torch.Tensor:
        """Per-sample hinge ``max(correct - best_wrong + confidence, 0)``.

        Returns a 1-D tensor with one entry per sample.
        """
        target = y.view(-1, 1)
        # squeeze(1) keeps this 1-D; a (B, 1) tensor minus the (B,) tensor
        # below would broadcast to a (B, B) matrix.
        correct_logit = outputs.gather(1, target).squeeze(1)
        wrong_logits = outputs.clone()
        wrong_logits.scatter_(1, target, -float('inf'))
        max_wrong_logit = wrong_logits.max(1)[0]
        return torch.clamp(correct_logit - max_wrong_logit + confidence, min=0)

    def cw_attack(self, model: nn.Module, x: torch.Tensor,
                 y: torch.Tensor, c: float = 1.0,
                 max_iter: int = 100) -> torch.Tensor:
        """Carlini-Wagner attack (simplified)

        Optimises the perturbation with Adam (lr 0.01) for ``max_iter``
        steps on the summed margin hinge plus ``c`` times the L2 norm of the
        perturbation. Returns the detached perturbation.
        """
        delta = torch.zeros_like(x, requires_grad=True)
        optimizer = optim.Adam([delta], lr=0.01)

        for _ in range(max_iter):
            outputs = model(x + delta)

            # CW loss
            loss = self._cw_margin_loss(outputs, y).sum()
            loss = loss + c * torch.norm(delta, p=2)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        return delta.detach()

    def adversarial_training_step(self, model: nn.Module, x: torch.Tensor,
                                 y: torch.Tensor, optimizer: optim.Optimizer,
                                 epsilon: float = 0.1, alpha: float = 0.5):
        """
        Single step of adversarial training with mixed objectives

        Args:
            model: Classifier being trained.
            x: Input batch.
            y: Class indices for ``x``.
            optimizer: Optimizer over ``model``'s parameters.
            epsilon: Perturbation bound handed to the PGD attack.
            alpha: Weight of the clean loss; the adversarial loss gets
                ``1 - alpha``.

        Returns:
            ``(total_loss, clean_accuracy, adversarial_accuracy)`` as floats.
            ``1 - adversarial_accuracy`` is appended to
            ``self.attack_success_history``.
        """
        # Generate adversarial examples
        adv_delta = self.stochastic_pgd_attack(model, x, y, epsilon)
        # NOTE(review): the clamp assumes features live in [0, 1]; confirm
        # this matches how the inputs are scaled upstream.
        x_adv = torch.clamp(x + adv_delta, 0, 1)

        # Compute losses
        clean_outputs = model(x)
        clean_loss = F.cross_entropy(clean_outputs, y)

        adv_outputs = model(x_adv)
        adv_loss = F.cross_entropy(adv_outputs, y)

        # Mixed objective
        total_loss = alpha * clean_loss + (1 - alpha) * adv_loss

        # Optimization step (zero_grad also clears what the attack left behind)
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        # Track attack success rate
        with torch.no_grad():
            clean_correct = (clean_outputs.argmax(1) == y).float().mean()
            adv_correct = (adv_outputs.argmax(1) == y).float().mean()
            attack_success = 1 - adv_correct
            self.attack_success_history.append(attack_success.item())

        return total_loss.item(), clean_correct.item(), adv_correct.item()

# ==================== SECTION 7: MAIN FEDERATED GAME-THEORETIC SYSTEM ====================

class FederatedGameTheoreticDefense:
    """
    Main FedGTD system integrating all components
    """
    
    def __init__(self, params: GameParameters, input_dim: int, num_classes: int = 2):
        """Build every component of the system and its empty metric stores.

        Args:
            params: Shared configuration for all components.
            input_dim: Number of input features per sample.
            num_classes: Number of output classes of each defender.
        """
        self.params = params
        self.input_dim = input_dim
        self.num_classes = num_classes
        self.device = device

        # Sub-systems. The game is created first, so it is the first
        # consumer of the RNG for its network initialisation.
        self.game = StochasticDifferentialGame(params)
        self.nash_solver = NashEquilibriumSolver(params)
        self.aggregator = ByzantineResilientAggregator(params)
        self.martingale = MartingaleAnalyzer(params)
        self.adversarial_trainer = StochasticAdversarialTrainer(params)

        # One defender network per client, each with its own AdamW optimizer.
        self.defenders = [
            AdvancedDefenderNetwork(input_dim, num_classes=num_classes).to(device)
            for _ in range(params.n_defenders)
        ]
        self.defender_optimizers = [
            optim.AdamW(net.parameters(), lr=params.learning_rate,
                        weight_decay=1e-4)
            for net in self.defenders
        ]

        # A single adversary shared by all defenders.
        self.adversary = AttentionAdversaryNetwork(input_dim).to(device)
        self.adversary_optimizer = optim.AdamW(
            self.adversary.parameters(), lr=params.learning_rate)

        # Metric stores, filled in as rounds are executed.
        self.metrics = {
            'round_metrics': [],
            'defender_metrics': defaultdict(list),
            'adversary_metrics': [],
            'game_metrics': [],
            'privacy_metrics': [],
            'convergence_metrics': [],
        }

        # Game bookkeeping: round counter and per-round equilibrium strategies.
        self.current_round = 0
        self.defender_strategies = []
        self.adversary_strategies = []
        
    def local_training(self, defender_idx: int, data_loader: DataLoader) -> Dict:
        """
        Train one defender on its own data against the current adversary.

        Args:
            defender_idx: Index into ``self.defenders`` and
                ``self.defender_optimizers``.
            data_loader: Yields ``(features, labels)`` batches.

        Returns:
            Dict with ``'model_update'`` (copies of the defender's named
            parameters after training), the mean ``'loss'`` and
            ``'accuracy'`` over all batches, and ``'defender_idx'``.
        """
        net = self.defenders[defender_idx]
        opt = self.defender_optimizers[defender_idx]
        net.train()

        losses = []
        accuracies = []

        for _ in range(self.params.local_epochs):
            for features, labels in data_loader:
                features = features.to(device)
                labels = labels.to(device)

                # Adversarial copy of the batch; no gradient reaches the adversary.
                with torch.no_grad():
                    adv_features = features + self.adversary(features)

                # Adversary's strategy distribution for this batch.
                adv_strategy = self.adversary.get_strategy(features)

                # Clean pass first, then the adversarial pass.
                logits_clean = net(features)
                logits_adv = net(adv_features)

                loss_clean = F.cross_entropy(logits_clean, labels)
                loss_adv = F.cross_entropy(logits_adv, labels)

                # The mean strategy probability weights the adversarial loss.
                mix = adv_strategy.mean().item()
                total_loss = (1 - mix) * loss_clean + mix * loss_adv

                opt.zero_grad()
                total_loss.backward()

                # Bound the gradient norm before the update.
                torch.nn.utils.clip_grad_norm_(net.parameters(), self.params.clip_norm)

                opt.step()

                losses.append(total_loss.item())
                hits = (logits_clean.argmax(1) == labels).float()
                accuracies.append(hits.mean().item())

        # Snapshot of the trained parameters (buffers are not included).
        model_update = {
            name: param.data.clone()
            for name, param in net.named_parameters()
        }

        return {
            'model_update': model_update,
            'loss': np.mean(losses),
            'accuracy': np.mean(accuracies),
            'defender_idx': defender_idx
        }
    
    def adversary_best_response(self, data_loaders: List[DataLoader]):
        """
        Update adversary using best response to current defender strategies

        Runs 5 passes; each pass takes the first batch of up to 3 loaders.
        The adversary is trained to raise the defenders' cross-entropy on
        perturbed inputs (a differentiable stand-in for the misclassification
        rate) with a small penalty on the perturbation norm. Only the
        adversary's optimizer is stepped.

        Args:
            data_loaders: Client loaders yielding ``(features, labels)``.

        Returns:
            Dict with ``'avg_success_rate'`` and ``'max_success_rate'``,
            where success means a sample that a defender classified
            correctly when clean and incorrectly when perturbed. Both are 0
            when no batch was processed.
        """
        self.adversary.train()

        success_rates = []

        for _ in range(5):  # Adversary training iterations
            for loader in data_loaders[:3]:  # Sample subset for efficiency
                for batch_x, batch_y in loader:
                    batch_x, batch_y = batch_x.to(device), batch_y.to(device)

                    # Generate perturbations
                    perturbations = self.adversary(batch_x)
                    x_adv = batch_x + perturbations

                    # Evaluate against all defenders
                    attack_losses = []
                    successes = []
                    for defender in self.defenders:
                        defender.eval()

                        # This pass must run with autograd enabled: it is the
                        # only path that carries gradient from the defenders'
                        # loss back to the adversary. The argmax-based success
                        # rate below has no gradient.
                        adv_logits = defender(x_adv)
                        attack_losses.append(F.cross_entropy(adv_logits, batch_y))

                        with torch.no_grad():
                            clean_pred = defender(batch_x).argmax(1)
                            adv_pred = adv_logits.argmax(1)

                            # Success = misclassification
                            success = (clean_pred == batch_y) & (adv_pred != batch_y)
                            successes.append(success.float().mean())

                    # Reported metric only
                    avg_success = torch.stack(successes).mean()

                    # Adversary loss (wants to maximize the defenders' loss)
                    loss = -torch.stack(attack_losses).mean()

                    # Regularization
                    reg_loss = 0.01 * torch.norm(perturbations)
                    total_loss = loss + reg_loss

                    # Backward pass
                    self.adversary_optimizer.zero_grad()
                    total_loss.backward()
                    self.adversary_optimizer.step()

                    # The backward pass also wrote gradients into the
                    # defenders; clear them so they cannot leak into a later
                    # defender update.
                    for defender in self.defenders:
                        defender.zero_grad()

                    success_rates.append(avg_success.item())
                    break  # One batch per loader

        return {
            'avg_success_rate': np.mean(success_rates) if success_rates else 0,
            'max_success_rate': np.max(success_rates) if success_rates else 0
        }
    
    def federated_round(self, client_data_loaders: List[DataLoader]) -> Dict:
        """
        Execute one round of federated game-theoretic learning

        Phases: local training of each defender, Byzantine-resilient
        aggregation, adversary best response, one step of the game dynamics
        with a Nash solve, and convergence bookkeeping.

        Args:
            client_data_loaders: One loader per client; loaders beyond
                ``params.n_defenders`` are ignored for local training.

        Returns:
            The metrics dict of this round, which is also appended to
            ``self.metrics['round_metrics']``.
        """
        self.current_round += 1
        round_start = time.time()

        # Phase 1: Local training
        local_updates = []
        for idx, loader in enumerate(client_data_loaders):
            if idx >= self.params.n_defenders:
                break
            update = self.local_training(idx, loader)
            local_updates.append(update)
            self.metrics['defender_metrics'][idx].append(update)

        # Phase 2: Byzantine-resilient aggregation
        model_updates = [u['model_update'] for u in local_updates]
        aggregated_update = self.aggregator.aggregate(model_updates)

        # Update all defenders with aggregated model.
        # The update holds only named parameters, not buffers such as the
        # BatchNorm running statistics, so a strict load would raise on the
        # missing keys. strict=False loads the parameters and leaves each
        # defender's own buffers in place.
        for defender in self.defenders:
            defender.load_state_dict(aggregated_update, strict=False)

        # Phase 3: Adversary best response
        adv_metrics = self.adversary_best_response(client_data_loaders)
        self.metrics['adversary_metrics'].append(adv_metrics)

        # Phase 4: Game dynamics update (driven by a random action)
        action = torch.randn(self.params.action_dim).to(device)
        new_state = self.game.evolve(action)

        # Compute payoff matrix and Nash equilibrium
        payoff_matrix = self.nash_solver.compute_payoff_matrix(
            self.defenders, self.adversary, new_state)
        defender_strategy, adversary_strategy = self.nash_solver.solve_nash_equilibrium(
            payoff_matrix)

        self.defender_strategies.append(defender_strategy)
        self.adversary_strategies.append(adversary_strategy)

        # Phase 5: Convergence analysis (gap over the last 5 defender strategies)
        lyapunov = self.martingale.compute_lyapunov_function(self.defenders)
        nash_gap = self.nash_solver.compute_nash_gap(self.defender_strategies[-5:])
        convergence_rate = self.martingale.estimate_convergence_rate()

        # Compile round metrics
        round_metrics = {
            'round': self.current_round,
            'avg_defender_loss': np.mean([u['loss'] for u in local_updates]),
            'avg_defender_acc': np.mean([u['accuracy'] for u in local_updates]),
            'adversary_success': adv_metrics['avg_success_rate'],
            'nash_gap': nash_gap,
            'lyapunov': lyapunov,
            'convergence_rate': convergence_rate,
            'round_time': time.time() - round_start
        }

        self.metrics['round_metrics'].append(round_metrics)
        self.metrics['convergence_metrics'].append({
            'nash_gap': nash_gap,
            'lyapunov': lyapunov,
            'supermartingale': self.martingale.check_supermartingale_property()
        })

        return round_metrics
    
    def evaluate(self, test_loader: DataLoader) -> Dict:
        """
        Evaluate the first defender model on a labelled test set.

        Predictions are the arg-max over the model outputs; the score used
        for AUC is the soft-max probability of class index 1, so the method
        assumes a two-class output.

        Args:
            test_loader: Loader yielding ``(features, labels)`` batches.

        Returns:
            Dict with ``accuracy``, ``precision``, ``recall``, ``f1``
            (binary averaging, zero on undefined), ``auc`` and
            ``confusion_matrix``. ``auc`` falls back to 0.5 when
            ``roc_auc_score`` rejects the inputs with a ``ValueError``
            (e.g. only one class present in the labels).
        """
        # Use first defender as representative
        model = self.defenders[0]
        model.eval()

        all_preds = []
        all_labels = []
        all_scores = []

        with torch.no_grad():
            for batch_x, batch_y in test_loader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)

                outputs = model(batch_x)
                scores = F.softmax(outputs, dim=1)
                preds = outputs.argmax(1)

                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(batch_y.cpu().numpy())
                # Probability of class index 1 is the ranking score for AUC
                all_scores.extend(scores[:, 1].cpu().numpy())

        # Compute metrics
        accuracy = accuracy_score(all_labels, all_preds)
        precision, recall, f1, _ = precision_recall_fscore_support(
            all_labels, all_preds, average='binary', zero_division=0)

        # AUC is undefined for degenerate label sets; catch only that case so
        # genuine failures (and KeyboardInterrupt) are not silently hidden.
        try:
            auc = roc_auc_score(all_labels, all_scores)
        except ValueError:
            auc = 0.5

        return {
            'accuracy': accuracy,
            'precision': precision,
            'recall': recall,
            'f1': f1,
            'auc': auc,
            'confusion_matrix': confusion_matrix(all_labels, all_preds)
        }
    
    def evaluate_robustness(self, test_loader: DataLoader) -> Dict:
        """
        Measure accuracy of the first defender under FGSM and PGD attacks.

        For each perturbation budget in ``[0.01, 0.05, 0.1, 0.2]`` only the
        first batch of ``test_loader`` is evaluated (for efficiency). The
        perturbations come from ``self.adversarial_trainer.fgsm_attack`` and
        ``self.adversarial_trainer.stochastic_pgd_attack`` and are added to
        the clean batch.

        Args:
            test_loader: Loader yielding ``(features, labels)`` batches.

        Returns:
            Dict keyed by ``'eps_<budget>'``; each value holds ``clean_acc``,
            ``fgsm_acc``, ``pgd_acc`` and ``robustness_score`` (PGD-correct
            divided by clean-correct). When the loader yields no batch, all
            four values are ``0.0`` instead of raising ``ZeroDivisionError``.
        """
        model = self.defenders[0]
        model.eval()

        epsilons = [0.01, 0.05, 0.1, 0.2]
        robustness_results = {}

        for epsilon in epsilons:
            clean_correct = 0
            fgsm_correct = 0
            pgd_correct = 0
            total = 0

            for batch_x, batch_y in test_loader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                batch_size = batch_x.size(0)

                # Clean accuracy
                with torch.no_grad():
                    clean_outputs = model(batch_x)
                    clean_pred = clean_outputs.argmax(1)
                    clean_correct += (clean_pred == batch_y).sum().item()

                # NOTE(review): perturbed inputs are clamped to [0, 1] while
                # the clean pass above uses the unclamped batch. The main
                # pipeline standardises features with StandardScaler, so
                # confirm this range is really intended for that data.

                # FGSM attack
                fgsm_delta = self.adversarial_trainer.fgsm_attack(
                    model, batch_x, batch_y, epsilon)
                x_fgsm = torch.clamp(batch_x + fgsm_delta, 0, 1)

                with torch.no_grad():
                    fgsm_outputs = model(x_fgsm)
                    fgsm_pred = fgsm_outputs.argmax(1)
                    fgsm_correct += (fgsm_pred == batch_y).sum().item()

                # PGD attack
                pgd_delta = self.adversarial_trainer.stochastic_pgd_attack(
                    model, batch_x, batch_y, epsilon)
                x_pgd = torch.clamp(batch_x + pgd_delta, 0, 1)

                with torch.no_grad():
                    pgd_outputs = model(x_pgd)
                    pgd_pred = pgd_outputs.argmax(1)
                    pgd_correct += (pgd_pred == batch_y).sum().item()

                total += batch_size

                break  # Evaluate on one batch for efficiency

            if total == 0:
                # Nothing was evaluated (empty loader or empty first batch):
                # report zeros rather than dividing by zero.
                robustness_results[f'eps_{epsilon}'] = {
                    'clean_acc': 0.0,
                    'fgsm_acc': 0.0,
                    'pgd_acc': 0.0,
                    'robustness_score': 0.0
                }
                continue

            robustness_results[f'eps_{epsilon}'] = {
                'clean_acc': clean_correct / total,
                'fgsm_acc': fgsm_correct / total,
                'pgd_acc': pgd_correct / total,
                # Small constant guards against zero clean-correct samples
                'robustness_score': pgd_correct / (clean_correct + 1e-10)
            }

        return robustness_results

# ==================== SECTION 8: DATA HANDLING ====================

class CloudSecurityDataHandler:
    """
    Handler for cloud security datasets with non-IID distribution.

    Loads one of the known CSV datasets (or a synthetic stand-in), turns it
    into a feature matrix plus binary labels, and partitions samples across
    federated clients with a Dirichlet label skew.
    """

    # Dataset name accepted by ``load_dataset`` -> CSV file under ``dataset_path``
    DATASET_FILES = {
        'containers': 'Containers_Dataset.csv',
        'dnn_edge': 'DNN-EdgeIIoT-dataset.csv',
        'ml_edge': 'ML-EdgeIIoT-dataset.csv',
        'microsoft_train': 'Microsoft_GUIDE_Train.csv',
        'microsoft_test': 'Microsoft_GUIDE_Test.csv',
    }

    # Column names tried, in order, when looking for the label column
    LABEL_COLUMN_CANDIDATES = ('label', 'Label', 'target', 'class', 'Class')

    def __init__(self, dataset_path: str = '/kaggle/input/'):
        self.dataset_path = dataset_path
        self.scaler = StandardScaler()
        self.label_encoder = LabelEncoder()

    def load_dataset(self, dataset_name: str) -> Tuple[np.ndarray, np.ndarray]:
        """
        Load and preprocess a cloud security dataset.

        Args:
            dataset_name: One of the keys of ``DATASET_FILES``. Any other
                name yields synthetic data: 10000 samples with 50 Gaussian
                features and random binary labels.

        Returns:
            ``(X, y)``. For CSV datasets ``X`` is float32 and ``y`` is
            int64 with values in {0, 1}; the synthetic arrays keep NumPy's
            default dtypes.

        Raises:
            OSError: if the CSV file cannot be read (raised by pandas).
            ValueError: if a feature column cannot be cast to float32.
        """
        filename = self.DATASET_FILES.get(dataset_name)
        if filename is None:
            # Create synthetic data for testing. This reseeds NumPy's global
            # generator, so the synthetic arrays are the same on every call.
            np.random.seed(42)
            X = np.random.randn(10000, 50)
            y = np.random.randint(0, 2, 10000)
            return X, y

        df = pd.read_csv(f'{self.dataset_path}/{filename}')

        # Preprocess: missing values become 0
        df = df.fillna(0)

        # Label column: first matching candidate, otherwise the last column
        label_col = next(
            (col for col in self.LABEL_COLUMN_CANDIDATES if col in df.columns),
            df.columns[-1])

        # Separate features and labels
        X = df.drop(columns=[label_col]).values
        y = df[label_col].values

        # Encode labels if necessary
        if y.dtype == 'object':
            y = self.label_encoder.fit_transform(y)

        # Convert to binary if multi-class: label 0 stays 0, the rest become 1
        if len(np.unique(y)) > 2:
            y = (y > 0).astype(int)

        return X.astype(np.float32), y.astype(np.int64)

    def create_non_iid_splits(self, X: np.ndarray, y: np.ndarray,
                            n_clients: int, alpha: float = 0.5) -> List[Dict]:
        """
        Create a non-IID data distribution using the Dirichlet distribution.

        For every class, the class's samples are shuffled and divided among
        the clients according to proportions drawn from
        ``Dirichlet(alpha, ..., alpha)``; a smaller ``alpha`` gives a more
        skewed split. Randomness comes from NumPy's global generator.

        Args:
            X: Feature matrix, indexed along axis 0 by sample.
            y: Label vector aligned with ``X``. Labels may take any values;
                they do not have to be the contiguous range ``0..k-1``.
            n_clients: Number of client partitions to create.
            alpha: Dirichlet concentration parameter.

        Returns:
            A list of ``n_clients`` dicts with keys ``'X'`` and ``'y'``. A
            client that receives no sample is given the first 10 rows of the
            input as placeholder data.
        """
        client_indices = [[] for _ in range(n_clients)]

        # Iterate over the labels actually present. Using range(n_classes)
        # would silently drop classes whenever labels are not 0..k-1.
        for label in np.unique(y):
            indices = np.where(y == label)[0]
            np.random.shuffle(indices)

            # Sample proportions from Dirichlet and convert them to counts;
            # the last client absorbs the rounding remainder.
            proportions = np.random.dirichlet(np.ones(n_clients) * alpha)
            counts = (proportions * len(indices)).astype(int)
            counts[-1] = len(indices) - counts[:-1].sum()

            # Assign consecutive slices of the shuffled indices to clients
            start = 0
            for client_id, count in enumerate(counts):
                if count > 0:
                    client_indices[client_id].extend(indices[start:start + count])
                    start += count

        # Create client datasets
        client_data = []
        for indices in client_indices:
            if len(indices) > 0:
                indices = np.array(indices)
                client_data.append({
                    'X': X[indices],
                    'y': y[indices]
                })
            else:
                # Empty client, add minimal data
                client_data.append({
                    'X': X[:10],
                    'y': y[:10]
                })

        return client_data

# ==================== SECTION 9: VISUALIZATION ====================

def plot_convergence_analysis(metrics: Dict):
    """
    Draw a 2x3 grid of per-round training curves.

    Each panel reads one field from one of the lists stored in ``metrics``
    (``convergence_metrics``, ``round_metrics`` or ``adversary_metrics``)
    and is left blank when that list is empty.
    """
    fig, axes = plt.subplots(2, 3, figsize=(15, 10))

    # (grid cell, metrics list, field, axes method, line format, y-label, title)
    panels = [
        ((0, 0), 'convergence_metrics', 'nash_gap', 'semilogy', 'b-',
         'Nash Gap (log scale)', 'Nash Equilibrium Convergence'),
        ((0, 1), 'convergence_metrics', 'lyapunov', 'plot', 'g-',
         'Lyapunov Function', 'Lyapunov Stability'),
        ((0, 2), 'round_metrics', 'avg_defender_acc', 'plot', 'r-',
         'Accuracy', 'Defender Performance'),
        ((1, 0), 'adversary_metrics', 'avg_success_rate', 'plot', 'orange',
         'Success Rate', 'Adversary Success Rate'),
        ((1, 1), 'round_metrics', 'avg_defender_loss', 'plot', 'purple',
         'Loss', 'Training Loss'),
        ((1, 2), 'round_metrics', 'round_time', 'plot', 'brown',
         'Time (s)', 'Computation Time'),
    ]

    for cell, source, field, draw_method, line_format, y_label, title in panels:
        records = metrics[source]
        if not records:
            continue

        panel = axes[cell]
        series = [record[field] for record in records]
        getattr(panel, draw_method)(series, line_format, linewidth=2)
        panel.set_xlabel('Round')
        panel.set_ylabel(y_label)
        panel.set_title(title)
        panel.grid(True, alpha=0.3)

    plt.suptitle('Federated Game-Theoretic Defense: Convergence Analysis', fontsize=16)
    plt.tight_layout()
    plt.show()

def plot_robustness_evaluation(robustness_results: Dict):
    """
    Plot clean, FGSM and PGD accuracy against the perturbation budget.

    ``robustness_results`` maps keys of the form ``'eps_<budget>'`` to dicts
    holding ``clean_acc``, ``fgsm_acc`` and ``pgd_acc``; the budget is parsed
    from the part of the key after the first underscore.
    """
    entries = list(robustness_results.items())
    epsilons = [float(key.split('_')[1]) for key, _ in entries]

    # (result field, line/marker format, legend label) in drawing order
    curves = (
        ('clean_acc', 'b-o', 'Clean'),
        ('fgsm_acc', 'r-s', 'FGSM'),
        ('pgd_acc', 'g-^', 'PGD'),
    )
    accuracy_by_field = {
        field: [values[field] for _, values in entries]
        for field, _, _ in curves
    }

    plt.figure(figsize=(10, 6))
    for field, line_format, legend_label in curves:
        plt.plot(epsilons, accuracy_by_field[field], line_format,
                 label=legend_label, linewidth=2, markersize=8)
    plt.xlabel('Perturbation Budget (ε)', fontsize=12)
    plt.ylabel('Accuracy', fontsize=12)
    plt.title('Adversarial Robustness Evaluation', fontsize=14)
    plt.legend(fontsize=11)
    plt.grid(True, alpha=0.3)
    plt.show()

# ==================== SECTION 10: MAIN EXECUTION ====================

def main_federated_game_theoretic_defense():
    """
    Main execution pipeline for Federated Game-Theoretic Defense.

    Steps: load the dataset (falling back to synthetic data if loading
    fails), split and standardise it, partition the training set across
    clients, run federated rounds until the Nash gap drops below
    ``params.convergence_threshold`` / accuracy plateaus / ``max_rounds`` is
    reached, evaluate accuracy and adversarial robustness, plot the results
    and pickle them to ``fedgtd_results.pkl`` in the working directory.

    Returns:
        ``(system, results)``: the trained ``FederatedGameTheoreticDefense``
        instance and the dict that was written to disk.
    """
    print("="*70)
    print("STOCHASTIC GAME-THEORETIC FEDERATED DEFENSE")
    print("Multi-Cloud Network Intrusion Detection System")
    print("="*70)

    # Initialize parameters
    params = GameParameters(
        n_defenders=5,
        n_adversaries=2,
        epsilon_privacy=2.3,
        delta_privacy=1e-5,
        max_rounds=50,
        local_epochs=3,
        batch_size=256
    )

    # Load data
    print("\n[1] Loading cloud security dataset...")
    data_handler = CloudSecurityDataHandler('/kaggle/input/rogernickanaedevha/integrated-cloud-security-3datasets-ics3d/')

    # Try to load actual dataset, fallback to synthetic. The reason for the
    # fallback is printed so that results on synthetic data are never
    # mistaken for results on the real dataset.
    try:
        X, y = data_handler.load_dataset('microsoft_train')
        print(f"Loaded dataset: {X.shape[0]} samples, {X.shape[1]} features")
    except Exception as exc:
        print(f"Could not load dataset ({type(exc).__name__}: {exc})")
        print("Using synthetic data for demonstration")
        X, y = data_handler.load_dataset('synthetic')

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y)

    # Normalize (statistics are fitted on the training split only)
    X_train = data_handler.scaler.fit_transform(X_train)
    X_test = data_handler.scaler.transform(X_test)

    print(f"Training set: {X_train.shape}")
    print(f"Test set: {X_test.shape}")
    print(f"Class distribution: {np.bincount(y_train)}")

    # Create non-IID federated data
    print("\n[2] Creating non-IID federated data distribution...")
    client_data = data_handler.create_non_iid_splits(
        X_train, y_train, params.n_defenders, alpha=0.5)

    # Create data loaders
    client_loaders = []
    for i, data in enumerate(client_data):
        dataset = TensorDataset(
            torch.FloatTensor(data['X']),
            torch.LongTensor(data['y'])
        )
        loader = DataLoader(dataset, batch_size=params.batch_size, shuffle=True)
        client_loaders.append(loader)
        print(f"  Client {i}: {len(data['X'])} samples")

    # Create test loader
    test_dataset = TensorDataset(
        torch.FloatTensor(X_test),
        torch.LongTensor(y_test)
    )
    test_loader = DataLoader(test_dataset, batch_size=params.batch_size, shuffle=False)

    # Initialize FedGTD system
    print("\n[3] Initializing Federated Game-Theoretic Defense system...")
    input_dim = X_train.shape[1]
    system = FederatedGameTheoreticDefense(params, input_dim, num_classes=2)

    # Training loop
    print("\n[4] Starting federated game-theoretic training...")
    print("-"*50)

    # Stays 0 unless the Nash-gap criterion is met
    convergence_rounds = 0

    for round_idx in range(params.max_rounds):
        # Execute federated round
        round_metrics = system.federated_round(client_loaders)

        # Print progress
        if (round_idx + 1) % 5 == 0:
            print(f"Round {round_idx + 1}/{params.max_rounds}:")
            print(f"  Nash Gap: {round_metrics['nash_gap']:.6f}")
            print(f"  Defender Acc: {round_metrics['avg_defender_acc']:.4f}")
            print(f"  Adversary Success: {round_metrics['adversary_success']:.4f}")
            print(f"  Lyapunov: {round_metrics['lyapunov']:.4f}")
            print(f"  Convergence Rate: {round_metrics['convergence_rate']:.6f}")
            print(f"  Round Time: {round_metrics['round_time']:.2f}s")
            print("-"*50)

        # Check convergence
        if round_metrics['nash_gap'] < params.convergence_threshold:
            convergence_rounds = round_idx + 1
            print(f"\n✓ Converged at round {convergence_rounds}!")
            break

        # Early stopping if performance plateaus: after round 21, stop when
        # the last five average accuracies barely vary
        if round_idx > 20:
            recent_acc = [m['avg_defender_acc'] for m in system.metrics['round_metrics'][-5:]]
            if np.std(recent_acc) < 0.001:
                print(f"\n✓ Performance plateaued at round {round_idx + 1}")
                break

    # Evaluation
    print("\n[5] Evaluating final model...")
    eval_results = system.evaluate(test_loader)

    print("\n" + "="*50)
    print("EVALUATION RESULTS")
    print("="*50)
    print(f"Accuracy:  {eval_results['accuracy']:.4f}")
    print(f"Precision: {eval_results['precision']:.4f}")
    print(f"Recall:    {eval_results['recall']:.4f}")
    print(f"F1-Score:  {eval_results['f1']:.4f}")
    print(f"AUC-ROC:   {eval_results['auc']:.4f}")

    print("\nConfusion Matrix:")
    print(eval_results['confusion_matrix'])

    # Robustness evaluation
    print("\n[6] Evaluating adversarial robustness...")
    robustness_results = system.evaluate_robustness(test_loader)

    print("\n" + "="*50)
    print("ROBUSTNESS ANALYSIS")
    print("="*50)
    for eps_key, metrics in robustness_results.items():
        print(f"\n{eps_key}:")
        print(f"  Clean Accuracy: {metrics['clean_acc']:.4f}")
        print(f"  FGSM Accuracy:  {metrics['fgsm_acc']:.4f}")
        print(f"  PGD Accuracy:   {metrics['pgd_acc']:.4f}")
        print(f"  Robustness:     {metrics['robustness_score']:.4f}")

    # Visualization
    print("\n[7] Generating visualizations...")
    plot_convergence_analysis(system.metrics)
    plot_robustness_evaluation(robustness_results)

    # Summary statistics
    print("\n" + "="*50)
    print("SUMMARY STATISTICS")
    print("="*50)
    print(f"Total Rounds:           {len(system.metrics['round_metrics'])}")
    print(f"Convergence Rounds:     {convergence_rounds if convergence_rounds > 0 else 'Not converged'}")
    print(f"Final Nash Gap:         {system.metrics['convergence_metrics'][-1]['nash_gap']:.6f}")
    print(f"Final Lyapunov:         {system.metrics['convergence_metrics'][-1]['lyapunov']:.4f}")
    print(f"Supermartingale:        {system.metrics['convergence_metrics'][-1]['supermartingale']}")
    print(f"Best Defender Accuracy: {max([m['avg_defender_acc'] for m in system.metrics['round_metrics']]):.4f}")
    print(f"Final Test Accuracy:    {eval_results['accuracy']:.4f}")

    # Save results
    print("\n[8] Saving results...")
    results = {
        'params': params.__dict__,
        'eval_results': eval_results,
        'robustness_results': robustness_results,
        'metrics': system.metrics,
        'convergence_rounds': convergence_rounds
    }

    # Save to pickle
    with open('fedgtd_results.pkl', 'wb') as f:
        pickle.dump(results, f)

    print("✓ Results saved to fedgtd_results.pkl")

    print("\n" + "="*70)
    print("FEDERATED GAME-THEORETIC DEFENSE COMPLETE!")
    print("="*70)

    return system, results

# Script entry point: run the full pipeline only when this file is executed
# directly, and keep the trained system and the results dict as module-level
# names so they remain available after the run.
if __name__ == "__main__":
    system, results = main_federated_game_theoretic_defense()



=========================================================================
## ADVANCED EXPERIMENTAL FRAMEWORK FOR FEDERATED GAME-THEORETIC DEFENSE
Extended utilities, experiments, and analysis tools
=========================================================================

In [None]:


import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Dict, List, Tuple, Optional
import time
from scipy import stats
from scipy.special import softmax
import networkx as nx
from collections import defaultdict
import warnings
warnings.filterwarnings('ignore')

# ==================== ADVANCED GAME-THEORETIC COMPONENTS ====================

class ExtendedGameTheory:
    """
    Extended game-theoretic analysis with advanced equilibrium concepts:
    correlated equilibrium, Stackelberg equilibrium and replicator dynamics.
    """

    def __init__(self, n_players: int, action_space_size: int):
        self.n_players = n_players
        self.action_space_size = action_space_size
        self.equilibrium_history = []

    def compute_correlated_equilibrium(self, payoff_tensor: np.ndarray) -> np.ndarray:
        """
        Compute a welfare-maximising correlated equilibrium by linear
        programming.

        The decision variable is a probability distribution over joint
        action profiles. For every player and every ordered pair of that
        player's actions ``(a, a')`` the incentive constraint

            sum over opponents' actions s of
                p(a, s) * (u(a', s) - u(a, s)) <= 0

        states that a player recommended ``a`` gains nothing by switching to
        ``a'``. The objective maximises the sum of all players' expected
        payoffs.

        Args:
            payoff_tensor: Array of shape
                ``(n_players, actions_1, ..., actions_n)`` where
                ``payoff_tensor[p]`` holds player ``p``'s payoff for each
                joint action profile. For two players this is ``(2, n, m)``.

        Returns:
            Joint distribution with shape ``payoff_tensor.shape[1:]``; the
            uniform distribution if the solver reports failure.

        Raises:
            ValueError: if the tensor does not have one action axis per
                player.
        """
        from scipy.optimize import linprog

        payoff_tensor = np.asarray(payoff_tensor, dtype=float)
        n_players = payoff_tensor.shape[0]
        joint_shape = payoff_tensor.shape[1:]
        if len(joint_shape) != n_players:
            raise ValueError(
                "payoff_tensor must have shape (n_players, actions_1, ..., "
                f"actions_n); got {payoff_tensor.shape}")

        # One variable per joint action profile, flattened in C order
        n_vars = int(np.prod(joint_shape))

        # Constraints for probability distribution
        A_eq = np.ones((1, n_vars))
        b_eq = np.array([1.0])

        # Incentive compatibility constraints, one per (player, a, a')
        constraint_rows = []
        for player in range(n_players):
            # Bring the player's own action to axis 0 so that row ``a``
            # holds the payoffs for every combination of opponents' actions.
            by_own_action = np.moveaxis(payoff_tensor[player], player, 0)
            n_own_actions = by_own_action.shape[0]
            for action in range(n_own_actions):
                for deviation in range(n_own_actions):
                    if deviation == action:
                        continue
                    gain = np.zeros_like(by_own_action)
                    gain[action] = by_own_action[deviation] - by_own_action[action]
                    # Restore the original axis order before flattening so
                    # the coefficients line up with the joint variables.
                    constraint_rows.append(np.moveaxis(gain, 0, player).ravel())

        if constraint_rows:
            A_ub = np.array(constraint_rows)
            b_ub = np.zeros(len(constraint_rows))
        else:
            A_ub = None
            b_ub = None

        # Objective: maximize social welfare (sum of payoffs over players)
        c = -payoff_tensor.sum(axis=0).ravel()

        # Bounds
        bounds = [(0, 1)] * n_vars

        # Solve
        result = linprog(c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq,
                        bounds=bounds, method='highs')

        if result.success:
            return result.x.reshape(joint_shape)

        # Return uniform distribution as fallback
        return np.full(joint_shape, 1.0 / n_vars)

    def compute_stackelberg_equilibrium(self, leader_payoff: np.ndarray,
                                       follower_payoff: np.ndarray) -> Tuple[int, int]:
        """
        Compute a pure-strategy Stackelberg equilibrium (leader-follower game).

        For each leader action the follower plays the arg-max of its own
        payoff row (first index on ties); the leader then picks the action
        with the highest resulting payoff (first index on ties).

        Args:
            leader_payoff: Matrix indexed ``[leader_action, follower_action]``.
            follower_payoff: Matrix with the same indexing.

        Returns:
            ``(leader_action, follower_response)`` as Python ints; ``(0, 0)``
            if the leader has no actions.
        """
        n_leader_actions = leader_payoff.shape[0]

        best_leader_payoff = -np.inf
        best_leader_action = 0
        best_follower_response = 0

        for leader_action in range(n_leader_actions):
            # Follower's best response to leader's action
            follower_response = int(np.argmax(follower_payoff[leader_action, :]))

            # Leader's payoff given follower's best response
            leader_payoff_value = leader_payoff[leader_action, follower_response]

            if leader_payoff_value > best_leader_payoff:
                best_leader_payoff = leader_payoff_value
                best_leader_action = leader_action
                best_follower_response = follower_response

        return best_leader_action, best_follower_response

    def evolutionary_stable_strategy(self, payoff_matrix: np.ndarray,
                                    iterations: int = 1000) -> np.ndarray:
        """
        Approximate an evolutionarily stable strategy with discrete
        replicator dynamics.

        Starting from a random population drawn from a flat Dirichlet
        distribution (NumPy's global generator), each strategy's share is
        multiplied by its fitness relative to the population average.

        The update needs strictly positive payoffs, otherwise shares can
        turn negative or the average fitness can hit zero. When the matrix
        contains a non-positive entry, a constant is added to every entry;
        this keeps the payoff differences, and hence the rest points of the
        dynamics, unchanged. Strictly positive matrices are used as given.

        Args:
            payoff_matrix: Square matrix; entry ``[i, j]`` is the payoff of
                strategy ``i`` against strategy ``j``.
            iterations: Number of replicator updates.

        Returns:
            Population shares after the final update (non-negative, summing
            to 1).
        """
        payoffs = np.asarray(payoff_matrix, dtype=float)
        n_strategies = payoffs.shape[0]

        lowest = payoffs.min()
        if lowest <= 0:
            span = payoffs.max() - lowest
            # After the shift the entries lie in [span, 2 * span]
            # (or all equal 1 when the matrix is constant).
            payoffs = payoffs - lowest + (span if span > 0 else 1.0)

        # Initialize with random strategy distribution
        population = np.random.dirichlet(np.ones(n_strategies))

        for _ in range(iterations):
            # Expected payoffs for each strategy
            fitness = payoffs @ population

            # Average payoff
            mean_fitness = population @ fitness

            # Replicator dynamics
            population = population * fitness / mean_fitness

            # Normalize (numerical stability)
            population = population / population.sum()

        return population

class BayesianGameSolver:
    """
    Solves Bayesian games with incomplete information.

    Keeps a history of every posterior produced by ``update_beliefs`` in
    ``belief_history``.
    """

    def __init__(self, type_space_size: int):
        self.type_space_size = type_space_size
        self.belief_history = []

    def update_beliefs(self, prior: np.ndarray, signal: int,
                      signal_matrix: np.ndarray) -> np.ndarray:
        """
        Bayesian belief update given an observed signal.

        Args:
            prior: Prior probability of each type.
            signal: Column index of the observed signal.
            signal_matrix: Matrix whose entry ``[type, signal]`` is the
                likelihood of the signal given the type.

        Returns:
            The normalised posterior over types. If the observed signal has
            zero probability under the prior (the evidence term is zero),
            Bayes' rule is undefined and the prior is returned unchanged
            instead of an array of NaNs. The result is also appended to
            ``belief_history``.
        """
        prior = np.asarray(prior, dtype=float)

        # Likelihood of signal given each type
        likelihoods = np.asarray(signal_matrix, dtype=float)[:, signal]

        # Posterior using Bayes' rule
        unnormalised = prior * likelihoods
        evidence = unnormalised.sum()
        if evidence > 0:
            posterior = unnormalised / evidence
        else:
            posterior = prior.copy()

        self.belief_history.append(posterior)
        return posterior

    def compute_bayesian_nash_equilibrium(self, type_payoffs: Dict,
                                         type_probs: np.ndarray) -> Dict:
        """
        Compute a best response per player type (simplified Bayesian Nash
        equilibrium for demonstration).

        Args:
            type_payoffs: ``type_payoffs[player_type][other_type]`` is the
                vector of payoffs over the player's actions when facing
                ``other_type``.
            type_probs: Probability of each opposing type.

        Returns:
            Dict mapping each player type to the index of the action with
            the highest expected payoff.
        """
        equilibrium = {}

        for player_type in range(self.type_space_size):
            # Expected payoff given beliefs about other players
            expected_payoff = sum(
                type_probs[other_type] * type_payoffs[player_type][other_type]
                for other_type in range(self.type_space_size))

            # Best response
            equilibrium[player_type] = int(np.argmax(expected_payoff))

        return equilibrium

# ==================== PRIVACY-PRESERVING MECHANISMS ====================

class AdvancedPrivacyMechanisms:
    """
    Advanced privacy-preserving mechanisms for federated learning.

    The encryption and multi-party routines are simulations: they operate
    on plaintext tensors and only mimic the interface of the real schemes.
    """

    def __init__(self, epsilon: float = 2.3, delta: float = 1e-5):
        self.epsilon = epsilon
        self.delta = delta
        self.privacy_loss_history = []

    def compute_renyi_differential_privacy(self, alpha: float,
                                          sensitivity: float,
                                          noise_scale: float) -> float:
        """
        Convert a Rényi DP guarantee into an (ε, δ)-DP epsilon.

        Computes ``alpha * sensitivity**2 / (2 * noise_scale**2)`` as the
        RDP epsilon at order ``alpha`` and adds
        ``log(1 / delta) / (alpha - 1)`` using ``self.delta``.

        Args:
            alpha: Rényi order; must be greater than 1.
            sensitivity: Sensitivity of the query.
            noise_scale: Noise standard deviation; must be positive.

        Returns:
            The resulting ε for (ε, δ)-DP.

        Raises:
            ValueError: if ``alpha <= 1`` or ``noise_scale <= 0``, for which
                the conversion is undefined.
        """
        if alpha <= 1:
            raise ValueError("alpha must be greater than 1")
        if noise_scale <= 0:
            raise ValueError("noise_scale must be positive")

        epsilon_rdp = (alpha * sensitivity ** 2) / (2 * noise_scale ** 2)

        # Convert to (ε, δ)-DP
        epsilon_dp = epsilon_rdp + np.log(1 / self.delta) / (alpha - 1)

        return epsilon_dp

    def privacy_loss_distribution(self, mechanism_outputs: List[float]) -> np.ndarray:
        """
        Compute an empirical privacy-loss profile using the FFT.

        The privacy loss of step ``i`` is the log-ratio of consecutive
        outputs, ``log(out[i+1] / (out[i] + 1e-10))``. The returned array is
        the inverse FFT of the squared magnitude of the loss spectrum, i.e.
        the circular autocorrelation of the loss sequence.

        Args:
            mechanism_outputs: Sequence of mechanism outputs.

        Returns:
            Real array with ``len(mechanism_outputs) - 1`` entries; an empty
            array when fewer than two outputs are given (no ratio can be
            formed).
        """
        outputs = np.asarray(mechanism_outputs, dtype=float)
        if outputs.size < 2:
            return np.array([])

        # Compute empirical privacy loss
        privacy_losses = np.log(outputs[1:] / (outputs[:-1] + 1e-10))

        # Multiply the spectrum by its conjugate and transform back
        spectrum = np.fft.fft(privacy_losses)
        pld = np.fft.ifft(spectrum * np.conj(spectrum)).real

        return pld

    def homomorphic_aggregation(self, encrypted_gradients: List[torch.Tensor],
                               public_key: int = 2048) -> torch.Tensor:
        """
        Simulate homomorphic encryption for secure aggregation
        (simplified - an actual implementation would use the CKKS or BGV
        scheme).

        Args:
            encrypted_gradients: Tensors of identical shape; they are
                averaged in plaintext.
            public_key: Accepted for interface compatibility; not used by
                the simulation.

        Returns:
            The element-wise mean plus Gaussian noise with standard
            deviation 0.01.
        """
        # Simulate encryption/decryption overhead
        time.sleep(0.01)

        # Simple aggregation (in practice, this would be done on encrypted data)
        aggregated = torch.stack(encrypted_gradients).mean(dim=0)

        # Add noise for additional privacy
        noise = torch.randn_like(aggregated) * 0.01

        return aggregated + noise

    def secure_multiparty_computation(self, secret_shares: List[torch.Tensor]) -> torch.Tensor:
        """
        Simulate secure multi-party computation by additive reconstruction.

        Returns:
            The sum of all shares reduced modulo 2**32.
        """
        # Reconstruct secret from shares (simplified)
        reconstructed = sum(secret_shares) % 2**32
        return reconstructed

# ==================== ADVANCED ROBUSTNESS TESTING ====================

class AdvancedAdversarialAttacks:
    """
    Advanced adversarial attack methods for robustness testing.

    Both attacks return a perturbation ``delta`` bounded element-wise by
    ``epsilon``; the caller adds it to the input.
    """

    def __init__(self, model: nn.Module):
        self.model = model
        self.attack_history = []

    def auto_pgd(self, x: torch.Tensor, y: torch.Tensor,
                epsilon: float = 0.1, iterations: int = 100) -> torch.Tensor:
        """
        AutoPGD-style attack: momentum PGD run at three step sizes.

        The iteration budget is split evenly over the step sizes
        ``2*eps/iterations``, ``eps/iterations`` and ``0.5*eps/iterations``.
        Every stage restarts from the same random initial perturbation, and
        for each sample the perturbation with the highest cross-entropy loss
        seen in any stage is kept.

        Momentum is local to each stage, so repeated calls (possibly with a
        different batch size) do not influence each other. Gradients are
        taken with respect to the perturbation only; the ``.grad`` fields of
        the model parameters are left untouched.

        Args:
            x: Input batch; the first dimension is the batch dimension.
            y: Class indices, one per sample.
            epsilon: Maximum absolute perturbation per element.
            iterations: Total number of gradient steps across all stages.

        Returns:
            Detached perturbation tensor with the shape of ``x``.
        """
        batch_size = x.shape[0]
        criterion = nn.CrossEntropyLoss(reduction='none')
        momentum = 0.9

        # Initialize with random perturbation
        initial_delta = torch.zeros_like(x).uniform_(-epsilon, epsilon)

        # Adaptive step sizes
        step_sizes = [epsilon * 2 / iterations, epsilon / iterations, epsilon * 0.5 / iterations]
        steps_per_stage = iterations // len(step_sizes)

        best_loss = torch.zeros(batch_size, device=x.device, dtype=x.dtype)
        best_delta = initial_delta.clone()

        for step_size in step_sizes:
            current_delta = initial_delta.clone().requires_grad_(True)
            velocity = torch.zeros_like(x)

            # One extra pass so that the perturbation produced by the last
            # step is scored as well.
            for step in range(steps_per_stage + 1):
                outputs = self.model(x + current_delta)
                loss = criterion(outputs, y)

                # Track best perturbation: the loss is paired with the
                # perturbation that produced it, before any update.
                loss_values = loss.detach()
                improved = loss_values > best_loss
                best_loss[improved] = loss_values[improved]
                best_delta[improved] = current_delta.detach()[improved]

                if step == steps_per_stage:
                    break

                (grad,) = torch.autograd.grad(loss.sum(), current_delta)

                # Update with momentum, then project back into the eps-ball
                velocity = momentum * velocity + grad
                with torch.no_grad():
                    current_delta.add_(step_size * velocity.sign())
                    current_delta.clamp_(-epsilon, epsilon)

        return best_delta

    def square_attack(self, x: torch.Tensor, y: torch.Tensor,
                     epsilon: float = 0.1, max_queries: int = 1000) -> torch.Tensor:
        """
        Square Attack: query-based black-box attack using random square
        patches.

        Each query perturbs a randomly placed square whose side shrinks
        linearly with the query index, and keeps the change for the samples
        whose cross-entropy loss increased. The square side is derived from
        the smaller spatial dimension so that it always fits the input.

        Args:
            x: Input batch of shape ``(batch, channels, height, width)``.
            y: Class indices, one per sample.
            epsilon: Maximum absolute perturbation per element.
            max_queries: Number of candidate perturbations to evaluate.

        Returns:
            Perturbation tensor with the shape of ``x``.
        """
        device = x.device
        batch_size, c, h, w = x.shape
        criterion = nn.CrossEntropyLoss(reduction='none')
        max_side = min(h, w)

        # Initialize with random perturbation
        delta = torch.zeros_like(x).uniform_(-epsilon, epsilon)

        # Initial loss (negated: lower means a stronger attack)
        with torch.no_grad():
            loss = -criterion(self.model(x + delta), y)

        for query in range(max_queries):
            # Square size (decreases over time)
            p = int(max(1, max_side * (1 - query / max_queries)))

            # Random square position
            x_pos = np.random.randint(0, h - p + 1)
            y_pos = np.random.randint(0, w - p + 1)

            # Perturbation update: Gaussian patch scaled to L2 norm epsilon
            update = torch.zeros_like(delta)
            update[:, :, x_pos:x_pos+p, y_pos:y_pos+p] = torch.randn(batch_size, c, p, p).to(device)
            update = update / torch.norm(update, p=2, dim=(1,2,3), keepdim=True)
            update = update * epsilon

            new_delta = torch.clamp(delta + update, -epsilon, epsilon)

            # Evaluate
            with torch.no_grad():
                new_loss = -criterion(self.model(x + new_delta), y)

            # Keep if better, per sample
            accept = new_loss < loss
            delta = torch.where(accept.view(-1, 1, 1, 1), new_delta, delta)
            loss = torch.where(accept, new_loss, loss)

        return delta

# ==================== CROSS-CLOUD FEDERATION SIMULATOR ====================

class MultiCloudFederationSimulator:
    """
    Toy model of a multi-cloud federation.

    The clouds named in ``cloud_configs`` form a fully connected graph whose
    link bandwidths and pairwise latencies are drawn at random when the
    simulator is constructed.
    """

    def __init__(self, cloud_configs: Dict):
        self.cloud_configs = cloud_configs
        # Bandwidths are sampled before latencies; both consume the global
        # NumPy random stream.
        self.network_graph = self._create_network_topology()
        self.latency_matrix = self._compute_latencies()

    def _create_network_topology(self) -> nx.Graph:
        """Build the graph: one node per cloud, one edge per unordered pair."""
        topology = nx.Graph()

        # Each cloud's config dict becomes the attribute set of its node.
        for name, attrs in self.cloud_configs.items():
            topology.add_node(name, **attrs)

        # Connect every unordered pair once with a random bandwidth (Mbps).
        names = list(self.cloud_configs.keys())
        for position, source in enumerate(names):
            for target in names[position + 1:]:
                link_bandwidth = np.random.uniform(50, 200)
                topology.add_edge(source, target, bandwidth=link_bandwidth)

        return topology

    def _compute_latencies(self) -> np.ndarray:
        """Return a symmetric, zero-diagonal matrix of random latencies (ms)."""
        count = len(self.cloud_configs)
        latencies = np.zeros((count, count))

        for row in range(count):
            for col in range(row + 1, count):
                # One draw per pair, mirrored so the matrix stays symmetric.
                latencies[row, col] = latencies[col, row] = np.random.uniform(10, 100)

        return latencies

    def simulate_communication_round(self, data_sizes: List[float]) -> Dict:
        """
        Estimate the cost of one all-to-all exchange.

        Each cloud that has an entry in ``data_sizes`` (matched by position)
        sends its payload to every other cloud. A transfer takes
        ``size * 8 / bandwidth`` plus the pair's latency divided by 1000.

        Returns:
            Dict with the per-transfer times ('transmission_times'), the sum
            of payload sizes over all transfers ('total_bandwidth_used'), the
            slowest link ('bottleneck_link'), its duration ('round_time') and
            its bandwidth ('effective_bandwidth').
        """
        names = list(self.cloud_configs.keys())

        times = []
        volume = 0
        slowest_time = 0
        slowest_bandwidth = float('inf')
        slowest_link = None

        for src_idx, (source, payload) in enumerate(zip(names, data_sizes)):
            for dst_idx, target in enumerate(names):
                if src_idx == dst_idx:
                    continue  # a cloud does not send to itself

                # Use the sampled link bandwidth, or 100 if the edge is missing.
                if self.network_graph.has_edge(source, target):
                    link_bandwidth = self.network_graph[source][target]['bandwidth']
                else:
                    link_bandwidth = 100

                transfer = (payload * 8) / link_bandwidth
                delay = self.latency_matrix[src_idx, dst_idx] / 1000
                elapsed = transfer + delay

                times.append(elapsed)
                volume += payload

                # Track the slowest transfer: it determines the round time.
                if elapsed > slowest_time:
                    slowest_time = elapsed
                    slowest_bandwidth = link_bandwidth
                    slowest_link = (source, target)

        return {
            'transmission_times': times,
            'total_bandwidth_used': volume,
            'bottleneck_link': slowest_link,
            'round_time': slowest_time,
            'effective_bandwidth': slowest_bandwidth,
        }

# ==================== COMPREHENSIVE EXPERIMENT RUNNER ====================

class ComprehensiveExperimentRunner:
    """
    Runs comprehensive experiments with statistical analysis

    Provides k-fold cross-validation with per-metric summary statistics and
    a paired significance comparison between two lists of results.
    """
    
    def __init__(self, base_model_class, params):
        # Stored for later use; the methods below do not read them yet.
        self.base_model_class = base_model_class
        self.params = params
        self.results = defaultdict(list)
        
    def run_cross_validation(self, X: np.ndarray, y: np.ndarray, 
                           n_splits: int = 5) -> Dict:
        """
        Run k-fold cross-validation with detailed metrics

        Args:
            X: Feature matrix, indexed by row with the fold indices.
            y: Labels aligned with the rows of ``X``.
            n_splits: Number of folds.

        Returns:
            Dict mapping each metric name ('accuracy', 'precision', 'recall',
            'f1', 'auc', 'training_time', 'nash_gaps') to a dict holding its
            'mean', 'std', 'min', 'max' and the raw per-fold 'values'.
        """
        # Imported locally, like the dependencies of _train_fold, so this
        # method does not rely on a module-level KFold import.
        from sklearn.model_selection import KFold

        kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
        
        cv_results = {
            'accuracy': [],
            'precision': [],
            'recall': [],
            'f1': [],
            'auc': [],
            'training_time': [],
            'nash_gaps': []
        }
        
        for fold, (train_idx, val_idx) in enumerate(kf.split(X)):
            print(f"\nFold {fold + 1}/{n_splits}")
            
            # Split data
            X_train, X_val = X[train_idx], X[val_idx]
            y_train, y_val = y[train_idx], y[val_idx]
            
            # Train model; only the wall-clock time of _train_fold is measured
            start_time = time.time()
            _model, metrics = self._train_fold(X_train, y_train, X_val, y_val)
            training_time = time.time() - start_time
            
            # Store results
            for key in ('accuracy', 'precision', 'recall', 'f1', 'auc'):
                cv_results[key].append(metrics[key])
            cv_results['training_time'].append(training_time)
            cv_results['nash_gaps'].append(metrics.get('nash_gap', 0))
        
        # Compute statistics. Named ``summary`` rather than ``stats`` to avoid
        # shadowing the scipy.stats module name used elsewhere in this class.
        summary = {
            metric: {
                'mean': np.mean(values),
                'std': np.std(values),
                'min': np.min(values),
                'max': np.max(values),
                'values': values
            }
            for metric, values in cv_results.items()
        }
        
        return summary
    
    def _train_fold(self, X_train, y_train, X_val, y_val):
        """Train single fold

        Placeholder implementation: fits a RandomForestClassifier instead of
        the actual FedGTD system and scores it as a binary classifier.

        Returns:
            ``(model, metrics)`` where ``metrics`` holds 'accuracy',
            'precision', 'recall', 'f1', 'auc' and 'nash_gap'. The 'nash_gap'
            entry is a random draw, not a measured quantity.
        """
        # Placeholder - would use actual FedGTD system
        from sklearn.ensemble import RandomForestClassifier
        from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score
        
        model = RandomForestClassifier(n_estimators=100, random_state=42)
        model.fit(X_train, y_train)
        
        # Predictions; column 1 of predict_proba is used as the positive score
        y_pred = model.predict(X_val)
        y_proba = model.predict_proba(X_val)[:, 1]
        
        # Metrics
        accuracy = accuracy_score(y_val, y_pred)
        precision, recall, f1, _ = precision_recall_fscore_support(y_val, y_pred, average='binary')
        auc = roc_auc_score(y_val, y_proba)
        
        metrics = {
            'accuracy': accuracy,
            'precision': precision,
            'recall': recall,
            'f1': f1,
            'auc': auc,
            'nash_gap': np.random.uniform(0.001, 0.01)  # Simulated
        }
        
        return model, metrics
    
    def statistical_significance_test(self, results1: List[float], 
                                     results2: List[float]) -> Dict:
        """
        Perform statistical significance tests

        Compares two paired samples (same length, matched by position) with a
        paired t-test, a Wilcoxon signed-rank test and Cohen's d.

        Args:
            results1: Scores of the first method.
            results2: Scores of the second method.

        Returns:
            Dict with 't_statistic', 'p_value_ttest', 'wilcoxon_statistic',
            'p_value_wilcoxon', 'cohens_d' and 'significant_at_0.05' (based
            on the t-test p-value). The Wilcoxon entries are NaN when SciPy
            rejects the input, e.g. when all paired differences are zero.
        """
        # Imported locally so the method does not rely on a module-level
        # ``stats`` name being present.
        from scipy import stats

        # Paired t-test
        t_stat, p_value_t = stats.ttest_rel(results1, results2)
        
        # Wilcoxon signed-rank test (non-parametric)
        try:
            w_stat, p_value_w = stats.wilcoxon(results1, results2)
        except ValueError:
            # Report "undefined" instead of aborting the whole comparison.
            w_stat, p_value_w = float('nan'), float('nan')
        
        # Effect size (Cohen's d) with the pooled sample standard deviation;
        # ddof=1 gives the unbiased sample variance instead of NumPy's
        # population default.
        mean_diff = np.mean(results1) - np.mean(results2)
        pooled_sd = np.sqrt(
            (np.var(results1, ddof=1) + np.var(results2, ddof=1)) / 2)
        if pooled_sd > 0:
            cohens_d = mean_diff / pooled_sd
        elif pooled_sd == 0 and mean_diff == 0:
            # Identical constant samples: no effect rather than 0/0 = NaN.
            cohens_d = 0.0
        elif pooled_sd == 0:
            # Zero spread but different means: unbounded effect size.
            cohens_d = float(np.copysign(np.inf, mean_diff))
        else:
            # Spread is undefined (e.g. fewer than two observations).
            cohens_d = float('nan')
        
        return {
            't_statistic': t_stat,
            'p_value_ttest': p_value_t,
            'wilcoxon_statistic': w_stat,
            'p_value_wilcoxon': p_value_w,
            'cohens_d': cohens_d,
            'significant_at_0.05': p_value_t < 0.05
        }

# ==================== VISUALIZATION SUITE ====================

class AdvancedVisualization:
    """
    Static plotting helpers that turn metric dictionaries into dashboards.

    Each method draws only the panels whose keys are present in the supplied
    dictionary (other panels stay empty) and ends by calling ``plt.show()``.
    """

    @staticmethod
    def plot_game_dynamics(game_metrics: Dict):
        """Draw a 2x2 dashboard of game-theoretic metrics.

        Recognised keys: 'defender_strategies', 'payoff_matrices',
        'nash_gaps' and 'best_responses'.
        """
        _, axes = plt.subplots(2, 2, figsize=(15, 12))

        # Top-left: defender strategies as a heatmap (transposed input)
        if 'defender_strategies' in game_metrics:
            ax = axes[0, 0]
            strategy_grid = np.array(game_metrics['defender_strategies']).T
            sns.heatmap(strategy_grid, ax=ax, cmap='viridis',
                        cbar_kws={'label': 'Probability'})
            ax.set_xlabel('Round')
            ax.set_ylabel('Strategy')
            ax.set_title('Defender Strategy Evolution')

        # Top-right: only the last payoff matrix is shown
        if 'payoff_matrices' in game_metrics:
            ax = axes[0, 1]
            final_payoffs = game_metrics['payoff_matrices'][-1]
            sns.heatmap(final_payoffs, ax=ax, annot=True, fmt='.2f',
                        cmap='RdBu_r', center=0)
            ax.set_title('Final Payoff Matrix')

        # Bottom-left: raw Nash gaps plus a 5-round rolling mean +/- std band
        if 'nash_gaps' in game_metrics:
            ax = axes[1, 0]
            gap_values = game_metrics['nash_gaps']
            round_ids = range(len(gap_values))

            rolling = pd.Series(gap_values).rolling(5)
            smoothed = rolling.mean()
            spread = rolling.std()

            ax.plot(round_ids, gap_values, 'b-', alpha=0.3, label='Raw')
            ax.plot(round_ids, smoothed, 'b-', linewidth=2, label='Moving Avg')
            ax.fill_between(round_ids, smoothed - spread, smoothed + spread,
                            alpha=0.2, color='blue')
            ax.set_xlabel('Round')
            ax.set_ylabel('Nash Gap')
            ax.set_title('Nash Gap Convergence')
            ax.set_yscale('log')
            ax.legend()
            ax.grid(True, alpha=0.3)

        # Bottom-right: best-response values per round
        if 'best_responses' in game_metrics:
            ax = axes[1, 1]
            ax.plot(game_metrics['best_responses'], 'g-', linewidth=2)
            ax.set_xlabel('Round')
            ax.set_ylabel('Best Response Value')
            ax.set_title('Best Response Dynamics')
            ax.grid(True, alpha=0.3)

        plt.suptitle('Game-Theoretic Dynamics Analysis', fontsize=16)
        plt.tight_layout()
        plt.show()

    @staticmethod
    def plot_privacy_analysis(privacy_metrics: Dict):
        """Draw a 2x3 dashboard of privacy metrics.

        Recognised keys: 'epsilon_used', 'gradient_norms', 'noise_scales',
        'privacy_loss_dist', 'mia_success', and the pair 'rdp_values' /
        'dp_values' (both must be present for the last panel).
        """
        _, axes = plt.subplots(2, 3, figsize=(18, 10))

        # Consumed epsilon against a fixed reference line at 2.3
        if 'epsilon_used' in privacy_metrics:
            ax = axes[0, 0]
            ax.plot(privacy_metrics['epsilon_used'], 'b-', linewidth=2)
            ax.axhline(y=2.3, color='r', linestyle='--', label='Budget')
            ax.set_xlabel('Round')
            ax.set_ylabel('ε Used')
            ax.set_title('Privacy Budget Consumption')
            ax.legend()
            ax.grid(True, alpha=0.3)

        # Histogram of gradient norms with a reference line at 1.0
        if 'gradient_norms' in privacy_metrics:
            ax = axes[0, 1]
            ax.hist(privacy_metrics['gradient_norms'], bins=30,
                    edgecolor='black', alpha=0.7)
            ax.axvline(x=1.0, color='r', linestyle='--', label='Clip Norm')
            ax.set_xlabel('Gradient Norm')
            ax.set_ylabel('Frequency')
            ax.set_title('Gradient Norm Distribution')
            ax.legend()

        # Noise scale per round
        if 'noise_scales' in privacy_metrics:
            ax = axes[0, 2]
            ax.plot(privacy_metrics['noise_scales'], 'g-', linewidth=2)
            ax.set_xlabel('Round')
            ax.set_ylabel('Noise Scale')
            ax.set_title('Differential Privacy Noise Scale')
            ax.grid(True, alpha=0.3)

        # Privacy loss distribution
        if 'privacy_loss_dist' in privacy_metrics:
            ax = axes[1, 0]
            ax.plot(privacy_metrics['privacy_loss_dist'], 'purple', linewidth=2)
            ax.set_xlabel('Privacy Loss')
            ax.set_ylabel('Probability')
            ax.set_title('Privacy Loss Distribution')
            ax.grid(True, alpha=0.3)

        # Membership-inference success against the 0.5 reference line
        if 'mia_success' in privacy_metrics:
            ax = axes[1, 1]
            ax.plot(privacy_metrics['mia_success'], 'orange', linewidth=2)
            ax.axhline(y=0.5, color='r', linestyle='--', label='Random Guess')
            ax.set_xlabel('Round')
            ax.set_ylabel('Attack Success Rate')
            ax.set_title('Membership Inference Attack')
            ax.legend()
            ax.grid(True, alpha=0.3)

        # RDP and (ε,δ)-DP curves on the same axes
        has_rdp = 'rdp_values' in privacy_metrics
        if has_rdp and 'dp_values' in privacy_metrics:
            ax = axes[1, 2]
            ax.plot(privacy_metrics['rdp_values'], 'b-', label='RDP', linewidth=2)
            ax.plot(privacy_metrics['dp_values'], 'r-', label='(ε,δ)-DP', linewidth=2)
            ax.set_xlabel('Round')
            ax.set_ylabel('Privacy Loss')
            ax.set_title('RDP vs (ε,δ)-DP')
            ax.legend()
            ax.grid(True, alpha=0.3)

        plt.suptitle('Privacy Analysis Dashboard', fontsize=16)
        plt.tight_layout()
        plt.show()

    @staticmethod
    def plot_multi_cloud_performance(cloud_metrics: Dict):
        """Draw a 2x3 dashboard of multi-cloud federation metrics.

        Recognised keys: 'cloud_accuracies', 'latency_matrix',
        'bandwidth_usage', 'data_distribution', 'convergence_speeds' and
        'byzantine_detection'.
        """
        _, axes = plt.subplots(2, 3, figsize=(18, 10))

        # One accuracy bar per cloud
        if 'cloud_accuracies' in cloud_metrics:
            ax = axes[0, 0]
            accuracy_by_cloud = cloud_metrics['cloud_accuracies']
            ax.bar(list(accuracy_by_cloud.keys()), list(accuracy_by_cloud.values()),
                   color='skyblue', edgecolor='black')
            ax.set_xlabel('Cloud Provider')
            ax.set_ylabel('Accuracy')
            ax.set_title('Per-Cloud Accuracy')
            ax.set_ylim([0, 1])

        # Annotated latency heatmap
        if 'latency_matrix' in cloud_metrics:
            ax = axes[0, 1]
            sns.heatmap(cloud_metrics['latency_matrix'], ax=ax, annot=True,
                        fmt='.1f', cmap='YlOrRd',
                        cbar_kws={'label': 'Latency (ms)'})
            ax.set_title('Inter-Cloud Latency')

        # Bandwidth usage per round as a filled line
        if 'bandwidth_usage' in cloud_metrics:
            ax = axes[0, 2]
            usage_series = cloud_metrics['bandwidth_usage']
            round_ids = range(len(usage_series))
            ax.plot(round_ids, usage_series, 'g-', linewidth=2)
            ax.fill_between(round_ids, 0, usage_series, alpha=0.3, color='green')
            ax.set_xlabel('Round')
            ax.set_ylabel('Bandwidth (Mbps)')
            ax.set_title('Bandwidth Utilization')
            ax.grid(True, alpha=0.3)

        # Stacked class counts per cloud (reads 'class_0' and 'class_1')
        if 'data_distribution' in cloud_metrics:
            ax = axes[1, 0]
            per_cloud = cloud_metrics['data_distribution']
            cloud_names = list(per_cloud.keys())

            negatives = [per_cloud[name]['class_0'] for name in cloud_names]
            positives = [per_cloud[name]['class_1'] for name in cloud_names]

            positions = np.arange(len(cloud_names))
            bar_width = 0.35

            ax.bar(positions, negatives, bar_width, label='Class 0', color='blue')
            ax.bar(positions, positives, bar_width, bottom=negatives,
                   label='Class 1', color='red')
            ax.set_xlabel('Cloud Provider')
            ax.set_ylabel('Number of Samples')
            ax.set_title('Data Distribution (Non-IID)')
            ax.set_xticks(positions)
            ax.set_xticklabels(cloud_names)
            ax.legend()

        # One loss curve per cloud
        if 'convergence_speeds' in cloud_metrics:
            ax = axes[1, 1]
            for cloud_name, loss_curve in cloud_metrics['convergence_speeds'].items():
                ax.plot(loss_curve, label=cloud_name, linewidth=2)
            ax.set_xlabel('Round')
            ax.set_ylabel('Loss')
            ax.set_title('Convergence Speed Comparison')
            ax.legend()
            ax.grid(True, alpha=0.3)

        # Confusion counts for Byzantine detection, with precision and recall
        if 'byzantine_detection' in cloud_metrics:
            ax = axes[1, 2]
            counts = cloud_metrics['byzantine_detection']
            tp = counts['true_positives']
            fp = counts['false_positives']
            tn = counts['true_negatives']
            fn = counts['false_negatives']

            ax.bar(['TP', 'FP', 'TN', 'FN'], [tp, fp, tn, fn],
                   color=['green', 'orange', 'blue', 'red'], edgecolor='black')
            ax.set_ylabel('Count')
            ax.set_title('Byzantine Detection Performance')

            # Fall back to 0 when a denominator is not positive
            if (tp + fp) > 0:
                precision = tp / (tp + fp)
            else:
                precision = 0
            if (tp + fn) > 0:
                recall = tp / (tp + fn)
            else:
                recall = 0

            ax.text(0.5, 0.95, f'Precision: {precision:.3f}',
                    transform=ax.transAxes)
            ax.text(0.5, 0.90, f'Recall: {recall:.3f}',
                    transform=ax.transAxes)

        plt.suptitle('Multi-Cloud Federation Performance', fontsize=16)
        plt.tight_layout()
        plt.show()

# ==================== TESTING AND VALIDATION ====================

def run_comprehensive_testing():
    """
    Smoke-test the main components and print one summary line per step.

    Exercises, in order: the game-theory solver, the privacy mechanisms, the
    multi-cloud simulator and the statistical significance test. Nothing is
    asserted; a step fails only if it raises.
    """
    rule = "=" * 70

    print(rule)
    print("COMPREHENSIVE TESTING SUITE")
    print(rule)

    # Step 1: correlated equilibrium on a random payoff tensor
    print("\n[1] Testing Game Theory Components...")
    solver = ExtendedGameTheory(n_players=2, action_space_size=3)
    random_payoff = np.random.randn(2, 3, 3)
    equilibrium = solver.compute_correlated_equilibrium(random_payoff)
    print(f"Correlated Equilibrium computed: {equilibrium.shape}")

    # Step 2: Renyi differential privacy loss
    print("\n[2] Testing Privacy Mechanisms...")
    mechanisms = AdvancedPrivacyMechanisms(epsilon=2.3, delta=1e-5)
    rdp_loss = mechanisms.compute_renyi_differential_privacy(
        alpha=2, sensitivity=1.0, noise_scale=1.1)
    print(f"RDP Privacy Loss: {rdp_loss:.4f}")

    # Step 3: one communication round across three clouds
    print("\n[3] Testing Multi-Cloud Simulation...")
    federation = MultiCloudFederationSimulator({
        'AWS': {'region': 'us-east-1', 'capacity': 1000},
        'Azure': {'region': 'eastus', 'capacity': 800},
        'GCP': {'region': 'us-central1', 'capacity': 900}
    })
    round_stats = federation.simulate_communication_round([100, 150, 120])
    print(f"Communication Round Time: {round_stats['round_time']:.3f}s")

    # Step 4: significance test on two random samples, the first shifted by 0.5
    print("\n[4] Testing Statistical Analysis...")
    shifted_sample = np.random.randn(10) + 0.5
    baseline_sample = np.random.randn(10)
    experiment = ComprehensiveExperimentRunner(None, None)
    outcome = experiment.statistical_significance_test(
        shifted_sample.tolist(), baseline_sample.tolist())
    print(f"P-value: {outcome['p_value_ttest']:.4f}")
    print(f"Significant: {outcome['significant_at_0.05']}")

    print("\n" + rule)
    print("ALL TESTS COMPLETED SUCCESSFULLY!")
    print(rule)

# Script entry point: run the component smoke tests only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    run_comprehensive_testing()
    