In [84]:
import os
import sys
import json
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import logging
from typing import Dict, List, Tuple, Any
from collections import defaultdict
import warnings
import pickle
warnings.filterwarnings('ignore')

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Kiểm tra TensorFlow
print(f"TensorFlow version: {tf.__version__}")
print(f"Eager execution enabled: {tf.executing_eagerly()}")

# Đảm bảo eager execution được bật để debug dễ dàng
tf.config.run_functions_eagerly(True)

gpus = tf.config.list_physical_devices('GPU')
if gpus:
    print(f"Found {len(gpus)} GPU(s)")
    for gpu in gpus:
        print(f"  - {gpu}")
    # Đặt memory growth để tránh OOM
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
else:
    print("No GPUs found, using CPU")

print("Setup complete!")

TensorFlow version: 2.19.0
Eager execution enabled: True
No GPUs found, using CPU
Setup complete!


In [85]:
class Config:
    def __init__(self):
        # Paths
        self.BASE_DIR = "/Users/macbook/Desktop/FL-RL-Dos detection/Multi agents RL-FL-Fog/Multi agents RL-FL-Fog"
        self.DATA_DIR = os.path.join(self.BASE_DIR, "data")
        self.DATA_PATH = os.path.join(self.DATA_DIR, "Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv")
        self.CHECKPOINT_DIR = os.path.join(self.BASE_DIR, "checkpoints")
        self.RESULTS_DIR = os.path.join(self.BASE_DIR, "results")
        
        # Metadata
        self.TIMESTAMP = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        self.USER = os.getenv("USER", "unknown")
        
        # Selected Features - Sử dụng tên cột chính xác từ dữ liệu
        self.SELECTED_FEATURES = [
            ' Source Port',
            ' Destination Port',
            ' Protocol',
            ' Flow Duration',
            ' Total Fwd Packets',
            ' Total Backward Packets',
            'Total Length of Fwd Packets',
            ' Total Length of Bwd Packets',
            ' Fwd Packet Length Max',
            ' Fwd Packet Length Min',
            ' Fwd Packet Length Mean',
            ' Fwd Packet Length Std',
            'Bwd Packet Length Max'
        ]
        
        # Tên cột nhãn
        self.LABEL_COLUMN = " Label"
        
        # Network Architecture
        self.NUM_FOG_NODES = 2  # Giảm xuống tránh OOM
        self.NUM_AGENTS_PER_NODE = 2  # Giảm xuống tránh OOM
        self.STATE_DIM = len(self.SELECTED_FEATURES)
        self.ACTION_DIM = 2  # BENIGN và DDoS
        self.HIDDEN_UNITS = [64, 32]  # Giảm kích thước mạng
        
        # Training Parameters
        self.BATCH_SIZE = 32  # Giảm batch size
        self.LEARNING_RATE = 0.001
        self.NUM_EPISODES = 10  # Giảm số episodes
        self.UPDATE_INTERVAL = 5
        self.GAMMA = 0.99
        self.WINDOW_SIZE = 20  # Giảm window size để tránh OOM
        
        # Federated Learning
        self.FL_ROUNDS = 2  # Ít vòng để test nhanh
        self.LOCAL_EPOCHS = 1
        
        # PPO Parameters
        self.PPO_EPOCHS = 2  # Giảm số epochs
        self.PPO_BATCH_SIZE = 32  # Giảm batch size
        self.PPO_CLIP_RATIO = 0.2
        self.PPO_TARGET_KL = 0.01
        
        # Data Distribution
        self.DATA_DISTRIBUTION_TYPE = 'equal'  # Phân phối đồng đều để đơn giản hóa
        self.DIRICHLET_ALPHA = 0.5
        
        # Early Stopping & LR Scheduling
        self.EARLY_STOPPING_PATIENCE = 3
        self.EARLY_STOPPING_MIN_DELTA = 0.001
        self.LR_DECAY_STEPS = 5
        self.LR_DECAY_RATE = 0.9
        self.LR_WARMUP_STEPS = 1
        self.LR_MIN = 1e-6
        self.LR_STRATEGY = 'exponential'
        self.CHECKPOINT_FREQUENCY = 1
        
        # Data Processing Options
        self.USE_DATA_SAMPLING = True  # Lấy mẫu dữ liệu để tránh OOM
        self.MAX_SAMPLES = 10000  # Giới hạn số mẫu để test nhanh
        self.MAX_SAMPLES_PER_NODE = 3000  # Số lượng mẫu tối đa mỗi node
        self.USE_MINI_BATCH = True  # Sử dụng mini-batch training
        
        # Create directories
        self._create_directories()
    
    def _create_directories(self):
        os.makedirs(self.DATA_DIR, exist_ok=True)
        os.makedirs(self.CHECKPOINT_DIR, exist_ok=True)
        os.makedirs(self.RESULTS_DIR, exist_ok=True)
        logger.info(f"Directories created successfully at: {self.BASE_DIR}")
    
    def validate_config(self):
        try:
            # Check if data file exists
            if not os.path.exists(self.DATA_PATH):
                raise FileNotFoundError(f"Data file not found at {self.DATA_PATH}")
                
            # Validate parameters
            if self.BATCH_SIZE <= 0 or self.LEARNING_RATE <= 0 or self.WINDOW_SIZE <= 0:
                raise ValueError("Invalid parameter values")
                
            logger.info(f"Configuration validated successfully. Data at: {self.DATA_PATH}")
            return True
                
        except Exception as e:
            logger.error(f"Configuration validation failed: {str(e)}")
            raise

# Kiểm tra cấu hình
config = Config()
try:
    config.validate_config()
    print("Config validated successfully!")
except Exception as e:
    print(f"Config validation failed: {e}")

2025-05-21 13:31:06,569 - INFO - Directories created successfully at: /Users/macbook/Desktop/FL-RL-Dos detection/Multi agents RL-FL-Fog/Multi agents RL-FL-Fog
2025-05-21 13:31:06,570 - INFO - Configuration validated successfully. Data at: /Users/macbook/Desktop/FL-RL-Dos detection/Multi agents RL-FL-Fog/Multi agents RL-FL-Fog/data/Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv


Config validated successfully!


In [86]:
class ActorNetwork(tf.keras.Model):
    def __init__(self, state_dim: int, action_dim: int, hidden_units: List[int]):
        super(ActorNetwork, self).__init__(name='actor_network')
        
        # Lớp LSTM để xử lý dữ liệu chuỗi thời gian
        self.lstm = tf.keras.layers.LSTM(64, return_sequences=False, name='actor_lstm')
        
        self.hidden_layers = []
        for i, units in enumerate(hidden_units):
            self.hidden_layers.append(tf.keras.layers.Dense(
                units, 
                activation='relu',
                kernel_initializer='he_normal',
                name=f'actor_dense_{i}'
            ))
            
        self.output_layer = tf.keras.layers.Dense(
            action_dim,
            activation='softmax',
            kernel_initializer='he_normal',
            name='actor_output'
        )
        
    def call(self, state, training=False):
        # Đảm bảo đầu vào là tensor 3D (batch, time_steps, features)
        # Sử dụng shape.rank thay vì len() để tránh lỗi với tensor symbolics
        if tf.is_tensor(state) and state.shape.rank < 3:
            # Nếu chỉ có 2D, thêm time_steps dimension
            state = tf.expand_dims(state, axis=1)
        elif not tf.is_tensor(state) and len(state.shape) < 3:
            # Nếu là numpy array, kiểm tra shape
            state = np.expand_dims(state, axis=1)
            state = tf.convert_to_tensor(state, dtype=tf.float32)
            
        # Xử lý dữ liệu 3D qua LSTM
        x = self.lstm(state, training=training)
        
        for layer in self.hidden_layers:
            x = layer(x, training=training)
            
        return self.output_layer(x, training=training)
    
    def build_model(self, input_shape):
        """Build model với input shape cụ thể"""
        inputs = tf.keras.Input(shape=input_shape)
        outputs = self.call(inputs)
        return tf.keras.Model(inputs=inputs, outputs=outputs)

class CriticNetwork(tf.keras.Model):
    def __init__(self, state_dim: int, hidden_units: List[int]):
        super(CriticNetwork, self).__init__(name='critic_network')
        
        # Lớp LSTM để xử lý dữ liệu chuỗi thời gian
        self.lstm = tf.keras.layers.LSTM(64, return_sequences=False, name='critic_lstm')
        
        self.hidden_layers = []
        for i, units in enumerate(hidden_units):
            self.hidden_layers.append(tf.keras.layers.Dense(
                units, 
                activation='relu',
                kernel_initializer='he_normal',
                name=f'critic_dense_{i}'
            ))
            
        self.output_layer = tf.keras.layers.Dense(
            1,
            kernel_initializer='he_normal',
            name='critic_output'
        )
        
    def call(self, state, training=False):
        # Đảm bảo đầu vào là tensor 3D (batch, time_steps, features)
        # Sử dụng shape.rank thay vì len() để tránh lỗi với tensor symbolics
        if tf.is_tensor(state) and state.shape.rank < 3:
            # Nếu chỉ có 2D, thêm time_steps dimension
            state = tf.expand_dims(state, axis=1)
        elif not tf.is_tensor(state) and len(state.shape) < 3:
            # Nếu là numpy array, kiểm tra shape
            state = np.expand_dims(state, axis=1)
            state = tf.convert_to_tensor(state, dtype=tf.float32)
            
        # Xử lý dữ liệu 3D qua LSTM
        x = self.lstm(state, training=training)
        
        for layer in self.hidden_layers:
            x = layer(x, training=training)
            
        return self.output_layer(x, training=training)
    
    def build_model(self, input_shape):
        """Build model với input shape cụ thể"""
        inputs = tf.keras.Input(shape=input_shape)
        outputs = self.call(inputs)
        return tf.keras.Model(inputs=inputs, outputs=outputs)

# Kiểm tra các lớp mạng
try:
    # Tạo dữ liệu giả
    batch_size, time_steps, features = 5, config.WINDOW_SIZE, config.STATE_DIM
    dummy_input = tf.random.normal((batch_size, time_steps, features))
    
    # Tạo và test actor network
    actor = ActorNetwork(features, config.ACTION_DIM, config.HIDDEN_UNITS)
    actor_output = actor(dummy_input)
    print(f"Actor output shape: {actor_output.shape}")
    
    # Tạo và test critic network
    critic = CriticNetwork(features, config.HIDDEN_UNITS)
    critic_output = critic(dummy_input)
    print(f"Critic output shape: {critic_output.shape}")
    
    print("Neural networks tested successfully!")
except Exception as e:
    print(f"Neural networks test failed: {e}")
    import traceback
    traceback.print_exc()

Actor output shape: (5, 2)
Critic output shape: (5, 1)
Neural networks tested successfully!


In [87]:
class MemoryBuffer:
    def __init__(self, capacity=10000):
        self.capacity = capacity
        self.states = []
        self.actions = []
        self.rewards = []
        self.next_states = []
        self.size = 0
        
    def add(self, state, action, reward, next_state):
        if self.size < self.capacity:
            self.states.append(state)
            self.actions.append(action)
            self.rewards.append(reward)
            self.next_states.append(next_state)
            self.size += 1
        else:
            # Thay thế random một mẫu nếu đầy
            idx = np.random.randint(0, self.capacity)
            self.states[idx] = state
            self.actions[idx] = action
            self.rewards[idx] = reward
            self.next_states[idx] = next_state
            
    def sample(self, batch_size):
        if self.size < batch_size:
            indices = range(self.size)
        else:
            indices = np.random.choice(self.size, batch_size, replace=False)
            
        return (
            np.array([self.states[i] for i in indices]),
            np.array([self.actions[i] for i in indices]),
            np.array([self.rewards[i] for i in indices]),
            np.array([self.next_states[i] for i in indices])
        )
        
    def clear(self):
        self.states = []
        self.actions = []
        self.rewards = []
        self.next_states = []
        self.size = 0
        
    def get_all(self):
        """Lấy tất cả dữ liệu dưới dạng list tuples"""
        return [(self.states[i], self.actions[i], self.rewards[i], self.next_states[i]) 
                for i in range(self.size)]

class EarlyStopping:
    def __init__(self, patience=5, min_delta=0.001):
        self.patience = patience
        self.min_delta = min_delta
        self.best_value = None
        self.counter = 0
        self.should_stop = False
        
    def __call__(self, metric_value):
        if self.best_value is None:
            self.best_value = metric_value
            return False
            
        if metric_value > self.best_value + self.min_delta:
            self.best_value = metric_value
            self.counter = 0
        else:
            self.counter += 1
            
        if self.counter >= self.patience:
            self.should_stop = True
            
        return self.should_stop

class LearningRateScheduler:
    def __init__(self, initial_lr=0.001, decay_steps=100, decay_rate=0.9, 
               warmup_steps=0, min_lr=1e-6, strategy='exponential'):
        self.initial_lr = initial_lr
        self.decay_steps = decay_steps
        self.decay_rate = decay_rate
        self.warmup_steps = warmup_steps
        self.min_lr = min_lr
        self.strategy = strategy
        self.current_step = 0
        
    def get_lr(self):
        # Warmup phase
        if self.current_step < self.warmup_steps:
            return self.initial_lr * (self.current_step / max(1, self.warmup_steps))
            
        # Decay phase
        steps_after_warmup = self.current_step - self.warmup_steps
        decay_factor = steps_after_warmup / self.decay_steps
        
        if self.strategy == 'exponential':
            lr = self.initial_lr * (self.decay_rate ** decay_factor)
        elif self.strategy == 'linear':
            lr = self.initial_lr * (1.0 - decay_factor * (1.0 - self.decay_rate))
        elif self.strategy == 'cosine':
            decay = 0.5 * (1 + np.cos(np.pi * decay_factor))
            lr = self.initial_lr * decay
        else:
            lr = self.initial_lr
            
        return max(self.min_lr, lr)
        
    def step(self):
        self.current_step += 1
        return self.get_lr()

# Kiểm tra Memory Buffer
try:
    memory = MemoryBuffer(capacity=100)
    
    # Thêm một số mẫu
    for i in range(10):
        state = np.random.rand(20, 13)  # (time_steps, features)
        action = np.random.randint(0, 2)
        reward = np.random.rand()
        next_state = np.random.rand(20, 13)
        memory.add(state, action, reward, next_state)
    
    # Lấy mẫu
    states, actions, rewards, next_states = memory.sample(5)
    print(f"Sampled batch shapes - states: {states.shape}, actions: {actions.shape}")
    
    # Kiểm tra Early Stopping
    early_stopping = EarlyStopping(patience=3, min_delta=0.01)
    for i in range(5):
        value = 0.5 + 0.1 * i if i != 2 else 0.6  # Giá trị tăng trừ bước 2
        should_stop = early_stopping(value)
        print(f"Iter {i}: value={value:.2f}, stop={should_stop}")
    
    # Kiểm tra LR Scheduler
    lr_scheduler = LearningRateScheduler(
        initial_lr=0.001, decay_steps=5, decay_rate=0.9, strategy='exponential'
    )
    for i in range(10):
        lr = lr_scheduler.step()
        print(f"Step {i+1}: LR = {lr:.6f}")
    
    print("Utilities tested successfully!")
except Exception as e:
    print(f"Utilities test failed: {e}")

Sampled batch shapes - states: (5, 20, 13), actions: (5,)
Iter 0: value=0.50, stop=False
Iter 1: value=0.60, stop=False
Iter 2: value=0.60, stop=False
Iter 3: value=0.80, stop=False
Iter 4: value=0.90, stop=False
Step 1: LR = 0.000979
Step 2: LR = 0.000959
Step 3: LR = 0.000939
Step 4: LR = 0.000919
Step 5: LR = 0.000900
Step 6: LR = 0.000881
Step 7: LR = 0.000863
Step 8: LR = 0.000845
Step 9: LR = 0.000827
Step 10: LR = 0.000810
Utilities tested successfully!


In [88]:
class PPOAgent:
    def __init__(self, config, agent_id: int):
        self.config = config
        self.agent_id = agent_id
        
        # PPO parameters
        self.clip_ratio = config.PPO_CLIP_RATIO
        self.target_kl = config.PPO_TARGET_KL
        
        # Initialize networks
        self.actor = ActorNetwork(config.STATE_DIM, config.ACTION_DIM, config.HIDDEN_UNITS)
        self.critic = CriticNetwork(config.STATE_DIM, config.HIDDEN_UNITS)
        self.old_actor = ActorNetwork(config.STATE_DIM, config.ACTION_DIM, config.HIDDEN_UNITS)
        
        # Khởi tạo các mạng với shape đầu vào cụ thể
        input_shape = (config.WINDOW_SIZE, config.STATE_DIM)
        dummy_input = tf.random.normal((1,) + input_shape)
        self.actor(dummy_input)
        self.critic(dummy_input)
        self.old_actor(dummy_input)
        
        # Initialize optimizers
        self.actor_optimizer = tf.keras.optimizers.Adam(config.LEARNING_RATE)
        self.critic_optimizer = tf.keras.optimizers.Adam(config.LEARNING_RATE)
        
        # Experience buffer - sử dụng MemoryBuffer
        self.memory = MemoryBuffer(capacity=10000)
        
        # Copy weights to old actor
        self._update_old_actor()
        
        logger.info(f"Agent {agent_id} initialized")
        
    def _update_old_actor(self):
        for old_var, var in zip(self.old_actor.trainable_variables, self.actor.trainable_variables):
            old_var.assign(var)
    
    def select_action(self, state: np.ndarray) -> Tuple[int, float]:
        try:
            # Đảm bảo state có đúng kích thước cho mô hình LSTM (batch, time_steps, features)
            if len(state.shape) == 2:  # (time_steps, features)
                state_tensor = tf.convert_to_tensor(state[None, :, :], dtype=tf.float32)
            elif len(state.shape) == 1:  # (features,)
                # Thêm cả time_steps và batch dimensions
                state_tensor = tf.convert_to_tensor(state[None, None, :], dtype=tf.float32)
            else:
                state_tensor = tf.convert_to_tensor(state, dtype=tf.float32)
            
            action_probs = self.actor(state_tensor).numpy()[0]
            
            # Đảm bảo action_probs có kích thước phù hợp với ACTION_DIM
            if len(action_probs) != self.config.ACTION_DIM:
                logger.warning(f"Action probs shape {len(action_probs)} ≠ ACTION_DIM {self.config.ACTION_DIM}")
                action_probs = np.ones(self.config.ACTION_DIM) / self.config.ACTION_DIM
                
            # Epsilon-greedy exploration
            if np.random.random() < 0.1:
                action = np.random.choice(self.config.ACTION_DIM)
            else:
                action = np.argmax(action_probs)
                
            return action, action_probs[action]
        except Exception as e:
            logger.error(f"Error in select_action: {e}, state shape: {state.shape}")
            # Trả về hành động mặc định an toàn
            return 0, 1.0
    
    def train_ppo_epoch(self, memory, num_epochs=4, batch_size=64):
        if len(memory) < batch_size:
            return {'actor_loss': 0, 'critic_loss': 0, 'entropy': 0, 'kl': 0}
        
        try:
            # Đảm bảo dữ liệu giữ nguyên định dạng 3D
            states = np.array([exp[0] for exp in memory])  # Shape: (batch, time_steps, features)
            actions = np.array([exp[1] for exp in memory])
            rewards = np.array([exp[2] for exp in memory])
            next_states = np.array([exp[3] for exp in memory])
            
            # Log kích thước để debug
            logger.info(f"States shape: {states.shape}, actions shape: {actions.shape}")
            
            # Đảm bảo dữ liệu states có định dạng 3D
            if len(states.shape) == 2:  # Nếu đã bị flatten
                # Thử khôi phục định dạng 3D
                states = states.reshape(states.shape[0], self.config.WINDOW_SIZE, -1)
                next_states = next_states.reshape(next_states.shape[0], self.config.WINDOW_SIZE, -1)
                logger.info(f"Reshaped states to: {states.shape}")
            
            # Convert to tensors
            states_tensor = tf.convert_to_tensor(states, dtype=tf.float32)
            next_states_tensor = tf.convert_to_tensor(next_states, dtype=tf.float32)
            
            # Calculate values (ensure we get a scalar per state)
            values = self.critic(states_tensor).numpy().flatten()
            next_values = self.critic(next_states_tensor).numpy().flatten()
            
            # Calculate returns and advantages
            advantages = rewards + self.config.GAMMA * next_values - values
            returns = rewards + self.config.GAMMA * next_values
            
            # Get old action probabilities
            old_action_probs = tf.reduce_sum(
                self.actor(states_tensor) * tf.one_hot(actions, self.config.ACTION_DIM),
                axis=1
            ).numpy()
            
            metrics = defaultdict(list)
            
            # Train for multiple epochs
            for _ in range(num_epochs):
                indices = np.random.permutation(len(states))
                
                for start_idx in range(0, len(states), batch_size):
                    idx = indices[start_idx:start_idx + batch_size]
                    if len(idx) < 3:  # Quá ít mẫu, bỏ qua
                        continue
                    
                    batch_metrics = self.update_ppo(
                        tf.convert_to_tensor(states[idx], dtype=tf.float32),
                        tf.convert_to_tensor(actions[idx], dtype=tf.int32),
                        tf.convert_to_tensor(advantages[idx], dtype=tf.float32),
                        tf.convert_to_tensor(returns[idx], dtype=tf.float32),
                        tf.convert_to_tensor(old_action_probs[idx], dtype=tf.float32)
                    )
                    
                    for k, v in batch_metrics.items():
                        metrics[k].append(v)
                    
                    # Early stopping if KL too large
                    if batch_metrics['kl'] > 1.5 * self.target_kl:
                        break
            
            # Update old actor
            self._update_old_actor()
            
            return {k: np.mean(v) for k, v in metrics.items()}
        
        except Exception as e:
            logger.error(f"Error in train_ppo_epoch: {e}")
            import traceback
            traceback.print_exc()
            return {'actor_loss': 0, 'critic_loss': 0, 'entropy': 0, 'kl': 0}
    
    # Update PPO không dùng @tf.function để dễ debug
    def update_ppo(self, states, actions, advantages, returns, old_action_probs):
        with tf.GradientTape() as actor_tape, tf.GradientTape() as critic_tape:
            # Critic loss
            values = self.critic(states)
            values_flat = tf.reshape(values, [-1])  # Flatten để match shape với returns
            critic_loss = tf.reduce_mean(tf.square(returns - values_flat)) * 0.5
            
            # Actor loss (PPO style)
            action_probs = self.actor(states)
            action_logprobs = tf.math.log(tf.reduce_sum(
                action_probs * tf.one_hot(actions, self.config.ACTION_DIM),
                axis=1
            ) + 1e-10)
            old_action_logprobs = tf.math.log(old_action_probs + 1e-10)
            
            # Calculate ratios
            ratio = tf.exp(action_logprobs - old_action_logprobs)
            
            # PPO clipped objective
            clip_adv = tf.clip_by_value(ratio, 1.0 - self.clip_ratio, 1.0 + self.clip_ratio) * advantages
            ppo_loss = -tf.reduce_mean(tf.minimum(ratio * advantages, clip_adv))
            
            # Entropy bonus
            entropy = -tf.reduce_mean(tf.reduce_sum(action_probs * tf.math.log(action_probs + 1e-10), axis=1))
            actor_loss = ppo_loss - 0.01 * entropy
            
        # Compute and apply gradients
        actor_grads = actor_tape.gradient(actor_loss, self.actor.trainable_variables)
        critic_grads = critic_tape.gradient(critic_loss, self.critic.trainable_variables)
        
        self.actor_optimizer.apply_gradients(zip(actor_grads, self.actor.trainable_variables))
        self.critic_optimizer.apply_gradients(zip(critic_grads, self.critic.trainable_variables))
        
        # Calculate KL for early stopping
        old_action_probs_tensor = self.old_actor(states)
        kl = tf.reduce_mean(tf.reduce_sum(
            old_action_probs_tensor * tf.math.log((old_action_probs_tensor + 1e-10) / (action_probs + 1e-10)), 
            axis=1
        ))
        
        # Convert to Python values for non-graph mode
        return {
            'actor_loss': float(actor_loss.numpy()), 
            'critic_loss': float(critic_loss.numpy()), 
            'entropy': float(entropy.numpy()), 
            'kl': float(kl.numpy())
        }
    
    def store_experience(self, state, action, reward, next_state):
        self.memory.add(state, action, reward, next_state)

# Kiểm tra PPOAgent
try:
    # Khởi tạo agent
    ppo_agent = PPOAgent(config, agent_id=0)
    
    # Tạo dữ liệu giả để thử nghiệm
    dummy_state = np.random.rand(config.WINDOW_SIZE, config.STATE_DIM)
    
    # Thử nghiệm chọn hành động
    action, prob = ppo_agent.select_action(dummy_state)
    print(f"Selected action: {action}, probability: {prob:.4f}")
    
    # Thử nghiệm lưu trữ kinh nghiệm
    for i in range(10):
        state = np.random.rand(config.WINDOW_SIZE, config.STATE_DIM) 
        action = np.random.randint(0, 2)
        reward = np.random.rand()
        next_state = np.random.rand(config.WINDOW_SIZE, config.STATE_DIM)
        ppo_agent.store_experience(state, action, reward, next_state)
    
    # Thử nghiệm train PPO
    memory_data = ppo_agent.memory.get_all()
    metrics = ppo_agent.train_ppo_epoch(memory_data, num_epochs=2, batch_size=4)
    print(f"Training metrics: {metrics}")
    
    print("PPO Agent tested successfully!")
except Exception as e:
    print(f"PPO Agent test failed: {e}")
    import traceback
    traceback.print_exc()

2025-05-21 13:31:31,103 - INFO - Agent 0 initialized
2025-05-21 13:31:31,117 - INFO - States shape: (10, 20, 13), actions shape: (10,)


Selected action: 0, probability: 0.5678
Training metrics: {'actor_loss': -0.6556099951267242, 'critic_loss': 0.1594378836452961, 'entropy': 0.6786390841007233, 'kl': 0.0005597588024102151}
PPO Agent tested successfully!


In [89]:
class FogNode:
    def __init__(self, node_id: int, config):
        self.node_id = node_id
        self.config = config
        self.agents = []
        
        # Initialize agents
        self._initialize_agents()
        
        # Performance metrics
        self.metrics = {
            'detection_latency': [],
            'processing_time': [],
            'resource_usage': {'cpu': [], 'memory': []}
        }
        
        logger.info(f"Fog Node {node_id} initialized with {len(self.agents)} agents")
        
    def _initialize_agents(self):
        for i in range(self.config.NUM_AGENTS_PER_NODE):
            agent_id = self.node_id * self.config.NUM_AGENTS_PER_NODE + i
            agent = PPOAgent(self.config, agent_id)
            self.agents.append(agent)
            
    def process_data(self, data_batch: np.ndarray) -> List[int]:
        start_time = datetime.now()
        
        predictions = []
        for sample in data_batch:
            # Get predictions from all agents
            agent_predictions = []
            for agent in self.agents:
                action, _ = agent.select_action(sample)
                agent_predictions.append(action)
                
            # Majority voting
            final_prediction = np.bincount(agent_predictions).argmax()
            predictions.append(final_prediction)
            
        # Update metrics
        processing_time = (datetime.now() - start_time).total_seconds()
        self.metrics['processing_time'].append(processing_time)
        self.metrics['detection_latency'].append(processing_time)
        
        return predictions
        
    def train_local(self, data: np.ndarray, labels: np.ndarray) -> Dict:
        """Huấn luyện agents cục bộ với PPO"""
        try:
            metrics = defaultdict(list)
            
            # Kiểm tra kích thước dữ liệu
            logger.info(f"Training local with data shape: {data.shape}, labels shape: {labels.shape}")
            
            if len(data.shape) != 3:
                logger.error(f"Expected 3D data (samples, time_steps, features), got {data.shape}")
                return {"error": 1.0}
            
            for agent in self.agents:
                experiences = []
                
                # Giới hạn số lượng mẫu để tránh OOM
                max_samples = min(self.config.MAX_SAMPLES_PER_NODE, len(data))
                sample_indices = np.random.choice(len(data), max_samples, replace=False)
                
                # Thu thập kinh nghiệm
                for i in sample_indices:
                    try:
                        state = data[i]
                        label = labels[i]
                        
                        action, _ = agent.select_action(state)
                        reward = 1.0 if action == label else -1.0
                        experiences.append((state, action, reward, state))
                    except Exception as e:
                        logger.error(f"Error collecting experience: {e}")
                
                # Huấn luyện với PPO
                if experiences:
                    try:
                        logger.info(f"Agent {agent.agent_id} training with {len(experiences)} experiences")
                        ppo_metrics = agent.train_ppo_epoch(
                            experiences, 
                            num_epochs=self.config.PPO_EPOCHS,
                            batch_size=min(self.config.PPO_BATCH_SIZE, len(experiences))
                        )
                        
                        for k, v in ppo_metrics.items():
                            metrics[k].append(v)
                    except Exception as e:
                        logger.error(f"Error in PPO training: {e}")
            
            return {k: np.mean(v) for k, v in metrics.items() if v}
        except Exception as e:
            logger.error(f"Error in train_local: {e}")
            import traceback
            traceback.print_exc()
            return {"error": 1.0}

# Kiểm tra Fog Node
try:
    # Khởi tạo fog node
    fog_node = FogNode(node_id=0, config=config)
    print(f"Fog node has {len(fog_node.agents)} agents")
    
    # Tạo dữ liệu giả
    batch_size, time_steps, features = 5, config.WINDOW_SIZE, config.STATE_DIM
    dummy_data = np.random.rand(batch_size, time_steps, features)
    
    # Thử nghiệm xử lý dữ liệu
    predictions = fog_node.process_data(dummy_data)
    print(f"Processed batch of {len(dummy_data)} samples with predictions: {predictions}")
    
    # Tạo dữ liệu giả lớn hơn cho huấn luyện
    data = np.random.rand(100, time_steps, features)
    labels = np.random.randint(0, 2, size=100)
    
    # Thử nghiệm huấn luyện cục bộ
    metrics = fog_node.train_local(data, labels)
    print(f"Local training metrics: {metrics}")
    
    print("Fog Node tested successfully!")
except Exception as e:
    print(f"Fog Node test failed: {e}")
    import traceback
    traceback.print_exc()

2025-05-21 13:31:36,339 - INFO - Agent 0 initialized
2025-05-21 13:31:36,502 - INFO - Agent 1 initialized
2025-05-21 13:31:36,502 - INFO - Fog Node 0 initialized with 2 agents
2025-05-21 13:31:36,639 - INFO - Training local with data shape: (100, 20, 13), labels shape: (100,)


Fog node has 2 agents
Processed batch of 5 samples with predictions: [1, 0, 0, 0, 0]


2025-05-21 13:31:38,259 - INFO - Agent 0 training with 100 experiences
2025-05-21 13:31:38,260 - INFO - States shape: (100, 20, 13), actions shape: (100,)
2025-05-21 13:31:39,978 - INFO - Agent 1 training with 100 experiences
2025-05-21 13:31:39,979 - INFO - States shape: (100, 20, 13), actions shape: (100,)


Local training metrics: {'actor_loss': -0.08138644276186824, 'critic_loss': 0.5040508471429348, 'entropy': 0.6749391779303551, 'kl': 0.016190933296456933}
Fog Node tested successfully!


In [90]:
class FederatedLearning:
    def __init__(self, config):
        self.config = config
        self.fog_nodes = []
        self.global_weights = None
        self.metrics_history = defaultdict(list)
        
        # Initialize fog nodes
        self._initialize_fog_nodes()
        
    def _initialize_fog_nodes(self):
        for i in range(self.config.NUM_FOG_NODES):
            fog_node = FogNode(i, self.config)
            self.fog_nodes.append(fog_node)
        logger.info(f"Initialized {len(self.fog_nodes)} fog nodes")
            
    def distribute_data(self, X: np.ndarray, y: np.ndarray, 
                       distribution_type: str = None, 
                       alpha: float = None) -> List[Tuple[np.ndarray, np.ndarray]]:
        try:
            n_nodes = self.config.NUM_FOG_NODES
            n_samples = len(X)
            distributed_data = []
            
            # Default to config values if not specified
            if distribution_type is None:
                distribution_type = self.config.DATA_DISTRIBUTION_TYPE
            if alpha is None:
                alpha = self.config.DIRICHLET_ALPHA
            
            if distribution_type == 'equal':
                # Equal distribution
                data_per_node = n_samples // n_nodes
                for i in range(n_nodes):
                    start_idx = i * data_per_node
                    end_idx = start_idx + data_per_node if i < n_nodes - 1 else n_samples
                    distributed_data.append((X[start_idx:end_idx], y[start_idx:end_idx]))
                    
            elif distribution_type == 'dirichlet':
                # Dirichlet distribution
                proportions = np.random.dirichlet(np.repeat(alpha, n_nodes))
                samples_per_node = np.round(proportions * n_samples).astype(int)
                samples_per_node[-1] = n_samples - np.sum(samples_per_node[:-1])
                
                indices = np.random.permutation(n_samples)
                start_idx = 0
                
                for n_samples_node in samples_per_node:
                    node_indices = indices[start_idx:start_idx + n_samples_node]
                    distributed_data.append((X[node_indices], y[node_indices]))
                    start_idx += n_samples_node
                
                logger.info(f"Dirichlet distribution (α={alpha}): {samples_per_node}")
                
            elif distribution_type == 'label_based':
                # Label-based distribution
                unique_labels = np.unique(y)
                n_labels = len(unique_labels)
                labels_per_node = max(1, n_labels // n_nodes)
                
                for i in range(n_nodes):
                    start_label_idx = (i * labels_per_node) % n_labels
                    node_labels = unique_labels[start_label_idx:start_label_idx + labels_per_node]
                    
                    node_indices = np.isin(y, node_labels)
                    node_X, node_y = X[node_indices], y[node_indices]
                    
                    # Ensure minimum data per node
                    if len(node_X) < n_samples // (n_nodes * 2):
                        additional_indices = np.random.choice(
                            np.where(~node_indices)[0],
                            size=min(n_samples // (n_nodes * 2), np.sum(~node_indices)),
                            replace=False
                        )
                        node_X = np.vstack([node_X, X[additional_indices]])
                        node_y = np.append(node_y, y[additional_indices])
                    
                    distributed_data.append((node_X, node_y))
                    
            elif distribution_type == 'weighted':
                # Weighted distribution
                weights = np.array([2**(i/(n_nodes-1)) for i in range(n_nodes)])
                weights /= np.sum(weights)
                
                samples_per_node = np.round(weights * n_samples).astype(int)
                samples_per_node[-1] = n_samples - np.sum(samples_per_node[:-1])
                
                indices = np.random.permutation(n_samples)
                start_idx = 0
                
                for n_samples_node in samples_per_node:
                    node_indices = indices[start_idx:start_idx + n_samples_node]
                    distributed_data.append((X[node_indices], y[node_indices]))
                    start_idx += n_samples_node
                
                logger.info(f"Weighted distribution: {samples_per_node}")
                
            else:
                raise ValueError(f"Unknown distribution type: {distribution_type}")
                
            return distributed_data
            
        except Exception as e:
            logger.error(f"Error distributing data: {str(e)}")
            raise
            
    def train_round(self, distributed_data: List[Tuple[np.ndarray, np.ndarray]]) -> Dict:
        try:
            round_metrics = defaultdict(list)
            
            # Local training on each fog node
            for fog_node, (node_X, node_y) in zip(self.fog_nodes, distributed_data):
                local_metrics = fog_node.train_local(node_X, node_y)
                
                for k, v in local_metrics.items():
                    round_metrics[k].append(v)
                    
            # Aggregate models
            self._aggregate_models()
            
            # Update fog nodes with global model
            self._update_fog_nodes()
            
            # Calculate average metrics
            avg_metrics = {k: np.mean(v) for k, v in round_metrics.items()}
            
            # Store metrics history
            for k, v in avg_metrics.items():
                self.metrics_history[k].append(v)
                
            return avg_metrics
            
        except Exception as e:
            logger.error(f"Error in training round: {str(e)}")
            raise
            
    def _aggregate_models(self):
        try:
            if not self.fog_nodes:
                raise ValueError("No fog nodes available")
                
            aggregated_weights = {}
            
            # Get first agent's structure
            first_agent = self.fog_nodes[0].agents[0]
            
            # Initialize aggregated weights with zeros
            for var in first_agent.actor.trainable_variables:
                aggregated_weights[f'actor_{var.name}'] = tf.zeros_like(var)
            for var in first_agent.critic.trainable_variables:
                aggregated_weights[f'critic_{var.name}'] = tf.zeros_like(var)
                
            # Sum weights from all agents
            num_agents = self.config.NUM_FOG_NODES * self.config.NUM_AGENTS_PER_NODE
            for fog_node in self.fog_nodes:
                for agent in fog_node.agents:
                    for var in agent.actor.trainable_variables:
                        aggregated_weights[f'actor_{var.name}'] += var / num_agents
                    for var in agent.critic.trainable_variables:
                        aggregated_weights[f'critic_{var.name}'] += var / num_agents
                        
            self.global_weights = aggregated_weights
            logger.info("Models aggregated successfully")
            
        except Exception as e:
            logger.error(f"Error aggregating models: {str(e)}")
            raise
            
    def _update_fog_nodes(self):
        try:
            if self.global_weights is None:
                raise ValueError("Global weights not available")
                
            for fog_node in self.fog_nodes:
                for agent in fog_node.agents:
                    # Update actor
                    for var in agent.actor.trainable_variables:
                        var.assign(self.global_weights[f'actor_{var.name}'])
                    # Update critic
                    for var in agent.critic.trainable_variables:
                        var.assign(self.global_weights[f'critic_{var.name}'])
                    # Update old actor (for PPO)
                    agent._update_old_actor()
                        
            logger.info("Fog nodes updated with global weights")
            
        except Exception as e:
            logger.error(f"Error updating fog nodes: {str(e)}")
            raise

# Kiểm tra Federated Learning
try:
    # Khởi tạo federated learning
    fl_system = FederatedLearning(config)
    print(f"Federated learning system initialized with {len(fl_system.fog_nodes)} fog nodes")
    
    # Tạo dữ liệu giả
    batch_size, time_steps, features = 100, config.WINDOW_SIZE, config.STATE_DIM
    X = np.random.rand(batch_size, time_steps, features)
    y = np.random.randint(0, 2, size=batch_size)
    
    # Phân phối dữ liệu
    distributed_data = fl_system.distribute_data(X, y)
    print(f"Data distributed to {len(distributed_data)} nodes")
    for i, (node_X, node_y) in enumerate(distributed_data):
        print(f"Node {i}: X shape {node_X.shape}, y shape {node_y.shape}")
    
    # Kiểm tra huấn luyện một vòng nếu cần thiết
    # Vì huấn luyện khá nặng nên chỉ bình luận ở đây
    # metrics = fl_system.train_round(distributed_data)
    # print(f"Training round metrics: {metrics}")
    
    print("Federated Learning tested successfully!")
except Exception as e:
    print(f"Federated Learning test failed: {e}")
    import traceback
    traceback.print_exc()

2025-05-21 13:31:42,428 - INFO - Agent 0 initialized
2025-05-21 13:31:42,588 - INFO - Agent 1 initialized
2025-05-21 13:31:42,588 - INFO - Fog Node 0 initialized with 2 agents
2025-05-21 13:31:42,754 - INFO - Agent 2 initialized
2025-05-21 13:31:42,921 - INFO - Agent 3 initialized
2025-05-21 13:31:42,922 - INFO - Fog Node 1 initialized with 2 agents
2025-05-21 13:31:42,922 - INFO - Initialized 2 fog nodes


Federated learning system initialized with 2 fog nodes
Data distributed to 2 nodes
Node 0: X shape (50, 20, 13), y shape (50,)
Node 1: X shape (50, 20, 13), y shape (50,)
Federated Learning tested successfully!


In [91]:
class DataPreprocessor:
    def __init__(self, config):
        self.config = config
        self.scaler = MinMaxScaler()
        self.label_encoder = LabelEncoder()
        
    def preprocess_data(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
        try:
            # Load data
            logger.info("Loading dataset...")
            df = pd.read_csv(self.config.DATA_PATH)
            
            # Log dataset info
            logger.info(f"Dataset loaded: {df.shape[0]} samples, {df.shape[1]} features")
            
            # Sampling data nếu cần
            if self.config.USE_DATA_SAMPLING:
                sample_size = min(self.config.MAX_SAMPLES, len(df))  # Giới hạn kích thước
                df = df.sample(sample_size, random_state=42)
                logger.info(f"Sampled dataset: {df.shape[0]} samples")
            
            # Handle missing values in features
            logger.info("Handling missing values...")
            features = df[self.config.SELECTED_FEATURES].copy()
            features.fillna(0, inplace=True)
            
            # Normalize features
            logger.info("Normalizing features...")
            normalized_features = self.scaler.fit_transform(features)
            
            # Process labels - Encode the label column
            logger.info("Processing labels...")
            labels = self.label_encoder.fit_transform(df[self.config.LABEL_COLUMN])
            
            # Log label encoding
            label_mapping = dict(zip(self.label_encoder.classes_, 
                                     self.label_encoder.transform(self.label_encoder.classes_)))
            logger.info(f"Label mapping: {label_mapping}")
            
            # Create windows
            logger.info("Creating time windows...")
            X = self._create_windows(normalized_features)
            y = labels[self.config.WINDOW_SIZE-1:]
            
            # Log shape
            logger.info(f"Windows shape: {X.shape}, labels shape: {y.shape}")
            
            # Split data
            logger.info("Splitting data...")
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=0.2, random_state=42, stratify=y
            )
            
            logger.info(f"Data preprocessing completed: X_train shape {X_train.shape}, y_train shape {y_train.shape}")
            logger.info(f"Class distribution in training set: {np.bincount(y_train)}")
            return X_train, X_test, y_train, y_test
            
        except Exception as e:
            logger.error(f"Error in preprocessing: {str(e)}")
            raise
            
    def _create_windows(self, data: np.ndarray) -> np.ndarray:
        """Create time windows from data"""
        windows = []
        for i in range(len(data) - self.config.WINDOW_SIZE + 1):
            window = data[i:i + self.config.WINDOW_SIZE]
            windows.append(window)
        return np.array(windows)

# Kiểm tra Data Preprocessor - may mắn thì chạy nếu có đủ dữ liệu
# Nếu không, chỉ cần kiểm tra class đã được định nghĩa
try:
    if os.path.exists(config.DATA_PATH):
        preprocessor = DataPreprocessor(config)
        X_train, X_test, y_train, y_test = preprocessor.preprocess_data()
        print(f"Data preprocessed: X_train shape {X_train.shape}, y_train shape {y_train.shape}")
    else:
        print(f"Data file not found at {config.DATA_PATH}, skipping preprocessing test")
        print("DataPreprocessor class defined successfully")
except Exception as e:
    print(f"DataPreprocessor test failed: {e}")
    import traceback
    traceback.print_exc()

2025-05-21 13:31:46,431 - INFO - Loading dataset...
2025-05-21 13:31:47,331 - INFO - Dataset loaded: 225745 samples, 85 features
2025-05-21 13:31:47,361 - INFO - Sampled dataset: 10000 samples
2025-05-21 13:31:47,361 - INFO - Handling missing values...
2025-05-21 13:31:47,369 - INFO - Normalizing features...
2025-05-21 13:31:47,375 - INFO - Processing labels...
2025-05-21 13:31:47,377 - INFO - Label mapping: {'BENIGN': 0, 'DDoS': 1}
2025-05-21 13:31:47,378 - INFO - Creating time windows...
2025-05-21 13:31:47,385 - INFO - Windows shape: (9981, 20, 13), labels shape: (9981,)
2025-05-21 13:31:47,386 - INFO - Splitting data...
2025-05-21 13:31:47,395 - INFO - Data preprocessing completed: X_train shape (7984, 20, 13), y_train shape (7984,)
2025-05-21 13:31:47,396 - INFO - Class distribution in training set: [3388 4596]


Data preprocessed: X_train shape (7984, 20, 13), y_train shape (7984,)


In [92]:
class TrainingPipeline:
    def __init__(self, config, fl_system, X_train, y_train, X_test, y_test):
        self.config = config
        self.fl_system = fl_system
        self.X_train = X_train
        self.y_train = y_train
        self.X_test = X_test
        self.y_test = y_test
        self.metrics_history = {}
        
        logger.info("Training pipeline initialized")
        
    def train(self):
        try:
            # Initialize early stopping and LR scheduler
            early_stopping = EarlyStopping(
                patience=self.config.EARLY_STOPPING_PATIENCE,
                min_delta=self.config.EARLY_STOPPING_MIN_DELTA
            )
            
            lr_scheduler = LearningRateScheduler(
                initial_lr=self.config.LEARNING_RATE,
                decay_steps=self.config.LR_DECAY_STEPS,
                decay_rate=self.config.LR_DECAY_RATE,
                warmup_steps=self.config.LR_WARMUP_STEPS,
                min_lr=self.config.LR_MIN,
                strategy=self.config.LR_STRATEGY
            )
            
            # Distribute data with specified distribution
            distributed_data = self.fl_system.distribute_data(
                self.X_train, self.y_train,
                distribution_type=self.config.DATA_DISTRIBUTION_TYPE,
                alpha=self.config.DIRICHLET_ALPHA
            )
            
            # Training rounds
            start_time = datetime.now()
            best_metrics = None
            
            for round in range(self.config.FL_ROUNDS):
                logger.info(f"Starting round {round + 1}/{self.config.FL_ROUNDS}")
                
                # Update learning rate
                current_lr = lr_scheduler.step()
                logger.info(f"Current learning rate: {current_lr:.6f}")
                
                # Update learning rate for all agents
                for fog_node in self.fl_system.fog_nodes:
                    for agent in fog_node.agents:
                        agent.actor_optimizer.learning_rate.assign(current_lr)
                        agent.critic_optimizer.learning_rate.assign(current_lr)
                
                # Execute training round
                round_metrics = self.fl_system.train_round(distributed_data)
                
                # Evaluate
                test_metrics = self.evaluate()
                
                # Log metrics
                self._log_metrics(round, round_metrics, test_metrics)
                
                # Save checkpoints
                if (round + 1) % self.config.CHECKPOINT_FREQUENCY == 0:
                    self._save_checkpoint(round)
                    
                # Save best model
                if best_metrics is None or test_metrics['f1'] > best_metrics['f1']:
                    best_metrics = test_metrics
                    self._save_best_model()
                    logger.info(f"New best model saved with F1: {test_metrics['f1']:.4f}")
                
                # Check for early stopping
                if early_stopping(test_metrics['f1']):
                    logger.info(f"Early stopping triggered after {round + 1} rounds")
                    break
                    
            # Final evaluation
            if early_stopping.should_stop:
                self._load_best_model()
                logger.info("Loaded best model for final evaluation")
                
            total_time = (datetime.now() - start_time).total_seconds() / 60
            logger.info(f"Training completed in {total_time:.2f} minutes")
            
            final_metrics = self.evaluate()
            self._save_results(final_metrics)
            
            return final_metrics
            
        except Exception as e:
            logger.error(f"Error in training pipeline: {str(e)}")
            import traceback
            traceback.print_exc()
            raise
            
    def evaluate(self) -> Dict:
        try:
            predictions = []
            
            # Get predictions from all fog nodes
            for i in range(0, len(self.X_test), self.config.BATCH_SIZE):
                batch_X = self.X_test[i:i + self.config.BATCH_SIZE]
                batch_predictions = []
                
                # Get predictions from each fog node
                for fog_node in self.fl_system.fog_nodes:
                    node_predictions = fog_node.process_data(batch_X)
                    batch_predictions.append(node_predictions)
                    
                # Aggregate predictions using majority voting
                batch_predictions = np.array(batch_predictions)
                final_predictions = np.array([np.bincount(batch_predictions[:, j]).argmax() 
                                            for j in range(batch_predictions.shape[1])])
                predictions.extend(final_predictions)
                
            # Calculate metrics
            predictions = np.array(predictions)
            y_true = self.y_test[:len(predictions)]
            
            metrics = {
                'accuracy': accuracy_score(y_true, predictions),
                'precision': precision_score(y_true, predictions),
                'recall': recall_score(y_true, predictions),
                'f1': f1_score(y_true, predictions),
                'predictions': predictions  # Store for confusion matrix
            }
            
            return metrics
            
        except Exception as e:
            logger.error(f"Error in evaluation: {str(e)}")
            raise
            
    def _log_metrics(self, round: int, train_metrics: Dict, test_metrics: Dict):
        logger.info(f"\nRound {round + 1} Metrics:")
        logger.info("Training:")
        for k, v in train_metrics.items():
            logger.info(f"{k}: {v:.4f}")
            if 'train_' + k not in self.metrics_history:
                self.metrics_history['train_' + k] = []
            self.metrics_history['train_' + k].append(v)
            
        logger.info("\nTesting:")
        for k, v in test_metrics.items():
            if k != 'predictions':
                logger.info(f"{k}: {v:.4f}")
                if 'test_' + k not in self.metrics_history:
                    self.metrics_history['test_' + k] = []
                self.metrics_history['test_' + k].append(v)
            
    def _save_checkpoint(self, round: int):
        try:
            checkpoint_path = os.path.join(
                self.config.CHECKPOINT_DIR,
                f'checkpoint_round_{round + 1}'
            )
            os.makedirs(checkpoint_path, exist_ok=True)
            
            # Save global weights
            with open(os.path.join(checkpoint_path, 'global_weights.pkl'), 'wb') as f:
                pickle.dump(self.fl_system.global_weights, f)
            
            logger.info(f"Checkpoint saved for round {round + 1}")
            
        except Exception as e:
            logger.error(f"Error saving checkpoint: {str(e)}")
            
    def _save_best_model(self):
        try:
            best_model_path = os.path.join(
                self.config.CHECKPOINT_DIR,
                'best_model'
            )
            os.makedirs(best_model_path, exist_ok=True)
            
            # Save global weights
            with open(os.path.join(best_model_path, 'global_weights.pkl'), 'wb') as f:
                pickle.dump(self.fl_system.global_weights, f)
            
            logger.info("Best model saved successfully")
            
        except Exception as e:
            logger.error(f"Error saving best model: {str(e)}")
            
    def _load_best_model(self):
        try:
            best_model_path = os.path.join(
                self.config.CHECKPOINT_DIR,
                'best_model',
                'global_weights.pkl'
            )
            
            if os.path.exists(best_model_path):
                # Load global weights
                with open(best_model_path, 'rb') as f:
                    self.fl_system.global_weights = pickle.load(f)
                
                self.fl_system._update_fog_nodes()
                
                logger.info("Best model loaded successfully")
            else:
                logger.warning(f"Best model not found at {best_model_path}")
            
        except Exception as e:
            logger.error(f"Error loading best model: {str(e)}")
            
    def _save_results(self, final_metrics: Dict):
        try:
            results_path = os.path.join(
                self.config.RESULTS_DIR,
                'final_results.json'
            )
            
            # Loại bỏ predictions từ final_metrics để có thể lưu dưới dạng JSON
            metrics_to_save = {k: float(v) for k, v in final_metrics.items() if k != 'predictions'}
            
            results = {
                'timestamp': self.config.TIMESTAMP,
                'user': self.config.USER,
                'final_metrics': metrics_to_save,
                'training_history': {k: [float(x) for x in v] for k, v in self.metrics_history.items()}
            }
            
            with open(results_path, 'w') as f:
                json.dump(results, f, indent=4)
                
            logger.info("Final results saved successfully")
            
        except Exception as e:
            logger.error(f"Error saving results: {str(e)}")

class ResultsAnalyzer:
    def __init__(self, config):
        self.config = config
        
    def plot_training_history(self, metrics_history):
        try:
            plt.figure(figsize=(18, 10))
            
            # Plot losses
            plt.subplot(2, 3, 1)
            plt.plot(metrics_history.get('train_actor_loss', []), label='Actor Loss')
            plt.plot(metrics_history.get('train_critic_loss', []), label='Critic Loss')
            plt.title('Training Losses')
            plt.xlabel('Round')
            plt.ylabel('Loss')
            plt.legend()
            
            # Plot accuracy
            plt.subplot(2, 3, 2)
            plt.plot(metrics_history.get('test_accuracy', []), label='Accuracy')
            plt.title('Model Accuracy')
            plt.xlabel('Round')
            plt.ylabel('Accuracy')
            plt.grid(True)
            
            # Plot F1 Score
            plt.subplot(2, 3, 3)
            plt.plot(metrics_history.get('test_f1', []), label='F1 Score')
            plt.title('F1 Score')
            plt.xlabel('Round')
            plt.ylabel('F1 Score')
            plt.grid(True)
            
            # Plot KL divergence
            plt.subplot(2, 3, 4)
            plt.plot(metrics_history.get('train_kl', []), label='KL Divergence')
            plt.title('KL Divergence')
            plt.xlabel('Round')
            plt.ylabel('KL')
            plt.yscale('log')
            plt.grid(True)
            
            # Plot Precision/Recall
            plt.subplot(2, 3, 5)
            plt.plot(metrics_history.get('test_precision', []), label='Precision')
            plt.plot(metrics_history.get('test_recall', []), label='Recall')
            plt.title('Precision & Recall')
            plt.xlabel('Round')
            plt.ylabel('Score')
            plt.legend()
            plt.grid(True)
            
            # Plot entropy
            plt.subplot(2, 3, 6)
            plt.plot(metrics_history.get('train_entropy', []), label='Entropy')
            plt.title('Policy Entropy')
            plt.xlabel('Round')
            plt.ylabel('Entropy')
            plt.grid(True)
            
            plt.tight_layout()
            
            output_path = os.path.join(self.config.RESULTS_DIR, 'training_history.png')
            plt.savefig(output_path)
            plt.close()
            
            logger.info(f"Training history plot saved to {output_path}")
            
        except Exception as e:
            logger.error(f"Error plotting training history: {e}")
        
    def plot_confusion_matrix(self, y_true, y_pred):
        try:
            cm = confusion_matrix(y_true[:len(y_pred)], y_pred)
            plt.figure(figsize=(8, 6))
            sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
            plt.title('Confusion Matrix')
            plt.ylabel('True Label')
            plt.xlabel('Predicted Label')
            
            output_path = os.path.join(self.config.RESULTS_DIR, 'confusion_matrix.png')
            plt.savefig(output_path)
            plt.close()
            
            logger.info(f"Confusion matrix plot saved to {output_path}")
            
        except Exception as e:
            logger.error(f"Error plotting confusion matrix: {e}")
        
    def analyze_detection_performance(self, y_true, y_pred):
        try:
            metrics = {
                'Accuracy': accuracy_score(y_true[:len(y_pred)], y_pred),
                'Precision': precision_score(y_true[:len(y_pred)], y_pred),
                'Recall': recall_score(y_true[:len(y_pred)], y_pred),
                'F1 Score': f1_score(y_true[:len(y_pred)], y_pred)
            }
            
            print("\nDetection Performance Analysis:")
            for metric, value in metrics.items():
                print(f"{metric}: {value:.4f}")
                
            return metrics
            
        except Exception as e:
            logger.error(f"Error analyzing detection performance: {e}")
            return {}

# Kiểm tra Pipeline và Analyzer
try:
    # Tạo dữ liệu giả để kiểm tra
    analyzer = ResultsAnalyzer(config)
    
    # Tạo metrics history giả
    fake_metrics = {
        'train_actor_loss': [0.5, 0.4, 0.3],
        'train_critic_loss': [0.4, 0.3, 0.2],
        'train_entropy': [0.1, 0.08, 0.06],
        'train_kl': [0.02, 0.015, 0.01],
        'test_accuracy': [0.7, 0.75, 0.8],
        'test_precision': [0.65, 0.7, 0.75],
        'test_recall': [0.7, 0.75, 0.8],
        'test_f1': [0.675, 0.725, 0.775]
    }
    
    # Thử plot training history
    analyzer.plot_training_history(fake_metrics)
    
    # Tạo dữ liệu giả cho confusion matrix
    y_true = np.array([0, 0, 1, 1, 0, 1, 0, 1])
    y_pred = np.array([0, 1, 1, 1, 0, 0, 0, 1])
    
    # Thử plot confusion matrix
    analyzer.plot_confusion_matrix(y_true, y_pred)
    
    # Thử analyze detection performance
    results = analyzer.analyze_detection_performance(y_true, y_pred)
    
    print("Pipeline & Analyzer test passed!")
except Exception as e:
    print(f"Pipeline & Analyzer test failed: {e}")
    import traceback
    traceback.print_exc()

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x12f36e5e0>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "/Users/macbook/venv/lib/python3.9/site-packages/keras/src/backend/tensorflow/rnn.py", line 419, in <genexpr>
    ta.write(ta_index_to_write, out)  File "/Users/macbook/venv/lib/python3.9/site-packages/tensorflow/python/util/tf_should_use.py", line 288, in wrapped


Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x12f36e5e0>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "/Users/macbook/venv/lib/python3.9/site-packages/keras/src/backend/tensorflow/rnn.py", line 419, in <genexpr>
    ta.write(ta_index_to_write, out)  File "/Users/macbook/venv/lib/python3.9/site-packages/tensorflow/python/util/tf_should_use.py", line 288, in wrapped


Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x313deb7c0>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "/Users/macbook/venv/lib/python3.9/site-packages/keras/src/backend/tensorflow/rnn.py", line 419, in <genexpr>
    ta.write(ta_index_to_write, out)  File "/Users/macbook/venv/lib/python3.9/site-packages/tensorflow/python/util/tf_should_use.py", line 288, in wrapped


Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x313deb7c0>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "/Users/macbook/venv/lib/python3.9/site-packages/keras/src/backend/tensorflow/rnn.py", line 419, in <genexpr>
    ta.write(ta_index_to_write, out)  File "/Users/macbook/venv/lib/python3.9/site-packages/tensorflow/python/util/tf_should_use.py", line 288, in wrapped
2025-05-21 13:31:50,432 - INFO - Training history plot saved to /Users/macbook/Desktop/FL-RL-Dos detection/Multi agents RL-FL-Fog/Multi agents RL-FL-Fog/results/training_history.png
2025-05-21 13:31:50,494 - INFO - Confusion matrix plot saved to /Users/macbook/Desktop/FL-RL-Dos detection/Multi agents RL-FL-Fog/Multi agents RL-FL-Fog/results/confusion_matrix.png



Detection Performance Analysis:
Accuracy: 0.7500
Precision: 0.7500
Recall: 0.7500
F1 Score: 0.7500
Pipeline & Analyzer test passed!


In [95]:
# Bạn có thể bỏ comment phần này để chạy toàn bộ quy trình nếu mọi thứ đã sẵn sàng
# Đảm bảo rằng bạn đã chạy tất cả các cell trước đó

def run_full_process():
    try:
        print("Starting DDoS Detection with Federated Learning and PPO...")
        
        # Step 1: Validate config
        # config đã được khởi tạo ở cell 2
        print(f"Data path: {config.DATA_PATH}")
        print(f"Selected features: {len(config.SELECTED_FEATURES)} features")
        print(f"Label column: {config.LABEL_COLUMN}")
        
        # Step 2: Preprocess data
        preprocessor = DataPreprocessor(config)
        X_train, X_test, y_train, y_test = preprocessor.preprocess_data()
        
        print(f"Training data shape: {X_train.shape}")
        print(f"Testing data shape: {X_test.shape}")
        print(f"Class distribution in training: {np.bincount(y_train)}")
        print(f"Class distribution in testing: {np.bincount(y_test)}")
        
        # Step 3: Initialize federated learning system
        fl_system = FederatedLearning(config)
        
        # Step 4: Initialize training pipeline
        pipeline = TrainingPipeline(config, fl_system, X_train, y_train, X_test, y_test)
        
        # Step 5: Start training
        print("Starting federated training...")
        final_metrics = pipeline.train()
        
        # Step 6: Analyze results
        analyzer = ResultsAnalyzer(config)
        analyzer.plot_training_history(pipeline.metrics_history)
        analyzer.plot_confusion_matrix(y_test, final_metrics['predictions'])
        analyzer.analyze_detection_performance(y_test, final_metrics['predictions'])
        
        print("Training completed successfully!")
        print("\nFinal Metrics:")
        for k, v in final_metrics.items():
            if k != 'predictions':
                print(f"{k}: {v:.4f}")
                
        return final_metrics
        
    except Exception as e:
        print(f"Error during execution: {e}")
        import traceback
        traceback.print_exc()
        return None

# Để chạy toàn bộ quy trình, uncomment dòng dưới
# Lưu ý rằng quá trình này có thể tốn nhiều thời gian và tài nguyên

final_metrics = run_full_process()

2025-05-21 13:33:44,320 - INFO - Loading dataset...


Starting DDoS Detection with Federated Learning and PPO...
Data path: /Users/macbook/Desktop/FL-RL-Dos detection/Multi agents RL-FL-Fog/Multi agents RL-FL-Fog/data/Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv
Selected features: 13 features
Label column:  Label


2025-05-21 13:33:45,234 - INFO - Dataset loaded: 225745 samples, 85 features
2025-05-21 13:33:45,267 - INFO - Sampled dataset: 10000 samples
2025-05-21 13:33:45,268 - INFO - Handling missing values...
2025-05-21 13:33:45,273 - INFO - Normalizing features...
2025-05-21 13:33:45,278 - INFO - Processing labels...
2025-05-21 13:33:45,281 - INFO - Label mapping: {'BENIGN': 0, 'DDoS': 1}
2025-05-21 13:33:45,281 - INFO - Creating time windows...
2025-05-21 13:33:45,291 - INFO - Windows shape: (9981, 20, 13), labels shape: (9981,)
2025-05-21 13:33:45,291 - INFO - Splitting data...
2025-05-21 13:33:45,307 - INFO - Data preprocessing completed: X_train shape (7984, 20, 13), y_train shape (7984,)
2025-05-21 13:33:45,308 - INFO - Class distribution in training set: [3388 4596]
2025-05-21 13:33:45,484 - INFO - Agent 0 initialized


Training data shape: (7984, 20, 13)
Testing data shape: (1997, 20, 13)
Class distribution in training: [3388 4596]
Class distribution in testing: [ 847 1150]


2025-05-21 13:33:45,648 - INFO - Agent 1 initialized
2025-05-21 13:33:45,648 - INFO - Fog Node 0 initialized with 2 agents
2025-05-21 13:33:45,808 - INFO - Agent 2 initialized
2025-05-21 13:33:45,961 - INFO - Agent 3 initialized
2025-05-21 13:33:45,962 - INFO - Fog Node 1 initialized with 2 agents
2025-05-21 13:33:45,962 - INFO - Initialized 2 fog nodes
2025-05-21 13:33:45,962 - INFO - Training pipeline initialized
2025-05-21 13:33:45,963 - INFO - Starting round 1/2
2025-05-21 13:33:45,963 - INFO - Current learning rate: 0.001000
2025-05-21 13:33:45,964 - INFO - Training local with data shape: (3992, 20, 13), labels shape: (3992,)


Starting federated training...


2025-05-21 13:34:23,794 - INFO - Agent 0 training with 3000 experiences
2025-05-21 13:34:23,807 - INFO - States shape: (3000, 20, 13), actions shape: (3000,)
2025-05-21 13:35:01,523 - INFO - Agent 1 training with 3000 experiences
2025-05-21 13:35:01,528 - INFO - States shape: (3000, 20, 13), actions shape: (3000,)
2025-05-21 13:35:03,042 - INFO - Training local with data shape: (3992, 20, 13), labels shape: (3992,)
2025-05-21 13:35:40,023 - INFO - Agent 2 training with 3000 experiences
2025-05-21 13:35:40,031 - INFO - States shape: (3000, 20, 13), actions shape: (3000,)
2025-05-21 13:36:17,296 - INFO - Agent 3 training with 3000 experiences
2025-05-21 13:36:17,301 - INFO - States shape: (3000, 20, 13), actions shape: (3000,)
2025-05-21 13:36:19,037 - ERROR - Error aggregating models: {{function_node __wrapped__AddV2_device_/job:localhost/replica:0/task:0/device:CPU:0}} Incompatible shapes: [32,2] vs. [13,256] [Op:AddV2] name: 
2025-05-21 13:36:19.037491: I tensorflow/core/framework/loc

Error during execution: {{function_node __wrapped__AddV2_device_/job:localhost/replica:0/task:0/device:CPU:0}} Incompatible shapes: [32,2] vs. [13,256] [Op:AddV2] name: 


Traceback (most recent call last):
  File "/var/folders/v0/t5xgy88d26bctff9sd0szwc80000gn/T/ipykernel_23656/1212352700.py", line 55, in train
    round_metrics = self.fl_system.train_round(distributed_data)
  File "/var/folders/v0/t5xgy88d26bctff9sd0szwc80000gn/T/ipykernel_23656/4016177540.py", line 119, in train_round
    self._aggregate_models()
  File "/var/folders/v0/t5xgy88d26bctff9sd0szwc80000gn/T/ipykernel_23656/4016177540.py", line 158, in _aggregate_models
    aggregated_weights[f'actor_{var.name}'] += var / num_agents
  File "/Users/macbook/venv/lib/python3.9/site-packages/tensorflow/python/util/traceback_utils.py", line 153, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "/Users/macbook/venv/lib/python3.9/site-packages/tensorflow/python/framework/ops.py", line 6006, in raise_from_not_ok_status
    raise core._status_to_exception(e) from None  # pylint: disable=protected-access
tensorflow.python.framework.errors_impl.InvalidArgumentError: {{function