**1. Experiments with Epsilon 0.5**

In [None]:
import pandas as pd
import numpy as np
import time
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import lightgbm as lgb
import pickle
import os
import random

# Step 1: Load and preprocess the data
df = pd.read_csv('/content/drive/MyDrive/IDS-IOT2024/Process_1 IDS-IoT-2024.csv')

# Features are every column except the last one; labels come from 'Attack_Category_x'.
# NOTE(review): presumably 'Attack_Category_x' is the last column - if it is
# not, the label column stays inside X. Confirm against the CSV header.
X, y = df.iloc[:, :-1].values, df['Attack_Category_x'].values

# Map the class labels to integer ids
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Hold out 20% of the rows as the shared test set used for every evaluation
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X, y_encoded, random_state=42, test_size=0.2
)

# Experiment setup
client_counts = [10, 25, 50, 75, 100]  # Number of clients to test
rounds = 10  # Number of federated learning rounds
privacy_budget = 0.5  # Privacy budget (epsilon)
results = []  # To store evaluation results for each client count

# Shuffle training data for IID client simulation (the permutation is not
# seeded, so client shards differ from run to run)
shuffled_indices = np.random.permutation(X_train_full.shape[0])
X_train_shuffled, y_train_shuffled = (
    X_train_full[shuffled_indices],
    y_train_full[shuffled_indices],
)

# Differential Privacy - Laplace Mechanism
def apply_laplace_noise(values, epsilon, sensitivity=1.0):
    """Add independent Laplace noise to every element of ``values``.

    Args:
        values: Array-like of numbers (ndarray, nested list or scalar).
        epsilon: Privacy budget; must be strictly positive. Smaller values
            give a larger noise scale.
        sensitivity: Numerator of the noise scale ``sensitivity / epsilon``.

    Returns:
        An ndarray with the same shape as ``values`` holding the noisy data.

    Raises:
        ValueError: If ``epsilon`` is not greater than zero.
    """
    # Reject zero/negative/NaN budgets up front instead of failing with a
    # ZeroDivisionError or an opaque error from inside NumPy.
    if not epsilon > 0:
        raise ValueError(f"epsilon must be positive, got {epsilon!r}")

    values = np.asarray(values)  # also accept plain lists and scalars
    scale = sensitivity / epsilon
    noise = np.random.laplace(0, scale, size=values.shape)
    return values + noise

# Schnorr Protocol for Authentication
class SchnorrProtocol:
    """Toy Schnorr identification protocol over the integers modulo ``p``.

    An instance holds one prover's key pair and per-run nonce, and also
    exposes the verifier's check.
    """

    def __init__(self, p, g):
        """Store the parameters and draw a fresh key pair.

        Args:
            p: Modulus used for every exponentiation.
            g: Base used for the public key and the commitments.
        """
        self.p, self.g = p, g
        # NOTE(review): np.random is not a cryptographic RNG; acceptable for
        # a simulation, not for real authentication.
        secret = np.random.randint(1, p)
        self.private_key = secret
        self.public_key = pow(g, secret, p)

    def generate_commitment(self):
        """Draw a fresh nonce ``r`` and return the commitment ``g**r mod p``."""
        nonce = np.random.randint(1, self.p)
        self.r = nonce
        self.R = pow(self.g, nonce, self.p)
        return self.R

    def compute_response(self, challenge):
        """Return ``s = (r + challenge * private_key) mod (p - 1)``.

        ``generate_commitment`` must have been called first, since it is
        what sets ``self.r``.
        """
        exponent_modulus = self.p - 1
        self.s = (self.r + challenge * self.private_key) % exponent_modulus
        return self.s

    def verify(self, R, s, public_key, challenge):
        """Return True when ``g**s == R * public_key**challenge (mod p)``."""
        expected = (R * pow(public_key, challenge, self.p)) % self.p
        return pow(self.g, s, self.p) == expected

# Schnorr Parameters (toy-sized modulus: fine for a simulation, far too small
# to offer real security)
p, g = 104729, 2

# LightGBM parameters shared by every client's local model
lgb_params = dict(
    objective='multiclass',
    num_class=len(np.unique(y_encoded)),  # one output per encoded label
    boosting_type='gbdt',
    metric='multi_logloss',
    learning_rate=0.1,
    num_leaves=31,
    max_depth=-1,
    verbosity=-1,
)

# Robust Aggregation (Median Aggregation)
def robust_aggregation(updates):
    """Combine client updates with an element-wise median.

    Args:
        updates: Sequence of equally shaped arrays, one entry per client.

    Returns:
        Array holding, for each position, the median over the first
        (client) axis.
    """
    stacked_updates = np.asanyarray(updates)
    return np.median(stacked_updates, axis=0)

# Experiment loop: for every client count, run `rounds` federated rounds of
# authenticate -> train locally -> add noise to predictions -> median-aggregate,
# then record the averaged metrics in `results`.
for client_count in client_counts:
    print(f"\nRunning experiment for {client_count} clients...")

    # Step 1: Simulate clients. Each client gets a contiguous shard of the
    # shuffled training set; the last client also takes the remainder rows.
    client_data = []
    samples_per_client = len(X_train_shuffled) // client_count

    for i in range(client_count):
        start_idx = i * samples_per_client
        end_idx = start_idx + samples_per_client if i < client_count - 1 else len(X_train_shuffled)
        X_client = X_train_shuffled[start_idx:end_idx]
        y_client = y_train_shuffled[start_idx:end_idx]
        client_data.append((X_client, y_client))

    # Randomly assign poisoned clients. The model-poisoning count is capped by
    # the clients left after data poisoning so random.sample cannot be asked
    # for more items than its population holds (small client counts).
    num_data_poisoning_clients = min(2, client_count)
    num_model_poisoning_clients = min(3, client_count - num_data_poisoning_clients)
    data_poisoning_clients = random.sample(range(client_count), num_data_poisoning_clients)
    model_poisoning_clients = random.sample(
        [client for client in range(client_count) if client not in data_poisoning_clients],
        num_model_poisoning_clients,
    )
    # Built once per experiment instead of concatenating lists per client/round.
    poisoned_clients = set(data_poisoning_clients) | set(model_poisoning_clients)

    # Reset performance tracking for the current experiment
    excluded_clients = 0
    local_accuracies = []
    global_accuracies = []
    client_auth_times = []
    client_train_times = []
    round_latencies = []
    aggregation_times = []
    communication_overheads = []

    # Reinitialize Schnorr protocols for the current number of clients
    client_protocols = [SchnorrProtocol(p, g) for _ in range(client_count)]

    # Step 2: Federated rounds
    for round_num in range(rounds):
        print(f"  Round {round_num + 1}/{rounds}")
        round_start_time = time.time()

        local_models = []
        local_predictions = []
        round_client_auth_times = []
        round_client_train_times = []
        round_comm_overhead = 0
        # Stays None when no client authenticates, so the round summary can
        # never print a stale or undefined accuracy.
        global_accuracy = None

        for client_num, (X_client, y_client) in enumerate(client_data):
            auth_start_time = time.time()
            schnorr = client_protocols[client_num]
            R = schnorr.generate_commitment()
            challenge = np.random.randint(1, p)

            # Simulate poisoning in response: a poisoned client submits a
            # tampered response, which makes verification fail below.
            response = schnorr.compute_response(challenge)
            if client_num in poisoned_clients:
                response += 1

            if not schnorr.verify(R, response, schnorr.public_key, challenge):
                print(f"    Client {client_num + 1} failed authentication and is excluded.")
                excluded_clients += 1
                continue
            auth_end_time = time.time()

            # NOTE: because poisoned clients are rejected above, the two
            # poisoning branches below only run if that simulation is changed.
            train_start_time = time.time()
            if client_num in data_poisoning_clients:
                print(f"    Simulating data poisoning on Client {client_num + 1}")
                # X_client is a slice (view) of X_train_shuffled; perturb a
                # copy so the noise neither accumulates across rounds nor
                # corrupts the data used by later experiments.
                X_client = X_client + np.random.normal(0, 0.5, X_client.shape)

            # Train local model
            train_data = lgb.Dataset(X_client, label=y_client)
            local_model = lgb.train(lgb_params, train_data, num_boost_round=20)

            if client_num in model_poisoning_clients:
                print(f"    Simulating model poisoning on Client {client_num + 1}")
                # NOTE(review): this edits the stored parameter dict after
                # training; presumably the trained trees are unaffected - confirm.
                local_model.params['num_leaves'] += 5

            train_end_time = time.time()
            round_client_auth_times.append(auth_end_time - auth_start_time)
            round_client_train_times.append(train_end_time - train_start_time)

            # Communication cost is approximated by the pickled model size.
            local_model_bytes = pickle.dumps(local_model)
            round_comm_overhead += len(local_model_bytes)

            local_models.append(local_model)
            predictions = local_model.predict(X_test)
            noisy_predictions = apply_laplace_noise(predictions, epsilon=privacy_budget, sensitivity=1.0)
            local_predictions.append(noisy_predictions)

            local_pred = np.argmax(noisy_predictions, axis=1)
            local_accuracy = accuracy_score(y_test, local_pred)
            local_accuracies.append(local_accuracy)

        if local_predictions:
            aggregation_start_time = time.time()
            aggregated_predictions = robust_aggregation(local_predictions)
            y_pred_aggregated = np.argmax(aggregated_predictions, axis=1)
            global_accuracy = accuracy_score(y_test, y_pred_aggregated)
            global_accuracies.append(global_accuracy)
            aggregation_end_time = time.time()
            aggregation_times.append(aggregation_end_time - aggregation_start_time)

            aggregated_model_bytes = pickle.dumps(aggregated_predictions)
            round_comm_overhead += len(aggregated_model_bytes)

        communication_overheads.append(round_comm_overhead)
        round_latencies.append(time.time() - round_start_time)
        client_auth_times.extend(round_client_auth_times)
        client_train_times.extend(round_client_train_times)

        if global_accuracy is None:
            print(f"  Round {round_num + 1} completed with no authenticated clients; nothing to aggregate.")
        else:
            print(f"  Round {round_num + 1} completed with Global Accuracy: {global_accuracy:.4f}")

    # Save global model
    # NOTE(review): aggregation works on predictions, so no merged model
    # exists; what is persisted is the first authenticated client's model
    # from the final round.
    model_path = f"global_model_{client_count}.pkl"
    if local_models:
        with open(model_path, "wb") as f:
            pickle.dump(local_models[0], f)
        global_model_size = os.path.getsize(model_path) / (1024 ** 2)  # in MB
    else:
        # No client authenticated in the final round: nothing to save.
        global_model_size = float("nan")

    results.append({
        'clients': client_count,
        'avg_global_accuracy': np.mean(global_accuracies),
        'avg_local_accuracy': np.mean(local_accuracies),
        'avg_train_time': np.mean(client_train_times),
        'avg_auth_time': np.mean(client_auth_times),
        'avg_round_latency': np.mean(round_latencies),
        'avg_aggregation_time': np.mean(aggregation_times),
        'avg_comm_overhead': np.mean(communication_overheads) / (1024 ** 2),
        'global_model_size': global_model_size,
        'excluded_clients': excluded_clients,  # total rejections over all rounds
    })

# Print all results
print("\nExperiment Results:")
for result in results:
    print(f"Clients: {result['clients']}, "
          f"Avg Global Accuracy: {result['avg_global_accuracy']:.5f}, "
          f"Avg Local Accuracy: {result['avg_local_accuracy']:.5f}, "
          f"Avg Train Time: {result['avg_train_time']:.5f}s, "
          f"Avg Auth Time: {result['avg_auth_time']:.5f}s, "
          f"Avg Round Latency: {result['avg_round_latency']:.5f}s, "
          f"Avg Aggregation Time: {result['avg_aggregation_time']:.5f}s, "
          f"Avg Comm Overhead: {result['avg_comm_overhead']:.5f}MB, "
          f"Global Model Size: {result['global_model_size']:.5f}MB")



Running experiment for 10 clients...
  Round 1/10
    Client 2 failed authentication and is excluded.
    Client 5 failed authentication and is excluded.
    Client 8 failed authentication and is excluded.
    Client 9 failed authentication and is excluded.
    Client 10 failed authentication and is excluded.
  Round 1 completed with Global Accuracy: 0.3547
  Round 2/10
    Client 2 failed authentication and is excluded.
    Client 5 failed authentication and is excluded.
    Client 8 failed authentication and is excluded.
    Client 9 failed authentication and is excluded.
    Client 10 failed authentication and is excluded.
  Round 2 completed with Global Accuracy: 0.3611
  Round 3/10
    Client 2 failed authentication and is excluded.
    Client 5 failed authentication and is excluded.
    Client 8 failed authentication and is excluded.
    Client 9 failed authentication and is excluded.
    Client 10 failed authentication and is excluded.
  Round 3 completed with Global Accuracy: 

**2. Experiments with Epsilon 0.8**

In [None]:
import pandas as pd
import numpy as np
import time
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import lightgbm as lgb
import pickle
import os
import random

# Step 1: Load and preprocess the data
df = pd.read_csv('/content/drive/MyDrive/IDS-IOT2024/Process_1 IDS-IoT-2024.csv')

# Features are every column except the last one; labels come from 'Attack_Category_x'.
# NOTE(review): presumably 'Attack_Category_x' is the last column - if it is
# not, the label column stays inside X. Confirm against the CSV header.
X, y = df.iloc[:, :-1].values, df['Attack_Category_x'].values

# Map the class labels to integer ids
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Hold out 20% of the rows as the shared test set used for every evaluation
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X, y_encoded, random_state=42, test_size=0.2
)

# Experiment setup
client_counts = [10, 25, 50, 75, 100]  # Number of clients to test
rounds = 10  # Number of federated learning rounds
privacy_budget = 0.8  # Privacy budget (epsilon)
results = []  # To store evaluation results for each client count

# Shuffle training data for IID client simulation (the permutation is not
# seeded, so client shards differ from run to run)
shuffled_indices = np.random.permutation(X_train_full.shape[0])
X_train_shuffled, y_train_shuffled = (
    X_train_full[shuffled_indices],
    y_train_full[shuffled_indices],
)

# Differential Privacy - Laplace Mechanism
def apply_laplace_noise(values, epsilon, sensitivity=1.0):
    """Add independent Laplace noise to every element of ``values``.

    Args:
        values: Array-like of numbers (ndarray, nested list or scalar).
        epsilon: Privacy budget; must be strictly positive. Smaller values
            give a larger noise scale.
        sensitivity: Numerator of the noise scale ``sensitivity / epsilon``.

    Returns:
        An ndarray with the same shape as ``values`` holding the noisy data.

    Raises:
        ValueError: If ``epsilon`` is not greater than zero.
    """
    # Reject zero/negative/NaN budgets up front instead of failing with a
    # ZeroDivisionError or an opaque error from inside NumPy.
    if not epsilon > 0:
        raise ValueError(f"epsilon must be positive, got {epsilon!r}")

    values = np.asarray(values)  # also accept plain lists and scalars
    scale = sensitivity / epsilon
    noise = np.random.laplace(0, scale, size=values.shape)
    return values + noise

# Schnorr Protocol for Authentication
class SchnorrProtocol:
    """Toy Schnorr identification protocol over the integers modulo ``p``.

    An instance holds one prover's key pair and per-run nonce, and also
    exposes the verifier's check.
    """

    def __init__(self, p, g):
        """Store the parameters and draw a fresh key pair.

        Args:
            p: Modulus used for every exponentiation.
            g: Base used for the public key and the commitments.
        """
        self.p, self.g = p, g
        # NOTE(review): np.random is not a cryptographic RNG; acceptable for
        # a simulation, not for real authentication.
        secret = np.random.randint(1, p)
        self.private_key = secret
        self.public_key = pow(g, secret, p)

    def generate_commitment(self):
        """Draw a fresh nonce ``r`` and return the commitment ``g**r mod p``."""
        nonce = np.random.randint(1, self.p)
        self.r = nonce
        self.R = pow(self.g, nonce, self.p)
        return self.R

    def compute_response(self, challenge):
        """Return ``s = (r + challenge * private_key) mod (p - 1)``.

        ``generate_commitment`` must have been called first, since it is
        what sets ``self.r``.
        """
        exponent_modulus = self.p - 1
        self.s = (self.r + challenge * self.private_key) % exponent_modulus
        return self.s

    def verify(self, R, s, public_key, challenge):
        """Return True when ``g**s == R * public_key**challenge (mod p)``."""
        expected = (R * pow(public_key, challenge, self.p)) % self.p
        return pow(self.g, s, self.p) == expected

# Schnorr Parameters (toy-sized modulus: fine for a simulation, far too small
# to offer real security)
p, g = 104729, 2

# LightGBM parameters shared by every client's local model
lgb_params = dict(
    objective='multiclass',
    num_class=len(np.unique(y_encoded)),  # one output per encoded label
    boosting_type='gbdt',
    metric='multi_logloss',
    learning_rate=0.1,
    num_leaves=31,
    max_depth=-1,
    verbosity=-1,
)

# Robust Aggregation (Median Aggregation)
def robust_aggregation(updates):
    """Combine client updates with an element-wise median.

    Args:
        updates: Sequence of equally shaped arrays, one entry per client.

    Returns:
        Array holding, for each position, the median over the first
        (client) axis.
    """
    stacked_updates = np.asanyarray(updates)
    return np.median(stacked_updates, axis=0)

# Experiment loop: for every client count, run `rounds` federated rounds of
# authenticate -> train locally -> add noise to predictions -> median-aggregate,
# then record the averaged metrics in `results`.
for client_count in client_counts:
    print(f"\nRunning experiment for {client_count} clients...")

    # Step 1: Simulate clients. Each client gets a contiguous shard of the
    # shuffled training set; the last client also takes the remainder rows.
    client_data = []
    samples_per_client = len(X_train_shuffled) // client_count

    for i in range(client_count):
        start_idx = i * samples_per_client
        end_idx = start_idx + samples_per_client if i < client_count - 1 else len(X_train_shuffled)
        X_client = X_train_shuffled[start_idx:end_idx]
        y_client = y_train_shuffled[start_idx:end_idx]
        client_data.append((X_client, y_client))

    # Randomly assign poisoned clients. The model-poisoning count is capped by
    # the clients left after data poisoning so random.sample cannot be asked
    # for more items than its population holds (small client counts).
    num_data_poisoning_clients = min(2, client_count)
    num_model_poisoning_clients = min(3, client_count - num_data_poisoning_clients)
    data_poisoning_clients = random.sample(range(client_count), num_data_poisoning_clients)
    model_poisoning_clients = random.sample(
        [client for client in range(client_count) if client not in data_poisoning_clients],
        num_model_poisoning_clients,
    )
    # Built once per experiment instead of concatenating lists per client/round.
    poisoned_clients = set(data_poisoning_clients) | set(model_poisoning_clients)

    # Reset performance tracking for the current experiment
    excluded_clients = 0
    local_accuracies = []
    global_accuracies = []
    client_auth_times = []
    client_train_times = []
    round_latencies = []
    aggregation_times = []
    communication_overheads = []

    # Reinitialize Schnorr protocols for the current number of clients
    client_protocols = [SchnorrProtocol(p, g) for _ in range(client_count)]

    # Step 2: Federated rounds
    for round_num in range(rounds):
        print(f"  Round {round_num + 1}/{rounds}")
        round_start_time = time.time()

        local_models = []
        local_predictions = []
        round_client_auth_times = []
        round_client_train_times = []
        round_comm_overhead = 0
        # Stays None when no client authenticates, so the round summary can
        # never print a stale or undefined accuracy.
        global_accuracy = None

        for client_num, (X_client, y_client) in enumerate(client_data):
            auth_start_time = time.time()
            schnorr = client_protocols[client_num]
            R = schnorr.generate_commitment()
            challenge = np.random.randint(1, p)

            # Simulate poisoning in response: a poisoned client submits a
            # tampered response, which makes verification fail below.
            response = schnorr.compute_response(challenge)
            if client_num in poisoned_clients:
                response += 1

            if not schnorr.verify(R, response, schnorr.public_key, challenge):
                print(f"    Client {client_num + 1} failed authentication and is excluded.")
                excluded_clients += 1
                continue
            auth_end_time = time.time()

            # NOTE: because poisoned clients are rejected above, the two
            # poisoning branches below only run if that simulation is changed.
            train_start_time = time.time()
            if client_num in data_poisoning_clients:
                print(f"    Simulating data poisoning on Client {client_num + 1}")
                # X_client is a slice (view) of X_train_shuffled; perturb a
                # copy so the noise neither accumulates across rounds nor
                # corrupts the data used by later experiments.
                X_client = X_client + np.random.normal(0, 0.5, X_client.shape)

            # Train local model
            train_data = lgb.Dataset(X_client, label=y_client)
            local_model = lgb.train(lgb_params, train_data, num_boost_round=20)

            if client_num in model_poisoning_clients:
                print(f"    Simulating model poisoning on Client {client_num + 1}")
                # NOTE(review): this edits the stored parameter dict after
                # training; presumably the trained trees are unaffected - confirm.
                local_model.params['num_leaves'] += 5

            train_end_time = time.time()
            round_client_auth_times.append(auth_end_time - auth_start_time)
            round_client_train_times.append(train_end_time - train_start_time)

            # Communication cost is approximated by the pickled model size.
            local_model_bytes = pickle.dumps(local_model)
            round_comm_overhead += len(local_model_bytes)

            local_models.append(local_model)
            predictions = local_model.predict(X_test)
            noisy_predictions = apply_laplace_noise(predictions, epsilon=privacy_budget, sensitivity=1.0)
            local_predictions.append(noisy_predictions)

            local_pred = np.argmax(noisy_predictions, axis=1)
            local_accuracy = accuracy_score(y_test, local_pred)
            local_accuracies.append(local_accuracy)

        if local_predictions:
            aggregation_start_time = time.time()
            aggregated_predictions = robust_aggregation(local_predictions)
            y_pred_aggregated = np.argmax(aggregated_predictions, axis=1)
            global_accuracy = accuracy_score(y_test, y_pred_aggregated)
            global_accuracies.append(global_accuracy)
            aggregation_end_time = time.time()
            aggregation_times.append(aggregation_end_time - aggregation_start_time)

            aggregated_model_bytes = pickle.dumps(aggregated_predictions)
            round_comm_overhead += len(aggregated_model_bytes)

        communication_overheads.append(round_comm_overhead)
        round_latencies.append(time.time() - round_start_time)
        client_auth_times.extend(round_client_auth_times)
        client_train_times.extend(round_client_train_times)

        if global_accuracy is None:
            print(f"  Round {round_num + 1} completed with no authenticated clients; nothing to aggregate.")
        else:
            print(f"  Round {round_num + 1} completed with Global Accuracy: {global_accuracy:.4f}")

    # Save global model
    # NOTE(review): aggregation works on predictions, so no merged model
    # exists; what is persisted is the first authenticated client's model
    # from the final round.
    model_path = f"global_model_{client_count}.pkl"
    if local_models:
        with open(model_path, "wb") as f:
            pickle.dump(local_models[0], f)
        global_model_size = os.path.getsize(model_path) / (1024 ** 2)  # in MB
    else:
        # No client authenticated in the final round: nothing to save.
        global_model_size = float("nan")

    results.append({
        'clients': client_count,
        'avg_global_accuracy': np.mean(global_accuracies),
        'avg_local_accuracy': np.mean(local_accuracies),
        'avg_train_time': np.mean(client_train_times),
        'avg_auth_time': np.mean(client_auth_times),
        'avg_round_latency': np.mean(round_latencies),
        'avg_aggregation_time': np.mean(aggregation_times),
        'avg_comm_overhead': np.mean(communication_overheads) / (1024 ** 2),
        'global_model_size': global_model_size,
        'excluded_clients': excluded_clients,  # total rejections over all rounds
    })

# Print all results
print("\nExperiment Results:")
for result in results:
    print(f"Clients: {result['clients']}, "
          f"Avg Global Accuracy: {result['avg_global_accuracy']:.5f}, "
          f"Avg Local Accuracy: {result['avg_local_accuracy']:.5f}, "
          f"Avg Train Time: {result['avg_train_time']:.5f}s, "
          f"Avg Auth Time: {result['avg_auth_time']:.5f}s, "
          f"Avg Round Latency: {result['avg_round_latency']:.5f}s, "
          f"Avg Aggregation Time: {result['avg_aggregation_time']:.5f}s, "
          f"Avg Comm Overhead: {result['avg_comm_overhead']:.5f}MB, "
          f"Global Model Size: {result['global_model_size']:.5f}MB")



Running experiment for 10 clients...
  Round 1/10
    Client 3 failed authentication and is excluded.
    Client 4 failed authentication and is excluded.
    Client 5 failed authentication and is excluded.
    Client 6 failed authentication and is excluded.
    Client 7 failed authentication and is excluded.
  Round 1 completed with Global Accuracy: 0.5193
  Round 2/10
    Client 3 failed authentication and is excluded.
    Client 4 failed authentication and is excluded.
    Client 5 failed authentication and is excluded.
    Client 6 failed authentication and is excluded.
    Client 7 failed authentication and is excluded.
  Round 2 completed with Global Accuracy: 0.5265
  Round 3/10
    Client 3 failed authentication and is excluded.
    Client 4 failed authentication and is excluded.
    Client 5 failed authentication and is excluded.
    Client 6 failed authentication and is excluded.
    Client 7 failed authentication and is excluded.
  Round 3 completed with Global Accuracy: 0.5

**3. Experiments with Epsilon 1.0**

In [None]:
import pandas as pd
import numpy as np
import time
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import lightgbm as lgb
import pickle
import os
import random

# Step 1: Load and preprocess the data
df = pd.read_csv('/content/drive/MyDrive/IDS-IOT2024/Process_1 IDS-IoT-2024.csv')

# Features are every column except the last one; labels come from 'Attack_Category_x'.
# NOTE(review): presumably 'Attack_Category_x' is the last column - if it is
# not, the label column stays inside X. Confirm against the CSV header.
X, y = df.iloc[:, :-1].values, df['Attack_Category_x'].values

# Map the class labels to integer ids
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Hold out 20% of the rows as the shared test set used for every evaluation
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X, y_encoded, random_state=42, test_size=0.2
)

# Experiment setup
client_counts = [10, 25, 50, 75, 100]  # Number of clients to test
rounds = 10  # Number of federated learning rounds
privacy_budget = 1.0  # Privacy budget (epsilon)
results = []  # To store evaluation results for each client count

# Shuffle training data for IID client simulation (the permutation is not
# seeded, so client shards differ from run to run)
shuffled_indices = np.random.permutation(X_train_full.shape[0])
X_train_shuffled, y_train_shuffled = (
    X_train_full[shuffled_indices],
    y_train_full[shuffled_indices],
)

# Differential Privacy - Laplace Mechanism
def apply_laplace_noise(values, epsilon, sensitivity=1.0):
    """Add independent Laplace noise to every element of ``values``.

    Args:
        values: Array-like of numbers (ndarray, nested list or scalar).
        epsilon: Privacy budget; must be strictly positive. Smaller values
            give a larger noise scale.
        sensitivity: Numerator of the noise scale ``sensitivity / epsilon``.

    Returns:
        An ndarray with the same shape as ``values`` holding the noisy data.

    Raises:
        ValueError: If ``epsilon`` is not greater than zero.
    """
    # Reject zero/negative/NaN budgets up front instead of failing with a
    # ZeroDivisionError or an opaque error from inside NumPy.
    if not epsilon > 0:
        raise ValueError(f"epsilon must be positive, got {epsilon!r}")

    values = np.asarray(values)  # also accept plain lists and scalars
    scale = sensitivity / epsilon
    noise = np.random.laplace(0, scale, size=values.shape)
    return values + noise

# Schnorr Protocol for Authentication
class SchnorrProtocol:
    """Toy Schnorr identification protocol over the integers modulo ``p``.

    An instance holds one prover's key pair and per-run nonce, and also
    exposes the verifier's check.
    """

    def __init__(self, p, g):
        """Store the parameters and draw a fresh key pair.

        Args:
            p: Modulus used for every exponentiation.
            g: Base used for the public key and the commitments.
        """
        self.p, self.g = p, g
        # NOTE(review): np.random is not a cryptographic RNG; acceptable for
        # a simulation, not for real authentication.
        secret = np.random.randint(1, p)
        self.private_key = secret
        self.public_key = pow(g, secret, p)

    def generate_commitment(self):
        """Draw a fresh nonce ``r`` and return the commitment ``g**r mod p``."""
        nonce = np.random.randint(1, self.p)
        self.r = nonce
        self.R = pow(self.g, nonce, self.p)
        return self.R

    def compute_response(self, challenge):
        """Return ``s = (r + challenge * private_key) mod (p - 1)``.

        ``generate_commitment`` must have been called first, since it is
        what sets ``self.r``.
        """
        exponent_modulus = self.p - 1
        self.s = (self.r + challenge * self.private_key) % exponent_modulus
        return self.s

    def verify(self, R, s, public_key, challenge):
        """Return True when ``g**s == R * public_key**challenge (mod p)``."""
        expected = (R * pow(public_key, challenge, self.p)) % self.p
        return pow(self.g, s, self.p) == expected

# Schnorr Parameters (toy-sized modulus: fine for a simulation, far too small
# to offer real security)
p, g = 104729, 2

# LightGBM parameters shared by every client's local model
lgb_params = dict(
    objective='multiclass',
    num_class=len(np.unique(y_encoded)),  # one output per encoded label
    boosting_type='gbdt',
    metric='multi_logloss',
    learning_rate=0.1,
    num_leaves=31,
    max_depth=-1,
    verbosity=-1,
)

# Robust Aggregation (Median Aggregation)
def robust_aggregation(updates):
    """Combine client updates with an element-wise median.

    Args:
        updates: Sequence of equally shaped arrays, one entry per client.

    Returns:
        Array holding, for each position, the median over the first
        (client) axis.
    """
    stacked_updates = np.asanyarray(updates)
    return np.median(stacked_updates, axis=0)

# Experiment loop: for every client count, run `rounds` federated rounds of
# authenticate -> train locally -> add noise to predictions -> median-aggregate,
# then record the averaged metrics in `results`.
for client_count in client_counts:
    print(f"\nRunning experiment for {client_count} clients...")

    # Step 1: Simulate clients. Each client gets a contiguous shard of the
    # shuffled training set; the last client also takes the remainder rows.
    client_data = []
    samples_per_client = len(X_train_shuffled) // client_count

    for i in range(client_count):
        start_idx = i * samples_per_client
        end_idx = start_idx + samples_per_client if i < client_count - 1 else len(X_train_shuffled)
        X_client = X_train_shuffled[start_idx:end_idx]
        y_client = y_train_shuffled[start_idx:end_idx]
        client_data.append((X_client, y_client))

    # Randomly assign poisoned clients. The model-poisoning count is capped by
    # the clients left after data poisoning so random.sample cannot be asked
    # for more items than its population holds (small client counts).
    num_data_poisoning_clients = min(2, client_count)
    num_model_poisoning_clients = min(3, client_count - num_data_poisoning_clients)
    data_poisoning_clients = random.sample(range(client_count), num_data_poisoning_clients)
    model_poisoning_clients = random.sample(
        [client for client in range(client_count) if client not in data_poisoning_clients],
        num_model_poisoning_clients,
    )
    # Built once per experiment instead of concatenating lists per client/round.
    poisoned_clients = set(data_poisoning_clients) | set(model_poisoning_clients)

    # Reset performance tracking for the current experiment
    excluded_clients = 0
    local_accuracies = []
    global_accuracies = []
    client_auth_times = []
    client_train_times = []
    round_latencies = []
    aggregation_times = []
    communication_overheads = []

    # Reinitialize Schnorr protocols for the current number of clients
    client_protocols = [SchnorrProtocol(p, g) for _ in range(client_count)]

    # Step 2: Federated rounds
    for round_num in range(rounds):
        print(f"  Round {round_num + 1}/{rounds}")
        round_start_time = time.time()

        local_models = []
        local_predictions = []
        round_client_auth_times = []
        round_client_train_times = []
        round_comm_overhead = 0
        # Stays None when no client authenticates, so the round summary can
        # never print a stale or undefined accuracy.
        global_accuracy = None

        for client_num, (X_client, y_client) in enumerate(client_data):
            auth_start_time = time.time()
            schnorr = client_protocols[client_num]
            R = schnorr.generate_commitment()
            challenge = np.random.randint(1, p)

            # Simulate poisoning in response: a poisoned client submits a
            # tampered response, which makes verification fail below.
            response = schnorr.compute_response(challenge)
            if client_num in poisoned_clients:
                response += 1

            if not schnorr.verify(R, response, schnorr.public_key, challenge):
                print(f"    Client {client_num + 1} failed authentication and is excluded.")
                excluded_clients += 1
                continue
            auth_end_time = time.time()

            # NOTE: because poisoned clients are rejected above, the two
            # poisoning branches below only run if that simulation is changed.
            train_start_time = time.time()
            if client_num in data_poisoning_clients:
                print(f"    Simulating data poisoning on Client {client_num + 1}")
                # X_client is a slice (view) of X_train_shuffled; perturb a
                # copy so the noise neither accumulates across rounds nor
                # corrupts the data used by later experiments.
                X_client = X_client + np.random.normal(0, 0.5, X_client.shape)

            # Train local model
            train_data = lgb.Dataset(X_client, label=y_client)
            local_model = lgb.train(lgb_params, train_data, num_boost_round=20)

            if client_num in model_poisoning_clients:
                print(f"    Simulating model poisoning on Client {client_num + 1}")
                # NOTE(review): this edits the stored parameter dict after
                # training; presumably the trained trees are unaffected - confirm.
                local_model.params['num_leaves'] += 5

            train_end_time = time.time()
            round_client_auth_times.append(auth_end_time - auth_start_time)
            round_client_train_times.append(train_end_time - train_start_time)

            # Communication cost is approximated by the pickled model size.
            local_model_bytes = pickle.dumps(local_model)
            round_comm_overhead += len(local_model_bytes)

            local_models.append(local_model)
            predictions = local_model.predict(X_test)
            noisy_predictions = apply_laplace_noise(predictions, epsilon=privacy_budget, sensitivity=1.0)
            local_predictions.append(noisy_predictions)

            local_pred = np.argmax(noisy_predictions, axis=1)
            local_accuracy = accuracy_score(y_test, local_pred)
            local_accuracies.append(local_accuracy)

        if local_predictions:
            aggregation_start_time = time.time()
            aggregated_predictions = robust_aggregation(local_predictions)
            y_pred_aggregated = np.argmax(aggregated_predictions, axis=1)
            global_accuracy = accuracy_score(y_test, y_pred_aggregated)
            global_accuracies.append(global_accuracy)
            aggregation_end_time = time.time()
            aggregation_times.append(aggregation_end_time - aggregation_start_time)

            aggregated_model_bytes = pickle.dumps(aggregated_predictions)
            round_comm_overhead += len(aggregated_model_bytes)

        communication_overheads.append(round_comm_overhead)
        round_latencies.append(time.time() - round_start_time)
        client_auth_times.extend(round_client_auth_times)
        client_train_times.extend(round_client_train_times)

        if global_accuracy is None:
            print(f"  Round {round_num + 1} completed with no authenticated clients; nothing to aggregate.")
        else:
            print(f"  Round {round_num + 1} completed with Global Accuracy: {global_accuracy:.4f}")

    # Save global model
    # NOTE(review): aggregation works on predictions, so no merged model
    # exists; what is persisted is the first authenticated client's model
    # from the final round.
    model_path = f"global_model_{client_count}.pkl"
    if local_models:
        with open(model_path, "wb") as f:
            pickle.dump(local_models[0], f)
        global_model_size = os.path.getsize(model_path) / (1024 ** 2)  # in MB
    else:
        # No client authenticated in the final round: nothing to save.
        global_model_size = float("nan")

    results.append({
        'clients': client_count,
        'avg_global_accuracy': np.mean(global_accuracies),
        'avg_local_accuracy': np.mean(local_accuracies),
        'avg_train_time': np.mean(client_train_times),
        'avg_auth_time': np.mean(client_auth_times),
        'avg_round_latency': np.mean(round_latencies),
        'avg_aggregation_time': np.mean(aggregation_times),
        'avg_comm_overhead': np.mean(communication_overheads) / (1024 ** 2),
        'global_model_size': global_model_size,
        'excluded_clients': excluded_clients,  # total rejections over all rounds
    })

# Print all results
print("\nExperiment Results:")
for result in results:
    print(f"Clients: {result['clients']}, "
          f"Avg Global Accuracy: {result['avg_global_accuracy']:.5f}, "
          f"Avg Local Accuracy: {result['avg_local_accuracy']:.5f}, "
          f"Avg Train Time: {result['avg_train_time']:.5f}s, "
          f"Avg Auth Time: {result['avg_auth_time']:.5f}s, "
          f"Avg Round Latency: {result['avg_round_latency']:.5f}s, "
          f"Avg Aggregation Time: {result['avg_aggregation_time']:.5f}s, "
          f"Avg Comm Overhead: {result['avg_comm_overhead']:.5f}MB, "
          f"Global Model Size: {result['global_model_size']:.5f}MB")



Running experiment for 10 clients...
  Round 1/10
    Client 2 failed authentication and is excluded.
    Client 6 failed authentication and is excluded.
    Client 7 failed authentication and is excluded.
    Client 8 failed authentication and is excluded.
    Client 9 failed authentication and is excluded.
  Round 1 completed with Global Accuracy: 0.6307
  Round 2/10
    Client 2 failed authentication and is excluded.
    Client 6 failed authentication and is excluded.
    Client 7 failed authentication and is excluded.
    Client 8 failed authentication and is excluded.
    Client 9 failed authentication and is excluded.
  Round 2 completed with Global Accuracy: 0.6319
  Round 3/10
    Client 2 failed authentication and is excluded.
    Client 6 failed authentication and is excluded.
    Client 7 failed authentication and is excluded.
    Client 8 failed authentication and is excluded.
    Client 9 failed authentication and is excluded.
  Round 3 completed with Global Accuracy: 0.6

**4. Experiments with Epsilon 1.3**

In [None]:
import pandas as pd
import numpy as np
import time
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import lightgbm as lgb
import pickle
import os
import random

# Step 1: Load and preprocess the data
df = pd.read_csv('/content/drive/MyDrive/IDS-IOT2024/Process_1 IDS-IoT-2024.csv')

# Features are every column except the last one; labels come from 'Attack_Category_x'.
# NOTE(review): presumably 'Attack_Category_x' is the last column - if it is
# not, the label column stays inside X. Confirm against the CSV header.
X, y = df.iloc[:, :-1].values, df['Attack_Category_x'].values

# Map the class labels to integer ids
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Hold out 20% of the rows as the shared test set used for every evaluation
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X, y_encoded, random_state=42, test_size=0.2
)

# Experiment setup
client_counts = [10, 25, 50, 75, 100]  # Number of clients to test
rounds = 10  # Number of federated learning rounds
privacy_budget = 1.3  # Privacy budget (epsilon)
results = []  # To store evaluation results for each client count

# Shuffle training data for IID client simulation (the permutation is not
# seeded, so client shards differ from run to run)
shuffled_indices = np.random.permutation(X_train_full.shape[0])
X_train_shuffled, y_train_shuffled = (
    X_train_full[shuffled_indices],
    y_train_full[shuffled_indices],
)

# Differential Privacy - Laplace Mechanism
def apply_laplace_noise(values, epsilon, sensitivity=1.0):
    """Add independent Laplace noise to every element of ``values``.

    Args:
        values: Array-like of numbers (ndarray, nested list or scalar).
        epsilon: Privacy budget; must be strictly positive. Smaller values
            give a larger noise scale.
        sensitivity: Numerator of the noise scale ``sensitivity / epsilon``.

    Returns:
        An ndarray with the same shape as ``values`` holding the noisy data.

    Raises:
        ValueError: If ``epsilon`` is not greater than zero.
    """
    # Reject zero/negative/NaN budgets up front instead of failing with a
    # ZeroDivisionError or an opaque error from inside NumPy.
    if not epsilon > 0:
        raise ValueError(f"epsilon must be positive, got {epsilon!r}")

    values = np.asarray(values)  # also accept plain lists and scalars
    scale = sensitivity / epsilon
    noise = np.random.laplace(0, scale, size=values.shape)
    return values + noise

# Schnorr Protocol for Authentication
class SchnorrProtocol:
    """Schnorr-style identification over integers modulo ``p`` with base ``g``.

    Each instance owns one key pair and plays the prover role
    (``generate_commitment`` / ``compute_response``); ``verify`` performs the
    verifier-side check ``g**s == R * public_key**challenge (mod p)``.

    NOTE: secrets are drawn from ``np.random``, which is not a
    cryptographically secure generator; suitable for simulation only.
    """

    def __init__(self, p, g):
        """Store the group parameters and draw a key pair."""
        self.p, self.g = p, g
        # Secret exponent in [1, p - 1]; the public key is g**private_key mod p.
        self.private_key = np.random.randint(1, p)
        self.public_key = pow(g, self.private_key, p)

    def generate_commitment(self):
        """Draw a fresh nonce ``r`` and return the commitment ``R = g**r mod p``."""
        nonce = np.random.randint(1, self.p)
        self.r = nonce
        self.R = pow(self.g, nonce, self.p)
        return self.R

    def compute_response(self, challenge):
        """Return ``s = (r + challenge * private_key) mod (p - 1)`` for the last nonce."""
        exponent_modulus = self.p - 1
        self.s = (self.r + challenge * self.private_key) % exponent_modulus
        return self.s

    def verify(self, R, s, public_key, challenge):
        """Return True when ``g**s`` equals ``R * public_key**challenge`` modulo ``p``."""
        modulus = self.p
        expected = (R * pow(public_key, challenge, modulus)) % modulus
        return pow(self.g, s, modulus) == expected

# Schnorr parameters: modulus p and base g shared by all clients.
# NOTE(review): responses are reduced mod (p - 1), which presumes p is prime — confirm.
p, g = 104729, 2

# LightGBM parameters used for every client's local model
lgb_params = dict(
    objective='multiclass',
    num_class=np.unique(y_encoded).size,  # number of distinct encoded labels
    boosting_type='gbdt',
    metric='multi_logloss',
    learning_rate=0.1,
    num_leaves=31,
    max_depth=-1,
    verbosity=-1,
)

# Robust Aggregation (Median Aggregation)
def robust_aggregation(updates):
    """Combine client updates by taking the element-wise median across clients.

    ``updates`` is a sequence of equally shaped arrays (one per client); the
    result has the shape of a single update.
    """
    stacked_updates = np.asanyarray(updates)
    return np.median(stacked_updates, axis=0)

# Experiment loop: for every entry in `client_counts`, shard the shuffled
# training data, run `rounds` federated rounds (Schnorr authentication ->
# local LightGBM training -> Laplace-noised predictions -> median aggregation)
# and append one summary dict to `results`.
def _mean_or_nan(values):
    """Return the mean of *values*, or NaN when there is nothing to average."""
    return np.mean(values) if len(values) > 0 else np.nan


for client_count in client_counts:
    print(f"\nRunning experiment for {client_count} clients...")

    # Step 1: Simulate clients with contiguous, near-equal shards; the last
    # client also receives the remainder rows.
    client_data = []
    samples_per_client = len(X_train_shuffled) // client_count

    for i in range(client_count):
        start_idx = i * samples_per_client
        end_idx = start_idx + samples_per_client if i < client_count - 1 else len(X_train_shuffled)
        X_client = X_train_shuffled[start_idx:end_idx]
        y_client = y_train_shuffled[start_idx:end_idx]
        client_data.append((X_client, y_client))

    # Randomly assign poisoned clients. Model-poisoning clients are drawn from
    # the clients left after the data-poisoning picks, so the requested count
    # is capped by that pool; otherwise random.sample raises ValueError for
    # small client counts.
    num_data_poisoning_clients = min(2, client_count)
    data_poisoning_clients = random.sample(range(client_count), num_data_poisoning_clients)
    model_poisoning_candidates = [
        client for client in range(client_count) if client not in data_poisoning_clients
    ]
    num_model_poisoning_clients = min(3, len(model_poisoning_candidates))
    model_poisoning_clients = random.sample(model_poisoning_candidates, num_model_poisoning_clients)
    # Built once per experiment for O(1) membership tests inside the rounds.
    poisoned_clients = set(data_poisoning_clients) | set(model_poisoning_clients)

    # Reset performance tracking for the current experiment
    excluded_clients = 0  # authentication failures summed over all rounds
    local_accuracies = []
    global_accuracies = []
    client_auth_times = []
    client_train_times = []
    round_latencies = []
    aggregation_times = []
    communication_overheads = []
    local_models = []  # stays empty if no round runs or nobody authenticates

    # Reinitialize Schnorr protocols for the current number of clients
    client_protocols = [SchnorrProtocol(p, g) for _ in range(client_count)]

    # Step 2: Federated rounds
    for round_num in range(rounds):
        print(f"  Round {round_num + 1}/{rounds}")
        # perf_counter is monotonic, so durations are immune to clock changes.
        round_start_time = time.perf_counter()

        local_models = []
        local_predictions = []
        round_client_auth_times = []
        round_client_train_times = []
        round_comm_overhead = 0

        for client_num, (X_client, y_client) in enumerate(client_data):
            auth_start_time = time.perf_counter()
            schnorr = client_protocols[client_num]
            R = schnorr.generate_commitment()
            challenge = np.random.randint(1, p)

            # Simulate poisoning in response: poisoned clients submit a
            # response that is off by one, which makes verification fail.
            response = schnorr.compute_response(challenge)
            if client_num in poisoned_clients:
                response += 1

            if not schnorr.verify(R, response, schnorr.public_key, challenge):
                print(f"    Client {client_num + 1} failed authentication and is excluded.")
                excluded_clients += 1
                continue
            auth_end_time = time.perf_counter()

            # NOTE: every poisoned client is rejected above, so the two
            # "Simulating ... poisoning" branches below are not reached with
            # the current response tampering.
            train_start_time = time.perf_counter()
            if client_num in data_poisoning_clients:
                print(f"    Simulating data poisoning on Client {client_num + 1}")
                # Out-of-place add: X_client is a slice of X_train_shuffled,
                # so `+=` would write the noise into the shared training data
                # (and fail for integer feature dtypes).
                X_client = X_client + np.random.normal(0, 0.5, X_client.shape)

            # Train local model
            train_data = lgb.Dataset(X_client, label=y_client)
            local_model = lgb.train(lgb_params, train_data, num_boost_round=20)

            if client_num in model_poisoning_clients:
                print(f"    Simulating model poisoning on Client {client_num + 1}")
                # NOTE(review): this only edits the params dict stored on the
                # already-trained model; it presumably does not change its
                # predictions — confirm.
                local_model.params['num_leaves'] += 5

            train_end_time = time.perf_counter()
            round_client_auth_times.append(auth_end_time - auth_start_time)
            round_client_train_times.append(train_end_time - train_start_time)

            # Communication cost is approximated by the pickled model size.
            local_model_bytes = pickle.dumps(local_model)
            round_comm_overhead += len(local_model_bytes)

            local_models.append(local_model)
            predictions = local_model.predict(X_test)
            noisy_predictions = apply_laplace_noise(predictions, epsilon=privacy_budget, sensitivity=1.0)
            local_predictions.append(noisy_predictions)

            local_pred = np.argmax(noisy_predictions, axis=1)
            local_accuracy = accuracy_score(y_test, local_pred)
            local_accuracies.append(local_accuracy)

        if local_predictions:
            aggregation_start_time = time.perf_counter()
            aggregated_predictions = robust_aggregation(local_predictions)
            y_pred_aggregated = np.argmax(aggregated_predictions, axis=1)
            global_accuracy = accuracy_score(y_test, y_pred_aggregated)
            global_accuracies.append(global_accuracy)
            aggregation_end_time = time.perf_counter()
            aggregation_times.append(aggregation_end_time - aggregation_start_time)

            aggregated_model_bytes = pickle.dumps(aggregated_predictions)
            round_comm_overhead += len(aggregated_model_bytes)

        communication_overheads.append(round_comm_overhead)
        round_latencies.append(time.perf_counter() - round_start_time)
        client_auth_times.extend(round_client_auth_times)
        client_train_times.extend(round_client_train_times)

        if local_predictions:
            print(f"  Round {round_num + 1} completed with Global Accuracy: {global_accuracy:.4f}")
        else:
            # Without this branch the print would hit an undefined or stale
            # `global_accuracy` when every client was excluded.
            print(f"  Round {round_num + 1} completed with no authenticated clients; nothing to aggregate.")

    # Save global model. Aggregation operates on predictions, so no merged
    # model object exists; the file holds the first authenticated client's
    # model from the final round.
    model_path = f"global_model_{client_count}.pkl"
    if local_models:
        with open(model_path, "wb") as f:
            pickle.dump(local_models[0], f)
        global_model_size = os.path.getsize(model_path) / (1024 ** 2)  # in MB
    else:
        global_model_size = np.nan

    results.append({
        'clients': client_count,
        'avg_global_accuracy': _mean_or_nan(global_accuracies),
        'avg_local_accuracy': _mean_or_nan(local_accuracies),
        'avg_train_time': _mean_or_nan(client_train_times),
        'avg_auth_time': _mean_or_nan(client_auth_times),
        'avg_round_latency': _mean_or_nan(round_latencies),
        'avg_aggregation_time': _mean_or_nan(aggregation_times),
        'avg_comm_overhead': _mean_or_nan(communication_overheads) / (1024 ** 2),
        'global_model_size': global_model_size,
        'excluded_clients': excluded_clients,
    })

# Print all results: one comma-separated line per client count.
print("\nExperiment Results:")
# (label, key in the result dict, unit suffix) for every metric shown with 5 decimals
metric_columns = (
    ("Avg Global Accuracy", 'avg_global_accuracy', ""),
    ("Avg Local Accuracy", 'avg_local_accuracy', ""),
    ("Avg Train Time", 'avg_train_time', "s"),
    ("Avg Auth Time", 'avg_auth_time', "s"),
    ("Avg Round Latency", 'avg_round_latency', "s"),
    ("Avg Aggregation Time", 'avg_aggregation_time', "s"),
    ("Avg Comm Overhead", 'avg_comm_overhead', "MB"),
    ("Global Model Size", 'global_model_size', "MB"),
)
for result in results:
    line_parts = [f"Clients: {result['clients']}"]
    line_parts.extend(
        f"{label}: {result[key]:.5f}{unit}" for label, key, unit in metric_columns
    )
    print(", ".join(line_parts))



Running experiment for 10 clients...
  Round 1/10
    Client 1 failed authentication and is excluded.
    Client 3 failed authentication and is excluded.
    Client 7 failed authentication and is excluded.
    Client 8 failed authentication and is excluded.
    Client 10 failed authentication and is excluded.
  Round 1 completed with Global Accuracy: 0.7595
  Round 2/10
    Client 1 failed authentication and is excluded.
    Client 3 failed authentication and is excluded.
    Client 7 failed authentication and is excluded.
    Client 8 failed authentication and is excluded.
    Client 10 failed authentication and is excluded.
  Round 2 completed with Global Accuracy: 0.7633
  Round 3/10
    Client 1 failed authentication and is excluded.
    Client 3 failed authentication and is excluded.
    Client 7 failed authentication and is excluded.
    Client 8 failed authentication and is excluded.
    Client 10 failed authentication and is excluded.
  Round 3 completed with Global Accuracy: 

**5. Experiments with Epsilon 1.5**

In [None]:
import pandas as pd
import numpy as np
import time
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import lightgbm as lgb
import pickle
import os
import random

# Step 1: Load the dataset and derive the feature matrix and label vector
df = pd.read_csv('/content/drive/MyDrive/IDS-IOT2024/Process_1 IDS-IoT-2024.csv')

# Labels come from the 'Attack_Category_x' column; features are every column
# except the last one.
# NOTE(review): this assumes 'Attack_Category_x' is the last column of the CSV;
# if it is not, the label stays inside X — confirm against the file.
y = df['Attack_Category_x'].values
X = df.iloc[:, :-1].values

# Encode the target labels as integers
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Hold out 20% of the rows as the shared test set used for every evaluation
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
)

# Experiment setup
privacy_budget = 1.5  # Privacy budget (epsilon) passed to the Laplace noise
rounds = 10  # Federated learning rounds per experiment
client_counts = [10, 25, 50, 75, 100]  # Client counts to sweep over
results = []  # One summary dict per client count

# Shuffle the training rows once so contiguous shards approximate IID clients.
# The permutation is unseeded, so shard contents differ between runs.
shuffled_indices = np.random.permutation(len(X_train_full))
X_train_shuffled, y_train_shuffled = (
    X_train_full[shuffled_indices],
    y_train_full[shuffled_indices],
)

# Differential Privacy - Laplace Mechanism
def apply_laplace_noise(values, epsilon, sensitivity=1.0):
    """Return a copy of *values* with zero-mean Laplace noise added element-wise.

    The noise scale is ``sensitivity / epsilon``, so a smaller ``epsilon``
    produces larger perturbations.

    Args:
        values: Array-like of numbers (ndarray, list, tuple, scalar). It is
            converted with ``np.asarray`` and is not modified.
        epsilon: Privacy budget; must be strictly positive.
        sensitivity: Sensitivity used to scale the noise; must be
            non-negative.

    Returns:
        A new ``numpy.ndarray`` with the same shape as *values*.

    Raises:
        ValueError: If ``epsilon`` is not > 0 or ``sensitivity`` is negative.
    """
    # `not epsilon > 0` also rejects NaN, which a plain `<= 0` test would miss.
    if not epsilon > 0:
        raise ValueError(f"epsilon must be positive, got {epsilon!r}")
    if sensitivity < 0:
        raise ValueError(f"sensitivity must be non-negative, got {sensitivity!r}")

    values = np.asarray(values)
    scale = sensitivity / epsilon
    noise = np.random.laplace(0, scale, size=values.shape)
    return values + noise

# Schnorr Protocol for Authentication
class SchnorrProtocol:
    """Schnorr-style identification over integers modulo ``p`` with base ``g``.

    Each instance owns one key pair and plays the prover role
    (``generate_commitment`` / ``compute_response``); ``verify`` performs the
    verifier-side check ``g**s == R * public_key**challenge (mod p)``.

    NOTE: secrets are drawn from ``np.random``, which is not a
    cryptographically secure generator; suitable for simulation only.
    """

    def __init__(self, p, g):
        """Store the group parameters and draw a key pair."""
        self.p, self.g = p, g
        # Secret exponent in [1, p - 1]; the public key is g**private_key mod p.
        self.private_key = np.random.randint(1, p)
        self.public_key = pow(g, self.private_key, p)

    def generate_commitment(self):
        """Draw a fresh nonce ``r`` and return the commitment ``R = g**r mod p``."""
        nonce = np.random.randint(1, self.p)
        self.r = nonce
        self.R = pow(self.g, nonce, self.p)
        return self.R

    def compute_response(self, challenge):
        """Return ``s = (r + challenge * private_key) mod (p - 1)`` for the last nonce."""
        exponent_modulus = self.p - 1
        self.s = (self.r + challenge * self.private_key) % exponent_modulus
        return self.s

    def verify(self, R, s, public_key, challenge):
        """Return True when ``g**s`` equals ``R * public_key**challenge`` modulo ``p``."""
        modulus = self.p
        expected = (R * pow(public_key, challenge, modulus)) % modulus
        return pow(self.g, s, modulus) == expected

# Schnorr parameters: modulus p and base g shared by all clients.
# NOTE(review): responses are reduced mod (p - 1), which presumes p is prime — confirm.
p, g = 104729, 2

# LightGBM parameters used for every client's local model
lgb_params = dict(
    objective='multiclass',
    num_class=np.unique(y_encoded).size,  # number of distinct encoded labels
    boosting_type='gbdt',
    metric='multi_logloss',
    learning_rate=0.1,
    num_leaves=31,
    max_depth=-1,
    verbosity=-1,
)

# Robust Aggregation (Median Aggregation)
def robust_aggregation(updates):
    """Combine client updates by taking the element-wise median across clients.

    ``updates`` is a sequence of equally shaped arrays (one per client); the
    result has the shape of a single update.
    """
    stacked_updates = np.asanyarray(updates)
    return np.median(stacked_updates, axis=0)

# Experiment loop: for every entry in `client_counts`, shard the shuffled
# training data, run `rounds` federated rounds (Schnorr authentication ->
# local LightGBM training -> Laplace-noised predictions -> median aggregation)
# and append one summary dict to `results`.
def _mean_or_nan(values):
    """Return the mean of *values*, or NaN when there is nothing to average."""
    return np.mean(values) if len(values) > 0 else np.nan


for client_count in client_counts:
    print(f"\nRunning experiment for {client_count} clients...")

    # Step 1: Simulate clients with contiguous, near-equal shards; the last
    # client also receives the remainder rows.
    client_data = []
    samples_per_client = len(X_train_shuffled) // client_count

    for i in range(client_count):
        start_idx = i * samples_per_client
        end_idx = start_idx + samples_per_client if i < client_count - 1 else len(X_train_shuffled)
        X_client = X_train_shuffled[start_idx:end_idx]
        y_client = y_train_shuffled[start_idx:end_idx]
        client_data.append((X_client, y_client))

    # Randomly assign poisoned clients. Model-poisoning clients are drawn from
    # the clients left after the data-poisoning picks, so the requested count
    # is capped by that pool; otherwise random.sample raises ValueError for
    # small client counts.
    num_data_poisoning_clients = min(2, client_count)
    data_poisoning_clients = random.sample(range(client_count), num_data_poisoning_clients)
    model_poisoning_candidates = [
        client for client in range(client_count) if client not in data_poisoning_clients
    ]
    num_model_poisoning_clients = min(3, len(model_poisoning_candidates))
    model_poisoning_clients = random.sample(model_poisoning_candidates, num_model_poisoning_clients)
    # Built once per experiment for O(1) membership tests inside the rounds.
    poisoned_clients = set(data_poisoning_clients) | set(model_poisoning_clients)

    # Reset performance tracking for the current experiment
    excluded_clients = 0  # authentication failures summed over all rounds
    local_accuracies = []
    global_accuracies = []
    client_auth_times = []
    client_train_times = []
    round_latencies = []
    aggregation_times = []
    communication_overheads = []
    local_models = []  # stays empty if no round runs or nobody authenticates

    # Reinitialize Schnorr protocols for the current number of clients
    client_protocols = [SchnorrProtocol(p, g) for _ in range(client_count)]

    # Step 2: Federated rounds
    for round_num in range(rounds):
        print(f"  Round {round_num + 1}/{rounds}")
        # perf_counter is monotonic, so durations are immune to clock changes.
        round_start_time = time.perf_counter()

        local_models = []
        local_predictions = []
        round_client_auth_times = []
        round_client_train_times = []
        round_comm_overhead = 0

        for client_num, (X_client, y_client) in enumerate(client_data):
            auth_start_time = time.perf_counter()
            schnorr = client_protocols[client_num]
            R = schnorr.generate_commitment()
            challenge = np.random.randint(1, p)

            # Simulate poisoning in response: poisoned clients submit a
            # response that is off by one, which makes verification fail.
            response = schnorr.compute_response(challenge)
            if client_num in poisoned_clients:
                response += 1

            if not schnorr.verify(R, response, schnorr.public_key, challenge):
                print(f"    Client {client_num + 1} failed authentication and is excluded.")
                excluded_clients += 1
                continue
            auth_end_time = time.perf_counter()

            # NOTE: every poisoned client is rejected above, so the two
            # "Simulating ... poisoning" branches below are not reached with
            # the current response tampering.
            train_start_time = time.perf_counter()
            if client_num in data_poisoning_clients:
                print(f"    Simulating data poisoning on Client {client_num + 1}")
                # Out-of-place add: X_client is a slice of X_train_shuffled,
                # so `+=` would write the noise into the shared training data
                # (and fail for integer feature dtypes).
                X_client = X_client + np.random.normal(0, 0.5, X_client.shape)

            # Train local model
            train_data = lgb.Dataset(X_client, label=y_client)
            local_model = lgb.train(lgb_params, train_data, num_boost_round=20)

            if client_num in model_poisoning_clients:
                print(f"    Simulating model poisoning on Client {client_num + 1}")
                # NOTE(review): this only edits the params dict stored on the
                # already-trained model; it presumably does not change its
                # predictions — confirm.
                local_model.params['num_leaves'] += 5

            train_end_time = time.perf_counter()
            round_client_auth_times.append(auth_end_time - auth_start_time)
            round_client_train_times.append(train_end_time - train_start_time)

            # Communication cost is approximated by the pickled model size.
            local_model_bytes = pickle.dumps(local_model)
            round_comm_overhead += len(local_model_bytes)

            local_models.append(local_model)
            predictions = local_model.predict(X_test)
            noisy_predictions = apply_laplace_noise(predictions, epsilon=privacy_budget, sensitivity=1.0)
            local_predictions.append(noisy_predictions)

            local_pred = np.argmax(noisy_predictions, axis=1)
            local_accuracy = accuracy_score(y_test, local_pred)
            local_accuracies.append(local_accuracy)

        if local_predictions:
            aggregation_start_time = time.perf_counter()
            aggregated_predictions = robust_aggregation(local_predictions)
            y_pred_aggregated = np.argmax(aggregated_predictions, axis=1)
            global_accuracy = accuracy_score(y_test, y_pred_aggregated)
            global_accuracies.append(global_accuracy)
            aggregation_end_time = time.perf_counter()
            aggregation_times.append(aggregation_end_time - aggregation_start_time)

            aggregated_model_bytes = pickle.dumps(aggregated_predictions)
            round_comm_overhead += len(aggregated_model_bytes)

        communication_overheads.append(round_comm_overhead)
        round_latencies.append(time.perf_counter() - round_start_time)
        client_auth_times.extend(round_client_auth_times)
        client_train_times.extend(round_client_train_times)

        if local_predictions:
            print(f"  Round {round_num + 1} completed with Global Accuracy: {global_accuracy:.4f}")
        else:
            # Without this branch the print would hit an undefined or stale
            # `global_accuracy` when every client was excluded.
            print(f"  Round {round_num + 1} completed with no authenticated clients; nothing to aggregate.")

    # Save global model. Aggregation operates on predictions, so no merged
    # model object exists; the file holds the first authenticated client's
    # model from the final round.
    model_path = f"global_model_{client_count}.pkl"
    if local_models:
        with open(model_path, "wb") as f:
            pickle.dump(local_models[0], f)
        global_model_size = os.path.getsize(model_path) / (1024 ** 2)  # in MB
    else:
        global_model_size = np.nan

    results.append({
        'clients': client_count,
        'avg_global_accuracy': _mean_or_nan(global_accuracies),
        'avg_local_accuracy': _mean_or_nan(local_accuracies),
        'avg_train_time': _mean_or_nan(client_train_times),
        'avg_auth_time': _mean_or_nan(client_auth_times),
        'avg_round_latency': _mean_or_nan(round_latencies),
        'avg_aggregation_time': _mean_or_nan(aggregation_times),
        'avg_comm_overhead': _mean_or_nan(communication_overheads) / (1024 ** 2),
        'global_model_size': global_model_size,
        'excluded_clients': excluded_clients,
    })

# Print all results: one comma-separated line per client count.
print("\nExperiment Results:")
# (label, key in the result dict, unit suffix) for every metric shown with 5 decimals
metric_columns = (
    ("Avg Global Accuracy", 'avg_global_accuracy', ""),
    ("Avg Local Accuracy", 'avg_local_accuracy', ""),
    ("Avg Train Time", 'avg_train_time', "s"),
    ("Avg Auth Time", 'avg_auth_time', "s"),
    ("Avg Round Latency", 'avg_round_latency', "s"),
    ("Avg Aggregation Time", 'avg_aggregation_time', "s"),
    ("Avg Comm Overhead", 'avg_comm_overhead', "MB"),
    ("Global Model Size", 'global_model_size', "MB"),
)
for result in results:
    line_parts = [f"Clients: {result['clients']}"]
    line_parts.extend(
        f"{label}: {result[key]:.5f}{unit}" for label, key, unit in metric_columns
    )
    print(", ".join(line_parts))



Running experiment for 10 clients...
  Round 1/10
    Client 1 failed authentication and is excluded.
    Client 3 failed authentication and is excluded.
    Client 7 failed authentication and is excluded.
    Client 9 failed authentication and is excluded.
    Client 10 failed authentication and is excluded.
  Round 1 completed with Global Accuracy: 0.8275
  Round 2/10
    Client 1 failed authentication and is excluded.
    Client 3 failed authentication and is excluded.
    Client 7 failed authentication and is excluded.
    Client 9 failed authentication and is excluded.
    Client 10 failed authentication and is excluded.
  Round 2 completed with Global Accuracy: 0.8252
  Round 3/10
    Client 1 failed authentication and is excluded.
    Client 3 failed authentication and is excluded.
    Client 7 failed authentication and is excluded.
    Client 9 failed authentication and is excluded.
    Client 10 failed authentication and is excluded.
  Round 3 completed with Global Accuracy: 