In [None]:
import pandas as pd
import numpy as np

# Read the source CSV into `df`; the bare expression on the last line makes
# the notebook render the frame as the cell output.
df = pd.read_csv(filepath_or_buffer='CICIDS_15.csv')
df

In [19]:
# Columns removed from the frame before any further processing.
columns_to_drop = [
    'Dst Port', 'Timestamp',
    'Fwd PSH Flags', 'Bwd PSH Flags',
    'Fwd URG Flags', 'Bwd URG Flags',
    'Flow Byts/s', 'Flow Pkts/s',
    'Protocol',
]
# The removal mutates `df` in place, so re-running this cell on the already
# trimmed frame raises KeyError (the labels no longer exist).
df.drop(labels=columns_to_drop, axis=1, inplace=True)


In [None]:
# Remove repeated rows (the first occurrence is kept), then every row that
# still has at least one missing value. Both steps mutate `df` in place.
df.drop_duplicates(keep='first', inplace=True)
df.dropna(axis=0, how='any', inplace=True)
# Cell output: number of missing values left in each column.
df.isnull().sum()

In [None]:
from sklearn.preprocessing import LabelEncoder

# Learn the label vocabulary first, then overwrite the column with the
# corresponding integer codes. `label_encoder` stays available for mapping
# codes back to the original class names.
label_encoder = LabelEncoder().fit(df['Label'])
df['Label'] = label_encoder.transform(df['Label'])
print(df['Label'])

In [None]:
# Number of rows for each distinct value of the Label column (cell output).
df['Label'].value_counts()

In [None]:
import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import LabelEncoder

# Convert every remaining text column to numbers. Values that cannot be parsed
# become NaN because of errors='coerce'.
string_cols = df.select_dtypes(include=['object']).columns
for col in string_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# SMOTE validates its input and refuses NaN / infinite values, so rows that
# contain either (including NaNs just introduced by the coercion above) are
# discarded before resampling.
df = df.replace([np.inf, -np.inf], np.nan).dropna()

X = df.drop(columns=['Label'])
y = df['Label']

# A fixed seed makes the synthetic samples (and therefore every downstream
# client file) reproducible across kernel restarts.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# Re-assemble features and label into a single frame for the later split.
df_resampled = pd.DataFrame(X_resampled, columns=X.columns)
df_resampled['Label'] = y_resampled
print("Class distribution after SMOTE:")
print(df_resampled['Label'].value_counts())


In [27]:
from sklearn.utils import shuffle

# Shuffle with a fixed seed so the same rows land in the same client file on
# every run of the notebook.
df_shuffled = shuffle(df_resampled, random_state=42)

file_names = ["client1.csv", "client2.csv", "client3.csv", "client4.csv","client5.csv", "client6.csv", "client7.csv", "client8.csv"]
split_size = len(df_shuffled) // len(file_names)

# Equal-sized consecutive slices for all but the last client; the last slice
# is open-ended so it also receives the rows left over by the integer division.
last_part = len(file_names) - 1
parts = [df_shuffled[k * split_size:(k + 1) * split_size] for k in range(last_part)]
parts.append(df_shuffled[last_part * split_size:])

# Keep the individual names available for any cell that refers to them.
(df_part1, df_part2, df_part3, df_part4,
 df_part5, df_part6, df_part7, df_part8) = parts

for part, file_name in zip(parts, file_names):
    part.to_csv(file_name, index=False)


In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

# Baseline: cut client 1's data into `num_parts` consecutive slices and train
# an independent 64-32-1 dense network on each slice, recording its test MSE.
client1_df = pd.read_csv("client1.csv")
num_parts = 20
split_size = len(client1_df) // num_parts
losses = []

for i in range(num_parts):
    # Rows [i * split_size, (i + 1) * split_size) form the i-th slice.
    start_idx, end_idx = i * split_size, (i + 1) * split_size
    part_df = client1_df.iloc[start_idx:end_idx]
    X, y = part_df.drop(columns=['Label']), part_df['Label']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Scaling statistics come from the training rows only.
    scaler = StandardScaler().fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # A fresh model per slice; nothing is carried over between iterations.
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)))
    model.add(tf.keras.layers.Dense(32, activation='relu'))
    model.add(tf.keras.layers.Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(X_train_scaled, y_train, epochs=10, batch_size=32, verbose=0)

    y_pred = model.predict(X_test_scaled)
    losses.append(mean_squared_error(y_test, y_pred))

for i, loss in enumerate(losses, start=1):
    print(f"Loss for model {i}: {loss}")

In [30]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.cluster import AgglomerativeClustering
from sklearn.decomposition import PCA
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [31]:
# Read the eight per-client CSV files, in client order, into one list.
client_dfs = []
for client_file in ("client1.csv", "client2.csv", "client3.csv", "client4.csv",
                    "client5.csv", "client6.csv", "client7.csv", "client8.csv"):
    client_dfs.append(pd.read_csv(client_file))

# Individual names for each client's frame, bound to the same objects as the list.
(client1_df, client2_df, client3_df, client4_df,
 client5_df, client6_df, client7_df, client8_df) = client_dfs

# History of average losses; server() appends one value per call.
loss_of_all_models = []

In [32]:
def train_model_with_feedback(X_train, y_train, X_test, y_test, avg_weights):
    """Train one client's model starting from the supplied weights.

    The features are standardised with statistics fitted on ``X_train`` only.
    A 64-32-1 dense network is built, loaded with ``avg_weights``, trained for
    10 epochs with Adam on mean squared error, and scored on the test split.

    Args:
        X_train, y_train: Training features and targets.
        X_test, y_test: Held-out features and targets used for the loss.
        avg_weights: List of arrays handed to ``model.set_weights``; its
            shapes must match the layers of the network built here.

    Returns:
        ``(weights, loss)``: the trained model's weight list and the test MSE.
    """
    scaler = StandardScaler().fit(X_train)
    train_features = scaler.transform(X_train)
    test_features = scaler.transform(X_test)

    n_inputs = train_features.shape[1]
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Dense(64, activation='relu', input_shape=(n_inputs,)))
    model.add(tf.keras.layers.Dense(32, activation='relu'))
    model.add(tf.keras.layers.Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error')

    # Start from the weights handed in by the caller instead of a random init.
    model.set_weights(avg_weights)
    model.fit(train_features, y_train, epochs=10, batch_size=32, verbose=0)

    trained_weights = model.get_weights()
    predictions = model.predict(test_features)
    return trained_weights, mean_squared_error(y_test, predictions)

In [33]:
# Module-level state / tuning values for the training rounds.

# Starts at +infinity. server() compares each round's average loss against a
# variable of this name -- presumably the lowest average loss seen so far.
counter_loss = float('inf')
# NOTE(review): not referenced by any cell visible in this review -- confirm use.
threshold = 2.0
# Step by which the cluster count is reduced in the training loop (`p -= d`);
# the loop cell assigns `d = 1` again before it starts.
d = 1
# NOTE(review): the next two names are not referenced by any visible cell;
# presumably meant for a stabilisation / early-stop check -- confirm.
consecutive_rounds = 0
stabilization_rounds = 5

In [47]:
def server(all_weights, all_losses, num_clusters):
    """Aggregate one round of client results.

    Averages the clients' weights layer by layer, records the mean loss,
    clusters the clients by their flattened weight vectors and plots the
    clusters on the first two principal components.

    Args:
        all_weights: One weight list per client (as returned by
            ``model.get_weights()``).
        all_losses: One loss value per client.
        num_clusters: Number of clusters for the agglomerative clustering.

    Returns:
        ``(avg_weights, cluster_labels)``: the element-wise mean of every
        layer that all clients provided, and the cluster index per client.

    Side effects:
        Appends the average loss to the module-level ``loss_of_all_models``,
        lowers the module-level ``counter_loss`` when the average loss beats
        it, prints both values and the cluster labels, and shows a scatter plot.
    """
    # The running best loss lives at module level so it survives between
    # rounds; re-initialising it to +inf inside the function (as before) made
    # the comparison below trivially true on every call.
    global counter_loss

    num_clients = len(all_weights)
    max_num_layers = max(len(weights) for weights in all_weights)

    avg_weights = []
    for layer_idx in range(max_num_layers):
        layer_weights = [
            client_weights[layer_idx]
            for client_weights in all_weights
            if layer_idx < len(client_weights)
        ]
        # Only layers present for every client are averaged; others are skipped.
        if len(layer_weights) == num_clients:
            avg_weights.append(np.mean(layer_weights, axis=0))

    avg_loss = np.mean(all_losses)
    print("Average Loss:")
    print(avg_loss)
    loss_of_all_models.append(avg_loss)

    if avg_loss < counter_loss:
        counter_loss = avg_loss
    print("Counter Loss:")
    print(counter_loss)

    # One row per client: all of its layers flattened and concatenated.
    flat_weights = [np.concatenate([layer.flatten() for layer in client_weights]) for client_weights in all_weights]
    weighted_matrix = np.vstack(flat_weights)

    clustering = AgglomerativeClustering(n_clusters=num_clusters).fit(weighted_matrix)
    cluster_labels = clustering.labels_
    print("Cluster Labels:", cluster_labels)

    # Project the weight vectors to 2-D purely for visualisation.
    pca = PCA(n_components=2)
    reduced_weights = pca.fit_transform(weighted_matrix)
    plt.scatter(reduced_weights[:, 0], reduced_weights[:, 1], c=cluster_labels, cmap='viridis')
    plt.title('Agglomerative Clustering')
    plt.xlabel('Component 1')
    plt.ylabel('Component 2')
    plt.colorbar(label='Cluster')
    plt.show()

    return avg_weights, cluster_labels

In [41]:
def initialize_initial_weights(shape):
    """Create one uniformly random array in [0, 1) per requested shape.

    Args:
        shape: Iterable of shape tuples, e.g. ``[(70, 64), (64,)]``.

    Returns:
        A list of NumPy arrays, one per entry of ``shape``, in the same order.
    """
    initial_weights = []
    for dims in shape:
        initial_weights.append(np.random.rand(*dims))
    return initial_weights

In [None]:
w = 0.1  # relative loss reduction a round must exceed to shrink the cluster count
d = 1    # current shrink step for the cluster count; doubles after each shrink
# NOTE(review): `num_features` is not read anywhere in this cell; it is kept
# only in case another cell refers to it.
num_features = 71

# The first layer's kernel must have one row per model input, i.e. per column
# of a client frame other than 'Label'. Deriving it from the data keeps the
# initial weights compatible with the model built in train_model_with_feedback.
input_dim = len(client_dfs[0].columns.drop('Label'))
avg_weights = initialize_initial_weights([(input_dim, 64), (64,), (64, 32), (32,), (32, 1), (1,)])
num_blocks = 5000
num_clients = 8
p = num_clients  # Initialize the number of clusters to the number of clients
loss_of_all_models = []

for block in range(num_blocks):
    all_weights = []
    all_losses = []
    for i, client_df in enumerate(client_dfs):
        # Each client contributes its `block`-th consecutive slice of rows.
        block_size = len(client_df) // num_blocks
        start_idx = block * block_size
        end_idx = (block + 1) * block_size
        block_df = client_df.iloc[start_idx:end_idx]
        X = block_df.drop(columns=['Label'])
        y = block_df['Label']
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        weights, loss = train_model_with_feedback(X_train, y_train, X_test, y_test, avg_weights)
        all_weights.append(weights)
        all_losses.append(loss)
        print(f"Block {block + 1}:")
        print(f"Client {i + 1} Loss after block {block + 1}: {loss}")

    # server() averages the weights and appends this round's mean loss to
    # loss_of_all_models.
    avg_weights, cluster_labels = server(all_weights, all_losses, p)

    if block > 0:
        previous_loss = loss_of_all_models[-2]
        current_loss = loss_of_all_models[-1]
        # A previous loss of exactly zero cannot be improved on; treat it as
        # "no reduction" rather than dividing by zero.
        if previous_loss:
            reduction_ratio = (previous_loss - current_loss) / previous_loss
        else:
            reduction_ratio = 0.0

        if reduction_ratio > w:
            # Shrink the cluster count, but never below one: with a doubling
            # step the unclamped value goes 8 -> 7 -> 5 -> 1 -> -7, and the
            # clustering rejects a non-positive cluster count.
            p = max(p - d, 1)
            d *= 2
        else:
            p = min(p * 2, num_clients)
            d = 1