In [2]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix


In [11]:
# Load the DDoS capture CSV (unzip the archive first if needed) and turn
# the textual label into a binary target.
LABEL_COLUMN = "Label"
BENIGN_LABEL = "BENIGN"

df = pd.read_csv("Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv")

# Strip surrounding whitespace from the header names so the label column
# can be addressed by its plain name.
df.columns = df.columns.str.strip()


def _encode_label(raw_label):
    """Return 0 when the label is the benign marker (ignoring case and padding), else 1."""
    is_benign = raw_label.strip().upper() == BENIGN_LABEL
    return 0 if is_benign else 1


df[LABEL_COLUMN] = df[LABEL_COLUMN].map(_encode_label)

# Target vector first, then every remaining column as the feature matrix.
y = df[LABEL_COLUMN].values
X = df.drop(columns=[LABEL_COLUMN]).values






In [14]:
# Hold out 20% of the rows for evaluation. Stratifying on y asks the
# splitter to keep the class proportions in both partitions, and the
# fixed random_state makes the split repeatable.
split_options = dict(test_size=0.2, stratify=y, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)


In [17]:
# SMOTE is provided by imbalanced-learn and no visible cell imports it, so
# on a fresh kernel (Restart & Run All) this cell would stop with a
# NameError. Importing it here keeps the cell self-sufficient.
from imblearn.over_sampling import SMOTE

# Replace NaN and +/-inf entries with 0.0 in both splits so the
# resampler and the scaler downstream only ever see finite numbers.
X_train = np.nan_to_num(X_train, nan=0.0, posinf=0.0, neginf=0.0)
X_test = np.nan_to_num(X_test, nan=0.0, posinf=0.0, neginf=0.0)

# Oversample the training split only; the test split is left untouched so
# evaluation is done on data that was not synthesised.
smote = SMOTE(random_state=42)
X_train, y_train = smote.fit_resample(X_train, y_train)

# Per-class row counts after resampling.
print("After SMOTE:", np.bincount(y_train))




After SMOTE: [102422 102422]


In [18]:
# Fit the scaler on the training split only, then push both splits through
# the same fitted transform so no test-set statistics leak into training.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)



In [19]:
NUM_CLIENTS = 5
BATCH_SIZE = 128
SEED = 42  # same value the split and SMOTE cells pass as random_state

# Shuffle the training row indices with a seeded generator so every run
# hands each client the same rows; an unseeded permutation would make the
# federated partition (and therefore the results) differ between runs.
client_rng = np.random.default_rng(SEED)

# array_split tolerates a row count that is not a multiple of NUM_CLIENTS;
# the resulting shards differ in size by at most one row.
client_indices = np.array_split(
    client_rng.permutation(len(X_train)),
    NUM_CLIENTS
)

def create_loader(indices, features=None, labels=None, batch_size=None):
    """Build a shuffled DataLoader over the selected rows.

    Args:
        indices: Index array (or anything usable to index ``features`` and
            ``labels``) selecting the rows that belong to one client.
        features: Feature matrix to slice. Defaults to the notebook-level
            ``X_train`` when omitted, which is the original behaviour.
        labels: Label vector to slice. Defaults to the notebook-level
            ``y_train`` when omitted.
        batch_size: Mini-batch size. Defaults to the notebook-level
            ``BATCH_SIZE`` when omitted.

    Returns:
        A ``DataLoader`` wrapping a ``TensorDataset`` of float32 features
        and int64 labels, with ``shuffle=True``.
    """
    # Fall back to the notebook globals only when the caller did not pass
    # explicit data, so existing one-argument calls keep working.
    if features is None:
        features = X_train
    if labels is None:
        labels = y_train
    if batch_size is None:
        batch_size = BATCH_SIZE

    Xc = torch.tensor(features[indices], dtype=torch.float32)
    # cross_entropy expects integer class ids, hence torch.long.
    yc = torch.tensor(labels[indices], dtype=torch.long)
    return DataLoader(TensorDataset(Xc, yc), batch_size=batch_size, shuffle=True)



In [20]:
class Encoder(nn.Module):
    """Feed-forward encoder: Linear -> ReLU -> Linear.

    Args:
        input_dim: Size of the last dimension of the incoming tensor.
        latent_dim: Size of the produced representation (default 64).
    """

    def __init__(self, input_dim, latent_dim=64):
        super().__init__()
        hidden_dim = 128
        # Layers are kept positionally inside ``net`` so the parameter
        # names stay ``net.0.*`` and ``net.2.*``.
        layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, latent_dim),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the latent representation of ``x``."""
        latent = self.net(x)
        return latent


class Classifier(nn.Module):
    """Single linear head turning a latent vector into class logits.

    Args:
        latent_dim: Size of the incoming representation (default 64).
        num_classes: Number of output logits (default 2).
    """

    def __init__(self, latent_dim=64, num_classes=2):
        super().__init__()
        self.fc = nn.Linear(in_features=latent_dim, out_features=num_classes)

    def forward(self, z):
        """Return unnormalised class scores for ``z``."""
        logits = self.fc(z)
        return logits



In [21]:
def train_client(encoder, classifier, loader, epochs=1, lr=1e-3):
    """Run local training of an encoder/classifier pair, updating them in place.

    Args:
        encoder: Module applied to each input batch.
        classifier: Module applied to the encoder output to obtain logits.
        loader: Iterable yielding ``(inputs, targets)`` batches.
        epochs: Number of full passes over ``loader``.
        lr: Learning rate handed to Adam.

    Returns:
        None. Both modules are left in training mode with updated weights.
    """
    for module in (encoder, classifier):
        module.train()

    # A single optimiser covers both modules so they are updated jointly.
    trainable = [*encoder.parameters(), *classifier.parameters()]
    optimizer = torch.optim.Adam(trainable, lr=lr)

    for _epoch in range(epochs):
        for features, targets in loader:
            optimizer.zero_grad()
            batch_loss = F.cross_entropy(classifier(encoder(features)), targets)
            batch_loss.backward()
            optimizer.step()



In [22]:
def fedavg(global_model, client_models):
    """Overwrite ``global_model`` with the unweighted mean of the client models.

    Every entry of the global state dict is replaced by the element-wise
    mean of the same entry across ``client_models``; each client counts
    equally regardless of how much data it trained on.

    Args:
        global_model: Module whose state is replaced in place.
        client_models: Sequence of modules with the same state-dict keys
            and tensor shapes as ``global_model``.

    Returns:
        None.
    """
    # Snapshot each client's state once instead of rebuilding it for
    # every key of the global model.
    client_states = [model.state_dict() for model in client_models]
    global_dict = global_model.state_dict()

    for key in global_dict:
        stacked = torch.stack([state[key] for state in client_states])
        if stacked.is_floating_point() or stacked.is_complex():
            global_dict[key] = stacked.mean(dim=0)
        else:
            # Integer entries (e.g. BatchNorm's num_batches_tracked) cannot
            # be averaged directly by torch.mean; average in float32 and
            # cast back, which truncates any fractional part.
            global_dict[key] = stacked.to(torch.float32).mean(dim=0).to(stacked.dtype)

    global_model.load_state_dict(global_dict)





In [23]:
INPUT_DIM = X_train.shape[1]
ROUNDS = 10
LOCAL_EPOCHS = 1

# Seed torch before any model is built or any loader is iterated, so that
# weight initialisation and mini-batch shuffling repeat across kernel
# restarts (the split and SMOTE steps are already pinned to 42).
torch.manual_seed(42)

global_encoder = Encoder(INPUT_DIM)
global_classifier = Classifier()

# The rows assigned to each client never change between rounds, so build
# each client's loader once rather than re-copying its data every round.
# A shuffling DataLoader still draws a fresh batch order on each pass.
client_loaders = [create_loader(indices) for indices in client_indices]

for r in range(ROUNDS):
    client_encoders = []
    client_classifiers = []

    for loader in client_loaders:
        # Every client starts the round from the current global weights.
        enc = Encoder(INPUT_DIM)
        clf = Classifier()

        enc.load_state_dict(global_encoder.state_dict())
        clf.load_state_dict(global_classifier.state_dict())

        train_client(enc, clf, loader, LOCAL_EPOCHS)

        client_encoders.append(enc)
        client_classifiers.append(clf)

    # Aggregate the locally trained copies back into the global models.
    fedavg(global_encoder, client_encoders)
    fedavg(global_classifier, client_classifiers)

    print(f"Round {r+1}/{ROUNDS} completed")






Round 1/10 completed
Round 2/10 completed
Round 3/10 completed
Round 4/10 completed
Round 5/10 completed
Round 6/10 completed
Round 7/10 completed
Round 8/10 completed
Round 9/10 completed
Round 10/10 completed


In [24]:
# Switch both global modules to evaluation mode before scoring the
# held-out split.
for module in (global_encoder, global_classifier):
    module.eval()

X_test_t = torch.tensor(X_test, dtype=torch.float32)

# No gradients are needed for scoring; the predicted class is the index
# of the largest logit in each row.
with torch.no_grad():
    z = global_encoder(X_test_t)
    logits = global_classifier(z)
    preds = logits.argmax(dim=1).numpy()

acc = accuracy_score(y_test, preds)
prec = precision_score(y_test, preds)
rec = recall_score(y_test, preds)
f1 = f1_score(y_test, preds)
cm = confusion_matrix(y_test, preds)

# Emit the metrics in a fixed order, one per line.
report = (
    ("Accuracy:", acc),
    ("Precision:", prec),
    ("Recall:", rec),
    ("F1-score:", f1),
    ("Confusion Matrix:\n", cm),
)
for label, value in report:
    print(label, value)


Accuracy: 0.9993355334558905
Precision: 0.9993361708774259
Recall: 0.9994922866627611
F1-score: 0.9994142226734877
Confusion Matrix:
 [[19527    17]
 [   13 25592]]
