<a href="https://www.kaggle.com/code/logeswarig/lightids-sdn?scriptVersionId=290289116" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
!pip install torch-geometric

In [None]:
import os
import numpy as np
import pandas as pd
import joblib
from tqdm.auto import tqdm

# sklearn preprocessing
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# PyTorch
import torch
from torch import nn
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset

from collections import Counter


# torch_geometric is optional for this notebook: keep going without it, but
# report the underlying error so a broken install can be told apart from a
# missing one.
try:
    from torch_geometric.data import HeteroData
except Exception as e:
    print("torch_geometric not available in this environment. If needed, install it or run on your Kaggle runtime where you have it installed.")
    print(f"Import error: {e!r}")


# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Device:', device)

In [None]:

import seaborn as sns
import matplotlib.pyplot as plt
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

Dataset

The InSDN dataset is split into three CSV files, each covering a different traffic group:

* Benign Traffic: Normal traffic instances.
* Metasploitable-2_Group: Attack traffic targeting the Metasploitable 2 server (e.g., DoS, DDoS, Exploit, Probe, Brute Force).
* OVS_Group: Attack traffic targeting the Open vSwitch (e.g., botnet, DoS, DDoS, web attack, probe).

In [None]:
# Load the three InSDN CSV files; rows containing any NaN are dropped on load.
benign_data = pd.read_csv(
    "/kaggle/input/insdn-dataset/InSDN_DatasetCSV/Normal_data.csv",
    index_col=False,
).dropna()
attack_data_Meta = pd.read_csv(
    "/kaggle/input/insdn-dataset/InSDN_DatasetCSV/metasploitable-2.csv",
    index_col=False,
).dropna()
attack_data_OVS = pd.read_csv(
    "/kaggle/input/insdn-dataset/InSDN_DatasetCSV/OVS.csv",
    index_col=False,
).dropna()

In [None]:
# Concatenate the benign and both attack frames into one frame with a fresh
# 0..n-1 index, then drop any row that still contains a NaN.
attack_data = pd.concat(
    objs=[benign_data, attack_data_Meta, attack_data_OVS],
    ignore_index=True,
)
attack_data = attack_data.dropna()

In [None]:
print("Dataset shape:", attack_data.shape)

In [None]:
print("\nColumn names:\n", attack_data.columns.tolist())

In [None]:
print("\nData types and missing values:\n")

In [None]:
attack_data.info()

In [None]:

attack_data.head()

In [None]:
attack_data.describe()

In [None]:
print("\nMissing values per column:\n", attack_data.isnull().sum())

In [None]:
# Count NaNs per column, then keep only the columns that contain at least one.
null_counts = attack_data.isnull().sum()
null_columns = null_counts.loc[null_counts > 0]

print(null_columns)


In [None]:
print(attack_data['Label'].unique())

In [None]:
print(attack_data['Label'].value_counts(normalize=True))  

In [None]:
# Bar chart of the number of flows per value of the Label column.
plt.figure(figsize=(6, 4))
sns.countplot(data=attack_data, x='Label')
plt.xlabel("Traffic Class")
plt.ylabel("Number of Flows")
plt.title("Class Distribution (Normal vs Attack)")
plt.show()


In [None]:
numeric_data = attack_data.drop(columns=['Src IP', 'Dst IP', 'Label', 'Flow ID', 'Timestamp'])

In [None]:
# Heatmap of the pairwise correlation matrix of the numeric columns,
# with the colour scale centred on zero.
plt.figure(figsize=(12, 8))
sns.heatmap(data=numeric_data.corr(), center=0)
plt.title("Correlation Heatmap")
plt.show()

Preprocessing

In [None]:
attack_data['Label'] = attack_data['Label'].replace('DDoS ', 'DDoS')

In [None]:
print(attack_data['Label'].value_counts(normalize=True))  # Recheck distribution

In [None]:
# Keep only the DDoS and Normal flows, reporting the shape on either side.
print("Shape Before filtering:", attack_data.shape)
DDoS_attack_data = attack_data.loc[attack_data['Label'].isin(('DDoS', 'Normal'))]
print("Shape after filtering:", DDoS_attack_data.shape)
# Relative class frequencies, used to check for imbalance.
print(DDoS_attack_data['Label'].value_counts(normalize=True))

In [None]:
# Remove the flow-identifier and timestamp columns, printing the shape before
# and after so the effect is visible.
print("Before dropping:", DDoS_attack_data.shape)
DDoS_attack_data = DDoS_attack_data.drop(['Flow ID', 'Timestamp'], axis=1)
print("After dropping:", DDoS_attack_data.shape)

In [None]:
DDoS_numeric_data = DDoS_attack_data.drop(columns=['Src IP', 'Dst IP', 'Label'])

In [None]:
# Same correlation heatmap as before, restricted to the DDoS/Normal subset.
plt.figure(figsize=(12, 8))
sns.heatmap(data=DDoS_numeric_data.corr(), center=0)
plt.title("Correlation Heatmap")
plt.show()

In [None]:
# Number of distinct values per column, computed once for the whole frame and
# reused for the report (previously the counts were recomputed per column).
unique_counts = DDoS_numeric_data.nunique()

for col, n_unique in unique_counts.items():
    print(f"{col}: {n_unique} unique values")

In [None]:
# Columns with at most two distinct values. Note that the threshold also
# captures binary columns, not only strictly constant ones.
DDoS_constant_feature = [
    name
    for name in DDoS_numeric_data.columns
    if DDoS_numeric_data[name].nunique() <= 2
]
print("Constant/low-variance features:", DDoS_constant_feature)

In [None]:
# Show the distinct values of each low-cardinality column.
# The separator was a mis-decoded arrow character in the original; restored.
for col in DDoS_constant_feature:
    print(f"{col} → {DDoS_numeric_data[col].unique()}")

In [None]:
# Drop a fixed, hand-written list of flag / bulk-rate / window columns.
# NOTE(review): this list is spelled out by hand rather than taken from
# DDoS_constant_feature - confirm the two agree for this dataset.
DDoS_attack_data_clean = DDoS_attack_data.drop(
    columns=[
        'Fwd PSH Flags',
        'Fwd URG Flags',
        'Bwd URG Flags',
        'URG Flag Cnt',
        'CWE Flag Count',
        'ECE Flag Cnt',
        'Fwd Byts/b Avg',
        'Fwd Pkts/b Avg',
        'Fwd Blk Rate Avg',
        'Bwd Byts/b Avg',
        'Bwd Pkts/b Avg',
        'Bwd Blk Rate Avg',
        'Init Fwd Win Byts',
        'Fwd Seg Size Min',
    ]
)
print("Shape after dropping constants:", DDoS_attack_data_clean.shape)
print("Remaining columns:", DDoS_attack_data_clean.columns.tolist())

In [None]:
DDoS_numeric_data_clean = DDoS_attack_data_clean.drop(columns=['Src IP', 'Dst IP', 'Label'])

In [None]:
# Tag every row with the name of the source file it came from. This overwrites
# whatever the "Label" column held in each of the three frames.
benign_data["Label"] = "Normal"
attack_data_Meta["Label"] = "MetaSploit"
attack_data_OVS["Label"] = "OVS"

# Stack the three relabelled frames row-wise under a fresh 0..n-1 index.
df = pd.concat(objs=[benign_data, attack_data_Meta, attack_data_OVS], axis=0, ignore_index=True)

print("Total samples:", df.shape)
print("Class distribution:")
print(df["Label"].value_counts())


In [None]:
# Identifier-like columns treated as categorical, limited to those actually
# present in df.
categorical_cols = [
    name
    for name in ("Src IP", "Dst IP", "Flow ID", "Timestamp")
    if name in df.columns
]

# Every other column except the target is treated as numerical.
numerical_cols = [
    name
    for name in df.columns
    if name != "Label" and name not in categorical_cols
]

print("Numerical features:", len(numerical_cols))
print("Categorical features:", categorical_cols)


In [None]:
# Convert the string labels to a categorical, remember which integer code
# stands for which class name, then replace the column by the integer codes.
df["Label"] = df["Label"].astype("category")
label_mapping = {
    code: class_name
    for code, class_name in enumerate(df["Label"].cat.categories)
}
df["Label"] = df["Label"].cat.codes

print("Label mapping:", label_mapping)


In [None]:
# Separate the features from the target column.
y = df["Label"]
X = df.drop("Label", axis=1)

# 80/20 split with a fixed seed, stratified so both parts keep the class ratios.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print("Train size:", X_train.shape)
print("Test size :", X_test.shape)


In [None]:
# Numerical columns: fill missing values with the median, then standardise.
num_pipeline = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="median")),
    ("scaler", StandardScaler()),
])

# Categorical columns: fill missing values with the most frequent value, then
# one-hot encode. min_frequency=50 groups rare categories together, which keeps
# the number of output columns under control.
cat_pipeline = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="most_frequent")),
    (
        "encoder",
        OneHotEncoder(handle_unknown="ignore", sparse_output=False, min_frequency=50),
    ),
])

# Route each column group through its own pipeline.
preprocessor = ColumnTransformer(transformers=[
    ("num", num_pipeline, numerical_cols),
    ("cat", cat_pipeline, categorical_cols),
])


In [None]:
X_train_processed = preprocessor.fit_transform(X_train)
X_test_processed  = preprocessor.transform(X_test)

print("Processed train shape:", X_train_processed.shape)
print("Processed test shape :", X_test_processed.shape)


In [None]:
# Copy the processed arrays into tensors created directly on `device`:
# float32 for the features, int64 (long) for the class indices.
X_train_tensor = torch.tensor(X_train_processed, dtype=torch.float32, device=device)
X_test_tensor = torch.tensor(X_test_processed, dtype=torch.float32, device=device)

y_train_tensor = torch.tensor(y_train.values, dtype=torch.long, device=device)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long, device=device)

print("Tensor shapes:")
print(X_train_tensor.shape, y_train_tensor.shape)


In [None]:
BATCH_SIZE = 256

# Pair each feature tensor with its label tensor.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Only the training batches are shuffled; test order is kept fixed.
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

print("DataLoader ready.")


In [None]:
joblib.dump(preprocessor, "insdn_preprocessor.pkl")
print("Preprocessor saved.")


DFE-GQPSO Feature Selector

In [None]:
import numpy as np
import lightgbm as lgb
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split


In [None]:
class DFE_GQPSO:
    """Feature-selection helper: configuration, sigmoid and mask fitness.

    This definition holds the hyper-parameters and the two private helpers
    only; it has no ``fit`` method. A class with the same name, including
    ``fit``, is defined again further down in the notebook and replaces this
    one when that cell runs.
    """

    def __init__(self, n_particles=20, n_iters=30,
                 alpha=0.9, beta=0.1,
                 lambda_q=0.75, sigma=0.1, tau=0.5,
                 random_state=42):
        # Swarm size and number of iterations.
        self.n_particles, self.n_iters = n_particles, n_iters
        # Fitness weights: accuracy reward and selected-fraction penalty.
        self.alpha, self.beta = alpha, beta
        # Search parameters stored for the optimiser.
        self.lambda_q, self.sigma, self.tau = lambda_q, sigma, tau
        self.random_state = random_state

    def _sigmoid(self, x):
        """Return the element-wise logistic function of ``x``."""
        return 1 / (np.exp(-x) + 1)

    def _fitness(self, X, y, mask):
        """Score a 0/1 feature mask.

        Returns 0 for a mask that selects nothing. Otherwise trains a small
        LightGBM classifier on a fixed 70/30 stratified split of the selected
        columns and returns ``alpha * accuracy - beta * fraction_selected``.
        """
        n_selected = np.sum(mask)
        if n_selected == 0:
            return 0

        chosen = X[:, mask == 1]
        X_tr, X_val, y_tr, y_val = train_test_split(
            chosen, y, test_size=0.3, stratify=y, random_state=42
        )

        model = lgb.LGBMClassifier(n_estimators=50, max_depth=6, n_jobs=-1)
        model.fit(X_tr, y_tr)
        accuracy = accuracy_score(y_val, model.predict(X_val))

        # Fraction of all features that the mask keeps.
        selected_fraction = n_selected / X.shape[1]
        return self.alpha * accuracy - self.beta * selected_fraction


In [None]:
    def fit(self, X, y):
        """Search for a feature mask and store it in ``self.best_mask_``.

        NOTE(review): this cell starts with an indented ``def`` and contains no
        ``class`` statement of its own, so it looks like a leftover draft that
        would fail with an indentation error if run by itself. The complete
        class, including this method, is defined again in the following cell.

        Parameters
        ----------
        X : 2-D array indexed as ``X[:, mask == 1]``; columns are features.
        y : labels passed unchanged to ``_fitness``.

        Returns
        -------
        self
        """
        # Seeds numpy's global generator, so this affects other code that
        # relies on np.random as well.
        np.random.seed(self.random_state)
        d = X.shape[1]

        # Positions start in [0, 1). The sigmoid of a non-negative value is at
        # least 0.5, so with the default tau=0.5 these initial masks select
        # (almost) every feature - NOTE(review): confirm this is intended.
        particles = np.random.rand(self.n_particles, d)
        pbest = particles.copy()
        pbest_fit = np.zeros(self.n_particles)

        # Score the initial positions; they are the first personal bests.
        for i in range(self.n_particles):
            mask = (self._sigmoid(particles[i]) > self.tau).astype(int)
            pbest_fit[i] = self._fitness(X, y, mask)

        gbest_idx = np.argmax(pbest_fit)
        gbest = pbest[gbest_idx].copy()
        gbest_fit = pbest_fit[gbest_idx]

        for t in range(self.n_iters):
            for i in range(self.n_particles):
                # Fresh random draws per particle and per dimension.
                u = np.random.rand(d)
                theta = np.random.uniform(0, 2*np.pi, d)

                # Equation (4): jump from the personal best, scaled by the
                # distance to the global best.
                particles[i] = (
                    pbest[i]
                    + self.lambda_q * np.abs(gbest - pbest[i])
                    * np.log(1 / u) * np.cos(theta)
                )

                # Equation (5): additive Gaussian noise with std sigma.
                particles[i] += np.random.normal(0, self.sigma, d)

                # Equation (6): binarise the position into a 0/1 feature mask.
                mask = (self._sigmoid(particles[i]) > self.tau).astype(int)

                fit = self._fitness(X, y, mask)

                # Only strict improvements replace the stored bests.
                if fit > pbest_fit[i]:
                    pbest[i] = particles[i].copy()
                    pbest_fit[i] = fit

                    if fit > gbest_fit:
                        gbest = particles[i].copy()
                        gbest_fit = fit

            print(f"Iteration {t+1}/{self.n_iters} | Best fitness: {gbest_fit:.4f}")

        # Final mask derived from the best position found.
        self.best_mask_ = (self._sigmoid(gbest) > self.tau).astype(int)
        return self


In [None]:
import numpy as np
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

class DFE_GQPSO:
    """Wrapper feature selector driven by a quantum-behaved PSO-style search.

    Each particle is a real-valued vector with one entry per feature. A
    position becomes a 0/1 feature mask by thresholding its sigmoid at
    ``tau``; the mask is scored by ``_fitness``.

    Parameters
    ----------
    n_particles : int
        Number of particles in the swarm.
    n_iters : int
        Number of update sweeps over the swarm.
    alpha, beta : float
        Fitness weights: ``alpha * accuracy - beta * fraction_selected``.
    lambda_q : float
        Scale of the jump around each particle's personal best.
    sigma : float
        Standard deviation of the Gaussian noise added to each position.
    tau : float
        Threshold applied to the sigmoid of a position to select a feature.
    random_state : int
        Seed for the search, the validation split and the classifier.

    Attributes
    ----------
    best_mask_ : numpy.ndarray
        0/1 integer vector with one entry per feature, set by ``fit``.
    """

    def __init__(self, n_particles=20, n_iters=30,
                 alpha=0.9, beta=0.1,
                 lambda_q=0.75, sigma=0.1, tau=0.5,
                 random_state=42):
        self.n_particles = n_particles
        self.n_iters = n_iters
        self.alpha = alpha
        self.beta = beta
        self.lambda_q = lambda_q
        self.sigma = sigma
        self.tau = tau
        self.random_state = random_state

    def _sigmoid(self, x):
        """Return the element-wise logistic function of ``x``."""
        # exp(-x) overflows to inf for very negative x; the quotient is then
        # exactly 0, which is the correct limit, so the warning is silenced.
        with np.errstate(over="ignore"):
            return 1 / (1 + np.exp(-x))

    def _fitness(self, X, y, mask):
        """Score a 0/1 feature mask.

        Returns 0 when the mask selects nothing. Otherwise trains a small
        LightGBM classifier on a 70/30 stratified split of the selected columns
        and returns ``alpha * accuracy - beta * fraction_selected``.
        """
        n_selected = np.sum(mask)
        if n_selected == 0:
            return 0

        X_sel = X[:, mask == 1]
        # Use the instance seed (previously a hard-coded 42) so that
        # random_state controls the whole procedure. The split is the same for
        # every mask, which keeps the scores comparable.
        X_tr, X_val, y_tr, y_val = train_test_split(
            X_sel, y, test_size=0.3, stratify=y, random_state=self.random_state
        )

        clf = lgb.LGBMClassifier(
            n_estimators=50,
            max_depth=6,
            n_jobs=-1,
            random_state=self.random_state,
        )
        clf.fit(X_tr, y_tr)
        preds = clf.predict(X_val)

        acc = accuracy_score(y_val, preds)
        penalty = n_selected / X.shape[1]

        return self.alpha * acc - self.beta * penalty

    def fit(self, X, y):
        """Search for a feature mask and store it in ``self.best_mask_``.

        Parameters
        ----------
        X : 2-D array indexed as ``X[:, mask == 1]``; columns are features.
        y : labels passed unchanged to ``_fitness``.

        Returns
        -------
        self
        """
        # A private generator keeps the search reproducible without reseeding
        # numpy's global RNG behind the caller's back.
        rng = np.random.RandomState(self.random_state)
        d = X.shape[1]
        tiny = np.finfo(float).tiny

        # Different positions often binarise to the same mask; train the
        # classifier only once per distinct mask.
        fitness_cache = {}

        def evaluate(position):
            mask = (self._sigmoid(position) > self.tau).astype(int)
            key = mask.tobytes()
            if key not in fitness_cache:
                fitness_cache[key] = self._fitness(X, y, mask)
            return fitness_cache[key]

        # Initialise symmetrically around 0. Starting in [0, 1) puts every
        # sigmoid at or above 0.5, so with the default tau the whole initial
        # swarm would collapse onto the single "all features" mask.
        particles = rng.uniform(-1.0, 1.0, size=(self.n_particles, d))
        pbest = particles.copy()
        pbest_fit = np.zeros(self.n_particles)

        for i in range(self.n_particles):
            pbest_fit[i] = evaluate(particles[i])

        gbest_idx = np.argmax(pbest_fit)
        gbest = pbest[gbest_idx].copy()
        gbest_fit = pbest_fit[gbest_idx]

        # Optimization loop
        for t in range(self.n_iters):
            for i in range(self.n_particles):
                # Clamp away from 0 so log(1 / u) stays finite.
                u = np.maximum(rng.rand(d), tiny)
                theta = rng.uniform(0, 2 * np.pi, d)

                # Quantum-inspired update: jump from the personal best, scaled
                # by the distance to the global best.
                particles[i] = (
                    pbest[i]
                    + self.lambda_q * np.abs(gbest - pbest[i])
                    * np.log(1 / u) * np.cos(theta)
                )

                # Gaussian perturbation
                particles[i] += rng.normal(0, self.sigma, d)

                # Binarise and score the new position.
                fit = evaluate(particles[i])

                # Only strict improvements replace the stored bests.
                if fit > pbest_fit[i]:
                    pbest[i] = particles[i].copy()
                    pbest_fit[i] = fit

                    if fit > gbest_fit:
                        gbest = particles[i].copy()
                        gbest_fit = fit

            print(f"Iteration {t+1}/{self.n_iters} | Best fitness: {gbest_fit:.4f}")

        self.best_mask_ = (self._sigmoid(gbest) > self.tau).astype(int)
        return self


In [None]:
# Run the feature search on the processed training split.
selector = DFE_GQPSO(n_iters=40, n_particles=25)
selector.fit(X_train_processed, y_train.values)

# Apply the learned 0/1 mask as a boolean column selector to both splits.
X_train_fs = X_train_processed[:, selector.best_mask_.astype(bool)]
X_test_fs = X_test_processed[:, selector.best_mask_.astype(bool)]

print("Selected features:", X_train_fs.shape[1])


MSDC-Net (Transformer + Capsule + BiLSTM)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F


In [None]:
class CapsuleLayer(nn.Module):
    """Project the input through one weight matrix per capsule, then squash.

    ``W`` has shape ``(num_capsules, in_dim, dim_capsule)``. The projection is
    averaged over axis 2 and rescaled with the capsule "squash" non-linearity,
    which keeps the direction of each capsule vector and maps its length into
    [0, 1).

    For a ``(batch, seq, in_dim)`` input the output is
    ``(batch, num_capsules, dim_capsule)``.
    """

    def __init__(self, in_dim, num_capsules, dim_capsule):
        super().__init__()
        weight_shape = (num_capsules, in_dim, dim_capsule)
        self.W = nn.Parameter(torch.randn(*weight_shape))

    def forward(self, x):
        # Add a capsule axis so the matmul broadcasts x against every
        # capsule's weight matrix.
        projected = x.unsqueeze(1) @ self.W
        pooled = projected.mean(dim=2)

        # Squash: (|s|^2 / (1 + |s|^2)) * (s / |s|); the epsilon guards the
        # division when the vector length is zero.
        length = torch.norm(pooled, dim=-1, keepdim=True)
        shrink = length**2 / (1 + length**2)
        direction = pooled / (length + 1e-8)
        return shrink * direction


In [None]:
class MSDCNet(nn.Module):
    """Classifier chaining a linear embedding, Transformer, capsules and BiLSTM.

    Pipeline in ``forward``: embed each sample to 128 dims, treat it as a
    length-1 sequence for a 2-layer Transformer encoder, turn the result into
    8 capsules of size 16, run a bidirectional LSTM over the capsule axis
    (2 x 64 = 128 features), average over that axis and classify.
    """

    def __init__(self, input_dim, num_classes):
        super().__init__()

        self.embedding = nn.Linear(input_dim, 128)

        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=128, nhead=4, batch_first=True),
            num_layers=2,
        )

        self.capsule = CapsuleLayer(128, num_capsules=8, dim_capsule=16)

        # input_size matches dim_capsule; the two directions together yield
        # 128 features, which is what the final linear layer expects.
        self.bilstm = nn.LSTM(
            input_size=16, hidden_size=64, batch_first=True, bidirectional=True
        )

        self.fc = nn.Linear(128, num_classes)

    def forward(self, x):
        # unsqueeze(1) adds the sequence axis required by the encoder.
        tokens = self.embedding(x).unsqueeze(1)
        encoded = self.transformer(tokens)
        capsules = self.capsule(encoded)
        sequence, _ = self.bilstm(capsules)
        # Average over the capsule/time axis before classifying.
        return self.fc(sequence.mean(dim=1))


In [None]:
def fedavg(local_weights, local_sizes):
    """Return the sample-size-weighted average of several weight dicts (FedAvg).

    Parameters
    ----------
    local_weights : sequence of dict
        One mapping per client, all with the same keys (the keys of the first
        mapping are used). Values must support ``* number`` and ``/ number``.
    local_sizes : sequence of numbers
        Number of samples behind each entry of ``local_weights``.

    Returns
    -------
    dict
        ``{key: sum_i(local_weights[i][key] * local_sizes[i] / total)}``.

    Raises
    ------
    ValueError
        If no weights are given, if the two sequences differ in length, or if
        the sizes do not add up to a positive total.
    """
    if len(local_weights) == 0:
        raise ValueError("fedavg needs at least one set of local weights")
    if len(local_weights) != len(local_sizes):
        raise ValueError(
            f"got {len(local_weights)} weight sets but {len(local_sizes)} sizes"
        )

    total = sum(local_sizes)
    if total <= 0:
        raise ValueError("local_sizes must sum to a positive number")

    return {
        key: sum(
            (weights[key] * size) / total
            for weights, size in zip(local_weights, local_sizes)
        )
        for key in local_weights[0]
    }


In [None]:
# Placeholder dimensions for the explainability demo below.
num_features = 30
num_classes = 2

# Random stand-in data (replace with your real X_train_tensor, X_test_tensor).
# NOTE: these assignments rebind X_train_tensor, X_test_tensor and
# y_train_tensor, so the tensors built from the real dataset earlier in the
# notebook are no longer reachable under these names after this cell.
X_train_tensor = torch.rand((500, num_features))
X_test_tensor = torch.rand((50, num_features))
y_train_tensor = torch.randint(low=0, high=num_classes, size=(500,))


In [None]:
class IDSNet(nn.Module):
    """Three-layer fully connected classifier: input -> 128 -> 64 -> classes.

    ``forward`` applies ReLU after the two hidden layers and returns the raw
    output of the last layer (no softmax).
    """

    def __init__(self, input_dim, num_classes=2):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.out = nn.Linear(64, num_classes)

    def forward(self, x):
        hidden = x
        for layer in (self.fc1, self.fc2):
            hidden = torch.relu(layer(hidden))
        return self.out(hidden)


In [None]:
device = torch.device("cpu")  # SHAP works best on CPU

model = IDSNet(input_dim=X_train_tensor.shape[1], num_classes=num_classes)
model.to(device)
model.eval()


In [None]:
X_train_tensor = X_train_tensor.to(device)
X_test_tensor  = X_test_tensor.to(device)


In [None]:
# `shap` is used here and in the next cell but was never imported in the
# notebook; import it where it is first needed.
import shap

# Background samples (small subset) used as the reference distribution.
background = X_train_tensor[:100]

# Samples to explain
test_samples = X_test_tensor[:10]

explainer = shap.GradientExplainer(
    model,
    background
)

# Attribution values for the selected test samples.
shap_values = explainer.shap_values(test_samples)


In [None]:
shap.summary_plot(
    shap_values,
    test_samples.numpy()
)


In [None]:
class GradCAM:
    """Capture the gradient flowing out of one layer for a chosen class score.

    A backward hook on ``target_layer`` stores the gradient of the
    back-propagated quantity with respect to that layer's output.

    Parameters
    ----------
    model : torch.nn.Module
        Model whose gradients are cleared before each backward pass.
    target_layer : torch.nn.Module
        Layer of ``model`` to observe.
    """

    def __init__(self, model, target_layer):
        self.model = model
        self.gradients = None
        # register_backward_hook is deprecated; the "full" variant is its
        # supported replacement. Keep the handle so the hook can be detached
        # later with self._hook_handle.remove().
        self._hook_handle = target_layer.register_full_backward_hook(
            self.save_gradient
        )

    def save_gradient(self, module, grad_input, grad_output):
        """Hook callback: remember the gradient w.r.t. the layer's output."""
        self.gradients = grad_output[0]

    def generate(self, output, class_idx):
        """Back-propagate the score of ``class_idx`` and return mean gradients.

        Parameters
        ----------
        output : torch.Tensor
            Model output of shape ``(batch, num_classes)`` produced by a
            forward pass that went through ``target_layer``.
        class_idx : int
            Column of ``output`` to explain.

        Returns
        -------
        torch.Tensor
            The captured gradient averaged over dimension 1.

        Raises
        ------
        RuntimeError
            If the hook did not fire, i.e. ``output`` does not depend on
            ``target_layer``.
        """
        self.model.zero_grad()
        # Forget gradients from an earlier call so they cannot be returned by
        # mistake.
        self.gradients = None
        # backward() needs a scalar. Summing over the batch gives every sample
        # the same gradient it would get alone, and is a no-op for batch size 1.
        output[:, class_idx].sum().backward()
        if self.gradients is None:
            raise RuntimeError(
                "no gradient was captured; `output` must come from a forward "
                "pass through the hooked target layer"
            )
        return torch.mean(self.gradients, dim=1)
