# Hybrid vs Individual Swarm Intelligence IDS Comparison
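This notebook compares the performance of four individual swarm-intelligence algorithms (ACO, PSO, ABC, MWPA) against a deep hybrid method when each is used for feature selection on the NSL-KDD intrusion-detection dataset. Every method is scored on accuracy, false positive rate, AUC, selection time, and the number of features it keeps.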

## 1. Setup & Data Acquisition

In [None]:

!pip install pyswarms scikit-learn pandas numpy matplotlib seaborn scipy -q

import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

# Load data
# NSL-KDD training split and the feature-name list published alongside it.
url = "https://raw.githubusercontent.com/defcom17/NSL_KDD/master/KDDTrain+.txt"
cols_url = "https://raw.githubusercontent.com/defcom17/NSL_KDD/master/KDDFeatureNames.txt"
feature_names = pd.read_csv(cols_url, sep=":", header=None)[0].tolist()
features = feature_names + ['target']  # kept for any later cell that uses this name

# Read the rows WITHOUT names first so the real column count can be checked.
# If read_csv is given fewer names than the file has columns, pandas uses the
# extra leading column(s) as the index and every name shifts by one.
# NOTE(review): NSL-KDD .txt rows are documented as 41 features + class label +
# difficulty score - confirm against the downloaded file; the check below
# fails loudly if the layout is anything else.
data = pd.read_csv(url, header=None)
trailing_cols = ['target', 'difficulty']
n_trailing = data.shape[1] - len(feature_names)
if not 1 <= n_trailing <= len(trailing_cols):
    raise ValueError(
        f"Unexpected layout: {data.shape[1]} data columns for "
        f"{len(feature_names)} feature names"
    )
non_feature_cols = trailing_cols[:n_trailing]
data.columns = feature_names + non_feature_cols

# Preprocess
# Build the binary target (0 = normal traffic, 1 = attack) from the RAW label.
# Label-encoding the class column first and then testing `!= 0` is wrong:
# LabelEncoder numbers classes alphabetically, so 0 would be whichever label
# sorts first rather than the benign class.
labels = data['target']
if pd.api.types.is_numeric_dtype(labels):
    # Already numeric: keep the original "non-zero means attack" convention.
    y = (labels != 0).astype(int).values
else:
    # presumably the benign class is spelled 'normal' - verify against the data
    y = (labels.astype(str).str.strip().str.lower() != 'normal').astype(int).values
if y.min() == y.max():
    raise ValueError("Target contains a single class; check the label column")
# Store the binary target back so `data['target'] != 0` means "attack" for any
# later cell that inspects the frame.
data['target'] = y

# Integer-encode the remaining categorical feature columns. The single encoder
# is refit for every column, so after the loop it only remembers the last one.
le = LabelEncoder()
for col in data.select_dtypes(include='object').columns:
    data[col] = le.fit_transform(data[col])
X = data.drop(columns=non_feature_cols).values

# Split BEFORE scaling and fit the scaler on the training rows only, so no
# statistics from the held-out rows leak into the features used for selection
# and training.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Whole-dataset view under the original name, scaled with the train-fit scaler.
X_scaled = scaler.transform(X)


## 2. Define Swarm Feature Selection Functions

In [None]:

import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

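# TODO: define (or import) the ACO, PSO, ABC, MWPA and hybrid selectors here:
#   aco_feature_selection, pso_feature_selection, abc_feature_selection,
#   mwpa_feature_selection, deep_hybrid_selection
# They are not defined in the cells shown here, so the evaluation cell in
# section 3 raises NameError on a fresh kernel until they are provided.
# Section 3 calls each one as func(X_train, y_train) and uses the return value
# to index columns (X_train[:, feats]) and to count them (len(feats)), so each
# function should return the indices of the selected feature columns.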


## 3. Feature Selection and Evaluation

In [None]:

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix
import time

# Registry of feature selectors to compare: display name -> callable.
# Insertion order is the order in which the methods are evaluated and reported.
methods = dict(
    ACO=aco_feature_selection,
    PSO=pso_feature_selection,
    ABC=abc_feature_selection,
    MWPA=mwpa_feature_selection,
    Hybrid=deep_hybrid_selection,
)

# Score every selector with the same downstream classifier, so the only thing
# that varies between entries of `results` is the selected feature subset.
results = {}
for name, func in methods.items():
    # perf_counter is the monotonic, high-resolution clock intended for
    # measuring intervals; time.time() can jump if the system clock changes.
    start = time.perf_counter()
    feats = func(X_train, y_train)
    duration = time.perf_counter() - start

    # Accept either column indices or a boolean mask and normalise to indices,
    # so n_features counts selected columns rather than the mask length.
    feats = np.asarray(feats)
    if feats.dtype == bool:
        feats = np.flatnonzero(feats)
    if feats.size == 0:
        # Fail with a message that names the method instead of an opaque
        # zero-feature error from inside the classifier.
        raise ValueError(f"{name} selected no features")

    X_tr = X_train[:, feats]
    X_te = X_test[:, feats]
    clf = RandomForestClassifier(random_state=42)
    clf.fit(X_tr, y_train)
    y_pred = clf.predict(X_te)
    y_prob = clf.predict_proba(X_te)[:, 1]  # probability of the positive class (1)
    acc = accuracy_score(y_test, y_pred)
    # Pin the label order so the matrix is always 2x2 and unpacks into four
    # values even when one class is absent from y_test and y_pred.
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()
    negatives = fp + tn
    # FPR is undefined without negative samples; report NaN, don't divide by 0.
    fpr = fp / negatives if negatives else float('nan')
    auc = roc_auc_score(y_test, y_prob)
    results[name] = {'accuracy': acc, 'fpr': fpr, 'auc': auc, 'time': duration, 'n_features': len(feats)}
results


## 4. Results Visualization

In [None]:

import pandas as pd
import matplotlib.pyplot as plt

# One row per method, one column per metric. from_dict(orient='index') gives
# every metric its own column and dtype; pd.DataFrame(results).T would first
# build one float column per method and so turn the integer n_features counts
# into floats.
METRIC_COLUMNS = ['accuracy', 'fpr', 'auc', 'time', 'n_features']
df = pd.DataFrame.from_dict(results, orient='index')
df = df[METRIC_COLUMNS]
display(df)

# Bar plots
# Each entry is (column selection, figure title, y-axis label). A list of
# columns yields a grouped bar chart, a single column name a plain one.
plot_specs = [
    (['accuracy', 'auc'], 'Accuracy and AUC Comparison', 'Score'),
    ('fpr', 'False Positive Rate Comparison', 'FPR'),
    ('time', 'Computation Time Comparison', 'Seconds'),
    ('n_features', 'Number of Selected Features', 'Count'),
]
for columns, title, ylabel in plot_specs:
    # Explicit figure/axes so every chart is labelled on its own axes instead
    # of relying on pyplot's implicit "current figure" state.
    fig, ax = plt.subplots()
    df[columns].plot(kind='bar', ax=ax, title=title)
    ax.set_ylabel(ylabel)
    plt.show()
