# Deep Hybrid Swarm Intelligence IDS

This notebook implements a hybrid swarm intelligence feature selection method integrating ACO, PSO, ABC, and MWPA, along with benchmark optimization.

## 1. Setup & Data Acquisition

- Install dependencies and fetch NSL-KDD dataset.


In [None]:

!pip install pyswarms scikit-learn pandas numpy matplotlib seaborn scipy tqdm -q

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import requests
from io import StringIO
import time
import warnings
warnings.filterwarnings('ignore')

# Source locations for the NSL-KDD train split, test split and feature-name list.
data_url = "https://raw.githubusercontent.com/defcom17/NSL_KDD/master/KDDTrain%2B.txt"
test_url = "https://raw.githubusercontent.com/defcom17/NSL_KDD/master/KDDTest%2B.txt"
features_url = "https://raw.githubusercontent.com/defcom17/NSL_KDD/master/Field%20Names.csv"

# Seconds to wait on each HTTP request before giving up instead of hanging the cell.
REQUEST_TIMEOUT = 30


def _fetch_text(url):
    """Download *url* and return the response body as text.

    Raises a ``requests.RequestException`` subclass on connection problems,
    timeouts, or a non-2xx HTTP status, so an error page is never handed to
    the CSV parser as if it were data.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.text


# Fetch feature names; fall back to the hard-coded list if the download or
# the parsing of the name file fails.
try:
    features_df = pd.read_csv(StringIO(_fetch_text(features_url)), header=None)
    column_names = features_df[0].tolist()
    column_names += ['attack_type', 'difficulty_level']
except (requests.RequestException, ValueError, KeyError):
    # ValueError covers pandas' EmptyDataError / ParserError; KeyError covers
    # a parsed file that has no column 0.
    column_names = [
        'duration', 'protocol_type', 'service', 'flag', 'src_bytes', 'dst_bytes', 'land', 'wrong_fragment',
        'urgent', 'hot', 'num_failed_logins', 'logged_in', 'num_compromised', 'root_shell', 'su_attempted',
        'num_root', 'num_file_creations', 'num_shells', 'num_access_files', 'num_outbound_cmds',
        'is_host_login', 'is_guest_login', 'count', 'srv_count', 'serror_rate', 'srv_serror_rate',
        'rerror_rate', 'srv_rerror_rate', 'same_srv_rate', 'diff_srv_rate', 'srv_diff_host_rate',
        'dst_host_count', 'dst_host_srv_count', 'dst_host_same_srv_rate', 'dst_host_diff_srv_rate',
        'dst_host_same_src_port_rate', 'dst_host_srv_diff_host_rate', 'dst_host_serror_rate',
        'dst_host_srv_serror_rate', 'dst_host_rerror_rate', 'dst_host_srv_rerror_rate',
        'attack_type', 'difficulty_level',
    ]

# Load datasets (the files have no header row, so the names are supplied here).
df_train = pd.read_csv(StringIO(_fetch_text(data_url)), header=None, names=column_names)
df_test = pd.read_csv(StringIO(_fetch_text(test_url)), header=None, names=column_names)
for df in [df_train, df_test]:
    if 'difficulty_level' in df:
        df.drop('difficulty_level', axis=1, inplace=True)

# Binary target: 1 for any record whose label is not 'normal', 0 otherwise.
df_train['is_attack'] = (df_train['attack_type'] != 'normal').astype(int)
df_test['is_attack'] = (df_test['attack_type'] != 'normal').astype(int)
df_train.drop('attack_type', axis=1, inplace=True)
df_test.drop('attack_type', axis=1, inplace=True)

print("Train shape:", df_train.shape, "Test shape:", df_test.shape)
print(df_train.head())


## 2. Data Preprocessing

- One-Hot encode categorical features.
- Scale numerical features.
- Split train into train/validation.

In [None]:

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

# Prepare X/y: separate the binary target from the feature columns.
X_train = df_train.drop('is_attack', axis=1)
y_train = df_train['is_attack']
X_test = df_test.drop('is_attack', axis=1)
y_test = df_test['is_attack']

# Categorical columns are one-hot encoded; every other column is treated as numeric.
cat = ['protocol_type', 'service', 'flag']
num = [c for c in X_train.columns if c not in cat]

# SimpleImputer's options must be passed by keyword: its first positional slot
# is `missing_values` (and recent scikit-learn releases accept keywords only),
# so a positional 'median' / 'constant' never selected the intended strategy.
num_pipe = Pipeline([
    ('imp', SimpleImputer(strategy='median')),
    ('scale', MinMaxScaler()),
])
cat_pipe = Pipeline([
    ('imp', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

pre = ColumnTransformer([('num', num_pipe, num), ('cat', cat_pipe, cat)])
# NOTE(review): the transformer is fitted on the full training frame before the
# train/validation split below, so scaling/encoding statistics include the
# validation rows — confirm this is acceptable for the experiments.
X_train_p = pre.fit_transform(X_train)
X_test_p = pre.transform(X_test)

# Feature names of the transformed matrix.
try:
    processed_features = pre.get_feature_names_out()
except AttributeError:
    # Fallback for scikit-learn versions without get_feature_names_out().
    cat_names = pre.named_transformers_['cat']['onehot'].get_feature_names(cat)
    processed_features = list(num) + list(cat_names)

# Hold out 25% of the training data for validation, stratified on the target.
X_tr, X_val, y_tr, y_val = train_test_split(
    X_train_p, y_train, test_size=0.25, random_state=42, stratify=y_train
)

print("Processed shapes:", X_tr.shape, X_val.shape, X_test_p.shape)


## 3. Define Fitness Function

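Logistic Regression-based fitness: validation accuracy − α · (number of selected features / total number of features). An empty feature mask or a failed model fit scores −1.0.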

In [None]:

from sklearn.linear_model import LogisticRegression
import numpy as np

def feature_selection_fitness(mask, X_tr, y_tr, X_val, y_val, alpha=0.05):
    """Score a feature subset: validation accuracy minus a size penalty.

    Args:
        mask: 1-D sequence or array with one entry per feature; entries equal
            to 1 (or True) mark the selected features.
        X_tr, y_tr: training features and labels used to fit the classifier.
        X_val, y_val: validation features and labels used to score it.
        alpha: weight of the penalty ``alpha * (selected / total)``.

    Returns:
        ``accuracy - penalty`` as a float, or ``-1.0`` when no feature is
        selected or the classifier cannot be fitted/scored.
    """
    # Coerce so that plain lists/tuples compare element-wise; on a bare list
    # `mask == 1` is a single scalar comparison and selects nothing.
    mask = np.asarray(mask)
    idx = np.flatnonzero(mask == 1)
    if idx.size == 0:
        return -1.0
    X1, X2 = X_tr[:, idx], X_val[:, idx]
    try:
        model = LogisticRegression(max_iter=100, solver='liblinear')
        model.fit(X1, y_tr)
        acc = model.score(X2, y_val)
    except Exception:
        # Best-effort: a subset the model cannot handle gets the worst score.
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit stop a long-running search.
        return -1.0
    penalty = alpha * (idx.size / len(mask))
    return acc - penalty


## 4. Individual Swarm Algorithms for Feature Selection
ACO is implemented below; the PSO, ABC, and MWPA implementations are still to be added.

In [None]:

import numpy as np
import time

def _binarize(pos,thr=0.5):
    """Map continuous positions to a 0/1 integer mask.

    Each position is passed through a logistic curve with slope 10 centred
    on 0.5; entries whose squashed value is strictly greater than *thr*
    become 1, all others 0.
    """
    exponent = -10 * (pos - 0.5)
    squashed = 1 / (1 + np.exp(exponent))
    above = squashed > thr
    return above.astype(int)

def aco_feature_selection(X_tr, y_tr, X_val, y_val, n_agents=30, max_iter=50,
                          evap=0.1, alpha=0.05, random_state=None):
    """Select features with a binary ant-colony search.

    Every ant samples a 0/1 mask by including each feature independently with
    a probability given by that feature's pheromone level. After each
    iteration all pheromone evaporates by ``evap`` and the features of the
    best mask found so far are reinforced by ``evap``.

    Args:
        X_tr, y_tr, X_val, y_val: data forwarded to
            ``feature_selection_fitness`` for scoring each mask.
        n_agents: number of masks sampled per iteration (must be >= 1).
        max_iter: number of iterations.
        evap: evaporation rate, also used as the deposit amount.
        alpha: feature-count penalty weight forwarded to the fitness function.
        random_state: optional seed for reproducible mask sampling; when
            ``None`` the global ``np.random`` state is used, as before.

    Returns:
        ``(best_mask, history, elapsed_seconds)`` where ``history`` holds the
        best fitness after each iteration. ``best_mask`` is ``None`` when
        ``max_iter`` is 0.

    Raises:
        ValueError: if ``n_agents`` is smaller than 1.
    """
    if n_agents < 1:
        raise ValueError("n_agents must be at least 1")

    n_feat = X_tr.shape[1]
    if random_state is None:
        draw = np.random.random
    else:
        draw = np.random.default_rng(random_state).random

    pher = np.full(n_feat, 0.1)
    best_fit = -np.inf
    best_mask = None
    history = []
    start = time.time()
    for _ in range(max_iter):
        # Use pheromone directly as the per-feature inclusion probability.
        # Normalising it to sum to 1 would make every ant pick one feature on
        # average (and often none), so the search could never explore
        # realistic subset sizes. The clip is only a guard for evap values
        # outside [0, 1].
        prob = np.clip(pher, 0.0, 1.0)
        masks = []
        fits = []
        for _ant in range(n_agents):
            m = (draw(n_feat) < prob).astype(int)
            masks.append(m)
            fits.append(feature_selection_fitness(m, X_tr, y_tr, X_val, y_val, alpha))
        top = int(np.argmax(fits))
        if fits[top] > best_fit:
            best_fit = fits[top]
            best_mask = masks[top].copy()
        # Evaporate everywhere, then reinforce the features of the best mask.
        pher *= (1 - evap)
        pher[best_mask == 1] += evap
        history.append(best_fit)
    return best_mask, history, time.time() - start
# [Add pso_feature_selection, abc_feature_selection, mwpa_feature_selection]


## 5. Hybrid Swarm Intelligence for Feature Selection
Placeholder class for hybrid algorithm.

In [None]:

class HybridSwarmFeatureSelector:
    """Stub for the hybrid swarm feature selector.

    The constructor and ``run`` accept any arguments and ignore them;
    ``run`` always yields an empty result.
    """

    def __init__(self, *args, **kwargs):
        """Accept and discard all constructor arguments."""

    def run(self, *args, **kwargs):
        """Return the placeholder triple ``([], [], 0)``."""
        selected, history, elapsed = [], [], 0
        return selected, history, elapsed


## 6. Run Feature Selection Experiments
Compare individual vs. hybrid methods.

In [None]:

# Selectors under comparison, keyed by the label used in results and plots.
methods = {"ACO": aco_feature_selection, "Hybrid": HybridSwarmFeatureSelector}
results = {}
for name, fn in methods.items():
    # The "Hybrid" entry is a class that must be instantiated and run();
    # every other entry is called directly as a function.
    runner = fn().run if name == "Hybrid" else fn
    sel, hist, t = runner(X_tr, y_tr, X_val, y_val)
    results[name] = dict(mask=sel, hist=hist, time=t)
import matplotlib.pyplot as plt
# One convergence curve per method.
for k, record in results.items():
    plt.plot(record['hist'], label=k)
plt.legend()
plt.title("Convergence")
plt.show()


## 7. Model Training
Train RF, SVM, MLP on features selected by each method.

In [None]:

# [Implement model training loop as provided earlier]


## 8. Model Evaluation
Evaluate on test set; compute metrics and plot ROC.

In [None]:

# [Implement evaluation code as provided earlier]


## 9. Benchmark Function Analysis
Placeholder for continuous hybrid optimizer.

In [None]:

class HybridSwarmOptimizer:
    """Stub for the continuous hybrid swarm optimizer.

    The constructor accepts any arguments and ignores them; ``run`` always
    yields an empty result with an infinite score.
    """

    def __init__(self, *args, **kwargs):
        """Accept and discard all constructor arguments."""

    def run(self):
        """Return the placeholder tuple ``([], np.inf, [], 0)``."""
        position, score, history, elapsed = [], np.inf, [], 0
        return position, score, history, elapsed


## 10. Component-wise Analysis
Outline experiments with varied hybrid configurations.

## 11. Presentation and Demonstration
Generate final comparison plots.

## 12. Tools and Libraries
- pandas, numpy, scikit-learn, matplotlib, seaborn, scipy, requests, tqdm