# Deep Hybrid Swarm Intelligence for Intrusion Detection

This notebook implements a comprehensive Hybrid Swarm Intelligence framework for feature selection in intrusion detection (using NSL-KDD), along with benchmark function optimization, component-wise analysis, and final evaluations.

## 1. Setup & Data Acquisition

- Install necessary libraries  
- Fetch and load the NSL-KDD training and test datasets  
- Prepare binary labels (attack vs. normal)

In [None]:
!pip install pyswarms scikit-learn pandas numpy matplotlib seaborn scipy tqdm -q

import pandas as pd
import numpy as np
import requests
from io import StringIO
import warnings
warnings.filterwarnings('ignore')

# Source files for the NSL-KDD data set (GitHub mirror)
train_url = "https://raw.githubusercontent.com/defcom17/NSL_KDD/master/KDDTrain%2B.txt"
test_url  = "https://raw.githubusercontent.com/defcom17/NSL_KDD/master/KDDTest%2B.txt"
fields_url= "https://raw.githubusercontent.com/defcom17/NSL_KDD/master/Field%20Names.csv"


def _fetch_text(url, timeout=30):
    """Download ``url`` and return the response body as text.

    A non-success HTTP status raises ``requests.HTTPError`` instead of letting
    an error page be parsed as CSV data, and ``timeout`` (seconds) keeps a
    stalled connection from blocking the notebook indefinitely.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.text


# Fetch field names: the column name is the first comma-separated token of
# each line; blank lines are skipped so they cannot become empty column names.
fields = _fetch_text(fields_url).splitlines()
col_names = [line.split(',')[0] for line in fields if line.strip()]
# The data files carry two trailing columns that the field list does not name.
col_names += ['attack_type', 'difficulty_level']

# Load data (the files have no header row)
df_train = pd.read_csv(StringIO(_fetch_text(train_url)),
                       names=col_names, header=None)
df_test  = pd.read_csv(StringIO(_fetch_text(test_url)),
                       names=col_names, header=None)

# Drop difficulty_level
for df in (df_train, df_test):
    df.drop('difficulty_level', axis=1, inplace=True)

# Binary target: 1 for every record whose attack_type is not 'normal'
df_train['is_attack'] = (df_train['attack_type'] != 'normal').astype(int)
df_test ['is_attack'] = (df_test ['attack_type'] != 'normal').astype(int)
df_train.drop('attack_type', axis=1, inplace=True)
df_test .drop('attack_type', axis=1, inplace=True)

print("Training set shape:", df_train.shape)
print("Test set shape:", df_test.shape)
df_train.head()

## 2. Data Preprocessing

- Identify categorical vs numeric features  
- Build preprocessing pipeline (OneHotEncoder + MinMaxScaler)  
- Split training into train/validation for feature selection fitness

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline

# Features and labels
X = df_train.drop('is_attack', axis=1)
y = df_train['is_attack']

# Identify categorical columns; everything else is treated as numeric
cat_feats = ['protocol_type','service','flag']
num_feats = [c for c in X.columns if c not in cat_feats]

# Pipelines. SimpleImputer's options are passed by keyword: current
# scikit-learn releases reject positional arguments for them.
num_pipe = Pipeline([('impute', SimpleImputer(strategy='median')),
                     ('scale', MinMaxScaler())])
cat_pipe = Pipeline([('impute', SimpleImputer(strategy='constant', fill_value='missing')),
                     ('onehot', OneHotEncoder(handle_unknown='ignore'))])

# sparse_threshold=0 always yields a dense array, whatever the one-hot block's
# sparsity; later cells stack the splits with np.vstack, which needs dense input.
pre = ColumnTransformer([
    ('num', num_pipe, num_feats),
    ('cat', cat_pipe, cat_feats)
], sparse_threshold=0)

# Fit on the training frame only, then apply the same transform to the test frame
X_proc = pre.fit_transform(X)
X_test_proc = pre.transform(df_test.drop('is_attack', axis=1))
y_test = df_test['is_attack']

# Feature names
try:
    feat_names = pre.get_feature_names_out()
except AttributeError:
    # Older scikit-learn: transformers lack get_feature_names_out, so rebuild
    # the names from the numeric columns plus the encoder's own naming.
    cat_names = pre.named_transformers_['cat']['onehot'].get_feature_names(cat_feats)
    feat_names = num_feats + list(cat_names)

# Train/validation split (stratified so both parts keep the class ratio)
X_tr, X_val, y_tr, y_val = train_test_split(
    X_proc, y, test_size=0.25, stratify=y, random_state=42)

print("Processed shapes:", X_tr.shape, X_val.shape, X_test_proc.shape)

## 3. Fitness Function

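Define a fitness function that balances validation accuracy (from a Logistic Regression classifier) against the number of selected features: `fitness = accuracy - alpha * (n_selected / n_total)`. An empty feature mask, or a classifier that fails to fit, scores `-1.0`.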

In [None]:
from sklearn.linear_model import LogisticRegression
import numpy as np

def fitness(mask, X1, y1, X2, y2, alpha=0.05):
    """Score a binary feature mask: accuracy minus a feature-count penalty.

    A liblinear LogisticRegression is fitted on the selected columns of
    ``X1``/``y1`` and scored on the same columns of ``X2``/``y2``.

    Parameters
    ----------
    mask : array-like of 0/1
        One entry per column; entries equal to 1 mark selected columns.
    X1, y1 : fitting data and labels.
    X2, y2 : scoring data and labels.
    alpha : float
        Weight of the penalty ``alpha * (n_selected / len(mask))``.

    Returns
    -------
    float
        ``accuracy - penalty``, or ``-1.0`` when no column is selected or the
        classifier cannot be fitted/scored.
    """
    # Accept plain lists too: ``[1, 0, 1] == 1`` is a single False in Python,
    # which would otherwise make every list mask look empty.
    mask = np.asarray(mask)
    idx = np.flatnonzero(mask == 1)
    if len(idx) == 0:
        return -1.0
    X1s, X2s = X1[:, idx], X2[:, idx]
    try:
        clf = LogisticRegression(max_iter=100, solver='liblinear')
        clf.fit(X1s, y1)
        acc = clf.score(X2s, y2)
    except Exception:
        # Best effort: a mask the classifier cannot handle is simply a bad
        # candidate. KeyboardInterrupt/SystemExit are deliberately not caught
        # so that a long optimisation run can still be interrupted.
        return -1.0
    penalty = alpha * (len(idx) / len(mask))
    return acc - penalty

## 4. Individual Swarm Algorithms for Feature Selection

Implement ACO, PSO, ABC, and MWPA for binary feature selection.

In [None]:
import time

def _binarize(pos, thr=0.5):
    s = 1/(1+np.exp(-10*(pos-0.5)))
    return (s>thr).astype(int)

def aco_fs(X1, y1, X2, y2, n_agents=30, max_iter=50, evap=0.1, alpha=0.05):
    """Pheromone-guided random search for a binary feature mask (ACO).

    Every iteration each agent samples a mask from the current pheromone
    distribution; the best mask seen so far is then reinforced.

    Parameters
    ----------
    X1, y1, X2, y2 : forwarded unchanged to ``fitness``.
    n_agents : number of masks sampled per iteration.
    max_iter : number of iterations.
    evap : evaporation rate; also the amount deposited on the best mask.
    alpha : penalty weight forwarded to ``fitness``.

    Returns
    -------
    (best_mask, history, elapsed)
        ``best_mask`` is the best 0/1 mask found (``None`` if ``max_iter`` is
        0), ``history`` holds the best fitness after each iteration, and
        ``elapsed`` is the wall-clock time in seconds.
    """
    n = X1.shape[1]
    pher = np.ones(n) * 0.1
    best_fit = -np.inf
    best_mask = None
    history = []
    start = time.time()
    for _iteration in range(max_iter):
        # Two independent lists. The former ``masks=[], fits=[]`` was a chained
        # assignment that tried to unpack an empty list and raised ValueError.
        masks, fits = [], []
        # NOTE(review): probabilities are normalised to sum to 1, so each agent
        # selects about one feature on average — confirm this is intended.
        prob = pher / pher.sum()
        for _agent in range(n_agents):
            m = (np.random.rand(n) < prob).astype(int)
            masks.append(m)
            fits.append(fitness(m, X1, y1, X2, y2, alpha))
        idx = np.argmax(fits)
        if fits[idx] > best_fit:
            best_fit, best_mask = fits[idx], masks[idx].copy()
        # Evaporate everywhere, then deposit on the features of the best mask.
        pher *= (1 - evap)
        pher[best_mask == 1] += evap
        history.append(best_fit)
    return best_mask, history, time.time() - start

def pso_fs(X1, y1, X2, y2, n_agents=30, max_iter=50, w=0.9, c1=1.5, c2=1.5, alpha=0.05):
    """Particle-swarm search for a binary feature mask.

    Particles move in the unit hypercube; a position is converted to a mask
    with ``_binarize`` and scored with ``fitness``. The inertia weight starts
    at ``w`` and shrinks linearly with the iteration index.

    Returns ``(mask, history, elapsed)``: the binarized global-best position,
    the global-best fitness after initialisation and after every iteration
    (``max_iter + 1`` entries), and the wall-clock time in seconds.
    """
    dim = X1.shape[1]
    positions = np.random.rand(n_agents, dim)
    velocities = np.zeros_like(positions)
    personal_pos = positions.copy()
    personal_fit = np.full(n_agents, -np.inf)
    global_pos = np.zeros(dim)
    global_fit = -np.inf
    trace = []
    started = time.time()

    # Score the initial swarm and seed the personal/global bests.
    for k in range(n_agents):
        score = fitness(_binarize(positions[k]), X1, y1, X2, y2, alpha)
        personal_fit[k] = score
        personal_pos[k] = positions[k].copy()
        if score > global_fit:
            global_fit = score
            global_pos = positions[k].copy()
    trace.append(global_fit)

    for step in range(max_iter):
        inertia = w * (1 - step / max_iter)
        for k in range(n_agents):
            r1 = np.random.rand(dim)
            r2 = np.random.rand(dim)
            cognitive = c1 * r1 * (personal_pos[k] - positions[k])
            social = c2 * r2 * (global_pos - positions[k])
            velocities[k] = inertia * velocities[k] + cognitive + social
            # Positions are kept inside [0, 1].
            positions[k] = np.clip(positions[k] + velocities[k], 0, 1)
            score = fitness(_binarize(positions[k]), X1, y1, X2, y2, alpha)
            if score > personal_fit[k]:
                personal_fit[k] = score
                personal_pos[k] = positions[k].copy()
            if score > global_fit:
                global_fit = score
                global_pos = positions[k].copy()
        trace.append(global_fit)

    elapsed = time.time() - started
    return _binarize(global_pos), trace, elapsed

def abc_fs(X1, y1, X2, y2, n_agents=30, max_iter=50, limit=5, alpha=0.05):
    """Artificial-Bee-Colony style search for a binary feature mask.

    Half of the agents (``n_agents // 2``) own a food source, i.e. a position
    in the unit hypercube; the rest act as onlookers. Each iteration runs an
    employed phase, an onlooker phase and a scout phase.

    Parameters
    ----------
    X1, y1, X2, y2 : forwarded unchanged to ``fitness``.
    n_agents : total number of bees (employed + onlookers).
    max_iter : number of iterations.
    limit : a food source with more than ``limit`` consecutive failed
        improvements is replaced by a random one.
    alpha : penalty weight forwarded to ``fitness``.

    Returns
    -------
    (best_mask, hist, elapsed)
        Best 0/1 mask found, best fitness after initialisation and after each
        iteration, and wall-clock time in seconds.
    """
    n = X1.shape[1]
    n_employed = n_agents // 2
    n_onlook = n_agents - n_employed
    foods = np.random.rand(n_employed, n)
    fits = np.full(n_employed, -np.inf)
    trials = np.zeros(n_employed, int)
    best_fit = -np.inf
    best_mask = None
    hist = []
    start = time.time()

    def _explore(i):
        """Try a neighbour of food source ``i``; keep it only if it scores higher."""
        j = np.random.choice([x for x in range(n_employed) if x != i])
        phi = np.random.uniform(-1, 1, n)
        cand = np.clip(foods[i] + phi * (foods[i] - foods[j]), 0, 1)
        f = fitness(_binarize(cand), X1, y1, X2, y2, alpha)
        if f > fits[i]:
            foods[i], fits[i], trials[i] = cand, f, 0
        else:
            trials[i] += 1

    # init
    for i in range(n_employed):
        m = _binarize(foods[i])
        fits[i] = fitness(m, X1, y1, X2, y2, alpha)
        if fits[i] > best_fit:
            best_fit, best_mask = fits[i], m.copy()
    hist.append(best_fit)

    for _iteration in range(max_iter):
        # employed: every food source is explored once
        for i in range(n_employed):
            _explore(i)

        # onlooker: sources are explored in proportion to their (non-negative)
        # fitness. This phase used to build a candidate and then discard it
        # without evaluating it; it now applies the same greedy replacement
        # as the employed phase.
        pf = np.maximum(0, fits)
        if pf.sum() > 0:
            probs = pf / pf.sum()
            for _onlooker in range(n_onlook):
                _explore(np.random.choice(n_employed, p=probs))

        # scout: abandon sources that have stopped improving
        for i in range(n_employed):
            if trials[i] > limit:
                foods[i] = np.random.rand(n)
                trials[i] = 0
                fits[i] = fitness(_binarize(foods[i]), X1, y1, X2, y2, alpha)

        # update best
        idx = np.argmax(fits)
        if fits[idx] > best_fit:
            best_fit, best_mask = fits[idx], _binarize(foods[idx]).copy()
        hist.append(best_fit)
    return best_mask, hist, time.time() - start

def mwpa_fs(X1, y1, X2, y2, n_agents=30, max_iter=50, alpha=0.05):
    """Leader-following (MWPA) search for a binary feature mask.

    Every iteration all wolves are repositioned relative to the wolf with the
    highest current fitness; the step scale ``A`` shrinks linearly with the
    iteration index.

    Parameters
    ----------
    X1, y1, X2, y2 : forwarded unchanged to ``fitness``.
    n_agents : number of wolves.
    max_iter : number of iterations.
    alpha : penalty weight forwarded to ``fitness`` (unrelated to the leading
        wolf, which is called ``leader_pos`` below).

    Returns
    -------
    (best_mask, hist, elapsed)
        Best 0/1 mask found, best fitness after initialisation and after each
        iteration, and wall-clock time in seconds.
    """
    n = X1.shape[1]
    wolves = np.random.rand(n_agents, n)
    fits = np.full(n_agents, -np.inf)
    best_fit = -np.inf
    best_mask = None
    hist = []
    start = time.time()
    # init
    for i in range(n_agents):
        m = _binarize(wolves[i])
        fits[i] = fitness(m, X1, y1, X2, y2, alpha)
        if fits[i] > best_fit:
            best_fit, best_mask = fits[i], m.copy()
    hist.append(best_fit)
    for it in range(max_iter):
        # Snapshot the leader. Without the copy this is a view into ``wolves``
        # that changes as soon as the leader's own row is rewritten below, so
        # later wolves in the same iteration would chase a moved target.
        leader_pos = wolves[np.argmax(fits)].copy()
        for i in range(n_agents):
            r1, r2 = np.random.rand(), np.random.rand()
            A = 2 * (1 - it / max_iter) * r1
            D = np.abs(2 * r2 * leader_pos - wolves[i])
            wolves[i] = np.clip(leader_pos - A * (D ** 1.5), 0, 1)
            m = _binarize(wolves[i])
            f = fitness(m, X1, y1, X2, y2, alpha)
            fits[i] = f
            if f > best_fit:
                best_fit, best_mask = f, m.copy()
        hist.append(best_fit)
    return best_mask, hist, time.time() - start

## 5. Hybrid Swarm Intelligence for Feature Selection

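Combine the swarm methods (ACO, PSO, ABC, MWPA) in one loop (simplified integration). Note that the class below only blends a PSO velocity update with an ACO pheromone drift term; it contains no ABC- or MWPA-specific step.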

In [None]:
class HybridSwarmFeatureSelector:
    """Feature selector that adds a pheromone drift to a PSO velocity update.

    Agents live in the unit hypercube; positions are turned into 0/1 masks
    with ``_binarize`` and scored with ``fitness`` (default penalty weight).
    """

    def __init__(self, n_features, n_agents=30, max_iter=50,
                 w=0.8, c1=1.5, c2=1.5, evap=0.1, dep=0.1, limit=5):
        """Store the hyper-parameters and create the initial swarm state."""
        # Problem size and loop bounds
        self.n = n_features
        self.na = n_agents
        self.mi = max_iter
        # PSO coefficients, pheromone evaporation/deposit, and ``limit``
        # (stored here; not read anywhere else in this class)
        self.w = w
        self.c1 = c1
        self.c2 = c2
        self.evap = evap
        self.dep = dep
        self.limit = limit
        # Swarm state: positions, velocities, personal and global bests
        self.X = np.random.rand(n_agents, n_features)
        self.V = np.zeros_like(self.X)
        self.pbest = self.X.copy()
        self.pfit = np.full(n_agents, -np.inf)
        self.gbest = np.zeros(n_features)
        self.gfit = -np.inf
        # One pheromone value per feature
        self.phero = np.ones(n_features) * 0.1

    def run(self, X1, y1, X2, y2):
        """Run the search and return ``(mask, history, elapsed_seconds)``.

        ``mask`` is the binarized global-best position and ``history`` holds
        the global-best fitness after initialisation and after each iteration.
        """
        trace = []
        t0 = time.time()

        # Score the initial positions.
        for k in range(self.na):
            score = fitness(_binarize(self.X[k]), X1, y1, X2, y2)
            self.pfit[k] = score
            if score > self.gfit:
                self.gfit = score
                self.gbest = self.X[k].copy()
        trace.append(self.gfit)

        for _ in range(self.mi):
            for k in range(self.na):
                # PSO part: inertia + pull to personal best + pull to global best
                r1 = np.random.rand(self.n)
                r2 = np.random.rand(self.n)
                pso_step = (self.w * self.V[k]
                            + self.c1 * r1 * (self.pbest[k] - self.X[k])
                            + self.c2 * r2 * (self.gbest - self.X[k]))
                # ACO part: push towards features with above-average pheromone share
                drift = (self.phero / self.phero.sum() - 1 / self.n) * 0.1
                step = pso_step + drift
                moved = np.clip(self.X[k] + step, 0, 1)
                score = fitness(_binarize(moved), X1, y1, X2, y2)

                if score > self.pfit[k]:
                    self.pfit[k] = score
                    self.pbest[k] = moved.copy()
                # The agent always moves, whether or not it improved.
                self.X[k] = moved
                self.V[k] = step
                if score > self.gfit:
                    self.gfit = score
                    self.gbest = moved.copy()

            # Pheromone update: evaporate, then reinforce the global-best features.
            self.phero *= (1 - self.evap)
            on_best = _binarize(self.gbest) == 1
            self.phero[on_best] += self.dep
            trace.append(self.gfit)

        return _binarize(self.gbest), trace, time.time() - t0

## 6. Run Feature Selection Experiments

Execute ACO, PSO, ABC, MWPA, and Hybrid selectors.

In [None]:
import matplotlib.pyplot as plt

# Selector registry: plain functions, plus one pre-built hybrid instance
# whose entry point is its ``run`` method.
methods = {
    "ACO": aco_fs,
    "PSO": pso_fs,
    "ABC": abc_fs,
    "MWPA": mwpa_fs,
    "Hybrid": HybridSwarmFeatureSelector(n_features=X_tr.shape[1])
}

# Run every selector on the train/validation split and keep its outputs.
fs_results = {}
for name, fn in methods.items():
    runner = fn.run if name == "Hybrid" else fn
    mask, hist, t = runner(X_tr, y_tr, X_val, y_val)
    fs_results[name] = {'mask': mask, 'history': hist, 'time': t}

# Convergence plot: one best-fitness curve per selector
plt.figure(figsize=(8,5))
for n, r in fs_results.items():
    plt.plot(r['history'], label=n)
plt.xlabel("Iteration")
plt.ylabel("Fitness")
plt.title("FS Convergence")
plt.legend()
plt.grid()
plt.show()

## 7. Model Training

Train Random Forest, SVM, and MLP on features from each selector.

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import RandomizedSearchCV

# Feature selection is finished, so the train and validation parts are
# stacked back together and used as one training set.
X_comb = np.vstack([X_tr, X_val])
y_comb = np.concatenate([y_tr, y_val])

classifiers = {
    "RF": RandomForestClassifier(random_state=42),
    "SVM": SVC(probability=True, random_state=42),
    "MLP": MLPClassifier(max_iter=500, random_state=42)
}
param_dist = {
    "RF": {'n_estimators':[50,100], 'max_depth':[None,10]},
    "SVM": {'C':[0.1,1,10], 'gamma':['scale','auto']},
    "MLP": {'hidden_layer_sizes':[(50,),(100,)], 'alpha':[0.0001,0.001]}
}

# trained[selector][classifier] -> best estimator found by the search
trained = {}
for fs, info in fs_results.items():
    sel = info['mask']
    if sel.sum() == 0:
        # Nothing to train on; say so instead of dropping the selector silently.
        print(f"Skipping {fs}: no features selected")
        continue
    X_sel = X_comb[:, sel == 1]
    trained[fs] = {}
    for name, clf in classifiers.items():
        # random_state makes the sampled parameter combinations reproducible,
        # consistent with the seeded classifiers above.
        rs = RandomizedSearchCV(clf, param_dist[name], n_iter=3, cv=3,
                                scoring='accuracy', n_jobs=-1, random_state=42)
        rs.fit(X_sel, y_comb)
        trained[fs][name] = rs.best_estimator_

## 8. Model Evaluation

Evaluate all trained models on the test set.

In [None]:
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix, roc_curve
# Score every (selector, classifier) pair on the held-out test set.
for fs, models in trained.items():
    sel = fs_results[fs]['mask']
    keep = sel == 1
    # Use the same columns the models were trained on.
    X_t = X_test_proc[:, keep]
    print(f"--- {fs} ---")
    for name, model in models.items():
        y_pred = model.predict(X_t)
        # Second predict_proba column is used as the score for ROC AUC.
        pos_scores = model.predict_proba(X_t)[:, 1]
        acc = accuracy_score(y_test, y_pred)
        cm = confusion_matrix(y_test, y_pred)
        roc = roc_auc_score(y_test, pos_scores)
        print(f"{name}: ACC={acc:.4f}, ROC_AUC={roc:.4f}, CM={cm}")