# Full Notebook: Swarm Intelligence Feature Selection + Benchmark Analysis

In [None]:

import numpy as np
import matplotlib.pyplot as plt
import time
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification


## 1. Data Preparation

In [None]:

# Synthetic classification data: 1000 samples with 30 features, of which
# 15 are informative and 5 are redundant. The seed is fixed for repeatability.
X, y = make_classification(
    n_samples=1000,
    n_features=30,
    n_informative=15,
    n_redundant=5,
    random_state=42,
)

# Hold out 40% of the samples; the remaining 60% is the training set.
X_tr, X_temp, y_tr, y_temp = train_test_split(
    X, y, test_size=0.4, random_state=42
)

# Halve the hold-out pool into validation and test sets (20% / 20% overall).
X_val, X_te, y_val, y_te = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42
)


## 2. Helper Function for Binarization

In [None]:

def _binarize(pos, thr=0.5):
    """Turn continuous positions into a 0/1 mask via a steep sigmoid.

    Every position is mapped through ``1 / (1 + exp(-10 * (pos - 0.5)))`` and
    the result is compared against ``thr``.

    Args:
        pos: Scalar, sequence or NumPy array of continuous positions.
        thr: Cut-off for the sigmoid output; values strictly greater than
            ``thr`` become 1, everything else becomes 0.

    Returns:
        Integer 0/1 values with the same shape as ``pos``.
    """
    # Accept plain Python sequences as well as arrays.
    pos = np.asarray(pos)
    # For very negative positions exp() overflows to inf. 1 / (1 + inf) is
    # still the correct 0.0, so only the spurious RuntimeWarning is silenced.
    with np.errstate(over="ignore"):
        s = 1 / (1 + np.exp(-10 * (pos - 0.5)))
    return (s > thr).astype(int)


## 4. Individual Swarm Algorithms for Feature Selection

In [None]:

def aco_feature_selection(X_tr, y_tr, X_val, y_val, n_agents=30, max_iter=50, evap=0.1,
                          random_state=None):
    """Select a feature subset with a pheromone-guided random search.

    In every iteration each agent samples a binary feature mask, a 50-tree
    random forest is fit on the selected training columns, and the mask is
    scored by accuracy on the validation set. The best mask found so far
    reinforces the pheromone of its features after evaporation.

    Args:
        X_tr: Training feature matrix; columns are indexed by the masks.
        y_tr: Training labels.
        X_val: Validation feature matrix with the same columns as ``X_tr``.
        y_val: Validation labels.
        n_agents: Number of masks sampled per iteration.
        max_iter: Number of iterations.
        evap: Evaporation rate; also the amount deposited on the features of
            the best mask each iteration.
        random_state: Optional integer seed. When given, mask sampling and
            the random forests are seeded so a run can be repeated exactly.
            When ``None`` (default) the global NumPy random state is used,
            as before.

    Returns:
        Tuple ``(best_mask, history, elapsed)``: the best 0/1 mask found
        (``None`` if ``max_iter`` is 0), the best validation accuracy after
        each iteration, and the wall-clock time in seconds.

    NOTE(review): the sampling probabilities are normalised to sum to 1, so
    the expected number of features selected by one agent is 1. Confirm this
    is intended; behaviour is left unchanged here.
    """
    # The np.random module and RandomState share the rand/randint API, so the
    # unseeded path consumes the global stream exactly as the original did.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    n_feat = X_tr.shape[1]
    pher = np.ones(n_feat) * 0.1
    best_fit = -np.inf
    best_mask = None
    history = []
    start = time.time()
    for _iteration in range(max_iter):
        masks = []
        fits = []
        prob = pher / pher.sum()
        for _agent in range(n_agents):
            m = (rng.rand(n_feat) < prob).astype(int)
            if np.sum(m) == 0:
                # Never evaluate an empty subset: force one random feature in.
                m[rng.randint(0, n_feat)] = 1
            cols = m == 1
            # Derive a per-forest seed only when the caller asked for one.
            clf_seed = None if random_state is None else rng.randint(0, 2**31 - 1)
            clf = RandomForestClassifier(n_estimators=50, random_state=clf_seed)
            clf.fit(X_tr[:, cols], y_tr)
            pred = clf.predict(X_val[:, cols])
            fits.append(accuracy_score(y_val, pred))
            masks.append(m)
        idx = np.argmax(fits)
        if fits[idx] > best_fit:
            best_fit = fits[idx]
            best_mask = masks[idx].copy()
        # Evaporate everywhere, then reinforce the features of the global best.
        pher *= (1 - evap)
        pher[best_mask == 1] += evap
        history.append(best_fit)
    return best_mask, history, time.time() - start


## 5. Hybrid Swarm Intelligence for Feature Selection

In [None]:

class HybridSwarmFeatureSelector:
    """Placeholder for the hybrid swarm feature selector.

    No search is implemented: ``run`` returns a fixed all-features mask, a
    constant history and a nominal duration.
    """

    def __init__(self, *args, **kwargs):
        # Any configuration is accepted and ignored by this placeholder.
        pass

    def run(self, *args, **kwargs):
        """Return a stub result sized to the training data.

        Args:
            *args: The first positional argument, if present, is taken as the
                training feature matrix; the remaining ones are ignored.
            **kwargs: ``X_tr`` may be passed by keyword instead.

        Returns:
            Tuple ``(mask, history, duration)``: an all-ones integer mask with
            one entry per training column, a list of fifty ``0.7`` values and
            the constant ``1.0``.
        """
        if args:
            train_data = args[0]
        elif "X_tr" in kwargs:
            train_data = kwargs["X_tr"]
        else:
            # Legacy path: with no data supplied, fall back to the
            # module-level training matrix as the previous version did.
            train_data = X_tr
        n_feat = np.asarray(train_data).shape[1]
        return np.ones(n_feat, dtype=int), [0.7]*50, 1.0


## 6. Run Feature Selection Experiments

In [None]:

# Selector registry: "ACO" is a plain function, "Hybrid" is a class whose
# instances expose run().
methods = {"ACO": aco_feature_selection, "Hybrid": HybridSwarmFeatureSelector}
results = {}
for name, fn in methods.items():
    # The class-based selector has to be instantiated before it can be called.
    runner = fn().run if name == "Hybrid" else fn
    sel, hist, t = runner(X_tr, y_tr, X_val, y_val)
    results[name] = dict(mask=sel, history=hist, time=t)

# Plot convergence: one curve per selector.
for k, record in results.items():
    plt.plot(record['history'], label=k)
plt.title("Convergence Curves")
plt.xlabel("Iteration")
plt.ylabel("Validation Accuracy")
plt.legend()
plt.show()


## 7. Model Training and Evaluation

In [None]:

# Classifiers evaluated on every selected feature subset. The same estimator
# objects are refit for each selection method, so after the loop they hold
# the fit for the last method processed.
models = {
    "RF": RandomForestClassifier(n_estimators=100),
    "SVM": SVC(probability=True),
    "MLP": MLPClassifier(max_iter=300),
}

# model_results[method][model_name] -> {'acc': ..., 'auc': ...}
model_results = {}

for method, res in results.items():
    # Boolean column selector built once from the method's 0/1 mask.
    keep = res['mask'] == 1
    X_train_sel, X_test_sel = X_tr[:, keep], X_te[:, keep]
    per_model = model_results[method] = {}
    for model_name, model in models.items():
        model.fit(X_train_sel, y_tr)
        preds = model.predict(X_test_sel)
        # Column 1 of predict_proba is used as the score for the AUC.
        proba = model.predict_proba(X_test_sel)[:, 1]
        acc = accuracy_score(y_te, preds)
        auc = roc_auc_score(y_te, proba)
        per_model[model_name] = {'acc': acc, 'auc': auc}


In [None]:

# One line per (selection method, classifier) pair with test accuracy and AUC.
for method, per_model in model_results.items():
    for model_name, scores in per_model.items():
        print(f"{method} - {model_name}: Acc = {scores['acc']:.4f}, AUC = {scores['auc']:.4f}")


## 9. Benchmark Function Analysis

In [None]:

def sphere(x):
    """Return the sum of the squared entries of ``x`` (sphere benchmark).

    Args:
        x: NumPy array (or numeric scalar) to evaluate.

    Returns:
        The sum over all squared entries.
    """
    squared = x * x
    return np.sum(squared)

class HybridSwarmOptimizer:
    """Population-based random-walk minimiser used for the benchmark plots.

    Agents start at uniformly random positions, are evaluated every
    iteration, and are then all perturbed by a uniform random step. The best
    value seen so far is tracked; agents are not attracted towards it.

    Args:
        func: Objective to minimise; called with one 1-D position at a time.
        dim: Number of dimensions of the search space.
        n_agents: Number of agents in the population.
        max_iter: Number of evaluate-and-move iterations.
        bounds: ``(low, high)`` range for the initial positions. Later moves
            are not clipped to this range.
        step: Half-width of the uniform perturbation applied per iteration.
        random_state: Optional integer seed. When given, every ``run`` call
            is reproducible; when ``None`` (default) the global NumPy random
            state is used.
    """

    def __init__(self, func, dim=30, n_agents=30, max_iter=100,
                 bounds=(-5, 5), step=0.5, random_state=None):
        self.func = func
        self.dim = dim
        self.n_agents = n_agents
        self.max_iter = max_iter
        self.bounds = bounds
        self.step = step
        self.random_state = random_state

    def run(self):
        """Run the search once.

        Returns:
            Tuple ``(best_sol, best_val, history, elapsed)``: the best
            position found (``None`` if ``max_iter`` is 0), its objective
            value, the best-so-far value after each iteration, and the
            wall-clock time in seconds.
        """
        # The np.random module and RandomState share the uniform() API, so
        # the unseeded path draws from the global stream as before.
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)
        low, high = self.bounds

        X = rng.uniform(low, high, (self.n_agents, self.dim))
        best_val = np.inf
        best_sol = None
        history = []
        start = time.time()
        for _iteration in range(self.max_iter):
            fitness = np.apply_along_axis(self.func, 1, X)
            idx = np.argmin(fitness)
            if fitness[idx] < best_val:
                best_val = fitness[idx]
                # Copy: X is modified in place by the move below.
                best_sol = X[idx].copy()
            X += rng.uniform(-self.step, self.step, X.shape)  # Random small move
            history.append(best_val)
        return best_sol, best_val, history, time.time() - start

# Minimise the sphere function with the optimizer's default settings.
opt = HybridSwarmOptimizer(func=sphere)
best_sol, best_val, history, duration = opt.run()

# Best-so-far objective per iteration on a logarithmic y-axis.
plt.plot(history)
plt.yscale('log')
plt.grid(True)
plt.xlabel("Iteration")
plt.ylabel("Best Objective Value (log scale)")
plt.title("Hybrid Swarm Optimizer on Sphere Function")
plt.show()


## 10. Component-wise Analysis

In [None]:

# Population-size / iteration-budget combinations to compare.
configs = [
    dict(n_agents=10, max_iter=50),
    dict(n_agents=30, max_iter=50),
    dict(n_agents=30, max_iter=100),
    dict(n_agents=50, max_iter=100),
]

# "Config N" -> best value, convergence history, run time and the settings.
component_results = {}

for idx, cfg in enumerate(configs):
    # Each config supplies exactly the n_agents and max_iter keyword arguments.
    optimizer = HybridSwarmOptimizer(func=sphere, dim=30, **cfg)
    best_sol, best_val, history, duration = optimizer.run()
    component_results[f"Config {idx+1}"] = dict(
        best_val=best_val,
        history=history,
        duration=duration,
        config=cfg,
    )

# Plotting convergence: one curve per configuration, log-scaled y-axis.
plt.figure(figsize=(12,6))
for name, entry in component_results.items():
    plt.plot(entry['history'], label=f"{name} {entry['config']}")
plt.yscale("log")
plt.grid(True)
plt.xlabel("Iteration")
plt.ylabel("Best Objective Value")
plt.title("Component-wise Hybrid Analysis")
plt.legend()
plt.show()


## 11. Presentation and Final Comparison

In [None]:

# Summarise every configuration from component_results: run time as bars on
# the left axis, final best objective value as a line on the right axis.
durations = [v['duration'] for v in component_results.values()]
final_vals = [v['best_val'] for v in component_results.values()]
labels = list(component_results.keys())

fig, ax1 = plt.subplots(figsize=(10,5))

# Left axis: wall-clock time per configuration.
ax1.bar(labels, durations, color='lightblue', label="Time (s)")
ax1.set_ylabel('Time (s)')
# Right axis shares the x-axis with ax1 and uses a logarithmic y-scale.
ax2 = ax1.twinx()
ax2.plot(labels, final_vals, color='red', marker='o', label="Best Value (log scale)")
ax2.set_yscale('log')
ax2.set_ylabel('Best Objective Value')

# Figure-level legend so the labelled artists of both axes appear together;
# the anchor y=1.1 places it above the top edge of the figure.
fig.legend(loc="upper center", bbox_to_anchor=(0.5, 1.1), ncol=2)
# NOTE(review): the pyplot calls below act on the current axes, presumably
# ax2 after twinx() — confirm before reordering any of these statements.
plt.title("Component-wise Performance Summary")
plt.xticks(rotation=45)
plt.grid(True)
plt.tight_layout()
plt.show()
