# FLARE AFAC Ensemble Training Notebook

This notebook:
- Loads the IoMT dataset
- Trains base classifiers & stacked ensemble
- Visualizes the confusion matrix & ROC curve
- Saves the fitted scaler & ensemble to disk

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier
import joblib

## Load Dataset

In [None]:
# Read the dataset CSV (path is relative to the notebook's working directory),
# report its (rows, columns) shape, and preview the first rows.
dataset_path = "../datasets/CICIoMT2024.csv"
df = pd.read_csv(dataset_path)
print(df.shape)
df.head()

## Label & Feature Prep

In [None]:
# Positional split: every column except the last is a feature, the last
# column is the target label.
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Encode any non-numeric target (object, string or categorical dtype) as
# integer codes. `le` is kept so the codes can be mapped back to the original
# label names later via `le.inverse_transform`.
if not pd.api.types.is_numeric_dtype(y):
    le = LabelEncoder()
    y = le.fit_transform(y)

# Stratify on the target so the train and test folds keep the same class
# proportions; without it a rare class can be missing from the test fold.
# NOTE: stratification requires at least two samples of every class.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fit the scaler on the training fold only, then reuse its statistics on the
# test fold, so no test-set information leaks into preprocessing.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

## Train Base Models

In [None]:
# Base learners. Every estimator with internal randomness is seeded with the
# same value used for the train/test split so repeated runs give the same
# models (KNN is deterministic and needs no seed).
knn = KNeighborsClassifier()
dt = DecisionTreeClassifier(random_state=42)
rf = RandomForestClassifier(n_estimators=100, random_state=42)
# probability=True enables predict_proba on the SVC; the seed makes its
# probability estimates reproducible.
svm = SVC(probability=True, random_state=42)
# NOTE(review): `use_label_encoder` is deprecated in recent XGBoost releases
# and may only trigger a warning there -- confirm against the installed version.
xgb = XGBClassifier(
    use_label_encoder=False, eval_metric='logloss', random_state=42
)

# Fit each base learner on the scaled training fold so it can be inspected on
# its own. NOTE(review): StackingClassifier fits its own clones of these
# estimators, so these standalone fits are not reused by the ensemble cell.
for model in (knn, dt, rf, svm, xgb):
    model.fit(X_train, y_train)

## Train Ensemble

In [None]:
# Meta-learner: a small two-hidden-layer MLP. Seeded so weight initialisation
# (and therefore the reported metrics) is reproducible between runs.
meta = MLPClassifier(hidden_layer_sizes=(64,32), max_iter=200, random_state=42)

# Stack the five base learners under the MLP. With passthrough=True the
# meta-learner is trained on the original features in addition to the base
# learners' predictions.
ensemble = StackingClassifier(
    estimators=[
        ('knn', knn),
        ('dt', dt),
        ('rf', rf),
        ('svm', svm),
        ('xgb', xgb)
    ],
    final_estimator=meta,
    passthrough=True
)

# Fit on the training fold, then report per-class precision/recall/F1 on the
# held-out test fold. `y_pred` is reused by the confusion-matrix cell.
ensemble.fit(X_train, y_train)
y_pred = ensemble.predict(X_test)
print(classification_report(y_test, y_pred))

## Confusion Matrix

In [None]:
# Count test-fold predictions per (actual, predicted) class pair and render
# the counts as an annotated heatmap.
cm = confusion_matrix(y_test, y_pred)

# sns.heatmap draws on the current Axes and returns it; label that Axes
# directly instead of going through the pyplot state machine.
ax = sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
ax.set_title("Confusion Matrix")
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
plt.show()

## ROC Curve

In [None]:
# Class-membership probabilities for the test fold; one column per class, in
# the order given by `ensemble.classes_`.
proba = ensemble.predict_proba(X_test)
classes = ensemble.classes_

plt.figure()
legend_title = None
if proba.shape[1] == 2:
    # Binary target: score with the probability of the second class.
    y_prob = proba[:, 1]
    roc_auc = roc_auc_score(y_test, y_prob)
    fpr, tpr, _ = roc_curve(y_test, y_prob)
    plt.plot(fpr, tpr, label=f"AUC = {roc_auc:.2f}")
else:
    # Multi-class target: a single probability column is not a valid score,
    # so draw one one-vs-rest curve per class and report the macro average.
    y_prob = proba
    y_true = np.asarray(y_test)
    per_class_auc = []
    for idx, cls in enumerate(classes):
        is_cls = y_true == cls
        if is_cls.all() or not is_cls.any():
            # ROC is undefined when the test fold has no positives or no
            # negatives for this class.
            continue
        class_auc = roc_auc_score(is_cls, proba[:, idx])
        fpr, tpr, _ = roc_curve(is_cls, proba[:, idx])
        plt.plot(fpr, tpr, label=f"class {cls} (AUC = {class_auc:.2f})")
        per_class_auc.append(class_auc)
    roc_auc = float(np.mean(per_class_auc))
    legend_title = f"macro AUC = {roc_auc:.2f}"

# Diagonal reference line: the expected curve of a random classifier.
plt.plot([0, 1], [0, 1], 'k--')
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve")
plt.legend(loc="lower right", title=legend_title)
plt.show()

## Save Trained Models

In [None]:
import os

# Directory (relative to the notebook's working directory) that receives the
# serialized artifacts.
model_dir = "../afac/models"

# exist_ok=True creates the directory when missing and is a no-op otherwise,
# avoiding the check-then-create race of a separate os.path.exists() test.
os.makedirs(model_dir, exist_ok=True)

# Persist the fitted scaler together with the ensemble: inference must apply
# the same scaling that was fitted on the training fold.
joblib.dump(scaler, os.path.join(model_dir, "scaler.joblib"))
joblib.dump(ensemble, os.path.join(model_dir, "ensemble.joblib"))

print("✅ Models saved!")