In [None]:
# Efficient Network/IoT Classification with sklearn

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, confusion_matrix, ConfusionMatrixDisplay

from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

# ---------------------------------------------------
# Load dataset
# ---------------------------------------------------
data = pd.read_csv('label_feature_IOT.csv')

# ---------------------------------------------------
# Separate features (X) and label (y)
# ---------------------------------------------------
# The last column is used as the label; every column before it is a feature.
# X is modified below, so take an explicit copy: writing into a slice of
# `data` raises SettingWithCopyWarning and has ambiguous view/copy semantics
# on pandas versions without copy-on-write.
X = data.iloc[:, :-1].copy()
y = data.iloc[:, -1]

# Encode target label if not numeric.
# `is_numeric_dtype` also recognises non-object string dtypes, which a plain
# comparison against 'object' would silently skip.
if not pd.api.types.is_numeric_dtype(y):
    y = LabelEncoder().fit_transform(y)

# ---------------------------------------------------
# Convert non-numeric feature columns to numeric
# ---------------------------------------------------
# Each non-numeric column is cast to str and replaced by integer codes.
# The pandas dtype helpers are used instead of np.issubdtype because the
# latter can raise TypeError on pandas extension dtypes.
# Boolean columns are encoded as well (np.bool_ is not a np.number subtype).
for col in X.columns:
    col_dtype = X[col].dtype
    needs_encoding = (
        pd.api.types.is_bool_dtype(col_dtype)
        or not pd.api.types.is_numeric_dtype(col_dtype)
    )
    if needs_encoding:
        X[col] = LabelEncoder().fit_transform(X[col].astype(str))

# ---------------------------------------------------
# Clean problematic columns
# ---------------------------------------------------
# -1 and +/-inf are mapped to 0.
# NOTE(review): -1 is presumably a "missing value" sentinel in this dataset;
# confirm that no feature uses -1 as a legitimate measurement.
X = X.replace([-1, np.inf, -np.inf], 0)

# Cap extreme values at 1e6 (upper bound only; negatives are left untouched).
numeric_cols = X.select_dtypes(include=np.number).columns
X[numeric_cols] = X[numeric_cols].clip(upper=1e6)

# Remaining NaNs become 0.
X = X.fillna(0)

# ---------------------------------------------------
# Train-test split with stratification
# ---------------------------------------------------
# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# test rows' mean/variance influence the training features (data leakage) and
# make the reported test metrics optimistic.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# ---------------------------------------------------
# Scale features (all columns)
# ---------------------------------------------------
# The scaler learns its statistics from the training split only and the same
# transformation is then applied to the test split.
# Note: `X` itself is left unscaled; only X_train / X_test are standardized.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# ---------------------------------------------------
# Models
# ---------------------------------------------------
# Display name -> unfitted estimator. The name is used in printed reports,
# plot titles and the comparison table.
# The tree-based models are seeded with the same value as the train/test
# split so that repeated runs produce the same metrics.
models = {
    'SVM': SVC(),
    'Logistic Regression': LogisticRegression(max_iter=200),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(random_state=42),
    'K-Nearest Neighbors': KNeighborsClassifier(),
}

# ---------------------------------------------------
# Metrics function
# ---------------------------------------------------
def print_metrics(y_true, y_pred):
    """Print a classification report for one set of predictions.

    Writes to stdout, in order: accuracy (2 decimals), weighted F1, recall
    and precision (3 decimals each), the raw confusion matrix, a
    true-vs-predicted crosstab with margins, and a 50-dash separator.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.

    Returns:
        None.
    """
    print(f"Accuracy: {accuracy_score(y_true, y_pred):.2f}")

    # The three remaining scores share the same call signature and format,
    # so they are driven from a small (label, scorer) table.
    weighted_scorers = (
        ("F1-score", f1_score),
        ("Recall", recall_score),
        ("Precision", precision_score),
    )
    for label, scorer in weighted_scorers:
        value = scorer(y_true, y_pred, average='weighted')
        print(f"{label}: {value:.3f}")

    print("Confusion Matrix:\n", confusion_matrix(y_true, y_pred))

    contingency = pd.crosstab(
        y_true,
        y_pred,
        rownames=['True'],
        colnames=['Predicted'],
        margins=True,
    )
    print(contingency)
    print('-' * 50)

# ---------------------------------------------------
# Train + Evaluate
# ---------------------------------------------------
# One dict per model: display name plus the four summary scores.
results_metrics = []

for name, model in models.items():
    # fit() returns the estimator, so training and prediction can be chained.
    y_pred = model.fit(X_train, y_train).predict(X_test)

    print(f"{name} Results:")
    print_metrics(y_test, y_pred)

    # Collect the scores for the comparison charts (same key order as the
    # columns of the resulting table: Model, Accuracy, F1-score, Recall,
    # Precision).
    summary = {'Model': name, 'Accuracy': accuracy_score(y_test, y_pred)}
    for column, scorer in (
        ('F1-score', f1_score),
        ('Recall', recall_score),
        ('Precision', precision_score),
    ):
        summary[column] = scorer(y_test, y_pred, average='weighted')
    results_metrics.append(summary)

    # Draw the confusion matrix for this model in its own figure.
    cm = confusion_matrix(y_test, y_pred)
    disp = ConfusionMatrixDisplay(
        confusion_matrix=cm,
        display_labels=np.unique(y),
    )
    disp.plot(cmap=plt.cm.Blues)
    disp.ax_.set_title(f"Confusion Matrix - {name}")
    plt.show()

# -----------------------------
# Compare all metrics in separate bar charts
# -----------------------------
# Tabulate the collected scores: one row per model, one column per metric.
metrics_df = pd.DataFrame(results_metrics)
sns.set(style="whitegrid", palette="Set2")

metrics = ['Accuracy', 'F1-score', 'Recall', 'Precision']

# One bar chart per metric, with the models along the x-axis.
for metric in metrics:
    fig, ax = plt.subplots(figsize=(8, 5))
    sns.barplot(data=metrics_df, x='Model', y=metric, ax=ax)
    ax.set_title(f"{metric} Comparison Across Classifiers")
    ax.set_ylim(0, 1)  # all four scores live in [0, 1]
    ax.set_ylabel(metric)
    plt.xticks(rotation=45)  # model names are long; tilt to avoid overlap
    fig.tight_layout()
    plt.show()

