In [None]:
!pip install pandas numpy scikit-learn xgboost

In [None]:
# Required Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier


In [None]:
# Read the raw feature table from disk.
file_path = "custom_industrial_dataset_127features.csv"  # Change path if needed
df = pd.read_csv(file_path)

# The target is the "Class" column; "SNR_Level" is removed together with it
# so that neither column is fed to the classifiers as an input feature.
y = df["Class"]
X = df.drop(columns=["Class", "SNR_Level"])

# Convert the class labels into integer codes.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Hold out 20% of the rows for testing, stratified on the encoded labels,
# with a fixed seed so the split is repeatable.
split_options = {"test_size": 0.2, "stratify": y_encoded, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, **split_options)

# Standardise the features: the scaler is fitted on the training rows only
# and then applied unchanged to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
# Define Models
# Candidate classifiers keyed by the display name used in the results table.
# Every estimator that accepts a seed is given random_state=42.
models = {
    # probability=True enables predict_proba, which soft voting relies on.
    "SVM (RBF)": SVC(kernel="rbf", probability=True, random_state=42),
    "KNN (k=7)": KNeighborsClassifier(n_neighbors=7),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    # `use_label_encoder` is intentionally not passed: it is deprecated in
    # XGBoost and only produces an "unused parameter" warning on current
    # releases. The labels handed to fit() are already integer-encoded.
    "XGBoost": XGBClassifier(eval_metric='mlogloss', random_state=42),
    "Neural Network": MLPClassifier(hidden_layer_sizes=(100,), max_iter=300, random_state=42),
}

# Add Soft Voting Ensemble
# Soft voting averages the predicted class probabilities of the members.
# The member objects are shared with `models`; VotingClassifier fits clones of
# its estimators, so fitting the ensemble does not disturb the standalone models.
ensemble = VotingClassifier(
    estimators=[
        ("rf", models["Random Forest"]),
        ("xgb", models["XGBoost"]),
        ("svm", models["SVM (RBF)"]),
    ],
    voting="soft",
)
models["Ensemble"] = ensemble


In [None]:
# Evaluate Models
# Fit every model on the scaled training data, predict on the scaled test
# data, and collect one row of metrics (rounded to 4 decimals) per model.
results = []

# Metrics reported as class-support-weighted averages, in table column order.
weighted_metrics = {
    "Precision": precision_score,
    "Recall": recall_score,
    "F1-Score": f1_score,
}

for name, model in models.items():
    print(f"Training {name}...")
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)

    row = {
        "Model": name,
        "Accuracy": round(accuracy_score(y_test, y_pred), 4),
    }
    for metric_name, metric_fn in weighted_metrics.items():
        # zero_division=0 scores a class the model never predicts as 0, which
        # is the value the default setting already uses, but without emitting
        # an UndefinedMetricWarning for every such class.
        score = metric_fn(y_test, y_pred, average='weighted', zero_division=0)
        row[metric_name] = round(score, 4)
    results.append(row)


In [None]:
# Build the comparison table and rank the models from best to worst F1-Score.
results_df = pd.DataFrame(results)
results_df = results_df.sort_values(by="F1-Score", ascending=False)

# Print a heading followed by the table as plain text, omitting the index column.
heading = "\nModel Performance Comparison:\n"
table_text = results_df.to_string(index=False)
print(heading, table_text, sep="\n")