In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier

In [None]:
# 1 Load dataset

# Read the raw CSV and discard the FILENAME column straight away.
df = pd.read_csv("Dataset/Dataset.csv").drop(columns="FILENAME")

# Partition the columns by dtype. Only the numeric ones are used as model
# inputs, and the target column "label" is removed from that list so it
# cannot end up among the features.
non_numeric = list(df.select_dtypes(exclude=[np.number]).columns)
numeric = [
    col
    for col in df.select_dtypes(include=[np.number]).columns
    if col != "label"
]

# Feature matrix and target vector.
X = df[numeric]
y = df["label"]

In [None]:
# 2 Train-test split

# Hold out 30% of the rows as the test partition. Stratifying on y and fixing
# the seed are both requested explicitly so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.3,
    random_state=42,
)

In [4]:
# 3 Evaluation Pipeline

def evaluate(model, X_train, X_test, name):
    """Fit ``model`` and print binary-classification metrics for the test set.

    The labels are not passed in: this function reads the module-level
    ``y_train`` and ``y_test``, so the train/test split cell must have been
    run first and ``X_train`` / ``X_test`` must be row-aligned with them.

    Parameters
    ----------
    model : estimator
        Object exposing ``fit`` and ``predict`` plus either ``predict_proba``
        or ``decision_function``. It is fitted in place.
    X_train : array-like
        Training features passed to ``model.fit``.
    X_test : array-like
        Test features passed to ``model.predict`` and the scoring method.
    name : str
        Label printed in the banner above the metrics.

    Returns
    -------
    None
        The metrics are printed, not returned.

    Raises
    ------
    AttributeError
        If ``model`` offers neither ``predict_proba`` nor
        ``decision_function``.
    """
    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    # ROC-AUC needs a continuous score per sample. Prefer the probability in
    # column 1 of predict_proba; classifiers that do not provide it fall back
    # to decision_function, whose non-thresholded values roc_auc_score also
    # accepts.
    if hasattr(model, "predict_proba"):
        scores = model.predict_proba(X_test)[:, 1]
    else:
        scores = model.decision_function(X_test)

    print(f"\n=== {name} ===")
    print("Accuracy:", accuracy_score(y_test, preds))
    print("Precision:", precision_score(y_test, preds))
    print("Recall:", recall_score(y_test, preds))
    print("F1 Score:", f1_score(y_test, preds))
    print("ROC-AUC:", roc_auc_score(y_test, scores))


# Seed every randomized estimator so that Restart & Run All reproduces the
# same scores; 42 is the seed already used for the train/test split.
# NOTE(review): the scores recorded below are at or near 1.0 for every model.
# That is worth checking for label leakage among the numeric features before
# the numbers are trusted.
evaluate(LogisticRegression(max_iter=10000), X_train, X_test, "Logistic Regression")

evaluate(DecisionTreeClassifier(random_state=42), X_train, X_test, "Decision Tree")

evaluate(RandomForestClassifier(n_estimators=200, random_state=42), X_train, X_test, "Random Forest")

evaluate(ExtraTreesClassifier(n_estimators=200, random_state=42), X_train, X_test, "Extra Trees")


=== Logistic Regression ===
Accuracy: 0.9999010446853928
Precision: 0.9999011271504845
Recall: 0.9999258435298479
F1 Score: 0.9999134851874282
ROC-AUC: 0.9999994392474024

=== Decision Tree ===
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
ROC-AUC: 1.0

=== Random Forest ===
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
ROC-AUC: 1.0

=== Extra Trees ===
Accuracy: 0.9999717270529693
Precision: 0.9999505647971921
Recall: 1.0
F1 Score: 0.9999752817876211
ROC-AUC: 1.0
