In [23]:
# ============================
# 1. IMPORT LIBRARIES
# ============================
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

from ucimlrepo import fetch_ucirepo
import warnings
warnings.filterwarnings('ignore')

# ============================
# 2. LOAD & PREP DATA
# ============================
# Fetch UCI repository dataset id=15 and join features and target column-wise.
dataset = fetch_ucirepo(id=15)
data = pd.concat([dataset.data.features, dataset.data.targets], axis=1)
data.rename(columns={'Class': 'Target'}, inplace=True)
# Recode class labels 2 -> 0 and 4 -> 1; any other label value becomes NaN.
# NOTE(review): presumably 2 = benign and 4 = malignant -- confirm against the
# dataset documentation.
data['Target'] = data['Target'].map({2: 0, 4: 1})

X = data.drop('Target', axis=1)
y = data['Target']
# Rows whose label could not be mapped are unusable for supervised training.
y = y[y.notna()]

# Split BEFORE imputing so the test rows never influence the imputation
# statistics (fitting the imputer on the full data leaks test information).
# Row count and random_state are unchanged, so the same rows land in each split.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X.loc[y.index], y, test_size=0.25, random_state=42
)

# Learn column means from the training rows only, then apply them to both
# splits. Passing index= keeps every row aligned with its label even when the
# frame does not carry a default RangeIndex.
imputer = SimpleImputer(strategy='mean')
X_train = pd.DataFrame(
    imputer.fit_transform(X_train_raw),
    columns=X.columns,
    index=X_train_raw.index,
)
X_test = pd.DataFrame(
    imputer.transform(X_test_raw),
    columns=X.columns,
    index=X_test_raw.index,
)

# Full imputed feature table in the same row order as y, kept for any later
# code that refers to X_imputed.
X_imputed = pd.concat([X_train, X_test]).loc[y.index]

# ============================
# 3. DEFINE EVALUATION FUNCTION
# ============================
def evaluate_model(model, name):
    """Fit ``model`` on the training split and score it on the test split.

    Reads the module-level ``X_train``, ``y_train``, ``X_test`` and ``y_test``.

    Args:
        model: Object exposing ``fit(X, y)`` and ``predict(X)``; it is fitted
            in place.
        name: Label passed back unchanged as the first element of the result.

    Returns:
        A tuple ``(name, accuracy, confusion_matrix)`` where the last two are
        the results of ``accuracy_score`` and ``confusion_matrix`` computed on
        the test labels and the model's test predictions.
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    return (
        name,
        accuracy_score(y_test, predictions),
        confusion_matrix(y_test, predictions),
    )

# ============================
# 4. TRAIN & EVALUATE ALL MODELS
# ============================
# Classifiers to compare, keyed by the label shown in the summary table.
# Insertion order determines the row order of the results.
models = dict([
    ("Logistic Regression", LogisticRegression()),
    ("K-Nearest Neighbors (k=5)", KNeighborsClassifier(n_neighbors=5)),
    ("SVM (Linear Kernel)", SVC(kernel='linear')),
    ("SVM (RBF Kernel)", SVC(kernel='rbf')),
    ("Naive Bayes", GaussianNB()),
    ("Decision Tree", DecisionTreeClassifier(random_state=42)),
    ("Random Forest", RandomForestClassifier(n_estimators=10, random_state=42)),
    ("XGBoost", XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)),
])

# Fit and score every classifier, collecting one summary row per model.
results = []
for name, model in models.items():
    model_name, accuracy, cm = evaluate_model(model, name)
    summary_row = {'Model': model_name}
    summary_row['Accuracy'] = round(accuracy, 4)
    # Store the matrix as nested lists so it prints compactly in the table.
    summary_row['Confusion Matrix'] = cm.tolist()
    results.append(summary_row)

# ============================
# 5. TABULATE RESULTS
# ============================
# One row per model: display name, rounded accuracy, confusion matrix.
results_df = pd.DataFrame(data=results)
print("\n=== Classification Model Comparison Summary ===", results_df, sep="\n")

# Optional: Save to CSV for Word
# results_df.to_csv("model_comparison_summary.csv", index=False)



=== Classification Model Comparison Summary ===
                       Model  Accuracy     Confusion Matrix
0        Logistic Regression    0.9600  [[117, 1], [6, 51]]
1  K-Nearest Neighbors (k=5)    0.9771  [[116, 2], [2, 55]]
2        SVM (Linear Kernel)    0.9600  [[116, 2], [5, 52]]
3           SVM (RBF Kernel)    0.9657  [[115, 3], [3, 54]]
4                Naive Bayes    0.9600  [[113, 5], [2, 55]]
5              Decision Tree    0.9371  [[115, 3], [8, 49]]
6              Random Forest    0.9543  [[115, 3], [5, 52]]
7                    XGBoost    0.9600  [[115, 3], [4, 53]]
