In [None]:
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold, cross_val_score, cross_validate
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from xgboost import XGBClassifier

In [None]:
# 1 Load dataset
#
# Read the CSV, then split it into the label column ("Gallstone Status", kept
# as y) and every other column (kept as the feature frame X).

target_column = "Gallstone Status"

df = pd.read_csv("Dataset/dataset-uci.csv")
y = df[target_column]
X = df.drop(target_column, axis=1)

In [None]:
# 2 Define 10-fold Cross Validation
#
# One stratified, shuffled 10-fold splitter with a fixed seed; cv_model passes
# this same object to every evaluation.

cv = StratifiedKFold(
    n_splits=10,
    shuffle=True,
    random_state=42,
)

def cv_model(name, model, scaled=False):
    """Cross-validate ``model`` and summarise its accuracy and ROC AUC.

    The estimator is evaluated on the notebook-level ``X`` / ``y`` with the
    shared ``cv`` splitter. Both metrics come from a single
    ``cross_validate`` pass, so each fold is fitted once and accuracy and AUC
    are measured on the same fitted model (two separate ``cross_val_score``
    calls would refit everything and, for unseeded estimators, score
    different fits).

    Parameters
    ----------
    name : str
        Label stored under the ``"Model"`` key of the returned dict.
    model : estimator
        Unfitted scikit-learn compatible classifier.
    scaled : bool, default False
        If true, ``model`` is wrapped in a ``Pipeline`` that applies
        ``StandardScaler`` first, so scaling is fitted inside each
        training fold rather than on the full data.

    Returns
    -------
    dict
        ``"Model"`` plus the mean and standard deviation (NumPy default,
        i.e. ``ddof=0``) of the per-fold accuracy and ROC AUC scores.
    """
    if scaled:
        estimator = Pipeline([("scaler", StandardScaler()), ("model", model)])
    else:
        estimator = model

    # Multi-metric scoring: results are keyed as "test_<scorer name>".
    scores = cross_validate(estimator, X, y, cv=cv, scoring=("accuracy", "roc_auc"))
    acc = scores["test_accuracy"]
    auc = scores["test_roc_auc"]

    return {
        "Model": name,
        "Accuracy Mean": acc.mean(),
        "Accuracy Std": acc.std(),
        "AUC Mean": auc.mean(),
        "AUC Std": auc.std()
    }

In [None]:
# 3 Six Models to Compare Based on 10-fold Cross Validation

# Seed every stochastic estimator so a fresh "Restart & Run All" reproduces the
# same scores; 42 is the same seed the CV splitter uses.
SEED = 42

# Each entry: (display name, estimator, whether to standardise features first).
# The linear / neural models are run with scaling; the tree ensembles without.
model_specs = [
    ("Logistic Regression", LogisticRegression(max_iter=300), True),
    ("SVM (Linear)", SVC(kernel="linear", probability=True, random_state=SEED), True),
    ("MLP Neural Network",
     MLPClassifier(hidden_layer_sizes=(32, 16), max_iter=600, random_state=SEED),
     True),
    ("Random Forest", RandomForestClassifier(n_estimators=300, random_state=SEED), False),
    ("Gradient Boosting", GradientBoostingClassifier(random_state=SEED), False),
    ("XGBoost",
     XGBClassifier(
         n_estimators=300, learning_rate=0.05,
         max_depth=4, subsample=0.9, colsample_bytree=0.9,
         eval_metric="logloss", random_state=SEED,
     ),
     False),
]

# One summary dict per model, in the order listed above.
results = [
    cv_model(model_name, estimator, scaled=needs_scaling)
    for model_name, estimator, needs_scaling in model_specs
]

In [None]:
# 4 Models Ranked by Accuracy
#
# Turn the per-model summary dicts into a table; the sorted view (best mean
# accuracy first) is the cell's final expression, so it is what gets displayed.
# results_df itself keeps the original row order.

results_df = pd.DataFrame(results)
results_df.sort_values(by="Accuracy Mean", ascending=False)