# In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, BaggingClassifier, StackingClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_curve, auc

# 1. Load Dataset
# The CSV is downloaded straight from the UCI repository, so this step needs
# network access.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/parkinsons.data"
df = pd.read_csv(url)

# 'status' is the prediction target; 'name' is dropped from the features too.
y = df['status']
X = df.drop(columns=['status', 'name'])

# 2. Train Test Split
# 80/20 hold-out with a fixed seed so the partition is repeatable.
# NOTE(review): the split is not stratified on y -- consider stratify=y if the
# classes turn out to be imbalanced (confirm against the data).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 3. Feature Scaling
# Fit the scaler on the training rows only, then apply the same transform to
# both partitions so no test-set statistics leak into training.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# 4. Define Basic Models
# Baseline classifiers keyed by the display name used in the results table and
# in the plots. Only the options spelled out here differ from library defaults.
models = {
    'Logistic Regression': LogisticRegression(),
    'Random Forest': RandomForestClassifier(),
    # probability=True so the SVM exposes predict_proba
    'SVM': SVC(probability=True),
    'XGBoost': XGBClassifier(use_label_encoder=False, eval_metric='logloss'),
    'AdaBoost': AdaBoostClassifier(),
    'Bagging': BaggingClassifier(),
}

# 5. Train & Evaluate Basic Models
# Each row appended to `results` is
# [model name, accuracy, precision, recall, F1], all scored on the test split.
results = []
for name, model in models.items():
    y_pred = model.fit(X_train, y_train).predict(X_test)
    scores = [
        metric(y_test, y_pred)
        for metric in (accuracy_score, precision_score, recall_score, f1_score)
    ]
    results.append([name, *scores])

# 6. Hyperparameter Tuning
# Exhaustive 5-fold cross-validated grid search on the training split for
# three of the model families. No `scoring` is passed, so each search uses
# GridSearchCV's default scoring.

# Logistic regression: regularisation strength, L2 penalty only.
param_grid_lr = dict(C=[0.01, 0.1, 1, 10], penalty=['l2'])
grid_lr = GridSearchCV(
    estimator=LogisticRegression(),
    param_grid=param_grid_lr,
    cv=5,
).fit(X_train, y_train)

# Random forest: number of trees and maximum depth (None = unlimited depth).
param_grid_rf = dict(n_estimators=[50, 100, 150], max_depth=[4, 6, 8, None])
grid_rf = GridSearchCV(
    estimator=RandomForestClassifier(),
    param_grid=param_grid_rf,
    cv=5,
).fit(X_train, y_train)

# SVM: C, gamma setting and kernel; probability=True keeps predict_proba
# available on the fitted estimators.
param_grid_svc = dict(
    C=[0.1, 1, 10],
    gamma=['scale', 'auto'],
    kernel=['rbf', 'linear'],
)
grid_svc = GridSearchCV(
    estimator=SVC(probability=True),
    param_grid=param_grid_svc,
    cv=5,
).fit(X_train, y_train)

# 7. Evaluate Tuned Models
def _metrics_row(label, y_true, y_hat):
    """Build one results row for a set of predictions.

    Args:
        label: Display name stored in the "Model" column.
        y_true: Ground-truth labels.
        y_hat: Predicted labels to score against ``y_true``.

    Returns:
        ``[label, accuracy, precision, recall, f1]`` as a list, the same row
        layout the other entries in ``results`` use.
    """
    return [
        label,
        accuracy_score(y_true, y_hat),
        precision_score(y_true, y_hat),
        recall_score(y_true, y_hat),
        f1_score(y_true, y_hat),
    ]


# The y_pred_* names are kept at module level because the confusion-matrix
# plots further down reuse them.
y_pred_lr = grid_lr.predict(X_test)
# Fixed label: this row holds the tuned logistic regression scores. It was
# previously recorded as "LightGBM", which mislabelled it in the CSV and in
# the accuracy bar chart.
results.append(_metrics_row("Logistic Regression (Tuned)", y_test, y_pred_lr))

y_pred_rf = grid_rf.predict(X_test)
results.append(_metrics_row("Random Forest (Tuned)", y_test, y_pred_rf))

y_pred_svc = grid_svc.predict(X_test)
results.append(_metrics_row("SVM (Tuned)", y_test, y_pred_svc))

# 8. Stacking Classifier
# Base learners: a 100-tree random forest and a probability-enabled SVM.
# A logistic regression combines their outputs as the final estimator.
estimators = [
    ('rf', RandomForestClassifier(n_estimators=100)),
    ('svc', SVC(probability=True)),
]
stack = StackingClassifier(
    estimators=estimators,
    final_estimator=LogisticRegression(),
)
stack.fit(X_train, y_train)

# Score the ensemble on the held-out split and add it to the results table.
y_pred_stack = stack.predict(X_test)
stack_scores = [
    metric(y_test, y_pred_stack)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
]
results.append(["Stacking Classifier", *stack_scores])

# 9. Results DataFrame
# One row per evaluated model, in the order rows were appended to `results`.
results_df = pd.DataFrame(
    data=results,
    columns=["Model", "Accuracy", "Precision", "Recall", "F1 Score"],
)

# Save the comparison table (without the index column) to the current
# working directory.
results_df.to_csv('model_results.csv', index=False)

# Status line, a blank line, then the table.
print("Training and evaluation completed!\n", results_df, sep="\n")

# 10. Plot Confusion Matrix

def plot_confusion(model_name, y_true, y_pred):
    """Draw and show a confusion-matrix heatmap for one model.

    Args:
        model_name: Name interpolated into the figure title.
        y_true: Ground-truth labels.
        y_pred: Predicted labels compared against ``y_true``.

    Returns:
        None. A new 5x4 figure is created and displayed via ``plt.show()``.
    """
    matrix = confusion_matrix(y_true, y_pred)

    plt.figure(figsize=(5, 4))
    # Integer annotations ("d") because the cells are counts.
    sns.heatmap(matrix, annot=True, fmt="d", cmap="Blues")
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.title(f"Confusion Matrix - {model_name}")
    plt.show()

# Plot for basic models
# Predictions are recomputed from the already-fitted baseline estimators.
for name, model in models.items():
    plot_confusion(name, y_test, model.predict(X_test))

# Plot for tuned models
# These reuse the test-set predictions computed during evaluation.
for title, predictions in (
    ("Logistic Regression (Tuned)", y_pred_lr),
    ("Random Forest (Tuned)", y_pred_rf),
    ("SVM (Tuned)", y_pred_svc),
    ("Stacking Classifier", y_pred_stack),
):
    plot_confusion(title, y_test, predictions)

# 11. Plot ROC Curves
def plot_roc(model_name, model, X_test, y_test):
    """Add one model's ROC curve to the current matplotlib axes.

    The function neither creates a figure nor calls ``plt.show()``; the caller
    owns the figure so that several curves can share it.

    Args:
        model_name: Name used in the legend entry.
        model: Fitted estimator exposing ``predict_proba`` or
            ``decision_function``.
        X_test: Feature matrix to score.
        y_test: True labels for ``X_test``.

    Returns:
        None.
    """
    # Use the second predict_proba column (presumably the positive class --
    # confirm label order) when available; otherwise use the raw decision
    # scores.
    scores = (
        model.predict_proba(X_test)[:, 1]
        if hasattr(model, "predict_proba")
        else model.decision_function(X_test)
    )

    false_pos_rate, true_pos_rate, _thresholds = roc_curve(y_test, scores)
    roc_auc = auc(false_pos_rate, true_pos_rate)

    plt.plot(
        false_pos_rate,
        true_pos_rate,
        lw=2,
        label=f'{model_name} (AUC = {roc_auc:.2f})',
    )

# One shared figure: baseline models first, then the tuned estimators and the
# stacking ensemble, so legend order follows evaluation order.
plt.figure(figsize=(10, 8))
roc_entries = [
    *models.items(),
    ("Logistic Regression (Tuned)", grid_lr.best_estimator_),
    ("Random Forest (Tuned)", grid_rf.best_estimator_),
    ("SVM (Tuned)", grid_svc.best_estimator_),
    ("Stacking Classifier", stack),
]
for name, model in roc_entries:
    plot_roc(name, model, X_test, y_test)

# Dashed diagonal as a visual reference line.
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')

plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve Comparison')
plt.legend(loc="lower right")
plt.grid(True)
plt.show()


# 12. Plot Accuracy Comparison
# Horizontal bars, best accuracy at the top.
ranked_df = results_df.sort_values(by="Accuracy", ascending=False)

plt.figure(figsize=(10, 6))
sns.barplot(data=ranked_df, x="Accuracy", y="Model", palette="viridis")
plt.xlabel("Accuracy")
plt.ylabel("Model")
plt.title("Model Accuracy Comparison")
# The x-axis is clipped to [0.7, 1.0]; a model scoring below 0.7 would show
# no visible bar.
plt.xlim(0.7, 1.0)
plt.grid(True)
plt.show()
