In [1]:
from google.colab import drive
import os
import pandas as pd
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# --- Make the Drive-hosted project folder reachable from the Colab runtime ---
drive.mount('/content/drive')

# All inputs and outputs of this notebook live under <project>/data on Drive.
project_path = "/content/drive/MyDrive/Heart_Disease_Project"
data_path = os.path.join(project_path, "data")
os.makedirs(data_path, exist_ok=True)

# --- Read the selected-features table, then separate the label from the predictors ---
selected_features_csv = os.path.join(data_path, "selected_features_heart.csv")
df = pd.read_csv(selected_features_csv)
y = df["target"]
X = df.drop(columns="target")

Mounted at /content/drive


In [2]:
# Hold out 20% of the rows as a test split. The split is stratified on y and
# uses a fixed seed so it is repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
    test_size=0.2,
)

In [3]:
# Candidate classifiers and the hyperparameter grid searched for each one.
# Every entry has the shape {"model": <unfitted estimator>, "params": <grid dict>}.
models_params = {}

# NOTE(review): the target appears to have five classes (0-4). Newer
# scikit-learn releases may warn about / drop liblinear for multiclass
# problems - confirm against the installed version.
models_params["LogisticRegression"] = {
    "model": LogisticRegression(solver="liblinear", max_iter=1000),
    "params": {
        "C": [0.01, 0.1, 1, 10],
        "penalty": ["l1", "l2"],
    },
}

models_params["DecisionTree"] = {
    "model": DecisionTreeClassifier(random_state=42),
    "params": {
        "max_depth": [None, 3, 5, 7],
        "min_samples_split": [2, 5, 10],
        "min_samples_leaf": [1, 2, 4],
    },
}

models_params["RandomForest"] = {
    "model": RandomForestClassifier(random_state=42),
    "params": {
        "n_estimators": [50, 100, 200],
        "max_depth": [None, 3, 5, 7],
        "min_samples_split": [2, 5, 10],
        "min_samples_leaf": [1, 2, 4],
    },
}

models_params["SVM"] = {
    "model": SVC(random_state=42, probability=True),
    "params": {
        "C": [0.1, 1, 10],
        "kernel": ["linear", "rbf", "poly"],
        "gamma": ["scale", "auto"],
    },
}


In [5]:
# Tune every candidate with a 5-fold cross-validated grid search (scored by
# accuracy) on the training split, keep the best estimator found for each
# model, and print its performance on the held-out test split.
best_models = {}

for model_name, mp in models_params.items():
    print(f"\n - Tuning {model_name}...")
    clf = GridSearchCV(mp["model"], mp["params"], cv=5, scoring="accuracy", n_jobs=-1)
    clf.fit(X_train, y_train)
    best_models[model_name] = clf.best_estimator_
    print(f"Best params: {clf.best_params_}")
    y_pred = clf.predict(X_test)
    print("Test set accuracy:", accuracy_score(y_test, y_pred))
    # Some classes are never predicted, so their precision is undefined (0/0).
    # zero_division=0 still reports those entries as 0.00 but does not emit
    # the repeated undefined-metric warnings for every report.
    print(classification_report(y_test, y_pred, zero_division=0))


 - Tuning LogisticRegression...
Best params: {'C': 1, 'penalty': 'l1'}
Test set accuracy: 0.6229508196721312
              precision    recall  f1-score   support

           0       0.74      0.97      0.84        33
           1       0.25      0.09      0.13        11
           2       0.20      0.14      0.17         7
           3       0.44      0.57      0.50         7
           4       0.00      0.00      0.00         3

    accuracy                           0.62        61
   macro avg       0.33      0.35      0.33        61
weighted avg       0.52      0.62      0.56        61


 - Tuning DecisionTree...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Best params: {'max_depth': 5, 'min_samples_leaf': 2, 'min_samples_split': 5}
Test set accuracy: 0.5737704918032787
              precision    recall  f1-score   support

           0       0.80      0.85      0.82        33
           1       0.31      0.45      0.37        11
           2       0.20      0.14      0.17         7
           3       0.20      0.14      0.17         7
           4       0.00      0.00      0.00         3

    accuracy                           0.57        61
   macro avg       0.30      0.32      0.31        61
weighted avg       0.54      0.57      0.55        61


 - Tuning RandomForest...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Best params: {'max_depth': 7, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 200}
Test set accuracy: 0.5901639344262295
              precision    recall  f1-score   support

           0       0.73      0.97      0.83        33
           1       0.38      0.27      0.32        11
           2       0.17      0.14      0.15         7
           3       0.00      0.00      0.00         7
           4       0.00      0.00      0.00         3

    accuracy                           0.59        61
   macro avg       0.25      0.28      0.26        61
weighted avg       0.48      0.59      0.52        61


 - Tuning SVM...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Best params: {'C': 1, 'gamma': 'scale', 'kernel': 'poly'}
Test set accuracy: 0.5737704918032787
              precision    recall  f1-score   support

           0       0.64      0.97      0.77        33
           1       0.14      0.09      0.11        11
           2       0.67      0.29      0.40         7
           3       0.00      0.00      0.00         7
           4       0.00      0.00      0.00         3

    accuracy                           0.57        61
   macro avg       0.29      0.27      0.26        61
weighted avg       0.45      0.57      0.48        61



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [6]:
# Score each tuned estimator on the held-out test split.
results = []
for name, model in best_models.items():
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    results.append(dict(Model=name, Accuracy=acc))

# Rank the models from most to least accurate and show the table.
results_df = pd.DataFrame(results).sort_values("Accuracy", ascending=False)
display(results_df)

# Persist the accuracy ranking (not the fitted models) alongside the data files.
results_csv = os.path.join(data_path, "hyperparameter_tuning_results.csv")
results_df.to_csv(results_csv, index=False)
print("Hyperparameter tuning results saved.")

Unnamed: 0,Model,Accuracy
0,LogisticRegression,0.622951
2,RandomForest,0.590164
1,DecisionTree,0.57377
3,SVM,0.57377


Hyperparameter tuning results saved.
