In [3]:
# 📚 Import libraries
import numpy as np
import pandas as pd

from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [4]:
df = pd.read_csv('../data/cleveland_selected_features.csv')

# Separate features and target
X = df.drop('target', axis=1)
y = df['target']

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)


In [None]:
# Logistic Regression (GridSearchCV)

lr = LogisticRegression(max_iter=1000)

lr_params = {
    'C': [0.01, 0.1, 1, 10, 100],
    'solver': ['liblinear', 'lbfgs'],
    'penalty': ['l2']
}

lr_grid = GridSearchCV(lr, lr_params, cv=5, scoring='accuracy')
lr_grid.fit(X_train, y_train)

print("Best Logistic Regression Params:", lr_grid.best_params_)
lr_best = lr_grid.best_estimator_


Best Logistic Regression Params: {'C': 0.01, 'penalty': 'l2', 'solver': 'liblinear'}


In [None]:
#  2. Decision Tree (GridSearchCV)
dt = DecisionTreeClassifier(random_state=42)

dt_params = {
    'max_depth': [3, 5, 10, None],
    'min_samples_split': [2, 5, 10],
    'criterion': ['gini', 'entropy']
}

dt_grid = GridSearchCV(dt, dt_params, cv=5, scoring='accuracy')
dt_grid.fit(X_train, y_train)

print("Decision Tree Best Params:", dt_grid.best_params_)
dt_best = dt_grid.best_estimator_


🔹 Decision Tree Best Params: {'criterion': 'entropy', 'max_depth': 10, 'min_samples_split': 10}


In [11]:
# 3. Random Forest (RandomizedSearchCV)
rf = RandomForestClassifier(random_state=42)

rf_params = {
    'n_estimators': [50, 100, 150],
    'max_depth': [5, 10, 20, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False]
}

rf_random = RandomizedSearchCV(rf, rf_params, n_iter=20, cv=5, scoring='accuracy', random_state=42)
rf_random.fit(X_train, y_train)

print("🔹 Random Forest Best Params:", rf_random.best_params_)
rf_best = rf_random.best_estimator_


🔹 Random Forest Best Params: {'n_estimators': 50, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_depth': None, 'bootstrap': False}


In [13]:
models = {
    "Logistic Regression": lr_best,
    "Decision Tree": dt_best,
    "Random Forest": rf_best
}

for name, model in models.items():
    y_pred = model.predict(X_test)
    print(f"\n{name} Evaluation:")
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
    print("Classification Report:\n", classification_report(y_test, y_pred))



Logistic Regression Evaluation:
Accuracy: 0.7630057803468208
Confusion Matrix:
 [[  0  40]
 [  1 132]]
Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00        40
           1       0.77      0.99      0.87       133

    accuracy                           0.76       173
   macro avg       0.38      0.50      0.43       173
weighted avg       0.59      0.76      0.67       173


Decision Tree Evaluation:
Accuracy: 0.8728323699421965
Confusion Matrix:
 [[ 26  14]
 [  8 125]]
Classification Report:
               precision    recall  f1-score   support

           0       0.76      0.65      0.70        40
           1       0.90      0.94      0.92       133

    accuracy                           0.87       173
   macro avg       0.83      0.79      0.81       173
weighted avg       0.87      0.87      0.87       173


Random Forest Evaluation:
Accuracy: 0.8728323699421965
Confusion Matrix:
 [[ 23  17]
 [  5 128]