# Heart Disease Classification - Final Comparison
All models are evaluated with default, GridSearchCV, and RandomizedSearchCV.

In [1]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import matplotlib.pyplot as plt
from scipy.stats import randint, loguniform

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.svm import SVC

df = pd.read_csv('/content/heart.csv', na_values='?', header=None, names=[
    "age", "sex", "cp", "trestbps", "chol", "fbs", "restecg",
    "thalach", "exang", "oldpeak", "slope", "ca", "thal", "target"
])
df = df.apply(pd.to_numeric, errors='coerce')
df.fillna(df.mean(numeric_only=True), inplace=True)
df['target'] = df['target'].apply(lambda x: 1 if x > 0 else 0)

X = df.drop('target', axis=1)
y = df['target']
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
results = []


## Logistic Regression

In [2]:

from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# === Logistic Regression - Default ===

model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
y_probs = model.predict_proba(X_test)[:, 1]
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_probs)

results.append({
    'Model': 'Logistic Regression',
    'Variant': 'Default',
    'Confusion Matrix': cm,
    'Accuracy': accuracy,
    'Precision': precision,
    'Recall': recall,
    'F1 Score': f1,
    'AUC': auc
})


In [3]:

from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# === Logistic Regression - GridSearchCV ===

param_grid = {
    'C': [0.01, 0.1, 1, 10],
    'solver': ['liblinear', 'lbfgs'],
    'penalty': ['l2']
}
grid = GridSearchCV(LogisticRegression(max_iter=1000), param_grid, scoring='f1', cv=5)
grid.fit(X_train, y_train)
model = grid.best_estimator_

y_pred = model.predict(X_test)
y_probs = model.predict_proba(X_test)[:, 1]
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_probs)

results.append({
    'Model': 'Logistic Regression',
    'Variant': 'GridSearchCV',
    'Confusion Matrix': cm,
    'Accuracy': accuracy,
    'Precision': precision,
    'Recall': recall,
    'F1 Score': f1,
    'AUC': auc
})


In [4]:

from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# === Logistic Regression - RandomizedSearchCV ===

param_dist = {
    'C': loguniform(1e-3, 1e2),
    'solver': ['liblinear', 'lbfgs'],
    'penalty': ['l2']
}
random_search = RandomizedSearchCV(LogisticRegression(max_iter=1000), param_dist, scoring='f1', cv=5, n_iter=20, random_state=42)
random_search.fit(X_train, y_train)
model = random_search.best_estimator_

y_pred = model.predict(X_test)
y_probs = model.predict_proba(X_test)[:, 1]
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_probs)

results.append({
    'Model': 'Logistic Regression',
    'Variant': 'RandomizedSearchCV',
    'Confusion Matrix': cm,
    'Accuracy': accuracy,
    'Precision': precision,
    'Recall': recall,
    'F1 Score': f1,
    'AUC': auc
})


## Random Forest

In [5]:

from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# === Random Forest - Default ===

model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
y_probs = model.predict_proba(X_test)[:, 1]
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_probs)

results.append({
    'Model': 'Random Forest',
    'Variant': 'Default',
    'Confusion Matrix': cm,
    'Accuracy': accuracy,
    'Precision': precision,
    'Recall': recall,
    'F1 Score': f1,
    'AUC': auc
})


In [6]:

from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# === Random Forest - GridSearchCV ===

param_grid = {
    'n_estimators': [50, 100, 150],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2]
}
grid = GridSearchCV(RandomForestClassifier(random_state=42), param_grid, scoring='f1', cv=5)
grid.fit(X_train, y_train)
model = grid.best_estimator_

y_pred = model.predict(X_test)
y_probs = model.predict_proba(X_test)[:, 1]
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_probs)

results.append({
    'Model': 'Random Forest',
    'Variant': 'GridSearchCV',
    'Confusion Matrix': cm,
    'Accuracy': accuracy,
    'Precision': precision,
    'Recall': recall,
    'F1 Score': f1,
    'AUC': auc
})


In [7]:

from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# === Random Forest - RandomizedSearchCV ===

param_dist = {
    'n_estimators': randint(50, 200),
    'max_depth': [None, 5, 10, 15],
    'min_samples_split': randint(2, 10),
    'min_samples_leaf': randint(1, 5),
    'bootstrap': [True, False]
}
random_search = RandomizedSearchCV(RandomForestClassifier(random_state=42), param_dist, scoring='f1', cv=5, n_iter=30, random_state=42)
random_search.fit(X_train, y_train)
model = random_search.best_estimator_

y_pred = model.predict(X_test)
y_probs = model.predict_proba(X_test)[:, 1]
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_probs)

results.append({
    'Model': 'Random Forest',
    'Variant': 'RandomizedSearchCV',
    'Confusion Matrix': cm,
    'Accuracy': accuracy,
    'Precision': precision,
    'Recall': recall,
    'F1 Score': f1,
    'AUC': auc
})


## XGBoost

In [8]:

from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# === XGBoost - Default ===

model = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
y_probs = model.predict_proba(X_test)[:, 1]
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_probs)

results.append({
    'Model': 'XGBoost',
    'Variant': 'Default',
    'Confusion Matrix': cm,
    'Accuracy': accuracy,
    'Precision': precision,
    'Recall': recall,
    'F1 Score': f1,
    'AUC': auc
})


Parameters: { "use_label_encoder" } are not used.



In [9]:

from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# === XGBoost - GridSearchCV ===

param_grid = {
    'n_estimators': [50, 100],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.1, 0.2],
    'subsample': [0.8, 1.0]
}
grid = GridSearchCV(XGBClassifier(use_label_encoder=False, eval_metric='logloss'), param_grid, scoring='f1', cv=5)
grid.fit(X_train, y_train)
model = grid.best_estimator_

y_pred = model.predict(X_test)
y_probs = model.predict_proba(X_test)[:, 1]
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_probs)

results.append({
    'Model': 'XGBoost',
    'Variant': 'GridSearchCV',
    'Confusion Matrix': cm,
    'Accuracy': accuracy,
    'Precision': precision,
    'Recall': recall,
    'F1 Score': f1,
    'AUC': auc
})


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encode

In [10]:

from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# === XGBoost - RandomizedSearchCV ===

param_dist = {
    'n_estimators': randint(50, 150),
    'max_depth': randint(3, 10),
    'learning_rate': loguniform(0.01, 0.3),
    'subsample': [0.8, 1.0]
}
random_search = RandomizedSearchCV(XGBClassifier(use_label_encoder=False, eval_metric='logloss'), param_dist, scoring='f1', cv=5, n_iter=30, random_state=42)
random_search.fit(X_train, y_train)
model = random_search.best_estimator_

y_pred = model.predict(X_test)
y_probs = model.predict_proba(X_test)[:, 1]
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_probs)

results.append({
    'Model': 'XGBoost',
    'Variant': 'RandomizedSearchCV',
    'Confusion Matrix': cm,
    'Accuracy': accuracy,
    'Precision': precision,
    'Recall': recall,
    'F1 Score': f1,
    'AUC': auc
})


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encode

## SVM

In [11]:

from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# === SVM - Default ===

model = SVC(probability=True, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
y_probs = model.predict_proba(X_test)[:, 1]
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_probs)

results.append({
    'Model': 'SVM',
    'Variant': 'Default',
    'Confusion Matrix': cm,
    'Accuracy': accuracy,
    'Precision': precision,
    'Recall': recall,
    'F1 Score': f1,
    'AUC': auc
})


In [12]:

from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# === SVM - GridSearchCV ===

param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf', 'poly'],
    'gamma': ['scale', 'auto']
}
grid = GridSearchCV(SVC(probability=True), param_grid, scoring='f1', cv=5)
grid.fit(X_train, y_train)
model = grid.best_estimator_

y_pred = model.predict(X_test)
y_probs = model.predict_proba(X_test)[:, 1]
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_probs)

results.append({
    'Model': 'SVM',
    'Variant': 'GridSearchCV',
    'Confusion Matrix': cm,
    'Accuracy': accuracy,
    'Precision': precision,
    'Recall': recall,
    'F1 Score': f1,
    'AUC': auc
})


In [13]:

from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# === SVM - RandomizedSearchCV ===

param_dist = {
    'C': loguniform(1e-3, 1e2),
    'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
    'gamma': ['scale', 'auto']
}
random_search = RandomizedSearchCV(SVC(probability=True), param_dist, scoring='f1', cv=5, n_iter=30, random_state=42)
random_search.fit(X_train, y_train)
model = random_search.best_estimator_

y_pred = model.predict(X_test)
y_probs = model.predict_proba(X_test)[:, 1]
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_probs)

results.append({
    'Model': 'SVM',
    'Variant': 'RandomizedSearchCV',
    'Confusion Matrix': cm,
    'Accuracy': accuracy,
    'Precision': precision,
    'Recall': recall,
    'F1 Score': f1,
    'AUC': auc
})


In [17]:

summary_df = pd.DataFrame(results)
summary_df = summary_df.sort_values(by='Accuracy', ascending=False)
summary_df.reset_index(drop=True, inplace=True)
summary_df


Unnamed: 0,Model,Variant,Confusion Matrix,Accuracy,Precision,Recall,F1 Score,AUC
0,SVM,Default,"[[27, 2], [4, 28]]",0.901639,0.933333,0.875,0.903226,0.940733
1,SVM,GridSearchCV,"[[26, 3], [3, 29]]",0.901639,0.90625,0.90625,0.90625,0.934267
2,SVM,RandomizedSearchCV,"[[26, 3], [3, 29]]",0.901639,0.90625,0.90625,0.90625,0.934267
3,Random Forest,RandomizedSearchCV,"[[27, 2], [4, 28]]",0.901639,0.933333,0.875,0.903226,0.953664
4,Random Forest,Default,"[[26, 3], [4, 28]]",0.885246,0.903226,0.875,0.888889,0.940733
5,Logistic Regression,RandomizedSearchCV,"[[25, 4], [3, 29]]",0.885246,0.878788,0.90625,0.892308,0.921336
6,Logistic Regression,GridSearchCV,"[[26, 3], [4, 28]]",0.885246,0.903226,0.875,0.888889,0.940733
7,Logistic Regression,Default,"[[25, 4], [3, 29]]",0.885246,0.878788,0.90625,0.892308,0.919181
8,XGBoost,Default,"[[26, 3], [5, 27]]",0.868852,0.9,0.84375,0.870968,0.920259
9,XGBoost,RandomizedSearchCV,"[[25, 4], [4, 28]]",0.868852,0.875,0.875,0.875,0.927802
