In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix, precision_score, recall_score, roc_curve, ConfusionMatrixDisplay
import joblib

In [4]:
# Read the heart-disease table from the CSV in the working directory.
df = pd.read_csv(filepath_or_buffer="heart_disease_cleaned.csv")
# Bare last expression: the notebook renders a (truncated) preview of the frame.
df

Unnamed: 0,age,trestbps,chol,thalach,oldpeak,ca,sex_Female,sex_Male,cp_Asymptomatic,cp_AtypicalAngina,...,restecg_STTAbnormality,exang_NoExAngina,exang_YesExAngina,slope_Downsloping,slope_Flat,slope_Upsloping,thal_FixedDefect,thal_Normal,thal_ReversibleDefect,heartdiseasepresence
0,63,145,233,150,2.3,0.0,0,1,0,0,...,0,1,0,1,0,0,1,0,0,0
1,67,160,286,108,1.5,3.0,0,1,1,0,...,0,0,1,0,1,0,0,1,0,1
2,67,120,229,129,2.6,2.0,0,1,1,0,...,0,0,1,0,1,0,0,0,1,1
3,37,130,250,187,3.5,0.0,0,1,0,0,...,0,1,0,1,0,0,0,1,0,0
4,41,130,204,172,1.4,0.0,1,0,0,1,...,0,1,0,0,0,1,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
267,57,140,241,123,0.2,0.0,1,0,1,0,...,0,0,1,0,1,0,0,0,1,1
268,45,110,264,132,1.2,0.0,0,1,0,0,...,0,1,0,0,1,0,0,0,1,1
269,68,144,193,141,3.4,2.0,0,1,1,0,...,0,1,0,0,1,0,0,0,1,1
270,57,130,131,115,1.2,1.0,0,1,1,0,...,0,0,1,0,1,0,0,0,1,1


In [5]:
# Split the frame into target and predictors *by name* instead of by position.
#
# "Unnamed: 0" (visible in the preview of `df`) appears to be a row index that
# was written out by an earlier to_csv() call. It is a row identifier, not a
# clinical measurement, so it must not be fed to the model as a feature.
# errors="ignore" keeps this cell working if the CSV is later re-exported
# without that column.
#
# NOTE: a model file saved by an earlier run of this notebook was fitted with
# the extra column and has to be re-saved before it can score this X.
y = df["heartdiseasepresence"]
X = df.drop(columns=["heartdiseasepresence", "Unnamed: 0"], errors="ignore")

# Stratified 80/20 hold-out split; the fixed seed makes it reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:
# Hyper-parameter search space for the SVM.
# NOTE: "gamma" is ignored by the linear kernel, so every linear candidate is
# fitted once per gamma value with identical results.
param_grid = {
    "C": [0.1, 1, 10],
    "gamma": ["scale", 0.01, 0.001],
    "kernel": ["rbf", "linear"]
}

# probability=True is needed for the predict_proba() call further down.
svc_base = SVC(probability=True, random_state=42)

# One seeded splitter, shared by the grid search and the final CV estimate.
# With an integer random_state every split() call yields the same folds for
# the same data, so sharing the instance does not change any result.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Grid search with stratified 5-fold CV, model selection by ROC AUC.
grid = GridSearchCV(svc_base, param_grid, cv=skf, scoring='roc_auc', n_jobs=-1)
grid.fit(X_train, y_train)

print("Best parameters:", grid.best_params_)
# Mean cross-validated ROC AUC of the winning candidate (training folds only).
print("Best ROC AUC (CV):", grid.best_score_)

# Use best estimator (refitted on all of X_train) to evaluate on the test set.
best_svc = grid.best_estimator_
y_pred = best_svc.predict(X_test)
y_proba = best_svc.predict_proba(X_test)[:, 1]  # probability of class 1

print("Best Accuracy:", accuracy_score(y_test, y_pred))
print("Best ROC AUC:", roc_auc_score(y_test, y_proba))
print("Best Precision:", precision_score(y_test, y_pred))
print("Best Recall:", recall_score(y_test, y_pred))

# cross_val_score clones and refits the estimator on every fold of the FULL
# data set (X, y). The folds therefore contain the held-out test rows, so this
# number is not independent of the test metrics printed above.
cv_scores = cross_val_score(best_svc, X, y, cv=skf, scoring='accuracy')
print("Best Cross-validated accuracy (mean):", cv_scores.mean())


Best parameters: {'C': 1, 'gamma': 'scale', 'kernel': 'linear'}
Best ROC AUC (CV): 0.9279417552144824
Best Accuracy: 0.8181818181818182
Best ROC AUC: 0.917989417989418
Best Precision: 0.8148148148148148
Best Recall: 0.8148148148148148
Best Cross-validated accuracy (mean): 0.81993265993266


In [None]:
# Reload the persisted model under its OWN name. Binding it to `best_svc`
# would overwrite the freshly tuned estimator, and the later confusion-matrix
# and joblib.dump cells would then silently use (and re-save) the old model.
#
# Requires support_vector_machine_model.joblib to already exist on disk.
# NOTE: joblib.load unpickles the file - only load model files you trust.
loaded_svc = joblib.load("support_vector_machine_model.joblib")

# Predict on new data (example: X_test)
y_pred_loaded = loaded_svc.predict(X_test)
y_proba_loaded = loaded_svc.predict_proba(X_test)[:, 1]

# Evaluate loaded model
print("Loaded model accuracy:", accuracy_score(y_test, y_pred_loaded))
print("Loaded model ROC AUC:", roc_auc_score(y_test, y_proba_loaded))
print("Loaded model precision:", precision_score(y_test, y_pred_loaded))
print("Loaded model recall:", recall_score(y_test, y_pred_loaded))

# cross_val_score clones and refits the estimator per fold, so this checks the
# loaded model's hyper-parameters on (X, y), not its stored fitted state.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cv_scores = cross_val_score(loaded_svc, X, y, cv=skf, scoring='accuracy')
print("Loaded model cross-validated accuracy (mean):", cv_scores.mean())


In [None]:
# ROC curve of the tuned SVM on the held-out test split.
fpr, tpr, thresholds = roc_curve(y_test, y_proba)
test_auc = roc_auc_score(y_test, y_proba)

fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(fpr, tpr, label='Support Vector Machine (AUC = {:.2f})'.format(test_auc))
# Diagonal = performance of a classifier that guesses at random.
ax.plot([0, 1], [0, 1], 'k--', label='Random Guess')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('ROC Curve')
ax.legend(loc='lower right')
plt.show()

In [None]:
# Confusion matrix of `best_svc` on the test split; the title is set on the
# Axes that the display object drew into.
cm_display = ConfusionMatrixDisplay.from_estimator(best_svc, X_test, y_test, cmap='Blues')
cm_display.ax_.set_title('Confusion Matrix')
plt.show()

In [None]:
# Persist whatever estimator `best_svc` currently refers to, overwriting any
# existing file of the same name in the working directory.
joblib.dump(value=best_svc, filename="support_vector_machine_model.joblib")
print("Model saved as support_vector_machine_model.joblib")