In [72]:
from scipy.stats import randint, uniform

lr_params = {
    "C": [0.01, 0.1, 1, 10],
    "penalty": ["l2"],
    "solver": ["lbfgs", "liblinear"]
}
lr_grid = GridSearchCV(LogisticRegression(max_iter=1000, random_state=42), lr_params, cv=5, scoring="f1")
lr_grid.fit(X_train, y_train)

dt_params = {
    "max_depth": randint(2, 10),
    "min_samples_split": randint(2, 10),
    "min_samples_leaf": randint(1, 10)
}
dt_rand = RandomizedSearchCV(DecisionTreeClassifier(random_state=42), dt_params, n_iter=20, cv=5, scoring="f1", random_state=42)
dt_rand.fit(X_train, y_train)

rf_params = {
    "n_estimators": randint(50, 300),
    "max_depth": randint(2, 10),
    "min_samples_split": randint(2, 10),
    "min_samples_leaf": randint(1, 10)
}
rf_rand = RandomizedSearchCV(RandomForestClassifier(random_state=42), rf_params, n_iter=20, cv=5, scoring="f1", random_state=42)
rf_rand.fit(X_train, y_train)


svm_params = {
    "C": [0.1, 1, 10],
    "kernel": ["linear", "rbf"],
    "gamma": ["scale", "auto"]
}
svm_grid = GridSearchCV(SVC(probability=True, random_state=42), svm_params, cv=5, scoring="f1")
svm_grid.fit(X_train, y_train)

In [73]:
optimized_results = {}
for name, model_search in {
    "Logistic Regression": lr_grid,
    "Decision Tree": dt_rand,
    "Random Forest": rf_rand,
    "SVM": svm_grid
}.items():
    model = model_search.best_estimator_
    y_pred = model.predict(X_test)
    y_proba = model.predict_proba(X_test)[:,1]
    optimized_results[name] = {
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred),
        "Recall": recall_score(y_test, y_pred),
        "F1": f1_score(y_test, y_pred),
        "AUC": roc_auc_score(y_test, y_proba),
        "Best Params": model_search.best_params_
    }

optimized_df = pd.DataFrame(optimized_results).T
print(optimized_df)


                     Accuracy Precision    Recall        F1       AUC  \
Logistic Regression  0.831522  0.831776  0.872549  0.851675  0.913797   
Decision Tree        0.798913  0.798165  0.852941  0.824645  0.866332   
Random Forest        0.853261  0.844037  0.901961  0.872038  0.919417   
SVM                  0.853261  0.826087  0.931373  0.875576  0.921927   

                                                           Best Params  
Logistic Regression    {'C': 0.01, 'penalty': 'l2', 'solver': 'lbfgs'}  
Decision Tree        {'max_depth': 4, 'min_samples_leaf': 8, 'min_s...  
Random Forest        {'max_depth': 8, 'min_samples_leaf': 3, 'min_s...  
SVM                     {'C': 1, 'gamma': 'scale', 'kernel': 'linear'}  


In [74]:
comparison_df = pd.concat([results_df, optimized_df], axis=1, keys=["Baseline","Optimized"])
print("\nComparison of Baseline vs Optimized:\n", comparison_df)


Comparison of Baseline vs Optimized:
                      Baseline                                          \
                     Accuracy Precision    Recall  F1-score       AUC   
Logistic Regression  0.836957  0.827273  0.892157  0.858491  0.921330   
Decision Tree        0.793478  0.775862  0.882353  0.825688  0.782640   
Random Forest        0.836957  0.827273  0.892157  0.858491  0.923302   
SVM                  0.853261  0.826087  0.931373  0.875576  0.922406   

                    Optimized                                          \
                     Accuracy Precision    Recall        F1       AUC   
Logistic Regression  0.831522  0.831776  0.872549  0.851675  0.913797   
Decision Tree        0.798913  0.798165  0.852941  0.824645  0.866332   
Random Forest        0.853261  0.844037  0.901961  0.872038  0.919417   
SVM                  0.853261  0.826087  0.931373  0.875576  0.921927   

                                                                        
          

In [75]:
best_svm = optimized_models["SVM"]

print("\nBest Model Selected: SVM")
print(best_svm)

# --- Predict on test set ---
y_pred_svm = best_svm.predict(X_test)

# --- Evaluation ---
print("\nClassification Report (SVM):")
print(classification_report(y_test, y_pred_svm))


Best Model Selected: SVM
SVC(C=1, kernel='linear', probability=True, random_state=42)

Classification Report (SVM):
              precision    recall  f1-score   support

           0       0.90      0.76      0.82        82
           1       0.83      0.93      0.88       102

    accuracy                           0.85       184
   macro avg       0.86      0.84      0.85       184
weighted avg       0.86      0.85      0.85       184



In [84]:
# Build sample row
sample_dict = {
    'age': 63,
    'trestbps': 145,
    'chol': 273,
    'thalach': 130,
    'oldpeak': 2.3,
    'ca': 0,
    'sex_1.0': 1,
    'cp_2.0': 0, 'cp_3.0': 0, 'cp_4.0': 0,
    'fbs_1.0': 1,
    'restecg_1.0': 0, 'restecg_2.0': 0,
    'exang_1.0': 0,
    'slope_2.0': 1, 'slope_3.0': 0,
    'thal_3.0': 0, 'thal_6.0': 0, 'thal_7.0': 1
}

# Ensure DataFrame matches training columns, fill missing with 0
sample_input = pd.DataFrame([sample_dict], columns=columns_).fillna(0)

# Prediction
y_pred = optimized_models["SVM"].predict(sample_input)[0]
y_prob = optimized_models["SVM"].predict_proba(sample_input)[0][1]

print("SVM Prediction:")
print(f"  Predicted: {y_pred}, True: {true_label}")
print(f"  Probability of Heart Disease: {y_prob:.4f}")




SVM Prediction:
  Predicted: 1, True: 1
  Probability of Heart Disease: 1.0000
