In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import VotingClassifier,RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder ,MinMaxScaler,StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, NuSVC, LinearSVC
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB
from sklearn.linear_model import LogisticRegression,LogisticRegressionCV
from sklearn.model_selection import train_test_split, cross_val_score,GridSearchCV
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report,f1_score,recall_score,precision_score
import seaborn as sns
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.svm import l1_min_c
from sklearn.decomposition import PCA
import warnings
warnings.filterwarnings('ignore')
from sklearn.impute import SimpleImputer

In [None]:
# Source CSV for the heart-disease data set, fetched over HTTP on every run.
DATA_URL = (
    "https://raw.githubusercontent.com/mujahidashraf/data/refs/heads/main/heart_disease_data.csv"
)
df = pd.read_csv(DATA_URL)

# Preview the first rows as the cell output.
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,48,1,0,130,256,1,0,150,1,0.0,2,2,3,0
1,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
2,44,0,2,118,242,0,1,149,0,0.3,1,1,2,1
3,47,1,0,110,275,0,0,118,1,1.0,1,1,2,0
4,56,1,3,120,193,0,0,162,0,1.9,1,0,3,1


In [None]:
# Features: every column except the label and the CSV's saved row index.
# "Unnamed: 0" is only the row counter stored in the file (0, 1, 2, ... in
# df.head()); keeping it in X would let the models train on row position
# instead of on the clinical measurements. errors='ignore' keeps this cell
# working if the frame was loaded without that column.
X = df.drop(columns=['target']).drop(columns=['Unnamed: 0'], errors='ignore')

# Label: the binary `target` column.
y = df['target']

In [None]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=10,
)

In [None]:
# Candidate models. Each one is wrapped in its own Pipeline with a
# StandardScaler, so the scaler is fitted on whatever data is passed to fit().
pipeline_gnb = Pipeline([
    ('scaler', StandardScaler()),
    ('gnb', GaussianNB())
])

pipeline_knn = Pipeline([
    ('scaler', StandardScaler()),
    ('knn', KNeighborsClassifier(n_neighbors=5))
])

pipeline_logreg = Pipeline([
    ('scaler', StandardScaler()),
    ('logreg', LogisticRegression(max_iter=500))
])

pipeline_tree = Pipeline([
    ('scaler', StandardScaler()),
    ('tree', DecisionTreeClassifier(random_state=42))
])

pipeline_svm = Pipeline([
    ('scaler', StandardScaler()),
    # probability=True enables predict_proba, which soft voting needs.
    # random_state seeds the data shuffling SVC performs for those probability
    # estimates, so the probabilities are repeatable like the tree/forest models.
    ('svm', SVC(probability=True, random_state=42))
])

pipeline_rf = Pipeline([
    ('scaler', StandardScaler()),  # Not necessary for RandomForest but included for consistency
    ('rf', RandomForestClassifier(random_state=42))
])

# Ensemble of the tree, SVM and random-forest pipelines. pipeline_gnb,
# pipeline_knn and pipeline_logreg are defined above but are not members.
# voting='hard' (majority vote on predicted labels) is the library default;
# it is spelled out here so the starting mode is explicit.
voting_clf = VotingClassifier(
    estimators=[
        ('tree', pipeline_tree),
        ('svm', pipeline_svm),
        ('rf', pipeline_rf),
    ],
    voting='hard',
)

# Train the ensemble
voting_clf.fit(X_train, y_train)

In [None]:
# Print the test-set accuracy of every fitted member of the ensemble,
# one "name = score" line per estimator.
for name, clf in voting_clf.named_estimators_.items():
    member_accuracy = clf.score(X_test, y_test)
    print(name, "=", member_accuracy)

tree = 0.9230769230769231
svm = 0.9010989010989011
rf = 0.9560439560439561


In [None]:
# Mean accuracy of the combined ensemble on the held-out test split.
voting_clf.score(X=X_test, y=y_test)

0.967032967032967

In [None]:
# Predict the test split with the ensemble, then print the overall accuracy
# followed by the per-class precision / recall / F1 table.
y_pred = voting_clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(
    f"Voting Classifier Accuracy: {accuracy:.4f}",
    classification_report(y_test, y_pred),
    sep="\n",
)

Voting Classifier Accuracy: 0.9670
              precision    recall  f1-score   support

           0       0.98      0.96      0.97        91
           1       0.96      0.98      0.97        91

    accuracy                           0.97       182
   macro avg       0.97      0.97      0.97       182
weighted avg       0.97      0.97      0.97       182



In [None]:
# Switch the ensemble to soft voting and refit.
# The "svm" member is a Pipeline, so the SVC's `probability` flag must be set
# through the nested parameter path <estimator>__<step>__<param>. Assigning
# `.probability` on the pipeline object would only add an unused attribute to
# the Pipeline and never reach the SVC step.
voting_clf.set_params(voting="soft", svm__svm__probability=True)
voting_clf.fit(X_train, y_train)

# Test-set accuracy of the refitted ensemble.
voting_clf.score(X_test, y_test)

0.9340659340659341

In [None]:
# Re-run the evaluation on the ensemble in its current (refitted) state:
# overall accuracy first, then the per-class report.
y_pred = voting_clf.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Voting Classifier Accuracy: {accuracy:.4f}")
report_text = classification_report(y_true=y_test, y_pred=y_pred)
print(report_text)

Voting Classifier Accuracy: 0.9341
              precision    recall  f1-score   support

           0       0.93      0.93      0.93        91
           1       0.93      0.93      0.93        91

    accuracy                           0.93       182
   macro avg       0.93      0.93      0.93       182
weighted avg       0.93      0.93      0.93       182

