In [1]:
import os, shutil, glob
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.ensemble import GradientBoostingClassifier, AdaBoostClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB

In [8]:
# Load the raw heart-disease table; the path is resolved against the current working directory.
df = pd.read_csv(os.path.join(os.getcwd(),"Dataset For Model",'Heart Disease Data.csv'), header = 0)

# Text label -> integer code for each categorical feature column.
# 'Asymptomatic' now has its own code (4); it previously shared code 3 with
# 'Non-Anginal Pain', which merged two distinct categories into one value.
# NOTE(review): anything that relies on the old encoding (saved models,
# inference code outside this cell) must be retrained/updated to match.
label_codes = {
    'Sex': {'Female':0, 'Male':1},
    'Chest Pain Type': {'No Pain':0, 'Typical Angina':1, 'Atypical Angina':2, 'Non-Anginal Pain':3, 'Asymptomatic':4},
    'Fasting Blood Sugar': {'< 120 mg/dl':0, '> 120 mg/dl':1},
    'Resting Electrocardiographic Results': {'Normal':1, 'ST-T Change':2, 'Left Ventricular Hypertrophy':3},
    'Exercise Induced Angina': {'No Angina':0, 'Angina':1},
    'The slope of the peak exercise ST segment': {'Down Slope':0 ,'Flat Slope':1, 'Up Slope':2},
    'Thallium Test Result': {'Normal':3, 'Reversible Defect':2, 'Fixed Defect':1, 'Unknown':0},
}


def _encode_labels(frame, codes_by_column, lenient=()):
    """Replace text labels in ``frame`` with integer codes, column by column.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table whose columns are encoded in place.
    codes_by_column : dict
        Maps a column name to its ``{label: code}`` dictionary.
    lenient : collection of str
        Columns in which labels missing from the dictionary are allowed to
        become NaN (the caller imputes them afterwards).

    Returns
    -------
    pandas.DataFrame
        The same ``frame`` object, with the listed columns encoded.

    Raises
    ------
    ValueError
        If a non-lenient column contains a non-null label that has no code.
        ``Series.map`` would otherwise turn it into NaN without any warning.
    """
    for column, codes in codes_by_column.items():
        raw = frame[column]
        encoded = raw.map(codes)
        # Values that were present in the raw data but have no code.
        unknown = raw[raw.notna() & encoded.isna()].unique()
        if len(unknown) and column not in lenient:
            raise ValueError(
                'Unmapped label(s) in column {!r}: {}'.format(column, list(unknown))
            )
        frame[column] = encoded
    return frame


df = _encode_labels(df, label_codes, lenient=('Thallium Test Result',))

# Any Thallium value still missing after encoding is filled with the mean of the codes.
# NOTE(review): the mean of category codes may be a fractional value that matches
# no category - confirm this imputation is intended (the mode is a common alternative).
df['Thallium Test Result'] = df['Thallium Test Result'].fillna(df['Thallium Test Result'].mean())

In [10]:
# Feature matrix (every column except the target) and target vector.
X = df.drop(columns=['Result']).values
Y = df['Result'].values

# Split BEFORE scaling: the original fitted the scaler on the full matrix, so
# the mean/variance of the held-out rows leaked into the training features.
# Same test_size / random_state as before, so the row partition is unchanged.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Learn the scaling statistics from the training rows only, then apply the
# same transform to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Keep X as the scaled full matrix (as it was before this change), in case
# other cells read it; it now uses the train-only statistics.
X = scaler.transform(X)

# Level-0 learners, keyed by the name StackingClassifier will know them by.
# Dict insertion order is preserved, so the estimator order is as listed.
estimator_by_name = {
    'svm': SVC(
        kernel='rbf',
        probability=True,
        random_state=42,
    ),
    'decision_tree': DecisionTreeClassifier(
        criterion='gini',
        max_depth=10,
        random_state=42,
    ),
    'naive_bayes': GaussianNB(),
    'gradient_boost': GradientBoostingClassifier(
        n_estimators=100,
        learning_rate=0.1,
        random_state=42,
    ),
    'adaboost': AdaBoostClassifier(
        n_estimators=50,
        learning_rate=1.0,
        algorithm='SAMME',
        random_state=42,
    ),
}
base_models = list(estimator_by_name.items())

# Level-1 learner: a default logistic regression combines the base predictions.
meta_model = LogisticRegression()

# Build the stack with 5-fold cross-validation and fit it on the training
# split; fit() returns the estimator itself.
stacking_model = StackingClassifier(
    estimators=base_models,
    final_estimator=meta_model,
    cv=5,
).fit(X_train, Y_train)

# --- Training split: predictions, confusion matrix, accuracy ---
Y_pred_train = stacking_model.predict(X_train)
cm_train = confusion_matrix(Y_train, Y_pred_train)
accuracy_train = accuracy_score(Y_train, Y_pred_train)

# --- Held-out split: same three quantities ---
Y_pred_test = stacking_model.predict(X_test)
cm_test = confusion_matrix(Y_test, Y_pred_test)
accuracy_test = accuracy_score(Y_test, Y_pred_test)

# Report both accuracies rounded to two decimals.
print('Accuracy for training set for Stacking = {:.2f}'.format(accuracy_train))
print('Accuracy for test set for Stacking = {:.2f}'.format(accuracy_test))


Accuracy for training set for Stacking = 1.00
Accuracy for test set for Stacking = 0.99
