In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

#  Data Modeling

**Import Libraries**

In [None]:
%matplotlib inline

import matplotlib.pyplot as plt
import xgboost as xgb
import lightgbm as lgb

from mlxtend.plotting import plot_confusion_matrix

from colorama import Fore, Back, Style 

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from catboost import CatBoostClassifier

In [None]:
# Load the heart-failure clinical records from the Kaggle input directory
# and preview the first rows.
DATA_PATH = '/kaggle/input/heart-failure-clinical-data/heart_failure_clinical_records_dataset.csv'
heart_data = pd.read_csv(DATA_PATH)
heart_data.head()

In [None]:
# Show the summary statistics that DataFrame.describe() reports for the dataset.
heart_data.describe()

In [None]:
# Target: the DEATH_EVENT column. `y` is an independent copy of it.
z = heart_data["DEATH_EVENT"]
y = z.copy()

# Predictors: every column except the target and the two features that are
# deliberately left out (creatinine_phosphokinase, diabetes).
x = heart_data.drop(columns=["DEATH_EVENT", "creatinine_phosphokinase", "diabetes"])

# 80/20 train/test split with a fixed seed so the split is reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=2,
)

In [None]:
# Test accuracies (in percent), appended by each model cell in notebook order.
# The order must line up with model_list, which labels the final bar chart.
accuracy_list = list()

# Logistic Regression

In [None]:
# Logistic regression with scikit-learn defaults: train on the training split,
# predict the test split and record the accuracy as a percentage.
log_reg = LogisticRegression().fit(x_train, y_train)
log_reg_pred = log_reg.predict(x_test)
log_reg_acc = accuracy_score(y_true=y_test, y_pred=log_reg_pred)
accuracy_list.append(log_reg_acc * 100)

In [None]:
# Report the logistic-regression test accuracy in percent.
print("Accuracy of Logistic Regression model is : ", log_reg_acc * 100)

In [None]:
# Confusion matrix of the logistic-regression predictions on the test split.
# No plt.figure() beforehand: plot_confusion_matrix creates and returns its own
# figure, so an extra one would only be rendered as an empty figure.
cm = confusion_matrix(y_test, log_reg_pred)
fig, ax = plot_confusion_matrix(cm, figsize=(12,8), hide_ticks=True, cmap=plt.cm.Blues)
ax.set_title("Logistic Regression Model - Confusion Matrix")
# Label both axes with readable class names (0 -> not failed, 1 -> fail).
ax.set_xticks(range(2))
ax.set_xticklabels(["Heart Not Failed", "Heart Fail"], fontsize=16)
ax.set_yticks(range(2))
ax.set_yticklabels(["Heart Not Failed", "Heart Fail"], fontsize=16)
plt.show()


# Support Vector

In [None]:
# Support-vector classifier with scikit-learn defaults: train on the training
# split, predict the test split and record the accuracy as a percentage.
sv_clf = SVC().fit(x_train, y_train)
sv_clf_pred = sv_clf.predict(x_test)
sv_clf_acc = accuracy_score(y_true=y_test, y_pred=sv_clf_pred)
accuracy_list.append(sv_clf_acc * 100)

In [None]:
# Report the SVC test accuracy in percent.
print("Accuracy of SVC is : ", sv_clf_acc * 100)

In [None]:
# Confusion matrix of the SVC predictions on the test split.
# No plt.figure() beforehand: plot_confusion_matrix creates and returns its own
# figure, so an extra one would only be rendered as an empty figure.
cm = confusion_matrix(y_test, sv_clf_pred)
fig, ax = plot_confusion_matrix(cm, figsize=(12,8), hide_ticks=True, cmap=plt.cm.Blues)
ax.set_title("SVC Model - Confusion Matrix")
# Label both axes with readable class names (0 -> not failed, 1 -> fail).
ax.set_xticks(range(2))
ax.set_xticklabels(["Heart Not Failed", "Heart Fail"], fontsize=16)
ax.set_yticks(range(2))
ax.set_yticklabels(["Heart Not Failed", "Heart Fail"], fontsize=16)
plt.show()


# K Neighbors Classifier

In [None]:
# k-nearest-neighbours classifier using 6 neighbours: train on the training
# split, predict the test split and record the accuracy as a percentage.
kn_clf = KNeighborsClassifier(n_neighbors=6).fit(x_train, y_train)
kn_pred = kn_clf.predict(x_test)
kn_acc = accuracy_score(y_true=y_test, y_pred=kn_pred)
accuracy_list.append(kn_acc * 100)

In [None]:
# Report the k-nearest-neighbours test accuracy in percent.
print("Accuracy of K Neighbors Classifier is : ", kn_acc * 100)

In [None]:
# Confusion matrix of the k-nearest-neighbours predictions on the test split.
# No plt.figure() beforehand: plot_confusion_matrix creates and returns its own
# figure, so an extra one would only be rendered as an empty figure.
cm = confusion_matrix(y_test, kn_pred)
fig, ax = plot_confusion_matrix(cm, figsize=(12,8), hide_ticks=True, cmap=plt.cm.Blues)
ax.set_title("K Neighbors Model - Confusion Matrix")
# Label both axes with readable class names (0 -> not failed, 1 -> fail).
ax.set_xticks(range(2))
ax.set_xticklabels(["Heart Not Failed", "Heart Fail"], fontsize=16)
ax.set_yticks(range(2))
ax.set_yticklabels(["Heart Not Failed", "Heart Fail"], fontsize=16)
plt.show()


# Decision Tree Classifier



In [None]:
# Small entropy-based decision tree (at most 3 leaves, fixed seed): train on
# the training split, predict the test split and record the accuracy in percent.
dt_clf = DecisionTreeClassifier(
    criterion='entropy',
    max_leaf_nodes=3,
    random_state=0,
).fit(x_train, y_train)
dt_pred = dt_clf.predict(x_test)
dt_acc = accuracy_score(y_true=y_test, y_pred=dt_pred)
accuracy_list.append(dt_acc * 100)

In [None]:
# Report the decision-tree test accuracy in percent.
print("Accuracy of Decision Tree Classifier is : ", dt_acc * 100)

In [None]:
# Confusion matrix of the decision-tree predictions on the test split.
# No plt.figure() beforehand: plot_confusion_matrix creates and returns its own
# figure, so an extra one would only be rendered as an empty figure.
cm = confusion_matrix(y_test, dt_pred)
fig, ax = plot_confusion_matrix(cm, figsize=(12,8), hide_ticks=True, cmap=plt.cm.Blues)
ax.set_title("Decision Tree Model - Confusion Matrix")
# Label both axes with readable class names (0 -> not failed, 1 -> fail).
ax.set_xticks(range(2))
ax.set_xticklabels(["Heart Not Failed","Heart Fail"], fontsize=16)
ax.set_yticks(range(2))
ax.set_yticklabels(["Heart Not Failed","Heart Fail"], fontsize=16)
plt.show()

# RandomForestClassifier

In [None]:


# Random forest (half of the features per split, depth capped at 15, fixed
# seed): train, predict the test split and record the accuracy in percent.
r_clf = RandomForestClassifier(
    max_depth=15,
    max_features=0.5,
    random_state=1,
).fit(x_train, y_train)
r_pred = r_clf.predict(x_test)
r_acc = accuracy_score(y_true=y_test, y_pred=r_pred)
accuracy_list.append(r_acc * 100)

In [None]:
# Report the random-forest test accuracy in percent.
print("Accuracy of RandomForestClassifier is : ", r_acc * 100)

In [None]:
# Confusion matrix of the random-forest predictions on the test split.
# No plt.figure() beforehand: plot_confusion_matrix creates and returns its own
# figure, so an extra one would only be rendered as an empty figure.
cm = confusion_matrix(y_test, r_pred)
fig, ax = plot_confusion_matrix(cm, figsize=(12,8), hide_ticks=True, cmap=plt.cm.Blues)
ax.set_title("Random Forest Model - Confusion Matrix")
# Label both axes with readable class names (0 -> not failed, 1 -> fail).
ax.set_xticks(range(2))
ax.set_xticklabels(["Heart Not Failed","Heart Fail"], fontsize=16)
ax.set_yticks(range(2))
ax.set_yticklabels(["Heart Not Failed","Heart Fail"], fontsize=16)
plt.show()

# GradientBoostingClassifier

In [None]:
# Gradient boosting with depth-2 trees and a fixed seed: train on the training
# split, predict the test split and record the accuracy as a percentage.
gradientboost_clf = GradientBoostingClassifier(random_state=1, max_depth=2).fit(x_train, y_train)
gradientboost_pred = gradientboost_clf.predict(x_test)
gradientboost_acc = accuracy_score(y_true=y_test, y_pred=gradientboost_pred)
accuracy_list.append(gradientboost_acc * 100)

In [None]:
# Report the gradient-boosting test accuracy in percent.
print("Accuracy of GradientBoostingClassifier is : ", gradientboost_acc * 100)

In [None]:
# Confusion matrix of the gradient-boosting predictions on the test split.
# No plt.figure() beforehand: plot_confusion_matrix creates and returns its own
# figure, so an extra one would only be rendered as an empty figure.
cm = confusion_matrix(y_test, gradientboost_pred)
fig, ax = plot_confusion_matrix(cm, figsize=(12,8), hide_ticks=True, cmap=plt.cm.Blues)
# Title spelling corrected ("Gradient", not "Gredient").
ax.set_title("Gradient Boosting Model - Confusion Matrix")
# Label both axes with readable class names (0 -> not failed, 1 -> fail).
ax.set_xticks(range(2))
ax.set_xticklabels(["Heart Not Failed","Heart Fail"], fontsize=16)
ax.set_yticks(range(2))
ax.set_yticklabels(["Heart Not Failed","Heart Fail"], fontsize=16)
plt.show()


# CatBoostClassifier

In [None]:
# CatBoost with its default settings: train on the training split, predict the
# test split and record the accuracy as a percentage.
cat_clf = CatBoostClassifier()
cat_clf.fit(x_train, y_train)

cat_pred = cat_clf.predict(x_test)
cat_acc = accuracy_score(y_true=y_test, y_pred=cat_pred)
accuracy_list.append(cat_acc * 100)

In [None]:
# Report the CatBoost test accuracy in percent.
print("Accuracy of CatBoostClassifier is :", cat_acc * 100)

# XGB

In [None]:
# XGBoost binary classifier limited to 10 boosting rounds: train on the
# training split, predict the test split and record the accuracy in percent.
xgb_clf = xgb.XGBClassifier(n_estimators=10, objective="binary:logistic")
xgb_clf.fit(x_train, y_train)

xgb_pred = xgb_clf.predict(x_test)
xgb_acc = accuracy_score(y_true=y_test, y_pred=xgb_pred)
accuracy_list.append(xgb_acc * 100)

In [None]:
# Report the XGBoost test accuracy in percent.
print("Accuracy of XGBoostClassifier is :", xgb_acc * 100)

In [None]:
# Confusion matrix of the XGBoost predictions on the test split.
# No plt.figure() beforehand: plot_confusion_matrix creates and returns its own
# figure, so an extra one would only be rendered as an empty figure.
cm = confusion_matrix(y_test, xgb_pred)
fig, ax = plot_confusion_matrix(cm, figsize=(12,8), hide_ticks=True, cmap=plt.cm.Blues)
ax.set_title("XGB Model - Confusion Matrix")
# Label both axes with readable class names (0 -> not failed, 1 -> fail).
ax.set_xticks(range(2))
ax.set_xticklabels(["Heart Not Failed","Heart Fail"], fontsize=16)
ax.set_yticks(range(2))
ax.set_yticklabels(["Heart Not Failed","Heart Fail"], fontsize=16)
plt.show()

In [None]:
from xgboost import XGBClassifier, plot_importance,to_graphviz

# Feature-importance chart for the fitted XGBoost model.
# No plt.figure() beforehand: plot_importance creates its own axes when none is
# passed, so an extra figure would only be rendered as an empty figure.
plot_importance(xgb_clf,title="Feature importance from XGBoost model")
plt.show()

# LGBM

In [None]:
# LightGBM classifier with its default settings: train on the training split,
# predict the test split and record the accuracy as a percentage.
lgb_clf = lgb.LGBMClassifier()
lgb_clf.fit(x_train, y_train)

lgb_pred = lgb_clf.predict(x_test)
lgb_acc = accuracy_score(y_true=y_test, y_pred=lgb_pred)
accuracy_list.append(lgb_acc * 100)

In [None]:
# Report the LightGBM test accuracy in percent. This cell previously printed
# the XGBoost label and value (xgb_acc) instead of the LightGBM ones.
print ("Accuracy of LGBMClassifier is :", 100*lgb_acc)

In [None]:
# Confusion matrix of the LightGBM predictions on the test split.
# No plt.figure() beforehand: plot_confusion_matrix creates and returns its own
# figure, so an extra one would only be rendered as an empty figure.
cm = confusion_matrix(y_test, lgb_pred)
fig, ax = plot_confusion_matrix(cm, figsize=(12,8), hide_ticks=True, cmap=plt.cm.Blues)
ax.set_title("LGBM Model - Confusion Matrix")
# Label both axes with readable class names (0 -> not failed, 1 -> fail).
ax.set_xticks(range(2))
ax.set_xticklabels(["Heart Not Failed","Heart Fail"], fontsize=16)
ax.set_yticks(range(2))
ax.set_yticklabels(["Heart Not Failed","Heart Fail"], fontsize=16)
plt.show()

# Naive Bayes

In [None]:
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes with default settings: train on the training split,
# predict the test split and record the accuracy as a percentage.
nb_clf = GaussianNB().fit(x_train, y_train)
nb_pred = nb_clf.predict(x_test)
nb_acc = accuracy_score(y_true=y_test, y_pred=nb_pred)
accuracy_list.append(nb_acc * 100)

In [None]:
# Report the Gaussian naive Bayes test accuracy in percent. The label
# previously said "XGBoostClassifier" although the value printed is nb_acc.
print ("Accuracy of Gaussian Naive Bayes is :", 100*nb_acc)

In [None]:
# Confusion matrix of the Gaussian naive Bayes predictions on the test split.
# No plt.figure() beforehand: plot_confusion_matrix creates and returns its own
# figure, so an extra one would only be rendered as an empty figure.
cm = confusion_matrix(y_test, nb_pred)
fig, ax = plot_confusion_matrix(cm, figsize=(12,8), hide_ticks=True, cmap=plt.cm.Blues)
ax.set_title("Gaussian Naive Bayes Model - Confusion Matrix")
# Label both axes with readable class names (0 -> not failed, 1 -> fail).
ax.set_xticks(range(2))
ax.set_xticklabels(["Heart Not Failed","Heart Fail"], fontsize=16)
ax.set_yticks(range(2))
ax.set_yticklabels(["Heart Not Failed","Heart Fail"], fontsize=16)
plt.show()

# Voting Classifier

In [None]:
from sklearn.ensemble import VotingClassifier

# Base learners for the ensemble, each fitted on the training split.
# NOTE(review): VotingClassifier.fit is documented to fit clones of its
# estimators, so these standalone fits look redundant for the ensemble itself.
# Confirm nothing else relies on the fitted variables before removing them.
LR_clf2 = LogisticRegression()
LR_clf2.fit(x_train, y_train)

DT_clf = DecisionTreeClassifier()
DT_clf.fit(x_train, y_train)

knn_clf = KNeighborsClassifier()
knn_clf.fit(x_train, y_train)

RF_clf = RandomForestClassifier(random_state=0)
RF_clf.fit(x_train, y_train)

svc_clf = SVC()
svc_clf.fit(x_train, y_train)

# Members of the hard-voting ensemble; 'r-clf' is the random forest trained in
# the RandomForestClassifier section, so two random forests take part in the vote.
voting_members = [
    ('lr', LR_clf2),
    ('dt', DT_clf),
    ('knn', knn_clf),
    ('rf', RF_clf),
    ('svc', svc_clf),
    ('r-clf', r_clf),
]

# Majority ("hard") vote over the members' predicted labels.
vc_clf = VotingClassifier(estimators=voting_members, voting='hard')
vc_clf.fit(x_train, y_train)

vc_pred = vc_clf.predict(x_test)
vc_acc = accuracy_score(y_true=y_test, y_pred=vc_pred)
accuracy_list.append(vc_acc * 100)

In [None]:
# Report the voting-ensemble test accuracy in percent.
print("Accuracy of VotingClassifier is :", vc_acc * 100)

In [None]:
# Confusion matrix of the voting-ensemble predictions on the test split.
# No plt.figure() beforehand: plot_confusion_matrix creates and returns its own
# figure, so an extra one would only be rendered as an empty figure.
cm = confusion_matrix(y_test, vc_pred)
fig, ax = plot_confusion_matrix(cm, figsize=(12,8), hide_ticks=True, cmap=plt.cm.Blues)
ax.set_title("Voting Classifier Model - Confusion Matrix")
# Label both axes with readable class names (0 -> not failed, 1 -> fail).
ax.set_xticks(range(2))
ax.set_xticklabels(["Heart Not Failed","Heart Fail"], fontsize=16)
ax.set_yticks(range(2))
ax.set_yticklabels(["Heart Not Failed","Heart Fail"], fontsize=16)
plt.show()

# Accuracy for all models

In [None]:
# Display names for the summary chart, in the same order in which the model
# cells append their scores to accuracy_list.
model_list = [
    'Logistic Regression',
    'SVC',
    'KNearestNeighbours',
    'DecisionTree',
    'RandomForest',
    'GradientBooster',
    'CatBoostClassifier',
    'XGB',
    'LGBM',
    'Naive Bayes',
    'Voting Classifier',
]

In [None]:
import seaborn as sns

# Figure size and seaborn style for the summary chart.
plt.rcParams['figure.figsize'] = (20, 8)
sns.set_style('darkgrid')

# accuracy_list is filled by appends in the model cells, so re-running any of
# them leaves it longer than model_list. Fail early with an actionable message.
if len(model_list) != len(accuracy_list):
    raise ValueError(
        f"model_list has {len(model_list)} entries but accuracy_list has "
        f"{len(accuracy_list)}; reset accuracy_list and re-run the model cells in order."
    )

# One bar per classifier, with bar height equal to the test accuracy in percent.
ax = sns.barplot(x=model_list, y=accuracy_list, palette="husl", saturation=2.0)
plt.xlabel('Classifier Models', fontsize=20)
plt.ylabel('% of Accuracy', fontsize=20)
plt.title('Accuracy of different Classifier Models', fontsize=20)
plt.xticks(fontsize=12, horizontalalignment='center', rotation=8)
plt.yticks(fontsize=12)

# Write each bar's value just above it. The loop uses bar_* names so that it
# does not overwrite the notebook-level x / y (feature matrix and target).
for bar in ax.patches:
    bar_width, bar_height = bar.get_width(), bar.get_height()
    bar_x, bar_y = bar.get_xy()
    ax.annotate(
        f'{round(bar_height,2)}%',
        (bar_x + bar_width/2, bar_y + bar_height*1.02),
        ha='center',
        fontsize='x-large',
    )

plt.show()