# Heart Failure:
Heart failure is a chronic, progressive condition in which the heart muscle is unable to pump enough blood to meet the body's needs for blood and oxygen. Basically, the heart can't keep up with its workload.
This dataset contains patient information such as age, sex, blood pressure, smoking, diabetes, ejection fraction, creatinine phosphokinase, serum_creatinine, serum_sodium, and time; the goal is to predict each patient's DEATH_EVENT.

   ![](https://image.shutterstock.com/image-illustration/heart-failure-260nw-293567960.jpg)

# Import libraries:

In [None]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import xgboost
import lightgbm
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from catboost import CatBoostClassifier
from sklearn.metrics import confusion_matrix, accuracy_score

In [None]:
# Load the Kaggle heart-failure clinical records CSV into a DataFrame.
data = pd.read_csv(
    '/kaggle/input/heart-failure-clinical-data/'
    'heart_failure_clinical_records_dataset.csv'
)

In [None]:
# Preview the first 5 rows of the loaded DataFrame.
data.head()

In [None]:
# Print pandas' summary report for the DataFrame.
data.info()

In [None]:
# Show the DataFrame's column labels.
data.columns

In [None]:
# Count how many records fall into each DEATH_EVENT class.
deathORalive = data["DEATH_EVENT"].value_counts()
display(deathORalive)

In [None]:
# Pie chart of the DEATH_EVENT class counts, labelled with percentages
# rounded to one decimal place.
plt.figure(figsize=(10, 6))
deathORalive.plot.pie(autopct='%.1f', colors=['powderblue', 'slateblue'])
plt.title(
    "DEATH_EVENT value distribution pie chart",  # typo fixed: was "vlaue"
    pad=20,
    fontdict={'size': 15, 'color': 'darkblue', 'weight': 'bold'},
)
plt.show()

In [None]:
# Pairwise correlation between the columns, drawn as a heatmap.
corrMat = data.corr()
fig, ax = plt.subplots(figsize=(20, 20))
cmap = sns.diverging_palette(230, 20, as_cmap=True)
# Boolean mask over the upper triangle (diagonal included) so those cells
# are hidden in the heatmap.
mask = np.triu(np.ones_like(corrMat, dtype=bool))
sns.heatmap(
    corrMat,
    mask=mask,
    cmap=cmap,
    square=True,
    annot=False,
    linewidths=1,
)

In [None]:
# Full correlation heatmap with the coefficient written in every cell.
plt.figure(figsize=(10, 10))
sns.heatmap(
    data.corr(),
    annot=True,
    cmap='coolwarm',
    vmin=-1,
);

In [None]:
# Correlation of every column with DEATH_EVENT, strongest positive first.
corr_matrix = data.corr()
print(
    corr_matrix["DEATH_EVENT"].sort_values(ascending=False)
)

In [None]:
# Pair grid coloured by DEATH_EVENT: histograms on the diagonal,
# scatter plots in every off-diagonal panel.
check1 = sns.PairGrid(data=data, hue="DEATH_EVENT")
check1.map_diag(sns.histplot)
check1.map_offdiag(sns.scatterplot)
check1.add_legend()

In [None]:
# Pair grid without hue: scatter plots above the diagonal, KDE plots below
# it, and thicker-lined KDE curves on the diagonal.
check2 = sns.PairGrid(data=data)
check2.map_upper(sns.scatterplot)
check2.map_lower(sns.kdeplot)
check2.map_diag(
    sns.kdeplot,
    lw=3,
    legend=False,
)

In [None]:
# One-call pair plot coloured by DEATH_EVENT, with KDE curves on the diagonal.
visualize = sns.pairplot(
    data,
    hue="DEATH_EVENT",
    palette="Set2",
    diag_kind="kde",
    height=2.5,
)

# Data Modeling


In [None]:
# Columns used as model inputs.
features = [
    'time',
    'ejection_fraction',
    'serum_creatinine',
    'diabetes',
]

In [None]:
# Feature matrix (selected columns) and target vector (DEATH_EVENT).
X = data[features]
y = data["DEATH_EVENT"]

###  Train Test Split

In [None]:

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error as mse

In [None]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

In [None]:
# Collects each model's test accuracy (as a percentage), in the order the
# model cells below are run.
accuracy_list=[]

## various model predictions:

In [None]:
# Logistic regression: fit on the training split, score on the test split.
log_reg = LogisticRegression().fit(X_train, y_train)
log_reg_pred = log_reg.predict(X_test)
log_reg_acc = accuracy_score(y_true=y_test, y_pred=log_reg_pred)
# Keep the accuracy as a percentage.
accuracy_list.append(log_reg_acc * 100)


In [None]:
# Report the logistic regression test accuracy as a percentage.
print("Accuracy of Logistic Regression is : ", f"{100 * log_reg_acc:.2f}%")

In [None]:
# Support vector classifier with default settings.
sv_clf = SVC().fit(X_train, y_train)
sv_clf_pred = sv_clf.predict(X_test)
sv_clf_acc = accuracy_score(y_true=y_test, y_pred=sv_clf_pred)
# Keep the accuracy as a percentage.
accuracy_list.append(sv_clf_acc * 100)

In [None]:
# Report the SVC test accuracy as a percentage.
print("Accuracy of SVC is : ", f"{100 * sv_clf_acc:.2f}%")

In [None]:
# K-nearest-neighbours classifier voting over 6 neighbours.
kn_clf = KNeighborsClassifier(n_neighbors=6).fit(X_train, y_train)
kn_pred = kn_clf.predict(X_test)
kn_acc = accuracy_score(y_true=y_test, y_pred=kn_pred)
# Keep the accuracy as a percentage.
accuracy_list.append(kn_acc * 100)

In [None]:
# Report the K-neighbours test accuracy as a percentage.
print("Accuracy of K Neighbors Classifier is : ", f"{100 * kn_acc:.2f}%")

In [None]:
# Decision tree using the entropy criterion, capped at 10 leaf nodes.
dt_clf = DecisionTreeClassifier(
    criterion='entropy',
    max_leaf_nodes=10,
    random_state=0,
).fit(X_train, y_train)
dt_pred = dt_clf.predict(X_test)
dt_acc = accuracy_score(y_true=y_test, y_pred=dt_pred)
# Keep the accuracy as a percentage.
accuracy_list.append(dt_acc * 100)

In [None]:
# Report the decision tree test accuracy as a percentage.
print("Accuracy of Decision Tree Classifier is : ", f"{100 * dt_acc:.2f}%")

In [None]:
# Random forest: half of the features considered per split, depth capped at 15.
r_clf = RandomForestClassifier(
    max_depth=15,
    max_features=0.5,
    random_state=1,
).fit(X_train, y_train)
r_pred = r_clf.predict(X_test)
r_acc = accuracy_score(y_true=y_test, y_pred=r_pred)
# Keep the accuracy as a percentage.
accuracy_list.append(r_acc * 100)

In [None]:
# Report the random forest test accuracy as a percentage.
# The label previously read "Decision Tree Classifier" (copy-paste slip).
print("Accuracy of Random Forest Classifier is : ", "{:.2f}%".format(100 * r_acc))

In [None]:
# Gradient boosting with individual trees limited to depth 2.
gradientboost_clf = GradientBoostingClassifier(
    max_depth=2,
    random_state=1,
).fit(X_train, y_train)
gradientboost_pred = gradientboost_clf.predict(X_test)
gradientboost_acc = accuracy_score(y_true=y_test, y_pred=gradientboost_pred)
# Keep the accuracy as a percentage.
accuracy_list.append(gradientboost_acc * 100)

In [None]:
# Report the gradient boosting test accuracy as a percentage.
print("Accuracy of Gradient Boosting is : ", f"{100 * gradientboost_acc:.2f}%")

In [None]:
# XGBoost random-forest classifier with max_depth=3.
xgb_clf = xgboost.XGBRFClassifier(
    max_depth=3,
    random_state=1,
)
xgb_clf.fit(X_train, y_train)
xgb_pred = xgb_clf.predict(X_test)
xgb_acc = accuracy_score(y_true=y_test, y_pred=xgb_pred)
# Keep the accuracy as a percentage.
accuracy_list.append(xgb_acc * 100)

In [None]:
# Report the XGBRF test accuracy as a percentage.
# The label previously read "Gradient Boosting" (copy-paste slip).
print("Accuracy of XGBRF Classifier is : ", "{:.2f}%".format(100 * xgb_acc))

In [None]:
# LightGBM classifier with max_depth=2.
lgb_clf = lightgbm.LGBMClassifier(
    max_depth=2,
    random_state=4,
)
lgb_clf.fit(X_train, y_train)
lgb_pred = lgb_clf.predict(X_test)
lgb_acc = accuracy_score(y_true=y_test, y_pred=lgb_pred)
# Keep the accuracy as a percentage.
accuracy_list.append(lgb_acc * 100)

In [None]:
# Report the LightGBM test accuracy as a percentage.
# The label previously read "Gradient Boosting" (copy-paste slip).
print("Accuracy of LightGBM Classifier is : ", "{:.2f}%".format(100 * lgb_acc))

In [None]:
# CatBoost classifier with library defaults.
cat_clf = CatBoostClassifier()
cat_clf.fit(X_train, y_train)
cat_pred = cat_clf.predict(X_test)
cat_acc = accuracy_score(y_true=y_test, y_pred=cat_pred)
# Keep the accuracy as a percentage.
accuracy_list.append(cat_acc * 100)

In [None]:
# Report the CatBoost test accuracy as a percentage.
# The label previously read "Gradient Boosting" (copy-paste slip).
print("Accuracy of CatBoost Classifier is : ", "{:.2f}%".format(100 * cat_acc))

In [None]:
# Display names for the classifiers, in the same order their accuracies
# were appended to accuracy_list.
model_list = [
    'Logistic Regression',
    'SVC',
    'KNearestNeighbours',
    'DecisionTree',
    'RandomForest',
    'GradientBooster',
    'XGBRF',
    'LGBM',
    'CatBoostClassifier',
]

In [None]:
# Bar chart comparing the test accuracy (in %) of every classifier.
plt.rcParams['figure.figsize'] = 20, 8
sns.set_style('darkgrid')
ax = sns.barplot(x=model_list, y=accuracy_list, palette="husl", saturation=2.0)
plt.xlabel('Classifier Models', fontsize=20)
plt.ylabel('% of Accuracy', fontsize=20)
plt.title('Accuracy of different Classifier Models', fontsize=20)
plt.xticks(fontsize=12, horizontalalignment='center', rotation=8)
plt.yticks(fontsize=12)
# Write each bar's height just above it. The loop variables use bar_-prefixed
# names because this code runs at notebook top level: unpacking into `x, y`
# would overwrite the target series `y` and break any later re-run of the
# train/test split or model cells.
for bar in ax.patches:
    bar_width, bar_height = bar.get_width(), bar.get_height()
    bar_x, bar_y = bar.get_xy()
    ax.annotate(
        f'{round(bar_height, 2)}%',
        (bar_x + bar_width / 2, bar_y + bar_height * 1.02),
        ha='center',
        fontsize='x-large',
    )
plt.show()

* LGBMClassifier: 92.22%
* Cat Boost Classifier: 88.89%
* Gradient Booster Classifier: 87.78%
* XGBRFClassifier: 87.78%
* Random Forest Classifier: 87.78%
* DecisionTreeClassifier: 86.67%
* SVC: 85.56%
* KNeighborsClassifier: 84.44%
* Logistic Regression: 81.11%