In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import confusion_matrix, roc_auc_score, r2_score
from tensorflow import keras
%matplotlib inline
sns.set()

In [None]:
# Read the heart-failure clinical records CSV into the notebook's working frame.
df = pd.read_csv(
    filepath_or_buffer="../input/heart-failure-clinical-data/heart_failure_clinical_records_dataset.csv"
)

In [None]:
# Show the first five records as a quick sanity check of the load.
df.head(n=5)

In [None]:
# Summary statistics per column (quartiles spelled out; these are the defaults).
df.describe(percentiles=[0.25, 0.5, 0.75])

In [None]:
# Per-column flag: True if the column contains at least one missing value.
df.isna().any()

In [None]:
# Count of records per DEATH_EVENT value.
death_ax = sns.countplot(data=df, x="DEATH_EVENT", palette=["#DA0858", "#3498DB"])
death_ax.set_ylabel("Count")
death_ax.set_xlabel("Death Event")
plt.show()

In [None]:
# Count of records per value of the `sex` column.
sex_ax = sns.countplot(data=df, x="sex", palette=["#DA0858", "#3498DB"])
sex_ax.set_ylabel("Count")
sex_ax.set_xlabel("Sex")
plt.show()

In [None]:
# Outcome counts for every distinct age value, drawn as grouped bars.
age_counts = pd.crosstab(df["age"], df["DEATH_EVENT"])
age_ax = age_counts.plot.bar(figsize=(20, 6), color=["#6495ED", "#DE3163"])
age_ax.set_title('Distribution of Heart failure by Age')
age_ax.set_xlabel('Age')
age_ax.legend(["Heart Not Failed", "Heart Failed"])
plt.show()

In [None]:
# Outcome counts split by the `sex` column.
sex_counts = pd.crosstab(df["sex"], df["DEATH_EVENT"])
gender_ax = sex_counts.plot.bar(figsize=(15, 6), color=['#11A5AA', '#AA1190'])
gender_ax.set_title('Distribution of Heart Failure by Gender')
gender_ax.set_xlabel('Gender')
gender_ax.legend(["Heart Not Failed", "Heart Failed"])
plt.show()

In [None]:
# Outcome counts split by the `anaemia` column.
anaemia_counts = pd.crosstab(df["anaemia"], df["DEATH_EVENT"])
anaemia_ax = anaemia_counts.plot.bar(figsize=(15, 6), color=['#11A5AA', '#AA1190'])
anaemia_ax.set_title("Distribution of Heart Failure by Anemia")
anaemia_ax.set_xlabel('Anemia')
# Keep the category labels horizontal.
plt.setp(anaemia_ax.get_xticklabels(), rotation=0)
anaemia_ax.legend(["Heart Not Failed", "Heart Failed"])
plt.show()

In [None]:
# Outcome counts split by the `diabetes` column.
diabetes_counts = pd.crosstab(df["diabetes"], df["DEATH_EVENT"])
diabetes_ax = diabetes_counts.plot.bar(figsize=(15, 6), color=['#11A5AA', '#AA1190'])
diabetes_ax.set_title("Distribution of Heart Failure by Diabetes")
diabetes_ax.set_xlabel('Diabetes')
# Keep the category labels horizontal.
plt.setp(diabetes_ax.get_xticklabels(), rotation=0)
diabetes_ax.legend(["Heart Not Failed", "Heart Failed"])
plt.show()

In [None]:
# Outcome counts for every distinct ejection-fraction value.
ef_counts = pd.crosstab(df["ejection_fraction"], df["DEATH_EVENT"])
ef_ax = ef_counts.plot.bar(figsize=(15, 6), color=['#11A5AA', '#AA1190'])
ef_ax.set_title("Distribution of Heart Failure by Ejection Fraction")
ef_ax.set_xlabel('Ejection Fraction')
# Keep the tick labels horizontal.
plt.setp(ef_ax.get_xticklabels(), rotation=0)
ef_ax.legend(["Heart Not Failed", "Heart Failed"])
plt.show()

In [None]:
# Outcome counts split by the `high_blood_pressure` column.
# Fix: "Presure" was misspelled in both the title and the x-axis label.
pd.crosstab(df.high_blood_pressure,df.DEATH_EVENT).plot(kind="bar",figsize=(15,6),color=['#11A5AA','#AA1190'])
plt.title("Distribution of Heart Failure by High Blood Pressure")
plt.xlabel('High Blood Pressure')
plt.xticks(rotation = 0)  # keep the category labels horizontal
plt.legend(["Heart Not Failed", "Heart Failed"])
plt.show()

In [None]:
# Outcome counts for every distinct serum-sodium value.
sodium_counts = pd.crosstab(df["serum_sodium"], df["DEATH_EVENT"])
sodium_ax = sodium_counts.plot.bar(figsize=(15, 6), color=['#11A5AA', '#AA1190'])
sodium_ax.set_title("Distribution of Heart Failure by Serum Sodium")
sodium_ax.set_xlabel('Serum Sodium')
# Keep the tick labels horizontal.
plt.setp(sodium_ax.get_xticklabels(), rotation=0)
sodium_ax.legend(["Heart Not Failed", "Heart Failed"])
plt.show()

In [None]:
# Outcome counts split by the `smoking` column.
# Fix: the x-axis label was a copy-paste leftover ('Serum Smoking') from the
# serum-sodium plot; the plotted column is `smoking`.
pd.crosstab(df.smoking,df.DEATH_EVENT).plot(kind="bar",figsize=(15,6),color=['#11A5AA','#AA1190'])
plt.title("Distribution of Heart Failure by Smoking")
plt.xlabel('Smoking')
plt.xticks(rotation = 0)  # keep the category labels horizontal
plt.legend(["Heart Not Failed", "Heart Failed"])
plt.show()

In [None]:
# Pairwise correlation of all columns, rendered as an annotated heatmap.
corr_mat = df.corr()
fig, ax = plt.subplots(figsize=(15, 10))
# Draw explicitly on the axes created above and keep the returned handle in `ax`.
ax = sns.heatmap(corr_mat, annot=True, fmt='.2f', ax=ax)
ax.set_title("Correlation Matrix", fontsize=30)
plt.show()

In [None]:
# Rank columns by the absolute value of their correlation with DEATH_EVENT
# (the target itself appears in the list with a correlation of 1).
target_corr = df.corr()["DEATH_EVENT"]
target_corr.abs().nlargest(n=20)

In [None]:
# Target vector: the DEATH_EVENT column.
ydata = df["DEATH_EVENT"]
# Feature matrix: every remaining column.
xdata = df.drop(columns="DEATH_EVENT")

In [None]:
# Hold out 30 % of the rows for testing; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    xdata,
    ydata,
    test_size=0.3,
    random_state=0,
)

In [None]:
# Standardise the features (zero mean, unit variance).
# The scaler is fitted on the training split ONLY and then applied unchanged to
# the test split. The original called fit_transform on x_test as well, which
# re-estimated mean/variance from the test data, so the two splits were scaled
# with different parameters and test-set statistics leaked into preprocessing.
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

In [None]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic-regression baseline on the scaled training split.
log_reg = LogisticRegression(random_state=0)
log_reg.fit(x_train, y_train)

# Hold-out metrics.
acc_log_reg = log_reg.score(x_test, y_test) * 100
y_pred_lr = log_reg.predict(x_test)
# ROC AUC needs a continuous ranking score; hard 0/1 labels collapse the curve
# to a single operating point. Column 1 is the probability of the greater class
# label, which roc_auc_score treats as the positive class.
y_score_lr = log_reg.predict_proba(x_test)[:, 1]
lr_cm = confusion_matrix(y_test, y_pred_lr)

# 5-fold cross-validated accuracy on the training split (values are fractions).
cv_log = cross_val_score(estimator=log_reg, X=x_train, y=y_train, cv=5)

print("Test Accuracy = {:.3f} %\n".format(acc_log_reg))
print("R^2 score: {:.3f}".format(r2_score(y_test, y_pred_lr)))
# Fractions are scaled by 100 so the "%" suffix is truthful and matches the
# test-accuracy line above.
print("Cross validation mean: {:.3f} %".format(cv_log.mean() * 100))
print("Cross validation max: {:.3f} %".format(cv_log.max() * 100))
print("ROC AUC Score: {:.3f}".format(roc_auc_score(y_test, y_score_lr)))
print("Cross validation result: ", cv_log)

# Integer format so counts are not rendered in scientific notation.
sns.heatmap(lr_cm, annot=True, fmt="d")
plt.title("Confusion Matrix")
plt.show()

In [None]:
from sklearn.svm import SVC

# Fit an RBF-kernel support vector classifier on the scaled training split.
svc = SVC(kernel='rbf', random_state=0)
svc.fit(x_train, y_train)

# Hold-out metrics.
acc_svc = svc.score(x_test, y_test) * 100
y_pred_svm = svc.predict(x_test)
# ROC AUC needs a continuous ranking score; SVC is fitted without
# probability=True, so the signed distance to the decision boundary is used.
y_score_svm = svc.decision_function(x_test)
svm_cm = confusion_matrix(y_test, y_pred_svm)

# 5-fold cross-validated accuracy on the training split (values are fractions).
cv_svc = cross_val_score(estimator=svc, X=x_train, y=y_train, cv=5)

print("Test Accuracy = {:.3f} %\n".format(acc_svc))
print("R^2 score: {:.3f}".format(r2_score(y_test, y_pred_svm)))
# Fix: the fractions were printed with a "%" suffix without being scaled.
print("Cross validation mean: {:.3f} %".format(cv_svc.mean() * 100))
print("Cross validation max: {:.3f} %".format(cv_svc.max() * 100))
print("ROC AUC Score: {:.3f}".format(roc_auc_score(y_test, y_score_svm)))
print("Cross validation result: ", cv_svc)

# Integer format so counts are not rendered in scientific notation.
sns.heatmap(svm_cm, annot=True, fmt="d")
plt.title("Confusion Matrix")
plt.show()

In [None]:
from sklearn.naive_bayes import BernoulliNB

# Fit a Bernoulli naive Bayes classifier on the scaled training split.
# NOTE(review): BernoulliNB thresholds each feature (default binarize=0.0), so on
# standardised inputs it effectively sees "above / below training mean" flags —
# confirm this is the intended use.
bnb = BernoulliNB()
bnb.fit(x_train, y_train)

# Hold-out metrics.
acc_bnb = bnb.score(x_test, y_test) * 100
y_pred_bnb = bnb.predict(x_test)
# ROC AUC needs a continuous ranking score rather than hard 0/1 labels.
y_score_bnb = bnb.predict_proba(x_test)[:, 1]
bnb_cm = confusion_matrix(y_test, y_pred_bnb)

print("Test Accuracy = {:.3f} %\n".format(acc_bnb))
print(bnb)
print("R^2 score: {:.3f}".format(r2_score(y_test, y_pred_bnb)))
# Fix: the AUC was printed as a bare number with no label.
print("ROC AUC Score: {:.3f}".format(roc_auc_score(y_test, y_score_bnb)))

plt.title("Confusion Matrix")
# Integer format so counts are not rendered in scientific notation.
sns.heatmap(bnb_cm, annot=True, fmt="d")
plt.show()

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Fit a 7-nearest-neighbour classifier (Manhattan distance) on the scaled training split.
knn = KNeighborsClassifier(n_neighbors=7, metric="manhattan")
knn.fit(x_train, y_train)

# Hold-out metrics.
acc_knn = knn.score(x_test, y_test) * 100
y_pred_knn = knn.predict(x_test)
# ROC AUC needs a continuous ranking score; use the neighbour vote share of
# the positive class instead of the hard 0/1 labels.
y_score_knn = knn.predict_proba(x_test)[:, 1]
knn_cm = confusion_matrix(y_test, y_pred_knn)

# 5-fold cross-validated accuracy on the training split (values are fractions).
cv_knn = cross_val_score(estimator=knn, X=x_train, y=y_train, cv=5)

print("Test Accuracy = {:.3f} %\n".format(acc_knn))
print("R^2 score: {:.3f}".format(r2_score(y_test, y_pred_knn)))
# Fix: the fractions were printed with a "%" suffix without being scaled.
print("Cross validation mean: {:.3f} %".format(cv_knn.mean() * 100))
print("Cross validation max: {:.3f} %".format(cv_knn.max() * 100))
print("ROC AUC Score: {:.3f}".format(roc_auc_score(y_test, y_score_knn)))
print("Cross validation result: ", cv_knn)

plt.title("Confusion Matrix")
# Integer format so counts are not rendered in scientific notation.
sns.heatmap(knn_cm, annot=True, fmt="d")
plt.show()

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Fit an entropy-criterion decision tree with random split selection.
dtc = DecisionTreeClassifier(random_state=0, criterion='entropy', splitter='random')
dtc.fit(x_train, y_train)

# Hold-out metrics.
acc_dtc = dtc.score(x_test, y_test) * 100
y_pred_dtc = dtc.predict(x_test)
# ROC AUC is computed from class-1 leaf probabilities rather than hard labels.
y_score_dtc = dtc.predict_proba(x_test)[:, 1]
dtc_cm = confusion_matrix(y_test, y_pred_dtc)

# 5-fold cross-validated accuracy on the training split (values are fractions).
cv_dtc = cross_val_score(estimator=dtc, X=x_train, y=y_train, cv=5)

print("Test Accuracy = {:.3f} %\n".format(acc_dtc))
print("R^2 score: {:.3f}".format(r2_score(y_test, y_pred_dtc)))
# Fix: the fractions were printed with a "%" suffix without being scaled.
print("Cross validation mean: {:.3f} %".format(cv_dtc.mean() * 100))
print("Cross validation max: {:.3f} %".format(cv_dtc.max() * 100))
print("ROC AUC Score: {:.3f}".format(roc_auc_score(y_test, y_score_dtc)))
print("Cross validation result: ", cv_dtc)

plt.title("Confusion Matrix")
# Integer format so counts are not rendered in scientific notation.
sns.heatmap(dtc_cm, annot=True, fmt="d")
plt.show()

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Fit a 13-tree random forest (entropy criterion) on the scaled training split.
rf = RandomForestClassifier(n_estimators=13, criterion='entropy', random_state=0)
rf.fit(x_train, y_train)

# Hold-out metrics.
acc_rf = rf.score(x_test, y_test) * 100
y_pred_rf = rf.predict(x_test)
# ROC AUC needs a continuous ranking score; use the forest's averaged class-1
# probability instead of the hard 0/1 labels.
y_score_rf = rf.predict_proba(x_test)[:, 1]
rf_cm = confusion_matrix(y_test, y_pred_rf)

# 5-fold cross-validated accuracy on the training split (values are fractions).
cv_rf = cross_val_score(estimator=rf, X=x_train, y=y_train, cv=5)

print("Test Accuracy = {:.3f} %\n".format(acc_rf))
print("R^2 score: {:.3f}".format(r2_score(y_test, y_pred_rf)))
# Fix: the fractions were printed with a "%" suffix without being scaled.
print("Cross validation mean: {:.3f} %".format(cv_rf.mean() * 100))
print("Cross validation max: {:.3f} %".format(cv_rf.max() * 100))
print("ROC AUC Score: {:.3f}".format(roc_auc_score(y_test, y_score_rf)))
print("Cross validation result: ", cv_rf)

plt.title("Confusion Matrix")
# Integer format so counts are not rendered in scientific notation.
sns.heatmap(rf_cm, annot=True, fmt="d")
plt.show()

In [None]:
import xgboost as xgb

# Fit a gradient-boosted tree classifier on the scaled training split.
# Fix: the objective was "reg:logistic", a regression objective; a two-class
# classifier should use "binary:logistic".
xgbc = xgb.XGBClassifier(objective="binary:logistic", booster="gbtree", tree_method="approx")
xgbc.fit(x_train, y_train)

# Hold-out metrics.
acc_xgbc = xgbc.score(x_test, y_test) * 100
y_pred_xgbc = xgbc.predict(x_test)
# ROC AUC needs a continuous ranking score rather than hard 0/1 labels.
y_score_xgbc = xgbc.predict_proba(x_test)[:, 1]
xgbc_cm = confusion_matrix(y_test, y_pred_xgbc)

# 5-fold cross-validated accuracy on the training split (values are fractions).
cv_xgbc = cross_val_score(estimator=xgbc, X=x_train, y=y_train, cv=5)

print("Test Accuracy = {:.3f} %\n".format(acc_xgbc))
print("R^2 score: {:.3f}".format(r2_score(y_test, y_pred_xgbc)))
# Fix: the fractions were printed with a "%" suffix without being scaled.
print("Cross validation mean: {:.3f} %".format(cv_xgbc.mean() * 100))
print("Cross validation max: {:.3f} %".format(cv_xgbc.max() * 100))
print("ROC AUC Score: {:.3f}".format(roc_auc_score(y_test, y_score_xgbc)))
print("Cross validation result: ", cv_xgbc)

plt.title("Confusion Matrix")
# Integer format so counts are not rendered in scientific notation.
sns.heatmap(xgbc_cm, annot=True, fmt="d")
plt.show()

In [None]:
# Small feed-forward network: one 7-unit tanh hidden layer, light dropout,
# and a single sigmoid output unit for the binary target.
model = keras.Sequential([
    keras.layers.Dense(7, activation='tanh'),
    keras.layers.Dropout(0.1),
    keras.layers.Dense(1, activation='sigmoid'),
])

model.compile(
    optimizer=keras.optimizers.Adam(),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Train for 50 epochs; the test split is passed as validation data, so the
# per-epoch "val_*" metrics are measured on x_test / y_test.
model.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=50,
    batch_size=16,
    verbose=1,
)

In [None]:
# Per-epoch metrics recorded by the last model.fit call.
fit_log = model.history.history
loss, val_loss = fit_log["loss"], fit_log["val_loss"]
acc, val_acc = fit_log["accuracy"], fit_log["val_accuracy"]

# Two panels in the top row of a 2x2 grid: loss on the left, accuracy on the right.
curves_fig = plt.figure(figsize=(20, 13))

loss_ax = curves_fig.add_subplot(2, 2, 1)
loss_ax.plot(loss, label="Training loss")
loss_ax.plot(val_loss, "red", label="Validation loss")
loss_ax.legend()
loss_ax.set_title("Training and validation loss")
loss_ax.set_xlabel("Epochs")
loss_ax.set_ylabel("Loss")

acc_ax = curves_fig.add_subplot(2, 2, 2)
acc_ax.plot(acc, label="Training accuracy")
acc_ax.plot(val_acc, "red", label="Validation accuracy")
acc_ax.legend()
acc_ax.set_title("Training and validation accuracy")
acc_ax.set_xlabel("Epochs")
acc_ax.set_ylabel("Accuracy")

plt.show()

In [None]:
# Evaluate the network on the held-out split: score[0] is the loss and
# score[1] the accuracy (the metric configured in model.compile).
score = model.evaluate(x_test, y_test, verbose=0)
# Fix: binary cross-entropy is not a percentage, so it is printed unscaled.
print('Test Loss: {:.4f}'.format(score[0]))
# Fix: the percent sign now follows the number, matching the other model cells.
print('Test Accuracy: {:.2f} %'.format(score[1]*100))

# Sigmoid outputs flattened to 1-D. These are kept as continuous scores for
# ROC AUC, which should not be computed from rounded 0/1 predictions.
y_prob_ann = model.predict(x_test, verbose=0).ravel()
y_pred_ann = np.round(y_prob_ann).astype(int)
ann_cm = confusion_matrix(y_test,y_pred_ann)
print("R^2 score: {:.3f}".format(r2_score(y_test,y_pred_ann)))
print("ROC AUC Score: {:.3f}".format(roc_auc_score(y_test,y_prob_ann)))

# Integer format so counts are not rendered in scientific notation.
sns.heatmap(ann_cm,annot=True,fmt="d")
plt.title("Confusion Matrix")
plt.show()

Comparing Classification Models

In [None]:
# Test-set accuracy (in percent) for every model; the keys become the bar
# labels of the comparison chart.
# Fix: "Navie Bayes" was a misspelling of "Naive Bayes".
accuracies = {
    "Logistic Regression": acc_log_reg,
    "SVC": acc_svc,
    "Decision Tree": acc_dtc,
    "Random Forest": acc_rf,
    "KNN": acc_knn,
    "Naive Bayes": acc_bnb,
    "XGboost": acc_xgbc,
    "ANN": score[1]*100,
}

In [None]:
# Bar chart comparing the test accuracy of every model.
acc_fig, acc_bar_ax = plt.subplots(figsize=(15, 5))
model_names = list(accuracies.keys())
model_scores = list(accuracies.values())
sns.barplot(x=model_names, y=model_scores, ax=acc_bar_ax)
acc_bar_ax.set_title("Test results")
acc_bar_ax.set_xlabel("Classification methods")
acc_bar_ax.set_ylabel("Classification score")
plt.show()

In [None]:
# Cross-validated accuracy per model (models without a CV run are omitted).
# Fix: the mean over the 5 folds is reported instead of the best single fold;
# taking the max cherry-picks the luckiest fold and overstates performance.
cv_scores = {
    "Logistic Regression": cv_log.mean(),
    "SVC": cv_svc.mean(),
    "Decision Tree": cv_dtc.mean(),
    "Random Forest": cv_rf.mean(),
    "KNN": cv_knn.mean(),
    "XGboost": cv_xgbc.mean(),
}

In [None]:
# Bar chart comparing the cross-validation score of every model.
cv_fig, cv_bar_ax = plt.subplots(figsize=(15, 5))
cv_names = list(cv_scores.keys())
cv_values = list(cv_scores.values())
sns.barplot(x=cv_names, y=cv_values, ax=cv_bar_ax)
cv_bar_ax.set_title("Cross validation results")
cv_bar_ax.set_xlabel("Classification methods")
cv_bar_ax.set_ylabel("Cross validation scores")
plt.show()

In [None]:
# R^2 of the predicted labels against the true labels, per model; the keys
# become the bar labels of the comparison chart.
# Fix: "Navie Bayes" was a misspelling of "Naive Bayes".
# NOTE(review): R^2 is a regression metric; on 0/1 class labels it is hard to
# interpret — consider dropping it in favour of classification metrics.
r2_scores = {
    "Logistic Regression": r2_score(y_test, y_pred_lr),
    "SVC": r2_score(y_test, y_pred_svm),
    "Decision Tree": r2_score(y_test, y_pred_dtc),
    "Random Forest": r2_score(y_test, y_pred_rf),
    "KNN": r2_score(y_test, y_pred_knn),
    "XGboost": r2_score(y_test, y_pred_xgbc),
    "Naive Bayes": r2_score(y_test, y_pred_bnb),
    "ANN": r2_score(y_test, y_pred_ann),
}

In [None]:
# Bar chart comparing the R2 score of every model.
r2_fig, r2_bar_ax = plt.subplots(figsize=(15, 5))
r2_names = list(r2_scores.keys())
r2_values = list(r2_scores.values())
sns.barplot(x=r2_names, y=r2_values, ax=r2_bar_ax)
r2_bar_ax.set_title("R2 score results")
r2_bar_ax.set_xlabel("Classification methods")
r2_bar_ax.set_ylabel("R2 scores")
plt.show()

In [None]:
# ROC AUC per model on the held-out split; the keys become the bar labels of
# the comparison chart.
# Fix: ROC AUC must be computed from continuous scores (class-1 probability or
# decision-function value). Feeding hard 0/1 predictions collapses the ROC
# curve to a single operating point and understates ranking quality.
# Fix: "Navie Bayes" was a misspelling of "Naive Bayes".
roc_auc_scores = {
    "Logistic Regression": roc_auc_score(y_test, log_reg.predict_proba(x_test)[:, 1]),
    # SVC is fitted without probability=True, so use the margin instead.
    "SVC": roc_auc_score(y_test, svc.decision_function(x_test)),
    "Decision Tree": roc_auc_score(y_test, dtc.predict_proba(x_test)[:, 1]),
    "Random Forest": roc_auc_score(y_test, rf.predict_proba(x_test)[:, 1]),
    "KNN": roc_auc_score(y_test, knn.predict_proba(x_test)[:, 1]),
    "XGboost": roc_auc_score(y_test, xgbc.predict_proba(x_test)[:, 1]),
    "Naive Bayes": roc_auc_score(y_test, bnb.predict_proba(x_test)[:, 1]),
    # The Keras model's sigmoid output is already a class-1 probability.
    "ANN": roc_auc_score(y_test, model.predict(x_test, verbose=0).ravel()),
}

In [None]:
# Bar chart comparing the ROC AUC score of every model.
auc_fig, auc_bar_ax = plt.subplots(figsize=(15, 5))
auc_names = list(roc_auc_scores.keys())
auc_values = list(roc_auc_scores.values())
sns.barplot(x=auc_names, y=auc_values, ax=auc_bar_ax)
auc_bar_ax.set_title("ROC AUC results")
auc_bar_ax.set_xlabel("Classification methods")
auc_bar_ax.set_ylabel("ROC AUC scores")
plt.show()