# Heart Failure Prediction

### Importing Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

#Suppressing all warnings
warnings.filterwarnings("ignore")

%matplotlib inline

In [None]:
# Location of the clinical-records CSV, relative to the notebook's working directory.
DATA_PATH = '../input/heart-failure-clinical-data/heart_failure_clinical_records_dataset.csv'

# Load the CSV into a DataFrame and preview its first rows.
df = pd.read_csv(DATA_PATH)
df.head()

### Exploration of Data

In [None]:
# Count the missing entries in each column (isna is pandas' alias for isnull).
missing_per_column = df.isna().sum()
missing_per_column

> Here we can observe that there are no null values in this data, so no rows need to be dropped or imputed. I think that is a good sign for achieving good accuracy.

In [None]:
# Print a concise summary of the DataFrame (columns, non-null counts, dtypes).
df.info()

In [None]:
# Cast the 'age' column to integers and write it back into the DataFrame.
# The cast drops any fractional part of an age rather than rounding it.
age_as_int = df['age'].astype(int)
df['age'] = age_as_int

In [None]:
# Pairwise correlation between the columns of the DataFrame.
corr = df.corr()

# Render the matrix as a heatmap, labelling both axes with the column names.
column_labels = corr.columns
sns.heatmap(data=corr, xticklabels=column_labels, yticklabels=column_labels)

In [None]:
# Correlation of every column with DEATH_EVENT, keeping only the entries whose
# absolute value is above 0.1.
death_corr = corr['DEATH_EVENT']
death_corr[death_corr.abs() > 0.1]

The features 'age', 'ejection_fraction', 'serum_creatinine', 'serum_sodium', and 'time' have a considerable correlation with 'DEATH_EVENT' (absolute correlation above 0.1), so they are used as the model inputs below.

In [None]:
# Display the DataFrame's column labels.
df.columns

### Splitting the Dataset

In [None]:
# Columns used as model inputs.
feature_columns = ['age', 'ejection_fraction', 'serum_creatinine', 'serum_sodium', 'time']
x = df[feature_columns]

# Target column to predict.
y = df['DEATH_EVENT']

In [None]:
# Preview the first rows of the feature matrix.
x.head()

In [None]:
# Preview the first values of the target column.
y.head()

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split the same on every run.
split = train_test_split(x, y, test_size=0.2, random_state=1)
x_train, x_test, y_train, y_test = split

In [None]:
#Metrics
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix

### Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Fit a logistic regression on the training split (fit returns the estimator),
# with the solver's iteration cap raised to 10000.
lr = LogisticRegression(max_iter=10000).fit(x_train, y_train)

# Predict the test labels once and reuse them for every metric below.
lr_pred = lr.predict(x_test)
s1 = accuracy_score(y_test, lr_pred)

print("Logistic Regression Success Rate :", "{:.2f}%".format(100*s1))
print(classification_report(y_test, lr_pred))
print(confusion_matrix(y_test, lr_pred))

### Gradient Boosting Classifier

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Seed the estimator so that re-running the notebook reproduces the same scores
# (same seed value as the train/test split).
gbc = GradientBoostingClassifier(random_state=1)
gbc.fit(x_train, y_train)

# Predict the test labels once and reuse them for accuracy, report and matrix.
p2 = gbc.predict(x_test)
s2 = accuracy_score(y_test, p2)

print("Gradient Booster Classifier Success Rate :", "{:.2f}%".format(100*s2))
print(classification_report(y_test, p2))
print(confusion_matrix(y_test, p2))

### RandomForest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier

# A random forest bootstraps rows and samples features at random, so an unseeded
# model reports different scores on every run. Fix the seed (same value as the
# train/test split) to make the results reproducible.
rfc = RandomForestClassifier(random_state=1)
rfc.fit(x_train, y_train)

# Predict the test labels once and reuse them for accuracy, report and matrix.
p3 = rfc.predict(x_test)
s3 = accuracy_score(y_test, p3)

print("Random Forest Classifier Success Rate :", "{:.2f}%".format(100*s3))
print(classification_report(y_test, p3))
print(confusion_matrix(y_test, p3))

### Support Vector Machine Classifier

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# SVC's default RBF kernel is computed from distances between samples, so a
# feature with a large numeric range would dominate the others. Standardise the
# features inside a pipeline: the scaler is fitted on the training split only,
# and it stays attached to the model wherever `svm` is used afterwards
# (including when it is pickled).
svm = make_pipeline(StandardScaler(), SVC())
svm.fit(x_train, y_train)

# Predict the test labels once and reuse them for accuracy, report and matrix.
p4 = svm.predict(x_test)
s4 = accuracy_score(y_test, p4)

print("Support Vector Machine Success Rate :", "{:.2f}%".format(100*s4))
print(classification_report(y_test, p4))
print(confusion_matrix(y_test, p4))

### KNeighbors Classifier

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Test accuracy (in percent, rounded to 2 decimals) for every k from 1 to 20.
k_values = range(1, 21)
scorelist = []
for i in k_values:
    knn = KNeighborsClassifier(n_neighbors=i)
    knn.fit(x_train, y_train)
    p5 = knn.predict(x_test)
    s5 = accuracy_score(y_test, p5)
    scorelist.append(round(100*s5, 2))
print("K Nearest Neighbors Top 5 Success Rates:")
print(sorted(scorelist, reverse=True)[:5])

# After the loop `knn` is the k=20 model, which is not necessarily the one that
# produced the top score. Refit with the best-scoring k (smallest k on ties) so
# the report and confusion matrix describe the same model as the headline score.
# NOTE: k is chosen on the test split, so this accuracy is an optimistic estimate.
best_k = k_values[scorelist.index(max(scorelist))]
knn = KNeighborsClassifier(n_neighbors=best_k)
knn.fit(x_train, y_train)
p5 = knn.predict(x_test)
s5 = accuracy_score(y_test, p5)
print("Best n_neighbors :", best_k)
print(classification_report(y_test, p5))
print(confusion_matrix(y_test, p5))

### Saving Model

In [None]:
## Pickle
import pickle

# save model
# Open the file in a `with` block so it is flushed and closed even if pickling
# raises; a bare open() passed straight to pickle.dump is never closed explicitly.
with open('heart.pickle', 'wb') as model_file:
    pickle.dump(svm, model_file)

# load model
# NOTE: pickle.load can execute arbitrary code from the file. It is safe here only
# because the file was written by this notebook; never load untrusted pickles.
with open('heart.pickle', 'rb') as model_file:
    heart_failure_model = pickle.load(model_file)
