# Task: Heart Failure Prediction
Heart failure is a serious condition and the number one cause of death in recent years. By building a machine learning model, we can predict death events in advance, so that the necessary treatment and precautions can be taken.

# Importing necessary libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import cufflinks as cf
from plotly import __version__
from plotly.offline import download_plotlyjs,init_notebook_mode,plot,iplot
# Enable plotly's offline notebook mode (connected=True loads plotly.js from the CDN).
init_notebook_mode(connected=True)
# Switch cufflinks to offline mode so DataFrame.iplot() renders inside the notebook.
cf.go_offline()
# Render matplotlib/seaborn figures inline in the cell output.
%matplotlib inline

In [None]:
# Show the installed plotly version (`__version__` was imported from plotly above).
print(__version__)

**Heart failure prediction dataset**

In [None]:
# Read the heart-failure clinical records CSV and preview the first rows.
dataset = pd.read_csv(
    '../input/heart-failure-clinical-data/heart_failure_clinical_records_dataset.csv'
)
dataset.head()

**Checking whether the dataset has null values.**

In [None]:
# Heatmap of the boolean missing-value mask; a uniform colour means no nulls.
# (isna() is the same method as isnull().)
sns.heatmap(dataset.isna())

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
dataset.describe()

In [None]:
# Class balance of the target: number of rows per DEATH_EVENT value.
dataset['DEATH_EVENT'].value_counts()

# Feature selection

In [None]:
# Candidate features are every column but the last; the target is the last column.
# NOTE(review): assumes DEATH_EVENT is the final column of the CSV — confirm.
X1, y1 = dataset.iloc[:, :-1], dataset.iloc[:, -1]

In [None]:
# Feature selection: score each feature against the target with the chi-squared test.
from sklearn.feature_selection import SelectKBest, chi2

# chi2 requires non-negative feature values; k=10 keeps the ten best-scoring columns.
fit_best_features = SelectKBest(score_func=chi2, k=10)
# fit() returns the selector itself, so both names refer to the same fitted object.
best_features = fit_best_features.fit(X1, y1)



In [None]:
# Single-column frames: the feature names and their chi2 scores, in the same order.
dataset_cols = pd.DataFrame(X1.columns)
dataset_scores = pd.DataFrame(best_features.scores_)

In [None]:
# Put names and scores side by side and give the two columns readable labels.
featurescores = pd.concat([dataset_cols, dataset_scores], axis='columns')
featurescores.columns = ['column', 'scores']

In [None]:
# Display the full feature/score table.
featurescores

**Top 10 feature scores**

In [None]:
# The ten rows with the highest chi2 score, highest first.
print(featurescores.nlargest(10,'scores'))

In [None]:
from sklearn.ensemble import ExtraTreesClassifier

# Second opinion on feature relevance: impurity-based importances from an
# extremely-randomised tree ensemble fitted on all candidate features.
# The seed is fixed because the trees are randomised; without it the importances
# (and any ranking derived from them) change on every run.
ee = ExtraTreesClassifier(random_state=101)
ee.fit(X1, y1)

**Choose the important features by looking at the plot below**

In [None]:
# Label each importance with its feature name, then draw the ten largest as horizontal bars.
fea_imp = pd.Series(ee.feature_importances_, index=X1.columns)
fea_imp.nlargest(10).plot.barh()


In [None]:
# Fresh copy of the feature/target split (all columns but the last vs. the last column).
X2, y2 = dataset.iloc[:, :-1], dataset.iloc[:, -1]

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise every feature to zero mean and unit variance.
# The result replaces X2 (presumably a NumPy array without column labels — sklearn's default output).
ss = StandardScaler()
X2 = ss.fit(X2).transform(X2)


In [None]:
from sklearn.feature_selection import VarianceThreshold

# Threshold 0.8 * (1 - 0.8) = 0.16 is the variance of a Bernoulli(0.8) variable.
feature_high_variance = VarianceThreshold(threshold=(0.8 * (1 - 0.8)))
# Measure variances on the unscaled features (X1). X2 has been standardised, which
# gives every non-constant column a variance of 1, so fitting on it cannot
# distinguish high- from low-variance features.
falls = feature_high_variance.fit(X1)

In [None]:
# Single-column frames: feature names and the per-feature variances, in the same order.
dat1 = pd.DataFrame(X1.columns)
dataset_scores1 = pd.DataFrame(falls.variances_)


In [None]:
# Variances next to their feature names, with readable column labels.
high_variance = pd.concat([dataset_scores1, dat1], axis='columns')
high_variance.columns = ['variance', 'cols']

In [None]:
# Display the full variance table.
high_variance

In [None]:
# Keep only the features whose variance exceeds 0.8.
high_variance.loc[high_variance['variance'].gt(0.8)]

# EDA

> Age

In [None]:

# Age distribution: density-scaled histogram with a KDE overlay.
# sns.distplot is deprecated; this is the equivalent histplot call given in
# seaborn's migration guidance (stat='density' and cut=3 match distplot's defaults).
sns.histplot(dataset['age'], bins=30, kde=True, stat='density', kde_kws=dict(cut=3))

In [None]:
# Age distribution per outcome class, as box plots.
sns.boxplot(x='DEATH_EVENT',y='age',data=dataset)

> DEATH EVENT 

In [None]:
# Count rows per outcome class and lay them out as a two-column frame.
ds = dataset['DEATH_EVENT'].value_counts().reset_index()
# Set the labels explicitly: the names reset_index() produces differ between pandas versions.
ds.columns = ['DEATH_EVENT', 'count']
fig = px.pie(
    ds,
    values='count',
    names='DEATH_EVENT',
    # The figure is a pie chart; the previous title called it a bar chart.
    title='DEATH_EVENT pie chart',
    width=500,
    height=500,
)
fig.show()

> Platelets

In [None]:
# Interactive bar chart of platelets against the outcome class.
dataset.iplot(kind='bar', x='DEATH_EVENT', y='platelets')

> time

In [None]:
# Interactive bar chart of the `time` column against the outcome class.
dataset.iplot(kind='bar', x='DEATH_EVENT', y='time')

> serum creatinine

In [None]:
# Serum creatinine distribution per outcome class, as violin plots.
sns.violinplot(x='DEATH_EVENT',y='serum_creatinine',data=dataset)

> ejection fraction

In [None]:
# Interactive marker-only scatter of ejection fraction against the outcome class.
dataset.iplot(kind='scatter',x='DEATH_EVENT',y='ejection_fraction',mode='markers')

In [None]:
# Interactive bar chart of ejection fraction against the outcome class.
dataset.iplot(kind='bar',x='DEATH_EVENT',y='ejection_fraction')

> creatinine phosphokinase

In [None]:
# Bar plot of creatinine phosphokinase per outcome class (seaborn's default estimator is the mean).
# NOTE(review): the trailing "yes" presumably marks this feature as informative — confirm with the author.
sns.barplot(x='DEATH_EVENT',y='creatinine_phosphokinase',data=dataset)#yes

> serum sodium

In [None]:
# Bar plot of serum sodium per outcome class (seaborn's default estimator is the mean).
# NOTE(review): the trailing "no" presumably marks this feature as uninformative — confirm with the author.
sns.barplot(x='DEATH_EVENT',y='serum_sodium',data=dataset)#no

In [None]:
# Serum sodium distribution per outcome class, as violin plots.
sns.violinplot(x='DEATH_EVENT',y='serum_sodium',data=dataset)

> anaemia

In [None]:
# Bar plot of the anaemia column per outcome class (seaborn's default estimator is the mean).
sns.barplot(x='DEATH_EVENT',y='anaemia',data=dataset)

> Overview of all features

In [None]:
# Interactive histograms of every column in one figure.
dataset.iplot(kind='hist')

**Correlation**
> Looking at the correlation heatmap, we choose the features that are highly correlated with the dependent variable (DEATH_EVENT) and weakly correlated with the other features.

In [None]:
# Pairwise correlation matrix of the dataset's columns (pandas' default method is Pearson).
dataset_corr = dataset.corr()

In [None]:
# Annotated correlation heatmap on an explicitly sized axes.
fig, ax = plt.subplots(figsize=(15, 10))
sns.heatmap(dataset_corr, annot=True, ax=ax)

In [None]:
# List the column names before picking the model inputs.
dataset.columns

**Dividing Independent and dependent variables.**

In [None]:
# Model inputs: the three selected features; target: the death-event flag.
X = dataset.loc[:, ['time', 'ejection_fraction', 'serum_creatinine']]
y = dataset.loc[:, 'DEATH_EVENT']

**Splitting dataset**

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=101
)

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling parameters from the training rows only, then apply the same
# transformation to both splits so no test-set statistics leak into training.
ss = StandardScaler().fit(X_train)
X_train = ss.transform(X_train)
X_test = ss.transform(X_test)

# Building Models

In [None]:
from sklearn.linear_model import LogisticRegression

# L1-regularised logistic regression; liblinear is a solver that supports the l1 penalty.
logistic_model = LogisticRegression(
    C=0.3,
    penalty='l1',
    solver='liblinear',
)
logistic_model.fit(X_train, y_train)

In [None]:
from sklearn.model_selection import cross_val_score

# 10-fold cross-validation of the logistic model on the training split;
# report the mean and spread of the fold scores as percentages.
accuracies = cross_val_score(
    estimator=logistic_model, X=X_train, y=y_train, cv=10
)
print("accuracy is {:.2f} %".format(100 * accuracies.mean()))
print("std is {:.2f} %".format(100 * accuracies.std()))

In [None]:

from sklearn.metrics import accuracy_score

# Predict the held-out test rows with the logistic model.
pre = logistic_model.predict(X_test)

In [None]:
# Test-set accuracy of the logistic model, computed once and then printed.
# Arguments follow the (y_true, y_pred) order; accuracy is the same either way.
Logistic_acc = accuracy_score(y_test, pre)
print(Logistic_acc)

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier

# Evaluate k = 1..9 with 10-fold cross-validation on the training split.
# score[i] holds the mean fold accuracy for k = i + 1.
score = []
for i in range(1, 10):
    candidate = KNeighborsClassifier(n_neighbors=i, metric='minkowski', p=2)
    fold_scores = cross_val_score(estimator=candidate, X=X_train, y=y_train, cv=10)
    score.append(fold_scores.mean())

# Refit the best-scoring k (ties go to the smallest k). Without this comparison the
# loop just overwrote `knn` on each pass and always finished with k=9.
best_k = score.index(max(score)) + 1
knn = KNeighborsClassifier(n_neighbors=best_k, metric='minkowski', p=2).fit(X_train, y_train)

In [None]:
from sklearn.model_selection import cross_val_score

# 10-fold cross-validation of the KNN model on the training split;
# report the mean and spread of the fold scores as percentages.
accuracies = cross_val_score(
    estimator=knn, X=X_train, y=y_train, cv=10
)
print("accuracy is {:.2f} %".format(100 * accuracies.mean()))
print("std is {:.2f} %".format(100 * accuracies.std()))

In [None]:
# Predict the held-out test rows with the KNN model.
pre1 = knn.predict(X_test)

In [None]:
# Test-set accuracy of the KNN model, computed once and then printed.
# Arguments follow the (y_true, y_pred) order; accuracy is the same either way.
knn_acc = accuracy_score(y_test, pre1)
print(knn_acc)

In [None]:
from sklearn.svm import SVC

# Support-vector classifier with an RBF kernel.
svm_model = SVC(
    kernel='rbf',
    C=0.3,
    gamma='scale',
)
svm_model.fit(X_train, y_train)

In [None]:
from sklearn.model_selection import cross_val_score

# 10-fold cross-validation of the RBF SVM on the training split;
# report the mean and spread of the fold scores as percentages.
accuracies = cross_val_score(
    estimator=svm_model, X=X_train, y=y_train, cv=10
)
print("accuracy is {:.2f} %".format(100 * accuracies.mean()))
print("std is {:.2f} %".format(100 * accuracies.std()))

In [None]:
# Predict the held-out test rows with the RBF SVM.
pre2 = svm_model.predict(X_test)


In [None]:
# Test-set accuracy of the RBF SVM, computed once and then printed.
# Arguments follow the (y_true, y_pred) order; accuracy is the same either way.
svm_rbf_acc = accuracy_score(y_test, pre2)
print(svm_rbf_acc)

In [None]:
from sklearn.tree import DecisionTreeClassifier

# A deliberately tiny entropy-based tree: at most three leaves, fixed seed.
decision_model = DecisionTreeClassifier(
    criterion='entropy',
    max_leaf_nodes=3,
    random_state=30,
)
decision_model.fit(X_train, y_train)

In [None]:
from sklearn.model_selection import cross_val_score

# 10-fold cross-validation of the decision tree on the training split;
# report the mean and spread of the fold scores as percentages.
accuracies = cross_val_score(
    estimator=decision_model, X=X_train, y=y_train, cv=10
)
print("accuracy is {:.2f} %".format(100 * accuracies.mean()))
print("std is {:.2f} %".format(100 * accuracies.std()))

In [None]:
# Predict the held-out test rows with the decision tree.
pre3 = decision_model.predict(X_test)


In [None]:
# Test-set accuracy of the decision tree, computed once and then printed.
# Arguments follow the (y_true, y_pred) order; accuracy is the same either way.
decision_acc = accuracy_score(y_test, pre3)
print(decision_acc)

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest of 55 entropy-based trees; each split considers half of the features.
randomforest_model = RandomForestClassifier(
    n_estimators=55,
    criterion='entropy',
    random_state=1,
    max_features=0.5,
    max_depth=15,
)
randomforest_model.fit(X_train, y_train)

In [None]:
from sklearn.model_selection import cross_val_score

# 10-fold cross-validation of the random forest on the training split;
# report the mean and spread of the fold scores as percentages.
accuracies = cross_val_score(
    estimator=randomforest_model, X=X_train, y=y_train, cv=10
)
print("accuracy is {:.2f} %".format(100 * accuracies.mean()))
print("std is {:.2f} %".format(100 * accuracies.std()))

In [None]:
# Predict the held-out test rows with the random forest.
pre4 = randomforest_model.predict(X_test)


In [None]:
# Test-set accuracy of the random forest, computed once and then printed.
# Arguments follow the (y_true, y_pred) order; accuracy is the same either way.
random_acc = accuracy_score(y_test, pre4)
print(random_acc)

In [None]:
from sklearn.svm import SVC

# Support-vector classifier with a linear kernel.
svmlinear_model = SVC(
    kernel='linear',
    C=0.1,
)
svmlinear_model.fit(X_train, y_train)

In [None]:
from sklearn.model_selection import cross_val_score

# 10-fold cross-validation of the linear SVM on the training split;
# report the mean and spread of the fold scores as percentages.
accuracies = cross_val_score(
    estimator=svmlinear_model, X=X_train, y=y_train, cv=10
)
print("accuracy is {:.2f} %".format(100 * accuracies.mean()))
print("std is {:.2f} %".format(100 * accuracies.std()))

In [None]:
# Predict the held-out test rows with the linear SVM.
pre5 = svmlinear_model.predict(X_test)


In [None]:
# Test-set accuracy of the linear SVM, computed once and then printed.
# Arguments follow the (y_true, y_pred) order; accuracy is the same either way.
svm_linear_acc = accuracy_score(y_test, pre5)
print(svm_linear_acc)


In [None]:
from xgboost import XGBClassifier

# Gradient-boosted trees with the library's default hyperparameters.
xgb_model = XGBClassifier()
xgb_model.fit(X_train, y_train)

In [None]:
from sklearn.model_selection import cross_val_score

# 10-fold cross-validation of the XGBoost model on the training split;
# report the mean and spread of the fold scores as percentages.
accuracies = cross_val_score(
    estimator=xgb_model, X=X_train, y=y_train, cv=10
)
print("accuracy is {:.2f} %".format(100 * accuracies.mean()))
print("std is {:.2f} %".format(100 * accuracies.std()))

In [None]:
# Predict the held-out test rows with the XGBoost model.
# NOTE(review): this rebinds `pre5`, which previously held the linear-SVM predictions;
# a distinct name would avoid confusion if cells are re-run out of order.
pre5 = xgb_model.predict(X_test)


In [None]:
# Test-set accuracy of the XGBoost model, computed once and then printed.
# Arguments follow the (y_true, y_pred) order; accuracy is the same either way.
xgb_acc = accuracy_score(y_test, pre5)
print(xgb_acc)

# Overview of all models' accuracy

In [None]:
# Side-by-side test-set accuracy of every model trained above.
# Two labels were misspelled in the printed output ("tress", "boosdt") and are corrected here.
summary = (
    ("Logistic Accuracy:", Logistic_acc),
    ("knn Accuracy:", knn_acc),
    ("svm rbf model Accuracy :", svm_rbf_acc),
    ("svm linear model Accuracy:", svm_linear_acc),
    ("Decision tree Accuracy :", decision_acc),
    ("Random_forest _Accuracy:", random_acc),
    ("Xgb_boost_Accuracy:", xgb_acc),
)
for label, value in summary:
    print(label, value)

**The best model for this dataset (heart failure prediction) is the KNN model, with 95% accuracy.**

# Plz upvote 
**This will keep me motivated**