In [None]:
# --- Imports ---------------------------------------------------------------
import os

import matplotlib.pyplot as plt
import numpy as np   # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sb

# --- Global plot styling ---------------------------------------------------
sb.set_style('darkgrid')
sb.set_palette('dark')

# --- List every file found under the Kaggle input directory ----------------
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

In [None]:
# Path of the heart-failure clinical records CSV inside the Kaggle input folder.
CSV_PATH = "/kaggle/input/heart-failure-clinical-data/heart_failure_clinical_records_dataset.csv"
data = pd.read_csv(CSV_PATH)

In [None]:
# Preview the first rows rather than dumping the whole frame into the output.
data.head()

* anaemia: 1=present,0=absent
* diabetes: 1=present,0=absent
* high_blood_pressure: 1=present,0=absent
* sex: 1=male,0=female
* smoking: 1=yes,0=no


# Looking for Null Values

In [None]:
# Number of missing values in each column.
data.isna().sum()

No Null Values

# Data-PreProcessing

In [None]:
# Show the dtype pandas inferred for each column.
data.dtypes

In [None]:
# Remove the 'time' column before the analysis.
# `columns=` is used because the positional axis argument was removed in
# pandas 2.0; reassigning with errors='ignore' keeps the cell safe to re-run.
data = data.drop(columns='time', errors='ignore')

# Data analysis and visualization

In [None]:
# Summary statistics (count, mean, std, quartiles, min/max) for every numeric column.
data.describe()

In [None]:
# Pairwise scatter/density grid of all columns, coloured by outcome.
sb.pairplot(
    data=data,
    hue='DEATH_EVENT',
    aspect=0.8,
)

In [None]:
# Annotated heatmap of the pairwise correlations between columns.
correlations = data.corr()
plt.figure(figsize=(25, 8))
sb.heatmap(correlations, annot=True)

**Does death due to heart-failure occur at some age-interval?**

In [None]:
# Age distribution: all patients (left panel) and stacked by outcome (right panel).
age_ticks = list(range(5, 65, 5))  # 5, 10, ..., 60
age_dead = data.loc[data['DEATH_EVENT'] == 1, 'age']
age_survived = data.loc[data['DEATH_EVENT'] == 0, 'age']

fig, ax = plt.subplots(1, 2, figsize=(20, 9))

# Left panel: every patient.
ax[0].hist(data['age'], color='darkorange', label='patients', edgecolor='black')
ax[0].set(xlabel='Age', ylabel='Number of Patients', yticks=age_ticks)
ax[0].legend()
ax[0].set_title('Age Distribution')

# Right panel: patients split into dead / survived, stacked.
ax[1].hist(
    x=[age_dead, age_survived],
    color=['blue', 'darkorange'],
    stacked=True,
    edgecolor='black',
    label=['Dead', 'Survived'],
)
ax[1].set(xlabel='Age', ylabel='Number of patients', yticks=age_ticks)
ax[1].set_title('Dead and Survived patients by age')
ax[1].legend()

In [None]:
# Linear fit of age against the outcome flag.
# x/y are passed by keyword: seaborn >= 0.12 rejects them as positional arguments.
plt.figure(figsize=(20, 8))
sb.regplot(data=data, x='DEATH_EVENT', y='age', color='darkorange')

There is at least a 10% chance of heart failure at the age of 40, and it increases with age, but only by a small proportion. In the given sample of data we have:
* less than 40 people of age 40
* less than 40 people of age 50
* less than 40 people of age between 50 and 59
* less than 60 people of age 60
* 45 people of age between 61 and 69
* 40 people of age 70
* 19 people of age between 71 and 79
* 12 people of age 80
* less than 10 of age between 81 and 89
* more than 5 of age 90 and above

Given that they have suffered from heart-failure
* 25% chance of death due to heart failure between age 40 and 50
* 15% chance of death due to heart failure between age 51 and 60
* 33% chance of death due to heart failure between age 61 and 70
* 42% chance of death due to heart failure between age 71 and 80
* 65% chance of death due to heart failure between age 81 and 90
* 83% chance of death due to heart failure after age 90

**Thus with increase in age chances of death due to heart-failure also increases**




**Does having anaemia cause death due to heart failure?**



*Anemia is a condition in which you lack enough healthy red blood cells to carry adequate oxygen to your body's tissues.* 

In [None]:
# Anaemia prevalence: all patients (left panel) and stacked by outcome (right panel).
count_ticks = list(range(10, 180, 10))  # 10, 20, ..., 170
presence_labels = ['Absent', 'Present']
anaemia_dead = data.loc[data['DEATH_EVENT'] == 1, 'anaemia']
anaemia_survived = data.loc[data['DEATH_EVENT'] == 0, 'anaemia']

fig, ax = plt.subplots(1, 2, figsize=(20, 9))

# Left panel: every patient.
ax[0].hist(data['anaemia'], color='white', label='patients', edgecolor='black')
ax[0].set_xticks([0, 1])
ax[0].set_xticklabels(presence_labels)
ax[0].set_yticks(count_ticks)
ax[0].set(xlabel='Anaemia', ylabel='Number of Patients')
ax[0].legend()
ax[0].set_title('anaemia presence in patients')

# Right panel: patients split into dead / survived, stacked.
ax[1].hist(
    x=[anaemia_dead, anaemia_survived],
    stacked=True,
    color=['blue', 'white'],
    edgecolor='black',
    label=['Dead', 'Survived'],
)
ax[1].set_xticks([0, 1])
ax[1].set_xticklabels(presence_labels)
ax[1].set_yticks(count_ticks)
ax[1].set(xlabel='Anaemia', ylabel='Number of patients')
ax[1].set_title('dead and alive patients by presence of anaemia')
ax[1].legend()
plt.tight_layout()

In [None]:
# Linear fit of the anaemia flag against the outcome flag.
# x/y are passed by keyword: seaborn >= 0.12 rejects them as positional arguments.
plt.figure(figsize=(20, 6))
sb.regplot(data=data, x='DEATH_EVENT', y='anaemia', color='red')
plt.title('DEATH due to anaemia')

* 56% patients in given dataset do not possess anaemia
* 43% patients in the given dataset possess anaemia

* 29.4% of patients not suffering from anaemia die due to heart failure
* 36% of patients suffering from anaemia die due to heart failure

**Thus patients having anaemia have a higher chance of death than patients not suffering from anaemia.** 


**Does level of creatinine_phosphokinase cause death due to heart failure?**


*creatinine_phosphokinase is an enzyme responsible for muscular function*

In [None]:
# Creatinine phosphokinase levels: all patients (left) and stacked by outcome (right).
cpk_count_ticks = list(range(10, 251, 20))      # 10, 30, ..., 250
cpk_level_ticks = list(range(500, 9000, 500))   # 500, 1000, ..., 8500
cpk_dead = data.loc[data['DEATH_EVENT'] == 1, 'creatinine_phosphokinase']
cpk_survived = data.loc[data['DEATH_EVENT'] == 0, 'creatinine_phosphokinase']

fig, ax = plt.subplots(1, 2, figsize=(20, 9))

# Left panel: every patient.
ax[0].hist(data['creatinine_phosphokinase'], color='red', label='patients', edgecolor='black')
ax[0].set(
    yticks=cpk_count_ticks,
    xticks=cpk_level_ticks,
    xlabel='creatinine phosphokinase',
    ylabel='Number of Patients',
)
ax[0].legend()
ax[0].set_title('creatinine phosphokinase levels')

# Right panel: patients split into dead / survived, stacked.
ax[1].hist(
    x=[cpk_dead, cpk_survived],
    stacked=True,
    color=['pink', 'red'],
    edgecolor='black',
    label=['Dead', 'Survived'],
)
ax[1].set(
    yticks=cpk_count_ticks,
    xticks=cpk_level_ticks,
    xlabel='creatinine phosphokinase',
    ylabel='Number of patients',
)
ax[1].set_title('creatinine phosphokinase levels and death event')
ax[1].legend()

In [None]:
# Linear fit of the outcome flag against creatinine phosphokinase level.
# x/y are passed by keyword: seaborn >= 0.12 rejects them as positional arguments.
plt.figure(figsize=(15, 6))
sb.regplot(data=data, x='creatinine_phosphokinase', y='DEATH_EVENT', color='magenta')
plt.title('creatinine_phosphokinase levels and chance of death')

In [None]:
# Linear fit of age against creatinine phosphokinase level.
# x/y are passed by keyword: seaborn >= 0.12 rejects them as positional arguments.
plt.figure(figsize=(15, 6))
sb.regplot(data=data, x='creatinine_phosphokinase', y='age', color='magenta')
plt.title('creatinine_phosphokinase levels and age')

In [None]:
# Linear fit of the anaemia flag against creatinine phosphokinase level.
# x/y are passed by keyword: seaborn >= 0.12 rejects them as positional arguments.
plt.figure(figsize=(15, 6))
sb.regplot(data=data, x='creatinine_phosphokinase', y='anaemia', color='magenta')
plt.title('creatinine_phosphokinase levels and anaemia')

**Almost 84% of people have creatinine phosphokinase levels in the range of 500 units. From the regression plot it is clear that the chance of death increases as creatinine phosphokinase levels increase.
Another interesting observation is that creatinine phosphokinase levels decrease as age increases. Thus creatinine phosphokinase levels are lower at higher ages and higher at younger ages.**

**Does having diabetes cause death due to heart failure?**

In [None]:
# Diabetes prevalence: all patients (left panel) and stacked by outcome (right panel).
# One shared tick list keeps both y-axes identical and free of duplicated values.
diabetes_count_ticks = list(range(20, 220, 20))  # 20, 40, ..., 200
diabetes_dead = data.loc[data['DEATH_EVENT'] == 1, 'diabetes']
diabetes_survived = data.loc[data['DEATH_EVENT'] == 0, 'diabetes']

fig, ax = plt.subplots(1, 2, figsize=(20, 9))

# Left panel: every patient.
ax[0].hist(data['diabetes'], color='grey', label='patients', edgecolor='black')
ax[0].set_xlabel('Diabetes')
ax[0].set_ylabel('Number of Patients')
ax[0].set_yticks(diabetes_count_ticks)
ax[0].set_xticks([0, 1])
ax[0].set_xticklabels(['Absent', 'Present'])
ax[0].legend()
ax[0].set_title('Diabetic Patients')

# Right panel: patients split into dead / survived, stacked.
ax[1].hist(
    x=[diabetes_dead, diabetes_survived],
    color=['green', 'grey'],
    stacked=True,
    edgecolor='black',
    label=['Dead', 'Survived'],
)
ax[1].set_xlabel('Diabetes')
ax[1].set_ylabel('Number of patients')
ax[1].set_yticks(diabetes_count_ticks)
ax[1].set_xticks([0, 1])
ax[1].set_xticklabels(['Absent', 'Present'])
ax[1].set_title('Dead and Survived patients by diabetic patients')
ax[1].legend()

In [None]:
# Linear fit of the diabetes flag against the outcome flag.
# x/y are passed by keyword: seaborn >= 0.12 rejects them as positional arguments.
plt.figure(figsize=(15, 7))
sb.regplot(data=data, x='DEATH_EVENT', y='diabetes', color='green')

Out of 299 patients
* 173 do not have diabetes
* 126 have diabetes

* 32% patients who did not have diabetes died due to heart-failure
* 32% patients who had diabetes died due to heart-failure

**Thus the presence or absence of diabetes does not affect death due to heart failure: patients with and without diabetes have equal chances of death due to heart failure. No correlation exists, which the regression plot also shows.**

**Ejection fraction (EF) is a measurement, expressed as a percentage, of how much blood the left ventricle pumps out with each contraction**

In [None]:
# Ejection fraction: all patients (left panel) and stacked by outcome (right panel).
ef_count_ticks = [20, 40, 60, 80, 100, 120, 125]
ef_dead = data.loc[data['DEATH_EVENT'] == 1, 'ejection_fraction']
ef_survived = data.loc[data['DEATH_EVENT'] == 0, 'ejection_fraction']

fig, ax = plt.subplots(1, 2, figsize=(20, 9))

# Left panel: every patient.
ax[0].hist(data['ejection_fraction'], color='tomato', label='patients', edgecolor='black')
ax[0].set(xlabel='Ejection Fraction', ylabel='Number of patients', yticks=ef_count_ticks)
ax[0].legend()
ax[0].set_title('Ejection Fraction')

# Right panel: patients split into dead / survived, stacked.
ax[1].hist(
    x=[ef_dead, ef_survived],
    color=['pink', 'tomato'],
    stacked=True,
    edgecolor='black',
    label=['Dead', 'Survived'],
)
ax[1].set(xlabel='Ejection Fraction', ylabel='Number of patients', yticks=ef_count_ticks)
ax[1].set_title('Dead and Survived patients by Ejection Fraction')
ax[1].legend()

In [None]:
# Linear fit of ejection fraction against the outcome flag.
# x/y are passed by keyword: seaborn >= 0.12 rejects them as positional arguments.
plt.figure(figsize=(20, 7))
sb.regplot(data=data, x='DEATH_EVENT', y='ejection_fraction', color='tomato')
plt.title('Ejection Fraction vs Death event')

**As ejection fraction increases death rate decreases**

**Does having high blood pressure cause death due to heart-failure?**

In [None]:
# High blood pressure: all patients (left panel) and stacked by outcome (right panel).
# One shared tick list keeps both y-axes identical and free of duplicated values.
bp_count_ticks = list(range(20, 220, 20))  # 20, 40, ..., 200
bp_dead = data.loc[data['DEATH_EVENT'] == 1, 'high_blood_pressure']
bp_survived = data.loc[data['DEATH_EVENT'] == 0, 'high_blood_pressure']

fig, ax = plt.subplots(1, 2, figsize=(20, 9))

# Left panel: every patient.
ax[0].hist(data['high_blood_pressure'], color='yellow', label='patients', edgecolor='black')
ax[0].set_xlabel('High BP')
ax[0].set_ylabel('Number of Patients')
ax[0].set_yticks(bp_count_ticks)
ax[0].set_xticks([0, 1])
ax[0].set_xticklabels(['Absent', 'Present'])
ax[0].legend()
ax[0].set_title('High BP Patients')

# Right panel: patients split into dead / survived, stacked.
ax[1].hist(
    x=[bp_dead, bp_survived],
    color=['green', 'yellow'],
    stacked=True,
    edgecolor='black',
    label=['Dead', 'Survived'],
)
ax[1].set_xlabel('High BP')
ax[1].set_ylabel('Number of patients')
ax[1].set_yticks(bp_count_ticks)
ax[1].set_xticks([0, 1])
ax[1].set_xticklabels(['Absent', 'Present'])
ax[1].set_title('Dead and Survived patients By High BP')
ax[1].legend()


In [None]:
# Linear fit of the outcome flag against the high-blood-pressure flag.
# x/y are passed by keyword: seaborn >= 0.12 rejects them as positional arguments.
plt.figure(figsize=(20, 7))
sb.regplot(data=data, x='high_blood_pressure', y='DEATH_EVENT', color='yellow')

* Approx 30% of patients not having high blood pressure died due to heart-failure
* Approx 38% of patients having high blood pressure died due to heart-failure

**With High Blood Pressure the chance of death due to heart-failure increases**

**How does platelet count affect death due to heart failure**

In [None]:
# Platelet count: all patients (left panel) and stacked by outcome (right panel).
platelet_level_ticks = list(range(100000, 900000, 100000))  # 100k, 200k, ..., 800k
platelet_count_ticks = list(range(20, 160, 20))             # 20, 40, ..., 140
platelets_dead = data.loc[data['DEATH_EVENT'] == 1, 'platelets']
platelets_survived = data.loc[data['DEATH_EVENT'] == 0, 'platelets']

fig, ax = plt.subplots(1, 2, figsize=(25, 9))

# Left panel: every patient. The label gives legend() an entry to draw;
# without it matplotlib warns and renders an empty legend.
ax[0].hist(data['platelets'], color='red', label='patients', edgecolor='black')
ax[0].set_xlabel('Platelet Count')
ax[0].set_ylabel('Number of patients')
ax[0].set_xticks(platelet_level_ticks)
ax[0].set_yticks(platelet_count_ticks)
ax[0].legend()
ax[0].set_title('Platelet Count')

# Right panel: patients split into dead / survived, stacked.
ax[1].hist(
    x=[platelets_dead, platelets_survived],
    color=['white', 'red'],
    stacked=True,
    edgecolor='black',
    label=['Dead', 'Survived'],
)
ax[1].set_xlabel('Platelet Count')
ax[1].set_ylabel('Number of patients')
ax[1].set_yticks(platelet_count_ticks)
ax[1].set_xticks(platelet_level_ticks)
ax[1].set_title('Dead and Survived patients and platelet count')
ax[1].legend()

In [None]:
# Linear fit of the outcome flag against platelet count.
# x/y are passed by keyword: seaborn >= 0.12 rejects them as positional arguments.
plt.figure(figsize=(20, 7))
sb.regplot(data=data, x='platelets', y='DEATH_EVENT', color='red')

* The Platelet count in blood varies from 100,000 to 500,000
* As platelet count increases the chance of death due to heart-failure decreases

In [None]:
# Linear fit of age against platelet count.
# x/y are passed by keyword: seaborn >= 0.12 rejects them as positional arguments.
plt.figure(figsize=(20, 7))
sb.regplot(data=data, x='platelets', y='age', color='red')

**Platelet count decreases as age increases**

**Do serum creatinine levels have a role in death due to heart-failure?**

*Creatinine is a waste product that comes from the normal wear and tear on muscles of the body.*

In [None]:
# Serum creatinine: all patients (left panel) and stacked by outcome (right panel).
creatinine_count_ticks = [5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 120, 140, 160, 180, 200, 220]
creatinine_level_ticks = list(range(1, 10))  # 1 .. 9
creatinine_dead = data.loc[data['DEATH_EVENT'] == 1, 'serum_creatinine']
creatinine_survived = data.loc[data['DEATH_EVENT'] == 0, 'serum_creatinine']

fig, ax = plt.subplots(1, 2, figsize=(25, 9))

# Left panel: every patient. The label gives legend() an entry to draw;
# without it matplotlib warns and renders an empty legend.
ax[0].hist(data['serum_creatinine'], color='darkgreen', label='patients', edgecolor='black')
ax[0].set_xlabel('serum creatinine level')
ax[0].set_ylabel('Number of patients')
ax[0].set_yticks(creatinine_count_ticks)
ax[0].set_xticks(creatinine_level_ticks)
ax[0].legend()
ax[0].set_title('Serum Creatinine level')

# Right panel: patients split into dead / survived, stacked.
ax[1].hist(
    x=[creatinine_dead, creatinine_survived],
    color=['white', 'darkgreen'],
    stacked=True,
    edgecolor='black',
    label=['Dead', 'Survived'],
)
ax[1].set_xlabel('Serum Creatinine level')
ax[1].set_ylabel('Number of patients')
ax[1].set_yticks(creatinine_count_ticks)
ax[1].set_xticks(creatinine_level_ticks)
ax[1].set_title('Dead and Survived patients and Serum Creatinine level')
ax[1].legend()

In [None]:
# Linear fit of the outcome flag against serum creatinine level.
# x/y are passed by keyword: seaborn >= 0.12 rejects them as positional arguments.
plt.figure(figsize=(20, 7))
sb.regplot(data=data, x='serum_creatinine', y='DEATH_EVENT', color='red')

* 73% patients have creatinine levels at Level 1
* 16% patients have creatinine levels at Level 2
* 4% patients have creatinine levels at Level 3
* 2.5% patients have creatinine levels at Level 4
* 1.5% patients have creatinine levels at Level 5
* 0.5% patients have creatinine levels at Level 6
* 1% patients have creatinine levels at Level 7
* 0% patients have creatinine levels at Level 8
* 1.5% patients have creatinine levels at Level 9



* 18% at Level 1 died due to heart failure
* 54% at Level 2 died due to heart failure
* 83% at Level 3 died due to heart failure
* 66% at Level 4 died due to heart failure
* 100% at Level 5 died due to heart failure
* 50% at Level 6 died due to heart failure
* 100%  at Level 7 died due to heart failure
* 100%  at Level 9 died due to heart failure



**As serum creatinine level increases chances of death due to heart-failure increases**

**Does sodium level cause death due to heart-failure?**

In [None]:
# Serum sodium: all patients (left panel) and stacked by outcome (right panel).
sodium_count_ticks = [5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 120, 140, 160, 180, 200, 220]
sodium_dead = data.loc[data['DEATH_EVENT'] == 1, 'serum_sodium']
sodium_survived = data.loc[data['DEATH_EVENT'] == 0, 'serum_sodium']

fig, ax = plt.subplots(1, 2, figsize=(25, 9))

# Left panel: every patient. The label gives legend() an entry to draw;
# without it matplotlib warns and renders an empty legend.
ax[0].hist(data['serum_sodium'], color='pink', label='patients', edgecolor='black')
ax[0].set_xlabel('sodium level')
ax[0].set_ylabel('Number of patients')
ax[0].set_yticks(sodium_count_ticks)
ax[0].legend()
ax[0].set_title('Sodium level')

# Right panel: patients split into dead / survived, stacked.
ax[1].hist(
    x=[sodium_dead, sodium_survived],
    color=['magenta', 'pink'],
    stacked=True,
    edgecolor='black',
    label=['Dead', 'Survived'],
)
ax[1].set_xlabel('Sodium level')
ax[1].set_ylabel('Number of patients')
ax[1].set_yticks(sodium_count_ticks)
ax[1].set_title('Dead and Survived patients and Sodium level')
ax[1].legend()


In [None]:
# Linear fit of the outcome flag against serum sodium level.
# x/y are passed by keyword: seaborn >= 0.12 rejects them as positional arguments.
plt.figure(figsize=(20, 7))
sb.regplot(data=data, x='serum_sodium', y='DEATH_EVENT', color='red')
plt.title("Sodium Level vs Death event")

In [None]:
# Linear fit of ejection fraction against serum sodium level.
# x/y are passed by keyword: seaborn >= 0.12 rejects them as positional arguments.
plt.figure(figsize=(20, 7))
sb.regplot(data=data, x='serum_sodium', y='ejection_fraction', color='red')
plt.title("Sodium Level vs ejection Fraction")

In [None]:
# Linear fit of serum creatinine against serum sodium level.
# x/y are passed by keyword: seaborn >= 0.12 rejects them as positional arguments.
plt.figure(figsize=(20, 7))
sb.regplot(data=data, x='serum_sodium', y='serum_creatinine', color='red')
plt.title("Sodium vs creatinine")

**As sodium concentration increases, the chance of death due to heart failure decreases. Also, as sodium concentration increases, ejection fraction increases and creatinine concentration decreases, which in turn lowers the chance of death due to heart failure.**

**Does Gender have a role to play in death due to heart-failure?**

In [None]:
# Gender: all patients (left panel) and stacked by outcome (right panel).
# One shared tick list keeps both y-axes identical and free of duplicated values.
sex_count_ticks = list(range(20, 240, 20))  # 20, 40, ..., 220
sex_dead = data.loc[data['DEATH_EVENT'] == 1, 'sex']
sex_survived = data.loc[data['DEATH_EVENT'] == 0, 'sex']

fig, ax = plt.subplots(1, 2, figsize=(20, 9))

# Left panel: every patient.
ax[0].hist(data['sex'], color='blue', label='patients', edgecolor='black')
ax[0].set_xlabel('Gender')
ax[0].set_ylabel('Number of Patients')
ax[0].set_yticks(sex_count_ticks)
ax[0].set_xticks([0, 1])
ax[0].set_xticklabels(['Female', 'Male'])
ax[0].legend()
ax[0].set_title('Gender Distribution')

# Right panel: patients split into dead / survived, stacked.
ax[1].hist(
    x=[sex_dead, sex_survived],
    color=['blue', 'black'],
    stacked=True,
    edgecolor='white',
    label=['Dead', 'Survived'],
)
ax[1].set_xlabel('Gender')
ax[1].set_ylabel('Number of patients')
ax[1].set_yticks(sex_count_ticks)
ax[1].set_xticks([0, 1])
ax[1].set_xticklabels(['Female', 'Male'])
ax[1].set_title('Dead and Survived patients By Gender')
ax[1].legend()


In [None]:
# Linear fit of the outcome flag against the sex flag.
# x/y are passed by keyword: seaborn >= 0.12 rejects them as positional arguments.
plt.figure(figsize=(20, 7))
sb.regplot(data=data, x='sex', y='DEATH_EVENT', color='red')
plt.title("Gender vs Death event")

**Both genders are equally likely to die due to heart failure, which is consistent with the flat line in the regression plot.**

**Does Smoking have an effect?**

In [None]:
# Smoking: all patients (left panel) and stacked by outcome (right panel).
# One shared tick list keeps both y-axes identical and free of duplicated values.
smoking_count_ticks = list(range(20, 240, 20))  # 20, 40, ..., 220
smoking_dead = data.loc[data['DEATH_EVENT'] == 1, 'smoking']
smoking_survived = data.loc[data['DEATH_EVENT'] == 0, 'smoking']

fig, ax = plt.subplots(1, 2, figsize=(20, 9))

# Left panel: every patient.
ax[0].hist(data['smoking'], color='limegreen', label='patients', edgecolor='green')
ax[0].set_xlabel('Smoking')
ax[0].set_ylabel('Number of Patients')
ax[0].set_yticks(smoking_count_ticks)
ax[0].set_xticks([0, 1])
ax[0].set_xticklabels(['No', 'Yes'])
ax[0].legend()
ax[0].set_title('Smoking Distribution')

# Right panel: patients split into dead / survived, stacked.
ax[1].hist(
    x=[smoking_dead, smoking_survived],
    color=['red', 'green'],
    stacked=True,
    edgecolor='black',
    label=['Dead', 'Survived'],
)
ax[1].set_xlabel('Smoking')
ax[1].set_ylabel('Number of patients')
ax[1].set_yticks(smoking_count_ticks)
ax[1].set_xticks([0, 1])
ax[1].set_xticklabels(['No', 'Yes'])
ax[1].set_title('Dead and Survived patients By whether they smoke')
ax[1].legend()


In [None]:
# Linear fit of the outcome flag against the smoking flag.
# x/y are passed by keyword: seaborn >= 0.12 rejects them as positional arguments.
plt.figure(figsize=(20, 7))
sb.regplot(data=data, x='smoking', y='DEATH_EVENT', color='red')
plt.title("Smoking vs Death event")

**Smoking doesn't really affect the chance of death due to heart failure, but it does cause cancer, which can be fatal.**

# **Model Selection**

In [None]:
from sklearn.model_selection import train_test_split,GridSearchCV,cross_val_score,cross_val_predict
from sklearn.linear_model import LogisticRegression,SGDClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier,BaggingClassifier,AdaBoostClassifier
from sklearn.metrics import confusion_matrix,precision_score,recall_score,f1_score,accuracy_score

In [None]:
# Separate the target column from the feature matrix.
# `columns=` is used because the positional axis argument was removed in
# pandas 2.0. Dropping out-of-place leaves `data` intact, so re-running this
# cell does not fail on a column that has already been removed.
y = data['DEATH_EVENT']
X = data.drop(columns='DEATH_EVENT')



In [None]:
# Hold out 25% of the rows for testing.
# random_state makes the split (and every score derived from it) reproducible;
# stratify=y keeps the dead/survived ratio the same in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42, stratify=y
)

**LogisticRegression**

In [None]:
# Logistic regression with inverse regularisation strength C=0.1,
# trained on the training split.
clf1 = LogisticRegression(C=0.1)
clf1.fit(X=X_train, y=y_train)

In [None]:
# Out-of-fold predictions on the training split (4-fold CV) and the metrics derived from them.
y_train_pred = cross_val_predict(clf1, X_train, y_train, cv=4)
print("Confusion Matrix: \n", confusion_matrix(y_train, y_train_pred))
print('Precision Score:', precision_score(y_train, y_train_pred))
print("Recall Score:", recall_score(y_train, y_train_pred))
print("Accuracy Score:", accuracy_score(y_train, y_train_pred))
print("Cross Val Score Insample", cross_val_score(clf1, X_train, y_train, cv=4, scoring='accuracy').mean())
# Out-of-sample check: score the classifier that was fitted on the training
# split against the held-out test split. Cross-validating on (X_test, y_test)
# would instead train fresh copies on slices of the test set and never
# evaluate clf1 itself.
print("Accuracy Outsample (held-out test split)", accuracy_score(y_test, clf1.predict(X_test)))

SGD Classifier

In [None]:
# Linear classifier trained with stochastic gradient descent.
# SGD shuffles the data between epochs, so a fixed random_state is needed
# for the fitted model (and its scores) to be reproducible.
clf2 = SGDClassifier(random_state=42)
clf2.fit(X_train, y_train)

In [None]:
# Out-of-fold predictions on the training split (4-fold CV) and the metrics derived from them.
y_train_pred = cross_val_predict(clf2, X_train, y_train, cv=4)
print("Confusion Matrix: \n", confusion_matrix(y_train, y_train_pred))
print('Precision Score:', precision_score(y_train, y_train_pred))
print("Recall Score:", recall_score(y_train, y_train_pred))
print("Accuracy Score:", accuracy_score(y_train, y_train_pred))
print("Cross Val Score Insample", cross_val_score(clf2, X_train, y_train, cv=4, scoring='accuracy').mean())
# Out-of-sample check: score the classifier that was fitted on the training
# split against the held-out test split. Cross-validating on (X_test, y_test)
# would instead train fresh copies on slices of the test set and never
# evaluate clf2 itself.
print("Accuracy Outsample (held-out test split)", accuracy_score(y_test, clf2.predict(X_test)))

KNeighbors Classifier

In [None]:
# Pick n_neighbors (1..5) by 5-fold cross-validation.
# The search is fitted on the training split only: searching on the full
# (X, y) would let test rows influence the chosen hyper-parameter.
param_grid = {'n_neighbors': np.arange(1, 6)}
grid_knn = GridSearchCV(KNeighborsClassifier(), param_grid, cv=5)
grid_knn.fit(X_train, y_train)

In [None]:
# Show the n_neighbors value that scored best in the grid search.
grid_knn.best_params_

In [None]:
# Take the winning KNN configuration from the grid search and
# (re)train it on the training split.
clf3 = grid_knn.best_estimator_
clf3.fit(X=X_train, y=y_train)

In [None]:
# Out-of-fold predictions on the training split (4-fold CV) and the metrics derived from them.
y_train_pred = cross_val_predict(clf3, X_train, y_train, cv=4)
print("Confusion Matrix: \n", confusion_matrix(y_train, y_train_pred))
print('Precision Score:', precision_score(y_train, y_train_pred))
print("Recall Score:", recall_score(y_train, y_train_pred))
print("Accuracy Score:", accuracy_score(y_train, y_train_pred))
print("Cross Val Score Insample", cross_val_score(clf3, X_train, y_train, cv=4, scoring='accuracy').mean())
# Out-of-sample check: score the classifier that was fitted on the training
# split against the held-out test split. Cross-validating on (X_test, y_test)
# would instead train fresh copies on slices of the test set and never
# evaluate clf3 itself.
print("Accuracy Outsample (held-out test split)", accuracy_score(y_test, clf3.predict(X_test)))

**DecisionTreeClassifier**

In [None]:
# Pick max_depth (1..4) and min_samples_leaf (1..3) by 5-fold cross-validation.
# The search is fitted on the training split only: searching on the full
# (X, y) would let test rows influence the chosen hyper-parameters.
# random_state fixes the tree's tie-breaking so the search is reproducible.
param_grid = {'max_depth': np.arange(1, 5), 'min_samples_leaf': np.arange(1, 4)}
grid_tree = GridSearchCV(DecisionTreeClassifier(random_state=42), param_grid, cv=5)
grid_tree.fit(X_train, y_train)

In [None]:
# Show the max_depth / min_samples_leaf combination that scored best in the grid search.
grid_tree.best_params_

In [None]:
# Take the winning decision-tree configuration from the grid search and
# (re)train it on the training split.
clf4 = grid_tree.best_estimator_
clf4.fit(X=X_train, y=y_train)

In [None]:
# Out-of-fold predictions on the training split (4-fold CV) and the metrics derived from them.
y_train_pred = cross_val_predict(clf4, X_train, y_train, cv=4)
print("Confusion Matrix: \n", confusion_matrix(y_train, y_train_pred))
print('Precision Score:', precision_score(y_train, y_train_pred))
print("Recall Score:", recall_score(y_train, y_train_pred))
print("Accuracy Score:", accuracy_score(y_train, y_train_pred))
print("Cross Val Score Insample", cross_val_score(clf4, X_train, y_train, cv=4, scoring='accuracy').mean())
# Out-of-sample check: score the classifier that was fitted on the training
# split against the held-out test split. Cross-validating on (X_test, y_test)
# would instead train fresh copies on slices of the test set and never
# evaluate clf4 itself.
print("Accuracy Outsample (held-out test split)", accuracy_score(y_test, clf4.predict(X_test)))

RandomForestClassifier

In [None]:
# Small, heavily constrained random forest trained on the training split.
# Only the non-default settings are spelled out. `min_impurity_split` is not
# passed because scikit-learn 1.0 removed it, and max_features uses 'sqrt'
# (what 'auto' meant for classifiers) because 'auto' was removed in 1.3.
clf5 = RandomForestClassifier(
    n_estimators=10,
    max_depth=3,
    max_leaf_nodes=2,
    max_features='sqrt',
    min_samples_leaf=2,
    min_samples_split=15,
    n_jobs=-1,
    random_state=1,
)
clf5.fit(X_train, y_train)

In [None]:
# Out-of-fold predictions on the training split and the metrics derived from them.
# cv=4 matches the cross_val_score call below and the other model evaluations.
y_train_pred = cross_val_predict(clf5, X_train, y_train, cv=4)
print("Confusion Matrix: \n", confusion_matrix(y_train, y_train_pred))
print('Precision Score:', precision_score(y_train, y_train_pred))
print("Recall Score:", recall_score(y_train, y_train_pred))
print("Accuracy Score:", accuracy_score(y_train, y_train_pred))
print("Cross Val Score Insample", cross_val_score(clf5, X_train, y_train, cv=4, scoring='accuracy').mean())
# Out-of-sample check: score the classifier that was fitted on the training
# split against the held-out test split. Cross-validating on (X_test, y_test)
# would instead train fresh copies on slices of the test set and never
# evaluate clf5 itself.
print("Accuracy Outsample (held-out test split)", accuracy_score(y_test, clf5.predict(X_test)))

ADABOOST Classifier

In [None]:
# AdaBoost over depth-3 decision trees (1500 boosting rounds), trained on the
# training split. random_state is fixed so the ensemble and its scores are
# reproducible between runs.
clf6 = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=3, min_samples_leaf=3),
    n_estimators=1500,
    random_state=42,
)
clf6.fit(X_train, y_train)

In [None]:
# Out-of-fold predictions on the training split and the metrics derived from them.
# cv=4 matches the cross_val_score call below and the other model evaluations.
y_train_pred = cross_val_predict(clf6, X_train, y_train, cv=4)
print("Confusion Matrix: \n", confusion_matrix(y_train, y_train_pred))
print('Precision Score:', precision_score(y_train, y_train_pred))
print("Recall Score:", recall_score(y_train, y_train_pred))
print("Accuracy Score:", accuracy_score(y_train, y_train_pred))
print("Cross Val Score Insample", cross_val_score(clf6, X_train, y_train, cv=4, scoring='accuracy').mean())
# Out-of-sample check: score the classifier that was fitted on the training
# split against the held-out test split. Cross-validating on (X_test, y_test)
# would instead train fresh copies on slices of the test set and never
# evaluate clf6 itself.
print("Accuracy Outsample (held-out test split)", accuracy_score(y_test, clf6.predict(X_test)))

# **Model Development**

In [None]:
# Final model: a shallow decision tree trained on all available rows.
# random_state fixes the tree's tie-breaking so the same tree (and the same
# predictions) are produced on every run.
classifier = DecisionTreeClassifier(max_depth=3, min_samples_leaf=3, random_state=42)
classifier.fit(X, y)

In [None]:
# One example patient, values listed in this order:
# age, anaemia, creatinine_phosphokinase, diabetes, ejection_fraction,
# high_blood_pressure, platelets, serum_creatinine, serum_sodium, sex, smoking
# Wrapping the row in a DataFrame with X's columns gives predict() the same
# feature names the classifier was fitted with (a bare list triggers a
# feature-name warning in recent scikit-learn).
patient = pd.DataFrame([[76, 1, 653, 0, 40, 1, 26500, 1.6, 128, 1, 0]], columns=X.columns)
death_chance = classifier.predict(patient)
# predict() returns an array with one entry per row; test the single prediction explicitly.
if death_chance[0] == 1:
    print("There are chances of death due to heart-failure")
else:
    print("Less Chance of Death due to heart-failure")

# **Exploring how good a Multi-Layer Perceptron is compared to a classical Machine Learning model**

In [None]:
from tensorflow import keras
from keras.models import Model
from keras.layers import Dense,Dropout,Input
from keras.utils import to_categorical,plot_model
from sklearn.preprocessing import StandardScaler

In [None]:
# Standardise the features and one-hot encode the labels for the network.
sc = StandardScaler()
num_labels = len(np.unique(y_train))
# Learn the scaling statistics from the training split only ...
X_train = sc.fit_transform(X_train)
# ... and apply those same statistics to the test split. Re-fitting the scaler
# on X_test would scale the two splits differently and leak test statistics.
X_test = sc.transform(X_test)
# Pin the one-hot width so both label arrays have num_labels columns even if
# a split happens to miss a class.
y_train = to_categorical(y_train, num_classes=num_labels)
y_test = to_categorical(y_test, num_classes=num_labels)

In [None]:
# Build the MLP: five Dense+Dropout stages of growing width, two narrow Dense
# layers, and a 2-way softmax output.
input_shape = X_train.shape[1]
# Keras expects `shape` to be a tuple; a bare int is rejected by Keras 3.
inputs = Input(shape=(input_shape,))
x = inputs
for units in (64, 128, 256, 512, 512):
    x = Dense(units, activation='relu')(x)
    x = Dropout(0.6)(x)
x = Dense(10, activation='relu')(x)
x = Dense(8, activation='relu')(x)

outputs = Dense(2, activation='softmax')(x)
model = Model(inputs, outputs)
model.summary()
plot_model(model)

In [None]:
# Configure training: RMSprop optimiser, binary cross-entropy loss, accuracy metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

In [None]:
# Train for 100 epochs in mini-batches of 10 samples.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=10,
    epochs=100,
)

In [None]:
# Loss and accuracy of the trained network on the test split.
loss, acc = model.evaluate(x=X_test, y=y_test, batch_size=10)