In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from collections import Counter
from sklearn.compose import ColumnTransformer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, InputLayer
from sklearn.metrics import classification_report
from tensorflow.keras.utils import to_categorical
import numpy as np

In [2]:
data = pd.read_csv('heart_failure.csv')
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 299 entries, 0 to 298
Data columns (total 15 columns):
Unnamed: 0                  299 non-null int64
age                         299 non-null float64
anaemia                     299 non-null object
creatinine_phosphokinase    299 non-null int64
diabetes                    299 non-null object
ejection_fraction           299 non-null int64
high_blood_pressure         299 non-null object
platelets                   299 non-null float64
serum_creatinine            299 non-null float64
serum_sodium                299 non-null int64
sex                         299 non-null object
smoking                     299 non-null object
time                        299 non-null int64
DEATH_EVENT                 299 non-null int64
death_event                 299 non-null object
dtypes: float64(3), int64(6), object(6)
memory usage: 35.2+ KB


In [3]:
print(Counter(data.death_event))

Counter({'no': 203, 'yes': 96})


In [4]:
y = data.death_event
x = data[['age','anaemia','creatinine_phosphokinase','diabetes','ejection_fraction','high_blood_pressure','platelets','serum_creatinine','serum_sodium','sex','smoking','time']]

In [5]:
x

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time
0,75.0,no,582,no,20,yes,265000.00,1.9,130,yes,no,4
1,55.0,no,7861,no,38,no,263358.03,1.1,136,yes,no,6
2,65.0,no,146,no,20,no,162000.00,1.3,129,yes,yes,7
3,50.0,yes,111,no,20,no,210000.00,1.9,137,yes,no,7
4,65.0,yes,160,yes,20,no,327000.00,2.7,116,no,no,8
...,...,...,...,...,...,...,...,...,...,...,...,...
294,62.0,no,61,yes,38,yes,155000.00,1.1,143,yes,yes,270
295,55.0,no,1820,no,38,no,270000.00,1.2,139,no,no,271
296,45.0,no,2060,yes,60,no,742000.00,0.8,138,no,no,278
297,45.0,no,2413,no,38,no,140000.00,1.4,140,yes,yes,280


In [6]:
x = pd.get_dummies(x)
x

Unnamed: 0,age,creatinine_phosphokinase,ejection_fraction,platelets,serum_creatinine,serum_sodium,time,anaemia_no,anaemia_yes,diabetes_no,diabetes_yes,high_blood_pressure_no,high_blood_pressure_yes,sex_no,sex_yes,smoking_no,smoking_yes
0,75.0,582,20,265000.00,1.9,130,4,1,0,1,0,0,1,0,1,1,0
1,55.0,7861,38,263358.03,1.1,136,6,1,0,1,0,1,0,0,1,1,0
2,65.0,146,20,162000.00,1.3,129,7,1,0,1,0,1,0,0,1,0,1
3,50.0,111,20,210000.00,1.9,137,7,0,1,1,0,1,0,0,1,1,0
4,65.0,160,20,327000.00,2.7,116,8,0,1,0,1,1,0,1,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
294,62.0,61,38,155000.00,1.1,143,270,1,0,0,1,0,1,0,1,0,1
295,55.0,1820,38,270000.00,1.2,139,271,1,0,1,0,1,0,1,0,1,0
296,45.0,2060,60,742000.00,0.8,138,278,1,0,0,1,1,0,1,0,1,0
297,45.0,2413,38,140000.00,1.4,140,280,1,0,1,0,1,0,0,1,0,1


## Spliting Dataset for training

In [7]:
X_train, X_test, Y_train, Y_test = train_test_split(x, y, test_size=0.2)

In [8]:
ct = ColumnTransformer([('numeric', StandardScaler(), ['age','creatinine_phosphokinase','ejection_fraction','platelets','serum_creatinine','serum_sodium','time'])])

In [9]:
X_train = ct.fit_transform(X_train)
X_test = ct.transform(X_test)

## Preparing labels for Classification

In [13]:
le = LabelEncoder()
Y_train = le.fit_transform(Y_train.astype(str))
Y_test = le.fit_transform(Y_test.astype(str))

In [14]:
Y_train = to_categorical(Y_train)
Y_test = to_categorical(Y_test)

### Model Design

In [15]:
model =  Sequential()

In [16]:
model.add(InputLayer(input_shape = (X_train.shape[1], )))
model.add(Dense(12, activation = 'relu'))
model.add(Dense(2, activation = 'softmax'))

In [17]:
model.compile(loss = 'categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])

## Train and evaluate the model

In [18]:
model.fit(X_train, Y_train, epochs = 100, batch_size = 16, verbose = 1)

Train on 239 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100


Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<tensorflow.python.keras.callbacks.History at 0x1faa8e5e948>

In [19]:
loss, acc = model.evaluate(X_test, Y_test, verbose = 0)
loss

0.33551783859729767

In [20]:
acc

0.8333333

## Generating a classification report`

In [21]:
y_estimate = model.predict(X_test, verbose = 0)
y_estimate = np.argmax(y_estimate, axis = 1)
y_estimate

array([1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0], dtype=int64)

In [22]:
y_true = np.argmax(Y_test, axis = 1)
y_true

array([0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0], dtype=int64)

In [24]:
print(classification_report(y_true, y_estimate))

              precision    recall  f1-score   support

           0       0.89      0.89      0.89        44
           1       0.69      0.69      0.69        16

    accuracy                           0.83        60
   macro avg       0.79      0.79      0.79        60
weighted avg       0.83      0.83      0.83        60

