In [26]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from collections import Counter
from sklearn.compose import ColumnTransformer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, InputLayer
from sklearn.metrics import classification_report
from tensorflow.keras.utils import to_categorical
import numpy as np

# Loading the Data

In [27]:
# Read the heart-failure records from the CSV sitting next to the notebook
csv_path = 'heart_failure.csv'
data = pd.read_csv(csv_path)

# Preview the first five rows as a sanity check on the load
data.head(5)

Unnamed: 0.1,Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT,death_event
0,0,75.0,no,582,no,20,yes,265000.0,1.9,130,yes,no,4,1,yes
1,1,55.0,no,7861,no,38,no,263358.03,1.1,136,yes,no,6,1,yes
2,2,65.0,no,146,no,20,no,162000.0,1.3,129,yes,yes,7,1,yes
3,3,50.0,yes,111,no,20,no,210000.0,1.9,137,yes,no,7,1,yes
4,4,65.0,yes,160,yes,20,no,327000.0,2.7,116,no,no,8,1,yes


In [28]:
# Print each column's name, non-null count and dtype
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 299 entries, 0 to 298
Data columns (total 15 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Unnamed: 0                299 non-null    int64  
 1   age                       299 non-null    float64
 2   anaemia                   299 non-null    object 
 3   creatinine_phosphokinase  299 non-null    int64  
 4   diabetes                  299 non-null    object 
 5   ejection_fraction         299 non-null    int64  
 6   high_blood_pressure       299 non-null    object 
 7   platelets                 299 non-null    float64
 8   serum_creatinine          299 non-null    float64
 9   serum_sodium              299 non-null    int64  
 10  sex                       299 non-null    object 
 11  smoking                   299 non-null    object 
 12  time                      299 non-null    int64  
 13  DEATH_EVENT               299 non-null    int64  
 14  death_even

In [29]:
# Tally how many rows fall into each class of the 'death_event' column
death_event_labels = data['death_event']
Counter(death_event_labels)

Counter({'no': 203, 'yes': 96})

In [30]:
# Columns used as model inputs
feature_columns = [
    'age',
    'anaemia',
    'creatinine_phosphokinase',
    'diabetes',
    'ejection_fraction',
    'high_blood_pressure',
    'platelets',
    'serum_creatinine',
    'serum_sodium',
    'sex',
    'smoking',
    'time',
]

# Feature matrix and the target labels
x = data[feature_columns]
y = data['death_event']


# Data preprocessing

In [31]:
# One-hot encode the categorical (object) columns so every feature is numeric
x = pd.get_dummies(x)

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
split_parts = train_test_split(x, y, test_size=0.2, random_state=15)
X_train, X_test, Y_train, Y_test = split_parts

In [32]:
# Standardize only the continuous columns; every other column is passed through unchanged
numeric_columns = [
    'age',
    'creatinine_phosphokinase',
    'ejection_fraction',
    'platelets',
    'serum_creatinine',
    'serum_sodium',
    'time',
]
ct = ColumnTransformer(
    [('numeric', StandardScaler(), numeric_columns)],
    remainder='passthrough',
)

# Fit the scaler on the training split only, then apply the same transform to the test split
X_train = ct.fit_transform(X_train)
X_test = ct.transform(X_test)


# Prepare labels for classification

In [35]:
# Encode the string class labels as integer ids. The encoder is fitted on the
# training labels only and then reused for the test labels.
le = LabelEncoder()
Y_train = le.fit_transform(Y_train.astype(str))
Y_test = le.transform(Y_test.astype(str))

# One-hot encode the integer ids. The width is pinned to the number of classes
# the encoder learned: without num_classes, to_categorical infers the width
# from the largest id present in each array separately, so a split that
# happens to lack the highest class would come out narrower than the other.
num_classes = len(le.classes_)
Y_train = to_categorical(Y_train, num_classes=num_classes)
Y_test = to_categorical(Y_test, num_classes=num_classes)


# Design the model

In [36]:
# Build a small feed-forward classifier: input -> 12 ReLU units -> 2-way softmax
model = Sequential()

# Input layer sized from the array that is actually passed to fit(): one entry
# per feature column of X_train. The layer is named `input_layer` so that the
# built-in input() is not shadowed for the rest of the notebook.
input_layer = InputLayer(input_shape=(X_train.shape[1],))
model.add(input_layer)

# Single hidden layer
model.add(Dense(12, activation='relu'))

# Output layer: one softmax probability per class (two classes)
model.add(Dense(2, activation='softmax'))

# categorical_crossentropy pairs with the one-hot encoded labels
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train and evaluate the model

In [38]:
# Fit for 100 epochs in mini-batches of 16, logging progress for every epoch
model.fit(x=X_train, y=Y_train, batch_size=16, epochs=100, verbose=1)

# Score the trained network on the held-out test split
evaluation = model.evaluate(X_test, Y_test, verbose=0)
loss, acc = evaluation

print("Loss:", loss, "Accuracy:", acc)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

# Generating a classification report

In [40]:
# Per-class probabilities predicted for every test row
y_prob = model.predict(X_test, verbose=0)

# Collapse probabilities / one-hot rows back to integer class ids
y_estimate = np.argmax(y_prob, axis=1)
y_true = np.argmax(Y_test, axis=1)

# Label the report rows with the original class names learned by the
# LabelEncoder instead of the bare ids 0/1. Passing `labels` explicitly keeps
# the rows aligned with `target_names` even if a class is absent from the
# test split.
class_ids = np.arange(len(le.classes_))
print(classification_report(y_true, y_estimate, labels=class_ids, target_names=le.classes_))

              precision    recall  f1-score   support

           0       0.84      0.88      0.86        43
           1       0.67      0.59      0.62        17

    accuracy                           0.80        60
   macro avg       0.76      0.74      0.74        60
weighted avg       0.79      0.80      0.80        60

