# Cardiovascular Failure Prediction

In [200]:
import pandas as pd
import numpy as np
from collections import Counter

from sklearn.preprocessing import StandardScaler, LabelEncoder, Normalizer
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.metrics import classification_report


from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import InputLayer, Dense
from tensorflow.keras.utils import to_categorical

In [201]:
#dataset is from kaggle 
#https://www.kaggle.com/datasets/andrewmvd/heart-failure-clinical-data
#CVD (Cardiovascular disease) is number one cause of death in the world
#ML model could help detect cardiovascular failure earlier for those 
#with higher risks or factors such as hypertension or diabetes

#### Loading Dataset

In [202]:
data = pd.read_csv("heart_failure_clinical_records_dataset.csv")
data.info()
data.head(10)
data = data.drop(["sl"], axis=1)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 299 entries, 0 to 298
Data columns (total 15 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   sl                        299 non-null    int64  
 1   age                       299 non-null    float64
 2   anaemia                   299 non-null    object 
 3   creatinine_phosphokinase  299 non-null    int64  
 4   diabetes                  299 non-null    object 
 5   ejection_fraction         299 non-null    int64  
 6   high_blood_pressure       299 non-null    object 
 7   platelets                 299 non-null    float64
 8   serum_creatinine          299 non-null    float64
 9   serum_sodium              299 non-null    int64  
 10  sex                       299 non-null    object 
 11  smoking                   299 non-null    object 
 12  time                      299 non-null    int64  
 13  DEATH_EVENT               299 non-null    int64  
 14  death_even

In [204]:
data.head(10)
Counter(data["death_event"])
#this gives the number of labels with yes and no

Counter({'yes': 96, 'no': 203})

In [205]:
#selecting the features and labels
x = data[['age','anaemia','creatinine_phosphokinase','diabetes','ejection_fraction','high_blood_pressure','platelets','serum_creatinine','serum_sodium','sex','smoking','time']]
# x.columns
y = data["death_event"]

#### Data Preprocessing

In [236]:
x = pd.get_dummies(x)
x.columns

Index(['age', 'creatinine_phosphokinase', 'ejection_fraction', 'platelets',
       'serum_creatinine', 'serum_sodium', 'time', 'anaemia_no', 'anaemia_yes',
       'diabetes_no', 'diabetes_yes', 'high_blood_pressure_no',
       'high_blood_pressure_yes', 'sex_no', 'sex_yes', 'smoking_no',
       'smoking_yes'],
      dtype='object')

In [237]:
train_set, test_set, train_label, test_label = train_test_split(x, y, test_size=0.3, random_state=0)

In [238]:
#normalisation or standardisation
numerical_columns = ['age','creatinine_phosphokinase','ejection_fraction','platelets','serum_creatinine','serum_sodium','time']

In [239]:
ct = ColumnTransformer([('normalize', Normalizer(), numerical_columns)], remainder="passthrough")

In [240]:
train_normalised = ct.fit_transform(train_set)
test_normalised = ct.transform(test_set)

In [241]:
#this is a normalised dataframe
train_normalised_df = pd.DataFrame(train_normalised, columns=x.columns)
train_normalised_df

Unnamed: 0,age,creatinine_phosphokinase,ejection_fraction,platelets,serum_creatinine,serum_sodium,time,anaemia_no,anaemia_yes,diabetes_no,diabetes_yes,high_blood_pressure_no,high_blood_pressure_yes,sex_no,sex_yes,smoking_no,smoking_yes
0,0.001372,0.011411,0.000784,0.999918,0.000053,0.002666,0.004902,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0
1,0.000309,0.003031,0.000309,0.999995,0.000006,0.000732,0.000170,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0
2,0.000261,0.002337,0.000120,0.999997,0.000005,0.000546,0.000851,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0
3,0.000145,0.000887,0.000164,0.999999,0.000003,0.000509,0.000633,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0
4,0.000272,0.000625,0.000245,0.999999,0.000005,0.000728,0.000641,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
204,0.000238,0.002476,0.000152,0.999996,0.000003,0.000619,0.000931,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0
205,0.000192,0.000465,0.000154,1.000000,0.000003,0.000500,0.000673,0.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0
206,0.000168,0.000201,0.000118,1.000000,0.000006,0.000272,0.000185,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0
207,0.000133,0.001290,0.000084,0.999999,0.000001,0.000306,0.000089,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0


#### Prepare Label for classification

In [242]:
#initialisation of Label Encoder which converts labels into vectors
le = LabelEncoder()

In [243]:
#finding the vectors of the labels (yes or no is converted to 0 or 1)
train_label = le.fit_transform(train_label)
test_label = le.transform(test_label)

In [244]:
#convert encoded training labels into binary vector
train_label= to_categorical(train_label)

In [245]:
#converting test labels into binary vector
test_label = to_categorical(test_label)

#### Building Model

In [246]:
model = Sequential()

In [247]:
#input layer
input = InputLayer(input_shape = (x.shape[1], ))
model.add(input)
#shorter version model.add(InputLayer(input_shape = (x.shape[1],)))

In [248]:
#hidden layer
model.add(Dense(12, activation="relu"))

In [249]:
#output layer
#number of neurons depend on the number of classes in the dataset(labels)
#we have class of yes or no, so use two neurons in the output
model.add(Dense(2, activation="softmax"))


#### Compile & Evaluate

In [250]:
#compiling
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

In [251]:
#evaluating
model.fit(train_normalised, train_label, epochs=100, batch_size=16, verbose=1)


Epoch 1/100


2022-08-17 16:53:25.368175: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x10a708a30>

In [252]:
loss, accuracy = model.evaluate(test_normalised, test_label, verbose=0)

2022-08-17 16:55:11.672765: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


In [253]:
loss, accuracy

(0.6470140814781189, 0.6666666865348816)

#### Generating a classification report

In [261]:
y_estimate = model.predict(test_normalised)



In [262]:
#convert into a np array using argmax
y_estimate =  np.argmax(y_estimate, axis=1)

In [263]:
y_true = np.argmax(test_label, axis=1)

In [265]:
report = classification_report(y_true, y_estimate)

In [266]:
report.split("\n")

['              precision    recall  f1-score   support',
 '',
 '           0       0.70      0.92      0.79        62',
 '           1       0.38      0.11      0.17        28',
 '',
 '    accuracy                           0.67        90',
 '   macro avg       0.54      0.51      0.48        90',
 'weighted avg       0.60      0.67      0.60        90',
 '']