<a href="https://colab.research.google.com/github/sergiobaezlugo/Neural_Networks_Classification_Projects/blob/main/heart_failure.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [110]:
# Loading of Data

import pandas as pd
import numpy as np

url = 'https://raw.githubusercontent.com/sergiobaezlugo/Neural_Networks_Classification_Projects/main/heart_failure.csv'

dataset = pd.read_csv(url)

dataset

Unnamed: 0.1,Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT,death_event
0,0,75.0,no,582,no,20,yes,265000.00,1.9,130,yes,no,4,1,yes
1,1,55.0,no,7861,no,38,no,263358.03,1.1,136,yes,no,6,1,yes
2,2,65.0,no,146,no,20,no,162000.00,1.3,129,yes,yes,7,1,yes
3,3,50.0,yes,111,no,20,no,210000.00,1.9,137,yes,no,7,1,yes
4,4,65.0,yes,160,yes,20,no,327000.00,2.7,116,no,no,8,1,yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
294,294,62.0,no,61,yes,38,yes,155000.00,1.1,143,yes,yes,270,0,no
295,295,55.0,no,1820,no,38,no,270000.00,1.2,139,no,no,271,0,no
296,296,45.0,no,2060,yes,60,no,742000.00,0.8,138,no,no,278,0,no
297,297,45.0,no,2413,no,38,no,140000.00,1.4,140,yes,yes,280,0,no


In [111]:
# Overview of data

dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 299 entries, 0 to 298
Data columns (total 15 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Unnamed: 0                299 non-null    int64  
 1   age                       299 non-null    float64
 2   anaemia                   299 non-null    object 
 3   creatinine_phosphokinase  299 non-null    int64  
 4   diabetes                  299 non-null    object 
 5   ejection_fraction         299 non-null    int64  
 6   high_blood_pressure       299 non-null    object 
 7   platelets                 299 non-null    float64
 8   serum_creatinine          299 non-null    float64
 9   serum_sodium              299 non-null    int64  
 10  sex                       299 non-null    object 
 11  smoking                   299 non-null    object 
 12  time                      299 non-null    int64  
 13  DEATH_EVENT               299 non-null    int64  
 14  death_even

In [112]:
# We seek to predict the 'death_event' column, from all the other columns with the exception of the 'Unnamed: 0'and 'DEATH_EVENT' columns.

y = dataset['death_event']
X = dataset.drop(['Unnamed: 0',"DEATH_EVENT","death_event"], axis=1)

In [113]:
# Checking distribution of y.

from collections import Counter

Counter(y)

Counter({'yes': 96, 'no': 203})

In [114]:
# Data Preprocessing

# Converting categorical features to numerical data
X = pd.get_dummies(X)

# Splitting dataset into training and test datasets
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state=5)



from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.compose import ColumnTransformer

# Scaling features

ct = ColumnTransformer([('numeric', StandardScaler(), ['age','creatinine_phosphokinase','ejection_fraction','platelets','serum_creatinine','serum_sodium','time'])],  remainder='passthrough')
X_train_scaled = ct.fit_transform(X_train)
X_test_scaled = ct.transform(X_test)

# Encoding labels

le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)


In [117]:
# NN architecture and hyperparameter set up

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, InputLayer
from sklearn.metrics import classification_report

model = Sequential()
inputs = InputLayer(input_shape=(X_train.shape[1],))
model.add(inputs)
model.add(Dense(12, activation='relu'))
model.add(Dense(2, activation='softmax'))

model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [118]:
# Fitting model

model.fit(X_train, y_train, epochs=100, batch_size=16, verbose=0)

<keras.src.callbacks.History at 0x7c0f7b197910>

In [120]:
# Model performance on test set (loss and accuracy)

loss, acc = model.evaluate(X_test, y_test, verbose=1)
print("Loss", loss, "Accuracy:", acc)


Loss 9.939581871032715 Accuracy: 0.7666666507720947


In [121]:
# Model F1 score on test set

y_predictions = model.predict(X_test)
y_predictions = np.argmax(y_predictions, axis=1)

counter = 0

for i in range(len(y_test)):
  if y_test[i] == y_predictions[i]:
    counter += 1

print("F1 score is:", counter/len(y_test))



F1 score is: 0.7666666666666667
