In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
# data visualization
import matplotlib.pyplot as plt
import seaborn as sns 

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily walk the Kaggle input tree and print the full path of every file found.
input_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the heart-failure clinical records CSV into a DataFrame.
DATA_PATH = '/kaggle/input/heart-failure-clinical-data/heart_failure_clinical_records_dataset.csv'
data = pd.read_csv(DATA_PATH)

In [None]:
# Column names grouped by role: binary inputs, continuous inputs, and the target.
input_binary_variable = [
    'anaemia',
    'diabetes',
    'high_blood_pressure',
    'sex',
    'smoking',
]
input_continous_variable = [
    'age',
    'creatinine_phosphokinase',
    'ejection_fraction',
    'platelets',
    'serum_creatinine',
    'serum_sodium',
    'time',
]
output_variable = 'DEATH_EVENT'

## Normalization

In [None]:
from sklearn import preprocessing

# Min-max scale every column of `data` and rebuild a DataFrame with the same column names.
# NOTE(review): the scaler is fit on all rows and all columns (including the target),
# i.e. before any train/validation/test split is made -- confirm this is acceptable.
min_max_scaler = preprocessing.MinMaxScaler()
x = data.to_numpy()  # plain numpy array of the whole table
x_scaled = min_max_scaler.fit(x).transform(x)
norm_data = pd.DataFrame(data=x_scaled, columns=data.columns)

## Features distributions

In [None]:
# Long format: one row per (sample, feature) pair, keeping DEATH_EVENT as the identifier column.
norm_data_long = norm_data.melt(id_vars='DEATH_EVENT')

# One split violin per feature; the two halves compare the DEATH_EVENT groups.
ax = sns.violinplot(
    data=norm_data_long,
    x="variable",
    y="value",
    hue="DEATH_EVENT",
    split=True,
    inner="quart",
    linewidth=1,
    palette={1: "b", 0: ".85"},
)
ax.set_ylim([-0.5, 1.5])
ax.set_xticklabels(ax.get_xticklabels(), rotation=90)

**TODO:** discuss the follow-up time ...

In [None]:
# All input features except the follow-up time column.
features_no_time = [*input_binary_variable, *input_continous_variable]
features_no_time.remove('time')
features_no_time

In [None]:
# Full input feature list: binary columns followed by continuous columns ('time' included).
features_with_time = [*input_binary_variable, *input_continous_variable]
features_with_time

## Utilities

In [None]:
import tensorflow as tf
# Short alias for the Keras layers namespace, used when building the models in this notebook.
tfkl = tf.keras.layers

In [None]:
# Helper that visualises a finished training run
def plot_training(fit, evaluation):
    """Plot loss and accuracy curves of a training run and print a short summary.

    Parameters
    ----------
    fit : object exposing ``epoch`` (sequence of epoch indices) and ``history``
        (dict with the keys 'loss', 'val_loss', 'accuracy' and 'val_accuracy').
    evaluation : indexable; element 0 is reported as the test loss and
        element 1 as the test accuracy.

    The epoch with the lowest validation loss is highlighted with a red band in
    both panels, and the test value is drawn as a small green marker at that epoch.
    """
    val_losses = fit.history['val_loss']
    lowest_val_loss = min(val_losses)
    best_epoch = fit.epoch[val_losses.index(lowest_val_loss)]

    fig, ax = plt.subplots(2, 1, figsize=(3, 5))

    # (axes, history key, y-axis label, test value) for the two stacked panels.
    panels = (
        (ax[0], 'loss', 'Loss', evaluation[0]),
        (ax[1], 'accuracy', 'Accuracy', evaluation[1]),
    )
    for axis, metric, label, test_value in panels:
        axis.plot(fit.epoch, fit.history['val_' + metric], '.-', color='red', label='validation')
        axis.plot(fit.epoch, fit.history[metric], '.-', color='orange', label='train')
        axis.set(ylabel=label, ylim=[0, 1])
        # Band of width one epoch centred on the best (lowest validation loss) epoch.
        axis.axvspan(best_epoch - 0.5, best_epoch + 0.5, alpha=0.5, color='red')
        axis.scatter(best_epoch, test_value, s=2, zorder=1, color='green')
        axis.legend()

    plt.show()
    print("[Best epoch]:", best_epoch)
    print("[Loss]:", lowest_val_loss, " test:", evaluation[0])
    print("[Accuracy]:", max(fit.history['val_accuracy']), " test:", evaluation[1])
    


# Without follow-up time

In [None]:
# Append a trailing length-1 axis: inputs become (rows, features, 1), targets (rows, 1).
input_array = np.expand_dims(norm_data[features_no_time].to_numpy(), axis=-1)
output_array = np.expand_dims(norm_data[output_variable].to_numpy(), axis=-1)
print(input_array.shape)
print(output_array.shape)

In [None]:
# Data pipeline settings.
BATCH_SIZE = 1
DATASET_SIZE = len(input_array)  # number of rows (first axis of the input array)

# Network hyper-parameters shared by the models in this notebook.
base_depth, conv_filters = 128, 512
dropout_prob = 0.4
activation_func = tf.nn.leaky_relu

In [None]:
# Split sizes are expressed in batches because take/skip operate on the batched dataset.
train_size = int(0.6 * DATASET_SIZE)//BATCH_SIZE
val_size = int(0.2 * DATASET_SIZE)//BATCH_SIZE
test_size = int(0.2 * DATASET_SIZE)//BATCH_SIZE

# Shuffle ONCE. With the default reshuffle_each_iteration=True every pass over the
# dataset (each epoch, each evaluate call) draws a new permutation, so take()/skip()
# would hand different rows to train/validation/test each time and the splits would
# leak into each other.
dataset = (
    tf.data.Dataset.from_tensor_slices((input_array, output_array))
    .shuffle(1000, reshuffle_each_iteration=False)
    .batch(BATCH_SIZE)
)
# Only the training subset is re-shuffled per epoch (batch order), which keeps
# stochastic training without touching the membership of the three splits.
train_data = dataset.take(train_size).shuffle(max(train_size, 1))
test_data = dataset.skip(train_size)
# Validation receives everything left after the train and test batches.
valid_data = test_data.skip(test_size)
test_data = test_data.take(test_size)

print("\n[Train size]:",len(list(train_data)),"\n[Valid size]:", len(list(valid_data)),"\n[Test size]:", len(list(test_data)))

In [None]:
# Layer stack of the model trained WITHOUT the follow-up time feature.
# kernel_size=11 equals the number of input features (5 binary + 6 continuous once
# 'time' is removed), so the convolution window spans the whole feature axis.
layers_no_time = [
    tf.keras.Input(shape=(len(features_no_time), 1)),
    tfkl.Conv1D(filters=conv_filters, kernel_size=11, strides=2),
    tfkl.Dropout(dropout_prob),
    tfkl.Dense(base_depth, activation=activation_func),
    tfkl.Dense(base_depth, activation=activation_func),
    tfkl.Dense(base_depth, activation=activation_func),
    tfkl.Dropout(dropout_prob),
    tfkl.Dense(1, activation=tf.nn.sigmoid),  # probability of DEATH_EVENT
]
HFmodel_no_time = tf.keras.Sequential(layers_no_time, name="heart_failure_model_notime")

HFmodel_no_time.compile(
    loss='binary_crossentropy',
    optimizer=tf.optimizers.Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

In [None]:
# Train on the already-batched tf.data pipeline. `batch_size` is intentionally NOT
# passed to fit(): Keras documents that it must not be specified for dataset inputs
# (the dataset yields batches itself) and some TF2 releases raise if it is.
# The learning rate is halved after 10 epochs without val_loss improvement; training
# stops after 100 epochs without val_accuracy improvement and the best weights are restored.
# NOTE(review): early stopping restores the best-val_accuracy weights, while
# plot_training highlights the lowest-val_loss epoch -- these may be different epochs.
fit = HFmodel_no_time.fit(train_data, epochs=400, validation_data=valid_data,
                    verbose=False,
                    callbacks=[tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=0.000001),
                               tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', min_delta=0.0, patience=100, verbose=1, mode='auto', restore_best_weights=True)])

# evaluate() returns [loss, accuracy] given the metrics configured at compile time.
evaluation = HFmodel_no_time.evaluate(test_data)
plot_training(fit, evaluation)

## Randomization 

In [None]:
# Repeat the no-time experiment on 30 independent random splits and collect the
# test accuracy of each run.
evaluations_no_time = []
for i in range(30):

    # A fresh shuffle per repetition gives a new random split. reshuffle_each_iteration=False
    # freezes that permutation for the lifetime of this dataset object; with the default
    # (True) every epoch / evaluate call would re-permute the rows, so take()/skip() would
    # mix training rows into the validation and test subsets.
    dataset = (
        tf.data.Dataset.from_tensor_slices((input_array, output_array))
        .shuffle(300, reshuffle_each_iteration=False)
        .batch(BATCH_SIZE)
    )
    # Re-shuffle only the training batches each epoch; split membership stays fixed.
    train_data = dataset.take(train_size).shuffle(max(train_size, 1))
    test_data = dataset.skip(train_size)
    valid_data = test_data.skip(test_size)
    test_data = test_data.take(test_size)

    # Drop state left over from the previous repetition before building a new model.
    tf.keras.backend.clear_session()

    HFmodel_no_time = tf.keras.Sequential([
        tf.keras.Input(shape=(len(features_no_time),1,)),
        tfkl.Conv1D(filters=conv_filters,kernel_size=11, strides=2),
        tfkl.Dropout(dropout_prob),
        tfkl.Dense(base_depth,activation=activation_func),
        tfkl.Dense(base_depth,activation=activation_func),
        tfkl.Dense(base_depth,activation=activation_func),
        tfkl.Dropout(dropout_prob),
        tfkl.Dense(1,activation=tf.nn.sigmoid)
    ], name="heart_failure_model_notime")

    HFmodel_no_time.compile(optimizer=tf.optimizers.Adam(learning_rate=0.001), loss='binary_crossentropy',metrics=['accuracy'])

    # `batch_size` is not passed: the tf.data input is already batched and Keras
    # documents that batch_size must not be specified for dataset inputs.
    fit = HFmodel_no_time.fit(train_data, epochs=400, validation_data=valid_data,
                        verbose=False,
                        callbacks=[tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=0.000001),
                                   tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', min_delta=0.0, patience=100, verbose=1, mode='auto', restore_best_weights=True)])

    # evaluate() returns [loss, accuracy]; keep only the accuracy.
    accuracy = HFmodel_no_time.evaluate(test_data)[1]
    evaluations_no_time.append(accuracy)


In [None]:
# Distribution of the test accuracies over the repeated runs (no follow-up time);
# the cell output is their mean.
ax = sns.histplot(evaluations_no_time)
ax.set(xlim=(0, 1))
ax.set(xlabel='Accuracy')
np.mean(evaluations_no_time)

# With follow-up time

In [None]:
# Rebuild the model inputs, this time including 'time'; a trailing length-1 axis
# is appended so inputs are (rows, features, 1) and targets (rows, 1).
input_array = np.expand_dims(norm_data[features_with_time].to_numpy(), axis=-1)
output_array = np.expand_dims(norm_data[output_variable].to_numpy(), axis=-1)
print(input_array.shape)
print(output_array.shape)

In [None]:
# Split sizes are expressed in batches because take/skip operate on the batched dataset.
train_size = int(0.6 * DATASET_SIZE)//BATCH_SIZE
val_size = int(0.2 * DATASET_SIZE)//BATCH_SIZE
test_size = int(0.2 * DATASET_SIZE)//BATCH_SIZE

# Shuffle ONCE. With the default reshuffle_each_iteration=True every pass over the
# dataset (each epoch, each evaluate call) draws a new permutation, so take()/skip()
# would hand different rows to train/validation/test each time and the splits would
# leak into each other.
dataset = (
    tf.data.Dataset.from_tensor_slices((input_array, output_array))
    .shuffle(1000, reshuffle_each_iteration=False)
    .batch(BATCH_SIZE)
)
# Only the training subset is re-shuffled per epoch (batch order), which keeps
# stochastic training without touching the membership of the three splits.
train_data = dataset.take(train_size).shuffle(max(train_size, 1))
test_data = dataset.skip(train_size)
# Validation receives everything left after the train and test batches.
valid_data = test_data.skip(test_size)
test_data = test_data.take(test_size)

print("\n[Train size]:",len(list(train_data)),"\n[Valid size]:", len(list(valid_data)),"\n[Test size]:", len(list(test_data)))

In [None]:
# Layer stack of the model trained WITH the follow-up time feature.
# kernel_size=12 equals the number of input features (5 binary + 7 continuous),
# so the convolution window spans the whole feature axis.
layers_with_time = [
    tf.keras.Input(shape=(len(features_with_time), 1)),
    tfkl.Conv1D(filters=conv_filters, kernel_size=12, strides=2),
    tfkl.Dropout(dropout_prob),
    tfkl.Dense(base_depth, activation=activation_func),
    tfkl.Dense(base_depth, activation=activation_func),
    tfkl.Dense(base_depth, activation=activation_func),
    tfkl.Dropout(dropout_prob),
    tfkl.Dense(1, activation=tf.nn.sigmoid),  # probability of DEATH_EVENT
]
HFmodel_with_time = tf.keras.Sequential(layers_with_time, name="heart_failure_model_time")

HFmodel_with_time.compile(
    loss='binary_crossentropy',
    optimizer=tf.optimizers.Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

In [None]:
# Train on the already-batched tf.data pipeline. `batch_size` is intentionally NOT
# passed to fit(): Keras documents that it must not be specified for dataset inputs
# (the dataset yields batches itself) and some TF2 releases raise if it is.
# The learning rate is halved after 10 epochs without val_loss improvement; training
# stops after 100 epochs without val_accuracy improvement and the best weights are restored.
# NOTE(review): early stopping restores the best-val_accuracy weights, while
# plot_training highlights the lowest-val_loss epoch -- these may be different epochs.
fit = HFmodel_with_time.fit(train_data, epochs=400, validation_data=valid_data,
                    verbose=False,
                    callbacks=[tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=0.000001),
                               tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', min_delta=0.0, patience=100, verbose=1, mode='auto', restore_best_weights=True)])

# evaluate() returns [loss, accuracy] given the metrics configured at compile time.
evaluation = HFmodel_with_time.evaluate(test_data)
plot_training(fit, evaluation)

## Randomization 

In [None]:
# Repeat the with-time experiment on 30 independent random splits and collect the
# test accuracy of each run.
evaluations_with_time = []
for i in range(30):

    # A fresh shuffle per repetition gives a new random split. reshuffle_each_iteration=False
    # freezes that permutation for the lifetime of this dataset object; with the default
    # (True) every epoch / evaluate call would re-permute the rows, so take()/skip() would
    # mix training rows into the validation and test subsets.
    dataset = (
        tf.data.Dataset.from_tensor_slices((input_array, output_array))
        .shuffle(300, reshuffle_each_iteration=False)
        .batch(BATCH_SIZE)
    )
    # Re-shuffle only the training batches each epoch; split membership stays fixed.
    train_data = dataset.take(train_size).shuffle(max(train_size, 1))
    test_data = dataset.skip(train_size)
    valid_data = test_data.skip(test_size)
    test_data = test_data.take(test_size)

    # Drop state left over from the previous repetition before building a new model.
    tf.keras.backend.clear_session()

    HFmodel_with_time = tf.keras.Sequential([
        tf.keras.Input(shape=(len(features_with_time),1,)),
        tfkl.Conv1D(filters=conv_filters,kernel_size=12, strides=2),
        tfkl.Dropout(dropout_prob),
        tfkl.Dense(base_depth,activation=activation_func),
        tfkl.Dense(base_depth,activation=activation_func),
        tfkl.Dense(base_depth,activation=activation_func),
        tfkl.Dropout(dropout_prob),
        tfkl.Dense(1,activation=tf.nn.sigmoid)
    ], name="heart_failure_model_time")

    HFmodel_with_time.compile(optimizer=tf.optimizers.Adam(learning_rate=0.001), loss='binary_crossentropy',metrics=['accuracy'])

    # `batch_size` is not passed: the tf.data input is already batched and Keras
    # documents that batch_size must not be specified for dataset inputs.
    fit = HFmodel_with_time.fit(train_data, epochs=400, validation_data=valid_data,
                        verbose=False,
                        callbacks=[tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=0.000001),
                                   tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', min_delta=0.0, patience=100, verbose=1, mode='auto', restore_best_weights=True)])

    # evaluate() returns [loss, accuracy]; keep only the accuracy.
    accuracy = HFmodel_with_time.evaluate(test_data)[1]
    evaluations_with_time.append(accuracy)


In [None]:
# Distribution of the test accuracies over the repeated runs (with follow-up time);
# the cell output is their mean.
ax = sns.histplot(evaluations_with_time)
ax.set(xlim=(0, 1))
ax.set(xlabel='Accuracy')
np.mean(evaluations_with_time)

# Conclusion

In [None]:
# Per-run test accuracies of both experiments side by side (one row per repetition).
# The 'no follow-up time' column must come from evaluations_no_time; it was previously
# filled with evaluations_with_time, which made both columns identical.
df = pd.DataFrame({'no follow-up time':evaluations_no_time,'with follow-up time':evaluations_with_time})

In [None]:
# Long format: column name -> 'variable', accuracy -> 'value'.
accuracy_long = df.melt()

# Box plot per experiment with the individual runs overlaid as black points.
ax = sns.boxplot(data=accuracy_long, x='variable', y='value')
sns.stripplot(data=accuracy_long, x='variable', y='value', color='black', ax=ax)
# Author's recorded figures (presumably accuracies from an earlier run):
#0.83 with time
#0.74 without time
ax.set(ylim=(0, 1))

The models outperform the results in the original paper.