# Heart Failure Death Prediction | 9-Fold CV F2 Score Evaluation 

## 0. Program setup

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found below the Kaggle input directory.
for folder, _subfolders, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

from sklearn.metrics import make_scorer
from sklearn.metrics import fbeta_score
from sklearn.metrics import matthews_corrcoef

from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier

import warnings
# Silence every warning for the rest of the notebook.
# NOTE(review): this also hides any convergence or undefined-metric warnings
# the models may emit later on; consider narrowing the filter.
warnings.filterwarnings("ignore")

# Location of the input CSV inside the Kaggle environment.
datafile="/kaggle/input/heart-failure-clinical-data/heart_failure_clinical_records_dataset.csv"

## 1. Data inspection

 First let's take a look at the data.

In [None]:
rawdata=pd.read_csv(datafile)
rawdata

In [None]:
rawdata.describe()

Now let's look for direct correlations to death events.

In [None]:
sns.pairplot(rawdata, y_vars="DEATH_EVENT")

There doesn't seem to be a strong logistic behaviour of any of the variables. We need to look further at the death/survival distribution for each parameter.

In [None]:
# One normalised step histogram per feature, split by outcome.
for column in rawdata.columns:
    if column=="DEATH_EVENT":
        # The target is only used as the hue, it gets no plot of its own.
        continue
    sns.displot(
        data=rawdata,
        x=rawdata[column],
        hue="DEATH_EVENT",
        element="step",
        stat="probability",
        common_norm=False,
    )

The feature with most correlation is "time", which makes sense because patients with severe heart issues tend to die within the examination period whereas the survivors complete the full examination process. However, we can't use that variable for our prediction since initially we don't know how long the examination time will be.

## 2. Data cleaning

Now let's remove the outliers using the quantile criteria:

\begin{equation}
Q_1- 1.5 \ Q_{13} \leq {(X,y)} \leq Q_3+1.5 \ Q_{13}
\end{equation}

where

\begin{equation}
Q_{13}=Q_3-Q_1
\end{equation}

We also remove the feature "time", since it cannot be used for the prediction.

In [None]:
def outlier_remover(columnseries,q):
    """Return the entries of a numeric Series that lie inside its IQR fences.

    Parameters
    ----------
    columnseries : pandas.Series
        Numeric column to filter.
    q : float
        Fence multiplier. A value is kept when it falls inside
        [Q1 - q*IQR, Q3 + q*IQR], both ends included.

    Returns
    -------
    pandas.Series
        The retained values, carrying their original index labels. Missing
        values are never retained because ``Series.between`` is False for NaN.
    """
    # Ask for the two quartiles directly instead of building the complete
    # describe() summary twice.
    Q1,Q3=columnseries.quantile([0.25,0.75])
    Q13=Q3-Q1
    lowerbound=Q1-q*Q13
    upperbound=Q3+q*Q13
    return columnseries[columnseries.between(lowerbound,upperbound)]

def outlier_clean(dataframe, exception_col=None,quo=1.5):
    """Keep only the rows whose value is an inlier in every considered column.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Data to clean. Assumes unique index labels -- TODO confirm for other
        inputs than the freshly read CSV.
    exception_col : list of str, optional
        Columns to leave out. They are neither checked for outliers nor
        included in the returned frame.
    quo : float, default 1.5
        IQR multiplier forwarded to ``outlier_remover``.

    Returns
    -------
    pandas.DataFrame
        The surviving rows, restricted to the non-excluded columns, in their
        original order and with their original dtypes.
    """
    # None instead of a mutable list as the default value.
    if exception_col is None:
        exception_col=[]
    kept_columns=[columnname for columnname in dataframe.columns if columnname not in exception_col]

    # Build one row mask rather than growing a frame column by column: the
    # latter aligns on the index, introduces NaN and silently turns integer
    # columns into floats.
    keep_rows=np.ones(len(dataframe),dtype=bool)
    for columnname in kept_columns:
        inliers=outlier_remover(dataframe[columnname],q=quo)
        keep_rows&=dataframe.index.isin(inliers.index)

    # dropna() also discards rows holding missing values in the kept columns.
    return dataframe.loc[keep_rows,kept_columns].dropna()
    
# "time" is left out through exception_col, so it is neither filtered on
# nor present in the cleaned frame.
cleandata=outlier_clean(rawdata, exception_col=["time"], quo=1.5)

cleandata.describe()

After that, let's take a look at the new death/survivor distributions.

In [None]:
# Same per-feature distributions as before, now on the cleaned data.
for column in cleandata.columns:
    if column=="DEATH_EVENT":
        # The target is only used as the hue, it gets no plot of its own.
        continue
    sns.displot(
        data=cleandata,
        x=cleandata[column],
        hue="DEATH_EVENT",
        element="step",
        stat="probability",
        common_norm=False,
    )

Not much has changed. "age", "serum_creatinine", "anaemia", "high_blood_pressure" and "ejection_fraction" are the features with the largest difference between the death and survivor distributions. Nevertheless, we can use the rest as well to see if the machine learning algorithms can find some hidden patterns within them.

## 3. Preparing the data to be machine learnt.

Now we will transform the binary input features into dummy variables so that they are easier to learn. For example, sex can be 0 or 1. We will create a feature sex_0 that is active only when "sex"=0, and likewise a feature sex_1 for "sex"=1. This could make our variables easier for the algorithms to interpret.

In [None]:
# Binary clinical flags that are expanded into one indicator column per level.
catvarnames=["anaemia","diabetes","high_blood_pressure","sex","smoking"]

# A single get_dummies call removes the listed columns and appends their
# "<column>_<level>" indicators after the remaining columns, in list order.
learndata=pd.get_dummies(cleandata, columns=catvarnames)

learndata

In the last stage of this section we split the data into the input features "X" and the output "y". Then we split the data into train and test sets and scale it, using only the training data to fit the scaler. Next we define the scorers we will use and assign F2 as the CV score criterion, since we want to put special emphasis on false negatives. Finally we define the functions we will use to evaluate each algorithm.

In [None]:
# Target column name and list of feature column names.

y_col="DEATH_EVENT"
data_col=[name for name in learndata.columns if name!=y_col]
X_col=data_col

X_data=learndata[X_col]
y_data=learndata[y_col]

# Number of folds of the K-Fold CV (k) and seed of the train/test split (r).

k=9
r=29

# One part in k+1 is held out for the final test, so the test set has the same size as
# each validation fold of the K-Fold CV that is run on the remaining k parts.

X_train, X_test, y_train, y_test = train_test_split(X_data,y_data,test_size=1/(k+1), random_state=r)

# Standard scaling of the features; the scaler is fitted on the training part only.

Xscaler=StandardScaler()
X_train=Xscaler.fit_transform(X_train)
X_test=Xscaler.transform(X_test)

# Containers filled by the evaluation functions, keyed by model name.

modelScore_train, modelScore_test, modelScore_KFCV, model_KFCV = {}, {}, {}, {}

modelConfussionMatrix_train, modelConfussionMatrix_test = {}, {}

# Prediction tables: the true outcome first, one column per model added later.
deathevents_train=pd.DataFrame({"Reality": y_train})
deathevents_test=pd.DataFrame({"Reality": y_test})

# Scorers tracked during the grid search; F2 is the one used to pick the best model.
scorersCV=dict(
    Acc=make_scorer(accuracy_score),
    MCC=make_scorer(matthews_corrcoef),
    F1=make_scorer(fbeta_score, beta=1),
    F2=make_scorer(fbeta_score, beta=2),
)
scorerCVkey="F2"

# Definition of the algorythm evaluation function.

def tuned_model(model, param_grid, modelname, results):
    """Tune ``model`` with a k-fold grid search and record its CV scores.

    Uses the notebook globals ``X_train``, ``y_train``, ``k``, ``scorersCV``
    and ``scorerCVkey``; stores the winner in ``model_KFCV[modelname]`` and
    its rounded mean CV scores in ``modelScore_KFCV[modelname]``.

    Parameters
    ----------
    model : estimator
        Unfitted scikit-learn estimator to tune.
    param_grid : dict
        Maps parameter names to the candidate values to try.
    modelname : str
        Key under which the results are stored.
    results : bool
        When truthy, print the per-candidate score table sorted by rank.

    Returns
    -------
    estimator
        The best candidate, refitted by GridSearchCV on ``X_train``.
    """
    tuner=GridSearchCV(model,param_grid=param_grid,scoring=scorersCV,cv=k,refit=scorerCVkey)
    tuner.fit(X_train,y_train)
    best_model=tuner.best_estimator_
    model_KFCV[modelname]=best_model
    print(f"Best model: {best_model}")

    # Collect every column rename first and apply them in a single pass.
    renames={}
    resultcols=['rank_test_'+scorerCVkey]

    for paramkey in param_grid:
        renames["param_"+paramkey]=paramkey
        resultcols.append(paramkey)

    for i in range(k):
        new_split_score_name="s%s_%s" % (i,scorerCVkey)
        renames["split%s_test_%s" % (i,scorerCVkey)]=new_split_score_name
        resultcols.append(new_split_score_name)

    for scorerkey in scorersCV:
        new_mean_score_name="mean_"+scorerkey
        renames["mean_test_"+scorerkey]=new_mean_score_name
        resultcols.append(new_mean_score_name)

    resultsdata=pd.DataFrame(tuner.cv_results_).rename(columns=renames)

    # best_index_ is the cv_results_ row of the refit candidate; using it
    # avoids comparing a column of dicts against best_params_.
    best_index=tuner.best_index_
    modelScore_KFCV[modelname]={
        scorerkey: round(resultsdata["mean_"+scorerkey].iloc[best_index],3)
        for scorerkey in scorersCV
    }

    if results: print(resultsdata[resultcols].sort_values(by=resultcols[0]).round(3))

    return best_model

def plot_confusion_matrix(confusion_matrix,model_name):
    """Draw an annotated heatmap of a confusion matrix and show it.

    Parameters
    ----------
    confusion_matrix : array-like
        Matrix to plot. Inside this function the name shadows the
        ``sklearn.metrics.confusion_matrix`` import. The callers in this
        notebook pass scikit-learn confusion matrices, whose rows are the
        true classes and whose columns are the predicted classes.
    model_name : str
        Text used as the figure title.

    Returns
    -------
    matplotlib.axes.Axes
        The axes the heatmap was drawn on.
    """
    # A fresh figure per call keeps successive plots from sharing axes.
    fig, ax = plt.subplots()
    sns.heatmap(data=confusion_matrix,annot=True,ax=ax)
    ax.set_title(model_name)
    ax.set_xlabel("Predicted label")
    ax.set_ylabel("True label")
    plt.show()
    return ax

def execute_model(model,modelname):
    """Fit ``model`` on the training split and record predictions and scores.

    Uses the notebook globals ``X_train``, ``X_test``, ``y_train``,
    ``y_test`` and ``scorersCV``. Under the key ``modelname`` it fills:

    * ``deathevents_train`` / ``deathevents_test`` with the predictions,
    * ``modelScore_train`` / ``modelScore_test`` with every score in
      ``scorersCV`` rounded to three decimals,
    * ``modelConfussionMatrix_train`` / ``modelConfussionMatrix_test`` with
      the confusion matrices normalised over the true classes (rows).

    Parameters
    ----------
    model : estimator
        Estimator to fit; it is (re)fitted in place on ``X_train``.
    modelname : str
        Key / column name under which the results are stored.
    """
    model.fit(X_train,y_train)
    y_train_pred=model.predict(X_train)
    y_test_pred=model.predict(X_test)
    deathevents_train[modelname]=y_train_pred
    deathevents_test[modelname]=y_test_pred

    # Scorers are called through their public (estimator, X, y) interface
    # rather than through the private _score_func / _kwargs attributes.
    modelScore_train[modelname]={
        scorerkey: round(scorer(model,X_train,y_train),3)
        for scorerkey,scorer in scorersCV.items()
    }
    modelScore_test[modelname]={
        scorerkey: round(scorer(model,X_test,y_test),3)
        for scorerkey,scorer in scorersCV.items()
    }

    modelConfussionMatrix_train[modelname]=confusion_matrix(y_train,y_train_pred,labels=[0, 1],normalize="true")
    modelConfussionMatrix_test[modelname]=confusion_matrix(y_test,y_test_pred,labels=[0, 1],normalize="true")
    
def eval_model(model_func,model_pgrid, model_name, results_info=False):
    """Grid-search ``model_func`` over ``model_pgrid``, then score the winner.

    The estimator returned by ``tuned_model`` is handed to ``execute_model``
    under the same ``model_name``. ``results_info`` is forwarded to
    ``tuned_model`` to switch the per-candidate score table on or off.
    """
    execute_model(
        tuned_model(model_func, model_pgrid, model_name, results_info),
        model_name,
    )

## 4. Machine learning algorithms evaluation

First, each algorithm will be tuned via 9-Fold CV with scikit-learn's GridSearchCV. The scores refer to the testing sets of the CV process (for each split and the mean value).

#### Logistic Regression

In [None]:
# Candidate regularisation strengths and solvers for the logistic regression.
LogitRegr_params=dict(
    C=[1,0.5,0.1,0.01],
    solver=["lbfgs", "liblinear"],
)
eval_model(LogisticRegression(), LogitRegr_params, model_name="Logistic Regr.", results_info=True)

#### Stochastic Gradient Descent Classification

In [None]:
# NOTE(review): newer scikit-learn releases renamed the "log" loss to
# "log_loss"; confirm the spelling against the installed version.
SGDClf_params={"alpha":[0.1,0.05,0.01,0.005],
               "loss":["hinge","log","modified_huber"]}
# The estimator is seeded with the split seed so that re-running this cell
# tunes and scores the same model every time.
eval_model(SGDClassifier(random_state=r),SGDClf_params,"Stochastic Gradient Descend Clf.",True)

#### Support Vector Classification

In [None]:
# Candidate penalties, kernel coefficients and kernels for the SVC.
SVClf_params=dict(
    C=[10,5,1,0.1],
    gamma=["scale","auto"],
    kernel=["linear", "poly", "rbf", "sigmoid"],
)
eval_model(SVC(), SVClf_params, model_name="Support Vector Clf.", results_info=True)

#### K Neighbors Classification

In [None]:
# Candidate neighbourhood sizes, vote weightings and search algorithms.
KNClf_params=dict(
    n_neighbors=[1,3],
    weights=["uniform","distance"],
    algorithm=["auto", "ball_tree", "kd_tree", "brute"],
)
eval_model(KNeighborsClassifier(), KNClf_params, model_name="K Neighbors Clf.", results_info=True)

#### Decision Tree Classification

In [None]:
# NOTE(review): newer scikit-learn releases no longer accept "auto" for
# max_features; confirm against the installed version.
DTClf_params={"ccp_alpha":[0,0.05,0.001],
              "criterion":["gini", "entropy"],
              "max_features":[None,"auto", "sqrt", "log2"]}
# Seeded because the tree draws random feature subsets when max_features
# is restricted; without a seed the scores change from run to run.
eval_model(DecisionTreeClassifier(random_state=r), DTClf_params, "Decision Tree Clf.", True)

#### Multi-Layer Perceptron Classification

In [None]:
MLPClf_params={"alpha":[0.05,0.01,0.001,0.0001],
               "activation":["identity","tanh"],
               "hidden_layer_sizes":[15,(15,2)],
               "learning_rate":["constant","adaptive"]}
# Seeded so that the random weight initialisation, and therefore the
# scores, are the same on every run of this cell.
eval_model(MLPClassifier(random_state=r), MLPClf_params, "Multi-Layer Perceptron Clf.", True)

#### Random Forest Classification

In [None]:
RFClf_params={"ccp_alpha":[0.01,0.03,0.05,0.005],
              "criterion":["gini", "entropy"],
              "n_estimators":[1,2,5,10]}
# Seeded so that the bootstrap samples and feature subsets, and therefore
# the scores, are the same on every run of this cell.
eval_model(RandomForestClassifier(random_state=r), RFClf_params, "Random Forest Clf.",True)

#### Gradient Boosting Classification

In [None]:
# NOTE(review): newer scikit-learn releases renamed the "deviance" loss to
# "log_loss"; confirm the spelling against the installed version.
GBClf_params={"ccp_alpha":[0.0,0.1,0.01],
              "loss":["deviance", "exponential"],
              "n_estimators":[50,200]}
# Seeded so that repeated runs of this cell give the same scores.
eval_model(GradientBoostingClassifier(random_state=r), GBClf_params, "Gradient Boosting Clf.", True)

Next, the mean scores of the algorithms in the 9-fold CV and on the testing set.

In [None]:
# Mean cross-validated scores of each tuned model ...
print("Score (%d-fold CV Train data)" % k)
for key, cv_scores in modelScore_KFCV.items():
    print("%s: %s" % (key, cv_scores))
print("\n")
# ... followed by the scores on the held-out test set.
print("Score (Test data)")
for key, test_scores in modelScore_test.items():
    print("{}: {}".format(key,test_scores))

After evaluating the algorithms, let's briefly see how well they predict the training set output.

In [None]:
deathevents_train

Now let's see how each algorithm predicts the testing set, which is totally unknown to them.

In [None]:
# Remove the "Positivity" column left over from a previous run of the cells
# below; errors="ignore" makes this a no-op when the column does not exist,
# without a bare except that would hide unrelated failures.
deathevents_test=deathevents_test.drop(columns=["Positivity"], errors="ignore")
deathevents_test

To continue, let's plot the confusion matrices. 

In [None]:
# Iterate over the stored matrices themselves rather than over the columns of
# deathevents_test: that frame can also hold a "Positivity" column, which has
# no confusion matrix and would raise a KeyError.
for modelname, c_matrix in modelConfussionMatrix_test.items():
    plot_confusion_matrix(c_matrix,modelname)

Now let's focus on the best algorithm according to the 9-Fold CV test. Also, let's define the positivity of a patient of the testing set as the probability of being an actual positive given the predicted output, based on the confusion matrix of the testing set.

\begin{equation}
P(y_{pred})=\begin{cases}
 \frac{FN}{TN+FN} & \quad \text{if  } y_{pred}=0 \\
 \\
 \frac{TP}{TP+FP} & \quad \text{if  } y_{pred}=1
\end{cases}
\end{equation}

In the ideal case, the positivity when predicted positive and negative should be 1 and 0, respectively.

In [None]:
def positivity(A,a):
    """
    Share of actual positives among the samples that received prediction ``a``.

    A -> The Confusion Matrix of the best model, with rows = actual class and
         columns = predicted class. It has to hold raw counts (or be
         normalised per predicted column) for the result to be the
         probability of being an actual positive given the prediction; a
         matrix normalised per actual row yields a different ratio.
    a -> Output of the best model: a single label (0 or 1) or an array-like
         of labels, in which case one value per label is returned.

    Returns NaN, without raising or warning, where no sample was predicted
    as ``a``.
    """
    # Float array so that nested lists and integer counts behave alike.
    matrix=np.asarray(A,dtype=float)
    positive=matrix[1][a]
    negative=matrix[0][a]
    # A class that is never predicted has an empty column: 0/0 becomes NaN.
    with np.errstate(divide="ignore",invalid="ignore"):
        return positive/(positive+negative)

# Mean cross-validated score of every tuned model on the selection metric.
tuned_scores={modelkey: scores[scorerCVkey] for modelkey, scores in modelScore_KFCV.items()}

best_model=max(tuned_scores,key=tuned_scores.get)

# The positivity formula needs raw counts (FN/(TN+FN), TP/(TP+FP)). The stored
# matrices are normalised over the true classes, so the count matrix of the
# best model is rebuilt here from its stored test predictions.
c_matrix_best=confusion_matrix(deathevents_test["Reality"],deathevents_test[best_model],labels=[0, 1])

deathevents_test["Positivity"]=positivity(A=c_matrix_best, a=deathevents_test[best_model])
deathevents_test[["Reality",best_model,"Positivity"]].head(50)

In [None]:
# Summary of the split settings and of the best model's test performance.
print("Data splitting: K = {:d} , Test-Train Split Random State Seed = {:d}".format(k,r))
print("BEST MODEL: %s" % (model_KFCV[best_model]))
print("Test dataset scores:")
best_test_scores=modelScore_test[best_model]
score_labels=(
    ("Accuracy","Acc"),
    ("Matthews Correlation Coefficient","MCC"),
    ("F1 Score","F1"),
    ("F2 Score","F2"),
)
for label, scorerkey in score_labels:
    print("· {:<35s} {:.3f}".format(label,best_test_scores[scorerkey]))
print("Positivity when y_pred = 0 : {:.3f}".format(positivity(A=c_matrix_best, a=0)))
print("Positivity when y_pred = 1 : {:.3f}".format(positivity(A=c_matrix_best, a=1)))

These results may be poor, good or quite good depending on the run. Now let's try a random seed for which the algorithms' performance is generally poor.

In [None]:
#New splitting

r=0
X_train, X_test, y_train, y_test = train_test_split(X_data,y_data,test_size=1/(k+1), random_state=r)

# Standard scaling of the input data X.

Xscaler=StandardScaler()
Xscaler.fit(X_train)
X_train=Xscaler.transform(X_train)
X_test=Xscaler.transform(X_test)

# Reset of storing program variables (model_KFCV included, so that no tuned
# model of the previous split is left behind).
modelScore_train={}
modelScore_test={}
modelScore_KFCV={}
model_KFCV={}

modelConfussionMatrix_train={}
modelConfussionMatrix_test={}

deathevents_train=pd.DataFrame()
deathevents_test=pd.DataFrame()

# New train/test output split.
deathevents_train["Reality"]=y_train
deathevents_test["Reality"]=y_test

# Re-tune every model on the new split. The parameter grids are the ones
# defined in section 4, reused instead of copied. Estimators that draw random
# numbers are seeded with the split seed so that repeated runs of this cell
# give the same scores.
models_to_evaluate=[
    (LogisticRegression(), LogitRegr_params, "Logistic Regr."),
    (SGDClassifier(random_state=r), SGDClf_params, "Stochastic Gradient Descend Clf."),
    (SVC(), SVClf_params, "Support Vector Clf."),
    (KNeighborsClassifier(), KNClf_params, "K Neighbors Clf."),
    (DecisionTreeClassifier(random_state=r), DTClf_params, "Decision Tree Clf."),
    (MLPClassifier(random_state=r), MLPClf_params, "Multi-Layer Perceptron Clf."),
    (RandomForestClassifier(random_state=r), RFClf_params, "Random Forest Clf."),
    (GradientBoostingClassifier(random_state=r), GBClf_params, "Gradient Boosting Clf."),
]
for estimator, param_grid, model_name in models_to_evaluate:
    eval_model(estimator, param_grid, model_name)

# deathevents_test was rebuilt above, so it holds no "Positivity" column yet
# and nothing has to be dropped before displaying it.
deathevents_test

In [None]:
# Mean cross-validated scores of each tuned model ...
print("Score (%d-fold CV Train data)" % k)
for key, cv_scores in modelScore_KFCV.items():
    print("%s: %s" % (key, cv_scores))
print("\n")
# ... followed by the scores on the held-out test set.
print("Score (Test data)")
for key, test_scores in modelScore_test.items():
    print("{}: {}".format(key,test_scores))

In [None]:
# Iterate over the stored matrices themselves rather than over the columns of
# deathevents_test: that frame can also hold a "Positivity" column, which has
# no confusion matrix and would raise a KeyError.
for modelname, c_matrix in modelConfussionMatrix_test.items():
    plot_confusion_matrix(c_matrix,modelname)

Now let's focus again on the best algorithm according to the 9-fold CV.

In [None]:
# Rebuilt from scratch so that no score of the previous split can linger.
tuned_scores={modelkey: scores[scorerCVkey] for modelkey, scores in modelScore_KFCV.items()}

best_model=max(tuned_scores,key=tuned_scores.get)

# The positivity formula needs raw counts (FN/(TN+FN), TP/(TP+FP)). The stored
# matrices are normalised over the true classes, so the count matrix of the
# best model is rebuilt here from its stored test predictions.
c_matrix_best=confusion_matrix(deathevents_test["Reality"],deathevents_test[best_model],labels=[0, 1])

deathevents_test["Positivity"]=positivity(A=c_matrix_best, a=deathevents_test[best_model])
deathevents_test[["Reality",best_model,"Positivity"]].head(50)

In [None]:
# Summary of the split settings and of the best model's test performance.
print("Data splitting: K = {:d} , Test-Train Split Random State Seed = {:d}".format(k,r))
print("BEST MODEL: %s" % (model_KFCV[best_model]))
print("Test dataset scores:")
best_test_scores=modelScore_test[best_model]
score_labels=(
    ("Accuracy","Acc"),
    ("Matthews Correlation Coefficient","MCC"),
    ("F1 Score","F1"),
    ("F2 Score","F2"),
)
for label, scorerkey in score_labels:
    print("· {:<35s} {:.3f}".format(label,best_test_scores[scorerkey]))
print("Positivity when y_pred = 0 : {:.3f}".format(positivity(A=c_matrix_best, a=0)))
print("Positivity when y_pred = 1 : {:.3f}".format(positivity(A=c_matrix_best, a=1)))

In this case the results are horrible.

## 5. Conclusions

The performance of the selected models is inconsistent and depends on the random split seed. Maybe more samples could help improve the consistency. Even across several runs with the same random seed, the results can change.

<span style="color:blue">
    <B><i>
        Author note: If there is something wrong (or improvable) in this notebook please leave a comment below. It will help me for the next tasks or for updating this one.
    </i></B>
</span>