In [1]:
# Colab-only setup: mount Google Drive at /content/drive so the dataset
# stored there can be copied into the runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Copy dataset1.csv from the mounted Drive folder into the current working
# directory so it can be read by a relative path.
!cp /content/drive/MyDrive/congestive_heart_failure_detection_datasets/dataset1.csv .

In [3]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import label_binarize

from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import silhouette_score
from sklearn.metrics import roc_curve
from sklearn.metrics import auc
from sklearn.metrics import log_loss

from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_fscore_support


import warnings
warnings.filterwarnings('ignore')

In [4]:
# Load the dataset and encode the string class label as an integer:
# 'healthy' -> 1, 'heart failure' -> 0.
data = pd.read_csv('dataset1.csv')
health = {'healthy':1, 'heart failure' : 0}

# Series.map() turns every value that is missing from the mapping into NaN
# without complaint, so check for unexpected label strings (typos, different
# casing, stray whitespace, blanks) before they reach the classifiers.
raw_labels = data['label']
encoded_labels = raw_labels.map(health)
unmapped = encoded_labels.isna()
if unmapped.any():
    raise ValueError(
        'Unexpected value(s) in the label column: '
        + repr(raw_labels[unmapped].unique().tolist())
        + '; expected one of ' + repr(list(health))
    )
data['label'] = encoded_labels

# Feature matrix (every column except the label) and the target vector.
modelData = data.drop(['label'], axis=1)
labels = data['label']

In [5]:
# Rescale every feature column into the [0, 1] range.
# NOTE(review): the scaler is fitted on the complete dataset, i.e. before the
# K-fold split performed in the evaluation cells, so each test fold
# contributes to the min/max used for scaling. Confirm this is acceptable.
scaler = MinMaxScaler(feature_range=(0, 1))
modelData_scaled = scaler.fit_transform(modelData)

In [6]:
# Shared 10-fold splitter; shuffling with a fixed random_state means every
# evaluation cell that iterates kf.split(...) sees the same folds.
kf = KFold(n_splits=10, shuffle=True, random_state=40)

In [7]:
def roc_area(y_true, y_pred):
    """Return the area under the ROC curve for one set of predictions.

    Both arguments are passed unchanged to sklearn's ``roc_curve``; the
    resulting false-positive / true-positive rates are integrated with
    ``auc`` and that area is returned.
    """
    false_pos_rate, true_pos_rate, _thresholds = roc_curve(y_true, y_pred)
    return auc(false_pos_rate, true_pos_rate)

In [8]:
def report(acc_normals, acc_chfs, f1_normals, f1_chfs, roc_normals, roc_chfs):
    """Print the mean of each per-fold metric, first for 'Normal' then for 'CHF'.

    Each argument is a sequence holding one value per cross-validation fold.
    The mean is taken over however many folds were supplied (it is no longer
    tied to exactly 10), rounded to 3 decimals and printed under the labels
    'accuracy', 'f1' and 'ROC area'.

    Raises:
        ValueError: if any of the sequences is empty.
    """
    def _mean(values, what):
        # Average over the actual number of folds instead of a fixed 10.
        count = len(values)
        if count == 0:
            raise ValueError('report(): no per-fold values supplied for ' + what)
        return round(sum(values) / count, 3)

    # Compute everything first so nothing is printed if an input is invalid.
    acc_normal = _mean(acc_normals, 'acc_normals')
    f1_normal = _mean(f1_normals, 'f1_normals')
    roc_normal = _mean(roc_normals, 'roc_normals')
    acc_chf = _mean(acc_chfs, 'acc_chfs')
    f1_chf = _mean(f1_chfs, 'f1_chfs')
    roc_chf = _mean(roc_chfs, 'roc_chfs')

    print('Normal')
    print('accuracy = ', acc_normal)
    print('f1 = ' , f1_normal)
    print('ROC area = ', roc_normal)
    print('CHF')
    print('accuracy = ', acc_chf)
    print('f1 = ', f1_chf)
    print('ROC area = ', roc_chf)

In [11]:
# 10-fold cross-validation of a distance-weighted 3-nearest-neighbour classifier.
# One value per fold is appended to each list below.
acc_normal = []
acc_chf = []
f1_normal = []
f1_chf = []
roc_normal = []
roc_chf = []
for train_index, test_index in kf.split(modelData_scaled):
    x_train, x_test = modelData_scaled[train_index], modelData_scaled[test_index]
    # KFold yields positional indices, so select the labels by position
    # (.iloc); plain labels[...] is a label-based lookup that only matches
    # while the Series keeps a default 0..n-1 index.
    y_train, y_test = labels.iloc[train_index], labels.iloc[test_index]
    KNN = KNeighborsClassifier(n_neighbors=3, weights='distance')
    KNN.fit(x_train, y_train)
    y_pred = KNN.predict(x_test)
    # m = (precision, recall, f1, support); within each array index 0 is
    # class 1 (healthy) and index 1 is class 0 (heart failure), per labels=[1, 0].
    m = precision_recall_fscore_support(y_test, y_pred, average=None, labels=[1, 0])
    # NOTE(review): the value reported as "accuracy" is the mean of the
    # class's precision and recall, not classification accuracy.
    acc_normal.append((m[0][0] + m[1][0]) / 2)
    acc_chf.append((m[0][1] + m[1][1]) / 2)
    f1_normal.append(m[2][0])
    f1_chf.append(m[2][1])
    # The same ROC area is recorded for both classes, so compute it once.
    # NOTE(review): it is derived from hard predictions, not class scores.
    fold_auc = roc_area(y_test, y_pred)
    roc_normal.append(fold_auc)
    roc_chf.append(fold_auc)
print('****** KNN ******')
report(acc_normal, acc_chf, f1_normal, f1_chf, roc_normal, roc_chf)

****** KNN ******
Normal
accuracy =  0.987
f1 =  0.987
ROC area =  0.991
CHF
accuracy =  1.0
f1 =  1.0
ROC area =  0.991


In [13]:
from sklearn.ensemble import RandomForestClassifier
# 10-fold cross-validation of a 20-tree random forest.
# One value per fold is appended to each list below.
acc_normal = []
acc_chf = []
f1_normal = []
f1_chf = []
roc_normal = []
roc_chf = []
for train_index, test_index in kf.split(modelData_scaled):
    x_train, x_test = modelData_scaled[train_index], modelData_scaled[test_index]
    # KFold yields positional indices, so select the labels by position
    # (.iloc); plain labels[...] is a label-based lookup that only matches
    # while the Series keeps a default 0..n-1 index.
    y_train, y_test = labels.iloc[train_index], labels.iloc[test_index]
    # Seed the forest so the reported metrics are reproducible on re-run;
    # without random_state every execution grows different trees.
    clf = RandomForestClassifier(n_estimators=20, max_features=6, random_state=40)
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)
    # m = (precision, recall, f1, support); within each array index 0 is
    # class 1 (healthy) and index 1 is class 0 (heart failure), per labels=[1, 0].
    m = precision_recall_fscore_support(y_test, y_pred, average=None, labels=[1, 0])
    # NOTE(review): the value reported as "accuracy" is the mean of the
    # class's precision and recall, not classification accuracy.
    acc_normal.append((m[0][0] + m[1][0]) / 2)
    acc_chf.append((m[0][1] + m[1][1]) / 2)
    f1_normal.append(m[2][0])
    f1_chf.append(m[2][1])
    # The same ROC area is recorded for both classes, so compute it once.
    # NOTE(review): it is derived from hard predictions, not class scores.
    fold_auc = roc_area(y_test, y_pred)
    roc_normal.append(fold_auc)
    roc_chf.append(fold_auc)
print('****** Random Forest ******')
report(acc_normal, acc_chf, f1_normal, f1_chf, roc_normal, roc_chf)

****** Random Forest ******
Normal
accuracy =  0.995
f1 =  0.995
ROC area =  0.995
CHF
accuracy =  1.0
f1 =  1.0
ROC area =  0.995


In [9]:
# 10-fold cross-validation of a single-hidden-layer (10 units) MLP trained
# with SGD. One value per fold is appended to each list below.
acc_normal = []
acc_chf = []
f1_normal = []
f1_chf = []
roc_normal = []
roc_chf = []
for train_index, test_index in kf.split(modelData_scaled):
    x_train, x_test = modelData_scaled[train_index], modelData_scaled[test_index]
    # KFold yields positional indices, so select the labels by position
    # (.iloc); plain labels[...] is a label-based lookup that only matches
    # while the Series keeps a default 0..n-1 index.
    y_train, y_test = labels.iloc[train_index], labels.iloc[test_index]
    # (10,) is an explicit one-element tuple; the previous "(10)" was just
    # the integer 10 written in parentheses.
    MLP = MLPClassifier(
        hidden_layer_sizes=(10,),
        batch_size=32,
        solver='sgd',
        random_state=4,
        momentum=0.85,
        learning_rate_init=0.01,
        activation='logistic',
    )
    MLP.fit(x_train, y_train)
    y_pred = MLP.predict(x_test)
    # m = (precision, recall, f1, support); within each array index 0 is
    # class 1 (healthy) and index 1 is class 0 (heart failure), per labels=[1, 0].
    m = precision_recall_fscore_support(y_test, y_pred, average=None, labels=[1, 0])
    # NOTE(review): the value reported as "accuracy" is the mean of the
    # class's precision and recall, not classification accuracy.
    acc_normal.append((m[0][0] + m[1][0]) / 2)
    acc_chf.append((m[0][1] + m[1][1]) / 2)
    f1_normal.append(m[2][0])
    f1_chf.append(m[2][1])
    # The same ROC area is recorded for both classes, so compute it once.
    # NOTE(review): it is derived from hard predictions, not class scores.
    fold_auc = roc_area(y_test, y_pred)
    roc_normal.append(fold_auc)
    roc_chf.append(fold_auc)
print('****** ANN ******')
report(acc_normal, acc_chf, f1_normal, f1_chf, roc_normal, roc_chf)

****** ANN ******
Normal
accuracy =  0.977
f1 =  0.977
ROC area =  0.979
CHF
accuracy =  0.999
f1 =  0.999
ROC area =  0.979
