In [2]:
# make python libraries ready to use for different experiments 
import numpy as np
import pandas as pd
from numpy import mean
from numpy import std
from sklearn import metrics
from lightgbm import LGBMClassifier 
from catboost import CatBoostClassifier
from sklearn.tree import DecisionTreeClassifier 
from sklearn.ensemble import AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import ExtraTreesClassifier

from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.metrics import accuracy_score 
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import BaggingClassifier 
from sklearn.neural_network import MLPClassifier 
from sklearn.ensemble import RandomForestClassifier 
from xgboost import XGBClassifier 
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix 
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [3]:
df = pd.read_csv('E:/Javed Sab/1_heart_failure.csv')
df.head() 

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,75.0,0,582,0,20,1,265000.0,1.9,130,1,0,4,0
1,55.0,0,7861,0,38,0,263358.03,1.1,136,1,0,6,0
2,65.0,0,146,0,20,0,162000.0,1.3,129,1,1,7,0
3,50.0,1,111,0,20,0,210000.0,1.9,137,1,0,7,0
4,65.0,1,160,1,20,0,327000.0,2.7,116,0,0,8,0


In [4]:
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values 
X.shape 

(299, 12)

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.30) 
X_train.shape

(209, 12)

# Naive Bayes

10-Fold Cross Validation Method 

In [40]:
gnb = GaussianNB()

In [41]:
# evaluate the model
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
n_scores = cross_val_score(gnb, X, y, scoring='accuracy', cv=cv, n_jobs=-1, error_score='raise')
n_precision = cross_val_score(gnb, X, y, scoring='precision', cv=cv, n_jobs=-1, error_score='raise')
n_recall = cross_val_score(gnb, X, y, scoring='recall', cv=cv, n_jobs=-1, error_score='raise')
n_f1 = cross_val_score(gnb, X, y, scoring='f1', cv=cv, n_jobs=-1, error_score='raise')

In [42]:
# report performance
print('Accuracy: %.3f (%.3f)' % (mean(n_scores), std(n_scores)))
print('Precision: %.3f' % (mean(n_precision))) 
print('Recall: %.3f' % (mean(n_recall)))
print('f1-score: %.3f' % (mean(n_f1)))

Accuracy: 0.791 (0.068)
Precision: 0.802
Recall: 0.924
f1-score: 0.857


Train_Test_Split Method 

In [43]:
#Train the model using the training sets
gnb.fit(X_train, y_train) 

GaussianNB()

In [44]:
#Predict the response for test dataset
y_pred = gnb.predict(X_test) 

In [45]:
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
print("Precision:", metrics.precision_score(y_test, y_pred))
print("Recall:", metrics.recall_score(y_test, y_pred))
print("f1-score:", metrics.f1_score(y_test, y_pred))

Accuracy: 0.7333333333333333
Precision: 0.7761194029850746
Recall: 0.8524590163934426
f1-score: 0.8124999999999999


# MLP

10-Fold Cross Validation Method 

In [46]:
mlp = MLPClassifier()

In [47]:
# evaluate the model
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
n_scores = cross_val_score(mlp, X, y, scoring='accuracy', cv=cv, n_jobs=-1, error_score='raise')
n_precision = cross_val_score(gnb, X, y, scoring='precision', cv=cv, n_jobs=-1, error_score='raise')
n_recall = cross_val_score(gnb, X, y, scoring='recall', cv=cv, n_jobs=-1, error_score='raise')

In [48]:
# report performance
print('Accuracy: %.3f (%.3f)' % (mean(n_scores), std(n_scores)))
print('Precision: %.3f' % (mean(n_precision))) 
print('Recall: %.3f' % (mean(n_recall)))

Accuracy: 0.508 (0.184)
Precision: 0.802
Recall: 0.924


Train_Test_Split Method 

In [49]:
#Train the model using the training sets
mlp.fit(X_train, y_train) 

MLPClassifier()

In [50]:
#Predict the response for test dataset
y_pred = mlp.predict(X_test)

In [51]:
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
print("Precision:", metrics.precision_score(y_test, y_pred))
print("Recall:", metrics.recall_score(y_test, y_pred))
print("f1-score:", metrics.f1_score(y_test, y_pred))

Accuracy: 0.6777777777777778
Precision: 0.6777777777777778
Recall: 1.0
f1-score: 0.8079470198675497


# Bagging

10-Fold Cross Validation Method 

In [52]:
# define the model
bagging = BaggingClassifier()

In [53]:
# evaluate the model
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
n_scores = cross_val_score(bagging, X, y, scoring='accuracy', cv=cv, n_jobs=-1, error_score='raise')
n_precision = cross_val_score(bagging, X, y, scoring='precision', cv=cv, n_jobs=-1, error_score='raise')
n_recall = cross_val_score(bagging, X, y, scoring='recall', cv=cv, n_jobs=-1, error_score='raise')
n_f1 = cross_val_score(bagging, X, y, scoring='f1', cv=cv, n_jobs=-1, error_score='raise')

In [54]:
# report performance
print('Accuracy: %.3f (%.3f)' % (mean(n_scores), std(n_scores)))
print('Precision: %.3f' % (mean(n_precision))) 
print('Recall: %.3f' % (mean(n_recall)))
print('f1-score: %.3f' % (mean(n_f1)))

Accuracy: 0.805 (0.078)
Precision: 0.862
Recall: 0.834
f1-score: 0.853


Train_Test_Split Method 

In [55]:
#Train the model using the training sets
bagging.fit(X_train, y_train) 

BaggingClassifier()

In [56]:
#Predict the response for test dataset
y_pred = bagging.predict(X_test)

In [57]:
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
print("Precision:", metrics.precision_score(y_test, y_pred))
print("Recall:", metrics.recall_score(y_test, y_pred))
print("f1-score:", metrics.f1_score(y_test, y_pred))

Accuracy: 0.8111111111111111
Precision: 0.8548387096774194
Recall: 0.8688524590163934
f1-score: 0.8617886178861789


# Random Forest 

10-Fold Cross Validation Method 

In [58]:
# define the model
rf = RandomForestClassifier()

In [59]:
# evaluate the model
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
n_scores = cross_val_score(rf, X, y, scoring='accuracy', cv=cv, n_jobs=-1, error_score='raise')
n_precision = cross_val_score(rf, X, y, scoring='precision', cv=cv, n_jobs=-1, error_score='raise')
n_recall = cross_val_score(rf, X, y, scoring='recall', cv=cv, n_jobs=-1, error_score='raise')
n_f1 = cross_val_score(rf, X, y, scoring='f1', cv=cv, n_jobs=-1, error_score='raise')

In [60]:
# report performance
print('Accuracy: %.3f (%.3f)' % (mean(n_scores), std(n_scores)))
print('Precision: %.3f' % (mean(n_precision))) 
print('Recall: %.3f' % (mean(n_recall)))
print('f1-score: %.3f' % (mean(n_f1)))

Accuracy: 0.842 (0.067)
Precision: 0.874
Recall: 0.905
f1-score: 0.888


Train_Test_Split Method 

In [61]:
#Train the model using the training sets
rf.fit(X_train, y_train) 

RandomForestClassifier()

In [62]:
#Predict the response for test dataset
y_pred = rf.predict(X_test)

In [63]:
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
print("Precision:", metrics.precision_score(y_test, y_pred))
print("Recall:", metrics.recall_score(y_test, y_pred))
print('f1-score: %.3f' % (mean(n_f1)))

Accuracy: 0.8222222222222222
Precision: 0.8571428571428571
Recall: 0.8852459016393442
f1-score: 0.888


# XG - Boost 

10-Fold Cross Validation Method 

In [64]:
# define the model
xgb = XGBClassifier()

In [65]:
# evaluate the model
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
n_scores = cross_val_score(xgb, X, y, scoring='accuracy', cv=cv, n_jobs=-1, error_score='raise')
n_precision = cross_val_score(xgb, X, y, scoring='precision', cv=cv, n_jobs=-1, error_score='raise')
n_recall = cross_val_score(xgb, X, y, scoring='recall', cv=cv, n_jobs=-1, error_score='raise')
n_f1 = cross_val_score(xgb, X, y, scoring='f1', cv=cv, n_jobs=-1, error_score='raise')

In [66]:
# report performance
print('Accuracy: %.3f (%.3f)' % (mean(n_scores), std(n_scores)))
print('Precision: %.3f' % (mean(n_precision))) 
print('Recall: %.3f' % (mean(n_recall)))
print('f1-score: %.3f' % (mean(n_f1)))

Accuracy: 0.831 (0.064)
Precision: 0.871
Recall: 0.887
f1-score: 0.876


Train_Test_Split Method 

In [67]:
#Train the model using the training sets
xgb.fit(X_train, y_train)



XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
              importance_type='gain', interaction_constraints='',
              learning_rate=0.300000012, max_delta_step=0, max_depth=6,
              min_child_weight=1, missing=nan, monotone_constraints='()',
              n_estimators=100, n_jobs=8, num_parallel_tree=1, random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
              tree_method='exact', validate_parameters=1, verbosity=None)

In [68]:
#Predict the response for test dataset
y_pred = xgb.predict(X_test)

In [69]:
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
print("Precision:", metrics.precision_score(y_test, y_pred))
print("Recall:", metrics.recall_score(y_test, y_pred))
print("f1-score:", metrics.f1_score(y_test, y_pred))

Accuracy: 0.8444444444444444
Precision: 0.8615384615384616
Recall: 0.9180327868852459
f1-score: 0.8888888888888888


# Cat Boost

# K-fold

In [70]:
# evaluate the model
model = CatBoostClassifier(verbose=0, n_estimators=100)

In [71]:
# evaluate the model
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
n_scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1, error_score='raise')
n_precision = cross_val_score(model, X, y, scoring='precision', cv=cv, n_jobs=-1, error_score='raise')
n_recall = cross_val_score(model, X, y, scoring='recall', cv=cv, n_jobs=-1, error_score='raise')
n_f1 = cross_val_score(model, X, y, scoring='f1', cv=cv, n_jobs=-1, error_score='raise')

In [72]:
print('Accuracy: %.3f (%.3f)' % (mean(n_scores), std(n_scores)))
print('Precision: %.3f' % (mean(n_precision))) 
print('Recall: %.3f' % (mean(n_recall)))
print('f1-score: %.3f' % (mean(n_f1)))

Accuracy: 0.846 (0.057)
Precision: 0.863
Recall: 0.926
f1-score: 0.891


Test_Train_Split

In [73]:
# Building CatBoost Model 
# Create CatBoost classifer object
#clf =CatBoostClassifier()

# Train CatBoost Classifer
model = model.fit(X_train,y_train)

#Predict the response for test dataset
y_pred = model.predict(X_test)

In [74]:
# Evaluating Model 
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y_test, y_pred)) 
print("Precision:", metrics.precision_score(y_test, y_pred))
print("Recall:", metrics.recall_score(y_test, y_pred))
print("f1-score:", metrics.f1_score(y_test, y_pred))

Accuracy: 0.8333333333333334
Precision: 0.859375
Recall: 0.9016393442622951
f1-score: 0.88


# Light GBM

10 Fold Cross Validation Method

In [75]:
# define the model
model = LGBMClassifier()

In [76]:
# evaluate the model
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
n_scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1)
n_precision = cross_val_score(model, X, y, scoring='precision', cv=cv, n_jobs=-1, error_score='raise')
n_recall = cross_val_score(model, X, y, scoring='recall', cv=cv, n_jobs=-1, error_score='raise')
n_f1 = cross_val_score(model, X, y, scoring='f1', cv=cv, n_jobs=-1, error_score='raise')

In [77]:
# report performance
print('Accuracy: %.3f (%.3f)' % (mean(n_scores), std(n_scores))) 
print('Precision: %.3f' % (mean(n_precision))) 
print('Recall: %.3f' % (mean(n_recall)))
print('f1-score: %.3f' % (mean(n_f1)))

Accuracy: 0.845 (0.066)
Precision: 0.875
Recall: 0.906
f1-score: 0.888


Test_Train_Split Method

In [78]:
# Building LIGHT GBM Model 
# Create Light GBM classifer object
#clf =LGBMClassifier()

# Train Light GBM Classifer
model = model.fit(X_train,y_train)

#Predict the response for test dataset
y_pred = model.predict(X_test)

In [79]:
# Evaluating Model 
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y_test, y_pred)) 
print("Precision:", metrics.precision_score(y_test, y_pred))
print("Recall:", metrics.recall_score(y_test, y_pred))
print("f1-score:", metrics.f1_score(y_test, y_pred))

Accuracy: 0.8333333333333334
Precision: 0.859375
Recall: 0.9016393442622951
f1-score: 0.88


# Decision Tree

In [80]:
# Building Decision Tree Model 
# Create Decision Tree classifer object
model = DecisionTreeClassifier()

Test_Train_Split_Method

In [81]:
# Train Decision Tree Classifer
model = model.fit(X_train,y_train)

#Predict the response for test dataset
y_pred = model.predict(X_test)

In [82]:
# Evaluating Model 
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
print("Precision:", metrics.precision_score(y_test, y_pred))
print("Recall:", metrics.recall_score(y_test, y_pred))
print("f1-score:", metrics.f1_score(y_test, y_pred))


Accuracy: 0.8111111111111111
Precision: 0.84375
Recall: 0.8852459016393442
f1-score: 0.864


10 Fold Cross Validation

In [83]:
# evaluate the model
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
n_scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1, error_score='raise')
n_precision = cross_val_score(model, X, y, scoring='precision', cv=cv, n_jobs=-1, error_score='raise')
n_recall = cross_val_score(model, X, y, scoring='recall', cv=cv, n_jobs=-1, error_score='raise')
n_f1 = cross_val_score(model, X, y, scoring='f1', cv=cv, n_jobs=-1, error_score='raise')

In [84]:
print('Accuracy: %.3f (%.3f)' % (mean(n_scores), std(n_scores)))
print('Precision: %.3f' % (mean(n_precision))) 
print('Recall: %.3f' % (mean(n_recall)))
print('f1-score: %.3f' % (mean(n_f1)))

Accuracy: 0.790 (0.065)
Precision: 0.836
Recall: 0.850
f1-score: 0.835


# AdaBoost

In [85]:
# define the model
model = AdaBoostClassifier()

10-Fold Cross Validation

In [86]:
# evaluate the model
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
n_scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1, error_score='raise')
n_precision = cross_val_score(model, X, y, scoring='precision', cv=cv, n_jobs=-1, error_score='raise')
n_recall = cross_val_score(model, X, y, scoring='recall', cv=cv, n_jobs=-1, error_score='raise')
n_f1 = cross_val_score(model, X, y, scoring='f1', cv=cv, n_jobs=-1, error_score='raise')

In [87]:
# report performance
print('Accuracy: %.3f (%.3f)' % (mean(n_scores), std(n_scores)))
print('Precision: %.3f' % (mean(n_precision))) 
print('Recall: %.3f' % (mean(n_recall)))
print('f1-score: %.3f' % (mean(n_f1)))

Accuracy: 0.818 (0.065)
Precision: 0.860
Recall: 0.880
f1-score: 0.867


Test_Train_Split Method

In [88]:
# BuildingAdaBOOST Model 
# Create AdaBoost classifer object
#clf = AdaBoostClassifier()

# Train AdaBoost Classifer
model = model.fit(X_train,y_train)

#Predict the response for test dataset
y_pred = model.predict(X_test)

In [89]:
# Evaluating Model 
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y_test, y_pred)) 
print("Precision:", metrics.precision_score(y_test, y_pred))
print("Recall:", metrics.recall_score(y_test, y_pred))
print("f1-score:", metrics.f1_score(y_test, y_pred))

Accuracy: 0.8444444444444444
Precision: 0.8852459016393442
Recall: 0.8852459016393442
f1-score: 0.8852459016393442


# Logistic Regression

In [90]:
# instantiate the model (using the default parameters)
model = LogisticRegression()

Test_Train_Split_Method

In [91]:
# fit the model with data
model.fit(X_train,y_train)

LogisticRegression()

In [92]:
# Predict the model 
y_pred=model.predict(X_test)

In [93]:
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
print("Precision:", metrics.precision_score(y_test, y_pred))
print("Recall:", metrics.recall_score(y_test, y_pred))
print("f1-score:", metrics.f1_score(y_test, y_pred))

Accuracy: 0.7777777777777778
Precision: 0.8253968253968254
Recall: 0.8524590163934426
f1-score: 0.8387096774193549


10-Fold Cross Validation

In [94]:
# evaluate the model
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
n_scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1, error_score='raise')
n_precision = cross_val_score(model, X, y, scoring='precision', cv=cv, n_jobs=-1, error_score='raise')
n_recall = cross_val_score(model, X, y, scoring='recall', cv=cv, n_jobs=-1, error_score='raise')
n_f1 = cross_val_score(model, X, y, scoring='f1', cv=cv, n_jobs=-1, error_score='raise')

In [95]:
# report performance
print('Accuracy: %.3f (%.3f)' % (mean(n_scores), std(n_scores)))
print('Precision: %.3f' % (mean(n_precision))) 
print('Recall: %.3f' % (mean(n_recall)))
print('f1-score: %.3f' % (mean(n_f1)))

Accuracy: 0.821 (0.062)
Precision: 0.848
Recall: 0.903
f1-score: 0.873


# ETC

k-fold

In [5]:
# define the model
etc = ExtraTreesClassifier()

In [6]:
# evaluate the model
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
n_scores = cross_val_score(etc, X, y, scoring='accuracy', cv=cv, n_jobs=-1, error_score='raise')
n_precision = cross_val_score(etc, X, y, scoring='precision', cv=cv, n_jobs=-1, error_score='raise')
n_recall = cross_val_score(etc, X, y, scoring='recall', cv=cv, n_jobs=-1, error_score='raise')
n_f1 = cross_val_score(etc, X, y, scoring='f1', cv=cv, n_jobs=-1, error_score='raise')

In [7]:
# report performance
print('Accuracy: %.3f (%.3f)' % (mean(n_scores), std(n_scores)))
print('Precision: %.3f' % (mean(n_precision))) 
print('Recall: %.3f' % (mean(n_recall)))
print('f1-score: %.3f' % (mean(n_f1)))

Accuracy: 0.818 (0.058)
Precision: 0.838
Recall: 0.918
f1-score: 0.869


TTS

In [10]:
# Train AdaBoost Classifer
etc = etc.fit(X_train,y_train)

#Predict the response for test dataset
y_pred = etc.predict(X_test)

In [11]:
# Evaluating Model 
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y_test, y_pred)) 
print("Precision:", metrics.precision_score(y_test, y_pred))
print("Recall:", metrics.recall_score(y_test, y_pred))
print("f1-score:", metrics.f1_score(y_test, y_pred))

Accuracy: 0.7333333333333333
Precision: 0.7397260273972602
Recall: 0.9152542372881356
f1-score: 0.818181818181818
