In [9]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.tree import DecisionTreeClassifier
from imblearn.over_sampling import RandomOverSampler

In [2]:
df = pd.read_csv('1402-12-03/heart_failure_clinical_records_dataset.csv')
df.head()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,75.0,0,582,0,20,1,265000.0,1.9,130,1,0,4,1
1,55.0,0,7861,0,38,0,263358.03,1.1,136,1,0,6,1
2,65.0,0,146,0,20,0,162000.0,1.3,129,1,1,7,1
3,50.0,1,111,0,20,0,210000.0,1.9,137,1,0,7,1
4,65.0,1,160,1,20,0,327000.0,2.7,116,0,0,8,1


In [3]:
X=df.drop('DEATH_EVENT',axis=1).to_numpy()
Y=df['DEATH_EVENT'].to_numpy()

In [11]:
x_train,x_test,y_train,y_test=train_test_split(X,Y,test_size=0.2,random_state=1)

In [34]:
params = [{'rf__n_estimators':[i for i in range(50,101)],
           'rf__criterion':['gini', 'entropy', 'log_loss'],
           'rf__max_depth':[1,2,3,4],
           'rf__oob_score':['True','False']
           }]
pipe = Pipeline([('scaler', StandardScaler()),('rf',RandomForestClassifier(random_state=42))])
_rf = GridSearchCV(pipe,param_grid=params,cv=3,verbose=0)
_rf.fit(x_train,y_train)
y_pred = _rf.predict(x_test)

In [35]:
print(classification_report(y_test,y_pred))
_rf.best_params_

              precision    recall  f1-score   support

           0       0.90      0.98      0.94        46
           1       0.90      0.64      0.75        14

    accuracy                           0.90        60
   macro avg       0.90      0.81      0.84        60
weighted avg       0.90      0.90      0.89        60



{'rf__criterion': 'gini', 'rf__max_depth': 3, 'rf__n_estimators': 74}

In [None]:
pip = Pipeline([('scaler', StandardScaler()),('dt',DecisionTreeClassifier(random_state=42))])
params = [{'dt__criterion':['gini', 'entropy', 'log_loss'],
           'dt__max_depth':[i for i in range(1,4)],}]
_dt = GridSearchCV(pip,param_grid=params,cv=3,verbose=0)
_dt.fit(x_train,y_train)
y_pred = _dt.predict(x_test)

In [32]:
print(classification_report(y_test,y_pred))
_dt.best_params_

              precision    recall  f1-score   support

           0       0.90      0.93      0.91        46
           1       0.75      0.64      0.69        14

    accuracy                           0.87        60
   macro avg       0.82      0.79      0.80        60
weighted avg       0.86      0.87      0.86        60



{'dt__criterion': 'gini', 'dt__max_depth': 1}

In [47]:
ros = RandomOverSampler(random_state=42)
x_resampled, y_resampled = ros.fit_resample(X,Y)

In [48]:
x_train,x_test,y_train,y_test=train_test_split(x_resampled,y_resampled,test_size=0.2,random_state=42)

In [49]:
pip = Pipeline([('scaler', StandardScaler()),('dt',DecisionTreeClassifier(random_state=42))])
params = [{'dt__criterion':['gini', 'entropy', 'log_loss'],
           'dt__max_depth':[i for i in range(1,4)],}]
_dt = GridSearchCV(pip,param_grid=params,cv=3,verbose=0)
_dt.fit(x_train,y_train)
y_pred = _dt.predict(x_test)
print(classification_report(y_test,y_pred))
_dt.best_params_

              precision    recall  f1-score   support

           0       0.82      0.76      0.78        41
           1       0.77      0.83      0.80        41

    accuracy                           0.79        82
   macro avg       0.79      0.79      0.79        82
weighted avg       0.79      0.79      0.79        82



{'dt__criterion': 'gini', 'dt__max_depth': 3}

In [50]:
params = [{'rf__n_estimators':[i for i in range(50,101)],
           'rf__criterion':['gini', 'entropy', 'log_loss'],
           'rf__max_depth':[1,2,3,4],
           'rf__oob_score':['True','False']
           }]
pipe = Pipeline([('scaler', StandardScaler()),('rf',RandomForestClassifier(random_state=42))])
_rf = GridSearchCV(pipe,param_grid=params,cv=3,verbose=0)
_rf.fit(x_train,y_train)
y_pred = _rf.predict(x_test)
print(classification_report(y_test,y_pred))
_rf.best_params_

              precision    recall  f1-score   support

           0       0.87      0.80      0.84        41
           1       0.82      0.88      0.85        41

    accuracy                           0.84        82
   macro avg       0.84      0.84      0.84        82
weighted avg       0.84      0.84      0.84        82



{'rf__criterion': 'gini', 'rf__max_depth': 4, 'rf__n_estimators': 83}