In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and print the full path of every file found.
input_paths = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Import Data

In [None]:
# Location of the clinical-records CSV on the Kaggle input mount.
DATA_PATH = '/kaggle/input/heart-failure-clinical-data/heart_failure_clinical_records_dataset.csv'
data = pd.read_csv(DATA_PATH)

# Simple Data Exploration

In [None]:
# Print the frame summary: column names, non-null counts, dtypes and memory use.
data.info()

In [None]:
# Preview the first five rows (the default row count, written out explicitly).
data.head(n=5)

In [None]:
# Summary statistics, transposed so each input column becomes one row.
data.describe().transpose()

In [None]:
# Count missing values per column (axis=0 is the default reduction axis).
data.isna().sum(axis=0)

# Preprocessing

In [None]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_validate, KFold, train_test_split
from sklearn.metrics import make_scorer, accuracy_score, f1_score, roc_auc_score

In [None]:
# The last column is the target; every column before it is a feature.
X, y = data.iloc[:, :-1], data.iloc[:, -1]

In [None]:
# Rescale every feature column with MinMaxScaler (default range [0, 1]).
# After this cell X is the transformed array, not the original DataFrame slice.
# NOTE(review): the scaler is fitted on all rows, before the train/validation
# split and before cross-validation, so held-out rows contribute to the
# min/max statistics. Consider fitting it inside a Pipeline if a
# scale-sensitive model is ever used here.
mms = MinMaxScaler()
X = mms.fit(X).transform(X)

# Define Custom Metrics

In [None]:
def oob_(estimator, X, y):
    """Return the out-of-bag error (1 - ``oob_score_``) of a fitted estimator.

    ``X`` and ``y`` are accepted only so the function matches the
    ``scorer(estimator, X, y)`` call signature; they are not used. The
    estimator must expose ``oob_score_`` (i.e. be fitted with
    ``oob_score=True``).
    """
    return 1 - estimator.oob_score_


def accuracy(y_true, y_pred):
    """Fraction of predicted labels that match the true labels."""
    return accuracy_score(y_true, y_pred)


def f1(y_true, y_pred):
    """F1 score of the predicted labels (``f1_score`` defaults)."""
    return f1_score(y_true, y_pred)


def auc(y_true, y_pred):
    """Area under the ROC curve.

    ``y_pred`` should hold continuous scores or positive-class probabilities;
    passing hard 0/1 labels reduces the curve to a single operating point.
    """
    return roc_auc_score(y_true, y_pred)


# Metric name -> scorer, consumed by ``cross_validate``; results come back
# under ``test_<name>``.
scoring = {
    'oob': oob_,
    'accuracy': make_scorer(accuracy),
    'f1 score': make_scorer(f1),
    # ``make_scorer(auc)`` would feed ``predict()`` labels into roc_auc_score.
    # The built-in 'roc_auc' scorer uses the estimator's continuous output
    # (probabilities / decision values), matching the hold-out AUC, which is
    # computed from ``predict_proba``.
    'roc auc': 'roc_auc',
}

# Model

In [None]:
# Hold out 30% of the rows for validation. The fixed seed makes the split
# repeatable across kernel restarts; stratifying on y keeps the class
# proportions the same in the training and validation partitions.
Xt, Xv, yt, yv = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

In [None]:
# Seed the forest so the bootstrap samples and feature sub-sampling, and
# therefore the printed metrics, are the same on every run.
model = RandomForestClassifier(oob_score=True, random_state=42)
model.fit(Xt, yt)

# Hard labels feed accuracy/F1; AUC needs a continuous score, so use the
# predicted probability of the positive class (second column).
yp = model.predict(Xv)
yp_proba = model.predict_proba(Xv)[:, 1]

print('Accuracy Score :', np.round(accuracy_score(yv, yp), 4))
print('F1 Score :', np.round(f1_score(yv, yp), 4))
print('AUC Score :', np.round(roc_auc_score(yv, yp_proba), 4))
# OOB error is estimated from training rows left out of each tree's bootstrap.
print('OOB Error :', np.round(1 - model.oob_score_, 4))

# Cross Validation

In [None]:
# Without shuffling, KFold cuts the rows into three contiguous blocks, so any
# ordering present in the file carries straight into the folds. Shuffle with a
# fixed seed to get repeatable, order-independent folds.
fold = KFold(n_splits=3, shuffle=True, random_state=42)

# Seed the forest as well so the per-fold scores do not drift between runs.
cv = cross_validate(
    RandomForestClassifier(oob_score=True, random_state=42),
    X,
    y,
    scoring=scoring,
    cv=fold,
)

# Result

In [None]:
# Report column -> key of the per-fold score array in the cross_validate output.
metric_keys = {
    'OOB Error': 'test_oob',
    'Accuracy': 'test_accuracy',
    'F1 Score': 'test_f1 score',
    'ROC AUC': 'test_roc auc',
}

# One-row summary: model name followed by each metric's mean over the folds,
# rounded to four decimals.
summary_row = {'Model': 'RandomForestClassifier'}
for column, key in metric_keys.items():
    summary_row[column] = np.round(cv[key].mean(), 4)

cv_result = pd.DataFrame(summary_row, index=[0])

In [None]:
# Display the one-row table of fold-averaged cross-validation metrics.
cv_result