In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily enumerate every file beneath the Kaggle input directory and print
# its full path, one per line, in the order os.walk visits them.
input_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import warnings
warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt, seaborn as sns

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score, RandomizedSearchCV
from sklearn.metrics import confusion_matrix, precision_recall_curve,\
roc_auc_score, roc_curve, accuracy_score, recall_score, precision_score

from imblearn.over_sampling import SMOTE

In [None]:
# Read the heart-failure clinical records CSV from the Kaggle input directory
# and preview the first rows.
DATA_PATH = '/kaggle/input/heart-failure-clinical-data/heart_failure_clinical_records_dataset.csv'
df = pd.read_csv(DATA_PATH)
df.head()

In [None]:
# Print the column dtypes and non-null counts of the loaded frame.
df.info()

In [None]:
# Count how many rows fall into each DEATH_EVENT class.
df['DEATH_EVENT'].value_counts()

In [None]:
# Oversample with SMOTE so both DEATH_EVENT classes are equally represented,
# then rebuild `df` with the features first and the label as the last column.
# NOTE(review): the resampling runs on the whole frame, before the
# train/test split performed later in this notebook, so synthetic rows end up
# in the test partition and the reported test metrics are likely optimistic.
# Consider splitting first and resampling only the training rows.
feature_part = df.iloc[:, :-1]   # every column except the last
label_part = df.DEATH_EVENT

smote = SMOTE(random_state=50)
X_smote, y_smote = smote.fit_resample(feature_part, label_part)

df = pd.concat([X_smote, y_smote], axis=1)
df.head()

In [None]:
# (rows, columns) of the frame after resampling.
df.shape

In [None]:
# Re-check the per-class row counts of DEATH_EVENT after resampling.
df['DEATH_EVENT'].value_counts()

In [None]:
# One box plot per column (all columns except the last), arranged on a
# 3x4 grid of subplots.
feature_frame = df.iloc[:, :-1]
feature_frame.plot.box(subplots=True, layout=(3, 4), figsize=(15, 15))
plt.show()

In [None]:
# Drop rows that fall outside the 1.5*IQR fences, one column at a time
# (every column except the last). `df` is re-filtered inside the loop, so each
# column's quartiles are computed on the rows that survived the filters of the
# columns processed before it.
for col in df.columns[:-1]:
    q1, q3 = df[col].quantile([0.25, 0.75])
    spread = q3 - q1
    # between() is inclusive on both ends, i.e. values on a fence are kept.
    df = df[df[col].between(q1 - (1.5 * spread), q3 + (1.5 * spread))]

In [None]:
# Redraw the per-column box plots (all columns except the last) on a 3x4
# grid for the current contents of `df`.
feature_frame = df.iloc[:, :-1]
feature_frame.plot.box(subplots=True, layout=(3, 4), figsize=(15, 15))
plt.show()

In [None]:
# Print dtypes and non-null counts again to see how many rows remain in `df`.
df.info()

In [None]:
# Pairwise correlations between all columns, drawn as a lower-triangle heatmap.
corr = df.corr()

# Explicit boolean mask: True strictly above the diagonal, False elsewhere.
# seaborn hides the cells where the mask is truthy; deriving the mask from the
# correlation values themselves (np.triu(corr, 1)) would leave any
# upper-triangle cell whose correlation is exactly 0 visible.
mask = np.triu(np.ones(corr.shape, dtype=bool), k=1)

plt.figure(figsize=(20,8))
sns.heatmap(corr, mask=mask, annot=True, cmap='Greens')
plt.show()

In [None]:
# Split the rows of `df` into train (70%) and test (the remainder); the fixed
# random_state makes the shuffle repeatable.
train, test = train_test_split(df, train_size=0.7, random_state=100)

In [None]:
# Standardise every column except the last one. The scaler learns its
# statistics from the training rows only and is then applied unchanged to
# both partitions.
cols = df.columns[:-1]

scaler = StandardScaler().fit(train[cols])
train[cols] = scaler.transform(train[cols])
test[cols] = scaler.transform(test[cols])

In [None]:
# Summary statistics of the training partition after scaling.
train.describe()

In [None]:
# Summary statistics of the test partition after applying the train-fitted scaler.
test.describe()

In [None]:
# Separate the predictors from the DEATH_EVENT label in each partition.
X_train, y_train = train.drop(columns='DEATH_EVENT'), train['DEATH_EVENT']
X_test, y_test = test.drop(columns='DEATH_EVENT'), test['DEATH_EVENT']

In [None]:
# Shuffled, seeded 5-fold stratified splitter used for cross-validation.
folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=60)

# Baseline: mean cross-validated accuracy of a default logistic regression
# on the training partition.
lr = LogisticRegression()
cv_score = cross_val_score(lr, X_train, y_train, scoring='accuracy', cv=folds)
np.mean(cv_score)

In [None]:
# Mean cross-validated accuracy of a seeded gradient-boosting classifier with
# otherwise default settings, evaluated on the same folds.
gb = GradientBoostingClassifier(random_state=20)
cv_score = cross_val_score(gb, X_train, y_train, scoring='accuracy', cv=folds)
np.mean(cv_score)

In [None]:
# Candidate values for the gradient-boosting hyper-parameters; the randomized
# search draws combinations from these lists.
hyp_Params = {'learning_rate':[0.05,0.08,0.1,0.15,0.2,0.3,0.4],
             'n_estimators':[85,100,200,300,400],
             'max_depth':[2,3,4,5,7,9,11,13,15,18,22],
             'min_samples_split':[2,4,5,8,10,15,20,30,35,40,45,50],
             'min_samples_leaf':[1,2,4,5,8,10,15,20]}

# Evaluate 300 sampled combinations with the shared CV folds, scored by accuracy.
# random_state pins which combinations are drawn, so the best score/params
# reported below are the same on every Restart & Run All.
# verbose is kept low: per-fit logging for 300 candidates x 5 folds would
# bury the result under well over a thousand log lines.
rnd = RandomizedSearchCV(estimator=gb, param_distributions=hyp_Params, n_iter=300, n_jobs=-1, cv=folds,
                        scoring='accuracy', verbose=1, random_state=20)
rnd.fit(X_train,y_train)

rnd.best_score_,rnd.best_params_

In [None]:
# The search was created with the default refit=True, so best_estimator_ has
# already been refitted on the full training partition with the winning
# parameters; fitting it a second time would only repeat that work.
model = rnd.best_estimator_

# Hard class predictions for the test rows.
y_pred = model.predict(X_test)
# Predicted probability of the second class in model.classes_ (column 1 of
# predict_proba), taken with array slicing instead of a Python-level loop.
test_probs = model.predict_proba(X_test)[:, 1]

In [None]:
# ROC curve for the test predictions, with the AUC (rounded to two decimals)
# shown in the legend.
fpr, tpr, thresh = roc_curve(y_test, test_probs)
score = round(roc_auc_score(y_test, test_probs), 2)

fig, ax = plt.subplots(figsize=(20, 8))
ax.plot(fpr, tpr, linewidth=4, label='AUC Score = {0}'.format(score))
ax.set_xlabel('FPR', fontsize=20)
ax.set_ylabel('TPR', fontsize=20)
ax.legend(loc='lower right', fontsize=20)
plt.show()

In [None]:
# Confusion matrix of the test predictions. fmt='d' prints the cell counts as
# plain integers; the default annotation format switches to scientific
# notation once a count reaches three digits.
conf = confusion_matrix(y_test, y_pred)
sns.heatmap(conf, annot=True, fmt='d')
plt.show()

# Headline metrics on the test partition, kept as fractions rounded to 2 decimals.
accuracy = round(accuracy_score(y_test,y_pred),2)
sensitivity = round(recall_score(y_test,y_pred),2)
precision = round(precision_score(y_test,y_pred),2)
# Format with a fixed precision: multiplying the rounded fraction by 100 can
# produce float artefacts (e.g. 0.57 * 100 -> 56.99999999999999) that would
# otherwise be printed verbatim.
print('Accuracy = {0:.1f}%'.format(accuracy*100))
print('Sensitivity = {0:.1f}%'.format(sensitivity*100))
print('Precision = {0:.1f}%'.format(precision*100))