In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
input_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Show every column and every row when a DataFrame is displayed.
# Use the public `pd.set_option` entry point rather than going through the
# incidental `pd.pandas` attribute.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [None]:
dataset = pd.read_csv('/kaggle/input/heart-failure-clinical-data/heart_failure_clinical_records_dataset.csv')

In [None]:
dataset.head(100)

In [None]:
print(dataset.isnull().sum())

In [None]:
dataset.info()

In [None]:
features = dataset.columns

In [None]:
features

In [None]:
categorical_features = [feature for feature in features if len(dataset[feature].unique())<10]

In [None]:
categorical_features = [feature for feature in categorical_features if feature != 'DEATH_EVENT']

In [None]:
print(categorical_features)

In [None]:
# Count plot of every categorical feature, with bars split by DEATH_EVENT.
# The grid keeps the original 3x3 layout and only grows extra rows when there
# are more than 9 features, so plt.subplot never receives an out-of-range index.
cat_grid_cols = 3
cat_grid_rows = max(3, -(-len(categorical_features) // cat_grid_cols))  # ceil division

plt.figure(figsize=(18,12))
plt.suptitle('Categorical Features', fontsize=15)
for position, feature in enumerate(categorical_features, start=1):
    ax = plt.subplot(cat_grid_rows, cat_grid_cols, position)
    # Pass everything by keyword: recent seaborn releases interpret the first
    # positional argument as `data`, not as the variable to count.
    sns.countplot(data=dataset, x=feature, hue='DEATH_EVENT', ax=ax)
plt.show()

In [None]:
continuous_features = dataset.drop(categorical_features, axis=1).columns

In [None]:
continuous_features = [feature for feature in continuous_features if feature != 'DEATH_EVENT']

In [None]:
# Histogram with a KDE overlay for every continuous feature.
# The grid keeps the original 3x3 layout and only grows extra rows when there
# are more than 9 features, so plt.subplot never receives an out-of-range index.
cont_grid_cols = 3
cont_grid_rows = max(3, -(-len(continuous_features) // cont_grid_cols))  # ceil division

plt.figure(figsize=(18,12))
plt.suptitle('Continuous Features', fontsize=20)
for position, feature in enumerate(continuous_features, start=1):
    ax = plt.subplot(cont_grid_rows, cont_grid_cols, position)
    # sns.distplot is deprecated; histplot with a density-normalised histogram
    # and a KDE curve is its documented replacement.
    sns.histplot(data=dataset, x=feature, kde=True, stat='density', ax=ax)
plt.show()

In [None]:
# Pairwise correlation between all predictor columns (target excluded).
predictor_corr = dataset.drop('DEATH_EVENT', axis=1).corr()

plt.figure(figsize=(12, 8))
sns.heatmap(predictor_corr, annot=True, cmap="YlGnBu")
plt.show()

In [None]:
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()

sc.fit_transform(dataset[continuous_features])

In [None]:
from sklearn.model_selection import train_test_split

X = dataset.drop('DEATH_EVENT', axis=1)
y = dataset['DEATH_EVENT']

train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.20, random_state=1)

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report,accuracy_score,average_precision_score,balanced_accuracy_score,precision_score

# max_depth candidates tried: [4, 6, 8]; 6 worked best.
# random_state is pinned so the fitted tree (and therefore the report and the
# rendered graph) is identical on every Restart & Run All.
dt = DecisionTreeClassifier(criterion='entropy', max_depth=6, random_state=1)
dt.fit(train_X, train_y)
pred = dt.predict(test_X)


In [None]:
# Per-class precision / recall / F1 of the decision tree on the held-out rows.
print('\t Decision Tree Classifier Classification Report \n')
dt_report = classification_report(test_y, pred)
print(dt_report)

In [None]:
from sklearn import tree
import graphviz

# Render the fitted decision tree with graphviz (no matplotlib figure is needed).

# Use the exact columns the tree was trained on instead of assuming the target
# is the last column of `dataset`.
featureNames = list(train_X.columns)

# export_graphviz expects class names in ascending order of the class values,
# so map each fitted class explicitly: DEATH_EVENT 0 -> alive, 1 -> death.
class_labels = {0: 'alive', 1: 'death'}
targetNames = [class_labels[int(cls)] for cls in dt.classes_]

dot_data = tree.export_graphviz(dt, feature_names=featureNames, class_names=targetNames, filled=True)

# Draw graph
graph = graphviz.Source(dot_data, format="png")
graph

In [None]:
# Second model: xgboost's random-forest classifier (XGBRFClassifier).
import xgboost

# max_depth candidates tried: [2, 3, 4, 5]; 2 worked best.
xgb_params = {'max_depth': 2, 'random_state': 1}
xgb_clf = xgboost.XGBRFClassifier(**xgb_params)
xgb_clf.fit(train_X, train_y)
xgb_pred = xgb_clf.predict(test_X)

In [None]:
# Per-class precision / recall / F1 of the xgboost model on the held-out rows.
print('\t XGB Classifier Classification Report \n')
xgb_report = classification_report(test_y, xgb_pred)
print(xgb_report)