In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file found under the Kaggle input directory,
# then print the paths one per line in walk order
input_paths = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Set-Up

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import metrics

import matplotlib.pyplot as plt
import seaborn as sns

# Apply the seaborn theme first: sns.set() resets matplotlib's rcParams
# (font.size included), so an rc override made before it would be discarded.
# One call is enough -- each sns.set() call replaces the previous theme entirely.
sns.set(style="whitegrid", color_codes=True)

# Custom base font size, set after the theme so that it actually takes effect
plt.rc("font", size=14)

In [None]:
#import pydotplus
from sklearn.tree import DecisionTreeClassifier
from IPython.display import Image
from sklearn import tree

In [None]:
# Load dataset
DATA_PATH = '../input/heart-failure-clinical-data/heart_failure_clinical_records_dataset.csv'
records = pd.read_csv(DATA_PATH)

# Preview the first six rows
records.head(6)

The dependent variable, `DEATH_EVENT`, is binary.

## Data Wrangling

In [None]:
# Give the outcome column a lower-case name
column_renames = {'DEATH_EVENT': 'death_event'}
records = records.rename(columns=column_renames)

In [None]:
# Conditional probabilities of death given sex, i.e. P(death_event | sex).
# Rows are sex and columns are death_event, so normalize='index' makes each
# sex row sum to 1 (normalize='columns' would instead give P(sex | death_event)).
# With margins=True the extra 'All' row shows the overall outcome distribution.
pd.crosstab(
    records.sex,
    records.death_event,
    margins=True,
    normalize='index'
)

In [None]:
# Bar chart of how many records fall into each death_event class
sns.countplot(
    data=records,
    x='death_event',
    palette='Set1',
)
plt.show()

In [None]:
%matplotlib inline
pd.crosstab(records.sex, records.death_event).plot(kind='bar')
plt.title('Number of deaths by sex')
plt.xlabel('Sex')
plt.ylabel('Death event')

In [None]:
# Distribution of the age column, drawn through the explicit Axes interface
ax = records['age'].hist()
ax.set_title('Histogram of age')
ax.set_xlabel('Age')
ax.set_ylabel('Frequency')

In [None]:
# Reposition the outcome column at index 0: pop() removes it from the frame
# and returns it, insert() then puts it back in front
col_name = 'death_event'
records.insert(0, col_name, records.pop(col_name))
records.head()  # confirm the new column order

In [None]:
# Separate into input (features) and output (target) columns.
# The column is dropped via the explicit `columns=` keyword: passing the axis
# positionally (`drop('death_event', 1)`) is deprecated and raises a TypeError
# on pandas >= 2.0.
features, target = records.drop(columns='death_event'), records['death_event']

In [None]:
# Columns to rescale with StandardScaler
cols_to_norm = [
    'creatinine_phosphokinase',
    'ejection_fraction',
    'platelets',
    'serum_creatinine',
    'serum_sodium',
]

# NOTE(review): the scaler is fitted on every row here, before the train/test
# split performed further down, so test rows contribute to the scaling
# statistics. Consider fitting on the training split only.
scaler = StandardScaler()
features[cols_to_norm] = scaler.fit_transform(features[cols_to_norm])

features.head()

## Model

### Logistic Regression

In [None]:
# Hold out a test set (no test_size given, so the library default applies);
# the seed is fixed so the same partition is produced on every run
split = train_test_split(features, target, random_state=4)
x_train, x_test, y_train, y_test = split

In [None]:
# Build a logistic regression (liblinear solver) and train it on the training
# split in one step; fit() hands back the fitted estimator
log_reg = LogisticRegression(solver='liblinear').fit(x_train, y_train)

# Predicted class labels for the held-out test rows
y_pred = log_reg.predict(x_test)

In [None]:
# Fraction of test rows whose predicted label matches the true label,
# reported as a percentage rounded to two decimals
accuracy = metrics.accuracy_score(y_true=y_test, y_pred=y_pred)
accuracy_perc = round(accuracy * 100, 2)

print(f'The accuracy of the model is {accuracy_perc} %')

### Decision Tree

In [None]:
# Create decision tree classifier object; random_state is pinned to 0 so that
# repeated runs use the same seed
d_tree = DecisionTreeClassifier(random_state=0)

In [None]:
# Train the decision tree on the training split and keep the value returned by fit().
# NOTE(review): scikit-learn's fit() conventionally returns the estimator itself, so
# d_tree_mod presumably refers to the same object as d_tree -- confirm.
d_tree_mod = d_tree.fit(x_train, y_train)

In [None]:
# Predict test-set labels with the trained decision tree.
# NOTE: this rebinds y_pred, replacing the logistic regression predictions
# assigned earlier in the notebook.
y_pred = d_tree_mod.predict(x_test)

In [None]:
# Accuracy of the decision tree: y_pred holds its test-set predictions at this point.
# Expressed as a percentage rounded to two decimals.
accuracy = metrics.accuracy_score(y_true=y_test, y_pred=y_pred)
accuracy_perc = round(accuracy * 100, 2)

print(f'The accuracy of the model is {accuracy_perc} %')