# Import Libraries and Load Data

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

# Read the heart-failure clinical records CSV from its relative '../input/...'
# location, then preview the first rows to confirm it parsed as expected.
csv_path = '../input/heart-failure-clinical-data/heart_failure_clinical_records_dataset.csv'
df = pd.read_csv(csv_path)
df.head()

# Data Analysis

In [None]:
# Summarise column names, dtypes and non-null counts.
df.info()

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) per numeric column.
df.describe()

In [None]:
# List the distinct values present in the DEATH_EVENT column.
df['DEATH_EVENT'].unique()

In [None]:
# Count missing values per column.
df.isnull().sum()

In [None]:
# Split the DEATH_EVENT column by value so the class balance can be inspected.
died_mask = df['DEATH_EVENT'] == 1
yes = df.loc[died_mask, 'DEATH_EVENT']
no = df.loc[df['DEATH_EVENT'] == 0, 'DEATH_EVENT']

# Print the DEATH_EVENT == 0 count first, then the DEATH_EVENT == 1 count.
for outcome_rows in (no, yes):
    print(len(outcome_rows))

# Data Visualization

In [None]:
# Pearson correlation matrix of the frame, drawn as a heatmap annotated with
# the coefficient in every cell.
corr_matrix = df.corr(method='pearson')
fig, ax = plt.subplots(figsize=(16, 10))
sns.heatmap(corr_matrix, annot=True, ax=ax)

In [None]:
# Distribution of the age column: histogram with a KDE overlay.
# `sns.distplot` is deprecated and slated for removal; `histplot` with
# stat='density' and kde=True is its documented replacement and keeps the
# density-scaled y-axis that distplot produced.
sns.histplot(x=df['age'], kde=True, stat='density')

In [None]:
# Density of creatinine_phosphokinase, one filled curve per DEATH_EVENT value;
# common_norm=False normalises each curve independently.
kde_options = {
    "fill": True,
    "common_norm": False,
    "palette": "crest",
    "alpha": .5,
    "linewidth": 0,
}
sns.kdeplot(data=df, x="creatinine_phosphokinase", hue="DEATH_EVENT", **kde_options)

In [None]:
# Density of platelets, one filled curve per DEATH_EVENT value;
# common_norm=False normalises each curve independently.
kde_options = {
    "fill": True,
    "common_norm": False,
    "palette": "crest",
    "alpha": .5,
    "linewidth": 0,
}
sns.kdeplot(data=df, x="platelets", hue="DEATH_EVENT", **kde_options)

In [None]:
# Density of serum_creatinine, one filled curve per DEATH_EVENT value;
# common_norm=False normalises each curve independently.
kde_options = {
    "fill": True,
    "common_norm": False,
    "palette": "crest",
    "alpha": .5,
    "linewidth": 0,
}
sns.kdeplot(data=df, x="serum_creatinine", hue="DEATH_EVENT", **kde_options)

In [None]:
# Density of serum_sodium, one filled curve per DEATH_EVENT value;
# common_norm=False normalises each curve independently.
kde_options = {
    "fill": True,
    "common_norm": False,
    "palette": "crest",
    "alpha": .5,
    "linewidth": 0,
}
sns.kdeplot(data=df, x="serum_sodium", hue="DEATH_EVENT", **kde_options)

In [None]:
# Density of the time column, one filled curve per DEATH_EVENT value;
# common_norm=False normalises each curve independently.
kde_options = {
    "fill": True,
    "common_norm": False,
    "palette": "crest",
    "alpha": .5,
    "linewidth": 0,
}
sns.kdeplot(data=df, x="time", hue="DEATH_EVENT", **kde_options)

In [None]:
# Compare the age distribution of rows with DEATH_EVENT == 1 against the age
# distribution of the whole frame, as side-by-side histogram bars.
age_yes = df[df['DEATH_EVENT'] == 1].age
age = df.age

plt.figure(figsize=(8,6))
plt.xlabel('Age')
# The y-axis of a histogram is the number of rows per bin, not the death
# event itself, so label it as a count.
plt.ylabel('Number of patients')
plt.hist([age_yes, age], label=['Death Event', 'Total per age'])
plt.legend()

In [None]:
# Histogram of platelets restricted to rows with DEATH_EVENT == 1.
plat = df[df['DEATH_EVENT'] == 1].platelets

plt.figure(figsize=(8,6))
plt.xlabel('Platelets')
# The y-axis of a histogram is the number of rows per bin, not the death
# event itself, so label it as a count.
plt.ylabel('Number of patients')
plt.hist(plat, label=['Death Event'])
plt.legend()

In [None]:
# Number of rows per anaemia value. The column is named by keyword because
# recent seaborn versions treat the first positional argument as `data`, not
# `x` (older versions only emitted a FutureWarning for the positional form).
sns.countplot(data=df, x='anaemia')

In [None]:
# Number of rows per diabetes value. The column is named by keyword because
# recent seaborn versions treat the first positional argument as `data`, not
# `x` (older versions only emitted a FutureWarning for the positional form).
sns.countplot(data=df, x='diabetes')

In [None]:
from scipy.stats import skew

# Sample skewness of each listed column, printed one value per line in this
# fixed order.
skew_columns = (
    'age',
    'serum_sodium',
    'serum_creatinine',
    'platelets',
    'time',
    'creatinine_phosphokinase',
    'ejection_fraction',
)
for column in skew_columns:
    print(skew(df[column]))

In [None]:
# Box plot of age for each diabetes value, split by DEATH_EVENT.
sns.catplot(
    data=df,
    x='diabetes',
    y="age",
    hue='DEATH_EVENT',
    kind="box",
)

In [None]:
# Box plot of serum_sodium for each high_blood_pressure value, split by DEATH_EVENT.
sns.catplot(
    data=df,
    x='high_blood_pressure',
    y="serum_sodium",
    hue='DEATH_EVENT',
    kind="box",
)

In [None]:
# Line plot of platelets against creatinine_phosphokinase.
# NOTE(review): lineplot aggregates rows that share an x value (mean plus an
# error band by default); if a raw point cloud was intended, a scatter plot
# may be the better fit — confirm with the author.
line_args = dict(x='creatinine_phosphokinase', y='platelets', color='goldenrod')
sns.lineplot(data=df, **line_args)

In [None]:
# Line plot of serum_creatinine against creatinine_phosphokinase.
# NOTE(review): lineplot aggregates rows that share an x value (mean plus an
# error band by default); if a raw point cloud was intended, a scatter plot
# may be the better fit — confirm with the author.
line_args = dict(x='creatinine_phosphokinase', y='serum_creatinine', color='goldenrod')
sns.lineplot(data=df, **line_args)

# Data Preprocessing

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Columns to rescale; every other column of df is left untouched.
c = [
    'age',
    'creatinine_phosphokinase',
    'ejection_fraction',
    'platelets',
    'serum_creatinine',
    'serum_sodium',
    'time',
]

# Rescale each listed column to the [0, 1] range and write it back into df.
# NOTE(review): the scaler is fitted on the full frame, before the train/test
# split, so test-row statistics influence the scaling — consider fitting on
# the training rows only.
scaler = MinMaxScaler()
scaler.fit(df[c])
df[c] = scaler.transform(df[c])

In [None]:
# Preview the first rows of the frame after rescaling.
df.head()

In [None]:
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split

# Fixed seed so the split and the synthetic samples are reproducible.
RANDOM_STATE = 42

# Features are every column except the target; the target is DEATH_EVENT.
X = df.drop('DEATH_EVENT', axis='columns')
y = df['DEATH_EVENT']

# Split BEFORE oversampling: running SMOTE on the full data set lets synthetic
# points interpolated from test rows end up in the training set, which inflates
# every score. Stratify so both partitions keep the original class ratio.
X_train_raw, X_test, y_train_raw, y_test = train_test_split(
    X, y, stratify=y, random_state=RANDOM_STATE
)

# Oversample the minority class in the training partition only; the test
# partition keeps the real class distribution.
smote = SMOTE(sampling_strategy='minority', random_state=RANDOM_STATE)
X_sm, y_sm = smote.fit_resample(X_train_raw, y_train_raw)

# `X_tain` (sic) is the name the model cells in this notebook read, so it is kept.
X_tain, y_train = X_sm, y_sm

# Model Building: Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

# Fit a default-configured logistic regression on the training split.
# fit() returns the estimator itself, which is shown as the cell output.
lr = LogisticRegression().fit(X_tain, y_train)
lr

In [None]:
# Predict class labels for the test split and display them.
lr_pred = lr.predict(X_test)
lr_pred

In [None]:
# Mean accuracy of the logistic regression on the test split.
lr.score(X_test, y_test)

In [None]:
# Per-class precision, recall and F1 for the logistic regression predictions.
# The `cr` alias is reused by a later cell in this notebook.
from sklearn.metrics import classification_report as cr
print(cr(y_test, lr_pred))

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forests bootstrap rows and sub-sample features at random; without a
# fixed seed the fitted model (and its score) changes on every run.
rfc = RandomForestClassifier(random_state=42)
rfc.fit(X_tain, y_train)

In [None]:
# Predict class labels for the test split with the random forest and display them.
rfc_predict = rfc.predict(X_test)
rfc_predict

In [None]:
# Mean accuracy of the random forest on the test split.
rfc.score(X_test, y_test)

# XGBClassifier Build

In [None]:
import xgboost as xgb

# Train a default-configured XGBoost classifier on the training split, then
# predict class labels for the test split.
model = xgb.XGBClassifier().fit(X_tain, y_train)
y_pred1 = model.predict(X_test)

In [None]:
# Mean accuracy of the XGBoost classifier on the test split.
model.score(X_test, y_test)

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from catboost import CatBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
import lightgbm
from sklearn.tree import DecisionTreeClassifier

# Decision Tree Classifier Build

In [None]:
# Small entropy-criterion decision tree (at most 3 leaves) with a fixed seed,
# fitted on the training split and used to predict the test split.
tree_params = {'max_leaf_nodes': 3, 'random_state': 0, 'criterion': 'entropy'}
dt_clf = DecisionTreeClassifier(**tree_params).fit(X_tain, y_train)
dt_pred = dt_clf.predict(X_test)


In [None]:
# Mean accuracy of the decision tree on the test split.
dt_clf.score(X_test, y_test)

In [None]:
from sklearn.metrics import precision_score

# Shallow (max_depth=2) LightGBM classifier with a fixed seed, fitted on the
# training split. fit() returns the estimator, which is shown as the cell output.
lgb_clf = lightgbm.LGBMClassifier(max_depth=2, random_state=4).fit(X_tain, y_train)
lgb_clf


In [None]:
# Mean accuracy of the LightGBM classifier on the test split.
lgb_clf.score(X_test, y_test)

# Artificial Neural Network Build

In [None]:
import tensorflow as tf
from tensorflow import keras

# Seed TensorFlow's global RNG so weight initialisation is repeatable.
tf.random.set_seed(42)

# Take the input width from the training data instead of hard-coding 12, so
# the network still builds if the feature set changes.
n_features = X_tain.shape[1]

# Two hidden ReLU layers of 12 units and a single sigmoid output unit, trained
# with binary cross-entropy for the two-class target.
ann = keras.Sequential([keras.layers.Dense(12, input_shape=(n_features,), activation='relu'),
                        keras.layers.Dense(12, activation='relu'),
                        keras.layers.Dense(1, activation='sigmoid')])
ann.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
ann.fit(X_tain, y_train, epochs=100)

In [None]:
# Sigmoid outputs of the network for the test split (one value per row).
pred = ann.predict(X_test)

# Threshold at 0.5 (strictly greater -> 1, otherwise 0) and flatten the
# result into a plain Python list of ints.
ann_pred = (pred > 0.5).astype(int).ravel().tolist()

In [None]:
# Per-class precision, recall and F1 for the thresholded ANN predictions
# (`cr` is the classification_report alias imported in an earlier cell).
print(cr(y_test, ann_pred))