In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file.
for folder, _, names in os.walk('/kaggle/input'):
    full_paths = (os.path.join(folder, name) for name in names)
    for path in full_paths:
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Reading and Visualizing the Data

In [None]:
# Load the heart-failure clinical records CSV into a DataFrame.
df = pd.read_csv(
    filepath_or_buffer='../input/heart-failure-clinical-data/heart_failure_clinical_records_dataset.csv',
)

In [None]:
# Preview the first five records of the dataset.
df.head(n=5)

In [None]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
df.shape

In [None]:
# Count the missing values in each column.
df.isna().sum()

In [None]:
# Data type of every column in the loaded frame.
df.dtypes

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Render the pairwise correlation matrix of all columns as a heat map on a
# dedicated 12x10 inch figure.
ax = plt.figure(figsize=(12, 10)).gca()
ax = sns.heatmap(data=df.corr(), ax=ax)

In [None]:
# One histogram per column of the full dataset.
# pandas is left to create the figure and its grid of subplots itself:
# passing a single pre-made Axes to DataFrame.hist for several columns makes
# pandas emit a warning and clear that figure before drawing its own grid.
axes = df.hist(figsize=(20, 15))
# Keep a handle on the figure pandas created, as the cell did before.
fig = axes.flat[0].get_figure()
plt.show()

# Preprocessing the data

In [None]:
# Separate the predictors (every column except the label) from the
# DEATH_EVENT label column.
X = df.drop(columns=['DEATH_EVENT'])
y = df['DEATH_EVENT']

In [None]:
# Remove outlier records: keep only the rows whose values satisfy every
# bound below.
#
# The earlier form, X[col] = X[X[col] > bound], assigned an entire filtered
# DataFrame to a single column. That never filters rows: depending on the
# pandas version it either raises or overwrites the column with unrelated
# values/NaN. A single boolean row mask is built instead and applied to X and
# y together, so features and labels keep the same rows and index.
keep_rows = (
    (X['serum_sodium'] > 130)
    & (X['creatinine_phosphokinase'] < 4000)
    & (X['serum_creatinine'] < 4)
    & (X['ejection_fraction'] < 60)
    & (X['platelets'] < 500000)
    & (X['age'] < 80)
)

# .copy() gives independent frames so later in-place operations on X do not
# touch (or warn about) the unfiltered data.
X = X.loc[keep_rows].copy()
y = y.loc[keep_rows].copy()


In [None]:
# One histogram per feature column after preprocessing.
# pandas is left to create the figure and its grid of subplots itself:
# passing a single pre-made Axes to DataFrame.hist for several columns makes
# pandas emit a warning and clear that figure before drawing its own grid.
axes = X.hist(figsize=(20, 15))
# Keep a handle on the figure pandas created, as the cell did before.
fig = axes.flat[0].get_figure()
plt.show()

In [None]:
# Drop, in place, every column of X that contains at least one missing value
# (axis='columns' removes columns, not rows).
# NOTE(review): this discards whole features rather than individual records —
# confirm that is the intent.
X.dropna(axis='columns', inplace=True)

### Split the data 

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 40% of the records for testing.
# random_state pins the shuffle so that the same partition, and therefore the
# same downstream metrics, is produced on every fresh run of the notebook.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=42
)

# Building a model using XGBoost

In [None]:
# Using xgboost
import xgboost as xgb
from sklearn.metrics import accuracy_score, confusion_matrix

# Gradient-boosted tree classifier for the binary target: logistic objective,
# 10 boosting rounds.
# random_state is set explicitly (it was previously left commented out) so
# the fitted model does not depend on an unseeded random generator.
xgb_model = xgb.XGBClassifier(
    objective="binary:logistic",
    n_estimators=10,
    random_state=10,
)
xgb_model.fit(X_train, y_train)

In [None]:
# Predict labels for the held-out records with the boosted model, then print
# the confusion matrix of true vs. predicted labels.
y_pred = xgb_model.predict(X_test)
print(confusion_matrix(y_true=y_test, y_pred=y_pred))

In [None]:
# Fraction of held-out records the boosted model labelled correctly.
accuracy_score(y_true=y_test, y_pred=y_pred)

# Using a neural network to build a classification model

In [None]:
# Using NN by keras
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout, Flatten

# Prefer the stable location of the Normalization layer. The
# `experimental.preprocessing` path is a legacy location that newer
# TensorFlow/Keras releases no longer provide, so it is kept only as a
# fallback for older installations.
try:
    from tensorflow.keras.layers import Normalization
except ImportError:
    from tensorflow.keras.layers.experimental.preprocessing import Normalization

In [None]:
# Feature-wise normalisation layer placed in front of the network.
# A Normalization layer only learns each feature's mean and variance when
# adapt() is called on data (or when they are supplied at construction);
# neither was done before, so no statistics of this dataset were ever used.
# The layer is adapted on the training split only, so nothing from the test
# split influences the preprocessing.
normalize = Normalization()
normalize.adapt(np.asarray(X_train, dtype="float32"))

In [None]:
# Width of the input is taken from the training data instead of being
# hard-coded, so the network keeps matching X_train if the preprocessing
# changes the number of feature columns.
n_features = X_train.shape[1]

# Fully connected binary classifier:
#   input -> normalisation -> 32 tanh -> dropout -> 16 tanh -> dropout -> sigmoid
# The input specification is declared first; previously `input_shape` was
# attached to the second layer (Flatten), where Sequential does not use it to
# define the model input.
model = Sequential([
    tf.keras.Input(shape=(n_features,)),
    normalize,
    Flatten(),
    Dense(32, activation=tf.nn.tanh),
    Dropout(0.5),
    Dense(16, activation=tf.nn.tanh),
    Dropout(0.5),
    Dense(1, activation=tf.nn.sigmoid),
])

# Binary cross-entropy matches the single sigmoid output; accuracy is tracked
# as an additional metric.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Stop training once the validation loss has gone 10 epochs without
# improving, and roll the model back to the weights of its best epoch.
early_stop = EarlyStopping(
    restore_best_weights=True,
    patience=10,
    mode='min',
    monitor='val_loss',
)

In [None]:
# Train for at most 500 epochs in mini-batches of 10, with early stopping.
# The validation data that drives early stopping (and the restore of the best
# weights) is carved out of the training data via validation_split, i.e. the
# last 20% of the samples passed in. Using the test split for this, as
# before, lets the test data select the final weights and makes the later
# evaluation on that same split optimistic.
# The inputs are converted to plain float32 arrays so that validation_split
# can slice them regardless of the Keras version.
model.fit(
    x=np.asarray(X_train, dtype="float32"),
    y=np.asarray(y_train, dtype="float32"),
    epochs=500,
    batch_size=10,
    validation_split=0.2,
    callbacks=[early_stop],
)

In [None]:
# Loss and accuracy of the trained network on the held-out test split.
model.evaluate(x=X_test, y=y_test)

In [None]:
# Sigmoid outputs of the network for every record in the test split.
pred = model.predict(x=X_test)

In [None]:
# True label of the test record at position 9, shown for comparison with the
# network's prediction for that same record.
y_test.iloc[9]

In [None]:
# Network output for the single test record at position 9; the double
# brackets keep the row two-dimensional, i.e. a batch containing one sample.
model.predict(X_test.iloc[[9]].to_numpy())