# **Imports**

In [None]:
import pandas as pd
from keras import layers
from keras import Sequential
from sklearn.preprocessing import MinMaxScaler
from keras import regularizers
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn import neighbors
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import GradientBoostingClassifier
import xgboost
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.ensemble import IsolationForest

In [None]:

# Load the heart-failure records and shuffle the rows once. The train/test
# split further down is purely positional, so it depends on this shuffle;
# a fixed seed keeps that split identical from run to run.
df = pd.read_csv(r'../input/heart-failure-clinical-data/heart_failure_clinical_records_dataset.csv')
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

# **Data info**

***First 5 rows***

In [None]:
# Preview the leading five records of the frame
preview = df.head(5)
print(f'First 5 rows:\n {preview}')

***Null values***

In [None]:
# Count the missing entries in every column
null_counts = df.isnull().sum()
print(f'Null values:\n {null_counts}')


***Number of examples***

In [None]:
# Report how many rows (examples) the dataset contains
print(f'Examples:\n {len(df)}')


***Summary statistics of the dataset***

In [None]:
# Per-column summary statistics as produced by DataFrame.describe()
summary = df.describe()
print(f'Dataset info:\n {summary}')


***Correlation matrix***

In [None]:
# Pairwise correlation of all columns, drawn as an annotated heatmap on a wide canvas
corr_matrix = df.corr()
plt.figure(figsize=(20, 10))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', vmin=-1)

In [None]:
# Columns kept as model inputs (the hand-picked, highly correlated ones)
Features = [
    'time',
    'ejection_fraction',
    'serum_creatinine',
    'serum_sodium',
    'age',
]

In [None]:
# Number of leading rows reserved for training; the rows after them form the test set
training_samples = 250
# Target values: taken positionally from the last column of the frame
labels = df.iloc[:, -1]
# Model inputs: only the selected feature columns
data = df[Features]

In [None]:
# Applying scaling to data
# The scaler learns its per-feature min/max from the training rows only, so
# statistics of the held-out rows do not leak into preprocessing. Every row
# is then transformed with those training-derived bounds, which means test
# values may fall slightly outside [0, 1].
scaler = MinMaxScaler()
scaler.fit(data[:training_samples])
data = scaler.transform(data)

In [None]:
# Positional hold-out split: the first `training_samples` rows train the
# models, everything after them is kept for testing
X_train, X_test = data[:training_samples], data[training_samples:]
y_train, y_test = labels[:training_samples], labels[training_samples:]

In [None]:
# Distribution of the training features, every point shown, prior to outlier removal
px.box(data_frame=X_train, points='all')

In [None]:
# Fitting data to Isolation Forest for detecting and removing outliers
# fit_predict marks each training row as 1 (inlier) or -1 (outlier); only the
# inliers are kept, and the labels are filtered with the same mask so that
# features and targets stay aligned. The test rows are left untouched.
# A fixed seed makes the set of removed rows repeatable for a given X_train.
iso = IsolationForest(contamination=0.1, random_state=42)
isoPred = iso.fit_predict(X_train)
mask = isoPred != -1
X_train, y_train = X_train[mask, :], y_train[mask]

In [None]:
# Distribution of the training features, every point shown, once outliers are dropped
px.box(data_frame=X_train, points='all')


In [None]:
# Small fully connected binary classifier: two L2-regularised ReLU layers
# (32 and 16 units) followed by a single sigmoid output unit
n_features = X_train.shape[1]
model = Sequential([
    layers.Dense(32,
                 activation='relu',
                 kernel_regularizer=regularizers.l2(0.01),
                 input_shape=(n_features,)),
    layers.Dense(16,
                 activation='relu',
                 kernel_regularizer=regularizers.l2(0.01)),
    layers.Dense(1, activation='sigmoid'),
])

model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

# Fit for 20 epochs in mini-batches of 4, holding out 20% of the training rows for validation
history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    batch_size=4,
    epochs=20,
)

# **Accuracy scores for each algorithm**

In [None]:
# Containers for the model comparison: names and accuracy values are gathered
# in two parallel lists, and accDict later receives them as columns
accNameList, accValueList = [], []
accDict = dict()

***Simple neural network***

In [None]:
# Evaluate the network on the held-out rows a single time and reuse the result
# for both the printout and the comparison list (a second evaluate() call
# would repeat the full pass over the test data). Index 1 of the returned
# list is the accuracy metric requested in compile(); index 0 is the loss.
nn_accuracy = model.evaluate(X_test, y_test)[1]
print(f'Neural network: {nn_accuracy}')
accNameList.append('NeuralNetwork')
accValueList.append(nn_accuracy)

***Logistic Regression***

In [None]:
# Fit logistic regression on the training rows and record its test score.
# The score is computed once and reused for the printout and the comparison
# list instead of running a second identical scoring pass.
lr = LogisticRegression()
lr.fit(X_train, y_train)
lr_accuracy = lr.score(X_test, y_test)
print(f'Logistic Regression: {lr_accuracy}')
accNameList.append('LogisticRegression')
accValueList.append(lr_accuracy)

***Support Vector Machine Classifier***

In [None]:
# Fit a support vector classifier on the training rows and record its test
# score. The score is computed once and reused for the printout and the
# comparison list instead of running a second identical scoring pass.
sv = svm.SVC()
sv.fit(X_train, y_train)
sv_accuracy = sv.score(X_test, y_test)
print(f'Support Vector Machine Classifier: {sv_accuracy}')
accNameList.append('SupportVectorMachineClassifier')
accValueList.append(sv_accuracy)

***Random Forest Classifier***

In [None]:
# Fit a random forest (trees capped at depth 20) on the training rows and
# record its test score. The score is computed once and reused for the
# printout and the comparison list instead of scoring the test set twice.
rfc = RandomForestClassifier(max_depth=20)
rfc.fit(X_train, y_train)
rfc_accuracy = rfc.score(X_test, y_test)
print(f'Random Forest Classifier: {rfc_accuracy}')
accNameList.append('RandomForestClassifier')
accValueList.append(rfc_accuracy)

***Gaussian Naive Bayes***

In [None]:
# Fit Gaussian naive Bayes on the training rows and record its test score.
# The score is computed once and reused for the printout and the comparison
# list instead of running a second identical scoring pass.
gnb = GaussianNB()
gnb.fit(X_train, y_train)
gnb_accuracy = gnb.score(X_test, y_test)
print(f'Gaussian Naive Bayes: {gnb_accuracy}')
accNameList.append('GaussianNaiveBayes')
accValueList.append(gnb_accuracy)

***SGD Classifier***

In [None]:
# Fit an SGD-trained linear classifier on the training rows and record its
# test score. The score is computed once and reused for the printout and the
# comparison list instead of running a second identical scoring pass.
sgd = SGDClassifier()
sgd.fit(X_train, y_train)
sgd_accuracy = sgd.score(X_test, y_test)
print(f'SGD Classifier: {sgd_accuracy}')
accNameList.append('SGDClassifier')
accValueList.append(sgd_accuracy)

***Gradient Boosting Classifier***

In [None]:
# Fit gradient boosting on the training rows and record its test score.
# The score is computed once and reused for the printout and the comparison
# list instead of running a second identical scoring pass.
gbc = GradientBoostingClassifier()
gbc.fit(X_train, y_train)
gbc_accuracy = gbc.score(X_test, y_test)
print(f'Gradient Boosting Classifier: {gbc_accuracy}')
accNameList.append('GradientBoostingClassifier')
accValueList.append(gbc_accuracy)

***XGB Classifier***

In [None]:

# Fit an XGBoost classifier on the training rows and record its test score.
# The score is computed once and reused for the printout and the comparison
# list instead of running a second identical scoring pass.
xgb = xgboost.sklearn.XGBClassifier()
xgb.fit(X_train, y_train)
xgb_accuracy = xgb.score(X_test, y_test)
print(f'XGB Classifier: {xgb_accuracy}')
accNameList.append('XGBClassifier')
accValueList.append(xgb_accuracy)

In [None]:
# Assemble the collected names and accuracy values into a two-column frame
# and chart one bar per model
accDict.update(Model=accNameList, Value=accValueList)
accDf = pd.DataFrame(accDict)
px.bar(data_frame=accDf, x='Model', y='Value')
