In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.neural_network import MLPClassifier
from sklearn import linear_model
from sklearn.model_selection import train_test_split


In [None]:
# Load the clinical-records CSV into a DataFrame; the path is relative to the
# notebook's working directory.
df = pd.read_csv("../input/heart-failure-clinical-data/heart_failure_clinical_records_dataset.csv")

In [None]:
df.head()

In [None]:
df.shape

In [None]:
df.info()

In [None]:
df.describe()

In [None]:
df['DEATH_EVENT'].value_counts()

In [None]:
# Number of rows per DEATH_EVENT value.
# The column is passed by keyword (data=/x=): recent seaborn releases read the
# first positional argument as `data`, not `x`, so a bare positional Series no
# longer yields one bar per category.
sns.countplot(data=df, x='DEATH_EVENT')
plt.title('Death Counts')

In [None]:
df['anaemia'].value_counts()

In [None]:
# Number of rows per anaemia value.
# Keyword arguments are used because recent seaborn releases interpret the
# first positional argument as `data` rather than `x`.
sns.countplot(data=df, x='anaemia')
plt.title("Anaemia Count")

In [None]:
df['diabetes'].value_counts()

In [None]:
# Number of rows per diabetes value.
# Keyword arguments are used because recent seaborn releases interpret the
# first positional argument as `data` rather than `x`.
sns.countplot(data=df, x='diabetes')
plt.title('Diabetes Count')

In [None]:
df['high_blood_pressure'].value_counts()

In [None]:
# Number of rows per high_blood_pressure value.
# Keyword arguments are used because recent seaborn releases interpret the
# first positional argument as `data` rather than `x`.
sns.countplot(data=df, x='high_blood_pressure')
plt.title('High Blood Pressure Count')

In [None]:
# Side-by-side count plots, one panel per column listed below.
count_cols = ['anaemia', 'diabetes', 'high_blood_pressure', 'sex', 'smoking']
fig, ax = plt.subplots(ncols=len(count_cols), figsize=(15, 3))
for panel, col in zip(ax, count_cols):
    # Column passed by keyword: recent seaborn releases treat the first
    # positional argument as `data`, not `x`.
    sns.countplot(data=df, x=col, ax=panel)
# plt.show() instead of fig.show(): Figure.show() needs a GUI backend and only
# emits a warning under the notebook's inline backend.
plt.show()

In [None]:
# Distribution of the 'age' column, 15 bins.
age_ax = df['age'].hist(color='c', bins=15)
age_ax.set_title("Age Histogram")

In [None]:
# Distribution of the 'creatinine_phosphokinase' column, 30 bins.
cpk_ax = df['creatinine_phosphokinase'].hist(color='c', bins=30)
cpk_ax.set_title("Creatinine Phosphokinase Histogram")

In [None]:
# Distribution of the 'ejection_fraction' column, 13 bins.
ef_ax = df['ejection_fraction'].hist(color='c', bins=13)
ef_ax.set_title("Ejection Fraction Histogram")

In [None]:
# Distribution of the 'platelets' column, 10 bins.
platelets_ax = df['platelets'].hist(color='c', bins=10)
platelets_ax.set_title("Platelets Histogram")

In [None]:
# Distribution of the 'serum_creatinine' column, 30 bins.
creatinine_ax = df['serum_creatinine'].hist(color='c', bins=30)
creatinine_ax.set_title("Serum Creatinine Histogram")

In [None]:
# Distribution of the 'serum_sodium' column, 30 bins.
sodium_ax = df['serum_sodium'].hist(color='c', bins=30)
sodium_ax.set_title("Serum Sodium Histogram")

In [None]:
# Distribution of the 'time' column, 30 bins.
time_ax = df['time'].hist(color='c', bins=30)
time_ax.set_title("Time Histogram")

In [None]:
# Pairwise correlation between the DataFrame's columns; shown here as a table
# and reused by the heatmap cell that follows.
corr = df.corr()
corr

In [None]:
# Heatmap of the correlation matrix on a fixed [-1, 1] colour scale centred at 0,
# drawn on the axes created here rather than on the implicit current axes.
fig, ax = plt.subplots(figsize=(9, 9))
sns.heatmap(
    corr,
    vmin=-1.0,
    vmax=1.0,
    center=0,
    annot=False,
    xticklabels=corr.columns,
    yticklabels=corr.columns,
    ax=ax,
)

# Random Forest

In [None]:
# Feature matrix: every column except the DEATH_EVENT target.
X = df.drop(columns=["DEATH_EVENT"])

In [None]:
y = df['DEATH_EVENT']

In [None]:
from sklearn.pipeline import Pipeline

In [None]:
# Random-forest estimator used as the final pipeline step.
# random_state is fixed (same seed as the train/test split) so the forest, and
# therefore the grid-search scores, are reproducible across kernel restarts.
rf = RandomForestClassifier(random_state=9)

In [None]:
scaler = MinMaxScaler()

In [None]:
rf_pipe = Pipeline([('Scaler', scaler), ('RandomForest', rf)])

In [None]:
# Hyper-parameter grid for the random-forest search. Keys follow the
# "<pipeline step>__<parameter>" form and target the 'RandomForest' step.
rf_params = dict(
    RandomForest__n_estimators=[100, 200, 600],
    RandomForest__max_features=[3, 5, 6],
)

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = .2, random_state = 9)

In [None]:
# 8-fold cross-validated grid search over the random-forest pipeline, scored on
# accuracy. n_jobs=-1 runs the candidate fits in parallel, matching the SVC and
# MLP searches further down.
rf_grid = GridSearchCV(rf_pipe, param_grid=rf_params, cv=8, scoring="accuracy", n_jobs=-1)
rf_grid.fit(X_train, y_train)

In [None]:
# Tabulate the cross-validation results of the random-forest search so they can
# be inspected as a DataFrame in the next cell.
rf_grid_results = pd.DataFrame(rf_grid.cv_results_)

In [None]:
rf_grid_results

In [None]:
rf_grid.best_score_

In [None]:
rf_grid.best_params_

In [None]:
rf_pred = rf_grid.predict(X_test)

In [None]:
rf_acc = accuracy_score(y_test, rf_pred)
rf_acc

In [None]:
rf_conf = confusion_matrix(y_test, rf_pred)
rf_conf

In [None]:
# Wrap the raw confusion-matrix counts in a labelled DataFrame (rows = actual
# class, columns = predicted class) for the heatmap and the .iloc lookups in
# the following cells.
# np.asarray() drops any existing labels first, so re-running this cell after
# rf_conf has already become a DataFrame does not reindex it to all-NaN.
# Labels are flat lists: a nested list would build a one-level MultiIndex.
rf_conf = pd.DataFrame(
    data=np.asarray(rf_conf),
    columns=['Pred:No Failure', 'Pred: Failure'],
    index=['Act: No Failure', 'Act: Failure'],
)

In [None]:
sns.heatmap(rf_conf, annot = True)

In [None]:
rf_fn= rf_conf.iloc[1,0]
rf_fn

In [None]:
rf_tp = rf_conf.iloc[1,1]
rf_tp

In [None]:
# Precision = TP / (TP + FP): of the cases predicted as "Failure", the share
# that really were. The denominator must use false positives (row 0 = actual
# "No Failure", column 1 = predicted "Failure"); TP / (TP + FN) would be recall.
rf_fp = rf_conf.iloc[0, 1]
rf_predicted_pos = rf_tp + rf_fp
# Guard the degenerate case where the model predicts no positives at all.
rf_prec = rf_tp / rf_predicted_pos if rf_predicted_pos else 0.0
rf_prec

# Support Vector Machine

In [None]:
svc = svm.SVC()

In [None]:
svc_pipe = Pipeline([('Scaler', scaler), ('svc', svc )])

In [None]:
gamma_range = range(1,10)

In [None]:
c_range = range(1,10)

In [None]:
# Hyper-parameter grid for the SVC search; keys target the 'svc' pipeline step.
svc_params = {
    'svc__gamma': gamma_range,
    'svc__C': c_range,
}

In [None]:
svc_grid = GridSearchCV(svc_pipe, param_grid = svc_params, cv = 8, scoring = "accuracy", n_jobs=-1)

In [None]:
svc_grid.fit(X_train, y_train)

In [None]:
svc_results = pd.DataFrame(svc_grid.cv_results_)

In [None]:
svc_results

In [None]:
svc_grid.best_score_

In [None]:
svc_grid.best_params_

In [None]:
svc_pred = svc_grid.predict(X_test)

In [None]:
svc_acc = accuracy_score(y_test, svc_pred)
svc_acc

In [None]:
svc_conf = confusion_matrix(y_test, svc_pred)
svc_conf

In [None]:
# Wrap the raw confusion-matrix counts in a labelled DataFrame (rows = actual
# class, columns = predicted class) for the heatmap and the .iloc lookups in
# the following cells.
# np.asarray() drops any existing labels first, so re-running this cell after
# svc_conf has already become a DataFrame does not reindex it to all-NaN.
# Labels are flat lists: a nested list would build a one-level MultiIndex.
svc_conf = pd.DataFrame(
    data=np.asarray(svc_conf),
    columns=['Pred:No Failure', 'Pred: Failure'],
    index=['Act: No Failure', 'Act: Failure'],
)

In [None]:
sns.heatmap(svc_conf, annot = True)

In [None]:
svc_fn = svc_conf.iloc[1,0]
svc_fn

In [None]:
svc_tp = svc_conf.iloc[1,1]
svc_tp

In [None]:
# Precision = TP / (TP + FP): of the cases predicted as "Failure", the share
# that really were. The denominator must use false positives (row 0 = actual
# "No Failure", column 1 = predicted "Failure"); TP / (TP + FN) would be recall.
svc_fp = svc_conf.iloc[0, 1]
svc_predicted_pos = svc_tp + svc_fp
# Guard the degenerate case where the model predicts no positives at all.
svc_prec = svc_tp / svc_predicted_pos if svc_predicted_pos else 0.0
svc_prec

# Artificial Neural Network

In [None]:
# Multi-layer perceptron used as the final pipeline step.
# random_state is fixed (same seed as the train/test split) so weight
# initialisation, and therefore the grid-search scores, are reproducible.
mlp = MLPClassifier(random_state=9)

In [None]:
mlp_pipe = Pipeline([('scaler', scaler), ('mlp', mlp)])

In [None]:
# Hyper-parameter grid for the MLP search; keys target the 'mlp' pipeline step.
# Each architecture is three hidden layers of equal width.
mlp_params = dict(
    mlp__solver=['sgd'],
    mlp__hidden_layer_sizes=[(width,) * 3 for width in (10, 15, 20, 30, 40)],
    mlp__max_iter=[2000, 6000, 8000, 10000],
)

In [None]:
mlp_grid = GridSearchCV(mlp_pipe, param_grid = mlp_params, cv=8, scoring = "accuracy", n_jobs=-1)

In [None]:
mlp_grid.fit(X_train, y_train)

In [None]:
mlp_results = pd.DataFrame(mlp_grid.cv_results_)

In [None]:
mlp_results

In [None]:
mlp_grid.best_score_

In [None]:
mlp_grid.best_params_

In [None]:
mlp_pred = mlp_grid.predict(X_test)

In [None]:
mlp_acc = accuracy_score(y_test, mlp_pred)
mlp_acc

In [None]:
mlp_conf = confusion_matrix(y_test, mlp_pred)
mlp_conf

In [None]:
# Wrap the raw confusion-matrix counts in a labelled DataFrame (rows = actual
# class, columns = predicted class) for the heatmap and the .iloc lookups in
# the following cells.
# np.asarray() drops any existing labels first, so re-running this cell after
# mlp_conf has already become a DataFrame does not reindex it to all-NaN.
# Labels are flat lists: a nested list would build a one-level MultiIndex.
mlp_conf = pd.DataFrame(
    data=np.asarray(mlp_conf),
    columns=['Pred:No Failure', 'Pred: Failure'],
    index=['Act: No Failure', 'Act: Failure'],
)

In [None]:
sns.heatmap(mlp_conf, annot = True)

In [None]:
mlp_fn = mlp_conf.iloc[1,0]
mlp_fn

In [None]:
mlp_tp = mlp_conf.iloc[1,1]
mlp_tp

In [None]:
# Precision = TP / (TP + FP): of the cases predicted as "Failure", the share
# that really were. The denominator must use false positives (row 0 = actual
# "No Failure", column 1 = predicted "Failure"); TP / (TP + FN) would be recall.
mlp_fp = mlp_conf.iloc[0, 1]
mlp_predicted_pos = mlp_tp + mlp_fp
# Guard the degenerate case where the model predicts no positives at all.
mlp_prec = mlp_tp / mlp_predicted_pos if mlp_predicted_pos else 0.0
mlp_prec

In [None]:
# Summary table with one row per model: its test-set accuracy and the
# precision figure computed in that model's section.
results = pd.DataFrame(
    [
        ('Random Forest', rf_acc, rf_prec),
        ('Support Vector Machine', svc_acc, svc_prec),
        ('Artifical Neural Network', mlp_acc, mlp_prec),
    ],
    columns=['Model', 'Accuracy', 'Precision'],
)

In [None]:
# Use the model name as the row index so the bar chart labels each group by model.
# NOTE: not re-runnable on its own; after the first run "Model" is no longer
# a column, so a second run raises KeyError.
results = results.set_index("Model")

In [None]:
# Order the models from highest to lowest value in the "Precision" column.
results = results.sort_values(by="Precision", ascending = False)

In [None]:
# Grouped bar chart of the summary table: one group per row, one bar per metric column.
ax = results.plot.bar(
    title="Model Results",
    figsize=(7, 5),
    legend=True,
    fontsize=12,
)