In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
%matplotlib inline

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [None]:
# Read the heart-failure clinical records CSV into the notebook-wide DataFrame.
csv_path = '/kaggle/input/heart-failure-clinical-data/heart_failure_clinical_records_dataset.csv'
df = pd.read_csv(csv_path)

# Show the first rows as the cell output.
df.head()

In [None]:
# Report the dataset dimensions (rows first, then columns).
n_rows, n_cols = df.shape
print('Data has {} rows and {} columns.'.format(n_rows, n_cols))

In [None]:
# Pairwise correlation of all columns, drawn as an annotated heatmap.
corr_matrix = df.corr()
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(
    corr_matrix,
    annot=True,
    cmap='coolwarm',
    linecolor='white',
    linewidths=0.2,
    ax=ax,
)

The feature most strongly correlated with the target is `time` (a negative, i.e. inverse, relationship).

Age and serum creatinine are positively related. Clearly, the chance of heart failure increases with age.

In [None]:
# Class balance of the target column.
sns.countplot(data=df, x='DEATH_EVENT')

In [None]:
# Count of records per age value, split by outcome.
fig, ax = plt.subplots(figsize=(14, 8))
sns.countplot(data=df, x='age', hue='DEATH_EVENT', ax=ax)
ax.set_title('Age')
# Tilt the tick labels so the many age values stay readable.
plt.xticks(rotation=45)

In [None]:
# Distribution of creatinine phosphokinase for each outcome.
fig, ax = plt.subplots(figsize=(6, 8))
sns.boxplot(data=df, x='DEATH_EVENT', y='creatinine_phosphokinase', ax=ax)

In [None]:
# Distribution of ejection fraction for each outcome.
fig, ax = plt.subplots(figsize=(6, 8))
sns.violinplot(data=df, x='DEATH_EVENT', y='ejection_fraction', ax=ax)

In [None]:
# Distribution of platelet counts for each outcome.
fig, ax = plt.subplots(figsize=(6, 8))
sns.boxenplot(data=df, x='DEATH_EVENT', y='platelets', ax=ax)
ax.set_title('Platelets Effects')

In [None]:
# Individual `time` values for each outcome, one point per record.
fig, ax = plt.subplots(figsize=(6, 8))
sns.swarmplot(data=df, x='DEATH_EVENT', y='time', ax=ax)
ax.set_title('Time Effect')

### Training Models
#### Random Forest

In [None]:
# Features are every column except the target; the target is DEATH_EVENT.
X = df.drop('DEATH_EVENT', axis=1)
y = df['DEATH_EVENT']

# Hold out 10% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test , y_train, y_test = train_test_split(X, y, test_size=0.10, random_state=101)

# Seed the forest as well: without random_state the bootstrap samples and
# feature subsets change on every run, so the reported scores would not be
# reproducible under Restart & Run All.
rfc = RandomForestClassifier(random_state=101)
rfc.fit(X_train, y_train)

# Prediction
pred_rfc = rfc.predict(X_test)
print('Random Forest Results :-','\n','-'*80,'\n',classification_report(y_test, pred_rfc))


#### SVM (Support Vector Machines)

In [None]:
# Features are every column except the target; the target is DEATH_EVENT.
X = df.drop('DEATH_EVENT', axis=1)
y = df['DEATH_EVENT']

# Same 10% hold-out split (same seed) as the other model cells.
X_train, X_test , y_train, y_test = train_test_split(X, y, test_size=0.10, random_state=101)

# Fitting to training set.
# SVC's default RBF kernel is distance-based, so unscaled inputs let the
# largest-magnitude columns dominate. Standardise inside a pipeline so the
# scaler is fitted on the training rows only and re-applied at predict time.
# NOTE(review): the clinical columns appear to be on very different scales
# (e.g. platelets vs. ejection_fraction) - confirm with df.describe().
svm = make_pipeline(StandardScaler(), SVC())
svm.fit(X_train, y_train)

# Prediction:
pred_svm = svm.predict(X_test)
print('Support Vector Machines Result :-','\n','-'*80,'\n',classification_report(y_test,pred_svm))

In [None]:
# --- Logistic Regression ---
# Split the frame into the feature matrix and the DEATH_EVENT target.
y = df['DEATH_EVENT']
X = df.drop(columns='DEATH_EVENT')

# 90/10 train/test partition with a fixed seed.
X_train, X_test , y_train, y_test = train_test_split(
    X, y, random_state=101, test_size=0.10
)

# Train with a generous iteration cap for the solver.
lr = LogisticRegression(max_iter=10000)
lr = lr.fit(X_train, y_train)

# Score the hold-out rows and print the per-class report.
pred_LR = lr.predict(X_test)
lr_report = classification_report(y_test, pred_LR)
print('Logistic Regression Results:', '\n', '-' * 80, '\n', lr_report)

#### MultinomialNB

In [None]:
# Split the frame into the feature matrix and the DEATH_EVENT target.
y = df['DEATH_EVENT']
X = df.drop(columns='DEATH_EVENT')

# 90/10 train/test partition with a fixed seed.
X_train, X_test , y_train, y_test = train_test_split(
    X, y, random_state=101, test_size=0.10
)

# Train the multinomial Naive Bayes classifier on the training rows.
# NOTE(review): MultinomialNB is aimed at count-like features; confirm it is
# the intended variant for these clinical measurements.
mn = MultinomialNB()
mn = mn.fit(X_train, y_train)

# Score the hold-out rows and print the per-class report.
pred_mn = mn.predict(X_test)
mn_report = classification_report(y_test, pred_mn)
print('Naive bayes - MultnomialNB Results: ', '\n', '-' * 80, '\n', mn_report)

In [None]:
# Side-by-side hold-out accuracy for every model trained above.
# y_test comes from the most recent split; every model cell uses the same
# test_size and random_state, so it lines up with each set of predictions.
print('Accuracy Scores :', '\n', '-' * 80)

model_predictions = [
    ('Random Forest', pred_rfc),
    ('Logistic Regression', pred_LR),
    ('Support Vector Machine', pred_svm),
    ('MultinomialNB', pred_mn),
]
for model_label, model_pred in model_predictions:
    print(model_label, accuracy_score(y_test, model_pred))

#### Clearly, Random Forest is the winner here, with 0.96 accuracy on the 10% hold-out set.

#### It is followed by Logistic Regression with 0.87.