# Heart Failure Prediction

## About this dataset

Cardiovascular diseases (CVDs) are the number 1 cause of death globally, taking an estimated 17.9 million lives each year, which accounts for 31% of all deaths worldwide.
Heart failure is a common event caused by CVDs and this dataset contains 12 features that can be used to predict mortality by heart failure.

Most cardiovascular diseases can be prevented by addressing behavioural risk factors such as tobacco use, unhealthy diet and obesity, physical inactivity and harmful use of alcohol using population-wide strategies.

People with cardiovascular disease or who are at high cardiovascular risk (due to the presence of one or more risk factors such as hypertension, diabetes, hyperlipidaemia or already established disease) need early detection and management wherein a machine learning model can be of great help.

Reference: https://www.kaggle.com/andrewmvd/heart-failure-clinical-data

## Dataset Load

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from scipy.stats import zscore
from sklearn.model_selection import train_test_split
from sklearn.metrics import plot_confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from scipy import stats


# Load the heart-failure clinical records CSV into a DataFrame.
# NOTE(review): the absolute /kaggle/input path presumably only resolves inside
# a Kaggle kernel — adjust it when running elsewhere.
data = pd.read_csv('/kaggle/input/heart-failure-clinical-data/heart_failure_clinical_records_dataset.csv')


## Data Analysis

In [None]:
# Preview the first rows of the loaded dataset.
data.head()

In [None]:
# Summary statistics for every column of the dataset.
data.describe()

### Histograms

In [None]:
# One histogram per dataset column, placed on a 5x3 grid filled row by row.
fig = make_subplots(rows=5, cols=3)

for position, column_name in enumerate(data.columns):
    # divmod yields the zero-based (row, col) cell; plotly grids are 1-based.
    grid_row, grid_col = divmod(position, 3)
    histogram = go.Histogram(x=data[column_name], name=column_name)
    fig.add_trace(histogram, row=grid_row + 1, col=grid_col + 1)

fig.update_layout(height=1200)
fig.show()

### Correlation Matrix

In [None]:
# Pairwise correlation between all columns of the dataset.
corr = data.corr()
# Styler.set_precision was deprecated in pandas 1.3 and removed in 2.0;
# Styler.format(precision=...) is the supported way to round displayed values.
corr.style.background_gradient(cmap='plasma').format(precision=2)

In [None]:
# Rank every column by the magnitude of its correlation with DEATH_EVENT,
# strongest first (the sign is discarded).
corr['DEATH_EVENT'].abs().sort_values(ascending=False)

In [None]:
# Number of columns in the dataset (second entry of the frame's shape).
data.shape[1]

In [None]:
def make_box_plots(frame=None, n_cols=3):
    """Draw one box plot per column of a DataFrame on a subplot grid.

    Args:
        frame: DataFrame whose columns are plotted. When omitted, the
            module-level ``data`` is read at call time, so calling this again
            after ``data`` has been reassigned plots the new contents.
        n_cols: Number of subplot columns. The number of rows is derived from
            the column count, so the grid always has room for every column.
    """
    if frame is None:
        frame = data

    column_names = list(frame.columns)
    # Ceiling division; at least one row so an empty frame still yields a grid.
    n_rows = max(1, -(-len(column_names) // n_cols))

    fig = make_subplots(
        rows=n_rows,
        cols=n_cols,
        subplot_titles=column_names,
    )

    for position, column_name in enumerate(column_names):
        grid_row, grid_col = divmod(position, n_cols)
        # add_trace(row=, col=) replaces the deprecated Figure.append_trace.
        fig.add_trace(
            go.Box(y=frame[column_name]),
            row=grid_row + 1,
            col=grid_col + 1,
        )

    fig.update_layout(
        autosize=False,
        width=1200,
        height=2000,
        margin=dict(
            l=50,
            r=50,
            b=100,
            t=100,
            pad=4
        ),
        paper_bgcolor="LightSteelBlue",
    )
    fig.show()


make_box_plots()

### Outliers Removal

In [None]:
# Absolute z-score of every value, computed column by column.
z_scores = data.apply(zscore).abs()
z_scores.head()

In [None]:
# Boolean mask: True for rows in which every column lies strictly within
# three standard deviations of its column mean.
filtered_entries = z_scores.lt(3).all(axis=1)
filtered_entries.head()

In [None]:
# Keep only the rows flagged by the mask; this rebinds the global `data`.
data = data.loc[filtered_entries]
data.head()

In [None]:
# Redraw the box plots; make_box_plots reads the global `data`, which now holds
# only the rows kept by the outlier filter.
make_box_plots()

### Standardization & Normalization

In [None]:
# Standardize every column except the last one, which is held out of the
# feature matrix, and wrap the result back into a labelled DataFrame.
columns = data.columns
scaler = StandardScaler()
feature_matrix = data.values[:, :-1]
X = pd.DataFrame(scaler.fit_transform(feature_matrix), columns=columns[:-1])
X.describe()

In [None]:
# Rescale the already-standardized features with MinMaxScaler and restore the
# column labels.
# NOTE(review): both scalers are fitted on the full dataset before the
# train/test split further down, so test rows influence the scaling — confirm
# this is acceptable.
min_max_scaler = MinMaxScaler()
X = pd.DataFrame(min_max_scaler.fit_transform(X), columns=columns[:-1])
X.describe()

In [None]:
# One histogram per scaled feature, placed on a 4x3 grid filled row by row.
fig = make_subplots(rows=4, cols=3)

for position, column_name in enumerate(X.columns):
    # divmod yields the zero-based (row, col) cell; plotly grids are 1-based.
    grid_row, grid_col = divmod(position, 3)
    histogram = go.Histogram(x=X[column_name], name=column_name)
    fig.add_trace(histogram, row=grid_row + 1, col=grid_col + 1)

fig.update_layout(height=1200)
fig.show()

## Train/Test dataset split

In [None]:
# Target vector: the DEATH_EVENT column of the filtered dataset.
Y = data['DEATH_EVENT']
# test_size=30 is an integer, i.e. an absolute count of 30 test samples.
# NOTE(review): if a 30% hold-out was intended, use test_size=0.3 instead.
# random_state makes the split (and every score below) reproducible across
# runs; stratify keeps the class ratio of Y the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=30,
    random_state=42,
    stratify=Y,
)

## Naive Bayes Model

In [None]:
# Fit a Gaussian Naive Bayes classifier on the training split, then predict
# labels for the held-out test split.
model = GaussianNB()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

## Accuracy

#### Train model accuracy

In [None]:
# Score of the fitted model on the data it was trained on.
model.score(X_train, y_train)

#### Test model accuracy

In [None]:
# Score of the fitted model on the held-out test split.
model.score(X_test, y_test)

### Confusion Matrix

In [None]:
# sklearn.metrics.plot_confusion_matrix was deprecated in scikit-learn 1.0 and
# removed in 1.2; ConfusionMatrixDisplay.from_estimator is its replacement.
# NOTE(review): the top-of-file `from sklearn.metrics import
# plot_confusion_matrix` must also be dropped to run on scikit-learn >= 1.2.
from sklearn.metrics import ConfusionMatrixDisplay

# Confusion matrix of the model's predictions on the held-out test split.
ConfusionMatrixDisplay.from_estimator(model, X_test, y_test)
plt.show()

### Dimension Reduction with LDA

In [None]:
# Project the training features onto a single LDA component, then fit a
# second Gaussian Naive Bayes classifier on that one-dimensional projection.
lda_model = LinearDiscriminantAnalysis(n_components=1)
X_lda = lda_model.fit(X_train, y_train).transform(X_train)
nb_model = GaussianNB()
nb_model.fit(X_lda, y_train)

### Original data and prediction contour

In [None]:
# Two-cell figure: a 2D scatter on the left and a 3D scene on the right.
# Only the left cell receives a trace here; the 3D cell is declared but unused.
fig = make_subplots(rows=1, cols=2, specs=[[{"type": "xy"}, {'is_3d': True}]])

# Predicted labels for the projected training rows; used as marker symbols.
train_pred = nb_model.predict(X_lda)

# LDA component (x) against the true label (y): colour encodes the true class,
# marker symbol encodes the class predicted by nb_model.
fig.add_trace(go.Scatter(
    x=X_lda[:, 0],
    y=y_train,
    # Hover text must line up one-to-one with the plotted training rows;
    # data['DEATH_EVENT'] spans the whole dataset and would mislabel points.
    text=y_train,
    mode='markers',
    marker_symbol=train_pred,
    marker=dict(color=y_train, colorscale='portland')
), row=1, col=1)


### Gaussian Distribution of Classes

In [None]:
proba = model.predict_proba(X_train)
mean_0 = np.mean(proba[0])
mean_1 = np.mean(proba[1])
print(mean_0)
print(mean_1)

In [None]:
std_0 = np.std(proba[0])
std_1 = np.std(proba[1])
print(std_0)
print(std_1)

In [None]:
fig = make_subplots()

gaussian_0 = np.linspace(mean_0 - 3*std_0, mean_0 + 3*std_0, 100)
gaussian_1 = np.linspace(mean_1 - 3*std_1, mean_1 + 3*std_1, 100)

fig.add_trace(go.Scatter(
    x=[-1,2],
    y=proba[0],
    name=f"mu: {mean_0}, sigma: {std_0}",
    mode='lines'
))

fig.add_trace(go.Scatter(
    x=[-1,2],
    y=proba[1],
    mode='lines'
))

fig.update_layout(showlegend=False)

fig.show()