# Heart Attack Prediction using Deep Learning


## Importing necessary Libraries

In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
import tensorflow_addons as tfa
from tensorflow.keras import (layers, optimizers,losses)
from tensorflow.keras.layers.experimental.preprocessing import Normalization
from tensorflow.keras import regularizers
import matplotlib.pyplot as plt
import sklearn
from sklearn.metrics import accuracy_score,precision_score,recall_score,roc_auc_score,classification_report,confusion_matrix
from sklearn.model_selection import train_test_split
from mpl_toolkits.mplot3d import Axes3D
from sklearn.preprocessing import StandardScaler,MaxAbsScaler
from tensorflow.keras import callbacks

## Data Analysis

### Reading CSV file into pandas Dataframe

In [None]:
# Location of the dataset CSV inside the Kaggle notebook environment.
DATA_PATH = '/kaggle/input/heart-attack-analysis-prediction-dataset/heart.csv'
# Load the full CSV into a DataFrame; later cells refer to it as `heart`.
heart = pd.read_csv(DATA_PATH)

### Overview of Dataframe

In [None]:
# Show the first rows of the raw DataFrame as a quick sanity check of the load.
heart.head()

### Checking for any null values

In [None]:
# Per-column count of missing entries (`isna` is the canonical name of `isnull`).
heart.isna().sum()

#### There are no null values

## Creating X,Y feature sets for classification

In [None]:
# Target vector: the "output" column. Feature matrix: every remaining column.
y = heart["output"]
x = heart.drop(columns="output")

## Splitting the dataset into Train and Test sets

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the shuffled
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.8,
    shuffle=True,
    random_state=42,
)

#### Glance at the head of Train sets

In [None]:
# Peek at the first rows of the training features (still unscaled at this point).
X_train.head()

In [None]:
# Peek at the first training labels taken from the "output" column.
y_train.head()

## Data preprocessing

### Normalizing the values using Maximum Absolute Scaler

In [None]:
# Learn the per-feature scaling from the training split only, then apply the
# same fitted scaler to both splits so no test-set statistics leak into it.
scaler = MaxAbsScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

## Building Neural Network for classification

## Model Architecture

   
   ### In this task we used a neural network composed of the following layers, activation functions, optimizer and loss function:

* Input Layer : Takes input  
* Dense Layer : Dense layer is the regular deeply connected neural network layer.
* BatchNormalization Layer : Batch normalization applies a transformation that maintains the mean output close to 0 and the output standard deviation close to 1.
* Dropout Layer : The Dropout layer randomly sets input units to 0 with a frequency of `rate` at each step during training time, which helps prevent overfitting. Inputs not set to 0 are scaled up by 1/(1 - rate) such that the sum over all inputs is unchanged.
* Activation function: An activation function in a neural network defines how the weighted sum of the input is transformed into an output from a node or nodes in a layer of the network.

* Regularization :  Regularization is a technique which makes slight modifications to the learning algorithm such that the model generalizes better. This in turn improves the model's performance on the unseen data as well.



### Activation Function : 
  
 * ReLU : The rectified linear activation function, or ReLU for short, is a piecewise linear function that outputs the input directly if it is positive; otherwise, it outputs zero.
 * Sigmoid : Sigmoid activation function, sigmoid(x) = 1 / (1 + exp(-x)).
    Applies the sigmoid activation function. For small values (<-5), sigmoid returns a value close to zero, and for large values (>5) the result of the function gets close to 1.
 
### Regularization :
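 * L1 regularizer : The L1 penalty adds the sum of the absolute values of the weights to the loss, which shrinks the parameters toward zero and can drive some of them to exactly zero.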
 
### Adam Optimizer : 
 * Adam is a replacement optimization algorithm for stochastic gradient descent for training deep learning models. Adam combines the best properties of the AdaGrad and RMSProp algorithms to provide an optimization algorithm that can handle sparse gradients on noisy problems.
 
### SGD Optimizer : 
  * Stochastic Gradient Descent (SGD) is a simple yet very efficient approach to fitting linear classifiers and regressors under convex loss functions such as (linear) Support Vector Machines and Logistic Regression.
 
### Binary Crossentropy Loss: 
 * Cross-entropy is the default loss function to use for binary classification problems.
   It is intended for use with binary classification where the target values are in the set {0, 1}.

In [None]:
# Fully connected binary classifier built with the Keras functional API.
# Three Dense -> BatchNormalization -> Dropout stages widen from 16 to 64
# units, followed by a 128-unit Dense layer and one sigmoid output unit.
# Every hidden Dense layer carries an L1 kernel penalty.

# NOTE(review): 13 presumably equals the number of feature columns left after
# dropping "output" from the dataset -- confirm against the CSV.
inputs = layers.Input(shape=(13,), name='Input_layer')

# Stage 1: 16 units.
dense_1 = layers.Dense(16, kernel_regularizer=regularizers.l1(1e-2), activation='relu', name='Dense1')(inputs)
batch_norm1 = layers.BatchNormalization(name='Batch_norm1')(dense_1)
dropout_1 = layers.Dropout(0.2, name='Dropout1')(batch_norm1)

# Stage 2: 32 units.
dense_2 = layers.Dense(32, kernel_regularizer=regularizers.l1(1e-2), activation='relu', name='Dense2')(dropout_1)
batch_norm2 = layers.BatchNormalization(name='Batch_norm2')(dense_2)
dropout_2 = layers.Dropout(0.2, name='Dropout2')(batch_norm2)

# Stage 3: 64 units.
dense_3 = layers.Dense(64, kernel_regularizer=regularizers.l1(1e-2), activation='relu', name='Dense3')(dropout_2)
batch_norm3 = layers.BatchNormalization(name='Batch_norm3')(dense_3)
dropout_3 = layers.Dropout(0.2, name='Dropout3')(batch_norm3)

# Final hidden layer: 128 units with a weaker L1 penalty (1e-3 instead of 1e-2)
# and no batch-norm/dropout after it. It gets its own name (`dense_4`) so it no
# longer overwrites the handle of the third dense layer.
dense_4 = layers.Dense(128, kernel_regularizer=regularizers.l1(1e-3), activation='relu', name='Dense4')(dropout_3)

# Single sigmoid unit: the output is a value in (0, 1) for the positive class.
outputs = layers.Dense(1, activation='sigmoid', name='Output_layer')(dense_4)

#### Selecting optimizer, loss function, metrics and callbacks

In [None]:
# Training configuration shared by the compile() and fit() cells.

# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and is rejected by newer Keras releases.
optimizer = optimizers.Adam(learning_rate=0.01)
loss = losses.BinaryCrossentropy()
# Passed as a list, the form accepted by every Keras version.
metrics = ['accuracy']

# Multiply the learning rate by 0.1 whenever validation accuracy has not
# improved for 6 epochs, never going below 1e-4.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_accuracy', factor=0.1, patience=6, min_lr=1e-4, verbose=True
)
# Stops training after 10 epochs without improvement in validation loss.
# NOTE: this callback is constructed but not included in the list below, so
# training runs for the full number of epochs given to fit().
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=10, verbose=True, mode='auto'
)
# NOTE: this name shadows the `callbacks` module imported from
# tensorflow.keras at the top of the file; it is kept because the fit() cell
# refers to it by this name.
callbacks = [reduce_lr]

#### Making the NN Model using Tensorflow's functional API

In [None]:
# Tie the input and output tensors of the layer graph into a trainable model.
model = tf.keras.Model(
    inputs=inputs,
    outputs=outputs,
    name='heart_attack_prediction',
)

#### Compiling the model

In [None]:
# Attach the optimizer, loss and metric chosen in the configuration cell.
model.compile(
    optimizer=optimizer,
    loss=loss,
    metrics=metrics,
)

#### Overview of the Model (including total parameters)

In [None]:
# Print the layer-by-layer summary of the compiled model.
model.summary()

#### Training the model with fixed number of epochs

In [None]:
# Train for 300 epochs with one log line per epoch (verbose=2).
# The held-out test split is also used as the validation set here, so the
# learning-rate scheduler in `callbacks` reacts to test-set performance.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=300,
    shuffle=True,
    callbacks=callbacks,
    verbose=2,
)

## Plotting Accuracy vs Epochs and Loss vs Epochs Curve

In [None]:
# One row, two panels: accuracy on the left, loss on the right.
fig, ax = plt.subplots(1, 2, figsize=(20, 6))
ax = ax.ravel()

for i, met in enumerate(["accuracy", "loss"]):
    # Label both curves so the training and validation series can be told
    # apart; without a legend the two lines are indistinguishable.
    ax[i].plot(history.history[met], label="train")
    ax[i].plot(history.history['val_' + met], label="validation")
    ax[i].set_title("Model {}".format(met))
    ax[i].set_xlabel("epochs")
    ax[i].set_ylabel(met)
    ax[i].legend()
    

## Model Evaluation


  ### We used metrics from Scikit learn to evaluate the performance of our model
  
  ### Performance Criteria:
  
  * Balanced Accuracy Score
  * F1 Score
  * Precision Score
  * Recall Score
 

### We plot the Confusion Matrix and the ROC Curve (with its AUC)

In [None]:
def get_metrics(train_ds, train_y):
    """Print classification metrics and plot the confusion matrix and ROC curve.

    Uses the module-level ``model`` to predict on ``train_ds``, thresholds the
    predicted probabilities at 0.5 to obtain class labels, and compares them
    with ``train_y``.

    Args:
        train_ds: Feature matrix passed to ``model.predict``.
        train_y: True binary labels (array-like); flattened to 1-D.

    Returns:
        None. Metrics are printed and two matplotlib figures are created.
    """
    train_y = np.asarray(train_y).flatten()

    # Flatten the model output into a 1-D vector of positive-class scores
    # (the model defined in this file ends in a single sigmoid unit).
    probabilities = np.asarray(model.predict(train_ds)).ravel()
    # Hard labels at the 0.5 decision threshold.
    prediction = (probabilities >= 0.5).astype(int)

    bal_acc = sklearn.metrics.balanced_accuracy_score(train_y, prediction)
    f1_score = sklearn.metrics.f1_score(train_y, prediction, average='macro')
    pre = sklearn.metrics.precision_score(train_y, prediction, average='macro')
    rec = recall_score(train_y, prediction, average='macro')
    confusion = confusion_matrix(train_y, prediction)
    class_rep = classification_report(train_y, prediction)

    print('\n')
    print('Balanced accuracy =', bal_acc)
    print('F1 score = ', f1_score)
    print('Precision =', pre)
    print('Recall =', rec)
    print('Classification Report =\n', class_rep)
    print('Confusion Matrix =\n', confusion)
    print('\n')

    fig, ax = plt.subplots(figsize=(12, 12))
    display_con = sklearn.metrics.ConfusionMatrixDisplay(confusion)
    display_con.plot(ax=ax)
    ax.set_title('Confusion Matrix')

    # The ROC curve must be built from the continuous scores, not from the
    # thresholded 0/1 labels: hard labels collapse the curve to a single
    # operating point and make the AUC merely restate balanced accuracy.
    fpr, tpr, _ = sklearn.metrics.roc_curve(train_y, probabilities)
    roc_auc = sklearn.metrics.auc(fpr, tpr)
    display_roc = sklearn.metrics.RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc)
    fig, ax = plt.subplots(figsize=(10, 6))
    display_roc.plot(ax=ax)
    ax.set_title('AUC Curve')
    

### Evaluation on Train Dataset

In [None]:
get_metrics(X_train,y_train)

### Evaluation on Test Dataset

In [None]:
get_metrics(X_test,y_test)

## Conclusion

### **Here we used a Neural Network for binary classification. Neural Networks work best when there is a lot of data; in this scenario, the dataset was relatively small. Nevertheless, we achieved quite a good model with some hyperparameter tuning, such as using L1 regularization and trying several optimizers (AdamW, RMSprop, SGD) with various learning rates. We used callbacks to stop overfitting our model. We also used techniques such as Batch Normalization and Dropout to prevent overfitting, as Neural Nets are very prone to overfitting when the dataset is quite small.**