# Heart Attack Analysis - Basic Dense Neural Network

Goal: Establish a baseline for future improvement of heart attack prediction

## Load libraries

In [None]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
import numpy as np

## Load dataset

In [None]:
# Relative path of the heart-attack CSV file that is loaded below
dataset_path = '../input/heart-attack-analysis-prediction-dataset/heart.csv'

In [None]:
# Read csv file and load it with pandas

def load_csv_data(path):
    """Read the CSV file located at ``path`` and return it as a pandas DataFrame."""
    frame = pd.read_csv(path)
    return frame

# Load the CSV into memory; `dataset` is the DataFrame used by the following cells
dataset = load_csv_data(dataset_path)

# Sanity check: print the type of the loaded object
print(type(dataset))

In [None]:
# Preview the first rows of the dataset
dataset.head()

## Dataset structure

In [None]:
# Basic dataset info: print a concise summary (columns, non-null counts, dtypes)
dataset.info()

In [None]:
# Descriptive statistics of the numeric columns
# (count, mean, standard deviation, min, percentiles, max)
dataset.describe()

In [None]:
# Draw one 10-bin histogram per column to inspect how the values are distributed
import matplotlib.pyplot as plt
dataset.hist(bins=10, figsize=(20,15))
plt.show()

## Create labels

In [None]:
# Assign the dataset "output" column to the labels variable
# (these are the targets the model is later trained to predict)
labels = dataset['output']

In [None]:
# Number of labels (one per dataset row)
labels.shape

In [None]:
# The "output" column is no longer needed in the feature table, so remove it;
# drop() returns a new DataFrame that replaces the old one
dataset = dataset.drop('output', axis=1)

## Split dataset into train and test groups

In [None]:
# Fraction of the rows that is held out for testing
test_ratio = 0.2

# sklearn's "train_test_split" shuffles the rows and splits them by the chosen
# test ratio; "random_state" seeds the shuffle so the split is reproducible.
# Several datasets with an identical number of rows can be passed at once and
# are split on the same indices, which keeps the separate labels aligned with
# their feature rows.
train_set, test_set, train_label_set, test_label_set = train_test_split(
    dataset,
    labels,
    test_size=test_ratio,
    random_state=42,
)

In [None]:
# Summary of the training feature rows produced by the split
train_set.info()

In [None]:
# Summary of the test feature rows produced by the split
test_set.info()

In [None]:
# Number of training labels; should match the number of training rows
train_label_set.shape

In [None]:
# Number of test labels; should match the number of test rows
test_label_set.shape

## Convert inputs to numpy

In [None]:
# Inputs to the NN have to be converted to numpy arrays
train_set, test_set, train_label_set, test_label_set = (
    np.array(part)
    for part in (train_set, test_set, train_label_set, test_label_set)
)

## Establish baseline accuracy with simple dense NN model

In [None]:
# Shape of the training array; the second dimension is the number of input features
train_set.shape

In [None]:
# Baseline binary classifier: two wide hidden layers with dropout, followed by
# a single sigmoid unit that outputs the predicted probability of the positive
# class.
# NOTE(review): assumes the "output" labels are 0/1 - confirm against the data.
model = tf.keras.models.Sequential([
    # "shape" must be a tuple; "(13)" is just the integer 13. Reading the
    # feature count from the training data keeps the model in sync with it.
    tf.keras.layers.Input(shape=(train_set.shape[1],)),
    tf.keras.layers.Dense(500, activation='relu'),
    tf.keras.layers.Dense(500, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    # Sigmoid squashes the output into [0, 1], so the 0.5 threshold used by
    # the accuracy metric is meaningful.
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# Binary cross-entropy is the loss that matches a sigmoid output on a
# two-class problem; the previous linear output + mean absolute error was a
# regression setup and made the reported accuracy hard to interpret.
model.compile(loss='binary_crossentropy', metrics=['accuracy'], optimizer='adam')

model.summary()

## Train model

In [None]:
# Confirm the shape of the training array right before fitting
print(train_set.shape)

In [None]:
# Train for 50 epochs; the test split is passed as validation data, so its
# metrics are recorded in the returned history alongside the training metrics
history = model.fit(
    x=train_set,
    y=train_label_set,
    epochs=50,
    validation_data=(test_set, test_label_set),
)

## Plot loss and accuracy

In [None]:
# Plot loss and accuracy side by side, each on its own axes so the two
# quantities keep their own scale (previously only accuracy was drawn).
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(14, 5))

# Left panel: training vs. validation loss per epoch
loss_ax.plot(history.history['val_loss'], label='val_loss')
loss_ax.plot(history.history['loss'], label='loss')
loss_ax.set_title('Loss')
loss_ax.set_xlabel('epoch')
loss_ax.set_ylabel('loss')
loss_ax.legend()

# Right panel: training vs. validation accuracy per epoch
acc_ax.plot(history.history['val_accuracy'], label='val_accuracy')
acc_ax.plot(history.history['accuracy'], label='accuracy')
acc_ax.set_title('Accuracy')
acc_ax.set_xlabel('epoch')
acc_ax.set_ylabel('accuracy')
acc_ax.legend()

plt.show()