Worked on by Andy McRae

# Training Model on Larger Dataset

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split

## Loading Cleaned Data

In [None]:
# Location of the cleaned heart-disease CSV, relative to this notebook.
filename = '../../data/large_heart_disease_dataset.csv'

# Read the whole file into a DataFrame and preview the first five rows.
df = pd.read_csv(filepath_or_buffer=filename)
df.head(5)

age: age in years

sex: sex (1 = male; 0 = female)

cp: chest pain type (Value 0: typical angina; Value 1: atypical angina; Value 2: non-anginal pain; Value 3: asymptomatic)

trestbps: resting blood pressure in mm Hg on admission to the hospital

chol: serum cholesterol in mg/dl

fbs: fasting blood sugar > 120 mg/dl (1 = true; 0 = false)

restecg: resting electrocardiographic results (Value 0: normal; Value 1: having ST-T wave abnormality; Value 2: probable or definite left ventricular hypertrophy)

thalach: maximum heart rate achieved

exang: exercise induced angina (1 = yes; 0 = no)

oldpeak: ST depression induced by exercise relative to rest

slope: the slope of the peak exercise ST segment (Value 0: upsloping; Value 1: flat; Value 2: downsloping)

ca: number of major vessels (0-3) colored by fluoroscopy

thal: thalassemia (3 = normal; 6 = fixed defect; 7 = reversible defect)

target: heart disease (0 = no, 1 = yes)

## Select Features

In [None]:
# Label to predict: the 'target' column.
y = df['target']

# Model inputs: every column except the label.
X = df.drop('target', axis=1)

## Train-Test-Split

In [None]:
# Hold out part of the rows for testing (scikit-learn's default test_size is
# used); the fixed random_state makes the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=21,
)

## Scaling and Categorizing

In [None]:
# importing the feature scaler and the one-hot encoding helper
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.utils import to_categorical

In [None]:
# Min-max scale the features. The scaler learns its per-column range from the
# training rows only, and that same fitted scaler is then applied to the test
# rows so no information from the test set influences the scaling.
X_scaler = MinMaxScaler()
X_train_scaled = X_scaler.fit_transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [None]:
# One-hot encode the labels so they can be compared against a softmax output.
#
# to_categorical infers the number of classes from the largest label in the
# array it is given. Encoding train and test independently could therefore
# produce arrays of different widths if one split happened to lack the
# highest class, so the class count is computed once and shared by both calls.
num_classes = int(max(y_train.max(), y_test.max())) + 1
y_train_categorical = to_categorical(y_train, num_classes=num_classes)
y_test_categorical = to_categorical(y_test, num_classes=num_classes)

In [None]:
# Display the one-hot encoded training labels as a quick sanity check.
y_train_categorical

## Create Model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [None]:
# Network dimensions: one input per feature column, one output per class.
x_len = X.shape[1]
y_len = y_train_categorical.shape[1]

# Feed-forward classifier: a single 100-unit ReLU hidden layer followed by a
# softmax output layer that yields one probability per class.
model = Sequential([
    Dense(units=100, activation='relu', input_dim=x_len),
    # Dense(units=100, activation='relu'),  # optional second hidden layer (disabled)
    Dense(units=y_len, activation='softmax'),
])

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss (the
# labels are one-hot encoded), and accuracy reported alongside the loss.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train for 60 epochs on the scaled training data, reshuffling the samples
# each epoch and printing per-epoch progress.
model.fit(
    x=X_train_scaled,
    y=y_train_categorical,
    epochs=60,
    shuffle=True,
    verbose=1,
)

## Quantify Model

In [None]:
# Score the trained network on the held-out test set; evaluate() returns the
# loss followed by the metrics requested at compile time (accuracy here).
model_loss, model_accuracy = model.evaluate(
    x=X_test_scaled,
    y=y_test_categorical,
    verbose=2,
)
print(f"Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")