# Comparing Four Machine Learning Models and an Ensemble for Predicting Heart Attacks from Routine Clinical Features

## Variable Breakdown:

*FILL IN*

## Importing Necessary Libraries

In [37]:
## Import is taken from Assignment 3, as it uses XGBoost, DTs, and NN. Should have all the imports we need.
#!pip install xgboost

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn import tree
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import (
    train_test_split,
    GridSearchCV,
    StratifiedKFold,
    cross_val_score,
)
from sklearn.metrics import (
accuracy_score,
roc_auc_score,
precision_score,
recall_score,
f1_score,      
average_precision_score,
confusion_matrix,
classification_report
)
from sklearn.decomposition import PCA

from xgboost import XGBClassifier

import torch
from torch import nn
from torch import optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
import torch.nn.functional as F
import torch.nn.utils as utils


seed = 42
np.random.seed(seed)
torch.manual_seed(seed)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cpu')

## Load Dataset

In [38]:
df = pd.read_csv('Medicaldataset.csv')

df.head()

Unnamed: 0,Age,Gender,Heart rate,Systolic blood pressure,Diastolic blood pressure,Blood sugar,CK-MB,Troponin,Result
0,64,1,66,160,83,160.0,1.8,0.012,negative
1,21,1,94,98,46,296.0,6.75,1.06,positive
2,55,1,64,160,77,270.0,1.99,0.003,negative
3,64,1,70,120,55,270.0,13.87,0.122,positive
4,55,1,64,112,65,300.0,1.08,0.003,negative


In [39]:
df.tail()

Unnamed: 0,Age,Gender,Heart rate,Systolic blood pressure,Diastolic blood pressure,Blood sugar,CK-MB,Troponin,Result
1314,44,1,94,122,67,204.0,1.63,0.006,negative
1315,66,1,84,125,55,149.0,1.33,0.172,positive
1316,45,1,85,168,104,96.0,1.24,4.25,positive
1317,54,1,58,117,68,443.0,5.8,0.359,positive
1318,51,1,94,157,79,134.0,50.89,1.77,positive


## Dataset Exploration

### Checking for Null Values

In [40]:
df.isnull().sum()

Age                         0
Gender                      0
Heart rate                  0
Systolic blood pressure     0
Diastolic blood pressure    0
Blood sugar                 0
CK-MB                       0
Troponin                    0
Result                      0
dtype: int64

### Checking Target Class Imbalances

In [41]:
res_groups = df.groupby('Result')

print(f'Group Totals\nNegative: {res_groups.size().iloc[0]}\nPositive: {res_groups.size().iloc[1]}\n')
print(f'Group Percents\nNegative: {res_groups.size().iloc[0] / df["Result"].shape[0] * 100 : 0.3f}\nPositive: {res_groups.size().iloc[1] / df["Result"].shape[0] * 100 : 0.3f}')

Group Totals
Negative: 509
Positive: 810

Group Percents
Negative:  38.590
Positive:  61.410


### Analyzing Variable Means

In [42]:
print(f'Mean Age: {df["Age"].mean() : 0.3f}')
print(f'Mean Heart Rate: {df["Heart rate"].mean() : 0.3f}')
print(f'Mean Systolic Blood Pressure: {df["Systolic blood pressure"].mean() : 0.3f}')
print(f'Mean Diastolic Blood Pressure: {df["Diastolic blood pressure"].mean() : 0.3f}')
print(f'Mean Blood Sugar Levels: {df["Blood sugar"].mean() : 0.3f}')
print(f'Mean CK-MB: {df["CK-MB"].mean() : 0.3f}')
print(f'Mean Troponin: {df["Troponin"].mean() : 0.3f}')

Mean Age:  56.192
Mean Heart Rate:  78.337
Mean Systolic Blood Pressure:  127.171
Mean Diastolic Blood Pressure:  72.269
Mean Blood Sugar Levels:  146.634
Mean CK-MB:  15.274
Mean Troponin:  0.361


### Checking Gender Variable Imbalances

### Checking Correlation Between Variables

### Data Preprocessing 
Check List: 
Neural Network (WIP)
XGBoost (WIP)
RandomForst (WIP)

## Neural Network (NN)

In [43]:
class HeartAttackNN(nn.Module):
    def __init__(self, input_size=8, hidden1=32, hidden2=16, hidden3=8, output_size=1):
        super(HeartAttackNN, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden1)
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.fc3 = nn.Linear(hidden2, hidden3)
        self.fc3 = nn.Linear(hidden3, output_size)

        def forward(self, x):
            out = torch.relu(self.fc1(x)) #ReLU to add non-linearity while dealing with vanishing gradients
            out = torch.relu(self.fc2(x))
            out = torch.sigmoid(self.fc3(x)) # For binary classification
            return out

model_heart = HeartAttackNN().to(device)
print(model_heart)
    
        

HeartAttackNN(
  (fc1): Linear(in_features=8, out_features=32, bias=True)
  (fc2): Linear(in_features=32, out_features=16, bias=True)
  (fc3): Linear(in_features=8, out_features=1, bias=True)
)


### Hyperparameters for NN

In [None]:
input_size = 8
hidden1 = 32
hidden2 = 16
hidden3 = 8
output_size = 1
learning_rate = 0.01
num_epochs = 1000

#Model initialization
model = HeartAttackNN(input_size, hidden1, hidden2, hidden3, output_size)


### Loss Function and Optimizer

In [45]:
criterion = nn.BCELoss()  # Binary Cross Entropy
criterion

optimizer = optim.Adam(model.parameters(), lr=learning_rate)
optimizer

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    decoupled_weight_decay: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.01
    maximize: False
    weight_decay: 0
)

# Random Forest Classifier

### Hyperparameter Options for Grid Search

In [None]:
hyparam_grid = {
    'n_estimators': [300, 600],
    'criterion': ['gini', 'entropy'],
    'max_depth': [3, 5, 7],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['sqrt', 0.5]
}