# Comparing Four Machine Learning Models and an Ensemble for Predicting Heart Attacks from Routine Clinical Features

## Variable Breakdown:

*FILL IN*

## Importing Necessary Libraries

In [78]:
## Import is taken from Assignment 3, as it uses XGBoost, DTs, and NN. Should have all the imports we need.
#!pip install xgboost

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn import tree
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import (
    train_test_split,
    GridSearchCV,
    StratifiedKFold,
    cross_val_score,
)
from sklearn.metrics import (
accuracy_score,
roc_auc_score,
precision_score,
recall_score,
f1_score,      
average_precision_score,
confusion_matrix,
classification_report
)
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline

from xgboost import XGBClassifier

from tabpfn import TabPFNClassifier

import torch
from torch import nn
from torch import optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
import torch.nn.functional as F
import torch.nn.utils as utils


seed = 42
np.random.seed(seed)
torch.manual_seed(seed)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cpu')

## Load Dataset

In [79]:
df = pd.read_csv('Medicaldataset.csv')

df.head()

Unnamed: 0,Age,Gender,Heart rate,Systolic blood pressure,Diastolic blood pressure,Blood sugar,CK-MB,Troponin,Result
0,64,1,66,160,83,160.0,1.8,0.012,negative
1,21,1,94,98,46,296.0,6.75,1.06,positive
2,55,1,64,160,77,270.0,1.99,0.003,negative
3,64,1,70,120,55,270.0,13.87,0.122,positive
4,55,1,64,112,65,300.0,1.08,0.003,negative


In [80]:
df.tail()

Unnamed: 0,Age,Gender,Heart rate,Systolic blood pressure,Diastolic blood pressure,Blood sugar,CK-MB,Troponin,Result
1314,44,1,94,122,67,204.0,1.63,0.006,negative
1315,66,1,84,125,55,149.0,1.33,0.172,positive
1316,45,1,85,168,104,96.0,1.24,4.25,positive
1317,54,1,58,117,68,443.0,5.8,0.359,positive
1318,51,1,94,157,79,134.0,50.89,1.77,positive


## Dataset Exploration

### Checking for Null Values

In [81]:
df.isnull().sum()

Age                         0
Gender                      0
Heart rate                  0
Systolic blood pressure     0
Diastolic blood pressure    0
Blood sugar                 0
CK-MB                       0
Troponin                    0
Result                      0
dtype: int64

### Checking Target Class Imbalances

In [82]:
res_groups = df.groupby('Result')

print(f'Group Totals\nNegative: {res_groups.size().iloc[0]}\nPositive: {res_groups.size().iloc[1]}\n')
print(f'Group Percents\nNegative: {res_groups.size().iloc[0] / df["Result"].shape[0] * 100 : 0.3f}\nPositive: {res_groups.size().iloc[1] / df["Result"].shape[0] * 100 : 0.3f}')

Group Totals
Negative: 509
Positive: 810

Group Percents
Negative:  38.590
Positive:  61.410


### Analyzing Variable Means

In [83]:
print(f'Mean Age: {df["Age"].mean() : 0.3f}')
print(f'Mean Heart Rate: {df["Heart rate"].mean() : 0.3f}')
print(f'Mean Systolic Blood Pressure: {df["Systolic blood pressure"].mean() : 0.3f}')
print(f'Mean Diastolic Blood Pressure: {df["Diastolic blood pressure"].mean() : 0.3f}')
print(f'Mean Blood Sugar Levels: {df["Blood sugar"].mean() : 0.3f}')
print(f'Mean CK-MB: {df["CK-MB"].mean() : 0.3f}')
print(f'Mean Troponin: {df["Troponin"].mean() : 0.3f}')

Mean Age:  56.192
Mean Heart Rate:  78.337
Mean Systolic Blood Pressure:  127.171
Mean Diastolic Blood Pressure:  72.269
Mean Blood Sugar Levels:  146.634
Mean CK-MB:  15.274
Mean Troponin:  0.361


### Checking Gender Variable Imbalances

### Checking Correlation Between Variables

## Data Split and Scaling

### Assign X and y variables

In [84]:
X = df.drop(columns=['Result'])
X.head()

Unnamed: 0,Age,Gender,Heart rate,Systolic blood pressure,Diastolic blood pressure,Blood sugar,CK-MB,Troponin
0,64,1,66,160,83,160.0,1.8,0.012
1,21,1,94,98,46,296.0,6.75,1.06
2,55,1,64,160,77,270.0,1.99,0.003
3,64,1,70,120,55,270.0,13.87,0.122
4,55,1,64,112,65,300.0,1.08,0.003


In [85]:
y = df['Result']
y.head()

0    negative
1    positive
2    negative
3    positive
4    negative
Name: Result, dtype: object

### Encode Positive as 1 and Negative as 0

In [86]:
y = y.map({"negative": 0, "positive": 1}).astype(int)
y.head()

0    0
1    1
2    0
3    1
4    0
Name: Result, dtype: int64

### Test/Train Split

In [87]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed, stratify=y)

### Scale Data

In [88]:
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

### Prepare Data for Neural Network (Convert to PyTorch Tensors)

In [89]:
# Convert scaled data to PyTorch tensors
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).unsqueeze(1)  # Shape: (n, 1) for BCELoss
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).unsqueeze(1)    # Shape: (n, 1) for BCELoss

# Move to device (GPU if available, otherwise CPU)
X_train_tensor = X_train_tensor.to(device)
X_test_tensor = X_test_tensor.to(device)
y_train_tensor = y_train_tensor.to(device)
y_test_tensor = y_test_tensor.to(device)

print('Training samples:', X_train_tensor.shape[0])
print('Test samples:', X_test_tensor.shape[0])
print('Number of features:', X_train_tensor.shape[1])
print('\nTensor shapes:')
print('X_train_tensor:', X_train_tensor.shape)
print('X_test_tensor:', X_test_tensor.shape)
print('y_train_tensor:', y_train_tensor.shape)
print('y_test_tensor:', y_test_tensor.shape)
print('\nDevice:', device)

Training samples: 1055
Test samples: 264
Number of features: 8

Tensor shapes:
X_train_tensor: torch.Size([1055, 8])
X_test_tensor: torch.Size([264, 8])
y_train_tensor: torch.Size([1055, 1])
y_test_tensor: torch.Size([264, 1])

Device: cpu


## Neural Network (NN)

In [90]:
class HeartAttackNN(nn.Module):
    def __init__(self, input_size, hidden1=32, hidden2=16, hidden3=8, output_size=1):
        super(HeartAttackNN, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden1)
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.fc3 = nn.Linear(hidden2, hidden3) 
        self.fc4 = nn.Linear(hidden3, output_size)

    def forward(self, x):
        out = torch.relu(self.fc1(x)) #ReLU to add non-linearity while dealing with vanishing gradients
        out = torch.relu(self.fc2(out))
        out = torch.relu(self.fc3(out)) 
        out = torch.sigmoid(self.fc4(out)) #sigmoid for binary classification
        return out

### Hyperparameters for NN

In [91]:
input_size = X_train_tensor.shape[1]
hidden1 = 32
hidden2 = 16
hidden3 = 8
output_size = 1
learning_rate = 0.01
num_epochs = 1000

#Model initialization
model = HeartAttackNN(input_size, hidden1, hidden2, hidden3, output_size).to(device)
print(model)


HeartAttackNN(
  (fc1): Linear(in_features=8, out_features=32, bias=True)
  (fc2): Linear(in_features=32, out_features=16, bias=True)
  (fc3): Linear(in_features=16, out_features=8, bias=True)
  (fc4): Linear(in_features=8, out_features=1, bias=True)
)


### Loss Function and Optimizer

In [92]:
criterion = nn.BCELoss()  # Binary Cross Entropy
criterion

optimizer = optim.Adam(model.parameters(), lr=learning_rate)
optimizer

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.01
    maximize: False
    weight_decay: 0
)

#### Training Loop For NN

In [93]:
def train_nn(model, optimizer, criterion, X_train, y_train, num_epochs, patience=20):
    model.train()
    best_loss = float('inf')
    patience_counter = 0
    
    for epoch in range(num_epochs):
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, y_train)
        loss.backward()
        optimizer.step()
        
        if loss.item() < best_loss:
            best_loss = loss.item()
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                break
    return model
        

#### Evaluation of NN

In [94]:
@torch.no_grad() #Disables gradient calculation for evaluation
def eval_nn(model, X_test, y_test):
    model.eval()
    y_prob = model(X_test).cpu().numpy().ravel()
    y_pred = (y_prob >= 0.5).astype(int)

    return {
        "accuracy": accuracy_score(y_test.cpu().numpy(), y_pred),
        "precision": precision_score(y_test.cpu().numpy(), y_pred),
        "recall": recall_score(y_test.cpu().numpy(), y_pred),
        "f1": f1_score(y_test.cpu().numpy(), y_pred),
        "auroc": roc_auc_score(y_test.cpu().numpy(), y_prob),
        "auprc": average_precision_score(y_test.cpu().numpy(), y_prob),
        "y_prob": y_prob,
        "y_pred": y_pred,
    }

#### K-Fold CV & Hyperparameter Tuning (NN)

In [95]:
from sklearn.model_selection import StratifiedKFold

nn_config_grid = [
    {"hidden1": 32, "hidden2": 16, "hidden3": 8,  "lr": 0.01,  "epochs": 100},
    {"hidden1": 64, "hidden2": 32, "hidden3": 16, "lr": 0.01,  "epochs": 100},
    {"hidden1": 64, "hidden2": 32, "hidden3": 16, "lr": 0.005, "epochs": 100},
    {"hidden1": 128,"hidden2": 64, "hidden3": 32, "lr": 0.005, "epochs": 100},
]

kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)

cv_results_nn = []
best_cfg = None
best_mean_auroc = -np.inf

X_train_np = X_train_scaled
y_train_np = y_train.values      

for cfg in nn_config_grid:
    fold_aurocs = []

    print(f"\nEvaluating config: {cfg}")
    for fold_idx, (train_idx, val_idx) in enumerate(kfold.split(X_train_np, y_train_np), 1):
        X_tr = torch.tensor(X_train_np[train_idx], dtype=torch.float32).to(device)
        y_tr = torch.tensor(y_train_np[train_idx], dtype=torch.float32).unsqueeze(1).to(device)

        X_val = torch.tensor(X_train_np[val_idx], dtype=torch.float32).to(device)
        y_val = torch.tensor(y_train_np[val_idx], dtype=torch.float32).unsqueeze(1).to(device)

        input_size = X_tr.shape[1]
        model_cv = HeartAttackNN(
            input_size=input_size,
            hidden1=cfg["hidden1"],
            hidden2=cfg["hidden2"],
            hidden3=cfg["hidden3"],
            output_size=1
        ).to(device)

        criterion_cv = nn.BCELoss()
        optimizer_cv = optim.Adam(model_cv.parameters(), lr=cfg["lr"])


        model_cv = train_nn(model_cv, optimizer_cv, criterion_cv, X_tr, y_tr, cfg["epochs"])

        metrics_val = eval_nn(model_cv, X_val, y_val)
        fold_aurocs.append(metrics_val["auroc"])

        print(f"  Fold {fold_idx}: AUROC = {metrics_val['auroc']:.4f}")

    mean_auroc = np.mean(fold_aurocs)
    cv_results_nn.append({"config": cfg, "mean_auroc": mean_auroc})

    print(f"Mean AUROC for config {cfg}: {mean_auroc:.4f}")

    if mean_auroc > best_mean_auroc:
        best_mean_auroc = mean_auroc
        best_cfg = cfg

print("\nBest NN config:", best_cfg)
print("Best mean CV AUROC:", best_mean_auroc)



Evaluating config: {'hidden1': 32, 'hidden2': 16, 'hidden3': 8, 'lr': 0.01, 'epochs': 100}
  Fold 1: AUROC = 0.9230
  Fold 2: AUROC = 0.9266
  Fold 3: AUROC = 0.8963
  Fold 4: AUROC = 0.8734
  Fold 5: AUROC = 0.9282
Mean AUROC for config {'hidden1': 32, 'hidden2': 16, 'hidden3': 8, 'lr': 0.01, 'epochs': 100}: 0.9095

Evaluating config: {'hidden1': 64, 'hidden2': 32, 'hidden3': 16, 'lr': 0.01, 'epochs': 100}
  Fold 1: AUROC = 0.9266
  Fold 2: AUROC = 0.9258
  Fold 3: AUROC = 0.8785
  Fold 4: AUROC = 0.8968
  Fold 5: AUROC = 0.9124
Mean AUROC for config {'hidden1': 64, 'hidden2': 32, 'hidden3': 16, 'lr': 0.01, 'epochs': 100}: 0.9080

Evaluating config: {'hidden1': 64, 'hidden2': 32, 'hidden3': 16, 'lr': 0.005, 'epochs': 100}
  Fold 1: AUROC = 0.9093
  Fold 2: AUROC = 0.9111
  Fold 3: AUROC = 0.8842
  Fold 4: AUROC = 0.9022
  Fold 5: AUROC = 0.9256
Mean AUROC for config {'hidden1': 64, 'hidden2': 32, 'hidden3': 16, 'lr': 0.005, 'epochs': 100}: 0.9065

Evaluating config: {'hidden1': 128, 

#### Retraining NN on full trianing set with best config of hyperparameters & evaluation of NN on test set

In [96]:
input_size = X_train_tensor.shape[1]

best_model_nn = HeartAttackNN(
    input_size=input_size,
    hidden1=best_cfg["hidden1"],
    hidden2=best_cfg["hidden2"],
    hidden3=best_cfg["hidden3"],
    output_size=1
).to(device)

criterion = nn.BCELoss()
optimizer = optim.Adam(best_model_nn.parameters(), lr=best_cfg["lr"])

best_model_nn = train_nn(
    best_model_nn,
    optimizer,
    criterion,
    X_train_tensor,
    y_train_tensor,
    300  # Best model was trained in CV. 300 epochs to make it less compute heavy.
)

nn_results = eval_nn(best_model_nn, X_test_tensor, y_test_tensor)

print("\nFinal NN Test Results (using best config):")
for k, v in nn_results.items():
    if k in ["y_prob", "y_pred"]:
        continue
    print(f"{k}: {v}")

#Building a table we can call anywere
nn_rows = {
    "model": "Neural Network",
    "accuracy": nn_results["accuracy"],
    "precision": nn_results["precision"],
    "recall": nn_results["recall"],
    "f1": nn_results["f1"],
    "auroc": nn_results["auroc"],
    "auprc": nn_results["auprc"],
}
result_table_nn = []
result_table_nn.append(nn_rows)
pd.DataFrame(result_table_nn)


Final NN Test Results (using best config):
accuracy: 0.8939393939393939
precision: 0.9466666666666667
recall: 0.8765432098765432
f1: 0.9102564102564102
auroc: 0.9415395787944808
auprc: 0.9651237248589427


Unnamed: 0,model,accuracy,precision,recall,f1,auroc,auprc
0,Neural Network,0.893939,0.946667,0.876543,0.910256,0.94154,0.965124


## Random Forest Classifier

### Hyperparameter Options for Grid Search

In [97]:
hyparam_grid_rfc = {
    'n_estimators': [300, 600],
    'criterion': ['gini', 'entropy'],
    'max_depth': [3, 5, 7],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['sqrt', 0.5]
}

### Initialize Random Forest Classifier

In [98]:
model_rfc = RandomForestClassifier(random_state=seed)

### Run a Grid Search to Find Best Hyperparameters

In [99]:
grid_rfc = GridSearchCV(
    estimator=model_rfc,
    scoring='accuracy',
    cv=3,
    param_grid=hyparam_grid_rfc,
    n_jobs=-1
)

In [100]:
grid_rfc.fit(X_train, y_train)

0,1,2
,estimator,RandomForestC...ndom_state=42)
,param_grid,"{'criterion': ['gini', 'entropy'], 'max_depth': [3, 5, ...], 'max_features': ['sqrt', 0.5], 'min_samples_leaf': [1, 2, ...], ...}"
,scoring,'accuracy'
,n_jobs,-1
,refit,True
,cv,3
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,n_estimators,300
,criterion,'gini'
,max_depth,3
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [101]:
print('Best accuracy: ', grid_rfc.best_score_)
print('Best Set of Hyperparameters: ', grid_rfc.best_params_)

Best accuracy:  0.9867397263230596
Best Set of Hyperparameters:  {'criterion': 'gini', 'max_depth': 3, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 300}


### Assign the Best Hyperparameters to the Random Forest Classifier

In [102]:
model_rfc = grid_rfc.best_estimator_

### Examine Performance of Random Forest Classifier Model

In [103]:
y_val_pred = model_rfc.predict(X_test)
y_val_proba = model_rfc.predict_proba(X_test)[:,1]

acc_rfc = accuracy_score(y_test, y_val_pred)
auc_rfc = roc_auc_score(y_test, y_val_proba)

print('Accuracy of Random Forest Classifier: ', acc_rfc)
print('AUC of Random Forest Classifier: ', auc_rfc)

Accuracy of Random Forest Classifier:  0.9810606060606061
AUC of Random Forest Classifier:  0.9869281045751634


## XGBoost

The parameter grid is as follows:

In [104]:
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.1, 0.3]
}

In [105]:
xgb = XGBClassifier(random_state=42, use_label_encoder=False, eval_metric='logloss')

grid = GridSearchCV(
    estimator=xgb,
    scoring='accuracy',
    cv=3,
    param_grid=param_grid,
    n_jobs=-1
)

grid.fit(X_train, y_train)
print(grid.best_score_)
print(grid.best_params_)

xgb = grid.best_estimator_

y_val_pred = xgb.predict(X_test)
y_val_proba = xgb.predict_proba(X_test)[:, 1]

acc_xgb = accuracy_score(y_test, y_val_pred)
auc_xgb = roc_auc_score(y_test, y_val_proba)

print(f'Accuracy for XGBoost: {acc_xgb:.2%}')
print(f'AUC for XGBoost: {auc_xgb:.2f}')


0.9876866960200293
{'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 100}
Accuracy for XGBoost: 98.11%
AUC for XGBoost: 0.99


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


## TabPFN

### Initialize TabPFN Model

In [106]:
model_tabPFN = TabPFNClassifier(
    random_state=seed, 
    device="cpu", 
    n_estimators=8,
    model_path="tabpfn-v2-classifier.ckpt",     # ungated model, shouldn't require HuggingFace account authentication
    ignore_pretraining_limits=True              # cpu safety limit is capped at 1000 rows, but ours is 1300, so ignore limit
    )

### Fit and Examine Performance of TabPFN

In [107]:
model_tabPFN.fit(X_train, y_train)

0,1,2
,n_estimators,8
,categorical_features_indices,
,softmax_temperature,0.9
,balance_probabilities,False
,average_before_softmax,False
,model_path,'tabpfn-v2-classifier.ckpt'
,device,'cpu'
,ignore_pretraining_limits,True
,inference_precision,'auto'
,fit_mode,'fit_preprocessors'


In [108]:
y_val_pred = model_tabPFN.predict(X_test)
y_val_proba = model_tabPFN.predict_proba(X_test)[:,1]

acc_tabPFN = accuracy_score(y_test, y_val_pred)
auc_tabPFN = roc_auc_score(y_test, y_val_proba)

print('Accuracy of TabPFN: ', acc_tabPFN)
print('AUC of TabPFN: ', auc_tabPFN)

Accuracy of TabPFN:  0.9810606060606061
AUC of TabPFN:  0.9895908980876301


## Ensemble Model (NN + Random Forest Classifier + XGBoost + TabPFN)

### Preparing NN for the Ensemble with a Scaler in a Pipeline

In [None]:
pipeline_nn = Pipeline( [
    ( "scaler", StandardScaler() ),
    ( "",  ) 
] )

### Initialize Ensemble Voter with the 4 Models

In [None]:
ens_voter = VotingClassifier(
    estimators=[ ("pipeline_nn", pipeline_nn), ("model_rfc", model_rfc), ("xgb", xgb), ("model_tabPFN", model_tabPFN) ],
    voting='soft',
    weights=[1, 1, 1, 1]
)

### Fit Ensemble and Evaluate Performance

In [111]:
ens_voter.fit(X_train, y_train)

ValueError: The estimator Pipeline should be a classifier.

In [None]:
y_val_pred = ens_voter.predict(X_test)
y_val_proba = ens_voter.predict_proba(X_test)[:, 1]

acc_ens = accuracy_score(y_test, y_val_pred)
auc_ens = roc_auc_score(y_test, y_val_proba)

print('Accuracy of Ensemble: ', acc_ens)
print('AUC of Ensemble: ', auc_ens)