## Imports

In [None]:
import numpy as np
import pandas as pd

from sklearn.base import BaseEstimator, ClassifierMixin
import torch
import torch.nn as nn
import torch.optim as optim

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_validate, StratifiedKFold
from sklearn.metrics import make_scorer, fbeta_score, roc_auc_score

import optuna

import shap

import warnings
warnings.filterwarnings("ignore")

## Read Dataset

In [None]:
# Load the preprocessed heart-disease table, report its (rows, columns) shape,
# and preview the first five rows.
data = pd.read_csv(filepath_or_buffer="data/Heart_Disease_Kaggle_Preprocessed.csv")
print(data.shape)
data.head(n=5)

(268, 14)


Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,ECG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease
0,1.733026,0.681528,0.871534,-0.064103,1.407389,-0.418854,0.977857,-1.777056,-0.699206,1.193857,0.680101,2.462874,-0.881493,1
1,1.400801,-1.46729,-0.180588,-0.914581,6.109512,-0.418854,0.977857,0.437459,-0.699206,0.491048,0.680101,-0.715538,1.18174,0
2,0.293383,0.681528,-1.23271,-0.404294,0.222143,-0.418854,-1.022644,-0.387556,-0.699206,-0.651016,-0.947283,-0.715538,1.18174,1
3,1.068576,0.681528,0.871534,-0.1775,0.261004,-0.418854,-1.022644,-1.950743,1.430194,-0.738867,0.680101,0.343933,1.18174,0
4,2.175994,-1.46729,-1.23271,-0.631088,0.377585,-0.418854,0.977857,-1.255994,1.430194,-0.738867,-0.947283,0.343933,-0.881493,0


## Data split

In [None]:
# Separate the feature matrix from the target column.
# The loaded table also carries an "Unnamed: 0" column that is just the row
# counter (0, 1, 2, ...) -- it looks like a saved DataFrame index. It holds no
# clinical information and, unlike the other columns, is not standardized, so
# it is dropped too instead of being fed to the model as a feature.
# errors="ignore" keeps this cell working if that column is not present.
X = data.drop(columns=["Heart Disease", "Unnamed: 0"], errors="ignore")
y = data["Heart Disease"]

## Designing MLP

In [None]:
class SimpleMLP(nn.Module):
    """Feed-forward network with a single hidden layer.

    The forward pass is ``fc1 -> ReLU -> fc2``. The output of ``fc2`` is
    returned as-is; no activation is applied on top of it.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=output_size)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Map a batch of inputs to the raw outputs of the last linear layer."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

class PyTorchMLPClassifier(BaseEstimator, ClassifierMixin):
    """scikit-learn style classifier wrapping ``SimpleMLP``.

    ``fit`` performs ``epochs`` full-batch Adam steps on the whole training
    set. Labels must be integer class indices (``0 .. n_classes - 1``);
    column ``j`` of ``predict_proba`` is the probability of class index ``j``.

    Parameters
    ----------
    input_size : int
        Number of input features.
    hidden_size : int
        Width of the hidden layer.
    output_size : int
        Number of network outputs. ``1`` means a single logit for a binary
        problem (sigmoid + binary cross-entropy); any larger value means one
        logit per class (softmax + cross-entropy).
    epochs : int, default=10
        Number of optimisation steps performed by ``fit``.
    lr : float, default=0.001
        Adam learning rate.
    """

    def __init__(self, input_size, hidden_size, output_size, epochs=10, lr=0.001):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.epochs = epochs
        self.lr = lr
        # Built here as well so that ``model``, ``criterion`` and ``optimizer``
        # are available right after construction, as they always were.
        self._build()

    def _build(self):
        """(Re)create network, loss and optimizer from the current hyper-parameters."""
        self.model = SimpleMLP(self.input_size, self.hidden_size, self.output_size)
        if self.output_size == 1:
            # A single logit cannot be trained with CrossEntropyLoss (label 1
            # would be out of range), so use the binary counterpart.
            self.criterion = nn.BCEWithLogitsLoss()
        else:
            self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr)

    @staticmethod
    def _to_float_tensor(X):
        """Convert a DataFrame or array-like into a float32 tensor (copying the data)."""
        return torch.tensor(np.asarray(X, dtype=np.float32))

    def _raw_outputs(self, X):
        """Run the network in eval mode, without gradient tracking, and return its logits."""
        self.model.eval()
        with torch.no_grad():
            return self.model(self._to_float_tensor(X))

    def fit(self, X, y):
        """Train the network on ``X`` / ``y`` and return ``self``.

        Every call starts from freshly initialised weights, so values changed
        through ``set_params`` take effect and repeated calls do not continue
        training a previously fitted model.

        Parameters
        ----------
        X : DataFrame or array-like of shape (n_samples, input_size)
        y : Series or array-like of shape (n_samples,)
            Integer class indices.
        """
        features = self._to_float_tensor(X)
        targets = torch.tensor(np.asarray(y), dtype=torch.long)

        self._build()
        # predict_proba always returns at least two columns, one per class index.
        self.classes_ = np.arange(max(self.output_size, 2))

        single_logit = self.output_size == 1
        if single_logit:
            # BCEWithLogitsLoss expects float targets shaped like the logits.
            targets = targets.float()

        self.model.train()
        for _ in range(self.epochs):
            self.optimizer.zero_grad()
            logits = self.model(features)
            if single_logit:
                logits = logits.squeeze(1)
            loss = self.criterion(logits, targets)
            loss.backward()
            self.optimizer.step()
        return self

    def predict(self, X):
        """Return the predicted class index for every row of ``X`` as a NumPy array."""
        logits = self._raw_outputs(X)
        if self.output_size == 1:
            # sigmoid(z) > 0.5 is equivalent to z > 0.
            predicted = (logits.reshape(-1) > 0).long()
        else:
            predicted = logits.argmax(dim=1)
        return predicted.numpy()

    def predict_proba(self, X):
        """Return class probabilities as an array of shape (n_samples, n_classes).

        With ``output_size == 1`` the single logit is passed through a sigmoid
        and expanded to the two columns ``[P(class 0), P(class 1)]``; otherwise
        a softmax over the logits is returned.
        """
        logits = self._raw_outputs(X)
        if self.output_size == 1:
            # reshape(-1) keeps a 1-D vector even for a single sample, where a
            # plain squeeze() would collapse to 0-d and break the stack below.
            positive = torch.sigmoid(logits).reshape(-1)
            probs = torch.stack([1 - positive, positive], dim=1)
        else:
            probs = torch.softmax(logits, dim=1)
        return probs.numpy()

## Cross-validation with Optuna