In [224]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F

In [225]:
df = pd.read_csv("heart.csv")

In [226]:
x = df.drop('HeartDisease', axis=1)
y = df['HeartDisease']

#### train test split

In [227]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

In [228]:
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.2,random_state=22)

#### one hot encode object data

In [229]:
object_cols = df.select_dtypes("object").columns.values.tolist()

In [230]:
OH_encoder = OneHotEncoder(handle_unknown='ignore', sparse=False)
OH_cols_train = pd.DataFrame(OH_encoder.fit_transform(x_train[object_cols]))
OH_cols_test = pd.DataFrame(OH_encoder.transform(x_test[object_cols]))

In [231]:
OH_cols_train.index = x_train.index
OH_cols_test.index = x_test.index

In [232]:
num_X_train = x_train.drop(object_cols, axis=1)
num_X_test = x_test.drop(object_cols, axis=1)

In [233]:
oh_x_train = pd.concat([num_X_train, OH_cols_train], axis=1)
oh_x_test = pd.concat([num_X_test, OH_cols_test], axis=1)

In [234]:
oh_x_train.shape

(734, 20)

In [235]:
scaler = sklearn.preprocessing.StandardScaler()

oh_x_train = scaler.fit_transform(X=oh_x_train)

In [236]:
oh_x_test = scaler.transform(oh_x_test)

### NN definition

In [237]:
class MLP(nn.Module):
    def __init__(self, in_dim, h_dim, num_hidden, out_dim):
        super().__init__()
        
        self.in_lin = nn.Linear(in_dim, h_dim, dtype=torch.float64)
        
        #TODO: check how to add list of layers to model parameters
        self.hidden_layers = [nn.Linear(h_dim, h_dim, dtype=torch.float64) for i in range(num_hidden)]
        self.out_lin = nn.Linear(h_dim, out_dim, dtype=torch.float64)
        
    def forward(self, x):
        """
        Returns raw logits
        """
        x = F.relu(self.in_lin(x))
        
        #for layer in self.hidden_layers:
        #    x = F.relu(layer(x))
        
        x = self.out_lin(x)
        
        return x

In [238]:
#instance of an mlp binary classifier
mlp_class = MLP(oh_x_train.shape[1], 256, 2, 2)

#### Hyper parameter definition

In [239]:
import math

In [240]:
EPOCHS = 30
BS = 128
NB = math.ceil(oh_x_train.shape[0]/BS)

#### Train loop

In [241]:
def accuracy(logits, y):
    probs = F.softmax(logits, dim=1)
    preds = torch.argmax(probs, dim = 1).numpy()
    y = y.numpy()
    return (preds==y).sum()/len(y)

In [242]:
optimizer = torch.optim.Adam(mlp_class.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

In [243]:
for e in range(1, EPOCHS + 1):
    for batch in range(NB):
        batch_x = np.array(oh_x_train[BS*batch:(batch+1)*BS])
        batch_y = np.array(y_train[BS*batch:(batch+1)*BS])
        
        
        tensor_batch_x = torch.tensor(batch_x, dtype=torch.float64)
        tensor_batch_y = torch.tensor(batch_y, dtype=torch.long)
        
        
        optimizer.zero_grad()
        logits = mlp_class(tensor_batch_x)
        loss = criterion(logits, tensor_batch_y)
        loss.backward()
        
        optimizer.step()
        
        with torch.no_grad():
            test_logits = mlp_class(torch.tensor(oh_x_test, dtype=torch.float64))
            test_y = torch.tensor(np.array(y_test), dtype=torch.long)
            
            test_acc = accuracy(test_logits, test_y)
        
    print(f"Epoch {e:2} train accuracy: : {accuracy(logits, tensor_batch_y):.4f} test accuracy: {test_acc:.4f}")

Epoch  1 train accuracy: : 0.7872 test accuracy: 0.8315
Epoch  2 train accuracy: : 0.8298 test accuracy: 0.8533
Epoch  3 train accuracy: : 0.8298 test accuracy: 0.8533
Epoch  4 train accuracy: : 0.8404 test accuracy: 0.8696
Epoch  5 train accuracy: : 0.8404 test accuracy: 0.8750
Epoch  6 train accuracy: : 0.8404 test accuracy: 0.8750
Epoch  7 train accuracy: : 0.8404 test accuracy: 0.8696
Epoch  8 train accuracy: : 0.8511 test accuracy: 0.8696
Epoch  9 train accuracy: : 0.8511 test accuracy: 0.8641
Epoch 10 train accuracy: : 0.8511 test accuracy: 0.8641
Epoch 11 train accuracy: : 0.8511 test accuracy: 0.8641
Epoch 12 train accuracy: : 0.8511 test accuracy: 0.8641
Epoch 13 train accuracy: : 0.8511 test accuracy: 0.8641
Epoch 14 train accuracy: : 0.8511 test accuracy: 0.8641
Epoch 15 train accuracy: : 0.8511 test accuracy: 0.8533
Epoch 16 train accuracy: : 0.8511 test accuracy: 0.8533
Epoch 17 train accuracy: : 0.8511 test accuracy: 0.8533
Epoch 18 train accuracy: : 0.8511 test accuracy:

### Model evaluation

In [244]:
from sklearn.metrics import classification_report

In [245]:
with torch.no_grad():
    test_logits = mlp_class(torch.tensor(oh_x_test, dtype=torch.float64))
    test_preds = torch.argmax(F.softmax(test_logits, dim=1), dim=1)

    test_y = torch.tensor(np.array(y_test), dtype=torch.long)

In [246]:
print(classification_report(test_y, test_preds))

              precision    recall  f1-score   support

           0       0.84      0.84      0.84        76
           1       0.89      0.89      0.89       108

    accuracy                           0.87       184
   macro avg       0.87      0.87      0.87       184
weighted avg       0.87      0.87      0.87       184

