In [1]:
import torch
import pandas as pd
import random
import numpy as np
import matplotlib.pyplot as plt
from mlp import MLP, bce_loss
from icecream import ic

## 1. Data Preparation

In [2]:
def split_train_test(x, y, test_ratio=0.3, seed=None):
    """Randomly partition paired samples into a train and a test subset.

    Args:
        x: Samples; must support ``len`` and indexing with a list of
            integer positions (e.g. a torch tensor).
        y: Labels aligned with ``x`` along the first dimension.
        test_ratio: Fraction of the samples assigned to the test subset.
            The test size is ``int(len(x) * test_ratio)``.
        seed: Optional seed for a private ``random.Random`` instance so the
            split can be reproduced. With ``None`` (default) the module-level
            ``random`` generator is used, exactly as before.

    Returns:
        The tuple ``(x_train, y_train, x_test, y_test)``.

    Raises:
        ValueError: If ``x`` and ``y`` differ in length or ``test_ratio`` is
            outside ``[0, 1]``.
    """
    n_samples = len(x)
    if n_samples != len(y):
        raise ValueError(
            f"x and y must have the same length, got {n_samples} and {len(y)}"
        )
    if not 0 <= test_ratio <= 1:
        raise ValueError(f"test_ratio must be within [0, 1], got {test_ratio}")

    idxs = list(range(n_samples))
    # A dedicated generator keeps a seeded split from disturbing (or being
    # disturbed by) the global random state.
    shuffler = random if seed is None else random.Random(seed)
    shuffler.shuffle(idxs)

    # delimiter between test and train data
    delim = int(n_samples * test_ratio)
    test_idxs, train_idxs = idxs[:delim], idxs[delim:]
    return x[train_idxs], y[train_idxs], x[test_idxs], y[test_idxs]

def heart_disease_data(data_path: str):
    """Load the heart-disease CSV and return balanced, standardized splits.

    Processing steps, in order: drop rows with missing values, drop a fixed
    set of feature columns, down-sample every ``TenYearCHD`` class to the size
    of the smallest class, z-score the remaining feature columns and split the
    result with ``split_train_test``.

    Args:
        data_path: Path of the CSV file. It must contain the ``TenYearCHD``
            label column and the columns that are dropped below.

    Returns:
        ``(x_train, y_train, x_test, y_test)`` as float tensors; the label
        tensors have shape ``(n, 1)``.

    Raises:
        ValueError: If no rows remain after removing missing values.
    """
    data = pd.read_csv(data_path)
    # drop rows with missing values
    data = data.dropna()
    if data.empty:
        raise ValueError(f"no complete rows left in {data_path!r} after dropna()")
    # drop some features
    data = data.drop(columns=["education", "currentSmoker", "BPMeds",
                              "diabetes", "diaBP", "BMI"])

    # balance data: keep the same number of rows for every label value.
    # Sampling each group explicitly (instead of groupby.apply) keeps the
    # label column in the frame on every pandas version and evaluates the
    # class minimum only once.
    grouped = data.groupby('TenYearCHD')
    n_per_class = grouped.size().min()
    data = pd.concat(
        [group.sample(n_per_class, random_state=73) for _, group in grouped],
        ignore_index=True,
    )

    # extract labels
    y = torch.tensor(data["TenYearCHD"].values).float().unsqueeze(1)
    data = data.drop(columns="TenYearCHD")

    # standardize data
    # NOTE(review): mean/std are computed over all rows before the split, so
    # test rows influence the scaling applied to the training rows.
    data = (data - data.mean()) / data.std()
    x = torch.tensor(data.values).float()
    return split_train_test(x, y)

In [3]:
# Build the balanced, standardized train/test tensors from the Framingham CSV.
splits = heart_disease_data("./data/framingham.csv")
x_train, y_train, x_test, y_test = splits

In [4]:
# Assemble the shape report first, then emit it in a single write.
summary_lines = [
    "############# Data summary #############",
    f"x_train has shape: {x_train.shape}",
    f"y_train has shape: {y_train.shape}",
    f"x_test has shape: {x_test.shape}",
    f"y_test has shape: {y_test.shape}",
    "#######################################",
]
print("\n".join(summary_lines))

############# Data summary #############
x_train has shape: torch.Size([780, 9])
y_train has shape: torch.Size([780, 1])
x_test has shape: torch.Size([334, 9])
y_test has shape: torch.Size([334, 1])
#######################################


## 2. Model

In [13]:
# Layer widths of the two-layer network: input -> hidden -> output.
num_features = [9, 4, 1]
n_inputs, n_hidden, n_outputs = num_features
net = MLP(
    linear_1_in_features=n_inputs,
    linear_1_out_features=n_hidden,
    f_function='relu',
    linear_2_in_features=n_hidden,
    linear_2_out_features=n_outputs,
    g_function='sigmoid',
    lr=0.5,
)

Training loop

In [14]:
epochs = 20
for e in range(epochs):
    # Each epoch runs one forward/backward pass over the whole training set.
    net.clear_grad_and_cache()
    y_hat = net.forward(x_train)
    loss, dJdy_hat = bce_loss(y_train, y_hat)
    ic(loss)
    net.backward(dJdy_hat)  # calculate the gradients

    # A prediction counts as correct when it lies within 0.5 of its label.
    # The train_* names keep this loop from rebinding `accuracy`, which other
    # cells of this notebook define as a function.
    train_correct = torch.abs(y_train - y_hat) < 0.5
    train_accuracy = train_correct.float().mean()  # not printed; last value stays available after the loop

    # presumably applies the gradients computed by backward() - confirm in mlp.py
    net.update_params()

    

ic| loss: tensor(4.3599)
ic| loss: tensor(1.8804)
ic| loss: tensor(0.9447)
ic| loss: tensor(0.8305)
ic| loss: tensor(0.7903)
ic| loss: tensor(0.7615)
ic| loss: tensor(0.7404)
ic| loss: tensor(0.7239)
ic| loss: tensor(0.7110)
ic| loss: tensor(0.7006)
ic| loss: tensor(0.6919)
ic| loss: tensor(0.6845)
ic| loss: tensor(0.6781)
ic| loss: tensor(0.6725)
ic| loss: tensor(0.6676)
ic| loss: tensor(0.6634)
ic| loss: tensor(0.6594)
ic| loss: tensor(0.6559)
ic| loss: tensor(0.6528)
ic| loss: tensor(0.6500)


In [15]:
def accuracy(model, x, y):
    """Return the share of samples predicted within 0.5 of their label.

    ``model`` must expose a ``forward(x)`` method whose result can be
    subtracted from ``y``; the returned value is a scalar tensor.
    """
    predictions = model.forward(x)
    hits = (y - predictions).abs().lt(0.5)
    return hits.to(torch.float32).mean()

# Evaluate the trained MLP on the held-out test split.
plain_accuracy = accuracy(model=net, x=x_test, y=y_test)
print(f"Accuracy on plain test_set: {plain_accuracy}")

Accuracy on plain test_set: 0.6317365169525146


In [8]:
class LR(torch.nn.Module):
    """Logistic regression: a single linear layer followed by a sigmoid."""

    def __init__(self, n_features):
        """Create the linear layer mapping ``n_features`` inputs to one output."""
        super().__init__()
        self.lr = torch.nn.Linear(in_features=n_features, out_features=1)

    def forward(self, x):
        """Return the sigmoid of the linear layer's output for ``x``."""
        logits = self.lr(x)
        return logits.sigmoid()

In [11]:
# Binary cross-entropy is the training objective.
criterion = torch.nn.BCELoss()
# Logistic-regression model over 9 input features.
model = LR(n_features=9)
# Plain gradient descent with a learning rate of 1.
optim = torch.optim.SGD(model.parameters(), lr=1)

# default epoch count for train(); intended for both plain and encrypted training
EPOCHS = 20

def train(model, optim, criterion, x, y, epochs=EPOCHS):
    """Run ``epochs`` full-batch optimization steps and return ``model``.

    Every step clears the gradients, evaluates ``criterion(model(x), y)``,
    back-propagates, lets ``optim`` update the parameters and prints the
    loss together with the 1-based epoch number.
    """
    for step in range(epochs):
        e = step + 1  # epochs are reported starting at 1
        optim.zero_grad()
        loss = criterion(model(x), y)
        loss.backward()
        optim.step()
        print(f"Loss at epoch {e}: {loss.data}")
    return model

# Fit the logistic-regression model on the training split (default epoch count).
model = train(model, optim, criterion, x=x_train, y=y_train)

Loss at epoch 1: 0.6737579107284546
Loss at epoch 2: 0.6295272707939148
Loss at epoch 3: 0.6128905415534973
Loss at epoch 4: 0.6052921414375305
Loss at epoch 5: 0.6013258695602417
Loss at epoch 6: 0.5990480780601501
Loss at epoch 7: 0.5976407527923584
Loss at epoch 8: 0.5967183709144592
Loss at epoch 9: 0.596083402633667
Loss at epoch 10: 0.5956276655197144
Loss at epoch 11: 0.5952886939048767
Loss at epoch 12: 0.595028817653656
Loss at epoch 13: 0.5948244333267212
Loss at epoch 14: 0.594660222530365
Loss at epoch 15: 0.5945258736610413
Loss at epoch 16: 0.5944143533706665
Loss at epoch 17: 0.5943205952644348
Loss at epoch 18: 0.594241201877594
Loss at epoch 19: 0.5941731333732605
Loss at epoch 20: 0.5941146612167358


In [12]:
def accuracy(model, x, y):
    """Return the fraction of samples predicted within 0.5 of their label.

    Args:
        model: Callable mapping ``x`` to predictions that can be subtracted
            from ``y`` (here a ``torch.nn.Module``).
        x: Input samples passed to ``model``.
        y: Target labels compared against the predictions.

    Returns:
        A scalar float tensor: the mean of the per-sample correctness flags.
    """
    # Evaluation only: run the forward pass without autograd bookkeeping.
    with torch.no_grad():
        out = model(x)
    correct = torch.abs(y - out) < 0.5
    return correct.float().mean()

# Evaluate the trained logistic-regression model on the held-out test split.
plain_accuracy = accuracy(model=model, x=x_test, y=y_test)
print(f"Accuracy on plain test_set: {plain_accuracy}")

Accuracy on plain test_set: 0.652694582939148
