In [55]:
import torch
import pandas as pd
import random
import numpy as np
import matplotlib.pyplot as plt
from mlp import MLP, bce_loss
from icecream import ic

## 1. Data Preparation

In [37]:
def split_train_test(x, y, test_ratio=0.3, seed=None):
    """Randomly partition paired samples into a training and a test subset.

    Args:
        x: Samples; any container that supports ``len`` and indexing with a
            list of integer positions (e.g. a ``torch.Tensor`` or NumPy array).
        y: Labels aligned row-for-row with ``x``; must have the same length.
        test_ratio: Fraction of the rows, in ``[0, 1]``, assigned to the test
            subset. The test subset holds ``int(len(x) * test_ratio)`` rows.
        seed: Optional seed for a private shuffle. With the default ``None``
            the module-level ``random`` generator is used, so the result
            follows whatever ``random.seed`` the caller has set. With a value,
            the split is repeatable and the global generator is left untouched.

    Returns:
        A tuple ``(x_train, y_train, x_test, y_test)``.

    Raises:
        ValueError: If ``x`` and ``y`` differ in length or ``test_ratio`` is
            outside ``[0, 1]``.
    """
    if len(x) != len(y):
        raise ValueError(
            f"x and y must have the same length, got {len(x)} and {len(y)}"
        )
    if not 0.0 <= test_ratio <= 1.0:
        raise ValueError(f"test_ratio must be within [0, 1], got {test_ratio}")

    idxs = list(range(len(x)))
    if seed is None:
        random.shuffle(idxs)
    else:
        # A dedicated generator keeps the split reproducible without
        # disturbing the state of the shared ``random`` module.
        random.Random(seed).shuffle(idxs)

    # delimiter between test and train data
    delim = int(len(x) * test_ratio)
    test_idxs, train_idxs = idxs[:delim], idxs[delim:]
    return x[train_idxs], y[train_idxs], x[test_idxs], y[test_idxs]

def heart_disease_data(data_path: str):
    """Load the heart-disease CSV and return a balanced, standardized split.

    The pipeline reads the CSV, drops rows with missing values, drops the
    unused feature columns, downsamples every ``TenYearCHD`` class to the size
    of the smallest class, standardizes the remaining feature columns, and
    hands the tensors to ``split_train_test`` with its default test ratio.

    Args:
        data_path: Path to the CSV file. It must contain the ``TenYearCHD``
            label column and every column listed in the drop step below.

    Returns:
        ``(x_train, y_train, x_test, y_test)`` as float tensors; the label
        tensors have shape ``(n, 1)``.
    """
    raw = pd.read_csv(data_path)
    # drop rows with missing values
    complete = raw.dropna()
    # drop some features
    selected = complete.drop(columns=["education", "currentSmoker", "BPMeds",
                                      "diabetes", "diaBP", "BMI"])

    # balance data: keep the same number of rows for every label value
    grouped = selected.groupby("TenYearCHD")
    n_per_class = grouped.size().min()
    # Sample each class explicitly instead of going through ``groupby.apply``:
    # pandas 2.2 deprecated passing the grouping column to the applied
    # function, which would remove the label column that is read just below.
    # Every class is sampled with the same fixed seed, as before.
    balanced = pd.concat(
        [group.sample(n_per_class, random_state=73) for _, group in grouped],
        ignore_index=True,
    )

    # extract labels as a column vector
    y = torch.tensor(balanced["TenYearCHD"].values).float().unsqueeze(1)
    features = balanced.drop(columns="TenYearCHD")

    # standardize data; a constant column has zero spread, so divide it by 1
    # instead of 0 to get zeros rather than NaNs.
    # NOTE(review): mean/std are computed over all rows before the split, so
    # test rows influence the scaling applied to the training rows.
    spread = features.std().replace(0, 1.0)
    features = (features - features.mean()) / spread
    x = torch.tensor(features.values).float()
    return split_train_test(x, y)

In [38]:
# Seed the RNGs before the random train/test shuffle so the split (and the
# torch.nn.Linear initialisation used further down) is repeatable after a
# kernel restart.
SEED = 73
random.seed(SEED)
torch.manual_seed(SEED)

x_train, y_train, x_test, y_test = heart_disease_data("./data/framingham.csv")

In [39]:
# Report the shape of every split in one banner-delimited block.
print(
    "############# Data summary #############",
    f"x_train has shape: {x_train.shape}",
    f"y_train has shape: {y_train.shape}",
    f"x_test has shape: {x_test.shape}",
    f"y_test has shape: {y_test.shape}",
    "#######################################",
    sep="\n",
)

############# Data summary #############
x_train has shape: torch.Size([780, 9])
y_train has shape: torch.Size([780, 1])
x_test has shape: torch.Size([334, 9])
y_test has shape: torch.Size([334, 1])
#######################################


## 2. Model

In [66]:
# Layer widths in order: input features, hidden units, outputs.
# The input width matches the 9 feature columns reported in the data summary.
num_features = [9, 4, 1]
n_in, n_hidden, n_out = num_features
net = MLP(
    linear_1_in_features=n_in,
    linear_1_out_features=n_hidden,
    f_function='relu',
    linear_2_in_features=n_hidden,
    linear_2_out_features=n_out,
    g_function='sigmoid',
)

Training loop

In [67]:
epochs = 100
for e in range(epochs):
    # training step on the full training set
    net.clear_grad_and_cache()
    y_hat = net.forward(x_train)
    loss, dJdy_hat = bce_loss(y_train, y_hat)
    ic(loss)
    net.backward(dJdy_hat)  # calculate the gradients
    # A prediction counts as correct when it lies within 0.5 of its label.
    correct = torch.abs(y_train - y_hat) < 0.5
    # Stored as ``train_accuracy`` (not ``accuracy``) so that re-running this
    # cell does not overwrite the ``accuracy`` helper function defined in the
    # evaluation cells with a tensor.
    train_accuracy = correct.float().mean()
    # print(f"Training loss at epoch {e}: {loss.data}")
    # print(f"Training accuracy at epoch {e}: {train_accuracy}")
    net.update_params()

    # optional per-epoch evaluation on the test split (disabled)
    # y_hat_test = net.forward(x_test)
    # loss_test, _ = bce_loss(y_test, y_hat_test)
    # correct_test = torch.abs(y_test - y_hat_test) < 0.5
    # accuracy_test = correct_test.float().mean()
    # print(f"Testing loss at epoch {e}: {loss_test.data}")
    # print(f"Testing accuracy at epoch {e}: {accuracy_test}")

    

ic| loss: tensor(0.8200)
ic| loss: tensor(0.7370)
ic| loss: tensor(0.7760)
ic| loss: tensor(1.5638)
ic| loss: tensor(1.7446)
ic| loss: tensor(1.5059)
ic| loss: tensor(1.1043)
ic| loss: tensor(0.8454)
ic| loss: tensor(0.8922)
ic| loss: tensor(0.9937)
ic| loss: tensor(0.9071)
ic| loss: tensor(0.9095)
ic| loss: tensor(0.7948)
ic| loss: tensor(0.7838)
ic| loss: tensor(0.7385)
ic| loss: tensor(0.7518)
ic| loss: tensor(0.7113)
ic| loss: tensor(0.7193)
ic| loss: tensor(0.6883)
ic| loss: tensor(0.6929)
ic| loss: tensor(0.6712)
ic| loss: tensor(0.6739)
ic| loss: tensor(0.6589)
ic| loss: tensor(0.6602)
ic| loss: tensor(0.6497)
ic| loss: tensor(0.6503)
ic| loss: tensor(0.6429)
ic| loss: tensor(0.6429)
ic| loss: tensor(0.6377)
ic| loss: tensor(0.6373)
ic| loss: tensor(0.6336)
ic| loss: tensor(0.6331)
ic| loss: tensor(0.6303)
ic| loss: tensor(0.6297)
ic| loss: tensor(0.6277)
ic| loss: tensor(0.6271)
ic| loss: tensor(0.6255)
ic| loss: tensor(0.6249)
ic| loss: tensor(0.6237)
ic| loss: tensor(0.6232)


In [2]:
def accuracy(model, x, y):
    """Return the fraction of samples predicted within 0.5 of their label.

    Args:
        model: Object exposing ``forward(x)`` that returns predictions
            broadcastable against ``y``.
        x: Input samples passed to ``model.forward``.
        y: Target labels.

    Returns:
        A scalar tensor holding the mean of the per-sample hit indicator.
    """
    predictions = model.forward(x)
    hits = (y - predictions).abs() < 0.5
    return hits.float().mean()

# Evaluate the trained MLP on the held-out split. This needs `net`, `x_test`
# and `y_test` from the cells above; on a fresh kernel it raises NameError.
plain_accuracy = accuracy(model=net, x=x_test, y=y_test)
print(f"Accuracy on plain test_set: {plain_accuracy}")

NameError: name 'net' is not defined

In [24]:
class LR(torch.nn.Module):
    """Logistic regression: one linear layer followed by a sigmoid."""

    def __init__(self, n_features):
        """Create the single ``n_features -> 1`` linear layer."""
        super().__init__()
        self.lr = torch.nn.Linear(n_features, 1)

    def forward(self, x):
        """Return the sigmoid of the linear layer's output for ``x``."""
        logits = self.lr(x)
        return logits.sigmoid()

In [25]:
# Logistic-regression baseline over the 9 input features.
model = LR(n_features=9)
# Plain gradient descent with a learning rate of 1.
optim = torch.optim.SGD(params=model.parameters(), lr=1)
# Binary cross-entropy loss.
criterion = torch.nn.BCELoss()

# Default epoch count, shared by the plain and the encrypted training runs.
EPOCHS = 5


def train(model, optim, criterion, x, y, epochs=EPOCHS):
    """Run full-batch training for ``epochs`` steps and return the model.

    Each epoch clears the gradients, evaluates ``criterion(model(x), y)``,
    back-propagates, applies one optimizer step, and prints the loss that was
    computed before that step.

    Args:
        model: Callable model mapping ``x`` to predictions.
        optim: Optimizer providing ``zero_grad()`` and ``step()``.
        criterion: Loss callable taking ``(predictions, targets)``.
        x: Training inputs.
        y: Training targets.
        epochs: Number of passes over ``(x, y)``; defaults to ``EPOCHS``.

    Returns:
        The same ``model`` object, updated in place.
    """

    def _run_epoch():
        # One gradient-descent update; returns the pre-update loss.
        optim.zero_grad()
        epoch_loss = criterion(model(x), y)
        epoch_loss.backward()
        optim.step()
        return epoch_loss

    for e in range(1, epochs + 1):
        loss = _run_epoch()
        print(f"Loss at epoch {e}: {loss.data}")
    return model

# Fit the logistic-regression baseline on the training split; `train` returns
# the same model object it was given.
model = train(model=model, optim=optim, criterion=criterion, x=x_train, y=y_train)

Loss at epoch 1: 0.7648208141326904
Loss at epoch 2: 0.6660396456718445
Loss at epoch 3: 0.6310326457023621
Loss at epoch 4: 0.6159663796424866
Loss at epoch 5: 0.6083087921142578


In [26]:
def accuracy(model, x, y):
    """Return the fraction of samples predicted within 0.5 of their label.

    Works both for callable models (such as ``torch.nn.Module`` instances) and
    for plain objects that only expose a ``forward`` method, so this
    definition can be used with every model in the notebook.

    Args:
        model: Callable model, or an object with a ``forward(x)`` method.
        x: Input samples.
        y: Target labels, broadcastable against the model output.

    Returns:
        A scalar tensor holding the mean of the per-sample hit indicator.
    """
    predict = model if callable(model) else model.forward
    # Evaluation only: no gradients are needed for the thresholded result.
    with torch.no_grad():
        out = predict(x)
    correct = torch.abs(y - out) < 0.5
    return correct.float().mean()

# Evaluate the trained logistic-regression baseline on the held-out split.
plain_accuracy = accuracy(model=model, x=x_test, y=y_test)
print(f"Accuracy on plain test_set: {plain_accuracy}")

Accuracy on plain test_set: 0.7005987763404846
