This notebook uses our manually constructed network to train a simple 2-layer neural network that predicts the risk of future Coronary Heart Disease (CHD). The dataset is located in `./data/framingham.csv` and can be downloaded from [here](https://github.com/TarekDib03/Analytics/blob/master/Week3%20-%20Logistic%20Regression/Data/framingham.csv).

In [1]:
import torch
import pandas as pd
import random
import numpy as np
import matplotlib.pyplot as plt
from mlp import MLP, bce_loss
from icecream import ic

## 1. Data Preparation

In [2]:
def split_train_test(x, y, test_ratio=0.3):
    """Randomly partition ``x`` and ``y`` into train and test subsets.

    The row indices ``0 .. len(x) - 1`` are shuffled with the global
    ``random`` module state. The first ``int(len(x) * test_ratio)`` shuffled
    indices form the test set and the remaining ones the training set.
    ``x`` and ``y`` are indexed with the same index lists, so their rows stay
    aligned.

    Returns:
        The tuple ``(x_train, y_train, x_test, y_test)``.
    """
    order = list(range(len(x)))
    random.shuffle(order)
    # everything before `cut` in the shuffled order is held out for testing
    cut = int(len(x) * test_ratio)
    held_out = order[:cut]
    kept = order[cut:]
    return x[kept], y[kept], x[held_out], y[held_out]

def heart_disease_data(data_path: str):
    """Load the CHD dataset and return balanced, standardized train/test tensors.

    Processing steps, in order:

    1. read the CSV and drop every row that has a missing value;
    2. drop a fixed set of feature columns;
    3. down-sample each ``TenYearCHD`` class to the size of the smallest
       class so that the classes are balanced;
    4. split off the ``TenYearCHD`` column as the label;
    5. z-score the remaining feature columns;
    6. hand the tensors to ``split_train_test``.

    Args:
        data_path: Path of the CSV file. It must contain the ``TenYearCHD``
            column and the feature columns dropped below.

    Returns:
        The result of ``split_train_test(x, y)``, i.e.
        ``(x_train, y_train, x_test, y_test)``, where ``x`` is a float32
        tensor of shape ``(rows, features)`` and ``y`` a float32 tensor of
        shape ``(rows, 1)``.
    """
    data = pd.read_csv(data_path)
    # drop rows with missing values
    data = data.dropna()
    # drop some features
    data = data.drop(columns=["education", "currentSmoker", "BPMeds",
                              "diabetes", "diaBP", "BMI"])
    # balance data: keep the same number of rows for every label value.
    # Each group is sampled explicitly instead of through `groupby.apply`,
    # because newer pandas versions no longer pass the grouping column to the
    # applied function, which would remove "TenYearCHD" from the result.
    grouped = data.groupby("TenYearCHD")
    smallest_class = grouped.size().min()
    data = pd.concat(
        [group.sample(smallest_class, random_state=73) for _, group in grouped],
        ignore_index=True,
    )
    # extract labels as a column vector
    y = torch.tensor(data["TenYearCHD"].values).float().unsqueeze(1)
    data = data.drop(columns="TenYearCHD")

    # standardize data
    # NOTE(review): mean/std are computed over all rows before the split, so
    # the test rows contribute to the statistics -- confirm this is intended.
    data = (data - data.mean()) / data.std()
    x = torch.tensor(data.values).float()
    return split_train_test(x, y)

In [3]:
# Load the dataset from disk and unpack the four tensors returned by
# heart_disease_data: train features/labels and test features/labels.
x_train, y_train, x_test, y_test = heart_disease_data("./data/framingham.csv")

In [4]:
# Sanity check: print the shape of every tensor produced by the split.
print("############# Data summary #############")
print(f"x_train has shape: {x_train.shape}")
print(f"y_train has shape: {y_train.shape}")
print(f"x_test has shape: {x_test.shape}")
print(f"y_test has shape: {y_test.shape}")
print("#######################################")

############# Data summary #############
x_train has shape: torch.Size([780, 9])
y_train has shape: torch.Size([780, 1])
x_test has shape: torch.Size([334, 9])
y_test has shape: torch.Size([334, 1])
#######################################


## 2. Model (manual)

In [5]:
# Layer widths: input size, first-layer output size, second-layer output size.
num_features = [9, 4, 1]
# Build the hand-written MLP (imported from the project's `mlp` module):
# linear_1 -> 'relu' -> linear_2 -> 'sigmoid'.
# NOTE(review): `lr` is presumably the learning rate applied by
# `update_params`; confirm against the MLP implementation.
net = MLP(
    linear_1_in_features=num_features[0],
    linear_1_out_features=num_features[1],
    f_function='relu',
    linear_2_in_features=num_features[1],
    linear_2_out_features=num_features[2],
    g_function='sigmoid',
    lr=1
)

Training loop

In [6]:
epochs = 20
# Each epoch performs one forward/backward pass over the whole of x_train
# followed by a single call to update_params.
for e in range(1, epochs+1):
    # training loop
    # reset the network's stored state before the new pass
    net.clear_grad_and_cache()
    y_hat = net.forward(x_train)
    # bce_loss returns the loss value together with a second value that is
    # fed to net.backward (presumably dJ/dy_hat -- confirm in mlp.bce_loss)
    loss, dJdy_hat = bce_loss(y_train, y_hat)
    net.backward(dJdy_hat)  # calculate the gradients
    # a prediction counts as correct when it is within 0.5 of its label
    correct = torch.abs(y_train - y_hat) < 0.5
    accuracy = correct.float().mean()
    # NOTE(review): `accuracy` is only consumed by the commented-out prints
    # below; a later cell rebinds the name `accuracy` to a function.
    # print(f"Training loss at epoch {e}: {loss.data}")
    # print(f"Training accuracy at epoch {e}: {accuracy}")
    net.update_params()
    print(f"Loss at epoch {e}: {loss.data:.2f}")
    
    # testing loop (disabled)
    # NOTE(review): if re-enabled, the "Testing loss" print should report
    # `loss_test`; as written it prints the training `loss`.
    # y_hat_test = net.forward(x_test)
    # loss_test, _ = bce_loss(y_test, y_hat_test)
    # correct_test = torch.abs(y_test - y_hat_test) < 0.5
    # accuracy_test = correct_test.float().mean()
    # print(f"Testing loss at epoch {e}: {loss.data}")
    # print(f"Testing accuracy at epoch {e}: {accuracy_test}")

    

Loss at epoch 1: 3.22
Loss at epoch 2: 1.46
Loss at epoch 3: 1.04
Loss at epoch 4: 0.83
Loss at epoch 5: 0.73
Loss at epoch 6: 0.68
Loss at epoch 7: 0.66
Loss at epoch 8: 0.64
Loss at epoch 9: 0.63
Loss at epoch 10: 0.62
Loss at epoch 11: 0.62
Loss at epoch 12: 0.62
Loss at epoch 13: 0.61
Loss at epoch 14: 0.61
Loss at epoch 15: 0.61
Loss at epoch 16: 0.61
Loss at epoch 17: 0.60
Loss at epoch 18: 0.60
Loss at epoch 19: 0.60
Loss at epoch 20: 0.60


In [7]:
def accuracy(model, x, y):
    """Return the fraction of predictions that lie within 0.5 of their label.

    ``model.forward(x)`` produces the predictions; a prediction is counted as
    a hit when its absolute difference to the matching entry of ``y`` is
    strictly smaller than 0.5. The result is the mean of the hits as a
    float tensor.
    """
    predictions = model.forward(x)
    hits = (y - predictions).abs() < 0.5
    return hits.float().mean()

# Evaluate the hand-written network on the held-out test split and report
# the result as a percentage.
plain_accuracy = accuracy(net, x_test, y_test)
print(f"Accuracy on plain test_set: {plain_accuracy*100:.2f}%")

Accuracy on plain test_set: 64.07%


## 3. Model (torch)

In [8]:
class LR(torch.nn.Module):
    """Two-layer binary classifier: Linear -> ReLU -> Linear -> Sigmoid.

    Despite its name the model is not a plain logistic regression; it has one
    hidden layer.

    Args:
        n_features: Number of input features per sample.
        n_hidden: Width of the hidden layer. Defaults to 4, the value that
            was previously hard-coded.
    """

    def __init__(self, n_features, n_hidden=4):
        super().__init__()
        self.linear1 = torch.nn.Linear(n_features, n_hidden)
        self.relu = torch.nn.ReLU()
        self.linear2 = torch.nn.Linear(n_hidden, 1)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Map inputs with ``n_features`` trailing entries to one sigmoid output each."""
        hidden = self.relu(self.linear1(x))
        return self.sigmoid(self.linear2(hidden))

In [9]:
# torch counterpart of the hand-written network: 9 input features
model = LR(9)
# use gradient descent with a learning_rate=1
optim = torch.optim.SGD(model.parameters(), lr=1)
# use Binary Cross Entropy Loss
criterion = torch.nn.BCELoss()

# define the number of epochs for both plain and encrypted training
EPOCHS = 20

def train(model, optim, criterion, x, y, epochs=EPOCHS):
    """Train ``model`` on the full batch ``(x, y)`` and return it.

    Every epoch clears the gradients, runs one forward pass over all of
    ``x``, evaluates ``criterion(out, y)``, back-propagates and takes one
    optimizer step. The loss of each epoch is printed with two decimals.

    Args:
        model: Callable model; its parameters are updated in place.
        optim: Optimizer exposing ``zero_grad()`` and ``step()``.
        criterion: Loss callable invoked as ``criterion(out, y)``.
        x: Training inputs.
        y: Training targets.
        epochs: Number of passes over the data (defaults to ``EPOCHS``).

    Returns:
        The same ``model`` object that was passed in.
    """
    for e in range(1, epochs + 1):
        optim.zero_grad()
        out = model(x)
        loss = criterion(out, y)
        loss.backward()
        optim.step()
        # .item() extracts the scalar as a Python float for formatting,
        # avoiding the legacy `.data` attribute
        print(f"Loss at epoch {e}: {loss.item():.2f}")
    return model

# Fit the torch model on the training split; train returns the model it was given.
model = train(model, optim, criterion, x_train, y_train)

Loss at epoch 1: 0.71
Loss at epoch 2: 0.70
Loss at epoch 3: 0.69
Loss at epoch 4: 0.69
Loss at epoch 5: 0.68
Loss at epoch 6: 0.68
Loss at epoch 7: 0.67
Loss at epoch 8: 0.66
Loss at epoch 9: 0.66
Loss at epoch 10: 0.65
Loss at epoch 11: 0.64
Loss at epoch 12: 0.63
Loss at epoch 13: 0.62
Loss at epoch 14: 0.62
Loss at epoch 15: 0.61
Loss at epoch 16: 0.60
Loss at epoch 17: 0.60
Loss at epoch 18: 0.60
Loss at epoch 19: 0.60
Loss at epoch 20: 0.59


In [10]:
def accuracy(model, x, y):
    """Return the fraction of predictions that lie within 0.5 of their label.

    Args:
        model: Callable producing predictions via ``model(x)``.
        x: Inputs to evaluate.
        y: Labels, with a shape compatible with the model output.

    Returns:
        A scalar float tensor: the mean of the per-element hits, where a hit
        is ``|y - model(x)| < 0.5``.
    """
    # evaluation only: no gradients are needed, so skip building the
    # autograd graph during the forward pass
    with torch.no_grad():
        out = model(x)
    correct = torch.abs(y - out) < 0.5
    return correct.float().mean()

# Evaluate the trained torch model on the held-out test split and report
# the result as a percentage.
plain_accuracy = accuracy(model, x_test, y_test)
print(f"Accuracy on plain test_set: {plain_accuracy*100:.2f}%")

Accuracy on plain test_set: 66.17%
