In [None]:
import torch
import tenseal as ts
import pandas as pd
import random
from time import time

#optional
import numpy as np
import matplotlib.pyplot as plt



In [None]:
torch.random.manual_seed(73)
random.seed(73)

def split_train_test(x, y, test_ratio=0.3):
    idxs = [i for i in range(len(x))]
    random.shuffle(idxs)
    #delimiter between test and train data
    delim = int(len(x) * test_ratio)
    test_idxs , train_idxs = idxs[:delim], idxs[delim:]
    return x[train_idxs], y[train_idxs], x[test_idxs], y[test_idxs]


def heart_disease_data():
    data = pd.read_csv('framingham.csv')
    
    #drop rows with  missing values
    data = data.dropna()
  
    #drop some features
    data = data.drop(columns=["education", "currentSmoker", "BPMeds", 'diabetes','diaBP','BMI'])
    
    grouped = data.groupby('TenYearCHD')
    data = grouped.apply(lambda x:x.sample(grouped.size().min(), random_state=73).reset_index(drop=True))
    
    #extract labels
    y = torch.tensor(data['TenYearCHD'].values).float().unsqueeze(1)
    data = data.drop(columns='TenYearCHD')
    data = (data - data.mean())/data.std()

    x = torch.tensor(data.values).float()
    return split_train_test(x, y)


def random_data(m=1024, n=2):
    #data separable by the line y=x
    x_train = torch.random(m,n)
    x_test = torch.random(m//2, n)
    y_train = (x_train[:,0] >= x_train[:,1]).float().unsqueeze(0).t()
    y_test = (x_test[:,0] >= x_test[:,1]).float().unsqueeze(0).t()
    return x_train,y_train,x_test,y_test

 
 #You can use whatever data you want without modification to the tutorial
 #x_train, y_train, x_test, y_test = random_data()
 
x_train, y_train, x_test, y_test = heart_disease_data()

print("############# Data summary #############")
print(f"x_train has shape: {x_train.shape}")
print(f"y_train has shape: {y_train.shape}")
print(f"x_test has shape: {x_test.shape}")
print(f"y_test has shape: {y_test.shape}")
print("#######################################")

(1114, 9)
############# Data summary #############
x_train has shape: torch.Size([780, 9])
y_train has shape: torch.Size([780, 1])
x_test has shape: torch.Size([334, 9])
y_test has shape: torch.Size([334, 1])
#######################################


  data = grouped.apply(lambda x:x.sample(grouped.size().min(), random_state=73).reset_index(drop=True))


In [8]:
#Training a logistic Regression Model
class LR(torch.nn.Module):

    def __init__(self, n_features):
        super(LR, self).__init__()
        self.lr = torch.nn.Linear(n_features,1)

    def forward(self, x):
        out = torch.sigmoid(self.lr(x))
        return out 

In [9]:
n_features = x_train.shape[1]
model = LR(n_features)

#use gradient descient with learning_rate = 1
optim = torch.optim.SGD(model.parameters(), lr=1)
criterion = torch.nn.BCELoss()

In [12]:
#define 
EPOCHS = 5

def train(model, optim, criterion, x, y, epochs = EPOCHS):
    for e in range(1, epochs + 1):
        optim.zero_grad()
        out = model(x)
        loss = criterion(out, y)
        loss.backward()
        optim.step()
        print(f'Loss at epoch {e} : {loss.data}')
    return model 

model = train(model, optim, criterion, x_train, y_train)

Loss at epoch 1 : 0.8504331707954407
Loss at epoch 2 : 0.6863384246826172
Loss at epoch 3 : 0.6358115077018738
Loss at epoch 4 : 0.6193529367446899
Loss at epoch 5 : 0.6124349236488342


In [13]:
def accuracy(model, x, y):
    out = model(x)
    correct = torch.abs(y-out) < 0.5
    return correct.float().mean()

plain_accuracy = accuracy(model, x_test, y_test)
print(f'Accuracy on plain test_set: {plain_accuracy}')

Accuracy on plain test_set: 0.703592836856842


In [71]:
#doing it the modern way
import numpy as np 
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [72]:
#import data
heart_disease_df = pd.read_csv('framingham.csv')
heart_disease_df.shape

(4238, 16)

In [73]:
#data preparation 
heart_disease_df = heart_disease_df.dropna()
heart_disease_df.shape

(3656, 16)

In [74]:
#droping some columns (Features)
heart_disease_df = heart_disease_df.drop(columns=["education", "currentSmoker", "BPMeds", "diabetes", "diaBP", "BMI"])

In [77]:
grouped =heart_disease_df.groupby('TenYearCHD')
heart_disease_df = grouped.apply(lambda x:x.sample(grouped.size().min(), random_state=73).reset_index(drop=True))

  heart_disease_df = grouped.apply(lambda x:x.sample(grouped.size().min(), random_state=73).reset_index(drop=True))


In [78]:
y = heart_disease_df['TenYearCHD'].values.astype(float)
x = heart_disease_df.drop(columns=['TenYearCHD'])
x.shape

(1114, 9)

In [79]:
#standardizing
x = (x - x.mean())/x.std()

In [80]:
x.shape

(1114, 9)

In [83]:
xo_train, xo_test, yo_train, yo_test =train_test_split(x,y, random_state=42, test_size=0.3) 

In [84]:
print("############# Data summary #############")
print(f"xo_train has shape: {xo_train.shape}")
print(f"yo_train has shape: {yo_train.shape}")
print(f"xo_test has shape: {xo_test.shape}")
print(f"yo_test has shape: {yo_test.shape}")
print("#######################################")

############# Data summary #############
xo_train has shape: (779, 9)
yo_train has shape: (779,)
xo_test has shape: (335, 9)
yo_test has shape: (335,)
#######################################


In [None]:
#Encrypted Evaluation
class EncryptedLR:

    def __init__(self,torch_lr):
        self.weight = torch_lr.lr.weight.data.tolist()[0]
        self.bias = torch_lr.lr.bias.data.tolist()

    def forward(self, enc_x):
        enc_out = enc_x.dot(self.weight) + self.bias
        return enc_out
    
    def __call__(self, *args, **kwargs):
        return self.forward(*args, **kwargs)

    def encrypt(self, context):
        self.weight = ts.ckks_vector(context, self.weight)
        self.bias = ts.ckks_vector(context, self.bias)

eelr = EncryptedLR(model)

In [51]:
#parameters
poly_mod_degree = 4096
coeff_mod_bit_sizes = [40, 20, 40]

ctx_eval = ts.context(ts.SCHEME_TYPE.CKKS, poly_mod_degree, -1, coeff_mod_bit_sizes)
ctx_eval.global_scale = 2**20
ctx_eval.generate_galois_keys()

In [53]:
t_start = time()
enc_x_test = [ts.ckks_vector(ctx_eval,x.tolist()) for x in x_test]
t_end = time()
print(f'Encryption of the test-set took {int(t_end - t_start)} seconds')

Encryption of the test-set took 1 seconds


In [None]:
def encrypted_evaluation(model, enc_x_test, y_test):
    t_start = time()

    correct = 0
    for enc_x, y in zip(enc_x_test, y_test):
        #encrypted evaluation
        enc_out = model(enc_x)
        #plain comparison
        out = enc_out.decrypt()
        out = torch.tensor(out)
        out = torch.sigmoid(out)
        if torch.abs(out-y) < 0.5:
            correct+=1

    t_end = time()
    print(f"Evaluated test_set of {len(x_test)} entries in {int(t_end - t_start)} seconds")
    print(f"Accuracy: {correct}/{len(x_test)} = {correct / len(x_test)}")
    return correct / len(x_test)
    
    encrypted_accuracy = encrypted_evaluation(eelr, enc_x_test, y_test)
    diff_accuracy = plain_accuracy - encrypted_accuracy 

    print(f"Difference between plain and encrypted accuracies: {diff_accuracy}")
    if diff_accuracy < 0:
        print("Oh! We got a better accuracy on the encrypted test-set! The noise was on our side...")