In [1]:
import torch
import tenseal as ts
import pandas as pd
import random
from time import time

# numpy is required below (percentile clipping, CSV export); matplotlib is only needed for optional plotting
import numpy as np
import matplotlib.pyplot as plt

from torchmetrics.classification import BinaryAccuracy
import subprocess



In [2]:
# Load the data set (first CSV column becomes the index) and discard the
# `prevalentStroke` column, which is not used as a feature in this notebook.
raw = pd.read_csv("./data/myTenYearCHD_n1000.csv", index_col=0)
df = raw.drop(columns="prevalentStroke")

df.head()


Unnamed: 0,male,age,cigsPerDay,prevalentHyp,totChol,sysBP,heartRate,glucose,TenYearCHD
521,1.057131,-1.11642,0.89401,-0.792487,0.05378,-0.758911,-0.622219,-0.30916,1
737,1.057131,-1.11642,0.89401,-0.792487,-0.312693,-1.083813,-0.869361,-0.216819,1
740,-0.945011,-0.538324,-0.756825,-0.792487,-0.19817,0.012732,-0.045556,-0.247599,1
660,-0.945011,1.311586,-0.756825,1.260588,1.542577,2.368274,-0.045556,0.090987,1
411,1.057131,-0.653943,0.89401,1.260588,0.328635,0.906213,-0.951741,-0.432283,0


In [3]:
# Every column except the last one (the label) is a feature.
features = df.iloc[:, :-1].copy()

# Per-column 1st and 99th percentiles, computed over all rows (the train/test
# split happens in a later cell).
X0 = np.percentile(features, q=1, axis=0)
X1 = np.percentile(features, q=99, axis=0)

# Clip each column to its percentile band, then rescale it linearly so that
# its minimum maps to -1 and its maximum to +1.
clipped = features.clip(lower=X0, upper=X1, axis=1)
col_min = clipped.min()
col_max = clipped.max()
X = 2 * (clipped - col_min) / (col_max - col_min) - 1

X.describe()


Unnamed: 0,male,age,cigsPerDay,prevalentHyp,totChol,sysBP,heartRate,glucose
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,-0.056,-0.020875,-0.575116,-0.228,-0.135902,-0.296413,-0.190172,-0.718411
std,0.99893,0.5388964,0.557931,0.974148,0.422897,0.433207,0.407512,0.263727
min,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
25%,-1.0,-0.4375,-1.0,-1.0,-0.430028,-0.616491,-0.482759,-0.840907
50%,-1.0,2.220446e-16,-1.0,-1.0,-0.140043,-0.37908,-0.206897,-0.771304
75%,1.0,0.4375,-0.069767,1.0,0.139943,-0.068621,0.034483,-0.681814
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [4]:
# The first `train_len` rows are used for training, the remainder for testing.
train_len = 800

# Features are taken from the clipped/rescaled frame `X`; the labels are the
# last column of `df`, kept two-dimensional via the `-1:` slice.
# (Alternative: slice the unscaled features straight from `df.iloc[..., :-1]`.)
labels = df.iloc[:, -1:]

X_trn = torch.tensor(X.iloc[:train_len].to_numpy()).float()
Y_trn = torch.tensor(labels.iloc[:train_len].to_numpy()).float()
X_tst = torch.tensor(X.iloc[train_len:].to_numpy()).float()
Y_tst = torch.tensor(labels.iloc[train_len:].to_numpy()).float()

print(f"X_trn has size: {X_trn.shape}")
print(f"Y_trn has size: {Y_trn.shape}")
print(f"X_tst has size: {X_tst.shape}")
print(f"Y_tst has size: {Y_tst.shape}")

# Number of input columns seen by the model.
n_features = X_trn.shape[1]

print(f"n_features: {n_features}")


X_trn has size: torch.Size([800, 8])
Y_trn has size: torch.Size([800, 1])
X_tst has size: torch.Size([200, 8])
Y_tst has size: torch.Size([200, 1])
n_features: 8


In [5]:
# Write the test features to disk as comma-separated values with six decimals.
np.savetxt("data/X_test.csv", X_tst.numpy(), fmt='%.6f', delimiter=",")


## Training a plaintext model

In [6]:
class LR(torch.nn.Module):
    """Logistic regression: a single linear layer followed by a sigmoid."""

    def __init__(self, n_features):
        """Create the linear layer mapping `n_features` inputs to one output."""
        super().__init__()
        self.lr = torch.nn.Linear(in_features=n_features, out_features=1)

    def forward(self, x):
        """Return the sigmoid of the linear layer's output for `x`."""
        logits = self.lr(x)
        return logits.sigmoid()
    

In [7]:
n_epochs = 1000

# use Binary Cross Entropy Loss
criterion = torch.nn.BCELoss()

def train_plain(n_epochs, verbose = True, seed = None):
    """Train a fresh `LR` model on the training tensors and return it.

    The function reads the notebook-level names `X_trn`, `Y_trn`,
    `n_features` and `criterion`, so the cells defining them must have been
    run first. Every epoch performs one optimiser step on the whole of
    `X_trn` (no mini-batching).

    Args:
        n_epochs: number of optimiser steps to perform.
        verbose: when true, print the loss before each step.
        seed: optional integer passed to `torch.manual_seed` before the model
            is created, making its random initialisation repeatable. The
            default `None` leaves the global RNG untouched.

    Returns:
        The trained `LR` instance.
    """
    if seed is not None:
        torch.manual_seed(seed)
    model = LR(n_features)
    # use gradient descent with a learning_rate=1
    optim = torch.optim.SGD(model.parameters(), lr=1)
    for i in range(n_epochs):
        optim.zero_grad()
        out = model(X_trn)
        loss = criterion(out, Y_trn)
        if verbose:
            # .item() extracts the Python float without going through the
            # legacy `.data` attribute.
            print(f"Loss at epoch {i}: {loss.item()}")
        loss.backward()
        optim.step()
    return model

# Fix torch's global seed so that the model's random initialisation, and with
# it every number printed below, is the same on each Restart & Run All.
torch.manual_seed(0)
model00 = train_plain(n_epochs, False)

# Pure evaluation from here on: no gradients are needed, so skip building
# autograd graphs for these forward passes.
with torch.no_grad():
    out = model00(X_trn)
    loss = criterion(out, Y_trn)
    plain_prediction = model00(X_tst)

print(f"Loss at epoch {n_epochs}: {loss.item()}")

accuracy = BinaryAccuracy()

plain_accuracy = accuracy(plain_prediction,Y_tst)

print(f"Plain model accuracy on test set: {plain_accuracy}")


Loss at epoch 1000: 0.5915659070014954
Plain model accuracy on test set: 0.6899999976158142


In [8]:
# Optional export of the fitted parameters as one CSV row (weights, then bias);
# uncomment to write params.csv:
#np.savetxt("params.csv", fmt='%.6f', delimiter=",", 
#    X = np.concatenate([model00.lr.weight.data.numpy()[0],model00.lr.bias.data.numpy()])[None,:])

# Display the learned weight row and bias of the plaintext model.
(
    model00.lr.weight.detach().numpy(),
    model00.lr.bias.detach().numpy(),
)

(array([[ 0.35881656,  1.0923964 ,  0.3184934 , -0.02589945,  0.463527  ,
          1.0900129 ,  0.12632568,  0.82190657]], dtype=float32),
 array([1.2166978], dtype=float32))

## Analysis of encrypted evaluation

In [9]:
# Names of the available key sets (not referenced by the visible cells below,
# which hard-code the key path in the command line).
keys = [
    'def_80_256_1',
    'def_80_512_1',
    'def_80_1024_1',
    'std_62_2048_1',
]


def rmse(x):
    """Return the square root of the mean of the squared entries of `x`."""
    squared = x**2
    return squared.mean()**0.5


In [10]:
# Run the external `logres` binary (debug build) with the def_80_512_1 key set.
# Judging by the column names used when its output is parsed, the remaining
# arguments are presumably precision, padding, lower bound and upper bound.
# TODO confirm against the logres command-line help.
cmd = "target/debug/logres keys/def_80_512_1 2 8 -1.0 1.0"

# getstatusoutput returns the same text as getoutput (stdout and stderr merged,
# trailing newline stripped) but also reports the exit status, so a missing or
# crashing binary is caught here rather than as a parsing error later on.
status, out = subprocess.getstatusoutput(cmd)
if status != 0:
    raise RuntimeError(f"`{cmd}` failed with exit status {status}:\n{out}")


In [11]:
def splitter(test):
    """Split one whitespace-separated output line into a list of fields.

    The first field is treated as a `/`-separated path of which only the
    second component is kept (nothing is kept if there is no second
    component); all remaining fields are passed through unchanged.
    """
    head, *rest = test.split()[:1] + test.split()[1:] if test.split() else [test.split()[0]]
    key_part = head.split("/")[1:2]
    return key_part + rest

# One parsed row per line of the evaluator output.
# NOTE: this rebinds `df`, which earlier in the notebook held the loaded data set.
rows = list(map(splitter, out.split("\n")))

df = (
    pd.DataFrame(rows, columns=["key", "prec", "padd", "lower", "upper", "value", "answer"])
    .assign(
        # Fields arrive as strings; convert the two numeric result columns.
        answer=lambda frame: frame["answer"].map(float),
        value=lambda frame: frame["value"].map(float),
        # Row-wise difference between the two result columns.
        diff=lambda frame: frame["answer"] - frame["value"],
    )
)

df[["diff"]].describe().T


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
diff,200.0,-0.000286,0.019801,-0.042326,-0.01298,-0.000474,0.011804,0.057482


In [12]:
# Turn both result columns into 0/1 predictions by thresholding at zero
# (presumably the columns hold pre-sigmoid scores; verify against logres).
for pred_col, score_col in (("pred", "value"), ("pred*", "answer")):
    df[pred_col] = (df[score_col] > 0).astype(float)

# Accuracy = 1 - mean absolute difference between predictions and test labels.
y_true = Y_tst.numpy()
print("acc: ", 1 - (df[["pred"]] - y_true).abs().mean())
print("acc*: ", 1 - (df[["pred*"]] - y_true).abs().mean())

acc:  pred    0.69
dtype: float64
acc*:  pred*    0.69
dtype: float64
