In [199]:
import pandas as pd
import numpy as np

dataset = pd.read_csv("Sleep_Efficiency.csv", encoding="cp949") #load CSV file
dataset.shape

(452, 15)

In [200]:
drop_columns = ['ID', 'Bedtime', 'Wakeup time']
#drop not used columns (ID, Bedtime, Wakeup time) (we use Sleep duriation instead of Bedtime and Wakeup time)
dataset.drop(drop_columns, axis=1, inplace=True)

In [201]:
dataset = dataset.dropna() #drop rows that have missing values
dataset.shape

(388, 12)

In [202]:
#one-hot encoding for categorical values
cat_columns = ['Gender', 'Awakenings', 'Smoking status', 'Exercise frequency']
dataset = pd.get_dummies(dataset, columns=cat_columns, drop_first=True)
dataset.head()

Unnamed: 0,Age,Sleep duration,Sleep efficiency,REM sleep percentage,Deep sleep percentage,Light sleep percentage,Caffeine consumption,Alcohol consumption,Gender_Male,Awakenings_1.0,Awakenings_2.0,Awakenings_3.0,Awakenings_4.0,Smoking status_Yes,Exercise frequency_1.0,Exercise frequency_2.0,Exercise frequency_3.0,Exercise frequency_4.0,Exercise frequency_5.0
0,65,6.0,0.88,18,70,12,0.0,0.0,False,False,False,False,False,True,False,False,True,False,False
1,69,7.0,0.66,19,28,53,0.0,3.0,True,False,False,True,False,True,False,False,True,False,False
2,40,8.0,0.89,20,70,10,0.0,0.0,False,True,False,False,False,False,False,False,True,False,False
3,40,6.0,0.51,23,25,52,50.0,5.0,False,False,False,True,False,True,True,False,False,False,False
4,57,8.0,0.76,27,55,18,0.0,3.0,True,False,False,True,False,False,False,False,True,False,False


In [203]:
#for one-hot encoded columns
bool_cols = dataset.dtypes[dataset.dtypes == bool].index.tolist()
dataset[bool_cols] = np.where(dataset[bool_cols], 1, 0).astype(float)

dataset.head()

Unnamed: 0,Age,Sleep duration,Sleep efficiency,REM sleep percentage,Deep sleep percentage,Light sleep percentage,Caffeine consumption,Alcohol consumption,Gender_Male,Awakenings_1.0,Awakenings_2.0,Awakenings_3.0,Awakenings_4.0,Smoking status_Yes,Exercise frequency_1.0,Exercise frequency_2.0,Exercise frequency_3.0,Exercise frequency_4.0,Exercise frequency_5.0
0,65,6.0,0.88,18,70,12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
1,69,7.0,0.66,19,28,53,0.0,3.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
2,40,8.0,0.89,20,70,10,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3,40,6.0,0.51,23,25,52,50.0,5.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0
4,57,8.0,0.76,27,55,18,0.0,3.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [204]:
#train model without using HE
#check this model is useful(have linear relationship with target variable)
from sklearn.model_selection import train_test_split

X = dataset.drop('Sleep efficiency', axis=1)
y = dataset['Sleep efficiency']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)

print("X_train shape : ", X_train.shape)
print("y_train shape : ", y_train.shape, "\n")
print("X_test shape : ", X_test.shape)
print("y_test shape : ", y_test.shape)

X_train shape :  (194, 18)
y_train shape :  (194,) 

X_test shape :  (194, 18)
y_test shape :  (194,)


In [205]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

model = LinearRegression()

X_train = np.c_[np.ones((len(X_train), 1)), X_train]
model.fit(X_train, y_train)

X_test = np.c_[np.ones((len(X_test), 1)), X_test]
y_pred = model.predict(X_test)

sklearn_r2 = r2_score(y_test, y_pred)

print("R^2 score:", sklearn_r2) # R^2 is enough high, so we will implement this model using HE

R^2 score: 0.838818252406227


In [206]:
import piheaan as heaan
from piheaan.math import sort
from piheaan.math import approx # for piheaan math function

In [207]:
params = heaan.ParameterPreset.FGb
context = heaan.make_context(params)
heaan.make_bootstrappable(context)

# Load pre-exisisting key
key_file_path = "./keys"

sk = heaan.SecretKey(context,key_file_path+"/secretkey.bin") # load sk
pk = heaan.KeyPack(context, key_file_path+"/") # load pk
pk.load_enc_key()
pk.load_mult_key()

eval = heaan.HomEvaluator(context,pk)
dec = heaan.Decryptor(context)
enc = heaan.Encryptor(context)

log_slots = 15
num_slots = 2**log_slots

In [208]:
def step(learning_rate, ctxt_X, ctxt_Y, ctxt_beta, n, log_slots, context, eval):
    '''
    ctxt_X, ctxt_Y : data for training
    ctxt_beta : initial value beta
    n : the number of row in train_data
    '''
    ctxt_rot = heaan.Ciphertext(context)
    ctxt_tmp = heaan.Ciphertext(context)
    
    ## step1
    # compute  ctxt_tmp = beta1*x1 / beta2*x2 / ... / beta18*x18 / beta19*x19
    ctxt_tmp = heaan.Ciphertext(context)
    eval.mult(ctxt_beta, ctxt_X, ctxt_tmp)
    
    # ctxt_beta_left = beta17*x17 / beta18*x18 / beta19*x19
    ctxt_beta_left = heaan.Ciphertext(context)
    eval.left_rotate(ctxt_tmp, 16*n, ctxt_beta_left)
    
    # compute ctxt_tmp = beta1*x1 + beta2*x2 + ... + beta16*x16
    for i in range(4):
        eval.left_rotate(ctxt_tmp, n*2**(3-i), ctxt_rot)
        eval.add(ctxt_tmp, ctxt_rot, ctxt_tmp)
        
    # compute ctxt_tmp = beta1*x1 / beta2*x2 / ... / beta18*x18 / beta19*x19
    for i in range(3):
        eval.left_rotate(ctxt_beta_left, n * i, ctxt_rot)
        eval.add(ctxt_tmp, ctxt_rot, ctxt_tmp)
    
    msg_mask = heaan.Message(log_slots)
    for i in range(n):
        msg_mask[i] = 1
    eval.mult(ctxt_tmp, msg_mask, ctxt_tmp)
    
    # bootstrap
    eval.bootstrap(ctxt_tmp, ctxt_tmp)
    
    ## step2
    # compute  (learning_rate/n) * (y_(j) - y'_(j))
    ctxt_d = heaan.Ciphertext(context)
    eval.sub(ctxt_Y, ctxt_tmp, ctxt_d)
    eval.mult(ctxt_d, learning_rate / n, ctxt_d)
    
    eval.right_rotate(ctxt_d, 16*n, ctxt_tmp) # for 17, 18, 19
    for i in range(4):
        eval.right_rotate(ctxt_d, n * 2**i, ctxt_rot)
        eval.add(ctxt_d, ctxt_rot, ctxt_d)
        
    for i in range(3):
        eval.right_rotate(ctxt_tmp, n * i, ctxt_rot)
        eval.add(ctxt_d, ctxt_rot, ctxt_d)
    
    ## step3
    # compute  (learning_rate/n) * (y_(j) - y'_(j)) * x_(j)
    eval.mult(ctxt_X, ctxt_d, ctxt_d)
    
    ## step4
    # compute  Sum_(all j) (learning_rate/n) * (y_(j) - y'_(j)) * x_(j)
    ctxt_beta_update = heaan.Ciphertext(context)
    for i in range(n):
        eval.left_rotate(ctxt_d, i, ctxt_rot)
        eval.add(ctxt_beta_update, ctxt_rot, ctxt_beta_update)
    msg_mask = heaan.Message(log_slots)
    for i in range(19):
        msg_mask[i * n] = 1
    eval.mult(ctxt_beta_update, msg_mask, ctxt_beta_update)
    eval.mult(ctxt_beta_update, msg_mask, ctxt_tmp)

    for i in range(1,n):
        eval.right_rotate(ctxt_tmp, i, ctxt_rot)
        eval.add(ctxt_beta_update, ctxt_rot, ctxt_beta_update)

    ## step5
    # update beta
    eval.add(ctxt_beta, ctxt_beta_update, ctxt_beta_update)
    return ctxt_beta_update

In [209]:
#transforms the data to have a mean of 0 and a standard deviation of 1
#Z-score standardization
def normalize_data(arr):
    mean = np.mean(arr)
    std_dev = np.std(arr)
    return [(x - mean) / std_dev for x in arr]

In [210]:
#using same train/test dataset with Model without using HE for comparing two models
dataset_train = X_train
dataset_test = X_test

print(dataset_train.shape)
print(dataset_test.shape)

(194, 19)
(194, 19)


In [211]:
# preprocessing data
train_n = dataset_train.shape[0]
X = [0] * 19
X[0] = normalize_data(dataset_train[:,1]) # Age
X[1] = normalize_data(dataset_train[:,2]) # Sleep duration
X[2] = normalize_data(dataset_train[:,3]) # REM sleep percentage
X[3] = normalize_data(dataset_train[:,4]) # Deep sleep percentage
X[4] = normalize_data(dataset_train[:,5]) # Light sleep percentage
X[5] = normalize_data(dataset_train[:,6]) # Caffeine consumption
X[6] = normalize_data(dataset_train[:,7]) # Alcohol consumption
X[7] = dataset_train[:,8] # Gender_Male
X[8] = dataset_train[:,9] # Awakenings_1
X[9] = dataset_train[:,10] # Awakenings_2
X[10] = dataset_train[:,11] # Awakenings_3
X[11] = dataset_train[:,12] # Awakenings_4
X[12] = dataset_train[:,13] # Smoking status_Yes
X[13] = dataset_train[:,14] # Exercise frequency_1
X[14] = dataset_train[:,15] # Exercise frequency_2
X[15] = dataset_train[:,16] # Exercise frequency_3
X[16] = dataset_train[:,17] # Exercise frequency_4
X[17] = dataset_train[:,18] # Exercise frequency_5
X[18] = [1 for _ in range(train_n)] # bias 

Y = y_train.values

msg_X = heaan.Message(log_slots)
ctxt_X = heaan.Ciphertext(context)
for i in range(19):
    for j in range(train_n):
        msg_X[train_n*i + j] = X[i][j]
enc.encrypt(msg_X, pk, ctxt_X)

msg_Y = heaan.Message(log_slots)
ctxt_Y = heaan.Ciphertext(context)
for j in range(train_n):
    msg_Y[j] = Y[j]
enc.encrypt(msg_Y, pk, ctxt_Y)

In [212]:
msg_beta = heaan.Message(log_slots)
ctxt_beta = heaan.Ciphertext(context)

#initialize all beta with 1
for i in range(19):
    for j in range(train_n):
        msg_beta[train_n*i + j] = 1 

enc.encrypt(msg_beta, pk, ctxt_beta)

In [213]:
# randomly assign learning_rate
learning_rate = 0.2
num_steps = 2500

ctxt_next = heaan.Ciphertext(context)
eval.add(ctxt_beta, 0, ctxt_next)
for i in range(num_steps):
    print("=== Step", i, "===")
    # estimate beta_hat using function 'step'
    ctxt_next = step(learning_rate, ctxt_X, ctxt_Y, ctxt_next, train_n, log_slots, context, eval)

=== Step 0 ===
=== Step 1 ===
=== Step 2 ===
=== Step 3 ===
=== Step 4 ===
=== Step 5 ===
=== Step 6 ===
=== Step 7 ===
=== Step 8 ===
=== Step 9 ===
=== Step 10 ===
=== Step 11 ===
=== Step 12 ===
=== Step 13 ===
=== Step 14 ===
=== Step 15 ===
=== Step 16 ===
=== Step 17 ===
=== Step 18 ===
=== Step 19 ===
=== Step 20 ===
=== Step 21 ===
=== Step 22 ===
=== Step 23 ===
=== Step 24 ===
=== Step 25 ===
=== Step 26 ===
=== Step 27 ===
=== Step 28 ===
=== Step 29 ===
=== Step 30 ===
=== Step 31 ===
=== Step 32 ===
=== Step 33 ===
=== Step 34 ===
=== Step 35 ===
=== Step 36 ===
=== Step 37 ===
=== Step 38 ===
=== Step 39 ===
=== Step 40 ===
=== Step 41 ===
=== Step 42 ===
=== Step 43 ===
=== Step 44 ===
=== Step 45 ===
=== Step 46 ===
=== Step 47 ===
=== Step 48 ===
=== Step 49 ===
=== Step 50 ===
=== Step 51 ===
=== Step 52 ===
=== Step 53 ===
=== Step 54 ===
=== Step 55 ===
=== Step 56 ===
=== Step 57 ===
=== Step 58 ===
=== Step 59 ===
=== Step 60 ===
=== Step 61 ===
=== Step 62 ===
==

=== Step 490 ===
=== Step 491 ===
=== Step 492 ===
=== Step 493 ===
=== Step 494 ===
=== Step 495 ===
=== Step 496 ===
=== Step 497 ===
=== Step 498 ===
=== Step 499 ===
=== Step 500 ===
=== Step 501 ===
=== Step 502 ===
=== Step 503 ===
=== Step 504 ===
=== Step 505 ===
=== Step 506 ===
=== Step 507 ===
=== Step 508 ===
=== Step 509 ===
=== Step 510 ===
=== Step 511 ===
=== Step 512 ===
=== Step 513 ===
=== Step 514 ===
=== Step 515 ===
=== Step 516 ===
=== Step 517 ===
=== Step 518 ===
=== Step 519 ===
=== Step 520 ===
=== Step 521 ===
=== Step 522 ===
=== Step 523 ===
=== Step 524 ===
=== Step 525 ===
=== Step 526 ===
=== Step 527 ===
=== Step 528 ===
=== Step 529 ===
=== Step 530 ===
=== Step 531 ===
=== Step 532 ===
=== Step 533 ===
=== Step 534 ===
=== Step 535 ===
=== Step 536 ===
=== Step 537 ===
=== Step 538 ===
=== Step 539 ===
=== Step 540 ===
=== Step 541 ===
=== Step 542 ===
=== Step 543 ===
=== Step 544 ===
=== Step 545 ===
=== Step 546 ===
=== Step 547 ===
=== Step 548 =

=== Step 973 ===
=== Step 974 ===
=== Step 975 ===
=== Step 976 ===
=== Step 977 ===
=== Step 978 ===
=== Step 979 ===
=== Step 980 ===
=== Step 981 ===
=== Step 982 ===
=== Step 983 ===
=== Step 984 ===
=== Step 985 ===
=== Step 986 ===
=== Step 987 ===
=== Step 988 ===
=== Step 989 ===
=== Step 990 ===
=== Step 991 ===
=== Step 992 ===
=== Step 993 ===
=== Step 994 ===
=== Step 995 ===
=== Step 996 ===
=== Step 997 ===
=== Step 998 ===
=== Step 999 ===
=== Step 1000 ===
=== Step 1001 ===
=== Step 1002 ===
=== Step 1003 ===
=== Step 1004 ===
=== Step 1005 ===
=== Step 1006 ===
=== Step 1007 ===
=== Step 1008 ===
=== Step 1009 ===
=== Step 1010 ===
=== Step 1011 ===
=== Step 1012 ===
=== Step 1013 ===
=== Step 1014 ===
=== Step 1015 ===
=== Step 1016 ===
=== Step 1017 ===
=== Step 1018 ===
=== Step 1019 ===
=== Step 1020 ===
=== Step 1021 ===
=== Step 1022 ===
=== Step 1023 ===
=== Step 1024 ===
=== Step 1025 ===
=== Step 1026 ===
=== Step 1027 ===
=== Step 1028 ===
=== Step 1029 ===
=

=== Step 1432 ===
=== Step 1433 ===
=== Step 1434 ===
=== Step 1435 ===
=== Step 1436 ===
=== Step 1437 ===
=== Step 1438 ===
=== Step 1439 ===
=== Step 1440 ===
=== Step 1441 ===
=== Step 1442 ===
=== Step 1443 ===
=== Step 1444 ===
=== Step 1445 ===
=== Step 1446 ===
=== Step 1447 ===
=== Step 1448 ===
=== Step 1449 ===
=== Step 1450 ===
=== Step 1451 ===
=== Step 1452 ===
=== Step 1453 ===
=== Step 1454 ===
=== Step 1455 ===
=== Step 1456 ===
=== Step 1457 ===
=== Step 1458 ===
=== Step 1459 ===
=== Step 1460 ===
=== Step 1461 ===
=== Step 1462 ===
=== Step 1463 ===
=== Step 1464 ===
=== Step 1465 ===
=== Step 1466 ===
=== Step 1467 ===
=== Step 1468 ===
=== Step 1469 ===
=== Step 1470 ===
=== Step 1471 ===
=== Step 1472 ===
=== Step 1473 ===
=== Step 1474 ===
=== Step 1475 ===
=== Step 1476 ===
=== Step 1477 ===
=== Step 1478 ===
=== Step 1479 ===
=== Step 1480 ===
=== Step 1481 ===
=== Step 1482 ===
=== Step 1483 ===
=== Step 1484 ===
=== Step 1485 ===
=== Step 1486 ===
=== Step 1

=== Step 1888 ===
=== Step 1889 ===
=== Step 1890 ===
=== Step 1891 ===
=== Step 1892 ===
=== Step 1893 ===
=== Step 1894 ===
=== Step 1895 ===
=== Step 1896 ===
=== Step 1897 ===
=== Step 1898 ===
=== Step 1899 ===
=== Step 1900 ===
=== Step 1901 ===
=== Step 1902 ===
=== Step 1903 ===
=== Step 1904 ===
=== Step 1905 ===
=== Step 1906 ===
=== Step 1907 ===
=== Step 1908 ===
=== Step 1909 ===
=== Step 1910 ===
=== Step 1911 ===
=== Step 1912 ===
=== Step 1913 ===
=== Step 1914 ===
=== Step 1915 ===
=== Step 1916 ===
=== Step 1917 ===
=== Step 1918 ===
=== Step 1919 ===
=== Step 1920 ===
=== Step 1921 ===
=== Step 1922 ===
=== Step 1923 ===
=== Step 1924 ===
=== Step 1925 ===
=== Step 1926 ===
=== Step 1927 ===
=== Step 1928 ===
=== Step 1929 ===
=== Step 1930 ===
=== Step 1931 ===
=== Step 1932 ===
=== Step 1933 ===
=== Step 1934 ===
=== Step 1935 ===
=== Step 1936 ===
=== Step 1937 ===
=== Step 1938 ===
=== Step 1939 ===
=== Step 1940 ===
=== Step 1941 ===
=== Step 1942 ===
=== Step 1

=== Step 2344 ===
=== Step 2345 ===
=== Step 2346 ===
=== Step 2347 ===
=== Step 2348 ===
=== Step 2349 ===
=== Step 2350 ===
=== Step 2351 ===
=== Step 2352 ===
=== Step 2353 ===
=== Step 2354 ===
=== Step 2355 ===
=== Step 2356 ===
=== Step 2357 ===
=== Step 2358 ===
=== Step 2359 ===
=== Step 2360 ===
=== Step 2361 ===
=== Step 2362 ===
=== Step 2363 ===
=== Step 2364 ===
=== Step 2365 ===
=== Step 2366 ===
=== Step 2367 ===
=== Step 2368 ===
=== Step 2369 ===
=== Step 2370 ===
=== Step 2371 ===
=== Step 2372 ===
=== Step 2373 ===
=== Step 2374 ===
=== Step 2375 ===
=== Step 2376 ===
=== Step 2377 ===
=== Step 2378 ===
=== Step 2379 ===
=== Step 2380 ===
=== Step 2381 ===
=== Step 2382 ===
=== Step 2383 ===
=== Step 2384 ===
=== Step 2385 ===
=== Step 2386 ===
=== Step 2387 ===
=== Step 2388 ===
=== Step 2389 ===
=== Step 2390 ===
=== Step 2391 ===
=== Step 2392 ===
=== Step 2393 ===
=== Step 2394 ===
=== Step 2395 ===
=== Step 2396 ===
=== Step 2397 ===
=== Step 2398 ===
=== Step 2

In [214]:
res = heaan.Message(log_slots)
dec.decrypt(ctxt_next, sk, res)
print("trained parameters\n")
for i in range(19):
    for j in range(1):
        print("beta[", i, "] = ",res[i*train_n + j].real)


trained parameters

beta[ 0 ] =  0.013505102688508096
beta[ 1 ] =  0.0013713243070411238
beta[ 2 ] =  0.2699956018471327
beta[ 3 ] =  1.1235464990333066
beta[ 4 ] =  1.0458471462917915
beta[ 5 ] =  0.0039357507965833115
beta[ 6 ] =  -0.010678110279846758
beta[ 7 ] =  0.009533232563441253
beta[ 8 ] =  -0.05085272911912002
beta[ 9 ] =  -0.131769775200267
beta[ 10 ] =  -0.12570638022641104
beta[ 11 ] =  -0.13413420914719654
beta[ 12 ] =  -0.04589260657793841
beta[ 13 ] =  -0.01723798581928716
beta[ 14 ] =  0.022602842359485738
beta[ 15 ] =  0.00502378707897802
beta[ 16 ] =  0.015338000602999852
beta[ 17 ] =  -0.004156710294800842
beta[ 18 ] =  0.8774429804330408


In [217]:
def predict_y(ctxt_X, ctxt_beta, n, log_slots, eval, context, num_slots):
    
    ctxt_rot = heaan.Ciphertext(context)
    ctxt_tmp = heaan.Ciphertext(context)
    
    # compute  ctxt_tmp = beta1*x1 / beta2*x2 / ... / beta18*x18 / beta19*x19
    ctxt_tmp = heaan.Ciphertext(context)
    eval.mult(ctxt_beta, ctxt_X, ctxt_tmp)
    
    # ctxt_beta_left = beta17*x17 / beta18*x18 / beta19*x19
    ctxt_beta_left = heaan.Ciphertext(context)
    eval.left_rotate(ctxt_tmp, 16*n, ctxt_beta_left)
    
    # compute ctxt_tmp = beta1*x1 + beta2*x2 + ... + beta16*x16
    for i in range(4):
        eval.left_rotate(ctxt_tmp, n*2**(3-i), ctxt_rot)
        eval.add(ctxt_tmp, ctxt_rot, ctxt_tmp)
        
    # compute ctxt_tmp = beta1*x1 / beta2*x2 / ... / beta18*x18 / beta19*x19
    for i in range(3):
        eval.left_rotate(ctxt_beta_left, n * i, ctxt_rot)
        eval.add(ctxt_tmp, ctxt_rot, ctxt_tmp)
    
    return ctxt_tmp

In [218]:
# check the model is fitted well on train data
# predict y of train dataset
ctxt_y_train = predict_y(ctxt_X, ctxt_next, train_n, log_slots, eval, context, num_slots)

res = heaan.Message(log_slots)
dec.decrypt(ctxt_y_train, sk, res)

Y_train_pred = [] # decrypted prediction
for i in range(train_n):
    Y_train_pred.append(res[i].real)
    
y_train = np.array(Y)
y_train_pred = np.array(Y_train_pred)

res_train = pd.DataFrame({'y':y_train, 'y_pred':y_train_pred, 'y - y_pred':y_train - y_train_pred})

r2_train = r2_score(y_train, y_train_pred)

print("Train Dataset R^2 score :", r2_train) # R^2 of trainset is enough high, so the model is trained well
print("")
print(res_train)

Train Dataset R^2 score : 0.8135907595047113

        y    y_pred  y - y_pred
0    0.71  0.854695   -0.144695
1    0.84  0.801310    0.038690
2    0.70  0.794905   -0.094905
3    0.86  0.841272    0.018728
4    0.93  0.898803    0.031197
..    ...       ...         ...
189  0.64  0.574608    0.065392
190  0.62  0.690350   -0.070350
191  0.90  0.877944    0.022056
192  0.93  0.898151    0.031849
193  0.50  0.632809   -0.132809

[194 rows x 3 columns]


In [219]:
# prepare test data for evaluation
test_n = dataset_test.shape[0]

X_test = [0] * 19
X_test[0] = normalize_data(dataset_test[:,1]) # Age
X_test[1] = normalize_data(dataset_test[:,2]) # Sleep duration
X_test[2] = normalize_data(dataset_test[:,3]) # REM sleep percentage
X_test[3] = normalize_data(dataset_test[:,4]) # Deep sleep percentage
X_test[4] = normalize_data(dataset_test[:,5]) # Light sleep percentage
X_test[5] = normalize_data(dataset_test[:,6]) # Caffeine consumption
X_test[6] = normalize_data(dataset_test[:,7]) # Alcohol consumption
X_test[7] = (dataset_test[:,8]) # Gender_Male
X_test[8] = (dataset_test[:,9]) # Awakenings_1
X_test[9] = (dataset_test[:,10]) # Awakenings_2
X_test[10] = (dataset_test[:,11]) # Awakenings_3
X_test[11] = (dataset_test[:,12]) # Awakenings_4
X_test[12] = (dataset_test[:,13]) # Smoking status_Yes
X_test[13] = (dataset_test[:,14]) # Exercise frequency_1
X_test[14] = (dataset_test[:,15]) # Exercise frequency_2
X_test[15] = (dataset_test[:,16]) # Exercise frequency_3
X_test[16] = (dataset_test[:,17]) # Exercise frequency_4
X_test[17] = (dataset_test[:,18]) # Exercise frequency_5
X_test[18] = [1 for _ in range(test_n)]

Y_test = y_test

msg_X_test = heaan.Message(log_slots)
ctxt_X_test = heaan.Ciphertext(context)
for i in range(19):
    for j in range(test_n):
        msg_X_test[test_n*i + j] = X_test[i][j]
enc.encrypt(msg_X_test, pk, ctxt_X_test)

In [220]:
# check performance on unseen data
ctxt_y_test = predict_y(ctxt_X_test, ctxt_next, test_n, log_slots, eval, context, num_slots)

res = heaan.Message(log_slots)
dec.decrypt(ctxt_y_test, sk, res)

Y_test_pred = []
for i in range(test_n):
    Y_test_pred.append(res[i].real)
    
y_test = np.array(Y_test)
y_test_pred = np.array(Y_test_pred)
res_test = pd.DataFrame({'y':y_test, 'y_pred':y_test_pred, 'y - y_pred':y_test - y_test_pred})

r2_test = r2_score(y_test, y_test_pred)

print("Test Dataset R^2 score :", r2_test)
print("")
print(res_test)

Test Dataset R^2 score : 0.6910724248567996

        y    y_pred  y - y_pred
0    0.86  0.862838   -0.002838
1    0.65  0.619314    0.030686
2    0.83  0.822842    0.007158
3    0.81  0.848052   -0.038052
4    0.87  0.913848   -0.043848
..    ...       ...         ...
189  0.81  0.812524   -0.002524
190  0.85  0.770759    0.079241
191  0.55  0.601850   -0.051850
192  0.94  0.910032    0.029968
193  0.70  0.831012   -0.131012

[194 rows x 3 columns]


In [222]:
#compare performances of Model with HE and Model without HE on same testset
print("Compare the Model used HE & the model without HE(LinearRegression of sklearn)\n")
print("without HE\nR^2 of testset : ", sklearn_r2)
print("HE\nR^2 of testset : ", r2_test) 
# peformace decrease approximately 0.15, but we think it is not too bad counter effect to adapt HE

Compare the Model used HE & the model without HE(LinearRegression of sklearn)

without HE
R^2 of testset :  0.838818252406227
HE
R^2 of testset :  0.6910724248567996
