## practice pi-heaan
- Lets make logistic algorithm
- This algorithm is not perfect.
- And this algorithm cannot be applied to all data for logistic regression.
- Please consider this as an exercise to practice how to make an algorithm using pi-heaan.

In [1]:
from IPython.display import Image

<img src="./fig0.jpg" width="1300"/>

In [2]:
import piheaan as heaan
from piheaan.math import approx

import pandas as pd
import numpy as np
import math

In [3]:
params = heaan.ParameterPreset.FGb
context = heaan.make_context(params)
heaan.make_bootstrappable(context)

# Load pre-exisisting key
key_file_path = "./keys"

sk = heaan.SecretKey(context,key_file_path+"/secretkey.bin") # load sk
pk = heaan.KeyPack(context, key_file_path+"/") # load pk
pk.load_enc_key()
pk.load_mult_key()

eval = heaan.HomEvaluator(context,pk)
dec = heaan.Decryptor(context)
enc = heaan.Encryptor(context)

log_slots = 15
num_slots = 2**log_slots

In [4]:
def normalize_data(arr):
    S = 0
    for i in range(len(arr)):
        S += arr[i]
    return [arr[i] / S for i in range(len(arr))]

In [5]:
def step(learning_rate, ctxt_X, ctxt_Y, ctxt_beta, n, log_slots, context, eval):
    '''
    ctxt_X, ctxt_Y : data for training
    ctxt_beta : initial value beta
    n : the number of row in train_data
    '''
    ctxt_rot = heaan.Ciphertext(context)
    ctxt_tmp = heaan.Ciphertext(context)
    
    ## step1
    # beta0
    ctxt_beta0 = heaan.Ciphertext(context)
    eval.left_rotate(ctxt_beta, 8*n, ctxt_beta0)
    
    # compute  ctxt_tmp = beta1*x1 + beta2*x2 + ... + beta8*x8 + beta0
    ctxt_tmp = heaan.Ciphertext(context)
    eval.mult(ctxt_beta, ctxt_X, ctxt_tmp)
    
    for i in range(3):
        eval.left_rotate(ctxt_tmp, n*2**(2-i), ctxt_rot)
        eval.add(ctxt_tmp, ctxt_rot, ctxt_tmp)
    eval.add(ctxt_tmp, ctxt_beta0, ctxt_tmp)
    
    msg_mask = heaan.Message(log_slots)
    for i in range(n):
        msg_mask[i] = 1
    eval.mult(ctxt_tmp, msg_mask, ctxt_tmp)
    
    ## step2
    # compute sigmoid
    approx.sigmoid(eval, ctxt_tmp, ctxt_tmp, 8.0)
    eval.bootstrap(ctxt_tmp, ctxt_tmp)
    msg_mask = heaan.Message(log_slots)
    # if sigmoid(0) -> return 0.5
    for i in range(n, num_slots):
        msg_mask[i] = 0.5
    eval.sub(ctxt_tmp, msg_mask, ctxt_tmp)
    
    ## step3
    # compute  (learning_rate/n) * (y_(j) - p_(j))
    ctxt_d = heaan.Ciphertext(context)
    eval.sub(ctxt_Y, ctxt_tmp, ctxt_d)
    eval.mult(ctxt_d, learning_rate / n, ctxt_d)
    
    eval.right_rotate(ctxt_d, 8*n, ctxt_tmp) # for beta0
    for i in range(3):
        eval.right_rotate(ctxt_d, n * 2**i, ctxt_rot)
        eval.add(ctxt_d, ctxt_rot, ctxt_d)
    eval.add(ctxt_d, ctxt_tmp, ctxt_d)
    
    ## step4
    # compute  (learning_rate/n) * (y_(j) - p_(j)) * x_(j)
    ctxt_X_j = heaan.Ciphertext(context)
    msg_X0 = heaan.Message(log_slots)
    for i in range(8*n, 9*n):
        msg_X0[i] = 1
    eval.add(ctxt_X, msg_X0, ctxt_X_j)
    eval.mult(ctxt_X_j, ctxt_d, ctxt_d)
    
    ## step5
    # compute  Sum_(all j) (learning_rate/n) * (y_(j) - p_(j)) * x_(j)
    for i in range(9):
        eval.left_rotate(ctxt_d, 2**(8-i), ctxt_rot)
        eval.add(ctxt_d, ctxt_rot, ctxt_d)
    msg_mask = heaan.Message(log_slots)
    for i in range(9):
        msg_mask[i * n] = 1
    eval.mult(ctxt_d, msg_mask, ctxt_d)

    for i in range(9):
        eval.right_rotate(ctxt_d, 2**i, ctxt_rot)
        eval.add(ctxt_d, ctxt_rot, ctxt_d)

    ## step6
    # update beta
    eval.add(ctxt_beta, ctxt_d, ctxt_d)
    return ctxt_d

- Next function is for evaluation

In [6]:
def compute_sigmoid(ctxt_X, ctxt_beta, n, log_slots, eval, context, num_slots):
    '''
    ctxt_X : data for evaluation
    ctxt_beta : estimated beta from function 'step'
    n : the number of row in test_data
    '''
    ctxt_rot = heaan.Ciphertext(context)
    ctxt_tmp = heaan.Ciphertext(context)
    
    # beta0
    ctxt_beta0 = heaan.Ciphertext(context)
    eval.left_rotate(ctxt_beta, 8*n, ctxt_beta0)
    
    # compute x * beta + beta0
    ctxt_tmp = heaan.Ciphertext(context)
    eval.mult(ctxt_beta, ctxt_X, ctxt_tmp)
    
    for i in range(3):
        eval.left_rotate(ctxt_tmp, n*2**(2-i), ctxt_rot)
        eval.add(ctxt_tmp, ctxt_rot, ctxt_tmp)
    eval.add(ctxt_tmp, ctxt_beta0, ctxt_tmp)
    
    msg_mask = heaan.Message(log_slots)
    for i in range(n):
        msg_mask[i] = 1
    eval.mult(ctxt_tmp, msg_mask, ctxt_tmp)
    
    # compute sigmoid
    approx.sigmoid(eval, ctxt_tmp, ctxt_tmp, 8.0)
    eval.bootstrap(ctxt_tmp, ctxt_tmp)
    msg_mask = heaan.Message(log_slots)
    for i in range(n, num_slots):
        msg_mask[i] = 0.5
    eval.sub(ctxt_tmp, msg_mask, ctxt_tmp)
    
    return ctxt_tmp

#### train

In [7]:
csv_train = pd.read_csv('train.csv')
df = pd.DataFrame(csv_train)

# preprocessing data
# convert only on columns that are not 0 ~ 1 to values 0 ~ 1
train_n = df.shape[0]
X = [0] * 8
X[0] = normalize_data(df['LVR'].values)
X[1] = list(df['REF'].values)
X[2] = list(df['INSUR'].values)
X[3] = normalize_data(df['RATE'].values)
X[4] = normalize_data(df['AMOUNT'].values)
X[5] = normalize_data(df['CREDIT'].values)
X[6] = normalize_data(df['TERM'].values)
X[7] = list(df['ARM'].values)
Y = list(df['DELINQUENT'].values)

msg_X = heaan.Message(log_slots)
ctxt_X = heaan.Ciphertext(context)
for i in range(8):
    for j in range(train_n):
        msg_X[train_n*i + j] = X[i][j]
enc.encrypt(msg_X, pk, ctxt_X)

msg_Y = heaan.Message(log_slots)
ctxt_Y = heaan.Ciphertext(context)
for j in range(train_n):
    msg_Y[j] = Y[j]
enc.encrypt(msg_Y, pk, ctxt_Y)

In [8]:
# initial value beta
beta = 2 * np.random.rand(9) - 1
print("beta : ", beta)
print()
msg_beta = heaan.Message(log_slots)
ctxt_beta = heaan.Ciphertext(context)

for i in range(8):
    for j in range(train_n):
        msg_beta[train_n*i + j] = beta[i+1]
for j in range(train_n):
    msg_beta[8*train_n + j] = beta[0]
    
enc.encrypt(msg_beta, pk, ctxt_beta)
print("msg_beta : ", msg_beta)

beta :  [ 3.47410588e-01 -8.57958060e-01  7.72948859e-02  4.64010502e-01
  3.35188081e-01 -7.44429844e-01  2.14214299e-04 -5.03642732e-01
 -3.20220245e-01]

msg_beta :  [ (-0.857958+0.000000j), (-0.857958+0.000000j), (-0.857958+0.000000j), (-0.857958+0.000000j), (-0.857958+0.000000j), ..., (0.000000+0.000000j), (0.000000+0.000000j), (0.000000+0.000000j), (0.000000+0.000000j), (0.000000+0.000000j) ]


In [9]:
# randomly assign learning_rate
learning_rate = 0.2
num_steps = 100

ctxt_next = heaan.Ciphertext(context)
eval.add(ctxt_beta, 0, ctxt_next)
for i in range(num_steps):
    print("=== Step", i, "===")
    # estimate beta_hat using function 'step' for 100 iteration
    ctxt_next = step(0.2, ctxt_X, ctxt_Y, ctxt_next, train_n, log_slots, context, eval)
    # print(ctxt_next)

=== Step 0 ===


=== Step 1 ===
=== Step 2 ===
=== Step 3 ===
=== Step 4 ===
=== Step 5 ===
=== Step 6 ===
=== Step 7 ===
=== Step 8 ===
=== Step 9 ===
=== Step 10 ===
=== Step 11 ===
=== Step 12 ===
=== Step 13 ===
=== Step 14 ===
=== Step 15 ===
=== Step 16 ===
=== Step 17 ===
=== Step 18 ===
=== Step 19 ===
=== Step 20 ===
=== Step 21 ===
=== Step 22 ===
=== Step 23 ===
=== Step 24 ===
=== Step 25 ===
=== Step 26 ===
=== Step 27 ===
=== Step 28 ===
=== Step 29 ===
=== Step 30 ===
=== Step 31 ===
=== Step 32 ===
=== Step 33 ===
=== Step 34 ===
=== Step 35 ===
=== Step 36 ===
=== Step 37 ===
=== Step 38 ===
=== Step 39 ===
=== Step 40 ===
=== Step 41 ===
=== Step 42 ===
=== Step 43 ===
=== Step 44 ===
=== Step 45 ===
=== Step 46 ===
=== Step 47 ===
=== Step 48 ===
=== Step 49 ===
=== Step 50 ===
=== Step 51 ===
=== Step 52 ===
=== Step 53 ===
=== Step 54 ===
=== Step 55 ===
=== Step 56 ===
=== Step 57 ===
=== Step 58 ===
=== Step 59 ===
=== Step 60 ===
=== Step 61 ===
=== Step 62 ===
=== Step 63 ===
=

#### evaluation

In [10]:
# prepare test data for evaluation
# convert only on columns that are not 0 ~ 1 to values 0 ~ 1
csv_test = pd.read_csv('test.csv')
df = pd.DataFrame(csv_test)
test_n = df.shape[0]

X_test = [0] * 8
X_test[0] = normalize_data(df['LVR'].values)
X_test[1] = list(df['REF'].values)
X_test[2] = list(df['INSUR'].values)
X_test[3] = normalize_data(df['RATE'].values)
X_test[4] = normalize_data(df['AMOUNT'].values)
X_test[5] = normalize_data(df['CREDIT'].values)
X_test[6] = normalize_data(df['TERM'].values)
X_test[7] = list(df['ARM'].values)
Y_test = df['DELINQUENT'].values

msg_X_test = heaan.Message(log_slots)
ctxt_X_test = heaan.Ciphertext(context)
for i in range(8):
    for j in range(test_n):
        msg_X_test[test_n*i + j] = X_test[i][j]
enc.encrypt(msg_X_test, pk, ctxt_X_test)

In [11]:
# accuracy
ctxt_infer = compute_sigmoid(ctxt_X_test, ctxt_next, test_n, log_slots, eval, context, num_slots)

res = heaan.Message(log_slots)
dec.decrypt(ctxt_infer, sk, res)
cnt = 0
for i in range(test_n):
    if res[i].real >= 0.6:
        if Y_test[i] == 1:
            cnt += 1
    else:
        if Y_test[i] == 0:
            cnt += 1
print("Accuracy : ", cnt / test_n)

Accuracy :  0.7930327868852459
