In [3]:
import math

import phe.encoding
from phe import paillier


class ExampleEncodedNumber(phe.encoding.EncodedNumber):
    BASE = 64
    LOG2_BASE = math.log(BASE, 2)


print("Generating paillier keypair")
public_key, private_key = paillier.generate_paillier_keypair()

Generating paillier keypair


In [4]:
def encode_and_encrypt_example():
    print("Encoding a large positive number. With a BASE {} encoding scheme".format(ExampleEncodedNumber.BASE))
    encoded = ExampleEncodedNumber.encode(public_key, 2.1 ** 20)
    print("Checking that decoding gives the same number...")
    assert 2.1 ** 20 == encoded.decode()

    print("Encrypting the encoded number")
    encrypted = public_key.encrypt(encoded)

    print("Decrypting...")
    decrypted_but_encoded = \
        private_key.decrypt_encoded(encrypted, ExampleEncodedNumber)

    print("Checking the decrypted number is what we started with")
    assert abs(2.1 ** 20 - decrypted_but_encoded.decode()) < 1e-12

In [5]:
def math_example():
    print("Encoding two large positive numbers. BASE={}".format(ExampleEncodedNumber.BASE))

    a = 102545 + (64 ** 8)
    b = 123 + (8 ** 20)

    encoded_a = ExampleEncodedNumber.encode(public_key, a)
    encoded_b = ExampleEncodedNumber.encode(public_key, b)

    print("Checking that decoding gives the same number...")
    assert a == encoded_a.decode()
    assert b == encoded_b.decode()

    print("Encrypting the encoded numbers")
    encrypted_a = public_key.encrypt(encoded_a)
    encrypted_b = public_key.encrypt(encoded_b)

    print("Adding the encrypted numbers")
    encrypted_c = encrypted_a + encrypted_b

    print("Decrypting the one encrypted sum")
    decrypted_but_encoded = \
        private_key.decrypt_encoded(encrypted_c, ExampleEncodedNumber)

    print("Checking the decrypted number is what we started with")

    print("Decrypted: {}".format(decrypted_but_encoded.decode()))
    assert abs((a + b) - decrypted_but_encoded.decode()) < 1e-15

In [6]:
import time
 
# store starting time
begin = time.time()

if __name__ == "__main__":
    encode_and_encrypt_example()

    math_example()
time.sleep(1)
# store end time
end = time.time()
 
# total time taken
print(f"Total runtime of the program is {end - begin}")

Encoding a large positive number. With a BASE 64 encoding scheme
Checking that decoding gives the same number...
Encrypting the encoded number
Decrypting...
Checking the decrypted number is what we started with
Encoding two large positive numbers. BASE=64
Checking that decoding gives the same number...
Encrypting the encoded numbers
Adding the encrypted numbers
Decrypting the one encrypted sum
Checking the decrypted number is what we started with
Decrypted: 1153202979583660300
Total runtime of the program is 1.9262516498565674


In [9]:
import numpy as np
import math
import phe.encoding
from phe import paillier

class ExampleEncodedNumber(phe.encoding.EncodedNumber):
    BASE = 64
    LOG2_BASE = math.log(BASE, 2)

print("Generating paillier keypair")
public_key, private_key = paillier.generate_paillier_keypair()

# generate encrypted data for privacy preserving svm
X = np.array([[1,2,3],[4,5,6],[7,8,9]])
y = np.array([1,1,-1])
y_encrypted = []
for i in range(len(y)):
    encoded = ExampleEncodedNumber.encode(public_key, int(y[i]))
    encrypted = public_key.encrypt(encoded)
    y_encrypted.append(encrypted)
print(y_encrypted)

Generating paillier keypair
[<phe.paillier.EncryptedNumber object at 0x000001CD1B1D0700>, <phe.paillier.EncryptedNumber object at 0x000001CD1B173EB0>, <phe.paillier.EncryptedNumber object at 0x000001CD1ADEAE30>]


In [36]:
import pandas as pd 
import numpy as np
from all_ss_module import *
import argparse
import math
import struct
import sys
import time
import warnings
import math

In [28]:
pip install pytictoc

Collecting pytictoc
  Downloading pytictoc-1.5.2-py2.py3-none-any.whl (4.0 kB)
Installing collected packages: pytictoc
Successfully installed pytictoc-1.5.2
Note: you may need to restart the kernel to use updated packages.


In [37]:
def process_a_b(a, b):

    if a > 0:
        b += np.floor(a)
        a -= np.floor(a)
    elif a<0:
        a += np.floor(b)
        b -= np.floor(b)

    if a>=1:
        b=0
        a=1-1e-10
    if b>=1:
        a=0
        b=1-1e-10
    
    if(abs(a) >1 or abs(b)>1):
        print (a,b)

    assert(abs(a)<=1)
    assert(abs(b)<=1)

    return a, b

In [38]:
def RBF_linear(x1_a, x1_b, x2_a, x2_b , N, gamma = 0.5):
    bias = 0.02
    ans1, ans2 = InnerProductss(x1_a, x1_b, x2_a, x2_b)
    ans1, ans2 = process_a_b(ans1, ans2)
    return ans1, ans2

In [39]:
def RBF_Gaussian(x1_a, x1_b, x2_a, x2_b , N, gamma = 0.5):
    s_a = 0
    s_b = 0
    x3_a = x1_a - x2_a
    x3_b = x1_b - x2_b 

    s_a, s_b = InnerProductss(x3_a, x3_b, x3_a, x3_b)

    assert ( ( (s_a + s_b) - (x3_a+x3_b).dot((x3_a+x3_b))) < 1e-1   )

    f_a = -0.5 * gamma * s_a
    f_b = -0.5 * gamma * s_b


    f_a = np.exp( f_a )
    f_b = np.exp( f_b )
    

    f_a_a = random.random()
    f_a_b = f_a - f_a_a

    f_b_a = random.random()
    f_b_b = f_b - f_b_a


    e_a, e_b = NumProductss(f_a_a, f_a_b, f_b_a, f_b_b)

    e_a, e_b = process_a_b(e_a, e_b)

    return e_a, e_b

In [40]:
class SVM:
    def __init__(self, C, lr=0.1):

        self.C = C
        self.lr = lr
        self.alpha_a = None
        self.alpha_b = None
        self.b_a = 0
        self.b_b = 0
        self.X_a = None
        self.X_b = None
        self.loss_a = float("inf")
        self.loss_b = float("inf")
       
    def predict(self, x, raw=False):
        x_a = np.zeros(len(x))
        for k in range(len(x)):
            x_a[k] = np.random.uniform(0, x[k])

        x_b = x - x_a
        # y_pred = 0+self.b
        y_pred_a = 0+self.b_a
        y_pred_b = 0+self.b_b
        N = len(self.X)

        for i in range(N):

            rbf_a, rbf_b = RBF_Gaussian(x_a, x_b, self.X_a[i], self.X_b[i], N )

            y_add_a, y_add_b = NumProductss(rbf_a, rbf_b, self.alpha_a[i], self.alpha_b[i])
            y_pred_a += y_add_a
            y_pred_b += y_add_b
            y_pred = y_pred_a + y_pred_b

        if raw:
            return y_pred
        return np.sign(y_pred).astype(np.float32)
        
        
    def fit(self, X, y, iteration_times = 500, batch_size=10):

        lr = self.lr
        N = len(X)
        # X_a = np.random.randint(1,10, (N,9))
        X_a = np.zeros((N,9))

        for i in range(N):
            for j in range(9):
                X_a[i][j] = np.random.uniform(0, X[i][j])      
        X_b = X - X_a

        self.X = X
        self.X_a = X_a
        self.X_b = X_b

        y_a = np.zeros(len(y))
        
        for ii in range(len(y)):
            r = np.random.uniform(0,1)
            if y[ii] == 1:
                y_a[ii] = r
            else:
                y_a[ii] = 0 - r
        y_b = y - y_a 
        
        self.alpha_a = np.random.uniform(0,0.5, N)
        self.alpha_b = np.random.uniform(0,0.5, N)   
        K_a = np.zeros((N,N))
        K_b = np.zeros((N,N))

        for i in range(N):
            for j in range(N):
                K_a[i][j], K_b[i][j] = RBF_Gaussian(X_a[i],X_b[i],X_a[j],X_b[j],N)  
        

        
        print ('Kernel Matrix Done'), 

        K_diag_a = np.diag(K_a)
        K_diag_b = np.diag(K_b)


        for t in range(iteration_times):
            a_a = self.alpha_a.reshape(1,-1)
            a_b = self.alpha_b.reshape(1,-1)

            # loss = a.dot(K).dot(a.T)
            F_a, F_b = MatMulss(a_a, a_b, K_a, K_b)
            a_a_T = np.transpose(a_a)
            a_b_T = np.transpose(a_b)
            loss_a = 0 
            loss_b = 0

            da_a = np.zeros(N)
            da_b = np.zeros(N)
            db_a = 0
            db_b = 0

            P_a = K_a + np.transpose(K_a)
            P_b = K_b + np.transpose(K_b)

            H_a = np.zeros(N)
            H_b = np.zeros(N)

            for i in range(0,N):
             	self.alpha_a[i] -= H_a[i]
             	self.alpha_b[i] -= H_b[i]

            indices = np.random.permutation(N)[:batch_size]
            #print  indices
            for i in indices:

                q_a, q_b = InnerProductss(self.alpha_a, self.alpha_b, K_a[i], K_b[i])

                q_a = q_a + self.b_a
                q_b = q_b + self.b_b

                w_a, w_b = NumProductss(y_a[i], y_b[i], q_a, q_b)

                w_a = 0.5 - w_a
                w_b = 0.5 - w_b

                w_a_int = int(math.floor(w_a * 10000))
                w_b_int = int(math.floor(w_b * 10000))

                margin = w_a + w_b

                u_1, u_2 = BitExtractionMatrix2(w_a_int, w_b_int, 1, 1)       # comparison
                u_sum = u_1^u_2

                loss_a += self.C*w_a
                loss_b += self.C*w_b

                if (u_sum == 0) and (margin < 0):
                    print ('w_a',w_a,'w_b', w_b)
                    print ('w_a_int',w_a_int,'w_b_int',w_b_int)
                    print ('margin',margin,'u_sum',u_sum)

                if u_sum == 0:
                    yk_a, yk_b = DotProductss(y_a[i], y_b[i], K_a[i], K_b[i])
                    da_a -= self.C*yk_a
                    da_b -= self.C*yk_b
                    db_a -= self.C*y_a[i]
                    db_b -= self.C*y_b[i]
                


            if (t+1)%500==0:
                print("Iteration %d, " %(t+1),"Loss:",loss_a+loss_b)
            self.alpha_a -= lr*da_a
            self.alpha_b -= lr*da_b
            self.b_a -= lr*db_a
            self.b_b -= lr*db_b
            self.b = self.b_a + self.b_b 

In [41]:
data = pd.read_csv("breast-cancer-wisconsin.csv") 
total = len(data.values)
train = int(total * 0.8)
test = total - train
print (train, 'for training', test, 'for validating')

X = data.values[0:train,1:10]


Y = data.values[:,10]

for i in range(len(Y)):
	if Y[i]==2:
		Y[i] = 1
	else:
		Y[i] = -1
positive = -1

y = Y[0:train]
ans = Y[train:]

svm = SVM(10, lr = 0.01)
svm.fit(X, y, iteration_times=2000)


correct = 0 
tp=0
fp=0
fn=0

545 for training 137 for validating
Kernel Matrix Done
Iteration 500,  Loss: [-2543.3574055]
Iteration 1000,  Loss: [-1821.34518036]
Iteration 1500,  Loss: [-2123.8548711]
Iteration 2000,  Loss: [-2096.19391595]


In [42]:
test_set = data.values[train:total, :]
for i in range(len(test_set)):
    x = test_set[i, 1:10]
    y = test_set[i, 10]
    #print y
    pred = svm.predict(x)
    
    if pred == y:
        correct +=1 
    if y==positive:
        if pred==positive:
            tp+=1
            print ('tp', pred, y)
        else:
            fn+=1
            print ('fn', pred, y)
    else:
        if pred==positive:
            fp+=1
            print ('fp', pred, y)

tp [-1.] -1
tp [-1.] -1
tp [-1.] -1
tp [-1.] -1
tp [-1.] -1
tp [-1.] -1
tp [-1.] -1
tp [-1.] -1
tp [-1.] -1
tp [-1.] -1
tp [-1.] -1
tp [-1.] -1
tp [-1.] -1
tp [-1.] -1
tp [-1.] -1
tp [-1.] -1
tp [-1.] -1
tp [-1.] -1
tp [-1.] -1
tp [-1.] -1
tp [-1.] -1
fp [-1.] 1
tp [-1.] -1
tp [-1.] -1
tp [-1.] -1
tp [-1.] -1
fp [-1.] 1
tp [-1.] -1
tp [-1.] -1
tp [-1.] -1
tp [-1.] -1
tp [-1.] -1
tp [-1.] -1
fp [-1.] 1
tp [-1.] -1
tp [-1.] -1
tp [-1.] -1
tp [-1.] -1


In [43]:
print ('tp, fn, fp', tp, fn, fp)
print ('Precision', float(tp)/(tp+fp))
print ('Recall', float(tp)/(tp + fn))
print ('Correct/Test', correct, test)

tp, fn, fp 35 0 3
Precision 0.9210526315789473
Recall 1.0
Correct/Test 134 137
