In [14]:
##HOMOMORPHIC ENCRYPTION - PARTIAL HOMOMORPHIC - PAILLIER 

import numpy as np 
import pandas as pd
from phe import paillier
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [2]:
#Load the breast-cancer dataset bundled with scikit-learn
data = load_breast_cancer()
X = data.data    #feature matrix, one row per sample
y = data.target  #class label for each row

In [3]:
#Hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
#Fit the plaintext logistic-regression model, allowing the solver up to 500 iterations
logreg = LogisticRegression(max_iter=500)
logreg.fit(X_train, y_train)

In [5]:
#Predict class labels for the held-out test features with the fitted plaintext model.
#These predictions are the baseline the encrypted-inference results are compared against.
y_pred = logreg.predict(X_test)

In [6]:
#Score the plaintext predictions against the true test labels
accuracy = accuracy_score(y_test, y_pred)
baseline_report = classification_report(y_test, y_pred)
baseline_confusion = confusion_matrix(y_test, y_pred)

#Overall accuracy
print('Accuracy', accuracy)

#Per-class precision / recall / f1
print('classification_report: \n', baseline_report)

#Counts of true vs. predicted labels
print('confusion_matrix: \n', baseline_confusion)

Accuracy 0.956140350877193
classification_report: 
               precision    recall  f1-score   support

           0       0.97      0.91      0.94        43
           1       0.95      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114

confusion_matrix: 
 [[39  4]
 [ 1 70]]


In [7]:
#Pull the learned parameters out of the fitted model: the first (only used) row of
#coefficients and the matching intercept, for use in the encrypted dot product below
weights, bias = logreg.coef_[0], logreg.intercept_[0]

In [8]:
#Generate a Paillier key pair: public_key is used to encrypt the features,
#private_key is used to decrypt the encrypted linear score.
public_key, private_key = paillier.generate_paillier_keypair()

#Apply sigmoid function
def sigmoid(x):
    """Map a real-valued score (scalar or array) to a probability in [0, 1].

    Mathematically this is 1 / (1 + exp(-x)). It is evaluated through
    exp(-|x|) so that large-magnitude negative scores do not overflow
    np.exp (the naive form emits a RuntimeWarning for x below about -709).

    Parameters
    ----------
    x : float or array-like of float
        Linear score(s) to squash.

    Returns
    -------
    NumPy float for scalar input, ndarray of the same shape for array input.
    """
    scores = np.asarray(x, dtype=float)
    decay = np.exp(-np.abs(scores))  #exponent is never positive, so this cannot overflow
    #Pick the algebraically equivalent form that is stable on each side of zero
    result = np.where(scores >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    return result[()]  #unwrap a 0-d result to a scalar; real arrays pass through unchanged

In [13]:
#Display the held-out feature matrix that will be encrypted row by row below
X_test

array([[1.247e+01, 1.860e+01, 8.109e+01, ..., 1.015e-01, 3.014e-01,
        8.750e-02],
       [1.894e+01, 2.131e+01, 1.236e+02, ..., 1.789e-01, 2.551e-01,
        6.589e-02],
       [1.546e+01, 1.948e+01, 1.017e+02, ..., 1.514e-01, 2.837e-01,
        8.019e-02],
       ...,
       [1.152e+01, 1.493e+01, 7.387e+01, ..., 9.608e-02, 2.664e-01,
        7.809e-02],
       [1.422e+01, 2.785e+01, 9.255e+01, ..., 8.219e-02, 1.890e-01,
        7.796e-02],
       [2.073e+01, 3.112e+01, 1.357e+02, ..., 1.659e-01, 2.868e-01,
        8.218e-02]])

In [None]:
#Score every row of a feature matrix on Paillier-encrypted features
def encrypt_machine_learning(xtest, threshold, verbose=True):
    """Classify each row of ``xtest`` using an encrypted linear score.

    For every row the features are encrypted with ``public_key``, the
    plaintext model parameters ``weights`` and ``bias`` are applied to the
    ciphertexts (scalar multiplication and addition), the resulting encrypted
    score is decrypted with ``private_key``, and ``sigmoid`` turns it into a
    probability that is compared with ``threshold``.

    The predictions are collected in a list local to each call, so re-running
    the calling cell returns one label per row instead of accumulating
    results from earlier runs.

    Relies on the notebook-level names ``public_key``, ``private_key``,
    ``weights``, ``bias`` and ``sigmoid``.

    Parameters
    ----------
    xtest : iterable of iterables of float
        Rows of features; each row is zipped with ``weights``.
    threshold : float
        A row is labelled 1 when its probability is strictly greater than
        this value, otherwise 0.
    verbose : bool, default True
        When True, print the decrypted probability of every row.

    Returns
    -------
    list of int
        Predicted class (0 or 1) for each row, in input order.
    """
    predictions = []

    for element in xtest:
        #Encrypt each feature of the row individually
        encrypted_sample = [public_key.encrypt(x) for x in element]

        #Compute encrypted dot product (plaintext weight times ciphertext), then add the bias
        encrypted_sum = sum(w*x for w, x in zip(weights, encrypted_sample)) + bias

        #Decrypt the linear score; only the sigmoid is evaluated in the clear
        decrypted_sum = private_key.decrypt(encrypted_sum)

        #Compute probability
        prob = sigmoid(decrypted_sum)

        #Predicted class
        predicted_class = (1 if prob > threshold else 0)

        predictions.append(predicted_class)
        if verbose:
            print(f'decrypted probability: {prob:.4f}')

    return predictions



In [None]:
#Run encrypted inference over the whole test set; he_X_test holds one predicted label per row.
#NOTE(review): the 0.7 cut-off is stricter than the 0.5 decision boundary used by
#logreg.predict for the plaintext baseline - confirm the difference is intended.
he_X_test = encrypt_machine_learning(X_test, threshold=0.7)

decrypted probability: 0.8360
decrypted probability: 0.0000
decrypted probability: 0.0014
decrypted probability: 0.9972
decrypted probability: 0.9994
decrypted probability: 0.0000
decrypted probability: 0.0000
decrypted probability: 0.0098
decrypted probability: 0.9976
decrypted probability: 0.9885
decrypted probability: 0.9381
decrypted probability: 0.0005
decrypted probability: 0.9904
decrypted probability: 0.2276
decrypted probability: 0.9978
decrypted probability: 0.0011
decrypted probability: 0.9976
decrypted probability: 0.9997
decrypted probability: 0.9985
decrypted probability: 0.0000
decrypted probability: 0.8637
decrypted probability: 0.9881
decrypted probability: 0.0000
decrypted probability: 0.9947
decrypted probability: 0.9885
decrypted probability: 0.9996
decrypted probability: 0.9976
decrypted probability: 0.9903
decrypted probability: 0.9935
decrypted probability: 0.0000
decrypted probability: 0.9928
decrypted probability: 0.9990
decrypted probability: 0.9781
decrypted 

In [11]:
#Accuracy of the predictions obtained through encrypted inference
he_accuracy = accuracy_score(y_test, he_X_test)
print('Accuracy:', he_accuracy)

Accuracy: 0.956140350877193


In [12]:
#Score the encrypted-inference predictions against the true test labels
accuracy = accuracy_score(y_test, he_X_test)
he_report = classification_report(y_test, he_X_test)
he_confusion = confusion_matrix(y_test, he_X_test)

#Overall accuracy
print('Accuracy', accuracy)

#Per-class precision / recall / f1
print('classification_report: \n', he_report)

#Counts of true vs. predicted labels
print('confusion_matrix: \n', he_confusion)

Accuracy 0.956140350877193
classification_report: 
               precision    recall  f1-score   support

           0       0.97      0.91      0.94        43
           1       0.95      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114

confusion_matrix: 
 [[39  4]
 [ 1 70]]
