In [None]:
# Optional: setup NoTexBook theme
%load_ext notexbook

%texify

In [None]:
# Uncomment this if running on Anaconda Notebooks
# (`%pip` installs into the environment of the running kernel)
# %pip install phe==1.5.0

#### Logistic Regression HE

An example of a Logistic Regression model using the **P**artially **H**omomorphic **E**ncryption (`phe`) Python library.

Note: This example has been adapted from the original example on `phe` [repo](https://github.com/data61/python-paillier/blob/master/examples/logistic_regression_encrypted_model.py)

In [1]:
import time
from contextlib import contextmanager

import numpy as np
from sklearn.linear_model import LogisticRegression

import phe as paillier

In [2]:
np.random.seed(123456)  # Initialise Random Seed for reproducibility

In [3]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

In [4]:
def get_winsconsin_bc_dataset(test_size=0.25, random_state=42):
    """
    Load the Breast Cancer Wisconsin dataset and split it into
    training and test partitions.

    The 0-valued targets are remapped to -1 so that the sign of a linear
    score can be compared directly against the labels. The split is
    stratified on the (remapped) labels.

    Parameters
    ----------
    test_size : default=0.25
        Forwarded unchanged to ``train_test_split``.
    random_state : default=42
        Forwarded unchanged to ``train_test_split``.

    Returns
    -------
    X_train, y_train, X_test, y_test
        Note the order: features and labels of the training partition
        first, then features and labels of the test partition.
    """
    X, y = load_breast_cancer(return_X_y=True)
    y[y == 0] = -1  # so we can take the sign later :)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, stratify=y, random_state=random_state
    )
    return X_train, y_train, X_test, y_test

In [5]:
@contextmanager
def timer():
    """Context manager that prints the time spent inside its ``with`` body.

    The elapsed time (measured with ``time.perf_counter``) is printed when
    the block exits. The report is emitted from a ``finally`` clause, so it
    is also printed when the body raises; the exception then propagates
    to the caller unchanged.

    Nothing is yielded, so ``with timer():`` needs no ``as`` target.
    """
    time0 = time.perf_counter()
    try:
        yield
    finally:
        # Always report, even if the timed body failed part-way through.
        print("[elapsed time: %.2f s]" % (time.perf_counter() - time0))

**Alice**: Trains a Logistic Regression model on plain data, encrypts the model (parameters), and decrypts the scores using PHE.

In [6]:
class Alice:
    """
    Model owner.

    Fits a Logistic Regression classifier on plaintext data, encrypts the
    learned parameters with the paillier public key so they can be used
    remotely, and decrypts encrypted scores with the paillier private key.
    """

    def __init__(self):
        self.model = LogisticRegression()

    def generate_paillier_keypair(self, n_length):
        """Generate a paillier key pair and store it on the instance.

        `n_length` is forwarded to `paillier.generate_paillier_keypair`;
        the resulting keys are kept as `self.pubkey` and `self.privkey`.
        """
        keypair = paillier.generate_paillier_keypair(n_length=n_length)
        self.pubkey, self.privkey = keypair

    def fit(self, X, y):
        """Fit the wrapped model on plaintext features `X` and labels `y`."""
        fitted = self.model.fit(X, y)
        self.model = fitted

    def predict(self, X):
        """Return the plaintext predictions of the wrapped model for `X`."""
        predictions = self.model.predict(X)
        return predictions

    def encrypt_weights(self):
        """Encrypt the model parameters with the public key.

        Returns a pair `(encrypted_weights, encrypted_intercept)`: a list
        with one encrypted value per entry of the first row of
        `self.model.coef_`, and the encryption of `self.model.intercept_[0]`.
        """
        encrypt = self.pubkey.encrypt
        encrypted_weights = [encrypt(weight) for weight in self.model.coef_[0, :]]
        encrypted_intercept = encrypt(self.model.intercept_[0])
        return encrypted_weights, encrypted_intercept

    def decrypt_scores(self, encrypted_scores):
        """Decrypt each score with the private key; returns a list."""
        return list(map(self.privkey.decrypt, encrypted_scores))

**Bob**: Receives the encrypted model and the public key.
Generates scores with the encrypted model but **cannot decrypt** them.

In [7]:
class Bob:
    """
    Data owner.

    Holds the public key and the encrypted model parameters. Computes
    scores for local plaintext samples using the encrypted model; the
    resulting scores can only be decrypted with the private key, which
    Bob does not hold (it stays with Alice).
    """

    def __init__(self, pubkey):
        self.pubkey = pubkey

    def set_weights(self, weights, intercept):
        """Store the (encrypted) weight vector and intercept."""
        self.weights, self.intercept = weights, intercept

    def encrypted_score(self, x):
        """Score a single sample `x` against the encrypted model.

        Starts from the intercept and adds `x[j] * weights[j]` for every
        index `j` at which `x` is nonzero. The weights are expected to be
        a vector of `paillier.EncryptedNumber`.
        """
        total = self.intercept
        # Only the first axis of the nonzero indices is used.
        nonzero_idx, *_ = x.nonzero()
        for j in nonzero_idx:
            total += x[j] * self.weights[j]
        return total

    def encrypted_evaluate(self, X):
        """Return the list of encrypted scores, one per row of `X`."""
        row_indices = range(X.shape[0])
        return [self.encrypted_score(X[r, :]) for r in row_indices]

In [8]:
X_train, y_train, X_test, y_test = get_winsconsin_bc_dataset()

# Feature Scaling
from sklearn.preprocessing import RobustScaler

# The scaler is fitted on the training partition only and then applied,
# unchanged, to the test partition.
sc = RobustScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [9]:
print("Alice: Generating paillier keypair")
alice = Alice()
# NOTE: using smaller key sizes wouldn't be cryptographically safe
# NOTE(review): 1024 may itself be below current key-size recommendations;
# confirm before reusing this value outside a demo.
alice.generate_paillier_keypair(n_length=1024)

Alice: Generating paillier keypair


In [10]:
print("Alice: Training BC Classifier")
# `timer()` yields nothing, so no `as` target is bound.
with timer():
    alice.fit(X_train, y_train)

Alice: Training BC Classifier
[elapsed time: 0.06 s]


**Just test the model's performance on the test set**, as if Alice had access to Bob's (test) data

In [11]:
print(
    "Classify with model in the clear -- "
    "what Alice would get having Bob's data locally"
)
# `timer()` yields nothing, so no `as` target is bound.
with timer():
    # Fraction of test samples whose plaintext prediction differs from the label.
    error = np.mean(alice.predict(X_test) != y_test)
print("Error {:.3f}".format(error))

Classify with model in the clear -- what Alice would get having Bob's data locally
[elapsed time: 0.00 s]
Error 0.014


Now Alice encrypts her (trained) model parameters.

In [12]:
print("Alice: Encrypting classifier")
# `timer()` yields nothing, so no `as` target is bound.
with timer():
    encrypted_weights, encrypted_intercept = alice.encrypt_weights()

Alice: Encrypting classifier
[elapsed time: 0.04 s]


In [13]:
print("Bob: Scoring with encrypted classifier")
# Bob is handed only Alice's public key and the encrypted parameters.
bob = Bob(alice.pubkey)
bob.set_weights(encrypted_weights, encrypted_intercept)

Bob: Scoring with encrypted classifier


In [14]:
# `timer()` yields nothing, so no `as` target is bound.
with timer():
    encrypted_scores = bob.encrypted_evaluate(X_test)

[elapsed time: 0.36 s]


**Finally**, Alice needs to _decrypt_ Bob's scores on the test data

In [15]:
print("Alice: Decrypting Bob's scores")
# `timer()` yields nothing, so no `as` target is bound.
with timer():
    scores = alice.decrypt_scores(encrypted_scores)

Alice: Decrypting Bob's scores
[elapsed time: 0.05 s]


In [16]:
# The sign of each decrypted score is compared with the labels, which the
# dataset helper remapped from 0 to -1; the mean of the mismatches is the
# error rate.
error = np.mean(np.sign(scores) != y_test)
print(
    "Error {:.3f} -- this is not known to Alice, who does not possess "
    "the ground truth labels".format(error)
)

Error 0.014 -- this is not known to Alice, who does not possess the ground truth labels
