# Concrete ML - Inner Workings

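This notebook walks through the individual steps Concrete ML performs for a logistic regression classifier: training, compilation, key generation, input quantization, encryption, encrypted inference, decryption, dequantization, and post-processing in the clear.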

## Imports

In [1]:
import numpy as np
import base64
import random

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

from concrete.ml.sklearn import LogisticRegression as ConcreteLogisticRegression

## Load & Prepare Data

In [2]:
# create artifical dataset with make_classification
x, y = make_classification(n_samples=1000, n_features=5, n_informative=5, n_redundant=0, n_classes=2, random_state=1)

# train/test split
xtrain, xtest, ytrain, ytest = train_test_split(x, y, stratify=y, random_state=1)

print(f'xtrain shape: {xtrain.shape}')
print(f'xtest shape: {xtest.shape}')

In [5]:
# random test sample index
# randrange() excludes its upper bound, so the result is always a valid row
# index into xtest/ytest; random.randint(0, n) would include n itself and
# could point one past the last row
rand_test_sample_idx = random.randrange(xtest.shape[0])

## Concrete ML

In [6]:
# Number of quantization bits; can be chosen in the range 2-6.
# Trade-off when raising it:
#   - key generation and inference take longer
#   - precision improves

n_bits = 6

In [7]:
# create Concrete ML's logistic regression classifier with the chosen
# quantization bit-width and fit it on the training split in one step
model = ConcreteLogisticRegression(n_bits=n_bits).fit(xtrain, ytrain)

In [8]:
# compile the fitted Concrete ML model; the returned circuit object is used
# below for key generation, encryption, encrypted execution and decryption
# NOTE(review): xtrain is presumably the representative input set for compilation — confirm against the Concrete ML docs
fhe_circuit = model.compile(xtrain)

In [9]:
# step 1: generate the keys for the compiled circuit and show them base64-encoded
fhe_circuit.keys.generate()
print('Keys:', base64.b64encode(fhe_circuit.keys.serialize()), sep='\n')

# step 2: select the randomly chosen test observation (list indexing keeps it 2-D)
# NOTE(review): `input` shadows the built-in input(); the name is kept unchanged
# in case other cells rely on it
input = xtest[[rand_test_sample_idx]]
print('\nPlain Input:', list(input.flatten()), sep='\n')

# step 3: quantize the plain observation
quantized_input = model.quantize_input(input)
print('\nQuantized Input:', list(quantized_input.flatten()), sep='\n')

# step 4: encrypt the quantized observation and show the ciphertext base64-encoded
encrypted_input = fhe_circuit.encrypt(quantized_input)
print('\nEncrypted Input:', base64.b64encode(encrypted_input.serialize()), sep='\n')

# step 5: run the circuit on the encrypted input; the result stays encrypted
encrypted_result = fhe_circuit.run(encrypted_input)
print('\nEncrypted Results:', base64.b64encode(encrypted_result.serialize()), sep='\n')

# step 6: decrypt the encrypted prediction
decrypted_result = fhe_circuit.decrypt(encrypted_result)
print('\nDecrypted Results:', list(decrypted_result.flatten()), sep='\n')

# step 7: dequantize the decrypted values
y_logit = model.dequantize_output(decrypted_result)
print('\nDequantized Results:', list(y_logit.flatten()), sep='\n')

# step 8: turn the dequantized values into class probabilities
# (sigmoid; performed in the clear in this example)
y_proba = model.post_processing(y_logit)
print('\nClass Probabilities (sigmoid applied in clear):', list(y_proba.flatten()), sep='\n')

# step 9: pick the most probable class per row (argmax; also in the clear)
y_pred = np.argmax(y_proba, axis=1)
print('\ny_pred:', list(y_pred.flatten()), sep='\n')

# step 10: show the true label of the same observation for comparison
print('\ny_true:', list(ytest[[rand_test_sample_idx]].flatten()), sep='\n')

Keys:
b'AQAAAAAAAADAAwAAAAAAAMADAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAEAAAAAAAAA