## This notebook shows a method to measure the precision of FHE computations

The resulting FHE vectors have been computed using the C++ program in verbose mode.

Replicate them by launching the following command:

```
./FHEBERT-tiny "Nuovo Cinema Paradiso has been an incredible movie! A gem in the italian culture." --verbose
```

In [17]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
import numpy as np
import math
from matplotlib import pyplot as plt 
from datasets import load_dataset
import pandas as pd

def precision(correct, approx):
    """Return 1 minus the mean absolute error relative to the mean magnitude of `correct`.

    Computes ``1 - mean(|correct - approx|) / mean(|correct|)``, so 1 means a
    perfect match and lower values mean a larger deviation.

    Args:
        correct: Reference values (NumPy array, torch tensor, list or tuple).
        approx: Approximated values, same length as `correct`.

    Returns:
        A scalar of the same numeric family as the inputs (e.g. a NumPy float
        for NumPy inputs, a 0-d tensor when `correct` is a torch tensor).

    Note:
        The result is undefined (division by zero) when `correct` is empty or
        contains only zeros.
    """
    # Plain Python sequences do not support element-wise arithmetic or abs(),
    # so convert them first; arrays and tensors are left untouched.
    if isinstance(correct, (list, tuple)):
        correct = np.array(correct)
    if isinstance(approx, (list, tuple)):
        approx = np.array(approx)
    absolute = sum(abs(correct - approx)) / len(correct)
    relative = absolute / (sum(abs(correct)) / len(correct))
    return 1 - relative

def relative_error(correct, approx):
    """Return the mean absolute error normalised by the largest magnitude in `correct`.

    Computes ``mean(|correct - approx| / max(|correct|))``.

    Args:
        correct: Reference values (NumPy array, torch tensor, list or tuple).
        approx: Approximated values, same length as `correct`.

    Returns:
        A non-negative scalar; 0 means a perfect match.

    Note:
        The result is undefined (division by zero) when `correct` is empty or
        contains only zeros.
    """
    if isinstance(correct, (list, tuple)):
        correct = np.array(correct)
    if isinstance(approx, (list, tuple)):
        approx = np.array(approx)
    # Normalise by the largest *magnitude*: the signed maximum is negative when
    # every reference value is negative, which would flip the sign of the error.
    relative_errors = abs(correct - approx) / max(abs(correct))
    return sum(relative_errors) / len(relative_errors)

In [2]:
from transformers import logging

# Only report errors from transformers; loading would otherwise log noisy warnings.
logging.set_verbosity_error()

# Tokenizer and architecture come from the public BERT-tiny checkpoint.
MODEL_NAME = "prajjwal1/bert-tiny"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

# NOTE(review): torch.load unpickles the file, which can execute arbitrary code;
# only load SST-2-BERT-tiny.bin from a trusted source.
cpu_device = torch.device('cpu')
trained = torch.load('SST-2-BERT-tiny.bin', map_location=cpu_device)

# Remove the key that the strict state-dict load would reject as unexpected.
trained.pop('bert.embeddings.position_ids', None)
model.load_state_dict(trained, strict=True)

# Switch to evaluation mode; as the last expression this also displays the model.
model.eval()

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 128, padding_idx=0)
      (position_embeddings): Embedding(512, 128)
      (token_type_embeddings): Embedding(2, 128)
      (LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-1): 2 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=128, out_features=128, bias=True)
              (key): Linear(in_features=128, out_features=128, bias=True)
              (value): Linear(in_features=128, out_features=128, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=128, out_features=128, bias=True)
              (LayerNorm): LayerNorm((128,), eps=1e-12, e

In [4]:
# Sentence to classify; the [CLS] and [SEP] markers are added by hand.
sentence = "Nuovo Cinema Paradiso has been an incredible movie! A gem in the italian culture."
text = f"[CLS] {sentence} [SEP]"

In [6]:
# Client-side step: tokenisation and the embedding layer are evaluated in plaintext.

tokenized = tokenizer(text)  # full encoding; not used by the cells below
tokenized_text = tokenizer.tokenize(text)
indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)

# Wrap the ids in a batch of size 1.
batch_of_ids = [indexed_tokens]
tokens_tensor = torch.tensor(batch_of_ids)

# NOTE(review): every token is given token type id 1; single-sentence BERT inputs
# usually use 0 -- confirm this matches what the C++ program expects.
token_type_ids = torch.tensor([[1] * len(tokenized_text)])

x = model.bert.embeddings(tokens_tensor, token_type_ids)

In [8]:
# Write the embedding of each token position to its own file,
# ../sample-inputs/0/input_<position>.txt.
for position, token_embedding in enumerate(x[0]):
    output_path = '../sample-inputs/0/input_{}.txt'.format(position)
    np.savetxt(output_path, token_embedding.detach(), delimiter=',')

### 1) Layer 1 -- Self-Attention

In [9]:
# Plaintext (float64) re-computation of the self-attention of encoder layer[0].
attention_self = model.bert.encoder.layer[0].attention.self

# Linear weights are stored as (out_features, in_features); transpose them so
# that `input @ weight` applies the projection.
key = attention_self.key.weight.clone().detach().double().transpose(0, 1)
query = attention_self.query.weight.clone().detach().double().transpose(0, 1)
value = attention_self.value.weight.clone().detach().double().transpose(0, 1)

key_bias = attention_self.key.bias.clone().detach().double()
query_bias = attention_self.query.bias.clone().detach().double()
value_bias = attention_self.value.bias.clone().detach().double()

# Kept for the residual connection in the Self-Output step.
original_input_tensor = x.double()

input_tensor = x.double()

# Head layout read from the model configuration instead of being hard-coded
# (BERT-tiny: 2 heads of size 64, hidden size 128).
num_heads = model.config.num_attention_heads
head_size = model.config.hidden_size // num_heads
seq_len = input_tensor.size()[1]

q = torch.matmul(input_tensor, query) + query_bias
k = torch.matmul(input_tensor, key) + key_bias
v = torch.matmul(input_tensor, value) + value_bias

# Split the hidden dimension into heads: (1, seq, heads, head_size).
q = q.reshape([1, seq_len, num_heads, head_size])
k = k.reshape([1, seq_len, num_heads, head_size])
v = v.reshape([1, seq_len, num_heads, head_size])

q = q.permute([0, 2, 1, 3])  # (1, heads, seq, head_size)
k = k.permute([0, 2, 3, 1])  # (1, heads, head_size, seq): already transposed for q @ k

# Scaled dot-product scores; sqrt(head_size) is 8 for BERT-tiny.
qk = torch.matmul(q, k)
qk = qk / math.sqrt(head_size)

qk_softmaxed = torch.softmax(qk, -1)

v = v.permute([0, 2, 1, 3])

# Weighted sum of the values, then merge the heads back: (1, seq, hidden).
fin = torch.matmul(qk_softmaxed, v)
fin = fin.permute([0, 2, 1, 3])
fin = fin.reshape([1, seq_len, num_heads * head_size])

In [12]:
fhe_vector = np.array([ -0.3090, -0.0246,  0.7970, -0.0238,  0.1896,  0.3124,  0.0414, -0.2285, -0.7296, -0.3780,  0.2053, -0.3971, -0.3614,  0.0559, -0.6637, -0.3618, -0.8222, -0.0580,  0.6474,  0.1623, -0.2207, -0.1006, -0.1696, -0.0141, -0.2170,  0.2289,  0.3672, -0.2401, -0.2847, -0.4943, -0.1021,  0.3427,  0.2066, -0.1300,  0.1291,  0.8506, -0.6453, -0.6731, -0.1210,  0.3211,  0.0155, -0.2310,  0.6582,  0.1582,  0.1238, -0.3713,  0.5834,  0.1905, -0.1636,  0.3664, -0.2616,  0.0522,  0.5595,  0.2635, -0.7683,  0.2608,  0.5117,  0.5679,  0.0526,  0.6444,  0.5096, -0.7960,  0.0409,  0.3002,  0.0493,  0.1228,  0.8845, -0.5277,  0.8978,  0.0986,  0.1151,  0.5468,  1.2089, -0.1721,  0.8912,  0.3563,  0.5092,  0.2152,  0.1775, -0.2963, -0.5777, -0.4493,  0.2931, -0.2802,  0.6357, -0.2518, -1.1975, -0.3656, -0.6256, -0.6295,  0.2502, -0.0918,  0.9606,  0.2442,  0.8670,  0.3603,  0.6010, -0.4238,  0.2473,  0.8414,  0.8304,  0.7751,  0.2766,  0.2580, -0.7249,  0.5046, -0.3302,  0.5718, -0.5278,  1.1236,  1.7388, -0.3552, -0.3844,  1.1474, -0.4412,  0.6235, -0.5075, -0.6485, -0.1250,  0.2654,  1.2387,  0.7896, -0.5817, -0.1745, -0.0075, -0.2276,  1.3081, -0.9385 ])

# Precision of the FHE result (fhe_vector above) with respect to the plaintext
# self-attention output at batch index 0, token position 0.
precision(fin[0][0].detach(), fhe_vector)

tensor(0.9950, dtype=torch.float64)

### 2) Layer 1 -- Self-Output

In [13]:
# Dense projection applied to the attention output of encoder layer[0]; the
# weight is transposed so it can be used as `input @ weight`.
attention_output_dense = model.bert.encoder.layer[0].attention.output.dense
w_output_dense = attention_output_dense.weight.clone().detach().double().transpose(0, 1)
b_output_dense = attention_output_dense.bias.clone().detach().double()

mean = np.array([-0.03383045433490704, -0.04689138747464171, -0.04320052751297194, -0.04194874763842685, -0.03849735236740709, -0.03583471496309556, -0.036673685450259945, -0.03533623114666153, -0.03301200050649906, -0.03385619903604035, -0.03394064677150061, -0.03581378040060232, -0.04000193681582013, -0.042994980738727644, -0.042689484809151766, -0.0422699887342667, -0.040702211423783496, -0.043257636922742766, -0.040924377288572664, -0.04212762593354266, -0.040090620729304687, -0.03727317047412721, -0.030603299343800818, -0.034141189654495016, -0.03468711091296442, -0.032307857857310274, -0.02926372943560165, -0.031292906450152466, -0.037837883896213766, -0.03745859562807607, -0.03794657692710982, -0.03860214509229593, -0.036185650111238955, -0.039154371235979875, -0.03589729976884486, -0.031731895884233016, -0.03465287223481833, -0.031348414682812194, -0.03688161652969029, -0.03338290816163936, -0.038240660222183975, -0.037525466450406116, -0.038229222217722264, -0.041201914113547705, -0.04212576296359885, -0.03980083151775188, -0.04072657806877826, -0.040145599490268025, -0.036685242667777444, -0.034109016054392725, -0.03544325775104831, -0.03623692053970561, -0.04948334692050963, -0.04596823422981405, -0.04892271117435003])
var = np.array([0.7495962428549272, 0.6109555428467895, 0.6225590467577651, 0.62495153067201, 0.631395549935461, 0.634492711694546, 0.644892789064359, 0.6542099965205022, 0.6595559062153842, 0.6659906881037033, 0.6680168012366937, 0.6758412527257586, 0.6668118068796066, 0.6718192460326265, 0.67786737736941, 0.6808577853930836, 0.6736657333151266, 0.6676446046843724, 0.6659979061989304, 0.6743226078654423, 0.681388263935704, 0.6837117808950258, 0.6907147768934253, 0.684537831509984, 0.6896744328697597, 0.6916627127801457, 0.6954043965468235, 0.6954046755145293, 0.7001025287354249, 0.695094327647078, 0.6854203403085795, 0.7027792682295838, 0.6956849098218769, 0.6945153573872891, 0.6856697060013522, 0.6897353511373785, 0.700668908202082, 0.6965624918742969, 0.7082690699456209, 0.7043163331126293, 0.7070770512949652, 0.7042510307314358, 0.6978925459183357, 0.7205035876616076, 0.6902461198740245, 0.686971254827903, 0.7028843270104062, 0.7032880792671149, 0.7057843340136714, 0.7104860015626775, 0.7321738164781159, 0.71095817492914, 0.7401485084476891, 0.7312957890728539, 0.7375994654874705])
    
# Dense projection of the attention output, followed by the residual connection
# with the input of this encoder layer.
fin2 = torch.matmul(fin, w_output_dense) + b_output_dense
fin2_backup = fin2.clone()
fin2_backup = fin2_backup + original_input_tensor

# The LayerNorm affine parameters are the same for every token, so they are
# read once here instead of on every loop iteration.
w_output_layernorm = model.bert.encoder.layer[0].attention.output.LayerNorm.weight.clone().detach().double().unsqueeze(0)
b_output_layernorm = model.bert.encoder.layer[0].attention.output.LayerNorm.bias.clone().detach().double()

fin3_whole = []

# Index the batch dimension explicitly: squeeze() would also drop the token
# dimension for a one-token input and make the loop iterate over features.
residual_rows = fin2_backup[0]

for i in range(len(residual_rows)):
    fin2 = residual_rows[i]

    # Normalisation with the precomputed per-position `mean` / `var` arrays
    # defined earlier in this cell. `var[i]` is used as a multiplier, so it
    # presumably holds 1/sqrt(variance) -- TODO confirm.
    # Per-token statistics computed on the fly would instead be:
    #   (fin2 - torch.mean(fin2)) / math.sqrt(torch.var(fin2))
    fin3_corr = (fin2.detach() - mean[i]) * var[i]

    fin3_corr = fin3_corr * w_output_layernorm + b_output_layernorm
    fin3_whole.append(fin3_corr.detach())

# Each entry has shape (1, hidden): stack them back into (1, seq, hidden).
fin3_whole = torch.cat(tuple(fin3_whole), 0).unsqueeze(0)

In [16]:
fhe_vector = np.array([  0.6930, -0.5836, -11.3889, -0.0462,  0.2550, -0.4483,  0.6281,  0.0284, -0.9769,  0.3603, -0.5658,  0.8379,  0.5758,  0.2406,  0.2202,  0.5510,  1.4724,  0.4067, -0.1197,  0.5598, -0.6235,  0.2132,  4.4105,  2.0434, -0.1254, -0.6448,  0.5274, -0.9370,  0.3686,  0.8600, -0.1691,  0.8700, -6.1766,  0.2989, -0.6879, -0.0309,  0.3127, -0.6077,  0.2840, -0.8319,  0.4102,  0.3099,  1.8171, -0.8056, -0.7394,  0.1004,  0.6482, -1.0438,  0.1725,  0.4959,  1.7685, -0.3023,  0.5170, -0.2840,  0.8871,  0.5601, -0.8803,  0.1720,  1.4547,  0.4690, -0.0465, -0.0422, -0.3034, -0.7454,  0.2730, -0.4512, -0.4158,  0.8574, -0.0203,  0.1191,  1.3004,  0.4352,  0.6041, -1.5679,  0.4224, -1.7257, -0.7649,  0.5883,  1.9769, -1.6560,  0.0561, -1.2272,  0.8791, -0.4637,  1.0749, -0.2130,  0.5245, -0.0700,  0.6638,  0.0593,  0.1360,  0.1644,  0.3556,  0.9336,  0.2095,  1.7334,  0.5050,  0.2708, -0.0329,  0.1233, -0.9735,  0.9272,  0.6834,  1.0316,  0.1521,  0.0830,  1.3418,  0.7112,  1.1196, -0.8479, -0.2215,  0.1823, -0.5283,  1.2897, -0.2983, -0.2427,  0.4042, -1.4808, -0.4131, -0.9945, -0.6586,  0.2073, -0.4463,  0.1715,  0.9978, -0.8389, -0.9017, -0.3221,  ])

# Precision of the FHE result (fhe_vector above) with respect to the plaintext
# self-output at batch index 0, token position 0.
precision(fin3_whole[0][0].detach(), fhe_vector)

tensor(0.9968, dtype=torch.float64)

### 3) Layer 1 -- Intermediate

In [17]:
# Intermediate dense layer of encoder layer[0] followed by GELU; the weight is
# cast to float64 to match the activations.
intermediate_dense = model.bert.encoder.layer[0].intermediate.dense
w_intermediate = intermediate_dense.weight.transpose(0, 1).double()
fin_4 = torch.matmul(fin3_whole, w_intermediate) + intermediate_dense.bias
fin_5 = torch.nn.functional.gelu(fin_4)

In [18]:
fhe_vector = np.array([  0.2036, -0.0313,  0.7490,  0.0612, -0.0135, -0.1694,  0.0208,  1.0015,  0.1318, -0.0456, -0.1555, -0.0852, -0.1233, -0.1407, -0.0844, -0.0187, -0.1626,  0.2078, -0.0208,  0.3079,  0.0447,  0.3520,  0.1583, -0.1700,  0.7310, -0.0262, -0.1616,  0.0650, -0.1131, -0.1697, -0.0770, -0.0338, -0.0262, -0.0071, -0.0211, -0.0302,  0.1276, -0.1410,  0.4922,  0.1013, -0.1661, -0.1573, -0.1667, -0.1605, -0.1674, -0.1589,  0.1778,  0.5049,  0.3299,  0.2241, -0.1698, -0.1699, -0.1372, -0.0618,  0.2184,  0.0348, -0.1032, -0.1380,  0.2243,  1.1460, -0.1672,  0.5425,  0.0342,  0.0437, -0.1593, -0.1697, -0.1431, -0.0739,  0.4758,  0.2844, -0.1695, -0.0218, -0.1391, -0.0248,  1.0260,  0.6599,  0.9462, -0.0410, -0.1054, -0.1629,  0.1213, -0.1220, -0.1264,  0.0073, -0.0455, -0.1655,  0.6428, -0.1556, -0.0007,  0.2048, -0.1676, -0.1559,  0.0775,  0.0409,  0.2766,  0.1814,  0.6938, -0.1592, -0.1549, -0.1524,  0.1002, -0.1676,  0.0265,  0.3877,  0.1593,  0.4154,  0.5991,  1.2665,  0.1551,  0.0489,  0.0716, -0.1136, -0.1078, -0.1358, -0.0389,  0.2928, -0.0519, -0.1381, -0.0535, -0.1350,  0.1339, -0.0923, -0.0341,  0.2892, -0.1373, -0.1193, -0.0124, -0.0879 ])

# `[:128]` keeps only the first 128 entries of the plaintext GELU output for
# batch index 0, token position 0; presumably fhe_vector above holds only that
# many values of the FHE result -- TODO confirm.
precision(fin_5[0][0][:128].detach(), fhe_vector)

tensor(0.9960, dtype=torch.float64)

### 4) Layer 1 -- Output

In [27]:
mean = np.array([-0.09545516102868973, 0.034540955180462664, 0.03934738149667437, 0.040802318439555035, 0.04426037798445811, 0.04919343175846099, 0.0493616301294401, 0.047896279398118795, 0.04912640635535303, 0.048717249992826256, 0.0477219385203478, 0.05095357678578503, 0.05094908370417657, 0.0493275745992752, 0.048418324664654545, 0.0473653504669205, 0.04528009986283869, 0.04524247257539856, 0.046555073355952846, 0.0516135997743503, 0.049103903254210594, 0.048877585502238356, 0.048364988370661784, 0.049043507301742846, 0.049933470462367846, 0.05175179126331398, 0.05057227793143223, 0.055763206569478994, 0.055243365455213404, 0.04986745821758072, 0.047789218698650125, 0.047852162700887234, 0.04279460740337753, 0.04280733225675328, 0.04644169155736491, 0.04783492130826333, 0.04759649093761958, 0.045252139153821, 0.04367184005341422, 0.039034762655413016, 0.04374965234639466, 0.04355128435775863, 0.04499861862695065, 0.04318602336450084, 0.04549296197766528, 0.03907804279518851, 0.037683132925437485, 0.04109696491189214, 0.04410155617431274, 0.05015992918511731, 0.04335430986396108, 0.046492484403760526, 0.044277581701870204, 0.03723061917091777, 0.039156973130334664])
var = np.array([0.4156698594967092, 0.7008452266859936, 0.7214270983257646, 0.7095727482866087, 0.7102521835201318, 0.710293676073547, 0.7091783271698753, 0.6973493176419543, 0.7011688527520855, 0.7007704875343309, 0.6950537183089973, 0.6948029158092094, 0.6919309911197036, 0.6933694537037308, 0.6970711644923971, 0.7004276850010867, 0.6964234913676165, 0.6987678419874651, 0.6951829293138483, 0.6973048809142951, 0.6989420799277399, 0.7005696487948311, 0.6993937733493811, 0.6902070532566239, 0.6958399824203775, 0.6900361005407983, 0.6925891359742274, 0.6831642926666377, 0.6865279710039072, 0.6904370385593245, 0.6963724536275457, 0.6948942601360332, 0.6784634186071326, 0.6759657478656234, 0.6828578884489792, 0.683566347862741, 0.6857777074044566, 0.672040915409448, 0.6784995422914343, 0.6732453264186854, 0.683881765911935, 0.6909411690410042, 0.6715428435769978, 0.6775867807314924, 0.6785015863916147, 0.676156117696202, 0.6786376609996214, 0.6763771062984715, 0.7119440584663215, 0.7070342067744777, 0.6895996022331654, 0.6683970656272868, 0.6695013664908844, 0.6566575067124804, 0.672887703816164])    
    
# Output dense layer of encoder layer[0], followed by the residual connection
# with the Self-Output result (fin3_whole).
fin_6 = torch.matmul(fin_5, model.bert.encoder.layer[0].output.dense.weight.transpose(0, 1).double()) + model.bert.encoder.layer[0].output.dense.bias
fin_6 = fin_6 + fin3_whole

# The LayerNorm affine parameters are the same for every token, so they are
# read once here instead of on every loop iteration.
w_output_layernorm = model.bert.encoder.layer[0].output.LayerNorm.weight.clone().detach().double().unsqueeze(0)
b_output_layernorm = model.bert.encoder.layer[0].output.LayerNorm.bias.clone().detach().double()

fin7_whole = []

# Index the batch dimension explicitly: squeeze() would also drop the token
# dimension for a one-token input and make the loop iterate over features.
fin_6_rows = fin_6[0]

for i in range(len(fin_6_rows)):
    fin_7 = fin_6_rows[i]

    # Normalisation with the precomputed per-position `mean` / `var` arrays
    # defined earlier in this cell. `var[i]` is used as a multiplier, so it
    # presumably holds 1/sqrt(variance) -- TODO confirm.
    fin7_corr = (fin_7.detach() - mean[i]) * var[i]

    fin7_corr = fin7_corr * w_output_layernorm + b_output_layernorm

    fin7_whole.append(fin7_corr.detach())

# Each entry has shape (1, hidden): stack them back into (1, seq, hidden).
fin7_whole = torch.cat(tuple(fin7_whole), 0).unsqueeze(0)

In [28]:
fhe_vector = np.array([  0.6424, -0.4183, -5.7956,  0.0586,  0.3221,  0.1392,  0.4846,  0.1377, -0.5715,  0.6327, -0.4416,  0.2155,  0.5938,  0.2331, -0.2338,  0.3281,  0.8561,  0.2883, -0.0535,  0.7715, -0.2160, -0.2260,  2.1372,  1.0728,  0.3306, -0.3512,  0.3824, -0.5885,  0.6194,  0.6495, -0.1989,  0.8904, -3.1494,  0.0803, -0.4179,  0.1636,  0.5439, -0.3502, -0.0813, -0.7531,  0.1106,  0.2494,  0.4909, -0.5352,  0.0497,  0.4297,  0.1061, -0.3740,  0.1505,  0.5614,  0.3989, -0.0217,  0.5015, -0.6834,  0.2683,  0.2792, -0.7177,  0.4818,  0.9521,  0.1921,  0.1787, -0.0114, -0.1805, -0.5684,  0.1129, -0.0196,  0.1483,  0.7506,  0.4292, -0.0407,  0.9526, -0.1415,  0.0881, -0.8449,  0.3157, -0.7813, -0.3296,  0.5706,  1.3779, -0.4998,  0.3389, -0.6015,  0.5237,  0.1444,  0.7592,  0.0488,  0.3009,  0.1576,  0.4370, -0.1002,  0.0497,  0.5814,  0.4891,  0.0543,  0.5678,  0.9361,  0.1419,  0.0758,  0.0734, -0.1712, -0.2375,  0.5297,  0.6446,  0.6805, -0.1038, -0.1348,  0.8626,  0.3060,  0.7431, -0.6814, -0.2641,  0.5244, -0.1873,  0.9844, -0.4480, -0.2002,  0.5243, -0.7223, -0.2745, -0.2728, -0.5158,  0.1901, -0.1706, -0.2167,  0.4850, -1.1066, -0.0582, -0.1517,  ])

# Precision of the FHE result (fhe_vector above) with respect to the plaintext
# layer output at batch index 0, token position 0.
precision(fin7_whole[0][0].detach(), fhe_vector)

tensor(0.9967, dtype=torch.float64)

### 5) Layer 2 -- Self-Attention

In [29]:
# Plaintext (float64) re-computation of the self-attention of encoder layer[1].
# The variable names of the layer[0] cells are reused, so cells must be run in order.
attention_self = model.bert.encoder.layer[1].attention.self

# Linear weights are stored as (out_features, in_features); transpose them so
# that `input @ weight` applies the projection.
key = attention_self.key.weight.clone().detach().double().transpose(0, 1)
query = attention_self.query.weight.clone().detach().double().transpose(0, 1)
value = attention_self.value.weight.clone().detach().double().transpose(0, 1)

key_bias = attention_self.key.bias.clone().detach().double()
query_bias = attention_self.query.bias.clone().detach().double()
value_bias = attention_self.value.bias.clone().detach().double()

# The input of this layer is the output of the previous one; it is also kept
# for the residual connection in the Self-Output step.
original_input_tensor = fin7_whole
input_tensor = fin7_whole

# Head layout read from the model configuration instead of being hard-coded
# (BERT-tiny: 2 heads of size 64, hidden size 128).
num_heads = model.config.num_attention_heads
head_size = model.config.hidden_size // num_heads
seq_len = input_tensor.size()[1]

q = torch.matmul(input_tensor, query) + query_bias
k = torch.matmul(input_tensor, key) + key_bias
v = torch.matmul(input_tensor, value) + value_bias

# Split the hidden dimension into heads: (1, seq, heads, head_size).
q = q.reshape([1, seq_len, num_heads, head_size])
k = k.reshape([1, seq_len, num_heads, head_size])
v = v.reshape([1, seq_len, num_heads, head_size])

q = q.permute([0, 2, 1, 3])  # (1, heads, seq, head_size)
k = k.permute([0, 2, 3, 1])  # (1, heads, head_size, seq): already transposed for q @ k

# Scaled dot-product scores; sqrt(head_size) is 8 for BERT-tiny.
qk = torch.matmul(q, k)
qk = qk / math.sqrt(head_size)

qk_softmaxed = torch.softmax(qk, -1)

v = v.permute([0, 2, 1, 3])

# Weighted sum of the values, then merge the heads back: (1, seq, hidden).
fin = torch.matmul(qk_softmaxed, v)
fin = fin.permute([0, 2, 1, 3])
fin = fin.reshape([1, seq_len, num_heads * head_size])

In [30]:
fhe_vector = np.array([ -0.8123, -0.8500,  0.0295,  0.2296,  0.4401, -0.6094,  1.6168,  0.2558, -0.2224, -0.6283, -0.5895,  0.7919, -0.2594, -0.3843,  0.0067,  1.5401, -0.0503,  0.1357, -0.4071, -0.4671, -1.0653, -1.1093, -2.0851,  0.5782,  0.5840, -0.6833,  1.5346,  1.3422,  0.2175,  0.9805, -0.1275, -1.5916,  1.0102, -0.1957,  0.0962, -0.0464, -0.4231, -1.3056,  0.0510, -1.1596,  0.1894,  0.4713, -0.0684, -1.0158, -0.2589, -0.5890, -0.8593, -0.2406,  0.2359,  0.8717, -0.7101, -1.6676, -0.3206, -0.3165, -0.8318, -0.7661, -0.8755,  0.2422, -1.1412, -0.2040,  0.8289, -0.2363, -0.6205, -0.4749, -0.5698,  0.6264, -0.6598, -0.3961,  0.3553, -0.3192, -0.1223, -0.0449, -0.3661,  0.7190, -0.3748,  0.1306,  0.9412, -1.5460,  0.8761, -0.1402,  1.2423,  0.7885, -0.3937, -0.0085, -1.3537,  0.0370, -1.2522,  1.0030,  2.0746, -0.7593, -0.2284, -0.3362,  0.6514, -0.0331, -0.1410,  1.6767, -0.2301,  1.1221, -0.6067,  0.6165, -0.6068, -1.2288, -0.9807,  0.4249, -1.3200, -0.2358, -0.9543,  0.7164,  0.9259,  0.6031,  0.3302, -0.2839, -0.2300, -0.8882, -0.6937, -0.4157,  0.5060, -0.0074, -0.8772, -0.7689,  0.4577, -1.0608, -0.1394, -1.5404,  1.5020,  0.1260, -0.5764, -0.5608 ])

# Precision of the FHE result (fhe_vector above) with respect to the plaintext
# layer-2 self-attention output at batch index 0, token position 0.
precision(fin[0][0].detach(), fhe_vector)

tensor(0.8062, dtype=torch.float64)

### 6) Layer 2 -- Self-Output

In [31]:
mean = np.array([0.04805131047475803, 0.014145706172069285, 0.010630181813540026, 0.010521146572975027, 0.00956244983947186, 0.008211288558782809, 0.008817800275674387, 0.008911457532306733, 0.008643898058317862, 0.008801769546523253, 0.009472254700839258, 0.008094415948174241, 0.007702615754430344, 0.005460620353838359, 0.007021847370084451, 0.008373831982472147, 0.01022061224155272, 0.00927594903773269, 0.009277225000069925, 0.007049453120897054, 0.008682554190420182, 0.008749022040809715, 0.010118317324741522, 0.008998865743435887, 0.008763833543884292, 0.008285728555981435, 0.006967351876718886, 0.00588068616144895, 0.0030701809065725363, 0.003659716972971551, 0.002116778487431024, 0.003947434346765913, 0.006907859825079262, 0.008494112860837831, 0.007040283968419036, 0.007197681884381672, 0.008232685835987293, 0.009965029801574864, 0.00731962961637719, 0.00830555309310382, 0.005340440177451385, 0.007833324368720607, 0.01047456825511633, 0.009674864773662995, 0.010093537461664302, 0.01588798917017868, 0.018537933333636507, 0.018245848282989877, 0.012253993810893607, 0.011354133953173591, 0.013474744814287221, 0.013707011955501919, 0.007918842609048385, 0.017240907760895086, 0.03465881962238184])
var = np.array([0.6741653046411179, 0.602392389437227, 0.5945841451997256, 0.5997135932136959, 0.6033806506910513, 0.6064839949503851, 0.6058735285405447, 0.6059001754921257, 0.6086086189801689, 0.6118981975241923, 0.6161533101614306, 0.6105411757987637, 0.6102443339235957, 0.6004337682468068, 0.6068584434133084, 0.6123178593290803, 0.6150302868629213, 0.6102744641580546, 0.6143169356654037, 0.6105845722771672, 0.61540315154488, 0.622109065598561, 0.6221720668578823, 0.6279330579960701, 0.6282907135959079, 0.6258439179151315, 0.6187239026398644, 0.618294817104495, 0.609488586748927, 0.6085185174201381, 0.6154275326252285, 0.6207534846328591, 0.6290521066315713, 0.6375810334496135, 0.6238236165346044, 0.6310571465398529, 0.6350551779511981, 0.6452639043477173, 0.6346915398812409, 0.646622546259538, 0.6435498445423712, 0.6401589932559348, 0.6458833892517316, 0.6354378204804867, 0.651796667347259, 0.6547600574517144, 0.6554038815336571, 0.655910889886979, 0.6412602949793637, 0.6489736968517984, 0.6633309254993116, 0.6771441398382873, 0.6423362709438692, 0.6302863730404997, 0.5940213893371686])

# Dense projection applied to the attention output of encoder layer[1]; the
# weight is transposed so it can be used as `input @ weight`.
w_output_dense = model.bert.encoder.layer[1].attention.output.dense.weight.clone().detach().double().transpose(0, 1)
b_output_dense = model.bert.encoder.layer[1].attention.output.dense.bias.clone().detach().double()

# Projection followed by the residual connection with the input of this layer.
fin2 = torch.matmul(fin, w_output_dense) + b_output_dense
fin2_backup = fin2.clone()
fin2_backup = fin2_backup + original_input_tensor

# The LayerNorm affine parameters are the same for every token, so they are
# read once here instead of on every loop iteration.
w_output_layernorm = model.bert.encoder.layer[1].attention.output.LayerNorm.weight.clone().detach().double().unsqueeze(0)
b_output_layernorm = model.bert.encoder.layer[1].attention.output.LayerNorm.bias.clone().detach().double()

fin3_whole = []

# Index the batch dimension explicitly: squeeze() would also drop the token
# dimension for a one-token input and make the loop iterate over features.
residual_rows = fin2_backup[0]

for i in range(len(residual_rows)):
    fin2 = residual_rows[i]

    # Normalisation with the precomputed per-position `mean` / `var` arrays
    # defined earlier in this cell. `var[i]` is used as a multiplier, so it
    # presumably holds 1/sqrt(variance) -- TODO confirm.
    fin3_corr = (fin2.detach() - mean[i]) * var[i]

    fin3_corr = fin3_corr * w_output_layernorm + b_output_layernorm
    fin3_whole.append(fin3_corr.detach())

# Each entry has shape (1, hidden): stack them back into (1, seq, hidden).
fin3_whole = torch.cat(tuple(fin3_whole), 0).unsqueeze(0)

In [32]:
fhe_vector = np.array([  1.3770, -1.8186, -1.6910,  0.6093, -0.1824,  0.1786,  1.6645,  0.7341, -0.6092,  0.7240,  1.0244, -0.7155, -0.0153,  0.1687, -0.1843, -0.0103,  1.8486, -0.8672, -1.6907,  0.5330, -0.2316,  1.0860,  3.3427,  1.8338, -0.4012, -0.4893,  0.4482, -1.6318,  0.7493,  0.5131, -1.1009,  1.2824, -3.2195,  0.6660, -0.3238, -0.4962,  0.3410, -1.0572, -1.1014,  0.1388, -1.7925,  0.8096, -2.0355, -0.9068,  1.1941, -1.8014,  0.0378, -0.2286,  1.4185,  0.5991,  1.5236,  0.1015,  1.5935, -1.3028,  1.0833,  0.0207, -2.5202,  0.4889,  1.9203,  0.3599,  1.5069, -0.5983, -0.9472, -1.4128,  0.0251, -0.8160, -1.4836,  0.9483,  0.5418,  0.0704,  2.0288,  0.7253,  0.7689,  0.0401,  0.4672, -0.9288, -0.4404,  0.5059,  1.1886,  1.2352, -0.6807, -0.8505, -0.8434, -0.8269,  0.4738, -0.1371, -0.7369, -1.1949,  1.9052, -0.0479,  0.1652,  1.2224,  0.0298,  1.5454,  0.6461,  1.4474, -0.2515,  0.0815,  1.0245,  0.3735, -0.4966,  0.7358,  1.1659, -0.0261, -0.8297, -0.9907,  0.1873, -0.1336,  2.1544, -1.1358, -0.3534, -0.7904,  0.3181,  2.8254,  0.1058,  0.4013, -0.1092, -1.3576, -1.6598, -1.1263,  1.2364,  1.3081,  0.7460,  1.7961, -0.8862, -1.6055, -2.5590, -1.2520,  ])

# Precision of the FHE result (fhe_vector above) with respect to the plaintext
# layer-2 self-output at batch index 0, token position 0.
precision(fin3_whole[0][0].detach(), fhe_vector)

tensor(0.8476, dtype=torch.float64)

### 7) Layer 2 -- Intermediate

In [34]:
# Intermediate dense layer of encoder layer[1] followed by GELU; the weight is
# cast to float64 to match the activations.
intermediate_dense = model.bert.encoder.layer[1].intermediate.dense
w_intermediate = intermediate_dense.weight.transpose(0, 1).double()
fin_4 = torch.matmul(fin3_whole, w_intermediate) + intermediate_dense.bias
fin_5 = torch.nn.functional.gelu(fin_4)

In [35]:
fhe_vector = np.array([ -0.0683, -0.0562,  1.3694, -0.1403, -0.0703, -0.1537, -0.1658,  0.4683, -0.0210, -0.0005, -0.1563, -0.1662, -0.0726, -0.0936, -0.1059, -0.1640, -0.0935, -0.1621, -0.1689, -0.0099, -0.1663, -0.0288, -0.0431, -0.1237, -0.1699,  1.3531, -0.0271, -0.1230, -0.0315, -0.1606, -0.1465, -0.1607, -0.1111, -0.1253, -0.1450,  0.0236,  1.3971, -0.1058, -0.1201,  0.6417, -0.1595, -0.1340,  0.3707, -0.0008, -0.1128,  0.5451, -0.0293, -0.0071,  0.0274,  0.0609, -0.1688,  0.0656,  2.1240, -0.1667, -0.0440,  0.1143,  1.0968, -0.0513,  0.0032, -0.1634, -0.1636, -0.0114,  0.3805, -0.0070, -0.1653, -0.0769, -0.1158,  0.8741,  0.2711,  0.5258,  0.7994, -0.1006,  0.0409,  0.1818,  0.7667, -0.0002, -0.0010, -0.0818,  1.7126, -0.0568, -0.1693,  0.0353, -0.1208,  0.4403, -0.0188, -0.1565, -0.1689, -0.0890, -0.1334,  0.1987, -0.0233,  1.3172, -0.1609,  0.0282, -0.1696, -0.0133, -0.0055, -0.1478, -0.1054, -0.1695,  0.0069, -0.1269,  3.3577, -0.1573, -0.0397,  0.1693,  0.9749,  0.9535,  0.9454,  0.0970, -0.1689,  2.1287,  0.2266,  1.7717, -0.1216, -0.1288, -0.0847, -0.1600, -0.0314,  0.6490, -0.0465, -0.1664, -0.0266, -0.1589, -0.0447, -0.1676, -0.0243, -0.1359 ])
# `[:128]` keeps only the first 128 entries of the plaintext GELU output for
# batch index 0, token position 0; presumably fhe_vector above holds only that
# many values of the FHE result -- TODO confirm.
precision(fin_5[0][0][:128].detach(), fhe_vector)

tensor(0.8045, dtype=torch.float64)

### 8) Layer 2 -- Output

In [36]:
# Output dense layer of encoder layer[1], followed by the residual connection
# with the Self-Output result (fin3_whole).
output_dense = model.bert.encoder.layer[1].output.dense
w_output = output_dense.weight.transpose(0, 1).double()
fin_6 = torch.matmul(fin_5, w_output) + output_dense.bias
fin_6 = fin_6 + fin3_whole

# Filled one token at a time by the normalisation loop later in this cell.
fin7_whole = []

mean = np.array([0.06643368, 0.05726708, 0.05311476, 0.05229822, 0.05352628, 0.05238868, 0.0536801 , 0.05327334, 0.05206954, 0.05110339, 0.051747  , 0.05016997, 0.04943122, 0.04937956, 0.04952862, 0.04973959, 0.04852742, 0.04696055, 0.04846476, 0.04925392,0.0509005 , 0.05373027, 0.05371865, 0.05446217, 0.05222489,0.05142676, 0.05080909, 0.05179351, 0.05049174, 0.04965748,0.05138143, 0.0499965 , 0.05194982, 0.05178364, 0.0521023 ,0.05059624, 0.05445499, 0.05507825, 0.05241935, 0.05073552,0.05200171, 0.04858642, 0.04419684, 0.04642237, 0.05115073,0.05028116, 0.05021724, 0.05312114, 0.0524375 , 0.04643478,0.05026358, 0.04248708, 0.04675281, 0.03895142, 0.04558007])
var = np.array([0.81992316, 0.78486345, 0.79259   , 0.79754392, 0.79350872, 0.79652433, 0.79935746, 0.79867687, 0.80257863, 0.80235328,0.80521209, 0.80621272, 0.80330435, 0.80469855, 0.81171202,0.81136354, 0.80977166, 0.8089956 , 0.8106946 , 0.80862825,0.81450049, 0.81722176, 0.82121488, 0.82012788, 0.8254015 ,0.82097106, 0.81742119, 0.82090554, 0.82116105, 0.82017896,0.82234659, 0.82832269, 0.82888739, 0.81852014, 0.82054523,0.8224114 , 0.82913892, 0.8289046 , 0.81985612, 0.83341215,0.82896934, 0.82315006, 0.82802216, 0.81886278, 0.8274004 ,0.83436616, 0.82014282, 0.82628005, 0.83230868, 0.84511334,0.85141143, 0.84934269, 0.83041272, 0.826798  , 0.83660989])

# The LayerNorm affine parameters are the same for every token, so they are
# read once here instead of on every loop iteration.
w_output_layernorm = model.bert.encoder.layer[1].output.LayerNorm.weight.clone().detach().double().unsqueeze(0)
b_output_layernorm = model.bert.encoder.layer[1].output.LayerNorm.bias.clone().detach().double()

# Index the batch dimension explicitly: squeeze() would also drop the token
# dimension for a one-token input and make the loop iterate over features.
fin_6_rows = fin_6[0]

for i in range(len(fin_6_rows)):
    fin_7 = fin_6_rows[i]

    # Normalisation with the precomputed per-position `mean` / `var` arrays
    # defined earlier in this cell. `var[i]` is used as a multiplier, so it
    # presumably holds 1/sqrt(variance) -- TODO confirm.
    fin7_corr = (fin_7.detach() - mean[i]) * var[i]

    fin7_corr = fin7_corr * w_output_layernorm + b_output_layernorm

    fin7_whole.append(fin7_corr.detach())

# Each entry has shape (1, hidden): stack them back into (1, seq, hidden).
fin7_whole = torch.cat(tuple(fin7_whole), 0).unsqueeze(0)

In [37]:
fhe_vector = np.array([  1.7862, -2.0674, -0.2598, -0.2733, -0.6657,  0.9152,  0.6312,  1.0929,  0.3524,  0.7416,  1.2811,  0.2206, -0.8468, -0.5167, -0.1659,  0.6020,  1.3285, -0.9925, -1.9419,  0.2647,  0.2765,  2.3105,  2.8253,  0.5482, -1.4334, -0.3802, -0.3619, -1.4140,  0.5294,  0.6072, -1.8004,  0.5581, -2.8421,  0.2503,  0.4287, -0.3454,  0.2077, -1.0119, -1.5609, -0.6429, -1.4182,  0.1802, -1.7165, -0.2336,  0.9720, -1.6756, -0.1018,  0.1046,  1.0860,  0.9126,  0.9143,  1.2146,  0.9086, -0.7014,  0.1545,  0.2080, -3.3099, -0.0128,  2.6352,  1.0011,  0.7249, -1.0812, -1.1600, -1.2047, -0.4703, -0.2173, -1.3199,  2.1158,  0.7578, -0.3307,  0.9533,  0.5950, -0.1573,  0.8234,  0.7160,  0.6736,  0.2819,  0.8938, -0.2418,  1.3240, -2.0877, -1.4034, -1.4676,  0.0390,  1.2058, -0.2615, -1.5343, -2.3307,  1.4072,  0.2342, -0.0834,  1.4614, -0.2938,  0.3641,  1.0156, -0.0286, -0.7436, -1.0538,  1.2407, -0.2570, -0.8378, -0.0219,  1.4528, -0.0432, -0.9515, -1.8858, -0.4678, -1.1132,  1.9448, -0.1209,  0.0245, -1.3956, -0.0740,  2.0146,  0.8191,  1.0593,  0.4562, -0.5237, -2.1609, -0.7794,  1.5314,  0.5960,  1.6027,  0.2354, -0.7131, -0.2370, -2.6296, -2.5624,  ])

# Precision of the FHE result (fhe_vector above) with respect to the plaintext
# layer-2 output at batch index 0, token position 0.
precision(fin7_whole[0][0].detach(), fhe_vector)

tensor(0.8662, dtype=torch.float64)

### 9) Pooler

In [39]:
# Pooler: dense layer followed by tanh. It is applied to every token position
# here; only position 0 is compared with the FHE result below.
pooler_dense = model.bert.pooler.dense
w_pooler = pooler_dense.weight.transpose(0, 1).double()
pooler_pre_activation = torch.matmul(fin7_whole.double(), w_pooler) + pooler_dense.bias
pooler_output = torch.tanh(pooler_pre_activation)

fhe_vector = np.array([ -0.9927,  0.8013,  0.9060,  0.9605,  0.8593,  0.9571,  0.9932, -0.9621, -0.7717, -0.7638,  0.9065, -0.7645,  0.7586, -1.0000, -0.7322,  0.8411, -0.9783, -0.8740, -0.9485, -0.9935, -0.7144, -0.7350, -0.9765, -0.9997, -0.9999,  0.8470,  0.7017,  0.9998,  0.9908,  0.8513,  0.8815,  0.8093, -0.6729, -1.0000,  0.9950,  0.9232, -0.1012,  0.8349,  0.9545, -0.9780,  0.9877, -0.8211,  0.4969,  0.5953,  1.0000, -0.8886,  0.9633,  0.9491,  0.8541,  0.9537,  0.2123,  0.3760,  0.8408, -0.9647, -0.9615, -1.0000,  1.0000,  0.9498,  0.9999,  0.8861, -0.7808,  0.7819, -0.8337,  0.9718,  0.4564,  0.9990,  0.9810, -0.8391, -0.7649,  0.9975, -0.9025, -0.8443,  0.9654,  0.8712,  0.8375, -0.9997,  0.7395, -0.8623,  0.7199,  0.8548, -0.9981,  0.8115, -0.9822, -0.9967, -0.6843,  0.9065, -0.9786, -0.9991, -0.9831, -0.2063,  1.0001,  0.5352,  0.9573,  0.2655,  0.9964,  0.9887,  0.9934, -0.9821, -0.9671, -0.9841,  0.9170,  0.9644,  0.8206, -0.4082,  0.5954, -0.8314, -0.9763, -0.0547,  0.9726,  0.9915, -0.9727,  0.9921,  0.8717, -0.9732,  0.9999,  0.8561,  0.5407, -0.8877, -0.7994, -0.9976,  0.8514,  0.9111, -0.9988,  0.9380,  0.9013,  0.9992,  0.9997, -0.8654 ])

# Precision of the FHE result (fhe_vector above) with respect to the plaintext
# pooler output at batch index 0, token position 0.
precision(pooler_output[0][0].detach(), fhe_vector)

tensor(0.9655, dtype=torch.float64)

### 10) Classifier

In [45]:
# Classification head: linear layer on top of the pooler output.
classifier_head = model.classifier
w_classifier = classifier_head.weight.transpose(0, 1).double()
b_classifier = classifier_head.bias.double()
classification = torch.matmul(pooler_output, w_classifier) + b_classifier

# Output of the FHE circuit for the same sentence.
fhe_vector = np.array([-2.5649, 2.4694])

# Compare the plaintext output at batch index 0, token position 0 with the FHE output.
precision(classification[0][0].detach(), fhe_vector)

tensor(0.9855, dtype=torch.float64)

In [48]:
# Show the plaintext and FHE classifier outputs next to each other.
plain_output = classification[0][0].detach().numpy()
print("Plain circuit output: {}\nFHE circuit output: {}".format(plain_output, fhe_vector))

Plain circuit output: [-2.61671689  2.49186753]
FHE circuit output: [-2.5649  2.4694]


---

## Extra

Precision and relative error of the final output for a few more sentences. The two vectors in each cell were obtained by launching `./NewBERT` in verbose mode.

In [18]:
#Sentence: "This movie is just empty rethoric and a total aesthetic no sense"
# First vector: reference (`correct`); second vector: approximation (`approx`).
correct_output = np.array([2.09596, -1.79233])
approx_output = np.array([2.1071, -1.7938])
precision(correct_output, approx_output)

0.9967569291385159

In [19]:
#Sentence: "This movie is just empty rethoric and a total aesthetic no sense"
# First vector: reference (`correct`); second vector: approximation (`approx`).
correct_output = np.array([2.09596, -1.79233])
approx_output = np.array([2.1071, -1.7938])
relative_error(correct_output, approx_output)

0.003008168094811025

---

In [20]:
#Sentence: "Sunshine streamed through the window, brightening my spirits instantly"
# First vector: reference (`correct`); second vector: approximation (`approx`).
correct_output = np.array([-1.23462, 1.41781])
approx_output = np.array([-1.0919, 1.2679])
precision(correct_output, approx_output)

0.889674751077314

In [21]:
#Sentence: "Sunshine streamed through the window, brightening my spirits instantly"
# First vector: reference (`correct`); second vector: approximation (`approx`).
correct_output = np.array([-1.23462, 1.41781])
approx_output = np.array([-1.0919, 1.2679])
relative_error(correct_output, approx_output)

0.10319788970313368

---

In [22]:
#Sentence: "Hope blossomed anew in my heart, casting away shadows of doubt."
# First vector: reference (`correct`); second vector: approximation (`approx`).
correct_output = np.array([-1.73595, 1.80724])
approx_output = np.array([-1.8774, 1.9805])
precision(correct_output, approx_output)

0.9111789094008507

In [24]:
#Sentence: "Hope blossomed anew in my heart, casting away shadows of doubt."
# First vector: reference (`correct`); second vector: approximation (`approx`).
correct_output = np.array([-1.73595, 1.80724])
approx_output = np.array([-1.8774, 1.9805])
relative_error(correct_output, approx_output)

0.08706923264203975

---

In [25]:
#Sentence: "Frustration boiled within me as obstacles seemed insurmountable, disappointment lingering like a heavy fog, obscuring any glimmer of optimism."
#255 seconds - 20 words
# First vector: reference (`correct`); second vector: approximation (`approx`).
correct_output = np.array([1.94430, -1.53529])
approx_output = np.array([2.2199, -1.8629])
precision(correct_output, approx_output)

0.8266433689026581

In [26]:
#Sentence: "Frustration boiled within me as obstacles seemed insurmountable, disappointment lingering like a heavy fog, obscuring any glimmer of optimism."
#255 seconds - 20 words
# First vector: reference (`correct`); second vector: approximation (`approx`).
correct_output = np.array([1.94430, -1.53529])
approx_output = np.array([2.2199, -1.8629])
relative_error(correct_output, approx_output)

0.15512266625520754

---

In [27]:
#Sentence: "Sadness enveloped me in its suffocating embrace, draining all energy and hope, while fear gripped me tightly, exacerbating the sense of despair."
#265 seconds - 20 words
# First vector: reference (`correct`); second vector: approximation (`approx`).
correct_output = np.array([-1.20308, 1.29482])
approx_output = np.array([-1.5387, 1.5612])
precision(correct_output, approx_output)

0.758997557948677

In [28]:
#Sentence: "Sadness enveloped me in its suffocating embrace, draining all energy and hope, while fear gripped me tightly, exacerbating the sense of despair."
#265 seconds - 20 words
# First vector: reference (`correct`); second vector: approximation (`approx`).
correct_output = np.array([-1.20308, 1.29482])
approx_output = np.array([-1.5387, 1.5612])
relative_error(correct_output, approx_output)

0.23246474413431975

---

In [29]:
#Sentence: "in the end , we are left with something like two ships passing in the night rather than any insights into gay love , chinese society or the price one pays for being dishonest ."
#X seconds - 40 tokens
# NOTE(review): this reference vector is identical to the one used for the
# "Frustration ..." sentence, while the relative_error cell for this same
# sentence uses [1.94433, -1.5352] -- confirm which values belong here.
correct_output = np.array([1.94430, -1.53529])
approx_output = np.array([2.2194, -1.8624])
precision(correct_output, approx_output)

0.8269307590836852

In [30]:
#Sentence: "in the end , we are left with something like two ships passing in the night rather than any insights into gay love , chinese society or the price one pays for being dishonest ."
#X seconds - 40 tokens
# NOTE(review): the precision cell for this same sentence uses
# [1.94430, -1.53529] as the reference vector -- confirm which values are right.
correct_output = np.array([1.94433, -1.5352])
approx_output = np.array([2.2194, -1.8624])
relative_error(correct_output, approx_output)

0.15487854428003478

---