## This Notebook is intended to show a method to compute precision in FHE computations

The resulting FHE vectors have been computed using the C++ program in verbose mode.

Replicate it by launching the following command:

```
./FHEBERT-tiny "Nuovo Cinema Paradiso has been an incredible movie! A gem in the italian culture." --verbose
```

In [14]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
import numpy as np
import math
from matplotlib import pyplot as plt 
from datasets import load_dataset
import pandas as pd

"""def precision(correct, approx):
    if type(approx) == list:
        approx = np.array(approx)
    absolute = sum(abs(correct - approx))/len(correct)
    relative = absolute / (sum(abs(correct))/len(correct))
    return 1 - relative"""

def precision(correct, approx):
    if type(approx) == list:
        approx = np.array(approx)
    #if type(correct) == torch.Tensor:
    #    correct = correct.cpu().numpy()
    absolute = sum(abs(correct - approx))/len(correct)
    relative = absolute / (sum(abs(correct))/len(correct))
    return 1 - relative

def relative_error(correct, approx):
    relative_errors = abs(correct - approx) / max(correct)
    return sum(relative_errors)/len(relative_errors)

In [15]:
from transformers import logging
logging.set_verbosity_error() #Otherwise it will log annoying warnings

tokenizer = AutoTokenizer.from_pretrained("gokuls/BERT-tiny-emotion-intent")
model = AutoModelForSequenceClassification.from_pretrained("gokuls/BERT-tiny-emotion-intent")

model.eval()



BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 128, padding_idx=0)
      (position_embeddings): Embedding(512, 128)
      (token_type_embeddings): Embedding(2, 128)
      (LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-1): 2 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=128, out_features=128, bias=True)
              (key): Linear(in_features=128, out_features=128, bias=True)
              (value): Linear(in_features=128, out_features=128, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=128, out_features=128, bias=True)
              (LayerNorm): LayerNorm((128,), eps=1e-1

In [16]:
text = "i felt anger when at the end of a telephone call"
text = "[CLS] " + text + " [SEP]"

In [17]:
#This is computed client-side

tokenized = tokenizer(text)
tokenized_text = tokenizer.tokenize(text)
indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
tokens_tensor = torch.tensor([indexed_tokens])

x = model.bert.embeddings(tokens_tensor, torch.tensor([[1] * len(tokenized_text)]))

In [18]:
for i in range(len(x[0])):
    np.savetxt('../sample-inputs/0/input_{}.txt'.format(i), x[0][i].detach(), delimiter=',')

FileNotFoundError: [Errno 2] No such file or directory: '../sample-inputs/0/input_0.txt'

### 1) Layer 1 -- Self-Attention

In [None]:
key = model.bert.encoder.layer[0].attention.self.key.weight.clone().detach().double().transpose(0, 1)
query = model.bert.encoder.layer[0].attention.self.query.weight.clone().detach().double().transpose(0, 1)
value = model.bert.encoder.layer[0].attention.self.value.weight.clone().detach().double().transpose(0, 1)

key_bias = model.bert.encoder.layer[0].attention.self.key.bias.clone().detach().double()
query_bias = model.bert.encoder.layer[0].attention.self.query.bias.clone().detach().double()
value_bias = model.bert.encoder.layer[0].attention.self.value.bias.clone().detach().double()

original_input_tensor = x.double()

input_tensor = x.double()

q = torch.matmul(input_tensor, query) + query_bias
k = torch.matmul(input_tensor, key) + key_bias
v = torch.matmul(input_tensor, value) + value_bias

q = q.reshape([1, input_tensor.size()[1], 2, 64])
k = k.reshape([1, input_tensor.size()[1], 2, 64])
v = v.reshape([1, input_tensor.size()[1], 2, 64])

q = q.permute([0, 2, 1, 3])
k = k.permute([0, 2, 3, 1])

qk = torch.matmul(q, k)
qk = qk / 8

qk_softmaxed = torch.softmax(qk, -1)

v = v.permute([0, 2, 1, 3])

fin = torch.matmul(qk_softmaxed, v)
fin = fin.permute([0, 2, 1, 3])
fin = fin.reshape([1, input_tensor.size()[1], 128])

In [None]:
fhe_vector = np.array(

     [ -0.5935,  0.3001,  0.4497,  0.1863, -0.2018, -0.1721,  0.0250, -0.1890, -1.0796, -0.0060, -0.4688, -0.5391,  0.1450, -0.0287, -0.6920, -0.4591, -0.1239, -0.8362,  0.7285,  0.3737,  0.3568, -0.8378, -0.2016,  0.6911, -0.2741, -0.2680,  0.8550, -0.8486,  0.5371, -0.5620, -0.8053,  0.9087, -0.3066, -0.8876, -0.5665,  0.4161, -0.4047, -0.3765,  0.2435,  0.5308, -0.1741, -0.9727,  0.6449,  0.1752,  0.1384, -0.9170,  1.2273, -0.0590, -0.0822,  0.5915, -0.0051, -0.4627,  0.0796,  0.5783, -0.9413, -0.0595,  0.6105,  0.6918, -0.6532,  0.7175, -0.1555, -0.4823,  0.6042,  0.7576,  0.1762,  1.3432, -0.0884, -1.0011,  0.5167, -0.5999,  0.4000, -1.0968,  1.0935,  1.1478, -0.3586,  0.6682, -0.2993,  0.4807,  1.2827, -1.4677,  0.9413,  0.7757,  0.3210, -1.1940,  0.0152, -1.7562, -0.3805, -2.1975,  0.0580, -1.0865, -0.9083, -0.6207, -0.2115, -0.4391,  1.0169,  0.5326,  0.3592,  0.5113, -0.0752,  0.8449, -0.2961, -0.1065, -1.1455, -0.3673,  0.1517, -0.2898,  0.0715, -0.4683,  0.1280, -0.6740,  0.7972, -0.5265, -0.4628, -0.6778,  0.7309, -0.2363,  0.8243, -0.7288,  1.0269, -0.4441,  0.0688,  0.0794, -0.2260, -1.9900,  0.2719, -0.6153,  0.4409, -1.1387 ]

)

precision(fin[0][0].detach(), fhe_vector)

  absolute = sum(abs(correct - approx))/len(correct)


tensor(0.9997, dtype=torch.float64)

Great!

### 2) Layer 1 -- Self-Output

In [None]:
w_output_dense = model.bert.encoder.layer[0].attention.output.dense.weight.clone().detach().double().transpose(0, 1)
b_output_dense = model.bert.encoder.layer[0].attention.output.dense.bias.clone().detach().double()

#mean = np.array([-0.03383045433490704, -0.04689138747464171, -0.04320052751297194, -0.04194874763842685, -0.03849735236740709, -0.03583471496309556, -0.036673685450259945, -0.03533623114666153, -0.03301200050649906, -0.03385619903604035, -0.03394064677150061, -0.03581378040060232, -0.04000193681582013, -0.042994980738727644, -0.042689484809151766, -0.0422699887342667, -0.040702211423783496, -0.043257636922742766, -0.040924377288572664, -0.04212762593354266, -0.040090620729304687, -0.03727317047412721, -0.030603299343800818, -0.034141189654495016, -0.03468711091296442, -0.032307857857310274, -0.02926372943560165, -0.031292906450152466, -0.037837883896213766, -0.03745859562807607, -0.03794657692710982, -0.03860214509229593, -0.036185650111238955, -0.039154371235979875, -0.03589729976884486, -0.031731895884233016, -0.03465287223481833, -0.031348414682812194, -0.03688161652969029, -0.03338290816163936, -0.038240660222183975, -0.037525466450406116, -0.038229222217722264, -0.041201914113547705, -0.04212576296359885, -0.03980083151775188, -0.04072657806877826, -0.040145599490268025, -0.036685242667777444, -0.034109016054392725, -0.03544325775104831, -0.03623692053970561, -0.04948334692050963, -0.04596823422981405, -0.04892271117435003])
#var = np.array([0.7495962428549272, 0.6109555428467895, 0.6225590467577651, 0.62495153067201, 0.631395549935461, 0.634492711694546, 0.644892789064359, 0.6542099965205022, 0.6595559062153842, 0.6659906881037033, 0.6680168012366937, 0.6758412527257586, 0.6668118068796066, 0.6718192460326265, 0.67786737736941, 0.6808577853930836, 0.6736657333151266, 0.6676446046843724, 0.6659979061989304, 0.6743226078654423, 0.681388263935704, 0.6837117808950258, 0.6907147768934253, 0.684537831509984, 0.6896744328697597, 0.6916627127801457, 0.6954043965468235, 0.6954046755145293, 0.7001025287354249, 0.695094327647078, 0.6854203403085795, 0.7027792682295838, 0.6956849098218769, 0.6945153573872891, 0.6856697060013522, 0.6897353511373785, 0.700668908202082, 0.6965624918742969, 0.7082690699456209, 0.7043163331126293, 0.7070770512949652, 0.7042510307314358, 0.6978925459183357, 0.7205035876616076, 0.6902461198740245, 0.686971254827903, 0.7028843270104062, 0.7032880792671149, 0.7057843340136714, 0.7104860015626775, 0.7321738164781159, 0.71095817492914, 0.7401485084476891, 0.7312957890728539, 0.7375994654874705])
mean = np.loadtxt(f"../emotion-precompute/layer0_selfoutput_mean.txt", delimiter=',')
var = np.loadtxt(f"../emotion-precompute/layer0_selfoutput_var.txt", delimiter=',')
    
fin2 = torch.matmul(fin, w_output_dense) + b_output_dense
fin2_backup = fin2.clone()
fin2_backup = fin2_backup + original_input_tensor

fin3_whole = []

for i in range(len(original_input_tensor.squeeze())):
    fin2 = fin2_backup.squeeze()[i]
    fin3_corr = (fin2.squeeze().detach() - mean[i]) * var[i]
    #fin3_corr = (fin2.squeeze().detach() - mean[i])

    #fin3_corr = (fin2.squeeze().detach() - torch.mean(fin2.squeeze())) / math.sqrt(torch.var(fin2.squeeze()))
    
    w_output_layernorm = model.bert.encoder.layer[0].attention.output.LayerNorm.weight.clone().detach().double().unsqueeze(0)
    b_output_layernorm = model.bert.encoder.layer[0].attention.output.LayerNorm.bias.clone().detach().double()

    #fin3_corr = fin3_corr * vy_0_0 + normbias_0_0
    fin3_corr = fin3_corr * w_output_layernorm + b_output_layernorm
    fin3_whole.append(fin3_corr.detach())

fin3_whole = torch.cat(tuple(fin3_whole), 0).unsqueeze(0)
#fin3_whole = torch.stack(tuple(fin3_whole), 0).unsqueeze(0)

In [None]:
fhe_vector = np.array(

     [  0.0482, -0.1075, -11.2613, -0.4110,  0.4302,  0.6497,  0.1301,  1.3278,  0.1579,  1.1311, -0.8848,  0.3747,  0.4718,  0.1210, -1.1492,  0.9855, -0.7290,  1.4671, -0.3598,  0.4963, -0.6950, -0.6814,  2.3943,  1.4768, -0.4297, -0.3462,  0.1880,  0.8064, -0.2661, -0.9504, -0.8116,  0.3943, -3.6442, -0.2285, -0.6789,  0.1951,  1.1093, -1.5599,  0.3479,  0.0791,  1.7797,  0.0992,  2.6998,  0.4910,  0.1479,  0.5430,  1.9698,  0.2414, -0.4515,  0.9966,  0.2433,  0.7121, -1.6903,  0.9698, -0.1424,  0.2204, -0.2862,  1.2299,  1.1952,  1.3073,  0.9330, -0.6140, -1.8105,  1.5041,  0.7163,  0.5801,  0.1258, -0.3920,  0.6397, -0.6413,  0.1302,  0.8207, -0.0230, -3.2885, -0.2270, -0.1593,  0.4109, -0.5154,  1.3843, -1.5219, -0.3101, -0.7277,  0.9753,  1.1526,  0.1468,  1.2595,  0.8203, -1.3128,  0.2633, -0.6659,  0.5354,  0.0253, -0.0716, -0.3263,  1.2387,  0.5496,  1.2123,  0.5951, -0.0991, -0.1983, -0.8707,  0.1587, -0.3325,  0.3296, -0.1410,  0.8270, -0.4210,  0.0649,  0.4337, -0.4403, -0.1882,  0.8972, -0.5520, -0.0156,  0.5180, -0.2747,  0.5136, -0.6566,  0.3882,  0.3571, -1.7037,  0.2001,  0.7856,  0.3999,  1.4019, -0.9091, -1.4313, -0.3592,  ]
     
)
precision(fin3_whole[0][0].detach(), fhe_vector)

  absolute = sum(abs(correct - approx))/len(correct)


tensor(0.8939, dtype=torch.float64)

### 3) Layer 1 -- Intermediate

In [None]:
fin_4 = torch.matmul(fin3_whole, model.bert.encoder.layer[0].intermediate.dense.weight.transpose(0, 1).double()) + model.bert.encoder.layer[0].intermediate.dense.bias
fin_5 = torch.nn.functional.gelu(fin_4)

In [26]:
fhe_vector = np.array(
    
    [ -0.1268,  0.5738,  1.2875, -0.0935, -0.0194,  0.0611,  0.0165,  0.9638, -0.1469, -0.1195, -0.1417, -0.0212,  0.2971, -0.1298, -0.1526,  0.0029, -0.1748,  0.1770, -0.0869, -0.1305, -0.1005,  0.5176,  0.2457, -0.1674,  1.0318, -0.1479,  0.3479,  0.1266, -0.1568,  0.1565, -0.0942, -0.0277,  0.0808, -0.0366,  0.1928, -0.0202, -0.0539, -0.1342,  0.8762,  0.1116, -0.1755, -0.1504, -0.1750, -0.1597, -0.1527, -0.1330,  0.2825,  0.1499,  0.4071, -0.0555,  0.2550, -0.1669, -0.1110,  0.8471, -0.1029, -0.1376, -0.1370, -0.1745,  0.5725,  0.4268,  0.1072,  0.3939, -0.0193, -0.1406, -0.1543, -0.1620, -0.1121,  0.2423,  0.5701,  0.4421, -0.1487, -0.1474, -0.1599, -0.0415, -0.0387,  0.1406,  0.8771, -0.1611, -0.1108, -0.1744,  0.9426, -0.1760, -0.1738,  0.2903, -0.0144, -0.1693, -0.1683, -0.1444, -0.0168,  0.1850, -0.1460, -0.0825,  0.8256, -0.1489, -0.0661,  0.1548,  0.1536, -0.0231, -0.1635, -0.0233,  0.0905, -0.0781,  0.5147, -0.1052, -0.0203,  0.4085, -0.1124,  1.2346,  0.1943,  0.6653,  0.0976,  0.4432, -0.1256, -0.1745,  0.1763,  1.4561, -0.1712, -0.1263, -0.1395, -0.0998, -0.1059,  0.5487, -0.1464,  0.4725,  0.0598, -0.1532,  0.0991,  0.3776 ]
    
)

precision(fin_5[0][0][:128].detach(), fhe_vector)

  absolute = sum(abs(correct - approx))/len(correct)


tensor(0.8022, dtype=torch.float64)

Degree 20
13.7 - 0.6854

Degree 25
13.7 - 0.6391

Degree 30
13.7 - 0.6965/0.6968 [CONFIRMED]

Degree 35
13.7 - 0.6934

Degree 45
14.5 - 0.649
13.5 - 0.6958
13.2 - 0.6955
13.0 - unknown

Degree 100
13.7 - 0.6954

Degree 119
15.0 -
14.7 -
14.5 -
14.3 - 0.6956
13.7 - 0.696
13.6 - 0.6954
13.5 - 0.696
13.2 - 0.6956
13.0 -

Degree 200
13.9 - 0.6957
13.7 - 0.6961
13.5 - 0.6958

Degree 300
13.7 - 0.6958

### 4) Layer 1 -- Output

In [None]:
test_output_w_1 = np.loadtxt("../weights-emotion/layer0_output_weight1.txt", delimiter=',')
test_output_w_2 = np.loadtxt("../weights-emotion/layer0_output_weight2.txt", delimiter=',')
test_output_w_3 = np.loadtxt("../weights-emotion/layer0_output_weight3.txt", delimiter=',')
test_output_w_4 = np.loadtxt("../weights-emotion/layer0_output_weight4.txt", delimiter=',')

In [23]:
#mean = np.array([-0.09545516102868973, 0.034540955180462664, 0.03934738149667437, 0.040802318439555035, 0.04426037798445811, 0.04919343175846099, 0.0493616301294401, 0.047896279398118795, 0.04912640635535303, 0.048717249992826256, 0.0477219385203478, 0.05095357678578503, 0.05094908370417657, 0.0493275745992752, 0.048418324664654545, 0.0473653504669205, 0.04528009986283869, 0.04524247257539856, 0.046555073355952846, 0.0516135997743503, 0.049103903254210594, 0.048877585502238356, 0.048364988370661784, 0.049043507301742846, 0.049933470462367846, 0.05175179126331398, 0.05057227793143223, 0.055763206569478994, 0.055243365455213404, 0.04986745821758072, 0.047789218698650125, 0.047852162700887234, 0.04279460740337753, 0.04280733225675328, 0.04644169155736491, 0.04783492130826333, 0.04759649093761958, 0.045252139153821, 0.04367184005341422, 0.039034762655413016, 0.04374965234639466, 0.04355128435775863, 0.04499861862695065, 0.04318602336450084, 0.04549296197766528, 0.03907804279518851, 0.037683132925437485, 0.04109696491189214, 0.04410155617431274, 0.05015992918511731, 0.04335430986396108, 0.046492484403760526, 0.044277581701870204, 0.03723061917091777, 0.039156973130334664])
#var = np.array([0.4156698594967092, 0.7008452266859936, 0.7214270983257646, 0.7095727482866087, 0.7102521835201318, 0.710293676073547, 0.7091783271698753, 0.6973493176419543, 0.7011688527520855, 0.7007704875343309, 0.6950537183089973, 0.6948029158092094, 0.6919309911197036, 0.6933694537037308, 0.6970711644923971, 0.7004276850010867, 0.6964234913676165, 0.6987678419874651, 0.6951829293138483, 0.6973048809142951, 0.6989420799277399, 0.7005696487948311, 0.6993937733493811, 0.6902070532566239, 0.6958399824203775, 0.6900361005407983, 0.6925891359742274, 0.6831642926666377, 0.6865279710039072, 0.6904370385593245, 0.6963724536275457, 0.6948942601360332, 0.6784634186071326, 0.6759657478656234, 0.6828578884489792, 0.683566347862741, 0.6857777074044566, 0.672040915409448, 0.6784995422914343, 0.6732453264186854, 0.683881765911935, 0.6909411690410042, 0.6715428435769978, 0.6775867807314924, 0.6785015863916147, 0.676156117696202, 0.6786376609996214, 0.6763771062984715, 0.7119440584663215, 0.7070342067744777, 0.6895996022331654, 0.6683970656272868, 0.6695013664908844, 0.6566575067124804, 0.672887703816164])    
mean = np.loadtxt(f"../emotion-precompute/layer0_output_mean.txt", delimiter=',')
var = np.loadtxt(f"../emotion-precompute/layer0_output_var.txt", delimiter=',')
  
fin_6 = torch.matmul(fin_5, model.bert.encoder.layer[0].output.dense.weight.transpose(0, 1).double()) + model.bert.encoder.layer[0].output.dense.bias
fin_6 = fin_6 + fin3_whole

fin7_whole = []

for i in range(len(input_tensor.squeeze())):
    fin_7 = fin_6.squeeze()[i]
    
    fin7_corr = (fin_7.squeeze().detach() - mean[i]) * var[i]
    
    w_output_layernorm = model.bert.encoder.layer[0].output.LayerNorm.weight.clone().detach().double().unsqueeze(0)
    b_output_layernorm = model.bert.encoder.layer[0].output.LayerNorm.bias.clone().detach().double()

    fin7_corr = fin7_corr * w_output_layernorm + b_output_layernorm

    fin7_whole.append(fin7_corr.detach())

fin7_whole = torch.cat(tuple(fin7_whole), 0).unsqueeze(0)

In [27]:
fhe_vector = np.array(
    
    [  0.8548,  0.0799, -6.2998,  0.1110,  0.4423,  1.2373, -0.3117,  0.9139, -1.0462,  1.8252, -0.8188,  0.1476,  0.4547,  0.6061, -0.5310,  2.0824,  0.1361,  0.9327,  0.7835,  1.3431, -0.1257, -0.8280,  1.3266,  0.8780, -0.4022,  0.2011,  0.3912,  0.4494, -0.3645, -0.1963, -0.8242,  0.1293, -1.7161,  0.0403, -0.5149, -0.2283,  1.1953, -0.9533,  0.5437,  0.5772,  1.5210,  0.0419,  0.9770,  0.3625,  0.2277, -0.1288,  1.8288,  0.1730, -0.4084,  0.4450,  0.3576,  0.4193, -0.3518,  0.4891, -1.0996, -0.1909, -0.4549,  1.2638,  0.7988,  1.1737,  1.3571,  0.0567, -1.3891,  2.1352,  0.9397,  0.3468,  0.1896, -1.1315,  0.5010, -1.0375, -0.6645,  1.1378,  0.2884, -0.4214, -0.0472, -0.4689,  0.3556, -0.0826,  0.6386, -0.6460,  1.2734, -0.1073,  1.3520,  1.5206, -0.1313,  1.5936,  0.8095, -0.1601,  0.1511, -0.5796,  0.9114,  0.6353,  0.5931, -0.7430,  1.3519,  0.5141,  0.2243, -0.0708, -0.0925, -0.2621,  0.1497,  0.1509,  0.2030,  0.6585, -0.4940,  0.9430, -0.5299, -0.2476,  0.0081, -0.0938, -0.3917,  1.3483, -1.4795, -0.4455,  0.1102,  0.0763,  1.4226, -0.6157,  0.0773,  0.1760, -2.4029,  1.0622,  1.6139,  0.3935,  1.1649, -0.5905, -1.2482,  0.8483,  ]



    
)

precision(fin7_whole[0][0].detach(), fhe_vector)

  absolute = sum(abs(correct - approx))/len(correct)


tensor(0.1967, dtype=torch.float64)

In [539]:
fhe_vector = np.array(
    
    [ -0.5698, -0.0603, -0.8675,  0.1218,  2.3721,  0.0714,  1.3686,  0.3815, -1.8198,  1.3604, -1.0474,  0.5958,  0.4158,  0.7379,  0.0844,  2.2353,  0.3601,  1.5600,  0.1024,  1.0241, -0.2770, -0.0583,  1.3927,  0.9111,  0.5256,  0.7997,  1.5650,  1.5521,  1.3956,  1.3112, -0.9158,  1.0806, -0.3413, -1.6900, -0.6095,  0.4155,  1.9106, -2.1454,  2.0943, -0.8749,  1.8159, -0.0655,  1.3252, -0.7169,  0.2317,  1.6671,  3.3334,  0.3541,  1.2290,  1.0241,  1.6964,  0.4572,  0.5037,  2.1756, -0.2614, -0.2096,  0.0370,  1.4932,  2.4169,  1.5556,  1.3846,  0.8171, -0.9456,  1.8786,  0.7556,  0.7999,  0.7306,  2.4140,  2.3858,  0.6526,  0.3817,  0.9413, -0.4279, -0.0240, -0.0096, -1.3549,  1.0095, -0.1424,  1.2339, -2.4627, -0.0926,  0.5629,  0.7301,  0.1918,  2.2430,  1.2923,  2.6795,  1.0959,  1.3733, -0.4965,  0.8303,  1.0273, -1.6141, -0.4635,  3.4597,  0.8132,  2.3164,  0.4163, -0.0001, -0.2949,  0.1286,  1.3626,  0.9008,  1.2112, -0.1215,  2.7880,  0.6172,  0.0529,  0.5687,  0.6642,  0.1522,  1.4247, -0.3952,  1.6931,  0.9479, -0.3391,  2.0376, -0.6011,  2.2772,  0.5205, -1.4185,  1.1445,  1.1158,  0.1688,  3.2188,  0.6141, -0.8889, -0.0249,  ]
    
    
)

precision2(fin7_whole[0][0].detach(), fhe_vector)

np.float64(0.8614326578244663)

### 5) Layer 2 -- Self-Attention

In [331]:
key = model.bert.encoder.layer[1].attention.self.key.weight.clone().detach().double().transpose(0, 1)
query = model.bert.encoder.layer[1].attention.self.query.weight.clone().detach().double().transpose(0, 1)
value = model.bert.encoder.layer[1].attention.self.value.weight.clone().detach().double().transpose(0, 1)

key_bias = model.bert.encoder.layer[1].attention.self.key.bias.clone().detach().double()
query_bias = model.bert.encoder.layer[1].attention.self.query.bias.clone().detach().double()
value_bias = model.bert.encoder.layer[1].attention.self.value.bias.clone().detach().double()

original_input_tensor = fin7_whole
input_tensor = fin7_whole

q = torch.matmul(input_tensor, query) + query_bias
k = torch.matmul(input_tensor, key) + key_bias
v = torch.matmul(input_tensor, value) + value_bias

q = q.reshape([1, input_tensor.size()[1], 2, 64])
k = k.reshape([1, input_tensor.size()[1], 2, 64])
v = v.reshape([1, input_tensor.size()[1], 2, 64])

q = q.permute([0, 2, 1, 3])
k = k.permute([0, 2, 3, 1])

qk = torch.matmul(q, k)
qk = qk / 8

qk_softmaxed = torch.softmax(qk, -1)

v = v.permute([0, 2, 1, 3])

fin = torch.matmul(qk_softmaxed, v)
fin = fin.permute([0, 2, 1, 3])
fin = fin.reshape([1, input_tensor.size()[1], 128])

In [333]:
fhe_vector = np.array(
    
    [ -0.1068, -0.0192, -0.0325,  0.0396, -0.0603,  0.0161,  0.0151,  0.0365,  0.0889, -0.0211, -0.0483,  0.0069, -0.0039, -0.0782,  0.0337,  0.0714,  0.0039, -0.0265, -0.0201,  0.0206,  0.0002, -0.0246, -0.0609,  0.0242,  0.0406,  0.0411, -0.0160,  0.0758,  0.0260, -0.0362, -0.0005, -0.0519, -0.0379,  0.0289, -0.0025, -0.0080,  0.0431, -0.0276, -0.0388, -0.0315,  0.0684,  0.0084,  0.0255,  0.0333,  0.0054, -0.0003,  0.0017, -0.0945,  0.0022, -0.0299,  0.0269, -0.0148, -0.0379,  0.0329,  0.0031,  0.0052, -0.0264,  0.0242, -0.0043,  0.0648,  0.0141, -0.0231, -0.0518, -0.0902, -0.0546, -0.0035,  0.0251,  0.0327, -0.0308, -0.0023, -0.1091,  0.0219, -0.0311,  0.0348, -0.0481,  0.0155,  0.0416,  0.0198,  0.0389,  0.0467, -0.0077,  0.0297, -0.0016, -0.0626, -0.0696, -0.0503, -0.0709, -0.0326, -0.0139, -0.0379,  0.0038, -0.0521, -0.0233,  0.0129, -0.0227, -0.0013, -0.0082, -0.0778, -0.0278,  0.1103,  0.0615, -0.0260, -0.0202, -0.0436, -0.0298, -0.0323, -0.0008,  0.0215,  0.0190,  0.0241,  0.0029, -0.0184, -0.0218, -0.0025,  0.0144,  0.0158, -0.0330,  0.0256, -0.0412, -0.0671,  0.0479, -0.0008,  0.0188,  0.0359,  0.0415, -0.0137,  0.0120,  0.0061 ]

    
)

precision(fin[0][0].detach(), fhe_vector)

np.float64(-0.004796039933612439)

In [334]:
fhe_vector = np.array(
    
    [ -0.1068, -0.0192, -0.0325,  0.0396, -0.0603,  0.0161,  0.0151,  0.0365,  0.0889, -0.0211, -0.0483,  0.0069, -0.0039, -0.0782,  0.0337,  0.0714,  0.0039, -0.0265, -0.0201,  0.0206,  0.0002, -0.0246, -0.0609,  0.0242,  0.0406,  0.0411, -0.0160,  0.0758,  0.0260, -0.0362, -0.0005, -0.0519, -0.0379,  0.0289, -0.0025, -0.0080,  0.0431, -0.0276, -0.0388, -0.0315,  0.0684,  0.0084,  0.0255,  0.0333,  0.0054, -0.0003,  0.0017, -0.0945,  0.0022, -0.0299,  0.0269, -0.0148, -0.0379,  0.0329,  0.0031,  0.0052, -0.0264,  0.0242, -0.0043,  0.0648,  0.0141, -0.0231, -0.0518, -0.0902, -0.0546, -0.0035,  0.0251,  0.0327, -0.0308, -0.0023, -0.1091,  0.0219, -0.0311,  0.0348, -0.0481,  0.0155,  0.0416,  0.0198,  0.0389,  0.0467, -0.0077,  0.0297, -0.0016, -0.0626, -0.0696, -0.0503, -0.0709, -0.0326, -0.0139, -0.0379,  0.0038, -0.0521, -0.0233,  0.0129, -0.0227, -0.0013, -0.0082, -0.0778, -0.0278,  0.1103,  0.0615, -0.0260, -0.0202, -0.0436, -0.0298, -0.0323, -0.0008,  0.0215,  0.0190,  0.0241,  0.0029, -0.0184, -0.0218, -0.0025,  0.0144,  0.0158, -0.0330,  0.0256, -0.0412, -0.0671,  0.0479, -0.0008,  0.0188,  0.0359,  0.0415, -0.0137,  0.0120,  0.0061 ]

    
)

precision2(fin[0][0].detach(), fhe_vector)

np.float64(0.6965645833645266)

In [None]:
# need to tune

### 6) Layer 2 -- Self-Output

In [243]:
#mean = np.array([0.04805131047475803, 0.014145706172069285, 0.010630181813540026, 0.010521146572975027, 0.00956244983947186, 0.008211288558782809, 0.008817800275674387, 0.008911457532306733, 0.008643898058317862, 0.008801769546523253, 0.009472254700839258, 0.008094415948174241, 0.007702615754430344, 0.005460620353838359, 0.007021847370084451, 0.008373831982472147, 0.01022061224155272, 0.00927594903773269, 0.009277225000069925, 0.007049453120897054, 0.008682554190420182, 0.008749022040809715, 0.010118317324741522, 0.008998865743435887, 0.008763833543884292, 0.008285728555981435, 0.006967351876718886, 0.00588068616144895, 0.0030701809065725363, 0.003659716972971551, 0.002116778487431024, 0.003947434346765913, 0.006907859825079262, 0.008494112860837831, 0.007040283968419036, 0.007197681884381672, 0.008232685835987293, 0.009965029801574864, 0.00731962961637719, 0.00830555309310382, 0.005340440177451385, 0.007833324368720607, 0.01047456825511633, 0.009674864773662995, 0.010093537461664302, 0.01588798917017868, 0.018537933333636507, 0.018245848282989877, 0.012253993810893607, 0.011354133953173591, 0.013474744814287221, 0.013707011955501919, 0.007918842609048385, 0.017240907760895086, 0.03465881962238184])
#var = np.array([0.6741653046411179, 0.602392389437227, 0.5945841451997256, 0.5997135932136959, 0.6033806506910513, 0.6064839949503851, 0.6058735285405447, 0.6059001754921257, 0.6086086189801689, 0.6118981975241923, 0.6161533101614306, 0.6105411757987637, 0.6102443339235957, 0.6004337682468068, 0.6068584434133084, 0.6123178593290803, 0.6150302868629213, 0.6102744641580546, 0.6143169356654037, 0.6105845722771672, 0.61540315154488, 0.622109065598561, 0.6221720668578823, 0.6279330579960701, 0.6282907135959079, 0.6258439179151315, 0.6187239026398644, 0.618294817104495, 0.609488586748927, 0.6085185174201381, 0.6154275326252285, 0.6207534846328591, 0.6290521066315713, 0.6375810334496135, 0.6238236165346044, 0.6310571465398529, 0.6350551779511981, 0.6452639043477173, 0.6346915398812409, 0.646622546259538, 0.6435498445423712, 0.6401589932559348, 0.6458833892517316, 0.6354378204804867, 0.651796667347259, 0.6547600574517144, 0.6554038815336571, 0.655910889886979, 0.6412602949793637, 0.6489736968517984, 0.6633309254993116, 0.6771441398382873, 0.6423362709438692, 0.6302863730404997, 0.5940213893371686])
mean = np.loadtxt(f"../precomputed_layernorm/layer0_selfoutput_mean.txt", delimiter=',')
var = np.loadtxt(f"../precomputed_layernorm/layer0_selfoutput_inv_sqrt_var.txt", delimiter=',')
  
w_output_dense = model.bert.encoder.layer[1].attention.output.dense.weight.clone().detach().double().transpose(0, 1)
b_output_dense = model.bert.encoder.layer[1].attention.output.dense.bias.clone().detach().double()

fin2 = torch.matmul(fin, w_output_dense) + b_output_dense
fin2_backup = fin2.clone()
fin2_backup = fin2_backup + original_input_tensor

fin3_whole = []

for i in range(len(original_input_tensor.squeeze())):
    fin2 = fin2_backup.squeeze()[i]

    fin3_corr = (fin2.squeeze().detach() - mean[i]) * var[i]

    w_output_layernorm = model.bert.encoder.layer[1].attention.output.LayerNorm.weight.clone().detach().double().unsqueeze(0)
    b_output_layernorm = model.bert.encoder.layer[1].attention.output.LayerNorm.bias.clone().detach().double()

    fin3_corr = fin3_corr * w_output_layernorm + b_output_layernorm
    fin3_whole.append(fin3_corr.detach())

fin3_whole = torch.cat(tuple(fin3_whole), 0).unsqueeze(0)

In [335]:
fhe_vector = np.array(
    
    [  0.7330,  0.0126, -0.0233, -0.2628,  0.1928, -0.2124,  0.1094,  0.0412, -0.2365, -0.3572,  0.5664,  0.1201, -0.1786, -0.0465,  0.6440, -0.1098, -0.0606, -0.1011, -0.4367, -0.0312,  0.4009,  0.4681,  0.6212,  0.2194,  0.6544, -0.2073,  0.0758,  0.4386,  0.0484, -0.2627, -0.0685, -0.3014, -0.2071,  0.1769,  0.3617, -0.0273, -0.1668,  0.3874, -1.0230, -0.2404, -0.0968,  0.0689,  0.0421, -0.3963,  0.4091, -1.1460,  0.0924,  0.0768,  0.3327,  0.0585,  0.3282,  0.4662,  0.3354,  0.2501, -0.2827, -0.0694,  0.0892, -0.0219,  0.2948,  0.6531,  0.3371, -0.1024,  0.0209, -0.3422, -0.0348,  0.3260, -0.4598, -0.0322, -0.2510, -0.3114, -0.3700, -0.0621,  0.1726,  0.3912, -0.4058, -0.0686,  0.3138,  0.1449,  0.5029,  0.2482,  0.1418, -0.0525,  0.0965,  0.0640,  0.1326,  0.0252, -0.2877, -0.1054,  0.4951, -0.3800,  0.6470, -0.2414, -0.5532,  0.8229, -0.1416,  0.5959,  0.2611,  0.2179,  0.1574,  0.4412,  0.0827,  0.0071, -0.0675, -0.6168,  0.2323, -0.5941, -0.2865,  0.3778,  0.0371, -0.0309, -0.0631,  0.2247,  0.9732, -0.3783, -0.1640,  0.0490, -0.1935,  0.1525, -0.2631,  0.0219,  0.3359,  0.9802,  0.8003, -0.1906, -0.4799, -0.2161, -0.5641,  0.7880,  ]

    
    
)

precision(fin3_whole[0][0].detach(), fhe_vector)

np.float64(-0.1050253083482644)

In [336]:
fhe_vector = np.array(
    
    [  0.7330,  0.0126, -0.0233, -0.2628,  0.1928, -0.2124,  0.1094,  0.0412, -0.2365, -0.3572,  0.5664,  0.1201, -0.1786, -0.0465,  0.6440, -0.1098, -0.0606, -0.1011, -0.4367, -0.0312,  0.4009,  0.4681,  0.6212,  0.2194,  0.6544, -0.2073,  0.0758,  0.4386,  0.0484, -0.2627, -0.0685, -0.3014, -0.2071,  0.1769,  0.3617, -0.0273, -0.1668,  0.3874, -1.0230, -0.2404, -0.0968,  0.0689,  0.0421, -0.3963,  0.4091, -1.1460,  0.0924,  0.0768,  0.3327,  0.0585,  0.3282,  0.4662,  0.3354,  0.2501, -0.2827, -0.0694,  0.0892, -0.0219,  0.2948,  0.6531,  0.3371, -0.1024,  0.0209, -0.3422, -0.0348,  0.3260, -0.4598, -0.0322, -0.2510, -0.3114, -0.3700, -0.0621,  0.1726,  0.3912, -0.4058, -0.0686,  0.3138,  0.1449,  0.5029,  0.2482,  0.1418, -0.0525,  0.0965,  0.0640,  0.1326,  0.0252, -0.2877, -0.1054,  0.4951, -0.3800,  0.6470, -0.2414, -0.5532,  0.8229, -0.1416,  0.5959,  0.2611,  0.2179,  0.1574,  0.4412,  0.0827,  0.0071, -0.0675, -0.6168,  0.2323, -0.5941, -0.2865,  0.3778,  0.0371, -0.0309, -0.0631,  0.2247,  0.9732, -0.3783, -0.1640,  0.0490, -0.1935,  0.1525, -0.2631,  0.0219,  0.3359,  0.9802,  0.8003, -0.1906, -0.4799, -0.2161, -0.5641,  0.7880,  ]

    
    
)

precision2(fin3_whole[0][0].detach(), fhe_vector)

np.float64(0.9222884557413686)

In [None]:
w_output_dense = model.bert.encoder.layer[0].attention.output.dense.weight.clone().detach().double().transpose(0, 1)
b_output_dense = model.bert.encoder.layer[0].attention.output.dense.bias.clone().detach().double()

#mean = np.array([-0.03383045433490704, -0.04689138747464171, -0.04320052751297194, -0.04194874763842685, -0.03849735236740709, -0.03583471496309556, -0.036673685450259945, -0.03533623114666153, -0.03301200050649906, -0.03385619903604035, -0.03394064677150061, -0.03581378040060232, -0.04000193681582013, -0.042994980738727644, -0.042689484809151766, -0.0422699887342667, -0.040702211423783496, -0.043257636922742766, -0.040924377288572664, -0.04212762593354266, -0.040090620729304687, -0.03727317047412721, -0.030603299343800818, -0.034141189654495016, -0.03468711091296442, -0.032307857857310274, -0.02926372943560165, -0.031292906450152466, -0.037837883896213766, -0.03745859562807607, -0.03794657692710982, -0.03860214509229593, -0.036185650111238955, -0.039154371235979875, -0.03589729976884486, -0.031731895884233016, -0.03465287223481833, -0.031348414682812194, -0.03688161652969029, -0.03338290816163936, -0.038240660222183975, -0.037525466450406116, -0.038229222217722264, -0.041201914113547705, -0.04212576296359885, -0.03980083151775188, -0.04072657806877826, -0.040145599490268025, -0.036685242667777444, -0.034109016054392725, -0.03544325775104831, -0.03623692053970561, -0.04948334692050963, -0.04596823422981405, -0.04892271117435003])
#var = np.array([0.7495962428549272, 0.6109555428467895, 0.6225590467577651, 0.62495153067201, 0.631395549935461, 0.634492711694546, 0.644892789064359, 0.6542099965205022, 0.6595559062153842, 0.6659906881037033, 0.6680168012366937, 0.6758412527257586, 0.6668118068796066, 0.6718192460326265, 0.67786737736941, 0.6808577853930836, 0.6736657333151266, 0.6676446046843724, 0.6659979061989304, 0.6743226078654423, 0.681388263935704, 0.6837117808950258, 0.6907147768934253, 0.684537831509984, 0.6896744328697597, 0.6916627127801457, 0.6954043965468235, 0.6954046755145293, 0.7001025287354249, 0.695094327647078, 0.6854203403085795, 0.7027792682295838, 0.6956849098218769, 0.6945153573872891, 0.6856697060013522, 0.6897353511373785, 0.700668908202082, 0.6965624918742969, 0.7082690699456209, 0.7043163331126293, 0.7070770512949652, 0.7042510307314358, 0.6978925459183357, 0.7205035876616076, 0.6902461198740245, 0.686971254827903, 0.7028843270104062, 0.7032880792671149, 0.7057843340136714, 0.7104860015626775, 0.7321738164781159, 0.71095817492914, 0.7401485084476891, 0.7312957890728539, 0.7375994654874705])

mean = np.loadtxt(f"../precomputed_layernorm/layer0_selfoutput_mean.txt", delimiter=',')
var = np.loadtxt(f"../precomputed_layernorm/layer0_selfoutput_inv_sqrt_var.txt", delimiter=',')

fin2 = torch.matmul(fin, w_output_dense) + b_output_dense
fin2_backup = fin2.clone()
fin2_backup = fin2_backup + original_input_tensor

fin3_whole = []

for i in range(len(original_input_tensor.squeeze())):
    fin2 = fin2_backup.squeeze()[i]
    fin3_corr = (fin2.squeeze().detach() - mean[i]) * var[i]
    #fin3_corr = (fin2.squeeze().detach() - mean[i])

    #fin3_corr = (fin2.squeeze().detach() - torch.mean(fin2.squeeze())) / math.sqrt(torch.var(fin2.squeeze()))
    
    w_output_layernorm = model.bert.encoder.layer[0].attention.output.LayerNorm.weight.clone().detach().double().unsqueeze(0)
    b_output_layernorm = model.bert.encoder.layer[0].attention.output.LayerNorm.bias.clone().detach().double()

    #fin3_corr = (fin3_corr * vy_0_0) + normbias_0_0
    fin3_corr = fin3_corr * w_output_layernorm + b_output_layernorm
    fin3_whole.append(fin3_corr.detach())

#fin3_whole = torch.cat(tuple(fin3_whole), 0).unsqueeze(0)
fin3_whole = torch.stack(fin3_whole, 0).unsqueeze(0)

In [None]:
w_output_dense = model.bert.encoder.layer[0].attention.output.dense.weight.clone().detach().double().transpose(0, 1)
b_output_dense = model.bert.encoder.layer[0].attention.output.dense.bias.clone().detach().double()

#mean = np.array([-0.03383045433490704, -0.04689138747464171, -0.04320052751297194, -0.04194874763842685, -0.03849735236740709, -0.03583471496309556, -0.036673685450259945, -0.03533623114666153, -0.03301200050649906, -0.03385619903604035, -0.03394064677150061, -0.03581378040060232, -0.04000193681582013, -0.042994980738727644, -0.042689484809151766, -0.0422699887342667, -0.040702211423783496, -0.043257636922742766, -0.040924377288572664, -0.04212762593354266, -0.040090620729304687, -0.03727317047412721, -0.030603299343800818, -0.034141189654495016, -0.03468711091296442, -0.032307857857310274, -0.02926372943560165, -0.031292906450152466, -0.037837883896213766, -0.03745859562807607, -0.03794657692710982, -0.03860214509229593, -0.036185650111238955, -0.039154371235979875, -0.03589729976884486, -0.031731895884233016, -0.03465287223481833, -0.031348414682812194, -0.03688161652969029, -0.03338290816163936, -0.038240660222183975, -0.037525466450406116, -0.038229222217722264, -0.041201914113547705, -0.04212576296359885, -0.03980083151775188, -0.04072657806877826, -0.040145599490268025, -0.036685242667777444, -0.034109016054392725, -0.03544325775104831, -0.03623692053970561, -0.04948334692050963, -0.04596823422981405, -0.04892271117435003])
#var = np.array([0.7495962428549272, 0.6109555428467895, 0.6225590467577651, 0.62495153067201, 0.631395549935461, 0.634492711694546, 0.644892789064359, 0.6542099965205022, 0.6595559062153842, 0.6659906881037033, 0.6680168012366937, 0.6758412527257586, 0.6668118068796066, 0.6718192460326265, 0.67786737736941, 0.6808577853930836, 0.6736657333151266, 0.6676446046843724, 0.6659979061989304, 0.6743226078654423, 0.681388263935704, 0.6837117808950258, 0.6907147768934253, 0.684537831509984, 0.6896744328697597, 0.6916627127801457, 0.6954043965468235, 0.6954046755145293, 0.7001025287354249, 0.695094327647078, 0.6854203403085795, 0.7027792682295838, 0.6956849098218769, 0.6945153573872891, 0.6856697060013522, 0.6897353511373785, 0.700668908202082, 0.6965624918742969, 0.7082690699456209, 0.7043163331126293, 0.7070770512949652, 0.7042510307314358, 0.6978925459183357, 0.7205035876616076, 0.6902461198740245, 0.686971254827903, 0.7028843270104062, 0.7032880792671149, 0.7057843340136714, 0.7104860015626775, 0.7321738164781159, 0.71095817492914, 0.7401485084476891, 0.7312957890728539, 0.7375994654874705])

mean = np.loadtxt(f"../precomputed_layernorm/layer0_selfoutput_mean.txt", delimiter=',')
var = np.loadtxt(f"../precomputed_layernorm/layer0_selfoutput_inv_sqrt_var.txt", delimiter=',')

fin2 = torch.matmul(fin, w_output_dense) + b_output_dense
fin2_backup = fin2.clone()
fin2_backup = fin2_backup + original_input_tensor

fin3_whole = []

for i in range(len(original_input_tensor.squeeze())):
    fin2 = fin2_backup.squeeze()[i]
    fin3_corr = (fin2.squeeze().detach() - mean[i]) * var[i]
    #fin3_corr = (fin2.squeeze().detach() - mean[i])

    #fin3_corr = (fin2.squeeze().detach() - torch.mean(fin2.squeeze())) / math.sqrt(torch.var(fin2.squeeze()))
    
    w_output_layernorm = model.bert.encoder.layer[0].attention.output.LayerNorm.weight.clone().detach().double().unsqueeze(0)
    b_output_layernorm = model.bert.encoder.layer[0].attention.output.LayerNorm.bias.clone().detach().double()

    #fin3_corr = (fin3_corr * vy_0_0) + normbias_0_0
    fin3_corr = fin3_corr * w_output_layernorm + b_output_layernorm
    fin3_whole.append(fin3_corr.detach())

#fin3_whole = torch.cat(tuple(fin3_whole), 0).unsqueeze(0)
fin3_whole = torch.stack(fin3_whole, 0).unsqueeze(0)

### 7) Layer 2 -- Intermediate

In [245]:
fin_4 = torch.matmul(fin3_whole, model.bert.encoder.layer[1].intermediate.dense.weight.transpose(0, 1).double()) + model.bert.encoder.layer[1].intermediate.dense.bias   
fin_5 = torch.nn.functional.gelu(fin_4)    

In [337]:
fhe_vector = np.array(
    
    [  0.2089,  0.1804, -0.0193,  0.1832,  0.0786, -0.1512,  0.0695,  0.0792,  0.1895,  0.1795, -0.1036, -0.0976,  0.0866, -0.1338,  0.1990,  0.1126,  0.4351, -0.1659, -0.1090, -0.0947,  0.0075,  0.0637, -0.1711, -0.0681,  0.0068,  0.0177, -0.1717,  0.1709, -0.1713, -0.1624, -0.0453,  0.3058, -0.1614,  0.0645, -0.1264,  0.0838, -0.0656, -0.1658,  0.1341,  0.0774,  0.1991, -0.1686, -0.1183, -0.1212, -0.1696, -0.0603,  0.5000,  0.0815, -0.1452,  0.0161,  0.3889, -0.0862, -0.1690,  0.0995, -0.1636, -0.1701, -0.0529, -0.1713,  0.1575, -0.0371, -0.0031,  0.3008,  0.0344,  0.0350,  0.0429, -0.1180, -0.0403,  0.0270,  0.0822, -0.1604,  0.2453, -0.1502,  0.7438, -0.0145,  0.0086,  0.3415,  0.0330, -0.1695,  0.1374,  0.0996, -0.1700,  0.6239,  0.3119, -0.1600, -0.0281, -0.0966,  0.8649,  0.2832,  0.0705, -0.0207, -0.1656, -0.1195, -0.0862,  0.1879, -0.1601, -0.1694,  0.2916, -0.0421, -0.0267, -0.1543, -0.1659,  0.0144,  0.0828,  0.0414, -0.1628,  0.4824, -0.1032,  0.0989, -0.1695, -0.0274,  0.3906, -0.0886, -0.0917, -0.0510, -0.1374, -0.1567, -0.0074, -0.1704, -0.0793,  0.2637, -0.1228, -0.1021, -0.1650,  0.1013, -0.0998,  0.2761, -0.1485, -0.0181 ]

    
 
)

precision(fin_5[0][0][:128].detach(), fhe_vector)

np.float64(-0.06269348906201655)

In [338]:
fhe_vector = np.array(
    
    [  0.2089,  0.1804, -0.0193,  0.1832,  0.0786, -0.1512,  0.0695,  0.0792,  0.1895,  0.1795, -0.1036, -0.0976,  0.0866, -0.1338,  0.1990,  0.1126,  0.4351, -0.1659, -0.1090, -0.0947,  0.0075,  0.0637, -0.1711, -0.0681,  0.0068,  0.0177, -0.1717,  0.1709, -0.1713, -0.1624, -0.0453,  0.3058, -0.1614,  0.0645, -0.1264,  0.0838, -0.0656, -0.1658,  0.1341,  0.0774,  0.1991, -0.1686, -0.1183, -0.1212, -0.1696, -0.0603,  0.5000,  0.0815, -0.1452,  0.0161,  0.3889, -0.0862, -0.1690,  0.0995, -0.1636, -0.1701, -0.0529, -0.1713,  0.1575, -0.0371, -0.0031,  0.3008,  0.0344,  0.0350,  0.0429, -0.1180, -0.0403,  0.0270,  0.0822, -0.1604,  0.2453, -0.1502,  0.7438, -0.0145,  0.0086,  0.3415,  0.0330, -0.1695,  0.1374,  0.0996, -0.1700,  0.6239,  0.3119, -0.1600, -0.0281, -0.0966,  0.8649,  0.2832,  0.0705, -0.0207, -0.1656, -0.1195, -0.0862,  0.1879, -0.1601, -0.1694,  0.2916, -0.0421, -0.0267, -0.1543, -0.1659,  0.0144,  0.0828,  0.0414, -0.1628,  0.4824, -0.1032,  0.0989, -0.1695, -0.0274,  0.3906, -0.0886, -0.0917, -0.0510, -0.1374, -0.1567, -0.0074, -0.1704, -0.0793,  0.2637, -0.1228, -0.1021, -0.1650,  0.1013, -0.0998,  0.2761, -0.1485, -0.0181 ]

    
 
)

precision2(fin_5[0][0][:128].detach(), fhe_vector)

np.float64(0.8567903365816418)

### 8) Layer 2 -- Output

In [339]:
fin_6 = torch.matmul(fin_5, model.bert.encoder.layer[1].output.dense.weight.transpose(0, 1).double()) + model.bert.encoder.layer[1].output.dense.bias
fin_6 = fin_6 + fin3_whole

fin7_whole = []

#mean = np.array([0.06643368, 0.05726708, 0.05311476, 0.05229822, 0.05352628, 0.05238868, 0.0536801 , 0.05327334, 0.05206954, 0.05110339, 0.051747  , 0.05016997, 0.04943122, 0.04937956, 0.04952862, 0.04973959, 0.04852742, 0.04696055, 0.04846476, 0.04925392,0.0509005 , 0.05373027, 0.05371865, 0.05446217, 0.05222489,0.05142676, 0.05080909, 0.05179351, 0.05049174, 0.04965748,0.05138143, 0.0499965 , 0.05194982, 0.05178364, 0.0521023 ,0.05059624, 0.05445499, 0.05507825, 0.05241935, 0.05073552,0.05200171, 0.04858642, 0.04419684, 0.04642237, 0.05115073,0.05028116, 0.05021724, 0.05312114, 0.0524375 , 0.04643478,0.05026358, 0.04248708, 0.04675281, 0.03895142, 0.04558007])
#var = np.array([0.81992316, 0.78486345, 0.79259   , 0.79754392, 0.79350872, 0.79652433, 0.79935746, 0.79867687, 0.80257863, 0.80235328,0.80521209, 0.80621272, 0.80330435, 0.80469855, 0.81171202,0.81136354, 0.80977166, 0.8089956 , 0.8106946 , 0.80862825,0.81450049, 0.81722176, 0.82121488, 0.82012788, 0.8254015 ,0.82097106, 0.81742119, 0.82090554, 0.82116105, 0.82017896,0.82234659, 0.82832269, 0.82888739, 0.81852014, 0.82054523,0.8224114 , 0.82913892, 0.8289046 , 0.81985612, 0.83341215,0.82896934, 0.82315006, 0.82802216, 0.81886278, 0.8274004 ,0.83436616, 0.82014282, 0.82628005, 0.83230868, 0.84511334,0.85141143, 0.84934269, 0.83041272, 0.826798  , 0.83660989])
mean = np.loadtxt(f"../precomputed_layernorm/layer0_output_mean.txt", delimiter=',')
var = np.loadtxt(f"../precomputed_layernorm/layer0_output_inv_sqrt_var.txt", delimiter=',')
  
for i in range(len(input_tensor.squeeze())):
    fin_7 = fin_6.squeeze()[i]

    fin7_corr = (fin_7.squeeze().detach() - mean[i]) * var[i]

    w_output_layernorm = model.bert.encoder.layer[1].output.LayerNorm.weight.clone().detach().double().unsqueeze(0)
    b_output_layernorm = model.bert.encoder.layer[1].output.LayerNorm.bias.clone().detach().double()

    fin7_corr = fin7_corr * w_output_layernorm + b_output_layernorm

    fin7_whole.append(fin7_corr.detach())

fin7_whole = torch.cat(tuple(fin7_whole), 0).unsqueeze(0)

In [340]:
fhe_vector = np.array(
    
    [  1.0847,  0.0611, -0.2553, -0.2195, -0.0768, -0.0351, -0.2793,  0.2342, -0.0630, -0.0477,  0.0375,  0.2755, -0.1587,  0.0321,  0.1981, -0.0209, -0.2383,  0.0620, -0.0595,  0.0948, -0.1580, -0.2057,  0.1679,  0.0702,  0.1813, -0.2547, -0.0055,  0.0484, -0.1623, -0.1362, -0.1488, -0.3694, -0.2211,  0.0102, -0.0095, -0.1292, -0.0872, -0.0490, -0.4730, -0.0726, -0.1776, -0.0085,  0.0375, -0.0688, -0.1952, -0.3103, -0.2791, -0.0349, -0.0305, -0.1331,  0.1256,  0.1690,  0.0814,  0.0670, -0.1539, -0.0325,  0.0855, -0.2624, -0.0434,  0.2756, -0.1079, -0.1955, -0.1846, -0.0310, -0.2145,  0.1789, -0.0162,  0.1133, -0.0336,  0.0830, -0.2897, -0.0024,  0.1381,  0.0675, -0.1530,  0.0068, -0.0959,  0.0483,  0.0310, -0.0303, -0.1648,  0.0197, -0.0259,  0.1240, -0.0952, -0.0314, -0.2607, -0.2297, -0.0313, -0.2222,  0.0604,  0.0751, -0.2002, -0.0007, -0.0483, -0.0060,  0.1510,  0.0463,  0.0860,  0.1864,  0.0117, -0.0617,  0.0411, -0.1988,  0.1951, -0.2826,  0.0027,  0.0023, -0.1411, -0.0045,  0.0441, -0.1221, -0.0388, -0.1334, -0.0860,  0.0669, -0.0541, -0.1422, -0.1602,  0.0281,  0.0739, -0.1341,  0.1331, -0.2496, -0.2959,  0.1078, -0.1733,  0.1176,  ]
    
)

precision(fin7_whole[0][0].detach(), fhe_vector)

np.float64(-0.024578977829590398)

In [341]:
fhe_vector = np.array(
    
    [  1.0847,  0.0611, -0.2553, -0.2195, -0.0768, -0.0351, -0.2793,  0.2342, -0.0630, -0.0477,  0.0375,  0.2755, -0.1587,  0.0321,  0.1981, -0.0209, -0.2383,  0.0620, -0.0595,  0.0948, -0.1580, -0.2057,  0.1679,  0.0702,  0.1813, -0.2547, -0.0055,  0.0484, -0.1623, -0.1362, -0.1488, -0.3694, -0.2211,  0.0102, -0.0095, -0.1292, -0.0872, -0.0490, -0.4730, -0.0726, -0.1776, -0.0085,  0.0375, -0.0688, -0.1952, -0.3103, -0.2791, -0.0349, -0.0305, -0.1331,  0.1256,  0.1690,  0.0814,  0.0670, -0.1539, -0.0325,  0.0855, -0.2624, -0.0434,  0.2756, -0.1079, -0.1955, -0.1846, -0.0310, -0.2145,  0.1789, -0.0162,  0.1133, -0.0336,  0.0830, -0.2897, -0.0024,  0.1381,  0.0675, -0.1530,  0.0068, -0.0959,  0.0483,  0.0310, -0.0303, -0.1648,  0.0197, -0.0259,  0.1240, -0.0952, -0.0314, -0.2607, -0.2297, -0.0313, -0.2222,  0.0604,  0.0751, -0.2002, -0.0007, -0.0483, -0.0060,  0.1510,  0.0463,  0.0860,  0.1864,  0.0117, -0.0617,  0.0411, -0.1988,  0.1951, -0.2826,  0.0027,  0.0023, -0.1411, -0.0045,  0.0441, -0.1221, -0.0388, -0.1334, -0.0860,  0.0669, -0.0541, -0.1422, -0.1602,  0.0281,  0.0739, -0.1341,  0.1331, -0.2496, -0.2959,  0.1078, -0.1733,  0.1176,  ]
    
)

precision2(fin7_whole[0][0].detach(), fhe_vector)

np.float64(0.8501277285827478)

### 9) Pooler

In [345]:
pooler_output = torch.tanh(torch.matmul(fin7_whole.double(), model.bert.pooler.dense.weight.transpose(0, 1).double()) + model.bert.pooler.dense.bias)

fhe_vector = np.array(
    
    [ -0.6346, -0.0650,  0.0267,  0.0610, -0.5782,  0.3437, -0.4031, -0.3281, -0.1279,  0.0476, -0.3416,  0.1696,  0.1898,  0.7102, -0.6566,  0.1824,  0.1737,  0.2187, -0.6644, -0.1178,  0.2368,  0.0892,  0.3011, -0.3299, -0.5739,  0.1943, -0.6284,  0.1991, -0.0372,  0.2816, -0.0748, -0.1675, -0.2480, -0.1866,  0.1860,  0.2175, -0.1061, -0.0148, -0.0286, -0.7110,  0.4500,  0.4342, -0.6785,  0.4407, -0.7369,  0.1436, -0.3678,  0.3521,  0.1367,  0.2824,  0.4962,  0.0631, -0.0591,  0.3955,  0.3076,  0.8287, -0.8286,  0.3120,  0.1270, -0.2107, -0.0011, -0.4210,  0.2884,  0.0290,  0.1905, -0.5871, -0.0672, -0.6557,  0.7242,  0.2758,  0.4952, -0.0211, -0.4164, -0.0795,  0.1127, -0.4440,  0.4295,  0.0365, -0.3386,  0.0857, -0.0855, -0.1366, -0.1713, -0.7052,  0.2234, -0.4501, -0.0315, -0.2941, -0.1579,  0.1101, -0.5390,  0.1212, -0.0321,  0.5797,  0.2196,  0.3102, -0.1403, -0.5249, -0.2462, -0.2373, -0.1386,  0.3026, -0.2741, -0.0313, -0.3946,  0.1681, -0.5971, -0.0709, -0.0502,  0.2074,  0.3892,  0.3376, -0.0388,  0.4865, -0.7536, -0.0960,  0.2474,  0.4052,  0.1590, -0.4597, -0.1839, -0.5187, -0.2373,  0.5215, -0.6432,  0.2815,  0.5059,  0.5057 ]
    
    
)

precision(pooler_output[0][0].detach(), fhe_vector)

np.float64(-0.0698192332561256)

In [346]:
precision2(pooler_output[0][0].detach(), fhe_vector)

np.float64(0.17802239020784338)

### 10) Classifier

In [280]:
classification = torch.matmul(pooler_output, model.classifier.weight.transpose(0, 1).double()) + model.classifier.bias.double()


In [281]:
classification[0][0].detach()

tensor([-0.3530, -0.3917, -1.9960,  6.4267, -0.0741, -2.8484],
       dtype=torch.float64)

In [295]:
fhe_vector = np.array(
    [  0.2942,  1.6590,  0.0011,  0.0002,  0.0001,  0.0007 ]

)

precision(classification[0][0].detach(), fhe_vector)

np.float64(0.635767630105996)

In [292]:
classification = torch.matmul(pooler_output, model.classifier.weight.transpose(0, 1).double()) + model.classifier.bias.double()

fhe_vector = np.array([ -2.5649,  2.4694 ])

precision(classification[0][0].detach(), fhe_vector)

ValueError: operands could not be broadcast together with shapes (6,) (2,) 

In [48]:
print("Plain circuit output: {}\nFHE circuit output: {}".format(classification[0][0].detach().numpy(), fhe_vector))

Plain circuit output: [-2.61671689  2.49186753]
FHE circuit output: [-2.5649  2.4694]


---

## Extra

Some precisions and errors of some sentences. The two vectors were obtained by launching ./NewBERT in verbose mode

In [18]:
#Sentence: "This movie is just empty rethoric and a total aesthetic no sense"
precision(np.array([ 2.09596, -1.79233]), np.array([  2.1071, -1.7938 ]))

0.9967569291385159

In [19]:
#Sentence: "This movie is just empty rethoric and a total aesthetic no sense"
relative_error(np.array([ 2.09596, -1.79233]), np.array([  2.1071, -1.7938 ]))

0.003008168094811025

---

In [20]:
#Sentence: "Sunshine streamed through the window, brightening my spirits instantly"
precision(np.array([-1.23462,  1.41781]), np.array([ -1.0919,  1.2679 ]))

0.889674751077314

In [21]:
#Sentence: "Sunshine streamed through the window, brightening my spirits instantly"
relative_error(np.array([-1.23462,  1.41781]), np.array([ -1.0919,  1.2679 ]))

0.10319788970313368

---

In [22]:
#Sentence: "Hope blossomed anew in my heart, casting away shadows of doubt."
precision(np.array([-1.73595,  1.80724]), np.array([ -1.8774,  1.9805 ]))

0.9111789094008507

In [24]:
#Sentence: "Hope blossomed anew in my heart, casting away shadows of doubt."
relative_error(np.array([-1.73595,  1.80724]), np.array([ -1.8774,  1.9805 ]))

0.08706923264203975

---

In [25]:
#Sentence: "Frustration boiled within me as obstacles seemed insurmountable, disappointment lingering like a heavy fog, obscuring any glimmer of optimism."
#255 secondi - 20 words
precision(np.array( [ 1.94430, -1.53529]), np.array([  2.2199, -1.8629 ]))

0.8266433689026581

In [26]:
#Sentence: "Frustration boiled within me as obstacles seemed insurmountable, disappointment lingering like a heavy fog, obscuring any glimmer of optimism."
#255 secondi - 20 words
relative_error(np.array( [ 1.94430, -1.53529]), np.array([  2.2199, -1.8629 ]))

0.15512266625520754

---

In [27]:
#Sentence: "Sadness enveloped me in its suffocating embrace, draining all energy and hope, while fear gripped me tightly, exacerbating the sense of despair."
#265 secondi - 20 words
precision(np.array( [-1.20308,  1.29482]), np.array( [ -1.5387,  1.5612 ]))

0.758997557948677

In [28]:
#Sentence: "Sadness enveloped me in its suffocating embrace, draining all energy and hope, while fear gripped me tightly, exacerbating the sense of despair."
#265 secondi - 20 words
relative_error(np.array( [-1.20308,  1.29482]), np.array( [ -1.5387,  1.5612 ]))

0.23246474413431975

---

In [29]:
#Sentence: "in the end , we are left with something like two ships passing in the night rather than any insights into gay love , chinese society or the price one pays for being dishonest . 
#X secondi - 40 tokens
precision(np.array([ 1.94430, -1.53529]), np.array([  2.2194, -1.8624 ]))

0.8269307590836852

In [30]:
#Sentence: "in the end , we are left with something like two ships passing in the night rather than any insights into gay love , chinese society or the price one pays for being dishonest . 
#X secondi - 40 tokens
relative_error(np.array([ 1.94433, -1.5352]), np.array([  2.2194, -1.8624 ]))

0.15487854428003478

---