## This notebook shows a method to measure the precision of FHE computations

The FHE result vectors hard-coded in this notebook were obtained by running the C++ program in verbose mode.

To reproduce them, launch the following command:

```
./FHEBERT-tiny "Nuovo Cinema Paradiso has been an incredible movie! A gem in the italian culture." --verbose
```

* Use this notebook to compare the results of the FHE circuit with those of the plain model.

In [1]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
import numpy as np
import math
from matplotlib import pyplot as plt 
from datasets import load_dataset
import pandas as pd

def precision(correct, approx):
    """Return how closely ``approx`` matches the reference ``correct``.

    The score is ``1 - mean(|correct - approx|) / mean(|correct|)``, with both
    means taken along the first axis. A value of 1.0 means a perfect match.

    Parameters
    ----------
    correct : torch.Tensor or numpy.ndarray
        Reference values (e.g. the output of the plain model).
    approx : list, numpy.ndarray or torch.Tensor
        Approximated values (e.g. the decrypted FHE output); must have the
        same shape as ``correct``.

    Returns
    -------
    torch.Tensor or numpy.float64
        A float64 tensor when ``correct`` is a tensor, a NumPy value otherwise.
        The result is inf/nan when ``correct`` is identically zero.

    Raises
    ------
    ValueError
        If the inputs are empty or their shapes differ.
    """
    def to_array(values):
        # Explicit conversion: mixing tensors and ndarrays in one arithmetic
        # expression relies on an implicit NumPy/torch hand-over.
        if isinstance(values, torch.Tensor):
            values = values.detach().cpu().numpy()
        return np.asarray(values, dtype=np.float64)

    wants_tensor = isinstance(correct, torch.Tensor)
    reference = to_array(correct)
    estimate = to_array(approx)

    if reference.shape != estimate.shape:
        raise ValueError(
            "shape mismatch: correct has shape {}, approx has shape {}".format(
                reference.shape, estimate.shape))
    if reference.size == 0:
        raise ValueError("cannot compute the precision of empty inputs")

    absolute = np.abs(reference - estimate).mean(axis=0)
    # A zero reference yields inf/nan silently, as tensor arithmetic does.
    with np.errstate(divide="ignore", invalid="ignore"):
        relative = absolute / np.abs(reference).mean(axis=0)
    score = 1 - relative

    if wants_tensor:
        return torch.as_tensor(np.asarray(score, dtype=np.float64))
    return score

def relative_error(correct, approx):
    """Return the mean absolute error of ``approx``, relative to the scale of ``correct``.

    Each absolute error ``|correct - approx|`` is divided by the largest
    magnitude found in ``correct`` and the results are averaged along the
    first axis. Normalising by the magnitude (rather than by the signed
    maximum) keeps the error non-negative for negative-valued references.

    Parameters
    ----------
    correct : torch.Tensor or numpy.ndarray
        Reference values.
    approx : list, numpy.ndarray or torch.Tensor
        Approximated values, same shape as ``correct``.

    Returns
    -------
    torch.Tensor or numpy.float64
        A float64 tensor when ``correct`` is a tensor, a NumPy value otherwise.
        The result is inf/nan when ``correct`` is identically zero.

    Raises
    ------
    ValueError
        If the inputs are empty or their shapes differ.
    """
    def to_array(values):
        # Explicit conversion avoids implicit tensor/ndarray mixing.
        if isinstance(values, torch.Tensor):
            values = values.detach().cpu().numpy()
        return np.asarray(values, dtype=np.float64)

    wants_tensor = isinstance(correct, torch.Tensor)
    reference = to_array(correct)
    estimate = to_array(approx)

    if reference.shape != estimate.shape:
        raise ValueError(
            "shape mismatch: correct has shape {}, approx has shape {}".format(
                reference.shape, estimate.shape))
    if reference.size == 0:
        raise ValueError("cannot compute the relative error of empty inputs")

    scale = np.abs(reference).max()
    with np.errstate(divide="ignore", invalid="ignore"):
        relative_errors = np.abs(reference - estimate) / scale
    result = relative_errors.mean(axis=0)

    if wants_tensor:
        return torch.as_tensor(np.asarray(result, dtype=np.float64))
    return result

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from transformers import logging

# Only report errors; the library is otherwise chatty when loading checkpoints.
logging.set_verbosity_error()

# Emotion classification checkpoint, shared by the tokenizer and the model.
checkpoint = "gokuls/BERT-tiny-emotion-intent"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

# Switch to evaluation mode; as the last expression of the cell this also
# displays the model architecture.
model.eval()

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 128, padding_idx=0)
      (position_embeddings): Embedding(512, 128)
      (token_type_embeddings): Embedding(2, 128)
      (LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-1): 2 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=128, out_features=128, bias=True)
              (key): Linear(in_features=128, out_features=128, bias=True)
              (value): Linear(in_features=128, out_features=128, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=128, out_features=128, bias=True)
              (LayerNorm): LayerNorm((128,), eps=1e-1

In [3]:
# Sentence to classify, wrapped between the [CLS] and [SEP] special tokens.
text = "[CLS] {} [SEP]".format("i feel a little nervous i go to the gym")

In [4]:
# Client-side step: tokenise the sentence and compute its embeddings in the clear.

tokenized = tokenizer(text)
tokenized_text = tokenizer.tokenize(text)
indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)

# Add a leading batch axis: (num_tokens,) -> (1, num_tokens).
tokens_tensor = torch.tensor(indexed_tokens).unsqueeze(0)

# The second positional argument is a tensor of ones shaped like the ids.
# NOTE(review): this is presumably consumed as token_type_ids — confirm that
# all-ones (rather than all-zeros) is what the FHE pipeline expects.
x = model.bert.embeddings(tokens_tensor, torch.ones_like(tokens_tensor))

In [None]:
# Write the embedding of each token to its own comma-separated text file.
for i, token_embedding in enumerate(x[0]):
    np.savetxt('../sample-inputs/0/input_{}.txt'.format(i), token_embedding.detach(), delimiter=',')

### 1) Layer 1 -- Self-Attention

In [5]:
# Plaintext reference for the self-attention of encoder layer index 0.
# The projection weights are transposed so that activations can be
# right-multiplied: x @ W^T + b.
key = model.bert.encoder.layer[0].attention.self.key.weight.clone().detach().double().transpose(0, 1)
query = model.bert.encoder.layer[0].attention.self.query.weight.clone().detach().double().transpose(0, 1)
value = model.bert.encoder.layer[0].attention.self.value.weight.clone().detach().double().transpose(0, 1)

key_bias = model.bert.encoder.layer[0].attention.self.key.bias.clone().detach().double()
query_bias = model.bert.encoder.layer[0].attention.self.query.bias.clone().detach().double()
value_bias = model.bert.encoder.layer[0].attention.self.value.bias.clone().detach().double()

# Client-side embeddings in float64; the copy named `original_input_tensor`
# is reused later for the residual connection of the self-output step.
original_input_tensor = x.double()
input_tensor = original_input_tensor

# Head geometry is derived from the model configuration instead of being
# hard-coded (this checkpoint was previously hard-coded as 2 heads of 64
# features, i.e. a scaling factor of 8 = sqrt(64)).
seq_len = input_tensor.size()[1]
num_heads = model.config.num_attention_heads
head_dim = input_tensor.size()[-1] // num_heads

q = torch.matmul(input_tensor, query) + query_bias
k = torch.matmul(input_tensor, key) + key_bias
v = torch.matmul(input_tensor, value) + value_bias

# Split the feature axis into heads: (1, seq, hidden) -> (1, seq, heads, head_dim).
q = q.reshape([1, seq_len, num_heads, head_dim])
k = k.reshape([1, seq_len, num_heads, head_dim])
v = v.reshape([1, seq_len, num_heads, head_dim])

# q -> (1, heads, seq, head_dim) and k -> (1, heads, head_dim, seq), so that
# q @ k yields one (seq, seq) score matrix per head.
q = q.permute([0, 2, 1, 3])
k = k.permute([0, 2, 3, 1])

# Scaled dot-product scores, normalised over the key axis.
qk = torch.matmul(q, k)
qk = qk / math.sqrt(head_dim)

qk_softmaxed = torch.softmax(qk, -1)

v = v.permute([0, 2, 1, 3])

# Weighted sum of the values, then merge the heads back: (1, seq, hidden).
fin = torch.matmul(qk_softmaxed, v)
fin = fin.permute([0, 2, 1, 3])
fin = fin.reshape([1, seq_len, num_heads * head_dim])

In [6]:
fhe_vector = np.array([ -0.5542,  0.0719,  0.9474, -0.0088,  0.7037,  0.1251,  0.1044, -0.4588, -1.5154, -0.2117, -0.3926, -0.5190,  0.4942,  0.1907, -0.8218, -0.3821, -0.3927, -1.1429,  0.8639, -0.0676, -0.2135, -1.1685, -0.7865,  0.5513, -0.0571, -0.0879,  0.7648, -0.9639,  0.7985, -0.7295, -0.5001,  0.0935, -0.4595, -0.1975, -0.6440,  0.6960, -0.9751, -0.6709, -0.0087,  0.1851, -0.8138,  0.1878,  0.9431,  0.3623,  0.0023, -0.4644,  2.0706,  0.7929, -0.1158,  0.8846,  0.0744, -0.4808,  0.0367, -0.0409, -1.2362, -0.1041,  0.5177,  0.6630, -0.7016,  0.9057,  0.0420, -0.7834,  0.1744,  0.2567,  0.0316, -0.4705,  0.0308, -0.7412,  1.4350,  1.3925,  0.1120, -0.5018,  1.0305,  0.7995, -0.4587,  0.3468,  0.2135,  0.0280,  0.6599, -1.1008,  1.8387, -0.5121,  0.3195, -0.0219,  0.7471, -1.6983, -2.2450, -1.0583, -0.5496, -0.5253, -0.5452,  1.1787, -0.6951,  0.2187,  0.2595,  1.1157,  0.6468,  1.2580, -0.2199,  1.4540, -1.1090, -0.3132, -1.2607, -0.5839, -0.6725,  0.2050,  1.6777, -0.7951, -1.0366, -0.5607,  0.1646, -0.7092,  0.4991, -0.2014,  0.5676, -1.5736,  1.0879, -0.1717,  0.5201,  0.2711,  1.2065,  0.2973, -1.2823, -0.8824,  0.0188, -0.7935,  1.3508, -0.6156 ]
                        
)
# Compare the first token's row of the plaintext attention output with the FHE vector defined just above.
precision(fin[0][0].detach(), fhe_vector)

  absolute = sum(abs(correct - approx))/len(correct)


tensor(0.9977, dtype=torch.float64)

### 2) Layer 1 -- Self-Output

In [7]:
# Dense projection of the layer-0 attention output block, in float64.
# The weight is transposed so it can be right-multiplied onto the activations.
w_output_dense = model.bert.encoder.layer[0].attention.output.dense.weight.detach().clone().double().t()
b_output_dense = model.bert.encoder.layer[0].attention.output.dense.bias.detach().clone().double()

#mean = np.array([-0.03383045433490704, -0.04689138747464171, -0.04320052751297194, -0.04194874763842685, -0.03849735236740709, -0.03583471496309556, -0.036673685450259945, -0.03533623114666153, -0.03301200050649906, -0.03385619903604035, -0.03394064677150061, -0.03581378040060232, -0.04000193681582013, -0.042994980738727644, -0.042689484809151766, -0.0422699887342667, -0.040702211423783496, -0.043257636922742766, -0.040924377288572664, -0.04212762593354266, -0.040090620729304687, -0.03727317047412721, -0.030603299343800818, -0.034141189654495016, -0.03468711091296442, -0.032307857857310274, -0.02926372943560165, -0.031292906450152466, -0.037837883896213766, -0.03745859562807607, -0.03794657692710982, -0.03860214509229593, -0.036185650111238955, -0.039154371235979875, -0.03589729976884486, -0.031731895884233016, -0.03465287223481833, -0.031348414682812194, -0.03688161652969029, -0.03338290816163936, -0.038240660222183975, -0.037525466450406116, -0.038229222217722264, -0.041201914113547705, -0.04212576296359885, -0.03980083151775188, -0.04072657806877826, -0.040145599490268025, -0.036685242667777444, -0.034109016054392725, -0.03544325775104831, -0.03623692053970561, -0.04948334692050963, -0.04596823422981405, -0.04892271117435003])
#var = np.array([0.7495962428549272, 0.6109555428467895, 0.6225590467577651, 0.62495153067201, 0.631395549935461, 0.634492711694546, 0.644892789064359, 0.6542099965205022, 0.6595559062153842, 0.6659906881037033, 0.6680168012366937, 0.6758412527257586, 0.6668118068796066, 0.6718192460326265, 0.67786737736941, 0.6808577853930836, 0.6736657333151266, 0.6676446046843724, 0.6659979061989304, 0.6743226078654423, 0.681388263935704, 0.6837117808950258, 0.6907147768934253, 0.684537831509984, 0.6896744328697597, 0.6916627127801457, 0.6954043965468235, 0.6954046755145293, 0.7001025287354249, 0.695094327647078, 0.6854203403085795, 0.7027792682295838, 0.6956849098218769, 0.6945153573872891, 0.6856697060013522, 0.6897353511373785, 0.700668908202082, 0.6965624918742969, 0.7082690699456209, 0.7043163331126293, 0.7070770512949652, 0.7042510307314358, 0.6978925459183357, 0.7205035876616076, 0.6902461198740245, 0.686971254827903, 0.7028843270104062, 0.7032880792671149, 0.7057843340136714, 0.7104860015626775, 0.7321738164781159, 0.71095817492914, 0.7401485084476891, 0.7312957890728539, 0.7375994654874705])
# Precomputed normalisation statistics, one entry per token position.
# NOTE(review): `var` is applied as a multiplicative factor below, so the file
# presumably stores 1/sqrt(variance) rather than the variance itself — confirm.
mean = np.loadtxt("../emotion-precompute/layer0_selfoutput_mean.txt", delimiter=",")
var = np.loadtxt("../emotion-precompute/layer0_selfoutput_var.txt", delimiter=",")

# Dense projection of the attention output, plus the residual connection.
fin2 = torch.matmul(fin, w_output_dense) + b_output_dense
fin2_backup = fin2 + original_input_tensor

# LayerNorm affine parameters (loop-invariant, so read only once).
w_output_layernorm = model.bert.encoder.layer[0].attention.output.LayerNorm.weight.clone().detach().double().unsqueeze(0)
b_output_layernorm = model.bert.encoder.layer[0].attention.output.LayerNorm.bias.clone().detach().double()

# squeeze(0) removes only the batch axis, so a one-token sequence keeps its
# (1, hidden) shape instead of collapsing to (hidden,).
residual_rows = fin2_backup.squeeze(0).detach()
num_tokens = residual_rows.size(0)

# One statistic per token, shaped (num_tokens, 1) to broadcast over features.
mean_col = torch.as_tensor(mean, dtype=torch.float64).reshape(-1)[:num_tokens].unsqueeze(1)
var_col = torch.as_tensor(var, dtype=torch.float64).reshape(-1)[:num_tokens].unsqueeze(1)
assert mean_col.size(0) == num_tokens and var_col.size(0) == num_tokens, \
    "precomputed statistics cover fewer positions than the input has tokens"

# Normalisation with the precomputed statistics. The exact alternative would
# use the statistics of each row: (row - torch.mean(row)) / math.sqrt(torch.var(row)).
fin3_whole = (residual_rows - mean_col) * var_col * w_output_layernorm + b_output_layernorm

# Restore the batch axis: (1, num_tokens, hidden).
fin3_whole = fin3_whole.unsqueeze(0)

In [8]:
fhe_vector = np.array([ -0.0418,  0.5408, -12.3399, -0.6920,  0.8008,  0.6721,  0.3253,  0.8535, -0.7000,  0.2170,  0.1085,  0.0671,  0.9487,  1.1281, -1.2033, -0.4951, -0.0588,  1.5033,  0.9878,  0.1433,  0.2498, -0.3061,  1.9022,  1.5137,  0.6878,  0.7957, -0.6076, -1.1800, -0.3753,  0.2227, -0.3583,  0.8381, -5.1350,  0.8886, -0.1966,  0.6240,  1.5271, -2.4589,  1.2481,  1.4218,  1.9707,  0.5162,  1.9949,  2.1307,  0.5443, -0.1204,  1.1623,  0.1931,  0.4621,  0.3056, -0.0550,  0.9554, -1.6540,  0.7389,  1.5180, -0.4281, -0.6251,  0.0338,  0.7734,  1.1474,  0.0097, -0.2841, -0.3144,  1.5948, -0.0649,  0.8615, -0.2177, -0.1785, -0.1224,  0.1428,  0.9311,  0.0515,  0.6020, -2.7831,  0.1628, -1.6625, -0.4576,  0.0007,  0.5674, -2.6785, -0.7723, -0.7218,  0.7436,  0.9825,  0.0021,  0.1540,  0.8460, -1.6900, -0.1645, -1.8849, -1.3262, -0.3934, -0.3519, -0.0043,  0.1654, -0.3789,  0.5630, -0.1479, -0.3587, -0.5441, -0.7190, -0.8631, -0.5297,  0.7082,  0.3793,  2.0221, -0.4232,  0.5602,  0.4818, -0.1944, -0.9211,  2.1624, -0.5661, -0.3897, -0.3378,  0.3956,  0.7880,  0.6636,  0.0191,  0.7121, -1.5808,  1.1844, -0.5862,  0.0763,  1.6999, -0.8687, -0.3733,  0.8019,  ]
                      
)
# Compare the first token's row of the plaintext self-output with the FHE vector defined just above.
precision(fin3_whole[0][0].detach(), fhe_vector)

  absolute = sum(abs(correct - approx))/len(correct)


tensor(0.9981, dtype=torch.float64)

### 3) Layer 1 -- Intermediate

In [9]:
# Intermediate block of encoder layer index 0: dense projection, then GELU.
# The bias is not cast explicitly; presumably type promotion brings the sum to float64.
intermediate_dense = model.bert.encoder.layer[0].intermediate.dense
fin_4 = fin3_whole @ intermediate_dense.weight.t().double() + intermediate_dense.bias
fin_5 = torch.nn.functional.gelu(fin_4)

In [10]:
fhe_vector = np.array([ -0.1666,  0.4183,  0.1968, -0.1532,  0.0075, -0.0907, -0.0209,  0.5879, -0.1257, -0.0837, -0.1700,  0.1767,  0.4635,  0.0131, -0.0976, -0.0008, -0.1698,  0.1983, -0.1218, -0.0963, -0.1095,  0.2922,  0.3653, -0.1538,  0.7946, -0.1622,  0.4493,  0.2022, -0.1551,  0.0738, -0.0675, -0.1384,  0.2047,  0.0005,  0.2951, -0.0225,  0.0374, -0.0900,  0.6634,  0.3867, -0.1395, -0.1566,  0.4066, -0.1434, -0.1173, -0.0721,  0.4753,  0.1785,  1.2128, -0.0010, -0.0365, -0.1687, -0.1478,  0.9672, -0.0478, -0.0266,  1.0411, -0.1240,  0.3135,  0.0886,  0.6621,  0.9160,  0.1100, -0.1088, -0.1678, -0.1346, -0.1354, -0.0701,  1.0184,  0.5991,  0.1190, -0.1630, -0.1148, -0.0621, -0.1683,  0.6924,  1.5253, -0.1476,  0.0691, -0.1613,  0.8959, -0.1672, -0.0851, -0.0333, -0.0348, -0.1566, -0.1632, -0.1301, -0.0101,  0.0181, -0.1143,  0.0951,  1.0461, -0.1431,  0.1090,  0.2095, -0.0092, -0.0093,  0.1578, -0.1689,  0.2681, -0.0085,  0.3058, -0.1463, -0.0168,  0.6450, -0.1202,  1.5885,  0.2591,  0.5525,  0.7315,  0.8492, -0.1668, -0.1699,  0.1607,  1.0363, -0.0204, -0.0626, -0.1704, -0.1134, -0.1122, -0.1231, -0.1657,  0.0571, -0.0360,  0.1016,  0.1446,  0.3508 ]
                      
)
# Only the first 128 entries of the first token's activation are compared
# (presumably because the FHE vector above holds 128 values — confirm).
precision(fin_5[0][0][:128].detach(), fhe_vector)

  absolute = sum(abs(correct - approx))/len(correct)


tensor(0.9965, dtype=torch.float64)

### 4) Layer 1 -- Output

In [11]:
#mean = np.array([-0.09545516102868973, 0.034540955180462664, 0.03934738149667437, 0.040802318439555035, 0.04426037798445811, 0.04919343175846099, 0.0493616301294401, 0.047896279398118795, 0.04912640635535303, 0.048717249992826256, 0.0477219385203478, 0.05095357678578503, 0.05094908370417657, 0.0493275745992752, 0.048418324664654545, 0.0473653504669205, 0.04528009986283869, 0.04524247257539856, 0.046555073355952846, 0.0516135997743503, 0.049103903254210594, 0.048877585502238356, 0.048364988370661784, 0.049043507301742846, 0.049933470462367846, 0.05175179126331398, 0.05057227793143223, 0.055763206569478994, 0.055243365455213404, 0.04986745821758072, 0.047789218698650125, 0.047852162700887234, 0.04279460740337753, 0.04280733225675328, 0.04644169155736491, 0.04783492130826333, 0.04759649093761958, 0.045252139153821, 0.04367184005341422, 0.039034762655413016, 0.04374965234639466, 0.04355128435775863, 0.04499861862695065, 0.04318602336450084, 0.04549296197766528, 0.03907804279518851, 0.037683132925437485, 0.04109696491189214, 0.04410155617431274, 0.05015992918511731, 0.04335430986396108, 0.046492484403760526, 0.044277581701870204, 0.03723061917091777, 0.039156973130334664])
#var = np.array([0.4156698594967092, 0.7008452266859936, 0.7214270983257646, 0.7095727482866087, 0.7102521835201318, 0.710293676073547, 0.7091783271698753, 0.6973493176419543, 0.7011688527520855, 0.7007704875343309, 0.6950537183089973, 0.6948029158092094, 0.6919309911197036, 0.6933694537037308, 0.6970711644923971, 0.7004276850010867, 0.6964234913676165, 0.6987678419874651, 0.6951829293138483, 0.6973048809142951, 0.6989420799277399, 0.7005696487948311, 0.6993937733493811, 0.6902070532566239, 0.6958399824203775, 0.6900361005407983, 0.6925891359742274, 0.6831642926666377, 0.6865279710039072, 0.6904370385593245, 0.6963724536275457, 0.6948942601360332, 0.6784634186071326, 0.6759657478656234, 0.6828578884489792, 0.683566347862741, 0.6857777074044566, 0.672040915409448, 0.6784995422914343, 0.6732453264186854, 0.683881765911935, 0.6909411690410042, 0.6715428435769978, 0.6775867807314924, 0.6785015863916147, 0.676156117696202, 0.6786376609996214, 0.6763771062984715, 0.7119440584663215, 0.7070342067744777, 0.6895996022331654, 0.6683970656272868, 0.6695013664908844, 0.6566575067124804, 0.672887703816164])    
# Precomputed normalisation statistics, one entry per token position.
# NOTE(review): `var` is applied as a multiplicative factor below, so the file
# presumably stores 1/sqrt(variance) rather than the variance itself — confirm.
mean = np.loadtxt("../emotion-precompute/layer0_output_mean.txt", delimiter=",")
var = np.loadtxt("../emotion-precompute/layer0_output_var.txt", delimiter=",")

# Output dense projection of the intermediate activations, plus the residual
# connection with the self-output result.
fin_6 = torch.matmul(fin_5, model.bert.encoder.layer[0].output.dense.weight.transpose(0, 1).double()) + model.bert.encoder.layer[0].output.dense.bias
fin_6 = fin_6 + fin3_whole

# LayerNorm affine parameters (loop-invariant, so read only once).
w_output_layernorm = model.bert.encoder.layer[0].output.LayerNorm.weight.clone().detach().double().unsqueeze(0)
b_output_layernorm = model.bert.encoder.layer[0].output.LayerNorm.bias.clone().detach().double()

# squeeze(0) removes only the batch axis, so a one-token sequence keeps its
# (1, hidden) shape instead of collapsing to (hidden,).
residual_rows = fin_6.squeeze(0).detach()
num_tokens = residual_rows.size(0)

# One statistic per token, shaped (num_tokens, 1) to broadcast over features.
mean_col = torch.as_tensor(mean, dtype=torch.float64).reshape(-1)[:num_tokens].unsqueeze(1)
var_col = torch.as_tensor(var, dtype=torch.float64).reshape(-1)[:num_tokens].unsqueeze(1)
assert mean_col.size(0) == num_tokens and var_col.size(0) == num_tokens, \
    "precomputed statistics cover fewer positions than the input has tokens"

# Normalise with the precomputed statistics, then apply the affine transform.
fin7_whole = (residual_rows - mean_col) * var_col * w_output_layernorm + b_output_layernorm

# Restore the batch axis: (1, num_tokens, hidden).
fin7_whole = fin7_whole.unsqueeze(0)

In [12]:
fhe_vector = np.array([ -0.0120,  0.5486, -6.6033, -0.1106,  0.8583,  0.4443,  0.3820,  0.3448, -0.3378,  0.5562, -0.2957,  0.2118,  0.4156,  0.5676, -1.3937, -0.2626, -0.7907,  0.8006,  0.3897,  0.3465,  0.0768, -0.5150,  1.1491,  1.4103,  0.8494,  1.3278, -0.3068, -0.5464,  0.3106,  0.4942, -0.1949,  0.4316, -3.1542,  0.5037, -0.0725,  0.3758,  0.5979, -1.1718,  0.5603,  0.9410,  1.4149, -0.1597,  0.4686,  0.9075,  0.9958,  1.0370,  0.6828,  0.3716,  0.5562,  0.3594, -0.3342,  0.8960, -1.0451,  0.2319,  0.6455, -0.0679, -0.4709,  0.3992,  0.5782,  0.3053,  0.2197, -0.3125, -0.1831,  0.8909,  0.2371,  0.4192, -0.1301, -0.1076,  0.3891, -0.0305,  0.5730,  0.1921,  0.2921, -1.3818, -0.0412, -0.6610, -0.1131, -0.1042,  1.0951, -0.7407,  0.0817, -0.2289,  0.4386,  0.5363,  0.4861, -0.2177,  0.5716, -0.7239,  0.0931, -1.2598, -0.5337,  0.3003,  0.2413, -0.5270,  0.7001, -0.2053,  1.1586, -0.3371, -0.2970, -0.3900,  0.0234, -0.9227, -0.0716,  0.4865,  0.1098,  0.9767, -0.4688,  0.6634,  0.0457, -0.9731, -1.0640,  2.1444,  0.0574,  0.2665, -0.6826, -0.0749, -0.0255,  0.4271,  0.0560,  0.5442, -1.3444,  1.2289,  0.1541, -0.1760,  0.6261, -1.4095,  0.1523,  0.6567,  ]

)
# Compare the first token's row of the plaintext layer output with the FHE vector defined just above.
precision(fin7_whole[0][0].detach(), fhe_vector)

  absolute = sum(abs(correct - approx))/len(correct)


tensor(0.9979, dtype=torch.float64)

### 5) Layer 2 -- Self-Attention

In [13]:
# Plaintext reference for the self-attention of encoder layer index 1.
# The projection weights are transposed so that activations can be
# right-multiplied: x @ W^T + b.
key = model.bert.encoder.layer[1].attention.self.key.weight.clone().detach().double().transpose(0, 1)
query = model.bert.encoder.layer[1].attention.self.query.weight.clone().detach().double().transpose(0, 1)
value = model.bert.encoder.layer[1].attention.self.value.weight.clone().detach().double().transpose(0, 1)

key_bias = model.bert.encoder.layer[1].attention.self.key.bias.clone().detach().double()
query_bias = model.bert.encoder.layer[1].attention.self.query.bias.clone().detach().double()
value_bias = model.bert.encoder.layer[1].attention.self.value.bias.clone().detach().double()

# The input is the output of the previous encoder layer.
# NOTE: `fin7_whole` is reassigned again by the output cell of this layer, so
# this cell must be run right after the previous layer's output cell.
original_input_tensor = fin7_whole
input_tensor = fin7_whole

# Head geometry is derived from the model configuration instead of being
# hard-coded (this checkpoint was previously hard-coded as 2 heads of 64
# features, i.e. a scaling factor of 8 = sqrt(64)).
seq_len = input_tensor.size()[1]
num_heads = model.config.num_attention_heads
head_dim = input_tensor.size()[-1] // num_heads

q = torch.matmul(input_tensor, query) + query_bias
k = torch.matmul(input_tensor, key) + key_bias
v = torch.matmul(input_tensor, value) + value_bias

# Split the feature axis into heads: (1, seq, hidden) -> (1, seq, heads, head_dim).
q = q.reshape([1, seq_len, num_heads, head_dim])
k = k.reshape([1, seq_len, num_heads, head_dim])
v = v.reshape([1, seq_len, num_heads, head_dim])

# q -> (1, heads, seq, head_dim) and k -> (1, heads, head_dim, seq), so that
# q @ k yields one (seq, seq) score matrix per head.
q = q.permute([0, 2, 1, 3])
k = k.permute([0, 2, 3, 1])

# Scaled dot-product scores, normalised over the key axis.
qk = torch.matmul(q, k)
qk = qk / math.sqrt(head_dim)

qk_softmaxed = torch.softmax(qk, -1)

v = v.permute([0, 2, 1, 3])

# Weighted sum of the values, then merge the heads back: (1, seq, hidden).
fin = torch.matmul(qk_softmaxed, v)
fin = fin.permute([0, 2, 1, 3])
fin = fin.reshape([1, seq_len, num_heads * head_dim])

In [16]:
fhe_vector = np.array([ -1.9877, -1.4830,  1.9146,  2.0213,  0.8907, -0.8566,  1.0249,  0.6252,  0.8681,  0.3785, -1.6795,  0.9832,  0.8658, -1.9629,  0.0380, -1.0218,  1.8736, -1.0530, -1.6594, -0.4108, -0.7120,  0.2787,  1.0927, -0.3104, -0.5564,  1.5031,  0.4084,  0.7758, -0.5010,  0.9065, -0.1907, -0.0558, -0.1385,  0.8608,  0.7915,  1.4249, -0.0305, -1.7621, -0.7407,  1.1759,  1.2347, -0.3178, -1.1192,  1.1820, -0.0036, -0.9346, -0.6442, -1.6246, -1.3566,  0.6263, -0.5726,  1.4807, -1.4005, -0.6772,  0.7417, -1.6939, -3.5574,  0.2694, -0.6487, -1.7074,  0.2188, -1.6808, -0.7876,  0.5837,  0.1379,  1.2913,  0.2561,  1.6398,  0.0151,  0.3172,  0.8035, -0.5944,  0.5230,  0.8140, -0.4006, -0.6336, -0.2420, -0.9460,  0.2377,  1.5254, -0.1901,  0.1242,  1.1787, -1.5114, -0.6035,  0.0366,  0.0373,  0.2288, -0.2589,  1.0377, -1.6010, -0.3093, -0.5897,  0.8214,  0.0416, -0.7468, -0.7746, -1.5290,  0.1538, -0.2530,  0.4424,  0.1981,  0.2825,  0.3685, -0.2992, -0.4221, -0.4671, -0.6263,  0.0886, -0.7956, -0.3377, -0.0735,  0.9246,  0.2083, -0.2158,  0.2121, -0.4304,  0.4255, -0.7585, -0.3969, -0.6141, -0.2297,  1.1858,  0.8373, -0.4362,  0.0697,  0.2489, -0.9424 ]
                      
)
# Compare the first token's row of the plaintext attention output with the FHE vector defined just above.
precision(fin[0][0].detach(), fhe_vector)

  absolute = sum(abs(correct - approx))/len(correct)


tensor(0.8249, dtype=torch.float64)

### 6) Layer 2 -- Self-Output

In [17]:
#mean = np.array([0.04805131047475803, 0.014145706172069285, 0.010630181813540026, 0.010521146572975027, 0.00956244983947186, 0.008211288558782809, 0.008817800275674387, 0.008911457532306733, 0.008643898058317862, 0.008801769546523253, 0.009472254700839258, 0.008094415948174241, 0.007702615754430344, 0.005460620353838359, 0.007021847370084451, 0.008373831982472147, 0.01022061224155272, 0.00927594903773269, 0.009277225000069925, 0.007049453120897054, 0.008682554190420182, 0.008749022040809715, 0.010118317324741522, 0.008998865743435887, 0.008763833543884292, 0.008285728555981435, 0.006967351876718886, 0.00588068616144895, 0.0030701809065725363, 0.003659716972971551, 0.002116778487431024, 0.003947434346765913, 0.006907859825079262, 0.008494112860837831, 0.007040283968419036, 0.007197681884381672, 0.008232685835987293, 0.009965029801574864, 0.00731962961637719, 0.00830555309310382, 0.005340440177451385, 0.007833324368720607, 0.01047456825511633, 0.009674864773662995, 0.010093537461664302, 0.01588798917017868, 0.018537933333636507, 0.018245848282989877, 0.012253993810893607, 0.011354133953173591, 0.013474744814287221, 0.013707011955501919, 0.007918842609048385, 0.017240907760895086, 0.03465881962238184])
#var = np.array([0.6741653046411179, 0.602392389437227, 0.5945841451997256, 0.5997135932136959, 0.6033806506910513, 0.6064839949503851, 0.6058735285405447, 0.6059001754921257, 0.6086086189801689, 0.6118981975241923, 0.6161533101614306, 0.6105411757987637, 0.6102443339235957, 0.6004337682468068, 0.6068584434133084, 0.6123178593290803, 0.6150302868629213, 0.6102744641580546, 0.6143169356654037, 0.6105845722771672, 0.61540315154488, 0.622109065598561, 0.6221720668578823, 0.6279330579960701, 0.6282907135959079, 0.6258439179151315, 0.6187239026398644, 0.618294817104495, 0.609488586748927, 0.6085185174201381, 0.6154275326252285, 0.6207534846328591, 0.6290521066315713, 0.6375810334496135, 0.6238236165346044, 0.6310571465398529, 0.6350551779511981, 0.6452639043477173, 0.6346915398812409, 0.646622546259538, 0.6435498445423712, 0.6401589932559348, 0.6458833892517316, 0.6354378204804867, 0.651796667347259, 0.6547600574517144, 0.6554038815336571, 0.655910889886979, 0.6412602949793637, 0.6489736968517984, 0.6633309254993116, 0.6771441398382873, 0.6423362709438692, 0.6302863730404997, 0.5940213893371686])
# Precomputed normalisation statistics, one entry per token position.
# NOTE(review): `var` is applied as a multiplicative factor below, so the file
# presumably stores 1/sqrt(variance) rather than the variance itself — confirm.
mean = np.loadtxt("../emotion-precompute/layer1_selfoutput_mean.txt", delimiter=",")
var = np.loadtxt("../emotion-precompute/layer1_selfoutput_var.txt", delimiter=",")

# Dense projection of the attention output block, in float64; the weight is
# transposed so it can be right-multiplied onto the activations.
w_output_dense = model.bert.encoder.layer[1].attention.output.dense.weight.clone().detach().double().transpose(0, 1)
b_output_dense = model.bert.encoder.layer[1].attention.output.dense.bias.clone().detach().double()

# Dense projection of the attention output, plus the residual connection.
fin2 = torch.matmul(fin, w_output_dense) + b_output_dense
fin2_backup = fin2 + original_input_tensor

# LayerNorm affine parameters (loop-invariant, so read only once).
w_output_layernorm = model.bert.encoder.layer[1].attention.output.LayerNorm.weight.clone().detach().double().unsqueeze(0)
b_output_layernorm = model.bert.encoder.layer[1].attention.output.LayerNorm.bias.clone().detach().double()

# squeeze(0) removes only the batch axis, so a one-token sequence keeps its
# (1, hidden) shape instead of collapsing to (hidden,).
residual_rows = fin2_backup.squeeze(0).detach()
num_tokens = residual_rows.size(0)

# One statistic per token, shaped (num_tokens, 1) to broadcast over features.
mean_col = torch.as_tensor(mean, dtype=torch.float64).reshape(-1)[:num_tokens].unsqueeze(1)
var_col = torch.as_tensor(var, dtype=torch.float64).reshape(-1)[:num_tokens].unsqueeze(1)
assert mean_col.size(0) == num_tokens and var_col.size(0) == num_tokens, \
    "precomputed statistics cover fewer positions than the input has tokens"

# Normalise with the precomputed statistics, then apply the affine transform.
fin3_whole = (residual_rows - mean_col) * var_col * w_output_layernorm + b_output_layernorm

# Restore the batch axis: (1, num_tokens, hidden).
fin3_whole = fin3_whole.unsqueeze(0)

In [18]:
fhe_vector = np.array([  0.0669,  0.0044, -2.3350, -0.1717,  0.5168,  0.0463, -0.7830,  0.8716, -1.1796, -0.9007,  0.9944, -0.8822,  0.5698,  0.2849, -1.4511, -0.7445, -1.1545,  0.7321, -0.0050,  1.0187, -0.0787,  0.2838,  0.3789,  1.8473,  1.2078,  1.2479, -0.4392, -0.8864,  1.5616, -0.0060, -0.1450,  0.1423, -1.4359,  1.6066,  0.9505,  1.4055,  1.2888, -1.0439,  0.1501,  2.5184,  1.4363,  1.2828, -0.8008,  1.9402,  2.0831, -1.0964, -1.7968, -0.6411,  0.0179,  0.6910, -0.2114,  1.6372, -1.5305,  0.7051,  0.9675,  0.5594, -0.8680, -0.8888,  0.2159,  0.3788, -0.0800, -0.1705, -0.2030,  0.8372,  0.6588,  0.6616, -0.9684, -1.6280,  0.0456, -0.4152,  1.1465, -0.7878, -0.1054,  0.0064, -0.2156, -0.5916, -1.6802, -0.9784,  1.0782, -1.3737, -0.2435, -1.7285, -0.6551,  2.4126,  1.2030,  0.1311, -0.4634,  0.3775, -0.3132, -2.5276,  0.5774, -0.7184, -1.5056, -0.8492, -0.8575, -0.5355,  0.0029, -1.0554, -0.1110, -0.8078,  0.8386, -1.7294, -0.7562,  0.3446, -0.6423,  0.2546, -0.4518,  2.9689, -0.9002,  1.3706, -1.5360,  1.9388,  1.0138,  0.0970, -0.6053, -0.1878,  0.2454,  1.3518,  0.3686,  1.0167, -1.1718,  1.9117,  2.5037, -1.0305, -0.1956, -1.2153, -0.5800,  2.2312,  ]

)
# Compare the first token's row of the plaintext self-output with the FHE vector defined just above.
precision(fin3_whole[0][0].detach(), fhe_vector)

  absolute = sum(abs(correct - approx))/len(correct)


tensor(0.8469, dtype=torch.float64)

### 7) Layer 2 -- Intermediate

In [19]:
# Intermediate block of encoder layer index 1: dense projection, then GELU.
# The bias is not cast explicitly; presumably type promotion brings the sum to float64.
intermediate_dense = model.bert.encoder.layer[1].intermediate.dense
fin_4 = fin3_whole @ intermediate_dense.weight.t().double() + intermediate_dense.bias
fin_5 = torch.nn.functional.gelu(fin_4)

In [20]:
fhe_vector = np.array([ -0.1655, -0.1356, -0.0014, -0.1677, -0.1583, -0.1111,  2.7682, -0.1073, -0.0375,  0.0061,  0.3141, -0.1537,  0.7626,  0.1516,  0.0357, -0.0258,  0.7552, -0.1491, -0.1398,  1.5133,  0.0085, -0.0106,  1.4653,  0.3449,  0.9197,  0.3253,  0.0641, -0.0344, -0.0798, -0.1627, -0.1628, -0.1695, -0.0739,  0.5975,  0.3554, -0.0222, -0.1411,  0.0099, -0.0512,  0.4094, -0.1320,  0.1884, -0.1512, -0.1601, -0.0004,  1.7037,  0.4401, -0.0993, -0.0052, -0.1681,  0.7568, -0.0774, -0.1302, -0.1631, -0.1453, -0.1391,  2.2019, -0.0101, -0.1368, -0.0564,  0.2976,  1.7425, -0.0589, -0.0614, -0.0085,  0.2564,  0.6468, -0.1612,  0.0534, -0.1698,  1.0100,  0.0046, -0.0215, -0.1693,  0.0997,  4.1191,  0.8089,  0.0048,  0.4293, -0.1764, -0.1605,  0.2272, -0.1236, -0.1541, -0.0119, -0.0588, -0.0637, -0.0008, -0.1547,  0.0158,  0.2145, -0.0871, -0.0830, -0.1067, -0.1163, -0.0090, -0.0660, -0.1193,  0.0508, -0.0910, -0.1654,  0.8100, -0.1336,  0.0221,  0.0384,  0.2076,  0.7089,  1.1456, -0.1003, -0.1643, -0.1326, -0.1166, -0.0171, -0.0047,  0.5934, -0.0055,  0.3204, -0.0348,  0.1469, -0.1687, -0.1524, -0.0880, -0.0003,  0.4035, -0.1664, -0.1644,  0.6626, -0.0070 ]

)
# Only the first 128 entries of the first token's activation are compared
# (presumably because the FHE vector above holds 128 values — confirm).
precision(fin_5[0][0][:128].detach(), fhe_vector)

  absolute = sum(abs(correct - approx))/len(correct)


tensor(0.7647, dtype=torch.float64)

### 8) Layer 2 -- Output

In [21]:
# Output dense projection of encoder layer index 1, followed by the residual
# connection with the self-output result.
output_dense = model.bert.encoder.layer[1].output.dense
fin_6 = fin_5 @ output_dense.weight.t().double() + output_dense.bias + fin3_whole

# Collects the normalised rows produced further down in this cell.
fin7_whole = list()

#mean = np.array([0.06643368, 0.05726708, 0.05311476, 0.05229822, 0.05352628, 0.05238868, 0.0536801 , 0.05327334, 0.05206954, 0.05110339, 0.051747  , 0.05016997, 0.04943122, 0.04937956, 0.04952862, 0.04973959, 0.04852742, 0.04696055, 0.04846476, 0.04925392,0.0509005 , 0.05373027, 0.05371865, 0.05446217, 0.05222489,0.05142676, 0.05080909, 0.05179351, 0.05049174, 0.04965748,0.05138143, 0.0499965 , 0.05194982, 0.05178364, 0.0521023 ,0.05059624, 0.05445499, 0.05507825, 0.05241935, 0.05073552,0.05200171, 0.04858642, 0.04419684, 0.04642237, 0.05115073,0.05028116, 0.05021724, 0.05312114, 0.0524375 , 0.04643478,0.05026358, 0.04248708, 0.04675281, 0.03895142, 0.04558007])
#var = np.array([0.81992316, 0.78486345, 0.79259   , 0.79754392, 0.79350872, 0.79652433, 0.79935746, 0.79867687, 0.80257863, 0.80235328,0.80521209, 0.80621272, 0.80330435, 0.80469855, 0.81171202,0.81136354, 0.80977166, 0.8089956 , 0.8106946 , 0.80862825,0.81450049, 0.81722176, 0.82121488, 0.82012788, 0.8254015 ,0.82097106, 0.81742119, 0.82090554, 0.82116105, 0.82017896,0.82234659, 0.82832269, 0.82888739, 0.81852014, 0.82054523,0.8224114 , 0.82913892, 0.8289046 , 0.81985612, 0.83341215,0.82896934, 0.82315006, 0.82802216, 0.81886278, 0.8274004 ,0.83436616, 0.82014282, 0.82628005, 0.83230868, 0.84511334,0.85141143, 0.84934269, 0.83041272, 0.826798  , 0.83660989])
# Precomputed normalisation statistics, one entry per token position.
# NOTE(review): `var` is applied as a multiplicative factor below, so the file
# presumably stores 1/sqrt(variance) rather than the variance itself — confirm.
mean = np.loadtxt("../emotion-precompute/layer1_output_mean.txt", delimiter=",")
var = np.loadtxt("../emotion-precompute/layer1_output_var.txt", delimiter=",")

# LayerNorm affine parameters (loop-invariant, so read only once).
w_output_layernorm = model.bert.encoder.layer[1].output.LayerNorm.weight.clone().detach().double().unsqueeze(0)
b_output_layernorm = model.bert.encoder.layer[1].output.LayerNorm.bias.clone().detach().double()

# squeeze(0) removes only the batch axis, so a one-token sequence keeps its
# (1, hidden) shape instead of collapsing to (hidden,).
residual_rows = fin_6.squeeze(0).detach()
num_tokens = residual_rows.size(0)

# One statistic per token, shaped (num_tokens, 1) to broadcast over features.
mean_col = torch.as_tensor(mean, dtype=torch.float64).reshape(-1)[:num_tokens].unsqueeze(1)
var_col = torch.as_tensor(var, dtype=torch.float64).reshape(-1)[:num_tokens].unsqueeze(1)
assert mean_col.size(0) == num_tokens and var_col.size(0) == num_tokens, \
    "precomputed statistics cover fewer positions than the input has tokens"

# Normalise with the precomputed statistics, then apply the affine transform.
fin7_whole = (residual_rows - mean_col) * var_col * w_output_layernorm + b_output_layernorm

# Restore the batch axis: (1, num_tokens, hidden).
fin7_whole = fin7_whole.unsqueeze(0)

In [22]:
fhe_vector = np.array([ -0.2878,  0.3439, -1.6368, -0.5351, -0.4200, -0.1911, -1.5314,  1.2849, -0.8399, -0.7583,  0.0711,  0.4168,  0.3946,  1.0162, -0.1611, -0.6183, -1.9408,  0.3896,  1.0788,  0.4029, -0.7584,  1.0490,  0.6436,  2.8241,  1.2127,  0.6649, -0.9586, -0.5119,  0.4490,  0.5205,  0.1889, -0.6272, -0.7800,  1.7710,  1.5141,  1.2182,  2.0973,  0.3477,  0.8053,  1.8744,  0.9475,  0.8821, -1.0891,  1.4506,  0.8158, -2.4672, -0.5354, -0.6877, -0.3175,  0.4974,  0.8970, -0.1105, -1.2276, -0.5427,  0.8476,  0.4907, -0.7033, -1.5651,  0.0102, -0.0672, -0.5370, -0.7277, -0.4135,  0.7912,  1.0703,  1.2110, -1.1981, -1.2090, -0.0698, -0.2273,  1.0470, -0.0457, -0.2572, -0.3591,  0.7058, -1.3808, -2.2219, -1.5260,  0.6365, -1.7333, -1.7138, -1.6216, -0.5013,  2.0267,  0.1738,  0.4102, -0.1390,  0.2492, -0.0020, -2.1727, -0.4533, -1.0592, -1.8434, -0.7747, -1.6186, -0.5410,  0.3699, -0.7105, -0.4334, -0.6854,  0.5468, -2.0914, -0.8950,  0.5671,  0.1379, -0.3994,  0.9762,  2.2562,  0.8315,  1.6343, -1.8014,  0.2429,  0.3540, -0.0026, -1.1169, -0.2627,  0.3759,  1.1848, -0.2164,  0.4077, -1.8521,  1.1433,  1.1478, -1.6825, -0.4459, -0.4054,  0.1128,  1.3968,  ]

)
# Compare the first token's row of the plaintext layer output with the FHE vector defined just above.
precision(fin7_whole[0][0].detach(), fhe_vector)

  absolute = sum(abs(correct - approx))/len(correct)


tensor(0.8169, dtype=torch.float64)

### 9) Pooler

In [23]:
# Pooler: dense projection followed by tanh, evaluated here for every token row.
pooler_dense = model.bert.pooler.dense
pooler_output = torch.tanh(fin7_whole.double() @ pooler_dense.weight.t().double() + pooler_dense.bias)

fhe_vector = np.array([  0.9827,  0.7424, -0.9903,  0.9974, -0.9865,  0.1791,  0.7972, -0.8933, -0.2103,  0.8942,  0.5109,  0.9799, -0.9034, -1.0026,  0.7248, -0.9278, -0.5138, -0.7697,  0.3084, -0.3474, -0.9184,  0.8537,  0.9933, -0.9836, -1.0029,  0.7631, -0.0198,  0.9728, -0.9722, -0.7211,  0.9799,  0.9645, -0.9665, -0.9498,  1.0007, -0.7138,  0.9981, -0.9399,  0.9358,  0.8181, -0.9552,  0.3494, -0.8611, -0.7953,  1.0016, -0.7816, -0.3165,  0.2958,  0.9908,  0.9857,  1.0001, -0.8426,  0.8391,  0.9939,  0.8410, -0.9987,  0.9906, -0.9968,  0.9915,  0.5392,  0.6443, -0.9822, -0.9796,  0.1180,  0.9967,  0.9876,  0.9960,  0.2515,  0.9955, -0.8104,  0.9941, -0.9723,  0.5188, -0.6456, -0.9622,  0.9895,  0.9953, -0.8388,  0.9500,  0.9924,  0.1622, -0.9823,  0.8991,  0.7629,  0.2660,  0.6638,  0.8910,  0.9930,  0.9870, -0.6659,  0.9337,  0.9962, -0.9743,  0.9680, -0.9466,  0.0465, -0.8882, -0.9962,  0.9468,  0.9856,  0.4824,  0.9922,  0.8828, -0.3890, -0.9975, -0.0106, -0.9940,  0.9438, -0.9358, -0.9849,  0.9959,  0.0587, -0.9927,  1.0003,  1.0020,  0.8375,  1.0074,  0.9979,  0.1449,  0.9934,  0.5330,  0.9397,  0.9419,  0.6525,  0.9959, -0.9419, -0.9381, -0.9960 ]
                     
)
# Compare the pooled first-token row of the plain model with the FHE vector defined just above.
precision(pooler_output[0][0].detach(), fhe_vector)

  absolute = sum(abs(correct - approx))/len(correct)


tensor(0.9413, dtype=torch.float64)

### 10) Classifier

In [24]:
# Classification head: one logit per class, computed for every token row.
classifier_weight = model.classifier.weight.t().double()
classifier_bias = model.classifier.bias.double()
classification = pooler_output @ classifier_weight + classifier_bias

# Logits produced by the FHE circuit.
fhe_vector = np.array([-0.4708, -0.6270, -2.3478, -2.5482, 5.7271, -0.9403])

# Precision of the FHE logits against the first-token logits of the plain model.
precision(classification[0][0].detach(), fhe_vector)

  absolute = sum(abs(correct - approx))/len(correct)


tensor(0.9428, dtype=torch.float64)

In [25]:
# Show the plain-model logits and the FHE logits one above the other.
plain_logits = classification[0][0].detach().numpy()
print("Plain circuit output: {}\nFHE circuit output: {}".format(plain_logits, fhe_vector))

Plain circuit output: [-0.43992036 -0.90293324 -2.27214278 -2.53368019  6.00071599 -1.02311408]
FHE circuit output: [-0.4708 -0.627  -2.3478 -2.5482  5.7271 -0.9403]


---