## Setup

In [1]:
# Import packages

import numpy as np
import torch
import pandas as pd
import plotly.express as px

from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
from datasets import load_dataset

from tqdm.notebook import trange, tqdm
from ipywidgets import IntProgress

In [2]:
# Import pretrained BERT finetuned on IMDB Database

tokenizer = AutoTokenizer.from_pretrained("fabriceyhc/bert-base-uncased-imdb")

model = AutoModelForSequenceClassification.from_pretrained("fabriceyhc/bert-base-uncased-imdb")

In [3]:
# Loading IMDB dataset (IN) and SST2 (OUT)

dataset = load_dataset("imdb")

out_dataset = load_dataset("sst2")

Found cached dataset imdb (/home/onyxia/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0)


  0%|          | 0/3 [00:00<?, ?it/s]

Found cached dataset sst2 (/home/onyxia/.cache/huggingface/datasets/sst2/default/2.0.0/9896208a8d85db057ac50c72282bcb8fe755accc671a57dd8059d4e130961ed5)


  0%|          | 0/3 [00:00<?, ?it/s]

## Generate train distribution

In [4]:
# Hook functions to get latent representation of data

def features_hook_0(model, inp, output):
    global feat_0
    feat_0 = output

def features_hook_1(model, inp, output):
    global feat_1
    feat_1 = output

def features_hook_2(model, inp, output):
    global feat_2
    feat_2 = output

def features_hook_3(model, inp, output):
    global feat_3
    feat_3 = output

def features_hook_4(model, inp, output):
    global feat_4
    feat_4 = output

def features_hook_5(model, inp, output):
    global feat_5
    feat_5 = output

def features_hook_6(model, inp, output):
    global feat_6
    feat_6 = output

def features_hook_7(model, inp, output):
    global feat_7
    feat_7 = output

def features_hook_8(model, inp, output):
    global feat_8
    feat_8 = output

def features_hook_9(model, inp, output):
    global feat_9
    feat_9 = output

def features_hook_10(model, inp, output):
    global feat_10
    feat_10 = output

def features_hook_11(model, inp, output):
    global feat_11
    feat_11 = output

features_hooks = [features_hook_0, features_hook_1, features_hook_2, 
                    features_hook_3, features_hook_4, features_hook_5, 
                    features_hook_6, features_hook_7, features_hook_8, 
                    features_hook_9, features_hook_10, features_hook_11]



feat_hook = [model.base_model.encoder.layer[i].register_forward_hook(features_hooks[i]) for i in range(12)]

In [5]:
def get_lattent_representation(input_data, model):
    """aggregating latent representation to create a unique representation vector
    """
    pipe = TextClassificationPipeline(
                                model=model, tokenizer=tokenizer
                                )
    pipe(input_data)
    feats = [feat_0[0], feat_1[0], feat_2[0], 
                feat_3[0], feat_4[0], feat_5[0], 
                feat_6[0], feat_7[0], feat_8[0], 
                feat_9[0], feat_10[0], feat_11[0]]
    aggregated_features = torch.mean(torch.stack([i[0, 0, :] for i in feats]), dim= 0)

    return aggregated_features

###### IF THE DISTRIBUTION IS NOT COMPLETED ######

# distrib =[] 
# fail = []
# for i in tqdm(range(len(dataset['train']))):
#     if i%100 == 0:
#         print(i)
#     try:
#         distrib.append(get_lattent_representation(dataset['train'][i]['text'], model))
#     except:
#         fail.append(i)

# def process_distrib(distrib):
#     return np.vstack([distrib[i].numpy().flatten() for i in range(len(distrib))])

# distrib = process_distrib(distrib)
# pd.DataFrame(distrib).to_csv('distrib_.csv', index = False)




###### IF THE DISTRIBUTION IS ALREADY COMPLETED ######


distrib = np.array(pd.read_csv('distrib_first.csv'))
# distrib = distrib[:, 1:]
assert distrib.shape[1] == 768 # checking the shape : vectors must have a 768 size

In [6]:
def get_logits(input_data, model):
    pipe = TextClassificationPipeline(
                                model=model, tokenizer=tokenizer
                                )
    res = pipe(input_data)
    return res[0]['score']


In [7]:
def get_lattent_last_layer(input_data, model):
    pipe = TextClassificationPipeline(
                            model=model, tokenizer=tokenizer
                                )
    pipe(input_data)
    feat = feat_11[0]

    return feat[0][0]

###### IF THE DISTRIBUTION IS NOT COMPLETED ######

distrib_last_layer = [] 
fail = []
for i in tqdm(range(len(dataset['train']))):
    if i%100 == 0:
        print(i)
    try:
        distrib_last_layer.append(get_lattent_last_layer(dataset['train'][i]['text'], model))
    except:
        print(i)
        fail.append(i)

def process_distrib(distrib):
    return np.vstack([distrib[i].numpy().flatten() for i in range(len(distrib))])

distrib_last_layer = process_distrib(distrib_last_layer)
pd.DataFrame(distrib_last_layer).to_csv('distrib_last_layer.csv', index = False)




###### IF THE DISTRIBUTION IS ALREADY COMPLETED ######


distrib_last_layer = np.array(pd.read_csv('distrib_last_layer.csv'))
# distrib = distrib[:, 1:]
assert distrib.shape[1] == 768 # checking the shape : vectors must have a 768 size

  0%|          | 0/25000 [00:00<?, ?it/s]

0


Token indices sequence length is longer than the specified maximum sequence length for this model (720 > 512). Running this sequence through the model will result in indexing errors


8
17
34
42
44
52
55
69
74
80
87
92
95
98
100
111
112
128
142
147
149
165
180
197
198
200
209
213
218
230
238
246
248
253
257
264
265
273
281
282
298
300
312
345
353
370
374
375
394
400
413
415
416
421
422
440
457
459
461
478
479
483
498
500
500
501
509
517
520
521
528
531
543
581
591
593
600
603
612
614
615
618
651
652
655
657
661
666
667
668
674
681
687
688
689
700
700
706
725
726
735
736
740
745
746
749
750
751
753
757
759
762
768
771
773
779
784
788
797
800
803
812
814
825
831
832
848
877
878
896
899
900
913
922
926
932
942
958
965
974
982
983
984
989
1000
1001
1019
1020
1025
1026
1041
1056
1075
1080
1084
1090
1100
1104
1107
1109
1123
1127
1131
1147
1167
1173
1174
1175
1181
1197
1199
1200
1212
1213
1214
1216
1228
1233
1244
1261
1263
1267
1281
1282
1300
1306
1330
1332
1336
1340
1342
1347
1361
1364
1371
1391
1395
1400
1400
1406
1427
1432
1439
1444
1445
1449
1451
1452
1458
1466
1469
1477
1481
1492
1500
1509
1514
1520
1522
1531
1543
1545
1561
1564
1568
1572
1586
1590
1591
1592
1600
1606

## Distance functions

In [None]:
# computing variables for Mahalanobis score

cov_matrix = np.cov(distrib.T)
precision_matrix = np.linalg.inv(cov_matrix)
esperance_vector = np.mean(distrib, axis = 0)

def D_M(x, esperance = esperance, precision = precision_mat):
    """Mahalanobis distance
    """
    v = x - esperance
    u = 1 + (v.T @ precision @ v)
    return -1 / u

In [84]:
def MSP(x):
    """Max softmax proba
    x is the max logits value
    """
    return 1 - x

In [None]:
# Last layer mahalanobis score

cov_matrix = np.cov(distrib.T)
precision_matrix = np.linalg.inv(cov_matrix)
esperance_vector = np.mean(distrib, axis = 0)

def D_M(x, esperance = esperance, precision = precision_mat):
    """Mahalanobis distance
    """
    v = x - esperance
    u = 1 + (v.T @ precision @ v)
    return -1 / u

## Computing In and Out distances 

In [None]:
N = min([len(out_dataset['test']), len(dataset['test'])])

In [88]:
N = 10

In [104]:
distances = {
            'maha' : {'in' : [], 'out' : []},
            'MSP' : {'in' : [], 'out' : []},
            }

for i in tqdm(range(N)):
    input_data = out_dataset['test'][i]['sentence']

    x = get_lattent_representation(input_data, model)
    distances['maha']['out'].append(D_M(x.numpy()))

    logit = get_logits(input_data, model)
    distances['MSP']['out'].append(MSP(logit))

for i in tqdm(range(N)):
    try:
        input_data = dataset['test'][i]['text']

        x = get_lattent_representation(input_data, model)
        distances['maha']['in'].append(D_M(x.numpy()))

        logit = get_logits(input_data, model)
        distances['MSP']['in'].append(MSP(logit))

    except:
        pass

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

## Results

In [109]:
px.histogram(
            distances['maha']['out'] + distances['maha']['in'], 
            color = ['out'] * len(distances['maha']['out']) + ['in'] * len(distances['maha']['in']), 
            template = 'none'
            )

In [106]:
def error(in_distances, out_distances, threshold):
    v = np.sum([i >= threshold for i in in_distances]) + np.sum([i < threshold for i in out_distances])
    return v / (len(out_distances) + len(in_distances))

def compute_best_threshold(in_distances, out_distances, n_candidates = 1000):

    candidates = np.linspace(min(in_distances), max(out_distances), n_candidates)

    errors = [error(in_distances, out_distances, i) for i in candidates]
    best_index = np.argmin(errors)
    return (candidates[best_index], errors[best_index])


In [107]:
compute_best_threshold(distances['maha']['in'], distances['maha']['out'])

(-0.0008962058910801728, 0.0)

In [108]:
compute_best_threshold(distances['MSP']['in'], distances['MSP']['out'])

(0.0004859141282013825, 0.3)

## Some tries

In [None]:
def generate_sphere_point(ndim):
    vec = np.random.randn(ndim)
    vec /= np.linalg.norm(vec, axis=0)
    return vec

def compute_minimum_value(x, distrib, u_k):
    positive_rate = np.mean(np.array([np.dot(u_k, distrib[i] - x) for i in range(len(distrib))]) > 0)
    return min(positive_rate, 1 - positive_rate)

def D(x, distrib, n_proj = 10):
    u = [generate_sphere_point(x.shape[0]) for _ in range(n_proj)]
    vector_of_minimums = [compute_minimum_value(x, distrib, u_k) for u_k in u]
    return np.mean(vector_of_minimums)

In [None]:
def D_(x, distrib, n_proj = 100):
    u = [generate_sphere_point(x.shape[0]) for _ in range(n_proj)]
    vector_of_minimums = [compute_minimum_value(x, distrib, u_k) for u_k in u]
    return (vector_of_minimums)

x = np.random.random(distrib[0].shape)
a = D_(x, distrib, 1000)

px.line([np.mean(a[:i]) for i in range(1, len(a))], template = 'none')