## Setup

In [13]:
# Import packages

import numpy as np
import torch
import pandas as pd
import plotly.express as px

from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
from datasets import load_dataset

from tqdm.notebook import trange, tqdm
from ipywidgets import IntProgress

In [None]:
# Load the pretrained BERT classifier fine-tuned on the IMDB dataset.
# The checkpoint id lives in a single constant so that the tokenizer and the
# model cannot accidentally be loaded from two different checkpoints.

MODEL_CHECKPOINT = "fabriceyhc/bert-base-uncased-imdb"

tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT)

model = AutoModelForSequenceClassification.from_pretrained(MODEL_CHECKPOINT)

In [None]:
# Load the two corpora used in this notebook:
#   - IMDB: used as the in-distribution (IN) data,
#   - SST2: used as the out-of-distribution (OUT) data.

dataset = load_dataset("imdb")

out_dataset = load_dataset("sst2")

In [None]:
def label_to_bin(label):
    """Map a predicted label string to a binary class id.

    Returns 0 when ``label`` is exactly ``'neg'`` and 1 for any other value.
    """
    return 0 if label == 'neg' else 1

def get_latent(input_data):
    """Build one flat latent vector for ``input_data``.

    Runs the classification pipeline on ``input_data`` and then reads the
    module-level ``feat_0`` ... ``feat_11`` globals, which are written by the
    forward hooks registered on the 12 encoder layers of ``model``. Those
    hooks must therefore be registered before this function is called.

    Returns a 1-D NumPy array made of, in order:
      * for each of the 12 layers, the vector at position ``[0, 0]`` of the
        first element of the hooked output (assumes that element is shaped
        (batch, sequence, hidden), i.e. this picks the first token of the
        first sequence -- TODO confirm),
      * the binary predicted label (see ``label_to_bin``),
      * the pipeline ``score`` of that prediction.
    """
    classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer)
    prediction = classifier(input_data)[0]

    # The hooks have fired during the call above, so the globals are fresh.
    layer_outputs = (feat_0, feat_1, feat_2, feat_3, feat_4, feat_5,
                     feat_6, feat_7, feat_8, feat_9, feat_10, feat_11)
    first_token_vectors = [layer_output[0][0, 0] for layer_output in layer_outputs]
    hidden_part = torch.cat(first_token_vectors, dim = 0).numpy()

    prediction_part = torch.tensor(
        [label_to_bin(prediction['label']), prediction['score']]
    ).numpy()

    return np.concatenate([hidden_part, prediction_part])

In [None]:
def compute_all_latent(inputs):
    """Compute the latent vector of every element of ``inputs`` as a DataFrame.

    Each input is passed through ``get_latent``. The columns are labelled
    0 .. 768*12 - 1 for the hidden features, followed by 'pred' and
    'softmax_score'. The frame is then transposed, so the returned DataFrame
    has one row per feature and one column per input.
    """
    latent_rows = list(map(get_latent, inputs))
    frame = pd.DataFrame(latent_rows)
    frame.columns = [*range(768 * 12), 'pred', 'softmax_score']
    return frame.transpose()

In [None]:
# Quick check of the latent extraction on the first two training reviews.
# NOTE(review): get_latent reads the feat_* globals, which only exist once the
# forward hooks (registered in a later cell) have fired; on a fresh kernel the
# hook cell has to be run before this one.
inputs = [dataset['train'][row]['text'] for row in (0, 1)]
df = compute_all_latent(inputs)

## Generate train distribution

In [None]:
# Hook functions to get latent representation of data.
#
# One forward hook is attached to each of the 12 encoder layers. Every time the
# model runs, hook number i stores the raw output of layer i in the
# module-level global `feat_i` (feat_0 ... feat_11), which the feature
# extraction functions of this notebook read afterwards.

def _make_features_hook(layer_index):
    """Build the forward hook that stores a layer output in ``feat_<layer_index>``."""
    def features_hook(module, inp, output):
        # globals() is this notebook's namespace, so this (re)binds the same
        # feat_<i> global that the rest of the notebook reads.
        globals()[f'feat_{layer_index}'] = output
    features_hook.__name__ = f'features_hook_{layer_index}'
    return features_hook

features_hooks = [_make_features_hook(i) for i in range(12)]

# Keep the historical names features_hook_0 ... features_hook_11 available.
for _hook in features_hooks:
    globals()[_hook.__name__] = _hook

# Re-running this cell used to stack a second set of hooks on the model.
# Remove the handles left by a previous run before registering again.
for _stale_handle in globals().get('feat_hook', []):
    _stale_handle.remove()

feat_hook = [model.base_model.encoder.layer[i].register_forward_hook(features_hooks[i]) for i in range(12)]

In [None]:
def get_lattent_representation(input_data, model):
    """Aggregate the hooked layer outputs into a single representation vector.

    Runs a classification pipeline built from ``model`` (and the module-level
    ``tokenizer``) on ``input_data`` so that the forward hooks refresh the
    ``feat_0`` ... ``feat_11`` globals. For every layer it then takes the
    vector at position ``[0, 0, :]`` of the first element of the hooked
    output, and returns the element-wise mean of those 12 vectors as a torch
    tensor.
    """
    classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer)
    classifier(input_data)  # only run for its side effect on the feat_* globals

    layer_outputs = (feat_0, feat_1, feat_2, feat_3, feat_4, feat_5,
                     feat_6, feat_7, feat_8, feat_9, feat_10, feat_11)
    first_token_vectors = [layer_output[0][0, 0, :] for layer_output in layer_outputs]
    aggregated_features = torch.stack(first_token_vectors).mean(dim = 0)

    return aggregated_features

###### IF THE DISTRIBUTION IS NOT COMPLETED ######
# Uncomment the block below to compute the aggregated latent vector of every
# training review and save the result as a CSV file. Samples whose forward
# pass raises are skipped and their index is recorded in `fail`.
# NOTE(review): the commented code writes 'distrib_.csv' whereas the loader
# further down reads 'distrib_first.csv' -- confirm which file name is intended.

# distrib =[] 
# fail = []
# for i in tqdm(range(len(dataset['train']))):
#     if i%100 == 0:
#         print(i)
#     try:
#         distrib.append(get_lattent_representation(dataset['train'][i]['text'], model))
#     except:
#         fail.append(i)

# def process_distrib(distrib):
#     return np.vstack([distrib[i].numpy().flatten() for i in range(len(distrib))])

# distrib = process_distrib(distrib)
# pd.DataFrame(distrib).to_csv('distrib_.csv', index = False)




###### IF THE DISTRIBUTION IS ALREADY COMPLETED ######
# Load the previously saved training latent vectors (one row per sample).


distrib = np.array(pd.read_csv('distrib_first.csv'))
# Presumably only needed when the CSV was saved with an extra leading index
# column -- TODO confirm before enabling.
# distrib = distrib[:, 1:]
assert distrib.shape[1] == 768 # shape check: every latent vector must have 768 components

In [None]:
def get_logits(input_data, model):
    """Return the pipeline score of the prediction for ``input_data``.

    Despite its name, this does not return raw logits: it returns the
    ``'score'`` field of the first result produced by a
    ``TextClassificationPipeline`` built from ``model`` and the module-level
    ``tokenizer`` (presumably the softmax probability of the predicted
    class -- TODO confirm).
    """
    classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer)
    top_prediction = classifier(input_data)[0]
    return top_prediction['score']


In [None]:
def get_lattent_last_layer(input_data, model):
    """Return the last hooked layer's vector for ``input_data``.

    Runs a classification pipeline built from ``model`` (and the module-level
    ``tokenizer``) so that the forward hooks refresh ``feat_11``, then returns
    the entry at index ``[0][0]`` of the first element of ``feat_11``
    (assumes that element is shaped (batch, sequence, hidden) -- TODO confirm).
    """
    classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer)
    classifier(input_data)  # only run for its side effect on feat_11

    last_layer_output = feat_11[0]
    return last_layer_output[0][0]

###### LAST-LAYER DISTRIBUTION: LOAD IT IF ALREADY SAVED, COMPUTE IT OTHERWISE ######
# The CSV file acts as a cache: the (slow) pass over the whole training set is
# only executed when the file does not exist yet. Delete the file to force a
# recomputation.

LAST_LAYER_CSV = 'distrib_last_layer.csv'

def process_distrib(distrib):
    """Stack a sequence of torch tensors into one 2-D NumPy array.

    Each tensor is converted to NumPy and flattened, and becomes one row of
    the returned array.
    """
    return np.vstack([vector.numpy().flatten() for vector in distrib])

# Indices of the training samples whose forward pass raised an exception.
# Stays empty when the distribution is loaded from the CSV file.
fail = []

try:
    last_layer_frame = pd.read_csv(LAST_LAYER_CSV)
except FileNotFoundError:
    collected_vectors = []
    for i in tqdm(range(len(dataset['train']))):
        try:
            collected_vectors.append(get_lattent_last_layer(dataset['train'][i]['text'], model))
        except Exception as exc:
            # Skip the sample but keep a trace of it. Unlike a bare `except:`,
            # this does not swallow KeyboardInterrupt.
            print(f'sample {i} skipped: {exc!r}')
            fail.append(i)

    pd.DataFrame(process_distrib(collected_vectors)).to_csv(LAST_LAYER_CSV, index = False)
    # Read the file back so both paths yield exactly the same array.
    last_layer_frame = pd.read_csv(LAST_LAYER_CSV)

distrib_last_layer = np.array(last_layer_frame)
# Shape check on the array loaded here (the original checked `distrib` instead):
# every latent vector must have 768 components.
assert distrib_last_layer.shape[1] == 768

## Distance functions

In [None]:
# Statistics of the training latent vectors used by the Mahalanobis score:
# covariance matrix, its inverse (precision matrix) and the mean vector.

cov_matrix = np.cov(distrib.T)
precision_matrix = np.linalg.inv(cov_matrix)
esperance_vector = np.mean(distrib, axis = 0)

def D_M(x, esperance = esperance_vector, precision = precision_matrix):
    """Mahalanobis-based score of ``x``.

    With ``v = x - esperance``, computes the squared Mahalanobis distance
    ``d2 = v.T @ precision @ v`` and returns ``-1 / (1 + d2)``. The score
    therefore increases towards 0 as ``x`` moves away from ``esperance``.

    Parameters
    ----------
    x : 1-D NumPy array with the same length as ``esperance``.
    esperance : mean vector; defaults to ``esperance_vector``.
    precision : inverse covariance matrix; defaults to ``precision_matrix``.

    The defaults are bound when this cell runs (they previously referenced
    the undefined names ``esperance`` and ``precision_mat``); re-run the cell
    after changing ``distrib``.
    """
    v = x - esperance
    u = 1 + (v.T @ precision @ v)
    return -1 / u

In [None]:
def MSP(x):
    """Max-softmax-probability score.

    ``x`` is the maximum softmax probability of a prediction; the returned
    score is its complement, ``1 - x``.
    """
    complement = 1 - x
    return complement

In [None]:
# Last layer mahalanobis score
# NOTE(review): despite the title, these statistics are computed from `distrib`
# and not from `distrib_last_layer`, and this cell redefines `D_M`. This is
# kept as is because the evaluation loop further down scores the vectors
# returned by get_lattent_representation with `D_M`. Confirm whether a
# separate last-layer score was intended.

cov_matrix = np.cov(distrib.T)
precision_matrix = np.linalg.inv(cov_matrix)
esperance_vector = np.mean(distrib, axis = 0)

def D_M(x, esperance = esperance_vector, precision = precision_matrix):
    """Mahalanobis-based score of ``x``.

    With ``v = x - esperance``, computes the squared Mahalanobis distance
    ``d2 = v.T @ precision @ v`` and returns ``-1 / (1 + d2)``.

    ``esperance`` defaults to ``esperance_vector`` and ``precision`` to
    ``precision_matrix`` (the defaults previously referenced the undefined
    names ``esperance`` and ``precision_mat``). They are bound when this cell
    runs.
    """
    v = x - esperance
    u = 1 + (v.T @ precision @ v)
    return -1 / u

## Computing in-distribution (IN) and out-of-distribution (OUT) distances

In [None]:
# Number of evaluated samples per corpus: the size of the smaller test split.
N = min(len(out_dataset['test']), len(dataset['test']))

In [None]:
# NOTE(review): this overrides the N computed in the previous cell; it looks
# like a quick-run limit -- skip this cell to evaluate on the full test splits.
N = 10

In [None]:
# Scores of the first N test samples of each corpus, per scoring method:
#   'maha' -> D_M applied to the aggregated latent vector,
#   'MSP'  -> MSP applied to the pipeline score,
# and per origin: 'in' (IMDB test split) / 'out' (SST2 test split).
distances = {
            'maha' : {'in' : [], 'out' : []},
            'MSP' : {'in' : [], 'out' : []},
            }

# Indices of the samples that could not be scored, per origin.
failed_indices = {'in' : [], 'out' : []}

# (origin, split, name of the text field in that split)
evaluation_splits = (
    ('out', out_dataset['test'], 'sentence'),
    ('in', dataset['test'], 'text'),
)

for origin, samples, text_field in evaluation_splits:
    for i in tqdm(range(N), desc = origin):
        try:
            input_data = samples[i][text_field]

            x = get_lattent_representation(input_data, model)
            maha_score = D_M(x.numpy())

            logit = get_logits(input_data, model)
            msp_score = MSP(logit)
        except Exception as exc:
            # Report and record the failure instead of dropping it silently.
            print(f'{origin} sample {i} skipped: {exc!r}')
            failed_indices[origin].append(i)
            continue

        # Append only once both scores exist, so that the 'maha' and 'MSP'
        # lists always describe the same samples.
        distances['maha'][origin].append(maha_score)
        distances['MSP'][origin].append(msp_score)

## Results

In [None]:
# Histogram of the Mahalanobis-based scores, coloured by origin
# (OUT samples first, then IN samples).
px.histogram(
    [*distances['maha']['out'], *distances['maha']['in']],
    color = ['out' for _ in distances['maha']['out']] + ['in' for _ in distances['maha']['in']],
    template = 'none',
)

In [None]:
def error(in_distances, out_distances, threshold):
    """Misclassification rate of the rule "score >= threshold means OUT".

    An IN score that is >= ``threshold`` and an OUT score that is
    < ``threshold`` each count as one error. The total is divided by the
    number of IN and OUT scores.
    """
    in_errors = np.sum([score >= threshold for score in in_distances])
    out_errors = np.sum([score < threshold for score in out_distances])
    n_scores = len(out_distances) + len(in_distances)
    return (in_errors + out_errors) / n_scores

def compute_best_threshold(in_distances, out_distances, n_candidates = 1000):
    """Grid-search the threshold with the lowest ``error``.

    ``n_candidates`` evenly spaced thresholds between ``min(in_distances)``
    and ``max(out_distances)`` are evaluated with ``error``. Returns the
    tuple ``(best_threshold, best_error)``; on ties the first candidate wins.
    """
    grid = np.linspace(min(in_distances), max(out_distances), n_candidates)
    grid_errors = [error(in_distances, out_distances, candidate) for candidate in grid]

    best = int(np.argmin(grid_errors))
    return (grid[best], grid_errors[best])


In [None]:
# Best (threshold, error) pair for the Mahalanobis-based score.
compute_best_threshold(in_distances = distances['maha']['in'], out_distances = distances['maha']['out'])

In [None]:
# Best (threshold, error) pair for the MSP score.
compute_best_threshold(in_distances = distances['MSP']['in'], out_distances = distances['MSP']['out'])

## Some experiments

In [None]:
def generate_sphere_point(ndim):
    """Draw a random unit vector of dimension ``ndim``.

    A standard-normal vector is sampled with ``np.random.randn`` (global NumPy
    random state) and divided by its Euclidean norm.
    """
    direction = np.random.randn(ndim)
    return direction / np.linalg.norm(direction, axis=0)

def compute_minimum_value(x, distrib, u_k):
    """Smaller of the two fractions of ``distrib`` on either side of ``x`` along ``u_k``.

    Every point ``p`` of ``distrib`` is projected as ``dot(u_k, p - x)``.
    ``positive_rate`` is the fraction of strictly positive projections, and
    the function returns ``min(positive_rate, 1 - positive_rate)``.

    Parameters
    ----------
    x : 1-D array, the query point.
    distrib : 2-D array (or sequence of 1-D arrays), one reference point per row.
    u_k : 1-D array, the projection direction.

    All projections are computed with a single matrix-vector product instead
    of one ``np.dot`` call per row. An empty ``distrib`` yields ``nan``.
    """
    points = np.asarray(distrib)
    if points.shape[0] == 0:
        # No reference point: the rate is undefined.
        return np.nan

    projections = (points - x) @ u_k
    positive_rate = np.mean(projections > 0)
    return min(positive_rate, 1 - positive_rate)

def D(x, distrib, n_proj = 10):
    """Average of ``compute_minimum_value`` over ``n_proj`` random directions.

    ``n_proj`` unit vectors of the same dimension as ``x`` are drawn with
    ``generate_sphere_point``. For each of them ``compute_minimum_value`` is
    evaluated for ``x`` against ``distrib``, and the mean of those values is
    returned.
    """
    ndim = x.shape[0]
    directions = [generate_sphere_point(ndim) for _ in range(n_proj)]

    minimums = []
    for direction in directions:
        minimums.append(compute_minimum_value(x, distrib, direction))
    return np.mean(minimums)

In [None]:
def D_(x, distrib, n_proj = 100):
    """Per-direction values of ``compute_minimum_value`` for ``x``.

    Same computation as ``D`` but without the final averaging: returns the
    list of the ``n_proj`` values, one per random direction drawn with
    ``generate_sphere_point``.
    """
    ndim = x.shape[0]
    directions = [generate_sphere_point(ndim) for _ in range(n_proj)]

    minimums = []
    for direction in directions:
        minimums.append(compute_minimum_value(x, distrib, direction))
    return minimums

# Convergence check: how does the averaged value evolve with the number of
# random projections, for one random query point?
x = np.random.random(distrib[0].shape)
a = D_(x, distrib, 1000)

# Running mean over the first 1, 2, ..., len(a) projections. The cumulative sum
# computes it in a single pass and includes the final point (the mean over all
# projections), which the original `range(1, len(a))` left out.
running_mean = np.cumsum(a) / np.arange(1, len(a) + 1)

px.line(list(running_mean), template = 'none')