# Entropy criterion: reg_mul = 0.002

In [13]:
# preparation of the environment
%load_ext autoreload
%autoreload 2

import os
from os import path
# Walk up from the current working directory until we stand in the git
# repository (the directory named "stage_4_gm"), so the relative paths used
# below resolve the same way wherever the kernel was started.
cwd = os.getcwd().split(os.path.sep)
while cwd[-1] != "stage_4_gm":
    previous_dir = os.getcwd()
    os.chdir("..")
    if os.getcwd() == previous_dir:
        # At the filesystem root ".." no longer moves; without this guard the
        # loop would spin forever when the repository is not an ancestor.
        raise RuntimeError("could not find a 'stage_4_gm' directory above the starting directory")
    cwd = os.getcwd().split(os.path.sep)
print(">> the git rep : ", end="")
print(os.getcwd())

# the folder where we will save our data
foler_name = "reg_mul=0.002"
plots_folder = os.path.join(os.getcwd(), '.cache', 'plots')
graph_folder = path.join(plots_folder, foler_name)
# makedirs also creates the missing parents (.cache/plots) and tolerates a
# folder that already exists, so the cell can be re-run on a fresh checkout.
os.makedirs(graph_folder, exist_ok=True)

print(f">> the plots location : {graph_folder}")

import pandas as pd
import torch
import numpy as np
import pickle
from scipy.stats import entropy
from tqdm import tqdm
from tabulate import tabulate
from attention_algorithms.attention_metrics import attention_score

from regularize_training_bert import BertNliRegu
from custom_data_set import SnliDataset
from custom_data_set import test_dir, dev_dir
from torch.utils.data import DataLoader

# --> from this environment
from attention_algorithms.raw_attention import RawAttention
from attention_algorithms.attention_metrics import normalize_attention

# Restore the model from the checkpoint stored for the reg_mul=0.002 run and
# switch it to eval mode before any inference is done with it.
ckp = path.join(".cache", "logs", "igrida_trained", "reg_mul=0.002", "best.ckpt")
model = BertNliRegu.load_from_checkpoint(ckp).eval()

# Test split, loaded with keep_neutral=False (i.e. without the neutral labels).
data_set = SnliDataset(
    dir=test_dir,
    nb_sentences=1000,
    msg=False,
    keep_neutral=False,
)
# batch_size equals nb_sentences, so the first batch is presumably the whole
# subset -- TODO confirm against SnliDataset.
data_loader = DataLoader(data_set, batch_size=1000, shuffle=False)
sentences, masks, train_labels = next(iter(data_loader))

# e-SNLI annotations: only the three columns used in this notebook are kept.
e_snli_path = os.path.join('.cache', 'raw_data', 'e_snli', 'cleaned_data', 'test.csv')
e_snli_data = pd.read_csv(e_snli_path, sep=",")[["tok_sent", "hg_goal", "label"]]

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
>> the git rep : C:\Users\loicf\Documents\IRISA\stage_4_gm\stage_4_gm
>> the plots location : C:\Users\loicf\Documents\IRISA\stage_4_gm\stage_4_gm\.cache\plots\reg_mul=0.002


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


## The different metrics

In [14]:
from sklearn.metrics import f1_score, precision_score, recall_score, roc_auc_score, average_precision_score, auc

In [15]:
def jaccard(y, y_hat):
    """Jaccard index (intersection over union) of two equally long vectors.

    The intersection is the dot product ``y . y_hat`` and the union is
    ``sum(y) + sum(y_hat) - intersection``, so the inputs may be binary masks
    or continuous scores.

    Parameters
    ----------
    y, y_hat : sequence of numbers
        Reference vector and predicted vector, same length.

    Returns
    -------
    float
        ``intersection / union``; ``0.0`` when the union is zero (both vectors
        empty or all-zero) instead of the NaN + RuntimeWarning a 0/0 division
        would produce.
    """
    y = np.asarray(y)
    y_hat = np.asarray(y_hat)
    intersection = np.dot(y, y_hat)  # computed once, reused in the union
    union = np.sum(y) + np.sum(y_hat) - intersection
    if union == 0:
        return 0.0
    return float(intersection / union)

## Create the entropy map

In [16]:
pur_attention, Y_test = None, None
# The inference dict holds the attention values for every layer and head
# (indexed later as pur_attention["layer_<l>"]["head_<h>"]); per the original
# note, the attention is the aggregation of the different lines.
dir = os.path.join(graph_folder, "inference_dict.pickle")

if os.path.exists(dir) and os.path.getsize(dir) != 0:
    print(">> the files already exist in the environment >> loading the files ...", end="")
    # NOTE: pickle.load can execute arbitrary code; only load cache files that
    # were produced by this notebook.
    with open(dir, "rb") as f:
        pur_attention, Y_test = pickle.load(f)
    print(" loading finished")
else :
    print(">> the file doesn't exists >> downloading ...")
    # Compute first and open the file afterwards: opening in "wb" mode before
    # the computation would leave a truncated cache file behind if
    # attention_score raised.
    pur_attention, Y_test, _ = attention_score(sentences=sentences, masks=masks,
                                               e_snli_data=e_snli_data,
                                               model=model, TR_q=0,
                                               quantiles_calc=False)
    print(">> end downloading")
    with open(dir, "wb") as f:
        pickle.dump([pur_attention, Y_test], f)

>> the files already exist in the environment >> loading the files ... loading finished


In [17]:
# Entropy map: one value per (layer, head). The stored attention values of a
# head are turned into a distribution with a softmax, then scipy's `entropy`
# is taken. The builtin `sum` is kept on purpose so the values stay
# bit-identical: the cached criterion files are named after thresholds derived
# from this map.
entropy_map = np.zeros((12, 12))
for l in range(12):
    layer_attention = pur_attention[f"layer_{l}"]
    for h in range(12):
        preds = layer_attention[f"head_{h}"]
        exp_preds = np.exp(np.array(preds))
        entropy_map[l, h] = entropy(pk=exp_preds / sum(exp_preds))

## Functions for the Study

In [18]:
def entropi_criterion(ent_tr,sentences=sentences, masks=masks, msg = False):
    """Score the tokens of each sentence using only the low-entropy heads.

    For every sentence the matching e-SNLI row is looked up by comparing token
    lists, the attention of every head whose entropy is ``<= ent_tr`` is summed
    over dim 0 of its attention matrix, and the result is passed through
    ``normalize_attention``. Annotations and scores of a sentence are appended
    together, so ``y`` and ``y_hat`` always stay aligned.

    Parameters
    ----------
    ent_tr : float
        Entropy threshold; head (layer, head) is kept when
        ``entropy_map[layer, head] <= ent_tr``.
    sentences, masks :
        Batched input ids and attention masks, indexed as ``[i, :]``. The
        defaults are bound to the notebook batch when the function is defined.
    msg : bool
        When True, print the reason a sentence was skipped.

    Returns
    -------
    dict
        ``{"y": flat list of annotations, "y_hat": flat list of scores}``.
        Sentences that cannot be processed are skipped (best effort).
    """
    y_hat = []
    y = []
    n_rows = e_snli_data.shape[0]

    # The head selection only depends on the threshold, not on the sentence,
    # so it is computed once (same layer-major order as a nested loop).
    n_layers, n_heads = entropy_map.shape
    selected_heads = [(layer, head)
                      for layer in range(n_layers)
                      for head in range(n_heads)
                      if entropy_map[layer, head] <= ent_tr]

    for i in tqdm(range(len(sentences))):
        sent = sentences[i, :].clone().detach()[None, :]
        mk = masks[i, :].clone().detach()[None, :]
        raw_attention_inst = RawAttention(model=model,
                                          input_ids=sent,
                                          attention_mask=mk,
                                          test_mod=False
                                          )
        tokens = raw_attention_inst.tokens

        # search for the right sentence in the snli dataset
        # NOTE(review): eval executes whatever text is stored in the CSV; if
        # the file is not fully trusted, ast.literal_eval would be the safer
        # parser -- confirm the columns only hold list literals.
        j = 0
        while j < n_rows and tokens != eval(e_snli_data["tok_sent"][j]):
            j += 1

        try :
            if j >= n_rows:
                raise LookupError("sentence not found in the e-SNLI annotations")

            annot = eval(e_snli_data.hg_goal[j])

            # construction of the attention score from the selected heads
            scores = torch.zeros(len(tokens))
            for layer, head in selected_heads:
                scores += raw_attention_inst.attention_tensor[0, layer, head, :, :].sum(dim=0)
            # transform to a list
            scores = list(normalize_attention(tokens=tokens,
                                              attention=scores).detach().numpy())

            if len(scores) != len(annot):
                # a length mismatch would shift every following token pair
                raise ValueError(f"{len(annot)} annotations for {len(scores)} scores")
        except Exception as e:
            # best effort: skip this sentence, leaving y and y_hat untouched
            if msg :
                print(e)
            continue

        # Extend both lists only once everything succeeded; extending y before
        # the scores are known would desynchronise y and y_hat on failure.
        y += annot
        y_hat += scores
    return {"y" : y, "y_hat": y_hat}

In [19]:
def create_row(y, y_hat, metrics, n_thresholds=50):
    """Compute one table row of metric values for a set of predictions.

    Metrics named "roc_auc_score", "jaccard" and "average_precision_score" are
    called directly on the raw scores. Every other metric is evaluated on the
    scores binarised at ``n_thresholds`` evenly spaced thresholds in [0, 1],
    and the area under that metric-vs-threshold curve (``auc``) is reported.

    Parameters
    ----------
    y : sequence
        Reference annotations.
    y_hat : sequence of float
        Predicted scores, same length as ``y``.
    metrics : sequence of str
        Names of metric callables available in the module namespace.
    n_thresholds : int, optional
        Number of thresholds of the sweep (default 50).

    Returns
    -------
    list
        One value per entry of ``metrics``, in the same order.

    Raises
    ------
    NameError
        If a metric name is not defined in the module namespace.
    """
    score_metrics = ("roc_auc_score", "jaccard", "average_precision_score")
    scores = np.array(y_hat)  # converted once instead of once per threshold
    thresholds = np.linspace(0, 1, n_thresholds)

    row = []
    for name in metrics:
        # plain namespace lookup instead of eval(): same result for a metric
        # name, but an arbitrary expression string is never executed
        try:
            metric_fn = globals()[name]
        except KeyError:
            raise NameError(f"name '{name}' is not defined") from None

        if name in score_metrics:
            row.append(metric_fn(y, y_hat))
        else:
            curve = [metric_fn(y, 1 * (scores >= t)) for t in thresholds]
            row.append(auc(thresholds, curve))

    return row

In [20]:
def create_eraser_row(y, y_hat, metrics, threshold=0.5):
    """Compute one table row of metrics at a single decision threshold.

    "roc_auc_score" is called on the raw scores; every other metric is called
    on the scores binarised with ``y_hat >= threshold``.

    Parameters
    ----------
    y : sequence
        Reference annotations.
    y_hat : sequence of float
        Predicted scores, same length as ``y``.
    metrics : sequence of str
        Names of metric callables available in the module namespace.
    threshold : float, optional
        Binarisation threshold (default 0.5).

    Returns
    -------
    list
        One value per entry of ``metrics``, in the same order.

    Raises
    ------
    NameError
        If a metric name is not defined in the module namespace.
    """
    # the binarised prediction is the same for every metric: build it once
    binary_pred = 1 * (np.array(y_hat) >= threshold)

    row = []
    for name in metrics:
        # plain namespace lookup instead of eval(): same result for a metric
        # name, but an arbitrary expression string is never executed
        try:
            metric_fn = globals()[name]
        except KeyError:
            raise NameError(f"name '{name}' is not defined") from None

        if name != "roc_auc_score":
            row.append(metric_fn(y, binary_pred))
        else:
            row.append(metric_fn(y, y_hat))

    return row

## Computation for multiple thresholds

In [21]:
y, y_hat = None, None
rows = []
evaluation_metrics = ["f1_score", "precision_score", "recall_score", "roc_auc_score", "jaccard", "average_precision_score"]

# set the header for the table of statistics
h1 = ["AU_"+x+"_curve" for x in ["f1", "PR", "RC"]]

h2 = ["AUROC", "jaccard", "AUPRC"]
rows.append(["ent_tr"] + h1 + h2)


# Sweep 7 entropy thresholds evenly spaced between the smallest and the
# largest head entropy; each threshold has its own cache file.
# NOTE(review): the file name embeds the full float repr of ent_tr, so any
# change in how entropy_map is computed silently invalidates the cache.
for ent_tr in np.linspace(entropy_map.min(), entropy_map.max(), 7):
    dir = os.path.join(graph_folder, f"ent_{ent_tr}_criterion.pickle")

    if os.path.exists(dir) and os.path.getsize(dir) != 0:
        print(">> the files already exist in the environment >> loading the files ...", end="")
        # NOTE: pickle.load can execute arbitrary code; only load cache files
        # that were produced by this notebook.
        with open(dir, "rb") as f:
            d = pickle.load(f)
        print(" loading finished")
    else :
        print(">> the file doesn't exist >> downloading >> ", end=" ")
        # Compute before opening the file: opening in "wb" mode first would
        # leave a truncated cache file behind if the computation failed.
        d = entropi_criterion(ent_tr=ent_tr, msg=False)
        print(" >> downloading finished !")
        with open(dir, "wb") as f:
            pickle.dump(d, f)

    y = d["y"]
    y_hat = d["y_hat"]
    rows.append([ent_tr] + create_row(y, y_hat, evaluation_metrics))

>> the files already exist in the environment >> loading the files ... loading finished
>> the files already exist in the environment >> loading the files ... loading finished
>> the files already exist in the environment >> loading the files ... loading finished
>> the files already exist in the environment >> loading the files ... loading finished
>> the files already exist in the environment >> loading the files ... loading finished
>> the files already exist in the environment >> loading the files ... loading finished
>> the files already exist in the environment >> loading the files ... loading finished


In [22]:
# render the statistics table; the first entry of `rows` is used as the header
print(tabulate(rows, headers="firstrow"))

  ent_tr    AU_f1_curve    AU_PR_curve    AU_RC_curve     AUROC    jaccard     AUPRC
--------  -------------  -------------  -------------  --------  ---------  --------
 10.1353       0.277611       0.289849       0.34004   0.592868   0.18368   0.286644
 10.1399       0.333921       0.376177       0.390466  0.658553   0.222261  0.357765
 10.1444       0.250123       0.446215       0.22595   0.704314   0.167205  0.395579
 10.149        0.263649       0.448803       0.262131  0.726838   0.183716  0.420618
 10.1535       0.319732       0.444376       0.374758  0.726033   0.22541   0.422264
 10.158        0.325434       0.435212       0.404803  0.724478   0.230435  0.420115
 10.1626       0.332867       0.429298       0.431393  0.726096   0.235817  0.421236


In [23]:
from openpyxl import load_workbook, Workbook
from openpyxl.worksheet.table import Table, TableStyleInfo

# Append the statistics table to the shared dashboard workbook, reusing the
# workbook file when it already exists and starting a new one otherwise.
xls_dir = os.path.join(plots_folder, "dash_board.xlsx")
wb = load_workbook(xls_dir) if os.path.exists(xls_dir) else Workbook()

# make sure the sheet of this run exists, then fetch it
sheet_name = "reg_mul=0.002"
if sheet_name not in wb.sheetnames:
    wb.create_sheet(sheet_name)
ws = wb[sheet_name]

# a title line followed by the table rows (header row included)
ws.append(["Entropia metric score"])
for r in rows:
    ws.append(r)

wb.save(xls_dir)
wb.close()

## Table for the ERASER Benchmark

In [11]:
rows = []

evaluation_metrics = ["f1_score", "precision_score", "recall_score", "roc_auc_score", "jaccard"]
# column titles of the table; "jaccard" is reported under the name "IOU"
h = ["f1_score", "precision_score", "recall_score", "roc_auc_score", "IOU"]

rows.append(["ent_tr"] + h)

# Same 7 entropy thresholds and the same per-threshold cache files as the
# multi-threshold study; only the metric computation differs.
for ent_tr in np.linspace(entropy_map.min(), entropy_map.max(), 7):
    y, y_hat = None, None
    dir = os.path.join(graph_folder, f"ent_{ent_tr}_criterion.pickle")
    if os.path.exists(dir) and os.path.getsize(dir) != 0:
        print(">> the files already exist in the environment >> loading the files ...", end="")
        # NOTE: pickle.load can execute arbitrary code; only load cache files
        # that were produced by this notebook.
        with open(dir, "rb") as f:
            d = pickle.load(f)
        print(" loading finished")
    else :
        print(">> the file doesn't exist >> downloading >> ", end=" ")
        # Compute before opening the file: opening in "wb" mode first would
        # leave a truncated cache file behind if the computation failed.
        d = entropi_criterion(ent_tr=ent_tr ,msg=False)
        print(" >> downloading finished !")
        with open(dir, "wb") as f:
            pickle.dump(d, f)

    y = d["y"]
    y_hat = d["y_hat"]
    rows.append([ent_tr] + create_eraser_row(y, y_hat, evaluation_metrics))


>> the files already exist in the environment >> loading the files ... loading finished
>> the files already exist in the environment >> loading the files ... loading finished
>> the files already exist in the environment >> loading the files ... loading finished
>> the files already exist in the environment >> loading the files ... loading finished
>> the files already exist in the environment >> loading the files ... loading finished
>> the files already exist in the environment >> loading the files ... loading finished
>> the files already exist in the environment >> loading the files ... loading finished


In [12]:
# render the ERASER table; the first entry of `rows` is used as the header
print(tabulate(rows, headers="firstrow"))

  ent_tr    f1_score    precision_score    recall_score    roc_auc_score       IOU
--------  ----------  -----------------  --------------  ---------------  --------
 10.1353    0.289514           0.289067        0.289963         0.592868  0.169259
 10.1399    0.358635           0.374179        0.344331         0.658553  0.218498
 10.1444    0.196892           0.455107        0.12562          0.704314  0.109196
 10.149     0.202762           0.465           0.129647         0.726838  0.112818
 10.1535    0.323947           0.548411        0.229864         0.726033  0.193279
 10.158     0.341698           0.550017        0.247831         0.724478  0.206053
 10.1626    0.363524           0.539305        0.274164         0.726096  0.222139
