# M1 - 130 - Compute metrics

Using the per-level scores computed during fine-tuning:
* compute evaluation on joint-labels
* compute evaluation on all entities

In [None]:
import os
import sys
from pathlib import Path

# The active IPython shell mentions 'google.colab' in its string form when the
# notebook runs on Colab; the flag is also exported as an environment variable.
ENV_IS_GOOGLE_COLAB = 'google.colab' in str(get_ipython())
os.environ["ENV_IS_GOOGLE_COLAB"] = str(ENV_IS_GOOGLE_COLAB)

if not ENV_IS_GOOGLE_COLAB:
    # Local run: BASE is the directory of this approach.
    BASE = Path().resolve()
    # Adapt the two paths below to your situation.
    DATASETS = Path('../dataset_ICDAR').resolve()  # Where the data live before the Dataset object is created
    OUT_BASE = Path('../res_ICDAR/method_1').resolve()  # Where the results of this notebook are saved
else:
    from google.colab import drive
    mountpoint = Path("/content/drive")
    drive.mount(str(mountpoint))  # Mount Google Drive
    base = mountpoint / "MyDrive/article_icdar_2023"  # Adapt this to your situation
    sys.path.append(str(base))  # Make the project importable
    BASE = Path(base).resolve()  # Absolute project root
    DATASETS = BASE / "dataset_ICDAR"
    OUT_BASE = BASE / "res_ICDAR/method_1"

# Show the resolved configuration, one value per line.
print(sys.path, BASE, DATASETS, OUT_BASE, sep="\n")

In [None]:
import glob

# Everything found inside the "*_metrics" folders of OUT_BASE, in lexicographic order.
ls = glob.glob(f"{OUT_BASE}/*_metrics/*")
ls.sort()
ls

## Paths

In [None]:
# Choose the index, in the `ls` list displayed above, of the results directory
# for which the complete metrics must be computed.
i = 0
if not ls:
    # Fail with an explicit message instead of a bare "list index out of range".
    raise FileNotFoundError(f"No results directory found under {OUT_BASE}/*_metrics/")
dir_ = ls[i]

In [None]:
# The tagging scheme is read from the selected directory path:
# any path containing 'iob2' means IOB2, everything else means IO.
FORMAT = "IOB2" if 'iob2' in dir_ else "IO"

In [None]:
import os
from pathlib import Path
from config import logger

# Training-set sizes; only the biggest dataset is listed. In the visible cells
# of this notebook only the first entry is read, to name the output JSON file.
TRAINSETS_SIZES = [6084]

# INPUT / OUTPUT DIRS
# Metrics are read from and written back to the directory selected above.
METRICS_OUTPUT_DIR = dir_
METRICS_OUTPUT_DIR

In [None]:
import json
import glob


def _run_sort_key(path):
    """Sort key ordering run directories by numeric suffix (run_2 before run_10).

    Paths whose suffix after the last underscore is not a number are placed
    after the numbered ones, in lexicographic order.
    """
    suffix = path.rstrip("/\\").rsplit("_", 1)[-1]
    if suffix.isdecimal():
        return (0, int(suffix), path)
    return (1, 0, path)


# Later cells name each run after its position in these lists ("run_{i+1}"),
# so the runs must be in numeric order, not lexicographic order.
RUNS = sorted(glob.glob(f"{METRICS_OUTPUT_DIR}/run_*"), key=_run_sort_key)
L1_test = []
L2_test = []
for r in RUNS:
    for level, collected in (("level-1", L1_test), ("level-2", L2_test)):
        # Sorted so that the file picked is deterministic if several match.
        found = sorted(glob.glob(f"{r}/{level}/test_*"))
        if not found:
            raise FileNotFoundError(f"No test_* file found in {r}/{level}")
        collected.append(found[0])
L1_test,L2_test

## Prepare data
Align L1 and L2 predictions and labels

In [None]:
def cleanLists(res):
    """Split each comma-separated string of quoted tags into a list of bare tags.

    Every item of ``res`` is cut on ", " and all single quotes are removed
    from the resulting pieces. Returns one list of tags per input item.
    """
    return [
        [piece.replace("'", '') for piece in chunk.split(", ")]
        for chunk in res
    ]

In [None]:
# results[run][set][entry], with set in: gold_l1, gold_l2, predictions_l1, predictions_l2
results = {}


def _parse_tag_lists(serialized):
    """Rebuild nested tag lists from their string form.

    The string is assumed to look like "[['O', 'I-PER'], ['O']]": the outer
    "[[" and "]]" are stripped, the rest is cut on "], [" and each chunk is
    cleaned by ``cleanLists``.
    """
    return cleanLists(serialized[2:-2].split('], ['))


# For each run, load the level-1 and level-2 test files side by side.
for i, (path_l1, path_l2) in enumerate(zip(L1_test, L2_test)):
    # Context managers close both files even if the JSON is malformed.
    with open(path_l1) as f1, open(path_l2) as f2:
        data1 = json.load(f1)
        data2 = json.load(f2)

    results[f"run_{i+1}"] = {
        "gold_l1": _parse_tag_lists(data1['eval_labels']),
        "gold_l2": _parse_tag_lists(data2['eval_labels']),
        "predictions_l1": _parse_tag_lists(data1['eval_predictions']),
        "predictions_l2": _parse_tag_lists(data2['eval_predictions']),
    }

In [None]:
# Display the names of the runs that were loaded.
results.keys()

In [None]:
# Display the sets (gold / predictions per level) stored for the first run.
results["run_1"].keys()

## Compute joint-labels

In [None]:
def createJointLabels(list1,list2):
    """Merge level-1 and level-2 tag sequences into joint labels.

    For each position the two tags are joined as "<l1>+<l2>", then the
    prefixes are rewritten so that the joint label always starts with "I-"
    (or stays "O") and the original prefixes survive in lower case, e.g.
    "I-PER" and "I-TITREH" give "I-i_PER+i_TITREH", and "B-PER" and "O" give
    "I-b_PER+O".

    ``list1`` and ``list2`` are lists of tag lists; paired entries must have
    the same length (checked with an assertion).
    """
    # (old, new) rewrites; the order matters and mirrors the tagging scheme.
    rewrites = (
        ('+I-', '+i_'),
        ('I-', 'I-i_'),
        ('+B-', '+b_'),
        ('B-', 'I-b_'),
    )
    joint_entries = []
    for tags_l1, tags_l2 in zip(list1, list2):
        assert len(tags_l1) == len(tags_l2)
        merged = []
        for top, nested in zip(tags_l1, tags_l2):
            label = top + '+' + nested
            for old, new in rewrites:
                label = label.replace(old, new)
            merged.append(label)
        joint_entries.append(merged)
    return joint_entries

In [None]:
# Preview on the first run only: build the joint labels of run_1 and print
# entry number i (gold first, predictions second).
# NOTE(review): assumes run_1 holds at least 21 entries.
i = 20
jl_gold = createJointLabels(results["run_1"]["gold_l1"],results["run_1"]["gold_l2"])
jl_preds = createJointLabels(results["run_1"]["predictions_l1"],results["run_1"]["predictions_l2"])
print(jl_gold[i])
print(jl_preds[i])

In [None]:
def createL1L2Labels(list1,list2):
    """Build "<prefix>-<L2 entity>+<L1 entity>" labels from two tag levels.

    For each position:
    * both tags are "O": the label is "O+O";
    * only the level-2 tag is "O": the level-1 prefix is used, giving
      e.g. "I-O+PER";
    * otherwise the level-2 prefix is used, giving e.g. "B-LOC+SPAT"
      (or "B-LOC+O" when the level-1 tag is "O").

    ``list1`` and ``list2`` are lists of tag lists; paired entries must have
    the same length (checked with an assertion).

    NOTE: a later cell of this notebook rebinds the same name to a
    one-argument variant working on joint labels.
    """
    def _split(tag):
        # Exact comparison: a substring test would mistake entity names
        # containing the letter "O" (such as "I-LOC") for the outside tag.
        if tag == 'O':
            return '', 'O'
        prefix, entity = tag.split('-', 1)
        return prefix, entity

    entries = []
    for entryl1, entryl2 in zip(list1, list2):
        assert len(entryl1) == len(entryl2)
        new_tags = []
        for tag1, tag2 in zip(entryl1, entryl2):
            prefixe_l1, tag_l1 = _split(tag1)
            prefixe_l2, tag_l2 = _split(tag2)

            if prefixe_l2 == '' and prefixe_l1 == '':
                new_tags.append('O+O')
            elif prefixe_l2 == '':
                new_tags.append(prefixe_l1 + '-' + tag_l2 + '+' + tag_l1)
            else:
                new_tags.append(prefixe_l2 + '-' + tag_l2 + '+' + tag_l1)
        entries.append(new_tags)
    return entries

In [None]:
# Preview on the first run only: build the L1+L2 labels of run_1 with the
# two-argument createL1L2Labels and print entry number i (the index set in
# an earlier cell), gold first and predictions second.
l1l2_gold = createL1L2Labels(results["run_1"]["gold_l1"],results["run_1"]["gold_l2"])
l1l2_preds = createL1L2Labels(results["run_1"]["predictions_l1"],results["run_1"]["predictions_l2"])
print(l1l2_gold[i])
print(l1l2_preds[i])

In [None]:
def createL1L2Labels(jlentries):
    """Convert joint labels into "<prefix>-<L2 entity>+<L1 entity>" labels.

    A joint label such as "I-b_PER+b_TITREH" keeps its level-1 prefix in the
    third character and its level-2 prefix in the first character after "+".
    Each label is rewritten as follows:
    * both levels tagged: level-2 prefix first, e.g. "B-TITREH+PER";
    * only level 1 tagged ("I-i_PER+O"): level-1 prefix, e.g. "I-O+PER";
    * only level 2 tagged ("O+b_LOC"): level-2 prefix, e.g. "B-LOC+O";
    * anything else: "O+O".

    ``jlentries`` is a list of joint-label lists; a list of the same shape
    is returned.
    """
    def _convert(joint):
        first = joint[0]
        if first == 'I':
            if '+O' not in joint:
                pieces = joint.split('+')
                entity_l1 = pieces[0][4:]
                nested = pieces[1]
                return nested[:1].upper() + '-' + nested[2:] + '+' + entity_l1
            if joint[-1] == 'O':
                level1 = joint.split('+')[0]
                return level1[2:3].upper() + '-' + 'O' + '+' + level1[4:]
        elif first == 'O' and len(joint) > 3:
            nested = joint.split('+')[1]
            return nested[:1].upper() + '-' + nested[2:] + '+' + 'O'
        return 'O' + '+' + 'O'

    return [[_convert(joint) for joint in tags] for tags in jlentries]

In [None]:
def createFlatLabels(jl_gold,jl_preds,FORMAT):
    """Project joint labels onto the flat "DAS" tag set.

    Args:
        jl_gold: list of joint-label lists for the references.
        jl_preds: list of joint-label lists for the predictions.
        FORMAT: tagging scheme of the joint labels, 'IO' or 'IOB2'.

    Returns:
        A ``(das_labels, das_predictions)`` tuple of lists shaped like the
        inputs. A predicted joint label absent from the table of the chosen
        scheme is mapped to 'I-NO'.

    Raises:
        ValueError: if ``FORMAT`` is neither 'IO' nor 'IOB2'.
        KeyError: if a gold joint label is absent from the table.
    """
    if FORMAT == 'IO':
        LABELS_ID = {
            "O+O" : 0,
            "I-i_PER+O" : 1,
            "I-i_PER+i_TITREH" : 2,
            "I-i_ACT+O" : 3,
            "I-i_DESC+O" : 4,
            "I-i_DESC+i_ACT" : 5,
            "I-i_DESC+i_TITREP" : 6,
            "I-i_SPAT+O" : 7,
            "I-i_SPAT+i_LOC" : 8,
            "I-i_SPAT+i_CARDINAL" : 9,
            "I-i_SPAT+i_FT" : 10,
            "I-i_TITRE+O" : 11
        }
        LABELS_ID_TO_DAS =["O","I-PER","I-TITRE","I-ACT","O","I-ACT","I-TITRE","O","I-LOC","I-CARDINAL","I-FT","I-TITRE"]

    elif FORMAT == "IOB2":
        LABELS_ID = {
            "O+O" : 0,
            "I-b_PER+O" : 1,
            "I-i_PER+O" : 2,
            "I-b_PER+b_TITREH" : 3,
            "I-i_PER+b_TITREH" : 4,
            "I-i_PER+i_TITREH" : 5,
            "I-b_ACT+O" : 6,
            "I-i_ACT+O" : 7,
            "I-b_DESC+O" : 8,
            "I-i_DESC+O" : 9,
            "I-b_DESC+b_ACT" : 10,
            "I-i_DESC+b_ACT" : 11,
            "I-i_DESC+i_ACT" : 12,
            "I-b_DESC+b_TITREP" : 13,
            "I-i_DESC+b_TITREP" : 14,
            "I-i_DESC+i_TITREP" : 15,
            "I-b_SPAT+O" : 16,
            "I-i_SPAT+O" : 17,
            "I-b_SPAT+b_LOC" : 18,
            "I-i_SPAT+b_LOC" : 19,
            "I-i_SPAT+i_LOC" : 20,
            "I-b_SPAT+b_CARDINAL" : 21,
            "I-i_SPAT+b_CARDINAL" : 22,
            "I-i_SPAT+i_CARDINAL" : 23,
            "I-b_SPAT+b_FT" : 24,
            "I-i_SPAT+b_FT" : 25,
            "I-i_SPAT+i_FT" : 26,
            "I-b_TITRE+O" : 27,
            "I-i_TITRE+O" : 28
        }
        LABELS_ID_TO_DAS = ['O','I-PER','I-PER','I-TITRE','I-TITRE','I-TITRE','I-ACT','I-ACT','O','O','I-ACT','I-ACT','I-ACT','I-TITRE','I-TITRE','I-TITRE','O','O','I-LOC','I-LOC','I-LOC','I-CARDINAL','I-CARDINAL','I-CARDINAL','I-FT','I-FT','I-FT','I-TITRE','I-TITRE']

    else:
        # Without this check every prediction would silently become 'I-NO'.
        raise ValueError(f"Unknown FORMAT {FORMAT!r}: expected 'IO' or 'IOB2'")

    # Direct joint label -> DAS tag lookup built from the two tables above.
    das_by_joint = {label: LABELS_ID_TO_DAS[idx] for label, idx in LABELS_ID.items()}

    # Gold labels must all be known: an unknown one raises KeyError.
    das_labels = [[das_by_joint[elem] for elem in entry] for entry in jl_gold]

    # A model may predict a combination that does not exist in the table;
    # such predictions are kept, but as the dedicated 'I-NO' tag.
    das_predictions = [
        [das_by_joint.get(elem, 'I-NO') for elem in entry]
        for entry in jl_preds
    ]

    return das_labels, das_predictions

In [None]:
from datasets import load_metric
from multihead_utils.tools import unique

# Compute, for every loaded run, the seqeval metrics at each granularity and
# save them as JSON in the run directory.
# A single seqeval scorer is loaded once and reused for every evaluation.
seqeval = load_metric("seqeval")

for r in range(1, len(L1_test) + 1):
    run = results[f"run_{r}"]

    ################ L1
    results_l1 = seqeval.compute(predictions=run["predictions_l1"], references=run["gold_l1"])

    ################ L2
    results_l2 = seqeval.compute(predictions=run["predictions_l2"], references=run["gold_l2"])

    ################ Joint-labels metrics (including prefixes)
    jl_predictions = createJointLabels(run["predictions_l1"], run["predictions_l2"]) #Predictions
    jl_labels = createJointLabels(run["gold_l1"], run["gold_l2"]) #Gold

    results_jl = seqeval.compute(predictions=jl_predictions, references=jl_labels)

    #Save the joint-labels
    #jointlabelslist = unique(jl_predictions)

    ################ L1+l2
    if FORMAT == 'IOB2':
        l1l2_predictions = createL1L2Labels(jl_predictions)
        l1l2_labels = createL1L2Labels(jl_labels)
    else:
        l1l2_predictions = jl_predictions
        l1l2_labels = jl_labels

    results_l1l2 = seqeval.compute(predictions=l1l2_predictions, references=l1l2_labels)

    ################ DAS
    # Use the joint labels of the current run: the jl_gold / jl_preds globals
    # only hold the run_1 preview and would give every run the same DAS scores.
    das_labels, das_predictions = createFlatLabels(jl_labels, jl_predictions, FORMAT)

    results_das = seqeval.compute(predictions=das_predictions, references=das_labels)

    ################# Global metrics
    # Both levels are scored together by concatenating their sequences.
    all_preds = run["predictions_l1"] + run["predictions_l2"]
    all_labels = run["gold_l1"] + run["gold_l2"]

    results_all = seqeval.compute(predictions=all_preds, references=all_labels)

    scores = {
        #Global results (L1 and L2)
        "eval_precision-all": results_all["overall_precision"],
        "eval_recall-all": results_all["overall_recall"],
        "eval_f1-all": results_all["overall_f1"],
        "eval_accuracy-all": results_all["overall_accuracy"],
        #L1
        "eval_precision-l1": results_l1["overall_precision"],
        "eval_recall-l1": results_l1["overall_recall"],
        "eval_f1-l1": results_l1["overall_f1"],
        "eval_accuracy-l1": results_l1["overall_accuracy"],
        #L2
        "eval_precision-l2": results_l2["overall_precision"],
        "eval_recall-l2": results_l2["overall_recall"],
        "eval_f1-l2": results_l2["overall_f1"],
        "eval_accuracy-l2": results_l2["overall_accuracy"],
        #Joint-labels (including prefixes)
        "eval_precision": results_jl["overall_precision"],
        "eval_recall": results_jl["overall_recall"],
        "eval_f1": results_jl["overall_f1"],
        "eval_accuracy": results_jl["overall_accuracy"],
        #L1+L2
        "eval_precision-l1l2": results_l1l2["overall_precision"],
        "eval_recall-l1l2": results_l1l2["overall_recall"],
        "eval_f1-l1l2": results_l1l2["overall_f1"],
        "eval_accuracy-l1l2": results_l1l2["overall_accuracy"],
        #DAS
        "eval_precision-das": results_das["overall_precision"],
        "eval_recall-das": results_das["overall_recall"],
        "eval_f1-das": results_das["overall_f1"],
        "eval_accuracy-das": results_das["overall_accuracy"],
        #"joint-labels-list":f"{jointlabelslist}",
        #By class
        "eval_PER": results_all['PER'],
        "eval_ACT": results_all['ACT'],
        "eval_ACT_L1": results_l1['ACT'],
        "eval_ACT_L2": results_l2['ACT'],
        "eval_DESC": results_all['DESC'],
        "eval_TITREH": results_all['TITREH'],
        "eval_TITREP": results_all['TITREP'],
        "eval_SPAT": results_all['SPAT'],
        "eval_LOC": results_all['LOC'],
        "eval_CARDINAL": results_all['CARDINAL'],
        "eval_FT": results_all['FT']
    }

    #if 'TITRE' in list(results_all.keys()):
        #scores["TITRE"] = f"{results_all['eval_TITRE']}"
    print(scores)

    run_dir = f"{METRICS_OUTPUT_DIR}/run_{r}"
    os.makedirs(run_dir, exist_ok=True)

    # default=str stringifies any value the JSON encoder cannot serialize.
    with open(f"{run_dir}/test_{str(TRAINSETS_SIZES[0])}.json", 'w') as fp:
        json.dump(scores, fp, default=str)