# Figures and metrics

Evaluation scores on level-1 entities segmentation and classification with joint-labels method

In [None]:
import os, sys
from pathlib import Path

# Detect a Google Colab session from the IPython shell description and record
# the answer in os.environ as the string "True" / "False".
ENV_IS_GOOGLE_COLAB = 'google.colab' in str(get_ipython())
os.environ["ENV_IS_GOOGLE_COLAB"] = str(ENV_IS_GOOGLE_COLAB)

if not ENV_IS_GOOGLE_COLAB:
    # Local run: BASE is the directory of this approach.
    BASE = Path().resolve()
    # Adapt the two paths below to your situation.
    # Where your data are located before Dataset object creation
    DATASETS = Path('../dataset_ICDAR').resolve()
    # Where you save the results of this notebook
    OUT_BASE = Path('../res_ICDAR').resolve()
else:
    from google.colab import drive

    # Mount Google Drive and use a folder inside it as BASE.
    mountpoint = Path("/content/drive")
    drive.mount(str(mountpoint))
    # Adapt this to your situation
    base = mountpoint / "MyDrive/article_icdar_2023"
    # Add BASE to the Python path
    sys.path.append(str(base))
    # Make BASE absolute
    BASE = Path(base).resolve()
    DATASETS = BASE / "dataset_ICDAR"
    OUT_BASE = BASE / "res_ICDAR"

# Show the resolved configuration, one value per line.
print(sys.path, BASE, DATASETS, OUT_BASE, sep="\n")

## Constants

In [None]:
# Denominator used to express a training-set size as a percentage
# (the "trainsize_p" column built by the metric loaders).
# Presumably the size of the full training set -- TODO confirm.
MAX_TRAINSET_SIZE = 6084

## Tools

In [None]:
import pandas as pd
import re
import os
import json

def compile_metrics(path):
    """Gather the ``test_*json`` metrics of every ``run*`` directory under *path*.

    Each entry of *path* whose name contains ``run`` is scanned for files whose
    name contains both ``test_`` and ``json``. Every such file is read as one
    row and tagged with three extra columns:

    * ``run``: first integer found in the directory name (kept as a string),
    * ``trainsize``: first integer found in the file name,
    * ``trainsize_p``: ``trainsize`` as a percentage of ``MAX_TRAINSET_SIZE``,
      rounded to one decimal.

    Args:
        path: ``pathlib.Path`` of the experiment directory (it is combined with
            the ``/`` operator, so a plain string is not accepted).

    Returns:
        A DataFrame indexed by ``(run, trainsize)`` holding the first row found
        for each pair.

    Raises:
        FileNotFoundError: if no metrics file is found under *path*.
    """
    frames = []
    for run_dir in os.listdir(path):
        if 'run' not in run_dir:
            continue
        run_path = path / run_dir
        # Raw string: "\d" in a plain literal is an invalid escape sequence.
        nrun = re.search(r"\d+", run_dir)[0]

        files = [f for f in os.listdir(run_path) if "test_" in f and 'json' in f]
        sizes = [int(re.search(r"\d+", f)[0]) for f in files]

        for file, size in zip(files, sizes):
            row = pd.DataFrame([pd.read_json(run_path / file, typ='series')])
            row["trainsize"] = size
            row["run"] = nrun
            row["trainsize_p"] = round(100 * size / MAX_TRAINSET_SIZE, 1)
            frames.append(row)

    if not frames:
        raise FileNotFoundError(f"No run*/test_*json metrics file found under {path}")

    # Concatenate once instead of growing a frame inside the loop: this is
    # linear and avoids concatenating with an initial empty DataFrame.
    df = pd.concat(frames)
    return df.groupby(["run","trainsize"]).first()

# Load metrics

## Method 1

In [None]:
# Method 1, experiment 1 metrics directory (results later grouped under "Reference").
METRICS_DIR_REF_M1 = OUT_BASE / "method_1/m1-110-experiment_1_metrics"

In [None]:
# Load the four Method-1 variants (CamemBERT / pretrained CamemBERT, IO / IOB2)
# of experiment 1 from their metrics JSON files, then stack them under a
# "Test" index level named after each variant.
camembert_io_ref_m1 = compile_metrics(METRICS_DIR_REF_M1 / "111-camembert-ner-multihead-io")
camembert_iob2_ref_m1 = compile_metrics(METRICS_DIR_REF_M1 / "112-camembert-ner-multihead-iob2")
prtn_camembert_io_ref_m1 = compile_metrics(METRICS_DIR_REF_M1 / "113-pretrained-camembert-ner-multihead-io")
prtn_camembert_iob2_ref_m1 = compile_metrics(METRICS_DIR_REF_M1 / "114-pretrained-camembert-multihead-iob2")
metrics_raw_m1_ref = pd.concat([camembert_io_ref_m1,camembert_iob2_ref_m1,prtn_camembert_io_ref_m1,prtn_camembert_iob2_ref_m1], keys=["CmBERT IO", "CmBERT IOB2", "CmBERT+ptrn IO", "CmBERT+ptrn IOB2"],names=["Test"])
metrics_raw_m1_ref.head()

In [None]:
# Method 1, experiment 2 metrics directory (results later grouped under "OCR").
METRICS_DIR_OCR_M1 = OUT_BASE / "method_1/m1-120-experiment_2_metrics"

In [None]:
# Load the four Method-1 variants (CamemBERT / pretrained CamemBERT, IO / IOB2)
# of experiment 2 from their metrics JSON files, then stack them under a
# "Test" index level named after each variant.
camembert_io_ocr_m1 = compile_metrics(METRICS_DIR_OCR_M1 / "121-camembert-ner-multihead-io")
camembert_iob2_ocr_m1 = compile_metrics(METRICS_DIR_OCR_M1 / "122-camembert-ner-multihead-iob2")
prtn_camembert_io_ocr_m1 = compile_metrics(METRICS_DIR_OCR_M1 / "123-pretrained-camembert-ner-multihead-io")
prtn_camembert_iob2_ocr_m1 = compile_metrics(METRICS_DIR_OCR_M1 / "124-pretrained-camembert-multihead-iob2")
metrics_raw_m1_ocr = pd.concat([camembert_io_ocr_m1,camembert_iob2_ocr_m1,prtn_camembert_io_ocr_m1,prtn_camembert_iob2_ocr_m1], keys=["CmBERT IO", "CmBERT IOB2", "CmBERT+ptrn IO", "CmBERT+ptrn IOB2"],names=["Test"])
metrics_raw_m1_ocr.head()

## Methods 2 and 3

In [None]:
def loadM2M3IOMetrics(path,model_name,res,keys):
    """Load the metrics of an IO-tagged model and register them in *res* / *keys*.

    Nothing happens when ``<path>/run_2`` does not exist (presumably the sign
    that the experiment has not been run -- TODO confirm). Otherwise the
    metrics compiled from *path* get four extra ``eval_<metric>-l1l2`` columns
    copied from the plain ``eval_<metric>`` columns, the frame is appended to
    *res* and *model_name* is appended to *keys*.

    Args:
        path: experiment directory handed to ``compile_metrics``.
        model_name: label stored in *keys* for this model.
        res: list of DataFrames, extended in place.
        keys: list of labels, extended in place.
    """
    if not os.path.exists(f"{path}/run_2"):
        return

    metrics = compile_metrics(path)
    # Duplicate each global score under its "-l1l2" name.
    for metric in ("precision", "recall", "f1", "accuracy"):
        metrics[f"eval_{metric}-l1l2"] = metrics[f"eval_{metric}"]
    res.append(metrics)
    keys.append(model_name)
    
def loadM2M3IOB2Metrics(path,model_name,res,keys):
    """Load the metrics of an IOB2-tagged model and register them in *res* / *keys*.

    Nothing happens when ``<path>/run_2`` does not exist (presumably the sign
    that the experiment has not been run -- TODO confirm). Otherwise the
    metrics compiled from *path* are appended to *res* unchanged and
    *model_name* is appended to *keys*.

    Args:
        path: experiment directory handed to ``compile_metrics``.
        model_name: label stored in *keys* for this model.
        res: list of DataFrames, extended in place.
        keys: list of labels, extended in place.
    """
    run_2_dir = f"{path}/run_2"
    if not os.path.exists(run_2_dir):
        return

    res.append(compile_metrics(path))
    keys.append(model_name)

#### Method 2

In [None]:
# Method 2, experiment 1 metrics directory (results later grouped under "Reference").
METRICS_DIR_REF_M2 = OUT_BASE / "method_2/m2-210-experiment_1_metrics"

In [None]:
# Accumulators filled in place by the loaders: one DataFrame and one label
# per model variant whose run_2 directory exists.
res_m2_ref = []
keys_m2_ref = []

camembert_ner_io_ref_m2 = METRICS_DIR_REF_M2 / "211-camembert-ner-joint-labelling-io"
loadM2M3IOMetrics(camembert_ner_io_ref_m2,"CmBERT IO",res_m2_ref,keys_m2_ref)

camembert_ner_iob2_ref_m2 = METRICS_DIR_REF_M2 / "212-camembert-ner-joint-labelling-iob2"
loadM2M3IOB2Metrics(camembert_ner_iob2_ref_m2,"CmBERT IOB2",res_m2_ref,keys_m2_ref)

ptrn_camembert_ner_io_ref_m2 = METRICS_DIR_REF_M2 / "213-pretrained-camembert-ner-joint-labelling-io"
loadM2M3IOMetrics(ptrn_camembert_ner_io_ref_m2,"CmBERT+ptrn IO",res_m2_ref,keys_m2_ref)

ptrn_camembert_ner_iob2_ref_m2 = METRICS_DIR_REF_M2 / "214-pretrained-camembert-ner-joint-labelling-iob2"
loadM2M3IOB2Metrics(ptrn_camembert_ner_iob2_ref_m2,"CmBERT+ptrn IOB2",res_m2_ref,keys_m2_ref)

In [None]:
# Stack the loaded Method-2 / experiment-1 frames under a "Test" index level.
metrics_raw_m2_ref = pd.concat(res_m2_ref, keys=keys_m2_ref,names=["Test"])
metrics_raw_m2_ref.head()

In [None]:
# Method 2, experiment 2 metrics directory (results later grouped under "OCR").
METRICS_DIR_OCR_M2 = OUT_BASE / "method_2/m2-220-experiment_2_metrics"

In [None]:
# Accumulators filled in place by the loaders: one DataFrame and one label
# per model variant whose run_2 directory exists.
res_m2_ocr = []
keys_m2_ocr = []

camembert_ner_io_ocr_m2 = METRICS_DIR_OCR_M2 / "221-camembert-ner-joint-labelling-io"
loadM2M3IOMetrics(camembert_ner_io_ocr_m2,"CmBERT IO",res_m2_ocr,keys_m2_ocr)

camembert_ner_iob2_ocr_m2 = METRICS_DIR_OCR_M2 / "222-camembert-ner-joint-labelling-iob2"
loadM2M3IOB2Metrics(camembert_ner_iob2_ocr_m2,"CmBERT IOB2",res_m2_ocr,keys_m2_ocr)

ptrn_camembert_ner_io_ocr_m2 = METRICS_DIR_OCR_M2 / "223-pretrained-camembert-ner-joint-labelling-io"
loadM2M3IOMetrics(ptrn_camembert_ner_io_ocr_m2,"CmBERT+ptrn IO",res_m2_ocr,keys_m2_ocr)

ptrn_camembert_ner_iob2_ocr_m2 = METRICS_DIR_OCR_M2 / "224-pretrained-camembert-ner-joint-labelling-iob2"
loadM2M3IOB2Metrics(ptrn_camembert_ner_iob2_ocr_m2,"CmBERT+ptrn IOB2",res_m2_ocr,keys_m2_ocr)

In [None]:
# Stack the loaded Method-2 / experiment-2 frames under a "Test" index level.
metrics_raw_m2_ocr = pd.concat(res_m2_ocr, keys=keys_m2_ocr,names=["Test"])
metrics_raw_m2_ocr.head()

#### Method 3

In [None]:
# Method 3, experiment 1 metrics directory (results later grouped under "Reference").
METRICS_DIR_REF_M3 = OUT_BASE / "method_3/m3-310-experiment_1_metrics"

In [None]:
# Accumulators filled in place by the loaders: one DataFrame and one label
# per model variant whose run_2 directory exists.
res_m3_ref = []
keys_m3_ref = []

camembert_ner_io_ref_m3 = METRICS_DIR_REF_M3 / "311-camembert-ner-hierarchical-loss-io"
loadM2M3IOMetrics(camembert_ner_io_ref_m3,"CmBERT IO",res_m3_ref,keys_m3_ref)

camembert_ner_iob2_ref_m3 = METRICS_DIR_REF_M3 / "312-camembert-ner-hierarchical-loss-iob2"
loadM2M3IOB2Metrics(camembert_ner_iob2_ref_m3,"CmBERT IOB2",res_m3_ref,keys_m3_ref)

ptrn_camembert_ner_io_ref_m3 = METRICS_DIR_REF_M3 / "313-pretrained-camembert-ner-hierarchical-loss-io"
loadM2M3IOMetrics(ptrn_camembert_ner_io_ref_m3,"CmBERT+ptrn IO",res_m3_ref,keys_m3_ref)

ptrn_camembert_ner_iob2_ref_m3 = METRICS_DIR_REF_M3 / "314-pretrained-camembert-ner-hierarchical-loss-iob2"
loadM2M3IOB2Metrics(ptrn_camembert_ner_iob2_ref_m3,"CmBERT+ptrn IOB2",res_m3_ref,keys_m3_ref)

In [None]:
# Stack the loaded Method-3 / experiment-1 frames under a "Test" index level.
metrics_raw_m3_ref = pd.concat(res_m3_ref, keys=keys_m3_ref,names=["Test"])
metrics_raw_m3_ref.head()

In [None]:
# Method 3, experiment 2 metrics directory (results later grouped under "OCR").
METRICS_DIR_OCR_M3 = OUT_BASE / "method_3/m3-320-experiment_2_metrics"

In [None]:
# Accumulators filled in place by the loaders: one DataFrame and one label
# per model variant whose run_2 directory exists.
res_m3_ocr = []
keys_m3_ocr = []

camembert_ner_io_ocr_m3 = METRICS_DIR_OCR_M3 / "321-camembert-ner-hierarchical-loss-io"
loadM2M3IOMetrics(camembert_ner_io_ocr_m3,"CmBERT IO",res_m3_ocr,keys_m3_ocr)

camembert_ner_iob2_ocr_m3 = METRICS_DIR_OCR_M3 / "322-camembert-ner-hierarchical-loss-iob2"
loadM2M3IOB2Metrics(camembert_ner_iob2_ocr_m3,"CmBERT IOB2",res_m3_ocr,keys_m3_ocr)

ptrn_camembert_ner_io_ocr_m3 = METRICS_DIR_OCR_M3 / "323-pretrained-camembert-ner-hierarchical-loss-io"
loadM2M3IOMetrics(ptrn_camembert_ner_io_ocr_m3,"CmBERT+ptrn IO",res_m3_ocr,keys_m3_ocr)

ptrn_camembert_ner_iob2_ocr_m3 = METRICS_DIR_OCR_M3 / "324-pretrained-camembert-ner-hierarchical-loss-iob2"
loadM2M3IOB2Metrics(ptrn_camembert_ner_iob2_ocr_m3,"CmBERT+ptrn IOB2",res_m3_ocr,keys_m3_ocr)

In [None]:
# Stack the loaded Method-3 / experiment-2 frames under a "Test" index level.
metrics_raw_m3_ocr = pd.concat(res_m3_ocr, keys=keys_m3_ocr,names=["Test"])
metrics_raw_m3_ocr.head()

In [None]:
# Experiment-1 results of the three methods, stacked under an "Approach" level.
ref = [metrics_raw_m1_ref,metrics_raw_m2_ref,metrics_raw_m3_ref]
keys_ref = ["M1","M2","M3"]
metrics_raw_ref = pd.concat(ref, keys=keys_ref,names=["Approach"])

# Experiment-2 results of the three methods, stacked the same way.
ocr = [metrics_raw_m1_ocr, metrics_raw_m2_ocr,metrics_raw_m3_ocr]
keys_ocr = ["M1","M2","M3"]
metrics_raw_ocr = pd.concat(ocr, keys=keys_ocr,names=["Approach"])

# Final frame: outermost "Dataset" level on top of Approach / Test / run / trainsize.
datasets = [metrics_raw_ref,metrics_raw_ocr]
keys = ["Reference","OCR"]
metrics_raw = pd.concat(datasets, keys=keys,names=["Dataset"])
metrics_raw

## 231.1 Build the averaged table


In [None]:
# Names of the score columns, selected by substring: precision ('eval_p'),
# recall ('eval_re'), f1 ('eval_f') and accuracy ('eval_ac').
eval_ = []
for elem in metrics_raw.columns:
    if 'eval_p' in elem or 'eval_re' in elem or 'eval_f' in elem or 'eval_ac' in elem:
        eval_.append(elem)

In [None]:
# Average over the remaining index levels (run, trainsize) by grouping on the
# first three levels (Dataset, Approach, Test), then scale the score columns
# by 100.
# NOTE(review): metrics_raw is rebound here, so re-running this cell alone
# multiplies the already-scaled scores by 100 again.
metrics_raw = metrics_raw.groupby(level=(0,1,2)).mean()
metrics_raw[eval_] = metrics_raw[eval_].multiply(100., axis=1)
metrics_raw 

In [None]:
# Keep only the six F1 columns reported in the table, in display order.
averaged = metrics_raw.copy()
averaged = averaged[["eval_f1-all","eval_f1-l1l2","eval_f1-l1","eval_f1-l2","eval_f1","eval_f1-das"]]

# Set pretty names
averaged.index.names = ['Dataset','Approach',"Model and tags"]
# errors="raise": fail if any of the expected columns is missing.
averaged.rename(columns={"eval_f1-all":"All",
                        "eval_f1-l1":"L1",
                        "eval_f1-l2":"L2",
                         "eval_f1-l1l2":"L1&L2",
                         "eval_f1":"P-L1+P-L2",
                         "eval_f1-das":"Flat",
                         }, errors="raise", inplace=True)
#averaged.rename(mapper={"camembert_io_ref": "CmBERT IO","camembert_iob2_ref": "CmBERT IOB2","prtn_camembert_io_ref": "Ptrn CmBERT IO","prtn_camembert_iob2_ref": "Ptrn CmBERT IOB2"}, errors="ignore", inplace=True, axis=0)
averaged

In [None]:
# LaTeX table for the "Reference" dataset only: take that slice, move the
# Dataset level to a column and drop it before rendering.
averaged_ref = averaged.loc[['Reference']]
averaged_ref = averaged_ref.reset_index(0)
del averaged_ref["Dataset"]
latex_table = averaged_ref.copy()
caption = "F1 score measured for each approach, pre-trained model and tag format (mean of 5 runs) on the reference dataset."
print(latex_table.to_latex(float_format="%.1f", multirow=True, caption=caption))
latex_table

In [None]:
# LaTeX table for the "OCR" dataset only: take that slice, move the Dataset
# level to a column and drop it before rendering.
# NOTE(review): the OCR slice is stored under the name averaged_ref, which
# overwrites any value previously bound to that name.
averaged_ref = averaged.loc[['OCR']]
averaged_ref = averaged_ref.reset_index(0)
del averaged_ref["Dataset"]
latex_table = averaged_ref.copy()
caption = "F1 score measured for each approach, pre-trained model and tag format (mean of 5 runs) on the noisy dataset."
print(latex_table.to_latex(float_format="%.1f", multirow=True, caption=caption))
latex_table

## 231.2 Create the results table

In [None]:
# LaTeX table covering both datasets at once (scores printed with one decimal).
latex_table = averaged.copy()

caption = "F1 score measured for each approach, pre-trained model and tag format (mean of 5 runs)."
print(latex_table.to_latex(float_format="%.1f", multirow=True, caption=caption))
latex_table

# 232 - Experiments 1 & 2: tables by classes

In [None]:
import pandas as pd
import json
from pandas import json_normalize

# Per-entity-type keys extracted from each metrics JSON by
# compile_metrics_by_classes; their order is also the column order expected
# by the by-class tables.
classes = ['eval_PER','eval_ACT','eval_ACT_L1','eval_ACT_L2','eval_DESC','eval_TITREH','eval_TITREP','eval_SPAT','eval_LOC','eval_CARDINAL','eval_FT'
 #'eval_TITRE'
]

def compile_metrics_by_classes(path, classes):
    """Gather the per-class metrics of every ``run*`` directory under *path*.

    Each entry of *path* whose name contains ``run`` is scanned for files whose
    name contains both ``test_`` and ``json``. From every such file the entries
    listed in *classes* are extracted and transposed so that each class becomes
    one row; the ``number`` column is cast to ``int`` and three columns are
    added: ``run`` (first integer of the directory name, kept as a string),
    ``trainsize`` (first integer of the file name) and ``trainsize_p``
    (``trainsize`` as a percentage of ``MAX_TRAINSET_SIZE``, one decimal).

    Args:
        path: ``pathlib.Path`` of the experiment directory (it is combined with
            the ``/`` operator, so a plain string is not accepted).
        classes: keys to extract from each metrics file.

    Returns:
        A DataFrame indexed by ``(run, classe)``. Only the first row found for
        each pair is kept, so when a run holds several test files the later
        ones are ignored for a class already seen.

    Raises:
        FileNotFoundError: if no metrics file is found under *path*.
    """
    frames = []
    for run_dir in os.listdir(path):
        if 'run' not in run_dir:
            continue
        run_path = path / run_dir
        # Raw string: "\d" in a plain literal is an invalid escape sequence.
        nrun = re.search(r"\d+", run_dir)[0]

        files = [f for f in os.listdir(run_path) if "test_" in f and 'json' in f]
        sizes = [int(re.search(r"\d+", f)[0]) for f in files]

        for file, size in zip(files, sizes):
            raw = pd.read_json(run_path / file)
            # One column per requested class, then transpose: one row per class.
            per_class = pd.DataFrame.from_dict({key: raw[key] for key in classes}).T
            per_class['number'] = per_class['number'].astype(int)
            per_class["trainsize"] = size
            per_class["run"] = nrun
            per_class["trainsize_p"] = round(100 * size / MAX_TRAINSET_SIZE, 1)
            frames.append(per_class)

    if not frames:
        raise FileNotFoundError(f"No run*/test_*json metrics file found under {path}")

    # Concatenate once instead of growing a frame inside the loop, and expose
    # the class name (the row label) as a regular column for the grouping.
    df = pd.concat(frames)
    df["classe"] = df.index

    return df.groupby(["run","classe"]).first()

In [None]:
def formatbyClasses(df,classes,metric_name):
    """Spread one metric of a per-class frame into one column per class.

    Args:
        df: DataFrame whose third index level is named ``classe`` and which
            holds a *metric_name* column.
        classes: class names, in the column order wanted for the result.
        metric_name: name of the metric column to spread.

    Returns:
        A DataFrame indexed by the remaining index levels, with one
        ``"<classe>-<metric_name>"`` column per entry of *classes*, in that
        order.

    Raises:
        KeyError: if a class of *classes* is absent from *df*.
    """
    m = df[[metric_name]].reset_index(2)
    m = m.pivot(columns='classe')
    # pivot() orders the new columns by class name, not by the order of
    # `classes`: build each label from the class actually held by the column
    # instead of assigning the labels positionally.
    m.columns = [f"{classe}-{metric_name}" for classe in m.columns.get_level_values('classe')]
    # Restore the caller's column order.
    return m[[classe + '-' + metric_name for classe in classes]]

def byClassesDf(metrics_raw_classes,classes):
    """Build the wide per-class table for precision, recall, f1 and number.

    Each of the four metrics is spread with ``formatbyClasses`` and the four
    resulting frames are joined on their index, in that order.

    Args:
        metrics_raw_classes: per-class frame accepted by ``formatbyClasses``.
        classes: class names forwarded to ``formatbyClasses``.

    Returns:
        The joined DataFrame holding the columns of the four metrics.
    """
    per_metric = [
        formatbyClasses(metrics_raw_classes, classes, metric)
        for metric in ('precision', 'recall', 'f1', 'number')
    ]
    wide = per_metric[0]
    for block in per_metric[1:]:
        wide = wide.join(block)
    return wide

### Method 1

In [None]:
# NOTE(review): despite the OCR name, this binds METRICS_DIR_OCR_M1 to the
# experiment-1 directory (same path as METRICS_DIR_REF_M1); the name is
# rebound to the experiment-2 directory further down.
METRICS_DIR_OCR_M1 = OUT_BASE / "method_1/m1-110-experiment_1_metrics"

In [None]:
# Load the per-class metrics of the four Method-1 variants for experiment 1.
# All four read from METRICS_DIR_REF_M1, so the result does not depend on
# which directory METRICS_DIR_OCR_M1 currently points to.
camembert_io_ref_m1 = compile_metrics_by_classes(METRICS_DIR_REF_M1 / "111-camembert-ner-multihead-io", classes)
camembert_iob2_ref_m1 = compile_metrics_by_classes(METRICS_DIR_REF_M1 / "112-camembert-ner-multihead-iob2", classes)
prtn_camembert_io_ref_m1 = compile_metrics_by_classes(METRICS_DIR_REF_M1 / "113-pretrained-camembert-ner-multihead-io",classes)
prtn_camembert_iob2_ref_m1 = compile_metrics_by_classes(METRICS_DIR_REF_M1 / "114-pretrained-camembert-multihead-iob2", classes)

In [None]:
# Stack the four variants under an outer (unnamed) index level, then spread
# the per-class rows into one column per class and metric.
tmp_m1_ref = pd.concat([camembert_io_ref_m1,camembert_iob2_ref_m1,prtn_camembert_io_ref_m1,prtn_camembert_iob2_ref_m1], keys=["CmBERT IO", "CmBERT IOB2", "CmBERT+ptrn IO", "CmBERT+ptrn IOB2"])
metrics_raw_classes_m1_ref = byClassesDf(tmp_m1_ref,classes)
metrics_raw_classes_m1_ref.head()

In [None]:
# Method 1, experiment 2 metrics directory (results later grouped under "OCR").
METRICS_DIR_OCR_M1 = OUT_BASE / "method_1/m1-120-experiment_2_metrics"

In [None]:
# Load the per-class metrics of the four Method-1 variants for experiment 2
# from their metrics JSON files.
camembert_io_pero_m1 = compile_metrics_by_classes(METRICS_DIR_OCR_M1 / "121-camembert-ner-multihead-io", classes)
camembert_iob2_pero_m1 = compile_metrics_by_classes(METRICS_DIR_OCR_M1 / "122-camembert-ner-multihead-iob2", classes)
prtn_camembert_io_pero_m1 = compile_metrics_by_classes(METRICS_DIR_OCR_M1 / "123-pretrained-camembert-ner-multihead-io",classes)
prtn_camembert_iob2_pero_m1 = compile_metrics_by_classes(METRICS_DIR_OCR_M1 / "124-pretrained-camembert-multihead-iob2", classes)

In [None]:
# Stack the four variants under an outer (unnamed) index level, then spread
# the per-class rows into one column per class and metric.
tmp_m1_pero = pd.concat([camembert_io_pero_m1,camembert_iob2_pero_m1,prtn_camembert_io_pero_m1,prtn_camembert_iob2_pero_m1], keys=["CmBERT IO", "CmBERT IOB2", "CmBERT+ptrn IO", "CmBERT+ptrn IOB2"])
metrics_raw_classes_m1_pero = byClassesDf(tmp_m1_pero,classes)
metrics_raw_classes_m1_pero.head()

### Method 2

In [None]:
# Method 2, experiment 1 metrics directory (results later grouped under "Reference").
METRICS_DIR_REF_M2 = OUT_BASE / "method_2/m2-210-experiment_1_metrics"

In [None]:
# Load the per-class metrics of the Method-2 variants for experiment 1.
# A variant is loaded only when its run_2 directory exists; otherwise it is
# left out of the table.
res_classes_m2_ref = []
keys_classes_m2_ref = []

if os.path.exists(METRICS_DIR_REF_M2 / "211-camembert-ner-joint-labelling-io/run_2"):
    camembert_ner_io = compile_metrics_by_classes(METRICS_DIR_REF_M2 / "211-camembert-ner-joint-labelling-io",classes)
    res_classes_m2_ref.append(camembert_ner_io)
    keys_classes_m2_ref.append("CmBERT IO")

if os.path.exists(METRICS_DIR_REF_M2 / "212-camembert-ner-joint-labelling-iob2/run_2"):
    camembert_ner_iob2 = compile_metrics_by_classes(METRICS_DIR_REF_M2 / "212-camembert-ner-joint-labelling-iob2",classes)
    res_classes_m2_ref.append(camembert_ner_iob2)
    keys_classes_m2_ref.append("CmBERT IOB2")

if os.path.exists(METRICS_DIR_REF_M2 / "213-pretrained-camembert-ner-joint-labelling-io/run_2"):
    ptrn_camembert_ner_io = compile_metrics_by_classes(METRICS_DIR_REF_M2 / "213-pretrained-camembert-ner-joint-labelling-io",classes)
    res_classes_m2_ref.append(ptrn_camembert_ner_io)
    keys_classes_m2_ref.append("CmBERT+ptrn IO")

if os.path.exists(METRICS_DIR_REF_M2 / "214-pretrained-camembert-ner-joint-labelling-iob2/run_2"):
    ptrn_camembert_ner_iob2 = compile_metrics_by_classes(METRICS_DIR_REF_M2 / "214-pretrained-camembert-ner-joint-labelling-iob2",classes)
    res_classes_m2_ref.append(ptrn_camembert_ner_iob2)
    keys_classes_m2_ref.append("CmBERT+ptrn IOB2")

# Stack the loaded variants under a "Test" level and spread classes into columns.
tmp_m2_ref = pd.concat(res_classes_m2_ref,keys=keys_classes_m2_ref,names=["Test"])
metrics_raw_classes_m2_ref = byClassesDf(tmp_m2_ref,classes)
metrics_raw_classes_m2_ref

In [None]:
# Method 2, experiment 2 metrics directory (results later grouped under "OCR").
METRICS_DIR_PERO_M2 = OUT_BASE / "method_2/m2-220-experiment_2_metrics"

In [None]:
# Load the per-class metrics of the Method-2 variants for experiment 2.
# A variant is loaded only when its run_2 directory exists; otherwise it is
# left out of the table.
res_classes_m2_pero = []
keys_classes_m2_pero = []

if os.path.exists(METRICS_DIR_PERO_M2 / "221-camembert-ner-joint-labelling-io/run_2"):
    camembert_ner_io = compile_metrics_by_classes(METRICS_DIR_PERO_M2 / "221-camembert-ner-joint-labelling-io",classes)
    res_classes_m2_pero.append(camembert_ner_io)
    keys_classes_m2_pero.append("CmBERT IO")

if os.path.exists(METRICS_DIR_PERO_M2 / "222-camembert-ner-joint-labelling-iob2/run_2"):
    camembert_ner_iob2 = compile_metrics_by_classes(METRICS_DIR_PERO_M2 / "222-camembert-ner-joint-labelling-iob2",classes)
    res_classes_m2_pero.append(camembert_ner_iob2)
    keys_classes_m2_pero.append("CmBERT IOB2")

if os.path.exists(METRICS_DIR_PERO_M2 / "223-pretrained-camembert-ner-joint-labelling-io/run_2"):
    ptrn_camembert_ner_io = compile_metrics_by_classes(METRICS_DIR_PERO_M2 / "223-pretrained-camembert-ner-joint-labelling-io",classes)
    res_classes_m2_pero.append(ptrn_camembert_ner_io)
    keys_classes_m2_pero.append("CmBERT+ptrn IO")

if os.path.exists(METRICS_DIR_PERO_M2 / "224-pretrained-camembert-ner-joint-labelling-iob2/run_2"):
    ptrn_camembert_ner_iob2 = compile_metrics_by_classes(METRICS_DIR_PERO_M2 / "224-pretrained-camembert-ner-joint-labelling-iob2",classes)
    res_classes_m2_pero.append(ptrn_camembert_ner_iob2)
    keys_classes_m2_pero.append("CmBERT+ptrn IOB2")

# Stack the loaded variants under a "Test" level and spread classes into columns.
tmp_m2_pero = pd.concat(res_classes_m2_pero,keys=keys_classes_m2_pero,names=["Test"])
metrics_raw_classes_m2_pero = byClassesDf(tmp_m2_pero,classes)
metrics_raw_classes_m2_pero

### Method 3

In [None]:
# Method 3, experiment 1 metrics directory (results later grouped under "Reference").
METRICS_DIR_REF_M3 = OUT_BASE / "method_3/m3-310-experiment_1_metrics"

In [None]:
# Load the per-class metrics of the Method-3 variants for experiment 1.
# A variant is loaded only when its run_2 directory exists; otherwise it is
# left out of the table.
res_classes_m3_ref = []
keys_classes_m3_ref = []

if os.path.exists(METRICS_DIR_REF_M3 / "311-camembert-ner-hierarchical-loss-io/run_2"):
    camembert_ner_io = compile_metrics_by_classes(METRICS_DIR_REF_M3 / "311-camembert-ner-hierarchical-loss-io",classes)
    res_classes_m3_ref.append(camembert_ner_io)
    keys_classes_m3_ref.append("CmBERT IO")

if os.path.exists(METRICS_DIR_REF_M3 / "312-camembert-ner-hierarchical-loss-iob2/run_2"):
    camembert_ner_iob2 = compile_metrics_by_classes(METRICS_DIR_REF_M3 / "312-camembert-ner-hierarchical-loss-iob2",classes)
    res_classes_m3_ref.append(camembert_ner_iob2)
    keys_classes_m3_ref.append("CmBERT IOB2")

if os.path.exists(METRICS_DIR_REF_M3 / "313-pretrained-camembert-ner-hierarchical-loss-io/run_2"):
    ptrn_camembert_ner_io = compile_metrics_by_classes(METRICS_DIR_REF_M3 / "313-pretrained-camembert-ner-hierarchical-loss-io",classes)
    res_classes_m3_ref.append(ptrn_camembert_ner_io)
    keys_classes_m3_ref.append("CmBERT+ptrn IO")

if os.path.exists(METRICS_DIR_REF_M3 / "314-pretrained-camembert-ner-hierarchical-loss-iob2/run_2"):
    ptrn_camembert_ner_iob2 = compile_metrics_by_classes(METRICS_DIR_REF_M3 / "314-pretrained-camembert-ner-hierarchical-loss-iob2",classes)
    res_classes_m3_ref.append(ptrn_camembert_ner_iob2)
    keys_classes_m3_ref.append("CmBERT+ptrn IOB2")

# Stack the loaded variants under a "Test" level and spread classes into columns.
tmp_m3_ref = pd.concat(res_classes_m3_ref,keys=keys_classes_m3_ref,names=["Test"])
metrics_raw_classes_m3_ref = byClassesDf(tmp_m3_ref,classes)
metrics_raw_classes_m3_ref

In [None]:
# Method 3, experiment 2 metrics directory (results later grouped under "OCR").
METRICS_DIR_PERO_M3 = OUT_BASE / "method_3/m3-320-experiment_2_metrics"

In [None]:
# Load the per-class metrics of the Method-3 variants for experiment 2.
# A variant is loaded only when its run_2 directory exists; otherwise it is
# left out of the table.
res_classes_m3_pero = []
keys_classes_m3_pero = []

if os.path.exists(METRICS_DIR_PERO_M3 / "321-camembert-ner-hierarchical-loss-io/run_2"):
    camembert_ner_io = compile_metrics_by_classes(METRICS_DIR_PERO_M3 / "321-camembert-ner-hierarchical-loss-io",classes)
    res_classes_m3_pero.append(camembert_ner_io)
    keys_classes_m3_pero.append("CmBERT IO")

if os.path.exists(METRICS_DIR_PERO_M3 / "322-camembert-ner-hierarchical-loss-iob2/run_2"):
    camembert_ner_iob2 = compile_metrics_by_classes(METRICS_DIR_PERO_M3 / "322-camembert-ner-hierarchical-loss-iob2",classes)
    res_classes_m3_pero.append(camembert_ner_iob2)
    keys_classes_m3_pero.append("CmBERT IOB2")

if os.path.exists(METRICS_DIR_PERO_M3 / "323-pretrained-camembert-ner-hierarchical-loss-io/run_2"):
    ptrn_camembert_ner_io = compile_metrics_by_classes(METRICS_DIR_PERO_M3 / "323-pretrained-camembert-ner-hierarchical-loss-io",classes)
    res_classes_m3_pero.append(ptrn_camembert_ner_io)
    keys_classes_m3_pero.append("CmBERT+ptrn IO")

if os.path.exists(METRICS_DIR_PERO_M3 / "324-pretrained-camembert-ner-hierarchical-loss-iob2/run_2"):
    ptrn_camembert_ner_iob2 = compile_metrics_by_classes(METRICS_DIR_PERO_M3 / "324-pretrained-camembert-ner-hierarchical-loss-iob2",classes)
    res_classes_m3_pero.append(ptrn_camembert_ner_iob2)
    keys_classes_m3_pero.append("CmBERT+ptrn IOB2")

# Stack the loaded variants under a "Test" level and spread classes into columns.
tmp_m3_pero = pd.concat(res_classes_m3_pero,keys=keys_classes_m3_pero,names=["Test"])
metrics_raw_classes_m3_pero = byClassesDf(tmp_m3_pero,classes)
metrics_raw_classes_m3_pero

In [None]:
# Display the Method-2 / experiment-1 per-class table for inspection.
metrics_raw_classes_m2_ref

### Conclusion

In [None]:
# Experiment-1 per-class tables of the three methods, stacked under "Approach".
ref_classes = [metrics_raw_classes_m1_ref,metrics_raw_classes_m2_ref,metrics_raw_classes_m3_ref]
keys_ref = ["M1","M2","M3"]
metrics_raw_classes_ref = pd.concat(ref_classes, keys=keys_ref,names=["Approach"])

# Experiment-2 per-class tables of the three methods, stacked the same way.
ocr_classes = [metrics_raw_classes_m1_pero,metrics_raw_classes_m2_pero,metrics_raw_classes_m3_pero]
keys_ocr = ["M1","M2","M3"]
metrics_raw_classes_ocr = pd.concat(ocr_classes, keys=keys_ocr,names=["Approach"])

# Final frame: outermost "Dataset" level on top of the Approach level.
datasets = [metrics_raw_classes_ref,metrics_raw_classes_ocr]
keys = ["Reference","OCR"]
metrics_raw_classes = pd.concat(datasets, keys=keys,names=["Dataset"])
metrics_raw_classes

In [None]:
# Keep the F1 columns only (names containing 'f1' but not 'number').
eval_ = []
for elem in metrics_raw_classes.columns:
    if 'number' not in elem and 'f1' in elem:
        eval_.append(elem)
        
# Average over the remaining index levels by grouping on the first three,
# then scale the scores by 100.
# NOTE(review): metrics_raw_classes is rebound here, so re-running this cell
# alone multiplies the already-scaled scores by 100 again.
metrics_raw_classes = metrics_raw_classes[eval_].groupby(level=(0,1,2)).mean()
metrics_raw_classes[eval_] = metrics_raw_classes[eval_].multiply(100., axis=1)
metrics_raw_classes

In [None]:
averaged_classes = metrics_raw_classes.copy()

# Map every "eval_<CLASS>-<metric>" column label to the bare "<CLASS>" name.
# The short name is derived from the label itself rather than from its
# position in `classes`, so a change in column order or count cannot silently
# attach a score to the wrong entity type.
start_classes = list(averaged_classes.columns)
final_classes = [classe.replace('eval_','') for classe in classes]
columns_names = {label: label.rsplit('-', 1)[0].replace('eval_','') for label in start_classes}

# Set pretty names
averaged_classes.index.names = ['Dataset','Approach',"Model & tags"]
averaged_classes.rename(columns=columns_names, errors="raise", inplace=True)

# Entity types reported in the table, in display order (ACT_L1 and ACT_L2 are
# left out).
classes_f = ['PER','ACT','DESC','TITREH','TITREP','SPAT','LOC','CARDINAL','FT']
averaged_classes = averaged_classes[classes_f]
averaged_classes

In [None]:
# LaTeX per-class table covering both datasets (scores printed with one decimal).
latex_table_classes = averaged_classes.copy()

caption = "F1 score measured for each approach, pre-trained model and tag format (mean of 5 runs) for each entity type."
print(latex_table_classes.to_latex(float_format="%.1f", multirow=True, caption=caption))
latex_table_classes

In [None]:
# LaTeX per-class table for the "Reference" dataset only: take that slice,
# move the Dataset level to a column and drop it before rendering.
averaged_classes_ref = averaged_classes.loc[['Reference']]
averaged_classes_ref = averaged_classes_ref.reset_index(0)
del averaged_classes_ref["Dataset"]
latex_table = averaged_classes_ref.copy()
caption = "F1 score measured for each approach, pre-trained model and tag format (mean of 5 runs) on the reference dataset for each entity type."
print(latex_table.to_latex(float_format="%.1f", multirow=True, caption=caption))
latex_table

In [None]:
# LaTeX per-class table for the "OCR" dataset only: take that slice, move the
# Dataset level to a column and drop it before rendering.
averaged_classes_ocr = averaged_classes.loc[['OCR']]
averaged_classes_ocr = averaged_classes_ocr.reset_index(0)
del averaged_classes_ocr["Dataset"]
latex_table = averaged_classes_ocr.copy()
caption = "F1 score measured for each approach, pre-trained model and tag format (mean of 5 runs) on the noisy dataset for each entity type."
print(latex_table.to_latex(float_format="%.1f", multirow=True, caption=caption))
latex_table