# Figures and metrics

Evaluation scores on level-1 entities segmentation and classification with joint-labels method

In [1]:
import os, sys
from pathlib import Path

ENV_IS_GOOGLE_COLAB = True if 'google.colab' in str(get_ipython()) else False
os.environ["ENV_IS_GOOGLE_COLAB"] = str(ENV_IS_GOOGLE_COLAB)

if ENV_IS_GOOGLE_COLAB:
    from google.colab import drive
    mountpoint = Path("/content/drive")
    drive.mount(str(mountpoint)) # Mount gdrive to BASE
    base = mountpoint / "MyDrive/article_icdar_2023" # Adapt this to your situation
    sys.path.append(str(base)) # Add BASE to Python Path
    BASE = Path(base).resolve() # Make BASE absolute
    DATASETS =  BASE / "dataset"
else:
    BASE = Path(os.path.dirname(os.path.realpath("__file__"))).resolve() # If not on GColab, BASE will be the directory of this notebook
    DATASETS = Path('/work/stual/dataset_ICDAR').resolve()
    OUT_BASE = Path('/work/stual/res_ICDAR').resolve()

print(sys.path)
print(BASE)
print(DATASETS)
print(OUT_BASE)

['/lrde/home2/stual/stage_DAS/t2_metrics', '/usr/lib/python310.zip', '/usr/lib/python3.10', '/usr/lib/python3.10/lib-dynload', '', '/lrde/home2/stual/.venv/python_3_9/lib/python3.10/site-packages']
/lrde/home2/stual/stage_DAS/t2_metrics
/work/stual/dataset_ICDAR
/work/stual/res_ICDAR


## Constants

In [2]:
MAX_TRAINSET_SIZE = 6084

## Tools

In [3]:
import pandas as pd
import re
import os
import json

def compile_metrics(path): 
    rundirs = [f for f in os.listdir(path)]

    df = pd.DataFrame()
    for run_dir in rundirs:
        if 'run' in run_dir:
            run_path = path / run_dir
            nrun = re.search("\d+",run_dir)[0]

            files = [f for f in os.listdir(run_path) if "test_" in f and 'json' in f]
            sizes = [int(re.search("\d+",f)[0]) for f in files]

            for file, size in zip(files,sizes):
                file_path = run_path / file
                dftmp = pd.read_json(file_path, typ='series')
                dftmp = pd.DataFrame([dftmp])
                dftmp["trainsize"] = size 
                dftmp["run"] = nrun
                dftmp["trainsize_p"] = round(100 * size / MAX_TRAINSET_SIZE, 1)
                df = pd.concat([df, dftmp])

    return df.groupby(["run","trainsize"]).first()

# Load metrics

## Method 1

In [4]:
METRICS_DIR_REF_M1 = OUT_BASE / "method_1/m1-110-experiment_1_metrics"

In [5]:
# Load Camembert IO metrics from metrics jsons
camembert_io_ref_m1 = compile_metrics(METRICS_DIR_REF_M1 / "111-camembert-ner-multihead-io")
camembert_iob2_ref_m1 = compile_metrics(METRICS_DIR_REF_M1 / "112-camembert-ner-multihead-iob2")
prtn_camembert_io_ref_m1 = compile_metrics(METRICS_DIR_REF_M1 / "113-pretrained-camembert-ner-multihead-io")
prtn_camembert_iob2_ref_m1 = compile_metrics(METRICS_DIR_REF_M1 / "114-pretrained-camembert-multihead-iob2")
metrics_raw_m1_ref = pd.concat([camembert_io_ref_m1,camembert_iob2_ref_m1,prtn_camembert_io_ref_m1,prtn_camembert_iob2_ref_m1], keys=["CmBERT IO", "CmBERT IOB2", "CmBERT+ptrn IO", "CmBERT+ptrn IOB2"],names=["Test"])
metrics_raw_m1_ref.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,eval_precision-all,eval_recall-all,eval_f1-all,eval_accuracy-all,eval_precision-l1,eval_recall-l1,eval_f1-l1,eval_accuracy-l1,eval_precision-l2,eval_recall-l2,...,eval_ACT_L1,eval_ACT_L2,eval_DESC,eval_TITREH,eval_TITREP,eval_SPAT,eval_LOC,eval_CARDINAL,eval_FT,trainsize_p
Test,run,trainsize,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
CmBERT IO,1,6084,0.972824,0.965262,0.969028,0.97322,0.96409,0.969397,0.966736,0.970846,0.983898,0.960173,...,"{'precision': 0.957364341085271, 'recall': 0.9...","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...","{'precision': 0.531914893617021, 'recall': 0.6...","{'precision': 0.9772727272727271, 'recall': 1....","{'precision': 0.40740740740740705, 'recall': 0...","{'precision': 0.9805491990846681, 'recall': 0....","{'precision': 0.979213483146067, 'recall': 0.9...","{'precision': 0.9977142857142851, 'recall': 0....","{'precision': 1.0, 'recall': 0.071428571428571...",100.0
CmBERT IO,2,6084,0.967221,0.960525,0.963861,0.96259,0.956407,0.961251,0.958823,0.951032,0.980892,0.959632,...,"{'precision': 0.9659318637274541, 'recall': 0....","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...","{'precision': 0.35042735042735, 'recall': 0.51...","{'precision': 0.976190476190476, 'recall': 0.9...","{'precision': 0.764705882352941, 'recall': 0.3...","{'precision': 0.9732954545454541, 'recall': 0....","{'precision': 0.9688715953307391, 'recall': 0....","{'precision': 0.9965714285714281, 'recall': 0....","{'precision': 0.33333333333333304, 'recall': 0...",100.0
CmBERT IO,3,6084,0.966561,0.961982,0.964266,0.968731,0.959709,0.959709,0.959709,0.96259,0.975082,0.964779,...,"{'precision': 0.9608433734939751, 'recall': 0....","{'precision': 0.28947368421052605, 'recall': 0...","{'precision': 0.518867924528301, 'recall': 0.6...","{'precision': 0.9772727272727271, 'recall': 1....","{'precision': 0.6666666666666661, 'recall': 0....","{'precision': 0.9771428571428571, 'recall': 0....","{'precision': 0.971603563474387, 'recall': 0.9...","{'precision': 0.997712978845054, 'recall': 0.9...","{'precision': 0.75, 'recall': 0.21428571428571...",100.0
CmBERT IO,4,6084,0.96663,0.957002,0.961792,0.965893,0.95748,0.956847,0.957163,0.957224,0.978128,0.957193,...,"{'precision': 0.974200206398348, 'recall': 0.9...","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...","{'precision': 0.398496240601503, 'recall': 0.6...","{'precision': 0.9772727272727271, 'recall': 1....","{'precision': 0.4, 'recall': 0.303030303030303...","{'precision': 0.9765177548682701, 'recall': 0....","{'precision': 0.9682451253481891, 'recall': 0....","{'precision': 0.9965675057208231, 'recall': 0....","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...",100.0
CmBERT IO,5,6084,0.967119,0.961011,0.964055,0.967105,0.955071,0.964113,0.959571,0.961094,0.982481,0.957193,...,"{'precision': 0.9299905392620621, 'recall': 0....","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...","{'precision': 0.39473684210526305, 'recall': 0...","{'precision': 0.976744186046511, 'recall': 0.9...","{'precision': 1.0, 'recall': 0.030303030303030...","{'precision': 0.9778030734206031, 'recall': 0....","{'precision': 0.9705391884380211, 'recall': 0....","{'precision': 0.9948659440958351, 'recall': 0....","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...",100.0


In [6]:
METRICS_DIR_OCR_M1 = OUT_BASE / "method_1/m1-120-experiment_2_metrics"

In [7]:
# Load Camembert IO metrics from metrics jsons
camembert_io_ocr_m1 = compile_metrics(METRICS_DIR_OCR_M1 / "121-camembert-ner-multihead-io")
camembert_iob2_ocr_m1 = compile_metrics(METRICS_DIR_OCR_M1 / "122-camembert-ner-multihead-iob2")
prtn_camembert_io_ocr_m1 = compile_metrics(METRICS_DIR_OCR_M1 / "123-pretrained-camembert-ner-multihead-io")
prtn_camembert_iob2_ocr_m1 = compile_metrics(METRICS_DIR_OCR_M1 / "124-pretrained-camembert-multihead-iob2")
metrics_raw_m1_ocr = pd.concat([camembert_io_ocr_m1,camembert_iob2_ocr_m1,prtn_camembert_io_ocr_m1,prtn_camembert_iob2_ocr_m1], keys=["CmBERT IO", "CmBERT IOB2", "CmBERT+ptrn IO", "CmBERT+ptrn IOB2"],names=["Test"])
metrics_raw_m1_ocr.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,eval_precision-all,eval_recall-all,eval_f1-all,eval_accuracy-all,eval_precision-l1,eval_recall-l1,eval_f1-l1,eval_accuracy-l1,eval_precision-l2,eval_recall-l2,...,eval_ACT_L1,eval_ACT_L2,eval_DESC,eval_TITREH,eval_TITREP,eval_SPAT,eval_LOC,eval_CARDINAL,eval_FT,trainsize_p
Test,run,trainsize,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
CmBERT IO,1,6084,0.938887,0.940598,0.939741,0.960667,0.927178,0.941876,0.934469,0.955108,0.953757,0.939024,...,"{'precision': 0.9277456647398841, 'recall': 0....","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...","{'precision': 0.47272727272727205, 'recall': 0...","{'precision': 0.69047619047619, 'recall': 0.67...","{'precision': 0.533333333333333, 'recall': 0.5...","{'precision': 0.943813847900113, 'recall': 0.9...","{'precision': 0.9415656008820281, 'recall': 0....","{'precision': 0.9805045871559631, 'recall': 0....","{'precision': 0.6666666666666661, 'recall': 0....",100.0
CmBERT IO,2,6084,0.934635,0.936224,0.935429,0.958684,0.918668,0.935051,0.926787,0.951663,0.95501,0.937669,...,"{'precision': 0.9007490636704121, 'recall': 0....","{'precision': 0.642857142857142, 'recall': 0.1...","{'precision': 0.49367088607594906, 'recall': 0...","{'precision': 0.8285714285714281, 'recall': 0....","{'precision': 0.652173913043478, 'recall': 0.4...","{'precision': 0.9388101983002831, 'recall': 0....","{'precision': 0.93732667775929, 'recall': 0.94...","{'precision': 0.9827981651376141, 'recall': 0....","{'precision': 0.75, 'recall': 0.21428571428571...",100.0
CmBERT IO,3,6084,0.932882,0.937075,0.934974,0.958605,0.920779,0.936592,0.928618,0.949157,0.948205,0.937669,...,"{'precision': 0.9256756756756751, 'recall': 0....","{'precision': 0.33333333333333304, 'recall': 0...","{'precision': 0.37391304347826004, 'recall': 0...","{'precision': 0.622222222222222, 'recall': 0.6...","{'precision': 0.65, 'recall': 0.40625000000000...","{'precision': 0.9404424276800901, 'recall': 0....","{'precision': 0.9334433443344331, 'recall': 0....","{'precision': 0.983381088825214, 'recall': 0.9...","{'precision': 0.16666666666666602, 'recall': 0...",100.0
CmBERT IO,4,6084,0.937803,0.939626,0.938714,0.960067,0.924446,0.937472,0.930914,0.951558,0.954695,0.942276,...,"{'precision': 0.9141791044776121, 'recall': 0....","{'precision': 0.48717948717948706, 'recall': 0...","{'precision': 0.41428571428571404, 'recall': 0...","{'precision': 0.7368421052631571, 'recall': 0....","{'precision': 0.631578947368421, 'recall': 0.3...","{'precision': 0.9291736930860031, 'recall': 0....","{'precision': 0.947163515016685, 'recall': 0.9...","{'precision': 0.981661891117478, 'recall': 0.9...","{'precision': 0.6666666666666661, 'recall': 0....",100.0
CmBERT IO,5,6084,0.939239,0.938897,0.939068,0.957509,0.927998,0.942096,0.934994,0.951976,0.953566,0.934959,...,"{'precision': 0.917613636363636, 'recall': 0.9...","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...","{'precision': 0.556818181818181, 'recall': 0.6...","{'precision': 0.75, 'recall': 0.69767441860465...","{'precision': 0.5625, 'recall': 0.28125, 'f1':...","{'precision': 0.9431818181818181, 'recall': 0....","{'precision': 0.935643564356435, 'recall': 0.9...","{'precision': 0.98105625717566, 'recall': 0.97...","{'precision': 0.5, 'recall': 0.071428571428571...",100.0


## Methods 2 and 3

In [8]:
def loadM2M3IOMetrics(path,model_name,res,keys):
    if os.path.exists(f"{path}/run_2"):
        df = compile_metrics(path)
        df["eval_precision-l1l2"] = df["eval_precision"]
        df["eval_recall-l1l2"] = df["eval_recall"]
        df["eval_f1-l1l2"] = df["eval_f1"]
        df["eval_accuracy-l1l2"] = df["eval_accuracy"]
        res.append(df)
        keys.append(model_name)
    
def loadM2M3IOB2Metrics(path,model_name,res,keys):
    if os.path.exists(f"{path}/run_2"):
        df = compile_metrics(path)
        res.append(df)
        keys.append(model_name)

#### Method 2

In [9]:
METRICS_DIR_REF_M2 = OUT_BASE / "method_2/m2-210-experiment_1_metrics"

In [10]:
res_m2_ref = []
keys_m2_ref = []

camembert_ner_io_ref_m2 = METRICS_DIR_REF_M2 / "211-camembert-ner-joint-labelling-io"
loadM2M3IOMetrics(camembert_ner_io_ref_m2,"CmBERT IO",res_m2_ref,keys_m2_ref)

camembert_ner_iob2_ref_m2 = METRICS_DIR_REF_M2 / "212-camembert-ner-joint-labelling-iob2"
loadM2M3IOB2Metrics(camembert_ner_iob2_ref_m2,"CmBERT IOB2",res_m2_ref,keys_m2_ref)

ptrn_camembert_ner_io_ref_m2 = METRICS_DIR_REF_M2 / "213-pretrained-camembert-ner-joint-labelling-io"
loadM2M3IOMetrics(ptrn_camembert_ner_io_ref_m2,"CmBERT+ptrn IO",res_m2_ref,keys_m2_ref)

ptrn_camembert_ner_iob2_ref_m2 = METRICS_DIR_REF_M2 / "214-pretrained-camembert-ner-joint-labelling-iob2"
loadM2M3IOB2Metrics(ptrn_camembert_ner_iob2_ref_m2,"CmBERT+ptrn IOB2",res_m2_ref,keys_m2_ref)

In [11]:
metrics_raw_m2_ref = pd.concat(res_m2_ref, keys=keys_m2_ref,names=["Test"])
metrics_raw_m2_ref.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,eval_loss,eval_precision,eval_recall,eval_f1,eval_accuracy,eval_precision-l1,eval_recall-l1,eval_f1-l1,eval_accuracy-l1,eval_precision-l2,...,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch,trainsize_p,eval_TITRE,eval_precision-l1l2,eval_recall-l1l2,eval_f1-l1l2,eval_accuracy-l1l2
Test,run,trainsize,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
CmBERT IO,1,6084,0.191235,0.959145,0.967932,0.963519,0.942002,0.957034,0.966094,0.961543,0.965067,0.961497,...,9.045,186.291,11.719,6.3,100.0,,0.959145,0.967932,0.963519,0.942002
CmBERT IO,2,6084,0.219643,0.959671,0.96962,0.96462,0.948091,0.957345,0.968516,0.962898,0.96935,0.96334,...,8.7241,193.143,12.15,10.24,100.0,"{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...",0.959671,0.96962,0.96462,0.948091
CmBERT IO,3,6084,0.216868,0.960798,0.969138,0.96495,0.944221,0.96153,0.968516,0.96501,0.966254,0.965674,...,10.7733,156.405,9.839,9.45,100.0,"{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...",0.960798,0.969138,0.96495,0.944221
CmBERT IO,4,6084,0.191439,0.95533,0.953948,0.954639,0.932921,0.922617,0.950242,0.936226,0.938029,0.981205,...,10.5994,158.971,10.001,2.89,100.0,,0.95533,0.953948,0.954639,0.932921
CmBERT IO,5,6084,0.163849,0.957006,0.963351,0.960168,0.943137,0.9481,0.961251,0.95463,0.961094,0.961652,...,9.3141,180.909,11.381,3.94,100.0,,0.957006,0.963351,0.960168,0.943137


In [12]:
METRICS_DIR_OCR_M2 = OUT_BASE / "method_2/m2-220-experiment_2_metrics"

In [13]:
res_m2_ocr = []
keys_m2_ocr = []

camembert_ner_io_ocr_m2 = METRICS_DIR_OCR_M2 / "221-camembert-ner-joint-labelling-io"
loadM2M3IOMetrics(camembert_ner_io_ocr_m2,"CmBERT IO",res_m2_ocr,keys_m2_ocr)

camembert_ner_iob2_ocr_m2 = METRICS_DIR_OCR_M2 / "222-camembert-ner-joint-labelling-iob2"
loadM2M3IOB2Metrics(camembert_ner_iob2_ocr_m2,"CmBERT IOB2",res_m2_ocr,keys_m2_ocr)

ptrn_camembert_ner_io_ocr_m2 = METRICS_DIR_OCR_M2 / "223-pretrained-camembert-ner-joint-labelling-io"
loadM2M3IOMetrics(ptrn_camembert_ner_io_ocr_m2,"CmBERT+ptrn IO",res_m2_ocr,keys_m2_ocr)

ptrn_camembert_ner_iob2_ocr_m2 = METRICS_DIR_OCR_M2 / "224-pretrained-camembert-ner-joint-labelling-iob2"
loadM2M3IOB2Metrics(ptrn_camembert_ner_iob2_ocr_m2,"CmBERT+ptrn IOB2",res_m2_ocr,keys_m2_ocr)

In [14]:
metrics_raw_m2_ocr = pd.concat(res_m2_ocr, keys=keys_m2_ocr,names=["Test"])
metrics_raw_m2_ocr.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,eval_loss,eval_precision,eval_recall,eval_f1,eval_accuracy,eval_precision-l1,eval_recall-l1,eval_f1-l1,eval_accuracy-l1,eval_precision-l2,...,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch,trainsize_p,eval_TITRE,eval_precision-l1l2,eval_recall-l1l2,eval_f1-l1l2,eval_accuracy-l1l2
Test,run,trainsize,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
CmBERT IO,1,6084,0.291394,0.94007,0.948253,0.944143,0.937151,0.932435,0.944958,0.938655,0.961215,0.94416,...,10.5194,160.181,10.077,8.92,100.0,"{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...",0.94007,0.948253,0.944143,0.937151
CmBERT IO,2,6084,0.254665,0.942836,0.94523,0.944032,0.928642,0.926398,0.944958,0.935586,0.950775,0.944324,...,10.5746,159.345,10.024,4.72,100.0,,0.942836,0.94523,0.944032,0.928642
CmBERT IO,3,6084,0.209478,0.940993,0.946681,0.943828,0.935272,0.924157,0.941656,0.932824,0.955943,0.946341,...,8.9153,189.0,11.89,4.46,100.0,,0.940993,0.946681,0.943828,0.935272
CmBERT IO,4,6084,0.240886,0.92436,0.938218,0.931237,0.923996,0.90833,0.931528,0.919783,0.950827,0.934964,...,9.2637,181.893,11.443,3.94,100.0,,0.92436,0.938218,0.931237,0.923996
CmBERT IO,5,6084,0.227982,0.938368,0.948011,0.943165,0.934645,0.931735,0.949582,0.940574,0.959127,0.941763,...,10.343,162.912,10.248,6.56,100.0,,0.938368,0.948011,0.943165,0.934645


#### Method 3

In [15]:
METRICS_DIR_REF_M3 = OUT_BASE / "method_3/m3-310-experiment_1_metrics"

In [16]:
res_m3_ref = []
keys_m3_ref = []

camembert_ner_io_ref_m3 = METRICS_DIR_REF_M3 / "311-camembert-ner-hierarchical-loss-io"
loadM2M3IOMetrics(camembert_ner_io_ref_m3,"CmBERT IO",res_m3_ref,keys_m3_ref)

camembert_ner_iob2_ref_m3 = METRICS_DIR_REF_M3 / "312-camembert-ner-hierarchical-loss-iob2"
loadM2M3IOB2Metrics(camembert_ner_iob2_ref_m3,"CmBERT IOB2",res_m3_ref,keys_m3_ref)

ptrn_camembert_ner_io_ref_m3 = METRICS_DIR_REF_M3 / "313-pretrained-camembert-ner-hierarchical-loss-io"
loadM2M3IOMetrics(ptrn_camembert_ner_io_ref_m3,"CmBERT+ptrn IO",res_m3_ref,keys_m3_ref)

ptrn_camembert_ner_iob2_ref_m3 = METRICS_DIR_REF_M3 / "314-pretrained-camembert-ner-hierarchical-loss-iob2"
loadM2M3IOB2Metrics(ptrn_camembert_ner_iob2_ref_m3,"CmBERT+ptrn IOB2",res_m3_ref,keys_m3_ref)

In [17]:
metrics_raw_m3_ref = pd.concat(res_m3_ref, keys=keys_m3_ref,names=["Test"])
metrics_raw_m3_ref.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,eval_loss,eval_precision,eval_recall,eval_f1,eval_accuracy,eval_precision-l1,eval_recall-l1,eval_f1-l1,eval_accuracy-l1,eval_precision-l2,...,eval_TITRE,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch,trainsize_p,eval_precision-l1l2,eval_recall-l1l2,eval_f1-l1l2,eval_accuracy-l1l2
Test,run,trainsize,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
CmBERT IO,1,6084,0.165812,0.958842,0.966124,0.962469,0.947626,0.940484,0.967195,0.953652,0.968215,0.967054,...,,21.1232,79.77,5.018,3.67,100.0,0.958842,0.966124,0.962469,0.947626
CmBERT IO,2,6084,0.216428,0.956278,0.965039,0.960638,0.941538,0.952007,0.96081,0.956388,0.960578,0.960779,...,,25.0354,67.305,4.234,6.04,100.0,0.956278,0.965039,0.960638,0.941538
CmBERT IO,3,6084,0.173049,0.956678,0.961061,0.958865,0.934469,0.950437,0.958388,0.954396,0.958514,0.963568,...,,26.0704,64.633,4.066,3.94,100.0,0.956678,0.961061,0.958865,0.934469
CmBERT IO,4,6084,0.274444,0.960791,0.966004,0.96339,0.942621,0.95786,0.965874,0.96185,0.962487,0.969534,...,"{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...",15.456,109.019,6.858,11.81,100.0,0.960791,0.966004,0.96339,0.942621
CmBERT IO,5,6084,0.154278,0.956179,0.965401,0.960768,0.940454,0.958388,0.963452,0.960913,0.964241,0.961642,...,,56.4387,29.855,1.878,4.46,100.0,0.956179,0.965401,0.960768,0.940454


In [18]:
METRICS_DIR_OCR_M3 = OUT_BASE / "method_3/m3-320-experiment_2_metrics"

In [19]:
res_m3_ocr = []
keys_m3_ocr = []

camembert_ner_io_ocr_m3 = METRICS_DIR_OCR_M3 / "321-camembert-ner-hierarchical-loss-io"
loadM2M3IOMetrics(camembert_ner_io_ocr_m3,"CmBERT IO",res_m3_ocr,keys_m3_ocr)

camembert_ner_iob2_ocr_m3 = METRICS_DIR_OCR_M3 / "322-camembert-ner-hierarchical-loss-iob2"
loadM2M3IOB2Metrics(camembert_ner_iob2_ocr_m3,"CmBERT IOB2",res_m3_ocr,keys_m3_ocr)

ptrn_camembert_ner_io_ocr_m3 = METRICS_DIR_OCR_M3 / "323-pretrained-camembert-ner-hierarchical-loss-io"
loadM2M3IOMetrics(ptrn_camembert_ner_io_ocr_m3,"CmBERT+ptrn IO",res_m3_ocr,keys_m3_ocr)

ptrn_camembert_ner_iob2_ocr_m3 = METRICS_DIR_OCR_M3 / "324-pretrained-camembert-ner-hierarchical-loss-iob2"
loadM2M3IOB2Metrics(ptrn_camembert_ner_iob2_ocr_m3,"CmBERT+ptrn IOB2",res_m3_ocr,keys_m3_ocr)

In [20]:
metrics_raw_m3_ocr = pd.concat(res_m3_ocr, keys=keys_m3_ocr,names=["Test"])
metrics_raw_m3_ocr.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,eval_loss,eval_precision,eval_recall,eval_f1,eval_accuracy,eval_precision-l1,eval_recall-l1,eval_f1-l1,eval_accuracy-l1,eval_precision-l2,...,eval_TITRE,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch,trainsize_p,eval_precision-l1l2,eval_recall-l1l2,eval_f1-l1l2,eval_accuracy-l1l2
Test,run,trainsize,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
CmBERT IO,1,6084,0.256559,0.947254,0.944505,0.945877,0.93381,0.928977,0.935931,0.932441,0.956361,0.95593,...,,9.0527,186.133,11.709,5.25,100.0,0.947254,0.944505,0.945877,0.93381
CmBERT IO,2,6084,0.223901,0.930058,0.945351,0.937642,0.931148,0.913155,0.939894,0.926332,0.951245,0.936932,...,,9.2894,181.39,11.411,7.09,100.0,0.930058,0.945351,0.937642,0.931148
CmBERT IO,3,6084,0.241124,0.943616,0.948978,0.94629,0.937621,0.935105,0.945399,0.940223,0.958866,0.94615,...,,8.6041,195.838,12.32,7.09,100.0,0.943616,0.948978,0.94629,0.937621
CmBERT IO,4,6084,0.264271,0.94124,0.948978,0.945093,0.932296,0.933435,0.941656,0.937527,0.95469,0.945058,...,"{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...",9.4472,178.36,11.22,7.61,100.0,0.94124,0.948978,0.945093,0.932296
CmBERT IO,5,6084,0.242043,0.940459,0.951034,0.945717,0.933967,0.933493,0.945619,0.939517,0.957666,0.94328,...,,8.8687,189.995,11.952,6.56,100.0,0.940459,0.951034,0.945717,0.933967


In [21]:
ref = [metrics_raw_m1_ref,metrics_raw_m2_ref,metrics_raw_m3_ref]
keys_ref = ["M1","M2","M3"]
metrics_raw_ref = pd.concat(ref, keys=keys_ref,names=["Approach"])

ocr = [metrics_raw_m1_ocr, metrics_raw_m2_ocr,metrics_raw_m3_ocr]
keys_ocr = ["M1","M2","M3"]
metrics_raw_ocr = pd.concat(ocr, keys=keys_ocr,names=["Approach"])

datasets = [metrics_raw_ref,metrics_raw_ocr]
keys = ["Reference","OCR"]
metrics_raw = pd.concat(datasets, keys=keys,names=["Dataset"])
metrics_raw

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,eval_precision-all,eval_recall-all,eval_f1-all,eval_accuracy-all,eval_precision-l1,eval_recall-l1,eval_f1-l1,eval_accuracy-l1,eval_precision-l2,eval_recall-l2,...,eval_LOC,eval_CARDINAL,eval_FT,trainsize_p,eval_loss,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch,eval_TITRE
Dataset,Approach,Test,run,trainsize,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
Reference,M1,CmBERT IO,1,6084,0.972824,0.965262,0.969028,0.973220,0.964090,0.969397,0.966736,0.970846,0.983898,0.960173,...,"{'precision': 0.979213483146067, 'recall': 0.9...","{'precision': 0.9977142857142851, 'recall': 0....","{'precision': 1.0, 'recall': 0.071428571428571...",100.0,,,,,,
Reference,M1,CmBERT IO,2,6084,0.967221,0.960525,0.963861,0.962590,0.956407,0.961251,0.958823,0.951032,0.980892,0.959632,...,"{'precision': 0.9688715953307391, 'recall': 0....","{'precision': 0.9965714285714281, 'recall': 0....","{'precision': 0.33333333333333304, 'recall': 0...",100.0,,,,,,
Reference,M1,CmBERT IO,3,6084,0.966561,0.961982,0.964266,0.968731,0.959709,0.959709,0.959709,0.962590,0.975082,0.964779,...,"{'precision': 0.971603563474387, 'recall': 0.9...","{'precision': 0.997712978845054, 'recall': 0.9...","{'precision': 0.75, 'recall': 0.21428571428571...",100.0,,,,,,
Reference,M1,CmBERT IO,4,6084,0.966630,0.957002,0.961792,0.965893,0.957480,0.956847,0.957163,0.957224,0.978128,0.957193,...,"{'precision': 0.9682451253481891, 'recall': 0....","{'precision': 0.9965675057208231, 'recall': 0....","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...",100.0,,,,,,
Reference,M1,CmBERT IO,5,6084,0.967119,0.961011,0.964055,0.967105,0.955071,0.964113,0.959571,0.961094,0.982481,0.957193,...,"{'precision': 0.9705391884380211, 'recall': 0....","{'precision': 0.9948659440958351, 'recall': 0....","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...",100.0,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
OCR,M3,CmBERT+ptrn IOB2,1,6084,0.924840,0.945956,0.935279,0.954038,0.910813,0.942096,0.926190,0.940335,0.942535,0.950704,...,"{'precision': 0.9349015317286651, 'recall': 0....","{'precision': 0.9873271889400921, 'recall': 0....","{'precision': 0.33333333333333304, 'recall': 0...",100.0,0.262870,11.9258,141.291,8.888,5.25,
OCR,M3,CmBERT+ptrn IOB2,2,6084,0.925077,0.946199,0.935519,0.951532,0.914939,0.942536,0.928533,0.950044,0.937750,0.950704,...,"{'precision': 0.935236004390779, 'recall': 0.9...","{'precision': 0.9879310344827581, 'recall': 0....","{'precision': 0.46666666666666606, 'recall': 0...",100.0,0.280608,11.7376,143.556,9.031,6.30,"{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '..."
OCR,M3,CmBERT+ptrn IOB2,3,6084,0.929074,0.946563,0.937737,0.952341,0.924220,0.945178,0.934581,0.953333,0.935096,0.948267,...,"{'precision': 0.9441680486456601, 'recall': 0....","{'precision': 0.9856897538637661, 'recall': 0....","{'precision': 0.42857142857142805, 'recall': 0...",100.0,0.249515,11.6596,144.517,9.091,3.94,
OCR,M3,CmBERT+ptrn IOB2,4,6084,0.927008,0.951664,0.939174,0.953907,0.922715,0.948921,0.935634,0.955108,0.932311,0.955038,...,"{'precision': 0.9452130603209741, 'recall': 0....","{'precision': 0.9823261117445831, 'recall': 0....","{'precision': 0.5, 'recall': 0.5, 'f1': 0.5, '...",100.0,0.264292,12.8783,130.840,8.231,6.56,


## 231.1 Build the averaged table


In [22]:
eval_ = []
for elem in metrics_raw.columns:
    if 'eval_p' in elem or 'eval_re' in elem or 'eval_f' in elem or 'eval_ac' in elem:
        eval_.append(elem)

In [23]:
metrics_raw = metrics_raw.groupby(level=(0,1,2)).mean()
metrics_raw[eval_] = metrics_raw[eval_].multiply(100., axis=1)
metrics_raw 

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,eval_precision-all,eval_recall-all,eval_f1-all,eval_accuracy-all,eval_precision-l1,eval_recall-l1,eval_f1-l1,eval_accuracy-l1,eval_precision-l2,eval_recall-l2,...,eval_precision-das,eval_recall-das,eval_f1-das,eval_accuracy-das,trainsize_p,eval_loss,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch
Dataset,Approach,Test,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
Reference,M1,CmBERT IO,96.807098,96.115632,96.460054,96.750774,95.855151,96.226332,96.040045,96.055728,98.009614,95.979409,...,97.555591,96.535581,97.042905,95.670795,100.0,,,,,
Reference,M1,CmBERT IOB2,95.967198,96.470731,96.217547,96.542828,95.197476,96.389256,95.789203,95.598555,96.938211,96.570964,...,96.000622,96.270287,96.135266,94.649123,100.0,,,,,
Reference,M1,CmBERT+ptrn IO,96.56789,96.674359,96.620776,96.987616,95.881444,96.759137,96.317256,96.474716,97.438319,96.570035,...,97.581278,96.956929,97.268102,94.912281,100.0,,,,,
Reference,M1,CmBERT+ptrn IOB2,95.520199,96.512023,96.012234,96.654799,94.813072,96.257155,95.528058,95.99484,96.409203,96.825569,...,94.28222,96.239076,95.250599,94.133127,100.0,,,,,
Reference,M2,CmBERT IO,95.695905,96.686506,96.188573,96.427761,94.932504,96.292382,95.606135,95.995872,96.667367,97.171498,...,96.43385,96.975655,96.703881,95.395253,100.0,0.196607,9.69118,175.1438,11.018,6.564
Reference,M2,CmBERT IOB2,95.437932,96.533884,95.982694,96.369453,94.461236,96.138265,95.291741,95.964912,96.660016,97.020585,...,96.531877,96.944444,96.737652,95.184727,100.0,0.205812,12.39508,137.5056,8.6502,4.618
Reference,M2,CmBERT+ptrn IO,95.59492,96.99502,96.289239,96.640867,95.176142,96.781154,95.970882,96.586171,96.114237,97.258196,...,96.480497,97.278402,96.877739,95.588235,100.0,0.165514,9.01502,187.0726,11.7682,4.83
Reference,M2,CmBERT+ptrn IOB2,95.250306,96.920087,96.077814,96.529928,94.994016,96.715103,95.846368,96.415893,95.568425,97.172264,...,96.511682,97.315855,96.912063,95.489164,100.0,0.192014,10.45566,161.2854,10.1462,5.564
Reference,M3,CmBERT IO,95.749072,96.764241,96.253651,96.424149,95.183517,96.314399,95.744014,96.280702,96.451524,97.3178,...,96.503091,96.972534,96.736881,95.385965,100.0,0.196802,28.82474,70.1164,4.4108,5.984
Reference,M3,CmBERT IOB2,95.607198,96.633471,96.117493,96.475232,94.995366,96.257155,95.62179,96.05676,96.365836,97.096425,...,96.608666,96.997503,96.802549,95.434469,100.0,0.210205,10.4573,161.1548,10.1378,6.09


In [24]:
averaged = metrics_raw.copy()
averaged = averaged[["eval_f1-all","eval_f1-l1l2","eval_f1-l1","eval_f1-l2","eval_f1","eval_f1-das"]]

# Set pretty names
averaged.index.names = ['Dataset','Approach',"Model and tags"]
averaged.rename(columns={"eval_f1-all":"All",
                        "eval_f1-l1":"L1",
                        "eval_f1-l2":"L2",
                         "eval_f1-l1l2":"L1&L2",
                         "eval_f1":"P-L1+P-L2",
                         "eval_f1-das":"Flat",
                         }, errors="raise", inplace=True)
#averaged.rename(mapper={"camembert_io_ref": "CmBERT IO","camembert_iob2_ref": "CmBERT IOB2","prtn_camembert_io_ref": "Ptrn CmBERT IO","prtn_camembert_iob2_ref": "Ptrn CmBERT IOB2"}, errors="ignore", inplace=True, axis=0)
averaged

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,All,L1&L2,L1,L2,P-L1+P-L2,Flat
Dataset,Approach,Model and tags,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Reference,M1,CmBERT IO,96.460054,95.699654,96.040045,96.983179,95.699654,97.042905
Reference,M1,CmBERT IOB2,96.217547,95.614767,95.789203,96.751012,95.663863,96.135266
Reference,M1,CmBERT+ptrn IO,96.620776,95.918409,96.317256,96.999829,95.918409,97.268102
Reference,M1,CmBERT+ptrn IOB2,96.012234,95.212618,95.528058,96.61288,95.331939,95.250599
Reference,M2,CmBERT IO,96.188573,96.157909,95.606135,96.914893,96.157909,96.703881
Reference,M2,CmBERT IOB2,95.982694,96.030419,95.291741,96.839731,96.035288,96.737652
Reference,M2,CmBERT+ptrn IO,96.289239,96.125104,95.970882,96.682303,96.125104,96.877739
Reference,M2,CmBERT+ptrn IOB2,96.077814,96.035353,95.846368,96.363276,96.068198,96.912063
Reference,M3,CmBERT IO,96.253651,96.12261,95.744014,96.882258,96.12261,96.736881
Reference,M3,CmBERT IOB2,96.117493,96.139536,95.62179,96.729561,96.144481,96.802549


In [25]:
averaged_ref = averaged.loc[['Reference']]
averaged_ref = averaged_ref.reset_index(0)
del averaged_ref["Dataset"]
latex_table = averaged_ref.copy()
caption = "F1 score measured for each approach, pre-trained model and tag format (mean of 5 runs) on the reference dataset."
print(latex_table.to_latex(float_format="%.1f", multirow=True, caption=caption))
latex_table

\begin{table}
\centering
\caption{F1 score measured for each approach, pre-trained model and tag format (mean of 5 runs) on the reference dataset.}
\begin{tabular}{llrrrrrr}
\toprule
   &                  &  All &  L1\&L2 &   L1 &   L2 &  P-L1+P-L2 &  Flat \\
Approach & Model and tags &      &        &      &      &            &       \\
\midrule
\multirow{4}{*}{M1} & CmBERT IO & 96.5 &   95.7 & 96.0 & 97.0 &       95.7 &  97.0 \\
   & CmBERT IOB2 & 96.2 &   95.6 & 95.8 & 96.8 &       95.7 &  96.1 \\
   & CmBERT+ptrn IO & 96.6 &   95.9 & 96.3 & 97.0 &       95.9 &  97.3 \\
   & CmBERT+ptrn IOB2 & 96.0 &   95.2 & 95.5 & 96.6 &       95.3 &  95.3 \\
\cline{1-8}
\multirow{4}{*}{M2} & CmBERT IO & 96.2 &   96.2 & 95.6 & 96.9 &       96.2 &  96.7 \\
   & CmBERT IOB2 & 96.0 &   96.0 & 95.3 & 96.8 &       96.0 &  96.7 \\
   & CmBERT+ptrn IO & 96.3 &   96.1 & 96.0 & 96.7 &       96.1 &  96.9 \\
   & CmBERT+ptrn IOB2 & 96.1 &   96.0 & 95.8 & 96.4 &       96.1 &  96.9 \\
\cline{1-8}
\multirow{4}{

Unnamed: 0_level_0,Unnamed: 1_level_0,All,L1&L2,L1,L2,P-L1+P-L2,Flat
Approach,Model and tags,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
M1,CmBERT IO,96.460054,95.699654,96.040045,96.983179,95.699654,97.042905
M1,CmBERT IOB2,96.217547,95.614767,95.789203,96.751012,95.663863,96.135266
M1,CmBERT+ptrn IO,96.620776,95.918409,96.317256,96.999829,95.918409,97.268102
M1,CmBERT+ptrn IOB2,96.012234,95.212618,95.528058,96.61288,95.331939,95.250599
M2,CmBERT IO,96.188573,96.157909,95.606135,96.914893,96.157909,96.703881
M2,CmBERT IOB2,95.982694,96.030419,95.291741,96.839731,96.035288,96.737652
M2,CmBERT+ptrn IO,96.289239,96.125104,95.970882,96.682303,96.125104,96.877739
M2,CmBERT+ptrn IOB2,96.077814,96.035353,95.846368,96.363276,96.068198,96.912063
M3,CmBERT IO,96.253651,96.12261,95.744014,96.882258,96.12261,96.736881
M3,CmBERT IOB2,96.117493,96.139536,95.62179,96.729561,96.144481,96.802549


In [26]:
averaged_ref = averaged.loc[['OCR']]
averaged_ref = averaged_ref.reset_index(0)
del averaged_ref["Dataset"]
latex_table = averaged_ref.copy()
caption = "F1 score measured for each approach, pre-trained model and tag format (mean of 5 runs) on the noisy dataset."
print(latex_table.to_latex(float_format="%.1f", multirow=True, caption=caption))
latex_table

\begin{table}
\centering
\caption{F1 score measured for each approach, pre-trained model and tag format (mean of 5 runs) on the noisy dataset.}
\begin{tabular}{llrrrrrr}
\toprule
   &                  &  All &  L1\&L2 &   L1 &   L2 &  P-L1+P-L2 &  Flat \\
Approach & Model and tags &      &        &      &      &            &       \\
\midrule
\multirow{4}{*}{M1} & CmBERT IO & 93.8 &   93.4 & 93.1 & 94.6 &       93.4 &  94.2 \\
   & CmBERT IOB2 & 93.5 &   92.9 & 93.1 & 94.0 &       93.1 &  92.7 \\
   & CmBERT+ptrn IO & 94.3 &   93.8 & 94.1 & 94.5 &       93.8 &  94.4 \\
   & CmBERT+ptrn IOB2 & 94.1 &   93.5 & 93.7 & 94.5 &       93.7 &  94.5 \\
\cline{1-8}
\multirow{4}{*}{M2} & CmBERT IO & 93.8 &   94.1 & 93.3 & 94.4 &       94.1 &  94.5 \\
   & CmBERT IOB2 & 93.8 &   94.2 & 93.2 & 94.5 &       94.3 &  94.7 \\
   & CmBERT+ptrn IO & 93.9 &   94.1 & 93.4 & 94.4 &       94.1 &  94.6 \\
   & CmBERT+ptrn IOB2 & 93.7 &   94.1 & 93.1 & 94.5 &       94.2 &  94.8 \\
\cline{1-8}
\multirow{4}{*}{M

Unnamed: 0_level_0,Unnamed: 1_level_0,All,L1&L2,L1,L2,P-L1+P-L2,Flat
Approach,Model and tags,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
M1,CmBERT IO,93.758518,93.355103,93.115639,94.562345,93.355103,94.158076
M1,CmBERT IOB2,93.50383,92.922154,93.109544,93.993053,93.127077,92.710741
M1,CmBERT+ptrn IO,94.318797,93.835619,94.133931,94.548789,93.835619,94.41947
M1,CmBERT+ptrn IOB2,94.085092,93.496475,93.743182,94.510817,93.708233,94.54517
M2,CmBERT IO,93.808923,94.128114,93.348428,94.380526,94.128114,94.534562
M2,CmBERT IOB2,93.771105,94.161059,93.195417,94.486534,94.281452,94.69002
M2,CmBERT+ptrn IO,93.882361,94.0893,93.437655,94.432368,94.0893,94.612859
M2,CmBERT+ptrn IOB2,93.722318,94.119129,93.064214,94.535459,94.189938,94.847359
M3,CmBERT IO,94.109835,94.41239,93.520808,94.838207,94.41239,94.773225
M3,CmBERT IOB2,93.518969,93.907747,92.901022,94.285655,93.990849,94.583356


## 231.2 Create the results table

In [27]:
latex_table = averaged.copy()

caption = "F1 score measured for each approach, pre-trained model and tag format (mean of 5 runs)."
print(latex_table.to_latex(float_format="%.1f", multirow=True, caption=caption))
latex_table

\begin{table}
\centering
\caption{F1 score measured for each approach, pre-trained model and tag format (mean of 5 runs).}
\begin{tabular}{lllrrrrrr}
\toprule
    &    &                  &  All &  L1\&L2 &   L1 &   L2 &  P-L1+P-L2 &  Flat \\
Dataset & Approach & Model and tags &      &        &      &      &            &       \\
\midrule
\multirow{12}{*}{Reference} & \multirow{4}{*}{M1} & CmBERT IO & 96.5 &   95.7 & 96.0 & 97.0 &       95.7 &  97.0 \\
    &    & CmBERT IOB2 & 96.2 &   95.6 & 95.8 & 96.8 &       95.7 &  96.1 \\
    &    & CmBERT+ptrn IO & 96.6 &   95.9 & 96.3 & 97.0 &       95.9 &  97.3 \\
    &    & CmBERT+ptrn IOB2 & 96.0 &   95.2 & 95.5 & 96.6 &       95.3 &  95.3 \\
\cline{2-9}
    & \multirow{4}{*}{M2} & CmBERT IO & 96.2 &   96.2 & 95.6 & 96.9 &       96.2 &  96.7 \\
    &    & CmBERT IOB2 & 96.0 &   96.0 & 95.3 & 96.8 &       96.0 &  96.7 \\
    &    & CmBERT+ptrn IO & 96.3 &   96.1 & 96.0 & 96.7 &       96.1 &  96.9 \\
    &    & CmBERT+ptrn IOB2 & 96.1 &   96.0

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,All,L1&L2,L1,L2,P-L1+P-L2,Flat
Dataset,Approach,Model and tags,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Reference,M1,CmBERT IO,96.460054,95.699654,96.040045,96.983179,95.699654,97.042905
Reference,M1,CmBERT IOB2,96.217547,95.614767,95.789203,96.751012,95.663863,96.135266
Reference,M1,CmBERT+ptrn IO,96.620776,95.918409,96.317256,96.999829,95.918409,97.268102
Reference,M1,CmBERT+ptrn IOB2,96.012234,95.212618,95.528058,96.61288,95.331939,95.250599
Reference,M2,CmBERT IO,96.188573,96.157909,95.606135,96.914893,96.157909,96.703881
Reference,M2,CmBERT IOB2,95.982694,96.030419,95.291741,96.839731,96.035288,96.737652
Reference,M2,CmBERT+ptrn IO,96.289239,96.125104,95.970882,96.682303,96.125104,96.877739
Reference,M2,CmBERT+ptrn IOB2,96.077814,96.035353,95.846368,96.363276,96.068198,96.912063
Reference,M3,CmBERT IO,96.253651,96.12261,95.744014,96.882258,96.12261,96.736881
Reference,M3,CmBERT IOB2,96.117493,96.139536,95.62179,96.729561,96.144481,96.802549


# 232 - Experiments 1 & 2: tables by classes

In [28]:
import pandas as pd
import json
from pandas import json_normalize

classes = ['eval_PER','eval_ACT','eval_ACT_L1','eval_ACT_L2','eval_DESC','eval_TITREH','eval_TITREP','eval_SPAT','eval_LOC','eval_CARDINAL','eval_FT'
 #'eval_TITRE'
]

def compile_metrics_by_classes(path, classes): 
    rundirs = [f for f in os.listdir(path)]

    df = pd.DataFrame()
    for run_dir in rundirs:
        if 'run' in run_dir:
            run_path = path / run_dir
            nrun = re.search("\d+",run_dir)[0]

            files = [f for f in os.listdir(run_path) if "test_" in f and 'json' in f]
            sizes = [int(re.search("\d+",f)[0]) for f in files]
                
            for file, size in zip(files,sizes):
                file_path = run_path / file
                dftmp = pd.read_json(file_path)
                classes_dict = {key: dftmp[key] for key in classes}
                dftmp = pd.DataFrame.from_dict(classes_dict)
                dftmp = dftmp.T
                dftmp['number'] = dftmp['number'].astype(int)
                dftmp["trainsize"] = size 
                dftmp["run"] = nrun
                dftmp["trainsize_p"] = round(100 * size / MAX_TRAINSET_SIZE, 1)
                df = pd.concat([df, dftmp])
                df["classe"] = df.index
                
    return df.groupby(["run","classe"]).first()

In [29]:
def formatbyClasses(df,classes,metric_name):
    m = df[[metric_name]].reset_index(2)
    m = m.pivot(columns='classe')
    classes = [classe + '-' + metric_name for classe in classes]
    m.columns = classes
    return m

def byClassesDf(metrics_raw_classes,classes):
    precision = formatbyClasses(metrics_raw_classes,classes,'precision')
    recall = formatbyClasses(metrics_raw_classes,classes,'recall')
    f1 = formatbyClasses(metrics_raw_classes,classes,'f1')
    number = formatbyClasses(metrics_raw_classes,classes,'number')
    tmp = precision.join(recall)
    tmp = tmp.join(f1)
    tmp = tmp.join(number)
    return tmp

### Method 1

In [30]:
METRICS_DIR_OCR_M1 = OUT_BASE / "method_1/m1-110-experiment_1_metrics"

In [31]:
# Load models metrics from metrics jsons
camembert_io_ref_m1 = compile_metrics_by_classes(METRICS_DIR_REF_M1 / "111-camembert-ner-multihead-io", classes)
camembert_iob2_ref_m1 = compile_metrics_by_classes(METRICS_DIR_REF_M1 / "112-camembert-ner-multihead-iob2", classes)
prtn_camembert_io_ref_m1 = compile_metrics_by_classes(METRICS_DIR_REF_M1 / "113-pretrained-camembert-ner-multihead-io",classes)
prtn_camembert_iob2_ref_m1 = compile_metrics_by_classes(METRICS_DIR_OCR_M1 / "114-pretrained-camembert-multihead-iob2", classes)

In [32]:
tmp_m1_ref = pd.concat([camembert_io_ref_m1,camembert_iob2_ref_m1,prtn_camembert_io_ref_m1,prtn_camembert_iob2_ref_m1], keys=["CmBERT IO", "CmBERT IOB2", "CmBERT+ptrn IO", "CmBERT+ptrn IOB2"])
metrics_raw_classes_m1_ref = byClassesDf(tmp_m1_ref,classes)
metrics_raw_classes_m1_ref.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,eval_PER-precision,eval_ACT-precision,eval_ACT_L1-precision,eval_ACT_L2-precision,eval_DESC-precision,eval_TITREH-precision,eval_TITREP-precision,eval_SPAT-precision,eval_LOC-precision,eval_CARDINAL-precision,...,eval_ACT-number,eval_ACT_L1-number,eval_ACT_L2-number,eval_DESC-number,eval_TITREH-number,eval_TITREP-number,eval_SPAT-number,eval_LOC-number,eval_CARDINAL-number,eval_FT-number
Unnamed: 0_level_1,run,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
CmBERT IO,1,0.957364,0.957364,0.0,0.997714,0.531915,1.0,0.979213,0.975768,0.980549,0.977273,...,1031,63,1751,79,14,1787,1685,1747,43,33
CmBERT IO,2,0.965932,0.965932,0.0,0.996571,0.350427,0.333333,0.968872,0.975148,0.973295,0.97619,...,1031,63,1751,79,14,1787,1685,1747,43,33
CmBERT IO,3,0.93617,0.960843,0.289474,0.997713,0.518868,0.75,0.971604,0.969213,0.977143,0.977273,...,1031,63,1751,79,14,1787,1685,1747,43,33
CmBERT IO,4,0.9742,0.9742,0.0,0.996568,0.398496,0.0,0.968245,0.972206,0.976518,0.977273,...,1031,63,1751,79,14,1787,1685,1747,43,33
CmBERT IO,5,0.929991,0.929991,0.0,0.994866,0.394737,0.0,0.970539,0.972271,0.977803,0.976744,...,1031,63,1751,79,14,1787,1685,1747,43,33


In [33]:
METRICS_DIR_OCR_M1 = OUT_BASE / "method_1/m1-120-experiment_2_metrics"

In [34]:
# Load models metrics from metrics jsons
camembert_io_pero_m1 = compile_metrics_by_classes(METRICS_DIR_OCR_M1 / "121-camembert-ner-multihead-io", classes)
camembert_iob2_pero_m1 = compile_metrics_by_classes(METRICS_DIR_OCR_M1 / "122-camembert-ner-multihead-iob2", classes)
prtn_camembert_io_pero_m1 = compile_metrics_by_classes(METRICS_DIR_OCR_M1 / "123-pretrained-camembert-ner-multihead-io",classes)
prtn_camembert_iob2_pero_m1 = compile_metrics_by_classes(METRICS_DIR_OCR_M1 / "124-pretrained-camembert-multihead-iob2", classes)

In [35]:
tmp_m1_pero = pd.concat([camembert_io_pero_m1,camembert_iob2_pero_m1,prtn_camembert_io_pero_m1,prtn_camembert_iob2_pero_m1], keys=["CmBERT IO", "CmBERT IOB2", "CmBERT+ptrn IO", "CmBERT+ptrn IOB2"])
metrics_raw_classes_m1_pero = byClassesDf(tmp_m1_pero,classes)
metrics_raw_classes_m1_pero.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,eval_PER-precision,eval_ACT-precision,eval_ACT_L1-precision,eval_ACT_L2-precision,eval_DESC-precision,eval_TITREH-precision,eval_TITREP-precision,eval_SPAT-precision,eval_LOC-precision,eval_CARDINAL-precision,...,eval_ACT-number,eval_ACT_L1-number,eval_ACT_L2-number,eval_DESC-number,eval_TITREH-number,eval_TITREP-number,eval_SPAT-number,eval_LOC-number,eval_CARDINAL-number,eval_FT-number
Unnamed: 0_level_1,run,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
CmBERT IO,1,0.927746,0.927746,0.0,0.980505,0.472727,0.666667,0.941566,0.941176,0.943814,0.690476,...,1031,63,1751,79,14,1787,1685,1747,43,32
CmBERT IO,2,0.897412,0.900749,0.642857,0.982798,0.493671,0.75,0.937327,0.928697,0.93881,0.828571,...,1031,63,1751,79,14,1787,1685,1747,43,32
CmBERT IO,3,0.917222,0.925676,0.333333,0.983381,0.373913,0.166667,0.933443,0.934349,0.940442,0.622222,...,1031,63,1751,79,14,1787,1685,1747,43,32
CmBERT IO,4,0.89919,0.914179,0.487179,0.981662,0.414286,0.666667,0.947164,0.947181,0.929174,0.736842,...,1031,63,1751,79,14,1787,1685,1747,43,32
CmBERT IO,5,0.917614,0.917614,0.0,0.981056,0.556818,0.5,0.935644,0.941211,0.943182,0.75,...,1031,63,1751,79,14,1787,1685,1747,43,32


### Method 2

In [36]:
METRICS_DIR_REF_M2 = OUT_BASE / "method_2/m2-210-experiment_1_metrics"

In [37]:
# Load models metrics from metrics jsons
res_classes_m2_ref = []
keys_classes_m2_ref = []

if os.path.exists(METRICS_DIR_REF_M2 / "211-camembert-ner-joint-labelling-io/run_2"):
    camembert_ner_io = compile_metrics_by_classes(METRICS_DIR_REF_M2 / "211-camembert-ner-joint-labelling-io",classes)
    res_classes_m2_ref.append(camembert_ner_io)
    keys_classes_m2_ref.append("CmBERT IO")
    
if os.path.exists(METRICS_DIR_REF_M2 / "212-camembert-ner-joint-labelling-iob2/run_2"):
    camembert_ner_iob2 = compile_metrics_by_classes(METRICS_DIR_REF_M2 / "212-camembert-ner-joint-labelling-iob2",classes)
    res_classes_m2_ref.append(camembert_ner_iob2)
    keys_classes_m2_ref.append("CmBERT IOB2")
    
if os.path.exists(METRICS_DIR_REF_M2 / "213-pretrained-camembert-ner-joint-labelling-io/run_2"):
    ptrn_camembert_ner_io = compile_metrics_by_classes(METRICS_DIR_REF_M2 / "213-pretrained-camembert-ner-joint-labelling-io",classes)
    res_classes_m2_ref.append(ptrn_camembert_ner_io)
    keys_classes_m2_ref.append("CmBERT+ptrn IO")
    
if os.path.exists(METRICS_DIR_REF_M2 / "214-pretrained-camembert-ner-joint-labelling-iob2/run_2"):
    ptrn_camembert_ner_iob2 = compile_metrics_by_classes(METRICS_DIR_REF_M2 / "214-pretrained-camembert-ner-joint-labelling-iob2",classes)
    res_classes_m2_ref.append(ptrn_camembert_ner_iob2)
    keys_classes_m2_ref.append("CmBERT+ptrn IOB2")
    
tmp_m2_ref = pd.concat(res_classes_m2_ref,keys=keys_classes_m2_ref,names=["Test"])
metrics_raw_classes_m2_ref = byClassesDf(tmp_m2_ref,classes)
metrics_raw_classes_m2_ref

Unnamed: 0_level_0,Unnamed: 1_level_0,eval_PER-precision,eval_ACT-precision,eval_ACT_L1-precision,eval_ACT_L2-precision,eval_DESC-precision,eval_TITREH-precision,eval_TITREP-precision,eval_SPAT-precision,eval_LOC-precision,eval_CARDINAL-precision,...,eval_ACT-number,eval_ACT_L1-number,eval_ACT_L2-number,eval_DESC-number,eval_TITREH-number,eval_TITREP-number,eval_SPAT-number,eval_LOC-number,eval_CARDINAL-number,eval_FT-number
Test,run,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
CmBERT IO,1,0.894783,0.953488,0.282828,0.997719,0.524752,0.666667,0.968889,0.976331,0.96538,0.933333,...,1031,63,1751,79,14,1787,1685,1747,43,33
CmBERT IO,2,0.893766,0.94514,0.36,0.997151,0.485437,0.388889,0.971619,0.98102,0.970538,0.977273,...,1031,63,1751,79,14,1787,1685,1747,43,33
CmBERT IO,3,0.901596,0.95809,0.333333,0.999429,0.52,0.75,0.974345,0.976331,0.977752,0.977273,...,1031,63,1751,79,14,1787,1685,1747,43,33
CmBERT IO,4,0.896269,0.896269,0.0,0.996005,0.129412,0.0,0.972747,0.940255,0.959887,0.976744,...,1031,63,1751,79,14,1787,1685,1747,43,33
CmBERT IO,5,0.882405,0.943359,0.299065,0.997713,0.408,1.0,0.971729,0.973934,0.964367,0.97619,...,1031,63,1751,79,14,1787,1685,1747,43,33
CmBERT IOB2,1,0.877408,0.91768,0.287671,0.996011,0.261905,1.0,0.973772,0.96831,0.969783,0.977273,...,1031,63,1751,79,14,1788,1685,1747,43,33
CmBERT IOB2,2,0.903863,0.950725,0.282051,0.996581,0.45,0.5,0.972207,0.96816,0.974344,0.97619,...,1031,63,1751,79,14,1788,1685,1747,43,33
CmBERT IOB2,3,0.904293,0.938931,0.385714,0.995442,0.314815,0.5,0.96086,0.969965,0.975485,0.977273,...,1031,63,1751,79,14,1788,1685,1747,43,33
CmBERT IOB2,4,0.893406,0.912512,0.431818,0.997141,0.262626,0.571429,0.967439,0.971648,0.95892,0.97619,...,1031,63,1751,79,14,1788,1685,1747,43,33
CmBERT IOB2,5,0.880847,0.916117,0.361111,0.995444,0.297521,0.4,0.972717,0.971698,0.973265,0.977273,...,1031,63,1751,79,14,1788,1685,1747,43,33


In [38]:
METRICS_DIR_PERO_M2 = OUT_BASE / "method_2/m2-220-experiment_2_metrics"

In [39]:
# Load models metrics from metrics jsons
res_classes_m2_pero = []
keys_classes_m2_pero = []

if os.path.exists(METRICS_DIR_PERO_M2 / "221-camembert-ner-joint-labelling-io/run_2"):
    camembert_ner_io = compile_metrics_by_classes(METRICS_DIR_PERO_M2 / "221-camembert-ner-joint-labelling-io",classes)
    res_classes_m2_pero.append(camembert_ner_io)
    keys_classes_m2_pero.append("CmBERT IO")
    
if os.path.exists(METRICS_DIR_PERO_M2 / "222-camembert-ner-joint-labelling-iob2/run_2"):
    camembert_ner_iob2 = compile_metrics_by_classes(METRICS_DIR_PERO_M2 / "222-camembert-ner-joint-labelling-iob2",classes)
    res_classes_m2_pero.append(camembert_ner_iob2)
    keys_classes_m2_pero.append("CmBERT IOB2")
    
if os.path.exists(METRICS_DIR_PERO_M2 / "223-pretrained-camembert-ner-joint-labelling-io/run_2"):
    ptrn_camembert_ner_io = compile_metrics_by_classes(METRICS_DIR_PERO_M2 / "223-pretrained-camembert-ner-joint-labelling-io",classes)
    res_classes_m2_pero.append(ptrn_camembert_ner_io)
    keys_classes_m2_pero.append("CmBERT+ptrn IO")
    
if os.path.exists(METRICS_DIR_PERO_M2 / "224-pretrained-camembert-ner-joint-labelling-iob2/run_2"):
    ptrn_camembert_ner_iob2 = compile_metrics_by_classes(METRICS_DIR_PERO_M2 / "224-pretrained-camembert-ner-joint-labelling-iob2",classes)
    res_classes_m2_pero.append(ptrn_camembert_ner_iob2)
    keys_classes_m2_pero.append("CmBERT+ptrn IOB2")
    
tmp_m2_pero = pd.concat(res_classes_m2_pero,keys=keys_classes_m2_pero,names=["Test"])
metrics_raw_classes_m2_pero = byClassesDf(tmp_m2_pero,classes)
metrics_raw_classes_m2_pero

Unnamed: 0_level_0,Unnamed: 1_level_0,eval_PER-precision,eval_ACT-precision,eval_ACT_L1-precision,eval_ACT_L2-precision,eval_DESC-precision,eval_TITREH-precision,eval_TITREP-precision,eval_SPAT-precision,eval_LOC-precision,eval_CARDINAL-precision,...,eval_ACT-number,eval_ACT_L1-number,eval_ACT_L2-number,eval_DESC-number,eval_TITREH-number,eval_TITREP-number,eval_SPAT-number,eval_LOC-number,eval_CARDINAL-number,eval_FT-number
Test,run,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
CmBERT IO,1,0.888496,0.923004,0.423077,0.983447,0.479167,0.473684,0.944196,0.954438,0.942177,0.738095,...,1031,63,1751,79,14,1787,1685,1747,43,32
CmBERT IO,2,0.876869,0.906827,0.264151,0.98056,0.352941,0.416667,0.942382,0.954064,0.939411,0.868421,...,1031,63,1751,79,14,1787,1685,1747,43,32
CmBERT IO,3,0.881786,0.907579,0.416667,0.981662,0.46875,0.444444,0.944721,0.946682,0.937571,0.644444,...,1031,63,1751,79,14,1787,1685,1747,43,32
CmBERT IO,4,0.871366,0.922115,0.315789,0.978797,0.381679,0.5,0.931694,0.917544,0.930219,0.806452,...,1031,63,1751,79,14,1787,1685,1747,43,32
CmBERT IO,5,0.864494,0.917834,0.263158,0.987861,0.418182,0.615385,0.945,0.96097,0.944223,0.85,...,1031,63,1751,79,14,1787,1685,1747,43,32
CmBERT IOB2,1,0.875327,0.918791,0.352273,0.984465,0.45283,0.416667,0.93388,0.949173,0.925098,0.727273,...,1031,63,1751,79,14,1788,1685,1747,43,33
CmBERT IOB2,2,0.87193,0.919924,0.318681,0.980594,0.491071,0.75,0.938809,0.944771,0.93266,0.829787,...,1031,63,1751,79,14,1788,1685,1747,43,33
CmBERT IOB2,3,0.876882,0.939275,0.287037,0.978519,0.391608,0.454545,0.924661,0.937427,0.932886,0.62963,...,1031,63,1751,79,14,1788,1685,1747,43,33
CmBERT IOB2,4,0.872285,0.912067,0.353659,0.986262,0.522222,0.466667,0.941662,0.952941,0.93641,0.829787,...,1031,63,1751,79,14,1788,1685,1747,43,33
CmBERT IOB2,5,0.853575,0.87917,0.307692,0.989619,0.272727,0.714286,0.949917,0.942387,0.936122,0.921053,...,1031,63,1751,79,14,1788,1685,1747,43,33


### Method 3

In [40]:
METRICS_DIR_REF_M3 = OUT_BASE / "method_3/m3-310-experiment_1_metrics"

In [41]:
# Load models metrics from metrics jsons
res_classes_m3_ref = []
keys_classes_m3_ref = []

if os.path.exists(METRICS_DIR_REF_M3 / "311-camembert-ner-hierarchical-loss-io/run_2"):
    camembert_ner_io = compile_metrics_by_classes(METRICS_DIR_REF_M3 / "311-camembert-ner-hierarchical-loss-io",classes)
    res_classes_m3_ref.append(camembert_ner_io)
    keys_classes_m3_ref.append("CmBERT IO")
    
if os.path.exists(METRICS_DIR_REF_M3 / "312-camembert-ner-hierarchical-loss-iob2/run_2"):
    camembert_ner_iob2 = compile_metrics_by_classes(METRICS_DIR_REF_M3 / "312-camembert-ner-hierarchical-loss-iob2",classes)
    res_classes_m3_ref.append(camembert_ner_iob2)
    keys_classes_m3_ref.append("CmBERT IOB2")
    
if os.path.exists(METRICS_DIR_REF_M3 / "313-pretrained-camembert-ner-hierarchical-loss-io/run_2"):
    ptrn_camembert_ner_io = compile_metrics_by_classes(METRICS_DIR_REF_M3 / "313-pretrained-camembert-ner-hierarchical-loss-io",classes)
    res_classes_m3_ref.append(ptrn_camembert_ner_io)
    keys_classes_m3_ref.append("CmBERT+ptrn IO")
    
if os.path.exists(METRICS_DIR_REF_M3 / "314-pretrained-camembert-ner-hierarchical-loss-iob2/run_2"):
    ptrn_camembert_ner_iob2 = compile_metrics_by_classes(METRICS_DIR_REF_M3 / "314-pretrained-camembert-ner-hierarchical-loss-iob2",classes)
    res_classes_m3_ref.append(ptrn_camembert_ner_iob2)
    keys_classes_m3_ref.append("CmBERT+ptrn IOB2")
    
tmp_m3_ref = pd.concat(res_classes_m3_ref,keys=keys_classes_m3_ref,names=["Test"])
metrics_raw_classes_m3_ref = byClassesDf(tmp_m3_ref,classes)
metrics_raw_classes_m3_ref

Unnamed: 0_level_0,Unnamed: 1_level_0,eval_PER-precision,eval_ACT-precision,eval_ACT_L1-precision,eval_ACT_L2-precision,eval_DESC-precision,eval_TITREH-precision,eval_TITREP-precision,eval_SPAT-precision,eval_LOC-precision,eval_CARDINAL-precision,...,eval_ACT-number,eval_ACT_L1-number,eval_ACT_L2-number,eval_DESC-number,eval_TITREH-number,eval_TITREP-number,eval_SPAT-number,eval_LOC-number,eval_CARDINAL-number,eval_FT-number
Test,run,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
CmBERT IO,1,0.870497,0.915974,0.277108,0.997146,0.321739,0.0,0.976031,0.978848,0.95878,0.97619,...,1031,63,1751,79,14,1787,1685,1747,43,33
CmBERT IO,2,0.888193,0.93561,0.365591,0.996581,0.45283,0.5,0.969512,0.972765,0.971655,0.955556,...,1031,63,1751,79,14,1787,1685,1747,43,33
CmBERT IO,3,0.892889,0.972421,0.30303,0.997146,0.444444,0.0,0.982033,0.95892,0.971477,1.0,...,1031,63,1751,79,14,1787,1685,1747,43,33
CmBERT IO,4,0.914027,0.948544,0.44,0.999429,0.489583,0.642857,0.968333,0.972845,0.977208,0.976744,...,1031,63,1751,79,14,1787,1685,1747,43,33
CmBERT IO,5,0.899556,0.969185,0.310924,0.998286,0.456693,0.8,0.971572,0.978673,0.969054,0.977273,...,1031,63,1751,79,14,1787,1685,1747,43,33
CmBERT IOB2,1,0.891228,0.941627,0.336842,0.996577,0.35,0.461538,0.969983,0.98227,0.97433,1.0,...,1031,63,1751,79,14,1788,1685,1747,43,33
CmBERT IOB2,2,0.888594,0.944177,0.26087,0.994875,0.484211,0.545455,0.974402,0.956243,0.972096,0.976744,...,1031,63,1751,79,14,1788,1685,1747,43,33
CmBERT IOB2,3,0.915468,0.952611,0.423077,0.995447,0.431373,0.304348,0.967259,0.979822,0.966083,0.955556,...,1031,63,1751,79,14,1788,1685,1747,43,33
CmBERT IOB2,4,0.893178,0.931731,0.351351,0.994875,0.47619,0.5,0.971667,0.968047,0.967724,0.893617,...,1031,63,1751,79,14,1788,1685,1747,43,33
CmBERT IOB2,5,0.899274,0.921979,0.431373,0.996007,0.268519,0.6,0.963616,0.964727,0.966629,0.854167,...,1031,63,1751,79,14,1788,1685,1747,43,33


In [42]:
METRICS_DIR_PERO_M3 = OUT_BASE / "method_3/m3-320-experiment_2_metrics"

In [43]:
# Load models metrics from metrics jsons
res_classes_m3_pero = []
keys_classes_m3_pero = []

if os.path.exists(METRICS_DIR_PERO_M3 / "321-camembert-ner-hierarchical-loss-io/run_2"):
    camembert_ner_io = compile_metrics_by_classes(METRICS_DIR_PERO_M3 / "321-camembert-ner-hierarchical-loss-io",classes)
    res_classes_m3_pero.append(camembert_ner_io)
    keys_classes_m3_pero.append("CmBERT IO")
    
if os.path.exists(METRICS_DIR_PERO_M3 / "322-camembert-ner-hierarchical-loss-iob2/run_2"):
    camembert_ner_iob2 = compile_metrics_by_classes(METRICS_DIR_PERO_M3 / "322-camembert-ner-hierarchical-loss-iob2",classes)
    res_classes_m3_pero.append(camembert_ner_iob2)
    keys_classes_m3_pero.append("CmBERT IOB2")
    
if os.path.exists(METRICS_DIR_PERO_M3 / "323-pretrained-camembert-ner-hierarchical-loss-io/run_2"):
    ptrn_camembert_ner_io = compile_metrics_by_classes(METRICS_DIR_PERO_M3 / "323-pretrained-camembert-ner-hierarchical-loss-io",classes)
    res_classes_m3_pero.append(ptrn_camembert_ner_io)
    keys_classes_m3_pero.append("CmBERT+ptrn IO")
    
if os.path.exists(METRICS_DIR_PERO_M3 / "324-pretrained-camembert-ner-hierarchical-loss-iob2/run_2"):
    ptrn_camembert_ner_iob2 = compile_metrics_by_classes(METRICS_DIR_PERO_M3 / "324-pretrained-camembert-ner-hierarchical-loss-iob2",classes)
    res_classes_m3_pero.append(ptrn_camembert_ner_iob2)
    keys_classes_m3_pero.append("CmBERT+ptrn IOB2")
    
tmp_m3_pero = pd.concat(res_classes_m3_pero,keys=keys_classes_m3_pero,names=["Test"])
metrics_raw_classes_m3_pero = byClassesDf(tmp_m3_pero,classes)
metrics_raw_classes_m3_pero

Unnamed: 0_level_0,Unnamed: 1_level_0,eval_PER-precision,eval_ACT-precision,eval_ACT_L1-precision,eval_ACT_L2-precision,eval_DESC-precision,eval_TITREH-precision,eval_TITREP-precision,eval_SPAT-precision,eval_LOC-precision,eval_CARDINAL-precision,...,eval_ACT-number,eval_ACT_L1-number,eval_ACT_L2-number,eval_DESC-number,eval_TITREH-number,eval_TITREP-number,eval_SPAT-number,eval_LOC-number,eval_CARDINAL-number,eval_FT-number
Test,run,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
CmBERT IO,1,0.892889,0.926782,0.410959,0.986766,0.466667,0.666667,0.955841,0.938163,0.945143,0.880952,...,1031,63,1751,79,14,1787,1685,1747,43,32
CmBERT IO,2,0.848847,0.903955,0.311927,0.983945,0.387097,0.583333,0.940299,0.936916,0.93247,0.829268,...,1031,63,1751,79,14,1787,1685,1747,43,32
CmBERT IO,3,0.898917,0.9343,0.39726,0.982857,0.537634,0.7,0.944721,0.946934,0.945136,0.765957,...,1031,63,1751,79,14,1787,1685,1747,43,32
CmBERT IO,4,0.883865,0.92271,0.375,0.981132,0.489362,0.461538,0.948391,0.954411,0.944,0.717391,...,1031,63,1751,79,14,1787,1685,1747,43,32
CmBERT IO,5,0.878709,0.926136,0.322222,0.987349,0.479592,0.642857,0.943889,0.953955,0.943525,0.755556,...,1031,63,1751,79,14,1787,1685,1747,43,32
CmBERT IOB2,1,0.886201,0.939036,0.343434,0.982346,0.408333,0.388889,0.940364,0.949971,0.934159,0.833333,...,1031,63,1751,79,14,1788,1685,1747,43,33
CmBERT IOB2,2,0.879859,0.926597,0.289157,0.978929,0.324074,0.454545,0.939779,0.943296,0.931345,0.666667,...,1031,63,1751,79,14,1788,1685,1747,43,33
CmBERT IOB2,3,0.812238,0.868324,0.242991,0.985092,0.256983,0.5,0.935663,0.94284,0.928733,0.73913,...,1031,63,1751,79,14,1788,1685,1747,43,33
CmBERT IOB2,4,0.875552,0.904986,0.428571,0.984009,0.510638,0.6,0.943062,0.954064,0.943311,0.85,...,1031,63,1751,79,14,1788,1685,1747,43,33
CmBERT IOB2,5,0.857762,0.898242,0.25,0.982877,0.377551,0.4,0.940166,0.949322,0.939342,0.769231,...,1031,63,1751,79,14,1788,1685,1747,43,33


In [44]:
metrics_raw_classes_m2_ref

Unnamed: 0_level_0,Unnamed: 1_level_0,eval_PER-precision,eval_ACT-precision,eval_ACT_L1-precision,eval_ACT_L2-precision,eval_DESC-precision,eval_TITREH-precision,eval_TITREP-precision,eval_SPAT-precision,eval_LOC-precision,eval_CARDINAL-precision,...,eval_ACT-number,eval_ACT_L1-number,eval_ACT_L2-number,eval_DESC-number,eval_TITREH-number,eval_TITREP-number,eval_SPAT-number,eval_LOC-number,eval_CARDINAL-number,eval_FT-number
Test,run,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
CmBERT IO,1,0.894783,0.953488,0.282828,0.997719,0.524752,0.666667,0.968889,0.976331,0.96538,0.933333,...,1031,63,1751,79,14,1787,1685,1747,43,33
CmBERT IO,2,0.893766,0.94514,0.36,0.997151,0.485437,0.388889,0.971619,0.98102,0.970538,0.977273,...,1031,63,1751,79,14,1787,1685,1747,43,33
CmBERT IO,3,0.901596,0.95809,0.333333,0.999429,0.52,0.75,0.974345,0.976331,0.977752,0.977273,...,1031,63,1751,79,14,1787,1685,1747,43,33
CmBERT IO,4,0.896269,0.896269,0.0,0.996005,0.129412,0.0,0.972747,0.940255,0.959887,0.976744,...,1031,63,1751,79,14,1787,1685,1747,43,33
CmBERT IO,5,0.882405,0.943359,0.299065,0.997713,0.408,1.0,0.971729,0.973934,0.964367,0.97619,...,1031,63,1751,79,14,1787,1685,1747,43,33
CmBERT IOB2,1,0.877408,0.91768,0.287671,0.996011,0.261905,1.0,0.973772,0.96831,0.969783,0.977273,...,1031,63,1751,79,14,1788,1685,1747,43,33
CmBERT IOB2,2,0.903863,0.950725,0.282051,0.996581,0.45,0.5,0.972207,0.96816,0.974344,0.97619,...,1031,63,1751,79,14,1788,1685,1747,43,33
CmBERT IOB2,3,0.904293,0.938931,0.385714,0.995442,0.314815,0.5,0.96086,0.969965,0.975485,0.977273,...,1031,63,1751,79,14,1788,1685,1747,43,33
CmBERT IOB2,4,0.893406,0.912512,0.431818,0.997141,0.262626,0.571429,0.967439,0.971648,0.95892,0.97619,...,1031,63,1751,79,14,1788,1685,1747,43,33
CmBERT IOB2,5,0.880847,0.916117,0.361111,0.995444,0.297521,0.4,0.972717,0.971698,0.973265,0.977273,...,1031,63,1751,79,14,1788,1685,1747,43,33


### Conclusion

In [45]:
ref_classes = [metrics_raw_classes_m1_ref,metrics_raw_classes_m2_ref,metrics_raw_classes_m3_ref]
keys_ref = ["M1","M2","M3"]
metrics_raw_classes_ref = pd.concat(ref_classes, keys=keys_ref,names=["Approach"])

ocr_classes = [metrics_raw_classes_m1_pero,metrics_raw_classes_m2_pero,metrics_raw_classes_m3_pero]
keys_ocr = ["M1","M2","M3"]
metrics_raw_classes_ocr = pd.concat(ocr_classes, keys=keys_ocr,names=["Approach"])

datasets = [metrics_raw_classes_ref,metrics_raw_classes_ocr]
keys = ["Reference","OCR"]
metrics_raw_classes = pd.concat(datasets, keys=keys,names=["Dataset"])
metrics_raw_classes

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,eval_PER-precision,eval_ACT-precision,eval_ACT_L1-precision,eval_ACT_L2-precision,eval_DESC-precision,eval_TITREH-precision,eval_TITREP-precision,eval_SPAT-precision,eval_LOC-precision,eval_CARDINAL-precision,...,eval_ACT-number,eval_ACT_L1-number,eval_ACT_L2-number,eval_DESC-number,eval_TITREH-number,eval_TITREP-number,eval_SPAT-number,eval_LOC-number,eval_CARDINAL-number,eval_FT-number
Dataset,Approach,Unnamed: 2_level_1,run,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
Reference,M1,CmBERT IO,1,0.957364,0.957364,0.000000,0.997714,0.531915,1.000000,0.979213,0.975768,0.980549,0.977273,...,1031,63,1751,79,14,1787,1685,1747,43,33
Reference,M1,CmBERT IO,2,0.965932,0.965932,0.000000,0.996571,0.350427,0.333333,0.968872,0.975148,0.973295,0.976190,...,1031,63,1751,79,14,1787,1685,1747,43,33
Reference,M1,CmBERT IO,3,0.936170,0.960843,0.289474,0.997713,0.518868,0.750000,0.971604,0.969213,0.977143,0.977273,...,1031,63,1751,79,14,1787,1685,1747,43,33
Reference,M1,CmBERT IO,4,0.974200,0.974200,0.000000,0.996568,0.398496,0.000000,0.968245,0.972206,0.976518,0.977273,...,1031,63,1751,79,14,1787,1685,1747,43,33
Reference,M1,CmBERT IO,5,0.929991,0.929991,0.000000,0.994866,0.394737,0.000000,0.970539,0.972271,0.977803,0.976744,...,1031,63,1751,79,14,1787,1685,1747,43,33
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
OCR,M3,CmBERT+ptrn IOB2,1,0.848537,0.874545,0.387097,0.987327,0.258621,0.333333,0.934902,0.951765,0.936588,0.844444,...,1031,63,1751,79,14,1788,1685,1747,43,33
OCR,M3,CmBERT+ptrn IOB2,2,0.847487,0.897267,0.279570,0.987931,0.285714,0.466667,0.935236,0.960449,0.933184,0.869565,...,1031,63,1751,79,14,1788,1685,1747,43,33
OCR,M3,CmBERT+ptrn IOB2,3,0.847807,0.914692,0.194444,0.985690,0.377049,0.428571,0.944168,0.952913,0.940079,0.720930,...,1031,63,1751,79,14,1788,1685,1747,43,33
OCR,M3,CmBERT+ptrn IOB2,4,0.836243,0.907993,0.228346,0.982326,0.307692,0.500000,0.945213,0.959364,0.941709,0.847826,...,1031,63,1751,79,14,1788,1685,1747,43,33


In [46]:
eval_ = []
for elem in metrics_raw_classes.columns:
    if 'number' not in elem and 'f1' in elem:
        eval_.append(elem)
        
metrics_raw_classes = metrics_raw_classes[eval_].groupby(level=(0,1,2)).mean()
metrics_raw_classes[eval_] = metrics_raw_classes[eval_].multiply(100., axis=1)
metrics_raw_classes

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,eval_PER-f1,eval_ACT-f1,eval_ACT_L1-f1,eval_ACT_L2-f1,eval_DESC-f1,eval_TITREH-f1,eval_TITREP-f1,eval_SPAT-f1,eval_LOC-f1,eval_CARDINAL-f1,eval_FT-f1
Dataset,Approach,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Reference,M1,CmBERT IO,91.799822,94.757473,4.356436,99.640116,49.561868,11.686275,97.352885,97.476534,97.85092,98.139346,36.176726
Reference,M1,CmBERT IOB2,90.438916,93.986777,24.826957,99.708938,43.286669,22.464858,97.261297,97.674942,97.555637,96.497072,51.549715
Reference,M1,CmBERT+ptrn IO,92.56393,95.699587,17.848898,99.680245,53.528603,50.462824,97.246492,97.62615,97.600769,98.380112,53.423239
Reference,M1,CmBERT+ptrn IOB2,89.813091,93.597412,25.666546,99.697578,44.407316,41.462868,97.214125,97.681952,97.350078,97.219995,50.419259
Reference,M2,CmBERT IO,90.644476,94.470645,31.519772,99.794455,47.069827,39.903846,97.495406,97.253573,97.214809,97.46014,58.715928
Reference,M2,CmBERT IOB2,90.338967,93.783417,35.344414,99.697562,36.810314,39.806076,97.332429,97.350988,97.377046,97.89858,55.978269
Reference,M2,CmBERT+ptrn IO,90.115511,94.613351,31.622461,99.697425,47.674517,58.148272,97.562956,98.223989,97.324784,98.374766,57.300857
Reference,M2,CmBERT+ptrn IOB2,90.119513,94.394823,35.416521,99.65201,42.875308,38.634699,97.05384,98.359877,97.339982,97.898769,62.291779
Reference,M3,CmBERT IO,90.531106,94.480437,40.921532,99.805897,51.492058,33.042901,97.554721,97.539117,97.255276,97.909277,55.967049
Reference,M3,CmBERT IOB2,90.794113,94.328273,38.967055,99.669142,44.75984,40.705579,97.326496,97.345912,97.361644,95.768983,51.786387


In [47]:
averaged_classes = metrics_raw_classes.copy()

start_classes = list(averaged_classes.columns)
final_classes = [classe.replace('eval_','') for classe in classes]
columns_names = {start_classes[i]: final_classes[i] for i in range(len(final_classes))}

# Set pretty names
averaged_classes.index.names = ['Dataset','Approach',"Model & tags"]
averaged_classes.rename(columns=columns_names, errors="raise", inplace=True)

classes_f = ['PER','ACT','DESC','TITREH','TITREP','SPAT','LOC','CARDINAL','FT']
averaged_classes = averaged_classes[classes_f]
averaged_classes

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,PER,ACT,DESC,TITREH,TITREP,SPAT,LOC,CARDINAL,FT
Dataset,Approach,Model & tags,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Reference,M1,CmBERT IO,91.799822,94.757473,49.561868,11.686275,97.352885,97.476534,97.85092,98.139346,36.176726
Reference,M1,CmBERT IOB2,90.438916,93.986777,43.286669,22.464858,97.261297,97.674942,97.555637,96.497072,51.549715
Reference,M1,CmBERT+ptrn IO,92.56393,95.699587,53.528603,50.462824,97.246492,97.62615,97.600769,98.380112,53.423239
Reference,M1,CmBERT+ptrn IOB2,89.813091,93.597412,44.407316,41.462868,97.214125,97.681952,97.350078,97.219995,50.419259
Reference,M2,CmBERT IO,90.644476,94.470645,47.069827,39.903846,97.495406,97.253573,97.214809,97.46014,58.715928
Reference,M2,CmBERT IOB2,90.338967,93.783417,36.810314,39.806076,97.332429,97.350988,97.377046,97.89858,55.978269
Reference,M2,CmBERT+ptrn IO,90.115511,94.613351,47.674517,58.148272,97.562956,98.223989,97.324784,98.374766,57.300857
Reference,M2,CmBERT+ptrn IOB2,90.119513,94.394823,42.875308,38.634699,97.05384,98.359877,97.339982,97.898769,62.291779
Reference,M3,CmBERT IO,90.531106,94.480437,51.492058,33.042901,97.554721,97.539117,97.255276,97.909277,55.967049
Reference,M3,CmBERT IOB2,90.794113,94.328273,44.75984,40.705579,97.326496,97.345912,97.361644,95.768983,51.786387


In [48]:
latex_table_classes = averaged_classes.copy()

caption = "F1 score measured for each approach, pre-trained model and tag format (mean of 5 runs) for each entity type."
print(latex_table_classes.to_latex(float_format="%.1f", multirow=True, caption=caption))
latex_table_classes

\begin{table}
\centering
\caption{F1 score measured for each approach, pre-trained model and tag format (mean of 5 runs) for each entity type.}
\begin{tabular}{lllrrrrrrrrr}
\toprule
    &    &                  &  PER &  ACT &  DESC &  TITREH &  TITREP &  SPAT &  LOC &  CARDINAL &   FT \\
Dataset & Approach & Model \& tags &      &      &       &         &         &       &      &           &      \\
\midrule
\multirow{12}{*}{Reference} & \multirow{4}{*}{M1} & CmBERT IO & 91.8 & 94.8 &  49.6 &    11.7 &    97.4 &  97.5 & 97.9 &      98.1 & 36.2 \\
    &    & CmBERT IOB2 & 90.4 & 94.0 &  43.3 &    22.5 &    97.3 &  97.7 & 97.6 &      96.5 & 51.5 \\
    &    & CmBERT+ptrn IO & 92.6 & 95.7 &  53.5 &    50.5 &    97.2 &  97.6 & 97.6 &      98.4 & 53.4 \\
    &    & CmBERT+ptrn IOB2 & 89.8 & 93.6 &  44.4 &    41.5 &    97.2 &  97.7 & 97.4 &      97.2 & 50.4 \\
\cline{2-12}
    & \multirow{4}{*}{M2} & CmBERT IO & 90.6 & 94.5 &  47.1 &    39.9 &    97.5 &  97.3 & 97.2 &      97.5 & 58.7 \\
  

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,PER,ACT,DESC,TITREH,TITREP,SPAT,LOC,CARDINAL,FT
Dataset,Approach,Model & tags,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Reference,M1,CmBERT IO,91.799822,94.757473,49.561868,11.686275,97.352885,97.476534,97.85092,98.139346,36.176726
Reference,M1,CmBERT IOB2,90.438916,93.986777,43.286669,22.464858,97.261297,97.674942,97.555637,96.497072,51.549715
Reference,M1,CmBERT+ptrn IO,92.56393,95.699587,53.528603,50.462824,97.246492,97.62615,97.600769,98.380112,53.423239
Reference,M1,CmBERT+ptrn IOB2,89.813091,93.597412,44.407316,41.462868,97.214125,97.681952,97.350078,97.219995,50.419259
Reference,M2,CmBERT IO,90.644476,94.470645,47.069827,39.903846,97.495406,97.253573,97.214809,97.46014,58.715928
Reference,M2,CmBERT IOB2,90.338967,93.783417,36.810314,39.806076,97.332429,97.350988,97.377046,97.89858,55.978269
Reference,M2,CmBERT+ptrn IO,90.115511,94.613351,47.674517,58.148272,97.562956,98.223989,97.324784,98.374766,57.300857
Reference,M2,CmBERT+ptrn IOB2,90.119513,94.394823,42.875308,38.634699,97.05384,98.359877,97.339982,97.898769,62.291779
Reference,M3,CmBERT IO,90.531106,94.480437,51.492058,33.042901,97.554721,97.539117,97.255276,97.909277,55.967049
Reference,M3,CmBERT IOB2,90.794113,94.328273,44.75984,40.705579,97.326496,97.345912,97.361644,95.768983,51.786387


In [49]:
averaged_classes_ref = averaged_classes.loc[['Reference']]
averaged_classes_ref = averaged_classes_ref.reset_index(0)
del averaged_classes_ref["Dataset"]
latex_table = averaged_classes_ref.copy()
caption = "F1 score measured for each approach, pre-trained model and tag format (mean of 5 runs) on the reference dataset for each entity type."
print(latex_table.to_latex(float_format="%.1f", multirow=True, caption=caption))
latex_table

\begin{table}
\centering
\caption{F1 score measured for each approach, pre-trained model and tag format (mean of 5 runs) on the reference dataset for each entity type.}
\begin{tabular}{llrrrrrrrrr}
\toprule
   &                  &  PER &  ACT &  DESC &  TITREH &  TITREP &  SPAT &  LOC &  CARDINAL &   FT \\
Approach & Model \& tags &      &      &       &         &         &       &      &           &      \\
\midrule
\multirow{4}{*}{M1} & CmBERT IO & 91.8 & 94.8 &  49.6 &    11.7 &    97.4 &  97.5 & 97.9 &      98.1 & 36.2 \\
   & CmBERT IOB2 & 90.4 & 94.0 &  43.3 &    22.5 &    97.3 &  97.7 & 97.6 &      96.5 & 51.5 \\
   & CmBERT+ptrn IO & 92.6 & 95.7 &  53.5 &    50.5 &    97.2 &  97.6 & 97.6 &      98.4 & 53.4 \\
   & CmBERT+ptrn IOB2 & 89.8 & 93.6 &  44.4 &    41.5 &    97.2 &  97.7 & 97.4 &      97.2 & 50.4 \\
\cline{1-11}
\multirow{4}{*}{M2} & CmBERT IO & 90.6 & 94.5 &  47.1 &    39.9 &    97.5 &  97.3 & 97.2 &      97.5 & 58.7 \\
   & CmBERT IOB2 & 90.3 & 93.8 &  36.8 &    39.8

Unnamed: 0_level_0,Unnamed: 1_level_0,PER,ACT,DESC,TITREH,TITREP,SPAT,LOC,CARDINAL,FT
Approach,Model & tags,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
M1,CmBERT IO,91.799822,94.757473,49.561868,11.686275,97.352885,97.476534,97.85092,98.139346,36.176726
M1,CmBERT IOB2,90.438916,93.986777,43.286669,22.464858,97.261297,97.674942,97.555637,96.497072,51.549715
M1,CmBERT+ptrn IO,92.56393,95.699587,53.528603,50.462824,97.246492,97.62615,97.600769,98.380112,53.423239
M1,CmBERT+ptrn IOB2,89.813091,93.597412,44.407316,41.462868,97.214125,97.681952,97.350078,97.219995,50.419259
M2,CmBERT IO,90.644476,94.470645,47.069827,39.903846,97.495406,97.253573,97.214809,97.46014,58.715928
M2,CmBERT IOB2,90.338967,93.783417,36.810314,39.806076,97.332429,97.350988,97.377046,97.89858,55.978269
M2,CmBERT+ptrn IO,90.115511,94.613351,47.674517,58.148272,97.562956,98.223989,97.324784,98.374766,57.300857
M2,CmBERT+ptrn IOB2,90.119513,94.394823,42.875308,38.634699,97.05384,98.359877,97.339982,97.898769,62.291779
M3,CmBERT IO,90.531106,94.480437,51.492058,33.042901,97.554721,97.539117,97.255276,97.909277,55.967049
M3,CmBERT IOB2,90.794113,94.328273,44.75984,40.705579,97.326496,97.345912,97.361644,95.768983,51.786387


In [50]:
averaged_classes_ocr = averaged_classes.loc[['OCR']]
averaged_classes_ocr = averaged_classes_ocr.reset_index(0)
del averaged_classes_ocr["Dataset"]
latex_table = averaged_classes_ocr.copy()
caption = "F1 score measured for each approach, pre-trained model and tag format (mean of 5 runs) on the noisy dataset for each entity type."
print(latex_table.to_latex(float_format="%.1f", multirow=True, caption=caption))
latex_table

\begin{table}
\centering
\caption{F1 score measured for each approach, pre-trained model and tag format (mean of 5 runs) on the noisy dataset for each entity type.}
\begin{tabular}{llrrrrrrrrr}
\toprule
   &                  &  PER &  ACT &  DESC &  TITREH &  TITREP &  SPAT &  LOC &  CARDINAL &   FT \\
Approach & Model \& tags &      &      &       &         &         &       &      &           &      \\
\midrule
\multirow{4}{*}{M1} & CmBERT IO & 90.0 & 92.7 &  49.3 &    20.6 &    94.5 &  94.3 & 94.4 &      69.5 & 48.1 \\
   & CmBERT IOB2 & 88.7 & 92.8 &  42.2 &    27.3 &    94.3 &  94.8 & 94.2 &      79.5 & 33.7 \\
   & CmBERT+ptrn IO & 89.3 & 93.3 &  50.9 &    39.7 &    95.1 &  96.0 & 95.1 &      77.0 & 50.3 \\
   & CmBERT+ptrn IOB2 & 89.0 & 92.6 &  48.2 &    37.8 &    94.8 &  96.0 & 94.8 &      79.4 & 42.3 \\
\cline{1-11}
\multirow{4}{*}{M2} & CmBERT IO & 89.5 & 93.1 &  47.4 &    39.9 &    94.7 &  95.0 & 94.4 &      74.0 & 41.9 \\
   & CmBERT IOB2 & 89.0 & 92.7 &  49.2 &    45.8 &  

Unnamed: 0_level_0,Unnamed: 1_level_0,PER,ACT,DESC,TITREH,TITREP,SPAT,LOC,CARDINAL,FT
Approach,Model & tags,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
M1,CmBERT IO,90.043853,92.720677,49.26645,20.578431,94.507176,94.282584,94.409681,69.531118,48.143436
M1,CmBERT IOB2,88.690412,92.817873,42.16768,27.34127,94.330636,94.781748,94.204201,79.458108,33.662753
M1,CmBERT+ptrn IO,89.329012,93.284056,50.928631,39.739216,95.118416,96.041616,95.096117,77.020229,50.341841
M1,CmBERT+ptrn IOB2,88.953135,92.599081,48.195187,37.78836,94.833236,96.004853,94.815366,79.441012,42.294429
M2,CmBERT IO,89.535834,93.05533,47.376332,39.909772,94.686317,94.961686,94.357316,73.965405,41.934837
M2,CmBERT IOB2,88.999364,92.669387,49.178469,45.780381,94.620276,94.979734,94.161251,80.68388,45.645323
M2,CmBERT+ptrn IO,88.959022,93.412767,48.065263,56.984127,94.978049,94.75659,94.859238,75.554274,50.707517
M2,CmBERT+ptrn IOB2,87.812343,91.894318,39.167631,40.770953,94.962686,95.461803,94.770036,78.521844,56.52414
M3,CmBERT IO,89.572129,93.017104,52.284171,54.616712,95.051834,94.96367,94.541947,79.888247,53.876494
M3,CmBERT IOB2,88.169382,91.928783,43.927508,38.33046,94.493064,95.120737,94.148703,76.83711,42.857044
