# M2 : 230 - Figures and metrics

Evaluation scores on level-1 entities segmentation and classification with joint-labels method

In [47]:
import os, sys
from pathlib import Path

ENV_IS_GOOGLE_COLAB = True if 'google.colab' in str(get_ipython()) else False
os.environ["ENV_IS_GOOGLE_COLAB"] = str(ENV_IS_GOOGLE_COLAB)

if ENV_IS_GOOGLE_COLAB:
    from google.colab import drive
    mountpoint = Path("/content/drive")
    drive.mount(str(mountpoint)) # Mount gdrive to BASE
    base = mountpoint / "MyDrive/article_icdar_2023" # Adapt this to your situation
    sys.path.append(str(base)) # Add BASE to Python Path
    BASE = Path(base).resolve() # Make BASE absolute
    DATASETS =  BASE / "dataset"
else:
    BASE = Path(os.path.dirname(os.path.realpath("__file__"))).resolve() # If not on GColab, BASE will be the directory of this notebook
    DATASETS = Path('/work/stual/dataset_ICDAR').resolve()
    OUT_BASE = Path('/work/stual/res_ICDAR').resolve()

print(sys.path)
print(BASE)
print(DATASETS)
print(OUT_BASE)

['/lrde/home2/stual/stage_DAS/m2_joint-labelling_for_ner', '/usr/lib/python310.zip', '/usr/lib/python3.10', '/usr/lib/python3.10/lib-dynload', '', '/lrde/home2/stual/.venv/python_3_9/lib/python3.10/site-packages']
/lrde/home2/stual/stage_DAS/m2_joint-labelling_for_ner
/work/stual/dataset_ICDAR
/work/stual/res_ICDAR


## Constants

In [48]:
MAX_TRAINSET_SIZE = 6084

## Tools

In [49]:
import pandas as pd
import re
import os
import json

def compile_metrics(path): 
    rundirs = [f for f in os.listdir(path)]

    df = pd.DataFrame()
    for run_dir in rundirs:
        if 'run' in run_dir:
            run_path = path / run_dir
            nrun = re.search("\d+",run_dir)[0]

            files = [f for f in os.listdir(run_path) if "test_" in f and 'json' in f]
            sizes = [int(re.search("\d+",f)[0]) for f in files]

            for file, size in zip(files,sizes):
                file_path = run_path / file
                dftmp = pd.read_json(file_path, typ='series')
                dftmp = pd.DataFrame([dftmp])

                dftmp["trainsize"] = size 
                dftmp["run"] = nrun
                dftmp["trainsize_p"] = round(100 * size / MAX_TRAINSET_SIZE, 1)
                df = pd.concat([df, dftmp])

    return df.groupby(["run","trainsize"]).first()

# 231 - Experiment 1: tables on all-entities metrics

In [50]:
METRICS_DIR_REF = OUT_BASE / "method_2/m2-210-experiment_1_metrics"

In [51]:
# Load models metrics from metrics jsons
res = []
keys = []

if os.path.exists(METRICS_DIR_REF / "211-camembert-ner-joint-labelling-io/run_2"):
    camembert_ner_io_ref = compile_metrics(METRICS_DIR_REF / "211-camembert-ner-joint-labelling-io")
    camembert_ner_io_ref["eval_precision-l1l2"] = camembert_ner_io_ref["eval_precision"]
    camembert_ner_io_ref["eval_recall-l1l2"] = camembert_ner_io_ref["eval_recall"]
    camembert_ner_io_ref["eval_f1-l1l2"] = camembert_ner_io_ref["eval_f1"]
    camembert_ner_io_ref["eval_accuracy-l1l2"] = camembert_ner_io_ref["eval_accuracy"]
    res.append(camembert_ner_io_ref)
    keys.append("CmBERT IO")
    
if os.path.exists(METRICS_DIR_REF / "212-camembert-ner-joint-labelling-iob2/run_2"):
    camembert_ner_iob2_ref = compile_metrics(METRICS_DIR_REF / "212-camembert-ner-joint-labelling-iob2")
    res.append(camembert_ner_iob2_ref)
    keys.append("CmBERT IOB2")

if os.path.exists(METRICS_DIR_REF / "213-pretrained-camembert-ner-joint-labelling-io/run_2"):
    ptrn_camembert_ner_io_ref = compile_metrics(METRICS_DIR_REF / "213-pretrained-camembert-ner-joint-labelling-io")
    ptrn_camembert_ner_io_ref["eval_precision-l1l2"] = ptrn_camembert_ner_io_ref["eval_precision"]
    ptrn_camembert_ner_io_ref["eval_recall-l1l2"] = ptrn_camembert_ner_io_ref["eval_recall"]
    ptrn_camembert_ner_io_ref["eval_f1-l1l2"] = ptrn_camembert_ner_io_ref["eval_f1"]
    ptrn_camembert_ner_io_ref["eval_accuracy-l1l2"] = ptrn_camembert_ner_io_ref["eval_accuracy"]
    res.append(ptrn_camembert_ner_io_ref)
    keys.append("Ptrn CmBERT IO")
    
if os.path.exists(METRICS_DIR_REF / "214-pretrained-camembert-ner-joint-labelling-iob2/run_2"):
    ptrn_camembert_ner_iob2_ref = compile_metrics(METRICS_DIR_REF / "214-pretrained-camembert-ner-joint-labelling-iob2")
    res.append(ptrn_camembert_ner_iob2_ref)
    keys.append("Ptrn CmBERT IOB2")
    
# Create the full table
print(keys)
metrics_raw_ref = pd.concat(res, keys=keys)
metrics_raw_ref

['CmBERT IO', 'CmBERT IOB2', 'Ptrn CmBERT IO', 'Ptrn CmBERT IOB2']


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,eval_loss,eval_precision,eval_recall,eval_f1,eval_accuracy,eval_precision-l1,eval_recall-l1,eval_f1-l1,eval_accuracy-l1,eval_precision-l2,...,eval_TITRE,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch,trainsize_p,eval_precision-l1l2,eval_recall-l1l2,eval_f1-l1l2,eval_accuracy-l1l2
Unnamed: 0_level_1,run,trainsize,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
CmBERT IO,1,6084,0.191235,0.959145,0.967932,0.963519,0.942002,0.957034,0.966094,0.961543,0.965067,0.961497,...,,9.045,186.291,11.719,6.3,100.0,0.959145,0.967932,0.963519,0.942002
CmBERT IO,10,6084,0.160874,0.960657,0.965521,0.963083,0.949381,0.954043,0.968956,0.961442,0.967699,0.966477,...,,8.862,190.138,11.961,4.72,100.0,0.960657,0.965521,0.963083,0.949381
CmBERT IO,11,6084,0.19756,0.961474,0.968776,0.965111,0.948452,0.960035,0.967856,0.963929,0.966099,0.96698,...,,9.3453,180.305,11.343,6.82,100.0,0.961474,0.968776,0.965111,0.948452
CmBERT IO,12,6084,0.228159,0.960794,0.969017,0.964888,0.947265,0.96289,0.965434,0.96416,0.970175,0.963971,...,,10.4733,160.886,10.121,10.24,100.0,0.960794,0.969017,0.964888,0.947265
CmBERT IO,13,6084,0.196876,0.954426,0.961905,0.958151,0.940248,0.957484,0.961911,0.959692,0.962332,0.959091,...,,10.7838,156.252,9.83,4.99,100.0,0.954426,0.961905,0.958151,0.940248
CmBERT IO,14,6084,0.212267,0.962235,0.961423,0.961828,0.935965,0.939446,0.963232,0.95119,0.946388,0.975254,...,,13.1446,128.19,8.064,2.89,100.0,0.962235,0.961423,0.961828,0.935965
CmBERT IO,15,6084,0.184041,0.957964,0.964316,0.961129,0.94453,0.958425,0.959269,0.958847,0.966047,0.963092,...,,9.939,169.533,10.665,6.3,100.0,0.957964,0.964316,0.961129,0.94453
CmBERT IO,16,6084,0.184273,0.96129,0.966968,0.96412,0.947162,0.951427,0.961691,0.956531,0.963055,0.970374,...,"{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...",11.1581,151.011,9.5,7.09,100.0,0.96129,0.966968,0.96412,0.947162
CmBERT IO,17,6084,0.183668,0.958564,0.959373,0.958968,0.939009,0.941342,0.957508,0.949356,0.959185,0.967072,...,,10.607,158.857,9.993,3.94,100.0,0.958564,0.959373,0.958968,0.939009
CmBERT IO,18,6084,0.173756,0.965805,0.963593,0.964697,0.94515,0.962418,0.964113,0.963264,0.96192,0.970971,...,,10.9964,153.231,9.639,5.25,100.0,0.965805,0.963593,0.964697,0.94515


In [52]:
metrics_raw_ref.columns

Index(['eval_loss', 'eval_precision', 'eval_recall', 'eval_f1',
       'eval_accuracy', 'eval_precision-l1', 'eval_recall-l1', 'eval_f1-l1',
       'eval_accuracy-l1', 'eval_precision-l2', 'eval_recall-l2', 'eval_f1-l2',
       'eval_accuracy-l2', 'eval_precision-all', 'eval_recall-all',
       'eval_f1-all', 'eval_accuracy-all', 'eval_precision-das',
       'eval_recall-das', 'eval_f1-das', 'eval_accuracy-das', 'eval_PER',
       'eval_ACT', 'eval_ACT_L1', 'eval_ACT_L2', 'eval_DESC', 'eval_TITREH',
       'eval_TITREP', 'eval_SPAT', 'eval_LOC', 'eval_CARDINAL', 'eval_FT',
       'eval_TITRE', 'eval_runtime', 'eval_samples_per_second',
       'eval_steps_per_second', 'epoch', 'trainsize_p', 'eval_precision-l1l2',
       'eval_recall-l1l2', 'eval_f1-l1l2', 'eval_accuracy-l1l2'],
      dtype='object')

## 231.1 Build the averaged table


In [53]:
# Store p/r/f1 as percentages
eval_ = ["eval_f1","eval_f1-l1","eval_f1-all","eval_f1-l2",'eval_f1-l1l2','eval_f1-das']
metrics_ref = metrics_raw_ref.copy()
metrics_ref[eval_] = metrics_raw_ref[eval_].multiply(100., axis=1)
metrics_ref.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,eval_loss,eval_precision,eval_recall,eval_f1,eval_accuracy,eval_precision-l1,eval_recall-l1,eval_f1-l1,eval_accuracy-l1,eval_precision-l2,...,eval_TITRE,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch,trainsize_p,eval_precision-l1l2,eval_recall-l1l2,eval_f1-l1l2,eval_accuracy-l1l2
Unnamed: 0_level_1,run,trainsize,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
CmBERT IO,1,6084,0.191235,0.959145,0.967932,96.351854,0.942002,0.957034,0.966094,96.154268,0.965067,0.961497,...,,9.045,186.291,11.719,6.3,100.0,0.959145,0.967932,96.351854,0.942002
CmBERT IO,10,6084,0.160874,0.960657,0.965521,96.308321,0.949381,0.954043,0.968956,96.144184,0.967699,0.966477,...,,8.862,190.138,11.961,4.72,100.0,0.960657,0.965521,96.308321,0.949381
CmBERT IO,11,6084,0.19756,0.961474,0.968776,96.511139,0.948452,0.960035,0.967856,96.392939,0.966099,0.96698,...,,9.3453,180.305,11.343,6.82,100.0,0.961474,0.968776,96.511139,0.948452
CmBERT IO,12,6084,0.228159,0.960794,0.969017,96.488806,0.947265,0.96289,0.965434,96.416007,0.970175,0.963971,...,,10.4733,160.886,10.121,10.24,100.0,0.960794,0.969017,96.488806,0.947265
CmBERT IO,13,6084,0.196876,0.954426,0.961905,95.815071,0.940248,0.957484,0.961911,95.969248,0.962332,0.959091,...,,10.7838,156.252,9.83,4.99,100.0,0.954426,0.961905,95.815071,0.940248


In [54]:
# Average over runs
averaged_ref = metrics_ref.groupby(level=0).apply(lambda grp: grp.groupby(by="trainsize").mean())
averaged_ref.set_index(["trainsize_p"], append=True,inplace=True)

# Keep just the necessary columns
averaged_ref=averaged_ref[["eval_f1",'eval_f1-l1l2',"eval_f1-all","eval_f1-l1","eval_f1-l2",'eval_f1-das']]

# Set pretty names
averaged_ref.index.names = ['Model','Trainset Size',"%"]
averaged_ref.rename(columns={"eval_f1":"P+L1+P+L2 (train)",
                             "eval_f1-l1l2":"L1+L2",
                             'eval_f1-all':"All",
                            "eval_f1-l1":"Level 1",
                            "eval_f1-l2":"Level 2",
                            'eval_f1-das':"DAS alignement"
                         }, errors="raise", inplace=True)
averaged_ref.rename(mapper={"camembert_io_ref": "CmBERT IO","camembert_iob2_ref": "CmBERT IOB2","prtn_camembert_io_ref": "Ptrn CmBERT IO","prtn_camembert_iob2_ref": "Ptrn CmBERT IOB2"}, errors="ignore", inplace=True, axis=0)
averaged_ref

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,P+L1+P+L2 (train),L1+L2,All,Level 1,Level 2,DAS alignement
Model,Trainset Size,%,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
CmBERT IO,6084,100.0,96.082082,96.082082,96.124431,95.488981,96.916017,96.630067
CmBERT IOB2,6084,100.0,96.035288,96.030419,95.982694,95.291741,96.839731,96.737652
Ptrn CmBERT IO,6084,100.0,96.125104,96.125104,96.289239,95.970882,96.682303,96.877739
Ptrn CmBERT IOB2,6084,100.0,96.068198,96.035353,96.077814,95.846368,96.363276,96.912063


## 231.2 Create the results table

In [55]:
latex_table_ref = averaged_ref.copy()

caption = "F1 score measured on the fine-tuned models CmBERT, CmBERT+ptrn on reference dataset with Independent Flat NER layers approach (M1)."
print(latex_table_ref.to_latex(float_format="%.1f", multirow=True, caption=caption))
latex_table_ref

\begin{table}
\centering
\caption{F1 score measured on the fine-tuned models CmBERT, CmBERT+ptrn on reference dataset with Independent Flat NER layers approach (M1).}
\begin{tabular}{lllrrrrrr}
\toprule
                 &      &       &  P+L1+P+L2 (train) &  L1+L2 &  All &  Level 1 &  Level 2 &  DAS alignement \\
Model & Trainset Size & \% &                    &        &      &          &          &                 \\
\midrule
CmBERT IO & 6084 & 100.0 &               96.1 &   96.1 & 96.1 &     95.5 &     96.9 &            96.6 \\
CmBERT IOB2 & 6084 & 100.0 &               96.0 &   96.0 & 96.0 &     95.3 &     96.8 &            96.7 \\
Ptrn CmBERT IO & 6084 & 100.0 &               96.1 &   96.1 & 96.3 &     96.0 &     96.7 &            96.9 \\
Ptrn CmBERT IOB2 & 6084 & 100.0 &               96.1 &   96.0 & 96.1 &     95.8 &     96.4 &            96.9 \\
\bottomrule
\end{tabular}
\end{table}



Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,P+L1+P+L2 (train),L1+L2,All,Level 1,Level 2,DAS alignement
Model,Trainset Size,%,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
CmBERT IO,6084,100.0,96.082082,96.082082,96.124431,95.488981,96.916017,96.630067
CmBERT IOB2,6084,100.0,96.035288,96.030419,95.982694,95.291741,96.839731,96.737652
Ptrn CmBERT IO,6084,100.0,96.125104,96.125104,96.289239,95.970882,96.682303,96.877739
Ptrn CmBERT IOB2,6084,100.0,96.068198,96.035353,96.077814,95.846368,96.363276,96.912063


# 232 - Experiment 2: tables

In [56]:
METRICS_DIR_PERO = OUT_BASE / "method_2/m2-220-experiment_2_metrics"

In [57]:
# Load models metrics from metrics jsons
res = []
keys = []

if os.path.exists(METRICS_DIR_PERO / "221-camembert-ner-joint-labelling-io/run_2"):
    camembert_ner_io_pero = compile_metrics(METRICS_DIR_PERO / "221-camembert-ner-joint-labelling-io")
    camembert_ner_io_pero["eval_precision-l1l2"] = camembert_ner_io_pero["eval_precision"]
    camembert_ner_io_pero["eval_recall-l1l2"] = camembert_ner_io_pero["eval_recall"]
    camembert_ner_io_pero["eval_f1-l1l2"] = camembert_ner_io_pero["eval_f1"]
    camembert_ner_io_pero["eval_accuracy-l1l2"] = camembert_ner_io_pero["eval_accuracy"]
    res.append(camembert_ner_io_pero)
    keys.append("CmBERT IO")
    
if os.path.exists(METRICS_DIR_PERO / "222-camembert-ner-joint-labelling-iob2/run_2"):
    camembert_ner_iob2_pero = compile_metrics(METRICS_DIR_PERO / "222-camembert-ner-joint-labelling-iob2")
    res.append(camembert_ner_iob2_pero)
    keys.append("CmBERT IOB2")

if os.path.exists(METRICS_DIR_PERO / "223-pretrained-camembert-ner-joint-labelling-io/run_2"):
    ptrn_camembert_ner_io_pero = compile_metrics(METRICS_DIR_PERO / "223-pretrained-camembert-ner-joint-labelling-io")
    ptrn_camembert_ner_io_pero["eval_precision-l1l2"] = ptrn_camembert_ner_io_pero["eval_precision"]
    ptrn_camembert_ner_io_pero["eval_recall-l1l2"] = ptrn_camembert_ner_io_pero["eval_recall"]
    ptrn_camembert_ner_io_pero["eval_f1-l1l2"] = ptrn_camembert_ner_io_pero["eval_f1"]
    ptrn_camembert_ner_io_pero["eval_accuracy-l1l2"] = ptrn_camembert_ner_io_pero["eval_accuracy"]
    res.append(ptrn_camembert_ner_io_pero)
    keys.append("Ptrn CmBERT IO")
    
if os.path.exists(METRICS_DIR_PERO / "224-pretrained-camembert-ner-joint-labelling-iob2/run_2"):
    ptrn_camembert_ner_iob2_pero = compile_metrics(METRICS_DIR_PERO / "224-pretrained-camembert-ner-joint-labelling-iob2")
    res.append(ptrn_camembert_ner_iob2_pero)
    keys.append("Ptrn CmBERT IOB2")
    
# Create the full table
print(keys)
metrics_raw_pero = pd.concat(res, keys=keys)
metrics_raw_pero

['CmBERT IO', 'CmBERT IOB2', 'Ptrn CmBERT IO', 'Ptrn CmBERT IOB2']


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,eval_loss,eval_precision,eval_recall,eval_f1,eval_accuracy,eval_precision-l1,eval_recall-l1,eval_f1-l1,eval_accuracy-l1,eval_precision-l2,...,eval_TITRE,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch,trainsize_p,eval_precision-l1l2,eval_recall-l1l2,eval_f1-l1l2,eval_accuracy-l1l2
Unnamed: 0_level_1,run,trainsize,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
CmBERT IO,1,6084,0.291394,0.94007,0.948253,0.944143,0.937151,0.932435,0.944958,0.938655,0.961215,0.94416,...,"{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...",10.5194,160.181,10.077,8.92,100.0,0.94007,0.948253,0.944143,0.937151
CmBERT IO,10,6084,0.256732,0.939662,0.948978,0.944297,0.929269,0.91738,0.943637,0.930323,0.950984,0.942519,...,,10.8105,155.867,9.805,6.3,100.0,0.939662,0.948978,0.944297,0.929269
CmBERT IO,11,6084,0.280147,0.937718,0.94112,0.939416,0.925876,0.927165,0.933289,0.930217,0.950201,0.945391,...,,10.5991,158.976,10.001,5.77,100.0,0.937718,0.94112,0.939416,0.925876
CmBERT IO,12,6084,0.228882,0.9388,0.949583,0.944161,0.937882,0.931311,0.946279,0.938735,0.964817,0.943966,...,,10.8705,155.007,9.751,6.56,100.0,0.9388,0.949583,0.944161,0.937882
CmBERT IO,13,6084,0.22175,0.930985,0.942691,0.936802,0.928747,0.915247,0.93439,0.924719,0.946912,0.93519,...,,9.7219,173.32,10.903,3.94,100.0,0.930985,0.942691,0.936802,0.928747
CmBERT IO,14,6084,0.278953,0.938272,0.946439,0.942338,0.925406,0.925806,0.942316,0.933988,0.945294,0.941935,...,,9.0969,185.229,11.652,6.04,100.0,0.938272,0.946439,0.942338,0.925406
CmBERT IO,15,6084,0.219882,0.929524,0.944021,0.936716,0.925928,0.915898,0.939894,0.927741,0.95349,0.934532,...,,10.185,165.44,10.407,4.2,100.0,0.929524,0.944021,0.936716,0.925928
CmBERT IO,16,6084,0.229505,0.945308,0.948737,0.947019,0.933497,0.921801,0.942096,0.931838,0.957039,0.942235,...,"{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...",9.3233,180.73,11.369,6.3,100.0,0.945308,0.948737,0.947019,0.933497
CmBERT IO,17,6084,0.232668,0.930584,0.93846,0.934505,0.924101,0.911164,0.932629,0.921771,0.951819,0.938412,...,,9.1374,184.407,11.601,4.2,100.0,0.930584,0.93846,0.934505,0.924101
CmBERT IO,18,6084,0.258292,0.936753,0.947286,0.94199,0.930208,0.927687,0.940555,0.934077,0.952602,0.943264,...,"{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...",9.2562,182.041,11.452,8.14,100.0,0.936753,0.947286,0.94199,0.930208


In [58]:
metrics_raw_pero.columns

Index(['eval_loss', 'eval_precision', 'eval_recall', 'eval_f1',
       'eval_accuracy', 'eval_precision-l1', 'eval_recall-l1', 'eval_f1-l1',
       'eval_accuracy-l1', 'eval_precision-l2', 'eval_recall-l2', 'eval_f1-l2',
       'eval_accuracy-l2', 'eval_precision-all', 'eval_recall-all',
       'eval_f1-all', 'eval_accuracy-all', 'eval_precision-das',
       'eval_recall-das', 'eval_f1-das', 'eval_accuracy-das', 'eval_PER',
       'eval_ACT', 'eval_ACT_L1', 'eval_ACT_L2', 'eval_DESC', 'eval_TITREH',
       'eval_TITREP', 'eval_SPAT', 'eval_LOC', 'eval_CARDINAL', 'eval_FT',
       'eval_TITRE', 'eval_runtime', 'eval_samples_per_second',
       'eval_steps_per_second', 'epoch', 'trainsize_p', 'eval_precision-l1l2',
       'eval_recall-l1l2', 'eval_f1-l1l2', 'eval_accuracy-l1l2'],
      dtype='object')

In [59]:
# Store p/r/f1 as percentages
eval_ = ["eval_f1","eval_f1-l1","eval_f1-all","eval_f1-l2",'eval_f1-l1l2','eval_f1-das']
metrics_pero = metrics_raw_pero.copy()
metrics_pero[eval_] = metrics_raw_pero[eval_].multiply(100., axis=1)
metrics_pero.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,eval_loss,eval_precision,eval_recall,eval_f1,eval_accuracy,eval_precision-l1,eval_recall-l1,eval_f1-l1,eval_accuracy-l1,eval_precision-l2,...,eval_TITRE,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch,trainsize_p,eval_precision-l1l2,eval_recall-l1l2,eval_f1-l1l2,eval_accuracy-l1l2
Unnamed: 0_level_1,run,trainsize,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
CmBERT IO,1,6084,0.291394,0.94007,0.948253,94.414349,0.937151,0.932435,0.944958,93.8655,0.961215,0.94416,...,"{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...",10.5194,160.181,10.077,8.92,100.0,0.94007,0.948253,94.414349,0.937151
CmBERT IO,10,6084,0.256732,0.939662,0.948978,94.42974,0.929269,0.91738,0.943637,93.032342,0.950984,0.942519,...,,10.8105,155.867,9.805,6.3,100.0,0.939662,0.948978,94.42974,0.929269
CmBERT IO,11,6084,0.280147,0.937718,0.94112,93.941588,0.925876,0.927165,0.933289,93.021725,0.950201,0.945391,...,,10.5991,158.976,10.001,5.77,100.0,0.937718,0.94112,93.941588,0.925876
CmBERT IO,12,6084,0.228882,0.9388,0.949583,94.416061,0.937882,0.931311,0.946279,93.873539,0.964817,0.943966,...,,10.8705,155.007,9.751,6.56,100.0,0.9388,0.949583,94.416061,0.937882
CmBERT IO,13,6084,0.22175,0.930985,0.942691,93.680163,0.928747,0.915247,0.93439,92.471947,0.946912,0.93519,...,,9.7219,173.32,10.903,3.94,100.0,0.930985,0.942691,93.680163,0.928747


In [60]:
# Average over runs
averaged_pero = metrics_pero.groupby(level=0).apply(lambda grp: grp.groupby(by="trainsize").mean())
averaged_pero.set_index(["trainsize_p"], append=True,inplace=True)

# Keep just the necessary columns
averaged_pero=averaged_pero[["eval_f1",'eval_f1-l1l2',"eval_f1-all","eval_f1-l1","eval_f1-l2",'eval_f1-das']]

# Set pretty names
averaged_pero.index.names = ['Model','Trainset Size',"%"]
averaged_pero.rename(columns={"eval_f1":"P+L1+P+L2 (train)",
                             "eval_f1-l1l2":"L1+L2",
                              'eval_f1-all':"All",
                            "eval_f1-l1":"Level 1",
                            "eval_f1-l2":"Level 2",
                            'eval_f1-das':"DAS alignement"
                         }, errors="raise", inplace=True)
averaged_pero.rename(mapper={"camembert_io_pero": "CmBERT IO","camembert_iob2_pero": "CmBERT IOB2","prtn_camembert_io_pero": "Ptrn CmBERT IO","prtn_camembert_iob2_pero": "Ptrn CmBERT IOB2"}, errors="ignore", inplace=True, axis=0)
averaged_pero

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,P+L1+P+L2 (train),L1+L2,All,Level 1,Level 2,DAS alignement
Model,Trainset Size,%,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
CmBERT IO,6084,100.0,94.09358,94.09358,93.695383,93.101579,94.431895,94.549476
CmBERT IOB2,6084,100.0,94.281452,94.161059,93.771105,93.195417,94.486534,94.69002
Ptrn CmBERT IO,6084,100.0,94.0893,94.0893,93.882361,93.437655,94.432368,94.612859
Ptrn CmBERT IOB2,6084,100.0,94.189938,94.119129,93.722318,93.064214,94.535459,94.847359


In [61]:
latex_table_pero = averaged_pero.copy()

caption = "F1 score measured on the fine-tuned models CmBERT, CmBERT+ptrn on noisy dataset with Joint-labelling approach (M2)."
print(latex_table_pero.to_latex(float_format="%.1f", multirow=True, caption=caption))
latex_table_pero

\begin{table}
\centering
\caption{F1 score measured on the fine-tuned models CmBERT, CmBERT+ptrn on noisy dataset with Joint-labelling approach (M2).}
\begin{tabular}{lllrrrrrr}
\toprule
                 &      &       &  P+L1+P+L2 (train) &  L1+L2 &  All &  Level 1 &  Level 2 &  DAS alignement \\
Model & Trainset Size & \% &                    &        &      &          &          &                 \\
\midrule
CmBERT IO & 6084 & 100.0 &               94.1 &   94.1 & 93.7 &     93.1 &     94.4 &            94.5 \\
CmBERT IOB2 & 6084 & 100.0 &               94.3 &   94.2 & 93.8 &     93.2 &     94.5 &            94.7 \\
Ptrn CmBERT IO & 6084 & 100.0 &               94.1 &   94.1 & 93.9 &     93.4 &     94.4 &            94.6 \\
Ptrn CmBERT IOB2 & 6084 & 100.0 &               94.2 &   94.1 & 93.7 &     93.1 &     94.5 &            94.8 \\
\bottomrule
\end{tabular}
\end{table}



Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,P+L1+P+L2 (train),L1+L2,All,Level 1,Level 2,DAS alignement
Model,Trainset Size,%,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
CmBERT IO,6084,100.0,94.09358,94.09358,93.695383,93.101579,94.431895,94.549476
CmBERT IOB2,6084,100.0,94.281452,94.161059,93.771105,93.195417,94.486534,94.69002
Ptrn CmBERT IO,6084,100.0,94.0893,94.0893,93.882361,93.437655,94.432368,94.612859
Ptrn CmBERT IOB2,6084,100.0,94.189938,94.119129,93.722318,93.064214,94.535459,94.847359


# 233 - Experiments 1 & 2: table

Build averaged table of F1-score for each dataset, each BERT-based model and each annotation model.

In [62]:
averaged = pd.concat([averaged_ref,averaged_pero],keys=["Reference","Pero OCR"])
averaged = averaged.reset_index(level=[2,3], drop=True)
averaged

Unnamed: 0_level_0,Unnamed: 1_level_0,P+L1+P+L2 (train),L1+L2,All,Level 1,Level 2,DAS alignement
Unnamed: 0_level_1,Model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Reference,CmBERT IO,96.082082,96.082082,96.124431,95.488981,96.916017,96.630067
Reference,CmBERT IOB2,96.035288,96.030419,95.982694,95.291741,96.839731,96.737652
Reference,Ptrn CmBERT IO,96.125104,96.125104,96.289239,95.970882,96.682303,96.877739
Reference,Ptrn CmBERT IOB2,96.068198,96.035353,96.077814,95.846368,96.363276,96.912063
Pero OCR,CmBERT IO,94.09358,94.09358,93.695383,93.101579,94.431895,94.549476
Pero OCR,CmBERT IOB2,94.281452,94.161059,93.771105,93.195417,94.486534,94.69002
Pero OCR,Ptrn CmBERT IO,94.0893,94.0893,93.882361,93.437655,94.432368,94.612859
Pero OCR,Ptrn CmBERT IOB2,94.189938,94.119129,93.722318,93.064214,94.535459,94.847359


In [63]:
caption = "F1 score measured on the fine-tuned models CmBERTand CmBERT+ptrn on reference dataset and noisy dataset with Joint-labelling approach (M2)."
print(averaged.to_latex(float_format="%.1f", multirow=True, caption=caption))
averaged

\begin{table}
\centering
\caption{F1 score measured on the fine-tuned models CmBERTand CmBERT+ptrn on reference dataset and noisy dataset with Joint-labelling approach (M2).}
\begin{tabular}{llrrrrrr}
\toprule
         &                  &  P+L1+P+L2 (train) &  L1+L2 &  All &  Level 1 &  Level 2 &  DAS alignement \\
{} & Model &                    &        &      &          &          &                 \\
\midrule
\multirow{4}{*}{Reference} & CmBERT IO &               96.1 &   96.1 & 96.1 &     95.5 &     96.9 &            96.6 \\
         & CmBERT IOB2 &               96.0 &   96.0 & 96.0 &     95.3 &     96.8 &            96.7 \\
         & Ptrn CmBERT IO &               96.1 &   96.1 & 96.3 &     96.0 &     96.7 &            96.9 \\
         & Ptrn CmBERT IOB2 &               96.1 &   96.0 & 96.1 &     95.8 &     96.4 &            96.9 \\
\cline{1-8}
\multirow{4}{*}{Pero OCR} & CmBERT IO &               94.1 &   94.1 & 93.7 &     93.1 &     94.4 &            94.5 \\
         & CmBER

Unnamed: 0_level_0,Unnamed: 1_level_0,P+L1+P+L2 (train),L1+L2,All,Level 1,Level 2,DAS alignement
Unnamed: 0_level_1,Model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Reference,CmBERT IO,96.082082,96.082082,96.124431,95.488981,96.916017,96.630067
Reference,CmBERT IOB2,96.035288,96.030419,95.982694,95.291741,96.839731,96.737652
Reference,Ptrn CmBERT IO,96.125104,96.125104,96.289239,95.970882,96.682303,96.877739
Reference,Ptrn CmBERT IOB2,96.068198,96.035353,96.077814,95.846368,96.363276,96.912063
Pero OCR,CmBERT IO,94.09358,94.09358,93.695383,93.101579,94.431895,94.549476
Pero OCR,CmBERT IOB2,94.281452,94.161059,93.771105,93.195417,94.486534,94.69002
Pero OCR,Ptrn CmBERT IO,94.0893,94.0893,93.882361,93.437655,94.432368,94.612859
Pero OCR,Ptrn CmBERT IOB2,94.189938,94.119129,93.722318,93.064214,94.535459,94.847359


# 234 : Experiments 1 and 2: table by classe

In [46]:
import pandas as pd
import json
from pandas import json_normalize

def compile_metrics_by_classes(path, classes): 
    rundirs = [f for f in os.listdir(path)]

    df = pd.DataFrame()
    for run_dir in rundirs:
        if 'run' in run_dir:
            run_path = path / run_dir
            nrun = re.search("\d+",run_dir)[0]

            files = [f for f in os.listdir(run_path) if "test_" in f and 'json' in f]
            sizes = [int(re.search("\d+",f)[0]) for f in files]
                
            for file, size in zip(files,sizes):
                file_path = run_path / file
                dftmp = pd.read_json(file_path)
                classes_dict = {key: dftmp[key] for key in classes}
                dftmp = pd.DataFrame.from_dict(classes_dict)
                dftmp = dftmp.T
                dftmp['number'] = dftmp['number'].astype(int)
                dftmp["trainsize"] = size 
                dftmp["run"] = nrun
                dftmp["trainsize_p"] = round(100 * size / MAX_TRAINSET_SIZE, 1)
                df = pd.concat([df, dftmp])
                df["classe"] = df.index
                
    return df.groupby(["run","classe"]).first()

In [11]:
classes = ['eval_PER','eval_ACT','eval_ACT_L1','eval_ACT_L2','eval_DESC','eval_TITREH','eval_TITREP','eval_SPAT','eval_LOC','eval_CARDINAL','eval_FT'
 #'eval_TITRE'
]

# Load models metrics from metrics jsons
res = []
keys = []

if os.path.exists(METRICS_DIR / "211-camembert-ner-joint-labelling-io/run_2"):
    camembert_ner_io = compile_metrics_by_classes(METRICS_DIR / "211-camembert-ner-joint-labelling-io",classes)
    res.append(camembert_ner_io)
    keys.append("CmBERT IO")
    
if os.path.exists(METRICS_DIR / "212-camembert-ner-joint-labelling-iob2/run_2"):
    camembert_ner_iob2 = compile_metrics_by_classes(METRICS_DIR / "212-camembert-ner-joint-labelling-iob2",classes)
    res.append(camembert_ner_iob2)
    keys.append("CmBERT IOB2")
    
if os.path.exists(METRICS_DIR / "213-pretrained-camembert-ner-joint-labelling-io-classes/run_2"):
    ptrn_camembert_ner_io = compile_metrics_by_classes(METRICS_DIR / "213-pretrained-camembert-ner-joint-labelling-io-classes",classes)
    res.append(ptrn_camembert_ner_io)
    keys.append("Ptrn CmBERT IO")
    
"""
if os.path.exists(METRICS_DIR / "214-pretrained-camembert-ner-joint-labelling-iob2/run_2"):
    ptrn_camembert_ner_iob2 = compile_metrics_by_classes(METRICS_DIR / "214-pretrained-camembert-ner-joint-labelling-iob2-classes",classes)
    res.append(ptrn_camembert_ner_iob2)
    keys.append("Ptrn CmBERT IOB2")"""

'\nif os.path.exists(METRICS_DIR / "214-pretrained-camembert-ner-joint-labelling-iob2/run_2"):\n    ptrn_camembert_ner_iob2 = compile_metrics_by_classes(METRICS_DIR / "214-pretrained-camembert-ner-joint-labelling-iob2-classes",classes)\n    res.append(ptrn_camembert_ner_iob2)\n    keys.append("Ptrn CmBERT IOB2")'

In [12]:
metrics_raw_classes = pd.concat(res, keys=keys)
metrics_raw_classes

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,precision,recall,f1,number,trainsize,trainsize_p
Unnamed: 0_level_1,run,classe,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
CmBERT IO,1,eval_ACT,0.894783,0.925046,0.909663,1094,6084,100.0
CmBERT IO,1,eval_ACT_L1,0.953488,0.954413,0.953951,1031,6084,100.0
CmBERT IO,1,eval_ACT_L2,0.282828,0.444444,0.345679,63,6084,100.0
CmBERT IO,1,eval_CARDINAL,0.997719,0.999429,0.998573,1751,6084,100.0
CmBERT IO,1,eval_DESC,0.524752,0.670886,0.588889,79,6084,100.0
...,...,...,...,...,...,...,...,...
Ptrn CmBERT IO,5,eval_LOC,0.968316,0.974818,0.971556,1787,6084,100.0
Ptrn CmBERT IO,5,eval_PER,0.976923,0.979822,0.978370,1685,6084,100.0
Ptrn CmBERT IO,5,eval_SPAT,0.976027,0.978821,0.977422,1747,6084,100.0
Ptrn CmBERT IO,5,eval_TITREH,1.000000,1.000000,1.000000,43,6084,100.0


In [13]:
# Store p/r/f1 as percentages
val = ["f1","precision","recall"]
metrics_raw_classes = metrics_raw_classes.copy()
metrics_raw_classes[val] = metrics_raw_classes[val].multiply(100., axis=1)
metrics_raw_classes.head()
metrics_raw_classes

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,precision,recall,f1,number,trainsize,trainsize_p
Unnamed: 0_level_1,run,classe,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
CmBERT IO,1,eval_ACT,89.478338,92.504570,90.966292,1094,6084,100.0
CmBERT IO,1,eval_ACT_L1,95.348837,95.441319,95.395056,1031,6084,100.0
CmBERT IO,1,eval_ACT_L2,28.282828,44.444444,34.567901,63,6084,100.0
CmBERT IO,1,eval_CARDINAL,99.771950,99.942890,99.857347,1751,6084,100.0
CmBERT IO,1,eval_DESC,52.475248,67.088608,58.888889,79,6084,100.0
...,...,...,...,...,...,...,...,...
Ptrn CmBERT IO,5,eval_LOC,96.831573,97.481813,97.155605,1787,6084,100.0
Ptrn CmBERT IO,5,eval_PER,97.692308,97.982196,97.837037,1685,6084,100.0
Ptrn CmBERT IO,5,eval_SPAT,97.602740,97.882084,97.742212,1747,6084,100.0
Ptrn CmBERT IO,5,eval_TITREH,100.000000,100.000000,100.000000,43,6084,100.0


In [14]:
# Average over runs
averaged_classes = metrics_raw_classes.groupby(level=0).apply(lambda grp: grp.groupby(by="classe").mean())
averaged_classes.set_index(["number"],
                   append=True,
                   inplace=True)

# Keep just the necessary columns
averaged_classes=averaged_classes[["precision","recall","f1"]]

# Set pretty names
averaged_classes.index.names = ['Test','Entity type',"Count"]
averaged_classes.rename(mapper={'eval_PER':'PER','eval_ACT':'ACT','eval_ACT_L1':'ACT L1 only','eval_ACT_L2':'ACT L2 only','eval_DESC':'DESC','eval_TITREH':'TITREH','eval_TITREP':'TITREP','eval_SPAT':'SPAT','eval_LOC':'LOC','eval_CARDINAL':'CARDINAL','eval_FT':'FT'}, errors="ignore", inplace=True, axis=0)
averaged_classes

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,precision,recall,f1
Test,Entity type,Count,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
CmBERT IO,ACT,1094.0,89.740092,91.654479,90.682136
CmBERT IO,ACT L1 only,1031.0,93.545559,94.975752,94.228502
CmBERT IO,ACT L2 only,63.0,29.614605,37.301587,31.652003
CmBERT IO,CARDINAL,1751.0,99.70062,99.822958,99.7617
CmBERT IO,DESC,79.0,37.82724,48.924051,42.381466
CmBERT IO,FT,14.0,52.050866,31.071429,35.933656
CmBERT IO,LOC,1787.0,97.17672,97.714046,97.444382
CmBERT IO,PER,1685.0,96.860526,97.51632,97.185476
CmBERT IO,SPAT,1747.0,96.95102,97.68174,97.314619
CmBERT IO,TITREH,43.0,94.534345,97.44186,95.801115
