# M3 : 330 - Figures and metrics

Evaluation scores on level-1 entities segmentation and classification with joint-labels method

In [1]:
import os, sys
from pathlib import Path

# Detect whether the notebook runs inside Google Colab and expose the result
# to child processes through an environment variable.
ENV_IS_GOOGLE_COLAB = 'google.colab' in str(get_ipython())
os.environ["ENV_IS_GOOGLE_COLAB"] = str(ENV_IS_GOOGLE_COLAB)

if ENV_IS_GOOGLE_COLAB:
    from google.colab import drive
    mountpoint = Path("/content/drive")
    drive.mount(str(mountpoint)) # Mount gdrive to BASE
    base = mountpoint / "MyDrive/article_icdar_2023" # Adapt this to your situation
    sys.path.append(str(base)) # Add BASE to Python Path
    BASE = Path(base).resolve() # Make BASE absolute
    DATASETS = BASE / "dataset"
    # OUT_BASE was previously left undefined on Colab, which made the print below
    # and every later `OUT_BASE / ...` fail with a NameError.
    OUT_BASE = BASE / "res_ICDAR" # Adapt this to your situation
else:
    # "__file__" is a plain string here, so realpath resolves it against the current
    # working directory: BASE ends up being the directory the kernel was started in.
    BASE = Path(os.path.dirname(os.path.realpath("__file__"))).resolve()
    DATASETS = Path('/work/stual/dataset_ICDAR').resolve()
    OUT_BASE = Path('/work/stual/res_ICDAR').resolve()

print(sys.path)
print(BASE)
print(DATASETS)
print(OUT_BASE)

['/lrde/home2/stual/stage_DAS/m3_hierarchical_ner', '/usr/lib/python310.zip', '/usr/lib/python3.10', '/usr/lib/python3.10/lib-dynload', '', '/lrde/home2/stual/.venv/python_3_9/lib/python3.10/site-packages']
/lrde/home2/stual/stage_DAS/m3_hierarchical_ner
/work/stual/dataset_ICDAR
/work/stual/res_ICDAR


## Constants

In [2]:
# Training-set size that counts as 100 % when the "trainsize_p" column is computed
# (presumably the size of the full training set; confirm against the dataset).
MAX_TRAINSET_SIZE = 6084

## Tools

In [3]:
import pandas as pd
import re
import os
import json

def compile_metrics(path, max_trainset_size=None):
    """Collect the test metrics of every run stored under ``path``.

    ``path`` is scanned for sub-directories whose name contains ``run`` and a
    number (e.g. ``run_2``); the first number is used as the run identifier.
    Inside each run directory, every file whose name contains both ``test_`` and
    ``json`` is read as one record, and the first number found in the file name
    is used as the training-set size.

    Parameters
    ----------
    path : str or pathlib.Path
        Directory containing the run sub-directories.
    max_trainset_size : int, optional
        Training-set size counted as 100 % in the ``trainsize_p`` column.
        Defaults to the notebook-level ``MAX_TRAINSET_SIZE``.

    Returns
    -------
    pandas.DataFrame
        One row per ``(run, trainsize)`` pair (the index), holding the content of
        the metrics file plus a ``trainsize_p`` column. ``run`` is kept as the
        digit string extracted from the directory name.

    Raises
    ------
    ValueError
        If a metrics file name contains no number to use as training-set size.
    FileNotFoundError
        If no metrics file is found under ``path``.
    """
    if max_trainset_size is None:
        max_trainset_size = MAX_TRAINSET_SIZE
    path = Path(path)

    frames = []
    # Sorted listings make the result independent of the file-system order
    for run_dir in sorted(os.listdir(path)):
        run_path = path / run_dir
        run_match = re.search(r"\d+", run_dir)
        # Skip anything that is not a numbered run directory (stray files, logs, ...)
        if 'run' not in run_dir or run_match is None or not run_path.is_dir():
            continue
        nrun = run_match[0]

        for file in sorted(os.listdir(run_path)):
            if "test_" not in file or 'json' not in file:
                continue
            size_match = re.search(r"\d+", file)
            if size_match is None:
                raise ValueError(f"Cannot read the training-set size from file name: {run_path / file}")
            size = int(size_match[0])

            dftmp = pd.DataFrame([pd.read_json(run_path / file, typ='series')])
            dftmp["trainsize"] = size
            dftmp["run"] = nrun
            dftmp["trainsize_p"] = round(100 * size / max_trainset_size, 1)
            frames.append(dftmp)

    if not frames:
        raise FileNotFoundError(f"No test metrics file found in the run directories of {path}")

    # Single concatenation instead of growing a DataFrame inside the loop
    df = pd.concat(frames)
    return df.groupby(["run","trainsize"]).first()

# 331 - Experiment 1: tables on all-entities metrics

In [4]:
# Directory holding the Experiment 1 metrics, one sub-directory per model
METRICS_DIR_REF = OUT_BASE / "method_3/m3-310-experiment_1_metrics"

In [5]:
# Gather the per-run metrics of each Experiment 1 model.
# A model is only loaded when its "run_2" directory exists on disk.
res, keys = [], []

if os.path.exists(METRICS_DIR_REF / "311-camembert-ner-hierarchical-loss-io/run_2"):
    camembert_ner_io_ref = compile_metrics(METRICS_DIR_REF / "311-camembert-ner-hierarchical-loss-io")
    # For the IO model, the "-l1l2" columns are filled with copies of the global scores
    for metric in ("precision", "recall", "f1", "accuracy"):
        camembert_ner_io_ref[f"eval_{metric}-l1l2"] = camembert_ner_io_ref[f"eval_{metric}"]
    res.append(camembert_ner_io_ref)
    keys.append("CmBERT IO")

if os.path.exists(METRICS_DIR_REF / "312-camembert-ner-hierarchical-loss-iob2/run_2"):
    camembert_ner_iob2_ref = compile_metrics(METRICS_DIR_REF / "312-camembert-ner-hierarchical-loss-iob2")
    res.append(camembert_ner_iob2_ref)
    keys.append("CmBERT IOB2")

if os.path.exists(METRICS_DIR_REF / "313-pretrained-camembert-ner-hierarchical-loss-io/run_2"):
    ptrn_camembert_ner_io_ref = compile_metrics(METRICS_DIR_REF / "313-pretrained-camembert-ner-hierarchical-loss-io")
    # Same mirroring as for the non-pretrained IO model
    for metric in ("precision", "recall", "f1", "accuracy"):
        ptrn_camembert_ner_io_ref[f"eval_{metric}-l1l2"] = ptrn_camembert_ner_io_ref[f"eval_{metric}"]
    res.append(ptrn_camembert_ner_io_ref)
    keys.append("Ptrn CmBERT IO")

if os.path.exists(METRICS_DIR_REF / "314-pretrained-camembert-ner-hierarchical-loss-iob2/run_2"):
    ptrn_camembert_ner_iob2_ref = compile_metrics(METRICS_DIR_REF / "314-pretrained-camembert-ner-hierarchical-loss-iob2")
    res.append(ptrn_camembert_ner_iob2_ref)
    keys.append("Ptrn CmBERT IOB2")

# Stack every loaded model into one table, keyed by the model's display name
print(keys)
metrics_raw_ref = pd.concat(res, keys=keys)
metrics_raw_ref

['CmBERT IO', 'CmBERT IOB2', 'Ptrn CmBERT IO', 'Ptrn CmBERT IOB2']


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,eval_loss,eval_precision,eval_recall,eval_f1,eval_accuracy,eval_precision-l1,eval_recall-l1,eval_f1-l1,eval_accuracy-l1,eval_precision-l2,...,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch,trainsize_p,eval_TITRE,eval_precision-l1l2,eval_recall-l1l2,eval_f1-l1l2,eval_accuracy-l1l2
Unnamed: 0_level_1,run,trainsize,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
CmBERT IO,1,6084,0.18214,0.961187,0.964316,0.962749,0.945975,0.946903,0.965874,0.956294,0.963055,0.970963,...,9.1408,184.338,11.596,4.72,100.0,,0.961187,0.964316,0.962749,0.945975
CmBERT IO,2,6084,0.174358,0.961986,0.967089,0.96453,0.950826,0.957998,0.969177,0.963555,0.969969,0.969738,...,9.0102,187.01,11.764,6.56,100.0,"{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...",0.961986,0.967089,0.96453,0.950826
CmBERT IO,3,6084,0.158997,0.960871,0.968053,0.964449,0.944737,0.95302,0.969177,0.96103,0.967492,0.96561,...,9.0555,186.074,11.706,4.99,100.0,,0.960871,0.968053,0.964449,0.944737
CmBERT IO,4,6084,0.206844,0.951365,0.962146,0.956725,0.934107,0.947667,0.956847,0.952235,0.954696,0.95463,...,8.9319,188.65,11.868,4.99,100.0,,0.951365,0.962146,0.956725,0.934107
CmBERT IO,5,6084,0.207482,0.962887,0.963351,0.963119,0.938854,0.954248,0.964333,0.959264,0.959598,0.966505,...,8.8263,190.906,12.01,4.72,100.0,,0.962887,0.963351,0.963119,0.938854
CmBERT IOB2,1,6084,0.222135,0.961066,0.964647,0.962853,0.944737,0.953579,0.967856,0.960664,0.965635,0.962745,...,10.5117,160.298,10.084,8.4,100.0,,0.958746,0.969262,0.963975,0.946078
CmBERT IOB2,2,6084,0.197114,0.960638,0.96073,0.960684,0.940351,0.949826,0.962792,0.956265,0.964396,0.964247,...,10.6552,158.139,9.948,4.46,100.0,,0.955571,0.964441,0.959986,0.94097
CmBERT IOB2,3,6084,0.205048,0.964265,0.964265,0.964265,0.944582,0.956209,0.966314,0.961235,0.966512,0.962219,...,10.2769,163.96,10.314,7.09,100.0,,0.961188,0.967213,0.964191,0.945408
CmBERT IOB2,4,6084,0.205131,0.962504,0.956526,0.959505,0.939577,0.950655,0.958609,0.954615,0.95805,0.964969,...,10.3932,162.125,10.199,5.25,100.0,,0.959166,0.95986,0.959513,0.940093
CmBERT IOB2,5,6084,0.221598,0.961111,0.958723,0.959916,0.937822,0.939499,0.957288,0.94831,0.948246,0.964112,...,10.4495,161.252,10.144,5.25,100.0,,0.958043,0.960583,0.959311,0.938751


In [6]:
# Show which metric columns are available in the Experiment 1 table
metrics_raw_ref.columns

Index(['eval_loss', 'eval_precision', 'eval_recall', 'eval_f1',
       'eval_accuracy', 'eval_precision-l1', 'eval_recall-l1', 'eval_f1-l1',
       'eval_accuracy-l1', 'eval_precision-l2', 'eval_recall-l2', 'eval_f1-l2',
       'eval_accuracy-l2', 'eval_precision-all', 'eval_recall-all',
       'eval_f1-all', 'eval_accuracy-all', 'eval_precision-das',
       'eval_recall-das', 'eval_f1-das', 'eval_accuracy-das', 'eval_PER',
       'eval_ACT', 'eval_ACT_L1', 'eval_ACT_L2', 'eval_DESC', 'eval_TITREH',
       'eval_TITREP', 'eval_SPAT', 'eval_LOC', 'eval_CARDINAL', 'eval_FT',
       'eval_runtime', 'eval_samples_per_second', 'eval_steps_per_second',
       'epoch', 'trainsize_p', 'eval_TITRE', 'eval_precision-l1l2',
       'eval_recall-l1l2', 'eval_f1-l1l2', 'eval_accuracy-l1l2'],
      dtype='object')

## 331.1 Build the averaged table


In [7]:
# Express the F1 scores as percentages (the other columns keep their raw scale)
eval_ = ["eval_f1", 'eval_f1-all', "eval_f1-l1", "eval_f1-l2", 'eval_f1-l1l2', 'eval_f1-das']
metrics_ref = metrics_raw_ref.copy()
metrics_ref[eval_] = metrics_ref[eval_] * 100.
metrics_ref.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,eval_loss,eval_precision,eval_recall,eval_f1,eval_accuracy,eval_precision-l1,eval_recall-l1,eval_f1-l1,eval_accuracy-l1,eval_precision-l2,...,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch,trainsize_p,eval_TITRE,eval_precision-l1l2,eval_recall-l1l2,eval_f1-l1l2,eval_accuracy-l1l2
Unnamed: 0_level_1,run,trainsize,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
CmBERT IO,1,6084,0.18214,0.961187,0.964316,96.274899,0.945975,0.946903,0.965874,95.629428,0.963055,0.970963,...,9.1408,184.338,11.596,4.72,100.0,,0.961187,0.964316,96.274899,0.945975
CmBERT IO,2,6084,0.174358,0.961986,0.967089,96.453048,0.950826,0.957998,0.969177,96.355478,0.969969,0.969738,...,9.0102,187.01,11.764,6.56,100.0,"{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...",0.961986,0.967089,96.453048,0.950826
CmBERT IO,3,6084,0.158997,0.960871,0.968053,96.444871,0.944737,0.95302,0.969177,96.103046,0.967492,0.96561,...,9.0555,186.074,11.706,4.99,100.0,,0.960871,0.968053,96.444871,0.944737
CmBERT IO,4,6084,0.206844,0.951365,0.962146,95.672501,0.934107,0.947667,0.956847,95.223488,0.954696,0.95463,...,8.9319,188.65,11.868,4.99,100.0,,0.951365,0.962146,95.672501,0.934107
CmBERT IO,5,6084,0.207482,0.962887,0.963351,96.31192,0.938854,0.954248,0.964333,95.926413,0.959598,0.966505,...,8.8263,190.906,12.01,4.72,100.0,,0.962887,0.963351,96.31192,0.938854


In [8]:
# Average over runs: one row per (model, trainsize).
# numeric_only=True keeps the object-typed columns (e.g. eval_TITRE, which holds
# dicts) out of the mean; pandas >= 2.0 raises a TypeError on them instead of
# silently dropping them.
averaged_ref = metrics_ref.groupby(level=0).apply(
    lambda grp: grp.groupby(by="trainsize").mean(numeric_only=True)
)
averaged_ref = averaged_ref.set_index(["trainsize_p"], append=True)

# Keep just the necessary columns (explicit copy so the renames below act on an
# independent frame rather than on a slice)
averaged_ref = averaged_ref[["eval_f1",'eval_f1-l1l2','eval_f1-all',"eval_f1-l1","eval_f1-l2",'eval_f1-das']].copy()

# Set pretty names; the model labels already come from the concat keys
averaged_ref.index.names = ['Model','Trainset Size',"%"]
averaged_ref = averaged_ref.rename(columns={"eval_f1":"P+L1+P+L2 (train)",
                                            "eval_f1-l1l2":"L1+L2",
                                            'eval_f1-all':"All",
                                            "eval_f1-l1":"Level 1",
                                            "eval_f1-l2":"Level 2",
                                            'eval_f1-das':"DAS alignement"
                                           }, errors="raise")
averaged_ref

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,P+L1+P+L2 (train),L1+L2,All,Level 1,Level 2,DAS alignement
Model,Trainset Size,%,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
CmBERT IO,6084,100.0,96.231448,96.231448,96.304372,95.84757,96.868503,96.788148
CmBERT IOB2,6084,100.0,96.144481,96.139536,96.117493,95.62179,96.729561,96.802549
Ptrn CmBERT IO,6084,100.0,95.752708,95.752708,95.845413,95.188101,96.663223,96.411974
Ptrn CmBERT IOB2,6084,100.0,96.254617,96.283451,96.336005,96.024833,96.720151,97.01719


## 331.2 Create the results table

In [9]:
# Export the averaged Experiment 1 table as LaTeX, then display it
latex_table_ref = averaged_ref.copy()

caption = (
    "F1 score measured on the fine-tuned models CmBERT, CmBERT+ptrn on reference dataset "
    "with Joint-labelling + Hierarchical loss approach (M3)."
)
print(
    latex_table_ref.to_latex(
        float_format="%.1f",
        multirow=True,
        caption=caption,
    )
)
latex_table_ref

\begin{table}
\centering
\caption{F1 score measured on the fine-tuned models CmBERT, CmBERT+ptrn on reference dataset with Joint-labelling + Hierarchical loss approach (M3).}
\begin{tabular}{lllrrrrrr}
\toprule
                 &      &       &  P+L1+P+L2 (train) &  L1+L2 &  All &  Level 1 &  Level 2 &  DAS alignement \\
Model & Trainset Size & \% &                    &        &      &          &          &                 \\
\midrule
CmBERT IO & 6084 & 100.0 &               96.2 &   96.2 & 96.3 &     95.8 &     96.9 &            96.8 \\
CmBERT IOB2 & 6084 & 100.0 &               96.1 &   96.1 & 96.1 &     95.6 &     96.7 &            96.8 \\
Ptrn CmBERT IO & 6084 & 100.0 &               95.8 &   95.8 & 95.8 &     95.2 &     96.7 &            96.4 \\
Ptrn CmBERT IOB2 & 6084 & 100.0 &               96.3 &   96.3 & 96.3 &     96.0 &     96.7 &            97.0 \\
\bottomrule
\end{tabular}
\end{table}



Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,P+L1+P+L2 (train),L1+L2,All,Level 1,Level 2,DAS alignement
Model,Trainset Size,%,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
CmBERT IO,6084,100.0,96.231448,96.231448,96.304372,95.84757,96.868503,96.788148
CmBERT IOB2,6084,100.0,96.144481,96.139536,96.117493,95.62179,96.729561,96.802549
Ptrn CmBERT IO,6084,100.0,95.752708,95.752708,95.845413,95.188101,96.663223,96.411974
Ptrn CmBERT IOB2,6084,100.0,96.254617,96.283451,96.336005,96.024833,96.720151,97.01719


# 332 - Experiment 2: tables

In [10]:
# Directory holding the Experiment 2 metrics, one sub-directory per model
METRICS_DIR_PERO = OUT_BASE / "method_3/m3-320-experiment_2_metrics"

In [11]:
# Gather the per-run metrics of each Experiment 2 model.
# A model is only loaded when its "run_2" directory exists on disk.
res, keys = [], []

if os.path.exists(METRICS_DIR_PERO / "321-camembert-ner-hierarchical-loss-io/run_2"):
    camembert_ner_io_pero = compile_metrics(METRICS_DIR_PERO / "321-camembert-ner-hierarchical-loss-io")
    # For the IO model, the "-l1l2" columns are filled with copies of the global scores
    for metric in ("precision", "recall", "f1", "accuracy"):
        camembert_ner_io_pero[f"eval_{metric}-l1l2"] = camembert_ner_io_pero[f"eval_{metric}"]
    res.append(camembert_ner_io_pero)
    keys.append("CmBERT IO")

if os.path.exists(METRICS_DIR_PERO / "322-camembert-ner-hierarchical-loss-iob2/run_2"):
    camembert_ner_iob2_pero = compile_metrics(METRICS_DIR_PERO / "322-camembert-ner-hierarchical-loss-iob2")
    res.append(camembert_ner_iob2_pero)
    keys.append("CmBERT IOB2")

if os.path.exists(METRICS_DIR_PERO / "323-pretrained-camembert-ner-hierarchical-loss-io/run_2"):
    ptrn_camembert_ner_io_pero = compile_metrics(METRICS_DIR_PERO / "323-pretrained-camembert-ner-hierarchical-loss-io")
    # Same mirroring as for the non-pretrained IO model
    for metric in ("precision", "recall", "f1", "accuracy"):
        ptrn_camembert_ner_io_pero[f"eval_{metric}-l1l2"] = ptrn_camembert_ner_io_pero[f"eval_{metric}"]
    res.append(ptrn_camembert_ner_io_pero)
    keys.append("Ptrn CmBERT IO")

if os.path.exists(METRICS_DIR_PERO / "324-pretrained-camembert-ner-hierarchical-loss-iob2/run_2"):
    ptrn_camembert_ner_iob2_pero = compile_metrics(METRICS_DIR_PERO / "324-pretrained-camembert-ner-hierarchical-loss-iob2")
    res.append(ptrn_camembert_ner_iob2_pero)
    keys.append("Ptrn CmBERT IOB2")

# Stack every loaded model into one table, keyed by the model's display name
print(keys)
metrics_raw_pero = pd.concat(res, keys=keys)
metrics_raw_pero

['CmBERT IO', 'CmBERT IOB2', 'Ptrn CmBERT IO', 'Ptrn CmBERT IOB2']


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,eval_loss,eval_precision,eval_recall,eval_f1,eval_accuracy,eval_precision-l1,eval_recall-l1,eval_f1-l1,eval_accuracy-l1,eval_precision-l2,...,eval_TITRE,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch,trainsize_p,eval_precision-l1l2,eval_recall-l1l2,eval_f1-l1l2,eval_accuracy-l1l2
Unnamed: 0_level_1,run,trainsize,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
CmBERT IO,1,6084,0.256559,0.947254,0.944505,0.945877,0.93381,0.928977,0.935931,0.932441,0.956361,0.95593,...,,9.0527,186.133,11.709,5.25,100.0,0.947254,0.944505,0.945877,0.93381
CmBERT IO,2,6084,0.223901,0.930058,0.945351,0.937642,0.931148,0.913155,0.939894,0.926332,0.951245,0.936932,...,,9.2894,181.39,11.411,7.09,100.0,0.930058,0.945351,0.937642,0.931148
CmBERT IO,3,6084,0.241124,0.943616,0.948978,0.94629,0.937621,0.935105,0.945399,0.940223,0.958866,0.94615,...,,8.6041,195.838,12.32,7.09,100.0,0.943616,0.948978,0.94629,0.937621
CmBERT IO,4,6084,0.264271,0.94124,0.948978,0.945093,0.932296,0.933435,0.941656,0.937527,0.95469,0.945058,...,"{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...",9.4472,178.36,11.22,7.61,100.0,0.94124,0.948978,0.945093,0.932296
CmBERT IO,5,6084,0.242043,0.940459,0.951034,0.945717,0.933967,0.933493,0.945619,0.939517,0.957666,0.94328,...,,8.8687,189.995,11.952,6.56,100.0,0.940459,0.951034,0.945717,0.933967
CmBERT IOB2,1,6084,0.29933,0.943229,0.944041,0.943634,0.927442,0.927379,0.941876,0.934571,0.954116,0.9379,...,,12.0324,140.038,8.81,7.09,100.0,0.937724,0.948265,0.942965,0.928172
CmBERT IOB2,2,6084,0.272428,0.938204,0.93964,0.938922,0.922065,0.920467,0.937693,0.929,0.949836,0.932746,...,,11.6413,144.744,9.106,4.72,100.0,0.933206,0.944035,0.938589,0.922796
CmBERT IOB2,3,6084,0.260303,0.930581,0.933518,0.932047,0.917263,0.894503,0.931528,0.91264,0.94378,0.933423,...,,12.0146,140.246,8.823,4.46,100.0,0.920289,0.9392,0.929648,0.917837
CmBERT IOB2,4,6084,0.284175,0.946132,0.944232,0.945181,0.93287,0.929638,0.945399,0.937452,0.955995,0.947156,...,,11.7191,143.783,9.045,7.61,100.0,0.940549,0.948507,0.944511,0.933392
CmBERT IOB2,5,6084,0.316962,0.940163,0.939353,0.939758,0.925197,0.921552,0.941435,0.931387,0.947382,0.940621,...,,11.2559,149.699,9.417,5.51,100.0,0.937019,0.942343,0.939673,0.925719


In [12]:
# Show which metric columns are available in the Experiment 2 table
metrics_raw_pero.columns

Index(['eval_loss', 'eval_precision', 'eval_recall', 'eval_f1',
       'eval_accuracy', 'eval_precision-l1', 'eval_recall-l1', 'eval_f1-l1',
       'eval_accuracy-l1', 'eval_precision-l2', 'eval_recall-l2', 'eval_f1-l2',
       'eval_accuracy-l2', 'eval_precision-all', 'eval_recall-all',
       'eval_f1-all', 'eval_accuracy-all', 'eval_precision-das',
       'eval_recall-das', 'eval_f1-das', 'eval_accuracy-das', 'eval_PER',
       'eval_ACT', 'eval_ACT_L1', 'eval_ACT_L2', 'eval_DESC', 'eval_TITREH',
       'eval_TITREP', 'eval_SPAT', 'eval_LOC', 'eval_CARDINAL', 'eval_FT',
       'eval_TITRE', 'eval_runtime', 'eval_samples_per_second',
       'eval_steps_per_second', 'epoch', 'trainsize_p', 'eval_precision-l1l2',
       'eval_recall-l1l2', 'eval_f1-l1l2', 'eval_accuracy-l1l2'],
      dtype='object')

In [13]:
# Express the F1 scores as percentages (the other columns keep their raw scale)
eval_ = ["eval_f1", "eval_f1-l1", "eval_f1-l2", 'eval_f1-l1l2', 'eval_f1-das', 'eval_f1-all']
metrics_pero = metrics_raw_pero.copy()
metrics_pero[eval_] = metrics_pero[eval_] * 100.
metrics_pero.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,eval_loss,eval_precision,eval_recall,eval_f1,eval_accuracy,eval_precision-l1,eval_recall-l1,eval_f1-l1,eval_accuracy-l1,eval_precision-l2,...,eval_TITRE,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch,trainsize_p,eval_precision-l1l2,eval_recall-l1l2,eval_f1-l1l2,eval_accuracy-l1l2
Unnamed: 0_level_1,run,trainsize,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
CmBERT IO,1,6084,0.256559,0.947254,0.944505,94.587722,0.93381,0.928977,0.935931,93.244132,0.956361,0.95593,...,,9.0527,186.133,11.709,5.25,100.0,0.947254,0.944505,94.587722,0.93381
CmBERT IO,2,6084,0.223901,0.930058,0.945351,93.76424,0.931148,0.913155,0.939894,92.633178,0.951245,0.936932,...,,9.2894,181.39,11.411,7.09,100.0,0.930058,0.945351,93.76424,0.931148
CmBERT IO,3,6084,0.241124,0.943616,0.948978,94.628971,0.937621,0.935105,0.945399,94.022334,0.958866,0.94615,...,,8.6041,195.838,12.32,7.09,100.0,0.943616,0.948978,94.628971,0.937621
CmBERT IO,4,6084,0.264271,0.94124,0.948978,94.509332,0.932296,0.933435,0.941656,93.75274,0.95469,0.945058,...,"{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...",9.4472,178.36,11.22,7.61,100.0,0.94124,0.948978,94.509332,0.932296
CmBERT IO,5,6084,0.242043,0.940459,0.951034,94.571686,0.933967,0.933493,0.945619,93.951657,0.957666,0.94328,...,,8.8687,189.995,11.952,6.56,100.0,0.940459,0.951034,94.571686,0.933967


In [14]:
# Average over runs: one row per (model, trainsize).
# numeric_only=True keeps the object-typed columns (e.g. eval_TITRE, which holds
# dicts) out of the mean; pandas >= 2.0 raises a TypeError on them instead of
# silently dropping them.
averaged_pero = metrics_pero.groupby(level=0).apply(
    lambda grp: grp.groupby(by="trainsize").mean(numeric_only=True)
)
averaged_pero = averaged_pero.set_index(["trainsize_p"], append=True)

# Keep just the necessary columns (explicit copy so the renames below act on an
# independent frame rather than on a slice)
averaged_pero = averaged_pero[["eval_f1",'eval_f1-l1l2','eval_f1-all',"eval_f1-l1","eval_f1-l2",'eval_f1-das']].copy()

# Set pretty names; the model labels already come from the concat keys
averaged_pero.index.names = ['Model','Trainset Size',"%"]
averaged_pero = averaged_pero.rename(columns={"eval_f1":"P+L1+P+L2 (train)",
                                              "eval_f1-l1l2":"L1+L2",
                                              "eval_f1-all":"All",
                                              "eval_f1-l1":"Level 1",
                                              "eval_f1-l2":"Level 2",
                                              "eval_f1-das":"DAS alignement"
                                             }, errors="raise")
averaged_pero

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,P+L1+P+L2 (train),L1+L2,All,Level 1,Level 2,DAS alignement
Model,Trainset Size,%,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
CmBERT IO,6084,100.0,94.41239,94.41239,94.109835,93.520808,94.838207,94.773225
CmBERT IOB2,6084,100.0,93.990849,93.907747,93.518969,92.901022,94.285655,94.583356
Ptrn CmBERT IO,6084,100.0,94.398637,94.398637,94.111771,93.57972,94.769611,94.91448
Ptrn CmBERT IOB2,6084,100.0,94.189544,94.03894,93.660205,93.07517,94.38566,94.808493


In [15]:
# Export the averaged Experiment 2 table as LaTeX, then display it
latex_table_pero = averaged_pero.copy()

caption = (
    "F1 score measured on the fine-tuned models CmBERT, CmBERT+ptrn on noisy dataset "
    "with Joint-labelling + Hierarchical loss approach (M3)."
)
print(
    latex_table_pero.to_latex(
        float_format="%.1f",
        multirow=True,
        caption=caption,
    )
)
latex_table_pero

\begin{table}
\centering
\caption{F1 score measured on the fine-tuned models CmBERT, CmBERT+ptrn on noisy dataset with Joint-labelling + Hierarchical loss approach (M3).}
\begin{tabular}{lllrrrrrr}
\toprule
                 &      &       &  P+L1+P+L2 (train) &  L1+L2 &  All &  Level 1 &  Level 2 &  DAS alignement \\
Model & Trainset Size & \% &                    &        &      &          &          &                 \\
\midrule
CmBERT IO & 6084 & 100.0 &               94.4 &   94.4 & 94.1 &     93.5 &     94.8 &            94.8 \\
CmBERT IOB2 & 6084 & 100.0 &               94.0 &   93.9 & 93.5 &     92.9 &     94.3 &            94.6 \\
Ptrn CmBERT IO & 6084 & 100.0 &               94.4 &   94.4 & 94.1 &     93.6 &     94.8 &            94.9 \\
Ptrn CmBERT IOB2 & 6084 & 100.0 &               94.2 &   94.0 & 93.7 &     93.1 &     94.4 &            94.8 \\
\bottomrule
\end{tabular}
\end{table}



Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,P+L1+P+L2 (train),L1+L2,All,Level 1,Level 2,DAS alignement
Model,Trainset Size,%,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
CmBERT IO,6084,100.0,94.41239,94.41239,94.109835,93.520808,94.838207,94.773225
CmBERT IOB2,6084,100.0,93.990849,93.907747,93.518969,92.901022,94.285655,94.583356
Ptrn CmBERT IO,6084,100.0,94.398637,94.398637,94.111771,93.57972,94.769611,94.91448
Ptrn CmBERT IOB2,6084,100.0,94.189544,94.03894,93.660205,93.07517,94.38566,94.808493


# 333 - Experiments 1 & 2: table

Build averaged table of F1-score for each dataset, each BERT-based model and each annotation model.

In [16]:
# Stack both experiments under a dataset label, then drop the two train-size
# index levels so that only (dataset, model) remains
averaged = (
    pd.concat([averaged_ref, averaged_pero], keys=["Reference", "Pero OCR"])
    .droplevel([2, 3])
)
averaged

Unnamed: 0_level_0,Unnamed: 1_level_0,P+L1+P+L2 (train),L1+L2,All,Level 1,Level 2,DAS alignement
Unnamed: 0_level_1,Model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Reference,CmBERT IO,96.231448,96.231448,96.304372,95.84757,96.868503,96.788148
Reference,CmBERT IOB2,96.144481,96.139536,96.117493,95.62179,96.729561,96.802549
Reference,Ptrn CmBERT IO,95.752708,95.752708,95.845413,95.188101,96.663223,96.411974
Reference,Ptrn CmBERT IOB2,96.254617,96.283451,96.336005,96.024833,96.720151,97.01719
Pero OCR,CmBERT IO,94.41239,94.41239,94.109835,93.520808,94.838207,94.773225
Pero OCR,CmBERT IOB2,93.990849,93.907747,93.518969,92.901022,94.285655,94.583356
Pero OCR,Ptrn CmBERT IO,94.398637,94.398637,94.111771,93.57972,94.769611,94.91448
Pero OCR,Ptrn CmBERT IOB2,94.189544,94.03894,93.660205,93.07517,94.38566,94.808493


In [17]:
# Export the combined table as LaTeX, then display it.
# The caption previously read "CmBERTand" (missing space), which ended up in the LaTeX.
caption = "F1 score measured on the fine-tuned models CmBERT and CmBERT+ptrn on reference dataset and noisy dataset with Joint-labelling + Hierarchical loss approach (M3)."
print(averaged.to_latex(float_format="%.1f", multirow=True, caption=caption))
averaged

\begin{table}
\centering
\caption{F1 score measured on the fine-tuned models CmBERTand CmBERT+ptrn on reference dataset and noisy dataset with Joint-labelling + Hierarchical loss approach (M3).}
\begin{tabular}{llrrrrrr}
\toprule
         &                  &  P+L1+P+L2 (train) &  L1+L2 &  All &  Level 1 &  Level 2 &  DAS alignement \\
{} & Model &                    &        &      &          &          &                 \\
\midrule
\multirow{4}{*}{Reference} & CmBERT IO &               96.2 &   96.2 & 96.3 &     95.8 &     96.9 &            96.8 \\
         & CmBERT IOB2 &               96.1 &   96.1 & 96.1 &     95.6 &     96.7 &            96.8 \\
         & Ptrn CmBERT IO &               95.8 &   95.8 & 95.8 &     95.2 &     96.7 &            96.4 \\
         & Ptrn CmBERT IOB2 &               96.3 &   96.3 & 96.3 &     96.0 &     96.7 &            97.0 \\
\cline{1-8}
\multirow{4}{*}{Pero OCR} & CmBERT IO &               94.4 &   94.4 & 94.1 &     93.5 &     94.8 &            94.8

Unnamed: 0_level_0,Unnamed: 1_level_0,P+L1+P+L2 (train),L1+L2,All,Level 1,Level 2,DAS alignement
Unnamed: 0_level_1,Model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Reference,CmBERT IO,96.231448,96.231448,96.304372,95.84757,96.868503,96.788148
Reference,CmBERT IOB2,96.144481,96.139536,96.117493,95.62179,96.729561,96.802549
Reference,Ptrn CmBERT IO,95.752708,95.752708,95.845413,95.188101,96.663223,96.411974
Reference,Ptrn CmBERT IOB2,96.254617,96.283451,96.336005,96.024833,96.720151,97.01719
Pero OCR,CmBERT IO,94.41239,94.41239,94.109835,93.520808,94.838207,94.773225
Pero OCR,CmBERT IOB2,93.990849,93.907747,93.518969,92.901022,94.285655,94.583356
Pero OCR,Ptrn CmBERT IO,94.398637,94.398637,94.111771,93.57972,94.769611,94.91448
Pero OCR,Ptrn CmBERT IOB2,94.189544,94.03894,93.660205,93.07517,94.38566,94.808493


# 334 - Experiment 1: table by class

In [18]:
import pandas as pd
import json
from pandas import json_normalize

def compile_metrics_by_classes(path, classes, max_trainset_size=None):
    """Collect the per-class test metrics of every run stored under ``path``.

    ``path`` is scanned for sub-directories whose name contains ``run`` and a
    number (e.g. ``run_2``); the first number is used as the run identifier.
    Inside each run directory, every file whose name contains both ``test_`` and
    ``json`` is read, and the entries listed in ``classes`` are turned into one
    row each. The first number found in the file name is used as the
    training-set size.

    Parameters
    ----------
    path : str or pathlib.Path
        Directory containing the run sub-directories.
    classes : list of str
        Keys of the metrics file to extract (e.g. ``'eval_PER'``). Each entry
        must provide a ``number`` field, which is cast to ``int``.
    max_trainset_size : int, optional
        Training-set size counted as 100 % in the ``trainsize_p`` column.
        Defaults to the notebook-level ``MAX_TRAINSET_SIZE``.

    Returns
    -------
    pandas.DataFrame
        One row per ``(run, classe)`` pair (the index). Rows are keyed by run and
        class only: if a run holds files for several training-set sizes, only the
        first non-null values of each pair are kept.

    Raises
    ------
    ValueError
        If a metrics file name contains no number to use as training-set size.
    FileNotFoundError
        If no metrics file is found under ``path``.
    """
    if max_trainset_size is None:
        max_trainset_size = MAX_TRAINSET_SIZE
    path = Path(path)

    frames = []
    # Sorted listings make the result independent of the file-system order
    for run_dir in sorted(os.listdir(path)):
        run_path = path / run_dir
        run_match = re.search(r"\d+", run_dir)
        # Skip anything that is not a numbered run directory (stray files, logs, ...)
        if 'run' not in run_dir or run_match is None or not run_path.is_dir():
            continue
        nrun = run_match[0]

        for file in sorted(os.listdir(run_path)):
            if "test_" not in file or 'json' not in file:
                continue
            size_match = re.search(r"\d+", file)
            if size_match is None:
                raise ValueError(f"Cannot read the training-set size from file name: {run_path / file}")
            size = int(size_match[0])

            dftmp = pd.read_json(run_path / file)
            # Keep only the requested classes, one row per class after transposition
            dftmp = pd.DataFrame.from_dict({key: dftmp[key] for key in classes}).T
            dftmp['number'] = dftmp['number'].astype(int)
            dftmp["trainsize"] = size
            dftmp["run"] = nrun
            dftmp["trainsize_p"] = round(100 * size / max_trainset_size, 1)
            frames.append(dftmp)

    if not frames:
        raise FileNotFoundError(f"No test metrics file found in the run directories of {path}")

    # Single concatenation instead of growing a DataFrame inside the loop
    df = pd.concat(frames)
    df["classe"] = df.index

    return df.groupby(["run","classe"]).first()

In [19]:
# Entity types reported in the per-class table ('eval_TITRE' is deliberately left out)
classes = [
    'eval_PER',
    'eval_ACT',
    'eval_ACT_L1',
    'eval_ACT_L2',
    'eval_DESC',
    'eval_TITREH',
    'eval_TITREP',
    'eval_SPAT',
    'eval_LOC',
    'eval_CARDINAL',
    'eval_FT',
]

# Gather the per-class metrics of each Experiment 1 model.
# A model is only loaded when its "run_2" directory exists on disk.
res, keys = [], []

if os.path.exists(METRICS_DIR_REF / "311-camembert-ner-hierarchical-loss-io/run_2"):
    camembert_ner_io_ref = compile_metrics_by_classes(
        METRICS_DIR_REF / "311-camembert-ner-hierarchical-loss-io", classes
    )
    res.append(camembert_ner_io_ref)
    keys.append("CmBERT IO")

if os.path.exists(METRICS_DIR_REF / "312-camembert-ner-hierarchical-loss-iob2/run_2"):
    camembert_ner_iob2_ref = compile_metrics_by_classes(
        METRICS_DIR_REF / "312-camembert-ner-hierarchical-loss-iob2", classes
    )
    res.append(camembert_ner_iob2_ref)
    keys.append("CmBERT IOB2")

if os.path.exists(METRICS_DIR_REF / "313-pretrained-camembert-ner-hierarchical-loss-io/run_2"):
    ptrn_camembert_ner_io_ref = compile_metrics_by_classes(
        METRICS_DIR_REF / "313-pretrained-camembert-ner-hierarchical-loss-io", classes
    )
    res.append(ptrn_camembert_ner_io_ref)
    keys.append("Ptrn CmBERT IO")

if os.path.exists(METRICS_DIR_REF / "314-pretrained-camembert-ner-hierarchical-loss-iob2/run_2"):
    ptrn_camembert_ner_iob2_ref = compile_metrics_by_classes(
        METRICS_DIR_REF / "314-pretrained-camembert-ner-hierarchical-loss-iob2", classes
    )
    res.append(ptrn_camembert_ner_iob2_ref)
    keys.append("Ptrn CmBERT IOB2")

# Stack every loaded model into one table, keyed by the model's display name
print(keys)
metrics_raw_classes = pd.concat(res, keys=keys)
metrics_raw_classes

['CmBERT IO', 'CmBERT IOB2', 'Ptrn CmBERT IO', 'Ptrn CmBERT IOB2']


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,precision,recall,f1,number,trainsize,trainsize_p
Unnamed: 0_level_1,run,classe,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
CmBERT IO,1,eval_ACT,0.885689,0.927788,0.906250,1094,6084,100.0
CmBERT IO,1,eval_ACT_L1,0.910174,0.963143,0.935910,1031,6084,100.0
CmBERT IO,1,eval_ACT_L2,0.400000,0.349206,0.372881,63,6084,100.0
CmBERT IO,1,eval_CARDINAL,0.998289,0.999429,0.998858,1751,6084,100.0
CmBERT IO,1,eval_DESC,0.435897,0.430380,0.433121,79,6084,100.0
...,...,...,...,...,...,...,...,...
Ptrn CmBERT IOB2,5,eval_LOC,0.973289,0.978188,0.975732,1788,6084,100.0
Ptrn CmBERT IOB2,5,eval_PER,0.988152,0.989911,0.989031,1685,6084,100.0
Ptrn CmBERT IOB2,5,eval_SPAT,0.969440,0.980538,0.974957,1747,6084,100.0
Ptrn CmBERT IOB2,5,eval_TITREH,1.000000,0.976744,0.988235,43,6084,100.0


In [20]:
# Express precision, recall and F1 as percentages.
# NOTE: the result is bound to the same name as the input, so running this cell
# a second time multiplies the values by 100 again.
val = ["f1", "precision", "recall"]
metrics_raw_classes = metrics_raw_classes.copy()
metrics_raw_classes[val] = metrics_raw_classes[val] * 100.
metrics_raw_classes

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,precision,recall,f1,number,trainsize,trainsize_p
Unnamed: 0_level_1,run,classe,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
CmBERT IO,1,eval_ACT,88.568935,92.778793,90.625000,1094,6084,100.0
CmBERT IO,1,eval_ACT_L1,91.017415,96.314258,93.590952,1031,6084,100.0
CmBERT IO,1,eval_ACT_L2,40.000000,34.920635,37.288136,63,6084,100.0
CmBERT IO,1,eval_CARDINAL,99.828865,99.942890,99.885845,1751,6084,100.0
CmBERT IO,1,eval_DESC,43.589744,43.037975,43.312102,79,6084,100.0
...,...,...,...,...,...,...,...,...
Ptrn CmBERT IOB2,5,eval_LOC,97.328881,97.818792,97.573222,1788,6084,100.0
Ptrn CmBERT IOB2,5,eval_PER,98.815166,98.991098,98.903054,1685,6084,100.0
Ptrn CmBERT IOB2,5,eval_SPAT,96.943973,98.053807,97.495731,1747,6084,100.0
Ptrn CmBERT IOB2,5,eval_TITREH,100.000000,97.674419,98.823529,43,6084,100.0


In [21]:
# Mean over runs for each (model, class); the averaged entity count then joins
# the index, and only the three score columns are kept
averaged_classes = (
    metrics_raw_classes
    .groupby(level=0)
    .apply(lambda grp: grp.groupby(by="classe").mean())
    .set_index(["number"], append=True)
    .loc[:, ["precision", "recall", "f1"]]
)

# Readable index names and entity labels
averaged_classes.index.names = ['Test', 'Entity type', "Count"]
averaged_classes = averaged_classes.rename(
    index={
        'eval_PER': 'PER',
        'eval_ACT': 'ACT',
        'eval_ACT_L1': 'ACT L1 only',
        'eval_ACT_L2': 'ACT L2 only',
        'eval_DESC': 'DESC',
        'eval_TITREH': 'TITREH',
        'eval_TITREP': 'TITREP',
        'eval_SPAT': 'SPAT',
        'eval_LOC': 'LOC',
        'eval_CARDINAL': 'CARDINAL',
        'eval_FT': 'FT',
    }
)
averaged_classes

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,precision,recall,f1
Test,Entity type,Count,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
CmBERT IO,ACT,1094.0,89.678905,92.486289,91.057034
CmBERT IO,ACT L1 only,1031.0,94.170629,95.150339,94.63669
CmBERT IO,ACT L2 only,63.0,37.301062,48.888889,41.277659
CmBERT IO,CARDINAL,1751.0,99.760424,99.862935,99.811632
CmBERT IO,DESC,79.0,44.182149,57.21519,49.44892
CmBERT IO,FT,14.0,52.666667,38.571429,43.5
CmBERT IO,LOC,1787.0,97.118773,97.705652,97.411222
CmBERT IO,PER,1685.0,97.507312,97.922849,97.714498
CmBERT IO,SPAT,1747.0,96.693784,97.721809,97.204911
CmBERT IO,TITREH,43.0,97.25974,97.209302,97.213965


In [22]:
def formatDf(averaged_classes, model_str, metric="f1"):
    """Build a one-row table holding one metric of one model, one column per entity.

    Parameters
    ----------
    averaged_classes : pandas.DataFrame
        Frame with a column named ``metric``. Its first index level must be
        named ``"Test"`` (it is reset into a column of that name); the
        remaining index must have two levels, the second of which is dropped.
    model_str : str
        Value of the ``"Test"`` level to select; also used as the label of
        the returned row.
    metric : str, default "f1"
        Name of the column to extract.

    Returns
    -------
    pandas.DataFrame
        A single row labelled ``model_str`` whose columns are the values of
        the first remaining index level. If no row matches ``model_str`` the
        row has no columns.
    """
    # Turn the first index level into a regular "Test" column to filter on it.
    scores = averaged_classes[[metric]].reset_index(level=0)
    # Keep only the rows of the requested model, then lay them out as one row.
    line = scores.loc[scores["Test"] == model_str, [metric]].T
    line = line.rename(index={metric: model_str})
    # Columns are now a two-level index; keep only the first level.
    line.columns = line.columns.droplevel(1)
    return line

In [23]:
# One single-row table per model, in the order they appear in the final table.
model_names = ("CmBERT IO", "CmBERT IOB2", "Ptrn CmBERT IO", "Ptrn CmBERT IOB2")
(
    cmbert_io_classes,
    cmbert_iob2_classes,
    ptrn_cmbert_io_classes,
    ptrn_cmbert_iob2_classes,
) = (formatDf(averaged_classes, name) for name in model_names)

# Stack the rows into the models x entity types table.
# NOTE(review): this rebinds `metrics_raw_classes`, the name the averaging cell
# reads its per-run input from; re-running that cell after this one will no
# longer see the per-run data. Consider a distinct name here and in the export cell.
metrics_raw_classes = pd.concat(
    [
        cmbert_io_classes,
        cmbert_iob2_classes,
        ptrn_cmbert_io_classes,
        ptrn_cmbert_iob2_classes,
    ]
)
metrics_raw_classes

Entity type,ACT,ACT L1 only,ACT L2 only,CARDINAL,DESC,FT,LOC,PER,SPAT,TITREH,TITREP
CmBERT IO,91.057034,94.63669,41.277659,99.811632,49.44892,43.5,97.411222,97.714498,97.204911,97.213965,50.911817
CmBERT IOB2,90.794113,94.328273,38.967055,99.669142,44.75984,40.705579,97.326496,97.345912,97.361644,95.768983,51.786387
Ptrn CmBERT IO,89.99711,94.066241,25.551067,99.703398,36.070327,45.113034,97.464507,96.807606,97.387768,96.192415,42.861735
Ptrn CmBERT IOB2,90.388034,94.573537,35.831492,99.697669,48.314903,44.818126,97.460048,98.37806,97.371193,98.134063,64.13326


In [24]:
# Caption of the exported LaTeX table (fixed missing space in "CmBERT and").
caption = "F1 score measured on the fine-tuned models CmBERT and CmBERT+ptrn on reference dataset with Joint-labelling + Hierarchical loss approach (M3)."
# print() so the LaTeX source is emitted as plain text, ready to copy.
print(metrics_raw_classes.to_latex(float_format="%.1f", multirow=True, caption=caption))

\begin{table}
\centering
\caption{F1 score measured on the fine-tuned models CmBERTand CmBERT+ptrn on reference dataset with Joint-labelling + Hierarchical loss approach (M3).}
\begin{tabular}{lrrrrrrrrrrr}
\toprule
Entity type &  ACT &  ACT L1 only &  ACT L2 only &  CARDINAL &  DESC &   FT &  LOC &  PER &  SPAT &  TITREH &  TITREP \\
\midrule
CmBERT IO        & 91.1 &         94.6 &         41.3 &      99.8 &  49.4 & 43.5 & 97.4 & 97.7 &  97.2 &    97.2 &    50.9 \\
CmBERT IOB2      & 90.8 &         94.3 &         39.0 &      99.7 &  44.8 & 40.7 & 97.3 & 97.3 &  97.4 &    95.8 &    51.8 \\
Ptrn CmBERT IO   & 90.0 &         94.1 &         25.6 &      99.7 &  36.1 & 45.1 & 97.5 & 96.8 &  97.4 &    96.2 &    42.9 \\
Ptrn CmBERT IOB2 & 90.4 &         94.6 &         35.8 &      99.7 &  48.3 & 44.8 & 97.5 & 98.4 &  97.4 &    98.1 &    64.1 \\
\bottomrule
\end{tabular}
\end{table}

