# 140 - Experiments 1 and 2: Figures and metrics

In [1]:
import os, sys
from pathlib import Path

# Detect whether the kernel runs inside Google Colab and expose the answer
# through an environment variable as well.
ENV_IS_GOOGLE_COLAB = 'google.colab' in str(get_ipython())
os.environ["ENV_IS_GOOGLE_COLAB"] = str(ENV_IS_GOOGLE_COLAB)

if ENV_IS_GOOGLE_COLAB:
  from google.colab import drive
  mountpoint = Path("/content/drive")
  drive.mount(str(mountpoint)) # Mount gdrive to BASE
  base = mountpoint / "MyDrive/article_icdar_2023" # Adapt this to your situation
  sys.path.append(str(base)) # Add BASE to Python Path
  BASE = Path(base).resolve() # Make BASE absolute
  DATASETS =  BASE / "dataset"
  # OUT_BASE used to be defined only outside Colab, which made print(OUT_BASE)
  # below (and every later cell) fail with a NameError on Colab.
  # NOTE(review): the location of the results on Drive is an assumption - adapt this to your situation.
  OUT_BASE = BASE / "res_ICDAR" / "method_1"
else:
  # If not on GColab, BASE is the kernel's working directory
  # (normally the directory that contains this notebook).
  BASE = Path.cwd().resolve()
  DATASETS = Path('/work/stual/dataset_ICDAR').resolve()
  OUT_BASE = Path('/work/stual/res_ICDAR/method_1').resolve()

print(sys.path)
print(BASE)
print(DATASETS)
print(OUT_BASE)

['/lrde/home2/stual/stage_DAS/m1_independant_ner_layers', '/usr/lib/python310.zip', '/usr/lib/python3.10', '/usr/lib/python3.10/lib-dynload', '', '/lrde/home2/stual/.venv/python_3_9/lib/python3.10/site-packages']
/lrde/home2/stual/stage_DAS/m1_independant_ner_layers
/work/stual/dataset_ICDAR
/work/stual/res_ICDAR/method_1


# Tools

In [2]:
import pandas as pd
import re
import os
import json

def compile_metrics(path, full_trainset_size=6084):
    """Gather the test metrics of every run stored under ``path`` into one table.

    Expected layout: ``path/<...run_<N>...>/<...test_<SIZE>...>`` where each
    ``test_*`` file is a JSON object of metrics. Files whose name contains
    ``"level"`` are ignored, as are entries whose name holds no number.

    Parameters
    ----------
    path : str or os.PathLike
        Directory that contains the ``run_*`` sub-directories.
    full_trainset_size : int, default 6084
        Size of the complete training set, used to express ``trainsize`` as a
        percentage in the ``trainsize_p`` column.

    Returns
    -------
    pandas.DataFrame
        One row per ``(run, trainsize)`` pair, indexed by these two levels.
        ``run`` is kept as the string of digits found in the directory name and
        ``trainsize`` is an integer. Columns are the union of the JSON keys plus
        ``trainsize_p``.

    Raises
    ------
    FileNotFoundError
        If ``path`` does not exist or holds no usable metrics file.
    """
    number = re.compile(r"\d+")  # raw string: "\d" is an invalid escape in a plain literal

    frames = []
    # Sorted listings make the result independent of the file-system order.
    for run_dir in sorted(os.listdir(path)):
        run_path = os.path.join(path, run_dir)
        run_match = number.search(run_dir)
        if "run_" not in run_dir or run_match is None or not os.path.isdir(run_path):
            continue
        nrun = run_match[0]

        for file in sorted(os.listdir(run_path)):
            size_match = number.search(file)
            if "test_" not in file or "level" in file or size_match is None:
                continue
            size = int(size_match[0])

            # One JSON object -> one single-row frame.
            row = pd.DataFrame([pd.read_json(os.path.join(run_path, file), typ='series')])
            row["trainsize"] = size
            row["run"] = nrun
            row["trainsize_p"] = round(100 * size / full_trainset_size, 1)
            frames.append(row)

    if not frames:
        raise FileNotFoundError(f"No 'run_*/test_*' metrics file found under {path}")

    # Concatenate once (instead of growing a frame inside the loop), then keep
    # the first non-null value of each column per (run, trainsize) pair.
    return pd.concat(frames).groupby(["run", "trainsize"]).first()

# 141 - Experiment 1: figures and tables

In [3]:
from pathlib import Path

# PATHS
METRICS_DIR_E1 = OUT_BASE / "m1-110-experiment_1_metrics"
# A Path object is always truthy, so test the directory itself rather than the object.
assert METRICS_DIR_E1.is_dir(), f"Metrics directory not found: {METRICS_DIR_E1}"
METRICS_DIR_E1

PosixPath('/work/stual/res_ICDAR/method_1/m1-110-experiment_1_metrics')

In [4]:
# Compile the metrics JSONs of the four fine-tuned models of experiment 1,
# one frame per model, loaded in the order of the sub-directories below.
(camembert_io_ref,
 camembert_iob2_ref,
 prtn_camembert_io_ref,
 prtn_camembert_iob2_ref) = (
    compile_metrics(METRICS_DIR_E1 / subdir)
    for subdir in (
        "111-camembert-ner-multihead-io",
        "112-camembert-ner-multihead-iob2",
        "113-pretrained-camembert-ner-multihead-io",
        "114-pretrained-camembert-multihead-iob2",
    )
)

# Stack the four frames; the model name becomes the outermost index level.
metrics_raw_ref = pd.concat(
    [camembert_io_ref, camembert_iob2_ref, prtn_camembert_io_ref, prtn_camembert_iob2_ref],
    keys=["CmBERT IO", "CmBERT IOB2", "Ptrn CmBERT IO", "Ptrn CmBERT IOB2"],
)
metrics_raw_ref

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,precision,recall,f1,accuracy,precision-l1,recall-l1,f1-l1,accuracy-l1,precision-l2,recall-l2,...,ACT-l2,DESC,TITREH,TITREP,SPAT,LOC,CARDINAL,FT,trainsize_p,TITRE
Unnamed: 0_level_1,run,trainsize,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
CmBERT IO,1,6084,0.972824,0.965262,0.969028,0.97322,0.96409,0.969397,0.966736,0.970846,0.983898,0.960173,...,"{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...","{'precision': 0.5319148936170213, 'recall': 0....","{'precision': 0.9772727272727273, 'recall': 1....","{'precision': 0.4074074074074074, 'recall': 0....","{'precision': 0.9805491990846682, 'recall': 0....","{'precision': 0.9792134831460674, 'recall': 0....","{'precision': 0.9977142857142857, 'recall': 0....","{'precision': 1.0, 'recall': 0.071428571428571...",100.0,"{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '..."
CmBERT IO,2,6084,0.967221,0.960525,0.963861,0.96259,0.956407,0.961251,0.958823,0.951032,0.980892,0.959632,...,"{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...","{'precision': 0.3504273504273504, 'recall': 0....","{'precision': 0.9761904761904762, 'recall': 0....","{'precision': 0.7647058823529411, 'recall': 0....","{'precision': 0.9732954545454545, 'recall': 0....","{'precision': 0.9688715953307393, 'recall': 0....","{'precision': 0.9965714285714286, 'recall': 0....","{'precision': 0.3333333333333333, 'recall': 0....",100.0,
CmBERT IO,3,6084,0.966561,0.961982,0.964266,0.968731,0.959709,0.959709,0.959709,0.96259,0.975082,0.964779,...,"{'precision': 0.2894736842105263, 'recall': 0....","{'precision': 0.5188679245283019, 'recall': 0....","{'precision': 0.9772727272727273, 'recall': 1....","{'precision': 0.6666666666666666, 'recall': 0....","{'precision': 0.9771428571428571, 'recall': 0....","{'precision': 0.9716035634743875, 'recall': 0....","{'precision': 0.9977129788450543, 'recall': 0....","{'precision': 0.75, 'recall': 0.21428571428571...",100.0,"{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '..."
CmBERT IO,4,6084,0.96663,0.957002,0.961792,0.965893,0.95748,0.956847,0.957163,0.957224,0.978128,0.957193,...,"{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...","{'precision': 0.39849624060150374, 'recall': 0...","{'precision': 0.9772727272727273, 'recall': 1....","{'precision': 0.4, 'recall': 0.303030303030303...","{'precision': 0.9765177548682703, 'recall': 0....","{'precision': 0.9682451253481894, 'recall': 0....","{'precision': 0.9965675057208238, 'recall': 0....","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...",100.0,
CmBERT IO,5,6084,0.967119,0.961011,0.964055,0.967105,0.955071,0.964113,0.959571,0.961094,0.982481,0.957193,...,"{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...","{'precision': 0.39473684210526316, 'recall': 0...","{'precision': 0.9767441860465116, 'recall': 0....","{'precision': 1.0, 'recall': 0.030303030303030...","{'precision': 0.9778030734206034, 'recall': 0....","{'precision': 0.9705391884380211, 'recall': 0....","{'precision': 0.9948659440958357, 'recall': 0....","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...",100.0,
CmBERT IOB2,1,6084,0.956222,0.960287,0.95825,0.961326,0.94063,0.959269,0.949858,0.948297,0.976079,0.961538,...,"{'precision': 0.25, 'recall': 0.06349206349206...","{'precision': 0.13432835820895522, 'recall': 0...","{'precision': 0.9761904761904762, 'recall': 0....","{'precision': 0.6363636363636364, 'recall': 0....","{'precision': 0.9656144306651635, 'recall': 0....","{'precision': 0.9661862527716186, 'recall': 0....","{'precision': 0.997716894977169, 'recall': 0.9...","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...",100.0,
CmBERT IOB2,2,6084,0.956647,0.96478,0.960697,0.964448,0.947666,0.96081,0.954193,0.951548,0.967829,0.969664,...,"{'precision': 0.3384615384615385, 'recall': 0....","{'precision': 0.43956043956043955, 'recall': 0...","{'precision': 0.9761904761904762, 'recall': 0....","{'precision': 0.5, 'recall': 0.424242424242424...","{'precision': 0.9661590524534687, 'recall': 0....","{'precision': 0.9694783573806881, 'recall': 0....","{'precision': 0.9982886480319453, 'recall': 0....","{'precision': 0.6666666666666666, 'recall': 0....",100.0,
CmBERT IOB2,3,6084,0.964122,0.969274,0.966691,0.969118,0.961572,0.969617,0.965578,0.961507,0.96728,0.968852,...,"{'precision': 0.3188405797101449, 'recall': 0....","{'precision': 0.4774774774774775, 'recall': 0....","{'precision': 1.0, 'recall': 0.953488372093023...","{'precision': 0.6176470588235294, 'recall': 0....","{'precision': 0.9749430523917996, 'recall': 0....","{'precision': 0.9732142857142857, 'recall': 0....","{'precision': 0.9948717948717949, 'recall': 0....","{'precision': 0.42857142857142855, 'recall': 0...",100.0,
CmBERT IOB2,4,6084,0.966943,0.962716,0.964825,0.964499,0.955638,0.962792,0.959202,0.955573,0.981226,0.962622,...,"{'precision': 0.6363636363636364, 'recall': 0....","{'precision': 0.5064935064935064, 'recall': 0....","{'precision': 0.9333333333333333, 'recall': 0....","{'precision': 0.5384615384615384, 'recall': 0....","{'precision': 0.9743881616391576, 'recall': 0....","{'precision': 0.9776035834266518, 'recall': 0....","{'precision': 0.995436394751854, 'recall': 0.9...","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...",100.0,
CmBERT IOB2,5,6084,0.954426,0.96648,0.960415,0.96775,0.954368,0.966975,0.96063,0.963003,0.954497,0.965872,...,"{'precision': 0.21904761904761905, 'recall': 0...","{'precision': 0.425, 'recall': 0.6455696202531...","{'precision': 0.9761904761904762, 'recall': 0....","{'precision': 0.5333333333333333, 'recall': 0....","{'precision': 0.9754566210045662, 'recall': 0....","{'precision': 0.9644641865630206, 'recall': 0....","{'precision': 0.9960068454078722, 'recall': 0....","{'precision': 0.6, 'recall': 0.214285714285714...",100.0,


In [5]:
# Express the precision / recall / F1 columns as percentages;
# every other column is carried over unchanged from the raw frame.
eval_ = [
    "precision-l1", "recall-l1", "f1-l1",
    "precision-l2", "recall-l2", "f1-l2",
    "precision", "recall", "f1",
    "precision-jl", "recall_jl", "f1_jl",
]
metrics_ref = metrics_raw_ref.copy()
metrics_ref[eval_] = metrics_ref[eval_] * 100.
metrics_ref.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,precision,recall,f1,accuracy,precision-l1,recall-l1,f1-l1,accuracy-l1,precision-l2,recall-l2,...,ACT-l2,DESC,TITREH,TITREP,SPAT,LOC,CARDINAL,FT,trainsize_p,TITRE
Unnamed: 0_level_1,run,trainsize,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
CmBERT IO,1,6084,97.282409,96.526175,96.902817,0.97322,96.409021,96.939674,96.673619,0.970846,98.389783,96.017339,...,"{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...","{'precision': 0.5319148936170213, 'recall': 0....","{'precision': 0.9772727272727273, 'recall': 1....","{'precision': 0.4074074074074074, 'recall': 0....","{'precision': 0.9805491990846682, 'recall': 0....","{'precision': 0.9792134831460674, 'recall': 0....","{'precision': 0.9977142857142857, 'recall': 0....","{'precision': 1.0, 'recall': 0.071428571428571...",100.0,"{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '..."
CmBERT IO,2,6084,96.722114,96.052472,96.38613,0.96259,95.640745,96.125055,95.882288,0.951032,98.089172,95.963154,...,"{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...","{'precision': 0.3504273504273504, 'recall': 0....","{'precision': 0.9761904761904762, 'recall': 0....","{'precision': 0.7647058823529411, 'recall': 0....","{'precision': 0.9732954545454545, 'recall': 0....","{'precision': 0.9688715953307393, 'recall': 0....","{'precision': 0.9965714285714286, 'recall': 0....","{'precision': 0.3333333333333333, 'recall': 0....",100.0,
CmBERT IO,3,6084,96.65609,96.198227,96.426615,0.968731,95.970938,95.970938,95.970938,0.96259,97.508215,96.477919,...,"{'precision': 0.2894736842105263, 'recall': 0....","{'precision': 0.5188679245283019, 'recall': 0....","{'precision': 0.9772727272727273, 'recall': 1....","{'precision': 0.6666666666666666, 'recall': 0....","{'precision': 0.9771428571428571, 'recall': 0....","{'precision': 0.9716035634743875, 'recall': 0....","{'precision': 0.9977129788450543, 'recall': 0....","{'precision': 0.75, 'recall': 0.21428571428571...",100.0,"{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '..."
CmBERT IO,4,6084,96.662986,95.700231,96.179199,0.965893,95.747962,95.68472,95.716331,0.957224,97.812846,95.719317,...,"{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...","{'precision': 0.39849624060150374, 'recall': 0...","{'precision': 0.9772727272727273, 'recall': 1....","{'precision': 0.4, 'recall': 0.303030303030303...","{'precision': 0.9765177548682703, 'recall': 0....","{'precision': 0.9682451253481894, 'recall': 0....","{'precision': 0.9965675057208238, 'recall': 0....","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...",100.0,
CmBERT IO,5,6084,96.711893,96.101057,96.405507,0.967105,95.507088,96.411273,95.957051,0.961094,98.248053,95.719317,...,"{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...","{'precision': 0.39473684210526316, 'recall': 0...","{'precision': 0.9767441860465116, 'recall': 0....","{'precision': 1.0, 'recall': 0.030303030303030...","{'precision': 0.9778030734206034, 'recall': 0....","{'precision': 0.9705391884380211, 'recall': 0....","{'precision': 0.9948659440958357, 'recall': 0....","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...",100.0,


### Build the average table

In [20]:
# Average over runs, per model (outermost index level) and train-set size.
# numeric_only=True is required: the frame also holds per-entity columns whose
# cells are dicts, and pandas >= 2.0 raises a TypeError when asked to average
# them (older versions silently dropped such columns).
# The whole pipeline is one chain, so nothing is mutated in place on a slice.
averaged_ref = (
    metrics_ref
    .groupby(level=[0, "trainsize"])
    .mean(numeric_only=True)
    .set_index("trainsize_p", append=True)
    # Keep just the necessary columns
    .loc[:, ["f1-l1", "f1-l2", "f1", "f1_jl"]]
    # Set pretty names
    .rename(columns={"f1-l1": "Level 1",
                     "f1-l2": "Level 2",
                     "f1": "Global",
                     "f1_jl": "P+L1+P+L2"}, errors="raise")
    .rename_axis(index=['Model', 'Trainset Size', "%"])
)
# The model labels are already the pretty names given as `keys` to pd.concat,
# so no row renaming is needed.
averaged_ref

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Level 1,Level 2,Global,P+L1+P+L2
Model,Trainset Size,%,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
CmBERT IO,6084,100.0,96.040045,96.983179,96.460054,95.699654
CmBERT IOB2,6084,100.0,95.789203,96.751012,96.217547,95.663863
Ptrn CmBERT IO,6084,100.0,96.317256,96.999829,96.620776,95.918409
Ptrn CmBERT IOB2,6084,100.0,95.528058,96.61288,96.012234,95.331939


### Create the results table

In [21]:
# Pivot the averaged scores for LaTeX: metrics are moved to the rows and
# (train-set size, %) to the columns, then the metric name is put in front of
# the model name and the rows are sorted on it.
latex_table_ref = (
    averaged_ref
    .stack()
    .unstack(level=[1, 2])
    .swaplevel(0, 1)
    .sort_index(level=0)
)

caption = "F1 score measured on the fine-tuned models CmBERT, CmBERT+ptrn on reference dataset with Independent Flat NER layers approach (M1)."
print(latex_table_ref.to_latex(caption=caption, multirow=True, float_format="%.1f"))
latex_table_ref

\begin{table}
\centering
\caption{F1 score measured on the fine-tuned models CmBERT, CmBERT+ptrn on reference dataset.}
\begin{tabular}{llr}
\toprule
          & Trainset Size &  6084 \\
          & \% & 100.0 \\
{} & Model &       \\
\midrule
\multirow{4}{*}{Global} & CmBERT IO &  96.5 \\
          & CmBERT IOB2 &  96.2 \\
          & Ptrn CmBERT IO &  96.6 \\
          & Ptrn CmBERT IOB2 &  96.0 \\
\cline{1-3}
\multirow{4}{*}{Level 1} & CmBERT IO &  96.0 \\
          & CmBERT IOB2 &  95.8 \\
          & Ptrn CmBERT IO &  96.3 \\
          & Ptrn CmBERT IOB2 &  95.5 \\
\cline{1-3}
\multirow{4}{*}{Level 2} & CmBERT IO &  97.0 \\
          & CmBERT IOB2 &  96.8 \\
          & Ptrn CmBERT IO &  97.0 \\
          & Ptrn CmBERT IOB2 &  96.6 \\
\cline{1-3}
\multirow{4}{*}{P+L1+P+L2} & CmBERT IO &  95.7 \\
          & CmBERT IOB2 &  95.7 \\
          & Ptrn CmBERT IO &  95.9 \\
          & Ptrn CmBERT IOB2 &  95.3 \\
\bottomrule
\end{tabular}
\end{table}



Unnamed: 0_level_0,Trainset Size,6084
Unnamed: 0_level_1,%,100.0
Unnamed: 0_level_2,Model,Unnamed: 2_level_2
Global,CmBERT IO,96.460054
Global,CmBERT IOB2,96.217547
Global,Ptrn CmBERT IO,96.620776
Global,Ptrn CmBERT IOB2,96.012234
Level 1,CmBERT IO,96.040045
Level 1,CmBERT IOB2,95.789203
Level 1,Ptrn CmBERT IO,96.317256
Level 1,Ptrn CmBERT IOB2,95.528058
Level 2,CmBERT IO,96.983179
Level 2,CmBERT IOB2,96.751012


# 142 - Experiment 2: figures and tables

In [8]:
from pathlib import Path

# PATHS
METRICS_DIR_E2 = OUT_BASE / "m1-120-experiment_2_metrics"
# A Path object is always truthy, so test the directory itself rather than the object.
assert METRICS_DIR_E2.is_dir(), f"Metrics directory not found: {METRICS_DIR_E2}"
METRICS_DIR_E2

PosixPath('/work/stual/res_ICDAR/method_1/m1-120-experiment_2_metrics')

In [9]:
# Compile the metrics JSONs of the four fine-tuned models of experiment 2,
# one frame per model, loaded in the order of the sub-directories below.
(camembert_io_pero,
 camembert_iob2_pero,
 prtn_camembert_io_pero,
 prtn_camembert_iob2_pero) = (
    compile_metrics(METRICS_DIR_E2 / subdir)
    for subdir in (
        "121-camembert-ner-multihead-io",
        "122-camembert-ner-multihead-iob2",
        "123-pretrained-camembert-ner-multihead-io",
        "124-pretrained-camembert-multihead-iob2",
    )
)

# Stack the four frames; the model name becomes the outermost index level.
metrics_raw_pero = pd.concat(
    [camembert_io_pero, camembert_iob2_pero, prtn_camembert_io_pero, prtn_camembert_iob2_pero],
    keys=["CmBERT IO", "CmBERT IOB2", "Ptrn CmBERT IO", "Ptrn CmBERT IOB2"],
)
metrics_raw_pero

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,precision,recall,f1,accuracy,precision-l1,recall-l1,f1-l1,accuracy-l1,precision-l2,recall-l2,...,ACT-l2,DESC,TITREH,TITREP,SPAT,LOC,CARDINAL,FT,trainsize_p,TITRE
Unnamed: 0_level_1,run,trainsize,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
CmBERT IO,1,6084,0.938887,0.940598,0.939741,0.960667,0.927178,0.941876,0.934469,0.955108,0.953757,0.939024,...,"{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...","{'precision': 0.4727272727272727, 'recall': 0....","{'precision': 0.6904761904761905, 'recall': 0....","{'precision': 0.5333333333333333, 'recall': 0....","{'precision': 0.9438138479001135, 'recall': 0....","{'precision': 0.9415656008820287, 'recall': 0....","{'precision': 0.9805045871559633, 'recall': 0....","{'precision': 0.6666666666666666, 'recall': 0....",100.0,"{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '..."
CmBERT IO,2,6084,0.934635,0.936224,0.935429,0.958684,0.918668,0.935051,0.926787,0.951663,0.95501,0.937669,...,"{'precision': 0.6428571428571429, 'recall': 0....","{'precision': 0.4936708860759494, 'recall': 0....","{'precision': 0.8285714285714286, 'recall': 0....","{'precision': 0.6521739130434783, 'recall': 0....","{'precision': 0.9388101983002833, 'recall': 0....","{'precision': 0.93732667775929, 'recall': 0.94...","{'precision': 0.9827981651376146, 'recall': 0....","{'precision': 0.75, 'recall': 0.21428571428571...",100.0,
CmBERT IO,3,6084,0.932882,0.937075,0.934974,0.958605,0.920779,0.936592,0.928618,0.949157,0.948205,0.937669,...,"{'precision': 0.3333333333333333, 'recall': 0....","{'precision': 0.3739130434782609, 'recall': 0....","{'precision': 0.6222222222222222, 'recall': 0....","{'precision': 0.65, 'recall': 0.40625, 'f1': 0...","{'precision': 0.9404424276800908, 'recall': 0....","{'precision': 0.9334433443344334, 'recall': 0....","{'precision': 0.9833810888252149, 'recall': 0....","{'precision': 0.16666666666666666, 'recall': 0...",100.0,
CmBERT IO,4,6084,0.937803,0.939626,0.938714,0.960067,0.924446,0.937472,0.930914,0.951558,0.954695,0.942276,...,"{'precision': 0.48717948717948717, 'recall': 0...","{'precision': 0.4142857142857143, 'recall': 0....","{'precision': 0.7368421052631579, 'recall': 0....","{'precision': 0.631578947368421, 'recall': 0.3...","{'precision': 0.9291736930860034, 'recall': 0....","{'precision': 0.9471635150166852, 'recall': 0....","{'precision': 0.9816618911174785, 'recall': 0....","{'precision': 0.6666666666666666, 'recall': 0....",100.0,
CmBERT IO,5,6084,0.939239,0.938897,0.939068,0.957509,0.927998,0.942096,0.934994,0.951976,0.953566,0.934959,...,"{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...","{'precision': 0.5568181818181818, 'recall': 0....","{'precision': 0.75, 'recall': 0.69767441860465...","{'precision': 0.5625, 'recall': 0.28125, 'f1':...","{'precision': 0.9431818181818182, 'recall': 0....","{'precision': 0.9356435643564357, 'recall': 0....","{'precision': 0.9810562571756601, 'recall': 0....","{'precision': 0.5, 'recall': 0.071428571428571...",100.0,"{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '..."
CmBERT IOB2,1,6084,0.926027,0.944134,0.934993,0.953698,0.92719,0.939234,0.933173,0.945138,0.924618,0.950163,...,"{'precision': 0.2112676056338028, 'recall': 0....","{'precision': 0.5245901639344263, 'recall': 0....","{'precision': 0.9024390243902439, 'recall': 0....","{'precision': 0.42857142857142855, 'recall': 0...","{'precision': 0.9303761931499158, 'recall': 0....","{'precision': 0.9372937293729373, 'recall': 0....","{'precision': 0.9811536264991434, 'recall': 0....","{'precision': 0.5714285714285714, 'recall': 0....",100.0,
CmBERT IOB2,2,6084,0.937235,0.939398,0.938315,0.955551,0.928772,0.944518,0.936579,0.95041,0.947991,0.933099,...,"{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...","{'precision': 0.4247787610619469, 'recall': 0....","{'precision': 0.8529411764705882, 'recall': 0....","{'precision': 0.5, 'recall': 0.303030303030303...","{'precision': 0.9455782312925171, 'recall': 0....","{'precision': 0.9223248234655079, 'recall': 0....","{'precision': 0.9821736630247269, 'recall': 0....","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...",100.0,
CmBERT IOB2,3,6084,0.929223,0.940734,0.934943,0.957613,0.921314,0.938353,0.929756,0.95088,0.939084,0.943662,...,"{'precision': 0.23157894736842105, 'recall': 0...","{'precision': 0.35185185185185186, 'recall': 0...","{'precision': 0.7948717948717948, 'recall': 0....","{'precision': 0.4166666666666667, 'recall': 0....","{'precision': 0.9393079977311402, 'recall': 0....","{'precision': 0.9440443213296399, 'recall': 0....","{'precision': 0.9827882960413081, 'recall': 0....","{'precision': 1.0, 'recall': 0.285714285714285...",100.0,
CmBERT IOB2,4,6084,0.931254,0.942677,0.93693,0.95422,0.920215,0.942096,0.931027,0.947382,0.945183,0.943391,...,"{'precision': 0.22916666666666666, 'recall': 0...","{'precision': 0.26666666666666666, 'recall': 0...","{'precision': 0.8095238095238095, 'recall': 0....","{'precision': 0.4444444444444444, 'recall': 0....","{'precision': 0.9312465064281722, 'recall': 0....","{'precision': 0.9419889502762431, 'recall': 0....","{'precision': 0.9789413773477519, 'recall': 0....","{'precision': 0.5, 'recall': 0.357142857142857...",100.0,
CmBERT IOB2,5,6084,0.926479,0.933568,0.93001,0.953672,0.918022,0.931968,0.924943,0.94472,0.937059,0.935536,...,"{'precision': 0.21428571428571427, 'recall': 0...","{'precision': 0.4157303370786517, 'recall': 0....","{'precision': 0.7346938775510204, 'recall': 0....","{'precision': 0.2222222222222222, 'recall': 0....","{'precision': 0.9297848244620611, 'recall': 0....","{'precision': 0.9298630136986301, 'recall': 0....","{'precision': 0.9855240301100173, 'recall': 0....","{'precision': 0.5, 'recall': 0.071428571428571...",100.0,


In [10]:
# Express the precision / recall / F1 columns as percentages;
# every other column is carried over unchanged from the raw frame.
eval_ = [
    "precision-l1", "recall-l1", "f1-l1",
    "precision-l2", "recall-l2", "f1-l2",
    "precision", "recall", "f1",
    "precision-jl", "recall_jl", "f1_jl",
]
metrics_pero = metrics_raw_pero.copy()
metrics_pero[eval_] = metrics_pero[eval_] * 100.
metrics_pero.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,precision,recall,f1,accuracy,precision-l1,recall-l1,f1-l1,accuracy-l1,precision-l2,recall-l2,...,ACT-l2,DESC,TITREH,TITREP,SPAT,LOC,CARDINAL,FT,trainsize_p,TITRE
Unnamed: 0_level_1,run,trainsize,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
CmBERT IO,1,6084,93.888687,94.059767,93.974149,0.960667,92.717815,94.187583,93.44692,0.955108,95.375723,93.902439,...,"{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...","{'precision': 0.4727272727272727, 'recall': 0....","{'precision': 0.6904761904761905, 'recall': 0....","{'precision': 0.5333333333333333, 'recall': 0....","{'precision': 0.9438138479001135, 'recall': 0....","{'precision': 0.9415656008820287, 'recall': 0....","{'precision': 0.9805045871559633, 'recall': 0....","{'precision': 0.6666666666666666, 'recall': 0....",100.0,"{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '..."
CmBERT IO,2,6084,93.463497,93.622449,93.542906,0.958684,91.866753,93.505064,92.678669,0.951663,95.500966,93.766938,...,"{'precision': 0.6428571428571429, 'recall': 0....","{'precision': 0.4936708860759494, 'recall': 0....","{'precision': 0.8285714285714286, 'recall': 0....","{'precision': 0.6521739130434783, 'recall': 0....","{'precision': 0.9388101983002833, 'recall': 0....","{'precision': 0.93732667775929, 'recall': 0.94...","{'precision': 0.9827981651376146, 'recall': 0....","{'precision': 0.75, 'recall': 0.21428571428571...",100.0,
CmBERT IO,3,6084,93.288185,93.707483,93.497364,0.958605,92.077922,93.659181,92.861821,0.949157,94.820499,93.766938,...,"{'precision': 0.3333333333333333, 'recall': 0....","{'precision': 0.3739130434782609, 'recall': 0....","{'precision': 0.6222222222222222, 'recall': 0....","{'precision': 0.65, 'recall': 0.40625, 'f1': 0...","{'precision': 0.9404424276800908, 'recall': 0....","{'precision': 0.9334433443344334, 'recall': 0....","{'precision': 0.9833810888252149, 'recall': 0....","{'precision': 0.16666666666666666, 'recall': 0...",100.0,
CmBERT IO,4,6084,93.78031,93.962585,93.871359,0.960067,92.444637,93.747248,93.091386,0.951558,95.469522,94.227642,...,"{'precision': 0.48717948717948717, 'recall': 0...","{'precision': 0.4142857142857143, 'recall': 0....","{'precision': 0.7368421052631579, 'recall': 0....","{'precision': 0.631578947368421, 'recall': 0.3...","{'precision': 0.9291736930860034, 'recall': 0....","{'precision': 0.9471635150166852, 'recall': 0....","{'precision': 0.9816618911174785, 'recall': 0....","{'precision': 0.6666666666666666, 'recall': 0....",100.0,
CmBERT IO,5,6084,93.923928,93.889699,93.90681,0.957509,92.799827,94.209599,93.499399,0.951976,95.356551,93.495935,...,"{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '...","{'precision': 0.5568181818181818, 'recall': 0....","{'precision': 0.75, 'recall': 0.69767441860465...","{'precision': 0.5625, 'recall': 0.28125, 'f1':...","{'precision': 0.9431818181818182, 'recall': 0....","{'precision': 0.9356435643564357, 'recall': 0....","{'precision': 0.9810562571756601, 'recall': 0....","{'precision': 0.5, 'recall': 0.071428571428571...",100.0,"{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, '..."


### Build the averaged table


In [11]:
# Average over runs, per model (outermost index level) and train-set size.
# numeric_only=True is required: the frame also holds per-entity columns whose
# cells are dicts, and pandas >= 2.0 raises a TypeError when asked to average
# them (older versions silently dropped such columns).
# The whole pipeline is one chain, so nothing is mutated in place on a slice.
averaged_pero = (
    metrics_pero
    .groupby(level=[0, "trainsize"])
    .mean(numeric_only=True)
    .set_index("trainsize_p", append=True)
    # Keep just the necessary columns
    .loc[:, ["f1-l1", "f1-l2", "f1", "f1_jl"]]
    # Set pretty names
    .rename(columns={"f1-l1": "Level 1",
                     "f1-l2": "Level 2",
                     "f1": "Global",
                     "f1_jl": "P+L1+P+L2"}, errors="raise")
    .rename_axis(index=['Model', 'Trainset Size', "%"])
)
# The model labels are already the pretty names given as `keys` to pd.concat,
# so no row renaming is needed.
averaged_pero

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Level 1,Level 2,Global,P+L1+P+L2
Model,Trainset Size,%,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
CmBERT IO,6084,100.0,93.115639,94.562345,93.758518,93.355103
CmBERT IOB2,6084,100.0,93.109544,93.993053,93.50383,93.127077
Ptrn CmBERT IO,6084,100.0,94.133931,94.548789,94.318797,93.835619
Ptrn CmBERT IOB2,6084,100.0,93.743182,94.510817,94.085092,93.708233


### Create the results table

In [12]:
# Pivot the averaged scores for LaTeX: metrics are moved to the rows and
# (train-set size, %) to the columns, then the metric name is put in front of
# the model name and the rows are sorted on it.
latex_table_pero = (
    averaged_pero
    .stack()
    .unstack(level=[1, 2])
    .swaplevel(0, 1)
    .sort_index(level=0)
)

caption = "F1 score measured on the fine-tuned models CmBERT, CmBERT+ptrn on noisy dataset with Independent Flat NER layers approach (M1)."
print(latex_table_pero.to_latex(caption=caption, multirow=True, float_format="%.1f"))

\begin{table}
\centering
\caption{F1 score measured on the fine-tuned models CmBERT, CmBERT+ptrn on noisy dataset.}
\begin{tabular}{llr}
\toprule
          & Trainset Size &  6084 \\
          & \% & 100.0 \\
{} & Model &       \\
\midrule
\multirow{4}{*}{Global} & CmBERT IO &  93.8 \\
          & CmBERT IOB2 &  93.5 \\
          & Ptrn CmBERT IO &  94.3 \\
          & Ptrn CmBERT IOB2 &  94.1 \\
\cline{1-3}
\multirow{4}{*}{Level 1} & CmBERT IO &  93.1 \\
          & CmBERT IOB2 &  93.1 \\
          & Ptrn CmBERT IO &  94.1 \\
          & Ptrn CmBERT IOB2 &  93.7 \\
\cline{1-3}
\multirow{4}{*}{Level 2} & CmBERT IO &  94.6 \\
          & CmBERT IOB2 &  94.0 \\
          & Ptrn CmBERT IO &  94.5 \\
          & Ptrn CmBERT IOB2 &  94.5 \\
\cline{1-3}
\multirow{4}{*}{P+L1+P+L2} & CmBERT IO &  93.4 \\
          & CmBERT IOB2 &  93.1 \\
          & Ptrn CmBERT IO &  93.8 \\
          & Ptrn CmBERT IOB2 &  93.7 \\
\bottomrule
\end{tabular}
\end{table}



# 143 - Experiments 1 and 2 results table

In [43]:
# Put both experiments in one table (dataset name as outer index level) and
# drop the train-set size and percentage levels (positions 2 and 3).
averaged = pd.concat(
    [averaged_ref, averaged_pero],
    keys=["Reference", "Pero OCR"],
).droplevel([2, 3])
averaged

Unnamed: 0_level_0,Unnamed: 1_level_0,Level 1,Level 2,Global,P+L1+P+L2
Unnamed: 0_level_1,Model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Reference,CmBERT IO,96.040045,96.983179,96.460054,95.699654
Reference,CmBERT IOB2,95.789203,96.751012,96.217547,95.663863
Reference,Ptrn CmBERT IO,96.317256,96.999829,96.620776,95.918409
Reference,Ptrn CmBERT IOB2,95.528058,96.61288,96.012234,95.331939
Pero OCR,CmBERT IO,93.115639,94.562345,93.758518,93.355103
Pero OCR,CmBERT IOB2,93.109544,93.993053,93.50383,93.127077
Pero OCR,Ptrn CmBERT IO,94.133931,94.548789,94.318797,93.835619
Pero OCR,Ptrn CmBERT IOB2,93.743182,94.510817,94.085092,93.708233


In [45]:
# Caption of the combined table (fixed the missing space in "CmBERT and").
caption = "F1 score measured on the fine-tuned models CmBERT and CmBERT+ptrn on reference dataset and noisy dataset with Independent Flat NER layers approach (M1)."
print(averaged.to_latex(float_format="%.1f", multirow=True, caption=caption))
averaged

\begin{table}
\centering
\caption{F1 score measured on the fine-tuned models CmBERTand CmBERT+ptrn on reference dataset and noisy dataset with method 1.}
\begin{tabular}{llrrrr}
\toprule
         &                  &  Level 1 &  Level 2 &  Global &  P+L1+P+L2 \\
{} & Model &          &          &         &            \\
\midrule
\multirow{4}{*}{Reference} & CmBERT IO &     96.0 &     97.0 &    96.5 &       95.7 \\
         & CmBERT IOB2 &     95.8 &     96.8 &    96.2 &       95.7 \\
         & Ptrn CmBERT IO &     96.3 &     97.0 &    96.6 &       95.9 \\
         & Ptrn CmBERT IOB2 &     95.5 &     96.6 &    96.0 &       95.3 \\
\cline{1-6}
\multirow{4}{*}{Pero OCR} & CmBERT IO &     93.1 &     94.6 &    93.8 &       93.4 \\
         & CmBERT IOB2 &     93.1 &     94.0 &    93.5 &       93.1 \\
         & Ptrn CmBERT IO &     94.1 &     94.5 &    94.3 &       93.8 \\
         & Ptrn CmBERT IOB2 &     93.7 &     94.5 &    94.1 &       93.7 \\
\bottomrule
\end{tabular}
\end{table}



Unnamed: 0_level_0,Unnamed: 1_level_0,Level 1,Level 2,Global,P+L1+P+L2
Unnamed: 0_level_1,Model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Reference,CmBERT IO,96.040045,96.983179,96.460054,95.699654
Reference,CmBERT IOB2,95.789203,96.751012,96.217547,95.663863
Reference,Ptrn CmBERT IO,96.317256,96.999829,96.620776,95.918409
Reference,Ptrn CmBERT IOB2,95.528058,96.61288,96.012234,95.331939
Pero OCR,CmBERT IO,93.115639,94.562345,93.758518,93.355103
Pero OCR,CmBERT IOB2,93.109544,93.993053,93.50383,93.127077
Pero OCR,Ptrn CmBERT IO,94.133931,94.548789,94.318797,93.835619
Pero OCR,Ptrn CmBERT IOB2,93.743182,94.510817,94.085092,93.708233
