# M2 : 230 - Figures and metrics

Evaluation scores on level-1 entities segmentation and classification with joint-labels method

In [None]:
import os, sys
from pathlib import Path

# Detect whether this notebook runs inside Google Colab and publish the answer
# through an environment variable (stored as the string "True" or "False").
ENV_IS_GOOGLE_COLAB = 'google.colab' in str(get_ipython())
os.environ["ENV_IS_GOOGLE_COLAB"] = str(ENV_IS_GOOGLE_COLAB)

if not ENV_IS_GOOGLE_COLAB:
    # Local execution: BASE is the current working directory.
    BASE = Path().resolve()
    # Adapt the two paths below to your situation.
    # DATASETS: where the data live before the Dataset objects are created.
    DATASETS = Path('../dataset_ICDAR').resolve()
    # OUT_BASE: where the results of this notebook are saved.
    OUT_BASE = Path('../res_ICDAR/method_2').resolve()
else:
    from google.colab import drive

    # Mount Google Drive, then work from the project folder stored on it.
    mountpoint = Path("/content/drive")
    drive.mount(str(mountpoint))
    base = mountpoint / "MyDrive/article_icdar_2023"  # Adapt this to your situation
    sys.path.append(str(base))  # Make the project folder importable
    BASE = base.resolve()  # Absolute version of the project folder
    DATASETS = BASE / "dataset_ICDAR"
    OUT_BASE = BASE / "res_ICDAR/method_2"

# Show the resulting configuration.
for location in (sys.path, BASE, DATASETS, OUT_BASE):
    print(location)

## Constants

In [None]:
# Denominator used to express a train-set size as a percentage (`trainsize_p`).
# Presumably the number of examples in the full training set — TODO confirm.
MAX_TRAINSET_SIZE = 6084

## Tools

In [None]:
import pandas as pd
import re
import os
import json

def compile_metrics(path, max_trainset_size=None):
    """Gather the test metrics of every run found under ``path`` into one table.

    Every entry of ``path`` whose name contains ``"run"`` is treated as a run
    directory; the first number in its name is the run id. Inside a run
    directory, every file whose name contains both ``"test_"`` and ``"json"``
    is read as a flat JSON object of metrics, and the first number in the file
    name is taken as the train-set size of that evaluation.

    Parameters
    ----------
    path : str or os.PathLike
        Directory holding the run directories.
    max_trainset_size : int, optional
        Denominator used to compute ``trainsize_p`` (train-set size as a
        percentage). Defaults to the notebook constant ``MAX_TRAINSET_SIZE``.

    Returns
    -------
    pandas.DataFrame
        One row per (run, trainsize) pair, indexed by those two levels. ``run``
        is the run id as a string of digits. Columns are the metrics read from
        the JSON files plus ``trainsize_p``.

    Raises
    ------
    ValueError
        If no metrics file is found under ``path``.
    """
    if max_trainset_size is None:
        max_trainset_size = MAX_TRAINSET_SIZE

    frames = []
    # Sorted listings make the result independent of the file-system order.
    for run_dir in sorted(os.listdir(path)):
        if 'run' not in run_dir:
            continue
        run_path = os.path.join(path, run_dir)
        nrun = re.search(r"\d+", run_dir)[0]

        for file in sorted(os.listdir(run_path)):
            if "test_" not in file or 'json' not in file:
                continue
            size = int(re.search(r"\d+", file)[0])

            # A metrics file is a flat JSON object: load it as a single row.
            row = pd.DataFrame([pd.read_json(os.path.join(run_path, file), typ='series')])
            row["trainsize"] = size
            row["run"] = nrun
            row["trainsize_p"] = round(100 * size / max_trainset_size, 1)
            frames.append(row)

    if not frames:
        raise ValueError(f"No test metrics file found in any run directory under {path}")

    # Concatenate once (instead of growing a frame inside the loop), then keep
    # the first non-null value of each column per (run, trainsize) pair.
    return pd.concat(frames).groupby(["run", "trainsize"]).first()

# 231 - Experiment 1: tables on all-entities metrics

In [None]:
# Folder holding the experiment-1 metric files, one sub-folder per model.
# NOTE(review): OUT_BASE already ends with "method_2", so this path contains
# ".../method_2/method_2/..." — confirm the repeated segment is intended.
METRICS_DIR_REF = OUT_BASE / "method_2/m2-210-experiment_1_metrics"

In [None]:
# Collect the experiment-1 metric tables, one per model.
# A model is loaded only when its "run_2" folder exists on disk.
res, keys = [], []

if os.path.exists(METRICS_DIR_REF / "211-camembert-ner-joint-labelling-io/run_2"):
    camembert_ner_io_ref = compile_metrics(METRICS_DIR_REF / "211-camembert-ner-joint-labelling-io")
    # For the IO model, the global scores are also exposed under the "-l1l2" names.
    for alias, source in (
        ("eval_precision-l1l2", "eval_precision"),
        ("eval_recall-l1l2", "eval_recall"),
        ("eval_f1-l1l2", "eval_f1"),
        ("eval_accuracy-l1l2", "eval_accuracy"),
    ):
        camembert_ner_io_ref[alias] = camembert_ner_io_ref[source]
    res += [camembert_ner_io_ref]
    keys += ["CmBERT IO"]

if os.path.exists(METRICS_DIR_REF / "212-camembert-ner-joint-labelling-iob2/run_2"):
    camembert_ner_iob2_ref = compile_metrics(METRICS_DIR_REF / "212-camembert-ner-joint-labelling-iob2")
    res += [camembert_ner_iob2_ref]
    keys += ["CmBERT IOB2"]

if os.path.exists(METRICS_DIR_REF / "213-pretrained-camembert-ner-joint-labelling-io/run_2"):
    ptrn_camembert_ner_io_ref = compile_metrics(METRICS_DIR_REF / "213-pretrained-camembert-ner-joint-labelling-io")
    # Same aliasing as for the non-pretrained IO model.
    for alias, source in (
        ("eval_precision-l1l2", "eval_precision"),
        ("eval_recall-l1l2", "eval_recall"),
        ("eval_f1-l1l2", "eval_f1"),
        ("eval_accuracy-l1l2", "eval_accuracy"),
    ):
        ptrn_camembert_ner_io_ref[alias] = ptrn_camembert_ner_io_ref[source]
    res += [ptrn_camembert_ner_io_ref]
    keys += ["Ptrn CmBERT IO"]

if os.path.exists(METRICS_DIR_REF / "214-pretrained-camembert-ner-joint-labelling-iob2/run_2"):
    ptrn_camembert_ner_iob2_ref = compile_metrics(METRICS_DIR_REF / "214-pretrained-camembert-ner-joint-labelling-iob2")
    res += [ptrn_camembert_ner_iob2_ref]
    keys += ["Ptrn CmBERT IOB2"]

# Stack the per-model tables; the model label becomes the outermost index level.
print(keys)
metrics_raw_ref = pd.concat(res, keys=keys)
metrics_raw_ref

In [None]:
# List the metric columns available in the raw experiment-1 table.
metrics_raw_ref.columns

## 231.1 Build the averaged table


In [None]:
# Scale the F1 columns by 100 so they read as percentages; the raw table is left untouched.
eval_ = ["eval_f1","eval_f1-l1","eval_f1-all","eval_f1-l2",'eval_f1-l1l2','eval_f1-das']
metrics_ref = metrics_raw_ref.copy()
metrics_ref[eval_] = metrics_raw_ref.loc[:, eval_].mul(100.)
metrics_ref.head()

In [None]:
# Mean of every column over the runs, per model (outer index level) and train-set size.
# The averaged percentage column is then moved into the index.
averaged_ref = (
    metrics_ref
    .groupby(level=0)
    .apply(lambda model_runs: model_runs.groupby(by="trainsize").mean())
    .set_index(["trainsize_p"], append=True)
)

# Keep only the F1 columns needed for the table and give them display names.
averaged_ref = averaged_ref[["eval_f1",'eval_f1-l1l2',"eval_f1-all","eval_f1-l1","eval_f1-l2",'eval_f1-das']]
averaged_ref = averaged_ref.rename(
    columns={
        "eval_f1": "P+L1+P+L2 (train)",
        "eval_f1-l1l2": "L1+L2",
        'eval_f1-all': "All",
        "eval_f1-l1": "Level 1",
        "eval_f1-l2": "Level 2",
        'eval_f1-das': "DAS alignement",
    },
    errors="raise",
)

# NOTE(review): none of these keys match the labels used as `keys=` when the raw
# table was concatenated, so this rename looks like a no-op — confirm before removing.
averaged_ref = averaged_ref.rename(
    mapper={
        "camembert_io_ref": "CmBERT IO",
        "camembert_iob2_ref": "CmBERT IOB2",
        "prtn_camembert_io_ref": "Ptrn CmBERT IO",
        "prtn_camembert_iob2_ref": "Ptrn CmBERT IOB2",
    },
    errors="ignore",
    axis=0,
)

# Display names for the three index levels.
averaged_ref.index.names = ['Model','Trainset Size',"%"]
averaged_ref

## 231.2 Create the results table

In [None]:
# Export the averaged experiment-1 table as LaTeX source (one decimal place).
latex_table_ref = averaged_ref.copy()

# The caption names the joint-labelling approach (M2), which is the method this
# notebook evaluates; it previously named the M1 approach by mistake.
caption = "F1 score measured on the fine-tuned models CmBERT, CmBERT+ptrn on reference dataset with Joint-labelling approach (M2)."
print(latex_table_ref.to_latex(float_format="%.1f", multirow=True, caption=caption))
latex_table_ref

# 232 - Experiment 2: tables

In [None]:
# Folder holding the experiment-2 metric files, one sub-folder per model.
# NOTE(review): OUT_BASE already ends with "method_2", so this path contains
# ".../method_2/method_2/..." — confirm the repeated segment is intended.
METRICS_DIR_PERO = OUT_BASE / "method_2/m2-220-experiment_2_metrics"

In [None]:
# Collect the experiment-2 metric tables, one per model.
# A model is loaded only when its "run_2" folder exists on disk.
res, keys = [], []

if os.path.exists(METRICS_DIR_PERO / "221-camembert-ner-joint-labelling-io/run_2"):
    camembert_ner_io_pero = compile_metrics(METRICS_DIR_PERO / "221-camembert-ner-joint-labelling-io")
    # For the IO model, the global scores are also exposed under the "-l1l2" names.
    for alias, source in (
        ("eval_precision-l1l2", "eval_precision"),
        ("eval_recall-l1l2", "eval_recall"),
        ("eval_f1-l1l2", "eval_f1"),
        ("eval_accuracy-l1l2", "eval_accuracy"),
    ):
        camembert_ner_io_pero[alias] = camembert_ner_io_pero[source]
    res += [camembert_ner_io_pero]
    keys += ["CmBERT IO"]

if os.path.exists(METRICS_DIR_PERO / "222-camembert-ner-joint-labelling-iob2/run_2"):
    camembert_ner_iob2_pero = compile_metrics(METRICS_DIR_PERO / "222-camembert-ner-joint-labelling-iob2")
    res += [camembert_ner_iob2_pero]
    keys += ["CmBERT IOB2"]

if os.path.exists(METRICS_DIR_PERO / "223-pretrained-camembert-ner-joint-labelling-io/run_2"):
    ptrn_camembert_ner_io_pero = compile_metrics(METRICS_DIR_PERO / "223-pretrained-camembert-ner-joint-labelling-io")
    # Same aliasing as for the non-pretrained IO model.
    for alias, source in (
        ("eval_precision-l1l2", "eval_precision"),
        ("eval_recall-l1l2", "eval_recall"),
        ("eval_f1-l1l2", "eval_f1"),
        ("eval_accuracy-l1l2", "eval_accuracy"),
    ):
        ptrn_camembert_ner_io_pero[alias] = ptrn_camembert_ner_io_pero[source]
    res += [ptrn_camembert_ner_io_pero]
    keys += ["Ptrn CmBERT IO"]

if os.path.exists(METRICS_DIR_PERO / "224-pretrained-camembert-ner-joint-labelling-iob2/run_2"):
    ptrn_camembert_ner_iob2_pero = compile_metrics(METRICS_DIR_PERO / "224-pretrained-camembert-ner-joint-labelling-iob2")
    res += [ptrn_camembert_ner_iob2_pero]
    keys += ["Ptrn CmBERT IOB2"]

# Stack the per-model tables; the model label becomes the outermost index level.
print(keys)
metrics_raw_pero = pd.concat(res, keys=keys)
metrics_raw_pero

In [None]:
# List the metric columns available in the raw experiment-2 table.
metrics_raw_pero.columns

In [None]:
# Scale the F1 columns by 100 so they read as percentages; the raw table is left untouched.
eval_ = ["eval_f1","eval_f1-l1","eval_f1-all","eval_f1-l2",'eval_f1-l1l2','eval_f1-das']
metrics_pero = metrics_raw_pero.copy()
metrics_pero[eval_] = metrics_raw_pero.loc[:, eval_].mul(100.)
metrics_pero.head()

In [None]:
# Mean of every column over the runs, per model (outer index level) and train-set size.
# The averaged percentage column is then moved into the index.
averaged_pero = (
    metrics_pero
    .groupby(level=0)
    .apply(lambda model_runs: model_runs.groupby(by="trainsize").mean())
    .set_index(["trainsize_p"], append=True)
)

# Keep only the F1 columns needed for the table and give them display names.
averaged_pero = averaged_pero[["eval_f1",'eval_f1-l1l2',"eval_f1-all","eval_f1-l1","eval_f1-l2",'eval_f1-das']]
averaged_pero = averaged_pero.rename(
    columns={
        "eval_f1": "P+L1+P+L2 (train)",
        "eval_f1-l1l2": "L1+L2",
        'eval_f1-all': "All",
        "eval_f1-l1": "Level 1",
        "eval_f1-l2": "Level 2",
        'eval_f1-das': "DAS alignement",
    },
    errors="raise",
)

# NOTE(review): none of these keys match the labels used as `keys=` when the raw
# table was concatenated, so this rename looks like a no-op — confirm before removing.
averaged_pero = averaged_pero.rename(
    mapper={
        "camembert_io_pero": "CmBERT IO",
        "camembert_iob2_pero": "CmBERT IOB2",
        "prtn_camembert_io_pero": "Ptrn CmBERT IO",
        "prtn_camembert_iob2_pero": "Ptrn CmBERT IOB2",
    },
    errors="ignore",
    axis=0,
)

# Display names for the three index levels.
averaged_pero.index.names = ['Model','Trainset Size',"%"]
averaged_pero

In [None]:
# Export the averaged experiment-2 table as LaTeX source (one decimal place).
caption = "F1 score measured on the fine-tuned models CmBERT, CmBERT+ptrn on noisy dataset with Joint-labelling approach (M2)."
latex_table_pero = averaged_pero.copy()
latex_source = latex_table_pero.to_latex(float_format="%.1f", multirow=True, caption=caption)
print(latex_source)
latex_table_pero

# 233 - Experiments 1 & 2: table

Build averaged table of F1-score for each dataset, each BERT-based model and each annotation model.

In [None]:
# Stack both experiments under a dataset label, then drop index levels 2 and 3
# (train-set size and percentage) so rows are keyed by dataset and model only.
averaged = (
    pd.concat([averaged_ref, averaged_pero], keys=["Reference", "Pero OCR"])
    .reset_index(level=[2, 3], drop=True)
)
averaged

In [None]:
# Export the combined table as LaTeX source (one decimal place).
# The caption previously read "CmBERTand" (missing space).
caption = "F1 score measured on the fine-tuned models CmBERT and CmBERT+ptrn on reference dataset and noisy dataset with Joint-labelling approach (M2)."
print(averaged.to_latex(float_format="%.1f", multirow=True, caption=caption))
averaged

# 234 : Experiments 1 and 2: table by class

In [None]:
import pandas as pd
import json
from pandas import json_normalize

def compile_metrics_by_classes(path, classes, max_trainset_size=None):
    """Gather the per-class test metrics of every run found under ``path``.

    Every entry of ``path`` whose name contains ``"run"`` is treated as a run
    directory; the first number in its name is the run id. Inside a run
    directory, every file whose name contains both ``"test_"`` and ``"json"``
    is read with ``pandas.read_json``; the entries named in ``classes`` are
    kept, one row per class. The first number in the file name is taken as the
    train-set size of that evaluation.

    Parameters
    ----------
    path : str or os.PathLike
        Directory holding the run directories.
    classes : list of str
        Keys of the JSON report to keep (e.g. ``"eval_PER"``). Each selected
        entry must provide a ``number`` field, which is cast to ``int``.
    max_trainset_size : int, optional
        Denominator used to compute ``trainsize_p`` (train-set size as a
        percentage). Defaults to the notebook constant ``MAX_TRAINSET_SIZE``.

    Returns
    -------
    pandas.DataFrame
        One row per (run, classe) pair, indexed by those two levels. ``run`` is
        the run id as a string of digits. When a run holds several metrics
        files, only the first non-null value of each column is kept per pair.

    Raises
    ------
    KeyError
        If a requested class is missing from a metrics file.
    ValueError
        If no metrics file is found under ``path``.
    """
    if max_trainset_size is None:
        max_trainset_size = MAX_TRAINSET_SIZE

    frames = []
    # Sorted listings make the result independent of the file-system order.
    for run_dir in sorted(os.listdir(path)):
        if 'run' not in run_dir:
            continue
        run_path = os.path.join(path, run_dir)
        nrun = re.search(r"\d+", run_dir)[0]

        for file in sorted(os.listdir(run_path)):
            if "test_" not in file or 'json' not in file:
                continue
            size = int(re.search(r"\d+", file)[0])

            report = pd.read_json(os.path.join(run_path, file))
            # Keep the requested classes only and put one class per row.
            per_class = pd.DataFrame.from_dict({key: report[key] for key in classes}).T
            per_class['number'] = per_class['number'].astype(int)
            per_class["trainsize"] = size
            per_class["run"] = nrun
            per_class["trainsize_p"] = round(100 * size / max_trainset_size, 1)
            frames.append(per_class)

    if not frames:
        raise ValueError(f"No test metrics file found in any run directory under {path}")

    # Concatenate once, then expose the class label (row index) as a column so
    # it can be used as a grouping key.
    df = pd.concat(frames)
    df["classe"] = df.index
    return df.groupby(["run", "classe"]).first()

In [None]:
# Entity classes whose per-class scores are reported.
classes = ['eval_PER','eval_ACT','eval_ACT_L1','eval_ACT_L2','eval_DESC','eval_TITREH','eval_TITREP','eval_SPAT','eval_LOC','eval_CARDINAL','eval_FT'
 #'eval_TITRE'
]

# METRICS_DIR was not defined by any earlier cell, so this cell failed with a
# NameError after "Restart & Run All".
# NOTE(review): the 211-214 folder prefixes match the experiment-1 folders, so
# the experiment-1 directory is assumed here — confirm this is the intended one.
METRICS_DIR = METRICS_DIR_REF

# Load the per-class metrics of each model whose "run_2" folder exists.
# The existence check and the load always use the same folder.
res = []
keys = []

model_dir = METRICS_DIR / "211-camembert-ner-joint-labelling-io"
if os.path.exists(model_dir / "run_2"):
    camembert_ner_io = compile_metrics_by_classes(model_dir, classes)
    res.append(camembert_ner_io)
    keys.append("CmBERT IO")

model_dir = METRICS_DIR / "212-camembert-ner-joint-labelling-iob2"
if os.path.exists(model_dir / "run_2"):
    camembert_ner_iob2 = compile_metrics_by_classes(model_dir, classes)
    res.append(camembert_ner_iob2)
    keys.append("CmBERT IOB2")

model_dir = METRICS_DIR / "213-pretrained-camembert-ner-joint-labelling-io-classes"
if os.path.exists(model_dir / "run_2"):
    ptrn_camembert_ner_io = compile_metrics_by_classes(model_dir, classes)
    res.append(ptrn_camembert_ner_io)
    keys.append("Ptrn CmBERT IO")

# The check previously tested ".../214-...-iob2/run_2" while the metrics were
# read from the "-classes" folder; both now use the folder that is read.
model_dir = METRICS_DIR / "214-pretrained-camembert-ner-joint-labelling-iob2-classes"
if os.path.exists(model_dir / "run_2"):
    ptrn_camembert_ner_iob2 = compile_metrics_by_classes(model_dir, classes)
    res.append(ptrn_camembert_ner_iob2)
    keys.append("Ptrn CmBERT IOB2")

In [None]:
# Stack the per-model class tables; the model label becomes the outermost index level.
metrics_raw_classes = pd.concat(res, keys=keys)
metrics_raw_classes

In [None]:
# Scale precision / recall / F1 by 100 so they read as percentages.
# NOTE: this cell overwrites its own input, so running it a second time
# without re-running the previous cell multiplies the scores by 100 again.
val = ["f1","precision","recall"]
metrics_raw_classes = metrics_raw_classes.copy()
metrics_raw_classes[val] = metrics_raw_classes.loc[:, val].mul(100.)
metrics_raw_classes

In [None]:
# Mean of every column over the runs, per model (outer index level) and entity class.
# The averaged entity count is then moved into the index.
averaged_classes = (
    metrics_raw_classes
    .groupby(level=0)
    .apply(lambda model_runs: model_runs.groupby(by="classe").mean())
    .set_index(["number"], append=True)
)

# Keep only the three score columns.
averaged_classes = averaged_classes[["precision","recall","f1"]]

# Replace the raw "eval_*" class labels with display names.
averaged_classes = averaged_classes.rename(
    mapper={
        'eval_PER': 'PER',
        'eval_ACT': 'ACT',
        'eval_ACT_L1': 'ACT L1 only',
        'eval_ACT_L2': 'ACT L2 only',
        'eval_DESC': 'DESC',
        'eval_TITREH': 'TITREH',
        'eval_TITREP': 'TITREP',
        'eval_SPAT': 'SPAT',
        'eval_LOC': 'LOC',
        'eval_CARDINAL': 'CARDINAL',
        'eval_FT': 'FT',
    },
    errors="ignore",
    axis=0,
)

# Display names for the three index levels.
averaged_classes.index.names = ['Test','Entity type',"Count"]
averaged_classes