# 140 - Experiment 1: Figures and metrics

In [None]:
import os, sys
from pathlib import Path

# Detect whether the notebook is running inside Google Colab and publish the
# answer as an environment variable (stored as the string "True" / "False").
ENV_IS_GOOGLE_COLAB = 'google.colab' in str(get_ipython())
os.environ["ENV_IS_GOOGLE_COLAB"] = str(ENV_IS_GOOGLE_COLAB)

if not ENV_IS_GOOGLE_COLAB:
    # Local run: BASE is the current working directory (directory of this approach).
    BASE = Path().resolve()
    # Adapt the two paths below to your situation.
    # Where your data are located before the Dataset object creation.
    DATASETS = Path('../dataset_ICDAR').resolve()
    # Where the results of this notebook are saved.
    OUT_BASE = Path('../res_ICDAR/method_1').resolve()
else:
    from google.colab import drive

    # Mount Google Drive; the project folder lives below the mount point.
    mountpoint = Path("/content/drive")
    drive.mount(str(mountpoint))
    base = mountpoint / "MyDrive/article_icdar_2023"  # Adapt this to your situation
    # Make the project folder importable, then keep an absolute version of it.
    sys.path.append(str(base))
    BASE = Path(base).resolve()
    DATASETS = BASE / "dataset_ICDAR"
    OUT_BASE = BASE / "res_ICDAR/method_1"

# Echo the resolved configuration so it is visible in the cell output.
for location in (sys.path, BASE, DATASETS, OUT_BASE):
    print(location)

# Tools

In [None]:
import pandas as pd
import re
import os
import json

def compile_metrics(path): 
    rundirs = [f for f in os.listdir(path) if "run_" in f]

    df = pd.DataFrame()
    for run_dir in rundirs:
        run_path = path / run_dir
        nrun = re.search("\d+",run_dir)[0]
        
        files = [f for f in os.listdir(run_path) if "test_" in f and "level" not in f]
        sizes = [int(re.search("\d+",f)[0]) for f in files]
        
        for file, size in zip(files,sizes):
            file_path = run_path / file
            dftmp = pd.read_json(file_path, typ='series')
            dftmp = pd.DataFrame([dftmp])
            
            dftmp["trainsize"] = size 
            dftmp["run"] = nrun
            dftmp["trainsize_p"] = round(100 * size / 6084, 1)
            df = pd.concat([df, dftmp])

    return df.groupby(["run","trainsize"]).first()

# 141 - Experiment 1: figures and tables

In [None]:
from pathlib import Path

# PATHS
# Directory holding the experiment 1 metrics (one sub-folder per model).
METRICS_DIR_E1 = OUT_BASE / "m1-110-experiment_1_metrics"
# A Path object is always truthy, so check that the directory really exists.
assert METRICS_DIR_E1.is_dir(), f"Metrics directory not found: {METRICS_DIR_E1}"
METRICS_DIR_E1

In [None]:
# Load the experiment 1 metrics of the four fine-tuned models from the metrics JSON files,
# keyed by the label used for each model in the tables.
ref_frames_by_model = {
    "CmBERT IO": compile_metrics(METRICS_DIR_E1 / "111-camembert-ner-multihead-io"),
    "CmBERT IOB2": compile_metrics(METRICS_DIR_E1 / "112-camembert-ner-multihead-iob2"),
    "Ptrn CmBERT IO": compile_metrics(METRICS_DIR_E1 / "113-pretrained-camembert-ner-multihead-io"),
    "Ptrn CmBERT IOB2": compile_metrics(METRICS_DIR_E1 / "114-pretrained-camembert-multihead-iob2"),
}
# Keep the individual per-model tables available under their usual names.
(camembert_io_ref,
 camembert_iob2_ref,
 prtn_camembert_io_ref,
 prtn_camembert_iob2_ref) = ref_frames_by_model.values()

# Stack the four tables; the model label becomes the outermost index level.
metrics_raw_ref = pd.concat(
    list(ref_frames_by_model.values()),
    keys=list(ref_frames_by_model.keys()),
)
metrics_raw_ref

In [None]:
# Scale the precision / recall / F1 columns by 100 so they are stored as percentages.
# NOTE(review): "precision-jl" uses a hyphen while "recall_jl" and "f1_jl" use an
# underscore - confirm these match the keys of the metrics JSON files.
eval_ = [
    "precision-l1", "recall-l1", "f1-l1",
    "precision-l2", "recall-l2", "f1-l2",
    "precision", "recall", "f1",
    "precision-jl", "recall_jl", "f1_jl",
]
# Work on a copy so the raw metrics stay untouched.
metrics_ref = metrics_raw_ref.copy()
metrics_ref[eval_] = metrics_ref[eval_].mul(100.)
metrics_ref.head()

### Build the averaged table

In [None]:
# Build the averaged F1 table of experiment 1 in one non-mutating chain, so the
# cell can be re-run safely and no in-place edit is applied to a column slice.
# Model labels already come from the keys given to pd.concat when
# metrics_raw_ref was built, so no index relabelling is needed here.
averaged_ref = (
    metrics_ref
    # Average over runs: for each model (index level 0), mean per train-set size
    .groupby(level=0)
    .apply(lambda grp: grp.groupby(by="trainsize").mean())
    # Expose the train-set percentage as a third index level
    .set_index("trainsize_p", append=True)
    # Keep just the necessary columns
    .loc[:, ["f1-l1", "f1-l2", "f1", "f1_jl"]]
    # Set pretty names
    .rename(
        columns={
            "f1-l1": "Level 1",
            "f1-l2": "Level 2",
            "f1": "Global",
            "f1_jl": "P+L1+P+L2",
        },
        errors="raise",
    )
    .rename_axis(index=["Model", "Trainset Size", "%"])
)
averaged_ref

### Create the results table

In [None]:
# Reshape for LaTeX: metrics move into the row index while train-set size and
# percentage (index levels 1 and 2) become column levels; the metric name is
# then placed before the model name and the rows are sorted on it.
latex_table_ref = (
    averaged_ref
    .stack()
    .unstack(level=[1, 2])
    .swaplevel(0, 1)
    .sort_index(level=0)
)

caption = "F1 score measured on the fine-tuned models CmBERT, CmBERT+ptrn on reference dataset with Independent Flat NER layers approach (M1)."
# Print the LaTeX source (ready to copy), then display the table itself.
latex_source_ref = latex_table_ref.to_latex(float_format="%.1f", multirow=True, caption=caption)
print(latex_source_ref)
latex_table_ref

# 142 - Experiment 2: figures and tables

In [None]:
from pathlib import Path

# PATHS
# Directory holding the experiment 2 metrics (one sub-folder per model).
METRICS_DIR_E2 = OUT_BASE / "m1-120-experiment_2_metrics"
# A Path object is always truthy, so check that the directory really exists.
assert METRICS_DIR_E2.is_dir(), f"Metrics directory not found: {METRICS_DIR_E2}"
METRICS_DIR_E2

In [None]:
# Load the experiment 2 metrics of the four fine-tuned models from the metrics JSON files,
# keyed by the label used for each model in the tables.
pero_frames_by_model = {
    "CmBERT IO": compile_metrics(METRICS_DIR_E2 / "121-camembert-ner-multihead-io"),
    "CmBERT IOB2": compile_metrics(METRICS_DIR_E2 / "122-camembert-ner-multihead-iob2"),
    "Ptrn CmBERT IO": compile_metrics(METRICS_DIR_E2 / "123-pretrained-camembert-ner-multihead-io"),
    "Ptrn CmBERT IOB2": compile_metrics(METRICS_DIR_E2 / "124-pretrained-camembert-multihead-iob2"),
}
# Keep the individual per-model tables available under their usual names.
(camembert_io_pero,
 camembert_iob2_pero,
 prtn_camembert_io_pero,
 prtn_camembert_iob2_pero) = pero_frames_by_model.values()

# Stack the four tables; the model label becomes the outermost index level.
metrics_raw_pero = pd.concat(
    list(pero_frames_by_model.values()),
    keys=list(pero_frames_by_model.keys()),
)
metrics_raw_pero

In [None]:
# Scale the precision / recall / F1 columns by 100 so they are stored as percentages.
# NOTE(review): "precision-jl" uses a hyphen while "recall_jl" and "f1_jl" use an
# underscore - confirm these match the keys of the metrics JSON files.
eval_ = [
    "precision-l1", "recall-l1", "f1-l1",
    "precision-l2", "recall-l2", "f1-l2",
    "precision", "recall", "f1",
    "precision-jl", "recall_jl", "f1_jl",
]
# Work on a copy so the raw metrics stay untouched.
metrics_pero = metrics_raw_pero.copy()
metrics_pero[eval_] = metrics_pero[eval_].mul(100.)
metrics_pero.head()

### Build the averaged table


In [None]:
# Build the averaged F1 table of experiment 2 in one non-mutating chain, so the
# cell can be re-run safely and no in-place edit is applied to a column slice.
# Model labels already come from the keys given to pd.concat when
# metrics_raw_pero was built, so no index relabelling is needed here.
averaged_pero = (
    metrics_pero
    # Average over runs: for each model (index level 0), mean per train-set size
    .groupby(level=0)
    .apply(lambda grp: grp.groupby(by="trainsize").mean())
    # Expose the train-set percentage as a third index level
    .set_index("trainsize_p", append=True)
    # Keep just the necessary columns
    .loc[:, ["f1-l1", "f1-l2", "f1", "f1_jl"]]
    # Set pretty names
    .rename(
        columns={
            "f1-l1": "Level 1",
            "f1-l2": "Level 2",
            "f1": "Global",
            "f1_jl": "P+L1+P+L2",
        },
        errors="raise",
    )
    .rename_axis(index=["Model", "Trainset Size", "%"])
)
averaged_pero

### Create the results table

In [None]:
# Reshape for LaTeX: metrics move into the row index while train-set size and
# percentage (index levels 1 and 2) become column levels; the metric name is
# then placed before the model name and the rows are sorted on it.
latex_table_pero = (
    averaged_pero
    .stack()
    .unstack(level=[1, 2])
    .swaplevel(0, 1)
    .sort_index(level=0)
)

caption = "F1 score measured on the fine-tuned models CmBERT, CmBERT+ptrn on noisy dataset with Independent Flat NER layers approach (M1)."
# Print the LaTeX source (ready to copy).
latex_source_pero = latex_table_pero.to_latex(float_format="%.1f", multirow=True, caption=caption)
print(latex_source_pero)

# 143 - Experiments 1 and 2 results table

In [None]:
# Stack both experiments under a dataset label, then drop the two train-size
# index levels (positions 2 and 3) so rows are keyed by dataset and model only.
averaged = pd.concat(
    [averaged_ref, averaged_pero],
    keys=["Reference", "Pero OCR"],
).droplevel([2, 3])
averaged

In [None]:
# Caption of the combined table; the missing space in "CmBERTand" is fixed
# because this text is emitted verbatim into the LaTeX output.
caption = "F1 score measured on the fine-tuned models CmBERT and CmBERT+ptrn on reference dataset and noisy dataset with Independent Flat NER layers approach (M1)."
# Print the LaTeX source (ready to copy), then display the table itself.
print(averaged.to_latex(float_format="%.1f", multirow=True, caption=caption))
averaged