In [238]:
import csv
import os
import numpy as np
import pandas as pd

def find_files(root_dir, binary=None, iob=None):
    """Collect classification-report files found beneath ``root_dir``.

    The tree is walked recursively with ``os.walk``. A file is kept when its
    name contains the marker of a requested report flavour; that marker is
    removed from the file name to form the dictionary key.

    Args:
        root_dir: Directory at which the recursive walk starts.
        binary: When truthy, keep files whose name contains
            ``'norne_CR_binary_'``.
        iob: When truthy, keep files whose name contains ``"norne_CR_IOB2_"``.

    Returns:
        dict: Marker-stripped file name -> joined path of the file. A key that
        is produced more than once keeps the path stored last.
    """
    # Binary marker first, IOB2 second, so overwrites happen in the same order
    # when both flavours are requested.
    wanted_markers = []
    if binary:
        wanted_markers.append('norne_CR_binary_')
    if iob:
        wanted_markers.append("norne_CR_IOB2_")

    located = {}
    for folder, _subfolders, names in os.walk(root_dir):
        for name in names:
            for marker in wanted_markers:
                if marker in name:
                    located[name.replace(marker, '')] = os.path.join(folder, name)
    return located





In [239]:
# Folder that is searched recursively for the classification-report CSVs.
# It can be overridden with the NORNE_RESULTS_DIR environment variable so the
# notebook also runs on other machines; the original path stays the default.
root_directory = os.environ.get(
    'NORNE_RESULTS_DIR',
    '/Users/aarnes/Documents/GitHub/MA_Peter-R-ysland-Aarnes/Results',
)
# Marker-stripped file name -> path, one mapping per report flavour.
files = find_files(root_directory, binary=True)
files_iob = find_files(root_directory, iob=True)

In [240]:
# Display the IOB2 report paths that find_files discovered, keyed by the
# marker-stripped file name.
files_iob

{'norbert.csv': '/Users/aarnes/Documents/GitHub/MA_Peter-R-ysland-Aarnes/Results/norbert/norne_CR_IOB2_norbert.csv',
 'nbailab-base-ner-scandi.csv': '/Users/aarnes/Documents/GitHub/MA_Peter-R-ysland-Aarnes/Results/nbailab-base-ner-scandi/norne_CR_IOB2_nbailab-base-ner-scandi.csv',
 'bert-base-multilingual-cased.csv': '/Users/aarnes/Documents/GitHub/MA_Peter-R-ysland-Aarnes/Results/bert-base-multilingual-cased/norne_CR_IOB2_bert-base-multilingual-cased.csv',
 'norbert2.csv': '/Users/aarnes/Documents/GitHub/MA_Peter-R-ysland-Aarnes/Results/norbert2/norne_CR_IOB2_norbert2.csv',
 'nb-bert-base.csv': '/Users/aarnes/Documents/GitHub/MA_Peter-R-ysland-Aarnes/Results/nb-bert-base/norne_CR_IOB2_nb-bert-base.csv'}

In [241]:
# Display the binary report paths that find_files discovered, keyed by the
# marker-stripped file name.
files

{'norbert.csv': '/Users/aarnes/Documents/GitHub/MA_Peter-R-ysland-Aarnes/Results/norbert/norne_CR_binary_norbert.csv',
 'nbailab-base-ner-scandi.csv': '/Users/aarnes/Documents/GitHub/MA_Peter-R-ysland-Aarnes/Results/nbailab-base-ner-scandi/norne_CR_binary_nbailab-base-ner-scandi.csv',
 'bert-base-multilingual-cased.csv': '/Users/aarnes/Documents/GitHub/MA_Peter-R-ysland-Aarnes/Results/bert-base-multilingual-cased/norne_CR_binary_bert-base-multilingual-cased.csv',
 'norbert2.csv': '/Users/aarnes/Documents/GitHub/MA_Peter-R-ysland-Aarnes/Results/norbert2/norne_CR_binary_norbert2.csv',
 'nb-bert-base.csv': '/Users/aarnes/Documents/GitHub/MA_Peter-R-ysland-Aarnes/Results/nb-bert-base/norne_CR_binary_nb-bert-base.csv'}

In [242]:
"norbert.csv".split(".")[0]

'norbert'

In [357]:
def extract_rows(file_path, row_type, binary=None, IOB2=None):
    """Collect the scores of every CSV row whose first cell equals ``row_type``.

    For each matching row, columns 1-3 are parsed as floats and multiplied by
    100. In IOB2 mode column 4 is additionally parsed (via ``float``) into an
    ``int`` and collected as support.

    NOTE(review): column 1 is stored under ``"recall"`` and column 2 under
    ``"precision"``. If the CSV columns are ordered precision, recall, ...
    (the usual classification-report layout) these two labels are swapped.
    Confirm against the CSV header before relying on the labels.

    Args:
        file_path: Path of the CSV file to read.
        row_type: Value the first cell of a row must equal, e.g.
            ``"macro avg"`` or ``"micro avg"``.
        binary: When truthy, collect columns 1-3 only; takes precedence over
            ``IOB2``.
        IOB2: When truthy (and ``binary`` is not), also collect column 4.

    Returns:
        dict: Keys ``"precision"``, ``"recall"``, ``"f1"`` and ``"support"``,
        each mapping to a list with one entry per matching row. ``"support"``
        is empty in binary mode, and every list is empty when nothing matches
        or neither mode flag is set.

    Raises:
        ValueError: If a matching row holds a non-numeric score.
        IndexError: If a matching row has fewer columns than required.
    """
    # The accumulators live outside the row loop so that an empty file yields
    # empty lists instead of failing on an unbound result dictionary.
    r, p, f1, s = [], [], [], []
    # newline='' is what the csv module asks for when it is handed a file object.
    with open(file_path, 'r', newline='') as file:
        for row in csv.reader(file):
            if not row or row[0] != row_type:
                continue
            if not (binary or IOB2):
                continue
            r.append(float(row[1])*100)
            p.append(float(row[2])*100)
            f1.append(float(row[3])*100)
            if not binary:
                # Only reached in IOB2 mode; values such as "2393.0" need the
                # float() step before int().
                s.append(int(float(row[4])))
    return {"precision": p, "recall": r, "f1": f1, "support": s}


In [358]:
def get_sd_from_extracted_rows_keys(iob2_data, key):
    """Print mean / standard deviation per metric and return LaTeX cells.

    Output, in order: the part of ``key`` before its first ``"."``, one
    plain-text summary line per metric, all LaTeX cells joined with ``" & "``
    and terminated with ``" \\\\ [2pt]"`` on one line, then a blank line.

    Args:
        iob2_data: Mapping of metric name -> iterable of numeric values, such
            as the dictionary returned by ``extract_rows``.
        key: Label for the block, e.g. a file name like ``"norbert.csv"``.

    Returns:
        list[str]: One ``"<mean> (\\pm{<std>})"`` string per metric, in the
        iteration order of ``iob2_data``.
    """
    texts = []
    latex = []
    model_name = key.split(".")[0]

    print(model_name)
    # A separate loop name keeps the ``key`` parameter from being shadowed.
    for metric, raw_values in iob2_data.items():
        values = [float(x) for x in raw_values]
        avg = np.mean(values)
        std = np.std(values)
        texts.append(f"{metric}: Average = {avg:.2f}, Standard Deviation = {std:.2f}")
        # "\\pm" spells the backslash explicitly; the text produced is unchanged.
        latex.append(f"{avg:.2f} (\\pm{{{std:.2f}}})")
    for line in texts:
        print(line)
    if latex:
        # Joining by position: comparing each cell with latex[-1] by value ended
        # the row early whenever an earlier cell had the same text as the last.
        print(" & ".join(latex) + " \\\\ [2pt]")
    print()

    return latex

In [359]:
# Summarise the "micro avg" row of every model's IOB2 report
# (plain-text lines followed by the LaTeX cells).
for key in files_iob:
    iob2_data = extract_rows(files_iob[key], "micro avg", IOB2=True)
    get_sd_from_extracted_rows_keys(iob2_data, key)

norbert
precision: Average = 82.88, Standard Deviation = 0.71
recall: Average = 81.53, Standard Deviation = 2.11
f1: Average = 82.19, Standard Deviation = 1.26
support: Average = 2393.00, Standard Deviation = 0.00
82.88 (\pm{0.71}) & 81.53 (\pm{2.11}) & 82.19 (\pm{1.26}) & 2393.00 (\pm{0.00}) \\ [2pt]

nbailab-base-ner-scandi
precision: Average = 86.90, Standard Deviation = 0.55
recall: Average = 86.26, Standard Deviation = 0.47
f1: Average = 86.58, Standard Deviation = 0.45
support: Average = 2393.00, Standard Deviation = 0.00
86.90 (\pm{0.55}) & 86.26 (\pm{0.47}) & 86.58 (\pm{0.45}) & 2393.00 (\pm{0.00}) \\ [2pt]

bert-base-multilingual-cased
precision: Average = 78.93, Standard Deviation = 1.01
recall: Average = 81.06, Standard Deviation = 1.25
f1: Average = 79.97, Standard Deviation = 0.41
support: Average = 2393.00, Standard Deviation = 0.00
78.93 (\pm{1.01}) & 81.06 (\pm{1.25}) & 79.97 (\pm{0.41}) & 2393.00 (\pm{0.00}) \\ [2pt]

norbert2
precision: Average = 84.96, Standard Devia

In [360]:
# Same "micro avg" summary, restricted to a single model's IOB2 report.
# Nothing is printed when that report was not discovered.
key = "nbailab-base-ner-scandi.csv"
if key in files_iob:
    iob2_data = extract_rows(files_iob[key], "micro avg", IOB2=True)
    get_sd_from_extracted_rows_keys(iob2_data, key)

nbailab-base-ner-scandi
precision: Average = 86.90, Standard Deviation = 0.55
recall: Average = 86.26, Standard Deviation = 0.47
f1: Average = 86.58, Standard Deviation = 0.45
support: Average = 2393.00, Standard Deviation = 0.00
86.90 (\pm{0.55}) & 86.26 (\pm{0.47}) & 86.58 (\pm{0.45}) & 2393.00 (\pm{0.00}) \\ [2pt]



In [381]:
# Row labels looked up in the IOB2 report, in the order the table rows are printed.
lst = ["PER", "LOC", "ORG", "GPE_LOC", "GPE_ORG", "PROD", "EVT", "DRV", "MISC", "micro avg"]
model_key = "nbailab-base-ner-scandi.csv"

# Number of trailing characters dropped from the support cell so that only the
# integer part remains, e.g. "961.00 (\pm{0.00})" -> "961".
# This relies on the support deviation rendering as "0.00".
SUPPORT_SUFFIX_LEN = len(".00 (\\pm{0.00})")  # == 15

# Row label -> extracted metric lists for the selected model.
d = {}
if model_key in files_iob:
    for name in lst:
        d[name] = extract_rows(files_iob[model_key], name, IOB2=True)

# One list of LaTeX cells per row label (the helper also prints a summary).
all_latex = []
for k, v in d.items():
    latex = get_sd_from_extracted_rows_keys(v, k)
    all_latex.append(latex)

# Emit one LaTeX table row per label. Cells are addressed by position: the
# previous value comparisons (`i1==latex[0]`, `i1!=latex[-1]`) misfired when
# two cells had the same text, `i1!=[0]` was always true, and no "&" was
# printed between the second and third score cell.
for latex, name in zip(all_latex, lst):
    if not latex:
        continue
    *score_cells, support_cell = latex
    row_cells = [name, *score_cells, support_cell[:-SUPPORT_SUFFIX_LEN]]
    print(" & ".join(row_cells) + " \\\\ [2pt]")

PER
precision: Average = 96.75, Standard Deviation = 0.86
recall: Average = 94.97, Standard Deviation = 0.76
f1: Average = 95.85, Standard Deviation = 0.26
support: Average = 961.00, Standard Deviation = 0.00
96.75 (\pm{0.86}) & 94.97 (\pm{0.76}) & 95.85 (\pm{0.26}) & 961.00 (\pm{0.00}) \\ [2pt]

LOC
precision: Average = 78.38, Standard Deviation = 3.15
recall: Average = 75.26, Standard Deviation = 1.85
f1: Average = 76.74, Standard Deviation = 1.64
support: Average = 185.00, Standard Deviation = 0.00
78.38 (\pm{3.15}) & 75.26 (\pm{1.85}) & 76.74 (\pm{1.64}) & 185.00 (\pm{0.00}) \\ [2pt]

ORG
precision: Average = 80.15, Standard Deviation = 0.89
recall: Average = 84.84, Standard Deviation = 0.48
f1: Average = 82.43, Standard Deviation = 0.69
support: Average = 521.00, Standard Deviation = 0.00
80.15 (\pm{0.89}) & 84.84 (\pm{0.48}) & 82.43 (\pm{0.69}) & 521.00 (\pm{0.00}) \\ [2pt]

GPE_LOC
precision: Average = 93.32, Standard Deviation = 0.67
recall: Average = 88.93, Standard Deviation 

In [379]:
# Plain-text mean / standard deviation of the "micro avg" row for each
# model's IOB2 report.
for k, v in files_iob.items():
    model_name = k.split(".")[0]
    iob2_data = extract_rows(v, "micro avg", IOB2=True)
    # Blank separator line, then the model name.
    print(f"\n{model_name}")
    for key, collected in iob2_data.items():
        values = list(map(float, collected))
        avg, std = np.mean(values), np.std(values)
        print(f"{key}: Average = {avg:.2f}, Standard Deviation = {std:.2f}")


norbert
precision: Average = 82.88, Standard Deviation = 0.71
recall: Average = 81.53, Standard Deviation = 2.11
f1: Average = 82.19, Standard Deviation = 1.26
support: Average = 2393.00, Standard Deviation = 0.00

nbailab-base-ner-scandi
precision: Average = 86.90, Standard Deviation = 0.55
recall: Average = 86.26, Standard Deviation = 0.47
f1: Average = 86.58, Standard Deviation = 0.45
support: Average = 2393.00, Standard Deviation = 0.00

bert-base-multilingual-cased
precision: Average = 78.93, Standard Deviation = 1.01
recall: Average = 81.06, Standard Deviation = 1.25
f1: Average = 79.97, Standard Deviation = 0.41
support: Average = 2393.00, Standard Deviation = 0.00

norbert2
precision: Average = 84.96, Standard Deviation = 0.69
recall: Average = 83.80, Standard Deviation = 0.62
f1: Average = 84.38, Standard Deviation = 0.37
support: Average = 2393.00, Standard Deviation = 0.00

nb-bert-base
precision: Average = 87.17, Standard Deviation = 0.75
recall: Average = 85.84, Standard 

In [371]:
# Length of the LaTeX fragment "(\pm{0.00})". A raw string keeps the backslash
# without relying on the unrecognised escape sequence "\p", which newer Python
# versions warn about.
len(r"(\pm{0.00})")

11

In [None]:
# Plain-text mean / standard deviation of the "macro avg" row for each
# model's binary report.
for k, v in files.items():
    model_name=k.split(".")[0]
    data = extract_rows(v, row_type="macro avg", binary=True)
    print()
    print(model_name)
    for key in data:
        values = [float(x) for x in data[key]]
        if not values:
            # In binary mode extract_rows leaves "support" empty; np.mean([])
            # would print "nan" and emit a RuntimeWarning, so skip such metrics.
            continue
        avg = np.mean(values)
        std = np.std(values)
        print(f"{key}: Average = {avg:.2f}, Standard Deviation = {std:.2f}")


norbert
precision: Average = 96.04, Standard Deviation = 0.45
recall: Average = 96.61, Standard Deviation = 0.56
f1: Average = 96.32, Standard Deviation = 0.10

nbailab-base-ner-scandi
precision: Average = 96.71, Standard Deviation = 0.16
recall: Average = 97.44, Standard Deviation = 0.12
f1: Average = 97.07, Standard Deviation = 0.13

bert-base-multilingual-cased
precision: Average = 94.68, Standard Deviation = 0.51
recall: Average = 96.38, Standard Deviation = 0.49
f1: Average = 95.51, Standard Deviation = 0.10

norbert2
precision: Average = 96.21, Standard Deviation = 0.17
recall: Average = 96.76, Standard Deviation = 0.21
f1: Average = 96.48, Standard Deviation = 0.08

nb-bert-base
precision: Average = 96.98, Standard Deviation = 0.34
recall: Average = 97.16, Standard Deviation = 0.27
f1: Average = 97.06, Standard Deviation = 0.09
