# Udapi stats

In [2]:
import udapi

In [3]:
from collections import defaultdict

def process_file(f):
    """Collect coreference and MISC-attribute counts for one CoNLL-U file.

    Args:
        f: Path of the file to load with ``udapi.Document``.

    Returns:
        A ``defaultdict(int)`` containing:

        * ``"entities[Stats]"`` -- ``len(doc.coref_entities)``
        * ``"mentions[Stats]"`` -- ``len(doc.coref_mentions)``
        * ``"<attr>[MiscStats]"`` -- for every MISC attribute name, the
          number of nodes carrying that attribute. A ``name`` attribute
          whose value is ``"O"`` is not counted.
    """
    doc = udapi.Document(f)
    counts = defaultdict(int)
    counts["entities[Stats]"] = len(doc.coref_entities)
    counts["mentions[Stats]"] = len(doc.coref_mentions)
    for node in doc.nodes:
        for attr in node.misc:
            # name=O marks an "Outside" token, i.e. not a real entity label,
            # so it must not inflate the "name" count.
            if attr == "name" and node.misc[attr] == "O":
                continue
            counts[attr + "[MiscStats]"] += 1
    return counts

# Try the counter on a single bokmaal file; the call is the last expression
# of the cell, so the notebook shows the returned counts as the cell output.
f = "../output/narc/annotations_conll_bokmaal/aftenposten_01.conllu"
process_file(f)

defaultdict(int,
            {'entities[Stats]': 41,
             'mentions[Stats]': 44,
             'Entity[MiscStats]': 58,
             'Bridge[MiscStats]': 2})

# NARC CoNLL-U results

In [6]:
import os

# Sum the per-file counts over every .conllu file of each written standard
# and print one aggregated dictionary per language.
for lang in ["bokmaal", "nynorsk"]:
    path = f"../output/narc/annotations_conll_{lang}"
    stats = defaultdict(int)
    for conll_f in os.listdir(path):
        # The directory may contain other files; only CoNLL-U files count.
        if conll_f.endswith(".conllu"):
            _stats = process_file(os.path.join(path, conll_f))
            for key, value in _stats.items():
                stats[key] += value
    print(lang)
    print(stats)

bokmaal
defaultdict(<class 'int'>, {'entities[Stats]': 55225, 'mentions[Stats]': 77565, 'Entity[MiscStats]': 92633, 'Bridge[MiscStats]': 1060, 'SplitAnte[MiscStats]': 140})
nynorsk
defaultdict(<class 'int'>, {'entities[Stats]': 45918, 'mentions[Stats]': 63137, 'Entity[MiscStats]': 75981, 'Bridge[MiscStats]': 868, 'SplitAnte[MiscStats]': 81})


# Aligned NARC results:

In [8]:
import os
# Aggregate the per-file counts of the aligned NARC output, one language at
# a time, and print the language name followed by its totals.
for lang in ("bokmaal", "nynorsk"):
    path = f"../output/aligned/no-narc_{lang}"
    stats = defaultdict(int)
    # Restrict the directory listing to CoNLL-U files before processing.
    conllu_names = (name for name in os.listdir(path) if name.endswith(".conllu"))
    for name in conllu_names:
        file_counts = process_file(os.path.join(path, name))
        for label, count in file_counts.items():
            stats[label] += count
    print(lang, stats, sep="\n")

bokmaal
defaultdict(<class 'int'>, {'entities[Stats]': 52815, 'mentions[Stats]': 73983, 'Entity[MiscStats]': 88291, 'SpaceAfter[MiscStats]': 27055, 'name[MiscStats]': 16168, 'Bridge[MiscStats]': 1025, 'SplitAnte[MiscStats]': 134})
nynorsk
defaultdict(<class 'int'>, {'entities[Stats]': 44847, 'mentions[Stats]': 61615, 'Entity[MiscStats]': 74145, 'name[MiscStats]': 15520, 'SpaceAfter[MiscStats]': 21339, 'Bridge[MiscStats]': 841, 'SplitAnte[MiscStats]': 80})
