In [1]:
import json
import time
import os
import sklearn_crfsuite
import re
import nereval
import pandas as pd

from estnltk import Text
from estnltk.taggers import NerTagger
from estnltk.taggers import WordLevelNerTagger
from estnltk.converters import text_to_json
from estnltk.converters import json_to_text
from estnltk.layer_operations import flatten
from sklearn.metrics import classification_report
from estnltk.taggers import Retagger
from estnltk.taggers import CompoundTokenTagger
from sklearn_crfsuite import metrics

from nervaluate import Evaluator

nertagger = NerTagger()
word_level_ner = WordLevelNerTagger()

### Adding TokenSplitter to make an equal amount of tokens.

In [2]:
class TokenSplitter( Retagger ):
    """Splits tokens into smaller tokens based on regular expression patterns.""" 
    conf_param = ['patterns', 'break_group_name']
    
    def __init__(self, patterns, break_group_name:str='end'):
        # Set input/output layers
        self.input_layers = ['tokens']
        self.output_layer = 'tokens'
        self.output_attributes = ()
        # Set other configuration parameters
        if not (isinstance(break_group_name, str) and len(break_group_name) > 0):
            raise TypeError('(!) break_group_name should be a non-empty string.')
        self.break_group_name = break_group_name
        # Assert that all patterns are regular expressions in the valid format
        if not isinstance(patterns, list):
            raise TypeError('(!) patterns should be a list of compiled regular expressions.')
        # TODO: we use an adhoc way to verify that patterns are regular expressions 
        #       because there seems to be no common way of doing it both in py35 
        #       and py36
        for pat in patterns:
            # Check for the existence of methods/attributes
            has_match   = callable(getattr(pat, "match", None))
            has_search  = callable(getattr(pat, "search", None))
            has_pattern = getattr(pat, "pattern", None) is not None
            for (k,v) in (('method match()',has_match),\
                          ('method search()',has_search),\
                          ('attribute pattern',has_pattern)):
                if v is False:
                    raise TypeError('(!) Unexpected regex pattern: {!r} is missing {}.'.format(pat, k))
            symbolic_groups = pat.groupindex
            if self.break_group_name not in symbolic_groups.keys():
                raise TypeError('(!) Pattern {!r} is missing symbolic group named {!r}.'.format(pat, self.break_group_name))
        self.patterns = patterns

    def _change_layer(self, text, layers, status):
        # Get changeble layer
        changeble_layer = layers[self.output_layer]
        # Iterate over tokens
        add_spans    = []
        remove_spans = []
        for span in changeble_layer:
            token_str = text.text[span.start:span.end]
            for pat in self.patterns:
                m = pat.search(token_str)
                if m:
                    break_group_end = m.end( self.break_group_name )
                    if break_group_end > -1 and \
                       break_group_end > 0  and \
                       break_group_end < len(token_str):
                        # Make the split
                        add_spans.append( (span.start, span.start+break_group_end) )
                        add_spans.append( (span.start+break_group_end, span.end) )
                        remove_spans.append( span )
                        # Once a token has been split, then break and move on to 
                        # the next token ...
                        break
        if add_spans:
            assert len(remove_spans) > 0
            for old_span in remove_spans:
                changeble_layer.remove_span( old_span )
            for new_span in add_spans:
                changeble_layer.add_annotation( new_span )

In [3]:
token_splitter = TokenSplitter(patterns=[re.compile(r'(?P<end>[A-ZÕÄÖÜ]{1}\w+)[A-ZÕÄÖÜ]{1}\w+'),\
                                         re.compile(r'(?P<end>Piebenomme)metsawaht'),\
                                         re.compile(r'(?P<end>maa)peal'),\
                                         re.compile(r'(?P<end>reppi)käest'),\
                                         re.compile(r'(?P<end>Kiidjerwelt)J'),\
                                         re.compile(r'(?P<end>Ameljanow)Persitski'),\
                                         re.compile(r'(?P<end>mõistmas)Mihkel'),\
                                         re.compile(r'(?P<end>tema)Käkk'),\
                                         re.compile(r'(?P<end>Ahjawalla)liikmed'),\
                                         re.compile(r'(?P<end>kohtumees)A'),\
                                         re.compile(r'(?P<end>Pechmann)x'),\
                                         re.compile(r'(?P<end>pölli)Anni'),\
                                         re.compile(r'(?P<end>külla)Rauba'),\
                                         re.compile(r'(?P<end>kohtowannem)Jaak'),\
                                         re.compile(r'(?P<end>rannast)Leno'),\
                                         re.compile(r'(?P<end>wallast)Kiiwita'),\
                                         re.compile(r'(?P<end>wallas)Kristjan'),\
                                         re.compile(r'(?P<end>Pedoson)rahul'),\
                                         re.compile(r'(?P<end>pere)Jaan'),\
                                         re.compile(r'(?P<end>kohtu)poolest'),\
                                         re.compile(r'(?P<end>Kurrista)kaudo'),\
                                         re.compile(r'(?P<end>mölder)Gottlieb'),\
                                         re.compile(r'(?P<end>wöörmündri)Jaan'),\
                                         re.compile(r'(?P<end>Oinas)ja'),\
                                         re.compile(r'(?P<end>ette)Leenu'),\
                                         re.compile(r'(?P<end>Tommingas)peab'),\
                                         re.compile(r'(?P<end>wäljaja)Kotlep'),\
                                         re.compile(r'(?P<end>pea)A'),\
                                         re.compile(r'(?P<end>talumees)Nikolai')])

### Reading in files from the distributed corpus:

In [4]:
files = {}

with open('divided_corpus.txt', 'r', encoding = 'UTF-8') as f:
    txt = f.readlines()

for fileName in txt:
    file, subdistribution = fileName.split(":")
    files[file] = subdistribution.rstrip("\n")

### Making `ner` and `wordner` layers from goldstandard files

In [5]:
def find(name, path):
    for root, dirs, files in os.walk(path):
        if name in files:
            return os.path.join(root, name)

In [24]:
%%time
removed_layers = ['sentences', 'morph_analysis', 'compound_tokens', 'ner', 'words', 'tokens']
for file in {key: value for key, value in files.items() if value in ('1', '2', '3')}:
    with open(find(file.replace(".json", ".txt"), "./vallakohtufailid/"), 'r', encoding='UTF-8') as f:
        text = Text(f.read())
        if f == "Tartu_V6nnu_Ahja_id3502_1882a.txt":
            text = text.replace('..', '. .')
        
        text = text.tag_layer(['tokens'])
        token_splitter.retag(text)
        CompoundTokenTagger(tag_initials = False, tag_abbreviations = False, tag_hyphenations = False).tag(text)
        text.tag_layer('morph_analysis')
        
        nertagger.tag(text)
        text.add_layer(flatten(text['ner'], 'flat_ner'))

        for x in removed_layers:
            text.pop_layer(x)
        text_to_json(text, file=os.getcwd() + "/vallakohtufailid_nertagger/" + file)


KeyboardInterrupt: 

### Calculating the f1-scores
Layer `ner` against `gold_ner` and `wordner` against `gold_wordner`.

In [6]:
files_not_working = ['J2rva_Tyri_V22tsa_id22177_1911a.json', \
                     'J2rva_Tyri_V22tsa_id18538_1894a.json', \
                     'J2rva_Tyri_V22tsa_id22155_1911a.json', \
                     'Saare_Kihelkonna_Kotlandi_id18845_1865a.json', \
                     'P2rnu_Halliste_Abja_id257_1844a.json', \
                     'Saare_Kaarma_Loona_id7575_1899a.json']
# These files don't work because the protocols are written in a different language, which the goldstandard didn't
# recognise, hence have no goldstandard tags.

In [7]:
%%time
#LOC and ORG separately
gold_ner_loc = []
gold_ner_org = []

#LOC_ORG
gold = []
test = []
gold_ner = []
test_ner = []

for file in sorted(os.listdir("./vallakohtufailid_nertagger/")):
    appendable_gold_ner = []
    appendable_test_ner = []
    appendable_gold_ner_loc = []
    appendable_gold_ner_org = []
    
    if file.endswith(".json"):
        if file in files_not_working:
            continue
        else:
            with open("./vallakohtufailid_nertagger/" + str(file), 'r', encoding='UTF-8') as f_test, \
                open("./vallakohtufailid_json_flat/" + str(file), 'r', encoding='UTF-8') as f_gold:
                    test_import = json_to_text(f_test.read())
                    gold_import = json_to_text(f_gold.read())

                    for i in range(len(gold_import['flat_gold_wordner'])):
                        tag = gold_import['flat_gold_wordner'][i].nertag[0]
                        gold.append(tag)
                    for i in range(len(test_import['flat_wordner'])):
                        tag = test_import['flat_wordner'][i].nertag[0]
                        test.append(tag)
                        
                    for i in range(len(gold_import['flat_gold_ner'])):
                        ner = gold_import['flat_gold_ner'][i]
                        label = ner.nertag[0]
                        start = int(ner.start)
                        end = int(ner.end)
                        if label == 'LOC_ORG':
                            appendable_gold_ner.append({"label": label, "start": start, "end": end})
                            appendable_gold_ner_loc.append({"label": 'LOC', "start": start, "end": end})
                            appendable_gold_ner_org.append({"label": 'ORG', "start": start, "end": end})
                        else:
                            appendable_gold_ner.append({"label": label, "start": start, "end": end})
                            appendable_gold_ner_loc.append({"label": label, "start": start, "end": end})
                            appendable_gold_ner_org.append({"label": label, "start": start, "end": end})

                    for i in range(len(test_import['flat_ner'])):
                        ner = test_import['flat_ner'][i]
                        label = ner.nertag[0]
                        start = int(ner.start)
                        end = int(ner.end)
                        appendable_test_ner.append({"label": label, "start": start, "end": end})

    gold_ner.append(appendable_gold_ner)
    gold_ner_loc.append(appendable_gold_ner_loc)
    gold_ner_org.append(appendable_gold_ner_org)
    test_ner.append(appendable_test_ner)

CPU times: user 13.9 s, sys: 375 ms, total: 14.2 s
Wall time: 14.7 s


In [8]:
gold_loc = []
gold_org = []
for item in gold:
    if item == "B-LOC_ORG":
        gold_loc.append("B-LOC")
        gold_org.append("B-ORG")
    elif item == "I-LOC_ORG":
        gold_loc.append("I-LOC")
        gold_org.append("B-ORG")
    else:
        gold_loc.append(item)
        gold_org.append(item)

In [9]:
labels_locorg = set(gold)
labels_loc = set(gold_loc)
labels_org = set(gold_org)

sorted_labels_locorg = sorted(labels_locorg,key=lambda name: (name[1:], name[0]))
sorted_labels_loc = sorted(labels_loc,key=lambda name: (name[1:], name[0]))
sorted_labels_org = sorted(labels_org,key=lambda name: (name[1:], name[0]))

### Results for LOC_ORG:

In [10]:
print(classification_report(gold, test, labels=sorted_labels_locorg, zero_division=0))

              precision    recall  f1-score   support

           O       0.94      0.96      0.95    133945
       B-LOC       0.13      0.21      0.16       511
       I-LOC       0.04      0.01      0.02       212
   B-LOC_ORG       0.00      0.00      0.00      1196
   I-LOC_ORG       0.00      0.00      0.00       708
      B-MISC       0.00      0.00      0.00       134
      I-MISC       0.00      0.00      0.00       397
       B-ORG       0.03      0.27      0.05       195
       I-ORG       0.04      0.18      0.07       462
       B-PER       0.61      0.57      0.59     11132
       I-PER       0.64      0.51      0.57     10428

    accuracy                           0.88    159320
   macro avg       0.22      0.25      0.22    159320
weighted avg       0.88      0.88      0.88    159320



In [11]:
evaluator = Evaluator(gold_ner, test_ner, tags=['ORG', 'PER', 'MISC', 'LOC', 'LOC_ORG'])
results, results_per_tag = evaluator.evaluate()

In [12]:
display(pd.DataFrame(results))

Unnamed: 0,ent_type,partial,strict,exact
correct,8841.0,8141.0,7439.0,8141.0
incorrect,2070.0,0.0,3472.0,2770.0
partial,0.0,2770.0,0.0,0.0
missed,2257.0,2257.0,2257.0,2257.0
spurious,2052.0,2052.0,2052.0,2052.0
possible,13168.0,13168.0,13168.0,13168.0
actual,12963.0,12963.0,12963.0,12963.0
precision,0.682018,0.734861,0.573864,0.628018
recall,0.6714,0.72342,0.56493,0.618241
f1,0.676668,0.729096,0.569362,0.623091


In [13]:
display(pd.DataFrame(results_per_tag))

Unnamed: 0,ORG,PER,MISC,LOC,LOC_ORG
ent_type,"{'correct': 73, 'incorrect': 78, 'partial': 0, 'missed': 44, 'spurious': 683, 'possible': 195, 'actual': 834, 'precision': 0.08752997601918465, 'recall': 0.37435897435897436, 'f1': 0.14188532555879493}","{'correct': 8643, 'incorrect': 939, 'partial': 0, 'missed': 1550, 'spurious': 1080, 'possible': 11132, 'actual': 10662, 'precision': 0.8106359032076533, 'recall': 0.7764103485447359, 'f1': 0.7931540791043407}","{'correct': 0, 'incorrect': 70, 'partial': 0, 'missed': 64, 'spurious': 0, 'possible': 134, 'actual': 70, 'precision': 0.0, 'recall': 0.0, 'f1': 0}","{'correct': 125, 'incorrect': 173, 'partial': 0, 'missed': 213, 'spurious': 289, 'possible': 511, 'actual': 587, 'precision': 0.21294718909710392, 'recall': 0.2446183953033268, 'f1': 0.22768670309653918}","{'correct': 0, 'incorrect': 810, 'partial': 0, 'missed': 386, 'spurious': 0, 'possible': 1196, 'actual': 810, 'precision': 0.0, 'recall': 0.0, 'f1': 0}"
partial,"{'correct': 33, 'incorrect': 0, 'partial': 118, 'missed': 44, 'spurious': 683, 'possible': 195, 'actual': 834, 'precision': 0.11031175059952038, 'recall': 0.4717948717948718, 'f1': 0.17881438289601556}","{'correct': 7685, 'incorrect': 0, 'partial': 1897, 'missed': 1550, 'spurious': 1080, 'possible': 11132, 'actual': 10662, 'precision': 0.8097448883886701, 'recall': 0.7755569529284945, 'f1': 0.7922822795264752}","{'correct': 10, 'incorrect': 0, 'partial': 60, 'missed': 64, 'spurious': 0, 'possible': 134, 'actual': 70, 'precision': 0.5714285714285714, 'recall': 0.29850746268656714, 'f1': 0.392156862745098}","{'correct': 174, 'incorrect': 0, 'partial': 124, 'missed': 213, 'spurious': 289, 'possible': 511, 'actual': 587, 'precision': 0.4020442930153322, 'recall': 0.461839530332681, 'f1': 0.42987249544626593}","{'correct': 239, 'incorrect': 0, 'partial': 571, 'missed': 386, 'spurious': 0, 'possible': 1196, 'actual': 810, 'precision': 0.6475308641975308, 'recall': 0.43854515050167225, 'f1': 0.5229312063808574}"
strict,"{'correct': 33, 'incorrect': 118, 'partial': 0, 'missed': 44, 'spurious': 683, 'possible': 195, 'actual': 834, 'precision': 0.039568345323741004, 'recall': 0.16923076923076924, 'f1': 0.06413994169096209}","{'correct': 7311, 'incorrect': 2271, 'partial': 0, 'missed': 1550, 'spurious': 1080, 'possible': 11132, 'actual': 10662, 'precision': 0.6857062464828363, 'recall': 0.6567553000359324, 'f1': 0.6709186014499404}","{'correct': 0, 'incorrect': 70, 'partial': 0, 'missed': 64, 'spurious': 0, 'possible': 134, 'actual': 70, 'precision': 0.0, 'recall': 0.0, 'f1': 0}","{'correct': 95, 'incorrect': 203, 'partial': 0, 'missed': 213, 'spurious': 289, 'possible': 511, 'actual': 587, 'precision': 0.16183986371379896, 'recall': 0.18590998043052837, 'f1': 0.17304189435336978}","{'correct': 0, 'incorrect': 810, 'partial': 0, 'missed': 386, 'spurious': 0, 'possible': 1196, 'actual': 810, 'precision': 0.0, 'recall': 0.0, 'f1': 0}"
exact,"{'correct': 33, 'incorrect': 118, 'partial': 0, 'missed': 44, 'spurious': 683, 'possible': 195, 'actual': 834, 'precision': 0.039568345323741004, 'recall': 0.16923076923076924, 'f1': 0.06413994169096209}","{'correct': 7685, 'incorrect': 1897, 'partial': 0, 'missed': 1550, 'spurious': 1080, 'possible': 11132, 'actual': 10662, 'precision': 0.7207840930407053, 'recall': 0.6903521379805965, 'f1': 0.7052399743048546}","{'correct': 10, 'incorrect': 60, 'partial': 0, 'missed': 64, 'spurious': 0, 'possible': 134, 'actual': 70, 'precision': 0.14285714285714285, 'recall': 0.07462686567164178, 'f1': 0.0980392156862745}","{'correct': 174, 'incorrect': 124, 'partial': 0, 'missed': 213, 'spurious': 289, 'possible': 511, 'actual': 587, 'precision': 0.29642248722316866, 'recall': 0.3405088062622309, 'f1': 0.31693989071038253}","{'correct': 239, 'incorrect': 571, 'partial': 0, 'missed': 386, 'spurious': 0, 'possible': 1196, 'actual': 810, 'precision': 0.29506172839506173, 'recall': 0.19983277591973245, 'f1': 0.2382851445663011}"


### Results for LOC:

In [14]:
print(classification_report(gold_loc, test, labels=sorted_labels_loc, zero_division=0))

              precision    recall  f1-score   support

           O       0.94      0.96      0.95    133945
       B-LOC       0.39      0.19      0.26      1707
       I-LOC       0.60      0.05      0.10       920
      B-MISC       0.00      0.00      0.00       134
      I-MISC       0.00      0.00      0.00       397
       B-ORG       0.03      0.27      0.05       195
       I-ORG       0.04      0.18      0.07       462
       B-PER       0.61      0.57      0.59     11132
       I-PER       0.64      0.51      0.57     10428

    accuracy                           0.88    159320
   macro avg       0.36      0.30      0.29    159320
weighted avg       0.89      0.88      0.88    159320



In [15]:
evaluator = Evaluator(gold_ner, test_ner, tags=['ORG', 'PER', 'MISC', 'LOC'])
results, results_per_tag = evaluator.evaluate()

In [16]:
display(pd.DataFrame(results))

Unnamed: 0,ent_type,partial,strict,exact
correct,9020.0,7902.0,7439.0,7902.0
incorrect,1345.0,0.0,2926.0,2463.0
partial,0.0,2463.0,0.0,0.0
missed,1608.0,1608.0,1608.0,1608.0
spurious,2598.0,2598.0,2598.0,2598.0
possible,11973.0,11973.0,11973.0,11973.0
actual,12963.0,12963.0,12963.0,12963.0
precision,0.695827,0.704582,0.573864,0.609581
recall,0.753362,0.762841,0.621315,0.659985
f1,0.723452,0.732555,0.596647,0.633782


In [17]:
display(pd.DataFrame(results_per_tag))

Unnamed: 0,ORG,PER,MISC,LOC
ent_type,"{'correct': 73, 'incorrect': 78, 'partial': 0, 'missed': 44, 'spurious': 769, 'possible': 195, 'actual': 920, 'precision': 0.07934782608695652, 'recall': 0.37435897435897436, 'f1': 0.13094170403587443}","{'correct': 8822, 'incorrect': 1023, 'partial': 0, 'missed': 1288, 'spurious': 1282, 'possible': 11133, 'actual': 11127, 'precision': 0.7928462298912555, 'recall': 0.7924189346986437, 'f1': 0.7926325247079965}","{'correct': 0, 'incorrect': 71, 'partial': 0, 'missed': 63, 'spurious': 0, 'possible': 134, 'actual': 71, 'precision': 0.0, 'recall': 0.0, 'f1': 0}","{'correct': 125, 'incorrect': 173, 'partial': 0, 'missed': 213, 'spurious': 547, 'possible': 511, 'actual': 845, 'precision': 0.14792899408284024, 'recall': 0.2446183953033268, 'f1': 0.18436578171091444}"
partial,"{'correct': 33, 'incorrect': 0, 'partial': 118, 'missed': 44, 'spurious': 769, 'possible': 195, 'actual': 920, 'precision': 0.1, 'recall': 0.4717948717948718, 'f1': 0.1650224215246637}","{'correct': 7685, 'incorrect': 0, 'partial': 2160, 'missed': 1288, 'spurious': 1282, 'possible': 11133, 'actual': 11127, 'precision': 0.7877235553158982, 'recall': 0.7872990209287704, 'f1': 0.7875112309074572}","{'correct': 10, 'incorrect': 0, 'partial': 61, 'missed': 63, 'spurious': 0, 'possible': 134, 'actual': 71, 'precision': 0.5704225352112676, 'recall': 0.30223880597014924, 'f1': 0.3951219512195122}","{'correct': 174, 'incorrect': 0, 'partial': 124, 'missed': 213, 'spurious': 547, 'possible': 511, 'actual': 845, 'precision': 0.27928994082840236, 'recall': 0.461839530332681, 'f1': 0.34808259587020646}"
strict,"{'correct': 33, 'incorrect': 118, 'partial': 0, 'missed': 44, 'spurious': 769, 'possible': 195, 'actual': 920, 'precision': 0.035869565217391305, 'recall': 0.16923076923076924, 'f1': 0.059192825112107626}","{'correct': 7311, 'incorrect': 2534, 'partial': 0, 'missed': 1288, 'spurious': 1282, 'possible': 11133, 'actual': 11127, 'precision': 0.6570504179023996, 'recall': 0.6566963082727028, 'f1': 0.6568733153638814}","{'correct': 0, 'incorrect': 71, 'partial': 0, 'missed': 63, 'spurious': 0, 'possible': 134, 'actual': 71, 'precision': 0.0, 'recall': 0.0, 'f1': 0}","{'correct': 95, 'incorrect': 203, 'partial': 0, 'missed': 213, 'spurious': 547, 'possible': 511, 'actual': 845, 'precision': 0.11242603550295859, 'recall': 0.18590998043052837, 'f1': 0.140117994100295}"
exact,"{'correct': 33, 'incorrect': 118, 'partial': 0, 'missed': 44, 'spurious': 769, 'possible': 195, 'actual': 920, 'precision': 0.035869565217391305, 'recall': 0.16923076923076924, 'f1': 0.059192825112107626}","{'correct': 7685, 'incorrect': 2160, 'partial': 0, 'missed': 1288, 'spurious': 1282, 'possible': 11133, 'actual': 11127, 'precision': 0.6906623528354453, 'recall': 0.6902901284469595, 'f1': 0.6904761904761905}","{'correct': 10, 'incorrect': 61, 'partial': 0, 'missed': 63, 'spurious': 0, 'possible': 134, 'actual': 71, 'precision': 0.14084507042253522, 'recall': 0.07462686567164178, 'f1': 0.09756097560975609}","{'correct': 174, 'incorrect': 124, 'partial': 0, 'missed': 213, 'spurious': 547, 'possible': 511, 'actual': 845, 'precision': 0.20591715976331362, 'recall': 0.3405088062622309, 'f1': 0.25663716814159293}"


### Results for ORG:

In [18]:
print(classification_report(gold_org, test, labels=sorted_labels_org, zero_division=0))

              precision    recall  f1-score   support

           O       0.94      0.96      0.95    133945
       B-LOC       0.13      0.21      0.16       511
       I-LOC       0.04      0.01      0.02       212
      B-MISC       0.00      0.00      0.00       134
      I-MISC       0.00      0.00      0.00       397
       B-ORG       0.09      0.09      0.09      2099
       I-ORG       0.04      0.18      0.07       462
       B-PER       0.61      0.57      0.59     11132
       I-PER       0.64      0.51      0.57     10428

    accuracy                           0.88    159320
   macro avg       0.28      0.28      0.27    159320
weighted avg       0.88      0.88      0.88    159320



In [19]:
evaluator = Evaluator(gold_ner, test_ner, tags=['ORG', 'PER', 'MISC', 'LOC'])
results, results_per_tag = evaluator.evaluate()

In [20]:
display(pd.DataFrame(results))

Unnamed: 0,ent_type,partial,strict,exact
correct,9020.0,7902.0,7439.0,7902.0
incorrect,1345.0,0.0,2926.0,2463.0
partial,0.0,2463.0,0.0,0.0
missed,1608.0,1608.0,1608.0,1608.0
spurious,2598.0,2598.0,2598.0,2598.0
possible,11973.0,11973.0,11973.0,11973.0
actual,12963.0,12963.0,12963.0,12963.0
precision,0.695827,0.704582,0.573864,0.609581
recall,0.753362,0.762841,0.621315,0.659985
f1,0.723452,0.732555,0.596647,0.633782


In [21]:
display(pd.DataFrame(results_per_tag))

Unnamed: 0,ORG,PER,MISC,LOC
ent_type,"{'correct': 73, 'incorrect': 78, 'partial': 0, 'missed': 44, 'spurious': 769, 'possible': 195, 'actual': 920, 'precision': 0.07934782608695652, 'recall': 0.37435897435897436, 'f1': 0.13094170403587443}","{'correct': 8822, 'incorrect': 1023, 'partial': 0, 'missed': 1288, 'spurious': 1282, 'possible': 11133, 'actual': 11127, 'precision': 0.7928462298912555, 'recall': 0.7924189346986437, 'f1': 0.7926325247079965}","{'correct': 0, 'incorrect': 71, 'partial': 0, 'missed': 63, 'spurious': 0, 'possible': 134, 'actual': 71, 'precision': 0.0, 'recall': 0.0, 'f1': 0}","{'correct': 125, 'incorrect': 173, 'partial': 0, 'missed': 213, 'spurious': 547, 'possible': 511, 'actual': 845, 'precision': 0.14792899408284024, 'recall': 0.2446183953033268, 'f1': 0.18436578171091444}"
partial,"{'correct': 33, 'incorrect': 0, 'partial': 118, 'missed': 44, 'spurious': 769, 'possible': 195, 'actual': 920, 'precision': 0.1, 'recall': 0.4717948717948718, 'f1': 0.1650224215246637}","{'correct': 7685, 'incorrect': 0, 'partial': 2160, 'missed': 1288, 'spurious': 1282, 'possible': 11133, 'actual': 11127, 'precision': 0.7877235553158982, 'recall': 0.7872990209287704, 'f1': 0.7875112309074572}","{'correct': 10, 'incorrect': 0, 'partial': 61, 'missed': 63, 'spurious': 0, 'possible': 134, 'actual': 71, 'precision': 0.5704225352112676, 'recall': 0.30223880597014924, 'f1': 0.3951219512195122}","{'correct': 174, 'incorrect': 0, 'partial': 124, 'missed': 213, 'spurious': 547, 'possible': 511, 'actual': 845, 'precision': 0.27928994082840236, 'recall': 0.461839530332681, 'f1': 0.34808259587020646}"
strict,"{'correct': 33, 'incorrect': 118, 'partial': 0, 'missed': 44, 'spurious': 769, 'possible': 195, 'actual': 920, 'precision': 0.035869565217391305, 'recall': 0.16923076923076924, 'f1': 0.059192825112107626}","{'correct': 7311, 'incorrect': 2534, 'partial': 0, 'missed': 1288, 'spurious': 1282, 'possible': 11133, 'actual': 11127, 'precision': 0.6570504179023996, 'recall': 0.6566963082727028, 'f1': 0.6568733153638814}","{'correct': 0, 'incorrect': 71, 'partial': 0, 'missed': 63, 'spurious': 0, 'possible': 134, 'actual': 71, 'precision': 0.0, 'recall': 0.0, 'f1': 0}","{'correct': 95, 'incorrect': 203, 'partial': 0, 'missed': 213, 'spurious': 547, 'possible': 511, 'actual': 845, 'precision': 0.11242603550295859, 'recall': 0.18590998043052837, 'f1': 0.140117994100295}"
exact,"{'correct': 33, 'incorrect': 118, 'partial': 0, 'missed': 44, 'spurious': 769, 'possible': 195, 'actual': 920, 'precision': 0.035869565217391305, 'recall': 0.16923076923076924, 'f1': 0.059192825112107626}","{'correct': 7685, 'incorrect': 2160, 'partial': 0, 'missed': 1288, 'spurious': 1282, 'possible': 11133, 'actual': 11127, 'precision': 0.6906623528354453, 'recall': 0.6902901284469595, 'f1': 0.6904761904761905}","{'correct': 10, 'incorrect': 61, 'partial': 0, 'missed': 63, 'spurious': 0, 'possible': 134, 'actual': 71, 'precision': 0.14084507042253522, 'recall': 0.07462686567164178, 'f1': 0.09756097560975609}","{'correct': 174, 'incorrect': 124, 'partial': 0, 'missed': 213, 'spurious': 547, 'possible': 511, 'actual': 845, 'precision': 0.20591715976331362, 'recall': 0.3405088062622309, 'f1': 0.25663716814159293}"
