In [2]:
import json
import time
import os
import sklearn_crfsuite
import re
import nereval
import pandas as pd

from estnltk import Text
from estnltk.taggers import NerTagger
from estnltk.taggers import WordLevelNerTagger
from estnltk.converters import text_to_json
from estnltk.converters import json_to_text
from estnltk.layer_operations import flatten
from sklearn.metrics import classification_report
from estnltk.taggers import Retagger
from estnltk.taggers import CompoundTokenTagger
from sklearn_crfsuite import metrics
from estnltk.converters import json_to_text

from nervaluate import Evaluator

nertagger = NerTagger()
word_level_ner = WordLevelNerTagger()

In [3]:
class TokenSplitter( Retagger ):
    """Splits tokens into smaller tokens based on regular expression patterns.""" 
    conf_param = ['patterns', 'break_group_name']
    
    def __init__(self, patterns, break_group_name:str='end'):
        # Set input/output layers
        self.input_layers = ['tokens']
        self.output_layer = 'tokens'
        self.output_attributes = ()
        # Set other configuration parameters
        if not (isinstance(break_group_name, str) and len(break_group_name) > 0):
            raise TypeError('(!) break_group_name should be a non-empty string.')
        self.break_group_name = break_group_name
        # Assert that all patterns are regular expressions in the valid format
        if not isinstance(patterns, list):
            raise TypeError('(!) patterns should be a list of compiled regular expressions.')
        # TODO: we use an adhoc way to verify that patterns are regular expressions 
        #       because there seems to be no common way of doing it both in py35 
        #       and py36
        for pat in patterns:
            # Check for the existence of methods/attributes
            has_match   = callable(getattr(pat, "match", None))
            has_search  = callable(getattr(pat, "search", None))
            has_pattern = getattr(pat, "pattern", None) is not None
            for (k,v) in (('method match()',has_match),\
                          ('method search()',has_search),\
                          ('attribute pattern',has_pattern)):
                if v is False:
                    raise TypeError('(!) Unexpected regex pattern: {!r} is missing {}.'.format(pat, k))
            symbolic_groups = pat.groupindex
            if self.break_group_name not in symbolic_groups.keys():
                raise TypeError('(!) Pattern {!r} is missing symbolic group named {!r}.'.format(pat, self.break_group_name))
        self.patterns = patterns

    def _change_layer(self, text, layers, status):
        # Get changeble layer
        changeble_layer = layers[self.output_layer]
        # Iterate over tokens
        add_spans    = []
        remove_spans = []
        for span in changeble_layer:
            token_str = text.text[span.start:span.end]
            for pat in self.patterns:
                m = pat.search(token_str)
                if m:
                    break_group_end = m.end( self.break_group_name )
                    if break_group_end > -1 and \
                       break_group_end > 0  and \
                       break_group_end < len(token_str):
                        # Make the split
                        add_spans.append( (span.start, span.start+break_group_end) )
                        add_spans.append( (span.start+break_group_end, span.end) )
                        remove_spans.append( span )
                        # Once a token has been split, then break and move on to 
                        # the next token ...
                        break
        if add_spans:
            assert len(remove_spans) > 0
            for old_span in remove_spans:
                changeble_layer.remove_span( old_span )
            for new_span in add_spans:
                changeble_layer.add_annotation( new_span )

In [4]:
token_splitter = TokenSplitter(patterns=[re.compile(r'(?P<end>[A-ZÕÄÖÜ]{1}\w+)[A-ZÕÄÖÜ]{1}\w+'),\
                                         re.compile(r'(?P<end>Piebenomme)metsawaht'),\
                                         re.compile(r'(?P<end>maa)peal'),\
                                         re.compile(r'(?P<end>reppi)käest'),\
                                         re.compile(r'(?P<end>Kiidjerwelt)J'),\
                                         re.compile(r'(?P<end>Ameljanow)Persitski'),\
                                         re.compile(r'(?P<end>mõistmas)Mihkel'),\
                                         re.compile(r'(?P<end>tema)Käkk'),\
                                         re.compile(r'(?P<end>Ahjawalla)liikmed'),\
                                         re.compile(r'(?P<end>kohtumees)A'),\
                                         re.compile(r'(?P<end>Pechmann)x'),\
                                         re.compile(r'(?P<end>pölli)Anni'),\
                                         re.compile(r'(?P<end>külla)Rauba'),\
                                         re.compile(r'(?P<end>kohtowannem)Jaak'),\
                                         re.compile(r'(?P<end>rannast)Leno'),\
                                         re.compile(r'(?P<end>wallast)Kiiwita'),\
                                         re.compile(r'(?P<end>wallas)Kristjan'),\
                                         re.compile(r'(?P<end>Pedoson)rahul'),\
                                         re.compile(r'(?P<end>pere)Jaan'),\
                                         re.compile(r'(?P<end>kohtu)poolest'),\
                                         re.compile(r'(?P<end>Kurrista)kaudo'),\
                                         re.compile(r'(?P<end>mölder)Gottlieb'),\
                                         re.compile(r'(?P<end>wöörmündri)Jaan'),\
                                         re.compile(r'(?P<end>Oinas)ja'),\
                                         re.compile(r'(?P<end>ette)Leenu'),\
                                         re.compile(r'(?P<end>Tommingas)peab'),\
                                         re.compile(r'(?P<end>wäljaja)Kotlep'),\
                                         re.compile(r'(?P<end>pea)A'),\
                                         re.compile(r'(?P<end>talumees)Nikolai')])

### Read files from the distributed corpus:

In [5]:
files = {}

with open(os.path.join('..', 'data', 'divided_corpus.txt'), 'r', encoding = 'UTF-8') as f:
    txt = f.readlines()

for fileName in txt:
    file, subdistribution = fileName.split(':')
    files[file] = subdistribution.rstrip('\n')

'''
with open(os.path.join('..', 'data', 'corpus_subdistribution_without_hand_tagged.txt'), 'r', encoding='UTF-8') as f:
    txt = f.readlines()

for filename in txt:
    file, subdistribution = filename.split(':')
    files[file] = subdistribution.rstrip('\n')
'''

"\nwith open(os.path.join('..', 'data', 'corpus_subdistribution_without_hand_tagged.txt'), 'r', encoding='UTF-8') as f:\n    txt = f.readlines()\n\nfor filename in txt:\n    file, subdistribution = filename.split(':')\n    files[file] = subdistribution.rstrip('\n')\n"

### Make `ner` and `wordner` layers from goldstandard files

In [6]:
# Remove layers to keep file sizes low
removed_layers = ['sentences', 'morph_analysis', 'compound_tokens', 'ner', 'words', 'tokens']

In [7]:
for file in files:
    with open(os.path.join('..', 'data', 'vallakohtufailid-json-flattened', file), 'r', encoding='UTF-8') as f:
        text = json_to_text(f.read()).text
        
        if file == "Tartu_V6nnu_Ahja_id3502_1882a.json":
            text.replace('..', '. .')
            
        text = Text(text)
        text.tag_layer(['tokens'])
        token_splitter.retag(text)
        CompoundTokenTagger(tag_initials = False, tag_abbreviations = False, tag_hyphenations = False).tag(text)
        text.tag_layer('morph_analysis')
        
        nertagger.tag(text)
        text.add_layer(flatten(text['ner'], 'flat_ner'))
        
        word_level_ner.tag(text)
        text.add_layer(flatten(text['wordner'], 'flat_wordner'))
        
        for x in removed_layers:
            text.pop_layer(x)
        
        text_to_json(text, file=os.path.join('..', 'data', 'vallakohtufailid_nertagger_baseline', file))

### Calculate the scores

In [8]:
files_not_working = [
                    'J2rva_Tyri_V22tsa_id22177_1911a.json', \
                     'J2rva_Tyri_V22tsa_id18538_1894a.json', \
                     'J2rva_Tyri_V22tsa_id22155_1911a.json', \
                     'Saare_Kihelkonna_Kotlandi_id18845_1865a.json', \
                     'P2rnu_Halliste_Abja_id257_1844a.json', \
                     'Saare_Kaarma_Loona_id7575_1899a.json', \
                     'J2rva_Tyri_V22tsa_id22178_1912a.json', \
                     'J2rva_Tyri_V22tsa_id22266_1913a.json'
                    ]
# These files don't work because the protocols are written in a different language, which the goldstandard didn't
# recognise, hence have no goldstandard tags.

In [9]:
%%time
#LOC and ORG separately
gold_ner_loc = []
gold_ner_org = []

#LOC_ORG
gold = []
test = []
gold_ner = []
test_ner = []

for file in sorted(os.listdir(os.path.join('..', 'data', 'vallakohtufailid_nertagger_baseline'))):
    appendable_gold_ner = []
    appendable_test_ner = []
    appendable_gold_ner_loc = []
    appendable_gold_ner_org = []
    
    if file.endswith(".json"):
        if file in files_not_working:
            continue
        else:
            with open(os.path.join('..', 'data', 'vallakohtufailid_nertagger_baseline', file), 'r', encoding='UTF-8') as f_test, \
                open(os.path.join('..', 'data', 'vallakohtufailid-json-flattened', file), 'r', encoding='UTF-8') as f_gold:
                    test_import = json_to_text(f_test.read())
                    gold_import = json_to_text(f_gold.read())
                    
            if len(gold_import['gold_wordner']) != len(test_import['flat_wordner']):
                print(file, len(gold_import['gold_wordner']), len(test_import['flat_wordner']))
            
            for i in range(len(gold_import['gold_wordner'])):
                tag = gold_import['gold_wordner'][i].nertag
                gold.append(tag)
            for i in range(len(test_import['flat_wordner'])):
                tag = test_import['flat_wordner'][i].nertag
                test.append(tag)

            for i in range(len(gold_import['gold_ner'])):
                ner = gold_import['gold_ner'][i]
                label = ner.nertag
                start = int(ner.start)
                end = int(ner.end)
                if label == 'LOC_ORG':
                    appendable_gold_ner.append({"label": label, "start": start, "end": end})
                    appendable_gold_ner_loc.append({"label": 'LOC', "start": start, "end": end})
                    appendable_gold_ner_org.append({"label": 'ORG', "start": start, "end": end})
                else:
                    appendable_gold_ner.append({"label": label, "start": start, "end": end})
                    appendable_gold_ner_loc.append({"label": label, "start": start, "end": end})
                    appendable_gold_ner_org.append({"label": label, "start": start, "end": end})

            for i in range(len(test_import['flat_ner'])):
                ner = test_import['flat_ner'][i]
                label = ner.nertag[0]
                start = int(ner.start)
                end = int(ner.end)
                appendable_test_ner.append({"label": label, "start": start, "end": end})

    gold_ner.append(appendable_gold_ner)
    gold_ner_loc.append(appendable_gold_ner_loc)
    gold_ner_org.append(appendable_gold_ner_org)
    test_ner.append(appendable_test_ner)

Wall time: 36.9 s


In [10]:
gold_loc = []
gold_org = []
for item in gold:
    if item == "B-LOC_ORG":
        gold_loc.append("B-LOC")
        gold_org.append("B-ORG")
    elif item == "I-LOC_ORG":
        gold_loc.append("I-LOC")
        gold_org.append("B-ORG")
    else:
        gold_loc.append(item)
        gold_org.append(item)

In [11]:
labels_locorg = set(gold)
labels_loc = set(gold_loc)
labels_org = set(gold_org)

sorted_labels_locorg = sorted(labels_locorg,key=lambda name: (name[1:], name[0]))
sorted_labels_loc = sorted(labels_loc,key=lambda name: (name[1:], name[0]))
sorted_labels_org = sorted(labels_org,key=lambda name: (name[1:], name[0]))

### Results for LOC_ORG:

In [12]:
print(classification_report(gold, test, labels=sorted_labels_locorg, zero_division=0))

              precision    recall  f1-score   support

           O       0.96      0.98      0.97    270018
       B-LOC       0.16      0.27      0.20      1008
       I-LOC       0.04      0.02      0.03       395
   B-LOC_ORG       0.00      0.00      0.00      2733
   I-LOC_ORG       0.00      0.00      0.00      1592
      B-MISC       0.00      0.00      0.00       254
      I-MISC       0.00      0.00      0.00       815
       B-ORG       0.03      0.29      0.06       419
       I-ORG       0.05      0.19      0.08       974
       B-PER       0.79      0.73      0.76     23126
       I-PER       0.85      0.67      0.75     21943

    accuracy                           0.91    323277
   macro avg       0.26      0.29      0.26    323277
weighted avg       0.92      0.91      0.91    323277



In [13]:
evaluator = Evaluator(gold_ner, test_ner, tags=['ORG', 'PER', 'MISC', 'LOC', 'LOC_ORG'])
results, results_per_tag = evaluator.evaluate()

In [14]:
display(pd.DataFrame(results))

Unnamed: 0,ent_type,partial,strict,exact
correct,18331.0,16817.0,15332.0,16817.0
incorrect,4327.0,0.0,7326.0,5841.0
partial,0.0,5841.0,0.0,0.0
missed,4886.0,4886.0,4886.0,4886.0
spurious,4027.0,4027.0,4027.0,4027.0
possible,27544.0,27544.0,27544.0,27544.0
actual,26685.0,26685.0,26685.0,26685.0
precision,0.68694,0.739648,0.574555,0.630204
recall,0.665517,0.716581,0.556637,0.61055
f1,0.676059,0.727932,0.565454,0.620222


In [15]:
display(pd.DataFrame(results_per_tag))

Unnamed: 0,ORG,PER,MISC,LOC,LOC_ORG
ent_type,"{'correct': 145, 'incorrect': 175, 'partial': 0, 'missed': 99, 'spurious': 1312, 'possible': 419, 'actual': 1632, 'precision': 0.08884803921568628, 'recall': 0.3460620525059666, 'f1': 0.14139444173573865}","{'correct': 17911, 'incorrect': 1901, 'partial': 0, 'missed': 3318, 'spurious': 2139, 'possible': 23130, 'actual': 21951, 'precision': 0.8159537150927065, 'recall': 0.7743623000432339, 'f1': 0.7946141389942548}","{'correct': 0, 'incorrect': 143, 'partial': 0, 'missed': 111, 'spurious': 0, 'possible': 254, 'actual': 143, 'precision': 0.0, 'recall': 0.0, 'f1': 0}","{'correct': 275, 'incorrect': 312, 'partial': 0, 'missed': 421, 'spurious': 576, 'possible': 1008, 'actual': 1163, 'precision': 0.236457437661221, 'recall': 0.2728174603174603, 'f1': 0.2533394748963611}","{'correct': 0, 'incorrect': 1796, 'partial': 0, 'missed': 937, 'spurious': 0, 'possible': 2733, 'actual': 1796, 'precision': 0.0, 'recall': 0.0, 'f1': 0}"
partial,"{'correct': 67, 'incorrect': 0, 'partial': 253, 'missed': 99, 'spurious': 1312, 'possible': 419, 'actual': 1632, 'precision': 0.11856617647058823, 'recall': 0.4618138424821002, 'f1': 0.1886884446611409}","{'correct': 15810, 'incorrect': 0, 'partial': 4002, 'missed': 3318, 'spurious': 2139, 'possible': 23130, 'actual': 21951, 'precision': 0.8113981139811398, 'recall': 0.7700389105058366, 'f1': 0.7901776801756837}","{'correct': 15, 'incorrect': 0, 'partial': 128, 'missed': 111, 'spurious': 0, 'possible': 254, 'actual': 143, 'precision': 0.5524475524475524, 'recall': 0.3110236220472441, 'f1': 0.3979848866498741}","{'correct': 371, 'incorrect': 0, 'partial': 216, 'missed': 421, 'spurious': 576, 'possible': 1008, 'actual': 1163, 'precision': 0.411865864144454, 'recall': 0.4751984126984127, 'f1': 0.4412713035467526}","{'correct': 554, 'incorrect': 0, 'partial': 1242, 'missed': 937, 'spurious': 0, 'possible': 2733, 'actual': 1796, 'precision': 0.6542316258351893, 'recall': 0.42993047932674716, 'f1': 0.5188783395893133}"
strict,"{'correct': 64, 'incorrect': 256, 'partial': 0, 'missed': 99, 'spurious': 1312, 'possible': 419, 'actual': 1632, 'precision': 0.0392156862745098, 'recall': 0.15274463007159905, 'f1': 0.062408581179912236}","{'correct': 15039, 'incorrect': 4773, 'partial': 0, 'missed': 3318, 'spurious': 2139, 'possible': 23130, 'actual': 21951, 'precision': 0.6851168511685117, 'recall': 0.6501945525291829, 'f1': 0.6671990417248952}","{'correct': 0, 'incorrect': 143, 'partial': 0, 'missed': 111, 'spurious': 0, 'possible': 254, 'actual': 143, 'precision': 0.0, 'recall': 0.0, 'f1': 0}","{'correct': 229, 'incorrect': 358, 'partial': 0, 'missed': 421, 'spurious': 576, 'possible': 1008, 'actual': 1163, 'precision': 0.19690455717970765, 'recall': 0.22718253968253968, 'f1': 0.21096269000460616}","{'correct': 0, 'incorrect': 1796, 'partial': 0, 'missed': 937, 'spurious': 0, 'possible': 2733, 'actual': 1796, 'precision': 0.0, 'recall': 0.0, 'f1': 0}"
exact,"{'correct': 67, 'incorrect': 253, 'partial': 0, 'missed': 99, 'spurious': 1312, 'possible': 419, 'actual': 1632, 'precision': 0.04105392156862745, 'recall': 0.15990453460620524, 'f1': 0.06533398342272062}","{'correct': 15810, 'incorrect': 4002, 'partial': 0, 'missed': 3318, 'spurious': 2139, 'possible': 23130, 'actual': 21951, 'precision': 0.7202405357386907, 'recall': 0.6835278858625162, 'f1': 0.7014041392160777}","{'correct': 15, 'incorrect': 128, 'partial': 0, 'missed': 111, 'spurious': 0, 'possible': 254, 'actual': 143, 'precision': 0.1048951048951049, 'recall': 0.05905511811023622, 'f1': 0.07556675062972291}","{'correct': 371, 'incorrect': 216, 'partial': 0, 'missed': 421, 'spurious': 576, 'possible': 1008, 'actual': 1163, 'precision': 0.31900257953568356, 'recall': 0.3680555555555556, 'f1': 0.3417779824965454}","{'correct': 554, 'incorrect': 1242, 'partial': 0, 'missed': 937, 'spurious': 0, 'possible': 2733, 'actual': 1796, 'precision': 0.30846325167037864, 'recall': 0.20270764727405782, 'f1': 0.2446456171340252}"


### Results for LOC:

In [16]:
print(classification_report(gold_loc, test, labels=sorted_labels_loc, zero_division=0))

              precision    recall  f1-score   support

           O       0.96      0.98      0.97    270018
       B-LOC       0.47      0.22      0.30      3741
       I-LOC       0.70      0.07      0.12      1987
      B-MISC       0.00      0.00      0.00       254
      I-MISC       0.00      0.00      0.00       815
       B-ORG       0.03      0.29      0.06       419
       I-ORG       0.05      0.19      0.08       974
       B-PER       0.79      0.73      0.76     23126
       I-PER       0.85      0.67      0.75     21943

    accuracy                           0.92    323277
   macro avg       0.43      0.35      0.34    323277
weighted avg       0.92      0.92      0.92    323277



In [17]:
evaluator = Evaluator(gold_ner, test_ner, tags=['ORG', 'PER', 'MISC', 'LOC'])
results, results_per_tag = evaluator.evaluate()

In [18]:
display(pd.DataFrame(results))

Unnamed: 0,ent_type,partial,strict,exact
correct,18748.0,16263.0,15332.0,16263.0
incorrect,2680.0,0.0,6096.0,5165.0
partial,0.0,5165.0,0.0,0.0
missed,3385.0,3385.0,3385.0,3385.0
spurious,5257.0,5257.0,5257.0,5257.0
possible,24813.0,24813.0,24813.0,24813.0
actual,26685.0,26685.0,26685.0,26685.0
precision,0.702567,0.706221,0.574555,0.609444
recall,0.755572,0.759501,0.617902,0.655423
f1,0.728106,0.731893,0.595441,0.631597


In [19]:
display(pd.DataFrame(results_per_tag))

Unnamed: 0,ORG,PER,MISC,LOC
ent_type,"{'correct': 145, 'incorrect': 175, 'partial': 0, 'missed': 99, 'spurious': 1524, 'possible': 419, 'actual': 1844, 'precision': 0.07863340563991324, 'recall': 0.3460620525059666, 'f1': 0.12814847547503314}","{'correct': 18328, 'incorrect': 2049, 'partial': 0, 'missed': 2755, 'spurious': 2603, 'possible': 23132, 'actual': 22980, 'precision': 0.7975630983463882, 'recall': 0.7923223240532595, 'f1': 0.7949340735600278}","{'correct': 0, 'incorrect': 144, 'partial': 0, 'missed': 110, 'spurious': 0, 'possible': 254, 'actual': 144, 'precision': 0.0, 'recall': 0.0, 'f1': 0}","{'correct': 275, 'incorrect': 312, 'partial': 0, 'missed': 421, 'spurious': 1130, 'possible': 1008, 'actual': 1717, 'precision': 0.16016307513104253, 'recall': 0.2728174603174603, 'f1': 0.2018348623853211}"
partial,"{'correct': 67, 'incorrect': 0, 'partial': 253, 'missed': 99, 'spurious': 1524, 'possible': 419, 'actual': 1844, 'precision': 0.1049349240780911, 'recall': 0.4618138424821002, 'f1': 0.171011931064958}","{'correct': 15810, 'incorrect': 0, 'partial': 4567, 'missed': 2755, 'spurious': 2603, 'possible': 23132, 'actual': 22980, 'precision': 0.7873585726718886, 'recall': 0.7821848521528618, 'f1': 0.7847631852879945}","{'correct': 15, 'incorrect': 0, 'partial': 129, 'missed': 110, 'spurious': 0, 'possible': 254, 'actual': 144, 'precision': 0.5520833333333334, 'recall': 0.31299212598425197, 'f1': 0.39949748743718594}","{'correct': 371, 'incorrect': 0, 'partial': 216, 'missed': 421, 'spurious': 1130, 'possible': 1008, 'actual': 1717, 'precision': 0.2789749563191613, 'recall': 0.4751984126984127, 'f1': 0.3515596330275229}"
strict,"{'correct': 64, 'incorrect': 256, 'partial': 0, 'missed': 99, 'spurious': 1524, 'possible': 419, 'actual': 1844, 'precision': 0.03470715835140998, 'recall': 0.15274463007159905, 'f1': 0.05656208572691118}","{'correct': 15039, 'incorrect': 5338, 'partial': 0, 'missed': 2755, 'spurious': 2603, 'possible': 23132, 'actual': 22980, 'precision': 0.6544386422976501, 'recall': 0.6501383365035449, 'f1': 0.6522814018043025}","{'correct': 0, 'incorrect': 144, 'partial': 0, 'missed': 110, 'spurious': 0, 'possible': 254, 'actual': 144, 'precision': 0.0, 'recall': 0.0, 'f1': 0}","{'correct': 229, 'incorrect': 358, 'partial': 0, 'missed': 421, 'spurious': 1130, 'possible': 1008, 'actual': 1717, 'precision': 0.1333721607454863, 'recall': 0.22718253968253968, 'f1': 0.16807339449541286}"
exact,"{'correct': 67, 'incorrect': 253, 'partial': 0, 'missed': 99, 'spurious': 1524, 'possible': 419, 'actual': 1844, 'precision': 0.03633405639913232, 'recall': 0.15990453460620524, 'f1': 0.059213433495360145}","{'correct': 15810, 'incorrect': 4567, 'partial': 0, 'missed': 2755, 'spurious': 2603, 'possible': 23132, 'actual': 22980, 'precision': 0.6879895561357703, 'recall': 0.6834687878263876, 'f1': 0.6857217210270645}","{'correct': 15, 'incorrect': 129, 'partial': 0, 'missed': 110, 'spurious': 0, 'possible': 254, 'actual': 144, 'precision': 0.10416666666666667, 'recall': 0.05905511811023622, 'f1': 0.07537688442211056}","{'correct': 371, 'incorrect': 216, 'partial': 0, 'missed': 421, 'spurious': 1130, 'possible': 1008, 'actual': 1717, 'precision': 0.2160745486313337, 'recall': 0.3680555555555556, 'f1': 0.2722935779816514}"


### Results for ORG:

In [20]:
print(classification_report(gold_org, test, labels=sorted_labels_org, zero_division=0))

              precision    recall  f1-score   support

           O       0.96      0.98      0.97    270018
       B-LOC       0.16      0.27      0.20      1008
       I-LOC       0.04      0.02      0.03       395
      B-MISC       0.00      0.00      0.00       254
      I-MISC       0.00      0.00      0.00       815
       B-ORG       0.12      0.10      0.11      4744
       I-ORG       0.05      0.19      0.08       974
       B-PER       0.79      0.73      0.76     23126
       I-PER       0.85      0.67      0.75     21943

    accuracy                           0.92    323277
   macro avg       0.33      0.33      0.32    323277
weighted avg       0.92      0.92      0.92    323277



In [21]:
evaluator = Evaluator(gold_ner, test_ner, tags=['ORG', 'PER', 'MISC', 'LOC'])
results, results_per_tag = evaluator.evaluate()

In [22]:
display(pd.DataFrame(results))

Unnamed: 0,ent_type,partial,strict,exact
correct,18748.0,16263.0,15332.0,16263.0
incorrect,2680.0,0.0,6096.0,5165.0
partial,0.0,5165.0,0.0,0.0
missed,3385.0,3385.0,3385.0,3385.0
spurious,5257.0,5257.0,5257.0,5257.0
possible,24813.0,24813.0,24813.0,24813.0
actual,26685.0,26685.0,26685.0,26685.0
precision,0.702567,0.706221,0.574555,0.609444
recall,0.755572,0.759501,0.617902,0.655423
f1,0.728106,0.731893,0.595441,0.631597


In [23]:
display(pd.DataFrame(results_per_tag))

Unnamed: 0,ORG,PER,MISC,LOC
ent_type,"{'correct': 145, 'incorrect': 175, 'partial': 0, 'missed': 99, 'spurious': 1524, 'possible': 419, 'actual': 1844, 'precision': 0.07863340563991324, 'recall': 0.3460620525059666, 'f1': 0.12814847547503314}","{'correct': 18328, 'incorrect': 2049, 'partial': 0, 'missed': 2755, 'spurious': 2603, 'possible': 23132, 'actual': 22980, 'precision': 0.7975630983463882, 'recall': 0.7923223240532595, 'f1': 0.7949340735600278}","{'correct': 0, 'incorrect': 144, 'partial': 0, 'missed': 110, 'spurious': 0, 'possible': 254, 'actual': 144, 'precision': 0.0, 'recall': 0.0, 'f1': 0}","{'correct': 275, 'incorrect': 312, 'partial': 0, 'missed': 421, 'spurious': 1130, 'possible': 1008, 'actual': 1717, 'precision': 0.16016307513104253, 'recall': 0.2728174603174603, 'f1': 0.2018348623853211}"
partial,"{'correct': 67, 'incorrect': 0, 'partial': 253, 'missed': 99, 'spurious': 1524, 'possible': 419, 'actual': 1844, 'precision': 0.1049349240780911, 'recall': 0.4618138424821002, 'f1': 0.171011931064958}","{'correct': 15810, 'incorrect': 0, 'partial': 4567, 'missed': 2755, 'spurious': 2603, 'possible': 23132, 'actual': 22980, 'precision': 0.7873585726718886, 'recall': 0.7821848521528618, 'f1': 0.7847631852879945}","{'correct': 15, 'incorrect': 0, 'partial': 129, 'missed': 110, 'spurious': 0, 'possible': 254, 'actual': 144, 'precision': 0.5520833333333334, 'recall': 0.31299212598425197, 'f1': 0.39949748743718594}","{'correct': 371, 'incorrect': 0, 'partial': 216, 'missed': 421, 'spurious': 1130, 'possible': 1008, 'actual': 1717, 'precision': 0.2789749563191613, 'recall': 0.4751984126984127, 'f1': 0.3515596330275229}"
strict,"{'correct': 64, 'incorrect': 256, 'partial': 0, 'missed': 99, 'spurious': 1524, 'possible': 419, 'actual': 1844, 'precision': 0.03470715835140998, 'recall': 0.15274463007159905, 'f1': 0.05656208572691118}","{'correct': 15039, 'incorrect': 5338, 'partial': 0, 'missed': 2755, 'spurious': 2603, 'possible': 23132, 'actual': 22980, 'precision': 0.6544386422976501, 'recall': 0.6501383365035449, 'f1': 0.6522814018043025}","{'correct': 0, 'incorrect': 144, 'partial': 0, 'missed': 110, 'spurious': 0, 'possible': 254, 'actual': 144, 'precision': 0.0, 'recall': 0.0, 'f1': 0}","{'correct': 229, 'incorrect': 358, 'partial': 0, 'missed': 421, 'spurious': 1130, 'possible': 1008, 'actual': 1717, 'precision': 0.1333721607454863, 'recall': 0.22718253968253968, 'f1': 0.16807339449541286}"
exact,"{'correct': 67, 'incorrect': 253, 'partial': 0, 'missed': 99, 'spurious': 1524, 'possible': 419, 'actual': 1844, 'precision': 0.03633405639913232, 'recall': 0.15990453460620524, 'f1': 0.059213433495360145}","{'correct': 15810, 'incorrect': 4567, 'partial': 0, 'missed': 2755, 'spurious': 2603, 'possible': 23132, 'actual': 22980, 'precision': 0.6879895561357703, 'recall': 0.6834687878263876, 'f1': 0.6857217210270645}","{'correct': 15, 'incorrect': 129, 'partial': 0, 'missed': 110, 'spurious': 0, 'possible': 254, 'actual': 144, 'precision': 0.10416666666666667, 'recall': 0.05905511811023622, 'f1': 0.07537688442211056}","{'correct': 371, 'incorrect': 216, 'partial': 0, 'missed': 421, 'spurious': 1130, 'possible': 1008, 'actual': 1717, 'precision': 0.2160745486313337, 'recall': 0.3680555555555556, 'f1': 0.2722935779816514}"
