In [None]:
from ipymarkup import show_ascii_markup as show_markup

from nerus.log import log_progress
from nerus.utils import (
    head,
    iter_sents
)
from nerus.span import select_type_spans
from nerus.dump import load_raw
from nerus.path import join_path
from nerus.const import (
    DUMPS_DIR, RAW, JSONL, GZ,
    FACTRU, NE5, GAREEV, WIKINER,
    DEEPPAVLOV, PULLENTI, TEXTERRA, TOMITA, NATASHA, MITIE,
    ANNOTATORS, MIX,
    PER, LOC, ORG
)
from nerus.eval import eval_markups

In [None]:
# Load the raw dumps of the four reference corpora used for evaluation.
# Each dump lives at DUMPS_DIR/RAW/<name><JSONL><GZ>.

path = join_path(DUMPS_DIR, RAW, FACTRU + JSONL + GZ)
factru = log_progress(load_raw(path), prefix=FACTRU)
# Keep only records whose numeric source id is >= 3000.
factru = [_ for _ in factru if int(_.source.id) >= 3000]  # testset

path = join_path(DUMPS_DIR, RAW, GAREEV + JSONL + GZ)
gareev = list(log_progress(load_raw(path), prefix=GAREEV))

path = join_path(DUMPS_DIR, RAW, NE5 + JSONL + GZ)
ne5 = list(log_progress(load_raw(path), prefix=NE5))

path = join_path(DUMPS_DIR, RAW, WIKINER + JSONL + GZ)
wikiner = log_progress(load_raw(path), prefix=WIKINER)
# Only the first 20000 records are kept.
wikiner = list(head(wikiner, 20000))  # ~10%, ~same size as ne5

# Eval

In [None]:
# Score every annotator, plus the mixed markup, against the reference markup
# of each dataset. Results are stored under the key (source, annotator).
scores = {}
datasets = [
    (FACTRU, factru),
    (GAREEV, gareev),
    (NE5, ne5),
    (WIKINER, wikiner),
]
for source, dataset in datasets:
    # Reference markup comes from each record's own source annotation.
    etalon = [
        record.source.adapted
        for record in log_progress(dataset, prefix=source)
    ]

    for annotator in ANNOTATORS:
        prefix = source + '/' + annotator
        guess = [record.find(annotator).adapted for record in dataset]
        scores[source, annotator] = eval_markups(
            log_progress(guess, prefix=prefix),
            etalon
        )

    # The mixed markup is read from the record itself rather than looked up
    # by annotator name.
    prefix = source + '/' + MIX
    guess = [
        record.adapted.mixed
        for record in log_progress(dataset, prefix=prefix)
    ]
    scores[source, MIX] = eval_markups(
        log_progress(guess, prefix=prefix),
        etalon
    )

In [None]:
import pandas as pd


def format_float(value):
    """Render a number with two decimal places.

    A missing value (None) is rendered as '0.00'.
    """
    return '0.00' if value is None else format(value, '0.2f')


def format_score(scores, types):
    """Join the formatted F1 of every requested type with ' / '.

    scores: mapping indexed by type; each value exposes an `f1` attribute.
    types: iterable of keys into `scores`, rendered in the given order.
    """
    parts = [format_float(scores[type_].f1) for type_ in types]
    return ' / '.join(parts)


# Collect one row per (source, annotator) holding the "PER / LOC / ORG" F1 string.
data = []
sources = [FACTRU, GAREEV, NE5, WIKINER]
annotators = ANNOTATORS + [MIX]
for source in sources:
    for annotator in annotators:
        score = format_score(scores[source, annotator], [PER, LOC, ORG])
        data.append([source, annotator, score])

table = pd.DataFrame(data, columns=['source', 'annotator', 'score'])
# Reshape to annotators (rows) x sources (columns). The arguments are passed
# by keyword because DataFrame.pivot no longer accepts them positionally in
# pandas >= 2.0; the keyword form works on older versions as well.
table = table.pivot(index='annotator', columns='source', values='score')
# pivot() sorts both axes; restore the order used when collecting the rows.
table = table.reindex(columns=sources, index=annotators)
display(table)
print(table.to_html())

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th>source</th>
      <th>factru</th>
      <th>gareev</th>
      <th>ne5</th>
      <th>wikiner</th>
    </tr>
    <tr>
      <th>annotator</th>
      <th></th>
      <th></th>
      <th></th>
      <th></th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>deeppavlov</th>
      <td>0.93 / 0.90 / 0.79</td>
      <td>0.97 / 0.00 / 0.67</td>
      <td>0.92 / 0.98 / 0.92</td>
      <td>0.89 / 0.79 / 0.43</td>
    </tr>
    <tr>
      <th>pullenti</th>
      <td>0.90 / 0.83 / 0.68</td>
      <td>0.93 / 0.00 / 0.55</td>
      <td>0.95 / 0.86 / 0.68</td>
      <td>0.82 / 0.57 / 0.38</td>
    </tr>
    <tr>
      <th>texterra</th>
      <td>0.89 / 0.79 / 0.64</td>
      <td>0.89 / 0.00 / 0.58</td>
      <td>0.90 / 0.78 / 0.62</td>
      <td>0.94 / 0.78 / 0.52</td>
    </tr>
    <tr>
      <th>tomita</th>
      <td>0.94 / 0.00 / 0.00</td>
      <td>0.92 / 0.00 / 0.00</td>
      <td>0.94 / 0.00 / 0.00</td>
      <td>0.84 / 0.00 / 0.00</td>
    </tr>
    <tr>
      <th>natasha</th>
      <td>0.87 / 0.76 / 0.32</td>
      <td>0.88 / 0.00 / 0.26</td>
      <td>0.84 / 0.70 / 0.39</td>
      <td>0.70 / 0.63 / 0.18</td>
    </tr>
    <tr>
      <th>mitie</th>
      <td>0.88 / 0.85 / 0.55</td>
      <td>0.85 / 0.00 / 0.39</td>
      <td>0.75 / 0.64 / 0.43</td>
      <td>0.65 / 0.61 / 0.29</td>
    </tr>
    <tr>
      <th>mix</th>
      <td>0.97 / 0.91 / 0.79</td>
      <td>0.94 / 0.00 / 0.65</td>
      <td>0.99 / 0.92 / 0.84</td>
      <td>0.93 / 0.81 / 0.50</td>
    </tr>
  </tbody>
</table>

# Examples

In [None]:
# Show sentences from wikiner where the mixed markup and the DeepPavlov markup
# disagree on ORG spans. Either side of the comparison can be swapped for
# another markup, e.g.:
#   sent.find(DEEPPAVLOV).adapted
#   sent.adapted.mixed
#   sent.source.adapted
types = [ORG]  # span types being compared; constant across sentences
limit = 10  # number of differing sentences to display

sents = iter_sents(wikiner)
count = 0
for sent in sents:
    mixed_spans = list(select_type_spans(sent.adapted.mixed.spans, types))
    deeppavlov_spans = list(
        select_type_spans(sent.find(DEEPPAVLOV).adapted.spans, types)
    )

    # Only sentences where the two markups differ are shown.
    if mixed_spans == deeppavlov_spans:
        continue

    show_markup(sent.text, mixed_spans)
    print('---')
    show_markup(sent.text, deeppavlov_spans)
    print('===')

    count += 1
    # Stop after exactly `limit` examples (the previous `count > 10` check
    # let an eleventh example through).
    if count >= limit:
        break