In [None]:
from random import seed, shuffle

import pandas as pd

from ipymarkup import show_line_markup as show_markup

from nerus.log import log_progress
from nerus.utils import (
    head,
    iter_sents
)
from nerus.span import select_type_spans
from nerus.load import load_raw
from nerus.path import join_path
from nerus.const import (
    DUMPS_DIR, RAW, JSONL, GZ,
    FACTRU, NE5, GAREEV, WIKINER,
    DEEPPAVLOV, PULLENTI, TEXTERRA, TOMITA, NATASHA, MITIE,
    ANNOTATORS,
    PER, LOC, ORG
)
from nerus.eval import eval_markups

In [None]:
# Cap on how many WikiNER records are kept in memory (the other corpora are
# loaded in full).
WIKINER_LIMIT = 10000  # ~5%, ~same size as ne5

# Each corpus is read from `<name> + RAW + JSONL + GZ` inside DUMPS_DIR and
# materialised as a list, with log_progress wrapping the iteration.
path = join_path(DUMPS_DIR, FACTRU + RAW + JSONL + GZ)
factru = list(log_progress(load_raw(path), prefix=FACTRU))

path = join_path(DUMPS_DIR, GAREEV + RAW + JSONL + GZ)
gareev = list(log_progress(load_raw(path), prefix=GAREEV))

path = join_path(DUMPS_DIR, NE5 + RAW + JSONL + GZ)
ne5 = list(log_progress(load_raw(path), prefix=NE5))

# WikiNER is not loaded whole: the record stream is passed through `head`
# before being turned into a list (presumably keeping the first
# WIKINER_LIMIT records -- confirm against nerus.utils.head).
path = join_path(DUMPS_DIR, WIKINER + RAW + JSONL + GZ)
wikiner = log_progress(load_raw(path), prefix=WIKINER)
wikiner = list(head(wikiner, WIKINER_LIMIT))

# `sources` fixes the order in which corpora are processed and displayed;
# `datasets` maps each source name to its loaded records.
sources = [FACTRU, GAREEV, NE5, WIKINER]
datasets = {
    FACTRU: factru,
    GAREEV: gareev,
    NE5: ne5,
    WIKINER: wikiner
}

In [None]:
# Reference ("etalon") markups per source: for every loaded record, the
# `adapted` form of the record's own `source` markup, in dataset order.
etalons = {}
for source in sources:
    dataset = datasets[source]
    etalon = []
    for record in log_progress(dataset, prefix=source):
        etalon.append(record.source.adapted)
    etalons[source] = etalon

# Eval

In [None]:
def eval(source, annotator):
    """Evaluate one annotator against the reference markups of one source.

    For every record loaded for ``source`` the markup found for
    ``annotator`` is taken in its ``adapted`` form; the resulting list is
    passed to ``eval_markups`` together with the reference list stored in
    ``etalons[source]``, and that call's result is returned unchanged.

    Note: within this notebook the name shadows the builtin ``eval``.
    """
    dataset = datasets[source]
    etalon = etalons[source]
    guess = []
    for record in dataset:
        guess.append(record.find(annotator).adapted)
    return eval_markups(guess, etalon)


annotators = ANNOTATORS

# Every (source, annotator) combination; the annotator varies slowest, so all
# sources of one annotator are evaluated before moving on to the next one.
keys = [(source, annotator) for annotator in annotators for source in sources]

# Evaluation result for each pair, keyed by the (source, annotator) tuple.
scores = dict(
    (pair, eval(*pair))
    for pair in log_progress(keys)
)

In [None]:
def format_score(value):
    """Render a fractional score as a zero-padded whole-percent string.

    Any falsy value (``None``, ``0``, ``0.0``) is rendered as ``'-'``.
    Otherwise the value is multiplied by 100 and truncated with ``int``
    (not rounded), then padded to at least two digits.
    """
    if value:
        percent = int(value * 100)
        return '{0:02d}'.format(percent)
    return '-'


def format_scores(scores):
    """Format a score triple as a single ``prec/recall/f1`` string.

    ``scores`` must expose ``prec``, ``recall`` and ``f1`` attributes; each
    one is rendered with ``format_score`` and the three parts are joined
    with slashes.
    """
    prec = format_score(scores.prec)
    recall = format_score(scores.recall)
    f1 = format_score(scores.f1)
    return '{prec}/{recall}/{f1}'.format(prec=prec, recall=recall, f1=f1)


# Entity types shown in the table, in column order.
types = [PER, LOC, ORG]

# Long-format rows: one [source, annotator, type, 'prec/recall/f1'] per cell.
# The loop variable is `type_` rather than `type` so that the builtin `type`
# is not rebound in the notebook namespace once this cell has run.
data = []
for source in sources:
    for annotator in annotators:
        for type_ in types:
            score = format_scores(scores[source, annotator][type_])
            data.append([source, annotator, type_, score])

table = pd.DataFrame(data, columns=['source', 'annotator', 'type', 'score'])
# Pivot: annotators stay on the rows, (source, type) pairs move to the columns.
table = table.set_index(['source', 'annotator', 'type']).unstack(['source', 'type'])
# Drop the outermost column level so only (source, type) remains.
table.columns = table.columns.droplevel()
table.index.name = None
table.columns.names = [None, 'prec/recall/f1,%']
# Put rows and columns into the explicit order given by `annotators`,
# `sources` and `types`.
columns = [
    (source, type_)
    for source in sources
    for type_ in types
]
table = table.reindex(index=annotators, columns=columns)
display(table)

In [None]:
# print(table.to_html())

<table border="1" class="dataframe">
  <thead>
    <tr>
      <th></th>
      <th colspan="3" halign="left">factru</th>
      <th colspan="3" halign="left">gareev</th>
      <th colspan="3" halign="left">ne5</th>
      <th colspan="3" halign="left">wikiner</th>
    </tr>
    <tr>
      <th>prec/recall/f1,%</th>
      <th>PER</th>
      <th>LOC</th>
      <th>ORG</th>
      <th>PER</th>
      <th>LOC</th>
      <th>ORG</th>
      <th>PER</th>
      <th>LOC</th>
      <th>ORG</th>
      <th>PER</th>
      <th>LOC</th>
      <th>ORG</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>deeppavlov</th>
      <td>96/97/97</td>
      <td>91/93/92</td>
      <td>85/79/82</td>
      <td>97/99/98</td>
      <td>-/-/-</td>
      <td>88/94/91</td>
      <td>99/99/99</td>
      <td>98/99/98</td>
      <td>98/97/97</td>
      <td>96/96/96</td>
      <td>90/82/86</td>
      <td>54/78/64</td>
    </tr>
    <tr>
      <th>pullenti</th>
      <td>95/85/90</td>
      <td>86/77/81</td>
      <td>68/68/68</td>
      <td>91/96/94</td>
      <td>-/-/-</td>
      <td>63/64/63</td>
      <td>97/92/95</td>
      <td>86/85/86</td>
      <td>62/75/68</td>
      <td>96/72/82</td>
      <td>78/44/57</td>
      <td>36/46/40</td>
    </tr>
    <tr>
      <th>texterra</th>
      <td>91/88/90</td>
      <td>76/83/80</td>
      <td>86/45/59</td>
      <td>87/89/88</td>
      <td>-/-/-</td>
      <td>81/42/56</td>
      <td>97/83/90</td>
      <td>81/73/77</td>
      <td>83/46/59</td>
      <td>95/93/94</td>
      <td>81/72/76</td>
      <td>59/52/55</td>
    </tr>
    <tr>
      <th>tomita</th>
      <td>93/92/92</td>
      <td>-/-/-</td>
      <td>-/-/-</td>
      <td>90/93/92</td>
      <td>-/-/-</td>
      <td>-/-/-</td>
      <td>96/92/94</td>
      <td>-/-/-</td>
      <td>-/-/-</td>
      <td>85/80/83</td>
      <td>-/-/-</td>
      <td>-/-/-</td>
    </tr>
    <tr>
      <th>natasha</th>
      <td>96/78/86</td>
      <td>72/78/75</td>
      <td>41/23/29</td>
      <td>96/79/87</td>
      <td>-/-/-</td>
      <td>40/30/34</td>
      <td>98/75/85</td>
      <td>70/71/70</td>
      <td>49/32/39</td>
      <td>89/59/71</td>
      <td>73/55/63</td>
      <td>22/20/21</td>
    </tr>
    <tr>
      <th>mitie</th>
      <td>95/82/88</td>
      <td>90/81/86</td>
      <td>74/41/53</td>
      <td>85/84/84</td>
      <td>-/-/-</td>
      <td>55/38/45</td>
      <td>94/62/75</td>
      <td>74/56/64</td>
      <td>49/38/43</td>
      <td>74/55/63</td>
      <td>73/52/61</td>
      <td>21/32/25</td>
    </tr>
  </tbody>
</table>

# Examples

In [None]:
# sents = list(iter_sents(factru))
# seed(5)
# shuffle(sents)

In [None]:
# count = 0
# for sent in sents:
#     types = [PER]

#     a = sent.source.adapted
#     a = list(select_type_spans(a.spans, types))

#     b = sent.find(DEEPPAVLOV).adapted
#     b = list(select_type_spans(b.spans, types))

#     if a == b:
#         continue

#     show_markup(sent.text, a)
#     print('---')
#     show_markup(sent.text, b)
#     print('---')

#     for markup in sent.select([TOMITA, PULLENTI, TEXTERRA, MITIE]).markups:
#         print(markup.label)
#         show_markup(sent.text, markup.adapted.spans)
#     print('\n\n\n')

#     count += 1
#     if count > 10:
#         break