# Eval

In [None]:
from os.path import join as join_path

from tqdm import tqdm_notebook as log_progress
import pandas as pd

from nerus.load import load_raw
from nerus.const import (
    NERUS_,
    DUMPS_DIR as NERUS_DIR, RAW, JSONL, GZ,
    FACTRU, NE5, GAREEV, BSNLP,
    DEEPPAVLOV, DEEPPAVLOV_BERT, PULLENTI, TEXTERRA, TOMITA, NATASHA, MITIE
)

from navec import Navec

from slovnet.record import Record
from slovnet.bio import PER, LOC, ORG
from slovnet.tokenizer import Tokenizer
from slovnet.markup import SpanMarkup
from slovnet.eval import eval_markups, avg_markup_scores
from slovnet import NERTagger


# Datasets whose records are loaded below; each record's source markup is
# used as the evaluation target.
NERUS_DATASETS = [FACTRU, GAREEV, NE5, BSNLP]
# Annotators whose predictions are looked up on the loaded nerus records.
NERUS_ANNOTATORS = [DEEPPAVLOV_BERT, DEEPPAVLOV, PULLENTI, TEXTERRA, TOMITA, MITIE]

# Location of the navec embeddings archive, relative to this notebook.
NAVEC_DIR = join_path('..', 'navec', 'data', 'models', 'navec')
NAVEC_MODEL = 'navec_news_v1_1B_250K_300d_100q.tar'

# Name under which slovnet's own predictions are stored, plus the location
# of its model archive.
SLOVNET = 'slovnet'
SLOVNET_DIR = join_path('data', 'models')
SLOVNET_MODEL = 'slovnet_ner_v1.tar'

# Axes of the score report: datasets x annotators x entity types.
# The list order is also the row/column order of the rendered tables.
DATASETS = NERUS_DATASETS
ANNOTATORS = [SLOVNET] + NERUS_ANNOTATORS
TYPES = [PER, LOC, ORG]

In [None]:
# Read every raw record of each dataset into memory, keyed by dataset name.
NERUS = {}
for dataset in log_progress(NERUS_DATASETS):
    # The dump file name is assembled from nerus constants:
    # prefix + dataset name + raw/jsonl/gz suffixes.
    path = join_path(NERUS_DIR, NERUS_ + dataset + RAW + JSONL + GZ)
    # load_raw is materialised into a list so the records can be reused by
    # the cells below.
    NERUS[dataset] = list(load_raw(path))

In [None]:
def adapt(markups):
    """Lazily convert markups into ``SpanMarkup`` objects.

    For each item its ``adapted`` attribute is taken, and a new
    ``SpanMarkup`` is built from that object's ``text`` and ``spans``.
    """
    for item in markups:
        adapted = item.adapted
        text, spans = adapted.text, adapted.spans
        yield SpanMarkup(text, spans)


# Span markups keyed by (dataset, producer). The producer is either the
# dataset name itself (reference markup) or an annotator name (prediction).
MARKUPS = {}
for dataset in log_progress(NERUS_DATASETS):
    records = NERUS[dataset]
    
    # Reference markup: each record's source annotation, stored under the
    # (dataset, dataset) key.
    markups = [_.source for _ in records]
    MARKUPS[dataset, dataset] = list(adapt(markups))
    
    # Predictions of every nerus annotator for the same records.
    for annotator in log_progress(NERUS_ANNOTATORS, leave=False):
        markups = [_.find(annotator) for _ in records]
        MARKUPS[dataset, annotator] = list(adapt(markups))

In [None]:
%%time
# Load the navec embeddings first: the slovnet tagger loader takes them as
# its second argument.
path = join_path(NAVEC_DIR, NAVEC_MODEL)
navec = Navec.load(path)

# Load the slovnet NER tagger from its model archive.
path = join_path(SLOVNET_DIR, SLOVNET_MODEL)
slovnet_ner = NERTagger.load(path, navec)

In [None]:
# Run the slovnet tagger over the text of every record and store its output
# next to the nerus annotators' predictions, under the (dataset, SLOVNET) key.
for dataset in log_progress(NERUS_DATASETS):
    records = NERUS[dataset]
    markups = [
        slovnet_ner(_.text)
        for _ in log_progress(records, leave=False)
    ]
    # NOTE(review): unlike the nerus markups these are stored without going
    # through adapt(); presumably the tagger already returns a compatible
    # markup type, confirm.
    MARKUPS[dataset, SLOVNET] = markups

In [None]:
# Convert every stored markup to its tag form using one shared tokenizer.
# TAG_MARKUPS uses the same (dataset, producer) keys as MARKUPS.
TOKENIZER = Tokenizer()
TAG_MARKUPS = {}
for key in log_progress(MARKUPS):
    markups = MARKUPS[key]
    TAG_MARKUPS[key] = [_.to_tag(TOKENIZER) for _ in markups]

## Scores

In [None]:
# Scores keyed by (dataset, annotator, entity type).
SCORES = {}
for dataset in log_progress(DATASETS):
    # The reference markups depend only on the dataset, so they are looked
    # up once per dataset rather than once per annotator.
    targets = TAG_MARKUPS[dataset, dataset]
    for annotator in log_progress(ANNOTATORS, leave=False):
        preds = TAG_MARKUPS[dataset, annotator]
        score = avg_markup_scores(eval_markups(preds, targets, TYPES))
        # The loop variable is deliberately not called `type`: at notebook
        # top level that would rebind the builtin for every later cell.
        for entity_type in TYPES:
            SCORES[dataset, annotator, entity_type] = score.get(entity_type)

In [None]:
def report_table(scores, datasets=DATASETS, annotators=ANNOTATORS, types=TYPES):
    """Pivot a flat score mapping into an annotator-by-(dataset, type) table.

    ``scores`` is indexed as ``scores[dataset, annotator, type]``. The
    result has one row per annotator and one column per (dataset, type)
    pair, both ordered as in the given ``annotators``, ``datasets`` and
    ``types`` sequences.
    """
    rows = [
        [dataset, annotator, kind, scores[dataset, annotator, kind]]
        for dataset in datasets
        for annotator in annotators
        for kind in types
    ]
    frame = pd.DataFrame(rows, columns=['dataset', 'annotator', 'type', 'score'])
    frame = frame.set_index(['dataset', 'annotator', 'type'])
    frame = frame.unstack(['dataset', 'type'])

    # Drop the top column level left over from the single 'score' column.
    frame.columns = frame.columns.droplevel()
    frame.index.name = None

    # Restore the caller's ordering on both axes.
    ordered_columns = [(dataset, kind) for dataset in datasets for kind in types]
    return frame.reindex(index=annotators, columns=ordered_columns)


def format_score(value):
    """Render a fractional score as a percentage of at least two digits.

    Falsy values (``None`` as well as ``0``) are shown as ``'-'``. The
    percentage is truncated towards zero, not rounded.
    """
    if value:
        percent = int(value * 100)
        return '{0:02d}'.format(percent)
    return '-'


def format_scores(scores):
    """Render a score object as a ``prec/recall/f1`` string.

    Reads ``scores.prec.value``, ``scores.recall.value`` and ``scores.f1``
    and formats each one with ``format_score``.
    """
    parts = dict(
        prec=format_score(scores.prec.value),
        recall=format_score(scores.recall.value),
        f1=format_score(scores.f1),
    )
    return '{prec}/{recall}/{f1}'.format(**parts)


def format_report(table):
    """Build a display table with every score cell formatted as text.

    Each column of ``table`` is mapped through ``format_scores``. The
    columns of the result are a two-level MultiIndex whose second level is
    named ``'prec/recall/f1,%'``.
    """
    formatted = pd.DataFrame()
    for key in table.columns:
        cells = table[key]
        formatted[key] = cells.map(format_scores)

    # Turn the tuple column labels back into a proper two-level index.
    formatted.columns = pd.MultiIndex.from_tuples(formatted.columns)
    formatted.columns.names = [None, 'prec/recall/f1,%']
    return formatted


# Pivot the collected scores and render them as prec/recall/f1 strings.
# `table` (the unformatted pivot) is reused by the GitHub report below.
table = report_table(SCORES)
output = format_report(table)
# Bare expression on the last line: the notebook displays it as the cell output.
output

In [None]:
def format_github_column(column, top=3):
    """Yield one HTML cell per score in ``column``, bolding the best F1s.

    Each truthy item contributes its ``f1`` attribute; falsy items (and
    falsy F1 values) give an empty cell. The ``top`` largest F1 values are
    wrapped in ``<b>...</b>``; values tied with a selected one are bolded
    too. A ``top`` of zero or less disables highlighting.
    """
    f1_values = [(score.f1 if score else None) for score in column]

    present = [value for value in f1_values if value]
    # Guard against top == 0: the slice [-0:] would select every value.
    best = sorted(present)[-top:] if top > 0 else []

    for value in f1_values:
        cell = '%.3f' % value if value else ''
        if value in best:
            cell = '<b>%s</b>' % cell
        yield cell


def format_github_report(table):
    """Build the F1-only HTML-ready report from the pivoted score table.

    Every (dataset, type) column is rendered with ``format_github_column``,
    except the (GAREEV, LOC) column, which is left out. The result keeps
    the index of ``table`` and gets two-level columns whose second level is
    named ``'f1'``.
    """
    report = pd.DataFrame()
    for key in table.columns:
        dataset, kind = key
        is_excluded = dataset == GAREEV and kind == LOC
        if not is_excluded:
            report[key] = list(format_github_column(table[key]))

    report.index = table.index
    report.columns = pd.MultiIndex.from_tuples(report.columns)
    report.columns.names = [None, 'f1']
    return report


# Render the F1 report both as a notebook table and as raw HTML text.
output = format_github_report(table)
# escape=False keeps the <b> tags produced for the best scores as markup.
html = output.to_html(escape=False)
# Switch the border attribute in the generated HTML from "1" to "0".
html = html.replace('border="1"', 'border="0"')
display(output)
print(html)

## Stats

In [None]:
# see nerus/bench.ipynb

class Stats(Record):
    """Resource and speed figures recorded for a single annotator.

    All figures are optional and default to ``None``. In this notebook
    ``disk`` and ``ram`` are given in bytes (as KB/MB/GB multiples), and
    the stats report labels ``init`` as seconds and ``speed`` as
    articles per second.
    """
    __attributes__ = ['annotator', 'init', 'disk', 'ram', 'speed']
    
    def __init__(self, annotator, init=None, disk=None, ram=None, speed=None):
        self.annotator = annotator
        self.init = init
        self.disk = disk
        self.ram = ram
        self.speed = speed
        

class GPUStats(Stats):
    """Stats subclass that adds no fields of its own.

    ``slice_speed`` checks for this exact class so that the speed column
    can be marked with ``(gpu)``.
    """
    pass


# Binary size multipliers, in bytes.
KB = 1024
MB = 1024 * KB
GB = 1024 * MB
        

# Hard-coded benchmark figures, one entry per annotator. disk and ram are
# in bytes; the stats report labels init as seconds and speed as
# articles per second.
STATS = [
    # GTX 1080 Ti
    GPUStats(
        DEEPPAVLOV,
        init=5.9,
        disk=1 * GB,  # 1GB emb + 5MB model
        ram=3 * GB,
        speed=24.31,  # 1.95 / 7 cores on CPU
    ),
    GPUStats(
        DEEPPAVLOV_BERT,
        init=34.5,
        disk=2 * GB,
        ram=6 * GB,
        speed=13.13,  # 17.71 / 3 cores on CPU
    ),
    
    # 16 CPUs
    Stats(
        PULLENTI,
        init=2.85,
        disk=16 * MB,
        ram=253 * MB,
        speed=6.05
    ),
    Stats(
        TEXTERRA,
        init=47.6,
        disk=193 * MB,
        ram=3.3 * GB,  # leaks
        speed=20.16 / 5  # utilizes ~5 cores, so divided by 5
    ),
    Stats(
        TOMITA,
        init=2.03,
        disk=64 * MB,
        ram=63 * MB,
        speed=29.8,
    ),
    Stats(
        NATASHA,
        init=2.05,
        disk=700 * KB,
        ram=160 * MB,
        speed=8.8,
    ),
    Stats(
        MITIE,
        init=28.3,
        disk=327 * MB,
        ram=261 * MB,
        speed=32.8,
    ),    
]


# Figures for slovnet itself, appended to the list of third-party annotators.
STATS += [
    Stats(
        SLOVNET,
        init=0.9,
        disk=30 * MB,
        ram=180 * MB,
        speed=33.92
    )
]

In [None]:
def select_max(values, count=3):
    """Return up to ``count`` largest items of ``values`` in ascending order.

    Returns an empty list when ``count`` is zero or negative. Without the
    guard, ``count == 0`` would slice with ``[-0:]`` and return everything.
    """
    if count <= 0:
        return []
    return sorted(values)[-count:]


def select_min(values, count=3):
    """Return the first ``count`` items of ``values`` sorted ascending."""
    ascending = sorted(values)
    smallest = ascending[:count]
    return smallest


def slice_attr(records, attr):
    """Lazily yield the attribute named ``attr`` from each record."""
    yield from (getattr(entry, attr) for entry in records)
        
        
def slice_init(records):
    """Yield the ``init`` value of each record."""
    return slice_attr(records, 'init')


def slice_disk(records):
    """Yield the ``disk`` value of each record."""
    return slice_attr(records, 'disk')


def slice_ram(records):
    """Yield the ``ram`` value of each record."""
    return slice_attr(records, 'ram')


def slice_speed(records):
    """Yield ``(speed, is_gpu)`` pairs, one per record.

    ``is_gpu`` is true only when the record's class is exactly
    ``GPUStats``.
    """
    for entry in records:
        is_gpu = entry.__class__ is GPUStats
        yield entry.speed, is_gpu


def highlight(column, selection, format):
    """Yield each value of ``column`` as text, bolding the selected ones.

    Every value is rendered with the ``format`` callable. Values that are
    members of ``selection`` are wrapped in ``<b>...</b>``.
    """
    for raw in column:
        # Membership is tested on the raw value, before formatting.
        is_selected = raw in selection
        rendered = format(raw)
        yield '<b>%s</b>' % rendered if is_selected else rendered


def format_mb(bytes):
    """Render a byte count as megabytes (multiples of ``MB``), no decimals."""
    return '%0.0f' % (bytes / MB)


def format_sec(secs):
    """Render a duration in seconds with one decimal place."""
    text = '%0.1f' % secs
    return text


def format_speed(value):
    """Render a ``(speed, is_gpu)`` pair as text.

    The speed is shown with one decimal place; `` (gpu)`` is appended when
    the flag is truthy.
    """
    rate, on_gpu = value
    text = '%0.1f' % rate
    return text + ' (gpu)' if on_gpu else text


def format_report(stats, annotators=ANNOTATORS):
    """Build the resource/speed table with the best cells bolded.

    ``stats`` records are matched to ``annotators`` by their ``annotator``
    attribute, and rows follow the order of ``annotators``. For init, disk
    and ram the smallest values are highlighted; for speed the largest.

    Note: this definition rebinds the name ``format_report`` used earlier
    in the notebook for the score report.
    """
    by_annotator = {record.annotator: record for record in stats}
    ordered = [by_annotator[name] for name in annotators]

    # (extract values, render a value, choose values to bold, column title)
    specs = [
        [slice_init, format_sec, select_min, 'init, s'],
        [slice_disk, format_mb, select_min, 'disk, mb'],
        [slice_ram, format_mb, select_min, 'ram, mb'],
        [slice_speed, format_speed, select_max, 'speed, articles/s']
    ]

    table = pd.DataFrame()
    for extract, render, choose, title in specs:
        values = list(extract(ordered))
        chosen = choose(values)
        table[title] = list(highlight(values, chosen, render))

    table.index = annotators
    table.index.name = None
    return table


# Render the stats report both as a notebook table and as raw HTML text.
# format_report here is the stats version defined just above, which
# replaced the score-report function of the same name.
output = format_report(STATS)
# escape=False keeps the <b> tags of the highlighted cells as markup.
html = output.to_html(escape=False)
# Switch the border attribute in the generated HTML from "1" to "0".
html = html.replace('border="1"', 'border="0"')
display(output)
print(html)