In [27]:
from __future__ import print_function
from math import ceil, floor
from os.path import isfile
import time
import numpy as np
import torch.nn as nn
from src.classes.report import Report
from src.classes.utils import *
from src.factories.factory_data_io import DataIOFactory
from src.factories.factory_datasets_bank import DatasetsBankFactory
from src.factories.factory_evaluator import EvaluatorFactory
from src.factories.factory_optimizer import OptimizerFactory
from src.factories.factory_tagger import TaggerFactory
from src.seq_indexers.seq_indexer_tag import SeqIndexerTag
from src.seq_indexers.seq_indexer_word import SeqIndexerWord
from src.seq_indexers.seq_indexer_elmo import SeqIndexerElmo
from src.seq_indexers.seq_indexer_bert import SeqIndexerBert


In [28]:
!pwd

/home/vika/targer


In [29]:
from src.evaluators.evaluator_base import EvaluatorBase
from src.evaluators.evaluator_f1_macro_token_level import EvaluatorF1MacroTokenLevel # choose evaluator type

Create dataset bank, to evaluate external result you should only set test sequence.

In [30]:
from src.classes.datasets_bank import DatasetsBank, DatasetsBankSorted
from src.data_io.data_io_connl_ner_2003 import DataIOConnlNer2003

data_io = DataIOConnlNer2003()
#word_sequences_train, tag_sequences_train = data_io.read_data(fn='/home/vika/targer/data/NER/Varvara_v3/train_pred_full.tsv', verbose=True)
#word_sequences_dev, tag_sequences_dev = data_io.read_data(fn='/home/vika/targer/data/NER/Varvara_v3/dev_pred_full.tsv', verbose=True)
word_sequences_test, tag_sequences_test = data_io.read_data(fn='/home/vika/targer/data/NER/Varvara_v3/test_manual_predfull_seq_labelling.tsv', verbose=True)

datasets_bank = DatasetsBank(verbose=True)

#datasets_bank.add_train_sequences(word_sequences_train, tag_sequences_train)
#datasets_bank.add_dev_sequences(word_sequences_dev, tag_sequences_dev)
datasets_bank.add_test_sequences(word_sequences_test, tag_sequences_test)

Loading from /home/vika/targer/data/NER/Varvara_v3/train_pred_full.tsv: 3077 samples, 89350 words.
Loading from /home/vika/targer/data/NER/Varvara_v3/dev_pred_full.tsv: 402 samples, 12360 words.
Loading from /home/vika/targer/data/NER/Varvara_v3/test_manual_predfull_seq_labelling.tsv: 488 samples, 13736 words.
DatasetsBank: len(unique_words_list) = 8799 unique words.
DatasetsBank: len(unique_words_list) = 9516 unique words.
DatasetsBank: len(unique_words_list) = 10344 unique words.


In [31]:
word_sequences=datasets_bank.word_sequences_test
targets_tag_sequences=datasets_bank.tag_sequences_test

In [33]:
with open('labels_v3_predful_manual.txt') as lines:
    outputs_tag_sequences = []
    for line in lines:
        outputs_tag_sequences.append(line.strip().split(', '))

In [34]:
## if extra tag_sequence have the differents labels than test

new_ts = []
for line in tag_sequences_test:
    for ind, elem in enumerate(line):
        if (elem == 'NONE'):
            line[ind] = 'O'
    new_ts.append(line)
tag_sequences_test=new_ts

In [36]:
tag_sequences_test[15]

['B-OBJ',
 'O',
 'O',
 'B-PREDFULL',
 'O',
 'O',
 'O',
 'O',
 'B-OBJ',
 'O',
 'O',
 'O',
 'O',
 'O']

In [37]:
outputs_tag_sequences[15]

['O', 'O', 'O', 'B-PREDFULL', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']

In [38]:


evaluator = EvaluatorF1MacroTokenLevel()
evaluator.get_evaluation_score(tag_sequences_test, outputs_tag_sequences, word_sequences)

['O', 'O', 'O', 'O', 'B-OBJ', 'O', 'B-PREDFULL', 'I-PREDFULL', 'O', 'B-OBJ', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
['O', 'O', 'B-OBJ', 'O', 'B-PREDFULL', 'I-PREDFULL', 'I-PREDFULL', 'I-PREDFULL', 'O', 'B-OBJ', 'O', 'O', 'O', 'O', 'O', 'B-PREDFULL', 'I-PREDFULL', 'I-PREDFULL', 'I-PREDFULL', 'I-PREDFULL', 'O', 'B-OBJ', 'O']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
['O', 'O', 'B-OBJ', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-PREDFULL', 'O', 'B-OBJ', 'I-OBJ', 'O']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-OBJ', 'O']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-OBJ', 'O', 'B-PREDFULL', 'I-PREDFULL', 'I-PREDFULL', 'O', 'B-OBJ', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
['O', 'O'

(42.10951606537835,
 '\nF1 scores\n------------------------\n          B-OBJ = f1 = 37.71, precision = 57.03, recall = 28.17\n     B-PREDFULL = f1 = 47.66, precision = 74.18, recall = 35.11\n          I-OBJ = f1 = 0.00, precision = 0.00, recall = 0.00\n     I-PREDFULL = f1 = 33.97, precision = 87.70, recall = 21.06\n              O = f1 = 91.21, precision = 85.45, recall = 97.80\n------------------------\nMacro-F1 = 42.110Macro-Prescion = 60.871Macro-Recall = 36.429')