In [None]:
# Colabis kasutamiseks
from google.colab import drive
drive.mount('/content/drive')
import os
os.chdir('/content/drive/My Drive/Colab Notebooks/thesis/')

!pip install estnltk==1.7.4
!pip install evaluate
!pip install seqeval
!pip install nervaluate

In [None]:
from modules.data_processing import DatasetProcessor
from modules.bert_data_processing import BERTDataProcessor
from modules.bert_evaluator import BERTEvaluator
# Short module-level aliases for the tag list and the tag<->index lookups
# that DatasetProcessor exposes as class attributes.
ALL_TAGS, TAG2IDX, IDX2TAG = (
    DatasetProcessor.ALL_TAGS,
    DatasetProcessor.TAG2IDX,
    DatasetProcessor.IDX2TAG,
)
from transformers import BertForTokenClassification, Trainer, TrainingArguments, AutoModelForTokenClassification
import json
from datasets import Dataset

In [3]:
def _normalize_tag(tag):
  """Translate a raw dataset tag into the tag vocabulary in ``ALL_TAGS``.

  ``GPE`` and ``EVENT`` labels are renamed to ``GEP`` and ``EVE``; a tag that
  is still not in ``ALL_TAGS`` after renaming is replaced by ``'O'``.
  """
  if tag == 'B-GPE':
    tag = 'B-GEP'
  elif tag == 'I-GPE':
    tag = 'I-GEP'
  elif tag == 'B-EVENT':
    tag = 'B-EVE'
  elif tag == 'I-EVENT':
    tag = 'I-EVE'
  return tag if tag in ALL_TAGS else 'O'


def load_test_data(path):
  """Read a JSON NER test set and return it as a ``Dataset``.

  The file is expected to have the layout
  ``{'documents': [{'sentences': [{'words': [{'word': ..., 'ner_1': ...}]}]}]}``.
  Every sentence becomes one row.

  Args:
    path: Path of the UTF-8 encoded JSON file.

  Returns:
    A ``Dataset`` with the columns ``id`` (running sentence number starting
    at 0, counted across all documents), ``tokens`` (list of word strings)
    and ``tags`` (list of ``TAG2IDX`` indices, one per token).
  """
  with open(path, 'r', encoding='utf-8') as f:
    corpus = json.load(f)

  token_rows, tag_rows = [], []
  for document in corpus['documents']:
    for sentence in document['sentences']:
      row_tokens, row_tags = [], []
      for word in sentence['words']:
        token = word['word']
        raw_tag = word['ner_1']
        row_tokens.append(token)
        row_tags.append(TAG2IDX[_normalize_tag(raw_tag)])
      token_rows.append(row_tokens)
      tag_rows.append(row_tags)

  # Sentence ids are simply the row positions 0..n-1.
  row_ids = list(range(len(token_rows)))
  return Dataset.from_dict({'id': row_ids, 'tokens': token_rows, 'tags': tag_rows})

In [4]:
# Load the test set stored in EstNER_new_test.json (one row per sentence, tags as TAG2IDX indices).
estner_new = load_test_data('data/estner/EstNER_new_test.json')

In [5]:
# Load the test set stored in EstNER_v1_test.json (one row per sentence, tags as TAG2IDX indices).
estner_v1 = load_test_data('data/estner/EstNER_v1_test.json')

In [6]:
# The token-classification model and the BERTDataProcessor are both pointed at
# the same checkpoint directory, so the path is written only once.
checkpoint_dir = 'results/EstRoBERTa/combined/checkpoint-11268'
model = AutoModelForTokenClassification.from_pretrained(checkpoint_dir)
bert_processor = BERTDataProcessor(checkpoint_dir)

In [None]:
# Tag-name -> label-id mapping taken from the loaded model's config; read by convert_to_model_id below.
label2id = model.config.label2id
def convert_to_model_id(example):
  """Re-encode one example's tags with the label ids from ``label2id``.

  Each entry of ``example['tags']`` is first turned into its tag string via
  ``IDX2TAG`` and that string is then looked up in ``label2id``. A tag string
  that is missing from ``label2id`` yields ``None``.

  Args:
    example: Mapping with a ``'tags'`` list of ``IDX2TAG`` keys.

  Returns:
    The same ``example`` object, with ``'tags'`` replaced in place.
  """
  tag_names = [IDX2TAG[idx] for idx in example['tags']]
  example['tags'] = list(map(label2id.get, tag_names))
  return example

# For each test set: re-encode the tags with the model's label ids, then tokenize.
# NOTE: both names are rebound to the processed datasets, so this cell is not
# idempotent - re-run the loading cells before running it a second time.
estner_new = bert_processor.tokenize_dataset(estner_new.map(convert_to_model_id))
estner_v1 = bert_processor.tokenize_dataset(estner_v1.map(convert_to_model_id))

In [None]:
# Label names listed in label-id order (0 .. number of labels - 1).
id2label = model.config.id2label
tags = [id2label[label_id] for label_id in range(len(id2label))]

evaluator = BERTEvaluator(all_tags=tags)

# report_to='none' disables reporting to experiment-tracking integrations.
# per_device_eval_batch_size is not set, so the library default applies.
training_args = TrainingArguments(report_to='none', output_dir='./results')

# No datasets are attached here; the trainer is handed to the evaluator
# together with the dataset to evaluate.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=bert_processor.data_collator,
    processing_class=bert_processor.tokenizer,
    compute_metrics=evaluator.compute_metrics,
)

In [9]:
# Evaluate the model on the processed EstNER_new test set; the metrics are printed below
# and the call's return value is kept in estner_new_results.
estner_new_results = evaluator.evaluate_and_print(estner_new, trainer)

Hindan testandmestikul..


  _warn_prf(average, modifier, msg_start, len(result))


Seqeval tulemused
EVE {'precision': 0.3870967741935484, 'recall': 0.2, 'f1': 0.26373626373626374, 'number': 60}
GEP {'precision': 0.8464566929133859, 'recall': 0.5180722891566265, 'f1': 0.6427503736920778, 'number': 415}
LOC {'precision': 0.22406639004149378, 'recall': 0.5567010309278351, 'f1': 0.31952662721893493, 'number': 97}
MUU {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 0}
ORG {'precision': 0.7714285714285715, 'recall': 0.5983379501385041, 'f1': 0.6739469578783152, 'number': 361}
PER {'precision': 0.9163498098859315, 'recall': 0.9060150375939849, 'f1': 0.9111531190926276, 'number': 532}
PROD {'precision': 0.5088757396449705, 'recall': 0.23756906077348067, 'f1': 0.3239171374764595, 'number': 362}
UNK {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 0}
overall_precision 0.7071713147410359
overall_recall 0.5829228243021346
overall_f1 0.639063906390639
overall_accuracy 0.9600480508408897

Nervaluate tulemused
Strict {'correct': 1065, 'incorrect': 418, 'partial': 0, '

In [10]:
# Evaluate the model on the processed EstNER_v1 test set; the metrics are printed below
# and the call's return value is kept in estner_v1_results.
estner_v1_results = evaluator.evaluate_and_print(estner_v1, trainer)

Hindan testandmestikul..


Seqeval tulemused
EVE {'precision': 0.4918032786885246, 'recall': 0.6976744186046512, 'f1': 0.5769230769230769, 'number': 43}
GEP {'precision': 0.7857142857142857, 'recall': 0.6376811594202898, 'f1': 0.7039999999999998, 'number': 828}
LOC {'precision': 0.26157407407407407, 'recall': 0.773972602739726, 'f1': 0.3910034602076124, 'number': 146}
MUU {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 0}
ORG {'precision': 0.7543859649122807, 'recall': 0.5025041736227045, 'f1': 0.6032064128256514, 'number': 1198}
PER {'precision': 0.8880842659644503, 'recall': 0.9473314606741573, 'f1': 0.9167516139993204, 'number': 1424}
PROD {'precision': 0.165, 'recall': 0.22758620689655173, 'f1': 0.19130434782608693, 'number': 145}
UNK {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 0}
overall_precision 0.7181498512307276
overall_recall 0.7016384778012685
overall_f1 0.709798155326828
overall_accuracy 0.9609026001298155

Nervaluate tulemused
Strict {'correct': 2655, 'incorrect': 678, 'partial': 0