In [None]:
# Colabis kasutamiseks
from google.colab import drive
drive.mount('/content/drive')
import os
os.chdir('/content/drive/My Drive/Colab Notebooks/thesis/')

!pip install seqeval
!pip install nervaluate
!pip install datasets
!pip install evaluate
!pip install estnltk

In [None]:
from modules.eval_functions import get_supported_entities, predict_tags, write_to_tsv, evaluation_to_json
from modules.data_processing import DatasetProcessor
from nervaluate import Evaluator

In [3]:
def evaluate_model(model_path, model_name, trained_on, evaluated_on, is_roberta_model=False, epochs=3):
  """Evaluate a fine-tuned NER model on a test split and store the results.

  Predictions for the chosen test split are scored with nervaluate; the
  overall 'strict' results and the per-tag results are handed to
  `evaluation_to_json`, and tokens with gold and predicted tags are handed
  to `write_to_tsv`.

  Args:
    model_path: Location of the model checkpoint; passed to
      `get_supported_entities` and `predict_tags`.
    model_name: Label for the model, forwarded to both output writers.
    trained_on: Identifier of the training corpus, forwarded to
      `evaluation_to_json`.
    evaluated_on: 'edt' or 'ewt' selects that corpus' test split. Any other
      value (the notebook uses 'combined') selects the test split of the
      merged EDT + EWT dataset.
    is_roberta_model: Forwarded to `predict_tags`.
    epochs: Forwarded to `evaluation_to_json` (presumably the number of
      training epochs; confirm against that helper). Defaults to 3, the
      value that used to be hard-coded here.

  Returns:
    None. The results are passed to the two writer helpers.

  Raises:
    AssertionError: If the number of predicted tag sequences differs from
      the number of test sentences.
  """
  if evaluated_on in ['edt', 'ewt']:
    testset = DatasetProcessor(evaluated_on).test
  else:
    # Everything that is not a single corpus is evaluated on the merged test split.
    edt = DatasetProcessor('edt').dataset
    ewt = DatasetProcessor('ewt').dataset
    testset = DatasetProcessor.combine_datasetdicts(edt, ewt)['test']

  supported_entities = get_supported_entities(model_path)

  true_tags = [item['tags'] for item in testset]
  pred_tags = predict_tags(model_path, testset, is_roberta_model)
  assert len(true_tags) == len(pred_tags), (
      f'got {len(pred_tags)} predicted tag sequences for {len(true_tags)} test sentences'
  )

  evaluator = Evaluator(true_tags, pred_tags, tags=list(supported_entities), loader='list')
  # The per-entity result indices returned by nervaluate are not needed here.
  results, results_by_tag, _, _ = evaluator.evaluate()

  evaluation_to_json(results['strict'], results_by_tag, model_name, trained_on, evaluated_on, epochs=epochs)

  sents = [item['tokens'] for item in testset]
  write_to_tsv(sents, true_tags, pred_tags, model_name, evaluated_on)

EstBERT testimine

In [5]:
# Root directory of the fine-tuned EstBERT models.
base_path = 'results/models/EstBERT'

# Checkpoint sub-directory to load for each training corpus.
checkpoints = dict(
    edt='checkpoint-4614',
    ewt='checkpoint-1023',
    combined='checkpoint-5634',
)

# Directory-name spelling of each training corpus key.
folder_names = dict(
    edt='EDT',
    ewt='EWT',
    combined='Combined',
)

# (trained_on, evaluated_on) pairs: every model on its own test split, plus
# the combined model on each single-corpus test split.
eval_combinations = [
    ('edt', 'edt'),
    ('ewt', 'ewt'),
    ('combined', 'combined'),
    ('combined', 'edt'),
    ('combined', 'ewt'),
]

for trained_on, evaluated_on in eval_combinations:
    folder_name = folder_names[trained_on]
    ckpt = checkpoints[trained_on]
    model_name = 'EstBERT_' + folder_name
    model_path = '/'.join([base_path, folder_name, 'trained_for_3', ckpt])
    evaluate_model(
        model_path,
        model_name=model_name,
        trained_on=trained_on,
        evaluated_on=evaluated_on,
    )

Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Salvestasin: results/EstBERT_EDT/eval_on_edt_test_2025-17-04_12-47.json


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Salvestasin: results/EstBERT_EWT/eval_on_ewt_test_2025-17-04_12-47.json


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Salvestasin: results/EstBERT_Combined/eval_on_combined_test_2025-17-04_12-49.json


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Salvestasin: results/EstBERT_Combined/eval_on_edt_test_2025-17-04_12-49.json


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Salvestasin: results/EstBERT_Combined/eval_on_ewt_test_2025-17-04_12-49.json


Est-RoBERTa testimine

In [6]:
# Root directory of the fine-tuned Est-RoBERTa models.
base_path = 'results/models/est-roberta'

# Checkpoint sub-directory to load for each training corpus.
checkpoints = dict(
    edt='checkpoint-4614',
    ewt='checkpoint-682',
    combined='checkpoint-5634',
)

# Directory-name spelling of each training corpus key.
folder_names = dict(
    edt='EDT',
    ewt='EWT',
    combined='Combined',
)

# (trained_on, evaluated_on) pairs: every model on its own test split, plus
# the combined model on each single-corpus test split.
eval_combinations = [
    ('edt', 'edt'),
    ('ewt', 'ewt'),
    ('combined', 'combined'),
    ('combined', 'edt'),
    ('combined', 'ewt'),
]

for trained_on, evaluated_on in eval_combinations:
    folder_name = folder_names[trained_on]
    ckpt = checkpoints[trained_on]
    model_name = 'est-roberta_' + folder_name
    model_path = '/'.join([base_path, folder_name, 'trained_for_3', ckpt])
    evaluate_model(
        model_path,
        model_name=model_name,
        trained_on=trained_on,
        evaluated_on=evaluated_on,
        is_roberta_model=True,
    )

Device set to use cuda:0


Salvestasin: results/est-roberta_EDT/eval_on_edt_test_2025-17-04_12-51.json


Device set to use cuda:0


Salvestasin: results/est-roberta_EWT/eval_on_ewt_test_2025-17-04_12-52.json


Device set to use cuda:0


Salvestasin: results/est-roberta_Combined/eval_on_combined_test_2025-17-04_12-53.json


Device set to use cuda:0


Salvestasin: results/est-roberta_Combined/eval_on_edt_test_2025-17-04_12-54.json


Device set to use cuda:0


Salvestasin: results/est-roberta_Combined/eval_on_ewt_test_2025-17-04_12-54.json
