In [None]:
# For use in Google Colab: mount Google Drive and make the thesis folder the
# working directory (presumably so the local `modules` package imported in the
# next cell resolves -- confirm the folder layout on Drive).
from google.colab import drive
drive.mount('/content/drive')
import os
os.chdir('/content/drive/My Drive/Colab Notebooks/thesis/')

# Install the extra packages into the Colab runtime.
# NOTE(review): only estnltk is version-pinned; the other three float to the
# latest release, which can change results between runs.
!pip install estnltk==1.7.4
!pip install evaluate
!pip install seqeval
!pip install nervaluate

In [None]:
from modules.data_processing import DatasetProcessor
from modules.bert_data_processing import BERTDataProcessor
from modules.bert_evaluator import BERTEvaluator
from modules.bert_trainer import BERTTrainer
import json
# Notebook-level aliases for the label inventory and lookups defined on
# DatasetProcessor. Judging by the names, TAG2IDX maps tag -> integer id and
# IDX2TAG is the inverse -- confirm in modules.data_processing.
ALL_TAGS = DatasetProcessor.ALL_TAGS
TAG2IDX = DatasetProcessor.TAG2IDX
IDX2TAG = DatasetProcessor.IDX2TAG

In [3]:
def train_model(model_name, dataset_name, epochs=3, batch_size=16, early_stop_patience=3):
    """Fine-tune a token-classification model on one dataset and evaluate it.

    The dataset is loaded from JSON, its tags are converted to ids with
    ``TAG2IDX``, it is tokenized for ``model_name``, the model is fine-tuned,
    and the test split is evaluated. Every evaluation is also written out via
    ``evaluator.evaluation_to_json``. When training on the combined dataset,
    the model is additionally evaluated on the EWT and EDT test splits
    separately.

    Args:
        model_name: Model identifier passed to ``BERTDataProcessor`` and
            ``BERTTrainer`` (e.g. ``"EMBEDDIA/est-roberta"``). The part after
            the first ``/`` is used as the short name in the output path and
            in the saved evaluation; a name without ``/`` is used as-is.
        dataset_name: ``'EWT'``, ``'EDT'`` or ``'Combined'`` (case-insensitive).
            The value is used verbatim in the output directory and in the
            saved evaluation metadata.
        epochs: Number of training epochs.
        batch_size: Batch size forwarded to ``finetune_model``.
        early_stop_patience: Early-stopping patience forwarded to
            ``finetune_model`` (default 3, the previously hard-coded value).

    Returns:
        ``(model, model_trainer, results)``. For ``'Combined'`` the third
        element is a tuple ``(combined_results, ewt_results, edt_results)``;
        otherwise it is the single result object of ``evaluate_and_print``.

    Raises:
        ValueError: If ``dataset_name`` is not one of the supported names.
    """
    dataset_key = dataset_name.lower()
    # Fail fast: previously an unknown name fell through both branches and only
    # surfaced later as a NameError on the undefined `dataset` variable.
    if dataset_key not in ('ewt', 'edt', 'combined'):
        raise ValueError(
            f"Unsupported dataset_name {dataset_name!r}; expected 'EWT', 'EDT' or 'Combined'"
        )

    # "org/name" -> "name"; a bare "name" (no namespace) is kept whole instead
    # of raising IndexError.
    name_parts = model_name.split('/')
    short_model_name = name_parts[1] if len(name_parts) > 1 else name_parts[0]

    if dataset_key == 'combined':
        ewt_processor = DatasetProcessor('ewt', from_json=True)
        edt_processor = DatasetProcessor('edt', from_json=True)
        ewt_dataset = DatasetProcessor.tag_to_id(ewt_processor.dataset, TAG2IDX)
        edt_dataset = DatasetProcessor.tag_to_id(edt_processor.dataset, TAG2IDX)
        dataset = DatasetProcessor.combine_datasetdicts(ewt_dataset, edt_dataset)
    else:
        processor = DatasetProcessor(dataset_key, from_json=True)
        dataset = DatasetProcessor.tag_to_id(processor.dataset, TAG2IDX)

    print(f'{dataset_name.upper()} andmestik laetud')
    bert_processor = BERTDataProcessor(model_name)
    evaluator = BERTEvaluator(all_tags=ALL_TAGS)

    tokenized_dataset = bert_processor.tokenize_dataset(dataset)

    trainer = BERTTrainer(
        model_name=model_name,
        idx2tag=IDX2TAG,
        tag2idx=TAG2IDX,
        evaluator=evaluator,
    )

    model, model_trainer = trainer.finetune_model(
        processor=bert_processor,
        tokenized_dataset=tokenized_dataset,
        epochs=epochs,
        batch_size=batch_size,
        early_stop_patience=early_stop_patience,
        output_dir=f'./results/models/{short_model_name}/{dataset_name}/trained_for_{epochs}',
    )

    def _evaluate_and_save(tokenized_test, evaluated_on):
        # Evaluate one test split and persist the strict overall and per-tag scores.
        scores = evaluator.evaluate_and_print(tokenized_test, model_trainer)
        evaluator.evaluation_to_json(
            nervaluate_strict_overall=scores[0]['strict'],
            nervaluate_by_tag=scores[1],
            model_name=short_model_name,
            trained_on=dataset_name,
            evaluated_on=evaluated_on,
            epochs=epochs,
        )
        return scores

    results = _evaluate_and_save(tokenized_dataset['test'], dataset_name)

    if dataset_key != 'combined':
        return model, model_trainer, results

    # Tokenize both per-corpus test splits up front (same order as before),
    # then score the combined-trained model on each corpus separately.
    per_corpus_tests = (
        ('EWT', bert_processor.tokenize_dataset(ewt_dataset['test'])),
        ('EDT', bert_processor.tokenize_dataset(edt_dataset['test'])),
    )
    per_corpus_results = []
    for corpus_name, tokenized_test in per_corpus_tests:
        print(f'Kombineeritud andmestikul treenitud {model_name} {corpus_name} testandmestikul')
        per_corpus_results.append(_evaluate_and_save(tokenized_test, corpus_name))

    return model, model_trainer, (results, *per_corpus_results)

def train_all(model_names=None, dataset_names=None, epochs=3):
    """Train and evaluate every requested model on every requested dataset.

    Args:
        model_names: Iterable of model identifiers. Defaults to
            ``["EMBEDDIA/est-roberta", "tartuNLP/EstBERT"]``.
        dataset_names: Iterable of dataset names accepted by ``train_model``.
            Defaults to ``['EWT', 'EDT', 'Combined']``.
        epochs: Number of epochs passed to each ``train_model`` call.

    Returns:
        Nested dict ``{model_name: {dataset_name: results}}`` where ``results``
        is the third element returned by ``train_model``.
    """
    if model_names is None:
        # "tartuNLP/EstRoBERTa" is an Estonian variant of XLM-RoBERTa, but it
        # was left out because there is no background information about it.
        model_names = ["EMBEDDIA/est-roberta", "tartuNLP/EstBERT"]
    if dataset_names is None:
        dataset_names = ['EWT', 'EDT', 'Combined']
    # Materialise once so a one-shot iterable is still reused for every model.
    dataset_names = list(dataset_names)

    results_dict = {}
    for model_name in model_names:
        model_results = {}
        for dataset_name in dataset_names:
            # Only the evaluation results are kept; the model and trainer
            # objects returned by train_model are not needed here.
            _, _, results = train_model(model_name, dataset_name, epochs=epochs)
            model_results[dataset_name] = results
        results_dict[model_name] = model_results

    return results_dict

In [None]:
def main():
    """Run the full training sweep and return its results.

    Returns:
        The nested ``{model_name: {dataset_name: results}}`` dict produced by
        ``train_all``. It was previously bound to a local and discarded, so the
        scores of the whole sweep were unreachable afterwards. Assign the
        return value (or end the call with ``;``) to avoid echoing it as cell
        output.
    """
    return train_all()

In [None]:
# Run the whole sweep. NOTE: this retrains every model/dataset pair from
# scratch; the recorded output below spans roughly an hour (12:07-13:09).
main()

Map:   0%|          | 0/5444 [00:00<?, ? examples/s]

Map:   0%|          | 0/833 [00:00<?, ? examples/s]

Map:   0%|          | 0/913 [00:00<?, ? examples/s]

EWT andmestik laetud


tokenizer_config.json:   0%|          | 0.00/509 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/936k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.25M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/298 [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/6.34k [00:00<?, ?B/s]

Map:   0%|          | 0/5444 [00:00<?, ? examples/s]

Map:   0%|          | 0/833 [00:00<?, ? examples/s]

Map:   0%|          | 0/913 [00:00<?, ? examples/s]

config.json:   0%|          | 0.00/617 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/467M [00:00<?, ?B/s]

Some weights of CamembertForTokenClassification were not initialized from the model checkpoint at EMBEDDIA/est-roberta and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[2025-03-24 12:07:03] Alustan EMBEDDIA/est-roberta treenimist


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy,Gep precision,Gep recall,Gep f1,Gep number,Loc precision,Loc recall,Loc f1,Loc number,Org precision,Org recall,Org f1,Org number,Per precision,Per recall,Per f1,Per number,Prod precision,Prod recall,Prod f1,Prod number,Unk precision,Unk recall,Unk f1,Unk number,Eve precision,Eve recall,Eve f1,Eve number
1,No log,0.263147,0.76203,0.761364,0.761697,0.94695,0.484848,0.615385,0.542373,26,0.086957,0.2,0.121212,10,0.435897,0.242857,0.311927,70,0.931133,0.92582,0.928469,701,0.532764,0.556548,0.544396,336,0.0,0.0,0.0,1,,,,
2,0.189800,0.249677,0.805195,0.812937,0.809047,0.956474,0.607143,0.653846,0.62963,26,0.3,0.3,0.3,10,0.625,0.214286,0.319149,70,0.944681,0.950071,0.947368,701,0.590206,0.681548,0.632597,336,0.0,0.0,0.0,1,,,,
3,0.047400,0.277456,0.811989,0.781469,0.796437,0.957025,0.615385,0.615385,0.615385,26,0.105263,0.2,0.137931,10,0.393939,0.185714,0.252427,70,0.969253,0.944365,0.956647,701,0.59292,0.598214,0.595556,336,0.0,0.0,0.0,1,0.0,0.0,0.0,0.0


model.safetensors:   0%|          | 0.00/467M [00:00<?, ?B/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


[2025-03-24 12:10:17] EMBEDDIA/est-roberta treenimine lõpetatud
Kokku kulus: 193.81 sekundit (0.05 tundi)
Hindan testandmestikul..


Nervaluate tulemused
Strict {'correct': 1321, 'incorrect': 372, 'partial': 0, 'missed': 175, 'spurious': 227, 'possible': 1868, 'actual': 1920, 'precision': 0.6880208333333333, 'recall': 0.7071734475374732, 'f1': 0.6974656810982047}
precision 0.6880208333333333
recall 0.7071734475374732
f1 0.6974656810982047
EVE {'correct': 19, 'incorrect': 23, 'partial': 0, 'missed': 12, 'spurious': 6, 'possible': 54, 'actual': 48, 'precision': 0.3958333333333333, 'recall': 0.35185185185185186, 'f1': 0.3725490196078432}
GEP {'correct': 35, 'incorrect': 15, 'partial': 0, 'missed': 7, 'spurious': 6, 'possible': 57, 'actual': 56, 'precision': 0.625, 'recall': 0.6140350877192983, 'f1': 0.6194690265486725}
LOC {'correct': 24, 'incorrect': 27, 'partial': 0, 'missed': 9, 'spurious': 2, 'possible': 60, 'actual': 53, 'precision': 0.4528301886792453, 'recall': 0.4, 'f1': 0.4247787610619469}
MUU {'correct': 0, 'incorrect': 0, 'partial': 0, 'missed': 0, 'spurious': 0, 'possible': 0, 'actual': 0, 'precision': 0, '

Map:   0%|          | 0/24601 [00:00<?, ? examples/s]

Map:   0%|          | 0/3122 [00:00<?, ? examples/s]

Map:   0%|          | 0/3207 [00:00<?, ? examples/s]

EDT andmestik laetud


Map:   0%|          | 0/24601 [00:00<?, ? examples/s]

Map:   0%|          | 0/3122 [00:00<?, ? examples/s]

Map:   0%|          | 0/3207 [00:00<?, ? examples/s]

Some weights of CamembertForTokenClassification were not initialized from the model checkpoint at EMBEDDIA/est-roberta and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[2025-03-24 12:10:52] Alustan EMBEDDIA/est-roberta treenimist


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy,Eve precision,Eve recall,Eve f1,Eve number,Gep precision,Gep recall,Gep f1,Gep number,Loc precision,Loc recall,Loc f1,Loc number,Muu precision,Muu recall,Muu f1,Muu number,Org precision,Org recall,Org f1,Org number,Per precision,Per recall,Per f1,Per number,Prod precision,Prod recall,Prod f1,Prod number,Unk precision,Unk recall,Unk f1,Unk number
1,0.08,0.08399,0.768661,0.76553,0.767092,0.976252,0.444444,0.394366,0.41791,71,0.635417,0.753086,0.689266,405,0.776364,0.653905,0.709892,653,0.0,0.0,0.0,9,0.59432,0.779255,0.674338,376,0.911088,0.953998,0.932049,1826,0.512077,0.392593,0.444444,540,0.0,0.0,0.0,48
2,0.0436,0.085828,0.765116,0.753819,0.759425,0.975455,0.642857,0.507042,0.566929,71,0.520362,0.851852,0.646067,405,0.783664,0.543645,0.641953,653,0.0,0.0,0.0,9,0.613687,0.739362,0.670688,376,0.932832,0.950712,0.941687,1826,0.549479,0.390741,0.45671,540,0.0,0.0,0.0,48
3,0.0198,0.088817,0.787678,0.787678,0.787678,0.977559,0.534091,0.661972,0.591195,71,0.593633,0.782716,0.675186,405,0.758879,0.621746,0.683502,653,0.0,0.0,0.0,9,0.643991,0.755319,0.695226,376,0.953226,0.970975,0.962018,1826,0.568085,0.494444,0.528713,540,0.0,0.0,0.0,48


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


[2025-03-24 12:22:16] EMBEDDIA/est-roberta treenimine lõpetatud
Kokku kulus: 684.62 sekundit (0.19 tundi)
Hindan testandmestikul..


  _warn_prf(average, modifier, msg_start, len(result))


Nervaluate tulemused
Strict {'correct': 3838, 'incorrect': 603, 'partial': 0, 'missed': 156, 'spurious': 138, 'possible': 4597, 'actual': 4579, 'precision': 0.8381742738589212, 'recall': 0.8348923210789645, 'f1': 0.8365300784655624}
precision 0.8381742738589212
recall 0.8348923210789645
f1 0.8365300784655624
EVE {'correct': 36, 'incorrect': 15, 'partial': 0, 'missed': 28, 'spurious': 5, 'possible': 79, 'actual': 56, 'precision': 0.6428571428571429, 'recall': 0.45569620253164556, 'f1': 0.5333333333333333}
GEP {'correct': 275, 'incorrect': 73, 'partial': 0, 'missed': 9, 'spurious': 9, 'possible': 357, 'actual': 357, 'precision': 0.7703081232492998, 'recall': 0.7703081232492998, 'f1': 0.7703081232492998}
LOC {'correct': 379, 'incorrect': 115, 'partial': 0, 'missed': 11, 'spurious': 12, 'possible': 505, 'actual': 506, 'precision': 0.7490118577075099, 'recall': 0.7504950495049505, 'f1': 0.7497527200791295}
MUU {'correct': 0, 'incorrect': 6, 'partial': 0, 'missed': 0, 'spurious': 0, 'possibl

Map:   0%|          | 0/5444 [00:00<?, ? examples/s]

Map:   0%|          | 0/833 [00:00<?, ? examples/s]

Map:   0%|          | 0/913 [00:00<?, ? examples/s]

Map:   0%|          | 0/24601 [00:00<?, ? examples/s]

Map:   0%|          | 0/3122 [00:00<?, ? examples/s]

Map:   0%|          | 0/3207 [00:00<?, ? examples/s]

COMBINED andmestik laetud


Map:   0%|          | 0/30045 [00:00<?, ? examples/s]

Map:   0%|          | 0/3955 [00:00<?, ? examples/s]

Map:   0%|          | 0/4120 [00:00<?, ? examples/s]

Some weights of CamembertForTokenClassification were not initialized from the model checkpoint at EMBEDDIA/est-roberta and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[2025-03-24 12:23:01] Alustan EMBEDDIA/est-roberta treenimist


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy,Eve precision,Eve recall,Eve f1,Eve number,Gep precision,Gep recall,Gep f1,Gep number,Loc precision,Loc recall,Loc f1,Loc number,Muu precision,Muu recall,Muu f1,Muu number,Org precision,Org recall,Org f1,Org number,Per precision,Per recall,Per f1,Per number,Prod precision,Prod recall,Prod f1,Prod number,Unk precision,Unk recall,Unk f1,Unk number
1,0.0905,0.122752,0.752239,0.778194,0.764997,0.970869,0.352273,0.43662,0.389937,71,0.621053,0.684455,0.651214,431,0.798479,0.633484,0.706476,663,0.0,0.0,0.0,9,0.515651,0.701794,0.594492,446,0.922688,0.93985,0.93119,2527,0.525077,0.585616,0.553697,876,0.0,0.0,0.0,49
2,0.0382,0.131752,0.795664,0.766956,0.781046,0.972526,0.671875,0.605634,0.637037,71,0.644397,0.693735,0.668156,431,0.690226,0.692308,0.691265,663,0.0,0.0,0.0,9,0.641026,0.672646,0.656455,446,0.959967,0.920459,0.939798,2527,0.575155,0.528539,0.550863,876,0.0,0.0,0.0,49
3,0.0207,0.13451,0.79462,0.780363,0.787427,0.974421,0.611111,0.619718,0.615385,71,0.574106,0.781903,0.662083,431,0.75523,0.544495,0.632778,663,0.0,0.0,0.0,9,0.611328,0.701794,0.653445,446,0.944879,0.95647,0.950639,2527,0.627907,0.554795,0.589091,876,0.0,0.0,0.0,49


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


[2025-03-24 12:37:04] EMBEDDIA/est-roberta treenimine lõpetatud
Kokku kulus: 842.38 sekundit (0.23 tundi)
Hindan testandmestikul..


  _warn_prf(average, modifier, msg_start, len(result))


Nervaluate tulemused
Strict {'correct': 5228, 'incorrect': 806, 'partial': 0, 'missed': 431, 'spurious': 316, 'possible': 6465, 'actual': 6350, 'precision': 0.8233070866141732, 'recall': 0.8086620262954369, 'f1': 0.8159188451033944}
precision 0.8233070866141732
recall 0.8086620262954369
f1 0.8159188451033944
EVE {'correct': 73, 'incorrect': 19, 'partial': 0, 'missed': 41, 'spurious': 10, 'possible': 133, 'actual': 102, 'precision': 0.7156862745098039, 'recall': 0.5488721804511278, 'f1': 0.621276595744681}
GEP {'correct': 328, 'incorrect': 68, 'partial': 0, 'missed': 18, 'spurious': 9, 'possible': 414, 'actual': 405, 'precision': 0.8098765432098766, 'recall': 0.7922705314009661, 'f1': 0.8009768009768009}
LOC {'correct': 414, 'incorrect': 134, 'partial': 0, 'missed': 17, 'spurious': 17, 'possible': 565, 'actual': 565, 'precision': 0.7327433628318584, 'recall': 0.7327433628318584, 'f1': 0.7327433628318583}
MUU {'correct': 0, 'incorrect': 6, 'partial': 0, 'missed': 0, 'spurious': 0, 'possi

Map:   0%|          | 0/913 [00:00<?, ? examples/s]

Map:   0%|          | 0/3207 [00:00<?, ? examples/s]

Kombineeritud andmestikul treenitud EMBEDDIA/est-roberta EWT testandmestikul
Hindan testandmestikul..


Nervaluate tulemused
Strict {'correct': 1383, 'incorrect': 218, 'partial': 0, 'missed': 267, 'spurious': 133, 'possible': 1868, 'actual': 1734, 'precision': 0.7975778546712803, 'recall': 0.7403640256959315, 'f1': 0.7679067184897279}
precision 0.7975778546712803
recall 0.7403640256959315
f1 0.7679067184897279
EVE {'correct': 43, 'incorrect': 1, 'partial': 0, 'missed': 10, 'spurious': 2, 'possible': 54, 'actual': 46, 'precision': 0.9347826086956522, 'recall': 0.7962962962962963, 'f1': 0.8599999999999999}
GEP {'correct': 41, 'incorrect': 8, 'partial': 0, 'missed': 8, 'spurious': 0, 'possible': 57, 'actual': 49, 'precision': 0.8367346938775511, 'recall': 0.7192982456140351, 'f1': 0.7735849056603773}
LOC {'correct': 33, 'incorrect': 20, 'partial': 0, 'missed': 7, 'spurious': 5, 'possible': 60, 'actual': 58, 'precision': 0.5689655172413793, 'recall': 0.55, 'f1': 0.5593220338983051}
MUU {'correct': 0, 'incorrect': 0, 'partial': 0, 'missed': 0, 'spurious': 0, 'possible': 0, 'actual': 0, 'preci

Nervaluate tulemused
Strict {'correct': 3845, 'incorrect': 588, 'partial': 0, 'missed': 164, 'spurious': 183, 'possible': 4597, 'actual': 4616, 'precision': 0.8329722703639515, 'recall': 0.8364150532956276, 'f1': 0.8346901117985456}
precision 0.8329722703639515
recall 0.8364150532956276
f1 0.8346901117985456
EVE {'correct': 30, 'incorrect': 18, 'partial': 0, 'missed': 31, 'spurious': 8, 'possible': 79, 'actual': 56, 'precision': 0.5357142857142857, 'recall': 0.379746835443038, 'f1': 0.4444444444444445}
GEP {'correct': 287, 'incorrect': 60, 'partial': 0, 'missed': 10, 'spurious': 9, 'possible': 357, 'actual': 356, 'precision': 0.8061797752808989, 'recall': 0.803921568627451, 'f1': 0.8050490883590462}
LOC {'correct': 381, 'incorrect': 114, 'partial': 0, 'missed': 10, 'spurious': 12, 'possible': 505, 'actual': 507, 'precision': 0.7514792899408284, 'recall': 0.7544554455445545, 'f1': 0.7529644268774703}
MUU {'correct': 0, 'incorrect': 6, 'partial': 0, 'missed': 0, 'spurious': 0, 'possible'

Map:   0%|          | 0/5444 [00:00<?, ? examples/s]

Map:   0%|          | 0/833 [00:00<?, ? examples/s]

Map:   0%|          | 0/913 [00:00<?, ? examples/s]

EWT andmestik laetud


tokenizer_config.json:   0%|          | 0.00/62.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/534 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/410k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Map:   0%|          | 0/5444 [00:00<?, ? examples/s]

Map:   0%|          | 0/833 [00:00<?, ? examples/s]

Map:   0%|          | 0/913 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/498M [00:00<?, ?B/s]

Some weights of BertForTokenClassification were not initialized from the model checkpoint at tartuNLP/EstBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[2025-03-24 12:37:58] Alustan tartuNLP/EstBERT treenimist


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy,Gep precision,Gep recall,Gep f1,Gep number,Loc precision,Loc recall,Loc f1,Loc number,Org precision,Org recall,Org f1,Org number,Per precision,Per recall,Per f1,Per number,Prod precision,Prod recall,Prod f1,Prod number,Unk precision,Unk recall,Unk f1,Unk number
1,No log,0.25049,0.756376,0.763766,0.760053,0.950646,0.5,0.423077,0.458333,26,0.333333,0.333333,0.333333,9,0.46875,0.234375,0.3125,64,0.92645,0.942446,0.934379,695,0.479564,0.531722,0.504298,331,0.0,0.0,0.0,1
2,0.167000,0.304114,0.78456,0.776199,0.780357,0.953721,0.5,0.423077,0.458333,26,0.125,0.111111,0.117647,9,0.333333,0.171875,0.226804,64,0.952872,0.930935,0.941776,695,0.548387,0.616314,0.58037,331,0.0,0.0,0.0,1
3,0.040900,0.325442,0.79326,0.773535,0.783273,0.954951,0.52381,0.423077,0.468085,26,0.25,0.333333,0.285714,9,0.415094,0.34375,0.376068,64,0.945985,0.932374,0.93913,695,0.571865,0.564955,0.568389,331,0.0,0.0,0.0,1


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


[2025-03-24 12:41:25] tartuNLP/EstBERT treenimine lõpetatud
Kokku kulus: 206.80 sekundit (0.06 tundi)
Hindan testandmestikul..


Nervaluate tulemused
Strict {'correct': 1171, 'incorrect': 404, 'partial': 0, 'missed': 280, 'spurious': 225, 'possible': 1855, 'actual': 1800, 'precision': 0.6505555555555556, 'recall': 0.631266846361186, 'f1': 0.640766073871409}
precision 0.6505555555555556
recall 0.631266846361186
f1 0.640766073871409
EVE {'correct': 9, 'incorrect': 23, 'partial': 0, 'missed': 25, 'spurious': 10, 'possible': 57, 'actual': 42, 'precision': 0.21428571428571427, 'recall': 0.15789473684210525, 'f1': 0.18181818181818182}
GEP {'correct': 33, 'incorrect': 20, 'partial': 0, 'missed': 2, 'spurious': 10, 'possible': 55, 'actual': 63, 'precision': 0.5238095238095238, 'recall': 0.6, 'f1': 0.559322033898305}
LOC {'correct': 37, 'incorrect': 9, 'partial': 0, 'missed': 9, 'spurious': 16, 'possible': 55, 'actual': 62, 'precision': 0.5967741935483871, 'recall': 0.6727272727272727, 'f1': 0.6324786324786325}
MUU {'correct': 0, 'incorrect': 0, 'partial': 0, 'missed': 0, 'spurious': 0, 'possible': 0, 'actual': 0, 'preci

Map:   0%|          | 0/24601 [00:00<?, ? examples/s]

Map:   0%|          | 0/3122 [00:00<?, ? examples/s]

Map:   0%|          | 0/3207 [00:00<?, ? examples/s]

EDT andmestik laetud


Map:   0%|          | 0/24601 [00:00<?, ? examples/s]

Map:   0%|          | 0/3122 [00:00<?, ? examples/s]

Map:   0%|          | 0/3207 [00:00<?, ? examples/s]

Some weights of BertForTokenClassification were not initialized from the model checkpoint at tartuNLP/EstBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[2025-03-24 12:42:00] Alustan tartuNLP/EstBERT treenimist


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy,Eve precision,Eve recall,Eve f1,Eve number,Gep precision,Gep recall,Gep f1,Gep number,Loc precision,Loc recall,Loc f1,Loc number,Muu precision,Muu recall,Muu f1,Muu number,Org precision,Org recall,Org f1,Org number,Per precision,Per recall,Per f1,Per number,Prod precision,Prod recall,Prod f1,Prod number,Unk precision,Unk recall,Unk f1,Unk number
1,0.084,0.103139,0.704421,0.695147,0.699753,0.968213,0.352941,0.441176,0.392157,68,0.610075,0.771226,0.68125,424,0.800475,0.529874,0.637654,636,0.0,0.0,0.0,12,0.553299,0.720264,0.625837,454,0.880624,0.902274,0.891318,1627,0.390057,0.350515,0.369231,582,0.0,0.0,0.0,71
2,0.0385,0.117017,0.691579,0.716572,0.703854,0.969907,0.361111,0.382353,0.371429,68,0.548872,0.860849,0.67034,424,0.828042,0.492138,0.617357,636,0.0,0.0,0.0,12,0.487943,0.757709,0.593615,454,0.921118,0.911494,0.916281,1627,0.419521,0.420962,0.42024,582,0.0,0.0,0.0,71
3,0.0152,0.126593,0.721408,0.724574,0.722988,0.971279,0.434783,0.441176,0.437956,68,0.600753,0.752358,0.668063,424,0.769841,0.610063,0.680702,636,0.0,0.0,0.0,12,0.539755,0.777533,0.637184,454,0.930159,0.90043,0.915053,1627,0.451613,0.43299,0.442105,582,0.0,0.0,0.0,71


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


[2025-03-24 12:53:56] tartuNLP/EstBERT treenimine lõpetatud
Kokku kulus: 716.28 sekundit (0.20 tundi)
Hindan testandmestikul..


  _warn_prf(average, modifier, msg_start, len(result))


Nervaluate tulemused
Strict {'correct': 3601, 'incorrect': 721, 'partial': 0, 'missed': 241, 'spurious': 245, 'possible': 4563, 'actual': 4567, 'precision': 0.7884825925114956, 'recall': 0.7891737891737892, 'f1': 0.7888280394304491}
precision 0.7884825925114956
recall 0.7891737891737892
f1 0.7888280394304491
EVE {'correct': 25, 'incorrect': 26, 'partial': 0, 'missed': 33, 'spurious': 6, 'possible': 84, 'actual': 57, 'precision': 0.43859649122807015, 'recall': 0.2976190476190476, 'f1': 0.35460992907801414}
GEP {'correct': 289, 'incorrect': 85, 'partial': 0, 'missed': 10, 'spurious': 6, 'possible': 384, 'actual': 380, 'precision': 0.7605263157894737, 'recall': 0.7526041666666666, 'f1': 0.756544502617801}
LOC {'correct': 311, 'incorrect': 113, 'partial': 0, 'missed': 17, 'spurious': 9, 'possible': 441, 'actual': 433, 'precision': 0.7182448036951501, 'recall': 0.7052154195011338, 'f1': 0.711670480549199}
MUU {'correct': 0, 'incorrect': 6, 'partial': 0, 'missed': 0, 'spurious': 0, 'possible

Map:   0%|          | 0/5444 [00:00<?, ? examples/s]

Map:   0%|          | 0/833 [00:00<?, ? examples/s]

Map:   0%|          | 0/913 [00:00<?, ? examples/s]

Map:   0%|          | 0/24601 [00:00<?, ? examples/s]

Map:   0%|          | 0/3122 [00:00<?, ? examples/s]

Map:   0%|          | 0/3207 [00:00<?, ? examples/s]

COMBINED andmestik laetud


Map:   0%|          | 0/30045 [00:00<?, ? examples/s]

Map:   0%|          | 0/3955 [00:00<?, ? examples/s]

Map:   0%|          | 0/4120 [00:00<?, ? examples/s]

Some weights of BertForTokenClassification were not initialized from the model checkpoint at tartuNLP/EstBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[2025-03-24 12:54:48] Alustan tartuNLP/EstBERT treenimist


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy,Eve precision,Eve recall,Eve f1,Eve number,Gep precision,Gep recall,Gep f1,Gep number,Loc precision,Loc recall,Loc f1,Loc number,Muu precision,Muu recall,Muu f1,Muu number,Org precision,Org recall,Org f1,Org number,Per precision,Per recall,Per f1,Per number,Prod precision,Prod recall,Prod f1,Prod number,Unk precision,Unk recall,Unk f1,Unk number
1,0.0925,0.138515,0.711299,0.7076,0.709445,0.965994,0.428571,0.352941,0.387097,68,0.585448,0.768889,0.664745,450,0.764317,0.537984,0.631483,645,0.0,0.0,0.0,12,0.476312,0.718147,0.572748,518,0.895645,0.894488,0.895066,2322,0.481242,0.407448,0.441281,913,0.0,0.0,0.0,72
2,0.0371,0.158468,0.73091,0.7198,0.725312,0.967261,0.373626,0.5,0.427673,68,0.606362,0.677778,0.640084,450,0.697171,0.649612,0.672552,645,0.0,0.0,0.0,12,0.548124,0.648649,0.594164,518,0.93228,0.88932,0.910293,2322,0.488346,0.481928,0.485116,913,0.0,0.0,0.0,72
3,0.0173,0.17731,0.742991,0.7314,0.73715,0.968875,0.457627,0.397059,0.425197,68,0.585691,0.782222,0.669838,450,0.74502,0.579845,0.652136,645,0.0,0.0,0.0,12,0.560305,0.708494,0.625746,518,0.937894,0.897502,0.917254,2322,0.513024,0.496166,0.504454,913,0.0,0.0,0.0,72


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


[2025-03-24 13:09:19] tartuNLP/EstBERT treenimine lõpetatud
Kokku kulus: 870.85 sekundit (0.24 tundi)
Hindan testandmestikul..


  _warn_prf(average, modifier, msg_start, len(result))


Nervaluate tulemused
Strict {'correct': 4903, 'incorrect': 977, 'partial': 0, 'missed': 538, 'spurious': 459, 'possible': 6418, 'actual': 6339, 'precision': 0.7734658463480044, 'recall': 0.7639451542536616, 'f1': 0.768676021008074}
precision 0.7734658463480044
recall 0.7639451542536616
f1 0.768676021008074
EVE {'correct': 66, 'incorrect': 27, 'partial': 0, 'missed': 48, 'spurious': 23, 'possible': 141, 'actual': 116, 'precision': 0.5689655172413793, 'recall': 0.46808510638297873, 'f1': 0.5136186770428015}
GEP {'correct': 338, 'incorrect': 97, 'partial': 0, 'missed': 4, 'spurious': 17, 'possible': 439, 'actual': 452, 'precision': 0.7477876106194691, 'recall': 0.7699316628701595, 'f1': 0.7586980920314255}
LOC {'correct': 351, 'incorrect': 123, 'partial': 0, 'missed': 22, 'spurious': 23, 'possible': 496, 'actual': 497, 'precision': 0.7062374245472837, 'recall': 0.7076612903225806, 'f1': 0.7069486404833836}
MUU {'correct': 0, 'incorrect': 5, 'partial': 0, 'missed': 1, 'spurious': 0, 'possi

Map:   0%|          | 0/913 [00:00<?, ? examples/s]

Map:   0%|          | 0/3207 [00:00<?, ? examples/s]

Kombineeritud andmestikul treenitud tartuNLP/EstBERT EWT testandmestikul
Hindan testandmestikul..


Nervaluate tulemused
Strict {'correct': 1263, 'incorrect': 266, 'partial': 0, 'missed': 326, 'spurious': 165, 'possible': 1855, 'actual': 1694, 'precision': 0.7455726092089728, 'recall': 0.6808625336927224, 'f1': 0.7117497886728656}
precision 0.7455726092089728
recall 0.6808625336927224
f1 0.7117497886728656
EVE {'correct': 33, 'incorrect': 5, 'partial': 0, 'missed': 19, 'spurious': 13, 'possible': 57, 'actual': 51, 'precision': 0.6470588235294118, 'recall': 0.5789473684210527, 'f1': 0.6111111111111113}
GEP {'correct': 41, 'incorrect': 10, 'partial': 0, 'missed': 4, 'spurious': 2, 'possible': 55, 'actual': 53, 'precision': 0.7735849056603774, 'recall': 0.7454545454545455, 'f1': 0.7592592592592593}
LOC {'correct': 34, 'incorrect': 12, 'partial': 0, 'missed': 9, 'spurious': 15, 'possible': 55, 'actual': 61, 'precision': 0.5573770491803278, 'recall': 0.6181818181818182, 'f1': 0.5862068965517241}
MUU {'correct': 0, 'incorrect': 0, 'partial': 0, 'missed': 0, 'spurious': 0, 'possible': 0, 'a

Nervaluate tulemused
Strict {'correct': 3640, 'incorrect': 711, 'partial': 0, 'missed': 212, 'spurious': 294, 'possible': 4563, 'actual': 4645, 'precision': 0.7836383207750269, 'recall': 0.7977207977207977, 'f1': 0.790616854908775}
precision 0.7836383207750269
recall 0.7977207977207977
f1 0.790616854908775
EVE {'correct': 33, 'incorrect': 22, 'partial': 0, 'missed': 29, 'spurious': 10, 'possible': 84, 'actual': 65, 'precision': 0.5076923076923077, 'recall': 0.39285714285714285, 'f1': 0.4429530201342282}
GEP {'correct': 297, 'incorrect': 87, 'partial': 0, 'missed': 0, 'spurious': 15, 'possible': 384, 'actual': 399, 'precision': 0.7443609022556391, 'recall': 0.7734375, 'f1': 0.7586206896551724}
LOC {'correct': 317, 'incorrect': 111, 'partial': 0, 'missed': 13, 'spurious': 8, 'possible': 441, 'actual': 436, 'precision': 0.7270642201834863, 'recall': 0.7188208616780045, 'f1': 0.7229190421892816}
MUU {'correct': 0, 'incorrect': 5, 'partial': 0, 'missed': 1, 'spurious': 0, 'possible': 6, 'ac

In [6]:
# Retrain est-roberta on EDT with 6 epochs (train_all uses 3) and keep the
# model, trainer and test results in the notebook namespace.
estroberta_edt, estroberta_edt_trainer, estroberta_edt_results = train_model('EMBEDDIA/est-roberta', 'EDT', epochs=6)

Map:   0%|          | 0/24601 [00:00<?, ? examples/s]

Map:   0%|          | 0/3122 [00:00<?, ? examples/s]

Map:   0%|          | 0/3207 [00:00<?, ? examples/s]

EDT andmestik laetud


Map:   0%|          | 0/24601 [00:00<?, ? examples/s]

Map:   0%|          | 0/3122 [00:00<?, ? examples/s]

Map:   0%|          | 0/3207 [00:00<?, ? examples/s]

Some weights of CamembertForTokenClassification were not initialized from the model checkpoint at EMBEDDIA/est-roberta and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[2025-03-24 13:35:07] Alustan EMBEDDIA/est-roberta treenimist


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy,Eve precision,Eve recall,Eve f1,Eve number,Gep precision,Gep recall,Gep f1,Gep number,Loc precision,Loc recall,Loc f1,Loc number,Muu precision,Muu recall,Muu f1,Muu number,Org precision,Org recall,Org f1,Org number,Per precision,Per recall,Per f1,Per number,Prod precision,Prod recall,Prod f1,Prod number,Unk precision,Unk recall,Unk f1,Unk number
1,0.0813,0.086085,0.756475,0.758401,0.757437,0.974021,0.58,0.408451,0.479339,71,0.649438,0.71358,0.68,405,0.783929,0.672282,0.723825,653,0.0,0.0,0.0,9,0.599613,0.824468,0.694289,376,0.912357,0.917853,0.915097,1826,0.446125,0.437037,0.441534,540,0.0,0.0,0.0,48
2,0.0503,0.094608,0.757827,0.745672,0.7517,0.974515,0.434211,0.464789,0.44898,71,0.554828,0.837037,0.667323,405,0.790909,0.532925,0.63678,653,0.0,0.0,0.0,9,0.595289,0.739362,0.659549,376,0.91362,0.94414,0.928629,1826,0.539062,0.383333,0.448052,540,0.0,0.0,0.0,48
3,0.0264,0.111319,0.755539,0.764002,0.759747,0.974324,0.457831,0.535211,0.493506,71,0.57845,0.755556,0.655246,405,0.753571,0.646248,0.695796,653,0.0,0.0,0.0,9,0.600427,0.74734,0.665877,376,0.919873,0.955641,0.937416,1826,0.48046,0.387037,0.428718,540,0.0,0.0,0.0,48
4,0.0154,0.120865,0.7699,0.765784,0.767837,0.975169,0.558824,0.535211,0.546763,71,0.561258,0.837037,0.671952,405,0.805369,0.551302,0.654545,653,0.0,0.0,0.0,9,0.723861,0.718085,0.720961,376,0.940502,0.943593,0.942045,1826,0.486865,0.514815,0.50045,540,0.0,0.0,0.0,48
5,0.0085,0.119126,0.773638,0.776986,0.775308,0.976125,0.590909,0.549296,0.569343,71,0.60479,0.748148,0.668874,405,0.774566,0.61562,0.686007,653,0.0,0.0,0.0,9,0.609053,0.787234,0.686775,376,0.938841,0.958379,0.948509,1826,0.519841,0.485185,0.501916,540,0.0,0.0,0.0,48
6,0.0043,0.12438,0.77954,0.775967,0.777749,0.976396,0.55,0.464789,0.503817,71,0.587571,0.77037,0.666667,405,0.784736,0.614089,0.689003,653,0.0,0.0,0.0,9,0.651007,0.773936,0.707169,376,0.941145,0.954545,0.947798,1826,0.533865,0.496296,0.514395,540,0.0,0.0,0.0,48


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


[2025-03-24 13:56:48] EMBEDDIA/est-roberta treenimine lõpetatud
Kokku kulus: 1301.16 sekundit (0.36 tundi)
Hindan testandmestikul..



Nervaluate tulemused
Strict {'correct': 3773, 'incorrect': 671, 'partial': 0, 'missed': 153, 'spurious': 156, 'possible': 4597, 'actual': 4600, 'precision': 0.8202173913043478, 'recall': 0.8207526647813792, 'f1': 0.8204849407415461}
precision 0.8202173913043478
recall 0.8207526647813792
f1 0.8204849407415461
EVE {'correct': 23, 'incorrect': 26, 'partial': 0, 'missed': 30, 'spurious': 7, 'possible': 79, 'actual': 56, 'precision': 0.4107142857142857, 'recall': 0.2911392405063291, 'f1': 0.34074074074074073}
GEP {'correct': 260, 'incorrect': 89, 'partial': 0, 'missed': 8, 'spurious': 6, 'possible': 357, 'actual': 355, 'precision': 0.7323943661971831, 'recall': 0.7282913165266106, 'f1': 0.7303370786516854}
LOC {'correct': 357, 'incorrect': 134, 'partial': 0, 'missed': 14, 'spurious': 9, 'possible': 505, 'actual': 500, 'precision': 0.714, 'recall': 0.7069306930693069, 'f1': 0.7104477611940297}
MUU {'correct': 0, 'incorrect': 6, 'partial': 0, 'missed': 0, 'spurious': 1, 'possible': 6, 'actua

In [8]:
# Fine-tune EMBEDDIA/est-roberta for 6 epochs on the 'Combined' dataset
# (for that dataset name train_model merges the EWT and EDT corpora).
# The three returned values are presumably the model, its trainer and the
# evaluation results -- the return statement is not visible here; confirm
# against the train_model definition.
(
    estroberta_combined,
    estroberta_combined_trainer,
    estroberta_combined_results,
) = train_model(
    model_name='EMBEDDIA/est-roberta',
    dataset_name='Combined',
    epochs=6,
)

Map:   0%|          | 0/5444 [00:00<?, ? examples/s]

Map:   0%|          | 0/833 [00:00<?, ? examples/s]

Map:   0%|          | 0/913 [00:00<?, ? examples/s]

Map:   0%|          | 0/24601 [00:00<?, ? examples/s]

Map:   0%|          | 0/3122 [00:00<?, ? examples/s]

Map:   0%|          | 0/3207 [00:00<?, ? examples/s]

COMBINED andmestik laetud


Map:   0%|          | 0/30045 [00:00<?, ? examples/s]

Map:   0%|          | 0/3955 [00:00<?, ? examples/s]

Map:   0%|          | 0/4120 [00:00<?, ? examples/s]

Some weights of CamembertForTokenClassification were not initialized from the model checkpoint at EMBEDDIA/est-roberta and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[2025-03-24 13:57:54] Alustan EMBEDDIA/est-roberta treenimist


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy,Eve precision,Eve recall,Eve f1,Eve number,Gep precision,Gep recall,Gep f1,Gep number,Loc precision,Loc recall,Loc f1,Loc number,Muu precision,Muu recall,Muu f1,Muu number,Org precision,Org recall,Org f1,Org number,Per precision,Per recall,Per f1,Per number,Prod precision,Prod recall,Prod f1,Prod number,Unk precision,Unk recall,Unk f1,Unk number
1,0.0913,0.123706,0.758977,0.766759,0.762848,0.970896,0.315789,0.507042,0.389189,71,0.61949,0.61949,0.61949,431,0.752182,0.650075,0.697411,663,0.0,0.0,0.0,9,0.546139,0.650224,0.593654,446,0.91133,0.951721,0.931088,2527,0.550239,0.525114,0.537383,876,0.0,0.0,0.0,49
2,0.0479,0.131141,0.760928,0.775631,0.768209,0.972115,0.406015,0.760563,0.529412,71,0.569369,0.733179,0.640974,431,0.65123,0.678733,0.664697,663,0.0,0.0,0.0,9,0.659524,0.621076,0.639723,446,0.948955,0.934309,0.941575,2527,0.539071,0.543379,0.541217,876,0.0,0.0,0.0,49
3,0.0296,0.153247,0.772763,0.766364,0.769551,0.970472,0.442105,0.591549,0.506024,71,0.582237,0.821346,0.681424,431,0.742308,0.582202,0.652578,663,0.0,0.0,0.0,9,0.584551,0.627803,0.605405,446,0.960445,0.922438,0.941058,2527,0.553812,0.563927,0.558824,876,0.0,0.0,0.0,49
4,0.0153,0.177876,0.782197,0.748423,0.764937,0.971055,0.461538,0.507042,0.483221,71,0.598859,0.730858,0.658307,431,0.74606,0.642534,0.690438,663,0.0,0.0,0.0,9,0.559322,0.665919,0.607984,446,0.957511,0.882865,0.918674,2527,0.616858,0.55137,0.582278,876,0.32,0.163265,0.216216,49
5,0.0081,0.17876,0.786328,0.775631,0.780943,0.972274,0.493151,0.507042,0.5,71,0.613636,0.689095,0.64918,431,0.723856,0.668175,0.694902,663,0.0,0.0,0.0,9,0.61194,0.643498,0.627322,446,0.951705,0.927978,0.939691,2527,0.598152,0.591324,0.594719,876,0.258065,0.163265,0.2,49
6,0.004,0.182216,0.791009,0.78056,0.78575,0.972791,0.45122,0.521127,0.48366,71,0.589965,0.791183,0.675917,431,0.802789,0.607843,0.691845,663,0.0,0.0,0.0,9,0.623932,0.654709,0.63895,446,0.951613,0.933914,0.94268,2527,0.605293,0.600457,0.602865,876,0.0,0.0,0.0,49


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


[2025-03-24 14:24:22] EMBEDDIA/est-roberta treenimine lõpetatud
Kokku kulus: 1587.95 sekundit (0.44 tundi)
Hindan testandmestikul..



Nervaluate tulemused
Strict {'correct': 5223, 'incorrect': 889, 'partial': 0, 'missed': 353, 'spurious': 316, 'possible': 6465, 'actual': 6428, 'precision': 0.8125388923459863, 'recall': 0.8078886310904873, 'f1': 0.8102070891181261}
precision 0.8125388923459863
recall 0.8078886310904873
f1 0.8102070891181261
EVE {'correct': 78, 'incorrect': 17, 'partial': 0, 'missed': 38, 'spurious': 12, 'possible': 133, 'actual': 107, 'precision': 0.7289719626168224, 'recall': 0.5864661654135338, 'f1': 0.6499999999999999}
GEP {'correct': 309, 'incorrect': 87, 'partial': 0, 'missed': 18, 'spurious': 9, 'possible': 414, 'actual': 405, 'precision': 0.762962962962963, 'recall': 0.7463768115942029, 'f1': 0.7545787545787547}
LOC {'correct': 409, 'incorrect': 139, 'partial': 0, 'missed': 17, 'spurious': 20, 'possible': 565, 'actual': 568, 'precision': 0.7200704225352113, 'recall': 0.7238938053097345, 'f1': 0.7219770520741394}
MUU {'correct': 0, 'incorrect': 4, 'partial': 0, 'missed': 2, 'spurious': 0, 'poss

Map:   0%|          | 0/913 [00:00<?, ? examples/s]

Map:   0%|          | 0/3207 [00:00<?, ? examples/s]

Kombineeritud andmestikul treenitud EMBEDDIA/est-roberta EWT testandmestikul
Hindan testandmestikul..


  _warn_prf(average, modifier, msg_start, len(result))



Nervaluate tulemused
Strict {'correct': 1424, 'incorrect': 254, 'partial': 0, 'missed': 190, 'spurious': 123, 'possible': 1868, 'actual': 1801, 'precision': 0.7906718489727929, 'recall': 0.7623126338329764, 'f1': 0.7762333060779504}
precision 0.7906718489727929
recall 0.7623126338329764
f1 0.7762333060779504
EVE {'correct': 44, 'incorrect': 1, 'partial': 0, 'missed': 9, 'spurious': 2, 'possible': 54, 'actual': 47, 'precision': 0.9361702127659575, 'recall': 0.8148148148148148, 'f1': 0.8712871287128713}
GEP {'correct': 41, 'incorrect': 8, 'partial': 0, 'missed': 8, 'spurious': 0, 'possible': 57, 'actual': 49, 'precision': 0.8367346938775511, 'recall': 0.7192982456140351, 'f1': 0.7735849056603773}
LOC {'correct': 34, 'incorrect': 19, 'partial': 0, 'missed': 7, 'spurious': 6, 'possible': 60, 'actual': 59, 'precision': 0.576271186440678, 'recall': 0.5666666666666667, 'f1': 0.5714285714285714}
MUU {'correct': 0, 'incorrect': 0, 'partial': 0, 'missed': 0, 'spurious': 0, 'possible': 0, 'actua


Nervaluate tulemused
Strict {'correct': 3799, 'incorrect': 635, 'partial': 0, 'missed': 163, 'spurious': 193, 'possible': 4597, 'actual': 4627, 'precision': 0.8210503566025502, 'recall': 0.8264085273004134, 'f1': 0.8237207285342585}
precision 0.8210503566025502
recall 0.8264085273004134
f1 0.8237207285342585
EVE {'correct': 34, 'incorrect': 16, 'partial': 0, 'missed': 29, 'spurious': 10, 'possible': 79, 'actual': 60, 'precision': 0.5666666666666667, 'recall': 0.43037974683544306, 'f1': 0.4892086330935252}
GEP {'correct': 268, 'incorrect': 79, 'partial': 0, 'missed': 10, 'spurious': 9, 'possible': 357, 'actual': 356, 'precision': 0.7528089887640449, 'recall': 0.7507002801120448, 'f1': 0.7517531556802244}
LOC {'correct': 375, 'incorrect': 120, 'partial': 0, 'missed': 10, 'spurious': 14, 'possible': 505, 'actual': 509, 'precision': 0.7367387033398821, 'recall': 0.7425742574257426, 'f1': 0.7396449704142013}
MUU {'correct': 0, 'incorrect': 4, 'partial': 0, 'missed': 2, 'spurious': 0, 'poss

In [10]:
# Fine-tune tartuNLP/EstBERT for 6 epochs on the 'Combined' dataset
# (for that dataset name train_model merges the EWT and EDT corpora).
# The three returned values are presumably the model, its trainer and the
# evaluation results -- the return statement is not visible here; confirm
# against the train_model definition.
(
    estbert_combined,
    estbert_combined_trainer,
    estbert_combined_results,
) = train_model(
    model_name='tartuNLP/EstBERT',
    dataset_name='Combined',
    epochs=6,
)

Map:   0%|          | 0/5444 [00:00<?, ? examples/s]

Map:   0%|          | 0/833 [00:00<?, ? examples/s]

Map:   0%|          | 0/913 [00:00<?, ? examples/s]

Map:   0%|          | 0/24601 [00:00<?, ? examples/s]

Map:   0%|          | 0/3122 [00:00<?, ? examples/s]

Map:   0%|          | 0/3207 [00:00<?, ? examples/s]

COMBINED andmestik laetud


tokenizer_config.json:   0%|          | 0.00/62.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/534 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/410k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Map:   0%|          | 0/30045 [00:00<?, ? examples/s]

Map:   0%|          | 0/3955 [00:00<?, ? examples/s]

Map:   0%|          | 0/4120 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/498M [00:00<?, ?B/s]

Some weights of BertForTokenClassification were not initialized from the model checkpoint at tartuNLP/EstBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[2025-03-24 14:26:00] Alustan tartuNLP/EstBERT treenimist


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy,Eve precision,Eve recall,Eve f1,Eve number,Gep precision,Gep recall,Gep f1,Gep number,Loc precision,Loc recall,Loc f1,Loc number,Muu precision,Muu recall,Muu f1,Muu number,Org precision,Org recall,Org f1,Org number,Per precision,Per recall,Per f1,Per number,Prod precision,Prod recall,Prod f1,Prod number,Unk precision,Unk recall,Unk f1,Unk number
1,0.0931,0.135429,0.719613,0.714,0.716796,0.966341,0.329268,0.397059,0.36,68,0.624309,0.753333,0.682779,450,0.778742,0.556589,0.649186,645,0.0,0.0,0.0,12,0.44533,0.754826,0.560172,518,0.914847,0.902239,0.9085,2322,0.507779,0.393209,0.44321,913,0.0,0.0,0.0,72
2,0.0414,0.162471,0.722256,0.7224,0.722328,0.966728,0.45122,0.544118,0.493333,68,0.607004,0.693333,0.647303,450,0.711667,0.662016,0.685944,645,0.0,0.0,0.0,12,0.490566,0.702703,0.577778,518,0.940662,0.880706,0.909698,2322,0.480315,0.467689,0.473918,913,0.0,0.0,0.0,72
3,0.0217,0.176937,0.725914,0.7384,0.732104,0.967341,0.421687,0.514706,0.463576,68,0.556122,0.726667,0.630058,450,0.726481,0.646512,0.684167,645,0.0,0.0,0.0,12,0.546751,0.666023,0.600522,518,0.937416,0.903101,0.919939,2322,0.487073,0.515882,0.501064,913,0.0,0.0,0.0,72
4,0.0106,0.19968,0.750565,0.73,0.74014,0.968541,0.431034,0.367647,0.396825,68,0.579125,0.764444,0.659004,450,0.746269,0.620155,0.677392,645,0.0,0.0,0.0,12,0.553254,0.722008,0.626466,518,0.947822,0.899655,0.923111,2322,0.538065,0.456736,0.494076,913,0.066667,0.013889,0.022989,72
5,0.0062,0.212258,0.756198,0.7382,0.74709,0.969208,0.432836,0.426471,0.42963,68,0.609615,0.704444,0.653608,450,0.726957,0.648062,0.685246,645,0.0,0.0,0.0,12,0.596123,0.712355,0.649077,518,0.94382,0.904393,0.923686,2322,0.539551,0.500548,0.519318,913,0.05,0.013889,0.021739,72
6,0.0022,0.22556,0.736611,0.7372,0.736905,0.968074,0.448276,0.382353,0.412698,68,0.572148,0.757778,0.652008,450,0.735955,0.609302,0.666667,645,0.0,0.0,0.0,12,0.572079,0.727799,0.640612,518,0.947129,0.894918,0.920283,2322,0.500532,0.514786,0.507559,913,0.066667,0.013889,0.022989,72


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


[2025-03-24 14:53:22] tartuNLP/EstBERT treenimine lõpetatud
Kokku kulus: 1641.29 sekundit (0.46 tundi)
Hindan testandmestikul..



Nervaluate tulemused
Strict {'correct': 4884, 'incorrect': 977, 'partial': 0, 'missed': 557, 'spurious': 432, 'possible': 6418, 'actual': 6293, 'precision': 0.7761004290481487, 'recall': 0.7609847304456216, 'f1': 0.7684682558413972}
precision 0.7761004290481487
recall 0.7609847304456216
f1 0.7684682558413972
EVE {'correct': 62, 'incorrect': 32, 'partial': 0, 'missed': 47, 'spurious': 24, 'possible': 141, 'actual': 118, 'precision': 0.5254237288135594, 'recall': 0.4397163120567376, 'f1': 0.47876447876447875}
GEP {'correct': 296, 'incorrect': 132, 'partial': 0, 'missed': 11, 'spurious': 14, 'possible': 439, 'actual': 442, 'precision': 0.669683257918552, 'recall': 0.6742596810933941, 'f1': 0.6719636776390464}
LOC {'correct': 350, 'incorrect': 123, 'partial': 0, 'missed': 23, 'spurious': 34, 'possible': 496, 'actual': 507, 'precision': 0.6903353057199211, 'recall': 0.7056451612903226, 'f1': 0.6979062811565305}
MUU {'correct': 0, 'incorrect': 4, 'partial': 0, 'missed': 2, 'spurious': 5, 'p

Map:   0%|          | 0/913 [00:00<?, ? examples/s]

Map:   0%|          | 0/3207 [00:00<?, ? examples/s]

Kombineeritud andmestikul treenitud tartuNLP/EstBERT EWT testandmestikul
Hindan testandmestikul..


  _warn_prf(average, modifier, msg_start, len(result))



Nervaluate tulemused
Strict {'correct': 1266, 'incorrect': 260, 'partial': 0, 'missed': 329, 'spurious': 150, 'possible': 1855, 'actual': 1676, 'precision': 0.7553699284009546, 'recall': 0.6824797843665769, 'f1': 0.7170773152081563}
precision 0.7553699284009546
recall 0.6824797843665769
f1 0.7170773152081563
EVE {'correct': 33, 'incorrect': 6, 'partial': 0, 'missed': 18, 'spurious': 13, 'possible': 57, 'actual': 52, 'precision': 0.6346153846153846, 'recall': 0.5789473684210527, 'f1': 0.6055045871559633}
GEP {'correct': 40, 'incorrect': 11, 'partial': 0, 'missed': 4, 'spurious': 2, 'possible': 55, 'actual': 53, 'precision': 0.7547169811320755, 'recall': 0.7272727272727273, 'f1': 0.7407407407407407}
LOC {'correct': 27, 'incorrect': 19, 'partial': 0, 'missed': 9, 'spurious': 12, 'possible': 55, 'actual': 58, 'precision': 0.46551724137931033, 'recall': 0.4909090909090909, 'f1': 0.47787610619469023}
MUU {'correct': 0, 'incorrect': 0, 'partial': 0, 'missed': 0, 'spurious': 2, 'possible': 0,


Nervaluate tulemused
Strict {'correct': 3618, 'incorrect': 717, 'partial': 0, 'missed': 228, 'spurious': 282, 'possible': 4563, 'actual': 4617, 'precision': 0.783625730994152, 'recall': 0.7928994082840237, 'f1': 0.788235294117647}
precision 0.783625730994152
recall 0.7928994082840237
f1 0.788235294117647
EVE {'correct': 29, 'incorrect': 26, 'partial': 0, 'missed': 29, 'spurious': 11, 'possible': 84, 'actual': 66, 'precision': 0.4393939393939394, 'recall': 0.34523809523809523, 'f1': 0.38666666666666666}
GEP {'correct': 256, 'incorrect': 121, 'partial': 0, 'missed': 7, 'spurious': 12, 'possible': 384, 'actual': 389, 'precision': 0.6580976863753213, 'recall': 0.6666666666666666, 'f1': 0.6623544631306597}
LOC {'correct': 323, 'incorrect': 104, 'partial': 0, 'missed': 14, 'spurious': 22, 'possible': 441, 'actual': 449, 'precision': 0.7193763919821826, 'recall': 0.7324263038548753, 'f1': 0.7258426966292135}
MUU {'correct': 0, 'incorrect': 4, 'partial': 0, 'missed': 2, 'spurious': 3, 'possib