In [1]:
# Colabis kasutamiseks
from google.colab import drive
#drive.mount('/content/drive')
import os
os.chdir('/content/drive/My Drive/Colab Notebooks/thesis/')

In [2]:
!pip install estnltk==1.7.2
!pip install evaluate
!pip install seqeval
!pip install nervaluate

Collecting estnltk==1.7.2
  Downloading estnltk-1.7.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.9 kB)
Collecting estnltk-core>=1.7.2 (from estnltk==1.7.2)
  Downloading estnltk_core-1.7.4-py3-none-any.whl.metadata (3.5 kB)
Collecting python-crfsuite>=0.8.3 (from estnltk==1.7.2)
  Downloading python_crfsuite-0.9.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.3 kB)
Collecting cached-property>=1.2.0 (from estnltk==1.7.2)
  Downloading cached_property-2.0.1-py3-none-any.whl.metadata (10 kB)
Collecting bs4 (from estnltk==1.7.2)
  Downloading bs4-0.0.2-py2.py3-none-any.whl.metadata (411 bytes)
Collecting conllu (from estnltk==1.7.2)
  Downloading conllu-6.0.0-py3-none-any.whl.metadata (21 kB)
Collecting pyahocorasick (from estnltk==1.7.2)
  Downloading pyahocorasick-2.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Collecting jedi>=0.16 (from ipython->estnltk==1.7.2)
  Downloading jedi-0.19.2-py2.py3-none

In [3]:
# Impordid
import os
import numpy as np
import pandas as pd
from collections import defaultdict
import evaluate
from datasets import Dataset, DatasetDict
import json
from itertools import product
from datetime import datetime

# Korpuse lugemiseks
from estnltk.converters.conll import conll_importer

# _
from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import AdamWeightDecay
from transformers import DataCollatorForTokenClassification
from transformers import create_optimizer
from transformers.keras_callbacks import KerasMetricCallback
from transformers import TFAutoModelForTokenClassification
from transformers import TrainingArguments, Trainer, EarlyStoppingCallback

In [4]:
def get_dataset_paths(dataset: str) -> dict:
    # Sisend: andmestiku nimi sõnena (ewt/edt)
    # Väljund: sõnastik, mis sisaldab train/dev/test failiteid
    dataset_dir = os.path.join('data', dataset)
    files = os.listdir(dataset_dir)

    paths = {}
    for split in ['train', 'dev', 'test']:
        matching_file = next(f for f in files if f'-ud-{split}.' in f)
        paths[split] = os.path.join(dataset_dir, matching_file)

    return paths

edt = get_dataset_paths('edt')
ewt = get_dataset_paths('ewt')
print(edt)
print(ewt)

{'train': 'data/edt/et_edt-ud-train.conllu', 'dev': 'data/edt/et_edt-ud-dev.conllu', 'test': 'data/edt/et_edt-ud-test.conllu'}
{'train': 'data/ewt/et_ewt-ud-train.conllu', 'dev': 'data/ewt/et_ewt-ud-dev.conllu', 'test': 'data/ewt/et_ewt-ud-test.conllu'}


In [5]:
def preprocess(dataset_path: str) -> list:
    # Sisend: andmestiku failitee sõnena
    # Väljund: List, mis sisaldab parsitud lauseid
    # Iga lause on paaride list kujul [(w0, t0), (w1, t1), ..., (wn, tn)], kus w tähistab sõna ja t sõnale vastavat märgendit.

    dataset = conll_importer.conll_to_text(file=dataset_path)
    parsed_sents = []
    known_tags = ['B-Eve', 'B-Gep', 'B-Loc', 'B-Muu', 'B-Org', 'B-Per', 'B-Prod', 'B-Unk', 'I-Eve', 'I-Gep', 'I-Loc', 'I-Muu', 'I-Org', 'I-Per', 'I-Prod', 'I-Unk']

    # Kuna andmestikus on üksikud vead, aga on enam-vähem selge, mida tegelikult mõeldi, siis teeme vastavad parandused.
    corrections = {
      'B-OrgSpaceAfter': 'B-Org',
      'B_Gep': 'B-Gep',
      'i-Prod': 'I-Prod',
      'Org': 'B-Org',
      'Per': 'B-Per',
      'BäOrg': 'B-Org',
      'B.Prod': 'B-Prod',
      'I-per': 'I-Per'
    }

    for sent in dataset.sentences:
        parsed_sent = []
        for word, misc in zip(sent.words, sent.conll_syntax.misc):
            tag = 'O'
            if misc:
                if 'NE' in misc:
                  if misc['NE'] in known_tags:
                    tag = misc['NE']
                  else:
                    # Kaks üksikut juhtu, kus kahe elemendi pikkune nimeüksus oli märgendatud (_, Per), (_, Per) või (_, Org), (_, Org)
                    if parsed_sent[-1][1] == 'B-Org' and misc['NE'] == 'Org':
                      tag = 'I-Org'
                    if parsed_sent[-1][1] == 'B-Per' and misc['NE'] == 'Per':
                      tag = 'I-Per'
                    else:
                      tag = corrections[misc['NE']]
            pair = (word.text, tag)
            #print(f"({word.text}, {tag})")
            parsed_sent.append(pair)
        parsed_sents.append(parsed_sent)

    return parsed_sents

In [None]:
ewt_dev_sents = preprocess(ewt['dev'])
ewt_train_sents = preprocess(ewt['train'])
ewt_test_sents = preprocess(ewt['test'])

print(ewt_dev_sents[0])
print(f"EWT dev lauseid {len(ewt_dev_sents)}")
print(f"EWT train lauseid {len(ewt_train_sents)}")
print(f"EWT test lauseid {len(ewt_test_sents)}")

[('täiesti', 'O'), ('nõus', 'O'), ('.', 'O')]
EWT dev lauseid 833
EWT train lauseid 5444
EWT test lauseid 913


In [None]:
edt_dev_sents = preprocess(edt['dev'])
edt_train_sents = preprocess(edt['train'])
edt_test_sents = preprocess(edt['test'])

print(edt_dev_sents[0])
print(f"EDT dev lauseid {len(edt_dev_sents)}")
print(f"EDT train lauseid {len(edt_train_sents)}")
print(f"EDT test lauseid {len(edt_test_sents)}")

[('Aga', 'O'), ('mulle', 'O'), ('tundub', 'O'), (',', 'O'), ('et', 'O'), ('kogu', 'O'), ('maailm', 'O'), ('ootab', 'O'), ('muusikamaailmalt', 'O'), ('midagi', 'O'), ('erutavalt', 'O'), ('uut', 'O'), ('minimalismi', 'O'), ('kõrvale', 'O'), ('.', 'O')]
EDT dev lauseid 3122
EDT train lauseid 24601
EDT test lauseid 3207


In [None]:
combined_dev_sents = ewt_dev_sents + edt_dev_sents
combined_train_sents = ewt_train_sents + edt_train_sents
combined_test_sents = ewt_test_sents + edt_test_sents

print(f"dev lauseid {len(combined_dev_sents)}")
print(f"train lauseid {len(combined_train_sents)}")
print(f"test lauseid {len(combined_test_sents)}")

dev lauseid 3955
train lauseid 30045
test lauseid 4120


In [6]:
# https://github.com/Kyubyong/nlp_made_easy/blob/master/Pos-tagging%20with%20Bert%20Fine-tuning.ipynb
#all_tags = ['O','B-Eve', 'B-Gep', 'B-Loc', 'B-Muu', 'B-Org', 'B-Per', 'B-Prod', 'B-Unk', 'I-Eve', 'I-Gep', 'I-Loc', 'I-Muu', 'I-Org', 'I-Per', 'I-Prod', 'I-Unk']
all_tags = ['O',
            'B-Eve', 'I-Eve',
            'B-Gep', 'I-Gep',
            'B-Loc', 'I-Loc',
            'B-Muu', 'I-Muu',
            'B-Org', 'I-Org',
            'B-Per', 'I-Per',
            'B-Prod', 'I-Prod',
            'B-Unk', 'I-Unk']
tag2idx = {tag:idx for idx, tag in enumerate(all_tags)}
idx2tag = {idx:tag for idx, tag in enumerate(all_tags)}
# Sõnastikud, kus on vastavuses arv:märgend ja vastupidi, näiteks tag2idx sõnastikus 'B-Eve' -> 1 ning idx2tag sõnastikus siis 1 -> 'B-Eve'

In [7]:
def split_to_token_and_tag(sents, tag2idx):
  # Sisend: parsitud laused ja tag2idx sõnastik
  # Väljund: Sõnastike list
  # Sõnastik sisaldab kolme elementi: lause ID täisarvuna, märgendite list arvulisel kujul ning sõnade list

  res = {}
  #res = []
  for i, sent in enumerate(sents):
    tags = [tag2idx[tag] for _, tag in sent]
    words = [word for word, _ in sent]
    res[i] = {
        'id': i,
        'tags': tags,
        'tokens': words
    }

  return res

def transform_set(data):
  transformed = {
      "id": [v["id"] for v in data.values()],
      "tags": [v["tags"] for v in data.values()],
      "tokens": [v["tokens"] for v in data.values()]
  }
  ds = Dataset.from_dict(transformed)
  return ds

def process_all(train_sents, dev_sents, test_sents, tag2idx):
  # Sisend: train/dev/test lausete listid ja tag2idx sõnastik
  # Väljund: töödeldud andmestik
  train = split_to_token_and_tag(train_sents, tag2idx)
  dev = split_to_token_and_tag(dev_sents, tag2idx)
  test = split_to_token_and_tag(test_sents, tag2idx)

  train_ds = transform_set(train)
  dev_ds = transform_set(dev)
  test_ds = transform_set(test)

  dataset = DatasetDict({
      'train': train_ds,
      'dev': dev_ds,
      'test': test_ds
  })

  return dataset

# dev_split = split_to_token_and_tag(ewt_dev_sents, tag2idx)
# transformed_dev = {
#     "id": [v["id"] for v in dev_split.values()],
#     "tags": [v["tags"] for v in dev_split.values()],
#     "tokens": [v["tokens"] for v in dev_split.values()]
# }
# ds = Dataset.from_dict(transformed_dev)

In [None]:
ewt_dataset = process_all(ewt_train_sents, ewt_dev_sents, ewt_test_sents, tag2idx)

In [None]:
edt_dataset = process_all(edt_train_sents, edt_dev_sents, edt_test_sents, tag2idx)

In [None]:
combined_dataset = process_all(combined_train_sents, combined_dev_sents, combined_test_sents, tag2idx)

In [None]:
print(ewt_dataset)
print(ewt_dataset['dev'][0])

DatasetDict({
    train: Dataset({
        features: ['id', 'tags', 'tokens'],
        num_rows: 5444
    })
    dev: Dataset({
        features: ['id', 'tags', 'tokens'],
        num_rows: 833
    })
    test: Dataset({
        features: ['id', 'tags', 'tokens'],
        num_rows: 913
    })
})
{'id': 0, 'tags': [0, 0, 0], 'tokens': ['täiesti', 'nõus', '.']}


In [8]:
def save_split_to_json(split_data, output_path):
    serializable_data = []

    for item in split_data:
        data_dict = {
            'id': item['id'],
            'tags': item['tags'],
            'tokens': item['tokens']
        }
        serializable_data.append(data_dict)

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(serializable_data, f, ensure_ascii=False, indent=2)

def save_dataset_to_json(dataset, name=''):
  try:
    for split_name, split_data in dataset.items():
      output_path = f'data/{name}/{split_name}.json'
      save_split_to_json(split_data, output_path)
  except Exception as e:
    print(f"Error: {e}")

In [None]:
save_dataset_to_json(ewt_dataset, 'ewt')
save_dataset_to_json(edt_dataset, 'edt')
save_dataset_to_json(combined_dataset, '')

In [8]:
def load_split_from_json(input_path):
    with open(input_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    dataset_dict = {
        'id': [],
        'tags': [],
        'tokens': []
    }

    for item in data:
        dataset_dict['id'].append(item['id'])
        dataset_dict['tags'].append(item['tags'])
        dataset_dict['tokens'].append(item['tokens'])

    return Dataset.from_dict(dataset_dict)

def load_dataset_from_json(name=''):
  try:
    dataset = DatasetDict()
    for split_name in ['train', 'dev', 'test']:
      input_path = f'data/{name}/{split_name}.json'
      dataset[split_name] = load_split_from_json(input_path)
    return dataset
  except Exception as e:
    print(f"Error: {e}")

In [9]:
ewt_from_json = load_dataset_from_json('ewt')
edt_from_json = load_dataset_from_json('edt')
combined_from_json = load_dataset_from_json()

In [10]:
tokenizer = AutoTokenizer.from_pretrained("tartuNLP/EstBERT") #, max_length=128, padding="max_length", truncation=True

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/62.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/534 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/410k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [11]:
# https://huggingface.co/docs/transformers/en/tasks/token_classification

def tokenize_and_align_labels(sent, label_all=False):
  # Sisend: üks lause
  # Väljund: tokenizeri abil sõnestatud lause, sõnastik sisaldab input_ids, token_type_ids, attention_mask ja labels
  # input_ids on lause 'arvulisel' kujul, igale ID-le vastab mingi sõna (teisendamine funktsiooni tokenizer.convert_ids_to_tokens(input_ids) abil)
  # labels on märgendite list, kus märgendid kattuvad tokenizeri abil sõnestatud lausega

  tokenized_inputs = tokenizer(sent['tokens'], is_split_into_words=True) #, truncation=True, is_split_into_words=True, max_length=128, padding="max_length"
  labels = []
  word_ids = tokenized_inputs.word_ids()
  prev_word = None

  for word in word_ids:
    if word is None:
      label = -100
    elif word != prev_word:
      label = sent['tags'][word]
    else:
      label = sent['tags'][word] if label_all else -100
      #label = -100 # reetpa67 -> B-Per; 'reet', '##pa', '##67' -> 11, -100, -100 ehk B-Per
    labels.append(label)
    prev_word = word

  tokenized_inputs["labels"] = labels
  return tokenized_inputs

In [14]:
def print_aligned_example(dataset, split='dev', i=0):
  example = dataset[split][i]
  print(f'Lause algselt: {example}')

  aligned_example = tokenize_and_align_labels(example, False)
  tokens = tokenizer.convert_ids_to_tokens(aligned_example.input_ids)
  print(f'Lause nö arvulisel kujul: {aligned_example.input_ids}')
  print(f'Sõnestatud: {tokens}')
  aligned_labels = aligned_example['labels']
  print(f'Märgendid arvulisel kujul paigas: {aligned_labels}')
  aligned_labels_text = [all_tags[label] for label in aligned_labels if label>=0]
  print(f'Märgendid: {aligned_labels_text}')

print_aligned_example(ewt_from_json, 'train', 1)

Lause algselt: {'id': 1, 'tags': [11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'tokens': ['reetpa67', ':', 'Juba', 'teist', 'aastat', 'ei', 'õitse', 'mul', 'aias', 'nartsissid', '.']}
Lause nö arvulisel kujul: [2, 21999, 199, 23498, 137, 368, 1325, 887, 82, 6917, 164, 392, 10013, 3553, 3188, 379, 15, 3]
Sõnestatud: ['[CLS]', 'reet', '##pa', '##67', ':', 'juba', 'teist', 'aastat', 'ei', 'oi', '##tse', 'mul', 'aias', 'nar', '##tsis', '##sid', '.', '[SEP]']
Märgendid arvulisel kujul paigas: [-100, 11, -100, -100, 0, 0, 0, 0, 0, 0, -100, 0, 0, 0, -100, -100, 0, -100]
Märgendid: ['B-Per', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']


In [14]:
# NB! Kasutades DatasetDict ja Dataset on see siin ebavajalik, saab otse dataset.map() kasutada
def tokenize_dataset(dataset_dict):
    # Sisend: andmestik sõnastikuna, mis sisaldab train/dev/test hulkasid
    # Väljund:
    processed_dataset = {}

    for split_name, split_data in dataset_dict.items():
        #examples = [split_data[idx] for idx in range(len(split_data))]
        #print(examples == split_data)
        #tokenized_dataset = list(map(tokenize_and_align_labels, examples))
        tokenized_dataset = list(map(tokenize_and_align_labels, split_data))
        processed_split = {
            'input_ids': [],
            'attention_mask': [],
            'labels': []
        }

        for item in tokenized_dataset:
            processed_split['input_ids'].append(item['input_ids'])
            processed_split['attention_mask'].append(item['attention_mask'])
            processed_split['labels'].append(item['labels'])

        processed_dataset[split_name] = processed_split

    return processed_dataset

In [15]:
#tokenized_ewt = ewt_dataset.map(tokenize_and_align_labels)
#tokenized_edt = edt_dataset.map(tokenize_and_align_labels)
#tokenized_combined = combined_dataset.map(tokenize_and_align_labels)

tokenized_ewt = ewt_from_json.map(tokenize_and_align_labels)
#tokenized_edt = edt_from_json.map(tokenize_and_align_labels)
#tokenized_combined = combined_from_json.map(tokenize_and_align_labels)

Map:   0%|          | 0/5444 [00:00<?, ? examples/s]

Map:   0%|          | 0/833 [00:00<?, ? examples/s]

Map:   0%|          | 0/913 [00:00<?, ? examples/s]

In [16]:
print(tokenized_ewt['train'])

Dataset({
    features: ['id', 'tags', 'tokens', 'input_ids', 'token_type_ids', 'attention_mask', 'labels'],
    num_rows: 5444
})


In [17]:
#data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer, return_tensors="tf")
data_collator_pt = DataCollatorForTokenClassification(tokenizer=tokenizer)
seqeval = evaluate.load("seqeval")

Downloading builder script:   0%|          | 0.00/6.34k [00:00<?, ?B/s]

In [18]:
def compute_metrics(p):
    predictions, labels = p
    predictions = np.argmax(predictions, axis=2)

    true_predictions = [
        [all_tags[p] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]
    true_labels = [
        [all_tags[l] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]

    results = seqeval.compute(predictions=true_predictions, references=true_labels)
    return {
        "precision": results["overall_precision"],
        "recall": results["overall_recall"],
        "f1": results["overall_f1"],
        "accuracy": results["overall_accuracy"],
    }

In [19]:
# tensorflow variant
def finetune_model(tokenized_dataset, epochs=3, batch_size=8):
  num_train_steps = (len(tokenized_dataset["train"]) // batch_size) * epochs
  optimizer, lr_schedule = create_optimizer(
      init_lr=1e-5,
      num_train_steps=num_train_steps,
      weight_decay_rate=0.01,
      num_warmup_steps=0,
      adam_beta1=0.9,
      adam_beta2=0.98,
      adam_epsilon=1e-6
  )

  model = TFAutoModelForTokenClassification.from_pretrained("tartuNLP/EstBERT", num_labels=len(all_tags), id2label=idx2tag, label2id=tag2idx)

  tf_train_set = model.prepare_tf_dataset(
      tokenized_dataset['train'],
      shuffle=True,
      batch_size=batch_size,
      collate_fn=data_collator,
  )
  tf_validation_set = model.prepare_tf_dataset(
      tokenized_dataset['dev'],
      shuffle=False,
      batch_size=batch_size,
      collate_fn=data_collator,
  )

  model.compile(optimizer=optimizer)

  metric_callback = KerasMetricCallback(metric_fn=compute_metrics, eval_dataset=tf_validation_set)
  callbacks = [metric_callback]

  model.fit(x=tf_train_set, validation_data=tf_validation_set, epochs=epochs, callbacks=callbacks)

  return model

In [82]:
def finetune_model_pytorch(tokenized_dataset, epochs=3, batch_size=16, lr=5e-5):
  model = AutoModelForTokenClassification.from_pretrained("tartuNLP/EstBERT", num_labels=len(all_tags), id2label=idx2tag, label2id=tag2idx)
  training_args = TrainingArguments(
      report_to='none',
      output_dir='./results',
      learning_rate=lr,
      lr_scheduler_type='polynomial', # default on linear, aga polynomial annab veidi parema tulemuse
      per_device_train_batch_size=batch_size,
      per_device_eval_batch_size=batch_size,
      num_train_epochs=epochs,
      weight_decay=0.01,
      eval_strategy="epoch",
      save_strategy="epoch",
      optim="adamw_torch",
      load_best_model_at_end=True,
      metric_for_best_model='f1',
      adam_beta1=0.9,
      adam_beta2=0.98,
      adam_epsilon=1e-6,
      fp16=True
  )

  early_stopping_callback = EarlyStoppingCallback(
        early_stopping_patience=5,
        early_stopping_threshold=0.0001
    )

  trainer = Trainer(
      model=model,
      args=training_args,
      train_dataset=tokenized_dataset['train'],
      eval_dataset=tokenized_dataset['dev'],
      processing_class=tokenizer,
      data_collator=data_collator_pt,
      compute_metrics=compute_metrics,
      callbacks=[early_stopping_callback]
  )

  trainer.train()

  return model

In [None]:
combined_model = finetune_model(tokenized_combined)

model.safetensors:   0%|          | 0.00/498M [00:00<?, ?B/s]

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertForTokenClassification: ['bert.embeddings.position_ids']
- This IS expected if you are initializing TFBertForTokenClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertForTokenClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFBertForTokenClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3

  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/3
Epoch 3/3


In [72]:
def grid_search_pt(tokenized_dataset=None, param_grid=None):
  results = []
  best_f1 = 0
  best_model = None
  best_params = None

  if param_grid:
    param_combinations = [dict(zip(param_grid.keys(), v))
                         for v in product(*param_grid.values())]

  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

  for i, params in enumerate(param_combinations, 1):
    print(f"\n{i}/{len(param_combinations)}")
    print(f"Params: {params}")

    try:
      model = AutoModelForTokenClassification.from_pretrained("tartuNLP/EstBERT", num_labels=len(all_tags), id2label=idx2tag, label2id=tag2idx)

      training_args = TrainingArguments(
          report_to='none', # kui seda ei ole, siis kasutab weights&biases, vaja api võtit
          output_dir=f'./results_{timestamp}_{i}',
          learning_rate=params['learning_rate'],
          per_device_train_batch_size=params['batch_size'],
          per_device_eval_batch_size=params['batch_size'],
          num_train_epochs=params['num_train_epochs'],
          weight_decay=params['weight_decay'],
          eval_strategy="epoch",
          save_strategy="epoch",
          optim="adamw_torch",
          load_best_model_at_end=True,
          metric_for_best_model='f1',
          adam_beta1=params['adam_beta1'],
          adam_beta2=params['adam_beta2'],
          adam_epsilon=params['adam_epsilon'],
          fp16=True
      )

      early_stopping_callback = EarlyStoppingCallback(
          early_stopping_patience=2,
          early_stopping_threshold=0.0001
      )

      trainer = Trainer(
          model=model,
          args=training_args,
          train_dataset=tokenized_dataset['train'],
          eval_dataset=tokenized_dataset['dev'],
          processing_class=tokenizer,
          data_collator=data_collator_pt,
          compute_metrics=compute_metrics,
          callbacks=[early_stopping_callback]
      )

      train_result = trainer.train()
      eval_result = trainer.evaluate()

      trial_results = {
          'parameters': params,
          'eval_metrics': eval_result,
          'train_metrics': {
              'train_runtime': train_result.metrics['train_runtime'],
              'train_samples_per_second': train_result.metrics['train_samples_per_second']
          }
      }
      results.append(trial_results)

      if eval_result['eval_f1'] > best_f1:
        best_f1 = eval_result['eval_f1']
        best_model = model
        best_params = params

    except Exception as e:
      print(f"Error {i}: {e}")
      continue

  print(f"parim f1: {best_f1}")
  print(f"parameetrid: {best_params}")

  return best_model, best_params, results

In [25]:
param_grid = {
    'learning_rate': [1e-5, 5e-5],
    'batch_size': [16],
    'num_train_epochs': [5],
    'weight_decay': [0.01, 0.0],
    'adam_beta1': [0.9],
    'adam_beta2': [0.98, 0.99],
    'adam_epsilon': [1e-6, 1e-8]
}

best_model, best_params, all_results = grid_search_pt(tokenized_dataset=tokenized_ewt, param_grid=param_grid)

Some weights of BertForTokenClassification were not initialized from the model checkpoint at tartuNLP/EstBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



1/16
Params: {'learning_rate': 1e-05, 'batch_size': 16, 'num_train_epochs': 5, 'weight_decay': 0.01, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-06}


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,0.316813,0.750249,0.661984,0.703358,0.940634
2,0.305600,0.317966,0.72068,0.70676,0.713652,0.943635
3,0.125900,0.32663,0.729828,0.70676,0.718109,0.945392
4,0.125900,0.327276,0.729754,0.727831,0.728791,0.946636
5,0.099100,0.330332,0.725284,0.727831,0.726556,0.946636


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))



2/16
Params: {'learning_rate': 1e-05, 'batch_size': 16, 'num_train_epochs': 5, 'weight_decay': 0.01, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08}


Some weights of BertForTokenClassification were not initialized from the model checkpoint at tartuNLP/EstBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,0.313081,0.735849,0.650571,0.690587,0.93895
2,0.281000,0.317851,0.71364,0.693591,0.703473,0.941293
3,0.127200,0.320296,0.730388,0.71115,0.720641,0.946124
4,0.127200,0.323065,0.733274,0.721686,0.727434,0.94671
5,0.098500,0.326329,0.728406,0.718174,0.723254,0.94627


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))
Some weights of BertForTokenClassification were not initialized from the model checkpoint at tartuNLP/EstBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



3/16
Params: {'learning_rate': 1e-05, 'batch_size': 16, 'num_train_epochs': 5, 'weight_decay': 0.01, 'adam_beta1': 0.9, 'adam_beta2': 0.99, 'adam_epsilon': 1e-06}


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,0.31284,0.735849,0.650571,0.690587,0.938877
2,0.282000,0.317829,0.712353,0.693591,0.702847,0.94122
3,0.126800,0.320413,0.728417,0.71115,0.71968,0.945904
4,0.126800,0.323846,0.732558,0.719052,0.725742,0.94649
5,0.098000,0.326642,0.728648,0.719052,0.723818,0.945978


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))



4/16
Params: {'learning_rate': 1e-05, 'batch_size': 16, 'num_train_epochs': 5, 'weight_decay': 0.01, 'adam_beta1': 0.9, 'adam_beta2': 0.99, 'adam_epsilon': 1e-08}


Some weights of BertForTokenClassification were not initialized from the model checkpoint at tartuNLP/EstBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,0.311757,0.729941,0.65496,0.690421,0.939243
2,0.279800,0.316724,0.712489,0.696225,0.704263,0.941439
3,0.125700,0.319887,0.734177,0.712906,0.723385,0.946344
4,0.125700,0.323411,0.732143,0.71993,0.725985,0.946417
5,0.096300,0.326282,0.729055,0.718174,0.723574,0.945758


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))



5/16
Params: {'learning_rate': 1e-05, 'batch_size': 16, 'num_train_epochs': 5, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-06}


Some weights of BertForTokenClassification were not initialized from the model checkpoint at tartuNLP/EstBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,0.312426,0.740557,0.654083,0.694639,0.93917
2,0.283500,0.318067,0.711191,0.691835,0.70138,0.941146
3,0.128500,0.320141,0.730561,0.709394,0.719822,0.945758
4,0.128500,0.323423,0.732797,0.71993,0.726306,0.946344
5,0.099600,0.32643,0.728164,0.717296,0.722689,0.946051


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))



6/16
Params: {'learning_rate': 1e-05, 'batch_size': 16, 'num_train_epochs': 5, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08}


Some weights of BertForTokenClassification were not initialized from the model checkpoint at tartuNLP/EstBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,0.312521,0.734975,0.65496,0.692665,0.93939
2,0.280900,0.317552,0.711331,0.694469,0.702799,0.94122
3,0.127200,0.320082,0.730144,0.710272,0.720071,0.945758
4,0.127200,0.323083,0.732319,0.718174,0.725177,0.946197
5,0.098100,0.326342,0.728406,0.718174,0.723254,0.946051


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))



7/16
Params: {'learning_rate': 1e-05, 'batch_size': 16, 'num_train_epochs': 5, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.99, 'adam_epsilon': 1e-06}


Some weights of BertForTokenClassification were not initialized from the model checkpoint at tartuNLP/EstBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,0.312393,0.7357,0.65496,0.692987,0.939243
2,0.281700,0.31753,0.711071,0.693591,0.702222,0.941146
3,0.126800,0.320317,0.733755,0.713784,0.723632,0.94627
4,0.126800,0.32367,0.732558,0.719052,0.725742,0.946417
5,0.097800,0.326629,0.729055,0.718174,0.723574,0.945904


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))



8/16
Params: {'learning_rate': 1e-05, 'batch_size': 16, 'num_train_epochs': 5, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.99, 'adam_epsilon': 1e-08}


Some weights of BertForTokenClassification were not initialized from the model checkpoint at tartuNLP/EstBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,0.311731,0.73209,0.65496,0.691381,0.939316
2,0.279700,0.316397,0.712489,0.696225,0.704263,0.941586
3,0.125400,0.320006,0.729391,0.714662,0.721951,0.946197
4,0.125400,0.323205,0.733096,0.723442,0.728237,0.946783
5,0.096400,0.326205,0.726465,0.718174,0.722296,0.945612


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))



9/16
Params: {'learning_rate': 5e-05, 'batch_size': 16, 'num_train_epochs': 5, 'weight_decay': 0.01, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-06}


Some weights of BertForTokenClassification were not initialized from the model checkpoint at tartuNLP/EstBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,0.28352,0.785924,0.705882,0.743756,0.948027
2,0.171400,0.314585,0.779926,0.743635,0.761348,0.950589
3,0.047100,0.353374,0.758929,0.746269,0.752545,0.951468
4,0.047100,0.377637,0.762367,0.757682,0.760018,0.951907


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))



10/16
Params: {'learning_rate': 5e-05, 'batch_size': 16, 'num_train_epochs': 5, 'weight_decay': 0.01, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08}


Some weights of BertForTokenClassification were not initialized from the model checkpoint at tartuNLP/EstBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,0.287738,0.804082,0.691835,0.743747,0.946417
2,0.174100,0.317652,0.781481,0.741001,0.760703,0.950223
3,0.045700,0.358724,0.775115,0.738367,0.756295,0.951614
4,0.045700,0.382744,0.772605,0.757682,0.765071,0.952712
5,0.016600,0.411855,0.778591,0.747147,0.762545,0.953005


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



11/16
Params: {'learning_rate': 5e-05, 'batch_size': 16, 'num_train_epochs': 5, 'weight_decay': 0.01, 'adam_beta1': 0.9, 'adam_beta2': 0.99, 'adam_epsilon': 1e-06}


Some weights of BertForTokenClassification were not initialized from the model checkpoint at tartuNLP/EstBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,0.283154,0.787524,0.709394,0.74642,0.949125
2,0.170400,0.321624,0.780694,0.731343,0.755213,0.949784
3,0.046700,0.355228,0.769719,0.745391,0.75736,0.95176
4,0.046700,0.385242,0.744996,0.751536,0.748252,0.950516
5,0.016300,0.406924,0.757307,0.750658,0.753968,0.951321


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))



12/16
Params: {'learning_rate': 5e-05, 'batch_size': 16, 'num_train_epochs': 5, 'weight_decay': 0.01, 'adam_beta1': 0.9, 'adam_beta2': 0.99, 'adam_epsilon': 1e-08}


Some weights of BertForTokenClassification were not initialized from the model checkpoint at tartuNLP/EstBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,0.283299,0.789009,0.705882,0.745134,0.948759
2,0.169400,0.325503,0.782407,0.741879,0.761604,0.950516
3,0.045400,0.367499,0.758993,0.741001,0.749889,0.951834
4,0.045400,0.386773,0.749338,0.745391,0.747359,0.951248


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))



13/16
Params: {'learning_rate': 5e-05, 'batch_size': 16, 'num_train_epochs': 5, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-06}


Some weights of BertForTokenClassification were not initialized from the model checkpoint at tartuNLP/EstBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,0.285314,0.796425,0.704126,0.747437,0.947295
2,0.174900,0.325242,0.757333,0.748025,0.75265,0.94854
3,0.046100,0.370454,0.77757,0.730465,0.753282,0.950589
4,0.046100,0.386169,0.786381,0.740123,0.762551,0.952273
5,0.017800,0.415075,0.78757,0.745391,0.7659,0.952346


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



14/16
Params: {'learning_rate': 5e-05, 'batch_size': 16, 'num_train_epochs': 5, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08}


Some weights of BertForTokenClassification were not initialized from the model checkpoint at tartuNLP/EstBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,0.284546,0.786982,0.700615,0.741291,0.948759
2,0.170200,0.323668,0.779016,0.736611,0.75722,0.95037
3,0.046500,0.36096,0.768881,0.741879,0.755139,0.951907
4,0.046500,0.393717,0.752414,0.752414,0.752414,0.951175


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))



15/16
Params: {'learning_rate': 5e-05, 'batch_size': 16, 'num_train_epochs': 5, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.99, 'adam_epsilon': 1e-06}


Some weights of BertForTokenClassification were not initialized from the model checkpoint at tartuNLP/EstBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,0.289578,0.811983,0.690079,0.746084,0.947588
2,0.174600,0.323841,0.764811,0.748025,0.756325,0.949198
3,0.045600,0.370991,0.768304,0.727831,0.74752,0.950809
4,0.045600,0.388314,0.773723,0.744513,0.758837,0.951687
5,0.017600,0.41032,0.782609,0.742757,0.762162,0.952566


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



16/16
Params: {'learning_rate': 5e-05, 'batch_size': 16, 'num_train_epochs': 5, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.99, 'adam_epsilon': 1e-08}


Some weights of BertForTokenClassification were not initialized from the model checkpoint at tartuNLP/EstBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,0.280998,0.799603,0.707638,0.750815,0.949052
2,0.169700,0.328181,0.77623,0.733977,0.754513,0.950662
3,0.045500,0.361099,0.767593,0.727831,0.747183,0.951321
4,0.045500,0.384846,0.73928,0.756804,0.747939,0.950223


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


parim f1: 0.7658998646820027
parameetrid: {'learning_rate': 5e-05, 'batch_size': 16, 'num_train_epochs': 5, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-06}


  _warn_prf(average, modifier, msg_start, len(result))


In [47]:
# parim f1: 0.7658998646820027
# parameetrid: {'learning_rate': 5e-05, 'batch_size': 16, 'num_train_epochs': 5, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-06}

ewt_mudel = finetune_model_pytorch(tokenized_ewt, 5, 16, 5e-5)

Some weights of BertForTokenClassification were not initialized from the model checkpoint at tartuNLP/EstBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,0.281832,0.818465,0.692713,0.750357,0.948247
2,0.173300,0.330366,0.788499,0.710272,0.747344,0.948686
3,0.047600,0.358526,0.775547,0.746269,0.760626,0.95176
4,0.047600,0.387028,0.785714,0.753292,0.769162,0.952346
5,0.018100,0.408722,0.77889,0.751536,0.764969,0.95198


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [83]:
ewt_testimiseks = finetune_model_pytorch(tokenized_ewt, 5, 16, 5e-5)

Some weights of BertForTokenClassification were not initialized from the model checkpoint at tartuNLP/EstBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,0.217636,0.734644,0.674944,0.703529,0.960804
2,0.111600,0.283895,0.742092,0.688488,0.714286,0.962704
3,0.027700,0.322581,0.704492,0.672686,0.688222,0.961004
4,0.027700,0.349729,0.745192,0.699774,0.721769,0.962504
5,0.009700,0.36211,0.748184,0.697517,0.721963,0.963004


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [97]:
from transformers import pipeline

ewt_ner = pipeline("ner", model=ewt_testimiseks, tokenizer=tokenizer)
s = tokenized_ewt['test'][2]['tokens']
actual = tokenized_ewt['test'][2]['labels']
actual_text = [all_tags[label] for label in actual if label>=0]
print(actual_text)
print(s)
pred = ewt_ner(s)
print(pred)

Device set to use cuda:0


['B-Per', 'I-Per', 'O', 'B-Loc', 'O', 'O', 'O', 'O', 'B-Eve', 'O', 'O', 'B-Per', 'I-Per', 'O', 'O', 'O', 'O']
['Jürgen', 'Zopp', 'kaotas', 'Petange’is', '64000', 'euro', 'suuruse', 'auhinnafondiga', 'Challenger-turniiri', 'poolfinaalis', 'prantslasele', 'Paul-Henri', 'Mathieu’le', '6:7', ',', '2:6', '.']
[[{'entity': 'B-Per', 'score': 0.99676657, 'index': 1, 'word': 'ju', 'start': 0, 'end': 2}, {'entity': 'B-Per', 'score': 0.5130056, 'index': 2, 'word': '##rgen', 'start': 2, 'end': 6}], [{'entity': 'B-Per', 'score': 0.99400735, 'index': 1, 'word': 'zo', 'start': 0, 'end': 2}, {'entity': 'I-Per', 'score': 0.892888, 'index': 2, 'word': '##pp', 'start': 2, 'end': 4}], [], [{'entity': 'B-Org', 'score': 0.69498736, 'index': 1, 'word': 'peta', 'start': 0, 'end': 4}, {'entity': 'I-Org', 'score': 0.27533016, 'index': 2, 'word': '##nge', 'start': 4, 'end': 7}], [], [], [], [], [{'entity': 'B-Prod', 'score': 0.53563005, 'index': 1, 'word': 'cha', 'start': 0, 'end': 3}, {'entity': 'I-Prod', 'scor