In [1]:
# Colabis kasutamiseks
from google.colab import drive
#drive.mount('/content/drive')
import os
os.chdir('/content/drive/My Drive/Colab Notebooks/thesis/')

In [2]:
!pip install estnltk==1.7.2
!pip install evaluate
!pip install seqeval

Collecting estnltk==1.7.2
  Downloading estnltk-1.7.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.9 kB)
Collecting estnltk-core>=1.7.2 (from estnltk==1.7.2)
  Downloading estnltk_core-1.7.4-py3-none-any.whl.metadata (3.5 kB)
Collecting python-crfsuite>=0.8.3 (from estnltk==1.7.2)
  Downloading python_crfsuite-0.9.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.3 kB)
Collecting cached-property>=1.2.0 (from estnltk==1.7.2)
  Downloading cached_property-2.0.1-py3-none-any.whl.metadata (10 kB)
Collecting bs4 (from estnltk==1.7.2)
  Downloading bs4-0.0.2-py2.py3-none-any.whl.metadata (411 bytes)
Collecting conllu (from estnltk==1.7.2)
  Downloading conllu-6.0.0-py3-none-any.whl.metadata (21 kB)
Collecting pyahocorasick (from estnltk==1.7.2)
  Downloading pyahocorasick-2.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Collecting jedi>=0.16 (from ipython->estnltk==1.7.2)
  Downloading jedi-0.19.2-py2.py3-none

In [3]:
# Impordid
import os
import numpy as np
import pandas as pd
from collections import defaultdict
import evaluate
from datasets import Dataset, DatasetDict
import json

# Korpuse lugemiseks
from estnltk.converters.conll import conll_importer

# _
from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import AdamWeightDecay
from transformers import DataCollatorForTokenClassification
from transformers import create_optimizer
from transformers.keras_callbacks import KerasMetricCallback
from transformers import TFAutoModelForTokenClassification
from transformers import TrainingArguments, Trainer, EarlyStoppingCallback

In [4]:
def get_dataset_paths(dataset: str) -> dict:
    # Sisend: andmestiku nimi sõnena (ewt/edt)
    # Väljund: sõnastik, mis sisaldab train/dev/test failiteid
    dataset_dir = os.path.join('data', dataset)
    files = os.listdir(dataset_dir)

    paths = {}
    for split in ['train', 'dev', 'test']:
        matching_file = next(f for f in files if f'-ud-{split}.' in f)
        paths[split] = os.path.join(dataset_dir, matching_file)

    return paths

edt = get_dataset_paths('edt')
ewt = get_dataset_paths('ewt')
print(edt)
print(ewt)

{'train': 'data/edt/et_edt-ud-train.conllu', 'dev': 'data/edt/et_edt-ud-dev.conllu', 'test': 'data/edt/et_edt-ud-test.conllu'}
{'train': 'data/ewt/et_ewt-ud-train.conllu', 'dev': 'data/ewt/et_ewt-ud-dev.conllu', 'test': 'data/ewt/et_ewt-ud-test.conllu'}


In [5]:
def preprocess(dataset_path: str) -> list:
    # Sisend: andmestiku failitee sõnena
    # Väljund: List, mis sisaldab parsitud lauseid
    # Iga lause on paaride list kujul [(w0, t0), (w1, t1), ..., (wn, tn)], kus w tähistab sõna ja t sõnale vastavat märgendit.

    dataset = conll_importer.conll_to_text(file=dataset_path)
    parsed_sents = []
    known_tags = ['B-Eve', 'B-Gep', 'B-Loc', 'B-Muu', 'B-Org', 'B-Per', 'B-Prod', 'B-Unk', 'I-Eve', 'I-Gep', 'I-Loc', 'I-Muu', 'I-Org', 'I-Per', 'I-Prod', 'I-Unk']

    # Kuna andmestikus on üksikud vead, aga on enam-vähem selge, mida tegelikult mõeldi, siis teeme vastavad parandused.
    corrections = {
      'B-OrgSpaceAfter': 'B-Org',
      'B_Gep': 'B-Gep',
      'i-Prod': 'I-Prod',
      'Org': 'B-Org',
      'Per': 'B-Per',
      'BäOrg': 'B-Org',
      'B.Prod': 'B-Prod',
      'I-per': 'I-Per'
    }

    for sent in dataset.sentences:
        parsed_sent = []
        for word, misc in zip(sent.words, sent.conll_syntax.misc):
            tag = 'O'
            if misc:
                if 'NE' in misc:
                  if misc['NE'] in known_tags:
                    tag = misc['NE']
                  else:
                    # Kaks üksikut juhtu, kus kahe elemendi pikkune nimeüksus oli märgendatud (_, Per), (_, Per) või (_, Org), (_, Org)
                    if parsed_sent[-1][1] == 'B-Org' and misc['NE'] == 'Org':
                      tag = 'I-Org'
                    if parsed_sent[-1][1] == 'B-Per' and misc['NE'] == 'Per':
                      tag = 'I-Per'
                    else:
                      tag = corrections[misc['NE']]
            pair = (word.text, tag)
            #print(f"({word.text}, {tag})")
            parsed_sent.append(pair)
        parsed_sents.append(parsed_sent)

    return parsed_sents

In [6]:
ewt_dev_sents = preprocess(ewt['dev'])
ewt_train_sents = preprocess(ewt['train'])
ewt_test_sents = preprocess(ewt['test'])

print(ewt_dev_sents[0])
print(f"EWT dev lauseid {len(ewt_dev_sents)}")
print(f"EWT train lauseid {len(ewt_train_sents)}")
print(f"EWT test lauseid {len(ewt_test_sents)}")

[('täiesti', 'O'), ('nõus', 'O'), ('.', 'O')]
EWT dev lauseid 833
EWT train lauseid 5444
EWT test lauseid 913


In [7]:
edt_dev_sents = preprocess(edt['dev'])
edt_train_sents = preprocess(edt['train'])
edt_test_sents = preprocess(edt['test'])

print(edt_dev_sents[0])
print(f"EDT dev lauseid {len(edt_dev_sents)}")
print(f"EDT train lauseid {len(edt_train_sents)}")
print(f"EDT test lauseid {len(edt_test_sents)}")

[('Aga', 'O'), ('mulle', 'O'), ('tundub', 'O'), (',', 'O'), ('et', 'O'), ('kogu', 'O'), ('maailm', 'O'), ('ootab', 'O'), ('muusikamaailmalt', 'O'), ('midagi', 'O'), ('erutavalt', 'O'), ('uut', 'O'), ('minimalismi', 'O'), ('kõrvale', 'O'), ('.', 'O')]
EDT dev lauseid 3122
EDT train lauseid 24601
EDT test lauseid 3207


In [8]:
combined_dev_sents = ewt_dev_sents + edt_dev_sents
combined_train_sents = ewt_train_sents + edt_train_sents
combined_test_sents = ewt_test_sents + edt_test_sents

print(f"dev lauseid {len(combined_dev_sents)}")
print(f"train lauseid {len(combined_train_sents)}")
print(f"test lauseid {len(combined_test_sents)}")

dev lauseid 3955
train lauseid 30045
test lauseid 4120


In [9]:
# https://github.com/Kyubyong/nlp_made_easy/blob/master/Pos-tagging%20with%20Bert%20Fine-tuning.ipynb
#all_tags = ['O','B-Eve', 'B-Gep', 'B-Loc', 'B-Muu', 'B-Org', 'B-Per', 'B-Prod', 'B-Unk', 'I-Eve', 'I-Gep', 'I-Loc', 'I-Muu', 'I-Org', 'I-Per', 'I-Prod', 'I-Unk']
all_tags = ['O',
            'B-Eve', 'I-Eve',
            'B-Gep', 'I-Gep',
            'B-Loc', 'I-Loc',
            'B-Muu', 'I-Muu',
            'B-Org', 'I-Org',
            'B-Per', 'I-Per',
            'B-Prod', 'I-Prod',
            'B-Unk', 'I-Unk']
tag2idx = {tag:idx for idx, tag in enumerate(all_tags)}
idx2tag = {idx:tag for idx, tag in enumerate(all_tags)}
# Sõnastikud, kus on vastavuses arv:märgend ja vastupidi, näiteks tag2idx sõnastikus 'B-Eve' -> 1 ning idx2tag sõnastikus siis 1 -> 'B-Eve'

In [10]:
def split_to_token_and_tag(sents, tag2idx):
  # Sisend: parsitud laused ja tag2idx sõnastik
  # Väljund: Sõnastike list
  # Sõnastik sisaldab kolme elementi: lause ID täisarvuna, märgendite list arvulisel kujul ning sõnade list

  res = {}
  #res = []
  for i, sent in enumerate(sents):
    tags = [tag2idx[tag] for _, tag in sent]
    words = [word for word, _ in sent]
    res[i] = {
        'id': i,
        'tags': tags,
        'tokens': words
    }

  return res

def transform_set(data):
  transformed = {
      "id": [v["id"] for v in data.values()],
      "tags": [v["tags"] for v in data.values()],
      "tokens": [v["tokens"] for v in data.values()]
  }
  ds = Dataset.from_dict(transformed)
  return ds

def process_all(train_sents, dev_sents, test_sents, tag2idx):
  # Sisend: train/dev/test lausete listid ja tag2idx sõnastik
  # Väljund: töödeldud andmestik
  train = split_to_token_and_tag(train_sents, tag2idx)
  dev = split_to_token_and_tag(dev_sents, tag2idx)
  test = split_to_token_and_tag(test_sents, tag2idx)

  train_ds = transform_set(train)
  dev_ds = transform_set(dev)
  test_ds = transform_set(test)

  dataset = DatasetDict({
      'train': train_ds,
      'dev': dev_ds,
      'test': test_ds
  })

  return dataset

# dev_split = split_to_token_and_tag(ewt_dev_sents, tag2idx)
# transformed_dev = {
#     "id": [v["id"] for v in dev_split.values()],
#     "tags": [v["tags"] for v in dev_split.values()],
#     "tokens": [v["tokens"] for v in dev_split.values()]
# }
# ds = Dataset.from_dict(transformed_dev)

In [11]:
ewt_dataset = process_all(ewt_train_sents, ewt_dev_sents, ewt_test_sents, tag2idx)

In [12]:
edt_dataset = process_all(edt_train_sents, edt_dev_sents, edt_test_sents, tag2idx)

In [13]:
combined_dataset = process_all(combined_train_sents, combined_dev_sents, combined_test_sents, tag2idx)

In [14]:
print(ewt_dataset)
print(ewt_dataset['dev'][0])

DatasetDict({
    train: Dataset({
        features: ['id', 'tags', 'tokens'],
        num_rows: 5444
    })
    dev: Dataset({
        features: ['id', 'tags', 'tokens'],
        num_rows: 833
    })
    test: Dataset({
        features: ['id', 'tags', 'tokens'],
        num_rows: 913
    })
})
{'id': 0, 'tags': [0, 0, 0], 'tokens': ['täiesti', 'nõus', '.']}


In [15]:
def save_split_to_json(split_data, output_path):
    serializable_data = []

    for item in split_data:
        data_dict = {
            'id': item['id'],
            'tags': item['tags'],
            'tokens': item['tokens']
        }
        serializable_data.append(data_dict)

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(serializable_data, f, ensure_ascii=False, indent=2)

def save_dataset_to_json(dataset, name=''):
  try:
    for split_name, split_data in dataset.items():
      output_path = f'data/{name}/{split_name}.json'
      save_split_to_json(split_data, output_path)
  except Exception as e:
    print(f"Error: {e}")

In [16]:
save_dataset_to_json(ewt_dataset, 'ewt')
save_dataset_to_json(edt_dataset, 'edt')
save_dataset_to_json(combined_dataset, '')

In [17]:
def load_split_from_json(input_path):
    with open(input_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    dataset_dict = {
        'id': [],
        'tags': [],
        'tokens': []
    }

    for item in data:
        dataset_dict['id'].append(item['id'])
        dataset_dict['tags'].append(item['tags'])
        dataset_dict['tokens'].append(item['tokens'])

    return Dataset.from_dict(dataset_dict)

def load_dataset_from_json(name=''):
  try:
    dataset = DatasetDict()
    for split_name in ['train', 'dev', 'test']:
      input_path = f'data/{name}/{split_name}.json'
      dataset[split_name] = load_split_from_json(input_path)
    return dataset
  except Exception as e:
    print(f"Error: {e}")

In [18]:
ewt_from_json = load_dataset_from_json('ewt')
edt_from_json = load_dataset_from_json('edt')
combined_from_json = load_dataset_from_json()

In [19]:
tokenizer = AutoTokenizer.from_pretrained("tartuNLP/EstBERT") #, max_length=128, padding="max_length", truncation=True

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/62.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/534 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/410k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [20]:
# https://huggingface.co/docs/transformers/en/tasks/token_classification

def tokenize_and_align_labels(sent):
  # Sisend: üks lause
  # Väljund: tokenizeri abil sõnestatud lause, sõnastik sisaldab input_ids, token_type_ids, attention_mask ja labels
  # input_ids on lause 'arvulisel' kujul, igale ID-le vastab mingi sõna (teisendamine funktsiooni tokenizer.convert_ids_to_tokens(input_ids) abil)
  # labels on märgendite list, kus märgendid kattuvad tokenizeri abil sõnestatud lausega

  tokenized_inputs = tokenizer(sent['tokens'], is_split_into_words=True) #, truncation=True, is_split_into_words=True, max_length=128, padding="max_length"
  labels = []
  word_ids = tokenized_inputs.word_ids()
  prev_word_idx = None
  for word_idx in word_ids:
    if word_idx is None:
      labels.append(-100)
    elif word_idx != prev_word_idx:
      labels.append(sent['tags'][word_idx])
    else:
      labels.append(-100)
    previous_word_idx = word_idx

  tokenized_inputs["labels"] = labels
  return tokenized_inputs

In [21]:
example = ewt_dataset['train'][1]
print(f'Lause algselt: {example}')

test_align = tokenize_and_align_labels(example)
tokens = tokenizer.convert_ids_to_tokens(test_align.input_ids)
print(f'Lause nö arvulisel kujul: {test_align.input_ids}')
# Osad sõned tükeldatakse veel, näiteks 'reetpa67' -> 'reet', '##pa', '##67'
print(f'Sõnestatud: {tokens}')
aligned_labels = test_align['labels']
print(f'Märgendid paigas: {aligned_labels}')

Lause algselt: {'id': 1, 'tags': [11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'tokens': ['reetpa67', ':', 'Juba', 'teist', 'aastat', 'ei', 'õitse', 'mul', 'aias', 'nartsissid', '.']}
Lause nö arvulisel kujul: [2, 21999, 199, 23498, 137, 368, 1325, 887, 82, 6917, 164, 392, 10013, 3553, 3188, 379, 15, 3]
Sõnestatud: ['[CLS]', 'reet', '##pa', '##67', ':', 'juba', 'teist', 'aastat', 'ei', 'oi', '##tse', 'mul', 'aias', 'nar', '##tsis', '##sid', '.', '[SEP]']
Märgendid paigas: [-100, 11, 11, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -100]


In [22]:
# NB! Kasutades DatasetDict ja Dataset on see siin ebavajalik, saab otse dataset.map() kasutada
def tokenize_dataset(dataset_dict):
    # Sisend: andmestik sõnastikuna, mis sisaldab train/dev/test hulkasid
    # Väljund:
    processed_dataset = {}

    for split_name, split_data in dataset_dict.items():
        #examples = [split_data[idx] for idx in range(len(split_data))]
        #print(examples == split_data)
        #tokenized_dataset = list(map(tokenize_and_align_labels, examples))
        tokenized_dataset = list(map(tokenize_and_align_labels, split_data))
        processed_split = {
            'input_ids': [],
            'attention_mask': [],
            'labels': []
        }

        for item in tokenized_dataset:
            processed_split['input_ids'].append(item['input_ids'])
            processed_split['attention_mask'].append(item['attention_mask'])
            processed_split['labels'].append(item['labels'])

        processed_dataset[split_name] = processed_split

    return processed_dataset

In [14]:
#tokenized_ewt = tokenize_dataset(ewt_dataset)
#tokenized_edt = tokenize_dataset(edt_dataset)

#tokenized_ewt = ewt_dataset.map(tokenize_and_align_labels)
#tokenized_edt = edt_dataset.map(tokenize_and_align_labels)

tokenized_combined = combined_from_json.map(tokenize_and_align_labels)

Map:   0%|          | 0/30045 [00:00<?, ? examples/s]

Map:   0%|          | 0/3955 [00:00<?, ? examples/s]

Map:   0%|          | 0/4120 [00:00<?, ? examples/s]

In [15]:
print(tokenized_ewt['train'])

Dataset({
    features: ['id', 'tags', 'tokens', 'input_ids', 'token_type_ids', 'attention_mask', 'labels'],
    num_rows: 5444
})


In [20]:
data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer, return_tensors="tf")
data_collator_pt = DataCollatorForTokenClassification(tokenizer=tokenizer)
seqeval = evaluate.load("seqeval")

In [16]:
def compute_metrics(p):
    predictions, labels = p
    predictions = np.argmax(predictions, axis=2)

    true_predictions = [
        [all_tags[p] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]
    true_labels = [
        [all_tags[l] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]

    results = seqeval.compute(predictions=true_predictions, references=true_labels)
    return {
        "precision": results["overall_precision"],
        "recall": results["overall_recall"],
        "f1": results["overall_f1"],
        "accuracy": results["overall_accuracy"],
    }

In [17]:
def finetune_model(tokenized_dataset, epochs=3, batch_size=8):
  num_train_steps = (len(tokenized_dataset["train"]) // batch_size) * epochs
  optimizer, lr_schedule = create_optimizer(
      init_lr=1e-5,
      num_train_steps=num_train_steps,
      weight_decay_rate=0.01,
      num_warmup_steps=0,
      adam_beta1=0.9,
      adam_beta2=0.98,
      adam_epsilon=1e-6
  )

  model = TFAutoModelForTokenClassification.from_pretrained("tartuNLP/EstBERT", num_labels=len(all_tags), id2label=idx2tag, label2id=tag2idx)

  tf_train_set = model.prepare_tf_dataset(
      tokenized_dataset['train'],
      shuffle=True,
      batch_size=batch_size,
      collate_fn=data_collator,
  )
  tf_validation_set = model.prepare_tf_dataset(
      tokenized_dataset['dev'],
      shuffle=False,
      batch_size=batch_size,
      collate_fn=data_collator,
  )

  model.compile(optimizer=optimizer)

  metric_callback = KerasMetricCallback(metric_fn=compute_metrics, eval_dataset=tf_validation_set)
  callbacks = [metric_callback]

  model.fit(x=tf_train_set, validation_data=tf_validation_set, epochs=epochs, callbacks=callbacks)

  return model

In [26]:
def finetune_model_pytorch(tokenized_dataset, epochs=3, batch_size=8):
  model = AutoModelForTokenClassification.from_pretrained("tartuNLP/EstBERT", num_labels=len(all_tags), id2label=idx2tag, label2id=tag2idx)
  training_args = TrainingArguments(
      output_dir='./results',
      learning_rate=1e-5,
      per_device_train_batch_size=batch_size,
      per_device_eval_batch_size=batch_size,
      num_train_epochs=epochs,
      #weight_decay=0.01,
      eval_strategy="epoch",
      save_strategy="epoch",
      optim="adamw_torch",
      load_best_model_at_end=True,
      metric_for_best_model='f1',
      adam_beta1=0.9,
      adam_beta2=0.98,
      adam_epsilon=1e-6,
      fp16=True
  )

  early_stopping_callback = EarlyStoppingCallback(
        early_stopping_patience=2,
        early_stopping_threshold=0.0001
    )

  trainer = Trainer(
      model=model,
      args=training_args,
      train_dataset=tokenized_dataset['train'],
      eval_dataset=tokenized_dataset['dev'],
      processing_class=tokenizer,
      data_collator=data_collator_pt,
      compute_metrics=compute_metrics,
      callbacks=[early_stopping_callback]
  )

  trainer.train()

  return model

#ewt_torch = finetune_model_pytorch(tokenized_ewt, 1)
combined_torch = finetune_model_pytorch(tokenized_combined, 3, 16)

Some weights of BertForTokenClassification were not initialized from the model checkpoint at tartuNLP/EstBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,0.158,0.160673,0.641882,0.653143,0.647464,0.954984
2,0.1063,0.153437,0.681296,0.672332,0.676784,0.959723
3,0.0883,0.156056,0.696505,0.681104,0.688718,0.960795


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [25]:
# train_ewt = Dataset.from_dict({
#         'input_ids': np.array(tokenized_ewt['train']['input_ids']),
#         'attention_mask': np.array(tokenized_ewt['train']['attention_mask']),
#         'labels': np.array(tokenized_ewt['train']['labels'])
#     })
# dev_ewt = Dataset.from_dict({
#         'input_ids': np.array(tokenized_ewt['dev']['input_ids']),
#         'attention_mask': np.array(tokenized_ewt['dev']['attention_mask']),
#         'labels': np.array(tokenized_ewt['dev']['labels'])
#     })
# test_ewt = Dataset.from_dict({
#         'input_ids': np.array(tokenized_ewt['test']['input_ids']),
#         'attention_mask': np.array(tokenized_ewt['test']['attention_mask']),
#         'labels': np.array(tokenized_ewt['test']['labels'])
#     })

# tf_train_set = model.prepare_tf_dataset(
#     train_ewt,
#     shuffle=True,
#     batch_size=8,
#     collate_fn=data_collator,
# )

# tf_validation_set = model.prepare_tf_dataset(
#     dev_ewt,
#     shuffle=False,
#     batch_size=8,
#     collate_fn=data_collator,
# )

# tf_train_set = model.prepare_tf_dataset(
#     tokenized_ewt['train'],
#     shuffle=True,
#     batch_size=8,
#     collate_fn=data_collator,
# )

# tf_validation_set = model.prepare_tf_dataset(
#     tokenized_ewt['dev'],
#     shuffle=False,
#     batch_size=8,
#     collate_fn=data_collator,
# )

In [18]:
#ewt_model = finetune_model(tokenized_ewt)
combined_model = finetune_model(tokenized_combined)

model.safetensors:   0%|          | 0.00/498M [00:00<?, ?B/s]

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertForTokenClassification: ['bert.embeddings.position_ids']
- This IS expected if you are initializing TFBertForTokenClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertForTokenClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFBertForTokenClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3

  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/3
Epoch 3/3
