In [5]:
import json
import logging
import os
import glob
import re

import numpy as np
import torch
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from torch.nn import CrossEntropyLoss
from fastprogress.fastprogress import master_bar, progress_bar
from attrdict import AttrDict

from transformers import (
    AdamW,
    get_linear_schedule_with_warmup
)

from src import (
    CONFIG_CLASSES,
    TOKENIZER_CLASSES,
    MODEL_FOR_TOKEN_CLASSIFICATION,
    init_logger,
    set_seed,
    compute_metrics,
    show_ner_report
)

from processor import ner_load_and_cache_examples as load_and_cache_examples
from processor import ner_tasks_num_labels as tasks_num_labels
from processor import ner_processors as processors

# Logger for this notebook's own messages, named after the current module.
logger = logging.getLogger(__name__)

from run_ner import train, evaluate

In [17]:
# Set up logging before anything is logged. With Python's default logging
# configuration INFO records are discarded, so on a fresh kernel the parameter
# dump below would silently disappear if this ran later in the cell.
# NOTE(review): assumes init_logger() installs the handler/level — confirm in src.
init_logger()

config_path = "../../../../data/KoELECTRA_config/naver-ner/koelectra-small-v3.json"
# Read from config file and make args.
# Explicit UTF-8 so parsing does not depend on the platform's locale encoding.
with open(config_path, encoding="utf-8") as f:
    args = AttrDict(json.load(f))
logger.info("Training/evaluation parameters {}".format(args))

# Checkpoints are written under <ckpt_dir>/<output_dir>.
# args is rebuilt from the file on every run of this cell, so re-running it
# does not nest the path twice.
args.output_dir = os.path.join(args.ckpt_dir, args.output_dir)

set_seed(args)

02/06/2024 02:44:18 - INFO - __main__ -   Training/evaluation parameters AttrDict({'task': 'naver-ner', 'data_dir': '../../../../data/KoELECTRA', 'ckpt_dir': 'ckpt', 'train_file': 'train.tsv', 'dev_file': '', 'test_file': 'test.tsv', 'evaluate_test_during_training': True, 'eval_all_checkpoints': True, 'save_optimizer': False, 'do_lower_case': False, 'do_train': True, 'do_eval': True, 'max_seq_len': 128, 'num_train_epochs': 20, 'weight_decay': 0.0, 'gradient_accumulation_steps': 1, 'adam_epsilon': 1e-08, 'warmup_proportion': 0, 'max_steps': -1, 'max_grad_norm': 1.0, 'no_cuda': False, 'model_type': 'koelectra-small-v3', 'model_name_or_path': 'monologg/koelectra-small-v3-discriminator', 'output_dir': 'koelectra-small-v3-naver-ner-ckpt', 'seed': 42, 'train_batch_size': 32, 'eval_batch_size': 128, 'logging_steps': 1000, 'save_steps': 1000, 'learning_rate': 5e-05})


In [18]:
# Directory passed to every from_pretrained() call below as cache_dir.
cache_dir = "../../../../models/huggingface"

# The task-specific processor provides the label list.
processor = processors[args.task](args)
labels = processor.get_labels()

# Label <-> index lookup tables stored on the model config.
id_to_label = {str(idx): tag for idx, tag in enumerate(labels)}
label_to_id = {tag: idx for idx, tag in enumerate(labels)}

# Resolve the concrete classes registered for this model type.
config_cls = CONFIG_CLASSES[args.model_type]
tokenizer_cls = TOKENIZER_CLASSES[args.model_type]
model_cls = MODEL_FOR_TOKEN_CLASSIFICATION[args.model_type]

config = config_cls.from_pretrained(
    args.model_name_or_path,
    num_labels=tasks_num_labels[args.task],
    id2label=id_to_label,
    label2id=label_to_id,
    cache_dir=cache_dir,
)
tokenizer = tokenizer_cls.from_pretrained(
    args.model_name_or_path,
    do_lower_case=args.do_lower_case,
    cache_dir=cache_dir,
)
model = model_cls.from_pretrained(
    args.model_name_or_path,
    config=config,
    cache_dir=cache_dir,
)

Some weights of ElectraForTokenClassification were not initialized from the model checkpoint at monologg/koelectra-small-v3-discriminator and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [19]:
# Pick the compute device: CUDA when it is available and not disabled
# through args.no_cuda, otherwise fall back to the CPU.
if torch.cuda.is_available() and not args.no_cuda:
    args.device = "cuda"
else:
    args.device = "cpu"

# Move the model to the chosen device and show which one was selected.
model.to(args.device)
print(args.device)

cuda


In [20]:
# Build each split only when the config names a file for it;
# a split with no configured file is represented by None.
if args.train_file:
    train_dataset = load_and_cache_examples(args, tokenizer, mode="train")
else:
    train_dataset = None

if args.dev_file:
    dev_dataset = load_and_cache_examples(args, tokenizer, mode="dev")
else:
    dev_dataset = None

if args.test_file:
    test_dataset = load_and_cache_examples(args, tokenizer, mode="test")
else:
    test_dataset = None


02/06/2024 02:44:31 - INFO - processor.ner -   Creating features from dataset file at ../../../../data/KoELECTRA
02/06/2024 02:44:31 - INFO - processor.ner -   LOOKING AT ../../../../data/KoELECTRA/naver-ner/train.tsv
02/06/2024 02:44:31 - INFO - processor.ner -   금석객잔 여러분, 감사드립니다 .	ORG-B O O O
02/06/2024 02:44:31 - INFO - processor.ner -   -얼마 전에는 강남에 강천수 씨 소유의 건축물이 25억이 넘는다고 해서 말거리가 됐는데요 .	O O LOC-B PER-B O O O NUM-B O O O O O
02/06/2024 02:44:31 - INFO - processor.ner -   ” 한옥순 사장은 기념물 용적량 축소의 의미를 이렇게 표현했다 .	O PER-B CVL-B TRM-B O O O O O O
02/06/2024 02:44:31 - INFO - processor.ner -   )-경기 서반 부진했던 근거는 무엇이라 생각하는지 ?	O O O O O O O
02/06/2024 02:44:31 - INFO - processor.ner -   미다스 구귀족의 무비 아는 남자에서 불가사의한 첫 출연.	PER-B CVL-B O O O O NUM-B O
02/06/2024 02:44:31 - INFO - processor.ner -   USGA는 최하의 선수권자를 가리기 위해 엄준한 코스세팅을 한다 .	ORG-B O CVL-B O O O O O O
02/06/2024 02:44:31 - INFO - processor.ner -   남자유도 -60kg에 출정한 마터스(한국마사회)는 2회전부터 결승전까지 1 불연속 한판승을 거두는 괴력을 분휘하며 신세대를 목에 걸었다 .	CVL-B NUM-B O PER

In [21]:
# If there is no dev dataset, only use the test set for evaluation during training.
# Identity comparison is the correct None check; `== None` dispatches to the
# object's __eq__, which a dataset class is free to override.
# NOTE(review): periodic evaluation on the test split means any checkpoint
# selection based on these scores is tuned on test data — confirm this is intended.
if dev_dataset is None:
    args.evaluate_test_during_training = True

if args.do_train:
    # train() is imported from run_ner; its two return values are logged below
    # as the global step count and the average loss.
    global_step, tr_loss = train(args, model, train_dataset, dev_dataset, test_dataset)
    logger.info(" global_step = {}, average loss = {}".format(global_step, tr_loss))


02/06/2024 02:45:48 - INFO - run_ner -   ***** Running training *****
02/06/2024 02:45:48 - INFO - run_ner -     Num examples = 81000
02/06/2024 02:45:48 - INFO - run_ner -     Num Epochs = 20
02/06/2024 02:45:48 - INFO - run_ner -     Total train batch size = 32
02/06/2024 02:45:48 - INFO - run_ner -     Gradient Accumulation steps = 1
02/06/2024 02:45:48 - INFO - run_ner -     Total optimization steps = 50640
02/06/2024 02:45:48 - INFO - run_ner -     Logging steps = 1000
02/06/2024 02:45:48 - INFO - run_ner -     Save steps = 1000


Epoch 1/20 : |███████████████-------------------------| 39.45% [999/2532 00:27<00:42]

02/06/2024 02:46:16 - INFO - run_ner -   ***** Running evaluation on test dataset (1000 step) *****
02/06/2024 02:46:16 - INFO - run_ner -     Num examples = 9000
02/06/2024 02:46:16 - INFO - run_ner -     Eval Batch size = 128


 |████████████████████████████████████████| 100.00% [71/71 00:02<00:00]

02/06/2024 02:46:19 - INFO - run_ner -   ***** Eval results on test dataset *****
02/06/2024 02:46:19 - INFO - run_ner -     f1 = 0.714103631697908
02/06/2024 02:46:19 - INFO - run_ner -     loss = 0.43820058459966954
02/06/2024 02:46:19 - INFO - run_ner -     precision = 0.709917215745903
02/06/2024 02:46:19 - INFO - run_ner -     recall = 0.718339715536105
  _warn_prf(average, modifier, msg_start, len(result))
02/06/2024 02:46:19 - INFO - run_ner -   
              precision    recall  f1-score   support

         AFW       0.00      0.00      0.00       394
         ANM       0.78      0.31      0.44       701
         CVL       0.71      0.69      0.70      5758
         DAT       0.74      0.85      0.79      2521
         EVT       0.50      0.61      0.55      1094
         FLD       0.00      0.00      0.00       228
         LOC       0.62      0.74      0.67      2126
         MAT       0.00      0.00      0.00        12
         NUM       0.83      0.89      0.86      5590
 

Epoch 1/20 : |███████████████████████████████---------| 78.83% [1996/2532 00:57<00:15]

02/06/2024 02:46:45 - INFO - run_ner -   ***** Running evaluation on test dataset (2000 step) *****
02/06/2024 02:46:45 - INFO - run_ner -     Num examples = 9000
02/06/2024 02:46:45 - INFO - run_ner -     Eval Batch size = 128


 |████████████████████████████████████████| 100.00% [71/71 00:02<00:00]

02/06/2024 02:46:48 - INFO - run_ner -   ***** Eval results on test dataset *****
02/06/2024 02:46:48 - INFO - run_ner -     f1 = 0.7626676665587238
02/06/2024 02:46:48 - INFO - run_ner -     loss = 0.3313604160933427
02/06/2024 02:46:48 - INFO - run_ner -     precision = 0.760373831775701
02/06/2024 02:46:48 - INFO - run_ner -     recall = 0.7649753829321663
02/06/2024 02:46:49 - INFO - run_ner -   
              precision    recall  f1-score   support

         AFW       0.40      0.01      0.02       394
         ANM       0.71      0.46      0.56       701
         CVL       0.75      0.72      0.73      5758
         DAT       0.85      0.90      0.87      2521
         EVT       0.67      0.68      0.67      1094
         FLD       0.00      0.00      0.00       228
         LOC       0.73      0.74      0.73      2126
         MAT       0.00      0.00      0.00        12
         NUM       0.88      0.91      0.89      5590
         ORG       0.74      0.78      0.76      4086
 

Epoch 1 done                                                                           
Epoch 2/20 : |███████---------------------------------| 18.29% [463/2532 00:11<00:52]

02/06/2024 02:47:15 - INFO - run_ner -   ***** Running evaluation on test dataset (3000 step) *****
02/06/2024 02:47:15 - INFO - run_ner -     Num examples = 9000
02/06/2024 02:47:15 - INFO - run_ner -     Eval Batch size = 128


 |████████████████████████████████████████| 100.00% [71/71 00:02<00:00]

02/06/2024 02:47:18 - INFO - run_ner -   ***** Eval results on test dataset *****
02/06/2024 02:47:18 - INFO - run_ner -     f1 = 0.7862342147137489
02/06/2024 02:47:18 - INFO - run_ner -     loss = 0.2945446730919287
02/06/2024 02:47:18 - INFO - run_ner -     precision = 0.7786159726413197
02/06/2024 02:47:18 - INFO - run_ner -     recall = 0.7940030087527352
02/06/2024 02:47:19 - INFO - run_ner -   
              precision    recall  f1-score   support

         AFW       0.35      0.30      0.32       394
         ANM       0.64      0.65      0.65       701
         CVL       0.75      0.77      0.76      5758
         DAT       0.85      0.91      0.88      2521
         EVT       0.67      0.74      0.70      1094
         FLD       0.85      0.18      0.29       228
         LOC       0.70      0.80      0.75      2126
         MAT       0.00      0.00      0.00        12
         NUM       0.90      0.91      0.91      5590
         ORG       0.78      0.79      0.79      4086


Epoch 2/20 : |███████████████████████-----------------| 57.94% [1467/2532 00:41<00:30]

02/06/2024 02:47:45 - INFO - run_ner -   ***** Running evaluation on test dataset (4000 step) *****
02/06/2024 02:47:45 - INFO - run_ner -     Num examples = 9000
02/06/2024 02:47:45 - INFO - run_ner -     Eval Batch size = 128


 |████████████████████████████████████████| 100.00% [71/71 00:02<00:00]

02/06/2024 02:47:48 - INFO - run_ner -   ***** Eval results on test dataset *****
02/06/2024 02:47:48 - INFO - run_ner -     f1 = 0.8031878270448912
02/06/2024 02:47:48 - INFO - run_ner -     loss = 0.2651865671218281
02/06/2024 02:47:48 - INFO - run_ner -     precision = 0.8087562396006656
02/06/2024 02:47:48 - INFO - run_ner -     recall = 0.7976955689277899
02/06/2024 02:47:49 - INFO - run_ner -   
              precision    recall  f1-score   support

         AFW       0.48      0.32      0.38       394
         ANM       0.73      0.62      0.67       701
         CVL       0.77      0.77      0.77      5758
         DAT       0.90      0.90      0.90      2521
         EVT       0.73      0.73      0.73      1094
         FLD       0.81      0.30      0.44       228
         LOC       0.78      0.77      0.78      2126
         MAT       0.00      0.00      0.00        12
         NUM       0.89      0.92      0.90      5590
         ORG       0.79      0.81      0.80      4086


Epoch 2/20 : |██████████████████████████████████████--| 97.31% [2464/2532 01:14<00:02]

02/06/2024 02:48:17 - INFO - run_ner -   ***** Running evaluation on test dataset (5000 step) *****
02/06/2024 02:48:17 - INFO - run_ner -     Num examples = 9000
02/06/2024 02:48:17 - INFO - run_ner -     Eval Batch size = 128


 |████████████████████████████████████████| 100.00% [71/71 00:02<00:00]

02/06/2024 02:48:21 - INFO - run_ner -   ***** Eval results on test dataset *****
02/06/2024 02:48:21 - INFO - run_ner -     f1 = 0.8128922527425801
02/06/2024 02:48:21 - INFO - run_ner -     loss = 0.2508525772833488
02/06/2024 02:48:21 - INFO - run_ner -     precision = 0.8087792330862693
02/06/2024 02:48:21 - INFO - run_ner -     recall = 0.8170473194748359
02/06/2024 02:48:21 - INFO - run_ner -   
              precision    recall  f1-score   support

         AFW       0.49      0.40      0.44       394
         ANM       0.73      0.68      0.70       701
         CVL       0.76      0.80      0.78      5758
         DAT       0.90      0.91      0.90      2521
         EVT       0.73      0.76      0.74      1094
         FLD       0.59      0.51      0.55       228
         LOC       0.82      0.76      0.79      2126
         MAT       0.00      0.00      0.00        12
         NUM       0.91      0.92      0.91      5590
         ORG       0.81      0.82      0.82      4086


Epoch 2 done                                                                           
Epoch 3/20 : |██████████████--------------------------| 36.89% [934/2532 00:22<00:38]

02/06/2024 02:48:47 - INFO - run_ner -   ***** Running evaluation on test dataset (6000 step) *****
02/06/2024 02:48:47 - INFO - run_ner -     Num examples = 9000
02/06/2024 02:48:47 - INFO - run_ner -     Eval Batch size = 128


 |████████████████████████████████████████| 100.00% [71/71 00:02<00:00]

02/06/2024 02:48:50 - INFO - run_ner -   ***** Eval results on test dataset *****
02/06/2024 02:48:50 - INFO - run_ner -     f1 = 0.8127008830395507
02/06/2024 02:48:50 - INFO - run_ner -     loss = 0.2480069982333922
02/06/2024 02:48:50 - INFO - run_ner -     precision = 0.8042925065291636
02/06/2024 02:48:50 - INFO - run_ner -     recall = 0.8212869256017505
02/06/2024 02:48:50 - INFO - run_ner -   
              precision    recall  f1-score   support

         AFW       0.38      0.51      0.44       394
         ANM       0.66      0.75      0.70       701
         CVL       0.79      0.79      0.79      5758
         DAT       0.89      0.91      0.90      2521
         EVT       0.66      0.77      0.71      1094
         FLD       0.57      0.58      0.57       228
         LOC       0.79      0.79      0.79      2126
         MAT       0.00      0.00      0.00        12
         NUM       0.90      0.92      0.91      5590
         ORG       0.82      0.81      0.81      4086


Epoch 3/20 : |██████████████████████████████----------| 76.22% [1930/2532 00:51<00:16]

02/06/2024 02:49:16 - INFO - run_ner -   ***** Running evaluation on test dataset (7000 step) *****
02/06/2024 02:49:16 - INFO - run_ner -     Num examples = 9000
02/06/2024 02:49:16 - INFO - run_ner -     Eval Batch size = 128


 |████████████████████████████████████████| 100.00% [71/71 00:02<00:00]

02/06/2024 02:49:19 - INFO - run_ner -   ***** Eval results on test dataset *****
02/06/2024 02:49:19 - INFO - run_ner -     f1 = 0.8204787460601414
02/06/2024 02:49:19 - INFO - run_ner -     loss = 0.23534270743249167
02/06/2024 02:49:19 - INFO - run_ner -     precision = 0.8177063877474785
02/06/2024 02:49:19 - INFO - run_ner -     recall = 0.8232699671772429
02/06/2024 02:49:19 - INFO - run_ner -   
              precision    recall  f1-score   support

         AFW       0.51      0.49      0.50       394
         ANM       0.63      0.76      0.69       701
         CVL       0.82      0.77      0.79      5758
         DAT       0.91      0.91      0.91      2521
         EVT       0.76      0.75      0.75      1094
         FLD       0.58      0.59      0.58       228
         LOC       0.78      0.82      0.80      2126
         MAT       0.00      0.00      0.00        12
         NUM       0.89      0.93      0.91      5590
         ORG       0.84      0.83      0.83      4086

Epoch 3 done                                                                           
Epoch 4/20 : |██████----------------------------------| 15.92% [403/2532 00:09<00:52]

02/06/2024 02:49:45 - INFO - run_ner -   ***** Running evaluation on test dataset (8000 step) *****
02/06/2024 02:49:45 - INFO - run_ner -     Num examples = 9000
02/06/2024 02:49:45 - INFO - run_ner -     Eval Batch size = 128


 |████████████████████████████████████████| 100.00% [71/71 00:02<00:00]

02/06/2024 02:49:48 - INFO - run_ner -   ***** Eval results on test dataset *****
02/06/2024 02:49:48 - INFO - run_ner -     f1 = 0.8282962357906201
02/06/2024 02:49:48 - INFO - run_ner -     loss = 0.23038874104828902
02/06/2024 02:49:48 - INFO - run_ner -     precision = 0.8245358449654425
02/06/2024 02:49:48 - INFO - run_ner -     recall = 0.8320910831509847
02/06/2024 02:49:49 - INFO - run_ner -   
              precision    recall  f1-score   support

         AFW       0.51      0.51      0.51       394
         ANM       0.69      0.74      0.72       701
         CVL       0.80      0.81      0.80      5758
         DAT       0.90      0.92      0.91      2521
         EVT       0.73      0.75      0.74      1094
         FLD       0.56      0.61      0.58       228
         LOC       0.81      0.82      0.81      2126
         MAT       0.00      0.00      0.00        12
         NUM       0.91      0.92      0.92      5590
         ORG       0.85      0.82      0.84      4086

Epoch 4/20 : |██████████████████████------------------| 55.37% [1402/2532 00:38<00:31]

02/06/2024 02:50:14 - INFO - run_ner -   ***** Running evaluation on test dataset (9000 step) *****
02/06/2024 02:50:14 - INFO - run_ner -     Num examples = 9000
02/06/2024 02:50:14 - INFO - run_ner -     Eval Batch size = 128


 |████████████████████████████████████████| 100.00% [71/71 00:02<00:00]

02/06/2024 02:50:17 - INFO - run_ner -   ***** Eval results on test dataset *****
02/06/2024 02:50:17 - INFO - run_ner -     f1 = 0.8302228602679792
02/06/2024 02:50:17 - INFO - run_ner -     loss = 0.2251335388757813
02/06/2024 02:50:17 - INFO - run_ner -     precision = 0.8299958993985784
02/06/2024 02:50:17 - INFO - run_ner -     recall = 0.8304499452954048
02/06/2024 02:50:18 - INFO - run_ner -   
              precision    recall  f1-score   support

         AFW       0.53      0.49      0.51       394
         ANM       0.77      0.69      0.73       701
         CVL       0.79      0.81      0.80      5758
         DAT       0.89      0.93      0.91      2521
         EVT       0.73      0.77      0.75      1094
         FLD       0.66      0.54      0.60       228
         LOC       0.80      0.84      0.82      2126
         MAT       0.00      0.00      0.00        12
         NUM       0.91      0.92      0.91      5590
         ORG       0.84      0.84      0.84      4086


Epoch 4/20 : |█████████████████████████████████████---| 94.87% [2402/2532 01:07<00:03]

02/06/2024 02:50:43 - INFO - run_ner -   ***** Running evaluation on test dataset (10000 step) *****
02/06/2024 02:50:43 - INFO - run_ner -     Num examples = 9000
02/06/2024 02:50:43 - INFO - run_ner -     Eval Batch size = 128


 |████████████████████████████████████████| 100.00% [71/71 00:02<00:00]

02/06/2024 02:50:46 - INFO - run_ner -   ***** Eval results on test dataset *****
02/06/2024 02:50:46 - INFO - run_ner -     f1 = 0.8323132587157205
02/06/2024 02:50:46 - INFO - run_ner -     loss = 0.22266232274787526
02/06/2024 02:50:46 - INFO - run_ner -     precision = 0.8232657702856379
02/06/2024 02:50:46 - INFO - run_ner -     recall = 0.8415618161925602
02/06/2024 02:50:47 - INFO - run_ner -   
              precision    recall  f1-score   support

         AFW       0.57      0.52      0.54       394
         ANM       0.74      0.72      0.73       701
         CVL       0.77      0.83      0.80      5758
         DAT       0.91      0.93      0.92      2521
         EVT       0.76      0.76      0.76      1094
         FLD       0.60      0.62      0.61       228
         LOC       0.85      0.80      0.82      2126
         MAT       0.00      0.00      0.00        12
         NUM       0.91      0.92      0.92      5590
         ORG       0.85      0.84      0.84      4086

Epoch 4 done                                                                           
Epoch 5/20 : |█████████████---------------------------| 34.24% [867/2532 00:21<00:40]

02/06/2024 02:51:12 - INFO - run_ner -   ***** Running evaluation on test dataset (11000 step) *****
02/06/2024 02:51:12 - INFO - run_ner -     Num examples = 9000
02/06/2024 02:51:12 - INFO - run_ner -     Eval Batch size = 128


 |████████████████████████████████████████| 100.00% [71/71 00:02<00:00]

02/06/2024 02:51:15 - INFO - run_ner -   ***** Eval results on test dataset *****
02/06/2024 02:51:15 - INFO - run_ner -     f1 = 0.8367087103400275
02/06/2024 02:51:15 - INFO - run_ner -     loss = 0.22068260736028913
02/06/2024 02:51:15 - INFO - run_ner -     precision = 0.830911359115277
02/06/2024 02:51:15 - INFO - run_ner -     recall = 0.8425875273522976
02/06/2024 02:51:16 - INFO - run_ner -   
              precision    recall  f1-score   support

         AFW       0.54      0.53      0.54       394
         ANM       0.72      0.74      0.73       701
         CVL       0.80      0.82      0.81      5758
         DAT       0.90      0.93      0.92      2521
         EVT       0.74      0.76      0.75      1094
         FLD       0.69      0.56      0.62       228
         LOC       0.85      0.81      0.83      2126
         MAT       0.00      0.00      0.00        12
         NUM       0.92      0.93      0.92      5590
         ORG       0.85      0.84      0.84      4086


Epoch 5/20 : |█████████████████████████████-----------| 73.78% [1868/2532 00:50<00:17]

02/06/2024 02:51:42 - INFO - run_ner -   ***** Running evaluation on test dataset (12000 step) *****
02/06/2024 02:51:42 - INFO - run_ner -     Num examples = 9000
02/06/2024 02:51:42 - INFO - run_ner -     Eval Batch size = 128


 |████████████████████████████████████████| 100.00% [71/71 00:02<00:00]

02/06/2024 02:51:45 - INFO - run_ner -   ***** Eval results on test dataset *****
02/06/2024 02:51:45 - INFO - run_ner -     f1 = 0.8379452008284385
02/06/2024 02:51:45 - INFO - run_ner -     loss = 0.2161395060764232
02/06/2024 02:51:45 - INFO - run_ner -     precision = 0.8321532132982669
02/06/2024 02:51:45 - INFO - run_ner -     recall = 0.8438183807439825
02/06/2024 02:51:46 - INFO - run_ner -   
              precision    recall  f1-score   support

         AFW       0.60      0.55      0.57       394
         ANM       0.77      0.72      0.74       701
         CVL       0.78      0.83      0.81      5758
         DAT       0.90      0.92      0.91      2521
         EVT       0.72      0.77      0.75      1094
         FLD       0.64      0.58      0.61       228
         LOC       0.85      0.81      0.83      2126
         MAT       0.00      0.00      0.00        12
         NUM       0.92      0.92      0.92      5590
         ORG       0.85      0.84      0.84      4086


Epoch 5 done                                                                           
Epoch 6/20 : |█████-----------------------------------| 13.39% [339/2532 00:08<00:54]

02/06/2024 02:52:11 - INFO - run_ner -   ***** Running evaluation on test dataset (13000 step) *****
02/06/2024 02:52:11 - INFO - run_ner -     Num examples = 9000
02/06/2024 02:52:11 - INFO - run_ner -     Eval Batch size = 128


 |████████████████████████████████████████| 100.00% [71/71 00:02<00:00]

02/06/2024 02:52:14 - INFO - run_ner -   ***** Eval results on test dataset *****
02/06/2024 02:52:14 - INFO - run_ner -     f1 = 0.839269313195224
02/06/2024 02:52:14 - INFO - run_ner -     loss = 0.2182343262182155
02/06/2024 02:52:14 - INFO - run_ner -     precision = 0.8350368916266161
02/06/2024 02:52:14 - INFO - run_ner -     recall = 0.8435448577680525
02/06/2024 02:52:15 - INFO - run_ner -   
              precision    recall  f1-score   support

         AFW       0.55      0.53      0.54       394
         ANM       0.75      0.74      0.74       701
         CVL       0.82      0.81      0.81      5758
         DAT       0.90      0.93      0.91      2521
         EVT       0.73      0.78      0.75      1094
         FLD       0.62      0.66      0.64       228
         LOC       0.80      0.85      0.82      2126
         MAT       0.00      0.00      0.00        12
         NUM       0.92      0.93      0.92      5590
         ORG       0.86      0.84      0.85      4086
 

Epoch 6/20 : |█████████████████████-------------------| 52.73% [1335/2532 00:37<00:33]

02/06/2024 02:52:40 - INFO - run_ner -   ***** Running evaluation on test dataset (14000 step) *****
02/06/2024 02:52:40 - INFO - run_ner -     Num examples = 9000
02/06/2024 02:52:40 - INFO - run_ner -     Eval Batch size = 128


 |████████████████████████████████████████| 100.00% [71/71 00:02<00:00]

02/06/2024 02:52:43 - INFO - run_ner -   ***** Eval results on test dataset *****
02/06/2024 02:52:43 - INFO - run_ner -     f1 = 0.8432201232241967
02/06/2024 02:52:43 - INFO - run_ner -     loss = 0.21553516450902105
02/06/2024 02:52:43 - INFO - run_ner -     precision = 0.8372375206444437
02/06/2024 02:52:43 - INFO - run_ner -     recall = 0.8492888402625821
02/06/2024 02:52:44 - INFO - run_ner -   
              precision    recall  f1-score   support

         AFW       0.57      0.54      0.55       394
         ANM       0.73      0.77      0.75       701
         CVL       0.81      0.83      0.82      5758
         DAT       0.90      0.92      0.91      2521
         EVT       0.74      0.78      0.76      1094
         FLD       0.65      0.61      0.63       228
         LOC       0.83      0.84      0.83      2126
         MAT       0.00      0.00      0.00        12
         NUM       0.91      0.93      0.92      5590
         ORG       0.87      0.84      0.85      4086

Epoch 6/20 : |████████████████████████████████████----| 92.18% [2334/2532 01:06<00:05]

02/06/2024 02:53:09 - INFO - run_ner -   ***** Running evaluation on test dataset (15000 step) *****
02/06/2024 02:53:09 - INFO - run_ner -     Num examples = 9000
02/06/2024 02:53:09 - INFO - run_ner -     Eval Batch size = 128


 |████████████████████████████████████████| 100.00% [71/71 00:02<00:00]

02/06/2024 02:53:12 - INFO - run_ner -   ***** Eval results on test dataset *****
02/06/2024 02:53:12 - INFO - run_ner -     f1 = 0.8426273912307432
02/06/2024 02:53:12 - INFO - run_ner -     loss = 0.21483306943530767
02/06/2024 02:53:12 - INFO - run_ner -     precision = 0.8345181409697539
02/06/2024 02:53:12 - INFO - run_ner -     recall = 0.8508957877461707
02/06/2024 02:53:13 - INFO - run_ner -   
              precision    recall  f1-score   support

         AFW       0.58      0.57      0.57       394
         ANM       0.73      0.76      0.75       701
         CVL       0.79      0.84      0.81      5758
         DAT       0.91      0.93      0.92      2521
         EVT       0.74      0.77      0.75      1094
         FLD       0.63      0.61      0.62       228
         LOC       0.85      0.82      0.84      2126
         MAT       0.00      0.00      0.00        12
         NUM       0.92      0.93      0.92      5590
         ORG       0.84      0.85      0.85      4086

Epoch 6 done                                                                           
Epoch 7/20 : |████████████----------------------------| 31.71% [803/2532 00:19<00:42]

02/06/2024 02:53:38 - INFO - run_ner -   ***** Running evaluation on test dataset (16000 step) *****
02/06/2024 02:53:38 - INFO - run_ner -     Num examples = 9000
02/06/2024 02:53:38 - INFO - run_ner -     Eval Batch size = 128


 |████████████████████████████████████████| 100.00% [71/71 00:02<00:00]

02/06/2024 02:53:41 - INFO - run_ner -   ***** Eval results on test dataset *****
02/06/2024 02:53:41 - INFO - run_ner -     f1 = 0.8442342250703557
02/06/2024 02:53:41 - INFO - run_ner -     loss = 0.21693137896732545
02/06/2024 02:53:41 - INFO - run_ner -     precision = 0.8372789024144193
02/06/2024 02:53:41 - INFO - run_ner -     recall = 0.8513060722100656
02/06/2024 02:53:42 - INFO - run_ner -   
              precision    recall  f1-score   support

         AFW       0.60      0.58      0.59       394
         ANM       0.68      0.79      0.73       701
         CVL       0.82      0.82      0.82      5758
         DAT       0.90      0.93      0.92      2521
         EVT       0.75      0.78      0.76      1094
         FLD       0.66      0.62      0.64       228
         LOC       0.82      0.85      0.83      2126
         MAT       0.00      0.00      0.00        12
         NUM       0.91      0.93      0.92      5590
         ORG       0.88      0.83      0.85      4086

Epoch 7/20 : |████████████████████████████------------| 71.25% [1804/2532 00:49<00:19]

02/06/2024 02:54:07 - INFO - run_ner -   ***** Running evaluation on test dataset (17000 step) *****
02/06/2024 02:54:07 - INFO - run_ner -     Num examples = 9000
02/06/2024 02:54:07 - INFO - run_ner -     Eval Batch size = 128


 |████████████████████████████████████████| 100.00% [71/71 00:02<00:00]

02/06/2024 02:54:10 - INFO - run_ner -   ***** Eval results on test dataset *****
02/06/2024 02:54:10 - INFO - run_ner -     f1 = 0.845857983062833
02/06/2024 02:54:10 - INFO - run_ner -     loss = 0.21323744637865416
02/06/2024 02:54:10 - INFO - run_ner -     precision = 0.8534683791027113
02/06/2024 02:54:10 - INFO - run_ner -     recall = 0.8383821115973742
02/06/2024 02:54:11 - INFO - run_ner -   
              precision    recall  f1-score   support

         AFW       0.62      0.53      0.57       394
         ANM       0.75      0.75      0.75       701
         CVL       0.84      0.80      0.82      5758
         DAT       0.91      0.92      0.92      2521
         EVT       0.76      0.76      0.76      1094
         FLD       0.72      0.57      0.64       228
         LOC       0.83      0.84      0.83      2126
         MAT       0.00      0.00      0.00        12
         NUM       0.91      0.93      0.92      5590
         ORG       0.86      0.85      0.86      4086


Epoch 7 done                                                                           
Epoch 8/20 : |████------------------------------------| 10.82% [274/2532 00:06<00:55]

02/06/2024 02:54:36 - INFO - run_ner -   ***** Running evaluation on test dataset (18000 step) *****
02/06/2024 02:54:36 - INFO - run_ner -     Num examples = 9000
02/06/2024 02:54:36 - INFO - run_ner -     Eval Batch size = 128


 |████████████████████████████████████████| 100.00% [71/71 00:02<00:00]

02/06/2024 02:54:39 - INFO - run_ner -   ***** Eval results on test dataset *****
02/06/2024 02:54:39 - INFO - run_ner -     f1 = 0.8479229503701672
02/06/2024 02:54:39 - INFO - run_ner -     loss = 0.21461330228288408
02/06/2024 02:54:39 - INFO - run_ner -     precision = 0.8500206157229246
02/06/2024 02:54:39 - INFO - run_ner -     recall = 0.8458356126914661
02/06/2024 02:54:40 - INFO - run_ner -   
              precision    recall  f1-score   support

         AFW       0.58      0.56      0.57       394
         ANM       0.76      0.77      0.76       701
         CVL       0.84      0.81      0.82      5758
         DAT       0.92      0.93      0.92      2521
         EVT       0.75      0.77      0.76      1094
         FLD       0.69      0.58      0.63       228
         LOC       0.84      0.84      0.84      2126
         MAT       0.00      0.00      0.00        12
         NUM       0.92      0.93      0.92      5590
         ORG       0.87      0.85      0.86      4086

Epoch 8/20 : |████████████████████--------------------| 50.16% [1270/2532 00:35<00:35]

02/06/2024 02:55:06 - INFO - run_ner -   ***** Running evaluation on test dataset (19000 step) *****
02/06/2024 02:55:06 - INFO - run_ner -     Num examples = 9000
02/06/2024 02:55:06 - INFO - run_ner -     Eval Batch size = 128


 |████████████████████████████████████████| 100.00% [71/71 00:02<00:00]

02/06/2024 02:55:09 - INFO - run_ner -   ***** Eval results on test dataset *****
02/06/2024 02:55:09 - INFO - run_ner -     f1 = 0.8474064012225146
02/06/2024 02:55:09 - INFO - run_ner -     loss = 0.21540392305649503
02/06/2024 02:55:09 - INFO - run_ner -     precision = 0.8417040510001011
02/06/2024 02:55:09 - INFO - run_ner -     recall = 0.8531865426695843
02/06/2024 02:55:09 - INFO - run_ner -   
              precision    recall  f1-score   support

         AFW       0.57      0.58      0.58       394
         ANM       0.76      0.75      0.76       701
         CVL       0.82      0.83      0.82      5758
         DAT       0.91      0.93      0.92      2521
         EVT       0.74      0.77      0.76      1094
         FLD       0.62      0.64      0.63       228
         LOC       0.83      0.84      0.84      2126
         MAT       0.00      0.00      0.00        12
         NUM       0.92      0.93      0.92      5590
         ORG       0.86      0.85      0.86      4086

Epoch 8/20 : |███████████████████████████████████-----| 89.81% [2274/2532 01:04<00:07]

02/06/2024 02:55:34 - INFO - run_ner -   ***** Running evaluation on test dataset (20000 step) *****
02/06/2024 02:55:34 - INFO - run_ner -     Num examples = 9000
02/06/2024 02:55:34 - INFO - run_ner -     Eval Batch size = 128


 |████████████████████████████████████████| 100.00% [71/71 00:02<00:00]

02/06/2024 02:55:38 - INFO - run_ner -   ***** Eval results on test dataset *****
02/06/2024 02:55:38 - INFO - run_ner -     f1 = 0.8474770642201834
02/06/2024 02:55:38 - INFO - run_ner -     loss = 0.21246362949760866
02/06/2024 02:55:38 - INFO - run_ner -     precision = 0.8361954206602769
02/06/2024 02:55:38 - INFO - run_ner -     recall = 0.8590672866520788
02/06/2024 02:55:38 - INFO - run_ner -   
              precision    recall  f1-score   support

         AFW       0.55      0.59      0.57       394
         ANM       0.73      0.77      0.75       701
         CVL       0.81      0.84      0.82      5758
         DAT       0.91      0.93      0.92      2521
         EVT       0.74      0.77      0.75      1094
         FLD       0.55      0.69      0.61       228
         LOC       0.84      0.84      0.84      2126
         MAT       0.00      0.00      0.00        12
         NUM       0.91      0.93      0.92      5590
         ORG       0.88      0.85      0.86      4086

Epoch 8 done                                                                           
Epoch 9/20 : |███████████-----------------------------| 29.34% [743/2532 00:18<00:43]

02/06/2024 02:56:04 - INFO - run_ner -   ***** Running evaluation on test dataset (21000 step) *****
02/06/2024 02:56:04 - INFO - run_ner -     Num examples = 9000
02/06/2024 02:56:04 - INFO - run_ner -     Eval Batch size = 128


 |████████████████████████████████████████| 100.00% [71/71 00:02<00:00]

02/06/2024 02:56:07 - INFO - run_ner -   ***** Eval results on test dataset *****
02/06/2024 02:56:07 - INFO - run_ner -     f1 = 0.8494069195554874
02/06/2024 02:56:07 - INFO - run_ner -     loss = 0.21519028930596903
02/06/2024 02:56:07 - INFO - run_ner -     precision = 0.8455934672856029
02/06/2024 02:56:07 - INFO - run_ner -     recall = 0.8532549234135668
02/06/2024 02:56:07 - INFO - run_ner -   
              precision    recall  f1-score   support

         AFW       0.58      0.58      0.58       394
         ANM       0.70      0.79      0.74       701
         CVL       0.82      0.83      0.83      5758
         DAT       0.91      0.93      0.92      2521
         EVT       0.75      0.76      0.76      1094
         FLD       0.65      0.64      0.64       228
         LOC       0.83      0.85      0.84      2126
         MAT       0.00      0.00      0.00        12
         NUM       0.92      0.93      0.92      5590
         ORG       0.87      0.85      0.86      4086

Epoch 9/20 : |███████████████████████████-------------| 68.60% [1737/2532 00:46<00:21]

02/06/2024 02:56:32 - INFO - run_ner -   ***** Running evaluation on test dataset (22000 step) *****
02/06/2024 02:56:32 - INFO - run_ner -     Num examples = 9000
02/06/2024 02:56:32 - INFO - run_ner -     Eval Batch size = 128


 |████████████████████████████████████████| 100.00% [71/71 00:02<00:00]

02/06/2024 02:56:35 - INFO - run_ner -   ***** Eval results on test dataset *****
02/06/2024 02:56:35 - INFO - run_ner -     f1 = 0.8530296592164527
02/06/2024 02:56:35 - INFO - run_ner -     loss = 0.21502089458452145
02/06/2024 02:56:35 - INFO - run_ner -     precision = 0.8481427654037246
02/06/2024 02:56:35 - INFO - run_ner -     recall = 0.8579731947483589
02/06/2024 02:56:36 - INFO - run_ner -   
              precision    recall  f1-score   support

         AFW       0.62      0.58      0.60       394
         ANM       0.79      0.76      0.77       701
         CVL       0.82      0.84      0.83      5758
         DAT       0.91      0.94      0.92      2521
         EVT       0.75      0.78      0.76      1094
         FLD       0.63      0.63      0.63       228
         LOC       0.84      0.85      0.85      2126
         MAT       0.00      0.00      0.00        12
         NUM       0.92      0.93      0.92      5590
         ORG       0.86      0.86      0.86      4086

Epoch 9 done                                                                           
Epoch 10/20 : |███-------------------------------------| 8.18% [207/2532 00:05<00:58]

02/06/2024 02:57:02 - INFO - run_ner -   ***** Running evaluation on test dataset (23000 step) *****
02/06/2024 02:57:02 - INFO - run_ner -     Num examples = 9000
02/06/2024 02:57:02 - INFO - run_ner -     Eval Batch size = 128


 |████████████████████████████████████████| 100.00% [71/71 00:02<00:00]

02/06/2024 02:57:05 - INFO - run_ner -   ***** Eval results on test dataset *****
02/06/2024 02:57:05 - INFO - run_ner -     f1 = 0.8518493429074652
02/06/2024 02:57:05 - INFO - run_ner -     loss = 0.2183495195398868
02/06/2024 02:57:05 - INFO - run_ner -     precision = 0.8439597315436241
02/06/2024 02:57:05 - INFO - run_ner -     recall = 0.8598878555798687
02/06/2024 02:57:05 - INFO - run_ner -   
              precision    recall  f1-score   support

         AFW       0.59      0.58      0.58       394
         ANM       0.74      0.78      0.76       701
         CVL       0.83      0.84      0.83      5758
         DAT       0.91      0.93      0.92      2521
         EVT       0.76      0.78      0.77      1094
         FLD       0.64      0.65      0.64       228
         LOC       0.84      0.85      0.84      2126
         MAT       1.00      0.08      0.15        12
         NUM       0.91      0.93      0.92      5590
         ORG       0.87      0.85      0.86      4086


Epoch 10/20 : |███████---------------------------------| 19.31% [489/2532 00:16<01:10]

In [None]:

def _last_number(text, default=-1):
    """Return the last run of digits in ``text`` as an int, or ``default`` if there is none."""
    digit_runs = re.findall(r"\d+", text)
    return int(digit_runs[-1]) if digit_runs else default


def _metric_then_step(key_with_step):
    """Sort key for ``<metric>_<step>`` result names: metric prefix first, then numeric step.

    Keys without a numeric ``_<digits>`` suffix get step -1 instead of raising, so one
    oddly named checkpoint directory cannot abort the write after all evaluations ran.
    """
    prefix = "".join(re.findall(r"[^_]+_", key_with_step))
    steps = re.findall(r"_(\d+)", key_with_step)
    return prefix, (int(steps[-1]) if steps else -1)


# Evaluate saved checkpoints on the test set and write all metrics to eval_results.txt.
# NOTE: `args.device` and `test_dataset` are not defined in this cell; they must already
# exist in the kernel (presumably set by the earlier training/setup cells - verify).
results = {}
if args.do_eval:
    # Every directory under output_dir that contains a pytorch_model.bin is a checkpoint.
    # They are ordered by the last number in the file path (presumably the training step,
    # since the step is later read from the text after the final "-" of the directory).
    weight_files = glob.glob(args.output_dir + "/**/" + "pytorch_model.bin", recursive=True)
    checkpoints = [os.path.dirname(path) for path in sorted(weight_files, key=_last_number)]
    if not args.eval_all_checkpoints:
        # Only the checkpoint that sorts last is evaluated.
        checkpoints = checkpoints[-1:]
    else:
        logging.getLogger("transformers.configuration_utils").setLevel(logging.WARN)  # Reduce logging
        logging.getLogger("transformers.modeling_utils").setLevel(logging.WARN)  # Reduce logging
    logger.info("Evaluate the following checkpoints: %s", checkpoints)
    for checkpoint in checkpoints:
        global_step = checkpoint.split("-")[-1]
        model = MODEL_FOR_TOKEN_CLASSIFICATION[args.model_type].from_pretrained(checkpoint)
        model.to(args.device)
        result = evaluate(args, model, test_dataset, mode="test", global_step=global_step)
        # Suffix every metric name with the step so results of different checkpoints coexist.
        result = dict((k + "_{}".format(global_step), v) for k, v in result.items())
        results.update(result)

    # With several checkpoints, group by metric and order numerically by step;
    # with at most one, plain lexicographic order is kept.
    if len(checkpoints) > 1:
        ordered_keys = sorted(results.keys(), key=_metric_then_step)
    else:
        ordered_keys = sorted(results.keys())

    output_eval_file = os.path.join(args.output_dir, "eval_results.txt")
    with open(output_eval_file, "w", encoding="utf-8") as f_w:
        for key in ordered_keys:
            f_w.write("{} = {}\n".format(key, str(results[key])))
