In [1]:
from flair.data import Corpus
from flair.datasets import ColumnCorpus
# Maps each column index of the data files to the annotation layer name
# that flair should attach to it.
columns = {
    0: 'text',
    1: 'ner',
}

# Read the train / dev / test splits from the data_flair/ folder.
# The files are decoded as ISO-8859-1 rather than flair's default encoding.
corpus: Corpus = ColumnCorpus(
    data_folder='data_flair/',
    column_format=columns,
    train_file='data_prep_train.txt',
    dev_file='data_prep_val.txt',
    test_file='data_prep_test.txt',
    encoding="ISO-8859-1",
)

2022-09-25 18:33:24,525 Reading data from data_flair
2022-09-25 18:33:24,525 Train: data_flair\data_prep_train.txt
2022-09-25 18:33:24,525 Dev: data_flair\data_prep_val.txt
2022-09-25 18:33:24,540 Test: data_flair\data_prep_test.txt


In [2]:
import pandas as pd
# One-row summary of how many items each split of the corpus holds.
# The bare DataFrame expression on the last line is what the notebook renders.
data = [[len(split) for split in (corpus.train, corpus.test, corpus.dev)]]
pd.DataFrame(data, columns=["Train", "Test", "Development"])

Unnamed: 0,Train,Test,Development
0,1850,159,634


In [4]:
import flair
from typing import List
from flair.trainers import ModelTrainer
from flair.models import SequenceTagger
from flair.embeddings import TokenEmbeddings, WordEmbeddings, StackedEmbeddings, FlairEmbeddings

# Seed the random number generators before any weights are initialised or
# batches are shuffled, so that re-running the notebook starts from the same
# random state instead of producing a different model every time.
SEED = 42
flair.set_seed(SEED)

# Annotation layer the tagger is trained to predict; used both for building the
# label dictionary and for configuring the tagger so the two cannot drift apart.
tag_type = 'ner'
tag_dictionary = corpus.make_label_dictionary(tag_type)

# For faster training and smaller models, we can comment out the flair embeddings.
# This will significantly affect the performance though.
embedding_types: List[TokenEmbeddings] = [
    WordEmbeddings('glove'),
    FlairEmbeddings('news-forward'),
    FlairEmbeddings('news-backward'),
]

# Combine the GloVe word vectors and the forward/backward Flair embeddings
# into a single stacked embedding that is fed to the tagger.
embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

# Sequence tagger with a CRF decoding layer on top (use_crf=True).
tagger: SequenceTagger = SequenceTagger(hidden_size=256,
                                        embeddings=embeddings,
                                        tag_dictionary=tag_dictionary,
                                        tag_type=tag_type,
                                        use_crf=True)

trainer: ModelTrainer = ModelTrainer(tagger, corpus)

# NOTE(review): the output directory is an absolute, Colab-style path, while the
# captured logs show Windows paths for the data files; the folder name "conllpp"
# also does not match the Geo* labels of this corpus. It is left unchanged here
# because other cells may load the model from this location -- consider moving
# it to a configurable relative path.
# embeddings_storage_mode='gpu' asks flair to keep computed embeddings on the
# training device between epochs -- TODO confirm this fits in memory on the
# target machine.
trainer.train('/content/model/conllpp',
              learning_rate=0.1,
              mini_batch_size=32,
              max_epochs=50,
              embeddings_storage_mode='gpu')

2022-09-25 18:42:08,154 Computing label dictionary. Progress:


1850it [00:00, 39789.26it/s]

2022-09-25 18:42:08,213 Dictionary created for label 'ner' with 5 values: GeoMeth (seen 2471 times), GeoPetro (seen 1718 times), GeoLoc (seen 344 times), GeoTime (seen 114 times)





2022-09-25 18:42:12,525 SequenceTagger predicts: Dictionary with 17 tags: O, S-GeoMeth, B-GeoMeth, E-GeoMeth, I-GeoMeth, S-GeoPetro, B-GeoPetro, E-GeoPetro, I-GeoPetro, S-GeoLoc, B-GeoLoc, E-GeoLoc, I-GeoLoc, S-GeoTime, B-GeoTime, E-GeoTime, I-GeoTime
2022-09-25 18:42:12,650 ----------------------------------------------------------------------------------------------------
2022-09-25 18:42:12,650 Model: "SequenceTagger(
  (embeddings): StackedEmbeddings(
    (list_embedding_0): WordEmbeddings(
      'glove'
      (embedding): Embedding(400001, 100)
    )
    (list_embedding_1): FlairEmbeddings(
      (lm): LanguageModel(
        (drop): Dropout(p=0.05, inplace=False)
        (encoder): Embedding(300, 100)
        (rnn): LSTM(100, 2048)
        (decoder): Linear(in_features=2048, out_features=300, bias=True)
      )
    )
    (list_embedding_2): FlairEmbeddings(
      (lm): LanguageModel(
        (drop): Dropout(p=0.05, inplace=False)
        (encoder): Embedding(300, 100)
        (rnn

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [02:14<00:00,  6.74s/it]

2022-09-25 18:50:39,952 Evaluating as a multi-label problem: False





2022-09-25 18:50:40,108 DEV : loss 0.242720365524292 - f1-score (micro avg)  0.6548
2022-09-25 18:50:40,155 BAD EPOCHS (no improvement): 0
2022-09-25 18:50:40,155 saving best model
2022-09-25 18:50:44,208 ----------------------------------------------------------------------------------------------------
2022-09-25 18:50:49,130 epoch 2 - iter 5/58 - loss 0.32106258 - samples/sec: 32.71 - lr: 0.100000
2022-09-25 18:50:54,707 epoch 2 - iter 10/58 - loss 0.30120795 - samples/sec: 28.77 - lr: 0.100000
2022-09-25 18:50:59,581 epoch 2 - iter 15/58 - loss 0.28416559 - samples/sec: 32.93 - lr: 0.100000
2022-09-25 18:51:04,486 epoch 2 - iter 20/58 - loss 0.28810048 - samples/sec: 32.83 - lr: 0.100000
2022-09-25 18:51:09,461 epoch 2 - iter 25/58 - loss 0.28540354 - samples/sec: 32.26 - lr: 0.100000
2022-09-25 18:51:14,351 epoch 2 - iter 30/58 - loss 0.28057418 - samples/sec: 32.72 - lr: 0.100000
2022-09-25 18:51:19,459 epoch 2 - iter 35/58 - loss 0.27943263 - samples/sec: 31.32 - lr: 0.100000
20

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:09<00:00,  2.19it/s]

2022-09-25 18:51:51,233 Evaluating as a multi-label problem: False
2022-09-25 18:51:51,248 DEV : loss 0.17691299319267273 - f1-score (micro avg)  0.7717
2022-09-25 18:51:51,248 BAD EPOCHS (no improvement): 0
2022-09-25 18:51:51,264 saving best model





2022-09-25 18:51:52,498 ----------------------------------------------------------------------------------------------------
2022-09-25 18:51:57,146 epoch 3 - iter 5/58 - loss 0.23778671 - samples/sec: 34.42 - lr: 0.100000
2022-09-25 18:52:01,598 epoch 3 - iter 10/58 - loss 0.23243383 - samples/sec: 35.94 - lr: 0.100000
2022-09-25 18:52:05,441 epoch 3 - iter 15/58 - loss 0.22930930 - samples/sec: 41.64 - lr: 0.100000
2022-09-25 18:52:09,706 epoch 3 - iter 20/58 - loss 0.23186050 - samples/sec: 37.52 - lr: 0.100000
2022-09-25 18:52:13,877 epoch 3 - iter 25/58 - loss 0.23277628 - samples/sec: 38.36 - lr: 0.100000
2022-09-25 18:52:17,797 epoch 3 - iter 30/58 - loss 0.23433477 - samples/sec: 40.81 - lr: 0.100000
2022-09-25 18:52:21,922 epoch 3 - iter 35/58 - loss 0.23011421 - samples/sec: 38.80 - lr: 0.100000
2022-09-25 18:52:26,605 epoch 3 - iter 40/58 - loss 0.22677752 - samples/sec: 34.16 - lr: 0.100000
2022-09-25 18:52:30,698 epoch 3 - iter 45/58 - loss 0.22345056 - samples/sec: 39.09 

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.44it/s]

2022-09-25 18:52:46,991 Evaluating as a multi-label problem: False
2022-09-25 18:52:46,991 DEV : loss 0.1405283510684967 - f1-score (micro avg)  0.7634
2022-09-25 18:52:47,006 BAD EPOCHS (no improvement): 1
2022-09-25 18:52:47,006 ----------------------------------------------------------------------------------------------------





2022-09-25 18:52:50,990 epoch 4 - iter 5/58 - loss 0.20454011 - samples/sec: 40.32 - lr: 0.100000
2022-09-25 18:52:54,973 epoch 4 - iter 10/58 - loss 0.19449990 - samples/sec: 40.17 - lr: 0.100000
2022-09-25 18:52:59,316 epoch 4 - iter 15/58 - loss 0.18935235 - samples/sec: 36.84 - lr: 0.100000
2022-09-25 18:53:03,721 epoch 4 - iter 20/58 - loss 0.18738724 - samples/sec: 36.32 - lr: 0.100000
2022-09-25 18:53:08,017 epoch 4 - iter 25/58 - loss 0.18597302 - samples/sec: 37.25 - lr: 0.100000
2022-09-25 18:53:12,375 epoch 4 - iter 30/58 - loss 0.18796562 - samples/sec: 36.71 - lr: 0.100000
2022-09-25 18:53:16,359 epoch 4 - iter 35/58 - loss 0.18695196 - samples/sec: 40.32 - lr: 0.100000
2022-09-25 18:53:19,983 epoch 4 - iter 40/58 - loss 0.18574116 - samples/sec: 44.15 - lr: 0.100000
2022-09-25 18:53:24,013 epoch 4 - iter 45/58 - loss 0.18659629 - samples/sec: 39.85 - lr: 0.100000
2022-09-25 18:53:28,262 epoch 4 - iter 50/58 - loss 0.18838693 - samples/sec: 37.66 - lr: 0.100000
2022-09-25 

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.45it/s]

2022-09-25 18:53:41,400 Evaluating as a multi-label problem: False
2022-09-25 18:53:41,415 DEV : loss 0.11093738675117493 - f1-score (micro avg)  0.8334
2022-09-25 18:53:41,431 BAD EPOCHS (no improvement): 0
2022-09-25 18:53:41,431 saving best model





2022-09-25 18:53:42,556 ----------------------------------------------------------------------------------------------------
2022-09-25 18:53:46,508 epoch 5 - iter 5/58 - loss 0.17867624 - samples/sec: 40.48 - lr: 0.100000
2022-09-25 18:53:50,257 epoch 5 - iter 10/58 - loss 0.16939247 - samples/sec: 42.68 - lr: 0.100000
2022-09-25 18:53:54,334 epoch 5 - iter 15/58 - loss 0.17072806 - samples/sec: 39.24 - lr: 0.100000
2022-09-25 18:53:58,583 epoch 5 - iter 20/58 - loss 0.16412032 - samples/sec: 37.66 - lr: 0.100000
2022-09-25 18:54:02,738 epoch 5 - iter 25/58 - loss 0.16641693 - samples/sec: 38.51 - lr: 0.100000
2022-09-25 18:54:06,987 epoch 5 - iter 30/58 - loss 0.16327816 - samples/sec: 37.66 - lr: 0.100000
2022-09-25 18:54:10,908 epoch 5 - iter 35/58 - loss 0.16225030 - samples/sec: 40.81 - lr: 0.100000
2022-09-25 18:54:14,626 epoch 5 - iter 40/58 - loss 0.16149001 - samples/sec: 43.04 - lr: 0.100000
2022-09-25 18:54:18,282 epoch 5 - iter 45/58 - loss 0.16075652 - samples/sec: 43.96 

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:06<00:00,  3.31it/s]

2022-09-25 18:54:35,230 Evaluating as a multi-label problem: False
2022-09-25 18:54:35,245 DEV : loss 0.0958387479186058 - f1-score (micro avg)  0.8449
2022-09-25 18:54:35,261 BAD EPOCHS (no improvement): 0
2022-09-25 18:54:35,261 saving best model





2022-09-25 18:54:36,495 ----------------------------------------------------------------------------------------------------
2022-09-25 18:54:40,650 epoch 6 - iter 5/58 - loss 0.15810664 - samples/sec: 38.65 - lr: 0.100000
2022-09-25 18:54:44,603 epoch 6 - iter 10/58 - loss 0.15363288 - samples/sec: 40.48 - lr: 0.100000
2022-09-25 18:54:48,555 epoch 6 - iter 15/58 - loss 0.14951766 - samples/sec: 40.48 - lr: 0.100000
2022-09-25 18:54:52,679 epoch 6 - iter 20/58 - loss 0.14620922 - samples/sec: 38.80 - lr: 0.100000
2022-09-25 18:54:56,615 epoch 6 - iter 25/58 - loss 0.15066090 - samples/sec: 40.64 - lr: 0.100000
2022-09-25 18:55:00,802 epoch 6 - iter 30/58 - loss 0.14891098 - samples/sec: 38.22 - lr: 0.100000
2022-09-25 18:55:04,567 epoch 6 - iter 35/58 - loss 0.15104313 - samples/sec: 42.50 - lr: 0.100000
2022-09-25 18:55:08,441 epoch 6 - iter 40/58 - loss 0.14953930 - samples/sec: 41.30 - lr: 0.100000
2022-09-25 18:55:12,112 epoch 6 - iter 45/58 - loss 0.14659918 - samples/sec: 43.77 

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.47it/s]

2022-09-25 18:55:28,889 Evaluating as a multi-label problem: False
2022-09-25 18:55:28,905 DEV : loss 0.08196767419576645 - f1-score (micro avg)  0.8789
2022-09-25 18:55:28,920 BAD EPOCHS (no improvement): 0
2022-09-25 18:55:28,920 saving best model





2022-09-25 18:55:30,217 ----------------------------------------------------------------------------------------------------
2022-09-25 18:55:34,122 epoch 7 - iter 5/58 - loss 0.13342910 - samples/sec: 40.97 - lr: 0.100000
2022-09-25 18:55:38,668 epoch 7 - iter 10/58 - loss 0.13904409 - samples/sec: 35.20 - lr: 0.100000
2022-09-25 18:55:42,276 epoch 7 - iter 15/58 - loss 0.13984088 - samples/sec: 44.34 - lr: 0.100000
2022-09-25 18:55:46,744 epoch 7 - iter 20/58 - loss 0.13758506 - samples/sec: 35.81 - lr: 0.100000
2022-09-25 18:55:50,368 epoch 7 - iter 25/58 - loss 0.13859971 - samples/sec: 44.34 - lr: 0.100000
2022-09-25 18:55:54,024 epoch 7 - iter 30/58 - loss 0.14050583 - samples/sec: 43.77 - lr: 0.100000
2022-09-25 18:55:58,007 epoch 7 - iter 35/58 - loss 0.14047112 - samples/sec: 40.17 - lr: 0.100000
2022-09-25 18:56:02,287 epoch 7 - iter 40/58 - loss 0.13862440 - samples/sec: 37.38 - lr: 0.100000
2022-09-25 18:56:06,458 epoch 7 - iter 45/58 - loss 0.13575028 - samples/sec: 38.36 

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.46it/s]

2022-09-25 18:56:22,633 Evaluating as a multi-label problem: False
2022-09-25 18:56:22,648 DEV : loss 0.06964662671089172 - f1-score (micro avg)  0.8983
2022-09-25 18:56:22,664 BAD EPOCHS (no improvement): 0
2022-09-25 18:56:22,664 saving best model





2022-09-25 18:56:23,945 ----------------------------------------------------------------------------------------------------
2022-09-25 18:56:28,491 epoch 8 - iter 5/58 - loss 0.12012444 - samples/sec: 35.20 - lr: 0.100000
2022-09-25 18:56:32,287 epoch 8 - iter 10/58 - loss 0.12898944 - samples/sec: 42.15 - lr: 0.100000
2022-09-25 18:56:36,161 epoch 8 - iter 15/58 - loss 0.12766477 - samples/sec: 41.30 - lr: 0.100000
2022-09-25 18:56:40,722 epoch 8 - iter 20/58 - loss 0.12446210 - samples/sec: 35.08 - lr: 0.100000
2022-09-25 18:56:44,565 epoch 8 - iter 25/58 - loss 0.12434230 - samples/sec: 41.64 - lr: 0.100000
2022-09-25 18:56:48,377 epoch 8 - iter 30/58 - loss 0.12599064 - samples/sec: 41.98 - lr: 0.100000
2022-09-25 18:56:52,391 epoch 8 - iter 35/58 - loss 0.12357583 - samples/sec: 39.85 - lr: 0.100000
2022-09-25 18:56:56,515 epoch 8 - iter 40/58 - loss 0.12120390 - samples/sec: 38.80 - lr: 0.100000
2022-09-25 18:57:00,265 epoch 8 - iter 45/58 - loss 0.11950259 - samples/sec: 42.86 

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.33it/s]

2022-09-25 18:57:16,308 Evaluating as a multi-label problem: False
2022-09-25 18:57:16,323 DEV : loss 0.06581784784793854 - f1-score (micro avg)  0.9037
2022-09-25 18:57:16,323 BAD EPOCHS (no improvement): 0
2022-09-25 18:57:16,339 saving best model





2022-09-25 18:57:17,760 ----------------------------------------------------------------------------------------------------
2022-09-25 18:57:21,728 epoch 9 - iter 5/58 - loss 0.12733984 - samples/sec: 40.32 - lr: 0.100000
2022-09-25 18:57:25,696 epoch 9 - iter 10/58 - loss 0.11459482 - samples/sec: 40.32 - lr: 0.100000
2022-09-25 18:57:30,117 epoch 9 - iter 15/58 - loss 0.11723532 - samples/sec: 36.19 - lr: 0.100000
2022-09-25 18:57:34,491 epoch 9 - iter 20/58 - loss 0.11189740 - samples/sec: 36.58 - lr: 0.100000
2022-09-25 18:57:37,834 epoch 9 - iter 25/58 - loss 0.11200666 - samples/sec: 47.86 - lr: 0.100000
2022-09-25 18:57:42,505 epoch 9 - iter 30/58 - loss 0.11468696 - samples/sec: 34.26 - lr: 0.100000
2022-09-25 18:57:46,675 epoch 9 - iter 35/58 - loss 0.11077907 - samples/sec: 38.36 - lr: 0.100000
2022-09-25 18:57:50,222 epoch 9 - iter 40/58 - loss 0.11055116 - samples/sec: 45.12 - lr: 0.100000
2022-09-25 18:57:54,064 epoch 9 - iter 45/58 - loss 0.11073076 - samples/sec: 41.64 

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.46it/s]

2022-09-25 18:58:10,284 Evaluating as a multi-label problem: False
2022-09-25 18:58:10,300 DEV : loss 0.0726819857954979 - f1-score (micro avg)  0.8899
2022-09-25 18:58:10,315 BAD EPOCHS (no improvement): 1
2022-09-25 18:58:10,315 ----------------------------------------------------------------------------------------------------





2022-09-25 18:58:14,502 epoch 10 - iter 5/58 - loss 0.09682799 - samples/sec: 38.22 - lr: 0.100000
2022-09-25 18:58:18,376 epoch 10 - iter 10/58 - loss 0.09621398 - samples/sec: 41.30 - lr: 0.100000
2022-09-25 18:58:22,187 epoch 10 - iter 15/58 - loss 0.10254754 - samples/sec: 41.98 - lr: 0.100000
2022-09-25 18:58:26,284 epoch 10 - iter 20/58 - loss 0.10129772 - samples/sec: 39.06 - lr: 0.100000
2022-09-25 18:58:30,533 epoch 10 - iter 25/58 - loss 0.10168510 - samples/sec: 37.66 - lr: 0.100000
2022-09-25 18:58:34,532 epoch 10 - iter 30/58 - loss 0.10232006 - samples/sec: 40.01 - lr: 0.100000
2022-09-25 18:58:38,953 epoch 10 - iter 35/58 - loss 0.10338133 - samples/sec: 36.19 - lr: 0.100000
2022-09-25 18:58:43,030 epoch 10 - iter 40/58 - loss 0.10155330 - samples/sec: 39.24 - lr: 0.100000
2022-09-25 18:58:46,498 epoch 10 - iter 45/58 - loss 0.10075396 - samples/sec: 46.14 - lr: 0.100000
2022-09-25 18:58:50,434 epoch 10 - iter 50/58 - loss 0.10113199 - samples/sec: 40.64 - lr: 0.100000
2

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.46it/s]

2022-09-25 18:59:03,041 Evaluating as a multi-label problem: False
2022-09-25 18:59:03,056 DEV : loss 0.05354638025164604 - f1-score (micro avg)  0.9276
2022-09-25 18:59:03,072 BAD EPOCHS (no improvement): 0
2022-09-25 18:59:03,072 saving best model





2022-09-25 18:59:04,025 ----------------------------------------------------------------------------------------------------
2022-09-25 18:59:07,790 epoch 11 - iter 5/58 - loss 0.09435860 - samples/sec: 42.50 - lr: 0.100000
2022-09-25 18:59:11,601 epoch 11 - iter 10/58 - loss 0.09711199 - samples/sec: 42.15 - lr: 0.100000
2022-09-25 18:59:15,600 epoch 11 - iter 15/58 - loss 0.09765137 - samples/sec: 40.01 - lr: 0.100000
2022-09-25 18:59:19,880 epoch 11 - iter 20/58 - loss 0.09897130 - samples/sec: 37.52 - lr: 0.100000
2022-09-25 18:59:24,145 epoch 11 - iter 25/58 - loss 0.10049743 - samples/sec: 37.52 - lr: 0.100000
2022-09-25 18:59:28,347 epoch 11 - iter 30/58 - loss 0.09997903 - samples/sec: 38.08 - lr: 0.100000
2022-09-25 18:59:32,456 epoch 11 - iter 35/58 - loss 0.10007880 - samples/sec: 38.94 - lr: 0.100000
2022-09-25 18:59:36,533 epoch 11 - iter 40/58 - loss 0.09972698 - samples/sec: 39.24 - lr: 0.100000
2022-09-25 18:59:40,610 epoch 11 - iter 45/58 - loss 0.09893255 - samples/se

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.47it/s]

2022-09-25 18:59:56,848 Evaluating as a multi-label problem: False
2022-09-25 18:59:56,848 DEV : loss 0.0665973499417305 - f1-score (micro avg)  0.8933
2022-09-25 18:59:56,863 BAD EPOCHS (no improvement): 1
2022-09-25 18:59:56,863 ----------------------------------------------------------------------------------------------------





2022-09-25 19:00:01,128 epoch 12 - iter 5/58 - loss 0.10639449 - samples/sec: 37.52 - lr: 0.100000
2022-09-25 19:00:04,971 epoch 12 - iter 10/58 - loss 0.09812923 - samples/sec: 41.64 - lr: 0.100000
2022-09-25 19:00:08,751 epoch 12 - iter 15/58 - loss 0.09545648 - samples/sec: 42.50 - lr: 0.100000
2022-09-25 19:00:13,547 epoch 12 - iter 20/58 - loss 0.09346809 - samples/sec: 33.36 - lr: 0.100000
2022-09-25 19:00:17,358 epoch 12 - iter 25/58 - loss 0.09757601 - samples/sec: 41.98 - lr: 0.100000
2022-09-25 19:00:21,342 epoch 12 - iter 30/58 - loss 0.09453439 - samples/sec: 40.17 - lr: 0.100000
2022-09-25 19:00:25,455 epoch 12 - iter 35/58 - loss 0.09438909 - samples/sec: 38.90 - lr: 0.100000
2022-09-25 19:00:29,188 epoch 12 - iter 40/58 - loss 0.09224181 - samples/sec: 42.86 - lr: 0.100000
2022-09-25 19:00:32,781 epoch 12 - iter 45/58 - loss 0.09133019 - samples/sec: 44.53 - lr: 0.100000
2022-09-25 19:00:37,186 epoch 12 - iter 50/58 - loss 0.09016895 - samples/sec: 36.32 - lr: 0.100000
2

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.44it/s]

2022-09-25 19:00:49,224 Evaluating as a multi-label problem: False
2022-09-25 19:00:49,240 DEV : loss 0.04721418023109436 - f1-score (micro avg)  0.9324
2022-09-25 19:00:49,255 BAD EPOCHS (no improvement): 0
2022-09-25 19:00:49,255 saving best model





2022-09-25 19:00:50,724 ----------------------------------------------------------------------------------------------------
2022-09-25 19:00:55,191 epoch 13 - iter 5/58 - loss 0.06978932 - samples/sec: 35.81 - lr: 0.100000
2022-09-25 19:00:59,518 epoch 13 - iter 10/58 - loss 0.08595712 - samples/sec: 37.11 - lr: 0.100000
2022-09-25 19:01:03,274 epoch 13 - iter 15/58 - loss 0.08582397 - samples/sec: 42.60 - lr: 0.100000
2022-09-25 19:01:07,289 epoch 13 - iter 20/58 - loss 0.08429440 - samples/sec: 39.85 - lr: 0.100000
2022-09-25 19:01:12,053 epoch 13 - iter 25/58 - loss 0.08740725 - samples/sec: 33.58 - lr: 0.100000
2022-09-25 19:01:15,474 epoch 13 - iter 30/58 - loss 0.08891668 - samples/sec: 46.77 - lr: 0.100000
2022-09-25 19:01:19,583 epoch 13 - iter 35/58 - loss 0.08874816 - samples/sec: 38.94 - lr: 0.100000
2022-09-25 19:01:23,691 epoch 13 - iter 40/58 - loss 0.08889973 - samples/sec: 38.94 - lr: 0.100000
2022-09-25 19:01:27,425 epoch 13 - iter 45/58 - loss 0.08800765 - samples/se

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.47it/s]

2022-09-25 19:01:43,234 Evaluating as a multi-label problem: False
2022-09-25 19:01:43,250 DEV : loss 0.045372236520051956 - f1-score (micro avg)  0.9349
2022-09-25 19:01:43,266 BAD EPOCHS (no improvement): 0
2022-09-25 19:01:43,266 saving best model





2022-09-25 19:01:44,594 ----------------------------------------------------------------------------------------------------
2022-09-25 19:01:48,889 epoch 14 - iter 5/58 - loss 0.06991755 - samples/sec: 37.38 - lr: 0.100000
2022-09-25 19:01:52,482 epoch 14 - iter 10/58 - loss 0.07590108 - samples/sec: 44.53 - lr: 0.100000
2022-09-25 19:01:56,169 epoch 14 - iter 15/58 - loss 0.07726713 - samples/sec: 43.40 - lr: 0.100000
2022-09-25 19:02:00,746 epoch 14 - iter 20/58 - loss 0.07891389 - samples/sec: 34.96 - lr: 0.100000
2022-09-25 19:02:04,230 epoch 14 - iter 25/58 - loss 0.07701754 - samples/sec: 45.93 - lr: 0.100000
2022-09-25 19:02:08,463 epoch 14 - iter 30/58 - loss 0.07759165 - samples/sec: 37.79 - lr: 0.100000
2022-09-25 19:02:12,118 epoch 14 - iter 35/58 - loss 0.07786479 - samples/sec: 43.77 - lr: 0.100000
2022-09-25 19:02:15,961 epoch 14 - iter 40/58 - loss 0.07858365 - samples/sec: 41.64 - lr: 0.100000
2022-09-25 19:02:19,679 epoch 14 - iter 45/58 - loss 0.08085493 - samples/se

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.43it/s]

2022-09-25 19:02:36,921 Evaluating as a multi-label problem: False
2022-09-25 19:02:36,937 DEV : loss 0.044466860592365265 - f1-score (micro avg)  0.9343
2022-09-25 19:02:36,937 BAD EPOCHS (no improvement): 1
2022-09-25 19:02:36,953 ----------------------------------------------------------------------------------------------------





2022-09-25 19:02:41,155 epoch 15 - iter 5/58 - loss 0.07323336 - samples/sec: 38.08 - lr: 0.100000
2022-09-25 19:02:44,841 epoch 15 - iter 10/58 - loss 0.07761592 - samples/sec: 43.40 - lr: 0.100000
2022-09-25 19:02:49,200 epoch 15 - iter 15/58 - loss 0.07635143 - samples/sec: 36.71 - lr: 0.100000
2022-09-25 19:02:53,246 epoch 15 - iter 20/58 - loss 0.07588462 - samples/sec: 39.55 - lr: 0.100000
2022-09-25 19:02:57,495 epoch 15 - iter 25/58 - loss 0.07567391 - samples/sec: 37.66 - lr: 0.100000
2022-09-25 19:03:01,275 epoch 15 - iter 30/58 - loss 0.07615048 - samples/sec: 42.32 - lr: 0.100000
2022-09-25 19:03:05,258 epoch 15 - iter 35/58 - loss 0.07692683 - samples/sec: 40.17 - lr: 0.100000
2022-09-25 19:03:09,195 epoch 15 - iter 40/58 - loss 0.07772324 - samples/sec: 40.64 - lr: 0.100000
2022-09-25 19:03:13,241 epoch 15 - iter 45/58 - loss 0.07786182 - samples/sec: 39.55 - lr: 0.100000
2022-09-25 19:03:17,193 epoch 15 - iter 50/58 - loss 0.07769037 - samples/sec: 40.48 - lr: 0.100000
2

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.45it/s]

2022-09-25 19:03:29,987 Evaluating as a multi-label problem: False
2022-09-25 19:03:30,003 DEV : loss 0.042859386652708054 - f1-score (micro avg)  0.9453
2022-09-25 19:03:30,003 BAD EPOCHS (no improvement): 0
2022-09-25 19:03:30,003 saving best model





2022-09-25 19:03:30,877 ----------------------------------------------------------------------------------------------------
2022-09-25 19:03:34,845 epoch 16 - iter 5/58 - loss 0.06532181 - samples/sec: 40.32 - lr: 0.100000
2022-09-25 19:03:38,985 epoch 16 - iter 10/58 - loss 0.06664275 - samples/sec: 38.80 - lr: 0.100000
2022-09-25 19:03:43,046 epoch 16 - iter 15/58 - loss 0.06848816 - samples/sec: 39.55 - lr: 0.100000
2022-09-25 19:03:47,186 epoch 16 - iter 20/58 - loss 0.06695595 - samples/sec: 38.65 - lr: 0.100000
2022-09-25 19:03:51,388 epoch 16 - iter 25/58 - loss 0.07092492 - samples/sec: 38.08 - lr: 0.100000
2022-09-25 19:03:55,312 epoch 16 - iter 30/58 - loss 0.07128168 - samples/sec: 40.78 - lr: 0.100000
2022-09-25 19:03:59,311 epoch 16 - iter 35/58 - loss 0.07496579 - samples/sec: 40.01 - lr: 0.100000
2022-09-25 19:04:03,388 epoch 16 - iter 40/58 - loss 0.07349108 - samples/sec: 39.24 - lr: 0.100000
2022-09-25 19:04:07,653 epoch 16 - iter 45/58 - loss 0.07424206 - samples/se

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.43it/s]

2022-09-25 19:04:24,067 Evaluating as a multi-label problem: False
2022-09-25 19:04:24,082 DEV : loss 0.03931363672018051 - f1-score (micro avg)  0.9513
2022-09-25 19:04:24,082 BAD EPOCHS (no improvement): 0
2022-09-25 19:04:24,098 saving best model





2022-09-25 19:04:25,004 ----------------------------------------------------------------------------------------------------
2022-09-25 19:04:29,066 epoch 17 - iter 5/58 - loss 0.06793543 - samples/sec: 39.39 - lr: 0.100000
2022-09-25 19:04:33,518 epoch 17 - iter 10/58 - loss 0.07478948 - samples/sec: 35.94 - lr: 0.100000
2022-09-25 19:04:37,782 epoch 17 - iter 15/58 - loss 0.07441299 - samples/sec: 37.52 - lr: 0.100000
2022-09-25 19:04:41,828 epoch 17 - iter 20/58 - loss 0.07504487 - samples/sec: 39.55 - lr: 0.100000
2022-09-25 19:04:45,796 epoch 17 - iter 25/58 - loss 0.07663207 - samples/sec: 40.32 - lr: 0.100000
2022-09-25 19:04:49,748 epoch 17 - iter 30/58 - loss 0.07529889 - samples/sec: 40.48 - lr: 0.100000
2022-09-25 19:04:53,825 epoch 17 - iter 35/58 - loss 0.07289057 - samples/sec: 39.24 - lr: 0.100000
2022-09-25 19:04:57,637 epoch 17 - iter 40/58 - loss 0.07169472 - samples/sec: 41.98 - lr: 0.100000
2022-09-25 19:05:02,042 epoch 17 - iter 45/58 - loss 0.07075747 - samples/se

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.46it/s]

2022-09-25 19:05:18,398 Evaluating as a multi-label problem: False
2022-09-25 19:05:18,413 DEV : loss 0.03661825880408287 - f1-score (micro avg)  0.9527
2022-09-25 19:05:18,429 BAD EPOCHS (no improvement): 0
2022-09-25 19:05:18,429 saving best model





2022-09-25 19:05:19,398 ----------------------------------------------------------------------------------------------------
2022-09-25 19:05:24,021 epoch 18 - iter 5/58 - loss 0.07474826 - samples/sec: 34.60 - lr: 0.100000
2022-09-25 19:05:28,458 epoch 18 - iter 10/58 - loss 0.07387853 - samples/sec: 36.06 - lr: 0.100000
2022-09-25 19:05:32,988 epoch 18 - iter 15/58 - loss 0.07289897 - samples/sec: 35.32 - lr: 0.100000
2022-09-25 19:05:36,940 epoch 18 - iter 20/58 - loss 0.06930054 - samples/sec: 40.48 - lr: 0.100000
2022-09-25 19:05:40,471 epoch 18 - iter 25/58 - loss 0.06784550 - samples/sec: 45.32 - lr: 0.100000
2022-09-25 19:05:44,657 epoch 18 - iter 30/58 - loss 0.06763699 - samples/sec: 38.22 - lr: 0.100000
2022-09-25 19:05:48,719 epoch 18 - iter 35/58 - loss 0.06786425 - samples/sec: 39.39 - lr: 0.100000
2022-09-25 19:05:53,015 epoch 18 - iter 40/58 - loss 0.06876401 - samples/sec: 37.25 - lr: 0.100000
2022-09-25 19:05:57,389 epoch 18 - iter 45/58 - loss 0.06893075 - samples/se

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.44it/s]

2022-09-25 19:06:13,354 Evaluating as a multi-label problem: False
2022-09-25 19:06:13,354 DEV : loss 0.03759501129388809 - f1-score (micro avg)  0.9535
2022-09-25 19:06:13,369 BAD EPOCHS (no improvement): 0
2022-09-25 19:06:13,369 saving best model





2022-09-25 19:06:14,291 ----------------------------------------------------------------------------------------------------
2022-09-25 19:06:18,368 epoch 19 - iter 5/58 - loss 0.05627141 - samples/sec: 39.24 - lr: 0.100000
2022-09-25 19:06:22,086 epoch 19 - iter 10/58 - loss 0.06091001 - samples/sec: 43.04 - lr: 0.100000
2022-09-25 19:06:26,294 epoch 19 - iter 15/58 - loss 0.06157409 - samples/sec: 38.02 - lr: 0.100000
2022-09-25 19:06:30,215 epoch 19 - iter 20/58 - loss 0.06153479 - samples/sec: 40.81 - lr: 0.100000
2022-09-25 19:06:34,433 epoch 19 - iter 25/58 - loss 0.06219887 - samples/sec: 37.93 - lr: 0.100000
2022-09-25 19:06:38,995 epoch 19 - iter 30/58 - loss 0.06316547 - samples/sec: 35.08 - lr: 0.100000
2022-09-25 19:06:43,384 epoch 19 - iter 35/58 - loss 0.06148872 - samples/sec: 36.45 - lr: 0.100000
2022-09-25 19:06:47,493 epoch 19 - iter 40/58 - loss 0.06263463 - samples/sec: 38.94 - lr: 0.100000
2022-09-25 19:06:51,929 epoch 19 - iter 45/58 - loss 0.06380611 - samples/se

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.44it/s]

2022-09-25 19:07:07,894 Evaluating as a multi-label problem: False
2022-09-25 19:07:07,910 DEV : loss 0.0343373604118824 - f1-score (micro avg)  0.9541
2022-09-25 19:07:07,910 BAD EPOCHS (no improvement): 0
2022-09-25 19:07:07,910 saving best model





2022-09-25 19:07:08,972 ----------------------------------------------------------------------------------------------------
2022-09-25 19:07:13,018 epoch 20 - iter 5/58 - loss 0.05006778 - samples/sec: 39.55 - lr: 0.100000
2022-09-25 19:07:17,032 epoch 20 - iter 10/58 - loss 0.05799013 - samples/sec: 39.85 - lr: 0.100000
2022-09-25 19:07:20,844 epoch 20 - iter 15/58 - loss 0.06071775 - samples/sec: 41.98 - lr: 0.100000
2022-09-25 19:07:24,671 epoch 20 - iter 20/58 - loss 0.05973220 - samples/sec: 41.98 - lr: 0.100000
2022-09-25 19:07:28,655 epoch 20 - iter 25/58 - loss 0.05990256 - samples/sec: 40.17 - lr: 0.100000
2022-09-25 19:07:32,826 epoch 20 - iter 30/58 - loss 0.06257089 - samples/sec: 38.36 - lr: 0.100000
2022-09-25 19:07:36,965 epoch 20 - iter 35/58 - loss 0.06185615 - samples/sec: 38.65 - lr: 0.100000
2022-09-25 19:07:41,277 epoch 20 - iter 40/58 - loss 0.06236134 - samples/sec: 37.11 - lr: 0.100000
2022-09-25 19:07:45,635 epoch 20 - iter 45/58 - loss 0.06136642 - samples/se

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:06<00:00,  3.28it/s]

2022-09-25 19:08:02,303 Evaluating as a multi-label problem: False
2022-09-25 19:08:02,319 DEV : loss 0.03575373440980911 - f1-score (micro avg)  0.9555
2022-09-25 19:08:02,334 BAD EPOCHS (no improvement): 0
2022-09-25 19:08:02,334 saving best model





2022-09-25 19:08:03,459 ----------------------------------------------------------------------------------------------------
2022-09-25 19:08:07,693 epoch 21 - iter 5/58 - loss 0.05722614 - samples/sec: 37.79 - lr: 0.100000
2022-09-25 19:08:11,724 epoch 21 - iter 10/58 - loss 0.06250512 - samples/sec: 39.70 - lr: 0.100000
2022-09-25 19:08:15,629 epoch 21 - iter 15/58 - loss 0.06110294 - samples/sec: 40.97 - lr: 0.100000
2022-09-25 19:08:19,847 epoch 21 - iter 20/58 - loss 0.05957551 - samples/sec: 37.93 - lr: 0.100000
2022-09-25 19:08:23,810 epoch 21 - iter 25/58 - loss 0.05865925 - samples/sec: 40.38 - lr: 0.100000
2022-09-25 19:08:27,996 epoch 21 - iter 30/58 - loss 0.05737192 - samples/sec: 38.22 - lr: 0.100000
2022-09-25 19:08:32,026 epoch 21 - iter 35/58 - loss 0.05710109 - samples/sec: 39.70 - lr: 0.100000
2022-09-25 19:08:37,385 epoch 21 - iter 40/58 - loss 0.05690707 - samples/sec: 29.86 - lr: 0.100000
2022-09-25 19:08:41,540 epoch 21 - iter 45/58 - loss 0.05730573 - samples/se

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.45it/s]

2022-09-25 19:08:57,645 Evaluating as a multi-label problem: False
2022-09-25 19:08:57,661 DEV : loss 0.033298786729574203 - f1-score (micro avg)  0.9578
2022-09-25 19:08:57,677 BAD EPOCHS (no improvement): 0
2022-09-25 19:08:57,677 saving best model





2022-09-25 19:08:58,583 ----------------------------------------------------------------------------------------------------
2022-09-25 19:09:02,707 epoch 22 - iter 5/58 - loss 0.05090443 - samples/sec: 38.80 - lr: 0.100000
2022-09-25 19:09:06,784 epoch 22 - iter 10/58 - loss 0.05070887 - samples/sec: 39.24 - lr: 0.100000
2022-09-25 19:09:10,752 epoch 22 - iter 15/58 - loss 0.05614203 - samples/sec: 40.48 - lr: 0.100000
2022-09-25 19:09:14,579 epoch 22 - iter 20/58 - loss 0.05681533 - samples/sec: 41.98 - lr: 0.100000
2022-09-25 19:09:18,312 epoch 22 - iter 25/58 - loss 0.05831302 - samples/sec: 42.86 - lr: 0.100000
2022-09-25 19:09:22,249 epoch 22 - iter 30/58 - loss 0.05706858 - samples/sec: 40.64 - lr: 0.100000
2022-09-25 19:09:26,467 epoch 22 - iter 35/58 - loss 0.05851869 - samples/sec: 37.93 - lr: 0.100000
2022-09-25 19:09:30,685 epoch 22 - iter 40/58 - loss 0.05854566 - samples/sec: 37.93 - lr: 0.100000
2022-09-25 19:09:35,574 epoch 22 - iter 45/58 - loss 0.05885171 - samples/se

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.47it/s]

2022-09-25 19:09:51,383 Evaluating as a multi-label problem: False
2022-09-25 19:09:51,398 DEV : loss 0.035188499838113785 - f1-score (micro avg)  0.9506
2022-09-25 19:09:51,414 BAD EPOCHS (no improvement): 1
2022-09-25 19:09:51,414 ----------------------------------------------------------------------------------------------------





2022-09-25 19:09:55,585 epoch 23 - iter 5/58 - loss 0.05648096 - samples/sec: 38.36 - lr: 0.100000
2022-09-25 19:09:59,975 epoch 23 - iter 10/58 - loss 0.05748556 - samples/sec: 36.45 - lr: 0.100000
2022-09-25 19:10:03,614 epoch 23 - iter 15/58 - loss 0.05937003 - samples/sec: 43.96 - lr: 0.100000
2022-09-25 19:10:07,395 epoch 23 - iter 20/58 - loss 0.05659012 - samples/sec: 42.32 - lr: 0.100000
2022-09-25 19:10:11,362 epoch 23 - iter 25/58 - loss 0.05786008 - samples/sec: 40.32 - lr: 0.100000
2022-09-25 19:10:15,861 epoch 23 - iter 30/58 - loss 0.05753499 - samples/sec: 35.56 - lr: 0.100000
2022-09-25 19:10:19,860 epoch 23 - iter 35/58 - loss 0.05957634 - samples/sec: 40.01 - lr: 0.100000
2022-09-25 19:10:23,998 epoch 23 - iter 40/58 - loss 0.05947115 - samples/sec: 38.67 - lr: 0.100000
2022-09-25 19:10:28,059 epoch 23 - iter 45/58 - loss 0.05912772 - samples/sec: 39.39 - lr: 0.100000
2022-09-25 19:10:32,183 epoch 23 - iter 50/58 - loss 0.05895914 - samples/sec: 38.80 - lr: 0.100000
2

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:06<00:00,  3.32it/s]

2022-09-25 19:10:44,727 Evaluating as a multi-label problem: False
2022-09-25 19:10:44,727 DEV : loss 0.03511998802423477 - f1-score (micro avg)  0.9519
2022-09-25 19:10:44,743 BAD EPOCHS (no improvement): 2
2022-09-25 19:10:44,743 ----------------------------------------------------------------------------------------------------





2022-09-25 19:10:48,414 epoch 24 - iter 5/58 - loss 0.06694627 - samples/sec: 43.58 - lr: 0.100000
2022-09-25 19:10:52,803 epoch 24 - iter 10/58 - loss 0.05391126 - samples/sec: 36.45 - lr: 0.100000
2022-09-25 19:10:56,646 epoch 24 - iter 15/58 - loss 0.05598922 - samples/sec: 41.81 - lr: 0.100000
2022-09-25 19:11:00,755 epoch 24 - iter 20/58 - loss 0.05533552 - samples/sec: 38.94 - lr: 0.100000
2022-09-25 19:11:05,222 epoch 24 - iter 25/58 - loss 0.05682293 - samples/sec: 35.81 - lr: 0.100000
2022-09-25 19:11:08,893 epoch 24 - iter 30/58 - loss 0.05592728 - samples/sec: 43.58 - lr: 0.100000
2022-09-25 19:11:13,189 epoch 24 - iter 35/58 - loss 0.05621309 - samples/sec: 37.25 - lr: 0.100000
2022-09-25 19:11:17,673 epoch 24 - iter 40/58 - loss 0.05716795 - samples/sec: 35.69 - lr: 0.100000
2022-09-25 19:11:21,906 epoch 24 - iter 45/58 - loss 0.05769106 - samples/sec: 37.79 - lr: 0.100000
2022-09-25 19:11:26,155 epoch 24 - iter 50/58 - loss 0.05793802 - samples/sec: 37.66 - lr: 0.100000
2

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.48it/s]

2022-09-25 19:11:38,574 Evaluating as a multi-label problem: False
2022-09-25 19:11:38,574 DEV : loss 0.03166037052869797 - f1-score (micro avg)  0.9543
2022-09-25 19:11:38,589 BAD EPOCHS (no improvement): 3
2022-09-25 19:11:38,589 ----------------------------------------------------------------------------------------------------





2022-09-25 19:11:43,260 epoch 25 - iter 5/58 - loss 0.05293935 - samples/sec: 34.26 - lr: 0.100000
2022-09-25 19:11:47,009 epoch 25 - iter 10/58 - loss 0.05451433 - samples/sec: 42.68 - lr: 0.100000
2022-09-25 19:11:51,258 epoch 25 - iter 15/58 - loss 0.05255409 - samples/sec: 37.66 - lr: 0.100000
2022-09-25 19:11:55,367 epoch 25 - iter 20/58 - loss 0.05252733 - samples/sec: 38.94 - lr: 0.100000
2022-09-25 19:11:59,381 epoch 25 - iter 25/58 - loss 0.05229214 - samples/sec: 39.85 - lr: 0.100000
2022-09-25 19:12:03,302 epoch 25 - iter 30/58 - loss 0.05088477 - samples/sec: 40.81 - lr: 0.100000
2022-09-25 19:12:07,551 epoch 25 - iter 35/58 - loss 0.05099271 - samples/sec: 37.66 - lr: 0.100000
2022-09-25 19:12:11,394 epoch 25 - iter 40/58 - loss 0.05119749 - samples/sec: 41.64 - lr: 0.100000
2022-09-25 19:12:15,190 epoch 25 - iter 45/58 - loss 0.05263785 - samples/sec: 42.15 - lr: 0.100000
2022-09-25 19:12:19,330 epoch 25 - iter 50/58 - loss 0.05163718 - samples/sec: 38.65 - lr: 0.100000
2

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.42it/s]

2022-09-25 19:12:31,793 Evaluating as a multi-label problem: False
2022-09-25 19:12:31,809 DEV : loss 0.03199426084756851 - f1-score (micro avg)  0.9578
2022-09-25 19:12:31,824 BAD EPOCHS (no improvement): 0
2022-09-25 19:12:31,824 saving best model





2022-09-25 19:12:32,855 ----------------------------------------------------------------------------------------------------
2022-09-25 19:12:36,761 epoch 26 - iter 5/58 - loss 0.06093729 - samples/sec: 40.97 - lr: 0.100000
2022-09-25 19:12:40,869 epoch 26 - iter 10/58 - loss 0.05686313 - samples/sec: 38.94 - lr: 0.100000
2022-09-25 19:12:44,993 epoch 26 - iter 15/58 - loss 0.05404314 - samples/sec: 38.80 - lr: 0.100000
2022-09-25 19:12:48,899 epoch 26 - iter 20/58 - loss 0.05557483 - samples/sec: 40.97 - lr: 0.100000
2022-09-25 19:12:53,132 epoch 26 - iter 25/58 - loss 0.05630269 - samples/sec: 37.79 - lr: 0.100000
2022-09-25 19:12:56,975 epoch 26 - iter 30/58 - loss 0.05614185 - samples/sec: 41.81 - lr: 0.100000
2022-09-25 19:13:01,130 epoch 26 - iter 35/58 - loss 0.05574388 - samples/sec: 38.51 - lr: 0.100000
2022-09-25 19:13:05,020 epoch 26 - iter 40/58 - loss 0.05433126 - samples/sec: 41.13 - lr: 0.100000
2022-09-25 19:13:09,458 epoch 26 - iter 45/58 - loss 0.05366608 - samples/se

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.43it/s]

2022-09-25 19:13:26,111 Evaluating as a multi-label problem: False
2022-09-25 19:13:26,126 DEV : loss 0.03087194263935089 - f1-score (micro avg)  0.962
2022-09-25 19:13:26,142 BAD EPOCHS (no improvement): 0
2022-09-25 19:13:26,142 saving best model





2022-09-25 19:13:27,079 ----------------------------------------------------------------------------------------------------
2022-09-25 19:13:31,781 epoch 27 - iter 5/58 - loss 0.06115919 - samples/sec: 34.03 - lr: 0.100000
2022-09-25 19:13:35,874 epoch 27 - iter 10/58 - loss 0.05509251 - samples/sec: 39.09 - lr: 0.100000
2022-09-25 19:13:40,264 epoch 27 - iter 15/58 - loss 0.05167944 - samples/sec: 36.45 - lr: 0.100000
2022-09-25 19:13:44,028 epoch 27 - iter 20/58 - loss 0.05167049 - samples/sec: 42.50 - lr: 0.100000
2022-09-25 19:13:47,606 epoch 27 - iter 25/58 - loss 0.05010719 - samples/sec: 44.73 - lr: 0.100000
2022-09-25 19:13:51,370 epoch 27 - iter 30/58 - loss 0.05048232 - samples/sec: 42.50 - lr: 0.100000
2022-09-25 19:13:55,713 epoch 27 - iter 35/58 - loss 0.05200874 - samples/sec: 36.85 - lr: 0.100000
2022-09-25 19:13:59,555 epoch 27 - iter 40/58 - loss 0.05182892 - samples/sec: 41.64 - lr: 0.100000
2022-09-25 19:14:03,820 epoch 27 - iter 45/58 - loss 0.05097956 - samples/se

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.45it/s]

2022-09-25 19:14:19,538 Evaluating as a multi-label problem: False
2022-09-25 19:14:19,553 DEV : loss 0.03286566585302353 - f1-score (micro avg)  0.9611
2022-09-25 19:14:19,569 BAD EPOCHS (no improvement): 1
2022-09-25 19:14:19,569 ----------------------------------------------------------------------------------------------------





2022-09-25 19:14:24,047 epoch 28 - iter 5/58 - loss 0.05536190 - samples/sec: 35.73 - lr: 0.100000
2022-09-25 19:14:28,171 epoch 28 - iter 10/58 - loss 0.05485746 - samples/sec: 38.80 - lr: 0.100000
2022-09-25 19:14:32,561 epoch 28 - iter 15/58 - loss 0.05108648 - samples/sec: 36.45 - lr: 0.100000
2022-09-25 19:14:36,904 epoch 28 - iter 20/58 - loss 0.05335229 - samples/sec: 36.84 - lr: 0.100000
2022-09-25 19:14:40,622 epoch 28 - iter 25/58 - loss 0.05085057 - samples/sec: 43.04 - lr: 0.100000
2022-09-25 19:14:44,527 epoch 28 - iter 30/58 - loss 0.04937448 - samples/sec: 40.97 - lr: 0.100000
2022-09-25 19:14:48,682 epoch 28 - iter 35/58 - loss 0.04927669 - samples/sec: 38.51 - lr: 0.100000
2022-09-25 19:14:52,963 epoch 28 - iter 40/58 - loss 0.04813656 - samples/sec: 37.38 - lr: 0.100000
2022-09-25 19:14:56,946 epoch 28 - iter 45/58 - loss 0.04798395 - samples/sec: 40.17 - lr: 0.100000
2022-09-25 19:15:01,054 epoch 28 - iter 50/58 - loss 0.04859233 - samples/sec: 38.94 - lr: 0.100000
2

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.44it/s]

2022-09-25 19:15:12,864 Evaluating as a multi-label problem: False
2022-09-25 19:15:12,880 DEV : loss 0.03183416649699211 - f1-score (micro avg)  0.9602
2022-09-25 19:15:12,895 BAD EPOCHS (no improvement): 2
2022-09-25 19:15:12,895 ----------------------------------------------------------------------------------------------------





2022-09-25 19:15:17,097 epoch 29 - iter 5/58 - loss 0.05015563 - samples/sec: 38.08 - lr: 0.100000
2022-09-25 19:15:21,721 epoch 29 - iter 10/58 - loss 0.05137446 - samples/sec: 34.60 - lr: 0.100000
2022-09-25 19:15:25,948 epoch 29 - iter 15/58 - loss 0.04926162 - samples/sec: 37.85 - lr: 0.100000
2022-09-25 19:15:29,947 epoch 29 - iter 20/58 - loss 0.05113526 - samples/sec: 40.01 - lr: 0.100000
2022-09-25 19:15:33,821 epoch 29 - iter 25/58 - loss 0.04772211 - samples/sec: 41.30 - lr: 0.100000
2022-09-25 19:15:38,508 epoch 29 - iter 30/58 - loss 0.04971871 - samples/sec: 34.14 - lr: 0.100000
2022-09-25 19:15:42,491 epoch 29 - iter 35/58 - loss 0.05021901 - samples/sec: 40.17 - lr: 0.100000
2022-09-25 19:15:46,459 epoch 29 - iter 40/58 - loss 0.04945454 - samples/sec: 40.32 - lr: 0.100000
2022-09-25 19:15:50,411 epoch 29 - iter 45/58 - loss 0.04876001 - samples/sec: 40.48 - lr: 0.100000
2022-09-25 19:15:54,678 epoch 29 - iter 50/58 - loss 0.04909016 - samples/sec: 37.50 - lr: 0.100000
2

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.43it/s]

2022-09-25 19:16:06,769 Evaluating as a multi-label problem: False
2022-09-25 19:16:06,784 DEV : loss 0.03257587552070618 - f1-score (micro avg)  0.9594
2022-09-25 19:16:06,800 BAD EPOCHS (no improvement): 3
2022-09-25 19:16:06,800 ----------------------------------------------------------------------------------------------------





2022-09-25 19:16:10,830 epoch 30 - iter 5/58 - loss 0.03537259 - samples/sec: 39.70 - lr: 0.100000
2022-09-25 19:16:15,517 epoch 30 - iter 10/58 - loss 0.04784088 - samples/sec: 34.14 - lr: 0.100000
2022-09-25 19:16:19,922 epoch 30 - iter 15/58 - loss 0.04818489 - samples/sec: 36.32 - lr: 0.100000
2022-09-25 19:16:23,668 epoch 30 - iter 20/58 - loss 0.04831844 - samples/sec: 42.71 - lr: 0.100000
2022-09-25 19:16:28,105 epoch 30 - iter 25/58 - loss 0.04746349 - samples/sec: 36.19 - lr: 0.100000
2022-09-25 19:16:32,479 epoch 30 - iter 30/58 - loss 0.04732659 - samples/sec: 36.58 - lr: 0.100000
2022-09-25 19:16:36,650 epoch 30 - iter 35/58 - loss 0.04745985 - samples/sec: 38.36 - lr: 0.100000
2022-09-25 19:16:40,414 epoch 30 - iter 40/58 - loss 0.04714397 - samples/sec: 42.50 - lr: 0.100000
2022-09-25 19:16:44,710 epoch 30 - iter 45/58 - loss 0.04665607 - samples/sec: 37.25 - lr: 0.100000
2022-09-25 19:16:48,584 epoch 30 - iter 50/58 - loss 0.04587604 - samples/sec: 41.30 - lr: 0.100000
2

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.43it/s]

2022-09-25 19:17:00,175 Evaluating as a multi-label problem: False
2022-09-25 19:17:00,191 DEV : loss 0.029809214174747467 - f1-score (micro avg)  0.963
2022-09-25 19:17:00,191 BAD EPOCHS (no improvement): 0
2022-09-25 19:17:00,191 saving best model





2022-09-25 19:17:01,175 ----------------------------------------------------------------------------------------------------
2022-09-25 19:17:05,362 epoch 31 - iter 5/58 - loss 0.04517964 - samples/sec: 38.22 - lr: 0.100000
2022-09-25 19:17:09,314 epoch 31 - iter 10/58 - loss 0.04253365 - samples/sec: 40.48 - lr: 0.100000
2022-09-25 19:17:13,157 epoch 31 - iter 15/58 - loss 0.04286059 - samples/sec: 41.81 - lr: 0.100000
2022-09-25 19:17:17,437 epoch 31 - iter 20/58 - loss 0.04446043 - samples/sec: 37.38 - lr: 0.100000
2022-09-25 19:17:21,702 epoch 31 - iter 25/58 - loss 0.04605465 - samples/sec: 37.52 - lr: 0.100000
2022-09-25 19:17:26,044 epoch 31 - iter 30/58 - loss 0.04655403 - samples/sec: 36.84 - lr: 0.100000
2022-09-25 19:17:30,043 epoch 31 - iter 35/58 - loss 0.04638981 - samples/sec: 40.01 - lr: 0.100000
2022-09-25 19:17:34,261 epoch 31 - iter 40/58 - loss 0.04630469 - samples/sec: 37.93 - lr: 0.100000
2022-09-25 19:17:38,124 epoch 31 - iter 45/58 - loss 0.04642453 - samples/se

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.43it/s]

2022-09-25 19:17:54,495 Evaluating as a multi-label problem: False
2022-09-25 19:17:54,511 DEV : loss 0.030304474756121635 - f1-score (micro avg)  0.9605
2022-09-25 19:17:54,526 BAD EPOCHS (no improvement): 1
2022-09-25 19:17:54,526 ----------------------------------------------------------------------------------------------------





2022-09-25 19:17:58,557 epoch 32 - iter 5/58 - loss 0.04709275 - samples/sec: 39.70 - lr: 0.100000
2022-09-25 19:18:02,853 epoch 32 - iter 10/58 - loss 0.05106329 - samples/sec: 37.25 - lr: 0.100000
2022-09-25 19:18:06,758 epoch 32 - iter 15/58 - loss 0.04941496 - samples/sec: 40.97 - lr: 0.100000
2022-09-25 19:18:10,710 epoch 32 - iter 20/58 - loss 0.04893394 - samples/sec: 40.48 - lr: 0.100000
2022-09-25 19:18:14,522 epoch 32 - iter 25/58 - loss 0.04729643 - samples/sec: 41.98 - lr: 0.100000
2022-09-25 19:18:19,130 epoch 32 - iter 30/58 - loss 0.04778431 - samples/sec: 34.72 - lr: 0.100000
2022-09-25 19:18:22,535 epoch 32 - iter 35/58 - loss 0.04821734 - samples/sec: 46.99 - lr: 0.100000
2022-09-25 19:18:26,721 epoch 32 - iter 40/58 - loss 0.04824268 - samples/sec: 38.22 - lr: 0.100000
2022-09-25 19:18:30,720 epoch 32 - iter 45/58 - loss 0.04853455 - samples/sec: 40.01 - lr: 0.100000
2022-09-25 19:18:35,032 epoch 32 - iter 50/58 - loss 0.04734010 - samples/sec: 37.11 - lr: 0.100000
2

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.44it/s]

2022-09-25 19:18:46,826 Evaluating as a multi-label problem: False
2022-09-25 19:18:46,842 DEV : loss 0.030063314363360405 - f1-score (micro avg)  0.96
2022-09-25 19:18:46,857 BAD EPOCHS (no improvement): 2
2022-09-25 19:18:46,857 ----------------------------------------------------------------------------------------------------





2022-09-25 19:18:51,171 epoch 33 - iter 5/58 - loss 0.04583226 - samples/sec: 37.09 - lr: 0.100000
2022-09-25 19:18:55,654 epoch 33 - iter 10/58 - loss 0.04389729 - samples/sec: 35.69 - lr: 0.100000
2022-09-25 19:18:59,981 epoch 33 - iter 15/58 - loss 0.04494065 - samples/sec: 36.98 - lr: 0.100000
2022-09-25 19:19:04,387 epoch 33 - iter 20/58 - loss 0.04375473 - samples/sec: 36.32 - lr: 0.100000
2022-09-25 19:19:08,448 epoch 33 - iter 25/58 - loss 0.04398078 - samples/sec: 39.55 - lr: 0.100000
2022-09-25 19:19:12,603 epoch 33 - iter 30/58 - loss 0.04451003 - samples/sec: 38.51 - lr: 0.100000
2022-09-25 19:19:16,962 epoch 33 - iter 35/58 - loss 0.04480967 - samples/sec: 36.71 - lr: 0.100000
2022-09-25 19:19:21,148 epoch 33 - iter 40/58 - loss 0.04404702 - samples/sec: 38.22 - lr: 0.100000
2022-09-25 19:19:25,100 epoch 33 - iter 45/58 - loss 0.04362073 - samples/sec: 40.48 - lr: 0.100000
2022-09-25 19:19:29,037 epoch 33 - iter 50/58 - loss 0.04463256 - samples/sec: 40.64 - lr: 0.100000
2

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.44it/s]

2022-09-25 19:19:40,925 Evaluating as a multi-label problem: False
2022-09-25 19:19:40,940 DEV : loss 0.030626511201262474 - f1-score (micro avg)  0.9635
2022-09-25 19:19:40,956 BAD EPOCHS (no improvement): 0
2022-09-25 19:19:40,956 saving best model





2022-09-25 19:19:41,925 ----------------------------------------------------------------------------------------------------
2022-09-25 19:19:45,942 epoch 34 - iter 5/58 - loss 0.04429364 - samples/sec: 39.83 - lr: 0.100000
2022-09-25 19:19:49,831 epoch 34 - iter 10/58 - loss 0.04641318 - samples/sec: 41.13 - lr: 0.100000
2022-09-25 19:19:53,924 epoch 34 - iter 15/58 - loss 0.04660091 - samples/sec: 39.09 - lr: 0.100000
2022-09-25 19:19:58,095 epoch 34 - iter 20/58 - loss 0.04563925 - samples/sec: 38.36 - lr: 0.100000
2022-09-25 19:20:02,094 epoch 34 - iter 25/58 - loss 0.04305274 - samples/sec: 40.01 - lr: 0.100000
2022-09-25 19:20:06,624 epoch 34 - iter 30/58 - loss 0.04344969 - samples/sec: 35.44 - lr: 0.100000
2022-09-25 19:20:10,951 epoch 34 - iter 35/58 - loss 0.04356870 - samples/sec: 36.98 - lr: 0.100000
2022-09-25 19:20:15,060 epoch 34 - iter 40/58 - loss 0.04326265 - samples/sec: 39.09 - lr: 0.100000
2022-09-25 19:20:18,746 epoch 34 - iter 45/58 - loss 0.04329126 - samples/se

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.44it/s]

2022-09-25 19:20:35,114 Evaluating as a multi-label problem: False
2022-09-25 19:20:35,114 DEV : loss 0.03188515082001686 - f1-score (micro avg)  0.961
2022-09-25 19:20:35,130 BAD EPOCHS (no improvement): 1
2022-09-25 19:20:35,130 ----------------------------------------------------------------------------------------------------





2022-09-25 19:20:39,785 epoch 35 - iter 5/58 - loss 0.04109071 - samples/sec: 34.37 - lr: 0.100000
2022-09-25 19:20:44,184 epoch 35 - iter 10/58 - loss 0.04365844 - samples/sec: 36.38 - lr: 0.100000
2022-09-25 19:20:47,877 epoch 35 - iter 15/58 - loss 0.04179230 - samples/sec: 43.32 - lr: 0.100000
2022-09-25 19:20:52,110 epoch 35 - iter 20/58 - loss 0.04134780 - samples/sec: 37.79 - lr: 0.100000
2022-09-25 19:20:56,063 epoch 35 - iter 25/58 - loss 0.04222237 - samples/sec: 40.48 - lr: 0.100000
2022-09-25 19:20:59,937 epoch 35 - iter 30/58 - loss 0.04252430 - samples/sec: 41.30 - lr: 0.100000
2022-09-25 19:21:04,437 epoch 35 - iter 35/58 - loss 0.04139225 - samples/sec: 35.55 - lr: 0.100000
2022-09-25 19:21:08,561 epoch 35 - iter 40/58 - loss 0.04179973 - samples/sec: 38.80 - lr: 0.100000
2022-09-25 19:21:12,201 epoch 35 - iter 45/58 - loss 0.04157453 - samples/sec: 43.96 - lr: 0.100000
2022-09-25 19:21:16,013 epoch 35 - iter 50/58 - loss 0.04199248 - samples/sec: 41.98 - lr: 0.100000
2

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:06<00:00,  3.25it/s]

2022-09-25 19:21:28,447 Evaluating as a multi-label problem: False
2022-09-25 19:21:28,463 DEV : loss 0.03355122730135918 - f1-score (micro avg)  0.9578
2022-09-25 19:21:28,478 BAD EPOCHS (no improvement): 2
2022-09-25 19:21:28,478 ----------------------------------------------------------------------------------------------------





2022-09-25 19:21:32,446 epoch 36 - iter 5/58 - loss 0.04061636 - samples/sec: 40.32 - lr: 0.100000
2022-09-25 19:21:36,008 epoch 36 - iter 10/58 - loss 0.04291832 - samples/sec: 45.12 - lr: 0.100000
2022-09-25 19:21:40,382 epoch 36 - iter 15/58 - loss 0.04217828 - samples/sec: 36.58 - lr: 0.100000
2022-09-25 19:21:44,084 epoch 36 - iter 20/58 - loss 0.04214502 - samples/sec: 43.40 - lr: 0.100000
2022-09-25 19:21:47,833 epoch 36 - iter 25/58 - loss 0.04246778 - samples/sec: 42.68 - lr: 0.100000
2022-09-25 19:21:52,020 epoch 36 - iter 30/58 - loss 0.04241719 - samples/sec: 38.36 - lr: 0.100000
2022-09-25 19:21:55,972 epoch 36 - iter 35/58 - loss 0.04190576 - samples/sec: 40.48 - lr: 0.100000
2022-09-25 19:22:00,424 epoch 36 - iter 40/58 - loss 0.04227099 - samples/sec: 35.94 - lr: 0.100000
2022-09-25 19:22:04,610 epoch 36 - iter 45/58 - loss 0.04242412 - samples/sec: 38.22 - lr: 0.100000
2022-09-25 19:22:09,203 epoch 36 - iter 50/58 - loss 0.04354685 - samples/sec: 34.84 - lr: 0.100000
2

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.42it/s]

2022-09-25 19:22:21,357 Evaluating as a multi-label problem: False
2022-09-25 19:22:21,357 DEV : loss 0.03153787553310394 - f1-score (micro avg)  0.9639
2022-09-25 19:22:21,372 BAD EPOCHS (no improvement): 0
2022-09-25 19:22:21,372 saving best model





2022-09-25 19:22:22,306 ----------------------------------------------------------------------------------------------------
2022-09-25 19:22:26,181 epoch 37 - iter 5/58 - loss 0.03740364 - samples/sec: 41.30 - lr: 0.100000
2022-09-25 19:22:30,055 epoch 37 - iter 10/58 - loss 0.03841364 - samples/sec: 41.30 - lr: 0.100000
2022-09-25 19:22:33,663 epoch 37 - iter 15/58 - loss 0.04179218 - samples/sec: 44.34 - lr: 0.100000
2022-09-25 19:22:37,990 epoch 37 - iter 20/58 - loss 0.04397912 - samples/sec: 36.98 - lr: 0.100000
2022-09-25 19:22:42,333 epoch 37 - iter 25/58 - loss 0.04343008 - samples/sec: 36.98 - lr: 0.100000
2022-09-25 19:22:46,270 epoch 37 - iter 30/58 - loss 0.04327767 - samples/sec: 40.64 - lr: 0.100000
2022-09-25 19:22:51,003 epoch 37 - iter 35/58 - loss 0.04289012 - samples/sec: 33.80 - lr: 0.100000
2022-09-25 19:22:54,736 epoch 37 - iter 40/58 - loss 0.04327065 - samples/sec: 43.04 - lr: 0.100000
2022-09-25 19:22:58,782 epoch 37 - iter 45/58 - loss 0.04310749 - samples/se

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.40it/s]

2022-09-25 19:23:15,872 Evaluating as a multi-label problem: False
2022-09-25 19:23:15,888 DEV : loss 0.031085126101970673 - f1-score (micro avg)  0.9633
2022-09-25 19:23:15,903 BAD EPOCHS (no improvement): 1
2022-09-25 19:23:15,903 ----------------------------------------------------------------------------------------------------





2022-09-25 19:23:19,840 epoch 38 - iter 5/58 - loss 0.03922672 - samples/sec: 40.64 - lr: 0.100000
2022-09-25 19:23:24,261 epoch 38 - iter 10/58 - loss 0.03974375 - samples/sec: 36.19 - lr: 0.100000
2022-09-25 19:23:28,681 epoch 38 - iter 15/58 - loss 0.03911351 - samples/sec: 36.19 - lr: 0.100000
2022-09-25 19:23:33,040 epoch 38 - iter 20/58 - loss 0.03753127 - samples/sec: 36.71 - lr: 0.100000
2022-09-25 19:23:36,961 epoch 38 - iter 25/58 - loss 0.03985145 - samples/sec: 40.81 - lr: 0.100000
2022-09-25 19:23:41,663 epoch 38 - iter 30/58 - loss 0.03888237 - samples/sec: 34.03 - lr: 0.100000
2022-09-25 19:23:45,865 epoch 38 - iter 35/58 - loss 0.04064334 - samples/sec: 38.08 - lr: 0.100000
2022-09-25 19:23:49,505 epoch 38 - iter 40/58 - loss 0.04081266 - samples/sec: 43.96 - lr: 0.100000
2022-09-25 19:23:53,551 epoch 38 - iter 45/58 - loss 0.04072513 - samples/sec: 39.55 - lr: 0.100000
2022-09-25 19:23:57,206 epoch 38 - iter 50/58 - loss 0.04191500 - samples/sec: 43.77 - lr: 0.100000
2

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:06<00:00,  3.28it/s]

2022-09-25 19:24:09,719 Evaluating as a multi-label problem: False
2022-09-25 19:24:09,734 DEV : loss 0.02983834035694599 - f1-score (micro avg)  0.9636
2022-09-25 19:24:09,750 BAD EPOCHS (no improvement): 2
2022-09-25 19:24:09,750 ----------------------------------------------------------------------------------------------------





2022-09-25 19:24:13,905 epoch 39 - iter 5/58 - loss 0.04047796 - samples/sec: 38.51 - lr: 0.100000
2022-09-25 19:24:18,420 epoch 39 - iter 10/58 - loss 0.04245917 - samples/sec: 35.44 - lr: 0.100000
2022-09-25 19:24:22,169 epoch 39 - iter 15/58 - loss 0.04074783 - samples/sec: 42.68 - lr: 0.100000
2022-09-25 19:24:26,449 epoch 39 - iter 20/58 - loss 0.04182771 - samples/sec: 37.38 - lr: 0.100000
2022-09-25 19:24:30,511 epoch 39 - iter 25/58 - loss 0.04074946 - samples/sec: 39.39 - lr: 0.100000
2022-09-25 19:24:34,838 epoch 39 - iter 30/58 - loss 0.03979358 - samples/sec: 36.98 - lr: 0.100000
2022-09-25 19:24:38,946 epoch 39 - iter 35/58 - loss 0.03942212 - samples/sec: 38.94 - lr: 0.100000
2022-09-25 19:24:43,226 epoch 39 - iter 40/58 - loss 0.04053022 - samples/sec: 37.38 - lr: 0.100000
2022-09-25 19:24:47,507 epoch 39 - iter 45/58 - loss 0.04055214 - samples/sec: 37.38 - lr: 0.100000
2022-09-25 19:24:51,849 epoch 39 - iter 50/58 - loss 0.04005566 - samples/sec: 36.84 - lr: 0.100000
2

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.43it/s]

2022-09-25 19:25:03,565 Evaluating as a multi-label problem: False
2022-09-25 19:25:03,581 DEV : loss 0.029002243652939796 - f1-score (micro avg)  0.9646
2022-09-25 19:25:03,597 BAD EPOCHS (no improvement): 0
2022-09-25 19:25:03,597 saving best model





2022-09-25 19:25:04,549 ----------------------------------------------------------------------------------------------------
2022-09-25 19:25:08,564 epoch 40 - iter 5/58 - loss 0.04636861 - samples/sec: 40.01 - lr: 0.100000
2022-09-25 19:25:12,469 epoch 40 - iter 10/58 - loss 0.04299121 - samples/sec: 40.97 - lr: 0.100000
2022-09-25 19:25:16,594 epoch 40 - iter 15/58 - loss 0.04306308 - samples/sec: 38.80 - lr: 0.100000
2022-09-25 19:25:20,218 epoch 40 - iter 20/58 - loss 0.04069030 - samples/sec: 44.15 - lr: 0.100000
2022-09-25 19:25:24,029 epoch 40 - iter 25/58 - loss 0.03840567 - samples/sec: 41.98 - lr: 0.100000
2022-09-25 19:25:27,941 epoch 40 - iter 30/58 - loss 0.03857232 - samples/sec: 40.91 - lr: 0.100000
2022-09-25 19:25:32,299 epoch 40 - iter 35/58 - loss 0.03897779 - samples/sec: 36.71 - lr: 0.100000
2022-09-25 19:25:36,814 epoch 40 - iter 40/58 - loss 0.03901695 - samples/sec: 35.44 - lr: 0.100000
2022-09-25 19:25:40,875 epoch 40 - iter 45/58 - loss 0.03843039 - samples/se

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.39it/s]

2022-09-25 19:25:57,309 Evaluating as a multi-label problem: False
2022-09-25 19:25:57,324 DEV : loss 0.029639560729265213 - f1-score (micro avg)  0.9648
2022-09-25 19:25:57,340 BAD EPOCHS (no improvement): 0
2022-09-25 19:25:57,340 saving best model





2022-09-25 19:25:58,480 ----------------------------------------------------------------------------------------------------
2022-09-25 19:26:02,636 epoch 41 - iter 5/58 - loss 0.04323437 - samples/sec: 38.51 - lr: 0.100000
2022-09-25 19:26:06,463 epoch 41 - iter 10/58 - loss 0.03929415 - samples/sec: 41.81 - lr: 0.100000
2022-09-25 19:26:10,852 epoch 41 - iter 15/58 - loss 0.04319883 - samples/sec: 36.45 - lr: 0.100000
2022-09-25 19:26:15,586 epoch 41 - iter 20/58 - loss 0.04176904 - samples/sec: 33.80 - lr: 0.100000
2022-09-25 19:26:19,413 epoch 41 - iter 25/58 - loss 0.03855356 - samples/sec: 41.81 - lr: 0.100000
2022-09-25 19:26:23,224 epoch 41 - iter 30/58 - loss 0.03679490 - samples/sec: 41.99 - lr: 0.100000
2022-09-25 19:26:27,426 epoch 41 - iter 35/58 - loss 0.03758998 - samples/sec: 38.08 - lr: 0.100000
2022-09-25 19:26:31,409 epoch 41 - iter 40/58 - loss 0.03882458 - samples/sec: 40.17 - lr: 0.100000
2022-09-25 19:26:35,502 epoch 41 - iter 45/58 - loss 0.03899097 - samples/se

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.47it/s]

2022-09-25 19:26:51,846 Evaluating as a multi-label problem: False
2022-09-25 19:26:51,862 DEV : loss 0.031048990786075592 - f1-score (micro avg)  0.9621
2022-09-25 19:26:51,877 BAD EPOCHS (no improvement): 1
2022-09-25 19:26:51,877 ----------------------------------------------------------------------------------------------------





2022-09-25 19:26:56,329 epoch 42 - iter 5/58 - loss 0.03435609 - samples/sec: 35.94 - lr: 0.100000
2022-09-25 19:27:00,938 epoch 42 - iter 10/58 - loss 0.03893973 - samples/sec: 34.72 - lr: 0.100000
2022-09-25 19:27:05,062 epoch 42 - iter 15/58 - loss 0.04004248 - samples/sec: 38.80 - lr: 0.100000
2022-09-25 19:27:08,780 epoch 42 - iter 20/58 - loss 0.03816918 - samples/sec: 43.04 - lr: 0.100000
2022-09-25 19:27:12,654 epoch 42 - iter 25/58 - loss 0.03948347 - samples/sec: 41.30 - lr: 0.100000
2022-09-25 19:27:16,825 epoch 42 - iter 30/58 - loss 0.03949007 - samples/sec: 38.51 - lr: 0.100000
2022-09-25 19:27:21,261 epoch 42 - iter 35/58 - loss 0.03851069 - samples/sec: 36.06 - lr: 0.100000
2022-09-25 19:27:25,166 epoch 42 - iter 40/58 - loss 0.03787621 - samples/sec: 40.97 - lr: 0.100000
2022-09-25 19:27:28,884 epoch 42 - iter 45/58 - loss 0.03730470 - samples/sec: 43.04 - lr: 0.100000
2022-09-25 19:27:33,180 epoch 42 - iter 50/58 - loss 0.03663375 - samples/sec: 37.25 - lr: 0.100000
2

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.44it/s]

2022-09-25 19:27:45,724 Evaluating as a multi-label problem: False
2022-09-25 19:27:45,740 DEV : loss 0.03156502544879913 - f1-score (micro avg)  0.9645
2022-09-25 19:27:45,755 BAD EPOCHS (no improvement): 2
2022-09-25 19:27:45,755 ----------------------------------------------------------------------------------------------------





2022-09-25 19:27:50,239 epoch 43 - iter 5/58 - loss 0.03569066 - samples/sec: 35.69 - lr: 0.100000
2022-09-25 19:27:54,003 epoch 43 - iter 10/58 - loss 0.03511866 - samples/sec: 42.50 - lr: 0.100000
2022-09-25 19:27:58,284 epoch 43 - iter 15/58 - loss 0.03483596 - samples/sec: 37.38 - lr: 0.100000
2022-09-25 19:28:02,673 epoch 43 - iter 20/58 - loss 0.03623957 - samples/sec: 36.45 - lr: 0.100000
2022-09-25 19:28:06,172 epoch 43 - iter 25/58 - loss 0.03902715 - samples/sec: 45.73 - lr: 0.100000
2022-09-25 19:28:09,765 epoch 43 - iter 30/58 - loss 0.03835510 - samples/sec: 44.53 - lr: 0.100000
2022-09-25 19:28:13,952 epoch 43 - iter 35/58 - loss 0.03925968 - samples/sec: 38.22 - lr: 0.100000
2022-09-25 19:28:18,404 epoch 43 - iter 40/58 - loss 0.03792383 - samples/sec: 35.94 - lr: 0.100000
2022-09-25 19:28:22,728 epoch 43 - iter 45/58 - loss 0.03748909 - samples/sec: 37.00 - lr: 0.100000
2022-09-25 19:28:26,665 epoch 43 - iter 50/58 - loss 0.03812482 - samples/sec: 40.64 - lr: 0.100000
2

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.44it/s]

2022-09-25 19:28:39,052 Evaluating as a multi-label problem: False
2022-09-25 19:28:39,068 DEV : loss 0.02876821905374527 - f1-score (micro avg)  0.9667
2022-09-25 19:28:39,084 BAD EPOCHS (no improvement): 0
2022-09-25 19:28:39,084 saving best model





2022-09-25 19:28:39,990 ----------------------------------------------------------------------------------------------------
2022-09-25 19:28:44,254 epoch 44 - iter 5/58 - loss 0.03746400 - samples/sec: 37.52 - lr: 0.100000
2022-09-25 19:28:48,284 epoch 44 - iter 10/58 - loss 0.04023154 - samples/sec: 39.85 - lr: 0.100000
2022-09-25 19:28:52,112 epoch 44 - iter 15/58 - loss 0.04041240 - samples/sec: 41.81 - lr: 0.100000
2022-09-25 19:28:56,267 epoch 44 - iter 20/58 - loss 0.04037959 - samples/sec: 38.51 - lr: 0.100000
2022-09-25 19:28:59,876 epoch 44 - iter 25/58 - loss 0.03896815 - samples/sec: 44.34 - lr: 0.100000
2022-09-25 19:29:04,343 epoch 44 - iter 30/58 - loss 0.03933768 - samples/sec: 35.94 - lr: 0.100000
2022-09-25 19:29:08,936 epoch 44 - iter 35/58 - loss 0.03898591 - samples/sec: 34.84 - lr: 0.100000
2022-09-25 19:29:13,138 epoch 44 - iter 40/58 - loss 0.03803538 - samples/sec: 38.08 - lr: 0.100000
2022-09-25 19:29:16,731 epoch 44 - iter 45/58 - loss 0.03760844 - samples/se

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.43it/s]

2022-09-25 19:29:33,055 Evaluating as a multi-label problem: False
2022-09-25 19:29:33,071 DEV : loss 0.029950125142931938 - f1-score (micro avg)  0.9658
2022-09-25 19:29:33,087 BAD EPOCHS (no improvement): 1
2022-09-25 19:29:33,087 ----------------------------------------------------------------------------------------------------





2022-09-25 19:29:37,179 epoch 45 - iter 5/58 - loss 0.03309966 - samples/sec: 39.09 - lr: 0.100000
2022-09-25 19:29:40,913 epoch 45 - iter 10/58 - loss 0.02897454 - samples/sec: 42.86 - lr: 0.100000
2022-09-25 19:29:45,506 epoch 45 - iter 15/58 - loss 0.03352279 - samples/sec: 34.84 - lr: 0.100000
2022-09-25 19:29:49,536 epoch 45 - iter 20/58 - loss 0.03512935 - samples/sec: 39.85 - lr: 0.100000
2022-09-25 19:29:53,754 epoch 45 - iter 25/58 - loss 0.03639351 - samples/sec: 38.08 - lr: 0.100000
2022-09-25 19:29:58,003 epoch 45 - iter 30/58 - loss 0.03518420 - samples/sec: 37.66 - lr: 0.100000
2022-09-25 19:30:02,220 epoch 45 - iter 35/58 - loss 0.03576257 - samples/sec: 37.93 - lr: 0.100000
2022-09-25 19:30:06,157 epoch 45 - iter 40/58 - loss 0.03593326 - samples/sec: 40.64 - lr: 0.100000
2022-09-25 19:30:10,250 epoch 45 - iter 45/58 - loss 0.03549068 - samples/sec: 39.09 - lr: 0.100000
2022-09-25 19:30:14,343 epoch 45 - iter 50/58 - loss 0.03567767 - samples/sec: 39.09 - lr: 0.100000
2

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.44it/s]

2022-09-25 19:30:26,874 Evaluating as a multi-label problem: False
2022-09-25 19:30:26,890 DEV : loss 0.030808083713054657 - f1-score (micro avg)  0.9661
2022-09-25 19:30:26,906 BAD EPOCHS (no improvement): 2
2022-09-25 19:30:26,906 ----------------------------------------------------------------------------------------------------





2022-09-25 19:30:31,342 epoch 46 - iter 5/58 - loss 0.03742639 - samples/sec: 36.06 - lr: 0.100000
2022-09-25 19:30:35,216 epoch 46 - iter 10/58 - loss 0.03346247 - samples/sec: 41.30 - lr: 0.100000
2022-09-25 19:30:39,293 epoch 46 - iter 15/58 - loss 0.03430191 - samples/sec: 39.24 - lr: 0.100000
2022-09-25 19:30:43,074 epoch 46 - iter 20/58 - loss 0.03519224 - samples/sec: 42.32 - lr: 0.100000
2022-09-25 19:30:46,838 epoch 46 - iter 25/58 - loss 0.03640863 - samples/sec: 42.50 - lr: 0.100000
2022-09-25 19:30:50,806 epoch 46 - iter 30/58 - loss 0.03642589 - samples/sec: 40.32 - lr: 0.100000
2022-09-25 19:30:55,129 epoch 46 - iter 35/58 - loss 0.03584480 - samples/sec: 37.01 - lr: 0.100000
2022-09-25 19:30:59,238 epoch 46 - iter 40/58 - loss 0.03594280 - samples/sec: 38.94 - lr: 0.100000
2022-09-25 19:31:03,924 epoch 46 - iter 45/58 - loss 0.03567657 - samples/sec: 34.14 - lr: 0.100000
2022-09-25 19:31:08,267 epoch 46 - iter 50/58 - loss 0.03588496 - samples/sec: 36.84 - lr: 0.100000
2

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.42it/s]

2022-09-25 19:31:20,717 Evaluating as a multi-label problem: False
2022-09-25 19:31:20,733 DEV : loss 0.03281557187438011 - f1-score (micro avg)  0.9644
2022-09-25 19:31:20,748 BAD EPOCHS (no improvement): 3
2022-09-25 19:31:20,748 ----------------------------------------------------------------------------------------------------





2022-09-25 19:31:24,013 epoch 47 - iter 5/58 - loss 0.03356996 - samples/sec: 49.01 - lr: 0.100000
2022-09-25 19:31:28,965 epoch 47 - iter 10/58 - loss 0.03810736 - samples/sec: 32.31 - lr: 0.100000
2022-09-25 19:31:33,433 epoch 47 - iter 15/58 - loss 0.03621782 - samples/sec: 35.81 - lr: 0.100000
2022-09-25 19:31:37,760 epoch 47 - iter 20/58 - loss 0.03352589 - samples/sec: 36.98 - lr: 0.100000
2022-09-25 19:31:41,962 epoch 47 - iter 25/58 - loss 0.03504278 - samples/sec: 38.08 - lr: 0.100000
2022-09-25 19:31:45,477 epoch 47 - iter 30/58 - loss 0.03597336 - samples/sec: 45.52 - lr: 0.100000
2022-09-25 19:31:49,538 epoch 47 - iter 35/58 - loss 0.03559100 - samples/sec: 39.39 - lr: 0.100000
2022-09-25 19:31:53,678 epoch 47 - iter 40/58 - loss 0.03540447 - samples/sec: 38.65 - lr: 0.100000
2022-09-25 19:31:57,646 epoch 47 - iter 45/58 - loss 0.03494840 - samples/sec: 40.32 - lr: 0.100000
2022-09-25 19:32:01,910 epoch 47 - iter 50/58 - loss 0.03477148 - samples/sec: 37.52 - lr: 0.100000
2

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:06<00:00,  3.27it/s]

2022-09-25 19:32:14,423 Evaluating as a multi-label problem: False
2022-09-25 19:32:14,439 DEV : loss 0.030078226700425148 - f1-score (micro avg)  0.9641
2022-09-25 19:32:14,454 Epoch    47: reducing learning rate of group 0 to 5.0000e-02.
2022-09-25 19:32:14,454 BAD EPOCHS (no improvement): 4
2022-09-25 19:32:14,454 ----------------------------------------------------------------------------------------------------





2022-09-25 19:32:19,016 epoch 48 - iter 5/58 - loss 0.03318941 - samples/sec: 35.08 - lr: 0.050000
2022-09-25 19:32:23,233 epoch 48 - iter 10/58 - loss 0.03578156 - samples/sec: 37.94 - lr: 0.050000
2022-09-25 19:32:26,935 epoch 48 - iter 15/58 - loss 0.03346806 - samples/sec: 43.22 - lr: 0.050000
2022-09-25 19:32:31,371 epoch 48 - iter 20/58 - loss 0.03307610 - samples/sec: 36.06 - lr: 0.050000
2022-09-25 19:32:35,652 epoch 48 - iter 25/58 - loss 0.03226163 - samples/sec: 37.38 - lr: 0.050000
2022-09-25 19:32:39,557 epoch 48 - iter 30/58 - loss 0.03233032 - samples/sec: 40.97 - lr: 0.050000
2022-09-25 19:32:44,009 epoch 48 - iter 35/58 - loss 0.03232098 - samples/sec: 35.94 - lr: 0.050000
2022-09-25 19:32:47,805 epoch 48 - iter 40/58 - loss 0.03243934 - samples/sec: 42.32 - lr: 0.050000
2022-09-25 19:32:51,914 epoch 48 - iter 45/58 - loss 0.03224795 - samples/sec: 38.94 - lr: 0.050000
2022-09-25 19:32:56,084 epoch 48 - iter 50/58 - loss 0.03311950 - samples/sec: 38.36 - lr: 0.050000
2

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.43it/s]

2022-09-25 19:33:07,863 Evaluating as a multi-label problem: False
2022-09-25 19:33:07,863 DEV : loss 0.028613414615392685 - f1-score (micro avg)  0.9676
2022-09-25 19:33:07,879 BAD EPOCHS (no improvement): 0
2022-09-25 19:33:07,879 saving best model





2022-09-25 19:33:08,925 ----------------------------------------------------------------------------------------------------
2022-09-25 19:33:13,408 epoch 49 - iter 5/58 - loss 0.03816882 - samples/sec: 35.69 - lr: 0.050000
2022-09-25 19:33:17,611 epoch 49 - iter 10/58 - loss 0.03238807 - samples/sec: 38.08 - lr: 0.050000
2022-09-25 19:33:21,438 epoch 49 - iter 15/58 - loss 0.03294093 - samples/sec: 41.81 - lr: 0.050000
2022-09-25 19:33:25,546 epoch 49 - iter 20/58 - loss 0.03293477 - samples/sec: 38.94 - lr: 0.050000
2022-09-25 19:33:29,889 epoch 49 - iter 25/58 - loss 0.03250333 - samples/sec: 36.98 - lr: 0.050000
2022-09-25 19:33:33,919 epoch 49 - iter 30/58 - loss 0.03201782 - samples/sec: 39.70 - lr: 0.050000
2022-09-25 19:33:37,622 epoch 49 - iter 35/58 - loss 0.03228060 - samples/sec: 43.40 - lr: 0.050000
2022-09-25 19:33:42,292 epoch 49 - iter 40/58 - loss 0.03260691 - samples/sec: 34.26 - lr: 0.050000
2022-09-25 19:33:46,073 epoch 49 - iter 45/58 - loss 0.03251080 - samples/se

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.42it/s]

2022-09-25 19:34:02,600 Evaluating as a multi-label problem: False
2022-09-25 19:34:02,616 DEV : loss 0.028785236179828644 - f1-score (micro avg)  0.9661
2022-09-25 19:34:02,631 BAD EPOCHS (no improvement): 1
2022-09-25 19:34:02,631 ----------------------------------------------------------------------------------------------------





2022-09-25 19:34:06,693 epoch 50 - iter 5/58 - loss 0.03035155 - samples/sec: 39.39 - lr: 0.050000
2022-09-25 19:34:10,933 epoch 50 - iter 10/58 - loss 0.03014845 - samples/sec: 37.74 - lr: 0.050000
2022-09-25 19:34:15,338 epoch 50 - iter 15/58 - loss 0.02978503 - samples/sec: 36.32 - lr: 0.050000
2022-09-25 19:34:19,634 epoch 50 - iter 20/58 - loss 0.02907028 - samples/sec: 37.38 - lr: 0.050000
2022-09-25 19:34:23,505 epoch 50 - iter 25/58 - loss 0.03132269 - samples/sec: 41.33 - lr: 0.050000
2022-09-25 19:34:27,645 epoch 50 - iter 30/58 - loss 0.03034910 - samples/sec: 38.65 - lr: 0.050000
2022-09-25 19:34:32,003 epoch 50 - iter 35/58 - loss 0.02974067 - samples/sec: 36.71 - lr: 0.050000
2022-09-25 19:34:36,439 epoch 50 - iter 40/58 - loss 0.02960116 - samples/sec: 36.06 - lr: 0.050000
2022-09-25 19:34:40,438 epoch 50 - iter 45/58 - loss 0.02988214 - samples/sec: 40.01 - lr: 0.050000
2022-09-25 19:34:44,391 epoch 50 - iter 50/58 - loss 0.02970215 - samples/sec: 40.48 - lr: 0.050000
2

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:06<00:00,  3.27it/s]

2022-09-25 19:34:56,747 Evaluating as a multi-label problem: False
2022-09-25 19:34:56,763 DEV : loss 0.02821298874914646 - f1-score (micro avg)  0.9676
2022-09-25 19:34:56,778 BAD EPOCHS (no improvement): 0





2022-09-25 19:34:57,731 ----------------------------------------------------------------------------------------------------
2022-09-25 19:34:57,731 loading file \content\model\conllpp\best-model.pt
2022-09-25 19:34:58,559 SequenceTagger predicts: Dictionary with 19 tags: O, S-GeoMeth, B-GeoMeth, E-GeoMeth, I-GeoMeth, S-GeoPetro, B-GeoPetro, E-GeoPetro, I-GeoPetro, S-GeoLoc, B-GeoLoc, E-GeoLoc, I-GeoLoc, S-GeoTime, B-GeoTime, E-GeoTime, I-GeoTime, <START>, <STOP>


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:26<00:00,  5.40s/it]

2022-09-25 19:35:25,865 Evaluating as a multi-label problem: False
2022-09-25 19:35:25,881 0.9717	0.9643	0.968	0.938
2022-09-25 19:35:25,881 
Results:
- F-score (micro) 0.968
- F-score (macro) 0.9347
- Accuracy 0.938

By class:
              precision    recall  f1-score   support

     GeoMeth     0.9907    0.9953    0.9930       215
    GeoPetro     0.9517    0.9583    0.9550       144
      GeoLoc     0.8947    0.7083    0.7907        24
     GeoTime     1.0000    1.0000    1.0000         9

   micro avg     0.9717    0.9643    0.9680       392
   macro avg     0.9593    0.9155    0.9347       392
weighted avg     0.9707    0.9643    0.9668       392

2022-09-25 19:35:25,881 ----------------------------------------------------------------------------------------------------





{'test_score': 0.9679897567221512,
 'dev_score_history': [0.6547870733917246,
  0.7717423567824281,
  0.7634004779788324,
  0.8334376956793988,
  0.8448823903369357,
  0.8789293495175848,
  0.898320610687023,
  0.9037267080745343,
  0.8898819023300351,
  0.9275618374558303,
  0.8933460681311683,
  0.9323995127892815,
  0.9348682217509845,
  0.9343110296364192,
  0.945273631840796,
  0.9513064133016627,
  0.9526907155529273,
  0.9535160905840286,
  0.9540846750149075,
  0.9554703627248599,
  0.9577632361689471,
  0.9506355305941472,
  0.9519230769230769,
  0.9542600896860987,
  0.9578134284016637,
  0.9620029455081002,
  0.9611078373600471,
  0.9601887348864642,
  0.9594074074074074,
  0.9630068067475586,
  0.9604754829123329,
  0.9600477042337507,
  0.963464938126105,
  0.9610085019055996,
  0.9578134284016637,
  0.9639401934916446,
  0.9632918886915334,
  0.963641738102276,
  0.9646017699115044,
  0.9648033126293996,
  0.9621145374449339,
  0.9644640234948606,
  0.9666568309235762,
  