# Summarizing (w/ GPT-2)

## Packages

In [1]:
from utils.json_utils import read_json, write_json
from datasets import Dataset
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


## Tokenizing

In [2]:
sentences_w_subjects_tokenized = read_json("9_non_lemmatized_tokenized_sentences_black_clover.json")
sentences_w_subjects_tokenized

[{'subjects': ['Yuno'],
  'tokens': ['a',
   'priest',
   'takes',
   'two',
   'babies',
   'abandoned',
   'outside',
   'his',
   'church',
   'inside',
   'and',
   'discovers',
   'two',
   'babies',
   'abandoned',
   'outside',
   'his',
   'church',
   'names',
   'to',
   'be',
   'Yuno',
   'and',
   'Asta',
   '.']},
 {'subjects': ['Asta'],
  'tokens': ['a',
   'priest',
   'takes',
   'two',
   'babies',
   'abandoned',
   'outside',
   'his',
   'church',
   'inside',
   'and',
   'discovers',
   'two',
   'babies',
   'abandoned',
   'outside',
   'his',
   'church',
   'names',
   'to',
   'be',
   'Yuno',
   'and',
   'Asta',
   '.']},
 {'subjects': ['Lily'],
  'tokens': ['Fifteen',
   'years',
   'later',
   ',',
   'Asta',
   'proposes',
   'to',
   'Sister',
   'Lily',
   ',',
   'who',
   'refuses',
   'repeatedly',
   '.']},
 {'subjects': ['Asta'],
  'tokens': ['Fifteen',
   'years',
   'later',
   ',',
   'Asta',
   'proposes',
   'to',
   'Sister',
   'Lily',
   

In [3]:
sentences_w_subjects_tokenized = [
    {
        "subjects" : sentence["subjects"],
        "tokens"  : " ".join(sentence["tokens"])
    } 
    for sentence in sentences_w_subjects_tokenized
]
sentences_w_subjects_tokenized

[{'subjects': ['Yuno'],
  'tokens': 'a priest takes two babies abandoned outside his church inside and discovers two babies abandoned outside his church names to be Yuno and Asta .'},
 {'subjects': ['Asta'],
  'tokens': 'a priest takes two babies abandoned outside his church inside and discovers two babies abandoned outside his church names to be Yuno and Asta .'},
 {'subjects': ['Lily'],
  'tokens': 'Fifteen years later , Asta proposes to Sister Lily , who refuses repeatedly .'},
 {'subjects': ['Asta'],
  'tokens': 'Fifteen years later , Asta proposes to Sister Lily , who refuses repeatedly .'},
 {'subjects': ['Yuno'],
  'tokens': 'Yuno and the other orphans criticize Asta and point out Yuno lack of magic .'},
 {'subjects': ['Asta'],
  'tokens': 'Yuno and the other orphans criticize Asta and point out Yuno lack of magic .'},
 {'subjects': ['Yuno'],
  'tokens': 'Asta tries to show off Asta skills , but Yuno outshines Asta with Asta magic .'},
 {'subjects': ['Asta'],
  'tokens': 'Asta t

In [4]:
max_length = 0
for sentence in sentences_w_subjects_tokenized:
    if len(sentence["tokens"]) > max_length:
        max_length = len(sentence["tokens"])
max_length

610

In [5]:
varied_set_adjectives = read_json("14_varied_set_adjectives_definitions.json")
#varied_set_adjectives

In [6]:
training_sents = []
for adj in varied_set_adjectives:
    training_sents.append("[MASK] can be described as " + adj + ".")
    training_sents.append("[MASK] can be described as " + adj + ".")
    training_sents.append("[MASK] can be described as " + adj + ".")
    training_sents.append("[MASK] can be described as " + adj + ".")
    training_sents.append("[MASK] can be described as " + adj + ".")

training_sents.extend(list(map(lambda x : x["tokens"], sentences_w_subjects_tokenized)))
#training_sents

## Transformer

In [7]:
from transformers import AutoTokenizer, AutoModelForMaskedLM, DataCollatorForLanguageModeling, Trainer, TrainingArguments
import torch

2023-01-26 21:30:23.579465: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-01-26 21:30:23.802136: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-01-26 21:30:23.802149: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-01-26 21:30:24.475778: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-

In [8]:
model_checkpoint = "bert-base-cased"

In [9]:
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)

In [10]:
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

In [11]:
tokenize_fn = lambda doc : tokenizer(
        doc,
        truncation=True,
        max_length=1024
    )

tokenized_dataset = list(map(tokenize_fn, training_sents))

In [12]:
tokenized_dataset = pd.DataFrame(tokenized_dataset)
tokenized_dataset = Dataset.from_pandas(tokenized_dataset)

In [13]:
model = AutoModelForMaskedLM.from_pretrained(
    model_checkpoint, pad_token_id=tokenizer.eos_token_id)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [14]:
device = f"cuda:{torch.cuda.current_device()}" if torch.cuda.is_available() else "cpu"

In [15]:
training_args = TrainingArguments(
    f"{model_checkpoint}-finetuned-adjs-wsent-black-clover",
    num_train_epochs=2,
    per_device_train_batch_size=10,
    per_device_eval_batch_size=10,    
    learning_rate=2e-5,
    weight_decay=0.01,
    do_eval=True, # eval en validation set
    evaluation_strategy="steps", # eval en validation set
    eval_steps=100,
    save_steps=100, # checkpoint model every 500 steps
    logging_dir='./logs', # logging
    logging_strategy="steps",
    logging_steps=1,
    fp16=False, # float16 en training (only on CUDA)
    push_to_hub=False,
)

In [16]:
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=tokenized_dataset, #.select(range(0, 128)),
    eval_dataset=tokenized_dataset, #.select(range(0, 128)),
)

In [17]:
train_output = trainer.train()

***** Running training *****
  Num examples = 6239
  Num Epochs = 2
  Instantaneous batch size per device = 10
  Total train batch size (w. parallel, distributed & accumulation) = 10
  Gradient Accumulation steps = 1
  Total optimization steps = 1248
  0%|          | 0/1248 [00:00<?, ?it/s]You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  0%|          | 1/1248 [00:01<37:07,  1.79s/it]

{'loss': 1.866, 'learning_rate': 1.998397435897436e-05, 'epoch': 0.0}


  0%|          | 2/1248 [00:03<40:12,  1.94s/it]

{'loss': 1.3938, 'learning_rate': 1.996794871794872e-05, 'epoch': 0.0}


  0%|          | 3/1248 [00:06<46:36,  2.25s/it]

{'loss': 1.0281, 'learning_rate': 1.9951923076923078e-05, 'epoch': 0.0}


  0%|          | 4/1248 [00:08<45:04,  2.17s/it]

{'loss': 0.9594, 'learning_rate': 1.9935897435897437e-05, 'epoch': 0.01}


  0%|          | 5/1248 [00:10<40:37,  1.96s/it]

{'loss': 1.1016, 'learning_rate': 1.9919871794871797e-05, 'epoch': 0.01}


  0%|          | 6/1248 [00:12<40:50,  1.97s/it]

{'loss': 0.9625, 'learning_rate': 1.9903846153846157e-05, 'epoch': 0.01}


  1%|          | 7/1248 [00:14<43:22,  2.10s/it]

{'loss': 0.8328, 'learning_rate': 1.9887820512820513e-05, 'epoch': 0.01}


  1%|          | 8/1248 [00:16<45:25,  2.20s/it]

{'loss': 0.7732, 'learning_rate': 1.9871794871794873e-05, 'epoch': 0.01}


  1%|          | 9/1248 [00:19<46:36,  2.26s/it]

{'loss': 0.58, 'learning_rate': 1.9855769230769233e-05, 'epoch': 0.01}


  1%|          | 10/1248 [00:22<51:25,  2.49s/it]

{'loss': 0.4646, 'learning_rate': 1.9839743589743593e-05, 'epoch': 0.02}


  1%|          | 11/1248 [00:23<46:21,  2.25s/it]

{'loss': 0.659, 'learning_rate': 1.982371794871795e-05, 'epoch': 0.02}


  1%|          | 12/1248 [00:26<47:16,  2.30s/it]

{'loss': 0.4671, 'learning_rate': 1.980769230769231e-05, 'epoch': 0.02}


  1%|          | 13/1248 [00:27<43:08,  2.10s/it]

{'loss': 0.4634, 'learning_rate': 1.979166666666667e-05, 'epoch': 0.02}


  1%|          | 14/1248 [00:29<41:51,  2.04s/it]

{'loss': 0.3761, 'learning_rate': 1.977564102564103e-05, 'epoch': 0.02}


  1%|          | 15/1248 [00:32<45:57,  2.24s/it]

{'loss': 0.3398, 'learning_rate': 1.975961538461539e-05, 'epoch': 0.02}


  1%|▏         | 16/1248 [00:35<47:37,  2.32s/it]

{'loss': 0.1964, 'learning_rate': 1.9743589743589745e-05, 'epoch': 0.03}


  1%|▏         | 17/1248 [00:37<47:44,  2.33s/it]

{'loss': 0.1631, 'learning_rate': 1.9727564102564105e-05, 'epoch': 0.03}


  1%|▏         | 18/1248 [00:39<46:30,  2.27s/it]

{'loss': 0.1107, 'learning_rate': 1.9711538461538465e-05, 'epoch': 0.03}


  2%|▏         | 19/1248 [00:42<49:39,  2.42s/it]

{'loss': 0.0838, 'learning_rate': 1.9695512820512824e-05, 'epoch': 0.03}


  2%|▏         | 20/1248 [00:45<51:29,  2.52s/it]

{'loss': 0.0662, 'learning_rate': 1.967948717948718e-05, 'epoch': 0.03}


  2%|▏         | 21/1248 [00:47<50:26,  2.47s/it]

{'loss': 0.0362, 'learning_rate': 1.966346153846154e-05, 'epoch': 0.03}


  2%|▏         | 22/1248 [00:48<44:32,  2.18s/it]

{'loss': 0.058, 'learning_rate': 1.9647435897435897e-05, 'epoch': 0.04}


  2%|▏         | 23/1248 [00:51<43:56,  2.15s/it]

{'loss': 0.0279, 'learning_rate': 1.9631410256410257e-05, 'epoch': 0.04}


  2%|▏         | 24/1248 [00:53<46:14,  2.27s/it]

{'loss': 0.013, 'learning_rate': 1.9615384615384617e-05, 'epoch': 0.04}


  2%|▏         | 25/1248 [00:55<43:26,  2.13s/it]

{'loss': 0.1138, 'learning_rate': 1.9599358974358976e-05, 'epoch': 0.04}


  2%|▏         | 26/1248 [00:56<39:40,  1.95s/it]

{'loss': 0.0127, 'learning_rate': 1.9583333333333333e-05, 'epoch': 0.04}


  2%|▏         | 27/1248 [01:00<47:59,  2.36s/it]

{'loss': 0.0654, 'learning_rate': 1.9567307692307693e-05, 'epoch': 0.04}


  2%|▏         | 28/1248 [01:02<44:45,  2.20s/it]

{'loss': 0.0517, 'learning_rate': 1.9551282051282052e-05, 'epoch': 0.04}


  2%|▏         | 29/1248 [01:04<46:08,  2.27s/it]

{'loss': 0.0741, 'learning_rate': 1.9535256410256412e-05, 'epoch': 0.05}


  2%|▏         | 30/1248 [01:07<47:32,  2.34s/it]

{'loss': 0.0061, 'learning_rate': 1.9519230769230772e-05, 'epoch': 0.05}


  2%|▏         | 31/1248 [01:08<43:34,  2.15s/it]

{'loss': 0.005, 'learning_rate': 1.950320512820513e-05, 'epoch': 0.05}


  3%|▎         | 32/1248 [01:13<59:03,  2.91s/it]

{'loss': 0.0327, 'learning_rate': 1.9487179487179488e-05, 'epoch': 0.05}


  3%|▎         | 33/1248 [01:15<52:18,  2.58s/it]

{'loss': 0.0431, 'learning_rate': 1.9471153846153848e-05, 'epoch': 0.05}


  3%|▎         | 34/1248 [01:17<48:12,  2.38s/it]

{'loss': 0.0044, 'learning_rate': 1.9455128205128208e-05, 'epoch': 0.05}


  3%|▎         | 35/1248 [01:19<46:58,  2.32s/it]

{'loss': 0.0349, 'learning_rate': 1.9439102564102564e-05, 'epoch': 0.06}


  3%|▎         | 36/1248 [01:21<43:34,  2.16s/it]

{'loss': 0.003, 'learning_rate': 1.9423076923076924e-05, 'epoch': 0.06}


  3%|▎         | 37/1248 [01:23<44:22,  2.20s/it]

{'loss': 0.0043, 'learning_rate': 1.9407051282051284e-05, 'epoch': 0.06}


  3%|▎         | 38/1248 [01:25<41:31,  2.06s/it]

{'loss': 0.0436, 'learning_rate': 1.9391025641025644e-05, 'epoch': 0.06}


  3%|▎         | 39/1248 [01:27<41:48,  2.07s/it]

{'loss': 0.0913, 'learning_rate': 1.9375e-05, 'epoch': 0.06}


  3%|▎         | 40/1248 [01:29<41:12,  2.05s/it]

{'loss': 0.0027, 'learning_rate': 1.935897435897436e-05, 'epoch': 0.06}


  3%|▎         | 41/1248 [01:31<40:04,  1.99s/it]

{'loss': 0.04, 'learning_rate': 1.934294871794872e-05, 'epoch': 0.07}


  3%|▎         | 42/1248 [01:32<38:32,  1.92s/it]

{'loss': 0.0021, 'learning_rate': 1.932692307692308e-05, 'epoch': 0.07}


  3%|▎         | 43/1248 [01:34<39:47,  1.98s/it]

{'loss': 0.004, 'learning_rate': 1.931089743589744e-05, 'epoch': 0.07}


  4%|▎         | 44/1248 [01:37<43:02,  2.14s/it]

{'loss': 0.0264, 'learning_rate': 1.9294871794871796e-05, 'epoch': 0.07}


  4%|▎         | 45/1248 [01:39<44:34,  2.22s/it]

{'loss': 0.0023, 'learning_rate': 1.9278846153846155e-05, 'epoch': 0.07}


  4%|▎         | 46/1248 [01:42<45:13,  2.26s/it]

{'loss': 0.0193, 'learning_rate': 1.9262820512820515e-05, 'epoch': 0.07}


  4%|▍         | 47/1248 [01:44<43:26,  2.17s/it]

{'loss': 0.0053, 'learning_rate': 1.9246794871794875e-05, 'epoch': 0.08}


  4%|▍         | 48/1248 [01:46<45:27,  2.27s/it]

{'loss': 0.0025, 'learning_rate': 1.923076923076923e-05, 'epoch': 0.08}


  4%|▍         | 49/1248 [01:48<44:01,  2.20s/it]

{'loss': 0.0009, 'learning_rate': 1.921474358974359e-05, 'epoch': 0.08}


  4%|▍         | 50/1248 [01:50<40:39,  2.04s/it]

{'loss': 0.08, 'learning_rate': 1.919871794871795e-05, 'epoch': 0.08}


  4%|▍         | 51/1248 [01:52<39:46,  1.99s/it]

{'loss': 0.0026, 'learning_rate': 1.918269230769231e-05, 'epoch': 0.08}


  4%|▍         | 52/1248 [01:54<38:24,  1.93s/it]

{'loss': 0.0034, 'learning_rate': 1.916666666666667e-05, 'epoch': 0.08}


  4%|▍         | 53/1248 [01:56<39:18,  1.97s/it]

{'loss': 0.0017, 'learning_rate': 1.9150641025641027e-05, 'epoch': 0.08}


  4%|▍         | 54/1248 [01:58<40:13,  2.02s/it]

{'loss': 0.0012, 'learning_rate': 1.9134615384615387e-05, 'epoch': 0.09}


  4%|▍         | 55/1248 [02:02<51:58,  2.61s/it]

{'loss': 0.0336, 'learning_rate': 1.9118589743589747e-05, 'epoch': 0.09}


  4%|▍         | 56/1248 [02:03<45:56,  2.31s/it]

{'loss': 0.0627, 'learning_rate': 1.9102564102564106e-05, 'epoch': 0.09}


  5%|▍         | 57/1248 [02:05<43:21,  2.18s/it]

{'loss': 0.0734, 'learning_rate': 1.9086538461538463e-05, 'epoch': 0.09}


  5%|▍         | 58/1248 [02:09<49:41,  2.51s/it]

{'loss': 0.0005, 'learning_rate': 1.9070512820512823e-05, 'epoch': 0.09}


  5%|▍         | 59/1248 [02:10<44:58,  2.27s/it]

{'loss': 0.0672, 'learning_rate': 1.905448717948718e-05, 'epoch': 0.09}


  5%|▍         | 60/1248 [02:15<59:52,  3.02s/it]

{'loss': 0.0203, 'learning_rate': 1.903846153846154e-05, 'epoch': 0.1}


  5%|▍         | 61/1248 [02:18<59:01,  2.98s/it]

{'loss': 0.0019, 'learning_rate': 1.90224358974359e-05, 'epoch': 0.1}


  5%|▍         | 62/1248 [02:21<56:59,  2.88s/it]

{'loss': 0.0011, 'learning_rate': 1.900641025641026e-05, 'epoch': 0.1}


  5%|▌         | 63/1248 [02:23<51:39,  2.62s/it]

{'loss': 0.05, 'learning_rate': 1.8990384615384615e-05, 'epoch': 0.1}


  5%|▌         | 64/1248 [02:24<46:26,  2.35s/it]

{'loss': 0.003, 'learning_rate': 1.8974358974358975e-05, 'epoch': 0.1}


  5%|▌         | 65/1248 [02:27<46:53,  2.38s/it]

{'loss': 0.0013, 'learning_rate': 1.8958333333333334e-05, 'epoch': 0.1}


  5%|▌         | 66/1248 [02:29<47:51,  2.43s/it]

{'loss': 0.0186, 'learning_rate': 1.8942307692307694e-05, 'epoch': 0.11}


  5%|▌         | 67/1248 [02:31<44:49,  2.28s/it]

{'loss': 0.0255, 'learning_rate': 1.892628205128205e-05, 'epoch': 0.11}


  5%|▌         | 68/1248 [02:33<39:21,  2.00s/it]

{'loss': 0.0006, 'learning_rate': 1.891025641025641e-05, 'epoch': 0.11}


  6%|▌         | 69/1248 [02:37<52:48,  2.69s/it]

{'loss': 0.0006, 'learning_rate': 1.889423076923077e-05, 'epoch': 0.11}


  6%|▌         | 70/1248 [02:40<53:08,  2.71s/it]

{'loss': 0.002, 'learning_rate': 1.887820512820513e-05, 'epoch': 0.11}


  6%|▌         | 71/1248 [02:41<48:04,  2.45s/it]

{'loss': 0.0017, 'learning_rate': 1.886217948717949e-05, 'epoch': 0.11}


  6%|▌         | 72/1248 [02:44<51:31,  2.63s/it]

{'loss': 0.0009, 'learning_rate': 1.8846153846153846e-05, 'epoch': 0.12}


  6%|▌         | 73/1248 [02:46<47:16,  2.41s/it]

{'loss': 0.0025, 'learning_rate': 1.8830128205128206e-05, 'epoch': 0.12}


  6%|▌         | 74/1248 [02:48<45:01,  2.30s/it]

{'loss': 0.0537, 'learning_rate': 1.8814102564102566e-05, 'epoch': 0.12}


  6%|▌         | 75/1248 [02:51<46:24,  2.37s/it]

{'loss': 0.0017, 'learning_rate': 1.8798076923076926e-05, 'epoch': 0.12}


  6%|▌         | 76/1248 [02:53<43:23,  2.22s/it]

{'loss': 0.0266, 'learning_rate': 1.8782051282051282e-05, 'epoch': 0.12}


  6%|▌         | 77/1248 [02:55<40:24,  2.07s/it]

{'loss': 0.002, 'learning_rate': 1.8766025641025642e-05, 'epoch': 0.12}


  6%|▋         | 78/1248 [02:56<39:19,  2.02s/it]

{'loss': 0.0537, 'learning_rate': 1.8750000000000002e-05, 'epoch': 0.12}


  6%|▋         | 79/1248 [02:59<42:44,  2.19s/it]

{'loss': 0.0254, 'learning_rate': 1.873397435897436e-05, 'epoch': 0.13}


  6%|▋         | 80/1248 [03:01<39:45,  2.04s/it]

{'loss': 0.0011, 'learning_rate': 1.8717948717948718e-05, 'epoch': 0.13}


  6%|▋         | 81/1248 [03:03<42:59,  2.21s/it]

{'loss': 0.001, 'learning_rate': 1.8701923076923078e-05, 'epoch': 0.13}


  7%|▋         | 82/1248 [03:05<40:58,  2.11s/it]

{'loss': 0.0012, 'learning_rate': 1.8685897435897438e-05, 'epoch': 0.13}


  7%|▋         | 83/1248 [03:08<43:45,  2.25s/it]

{'loss': 0.0008, 'learning_rate': 1.8669871794871797e-05, 'epoch': 0.13}


  7%|▋         | 84/1248 [03:12<55:28,  2.86s/it]

{'loss': 0.0209, 'learning_rate': 1.8653846153846157e-05, 'epoch': 0.13}


  7%|▋         | 85/1248 [03:14<52:28,  2.71s/it]

{'loss': 0.0008, 'learning_rate': 1.8637820512820514e-05, 'epoch': 0.14}


  7%|▋         | 86/1248 [03:17<50:37,  2.61s/it]

{'loss': 0.023, 'learning_rate': 1.8621794871794873e-05, 'epoch': 0.14}


  7%|▋         | 87/1248 [03:18<43:54,  2.27s/it]

{'loss': 0.0266, 'learning_rate': 1.8605769230769233e-05, 'epoch': 0.14}


  7%|▋         | 88/1248 [03:23<56:09,  2.90s/it]

{'loss': 0.0006, 'learning_rate': 1.8589743589743593e-05, 'epoch': 0.14}


  7%|▋         | 89/1248 [03:24<48:27,  2.51s/it]

{'loss': 0.0009, 'learning_rate': 1.857371794871795e-05, 'epoch': 0.14}


  7%|▋         | 90/1248 [03:27<48:08,  2.49s/it]

{'loss': 0.0011, 'learning_rate': 1.855769230769231e-05, 'epoch': 0.14}


  7%|▋         | 91/1248 [03:29<44:43,  2.32s/it]

{'loss': 0.005, 'learning_rate': 1.854166666666667e-05, 'epoch': 0.15}


  7%|▋         | 92/1248 [03:31<43:21,  2.25s/it]

{'loss': 0.0005, 'learning_rate': 1.852564102564103e-05, 'epoch': 0.15}


  7%|▋         | 93/1248 [03:33<42:42,  2.22s/it]

{'loss': 0.0014, 'learning_rate': 1.850961538461539e-05, 'epoch': 0.15}


  8%|▊         | 94/1248 [03:35<41:27,  2.16s/it]

{'loss': 0.0006, 'learning_rate': 1.8493589743589745e-05, 'epoch': 0.15}


  8%|▊         | 95/1248 [03:37<43:43,  2.27s/it]

{'loss': 0.0016, 'learning_rate': 1.8477564102564105e-05, 'epoch': 0.15}


  8%|▊         | 96/1248 [03:42<57:06,  2.97s/it]

{'loss': 0.0003, 'learning_rate': 1.8461538461538465e-05, 'epoch': 0.15}


  8%|▊         | 97/1248 [03:44<50:55,  2.65s/it]

{'loss': 0.0189, 'learning_rate': 1.8445512820512824e-05, 'epoch': 0.16}


  8%|▊         | 98/1248 [03:46<47:19,  2.47s/it]

{'loss': 0.0004, 'learning_rate': 1.842948717948718e-05, 'epoch': 0.16}


  8%|▊         | 99/1248 [03:48<46:38,  2.44s/it]

{'loss': 0.0007, 'learning_rate': 1.841346153846154e-05, 'epoch': 0.16}


  8%|▊         | 100/1248 [03:50<43:32,  2.28s/it]***** Running Evaluation *****
  Num examples = 6239
  Batch size = 10


{'loss': 0.0143, 'learning_rate': 1.8397435897435897e-05, 'epoch': 0.16}


                                                  
  8%|▊         | 100/1248 [08:29<43:32,  2.28s/it]Saving model checkpoint to bert-base-cased-finetuned-adjs-wsent-black-clover/checkpoint-100
Configuration saved in bert-base-cased-finetuned-adjs-wsent-black-clover/checkpoint-100/config.json


{'eval_loss': 0.015754861757159233, 'eval_runtime': 278.4739, 'eval_samples_per_second': 22.404, 'eval_steps_per_second': 2.241, 'epoch': 0.16}


Model weights saved in bert-base-cased-finetuned-adjs-wsent-black-clover/checkpoint-100/pytorch_model.bin
  8%|▊         | 101/1248 [08:32<27:25:53, 86.10s/it]

{'loss': 0.007, 'learning_rate': 1.8381410256410257e-05, 'epoch': 0.16}


  8%|▊         | 102/1248 [08:34<19:23:19, 60.91s/it]

{'loss': 0.0324, 'learning_rate': 1.8365384615384617e-05, 'epoch': 0.16}


  8%|▊         | 103/1248 [08:35<13:41:31, 43.05s/it]

{'loss': 0.018, 'learning_rate': 1.8349358974358976e-05, 'epoch': 0.17}


  8%|▊         | 104/1248 [08:38<9:51:26, 31.02s/it] 

{'loss': 0.0107, 'learning_rate': 1.8333333333333333e-05, 'epoch': 0.17}


  8%|▊         | 105/1248 [08:41<7:07:45, 22.45s/it]

{'loss': 0.0006, 'learning_rate': 1.8317307692307693e-05, 'epoch': 0.17}


  8%|▊         | 106/1248 [08:43<5:10:30, 16.31s/it]

{'loss': 0.0126, 'learning_rate': 1.8301282051282052e-05, 'epoch': 0.17}


  9%|▊         | 107/1248 [08:45<3:47:57, 11.99s/it]

{'loss': 0.0006, 'learning_rate': 1.8285256410256412e-05, 'epoch': 0.17}


  9%|▊         | 108/1248 [08:47<2:51:31,  9.03s/it]

{'loss': 0.0123, 'learning_rate': 1.826923076923077e-05, 'epoch': 0.17}


  9%|▊         | 109/1248 [08:48<2:08:20,  6.76s/it]

{'loss': 0.0126, 'learning_rate': 1.825320512820513e-05, 'epoch': 0.17}


  9%|▉         | 110/1248 [08:51<1:42:04,  5.38s/it]

{'loss': 0.0096, 'learning_rate': 1.8237179487179488e-05, 'epoch': 0.18}


  9%|▉         | 111/1248 [08:52<1:22:25,  4.35s/it]

{'loss': 0.0181, 'learning_rate': 1.8221153846153848e-05, 'epoch': 0.18}


  9%|▉         | 112/1248 [08:55<1:11:06,  3.76s/it]

{'loss': 0.0137, 'learning_rate': 1.8205128205128208e-05, 'epoch': 0.18}


  9%|▉         | 113/1248 [08:56<59:06,  3.12s/it]  

{'loss': 0.001, 'learning_rate': 1.8189102564102564e-05, 'epoch': 0.18}


  9%|▉         | 114/1248 [08:58<51:38,  2.73s/it]

{'loss': 0.0027, 'learning_rate': 1.8173076923076924e-05, 'epoch': 0.18}


  9%|▉         | 115/1248 [09:00<47:47,  2.53s/it]

{'loss': 0.0008, 'learning_rate': 1.8157051282051284e-05, 'epoch': 0.18}


  9%|▉         | 116/1248 [09:03<48:10,  2.55s/it]

{'loss': 0.0086, 'learning_rate': 1.8141025641025644e-05, 'epoch': 0.19}


  9%|▉         | 117/1248 [09:05<46:11,  2.45s/it]

{'loss': 0.0006, 'learning_rate': 1.8125e-05, 'epoch': 0.19}


  9%|▉         | 118/1248 [09:07<44:23,  2.36s/it]

{'loss': 0.0007, 'learning_rate': 1.810897435897436e-05, 'epoch': 0.19}


 10%|▉         | 119/1248 [09:10<45:59,  2.44s/it]

{'loss': 0.0088, 'learning_rate': 1.809294871794872e-05, 'epoch': 0.19}


 10%|▉         | 120/1248 [09:12<42:58,  2.29s/it]

{'loss': 0.001, 'learning_rate': 1.807692307692308e-05, 'epoch': 0.19}


 10%|▉         | 121/1248 [09:14<40:27,  2.15s/it]

{'loss': 0.0006, 'learning_rate': 1.8060897435897436e-05, 'epoch': 0.19}


 10%|▉         | 122/1248 [09:15<37:53,  2.02s/it]

{'loss': 0.001, 'learning_rate': 1.8044871794871796e-05, 'epoch': 0.2}


 10%|▉         | 123/1248 [09:17<36:00,  1.92s/it]

{'loss': 0.0008, 'learning_rate': 1.8028846153846156e-05, 'epoch': 0.2}


 10%|▉         | 124/1248 [09:19<38:09,  2.04s/it]

{'loss': 0.0081, 'learning_rate': 1.8012820512820515e-05, 'epoch': 0.2}


 10%|█         | 125/1248 [09:22<43:24,  2.32s/it]

{'loss': 0.0011, 'learning_rate': 1.7996794871794875e-05, 'epoch': 0.2}


 10%|█         | 126/1248 [09:25<42:34,  2.28s/it]

{'loss': 0.0004, 'learning_rate': 1.798076923076923e-05, 'epoch': 0.2}


 10%|█         | 127/1248 [09:27<40:47,  2.18s/it]

{'loss': 0.0012, 'learning_rate': 1.796474358974359e-05, 'epoch': 0.2}


 10%|█         | 128/1248 [09:28<38:23,  2.06s/it]

{'loss': 0.0063, 'learning_rate': 1.794871794871795e-05, 'epoch': 0.21}


 10%|█         | 129/1248 [09:30<37:37,  2.02s/it]

{'loss': 0.0009, 'learning_rate': 1.793269230769231e-05, 'epoch': 0.21}


 10%|█         | 130/1248 [09:33<40:32,  2.18s/it]

{'loss': 0.0195, 'learning_rate': 1.7916666666666667e-05, 'epoch': 0.21}


 10%|█         | 131/1248 [09:35<42:09,  2.26s/it]

{'loss': 0.0004, 'learning_rate': 1.7900641025641027e-05, 'epoch': 0.21}


 11%|█         | 132/1248 [09:38<46:54,  2.52s/it]

{'loss': 0.0041, 'learning_rate': 1.7884615384615387e-05, 'epoch': 0.21}


 11%|█         | 133/1248 [09:41<46:34,  2.51s/it]

{'loss': 0.0046, 'learning_rate': 1.7868589743589747e-05, 'epoch': 0.21}


 11%|█         | 134/1248 [09:43<46:02,  2.48s/it]

{'loss': 0.0038, 'learning_rate': 1.7852564102564107e-05, 'epoch': 0.21}


 11%|█         | 135/1248 [09:46<46:33,  2.51s/it]

{'loss': 0.0128, 'learning_rate': 1.7836538461538463e-05, 'epoch': 0.22}


 11%|█         | 136/1248 [09:49<49:49,  2.69s/it]

{'loss': 0.0035, 'learning_rate': 1.7820512820512823e-05, 'epoch': 0.22}


 11%|█         | 137/1248 [09:51<43:46,  2.36s/it]

{'loss': 0.0059, 'learning_rate': 1.780448717948718e-05, 'epoch': 0.22}


 11%|█         | 138/1248 [09:52<41:27,  2.24s/it]

{'loss': 0.0027, 'learning_rate': 1.778846153846154e-05, 'epoch': 0.22}


 11%|█         | 139/1248 [09:54<39:06,  2.12s/it]

{'loss': 0.0019, 'learning_rate': 1.77724358974359e-05, 'epoch': 0.22}


 11%|█         | 140/1248 [09:56<37:54,  2.05s/it]

{'loss': 0.0036, 'learning_rate': 1.775641025641026e-05, 'epoch': 0.22}


 11%|█▏        | 141/1248 [09:58<35:46,  1.94s/it]

{'loss': 0.0008, 'learning_rate': 1.7740384615384615e-05, 'epoch': 0.23}


 11%|█▏        | 142/1248 [10:00<35:52,  1.95s/it]

{'loss': 0.0009, 'learning_rate': 1.7724358974358975e-05, 'epoch': 0.23}


 11%|█▏        | 143/1248 [10:02<34:29,  1.87s/it]

{'loss': 0.0011, 'learning_rate': 1.7708333333333335e-05, 'epoch': 0.23}


 12%|█▏        | 144/1248 [10:03<34:06,  1.85s/it]

{'loss': 0.0007, 'learning_rate': 1.7692307692307694e-05, 'epoch': 0.23}


 12%|█▏        | 145/1248 [10:06<38:30,  2.10s/it]

{'loss': 0.0006, 'learning_rate': 1.767628205128205e-05, 'epoch': 0.23}


 12%|█▏        | 146/1248 [10:08<35:54,  1.96s/it]

{'loss': 0.0072, 'learning_rate': 1.766025641025641e-05, 'epoch': 0.23}


 12%|█▏        | 147/1248 [10:10<39:37,  2.16s/it]

{'loss': 0.0044, 'learning_rate': 1.764423076923077e-05, 'epoch': 0.24}


 12%|█▏        | 148/1248 [10:12<37:08,  2.03s/it]

{'loss': 0.0051, 'learning_rate': 1.762820512820513e-05, 'epoch': 0.24}


 12%|█▏        | 149/1248 [10:14<34:26,  1.88s/it]

{'loss': 0.0007, 'learning_rate': 1.7612179487179487e-05, 'epoch': 0.24}


 12%|█▏        | 150/1248 [10:16<39:04,  2.14s/it]

{'loss': 0.0023, 'learning_rate': 1.7596153846153846e-05, 'epoch': 0.24}


 12%|█▏        | 151/1248 [10:19<39:48,  2.18s/it]

{'loss': 0.0006, 'learning_rate': 1.7580128205128206e-05, 'epoch': 0.24}


 12%|█▏        | 152/1248 [10:20<37:11,  2.04s/it]

{'loss': 0.0019, 'learning_rate': 1.7564102564102566e-05, 'epoch': 0.24}


 12%|█▏        | 153/1248 [10:22<36:29,  2.00s/it]

{'loss': 0.0041, 'learning_rate': 1.7548076923076926e-05, 'epoch': 0.25}


 12%|█▏        | 154/1248 [10:24<34:56,  1.92s/it]

{'loss': 0.0004, 'learning_rate': 1.7532051282051282e-05, 'epoch': 0.25}


 12%|█▏        | 155/1248 [10:27<39:05,  2.15s/it]

{'loss': 0.0004, 'learning_rate': 1.7516025641025642e-05, 'epoch': 0.25}


 12%|█▎        | 156/1248 [10:29<39:31,  2.17s/it]

{'loss': 0.0022, 'learning_rate': 1.7500000000000002e-05, 'epoch': 0.25}


 13%|█▎        | 157/1248 [10:31<40:08,  2.21s/it]

{'loss': 0.0008, 'learning_rate': 1.748397435897436e-05, 'epoch': 0.25}


 13%|█▎        | 158/1248 [10:33<37:35,  2.07s/it]

{'loss': 0.0021, 'learning_rate': 1.7467948717948718e-05, 'epoch': 0.25}


 13%|█▎        | 159/1248 [10:35<37:48,  2.08s/it]

{'loss': 0.0004, 'learning_rate': 1.7451923076923078e-05, 'epoch': 0.25}


 13%|█▎        | 160/1248 [10:37<37:11,  2.05s/it]

{'loss': 0.0017, 'learning_rate': 1.7435897435897438e-05, 'epoch': 0.26}


 13%|█▎        | 161/1248 [10:38<34:22,  1.90s/it]

{'loss': 0.0037, 'learning_rate': 1.7419871794871797e-05, 'epoch': 0.26}


 13%|█▎        | 162/1248 [10:40<34:15,  1.89s/it]

{'loss': 0.0005, 'learning_rate': 1.7403846153846157e-05, 'epoch': 0.26}


 13%|█▎        | 163/1248 [10:43<38:48,  2.15s/it]

{'loss': 0.0016, 'learning_rate': 1.7387820512820514e-05, 'epoch': 0.26}


 13%|█▎        | 164/1248 [10:45<35:50,  1.98s/it]

{'loss': 0.0002, 'learning_rate': 1.7371794871794873e-05, 'epoch': 0.26}


 13%|█▎        | 165/1248 [10:47<36:51,  2.04s/it]

{'loss': 0.0004, 'learning_rate': 1.7355769230769233e-05, 'epoch': 0.26}


 13%|█▎        | 166/1248 [10:49<38:13,  2.12s/it]

{'loss': 0.0013, 'learning_rate': 1.7339743589743593e-05, 'epoch': 0.27}


 13%|█▎        | 167/1248 [10:51<38:53,  2.16s/it]

{'loss': 0.0011, 'learning_rate': 1.732371794871795e-05, 'epoch': 0.27}


 13%|█▎        | 168/1248 [10:54<40:57,  2.28s/it]

{'loss': 0.0004, 'learning_rate': 1.730769230769231e-05, 'epoch': 0.27}


 14%|█▎        | 169/1248 [10:56<39:57,  2.22s/it]

{'loss': 0.0024, 'learning_rate': 1.729166666666667e-05, 'epoch': 0.27}


 14%|█▎        | 170/1248 [10:58<38:56,  2.17s/it]

{'loss': 0.0004, 'learning_rate': 1.727564102564103e-05, 'epoch': 0.27}


 14%|█▎        | 171/1248 [11:00<40:02,  2.23s/it]

{'loss': 0.0003, 'learning_rate': 1.7259615384615385e-05, 'epoch': 0.27}


 14%|█▍        | 172/1248 [11:03<40:26,  2.26s/it]

{'loss': 0.0004, 'learning_rate': 1.7243589743589745e-05, 'epoch': 0.28}


 14%|█▍        | 173/1248 [11:05<38:22,  2.14s/it]

{'loss': 0.0003, 'learning_rate': 1.7227564102564105e-05, 'epoch': 0.28}


 14%|█▍        | 174/1248 [11:06<35:03,  1.96s/it]

{'loss': 0.0002, 'learning_rate': 1.7211538461538465e-05, 'epoch': 0.28}


 14%|█▍        | 175/1248 [11:09<38:29,  2.15s/it]

{'loss': 0.0562, 'learning_rate': 1.7195512820512825e-05, 'epoch': 0.28}


 14%|█▍        | 176/1248 [11:11<38:15,  2.14s/it]

{'loss': 0.0033, 'learning_rate': 1.717948717948718e-05, 'epoch': 0.28}


 14%|█▍        | 177/1248 [11:13<38:24,  2.15s/it]

{'loss': 0.0004, 'learning_rate': 1.7163461538461537e-05, 'epoch': 0.28}


 14%|█▍        | 178/1248 [11:15<38:13,  2.14s/it]

{'loss': 0.0003, 'learning_rate': 1.7147435897435897e-05, 'epoch': 0.29}


 14%|█▍        | 179/1248 [11:17<35:52,  2.01s/it]

{'loss': 0.0046, 'learning_rate': 1.7131410256410257e-05, 'epoch': 0.29}


 14%|█▍        | 180/1248 [11:19<35:57,  2.02s/it]

{'loss': 0.0005, 'learning_rate': 1.7115384615384617e-05, 'epoch': 0.29}


 15%|█▍        | 181/1248 [11:20<33:12,  1.87s/it]

{'loss': 0.0004, 'learning_rate': 1.7099358974358977e-05, 'epoch': 0.29}


 15%|█▍        | 182/1248 [11:25<48:43,  2.74s/it]

{'loss': 0.0003, 'learning_rate': 1.7083333333333333e-05, 'epoch': 0.29}


 15%|█▍        | 183/1248 [11:27<43:33,  2.45s/it]

{'loss': 0.0003, 'learning_rate': 1.7067307692307693e-05, 'epoch': 0.29}


 15%|█▍        | 184/1248 [11:30<45:10,  2.55s/it]

{'loss': 0.0003, 'learning_rate': 1.7051282051282053e-05, 'epoch': 0.29}


 15%|█▍        | 185/1248 [11:32<41:32,  2.35s/it]

{'loss': 0.0018, 'learning_rate': 1.7035256410256412e-05, 'epoch': 0.3}


 15%|█▍        | 186/1248 [11:34<40:46,  2.30s/it]

{'loss': 0.0013, 'learning_rate': 1.701923076923077e-05, 'epoch': 0.3}


 15%|█▍        | 187/1248 [11:35<35:52,  2.03s/it]

{'loss': 0.0003, 'learning_rate': 1.700320512820513e-05, 'epoch': 0.3}


 15%|█▌        | 188/1248 [11:37<36:26,  2.06s/it]

{'loss': 0.0012, 'learning_rate': 1.698717948717949e-05, 'epoch': 0.3}


 15%|█▌        | 189/1248 [11:39<35:52,  2.03s/it]

{'loss': 0.0016, 'learning_rate': 1.6971153846153848e-05, 'epoch': 0.3}


 15%|█▌        | 190/1248 [11:41<34:16,  1.94s/it]

{'loss': 0.0011, 'learning_rate': 1.6955128205128205e-05, 'epoch': 0.3}


 15%|█▌        | 191/1248 [11:43<35:09,  2.00s/it]

{'loss': 0.0004, 'learning_rate': 1.6939102564102564e-05, 'epoch': 0.31}


 15%|█▌        | 192/1248 [11:45<33:15,  1.89s/it]

{'loss': 0.0005, 'learning_rate': 1.6923076923076924e-05, 'epoch': 0.31}


 15%|█▌        | 193/1248 [11:47<36:06,  2.05s/it]

{'loss': 0.0006, 'learning_rate': 1.6907051282051284e-05, 'epoch': 0.31}


 16%|█▌        | 194/1248 [11:50<38:58,  2.22s/it]

{'loss': 0.0004, 'learning_rate': 1.6891025641025644e-05, 'epoch': 0.31}


 16%|█▌        | 195/1248 [11:52<38:08,  2.17s/it]

{'loss': 0.0005, 'learning_rate': 1.6875e-05, 'epoch': 0.31}


 16%|█▌        | 196/1248 [11:55<40:02,  2.28s/it]

{'loss': 0.0006, 'learning_rate': 1.685897435897436e-05, 'epoch': 0.31}


 16%|█▌        | 197/1248 [11:57<40:36,  2.32s/it]

{'loss': 0.0004, 'learning_rate': 1.684294871794872e-05, 'epoch': 0.32}


 16%|█▌        | 198/1248 [11:59<37:25,  2.14s/it]

{'loss': 0.0008, 'learning_rate': 1.682692307692308e-05, 'epoch': 0.32}


 16%|█▌        | 199/1248 [12:00<34:33,  1.98s/it]

{'loss': 0.0023, 'learning_rate': 1.6810897435897436e-05, 'epoch': 0.32}


 16%|█▌        | 200/1248 [12:03<36:04,  2.07s/it]***** Running Evaluation *****
  Num examples = 6239
  Batch size = 10


{'loss': 0.001, 'learning_rate': 1.6794871794871796e-05, 'epoch': 0.32}


                                                  
 16%|█▌        | 200/1248 [16:42<36:04,  2.07s/it]Saving model checkpoint to bert-base-cased-finetuned-adjs-wsent-black-clover/checkpoint-200
Configuration saved in bert-base-cased-finetuned-adjs-wsent-black-clover/checkpoint-200/config.json


{'eval_loss': 0.00055781623814255, 'eval_runtime': 279.8127, 'eval_samples_per_second': 22.297, 'eval_steps_per_second': 2.23, 'epoch': 0.32}


Model weights saved in bert-base-cased-finetuned-adjs-wsent-black-clover/checkpoint-200/pytorch_model.bin
 16%|█▌        | 201/1248 [16:48<25:19:00, 87.05s/it]

{'loss': 0.0004, 'learning_rate': 1.6778846153846156e-05, 'epoch': 0.32}


 16%|█▌        | 202/1248 [16:50<17:50:53, 61.43s/it]

{'loss': 0.0004, 'learning_rate': 1.6762820512820515e-05, 'epoch': 0.32}


 16%|█▋        | 203/1248 [16:51<12:37:37, 43.50s/it]

{'loss': 0.0004, 'learning_rate': 1.6746794871794875e-05, 'epoch': 0.33}


 16%|█▋        | 204/1248 [16:56<9:14:24, 31.86s/it] 

{'loss': 0.0008, 'learning_rate': 1.673076923076923e-05, 'epoch': 0.33}


 16%|█▋        | 205/1248 [16:58<6:38:32, 22.93s/it]

{'loss': 0.0016, 'learning_rate': 1.671474358974359e-05, 'epoch': 0.33}


 17%|█▋        | 206/1248 [17:00<4:50:14, 16.71s/it]

{'loss': 0.0008, 'learning_rate': 1.669871794871795e-05, 'epoch': 0.33}


 17%|█▋        | 207/1248 [17:02<3:33:59, 12.33s/it]

{'loss': 0.0003, 'learning_rate': 1.668269230769231e-05, 'epoch': 0.33}


 17%|█▋        | 208/1248 [17:04<2:38:30,  9.14s/it]

{'loss': 0.001, 'learning_rate': 1.6666666666666667e-05, 'epoch': 0.33}


 17%|█▋        | 209/1248 [17:06<2:02:46,  7.09s/it]

{'loss': 0.0008, 'learning_rate': 1.6650641025641027e-05, 'epoch': 0.33}


 17%|█▋        | 210/1248 [17:08<1:35:33,  5.52s/it]

{'loss': 0.0004, 'learning_rate': 1.6634615384615387e-05, 'epoch': 0.34}


 17%|█▋        | 211/1248 [17:11<1:21:02,  4.69s/it]

{'loss': 0.0008, 'learning_rate': 1.6618589743589747e-05, 'epoch': 0.34}


 17%|█▋        | 212/1248 [17:13<1:05:31,  3.80s/it]

{'loss': 0.0004, 'learning_rate': 1.6602564102564103e-05, 'epoch': 0.34}


 17%|█▋        | 213/1248 [17:15<59:03,  3.42s/it]  

{'loss': 0.0008, 'learning_rate': 1.6586538461538463e-05, 'epoch': 0.34}


 17%|█▋        | 214/1248 [17:18<53:33,  3.11s/it]

{'loss': 0.0009, 'learning_rate': 1.6570512820512823e-05, 'epoch': 0.34}


 17%|█▋        | 215/1248 [17:19<47:32,  2.76s/it]

{'loss': 0.0004, 'learning_rate': 1.655448717948718e-05, 'epoch': 0.34}


 17%|█▋        | 216/1248 [17:22<44:27,  2.58s/it]

{'loss': 0.0002, 'learning_rate': 1.653846153846154e-05, 'epoch': 0.35}


 17%|█▋        | 217/1248 [17:24<42:46,  2.49s/it]

{'loss': 0.0012, 'learning_rate': 1.65224358974359e-05, 'epoch': 0.35}


 17%|█▋        | 218/1248 [17:26<42:40,  2.49s/it]

{'loss': 0.0006, 'learning_rate': 1.6506410256410255e-05, 'epoch': 0.35}


 18%|█▊        | 219/1248 [17:28<38:59,  2.27s/it]

{'loss': 0.0041, 'learning_rate': 1.6490384615384615e-05, 'epoch': 0.35}


 18%|█▊        | 220/1248 [17:30<38:31,  2.25s/it]

{'loss': 0.0003, 'learning_rate': 1.6474358974358975e-05, 'epoch': 0.35}


 18%|█▊        | 221/1248 [17:32<37:01,  2.16s/it]

{'loss': 0.0002, 'learning_rate': 1.6458333333333335e-05, 'epoch': 0.35}


 18%|█▊        | 222/1248 [17:34<35:06,  2.05s/it]

{'loss': 0.0006, 'learning_rate': 1.6442307692307695e-05, 'epoch': 0.36}


 18%|█▊        | 223/1248 [17:36<35:32,  2.08s/it]

{'loss': 0.0003, 'learning_rate': 1.642628205128205e-05, 'epoch': 0.36}


 18%|█▊        | 224/1248 [17:38<33:57,  1.99s/it]

{'loss': 0.0009, 'learning_rate': 1.641025641025641e-05, 'epoch': 0.36}


 18%|█▊        | 225/1248 [17:40<34:03,  2.00s/it]

{'loss': 0.0015, 'learning_rate': 1.639423076923077e-05, 'epoch': 0.36}


 18%|█▊        | 226/1248 [17:42<33:34,  1.97s/it]

{'loss': 0.0009, 'learning_rate': 1.637820512820513e-05, 'epoch': 0.36}


 18%|█▊        | 227/1248 [17:44<32:58,  1.94s/it]

{'loss': 0.0007, 'learning_rate': 1.6362179487179487e-05, 'epoch': 0.36}


 18%|█▊        | 228/1248 [17:46<31:29,  1.85s/it]

{'loss': 0.0005, 'learning_rate': 1.6346153846153847e-05, 'epoch': 0.37}


 18%|█▊        | 229/1248 [17:47<30:37,  1.80s/it]

{'loss': 0.0019, 'learning_rate': 1.6330128205128206e-05, 'epoch': 0.37}


 18%|█▊        | 230/1248 [17:50<33:42,  1.99s/it]

{'loss': 0.0005, 'learning_rate': 1.6314102564102566e-05, 'epoch': 0.37}


 19%|█▊        | 231/1248 [17:52<34:16,  2.02s/it]

{'loss': 0.0252, 'learning_rate': 1.6298076923076923e-05, 'epoch': 0.37}


 19%|█▊        | 232/1248 [17:54<33:26,  1.97s/it]

{'loss': 0.0005, 'learning_rate': 1.6282051282051282e-05, 'epoch': 0.37}


 19%|█▊        | 233/1248 [17:56<36:16,  2.14s/it]

{'loss': 0.0012, 'learning_rate': 1.6266025641025642e-05, 'epoch': 0.37}


 19%|█▉        | 234/1248 [17:58<37:13,  2.20s/it]

{'loss': 0.0013, 'learning_rate': 1.6250000000000002e-05, 'epoch': 0.38}


 19%|█▉        | 235/1248 [18:01<39:26,  2.34s/it]

{'loss': 0.001, 'learning_rate': 1.6233974358974362e-05, 'epoch': 0.38}


 19%|█▉        | 236/1248 [18:03<37:59,  2.25s/it]

{'loss': 0.0004, 'learning_rate': 1.6217948717948718e-05, 'epoch': 0.38}


 19%|█▉        | 237/1248 [18:05<37:39,  2.23s/it]

{'loss': 0.0004, 'learning_rate': 1.6201923076923078e-05, 'epoch': 0.38}


 19%|█▉        | 238/1248 [18:07<36:55,  2.19s/it]

{'loss': 0.0022, 'learning_rate': 1.6185897435897438e-05, 'epoch': 0.38}


 19%|█▉        | 239/1248 [18:10<38:17,  2.28s/it]

{'loss': 0.001, 'learning_rate': 1.6169871794871798e-05, 'epoch': 0.38}


 19%|█▉        | 240/1248 [18:12<35:19,  2.10s/it]

{'loss': 0.0003, 'learning_rate': 1.6153846153846154e-05, 'epoch': 0.38}


 19%|█▉        | 241/1248 [18:14<34:20,  2.05s/it]

{'loss': 0.0004, 'learning_rate': 1.6137820512820514e-05, 'epoch': 0.39}


 19%|█▉        | 242/1248 [18:16<34:35,  2.06s/it]

{'loss': 0.0004, 'learning_rate': 1.6121794871794874e-05, 'epoch': 0.39}


 19%|█▉        | 243/1248 [18:18<34:08,  2.04s/it]

{'loss': 0.0011, 'learning_rate': 1.6105769230769233e-05, 'epoch': 0.39}


 20%|█▉        | 244/1248 [18:19<30:06,  1.80s/it]

{'loss': 0.0005, 'learning_rate': 1.6089743589743593e-05, 'epoch': 0.39}


 20%|█▉        | 245/1248 [18:22<36:02,  2.16s/it]

{'loss': 0.0004, 'learning_rate': 1.607371794871795e-05, 'epoch': 0.39}


 20%|█▉        | 246/1248 [18:27<49:20,  2.95s/it]

{'loss': 0.0002, 'learning_rate': 1.605769230769231e-05, 'epoch': 0.39}


 20%|█▉        | 247/1248 [18:29<44:57,  2.70s/it]

{'loss': 0.0009, 'learning_rate': 1.604166666666667e-05, 'epoch': 0.4}


 20%|█▉        | 248/1248 [18:31<44:42,  2.68s/it]

{'loss': 0.0006, 'learning_rate': 1.602564102564103e-05, 'epoch': 0.4}


 20%|█▉        | 249/1248 [18:34<44:34,  2.68s/it]

{'loss': 0.0002, 'learning_rate': 1.6009615384615385e-05, 'epoch': 0.4}


 20%|██        | 250/1248 [18:36<41:30,  2.50s/it]

{'loss': 0.0003, 'learning_rate': 1.5993589743589745e-05, 'epoch': 0.4}


 20%|██        | 251/1248 [18:38<39:33,  2.38s/it]

{'loss': 0.0018, 'learning_rate': 1.5977564102564105e-05, 'epoch': 0.4}


 20%|██        | 252/1248 [18:40<38:12,  2.30s/it]

{'loss': 0.0002, 'learning_rate': 1.5961538461538465e-05, 'epoch': 0.4}


 20%|██        | 253/1248 [18:42<35:49,  2.16s/it]

{'loss': 0.0002, 'learning_rate': 1.594551282051282e-05, 'epoch': 0.41}


 20%|██        | 254/1248 [18:44<35:21,  2.13s/it]

{'loss': 0.0004, 'learning_rate': 1.592948717948718e-05, 'epoch': 0.41}


 20%|██        | 255/1248 [18:46<35:28,  2.14s/it]

{'loss': 0.0038, 'learning_rate': 1.5913461538461537e-05, 'epoch': 0.41}


 21%|██        | 256/1248 [18:49<37:02,  2.24s/it]

{'loss': 0.0002, 'learning_rate': 1.5897435897435897e-05, 'epoch': 0.41}


 21%|██        | 257/1248 [18:52<40:28,  2.45s/it]

{'loss': 0.0003, 'learning_rate': 1.5881410256410257e-05, 'epoch': 0.41}


 21%|██        | 258/1248 [18:55<43:05,  2.61s/it]

{'loss': 0.0002, 'learning_rate': 1.5865384615384617e-05, 'epoch': 0.41}


 21%|██        | 259/1248 [18:57<40:49,  2.48s/it]

{'loss': 0.002, 'learning_rate': 1.5849358974358973e-05, 'epoch': 0.42}


 21%|██        | 260/1248 [18:59<37:22,  2.27s/it]

{'loss': 0.0021, 'learning_rate': 1.5833333333333333e-05, 'epoch': 0.42}


 21%|██        | 261/1248 [19:02<41:55,  2.55s/it]

{'loss': 0.0002, 'learning_rate': 1.5817307692307693e-05, 'epoch': 0.42}


 21%|██        | 262/1248 [19:04<41:40,  2.54s/it]

{'loss': 0.0006, 'learning_rate': 1.5801282051282053e-05, 'epoch': 0.42}


 21%|██        | 263/1248 [19:07<40:56,  2.49s/it]

{'loss': 0.0016, 'learning_rate': 1.5785256410256412e-05, 'epoch': 0.42}


 21%|██        | 264/1248 [19:10<44:19,  2.70s/it]

{'loss': 0.0012, 'learning_rate': 1.576923076923077e-05, 'epoch': 0.42}


 21%|██        | 265/1248 [19:12<42:34,  2.60s/it]

{'loss': 0.0003, 'learning_rate': 1.575320512820513e-05, 'epoch': 0.42}


 21%|██▏       | 266/1248 [19:14<38:23,  2.35s/it]

{'loss': 0.0008, 'learning_rate': 1.573717948717949e-05, 'epoch': 0.43}


 21%|██▏       | 267/1248 [19:17<39:12,  2.40s/it]

{'loss': 0.0003, 'learning_rate': 1.5721153846153848e-05, 'epoch': 0.43}


 21%|██▏       | 268/1248 [19:19<40:23,  2.47s/it]

{'loss': 0.0005, 'learning_rate': 1.5705128205128205e-05, 'epoch': 0.43}


 22%|██▏       | 269/1248 [19:21<37:34,  2.30s/it]

{'loss': 0.0003, 'learning_rate': 1.5689102564102565e-05, 'epoch': 0.43}


 22%|██▏       | 270/1248 [19:24<40:47,  2.50s/it]

{'loss': 0.0004, 'learning_rate': 1.5673076923076924e-05, 'epoch': 0.43}


 22%|██▏       | 271/1248 [19:26<37:42,  2.32s/it]

{'loss': 0.0002, 'learning_rate': 1.5657051282051284e-05, 'epoch': 0.43}


 22%|██▏       | 272/1248 [19:28<36:44,  2.26s/it]

{'loss': 0.0003, 'learning_rate': 1.5641025641025644e-05, 'epoch': 0.44}


 22%|██▏       | 273/1248 [19:30<36:14,  2.23s/it]

{'loss': 0.0006, 'learning_rate': 1.5625e-05, 'epoch': 0.44}


 22%|██▏       | 274/1248 [19:33<36:17,  2.24s/it]

{'loss': 0.001, 'learning_rate': 1.560897435897436e-05, 'epoch': 0.44}


 22%|██▏       | 275/1248 [19:35<34:27,  2.12s/it]

{'loss': 0.0003, 'learning_rate': 1.559294871794872e-05, 'epoch': 0.44}


 22%|██▏       | 276/1248 [19:36<33:25,  2.06s/it]

{'loss': 0.0005, 'learning_rate': 1.557692307692308e-05, 'epoch': 0.44}


 22%|██▏       | 277/1248 [19:38<32:33,  2.01s/it]

{'loss': 0.0007, 'learning_rate': 1.5560897435897436e-05, 'epoch': 0.44}


 22%|██▏       | 278/1248 [19:40<32:46,  2.03s/it]

{'loss': 0.0004, 'learning_rate': 1.5544871794871796e-05, 'epoch': 0.45}


 22%|██▏       | 279/1248 [19:43<34:09,  2.12s/it]

{'loss': 0.0002, 'learning_rate': 1.5528846153846156e-05, 'epoch': 0.45}


 22%|██▏       | 280/1248 [19:45<34:38,  2.15s/it]

{'loss': 0.0002, 'learning_rate': 1.5512820512820516e-05, 'epoch': 0.45}


 23%|██▎       | 281/1248 [19:47<32:19,  2.01s/it]

{'loss': 0.0009, 'learning_rate': 1.5496794871794872e-05, 'epoch': 0.45}


 23%|██▎       | 282/1248 [19:49<35:07,  2.18s/it]

{'loss': 0.0005, 'learning_rate': 1.5480769230769232e-05, 'epoch': 0.45}


 23%|██▎       | 283/1248 [19:52<38:47,  2.41s/it]

{'loss': 0.0005, 'learning_rate': 1.546474358974359e-05, 'epoch': 0.45}


 23%|██▎       | 284/1248 [19:54<37:37,  2.34s/it]

{'loss': 0.0009, 'learning_rate': 1.544871794871795e-05, 'epoch': 0.46}


 23%|██▎       | 285/1248 [19:56<36:38,  2.28s/it]

{'loss': 0.0003, 'learning_rate': 1.543269230769231e-05, 'epoch': 0.46}


 23%|██▎       | 286/1248 [19:58<32:32,  2.03s/it]

{'loss': 0.0003, 'learning_rate': 1.5416666666666668e-05, 'epoch': 0.46}


 23%|██▎       | 287/1248 [20:00<31:58,  2.00s/it]

{'loss': 0.0002, 'learning_rate': 1.5400641025641027e-05, 'epoch': 0.46}


 23%|██▎       | 288/1248 [20:02<31:59,  2.00s/it]

{'loss': 0.0007, 'learning_rate': 1.5384615384615387e-05, 'epoch': 0.46}


 23%|██▎       | 289/1248 [20:03<29:49,  1.87s/it]

{'loss': 0.0003, 'learning_rate': 1.5368589743589747e-05, 'epoch': 0.46}


 23%|██▎       | 290/1248 [20:06<31:43,  1.99s/it]

{'loss': 0.0003, 'learning_rate': 1.5352564102564103e-05, 'epoch': 0.46}


 23%|██▎       | 291/1248 [20:07<30:57,  1.94s/it]

{'loss': 0.0002, 'learning_rate': 1.5336538461538463e-05, 'epoch': 0.47}


 23%|██▎       | 292/1248 [20:09<30:29,  1.91s/it]

{'loss': 0.0002, 'learning_rate': 1.5320512820512823e-05, 'epoch': 0.47}


 23%|██▎       | 293/1248 [20:11<30:50,  1.94s/it]

{'loss': 0.0021, 'learning_rate': 1.530448717948718e-05, 'epoch': 0.47}


 24%|██▎       | 294/1248 [20:13<29:37,  1.86s/it]

{'loss': 0.0003, 'learning_rate': 1.528846153846154e-05, 'epoch': 0.47}


 24%|██▎       | 295/1248 [20:15<30:02,  1.89s/it]

{'loss': 0.0002, 'learning_rate': 1.52724358974359e-05, 'epoch': 0.47}


 24%|██▎       | 296/1248 [20:17<29:15,  1.84s/it]

{'loss': 0.0006, 'learning_rate': 1.5256410256410257e-05, 'epoch': 0.47}


 24%|██▍       | 297/1248 [20:20<34:39,  2.19s/it]

{'loss': 0.0002, 'learning_rate': 1.5240384615384615e-05, 'epoch': 0.48}


 24%|██▍       | 298/1248 [20:22<34:00,  2.15s/it]

{'loss': 0.0006, 'learning_rate': 1.5224358974358975e-05, 'epoch': 0.48}


 24%|██▍       | 299/1248 [20:24<32:58,  2.09s/it]

{'loss': 0.0004, 'learning_rate': 1.5208333333333333e-05, 'epoch': 0.48}


 24%|██▍       | 300/1248 [20:25<31:12,  1.98s/it]***** Running Evaluation *****
  Num examples = 6239
  Batch size = 10


{'loss': 0.0002, 'learning_rate': 1.5192307692307693e-05, 'epoch': 0.48}


                                                  
 24%|██▍       | 300/1248 [25:08<31:12,  1.98s/it]Saving model checkpoint to bert-base-cased-finetuned-adjs-wsent-black-clover/checkpoint-300
Configuration saved in bert-base-cased-finetuned-adjs-wsent-black-clover/checkpoint-300/config.json


{'eval_loss': 0.00022969194105826318, 'eval_runtime': 282.9169, 'eval_samples_per_second': 22.052, 'eval_steps_per_second': 2.206, 'epoch': 0.48}


Model weights saved in bert-base-cased-finetuned-adjs-wsent-black-clover/checkpoint-300/pytorch_model.bin
 24%|██▍       | 301/1248 [25:11<22:54:03, 87.06s/it]

{'loss': 0.0004, 'learning_rate': 1.5176282051282053e-05, 'epoch': 0.48}


 24%|██▍       | 302/1248 [25:13<16:10:38, 61.56s/it]

{'loss': 0.0002, 'learning_rate': 1.516025641025641e-05, 'epoch': 0.48}


 24%|██▍       | 303/1248 [25:15<11:29:34, 43.78s/it]

{'loss': 0.0002, 'learning_rate': 1.514423076923077e-05, 'epoch': 0.49}


 24%|██▍       | 304/1248 [25:18<8:15:29, 31.49s/it] 

{'loss': 0.001, 'learning_rate': 1.5128205128205129e-05, 'epoch': 0.49}


 24%|██▍       | 305/1248 [25:19<5:52:15, 22.41s/it]

{'loss': 0.0002, 'learning_rate': 1.5112179487179489e-05, 'epoch': 0.49}


 25%|██▍       | 306/1248 [25:21<4:15:26, 16.27s/it]

{'loss': 0.0025, 'learning_rate': 1.5096153846153847e-05, 'epoch': 0.49}


 25%|██▍       | 307/1248 [25:23<3:07:17, 11.94s/it]

{'loss': 0.0009, 'learning_rate': 1.5080128205128206e-05, 'epoch': 0.49}


 25%|██▍       | 308/1248 [25:26<2:22:48,  9.12s/it]

{'loss': 0.0002, 'learning_rate': 1.5064102564102565e-05, 'epoch': 0.49}


 25%|██▍       | 309/1248 [25:28<1:48:47,  6.95s/it]

{'loss': 0.0002, 'learning_rate': 1.5048076923076924e-05, 'epoch': 0.5}


 25%|██▍       | 310/1248 [25:30<1:27:52,  5.62s/it]

{'loss': 0.0001, 'learning_rate': 1.5032051282051282e-05, 'epoch': 0.5}


 25%|██▍       | 311/1248 [25:32<1:10:20,  4.50s/it]

{'loss': 0.0002, 'learning_rate': 1.5016025641025642e-05, 'epoch': 0.5}


 25%|██▌       | 312/1248 [25:34<1:00:04,  3.85s/it]

{'loss': 0.0006, 'learning_rate': 1.5000000000000002e-05, 'epoch': 0.5}


 25%|██▌       | 313/1248 [25:36<48:35,  3.12s/it]  

{'loss': 0.0003, 'learning_rate': 1.498397435897436e-05, 'epoch': 0.5}


 25%|██▌       | 314/1248 [25:38<45:00,  2.89s/it]

{'loss': 0.0005, 'learning_rate': 1.496794871794872e-05, 'epoch': 0.5}


 25%|██▌       | 315/1248 [25:40<41:25,  2.66s/it]

{'loss': 0.0001, 'learning_rate': 1.4951923076923078e-05, 'epoch': 0.5}


 25%|██▌       | 316/1248 [25:42<38:46,  2.50s/it]

{'loss': 0.0005, 'learning_rate': 1.4935897435897438e-05, 'epoch': 0.51}


 25%|██▌       | 317/1248 [25:45<37:12,  2.40s/it]

{'loss': 0.0002, 'learning_rate': 1.4919871794871796e-05, 'epoch': 0.51}


 25%|██▌       | 318/1248 [25:47<37:43,  2.43s/it]

{'loss': 0.0002, 'learning_rate': 1.4903846153846156e-05, 'epoch': 0.51}


 26%|██▌       | 319/1248 [25:50<40:39,  2.63s/it]

{'loss': 0.0114, 'learning_rate': 1.4887820512820514e-05, 'epoch': 0.51}


 26%|██▌       | 320/1248 [25:52<38:32,  2.49s/it]

{'loss': 0.0015, 'learning_rate': 1.4871794871794874e-05, 'epoch': 0.51}


 26%|██▌       | 321/1248 [25:54<36:17,  2.35s/it]

{'loss': 0.0002, 'learning_rate': 1.4855769230769232e-05, 'epoch': 0.51}


 26%|██▌       | 322/1248 [25:56<33:56,  2.20s/it]

{'loss': 0.0007, 'learning_rate': 1.4839743589743592e-05, 'epoch': 0.52}


 26%|██▌       | 323/1248 [25:59<38:48,  2.52s/it]

{'loss': 0.0005, 'learning_rate': 1.482371794871795e-05, 'epoch': 0.52}


 26%|██▌       | 324/1248 [26:02<40:20,  2.62s/it]

{'loss': 0.0003, 'learning_rate': 1.480769230769231e-05, 'epoch': 0.52}


 26%|██▌       | 325/1248 [26:04<35:52,  2.33s/it]

{'loss': 0.0005, 'learning_rate': 1.479166666666667e-05, 'epoch': 0.52}


 26%|██▌       | 326/1248 [26:06<32:55,  2.14s/it]

{'loss': 0.0007, 'learning_rate': 1.4775641025641027e-05, 'epoch': 0.52}


 26%|██▌       | 327/1248 [26:08<31:37,  2.06s/it]

{'loss': 0.0003, 'learning_rate': 1.4759615384615387e-05, 'epoch': 0.52}


 26%|██▋       | 328/1248 [26:09<28:46,  1.88s/it]

{'loss': 0.0002, 'learning_rate': 1.4743589743589745e-05, 'epoch': 0.53}


 26%|██▋       | 329/1248 [26:11<29:35,  1.93s/it]

{'loss': 0.0003, 'learning_rate': 1.4727564102564105e-05, 'epoch': 0.53}


 26%|██▋       | 330/1248 [26:13<29:22,  1.92s/it]

{'loss': 0.0004, 'learning_rate': 1.4711538461538463e-05, 'epoch': 0.53}


 27%|██▋       | 331/1248 [26:15<28:23,  1.86s/it]

{'loss': 0.0008, 'learning_rate': 1.4695512820512823e-05, 'epoch': 0.53}


 27%|██▋       | 332/1248 [26:17<29:08,  1.91s/it]

{'loss': 0.0015, 'learning_rate': 1.467948717948718e-05, 'epoch': 0.53}


 27%|██▋       | 333/1248 [26:18<28:41,  1.88s/it]

{'loss': 0.0006, 'learning_rate': 1.466346153846154e-05, 'epoch': 0.53}


 27%|██▋       | 334/1248 [26:20<28:46,  1.89s/it]

{'loss': 0.0002, 'learning_rate': 1.4647435897435897e-05, 'epoch': 0.54}


 27%|██▋       | 335/1248 [26:22<28:14,  1.86s/it]

{'loss': 0.0001, 'learning_rate': 1.4631410256410257e-05, 'epoch': 0.54}


 27%|██▋       | 336/1248 [26:25<33:16,  2.19s/it]

{'loss': 0.0006, 'learning_rate': 1.4615384615384615e-05, 'epoch': 0.54}


 27%|██▋       | 337/1248 [26:27<31:15,  2.06s/it]

{'loss': 0.0006, 'learning_rate': 1.4599358974358975e-05, 'epoch': 0.54}


 27%|██▋       | 338/1248 [26:29<30:29,  2.01s/it]

{'loss': 0.0002, 'learning_rate': 1.4583333333333333e-05, 'epoch': 0.54}


 27%|██▋       | 339/1248 [26:31<30:45,  2.03s/it]

{'loss': 0.0004, 'learning_rate': 1.4567307692307693e-05, 'epoch': 0.54}


 27%|██▋       | 340/1248 [26:33<31:27,  2.08s/it]

{'loss': 0.0007, 'learning_rate': 1.4551282051282051e-05, 'epoch': 0.54}


 27%|██▋       | 341/1248 [26:35<31:19,  2.07s/it]

{'loss': 0.0007, 'learning_rate': 1.4535256410256411e-05, 'epoch': 0.55}


 27%|██▋       | 342/1248 [26:37<29:33,  1.96s/it]

{'loss': 0.0008, 'learning_rate': 1.451923076923077e-05, 'epoch': 0.55}


 27%|██▋       | 343/1248 [26:39<30:33,  2.03s/it]

{'loss': 0.0001, 'learning_rate': 1.4503205128205129e-05, 'epoch': 0.55}


 28%|██▊       | 344/1248 [26:41<32:01,  2.13s/it]

{'loss': 0.0008, 'learning_rate': 1.4487179487179489e-05, 'epoch': 0.55}


 28%|██▊       | 345/1248 [26:44<32:55,  2.19s/it]

{'loss': 0.0001, 'learning_rate': 1.4471153846153847e-05, 'epoch': 0.55}


 28%|██▊       | 346/1248 [26:46<35:03,  2.33s/it]

{'loss': 0.0007, 'learning_rate': 1.4455128205128207e-05, 'epoch': 0.55}


 28%|██▊       | 347/1248 [26:49<34:28,  2.30s/it]

{'loss': 0.0003, 'learning_rate': 1.4439102564102565e-05, 'epoch': 0.56}


 28%|██▊       | 348/1248 [26:52<37:36,  2.51s/it]

{'loss': 0.0003, 'learning_rate': 1.4423076923076924e-05, 'epoch': 0.56}


 28%|██▊       | 349/1248 [26:53<34:17,  2.29s/it]

{'loss': 0.0001, 'learning_rate': 1.4407051282051283e-05, 'epoch': 0.56}


 28%|██▊       | 350/1248 [26:55<30:07,  2.01s/it]

{'loss': 0.0002, 'learning_rate': 1.4391025641025642e-05, 'epoch': 0.56}


 28%|██▊       | 351/1248 [26:57<33:06,  2.22s/it]

{'loss': 0.0001, 'learning_rate': 1.4375e-05, 'epoch': 0.56}


 28%|██▊       | 352/1248 [26:59<31:34,  2.11s/it]

{'loss': 0.0001, 'learning_rate': 1.435897435897436e-05, 'epoch': 0.56}


 28%|██▊       | 353/1248 [27:02<33:39,  2.26s/it]

{'loss': 0.0001, 'learning_rate': 1.434294871794872e-05, 'epoch': 0.57}


 28%|██▊       | 354/1248 [27:04<31:18,  2.10s/it]

{'loss': 0.0005, 'learning_rate': 1.4326923076923078e-05, 'epoch': 0.57}


 28%|██▊       | 355/1248 [27:06<30:27,  2.05s/it]

{'loss': 0.0001, 'learning_rate': 1.4310897435897438e-05, 'epoch': 0.57}


 29%|██▊       | 356/1248 [27:08<30:31,  2.05s/it]

{'loss': 0.0003, 'learning_rate': 1.4294871794871796e-05, 'epoch': 0.57}


 29%|██▊       | 357/1248 [27:09<28:23,  1.91s/it]

{'loss': 0.0002, 'learning_rate': 1.4278846153846156e-05, 'epoch': 0.57}


 29%|██▊       | 358/1248 [27:11<29:11,  1.97s/it]

{'loss': 0.0003, 'learning_rate': 1.4262820512820514e-05, 'epoch': 0.57}


 29%|██▉       | 359/1248 [27:13<30:04,  2.03s/it]

{'loss': 0.0003, 'learning_rate': 1.4246794871794874e-05, 'epoch': 0.58}


 29%|██▉       | 360/1248 [27:15<28:41,  1.94s/it]

{'loss': 0.0006, 'learning_rate': 1.4230769230769232e-05, 'epoch': 0.58}


 29%|██▉       | 361/1248 [27:17<26:04,  1.76s/it]

{'loss': 0.0008, 'learning_rate': 1.4214743589743592e-05, 'epoch': 0.58}


 29%|██▉       | 362/1248 [27:19<27:33,  1.87s/it]

{'loss': 0.0002, 'learning_rate': 1.419871794871795e-05, 'epoch': 0.58}


 29%|██▉       | 363/1248 [27:20<25:37,  1.74s/it]

{'loss': 0.0011, 'learning_rate': 1.418269230769231e-05, 'epoch': 0.58}


 29%|██▉       | 364/1248 [27:23<29:30,  2.00s/it]

{'loss': 0.0002, 'learning_rate': 1.416666666666667e-05, 'epoch': 0.58}


 29%|██▉       | 365/1248 [27:25<28:58,  1.97s/it]

{'loss': 0.0002, 'learning_rate': 1.4150641025641027e-05, 'epoch': 0.58}


 29%|██▉       | 366/1248 [27:26<27:37,  1.88s/it]

{'loss': 0.0012, 'learning_rate': 1.4134615384615387e-05, 'epoch': 0.59}


 29%|██▉       | 367/1248 [27:29<32:58,  2.25s/it]

{'loss': 0.0001, 'learning_rate': 1.4118589743589745e-05, 'epoch': 0.59}


 29%|██▉       | 368/1248 [27:31<32:21,  2.21s/it]

{'loss': 0.0012, 'learning_rate': 1.4102564102564105e-05, 'epoch': 0.59}


 30%|██▉       | 369/1248 [27:33<30:33,  2.09s/it]

{'loss': 0.0004, 'learning_rate': 1.4086538461538463e-05, 'epoch': 0.59}


 30%|██▉       | 370/1248 [27:35<29:44,  2.03s/it]

{'loss': 0.0003, 'learning_rate': 1.4070512820512823e-05, 'epoch': 0.59}


 30%|██▉       | 371/1248 [27:38<35:17,  2.41s/it]

{'loss': 0.0005, 'learning_rate': 1.405448717948718e-05, 'epoch': 0.59}


 30%|██▉       | 372/1248 [27:41<33:41,  2.31s/it]

{'loss': 0.0004, 'learning_rate': 1.403846153846154e-05, 'epoch': 0.6}


 30%|██▉       | 373/1248 [27:42<30:31,  2.09s/it]

{'loss': 0.0004, 'learning_rate': 1.4022435897435897e-05, 'epoch': 0.6}


 30%|██▉       | 374/1248 [27:44<29:27,  2.02s/it]

{'loss': 0.0003, 'learning_rate': 1.4006410256410257e-05, 'epoch': 0.6}


 30%|███       | 375/1248 [27:46<29:32,  2.03s/it]

{'loss': 0.0001, 'learning_rate': 1.3990384615384615e-05, 'epoch': 0.6}


 30%|███       | 376/1248 [27:48<28:01,  1.93s/it]

{'loss': 0.0002, 'learning_rate': 1.3974358974358975e-05, 'epoch': 0.6}


 30%|███       | 377/1248 [27:50<27:52,  1.92s/it]

{'loss': 0.0006, 'learning_rate': 1.3958333333333333e-05, 'epoch': 0.6}


 30%|███       | 378/1248 [27:52<30:01,  2.07s/it]

{'loss': 0.0006, 'learning_rate': 1.3942307692307693e-05, 'epoch': 0.61}


 30%|███       | 379/1248 [27:57<42:09,  2.91s/it]

{'loss': 0.0002, 'learning_rate': 1.3926282051282051e-05, 'epoch': 0.61}


 30%|███       | 380/1248 [28:02<51:32,  3.56s/it]

{'loss': 0.0003, 'learning_rate': 1.3910256410256411e-05, 'epoch': 0.61}


 31%|███       | 381/1248 [28:04<44:42,  3.09s/it]

{'loss': 0.0004, 'learning_rate': 1.3894230769230769e-05, 'epoch': 0.61}


 31%|███       | 382/1248 [28:06<38:57,  2.70s/it]

{'loss': 0.0001, 'learning_rate': 1.3878205128205129e-05, 'epoch': 0.61}


 31%|███       | 383/1248 [28:08<35:02,  2.43s/it]

{'loss': 0.0002, 'learning_rate': 1.3862179487179489e-05, 'epoch': 0.61}


 31%|███       | 384/1248 [28:10<33:56,  2.36s/it]

{'loss': 0.6167, 'learning_rate': 1.3846153846153847e-05, 'epoch': 0.62}


 31%|███       | 385/1248 [28:13<35:53,  2.49s/it]

{'loss': 0.0001, 'learning_rate': 1.3830128205128207e-05, 'epoch': 0.62}


 31%|███       | 386/1248 [28:15<34:01,  2.37s/it]

{'loss': 0.0001, 'learning_rate': 1.3814102564102565e-05, 'epoch': 0.62}


 31%|███       | 387/1248 [28:17<32:02,  2.23s/it]

{'loss': 0.0001, 'learning_rate': 1.3798076923076924e-05, 'epoch': 0.62}


 31%|███       | 388/1248 [28:18<29:09,  2.03s/it]

{'loss': 0.0002, 'learning_rate': 1.3782051282051283e-05, 'epoch': 0.62}


 31%|███       | 389/1248 [28:21<34:39,  2.42s/it]

{'loss': 0.0001, 'learning_rate': 1.3766025641025642e-05, 'epoch': 0.62}


 31%|███▏      | 390/1248 [28:25<37:43,  2.64s/it]

{'loss': 0.0001, 'learning_rate': 1.375e-05, 'epoch': 0.62}


 31%|███▏      | 391/1248 [28:27<37:19,  2.61s/it]

{'loss': 0.0001, 'learning_rate': 1.373397435897436e-05, 'epoch': 0.63}


 31%|███▏      | 392/1248 [28:32<45:16,  3.17s/it]

{'loss': 0.0004, 'learning_rate': 1.3717948717948718e-05, 'epoch': 0.63}


 31%|███▏      | 393/1248 [28:33<37:08,  2.61s/it]

{'loss': 0.0002, 'learning_rate': 1.3701923076923078e-05, 'epoch': 0.63}


 32%|███▏      | 394/1248 [28:36<38:36,  2.71s/it]

{'loss': 0.0003, 'learning_rate': 1.3685897435897438e-05, 'epoch': 0.63}


 32%|███▏      | 395/1248 [28:38<35:55,  2.53s/it]

{'loss': 0.0004, 'learning_rate': 1.3669871794871796e-05, 'epoch': 0.63}


 32%|███▏      | 396/1248 [28:41<37:21,  2.63s/it]

{'loss': 0.0002, 'learning_rate': 1.3653846153846156e-05, 'epoch': 0.63}


 32%|███▏      | 397/1248 [28:43<33:48,  2.38s/it]

{'loss': 0.0004, 'learning_rate': 1.3637820512820514e-05, 'epoch': 0.64}


 32%|███▏      | 398/1248 [28:45<32:58,  2.33s/it]

{'loss': 0.0002, 'learning_rate': 1.3621794871794874e-05, 'epoch': 0.64}


 32%|███▏      | 399/1248 [28:47<30:16,  2.14s/it]

{'loss': 0.0001, 'learning_rate': 1.3605769230769232e-05, 'epoch': 0.64}


 32%|███▏      | 400/1248 [28:49<29:55,  2.12s/it]***** Running Evaluation *****
  Num examples = 6239
  Batch size = 10


{'loss': 0.0001, 'learning_rate': 1.3589743589743592e-05, 'epoch': 0.64}


                                                  
 32%|███▏      | 400/1248 [33:29<29:55,  2.12s/it]Saving model checkpoint to bert-base-cased-finetuned-adjs-wsent-black-clover/checkpoint-400
Configuration saved in bert-base-cased-finetuned-adjs-wsent-black-clover/checkpoint-400/config.json


{'eval_loss': 0.00012838821567129344, 'eval_runtime': 280.4582, 'eval_samples_per_second': 22.246, 'eval_steps_per_second': 2.225, 'epoch': 0.64}


Model weights saved in bert-base-cased-finetuned-adjs-wsent-black-clover/checkpoint-400/pytorch_model.bin
 32%|███▏      | 401/1248 [33:32<20:20:32, 86.46s/it]

{'loss': 0.0001, 'learning_rate': 1.357371794871795e-05, 'epoch': 0.64}


 32%|███▏      | 402/1248 [33:34<14:23:22, 61.23s/it]

{'loss': 0.0004, 'learning_rate': 1.355769230769231e-05, 'epoch': 0.64}


 32%|███▏      | 403/1248 [33:36<10:11:10, 43.40s/it]

{'loss': 0.0014, 'learning_rate': 1.3541666666666668e-05, 'epoch': 0.65}


 32%|███▏      | 404/1248 [33:39<7:18:06, 31.15s/it] 

{'loss': 0.0004, 'learning_rate': 1.3525641025641028e-05, 'epoch': 0.65}


 32%|███▏      | 405/1248 [33:41<5:15:25, 22.45s/it]

{'loss': 0.0004, 'learning_rate': 1.3509615384615387e-05, 'epoch': 0.65}


 33%|███▎      | 406/1248 [33:45<3:59:42, 17.08s/it]

{'loss': 0.0004, 'learning_rate': 1.3493589743589745e-05, 'epoch': 0.65}


 33%|███▎      | 407/1248 [33:48<2:58:07, 12.71s/it]

{'loss': 0.0004, 'learning_rate': 1.3477564102564105e-05, 'epoch': 0.65}


 33%|███▎      | 408/1248 [33:50<2:13:17,  9.52s/it]

{'loss': 0.0008, 'learning_rate': 1.3461538461538463e-05, 'epoch': 0.65}


 33%|███▎      | 409/1248 [33:52<1:42:58,  7.36s/it]

{'loss': 0.0003, 'learning_rate': 1.3445512820512823e-05, 'epoch': 0.66}


 33%|███▎      | 410/1248 [33:54<1:18:10,  5.60s/it]

{'loss': 0.0002, 'learning_rate': 1.342948717948718e-05, 'epoch': 0.66}


 33%|███▎      | 411/1248 [33:57<1:06:33,  4.77s/it]

{'loss': 0.0001, 'learning_rate': 1.341346153846154e-05, 'epoch': 0.66}


 33%|███▎      | 412/1248 [33:59<57:22,  4.12s/it]  

{'loss': 0.0003, 'learning_rate': 1.3397435897435897e-05, 'epoch': 0.66}


 33%|███▎      | 413/1248 [34:02<51:08,  3.67s/it]

{'loss': 0.0001, 'learning_rate': 1.3381410256410257e-05, 'epoch': 0.66}


 33%|███▎      | 414/1248 [34:04<42:51,  3.08s/it]

{'loss': 0.0003, 'learning_rate': 1.3365384615384615e-05, 'epoch': 0.66}


 33%|███▎      | 415/1248 [34:05<36:39,  2.64s/it]

{'loss': 0.0003, 'learning_rate': 1.3349358974358975e-05, 'epoch': 0.67}


 33%|███▎      | 416/1248 [34:07<34:19,  2.47s/it]

{'loss': 0.0001, 'learning_rate': 1.3333333333333333e-05, 'epoch': 0.67}


 33%|███▎      | 417/1248 [34:12<42:44,  3.09s/it]

{'loss': 0.0008, 'learning_rate': 1.3317307692307693e-05, 'epoch': 0.67}


 33%|███▎      | 418/1248 [34:13<36:57,  2.67s/it]

{'loss': 0.0005, 'learning_rate': 1.3301282051282051e-05, 'epoch': 0.67}


 34%|███▎      | 419/1248 [34:16<37:00,  2.68s/it]

{'loss': 0.0004, 'learning_rate': 1.3285256410256411e-05, 'epoch': 0.67}


 34%|███▎      | 420/1248 [34:19<36:14,  2.63s/it]

{'loss': 0.0006, 'learning_rate': 1.3269230769230769e-05, 'epoch': 0.67}


 34%|███▎      | 421/1248 [34:21<36:33,  2.65s/it]

{'loss': 0.0001, 'learning_rate': 1.3253205128205129e-05, 'epoch': 0.67}


 34%|███▍      | 422/1248 [34:23<33:51,  2.46s/it]

{'loss': 0.0001, 'learning_rate': 1.3237179487179487e-05, 'epoch': 0.68}


 34%|███▍      | 423/1248 [34:26<34:26,  2.51s/it]

{'loss': 0.0002, 'learning_rate': 1.3221153846153847e-05, 'epoch': 0.68}


 34%|███▍      | 424/1248 [34:28<32:55,  2.40s/it]

{'loss': 0.0003, 'learning_rate': 1.3205128205128207e-05, 'epoch': 0.68}


 34%|███▍      | 425/1248 [34:30<29:39,  2.16s/it]

{'loss': 0.0003, 'learning_rate': 1.3189102564102565e-05, 'epoch': 0.68}


 34%|███▍      | 426/1248 [34:32<28:51,  2.11s/it]

{'loss': 0.0003, 'learning_rate': 1.3173076923076925e-05, 'epoch': 0.68}


 34%|███▍      | 427/1248 [34:33<27:25,  2.00s/it]

{'loss': 0.0002, 'learning_rate': 1.3157051282051283e-05, 'epoch': 0.68}


 34%|███▍      | 428/1248 [34:36<28:58,  2.12s/it]

{'loss': 0.0002, 'learning_rate': 1.3141025641025642e-05, 'epoch': 0.69}


 34%|███▍      | 429/1248 [34:39<31:15,  2.29s/it]

{'loss': 0.0001, 'learning_rate': 1.3125e-05, 'epoch': 0.69}


 34%|███▍      | 430/1248 [34:40<27:11,  1.99s/it]

{'loss': 0.0005, 'learning_rate': 1.310897435897436e-05, 'epoch': 0.69}


 35%|███▍      | 431/1248 [34:42<29:13,  2.15s/it]

{'loss': 0.0001, 'learning_rate': 1.3092948717948718e-05, 'epoch': 0.69}


 35%|███▍      | 432/1248 [34:45<32:00,  2.35s/it]

{'loss': 0.0002, 'learning_rate': 1.3076923076923078e-05, 'epoch': 0.69}


 35%|███▍      | 433/1248 [34:47<29:15,  2.15s/it]

{'loss': 0.0006, 'learning_rate': 1.3060897435897436e-05, 'epoch': 0.69}


 35%|███▍      | 434/1248 [34:49<27:40,  2.04s/it]

{'loss': 0.0002, 'learning_rate': 1.3044871794871796e-05, 'epoch': 0.7}


 35%|███▍      | 435/1248 [34:51<27:23,  2.02s/it]

{'loss': 0.0002, 'learning_rate': 1.3028846153846156e-05, 'epoch': 0.7}


 35%|███▍      | 436/1248 [34:54<30:53,  2.28s/it]

{'loss': 0.0001, 'learning_rate': 1.3012820512820514e-05, 'epoch': 0.7}


 35%|███▌      | 437/1248 [34:55<27:32,  2.04s/it]

{'loss': 0.0001, 'learning_rate': 1.2996794871794874e-05, 'epoch': 0.7}


 35%|███▌      | 438/1248 [34:57<26:58,  2.00s/it]

{'loss': 0.0005, 'learning_rate': 1.2980769230769232e-05, 'epoch': 0.7}


 35%|███▌      | 439/1248 [34:59<25:45,  1.91s/it]

{'loss': 0.0002, 'learning_rate': 1.2964743589743592e-05, 'epoch': 0.7}


 35%|███▌      | 440/1248 [35:00<24:54,  1.85s/it]

{'loss': 0.0006, 'learning_rate': 1.294871794871795e-05, 'epoch': 0.71}


 35%|███▌      | 441/1248 [35:03<29:35,  2.20s/it]

{'loss': 0.0002, 'learning_rate': 1.293269230769231e-05, 'epoch': 0.71}


 35%|███▌      | 442/1248 [35:07<37:29,  2.79s/it]

{'loss': 0.0001, 'learning_rate': 1.2916666666666668e-05, 'epoch': 0.71}


 35%|███▌      | 443/1248 [35:10<35:24,  2.64s/it]

{'loss': 0.0003, 'learning_rate': 1.2900641025641028e-05, 'epoch': 0.71}


 36%|███▌      | 444/1248 [35:12<35:17,  2.63s/it]

{'loss': 0.0002, 'learning_rate': 1.2884615384615386e-05, 'epoch': 0.71}


 36%|███▌      | 445/1248 [35:15<33:56,  2.54s/it]

{'loss': 0.0001, 'learning_rate': 1.2868589743589746e-05, 'epoch': 0.71}


 36%|███▌      | 446/1248 [35:16<30:49,  2.31s/it]

{'loss': 0.0001, 'learning_rate': 1.2852564102564105e-05, 'epoch': 0.71}


 36%|███▌      | 447/1248 [35:18<28:47,  2.16s/it]

{'loss': 0.0004, 'learning_rate': 1.2836538461538463e-05, 'epoch': 0.72}


 36%|███▌      | 448/1248 [35:22<37:00,  2.78s/it]

{'loss': 0.0001, 'learning_rate': 1.2820512820512823e-05, 'epoch': 0.72}


 36%|███▌      | 449/1248 [35:24<32:56,  2.47s/it]

{'loss': 0.0002, 'learning_rate': 1.280448717948718e-05, 'epoch': 0.72}


 36%|███▌      | 450/1248 [35:26<30:58,  2.33s/it]

{'loss': 0.0001, 'learning_rate': 1.2788461538461538e-05, 'epoch': 0.72}


 36%|███▌      | 451/1248 [35:28<28:36,  2.15s/it]

{'loss': 0.0003, 'learning_rate': 1.2772435897435898e-05, 'epoch': 0.72}


 36%|███▌      | 452/1248 [35:30<29:00,  2.19s/it]

{'loss': 0.0002, 'learning_rate': 1.2756410256410257e-05, 'epoch': 0.72}


 36%|███▋      | 453/1248 [35:32<26:05,  1.97s/it]

{'loss': 0.0006, 'learning_rate': 1.2740384615384615e-05, 'epoch': 0.73}


 36%|███▋      | 454/1248 [35:34<25:57,  1.96s/it]

{'loss': 0.0001, 'learning_rate': 1.2724358974358975e-05, 'epoch': 0.73}


 36%|███▋      | 455/1248 [35:35<25:06,  1.90s/it]

{'loss': 0.0002, 'learning_rate': 1.2708333333333333e-05, 'epoch': 0.73}


 37%|███▋      | 456/1248 [35:37<25:35,  1.94s/it]

{'loss': 0.0003, 'learning_rate': 1.2692307692307693e-05, 'epoch': 0.73}


 37%|███▋      | 457/1248 [35:39<24:34,  1.86s/it]

{'loss': 0.0001, 'learning_rate': 1.2676282051282051e-05, 'epoch': 0.73}


 37%|███▋      | 458/1248 [35:41<23:20,  1.77s/it]

{'loss': 0.0007, 'learning_rate': 1.2660256410256411e-05, 'epoch': 0.73}


 37%|███▋      | 459/1248 [35:43<24:24,  1.86s/it]

{'loss': 0.0001, 'learning_rate': 1.264423076923077e-05, 'epoch': 0.74}


 37%|███▋      | 460/1248 [35:44<23:39,  1.80s/it]

{'loss': 0.0001, 'learning_rate': 1.2628205128205129e-05, 'epoch': 0.74}


 37%|███▋      | 461/1248 [35:47<25:26,  1.94s/it]

{'loss': 0.0001, 'learning_rate': 1.2612179487179487e-05, 'epoch': 0.74}


 37%|███▋      | 462/1248 [35:50<29:29,  2.25s/it]

{'loss': 0.0001, 'learning_rate': 1.2596153846153847e-05, 'epoch': 0.74}


 37%|███▋      | 463/1248 [35:52<28:55,  2.21s/it]

{'loss': 0.0003, 'learning_rate': 1.2580128205128207e-05, 'epoch': 0.74}


 37%|███▋      | 464/1248 [35:55<32:20,  2.48s/it]

{'loss': 0.0001, 'learning_rate': 1.2564102564102565e-05, 'epoch': 0.74}


 37%|███▋      | 465/1248 [35:59<38:08,  2.92s/it]

{'loss': 0.0002, 'learning_rate': 1.2548076923076925e-05, 'epoch': 0.75}


 37%|███▋      | 466/1248 [36:01<33:23,  2.56s/it]

{'loss': 0.0001, 'learning_rate': 1.2532051282051283e-05, 'epoch': 0.75}


 37%|███▋      | 467/1248 [36:03<31:05,  2.39s/it]

{'loss': 0.0003, 'learning_rate': 1.2516025641025642e-05, 'epoch': 0.75}


 38%|███▊      | 468/1248 [36:05<31:58,  2.46s/it]

{'loss': 0.0003, 'learning_rate': 1.25e-05, 'epoch': 0.75}


 38%|███▊      | 469/1248 [36:07<28:48,  2.22s/it]

{'loss': 0.0003, 'learning_rate': 1.248397435897436e-05, 'epoch': 0.75}


 38%|███▊      | 470/1248 [36:09<28:17,  2.18s/it]

{'loss': 0.0003, 'learning_rate': 1.2467948717948719e-05, 'epoch': 0.75}


 38%|███▊      | 471/1248 [36:11<28:23,  2.19s/it]

{'loss': 0.0002, 'learning_rate': 1.2451923076923078e-05, 'epoch': 0.75}


 38%|███▊      | 472/1248 [36:12<24:55,  1.93s/it]

{'loss': 0.0002, 'learning_rate': 1.2435897435897436e-05, 'epoch': 0.76}


 38%|███▊      | 473/1248 [36:14<24:21,  1.89s/it]

{'loss': 0.0003, 'learning_rate': 1.2419871794871796e-05, 'epoch': 0.76}


 38%|███▊      | 474/1248 [36:16<24:14,  1.88s/it]

{'loss': 0.0002, 'learning_rate': 1.2403846153846156e-05, 'epoch': 0.76}


 38%|███▊      | 475/1248 [36:18<23:14,  1.80s/it]

{'loss': 0.0004, 'learning_rate': 1.2387820512820514e-05, 'epoch': 0.76}


 38%|███▊      | 476/1248 [36:19<22:51,  1.78s/it]

{'loss': 0.0001, 'learning_rate': 1.2371794871794874e-05, 'epoch': 0.76}


 38%|███▊      | 477/1248 [36:22<25:16,  1.97s/it]

{'loss': 0.0005, 'learning_rate': 1.2355769230769232e-05, 'epoch': 0.76}


 38%|███▊      | 478/1248 [36:23<23:35,  1.84s/it]

{'loss': 0.0003, 'learning_rate': 1.2339743589743592e-05, 'epoch': 0.77}


 38%|███▊      | 479/1248 [36:25<24:23,  1.90s/it]

{'loss': 0.0002, 'learning_rate': 1.232371794871795e-05, 'epoch': 0.77}


 38%|███▊      | 480/1248 [36:27<24:10,  1.89s/it]

{'loss': 0.0003, 'learning_rate': 1.230769230769231e-05, 'epoch': 0.77}


 39%|███▊      | 481/1248 [36:30<26:09,  2.05s/it]

{'loss': 0.0003, 'learning_rate': 1.2291666666666668e-05, 'epoch': 0.77}


 39%|███▊      | 482/1248 [36:32<25:54,  2.03s/it]

{'loss': 0.0001, 'learning_rate': 1.2275641025641028e-05, 'epoch': 0.77}


 39%|███▊      | 483/1248 [36:34<25:15,  1.98s/it]

{'loss': 0.0004, 'learning_rate': 1.2259615384615386e-05, 'epoch': 0.77}


 39%|███▉      | 484/1248 [36:37<29:25,  2.31s/it]

{'loss': 0.0001, 'learning_rate': 1.2243589743589746e-05, 'epoch': 0.78}


 39%|███▉      | 485/1248 [36:39<28:02,  2.20s/it]

{'loss': 0.0002, 'learning_rate': 1.2227564102564105e-05, 'epoch': 0.78}


 39%|███▉      | 486/1248 [36:40<24:49,  1.96s/it]

{'loss': 0.0001, 'learning_rate': 1.2211538461538463e-05, 'epoch': 0.78}


 39%|███▉      | 487/1248 [36:43<27:08,  2.14s/it]

{'loss': 0.0001, 'learning_rate': 1.2195512820512823e-05, 'epoch': 0.78}


 39%|███▉      | 488/1248 [36:45<28:39,  2.26s/it]

{'loss': 0.0002, 'learning_rate': 1.217948717948718e-05, 'epoch': 0.78}


 39%|███▉      | 489/1248 [36:49<33:51,  2.68s/it]

{'loss': 0.0001, 'learning_rate': 1.2163461538461538e-05, 'epoch': 0.78}


 39%|███▉      | 490/1248 [36:50<28:55,  2.29s/it]

{'loss': 0.0005, 'learning_rate': 1.2147435897435898e-05, 'epoch': 0.79}


 39%|███▉      | 491/1248 [36:51<25:21,  2.01s/it]

{'loss': 0.0001, 'learning_rate': 1.2131410256410256e-05, 'epoch': 0.79}


 39%|███▉      | 492/1248 [36:55<30:08,  2.39s/it]

{'loss': 0.0001, 'learning_rate': 1.2115384615384615e-05, 'epoch': 0.79}


 40%|███▉      | 493/1248 [36:57<28:39,  2.28s/it]

{'loss': 0.0001, 'learning_rate': 1.2099358974358975e-05, 'epoch': 0.79}


 40%|███▉      | 494/1248 [36:58<25:34,  2.04s/it]

{'loss': 0.0009, 'learning_rate': 1.2083333333333333e-05, 'epoch': 0.79}


 40%|███▉      | 495/1248 [37:00<25:32,  2.04s/it]

{'loss': 0.0001, 'learning_rate': 1.2067307692307693e-05, 'epoch': 0.79}


 40%|███▉      | 496/1248 [37:04<29:58,  2.39s/it]

{'loss': 0.0002, 'learning_rate': 1.2051282051282051e-05, 'epoch': 0.79}


 40%|███▉      | 497/1248 [37:06<28:53,  2.31s/it]

{'loss': 0.0004, 'learning_rate': 1.2035256410256411e-05, 'epoch': 0.8}


 40%|███▉      | 498/1248 [37:07<27:00,  2.16s/it]

{'loss': 0.0001, 'learning_rate': 1.201923076923077e-05, 'epoch': 0.8}


 40%|███▉      | 499/1248 [37:10<27:30,  2.20s/it]

{'loss': 0.0007, 'learning_rate': 1.2003205128205129e-05, 'epoch': 0.8}


 40%|████      | 500/1248 [37:13<29:53,  2.40s/it]***** Running Evaluation *****
  Num examples = 6239
  Batch size = 10


{'loss': 0.0001, 'learning_rate': 1.1987179487179487e-05, 'epoch': 0.8}


                                                  
 40%|████      | 500/1248 [41:53<29:53,  2.40s/it]Saving model checkpoint to bert-base-cased-finetuned-adjs-wsent-black-clover/checkpoint-500
Configuration saved in bert-base-cased-finetuned-adjs-wsent-black-clover/checkpoint-500/config.json


{'eval_loss': 9.796654194360599e-05, 'eval_runtime': 279.9603, 'eval_samples_per_second': 22.285, 'eval_steps_per_second': 2.229, 'epoch': 0.8}


Model weights saved in bert-base-cased-finetuned-adjs-wsent-black-clover/checkpoint-500/pytorch_model.bin
 40%|████      | 501/1248 [41:55<17:56:45, 86.49s/it]

{'loss': 0.0002, 'learning_rate': 1.1971153846153847e-05, 'epoch': 0.8}


 40%|████      | 502/1248 [41:58<12:42:44, 61.35s/it]

{'loss': 0.0002, 'learning_rate': 1.1955128205128205e-05, 'epoch': 0.8}


 40%|████      | 503/1248 [42:01<9:05:02, 43.90s/it] 

{'loss': 0.0003, 'learning_rate': 1.1939102564102565e-05, 'epoch': 0.81}


 40%|████      | 504/1248 [42:03<6:26:15, 31.15s/it]

{'loss': 0.0002, 'learning_rate': 1.1923076923076925e-05, 'epoch': 0.81}


 40%|████      | 505/1248 [42:05<4:38:53, 22.52s/it]

{'loss': 0.0001, 'learning_rate': 1.1907051282051283e-05, 'epoch': 0.81}


 41%|████      | 506/1248 [42:07<3:21:44, 16.31s/it]

{'loss': 0.0053, 'learning_rate': 1.1891025641025643e-05, 'epoch': 0.81}


 41%|████      | 507/1248 [42:09<2:27:41, 11.96s/it]

{'loss': 0.0001, 'learning_rate': 1.1875e-05, 'epoch': 0.81}


 41%|████      | 508/1248 [42:10<1:49:18,  8.86s/it]

{'loss': 0.0001, 'learning_rate': 1.185897435897436e-05, 'epoch': 0.81}


 41%|████      | 509/1248 [42:13<1:25:25,  6.94s/it]

{'loss': 0.0001, 'learning_rate': 1.1842948717948719e-05, 'epoch': 0.82}


 41%|████      | 510/1248 [42:15<1:06:42,  5.42s/it]

{'loss': 0.0001, 'learning_rate': 1.1826923076923078e-05, 'epoch': 0.82}


 41%|████      | 511/1248 [42:16<53:30,  4.36s/it]  

{'loss': 0.0004, 'learning_rate': 1.1810897435897436e-05, 'epoch': 0.82}


 41%|████      | 512/1248 [42:19<46:03,  3.76s/it]

{'loss': 0.0001, 'learning_rate': 1.1794871794871796e-05, 'epoch': 0.82}


 41%|████      | 513/1248 [42:21<40:22,  3.30s/it]

{'loss': 0.0001, 'learning_rate': 1.1778846153846154e-05, 'epoch': 0.82}


 41%|████      | 514/1248 [42:23<34:25,  2.81s/it]

{'loss': 0.0001, 'learning_rate': 1.1762820512820514e-05, 'epoch': 0.82}


 41%|████▏     | 515/1248 [42:25<32:31,  2.66s/it]

{'loss': 0.0001, 'learning_rate': 1.1746794871794874e-05, 'epoch': 0.83}


 41%|████▏     | 516/1248 [42:28<32:15,  2.64s/it]

{'loss': 0.5731, 'learning_rate': 1.1730769230769232e-05, 'epoch': 0.83}


 41%|████▏     | 517/1248 [42:30<30:25,  2.50s/it]

{'loss': 0.0002, 'learning_rate': 1.1714743589743592e-05, 'epoch': 0.83}


 42%|████▏     | 518/1248 [42:32<28:33,  2.35s/it]

{'loss': 0.0004, 'learning_rate': 1.169871794871795e-05, 'epoch': 0.83}


 42%|████▏     | 519/1248 [42:34<27:20,  2.25s/it]

{'loss': 0.0002, 'learning_rate': 1.168269230769231e-05, 'epoch': 0.83}


 42%|████▏     | 520/1248 [42:37<31:43,  2.62s/it]

{'loss': 0.0001, 'learning_rate': 1.1666666666666668e-05, 'epoch': 0.83}


 42%|████▏     | 521/1248 [42:39<30:17,  2.50s/it]

{'loss': 0.0001, 'learning_rate': 1.1650641025641028e-05, 'epoch': 0.83}


 42%|████▏     | 522/1248 [42:42<30:13,  2.50s/it]

{'loss': 0.0011, 'learning_rate': 1.1634615384615386e-05, 'epoch': 0.84}


 42%|████▏     | 523/1248 [42:45<33:33,  2.78s/it]

{'loss': 0.0001, 'learning_rate': 1.1618589743589746e-05, 'epoch': 0.84}


 42%|████▏     | 524/1248 [42:47<30:27,  2.52s/it]

{'loss': 0.0004, 'learning_rate': 1.1602564102564104e-05, 'epoch': 0.84}


 42%|████▏     | 525/1248 [42:51<33:53,  2.81s/it]

{'loss': 0.0005, 'learning_rate': 1.1586538461538464e-05, 'epoch': 0.84}


 42%|████▏     | 526/1248 [42:54<33:43,  2.80s/it]

{'loss': 0.0003, 'learning_rate': 1.1570512820512823e-05, 'epoch': 0.84}


 42%|████▏     | 527/1248 [42:55<30:20,  2.53s/it]

{'loss': 0.0002, 'learning_rate': 1.155448717948718e-05, 'epoch': 0.84}


 42%|████▏     | 528/1248 [42:58<29:31,  2.46s/it]

{'loss': 0.0003, 'learning_rate': 1.1538461538461538e-05, 'epoch': 0.85}


 42%|████▏     | 529/1248 [43:00<28:51,  2.41s/it]

{'loss': 0.0002, 'learning_rate': 1.1522435897435898e-05, 'epoch': 0.85}


 42%|████▏     | 530/1248 [43:02<26:18,  2.20s/it]

{'loss': 0.0002, 'learning_rate': 1.1506410256410256e-05, 'epoch': 0.85}


 43%|████▎     | 531/1248 [43:05<28:29,  2.38s/it]

{'loss': 0.0001, 'learning_rate': 1.1490384615384616e-05, 'epoch': 0.85}


 43%|████▎     | 532/1248 [43:06<26:23,  2.21s/it]

{'loss': 0.0001, 'learning_rate': 1.1474358974358974e-05, 'epoch': 0.85}


 43%|████▎     | 533/1248 [43:08<24:27,  2.05s/it]

{'loss': 0.0001, 'learning_rate': 1.1458333333333333e-05, 'epoch': 0.85}


 43%|████▎     | 534/1248 [43:10<24:32,  2.06s/it]

{'loss': 0.0003, 'learning_rate': 1.1442307692307693e-05, 'epoch': 0.86}


 43%|████▎     | 535/1248 [43:12<24:06,  2.03s/it]

{'loss': 0.0002, 'learning_rate': 1.1426282051282051e-05, 'epoch': 0.86}


 43%|████▎     | 536/1248 [43:14<24:43,  2.08s/it]

{'loss': 0.0003, 'learning_rate': 1.1410256410256411e-05, 'epoch': 0.86}


 43%|████▎     | 537/1248 [43:17<25:07,  2.12s/it]

{'loss': 0.0002, 'learning_rate': 1.139423076923077e-05, 'epoch': 0.86}


 43%|████▎     | 538/1248 [43:21<34:50,  2.94s/it]

{'loss': 0.0002, 'learning_rate': 1.1378205128205129e-05, 'epoch': 0.86}


 43%|████▎     | 539/1248 [43:23<30:11,  2.56s/it]

{'loss': 0.0003, 'learning_rate': 1.1362179487179487e-05, 'epoch': 0.86}


 43%|████▎     | 540/1248 [43:25<29:35,  2.51s/it]

{'loss': 0.0002, 'learning_rate': 1.1346153846153847e-05, 'epoch': 0.87}


 43%|████▎     | 541/1248 [43:27<27:25,  2.33s/it]

{'loss': 0.0007, 'learning_rate': 1.1330128205128205e-05, 'epoch': 0.87}


 43%|████▎     | 542/1248 [43:32<35:48,  3.04s/it]

{'loss': 0.0002, 'learning_rate': 1.1314102564102565e-05, 'epoch': 0.87}


 44%|████▎     | 543/1248 [43:35<33:53,  2.88s/it]

{'loss': 0.0006, 'learning_rate': 1.1298076923076923e-05, 'epoch': 0.87}


 44%|████▎     | 544/1248 [43:36<29:04,  2.48s/it]

{'loss': 0.0005, 'learning_rate': 1.1282051282051283e-05, 'epoch': 0.87}


 44%|████▎     | 545/1248 [43:38<27:50,  2.38s/it]

{'loss': 0.0001, 'learning_rate': 1.1266025641025643e-05, 'epoch': 0.87}


 44%|████▍     | 546/1248 [43:40<26:32,  2.27s/it]

{'loss': 0.0002, 'learning_rate': 1.125e-05, 'epoch': 0.88}


 44%|████▍     | 547/1248 [43:42<25:41,  2.20s/it]

{'loss': 0.0003, 'learning_rate': 1.123397435897436e-05, 'epoch': 0.88}


 44%|████▍     | 548/1248 [43:45<26:16,  2.25s/it]

{'loss': 0.0001, 'learning_rate': 1.1217948717948719e-05, 'epoch': 0.88}


 44%|████▍     | 549/1248 [43:46<24:00,  2.06s/it]

{'loss': 0.0001, 'learning_rate': 1.1201923076923078e-05, 'epoch': 0.88}


 44%|████▍     | 550/1248 [43:49<27:21,  2.35s/it]

{'loss': 0.0003, 'learning_rate': 1.1185897435897437e-05, 'epoch': 0.88}


 44%|████▍     | 551/1248 [43:51<26:22,  2.27s/it]

{'loss': 0.0001, 'learning_rate': 1.1169871794871796e-05, 'epoch': 0.88}


 44%|████▍     | 552/1248 [43:53<25:13,  2.18s/it]

{'loss': 0.0001, 'learning_rate': 1.1153846153846154e-05, 'epoch': 0.88}


 44%|████▍     | 553/1248 [43:55<23:30,  2.03s/it]

{'loss': 0.0002, 'learning_rate': 1.1137820512820514e-05, 'epoch': 0.89}


 44%|████▍     | 554/1248 [43:58<26:32,  2.29s/it]

{'loss': 0.0001, 'learning_rate': 1.1121794871794872e-05, 'epoch': 0.89}


 44%|████▍     | 555/1248 [44:00<27:10,  2.35s/it]

{'loss': 0.0002, 'learning_rate': 1.1105769230769232e-05, 'epoch': 0.89}


 45%|████▍     | 556/1248 [44:02<25:55,  2.25s/it]

{'loss': 0.0006, 'learning_rate': 1.1089743589743592e-05, 'epoch': 0.89}


 45%|████▍     | 557/1248 [44:04<22:59,  2.00s/it]

{'loss': 0.0003, 'learning_rate': 1.107371794871795e-05, 'epoch': 0.89}


 45%|████▍     | 558/1248 [44:07<25:40,  2.23s/it]

{'loss': 0.0003, 'learning_rate': 1.105769230769231e-05, 'epoch': 0.89}


 45%|████▍     | 559/1248 [44:09<26:32,  2.31s/it]

{'loss': 0.0002, 'learning_rate': 1.1041666666666668e-05, 'epoch': 0.9}


 45%|████▍     | 560/1248 [44:11<26:29,  2.31s/it]

{'loss': 0.0003, 'learning_rate': 1.1025641025641028e-05, 'epoch': 0.9}


 45%|████▍     | 561/1248 [44:13<25:35,  2.24s/it]

{'loss': 0.0003, 'learning_rate': 1.1009615384615386e-05, 'epoch': 0.9}


 45%|████▌     | 562/1248 [44:16<25:49,  2.26s/it]

{'loss': 0.0004, 'learning_rate': 1.0993589743589746e-05, 'epoch': 0.9}


 45%|████▌     | 563/1248 [44:19<27:36,  2.42s/it]

{'loss': 0.0001, 'learning_rate': 1.0977564102564104e-05, 'epoch': 0.9}


 45%|████▌     | 564/1248 [44:21<26:14,  2.30s/it]

{'loss': 0.0001, 'learning_rate': 1.0961538461538464e-05, 'epoch': 0.9}


 45%|████▌     | 565/1248 [44:23<26:54,  2.36s/it]

{'loss': 0.001, 'learning_rate': 1.0945512820512822e-05, 'epoch': 0.91}


 45%|████▌     | 566/1248 [44:26<29:05,  2.56s/it]

{'loss': 0.0001, 'learning_rate': 1.092948717948718e-05, 'epoch': 0.91}


 45%|████▌     | 567/1248 [44:28<26:45,  2.36s/it]

{'loss': 0.0002, 'learning_rate': 1.0913461538461538e-05, 'epoch': 0.91}


 46%|████▌     | 568/1248 [44:30<25:47,  2.28s/it]

{'loss': 0.0004, 'learning_rate': 1.0897435897435898e-05, 'epoch': 0.91}


 46%|████▌     | 569/1248 [44:33<27:12,  2.40s/it]

{'loss': 0.0001, 'learning_rate': 1.0881410256410256e-05, 'epoch': 0.91}


 46%|████▌     | 570/1248 [44:35<26:17,  2.33s/it]

{'loss': 0.0001, 'learning_rate': 1.0865384615384616e-05, 'epoch': 0.91}


 46%|████▌     | 571/1248 [44:37<24:26,  2.17s/it]

{'loss': 0.0002, 'learning_rate': 1.0849358974358974e-05, 'epoch': 0.92}


 46%|████▌     | 572/1248 [44:39<23:52,  2.12s/it]

{'loss': 0.0001, 'learning_rate': 1.0833333333333334e-05, 'epoch': 0.92}


 46%|████▌     | 573/1248 [44:41<25:21,  2.25s/it]

{'loss': 0.0001, 'learning_rate': 1.0817307692307693e-05, 'epoch': 0.92}


 46%|████▌     | 574/1248 [44:43<23:52,  2.13s/it]

{'loss': 0.0004, 'learning_rate': 1.0801282051282051e-05, 'epoch': 0.92}


 46%|████▌     | 575/1248 [44:45<22:31,  2.01s/it]

{'loss': 0.0003, 'learning_rate': 1.0785256410256411e-05, 'epoch': 0.92}


 46%|████▌     | 576/1248 [44:47<24:11,  2.16s/it]

{'loss': 0.0001, 'learning_rate': 1.076923076923077e-05, 'epoch': 0.92}


 46%|████▌     | 577/1248 [44:49<22:55,  2.05s/it]

{'loss': 0.0001, 'learning_rate': 1.0753205128205129e-05, 'epoch': 0.92}


 46%|████▋     | 578/1248 [44:51<22:11,  1.99s/it]

{'loss': 0.0002, 'learning_rate': 1.0737179487179487e-05, 'epoch': 0.93}


 46%|████▋     | 579/1248 [44:53<23:09,  2.08s/it]

{'loss': 0.0002, 'learning_rate': 1.0721153846153847e-05, 'epoch': 0.93}


 46%|████▋     | 580/1248 [44:56<24:03,  2.16s/it]

{'loss': 0.0002, 'learning_rate': 1.0705128205128205e-05, 'epoch': 0.93}


 47%|████▋     | 581/1248 [44:58<23:05,  2.08s/it]

{'loss': 0.0001, 'learning_rate': 1.0689102564102565e-05, 'epoch': 0.93}


 47%|████▋     | 582/1248 [45:00<23:39,  2.13s/it]

{'loss': 0.0002, 'learning_rate': 1.0673076923076923e-05, 'epoch': 0.93}


 47%|████▋     | 583/1248 [45:02<23:03,  2.08s/it]

{'loss': 0.0003, 'learning_rate': 1.0657051282051283e-05, 'epoch': 0.93}


 47%|████▋     | 584/1248 [45:03<21:04,  1.90s/it]

{'loss': 0.0002, 'learning_rate': 1.0641025641025643e-05, 'epoch': 0.94}


 47%|████▋     | 585/1248 [45:05<21:46,  1.97s/it]

{'loss': 0.0001, 'learning_rate': 1.0625e-05, 'epoch': 0.94}


 47%|████▋     | 586/1248 [45:08<22:56,  2.08s/it]

{'loss': 0.0001, 'learning_rate': 1.060897435897436e-05, 'epoch': 0.94}


 47%|████▋     | 587/1248 [45:09<21:41,  1.97s/it]

{'loss': 0.0002, 'learning_rate': 1.0592948717948719e-05, 'epoch': 0.94}


 47%|████▋     | 588/1248 [45:12<23:39,  2.15s/it]

{'loss': 0.0003, 'learning_rate': 1.0576923076923078e-05, 'epoch': 0.94}


 47%|████▋     | 589/1248 [45:14<24:23,  2.22s/it]

{'loss': 0.0009, 'learning_rate': 1.0560897435897437e-05, 'epoch': 0.94}


 47%|████▋     | 590/1248 [45:17<24:42,  2.25s/it]

{'loss': 0.0007, 'learning_rate': 1.0544871794871796e-05, 'epoch': 0.95}


 47%|████▋     | 591/1248 [45:18<22:42,  2.07s/it]

{'loss': 0.0005, 'learning_rate': 1.0528846153846154e-05, 'epoch': 0.95}


 47%|████▋     | 592/1248 [45:21<23:12,  2.12s/it]

{'loss': 0.0002, 'learning_rate': 1.0512820512820514e-05, 'epoch': 0.95}


 48%|████▊     | 593/1248 [45:25<29:50,  2.73s/it]

{'loss': 0.0001, 'learning_rate': 1.0496794871794872e-05, 'epoch': 0.95}


 48%|████▊     | 594/1248 [45:27<28:36,  2.62s/it]

{'loss': 0.0001, 'learning_rate': 1.0480769230769232e-05, 'epoch': 0.95}


 48%|████▊     | 595/1248 [45:29<25:52,  2.38s/it]

{'loss': 0.0001, 'learning_rate': 1.0464743589743592e-05, 'epoch': 0.95}


 48%|████▊     | 596/1248 [45:32<26:21,  2.43s/it]

{'loss': 0.0002, 'learning_rate': 1.044871794871795e-05, 'epoch': 0.96}


 48%|████▊     | 597/1248 [45:33<24:00,  2.21s/it]

{'loss': 0.0002, 'learning_rate': 1.043269230769231e-05, 'epoch': 0.96}


 48%|████▊     | 598/1248 [45:36<24:20,  2.25s/it]

{'loss': 0.0001, 'learning_rate': 1.0416666666666668e-05, 'epoch': 0.96}


 48%|████▊     | 599/1248 [45:38<23:40,  2.19s/it]

{'loss': 0.0001, 'learning_rate': 1.0400641025641028e-05, 'epoch': 0.96}


 48%|████▊     | 600/1248 [45:39<22:11,  2.05s/it]***** Running Evaluation *****
  Num examples = 6239
  Batch size = 10


{'loss': 0.0001, 'learning_rate': 1.0384615384615386e-05, 'epoch': 0.96}


                                                  
 48%|████▊     | 600/1248 [50:19<22:11,  2.05s/it]Saving model checkpoint to bert-base-cased-finetuned-adjs-wsent-black-clover/checkpoint-600
Configuration saved in bert-base-cased-finetuned-adjs-wsent-black-clover/checkpoint-600/config.json


{'eval_loss': 8.839500515023246e-05, 'eval_runtime': 279.3314, 'eval_samples_per_second': 22.335, 'eval_steps_per_second': 2.234, 'epoch': 0.96}


Model weights saved in bert-base-cased-finetuned-adjs-wsent-black-clover/checkpoint-600/pytorch_model.bin
 48%|████▊     | 601/1248 [50:22<15:30:04, 86.25s/it]

{'loss': 0.0002, 'learning_rate': 1.0368589743589746e-05, 'epoch': 0.96}


 48%|████▊     | 602/1248 [50:24<10:56:07, 60.94s/it]

{'loss': 0.0001, 'learning_rate': 1.0352564102564104e-05, 'epoch': 0.96}


 48%|████▊     | 603/1248 [50:26<7:46:18, 43.38s/it] 

{'loss': 0.0004, 'learning_rate': 1.0336538461538464e-05, 'epoch': 0.97}


 48%|████▊     | 604/1248 [50:29<5:33:58, 31.12s/it]

{'loss': 0.0001, 'learning_rate': 1.0320512820512822e-05, 'epoch': 0.97}


 48%|████▊     | 605/1248 [50:31<4:00:09, 22.41s/it]

{'loss': 0.0003, 'learning_rate': 1.030448717948718e-05, 'epoch': 0.97}


 49%|████▊     | 606/1248 [50:34<2:56:15, 16.47s/it]

{'loss': 0.0001, 'learning_rate': 1.0288461538461538e-05, 'epoch': 0.97}


 49%|████▊     | 607/1248 [50:35<2:08:42, 12.05s/it]

{'loss': 0.0004, 'learning_rate': 1.0272435897435898e-05, 'epoch': 0.97}


 49%|████▊     | 608/1248 [50:38<1:37:59,  9.19s/it]

{'loss': 0.0001, 'learning_rate': 1.0256410256410256e-05, 'epoch': 0.97}


 49%|████▉     | 609/1248 [50:40<1:14:58,  7.04s/it]

{'loss': 0.0006, 'learning_rate': 1.0240384615384616e-05, 'epoch': 0.98}


 49%|████▉     | 610/1248 [50:42<1:00:34,  5.70s/it]

{'loss': 0.0001, 'learning_rate': 1.0224358974358974e-05, 'epoch': 0.98}


 49%|████▉     | 611/1248 [50:44<49:03,  4.62s/it]  

{'loss': 0.0002, 'learning_rate': 1.0208333333333334e-05, 'epoch': 0.98}


 49%|████▉     | 612/1248 [50:46<40:26,  3.82s/it]

{'loss': 0.0004, 'learning_rate': 1.0192307692307692e-05, 'epoch': 0.98}


 49%|████▉     | 613/1248 [50:49<35:24,  3.35s/it]

{'loss': 0.0001, 'learning_rate': 1.0176282051282051e-05, 'epoch': 0.98}


 49%|████▉     | 614/1248 [50:51<31:21,  2.97s/it]

{'loss': 0.0001, 'learning_rate': 1.0160256410256411e-05, 'epoch': 0.98}


 49%|████▉     | 615/1248 [50:53<27:45,  2.63s/it]

{'loss': 0.0001, 'learning_rate': 1.014423076923077e-05, 'epoch': 0.99}


 49%|████▉     | 616/1248 [50:55<27:39,  2.63s/it]

{'loss': 0.0001, 'learning_rate': 1.012820512820513e-05, 'epoch': 0.99}


 49%|████▉     | 617/1248 [50:57<26:11,  2.49s/it]

{'loss': 0.0001, 'learning_rate': 1.0112179487179487e-05, 'epoch': 0.99}


 50%|████▉     | 618/1248 [51:00<24:55,  2.37s/it]

{'loss': 0.0001, 'learning_rate': 1.0096153846153847e-05, 'epoch': 0.99}


 50%|████▉     | 619/1248 [51:02<24:34,  2.34s/it]

{'loss': 0.0001, 'learning_rate': 1.0080128205128205e-05, 'epoch': 0.99}


 50%|████▉     | 620/1248 [51:04<23:38,  2.26s/it]

{'loss': 0.0001, 'learning_rate': 1.0064102564102565e-05, 'epoch': 0.99}


 50%|████▉     | 621/1248 [51:06<23:00,  2.20s/it]

{'loss': 0.0004, 'learning_rate': 1.0048076923076923e-05, 'epoch': 1.0}


 50%|████▉     | 622/1248 [51:08<21:06,  2.02s/it]

{'loss': 0.0003, 'learning_rate': 1.0032051282051283e-05, 'epoch': 1.0}


 50%|████▉     | 623/1248 [51:10<21:19,  2.05s/it]

{'loss': 0.0001, 'learning_rate': 1.0016025641025641e-05, 'epoch': 1.0}


 50%|█████     | 624/1248 [51:11<20:20,  1.96s/it]

{'loss': 0.0002, 'learning_rate': 1e-05, 'epoch': 1.0}


 50%|█████     | 625/1248 [51:13<19:35,  1.89s/it]

{'loss': 0.0001, 'learning_rate': 9.98397435897436e-06, 'epoch': 1.0}


 50%|█████     | 626/1248 [51:15<21:09,  2.04s/it]

{'loss': 0.0001, 'learning_rate': 9.967948717948719e-06, 'epoch': 1.0}


 50%|█████     | 627/1248 [51:18<22:21,  2.16s/it]

{'loss': 0.0002, 'learning_rate': 9.951923076923079e-06, 'epoch': 1.0}


 50%|█████     | 628/1248 [51:20<23:08,  2.24s/it]

{'loss': 0.0002, 'learning_rate': 9.935897435897437e-06, 'epoch': 1.01}


 50%|█████     | 629/1248 [51:22<21:38,  2.10s/it]

{'loss': 0.0004, 'learning_rate': 9.919871794871796e-06, 'epoch': 1.01}


 50%|█████     | 630/1248 [51:24<20:48,  2.02s/it]

{'loss': 0.0002, 'learning_rate': 9.903846153846155e-06, 'epoch': 1.01}


 51%|█████     | 631/1248 [51:27<22:29,  2.19s/it]

{'loss': 0.0003, 'learning_rate': 9.887820512820514e-06, 'epoch': 1.01}


 51%|█████     | 632/1248 [51:28<20:40,  2.01s/it]

{'loss': 0.0002, 'learning_rate': 9.871794871794872e-06, 'epoch': 1.01}


 51%|█████     | 633/1248 [51:31<22:23,  2.18s/it]

{'loss': 0.0001, 'learning_rate': 9.855769230769232e-06, 'epoch': 1.01}


 51%|█████     | 634/1248 [51:32<21:03,  2.06s/it]

{'loss': 0.0003, 'learning_rate': 9.83974358974359e-06, 'epoch': 1.02}


 51%|█████     | 635/1248 [51:35<23:04,  2.26s/it]

{'loss': 0.0001, 'learning_rate': 9.823717948717948e-06, 'epoch': 1.02}


 51%|█████     | 636/1248 [51:37<21:25,  2.10s/it]

{'loss': 0.0005, 'learning_rate': 9.807692307692308e-06, 'epoch': 1.02}


 51%|█████     | 637/1248 [51:40<24:02,  2.36s/it]

{'loss': 0.0002, 'learning_rate': 9.791666666666666e-06, 'epoch': 1.02}


 51%|█████     | 638/1248 [51:42<23:14,  2.29s/it]

{'loss': 0.0001, 'learning_rate': 9.775641025641026e-06, 'epoch': 1.02}


 51%|█████     | 639/1248 [51:44<23:17,  2.30s/it]

{'loss': 0.0001, 'learning_rate': 9.759615384615386e-06, 'epoch': 1.02}


 51%|█████▏    | 640/1248 [51:46<20:46,  2.05s/it]

{'loss': 0.0001, 'learning_rate': 9.743589743589744e-06, 'epoch': 1.03}


 51%|█████▏    | 641/1248 [51:48<19:46,  1.95s/it]

{'loss': 0.0003, 'learning_rate': 9.727564102564104e-06, 'epoch': 1.03}


 51%|█████▏    | 642/1248 [51:49<17:49,  1.77s/it]

{'loss': 0.0002, 'learning_rate': 9.711538461538462e-06, 'epoch': 1.03}


 52%|█████▏    | 643/1248 [51:51<18:30,  1.84s/it]

{'loss': 0.0001, 'learning_rate': 9.695512820512822e-06, 'epoch': 1.03}


 52%|█████▏    | 644/1248 [51:53<20:31,  2.04s/it]

{'loss': 0.0001, 'learning_rate': 9.67948717948718e-06, 'epoch': 1.03}


 52%|█████▏    | 645/1248 [51:55<20:13,  2.01s/it]

{'loss': 0.0002, 'learning_rate': 9.66346153846154e-06, 'epoch': 1.03}


 52%|█████▏    | 646/1248 [51:58<21:05,  2.10s/it]

{'loss': 0.0001, 'learning_rate': 9.647435897435898e-06, 'epoch': 1.04}


 52%|█████▏    | 647/1248 [52:00<20:48,  2.08s/it]

{'loss': 0.0002, 'learning_rate': 9.631410256410258e-06, 'epoch': 1.04}


 52%|█████▏    | 648/1248 [52:02<20:27,  2.05s/it]

{'loss': 0.0001, 'learning_rate': 9.615384615384616e-06, 'epoch': 1.04}


 52%|█████▏    | 649/1248 [52:04<19:58,  2.00s/it]

{'loss': 0.0003, 'learning_rate': 9.599358974358976e-06, 'epoch': 1.04}


 52%|█████▏    | 650/1248 [52:07<24:13,  2.43s/it]

{'loss': 0.0002, 'learning_rate': 9.583333333333335e-06, 'epoch': 1.04}


 52%|█████▏    | 651/1248 [52:09<22:53,  2.30s/it]

{'loss': 0.0001, 'learning_rate': 9.567307692307693e-06, 'epoch': 1.04}


 52%|█████▏    | 652/1248 [52:11<22:28,  2.26s/it]

{'loss': 0.0001, 'learning_rate': 9.551282051282053e-06, 'epoch': 1.04}


 52%|█████▏    | 653/1248 [52:14<24:11,  2.44s/it]

{'loss': 0.0001, 'learning_rate': 9.535256410256411e-06, 'epoch': 1.05}


 52%|█████▏    | 654/1248 [52:16<23:05,  2.33s/it]

{'loss': 0.0001, 'learning_rate': 9.51923076923077e-06, 'epoch': 1.05}


 52%|█████▏    | 655/1248 [52:18<20:41,  2.09s/it]

{'loss': 0.0001, 'learning_rate': 9.50320512820513e-06, 'epoch': 1.05}


 53%|█████▎    | 656/1248 [52:20<20:59,  2.13s/it]

{'loss': 0.0001, 'learning_rate': 9.487179487179487e-06, 'epoch': 1.05}


 53%|█████▎    | 657/1248 [52:23<23:12,  2.36s/it]

{'loss': 0.0001, 'learning_rate': 9.471153846153847e-06, 'epoch': 1.05}


 53%|█████▎    | 658/1248 [52:24<21:11,  2.16s/it]

{'loss': 0.0002, 'learning_rate': 9.455128205128205e-06, 'epoch': 1.05}


 53%|█████▎    | 659/1248 [52:26<20:17,  2.07s/it]

{'loss': 0.0002, 'learning_rate': 9.439102564102565e-06, 'epoch': 1.06}


 53%|█████▎    | 660/1248 [52:29<22:04,  2.25s/it]

{'loss': 0.0009, 'learning_rate': 9.423076923076923e-06, 'epoch': 1.06}


 53%|█████▎    | 661/1248 [52:31<21:44,  2.22s/it]

{'loss': 0.0002, 'learning_rate': 9.407051282051283e-06, 'epoch': 1.06}


 53%|█████▎    | 662/1248 [52:34<23:18,  2.39s/it]

{'loss': 0.0001, 'learning_rate': 9.391025641025641e-06, 'epoch': 1.06}


 53%|█████▎    | 663/1248 [52:37<24:51,  2.55s/it]

{'loss': 0.0002, 'learning_rate': 9.375000000000001e-06, 'epoch': 1.06}


 53%|█████▎    | 664/1248 [52:38<21:49,  2.24s/it]

{'loss': 0.0001, 'learning_rate': 9.358974358974359e-06, 'epoch': 1.06}


 53%|█████▎    | 665/1248 [52:40<21:10,  2.18s/it]

{'loss': 0.0001, 'learning_rate': 9.342948717948719e-06, 'epoch': 1.07}


 53%|█████▎    | 666/1248 [52:43<21:48,  2.25s/it]

{'loss': 0.0001, 'learning_rate': 9.326923076923079e-06, 'epoch': 1.07}


 53%|█████▎    | 667/1248 [52:47<27:26,  2.83s/it]

{'loss': 0.0001, 'learning_rate': 9.310897435897437e-06, 'epoch': 1.07}


 54%|█████▎    | 668/1248 [52:49<25:15,  2.61s/it]

{'loss': 0.0001, 'learning_rate': 9.294871794871796e-06, 'epoch': 1.07}


 54%|█████▎    | 669/1248 [52:51<22:50,  2.37s/it]

{'loss': 0.0002, 'learning_rate': 9.278846153846155e-06, 'epoch': 1.07}


 54%|█████▎    | 670/1248 [52:53<21:42,  2.25s/it]

{'loss': 0.0001, 'learning_rate': 9.262820512820514e-06, 'epoch': 1.07}


 54%|█████▍    | 671/1248 [52:54<19:19,  2.01s/it]

{'loss': 0.0001, 'learning_rate': 9.246794871794873e-06, 'epoch': 1.08}


 54%|█████▍    | 672/1248 [52:59<26:31,  2.76s/it]

{'loss': 0.0002, 'learning_rate': 9.230769230769232e-06, 'epoch': 1.08}


 54%|█████▍    | 673/1248 [53:01<25:50,  2.70s/it]

{'loss': 0.0001, 'learning_rate': 9.21474358974359e-06, 'epoch': 1.08}


 54%|█████▍    | 674/1248 [53:03<23:01,  2.41s/it]

{'loss': 0.0003, 'learning_rate': 9.198717948717949e-06, 'epoch': 1.08}


 54%|█████▍    | 675/1248 [53:05<22:57,  2.40s/it]

{'loss': 0.0001, 'learning_rate': 9.182692307692308e-06, 'epoch': 1.08}


 54%|█████▍    | 676/1248 [53:07<21:39,  2.27s/it]

{'loss': 0.0001, 'learning_rate': 9.166666666666666e-06, 'epoch': 1.08}


 54%|█████▍    | 677/1248 [53:09<20:43,  2.18s/it]

{'loss': 0.0002, 'learning_rate': 9.150641025641026e-06, 'epoch': 1.08}


 54%|█████▍    | 678/1248 [53:11<19:53,  2.09s/it]

{'loss': 0.0001, 'learning_rate': 9.134615384615384e-06, 'epoch': 1.09}


 54%|█████▍    | 679/1248 [53:13<19:20,  2.04s/it]

{'loss': 0.0001, 'learning_rate': 9.118589743589744e-06, 'epoch': 1.09}


 54%|█████▍    | 680/1248 [53:16<20:39,  2.18s/it]

{'loss': 0.0002, 'learning_rate': 9.102564102564104e-06, 'epoch': 1.09}


 55%|█████▍    | 681/1248 [53:18<20:17,  2.15s/it]

{'loss': 0.0001, 'learning_rate': 9.086538461538462e-06, 'epoch': 1.09}


 55%|█████▍    | 682/1248 [53:20<21:38,  2.29s/it]

{'loss': 0.0001, 'learning_rate': 9.070512820512822e-06, 'epoch': 1.09}


 55%|█████▍    | 683/1248 [53:22<20:34,  2.19s/it]

{'loss': 0.0002, 'learning_rate': 9.05448717948718e-06, 'epoch': 1.09}


 55%|█████▍    | 684/1248 [53:24<19:59,  2.13s/it]

{'loss': 0.0001, 'learning_rate': 9.03846153846154e-06, 'epoch': 1.1}


 55%|█████▍    | 685/1248 [53:26<19:43,  2.10s/it]

{'loss': 0.0001, 'learning_rate': 9.022435897435898e-06, 'epoch': 1.1}


 55%|█████▍    | 686/1248 [53:28<19:31,  2.08s/it]

{'loss': 0.0012, 'learning_rate': 9.006410256410258e-06, 'epoch': 1.1}


 55%|█████▌    | 687/1248 [53:31<21:25,  2.29s/it]

{'loss': 0.0002, 'learning_rate': 8.990384615384616e-06, 'epoch': 1.1}


 55%|█████▌    | 688/1248 [53:36<28:05,  3.01s/it]

{'loss': 0.0001, 'learning_rate': 8.974358974358976e-06, 'epoch': 1.1}


 55%|█████▌    | 689/1248 [53:38<26:40,  2.86s/it]

{'loss': 0.0003, 'learning_rate': 8.958333333333334e-06, 'epoch': 1.1}


 55%|█████▌    | 690/1248 [53:42<29:59,  3.23s/it]

{'loss': 0.0001, 'learning_rate': 8.942307692307693e-06, 'epoch': 1.11}


 55%|█████▌    | 691/1248 [53:45<27:37,  2.98s/it]

{'loss': 0.0003, 'learning_rate': 8.926282051282053e-06, 'epoch': 1.11}


 55%|█████▌    | 692/1248 [53:46<23:12,  2.50s/it]

{'loss': 0.0002, 'learning_rate': 8.910256410256411e-06, 'epoch': 1.11}


 56%|█████▌    | 693/1248 [53:48<21:31,  2.33s/it]

{'loss': 0.0001, 'learning_rate': 8.89423076923077e-06, 'epoch': 1.11}


 56%|█████▌    | 694/1248 [53:50<20:31,  2.22s/it]

{'loss': 0.0002, 'learning_rate': 8.87820512820513e-06, 'epoch': 1.11}


 56%|█████▌    | 695/1248 [53:52<19:33,  2.12s/it]

{'loss': 0.0002, 'learning_rate': 8.862179487179487e-06, 'epoch': 1.11}


 56%|█████▌    | 696/1248 [53:54<18:29,  2.01s/it]

{'loss': 0.0004, 'learning_rate': 8.846153846153847e-06, 'epoch': 1.12}


 56%|█████▌    | 697/1248 [53:56<18:42,  2.04s/it]

{'loss': 0.0001, 'learning_rate': 8.830128205128205e-06, 'epoch': 1.12}


 56%|█████▌    | 698/1248 [53:58<19:52,  2.17s/it]

{'loss': 0.0002, 'learning_rate': 8.814102564102565e-06, 'epoch': 1.12}


 56%|█████▌    | 699/1248 [54:01<21:21,  2.33s/it]

{'loss': 0.0001, 'learning_rate': 8.798076923076923e-06, 'epoch': 1.12}


 56%|█████▌    | 700/1248 [54:03<18:52,  2.07s/it]***** Running Evaluation *****
  Num examples = 6239
  Batch size = 10


{'loss': 0.0002, 'learning_rate': 8.782051282051283e-06, 'epoch': 1.12}


                                                  
 56%|█████▌    | 700/1248 [58:58<18:52,  2.07s/it]Saving model checkpoint to bert-base-cased-finetuned-adjs-wsent-black-clover/checkpoint-700
Configuration saved in bert-base-cased-finetuned-adjs-wsent-black-clover/checkpoint-700/config.json


{'eval_loss': 7.673375512240455e-05, 'eval_runtime': 295.2878, 'eval_samples_per_second': 21.129, 'eval_steps_per_second': 2.113, 'epoch': 1.12}


Model weights saved in bert-base-cased-finetuned-adjs-wsent-black-clover/checkpoint-700/pytorch_model.bin
 56%|█████▌    | 701/1248 [59:01<13:49:19, 90.97s/it]

{'loss': 0.0001, 'learning_rate': 8.766025641025641e-06, 'epoch': 1.12}


 56%|█████▋    | 702/1248 [59:03<9:46:09, 64.41s/it] 

{'loss': 0.0003, 'learning_rate': 8.750000000000001e-06, 'epoch': 1.12}


 56%|█████▋    | 703/1248 [59:05<6:54:28, 45.63s/it]

{'loss': 0.0002, 'learning_rate': 8.733974358974359e-06, 'epoch': 1.13}


 56%|█████▋    | 704/1248 [59:08<4:56:01, 32.65s/it]

{'loss': 0.0001, 'learning_rate': 8.717948717948719e-06, 'epoch': 1.13}


 56%|█████▋    | 705/1248 [59:10<3:33:30, 23.59s/it]

{'loss': 0.0002, 'learning_rate': 8.701923076923079e-06, 'epoch': 1.13}


 57%|█████▋    | 706/1248 [59:12<2:35:51, 17.25s/it]

{'loss': 0.0003, 'learning_rate': 8.685897435897437e-06, 'epoch': 1.13}


 57%|█████▋    | 707/1248 [59:14<1:53:15, 12.56s/it]

{'loss': 0.0002, 'learning_rate': 8.669871794871797e-06, 'epoch': 1.13}


 57%|█████▋    | 708/1248 [59:16<1:25:02,  9.45s/it]

{'loss': 0.0007, 'learning_rate': 8.653846153846155e-06, 'epoch': 1.13}


 57%|█████▋    | 709/1248 [59:19<1:05:49,  7.33s/it]

{'loss': 0.0001, 'learning_rate': 8.637820512820514e-06, 'epoch': 1.14}


 57%|█████▋    | 710/1248 [59:21<52:16,  5.83s/it]  

{'loss': 0.0001, 'learning_rate': 8.621794871794873e-06, 'epoch': 1.14}


 57%|█████▋    | 711/1248 [59:24<43:47,  4.89s/it]

{'loss': 0.0001, 'learning_rate': 8.605769230769232e-06, 'epoch': 1.14}


 57%|█████▋    | 712/1248 [59:25<34:57,  3.91s/it]

{'loss': 0.0001, 'learning_rate': 8.58974358974359e-06, 'epoch': 1.14}


 57%|█████▋    | 713/1248 [59:27<29:12,  3.28s/it]

{'loss': 0.0007, 'learning_rate': 8.573717948717949e-06, 'epoch': 1.14}


 57%|█████▋    | 714/1248 [59:29<26:28,  2.97s/it]

{'loss': 0.0001, 'learning_rate': 8.557692307692308e-06, 'epoch': 1.14}


 57%|█████▋    | 715/1248 [59:32<24:24,  2.75s/it]

{'loss': 0.0001, 'learning_rate': 8.541666666666666e-06, 'epoch': 1.15}


 57%|█████▋    | 716/1248 [59:34<23:20,  2.63s/it]

{'loss': 0.0001, 'learning_rate': 8.525641025641026e-06, 'epoch': 1.15}


 57%|█████▋    | 717/1248 [59:37<23:52,  2.70s/it]

{'loss': 0.0001, 'learning_rate': 8.509615384615384e-06, 'epoch': 1.15}


 58%|█████▊    | 718/1248 [59:39<22:19,  2.53s/it]

{'loss': 0.0002, 'learning_rate': 8.493589743589744e-06, 'epoch': 1.15}


 58%|█████▊    | 719/1248 [59:41<20:33,  2.33s/it]

{'loss': 0.0001, 'learning_rate': 8.477564102564102e-06, 'epoch': 1.15}


 58%|█████▊    | 720/1248 [59:43<20:49,  2.37s/it]

{'loss': 0.0001, 'learning_rate': 8.461538461538462e-06, 'epoch': 1.15}


 58%|█████▊    | 721/1248 [59:45<18:43,  2.13s/it]

{'loss': 0.0001, 'learning_rate': 8.445512820512822e-06, 'epoch': 1.16}


 58%|█████▊    | 722/1248 [59:47<18:49,  2.15s/it]

{'loss': 0.0001, 'learning_rate': 8.42948717948718e-06, 'epoch': 1.16}


 58%|█████▊    | 723/1248 [59:49<18:59,  2.17s/it]

{'loss': 0.0003, 'learning_rate': 8.41346153846154e-06, 'epoch': 1.16}


 58%|█████▊    | 724/1248 [59:52<20:23,  2.33s/it]

{'loss': 0.0002, 'learning_rate': 8.397435897435898e-06, 'epoch': 1.16}


 58%|█████▊    | 725/1248 [59:54<20:09,  2.31s/it]

{'loss': 0.0003, 'learning_rate': 8.381410256410258e-06, 'epoch': 1.16}


 58%|█████▊    | 726/1248 [59:57<20:11,  2.32s/it]

{'loss': 0.0001, 'learning_rate': 8.365384615384616e-06, 'epoch': 1.16}


 58%|█████▊    | 727/1248 [59:58<18:29,  2.13s/it]

{'loss': 0.0002, 'learning_rate': 8.349358974358976e-06, 'epoch': 1.17}


 58%|█████▊    | 728/1248 [1:00:00<17:38,  2.04s/it]

{'loss': 0.0001, 'learning_rate': 8.333333333333334e-06, 'epoch': 1.17}


 58%|█████▊    | 729/1248 [1:00:02<18:25,  2.13s/it]

{'loss': 0.0002, 'learning_rate': 8.317307692307694e-06, 'epoch': 1.17}


 58%|█████▊    | 730/1248 [1:00:04<17:43,  2.05s/it]

{'loss': 0.0001, 'learning_rate': 8.301282051282052e-06, 'epoch': 1.17}


 59%|█████▊    | 731/1248 [1:00:09<24:17,  2.82s/it]

{'loss': 0.0007, 'learning_rate': 8.285256410256411e-06, 'epoch': 1.17}


 59%|█████▊    | 732/1248 [1:00:11<22:19,  2.60s/it]

{'loss': 0.0001, 'learning_rate': 8.26923076923077e-06, 'epoch': 1.17}


 59%|█████▊    | 733/1248 [1:00:13<21:09,  2.46s/it]

{'loss': 0.0002, 'learning_rate': 8.253205128205128e-06, 'epoch': 1.17}


 59%|█████▉    | 734/1248 [1:00:16<21:06,  2.46s/it]

{'loss': 0.0001, 'learning_rate': 8.237179487179487e-06, 'epoch': 1.18}


 59%|█████▉    | 735/1248 [1:00:18<21:44,  2.54s/it]

{'loss': 0.0001, 'learning_rate': 8.221153846153847e-06, 'epoch': 1.18}


 59%|█████▉    | 736/1248 [1:00:20<20:02,  2.35s/it]

{'loss': 0.0001, 'learning_rate': 8.205128205128205e-06, 'epoch': 1.18}


 59%|█████▉    | 737/1248 [1:00:25<26:21,  3.10s/it]

{'loss': 0.0001, 'learning_rate': 8.189102564102565e-06, 'epoch': 1.18}


 59%|█████▉    | 738/1248 [1:00:27<23:00,  2.71s/it]

{'loss': 0.0001, 'learning_rate': 8.173076923076923e-06, 'epoch': 1.18}


 59%|█████▉    | 739/1248 [1:00:29<20:28,  2.41s/it]

{'loss': 0.0003, 'learning_rate': 8.157051282051283e-06, 'epoch': 1.18}


 59%|█████▉    | 740/1248 [1:00:31<20:02,  2.37s/it]

{'loss': 0.0001, 'learning_rate': 8.141025641025641e-06, 'epoch': 1.19}


 59%|█████▉    | 741/1248 [1:00:33<20:30,  2.43s/it]

{'loss': 0.0003, 'learning_rate': 8.125000000000001e-06, 'epoch': 1.19}


 59%|█████▉    | 742/1248 [1:00:35<19:29,  2.31s/it]

{'loss': 0.0004, 'learning_rate': 8.108974358974359e-06, 'epoch': 1.19}


 60%|█████▉    | 743/1248 [1:00:38<19:34,  2.33s/it]

{'loss': 0.0002, 'learning_rate': 8.092948717948719e-06, 'epoch': 1.19}


 60%|█████▉    | 744/1248 [1:00:40<19:17,  2.30s/it]

{'loss': 0.0001, 'learning_rate': 8.076923076923077e-06, 'epoch': 1.19}


 60%|█████▉    | 745/1248 [1:00:45<24:54,  2.97s/it]

{'loss': 0.0002, 'learning_rate': 8.060897435897437e-06, 'epoch': 1.19}


 60%|█████▉    | 746/1248 [1:00:46<21:26,  2.56s/it]

{'loss': 0.0001, 'learning_rate': 8.044871794871797e-06, 'epoch': 1.2}


 60%|█████▉    | 747/1248 [1:00:49<22:17,  2.67s/it]

{'loss': 0.0002, 'learning_rate': 8.028846153846155e-06, 'epoch': 1.2}


 60%|█████▉    | 748/1248 [1:00:51<20:07,  2.42s/it]

{'loss': 0.0002, 'learning_rate': 8.012820512820515e-06, 'epoch': 1.2}


 60%|██████    | 749/1248 [1:00:53<18:47,  2.26s/it]

{'loss': 0.0002, 'learning_rate': 7.996794871794873e-06, 'epoch': 1.2}


 60%|██████    | 750/1248 [1:00:56<20:47,  2.50s/it]

{'loss': 0.0002, 'learning_rate': 7.980769230769232e-06, 'epoch': 1.2}


 60%|██████    | 751/1248 [1:00:58<20:13,  2.44s/it]

{'loss': 0.0001, 'learning_rate': 7.96474358974359e-06, 'epoch': 1.2}


 60%|██████    | 752/1248 [1:01:01<21:33,  2.61s/it]

{'loss': 0.0001, 'learning_rate': 7.948717948717949e-06, 'epoch': 1.21}


 60%|██████    | 753/1248 [1:01:04<21:17,  2.58s/it]

{'loss': 0.0002, 'learning_rate': 7.932692307692308e-06, 'epoch': 1.21}


 60%|██████    | 754/1248 [1:01:05<18:17,  2.22s/it]

{'loss': 0.0002, 'learning_rate': 7.916666666666667e-06, 'epoch': 1.21}


 60%|██████    | 755/1248 [1:01:07<17:36,  2.14s/it]

{'loss': 0.0001, 'learning_rate': 7.900641025641026e-06, 'epoch': 1.21}


 61%|██████    | 756/1248 [1:01:10<19:43,  2.41s/it]

{'loss': 0.0002, 'learning_rate': 7.884615384615384e-06, 'epoch': 1.21}


 61%|██████    | 757/1248 [1:01:13<19:45,  2.41s/it]

{'loss': 0.0001, 'learning_rate': 7.868589743589744e-06, 'epoch': 1.21}


 61%|██████    | 758/1248 [1:01:15<19:30,  2.39s/it]

{'loss': 0.0001, 'learning_rate': 7.852564102564102e-06, 'epoch': 1.21}


 61%|██████    | 759/1248 [1:01:17<18:45,  2.30s/it]

{'loss': 0.0001, 'learning_rate': 7.836538461538462e-06, 'epoch': 1.22}


 61%|██████    | 760/1248 [1:01:21<22:06,  2.72s/it]

{'loss': 0.0002, 'learning_rate': 7.820512820512822e-06, 'epoch': 1.22}


 61%|██████    | 761/1248 [1:01:23<21:43,  2.68s/it]

{'loss': 0.0001, 'learning_rate': 7.80448717948718e-06, 'epoch': 1.22}


 61%|██████    | 762/1248 [1:01:25<19:52,  2.45s/it]

{'loss': 0.0001, 'learning_rate': 7.78846153846154e-06, 'epoch': 1.22}


 61%|██████    | 763/1248 [1:01:27<17:27,  2.16s/it]

{'loss': 0.0001, 'learning_rate': 7.772435897435898e-06, 'epoch': 1.22}


 61%|██████    | 764/1248 [1:01:29<17:20,  2.15s/it]

{'loss': 0.0001, 'learning_rate': 7.756410256410258e-06, 'epoch': 1.22}


 61%|██████▏   | 765/1248 [1:01:31<17:38,  2.19s/it]

{'loss': 0.1894, 'learning_rate': 7.740384615384616e-06, 'epoch': 1.23}


 61%|██████▏   | 766/1248 [1:01:33<16:29,  2.05s/it]

{'loss': 0.0001, 'learning_rate': 7.724358974358976e-06, 'epoch': 1.23}


 61%|██████▏   | 767/1248 [1:01:35<16:37,  2.07s/it]

{'loss': 0.0003, 'learning_rate': 7.708333333333334e-06, 'epoch': 1.23}


 62%|██████▏   | 768/1248 [1:01:38<18:18,  2.29s/it]

{'loss': 0.0001, 'learning_rate': 7.692307692307694e-06, 'epoch': 1.23}


 62%|██████▏   | 769/1248 [1:01:39<16:23,  2.05s/it]

{'loss': 0.0002, 'learning_rate': 7.676282051282052e-06, 'epoch': 1.23}


 62%|██████▏   | 770/1248 [1:01:42<18:45,  2.35s/it]

{'loss': 0.0001, 'learning_rate': 7.660256410256411e-06, 'epoch': 1.23}


 62%|██████▏   | 771/1248 [1:01:44<17:31,  2.20s/it]

{'loss': 0.0006, 'learning_rate': 7.64423076923077e-06, 'epoch': 1.24}


 62%|██████▏   | 772/1248 [1:01:46<17:23,  2.19s/it]

{'loss': 0.0001, 'learning_rate': 7.6282051282051286e-06, 'epoch': 1.24}


 62%|██████▏   | 773/1248 [1:01:49<17:43,  2.24s/it]

{'loss': 0.0001, 'learning_rate': 7.6121794871794875e-06, 'epoch': 1.24}


 62%|██████▏   | 774/1248 [1:01:51<17:21,  2.20s/it]

{'loss': 0.0001, 'learning_rate': 7.5961538461538465e-06, 'epoch': 1.24}


 62%|██████▏   | 775/1248 [1:01:53<17:29,  2.22s/it]

{'loss': 0.0002, 'learning_rate': 7.580128205128205e-06, 'epoch': 1.24}


 62%|██████▏   | 776/1248 [1:01:56<18:20,  2.33s/it]

{'loss': 0.0001, 'learning_rate': 7.564102564102564e-06, 'epoch': 1.24}


 62%|██████▏   | 777/1248 [1:01:57<17:06,  2.18s/it]

{'loss': 0.0001, 'learning_rate': 7.548076923076923e-06, 'epoch': 1.25}


 62%|██████▏   | 778/1248 [1:01:59<15:49,  2.02s/it]

{'loss': 0.0002, 'learning_rate': 7.532051282051282e-06, 'epoch': 1.25}


 62%|██████▏   | 779/1248 [1:02:01<15:25,  1.97s/it]

{'loss': 0.0002, 'learning_rate': 7.516025641025641e-06, 'epoch': 1.25}


 62%|██████▎   | 780/1248 [1:02:03<15:19,  1.96s/it]

{'loss': 0.0002, 'learning_rate': 7.500000000000001e-06, 'epoch': 1.25}


 63%|██████▎   | 781/1248 [1:02:05<15:16,  1.96s/it]

{'loss': 0.0002, 'learning_rate': 7.48397435897436e-06, 'epoch': 1.25}


 63%|██████▎   | 782/1248 [1:02:07<15:53,  2.05s/it]

{'loss': 0.0002, 'learning_rate': 7.467948717948719e-06, 'epoch': 1.25}


 63%|██████▎   | 783/1248 [1:02:09<15:23,  1.99s/it]

{'loss': 0.0001, 'learning_rate': 7.451923076923078e-06, 'epoch': 1.25}


 63%|██████▎   | 784/1248 [1:02:11<15:25,  1.99s/it]

{'loss': 0.0002, 'learning_rate': 7.435897435897437e-06, 'epoch': 1.26}


 63%|██████▎   | 785/1248 [1:02:13<15:11,  1.97s/it]

{'loss': 0.0001, 'learning_rate': 7.419871794871796e-06, 'epoch': 1.26}


 63%|██████▎   | 786/1248 [1:02:15<15:34,  2.02s/it]

{'loss': 0.0001, 'learning_rate': 7.403846153846155e-06, 'epoch': 1.26}


 63%|██████▎   | 787/1248 [1:02:17<16:06,  2.10s/it]

{'loss': 0.0003, 'learning_rate': 7.387820512820514e-06, 'epoch': 1.26}


 63%|██████▎   | 788/1248 [1:02:20<17:30,  2.28s/it]

{'loss': 0.0002, 'learning_rate': 7.371794871794873e-06, 'epoch': 1.26}


 63%|██████▎   | 789/1248 [1:02:22<16:15,  2.12s/it]

{'loss': 0.0001, 'learning_rate': 7.355769230769232e-06, 'epoch': 1.26}


 63%|██████▎   | 790/1248 [1:02:24<16:11,  2.12s/it]

{'loss': 0.0001, 'learning_rate': 7.33974358974359e-06, 'epoch': 1.27}


 63%|██████▎   | 791/1248 [1:02:26<17:01,  2.23s/it]

{'loss': 0.0002, 'learning_rate': 7.323717948717949e-06, 'epoch': 1.27}


 63%|██████▎   | 792/1248 [1:02:28<16:30,  2.17s/it]

{'loss': 0.0, 'learning_rate': 7.307692307692308e-06, 'epoch': 1.27}


 64%|██████▎   | 793/1248 [1:02:31<16:38,  2.19s/it]

{'loss': 0.0002, 'learning_rate': 7.291666666666667e-06, 'epoch': 1.27}


 64%|██████▎   | 794/1248 [1:02:32<15:20,  2.03s/it]

{'loss': 0.0001, 'learning_rate': 7.2756410256410255e-06, 'epoch': 1.27}


 64%|██████▎   | 795/1248 [1:02:34<15:35,  2.06s/it]

{'loss': 0.0001, 'learning_rate': 7.259615384615385e-06, 'epoch': 1.27}


 64%|██████▍   | 796/1248 [1:02:37<17:50,  2.37s/it]

{'loss': 0.0001, 'learning_rate': 7.243589743589744e-06, 'epoch': 1.28}


 64%|██████▍   | 797/1248 [1:02:39<16:49,  2.24s/it]

{'loss': 0.0002, 'learning_rate': 7.227564102564103e-06, 'epoch': 1.28}


 64%|██████▍   | 798/1248 [1:02:41<15:48,  2.11s/it]

{'loss': 0.0001, 'learning_rate': 7.211538461538462e-06, 'epoch': 1.28}


 64%|██████▍   | 799/1248 [1:02:43<15:09,  2.03s/it]

{'loss': 0.0001, 'learning_rate': 7.195512820512821e-06, 'epoch': 1.28}


 64%|██████▍   | 800/1248 [1:02:45<15:45,  2.11s/it]***** Running Evaluation *****
  Num examples = 6239
  Batch size = 10


{'loss': 0.0001, 'learning_rate': 7.17948717948718e-06, 'epoch': 1.28}


                                                    
 64%|██████▍   | 800/1248 [1:07:43<15:45,  2.11s/it]Saving model checkpoint to bert-base-cased-finetuned-adjs-wsent-black-clover/checkpoint-800
Configuration saved in bert-base-cased-finetuned-adjs-wsent-black-clover/checkpoint-800/config.json


{'eval_loss': 7.418659515678883e-05, 'eval_runtime': 297.8465, 'eval_samples_per_second': 20.947, 'eval_steps_per_second': 2.095, 'epoch': 1.28}


Model weights saved in bert-base-cased-finetuned-adjs-wsent-black-clover/checkpoint-800/pytorch_model.bin
 64%|██████▍   | 801/1248 [1:07:47<11:25:21, 91.99s/it]

{'loss': 0.0001, 'learning_rate': 7.163461538461539e-06, 'epoch': 1.28}


 64%|██████▍   | 802/1248 [1:07:49<8:02:58, 64.97s/it] 

{'loss': 0.0004, 'learning_rate': 7.147435897435898e-06, 'epoch': 1.29}


 64%|██████▍   | 803/1248 [1:07:51<5:41:01, 45.98s/it]

{'loss': 0.0001, 'learning_rate': 7.131410256410257e-06, 'epoch': 1.29}


 64%|██████▍   | 804/1248 [1:07:53<4:02:53, 32.82s/it]

{'loss': 0.0001, 'learning_rate': 7.115384615384616e-06, 'epoch': 1.29}


 65%|██████▍   | 805/1248 [1:07:55<2:54:47, 23.67s/it]

{'loss': 0.0002, 'learning_rate': 7.099358974358975e-06, 'epoch': 1.29}


 65%|██████▍   | 806/1248 [1:07:57<2:05:27, 17.03s/it]

{'loss': 0.0001, 'learning_rate': 7.083333333333335e-06, 'epoch': 1.29}


 65%|██████▍   | 807/1248 [1:07:58<1:31:28, 12.45s/it]

{'loss': 0.0001, 'learning_rate': 7.067307692307694e-06, 'epoch': 1.29}


 65%|██████▍   | 808/1248 [1:08:00<1:08:13,  9.30s/it]

{'loss': 0.0003, 'learning_rate': 7.051282051282053e-06, 'epoch': 1.29}


 65%|██████▍   | 809/1248 [1:08:04<56:16,  7.69s/it]  

{'loss': 0.0001, 'learning_rate': 7.0352564102564116e-06, 'epoch': 1.3}


 65%|██████▍   | 810/1248 [1:08:07<45:52,  6.28s/it]

{'loss': 0.0001, 'learning_rate': 7.01923076923077e-06, 'epoch': 1.3}


 65%|██████▍   | 811/1248 [1:08:09<35:56,  4.94s/it]

{'loss': 0.0004, 'learning_rate': 7.003205128205129e-06, 'epoch': 1.3}


 65%|██████▌   | 812/1248 [1:08:11<30:07,  4.14s/it]

{'loss': 0.0002, 'learning_rate': 6.9871794871794876e-06, 'epoch': 1.3}


 65%|██████▌   | 813/1248 [1:08:14<26:50,  3.70s/it]

{'loss': 0.0002, 'learning_rate': 6.9711538461538465e-06, 'epoch': 1.3}


 65%|██████▌   | 814/1248 [1:08:15<21:46,  3.01s/it]

{'loss': 0.0001, 'learning_rate': 6.9551282051282055e-06, 'epoch': 1.3}


 65%|██████▌   | 815/1248 [1:08:17<18:46,  2.60s/it]

{'loss': 0.0001, 'learning_rate': 6.9391025641025644e-06, 'epoch': 1.31}


 65%|██████▌   | 816/1248 [1:08:19<17:53,  2.49s/it]

{'loss': 0.0001, 'learning_rate': 6.923076923076923e-06, 'epoch': 1.31}


 65%|██████▌   | 817/1248 [1:08:21<16:31,  2.30s/it]

{'loss': 0.0002, 'learning_rate': 6.907051282051282e-06, 'epoch': 1.31}


 66%|██████▌   | 818/1248 [1:08:23<15:07,  2.11s/it]

{'loss': 0.0001, 'learning_rate': 6.891025641025641e-06, 'epoch': 1.31}


 66%|██████▌   | 819/1248 [1:08:25<15:06,  2.11s/it]

{'loss': 0.0002, 'learning_rate': 6.875e-06, 'epoch': 1.31}


 66%|██████▌   | 820/1248 [1:08:27<15:46,  2.21s/it]

{'loss': 0.0001, 'learning_rate': 6.858974358974359e-06, 'epoch': 1.31}


 66%|██████▌   | 821/1248 [1:08:30<16:10,  2.27s/it]

{'loss': 0.0004, 'learning_rate': 6.842948717948719e-06, 'epoch': 1.32}


 66%|██████▌   | 822/1248 [1:08:33<17:30,  2.47s/it]

{'loss': 0.0002, 'learning_rate': 6.826923076923078e-06, 'epoch': 1.32}


 66%|██████▌   | 823/1248 [1:08:35<16:23,  2.31s/it]

{'loss': 0.0001, 'learning_rate': 6.810897435897437e-06, 'epoch': 1.32}


 66%|██████▌   | 824/1248 [1:08:37<15:19,  2.17s/it]

{'loss': 0.0001, 'learning_rate': 6.794871794871796e-06, 'epoch': 1.32}


 66%|██████▌   | 825/1248 [1:08:38<14:43,  2.09s/it]

{'loss': 0.0002, 'learning_rate': 6.778846153846155e-06, 'epoch': 1.32}


 66%|██████▌   | 826/1248 [1:08:42<17:35,  2.50s/it]

{'loss': 0.0003, 'learning_rate': 6.762820512820514e-06, 'epoch': 1.32}


 66%|██████▋   | 827/1248 [1:08:45<17:55,  2.56s/it]

{'loss': 0.0001, 'learning_rate': 6.746794871794873e-06, 'epoch': 1.33}


 66%|██████▋   | 828/1248 [1:08:46<16:18,  2.33s/it]

{'loss': 0.0001, 'learning_rate': 6.730769230769232e-06, 'epoch': 1.33}


 66%|██████▋   | 829/1248 [1:08:48<15:44,  2.25s/it]

{'loss': 0.0001, 'learning_rate': 6.71474358974359e-06, 'epoch': 1.33}


 67%|██████▋   | 830/1248 [1:08:53<21:06,  3.03s/it]

{'loss': 0.0001, 'learning_rate': 6.698717948717949e-06, 'epoch': 1.33}


 67%|██████▋   | 831/1248 [1:08:55<19:18,  2.78s/it]

{'loss': 0.0003, 'learning_rate': 6.682692307692308e-06, 'epoch': 1.33}


 67%|██████▋   | 832/1248 [1:08:57<17:38,  2.54s/it]

{'loss': 0.0002, 'learning_rate': 6.666666666666667e-06, 'epoch': 1.33}


 67%|██████▋   | 833/1248 [1:09:00<16:50,  2.43s/it]

{'loss': 0.0002, 'learning_rate': 6.650641025641026e-06, 'epoch': 1.33}


 67%|██████▋   | 834/1248 [1:09:02<16:55,  2.45s/it]

{'loss': 0.0002, 'learning_rate': 6.6346153846153846e-06, 'epoch': 1.34}


 67%|██████▋   | 835/1248 [1:09:04<16:24,  2.38s/it]

{'loss': 0.0001, 'learning_rate': 6.6185897435897435e-06, 'epoch': 1.34}


 67%|██████▋   | 836/1248 [1:09:06<15:36,  2.27s/it]

{'loss': 0.0002, 'learning_rate': 6.602564102564103e-06, 'epoch': 1.34}


 67%|██████▋   | 837/1248 [1:09:08<14:24,  2.10s/it]

{'loss': 0.0002, 'learning_rate': 6.586538461538462e-06, 'epoch': 1.34}


 67%|██████▋   | 838/1248 [1:09:11<15:56,  2.33s/it]

{'loss': 0.0001, 'learning_rate': 6.570512820512821e-06, 'epoch': 1.34}


 67%|██████▋   | 839/1248 [1:09:13<14:41,  2.15s/it]

{'loss': 0.0001, 'learning_rate': 6.55448717948718e-06, 'epoch': 1.34}


 67%|██████▋   | 840/1248 [1:09:14<12:47,  1.88s/it]

{'loss': 0.0004, 'learning_rate': 6.538461538461539e-06, 'epoch': 1.35}


 67%|██████▋   | 841/1248 [1:09:16<12:53,  1.90s/it]

{'loss': 0.0001, 'learning_rate': 6.522435897435898e-06, 'epoch': 1.35}


 67%|██████▋   | 842/1248 [1:09:19<14:42,  2.17s/it]

{'loss': 0.0002, 'learning_rate': 6.506410256410257e-06, 'epoch': 1.35}


 68%|██████▊   | 843/1248 [1:09:21<14:11,  2.10s/it]

{'loss': 0.0003, 'learning_rate': 6.490384615384616e-06, 'epoch': 1.35}


 68%|██████▊   | 844/1248 [1:09:23<14:24,  2.14s/it]

{'loss': 0.0002, 'learning_rate': 6.474358974358975e-06, 'epoch': 1.35}


 68%|██████▊   | 845/1248 [1:09:24<12:39,  1.88s/it]

{'loss': 0.0002, 'learning_rate': 6.458333333333334e-06, 'epoch': 1.35}


 68%|██████▊   | 846/1248 [1:09:26<12:47,  1.91s/it]

{'loss': 0.0001, 'learning_rate': 6.442307692307693e-06, 'epoch': 1.36}


 68%|██████▊   | 847/1248 [1:09:28<13:10,  1.97s/it]

{'loss': 0.0002, 'learning_rate': 6.426282051282053e-06, 'epoch': 1.36}


 68%|██████▊   | 848/1248 [1:09:33<18:04,  2.71s/it]

{'loss': 0.0001, 'learning_rate': 6.410256410256412e-06, 'epoch': 1.36}


 68%|██████▊   | 849/1248 [1:09:36<18:38,  2.80s/it]

{'loss': 0.0002, 'learning_rate': 6.394230769230769e-06, 'epoch': 1.36}


 68%|██████▊   | 850/1248 [1:09:39<19:28,  2.93s/it]

{'loss': 0.0001, 'learning_rate': 6.378205128205129e-06, 'epoch': 1.36}


 68%|██████▊   | 851/1248 [1:09:41<16:51,  2.55s/it]

{'loss': 0.0002, 'learning_rate': 6.362179487179488e-06, 'epoch': 1.36}


 68%|██████▊   | 852/1248 [1:09:43<16:01,  2.43s/it]

{'loss': 0.0001, 'learning_rate': 6.3461538461538466e-06, 'epoch': 1.37}


 68%|██████▊   | 853/1248 [1:09:45<15:25,  2.34s/it]

{'loss': 0.0002, 'learning_rate': 6.3301282051282055e-06, 'epoch': 1.37}


 68%|██████▊   | 854/1248 [1:09:47<15:14,  2.32s/it]

{'loss': 0.0037, 'learning_rate': 6.3141025641025645e-06, 'epoch': 1.37}


 69%|██████▊   | 855/1248 [1:09:50<16:43,  2.55s/it]

{'loss': 0.0001, 'learning_rate': 6.2980769230769234e-06, 'epoch': 1.37}


 69%|██████▊   | 856/1248 [1:09:53<16:38,  2.55s/it]

{'loss': 0.0001, 'learning_rate': 6.282051282051282e-06, 'epoch': 1.37}


 69%|██████▊   | 857/1248 [1:09:55<15:13,  2.34s/it]

{'loss': 0.0001, 'learning_rate': 6.266025641025641e-06, 'epoch': 1.37}


 69%|██████▉   | 858/1248 [1:09:57<15:36,  2.40s/it]

{'loss': 0.0013, 'learning_rate': 6.25e-06, 'epoch': 1.38}


 69%|██████▉   | 859/1248 [1:09:59<14:19,  2.21s/it]

{'loss': 0.0002, 'learning_rate': 6.233974358974359e-06, 'epoch': 1.38}


 69%|██████▉   | 860/1248 [1:10:01<13:06,  2.03s/it]

{'loss': 0.0002, 'learning_rate': 6.217948717948718e-06, 'epoch': 1.38}


 69%|██████▉   | 861/1248 [1:10:02<12:29,  1.94s/it]

{'loss': 0.0002, 'learning_rate': 6.201923076923078e-06, 'epoch': 1.38}


 69%|██████▉   | 862/1248 [1:10:05<13:46,  2.14s/it]

{'loss': 0.0001, 'learning_rate': 6.185897435897437e-06, 'epoch': 1.38}


 69%|██████▉   | 863/1248 [1:10:07<14:23,  2.24s/it]

{'loss': 0.0002, 'learning_rate': 6.169871794871796e-06, 'epoch': 1.38}


 69%|██████▉   | 864/1248 [1:10:10<15:06,  2.36s/it]

{'loss': 0.0001, 'learning_rate': 6.153846153846155e-06, 'epoch': 1.38}


 69%|██████▉   | 865/1248 [1:10:13<15:51,  2.49s/it]

{'loss': 0.0001, 'learning_rate': 6.137820512820514e-06, 'epoch': 1.39}


 69%|██████▉   | 866/1248 [1:10:15<14:38,  2.30s/it]

{'loss': 0.0001, 'learning_rate': 6.121794871794873e-06, 'epoch': 1.39}


 69%|██████▉   | 867/1248 [1:10:17<14:01,  2.21s/it]

{'loss': 0.0002, 'learning_rate': 6.105769230769232e-06, 'epoch': 1.39}


 70%|██████▉   | 868/1248 [1:10:21<18:26,  2.91s/it]

{'loss': 0.0001, 'learning_rate': 6.08974358974359e-06, 'epoch': 1.39}


 70%|██████▉   | 869/1248 [1:10:23<16:34,  2.62s/it]

{'loss': 0.0001, 'learning_rate': 6.073717948717949e-06, 'epoch': 1.39}


 70%|██████▉   | 870/1248 [1:10:25<15:58,  2.54s/it]

{'loss': 0.0002, 'learning_rate': 6.057692307692308e-06, 'epoch': 1.39}


 70%|██████▉   | 871/1248 [1:10:28<15:42,  2.50s/it]

{'loss': 0.0002, 'learning_rate': 6.041666666666667e-06, 'epoch': 1.4}


 70%|██████▉   | 872/1248 [1:10:30<14:19,  2.29s/it]

{'loss': 0.0001, 'learning_rate': 6.025641025641026e-06, 'epoch': 1.4}


 70%|██████▉   | 873/1248 [1:10:31<12:58,  2.07s/it]

{'loss': 0.0001, 'learning_rate': 6.009615384615385e-06, 'epoch': 1.4}


 70%|███████   | 874/1248 [1:10:33<13:09,  2.11s/it]

{'loss': 0.0002, 'learning_rate': 5.9935897435897436e-06, 'epoch': 1.4}


 70%|███████   | 875/1248 [1:10:36<14:39,  2.36s/it]

{'loss': 0.0001, 'learning_rate': 5.9775641025641025e-06, 'epoch': 1.4}


 70%|███████   | 876/1248 [1:10:39<14:24,  2.32s/it]

{'loss': 0.0001, 'learning_rate': 5.961538461538462e-06, 'epoch': 1.4}


 70%|███████   | 877/1248 [1:10:41<14:30,  2.35s/it]

{'loss': 0.0002, 'learning_rate': 5.945512820512821e-06, 'epoch': 1.41}


 70%|███████   | 878/1248 [1:10:44<15:13,  2.47s/it]

{'loss': 0.0001, 'learning_rate': 5.92948717948718e-06, 'epoch': 1.41}


 70%|███████   | 879/1248 [1:10:46<14:21,  2.34s/it]

{'loss': 0.0001, 'learning_rate': 5.913461538461539e-06, 'epoch': 1.41}


 71%|███████   | 880/1248 [1:10:48<14:11,  2.31s/it]

{'loss': 0.0001, 'learning_rate': 5.897435897435898e-06, 'epoch': 1.41}


 71%|███████   | 881/1248 [1:10:53<18:04,  2.96s/it]

{'loss': 0.0003, 'learning_rate': 5.881410256410257e-06, 'epoch': 1.41}


 71%|███████   | 882/1248 [1:10:55<16:59,  2.78s/it]

{'loss': 0.0002, 'learning_rate': 5.865384615384616e-06, 'epoch': 1.41}


 71%|███████   | 883/1248 [1:10:57<15:06,  2.48s/it]

{'loss': 0.0001, 'learning_rate': 5.849358974358975e-06, 'epoch': 1.42}


 71%|███████   | 884/1248 [1:10:59<14:00,  2.31s/it]

{'loss': 0.0001, 'learning_rate': 5.833333333333334e-06, 'epoch': 1.42}


 71%|███████   | 885/1248 [1:11:04<18:49,  3.11s/it]

{'loss': 0.0002, 'learning_rate': 5.817307692307693e-06, 'epoch': 1.42}


 71%|███████   | 886/1248 [1:11:06<17:27,  2.89s/it]

{'loss': 0.0001, 'learning_rate': 5.801282051282052e-06, 'epoch': 1.42}


 71%|███████   | 887/1248 [1:11:08<15:51,  2.64s/it]

{'loss': 0.0001, 'learning_rate': 5.785256410256412e-06, 'epoch': 1.42}


 71%|███████   | 888/1248 [1:11:10<15:07,  2.52s/it]

{'loss': 0.0004, 'learning_rate': 5.769230769230769e-06, 'epoch': 1.42}


 71%|███████   | 889/1248 [1:11:12<13:36,  2.27s/it]

{'loss': 0.0001, 'learning_rate': 5.753205128205128e-06, 'epoch': 1.42}


 71%|███████▏  | 890/1248 [1:11:14<13:18,  2.23s/it]

{'loss': 0.0001, 'learning_rate': 5.737179487179487e-06, 'epoch': 1.43}


 71%|███████▏  | 891/1248 [1:11:16<12:19,  2.07s/it]

{'loss': 0.0001, 'learning_rate': 5.721153846153847e-06, 'epoch': 1.43}


 71%|███████▏  | 892/1248 [1:11:18<12:14,  2.06s/it]

{'loss': 0.0001, 'learning_rate': 5.705128205128206e-06, 'epoch': 1.43}


 72%|███████▏  | 893/1248 [1:11:20<12:46,  2.16s/it]

{'loss': 0.0002, 'learning_rate': 5.6891025641025645e-06, 'epoch': 1.43}


 72%|███████▏  | 894/1248 [1:11:22<12:18,  2.08s/it]

{'loss': 0.0001, 'learning_rate': 5.6730769230769235e-06, 'epoch': 1.43}


 72%|███████▏  | 895/1248 [1:11:24<11:16,  1.92s/it]

{'loss': 0.0001, 'learning_rate': 5.6570512820512824e-06, 'epoch': 1.43}


 72%|███████▏  | 896/1248 [1:11:26<12:43,  2.17s/it]

{'loss': 0.0003, 'learning_rate': 5.641025641025641e-06, 'epoch': 1.44}


 72%|███████▏  | 897/1248 [1:11:28<12:29,  2.14s/it]

{'loss': 0.0003, 'learning_rate': 5.625e-06, 'epoch': 1.44}


 72%|███████▏  | 898/1248 [1:11:31<12:39,  2.17s/it]

{'loss': 0.0003, 'learning_rate': 5.608974358974359e-06, 'epoch': 1.44}


 72%|███████▏  | 899/1248 [1:11:33<13:24,  2.31s/it]

{'loss': 0.0001, 'learning_rate': 5.592948717948718e-06, 'epoch': 1.44}


 72%|███████▏  | 900/1248 [1:11:35<13:04,  2.25s/it]***** Running Evaluation *****
  Num examples = 6239
  Batch size = 10


{'loss': 0.0002, 'learning_rate': 5.576923076923077e-06, 'epoch': 1.44}


                                                    
 72%|███████▏  | 900/1248 [1:16:46<13:04,  2.25s/it]Saving model checkpoint to bert-base-cased-finetuned-adjs-wsent-black-clover/checkpoint-900
Configuration saved in bert-base-cased-finetuned-adjs-wsent-black-clover/checkpoint-900/config.json


{'eval_loss': 5.630413943435997e-05, 'eval_runtime': 310.9504, 'eval_samples_per_second': 20.064, 'eval_steps_per_second': 2.007, 'epoch': 1.44}


Model weights saved in bert-base-cased-finetuned-adjs-wsent-black-clover/checkpoint-900/pytorch_model.bin
 72%|███████▏  | 901/1248 [1:16:50<9:15:00, 95.97s/it]

{'loss': 0.0001, 'learning_rate': 5.560897435897436e-06, 'epoch': 1.44}


 72%|███████▏  | 902/1248 [1:16:53<6:31:36, 67.91s/it]

{'loss': 0.0001, 'learning_rate': 5.544871794871796e-06, 'epoch': 1.45}


 72%|███████▏  | 903/1248 [1:16:54<4:36:18, 48.05s/it]

{'loss': 0.0002, 'learning_rate': 5.528846153846155e-06, 'epoch': 1.45}


 72%|███████▏  | 904/1248 [1:16:56<3:16:44, 34.31s/it]

{'loss': 0.0002, 'learning_rate': 5.512820512820514e-06, 'epoch': 1.45}


 73%|███████▎  | 905/1248 [1:16:59<2:22:02, 24.85s/it]

{'loss': 0.0001, 'learning_rate': 5.496794871794873e-06, 'epoch': 1.45}


 73%|███████▎  | 906/1248 [1:17:01<1:42:08, 17.92s/it]

{'loss': 0.0002, 'learning_rate': 5.480769230769232e-06, 'epoch': 1.45}


 73%|███████▎  | 907/1248 [1:17:03<1:14:28, 13.10s/it]

{'loss': 0.0001, 'learning_rate': 5.46474358974359e-06, 'epoch': 1.45}


 73%|███████▎  | 908/1248 [1:17:05<55:32,  9.80s/it]  

{'loss': 0.0001, 'learning_rate': 5.448717948717949e-06, 'epoch': 1.46}


 73%|███████▎  | 909/1248 [1:17:07<42:38,  7.55s/it]

{'loss': 0.0001, 'learning_rate': 5.432692307692308e-06, 'epoch': 1.46}


 73%|███████▎  | 910/1248 [1:17:09<33:13,  5.90s/it]

{'loss': 0.0001, 'learning_rate': 5.416666666666667e-06, 'epoch': 1.46}


 73%|███████▎  | 911/1248 [1:17:11<26:10,  4.66s/it]

{'loss': 0.0002, 'learning_rate': 5.400641025641026e-06, 'epoch': 1.46}


 73%|███████▎  | 912/1248 [1:17:13<21:34,  3.85s/it]

{'loss': 0.0002, 'learning_rate': 5.384615384615385e-06, 'epoch': 1.46}


 73%|███████▎  | 913/1248 [1:17:15<18:35,  3.33s/it]

{'loss': 0.0001, 'learning_rate': 5.368589743589744e-06, 'epoch': 1.46}


 73%|███████▎  | 914/1248 [1:17:21<22:26,  4.03s/it]

{'loss': 0.0002, 'learning_rate': 5.3525641025641026e-06, 'epoch': 1.46}


 73%|███████▎  | 915/1248 [1:17:23<19:09,  3.45s/it]

{'loss': 0.005, 'learning_rate': 5.3365384615384615e-06, 'epoch': 1.47}


 73%|███████▎  | 916/1248 [1:17:26<17:46,  3.21s/it]

{'loss': 0.0001, 'learning_rate': 5.320512820512821e-06, 'epoch': 1.47}


 73%|███████▎  | 917/1248 [1:17:28<16:31,  3.00s/it]

{'loss': 0.0001, 'learning_rate': 5.30448717948718e-06, 'epoch': 1.47}


 74%|███████▎  | 918/1248 [1:17:30<13:53,  2.53s/it]

{'loss': 0.0002, 'learning_rate': 5.288461538461539e-06, 'epoch': 1.47}


 74%|███████▎  | 919/1248 [1:17:32<13:19,  2.43s/it]

{'loss': 0.0001, 'learning_rate': 5.272435897435898e-06, 'epoch': 1.47}


 74%|███████▎  | 920/1248 [1:17:34<12:19,  2.26s/it]

{'loss': 0.0001, 'learning_rate': 5.256410256410257e-06, 'epoch': 1.47}


 74%|███████▍  | 921/1248 [1:17:36<11:51,  2.17s/it]

{'loss': 0.0002, 'learning_rate': 5.240384615384616e-06, 'epoch': 1.48}


 74%|███████▍  | 922/1248 [1:17:37<10:48,  1.99s/it]

{'loss': 0.0003, 'learning_rate': 5.224358974358975e-06, 'epoch': 1.48}


 74%|███████▍  | 923/1248 [1:17:39<10:38,  1.96s/it]

{'loss': 0.0001, 'learning_rate': 5.208333333333334e-06, 'epoch': 1.48}


 74%|███████▍  | 924/1248 [1:17:42<12:45,  2.36s/it]

{'loss': 0.0001, 'learning_rate': 5.192307692307693e-06, 'epoch': 1.48}


 74%|███████▍  | 925/1248 [1:17:45<12:56,  2.40s/it]

{'loss': 0.0001, 'learning_rate': 5.176282051282052e-06, 'epoch': 1.48}


 74%|███████▍  | 926/1248 [1:17:49<15:30,  2.89s/it]

{'loss': 0.0001, 'learning_rate': 5.160256410256411e-06, 'epoch': 1.48}


 74%|███████▍  | 927/1248 [1:17:51<13:43,  2.57s/it]

{'loss': 0.0001, 'learning_rate': 5.144230769230769e-06, 'epoch': 1.49}


 74%|███████▍  | 928/1248 [1:17:53<13:54,  2.61s/it]

{'loss': 0.0001, 'learning_rate': 5.128205128205128e-06, 'epoch': 1.49}


 74%|███████▍  | 929/1248 [1:17:58<16:51,  3.17s/it]

{'loss': 0.0001, 'learning_rate': 5.112179487179487e-06, 'epoch': 1.49}


 75%|███████▍  | 930/1248 [1:17:59<14:25,  2.72s/it]

{'loss': 0.0001, 'learning_rate': 5.096153846153846e-06, 'epoch': 1.49}


 75%|███████▍  | 931/1248 [1:18:01<12:56,  2.45s/it]

{'loss': 0.0001, 'learning_rate': 5.080128205128206e-06, 'epoch': 1.49}


 75%|███████▍  | 932/1248 [1:18:03<12:15,  2.33s/it]

{'loss': 0.0001, 'learning_rate': 5.064102564102565e-06, 'epoch': 1.49}


 75%|███████▍  | 933/1248 [1:18:05<11:54,  2.27s/it]

{'loss': 0.0001, 'learning_rate': 5.0480769230769235e-06, 'epoch': 1.5}


 75%|███████▍  | 934/1248 [1:18:07<11:22,  2.17s/it]

{'loss': 0.0002, 'learning_rate': 5.0320512820512825e-06, 'epoch': 1.5}


 75%|███████▍  | 935/1248 [1:18:10<11:13,  2.15s/it]

{'loss': 0.0001, 'learning_rate': 5.0160256410256415e-06, 'epoch': 1.5}


 75%|███████▌  | 936/1248 [1:18:11<10:41,  2.06s/it]

{'loss': 0.0001, 'learning_rate': 5e-06, 'epoch': 1.5}


 75%|███████▌  | 937/1248 [1:18:14<11:27,  2.21s/it]

{'loss': 0.0001, 'learning_rate': 4.983974358974359e-06, 'epoch': 1.5}


 75%|███████▌  | 938/1248 [1:18:16<11:21,  2.20s/it]

{'loss': 0.0001, 'learning_rate': 4.967948717948718e-06, 'epoch': 1.5}


 75%|███████▌  | 939/1248 [1:18:21<14:45,  2.87s/it]

{'loss': 0.0001, 'learning_rate': 4.951923076923077e-06, 'epoch': 1.5}


 75%|███████▌  | 940/1248 [1:18:23<14:42,  2.87s/it]

{'loss': 0.0002, 'learning_rate': 4.935897435897436e-06, 'epoch': 1.51}


 75%|███████▌  | 941/1248 [1:18:26<13:33,  2.65s/it]

{'loss': 0.0001, 'learning_rate': 4.919871794871795e-06, 'epoch': 1.51}


 75%|███████▌  | 942/1248 [1:18:28<13:24,  2.63s/it]

{'loss': 0.0001, 'learning_rate': 4.903846153846154e-06, 'epoch': 1.51}


 76%|███████▌  | 943/1248 [1:18:31<13:03,  2.57s/it]

{'loss': 0.0001, 'learning_rate': 4.887820512820513e-06, 'epoch': 1.51}


 76%|███████▌  | 944/1248 [1:18:33<12:24,  2.45s/it]

{'loss': 0.0001, 'learning_rate': 4.871794871794872e-06, 'epoch': 1.51}


 76%|███████▌  | 945/1248 [1:18:35<12:27,  2.47s/it]

{'loss': 0.0005, 'learning_rate': 4.855769230769231e-06, 'epoch': 1.51}


 76%|███████▌  | 946/1248 [1:18:38<12:42,  2.53s/it]

{'loss': 0.0002, 'learning_rate': 4.83974358974359e-06, 'epoch': 1.52}


 76%|███████▌  | 947/1248 [1:18:42<15:45,  3.14s/it]

{'loss': 0.0001, 'learning_rate': 4.823717948717949e-06, 'epoch': 1.52}


 76%|███████▌  | 948/1248 [1:18:45<14:07,  2.82s/it]

{'loss': 0.0002, 'learning_rate': 4.807692307692308e-06, 'epoch': 1.52}


 76%|███████▌  | 949/1248 [1:18:48<14:21,  2.88s/it]

{'loss': 0.0001, 'learning_rate': 4.791666666666668e-06, 'epoch': 1.52}


 76%|███████▌  | 950/1248 [1:18:50<13:08,  2.65s/it]

{'loss': 0.0, 'learning_rate': 4.775641025641027e-06, 'epoch': 1.52}


 76%|███████▌  | 951/1248 [1:18:51<11:36,  2.35s/it]

{'loss': 0.0002, 'learning_rate': 4.759615384615385e-06, 'epoch': 1.52}


 76%|███████▋  | 952/1248 [1:18:53<11:02,  2.24s/it]

{'loss': 0.0002, 'learning_rate': 4.743589743589744e-06, 'epoch': 1.53}


 76%|███████▋  | 953/1248 [1:18:55<10:35,  2.15s/it]

{'loss': 0.0001, 'learning_rate': 4.727564102564103e-06, 'epoch': 1.53}


 76%|███████▋  | 954/1248 [1:18:57<10:19,  2.11s/it]

{'loss': 0.0002, 'learning_rate': 4.711538461538462e-06, 'epoch': 1.53}


 77%|███████▋  | 955/1248 [1:19:00<11:01,  2.26s/it]

{'loss': 0.0001, 'learning_rate': 4.6955128205128205e-06, 'epoch': 1.53}


 77%|███████▋  | 956/1248 [1:19:02<10:11,  2.09s/it]

{'loss': 0.0003, 'learning_rate': 4.6794871794871795e-06, 'epoch': 1.53}


 77%|███████▋  | 957/1248 [1:19:04<10:05,  2.08s/it]

{'loss': 0.0001, 'learning_rate': 4.663461538461539e-06, 'epoch': 1.53}


 77%|███████▋  | 958/1248 [1:19:06<10:18,  2.13s/it]

{'loss': 0.0001, 'learning_rate': 4.647435897435898e-06, 'epoch': 1.54}


 77%|███████▋  | 959/1248 [1:19:08<10:13,  2.12s/it]

{'loss': 0.0002, 'learning_rate': 4.631410256410257e-06, 'epoch': 1.54}


 77%|███████▋  | 960/1248 [1:19:10<09:21,  1.95s/it]

{'loss': 0.0001, 'learning_rate': 4.615384615384616e-06, 'epoch': 1.54}


 77%|███████▋  | 961/1248 [1:19:11<08:44,  1.83s/it]

{'loss': 0.0001, 'learning_rate': 4.599358974358974e-06, 'epoch': 1.54}


 77%|███████▋  | 962/1248 [1:19:13<08:58,  1.88s/it]

{'loss': 0.0001, 'learning_rate': 4.583333333333333e-06, 'epoch': 1.54}


 77%|███████▋  | 963/1248 [1:19:15<09:20,  1.97s/it]

{'loss': 0.0001, 'learning_rate': 4.567307692307692e-06, 'epoch': 1.54}


 77%|███████▋  | 964/1248 [1:19:17<09:15,  1.96s/it]

{'loss': 0.0002, 'learning_rate': 4.551282051282052e-06, 'epoch': 1.54}


 77%|███████▋  | 965/1248 [1:19:19<08:30,  1.80s/it]

{'loss': 0.0004, 'learning_rate': 4.535256410256411e-06, 'epoch': 1.55}


 77%|███████▋  | 966/1248 [1:19:22<10:22,  2.21s/it]

{'loss': 0.0001, 'learning_rate': 4.51923076923077e-06, 'epoch': 1.55}


 77%|███████▋  | 967/1248 [1:19:24<10:20,  2.21s/it]

{'loss': 0.0001, 'learning_rate': 4.503205128205129e-06, 'epoch': 1.55}


 78%|███████▊  | 968/1248 [1:19:26<09:47,  2.10s/it]

{'loss': 0.0001, 'learning_rate': 4.487179487179488e-06, 'epoch': 1.55}


 78%|███████▊  | 969/1248 [1:19:30<12:08,  2.61s/it]

{'loss': 0.0002, 'learning_rate': 4.471153846153847e-06, 'epoch': 1.55}


 78%|███████▊  | 970/1248 [1:19:32<11:57,  2.58s/it]

{'loss': 0.0002, 'learning_rate': 4.455128205128206e-06, 'epoch': 1.55}


 78%|███████▊  | 971/1248 [1:19:34<11:23,  2.47s/it]

{'loss': 0.0019, 'learning_rate': 4.439102564102565e-06, 'epoch': 1.56}


 78%|███████▊  | 972/1248 [1:19:37<11:22,  2.47s/it]

{'loss': 0.0002, 'learning_rate': 4.423076923076924e-06, 'epoch': 1.56}


 78%|███████▊  | 973/1248 [1:19:39<11:20,  2.48s/it]

{'loss': 0.0003, 'learning_rate': 4.4070512820512826e-06, 'epoch': 1.56}


 78%|███████▊  | 974/1248 [1:19:41<10:19,  2.26s/it]

{'loss': 0.0001, 'learning_rate': 4.3910256410256415e-06, 'epoch': 1.56}


 78%|███████▊  | 975/1248 [1:19:43<09:44,  2.14s/it]

{'loss': 0.0001, 'learning_rate': 4.3750000000000005e-06, 'epoch': 1.56}


 78%|███████▊  | 976/1248 [1:19:45<09:32,  2.11s/it]

{'loss': 0.0002, 'learning_rate': 4.358974358974359e-06, 'epoch': 1.56}


 78%|███████▊  | 977/1248 [1:19:47<09:19,  2.07s/it]

{'loss': 0.0001, 'learning_rate': 4.342948717948718e-06, 'epoch': 1.57}


 78%|███████▊  | 978/1248 [1:19:49<09:51,  2.19s/it]

{'loss': 0.0002, 'learning_rate': 4.326923076923077e-06, 'epoch': 1.57}


 78%|███████▊  | 979/1248 [1:19:52<10:05,  2.25s/it]

{'loss': 0.0002, 'learning_rate': 4.310897435897436e-06, 'epoch': 1.57}


 79%|███████▊  | 980/1248 [1:19:54<09:21,  2.10s/it]

{'loss': 0.0001, 'learning_rate': 4.294871794871795e-06, 'epoch': 1.57}


 79%|███████▊  | 981/1248 [1:19:55<08:40,  1.95s/it]

{'loss': 0.0003, 'learning_rate': 4.278846153846154e-06, 'epoch': 1.57}


 79%|███████▊  | 982/1248 [1:19:58<09:21,  2.11s/it]

{'loss': 0.0001, 'learning_rate': 4.262820512820513e-06, 'epoch': 1.57}


 79%|███████▉  | 983/1248 [1:19:59<08:55,  2.02s/it]

{'loss': 0.0001, 'learning_rate': 4.246794871794872e-06, 'epoch': 1.58}


 79%|███████▉  | 984/1248 [1:20:02<09:11,  2.09s/it]

{'loss': 0.0001, 'learning_rate': 4.230769230769231e-06, 'epoch': 1.58}


 79%|███████▉  | 985/1248 [1:20:03<08:26,  1.93s/it]

{'loss': 0.0001, 'learning_rate': 4.21474358974359e-06, 'epoch': 1.58}


 79%|███████▉  | 986/1248 [1:20:05<08:40,  1.99s/it]

{'loss': 0.0003, 'learning_rate': 4.198717948717949e-06, 'epoch': 1.58}


 79%|███████▉  | 987/1248 [1:20:08<09:00,  2.07s/it]

{'loss': 0.0001, 'learning_rate': 4.182692307692308e-06, 'epoch': 1.58}


 79%|███████▉  | 988/1248 [1:20:09<08:21,  1.93s/it]

{'loss': 0.0001, 'learning_rate': 4.166666666666667e-06, 'epoch': 1.58}


 79%|███████▉  | 989/1248 [1:20:12<08:47,  2.04s/it]

{'loss': 0.0001, 'learning_rate': 4.150641025641026e-06, 'epoch': 1.58}


 79%|███████▉  | 990/1248 [1:20:14<09:37,  2.24s/it]

{'loss': 0.0001, 'learning_rate': 4.134615384615385e-06, 'epoch': 1.59}


 79%|███████▉  | 991/1248 [1:20:17<09:49,  2.29s/it]

{'loss': 0.0001, 'learning_rate': 4.118589743589744e-06, 'epoch': 1.59}


 79%|███████▉  | 992/1248 [1:20:19<09:30,  2.23s/it]

{'loss': 0.0001, 'learning_rate': 4.102564102564103e-06, 'epoch': 1.59}


 80%|███████▉  | 993/1248 [1:20:20<08:49,  2.07s/it]

{'loss': 0.0002, 'learning_rate': 4.086538461538462e-06, 'epoch': 1.59}


 80%|███████▉  | 994/1248 [1:20:22<08:26,  1.99s/it]

{'loss': 0.0001, 'learning_rate': 4.070512820512821e-06, 'epoch': 1.59}


 80%|███████▉  | 995/1248 [1:20:25<08:46,  2.08s/it]

{'loss': 0.0001, 'learning_rate': 4.0544871794871795e-06, 'epoch': 1.59}


 80%|███████▉  | 996/1248 [1:20:26<08:33,  2.04s/it]

{'loss': 0.0001, 'learning_rate': 4.0384615384615385e-06, 'epoch': 1.6}


 80%|███████▉  | 997/1248 [1:20:28<08:18,  1.99s/it]

{'loss': 0.0001, 'learning_rate': 4.022435897435898e-06, 'epoch': 1.6}


 80%|███████▉  | 998/1248 [1:20:30<08:24,  2.02s/it]

{'loss': 0.0002, 'learning_rate': 4.006410256410257e-06, 'epoch': 1.6}


 80%|████████  | 999/1248 [1:20:33<08:40,  2.09s/it]

{'loss': 0.0001, 'learning_rate': 3.990384615384616e-06, 'epoch': 1.6}


 80%|████████  | 1000/1248 [1:20:34<08:11,  1.98s/it]***** Running Evaluation *****
  Num examples = 6239
  Batch size = 10


{'loss': 0.0002, 'learning_rate': 3.974358974358974e-06, 'epoch': 1.6}


                                                     
 80%|████████  | 1000/1248 [1:25:46<08:11,  1.98s/it]Saving model checkpoint to bert-base-cased-finetuned-adjs-wsent-black-clover/checkpoint-1000
Configuration saved in bert-base-cased-finetuned-adjs-wsent-black-clover/checkpoint-1000/config.json


{'eval_loss': 4.914210512652062e-05, 'eval_runtime': 311.4918, 'eval_samples_per_second': 20.029, 'eval_steps_per_second': 2.003, 'epoch': 1.6}


Model weights saved in bert-base-cased-finetuned-adjs-wsent-black-clover/checkpoint-1000/pytorch_model.bin
 80%|████████  | 1001/1248 [1:25:50<6:35:12, 96.00s/it]

{'loss': 0.0001, 'learning_rate': 3.958333333333333e-06, 'epoch': 1.6}


 80%|████████  | 1002/1248 [1:25:52<4:37:54, 67.78s/it]

{'loss': 0.0001, 'learning_rate': 3.942307692307692e-06, 'epoch': 1.61}


 80%|████████  | 1003/1248 [1:25:54<3:16:21, 48.09s/it]

{'loss': 0.0002, 'learning_rate': 3.926282051282051e-06, 'epoch': 1.61}


 80%|████████  | 1004/1248 [1:25:58<2:22:01, 34.93s/it]

{'loss': 0.0001, 'learning_rate': 3.910256410256411e-06, 'epoch': 1.61}


 81%|████████  | 1005/1248 [1:26:00<1:41:12, 24.99s/it]

{'loss': 0.0002, 'learning_rate': 3.89423076923077e-06, 'epoch': 1.61}


 81%|████████  | 1006/1248 [1:26:03<1:13:46, 18.29s/it]

{'loss': 0.0, 'learning_rate': 3.878205128205129e-06, 'epoch': 1.61}


 81%|████████  | 1007/1248 [1:26:05<53:51, 13.41s/it]  

{'loss': 0.0002, 'learning_rate': 3.862179487179488e-06, 'epoch': 1.61}


 81%|████████  | 1008/1248 [1:26:07<40:21, 10.09s/it]

{'loss': 0.0001, 'learning_rate': 3.846153846153847e-06, 'epoch': 1.62}


 81%|████████  | 1009/1248 [1:26:09<30:14,  7.59s/it]

{'loss': 0.0001, 'learning_rate': 3.830128205128206e-06, 'epoch': 1.62}


 81%|████████  | 1010/1248 [1:26:11<23:49,  6.01s/it]

{'loss': 0.0002, 'learning_rate': 3.8141025641025643e-06, 'epoch': 1.62}


 81%|████████  | 1011/1248 [1:26:14<20:10,  5.11s/it]

{'loss': 0.0002, 'learning_rate': 3.7980769230769232e-06, 'epoch': 1.62}


 81%|████████  | 1012/1248 [1:26:16<16:15,  4.13s/it]

{'loss': 0.0, 'learning_rate': 3.782051282051282e-06, 'epoch': 1.62}


 81%|████████  | 1013/1248 [1:26:17<13:13,  3.38s/it]

{'loss': 0.0002, 'learning_rate': 3.766025641025641e-06, 'epoch': 1.62}


 81%|████████▏ | 1014/1248 [1:26:21<12:55,  3.32s/it]

{'loss': 0.0002, 'learning_rate': 3.7500000000000005e-06, 'epoch': 1.62}


 81%|████████▏ | 1015/1248 [1:26:22<10:55,  2.81s/it]

{'loss': 0.0002, 'learning_rate': 3.7339743589743595e-06, 'epoch': 1.63}


 81%|████████▏ | 1016/1248 [1:26:25<10:50,  2.80s/it]

{'loss': 0.0001, 'learning_rate': 3.7179487179487184e-06, 'epoch': 1.63}


 81%|████████▏ | 1017/1248 [1:26:27<09:43,  2.53s/it]

{'loss': 0.0001, 'learning_rate': 3.7019230769230774e-06, 'epoch': 1.63}


 82%|████████▏ | 1018/1248 [1:26:30<09:51,  2.57s/it]

{'loss': 0.0001, 'learning_rate': 3.6858974358974363e-06, 'epoch': 1.63}


 82%|████████▏ | 1019/1248 [1:26:32<09:19,  2.44s/it]

{'loss': 0.0001, 'learning_rate': 3.669871794871795e-06, 'epoch': 1.63}


 82%|████████▏ | 1020/1248 [1:26:34<08:32,  2.25s/it]

{'loss': 0.0002, 'learning_rate': 3.653846153846154e-06, 'epoch': 1.63}


 82%|████████▏ | 1021/1248 [1:26:36<08:34,  2.27s/it]

{'loss': 0.0002, 'learning_rate': 3.6378205128205128e-06, 'epoch': 1.64}


 82%|████████▏ | 1022/1248 [1:26:38<08:52,  2.36s/it]

{'loss': 0.0001, 'learning_rate': 3.621794871794872e-06, 'epoch': 1.64}


 82%|████████▏ | 1023/1248 [1:26:41<09:18,  2.48s/it]

{'loss': 0.0, 'learning_rate': 3.605769230769231e-06, 'epoch': 1.64}


 82%|████████▏ | 1024/1248 [1:26:44<09:11,  2.46s/it]

{'loss': 0.0002, 'learning_rate': 3.58974358974359e-06, 'epoch': 1.64}


 82%|████████▏ | 1025/1248 [1:26:46<08:53,  2.39s/it]

{'loss': 0.0, 'learning_rate': 3.573717948717949e-06, 'epoch': 1.64}


 82%|████████▏ | 1026/1248 [1:26:48<08:54,  2.41s/it]

{'loss': 0.0001, 'learning_rate': 3.557692307692308e-06, 'epoch': 1.64}


 82%|████████▏ | 1027/1248 [1:26:51<09:22,  2.54s/it]

{'loss': 0.0001, 'learning_rate': 3.5416666666666673e-06, 'epoch': 1.65}


 82%|████████▏ | 1028/1248 [1:26:53<08:48,  2.40s/it]

{'loss': 0.0001, 'learning_rate': 3.5256410256410263e-06, 'epoch': 1.65}


 82%|████████▏ | 1029/1248 [1:26:56<09:22,  2.57s/it]

{'loss': 0.0001, 'learning_rate': 3.509615384615385e-06, 'epoch': 1.65}


 83%|████████▎ | 1030/1248 [1:26:58<08:05,  2.23s/it]

{'loss': 0.0001, 'learning_rate': 3.4935897435897438e-06, 'epoch': 1.65}


 83%|████████▎ | 1031/1248 [1:27:00<07:52,  2.18s/it]

{'loss': 0.0001, 'learning_rate': 3.4775641025641027e-06, 'epoch': 1.65}


 83%|████████▎ | 1032/1248 [1:27:02<08:02,  2.23s/it]

{'loss': 0.0001, 'learning_rate': 3.4615384615384617e-06, 'epoch': 1.65}


 83%|████████▎ | 1033/1248 [1:27:05<08:22,  2.34s/it]

{'loss': 0.0001, 'learning_rate': 3.4455128205128206e-06, 'epoch': 1.66}


 83%|████████▎ | 1034/1248 [1:27:08<09:15,  2.59s/it]

{'loss': 0.0001, 'learning_rate': 3.4294871794871796e-06, 'epoch': 1.66}


 83%|████████▎ | 1035/1248 [1:27:10<08:30,  2.40s/it]

{'loss': 0.0001, 'learning_rate': 3.413461538461539e-06, 'epoch': 1.66}


 83%|████████▎ | 1036/1248 [1:27:12<07:59,  2.26s/it]

{'loss': 0.0001, 'learning_rate': 3.397435897435898e-06, 'epoch': 1.66}


 83%|████████▎ | 1037/1248 [1:27:14<07:43,  2.20s/it]

{'loss': 0.0001, 'learning_rate': 3.381410256410257e-06, 'epoch': 1.66}


 83%|████████▎ | 1038/1248 [1:27:16<07:22,  2.11s/it]

{'loss': 0.0001, 'learning_rate': 3.365384615384616e-06, 'epoch': 1.66}


 83%|████████▎ | 1039/1248 [1:27:18<07:31,  2.16s/it]

{'loss': 0.0002, 'learning_rate': 3.3493589743589744e-06, 'epoch': 1.67}


 83%|████████▎ | 1040/1248 [1:27:20<07:19,  2.11s/it]

{'loss': 0.0001, 'learning_rate': 3.3333333333333333e-06, 'epoch': 1.67}


 83%|████████▎ | 1041/1248 [1:27:22<07:10,  2.08s/it]

{'loss': 0.0001, 'learning_rate': 3.3173076923076923e-06, 'epoch': 1.67}


 83%|████████▎ | 1042/1248 [1:27:24<07:00,  2.04s/it]

{'loss': 0.0001, 'learning_rate': 3.3012820512820517e-06, 'epoch': 1.67}


 84%|████████▎ | 1043/1248 [1:27:26<06:46,  1.98s/it]

{'loss': 0.0001, 'learning_rate': 3.2852564102564106e-06, 'epoch': 1.67}


 84%|████████▎ | 1044/1248 [1:27:28<07:17,  2.15s/it]

{'loss': 0.0001, 'learning_rate': 3.2692307692307696e-06, 'epoch': 1.67}


 84%|████████▎ | 1045/1248 [1:27:31<08:10,  2.42s/it]

{'loss': 0.0001, 'learning_rate': 3.2532051282051285e-06, 'epoch': 1.67}


 84%|████████▍ | 1046/1248 [1:27:33<07:27,  2.21s/it]

{'loss': 0.0002, 'learning_rate': 3.2371794871794875e-06, 'epoch': 1.68}


 84%|████████▍ | 1047/1248 [1:27:35<06:43,  2.01s/it]

{'loss': 0.0001, 'learning_rate': 3.2211538461538464e-06, 'epoch': 1.68}


 84%|████████▍ | 1048/1248 [1:27:37<06:43,  2.02s/it]

{'loss': 0.0002, 'learning_rate': 3.205128205128206e-06, 'epoch': 1.68}


 84%|████████▍ | 1049/1248 [1:27:38<06:31,  1.97s/it]

{'loss': 0.0002, 'learning_rate': 3.1891025641025643e-06, 'epoch': 1.68}


 84%|████████▍ | 1050/1248 [1:27:41<06:39,  2.02s/it]

{'loss': 0.0002, 'learning_rate': 3.1730769230769233e-06, 'epoch': 1.68}


 84%|████████▍ | 1051/1248 [1:27:42<06:28,  1.97s/it]

{'loss': 0.0001, 'learning_rate': 3.1570512820512822e-06, 'epoch': 1.68}


 84%|████████▍ | 1052/1248 [1:27:45<07:07,  2.18s/it]

{'loss': 0.0001, 'learning_rate': 3.141025641025641e-06, 'epoch': 1.69}


 84%|████████▍ | 1053/1248 [1:27:47<06:38,  2.04s/it]

{'loss': 0.0001, 'learning_rate': 3.125e-06, 'epoch': 1.69}


 84%|████████▍ | 1054/1248 [1:27:49<06:24,  1.98s/it]

{'loss': 0.0001, 'learning_rate': 3.108974358974359e-06, 'epoch': 1.69}


 85%|████████▍ | 1055/1248 [1:27:51<06:23,  1.99s/it]

{'loss': 0.0001, 'learning_rate': 3.0929487179487185e-06, 'epoch': 1.69}


 85%|████████▍ | 1056/1248 [1:27:52<05:47,  1.81s/it]

{'loss': 0.0001, 'learning_rate': 3.0769230769230774e-06, 'epoch': 1.69}


 85%|████████▍ | 1057/1248 [1:27:54<06:09,  1.93s/it]

{'loss': 0.0002, 'learning_rate': 3.0608974358974364e-06, 'epoch': 1.69}


 85%|████████▍ | 1058/1248 [1:27:57<06:27,  2.04s/it]

{'loss': 0.0001, 'learning_rate': 3.044871794871795e-06, 'epoch': 1.7}


 85%|████████▍ | 1059/1248 [1:27:58<06:12,  1.97s/it]

{'loss': 0.0001, 'learning_rate': 3.028846153846154e-06, 'epoch': 1.7}


 85%|████████▍ | 1060/1248 [1:28:01<06:23,  2.04s/it]

{'loss': 0.0001, 'learning_rate': 3.012820512820513e-06, 'epoch': 1.7}


 85%|████████▌ | 1061/1248 [1:28:03<06:14,  2.00s/it]

{'loss': 0.0001, 'learning_rate': 2.9967948717948718e-06, 'epoch': 1.7}


 85%|████████▌ | 1062/1248 [1:28:04<06:05,  1.97s/it]

{'loss': 0.0003, 'learning_rate': 2.980769230769231e-06, 'epoch': 1.7}


 85%|████████▌ | 1063/1248 [1:28:07<06:36,  2.14s/it]

{'loss': 0.0001, 'learning_rate': 2.96474358974359e-06, 'epoch': 1.7}


 85%|████████▌ | 1064/1248 [1:28:10<07:02,  2.30s/it]

{'loss': 0.0001, 'learning_rate': 2.948717948717949e-06, 'epoch': 1.71}


 85%|████████▌ | 1065/1248 [1:28:11<06:27,  2.12s/it]

{'loss': 0.0011, 'learning_rate': 2.932692307692308e-06, 'epoch': 1.71}


 85%|████████▌ | 1066/1248 [1:28:14<06:55,  2.29s/it]

{'loss': 0.0001, 'learning_rate': 2.916666666666667e-06, 'epoch': 1.71}


 85%|████████▌ | 1067/1248 [1:28:17<07:30,  2.49s/it]

{'loss': 0.0002, 'learning_rate': 2.900641025641026e-06, 'epoch': 1.71}


 86%|████████▌ | 1068/1248 [1:28:19<07:04,  2.36s/it]

{'loss': 0.0001, 'learning_rate': 2.8846153846153845e-06, 'epoch': 1.71}


 86%|████████▌ | 1069/1248 [1:28:22<07:10,  2.41s/it]

{'loss': 0.0001, 'learning_rate': 2.8685897435897434e-06, 'epoch': 1.71}


 86%|████████▌ | 1070/1248 [1:28:24<06:52,  2.31s/it]

{'loss': 0.0001, 'learning_rate': 2.852564102564103e-06, 'epoch': 1.71}


 86%|████████▌ | 1071/1248 [1:28:26<06:39,  2.25s/it]

{'loss': 0.0001, 'learning_rate': 2.8365384615384617e-06, 'epoch': 1.72}


 86%|████████▌ | 1072/1248 [1:28:29<07:16,  2.48s/it]

{'loss': 0.0001, 'learning_rate': 2.8205128205128207e-06, 'epoch': 1.72}


 86%|████████▌ | 1073/1248 [1:28:31<06:40,  2.29s/it]

{'loss': 0.0002, 'learning_rate': 2.8044871794871797e-06, 'epoch': 1.72}


 86%|████████▌ | 1074/1248 [1:28:33<06:20,  2.18s/it]

{'loss': 0.0002, 'learning_rate': 2.7884615384615386e-06, 'epoch': 1.72}


 86%|████████▌ | 1075/1248 [1:28:34<06:06,  2.12s/it]

{'loss': 0.0003, 'learning_rate': 2.772435897435898e-06, 'epoch': 1.72}


 86%|████████▌ | 1076/1248 [1:28:37<06:28,  2.26s/it]

{'loss': 0.0001, 'learning_rate': 2.756410256410257e-06, 'epoch': 1.72}


 86%|████████▋ | 1077/1248 [1:28:39<06:10,  2.17s/it]

{'loss': 0.0002, 'learning_rate': 2.740384615384616e-06, 'epoch': 1.73}


 86%|████████▋ | 1078/1248 [1:28:41<05:52,  2.07s/it]

{'loss': 0.0002, 'learning_rate': 2.7243589743589744e-06, 'epoch': 1.73}


 86%|████████▋ | 1079/1248 [1:28:43<05:33,  1.97s/it]

{'loss': 0.0002, 'learning_rate': 2.7083333333333334e-06, 'epoch': 1.73}


 87%|████████▋ | 1080/1248 [1:28:47<07:40,  2.74s/it]

{'loss': 0.0001, 'learning_rate': 2.6923076923076923e-06, 'epoch': 1.73}


 87%|████████▋ | 1081/1248 [1:28:49<06:44,  2.42s/it]

{'loss': 0.0001, 'learning_rate': 2.6762820512820513e-06, 'epoch': 1.73}


 87%|████████▋ | 1082/1248 [1:28:50<05:59,  2.17s/it]

{'loss': 0.0002, 'learning_rate': 2.6602564102564107e-06, 'epoch': 1.73}


 87%|████████▋ | 1083/1248 [1:28:53<05:57,  2.17s/it]

{'loss': 0.0, 'learning_rate': 2.6442307692307696e-06, 'epoch': 1.74}


 87%|████████▋ | 1084/1248 [1:28:55<05:49,  2.13s/it]

{'loss': 0.0001, 'learning_rate': 2.6282051282051286e-06, 'epoch': 1.74}


 87%|████████▋ | 1085/1248 [1:28:57<05:59,  2.20s/it]

{'loss': 0.0001, 'learning_rate': 2.6121794871794875e-06, 'epoch': 1.74}


 87%|████████▋ | 1086/1248 [1:29:00<06:18,  2.34s/it]

{'loss': 0.0003, 'learning_rate': 2.5961538461538465e-06, 'epoch': 1.74}


 87%|████████▋ | 1087/1248 [1:29:02<06:23,  2.38s/it]

{'loss': 0.0001, 'learning_rate': 2.5801282051282054e-06, 'epoch': 1.74}


 87%|████████▋ | 1088/1248 [1:29:04<06:05,  2.29s/it]

{'loss': 0.0001, 'learning_rate': 2.564102564102564e-06, 'epoch': 1.74}


 87%|████████▋ | 1089/1248 [1:29:06<05:49,  2.20s/it]

{'loss': 0.0002, 'learning_rate': 2.548076923076923e-06, 'epoch': 1.75}


 87%|████████▋ | 1090/1248 [1:29:08<05:24,  2.06s/it]

{'loss': 0.0001, 'learning_rate': 2.5320512820512823e-06, 'epoch': 1.75}


 87%|████████▋ | 1091/1248 [1:29:10<05:32,  2.12s/it]

{'loss': 0.0001, 'learning_rate': 2.5160256410256413e-06, 'epoch': 1.75}


 88%|████████▊ | 1092/1248 [1:29:12<05:33,  2.14s/it]

{'loss': 0.0001, 'learning_rate': 2.5e-06, 'epoch': 1.75}


 88%|████████▊ | 1093/1248 [1:29:14<05:09,  2.00s/it]

{'loss': 0.0002, 'learning_rate': 2.483974358974359e-06, 'epoch': 1.75}


 88%|████████▊ | 1094/1248 [1:29:16<05:18,  2.07s/it]

{'loss': 0.0002, 'learning_rate': 2.467948717948718e-06, 'epoch': 1.75}


 88%|████████▊ | 1095/1248 [1:29:19<05:28,  2.15s/it]

{'loss': 0.0001, 'learning_rate': 2.451923076923077e-06, 'epoch': 1.75}


 88%|████████▊ | 1096/1248 [1:29:21<05:57,  2.35s/it]

{'loss': 0.0, 'learning_rate': 2.435897435897436e-06, 'epoch': 1.76}


 88%|████████▊ | 1097/1248 [1:29:23<05:32,  2.20s/it]

{'loss': 0.0001, 'learning_rate': 2.419871794871795e-06, 'epoch': 1.76}


 88%|████████▊ | 1098/1248 [1:29:25<05:15,  2.10s/it]

{'loss': 0.0001, 'learning_rate': 2.403846153846154e-06, 'epoch': 1.76}


 88%|████████▊ | 1099/1248 [1:29:27<04:59,  2.01s/it]

{'loss': 0.0002, 'learning_rate': 2.3878205128205133e-06, 'epoch': 1.76}


 88%|████████▊ | 1100/1248 [1:29:29<04:48,  1.95s/it]***** Running Evaluation *****
  Num examples = 6239
  Batch size = 10


{'loss': 0.0001, 'learning_rate': 2.371794871794872e-06, 'epoch': 1.76}


                                                     
 88%|████████▊ | 1100/1248 [1:34:26<04:48,  1.95s/it]Saving model checkpoint to bert-base-cased-finetuned-adjs-wsent-black-clover/checkpoint-1100
Configuration saved in bert-base-cased-finetuned-adjs-wsent-black-clover/checkpoint-1100/config.json


{'eval_loss': 4.6288703742902726e-05, 'eval_runtime': 296.9521, 'eval_samples_per_second': 21.01, 'eval_steps_per_second': 2.101, 'epoch': 1.76}


Model weights saved in bert-base-cased-finetuned-adjs-wsent-black-clover/checkpoint-1100/pytorch_model.bin
 88%|████████▊ | 1101/1248 [1:34:29<3:44:12, 91.52s/it]

{'loss': 0.0001, 'learning_rate': 2.355769230769231e-06, 'epoch': 1.76}


 88%|████████▊ | 1102/1248 [1:34:32<2:37:39, 64.79s/it]

{'loss': 0.0001, 'learning_rate': 2.3397435897435897e-06, 'epoch': 1.77}


 88%|████████▊ | 1103/1248 [1:34:34<1:51:15, 46.03s/it]

{'loss': 0.0003, 'learning_rate': 2.323717948717949e-06, 'epoch': 1.77}


 88%|████████▊ | 1104/1248 [1:34:36<1:18:46, 32.82s/it]

{'loss': 0.0001, 'learning_rate': 2.307692307692308e-06, 'epoch': 1.77}


 89%|████████▊ | 1105/1248 [1:34:38<56:12, 23.59s/it]  

{'loss': 0.0001, 'learning_rate': 2.2916666666666666e-06, 'epoch': 1.77}


 89%|████████▊ | 1106/1248 [1:34:40<40:47, 17.24s/it]

{'loss': 0.0001, 'learning_rate': 2.275641025641026e-06, 'epoch': 1.77}


 89%|████████▊ | 1107/1248 [1:34:43<30:05, 12.81s/it]

{'loss': 0.0001, 'learning_rate': 2.259615384615385e-06, 'epoch': 1.77}


 89%|████████▉ | 1108/1248 [1:34:45<22:17,  9.55s/it]

{'loss': 0.0001, 'learning_rate': 2.243589743589744e-06, 'epoch': 1.78}


 89%|████████▉ | 1109/1248 [1:34:47<17:15,  7.45s/it]

{'loss': 0.0001, 'learning_rate': 2.227564102564103e-06, 'epoch': 1.78}


 89%|████████▉ | 1110/1248 [1:34:49<13:12,  5.75s/it]

{'loss': 0.0002, 'learning_rate': 2.211538461538462e-06, 'epoch': 1.78}


 89%|████████▉ | 1111/1248 [1:34:51<10:42,  4.69s/it]

{'loss': 0.0001, 'learning_rate': 2.1955128205128208e-06, 'epoch': 1.78}


 89%|████████▉ | 1112/1248 [1:34:54<09:01,  3.98s/it]

{'loss': 0.0001, 'learning_rate': 2.1794871794871797e-06, 'epoch': 1.78}


 89%|████████▉ | 1113/1248 [1:34:56<07:35,  3.37s/it]

{'loss': 0.0001, 'learning_rate': 2.1634615384615387e-06, 'epoch': 1.78}


 89%|████████▉ | 1114/1248 [1:34:58<06:33,  2.94s/it]

{'loss': 0.0001, 'learning_rate': 2.1474358974358976e-06, 'epoch': 1.79}


 89%|████████▉ | 1115/1248 [1:34:59<05:46,  2.61s/it]

{'loss': 0.0001, 'learning_rate': 2.1314102564102566e-06, 'epoch': 1.79}


 89%|████████▉ | 1116/1248 [1:35:02<06:01,  2.74s/it]

{'loss': 0.0001, 'learning_rate': 2.1153846153846155e-06, 'epoch': 1.79}


 90%|████████▉ | 1117/1248 [1:35:05<05:34,  2.55s/it]

{'loss': 0.0001, 'learning_rate': 2.0993589743589745e-06, 'epoch': 1.79}


 90%|████████▉ | 1118/1248 [1:35:07<05:30,  2.54s/it]

{'loss': 0.0001, 'learning_rate': 2.0833333333333334e-06, 'epoch': 1.79}


 90%|████████▉ | 1119/1248 [1:35:10<05:54,  2.75s/it]

{'loss': 0.0001, 'learning_rate': 2.0673076923076924e-06, 'epoch': 1.79}


 90%|████████▉ | 1120/1248 [1:35:12<05:21,  2.51s/it]

{'loss': 0.0001, 'learning_rate': 2.0512820512820513e-06, 'epoch': 1.79}


 90%|████████▉ | 1121/1248 [1:35:14<04:58,  2.35s/it]

{'loss': 0.0001, 'learning_rate': 2.0352564102564103e-06, 'epoch': 1.8}


 90%|████████▉ | 1122/1248 [1:35:16<04:23,  2.09s/it]

{'loss': 0.0001, 'learning_rate': 2.0192307692307692e-06, 'epoch': 1.8}


 90%|████████▉ | 1123/1248 [1:35:17<03:53,  1.87s/it]

{'loss': 0.0001, 'learning_rate': 2.0032051282051286e-06, 'epoch': 1.8}


 90%|█████████ | 1124/1248 [1:35:19<03:56,  1.91s/it]

{'loss': 0.0002, 'learning_rate': 1.987179487179487e-06, 'epoch': 1.8}


 90%|█████████ | 1125/1248 [1:35:22<04:14,  2.07s/it]

{'loss': 0.0001, 'learning_rate': 1.971153846153846e-06, 'epoch': 1.8}


 90%|█████████ | 1126/1248 [1:35:24<04:13,  2.08s/it]

{'loss': 0.0002, 'learning_rate': 1.9551282051282055e-06, 'epoch': 1.8}


 90%|█████████ | 1127/1248 [1:35:26<04:13,  2.09s/it]

{'loss': 0.0001, 'learning_rate': 1.9391025641025644e-06, 'epoch': 1.81}


 90%|█████████ | 1128/1248 [1:35:28<04:20,  2.17s/it]

{'loss': 0.0001, 'learning_rate': 1.9230769230769234e-06, 'epoch': 1.81}


 90%|█████████ | 1129/1248 [1:35:30<04:17,  2.17s/it]

{'loss': 0.0001, 'learning_rate': 1.9070512820512821e-06, 'epoch': 1.81}


 91%|█████████ | 1130/1248 [1:35:33<04:37,  2.35s/it]

{'loss': 0.0001, 'learning_rate': 1.891025641025641e-06, 'epoch': 1.81}


 91%|█████████ | 1131/1248 [1:35:35<04:27,  2.29s/it]

{'loss': 0.0001, 'learning_rate': 1.8750000000000003e-06, 'epoch': 1.81}


 91%|█████████ | 1132/1248 [1:35:37<04:13,  2.19s/it]

{'loss': 0.0001, 'learning_rate': 1.8589743589743592e-06, 'epoch': 1.81}


 91%|█████████ | 1133/1248 [1:35:39<03:57,  2.07s/it]

{'loss': 0.0001, 'learning_rate': 1.8429487179487182e-06, 'epoch': 1.82}


 91%|█████████ | 1134/1248 [1:35:41<04:00,  2.11s/it]

{'loss': 0.0002, 'learning_rate': 1.826923076923077e-06, 'epoch': 1.82}


 91%|█████████ | 1135/1248 [1:35:43<03:56,  2.09s/it]

{'loss': 0.0001, 'learning_rate': 1.810897435897436e-06, 'epoch': 1.82}


 91%|█████████ | 1136/1248 [1:35:46<04:26,  2.38s/it]

{'loss': 0.0002, 'learning_rate': 1.794871794871795e-06, 'epoch': 1.82}


 91%|█████████ | 1137/1248 [1:35:48<04:12,  2.27s/it]

{'loss': 0.0001, 'learning_rate': 1.778846153846154e-06, 'epoch': 1.82}


 91%|█████████ | 1138/1248 [1:35:51<04:28,  2.44s/it]

{'loss': 0.0001, 'learning_rate': 1.7628205128205131e-06, 'epoch': 1.82}


 91%|█████████▏| 1139/1248 [1:35:53<04:08,  2.28s/it]

{'loss': 0.0001, 'learning_rate': 1.7467948717948719e-06, 'epoch': 1.83}


 91%|█████████▏| 1140/1248 [1:35:55<03:59,  2.21s/it]

{'loss': 0.0002, 'learning_rate': 1.7307692307692308e-06, 'epoch': 1.83}


 91%|█████████▏| 1141/1248 [1:35:57<03:33,  1.99s/it]

{'loss': 0.0001, 'learning_rate': 1.7147435897435898e-06, 'epoch': 1.83}


 92%|█████████▏| 1142/1248 [1:35:58<03:19,  1.88s/it]

{'loss': 0.0001, 'learning_rate': 1.698717948717949e-06, 'epoch': 1.83}


 92%|█████████▏| 1143/1248 [1:36:00<03:14,  1.85s/it]

{'loss': 0.0001, 'learning_rate': 1.682692307692308e-06, 'epoch': 1.83}


 92%|█████████▏| 1144/1248 [1:36:03<03:40,  2.12s/it]

{'loss': 0.0001, 'learning_rate': 1.6666666666666667e-06, 'epoch': 1.83}


 92%|█████████▏| 1145/1248 [1:36:05<03:36,  2.10s/it]

{'loss': 0.0002, 'learning_rate': 1.6506410256410258e-06, 'epoch': 1.83}


 92%|█████████▏| 1146/1248 [1:36:07<03:34,  2.10s/it]

{'loss': 0.0002, 'learning_rate': 1.6346153846153848e-06, 'epoch': 1.84}


 92%|█████████▏| 1147/1248 [1:36:09<03:30,  2.09s/it]

{'loss': 0.0001, 'learning_rate': 1.6185897435897437e-06, 'epoch': 1.84}


 92%|█████████▏| 1148/1248 [1:36:11<03:29,  2.10s/it]

{'loss': 0.0001, 'learning_rate': 1.602564102564103e-06, 'epoch': 1.84}


 92%|█████████▏| 1149/1248 [1:36:14<03:48,  2.31s/it]

{'loss': 0.0001, 'learning_rate': 1.5865384615384616e-06, 'epoch': 1.84}


 92%|█████████▏| 1150/1248 [1:36:16<03:55,  2.40s/it]

{'loss': 0.0001, 'learning_rate': 1.5705128205128206e-06, 'epoch': 1.84}


 92%|█████████▏| 1151/1248 [1:36:19<03:44,  2.32s/it]

{'loss': 0.0001, 'learning_rate': 1.5544871794871796e-06, 'epoch': 1.84}


 92%|█████████▏| 1152/1248 [1:36:22<04:13,  2.64s/it]

{'loss': 0.0001, 'learning_rate': 1.5384615384615387e-06, 'epoch': 1.85}


 92%|█████████▏| 1153/1248 [1:36:24<04:02,  2.55s/it]

{'loss': 0.0001, 'learning_rate': 1.5224358974358975e-06, 'epoch': 1.85}


 92%|█████████▏| 1154/1248 [1:36:26<03:33,  2.27s/it]

{'loss': 0.0001, 'learning_rate': 1.5064102564102564e-06, 'epoch': 1.85}


 93%|█████████▎| 1155/1248 [1:36:29<03:43,  2.40s/it]

{'loss': 0.0001, 'learning_rate': 1.4903846153846156e-06, 'epoch': 1.85}


 93%|█████████▎| 1156/1248 [1:36:31<03:49,  2.49s/it]

{'loss': 0.0001, 'learning_rate': 1.4743589743589745e-06, 'epoch': 1.85}


 93%|█████████▎| 1157/1248 [1:36:33<03:34,  2.35s/it]

{'loss': 0.0002, 'learning_rate': 1.4583333333333335e-06, 'epoch': 1.85}


 93%|█████████▎| 1158/1248 [1:36:35<03:22,  2.25s/it]

{'loss': 0.0002, 'learning_rate': 1.4423076923076922e-06, 'epoch': 1.86}


 93%|█████████▎| 1159/1248 [1:36:39<03:44,  2.52s/it]

{'loss': 0.0001, 'learning_rate': 1.4262820512820514e-06, 'epoch': 1.86}


 93%|█████████▎| 1160/1248 [1:36:41<03:31,  2.40s/it]

{'loss': 0.0001, 'learning_rate': 1.4102564102564104e-06, 'epoch': 1.86}


 93%|█████████▎| 1161/1248 [1:36:44<03:55,  2.70s/it]

{'loss': 0.0001, 'learning_rate': 1.3942307692307693e-06, 'epoch': 1.86}


 93%|█████████▎| 1162/1248 [1:36:47<03:50,  2.68s/it]

{'loss': 0.0005, 'learning_rate': 1.3782051282051285e-06, 'epoch': 1.86}


 93%|█████████▎| 1163/1248 [1:36:49<03:26,  2.42s/it]

{'loss': 0.0001, 'learning_rate': 1.3621794871794872e-06, 'epoch': 1.86}


 93%|█████████▎| 1164/1248 [1:36:50<03:12,  2.29s/it]

{'loss': 0.0001, 'learning_rate': 1.3461538461538462e-06, 'epoch': 1.87}


 93%|█████████▎| 1165/1248 [1:36:52<02:51,  2.06s/it]

{'loss': 0.0001, 'learning_rate': 1.3301282051282053e-06, 'epoch': 1.87}


 93%|█████████▎| 1166/1248 [1:36:55<03:13,  2.36s/it]

{'loss': 0.0, 'learning_rate': 1.3141025641025643e-06, 'epoch': 1.87}


 94%|█████████▎| 1167/1248 [1:36:57<02:51,  2.12s/it]

{'loss': 0.0001, 'learning_rate': 1.2980769230769232e-06, 'epoch': 1.87}


 94%|█████████▎| 1168/1248 [1:37:00<03:13,  2.42s/it]

{'loss': 0.0001, 'learning_rate': 1.282051282051282e-06, 'epoch': 1.87}


 94%|█████████▎| 1169/1248 [1:37:02<03:03,  2.32s/it]

{'loss': 0.0002, 'learning_rate': 1.2660256410256411e-06, 'epoch': 1.87}


 94%|█████████▍| 1170/1248 [1:37:04<02:50,  2.19s/it]

{'loss': 0.0002, 'learning_rate': 1.25e-06, 'epoch': 1.88}


 94%|█████████▍| 1171/1248 [1:37:06<02:46,  2.16s/it]

{'loss': 0.0001, 'learning_rate': 1.233974358974359e-06, 'epoch': 1.88}


 94%|█████████▍| 1172/1248 [1:37:08<02:49,  2.23s/it]

{'loss': 0.0001, 'learning_rate': 1.217948717948718e-06, 'epoch': 1.88}


 94%|█████████▍| 1173/1248 [1:37:10<02:30,  2.01s/it]

{'loss': 0.0001, 'learning_rate': 1.201923076923077e-06, 'epoch': 1.88}


 94%|█████████▍| 1174/1248 [1:37:13<02:47,  2.26s/it]

{'loss': 0.0001, 'learning_rate': 1.185897435897436e-06, 'epoch': 1.88}


 94%|█████████▍| 1175/1248 [1:37:14<02:31,  2.07s/it]

{'loss': 0.0001, 'learning_rate': 1.1698717948717949e-06, 'epoch': 1.88}


 94%|█████████▍| 1176/1248 [1:37:17<02:35,  2.16s/it]

{'loss': 0.0001, 'learning_rate': 1.153846153846154e-06, 'epoch': 1.88}


 94%|█████████▍| 1177/1248 [1:37:19<02:37,  2.22s/it]

{'loss': 0.0001, 'learning_rate': 1.137820512820513e-06, 'epoch': 1.89}


 94%|█████████▍| 1178/1248 [1:37:21<02:32,  2.19s/it]

{'loss': 0.0001, 'learning_rate': 1.121794871794872e-06, 'epoch': 1.89}


 94%|█████████▍| 1179/1248 [1:37:23<02:25,  2.11s/it]

{'loss': 0.0001, 'learning_rate': 1.105769230769231e-06, 'epoch': 1.89}


 95%|█████████▍| 1180/1248 [1:37:27<03:03,  2.70s/it]

{'loss': 0.0001, 'learning_rate': 1.0897435897435899e-06, 'epoch': 1.89}


 95%|█████████▍| 1181/1248 [1:37:29<02:50,  2.55s/it]

{'loss': 0.0, 'learning_rate': 1.0737179487179488e-06, 'epoch': 1.89}


 95%|█████████▍| 1182/1248 [1:37:31<02:29,  2.27s/it]

{'loss': 0.0001, 'learning_rate': 1.0576923076923078e-06, 'epoch': 1.89}


 95%|█████████▍| 1183/1248 [1:37:33<02:31,  2.33s/it]

{'loss': 0.0001, 'learning_rate': 1.0416666666666667e-06, 'epoch': 1.9}


 95%|█████████▍| 1184/1248 [1:37:35<02:24,  2.27s/it]

{'loss': 0.0001, 'learning_rate': 1.0256410256410257e-06, 'epoch': 1.9}


 95%|█████████▍| 1185/1248 [1:37:38<02:24,  2.29s/it]

{'loss': 0.0002, 'learning_rate': 1.0096153846153846e-06, 'epoch': 1.9}


 95%|█████████▌| 1186/1248 [1:37:41<02:37,  2.54s/it]

{'loss': 0.0, 'learning_rate': 9.935897435897436e-07, 'epoch': 1.9}


 95%|█████████▌| 1187/1248 [1:37:43<02:28,  2.43s/it]

{'loss': 0.0001, 'learning_rate': 9.775641025641027e-07, 'epoch': 1.9}


 95%|█████████▌| 1188/1248 [1:37:46<02:31,  2.52s/it]

{'loss': 0.0002, 'learning_rate': 9.615384615384617e-07, 'epoch': 1.9}


 95%|█████████▌| 1189/1248 [1:37:48<02:16,  2.31s/it]

{'loss': 0.0002, 'learning_rate': 9.455128205128205e-07, 'epoch': 1.91}


 95%|█████████▌| 1190/1248 [1:37:52<02:43,  2.82s/it]

{'loss': 0.0001, 'learning_rate': 9.294871794871796e-07, 'epoch': 1.91}


 95%|█████████▌| 1191/1248 [1:37:54<02:34,  2.70s/it]

{'loss': 0.0001, 'learning_rate': 9.134615384615385e-07, 'epoch': 1.91}


 96%|█████████▌| 1192/1248 [1:37:56<02:23,  2.55s/it]

{'loss': 0.0001, 'learning_rate': 8.974358974358975e-07, 'epoch': 1.91}


 96%|█████████▌| 1193/1248 [1:37:58<02:07,  2.31s/it]

{'loss': 0.0001, 'learning_rate': 8.814102564102566e-07, 'epoch': 1.91}


 96%|█████████▌| 1194/1248 [1:38:00<01:56,  2.15s/it]

{'loss': 0.0002, 'learning_rate': 8.653846153846154e-07, 'epoch': 1.91}


 96%|█████████▌| 1195/1248 [1:38:02<02:01,  2.29s/it]

{'loss': 0.0001, 'learning_rate': 8.493589743589745e-07, 'epoch': 1.92}


 96%|█████████▌| 1196/1248 [1:38:04<01:50,  2.13s/it]

{'loss': 0.0001, 'learning_rate': 8.333333333333333e-07, 'epoch': 1.92}


 96%|█████████▌| 1197/1248 [1:38:06<01:43,  2.03s/it]

{'loss': 0.0002, 'learning_rate': 8.173076923076924e-07, 'epoch': 1.92}


 96%|█████████▌| 1198/1248 [1:38:08<01:40,  2.01s/it]

{'loss': 0.0001, 'learning_rate': 8.012820512820515e-07, 'epoch': 1.92}


 96%|█████████▌| 1199/1248 [1:38:10<01:37,  1.98s/it]

{'loss': 0.0005, 'learning_rate': 7.852564102564103e-07, 'epoch': 1.92}


 96%|█████████▌| 1200/1248 [1:38:12<01:32,  1.92s/it]***** Running Evaluation *****
  Num examples = 6239
  Batch size = 10


{'loss': 0.0001, 'learning_rate': 7.692307692307694e-07, 'epoch': 1.92}


                                                     
 96%|█████████▌| 1200/1248 [1:43:26<01:32,  1.92s/it]Saving model checkpoint to bert-base-cased-finetuned-adjs-wsent-black-clover/checkpoint-1200
Configuration saved in bert-base-cased-finetuned-adjs-wsent-black-clover/checkpoint-1200/config.json


{'eval_loss': 4.492528023547493e-05, 'eval_runtime': 314.4118, 'eval_samples_per_second': 19.843, 'eval_steps_per_second': 1.985, 'epoch': 1.92}


Model weights saved in bert-base-cased-finetuned-adjs-wsent-black-clover/checkpoint-1200/pytorch_model.bin
 96%|█████████▌| 1201/1248 [1:43:29<1:15:34, 96.48s/it]

{'loss': 0.0001, 'learning_rate': 7.532051282051282e-07, 'epoch': 1.92}


 96%|█████████▋| 1202/1248 [1:43:31<52:13, 68.12s/it]  

{'loss': 0.0002, 'learning_rate': 7.371794871794873e-07, 'epoch': 1.93}


 96%|█████████▋| 1203/1248 [1:43:34<36:24, 48.54s/it]

{'loss': 0.0002, 'learning_rate': 7.211538461538461e-07, 'epoch': 1.93}


 96%|█████████▋| 1204/1248 [1:43:36<25:23, 34.61s/it]

{'loss': 0.0003, 'learning_rate': 7.051282051282052e-07, 'epoch': 1.93}


 97%|█████████▋| 1205/1248 [1:43:37<17:42, 24.72s/it]

{'loss': 0.0002, 'learning_rate': 6.891025641025642e-07, 'epoch': 1.93}


 97%|█████████▋| 1206/1248 [1:43:40<12:44, 18.20s/it]

{'loss': 0.0001, 'learning_rate': 6.730769230769231e-07, 'epoch': 1.93}


 97%|█████████▋| 1207/1248 [1:43:42<09:08, 13.38s/it]

{'loss': 0.0001, 'learning_rate': 6.570512820512821e-07, 'epoch': 1.93}


 97%|█████████▋| 1208/1248 [1:43:45<06:43, 10.08s/it]

{'loss': 0.0001, 'learning_rate': 6.41025641025641e-07, 'epoch': 1.94}


 97%|█████████▋| 1209/1248 [1:43:47<05:00,  7.70s/it]

{'loss': 0.0001, 'learning_rate': 6.25e-07, 'epoch': 1.94}


 97%|█████████▋| 1210/1248 [1:43:49<03:46,  5.96s/it]

{'loss': 0.0001, 'learning_rate': 6.08974358974359e-07, 'epoch': 1.94}


 97%|█████████▋| 1211/1248 [1:43:51<02:55,  4.74s/it]

{'loss': 0.0001, 'learning_rate': 5.92948717948718e-07, 'epoch': 1.94}


 97%|█████████▋| 1212/1248 [1:43:53<02:19,  3.87s/it]

{'loss': 0.0003, 'learning_rate': 5.76923076923077e-07, 'epoch': 1.94}


 97%|█████████▋| 1213/1248 [1:43:54<01:54,  3.26s/it]

{'loss': 0.0002, 'learning_rate': 5.60897435897436e-07, 'epoch': 1.94}


 97%|█████████▋| 1214/1248 [1:43:57<01:42,  3.02s/it]

{'loss': 0.0001, 'learning_rate': 5.448717948717949e-07, 'epoch': 1.95}


 97%|█████████▋| 1215/1248 [1:43:59<01:28,  2.69s/it]

{'loss': 0.0001, 'learning_rate': 5.288461538461539e-07, 'epoch': 1.95}


 97%|█████████▋| 1216/1248 [1:44:02<01:26,  2.72s/it]

{'loss': 0.0001, 'learning_rate': 5.128205128205128e-07, 'epoch': 1.95}


 98%|█████████▊| 1217/1248 [1:44:04<01:18,  2.53s/it]

{'loss': 0.0001, 'learning_rate': 4.967948717948718e-07, 'epoch': 1.95}


 98%|█████████▊| 1218/1248 [1:44:06<01:12,  2.40s/it]

{'loss': 0.0, 'learning_rate': 4.807692307692308e-07, 'epoch': 1.95}


 98%|█████████▊| 1219/1248 [1:44:08<01:06,  2.29s/it]

{'loss': 0.0001, 'learning_rate': 4.647435897435898e-07, 'epoch': 1.95}


 98%|█████████▊| 1220/1248 [1:44:10<01:00,  2.16s/it]

{'loss': 0.0007, 'learning_rate': 4.4871794871794876e-07, 'epoch': 1.96}


 98%|█████████▊| 1221/1248 [1:44:11<00:53,  1.99s/it]

{'loss': 0.0001, 'learning_rate': 4.326923076923077e-07, 'epoch': 1.96}


 98%|█████████▊| 1222/1248 [1:44:13<00:51,  1.97s/it]

{'loss': 0.0001, 'learning_rate': 4.1666666666666667e-07, 'epoch': 1.96}


 98%|█████████▊| 1223/1248 [1:44:16<00:52,  2.11s/it]

{'loss': 0.0001, 'learning_rate': 4.006410256410257e-07, 'epoch': 1.96}


 98%|█████████▊| 1224/1248 [1:44:18<00:49,  2.08s/it]

{'loss': 0.0001, 'learning_rate': 3.846153846153847e-07, 'epoch': 1.96}


 98%|█████████▊| 1225/1248 [1:44:19<00:46,  2.01s/it]

{'loss': 0.0001, 'learning_rate': 3.6858974358974363e-07, 'epoch': 1.96}


 98%|█████████▊| 1226/1248 [1:44:22<00:49,  2.25s/it]

{'loss': 0.0005, 'learning_rate': 3.525641025641026e-07, 'epoch': 1.96}


 98%|█████████▊| 1227/1248 [1:44:24<00:46,  2.22s/it]

{'loss': 0.0001, 'learning_rate': 3.3653846153846154e-07, 'epoch': 1.97}


 98%|█████████▊| 1228/1248 [1:44:27<00:45,  2.27s/it]

{'loss': 0.0, 'learning_rate': 3.205128205128205e-07, 'epoch': 1.97}


 98%|█████████▊| 1229/1248 [1:44:29<00:43,  2.29s/it]

{'loss': 0.0002, 'learning_rate': 3.044871794871795e-07, 'epoch': 1.97}


 99%|█████████▊| 1230/1248 [1:44:31<00:41,  2.29s/it]

{'loss': 0.0002, 'learning_rate': 2.884615384615385e-07, 'epoch': 1.97}


 99%|█████████▊| 1231/1248 [1:44:34<00:40,  2.38s/it]

{'loss': 0.0001, 'learning_rate': 2.7243589743589746e-07, 'epoch': 1.97}


 99%|█████████▊| 1232/1248 [1:44:36<00:37,  2.33s/it]

{'loss': 0.0001, 'learning_rate': 2.564102564102564e-07, 'epoch': 1.97}


 99%|█████████▉| 1233/1248 [1:44:38<00:32,  2.15s/it]

{'loss': 0.0002, 'learning_rate': 2.403846153846154e-07, 'epoch': 1.98}


 99%|█████████▉| 1234/1248 [1:44:40<00:28,  2.05s/it]

{'loss': 0.0001, 'learning_rate': 2.2435897435897438e-07, 'epoch': 1.98}


 99%|█████████▉| 1235/1248 [1:44:43<00:29,  2.30s/it]

{'loss': 0.0002, 'learning_rate': 2.0833333333333333e-07, 'epoch': 1.98}


 99%|█████████▉| 1236/1248 [1:44:45<00:28,  2.40s/it]

{'loss': 0.0003, 'learning_rate': 1.9230769230769234e-07, 'epoch': 1.98}


 99%|█████████▉| 1237/1248 [1:44:47<00:25,  2.28s/it]

{'loss': 0.0001, 'learning_rate': 1.762820512820513e-07, 'epoch': 1.98}


 99%|█████████▉| 1238/1248 [1:44:50<00:24,  2.42s/it]

{'loss': 0.0, 'learning_rate': 1.6025641025641025e-07, 'epoch': 1.98}


 99%|█████████▉| 1239/1248 [1:44:52<00:21,  2.35s/it]

{'loss': 0.0001, 'learning_rate': 1.4423076923076925e-07, 'epoch': 1.99}


 99%|█████████▉| 1240/1248 [1:44:54<00:17,  2.18s/it]

{'loss': 0.0001, 'learning_rate': 1.282051282051282e-07, 'epoch': 1.99}


 99%|█████████▉| 1241/1248 [1:44:56<00:15,  2.27s/it]

{'loss': 0.0001, 'learning_rate': 1.1217948717948719e-07, 'epoch': 1.99}


100%|█████████▉| 1242/1248 [1:44:59<00:14,  2.42s/it]

{'loss': 0.0001, 'learning_rate': 9.615384615384617e-08, 'epoch': 1.99}


100%|█████████▉| 1243/1248 [1:45:03<00:13,  2.69s/it]

{'loss': 0.0001, 'learning_rate': 8.012820512820512e-08, 'epoch': 1.99}


100%|█████████▉| 1244/1248 [1:45:05<00:10,  2.58s/it]

{'loss': 0.0001, 'learning_rate': 6.41025641025641e-08, 'epoch': 1.99}


100%|█████████▉| 1245/1248 [1:45:07<00:07,  2.45s/it]

{'loss': 0.0002, 'learning_rate': 4.8076923076923085e-08, 'epoch': 2.0}


100%|█████████▉| 1246/1248 [1:45:09<00:04,  2.20s/it]

{'loss': 0.0001, 'learning_rate': 3.205128205128205e-08, 'epoch': 2.0}


100%|█████████▉| 1247/1248 [1:45:11<00:02,  2.37s/it]

{'loss': 0.0, 'learning_rate': 1.6025641025641026e-08, 'epoch': 2.0}


## Summarize

In [None]:
import numpy as np

In [None]:
def predict_mask(input_str):
    """Tomamos el camino largo en lugar de usar pipeline
    """
    inputs = tokenizer(input_str, return_tensors="pt")
    mask_index = np.where(inputs['input_ids'] == tokenizer.mask_token_id)
    # .eval() to set dropout and batch normalization layers to evaluation mode
    model.eval()
    outputs = model(**inputs)
    top_5_predictions = torch.softmax(outputs.logits[mask_index], dim=1).topk(5)
    predicted = []
    for i in range(5):
        token = tokenizer.decode(top_5_predictions.indices[0, i])
        prob = top_5_predictions.values[0, i]
        predicted.append({
            "token": token, 
            "prob": prob.item()
        })
    return predicted

In [None]:
predicted_by_sentence_by_character = []
for sentence in sentences_w_subjects_tokenized:
    predicted_by_character = dict()
    for subject in sentence["subjects"]:
        print(sentence["tokens"] + " " + subject + " can be described as [MASK].")
        print()
        predicted = predict_mask(sentence["tokens"] + " " + subject + " can be described as [MASK].")
        predicted_by_character[subject] = predicted
        for i in range(0, len(predicted)):
            print(f" {i+1}) {predicted[i]['token']:<20} {predicted[i]['prob']:.3f}")
        print()
    predicted_by_sentence_by_character.append(predicted_by_character)

write_json(predicted_by_sentence_by_character, "29_sent_predicted_by_char_bert_sent_coref.json")

a priest takes two babies abandoned outside his church inside and discovers two babies abandoned outside his church names to be Yuno and Asta . Yuno can be described as [MASK].

 1) well                 0.059
 2) dead                 0.018
 3) dangerous            0.017
 4) drunk                0.015
 5) normal               0.011

a priest takes two babies abandoned outside his church inside and discovers two babies abandoned outside his church names to be Yuno and Asta . Asta can be described as [MASK].

 1) well                 0.114
 2) [CLS]                0.023
 3) dangerous            0.016
 4) dead                 0.013
 5) normal               0.011

Fifteen years later , Asta proposes to Sister Lily , who refuses repeatedly . Lily can be described as [MASK].

 1) well                 0.035
 2) dead                 0.024
 3) dangerous            0.015
 4) drunk                0.014
 5) beautiful            0.012

Fifteen years later , Asta proposes to Sister Lily , who refuses

In [None]:
predicted_by_sentence_by_character

[{'Yuno': [{'token': 'well', 'prob': 0.05913417786359787},
   {'token': 'dead', 'prob': 0.017927514389157295},
   {'token': 'dangerous', 'prob': 0.017457064241170883},
   {'token': 'drunk', 'prob': 0.015289518050849438},
   {'token': 'normal', 'prob': 0.010806072503328323}]},
 {'Asta': [{'token': 'well', 'prob': 0.11448170989751816},
   {'token': '[CLS]', 'prob': 0.022511247545480728},
   {'token': 'dangerous', 'prob': 0.01573476754128933},
   {'token': 'dead', 'prob': 0.012670634314417839},
   {'token': 'normal', 'prob': 0.010935131460428238}]},
 {'Lily': [{'token': 'well', 'prob': 0.03503146022558212},
   {'token': 'dead', 'prob': 0.02427370846271515},
   {'token': 'dangerous', 'prob': 0.015339055098593235},
   {'token': 'drunk', 'prob': 0.014185435138642788},
   {'token': 'beautiful', 'prob': 0.01214889157563448}]},
 {'Asta': [{'token': 'well', 'prob': 0.040930043905973434},
   {'token': 'drunk', 'prob': 0.024186421185731888},
   {'token': 'dead', 'prob': 0.016790080815553665},
   {

In [None]:
predicted_by_sentence_by_character = []
for sentence in sentences_w_subjects_tokenized:
    predicted_by_character = dict()
    for subject in sentence["subjects"]:
        print(subject + " can be described as [MASK].")
        print()
        predicted = predict_mask(subject + " can be described as [MASK].")
        predicted_by_character[subject] = predicted
        for i in range(0, len(predicted)):
            print(f" {i+1}) {predicted[i]['token']:<20} {predicted[i]['prob']:.3f}")
        print()
    predicted_by_sentence_by_character.append(predicted_by_character)

write_json(predicted_by_sentence_by_character, "29_predicted_by_char_bert_sent_coref.json")

Yuno can be described as [MASK].

 1) well                 0.161
 2) follows              0.023
 3) [CLS]                0.023
 4) drunk                0.009
 5) dangerous            0.008

Asta can be described as [MASK].

 1) well                 0.283
 2) follows              0.052
 3) [CLS]                0.025
 4) ##tral               0.017
 5) ##tern               0.016

Lily can be described as [MASK].

 1) well                 0.216
 2) [CLS]                0.026
 3) normal               0.011
 4) wet                  0.010
 5) dry                  0.010

Asta can be described as [MASK].

 1) well                 0.283
 2) follows              0.052
 3) [CLS]                0.025
 4) ##tral               0.017
 5) ##tern               0.016

Yuno can be described as [MASK].

 1) well                 0.161
 2) follows              0.023
 3) [CLS]                0.023
 4) drunk                0.009
 5) dangerous            0.008

Asta can be described as [MASK].

 1) well        

In [None]:
predicted_by_sentence_by_character

[{'Yuno': [{'token': 'well', 'prob': 0.16093774139881134},
   {'token': 'follows', 'prob': 0.023116296157240868},
   {'token': '[CLS]', 'prob': 0.022737780585885048},
   {'token': 'drunk', 'prob': 0.008910329081118107},
   {'token': 'dangerous', 'prob': 0.0075854710303246975}]},
 {'Asta': [{'token': 'well', 'prob': 0.2825336158275604},
   {'token': 'follows', 'prob': 0.052406907081604004},
   {'token': '[CLS]', 'prob': 0.024985894560813904},
   {'token': '##tral', 'prob': 0.017321910709142685},
   {'token': '##tern', 'prob': 0.015691304579377174}]},
 {'Lily': [{'token': 'well', 'prob': 0.216403990983963},
   {'token': '[CLS]', 'prob': 0.02634630911052227},
   {'token': 'normal', 'prob': 0.011455900967121124},
   {'token': 'wet', 'prob': 0.010365892201662064},
   {'token': 'dry', 'prob': 0.010297978296875954}]},
 {'Asta': [{'token': 'well', 'prob': 0.2825336158275604},
   {'token': 'follows', 'prob': 0.052406907081604004},
   {'token': '[CLS]', 'prob': 0.024985894560813904},
   {'token'

In [None]:
def predict_mask_x(input_str, x):
    """Tomamos el camino largo en lugar de usar pipeline
    """
    inputs = tokenizer(input_str, return_tensors="pt")
    mask_index = np.where(inputs['input_ids'] == tokenizer.mask_token_id)
    # .eval() to set dropout and batch normalization layers to evaluation mode
    model.eval()
    outputs = model(**inputs)
    top_x_predictions = torch.softmax(outputs.logits[mask_index], dim=1).topk(x)
    predicted = []
    for i in range(x):
        token = tokenizer.decode(top_x_predictions.indices[0, i])
        prob = top_x_predictions.values[0, i]
        predicted.append({
            "token": token, 
            "prob": prob.item()
        })
    return predicted

In [None]:
subject = "Asta"
predicted = predict_mask_x(subject + " can be described as [MASK].", 27)
predicted_by_character[subject] = predicted
for i in range(0, len(predicted)):
    print(f" {i+1}) {predicted[i]['token']:<20} {predicted[i]['prob']:.3f}")
print()

 1) well                 0.283
 2) follows              0.052
 3) [CLS]                0.025
 4) ##tral               0.017
 5) ##tern               0.016
 6) such                 0.011
 7) distinct             0.008
 8) ##self               0.007
 9) desired              0.007
 10) normal               0.006
 11) unnecessary          0.005
 12) usual                0.004
 13) necessary            0.004
 14) dangerous            0.004
 15) opposed              0.004
 16) ##tro                0.004
 17) useless              0.004
 18) [MASK]               0.004
 19) ##piration           0.004
 20) impossible           0.003
 21) above                0.003
 22) possible             0.003
 23) dry                  0.003
 24) wet                  0.003
 25) nothing              0.003
 26) sound                0.003
 27) drunk                0.003



In [None]:
subject = "Asta"
predicted = predict_mask_x(subject + " is [MASK].", 100)
predicted_by_character[subject] = predicted
for i in range(0, len(predicted)):
    print(f" {i+1}) {predicted[i]['token']:<20} {predicted[i]['prob']:.3f}")
print()

 1) blind                0.075
 2) dead                 0.059
 3) destroyed            0.025
 4) killed               0.020
 5) lost                 0.020
 6) defeated             0.017
 7) drunk                0.015
 8) broken               0.014
 9) gone                 0.011
 10) paralyzed            0.009
 11) frozen               0.009
 12) confused             0.008
 13) absent               0.008
 14) missing              0.007
 15) wounded              0.007
 16) exhausted            0.007
 17) weak                 0.006
 18) forgotten            0.006
 19) alive                0.005
 20) hungry               0.005
 21) trapped              0.005
 22) closed               0.005
 23) crushed              0.005
 24) deserted             0.005
 25) buried               0.005
 26) depressed            0.005
 27) silent               0.004
 28) wet                  0.004
 29) angry                0.004
 30) stuck                0.004
 31) abandoned            0.004
 32) imprisoned  

In [None]:
characters = read_json('3_characters_black_clover.json')
adjectives = read_json("14_varied_set_adjectives.json")

In [None]:

perplex_adjs = dict()
for character in characters:
    perplex_adjs[character] = dict()
    for adj in adjectives:
        sent = character + " is " + adj
        tokenized_sent = tokenizer(sent)
        tokenized_sent = Dataset.from_list([tokenized_sent])
        perplexity = np.exp(trainer.evaluate(tokenized_sent)["eval_loss"])
        perplex_adjs[character][adj] = perplexity

***** Running Evaluation *****
  Num examples = 1
  Batch size = 10
100%|██████████| 1/1 [00:00<00:00, 184.18it/s]
***** Running Evaluation *****
  Num examples = 1
  Batch size = 10
100%|██████████| 1/1 [00:00<00:00, 1244.60it/s]
***** Running Evaluation *****
  Num examples = 1
  Batch size = 10
100%|██████████| 1/1 [00:00<00:00, 1514.74it/s]
***** Running Evaluation *****
  Num examples = 1
  Batch size = 10
100%|██████████| 1/1 [00:00<00:00, 1345.19it/s]
***** Running Evaluation *****
  Num examples = 1
  Batch size = 10
100%|██████████| 1/1 [00:00<00:00, 1370.24it/s]
***** Running Evaluation *****
  Num examples = 1
  Batch size = 10
100%|██████████| 1/1 [00:00<00:00, 1449.81it/s]
***** Running Evaluation *****
  Num examples = 1
  Batch size = 10
100%|██████████| 1/1 [00:00<00:00, 1225.33it/s]
***** Running Evaluation *****
  Num examples = 1
  Batch size = 10
100%|██████████| 1/1 [00:00<00:00, 1482.61it/s]
***** Running Evaluation *****
  Num examples = 1
  Batch size = 10
100%|

In [None]:
sorted_perplex_adjs = sorted(perplex_adjs["Asta"].items(), key=lambda kv: kv[1])
print(sorted_perplex_adjs)
write_json(sorted_perplex_adjs, "29_asta.json")

[('absent-minded', 1.0000053643578393), ('lanky', 1.0000067437517843), ('flamboyant', 1.0000069140349117), ('charismatic', 1.0000069289420879), ('kind-hearted', 1.000007763456901), ('morose', 1.0000079869133234), ('reclusive', 1.0000081401953396), ('stern', 1.0000082054921358), ('warm-hearted', 1.0000082551773308), ('rotund', 1.000008259410154), ('crestfallen', 1.000008359427208), ('ardent', 1.0000084488794923), ('boisterous', 1.0000089141419124), ('fussy', 1.000009074713737), ('demure', 1.0000095195953873), ('fervent', 1.000009745219549), ('restless', 1.0000098743745207), ('naive', 1.0000098744154484), ('jovial', 1.0000101922597509), ('generous', 1.000010231966876), ('petulant', 1.0000103710536956), ('fastidious', 1.000010535010929), ('zealous', 1.0000105499413512), ('haughty', 1.0000106265188875), ('nosy', 1.00001076280085), ('diligent', 1.0000110693330144), ('venal', 1.0000111374203629), ('drowsy', 1.0000116227850098), ('timid', 1.0000116525503893), ('cantankerous', 1.00001284795939

In [None]:
sorted_perplex_adjs = sorted(perplex_adjs["Noelle"].items(), key=lambda kv: kv[1])
print(sorted_perplex_adjs)
write_json(sorted_perplex_adjs, "29_noelle.json")

[('absent-minded', 1.0000086681068807), ('lanky', 1.0000103710827997), ('drowsy', 1.0000105414020155), ('charismatic', 1.0000107627135377), ('crestfallen', 1.0000109330692002), ('flamboyant', 1.0000114140999563), ('boisterous', 1.0000119953120723), ('morose', 1.000012142135393), ('petulant', 1.0000129084633993), ('fussy', 1.0000132150626153), ('reclusive', 1.0000135698084986), ('ardent', 1.0000138791937385), ('grouchy', 1.000014049546294), ('shrewd', 1.0000141411164207), ('tetchy', 1.0000148840197547), ('timid', 1.0000149009129609), ('stern', 1.0000155447276855), ('naughty', 1.0000155963423134), ('cantankerous', 1.0000157354662595), ('haughty', 1.0000161724599694), ('warm-hearted', 1.0000161952759228), ('rotund', 1.0000162122582832), ('kind-hearted', 1.0000169105325856), ('obnoxious', 1.0000178960218062), ('venal', 1.0000179407806615), ('nosy', 1.0000186957896906), ('jovial', 1.0000192775299177), ('zealous', 1.0000196182805534), ('fervent', 1.0000197031635438), ('fastidious', 1.0000197

In [None]:
sorted_perplex_adjs = sorted(perplex_adjs["Yuno"].items(), key=lambda kv: kv[1])
print(sorted_perplex_adjs)
write_json(sorted_perplex_adjs, "29_yuno.json")

[('absent-minded', 1.0000051259507428), ('charismatic', 1.0000064670293722), ('flamboyant', 1.0000069405364067), ('lanky', 1.0000072376299594), ('reclusive', 1.00000793586207), ('ardent', 1.0000080018555313), ('stern', 1.0000081260343918), ('boisterous', 1.0000085962452652), ('warm-hearted', 1.000008821426291), ('petulant', 1.000009029981654), ('kind-hearted', 1.0000090747573929), ('restless', 1.0000094571779927), ('morose', 1.0000095664329924), ('crestfallen', 1.0000099836213805), ('fussy', 1.000010117792297), ('shrewd', 1.0000103049109192), ('generous', 1.0000103313383795), ('demure', 1.000011035245686), ('haughty', 1.0000111374267293), ('fastidious', 1.0000112353704), ('nosy', 1.0000113077688133), ('diligent', 1.0000113418188614), ('timid', 1.000011488625715), ('cantankerous', 1.00001196054804), ('fervent', 1.0000120399542467), ('jovial', 1.0000120399706178), ('venal', 1.000012125160378), ('zealous', 1.0000125168918483), ('grouchy', 1.000012725442525), ('naughty', 1.0000134534916496

In [None]:
sorted_perplex_adjs = sorted(perplex_adjs["Julius"].items(), key=lambda kv: kv[1])
print(sorted_perplex_adjs)
write_json(sorted_perplex_adjs, "29_julius.json")

[('absent-minded', 1.0000070332782294), ('lanky', 1.000009059820626), ('charismatic', 1.0000096558153528), ('flamboyant', 1.0000101922461084), ('ardent', 1.0000102859586735), ('drowsy', 1.0000112396223355), ('boisterous', 1.0000116973126032), ('warm-hearted', 1.0000121081626254), ('crestfallen', 1.0000121421053794), ('morose', 1.00001222729151), ('reclusive', 1.0000126956298194), ('petulant', 1.0000130276796917), ('haughty', 1.0000134506321599), ('naughty', 1.000013629482167), ('shrewd', 1.0000142304791808), ('timid', 1.0000143219438211), ('fussy', 1.0000147306675824), ('grouchy', 1.0000151564602562), ('kind-hearted', 1.0000153267657348), ('diligent', 1.0000153778746648), ('nosy', 1.0000155765550383), ('cantankerous', 1.0000157056380024), ('rotund', 1.0000162718347916), ('jovial', 1.0000163995390288), ('tetchy', 1.0000166380597735), ('fastidious', 1.0000171659394204), ('demure', 1.0000172850907356), ('zealous', 1.0000176939357635), ('generous', 1.0000180957286087), ('fervent', 1.000018

In [None]:
sorted_perplex_adjs = sorted(perplex_adjs["Mereleona"].items(), key=lambda kv: kv[1])
print(sorted_perplex_adjs)
write_json(sorted_perplex_adjs, "29_mereleona.json")

[('absent-minded', 1.0000049709816128), ('charismatic', 1.0000071167049496), ('flamboyant', 1.00000713079858), ('lanky', 1.0000071657257), ('reclusive', 1.0000072319528523), ('stern', 1.0000073909032203), ('kind-hearted', 1.0000074862935533), ('boisterous', 1.0000077810548507), ('warm-hearted', 1.0000078319914991), ('restless', 1.0000081657678106), ('morose', 1.0000084756661567), ('fussy', 1.000008559141201), ('haughty', 1.0000085962134326), ('crestfallen', 1.000008809453597), ('naive', 1.0000089853387044), ('petulant', 1.0000090717151056), ('generous', 1.0000091939004683), ('ardent', 1.0000097035488253), ('shrewd', 1.000009764233631), ('zealous', 1.0000099419861173), ('nosy', 1.0000099737387125), ('diligent', 1.0000100664644456), ('demure', 1.000010225367515), ('fastidious', 1.0000102399732398), ('chic', 1.0000102916057843), ('naughty', 1.0000104373383591), ('clumsy', 1.000010520125982), ('jovial', 1.000010549895876), ('funny', 1.0000108181105483), ('cantankerous', 1.000010891278372),

In [None]:
sorted_perplex_adjs = sorted(perplex_adjs["Charmy"].items(), key=lambda kv: kv[1])
print(sorted_perplex_adjs)
write_json(sorted_perplex_adjs, "29_charmy.json")

[('absent-minded', 1.000004940510206), ('flamboyant', 1.0000075100774728), ('lanky', 1.000007599500122), ('charismatic', 1.0000077352550707), ('reclusive', 1.0000080912341138), ('crestfallen', 1.0000084372615088), ('kind-hearted', 1.000008569771466), ('stern', 1.0000088384230779), ('diligent', 1.0000096558580995), ('morose', 1.000009722040851), ('boisterous', 1.0000097989248267), ('ardent', 1.000009814788389), ('haughty', 1.0000099091266572), ('warm-hearted', 1.0000103578658859), ('restless', 1.0000103710827997), ('petulant', 1.0000105697385315), ('naughty', 1.0000106691091588), ('fervent', 1.000011390914853), ('nosy', 1.000011592979613), ('generous', 1.0000119036957573), ('zealous', 1.0000121857380089), ('fastidious', 1.0000122121809805), ('demure', 1.00001233796837), ('cantankerous', 1.0000123380320356), ('timid', 1.000012463788518), ('fussy', 1.0000124770727632), ('shrewd', 1.000012540691804), ('obnoxious', 1.0000126121489727), ('venal', 1.0000129489546268), ('drowsy', 1.00001352343

In [None]:
more_similar_vocab = read_json("14_more_similar_vocabulary.json")

In [None]:
perplex_adjs_simpler = dict()
for character in characters:
    perplex_adjs_simpler[character] = dict()
    for adj in more_similar_vocab:
        sent = character + " is " + adj
        tokenized_sent = tokenizer(sent)
        tokenized_sent = Dataset.from_list([tokenized_sent])
        perplexity = np.exp(trainer.evaluate(tokenized_sent)["eval_loss"])
        perplex_adjs_simpler[character][adj] = perplexity

***** Running Evaluation *****
  Num examples = 1
  Batch size = 10
100%|██████████| 1/1 [00:00<00:00, 1242.39it/s]
***** Running Evaluation *****
  Num examples = 1
  Batch size = 10
100%|██████████| 1/1 [00:00<00:00, 1500.65it/s]
***** Running Evaluation *****
  Num examples = 1
  Batch size = 10
100%|██████████| 1/1 [00:00<00:00, 1405.13it/s]
***** Running Evaluation *****
  Num examples = 1
  Batch size = 10
100%|██████████| 1/1 [00:00<00:00, 1326.47it/s]
***** Running Evaluation *****
  Num examples = 1
  Batch size = 10
100%|██████████| 1/1 [00:00<00:00, 1569.14it/s]
***** Running Evaluation *****
  Num examples = 1
  Batch size = 10
100%|██████████| 1/1 [00:00<00:00, 1527.42it/s]
***** Running Evaluation *****
  Num examples = 1
  Batch size = 10
100%|██████████| 1/1 [00:00<00:00, 1558.06it/s]
***** Running Evaluation *****
  Num examples = 1
  Batch size = 10
100%|██████████| 1/1 [00:00<00:00, 810.49it/s]
***** Running Evaluation *****
  Num examples = 1
  Batch size = 10
100%|

In [None]:
sorted_perplex_adjs = sorted(perplex_adjs_simpler["Asta"].items(), key=lambda kv: kv[1])
print(sorted_perplex_adjs)
write_json(sorted_perplex_adjs, "29_asta_simpler.json")