# Finetuning BERT
Notebook containing the steps necesarry to fine-tune BERT on paraphrased dataset

### Load data

In [5]:
import os

# Data path
path = os.path.join(".." , "data" , "original_Twitter_data.csv")
path2 = os.path.join(".." , "data" , "train_paraphrased_w_mistral_seed42_1.csv")
path3 = os.path.join(".." , "data" , "train_paraphrased_w_mistral_seed42_2.csv")
path4 = os.path.join(".." , "data" , "train_paraphrased_with_gpt4.csv")

### Functions

The following chunk contains the four functions
1. Function to **load and prepare dataset**
2. Function to **split dataset into test, train and validation sets**
3. Function to **tokenize dataset**
4. Function **defining evaluation metrics** 

In [8]:
from datasets import Dataset
from datasets import DatasetDict
import pandas as pd
os.environ["WANDB_DISABLED"] = "true"


####################################################
#####                LOADING DATA              #####
####################################################
def load_and_prepare_dataset(file_path):
    # Load the dataset
    dataset = pd.read_csv(file_path)
    #print the number of times a label is positive, negative or neutral
    print(dataset['label'].value_counts())
    # equalize dataset making sure there are the same number of positive, negative and neutral tweets
    # Remove all rows where language is not 'da'
    dataset = dataset[dataset['language'] == 'da']
    # Remove all columns except 'text' and 'label'
    dataset = dataset[['text', 'label']]
    # Remove all duplicates
    dataset = dataset.drop_duplicates()
    # Convert to dict and then to a Hugging Face Dataset
    dataset = Dataset.from_dict(dataset)
    print("Dataset loaded and prepared")
    return dataset#, dataset_pd


####################################################
##### SPLIT DATASET INTO TRAIN, VALID AND TEST #####
####################################################
def split_dataset(dataset, path, seed=42): #def split_dataset(dataset, path_to_df_train, path_to_df_train_2, seed=42):
    
    # load paraphrasings dataset
    paraphrasings = pd.read_csv(path)
    # concatenate the paraphrasings dataset with the original dataset
    #paraphrasings = pd.read_csv(path_to_df_train)
    #paraphrasings2 = pd.read_csv(path_to_df_train_2)

    #paraphrasings = pd.concat([paraphrasings, paraphrasings2])

    # remove rows where org_or_new == 1 - removing original tweets
    #paraphrasings = paraphrasings[paraphrasings['org_or_new'] == 0]
    print("Number of paraphrasings: ", len(paraphrasings))

    #paraphrasings = paraphrasings.rename(columns={"New":"text"})
    #paraphrasings = paraphrasings.drop(columns=["org_or_new"])
    #paraphrasings = paraphrasings[['paraphrased_text', 'label']]

    # rename paraphrased_text to text
    #paraphrasings = paraphrasings.rename(columns={"paraphrased_text":"text"})

    # remove duplicates
    #paraphrasings = paraphrasings.drop_duplicates()
    #print("Number of paraphrasings after removing duplicates: ", len(paraphrasings))

    paraphrasings_plus_org = Dataset.from_dict(paraphrasings)


    # 60% train, 20% validation, 20% test
    train_test = dataset.train_test_split(test_size=0.4, seed=seed) 
    test_valid = train_test['test'].train_test_split(test_size=0.5, seed=seed)

    # combine train, test and valid to one dictionary
    dataset_splitted_dict = DatasetDict({
        'train': paraphrasings_plus_org,
        'valid': test_valid['train'],
        'test': test_valid['test']})
    
    print("Dataset splitted into train (60%), valid (20%) and test (20%)")

    # output the train dataset as a csv file
    #dataset_splitted_dict['train'].to_csv(os.path.join("..", "data", "train.csv"))

    # print the length of the train dataset
    print("Length of train dataset: ", len(dataset_splitted_dict['train']))
    print("Length of valid dataset: ", len(dataset_splitted_dict['valid']))
    print("Length of test dataset: ", len(dataset_splitted_dict['test']))

    print("")

    return dataset_splitted_dict

####################################################
#####             TOKENIZE DATASET             #####
####################################################
from transformers import AutoTokenizer
from datasets import ClassLabel

def tokenize_dataset(dataset, model_name="NbAiLab/nb-bert-large", max_length=128):
    # defining the labels
    labels_cl = ClassLabel(num_classes=3, names=['negative', 'neutral', 'positive'])

    # load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # defining a function to tokenize the text and translate all labels into integers instead of strings
    def tokenize_function(example):
        tokens = tokenizer(example["text"], padding="max_length", truncation=True, max_length=max_length)
        tokens['label'] = labels_cl.str2int(example['label'])
        return tokens

    # actually tokenizing the dataset
    tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=dataset['train'].column_names) # batched=True speeds up tokenization by allowing to process multiple lines at once


    print("Dataset tokenized")

    return tokenized_dataset

####################################################
#####              EVALUATION METRICS          #####
####################################################
import numpy as np
import evaluate

def compute_metrics(eval_pred):
    metric0 = evaluate.load("accuracy")
    metric1 = evaluate.load("precision")
    metric2 = evaluate.load("recall")
    metric3 = evaluate.load("f1")

    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    accuracy = metric0.compute(predictions=predictions, references=labels)["accuracy"]
    precision = metric1.compute(predictions=predictions, references=labels, average="weighted")["precision"]
    recall = metric2.compute(predictions=predictions, references=labels, average="weighted")["recall"]
    f1 = metric3.compute(predictions=predictions, references=labels, average="weighted")["f1"]
    return {"accuracy": accuracy, "precision": precision, "recall": recall, "f1": f1}

In [9]:
print("Loading and preparing dataset...")
dataset = load_and_prepare_dataset(path)
print(dataset)

print("Splitting dataset...")
dataset_splitted_dict = split_dataset(dataset, path2) #split_dataset(dataset, path2, path3)

print("Tokenizing dataset...")
tokenized_dataset = tokenize_dataset(dataset_splitted_dict)

print("Loading model (NbAiLab/nb-bert-large)...")

from transformers import AutoModelForSequenceClassification
model = AutoModelForSequenceClassification.from_pretrained("NbAiLab/nb-bert-large", num_labels=3)

Loading and preparing dataset...
label
negative    1525
neutral     1281
positive    1000
Name: count, dtype: int64
Dataset loaded and prepared
Dataset({
    features: ['text', 'label'],
    num_rows: 3572
})
Splitting dataset...
Number of paraphrasings:  3317
Dataset splitted into train (60%), valid (20%) and test (20%)
Length of train dataset:  3317
Length of valid dataset:  714
Length of test dataset:  715

Tokenizing dataset...


Map:   0%|          | 0/3317 [00:00<?, ? examples/s]


KeyError: 'text'

Specifying training args

In [60]:
import numpy as np  
# count number of labels in each dataset
print(np.unique(dataset_splitted_dict['train']['label'],return_counts=True))
print(np.unique(dataset_splitted_dict['test']['label'],return_counts=True))
print(np.unique(dataset_splitted_dict['valid']['label'],return_counts=True))

(array(['negative', 'neutral', 'positive'], dtype='<U8'), array([1764, 1422, 1100]))
(array(['negative', 'neutral', 'positive'], dtype='<U8'), array([309, 221, 185]))
(array(['negative', 'neutral', 'positive'], dtype='<U8'), array([268, 252, 194]))


In [61]:
from transformers import TrainingArguments

batch_size = 8 # stating batch size
epochs = 4
learning_rate = 1e-5


training_args = TrainingArguments(output_dir="test_trainer",
                                  num_train_epochs=epochs,
                                  per_device_train_batch_size=batch_size,
                                  per_device_eval_batch_size=batch_size,
                                  learning_rate=learning_rate,
                                  weight_decay=0.01,
                                  logging_dir="logs",
                                  logging_steps=10,
                                  load_best_model_at_end=True,
                                  evaluation_strategy="epoch",
                                  save_strategy="epoch",  # Add this line
                                  remove_unused_columns=False,
                                  run_name="test_trainer")

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Initializing trainer

In [62]:
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['test'],
    compute_metrics=compute_metrics
)

In [None]:
trainer.train()

In [11]:
trainer.evaluate()

import tensorflow as tf

# creating model predictions for the validation data
predictions_val = trainer.predict(tokenized_dataset["valid"])

# choosing the prediction that has the highest probability 
preds_val_val = np.argmax(predictions_val.predictions, axis=-1)

# calculating the probabilities instead of logits from each
predictions_probabilities = tf.nn.softmax(predictions_val.predictions)

def compute_metrics_end(preds, refs):
    metric0 = evaluate.load("accuracy")
    metric1 = evaluate.load("precision")
    metric2 = evaluate.load("recall")
    metric3 = evaluate.load("f1")
    
    #logits, labels = eval_pred
    #predictions = np.argmax(logits, axis=-1)
    accuracy = metric0.compute(predictions=preds, references=refs)["accuracy"]
    precision = metric1.compute(predictions=preds, references=refs, average="weighted")["precision"]
    recall = metric2.compute(predictions=preds, references=refs, average="weighted")["recall"]
    f1 = metric3.compute(predictions=preds, references=refs, average="weighted")["f1"]
    return {"accuracy": accuracy, "precision": precision, "recall": recall, "f1": f1}

metrics_val = compute_metrics_end(preds=preds_val_val, refs=predictions_val.label_ids)

import tensorflow as tf

# creating model predictions for the validation data
predictions_test = trainer.predict(tokenized_dataset["test"])

# choosing the prediction that has the highest probability 
preds_test_test = np.argmax(predictions_test.predictions, axis=-1)

# calculating the probabilities instead of logits from each
predictions_probabilities_test = tf.nn.softmax(predictions_test.predictions)

metrics_test = compute_metrics_end(preds=preds_test_test, refs=predictions_test.label_ids)

print(metrics_test)
print(metrics_val)

100%|██████████| 90/90 [00:26<00:00,  3.45it/s]
100%|██████████| 90/90 [00:26<00:00,  3.37it/s]
100%|██████████| 90/90 [00:26<00:00,  3.44it/s]


{'accuracy': 0.7034965034965035, 'precision': 0.7208567419348564, 'recall': 0.7034965034965035, 'f1': 0.7066208658676242}
{'accuracy': 0.665266106442577, 'precision': 0.6759627052306597, 'recall': 0.665266106442577, 'f1': 0.6661282766130079}


In [12]:
import pandas as pd

data = {'Predicted Labels': ["negative" if i == 0 else "neutral" if i == 1 else "positive" for i in preds_val_val],
        'True Labels': ["negative" if i == 0 else "neutral" if i == 1 else "positive" for i in predictions_val.label_ids],
        'Misclassification': ["TRUE" if preds_val_val[i] == predictions_val.label_ids[i] else 'MISS' for i, val in enumerate(preds_val_val)],
        'Text': dataset_splitted_dict['valid']['text'],
        'Logit Values': [str(i) for i in predictions_val.predictions],
        'Probabilities': [str(i) for i in np.asarray(predictions_probabilities)]}
df = pd.DataFrame(data)


Unnamed: 0,Predicted Labels,True Labels,Misclassification,Text,Logit Values,Probabilities
0,negative,neutral,MISS,"Det er så ikke den, jeg var lidt for entusiast...",[ 0.9340587 -0.01499593 -1.1753395 ],[0.66294634 0.25663105 0.08042265]
1,neutral,negative,MISS,Åh gud... Har intet med det at gøre,[ 0.49472684 0.52091426 -1.059791 ],[0.44686255 0.45871928 0.09441815]
2,neutral,negative,MISS,"Det kan du sige, men med Artikel 13 bliver det...",[ 0.61225605 0.7817579 -0.8384904 ],[0.4133752 0.4897316 0.09689319]
3,positive,positive,TRUE,"Tak gode mand! Da jeg fik notifikationen, troe...",[-1.0158063 -0.6964614 2.1499164],[0.03834047 0.05276516 0.90889436]
4,negative,negative,TRUE,Den er særdeles troværdig. Viser sandheden om ...,[ 1.1367214 -0.47244602 -0.9655315 ],[0.7562952 0.15129997 0.09240479]
...,...,...,...,...,...,...
709,neutral,negative,MISS,Mere brug af GMO i landbruget kan ikke løse al...,[ 0.12570588 0.72970027 -1.148641 ],[0.32164422 0.5884197 0.08993609]
710,neutral,neutral,TRUE,Rusland fejrer femåret for annekteringen af Kr...,[-0.76456714 1.6279043 -0.502376 ],[0.07552715 0.8263046 0.09816829]
711,negative,negative,TRUE,"Det er minimeret til det ekstreme, så måske ik...",[ 1.3416474 -0.5490633 -1.1396143],[0.8099776 0.12227785 0.06774461]
712,neutral,neutral,TRUE,🇩🇰 #Superliga\nGrupo 1 Descenso\n29º Fecha\n40...,[-1.4751852 1.7833322 -0.67671984],[0.03420784 0.889778 0.07601419]


In [13]:
import pandas as pd
from sklearn.metrics import classification_report

# Extract the true and predicted labels
true_labels = df['True Labels']
predicted_labels = df['Predicted Labels']

# Create a mapping for the labels to numbers if needed
label_mapping = {'negative': 0, 'neutral': 1, 'positive': 2}

# Map the labels to numbers using the mapping
true_labels_mapped = true_labels.map(label_mapping)
predicted_labels_mapped = predicted_labels.map(label_mapping)

# Generate the classification report
report = classification_report(true_labels_mapped, predicted_labels_mapped, target_names=label_mapping.keys())

# Print the classification report
print(report)

              precision    recall  f1-score   support

    negative       0.72      0.70      0.71       268
     neutral       0.58      0.70      0.63       252
    positive       0.73      0.58      0.65       194

    accuracy                           0.67       714
   macro avg       0.68      0.66      0.66       714
weighted avg       0.68      0.67      0.67       714



# Training loop

In [63]:
for i in range(10):    
    trainer.train()

    trainer.evaluate()

    import tensorflow as tf

    # creating model predictions for the validation data
    predictions_val = trainer.predict(tokenized_dataset["valid"])

    # choosing the prediction that has the highest probability 
    preds_val_val = np.argmax(predictions_val.predictions, axis=-1)

    # calculating the probabilities instead of logits from each
    predictions_probabilities = tf.nn.softmax(predictions_val.predictions)

    def compute_metrics_end(preds, refs):
        metric0 = evaluate.load("accuracy")
        metric1 = evaluate.load("precision")
        metric2 = evaluate.load("recall")
        metric3 = evaluate.load("f1")
        
        #logits, labels = eval_pred
        #predictions = np.argmax(logits, axis=-1)
        accuracy = metric0.compute(predictions=preds, references=refs)["accuracy"]
        precision = metric1.compute(predictions=preds, references=refs, average="weighted")["precision"]
        recall = metric2.compute(predictions=preds, references=refs, average="weighted")["recall"]
        f1 = metric3.compute(predictions=preds, references=refs, average="weighted")["f1"]
        return {"accuracy": accuracy, "precision": precision, "recall": recall, "f1": f1}

    metrics_val = compute_metrics_end(preds=preds_val_val, refs=predictions_val.label_ids)

    import tensorflow as tf

    # creating model predictions for the validation data
    predictions_test = trainer.predict(tokenized_dataset["test"])

    # choosing the prediction that has the highest probability 
    preds_test_test = np.argmax(predictions_test.predictions, axis=-1)

    # calculating the probabilities instead of logits from each
    predictions_probabilities_test = tf.nn.softmax(predictions_test.predictions)

    metrics_test = compute_metrics_end(preds=preds_test_test, refs=predictions_test.label_ids)

    print(metrics_test)
    print(metrics_val)

    import pandas as pd

    data = {'Predicted Labels': ["negative" if i == 0 else "neutral" if i == 1 else "positive" for i in preds_val_val],
            'True Labels': ["negative" if i == 0 else "neutral" if i == 1 else "positive" for i in predictions_val.label_ids],
            'Misclassification': ["TRUE" if preds_val_val[i] == predictions_val.label_ids[i] else 'MISS' for i, val in enumerate(preds_val_val)],
            'Text': dataset_splitted_dict['valid']['text'],
            'Logit Values': [str(i) for i in predictions_val.predictions],
            'Probabilities': [str(i) for i in np.asarray(predictions_probabilities)]}
    df = pd.DataFrame(data)



    import pandas as pd
    from sklearn.metrics import classification_report

    # Extract the true and predicted labels
    true_labels = df['True Labels']
    predicted_labels = df['Predicted Labels']

    # Create a mapping for the labels to numbers if needed
    label_mapping = {'negative': 0, 'neutral': 1, 'positive': 2}

    # Map the labels to numbers using the mapping
    true_labels_mapped = true_labels.map(label_mapping)
    predicted_labels_mapped = predicted_labels.map(label_mapping)

    # Generate the classification report
    report = classification_report(true_labels_mapped, predicted_labels_mapped, target_names=label_mapping.keys(), output_dict=True)


    # save classification report to csv
    df = pd.DataFrame(report).transpose()
    df.to_csv(f"../data/{i+1}classification_report_gpt4_plus_org_paraphrasings.csv")

  0%|          | 10/2144 [00:12<37:07,  1.04s/it] 

{'loss': 1.153, 'learning_rate': 9.953358208955226e-06, 'epoch': 0.02}


  1%|          | 20/2144 [00:22<35:54,  1.01s/it]

{'loss': 1.112, 'learning_rate': 9.906716417910449e-06, 'epoch': 0.04}


  1%|▏         | 30/2144 [00:32<35:28,  1.01s/it]

{'loss': 1.148, 'learning_rate': 9.860074626865672e-06, 'epoch': 0.06}


  2%|▏         | 40/2144 [00:42<35:20,  1.01s/it]

{'loss': 1.1064, 'learning_rate': 9.813432835820897e-06, 'epoch': 0.07}


  2%|▏         | 50/2144 [00:53<35:08,  1.01s/it]

{'loss': 1.1207, 'learning_rate': 9.76679104477612e-06, 'epoch': 0.09}


  3%|▎         | 60/2144 [01:03<34:57,  1.01s/it]

{'loss': 1.0512, 'learning_rate': 9.720149253731343e-06, 'epoch': 0.11}


  3%|▎         | 70/2144 [01:13<34:45,  1.01s/it]

{'loss': 1.0192, 'learning_rate': 9.673507462686568e-06, 'epoch': 0.13}


  4%|▎         | 80/2144 [01:23<34:37,  1.01s/it]

{'loss': 1.0382, 'learning_rate': 9.626865671641792e-06, 'epoch': 0.15}


  4%|▍         | 90/2144 [01:33<34:35,  1.01s/it]

{'loss': 1.0802, 'learning_rate': 9.580223880597016e-06, 'epoch': 0.17}


  5%|▍         | 100/2144 [01:43<34:15,  1.01s/it]

{'loss': 1.084, 'learning_rate': 9.533582089552239e-06, 'epoch': 0.19}


  5%|▌         | 110/2144 [01:53<34:04,  1.01s/it]

{'loss': 1.054, 'learning_rate': 9.486940298507463e-06, 'epoch': 0.21}


  6%|▌         | 120/2144 [02:03<33:53,  1.00s/it]

{'loss': 1.0234, 'learning_rate': 9.440298507462688e-06, 'epoch': 0.22}


  6%|▌         | 130/2144 [02:13<33:45,  1.01s/it]

{'loss': 1.034, 'learning_rate': 9.393656716417911e-06, 'epoch': 0.24}


  7%|▋         | 140/2144 [02:23<33:34,  1.01s/it]

{'loss': 1.0482, 'learning_rate': 9.347014925373134e-06, 'epoch': 0.26}


  7%|▋         | 150/2144 [02:33<33:32,  1.01s/it]

{'loss': 0.9043, 'learning_rate': 9.30037313432836e-06, 'epoch': 0.28}


  7%|▋         | 160/2144 [02:43<33:12,  1.00s/it]

{'loss': 0.9537, 'learning_rate': 9.253731343283582e-06, 'epoch': 0.3}


  8%|▊         | 170/2144 [02:53<33:09,  1.01s/it]

{'loss': 0.9375, 'learning_rate': 9.207089552238807e-06, 'epoch': 0.32}


  8%|▊         | 180/2144 [03:04<33:00,  1.01s/it]

{'loss': 0.8641, 'learning_rate': 9.16044776119403e-06, 'epoch': 0.34}


  9%|▉         | 190/2144 [03:14<32:45,  1.01s/it]

{'loss': 0.9378, 'learning_rate': 9.113805970149255e-06, 'epoch': 0.35}


  9%|▉         | 200/2144 [03:24<32:34,  1.01s/it]

{'loss': 0.9156, 'learning_rate': 9.067164179104478e-06, 'epoch': 0.37}


 10%|▉         | 210/2144 [03:34<32:29,  1.01s/it]

{'loss': 0.7941, 'learning_rate': 9.020522388059703e-06, 'epoch': 0.39}


 10%|█         | 220/2144 [03:44<32:17,  1.01s/it]

{'loss': 0.8469, 'learning_rate': 8.973880597014926e-06, 'epoch': 0.41}


 11%|█         | 230/2144 [03:54<32:03,  1.00s/it]

{'loss': 0.767, 'learning_rate': 8.927238805970149e-06, 'epoch': 0.43}


 11%|█         | 240/2144 [04:04<31:54,  1.01s/it]

{'loss': 0.9383, 'learning_rate': 8.880597014925374e-06, 'epoch': 0.45}


 12%|█▏        | 250/2144 [04:14<31:45,  1.01s/it]

{'loss': 0.8667, 'learning_rate': 8.833955223880599e-06, 'epoch': 0.47}


 12%|█▏        | 260/2144 [04:24<31:37,  1.01s/it]

{'loss': 0.7539, 'learning_rate': 8.787313432835822e-06, 'epoch': 0.49}


 13%|█▎        | 270/2144 [04:34<31:27,  1.01s/it]

{'loss': 0.8943, 'learning_rate': 8.740671641791045e-06, 'epoch': 0.5}


 13%|█▎        | 280/2144 [04:44<31:20,  1.01s/it]

{'loss': 0.7237, 'learning_rate': 8.69402985074627e-06, 'epoch': 0.52}


 14%|█▎        | 290/2144 [04:54<31:09,  1.01s/it]

{'loss': 0.8549, 'learning_rate': 8.647388059701494e-06, 'epoch': 0.54}


 14%|█▍        | 300/2144 [05:05<31:01,  1.01s/it]

{'loss': 0.6968, 'learning_rate': 8.600746268656716e-06, 'epoch': 0.56}


 14%|█▍        | 310/2144 [05:15<30:49,  1.01s/it]

{'loss': 0.7717, 'learning_rate': 8.55410447761194e-06, 'epoch': 0.58}


 15%|█▍        | 320/2144 [05:25<30:41,  1.01s/it]

{'loss': 0.7732, 'learning_rate': 8.507462686567165e-06, 'epoch': 0.6}


 15%|█▌        | 330/2144 [05:35<30:30,  1.01s/it]

{'loss': 0.6392, 'learning_rate': 8.460820895522389e-06, 'epoch': 0.62}


 16%|█▌        | 340/2144 [05:45<30:20,  1.01s/it]

{'loss': 0.8537, 'learning_rate': 8.414179104477612e-06, 'epoch': 0.63}


 16%|█▋        | 350/2144 [05:55<30:12,  1.01s/it]

{'loss': 0.7489, 'learning_rate': 8.367537313432836e-06, 'epoch': 0.65}


 17%|█▋        | 360/2144 [06:05<29:59,  1.01s/it]

{'loss': 0.7445, 'learning_rate': 8.320895522388061e-06, 'epoch': 0.67}


 17%|█▋        | 370/2144 [06:15<29:50,  1.01s/it]

{'loss': 0.6895, 'learning_rate': 8.274253731343284e-06, 'epoch': 0.69}


 18%|█▊        | 380/2144 [06:25<29:37,  1.01s/it]

{'loss': 0.6693, 'learning_rate': 8.227611940298507e-06, 'epoch': 0.71}


 18%|█▊        | 390/2144 [06:35<29:31,  1.01s/it]

{'loss': 0.6527, 'learning_rate': 8.180970149253732e-06, 'epoch': 0.73}


 19%|█▊        | 400/2144 [06:46<29:20,  1.01s/it]

{'loss': 0.8611, 'learning_rate': 8.134328358208955e-06, 'epoch': 0.75}


 19%|█▉        | 410/2144 [06:56<29:10,  1.01s/it]

{'loss': 0.7509, 'learning_rate': 8.08768656716418e-06, 'epoch': 0.76}


 20%|█▉        | 420/2144 [07:06<29:01,  1.01s/it]

{'loss': 0.657, 'learning_rate': 8.041044776119403e-06, 'epoch': 0.78}


 20%|██        | 430/2144 [07:16<28:54,  1.01s/it]

{'loss': 0.8333, 'learning_rate': 7.994402985074628e-06, 'epoch': 0.8}


 21%|██        | 440/2144 [07:26<28:41,  1.01s/it]

{'loss': 0.6547, 'learning_rate': 7.947761194029851e-06, 'epoch': 0.82}


 21%|██        | 450/2144 [07:36<28:29,  1.01s/it]

{'loss': 0.7453, 'learning_rate': 7.901119402985076e-06, 'epoch': 0.84}


 21%|██▏       | 460/2144 [07:46<28:25,  1.01s/it]

{'loss': 0.581, 'learning_rate': 7.854477611940299e-06, 'epoch': 0.86}


 22%|██▏       | 470/2144 [07:56<28:14,  1.01s/it]

{'loss': 0.9021, 'learning_rate': 7.807835820895522e-06, 'epoch': 0.88}


 22%|██▏       | 480/2144 [08:06<27:57,  1.01s/it]

{'loss': 0.835, 'learning_rate': 7.761194029850747e-06, 'epoch': 0.9}


 23%|██▎       | 490/2144 [08:16<27:46,  1.01s/it]

{'loss': 0.7433, 'learning_rate': 7.714552238805972e-06, 'epoch': 0.91}


 23%|██▎       | 500/2144 [08:27<27:39,  1.01s/it]

{'loss': 0.6877, 'learning_rate': 7.667910447761195e-06, 'epoch': 0.93}


 24%|██▍       | 510/2144 [08:37<27:26,  1.01s/it]

{'loss': 0.7761, 'learning_rate': 7.621268656716419e-06, 'epoch': 0.95}


 24%|██▍       | 520/2144 [08:47<27:43,  1.02s/it]

{'loss': 0.8236, 'learning_rate': 7.574626865671643e-06, 'epoch': 0.97}


 25%|██▍       | 530/2144 [08:57<27:54,  1.04s/it]

{'loss': 0.7652, 'learning_rate': 7.527985074626867e-06, 'epoch': 0.99}


                                                  
 25%|██▌       | 536/2144 [09:37<47:41,  1.78s/it]

{'eval_loss': 0.7254549264907837, 'eval_accuracy': 0.6923076923076923, 'eval_precision': 0.7056493916395856, 'eval_recall': 0.6923076923076923, 'eval_f1': 0.6914290297250478, 'eval_runtime': 31.5416, 'eval_samples_per_second': 22.668, 'eval_steps_per_second': 2.853, 'epoch': 1.0}


 25%|██▌       | 540/2144 [09:48<2:17:03,  5.13s/it]

{'loss': 0.7826, 'learning_rate': 7.48134328358209e-06, 'epoch': 1.01}


 26%|██▌       | 550/2144 [09:58<29:52,  1.12s/it]  

{'loss': 0.5474, 'learning_rate': 7.434701492537314e-06, 'epoch': 1.03}


 26%|██▌       | 560/2144 [10:08<26:45,  1.01s/it]

{'loss': 0.6007, 'learning_rate': 7.3880597014925385e-06, 'epoch': 1.04}


 27%|██▋       | 570/2144 [10:18<26:29,  1.01s/it]

{'loss': 0.5499, 'learning_rate': 7.3414179104477625e-06, 'epoch': 1.06}


 27%|██▋       | 580/2144 [10:28<26:17,  1.01s/it]

{'loss': 0.5307, 'learning_rate': 7.2947761194029856e-06, 'epoch': 1.08}


 28%|██▊       | 590/2144 [10:39<26:10,  1.01s/it]

{'loss': 0.4932, 'learning_rate': 7.2481343283582095e-06, 'epoch': 1.1}


 28%|██▊       | 600/2144 [10:49<26:00,  1.01s/it]

{'loss': 0.5984, 'learning_rate': 7.2014925373134335e-06, 'epoch': 1.12}


 28%|██▊       | 610/2144 [10:59<25:51,  1.01s/it]

{'loss': 0.5008, 'learning_rate': 7.154850746268658e-06, 'epoch': 1.14}


 29%|██▉       | 620/2144 [11:09<25:45,  1.01s/it]

{'loss': 0.541, 'learning_rate': 7.1082089552238805e-06, 'epoch': 1.16}


 29%|██▉       | 630/2144 [11:19<25:27,  1.01s/it]

{'loss': 0.6852, 'learning_rate': 7.061567164179105e-06, 'epoch': 1.18}


 30%|██▉       | 640/2144 [11:29<25:19,  1.01s/it]

{'loss': 0.4919, 'learning_rate': 7.014925373134329e-06, 'epoch': 1.19}


 30%|███       | 650/2144 [11:39<25:14,  1.01s/it]

{'loss': 0.6757, 'learning_rate': 6.968283582089553e-06, 'epoch': 1.21}


 31%|███       | 660/2144 [11:49<24:56,  1.01s/it]

{'loss': 0.5459, 'learning_rate': 6.921641791044776e-06, 'epoch': 1.23}


 31%|███▏      | 670/2144 [11:59<24:49,  1.01s/it]

{'loss': 0.6082, 'learning_rate': 6.875e-06, 'epoch': 1.25}


 32%|███▏      | 680/2144 [12:10<24:39,  1.01s/it]

{'loss': 0.5389, 'learning_rate': 6.828358208955225e-06, 'epoch': 1.27}


 32%|███▏      | 690/2144 [12:20<24:33,  1.01s/it]

{'loss': 0.598, 'learning_rate': 6.781716417910448e-06, 'epoch': 1.29}


 33%|███▎      | 700/2144 [12:30<24:20,  1.01s/it]

{'loss': 0.6078, 'learning_rate': 6.735074626865672e-06, 'epoch': 1.31}


 33%|███▎      | 710/2144 [12:40<24:10,  1.01s/it]

{'loss': 0.6952, 'learning_rate': 6.688432835820896e-06, 'epoch': 1.32}


 34%|███▎      | 720/2144 [12:50<23:58,  1.01s/it]

{'loss': 0.5348, 'learning_rate': 6.64179104477612e-06, 'epoch': 1.34}


 34%|███▍      | 730/2144 [13:00<23:46,  1.01s/it]

{'loss': 0.5806, 'learning_rate': 6.595149253731343e-06, 'epoch': 1.36}


 35%|███▍      | 740/2144 [13:10<23:33,  1.01s/it]

{'loss': 0.4728, 'learning_rate': 6.548507462686567e-06, 'epoch': 1.38}


 35%|███▍      | 750/2144 [13:20<23:28,  1.01s/it]

{'loss': 0.5218, 'learning_rate': 6.501865671641792e-06, 'epoch': 1.4}


 35%|███▌      | 760/2144 [13:30<23:20,  1.01s/it]

{'loss': 0.6746, 'learning_rate': 6.455223880597016e-06, 'epoch': 1.42}


 36%|███▌      | 770/2144 [13:41<23:07,  1.01s/it]

{'loss': 0.5835, 'learning_rate': 6.408582089552239e-06, 'epoch': 1.44}


 36%|███▋      | 780/2144 [13:51<22:57,  1.01s/it]

{'loss': 0.5598, 'learning_rate': 6.361940298507463e-06, 'epoch': 1.46}


 37%|███▋      | 790/2144 [14:01<22:49,  1.01s/it]

{'loss': 0.8373, 'learning_rate': 6.315298507462687e-06, 'epoch': 1.47}


 37%|███▋      | 800/2144 [14:11<22:39,  1.01s/it]

{'loss': 0.5629, 'learning_rate': 6.2686567164179116e-06, 'epoch': 1.49}


 38%|███▊      | 810/2144 [14:21<22:25,  1.01s/it]

{'loss': 0.4761, 'learning_rate': 6.222014925373135e-06, 'epoch': 1.51}


 38%|███▊      | 820/2144 [14:31<22:16,  1.01s/it]

{'loss': 0.4473, 'learning_rate': 6.175373134328359e-06, 'epoch': 1.53}


 39%|███▊      | 830/2144 [14:41<22:06,  1.01s/it]

{'loss': 0.3866, 'learning_rate': 6.1287313432835826e-06, 'epoch': 1.55}


 39%|███▉      | 840/2144 [14:51<21:55,  1.01s/it]

{'loss': 0.5046, 'learning_rate': 6.0820895522388065e-06, 'epoch': 1.57}


 40%|███▉      | 850/2144 [15:01<21:51,  1.01s/it]

{'loss': 0.3956, 'learning_rate': 6.03544776119403e-06, 'epoch': 1.59}


 40%|████      | 860/2144 [15:11<21:38,  1.01s/it]

{'loss': 0.4763, 'learning_rate': 5.988805970149254e-06, 'epoch': 1.6}


 41%|████      | 870/2144 [15:22<21:23,  1.01s/it]

{'loss': 0.5751, 'learning_rate': 5.942164179104478e-06, 'epoch': 1.62}


 41%|████      | 880/2144 [15:32<21:17,  1.01s/it]

{'loss': 0.3534, 'learning_rate': 5.895522388059702e-06, 'epoch': 1.64}


 42%|████▏     | 890/2144 [15:42<21:10,  1.01s/it]

{'loss': 0.3175, 'learning_rate': 5.848880597014925e-06, 'epoch': 1.66}


 42%|████▏     | 900/2144 [15:52<21:00,  1.01s/it]

{'loss': 0.7707, 'learning_rate': 5.802238805970149e-06, 'epoch': 1.68}


 42%|████▏     | 910/2144 [16:02<20:47,  1.01s/it]

{'loss': 0.5776, 'learning_rate': 5.755597014925373e-06, 'epoch': 1.7}


 43%|████▎     | 920/2144 [16:12<20:34,  1.01s/it]

{'loss': 0.6044, 'learning_rate': 5.708955223880598e-06, 'epoch': 1.72}


 43%|████▎     | 930/2144 [16:22<20:25,  1.01s/it]

{'loss': 0.5912, 'learning_rate': 5.662313432835821e-06, 'epoch': 1.74}


 44%|████▍     | 940/2144 [16:32<20:18,  1.01s/it]

{'loss': 0.6856, 'learning_rate': 5.615671641791045e-06, 'epoch': 1.75}


 44%|████▍     | 950/2144 [16:42<20:05,  1.01s/it]

{'loss': 0.6137, 'learning_rate': 5.569029850746269e-06, 'epoch': 1.77}


 45%|████▍     | 960/2144 [16:52<19:56,  1.01s/it]

{'loss': 0.7811, 'learning_rate': 5.522388059701493e-06, 'epoch': 1.79}


 45%|████▌     | 970/2144 [17:03<19:44,  1.01s/it]

{'loss': 0.5582, 'learning_rate': 5.475746268656716e-06, 'epoch': 1.81}


 46%|████▌     | 980/2144 [17:13<19:37,  1.01s/it]

{'loss': 0.6869, 'learning_rate': 5.429104477611941e-06, 'epoch': 1.83}


 46%|████▌     | 990/2144 [17:23<19:24,  1.01s/it]

{'loss': 0.4329, 'learning_rate': 5.382462686567165e-06, 'epoch': 1.85}


 47%|████▋     | 1000/2144 [17:33<19:15,  1.01s/it]

{'loss': 0.5374, 'learning_rate': 5.335820895522389e-06, 'epoch': 1.87}


 47%|████▋     | 1010/2144 [17:43<19:05,  1.01s/it]

{'loss': 0.4331, 'learning_rate': 5.289179104477612e-06, 'epoch': 1.88}


 48%|████▊     | 1020/2144 [17:53<18:56,  1.01s/it]

{'loss': 0.6313, 'learning_rate': 5.242537313432836e-06, 'epoch': 1.9}


 48%|████▊     | 1030/2144 [18:03<18:44,  1.01s/it]

{'loss': 0.468, 'learning_rate': 5.195895522388061e-06, 'epoch': 1.92}


 49%|████▊     | 1040/2144 [18:13<18:34,  1.01s/it]

{'loss': 0.5157, 'learning_rate': 5.149253731343285e-06, 'epoch': 1.94}


 49%|████▉     | 1050/2144 [18:23<18:28,  1.01s/it]

{'loss': 0.4192, 'learning_rate': 5.102611940298508e-06, 'epoch': 1.96}


 49%|████▉     | 1060/2144 [18:33<18:18,  1.01s/it]

{'loss': 0.5636, 'learning_rate': 5.055970149253732e-06, 'epoch': 1.98}


 50%|████▉     | 1070/2144 [18:44<18:04,  1.01s/it]

{'loss': 0.5035, 'learning_rate': 5.009328358208956e-06, 'epoch': 2.0}


                                                   
 50%|█████     | 1072/2144 [19:16<17:16,  1.03it/s]

{'eval_loss': 0.8527268767356873, 'eval_accuracy': 0.6937062937062937, 'eval_precision': 0.733110216969138, 'eval_recall': 0.6937062937062937, 'eval_f1': 0.6990273093144889, 'eval_runtime': 30.5013, 'eval_samples_per_second': 23.442, 'eval_steps_per_second': 2.951, 'epoch': 2.0}


 50%|█████     | 1080/2144 [19:30<33:54,  1.91s/it]  

{'loss': 0.3882, 'learning_rate': 4.9626865671641796e-06, 'epoch': 2.01}


 51%|█████     | 1090/2144 [19:40<18:12,  1.04s/it]

{'loss': 0.2703, 'learning_rate': 4.9160447761194035e-06, 'epoch': 2.03}


 51%|█████▏    | 1100/2144 [19:50<17:31,  1.01s/it]

{'loss': 0.6178, 'learning_rate': 4.8694029850746275e-06, 'epoch': 2.05}


 52%|█████▏    | 1110/2144 [20:00<17:25,  1.01s/it]

{'loss': 0.3475, 'learning_rate': 4.822761194029851e-06, 'epoch': 2.07}


 52%|█████▏    | 1120/2144 [20:11<17:12,  1.01s/it]

{'loss': 0.2698, 'learning_rate': 4.7761194029850745e-06, 'epoch': 2.09}


 53%|█████▎    | 1130/2144 [20:21<17:06,  1.01s/it]

{'loss': 0.2557, 'learning_rate': 4.729477611940299e-06, 'epoch': 2.11}


 53%|█████▎    | 1140/2144 [20:31<16:53,  1.01s/it]

{'loss': 0.2262, 'learning_rate': 4.682835820895522e-06, 'epoch': 2.13}


 54%|█████▎    | 1150/2144 [20:41<16:46,  1.01s/it]

{'loss': 0.4348, 'learning_rate': 4.636194029850747e-06, 'epoch': 2.15}


 54%|█████▍    | 1160/2144 [20:51<16:34,  1.01s/it]

{'loss': 0.5526, 'learning_rate': 4.58955223880597e-06, 'epoch': 2.16}


 55%|█████▍    | 1170/2144 [21:01<16:24,  1.01s/it]

{'loss': 0.3437, 'learning_rate': 4.542910447761194e-06, 'epoch': 2.18}


 55%|█████▌    | 1180/2144 [21:11<16:10,  1.01s/it]

{'loss': 0.363, 'learning_rate': 4.496268656716418e-06, 'epoch': 2.2}


 56%|█████▌    | 1190/2144 [21:21<16:01,  1.01s/it]

{'loss': 0.3431, 'learning_rate': 4.449626865671642e-06, 'epoch': 2.22}


 56%|█████▌    | 1200/2144 [21:31<15:55,  1.01s/it]

{'loss': 0.247, 'learning_rate': 4.402985074626866e-06, 'epoch': 2.24}


 56%|█████▋    | 1210/2144 [21:41<15:42,  1.01s/it]

{'loss': 0.369, 'learning_rate': 4.35634328358209e-06, 'epoch': 2.26}


 57%|█████▋    | 1220/2144 [21:52<15:33,  1.01s/it]

{'loss': 0.4948, 'learning_rate': 4.309701492537314e-06, 'epoch': 2.28}


 57%|█████▋    | 1230/2144 [22:02<15:22,  1.01s/it]

{'loss': 0.3558, 'learning_rate': 4.263059701492538e-06, 'epoch': 2.29}


 58%|█████▊    | 1240/2144 [22:12<15:14,  1.01s/it]

{'loss': 0.198, 'learning_rate': 4.216417910447762e-06, 'epoch': 2.31}


 58%|█████▊    | 1250/2144 [22:22<15:04,  1.01s/it]

{'loss': 0.3967, 'learning_rate': 4.169776119402986e-06, 'epoch': 2.33}


 59%|█████▉    | 1260/2144 [22:32<14:53,  1.01s/it]

{'loss': 0.2108, 'learning_rate': 4.123134328358209e-06, 'epoch': 2.35}


 59%|█████▉    | 1270/2144 [22:42<14:39,  1.01s/it]

{'loss': 0.2337, 'learning_rate': 4.076492537313434e-06, 'epoch': 2.37}


 60%|█████▉    | 1280/2144 [22:52<14:30,  1.01s/it]

{'loss': 0.4895, 'learning_rate': 4.029850746268657e-06, 'epoch': 2.39}


 60%|██████    | 1290/2144 [23:02<14:22,  1.01s/it]

{'loss': 0.4, 'learning_rate': 3.983208955223881e-06, 'epoch': 2.41}


 61%|██████    | 1300/2144 [23:12<14:11,  1.01s/it]

{'loss': 0.3178, 'learning_rate': 3.936567164179105e-06, 'epoch': 2.43}


 61%|██████    | 1310/2144 [23:22<14:03,  1.01s/it]

{'loss': 0.4487, 'learning_rate': 3.889925373134329e-06, 'epoch': 2.44}


 62%|██████▏   | 1320/2144 [23:33<13:50,  1.01s/it]

{'loss': 0.3541, 'learning_rate': 3.843283582089553e-06, 'epoch': 2.46}


 62%|██████▏   | 1330/2144 [23:43<13:41,  1.01s/it]

{'loss': 0.4371, 'learning_rate': 3.7966417910447766e-06, 'epoch': 2.48}


 62%|██████▎   | 1340/2144 [23:53<13:32,  1.01s/it]

{'loss': 0.321, 'learning_rate': 3.7500000000000005e-06, 'epoch': 2.5}


 63%|██████▎   | 1350/2144 [24:03<13:20,  1.01s/it]

{'loss': 0.4301, 'learning_rate': 3.703358208955224e-06, 'epoch': 2.52}


 63%|██████▎   | 1360/2144 [24:13<13:10,  1.01s/it]

{'loss': 0.4732, 'learning_rate': 3.656716417910448e-06, 'epoch': 2.54}


 64%|██████▍   | 1370/2144 [24:23<13:00,  1.01s/it]

{'loss': 0.3877, 'learning_rate': 3.6100746268656715e-06, 'epoch': 2.56}


 64%|██████▍   | 1380/2144 [24:33<12:50,  1.01s/it]

{'loss': 0.2291, 'learning_rate': 3.563432835820896e-06, 'epoch': 2.57}


 65%|██████▍   | 1390/2144 [24:43<12:42,  1.01s/it]

{'loss': 0.2115, 'learning_rate': 3.5167910447761194e-06, 'epoch': 2.59}


 65%|██████▌   | 1400/2144 [24:53<12:29,  1.01s/it]

{'loss': 0.4067, 'learning_rate': 3.4701492537313438e-06, 'epoch': 2.61}


 66%|██████▌   | 1410/2144 [25:03<12:21,  1.01s/it]

{'loss': 0.5094, 'learning_rate': 3.4235074626865673e-06, 'epoch': 2.63}


 66%|██████▌   | 1420/2144 [25:13<12:11,  1.01s/it]

{'loss': 0.2672, 'learning_rate': 3.3768656716417913e-06, 'epoch': 2.65}


 67%|██████▋   | 1430/2144 [25:24<11:58,  1.01s/it]

{'loss': 0.5759, 'learning_rate': 3.3302238805970148e-06, 'epoch': 2.67}


 67%|██████▋   | 1440/2144 [25:34<11:50,  1.01s/it]

{'loss': 0.2629, 'learning_rate': 3.283582089552239e-06, 'epoch': 2.69}


 68%|██████▊   | 1450/2144 [25:44<11:40,  1.01s/it]

{'loss': 0.3033, 'learning_rate': 3.2369402985074627e-06, 'epoch': 2.71}


 68%|██████▊   | 1460/2144 [25:54<11:31,  1.01s/it]

{'loss': 0.3859, 'learning_rate': 3.190298507462687e-06, 'epoch': 2.72}


 69%|██████▊   | 1470/2144 [26:04<11:19,  1.01s/it]

{'loss': 0.5135, 'learning_rate': 3.1436567164179106e-06, 'epoch': 2.74}


 69%|██████▉   | 1480/2144 [26:14<11:10,  1.01s/it]

{'loss': 0.5974, 'learning_rate': 3.0970149253731345e-06, 'epoch': 2.76}


 69%|██████▉   | 1490/2144 [26:24<10:59,  1.01s/it]

{'loss': 0.3048, 'learning_rate': 3.050373134328358e-06, 'epoch': 2.78}


 70%|██████▉   | 1500/2144 [26:34<10:50,  1.01s/it]

{'loss': 0.2197, 'learning_rate': 3.0037313432835824e-06, 'epoch': 2.8}


 70%|███████   | 1510/2144 [26:44<10:39,  1.01s/it]

{'loss': 0.5178, 'learning_rate': 2.957089552238806e-06, 'epoch': 2.82}


 71%|███████   | 1520/2144 [26:54<10:30,  1.01s/it]

{'loss': 0.2426, 'learning_rate': 2.9104477611940303e-06, 'epoch': 2.84}


 71%|███████▏  | 1530/2144 [27:04<10:20,  1.01s/it]

{'loss': 0.3565, 'learning_rate': 2.863805970149254e-06, 'epoch': 2.85}


 72%|███████▏  | 1540/2144 [27:15<10:08,  1.01s/it]

{'loss': 0.3606, 'learning_rate': 2.8171641791044778e-06, 'epoch': 2.87}


 72%|███████▏  | 1550/2144 [27:25<10:01,  1.01s/it]

{'loss': 0.3245, 'learning_rate': 2.7705223880597017e-06, 'epoch': 2.89}


 73%|███████▎  | 1560/2144 [27:35<09:49,  1.01s/it]

{'loss': 0.1817, 'learning_rate': 2.7238805970149257e-06, 'epoch': 2.91}


 73%|███████▎  | 1570/2144 [27:45<09:39,  1.01s/it]

{'loss': 0.3718, 'learning_rate': 2.677238805970149e-06, 'epoch': 2.93}


 74%|███████▎  | 1580/2144 [27:55<09:29,  1.01s/it]

{'loss': 0.2742, 'learning_rate': 2.6305970149253736e-06, 'epoch': 2.95}


 74%|███████▍  | 1590/2144 [28:05<09:18,  1.01s/it]

{'loss': 0.3117, 'learning_rate': 2.583955223880597e-06, 'epoch': 2.97}


 75%|███████▍  | 1600/2144 [28:15<09:08,  1.01s/it]

{'loss': 0.3719, 'learning_rate': 2.537313432835821e-06, 'epoch': 2.99}


                                                   
 75%|███████▌  | 1608/2144 [28:54<08:36,  1.04it/s]

{'eval_loss': 1.1464542150497437, 'eval_accuracy': 0.7090909090909091, 'eval_precision': 0.7158234246956051, 'eval_recall': 0.7090909090909091, 'eval_f1': 0.7114019634503443, 'eval_runtime': 30.6562, 'eval_samples_per_second': 23.323, 'eval_steps_per_second': 2.936, 'epoch': 3.0}


 75%|███████▌  | 1610/2144 [29:02<1:18:21,  8.80s/it]

{'loss': 0.1973, 'learning_rate': 2.490671641791045e-06, 'epoch': 3.0}


 76%|███████▌  | 1620/2144 [29:12<10:45,  1.23s/it]  

{'loss': 0.1369, 'learning_rate': 2.444029850746269e-06, 'epoch': 3.02}


 76%|███████▌  | 1630/2144 [29:23<08:43,  1.02s/it]

{'loss': 0.289, 'learning_rate': 2.397388059701493e-06, 'epoch': 3.04}


 76%|███████▋  | 1640/2144 [29:33<08:29,  1.01s/it]

{'loss': 0.248, 'learning_rate': 2.350746268656717e-06, 'epoch': 3.06}


 77%|███████▋  | 1650/2144 [29:43<08:19,  1.01s/it]

{'loss': 0.2201, 'learning_rate': 2.3041044776119408e-06, 'epoch': 3.08}


 77%|███████▋  | 1660/2144 [29:53<08:08,  1.01s/it]

{'loss': 0.2416, 'learning_rate': 2.2574626865671643e-06, 'epoch': 3.1}


 78%|███████▊  | 1670/2144 [30:03<07:58,  1.01s/it]

{'loss': 0.2348, 'learning_rate': 2.2108208955223883e-06, 'epoch': 3.12}


 78%|███████▊  | 1680/2144 [30:13<07:48,  1.01s/it]

{'loss': 0.2967, 'learning_rate': 2.1641791044776118e-06, 'epoch': 3.13}


 79%|███████▉  | 1690/2144 [30:23<07:37,  1.01s/it]

{'loss': 0.2453, 'learning_rate': 2.1175373134328357e-06, 'epoch': 3.15}


 79%|███████▉  | 1700/2144 [30:33<07:26,  1.01s/it]

{'loss': 0.4199, 'learning_rate': 2.0708955223880597e-06, 'epoch': 3.17}


 80%|███████▉  | 1710/2144 [30:43<07:17,  1.01s/it]

{'loss': 0.2511, 'learning_rate': 2.0242537313432836e-06, 'epoch': 3.19}


 80%|████████  | 1720/2144 [30:53<07:06,  1.01s/it]

{'loss': 0.2343, 'learning_rate': 1.9776119402985076e-06, 'epoch': 3.21}


 81%|████████  | 1730/2144 [31:03<06:57,  1.01s/it]

{'loss': 0.2237, 'learning_rate': 1.9309701492537315e-06, 'epoch': 3.23}


 81%|████████  | 1740/2144 [31:14<06:47,  1.01s/it]

{'loss': 0.3908, 'learning_rate': 1.8843283582089553e-06, 'epoch': 3.25}


 82%|████████▏ | 1750/2144 [31:24<06:37,  1.01s/it]

{'loss': 0.193, 'learning_rate': 1.8376865671641792e-06, 'epoch': 3.26}


 82%|████████▏ | 1760/2144 [31:34<06:27,  1.01s/it]

{'loss': 0.2936, 'learning_rate': 1.791044776119403e-06, 'epoch': 3.28}


 83%|████████▎ | 1770/2144 [31:44<06:15,  1.01s/it]

{'loss': 0.1659, 'learning_rate': 1.7444029850746269e-06, 'epoch': 3.3}


 83%|████████▎ | 1780/2144 [31:54<06:06,  1.01s/it]

{'loss': 0.1853, 'learning_rate': 1.6977611940298508e-06, 'epoch': 3.32}


 83%|████████▎ | 1790/2144 [32:04<05:57,  1.01s/it]

{'loss': 0.2334, 'learning_rate': 1.6511194029850746e-06, 'epoch': 3.34}


 84%|████████▍ | 1800/2144 [32:14<05:47,  1.01s/it]

{'loss': 0.1144, 'learning_rate': 1.6044776119402985e-06, 'epoch': 3.36}


 84%|████████▍ | 1810/2144 [32:24<05:36,  1.01s/it]

{'loss': 0.1494, 'learning_rate': 1.5578358208955225e-06, 'epoch': 3.38}


 85%|████████▍ | 1820/2144 [32:34<05:27,  1.01s/it]

{'loss': 0.19, 'learning_rate': 1.5111940298507464e-06, 'epoch': 3.4}


 85%|████████▌ | 1830/2144 [32:44<05:16,  1.01s/it]

{'loss': 0.2543, 'learning_rate': 1.4645522388059702e-06, 'epoch': 3.41}


 86%|████████▌ | 1840/2144 [32:54<05:06,  1.01s/it]

{'loss': 0.0845, 'learning_rate': 1.417910447761194e-06, 'epoch': 3.43}


 86%|████████▋ | 1850/2144 [33:05<04:56,  1.01s/it]

{'loss': 0.0628, 'learning_rate': 1.371268656716418e-06, 'epoch': 3.45}


 87%|████████▋ | 1860/2144 [33:15<04:45,  1.01s/it]

{'loss': 0.185, 'learning_rate': 1.3246268656716418e-06, 'epoch': 3.47}


 87%|████████▋ | 1870/2144 [33:25<04:37,  1.01s/it]

{'loss': 0.1423, 'learning_rate': 1.2779850746268657e-06, 'epoch': 3.49}


 88%|████████▊ | 1880/2144 [33:35<04:26,  1.01s/it]

{'loss': 0.4765, 'learning_rate': 1.2313432835820897e-06, 'epoch': 3.51}


 88%|████████▊ | 1890/2144 [33:45<04:16,  1.01s/it]

{'loss': 0.0869, 'learning_rate': 1.1847014925373134e-06, 'epoch': 3.53}


 89%|████████▊ | 1900/2144 [33:55<04:06,  1.01s/it]

{'loss': 0.08, 'learning_rate': 1.1380597014925374e-06, 'epoch': 3.54}


 89%|████████▉ | 1910/2144 [34:05<03:56,  1.01s/it]

{'loss': 0.325, 'learning_rate': 1.0914179104477613e-06, 'epoch': 3.56}


 90%|████████▉ | 1920/2144 [34:15<03:46,  1.01s/it]

{'loss': 0.2643, 'learning_rate': 1.044776119402985e-06, 'epoch': 3.58}


 90%|█████████ | 1930/2144 [34:25<03:36,  1.01s/it]

{'loss': 0.1776, 'learning_rate': 9.98134328358209e-07, 'epoch': 3.6}


 90%|█████████ | 1940/2144 [34:35<03:25,  1.01s/it]

{'loss': 0.0619, 'learning_rate': 9.514925373134328e-07, 'epoch': 3.62}


 91%|█████████ | 1950/2144 [34:45<03:15,  1.01s/it]

{'loss': 0.2053, 'learning_rate': 9.048507462686568e-07, 'epoch': 3.64}


 91%|█████████▏| 1960/2144 [34:56<03:05,  1.01s/it]

{'loss': 0.4424, 'learning_rate': 8.582089552238806e-07, 'epoch': 3.66}


 92%|█████████▏| 1970/2144 [35:06<02:55,  1.01s/it]

{'loss': 0.2337, 'learning_rate': 8.115671641791046e-07, 'epoch': 3.68}


 92%|█████████▏| 1980/2144 [35:16<02:45,  1.01s/it]

{'loss': 0.3194, 'learning_rate': 7.649253731343284e-07, 'epoch': 3.69}


 93%|█████████▎| 1990/2144 [35:26<02:35,  1.01s/it]

{'loss': 0.2926, 'learning_rate': 7.182835820895523e-07, 'epoch': 3.71}


 93%|█████████▎| 2000/2144 [35:36<02:25,  1.01s/it]

{'loss': 0.2188, 'learning_rate': 6.716417910447762e-07, 'epoch': 3.73}


 94%|█████████▍| 2010/2144 [35:46<02:15,  1.01s/it]

{'loss': 0.1756, 'learning_rate': 6.25e-07, 'epoch': 3.75}


 94%|█████████▍| 2020/2144 [35:56<02:04,  1.01s/it]

{'loss': 0.2039, 'learning_rate': 5.783582089552239e-07, 'epoch': 3.77}


 95%|█████████▍| 2030/2144 [36:06<01:55,  1.01s/it]

{'loss': 0.0699, 'learning_rate': 5.317164179104478e-07, 'epoch': 3.79}


 95%|█████████▌| 2040/2144 [36:16<01:45,  1.01s/it]

{'loss': 0.213, 'learning_rate': 4.850746268656717e-07, 'epoch': 3.81}


 96%|█████████▌| 2050/2144 [36:26<01:34,  1.01s/it]

{'loss': 0.068, 'learning_rate': 4.384328358208956e-07, 'epoch': 3.82}


 96%|█████████▌| 2060/2144 [36:36<01:24,  1.01s/it]

{'loss': 0.1567, 'learning_rate': 3.9179104477611947e-07, 'epoch': 3.84}


 97%|█████████▋| 2070/2144 [36:47<01:14,  1.01s/it]

{'loss': 0.0847, 'learning_rate': 3.451492537313433e-07, 'epoch': 3.86}


 97%|█████████▋| 2080/2144 [36:57<01:04,  1.01s/it]

{'loss': 0.1402, 'learning_rate': 2.9850746268656716e-07, 'epoch': 3.88}


 97%|█████████▋| 2090/2144 [37:07<00:54,  1.01s/it]

{'loss': 0.1067, 'learning_rate': 2.5186567164179105e-07, 'epoch': 3.9}


 98%|█████████▊| 2100/2144 [37:17<00:44,  1.01s/it]

{'loss': 0.2016, 'learning_rate': 2.0522388059701495e-07, 'epoch': 3.92}


 98%|█████████▊| 2110/2144 [37:27<00:34,  1.01s/it]

{'loss': 0.1387, 'learning_rate': 1.5858208955223882e-07, 'epoch': 3.94}


 99%|█████████▉| 2120/2144 [37:37<00:24,  1.01s/it]

{'loss': 0.1341, 'learning_rate': 1.1194029850746268e-07, 'epoch': 3.96}


 99%|█████████▉| 2130/2144 [37:47<00:14,  1.01s/it]

{'loss': 0.079, 'learning_rate': 6.529850746268657e-08, 'epoch': 3.97}


100%|█████████▉| 2140/2144 [37:57<00:04,  1.01s/it]

{'loss': 0.1806, 'learning_rate': 1.8656716417910447e-08, 'epoch': 3.99}


                                                   
100%|██████████| 2144/2144 [38:30<00:00,  1.03it/s]

{'eval_loss': 1.4323279857635498, 'eval_accuracy': 0.7062937062937062, 'eval_precision': 0.7134924207227917, 'eval_recall': 0.7062937062937062, 'eval_f1': 0.7087440227756433, 'eval_runtime': 28.4842, 'eval_samples_per_second': 25.102, 'eval_steps_per_second': 3.16, 'epoch': 4.0}


100%|██████████| 2144/2144 [38:39<00:00,  1.08s/it]


{'train_runtime': 2319.1889, 'train_samples_per_second': 7.392, 'train_steps_per_second': 0.924, 'train_loss': 0.49537577982227415, 'epoch': 4.0}


100%|██████████| 90/90 [00:27<00:00,  3.24it/s]
100%|██████████| 90/90 [00:27<00:00,  3.21it/s]
100%|██████████| 90/90 [00:28<00:00,  3.21it/s]


{'accuracy': 0.6923076923076923, 'precision': 0.7056493916395856, 'recall': 0.6923076923076923, 'f1': 0.6914290297250478}
{'accuracy': 0.6624649859943977, 'precision': 0.6751775640099236, 'recall': 0.6624649859943977, 'f1': 0.6595132485813353}


  0%|          | 10/2144 [00:10<36:11,  1.02s/it] 

{'loss': 0.5712, 'learning_rate': 9.953358208955226e-06, 'epoch': 0.02}


  1%|          | 20/2144 [00:20<35:42,  1.01s/it]

{'loss': 0.6879, 'learning_rate': 9.906716417910449e-06, 'epoch': 0.04}


  1%|▏         | 30/2144 [00:30<35:33,  1.01s/it]

{'loss': 0.6538, 'learning_rate': 9.860074626865672e-06, 'epoch': 0.06}


  2%|▏         | 40/2144 [00:41<35:21,  1.01s/it]

{'loss': 0.7533, 'learning_rate': 9.813432835820897e-06, 'epoch': 0.07}


  2%|▏         | 50/2144 [00:51<35:07,  1.01s/it]

{'loss': 0.6897, 'learning_rate': 9.76679104477612e-06, 'epoch': 0.09}


  3%|▎         | 60/2144 [01:01<35:00,  1.01s/it]

{'loss': 0.7649, 'learning_rate': 9.720149253731343e-06, 'epoch': 0.11}


  3%|▎         | 70/2144 [01:11<34:53,  1.01s/it]

{'loss': 0.7889, 'learning_rate': 9.673507462686568e-06, 'epoch': 0.13}


  4%|▎         | 80/2144 [01:21<34:38,  1.01s/it]

{'loss': 0.5881, 'learning_rate': 9.626865671641792e-06, 'epoch': 0.15}


  4%|▍         | 90/2144 [01:31<34:29,  1.01s/it]

{'loss': 0.7003, 'learning_rate': 9.580223880597016e-06, 'epoch': 0.17}


  5%|▍         | 100/2144 [01:41<34:24,  1.01s/it]

{'loss': 0.8425, 'learning_rate': 9.533582089552239e-06, 'epoch': 0.19}


  5%|▌         | 110/2144 [01:51<34:13,  1.01s/it]

{'loss': 0.6398, 'learning_rate': 9.486940298507463e-06, 'epoch': 0.21}


  6%|▌         | 120/2144 [02:01<33:58,  1.01s/it]

{'loss': 0.5953, 'learning_rate': 9.440298507462688e-06, 'epoch': 0.22}


  6%|▌         | 130/2144 [02:11<33:54,  1.01s/it]

{'loss': 0.6031, 'learning_rate': 9.393656716417911e-06, 'epoch': 0.24}


  7%|▋         | 140/2144 [02:21<33:41,  1.01s/it]

{'loss': 0.638, 'learning_rate': 9.347014925373134e-06, 'epoch': 0.26}


  7%|▋         | 150/2144 [02:32<33:30,  1.01s/it]

{'loss': 0.5179, 'learning_rate': 9.30037313432836e-06, 'epoch': 0.28}


  7%|▋         | 160/2144 [02:42<33:18,  1.01s/it]

{'loss': 0.747, 'learning_rate': 9.253731343283582e-06, 'epoch': 0.3}


  8%|▊         | 170/2144 [02:52<33:13,  1.01s/it]

{'loss': 0.6752, 'learning_rate': 9.207089552238807e-06, 'epoch': 0.32}


  8%|▊         | 180/2144 [03:02<33:04,  1.01s/it]

{'loss': 0.5588, 'learning_rate': 9.16044776119403e-06, 'epoch': 0.34}


  9%|▉         | 190/2144 [03:12<32:52,  1.01s/it]

{'loss': 0.6316, 'learning_rate': 9.113805970149255e-06, 'epoch': 0.35}


  9%|▉         | 200/2144 [03:22<32:45,  1.01s/it]

{'loss': 0.5642, 'learning_rate': 9.067164179104478e-06, 'epoch': 0.37}


 10%|▉         | 210/2144 [03:32<32:30,  1.01s/it]

{'loss': 0.4506, 'learning_rate': 9.020522388059703e-06, 'epoch': 0.39}


 10%|█         | 220/2144 [03:42<32:22,  1.01s/it]

{'loss': 0.4996, 'learning_rate': 8.973880597014926e-06, 'epoch': 0.41}


 11%|█         | 230/2144 [03:52<32:13,  1.01s/it]

{'loss': 0.4639, 'learning_rate': 8.927238805970149e-06, 'epoch': 0.43}


 11%|█         | 240/2144 [04:02<32:01,  1.01s/it]

{'loss': 0.6552, 'learning_rate': 8.880597014925374e-06, 'epoch': 0.45}


 12%|█▏        | 250/2144 [04:12<31:46,  1.01s/it]

{'loss': 0.5658, 'learning_rate': 8.833955223880599e-06, 'epoch': 0.47}


 12%|█▏        | 260/2144 [04:23<31:40,  1.01s/it]

{'loss': 0.5361, 'learning_rate': 8.787313432835822e-06, 'epoch': 0.49}


 13%|█▎        | 270/2144 [04:33<31:30,  1.01s/it]

{'loss': 0.6826, 'learning_rate': 8.740671641791045e-06, 'epoch': 0.5}


 13%|█▎        | 280/2144 [04:43<31:19,  1.01s/it]

{'loss': 0.5136, 'learning_rate': 8.69402985074627e-06, 'epoch': 0.52}


 14%|█▎        | 290/2144 [04:53<31:13,  1.01s/it]

{'loss': 0.5268, 'learning_rate': 8.647388059701494e-06, 'epoch': 0.54}


 14%|█▍        | 300/2144 [05:03<31:03,  1.01s/it]

{'loss': 0.5386, 'learning_rate': 8.600746268656716e-06, 'epoch': 0.56}


 14%|█▍        | 310/2144 [05:13<30:42,  1.00s/it]

{'loss': 0.55, 'learning_rate': 8.55410447761194e-06, 'epoch': 0.58}


 15%|█▍        | 320/2144 [05:23<30:43,  1.01s/it]

{'loss': 0.6262, 'learning_rate': 8.507462686567165e-06, 'epoch': 0.6}


 15%|█▌        | 330/2144 [05:33<30:29,  1.01s/it]

{'loss': 0.3488, 'learning_rate': 8.460820895522389e-06, 'epoch': 0.62}


 16%|█▌        | 340/2144 [05:43<30:22,  1.01s/it]

{'loss': 0.6842, 'learning_rate': 8.414179104477612e-06, 'epoch': 0.63}


 16%|█▋        | 350/2144 [05:53<30:10,  1.01s/it]

{'loss': 0.4654, 'learning_rate': 8.367537313432836e-06, 'epoch': 0.65}


 17%|█▋        | 360/2144 [06:03<29:57,  1.01s/it]

{'loss': 0.4784, 'learning_rate': 8.320895522388061e-06, 'epoch': 0.67}


 17%|█▋        | 370/2144 [06:14<29:48,  1.01s/it]

{'loss': 0.5346, 'learning_rate': 8.274253731343284e-06, 'epoch': 0.69}


 18%|█▊        | 380/2144 [06:24<29:35,  1.01s/it]

{'loss': 0.3774, 'learning_rate': 8.227611940298507e-06, 'epoch': 0.71}


 18%|█▊        | 390/2144 [06:34<29:32,  1.01s/it]

{'loss': 0.4362, 'learning_rate': 8.180970149253732e-06, 'epoch': 0.73}


 19%|█▊        | 400/2144 [06:44<29:14,  1.01s/it]

{'loss': 0.8143, 'learning_rate': 8.134328358208955e-06, 'epoch': 0.75}


 19%|█▉        | 410/2144 [06:54<29:05,  1.01s/it]

{'loss': 0.4887, 'learning_rate': 8.08768656716418e-06, 'epoch': 0.76}


 20%|█▉        | 420/2144 [07:04<28:57,  1.01s/it]

{'loss': 0.3516, 'learning_rate': 8.041044776119403e-06, 'epoch': 0.78}


 20%|██        | 430/2144 [07:14<28:45,  1.01s/it]

{'loss': 0.551, 'learning_rate': 7.994402985074628e-06, 'epoch': 0.8}


 21%|██        | 440/2144 [07:24<28:40,  1.01s/it]

{'loss': 0.3329, 'learning_rate': 7.947761194029851e-06, 'epoch': 0.82}


 21%|██        | 450/2144 [07:34<28:27,  1.01s/it]

{'loss': 0.5008, 'learning_rate': 7.901119402985076e-06, 'epoch': 0.84}


 21%|██▏       | 460/2144 [07:44<28:16,  1.01s/it]

{'loss': 0.4009, 'learning_rate': 7.854477611940299e-06, 'epoch': 0.86}


 22%|██▏       | 470/2144 [07:54<28:05,  1.01s/it]

{'loss': 0.6464, 'learning_rate': 7.807835820895522e-06, 'epoch': 0.88}


 22%|██▏       | 480/2144 [08:04<27:55,  1.01s/it]

{'loss': 0.691, 'learning_rate': 7.761194029850747e-06, 'epoch': 0.9}


 23%|██▎       | 490/2144 [08:14<27:54,  1.01s/it]

{'loss': 0.559, 'learning_rate': 7.714552238805972e-06, 'epoch': 0.91}


 23%|██▎       | 500/2144 [08:25<27:36,  1.01s/it]

{'loss': 0.4741, 'learning_rate': 7.667910447761195e-06, 'epoch': 0.93}


 24%|██▍       | 510/2144 [08:35<27:25,  1.01s/it]

{'loss': 0.4844, 'learning_rate': 7.621268656716419e-06, 'epoch': 0.95}


 24%|██▍       | 520/2144 [08:45<27:18,  1.01s/it]

{'loss': 0.486, 'learning_rate': 7.574626865671643e-06, 'epoch': 0.97}


 25%|██▍       | 530/2144 [08:55<27:11,  1.01s/it]

{'loss': 0.4507, 'learning_rate': 7.527985074626867e-06, 'epoch': 0.99}


                                                  
 25%|██▌       | 536/2144 [09:32<25:58,  1.03it/s]

{'eval_loss': 0.8260219693183899, 'eval_accuracy': 0.7104895104895105, 'eval_precision': 0.722589750436662, 'eval_recall': 0.7104895104895105, 'eval_f1': 0.7135105835598581, 'eval_runtime': 30.7824, 'eval_samples_per_second': 23.228, 'eval_steps_per_second': 2.924, 'epoch': 1.0}


 25%|██▌       | 540/2144 [09:42<2:09:00,  4.83s/it]

{'loss': 0.5797, 'learning_rate': 7.48134328358209e-06, 'epoch': 1.01}


 26%|██▌       | 550/2144 [09:52<29:41,  1.12s/it]  

{'loss': 0.2974, 'learning_rate': 7.434701492537314e-06, 'epoch': 1.03}


 26%|██▌       | 560/2144 [10:02<26:40,  1.01s/it]

{'loss': 0.3358, 'learning_rate': 7.3880597014925385e-06, 'epoch': 1.04}


 27%|██▋       | 570/2144 [10:12<26:25,  1.01s/it]

{'loss': 0.3191, 'learning_rate': 7.3414179104477625e-06, 'epoch': 1.06}


 27%|██▋       | 580/2144 [10:22<26:17,  1.01s/it]

{'loss': 0.3085, 'learning_rate': 7.2947761194029856e-06, 'epoch': 1.08}


 28%|██▊       | 590/2144 [10:32<26:09,  1.01s/it]

{'loss': 0.3027, 'learning_rate': 7.2481343283582095e-06, 'epoch': 1.1}


 28%|██▊       | 600/2144 [10:43<25:56,  1.01s/it]

{'loss': 0.3724, 'learning_rate': 7.2014925373134335e-06, 'epoch': 1.12}


 28%|██▊       | 610/2144 [10:53<25:49,  1.01s/it]

{'loss': 0.2741, 'learning_rate': 7.154850746268658e-06, 'epoch': 1.14}


 29%|██▉       | 620/2144 [11:03<25:40,  1.01s/it]

{'loss': 0.3105, 'learning_rate': 7.1082089552238805e-06, 'epoch': 1.16}


 29%|██▉       | 630/2144 [11:13<25:25,  1.01s/it]

{'loss': 0.4188, 'learning_rate': 7.061567164179105e-06, 'epoch': 1.18}


 30%|██▉       | 640/2144 [11:23<25:18,  1.01s/it]

{'loss': 0.3351, 'learning_rate': 7.014925373134329e-06, 'epoch': 1.19}


 30%|███       | 650/2144 [11:33<25:08,  1.01s/it]

{'loss': 0.3056, 'learning_rate': 6.968283582089553e-06, 'epoch': 1.21}


 31%|███       | 660/2144 [11:43<24:55,  1.01s/it]

{'loss': 0.3343, 'learning_rate': 6.921641791044776e-06, 'epoch': 1.23}


 31%|███▏      | 670/2144 [11:53<24:46,  1.01s/it]

{'loss': 0.2766, 'learning_rate': 6.875e-06, 'epoch': 1.25}


 32%|███▏      | 680/2144 [12:03<24:34,  1.01s/it]

{'loss': 0.3037, 'learning_rate': 6.828358208955225e-06, 'epoch': 1.27}


 32%|███▏      | 690/2144 [12:13<24:25,  1.01s/it]

{'loss': 0.3929, 'learning_rate': 6.781716417910448e-06, 'epoch': 1.29}


 33%|███▎      | 700/2144 [12:23<24:14,  1.01s/it]

{'loss': 0.449, 'learning_rate': 6.735074626865672e-06, 'epoch': 1.31}


 33%|███▎      | 710/2144 [12:34<24:05,  1.01s/it]

{'loss': 0.6245, 'learning_rate': 6.688432835820896e-06, 'epoch': 1.32}


 34%|███▎      | 720/2144 [12:44<23:54,  1.01s/it]

{'loss': 0.3863, 'learning_rate': 6.64179104477612e-06, 'epoch': 1.34}


 34%|███▍      | 730/2144 [12:54<23:49,  1.01s/it]

{'loss': 0.263, 'learning_rate': 6.595149253731343e-06, 'epoch': 1.36}


 35%|███▍      | 740/2144 [13:04<23:33,  1.01s/it]

{'loss': 0.2698, 'learning_rate': 6.548507462686567e-06, 'epoch': 1.38}


 35%|███▍      | 750/2144 [13:14<23:29,  1.01s/it]

{'loss': 0.1807, 'learning_rate': 6.501865671641792e-06, 'epoch': 1.4}


 35%|███▌      | 760/2144 [13:24<23:13,  1.01s/it]

{'loss': 0.336, 'learning_rate': 6.455223880597016e-06, 'epoch': 1.42}


 36%|███▌      | 770/2144 [13:34<23:06,  1.01s/it]

{'loss': 0.4337, 'learning_rate': 6.408582089552239e-06, 'epoch': 1.44}


 36%|███▋      | 780/2144 [13:44<22:56,  1.01s/it]

{'loss': 0.4278, 'learning_rate': 6.361940298507463e-06, 'epoch': 1.46}


 37%|███▋      | 790/2144 [13:54<22:46,  1.01s/it]

{'loss': 0.5296, 'learning_rate': 6.315298507462687e-06, 'epoch': 1.47}


 37%|███▋      | 800/2144 [14:04<22:35,  1.01s/it]

{'loss': 0.2012, 'learning_rate': 6.2686567164179116e-06, 'epoch': 1.49}


 38%|███▊      | 810/2144 [14:14<22:27,  1.01s/it]

{'loss': 0.2152, 'learning_rate': 6.222014925373135e-06, 'epoch': 1.51}


 38%|███▊      | 820/2144 [14:25<22:20,  1.01s/it]

{'loss': 0.1809, 'learning_rate': 6.175373134328359e-06, 'epoch': 1.53}


 39%|███▊      | 830/2144 [14:35<22:05,  1.01s/it]

{'loss': 0.2747, 'learning_rate': 6.1287313432835826e-06, 'epoch': 1.55}


 39%|███▉      | 840/2144 [14:45<21:55,  1.01s/it]

{'loss': 0.4527, 'learning_rate': 6.0820895522388065e-06, 'epoch': 1.57}


 40%|███▉      | 850/2144 [14:55<21:40,  1.00s/it]

{'loss': 0.2496, 'learning_rate': 6.03544776119403e-06, 'epoch': 1.59}


 40%|████      | 860/2144 [15:05<21:38,  1.01s/it]

{'loss': 0.3403, 'learning_rate': 5.988805970149254e-06, 'epoch': 1.6}


 41%|████      | 870/2144 [15:15<21:26,  1.01s/it]

{'loss': 0.3162, 'learning_rate': 5.942164179104478e-06, 'epoch': 1.62}


 41%|████      | 880/2144 [15:25<21:13,  1.01s/it]

{'loss': 0.1517, 'learning_rate': 5.895522388059702e-06, 'epoch': 1.64}


 42%|████▏     | 890/2144 [15:35<21:03,  1.01s/it]

{'loss': 0.2604, 'learning_rate': 5.848880597014925e-06, 'epoch': 1.66}


 42%|████▏     | 900/2144 [15:45<20:59,  1.01s/it]

{'loss': 0.6417, 'learning_rate': 5.802238805970149e-06, 'epoch': 1.68}


 42%|████▏     | 910/2144 [15:55<20:47,  1.01s/it]

{'loss': 0.3886, 'learning_rate': 5.755597014925373e-06, 'epoch': 1.7}


 43%|████▎     | 920/2144 [16:06<20:36,  1.01s/it]

{'loss': 0.585, 'learning_rate': 5.708955223880598e-06, 'epoch': 1.72}


 43%|████▎     | 930/2144 [16:16<20:27,  1.01s/it]

{'loss': 0.6313, 'learning_rate': 5.662313432835821e-06, 'epoch': 1.74}


 44%|████▍     | 940/2144 [16:26<20:15,  1.01s/it]

{'loss': 0.3085, 'learning_rate': 5.615671641791045e-06, 'epoch': 1.75}


 44%|████▍     | 950/2144 [16:36<20:03,  1.01s/it]

{'loss': 0.4355, 'learning_rate': 5.569029850746269e-06, 'epoch': 1.77}


 45%|████▍     | 960/2144 [16:46<19:56,  1.01s/it]

{'loss': 0.4377, 'learning_rate': 5.522388059701493e-06, 'epoch': 1.79}


 45%|████▌     | 970/2144 [16:56<19:41,  1.01s/it]

{'loss': 0.1495, 'learning_rate': 5.475746268656716e-06, 'epoch': 1.81}


 46%|████▌     | 980/2144 [17:06<19:35,  1.01s/it]

{'loss': 0.5045, 'learning_rate': 5.429104477611941e-06, 'epoch': 1.83}


 46%|████▌     | 990/2144 [17:16<19:23,  1.01s/it]

{'loss': 0.2674, 'learning_rate': 5.382462686567165e-06, 'epoch': 1.85}


 47%|████▋     | 1000/2144 [17:26<19:16,  1.01s/it]

{'loss': 0.3321, 'learning_rate': 5.335820895522389e-06, 'epoch': 1.87}


 47%|████▋     | 1010/2144 [17:36<19:06,  1.01s/it]

{'loss': 0.1923, 'learning_rate': 5.289179104477612e-06, 'epoch': 1.88}


 48%|████▊     | 1020/2144 [17:46<18:54,  1.01s/it]

{'loss': 0.478, 'learning_rate': 5.242537313432836e-06, 'epoch': 1.9}


 48%|████▊     | 1030/2144 [17:57<18:43,  1.01s/it]

{'loss': 0.2795, 'learning_rate': 5.195895522388061e-06, 'epoch': 1.92}


 49%|████▊     | 1040/2144 [18:07<18:33,  1.01s/it]

{'loss': 0.3651, 'learning_rate': 5.149253731343285e-06, 'epoch': 1.94}


 49%|████▉     | 1050/2144 [18:17<18:22,  1.01s/it]

{'loss': 0.3146, 'learning_rate': 5.102611940298508e-06, 'epoch': 1.96}


 49%|████▉     | 1060/2144 [18:27<18:14,  1.01s/it]

{'loss': 0.3803, 'learning_rate': 5.055970149253732e-06, 'epoch': 1.98}


 50%|████▉     | 1070/2144 [18:37<18:02,  1.01s/it]

{'loss': 0.2858, 'learning_rate': 5.009328358208956e-06, 'epoch': 2.0}


                                                   
 50%|█████     | 1072/2144 [19:10<17:21,  1.03it/s]

{'eval_loss': 1.221031904220581, 'eval_accuracy': 0.7118881118881119, 'eval_precision': 0.7263105492881912, 'eval_recall': 0.7118881118881119, 'eval_f1': 0.715692963172498, 'eval_runtime': 31.3086, 'eval_samples_per_second': 22.837, 'eval_steps_per_second': 2.875, 'epoch': 2.0}


 50%|█████     | 1080/2144 [19:25<34:29,  1.95s/it]  

{'loss': 0.2148, 'learning_rate': 4.9626865671641796e-06, 'epoch': 2.01}


 51%|█████     | 1090/2144 [19:35<18:09,  1.03s/it]

{'loss': 0.1024, 'learning_rate': 4.9160447761194035e-06, 'epoch': 2.03}


 51%|█████▏    | 1100/2144 [19:45<17:35,  1.01s/it]

{'loss': 0.3686, 'learning_rate': 4.8694029850746275e-06, 'epoch': 2.05}


 52%|█████▏    | 1110/2144 [19:55<17:22,  1.01s/it]

{'loss': 0.2012, 'learning_rate': 4.822761194029851e-06, 'epoch': 2.07}


 52%|█████▏    | 1120/2144 [20:05<17:07,  1.00s/it]

{'loss': 0.061, 'learning_rate': 4.7761194029850745e-06, 'epoch': 2.09}


 53%|█████▎    | 1130/2144 [20:15<17:02,  1.01s/it]

{'loss': 0.1065, 'learning_rate': 4.729477611940299e-06, 'epoch': 2.11}


 53%|█████▎    | 1140/2144 [20:25<16:53,  1.01s/it]

{'loss': 0.1853, 'learning_rate': 4.682835820895522e-06, 'epoch': 2.13}


 54%|█████▎    | 1150/2144 [20:35<16:42,  1.01s/it]

{'loss': 0.14, 'learning_rate': 4.636194029850747e-06, 'epoch': 2.15}


 54%|█████▍    | 1160/2144 [20:45<16:34,  1.01s/it]

{'loss': 0.1925, 'learning_rate': 4.58955223880597e-06, 'epoch': 2.16}


 55%|█████▍    | 1170/2144 [20:56<16:28,  1.01s/it]

{'loss': 0.1278, 'learning_rate': 4.542910447761194e-06, 'epoch': 2.18}


 55%|█████▌    | 1180/2144 [21:06<16:11,  1.01s/it]

{'loss': 0.2639, 'learning_rate': 4.496268656716418e-06, 'epoch': 2.2}


 56%|█████▌    | 1190/2144 [21:16<16:03,  1.01s/it]

{'loss': 0.295, 'learning_rate': 4.449626865671642e-06, 'epoch': 2.22}


 56%|█████▌    | 1200/2144 [21:26<15:52,  1.01s/it]

{'loss': 0.1728, 'learning_rate': 4.402985074626866e-06, 'epoch': 2.24}


 56%|█████▋    | 1210/2144 [21:36<15:44,  1.01s/it]

{'loss': 0.1986, 'learning_rate': 4.35634328358209e-06, 'epoch': 2.26}


 57%|█████▋    | 1220/2144 [21:46<15:29,  1.01s/it]

{'loss': 0.1687, 'learning_rate': 4.309701492537314e-06, 'epoch': 2.28}


 57%|█████▋    | 1230/2144 [21:56<15:21,  1.01s/it]

{'loss': 0.1031, 'learning_rate': 4.263059701492538e-06, 'epoch': 2.29}


 58%|█████▊    | 1240/2144 [22:06<15:07,  1.00s/it]

{'loss': 0.0657, 'learning_rate': 4.216417910447762e-06, 'epoch': 2.31}


 58%|█████▊    | 1250/2144 [22:16<15:01,  1.01s/it]

{'loss': 0.1482, 'learning_rate': 4.169776119402986e-06, 'epoch': 2.33}


 59%|█████▉    | 1260/2144 [22:26<14:50,  1.01s/it]

{'loss': 0.0239, 'learning_rate': 4.123134328358209e-06, 'epoch': 2.35}


 59%|█████▉    | 1270/2144 [22:37<14:42,  1.01s/it]

{'loss': 0.0935, 'learning_rate': 4.076492537313434e-06, 'epoch': 2.37}


 60%|█████▉    | 1280/2144 [22:47<14:29,  1.01s/it]

{'loss': 0.4065, 'learning_rate': 4.029850746268657e-06, 'epoch': 2.39}


 60%|██████    | 1290/2144 [22:57<14:21,  1.01s/it]

{'loss': 0.2169, 'learning_rate': 3.983208955223881e-06, 'epoch': 2.41}


 61%|██████    | 1300/2144 [23:07<14:06,  1.00s/it]

{'loss': 0.136, 'learning_rate': 3.936567164179105e-06, 'epoch': 2.43}


 61%|██████    | 1310/2144 [23:17<14:00,  1.01s/it]

{'loss': 0.3188, 'learning_rate': 3.889925373134329e-06, 'epoch': 2.44}


 62%|██████▏   | 1320/2144 [23:27<13:50,  1.01s/it]

{'loss': 0.1019, 'learning_rate': 3.843283582089553e-06, 'epoch': 2.46}


 62%|██████▏   | 1330/2144 [23:37<13:39,  1.01s/it]

{'loss': 0.3761, 'learning_rate': 3.7966417910447766e-06, 'epoch': 2.48}


 62%|██████▎   | 1340/2144 [23:47<13:31,  1.01s/it]

{'loss': 0.3741, 'learning_rate': 3.7500000000000005e-06, 'epoch': 2.5}


 63%|██████▎   | 1350/2144 [23:57<13:20,  1.01s/it]

{'loss': 0.2139, 'learning_rate': 3.703358208955224e-06, 'epoch': 2.52}


 63%|██████▎   | 1360/2144 [24:07<13:11,  1.01s/it]

{'loss': 0.1146, 'learning_rate': 3.656716417910448e-06, 'epoch': 2.54}


 64%|██████▍   | 1370/2144 [24:17<13:00,  1.01s/it]

{'loss': 0.2906, 'learning_rate': 3.6100746268656715e-06, 'epoch': 2.56}


 64%|██████▍   | 1380/2144 [24:27<12:50,  1.01s/it]

{'loss': 0.0588, 'learning_rate': 3.563432835820896e-06, 'epoch': 2.57}


 65%|██████▍   | 1390/2144 [24:38<12:40,  1.01s/it]

{'loss': 0.1961, 'learning_rate': 3.5167910447761194e-06, 'epoch': 2.59}


 65%|██████▌   | 1400/2144 [24:48<12:29,  1.01s/it]

{'loss': 0.092, 'learning_rate': 3.4701492537313438e-06, 'epoch': 2.61}


 66%|██████▌   | 1410/2144 [24:58<12:21,  1.01s/it]

{'loss': 0.4074, 'learning_rate': 3.4235074626865673e-06, 'epoch': 2.63}


 66%|██████▌   | 1420/2144 [25:08<12:11,  1.01s/it]

{'loss': 0.1161, 'learning_rate': 3.3768656716417913e-06, 'epoch': 2.65}


 67%|██████▋   | 1430/2144 [25:18<11:59,  1.01s/it]

{'loss': 0.4398, 'learning_rate': 3.3302238805970148e-06, 'epoch': 2.67}


 67%|██████▋   | 1440/2144 [25:28<11:49,  1.01s/it]

{'loss': 0.2372, 'learning_rate': 3.283582089552239e-06, 'epoch': 2.69}


 68%|██████▊   | 1450/2144 [25:38<11:40,  1.01s/it]

{'loss': 0.1804, 'learning_rate': 3.2369402985074627e-06, 'epoch': 2.71}


 68%|██████▊   | 1460/2144 [25:48<11:31,  1.01s/it]

{'loss': 0.2652, 'learning_rate': 3.190298507462687e-06, 'epoch': 2.72}


 69%|██████▊   | 1470/2144 [25:58<11:19,  1.01s/it]

{'loss': 0.3567, 'learning_rate': 3.1436567164179106e-06, 'epoch': 2.74}


 69%|██████▉   | 1480/2144 [26:08<11:10,  1.01s/it]

{'loss': 0.2914, 'learning_rate': 3.0970149253731345e-06, 'epoch': 2.76}


 69%|██████▉   | 1490/2144 [26:18<10:59,  1.01s/it]

{'loss': 0.1858, 'learning_rate': 3.050373134328358e-06, 'epoch': 2.78}


 70%|██████▉   | 1500/2144 [26:28<10:50,  1.01s/it]

{'loss': 0.0281, 'learning_rate': 3.0037313432835824e-06, 'epoch': 2.8}


 70%|███████   | 1510/2144 [26:39<10:38,  1.01s/it]

{'loss': 0.2987, 'learning_rate': 2.957089552238806e-06, 'epoch': 2.82}


 71%|███████   | 1520/2144 [26:49<10:29,  1.01s/it]

{'loss': 0.1586, 'learning_rate': 2.9104477611940303e-06, 'epoch': 2.84}


 71%|███████▏  | 1530/2144 [26:59<10:20,  1.01s/it]

{'loss': 0.0819, 'learning_rate': 2.863805970149254e-06, 'epoch': 2.85}


 72%|███████▏  | 1540/2144 [27:09<10:09,  1.01s/it]

{'loss': 0.3216, 'learning_rate': 2.8171641791044778e-06, 'epoch': 2.87}


 72%|███████▏  | 1550/2144 [27:19<10:00,  1.01s/it]

{'loss': 0.0105, 'learning_rate': 2.7705223880597017e-06, 'epoch': 2.89}


 73%|███████▎  | 1560/2144 [27:29<09:49,  1.01s/it]

{'loss': 0.0864, 'learning_rate': 2.7238805970149257e-06, 'epoch': 2.91}


 73%|███████▎  | 1570/2144 [27:39<09:39,  1.01s/it]

{'loss': 0.0074, 'learning_rate': 2.677238805970149e-06, 'epoch': 2.93}


 74%|███████▎  | 1580/2144 [27:49<09:28,  1.01s/it]

{'loss': 0.3238, 'learning_rate': 2.6305970149253736e-06, 'epoch': 2.95}


 74%|███████▍  | 1590/2144 [27:59<09:17,  1.01s/it]

{'loss': 0.2738, 'learning_rate': 2.583955223880597e-06, 'epoch': 2.97}


 75%|███████▍  | 1600/2144 [28:09<09:09,  1.01s/it]

{'loss': 0.2762, 'learning_rate': 2.537313432835821e-06, 'epoch': 2.99}


                                                   
 75%|███████▌  | 1608/2144 [28:46<08:34,  1.04it/s]

{'eval_loss': 1.6277292966842651, 'eval_accuracy': 0.7062937062937062, 'eval_precision': 0.7135429811567743, 'eval_recall': 0.7062937062937062, 'eval_f1': 0.7087028936927388, 'eval_runtime': 28.8097, 'eval_samples_per_second': 24.818, 'eval_steps_per_second': 3.124, 'epoch': 3.0}


 75%|███████▌  | 1610/2144 [28:54<1:14:15,  8.34s/it]

{'loss': 0.196, 'learning_rate': 2.490671641791045e-06, 'epoch': 3.0}


 76%|███████▌  | 1620/2144 [29:04<10:37,  1.22s/it]  

{'loss': 0.0075, 'learning_rate': 2.444029850746269e-06, 'epoch': 3.02}


 76%|███████▌  | 1630/2144 [29:15<08:44,  1.02s/it]

{'loss': 0.0828, 'learning_rate': 2.397388059701493e-06, 'epoch': 3.04}


 76%|███████▋  | 1640/2144 [29:25<08:28,  1.01s/it]

{'loss': 0.0925, 'learning_rate': 2.350746268656717e-06, 'epoch': 3.06}


 77%|███████▋  | 1650/2144 [29:35<08:19,  1.01s/it]

{'loss': 0.0992, 'learning_rate': 2.3041044776119408e-06, 'epoch': 3.08}


 77%|███████▋  | 1660/2144 [29:45<08:06,  1.01s/it]

{'loss': 0.1444, 'learning_rate': 2.2574626865671643e-06, 'epoch': 3.1}


 78%|███████▊  | 1670/2144 [29:55<07:56,  1.01s/it]

{'loss': 0.2008, 'learning_rate': 2.2108208955223883e-06, 'epoch': 3.12}


 78%|███████▊  | 1680/2144 [30:05<07:47,  1.01s/it]

{'loss': 0.1077, 'learning_rate': 2.1641791044776118e-06, 'epoch': 3.13}


 79%|███████▉  | 1690/2144 [30:15<07:38,  1.01s/it]

{'loss': 0.1639, 'learning_rate': 2.1175373134328357e-06, 'epoch': 3.15}


 79%|███████▉  | 1700/2144 [30:25<07:24,  1.00s/it]

{'loss': 0.3138, 'learning_rate': 2.0708955223880597e-06, 'epoch': 3.17}


 80%|███████▉  | 1710/2144 [30:35<07:18,  1.01s/it]

{'loss': 0.0919, 'learning_rate': 2.0242537313432836e-06, 'epoch': 3.19}


 80%|████████  | 1720/2144 [30:45<07:07,  1.01s/it]

{'loss': 0.209, 'learning_rate': 1.9776119402985076e-06, 'epoch': 3.21}


 81%|████████  | 1730/2144 [30:55<06:57,  1.01s/it]

{'loss': 0.1444, 'learning_rate': 1.9309701492537315e-06, 'epoch': 3.23}


 81%|████████  | 1740/2144 [31:05<06:46,  1.01s/it]

{'loss': 0.1668, 'learning_rate': 1.8843283582089553e-06, 'epoch': 3.25}


 82%|████████▏ | 1750/2144 [31:16<06:37,  1.01s/it]

{'loss': 0.0995, 'learning_rate': 1.8376865671641792e-06, 'epoch': 3.26}


 82%|████████▏ | 1760/2144 [31:26<06:26,  1.01s/it]

{'loss': 0.0662, 'learning_rate': 1.791044776119403e-06, 'epoch': 3.28}


 83%|████████▎ | 1770/2144 [31:36<06:17,  1.01s/it]

{'loss': 0.1478, 'learning_rate': 1.7444029850746269e-06, 'epoch': 3.3}


 83%|████████▎ | 1780/2144 [31:46<06:06,  1.01s/it]

{'loss': 0.0336, 'learning_rate': 1.6977611940298508e-06, 'epoch': 3.32}


 83%|████████▎ | 1790/2144 [31:56<05:56,  1.01s/it]

{'loss': 0.1722, 'learning_rate': 1.6511194029850746e-06, 'epoch': 3.34}


 84%|████████▍ | 1800/2144 [32:06<05:48,  1.01s/it]

{'loss': 0.0879, 'learning_rate': 1.6044776119402985e-06, 'epoch': 3.36}


 84%|████████▍ | 1810/2144 [32:16<05:36,  1.01s/it]

{'loss': 0.1873, 'learning_rate': 1.5578358208955225e-06, 'epoch': 3.38}


 85%|████████▍ | 1820/2144 [32:26<05:26,  1.01s/it]

{'loss': 0.0854, 'learning_rate': 1.5111940298507464e-06, 'epoch': 3.4}


 85%|████████▌ | 1830/2144 [32:36<05:14,  1.00s/it]

{'loss': 0.0794, 'learning_rate': 1.4645522388059702e-06, 'epoch': 3.41}


 86%|████████▌ | 1840/2144 [32:46<05:06,  1.01s/it]

{'loss': 0.0035, 'learning_rate': 1.417910447761194e-06, 'epoch': 3.43}


 86%|████████▋ | 1850/2144 [32:56<04:56,  1.01s/it]

{'loss': 0.0144, 'learning_rate': 1.371268656716418e-06, 'epoch': 3.45}


 87%|████████▋ | 1860/2144 [33:06<04:45,  1.01s/it]

{'loss': 0.1688, 'learning_rate': 1.3246268656716418e-06, 'epoch': 3.47}


 87%|████████▋ | 1870/2144 [33:17<04:35,  1.01s/it]

{'loss': 0.0032, 'learning_rate': 1.2779850746268657e-06, 'epoch': 3.49}


 88%|████████▊ | 1880/2144 [33:27<04:27,  1.01s/it]

{'loss': 0.2402, 'learning_rate': 1.2313432835820897e-06, 'epoch': 3.51}


 88%|████████▊ | 1890/2144 [33:37<04:15,  1.01s/it]

{'loss': 0.0093, 'learning_rate': 1.1847014925373134e-06, 'epoch': 3.53}


 89%|████████▊ | 1900/2144 [33:47<04:05,  1.01s/it]

{'loss': 0.0834, 'learning_rate': 1.1380597014925374e-06, 'epoch': 3.54}


 89%|████████▉ | 1910/2144 [33:57<03:55,  1.01s/it]

{'loss': 0.0032, 'learning_rate': 1.0914179104477613e-06, 'epoch': 3.56}


 90%|████████▉ | 1920/2144 [34:07<03:45,  1.01s/it]

{'loss': 0.1417, 'learning_rate': 1.044776119402985e-06, 'epoch': 3.58}


 90%|█████████ | 1930/2144 [34:17<03:35,  1.01s/it]

{'loss': 0.069, 'learning_rate': 9.98134328358209e-07, 'epoch': 3.6}


 90%|█████████ | 1940/2144 [34:27<03:25,  1.01s/it]

{'loss': 0.0062, 'learning_rate': 9.514925373134328e-07, 'epoch': 3.62}


 91%|█████████ | 1950/2144 [34:37<03:15,  1.01s/it]

{'loss': 0.0653, 'learning_rate': 9.048507462686568e-07, 'epoch': 3.64}


 91%|█████████▏| 1960/2144 [34:47<03:05,  1.01s/it]

{'loss': 0.2749, 'learning_rate': 8.582089552238806e-07, 'epoch': 3.66}


 92%|█████████▏| 1970/2144 [34:57<02:55,  1.01s/it]

{'loss': 0.1355, 'learning_rate': 8.115671641791046e-07, 'epoch': 3.68}


 92%|█████████▏| 1980/2144 [35:07<02:45,  1.01s/it]

{'loss': 0.2576, 'learning_rate': 7.649253731343284e-07, 'epoch': 3.69}


 93%|█████████▎| 1990/2144 [35:18<02:34,  1.01s/it]

{'loss': 0.2179, 'learning_rate': 7.182835820895523e-07, 'epoch': 3.71}


 93%|█████████▎| 2000/2144 [35:28<02:25,  1.01s/it]

{'loss': 0.0223, 'learning_rate': 6.716417910447762e-07, 'epoch': 3.73}


 94%|█████████▍| 2010/2144 [35:38<02:14,  1.01s/it]

{'loss': 0.0995, 'learning_rate': 6.25e-07, 'epoch': 3.75}


 94%|█████████▍| 2020/2144 [35:48<02:04,  1.01s/it]

{'loss': 0.1039, 'learning_rate': 5.783582089552239e-07, 'epoch': 3.77}


 95%|█████████▍| 2030/2144 [35:58<01:55,  1.01s/it]

{'loss': 0.063, 'learning_rate': 5.317164179104478e-07, 'epoch': 3.79}


 95%|█████████▌| 2040/2144 [36:08<01:44,  1.01s/it]

{'loss': 0.1084, 'learning_rate': 4.850746268656717e-07, 'epoch': 3.81}


 96%|█████████▌| 2050/2144 [36:18<01:35,  1.01s/it]

{'loss': 0.0922, 'learning_rate': 4.384328358208956e-07, 'epoch': 3.82}


 96%|█████████▌| 2060/2144 [36:28<01:24,  1.01s/it]

{'loss': 0.0233, 'learning_rate': 3.9179104477611947e-07, 'epoch': 3.84}


 97%|█████████▋| 2070/2144 [36:38<01:14,  1.01s/it]

{'loss': 0.0627, 'learning_rate': 3.451492537313433e-07, 'epoch': 3.86}


 97%|█████████▋| 2080/2144 [36:48<01:04,  1.01s/it]

{'loss': 0.0069, 'learning_rate': 2.9850746268656716e-07, 'epoch': 3.88}


 97%|█████████▋| 2090/2144 [36:58<00:54,  1.01s/it]

{'loss': 0.0019, 'learning_rate': 2.5186567164179105e-07, 'epoch': 3.9}


 98%|█████████▊| 2100/2144 [37:08<00:44,  1.01s/it]

{'loss': 0.106, 'learning_rate': 2.0522388059701495e-07, 'epoch': 3.92}


 98%|█████████▊| 2110/2144 [37:19<00:34,  1.01s/it]

{'loss': 0.0172, 'learning_rate': 1.5858208955223882e-07, 'epoch': 3.94}


 99%|█████████▉| 2120/2144 [37:29<00:24,  1.01s/it]

{'loss': 0.0017, 'learning_rate': 1.1194029850746268e-07, 'epoch': 3.96}


 99%|█████████▉| 2130/2144 [37:39<00:14,  1.01s/it]

{'loss': 0.091, 'learning_rate': 6.529850746268657e-08, 'epoch': 3.97}


100%|█████████▉| 2140/2144 [37:49<00:04,  1.01s/it]

{'loss': 0.0721, 'learning_rate': 1.8656716417910447e-08, 'epoch': 3.99}


                                                   
100%|██████████| 2144/2144 [38:22<00:00,  1.03it/s]

{'eval_loss': 1.8036545515060425, 'eval_accuracy': 0.7104895104895105, 'eval_precision': 0.7138840692423754, 'eval_recall': 0.7104895104895105, 'eval_f1': 0.7114422831164924, 'eval_runtime': 29.5861, 'eval_samples_per_second': 24.167, 'eval_steps_per_second': 3.042, 'epoch': 4.0}


100%|██████████| 2144/2144 [38:31<00:00,  1.08s/it]


{'train_runtime': 2311.3622, 'train_samples_per_second': 7.417, 'train_steps_per_second': 0.928, 'train_loss': 0.3049830186839733, 'epoch': 4.0}


100%|██████████| 90/90 [00:30<00:00,  2.92it/s]
100%|██████████| 90/90 [00:30<00:00,  2.94it/s]
100%|██████████| 90/90 [00:31<00:00,  2.84it/s]


{'accuracy': 0.7104895104895105, 'precision': 0.722589750436662, 'recall': 0.7104895104895105, 'f1': 0.7135105835598581}
{'accuracy': 0.6904761904761905, 'precision': 0.6943295984690537, 'recall': 0.6904761904761905, 'f1': 0.6907718308997535}


  0%|          | 10/2144 [00:10<36:14,  1.02s/it] 

{'loss': 0.4076, 'learning_rate': 9.953358208955226e-06, 'epoch': 0.02}


  1%|          | 20/2144 [00:20<35:54,  1.01s/it]

{'loss': 0.3554, 'learning_rate': 9.906716417910449e-06, 'epoch': 0.04}


  1%|▏         | 30/2144 [00:31<35:37,  1.01s/it]

{'loss': 0.551, 'learning_rate': 9.860074626865672e-06, 'epoch': 0.06}


  2%|▏         | 40/2144 [00:41<35:27,  1.01s/it]

{'loss': 0.5136, 'learning_rate': 9.813432835820897e-06, 'epoch': 0.07}


  2%|▏         | 50/2144 [00:51<35:16,  1.01s/it]

{'loss': 0.4019, 'learning_rate': 9.76679104477612e-06, 'epoch': 0.09}


  3%|▎         | 60/2144 [01:01<35:13,  1.01s/it]

{'loss': 0.3974, 'learning_rate': 9.720149253731343e-06, 'epoch': 0.11}


  3%|▎         | 70/2144 [01:11<34:52,  1.01s/it]

{'loss': 0.4983, 'learning_rate': 9.673507462686568e-06, 'epoch': 0.13}


  4%|▎         | 80/2144 [01:21<34:41,  1.01s/it]

{'loss': 0.4233, 'learning_rate': 9.626865671641792e-06, 'epoch': 0.15}


  4%|▍         | 90/2144 [01:31<34:31,  1.01s/it]

{'loss': 0.477, 'learning_rate': 9.580223880597016e-06, 'epoch': 0.17}


  5%|▍         | 100/2144 [01:41<34:19,  1.01s/it]

{'loss': 0.5472, 'learning_rate': 9.533582089552239e-06, 'epoch': 0.19}


  5%|▌         | 110/2144 [01:51<34:14,  1.01s/it]

{'loss': 0.3425, 'learning_rate': 9.486940298507463e-06, 'epoch': 0.21}


  6%|▌         | 120/2144 [02:02<34:01,  1.01s/it]

{'loss': 0.293, 'learning_rate': 9.440298507462688e-06, 'epoch': 0.22}


  6%|▌         | 130/2144 [02:12<33:55,  1.01s/it]

{'loss': 0.3532, 'learning_rate': 9.393656716417911e-06, 'epoch': 0.24}


  7%|▋         | 140/2144 [02:22<33:41,  1.01s/it]

{'loss': 0.4381, 'learning_rate': 9.347014925373134e-06, 'epoch': 0.26}


  7%|▋         | 150/2144 [02:32<33:34,  1.01s/it]

{'loss': 0.3958, 'learning_rate': 9.30037313432836e-06, 'epoch': 0.28}


  7%|▋         | 160/2144 [02:42<33:21,  1.01s/it]

{'loss': 0.4896, 'learning_rate': 9.253731343283582e-06, 'epoch': 0.3}


  8%|▊         | 170/2144 [02:52<33:13,  1.01s/it]

{'loss': 0.4048, 'learning_rate': 9.207089552238807e-06, 'epoch': 0.32}


  8%|▊         | 180/2144 [03:02<33:01,  1.01s/it]

{'loss': 0.2244, 'learning_rate': 9.16044776119403e-06, 'epoch': 0.34}


  9%|▉         | 190/2144 [03:12<32:57,  1.01s/it]

{'loss': 0.4782, 'learning_rate': 9.113805970149255e-06, 'epoch': 0.35}


  9%|▉         | 200/2144 [03:22<32:41,  1.01s/it]

{'loss': 0.3748, 'learning_rate': 9.067164179104478e-06, 'epoch': 0.37}


 10%|▉         | 210/2144 [03:32<32:29,  1.01s/it]

{'loss': 0.2991, 'learning_rate': 9.020522388059703e-06, 'epoch': 0.39}


 10%|█         | 220/2144 [03:43<32:18,  1.01s/it]

{'loss': 0.3998, 'learning_rate': 8.973880597014926e-06, 'epoch': 0.41}


 11%|█         | 230/2144 [03:53<32:07,  1.01s/it]

{'loss': 0.2499, 'learning_rate': 8.927238805970149e-06, 'epoch': 0.43}


 11%|█         | 240/2144 [04:03<32:01,  1.01s/it]

{'loss': 0.5525, 'learning_rate': 8.880597014925374e-06, 'epoch': 0.45}


 12%|█▏        | 250/2144 [04:13<31:48,  1.01s/it]

{'loss': 0.2676, 'learning_rate': 8.833955223880599e-06, 'epoch': 0.47}


 12%|█▏        | 260/2144 [04:23<31:43,  1.01s/it]

{'loss': 0.3421, 'learning_rate': 8.787313432835822e-06, 'epoch': 0.49}


 13%|█▎        | 270/2144 [04:33<31:34,  1.01s/it]

{'loss': 0.5807, 'learning_rate': 8.740671641791045e-06, 'epoch': 0.5}


 13%|█▎        | 280/2144 [04:43<31:17,  1.01s/it]

{'loss': 0.3558, 'learning_rate': 8.69402985074627e-06, 'epoch': 0.52}


 14%|█▎        | 290/2144 [04:53<31:10,  1.01s/it]

{'loss': 0.4177, 'learning_rate': 8.647388059701494e-06, 'epoch': 0.54}


 14%|█▍        | 300/2144 [05:03<30:55,  1.01s/it]

{'loss': 0.3016, 'learning_rate': 8.600746268656716e-06, 'epoch': 0.56}


 14%|█▍        | 310/2144 [05:13<30:50,  1.01s/it]

{'loss': 0.2568, 'learning_rate': 8.55410447761194e-06, 'epoch': 0.58}


 15%|█▍        | 320/2144 [05:23<30:43,  1.01s/it]

{'loss': 0.4508, 'learning_rate': 8.507462686567165e-06, 'epoch': 0.6}


 15%|█▌        | 330/2144 [05:33<30:26,  1.01s/it]

{'loss': 0.1859, 'learning_rate': 8.460820895522389e-06, 'epoch': 0.62}


 16%|█▌        | 340/2144 [05:44<30:20,  1.01s/it]

{'loss': 0.7759, 'learning_rate': 8.414179104477612e-06, 'epoch': 0.63}


 16%|█▋        | 350/2144 [05:54<30:09,  1.01s/it]

{'loss': 0.5447, 'learning_rate': 8.367537313432836e-06, 'epoch': 0.65}


 17%|█▋        | 360/2144 [06:04<30:00,  1.01s/it]

{'loss': 0.2867, 'learning_rate': 8.320895522388061e-06, 'epoch': 0.67}


 17%|█▋        | 370/2144 [06:14<29:55,  1.01s/it]

{'loss': 0.2208, 'learning_rate': 8.274253731343284e-06, 'epoch': 0.69}


 18%|█▊        | 380/2144 [06:24<29:43,  1.01s/it]

{'loss': 0.2808, 'learning_rate': 8.227611940298507e-06, 'epoch': 0.71}


 18%|█▊        | 390/2144 [06:34<29:27,  1.01s/it]

{'loss': 0.2336, 'learning_rate': 8.180970149253732e-06, 'epoch': 0.73}


 19%|█▊        | 400/2144 [06:44<29:24,  1.01s/it]

{'loss': 0.5506, 'learning_rate': 8.134328358208955e-06, 'epoch': 0.75}


 19%|█▉        | 410/2144 [06:54<29:10,  1.01s/it]

{'loss': 0.4762, 'learning_rate': 8.08768656716418e-06, 'epoch': 0.76}


 20%|█▉        | 420/2144 [07:04<29:00,  1.01s/it]

{'loss': 0.2149, 'learning_rate': 8.041044776119403e-06, 'epoch': 0.78}


 20%|██        | 430/2144 [07:15<28:52,  1.01s/it]

{'loss': 0.3567, 'learning_rate': 7.994402985074628e-06, 'epoch': 0.8}


 21%|██        | 440/2144 [07:25<28:44,  1.01s/it]

{'loss': 0.2399, 'learning_rate': 7.947761194029851e-06, 'epoch': 0.82}


 21%|██        | 450/2144 [07:35<28:30,  1.01s/it]

{'loss': 0.3281, 'learning_rate': 7.901119402985076e-06, 'epoch': 0.84}


 21%|██▏       | 460/2144 [07:45<28:16,  1.01s/it]

{'loss': 0.1426, 'learning_rate': 7.854477611940299e-06, 'epoch': 0.86}


 22%|██▏       | 470/2144 [07:55<28:09,  1.01s/it]

{'loss': 0.5968, 'learning_rate': 7.807835820895522e-06, 'epoch': 0.88}


 22%|██▏       | 480/2144 [08:05<27:56,  1.01s/it]

{'loss': 0.5727, 'learning_rate': 7.761194029850747e-06, 'epoch': 0.9}


 23%|██▎       | 490/2144 [08:15<27:53,  1.01s/it]

{'loss': 0.3889, 'learning_rate': 7.714552238805972e-06, 'epoch': 0.91}


 23%|██▎       | 500/2144 [08:25<27:40,  1.01s/it]

{'loss': 0.2734, 'learning_rate': 7.667910447761195e-06, 'epoch': 0.93}


 24%|██▍       | 510/2144 [08:35<27:28,  1.01s/it]

{'loss': 0.4222, 'learning_rate': 7.621268656716419e-06, 'epoch': 0.95}


 24%|██▍       | 520/2144 [08:45<27:18,  1.01s/it]

{'loss': 0.4829, 'learning_rate': 7.574626865671643e-06, 'epoch': 0.97}


 25%|██▍       | 530/2144 [08:55<27:08,  1.01s/it]

{'loss': 0.2742, 'learning_rate': 7.527985074626867e-06, 'epoch': 0.99}


                                                  
 25%|██▌       | 536/2144 [09:33<26:09,  1.02it/s]

{'eval_loss': 1.172874927520752, 'eval_accuracy': 0.7160839160839161, 'eval_precision': 0.7260395936289163, 'eval_recall': 0.7160839160839161, 'eval_f1': 0.718725994813422, 'eval_runtime': 31.4388, 'eval_samples_per_second': 22.743, 'eval_steps_per_second': 2.863, 'epoch': 1.0}


 25%|██▌       | 540/2144 [09:43<2:11:14,  4.91s/it]

{'loss': 0.491, 'learning_rate': 7.48134328358209e-06, 'epoch': 1.01}


 26%|██▌       | 550/2144 [09:53<29:45,  1.12s/it]  

{'loss': 0.222, 'learning_rate': 7.434701492537314e-06, 'epoch': 1.03}


 26%|██▌       | 560/2144 [10:04<26:49,  1.02s/it]

{'loss': 0.2255, 'learning_rate': 7.3880597014925385e-06, 'epoch': 1.04}


 27%|██▋       | 570/2144 [10:14<26:29,  1.01s/it]

{'loss': 0.1829, 'learning_rate': 7.3414179104477625e-06, 'epoch': 1.06}


 27%|██▋       | 580/2144 [10:24<26:14,  1.01s/it]

{'loss': 0.3221, 'learning_rate': 7.2947761194029856e-06, 'epoch': 1.08}


 28%|██▊       | 590/2144 [10:34<26:10,  1.01s/it]

{'loss': 0.1753, 'learning_rate': 7.2481343283582095e-06, 'epoch': 1.1}


 28%|██▊       | 600/2144 [10:44<25:56,  1.01s/it]

{'loss': 0.1741, 'learning_rate': 7.2014925373134335e-06, 'epoch': 1.12}


 28%|██▊       | 610/2144 [10:54<25:46,  1.01s/it]

{'loss': 0.1315, 'learning_rate': 7.154850746268658e-06, 'epoch': 1.14}


 29%|██▉       | 620/2144 [11:04<25:41,  1.01s/it]

{'loss': 0.1673, 'learning_rate': 7.1082089552238805e-06, 'epoch': 1.16}


 29%|██▉       | 630/2144 [11:14<25:33,  1.01s/it]

{'loss': 0.3013, 'learning_rate': 7.061567164179105e-06, 'epoch': 1.18}


 30%|██▉       | 640/2144 [11:24<25:23,  1.01s/it]

{'loss': 0.1527, 'learning_rate': 7.014925373134329e-06, 'epoch': 1.19}


 30%|███       | 650/2144 [11:34<25:05,  1.01s/it]

{'loss': 0.2987, 'learning_rate': 6.968283582089553e-06, 'epoch': 1.21}


 31%|███       | 660/2144 [11:45<24:59,  1.01s/it]

{'loss': 0.2701, 'learning_rate': 6.921641791044776e-06, 'epoch': 1.23}


 31%|███▏      | 670/2144 [11:55<24:45,  1.01s/it]

{'loss': 0.1494, 'learning_rate': 6.875e-06, 'epoch': 1.25}


 32%|███▏      | 680/2144 [12:05<24:38,  1.01s/it]

{'loss': 0.2626, 'learning_rate': 6.828358208955225e-06, 'epoch': 1.27}


 32%|███▏      | 690/2144 [12:15<24:26,  1.01s/it]

{'loss': 0.2591, 'learning_rate': 6.781716417910448e-06, 'epoch': 1.29}


 33%|███▎      | 700/2144 [12:25<24:17,  1.01s/it]

{'loss': 0.2944, 'learning_rate': 6.735074626865672e-06, 'epoch': 1.31}


 33%|███▎      | 710/2144 [12:35<24:03,  1.01s/it]

{'loss': 0.2497, 'learning_rate': 6.688432835820896e-06, 'epoch': 1.32}


 34%|███▎      | 720/2144 [12:45<24:01,  1.01s/it]

{'loss': 0.2148, 'learning_rate': 6.64179104477612e-06, 'epoch': 1.34}


 34%|███▍      | 730/2144 [12:55<23:47,  1.01s/it]

{'loss': 0.147, 'learning_rate': 6.595149253731343e-06, 'epoch': 1.36}


 35%|███▍      | 740/2144 [13:05<23:32,  1.01s/it]

{'loss': 0.1304, 'learning_rate': 6.548507462686567e-06, 'epoch': 1.38}


 35%|███▍      | 750/2144 [13:15<23:25,  1.01s/it]

{'loss': 0.1201, 'learning_rate': 6.501865671641792e-06, 'epoch': 1.4}


 35%|███▌      | 760/2144 [13:26<23:15,  1.01s/it]

{'loss': 0.1492, 'learning_rate': 6.455223880597016e-06, 'epoch': 1.42}


 36%|███▌      | 770/2144 [13:36<23:07,  1.01s/it]

{'loss': 0.2995, 'learning_rate': 6.408582089552239e-06, 'epoch': 1.44}


 36%|███▋      | 780/2144 [13:46<22:57,  1.01s/it]

{'loss': 0.5574, 'learning_rate': 6.361940298507463e-06, 'epoch': 1.46}


 37%|███▋      | 790/2144 [13:56<22:45,  1.01s/it]

{'loss': 0.2806, 'learning_rate': 6.315298507462687e-06, 'epoch': 1.47}


 37%|███▋      | 800/2144 [14:06<22:33,  1.01s/it]

{'loss': 0.3415, 'learning_rate': 6.2686567164179116e-06, 'epoch': 1.49}


 38%|███▊      | 810/2144 [14:16<22:25,  1.01s/it]

{'loss': 0.1716, 'learning_rate': 6.222014925373135e-06, 'epoch': 1.51}


 38%|███▊      | 820/2144 [14:26<22:17,  1.01s/it]

{'loss': 0.0854, 'learning_rate': 6.175373134328359e-06, 'epoch': 1.53}


 39%|███▊      | 830/2144 [14:36<22:07,  1.01s/it]

{'loss': 0.0533, 'learning_rate': 6.1287313432835826e-06, 'epoch': 1.55}


 39%|███▉      | 840/2144 [14:46<21:52,  1.01s/it]

{'loss': 0.2855, 'learning_rate': 6.0820895522388065e-06, 'epoch': 1.57}


 40%|███▉      | 850/2144 [14:56<21:46,  1.01s/it]

{'loss': 0.0133, 'learning_rate': 6.03544776119403e-06, 'epoch': 1.59}


 40%|████      | 860/2144 [15:06<21:29,  1.00s/it]

{'loss': 0.2338, 'learning_rate': 5.988805970149254e-06, 'epoch': 1.6}


 41%|████      | 870/2144 [15:17<21:25,  1.01s/it]

{'loss': 0.2116, 'learning_rate': 5.942164179104478e-06, 'epoch': 1.62}


 41%|████      | 880/2144 [15:27<21:13,  1.01s/it]

{'loss': 0.0792, 'learning_rate': 5.895522388059702e-06, 'epoch': 1.64}


 42%|████▏     | 890/2144 [15:37<21:06,  1.01s/it]

{'loss': 0.1205, 'learning_rate': 5.848880597014925e-06, 'epoch': 1.66}


 42%|████▏     | 900/2144 [15:47<20:53,  1.01s/it]

{'loss': 0.4455, 'learning_rate': 5.802238805970149e-06, 'epoch': 1.68}


 42%|████▏     | 910/2144 [15:57<20:46,  1.01s/it]

{'loss': 0.2545, 'learning_rate': 5.755597014925373e-06, 'epoch': 1.7}


 43%|████▎     | 920/2144 [16:07<20:36,  1.01s/it]

{'loss': 0.3098, 'learning_rate': 5.708955223880598e-06, 'epoch': 1.72}


 43%|████▎     | 930/2144 [16:17<20:29,  1.01s/it]

{'loss': 0.3462, 'learning_rate': 5.662313432835821e-06, 'epoch': 1.74}


 44%|████▍     | 940/2144 [16:27<20:13,  1.01s/it]

{'loss': 0.2454, 'learning_rate': 5.615671641791045e-06, 'epoch': 1.75}


 44%|████▍     | 950/2144 [16:37<20:00,  1.01s/it]

{'loss': 0.2873, 'learning_rate': 5.569029850746269e-06, 'epoch': 1.77}


 45%|████▍     | 960/2144 [16:47<19:55,  1.01s/it]

{'loss': 0.2434, 'learning_rate': 5.522388059701493e-06, 'epoch': 1.79}


 45%|████▌     | 970/2144 [16:57<19:43,  1.01s/it]

{'loss': 0.1626, 'learning_rate': 5.475746268656716e-06, 'epoch': 1.81}


 46%|████▌     | 980/2144 [17:08<19:35,  1.01s/it]

{'loss': 0.172, 'learning_rate': 5.429104477611941e-06, 'epoch': 1.83}


 46%|████▌     | 990/2144 [17:18<19:25,  1.01s/it]

{'loss': 0.2437, 'learning_rate': 5.382462686567165e-06, 'epoch': 1.85}


 47%|████▋     | 1000/2144 [17:28<19:13,  1.01s/it]

{'loss': 0.174, 'learning_rate': 5.335820895522389e-06, 'epoch': 1.87}


 47%|████▋     | 1010/2144 [17:38<19:04,  1.01s/it]

{'loss': 0.2417, 'learning_rate': 5.289179104477612e-06, 'epoch': 1.88}


 48%|████▊     | 1020/2144 [17:48<18:57,  1.01s/it]

{'loss': 0.155, 'learning_rate': 5.242537313432836e-06, 'epoch': 1.9}


 48%|████▊     | 1030/2144 [17:58<18:42,  1.01s/it]

{'loss': 0.0884, 'learning_rate': 5.195895522388061e-06, 'epoch': 1.92}


 49%|████▊     | 1040/2144 [18:08<18:32,  1.01s/it]

{'loss': 0.1911, 'learning_rate': 5.149253731343285e-06, 'epoch': 1.94}


 49%|████▉     | 1050/2144 [18:18<18:24,  1.01s/it]

{'loss': 0.2069, 'learning_rate': 5.102611940298508e-06, 'epoch': 1.96}


 49%|████▉     | 1060/2144 [18:28<18:13,  1.01s/it]

{'loss': 0.194, 'learning_rate': 5.055970149253732e-06, 'epoch': 1.98}


 50%|████▉     | 1070/2144 [18:38<18:06,  1.01s/it]

{'loss': 0.1839, 'learning_rate': 5.009328358208956e-06, 'epoch': 2.0}


                                                   
 50%|█████     | 1072/2144 [19:12<17:25,  1.03it/s]

{'eval_loss': 1.666427731513977, 'eval_accuracy': 0.6881118881118881, 'eval_precision': 0.7142029682504952, 'eval_recall': 0.6881118881118881, 'eval_f1': 0.6942348184278927, 'eval_runtime': 31.7213, 'eval_samples_per_second': 22.54, 'eval_steps_per_second': 2.837, 'epoch': 2.0}


 50%|█████     | 1080/2144 [19:26<34:26,  1.94s/it]  

{'loss': 0.2042, 'learning_rate': 4.9626865671641796e-06, 'epoch': 2.01}


 51%|█████     | 1090/2144 [19:36<18:10,  1.04s/it]

{'loss': 0.0123, 'learning_rate': 4.9160447761194035e-06, 'epoch': 2.03}


 51%|█████▏    | 1100/2144 [19:47<17:37,  1.01s/it]

{'loss': 0.3298, 'learning_rate': 4.8694029850746275e-06, 'epoch': 2.05}


 52%|█████▏    | 1110/2144 [19:57<17:21,  1.01s/it]

{'loss': 0.0331, 'learning_rate': 4.822761194029851e-06, 'epoch': 2.07}


 52%|█████▏    | 1120/2144 [20:07<17:15,  1.01s/it]

{'loss': 0.0461, 'learning_rate': 4.7761194029850745e-06, 'epoch': 2.09}


 53%|█████▎    | 1130/2144 [20:17<17:09,  1.02s/it]

{'loss': 0.0675, 'learning_rate': 4.729477611940299e-06, 'epoch': 2.11}


 53%|█████▎    | 1140/2144 [20:27<16:53,  1.01s/it]

{'loss': 0.1485, 'learning_rate': 4.682835820895522e-06, 'epoch': 2.13}


 54%|█████▎    | 1150/2144 [20:37<16:45,  1.01s/it]

{'loss': 0.0145, 'learning_rate': 4.636194029850747e-06, 'epoch': 2.15}


 54%|█████▍    | 1160/2144 [20:47<16:29,  1.01s/it]

{'loss': 0.0986, 'learning_rate': 4.58955223880597e-06, 'epoch': 2.16}


 55%|█████▍    | 1170/2144 [20:57<16:20,  1.01s/it]

{'loss': 0.0057, 'learning_rate': 4.542910447761194e-06, 'epoch': 2.18}


 55%|█████▌    | 1180/2144 [21:07<16:13,  1.01s/it]

{'loss': 0.1974, 'learning_rate': 4.496268656716418e-06, 'epoch': 2.2}


 56%|█████▌    | 1190/2144 [21:17<16:00,  1.01s/it]

{'loss': 0.1218, 'learning_rate': 4.449626865671642e-06, 'epoch': 2.22}


 56%|█████▌    | 1200/2144 [21:27<15:52,  1.01s/it]

{'loss': 0.0886, 'learning_rate': 4.402985074626866e-06, 'epoch': 2.24}


 56%|█████▋    | 1210/2144 [21:38<15:43,  1.01s/it]

{'loss': 0.0997, 'learning_rate': 4.35634328358209e-06, 'epoch': 2.26}


 57%|█████▋    | 1220/2144 [21:48<15:31,  1.01s/it]

{'loss': 0.0026, 'learning_rate': 4.309701492537314e-06, 'epoch': 2.28}


 57%|█████▋    | 1230/2144 [21:58<15:23,  1.01s/it]

{'loss': 0.0825, 'learning_rate': 4.263059701492538e-06, 'epoch': 2.29}


 58%|█████▊    | 1240/2144 [22:08<15:13,  1.01s/it]

{'loss': 0.0118, 'learning_rate': 4.216417910447762e-06, 'epoch': 2.31}


 58%|█████▊    | 1250/2144 [22:18<15:06,  1.01s/it]

{'loss': 0.1196, 'learning_rate': 4.169776119402986e-06, 'epoch': 2.33}


 59%|█████▉    | 1260/2144 [22:28<14:53,  1.01s/it]

{'loss': 0.0131, 'learning_rate': 4.123134328358209e-06, 'epoch': 2.35}


 59%|█████▉    | 1270/2144 [22:38<14:41,  1.01s/it]

{'loss': 0.0499, 'learning_rate': 4.076492537313434e-06, 'epoch': 2.37}


 60%|█████▉    | 1280/2144 [22:48<14:30,  1.01s/it]

{'loss': 0.216, 'learning_rate': 4.029850746268657e-06, 'epoch': 2.39}


 60%|██████    | 1290/2144 [22:58<14:20,  1.01s/it]

{'loss': 0.0134, 'learning_rate': 3.983208955223881e-06, 'epoch': 2.41}


 61%|██████    | 1300/2144 [23:08<14:12,  1.01s/it]

{'loss': 0.073, 'learning_rate': 3.936567164179105e-06, 'epoch': 2.43}


 61%|██████    | 1310/2144 [23:19<14:02,  1.01s/it]

{'loss': 0.1867, 'learning_rate': 3.889925373134329e-06, 'epoch': 2.44}


 62%|██████▏   | 1320/2144 [23:29<13:51,  1.01s/it]

{'loss': 0.036, 'learning_rate': 3.843283582089553e-06, 'epoch': 2.46}


 62%|██████▏   | 1330/2144 [23:39<13:40,  1.01s/it]

{'loss': 0.1134, 'learning_rate': 3.7966417910447766e-06, 'epoch': 2.48}


 62%|██████▎   | 1340/2144 [23:49<13:32,  1.01s/it]

{'loss': 0.2313, 'learning_rate': 3.7500000000000005e-06, 'epoch': 2.5}


 63%|██████▎   | 1350/2144 [23:59<13:20,  1.01s/it]

{'loss': 0.0013, 'learning_rate': 3.703358208955224e-06, 'epoch': 2.52}


 63%|██████▎   | 1360/2144 [24:09<13:11,  1.01s/it]

{'loss': 0.1122, 'learning_rate': 3.656716417910448e-06, 'epoch': 2.54}


 64%|██████▍   | 1370/2144 [24:19<13:02,  1.01s/it]

{'loss': 0.1329, 'learning_rate': 3.6100746268656715e-06, 'epoch': 2.56}


 64%|██████▍   | 1380/2144 [24:29<12:50,  1.01s/it]

{'loss': 0.0104, 'learning_rate': 3.563432835820896e-06, 'epoch': 2.57}


 65%|██████▍   | 1390/2144 [24:39<12:39,  1.01s/it]

{'loss': 0.0279, 'learning_rate': 3.5167910447761194e-06, 'epoch': 2.59}


 65%|██████▌   | 1400/2144 [24:49<12:30,  1.01s/it]

{'loss': 0.0986, 'learning_rate': 3.4701492537313438e-06, 'epoch': 2.61}


 66%|██████▌   | 1410/2144 [25:00<12:20,  1.01s/it]

{'loss': 0.0202, 'learning_rate': 3.4235074626865673e-06, 'epoch': 2.63}


 66%|██████▌   | 1420/2144 [25:10<12:10,  1.01s/it]

{'loss': 0.0278, 'learning_rate': 3.3768656716417913e-06, 'epoch': 2.65}


 67%|██████▋   | 1430/2144 [25:20<11:58,  1.01s/it]

{'loss': 0.1417, 'learning_rate': 3.3302238805970148e-06, 'epoch': 2.67}


 67%|██████▋   | 1440/2144 [25:30<11:48,  1.01s/it]

{'loss': 0.1718, 'learning_rate': 3.283582089552239e-06, 'epoch': 2.69}


 68%|██████▊   | 1450/2144 [25:40<11:40,  1.01s/it]

{'loss': 0.002, 'learning_rate': 3.2369402985074627e-06, 'epoch': 2.71}


 68%|██████▊   | 1460/2144 [25:50<11:30,  1.01s/it]

{'loss': 0.2015, 'learning_rate': 3.190298507462687e-06, 'epoch': 2.72}


 69%|██████▊   | 1470/2144 [26:00<11:20,  1.01s/it]

{'loss': 0.1954, 'learning_rate': 3.1436567164179106e-06, 'epoch': 2.74}


 69%|██████▉   | 1480/2144 [26:10<11:11,  1.01s/it]

{'loss': 0.0684, 'learning_rate': 3.0970149253731345e-06, 'epoch': 2.76}


 69%|██████▉   | 1490/2144 [26:20<11:01,  1.01s/it]

{'loss': 0.0748, 'learning_rate': 3.050373134328358e-06, 'epoch': 2.78}


 70%|██████▉   | 1500/2144 [26:30<10:52,  1.01s/it]

{'loss': 0.0083, 'learning_rate': 3.0037313432835824e-06, 'epoch': 2.8}


 70%|███████   | 1510/2144 [26:41<10:39,  1.01s/it]

{'loss': 0.2937, 'learning_rate': 2.957089552238806e-06, 'epoch': 2.82}


 71%|███████   | 1520/2144 [26:51<10:27,  1.01s/it]

{'loss': 0.0318, 'learning_rate': 2.9104477611940303e-06, 'epoch': 2.84}


 71%|███████▏  | 1530/2144 [27:01<10:18,  1.01s/it]

{'loss': 0.0547, 'learning_rate': 2.863805970149254e-06, 'epoch': 2.85}


 72%|███████▏  | 1540/2144 [27:11<10:10,  1.01s/it]

{'loss': 0.0021, 'learning_rate': 2.8171641791044778e-06, 'epoch': 2.87}


 72%|███████▏  | 1550/2144 [27:21<09:59,  1.01s/it]

{'loss': 0.0114, 'learning_rate': 2.7705223880597017e-06, 'epoch': 2.89}


 73%|███████▎  | 1560/2144 [27:31<09:50,  1.01s/it]

{'loss': 0.0167, 'learning_rate': 2.7238805970149257e-06, 'epoch': 2.91}


 73%|███████▎  | 1570/2144 [27:41<09:38,  1.01s/it]

{'loss': 0.0017, 'learning_rate': 2.677238805970149e-06, 'epoch': 2.93}


 74%|███████▎  | 1580/2144 [27:51<09:29,  1.01s/it]

{'loss': 0.0745, 'learning_rate': 2.6305970149253736e-06, 'epoch': 2.95}


 74%|███████▍  | 1590/2144 [28:01<09:19,  1.01s/it]

{'loss': 0.1164, 'learning_rate': 2.583955223880597e-06, 'epoch': 2.97}


 75%|███████▍  | 1600/2144 [28:11<09:08,  1.01s/it]

{'loss': 0.1318, 'learning_rate': 2.537313432835821e-06, 'epoch': 2.99}


                                                   
 75%|███████▌  | 1608/2144 [28:50<08:40,  1.03it/s]

{'eval_loss': 1.9687161445617676, 'eval_accuracy': 0.7076923076923077, 'eval_precision': 0.7108114297593787, 'eval_recall': 0.7076923076923077, 'eval_f1': 0.707765471189293, 'eval_runtime': 31.0248, 'eval_samples_per_second': 23.046, 'eval_steps_per_second': 2.901, 'epoch': 3.0}


 75%|███████▌  | 1610/2144 [28:59<1:18:29,  8.82s/it]

{'loss': 0.0019, 'learning_rate': 2.490671641791045e-06, 'epoch': 3.0}


 76%|███████▌  | 1620/2144 [29:09<10:45,  1.23s/it]  

{'loss': 0.0009, 'learning_rate': 2.444029850746269e-06, 'epoch': 3.02}


 76%|███████▌  | 1630/2144 [29:19<08:41,  1.01s/it]

{'loss': 0.0175, 'learning_rate': 2.397388059701493e-06, 'epoch': 3.04}


 76%|███████▋  | 1640/2144 [29:29<08:27,  1.01s/it]

{'loss': 0.0499, 'learning_rate': 2.350746268656717e-06, 'epoch': 3.06}


 77%|███████▋  | 1650/2144 [29:39<08:18,  1.01s/it]

{'loss': 0.059, 'learning_rate': 2.3041044776119408e-06, 'epoch': 3.08}


 77%|███████▋  | 1660/2144 [29:49<08:07,  1.01s/it]

{'loss': 0.0616, 'learning_rate': 2.2574626865671643e-06, 'epoch': 3.1}


 78%|███████▊  | 1670/2144 [29:59<07:58,  1.01s/it]

{'loss': 0.1224, 'learning_rate': 2.2108208955223883e-06, 'epoch': 3.12}


 78%|███████▊  | 1680/2144 [30:09<07:47,  1.01s/it]

{'loss': 0.0994, 'learning_rate': 2.1641791044776118e-06, 'epoch': 3.13}


 79%|███████▉  | 1690/2144 [30:19<07:37,  1.01s/it]

{'loss': 0.0812, 'learning_rate': 2.1175373134328357e-06, 'epoch': 3.15}


 79%|███████▉  | 1700/2144 [30:30<07:34,  1.02s/it]

{'loss': 0.0022, 'learning_rate': 2.0708955223880597e-06, 'epoch': 3.17}


 80%|███████▉  | 1710/2144 [30:40<07:19,  1.01s/it]

{'loss': 0.0173, 'learning_rate': 2.0242537313432836e-06, 'epoch': 3.19}


 80%|████████  | 1720/2144 [30:50<07:09,  1.01s/it]

{'loss': 0.0175, 'learning_rate': 1.9776119402985076e-06, 'epoch': 3.21}


 81%|████████  | 1730/2144 [31:00<06:58,  1.01s/it]

{'loss': 0.2023, 'learning_rate': 1.9309701492537315e-06, 'epoch': 3.23}


 81%|████████  | 1740/2144 [31:10<06:48,  1.01s/it]

{'loss': 0.0324, 'learning_rate': 1.8843283582089553e-06, 'epoch': 3.25}


 82%|████████▏ | 1750/2144 [31:20<06:39,  1.01s/it]

{'loss': 0.0161, 'learning_rate': 1.8376865671641792e-06, 'epoch': 3.26}


 82%|████████▏ | 1760/2144 [31:30<06:28,  1.01s/it]

{'loss': 0.0274, 'learning_rate': 1.791044776119403e-06, 'epoch': 3.28}


 83%|████████▎ | 1770/2144 [31:40<06:17,  1.01s/it]

{'loss': 0.0745, 'learning_rate': 1.7444029850746269e-06, 'epoch': 3.3}


 83%|████████▎ | 1780/2144 [31:50<06:07,  1.01s/it]

{'loss': 0.0127, 'learning_rate': 1.6977611940298508e-06, 'epoch': 3.32}


 83%|████████▎ | 1790/2144 [32:01<05:57,  1.01s/it]

{'loss': 0.072, 'learning_rate': 1.6511194029850746e-06, 'epoch': 3.34}


 84%|████████▍ | 1800/2144 [32:11<05:47,  1.01s/it]

{'loss': 0.0007, 'learning_rate': 1.6044776119402985e-06, 'epoch': 3.36}


 84%|████████▍ | 1810/2144 [32:21<05:36,  1.01s/it]

{'loss': 0.0012, 'learning_rate': 1.5578358208955225e-06, 'epoch': 3.38}


 85%|████████▍ | 1820/2144 [32:31<05:27,  1.01s/it]

{'loss': 0.0478, 'learning_rate': 1.5111940298507464e-06, 'epoch': 3.4}


 85%|████████▌ | 1830/2144 [32:41<05:16,  1.01s/it]

{'loss': 0.1521, 'learning_rate': 1.4645522388059702e-06, 'epoch': 3.41}


 86%|████████▌ | 1840/2144 [32:51<05:05,  1.00s/it]

{'loss': 0.0185, 'learning_rate': 1.417910447761194e-06, 'epoch': 3.43}


 86%|████████▋ | 1850/2144 [33:01<04:55,  1.01s/it]

{'loss': 0.0045, 'learning_rate': 1.371268656716418e-06, 'epoch': 3.45}


 87%|████████▋ | 1860/2144 [33:11<04:46,  1.01s/it]

{'loss': 0.0074, 'learning_rate': 1.3246268656716418e-06, 'epoch': 3.47}


 87%|████████▋ | 1870/2144 [33:21<04:35,  1.01s/it]

{'loss': 0.0036, 'learning_rate': 1.2779850746268657e-06, 'epoch': 3.49}


 88%|████████▊ | 1880/2144 [33:31<04:25,  1.01s/it]

{'loss': 0.0803, 'learning_rate': 1.2313432835820897e-06, 'epoch': 3.51}


 88%|████████▊ | 1890/2144 [33:41<04:16,  1.01s/it]

{'loss': 0.0007, 'learning_rate': 1.1847014925373134e-06, 'epoch': 3.53}


 89%|████████▊ | 1900/2144 [33:51<04:05,  1.01s/it]

{'loss': 0.0356, 'learning_rate': 1.1380597014925374e-06, 'epoch': 3.54}


 89%|████████▉ | 1910/2144 [34:02<03:56,  1.01s/it]

{'loss': 0.0008, 'learning_rate': 1.0914179104477613e-06, 'epoch': 3.56}


 90%|████████▉ | 1920/2144 [34:12<03:46,  1.01s/it]

{'loss': 0.1167, 'learning_rate': 1.044776119402985e-06, 'epoch': 3.58}


 90%|█████████ | 1930/2144 [34:22<03:36,  1.01s/it]

{'loss': 0.0005, 'learning_rate': 9.98134328358209e-07, 'epoch': 3.6}


 90%|█████████ | 1940/2144 [34:32<03:26,  1.01s/it]

{'loss': 0.0313, 'learning_rate': 9.514925373134328e-07, 'epoch': 3.62}


 91%|█████████ | 1950/2144 [34:42<03:15,  1.01s/it]

{'loss': 0.0153, 'learning_rate': 9.048507462686568e-07, 'epoch': 3.64}


 91%|█████████▏| 1960/2144 [34:52<03:05,  1.01s/it]

{'loss': 0.0345, 'learning_rate': 8.582089552238806e-07, 'epoch': 3.66}


 92%|█████████▏| 1970/2144 [35:02<02:55,  1.01s/it]

{'loss': 0.0621, 'learning_rate': 8.115671641791046e-07, 'epoch': 3.68}


 92%|█████████▏| 1980/2144 [35:12<02:45,  1.01s/it]

{'loss': 0.1683, 'learning_rate': 7.649253731343284e-07, 'epoch': 3.69}


 93%|█████████▎| 1990/2144 [35:22<02:35,  1.01s/it]

{'loss': 0.1722, 'learning_rate': 7.182835820895523e-07, 'epoch': 3.71}


 93%|█████████▎| 2000/2144 [35:32<02:25,  1.01s/it]

{'loss': 0.0028, 'learning_rate': 6.716417910447762e-07, 'epoch': 3.73}


 94%|█████████▍| 2010/2144 [35:42<02:15,  1.01s/it]

{'loss': 0.0203, 'learning_rate': 6.25e-07, 'epoch': 3.75}


 94%|█████████▍| 2020/2144 [35:53<02:04,  1.01s/it]

{'loss': 0.0058, 'learning_rate': 5.783582089552239e-07, 'epoch': 3.77}


 95%|█████████▍| 2030/2144 [36:03<01:54,  1.01s/it]

{'loss': 0.0136, 'learning_rate': 5.317164179104478e-07, 'epoch': 3.79}


 95%|█████████▌| 2040/2144 [36:13<01:44,  1.01s/it]

{'loss': 0.0856, 'learning_rate': 4.850746268656717e-07, 'epoch': 3.81}


 96%|█████████▌| 2050/2144 [36:23<01:34,  1.01s/it]

{'loss': 0.017, 'learning_rate': 4.384328358208956e-07, 'epoch': 3.82}


 96%|█████████▌| 2060/2144 [36:33<01:24,  1.01s/it]

{'loss': 0.0015, 'learning_rate': 3.9179104477611947e-07, 'epoch': 3.84}


 97%|█████████▋| 2070/2144 [36:43<01:14,  1.01s/it]

{'loss': 0.0195, 'learning_rate': 3.451492537313433e-07, 'epoch': 3.86}


 97%|█████████▋| 2080/2144 [36:53<01:04,  1.01s/it]

{'loss': 0.0009, 'learning_rate': 2.9850746268656716e-07, 'epoch': 3.88}


 97%|█████████▋| 2090/2144 [37:03<00:54,  1.01s/it]

{'loss': 0.0004, 'learning_rate': 2.5186567164179105e-07, 'epoch': 3.9}


 98%|█████████▊| 2100/2144 [37:13<00:44,  1.01s/it]

{'loss': 0.0146, 'learning_rate': 2.0522388059701495e-07, 'epoch': 3.92}


 98%|█████████▊| 2110/2144 [37:23<00:34,  1.01s/it]

{'loss': 0.0006, 'learning_rate': 1.5858208955223882e-07, 'epoch': 3.94}


 99%|█████████▉| 2120/2144 [37:33<00:24,  1.01s/it]

{'loss': 0.003, 'learning_rate': 1.1194029850746268e-07, 'epoch': 3.96}


 99%|█████████▉| 2130/2144 [37:43<00:14,  1.01s/it]

{'loss': 0.0006, 'learning_rate': 6.529850746268657e-08, 'epoch': 3.97}


100%|█████████▉| 2140/2144 [37:54<00:04,  1.00s/it]

{'loss': 0.0005, 'learning_rate': 1.8656716417910447e-08, 'epoch': 3.99}


                                                   
100%|██████████| 2144/2144 [38:29<00:00,  1.03it/s]

{'eval_loss': 2.011687755584717, 'eval_accuracy': 0.7062937062937062, 'eval_precision': 0.7110066882133922, 'eval_recall': 0.7062937062937062, 'eval_f1': 0.7081216535800603, 'eval_runtime': 31.3799, 'eval_samples_per_second': 22.785, 'eval_steps_per_second': 2.868, 'epoch': 4.0}


100%|██████████| 2144/2144 [38:37<00:00,  1.08s/it]


{'train_runtime': 2317.856, 'train_samples_per_second': 7.396, 'train_steps_per_second': 0.925, 'train_loss': 0.18384021638794637, 'epoch': 4.0}


100%|██████████| 90/90 [00:28<00:00,  3.11it/s]
100%|██████████| 90/90 [00:29<00:00,  3.07it/s]
100%|██████████| 90/90 [00:31<00:00,  2.89it/s]


{'accuracy': 0.7160839160839161, 'precision': 0.7260395936289163, 'recall': 0.7160839160839161, 'f1': 0.718725994813422}
{'accuracy': 0.7072829131652661, 'precision': 0.7119867872300678, 'recall': 0.7072829131652661, 'f1': 0.7076875025785141}


  0%|          | 10/2144 [00:10<36:21,  1.02s/it] 

{'loss': 0.3076, 'learning_rate': 9.953358208955226e-06, 'epoch': 0.02}


  1%|          | 20/2144 [00:21<35:35,  1.01s/it]

{'loss': 0.0943, 'learning_rate': 9.906716417910449e-06, 'epoch': 0.04}


  1%|▏         | 30/2144 [00:31<35:32,  1.01s/it]

{'loss': 0.3184, 'learning_rate': 9.860074626865672e-06, 'epoch': 0.06}


  2%|▏         | 40/2144 [00:41<35:19,  1.01s/it]

{'loss': 0.2813, 'learning_rate': 9.813432835820897e-06, 'epoch': 0.07}


  2%|▏         | 50/2144 [00:51<35:15,  1.01s/it]

{'loss': 0.2478, 'learning_rate': 9.76679104477612e-06, 'epoch': 0.09}


  3%|▎         | 60/2144 [01:01<34:56,  1.01s/it]

{'loss': 0.2976, 'learning_rate': 9.720149253731343e-06, 'epoch': 0.11}


  3%|▎         | 70/2144 [01:11<34:54,  1.01s/it]

{'loss': 0.5335, 'learning_rate': 9.673507462686568e-06, 'epoch': 0.13}


  4%|▎         | 80/2144 [01:21<34:44,  1.01s/it]

{'loss': 0.2253, 'learning_rate': 9.626865671641792e-06, 'epoch': 0.15}


  4%|▍         | 90/2144 [01:31<34:31,  1.01s/it]

{'loss': 0.3519, 'learning_rate': 9.580223880597016e-06, 'epoch': 0.17}


  5%|▍         | 100/2144 [01:41<34:25,  1.01s/it]

{'loss': 0.3443, 'learning_rate': 9.533582089552239e-06, 'epoch': 0.19}


  5%|▌         | 110/2144 [01:51<34:14,  1.01s/it]

{'loss': 0.0443, 'learning_rate': 9.486940298507463e-06, 'epoch': 0.21}


  6%|▌         | 120/2144 [02:02<34:02,  1.01s/it]

{'loss': 0.1308, 'learning_rate': 9.440298507462688e-06, 'epoch': 0.22}


  6%|▌         | 130/2144 [02:12<33:54,  1.01s/it]

{'loss': 0.2214, 'learning_rate': 9.393656716417911e-06, 'epoch': 0.24}


  7%|▋         | 140/2144 [02:22<33:41,  1.01s/it]

{'loss': 0.2568, 'learning_rate': 9.347014925373134e-06, 'epoch': 0.26}


  7%|▋         | 150/2144 [02:32<33:37,  1.01s/it]

{'loss': 0.3492, 'learning_rate': 9.30037313432836e-06, 'epoch': 0.28}


  7%|▋         | 160/2144 [02:42<33:22,  1.01s/it]

{'loss': 0.2186, 'learning_rate': 9.253731343283582e-06, 'epoch': 0.3}


  8%|▊         | 170/2144 [02:52<33:16,  1.01s/it]

{'loss': 0.0724, 'learning_rate': 9.207089552238807e-06, 'epoch': 0.32}


  8%|▊         | 180/2144 [03:02<33:06,  1.01s/it]

{'loss': 0.1561, 'learning_rate': 9.16044776119403e-06, 'epoch': 0.34}


  9%|▉         | 190/2144 [03:12<32:54,  1.01s/it]

{'loss': 0.1144, 'learning_rate': 9.113805970149255e-06, 'epoch': 0.35}


  9%|▉         | 200/2144 [03:22<32:47,  1.01s/it]

{'loss': 0.4338, 'learning_rate': 9.067164179104478e-06, 'epoch': 0.37}


 10%|▉         | 210/2144 [03:32<32:30,  1.01s/it]

{'loss': 0.2935, 'learning_rate': 9.020522388059703e-06, 'epoch': 0.39}


 10%|█         | 220/2144 [03:43<32:21,  1.01s/it]

{'loss': 0.1829, 'learning_rate': 8.973880597014926e-06, 'epoch': 0.41}


 11%|█         | 230/2144 [03:53<32:10,  1.01s/it]

{'loss': 0.1243, 'learning_rate': 8.927238805970149e-06, 'epoch': 0.43}


 11%|█         | 240/2144 [04:03<31:56,  1.01s/it]

{'loss': 0.4329, 'learning_rate': 8.880597014925374e-06, 'epoch': 0.45}


 12%|█▏        | 250/2144 [04:13<31:51,  1.01s/it]

{'loss': 0.0539, 'learning_rate': 8.833955223880599e-06, 'epoch': 0.47}


 12%|█▏        | 260/2144 [04:23<31:40,  1.01s/it]

{'loss': 0.2623, 'learning_rate': 8.787313432835822e-06, 'epoch': 0.49}


 13%|█▎        | 270/2144 [04:33<31:30,  1.01s/it]

{'loss': 0.9583, 'learning_rate': 8.740671641791045e-06, 'epoch': 0.5}


 13%|█▎        | 280/2144 [04:43<31:28,  1.01s/it]

{'loss': 0.2697, 'learning_rate': 8.69402985074627e-06, 'epoch': 0.52}


 14%|█▎        | 290/2144 [04:53<31:14,  1.01s/it]

{'loss': 0.1213, 'learning_rate': 8.647388059701494e-06, 'epoch': 0.54}


 14%|█▍        | 300/2144 [05:03<31:02,  1.01s/it]

{'loss': 0.1898, 'learning_rate': 8.600746268656716e-06, 'epoch': 0.56}


 14%|█▍        | 310/2144 [05:13<30:50,  1.01s/it]

{'loss': 0.2732, 'learning_rate': 8.55410447761194e-06, 'epoch': 0.58}


 15%|█▍        | 320/2144 [05:23<30:42,  1.01s/it]

{'loss': 0.1343, 'learning_rate': 8.507462686567165e-06, 'epoch': 0.6}


 15%|█▌        | 330/2144 [05:34<30:35,  1.01s/it]

{'loss': 0.1397, 'learning_rate': 8.460820895522389e-06, 'epoch': 0.62}


 16%|█▌        | 340/2144 [05:44<30:20,  1.01s/it]

{'loss': 0.255, 'learning_rate': 8.414179104477612e-06, 'epoch': 0.63}


 16%|█▋        | 350/2144 [05:54<30:08,  1.01s/it]

{'loss': 0.2915, 'learning_rate': 8.367537313432836e-06, 'epoch': 0.65}


 17%|█▋        | 360/2144 [06:04<30:03,  1.01s/it]

{'loss': 0.0344, 'learning_rate': 8.320895522388061e-06, 'epoch': 0.67}


 17%|█▋        | 370/2144 [06:14<29:49,  1.01s/it]

{'loss': 0.1467, 'learning_rate': 8.274253731343284e-06, 'epoch': 0.69}


 18%|█▊        | 380/2144 [06:24<29:42,  1.01s/it]

{'loss': 0.2236, 'learning_rate': 8.227611940298507e-06, 'epoch': 0.71}


 18%|█▊        | 390/2144 [06:34<29:31,  1.01s/it]

{'loss': 0.2643, 'learning_rate': 8.180970149253732e-06, 'epoch': 0.73}


 19%|█▊        | 400/2144 [06:44<29:16,  1.01s/it]

{'loss': 0.3169, 'learning_rate': 8.134328358208955e-06, 'epoch': 0.75}


 19%|█▉        | 410/2144 [06:54<29:06,  1.01s/it]

{'loss': 0.2777, 'learning_rate': 8.08768656716418e-06, 'epoch': 0.76}


 20%|█▉        | 420/2144 [07:04<29:04,  1.01s/it]

{'loss': 0.137, 'learning_rate': 8.041044776119403e-06, 'epoch': 0.78}


 20%|██        | 430/2144 [07:15<28:47,  1.01s/it]

{'loss': 0.1858, 'learning_rate': 7.994402985074628e-06, 'epoch': 0.8}


 21%|██        | 440/2144 [07:25<28:40,  1.01s/it]

{'loss': 0.1196, 'learning_rate': 7.947761194029851e-06, 'epoch': 0.82}


 21%|██        | 450/2144 [07:35<28:26,  1.01s/it]

{'loss': 0.1793, 'learning_rate': 7.901119402985076e-06, 'epoch': 0.84}


 21%|██▏       | 460/2144 [07:45<28:20,  1.01s/it]

{'loss': 0.0153, 'learning_rate': 7.854477611940299e-06, 'epoch': 0.86}


 22%|██▏       | 470/2144 [07:55<28:09,  1.01s/it]

{'loss': 0.4763, 'learning_rate': 7.807835820895522e-06, 'epoch': 0.88}


 22%|██▏       | 480/2144 [08:05<27:56,  1.01s/it]

{'loss': 0.3846, 'learning_rate': 7.761194029850747e-06, 'epoch': 0.9}


 23%|██▎       | 490/2144 [08:15<27:49,  1.01s/it]

{'loss': 0.3106, 'learning_rate': 7.714552238805972e-06, 'epoch': 0.91}


 23%|██▎       | 500/2144 [08:25<27:42,  1.01s/it]

{'loss': 0.2919, 'learning_rate': 7.667910447761195e-06, 'epoch': 0.93}


 24%|██▍       | 510/2144 [08:35<27:29,  1.01s/it]

{'loss': 0.3352, 'learning_rate': 7.621268656716419e-06, 'epoch': 0.95}


 24%|██▍       | 520/2144 [08:45<27:21,  1.01s/it]

{'loss': 0.29, 'learning_rate': 7.574626865671643e-06, 'epoch': 0.97}


 25%|██▍       | 530/2144 [08:56<27:11,  1.01s/it]

{'loss': 0.3135, 'learning_rate': 7.527985074626867e-06, 'epoch': 0.99}


                                                  
 25%|██▌       | 536/2144 [09:33<26:03,  1.03it/s]

{'eval_loss': 1.4429943561553955, 'eval_accuracy': 0.7020979020979021, 'eval_precision': 0.7057426189770213, 'eval_recall': 0.7020979020979021, 'eval_f1': 0.7019692239508026, 'eval_runtime': 31.3199, 'eval_samples_per_second': 22.829, 'eval_steps_per_second': 2.874, 'epoch': 1.0}


 25%|██▌       | 540/2144 [09:43<2:10:21,  4.88s/it]

{'loss': 0.4007, 'learning_rate': 7.48134328358209e-06, 'epoch': 1.01}


 26%|██▌       | 550/2144 [09:53<29:36,  1.11s/it]  

{'loss': 0.1263, 'learning_rate': 7.434701492537314e-06, 'epoch': 1.03}


 26%|██▌       | 560/2144 [10:03<26:45,  1.01s/it]

{'loss': 0.2062, 'learning_rate': 7.3880597014925385e-06, 'epoch': 1.04}


 27%|██▋       | 570/2144 [10:13<26:25,  1.01s/it]

{'loss': 0.0763, 'learning_rate': 7.3414179104477625e-06, 'epoch': 1.06}


 27%|██▋       | 580/2144 [10:23<26:16,  1.01s/it]

{'loss': 0.2939, 'learning_rate': 7.2947761194029856e-06, 'epoch': 1.08}


 28%|██▊       | 590/2144 [10:34<26:05,  1.01s/it]

{'loss': 0.2062, 'learning_rate': 7.2481343283582095e-06, 'epoch': 1.1}


 28%|██▊       | 600/2144 [10:44<25:56,  1.01s/it]

{'loss': 0.0801, 'learning_rate': 7.2014925373134335e-06, 'epoch': 1.12}


 28%|██▊       | 610/2144 [10:54<25:50,  1.01s/it]

{'loss': 0.193, 'learning_rate': 7.154850746268658e-06, 'epoch': 1.14}


 29%|██▉       | 620/2144 [11:04<25:32,  1.01s/it]

{'loss': 0.0526, 'learning_rate': 7.1082089552238805e-06, 'epoch': 1.16}


 29%|██▉       | 630/2144 [11:14<25:28,  1.01s/it]

{'loss': 0.2104, 'learning_rate': 7.061567164179105e-06, 'epoch': 1.18}


 30%|██▉       | 640/2144 [11:24<25:15,  1.01s/it]

{'loss': 0.0079, 'learning_rate': 7.014925373134329e-06, 'epoch': 1.19}


 30%|███       | 650/2144 [11:34<25:10,  1.01s/it]

{'loss': 0.0616, 'learning_rate': 6.968283582089553e-06, 'epoch': 1.21}


 31%|███       | 660/2144 [11:44<25:01,  1.01s/it]

{'loss': 0.1404, 'learning_rate': 6.921641791044776e-06, 'epoch': 1.23}


 31%|███▏      | 670/2144 [11:54<24:51,  1.01s/it]

{'loss': 0.0776, 'learning_rate': 6.875e-06, 'epoch': 1.25}


 32%|███▏      | 680/2144 [12:04<24:39,  1.01s/it]

{'loss': 0.099, 'learning_rate': 6.828358208955225e-06, 'epoch': 1.27}


 32%|███▏      | 690/2144 [12:15<24:27,  1.01s/it]

{'loss': 0.1265, 'learning_rate': 6.781716417910448e-06, 'epoch': 1.29}


 33%|███▎      | 700/2144 [12:25<24:17,  1.01s/it]

{'loss': 0.4831, 'learning_rate': 6.735074626865672e-06, 'epoch': 1.31}


 33%|███▎      | 710/2144 [12:35<23:58,  1.00s/it]

{'loss': 0.0521, 'learning_rate': 6.688432835820896e-06, 'epoch': 1.32}


 34%|███▎      | 720/2144 [12:45<23:58,  1.01s/it]

{'loss': 0.0984, 'learning_rate': 6.64179104477612e-06, 'epoch': 1.34}


 34%|███▍      | 730/2144 [12:55<23:47,  1.01s/it]

{'loss': 0.1496, 'learning_rate': 6.595149253731343e-06, 'epoch': 1.36}


 35%|███▍      | 740/2144 [13:05<23:34,  1.01s/it]

{'loss': 0.0953, 'learning_rate': 6.548507462686567e-06, 'epoch': 1.38}


 35%|███▍      | 750/2144 [13:15<23:27,  1.01s/it]

{'loss': 0.1301, 'learning_rate': 6.501865671641792e-06, 'epoch': 1.4}


 35%|███▌      | 760/2144 [13:25<23:15,  1.01s/it]

{'loss': 0.0787, 'learning_rate': 6.455223880597016e-06, 'epoch': 1.42}


 36%|███▌      | 770/2144 [13:35<23:04,  1.01s/it]

{'loss': 0.2893, 'learning_rate': 6.408582089552239e-06, 'epoch': 1.44}


 36%|███▋      | 780/2144 [13:45<22:53,  1.01s/it]

{'loss': 0.2073, 'learning_rate': 6.361940298507463e-06, 'epoch': 1.46}


 37%|███▋      | 790/2144 [13:55<22:46,  1.01s/it]

{'loss': 0.1098, 'learning_rate': 6.315298507462687e-06, 'epoch': 1.47}


 37%|███▋      | 800/2144 [14:05<22:35,  1.01s/it]

{'loss': 0.1256, 'learning_rate': 6.2686567164179116e-06, 'epoch': 1.49}


 38%|███▊      | 810/2144 [14:16<22:25,  1.01s/it]

{'loss': 0.1486, 'learning_rate': 6.222014925373135e-06, 'epoch': 1.51}


 38%|███▊      | 820/2144 [14:26<22:13,  1.01s/it]

{'loss': 0.1599, 'learning_rate': 6.175373134328359e-06, 'epoch': 1.53}


 39%|███▊      | 830/2144 [14:36<22:07,  1.01s/it]

{'loss': 0.0862, 'learning_rate': 6.1287313432835826e-06, 'epoch': 1.55}


 39%|███▉      | 840/2144 [14:46<21:54,  1.01s/it]

{'loss': 0.161, 'learning_rate': 6.0820895522388065e-06, 'epoch': 1.57}


 40%|███▉      | 850/2144 [14:56<21:47,  1.01s/it]

{'loss': 0.1052, 'learning_rate': 6.03544776119403e-06, 'epoch': 1.59}


 40%|████      | 860/2144 [15:06<21:35,  1.01s/it]

{'loss': 0.1171, 'learning_rate': 5.988805970149254e-06, 'epoch': 1.6}


 41%|████      | 870/2144 [15:16<21:24,  1.01s/it]

{'loss': 0.0586, 'learning_rate': 5.942164179104478e-06, 'epoch': 1.62}


 41%|████      | 880/2144 [15:26<21:14,  1.01s/it]

{'loss': 0.0146, 'learning_rate': 5.895522388059702e-06, 'epoch': 1.64}


 42%|████▏     | 890/2144 [15:36<21:06,  1.01s/it]

{'loss': 0.0509, 'learning_rate': 5.848880597014925e-06, 'epoch': 1.66}


 42%|████▏     | 900/2144 [15:46<20:53,  1.01s/it]

{'loss': 0.1697, 'learning_rate': 5.802238805970149e-06, 'epoch': 1.68}


 42%|████▏     | 910/2144 [15:56<20:43,  1.01s/it]

{'loss': 0.0631, 'learning_rate': 5.755597014925373e-06, 'epoch': 1.7}


 43%|████▎     | 920/2144 [16:07<20:32,  1.01s/it]

{'loss': 0.0921, 'learning_rate': 5.708955223880598e-06, 'epoch': 1.72}


 43%|████▎     | 930/2144 [16:17<20:23,  1.01s/it]

{'loss': 0.3937, 'learning_rate': 5.662313432835821e-06, 'epoch': 1.74}


 44%|████▍     | 940/2144 [16:27<20:13,  1.01s/it]

{'loss': 0.1506, 'learning_rate': 5.615671641791045e-06, 'epoch': 1.75}


 44%|████▍     | 950/2144 [16:37<20:04,  1.01s/it]

{'loss': 0.2236, 'learning_rate': 5.569029850746269e-06, 'epoch': 1.77}


 45%|████▍     | 960/2144 [16:47<19:55,  1.01s/it]

{'loss': 0.264, 'learning_rate': 5.522388059701493e-06, 'epoch': 1.79}


 45%|████▌     | 970/2144 [16:57<19:47,  1.01s/it]

{'loss': 0.1025, 'learning_rate': 5.475746268656716e-06, 'epoch': 1.81}


 46%|████▌     | 980/2144 [17:07<19:33,  1.01s/it]

{'loss': 0.1257, 'learning_rate': 5.429104477611941e-06, 'epoch': 1.83}


 46%|████▌     | 990/2144 [17:17<19:22,  1.01s/it]

{'loss': 0.1516, 'learning_rate': 5.382462686567165e-06, 'epoch': 1.85}


 47%|████▋     | 1000/2144 [17:27<19:15,  1.01s/it]

{'loss': 0.0702, 'learning_rate': 5.335820895522389e-06, 'epoch': 1.87}


 47%|████▋     | 1010/2144 [17:37<19:03,  1.01s/it]

{'loss': 0.0047, 'learning_rate': 5.289179104477612e-06, 'epoch': 1.88}


 48%|████▊     | 1020/2144 [17:47<18:57,  1.01s/it]

{'loss': 0.1727, 'learning_rate': 5.242537313432836e-06, 'epoch': 1.9}


 48%|████▊     | 1030/2144 [17:58<18:44,  1.01s/it]

{'loss': 0.0315, 'learning_rate': 5.195895522388061e-06, 'epoch': 1.92}


 49%|████▊     | 1040/2144 [18:08<18:33,  1.01s/it]

{'loss': 0.0161, 'learning_rate': 5.149253731343285e-06, 'epoch': 1.94}


 49%|████▉     | 1050/2144 [18:18<18:21,  1.01s/it]

{'loss': 0.065, 'learning_rate': 5.102611940298508e-06, 'epoch': 1.96}


 49%|████▉     | 1060/2144 [18:28<18:11,  1.01s/it]

{'loss': 0.195, 'learning_rate': 5.055970149253732e-06, 'epoch': 1.98}


 50%|████▉     | 1070/2144 [18:38<18:01,  1.01s/it]

{'loss': 0.0744, 'learning_rate': 5.009328358208956e-06, 'epoch': 2.0}


                                                   
 50%|█████     | 1072/2144 [19:11<17:20,  1.03it/s]

{'eval_loss': 1.904199242591858, 'eval_accuracy': 0.6937062937062937, 'eval_precision': 0.7118460997923746, 'eval_recall': 0.6937062937062937, 'eval_f1': 0.6977717518344926, 'eval_runtime': 31.3812, 'eval_samples_per_second': 22.784, 'eval_steps_per_second': 2.868, 'epoch': 2.0}


 50%|█████     | 1080/2144 [19:25<34:16,  1.93s/it]  

{'loss': 0.0632, 'learning_rate': 4.9626865671641796e-06, 'epoch': 2.01}


 51%|█████     | 1090/2144 [19:35<18:08,  1.03s/it]

{'loss': 0.084, 'learning_rate': 4.9160447761194035e-06, 'epoch': 2.03}


 51%|█████▏    | 1100/2144 [19:45<17:35,  1.01s/it]

{'loss': 0.2092, 'learning_rate': 4.8694029850746275e-06, 'epoch': 2.05}


 52%|█████▏    | 1110/2144 [19:56<17:26,  1.01s/it]

{'loss': 0.0806, 'learning_rate': 4.822761194029851e-06, 'epoch': 2.07}


 52%|█████▏    | 1120/2144 [20:06<17:09,  1.01s/it]

{'loss': 0.0031, 'learning_rate': 4.7761194029850745e-06, 'epoch': 2.09}


 53%|█████▎    | 1130/2144 [20:16<17:03,  1.01s/it]

{'loss': 0.0015, 'learning_rate': 4.729477611940299e-06, 'epoch': 2.11}


 53%|█████▎    | 1140/2144 [20:26<16:54,  1.01s/it]

{'loss': 0.0024, 'learning_rate': 4.682835820895522e-06, 'epoch': 2.13}


 54%|█████▎    | 1150/2144 [20:36<16:43,  1.01s/it]

{'loss': 0.001, 'learning_rate': 4.636194029850747e-06, 'epoch': 2.15}


 54%|█████▍    | 1160/2144 [20:46<16:33,  1.01s/it]

{'loss': 0.0481, 'learning_rate': 4.58955223880597e-06, 'epoch': 2.16}


 55%|█████▍    | 1170/2144 [20:56<16:21,  1.01s/it]

{'loss': 0.0324, 'learning_rate': 4.542910447761194e-06, 'epoch': 2.18}


 55%|█████▌    | 1180/2144 [21:06<16:11,  1.01s/it]

{'loss': 0.1222, 'learning_rate': 4.496268656716418e-06, 'epoch': 2.2}


 56%|█████▌    | 1190/2144 [21:16<16:01,  1.01s/it]

{'loss': 0.0353, 'learning_rate': 4.449626865671642e-06, 'epoch': 2.22}


 56%|█████▌    | 1200/2144 [21:26<15:54,  1.01s/it]

{'loss': 0.018, 'learning_rate': 4.402985074626866e-06, 'epoch': 2.24}


 56%|█████▋    | 1210/2144 [21:36<15:38,  1.01s/it]

{'loss': 0.0431, 'learning_rate': 4.35634328358209e-06, 'epoch': 2.26}


 57%|█████▋    | 1220/2144 [21:46<15:33,  1.01s/it]

{'loss': 0.0721, 'learning_rate': 4.309701492537314e-06, 'epoch': 2.28}


 57%|█████▋    | 1230/2144 [21:56<15:20,  1.01s/it]

{'loss': 0.0111, 'learning_rate': 4.263059701492538e-06, 'epoch': 2.29}


 58%|█████▊    | 1240/2144 [22:07<15:12,  1.01s/it]

{'loss': 0.0043, 'learning_rate': 4.216417910447762e-06, 'epoch': 2.31}


 58%|█████▊    | 1250/2144 [22:17<15:01,  1.01s/it]

{'loss': 0.2218, 'learning_rate': 4.169776119402986e-06, 'epoch': 2.33}


 59%|█████▉    | 1260/2144 [22:27<14:52,  1.01s/it]

{'loss': 0.0006, 'learning_rate': 4.123134328358209e-06, 'epoch': 2.35}


 59%|█████▉    | 1270/2144 [22:37<14:42,  1.01s/it]

{'loss': 0.0077, 'learning_rate': 4.076492537313434e-06, 'epoch': 2.37}


 60%|█████▉    | 1280/2144 [22:47<14:31,  1.01s/it]

{'loss': 0.175, 'learning_rate': 4.029850746268657e-06, 'epoch': 2.39}


 60%|██████    | 1290/2144 [22:57<14:20,  1.01s/it]

{'loss': 0.0023, 'learning_rate': 3.983208955223881e-06, 'epoch': 2.41}


 61%|██████    | 1300/2144 [23:07<14:15,  1.01s/it]

{'loss': 0.0007, 'learning_rate': 3.936567164179105e-06, 'epoch': 2.43}


 61%|██████    | 1310/2144 [23:17<13:59,  1.01s/it]

{'loss': 0.0686, 'learning_rate': 3.889925373134329e-06, 'epoch': 2.44}


 62%|██████▏   | 1320/2144 [23:27<13:49,  1.01s/it]

{'loss': 0.0575, 'learning_rate': 3.843283582089553e-06, 'epoch': 2.46}


 62%|██████▏   | 1330/2144 [23:37<13:40,  1.01s/it]

{'loss': 0.0305, 'learning_rate': 3.7966417910447766e-06, 'epoch': 2.48}


 62%|██████▎   | 1340/2144 [23:48<13:29,  1.01s/it]

{'loss': 0.005, 'learning_rate': 3.7500000000000005e-06, 'epoch': 2.5}


 63%|██████▎   | 1350/2144 [23:58<13:22,  1.01s/it]

{'loss': 0.0022, 'learning_rate': 3.703358208955224e-06, 'epoch': 2.52}


 63%|██████▎   | 1360/2144 [24:08<13:09,  1.01s/it]

{'loss': 0.0691, 'learning_rate': 3.656716417910448e-06, 'epoch': 2.54}


 64%|██████▍   | 1370/2144 [24:18<13:01,  1.01s/it]

{'loss': 0.2378, 'learning_rate': 3.6100746268656715e-06, 'epoch': 2.56}


 64%|██████▍   | 1380/2144 [24:28<12:50,  1.01s/it]

{'loss': 0.0257, 'learning_rate': 3.563432835820896e-06, 'epoch': 2.57}


 65%|██████▍   | 1390/2144 [24:38<12:43,  1.01s/it]

{'loss': 0.0008, 'learning_rate': 3.5167910447761194e-06, 'epoch': 2.59}


 65%|██████▌   | 1400/2144 [24:48<12:29,  1.01s/it]

{'loss': 0.0976, 'learning_rate': 3.4701492537313438e-06, 'epoch': 2.61}


 66%|██████▌   | 1410/2144 [24:58<12:19,  1.01s/it]

{'loss': 0.1017, 'learning_rate': 3.4235074626865673e-06, 'epoch': 2.63}


 66%|██████▌   | 1420/2144 [25:08<12:12,  1.01s/it]

{'loss': 0.0007, 'learning_rate': 3.3768656716417913e-06, 'epoch': 2.65}


 67%|██████▋   | 1430/2144 [25:18<12:01,  1.01s/it]

{'loss': 0.012, 'learning_rate': 3.3302238805970148e-06, 'epoch': 2.67}


 67%|██████▋   | 1440/2144 [25:28<11:48,  1.01s/it]

{'loss': 0.1132, 'learning_rate': 3.283582089552239e-06, 'epoch': 2.69}


 68%|██████▊   | 1450/2144 [25:38<11:37,  1.01s/it]

{'loss': 0.0007, 'learning_rate': 3.2369402985074627e-06, 'epoch': 2.71}


 68%|██████▊   | 1460/2144 [25:49<11:29,  1.01s/it]

{'loss': 0.032, 'learning_rate': 3.190298507462687e-06, 'epoch': 2.72}


 69%|██████▊   | 1470/2144 [25:59<11:20,  1.01s/it]

{'loss': 0.2811, 'learning_rate': 3.1436567164179106e-06, 'epoch': 2.74}


 69%|██████▉   | 1480/2144 [26:09<11:09,  1.01s/it]

{'loss': 0.0639, 'learning_rate': 3.0970149253731345e-06, 'epoch': 2.76}


 69%|██████▉   | 1490/2144 [26:19<10:57,  1.01s/it]

{'loss': 0.0708, 'learning_rate': 3.050373134328358e-06, 'epoch': 2.78}


 70%|██████▉   | 1500/2144 [26:29<10:51,  1.01s/it]

{'loss': 0.001, 'learning_rate': 3.0037313432835824e-06, 'epoch': 2.8}


 70%|███████   | 1510/2144 [26:39<10:40,  1.01s/it]

{'loss': 0.2185, 'learning_rate': 2.957089552238806e-06, 'epoch': 2.82}


 71%|███████   | 1520/2144 [26:49<10:30,  1.01s/it]

{'loss': 0.0007, 'learning_rate': 2.9104477611940303e-06, 'epoch': 2.84}


 71%|███████▏  | 1530/2144 [26:59<10:19,  1.01s/it]

{'loss': 0.0007, 'learning_rate': 2.863805970149254e-06, 'epoch': 2.85}


 72%|███████▏  | 1540/2144 [27:09<10:09,  1.01s/it]

{'loss': 0.0028, 'learning_rate': 2.8171641791044778e-06, 'epoch': 2.87}


 72%|███████▏  | 1550/2144 [27:19<09:58,  1.01s/it]

{'loss': 0.0009, 'learning_rate': 2.7705223880597017e-06, 'epoch': 2.89}


 73%|███████▎  | 1560/2144 [27:29<09:49,  1.01s/it]

{'loss': 0.0408, 'learning_rate': 2.7238805970149257e-06, 'epoch': 2.91}


 73%|███████▎  | 1570/2144 [27:40<09:38,  1.01s/it]

{'loss': 0.008, 'learning_rate': 2.677238805970149e-06, 'epoch': 2.93}


 74%|███████▎  | 1580/2144 [27:50<09:25,  1.00s/it]

{'loss': 0.0431, 'learning_rate': 2.6305970149253736e-06, 'epoch': 2.95}


 74%|███████▍  | 1590/2144 [28:00<09:18,  1.01s/it]

{'loss': 0.0916, 'learning_rate': 2.583955223880597e-06, 'epoch': 2.97}


 75%|███████▍  | 1600/2144 [28:10<09:09,  1.01s/it]

{'loss': 0.1426, 'learning_rate': 2.537313432835821e-06, 'epoch': 2.99}


                                                   
 75%|███████▌  | 1608/2144 [28:49<08:37,  1.04it/s]

{'eval_loss': 2.025635004043579, 'eval_accuracy': 0.6993006993006993, 'eval_precision': 0.7103280156330554, 'eval_recall': 0.6993006993006993, 'eval_f1': 0.7025780466624972, 'eval_runtime': 31.0353, 'eval_samples_per_second': 23.038, 'eval_steps_per_second': 2.9, 'epoch': 3.0}


 75%|███████▌  | 1610/2144 [28:57<1:18:40,  8.84s/it]

{'loss': 0.0048, 'learning_rate': 2.490671641791045e-06, 'epoch': 3.0}


 76%|███████▌  | 1620/2144 [29:07<10:46,  1.23s/it]  

{'loss': 0.0653, 'learning_rate': 2.444029850746269e-06, 'epoch': 3.02}


 76%|███████▌  | 1630/2144 [29:17<08:41,  1.01s/it]

{'loss': 0.0021, 'learning_rate': 2.397388059701493e-06, 'epoch': 3.04}


 76%|███████▋  | 1640/2144 [29:27<08:29,  1.01s/it]

{'loss': 0.0007, 'learning_rate': 2.350746268656717e-06, 'epoch': 3.06}


 77%|███████▋  | 1650/2144 [29:38<08:17,  1.01s/it]

{'loss': 0.0617, 'learning_rate': 2.3041044776119408e-06, 'epoch': 3.08}


 77%|███████▋  | 1660/2144 [29:48<08:07,  1.01s/it]

{'loss': 0.0018, 'learning_rate': 2.2574626865671643e-06, 'epoch': 3.1}


 78%|███████▊  | 1670/2144 [29:58<07:57,  1.01s/it]

{'loss': 0.0006, 'learning_rate': 2.2108208955223883e-06, 'epoch': 3.12}


 78%|███████▊  | 1680/2144 [30:08<07:48,  1.01s/it]

{'loss': 0.103, 'learning_rate': 2.1641791044776118e-06, 'epoch': 3.13}


 79%|███████▉  | 1690/2144 [30:18<07:37,  1.01s/it]

{'loss': 0.0007, 'learning_rate': 2.1175373134328357e-06, 'epoch': 3.15}


 79%|███████▉  | 1700/2144 [30:28<07:26,  1.01s/it]

{'loss': 0.081, 'learning_rate': 2.0708955223880597e-06, 'epoch': 3.17}


 80%|███████▉  | 1710/2144 [30:38<07:18,  1.01s/it]

{'loss': 0.014, 'learning_rate': 2.0242537313432836e-06, 'epoch': 3.19}


 80%|████████  | 1720/2144 [30:48<07:09,  1.01s/it]

{'loss': 0.0507, 'learning_rate': 1.9776119402985076e-06, 'epoch': 3.21}


 81%|████████  | 1730/2144 [30:58<06:58,  1.01s/it]

{'loss': 0.1818, 'learning_rate': 1.9309701492537315e-06, 'epoch': 3.23}


 81%|████████  | 1740/2144 [31:08<06:47,  1.01s/it]

{'loss': 0.01, 'learning_rate': 1.8843283582089553e-06, 'epoch': 3.25}


 82%|████████▏ | 1750/2144 [31:18<06:37,  1.01s/it]

{'loss': 0.0088, 'learning_rate': 1.8376865671641792e-06, 'epoch': 3.26}


 82%|████████▏ | 1760/2144 [31:29<06:27,  1.01s/it]

{'loss': 0.0372, 'learning_rate': 1.791044776119403e-06, 'epoch': 3.28}


 83%|████████▎ | 1770/2144 [31:39<06:17,  1.01s/it]

{'loss': 0.0067, 'learning_rate': 1.7444029850746269e-06, 'epoch': 3.3}


 83%|████████▎ | 1780/2144 [31:49<06:06,  1.01s/it]

{'loss': 0.0015, 'learning_rate': 1.6977611940298508e-06, 'epoch': 3.32}


 83%|████████▎ | 1790/2144 [31:59<05:58,  1.01s/it]

{'loss': 0.0491, 'learning_rate': 1.6511194029850746e-06, 'epoch': 3.34}


 84%|████████▍ | 1800/2144 [32:09<05:47,  1.01s/it]

{'loss': 0.0012, 'learning_rate': 1.6044776119402985e-06, 'epoch': 3.36}


 84%|████████▍ | 1810/2144 [32:19<05:38,  1.01s/it]

{'loss': 0.0916, 'learning_rate': 1.5578358208955225e-06, 'epoch': 3.38}


 85%|████████▍ | 1820/2144 [32:29<05:27,  1.01s/it]

{'loss': 0.0005, 'learning_rate': 1.5111940298507464e-06, 'epoch': 3.4}


 85%|████████▌ | 1830/2144 [32:39<05:16,  1.01s/it]

{'loss': 0.055, 'learning_rate': 1.4645522388059702e-06, 'epoch': 3.41}


 86%|████████▌ | 1840/2144 [32:49<05:06,  1.01s/it]

{'loss': 0.0008, 'learning_rate': 1.417910447761194e-06, 'epoch': 3.43}


 86%|████████▋ | 1850/2144 [32:59<04:56,  1.01s/it]

{'loss': 0.0004, 'learning_rate': 1.371268656716418e-06, 'epoch': 3.45}


 87%|████████▋ | 1860/2144 [33:09<04:47,  1.01s/it]

{'loss': 0.0005, 'learning_rate': 1.3246268656716418e-06, 'epoch': 3.47}


 87%|████████▋ | 1870/2144 [33:19<04:36,  1.01s/it]

{'loss': 0.0009, 'learning_rate': 1.2779850746268657e-06, 'epoch': 3.49}


 88%|████████▊ | 1880/2144 [33:30<04:29,  1.02s/it]

{'loss': 0.0703, 'learning_rate': 1.2313432835820897e-06, 'epoch': 3.51}


 88%|████████▊ | 1890/2144 [33:40<04:15,  1.01s/it]

{'loss': 0.0004, 'learning_rate': 1.1847014925373134e-06, 'epoch': 3.53}


 89%|████████▊ | 1900/2144 [33:50<04:08,  1.02s/it]

{'loss': 0.0005, 'learning_rate': 1.1380597014925374e-06, 'epoch': 3.54}


 89%|████████▉ | 1910/2144 [34:00<03:54,  1.00s/it]

{'loss': 0.0004, 'learning_rate': 1.0914179104477613e-06, 'epoch': 3.56}


 90%|████████▉ | 1920/2144 [34:10<03:45,  1.01s/it]

{'loss': 0.1039, 'learning_rate': 1.044776119402985e-06, 'epoch': 3.58}


 90%|█████████ | 1930/2144 [34:20<03:33,  1.00it/s]

{'loss': 0.0006, 'learning_rate': 9.98134328358209e-07, 'epoch': 3.6}


 90%|█████████ | 1940/2144 [34:30<03:24,  1.00s/it]

{'loss': 0.0005, 'learning_rate': 9.514925373134328e-07, 'epoch': 3.62}


 91%|█████████ | 1950/2144 [34:40<03:13,  1.00it/s]

{'loss': 0.0005, 'learning_rate': 9.048507462686568e-07, 'epoch': 3.64}


 91%|█████████▏| 1960/2144 [34:50<03:03,  1.00it/s]

{'loss': 0.0017, 'learning_rate': 8.582089552238806e-07, 'epoch': 3.66}


 92%|█████████▏| 1970/2144 [35:00<02:54,  1.00s/it]

{'loss': 0.1053, 'learning_rate': 8.115671641791046e-07, 'epoch': 3.68}


 92%|█████████▏| 1980/2144 [35:10<02:49,  1.03s/it]

{'loss': 0.218, 'learning_rate': 7.649253731343284e-07, 'epoch': 3.69}


 93%|█████████▎| 1990/2144 [35:20<02:33,  1.00it/s]

{'loss': 0.0311, 'learning_rate': 7.182835820895523e-07, 'epoch': 3.71}


 93%|█████████▎| 2000/2144 [35:30<02:25,  1.01s/it]

{'loss': 0.0006, 'learning_rate': 6.716417910447762e-07, 'epoch': 3.73}


 94%|█████████▍| 2010/2144 [35:40<02:13,  1.01it/s]

{'loss': 0.0197, 'learning_rate': 6.25e-07, 'epoch': 3.75}


 94%|█████████▍| 2020/2144 [35:50<02:09,  1.04s/it]

{'loss': 0.0008, 'learning_rate': 5.783582089552239e-07, 'epoch': 3.77}


 95%|█████████▍| 2030/2144 [36:00<01:54,  1.01s/it]

{'loss': 0.0041, 'learning_rate': 5.317164179104478e-07, 'epoch': 3.79}


 95%|█████████▌| 2040/2144 [36:10<01:44,  1.00s/it]

{'loss': 0.0826, 'learning_rate': 4.850746268656717e-07, 'epoch': 3.81}


 96%|█████████▌| 2050/2144 [36:20<01:33,  1.00it/s]

{'loss': 0.0183, 'learning_rate': 4.384328358208956e-07, 'epoch': 3.82}


 96%|█████████▌| 2060/2144 [36:30<01:23,  1.00it/s]

{'loss': 0.0004, 'learning_rate': 3.9179104477611947e-07, 'epoch': 3.84}


 97%|█████████▋| 2070/2144 [36:40<01:14,  1.00s/it]

{'loss': 0.0183, 'learning_rate': 3.451492537313433e-07, 'epoch': 3.86}


 97%|█████████▋| 2080/2144 [36:50<01:04,  1.00s/it]

{'loss': 0.0009, 'learning_rate': 2.9850746268656716e-07, 'epoch': 3.88}


 97%|█████████▋| 2090/2144 [37:00<00:53,  1.00it/s]

{'loss': 0.0008, 'learning_rate': 2.5186567164179105e-07, 'epoch': 3.9}


 98%|█████████▊| 2100/2144 [37:10<00:43,  1.01it/s]

{'loss': 0.0956, 'learning_rate': 2.0522388059701495e-07, 'epoch': 3.92}


 98%|█████████▊| 2110/2144 [37:20<00:33,  1.00it/s]

{'loss': 0.0005, 'learning_rate': 1.5858208955223882e-07, 'epoch': 3.94}


 99%|█████████▉| 2120/2144 [37:30<00:23,  1.00it/s]

{'loss': 0.0004, 'learning_rate': 1.1194029850746268e-07, 'epoch': 3.96}


 99%|█████████▉| 2130/2144 [37:40<00:14,  1.00s/it]

{'loss': 0.0016, 'learning_rate': 6.529850746268657e-08, 'epoch': 3.97}


100%|█████████▉| 2140/2144 [37:50<00:03,  1.00it/s]

{'loss': 0.0005, 'learning_rate': 1.8656716417910447e-08, 'epoch': 3.99}


                                                   
100%|██████████| 2144/2144 [38:25<00:00,  1.03it/s]

{'eval_loss': 2.0851545333862305, 'eval_accuracy': 0.7020979020979021, 'eval_precision': 0.7067038676702759, 'eval_recall': 0.7020979020979021, 'eval_f1': 0.7036665666990211, 'eval_runtime': 30.8782, 'eval_samples_per_second': 23.156, 'eval_steps_per_second': 2.915, 'epoch': 4.0}


100%|██████████| 2144/2144 [38:34<00:00,  1.08s/it]


{'train_runtime': 2314.0115, 'train_samples_per_second': 7.409, 'train_steps_per_second': 0.927, 'train_loss': 0.1184883113571924, 'epoch': 4.0}


100%|██████████| 90/90 [00:30<00:00,  2.95it/s]
100%|██████████| 90/90 [00:28<00:00,  3.19it/s]
100%|██████████| 90/90 [00:30<00:00,  2.97it/s]


{'accuracy': 0.7020979020979021, 'precision': 0.7057426189770213, 'recall': 0.7020979020979021, 'f1': 0.7019692239508026}
{'accuracy': 0.6904761904761905, 'precision': 0.6930159999867652, 'recall': 0.6904761904761905, 'f1': 0.6895264894512362}


  0%|          | 10/2144 [00:10<36:01,  1.01s/it] 

{'loss': 0.0918, 'learning_rate': 9.953358208955226e-06, 'epoch': 0.02}


  1%|          | 20/2144 [00:20<35:21,  1.00it/s]

{'loss': 0.01, 'learning_rate': 9.906716417910449e-06, 'epoch': 0.04}


  1%|▏         | 30/2144 [00:30<35:02,  1.01it/s]

{'loss': 0.3795, 'learning_rate': 9.860074626865672e-06, 'epoch': 0.06}


  2%|▏         | 40/2144 [00:40<34:56,  1.00it/s]

{'loss': 0.3644, 'learning_rate': 9.813432835820897e-06, 'epoch': 0.07}


  2%|▏         | 50/2144 [00:50<34:45,  1.00it/s]

{'loss': 0.1222, 'learning_rate': 9.76679104477612e-06, 'epoch': 0.09}


  3%|▎         | 60/2144 [01:00<34:39,  1.00it/s]

{'loss': 0.0861, 'learning_rate': 9.720149253731343e-06, 'epoch': 0.11}


  3%|▎         | 70/2144 [01:10<34:13,  1.01it/s]

{'loss': 0.2043, 'learning_rate': 9.673507462686568e-06, 'epoch': 0.13}


  4%|▎         | 80/2144 [01:20<34:09,  1.01it/s]

{'loss': 0.1041, 'learning_rate': 9.626865671641792e-06, 'epoch': 0.15}


  4%|▍         | 90/2144 [01:30<33:57,  1.01it/s]

{'loss': 0.1469, 'learning_rate': 9.580223880597016e-06, 'epoch': 0.17}


  5%|▍         | 100/2144 [01:40<34:00,  1.00it/s]

{'loss': 0.336, 'learning_rate': 9.533582089552239e-06, 'epoch': 0.19}


  5%|▌         | 110/2144 [01:50<33:41,  1.01it/s]

{'loss': 0.0905, 'learning_rate': 9.486940298507463e-06, 'epoch': 0.21}


  6%|▌         | 120/2144 [02:00<33:35,  1.00it/s]

{'loss': 0.09, 'learning_rate': 9.440298507462688e-06, 'epoch': 0.22}


  6%|▌         | 130/2144 [02:10<33:17,  1.01it/s]

{'loss': 0.3254, 'learning_rate': 9.393656716417911e-06, 'epoch': 0.24}


  7%|▋         | 140/2144 [02:20<33:12,  1.01it/s]

{'loss': 0.2547, 'learning_rate': 9.347014925373134e-06, 'epoch': 0.26}


  7%|▋         | 150/2144 [02:30<33:06,  1.00it/s]

{'loss': 0.1775, 'learning_rate': 9.30037313432836e-06, 'epoch': 0.28}


  7%|▋         | 160/2144 [02:40<33:01,  1.00it/s]

{'loss': 0.1261, 'learning_rate': 9.253731343283582e-06, 'epoch': 0.3}


  8%|▊         | 170/2144 [02:49<32:50,  1.00it/s]

{'loss': 0.0712, 'learning_rate': 9.207089552238807e-06, 'epoch': 0.32}


  8%|▊         | 180/2144 [02:59<32:33,  1.01it/s]

{'loss': 0.2382, 'learning_rate': 9.16044776119403e-06, 'epoch': 0.34}


  9%|▉         | 190/2144 [03:09<32:20,  1.01it/s]

{'loss': 0.1791, 'learning_rate': 9.113805970149255e-06, 'epoch': 0.35}


  9%|▉         | 200/2144 [03:19<32:13,  1.01it/s]

{'loss': 0.2649, 'learning_rate': 9.067164179104478e-06, 'epoch': 0.37}


 10%|▉         | 210/2144 [03:29<32:14,  1.00s/it]

{'loss': 0.3029, 'learning_rate': 9.020522388059703e-06, 'epoch': 0.39}


 10%|█         | 220/2144 [03:39<31:58,  1.00it/s]

{'loss': 0.111, 'learning_rate': 8.973880597014926e-06, 'epoch': 0.41}


 11%|█         | 230/2144 [03:49<31:57,  1.00s/it]

{'loss': 0.1255, 'learning_rate': 8.927238805970149e-06, 'epoch': 0.43}


 11%|█         | 240/2144 [03:59<31:33,  1.01it/s]

{'loss': 0.2449, 'learning_rate': 8.880597014925374e-06, 'epoch': 0.45}


 12%|█▏        | 250/2144 [04:09<31:15,  1.01it/s]

{'loss': 0.0709, 'learning_rate': 8.833955223880599e-06, 'epoch': 0.47}


 12%|█▏        | 260/2144 [04:19<31:14,  1.01it/s]

{'loss': 0.2859, 'learning_rate': 8.787313432835822e-06, 'epoch': 0.49}


 13%|█▎        | 270/2144 [04:29<31:08,  1.00it/s]

{'loss': 0.6222, 'learning_rate': 8.740671641791045e-06, 'epoch': 0.5}


 13%|█▎        | 280/2144 [04:39<31:00,  1.00it/s]

{'loss': 0.0832, 'learning_rate': 8.69402985074627e-06, 'epoch': 0.52}


 14%|█▎        | 290/2144 [04:49<30:48,  1.00it/s]

{'loss': 0.1025, 'learning_rate': 8.647388059701494e-06, 'epoch': 0.54}


 14%|█▍        | 300/2144 [04:59<30:38,  1.00it/s]

{'loss': 0.25, 'learning_rate': 8.600746268656716e-06, 'epoch': 0.56}


 14%|█▍        | 310/2144 [05:09<30:26,  1.00it/s]

{'loss': 0.4346, 'learning_rate': 8.55410447761194e-06, 'epoch': 0.58}


 15%|█▍        | 320/2144 [05:19<30:22,  1.00it/s]

{'loss': 0.081, 'learning_rate': 8.507462686567165e-06, 'epoch': 0.6}


 15%|█▌        | 330/2144 [05:29<30:12,  1.00it/s]

{'loss': 0.0737, 'learning_rate': 8.460820895522389e-06, 'epoch': 0.62}


 16%|█▌        | 340/2144 [05:39<30:02,  1.00it/s]

{'loss': 0.1536, 'learning_rate': 8.414179104477612e-06, 'epoch': 0.63}


 16%|█▋        | 350/2144 [05:49<29:50,  1.00it/s]

{'loss': 0.3791, 'learning_rate': 8.367537313432836e-06, 'epoch': 0.65}


 17%|█▋        | 360/2144 [05:59<29:35,  1.00it/s]

{'loss': 0.0598, 'learning_rate': 8.320895522388061e-06, 'epoch': 0.67}


 17%|█▋        | 370/2144 [06:09<29:25,  1.01it/s]

{'loss': 0.002, 'learning_rate': 8.274253731343284e-06, 'epoch': 0.69}


 18%|█▊        | 380/2144 [06:19<29:21,  1.00it/s]

{'loss': 0.0441, 'learning_rate': 8.227611940298507e-06, 'epoch': 0.71}


 18%|█▊        | 390/2144 [06:29<29:00,  1.01it/s]

{'loss': 0.2005, 'learning_rate': 8.180970149253732e-06, 'epoch': 0.73}


 19%|█▊        | 400/2144 [06:39<29:05,  1.00s/it]

{'loss': 0.1549, 'learning_rate': 8.134328358208955e-06, 'epoch': 0.75}


 19%|█▉        | 410/2144 [06:49<29:00,  1.00s/it]

{'loss': 0.1862, 'learning_rate': 8.08768656716418e-06, 'epoch': 0.76}


 20%|█▉        | 420/2144 [06:59<28:49,  1.00s/it]

{'loss': 0.0087, 'learning_rate': 8.041044776119403e-06, 'epoch': 0.78}


 20%|██        | 430/2144 [07:09<28:19,  1.01it/s]

{'loss': 0.2491, 'learning_rate': 7.994402985074628e-06, 'epoch': 0.8}


 21%|██        | 440/2144 [07:19<28:27,  1.00s/it]

{'loss': 0.1539, 'learning_rate': 7.947761194029851e-06, 'epoch': 0.82}


 21%|██        | 450/2144 [07:29<28:12,  1.00it/s]

{'loss': 0.1085, 'learning_rate': 7.901119402985076e-06, 'epoch': 0.84}


 21%|██▏       | 460/2144 [07:39<28:05,  1.00s/it]

{'loss': 0.1032, 'learning_rate': 7.854477611940299e-06, 'epoch': 0.86}


 22%|██▏       | 470/2144 [07:49<27:59,  1.00s/it]

{'loss': 0.4005, 'learning_rate': 7.807835820895522e-06, 'epoch': 0.88}


 22%|██▏       | 480/2144 [07:59<27:32,  1.01it/s]

{'loss': 0.3491, 'learning_rate': 7.761194029850747e-06, 'epoch': 0.9}


 23%|██▎       | 490/2144 [08:09<27:15,  1.01it/s]

{'loss': 0.2869, 'learning_rate': 7.714552238805972e-06, 'epoch': 0.91}


 23%|██▎       | 500/2144 [08:19<27:23,  1.00it/s]

{'loss': 0.1287, 'learning_rate': 7.667910447761195e-06, 'epoch': 0.93}


 24%|██▍       | 510/2144 [08:29<27:08,  1.00it/s]

{'loss': 0.0865, 'learning_rate': 7.621268656716419e-06, 'epoch': 0.95}


 24%|██▍       | 520/2144 [08:39<27:02,  1.00it/s]

{'loss': 0.2123, 'learning_rate': 7.574626865671643e-06, 'epoch': 0.97}


 25%|██▍       | 530/2144 [08:48<26:43,  1.01it/s]

{'loss': 0.2083, 'learning_rate': 7.527985074626867e-06, 'epoch': 0.99}


                                                  
 25%|██▌       | 536/2144 [09:24<25:47,  1.04it/s]

{'eval_loss': 1.8239643573760986, 'eval_accuracy': 0.7188811188811188, 'eval_precision': 0.7151165620098818, 'eval_recall': 0.7188811188811188, 'eval_f1': 0.716101308484136, 'eval_runtime': 29.9696, 'eval_samples_per_second': 23.857, 'eval_steps_per_second': 3.003, 'epoch': 1.0}


 25%|██▌       | 540/2144 [09:35<2:06:46,  4.74s/it]

{'loss': 0.3223, 'learning_rate': 7.48134328358209e-06, 'epoch': 1.01}


 26%|██▌       | 550/2144 [09:45<29:16,  1.10s/it]  

{'loss': 0.0962, 'learning_rate': 7.434701492537314e-06, 'epoch': 1.03}


 26%|██▌       | 560/2144 [09:55<26:15,  1.01it/s]

{'loss': 0.0221, 'learning_rate': 7.3880597014925385e-06, 'epoch': 1.04}


 27%|██▋       | 570/2144 [10:05<26:09,  1.00it/s]

{'loss': 0.0894, 'learning_rate': 7.3414179104477625e-06, 'epoch': 1.06}


 27%|██▋       | 580/2144 [10:15<26:03,  1.00it/s]

{'loss': 0.2715, 'learning_rate': 7.2947761194029856e-06, 'epoch': 1.08}


 28%|██▊       | 590/2144 [10:25<26:03,  1.01s/it]

{'loss': 0.0022, 'learning_rate': 7.2481343283582095e-06, 'epoch': 1.1}


 28%|██▊       | 600/2144 [10:35<25:55,  1.01s/it]

{'loss': 0.0517, 'learning_rate': 7.2014925373134335e-06, 'epoch': 1.12}


 28%|██▊       | 610/2144 [10:45<25:32,  1.00it/s]

{'loss': 0.2033, 'learning_rate': 7.154850746268658e-06, 'epoch': 1.14}


 29%|██▉       | 620/2144 [10:55<25:24,  1.00s/it]

{'loss': 0.001, 'learning_rate': 7.1082089552238805e-06, 'epoch': 1.16}


 29%|██▉       | 630/2144 [11:05<25:09,  1.00it/s]

{'loss': 0.2727, 'learning_rate': 7.061567164179105e-06, 'epoch': 1.18}


 30%|██▉       | 640/2144 [11:15<24:59,  1.00it/s]

{'loss': 0.0602, 'learning_rate': 7.014925373134329e-06, 'epoch': 1.19}


 30%|███       | 650/2144 [11:25<24:52,  1.00it/s]

{'loss': 0.167, 'learning_rate': 6.968283582089553e-06, 'epoch': 1.21}


 31%|███       | 660/2144 [11:35<24:38,  1.00it/s]

{'loss': 0.0791, 'learning_rate': 6.921641791044776e-06, 'epoch': 1.23}


 31%|███▏      | 670/2144 [11:45<24:20,  1.01it/s]

{'loss': 0.0021, 'learning_rate': 6.875e-06, 'epoch': 1.25}


 32%|███▏      | 680/2144 [11:55<24:21,  1.00it/s]

{'loss': 0.1071, 'learning_rate': 6.828358208955225e-06, 'epoch': 1.27}


 32%|███▏      | 690/2144 [12:04<24:05,  1.01it/s]

{'loss': 0.1775, 'learning_rate': 6.781716417910448e-06, 'epoch': 1.29}


 33%|███▎      | 700/2144 [12:14<23:59,  1.00it/s]

{'loss': 0.2792, 'learning_rate': 6.735074626865672e-06, 'epoch': 1.31}


 33%|███▎      | 710/2144 [12:25<23:57,  1.00s/it]

{'loss': 0.0656, 'learning_rate': 6.688432835820896e-06, 'epoch': 1.32}


 34%|███▎      | 720/2144 [12:34<23:33,  1.01it/s]

{'loss': 0.0689, 'learning_rate': 6.64179104477612e-06, 'epoch': 1.34}


 34%|███▍      | 730/2144 [12:44<23:33,  1.00it/s]

{'loss': 0.0018, 'learning_rate': 6.595149253731343e-06, 'epoch': 1.36}


 35%|███▍      | 740/2144 [12:54<23:17,  1.00it/s]

{'loss': 0.0007, 'learning_rate': 6.548507462686567e-06, 'epoch': 1.38}


 35%|███▍      | 750/2144 [13:04<23:02,  1.01it/s]

{'loss': 0.0011, 'learning_rate': 6.501865671641792e-06, 'epoch': 1.4}


 35%|███▌      | 760/2144 [13:14<22:58,  1.00it/s]

{'loss': 0.1018, 'learning_rate': 6.455223880597016e-06, 'epoch': 1.42}


 36%|███▌      | 770/2144 [13:24<22:42,  1.01it/s]

{'loss': 0.1265, 'learning_rate': 6.408582089552239e-06, 'epoch': 1.44}


 36%|███▋      | 780/2144 [13:34<22:34,  1.01it/s]

{'loss': 0.1967, 'learning_rate': 6.361940298507463e-06, 'epoch': 1.46}


 37%|███▋      | 790/2144 [13:44<22:27,  1.01it/s]

{'loss': 0.1786, 'learning_rate': 6.315298507462687e-06, 'epoch': 1.47}


 37%|███▋      | 800/2144 [13:54<22:19,  1.00it/s]

{'loss': 0.0273, 'learning_rate': 6.2686567164179116e-06, 'epoch': 1.49}


 38%|███▊      | 810/2144 [14:04<22:01,  1.01it/s]

{'loss': 0.0357, 'learning_rate': 6.222014925373135e-06, 'epoch': 1.51}


 38%|███▊      | 820/2144 [14:14<21:56,  1.01it/s]

{'loss': 0.0163, 'learning_rate': 6.175373134328359e-06, 'epoch': 1.53}


 39%|███▊      | 830/2144 [14:24<21:48,  1.00it/s]

{'loss': 0.1694, 'learning_rate': 6.1287313432835826e-06, 'epoch': 1.55}


 39%|███▉      | 840/2144 [14:34<21:38,  1.00it/s]

{'loss': 0.1783, 'learning_rate': 6.0820895522388065e-06, 'epoch': 1.57}


 40%|███▉      | 850/2144 [14:44<21:32,  1.00it/s]

{'loss': 0.1035, 'learning_rate': 6.03544776119403e-06, 'epoch': 1.59}


 40%|████      | 860/2144 [14:54<21:09,  1.01it/s]

{'loss': 0.0646, 'learning_rate': 5.988805970149254e-06, 'epoch': 1.6}


 41%|████      | 870/2144 [15:04<21:10,  1.00it/s]

{'loss': 0.205, 'learning_rate': 5.942164179104478e-06, 'epoch': 1.62}


 41%|████      | 880/2144 [15:14<21:10,  1.01s/it]

{'loss': 0.0904, 'learning_rate': 5.895522388059702e-06, 'epoch': 1.64}


 42%|████▏     | 890/2144 [15:24<21:05,  1.01s/it]

{'loss': 0.0012, 'learning_rate': 5.848880597014925e-06, 'epoch': 1.66}


 42%|████▏     | 900/2144 [15:34<20:46,  1.00s/it]

{'loss': 0.1135, 'learning_rate': 5.802238805970149e-06, 'epoch': 1.68}


 42%|████▏     | 910/2144 [15:44<20:36,  1.00s/it]

{'loss': 0.0069, 'learning_rate': 5.755597014925373e-06, 'epoch': 1.7}


 43%|████▎     | 920/2144 [15:54<20:26,  1.00s/it]

{'loss': 0.124, 'learning_rate': 5.708955223880598e-06, 'epoch': 1.72}


 43%|████▎     | 930/2144 [16:04<20:18,  1.00s/it]

{'loss': 0.0947, 'learning_rate': 5.662313432835821e-06, 'epoch': 1.74}


 44%|████▍     | 940/2144 [16:14<19:59,  1.00it/s]

{'loss': 0.0802, 'learning_rate': 5.615671641791045e-06, 'epoch': 1.75}


 44%|████▍     | 950/2144 [16:24<19:50,  1.00it/s]

{'loss': 0.1375, 'learning_rate': 5.569029850746269e-06, 'epoch': 1.77}


 45%|████▍     | 960/2144 [16:34<19:45,  1.00s/it]

{'loss': 0.2646, 'learning_rate': 5.522388059701493e-06, 'epoch': 1.79}


 45%|████▌     | 970/2144 [16:44<19:34,  1.00s/it]

{'loss': 0.2183, 'learning_rate': 5.475746268656716e-06, 'epoch': 1.81}


 46%|████▌     | 980/2144 [16:54<19:25,  1.00s/it]

{'loss': 0.132, 'learning_rate': 5.429104477611941e-06, 'epoch': 1.83}


 46%|████▌     | 990/2144 [17:04<19:11,  1.00it/s]

{'loss': 0.1256, 'learning_rate': 5.382462686567165e-06, 'epoch': 1.85}


 47%|████▋     | 1000/2144 [17:14<19:02,  1.00it/s]

{'loss': 0.0226, 'learning_rate': 5.335820895522389e-06, 'epoch': 1.87}


 47%|████▋     | 1010/2144 [17:24<18:57,  1.00s/it]

{'loss': 0.2474, 'learning_rate': 5.289179104477612e-06, 'epoch': 1.88}


 48%|████▊     | 1020/2144 [17:34<18:49,  1.00s/it]

{'loss': 0.024, 'learning_rate': 5.242537313432836e-06, 'epoch': 1.9}


 48%|████▊     | 1030/2144 [17:44<18:34,  1.00s/it]

{'loss': 0.002, 'learning_rate': 5.195895522388061e-06, 'epoch': 1.92}


 49%|████▊     | 1040/2144 [17:54<18:27,  1.00s/it]

{'loss': 0.0368, 'learning_rate': 5.149253731343285e-06, 'epoch': 1.94}


 49%|████▉     | 1050/2144 [18:04<18:14,  1.00s/it]

{'loss': 0.1037, 'learning_rate': 5.102611940298508e-06, 'epoch': 1.96}


 49%|████▉     | 1060/2144 [18:14<17:56,  1.01it/s]

{'loss': 0.0015, 'learning_rate': 5.055970149253732e-06, 'epoch': 1.98}


 50%|████▉     | 1070/2144 [18:24<17:56,  1.00s/it]

{'loss': 0.0006, 'learning_rate': 5.009328358208956e-06, 'epoch': 2.0}


                                                   
 50%|█████     | 1072/2144 [18:57<17:12,  1.04it/s]

{'eval_loss': 2.0783636569976807, 'eval_accuracy': 0.7062937062937062, 'eval_precision': 0.7180381081795931, 'eval_recall': 0.7062937062937062, 'eval_f1': 0.7088896930948695, 'eval_runtime': 30.7314, 'eval_samples_per_second': 23.266, 'eval_steps_per_second': 2.929, 'epoch': 2.0}


 50%|█████     | 1080/2144 [19:11<34:04,  1.92s/it]  

{'loss': 0.09, 'learning_rate': 4.9626865671641796e-06, 'epoch': 2.01}


 51%|█████     | 1090/2144 [19:21<18:01,  1.03s/it]

{'loss': 0.1946, 'learning_rate': 4.9160447761194035e-06, 'epoch': 2.03}


 51%|█████▏    | 1100/2144 [19:31<17:20,  1.00it/s]

{'loss': 0.0797, 'learning_rate': 4.8694029850746275e-06, 'epoch': 2.05}


 52%|█████▏    | 1110/2144 [19:41<17:15,  1.00s/it]

{'loss': 0.002, 'learning_rate': 4.822761194029851e-06, 'epoch': 2.07}


 52%|█████▏    | 1120/2144 [19:51<16:58,  1.01it/s]

{'loss': 0.0008, 'learning_rate': 4.7761194029850745e-06, 'epoch': 2.09}


 53%|█████▎    | 1130/2144 [20:01<16:50,  1.00it/s]

{'loss': 0.0005, 'learning_rate': 4.729477611940299e-06, 'epoch': 2.11}


 53%|█████▎    | 1140/2144 [20:11<16:40,  1.00it/s]

{'loss': 0.0004, 'learning_rate': 4.682835820895522e-06, 'epoch': 2.13}


 54%|█████▎    | 1150/2144 [20:21<16:35,  1.00s/it]

{'loss': 0.0057, 'learning_rate': 4.636194029850747e-06, 'epoch': 2.15}


 54%|█████▍    | 1160/2144 [20:31<16:22,  1.00it/s]

{'loss': 0.0409, 'learning_rate': 4.58955223880597e-06, 'epoch': 2.16}


 55%|█████▍    | 1170/2144 [20:41<16:09,  1.00it/s]

{'loss': 0.0004, 'learning_rate': 4.542910447761194e-06, 'epoch': 2.18}


 55%|█████▌    | 1180/2144 [20:51<16:04,  1.00s/it]

{'loss': 0.0671, 'learning_rate': 4.496268656716418e-06, 'epoch': 2.2}


 56%|█████▌    | 1190/2144 [21:01<15:52,  1.00it/s]

{'loss': 0.0819, 'learning_rate': 4.449626865671642e-06, 'epoch': 2.22}


 56%|█████▌    | 1200/2144 [21:11<15:45,  1.00s/it]

{'loss': 0.0011, 'learning_rate': 4.402985074626866e-06, 'epoch': 2.24}


 56%|█████▋    | 1210/2144 [21:21<15:31,  1.00it/s]

{'loss': 0.0038, 'learning_rate': 4.35634328358209e-06, 'epoch': 2.26}


 57%|█████▋    | 1220/2144 [21:31<15:23,  1.00it/s]

{'loss': 0.0007, 'learning_rate': 4.309701492537314e-06, 'epoch': 2.28}


 57%|█████▋    | 1230/2144 [21:41<15:10,  1.00it/s]

{'loss': 0.004, 'learning_rate': 4.263059701492538e-06, 'epoch': 2.29}


 58%|█████▊    | 1240/2144 [21:51<15:01,  1.00it/s]

{'loss': 0.0003, 'learning_rate': 4.216417910447762e-06, 'epoch': 2.31}


 58%|█████▊    | 1250/2144 [22:01<14:54,  1.00s/it]

{'loss': 0.1134, 'learning_rate': 4.169776119402986e-06, 'epoch': 2.33}


 59%|█████▉    | 1260/2144 [22:11<14:46,  1.00s/it]

{'loss': 0.0004, 'learning_rate': 4.123134328358209e-06, 'epoch': 2.35}


 59%|█████▉    | 1270/2144 [22:21<14:29,  1.01it/s]

{'loss': 0.0835, 'learning_rate': 4.076492537313434e-06, 'epoch': 2.37}


 60%|█████▉    | 1280/2144 [22:31<14:25,  1.00s/it]

{'loss': 0.1216, 'learning_rate': 4.029850746268657e-06, 'epoch': 2.39}


 60%|██████    | 1290/2144 [22:41<14:13,  1.00it/s]

{'loss': 0.0004, 'learning_rate': 3.983208955223881e-06, 'epoch': 2.41}


 61%|██████    | 1300/2144 [22:51<14:02,  1.00it/s]

{'loss': 0.0032, 'learning_rate': 3.936567164179105e-06, 'epoch': 2.43}


 61%|██████    | 1310/2144 [23:00<13:52,  1.00it/s]

{'loss': 0.0176, 'learning_rate': 3.889925373134329e-06, 'epoch': 2.44}


 62%|██████▏   | 1320/2144 [23:10<13:44,  1.00s/it]

{'loss': 0.0014, 'learning_rate': 3.843283582089553e-06, 'epoch': 2.46}


 62%|██████▏   | 1330/2144 [23:20<13:32,  1.00it/s]

{'loss': 0.1628, 'learning_rate': 3.7966417910447766e-06, 'epoch': 2.48}


 62%|██████▎   | 1340/2144 [23:30<13:22,  1.00it/s]

{'loss': 0.056, 'learning_rate': 3.7500000000000005e-06, 'epoch': 2.5}


 63%|██████▎   | 1350/2144 [23:40<13:11,  1.00it/s]

{'loss': 0.0004, 'learning_rate': 3.703358208955224e-06, 'epoch': 2.52}


 63%|██████▎   | 1360/2144 [23:50<13:05,  1.00s/it]

{'loss': 0.0017, 'learning_rate': 3.656716417910448e-06, 'epoch': 2.54}


 64%|██████▍   | 1370/2144 [24:00<12:49,  1.01it/s]

{'loss': 0.0019, 'learning_rate': 3.6100746268656715e-06, 'epoch': 2.56}


 64%|██████▍   | 1380/2144 [24:10<12:41,  1.00it/s]

{'loss': 0.0343, 'learning_rate': 3.563432835820896e-06, 'epoch': 2.57}


 65%|██████▍   | 1390/2144 [24:20<12:32,  1.00it/s]

{'loss': 0.0008, 'learning_rate': 3.5167910447761194e-06, 'epoch': 2.59}


 65%|██████▌   | 1400/2144 [24:30<12:25,  1.00s/it]

{'loss': 0.1156, 'learning_rate': 3.4701492537313438e-06, 'epoch': 2.61}


 66%|██████▌   | 1410/2144 [24:40<12:12,  1.00it/s]

{'loss': 0.0936, 'learning_rate': 3.4235074626865673e-06, 'epoch': 2.63}


 66%|██████▌   | 1420/2144 [24:50<12:02,  1.00it/s]

{'loss': 0.0003, 'learning_rate': 3.3768656716417913e-06, 'epoch': 2.65}


 67%|██████▋   | 1430/2144 [25:00<11:53,  1.00it/s]

{'loss': 0.0868, 'learning_rate': 3.3302238805970148e-06, 'epoch': 2.67}


 67%|██████▋   | 1440/2144 [25:10<11:43,  1.00it/s]

{'loss': 0.0881, 'learning_rate': 3.283582089552239e-06, 'epoch': 2.69}


 68%|██████▊   | 1450/2144 [25:20<11:35,  1.00s/it]

{'loss': 0.0027, 'learning_rate': 3.2369402985074627e-06, 'epoch': 2.71}


 68%|██████▊   | 1460/2144 [25:30<11:23,  1.00it/s]

{'loss': 0.0015, 'learning_rate': 3.190298507462687e-06, 'epoch': 2.72}


 69%|██████▊   | 1470/2144 [25:40<11:15,  1.00s/it]

{'loss': 0.0695, 'learning_rate': 3.1436567164179106e-06, 'epoch': 2.74}


 69%|██████▉   | 1480/2144 [25:50<11:03,  1.00it/s]

{'loss': 0.0005, 'learning_rate': 3.0970149253731345e-06, 'epoch': 2.76}


 69%|██████▉   | 1490/2144 [26:00<10:57,  1.01s/it]

{'loss': 0.046, 'learning_rate': 3.050373134328358e-06, 'epoch': 2.78}


 70%|██████▉   | 1500/2144 [26:10<10:47,  1.00s/it]

{'loss': 0.0005, 'learning_rate': 3.0037313432835824e-06, 'epoch': 2.8}


 70%|███████   | 1510/2144 [26:20<10:33,  1.00it/s]

{'loss': 0.167, 'learning_rate': 2.957089552238806e-06, 'epoch': 2.82}


 71%|███████   | 1520/2144 [26:30<10:25,  1.00s/it]

{'loss': 0.0004, 'learning_rate': 2.9104477611940303e-06, 'epoch': 2.84}


 71%|███████▏  | 1530/2144 [26:40<10:10,  1.00it/s]

{'loss': 0.0005, 'learning_rate': 2.863805970149254e-06, 'epoch': 2.85}


 72%|███████▏  | 1540/2144 [26:50<10:01,  1.00it/s]

{'loss': 0.0475, 'learning_rate': 2.8171641791044778e-06, 'epoch': 2.87}


 72%|███████▏  | 1550/2144 [27:00<09:49,  1.01it/s]

{'loss': 0.0007, 'learning_rate': 2.7705223880597017e-06, 'epoch': 2.89}


 73%|███████▎  | 1560/2144 [27:10<09:43,  1.00it/s]

{'loss': 0.0068, 'learning_rate': 2.7238805970149257e-06, 'epoch': 2.91}


 73%|███████▎  | 1570/2144 [27:20<09:32,  1.00it/s]

{'loss': 0.0005, 'learning_rate': 2.677238805970149e-06, 'epoch': 2.93}


 74%|███████▎  | 1580/2144 [27:30<09:23,  1.00it/s]

{'loss': 0.0004, 'learning_rate': 2.6305970149253736e-06, 'epoch': 2.95}


 74%|███████▍  | 1590/2144 [27:40<09:14,  1.00s/it]

{'loss': 0.0608, 'learning_rate': 2.583955223880597e-06, 'epoch': 2.97}


 75%|███████▍  | 1600/2144 [27:50<09:00,  1.01it/s]

{'loss': 0.0005, 'learning_rate': 2.537313432835821e-06, 'epoch': 2.99}


                                                   
 75%|███████▌  | 1608/2144 [28:28<08:35,  1.04it/s]

{'eval_loss': 2.3281548023223877, 'eval_accuracy': 0.6965034965034965, 'eval_precision': 0.7202917956205733, 'eval_recall': 0.6965034965034965, 'eval_f1': 0.700643432573498, 'eval_runtime': 29.8784, 'eval_samples_per_second': 23.93, 'eval_steps_per_second': 3.012, 'epoch': 3.0}


 75%|███████▌  | 1610/2144 [28:36<1:16:28,  8.59s/it]

{'loss': 0.0422, 'learning_rate': 2.490671641791045e-06, 'epoch': 3.0}


 76%|███████▌  | 1620/2144 [28:46<10:41,  1.22s/it]  

{'loss': 0.0005, 'learning_rate': 2.444029850746269e-06, 'epoch': 3.02}


 76%|███████▌  | 1630/2144 [28:56<08:37,  1.01s/it]

{'loss': 0.0004, 'learning_rate': 2.397388059701493e-06, 'epoch': 3.04}


 76%|███████▋  | 1640/2144 [29:06<08:24,  1.00s/it]

{'loss': 0.0004, 'learning_rate': 2.350746268656717e-06, 'epoch': 3.06}


 77%|███████▋  | 1650/2144 [29:16<08:11,  1.00it/s]

{'loss': 0.0251, 'learning_rate': 2.3041044776119408e-06, 'epoch': 3.08}


 77%|███████▋  | 1660/2144 [29:26<08:01,  1.00it/s]

{'loss': 0.0004, 'learning_rate': 2.2574626865671643e-06, 'epoch': 3.1}


 78%|███████▊  | 1670/2144 [29:36<07:53,  1.00it/s]

{'loss': 0.0066, 'learning_rate': 2.2108208955223883e-06, 'epoch': 3.12}


 78%|███████▊  | 1680/2144 [29:46<07:42,  1.00it/s]

{'loss': 0.107, 'learning_rate': 2.1641791044776118e-06, 'epoch': 3.13}


 79%|███████▉  | 1690/2144 [29:56<07:33,  1.00it/s]

{'loss': 0.0003, 'learning_rate': 2.1175373134328357e-06, 'epoch': 3.15}


 79%|███████▉  | 1700/2144 [30:06<07:19,  1.01it/s]

{'loss': 0.0006, 'learning_rate': 2.0708955223880597e-06, 'epoch': 3.17}


 80%|███████▉  | 1710/2144 [30:16<07:13,  1.00it/s]

{'loss': 0.0003, 'learning_rate': 2.0242537313432836e-06, 'epoch': 3.19}


 80%|████████  | 1720/2144 [30:26<07:03,  1.00it/s]

{'loss': 0.0003, 'learning_rate': 1.9776119402985076e-06, 'epoch': 3.21}


 81%|████████  | 1730/2144 [30:36<06:51,  1.01it/s]

{'loss': 0.0268, 'learning_rate': 1.9309701492537315e-06, 'epoch': 3.23}


 81%|████████  | 1740/2144 [30:46<06:42,  1.00it/s]

{'loss': 0.0004, 'learning_rate': 1.8843283582089553e-06, 'epoch': 3.25}


 82%|████████▏ | 1750/2144 [30:56<06:33,  1.00it/s]

{'loss': 0.0994, 'learning_rate': 1.8376865671641792e-06, 'epoch': 3.26}


 82%|████████▏ | 1760/2144 [31:06<06:20,  1.01it/s]

{'loss': 0.0262, 'learning_rate': 1.791044776119403e-06, 'epoch': 3.28}


 83%|████████▎ | 1770/2144 [31:16<06:15,  1.00s/it]

{'loss': 0.0003, 'learning_rate': 1.7444029850746269e-06, 'epoch': 3.3}


 83%|████████▎ | 1780/2144 [31:26<06:03,  1.00it/s]

{'loss': 0.0354, 'learning_rate': 1.6977611940298508e-06, 'epoch': 3.32}


 83%|████████▎ | 1790/2144 [31:36<05:54,  1.00s/it]

{'loss': 0.0442, 'learning_rate': 1.6511194029850746e-06, 'epoch': 3.34}


 84%|████████▍ | 1800/2144 [31:46<05:43,  1.00it/s]

{'loss': 0.0007, 'learning_rate': 1.6044776119402985e-06, 'epoch': 3.36}


 84%|████████▍ | 1810/2144 [31:56<05:32,  1.00it/s]

{'loss': 0.0557, 'learning_rate': 1.5578358208955225e-06, 'epoch': 3.38}


 85%|████████▍ | 1820/2144 [32:06<05:24,  1.00s/it]

{'loss': 0.0005, 'learning_rate': 1.5111940298507464e-06, 'epoch': 3.4}


 85%|████████▌ | 1830/2144 [32:16<05:13,  1.00it/s]

{'loss': 0.0033, 'learning_rate': 1.4645522388059702e-06, 'epoch': 3.41}


 86%|████████▌ | 1840/2144 [32:26<05:03,  1.00it/s]

{'loss': 0.0003, 'learning_rate': 1.417910447761194e-06, 'epoch': 3.43}


 86%|████████▋ | 1850/2144 [32:36<04:53,  1.00it/s]

{'loss': 0.0003, 'learning_rate': 1.371268656716418e-06, 'epoch': 3.45}


 87%|████████▋ | 1860/2144 [32:46<04:45,  1.00s/it]

{'loss': 0.001, 'learning_rate': 1.3246268656716418e-06, 'epoch': 3.47}


 87%|████████▋ | 1870/2144 [32:56<04:33,  1.00it/s]

{'loss': 0.0002, 'learning_rate': 1.2779850746268657e-06, 'epoch': 3.49}


 88%|████████▊ | 1880/2144 [33:06<04:21,  1.01it/s]

{'loss': 0.0139, 'learning_rate': 1.2313432835820897e-06, 'epoch': 3.51}


 88%|████████▊ | 1890/2144 [33:16<04:13,  1.00it/s]

{'loss': 0.0004, 'learning_rate': 1.1847014925373134e-06, 'epoch': 3.53}


 89%|████████▊ | 1900/2144 [33:26<04:02,  1.01it/s]

{'loss': 0.0003, 'learning_rate': 1.1380597014925374e-06, 'epoch': 3.54}


 89%|████████▉ | 1910/2144 [33:36<03:53,  1.00it/s]

{'loss': 0.0028, 'learning_rate': 1.0914179104477613e-06, 'epoch': 3.56}


 90%|████████▉ | 1920/2144 [33:46<03:43,  1.00it/s]

{'loss': 0.1077, 'learning_rate': 1.044776119402985e-06, 'epoch': 3.58}


 90%|█████████ | 1930/2144 [33:56<03:32,  1.01it/s]

{'loss': 0.0002, 'learning_rate': 9.98134328358209e-07, 'epoch': 3.6}


 90%|█████████ | 1940/2144 [34:06<03:24,  1.00s/it]

{'loss': 0.0003, 'learning_rate': 9.514925373134328e-07, 'epoch': 3.62}


 91%|█████████ | 1950/2144 [34:16<03:12,  1.01it/s]

{'loss': 0.0003, 'learning_rate': 9.048507462686568e-07, 'epoch': 3.64}


 91%|█████████▏| 1960/2144 [34:26<03:05,  1.01s/it]

{'loss': 0.0004, 'learning_rate': 8.582089552238806e-07, 'epoch': 3.66}


 92%|█████████▏| 1970/2144 [34:36<02:54,  1.00s/it]

{'loss': 0.0022, 'learning_rate': 8.115671641791046e-07, 'epoch': 3.68}


 92%|█████████▏| 1980/2144 [34:46<02:43,  1.01it/s]

{'loss': 0.0031, 'learning_rate': 7.649253731343284e-07, 'epoch': 3.69}


 93%|█████████▎| 1990/2144 [34:56<02:34,  1.01s/it]

{'loss': 0.0025, 'learning_rate': 7.182835820895523e-07, 'epoch': 3.71}


 93%|█████████▎| 2000/2144 [35:06<02:23,  1.01it/s]

{'loss': 0.0021, 'learning_rate': 6.716417910447762e-07, 'epoch': 3.73}


 94%|█████████▍| 2010/2144 [35:16<02:13,  1.01it/s]

{'loss': 0.0013, 'learning_rate': 6.25e-07, 'epoch': 3.75}


 94%|█████████▍| 2020/2144 [35:25<02:03,  1.00it/s]

{'loss': 0.0206, 'learning_rate': 5.783582089552239e-07, 'epoch': 3.77}


 95%|█████████▍| 2030/2144 [35:35<01:54,  1.00s/it]

{'loss': 0.0003, 'learning_rate': 5.317164179104478e-07, 'epoch': 3.79}


 95%|█████████▌| 2040/2144 [35:45<01:43,  1.00it/s]

{'loss': 0.0006, 'learning_rate': 4.850746268656717e-07, 'epoch': 3.81}


 96%|█████████▌| 2050/2144 [35:55<01:33,  1.00it/s]

{'loss': 0.0064, 'learning_rate': 4.384328358208956e-07, 'epoch': 3.82}


 96%|█████████▌| 2060/2144 [36:05<01:23,  1.00it/s]

{'loss': 0.0904, 'learning_rate': 3.9179104477611947e-07, 'epoch': 3.84}


 97%|█████████▋| 2070/2144 [36:15<01:13,  1.00it/s]

{'loss': 0.0009, 'learning_rate': 3.451492537313433e-07, 'epoch': 3.86}


 97%|█████████▋| 2080/2144 [36:25<01:03,  1.01it/s]

{'loss': 0.0003, 'learning_rate': 2.9850746268656716e-07, 'epoch': 3.88}


 97%|█████████▋| 2090/2144 [36:35<00:53,  1.00it/s]

{'loss': 0.0003, 'learning_rate': 2.5186567164179105e-07, 'epoch': 3.9}


 98%|█████████▊| 2100/2144 [36:45<00:44,  1.00s/it]

{'loss': 0.0388, 'learning_rate': 2.0522388059701495e-07, 'epoch': 3.92}


 98%|█████████▊| 2110/2144 [36:55<00:33,  1.00it/s]

{'loss': 0.0003, 'learning_rate': 1.5858208955223882e-07, 'epoch': 3.94}


 99%|█████████▉| 2120/2144 [37:05<00:24,  1.00s/it]

{'loss': 0.0003, 'learning_rate': 1.1194029850746268e-07, 'epoch': 3.96}


 99%|█████████▉| 2130/2144 [37:15<00:13,  1.00it/s]

{'loss': 0.0004, 'learning_rate': 6.529850746268657e-08, 'epoch': 3.97}


100%|█████████▉| 2140/2144 [37:25<00:04,  1.00s/it]

{'loss': 0.0003, 'learning_rate': 1.8656716417910447e-08, 'epoch': 3.99}


                                                   
100%|██████████| 2144/2144 [37:59<00:00,  1.02it/s]

{'eval_loss': 2.2869796752929688, 'eval_accuracy': 0.7076923076923077, 'eval_precision': 0.7101908336711066, 'eval_recall': 0.7076923076923077, 'eval_f1': 0.7082484243144835, 'eval_runtime': 29.7076, 'eval_samples_per_second': 24.068, 'eval_steps_per_second': 3.03, 'epoch': 4.0}


100%|██████████| 2144/2144 [38:08<00:00,  1.07s/it]


{'train_runtime': 2288.8802, 'train_samples_per_second': 7.49, 'train_steps_per_second': 0.937, 'train_loss': 0.08507606971824004, 'epoch': 4.0}


100%|██████████| 90/90 [00:30<00:00,  2.95it/s]
100%|██████████| 90/90 [00:29<00:00,  3.05it/s]
100%|██████████| 90/90 [00:30<00:00,  2.99it/s]


{'accuracy': 0.7188811188811188, 'precision': 0.7151165620098818, 'recall': 0.7188811188811188, 'f1': 0.716101308484136}
{'accuracy': 0.6834733893557423, 'precision': 0.6818862001603248, 'recall': 0.6834733893557423, 'f1': 0.6793587740237662}


  0%|          | 10/2144 [00:10<36:11,  1.02s/it] 

{'loss': 0.0019, 'learning_rate': 9.953358208955226e-06, 'epoch': 0.02}


  1%|          | 20/2144 [00:20<35:26,  1.00s/it]

{'loss': 0.0907, 'learning_rate': 9.906716417910449e-06, 'epoch': 0.04}


  1%|▏         | 30/2144 [00:30<35:07,  1.00it/s]

{'loss': 0.0998, 'learning_rate': 9.860074626865672e-06, 'epoch': 0.06}


  2%|▏         | 40/2144 [00:40<35:03,  1.00it/s]

{'loss': 0.3302, 'learning_rate': 9.813432835820897e-06, 'epoch': 0.07}


  2%|▏         | 50/2144 [00:50<34:38,  1.01it/s]

{'loss': 0.0847, 'learning_rate': 9.76679104477612e-06, 'epoch': 0.09}


  3%|▎         | 60/2144 [01:00<34:49,  1.00s/it]

{'loss': 0.2835, 'learning_rate': 9.720149253731343e-06, 'epoch': 0.11}


  3%|▎         | 70/2144 [01:10<34:31,  1.00it/s]

{'loss': 0.2781, 'learning_rate': 9.673507462686568e-06, 'epoch': 0.13}


  4%|▎         | 80/2144 [01:20<34:29,  1.00s/it]

{'loss': 0.0604, 'learning_rate': 9.626865671641792e-06, 'epoch': 0.15}


  4%|▍         | 90/2144 [01:30<34:24,  1.00s/it]

{'loss': 0.2153, 'learning_rate': 9.580223880597016e-06, 'epoch': 0.17}


  5%|▍         | 100/2144 [01:40<33:58,  1.00it/s]

{'loss': 0.0811, 'learning_rate': 9.533582089552239e-06, 'epoch': 0.19}


  5%|▌         | 110/2144 [01:50<34:04,  1.01s/it]

{'loss': 0.0013, 'learning_rate': 9.486940298507463e-06, 'epoch': 0.21}


  6%|▌         | 120/2144 [02:00<33:29,  1.01it/s]

{'loss': 0.0263, 'learning_rate': 9.440298507462688e-06, 'epoch': 0.22}


  6%|▌         | 130/2144 [02:10<33:19,  1.01it/s]

{'loss': 0.0244, 'learning_rate': 9.393656716417911e-06, 'epoch': 0.24}


  7%|▋         | 140/2144 [02:20<33:19,  1.00it/s]

{'loss': 0.1117, 'learning_rate': 9.347014925373134e-06, 'epoch': 0.26}


  7%|▋         | 150/2144 [02:30<33:11,  1.00it/s]

{'loss': 0.0923, 'learning_rate': 9.30037313432836e-06, 'epoch': 0.28}


  7%|▋         | 160/2144 [02:40<32:47,  1.01it/s]

{'loss': 0.1362, 'learning_rate': 9.253731343283582e-06, 'epoch': 0.3}


  8%|▊         | 170/2144 [02:50<32:49,  1.00it/s]

{'loss': 0.0416, 'learning_rate': 9.207089552238807e-06, 'epoch': 0.32}


  8%|▊         | 180/2144 [03:00<32:42,  1.00it/s]

{'loss': 0.0709, 'learning_rate': 9.16044776119403e-06, 'epoch': 0.34}


  9%|▉         | 190/2144 [03:10<32:35,  1.00s/it]

{'loss': 0.115, 'learning_rate': 9.113805970149255e-06, 'epoch': 0.35}


  9%|▉         | 200/2144 [03:20<32:16,  1.00it/s]

{'loss': 0.3168, 'learning_rate': 9.067164179104478e-06, 'epoch': 0.37}


 10%|▉         | 210/2144 [03:30<32:05,  1.00it/s]

{'loss': 0.0017, 'learning_rate': 9.020522388059703e-06, 'epoch': 0.39}


 10%|█         | 220/2144 [03:40<31:57,  1.00it/s]

{'loss': 0.0006, 'learning_rate': 8.973880597014926e-06, 'epoch': 0.41}


 11%|█         | 230/2144 [03:50<31:49,  1.00it/s]

{'loss': 0.0095, 'learning_rate': 8.927238805970149e-06, 'epoch': 0.43}


 11%|█         | 240/2144 [04:00<31:45,  1.00s/it]

{'loss': 0.0764, 'learning_rate': 8.880597014925374e-06, 'epoch': 0.45}


 12%|█▏        | 250/2144 [04:10<31:40,  1.00s/it]

{'loss': 0.164, 'learning_rate': 8.833955223880599e-06, 'epoch': 0.47}


 12%|█▏        | 260/2144 [04:20<31:25,  1.00s/it]

{'loss': 0.1435, 'learning_rate': 8.787313432835822e-06, 'epoch': 0.49}


 13%|█▎        | 270/2144 [04:30<31:15,  1.00s/it]

{'loss': 0.4973, 'learning_rate': 8.740671641791045e-06, 'epoch': 0.5}


 13%|█▎        | 280/2144 [04:40<31:07,  1.00s/it]

{'loss': 0.2044, 'learning_rate': 8.69402985074627e-06, 'epoch': 0.52}


 14%|█▎        | 290/2144 [04:50<30:45,  1.00it/s]

{'loss': 0.1809, 'learning_rate': 8.647388059701494e-06, 'epoch': 0.54}


 14%|█▍        | 300/2144 [05:00<30:46,  1.00s/it]

{'loss': 0.0733, 'learning_rate': 8.600746268656716e-06, 'epoch': 0.56}


 14%|█▍        | 310/2144 [05:10<30:45,  1.01s/it]

{'loss': 0.0114, 'learning_rate': 8.55410447761194e-06, 'epoch': 0.58}


 15%|█▍        | 320/2144 [05:20<30:23,  1.00it/s]

{'loss': 0.0014, 'learning_rate': 8.507462686567165e-06, 'epoch': 0.6}


 15%|█▌        | 330/2144 [05:30<30:14,  1.00s/it]

{'loss': 0.0174, 'learning_rate': 8.460820895522389e-06, 'epoch': 0.62}


 16%|█▌        | 340/2144 [05:40<30:06,  1.00s/it]

{'loss': 0.0232, 'learning_rate': 8.414179104477612e-06, 'epoch': 0.63}


 16%|█▋        | 350/2144 [05:50<29:52,  1.00it/s]

{'loss': 0.2313, 'learning_rate': 8.367537313432836e-06, 'epoch': 0.65}


 17%|█▋        | 360/2144 [06:00<29:41,  1.00it/s]

{'loss': 0.1178, 'learning_rate': 8.320895522388061e-06, 'epoch': 0.67}


 17%|█▋        | 370/2144 [06:10<29:34,  1.00s/it]

{'loss': 0.1491, 'learning_rate': 8.274253731343284e-06, 'epoch': 0.69}


 18%|█▊        | 380/2144 [06:20<29:21,  1.00it/s]

{'loss': 0.0009, 'learning_rate': 8.227611940298507e-06, 'epoch': 0.71}


 18%|█▊        | 390/2144 [06:30<29:17,  1.00s/it]

{'loss': 0.162, 'learning_rate': 8.180970149253732e-06, 'epoch': 0.73}


 19%|█▊        | 400/2144 [06:40<29:04,  1.00s/it]

{'loss': 0.2291, 'learning_rate': 8.134328358208955e-06, 'epoch': 0.75}


 19%|█▉        | 410/2144 [06:50<28:45,  1.00it/s]

{'loss': 0.0938, 'learning_rate': 8.08768656716418e-06, 'epoch': 0.76}


 20%|█▉        | 420/2144 [07:00<28:46,  1.00s/it]

{'loss': 0.0531, 'learning_rate': 8.041044776119403e-06, 'epoch': 0.78}


 20%|██        | 430/2144 [07:10<28:26,  1.00it/s]

{'loss': 0.1479, 'learning_rate': 7.994402985074628e-06, 'epoch': 0.8}


 21%|██        | 440/2144 [07:20<28:24,  1.00s/it]

{'loss': 0.1004, 'learning_rate': 7.947761194029851e-06, 'epoch': 0.82}


 21%|██        | 450/2144 [07:30<28:21,  1.00s/it]

{'loss': 0.0713, 'learning_rate': 7.901119402985076e-06, 'epoch': 0.84}


 21%|██▏       | 460/2144 [07:40<28:02,  1.00it/s]

{'loss': 0.0824, 'learning_rate': 7.854477611940299e-06, 'epoch': 0.86}


 22%|██▏       | 470/2144 [07:50<27:55,  1.00s/it]

{'loss': 0.1597, 'learning_rate': 7.807835820895522e-06, 'epoch': 0.88}


 22%|██▏       | 480/2144 [08:00<27:56,  1.01s/it]

{'loss': 0.3458, 'learning_rate': 7.761194029850747e-06, 'epoch': 0.9}


 23%|██▎       | 490/2144 [08:10<27:35,  1.00s/it]

{'loss': 0.2697, 'learning_rate': 7.714552238805972e-06, 'epoch': 0.91}


 23%|██▎       | 500/2144 [08:20<27:29,  1.00s/it]

{'loss': 0.1831, 'learning_rate': 7.667910447761195e-06, 'epoch': 0.93}


 24%|██▍       | 510/2144 [08:30<27:13,  1.00it/s]

{'loss': 0.1526, 'learning_rate': 7.621268656716419e-06, 'epoch': 0.95}


 24%|██▍       | 520/2144 [08:40<27:03,  1.00it/s]

{'loss': 0.1355, 'learning_rate': 7.574626865671643e-06, 'epoch': 0.97}


 25%|██▍       | 530/2144 [08:50<26:48,  1.00it/s]

{'loss': 0.1175, 'learning_rate': 7.527985074626867e-06, 'epoch': 0.99}


                                                  
 25%|██▌       | 536/2144 [09:26<25:41,  1.04it/s]

{'eval_loss': 2.053187847137451, 'eval_accuracy': 0.6937062937062937, 'eval_precision': 0.7108087824307312, 'eval_recall': 0.6937062937062937, 'eval_f1': 0.6934067795218153, 'eval_runtime': 30.2333, 'eval_samples_per_second': 23.649, 'eval_steps_per_second': 2.977, 'epoch': 1.0}


 25%|██▌       | 540/2144 [09:37<2:07:57,  4.79s/it]

{'loss': 0.0994, 'learning_rate': 7.48134328358209e-06, 'epoch': 1.01}


 26%|██▌       | 550/2144 [09:47<29:29,  1.11s/it]  

{'loss': 0.0619, 'learning_rate': 7.434701492537314e-06, 'epoch': 1.03}


 26%|██▌       | 560/2144 [09:57<26:29,  1.00s/it]

{'loss': 0.0855, 'learning_rate': 7.3880597014925385e-06, 'epoch': 1.04}


 27%|██▋       | 570/2144 [10:07<26:11,  1.00it/s]

{'loss': 0.0452, 'learning_rate': 7.3414179104477625e-06, 'epoch': 1.06}


 27%|██▋       | 580/2144 [10:17<26:04,  1.00s/it]

{'loss': 0.1119, 'learning_rate': 7.2947761194029856e-06, 'epoch': 1.08}


 28%|██▊       | 590/2144 [10:27<25:52,  1.00it/s]

{'loss': 0.0285, 'learning_rate': 7.2481343283582095e-06, 'epoch': 1.1}


 28%|██▊       | 600/2144 [10:37<25:43,  1.00it/s]

{'loss': 0.151, 'learning_rate': 7.2014925373134335e-06, 'epoch': 1.12}


 28%|██▊       | 610/2144 [10:47<25:24,  1.01it/s]

{'loss': 0.0327, 'learning_rate': 7.154850746268658e-06, 'epoch': 1.14}


 29%|██▉       | 620/2144 [10:57<25:14,  1.01it/s]

{'loss': 0.0021, 'learning_rate': 7.1082089552238805e-06, 'epoch': 1.16}


 29%|██▉       | 630/2144 [11:07<25:11,  1.00it/s]

{'loss': 0.0199, 'learning_rate': 7.061567164179105e-06, 'epoch': 1.18}


 30%|██▉       | 640/2144 [11:17<25:04,  1.00s/it]

{'loss': 0.0528, 'learning_rate': 7.014925373134329e-06, 'epoch': 1.19}


 30%|███       | 650/2144 [11:27<24:47,  1.00it/s]

{'loss': 0.0006, 'learning_rate': 6.968283582089553e-06, 'epoch': 1.21}


 31%|███       | 660/2144 [11:37<24:46,  1.00s/it]

{'loss': 0.0007, 'learning_rate': 6.921641791044776e-06, 'epoch': 1.23}


 31%|███▏      | 670/2144 [11:47<24:26,  1.01it/s]

{'loss': 0.0003, 'learning_rate': 6.875e-06, 'epoch': 1.25}


 32%|███▏      | 680/2144 [11:57<24:20,  1.00it/s]

{'loss': 0.0004, 'learning_rate': 6.828358208955225e-06, 'epoch': 1.27}


 32%|███▏      | 690/2144 [12:07<23:51,  1.02it/s]

{'loss': 0.0583, 'learning_rate': 6.781716417910448e-06, 'epoch': 1.29}


 33%|███▎      | 700/2144 [12:17<23:49,  1.01it/s]

{'loss': 0.1057, 'learning_rate': 6.735074626865672e-06, 'epoch': 1.31}


 33%|███▎      | 710/2144 [12:27<23:42,  1.01it/s]

{'loss': 0.0148, 'learning_rate': 6.688432835820896e-06, 'epoch': 1.32}


 34%|███▎      | 720/2144 [12:37<23:37,  1.00it/s]

{'loss': 0.0626, 'learning_rate': 6.64179104477612e-06, 'epoch': 1.34}


 34%|███▍      | 730/2144 [12:47<23:24,  1.01it/s]

{'loss': 0.001, 'learning_rate': 6.595149253731343e-06, 'epoch': 1.36}


 35%|███▍      | 740/2144 [12:57<23:25,  1.00s/it]

{'loss': 0.1976, 'learning_rate': 6.548507462686567e-06, 'epoch': 1.38}


 35%|███▍      | 750/2144 [13:06<23:08,  1.00it/s]

{'loss': 0.0755, 'learning_rate': 6.501865671641792e-06, 'epoch': 1.4}


 35%|███▌      | 760/2144 [13:16<22:57,  1.00it/s]

{'loss': 0.0372, 'learning_rate': 6.455223880597016e-06, 'epoch': 1.42}


 36%|███▌      | 770/2144 [13:26<22:45,  1.01it/s]

{'loss': 0.2167, 'learning_rate': 6.408582089552239e-06, 'epoch': 1.44}


 36%|███▋      | 780/2144 [13:36<23:00,  1.01s/it]

{'loss': 0.0118, 'learning_rate': 6.361940298507463e-06, 'epoch': 1.46}


 37%|███▋      | 790/2144 [13:46<22:45,  1.01s/it]

{'loss': 0.0927, 'learning_rate': 6.315298507462687e-06, 'epoch': 1.47}


 37%|███▋      | 800/2144 [13:56<22:22,  1.00it/s]

{'loss': 0.0005, 'learning_rate': 6.2686567164179116e-06, 'epoch': 1.49}


 38%|███▊      | 810/2144 [14:06<22:06,  1.01it/s]

{'loss': 0.0866, 'learning_rate': 6.222014925373135e-06, 'epoch': 1.51}


 38%|███▊      | 820/2144 [14:16<21:55,  1.01it/s]

{'loss': 0.0007, 'learning_rate': 6.175373134328359e-06, 'epoch': 1.53}


 39%|███▊      | 830/2144 [14:26<21:49,  1.00it/s]

{'loss': 0.0654, 'learning_rate': 6.1287313432835826e-06, 'epoch': 1.55}


 39%|███▉      | 840/2144 [14:36<21:45,  1.00s/it]

{'loss': 0.0276, 'learning_rate': 6.0820895522388065e-06, 'epoch': 1.57}


 40%|███▉      | 850/2144 [14:46<21:32,  1.00it/s]

{'loss': 0.0003, 'learning_rate': 6.03544776119403e-06, 'epoch': 1.59}


 40%|████      | 860/2144 [14:56<21:30,  1.01s/it]

{'loss': 0.0182, 'learning_rate': 5.988805970149254e-06, 'epoch': 1.6}


 41%|████      | 870/2144 [15:06<21:14,  1.00s/it]

{'loss': 0.0911, 'learning_rate': 5.942164179104478e-06, 'epoch': 1.62}


 41%|████      | 880/2144 [15:16<21:04,  1.00s/it]

{'loss': 0.1245, 'learning_rate': 5.895522388059702e-06, 'epoch': 1.64}


 42%|████▏     | 890/2144 [15:26<20:45,  1.01it/s]

{'loss': 0.0002, 'learning_rate': 5.848880597014925e-06, 'epoch': 1.66}


 42%|████▏     | 900/2144 [15:36<20:43,  1.00it/s]

{'loss': 0.0811, 'learning_rate': 5.802238805970149e-06, 'epoch': 1.68}


 42%|████▏     | 910/2144 [15:46<20:37,  1.00s/it]

{'loss': 0.0129, 'learning_rate': 5.755597014925373e-06, 'epoch': 1.7}


 43%|████▎     | 920/2144 [15:56<20:25,  1.00s/it]

{'loss': 0.0364, 'learning_rate': 5.708955223880598e-06, 'epoch': 1.72}


 43%|████▎     | 930/2144 [16:06<20:09,  1.00it/s]

{'loss': 0.0955, 'learning_rate': 5.662313432835821e-06, 'epoch': 1.74}


 44%|████▍     | 940/2144 [16:16<20:05,  1.00s/it]

{'loss': 0.0137, 'learning_rate': 5.615671641791045e-06, 'epoch': 1.75}


 44%|████▍     | 950/2144 [16:26<19:54,  1.00s/it]

{'loss': 0.2483, 'learning_rate': 5.569029850746269e-06, 'epoch': 1.77}


 45%|████▍     | 960/2144 [16:36<19:41,  1.00it/s]

{'loss': 0.0701, 'learning_rate': 5.522388059701493e-06, 'epoch': 1.79}


 45%|████▌     | 970/2144 [16:46<19:35,  1.00s/it]

{'loss': 0.1736, 'learning_rate': 5.475746268656716e-06, 'epoch': 1.81}


 46%|████▌     | 980/2144 [16:56<19:19,  1.00it/s]

{'loss': 0.0951, 'learning_rate': 5.429104477611941e-06, 'epoch': 1.83}


 46%|████▌     | 990/2144 [17:06<19:11,  1.00it/s]

{'loss': 0.0044, 'learning_rate': 5.382462686567165e-06, 'epoch': 1.85}


 47%|████▋     | 1000/2144 [17:16<19:02,  1.00it/s]

{'loss': 0.0085, 'learning_rate': 5.335820895522389e-06, 'epoch': 1.87}


 47%|████▋     | 1010/2144 [17:26<18:49,  1.00it/s]

{'loss': 0.0234, 'learning_rate': 5.289179104477612e-06, 'epoch': 1.88}


 48%|████▊     | 1020/2144 [17:36<18:37,  1.01it/s]

{'loss': 0.1054, 'learning_rate': 5.242537313432836e-06, 'epoch': 1.9}


 48%|████▊     | 1030/2144 [17:46<18:30,  1.00it/s]

{'loss': 0.0057, 'learning_rate': 5.195895522388061e-06, 'epoch': 1.92}


 49%|████▊     | 1040/2144 [17:56<18:19,  1.00it/s]

{'loss': 0.0721, 'learning_rate': 5.149253731343285e-06, 'epoch': 1.94}


 49%|████▉     | 1050/2144 [18:06<18:10,  1.00it/s]

{'loss': 0.0004, 'learning_rate': 5.102611940298508e-06, 'epoch': 1.96}


 49%|████▉     | 1060/2144 [18:16<17:57,  1.01it/s]

{'loss': 0.0065, 'learning_rate': 5.055970149253732e-06, 'epoch': 1.98}


 50%|████▉     | 1070/2144 [18:26<17:46,  1.01it/s]

{'loss': 0.0003, 'learning_rate': 5.009328358208956e-06, 'epoch': 2.0}


                                                   
 50%|█████     | 1072/2144 [18:56<17:04,  1.05it/s]

{'eval_loss': 2.1939027309417725, 'eval_accuracy': 0.7076923076923077, 'eval_precision': 0.707380284175983, 'eval_recall': 0.7076923076923077, 'eval_f1': 0.7072742767691859, 'eval_runtime': 27.9159, 'eval_samples_per_second': 25.613, 'eval_steps_per_second': 3.224, 'epoch': 2.0}


 50%|█████     | 1080/2144 [19:11<33:06,  1.87s/it]  

{'loss': 0.0034, 'learning_rate': 4.9626865671641796e-06, 'epoch': 2.01}


 51%|█████     | 1090/2144 [19:21<18:02,  1.03s/it]

{'loss': 0.0003, 'learning_rate': 4.9160447761194035e-06, 'epoch': 2.03}


 51%|█████▏    | 1100/2144 [19:31<17:30,  1.01s/it]

{'loss': 0.0006, 'learning_rate': 4.8694029850746275e-06, 'epoch': 2.05}


 52%|█████▏    | 1110/2144 [19:41<17:20,  1.01s/it]

{'loss': 0.0007, 'learning_rate': 4.822761194029851e-06, 'epoch': 2.07}


 52%|█████▏    | 1120/2144 [19:51<16:57,  1.01it/s]

{'loss': 0.0002, 'learning_rate': 4.7761194029850745e-06, 'epoch': 2.09}


 53%|█████▎    | 1130/2144 [20:01<16:47,  1.01it/s]

{'loss': 0.0009, 'learning_rate': 4.729477611940299e-06, 'epoch': 2.11}


 53%|█████▎    | 1140/2144 [20:11<16:37,  1.01it/s]

{'loss': 0.0002, 'learning_rate': 4.682835820895522e-06, 'epoch': 2.13}


 54%|█████▎    | 1150/2144 [20:21<16:27,  1.01it/s]

{'loss': 0.1021, 'learning_rate': 4.636194029850747e-06, 'epoch': 2.15}


 54%|█████▍    | 1160/2144 [20:30<16:18,  1.01it/s]

{'loss': 0.0024, 'learning_rate': 4.58955223880597e-06, 'epoch': 2.16}


 55%|█████▍    | 1170/2144 [20:40<16:15,  1.00s/it]

{'loss': 0.0004, 'learning_rate': 4.542910447761194e-06, 'epoch': 2.18}


 55%|█████▌    | 1180/2144 [20:50<16:01,  1.00it/s]

{'loss': 0.1238, 'learning_rate': 4.496268656716418e-06, 'epoch': 2.2}


 56%|█████▌    | 1190/2144 [21:00<15:51,  1.00it/s]

{'loss': 0.0025, 'learning_rate': 4.449626865671642e-06, 'epoch': 2.22}


 56%|█████▌    | 1200/2144 [21:10<15:46,  1.00s/it]

{'loss': 0.0003, 'learning_rate': 4.402985074626866e-06, 'epoch': 2.24}


 56%|█████▋    | 1210/2144 [21:20<15:33,  1.00it/s]

{'loss': 0.0004, 'learning_rate': 4.35634328358209e-06, 'epoch': 2.26}


 57%|█████▋    | 1220/2144 [21:30<15:22,  1.00it/s]

{'loss': 0.0003, 'learning_rate': 4.309701492537314e-06, 'epoch': 2.28}


 57%|█████▋    | 1230/2144 [21:40<15:14,  1.00s/it]

{'loss': 0.0012, 'learning_rate': 4.263059701492538e-06, 'epoch': 2.29}


 58%|█████▊    | 1240/2144 [21:50<15:02,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 4.216417910447762e-06, 'epoch': 2.31}


 58%|█████▊    | 1250/2144 [22:00<14:48,  1.01it/s]

{'loss': 0.0997, 'learning_rate': 4.169776119402986e-06, 'epoch': 2.33}


 59%|█████▉    | 1260/2144 [22:10<14:47,  1.00s/it]

{'loss': 0.0007, 'learning_rate': 4.123134328358209e-06, 'epoch': 2.35}


 59%|█████▉    | 1270/2144 [22:20<14:35,  1.00s/it]

{'loss': 0.0007, 'learning_rate': 4.076492537313434e-06, 'epoch': 2.37}


 60%|█████▉    | 1280/2144 [22:30<14:22,  1.00it/s]

{'loss': 0.0026, 'learning_rate': 4.029850746268657e-06, 'epoch': 2.39}


 60%|██████    | 1290/2144 [22:40<14:15,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 3.983208955223881e-06, 'epoch': 2.41}


 61%|██████    | 1300/2144 [22:50<14:03,  1.00it/s]

{'loss': 0.0962, 'learning_rate': 3.936567164179105e-06, 'epoch': 2.43}


 61%|██████    | 1310/2144 [23:00<13:53,  1.00it/s]

{'loss': 0.0004, 'learning_rate': 3.889925373134329e-06, 'epoch': 2.44}


 62%|██████▏   | 1320/2144 [23:10<13:43,  1.00it/s]

{'loss': 0.0003, 'learning_rate': 3.843283582089553e-06, 'epoch': 2.46}


 62%|██████▏   | 1330/2144 [23:20<13:32,  1.00it/s]

{'loss': 0.0095, 'learning_rate': 3.7966417910447766e-06, 'epoch': 2.48}


 62%|██████▎   | 1340/2144 [23:30<13:15,  1.01it/s]

{'loss': 0.0584, 'learning_rate': 3.7500000000000005e-06, 'epoch': 2.5}


 63%|██████▎   | 1350/2144 [23:40<13:10,  1.00it/s]

{'loss': 0.0192, 'learning_rate': 3.703358208955224e-06, 'epoch': 2.52}


 63%|██████▎   | 1360/2144 [23:50<13:06,  1.00s/it]

{'loss': 0.0313, 'learning_rate': 3.656716417910448e-06, 'epoch': 2.54}


 64%|██████▍   | 1370/2144 [24:00<12:56,  1.00s/it]

{'loss': 0.0003, 'learning_rate': 3.6100746268656715e-06, 'epoch': 2.56}


 64%|██████▍   | 1380/2144 [24:10<12:46,  1.00s/it]

{'loss': 0.0003, 'learning_rate': 3.563432835820896e-06, 'epoch': 2.57}


 65%|██████▍   | 1390/2144 [24:20<12:35,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 3.5167910447761194e-06, 'epoch': 2.59}


 65%|██████▌   | 1400/2144 [24:30<12:26,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 3.4701492537313438e-06, 'epoch': 2.61}


 66%|██████▌   | 1410/2144 [24:40<12:16,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 3.4235074626865673e-06, 'epoch': 2.63}


 66%|██████▌   | 1420/2144 [24:51<12:02,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 3.3768656716417913e-06, 'epoch': 2.65}


 67%|██████▋   | 1430/2144 [25:01<11:52,  1.00it/s]

{'loss': 0.0004, 'learning_rate': 3.3302238805970148e-06, 'epoch': 2.67}


 67%|██████▋   | 1440/2144 [25:10<11:43,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 3.283582089552239e-06, 'epoch': 2.69}


 68%|██████▊   | 1450/2144 [25:20<11:29,  1.01it/s]

{'loss': 0.0001, 'learning_rate': 3.2369402985074627e-06, 'epoch': 2.71}


 68%|██████▊   | 1460/2144 [25:30<11:20,  1.00it/s]

{'loss': 0.2006, 'learning_rate': 3.190298507462687e-06, 'epoch': 2.72}


 69%|██████▊   | 1470/2144 [25:40<11:14,  1.00s/it]

{'loss': 0.0558, 'learning_rate': 3.1436567164179106e-06, 'epoch': 2.74}


 69%|██████▉   | 1480/2144 [25:50<11:00,  1.01it/s]

{'loss': 0.0002, 'learning_rate': 3.0970149253731345e-06, 'epoch': 2.76}


 69%|██████▉   | 1490/2144 [26:00<10:51,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 3.050373134328358e-06, 'epoch': 2.78}


 70%|██████▉   | 1500/2144 [26:10<10:42,  1.00it/s]

{'loss': 0.0002, 'learning_rate': 3.0037313432835824e-06, 'epoch': 2.8}


 70%|███████   | 1510/2144 [26:20<10:32,  1.00it/s]

{'loss': 0.1805, 'learning_rate': 2.957089552238806e-06, 'epoch': 2.82}


 71%|███████   | 1520/2144 [26:30<10:19,  1.01it/s]

{'loss': 0.0001, 'learning_rate': 2.9104477611940303e-06, 'epoch': 2.84}


 71%|███████▏  | 1530/2144 [26:40<10:14,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 2.863805970149254e-06, 'epoch': 2.85}


 72%|███████▏  | 1540/2144 [26:50<10:04,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 2.8171641791044778e-06, 'epoch': 2.87}


 72%|███████▏  | 1550/2144 [27:00<09:56,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 2.7705223880597017e-06, 'epoch': 2.89}


 73%|███████▎  | 1560/2144 [27:10<09:43,  1.00it/s]

{'loss': 0.0004, 'learning_rate': 2.7238805970149257e-06, 'epoch': 2.91}


 73%|███████▎  | 1570/2144 [27:20<09:30,  1.01it/s]

{'loss': 0.0001, 'learning_rate': 2.677238805970149e-06, 'epoch': 2.93}


 74%|███████▎  | 1580/2144 [27:30<09:25,  1.00s/it]

{'loss': 0.0003, 'learning_rate': 2.6305970149253736e-06, 'epoch': 2.95}


 74%|███████▍  | 1590/2144 [27:40<09:14,  1.00s/it]

{'loss': 0.0352, 'learning_rate': 2.583955223880597e-06, 'epoch': 2.97}


 75%|███████▍  | 1600/2144 [27:50<09:04,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 2.537313432835821e-06, 'epoch': 2.99}


                                                   
 75%|███████▌  | 1608/2144 [28:29<08:41,  1.03it/s]

{'eval_loss': 2.400238275527954, 'eval_accuracy': 0.7132867132867133, 'eval_precision': 0.7194595527758467, 'eval_recall': 0.7132867132867133, 'eval_f1': 0.7143174888336953, 'eval_runtime': 31.0205, 'eval_samples_per_second': 23.049, 'eval_steps_per_second': 2.901, 'epoch': 3.0}


 75%|███████▌  | 1610/2144 [28:38<1:19:16,  8.91s/it]

{'loss': 0.0824, 'learning_rate': 2.490671641791045e-06, 'epoch': 3.0}


 76%|███████▌  | 1620/2144 [28:48<10:41,  1.22s/it]  

{'loss': 0.0001, 'learning_rate': 2.444029850746269e-06, 'epoch': 3.02}


 76%|███████▌  | 1630/2144 [28:58<08:37,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 2.397388059701493e-06, 'epoch': 3.04}


 76%|███████▋  | 1640/2144 [29:08<08:21,  1.01it/s]

{'loss': 0.0001, 'learning_rate': 2.350746268656717e-06, 'epoch': 3.06}


 77%|███████▋  | 1650/2144 [29:18<08:08,  1.01it/s]

{'loss': 0.0001, 'learning_rate': 2.3041044776119408e-06, 'epoch': 3.08}


 77%|███████▋  | 1660/2144 [29:28<08:08,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 2.2574626865671643e-06, 'epoch': 3.1}


 78%|███████▊  | 1670/2144 [29:38<07:51,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 2.2108208955223883e-06, 'epoch': 3.12}


 78%|███████▊  | 1680/2144 [29:48<07:43,  1.00it/s]

{'loss': 0.09, 'learning_rate': 2.1641791044776118e-06, 'epoch': 3.13}


 79%|███████▉  | 1690/2144 [29:58<07:27,  1.01it/s]

{'loss': 0.0001, 'learning_rate': 2.1175373134328357e-06, 'epoch': 3.15}


 79%|███████▉  | 1700/2144 [30:08<07:22,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 2.0708955223880597e-06, 'epoch': 3.17}


 80%|███████▉  | 1710/2144 [30:18<07:14,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 2.0242537313432836e-06, 'epoch': 3.19}


 80%|████████  | 1720/2144 [30:28<07:04,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.9776119402985076e-06, 'epoch': 3.21}


 81%|████████  | 1730/2144 [30:38<06:54,  1.00s/it]

{'loss': 0.0656, 'learning_rate': 1.9309701492537315e-06, 'epoch': 3.23}


 81%|████████  | 1740/2144 [30:48<06:43,  1.00it/s]

{'loss': 0.0089, 'learning_rate': 1.8843283582089553e-06, 'epoch': 3.25}


 82%|████████▏ | 1750/2144 [30:58<06:34,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.8376865671641792e-06, 'epoch': 3.26}


 82%|████████▏ | 1760/2144 [31:08<06:24,  1.00s/it]

{'loss': 0.05, 'learning_rate': 1.791044776119403e-06, 'epoch': 3.28}


 83%|████████▎ | 1770/2144 [31:18<06:13,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 1.7444029850746269e-06, 'epoch': 3.3}


 83%|████████▎ | 1780/2144 [31:28<06:03,  1.00it/s]

{'loss': 0.0012, 'learning_rate': 1.6977611940298508e-06, 'epoch': 3.32}


 83%|████████▎ | 1790/2144 [31:38<05:55,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.6511194029850746e-06, 'epoch': 3.34}


 84%|████████▍ | 1800/2144 [31:48<05:44,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.6044776119402985e-06, 'epoch': 3.36}


 84%|████████▍ | 1810/2144 [31:58<05:32,  1.00it/s]

{'loss': 0.0185, 'learning_rate': 1.5578358208955225e-06, 'epoch': 3.38}


 85%|████████▍ | 1820/2144 [32:08<05:24,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.5111940298507464e-06, 'epoch': 3.4}


 85%|████████▌ | 1830/2144 [32:18<05:12,  1.00it/s]

{'loss': 0.0297, 'learning_rate': 1.4645522388059702e-06, 'epoch': 3.41}


 86%|████████▌ | 1840/2144 [32:28<05:02,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 1.417910447761194e-06, 'epoch': 3.43}


 86%|████████▋ | 1850/2144 [32:38<04:52,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 1.371268656716418e-06, 'epoch': 3.45}


 87%|████████▋ | 1860/2144 [32:48<04:44,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.3246268656716418e-06, 'epoch': 3.47}


 87%|████████▋ | 1870/2144 [32:58<04:35,  1.00s/it]

{'loss': 0.0003, 'learning_rate': 1.2779850746268657e-06, 'epoch': 3.49}


 88%|████████▊ | 1880/2144 [33:08<04:23,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 1.2313432835820897e-06, 'epoch': 3.51}


 88%|████████▊ | 1890/2144 [33:18<04:14,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.1847014925373134e-06, 'epoch': 3.53}


 89%|████████▊ | 1900/2144 [33:28<04:04,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 1.1380597014925374e-06, 'epoch': 3.54}


 89%|████████▉ | 1910/2144 [33:38<03:54,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.0914179104477613e-06, 'epoch': 3.56}


 90%|████████▉ | 1920/2144 [33:48<03:45,  1.01s/it]

{'loss': 0.0301, 'learning_rate': 1.044776119402985e-06, 'epoch': 3.58}


 90%|█████████ | 1930/2144 [33:58<03:34,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 9.98134328358209e-07, 'epoch': 3.6}


 90%|█████████ | 1940/2144 [34:08<03:24,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 9.514925373134328e-07, 'epoch': 3.62}


 91%|█████████ | 1950/2144 [34:18<03:14,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 9.048507462686568e-07, 'epoch': 3.64}


 91%|█████████▏| 1960/2144 [34:28<03:05,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 8.582089552238806e-07, 'epoch': 3.66}


 92%|█████████▏| 1970/2144 [34:38<02:54,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 8.115671641791046e-07, 'epoch': 3.68}


 92%|█████████▏| 1980/2144 [34:48<02:44,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 7.649253731343284e-07, 'epoch': 3.69}


 93%|█████████▎| 1990/2144 [34:58<02:35,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 7.182835820895523e-07, 'epoch': 3.71}


 93%|█████████▎| 2000/2144 [35:08<02:24,  1.01s/it]

{'loss': 0.0003, 'learning_rate': 6.716417910447762e-07, 'epoch': 3.73}


 94%|█████████▍| 2010/2144 [35:18<02:14,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 6.25e-07, 'epoch': 3.75}


 94%|█████████▍| 2020/2144 [35:28<02:04,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 5.783582089552239e-07, 'epoch': 3.77}


 95%|█████████▍| 2030/2144 [35:38<01:53,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 5.317164179104478e-07, 'epoch': 3.79}


 95%|█████████▌| 2040/2144 [35:48<01:44,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 4.850746268656717e-07, 'epoch': 3.81}


 96%|█████████▌| 2050/2144 [35:58<01:33,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 4.384328358208956e-07, 'epoch': 3.82}


 96%|█████████▌| 2060/2144 [36:08<01:23,  1.01it/s]

{'loss': 0.0001, 'learning_rate': 3.9179104477611947e-07, 'epoch': 3.84}


 97%|█████████▋| 2070/2144 [36:18<01:13,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 3.451492537313433e-07, 'epoch': 3.86}


 97%|█████████▋| 2080/2144 [36:28<01:04,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 2.9850746268656716e-07, 'epoch': 3.88}


 97%|█████████▋| 2090/2144 [36:38<00:54,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 2.5186567164179105e-07, 'epoch': 3.9}


 98%|█████████▊| 2100/2144 [36:48<00:44,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 2.0522388059701495e-07, 'epoch': 3.92}


 98%|█████████▊| 2110/2144 [36:58<00:33,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 1.5858208955223882e-07, 'epoch': 3.94}


 99%|█████████▉| 2120/2144 [37:08<00:23,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 1.1194029850746268e-07, 'epoch': 3.96}


 99%|█████████▉| 2130/2144 [37:18<00:14,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 6.529850746268657e-08, 'epoch': 3.97}


100%|█████████▉| 2140/2144 [37:28<00:03,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 1.8656716417910447e-08, 'epoch': 3.99}


                                                   
100%|██████████| 2144/2144 [38:02<00:00,  1.05it/s]

{'eval_loss': 2.4781997203826904, 'eval_accuracy': 0.7034965034965035, 'eval_precision': 0.7069457649359134, 'eval_recall': 0.7034965034965035, 'eval_f1': 0.7049317597621441, 'eval_runtime': 30.0269, 'eval_samples_per_second': 23.812, 'eval_steps_per_second': 2.997, 'epoch': 4.0}


100%|██████████| 2144/2144 [38:11<00:00,  1.07s/it]


{'train_runtime': 2291.4207, 'train_samples_per_second': 7.482, 'train_steps_per_second': 0.936, 'train_loss': 0.05187741144499029, 'epoch': 4.0}


100%|██████████| 90/90 [00:30<00:00,  2.95it/s]
100%|██████████| 90/90 [00:31<00:00,  2.87it/s]
100%|██████████| 90/90 [00:29<00:00,  3.01it/s]


{'accuracy': 0.6937062937062937, 'precision': 0.7108087824307312, 'recall': 0.6937062937062937, 'f1': 0.6934067795218153}
{'accuracy': 0.6764705882352942, 'precision': 0.6875296262071076, 'recall': 0.6764705882352942, 'f1': 0.6747606689615777}


  0%|          | 10/2144 [00:10<36:12,  1.02s/it] 

{'loss': 0.0006, 'learning_rate': 9.953358208955226e-06, 'epoch': 0.02}


  1%|          | 20/2144 [00:20<35:33,  1.00s/it]

{'loss': 0.0708, 'learning_rate': 9.906716417910449e-06, 'epoch': 0.04}


  1%|▏         | 30/2144 [00:30<35:13,  1.00it/s]

{'loss': 0.4453, 'learning_rate': 9.860074626865672e-06, 'epoch': 0.06}


  2%|▏         | 40/2144 [00:40<35:07,  1.00s/it]

{'loss': 0.2147, 'learning_rate': 9.813432835820897e-06, 'epoch': 0.07}


  2%|▏         | 50/2144 [00:50<34:51,  1.00it/s]

{'loss': 0.0916, 'learning_rate': 9.76679104477612e-06, 'epoch': 0.09}


  3%|▎         | 60/2144 [01:00<34:48,  1.00s/it]

{'loss': 0.1033, 'learning_rate': 9.720149253731343e-06, 'epoch': 0.11}


  3%|▎         | 70/2144 [01:10<34:28,  1.00it/s]

{'loss': 0.1947, 'learning_rate': 9.673507462686568e-06, 'epoch': 0.13}


  4%|▎         | 80/2144 [01:20<34:16,  1.00it/s]

{'loss': 0.0019, 'learning_rate': 9.626865671641792e-06, 'epoch': 0.15}


  4%|▍         | 90/2144 [01:30<34:13,  1.00it/s]

{'loss': 0.1084, 'learning_rate': 9.580223880597016e-06, 'epoch': 0.17}


  5%|▍         | 100/2144 [01:40<33:58,  1.00it/s]

{'loss': 0.2407, 'learning_rate': 9.533582089552239e-06, 'epoch': 0.19}


  5%|▌         | 110/2144 [01:50<33:51,  1.00it/s]

{'loss': 0.0491, 'learning_rate': 9.486940298507463e-06, 'epoch': 0.21}


  6%|▌         | 120/2144 [02:00<33:43,  1.00it/s]

{'loss': 0.0376, 'learning_rate': 9.440298507462688e-06, 'epoch': 0.22}


  6%|▌         | 130/2144 [02:10<33:31,  1.00it/s]

{'loss': 0.2691, 'learning_rate': 9.393656716417911e-06, 'epoch': 0.24}


  7%|▋         | 140/2144 [02:20<33:20,  1.00it/s]

{'loss': 0.1076, 'learning_rate': 9.347014925373134e-06, 'epoch': 0.26}


  7%|▋         | 150/2144 [02:30<33:20,  1.00s/it]

{'loss': 0.0062, 'learning_rate': 9.30037313432836e-06, 'epoch': 0.28}


  7%|▋         | 160/2144 [02:40<33:08,  1.00s/it]

{'loss': 0.0165, 'learning_rate': 9.253731343283582e-06, 'epoch': 0.3}


  8%|▊         | 170/2144 [02:50<33:02,  1.00s/it]

{'loss': 0.0454, 'learning_rate': 9.207089552238807e-06, 'epoch': 0.32}


  8%|▊         | 180/2144 [03:00<32:40,  1.00it/s]

{'loss': 0.147, 'learning_rate': 9.16044776119403e-06, 'epoch': 0.34}


  9%|▉         | 190/2144 [03:10<32:36,  1.00s/it]

{'loss': 0.1383, 'learning_rate': 9.113805970149255e-06, 'epoch': 0.35}


  9%|▉         | 200/2144 [03:20<32:29,  1.00s/it]

{'loss': 0.0341, 'learning_rate': 9.067164179104478e-06, 'epoch': 0.37}


 10%|▉         | 210/2144 [03:30<32:24,  1.01s/it]

{'loss': 0.2944, 'learning_rate': 9.020522388059703e-06, 'epoch': 0.39}


 10%|█         | 220/2144 [03:40<32:05,  1.00s/it]

{'loss': 0.0037, 'learning_rate': 8.973880597014926e-06, 'epoch': 0.41}


 11%|█         | 230/2144 [03:51<32:01,  1.00s/it]

{'loss': 0.0792, 'learning_rate': 8.927238805970149e-06, 'epoch': 0.43}


 11%|█         | 240/2144 [04:01<31:43,  1.00it/s]

{'loss': 0.0065, 'learning_rate': 8.880597014925374e-06, 'epoch': 0.45}


 12%|█▏        | 250/2144 [04:11<31:37,  1.00s/it]

{'loss': 0.0845, 'learning_rate': 8.833955223880599e-06, 'epoch': 0.47}


 12%|█▏        | 260/2144 [04:21<31:34,  1.01s/it]

{'loss': 0.1758, 'learning_rate': 8.787313432835822e-06, 'epoch': 0.49}


 13%|█▎        | 270/2144 [04:31<31:07,  1.00it/s]

{'loss': 0.198, 'learning_rate': 8.740671641791045e-06, 'epoch': 0.5}


 13%|█▎        | 280/2144 [04:41<31:05,  1.00s/it]

{'loss': 0.1151, 'learning_rate': 8.69402985074627e-06, 'epoch': 0.52}


 14%|█▎        | 290/2144 [04:51<30:39,  1.01it/s]

{'loss': 0.0866, 'learning_rate': 8.647388059701494e-06, 'epoch': 0.54}


 14%|█▍        | 300/2144 [05:01<30:40,  1.00it/s]

{'loss': 0.0007, 'learning_rate': 8.600746268656716e-06, 'epoch': 0.56}


 14%|█▍        | 310/2144 [05:11<30:37,  1.00s/it]

{'loss': 0.0263, 'learning_rate': 8.55410447761194e-06, 'epoch': 0.58}


 15%|█▍        | 320/2144 [05:21<30:29,  1.00s/it]

{'loss': 0.1175, 'learning_rate': 8.507462686567165e-06, 'epoch': 0.6}


 15%|█▌        | 330/2144 [05:31<30:28,  1.01s/it]

{'loss': 0.0166, 'learning_rate': 8.460820895522389e-06, 'epoch': 0.62}


 16%|█▌        | 340/2144 [05:41<30:11,  1.00s/it]

{'loss': 0.0005, 'learning_rate': 8.414179104477612e-06, 'epoch': 0.63}


 16%|█▋        | 350/2144 [05:51<29:58,  1.00s/it]

{'loss': 0.1858, 'learning_rate': 8.367537313432836e-06, 'epoch': 0.65}


 17%|█▋        | 360/2144 [06:01<29:51,  1.00s/it]

{'loss': 0.065, 'learning_rate': 8.320895522388061e-06, 'epoch': 0.67}


 17%|█▋        | 370/2144 [06:11<29:45,  1.01s/it]

{'loss': 0.0858, 'learning_rate': 8.274253731343284e-06, 'epoch': 0.69}


 18%|█▊        | 380/2144 [06:21<29:29,  1.00s/it]

{'loss': 0.0215, 'learning_rate': 8.227611940298507e-06, 'epoch': 0.71}


 18%|█▊        | 390/2144 [06:31<29:18,  1.00s/it]

{'loss': 0.2047, 'learning_rate': 8.180970149253732e-06, 'epoch': 0.73}


 19%|█▊        | 400/2144 [06:41<29:19,  1.01s/it]

{'loss': 0.0976, 'learning_rate': 8.134328358208955e-06, 'epoch': 0.75}


 19%|█▉        | 410/2144 [06:51<29:08,  1.01s/it]

{'loss': 0.0059, 'learning_rate': 8.08768656716418e-06, 'epoch': 0.76}


 20%|█▉        | 420/2144 [07:01<28:49,  1.00s/it]

{'loss': 0.0178, 'learning_rate': 8.041044776119403e-06, 'epoch': 0.78}


 20%|██        | 430/2144 [07:11<28:39,  1.00s/it]

{'loss': 0.1806, 'learning_rate': 7.994402985074628e-06, 'epoch': 0.8}


 21%|██        | 440/2144 [07:21<28:32,  1.00s/it]

{'loss': 0.1575, 'learning_rate': 7.947761194029851e-06, 'epoch': 0.82}


 21%|██        | 450/2144 [07:31<28:11,  1.00it/s]

{'loss': 0.1508, 'learning_rate': 7.901119402985076e-06, 'epoch': 0.84}


 21%|██▏       | 460/2144 [07:41<28:07,  1.00s/it]

{'loss': 0.1028, 'learning_rate': 7.854477611940299e-06, 'epoch': 0.86}


 22%|██▏       | 470/2144 [07:51<28:02,  1.01s/it]

{'loss': 0.1505, 'learning_rate': 7.807835820895522e-06, 'epoch': 0.88}


 22%|██▏       | 480/2144 [08:01<27:52,  1.00s/it]

{'loss': 0.2551, 'learning_rate': 7.761194029850747e-06, 'epoch': 0.9}


 23%|██▎       | 490/2144 [08:11<27:45,  1.01s/it]

{'loss': 0.0663, 'learning_rate': 7.714552238805972e-06, 'epoch': 0.91}


 23%|██▎       | 500/2144 [08:21<27:24,  1.00s/it]

{'loss': 0.1878, 'learning_rate': 7.667910447761195e-06, 'epoch': 0.93}


 24%|██▍       | 510/2144 [08:31<27:24,  1.01s/it]

{'loss': 0.0835, 'learning_rate': 7.621268656716419e-06, 'epoch': 0.95}


 24%|██▍       | 520/2144 [08:41<27:17,  1.01s/it]

{'loss': 0.1543, 'learning_rate': 7.574626865671643e-06, 'epoch': 0.97}


 25%|██▍       | 530/2144 [08:51<27:02,  1.01s/it]

{'loss': 0.0899, 'learning_rate': 7.527985074626867e-06, 'epoch': 0.99}


                                                  
 25%|██▌       | 536/2144 [09:26<26:01,  1.03it/s]

{'eval_loss': 2.099938154220581, 'eval_accuracy': 0.6881118881118881, 'eval_precision': 0.7065965733876833, 'eval_recall': 0.6881118881118881, 'eval_f1': 0.6924628021152268, 'eval_runtime': 28.4055, 'eval_samples_per_second': 25.171, 'eval_steps_per_second': 3.168, 'epoch': 1.0}


 25%|██▌       | 540/2144 [09:36<2:02:48,  4.59s/it]

{'loss': 0.0346, 'learning_rate': 7.48134328358209e-06, 'epoch': 1.01}


 26%|██▌       | 550/2144 [09:46<29:23,  1.11s/it]  

{'loss': 0.2368, 'learning_rate': 7.434701492537314e-06, 'epoch': 1.03}


 26%|██▌       | 560/2144 [09:56<26:26,  1.00s/it]

{'loss': 0.0131, 'learning_rate': 7.3880597014925385e-06, 'epoch': 1.04}


 27%|██▋       | 570/2144 [10:06<26:08,  1.00it/s]

{'loss': 0.0007, 'learning_rate': 7.3414179104477625e-06, 'epoch': 1.06}


 27%|██▋       | 580/2144 [10:16<26:03,  1.00it/s]

{'loss': 0.1597, 'learning_rate': 7.2947761194029856e-06, 'epoch': 1.08}


 28%|██▊       | 590/2144 [10:26<25:55,  1.00s/it]

{'loss': 0.0918, 'learning_rate': 7.2481343283582095e-06, 'epoch': 1.1}


 28%|██▊       | 600/2144 [10:36<25:40,  1.00it/s]

{'loss': 0.251, 'learning_rate': 7.2014925373134335e-06, 'epoch': 1.12}


 28%|██▊       | 610/2144 [10:46<25:33,  1.00it/s]

{'loss': 0.014, 'learning_rate': 7.154850746268658e-06, 'epoch': 1.14}


 29%|██▉       | 620/2144 [10:56<25:22,  1.00it/s]

{'loss': 0.0747, 'learning_rate': 7.1082089552238805e-06, 'epoch': 1.16}


 29%|██▉       | 630/2144 [11:06<25:44,  1.02s/it]

{'loss': 0.02, 'learning_rate': 7.061567164179105e-06, 'epoch': 1.18}


 30%|██▉       | 640/2144 [11:17<25:11,  1.01s/it]

{'loss': 0.2094, 'learning_rate': 7.014925373134329e-06, 'epoch': 1.19}


 30%|███       | 650/2144 [11:27<25:00,  1.00s/it]

{'loss': 0.1157, 'learning_rate': 6.968283582089553e-06, 'epoch': 1.21}


 31%|███       | 660/2144 [11:37<24:35,  1.01it/s]

{'loss': 0.0628, 'learning_rate': 6.921641791044776e-06, 'epoch': 1.23}


 31%|███▏      | 670/2144 [11:46<24:19,  1.01it/s]

{'loss': 0.0732, 'learning_rate': 6.875e-06, 'epoch': 1.25}


 32%|███▏      | 680/2144 [11:56<24:12,  1.01it/s]

{'loss': 0.001, 'learning_rate': 6.828358208955225e-06, 'epoch': 1.27}


 32%|███▏      | 690/2144 [12:06<24:02,  1.01it/s]

{'loss': 0.0005, 'learning_rate': 6.781716417910448e-06, 'epoch': 1.29}


 33%|███▎      | 700/2144 [12:16<23:54,  1.01it/s]

{'loss': 0.0889, 'learning_rate': 6.735074626865672e-06, 'epoch': 1.31}


 33%|███▎      | 710/2144 [12:26<23:50,  1.00it/s]

{'loss': 0.0373, 'learning_rate': 6.688432835820896e-06, 'epoch': 1.32}


 34%|███▎      | 720/2144 [12:36<23:33,  1.01it/s]

{'loss': 0.0842, 'learning_rate': 6.64179104477612e-06, 'epoch': 1.34}


 34%|███▍      | 730/2144 [12:46<23:26,  1.01it/s]

{'loss': 0.0847, 'learning_rate': 6.595149253731343e-06, 'epoch': 1.36}


 35%|███▍      | 740/2144 [12:56<23:19,  1.00it/s]

{'loss': 0.1094, 'learning_rate': 6.548507462686567e-06, 'epoch': 1.38}


 35%|███▍      | 750/2144 [13:06<23:10,  1.00it/s]

{'loss': 0.1228, 'learning_rate': 6.501865671641792e-06, 'epoch': 1.4}


 35%|███▌      | 760/2144 [13:16<22:53,  1.01it/s]

{'loss': 0.0005, 'learning_rate': 6.455223880597016e-06, 'epoch': 1.42}


 36%|███▌      | 770/2144 [13:26<22:53,  1.00it/s]

{'loss': 0.1686, 'learning_rate': 6.408582089552239e-06, 'epoch': 1.44}


 36%|███▋      | 780/2144 [13:36<22:38,  1.00it/s]

{'loss': 0.0459, 'learning_rate': 6.361940298507463e-06, 'epoch': 1.46}


 37%|███▋      | 790/2144 [13:46<22:27,  1.00it/s]

{'loss': 0.0458, 'learning_rate': 6.315298507462687e-06, 'epoch': 1.47}


 37%|███▋      | 800/2144 [13:56<22:12,  1.01it/s]

{'loss': 0.0927, 'learning_rate': 6.2686567164179116e-06, 'epoch': 1.49}


 38%|███▊      | 810/2144 [14:06<22:10,  1.00it/s]

{'loss': 0.0311, 'learning_rate': 6.222014925373135e-06, 'epoch': 1.51}


 38%|███▊      | 820/2144 [14:16<22:07,  1.00s/it]

{'loss': 0.0669, 'learning_rate': 6.175373134328359e-06, 'epoch': 1.53}


 39%|███▊      | 830/2144 [14:26<21:49,  1.00it/s]

{'loss': 0.0461, 'learning_rate': 6.1287313432835826e-06, 'epoch': 1.55}


 39%|███▉      | 840/2144 [14:35<21:39,  1.00it/s]

{'loss': 0.0332, 'learning_rate': 6.0820895522388065e-06, 'epoch': 1.57}


 40%|███▉      | 850/2144 [14:45<21:29,  1.00it/s]

{'loss': 0.005, 'learning_rate': 6.03544776119403e-06, 'epoch': 1.59}


 40%|████      | 860/2144 [14:55<21:23,  1.00it/s]

{'loss': 0.1266, 'learning_rate': 5.988805970149254e-06, 'epoch': 1.6}


 41%|████      | 870/2144 [15:05<20:58,  1.01it/s]

{'loss': 0.037, 'learning_rate': 5.942164179104478e-06, 'epoch': 1.62}


 41%|████      | 880/2144 [15:15<20:54,  1.01it/s]

{'loss': 0.0964, 'learning_rate': 5.895522388059702e-06, 'epoch': 1.64}


 42%|████▏     | 890/2144 [15:25<20:56,  1.00s/it]

{'loss': 0.001, 'learning_rate': 5.848880597014925e-06, 'epoch': 1.66}


 42%|████▏     | 900/2144 [15:35<20:38,  1.00it/s]

{'loss': 0.1055, 'learning_rate': 5.802238805970149e-06, 'epoch': 1.68}


 42%|████▏     | 910/2144 [15:45<20:36,  1.00s/it]

{'loss': 0.0012, 'learning_rate': 5.755597014925373e-06, 'epoch': 1.7}


 43%|████▎     | 920/2144 [15:55<20:13,  1.01it/s]

{'loss': 0.1197, 'learning_rate': 5.708955223880598e-06, 'epoch': 1.72}


 43%|████▎     | 930/2144 [16:05<20:10,  1.00it/s]

{'loss': 0.1269, 'learning_rate': 5.662313432835821e-06, 'epoch': 1.74}


 44%|████▍     | 940/2144 [16:15<19:59,  1.00it/s]

{'loss': 0.0006, 'learning_rate': 5.615671641791045e-06, 'epoch': 1.75}


 44%|████▍     | 950/2144 [16:25<19:56,  1.00s/it]

{'loss': 0.1321, 'learning_rate': 5.569029850746269e-06, 'epoch': 1.77}


 45%|████▍     | 960/2144 [16:35<19:43,  1.00it/s]

{'loss': 0.0007, 'learning_rate': 5.522388059701493e-06, 'epoch': 1.79}


 45%|████▌     | 970/2144 [16:45<19:30,  1.00it/s]

{'loss': 0.2, 'learning_rate': 5.475746268656716e-06, 'epoch': 1.81}


 46%|████▌     | 980/2144 [16:55<19:11,  1.01it/s]

{'loss': 0.0475, 'learning_rate': 5.429104477611941e-06, 'epoch': 1.83}


 46%|████▌     | 990/2144 [17:05<19:13,  1.00it/s]

{'loss': 0.0006, 'learning_rate': 5.382462686567165e-06, 'epoch': 1.85}


 47%|████▋     | 1000/2144 [17:15<18:59,  1.00it/s]

{'loss': 0.0004, 'learning_rate': 5.335820895522389e-06, 'epoch': 1.87}


 47%|████▋     | 1010/2144 [17:25<18:58,  1.00s/it]

{'loss': 0.0003, 'learning_rate': 5.289179104477612e-06, 'epoch': 1.88}


 48%|████▊     | 1020/2144 [17:35<18:36,  1.01it/s]

{'loss': 0.0004, 'learning_rate': 5.242537313432836e-06, 'epoch': 1.9}


 48%|████▊     | 1030/2144 [17:45<18:31,  1.00it/s]

{'loss': 0.0003, 'learning_rate': 5.195895522388061e-06, 'epoch': 1.92}


 49%|████▊     | 1040/2144 [17:55<18:26,  1.00s/it]

{'loss': 0.0671, 'learning_rate': 5.149253731343285e-06, 'epoch': 1.94}


 49%|████▉     | 1050/2144 [18:05<18:13,  1.00it/s]

{'loss': 0.0002, 'learning_rate': 5.102611940298508e-06, 'epoch': 1.96}


 49%|████▉     | 1060/2144 [18:15<17:57,  1.01it/s]

{'loss': 0.0004, 'learning_rate': 5.055970149253732e-06, 'epoch': 1.98}


 50%|████▉     | 1070/2144 [18:25<17:57,  1.00s/it]

{'loss': 0.0536, 'learning_rate': 5.009328358208956e-06, 'epoch': 2.0}


                                                   
 50%|█████     | 1072/2144 [18:56<17:17,  1.03it/s]

{'eval_loss': 2.268864154815674, 'eval_accuracy': 0.7118881118881119, 'eval_precision': 0.709556816639396, 'eval_recall': 0.7118881118881119, 'eval_f1': 0.7097412246844065, 'eval_runtime': 29.6479, 'eval_samples_per_second': 24.116, 'eval_steps_per_second': 3.036, 'epoch': 2.0}


 50%|█████     | 1080/2144 [19:11<33:41,  1.90s/it]  

{'loss': 0.0401, 'learning_rate': 4.9626865671641796e-06, 'epoch': 2.01}


 51%|█████     | 1090/2144 [19:21<18:04,  1.03s/it]

{'loss': 0.0131, 'learning_rate': 4.9160447761194035e-06, 'epoch': 2.03}


 51%|█████▏    | 1100/2144 [19:31<17:24,  1.00s/it]

{'loss': 0.132, 'learning_rate': 4.8694029850746275e-06, 'epoch': 2.05}


 52%|█████▏    | 1110/2144 [19:41<17:13,  1.00it/s]

{'loss': 0.0003, 'learning_rate': 4.822761194029851e-06, 'epoch': 2.07}


 52%|█████▏    | 1120/2144 [19:51<17:03,  1.00it/s]

{'loss': 0.0002, 'learning_rate': 4.7761194029850745e-06, 'epoch': 2.09}


 53%|█████▎    | 1130/2144 [20:01<16:36,  1.02it/s]

{'loss': 0.0003, 'learning_rate': 4.729477611940299e-06, 'epoch': 2.11}


 53%|█████▎    | 1140/2144 [20:11<16:38,  1.01it/s]

{'loss': 0.1694, 'learning_rate': 4.682835820895522e-06, 'epoch': 2.13}


 54%|█████▎    | 1150/2144 [20:21<16:28,  1.01it/s]

{'loss': 0.0002, 'learning_rate': 4.636194029850747e-06, 'epoch': 2.15}


 54%|█████▍    | 1160/2144 [20:31<16:17,  1.01it/s]

{'loss': 0.0351, 'learning_rate': 4.58955223880597e-06, 'epoch': 2.16}


 55%|█████▍    | 1170/2144 [20:41<16:19,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 4.542910447761194e-06, 'epoch': 2.18}


 55%|█████▌    | 1180/2144 [20:51<16:04,  1.00s/it]

{'loss': 0.2364, 'learning_rate': 4.496268656716418e-06, 'epoch': 2.2}


 56%|█████▌    | 1190/2144 [21:01<15:49,  1.00it/s]

{'loss': 0.0669, 'learning_rate': 4.449626865671642e-06, 'epoch': 2.22}


 56%|█████▌    | 1200/2144 [21:11<15:44,  1.00s/it]

{'loss': 0.0003, 'learning_rate': 4.402985074626866e-06, 'epoch': 2.24}


 56%|█████▋    | 1210/2144 [21:21<15:35,  1.00s/it]

{'loss': 0.0003, 'learning_rate': 4.35634328358209e-06, 'epoch': 2.26}


 57%|█████▋    | 1220/2144 [21:31<15:23,  1.00it/s]

{'loss': 0.0007, 'learning_rate': 4.309701492537314e-06, 'epoch': 2.28}


 57%|█████▋    | 1230/2144 [21:41<15:11,  1.00it/s]

{'loss': 0.0004, 'learning_rate': 4.263059701492538e-06, 'epoch': 2.29}


 58%|█████▊    | 1240/2144 [21:51<15:01,  1.00it/s]

{'loss': 0.0002, 'learning_rate': 4.216417910447762e-06, 'epoch': 2.31}


 58%|█████▊    | 1250/2144 [22:01<14:55,  1.00s/it]

{'loss': 0.0839, 'learning_rate': 4.169776119402986e-06, 'epoch': 2.33}


 59%|█████▉    | 1260/2144 [22:11<14:44,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 4.123134328358209e-06, 'epoch': 2.35}


 59%|█████▉    | 1270/2144 [22:21<14:29,  1.00it/s]

{'loss': 0.0002, 'learning_rate': 4.076492537313434e-06, 'epoch': 2.37}


 60%|█████▉    | 1280/2144 [22:30<14:19,  1.01it/s]

{'loss': 0.0002, 'learning_rate': 4.029850746268657e-06, 'epoch': 2.39}


 60%|██████    | 1290/2144 [22:40<14:10,  1.00it/s]

{'loss': 0.0006, 'learning_rate': 3.983208955223881e-06, 'epoch': 2.41}


 61%|██████    | 1300/2144 [22:50<13:57,  1.01it/s]

{'loss': 0.0002, 'learning_rate': 3.936567164179105e-06, 'epoch': 2.43}


 61%|██████    | 1310/2144 [23:00<13:44,  1.01it/s]

{'loss': 0.0175, 'learning_rate': 3.889925373134329e-06, 'epoch': 2.44}


 62%|██████▏   | 1320/2144 [23:10<13:36,  1.01it/s]

{'loss': 0.0002, 'learning_rate': 3.843283582089553e-06, 'epoch': 2.46}


 62%|██████▏   | 1330/2144 [23:20<13:25,  1.01it/s]

{'loss': 0.0002, 'learning_rate': 3.7966417910447766e-06, 'epoch': 2.48}


 62%|██████▎   | 1340/2144 [23:30<13:23,  1.00it/s]

{'loss': 0.0002, 'learning_rate': 3.7500000000000005e-06, 'epoch': 2.5}


 63%|██████▎   | 1350/2144 [23:40<13:06,  1.01it/s]

{'loss': 0.0003, 'learning_rate': 3.703358208955224e-06, 'epoch': 2.52}


 63%|██████▎   | 1360/2144 [23:50<12:58,  1.01it/s]

{'loss': 0.0002, 'learning_rate': 3.656716417910448e-06, 'epoch': 2.54}


 64%|██████▍   | 1370/2144 [24:00<12:49,  1.01it/s]

{'loss': 0.0002, 'learning_rate': 3.6100746268656715e-06, 'epoch': 2.56}


 64%|██████▍   | 1380/2144 [24:10<12:37,  1.01it/s]

{'loss': 0.0003, 'learning_rate': 3.563432835820896e-06, 'epoch': 2.57}


 65%|██████▍   | 1390/2144 [24:20<12:31,  1.00it/s]

{'loss': 0.0747, 'learning_rate': 3.5167910447761194e-06, 'epoch': 2.59}


 65%|██████▌   | 1400/2144 [24:30<12:19,  1.01it/s]

{'loss': 0.0002, 'learning_rate': 3.4701492537313438e-06, 'epoch': 2.61}


 66%|██████▌   | 1410/2144 [24:39<12:10,  1.00it/s]

{'loss': 0.0887, 'learning_rate': 3.4235074626865673e-06, 'epoch': 2.63}


 66%|██████▌   | 1420/2144 [24:49<12:01,  1.00it/s]

{'loss': 0.2421, 'learning_rate': 3.3768656716417913e-06, 'epoch': 2.65}


 67%|██████▋   | 1430/2144 [24:59<11:50,  1.01it/s]

{'loss': 0.0002, 'learning_rate': 3.3302238805970148e-06, 'epoch': 2.67}


 67%|██████▋   | 1440/2144 [25:09<11:40,  1.01it/s]

{'loss': 0.0001, 'learning_rate': 3.283582089552239e-06, 'epoch': 2.69}


 68%|██████▊   | 1450/2144 [25:19<11:33,  1.00it/s]

{'loss': 0.0004, 'learning_rate': 3.2369402985074627e-06, 'epoch': 2.71}


 68%|██████▊   | 1460/2144 [25:29<11:16,  1.01it/s]

{'loss': 0.0002, 'learning_rate': 3.190298507462687e-06, 'epoch': 2.72}


 69%|██████▊   | 1470/2144 [25:39<11:10,  1.01it/s]

{'loss': 0.0687, 'learning_rate': 3.1436567164179106e-06, 'epoch': 2.74}


 69%|██████▉   | 1480/2144 [25:49<11:01,  1.00it/s]

{'loss': 0.0003, 'learning_rate': 3.0970149253731345e-06, 'epoch': 2.76}


 69%|██████▉   | 1490/2144 [25:59<10:52,  1.00it/s]

{'loss': 0.0002, 'learning_rate': 3.050373134328358e-06, 'epoch': 2.78}


 70%|██████▉   | 1500/2144 [26:09<10:36,  1.01it/s]

{'loss': 0.0002, 'learning_rate': 3.0037313432835824e-06, 'epoch': 2.8}


 70%|███████   | 1510/2144 [26:19<10:35,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 2.957089552238806e-06, 'epoch': 2.82}


 71%|███████   | 1520/2144 [26:29<10:20,  1.01it/s]

{'loss': 0.0001, 'learning_rate': 2.9104477611940303e-06, 'epoch': 2.84}


 71%|███████▏  | 1530/2144 [26:39<10:10,  1.01it/s]

{'loss': 0.0002, 'learning_rate': 2.863805970149254e-06, 'epoch': 2.85}


 72%|███████▏  | 1540/2144 [26:49<10:00,  1.01it/s]

{'loss': 0.0001, 'learning_rate': 2.8171641791044778e-06, 'epoch': 2.87}


 72%|███████▏  | 1550/2144 [26:59<09:49,  1.01it/s]

{'loss': 0.0307, 'learning_rate': 2.7705223880597017e-06, 'epoch': 2.89}


 73%|███████▎  | 1560/2144 [27:09<09:38,  1.01it/s]

{'loss': 0.0002, 'learning_rate': 2.7238805970149257e-06, 'epoch': 2.91}


 73%|███████▎  | 1570/2144 [27:18<09:31,  1.00it/s]

{'loss': 0.0002, 'learning_rate': 2.677238805970149e-06, 'epoch': 2.93}


 74%|███████▎  | 1580/2144 [27:28<09:20,  1.01it/s]

{'loss': 0.0001, 'learning_rate': 2.6305970149253736e-06, 'epoch': 2.95}


 74%|███████▍  | 1590/2144 [27:38<09:06,  1.01it/s]

{'loss': 0.0012, 'learning_rate': 2.583955223880597e-06, 'epoch': 2.97}


 75%|███████▍  | 1600/2144 [27:48<09:01,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 2.537313432835821e-06, 'epoch': 2.99}


                                                   
 75%|███████▌  | 1608/2144 [28:27<08:36,  1.04it/s]

{'eval_loss': 2.4502718448638916, 'eval_accuracy': 0.7034965034965035, 'eval_precision': 0.7128986242330314, 'eval_recall': 0.7034965034965035, 'eval_f1': 0.7054131314069587, 'eval_runtime': 30.7709, 'eval_samples_per_second': 23.236, 'eval_steps_per_second': 2.925, 'epoch': 3.0}


 75%|███████▌  | 1610/2144 [28:35<1:18:02,  8.77s/it]

{'loss': 0.0002, 'learning_rate': 2.490671641791045e-06, 'epoch': 3.0}


 76%|███████▌  | 1620/2144 [28:45<10:39,  1.22s/it]  

{'loss': 0.0002, 'learning_rate': 2.444029850746269e-06, 'epoch': 3.02}


 76%|███████▌  | 1630/2144 [28:55<08:37,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 2.397388059701493e-06, 'epoch': 3.04}


 76%|███████▋  | 1640/2144 [29:05<08:23,  1.00it/s]

{'loss': 0.0002, 'learning_rate': 2.350746268656717e-06, 'epoch': 3.06}


 77%|███████▋  | 1650/2144 [29:15<08:13,  1.00it/s]

{'loss': 0.0865, 'learning_rate': 2.3041044776119408e-06, 'epoch': 3.08}


 77%|███████▋  | 1660/2144 [29:25<08:03,  1.00it/s]

{'loss': 0.0002, 'learning_rate': 2.2574626865671643e-06, 'epoch': 3.1}


 78%|███████▊  | 1670/2144 [29:35<07:54,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 2.2108208955223883e-06, 'epoch': 3.12}


 78%|███████▊  | 1680/2144 [29:45<07:41,  1.01it/s]

{'loss': 0.0001, 'learning_rate': 2.1641791044776118e-06, 'epoch': 3.13}


 79%|███████▉  | 1690/2144 [29:55<07:33,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 2.1175373134328357e-06, 'epoch': 3.15}


 79%|███████▉  | 1700/2144 [30:05<07:21,  1.01it/s]

{'loss': 0.0001, 'learning_rate': 2.0708955223880597e-06, 'epoch': 3.17}


 80%|███████▉  | 1710/2144 [30:15<07:11,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 2.0242537313432836e-06, 'epoch': 3.19}


 80%|████████  | 1720/2144 [30:25<07:02,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 1.9776119402985076e-06, 'epoch': 3.21}


 81%|████████  | 1730/2144 [30:35<06:54,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.9309701492537315e-06, 'epoch': 3.23}


 81%|████████  | 1740/2144 [30:45<06:45,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.8843283582089553e-06, 'epoch': 3.25}


 82%|████████▏ | 1750/2144 [30:55<06:33,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 1.8376865671641792e-06, 'epoch': 3.26}


 82%|████████▏ | 1760/2144 [31:05<06:23,  1.00it/s]

{'loss': 0.0003, 'learning_rate': 1.791044776119403e-06, 'epoch': 3.28}


 83%|████████▎ | 1770/2144 [31:15<06:13,  1.00it/s]

{'loss': 0.023, 'learning_rate': 1.7444029850746269e-06, 'epoch': 3.3}


 83%|████████▎ | 1780/2144 [31:25<06:05,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 1.6977611940298508e-06, 'epoch': 3.32}


 83%|████████▎ | 1790/2144 [31:35<05:55,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.6511194029850746e-06, 'epoch': 3.34}


 84%|████████▍ | 1800/2144 [31:45<05:43,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 1.6044776119402985e-06, 'epoch': 3.36}


 84%|████████▍ | 1810/2144 [31:55<05:34,  1.00s/it]

{'loss': 0.101, 'learning_rate': 1.5578358208955225e-06, 'epoch': 3.38}


 85%|████████▍ | 1820/2144 [32:05<05:23,  1.00it/s]

{'loss': 0.0002, 'learning_rate': 1.5111940298507464e-06, 'epoch': 3.4}


 85%|████████▌ | 1830/2144 [32:15<05:11,  1.01it/s]

{'loss': 0.1103, 'learning_rate': 1.4645522388059702e-06, 'epoch': 3.41}


 86%|████████▌ | 1840/2144 [32:25<05:02,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 1.417910447761194e-06, 'epoch': 3.43}


 86%|████████▋ | 1850/2144 [32:35<04:52,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 1.371268656716418e-06, 'epoch': 3.45}


 87%|████████▋ | 1860/2144 [32:45<04:44,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.3246268656716418e-06, 'epoch': 3.47}


 87%|████████▋ | 1870/2144 [32:55<04:34,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.2779850746268657e-06, 'epoch': 3.49}


 88%|████████▊ | 1880/2144 [33:05<04:24,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.2313432835820897e-06, 'epoch': 3.51}


 88%|████████▊ | 1890/2144 [33:15<04:13,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 1.1847014925373134e-06, 'epoch': 3.53}


 89%|████████▊ | 1900/2144 [33:25<04:02,  1.01it/s]

{'loss': 0.0001, 'learning_rate': 1.1380597014925374e-06, 'epoch': 3.54}


 89%|████████▉ | 1910/2144 [33:35<03:55,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.0914179104477613e-06, 'epoch': 3.56}


 90%|████████▉ | 1920/2144 [33:45<03:44,  1.00s/it]

{'loss': 0.0022, 'learning_rate': 1.044776119402985e-06, 'epoch': 3.58}


 90%|█████████ | 1930/2144 [33:55<03:33,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 9.98134328358209e-07, 'epoch': 3.6}


 90%|█████████ | 1940/2144 [34:05<03:24,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 9.514925373134328e-07, 'epoch': 3.62}


 91%|█████████ | 1950/2144 [34:15<03:14,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 9.048507462686568e-07, 'epoch': 3.64}


 91%|█████████▏| 1960/2144 [34:25<03:04,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 8.582089552238806e-07, 'epoch': 3.66}


 92%|█████████▏| 1970/2144 [34:35<02:54,  1.00s/it]

{'loss': 0.0028, 'learning_rate': 8.115671641791046e-07, 'epoch': 3.68}


 92%|█████████▏| 1980/2144 [34:45<02:44,  1.00s/it]

{'loss': 0.0003, 'learning_rate': 7.649253731343284e-07, 'epoch': 3.69}


 93%|█████████▎| 1990/2144 [34:55<02:34,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 7.182835820895523e-07, 'epoch': 3.71}


 93%|█████████▎| 2000/2144 [35:05<02:23,  1.00it/s]

{'loss': 0.0965, 'learning_rate': 6.716417910447762e-07, 'epoch': 3.73}


 94%|█████████▍| 2010/2144 [35:15<02:14,  1.00s/it]

{'loss': 0.0513, 'learning_rate': 6.25e-07, 'epoch': 3.75}


 94%|█████████▍| 2020/2144 [35:25<02:04,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 5.783582089552239e-07, 'epoch': 3.77}


 95%|█████████▍| 2030/2144 [35:35<01:54,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 5.317164179104478e-07, 'epoch': 3.79}


 95%|█████████▌| 2040/2144 [35:45<01:44,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 4.850746268656717e-07, 'epoch': 3.81}


 96%|█████████▌| 2050/2144 [35:55<01:34,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 4.384328358208956e-07, 'epoch': 3.82}


 96%|█████████▌| 2060/2144 [36:05<01:24,  1.00s/it]

{'loss': 0.002, 'learning_rate': 3.9179104477611947e-07, 'epoch': 3.84}


 97%|█████████▋| 2070/2144 [36:15<01:13,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 3.451492537313433e-07, 'epoch': 3.86}


 97%|█████████▋| 2080/2144 [36:25<01:03,  1.00it/s]

{'loss': 0.0526, 'learning_rate': 2.9850746268656716e-07, 'epoch': 3.88}


 97%|█████████▋| 2090/2144 [36:35<00:54,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 2.5186567164179105e-07, 'epoch': 3.9}


 98%|█████████▊| 2100/2144 [36:45<00:44,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 2.0522388059701495e-07, 'epoch': 3.92}


 98%|█████████▊| 2110/2144 [36:55<00:34,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 1.5858208955223882e-07, 'epoch': 3.94}


 99%|█████████▉| 2120/2144 [37:05<00:24,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.1194029850746268e-07, 'epoch': 3.96}


 99%|█████████▉| 2130/2144 [37:15<00:14,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 6.529850746268657e-08, 'epoch': 3.97}


100%|█████████▉| 2140/2144 [37:25<00:04,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.8656716417910447e-08, 'epoch': 3.99}


                                                   
100%|██████████| 2144/2144 [38:00<00:00,  1.04it/s]

{'eval_loss': 2.528275489807129, 'eval_accuracy': 0.7006993006993008, 'eval_precision': 0.7078247775661118, 'eval_recall': 0.7006993006993008, 'eval_f1': 0.7031992713059761, 'eval_runtime': 30.6818, 'eval_samples_per_second': 23.304, 'eval_steps_per_second': 2.933, 'epoch': 4.0}


100%|██████████| 2144/2144 [38:08<00:00,  1.07s/it]


{'train_runtime': 2288.6541, 'train_samples_per_second': 7.491, 'train_steps_per_second': 0.937, 'train_loss': 0.05212550815654208, 'epoch': 4.0}


100%|██████████| 90/90 [00:30<00:00,  2.99it/s]
100%|██████████| 90/90 [00:30<00:00,  2.92it/s]
100%|██████████| 90/90 [00:29<00:00,  3.01it/s]


{'accuracy': 0.6881118881118881, 'precision': 0.7065965733876833, 'recall': 0.6881118881118881, 'f1': 0.6924628021152268}
{'accuracy': 0.6820728291316527, 'precision': 0.6897928699987728, 'recall': 0.6820728291316527, 'f1': 0.6832864416028716}


  0%|          | 10/2144 [00:10<35:33,  1.00it/s] 

{'loss': 0.0067, 'learning_rate': 9.953358208955226e-06, 'epoch': 0.02}


  1%|          | 20/2144 [00:20<35:24,  1.00s/it]

{'loss': 0.0006, 'learning_rate': 9.906716417910449e-06, 'epoch': 0.04}


  1%|▏         | 30/2144 [00:30<35:06,  1.00it/s]

{'loss': 0.1682, 'learning_rate': 9.860074626865672e-06, 'epoch': 0.06}


  2%|▏         | 40/2144 [00:40<35:03,  1.00it/s]

{'loss': 0.1696, 'learning_rate': 9.813432835820897e-06, 'epoch': 0.07}


  2%|▏         | 50/2144 [00:50<34:49,  1.00it/s]

{'loss': 0.1889, 'learning_rate': 9.76679104477612e-06, 'epoch': 0.09}


  3%|▎         | 60/2144 [01:00<34:46,  1.00s/it]

{'loss': 0.0223, 'learning_rate': 9.720149253731343e-06, 'epoch': 0.11}


  3%|▎         | 70/2144 [01:10<34:32,  1.00it/s]

{'loss': 0.248, 'learning_rate': 9.673507462686568e-06, 'epoch': 0.13}


  4%|▎         | 80/2144 [01:20<34:22,  1.00it/s]

{'loss': 0.52, 'learning_rate': 9.626865671641792e-06, 'epoch': 0.15}


  4%|▍         | 90/2144 [01:30<34:20,  1.00s/it]

{'loss': 0.2492, 'learning_rate': 9.580223880597016e-06, 'epoch': 0.17}


  5%|▍         | 100/2144 [01:40<34:05,  1.00s/it]

{'loss': 0.1096, 'learning_rate': 9.533582089552239e-06, 'epoch': 0.19}


  5%|▌         | 110/2144 [01:50<33:57,  1.00s/it]

{'loss': 0.3341, 'learning_rate': 9.486940298507463e-06, 'epoch': 0.21}


  6%|▌         | 120/2144 [02:00<33:32,  1.01it/s]

{'loss': 0.1104, 'learning_rate': 9.440298507462688e-06, 'epoch': 0.22}


  6%|▌         | 130/2144 [02:10<33:36,  1.00s/it]

{'loss': 0.2977, 'learning_rate': 9.393656716417911e-06, 'epoch': 0.24}


  7%|▋         | 140/2144 [02:20<33:22,  1.00it/s]

{'loss': 0.2252, 'learning_rate': 9.347014925373134e-06, 'epoch': 0.26}


  7%|▋         | 150/2144 [02:30<33:16,  1.00s/it]

{'loss': 0.3246, 'learning_rate': 9.30037313432836e-06, 'epoch': 0.28}


  7%|▋         | 160/2144 [02:40<33:07,  1.00s/it]

{'loss': 0.1557, 'learning_rate': 9.253731343283582e-06, 'epoch': 0.3}


  8%|▊         | 170/2144 [02:50<32:57,  1.00s/it]

{'loss': 0.2505, 'learning_rate': 9.207089552238807e-06, 'epoch': 0.32}


  8%|▊         | 180/2144 [03:00<32:46,  1.00s/it]

{'loss': 0.1085, 'learning_rate': 9.16044776119403e-06, 'epoch': 0.34}


  9%|▉         | 190/2144 [03:10<32:38,  1.00s/it]

{'loss': 0.0567, 'learning_rate': 9.113805970149255e-06, 'epoch': 0.35}


  9%|▉         | 200/2144 [03:20<32:24,  1.00s/it]

{'loss': 0.0968, 'learning_rate': 9.067164179104478e-06, 'epoch': 0.37}


 10%|▉         | 210/2144 [03:30<32:22,  1.00s/it]

{'loss': 0.0005, 'learning_rate': 9.020522388059703e-06, 'epoch': 0.39}


 10%|█         | 220/2144 [03:40<32:04,  1.00s/it]

{'loss': 0.0111, 'learning_rate': 8.973880597014926e-06, 'epoch': 0.41}


 11%|█         | 230/2144 [03:50<31:56,  1.00s/it]

{'loss': 0.0989, 'learning_rate': 8.927238805970149e-06, 'epoch': 0.43}


 11%|█         | 240/2144 [04:00<31:49,  1.00s/it]

{'loss': 0.0049, 'learning_rate': 8.880597014925374e-06, 'epoch': 0.45}


 12%|█▏        | 250/2144 [04:10<31:40,  1.00s/it]

{'loss': 0.0893, 'learning_rate': 8.833955223880599e-06, 'epoch': 0.47}


 12%|█▏        | 260/2144 [04:20<31:14,  1.00it/s]

{'loss': 0.018, 'learning_rate': 8.787313432835822e-06, 'epoch': 0.49}


 13%|█▎        | 270/2144 [04:30<31:17,  1.00s/it]

{'loss': 0.0008, 'learning_rate': 8.740671641791045e-06, 'epoch': 0.5}


 13%|█▎        | 280/2144 [04:40<31:06,  1.00s/it]

{'loss': 0.0003, 'learning_rate': 8.69402985074627e-06, 'epoch': 0.52}


 14%|█▎        | 290/2144 [04:50<30:58,  1.00s/it]

{'loss': 0.192, 'learning_rate': 8.647388059701494e-06, 'epoch': 0.54}


 14%|█▍        | 300/2144 [05:00<30:48,  1.00s/it]

{'loss': 0.0772, 'learning_rate': 8.600746268656716e-06, 'epoch': 0.56}


 14%|█▍        | 310/2144 [05:10<30:39,  1.00s/it]

{'loss': 0.0953, 'learning_rate': 8.55410447761194e-06, 'epoch': 0.58}


 15%|█▍        | 320/2144 [05:20<30:24,  1.00s/it]

{'loss': 0.0844, 'learning_rate': 8.507462686567165e-06, 'epoch': 0.6}


 15%|█▌        | 330/2144 [05:30<30:13,  1.00it/s]

{'loss': 0.0905, 'learning_rate': 8.460820895522389e-06, 'epoch': 0.62}


 16%|█▌        | 340/2144 [05:40<30:04,  1.00s/it]

{'loss': 0.0004, 'learning_rate': 8.414179104477612e-06, 'epoch': 0.63}


 16%|█▋        | 350/2144 [05:50<29:53,  1.00it/s]

{'loss': 0.0954, 'learning_rate': 8.367537313432836e-06, 'epoch': 0.65}


 17%|█▋        | 360/2144 [06:00<29:41,  1.00it/s]

{'loss': 0.1668, 'learning_rate': 8.320895522388061e-06, 'epoch': 0.67}


 17%|█▋        | 370/2144 [06:10<29:31,  1.00it/s]

{'loss': 0.0004, 'learning_rate': 8.274253731343284e-06, 'epoch': 0.69}


 18%|█▊        | 380/2144 [06:20<29:24,  1.00s/it]

{'loss': 0.0367, 'learning_rate': 8.227611940298507e-06, 'epoch': 0.71}


 18%|█▊        | 390/2144 [06:30<29:08,  1.00it/s]

{'loss': 0.0958, 'learning_rate': 8.180970149253732e-06, 'epoch': 0.73}


 19%|█▊        | 400/2144 [06:40<29:14,  1.01s/it]

{'loss': 0.012, 'learning_rate': 8.134328358208955e-06, 'epoch': 0.75}


 19%|█▉        | 410/2144 [06:50<28:48,  1.00it/s]

{'loss': 0.0914, 'learning_rate': 8.08768656716418e-06, 'epoch': 0.76}


 20%|█▉        | 420/2144 [07:00<28:42,  1.00it/s]

{'loss': 0.0016, 'learning_rate': 8.041044776119403e-06, 'epoch': 0.78}


 20%|██        | 430/2144 [07:10<28:37,  1.00s/it]

{'loss': 0.106, 'learning_rate': 7.994402985074628e-06, 'epoch': 0.8}


 21%|██        | 440/2144 [07:20<28:31,  1.00s/it]

{'loss': 0.1306, 'learning_rate': 7.947761194029851e-06, 'epoch': 0.82}


 21%|██        | 450/2144 [07:30<28:03,  1.01it/s]

{'loss': 0.1753, 'learning_rate': 7.901119402985076e-06, 'epoch': 0.84}


 21%|██▏       | 460/2144 [07:40<28:03,  1.00it/s]

{'loss': 0.0979, 'learning_rate': 7.854477611940299e-06, 'epoch': 0.86}


 22%|██▏       | 470/2144 [07:50<27:51,  1.00it/s]

{'loss': 0.0475, 'learning_rate': 7.807835820895522e-06, 'epoch': 0.88}


 22%|██▏       | 480/2144 [08:00<27:49,  1.00s/it]

{'loss': 0.3453, 'learning_rate': 7.761194029850747e-06, 'epoch': 0.9}


 23%|██▎       | 490/2144 [08:10<27:43,  1.01s/it]

{'loss': 0.301, 'learning_rate': 7.714552238805972e-06, 'epoch': 0.91}


 23%|██▎       | 500/2144 [08:20<27:37,  1.01s/it]

{'loss': 0.028, 'learning_rate': 7.667910447761195e-06, 'epoch': 0.93}


 24%|██▍       | 510/2144 [08:31<27:21,  1.00s/it]

{'loss': 0.1127, 'learning_rate': 7.621268656716419e-06, 'epoch': 0.95}


 24%|██▍       | 520/2144 [08:41<27:09,  1.00s/it]

{'loss': 0.0404, 'learning_rate': 7.574626865671643e-06, 'epoch': 0.97}


 25%|██▍       | 530/2144 [08:51<26:59,  1.00s/it]

{'loss': 0.1592, 'learning_rate': 7.527985074626867e-06, 'epoch': 0.99}


                                                  
 25%|██▌       | 536/2144 [09:26<25:40,  1.04it/s]

{'eval_loss': 2.190413236618042, 'eval_accuracy': 0.7076923076923077, 'eval_precision': 0.71582963918972, 'eval_recall': 0.7076923076923077, 'eval_f1': 0.7103336891983634, 'eval_runtime': 30.0036, 'eval_samples_per_second': 23.83, 'eval_steps_per_second': 3.0, 'epoch': 1.0}


 25%|██▌       | 540/2144 [09:37<2:06:56,  4.75s/it]

{'loss': 0.2679, 'learning_rate': 7.48134328358209e-06, 'epoch': 1.01}


 26%|██▌       | 550/2144 [09:47<29:25,  1.11s/it]  

{'loss': 0.0486, 'learning_rate': 7.434701492537314e-06, 'epoch': 1.03}


 26%|██▌       | 560/2144 [09:57<26:25,  1.00s/it]

{'loss': 0.0005, 'learning_rate': 7.3880597014925385e-06, 'epoch': 1.04}


 27%|██▋       | 570/2144 [10:07<26:11,  1.00it/s]

{'loss': 0.0723, 'learning_rate': 7.3414179104477625e-06, 'epoch': 1.06}


 27%|██▋       | 580/2144 [10:17<25:57,  1.00it/s]

{'loss': 0.0014, 'learning_rate': 7.2947761194029856e-06, 'epoch': 1.08}


 28%|██▊       | 590/2144 [10:27<25:56,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 7.2481343283582095e-06, 'epoch': 1.1}


 28%|██▊       | 600/2144 [10:37<25:43,  1.00it/s]

{'loss': 0.0005, 'learning_rate': 7.2014925373134335e-06, 'epoch': 1.12}


 28%|██▊       | 610/2144 [10:47<25:35,  1.00s/it]

{'loss': 0.0031, 'learning_rate': 7.154850746268658e-06, 'epoch': 1.14}


 29%|██▉       | 620/2144 [10:57<25:17,  1.00it/s]

{'loss': 0.0002, 'learning_rate': 7.1082089552238805e-06, 'epoch': 1.16}


 29%|██▉       | 630/2144 [11:07<25:05,  1.01it/s]

{'loss': 0.0055, 'learning_rate': 7.061567164179105e-06, 'epoch': 1.18}


 30%|██▉       | 640/2144 [11:17<25:02,  1.00it/s]

{'loss': 0.0021, 'learning_rate': 7.014925373134329e-06, 'epoch': 1.19}


 30%|███       | 650/2144 [11:27<24:54,  1.00s/it]

{'loss': 0.0208, 'learning_rate': 6.968283582089553e-06, 'epoch': 1.21}


 31%|███       | 660/2144 [11:37<24:46,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 6.921641791044776e-06, 'epoch': 1.23}


 31%|███▏      | 670/2144 [11:47<24:34,  1.00s/it]

{'loss': 0.0006, 'learning_rate': 6.875e-06, 'epoch': 1.25}


 32%|███▏      | 680/2144 [11:57<24:17,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 6.828358208955225e-06, 'epoch': 1.27}


 32%|███▏      | 690/2144 [12:07<24:10,  1.00it/s]

{'loss': 0.0002, 'learning_rate': 6.781716417910448e-06, 'epoch': 1.29}


 33%|███▎      | 700/2144 [12:17<24:00,  1.00it/s]

{'loss': 0.116, 'learning_rate': 6.735074626865672e-06, 'epoch': 1.31}


 33%|███▎      | 710/2144 [12:27<23:57,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 6.688432835820896e-06, 'epoch': 1.32}


 34%|███▎      | 720/2144 [12:37<23:42,  1.00it/s]

{'loss': 0.0933, 'learning_rate': 6.64179104477612e-06, 'epoch': 1.34}


 34%|███▍      | 730/2144 [12:47<23:35,  1.00s/it]

{'loss': 0.0132, 'learning_rate': 6.595149253731343e-06, 'epoch': 1.36}


 35%|███▍      | 740/2144 [12:57<23:24,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 6.548507462686567e-06, 'epoch': 1.38}


 35%|███▍      | 750/2144 [13:07<23:14,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 6.501865671641792e-06, 'epoch': 1.4}


 35%|███▌      | 760/2144 [13:17<23:03,  1.00it/s]

{'loss': 0.1876, 'learning_rate': 6.455223880597016e-06, 'epoch': 1.42}


 36%|███▌      | 770/2144 [13:27<22:41,  1.01it/s]

{'loss': 0.0002, 'learning_rate': 6.408582089552239e-06, 'epoch': 1.44}


 36%|███▋      | 780/2144 [13:37<22:48,  1.00s/it]

{'loss': 0.1028, 'learning_rate': 6.361940298507463e-06, 'epoch': 1.46}


 37%|███▋      | 790/2144 [13:47<22:34,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 6.315298507462687e-06, 'epoch': 1.47}


 37%|███▋      | 800/2144 [13:57<22:24,  1.00s/it]

{'loss': 0.0753, 'learning_rate': 6.2686567164179116e-06, 'epoch': 1.49}


 38%|███▊      | 810/2144 [14:07<22:12,  1.00it/s]

{'loss': 0.2015, 'learning_rate': 6.222014925373135e-06, 'epoch': 1.51}


 38%|███▊      | 820/2144 [14:17<21:59,  1.00it/s]

{'loss': 0.0787, 'learning_rate': 6.175373134328359e-06, 'epoch': 1.53}


 39%|███▊      | 830/2144 [14:27<22:00,  1.00s/it]

{'loss': 0.0044, 'learning_rate': 6.1287313432835826e-06, 'epoch': 1.55}


 39%|███▉      | 840/2144 [14:37<21:46,  1.00s/it]

{'loss': 0.1196, 'learning_rate': 6.0820895522388065e-06, 'epoch': 1.57}


 40%|███▉      | 850/2144 [14:47<21:35,  1.00s/it]

{'loss': 0.0609, 'learning_rate': 6.03544776119403e-06, 'epoch': 1.59}


 40%|████      | 860/2144 [14:57<21:25,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 5.988805970149254e-06, 'epoch': 1.6}


 41%|████      | 870/2144 [15:07<21:18,  1.00s/it]

{'loss': 0.1077, 'learning_rate': 5.942164179104478e-06, 'epoch': 1.62}


 41%|████      | 880/2144 [15:17<21:12,  1.01s/it]

{'loss': 0.1138, 'learning_rate': 5.895522388059702e-06, 'epoch': 1.64}


 42%|████▏     | 890/2144 [15:27<21:02,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 5.848880597014925e-06, 'epoch': 1.66}


 42%|████▏     | 900/2144 [15:37<20:43,  1.00it/s]

{'loss': 0.0002, 'learning_rate': 5.802238805970149e-06, 'epoch': 1.68}


 42%|████▏     | 910/2144 [15:47<20:33,  1.00it/s]

{'loss': 0.1311, 'learning_rate': 5.755597014925373e-06, 'epoch': 1.7}


 43%|████▎     | 920/2144 [15:57<20:26,  1.00s/it]

{'loss': 0.1124, 'learning_rate': 5.708955223880598e-06, 'epoch': 1.72}


 43%|████▎     | 930/2144 [16:07<20:17,  1.00s/it]

{'loss': 0.0091, 'learning_rate': 5.662313432835821e-06, 'epoch': 1.74}


 44%|████▍     | 940/2144 [16:17<20:00,  1.00it/s]

{'loss': 0.0032, 'learning_rate': 5.615671641791045e-06, 'epoch': 1.75}


 44%|████▍     | 950/2144 [16:27<19:54,  1.00s/it]

{'loss': 0.0562, 'learning_rate': 5.569029850746269e-06, 'epoch': 1.77}


 45%|████▍     | 960/2144 [16:37<19:44,  1.00s/it]

{'loss': 0.0549, 'learning_rate': 5.522388059701493e-06, 'epoch': 1.79}


 45%|████▌     | 970/2144 [16:47<19:39,  1.00s/it]

{'loss': 0.118, 'learning_rate': 5.475746268656716e-06, 'epoch': 1.81}


 46%|████▌     | 980/2144 [16:57<19:19,  1.00it/s]

{'loss': 0.0004, 'learning_rate': 5.429104477611941e-06, 'epoch': 1.83}


 46%|████▌     | 990/2144 [17:07<19:12,  1.00it/s]

{'loss': 0.0005, 'learning_rate': 5.382462686567165e-06, 'epoch': 1.85}


 47%|████▋     | 1000/2144 [17:17<19:00,  1.00it/s]

{'loss': 0.0003, 'learning_rate': 5.335820895522389e-06, 'epoch': 1.87}


 47%|████▋     | 1010/2144 [17:26<18:48,  1.01it/s]

{'loss': 0.2197, 'learning_rate': 5.289179104477612e-06, 'epoch': 1.88}


 48%|████▊     | 1020/2144 [17:36<18:43,  1.00it/s]

{'loss': 0.0003, 'learning_rate': 5.242537313432836e-06, 'epoch': 1.9}


 48%|████▊     | 1030/2144 [17:46<18:35,  1.00s/it]

{'loss': 0.0264, 'learning_rate': 5.195895522388061e-06, 'epoch': 1.92}


 49%|████▊     | 1040/2144 [17:56<18:23,  1.00it/s]

{'loss': 0.062, 'learning_rate': 5.149253731343285e-06, 'epoch': 1.94}


 49%|████▉     | 1050/2144 [18:06<18:20,  1.01s/it]

{'loss': 0.0006, 'learning_rate': 5.102611940298508e-06, 'epoch': 1.96}


 49%|████▉     | 1060/2144 [18:16<17:58,  1.00it/s]

{'loss': 0.0033, 'learning_rate': 5.055970149253732e-06, 'epoch': 1.98}


 50%|████▉     | 1070/2144 [18:26<17:47,  1.01it/s]

{'loss': 0.0002, 'learning_rate': 5.009328358208956e-06, 'epoch': 2.0}


                                                   
 50%|█████     | 1072/2144 [18:59<17:05,  1.05it/s]

{'eval_loss': 2.437784194946289, 'eval_accuracy': 0.7132867132867133, 'eval_precision': 0.7167707393463876, 'eval_recall': 0.7132867132867133, 'eval_f1': 0.7146133737728315, 'eval_runtime': 31.0833, 'eval_samples_per_second': 23.003, 'eval_steps_per_second': 2.895, 'epoch': 2.0}


 50%|█████     | 1080/2144 [19:14<34:07,  1.92s/it]  

{'loss': 0.0017, 'learning_rate': 4.9626865671641796e-06, 'epoch': 2.01}


 51%|█████     | 1090/2144 [19:24<18:04,  1.03s/it]

{'loss': 0.0915, 'learning_rate': 4.9160447761194035e-06, 'epoch': 2.03}


 51%|█████▏    | 1100/2144 [19:34<17:32,  1.01s/it]

{'loss': 0.0195, 'learning_rate': 4.8694029850746275e-06, 'epoch': 2.05}


 52%|█████▏    | 1110/2144 [19:44<17:13,  1.00it/s]

{'loss': 0.1228, 'learning_rate': 4.822761194029851e-06, 'epoch': 2.07}


 52%|█████▏    | 1120/2144 [19:54<17:04,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 4.7761194029850745e-06, 'epoch': 2.09}


 53%|█████▎    | 1130/2144 [20:04<16:58,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 4.729477611940299e-06, 'epoch': 2.11}


 53%|█████▎    | 1140/2144 [20:14<16:43,  1.00it/s]

{'loss': 0.0006, 'learning_rate': 4.682835820895522e-06, 'epoch': 2.13}


 54%|█████▎    | 1150/2144 [20:24<16:34,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 4.636194029850747e-06, 'epoch': 2.15}


 54%|█████▍    | 1160/2144 [20:34<16:27,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 4.58955223880597e-06, 'epoch': 2.16}


 55%|█████▍    | 1170/2144 [20:44<16:12,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 4.542910447761194e-06, 'epoch': 2.18}


 55%|█████▌    | 1180/2144 [20:54<15:56,  1.01it/s]

{'loss': 0.1164, 'learning_rate': 4.496268656716418e-06, 'epoch': 2.2}


 56%|█████▌    | 1190/2144 [21:04<15:53,  1.00it/s]

{'loss': 0.0006, 'learning_rate': 4.449626865671642e-06, 'epoch': 2.22}


 56%|█████▌    | 1200/2144 [21:14<15:45,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 4.402985074626866e-06, 'epoch': 2.24}


 56%|█████▋    | 1210/2144 [21:24<15:31,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 4.35634328358209e-06, 'epoch': 2.26}


 57%|█████▋    | 1220/2144 [21:34<15:24,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 4.309701492537314e-06, 'epoch': 2.28}


 57%|█████▋    | 1230/2144 [21:43<15:11,  1.00it/s]

{'loss': 0.0002, 'learning_rate': 4.263059701492538e-06, 'epoch': 2.29}


 58%|█████▊    | 1240/2144 [21:53<15:03,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 4.216417910447762e-06, 'epoch': 2.31}


 58%|█████▊    | 1250/2144 [22:03<14:55,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 4.169776119402986e-06, 'epoch': 2.33}


 59%|█████▉    | 1260/2144 [22:13<14:44,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 4.123134328358209e-06, 'epoch': 2.35}


 59%|█████▉    | 1270/2144 [22:23<14:36,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 4.076492537313434e-06, 'epoch': 2.37}


 60%|█████▉    | 1280/2144 [22:33<14:25,  1.00s/it]

{'loss': 0.0159, 'learning_rate': 4.029850746268657e-06, 'epoch': 2.39}


 60%|██████    | 1290/2144 [22:43<14:14,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 3.983208955223881e-06, 'epoch': 2.41}


 61%|██████    | 1300/2144 [22:53<13:59,  1.01it/s]

{'loss': 0.0001, 'learning_rate': 3.936567164179105e-06, 'epoch': 2.43}


 61%|██████    | 1310/2144 [23:03<13:50,  1.00it/s]

{'loss': 0.0024, 'learning_rate': 3.889925373134329e-06, 'epoch': 2.44}


 62%|██████▏   | 1320/2144 [23:13<13:38,  1.01it/s]

{'loss': 0.0001, 'learning_rate': 3.843283582089553e-06, 'epoch': 2.46}


 62%|██████▏   | 1330/2144 [23:23<13:30,  1.00it/s]

{'loss': 0.0547, 'learning_rate': 3.7966417910447766e-06, 'epoch': 2.48}


 62%|██████▎   | 1340/2144 [23:33<13:26,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 3.7500000000000005e-06, 'epoch': 2.5}


 63%|██████▎   | 1350/2144 [23:43<13:10,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 3.703358208955224e-06, 'epoch': 2.52}


 63%|██████▎   | 1360/2144 [23:53<13:03,  1.00it/s]

{'loss': 0.0003, 'learning_rate': 3.656716417910448e-06, 'epoch': 2.54}


 64%|██████▍   | 1370/2144 [24:03<12:51,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 3.6100746268656715e-06, 'epoch': 2.56}


 64%|██████▍   | 1380/2144 [24:13<12:44,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 3.563432835820896e-06, 'epoch': 2.57}


 65%|██████▍   | 1390/2144 [24:23<12:32,  1.00it/s]

{'loss': 0.0348, 'learning_rate': 3.5167910447761194e-06, 'epoch': 2.59}


 65%|██████▌   | 1400/2144 [24:33<12:27,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 3.4701492537313438e-06, 'epoch': 2.61}


 66%|██████▌   | 1410/2144 [24:43<12:13,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 3.4235074626865673e-06, 'epoch': 2.63}


 66%|██████▌   | 1420/2144 [24:53<12:05,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 3.3768656716417913e-06, 'epoch': 2.65}


 67%|██████▋   | 1430/2144 [25:03<11:53,  1.00it/s]

{'loss': 0.0005, 'learning_rate': 3.3302238805970148e-06, 'epoch': 2.67}


 67%|██████▋   | 1440/2144 [25:13<11:42,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 3.283582089552239e-06, 'epoch': 2.69}


 68%|██████▊   | 1450/2144 [25:23<11:34,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 3.2369402985074627e-06, 'epoch': 2.71}


 68%|██████▊   | 1460/2144 [25:33<11:24,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 3.190298507462687e-06, 'epoch': 2.72}


 69%|██████▊   | 1470/2144 [25:43<11:11,  1.00it/s]

{'loss': 0.0005, 'learning_rate': 3.1436567164179106e-06, 'epoch': 2.74}


 69%|██████▉   | 1480/2144 [25:53<11:04,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 3.0970149253731345e-06, 'epoch': 2.76}


 69%|██████▉   | 1490/2144 [26:03<10:49,  1.01it/s]

{'loss': 0.0044, 'learning_rate': 3.050373134328358e-06, 'epoch': 2.78}


 70%|██████▉   | 1500/2144 [26:13<10:41,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 3.0037313432835824e-06, 'epoch': 2.8}


 70%|███████   | 1510/2144 [26:23<10:29,  1.01it/s]

{'loss': 0.0001, 'learning_rate': 2.957089552238806e-06, 'epoch': 2.82}


 71%|███████   | 1520/2144 [26:33<10:19,  1.01it/s]

{'loss': 0.1187, 'learning_rate': 2.9104477611940303e-06, 'epoch': 2.84}


 71%|███████▏  | 1530/2144 [26:43<10:13,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 2.863805970149254e-06, 'epoch': 2.85}


 72%|███████▏  | 1540/2144 [26:53<10:01,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 2.8171641791044778e-06, 'epoch': 2.87}


 72%|███████▏  | 1550/2144 [27:03<09:54,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 2.7705223880597017e-06, 'epoch': 2.89}


 73%|███████▎  | 1560/2144 [27:13<09:40,  1.01it/s]

{'loss': 0.0005, 'learning_rate': 2.7238805970149257e-06, 'epoch': 2.91}


 73%|███████▎  | 1570/2144 [27:23<09:33,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 2.677238805970149e-06, 'epoch': 2.93}


 74%|███████▎  | 1580/2144 [27:33<09:23,  1.00it/s]

{'loss': 0.0792, 'learning_rate': 2.6305970149253736e-06, 'epoch': 2.95}


 74%|███████▍  | 1590/2144 [27:43<09:13,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 2.583955223880597e-06, 'epoch': 2.97}


 75%|███████▍  | 1600/2144 [27:53<09:02,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 2.537313432835821e-06, 'epoch': 2.99}


                                                   
 75%|███████▌  | 1608/2144 [28:31<08:33,  1.04it/s]

{'eval_loss': 2.57185697555542, 'eval_accuracy': 0.7188811188811188, 'eval_precision': 0.7217472850197532, 'eval_recall': 0.7188811188811188, 'eval_f1': 0.7199160871097537, 'eval_runtime': 30.4594, 'eval_samples_per_second': 23.474, 'eval_steps_per_second': 2.955, 'epoch': 3.0}


 75%|███████▌  | 1610/2144 [28:39<1:17:53,  8.75s/it]

{'loss': 0.0001, 'learning_rate': 2.490671641791045e-06, 'epoch': 3.0}


 76%|███████▌  | 1620/2144 [28:49<10:40,  1.22s/it]  

{'loss': 0.0777, 'learning_rate': 2.444029850746269e-06, 'epoch': 3.02}


 76%|███████▌  | 1630/2144 [28:59<08:39,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 2.397388059701493e-06, 'epoch': 3.04}


 76%|███████▋  | 1640/2144 [29:09<08:23,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 2.350746268656717e-06, 'epoch': 3.06}


 77%|███████▋  | 1650/2144 [29:19<08:13,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 2.3041044776119408e-06, 'epoch': 3.08}


 77%|███████▋  | 1660/2144 [29:29<08:05,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 2.2574626865671643e-06, 'epoch': 3.1}


 78%|███████▊  | 1670/2144 [29:39<07:54,  1.00s/it]

{'loss': 0.1109, 'learning_rate': 2.2108208955223883e-06, 'epoch': 3.12}


 78%|███████▊  | 1680/2144 [29:49<07:41,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 2.1641791044776118e-06, 'epoch': 3.13}


 79%|███████▉  | 1690/2144 [29:59<07:32,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 2.1175373134328357e-06, 'epoch': 3.15}


 79%|███████▉  | 1700/2144 [30:09<07:23,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 2.0708955223880597e-06, 'epoch': 3.17}


 80%|███████▉  | 1710/2144 [30:19<07:13,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 2.0242537313432836e-06, 'epoch': 3.19}


 80%|████████  | 1720/2144 [30:29<07:02,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 1.9776119402985076e-06, 'epoch': 3.21}


 81%|████████  | 1730/2144 [30:39<06:54,  1.00s/it]

{'loss': 0.0, 'learning_rate': 1.9309701492537315e-06, 'epoch': 3.23}


 81%|████████  | 1740/2144 [30:49<06:43,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 1.8843283582089553e-06, 'epoch': 3.25}


 82%|████████▏ | 1750/2144 [30:59<06:31,  1.01it/s]

{'loss': 0.0, 'learning_rate': 1.8376865671641792e-06, 'epoch': 3.26}


 82%|████████▏ | 1760/2144 [31:09<06:21,  1.01it/s]

{'loss': 0.0001, 'learning_rate': 1.791044776119403e-06, 'epoch': 3.28}


 83%|████████▎ | 1770/2144 [31:19<06:13,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 1.7444029850746269e-06, 'epoch': 3.3}


 83%|████████▎ | 1780/2144 [31:29<06:03,  1.00it/s]

{'loss': 0.0015, 'learning_rate': 1.6977611940298508e-06, 'epoch': 3.32}


 83%|████████▎ | 1790/2144 [31:39<05:54,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.6511194029850746e-06, 'epoch': 3.34}


 84%|████████▍ | 1800/2144 [31:49<05:42,  1.00it/s]

{'loss': 0.0, 'learning_rate': 1.6044776119402985e-06, 'epoch': 3.36}


 84%|████████▍ | 1810/2144 [31:59<05:34,  1.00s/it]

{'loss': 0.0609, 'learning_rate': 1.5578358208955225e-06, 'epoch': 3.38}


 85%|████████▍ | 1820/2144 [32:09<05:24,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.5111940298507464e-06, 'epoch': 3.4}


 85%|████████▌ | 1830/2144 [32:19<05:13,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 1.4645522388059702e-06, 'epoch': 3.41}


 86%|████████▌ | 1840/2144 [32:29<05:05,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 1.417910447761194e-06, 'epoch': 3.43}


 86%|████████▋ | 1850/2144 [32:39<04:55,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.371268656716418e-06, 'epoch': 3.45}


 87%|████████▋ | 1860/2144 [32:49<04:43,  1.00it/s]

{'loss': 0.0432, 'learning_rate': 1.3246268656716418e-06, 'epoch': 3.47}


 87%|████████▋ | 1870/2144 [32:59<04:34,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.2779850746268657e-06, 'epoch': 3.49}


 88%|████████▊ | 1880/2144 [33:09<04:24,  1.00s/it]

{'loss': 0.0278, 'learning_rate': 1.2313432835820897e-06, 'epoch': 3.51}


 88%|████████▊ | 1890/2144 [33:19<04:14,  1.00s/it]

{'loss': 0.0, 'learning_rate': 1.1847014925373134e-06, 'epoch': 3.53}


 89%|████████▊ | 1900/2144 [33:29<04:02,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 1.1380597014925374e-06, 'epoch': 3.54}


 89%|████████▉ | 1910/2144 [33:39<03:55,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.0914179104477613e-06, 'epoch': 3.56}


 90%|████████▉ | 1920/2144 [33:49<03:44,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.044776119402985e-06, 'epoch': 3.58}


 90%|█████████ | 1930/2144 [33:59<03:34,  1.00s/it]

{'loss': 0.0, 'learning_rate': 9.98134328358209e-07, 'epoch': 3.6}


 90%|█████████ | 1940/2144 [34:09<03:23,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 9.514925373134328e-07, 'epoch': 3.62}


 91%|█████████ | 1950/2144 [34:19<03:14,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 9.048507462686568e-07, 'epoch': 3.64}


 91%|█████████▏| 1960/2144 [34:29<03:02,  1.01it/s]

{'loss': 0.0001, 'learning_rate': 8.582089552238806e-07, 'epoch': 3.66}


 92%|█████████▏| 1970/2144 [34:39<02:53,  1.00it/s]

{'loss': 0.0, 'learning_rate': 8.115671641791046e-07, 'epoch': 3.68}


 92%|█████████▏| 1980/2144 [34:49<02:43,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 7.649253731343284e-07, 'epoch': 3.69}


 93%|█████████▎| 1990/2144 [34:59<02:33,  1.01it/s]

{'loss': 0.0001, 'learning_rate': 7.182835820895523e-07, 'epoch': 3.71}


 93%|█████████▎| 2000/2144 [35:09<02:23,  1.00it/s]

{'loss': 0.0, 'learning_rate': 6.716417910447762e-07, 'epoch': 3.73}


 94%|█████████▍| 2010/2144 [35:19<02:12,  1.01it/s]

{'loss': 0.0001, 'learning_rate': 6.25e-07, 'epoch': 3.75}


 94%|█████████▍| 2020/2144 [35:29<02:03,  1.00it/s]

{'loss': 0.0, 'learning_rate': 5.783582089552239e-07, 'epoch': 3.77}


 95%|█████████▍| 2030/2144 [35:39<01:53,  1.01it/s]

{'loss': 0.0001, 'learning_rate': 5.317164179104478e-07, 'epoch': 3.79}


 95%|█████████▌| 2040/2144 [35:49<01:44,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 4.850746268656717e-07, 'epoch': 3.81}


 96%|█████████▌| 2050/2144 [35:59<01:34,  1.00s/it]

{'loss': 0.0, 'learning_rate': 4.384328358208956e-07, 'epoch': 3.82}


 96%|█████████▌| 2060/2144 [36:09<01:23,  1.01it/s]

{'loss': 0.0, 'learning_rate': 3.9179104477611947e-07, 'epoch': 3.84}


 97%|█████████▋| 2070/2144 [36:19<01:13,  1.00it/s]

{'loss': 0.0, 'learning_rate': 3.451492537313433e-07, 'epoch': 3.86}


 97%|█████████▋| 2080/2144 [36:29<01:03,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 2.9850746268656716e-07, 'epoch': 3.88}


 97%|█████████▋| 2090/2144 [36:39<00:53,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 2.5186567164179105e-07, 'epoch': 3.9}


 98%|█████████▊| 2100/2144 [36:49<00:44,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 2.0522388059701495e-07, 'epoch': 3.92}


 98%|█████████▊| 2110/2144 [36:58<00:33,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 1.5858208955223882e-07, 'epoch': 3.94}


 99%|█████████▉| 2120/2144 [37:08<00:23,  1.00it/s]

{'loss': 0.0, 'learning_rate': 1.1194029850746268e-07, 'epoch': 3.96}


 99%|█████████▉| 2130/2144 [37:18<00:13,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 6.529850746268657e-08, 'epoch': 3.97}


100%|█████████▉| 2140/2144 [37:28<00:03,  1.00it/s]

{'loss': 0.0, 'learning_rate': 1.8656716417910447e-08, 'epoch': 3.99}


                                                   
100%|██████████| 2144/2144 [38:04<00:00,  1.03it/s]

{'eval_loss': 2.6593310832977295, 'eval_accuracy': 0.7132867132867133, 'eval_precision': 0.718898419639744, 'eval_recall': 0.7132867132867133, 'eval_f1': 0.7152895576710312, 'eval_runtime': 31.8035, 'eval_samples_per_second': 22.482, 'eval_steps_per_second': 2.83, 'epoch': 4.0}


100%|██████████| 2144/2144 [38:13<00:00,  1.07s/it]


{'train_runtime': 2293.3604, 'train_samples_per_second': 7.475, 'train_steps_per_second': 0.935, 'train_loss': 0.04638035445179357, 'epoch': 4.0}


100%|██████████| 90/90 [00:30<00:00,  2.91it/s]
100%|██████████| 90/90 [00:30<00:00,  2.99it/s]
100%|██████████| 90/90 [00:30<00:00,  2.94it/s]


{'accuracy': 0.7076923076923077, 'precision': 0.71582963918972, 'recall': 0.7076923076923077, 'f1': 0.7103336891983634}
{'accuracy': 0.6792717086834734, 'precision': 0.6800493171478005, 'recall': 0.6792717086834734, 'f1': 0.6795534561842252}


  0%|          | 10/2144 [00:10<36:07,  1.02s/it] 

{'loss': 0.0041, 'learning_rate': 9.953358208955226e-06, 'epoch': 0.02}


  1%|          | 20/2144 [00:20<35:36,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 9.906716417910449e-06, 'epoch': 0.04}


  1%|▏         | 30/2144 [00:30<35:21,  1.00s/it]

{'loss': 0.0004, 'learning_rate': 9.860074626865672e-06, 'epoch': 0.06}


  2%|▏         | 40/2144 [00:40<35:01,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 9.813432835820897e-06, 'epoch': 0.07}


  2%|▏         | 50/2144 [00:50<34:54,  1.00s/it]

{'loss': 0.0003, 'learning_rate': 9.76679104477612e-06, 'epoch': 0.09}


  3%|▎         | 60/2144 [01:00<34:30,  1.01it/s]

{'loss': 0.1293, 'learning_rate': 9.720149253731343e-06, 'epoch': 0.11}


  3%|▎         | 70/2144 [01:10<34:32,  1.00it/s]

{'loss': 0.0086, 'learning_rate': 9.673507462686568e-06, 'epoch': 0.13}


  4%|▎         | 80/2144 [01:20<34:24,  1.00s/it]

{'loss': 0.075, 'learning_rate': 9.626865671641792e-06, 'epoch': 0.15}


  4%|▍         | 90/2144 [01:30<34:16,  1.00s/it]

{'loss': 0.0741, 'learning_rate': 9.580223880597016e-06, 'epoch': 0.17}


  5%|▍         | 100/2144 [01:40<34:06,  1.00s/it]

{'loss': 0.0471, 'learning_rate': 9.533582089552239e-06, 'epoch': 0.19}


  5%|▌         | 110/2144 [01:50<33:58,  1.00s/it]

{'loss': 0.0903, 'learning_rate': 9.486940298507463e-06, 'epoch': 0.21}


  6%|▌         | 120/2144 [02:00<33:40,  1.00it/s]

{'loss': 0.0002, 'learning_rate': 9.440298507462688e-06, 'epoch': 0.22}


  6%|▌         | 130/2144 [02:10<33:30,  1.00it/s]

{'loss': 0.099, 'learning_rate': 9.393656716417911e-06, 'epoch': 0.24}


  7%|▋         | 140/2144 [02:20<33:23,  1.00it/s]

{'loss': 0.0245, 'learning_rate': 9.347014925373134e-06, 'epoch': 0.26}


  7%|▋         | 150/2144 [02:30<33:12,  1.00it/s]

{'loss': 0.1401, 'learning_rate': 9.30037313432836e-06, 'epoch': 0.28}


  7%|▋         | 160/2144 [02:40<33:09,  1.00s/it]

{'loss': 0.0393, 'learning_rate': 9.253731343283582e-06, 'epoch': 0.3}


  8%|▊         | 170/2144 [02:50<32:44,  1.00it/s]

{'loss': 0.0687, 'learning_rate': 9.207089552238807e-06, 'epoch': 0.32}


  8%|▊         | 180/2144 [03:00<32:39,  1.00it/s]

{'loss': 0.157, 'learning_rate': 9.16044776119403e-06, 'epoch': 0.34}


  9%|▉         | 190/2144 [03:10<32:29,  1.00it/s]

{'loss': 0.0482, 'learning_rate': 9.113805970149255e-06, 'epoch': 0.35}


  9%|▉         | 200/2144 [03:20<32:17,  1.00it/s]

{'loss': 0.1925, 'learning_rate': 9.067164179104478e-06, 'epoch': 0.37}


 10%|▉         | 210/2144 [03:30<32:13,  1.00it/s]

{'loss': 0.1673, 'learning_rate': 9.020522388059703e-06, 'epoch': 0.39}


 10%|█         | 220/2144 [03:40<32:05,  1.00s/it]

{'loss': 0.0003, 'learning_rate': 8.973880597014926e-06, 'epoch': 0.41}


 11%|█         | 230/2144 [03:50<31:50,  1.00it/s]

{'loss': 0.386, 'learning_rate': 8.927238805970149e-06, 'epoch': 0.43}


 11%|█         | 240/2144 [04:00<31:46,  1.00s/it]

{'loss': 0.1357, 'learning_rate': 8.880597014925374e-06, 'epoch': 0.45}


 12%|█▏        | 250/2144 [04:10<31:20,  1.01it/s]

{'loss': 0.0011, 'learning_rate': 8.833955223880599e-06, 'epoch': 0.47}


 12%|█▏        | 260/2144 [04:20<31:27,  1.00s/it]

{'loss': 0.1353, 'learning_rate': 8.787313432835822e-06, 'epoch': 0.49}


 13%|█▎        | 270/2144 [04:30<30:57,  1.01it/s]

{'loss': 0.0102, 'learning_rate': 8.740671641791045e-06, 'epoch': 0.5}


 13%|█▎        | 280/2144 [04:40<30:53,  1.01it/s]

{'loss': 0.0005, 'learning_rate': 8.69402985074627e-06, 'epoch': 0.52}


 14%|█▎        | 290/2144 [04:50<30:36,  1.01it/s]

{'loss': 0.0004, 'learning_rate': 8.647388059701494e-06, 'epoch': 0.54}


 14%|█▍        | 300/2144 [05:00<30:28,  1.01it/s]

{'loss': 0.1156, 'learning_rate': 8.600746268656716e-06, 'epoch': 0.56}


 14%|█▍        | 310/2144 [05:10<30:36,  1.00s/it]

{'loss': 0.0003, 'learning_rate': 8.55410447761194e-06, 'epoch': 0.58}


 15%|█▍        | 320/2144 [05:20<30:22,  1.00it/s]

{'loss': 0.0996, 'learning_rate': 8.507462686567165e-06, 'epoch': 0.6}


 15%|█▌        | 330/2144 [05:30<30:11,  1.00it/s]

{'loss': 0.184, 'learning_rate': 8.460820895522389e-06, 'epoch': 0.62}


 16%|█▌        | 340/2144 [05:40<29:58,  1.00it/s]

{'loss': 0.1021, 'learning_rate': 8.414179104477612e-06, 'epoch': 0.63}


 16%|█▋        | 350/2144 [05:50<29:49,  1.00it/s]

{'loss': 0.1015, 'learning_rate': 8.367537313432836e-06, 'epoch': 0.65}


 17%|█▋        | 360/2144 [06:00<29:41,  1.00it/s]

{'loss': 0.0342, 'learning_rate': 8.320895522388061e-06, 'epoch': 0.67}


 17%|█▋        | 370/2144 [06:10<29:33,  1.00it/s]

{'loss': 0.2105, 'learning_rate': 8.274253731343284e-06, 'epoch': 0.69}


 18%|█▊        | 380/2144 [06:20<29:20,  1.00it/s]

{'loss': 0.004, 'learning_rate': 8.227611940298507e-06, 'epoch': 0.71}


 18%|█▊        | 390/2144 [06:30<29:13,  1.00it/s]

{'loss': 0.059, 'learning_rate': 8.180970149253732e-06, 'epoch': 0.73}


 19%|█▊        | 400/2144 [06:40<29:07,  1.00s/it]

{'loss': 0.1147, 'learning_rate': 8.134328358208955e-06, 'epoch': 0.75}


 19%|█▉        | 410/2144 [06:50<28:51,  1.00it/s]

{'loss': 0.0166, 'learning_rate': 8.08768656716418e-06, 'epoch': 0.76}


 20%|█▉        | 420/2144 [07:00<28:48,  1.00s/it]

{'loss': 0.0004, 'learning_rate': 8.041044776119403e-06, 'epoch': 0.78}


 20%|██        | 430/2144 [07:10<28:26,  1.00it/s]

{'loss': 0.0004, 'learning_rate': 7.994402985074628e-06, 'epoch': 0.8}


 21%|██        | 440/2144 [07:20<28:30,  1.00s/it]

{'loss': 0.0007, 'learning_rate': 7.947761194029851e-06, 'epoch': 0.82}


 21%|██        | 450/2144 [07:30<28:10,  1.00it/s]

{'loss': 0.1448, 'learning_rate': 7.901119402985076e-06, 'epoch': 0.84}


 21%|██▏       | 460/2144 [07:40<28:00,  1.00it/s]

{'loss': 0.0003, 'learning_rate': 7.854477611940299e-06, 'epoch': 0.86}


 22%|██▏       | 470/2144 [07:50<27:40,  1.01it/s]

{'loss': 0.0003, 'learning_rate': 7.807835820895522e-06, 'epoch': 0.88}


 22%|██▏       | 480/2144 [07:59<27:40,  1.00it/s]

{'loss': 0.1848, 'learning_rate': 7.761194029850747e-06, 'epoch': 0.9}


 23%|██▎       | 490/2144 [08:09<27:30,  1.00it/s]

{'loss': 0.0955, 'learning_rate': 7.714552238805972e-06, 'epoch': 0.91}


 23%|██▎       | 500/2144 [08:19<27:27,  1.00s/it]

{'loss': 0.09, 'learning_rate': 7.667910447761195e-06, 'epoch': 0.93}


 24%|██▍       | 510/2144 [08:29<27:12,  1.00it/s]

{'loss': 0.0524, 'learning_rate': 7.621268656716419e-06, 'epoch': 0.95}


 24%|██▍       | 520/2144 [08:39<27:04,  1.00s/it]

{'loss': 0.0965, 'learning_rate': 7.574626865671643e-06, 'epoch': 0.97}


 25%|██▍       | 530/2144 [08:49<26:57,  1.00s/it]

{'loss': 0.0018, 'learning_rate': 7.527985074626867e-06, 'epoch': 0.99}


                                                  
 25%|██▌       | 536/2144 [09:25<25:48,  1.04it/s]

{'eval_loss': 2.323387384414673, 'eval_accuracy': 0.7076923076923077, 'eval_precision': 0.7081630583529711, 'eval_recall': 0.7076923076923077, 'eval_f1': 0.7061709105588093, 'eval_runtime': 29.7898, 'eval_samples_per_second': 24.001, 'eval_steps_per_second': 3.021, 'epoch': 1.0}


 25%|██▌       | 540/2144 [09:35<2:06:01,  4.71s/it]

{'loss': 0.2198, 'learning_rate': 7.48134328358209e-06, 'epoch': 1.01}


 26%|██▌       | 550/2144 [09:45<29:17,  1.10s/it]  

{'loss': 0.0665, 'learning_rate': 7.434701492537314e-06, 'epoch': 1.03}


 26%|██▌       | 560/2144 [09:55<26:21,  1.00it/s]

{'loss': 0.0005, 'learning_rate': 7.3880597014925385e-06, 'epoch': 1.04}


 27%|██▋       | 570/2144 [10:05<26:08,  1.00it/s]

{'loss': 0.0005, 'learning_rate': 7.3414179104477625e-06, 'epoch': 1.06}


 27%|██▋       | 580/2144 [10:15<26:01,  1.00it/s]

{'loss': 0.0959, 'learning_rate': 7.2947761194029856e-06, 'epoch': 1.08}


 28%|██▊       | 590/2144 [10:25<25:58,  1.00s/it]

{'loss': 0.0926, 'learning_rate': 7.2481343283582095e-06, 'epoch': 1.1}


 28%|██▊       | 600/2144 [10:35<25:47,  1.00s/it]

{'loss': 0.1133, 'learning_rate': 7.2014925373134335e-06, 'epoch': 1.12}


 28%|██▊       | 610/2144 [10:45<25:31,  1.00it/s]

{'loss': 0.1466, 'learning_rate': 7.154850746268658e-06, 'epoch': 1.14}


 29%|██▉       | 620/2144 [10:55<25:27,  1.00s/it]

{'loss': 0.0961, 'learning_rate': 7.1082089552238805e-06, 'epoch': 1.16}


 29%|██▉       | 630/2144 [11:05<25:12,  1.00it/s]

{'loss': 0.1213, 'learning_rate': 7.061567164179105e-06, 'epoch': 1.18}


 30%|██▉       | 640/2144 [11:15<25:05,  1.00s/it]

{'loss': 0.0205, 'learning_rate': 7.014925373134329e-06, 'epoch': 1.19}


 30%|███       | 650/2144 [11:25<24:57,  1.00s/it]

{'loss': 0.0005, 'learning_rate': 6.968283582089553e-06, 'epoch': 1.21}


 31%|███       | 660/2144 [11:35<24:48,  1.00s/it]

{'loss': 0.0131, 'learning_rate': 6.921641791044776e-06, 'epoch': 1.23}


 31%|███▏      | 670/2144 [11:45<24:35,  1.00s/it]

{'loss': 0.0951, 'learning_rate': 6.875e-06, 'epoch': 1.25}


 32%|███▏      | 680/2144 [11:55<24:22,  1.00it/s]

{'loss': 0.0004, 'learning_rate': 6.828358208955225e-06, 'epoch': 1.27}


 32%|███▏      | 690/2144 [12:05<24:03,  1.01it/s]

{'loss': 0.0007, 'learning_rate': 6.781716417910448e-06, 'epoch': 1.29}


 33%|███▎      | 700/2144 [12:15<23:58,  1.00it/s]

{'loss': 0.0658, 'learning_rate': 6.735074626865672e-06, 'epoch': 1.31}


 33%|███▎      | 710/2144 [12:25<23:47,  1.00it/s]

{'loss': 0.0103, 'learning_rate': 6.688432835820896e-06, 'epoch': 1.32}


 34%|███▎      | 720/2144 [12:35<23:43,  1.00it/s]

{'loss': 0.0002, 'learning_rate': 6.64179104477612e-06, 'epoch': 1.34}


 34%|███▍      | 730/2144 [12:45<23:32,  1.00it/s]

{'loss': 0.0004, 'learning_rate': 6.595149253731343e-06, 'epoch': 1.36}


 35%|███▍      | 740/2144 [12:55<23:24,  1.00s/it]

{'loss': 0.0599, 'learning_rate': 6.548507462686567e-06, 'epoch': 1.38}


 35%|███▍      | 750/2144 [13:05<23:14,  1.00s/it]

{'loss': 0.0858, 'learning_rate': 6.501865671641792e-06, 'epoch': 1.4}


 35%|███▌      | 760/2144 [13:15<22:57,  1.01it/s]

{'loss': 0.1163, 'learning_rate': 6.455223880597016e-06, 'epoch': 1.42}


 36%|███▌      | 770/2144 [13:25<22:56,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 6.408582089552239e-06, 'epoch': 1.44}


 36%|███▋      | 780/2144 [13:35<22:47,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 6.361940298507463e-06, 'epoch': 1.46}


 37%|███▋      | 790/2144 [13:45<22:38,  1.00s/it]

{'loss': 0.1066, 'learning_rate': 6.315298507462687e-06, 'epoch': 1.47}


 37%|███▋      | 800/2144 [13:55<22:28,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 6.2686567164179116e-06, 'epoch': 1.49}


 38%|███▊      | 810/2144 [14:05<22:13,  1.00it/s]

{'loss': 0.0004, 'learning_rate': 6.222014925373135e-06, 'epoch': 1.51}


 38%|███▊      | 820/2144 [14:15<22:02,  1.00it/s]

{'loss': 0.0004, 'learning_rate': 6.175373134328359e-06, 'epoch': 1.53}


 39%|███▊      | 830/2144 [14:25<21:49,  1.00it/s]

{'loss': 0.0002, 'learning_rate': 6.1287313432835826e-06, 'epoch': 1.55}


 39%|███▉      | 840/2144 [14:35<21:41,  1.00it/s]

{'loss': 0.0679, 'learning_rate': 6.0820895522388065e-06, 'epoch': 1.57}


 40%|███▉      | 850/2144 [14:45<21:28,  1.00it/s]

{'loss': 0.3102, 'learning_rate': 6.03544776119403e-06, 'epoch': 1.59}


 40%|████      | 860/2144 [14:55<21:15,  1.01it/s]

{'loss': 0.0486, 'learning_rate': 5.988805970149254e-06, 'epoch': 1.6}


 41%|████      | 870/2144 [15:05<21:04,  1.01it/s]

{'loss': 0.0642, 'learning_rate': 5.942164179104478e-06, 'epoch': 1.62}


 41%|████      | 880/2144 [15:15<21:03,  1.00it/s]

{'loss': 0.0007, 'learning_rate': 5.895522388059702e-06, 'epoch': 1.64}


 42%|████▏     | 890/2144 [15:25<20:51,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 5.848880597014925e-06, 'epoch': 1.66}


 42%|████▏     | 900/2144 [15:35<20:40,  1.00it/s]

{'loss': 0.0002, 'learning_rate': 5.802238805970149e-06, 'epoch': 1.68}


 42%|████▏     | 910/2144 [15:45<20:30,  1.00it/s]

{'loss': 0.0008, 'learning_rate': 5.755597014925373e-06, 'epoch': 1.7}


 43%|████▎     | 920/2144 [15:55<20:24,  1.00s/it]

{'loss': 0.1165, 'learning_rate': 5.708955223880598e-06, 'epoch': 1.72}


 43%|████▎     | 930/2144 [16:05<20:11,  1.00it/s]

{'loss': 0.0002, 'learning_rate': 5.662313432835821e-06, 'epoch': 1.74}


 44%|████▍     | 940/2144 [16:15<20:01,  1.00it/s]

{'loss': 0.0003, 'learning_rate': 5.615671641791045e-06, 'epoch': 1.75}


 44%|████▍     | 950/2144 [16:25<19:59,  1.00s/it]

{'loss': 0.0003, 'learning_rate': 5.569029850746269e-06, 'epoch': 1.77}


 45%|████▍     | 960/2144 [16:35<19:48,  1.00s/it]

{'loss': 0.0003, 'learning_rate': 5.522388059701493e-06, 'epoch': 1.79}


 45%|████▌     | 970/2144 [16:45<19:29,  1.00it/s]

{'loss': 0.0863, 'learning_rate': 5.475746268656716e-06, 'epoch': 1.81}


 46%|████▌     | 980/2144 [16:55<19:19,  1.00it/s]

{'loss': 0.0665, 'learning_rate': 5.429104477611941e-06, 'epoch': 1.83}


 46%|████▌     | 990/2144 [17:05<19:07,  1.01it/s]

{'loss': 0.1106, 'learning_rate': 5.382462686567165e-06, 'epoch': 1.85}


 47%|████▋     | 1000/2144 [17:15<19:03,  1.00it/s]

{'loss': 0.0112, 'learning_rate': 5.335820895522389e-06, 'epoch': 1.87}


 47%|████▋     | 1010/2144 [17:24<18:53,  1.00it/s]

{'loss': 0.1175, 'learning_rate': 5.289179104477612e-06, 'epoch': 1.88}


 48%|████▊     | 1020/2144 [17:34<18:36,  1.01it/s]

{'loss': 0.0003, 'learning_rate': 5.242537313432836e-06, 'epoch': 1.9}


 48%|████▊     | 1030/2144 [17:44<18:27,  1.01it/s]

{'loss': 0.1248, 'learning_rate': 5.195895522388061e-06, 'epoch': 1.92}


 49%|████▊     | 1040/2144 [17:54<18:19,  1.00it/s]

{'loss': 0.0645, 'learning_rate': 5.149253731343285e-06, 'epoch': 1.94}


 49%|████▉     | 1050/2144 [18:04<18:15,  1.00s/it]

{'loss': 0.0004, 'learning_rate': 5.102611940298508e-06, 'epoch': 1.96}


 49%|████▉     | 1060/2144 [18:14<18:00,  1.00it/s]

{'loss': 0.0004, 'learning_rate': 5.055970149253732e-06, 'epoch': 1.98}


 50%|████▉     | 1070/2144 [18:24<17:53,  1.00it/s]

{'loss': 0.0003, 'learning_rate': 5.009328358208956e-06, 'epoch': 2.0}


                                                   
 50%|█████     | 1072/2144 [18:55<17:08,  1.04it/s]

{'eval_loss': 2.255963087081909, 'eval_accuracy': 0.7048951048951049, 'eval_precision': 0.7104579187046719, 'eval_recall': 0.7048951048951049, 'eval_f1': 0.706843195395784, 'eval_runtime': 29.3698, 'eval_samples_per_second': 24.345, 'eval_steps_per_second': 3.064, 'epoch': 2.0}


 50%|█████     | 1080/2144 [19:10<33:17,  1.88s/it]  

{'loss': 0.0003, 'learning_rate': 4.9626865671641796e-06, 'epoch': 2.01}


 51%|█████     | 1090/2144 [19:20<18:04,  1.03s/it]

{'loss': 0.0012, 'learning_rate': 4.9160447761194035e-06, 'epoch': 2.03}


 51%|█████▏    | 1100/2144 [19:30<17:34,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 4.8694029850746275e-06, 'epoch': 2.05}


 52%|█████▏    | 1110/2144 [19:40<17:15,  1.00s/it]

{'loss': 0.1107, 'learning_rate': 4.822761194029851e-06, 'epoch': 2.07}


 52%|█████▏    | 1120/2144 [19:50<17:05,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 4.7761194029850745e-06, 'epoch': 2.09}


 53%|█████▎    | 1130/2144 [20:00<16:50,  1.00it/s]

{'loss': 0.0152, 'learning_rate': 4.729477611940299e-06, 'epoch': 2.11}


 53%|█████▎    | 1140/2144 [20:10<16:39,  1.00it/s]

{'loss': 0.0002, 'learning_rate': 4.682835820895522e-06, 'epoch': 2.13}


 54%|█████▎    | 1150/2144 [20:20<16:30,  1.00it/s]

{'loss': 0.0002, 'learning_rate': 4.636194029850747e-06, 'epoch': 2.15}


 54%|█████▍    | 1160/2144 [20:30<16:24,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 4.58955223880597e-06, 'epoch': 2.16}


 55%|█████▍    | 1170/2144 [20:39<16:05,  1.01it/s]

{'loss': 0.0082, 'learning_rate': 4.542910447761194e-06, 'epoch': 2.18}


 55%|█████▌    | 1180/2144 [20:49<16:02,  1.00it/s]

{'loss': 0.04, 'learning_rate': 4.496268656716418e-06, 'epoch': 2.2}


 56%|█████▌    | 1190/2144 [20:59<15:57,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 4.449626865671642e-06, 'epoch': 2.22}


 56%|█████▌    | 1200/2144 [21:09<15:45,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 4.402985074626866e-06, 'epoch': 2.24}


 56%|█████▋    | 1210/2144 [21:19<15:33,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 4.35634328358209e-06, 'epoch': 2.26}


 57%|█████▋    | 1220/2144 [21:29<15:25,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 4.309701492537314e-06, 'epoch': 2.28}


 57%|█████▋    | 1230/2144 [21:39<15:15,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 4.263059701492538e-06, 'epoch': 2.29}


 58%|█████▊    | 1240/2144 [21:49<15:07,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 4.216417910447762e-06, 'epoch': 2.31}


 58%|█████▊    | 1250/2144 [21:59<14:56,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 4.169776119402986e-06, 'epoch': 2.33}


 59%|█████▉    | 1260/2144 [22:09<14:41,  1.00it/s]

{'loss': 0.0002, 'learning_rate': 4.123134328358209e-06, 'epoch': 2.35}


 59%|█████▉    | 1270/2144 [22:19<14:35,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 4.076492537313434e-06, 'epoch': 2.37}


 60%|█████▉    | 1280/2144 [22:29<14:17,  1.01it/s]

{'loss': 0.0002, 'learning_rate': 4.029850746268657e-06, 'epoch': 2.39}


 60%|██████    | 1290/2144 [22:39<14:18,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 3.983208955223881e-06, 'epoch': 2.41}


 61%|██████    | 1300/2144 [22:49<14:05,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 3.936567164179105e-06, 'epoch': 2.43}


 61%|██████    | 1310/2144 [22:59<13:53,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 3.889925373134329e-06, 'epoch': 2.44}


 62%|██████▏   | 1320/2144 [23:09<13:47,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 3.843283582089553e-06, 'epoch': 2.46}


 62%|██████▏   | 1330/2144 [23:19<13:35,  1.00s/it]

{'loss': 0.003, 'learning_rate': 3.7966417910447766e-06, 'epoch': 2.48}


 62%|██████▎   | 1340/2144 [23:29<13:26,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 3.7500000000000005e-06, 'epoch': 2.5}


 63%|██████▎   | 1350/2144 [23:39<13:14,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 3.703358208955224e-06, 'epoch': 2.52}


 63%|██████▎   | 1360/2144 [23:49<13:03,  1.00it/s]

{'loss': 0.0931, 'learning_rate': 3.656716417910448e-06, 'epoch': 2.54}


 64%|██████▍   | 1370/2144 [23:59<12:55,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 3.6100746268656715e-06, 'epoch': 2.56}


 64%|██████▍   | 1380/2144 [24:09<12:46,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 3.563432835820896e-06, 'epoch': 2.57}


 65%|██████▍   | 1390/2144 [24:19<12:33,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 3.5167910447761194e-06, 'epoch': 2.59}


 65%|██████▌   | 1400/2144 [24:29<12:25,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 3.4701492537313438e-06, 'epoch': 2.61}


 66%|██████▌   | 1410/2144 [24:39<12:17,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 3.4235074626865673e-06, 'epoch': 2.63}


 66%|██████▌   | 1420/2144 [24:49<12:01,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 3.3768656716417913e-06, 'epoch': 2.65}


 67%|██████▋   | 1430/2144 [24:59<11:55,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 3.3302238805970148e-06, 'epoch': 2.67}


 67%|██████▋   | 1440/2144 [25:09<11:39,  1.01it/s]

{'loss': 0.0001, 'learning_rate': 3.283582089552239e-06, 'epoch': 2.69}


 68%|██████▊   | 1450/2144 [25:19<11:33,  1.00it/s]

{'loss': 0.1513, 'learning_rate': 3.2369402985074627e-06, 'epoch': 2.71}


 68%|██████▊   | 1460/2144 [25:29<11:25,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 3.190298507462687e-06, 'epoch': 2.72}


 69%|██████▊   | 1470/2144 [25:39<11:12,  1.00it/s]

{'loss': 0.1059, 'learning_rate': 3.1436567164179106e-06, 'epoch': 2.74}


 69%|██████▉   | 1480/2144 [25:49<11:00,  1.01it/s]

{'loss': 0.0006, 'learning_rate': 3.0970149253731345e-06, 'epoch': 2.76}


 69%|██████▉   | 1490/2144 [25:59<10:52,  1.00it/s]

{'loss': 0.1006, 'learning_rate': 3.050373134328358e-06, 'epoch': 2.78}


 70%|██████▉   | 1500/2144 [26:09<10:42,  1.00it/s]

{'loss': 0.0005, 'learning_rate': 3.0037313432835824e-06, 'epoch': 2.8}


 70%|███████   | 1510/2144 [26:19<10:31,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 2.957089552238806e-06, 'epoch': 2.82}


 71%|███████   | 1520/2144 [26:29<10:22,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 2.9104477611940303e-06, 'epoch': 2.84}


 71%|███████▏  | 1530/2144 [26:39<10:14,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 2.863805970149254e-06, 'epoch': 2.85}


 72%|███████▏  | 1540/2144 [26:49<10:04,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 2.8171641791044778e-06, 'epoch': 2.87}


 72%|███████▏  | 1550/2144 [26:59<09:51,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 2.7705223880597017e-06, 'epoch': 2.89}


 73%|███████▎  | 1560/2144 [27:09<09:47,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 2.7238805970149257e-06, 'epoch': 2.91}


 73%|███████▎  | 1570/2144 [27:19<09:36,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 2.677238805970149e-06, 'epoch': 2.93}


 74%|███████▎  | 1580/2144 [27:29<09:23,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 2.6305970149253736e-06, 'epoch': 2.95}


 74%|███████▍  | 1590/2144 [27:39<09:14,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 2.583955223880597e-06, 'epoch': 2.97}


 75%|███████▍  | 1600/2144 [27:49<09:02,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 2.537313432835821e-06, 'epoch': 2.99}


                                                   
 75%|███████▌  | 1608/2144 [28:27<08:37,  1.04it/s]

{'eval_loss': 2.4044554233551025, 'eval_accuracy': 0.7230769230769231, 'eval_precision': 0.7216074172725288, 'eval_recall': 0.7230769230769231, 'eval_f1': 0.7216287215453115, 'eval_runtime': 30.098, 'eval_samples_per_second': 23.756, 'eval_steps_per_second': 2.99, 'epoch': 3.0}


 75%|███████▌  | 1610/2144 [28:36<1:17:26,  8.70s/it]

{'loss': 0.0001, 'learning_rate': 2.490671641791045e-06, 'epoch': 3.0}


 76%|███████▌  | 1620/2144 [28:46<10:42,  1.23s/it]  

{'loss': 0.0001, 'learning_rate': 2.444029850746269e-06, 'epoch': 3.02}


 76%|███████▌  | 1630/2144 [28:56<08:37,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 2.397388059701493e-06, 'epoch': 3.04}


 76%|███████▋  | 1640/2144 [29:06<08:23,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 2.350746268656717e-06, 'epoch': 3.06}


 77%|███████▋  | 1650/2144 [29:16<08:15,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 2.3041044776119408e-06, 'epoch': 3.08}


 77%|███████▋  | 1660/2144 [29:26<08:05,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 2.2574626865671643e-06, 'epoch': 3.1}


 78%|███████▊  | 1670/2144 [29:36<07:53,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 2.2108208955223883e-06, 'epoch': 3.12}


 78%|███████▊  | 1680/2144 [29:46<07:44,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 2.1641791044776118e-06, 'epoch': 3.13}


 79%|███████▉  | 1690/2144 [29:56<07:32,  1.00it/s]

{'loss': 0.0005, 'learning_rate': 2.1175373134328357e-06, 'epoch': 3.15}


 79%|███████▉  | 1700/2144 [30:06<07:25,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 2.0708955223880597e-06, 'epoch': 3.17}


 80%|███████▉  | 1710/2144 [30:16<07:16,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 2.0242537313432836e-06, 'epoch': 3.19}


 80%|████████  | 1720/2144 [30:26<07:04,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.9776119402985076e-06, 'epoch': 3.21}


 81%|████████  | 1730/2144 [30:36<06:54,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.9309701492537315e-06, 'epoch': 3.23}


 81%|████████  | 1740/2144 [30:46<06:43,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 1.8843283582089553e-06, 'epoch': 3.25}


 82%|████████▏ | 1750/2144 [30:56<06:34,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.8376865671641792e-06, 'epoch': 3.26}


 82%|████████▏ | 1760/2144 [31:06<06:23,  1.00it/s]

{'loss': 0.0389, 'learning_rate': 1.791044776119403e-06, 'epoch': 3.28}


 83%|████████▎ | 1770/2144 [31:16<06:13,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 1.7444029850746269e-06, 'epoch': 3.3}


 83%|████████▎ | 1780/2144 [31:26<06:01,  1.01it/s]

{'loss': 0.0001, 'learning_rate': 1.6977611940298508e-06, 'epoch': 3.32}


 83%|████████▎ | 1790/2144 [31:36<05:53,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 1.6511194029850746e-06, 'epoch': 3.34}


 84%|████████▍ | 1800/2144 [31:46<05:43,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 1.6044776119402985e-06, 'epoch': 3.36}


 84%|████████▍ | 1810/2144 [31:56<05:33,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 1.5578358208955225e-06, 'epoch': 3.38}


 85%|████████▍ | 1820/2144 [32:05<05:22,  1.01it/s]

{'loss': 0.0001, 'learning_rate': 1.5111940298507464e-06, 'epoch': 3.4}


 85%|████████▌ | 1830/2144 [32:15<05:12,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 1.4645522388059702e-06, 'epoch': 3.41}


 86%|████████▌ | 1840/2144 [32:25<05:04,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.417910447761194e-06, 'epoch': 3.43}


 86%|████████▋ | 1850/2144 [32:35<04:54,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.371268656716418e-06, 'epoch': 3.45}


 87%|████████▋ | 1860/2144 [32:45<04:42,  1.01it/s]

{'loss': 0.0001, 'learning_rate': 1.3246268656716418e-06, 'epoch': 3.47}


 87%|████████▋ | 1870/2144 [32:55<04:33,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 1.2779850746268657e-06, 'epoch': 3.49}


 88%|████████▊ | 1880/2144 [33:05<04:24,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.2313432835820897e-06, 'epoch': 3.51}


 88%|████████▊ | 1890/2144 [33:15<04:14,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.1847014925373134e-06, 'epoch': 3.53}


 89%|████████▊ | 1900/2144 [33:25<04:03,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 1.1380597014925374e-06, 'epoch': 3.54}


 89%|████████▉ | 1910/2144 [33:35<03:54,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.0914179104477613e-06, 'epoch': 3.56}


 90%|████████▉ | 1920/2144 [33:45<03:44,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.044776119402985e-06, 'epoch': 3.58}


 90%|█████████ | 1930/2144 [33:55<03:34,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 9.98134328358209e-07, 'epoch': 3.6}


 90%|█████████ | 1940/2144 [34:05<03:23,  1.00it/s]

{'loss': 0.0013, 'learning_rate': 9.514925373134328e-07, 'epoch': 3.62}


 91%|█████████ | 1950/2144 [34:15<03:13,  1.00it/s]

{'loss': 0.0476, 'learning_rate': 9.048507462686568e-07, 'epoch': 3.64}


 91%|█████████▏| 1960/2144 [34:25<03:04,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 8.582089552238806e-07, 'epoch': 3.66}


 92%|█████████▏| 1970/2144 [34:35<02:53,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 8.115671641791046e-07, 'epoch': 3.68}


 92%|█████████▏| 1980/2144 [34:45<02:43,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 7.649253731343284e-07, 'epoch': 3.69}


 93%|█████████▎| 1990/2144 [34:55<02:33,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 7.182835820895523e-07, 'epoch': 3.71}


 93%|█████████▎| 2000/2144 [35:05<02:24,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 6.716417910447762e-07, 'epoch': 3.73}


 94%|█████████▍| 2010/2144 [35:15<02:14,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 6.25e-07, 'epoch': 3.75}


 94%|█████████▍| 2020/2144 [35:25<02:04,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 5.783582089552239e-07, 'epoch': 3.77}


 95%|█████████▍| 2030/2144 [35:35<01:54,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 5.317164179104478e-07, 'epoch': 3.79}


 95%|█████████▌| 2040/2144 [35:45<01:44,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 4.850746268656717e-07, 'epoch': 3.81}


 96%|█████████▌| 2050/2144 [35:55<01:33,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 4.384328358208956e-07, 'epoch': 3.82}


 96%|█████████▌| 2060/2144 [36:05<01:23,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 3.9179104477611947e-07, 'epoch': 3.84}


 97%|█████████▋| 2070/2144 [36:15<01:13,  1.00it/s]

{'loss': 0.0008, 'learning_rate': 3.451492537313433e-07, 'epoch': 3.86}


 97%|█████████▋| 2080/2144 [36:25<01:04,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 2.9850746268656716e-07, 'epoch': 3.88}


 97%|█████████▋| 2090/2144 [36:35<00:54,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 2.5186567164179105e-07, 'epoch': 3.9}


 98%|█████████▊| 2100/2144 [36:45<00:43,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 2.0522388059701495e-07, 'epoch': 3.92}


 98%|█████████▊| 2110/2144 [36:55<00:34,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.5858208955223882e-07, 'epoch': 3.94}


 99%|█████████▉| 2120/2144 [37:05<00:23,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 1.1194029850746268e-07, 'epoch': 3.96}


 99%|█████████▉| 2130/2144 [37:15<00:14,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 6.529850746268657e-08, 'epoch': 3.97}


100%|█████████▉| 2140/2144 [37:25<00:04,  1.00s/it]

{'loss': 0.012, 'learning_rate': 1.8656716417910447e-08, 'epoch': 3.99}


                                                   
100%|██████████| 2144/2144 [37:59<00:00,  1.04it/s]

{'eval_loss': 2.546006441116333, 'eval_accuracy': 0.7146853146853147, 'eval_precision': 0.7181263983018393, 'eval_recall': 0.7146853146853147, 'eval_f1': 0.7154820466824876, 'eval_runtime': 29.8548, 'eval_samples_per_second': 23.949, 'eval_steps_per_second': 3.015, 'epoch': 4.0}


100%|██████████| 2144/2144 [38:07<00:00,  1.07s/it]


{'train_runtime': 2287.9174, 'train_samples_per_second': 7.493, 'train_steps_per_second': 0.937, 'train_loss': 0.033626310374081606, 'epoch': 4.0}


100%|██████████| 90/90 [00:30<00:00,  2.97it/s]
100%|██████████| 90/90 [00:29<00:00,  3.03it/s]
100%|██████████| 90/90 [00:28<00:00,  3.11it/s]


{'accuracy': 0.7048951048951049, 'precision': 0.7104579187046719, 'recall': 0.7048951048951049, 'f1': 0.706843195395784}
{'accuracy': 0.6792717086834734, 'precision': 0.6789600570445761, 'recall': 0.6792717086834734, 'f1': 0.67893637569283}


  0%|          | 10/2144 [00:10<35:54,  1.01s/it] 

{'loss': 0.0002, 'learning_rate': 9.953358208955226e-06, 'epoch': 0.02}


  1%|          | 20/2144 [00:20<35:28,  1.00s/it]

{'loss': 0.152, 'learning_rate': 9.906716417910449e-06, 'epoch': 0.04}


  1%|▏         | 30/2144 [00:30<35:09,  1.00it/s]

{'loss': 0.3028, 'learning_rate': 9.860074626865672e-06, 'epoch': 0.06}


  2%|▏         | 40/2144 [00:40<34:51,  1.01it/s]

{'loss': 0.0004, 'learning_rate': 9.813432835820897e-06, 'epoch': 0.07}


  2%|▏         | 50/2144 [00:50<34:45,  1.00it/s]

{'loss': 0.1408, 'learning_rate': 9.76679104477612e-06, 'epoch': 0.09}


  3%|▎         | 60/2144 [01:00<34:42,  1.00it/s]

{'loss': 0.0015, 'learning_rate': 9.720149253731343e-06, 'epoch': 0.11}


  3%|▎         | 70/2144 [01:10<34:20,  1.01it/s]

{'loss': 0.1537, 'learning_rate': 9.673507462686568e-06, 'epoch': 0.13}


  4%|▎         | 80/2144 [01:20<34:32,  1.00s/it]

{'loss': 0.0004, 'learning_rate': 9.626865671641792e-06, 'epoch': 0.15}


  4%|▍         | 90/2144 [01:30<34:18,  1.00s/it]

{'loss': 0.0006, 'learning_rate': 9.580223880597016e-06, 'epoch': 0.17}


  5%|▍         | 100/2144 [01:40<34:03,  1.00it/s]

{'loss': 0.0802, 'learning_rate': 9.533582089552239e-06, 'epoch': 0.19}


  5%|▌         | 110/2144 [01:50<33:51,  1.00it/s]

{'loss': 0.0006, 'learning_rate': 9.486940298507463e-06, 'epoch': 0.21}


  6%|▌         | 120/2144 [02:00<33:40,  1.00it/s]

{'loss': 0.0902, 'learning_rate': 9.440298507462688e-06, 'epoch': 0.22}


  6%|▌         | 130/2144 [02:10<33:40,  1.00s/it]

{'loss': 0.0008, 'learning_rate': 9.393656716417911e-06, 'epoch': 0.24}


  7%|▋         | 140/2144 [02:20<33:17,  1.00it/s]

{'loss': 0.0136, 'learning_rate': 9.347014925373134e-06, 'epoch': 0.26}


  7%|▋         | 150/2144 [02:30<33:12,  1.00it/s]

{'loss': 0.0759, 'learning_rate': 9.30037313432836e-06, 'epoch': 0.28}


  7%|▋         | 160/2144 [02:40<33:01,  1.00it/s]

{'loss': 0.0002, 'learning_rate': 9.253731343283582e-06, 'epoch': 0.3}


  8%|▊         | 170/2144 [02:50<32:35,  1.01it/s]

{'loss': 0.0059, 'learning_rate': 9.207089552238807e-06, 'epoch': 0.32}


  8%|▊         | 180/2144 [03:00<32:33,  1.01it/s]

{'loss': 0.0001, 'learning_rate': 9.16044776119403e-06, 'epoch': 0.34}


  9%|▉         | 190/2144 [03:10<32:25,  1.00it/s]

{'loss': 0.0532, 'learning_rate': 9.113805970149255e-06, 'epoch': 0.35}


  9%|▉         | 200/2144 [03:20<32:21,  1.00it/s]

{'loss': 0.0546, 'learning_rate': 9.067164179104478e-06, 'epoch': 0.37}


 10%|▉         | 210/2144 [03:30<32:06,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 9.020522388059703e-06, 'epoch': 0.39}


 10%|█         | 220/2144 [03:40<31:55,  1.00it/s]

{'loss': 0.0025, 'learning_rate': 8.973880597014926e-06, 'epoch': 0.41}


 11%|█         | 230/2144 [03:50<31:51,  1.00it/s]

{'loss': 0.1157, 'learning_rate': 8.927238805970149e-06, 'epoch': 0.43}


 11%|█         | 240/2144 [04:00<31:24,  1.01it/s]

{'loss': 0.0005, 'learning_rate': 8.880597014925374e-06, 'epoch': 0.45}


 12%|█▏        | 250/2144 [04:10<31:29,  1.00it/s]

{'loss': 0.0003, 'learning_rate': 8.833955223880599e-06, 'epoch': 0.47}


 12%|█▏        | 260/2144 [04:20<31:11,  1.01it/s]

{'loss': 0.0005, 'learning_rate': 8.787313432835822e-06, 'epoch': 0.49}


 13%|█▎        | 270/2144 [04:30<31:07,  1.00it/s]

{'loss': 0.0005, 'learning_rate': 8.740671641791045e-06, 'epoch': 0.5}


 13%|█▎        | 280/2144 [04:40<31:01,  1.00it/s]

{'loss': 0.0002, 'learning_rate': 8.69402985074627e-06, 'epoch': 0.52}


 14%|█▎        | 290/2144 [04:49<30:45,  1.00it/s]

{'loss': 0.007, 'learning_rate': 8.647388059701494e-06, 'epoch': 0.54}


 14%|█▍        | 300/2144 [04:59<30:44,  1.00s/it]

{'loss': 0.0595, 'learning_rate': 8.600746268656716e-06, 'epoch': 0.56}


 14%|█▍        | 310/2144 [05:09<30:31,  1.00it/s]

{'loss': 0.0002, 'learning_rate': 8.55410447761194e-06, 'epoch': 0.58}


 15%|█▍        | 320/2144 [05:19<30:20,  1.00it/s]

{'loss': 0.0213, 'learning_rate': 8.507462686567165e-06, 'epoch': 0.6}


 15%|█▌        | 330/2144 [05:29<30:13,  1.00it/s]

{'loss': 0.0662, 'learning_rate': 8.460820895522389e-06, 'epoch': 0.62}


 16%|█▌        | 340/2144 [05:39<29:57,  1.00it/s]

{'loss': 0.1831, 'learning_rate': 8.414179104477612e-06, 'epoch': 0.63}


 16%|█▋        | 350/2144 [05:49<30:01,  1.00s/it]

{'loss': 0.1724, 'learning_rate': 8.367537313432836e-06, 'epoch': 0.65}


 17%|█▋        | 360/2144 [05:59<29:37,  1.00it/s]

{'loss': 0.0002, 'learning_rate': 8.320895522388061e-06, 'epoch': 0.67}


 17%|█▋        | 370/2144 [06:09<29:30,  1.00it/s]

{'loss': 0.0306, 'learning_rate': 8.274253731343284e-06, 'epoch': 0.69}


 18%|█▊        | 380/2144 [06:19<29:12,  1.01it/s]

{'loss': 0.2003, 'learning_rate': 8.227611940298507e-06, 'epoch': 0.71}


 18%|█▊        | 390/2144 [06:29<29:09,  1.00it/s]

{'loss': 0.3119, 'learning_rate': 8.180970149253732e-06, 'epoch': 0.73}


 19%|█▊        | 400/2144 [06:39<29:04,  1.00s/it]

{'loss': 0.1818, 'learning_rate': 8.134328358208955e-06, 'epoch': 0.75}


 19%|█▉        | 410/2144 [06:49<28:51,  1.00it/s]

{'loss': 0.0005, 'learning_rate': 8.08768656716418e-06, 'epoch': 0.76}


 20%|█▉        | 420/2144 [06:59<28:41,  1.00it/s]

{'loss': 0.01, 'learning_rate': 8.041044776119403e-06, 'epoch': 0.78}


 20%|██        | 430/2144 [07:09<28:32,  1.00it/s]

{'loss': 0.1025, 'learning_rate': 7.994402985074628e-06, 'epoch': 0.8}


 21%|██        | 440/2144 [07:19<28:20,  1.00it/s]

{'loss': 0.106, 'learning_rate': 7.947761194029851e-06, 'epoch': 0.82}


 21%|██        | 450/2144 [07:29<28:08,  1.00it/s]

{'loss': 0.0064, 'learning_rate': 7.901119402985076e-06, 'epoch': 0.84}


 21%|██▏       | 460/2144 [07:39<27:48,  1.01it/s]

{'loss': 0.1128, 'learning_rate': 7.854477611940299e-06, 'epoch': 0.86}


 22%|██▏       | 470/2144 [07:49<27:49,  1.00it/s]

{'loss': 0.0002, 'learning_rate': 7.807835820895522e-06, 'epoch': 0.88}


 22%|██▏       | 480/2144 [07:59<27:38,  1.00it/s]

{'loss': 0.0727, 'learning_rate': 7.761194029850747e-06, 'epoch': 0.9}


 23%|██▎       | 490/2144 [08:09<27:34,  1.00s/it]

{'loss': 0.0404, 'learning_rate': 7.714552238805972e-06, 'epoch': 0.91}


 23%|██▎       | 500/2144 [08:19<27:25,  1.00s/it]

{'loss': 0.0003, 'learning_rate': 7.667910447761195e-06, 'epoch': 0.93}


 24%|██▍       | 510/2144 [08:29<27:09,  1.00it/s]

{'loss': 0.1877, 'learning_rate': 7.621268656716419e-06, 'epoch': 0.95}


 24%|██▍       | 520/2144 [08:39<27:09,  1.00s/it]

{'loss': 0.0481, 'learning_rate': 7.574626865671643e-06, 'epoch': 0.97}


 25%|██▍       | 530/2144 [08:49<26:55,  1.00s/it]

{'loss': 0.12, 'learning_rate': 7.527985074626867e-06, 'epoch': 0.99}


                                                  
 25%|██▌       | 536/2144 [09:25<25:50,  1.04it/s]

{'eval_loss': 2.3227908611297607, 'eval_accuracy': 0.7132867132867133, 'eval_precision': 0.7180924382973425, 'eval_recall': 0.7132867132867133, 'eval_f1': 0.7085658201447674, 'eval_runtime': 30.4496, 'eval_samples_per_second': 23.481, 'eval_steps_per_second': 2.956, 'epoch': 1.0}


 25%|██▌       | 540/2144 [09:36<2:08:02,  4.79s/it]

{'loss': 0.1855, 'learning_rate': 7.48134328358209e-06, 'epoch': 1.01}


 26%|██▌       | 550/2144 [09:46<29:20,  1.10s/it]  

{'loss': 0.1069, 'learning_rate': 7.434701492537314e-06, 'epoch': 1.03}


 26%|██▌       | 560/2144 [09:56<26:23,  1.00it/s]

{'loss': 0.1865, 'learning_rate': 7.3880597014925385e-06, 'epoch': 1.04}


 27%|██▋       | 570/2144 [10:06<26:00,  1.01it/s]

{'loss': 0.003, 'learning_rate': 7.3414179104477625e-06, 'epoch': 1.06}


 27%|██▋       | 580/2144 [10:15<25:57,  1.00it/s]

{'loss': 0.0062, 'learning_rate': 7.2947761194029856e-06, 'epoch': 1.08}


 28%|██▊       | 590/2144 [10:25<25:51,  1.00it/s]

{'loss': 0.1191, 'learning_rate': 7.2481343283582095e-06, 'epoch': 1.1}


 28%|██▊       | 600/2144 [10:35<25:33,  1.01it/s]

{'loss': 0.0005, 'learning_rate': 7.2014925373134335e-06, 'epoch': 1.12}


 28%|██▊       | 610/2144 [10:45<25:32,  1.00it/s]

{'loss': 0.0008, 'learning_rate': 7.154850746268658e-06, 'epoch': 1.14}


 29%|██▉       | 620/2144 [10:55<25:18,  1.00it/s]

{'loss': 0.0004, 'learning_rate': 7.1082089552238805e-06, 'epoch': 1.16}


 29%|██▉       | 630/2144 [11:05<25:11,  1.00it/s]

{'loss': 0.1554, 'learning_rate': 7.061567164179105e-06, 'epoch': 1.18}


 30%|██▉       | 640/2144 [11:15<25:04,  1.00s/it]

{'loss': 0.0003, 'learning_rate': 7.014925373134329e-06, 'epoch': 1.19}


 30%|███       | 650/2144 [11:25<24:52,  1.00it/s]

{'loss': 0.0003, 'learning_rate': 6.968283582089553e-06, 'epoch': 1.21}


 31%|███       | 660/2144 [11:35<24:44,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 6.921641791044776e-06, 'epoch': 1.23}


 31%|███▏      | 670/2144 [11:45<24:34,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 6.875e-06, 'epoch': 1.25}


 32%|███▏      | 680/2144 [11:55<24:21,  1.00it/s]

{'loss': 0.0002, 'learning_rate': 6.828358208955225e-06, 'epoch': 1.27}


 32%|███▏      | 690/2144 [12:05<24:07,  1.00it/s]

{'loss': 0.0004, 'learning_rate': 6.781716417910448e-06, 'epoch': 1.29}


 33%|███▎      | 700/2144 [12:15<24:02,  1.00it/s]

{'loss': 0.0002, 'learning_rate': 6.735074626865672e-06, 'epoch': 1.31}


 33%|███▎      | 710/2144 [12:25<24:01,  1.01s/it]

{'loss': 0.0986, 'learning_rate': 6.688432835820896e-06, 'epoch': 1.32}


 34%|███▎      | 720/2144 [12:35<23:29,  1.01it/s]

{'loss': 0.0875, 'learning_rate': 6.64179104477612e-06, 'epoch': 1.34}


 34%|███▍      | 730/2144 [12:45<23:26,  1.01it/s]

{'loss': 0.0002, 'learning_rate': 6.595149253731343e-06, 'epoch': 1.36}


 35%|███▍      | 740/2144 [12:55<23:16,  1.01it/s]

{'loss': 0.0003, 'learning_rate': 6.548507462686567e-06, 'epoch': 1.38}


 35%|███▍      | 750/2144 [13:05<23:12,  1.00it/s]

{'loss': 0.0002, 'learning_rate': 6.501865671641792e-06, 'epoch': 1.4}


 35%|███▌      | 760/2144 [13:15<23:08,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 6.455223880597016e-06, 'epoch': 1.42}


 36%|███▌      | 770/2144 [13:25<22:46,  1.01it/s]

{'loss': 0.0002, 'learning_rate': 6.408582089552239e-06, 'epoch': 1.44}


 36%|███▋      | 780/2144 [13:35<22:47,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 6.361940298507463e-06, 'epoch': 1.46}


 37%|███▋      | 790/2144 [13:45<22:30,  1.00it/s]

{'loss': 0.0002, 'learning_rate': 6.315298507462687e-06, 'epoch': 1.47}


 37%|███▋      | 800/2144 [13:55<22:21,  1.00it/s]

{'loss': 0.1004, 'learning_rate': 6.2686567164179116e-06, 'epoch': 1.49}


 38%|███▊      | 810/2144 [14:05<22:15,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 6.222014925373135e-06, 'epoch': 1.51}


 38%|███▊      | 820/2144 [14:15<21:59,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 6.175373134328359e-06, 'epoch': 1.53}


 39%|███▊      | 830/2144 [14:25<22:06,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 6.1287313432835826e-06, 'epoch': 1.55}


 39%|███▉      | 840/2144 [14:35<21:41,  1.00it/s]

{'loss': 0.0861, 'learning_rate': 6.0820895522388065e-06, 'epoch': 1.57}


 40%|███▉      | 850/2144 [14:45<21:33,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 6.03544776119403e-06, 'epoch': 1.59}


 40%|████      | 860/2144 [14:55<21:25,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 5.988805970149254e-06, 'epoch': 1.6}


 41%|████      | 870/2144 [15:05<21:05,  1.01it/s]

{'loss': 0.0402, 'learning_rate': 5.942164179104478e-06, 'epoch': 1.62}


 41%|████      | 880/2144 [15:15<20:58,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 5.895522388059702e-06, 'epoch': 1.64}


 42%|████▏     | 890/2144 [15:25<20:47,  1.01it/s]

{'loss': 0.1107, 'learning_rate': 5.848880597014925e-06, 'epoch': 1.66}


 42%|████▏     | 900/2144 [15:35<20:43,  1.00it/s]

{'loss': 0.0884, 'learning_rate': 5.802238805970149e-06, 'epoch': 1.68}


 42%|████▏     | 910/2144 [15:45<20:24,  1.01it/s]

{'loss': 0.0537, 'learning_rate': 5.755597014925373e-06, 'epoch': 1.7}


 43%|████▎     | 920/2144 [15:55<20:19,  1.00it/s]

{'loss': 0.0002, 'learning_rate': 5.708955223880598e-06, 'epoch': 1.72}


 43%|████▎     | 930/2144 [16:05<20:09,  1.00it/s]

{'loss': 0.0347, 'learning_rate': 5.662313432835821e-06, 'epoch': 1.74}


 44%|████▍     | 940/2144 [16:15<20:14,  1.01s/it]

{'loss': 0.001, 'learning_rate': 5.615671641791045e-06, 'epoch': 1.75}


 44%|████▍     | 950/2144 [16:25<19:53,  1.00it/s]

{'loss': 0.1071, 'learning_rate': 5.569029850746269e-06, 'epoch': 1.77}


 45%|████▍     | 960/2144 [16:35<19:50,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 5.522388059701493e-06, 'epoch': 1.79}


 45%|████▌     | 970/2144 [16:45<19:36,  1.00s/it]

{'loss': 0.1799, 'learning_rate': 5.475746268656716e-06, 'epoch': 1.81}


 46%|████▌     | 980/2144 [16:55<19:14,  1.01it/s]

{'loss': 0.0408, 'learning_rate': 5.429104477611941e-06, 'epoch': 1.83}


 46%|████▌     | 990/2144 [17:05<19:10,  1.00it/s]

{'loss': 0.0031, 'learning_rate': 5.382462686567165e-06, 'epoch': 1.85}


 47%|████▋     | 1000/2144 [17:15<18:57,  1.01it/s]

{'loss': 0.0276, 'learning_rate': 5.335820895522389e-06, 'epoch': 1.87}


 47%|████▋     | 1010/2144 [17:25<18:52,  1.00it/s]

{'loss': 0.003, 'learning_rate': 5.289179104477612e-06, 'epoch': 1.88}


 48%|████▊     | 1020/2144 [17:35<18:38,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 5.242537313432836e-06, 'epoch': 1.9}


 48%|████▊     | 1030/2144 [17:45<18:35,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 5.195895522388061e-06, 'epoch': 1.92}


 49%|████▊     | 1040/2144 [17:54<18:25,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 5.149253731343285e-06, 'epoch': 1.94}


 49%|████▉     | 1050/2144 [18:04<18:09,  1.00it/s]

{'loss': 0.0803, 'learning_rate': 5.102611940298508e-06, 'epoch': 1.96}


 49%|████▉     | 1060/2144 [18:14<17:57,  1.01it/s]

{'loss': 0.0001, 'learning_rate': 5.055970149253732e-06, 'epoch': 1.98}


 50%|████▉     | 1070/2144 [18:24<17:52,  1.00it/s]

{'loss': 0.0005, 'learning_rate': 5.009328358208956e-06, 'epoch': 2.0}


                                                   
 50%|█████     | 1072/2144 [18:54<17:08,  1.04it/s]

{'eval_loss': 2.4344863891601562, 'eval_accuracy': 0.7118881118881119, 'eval_precision': 0.7153038087116517, 'eval_recall': 0.7118881118881119, 'eval_f1': 0.7109773677603328, 'eval_runtime': 27.3803, 'eval_samples_per_second': 26.114, 'eval_steps_per_second': 3.287, 'epoch': 2.0}


 50%|█████     | 1080/2144 [19:08<32:34,  1.84s/it]  

{'loss': 0.0001, 'learning_rate': 4.9626865671641796e-06, 'epoch': 2.01}


 51%|█████     | 1090/2144 [19:18<18:02,  1.03s/it]

{'loss': 0.0001, 'learning_rate': 4.9160447761194035e-06, 'epoch': 2.03}


 51%|█████▏    | 1100/2144 [19:28<17:25,  1.00s/it]

{'loss': 0.0088, 'learning_rate': 4.8694029850746275e-06, 'epoch': 2.05}


 52%|█████▏    | 1110/2144 [19:38<17:14,  1.00s/it]

{'loss': 0.0876, 'learning_rate': 4.822761194029851e-06, 'epoch': 2.07}


 52%|█████▏    | 1120/2144 [19:48<17:04,  1.00s/it]

{'loss': 0.002, 'learning_rate': 4.7761194029850745e-06, 'epoch': 2.09}


 53%|█████▎    | 1130/2144 [19:58<16:52,  1.00it/s]

{'loss': 0.0025, 'learning_rate': 4.729477611940299e-06, 'epoch': 2.11}


 53%|█████▎    | 1140/2144 [20:08<16:43,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 4.682835820895522e-06, 'epoch': 2.13}


 54%|█████▎    | 1150/2144 [20:18<16:31,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 4.636194029850747e-06, 'epoch': 2.15}


 54%|█████▍    | 1160/2144 [20:28<16:23,  1.00it/s]

{'loss': 0.0534, 'learning_rate': 4.58955223880597e-06, 'epoch': 2.16}


 55%|█████▍    | 1170/2144 [20:38<16:15,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 4.542910447761194e-06, 'epoch': 2.18}


 55%|█████▌    | 1180/2144 [20:48<16:05,  1.00s/it]

{'loss': 0.0008, 'learning_rate': 4.496268656716418e-06, 'epoch': 2.2}


 56%|█████▌    | 1190/2144 [20:58<15:56,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 4.449626865671642e-06, 'epoch': 2.22}


 56%|█████▌    | 1200/2144 [21:08<15:39,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 4.402985074626866e-06, 'epoch': 2.24}


 56%|█████▋    | 1210/2144 [21:18<15:33,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 4.35634328358209e-06, 'epoch': 2.26}


 57%|█████▋    | 1220/2144 [21:28<15:27,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 4.309701492537314e-06, 'epoch': 2.28}


 57%|█████▋    | 1230/2144 [21:38<15:14,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 4.263059701492538e-06, 'epoch': 2.29}


 58%|█████▊    | 1240/2144 [21:48<15:18,  1.02s/it]

{'loss': 0.0512, 'learning_rate': 4.216417910447762e-06, 'epoch': 2.31}


 58%|█████▊    | 1250/2144 [21:58<15:01,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 4.169776119402986e-06, 'epoch': 2.33}


 59%|█████▉    | 1260/2144 [22:08<14:45,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 4.123134328358209e-06, 'epoch': 2.35}


 59%|█████▉    | 1270/2144 [22:18<14:47,  1.02s/it]

{'loss': 0.0, 'learning_rate': 4.076492537313434e-06, 'epoch': 2.37}


 60%|█████▉    | 1280/2144 [22:28<14:26,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 4.029850746268657e-06, 'epoch': 2.39}


 60%|██████    | 1290/2144 [22:38<14:21,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 3.983208955223881e-06, 'epoch': 2.41}


 61%|██████    | 1300/2144 [22:49<14:11,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 3.936567164179105e-06, 'epoch': 2.43}


 61%|██████    | 1310/2144 [22:59<13:59,  1.01s/it]

{'loss': 0.0, 'learning_rate': 3.889925373134329e-06, 'epoch': 2.44}


 62%|██████▏   | 1320/2144 [23:09<13:52,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 3.843283582089553e-06, 'epoch': 2.46}


 62%|██████▏   | 1330/2144 [23:19<13:39,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 3.7966417910447766e-06, 'epoch': 2.48}


 62%|██████▎   | 1340/2144 [23:29<13:31,  1.01s/it]

{'loss': 0.0, 'learning_rate': 3.7500000000000005e-06, 'epoch': 2.5}


 63%|██████▎   | 1350/2144 [23:39<13:21,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 3.703358208955224e-06, 'epoch': 2.52}


 63%|██████▎   | 1360/2144 [23:49<13:10,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 3.656716417910448e-06, 'epoch': 2.54}


 64%|██████▍   | 1370/2144 [23:59<13:00,  1.01s/it]

{'loss': 0.056, 'learning_rate': 3.6100746268656715e-06, 'epoch': 2.56}


 64%|██████▍   | 1380/2144 [24:09<12:53,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 3.563432835820896e-06, 'epoch': 2.57}


 65%|██████▍   | 1390/2144 [24:19<12:43,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 3.5167910447761194e-06, 'epoch': 2.59}


 65%|██████▌   | 1400/2144 [24:29<12:32,  1.01s/it]

{'loss': 0.0, 'learning_rate': 3.4701492537313438e-06, 'epoch': 2.61}


 66%|██████▌   | 1410/2144 [24:40<12:20,  1.01s/it]

{'loss': 0.0, 'learning_rate': 3.4235074626865673e-06, 'epoch': 2.63}


 66%|██████▌   | 1420/2144 [24:50<12:13,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 3.3768656716417913e-06, 'epoch': 2.65}


 67%|██████▋   | 1430/2144 [25:00<12:02,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 3.3302238805970148e-06, 'epoch': 2.67}


 67%|██████▋   | 1440/2144 [25:10<11:52,  1.01s/it]

{'loss': 0.0, 'learning_rate': 3.283582089552239e-06, 'epoch': 2.69}


 68%|██████▊   | 1450/2144 [25:20<11:40,  1.01s/it]

{'loss': 0.0831, 'learning_rate': 3.2369402985074627e-06, 'epoch': 2.71}


 68%|██████▊   | 1460/2144 [25:30<11:29,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 3.190298507462687e-06, 'epoch': 2.72}


 69%|██████▊   | 1470/2144 [25:40<11:20,  1.01s/it]

{'loss': 0.0133, 'learning_rate': 3.1436567164179106e-06, 'epoch': 2.74}


 69%|██████▉   | 1480/2144 [25:50<11:09,  1.01s/it]

{'loss': 0.0, 'learning_rate': 3.0970149253731345e-06, 'epoch': 2.76}


 69%|██████▉   | 1490/2144 [26:00<11:01,  1.01s/it]

{'loss': 0.0, 'learning_rate': 3.050373134328358e-06, 'epoch': 2.78}


 70%|██████▉   | 1500/2144 [26:11<10:49,  1.01s/it]

{'loss': 0.0, 'learning_rate': 3.0037313432835824e-06, 'epoch': 2.8}


 70%|███████   | 1510/2144 [26:21<10:39,  1.01s/it]

{'loss': 0.0, 'learning_rate': 2.957089552238806e-06, 'epoch': 2.82}


 71%|███████   | 1520/2144 [26:31<10:29,  1.01s/it]

{'loss': 0.0, 'learning_rate': 2.9104477611940303e-06, 'epoch': 2.84}


 71%|███████▏  | 1530/2144 [26:41<10:20,  1.01s/it]

{'loss': 0.0004, 'learning_rate': 2.863805970149254e-06, 'epoch': 2.85}


 72%|███████▏  | 1540/2144 [26:51<10:06,  1.00s/it]

{'loss': 0.0, 'learning_rate': 2.8171641791044778e-06, 'epoch': 2.87}


 72%|███████▏  | 1550/2144 [27:01<10:00,  1.01s/it]

{'loss': 0.0635, 'learning_rate': 2.7705223880597017e-06, 'epoch': 2.89}


 73%|███████▎  | 1560/2144 [27:11<09:49,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 2.7238805970149257e-06, 'epoch': 2.91}


 73%|███████▎  | 1570/2144 [27:21<09:40,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 2.677238805970149e-06, 'epoch': 2.93}


 74%|███████▎  | 1580/2144 [27:31<09:29,  1.01s/it]

{'loss': 0.0, 'learning_rate': 2.6305970149253736e-06, 'epoch': 2.95}


 74%|███████▍  | 1590/2144 [27:41<09:19,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 2.583955223880597e-06, 'epoch': 2.97}


 75%|███████▍  | 1600/2144 [27:51<09:08,  1.01s/it]

{'loss': 0.1105, 'learning_rate': 2.537313432835821e-06, 'epoch': 2.99}


                                                   
 75%|███████▌  | 1608/2144 [28:29<08:40,  1.03it/s]

{'eval_loss': 2.800408363342285, 'eval_accuracy': 0.7034965034965035, 'eval_precision': 0.7221616963786316, 'eval_recall': 0.7034965034965035, 'eval_f1': 0.7069604449756907, 'eval_runtime': 29.8544, 'eval_samples_per_second': 23.95, 'eval_steps_per_second': 3.015, 'epoch': 3.0}


 75%|███████▌  | 1610/2144 [28:38<1:16:36,  8.61s/it]

{'loss': 0.0, 'learning_rate': 2.490671641791045e-06, 'epoch': 3.0}


 76%|███████▌  | 1620/2144 [28:48<10:41,  1.22s/it]  

{'loss': 0.0002, 'learning_rate': 2.444029850746269e-06, 'epoch': 3.02}


 76%|███████▌  | 1630/2144 [28:58<08:39,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 2.397388059701493e-06, 'epoch': 3.04}


 76%|███████▋  | 1640/2144 [29:08<08:28,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 2.350746268656717e-06, 'epoch': 3.06}


 77%|███████▋  | 1650/2144 [29:18<08:18,  1.01s/it]

{'loss': 0.0, 'learning_rate': 2.3041044776119408e-06, 'epoch': 3.08}


 77%|███████▋  | 1660/2144 [29:28<08:08,  1.01s/it]

{'loss': 0.0, 'learning_rate': 2.2574626865671643e-06, 'epoch': 3.1}


 78%|███████▊  | 1670/2144 [29:38<07:57,  1.01s/it]

{'loss': 0.0, 'learning_rate': 2.2108208955223883e-06, 'epoch': 3.12}


 78%|███████▊  | 1680/2144 [29:48<07:47,  1.01s/it]

{'loss': 0.0, 'learning_rate': 2.1641791044776118e-06, 'epoch': 3.13}


 79%|███████▉  | 1690/2144 [29:58<07:39,  1.01s/it]

{'loss': 0.0, 'learning_rate': 2.1175373134328357e-06, 'epoch': 3.15}


 79%|███████▉  | 1700/2144 [30:09<07:27,  1.01s/it]

{'loss': 0.0, 'learning_rate': 2.0708955223880597e-06, 'epoch': 3.17}


 80%|███████▉  | 1710/2144 [30:19<07:17,  1.01s/it]

{'loss': 0.0034, 'learning_rate': 2.0242537313432836e-06, 'epoch': 3.19}


 80%|████████  | 1720/2144 [30:29<07:06,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 1.9776119402985076e-06, 'epoch': 3.21}


 81%|████████  | 1730/2144 [30:39<06:59,  1.01s/it]

{'loss': 0.0, 'learning_rate': 1.9309701492537315e-06, 'epoch': 3.23}


 81%|████████  | 1740/2144 [30:49<06:47,  1.01s/it]

{'loss': 0.0008, 'learning_rate': 1.8843283582089553e-06, 'epoch': 3.25}


 82%|████████▏ | 1750/2144 [30:59<06:35,  1.00s/it]

{'loss': 0.0, 'learning_rate': 1.8376865671641792e-06, 'epoch': 3.26}


 82%|████████▏ | 1760/2144 [31:09<06:27,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 1.791044776119403e-06, 'epoch': 3.28}


 83%|████████▎ | 1770/2144 [31:19<06:17,  1.01s/it]

{'loss': 0.0, 'learning_rate': 1.7444029850746269e-06, 'epoch': 3.3}


 83%|████████▎ | 1780/2144 [31:29<06:07,  1.01s/it]

{'loss': 0.0, 'learning_rate': 1.6977611940298508e-06, 'epoch': 3.32}


 83%|████████▎ | 1790/2144 [31:39<05:57,  1.01s/it]

{'loss': 0.0, 'learning_rate': 1.6511194029850746e-06, 'epoch': 3.34}


 84%|████████▍ | 1800/2144 [31:49<05:45,  1.00s/it]

{'loss': 0.0, 'learning_rate': 1.6044776119402985e-06, 'epoch': 3.36}


 84%|████████▍ | 1810/2144 [31:59<05:37,  1.01s/it]

{'loss': 0.0, 'learning_rate': 1.5578358208955225e-06, 'epoch': 3.38}


 85%|████████▍ | 1820/2144 [32:09<05:26,  1.01s/it]

{'loss': 0.0, 'learning_rate': 1.5111940298507464e-06, 'epoch': 3.4}


 85%|████████▌ | 1830/2144 [32:20<05:16,  1.01s/it]

{'loss': 0.0, 'learning_rate': 1.4645522388059702e-06, 'epoch': 3.41}


 86%|████████▌ | 1840/2144 [32:30<05:06,  1.01s/it]

{'loss': 0.0, 'learning_rate': 1.417910447761194e-06, 'epoch': 3.43}


 86%|████████▋ | 1850/2144 [32:40<04:55,  1.01s/it]

{'loss': 0.0, 'learning_rate': 1.371268656716418e-06, 'epoch': 3.45}


 87%|████████▋ | 1860/2144 [32:50<04:46,  1.01s/it]

{'loss': 0.0, 'learning_rate': 1.3246268656716418e-06, 'epoch': 3.47}


 87%|████████▋ | 1870/2144 [33:00<04:36,  1.01s/it]

{'loss': 0.0465, 'learning_rate': 1.2779850746268657e-06, 'epoch': 3.49}


 88%|████████▊ | 1880/2144 [33:10<04:26,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 1.2313432835820897e-06, 'epoch': 3.51}


 88%|████████▊ | 1890/2144 [33:20<04:16,  1.01s/it]

{'loss': 0.0, 'learning_rate': 1.1847014925373134e-06, 'epoch': 3.53}


 89%|████████▊ | 1900/2144 [33:30<04:06,  1.01s/it]

{'loss': 0.0, 'learning_rate': 1.1380597014925374e-06, 'epoch': 3.54}


 89%|████████▉ | 1910/2144 [33:40<03:56,  1.01s/it]

{'loss': 0.0, 'learning_rate': 1.0914179104477613e-06, 'epoch': 3.56}


 90%|████████▉ | 1920/2144 [33:50<03:46,  1.01s/it]

{'loss': 0.0, 'learning_rate': 1.044776119402985e-06, 'epoch': 3.58}


 90%|█████████ | 1930/2144 [34:00<03:36,  1.01s/it]

{'loss': 0.0, 'learning_rate': 9.98134328358209e-07, 'epoch': 3.6}


 90%|█████████ | 1940/2144 [34:10<03:26,  1.01s/it]

{'loss': 0.0, 'learning_rate': 9.514925373134328e-07, 'epoch': 3.62}


 91%|█████████ | 1950/2144 [34:21<03:15,  1.01s/it]

{'loss': 0.0, 'learning_rate': 9.048507462686568e-07, 'epoch': 3.64}


 91%|█████████▏| 1960/2144 [34:31<03:05,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 8.582089552238806e-07, 'epoch': 3.66}


 92%|█████████▏| 1970/2144 [34:41<02:55,  1.01s/it]

{'loss': 0.0, 'learning_rate': 8.115671641791046e-07, 'epoch': 3.68}


 92%|█████████▏| 1980/2144 [34:51<02:45,  1.01s/it]

{'loss': 0.079, 'learning_rate': 7.649253731343284e-07, 'epoch': 3.69}


 93%|█████████▎| 1990/2144 [35:01<02:34,  1.00s/it]

{'loss': 0.0, 'learning_rate': 7.182835820895523e-07, 'epoch': 3.71}


 93%|█████████▎| 2000/2144 [35:11<02:25,  1.01s/it]

{'loss': 0.0, 'learning_rate': 6.716417910447762e-07, 'epoch': 3.73}


 94%|█████████▍| 2010/2144 [35:21<02:15,  1.01s/it]

{'loss': 0.0, 'learning_rate': 6.25e-07, 'epoch': 3.75}


 94%|█████████▍| 2020/2144 [35:31<02:05,  1.01s/it]

{'loss': 0.0, 'learning_rate': 5.783582089552239e-07, 'epoch': 3.77}


 95%|█████████▍| 2030/2144 [35:41<01:55,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 5.317164179104478e-07, 'epoch': 3.79}


 95%|█████████▌| 2040/2144 [35:51<01:44,  1.01s/it]

{'loss': 0.0, 'learning_rate': 4.850746268656717e-07, 'epoch': 3.81}


 96%|█████████▌| 2050/2144 [36:02<01:35,  1.01s/it]

{'loss': 0.0, 'learning_rate': 4.384328358208956e-07, 'epoch': 3.82}


 96%|█████████▌| 2060/2144 [36:12<01:24,  1.01s/it]

{'loss': 0.0, 'learning_rate': 3.9179104477611947e-07, 'epoch': 3.84}


 97%|█████████▋| 2070/2144 [36:22<01:14,  1.01s/it]

{'loss': 0.0, 'learning_rate': 3.451492537313433e-07, 'epoch': 3.86}


 97%|█████████▋| 2080/2144 [36:32<01:04,  1.01s/it]

{'loss': 0.0406, 'learning_rate': 2.9850746268656716e-07, 'epoch': 3.88}


 97%|█████████▋| 2090/2144 [36:42<00:54,  1.01s/it]

{'loss': 0.0, 'learning_rate': 2.5186567164179105e-07, 'epoch': 3.9}


 98%|█████████▊| 2100/2144 [36:52<00:44,  1.01s/it]

{'loss': 0.0, 'learning_rate': 2.0522388059701495e-07, 'epoch': 3.92}


 98%|█████████▊| 2110/2144 [37:02<00:34,  1.01s/it]

{'loss': 0.0774, 'learning_rate': 1.5858208955223882e-07, 'epoch': 3.94}


 99%|█████████▉| 2120/2144 [37:12<00:24,  1.01s/it]

{'loss': 0.0, 'learning_rate': 1.1194029850746268e-07, 'epoch': 3.96}


 99%|█████████▉| 2130/2144 [37:22<00:14,  1.01s/it]

{'loss': 0.0, 'learning_rate': 6.529850746268657e-08, 'epoch': 3.97}


100%|█████████▉| 2140/2144 [37:32<00:04,  1.01s/it]

{'loss': 0.0, 'learning_rate': 1.8656716417910447e-08, 'epoch': 3.99}


                                                   
100%|██████████| 2144/2144 [38:06<00:00,  1.04it/s]

{'eval_loss': 2.7497148513793945, 'eval_accuracy': 0.7090909090909091, 'eval_precision': 0.7167016152158301, 'eval_recall': 0.7090909090909091, 'eval_f1': 0.7113385441308382, 'eval_runtime': 29.443, 'eval_samples_per_second': 24.284, 'eval_steps_per_second': 3.057, 'epoch': 4.0}


100%|██████████| 2144/2144 [38:14<00:00,  1.07s/it]


{'train_runtime': 2294.9341, 'train_samples_per_second': 7.47, 'train_steps_per_second': 0.934, 'train_loss': 0.027930649098753244, 'epoch': 4.0}


100%|██████████| 90/90 [00:29<00:00,  3.02it/s]
100%|██████████| 90/90 [00:29<00:00,  3.03it/s]
100%|██████████| 90/90 [00:29<00:00,  3.01it/s]


{'accuracy': 0.7132867132867133, 'precision': 0.7180924382973425, 'recall': 0.7132867132867133, 'f1': 0.7085658201447674}
{'accuracy': 0.6792717086834734, 'precision': 0.6955808449006221, 'recall': 0.6792717086834734, 'f1': 0.6742218420299042}
