In [6]:
import os

# Input CSV locations, all inside the sibling "data" directory.
data_dir = os.path.join("..", "data")

path = os.path.join(data_dir, "tweets_data_temp.csv")  # passed to load_and_prepare_dataset
path2 = os.path.join(data_dir, "paraphrasings_on_train_2148rows_seed42.csv")  # first paraphrasing file
path3 = os.path.join(data_dir, "paraphrasings_on_train_2148rows_seed42_2.csv")  # second paraphrasing file

### Functions

In [9]:
import pandas as pd
# Peek at the first rows of the first paraphrasing file.
# Its text column is called "New"; split_dataset renames it to "text".
paraphrasings_preview = pd.read_csv(path2)
paraphrasings_preview.head()


Unnamed: 0,label,New,org_or_new
0,positive,"Øh, tak! Her er et link til et interview, der ...",0
1,neutral,"det kan være værdt at nævne, at dyr også vari...",0
2,negative,"#3 Om ateismen: ""Ateister mener, at religione...",0
3,negative,Øjeblikkeligt: Vild jubel over forøget politiu...,0
4,negative,Selvom vi ikke ved det præcise datum for næst...,0


In [8]:
# Switch off Weights & Biases logging for the Trainer.
# NOTE(review): the recorded output further down warns that WANDB_DISABLED is
# deprecated in favour of `report_to` - revisit when upgrading transformers.
os.environ.update({"WANDB_DISABLED": "true"})

In [23]:
from datasets import Dataset
import pandas as pd

# Load and preprocess the dataset
def load_and_prepare_dataset(file_path):
    """Load the tweet CSV and return the Danish rows as a Hugging Face Dataset.

    Args:
        file_path: Path to a CSV file with at least the columns
            'text', 'label' and 'language'.

    Returns:
        A `datasets.Dataset` with exactly the columns 'text' and 'label',
        restricted to rows whose 'language' is 'da' and with duplicate
        (text, label) rows removed.

    Side effects:
        Prints the label distribution of the *unfiltered* file (i.e. before the
        language filter and de-duplication) and a completion message.
    """
    raw_df = pd.read_csv(file_path)

    # Label counts are reported for the raw file, before any filtering.
    print(raw_df['label'].value_counts())

    prepared_df = (
        raw_df.loc[raw_df['language'] == 'da', ['text', 'label']]  # Danish rows, model columns only
        .drop_duplicates()
        .reset_index(drop=True)  # filtering leaves gaps in the index
    )

    # Build the Dataset through the pandas constructor instead of handing a
    # DataFrame to `Dataset.from_dict`; preserve_index=False keeps the pandas
    # index from becoming an extra column.
    dataset = Dataset.from_pandas(prepared_df, preserve_index=False)
    print("Dataset loaded and prepared")
    return dataset

# Split the dataset and convert into a Hugging Face DatasetDict
from datasets import DatasetDict

def split_dataset(dataset, path_to_df_train, path_to_df_train_2, seed=42):
    """Build the train/valid/test DatasetDict used for fine-tuning.

    The training split consists solely of paraphrased texts read from the two
    CSV files; the validation and test splits are carved out of `dataset`.

    Args:
        dataset: Hugging Face Dataset with 'text' and 'label' columns.
        path_to_df_train: CSV of paraphrasings with columns 'New', 'label'
            and 'org_or_new'.
        path_to_df_train_2: Second CSV with the same layout.
        seed: Seed forwarded to both `train_test_split` calls.

    Returns:
        DatasetDict with keys 'train' (paraphrasings only), 'valid' and 'test'.

    Note:
        `dataset` is split 60/20/20, but the 60% portion is discarded: only the
        two 20% parts are kept, and 'train' is replaced by the paraphrasings.
        NOTE(review): judging by the file names the paraphrasings derive from a
        train split - confirm they do not overlap the valid/test rows made here.
    """
    # Stack both paraphrasing files into one frame.
    paraphrasings = pd.concat(
        [pd.read_csv(path_to_df_train), pd.read_csv(path_to_df_train_2)],
        ignore_index=True,
    )

    # Keep rows flagged org_or_new == 0 (drops the rows marked as original tweets).
    paraphrasings = paraphrasings[paraphrasings['org_or_new'] == 0]
    print("Number of paraphrasings: ", len(paraphrasings))

    # The paraphrased text lives in 'New'; expose it under the column name the
    # rest of the pipeline expects and keep only the model inputs.
    train_df = (
        paraphrasings.rename(columns={"New": "text"})[['text', 'label']]
        .reset_index(drop=True)
    )
    # Use the pandas constructor rather than passing a DataFrame to from_dict.
    train_dataset = Dataset.from_pandas(train_df, preserve_index=False)

    # 60% train, 20% validation, 20% test (the 60% part is not used, see docstring)
    train_test = dataset.train_test_split(test_size=0.4, seed=seed)
    test_valid = train_test['test'].train_test_split(test_size=0.5, seed=seed)

    dataset_splitted_dict = DatasetDict({
        'train': train_dataset,
        'valid': test_valid['train'],
        'test': test_valid['test']})

    print("Dataset splitted into train (60%), valid (20%) and test (20%)")

    # Report the size of every split.
    print("Length of train dataset: ", len(dataset_splitted_dict['train']))
    print("Length of valid dataset: ", len(dataset_splitted_dict['valid']))
    print("Length of test dataset: ", len(dataset_splitted_dict['test']))

    print("")

    return dataset_splitted_dict

# Tokenize the dataset 
from transformers import AutoTokenizer
from datasets import ClassLabel

def tokenize_dataset(dataset, model_name="NbAiLab/nb-bert-large", max_length=128):
    """Tokenize every split of `dataset` and encode its string labels as integers.

    Args:
        dataset: Mapping of splits (must contain 'train') whose rows have
            'text' and 'label' columns.
        model_name: Checkpoint whose tokenizer is loaded.
        max_length: Length every sequence is padded or truncated to.

    Returns:
        The result of `dataset.map`: the original columns of the 'train' split
        are removed and replaced by the tokenizer outputs plus an integer
        'label' (negative=0, neutral=1, positive=2).
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Position in `names` defines the integer id of each class.
    label_feature = ClassLabel(num_classes=3, names=['negative', 'neutral', 'positive'])

    def _encode_batch(batch):
        # `batch` holds lists of values because the map below runs in batched mode.
        encoded = tokenizer(batch["text"], padding="max_length", truncation=True, max_length=max_length)
        encoded['label'] = label_feature.str2int(batch['label'])
        return encoded

    # Column names are taken from the 'train' split and dropped from all splits.
    source_columns = dataset['train'].column_names
    tokenized_dataset = dataset.map(_encode_batch, batched=True, remove_columns=source_columns)

    print("Dataset tokenized")

    return tokenized_dataset

# evaluation metrics
import numpy as np
import evaluate

# Metric objects are loaded on first use and then reused: compute_metrics runs
# on every evaluation pass, so reloading four metrics each time is wasted work.
_METRIC_CACHE = {}


def _get_metric(name):
    """Return the `evaluate` metric called `name`, loading it only once."""
    if name not in _METRIC_CACHE:
        _METRIC_CACHE[name] = evaluate.load(name)
    return _METRIC_CACHE[name]


def compute_metrics(eval_pred):
    """Compute accuracy and weighted precision/recall/F1 from logits and labels.

    Args:
        eval_pred: Pair `(logits, labels)`; the predicted class of each row is
            the argmax of its logits over the last axis.

    Returns:
        Dict with the keys 'accuracy', 'precision', 'recall' and 'f1'.
        Precision, recall and F1 use `average="weighted"`.
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    accuracy = _get_metric("accuracy").compute(predictions=predictions, references=labels)["accuracy"]
    precision = _get_metric("precision").compute(predictions=predictions, references=labels, average="weighted")["precision"]
    recall = _get_metric("recall").compute(predictions=predictions, references=labels, average="weighted")["recall"]
    f1 = _get_metric("f1").compute(predictions=predictions, references=labels, average="weighted")["f1"]
    return {"accuracy": accuracy, "precision": precision, "recall": recall, "f1": f1}

In [24]:
from transformers import AutoModelForSequenceClassification

# One definition of the checkpoint so the tokenizer and the model weights are
# always taken from the same place (it was previously repeated as a literal).
MODEL_NAME = "NbAiLab/nb-bert-large"
NUM_LABELS = 3  # negative / neutral / positive

print("Loading and preparing dataset...")
dataset = load_and_prepare_dataset(path)
print(dataset)

print("Splitting dataset...")
dataset_splitted_dict = split_dataset(dataset, path2, path3)

print("Tokenizing dataset...")
tokenized_dataset = tokenize_dataset(dataset_splitted_dict, model_name=MODEL_NAME)

print(f"Loading model ({MODEL_NAME})...")

# The classification head is newly initialised, so the model has to be
# fine-tuned before use (see the warning in the recorded output).
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=NUM_LABELS)

Loading and preparing dataset...
label
negative    1525
neutral     1281
positive    1000
Name: count, dtype: int64
Dataset loaded and prepared
Dataset({
    features: ['text', 'label'],
    num_rows: 3572
})
Splitting dataset...
Number of paraphrasings:  2308
Dataset splitted into train (60%), valid (20%) and test (20%)
Length of train dataset:  2308
Length of valid dataset:  714
Length of test dataset:  715

Tokenizing dataset...


Map: 100%|██████████| 2308/2308 [00:00<00:00, 19524.56 examples/s]
Map: 100%|██████████| 714/714 [00:00<00:00, 20584.48 examples/s]
Map: 100%|██████████| 715/715 [00:00<00:00, 20182.29 examples/s]


Dataset tokenized
Loading model (NbAiLab/nb-bert-large)...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at NbAiLab/nb-bert-large and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Specifying training args

In [13]:
import numpy as np  
# Label distribution per split, printed in the order train, test, valid.
for split_name in ('train', 'test', 'valid'):
    print(np.unique(dataset_splitted_dict[split_name]['label'], return_counts=True))

(array(['negative', 'neutral', 'positive'], dtype='<U8'), array([928, 737, 643]))
(array(['negative', 'neutral', 'positive'], dtype='<U8'), array([309, 221, 185]))
(array(['negative', 'neutral', 'positive'], dtype='<U8'), array([268, 252, 194]))


In [14]:
from transformers import TrainingArguments

# Fine-tuning hyperparameters
batch_size = 8  # per-device batch size, used for both training and evaluation
epochs = 4
learning_rate = 1e-5

# NOTE(review): the recorded output of this cell warns that the WANDB_DISABLED
# environment variable is deprecated in favour of `report_to`.
training_args = TrainingArguments(
    # where checkpoints and logs go
    output_dir="test_trainer",
    run_name="test_trainer",
    logging_dir="logs",
    logging_steps=10,
    # optimisation
    num_train_epochs=epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    learning_rate=learning_rate,
    weight_decay=0.01,
    # evaluate and checkpoint once per epoch; the two strategies are kept equal
    # (presumably required by load_best_model_at_end - see TrainingArguments docs)
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    remove_unused_columns=False,
)

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Initializing trainer

In [15]:
from transformers import Trainer

# Splits handed to the Trainer.
# NOTE(review): the 'test' split is used as the per-epoch evaluation set while
# training_args sets load_best_model_at_end=True, so the checkpoint kept at the
# end is presumably selected on 'test'. Confirm that 'valid' was not intended
# here, and which split is meant to be the untouched hold-out.
train_split = tokenized_dataset['train']
eval_split = tokenized_dataset['test']

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_split,
    eval_dataset=eval_split,
    compute_metrics=compute_metrics,
)

In [None]:
# Fine-tune `model` on tokenized_dataset['train'] with the settings in `training_args`.
trainer.train()

In [11]:
# Run the Trainer's evaluation once; the returned metrics are not stored.
trainer.evaluate()

import tensorflow as tf

# Model predictions for the validation split
predictions_val = trainer.predict(tokenized_dataset["valid"])
val_logits = predictions_val.predictions

# Predicted class = position of the largest logit in each row
preds_val_val = np.argmax(val_logits, axis=-1)

# Softmax converts the logits into per-class probabilities
predictions_probabilities = tf.nn.softmax(val_logits)

def compute_metrics_end(preds, refs):
    """Score predicted class ids against reference class ids.

    Args:
        preds: Predicted class ids (already argmax-ed, not logits).
        refs: Reference class ids.

    Returns:
        Dict with 'accuracy', 'precision', 'recall' and 'f1'; the last three
        are computed with `average="weighted"`.
    """
    weighted_metrics = ("precision", "recall", "f1")

    # Load all four metrics up front, then compute them in a fixed order.
    scorers = {name: evaluate.load(name) for name in ("accuracy",) + weighted_metrics}

    results = {"accuracy": scorers["accuracy"].compute(predictions=preds, references=refs)["accuracy"]}
    for name in weighted_metrics:
        results[name] = scorers[name].compute(predictions=preds, references=refs, average="weighted")[name]
    return results

# Metrics on the validation split
metrics_val = compute_metrics_end(preds=preds_val_val, refs=predictions_val.label_ids)

import tensorflow as tf

# Model predictions for the test split
predictions_test = trainer.predict(tokenized_dataset["test"])
test_logits = predictions_test.predictions

# Predicted class = position of the largest logit in each row
preds_test_test = np.argmax(test_logits, axis=-1)

# Softmax converts the logits into per-class probabilities
predictions_probabilities_test = tf.nn.softmax(test_logits)

# Metrics on the test split
metrics_test = compute_metrics_end(preds=preds_test_test, refs=predictions_test.label_ids)

# Test metrics are printed first, validation metrics second.
for split_metrics in (metrics_test, metrics_val):
    print(split_metrics)

100%|██████████| 90/90 [00:26<00:00,  3.45it/s]
100%|██████████| 90/90 [00:26<00:00,  3.37it/s]
100%|██████████| 90/90 [00:26<00:00,  3.44it/s]


{'accuracy': 0.7034965034965035, 'precision': 0.7208567419348564, 'recall': 0.7034965034965035, 'f1': 0.7066208658676242}
{'accuracy': 0.665266106442577, 'precision': 0.6759627052306597, 'recall': 0.665266106442577, 'f1': 0.6661282766130079}


In [12]:
import pandas as pd

def _label_name(class_id):
    """Translate a class id to its name; anything other than 0 or 1 is 'positive'."""
    if class_id == 0:
        return "negative"
    if class_id == 1:
        return "neutral"
    return "positive"


# One row per validation example: predicted vs. true label, whether they match,
# the example text, and the raw logits / softmax probabilities as strings.
val_true_ids = predictions_val.label_ids
data = {
    'Predicted Labels': [_label_name(class_id) for class_id in preds_val_val],
    'True Labels': [_label_name(class_id) for class_id in val_true_ids],
    # "TRUE" marks a correct prediction, 'MISS' a wrong one
    'Misclassification': ["TRUE" if predicted == actual else 'MISS'
                          for predicted, actual in zip(preds_val_val, val_true_ids)],
    'Text': dataset_splitted_dict['valid']['text'],
    'Logit Values': [str(row) for row in predictions_val.predictions],
    'Probabilities': [str(row) for row in np.asarray(predictions_probabilities)],
}
df = pd.DataFrame(data)


Unnamed: 0,Predicted Labels,True Labels,Misclassification,Text,Logit Values,Probabilities
0,negative,neutral,MISS,"Det er så ikke den, jeg var lidt for entusiast...",[ 0.9340587 -0.01499593 -1.1753395 ],[0.66294634 0.25663105 0.08042265]
1,neutral,negative,MISS,Åh gud... Har intet med det at gøre,[ 0.49472684 0.52091426 -1.059791 ],[0.44686255 0.45871928 0.09441815]
2,neutral,negative,MISS,"Det kan du sige, men med Artikel 13 bliver det...",[ 0.61225605 0.7817579 -0.8384904 ],[0.4133752 0.4897316 0.09689319]
3,positive,positive,TRUE,"Tak gode mand! Da jeg fik notifikationen, troe...",[-1.0158063 -0.6964614 2.1499164],[0.03834047 0.05276516 0.90889436]
4,negative,negative,TRUE,Den er særdeles troværdig. Viser sandheden om ...,[ 1.1367214 -0.47244602 -0.9655315 ],[0.7562952 0.15129997 0.09240479]
...,...,...,...,...,...,...
709,neutral,negative,MISS,Mere brug af GMO i landbruget kan ikke løse al...,[ 0.12570588 0.72970027 -1.148641 ],[0.32164422 0.5884197 0.08993609]
710,neutral,neutral,TRUE,Rusland fejrer femåret for annekteringen af Kr...,[-0.76456714 1.6279043 -0.502376 ],[0.07552715 0.8263046 0.09816829]
711,negative,negative,TRUE,"Det er minimeret til det ekstreme, så måske ik...",[ 1.3416474 -0.5490633 -1.1396143],[0.8099776 0.12227785 0.06774461]
712,neutral,neutral,TRUE,🇩🇰 #Superliga\nGrupo 1 Descenso\n29º Fecha\n40...,[-1.4751852 1.7833322 -0.67671984],[0.03420784 0.889778 0.07601419]


In [13]:
import pandas as pd
from sklearn.metrics import classification_report

# Class name -> integer id; the key order also sets the row order of the report.
label_mapping = {'negative': 0, 'neutral': 1, 'positive': 2}

# Pull the label columns out of the per-example frame and encode them as ids.
true_labels = df['True Labels']
predicted_labels = df['Predicted Labels']
true_labels_mapped = true_labels.map(label_mapping)
predicted_labels_mapped = predicted_labels.map(label_mapping)

# Per-class precision / recall / F1 on the validation predictions.
report = classification_report(
    true_labels_mapped,
    predicted_labels_mapped,
    target_names=label_mapping.keys(),
)

print(report)

              precision    recall  f1-score   support

    negative       0.72      0.70      0.71       268
     neutral       0.58      0.70      0.63       252
    positive       0.73      0.58      0.65       194

    accuracy                           0.67       714
   macro avg       0.68      0.66      0.66       714
weighted avg       0.68      0.67      0.67       714



# Training loop

In [26]:
import os

import pandas as pd
import tensorflow as tf
from sklearn.metrics import classification_report

N_RUNS = 10
CLASS_IDS = [0, 1, 2]
CLASS_NAMES = ['negative', 'neutral', 'positive']  # name of class id 0, 1, 2


def compute_metrics_end(preds, refs):
    """Score predicted class ids against reference class ids.

    Returns a dict with 'accuracy' and the weighted 'precision', 'recall'
    and 'f1'.
    """
    metric0 = evaluate.load("accuracy")
    metric1 = evaluate.load("precision")
    metric2 = evaluate.load("recall")
    metric3 = evaluate.load("f1")

    accuracy = metric0.compute(predictions=preds, references=refs)["accuracy"]
    precision = metric1.compute(predictions=preds, references=refs, average="weighted")["precision"]
    recall = metric2.compute(predictions=preds, references=refs, average="weighted")["recall"]
    f1 = metric3.compute(predictions=preds, references=refs, average="weighted")["f1"]
    return {"accuracy": accuracy, "precision": precision, "recall": recall, "f1": f1}


def _predict_split(split_name):
    """Return (prediction output, argmax class ids, softmax probabilities) for one split."""
    output = trainer.predict(tokenized_dataset[split_name])
    class_ids = np.argmax(output.predictions, axis=-1)
    probabilities = tf.nn.softmax(output.predictions)
    return output, class_ids, probabilities


# NOTE(review): `trainer` and `model` are created once, outside this loop, so
# every iteration continues training the weights left by the previous one
# rather than starting a fresh run. If the numbered reports are meant to be
# independent repetitions, the model and Trainer must be re-created (with a
# different seed) inside the loop - confirm the intent.
for i in range(N_RUNS):
    trainer.train()

    trainer.evaluate()

    # Validation split: predictions and aggregate metrics
    predictions_val, preds_val_val, predictions_probabilities = _predict_split("valid")
    metrics_val = compute_metrics_end(preds=preds_val_val, refs=predictions_val.label_ids)

    # Test split: predictions and aggregate metrics
    predictions_test, preds_test_test, predictions_probabilities_test = _predict_split("test")
    metrics_test = compute_metrics_end(preds=preds_test_test, refs=predictions_test.label_ids)

    print(metrics_test)
    print(metrics_val)

    # Per-class report on the validation predictions. The integer ids are used
    # directly; `labels` pins the row order so `target_names` always lines up,
    # even if a class is missing from one run's predictions.
    report = classification_report(
        predictions_val.label_ids,
        preds_val_val,
        labels=CLASS_IDS,
        target_names=CLASS_NAMES,
        output_dict=True,
    )

    # Save the report of run i+1 as CSV (one row per class / average).
    # `df` is left holding this report frame, as before.
    df = pd.DataFrame(report).transpose()
    df.to_csv(os.path.join("..", "data", f"{i+1}classification_report_only_paraphrasings_2308_rows.csv"))


  0%|          | 0/1156 [00:00<?, ?it/s]

  1%|          | 10/1156 [00:14<20:22,  1.07s/it] 

{'loss': 0.5255, 'learning_rate': 9.913494809688582e-06, 'epoch': 0.03}


  2%|▏         | 20/1156 [00:24<19:06,  1.01s/it]

{'loss': 0.3642, 'learning_rate': 9.826989619377163e-06, 'epoch': 0.07}


  3%|▎         | 30/1156 [00:34<18:59,  1.01s/it]

{'loss': 0.5619, 'learning_rate': 9.740484429065744e-06, 'epoch': 0.1}


  3%|▎         | 40/1156 [00:44<18:43,  1.01s/it]

{'loss': 0.55, 'learning_rate': 9.653979238754326e-06, 'epoch': 0.14}


  4%|▍         | 50/1156 [00:54<18:29,  1.00s/it]

{'loss': 0.5859, 'learning_rate': 9.567474048442907e-06, 'epoch': 0.17}


  5%|▌         | 60/1156 [01:04<18:36,  1.02s/it]

{'loss': 0.5592, 'learning_rate': 9.480968858131488e-06, 'epoch': 0.21}


  6%|▌         | 70/1156 [01:15<19:05,  1.05s/it]

{'loss': 0.5845, 'learning_rate': 9.39446366782007e-06, 'epoch': 0.24}


  7%|▋         | 80/1156 [01:25<18:08,  1.01s/it]

{'loss': 0.6868, 'learning_rate': 9.307958477508652e-06, 'epoch': 0.28}


  8%|▊         | 90/1156 [01:35<17:58,  1.01s/it]

{'loss': 0.5709, 'learning_rate': 9.221453287197234e-06, 'epoch': 0.31}


  9%|▊         | 100/1156 [01:45<17:42,  1.01s/it]

{'loss': 0.5075, 'learning_rate': 9.134948096885815e-06, 'epoch': 0.35}


 10%|▉         | 110/1156 [01:55<17:40,  1.01s/it]

{'loss': 0.4465, 'learning_rate': 9.048442906574394e-06, 'epoch': 0.38}


 10%|█         | 120/1156 [02:06<17:22,  1.01s/it]

{'loss': 0.4465, 'learning_rate': 8.961937716262975e-06, 'epoch': 0.42}


 11%|█         | 130/1156 [02:16<17:19,  1.01s/it]

{'loss': 0.5458, 'learning_rate': 8.875432525951558e-06, 'epoch': 0.45}


 12%|█▏        | 140/1156 [02:26<17:06,  1.01s/it]

{'loss': 0.3657, 'learning_rate': 8.78892733564014e-06, 'epoch': 0.48}


 13%|█▎        | 150/1156 [02:36<16:54,  1.01s/it]

{'loss': 0.4179, 'learning_rate': 8.702422145328721e-06, 'epoch': 0.52}


 14%|█▍        | 160/1156 [02:46<16:44,  1.01s/it]

{'loss': 0.5884, 'learning_rate': 8.615916955017302e-06, 'epoch': 0.55}


 15%|█▍        | 170/1156 [02:56<16:40,  1.01s/it]

{'loss': 0.3675, 'learning_rate': 8.529411764705883e-06, 'epoch': 0.59}


 16%|█▌        | 180/1156 [03:06<16:20,  1.01s/it]

{'loss': 0.4301, 'learning_rate': 8.442906574394465e-06, 'epoch': 0.62}


 16%|█▋        | 190/1156 [03:16<16:12,  1.01s/it]

{'loss': 0.4324, 'learning_rate': 8.356401384083046e-06, 'epoch': 0.66}


 17%|█▋        | 200/1156 [03:26<16:03,  1.01s/it]

{'loss': 0.3654, 'learning_rate': 8.269896193771627e-06, 'epoch': 0.69}


 18%|█▊        | 210/1156 [03:37<15:53,  1.01s/it]

{'loss': 0.4945, 'learning_rate': 8.183391003460208e-06, 'epoch': 0.73}


 19%|█▉        | 220/1156 [03:47<15:45,  1.01s/it]

{'loss': 0.3565, 'learning_rate': 8.09688581314879e-06, 'epoch': 0.76}


 20%|█▉        | 230/1156 [03:57<15:30,  1.01s/it]

{'loss': 0.4424, 'learning_rate': 8.01038062283737e-06, 'epoch': 0.8}


 21%|██        | 240/1156 [04:07<15:22,  1.01s/it]

{'loss': 0.4854, 'learning_rate': 7.923875432525952e-06, 'epoch': 0.83}


 22%|██▏       | 250/1156 [04:17<15:12,  1.01s/it]

{'loss': 0.5632, 'learning_rate': 7.837370242214533e-06, 'epoch': 0.87}


 22%|██▏       | 260/1156 [04:27<15:02,  1.01s/it]

{'loss': 0.3113, 'learning_rate': 7.750865051903114e-06, 'epoch': 0.9}


 23%|██▎       | 270/1156 [04:37<14:53,  1.01s/it]

{'loss': 0.5673, 'learning_rate': 7.664359861591696e-06, 'epoch': 0.93}


 24%|██▍       | 280/1156 [04:47<14:43,  1.01s/it]

{'loss': 0.5107, 'learning_rate': 7.577854671280277e-06, 'epoch': 0.97}


                                                  
 25%|██▌       | 289/1156 [05:26<13:29,  1.07it/s]

{'eval_loss': 0.808022141456604, 'eval_accuracy': 0.7062937062937062, 'eval_precision': 0.7075530568903329, 'eval_recall': 0.7062937062937062, 'eval_f1': 0.699899998058163, 'eval_runtime': 30.1618, 'eval_samples_per_second': 23.705, 'eval_steps_per_second': 2.984, 'epoch': 1.0}


 25%|██▌       | 290/1156 [05:34<2:52:35, 11.96s/it]

{'loss': 0.6572, 'learning_rate': 7.491349480968859e-06, 'epoch': 1.0}


 26%|██▌       | 300/1156 [05:44<18:45,  1.32s/it]  

{'loss': 0.4097, 'learning_rate': 7.40484429065744e-06, 'epoch': 1.04}


 27%|██▋       | 310/1156 [05:54<14:18,  1.01s/it]

{'loss': 0.2942, 'learning_rate': 7.318339100346021e-06, 'epoch': 1.07}


 28%|██▊       | 320/1156 [06:04<14:02,  1.01s/it]

{'loss': 0.449, 'learning_rate': 7.2318339100346025e-06, 'epoch': 1.11}


 29%|██▊       | 330/1156 [06:14<13:52,  1.01s/it]

{'loss': 0.2417, 'learning_rate': 7.145328719723184e-06, 'epoch': 1.14}


 29%|██▉       | 340/1156 [06:24<13:43,  1.01s/it]

{'loss': 0.3443, 'learning_rate': 7.058823529411766e-06, 'epoch': 1.18}


 30%|███       | 350/1156 [06:34<13:30,  1.01s/it]

{'loss': 0.2324, 'learning_rate': 6.972318339100347e-06, 'epoch': 1.21}


 31%|███       | 360/1156 [06:44<13:20,  1.01s/it]

{'loss': 0.299, 'learning_rate': 6.885813148788928e-06, 'epoch': 1.25}


 32%|███▏      | 370/1156 [06:54<13:14,  1.01s/it]

{'loss': 0.2031, 'learning_rate': 6.799307958477509e-06, 'epoch': 1.28}


 33%|███▎      | 380/1156 [07:05<13:02,  1.01s/it]

{'loss': 0.3596, 'learning_rate': 6.71280276816609e-06, 'epoch': 1.31}


 34%|███▎      | 390/1156 [07:15<12:48,  1.00s/it]

{'loss': 0.5141, 'learning_rate': 6.626297577854672e-06, 'epoch': 1.35}


 35%|███▍      | 400/1156 [07:25<12:42,  1.01s/it]

{'loss': 0.3932, 'learning_rate': 6.539792387543253e-06, 'epoch': 1.38}


 35%|███▌      | 410/1156 [07:35<12:31,  1.01s/it]

{'loss': 0.4745, 'learning_rate': 6.453287197231834e-06, 'epoch': 1.42}


 36%|███▋      | 420/1156 [07:45<12:28,  1.02s/it]

{'loss': 0.194, 'learning_rate': 6.3667820069204156e-06, 'epoch': 1.45}


 37%|███▋      | 430/1156 [07:55<12:12,  1.01s/it]

{'loss': 0.2247, 'learning_rate': 6.280276816608997e-06, 'epoch': 1.49}


 38%|███▊      | 440/1156 [08:05<12:03,  1.01s/it]

{'loss': 0.3968, 'learning_rate': 6.193771626297579e-06, 'epoch': 1.52}


 39%|███▉      | 450/1156 [08:15<11:52,  1.01s/it]

{'loss': 0.4238, 'learning_rate': 6.10726643598616e-06, 'epoch': 1.56}


 40%|███▉      | 460/1156 [08:25<11:42,  1.01s/it]

{'loss': 0.236, 'learning_rate': 6.020761245674741e-06, 'epoch': 1.59}


 41%|████      | 470/1156 [08:35<11:30,  1.01s/it]

{'loss': 0.3458, 'learning_rate': 5.9342560553633225e-06, 'epoch': 1.63}


 42%|████▏     | 480/1156 [08:46<11:25,  1.01s/it]

{'loss': 0.4005, 'learning_rate': 5.847750865051903e-06, 'epoch': 1.66}


 42%|████▏     | 490/1156 [08:56<11:14,  1.01s/it]

{'loss': 0.2057, 'learning_rate': 5.761245674740484e-06, 'epoch': 1.7}


 43%|████▎     | 500/1156 [09:06<11:00,  1.01s/it]

{'loss': 0.3371, 'learning_rate': 5.674740484429066e-06, 'epoch': 1.73}


 44%|████▍     | 510/1156 [09:16<10:51,  1.01s/it]

{'loss': 0.4568, 'learning_rate': 5.588235294117647e-06, 'epoch': 1.76}


 45%|████▍     | 520/1156 [09:26<10:42,  1.01s/it]

{'loss': 0.3644, 'learning_rate': 5.501730103806229e-06, 'epoch': 1.8}


 46%|████▌     | 530/1156 [09:36<10:31,  1.01s/it]

{'loss': 0.3068, 'learning_rate': 5.41522491349481e-06, 'epoch': 1.83}


 47%|████▋     | 540/1156 [09:46<10:22,  1.01s/it]

{'loss': 0.3506, 'learning_rate': 5.328719723183391e-06, 'epoch': 1.87}


 48%|████▊     | 550/1156 [09:56<10:13,  1.01s/it]

{'loss': 0.6422, 'learning_rate': 5.242214532871973e-06, 'epoch': 1.9}


 48%|████▊     | 560/1156 [10:06<10:01,  1.01s/it]

{'loss': 0.2777, 'learning_rate': 5.155709342560554e-06, 'epoch': 1.94}


 49%|████▉     | 570/1156 [10:17<09:51,  1.01s/it]

{'loss': 0.1259, 'learning_rate': 5.069204152249136e-06, 'epoch': 1.97}


                                                  
 50%|█████     | 578/1156 [10:53<08:51,  1.09it/s]

{'eval_loss': 1.0561504364013672, 'eval_accuracy': 0.6909090909090909, 'eval_precision': 0.6869960131616814, 'eval_recall': 0.6909090909090909, 'eval_f1': 0.6842159825924938, 'eval_runtime': 28.5236, 'eval_samples_per_second': 25.067, 'eval_steps_per_second': 3.155, 'epoch': 2.0}


 50%|█████     | 580/1156 [11:01<1:19:09,  8.25s/it]

{'loss': 0.4782, 'learning_rate': 4.982698961937717e-06, 'epoch': 2.01}


 51%|█████     | 590/1156 [11:11<11:23,  1.21s/it]  

{'loss': 0.107, 'learning_rate': 4.896193771626298e-06, 'epoch': 2.04}


 52%|█████▏    | 600/1156 [11:21<09:21,  1.01s/it]

{'loss': 0.1427, 'learning_rate': 4.809688581314879e-06, 'epoch': 2.08}


 53%|█████▎    | 610/1156 [11:31<09:09,  1.01s/it]

{'loss': 0.135, 'learning_rate': 4.7231833910034605e-06, 'epoch': 2.11}


 54%|█████▎    | 620/1156 [11:41<08:59,  1.01s/it]

{'loss': 0.4015, 'learning_rate': 4.636678200692042e-06, 'epoch': 2.15}


 54%|█████▍    | 630/1156 [11:51<08:51,  1.01s/it]

{'loss': 0.3396, 'learning_rate': 4.550173010380623e-06, 'epoch': 2.18}


 55%|█████▌    | 640/1156 [12:01<08:40,  1.01s/it]

{'loss': 0.2169, 'learning_rate': 4.463667820069205e-06, 'epoch': 2.21}


 56%|█████▌    | 650/1156 [12:12<08:29,  1.01s/it]

{'loss': 0.2317, 'learning_rate': 4.377162629757785e-06, 'epoch': 2.25}


 57%|█████▋    | 660/1156 [12:22<08:20,  1.01s/it]

{'loss': 0.1259, 'learning_rate': 4.2906574394463675e-06, 'epoch': 2.28}


 58%|█████▊    | 670/1156 [12:32<08:10,  1.01s/it]

{'loss': 0.1665, 'learning_rate': 4.204152249134949e-06, 'epoch': 2.32}


 59%|█████▉    | 680/1156 [12:42<08:00,  1.01s/it]

{'loss': 0.3433, 'learning_rate': 4.11764705882353e-06, 'epoch': 2.35}


 60%|█████▉    | 690/1156 [12:52<07:52,  1.01s/it]

{'loss': 0.1868, 'learning_rate': 4.031141868512111e-06, 'epoch': 2.39}


 61%|██████    | 700/1156 [13:02<07:38,  1.01s/it]

{'loss': 0.2957, 'learning_rate': 3.944636678200692e-06, 'epoch': 2.42}


 61%|██████▏   | 710/1156 [13:12<07:27,  1.00s/it]

{'loss': 0.1876, 'learning_rate': 3.8581314878892736e-06, 'epoch': 2.46}


 62%|██████▏   | 720/1156 [13:22<07:19,  1.01s/it]

{'loss': 0.2633, 'learning_rate': 3.7716262975778552e-06, 'epoch': 2.49}


 63%|██████▎   | 730/1156 [13:32<07:09,  1.01s/it]

{'loss': 0.2257, 'learning_rate': 3.685121107266436e-06, 'epoch': 2.53}


 64%|██████▍   | 740/1156 [13:42<07:03,  1.02s/it]

{'loss': 0.1088, 'learning_rate': 3.5986159169550177e-06, 'epoch': 2.56}


 65%|██████▍   | 750/1156 [13:53<06:51,  1.01s/it]

{'loss': 0.1116, 'learning_rate': 3.512110726643599e-06, 'epoch': 2.6}


 66%|██████▌   | 760/1156 [14:03<06:39,  1.01s/it]

{'loss': 0.4478, 'learning_rate': 3.42560553633218e-06, 'epoch': 2.63}


 67%|██████▋   | 770/1156 [14:13<06:29,  1.01s/it]

{'loss': 0.2257, 'learning_rate': 3.3391003460207618e-06, 'epoch': 2.66}


 67%|██████▋   | 780/1156 [14:23<06:20,  1.01s/it]

{'loss': 0.1318, 'learning_rate': 3.2525951557093425e-06, 'epoch': 2.7}


 68%|██████▊   | 790/1156 [14:33<06:13,  1.02s/it]

{'loss': 0.2565, 'learning_rate': 3.166089965397924e-06, 'epoch': 2.73}


 69%|██████▉   | 800/1156 [14:43<05:58,  1.01s/it]

{'loss': 0.4013, 'learning_rate': 3.0795847750865054e-06, 'epoch': 2.77}


 70%|███████   | 810/1156 [14:53<05:50,  1.01s/it]

{'loss': 0.2621, 'learning_rate': 2.9930795847750866e-06, 'epoch': 2.8}


 71%|███████   | 820/1156 [15:03<05:39,  1.01s/it]

{'loss': 0.2033, 'learning_rate': 2.9065743944636683e-06, 'epoch': 2.84}


 72%|███████▏  | 830/1156 [15:13<05:28,  1.01s/it]

{'loss': 0.195, 'learning_rate': 2.820069204152249e-06, 'epoch': 2.87}


 73%|███████▎  | 840/1156 [15:24<05:20,  1.01s/it]

{'loss': 0.1664, 'learning_rate': 2.7335640138408307e-06, 'epoch': 2.91}


 74%|███████▎  | 850/1156 [15:34<05:08,  1.01s/it]

{'loss': 0.3044, 'learning_rate': 2.647058823529412e-06, 'epoch': 2.94}


 74%|███████▍  | 860/1156 [15:44<04:57,  1.01s/it]

{'loss': 0.2288, 'learning_rate': 2.560553633217993e-06, 'epoch': 2.98}


                                                  
 75%|███████▌  | 867/1156 [16:19<04:26,  1.08it/s]

{'eval_loss': 1.3042268753051758, 'eval_accuracy': 0.6909090909090909, 'eval_precision': 0.6954778928004334, 'eval_recall': 0.6909090909090909, 'eval_f1': 0.6861092756501472, 'eval_runtime': 28.3277, 'eval_samples_per_second': 25.24, 'eval_steps_per_second': 3.177, 'epoch': 3.0}


 75%|███████▌  | 870/1156 [16:29<29:34,  6.21s/it]

{'loss': 0.2234, 'learning_rate': 2.4740484429065744e-06, 'epoch': 3.01}


 76%|███████▌  | 880/1156 [16:39<05:23,  1.17s/it]

{'loss': 0.1469, 'learning_rate': 2.387543252595156e-06, 'epoch': 3.04}


 77%|███████▋  | 890/1156 [16:49<04:31,  1.02s/it]

{'loss': 0.2506, 'learning_rate': 2.3010380622837373e-06, 'epoch': 3.08}


 78%|███████▊  | 900/1156 [17:00<04:17,  1.01s/it]

{'loss': 0.0643, 'learning_rate': 2.2145328719723185e-06, 'epoch': 3.11}


 79%|███████▊  | 910/1156 [17:10<04:07,  1.01s/it]

{'loss': 0.245, 'learning_rate': 2.1280276816609e-06, 'epoch': 3.15}


 80%|███████▉  | 920/1156 [17:20<03:57,  1.01s/it]

{'loss': 0.1331, 'learning_rate': 2.041522491349481e-06, 'epoch': 3.18}


 80%|████████  | 930/1156 [17:30<03:47,  1.01s/it]

{'loss': 0.2635, 'learning_rate': 1.9550173010380626e-06, 'epoch': 3.22}


 81%|████████▏ | 940/1156 [17:40<03:38,  1.01s/it]

{'loss': 0.0822, 'learning_rate': 1.8685121107266438e-06, 'epoch': 3.25}


 82%|████████▏ | 950/1156 [17:50<03:27,  1.01s/it]

{'loss': 0.2305, 'learning_rate': 1.7820069204152252e-06, 'epoch': 3.29}


 83%|████████▎ | 960/1156 [18:00<03:17,  1.01s/it]

{'loss': 0.1895, 'learning_rate': 1.6955017301038063e-06, 'epoch': 3.32}


 84%|████████▍ | 970/1156 [18:10<03:07,  1.01s/it]

{'loss': 0.0596, 'learning_rate': 1.6089965397923877e-06, 'epoch': 3.36}


 85%|████████▍ | 980/1156 [18:20<02:57,  1.01s/it]

{'loss': 0.2274, 'learning_rate': 1.522491349480969e-06, 'epoch': 3.39}


 86%|████████▌ | 990/1156 [18:30<02:47,  1.01s/it]

{'loss': 0.0332, 'learning_rate': 1.4359861591695503e-06, 'epoch': 3.43}


 87%|████████▋ | 1000/1156 [18:40<02:37,  1.01s/it]

{'loss': 0.0977, 'learning_rate': 1.3494809688581318e-06, 'epoch': 3.46}


 87%|████████▋ | 1010/1156 [18:51<02:27,  1.01s/it]

{'loss': 0.2169, 'learning_rate': 1.2629757785467128e-06, 'epoch': 3.49}


 88%|████████▊ | 1020/1156 [19:01<02:16,  1.01s/it]

{'loss': 0.211, 'learning_rate': 1.1764705882352942e-06, 'epoch': 3.53}


 89%|████████▉ | 1030/1156 [19:11<02:06,  1.01s/it]

{'loss': 0.1, 'learning_rate': 1.0899653979238757e-06, 'epoch': 3.56}


 90%|████████▉ | 1040/1156 [19:21<01:57,  1.01s/it]

{'loss': 0.2675, 'learning_rate': 1.0034602076124569e-06, 'epoch': 3.6}


 91%|█████████ | 1050/1156 [19:31<01:46,  1.01s/it]

{'loss': 0.1949, 'learning_rate': 9.169550173010382e-07, 'epoch': 3.63}


 92%|█████████▏| 1060/1156 [19:41<01:41,  1.06s/it]

{'loss': 0.0853, 'learning_rate': 8.304498269896194e-07, 'epoch': 3.67}


 93%|█████████▎| 1070/1156 [19:52<01:29,  1.04s/it]

{'loss': 0.1261, 'learning_rate': 7.439446366782008e-07, 'epoch': 3.7}


 93%|█████████▎| 1080/1156 [20:02<01:17,  1.02s/it]

{'loss': 0.1134, 'learning_rate': 6.57439446366782e-07, 'epoch': 3.74}


 94%|█████████▍| 1090/1156 [20:12<01:06,  1.01s/it]

{'loss': 0.1492, 'learning_rate': 5.709342560553634e-07, 'epoch': 3.77}


 95%|█████████▌| 1100/1156 [20:22<00:56,  1.01s/it]

{'loss': 0.1259, 'learning_rate': 4.844290657439446e-07, 'epoch': 3.81}


 96%|█████████▌| 1110/1156 [20:32<00:46,  1.01s/it]

{'loss': 0.1749, 'learning_rate': 3.9792387543252597e-07, 'epoch': 3.84}


 97%|█████████▋| 1120/1156 [20:42<00:36,  1.01s/it]

{'loss': 0.2542, 'learning_rate': 3.114186851211073e-07, 'epoch': 3.88}


 98%|█████████▊| 1130/1156 [20:52<00:26,  1.01s/it]

{'loss': 0.0985, 'learning_rate': 2.249134948096886e-07, 'epoch': 3.91}


 99%|█████████▊| 1140/1156 [21:02<00:16,  1.01s/it]

{'loss': 0.3419, 'learning_rate': 1.384083044982699e-07, 'epoch': 3.94}


 99%|█████████▉| 1150/1156 [21:12<00:06,  1.01s/it]

{'loss': 0.1143, 'learning_rate': 5.1903114186851215e-08, 'epoch': 3.98}


                                                   
100%|██████████| 1156/1156 [21:49<00:00,  1.10it/s]

{'eval_loss': 1.3754510879516602, 'eval_accuracy': 0.6965034965034965, 'eval_precision': 0.7009507860949103, 'eval_recall': 0.6965034965034965, 'eval_f1': 0.6973430817036445, 'eval_runtime': 30.6382, 'eval_samples_per_second': 23.337, 'eval_steps_per_second': 2.938, 'epoch': 4.0}


100%|██████████| 1156/1156 [21:58<00:00,  1.14s/it]


{'train_runtime': 1318.5914, 'train_samples_per_second': 7.001, 'train_steps_per_second': 0.877, 'train_loss': 0.3074852222235145, 'epoch': 4.0}


100%|██████████| 90/90 [00:28<00:00,  3.13it/s]
100%|██████████| 90/90 [00:29<00:00,  3.07it/s]
100%|██████████| 90/90 [00:29<00:00,  3.10it/s]


{'accuracy': 0.7062937062937062, 'precision': 0.7075530568903329, 'recall': 0.7062937062937062, 'f1': 0.699899998058163}
{'accuracy': 0.6876750700280112, 'precision': 0.6890547020468535, 'recall': 0.6876750700280112, 'f1': 0.6785096994748426}


  1%|          | 10/1156 [00:11<19:48,  1.04s/it]

{'loss': 0.3002, 'learning_rate': 9.913494809688582e-06, 'epoch': 0.03}


  2%|▏         | 20/1156 [00:22<19:07,  1.01s/it]

{'loss': 0.1879, 'learning_rate': 9.826989619377163e-06, 'epoch': 0.07}


  3%|▎         | 30/1156 [00:32<18:55,  1.01s/it]

{'loss': 0.4828, 'learning_rate': 9.740484429065744e-06, 'epoch': 0.1}


  3%|▎         | 40/1156 [00:42<18:43,  1.01s/it]

{'loss': 0.4279, 'learning_rate': 9.653979238754326e-06, 'epoch': 0.14}


  4%|▍         | 50/1156 [00:52<18:37,  1.01s/it]

{'loss': 0.4561, 'learning_rate': 9.567474048442907e-06, 'epoch': 0.17}


  5%|▌         | 60/1156 [01:02<18:29,  1.01s/it]

{'loss': 0.2955, 'learning_rate': 9.480968858131488e-06, 'epoch': 0.21}


  6%|▌         | 70/1156 [01:12<18:41,  1.03s/it]

{'loss': 0.5235, 'learning_rate': 9.39446366782007e-06, 'epoch': 0.24}


  7%|▋         | 80/1156 [01:23<18:16,  1.02s/it]

{'loss': 0.5286, 'learning_rate': 9.307958477508652e-06, 'epoch': 0.28}


  8%|▊         | 90/1156 [01:33<17:57,  1.01s/it]

{'loss': 0.5279, 'learning_rate': 9.221453287197234e-06, 'epoch': 0.31}


  9%|▊         | 100/1156 [01:43<17:41,  1.01s/it]

{'loss': 0.4445, 'learning_rate': 9.134948096885815e-06, 'epoch': 0.35}


 10%|▉         | 110/1156 [01:53<17:36,  1.01s/it]

{'loss': 0.2817, 'learning_rate': 9.048442906574394e-06, 'epoch': 0.38}


 10%|█         | 120/1156 [02:03<17:26,  1.01s/it]

{'loss': 0.3107, 'learning_rate': 8.961937716262975e-06, 'epoch': 0.42}


 11%|█         | 130/1156 [02:13<17:22,  1.02s/it]

{'loss': 0.3903, 'learning_rate': 8.875432525951558e-06, 'epoch': 0.45}


 12%|█▏        | 140/1156 [02:24<17:04,  1.01s/it]

{'loss': 0.1888, 'learning_rate': 8.78892733564014e-06, 'epoch': 0.48}


 13%|█▎        | 150/1156 [02:34<16:52,  1.01s/it]

{'loss': 0.4015, 'learning_rate': 8.702422145328721e-06, 'epoch': 0.52}


 14%|█▍        | 160/1156 [02:44<16:45,  1.01s/it]

{'loss': 0.4475, 'learning_rate': 8.615916955017302e-06, 'epoch': 0.55}


 15%|█▍        | 170/1156 [02:54<16:34,  1.01s/it]

{'loss': 0.1543, 'learning_rate': 8.529411764705883e-06, 'epoch': 0.59}


 16%|█▌        | 180/1156 [03:04<16:26,  1.01s/it]

{'loss': 0.2391, 'learning_rate': 8.442906574394465e-06, 'epoch': 0.62}


 16%|█▋        | 190/1156 [03:14<16:18,  1.01s/it]

{'loss': 0.2714, 'learning_rate': 8.356401384083046e-06, 'epoch': 0.66}


 17%|█▋        | 200/1156 [03:24<16:06,  1.01s/it]

{'loss': 0.2605, 'learning_rate': 8.269896193771627e-06, 'epoch': 0.69}


 18%|█▊        | 210/1156 [03:34<16:01,  1.02s/it]

{'loss': 0.4728, 'learning_rate': 8.183391003460208e-06, 'epoch': 0.73}


 19%|█▉        | 220/1156 [03:45<15:46,  1.01s/it]

{'loss': 0.397, 'learning_rate': 8.09688581314879e-06, 'epoch': 0.76}


 20%|█▉        | 230/1156 [03:55<15:35,  1.01s/it]

{'loss': 0.2969, 'learning_rate': 8.01038062283737e-06, 'epoch': 0.8}


 21%|██        | 240/1156 [04:05<15:30,  1.02s/it]

{'loss': 0.4825, 'learning_rate': 7.923875432525952e-06, 'epoch': 0.83}


 22%|██▏       | 250/1156 [04:15<15:20,  1.02s/it]

{'loss': 0.4092, 'learning_rate': 7.837370242214533e-06, 'epoch': 0.87}


 22%|██▏       | 260/1156 [04:25<15:03,  1.01s/it]

{'loss': 0.2873, 'learning_rate': 7.750865051903114e-06, 'epoch': 0.9}


 23%|██▎       | 270/1156 [04:35<14:57,  1.01s/it]

{'loss': 0.5245, 'learning_rate': 7.664359861591696e-06, 'epoch': 0.93}


 24%|██▍       | 280/1156 [04:45<14:44,  1.01s/it]

{'loss': 0.375, 'learning_rate': 7.577854671280277e-06, 'epoch': 0.97}


                                                  
 25%|██▌       | 289/1156 [05:23<13:38,  1.06it/s]

{'eval_loss': 0.9733930826187134, 'eval_accuracy': 0.7090909090909091, 'eval_precision': 0.7156578719202394, 'eval_recall': 0.7090909090909091, 'eval_f1': 0.7113573499652737, 'eval_runtime': 28.6075, 'eval_samples_per_second': 24.993, 'eval_steps_per_second': 3.146, 'epoch': 1.0}


 25%|██▌       | 290/1156 [05:31<2:46:52, 11.56s/it]

{'loss': 0.658, 'learning_rate': 7.491349480968859e-06, 'epoch': 1.0}


 26%|██▌       | 300/1156 [05:41<18:38,  1.31s/it]  

{'loss': 0.1858, 'learning_rate': 7.40484429065744e-06, 'epoch': 1.04}


 27%|██▋       | 310/1156 [05:51<14:23,  1.02s/it]

{'loss': 0.0955, 'learning_rate': 7.318339100346021e-06, 'epoch': 1.07}


 28%|██▊       | 320/1156 [06:01<14:12,  1.02s/it]

{'loss': 0.3544, 'learning_rate': 7.2318339100346025e-06, 'epoch': 1.11}


 29%|██▊       | 330/1156 [06:11<13:56,  1.01s/it]

{'loss': 0.157, 'learning_rate': 7.145328719723184e-06, 'epoch': 1.14}


 29%|██▉       | 340/1156 [06:21<13:44,  1.01s/it]

{'loss': 0.4378, 'learning_rate': 7.058823529411766e-06, 'epoch': 1.18}


 30%|███       | 350/1156 [06:32<13:45,  1.02s/it]

{'loss': 0.0625, 'learning_rate': 6.972318339100347e-06, 'epoch': 1.21}


 31%|███       | 360/1156 [06:42<13:23,  1.01s/it]

{'loss': 0.4551, 'learning_rate': 6.885813148788928e-06, 'epoch': 1.25}


 32%|███▏      | 370/1156 [06:52<13:24,  1.02s/it]

{'loss': 0.2151, 'learning_rate': 6.799307958477509e-06, 'epoch': 1.28}


 33%|███▎      | 380/1156 [07:02<13:03,  1.01s/it]

{'loss': 0.2909, 'learning_rate': 6.71280276816609e-06, 'epoch': 1.31}


 34%|███▎      | 390/1156 [07:12<12:58,  1.02s/it]

{'loss': 0.3946, 'learning_rate': 6.626297577854672e-06, 'epoch': 1.35}


 35%|███▍      | 400/1156 [07:22<12:41,  1.01s/it]

{'loss': 0.3198, 'learning_rate': 6.539792387543253e-06, 'epoch': 1.38}


 35%|███▌      | 410/1156 [07:32<12:29,  1.01s/it]

{'loss': 0.388, 'learning_rate': 6.453287197231834e-06, 'epoch': 1.42}


 36%|███▋      | 420/1156 [07:42<12:21,  1.01s/it]

{'loss': 0.2316, 'learning_rate': 6.3667820069204156e-06, 'epoch': 1.45}


 37%|███▋      | 430/1156 [07:52<12:11,  1.01s/it]

{'loss': 0.1923, 'learning_rate': 6.280276816608997e-06, 'epoch': 1.49}


 38%|███▊      | 440/1156 [08:03<12:12,  1.02s/it]

{'loss': 0.1925, 'learning_rate': 6.193771626297579e-06, 'epoch': 1.52}


 39%|███▉      | 450/1156 [08:13<11:55,  1.01s/it]

{'loss': 0.4767, 'learning_rate': 6.10726643598616e-06, 'epoch': 1.56}


 40%|███▉      | 460/1156 [08:23<11:41,  1.01s/it]

{'loss': 0.1341, 'learning_rate': 6.020761245674741e-06, 'epoch': 1.59}


 41%|████      | 470/1156 [08:33<11:32,  1.01s/it]

{'loss': 0.3511, 'learning_rate': 5.9342560553633225e-06, 'epoch': 1.63}


 42%|████▏     | 480/1156 [08:44<11:20,  1.01s/it]

{'loss': 0.2589, 'learning_rate': 5.847750865051903e-06, 'epoch': 1.66}


 42%|████▏     | 490/1156 [08:54<11:10,  1.01s/it]

{'loss': 0.2831, 'learning_rate': 5.761245674740484e-06, 'epoch': 1.7}


 43%|████▎     | 500/1156 [09:04<11:00,  1.01s/it]

{'loss': 0.3721, 'learning_rate': 5.674740484429066e-06, 'epoch': 1.73}


 44%|████▍     | 510/1156 [09:14<10:52,  1.01s/it]

{'loss': 0.1841, 'learning_rate': 5.588235294117647e-06, 'epoch': 1.76}


 45%|████▍     | 520/1156 [09:24<10:41,  1.01s/it]

{'loss': 0.3346, 'learning_rate': 5.501730103806229e-06, 'epoch': 1.8}


 46%|████▌     | 530/1156 [09:34<11:11,  1.07s/it]

{'loss': 0.3085, 'learning_rate': 5.41522491349481e-06, 'epoch': 1.83}


 47%|████▋     | 540/1156 [09:45<10:38,  1.04s/it]

{'loss': 0.2139, 'learning_rate': 5.328719723183391e-06, 'epoch': 1.87}


 48%|████▊     | 550/1156 [09:55<10:21,  1.03s/it]

{'loss': 0.4045, 'learning_rate': 5.242214532871973e-06, 'epoch': 1.9}


 48%|████▊     | 560/1156 [10:05<10:01,  1.01s/it]

{'loss': 0.0892, 'learning_rate': 5.155709342560554e-06, 'epoch': 1.94}


 49%|████▉     | 570/1156 [10:15<09:52,  1.01s/it]

{'loss': 0.0929, 'learning_rate': 5.069204152249136e-06, 'epoch': 1.97}


                                                  
 50%|█████     | 578/1156 [10:52<08:46,  1.10it/s]

{'eval_loss': 1.3612942695617676, 'eval_accuracy': 0.6965034965034965, 'eval_precision': 0.692239942378064, 'eval_recall': 0.6965034965034965, 'eval_f1': 0.6889327002077416, 'eval_runtime': 28.9633, 'eval_samples_per_second': 24.686, 'eval_steps_per_second': 3.107, 'epoch': 2.0}


 50%|█████     | 580/1156 [11:01<1:22:39,  8.61s/it]

{'loss': 0.4905, 'learning_rate': 4.982698961937717e-06, 'epoch': 2.01}


 51%|█████     | 590/1156 [11:12<11:43,  1.24s/it]  

{'loss': 0.1079, 'learning_rate': 4.896193771626298e-06, 'epoch': 2.04}


 52%|█████▏    | 600/1156 [11:22<09:26,  1.02s/it]

{'loss': 0.0415, 'learning_rate': 4.809688581314879e-06, 'epoch': 2.08}


 53%|█████▎    | 610/1156 [11:32<09:12,  1.01s/it]

{'loss': 0.1257, 'learning_rate': 4.7231833910034605e-06, 'epoch': 2.11}


 54%|█████▎    | 620/1156 [11:42<08:58,  1.00s/it]

{'loss': 0.3165, 'learning_rate': 4.636678200692042e-06, 'epoch': 2.15}


 54%|█████▍    | 630/1156 [11:52<08:51,  1.01s/it]

{'loss': 0.2451, 'learning_rate': 4.550173010380623e-06, 'epoch': 2.18}


 55%|█████▌    | 640/1156 [12:02<08:40,  1.01s/it]

{'loss': 0.0877, 'learning_rate': 4.463667820069205e-06, 'epoch': 2.21}


 56%|█████▌    | 650/1156 [12:12<08:33,  1.01s/it]

{'loss': 0.1944, 'learning_rate': 4.377162629757785e-06, 'epoch': 2.25}


 57%|█████▋    | 660/1156 [12:22<08:22,  1.01s/it]

{'loss': 0.1262, 'learning_rate': 4.2906574394463675e-06, 'epoch': 2.28}


 58%|█████▊    | 670/1156 [12:33<08:10,  1.01s/it]

{'loss': 0.1489, 'learning_rate': 4.204152249134949e-06, 'epoch': 2.32}


 59%|█████▉    | 680/1156 [12:43<08:00,  1.01s/it]

{'loss': 0.1939, 'learning_rate': 4.11764705882353e-06, 'epoch': 2.35}


 60%|█████▉    | 690/1156 [12:53<07:49,  1.01s/it]

{'loss': 0.1626, 'learning_rate': 4.031141868512111e-06, 'epoch': 2.39}


 61%|██████    | 700/1156 [13:03<07:38,  1.01s/it]

{'loss': 0.2773, 'learning_rate': 3.944636678200692e-06, 'epoch': 2.42}


 61%|██████▏   | 710/1156 [13:13<07:30,  1.01s/it]

{'loss': 0.1131, 'learning_rate': 3.8581314878892736e-06, 'epoch': 2.46}


 62%|██████▏   | 720/1156 [13:23<07:17,  1.00s/it]

{'loss': 0.1803, 'learning_rate': 3.7716262975778552e-06, 'epoch': 2.49}


 63%|██████▎   | 730/1156 [13:33<07:07,  1.00s/it]

{'loss': 0.2884, 'learning_rate': 3.685121107266436e-06, 'epoch': 2.53}


 64%|██████▍   | 740/1156 [13:43<06:58,  1.01s/it]

{'loss': 0.0932, 'learning_rate': 3.5986159169550177e-06, 'epoch': 2.56}


 65%|██████▍   | 750/1156 [13:53<06:48,  1.01s/it]

{'loss': 0.0463, 'learning_rate': 3.512110726643599e-06, 'epoch': 2.6}


 66%|██████▌   | 760/1156 [14:04<06:44,  1.02s/it]

{'loss': 0.3232, 'learning_rate': 3.42560553633218e-06, 'epoch': 2.63}


 67%|██████▋   | 770/1156 [14:14<06:27,  1.00s/it]

{'loss': 0.226, 'learning_rate': 3.3391003460207618e-06, 'epoch': 2.66}


 67%|██████▋   | 780/1156 [14:24<06:18,  1.01s/it]

{'loss': 0.0643, 'learning_rate': 3.2525951557093425e-06, 'epoch': 2.7}


 68%|██████▊   | 790/1156 [14:34<06:08,  1.01s/it]

{'loss': 0.1302, 'learning_rate': 3.166089965397924e-06, 'epoch': 2.73}


 69%|██████▉   | 800/1156 [14:44<05:59,  1.01s/it]

{'loss': 0.2199, 'learning_rate': 3.0795847750865054e-06, 'epoch': 2.77}


 70%|███████   | 810/1156 [14:54<05:47,  1.00s/it]

{'loss': 0.1214, 'learning_rate': 2.9930795847750866e-06, 'epoch': 2.8}


 71%|███████   | 820/1156 [15:04<05:39,  1.01s/it]

{'loss': 0.1437, 'learning_rate': 2.9065743944636683e-06, 'epoch': 2.84}


 72%|███████▏  | 830/1156 [15:14<05:30,  1.02s/it]

{'loss': 0.144, 'learning_rate': 2.820069204152249e-06, 'epoch': 2.87}


 73%|███████▎  | 840/1156 [15:24<05:18,  1.01s/it]

{'loss': 0.1112, 'learning_rate': 2.7335640138408307e-06, 'epoch': 2.91}


 74%|███████▎  | 850/1156 [15:34<05:08,  1.01s/it]

{'loss': 0.1619, 'learning_rate': 2.647058823529412e-06, 'epoch': 2.94}


 74%|███████▍  | 860/1156 [15:45<04:58,  1.01s/it]

{'loss': 0.1842, 'learning_rate': 2.560553633217993e-06, 'epoch': 2.98}


                                                  
 75%|███████▌  | 867/1156 [16:21<04:26,  1.08it/s]

{'eval_loss': 1.4916032552719116, 'eval_accuracy': 0.6979020979020979, 'eval_precision': 0.6954790111076978, 'eval_recall': 0.6979020979020979, 'eval_f1': 0.694597837416101, 'eval_runtime': 29.3916, 'eval_samples_per_second': 24.327, 'eval_steps_per_second': 3.062, 'epoch': 3.0}


 75%|███████▌  | 870/1156 [16:31<30:34,  6.41s/it]

{'loss': 0.197, 'learning_rate': 2.4740484429065744e-06, 'epoch': 3.01}


 76%|███████▌  | 880/1156 [16:41<05:20,  1.16s/it]

{'loss': 0.0818, 'learning_rate': 2.387543252595156e-06, 'epoch': 3.04}


 77%|███████▋  | 890/1156 [16:51<04:29,  1.01s/it]

{'loss': 0.0906, 'learning_rate': 2.3010380622837373e-06, 'epoch': 3.08}


 78%|███████▊  | 900/1156 [17:02<04:18,  1.01s/it]

{'loss': 0.0056, 'learning_rate': 2.2145328719723185e-06, 'epoch': 3.11}


 79%|███████▊  | 910/1156 [17:12<04:06,  1.00s/it]

{'loss': 0.1964, 'learning_rate': 2.1280276816609e-06, 'epoch': 3.15}


 80%|███████▉  | 920/1156 [17:22<03:57,  1.01s/it]

{'loss': 0.0409, 'learning_rate': 2.041522491349481e-06, 'epoch': 3.18}


 80%|████████  | 930/1156 [17:32<03:47,  1.01s/it]

{'loss': 0.1296, 'learning_rate': 1.9550173010380626e-06, 'epoch': 3.22}


 81%|████████▏ | 940/1156 [17:42<03:38,  1.01s/it]

{'loss': 0.0221, 'learning_rate': 1.8685121107266438e-06, 'epoch': 3.25}


 82%|████████▏ | 950/1156 [17:52<03:27,  1.01s/it]

{'loss': 0.1402, 'learning_rate': 1.7820069204152252e-06, 'epoch': 3.29}


 83%|████████▎ | 960/1156 [18:02<03:16,  1.00s/it]

{'loss': 0.0957, 'learning_rate': 1.6955017301038063e-06, 'epoch': 3.32}


 84%|████████▍ | 970/1156 [18:12<03:07,  1.01s/it]

{'loss': 0.0599, 'learning_rate': 1.6089965397923877e-06, 'epoch': 3.36}


 85%|████████▍ | 980/1156 [18:22<02:57,  1.01s/it]

{'loss': 0.0244, 'learning_rate': 1.522491349480969e-06, 'epoch': 3.39}


 86%|████████▌ | 990/1156 [18:33<02:47,  1.01s/it]

{'loss': 0.0793, 'learning_rate': 1.4359861591695503e-06, 'epoch': 3.43}


 87%|████████▋ | 1000/1156 [18:43<02:37,  1.01s/it]

{'loss': 0.006, 'learning_rate': 1.3494809688581318e-06, 'epoch': 3.46}


 87%|████████▋ | 1010/1156 [18:53<02:27,  1.01s/it]

{'loss': 0.1427, 'learning_rate': 1.2629757785467128e-06, 'epoch': 3.49}


 88%|████████▊ | 1020/1156 [19:03<02:17,  1.01s/it]

{'loss': 0.0831, 'learning_rate': 1.1764705882352942e-06, 'epoch': 3.53}


 89%|████████▉ | 1030/1156 [19:13<02:07,  1.01s/it]

{'loss': 0.0025, 'learning_rate': 1.0899653979238757e-06, 'epoch': 3.56}


 90%|████████▉ | 1040/1156 [19:23<01:56,  1.01s/it]

{'loss': 0.2904, 'learning_rate': 1.0034602076124569e-06, 'epoch': 3.6}


 91%|█████████ | 1050/1156 [19:33<01:46,  1.01s/it]

{'loss': 0.0654, 'learning_rate': 9.169550173010382e-07, 'epoch': 3.63}


 92%|█████████▏| 1060/1156 [19:43<01:36,  1.01s/it]

{'loss': 0.0699, 'learning_rate': 8.304498269896194e-07, 'epoch': 3.67}


 93%|█████████▎| 1070/1156 [19:53<01:27,  1.01s/it]

{'loss': 0.1333, 'learning_rate': 7.439446366782008e-07, 'epoch': 3.7}


 93%|█████████▎| 1080/1156 [20:04<01:18,  1.04s/it]

{'loss': 0.074, 'learning_rate': 6.57439446366782e-07, 'epoch': 3.74}


 94%|█████████▍| 1090/1156 [20:14<01:06,  1.01s/it]

{'loss': 0.0239, 'learning_rate': 5.709342560553634e-07, 'epoch': 3.77}


 95%|█████████▌| 1100/1156 [20:24<00:56,  1.01s/it]

{'loss': 0.1148, 'learning_rate': 4.844290657439446e-07, 'epoch': 3.81}


 96%|█████████▌| 1110/1156 [20:34<00:46,  1.01s/it]

{'loss': 0.1936, 'learning_rate': 3.9792387543252597e-07, 'epoch': 3.84}


 97%|█████████▋| 1120/1156 [20:44<00:36,  1.01s/it]

{'loss': 0.1636, 'learning_rate': 3.114186851211073e-07, 'epoch': 3.88}


 98%|█████████▊| 1130/1156 [20:54<00:26,  1.01s/it]

{'loss': 0.0307, 'learning_rate': 2.249134948096886e-07, 'epoch': 3.91}


 99%|█████████▊| 1140/1156 [21:04<00:16,  1.01s/it]

{'loss': 0.2517, 'learning_rate': 1.384083044982699e-07, 'epoch': 3.94}


 99%|█████████▉| 1150/1156 [21:15<00:06,  1.01s/it]

{'loss': 0.078, 'learning_rate': 5.1903114186851215e-08, 'epoch': 3.98}


                                                   
100%|██████████| 1156/1156 [21:49<00:00,  1.08it/s]

{'eval_loss': 1.5683996677398682, 'eval_accuracy': 0.7090909090909091, 'eval_precision': 0.7184092644739242, 'eval_recall': 0.7090909090909091, 'eval_f1': 0.7096162009503154, 'eval_runtime': 28.9043, 'eval_samples_per_second': 24.737, 'eval_steps_per_second': 3.114, 'epoch': 4.0}


100%|██████████| 1156/1156 [21:59<00:00,  1.14s/it]


{'train_runtime': 1319.3061, 'train_samples_per_second': 6.998, 'train_steps_per_second': 0.876, 'train_loss': 0.2291393962891766, 'epoch': 4.0}


100%|██████████| 90/90 [00:29<00:00,  3.04it/s]
100%|██████████| 90/90 [00:27<00:00,  3.22it/s]
100%|██████████| 90/90 [00:27<00:00,  3.25it/s]


{'accuracy': 0.7090909090909091, 'precision': 0.7156578719202394, 'recall': 0.7090909090909091, 'f1': 0.7113573499652737}
{'accuracy': 0.6848739495798319, 'precision': 0.687652637840514, 'recall': 0.6848739495798319, 'f1': 0.6857907714589349}


  1%|          | 10/1156 [00:11<19:38,  1.03s/it]

{'loss': 0.2879, 'learning_rate': 9.913494809688582e-06, 'epoch': 0.03}


  2%|▏         | 20/1156 [00:21<19:09,  1.01s/it]

{'loss': 0.0553, 'learning_rate': 9.826989619377163e-06, 'epoch': 0.07}


  3%|▎         | 30/1156 [00:31<18:53,  1.01s/it]

{'loss': 0.4189, 'learning_rate': 9.740484429065744e-06, 'epoch': 0.1}


  3%|▎         | 40/1156 [00:41<18:40,  1.00s/it]

{'loss': 0.3465, 'learning_rate': 9.653979238754326e-06, 'epoch': 0.14}


  4%|▍         | 50/1156 [00:52<18:46,  1.02s/it]

{'loss': 0.3988, 'learning_rate': 9.567474048442907e-06, 'epoch': 0.17}


  5%|▌         | 60/1156 [01:02<18:34,  1.02s/it]

{'loss': 0.216, 'learning_rate': 9.480968858131488e-06, 'epoch': 0.21}


  6%|▌         | 70/1156 [01:13<18:30,  1.02s/it]

{'loss': 0.2638, 'learning_rate': 9.39446366782007e-06, 'epoch': 0.24}


  7%|▋         | 80/1156 [01:23<18:13,  1.02s/it]

{'loss': 0.4266, 'learning_rate': 9.307958477508652e-06, 'epoch': 0.28}


  8%|▊         | 90/1156 [01:33<17:54,  1.01s/it]

{'loss': 0.4084, 'learning_rate': 9.221453287197234e-06, 'epoch': 0.31}


  9%|▊         | 100/1156 [01:43<17:52,  1.02s/it]

{'loss': 0.3752, 'learning_rate': 9.134948096885815e-06, 'epoch': 0.35}


 10%|▉         | 110/1156 [01:53<17:34,  1.01s/it]

{'loss': 0.1677, 'learning_rate': 9.048442906574394e-06, 'epoch': 0.38}


 10%|█         | 120/1156 [02:03<17:22,  1.01s/it]

{'loss': 0.3128, 'learning_rate': 8.961937716262975e-06, 'epoch': 0.42}


 11%|█         | 130/1156 [02:13<18:14,  1.07s/it]

{'loss': 0.3177, 'learning_rate': 8.875432525951558e-06, 'epoch': 0.45}


 12%|█▏        | 140/1156 [02:24<17:02,  1.01s/it]

{'loss': 0.1755, 'learning_rate': 8.78892733564014e-06, 'epoch': 0.48}


 13%|█▎        | 150/1156 [02:34<16:55,  1.01s/it]

{'loss': 0.2977, 'learning_rate': 8.702422145328721e-06, 'epoch': 0.52}


 14%|█▍        | 160/1156 [02:44<16:49,  1.01s/it]

{'loss': 0.3591, 'learning_rate': 8.615916955017302e-06, 'epoch': 0.55}


 15%|█▍        | 170/1156 [02:54<16:40,  1.02s/it]

{'loss': 0.0517, 'learning_rate': 8.529411764705883e-06, 'epoch': 0.59}


 16%|█▌        | 180/1156 [03:04<16:21,  1.01s/it]

{'loss': 0.1964, 'learning_rate': 8.442906574394465e-06, 'epoch': 0.62}


 16%|█▋        | 190/1156 [03:14<16:11,  1.01s/it]

{'loss': 0.2506, 'learning_rate': 8.356401384083046e-06, 'epoch': 0.66}


 17%|█▋        | 200/1156 [03:24<16:02,  1.01s/it]

{'loss': 0.202, 'learning_rate': 8.269896193771627e-06, 'epoch': 0.69}


 18%|█▊        | 210/1156 [03:34<15:54,  1.01s/it]

{'loss': 0.4129, 'learning_rate': 8.183391003460208e-06, 'epoch': 0.73}


 19%|█▉        | 220/1156 [03:44<15:37,  1.00s/it]

{'loss': 0.2044, 'learning_rate': 8.09688581314879e-06, 'epoch': 0.76}


 20%|█▉        | 230/1156 [03:54<15:33,  1.01s/it]

{'loss': 0.2378, 'learning_rate': 8.01038062283737e-06, 'epoch': 0.8}


 21%|██        | 240/1156 [04:04<15:20,  1.01s/it]

{'loss': 0.4087, 'learning_rate': 7.923875432525952e-06, 'epoch': 0.83}


 22%|██▏       | 250/1156 [04:14<15:08,  1.00s/it]

{'loss': 0.3826, 'learning_rate': 7.837370242214533e-06, 'epoch': 0.87}


 22%|██▏       | 260/1156 [04:24<15:02,  1.01s/it]

{'loss': 0.2105, 'learning_rate': 7.750865051903114e-06, 'epoch': 0.9}


 23%|██▎       | 270/1156 [04:35<14:50,  1.01s/it]

{'loss': 0.3595, 'learning_rate': 7.664359861591696e-06, 'epoch': 0.93}


 24%|██▍       | 280/1156 [04:45<14:37,  1.00s/it]

{'loss': 0.5752, 'learning_rate': 7.577854671280277e-06, 'epoch': 0.97}


                                                  
 25%|██▌       | 289/1156 [05:22<13:08,  1.10it/s]

{'eval_loss': 1.05899178981781, 'eval_accuracy': 0.6993006993006993, 'eval_precision': 0.7143570457904823, 'eval_recall': 0.6993006993006993, 'eval_f1': 0.7029886689839709, 'eval_runtime': 28.405, 'eval_samples_per_second': 25.172, 'eval_steps_per_second': 3.168, 'epoch': 1.0}


 25%|██▌       | 290/1156 [05:30<2:47:22, 11.60s/it]

{'loss': 0.6746, 'learning_rate': 7.491349480968859e-06, 'epoch': 1.0}


 26%|██▌       | 300/1156 [05:40<18:30,  1.30s/it]  

{'loss': 0.2684, 'learning_rate': 7.40484429065744e-06, 'epoch': 1.04}


 27%|██▋       | 310/1156 [05:50<14:12,  1.01s/it]

{'loss': 0.027, 'learning_rate': 7.318339100346021e-06, 'epoch': 1.07}


 28%|██▊       | 320/1156 [06:00<13:56,  1.00s/it]

{'loss': 0.1975, 'learning_rate': 7.2318339100346025e-06, 'epoch': 1.11}


 29%|██▊       | 330/1156 [06:10<13:48,  1.00s/it]

{'loss': 0.1258, 'learning_rate': 7.145328719723184e-06, 'epoch': 1.14}


 29%|██▉       | 340/1156 [06:20<13:39,  1.00s/it]

{'loss': 0.3059, 'learning_rate': 7.058823529411766e-06, 'epoch': 1.18}


 30%|███       | 350/1156 [06:30<13:27,  1.00s/it]

{'loss': 0.0148, 'learning_rate': 6.972318339100347e-06, 'epoch': 1.21}


 31%|███       | 360/1156 [06:40<13:18,  1.00s/it]

{'loss': 0.1424, 'learning_rate': 6.885813148788928e-06, 'epoch': 1.25}


 32%|███▏      | 370/1156 [06:50<13:08,  1.00s/it]

{'loss': 0.1728, 'learning_rate': 6.799307958477509e-06, 'epoch': 1.28}


 33%|███▎      | 380/1156 [07:00<13:01,  1.01s/it]

{'loss': 0.3441, 'learning_rate': 6.71280276816609e-06, 'epoch': 1.31}


 34%|███▎      | 390/1156 [07:10<12:50,  1.01s/it]

{'loss': 0.4893, 'learning_rate': 6.626297577854672e-06, 'epoch': 1.35}


 35%|███▍      | 400/1156 [07:20<12:43,  1.01s/it]

{'loss': 0.0941, 'learning_rate': 6.539792387543253e-06, 'epoch': 1.38}


 35%|███▌      | 410/1156 [07:30<12:31,  1.01s/it]

{'loss': 0.2223, 'learning_rate': 6.453287197231834e-06, 'epoch': 1.42}


 36%|███▋      | 420/1156 [07:40<12:20,  1.01s/it]

{'loss': 0.188, 'learning_rate': 6.3667820069204156e-06, 'epoch': 1.45}


 37%|███▋      | 430/1156 [07:51<12:09,  1.01s/it]

{'loss': 0.0807, 'learning_rate': 6.280276816608997e-06, 'epoch': 1.49}


 38%|███▊      | 440/1156 [08:01<11:58,  1.00s/it]

{'loss': 0.1829, 'learning_rate': 6.193771626297579e-06, 'epoch': 1.52}


 39%|███▉      | 450/1156 [08:11<11:49,  1.01s/it]

{'loss': 0.3132, 'learning_rate': 6.10726643598616e-06, 'epoch': 1.56}


 40%|███▉      | 460/1156 [08:21<11:46,  1.02s/it]

{'loss': 0.0914, 'learning_rate': 6.020761245674741e-06, 'epoch': 1.59}


 41%|████      | 470/1156 [08:31<11:29,  1.01s/it]

{'loss': 0.3233, 'learning_rate': 5.9342560553633225e-06, 'epoch': 1.63}


 42%|████▏     | 480/1156 [08:41<11:18,  1.00s/it]

{'loss': 0.2352, 'learning_rate': 5.847750865051903e-06, 'epoch': 1.66}


 42%|████▏     | 490/1156 [08:51<11:16,  1.02s/it]

{'loss': 0.0865, 'learning_rate': 5.761245674740484e-06, 'epoch': 1.7}


 43%|████▎     | 500/1156 [09:01<10:58,  1.00s/it]

{'loss': 0.1741, 'learning_rate': 5.674740484429066e-06, 'epoch': 1.73}


 44%|████▍     | 510/1156 [09:11<10:49,  1.00s/it]

{'loss': 0.2296, 'learning_rate': 5.588235294117647e-06, 'epoch': 1.76}


 45%|████▍     | 520/1156 [09:21<10:37,  1.00s/it]

{'loss': 0.2963, 'learning_rate': 5.501730103806229e-06, 'epoch': 1.8}


 46%|████▌     | 530/1156 [09:31<10:31,  1.01s/it]

{'loss': 0.1931, 'learning_rate': 5.41522491349481e-06, 'epoch': 1.83}


 47%|████▋     | 540/1156 [09:41<10:18,  1.00s/it]

{'loss': 0.1877, 'learning_rate': 5.328719723183391e-06, 'epoch': 1.87}


 48%|████▊     | 550/1156 [09:52<10:09,  1.01s/it]

{'loss': 0.299, 'learning_rate': 5.242214532871973e-06, 'epoch': 1.9}


 48%|████▊     | 560/1156 [10:02<10:01,  1.01s/it]

{'loss': 0.0712, 'learning_rate': 5.155709342560554e-06, 'epoch': 1.94}


 49%|████▉     | 570/1156 [10:12<09:51,  1.01s/it]

{'loss': 0.0922, 'learning_rate': 5.069204152249136e-06, 'epoch': 1.97}


                                                  
 50%|█████     | 578/1156 [10:49<08:50,  1.09it/s]

{'eval_loss': 1.5282212495803833, 'eval_accuracy': 0.7020979020979021, 'eval_precision': 0.7006474373944253, 'eval_recall': 0.7020979020979021, 'eval_f1': 0.7003606702606648, 'eval_runtime': 29.0842, 'eval_samples_per_second': 24.584, 'eval_steps_per_second': 3.094, 'epoch': 2.0}


 50%|█████     | 580/1156 [10:58<1:22:02,  8.55s/it]

{'loss': 0.3589, 'learning_rate': 4.982698961937717e-06, 'epoch': 2.01}


 51%|█████     | 590/1156 [11:08<11:29,  1.22s/it]  

{'loss': 0.0624, 'learning_rate': 4.896193771626298e-06, 'epoch': 2.04}


 52%|█████▏    | 600/1156 [11:18<09:24,  1.01s/it]

{'loss': 0.0109, 'learning_rate': 4.809688581314879e-06, 'epoch': 2.08}


 53%|█████▎    | 610/1156 [11:28<09:09,  1.01s/it]

{'loss': 0.0784, 'learning_rate': 4.7231833910034605e-06, 'epoch': 2.11}


 54%|█████▎    | 620/1156 [11:38<08:55,  1.00it/s]

{'loss': 0.2869, 'learning_rate': 4.636678200692042e-06, 'epoch': 2.15}


 54%|█████▍    | 630/1156 [11:48<08:49,  1.01s/it]

{'loss': 0.1179, 'learning_rate': 4.550173010380623e-06, 'epoch': 2.18}


 55%|█████▌    | 640/1156 [11:58<08:40,  1.01s/it]

{'loss': 0.0094, 'learning_rate': 4.463667820069205e-06, 'epoch': 2.21}


 56%|█████▌    | 650/1156 [12:08<08:31,  1.01s/it]

{'loss': 0.1959, 'learning_rate': 4.377162629757785e-06, 'epoch': 2.25}


 57%|█████▋    | 660/1156 [12:18<08:19,  1.01s/it]

{'loss': 0.0163, 'learning_rate': 4.2906574394463675e-06, 'epoch': 2.28}


 58%|█████▊    | 670/1156 [12:28<08:10,  1.01s/it]

{'loss': 0.0069, 'learning_rate': 4.204152249134949e-06, 'epoch': 2.32}


 59%|█████▉    | 680/1156 [12:38<07:58,  1.00s/it]

{'loss': 0.142, 'learning_rate': 4.11764705882353e-06, 'epoch': 2.35}


 60%|█████▉    | 690/1156 [12:48<07:46,  1.00s/it]

{'loss': 0.0956, 'learning_rate': 4.031141868512111e-06, 'epoch': 2.39}


 61%|██████    | 700/1156 [12:58<07:38,  1.01s/it]

{'loss': 0.2211, 'learning_rate': 3.944636678200692e-06, 'epoch': 2.42}


 61%|██████▏   | 710/1156 [13:08<07:28,  1.00s/it]

{'loss': 0.0047, 'learning_rate': 3.8581314878892736e-06, 'epoch': 2.46}


 62%|██████▏   | 720/1156 [13:19<07:18,  1.01s/it]

{'loss': 0.0223, 'learning_rate': 3.7716262975778552e-06, 'epoch': 2.49}


 63%|██████▎   | 730/1156 [13:29<07:13,  1.02s/it]

{'loss': 0.1841, 'learning_rate': 3.685121107266436e-06, 'epoch': 2.53}


 64%|██████▍   | 740/1156 [13:39<07:00,  1.01s/it]

{'loss': 0.1068, 'learning_rate': 3.5986159169550177e-06, 'epoch': 2.56}


 65%|██████▍   | 750/1156 [13:49<06:50,  1.01s/it]

{'loss': 0.0032, 'learning_rate': 3.512110726643599e-06, 'epoch': 2.6}


 66%|██████▌   | 760/1156 [13:59<06:39,  1.01s/it]

{'loss': 0.2593, 'learning_rate': 3.42560553633218e-06, 'epoch': 2.63}


 67%|██████▋   | 770/1156 [14:09<06:28,  1.01s/it]

{'loss': 0.2228, 'learning_rate': 3.3391003460207618e-06, 'epoch': 2.66}


 67%|██████▋   | 780/1156 [14:19<06:21,  1.02s/it]

{'loss': 0.1048, 'learning_rate': 3.2525951557093425e-06, 'epoch': 2.7}


 68%|██████▊   | 790/1156 [14:29<06:08,  1.01s/it]

{'loss': 0.0574, 'learning_rate': 3.166089965397924e-06, 'epoch': 2.73}


 69%|██████▉   | 800/1156 [14:39<05:56,  1.00s/it]

{'loss': 0.2262, 'learning_rate': 3.0795847750865054e-06, 'epoch': 2.77}


 70%|███████   | 810/1156 [14:49<05:47,  1.00s/it]

{'loss': 0.0994, 'learning_rate': 2.9930795847750866e-06, 'epoch': 2.8}


 71%|███████   | 820/1156 [15:00<05:38,  1.01s/it]

{'loss': 0.08, 'learning_rate': 2.9065743944636683e-06, 'epoch': 2.84}


 72%|███████▏  | 830/1156 [15:10<05:27,  1.01s/it]

{'loss': 0.1486, 'learning_rate': 2.820069204152249e-06, 'epoch': 2.87}


 73%|███████▎  | 840/1156 [15:20<05:17,  1.00s/it]

{'loss': 0.1274, 'learning_rate': 2.7335640138408307e-06, 'epoch': 2.91}


 74%|███████▎  | 850/1156 [15:30<05:09,  1.01s/it]

{'loss': 0.0266, 'learning_rate': 2.647058823529412e-06, 'epoch': 2.94}


 74%|███████▍  | 860/1156 [15:40<04:57,  1.01s/it]

{'loss': 0.1057, 'learning_rate': 2.560553633217993e-06, 'epoch': 2.98}


                                                  
 75%|███████▌  | 867/1156 [16:16<04:23,  1.10it/s]

{'eval_loss': 1.8142880201339722, 'eval_accuracy': 0.6867132867132867, 'eval_precision': 0.6916137649759941, 'eval_recall': 0.6867132867132867, 'eval_f1': 0.6828090354446221, 'eval_runtime': 29.0818, 'eval_samples_per_second': 24.586, 'eval_steps_per_second': 3.095, 'epoch': 3.0}


 75%|███████▌  | 870/1156 [16:26<30:14,  6.34s/it]

{'loss': 0.0272, 'learning_rate': 2.4740484429065744e-06, 'epoch': 3.01}


 76%|███████▌  | 880/1156 [16:36<05:19,  1.16s/it]

{'loss': 0.0832, 'learning_rate': 2.387543252595156e-06, 'epoch': 3.04}


 77%|███████▋  | 890/1156 [16:46<04:27,  1.01s/it]

{'loss': 0.0684, 'learning_rate': 2.3010380622837373e-06, 'epoch': 3.08}


 78%|███████▊  | 900/1156 [16:56<04:16,  1.00s/it]

{'loss': 0.0017, 'learning_rate': 2.2145328719723185e-06, 'epoch': 3.11}


 79%|███████▊  | 910/1156 [17:06<04:07,  1.01s/it]

{'loss': 0.2332, 'learning_rate': 2.1280276816609e-06, 'epoch': 3.15}


 80%|███████▉  | 920/1156 [17:17<04:08,  1.05s/it]

{'loss': 0.0203, 'learning_rate': 2.041522491349481e-06, 'epoch': 3.18}


 80%|████████  | 930/1156 [17:27<03:50,  1.02s/it]

{'loss': 0.0168, 'learning_rate': 1.9550173010380626e-06, 'epoch': 3.22}


 81%|████████▏ | 940/1156 [17:37<03:37,  1.01s/it]

{'loss': 0.0101, 'learning_rate': 1.8685121107266438e-06, 'epoch': 3.25}


 82%|████████▏ | 950/1156 [17:47<03:25,  1.00it/s]

{'loss': 0.146, 'learning_rate': 1.7820069204152252e-06, 'epoch': 3.29}


 83%|████████▎ | 960/1156 [17:57<03:16,  1.00s/it]

{'loss': 0.0749, 'learning_rate': 1.6955017301038063e-06, 'epoch': 3.32}


 84%|████████▍ | 970/1156 [18:07<03:07,  1.01s/it]

{'loss': 0.0052, 'learning_rate': 1.6089965397923877e-06, 'epoch': 3.36}


 85%|████████▍ | 980/1156 [18:17<02:57,  1.01s/it]

{'loss': 0.015, 'learning_rate': 1.522491349480969e-06, 'epoch': 3.39}


 86%|████████▌ | 990/1156 [18:27<02:47,  1.01s/it]

{'loss': 0.0025, 'learning_rate': 1.4359861591695503e-06, 'epoch': 3.43}


 87%|████████▋ | 1000/1156 [18:37<02:37,  1.01s/it]

{'loss': 0.0222, 'learning_rate': 1.3494809688581318e-06, 'epoch': 3.46}


 87%|████████▋ | 1010/1156 [18:47<02:27,  1.01s/it]

{'loss': 0.1206, 'learning_rate': 1.2629757785467128e-06, 'epoch': 3.49}


 88%|████████▊ | 1020/1156 [18:57<02:17,  1.01s/it]

{'loss': 0.0491, 'learning_rate': 1.1764705882352942e-06, 'epoch': 3.53}


 89%|████████▉ | 1030/1156 [19:07<02:06,  1.01s/it]

{'loss': 0.0011, 'learning_rate': 1.0899653979238757e-06, 'epoch': 3.56}


 90%|████████▉ | 1040/1156 [19:17<01:56,  1.01s/it]

{'loss': 0.2303, 'learning_rate': 1.0034602076124569e-06, 'epoch': 3.6}


 91%|█████████ | 1050/1156 [19:28<01:47,  1.01s/it]

{'loss': 0.0412, 'learning_rate': 9.169550173010382e-07, 'epoch': 3.63}


 92%|█████████▏| 1060/1156 [19:38<01:36,  1.01s/it]

{'loss': 0.0129, 'learning_rate': 8.304498269896194e-07, 'epoch': 3.67}


 93%|█████████▎| 1070/1156 [19:48<01:26,  1.01s/it]

{'loss': 0.133, 'learning_rate': 7.439446366782008e-07, 'epoch': 3.7}


 93%|█████████▎| 1080/1156 [19:58<01:16,  1.01s/it]

{'loss': 0.0782, 'learning_rate': 6.57439446366782e-07, 'epoch': 3.74}


 94%|█████████▍| 1090/1156 [20:08<01:06,  1.00s/it]

{'loss': 0.0081, 'learning_rate': 5.709342560553634e-07, 'epoch': 3.77}


 95%|█████████▌| 1100/1156 [20:18<00:57,  1.03s/it]

{'loss': 0.0064, 'learning_rate': 4.844290657439446e-07, 'epoch': 3.81}


 96%|█████████▌| 1110/1156 [20:28<00:46,  1.01s/it]

{'loss': 0.1708, 'learning_rate': 3.9792387543252597e-07, 'epoch': 3.84}


 97%|█████████▋| 1120/1156 [20:38<00:36,  1.00s/it]

{'loss': 0.1354, 'learning_rate': 3.114186851211073e-07, 'epoch': 3.88}


 98%|█████████▊| 1130/1156 [20:48<00:26,  1.01s/it]

{'loss': 0.0245, 'learning_rate': 2.249134948096886e-07, 'epoch': 3.91}


 99%|█████████▊| 1140/1156 [20:59<00:16,  1.01s/it]

{'loss': 0.1201, 'learning_rate': 1.384083044982699e-07, 'epoch': 3.94}


 99%|█████████▉| 1150/1156 [21:09<00:06,  1.00s/it]

{'loss': 0.0649, 'learning_rate': 5.1903114186851215e-08, 'epoch': 3.98}


                                                   
100%|██████████| 1156/1156 [21:43<00:00,  1.10it/s]

{'eval_loss': 1.7976229190826416, 'eval_accuracy': 0.6979020979020979, 'eval_precision': 0.7025360227292313, 'eval_recall': 0.6979020979020979, 'eval_f1': 0.6971241785350715, 'eval_runtime': 28.7048, 'eval_samples_per_second': 24.909, 'eval_steps_per_second': 3.135, 'epoch': 4.0}


100%|██████████| 1156/1156 [21:53<00:00,  1.14s/it]


{'train_runtime': 1313.0727, 'train_samples_per_second': 7.031, 'train_steps_per_second': 0.88, 'train_loss': 0.1708903636051838, 'epoch': 4.0}


100%|██████████| 90/90 [00:28<00:00,  3.21it/s]
100%|██████████| 90/90 [00:27<00:00,  3.22it/s]
100%|██████████| 90/90 [00:27<00:00,  3.27it/s]


{'accuracy': 0.6993006993006993, 'precision': 0.7143570457904823, 'recall': 0.6993006993006993, 'f1': 0.7029886689839709}
{'accuracy': 0.6764705882352942, 'precision': 0.6811077053071255, 'recall': 0.6764705882352942, 'f1': 0.6778650738906539}


  1%|          | 10/1156 [00:11<20:07,  1.05s/it]

{'loss': 0.218, 'learning_rate': 9.913494809688582e-06, 'epoch': 0.03}


  2%|▏         | 20/1156 [00:21<18:59,  1.00s/it]

{'loss': 0.0324, 'learning_rate': 9.826989619377163e-06, 'epoch': 0.07}


  3%|▎         | 30/1156 [00:31<18:56,  1.01s/it]

{'loss': 0.1462, 'learning_rate': 9.740484429065744e-06, 'epoch': 0.1}


  3%|▎         | 40/1156 [00:41<18:41,  1.01s/it]

{'loss': 0.3066, 'learning_rate': 9.653979238754326e-06, 'epoch': 0.14}


  4%|▍         | 50/1156 [00:52<18:49,  1.02s/it]

{'loss': 0.5229, 'learning_rate': 9.567474048442907e-06, 'epoch': 0.17}


  5%|▌         | 60/1156 [01:02<18:20,  1.00s/it]

{'loss': 0.1819, 'learning_rate': 9.480968858131488e-06, 'epoch': 0.21}


  6%|▌         | 70/1156 [01:12<18:10,  1.00s/it]

{'loss': 0.1574, 'learning_rate': 9.39446366782007e-06, 'epoch': 0.24}


  7%|▋         | 80/1156 [01:22<18:01,  1.01s/it]

{'loss': 0.2381, 'learning_rate': 9.307958477508652e-06, 'epoch': 0.28}


  8%|▊         | 90/1156 [01:32<17:54,  1.01s/it]

{'loss': 0.1841, 'learning_rate': 9.221453287197234e-06, 'epoch': 0.31}


  9%|▊         | 100/1156 [01:42<17:38,  1.00s/it]

{'loss': 0.2245, 'learning_rate': 9.134948096885815e-06, 'epoch': 0.35}


 10%|▉         | 110/1156 [01:52<17:33,  1.01s/it]

{'loss': 0.0736, 'learning_rate': 9.048442906574394e-06, 'epoch': 0.38}


 10%|█         | 120/1156 [02:02<17:22,  1.01s/it]

{'loss': 0.085, 'learning_rate': 8.961937716262975e-06, 'epoch': 0.42}


 11%|█         | 130/1156 [02:12<17:09,  1.00s/it]

{'loss': 0.3641, 'learning_rate': 8.875432525951558e-06, 'epoch': 0.45}


 12%|█▏        | 140/1156 [02:22<17:01,  1.01s/it]

{'loss': 0.1452, 'learning_rate': 8.78892733564014e-06, 'epoch': 0.48}


 13%|█▎        | 150/1156 [02:32<16:52,  1.01s/it]

{'loss': 0.2681, 'learning_rate': 8.702422145328721e-06, 'epoch': 0.52}


 14%|█▍        | 160/1156 [02:42<17:26,  1.05s/it]

{'loss': 0.3174, 'learning_rate': 8.615916955017302e-06, 'epoch': 0.55}


 15%|█▍        | 170/1156 [02:53<16:52,  1.03s/it]

{'loss': 0.1179, 'learning_rate': 8.529411764705883e-06, 'epoch': 0.59}


 16%|█▌        | 180/1156 [03:03<16:21,  1.01s/it]

{'loss': 0.0999, 'learning_rate': 8.442906574394465e-06, 'epoch': 0.62}


 16%|█▋        | 190/1156 [03:13<16:09,  1.00s/it]

{'loss': 0.1149, 'learning_rate': 8.356401384083046e-06, 'epoch': 0.66}


 17%|█▋        | 200/1156 [03:23<15:55,  1.00it/s]

{'loss': 0.2639, 'learning_rate': 8.269896193771627e-06, 'epoch': 0.69}


 18%|█▊        | 210/1156 [03:33<15:49,  1.00s/it]

{'loss': 0.382, 'learning_rate': 8.183391003460208e-06, 'epoch': 0.73}


 19%|█▉        | 220/1156 [03:43<15:41,  1.01s/it]

{'loss': 0.121, 'learning_rate': 8.09688581314879e-06, 'epoch': 0.76}


 20%|█▉        | 230/1156 [03:53<15:37,  1.01s/it]

{'loss': 0.1487, 'learning_rate': 8.01038062283737e-06, 'epoch': 0.8}


 21%|██        | 240/1156 [04:03<15:24,  1.01s/it]

{'loss': 0.18, 'learning_rate': 7.923875432525952e-06, 'epoch': 0.83}


 22%|██▏       | 250/1156 [04:13<15:11,  1.01s/it]

{'loss': 0.3172, 'learning_rate': 7.837370242214533e-06, 'epoch': 0.87}


 22%|██▏       | 260/1156 [04:23<14:58,  1.00s/it]

{'loss': 0.1573, 'learning_rate': 7.750865051903114e-06, 'epoch': 0.9}


 23%|██▎       | 270/1156 [04:33<14:52,  1.01s/it]

{'loss': 0.1136, 'learning_rate': 7.664359861591696e-06, 'epoch': 0.93}


 24%|██▍       | 280/1156 [04:43<15:01,  1.03s/it]

{'loss': 0.2669, 'learning_rate': 7.577854671280277e-06, 'epoch': 0.97}


                                                  
 25%|██▌       | 289/1156 [05:20<13:16,  1.09it/s]

{'eval_loss': 1.5173479318618774, 'eval_accuracy': 0.7034965034965035, 'eval_precision': 0.7025351358447551, 'eval_recall': 0.7034965034965035, 'eval_f1': 0.7029598465274266, 'eval_runtime': 27.6282, 'eval_samples_per_second': 25.879, 'eval_steps_per_second': 3.258, 'epoch': 1.0}


 25%|██▌       | 290/1156 [05:28<2:41:51, 11.21s/it]

{'loss': 0.6899, 'learning_rate': 7.491349480968859e-06, 'epoch': 1.0}


 26%|██▌       | 300/1156 [05:38<18:29,  1.30s/it]  

{'loss': 0.1216, 'learning_rate': 7.40484429065744e-06, 'epoch': 1.04}


 27%|██▋       | 310/1156 [05:48<14:24,  1.02s/it]

{'loss': 0.0048, 'learning_rate': 7.318339100346021e-06, 'epoch': 1.07}


 28%|██▊       | 320/1156 [05:58<14:05,  1.01s/it]

{'loss': 0.0684, 'learning_rate': 7.2318339100346025e-06, 'epoch': 1.11}


 29%|██▊       | 330/1156 [06:08<13:48,  1.00s/it]

{'loss': 0.072, 'learning_rate': 7.145328719723184e-06, 'epoch': 1.14}


 29%|██▉       | 340/1156 [06:18<13:38,  1.00s/it]

{'loss': 0.033, 'learning_rate': 7.058823529411766e-06, 'epoch': 1.18}


 30%|███       | 350/1156 [06:28<13:35,  1.01s/it]

{'loss': 0.0018, 'learning_rate': 6.972318339100347e-06, 'epoch': 1.21}


 31%|███       | 360/1156 [06:38<13:43,  1.03s/it]

{'loss': 0.1008, 'learning_rate': 6.885813148788928e-06, 'epoch': 1.25}


 32%|███▏      | 370/1156 [06:49<13:26,  1.03s/it]

{'loss': 0.1531, 'learning_rate': 6.799307958477509e-06, 'epoch': 1.28}


 33%|███▎      | 380/1156 [06:59<13:07,  1.01s/it]

{'loss': 0.1785, 'learning_rate': 6.71280276816609e-06, 'epoch': 1.31}


 34%|███▎      | 390/1156 [07:09<12:58,  1.02s/it]

{'loss': 0.297, 'learning_rate': 6.626297577854672e-06, 'epoch': 1.35}


 35%|███▍      | 400/1156 [07:19<12:39,  1.01s/it]

{'loss': 0.0293, 'learning_rate': 6.539792387543253e-06, 'epoch': 1.38}


 35%|███▌      | 410/1156 [07:29<12:31,  1.01s/it]

{'loss': 0.0543, 'learning_rate': 6.453287197231834e-06, 'epoch': 1.42}


 36%|███▋      | 420/1156 [07:39<12:20,  1.01s/it]

{'loss': 0.0521, 'learning_rate': 6.3667820069204156e-06, 'epoch': 1.45}


 37%|███▋      | 430/1156 [07:49<12:08,  1.00s/it]

{'loss': 0.0072, 'learning_rate': 6.280276816608997e-06, 'epoch': 1.49}


 38%|███▊      | 440/1156 [07:59<12:00,  1.01s/it]

{'loss': 0.0188, 'learning_rate': 6.193771626297579e-06, 'epoch': 1.52}


 39%|███▉      | 450/1156 [08:10<11:47,  1.00s/it]

{'loss': 0.2205, 'learning_rate': 6.10726643598616e-06, 'epoch': 1.56}


 40%|███▉      | 460/1156 [08:20<11:40,  1.01s/it]

{'loss': 0.1472, 'learning_rate': 6.020761245674741e-06, 'epoch': 1.59}


 41%|████      | 470/1156 [08:30<11:29,  1.00s/it]

{'loss': 0.2699, 'learning_rate': 5.9342560553633225e-06, 'epoch': 1.63}


 42%|████▏     | 480/1156 [08:40<11:19,  1.00s/it]

{'loss': 0.2057, 'learning_rate': 5.847750865051903e-06, 'epoch': 1.66}


 42%|████▏     | 490/1156 [08:50<11:31,  1.04s/it]

{'loss': 0.1698, 'learning_rate': 5.761245674740484e-06, 'epoch': 1.7}


 43%|████▎     | 500/1156 [09:00<11:02,  1.01s/it]

{'loss': 0.2792, 'learning_rate': 5.674740484429066e-06, 'epoch': 1.73}


 44%|████▍     | 510/1156 [09:11<10:50,  1.01s/it]

{'loss': 0.3027, 'learning_rate': 5.588235294117647e-06, 'epoch': 1.76}


 45%|████▍     | 520/1156 [09:21<10:39,  1.01s/it]

{'loss': 0.2506, 'learning_rate': 5.501730103806229e-06, 'epoch': 1.8}


 46%|████▌     | 530/1156 [09:31<10:32,  1.01s/it]

{'loss': 0.1619, 'learning_rate': 5.41522491349481e-06, 'epoch': 1.83}


 47%|████▋     | 540/1156 [09:41<10:20,  1.01s/it]

{'loss': 0.1274, 'learning_rate': 5.328719723183391e-06, 'epoch': 1.87}


 48%|████▊     | 550/1156 [09:51<10:11,  1.01s/it]

{'loss': 0.3403, 'learning_rate': 5.242214532871973e-06, 'epoch': 1.9}


 48%|████▊     | 560/1156 [10:01<10:00,  1.01s/it]

{'loss': 0.0969, 'learning_rate': 5.155709342560554e-06, 'epoch': 1.94}


 49%|████▉     | 570/1156 [10:11<09:49,  1.01s/it]

{'loss': 0.0077, 'learning_rate': 5.069204152249136e-06, 'epoch': 1.97}


                                                  
 50%|█████     | 578/1156 [10:48<08:49,  1.09it/s]

{'eval_loss': 1.8100396394729614, 'eval_accuracy': 0.6853146853146853, 'eval_precision': 0.701841333392699, 'eval_recall': 0.6853146853146853, 'eval_f1': 0.6891709192080862, 'eval_runtime': 29.1627, 'eval_samples_per_second': 24.518, 'eval_steps_per_second': 3.086, 'epoch': 2.0}


 50%|█████     | 580/1156 [10:57<1:22:05,  8.55s/it]

{'loss': 0.172, 'learning_rate': 4.982698961937717e-06, 'epoch': 2.01}


 51%|█████     | 590/1156 [11:07<11:31,  1.22s/it]  

{'loss': 0.0451, 'learning_rate': 4.896193771626298e-06, 'epoch': 2.04}


 52%|█████▏    | 600/1156 [11:17<09:19,  1.01s/it]

{'loss': 0.0038, 'learning_rate': 4.809688581314879e-06, 'epoch': 2.08}


 53%|█████▎    | 610/1156 [11:27<09:07,  1.00s/it]

{'loss': 0.0031, 'learning_rate': 4.7231833910034605e-06, 'epoch': 2.11}


 54%|█████▎    | 620/1156 [11:37<09:00,  1.01s/it]

{'loss': 0.145, 'learning_rate': 4.636678200692042e-06, 'epoch': 2.15}


 54%|█████▍    | 630/1156 [11:47<08:53,  1.01s/it]

{'loss': 0.1014, 'learning_rate': 4.550173010380623e-06, 'epoch': 2.18}


 55%|█████▌    | 640/1156 [11:58<08:40,  1.01s/it]

{'loss': 0.0163, 'learning_rate': 4.463667820069205e-06, 'epoch': 2.21}


 56%|█████▌    | 650/1156 [12:08<08:27,  1.00s/it]

{'loss': 0.0907, 'learning_rate': 4.377162629757785e-06, 'epoch': 2.25}


 57%|█████▋    | 660/1156 [12:18<08:19,  1.01s/it]

{'loss': 0.0008, 'learning_rate': 4.2906574394463675e-06, 'epoch': 2.28}


 58%|█████▊    | 670/1156 [12:28<08:08,  1.01s/it]

{'loss': 0.0131, 'learning_rate': 4.204152249134949e-06, 'epoch': 2.32}


 59%|█████▉    | 680/1156 [12:38<07:58,  1.01s/it]

{'loss': 0.1727, 'learning_rate': 4.11764705882353e-06, 'epoch': 2.35}


 60%|█████▉    | 690/1156 [12:48<07:47,  1.00s/it]

{'loss': 0.0623, 'learning_rate': 4.031141868512111e-06, 'epoch': 2.39}


 61%|██████    | 700/1156 [12:59<07:54,  1.04s/it]

{'loss': 0.0939, 'learning_rate': 3.944636678200692e-06, 'epoch': 2.42}


 61%|██████▏   | 710/1156 [13:09<07:29,  1.01s/it]

{'loss': 0.0463, 'learning_rate': 3.8581314878892736e-06, 'epoch': 2.46}


 62%|██████▏   | 720/1156 [13:19<07:19,  1.01s/it]

{'loss': 0.0145, 'learning_rate': 3.7716262975778552e-06, 'epoch': 2.49}


 63%|██████▎   | 730/1156 [13:29<07:07,  1.00s/it]

{'loss': 0.1434, 'learning_rate': 3.685121107266436e-06, 'epoch': 2.53}


 64%|██████▍   | 740/1156 [13:39<07:02,  1.02s/it]

{'loss': 0.0636, 'learning_rate': 3.5986159169550177e-06, 'epoch': 2.56}


 65%|██████▍   | 750/1156 [13:49<06:48,  1.01s/it]

{'loss': 0.0017, 'learning_rate': 3.512110726643599e-06, 'epoch': 2.6}


 66%|██████▌   | 760/1156 [13:59<06:38,  1.01s/it]

{'loss': 0.1559, 'learning_rate': 3.42560553633218e-06, 'epoch': 2.63}


 67%|██████▋   | 770/1156 [14:09<06:28,  1.01s/it]

{'loss': 0.1519, 'learning_rate': 3.3391003460207618e-06, 'epoch': 2.66}


 67%|██████▋   | 780/1156 [14:19<06:17,  1.01s/it]

{'loss': 0.0261, 'learning_rate': 3.2525951557093425e-06, 'epoch': 2.7}


 68%|██████▊   | 790/1156 [14:29<06:08,  1.01s/it]

{'loss': 0.0447, 'learning_rate': 3.166089965397924e-06, 'epoch': 2.73}


 69%|██████▉   | 800/1156 [14:39<05:57,  1.00s/it]

{'loss': 0.1103, 'learning_rate': 3.0795847750865054e-06, 'epoch': 2.77}


 70%|███████   | 810/1156 [14:50<05:50,  1.01s/it]

{'loss': 0.0056, 'learning_rate': 2.9930795847750866e-06, 'epoch': 2.8}


 71%|███████   | 820/1156 [15:00<05:39,  1.01s/it]

{'loss': 0.0708, 'learning_rate': 2.9065743944636683e-06, 'epoch': 2.84}


 72%|███████▏  | 830/1156 [15:10<05:26,  1.00s/it]

{'loss': 0.081, 'learning_rate': 2.820069204152249e-06, 'epoch': 2.87}


 73%|███████▎  | 840/1156 [15:20<05:17,  1.00s/it]

{'loss': 0.0009, 'learning_rate': 2.7335640138408307e-06, 'epoch': 2.91}


 74%|███████▎  | 850/1156 [15:30<05:07,  1.01s/it]

{'loss': 0.0336, 'learning_rate': 2.647058823529412e-06, 'epoch': 2.94}


 74%|███████▍  | 860/1156 [15:40<04:58,  1.01s/it]

{'loss': 0.053, 'learning_rate': 2.560553633217993e-06, 'epoch': 2.98}


                                                  
 75%|███████▌  | 867/1156 [16:15<04:23,  1.10it/s]

{'eval_loss': 1.874664306640625, 'eval_accuracy': 0.7048951048951049, 'eval_precision': 0.7132556972552685, 'eval_recall': 0.7048951048951049, 'eval_f1': 0.7055926231288405, 'eval_runtime': 28.3427, 'eval_samples_per_second': 25.227, 'eval_steps_per_second': 3.175, 'epoch': 3.0}


 75%|███████▌  | 870/1156 [16:25<29:16,  6.14s/it]

{'loss': 0.0213, 'learning_rate': 2.4740484429065744e-06, 'epoch': 3.01}


 76%|███████▌  | 880/1156 [16:35<05:17,  1.15s/it]

{'loss': 0.0014, 'learning_rate': 2.387543252595156e-06, 'epoch': 3.04}


 77%|███████▋  | 890/1156 [16:45<04:27,  1.01s/it]

{'loss': 0.1468, 'learning_rate': 2.3010380622837373e-06, 'epoch': 3.08}


 78%|███████▊  | 900/1156 [16:55<04:17,  1.01s/it]

{'loss': 0.0007, 'learning_rate': 2.2145328719723185e-06, 'epoch': 3.11}


 79%|███████▊  | 910/1156 [17:05<04:06,  1.00s/it]

{'loss': 0.1477, 'learning_rate': 2.1280276816609e-06, 'epoch': 3.15}


 80%|███████▉  | 920/1156 [17:15<03:56,  1.00s/it]

{'loss': 0.0126, 'learning_rate': 2.041522491349481e-06, 'epoch': 3.18}


 80%|████████  | 930/1156 [17:25<03:46,  1.00s/it]

{'loss': 0.0126, 'learning_rate': 1.9550173010380626e-06, 'epoch': 3.22}


 81%|████████▏ | 940/1156 [17:35<03:36,  1.00s/it]

{'loss': 0.0024, 'learning_rate': 1.8685121107266438e-06, 'epoch': 3.25}


 82%|████████▏ | 950/1156 [17:46<03:31,  1.03s/it]

{'loss': 0.0008, 'learning_rate': 1.7820069204152252e-06, 'epoch': 3.29}


 83%|████████▎ | 960/1156 [17:56<03:18,  1.02s/it]

{'loss': 0.0822, 'learning_rate': 1.6955017301038063e-06, 'epoch': 3.32}


 84%|████████▍ | 970/1156 [18:06<03:06,  1.00s/it]

{'loss': 0.0009, 'learning_rate': 1.6089965397923877e-06, 'epoch': 3.36}


 85%|████████▍ | 980/1156 [18:16<02:56,  1.00s/it]

{'loss': 0.025, 'learning_rate': 1.522491349480969e-06, 'epoch': 3.39}


 86%|████████▌ | 990/1156 [18:26<02:46,  1.01s/it]

{'loss': 0.0009, 'learning_rate': 1.4359861591695503e-06, 'epoch': 3.43}


 87%|████████▋ | 1000/1156 [18:36<02:36,  1.01s/it]

{'loss': 0.0012, 'learning_rate': 1.3494809688581318e-06, 'epoch': 3.46}


 87%|████████▋ | 1010/1156 [18:46<02:26,  1.01s/it]

{'loss': 0.0538, 'learning_rate': 1.2629757785467128e-06, 'epoch': 3.49}


 88%|████████▊ | 1020/1156 [18:56<02:16,  1.00s/it]

{'loss': 0.027, 'learning_rate': 1.1764705882352942e-06, 'epoch': 3.53}


 89%|████████▉ | 1030/1156 [19:06<02:06,  1.01s/it]

{'loss': 0.0007, 'learning_rate': 1.0899653979238757e-06, 'epoch': 3.56}


 90%|████████▉ | 1040/1156 [19:16<01:56,  1.00s/it]

{'loss': 0.1019, 'learning_rate': 1.0034602076124569e-06, 'epoch': 3.6}


 91%|█████████ | 1050/1156 [19:26<01:47,  1.01s/it]

{'loss': 0.003, 'learning_rate': 9.169550173010382e-07, 'epoch': 3.63}


 92%|█████████▏| 1060/1156 [19:36<01:36,  1.01s/it]

{'loss': 0.0015, 'learning_rate': 8.304498269896194e-07, 'epoch': 3.67}


 93%|█████████▎| 1070/1156 [19:46<01:26,  1.01s/it]

{'loss': 0.0727, 'learning_rate': 7.439446366782008e-07, 'epoch': 3.7}


 93%|█████████▎| 1080/1156 [19:56<01:16,  1.01s/it]

{'loss': 0.0465, 'learning_rate': 6.57439446366782e-07, 'epoch': 3.74}


 94%|█████████▍| 1090/1156 [20:07<01:06,  1.01s/it]

{'loss': 0.0013, 'learning_rate': 5.709342560553634e-07, 'epoch': 3.77}


 95%|█████████▌| 1100/1156 [20:17<00:56,  1.01s/it]

{'loss': 0.0008, 'learning_rate': 4.844290657439446e-07, 'epoch': 3.81}


 96%|█████████▌| 1110/1156 [20:27<00:46,  1.01s/it]

{'loss': 0.0608, 'learning_rate': 3.9792387543252597e-07, 'epoch': 3.84}


 97%|█████████▋| 1120/1156 [20:37<00:36,  1.00s/it]

{'loss': 0.1055, 'learning_rate': 3.114186851211073e-07, 'epoch': 3.88}


 98%|█████████▊| 1130/1156 [20:47<00:26,  1.02s/it]

{'loss': 0.0391, 'learning_rate': 2.249134948096886e-07, 'epoch': 3.91}


 99%|█████████▊| 1140/1156 [20:57<00:16,  1.01s/it]

{'loss': 0.0153, 'learning_rate': 1.384083044982699e-07, 'epoch': 3.94}


 99%|█████████▉| 1150/1156 [21:07<00:06,  1.00s/it]

{'loss': 0.0039, 'learning_rate': 5.1903114186851215e-08, 'epoch': 3.98}


                                                   
100%|██████████| 1156/1156 [21:42<00:00,  1.09it/s]

{'eval_loss': 1.9650212526321411, 'eval_accuracy': 0.7146853146853147, 'eval_precision': 0.7208561357042049, 'eval_recall': 0.7146853146853147, 'eval_f1': 0.7125899125080272, 'eval_runtime': 29.5262, 'eval_samples_per_second': 24.216, 'eval_steps_per_second': 3.048, 'epoch': 4.0}


100%|██████████| 1156/1156 [21:52<00:00,  1.14s/it]


{'train_runtime': 1312.1907, 'train_samples_per_second': 7.036, 'train_steps_per_second': 0.881, 'train_loss': 0.11354542547509809, 'epoch': 4.0}


100%|██████████| 90/90 [00:28<00:00,  3.19it/s]
100%|██████████| 90/90 [00:28<00:00,  3.15it/s]
100%|██████████| 90/90 [00:28<00:00,  3.19it/s]


{'accuracy': 0.7034965034965035, 'precision': 0.7025351358447551, 'recall': 0.7034965034965035, 'f1': 0.7029598465274266}
{'accuracy': 0.696078431372549, 'precision': 0.6939170111996336, 'recall': 0.696078431372549, 'f1': 0.694331466564936}


  1%|          | 10/1156 [00:11<19:24,  1.02s/it]

{'loss': 0.1876, 'learning_rate': 9.913494809688582e-06, 'epoch': 0.03}


  2%|▏         | 20/1156 [00:21<19:18,  1.02s/it]

{'loss': 0.0038, 'learning_rate': 9.826989619377163e-06, 'epoch': 0.07}


  3%|▎         | 30/1156 [00:31<19:00,  1.01s/it]

{'loss': 0.0589, 'learning_rate': 9.740484429065744e-06, 'epoch': 0.1}


  3%|▎         | 40/1156 [00:41<18:47,  1.01s/it]

{'loss': 0.2517, 'learning_rate': 9.653979238754326e-06, 'epoch': 0.14}


  4%|▍         | 50/1156 [00:51<18:35,  1.01s/it]

{'loss': 0.5752, 'learning_rate': 9.567474048442907e-06, 'epoch': 0.17}


  5%|▌         | 60/1156 [01:01<18:18,  1.00s/it]

{'loss': 0.1991, 'learning_rate': 9.480968858131488e-06, 'epoch': 0.21}


  6%|▌         | 70/1156 [01:11<18:13,  1.01s/it]

{'loss': 0.0037, 'learning_rate': 9.39446366782007e-06, 'epoch': 0.24}


  7%|▋         | 80/1156 [01:22<18:04,  1.01s/it]

{'loss': 0.1601, 'learning_rate': 9.307958477508652e-06, 'epoch': 0.28}


  8%|▊         | 90/1156 [01:32<17:56,  1.01s/it]

{'loss': 0.154, 'learning_rate': 9.221453287197234e-06, 'epoch': 0.31}


  9%|▊         | 100/1156 [01:42<17:53,  1.02s/it]

{'loss': 0.251, 'learning_rate': 9.134948096885815e-06, 'epoch': 0.35}


 10%|▉         | 110/1156 [01:52<17:28,  1.00s/it]

{'loss': 0.2008, 'learning_rate': 9.048442906574394e-06, 'epoch': 0.38}


 10%|█         | 120/1156 [02:02<17:16,  1.00s/it]

{'loss': 0.2383, 'learning_rate': 8.961937716262975e-06, 'epoch': 0.42}


 11%|█         | 130/1156 [02:12<17:14,  1.01s/it]

{'loss': 0.0916, 'learning_rate': 8.875432525951558e-06, 'epoch': 0.45}


 12%|█▏        | 140/1156 [02:22<16:59,  1.00s/it]

{'loss': 0.1771, 'learning_rate': 8.78892733564014e-06, 'epoch': 0.48}


 13%|█▎        | 150/1156 [02:32<16:49,  1.00s/it]

{'loss': 0.2297, 'learning_rate': 8.702422145328721e-06, 'epoch': 0.52}


 14%|█▍        | 160/1156 [02:42<16:41,  1.01s/it]

{'loss': 0.203, 'learning_rate': 8.615916955017302e-06, 'epoch': 0.55}


 15%|█▍        | 170/1156 [02:52<16:28,  1.00s/it]

{'loss': 0.0047, 'learning_rate': 8.529411764705883e-06, 'epoch': 0.59}


 16%|█▌        | 180/1156 [03:02<16:18,  1.00s/it]

{'loss': 0.0935, 'learning_rate': 8.442906574394465e-06, 'epoch': 0.62}


 16%|█▋        | 190/1156 [03:12<16:15,  1.01s/it]

{'loss': 0.0031, 'learning_rate': 8.356401384083046e-06, 'epoch': 0.66}


 17%|█▋        | 200/1156 [03:23<16:03,  1.01s/it]

{'loss': 0.1388, 'learning_rate': 8.269896193771627e-06, 'epoch': 0.69}


 18%|█▊        | 210/1156 [03:33<15:52,  1.01s/it]

{'loss': 0.2802, 'learning_rate': 8.183391003460208e-06, 'epoch': 0.73}


 19%|█▉        | 220/1156 [03:43<15:40,  1.00s/it]

{'loss': 0.094, 'learning_rate': 8.09688581314879e-06, 'epoch': 0.76}


 20%|█▉        | 230/1156 [03:53<15:33,  1.01s/it]

{'loss': 0.1137, 'learning_rate': 8.01038062283737e-06, 'epoch': 0.8}


 21%|██        | 240/1156 [04:03<15:18,  1.00s/it]

{'loss': 0.224, 'learning_rate': 7.923875432525952e-06, 'epoch': 0.83}


 22%|██▏       | 250/1156 [04:13<15:06,  1.00s/it]

{'loss': 0.2007, 'learning_rate': 7.837370242214533e-06, 'epoch': 0.87}


 22%|██▏       | 260/1156 [04:23<15:03,  1.01s/it]

{'loss': 0.1138, 'learning_rate': 7.750865051903114e-06, 'epoch': 0.9}


 23%|██▎       | 270/1156 [04:33<14:50,  1.01s/it]

{'loss': 0.1006, 'learning_rate': 7.664359861591696e-06, 'epoch': 0.93}


 24%|██▍       | 280/1156 [04:43<14:36,  1.00s/it]

{'loss': 0.2248, 'learning_rate': 7.577854671280277e-06, 'epoch': 0.97}


                                                  
 25%|██▌       | 289/1156 [05:19<13:08,  1.10it/s]

{'eval_loss': 1.7837897539138794, 'eval_accuracy': 0.7132867132867133, 'eval_precision': 0.7123775962315888, 'eval_recall': 0.7132867132867133, 'eval_f1': 0.7121093100354915, 'eval_runtime': 27.6688, 'eval_samples_per_second': 25.841, 'eval_steps_per_second': 3.253, 'epoch': 1.0}


 25%|██▌       | 290/1156 [05:28<2:45:26, 11.46s/it]

{'loss': 0.4693, 'learning_rate': 7.491349480968859e-06, 'epoch': 1.0}


 26%|██▌       | 300/1156 [05:38<18:33,  1.30s/it]  

{'loss': 0.058, 'learning_rate': 7.40484429065744e-06, 'epoch': 1.04}


 27%|██▋       | 310/1156 [05:48<14:15,  1.01s/it]

{'loss': 0.0058, 'learning_rate': 7.318339100346021e-06, 'epoch': 1.07}


 28%|██▊       | 320/1156 [05:58<13:57,  1.00s/it]

{'loss': 0.0179, 'learning_rate': 7.2318339100346025e-06, 'epoch': 1.11}


 29%|██▊       | 330/1156 [06:08<13:50,  1.00s/it]

{'loss': 0.1124, 'learning_rate': 7.145328719723184e-06, 'epoch': 1.14}


 29%|██▉       | 340/1156 [06:18<13:44,  1.01s/it]

{'loss': 0.1312, 'learning_rate': 7.058823529411766e-06, 'epoch': 1.18}


 30%|███       | 350/1156 [06:28<13:33,  1.01s/it]

{'loss': 0.0346, 'learning_rate': 6.972318339100347e-06, 'epoch': 1.21}


 31%|███       | 360/1156 [06:39<13:50,  1.04s/it]

{'loss': 0.0029, 'learning_rate': 6.885813148788928e-06, 'epoch': 1.25}


 32%|███▏      | 370/1156 [06:49<13:15,  1.01s/it]

{'loss': 0.0623, 'learning_rate': 6.799307958477509e-06, 'epoch': 1.28}


 33%|███▎      | 380/1156 [06:59<13:01,  1.01s/it]

{'loss': 0.2936, 'learning_rate': 6.71280276816609e-06, 'epoch': 1.31}


 34%|███▎      | 390/1156 [07:09<12:46,  1.00s/it]

{'loss': 0.2069, 'learning_rate': 6.626297577854672e-06, 'epoch': 1.35}


 35%|███▍      | 400/1156 [07:19<12:48,  1.02s/it]

{'loss': 0.0845, 'learning_rate': 6.539792387543253e-06, 'epoch': 1.38}


 35%|███▌      | 410/1156 [07:29<12:29,  1.00s/it]

{'loss': 0.1695, 'learning_rate': 6.453287197231834e-06, 'epoch': 1.42}


 36%|███▋      | 420/1156 [07:39<12:22,  1.01s/it]

{'loss': 0.0393, 'learning_rate': 6.3667820069204156e-06, 'epoch': 1.45}


 37%|███▋      | 430/1156 [07:49<12:11,  1.01s/it]

{'loss': 0.0009, 'learning_rate': 6.280276816608997e-06, 'epoch': 1.49}


 38%|███▊      | 440/1156 [07:59<12:01,  1.01s/it]

{'loss': 0.0013, 'learning_rate': 6.193771626297579e-06, 'epoch': 1.52}


 39%|███▉      | 450/1156 [08:09<11:47,  1.00s/it]

{'loss': 0.2995, 'learning_rate': 6.10726643598616e-06, 'epoch': 1.56}


 40%|███▉      | 460/1156 [08:19<11:42,  1.01s/it]

{'loss': 0.0202, 'learning_rate': 6.020761245674741e-06, 'epoch': 1.59}


 41%|████      | 470/1156 [08:29<11:32,  1.01s/it]

{'loss': 0.2137, 'learning_rate': 5.9342560553633225e-06, 'epoch': 1.63}


 42%|████▏     | 480/1156 [08:40<11:22,  1.01s/it]

{'loss': 0.1873, 'learning_rate': 5.847750865051903e-06, 'epoch': 1.66}


 42%|████▏     | 490/1156 [08:50<11:12,  1.01s/it]

{'loss': 0.1424, 'learning_rate': 5.761245674740484e-06, 'epoch': 1.7}


 43%|████▎     | 500/1156 [09:00<11:01,  1.01s/it]

{'loss': 0.1471, 'learning_rate': 5.674740484429066e-06, 'epoch': 1.73}


 44%|████▍     | 510/1156 [09:10<11:17,  1.05s/it]

{'loss': 0.1486, 'learning_rate': 5.588235294117647e-06, 'epoch': 1.76}


 45%|████▍     | 520/1156 [09:20<10:46,  1.02s/it]

{'loss': 0.0532, 'learning_rate': 5.501730103806229e-06, 'epoch': 1.8}


 46%|████▌     | 530/1156 [09:31<10:30,  1.01s/it]

{'loss': 0.1069, 'learning_rate': 5.41522491349481e-06, 'epoch': 1.83}


 47%|████▋     | 540/1156 [09:41<10:19,  1.01s/it]

{'loss': 0.1869, 'learning_rate': 5.328719723183391e-06, 'epoch': 1.87}


 48%|████▊     | 550/1156 [09:51<10:08,  1.00s/it]

{'loss': 0.1448, 'learning_rate': 5.242214532871973e-06, 'epoch': 1.9}


 48%|████▊     | 560/1156 [10:01<10:00,  1.01s/it]

{'loss': 0.0042, 'learning_rate': 5.155709342560554e-06, 'epoch': 1.94}


 49%|████▉     | 570/1156 [10:11<09:49,  1.01s/it]

{'loss': 0.0975, 'learning_rate': 5.069204152249136e-06, 'epoch': 1.97}


                                                  
 50%|█████     | 578/1156 [10:48<08:48,  1.09it/s]

{'eval_loss': 1.802547574043274, 'eval_accuracy': 0.6923076923076923, 'eval_precision': 0.7129268727516248, 'eval_recall': 0.6923076923076923, 'eval_f1': 0.6960127378732665, 'eval_runtime': 29.2719, 'eval_samples_per_second': 24.426, 'eval_steps_per_second': 3.075, 'epoch': 2.0}


 50%|█████     | 580/1156 [10:57<1:23:07,  8.66s/it]

{'loss': 0.0925, 'learning_rate': 4.982698961937717e-06, 'epoch': 2.01}


 51%|█████     | 590/1156 [11:07<11:30,  1.22s/it]  

{'loss': 0.0058, 'learning_rate': 4.896193771626298e-06, 'epoch': 2.04}


 52%|█████▏    | 600/1156 [11:17<09:22,  1.01s/it]

{'loss': 0.0195, 'learning_rate': 4.809688581314879e-06, 'epoch': 2.08}


 53%|█████▎    | 610/1156 [11:27<09:07,  1.00s/it]

{'loss': 0.079, 'learning_rate': 4.7231833910034605e-06, 'epoch': 2.11}


 54%|█████▎    | 620/1156 [11:37<08:58,  1.01s/it]

{'loss': 0.0174, 'learning_rate': 4.636678200692042e-06, 'epoch': 2.15}


 54%|█████▍    | 630/1156 [11:47<08:50,  1.01s/it]

{'loss': 0.0767, 'learning_rate': 4.550173010380623e-06, 'epoch': 2.18}


 55%|█████▌    | 640/1156 [11:58<08:39,  1.01s/it]

{'loss': 0.0054, 'learning_rate': 4.463667820069205e-06, 'epoch': 2.21}


 56%|█████▌    | 650/1156 [12:08<08:53,  1.06s/it]

{'loss': 0.0882, 'learning_rate': 4.377162629757785e-06, 'epoch': 2.25}


 57%|█████▋    | 660/1156 [12:18<08:36,  1.04s/it]

{'loss': 0.0005, 'learning_rate': 4.2906574394463675e-06, 'epoch': 2.28}


 58%|█████▊    | 670/1156 [12:28<08:08,  1.01s/it]

{'loss': 0.0558, 'learning_rate': 4.204152249134949e-06, 'epoch': 2.32}


 59%|█████▉    | 680/1156 [12:38<07:58,  1.01s/it]

{'loss': 0.0692, 'learning_rate': 4.11764705882353e-06, 'epoch': 2.35}


 60%|█████▉    | 690/1156 [12:48<07:50,  1.01s/it]

{'loss': 0.086, 'learning_rate': 4.031141868512111e-06, 'epoch': 2.39}


 61%|██████    | 700/1156 [12:59<07:38,  1.01s/it]

{'loss': 0.1061, 'learning_rate': 3.944636678200692e-06, 'epoch': 2.42}


 61%|██████▏   | 710/1156 [13:09<07:27,  1.00s/it]

{'loss': 0.0006, 'learning_rate': 3.8581314878892736e-06, 'epoch': 2.46}


 62%|██████▏   | 720/1156 [13:19<07:20,  1.01s/it]

{'loss': 0.0036, 'learning_rate': 3.7716262975778552e-06, 'epoch': 2.49}


 63%|██████▎   | 730/1156 [13:29<07:08,  1.01s/it]

{'loss': 0.092, 'learning_rate': 3.685121107266436e-06, 'epoch': 2.53}


 64%|██████▍   | 740/1156 [13:39<07:01,  1.01s/it]

{'loss': 0.0253, 'learning_rate': 3.5986159169550177e-06, 'epoch': 2.56}


 65%|██████▍   | 750/1156 [13:49<06:51,  1.01s/it]

{'loss': 0.0009, 'learning_rate': 3.512110726643599e-06, 'epoch': 2.6}


 66%|██████▌   | 760/1156 [13:59<06:38,  1.01s/it]

{'loss': 0.0755, 'learning_rate': 3.42560553633218e-06, 'epoch': 2.63}


 67%|██████▋   | 770/1156 [14:09<06:28,  1.01s/it]

{'loss': 0.1232, 'learning_rate': 3.3391003460207618e-06, 'epoch': 2.66}


 67%|██████▋   | 780/1156 [14:19<06:22,  1.02s/it]

{'loss': 0.0147, 'learning_rate': 3.2525951557093425e-06, 'epoch': 2.7}


 68%|██████▊   | 790/1156 [14:29<06:07,  1.01s/it]

{'loss': 0.0009, 'learning_rate': 3.166089965397924e-06, 'epoch': 2.73}


 69%|██████▉   | 800/1156 [14:39<05:57,  1.00s/it]

{'loss': 0.0948, 'learning_rate': 3.0795847750865054e-06, 'epoch': 2.77}


 70%|███████   | 810/1156 [14:49<05:50,  1.01s/it]

{'loss': 0.0278, 'learning_rate': 2.9930795847750866e-06, 'epoch': 2.8}


 71%|███████   | 820/1156 [15:00<05:38,  1.01s/it]

{'loss': 0.038, 'learning_rate': 2.9065743944636683e-06, 'epoch': 2.84}


 72%|███████▏  | 830/1156 [15:10<05:30,  1.01s/it]

{'loss': 0.1532, 'learning_rate': 2.820069204152249e-06, 'epoch': 2.87}


 73%|███████▎  | 840/1156 [15:20<05:20,  1.01s/it]

{'loss': 0.0341, 'learning_rate': 2.7335640138408307e-06, 'epoch': 2.91}


 74%|███████▎  | 850/1156 [15:30<05:08,  1.01s/it]

{'loss': 0.0021, 'learning_rate': 2.647058823529412e-06, 'epoch': 2.94}


 74%|███████▍  | 860/1156 [15:40<04:58,  1.01s/it]

{'loss': 0.0051, 'learning_rate': 2.560553633217993e-06, 'epoch': 2.98}


                                                  
 75%|███████▌  | 867/1156 [16:15<04:26,  1.09it/s]

{'eval_loss': 1.9533182382583618, 'eval_accuracy': 0.7104895104895105, 'eval_precision': 0.7157302309934109, 'eval_recall': 0.7104895104895105, 'eval_f1': 0.7093850452800496, 'eval_runtime': 28.2713, 'eval_samples_per_second': 25.291, 'eval_steps_per_second': 3.183, 'epoch': 3.0}


 75%|███████▌  | 870/1156 [16:25<29:41,  6.23s/it]

{'loss': 0.1129, 'learning_rate': 2.4740484429065744e-06, 'epoch': 3.01}


 76%|███████▌  | 880/1156 [16:35<05:18,  1.15s/it]

{'loss': 0.0006, 'learning_rate': 2.387543252595156e-06, 'epoch': 3.04}


 77%|███████▋  | 890/1156 [16:46<04:28,  1.01s/it]

{'loss': 0.0915, 'learning_rate': 2.3010380622837373e-06, 'epoch': 3.08}


 78%|███████▊  | 900/1156 [16:56<04:18,  1.01s/it]

{'loss': 0.0005, 'learning_rate': 2.2145328719723185e-06, 'epoch': 3.11}


 79%|███████▊  | 910/1156 [17:06<04:06,  1.00s/it]

{'loss': 0.11, 'learning_rate': 2.1280276816609e-06, 'epoch': 3.15}


 80%|███████▉  | 920/1156 [17:16<03:57,  1.01s/it]

{'loss': 0.0026, 'learning_rate': 2.041522491349481e-06, 'epoch': 3.18}


 80%|████████  | 930/1156 [17:26<03:47,  1.01s/it]

{'loss': 0.0231, 'learning_rate': 1.9550173010380626e-06, 'epoch': 3.22}


 81%|████████▏ | 940/1156 [17:36<03:36,  1.00s/it]

{'loss': 0.0118, 'learning_rate': 1.8685121107266438e-06, 'epoch': 3.25}


 82%|████████▏ | 950/1156 [17:46<03:26,  1.00s/it]

{'loss': 0.0013, 'learning_rate': 1.7820069204152252e-06, 'epoch': 3.29}


 83%|████████▎ | 960/1156 [17:56<03:17,  1.01s/it]

{'loss': 0.0322, 'learning_rate': 1.6955017301038063e-06, 'epoch': 3.32}


 84%|████████▍ | 970/1156 [18:06<03:11,  1.03s/it]

{'loss': 0.0007, 'learning_rate': 1.6089965397923877e-06, 'epoch': 3.36}


 85%|████████▍ | 980/1156 [18:16<02:58,  1.02s/it]

{'loss': 0.0036, 'learning_rate': 1.522491349480969e-06, 'epoch': 3.39}


 86%|████████▌ | 990/1156 [18:26<02:47,  1.01s/it]

{'loss': 0.0005, 'learning_rate': 1.4359861591695503e-06, 'epoch': 3.43}


 87%|████████▋ | 1000/1156 [18:36<02:37,  1.01s/it]

{'loss': 0.0042, 'learning_rate': 1.3494809688581318e-06, 'epoch': 3.46}


 87%|████████▋ | 1010/1156 [18:46<02:27,  1.01s/it]

{'loss': 0.0279, 'learning_rate': 1.2629757785467128e-06, 'epoch': 3.49}


 88%|████████▊ | 1020/1156 [18:57<02:17,  1.01s/it]

{'loss': 0.0218, 'learning_rate': 1.1764705882352942e-06, 'epoch': 3.53}


 89%|████████▉ | 1030/1156 [19:07<02:06,  1.01s/it]

{'loss': 0.0411, 'learning_rate': 1.0899653979238757e-06, 'epoch': 3.56}


 90%|████████▉ | 1040/1156 [19:17<01:57,  1.01s/it]

{'loss': 0.0972, 'learning_rate': 1.0034602076124569e-06, 'epoch': 3.6}


 91%|█████████ | 1050/1156 [19:27<01:46,  1.01s/it]

{'loss': 0.0005, 'learning_rate': 9.169550173010382e-07, 'epoch': 3.63}


 92%|█████████▏| 1060/1156 [19:37<01:36,  1.00s/it]

{'loss': 0.0011, 'learning_rate': 8.304498269896194e-07, 'epoch': 3.67}


 93%|█████████▎| 1070/1156 [19:47<01:26,  1.01s/it]

{'loss': 0.051, 'learning_rate': 7.439446366782008e-07, 'epoch': 3.7}


 93%|█████████▎| 1080/1156 [19:57<01:17,  1.01s/it]

{'loss': 0.1053, 'learning_rate': 6.57439446366782e-07, 'epoch': 3.74}


 94%|█████████▍| 1090/1156 [20:07<01:06,  1.01s/it]

{'loss': 0.0007, 'learning_rate': 5.709342560553634e-07, 'epoch': 3.77}


 95%|█████████▌| 1100/1156 [20:17<00:56,  1.01s/it]

{'loss': 0.0015, 'learning_rate': 4.844290657439446e-07, 'epoch': 3.81}


 96%|█████████▌| 1110/1156 [20:27<00:46,  1.01s/it]

{'loss': 0.0531, 'learning_rate': 3.9792387543252597e-07, 'epoch': 3.84}


 97%|█████████▋| 1120/1156 [20:37<00:36,  1.01s/it]

{'loss': 0.071, 'learning_rate': 3.114186851211073e-07, 'epoch': 3.88}


 98%|█████████▊| 1130/1156 [20:47<00:26,  1.00s/it]

{'loss': 0.0025, 'learning_rate': 2.249134948096886e-07, 'epoch': 3.91}


 99%|█████████▊| 1140/1156 [20:58<00:16,  1.01s/it]

{'loss': 0.0256, 'learning_rate': 1.384083044982699e-07, 'epoch': 3.94}


 99%|█████████▉| 1150/1156 [21:08<00:06,  1.06s/it]

{'loss': 0.0005, 'learning_rate': 5.1903114186851215e-08, 'epoch': 3.98}


                                                   
100%|██████████| 1156/1156 [21:44<00:00,  1.09it/s]

{'eval_loss': 2.0653793811798096, 'eval_accuracy': 0.7020979020979021, 'eval_precision': 0.7097749884636767, 'eval_recall': 0.7020979020979021, 'eval_f1': 0.7000751721167028, 'eval_runtime': 30.6048, 'eval_samples_per_second': 23.362, 'eval_steps_per_second': 2.941, 'epoch': 4.0}


100%|██████████| 1156/1156 [21:54<00:00,  1.14s/it]


{'train_runtime': 1314.509, 'train_samples_per_second': 7.023, 'train_steps_per_second': 0.879, 'train_loss': 0.08922992250720961, 'epoch': 4.0}


100%|██████████| 90/90 [00:28<00:00,  3.11it/s]
100%|██████████| 90/90 [00:27<00:00,  3.22it/s]
100%|██████████| 90/90 [00:28<00:00,  3.18it/s]


{'accuracy': 0.7132867132867133, 'precision': 0.7123775962315888, 'recall': 0.7132867132867133, 'f1': 0.7121093100354915}
{'accuracy': 0.6862745098039216, 'precision': 0.685827306532369, 'recall': 0.6862745098039216, 'f1': 0.6841881663801778}


  1%|          | 10/1156 [00:11<19:31,  1.02s/it]

{'loss': 0.2874, 'learning_rate': 9.913494809688582e-06, 'epoch': 0.03}


  2%|▏         | 20/1156 [00:21<19:05,  1.01s/it]

{'loss': 0.0049, 'learning_rate': 9.826989619377163e-06, 'epoch': 0.07}


  3%|▎         | 30/1156 [00:31<19:23,  1.03s/it]

{'loss': 0.1417, 'learning_rate': 9.740484429065744e-06, 'epoch': 0.1}


  3%|▎         | 40/1156 [00:41<18:50,  1.01s/it]

{'loss': 0.1784, 'learning_rate': 9.653979238754326e-06, 'epoch': 0.14}


  4%|▍         | 50/1156 [00:52<18:54,  1.03s/it]

{'loss': 0.4022, 'learning_rate': 9.567474048442907e-06, 'epoch': 0.17}


  5%|▌         | 60/1156 [01:02<18:21,  1.01s/it]

{'loss': 0.0585, 'learning_rate': 9.480968858131488e-06, 'epoch': 0.21}


  6%|▌         | 70/1156 [01:12<18:14,  1.01s/it]

{'loss': 0.1948, 'learning_rate': 9.39446366782007e-06, 'epoch': 0.24}


  7%|▋         | 80/1156 [01:22<18:02,  1.01s/it]

{'loss': 0.1076, 'learning_rate': 9.307958477508652e-06, 'epoch': 0.28}


  8%|▊         | 90/1156 [01:32<17:54,  1.01s/it]

{'loss': 0.2462, 'learning_rate': 9.221453287197234e-06, 'epoch': 0.31}


  9%|▊         | 100/1156 [01:42<17:43,  1.01s/it]

{'loss': 0.1006, 'learning_rate': 9.134948096885815e-06, 'epoch': 0.35}


 10%|▉         | 110/1156 [01:52<17:36,  1.01s/it]

{'loss': 0.0146, 'learning_rate': 9.048442906574394e-06, 'epoch': 0.38}


 10%|█         | 120/1156 [02:02<17:24,  1.01s/it]

{'loss': 0.0877, 'learning_rate': 8.961937716262975e-06, 'epoch': 0.42}


 11%|█         | 130/1156 [02:13<17:21,  1.01s/it]

{'loss': 0.2226, 'learning_rate': 8.875432525951558e-06, 'epoch': 0.45}


 12%|█▏        | 140/1156 [02:23<17:08,  1.01s/it]

{'loss': 0.1193, 'learning_rate': 8.78892733564014e-06, 'epoch': 0.48}


 13%|█▎        | 150/1156 [02:33<16:53,  1.01s/it]

{'loss': 0.1105, 'learning_rate': 8.702422145328721e-06, 'epoch': 0.52}


 14%|█▍        | 160/1156 [02:43<16:48,  1.01s/it]

{'loss': 0.2347, 'learning_rate': 8.615916955017302e-06, 'epoch': 0.55}


 15%|█▍        | 170/1156 [02:53<16:36,  1.01s/it]

{'loss': 0.0671, 'learning_rate': 8.529411764705883e-06, 'epoch': 0.59}


 16%|█▌        | 180/1156 [03:03<16:21,  1.01s/it]

{'loss': 0.0014, 'learning_rate': 8.442906574394465e-06, 'epoch': 0.62}


 16%|█▋        | 190/1156 [03:13<16:07,  1.00s/it]

{'loss': 0.0023, 'learning_rate': 8.356401384083046e-06, 'epoch': 0.66}


 17%|█▋        | 200/1156 [03:23<16:21,  1.03s/it]

{'loss': 0.1422, 'learning_rate': 8.269896193771627e-06, 'epoch': 0.69}


 18%|█▊        | 210/1156 [03:34<16:00,  1.02s/it]

{'loss': 0.2636, 'learning_rate': 8.183391003460208e-06, 'epoch': 0.73}


 19%|█▉        | 220/1156 [03:44<15:50,  1.02s/it]

{'loss': 0.0855, 'learning_rate': 8.09688581314879e-06, 'epoch': 0.76}


 20%|█▉        | 230/1156 [03:54<15:31,  1.01s/it]

{'loss': 0.0319, 'learning_rate': 8.01038062283737e-06, 'epoch': 0.8}


 21%|██        | 240/1156 [04:04<15:21,  1.01s/it]

{'loss': 0.1043, 'learning_rate': 7.923875432525952e-06, 'epoch': 0.83}


 22%|██▏       | 250/1156 [04:14<15:09,  1.00s/it]

{'loss': 0.2847, 'learning_rate': 7.837370242214533e-06, 'epoch': 0.87}


 22%|██▏       | 260/1156 [04:24<15:04,  1.01s/it]

{'loss': 0.0025, 'learning_rate': 7.750865051903114e-06, 'epoch': 0.9}


 23%|██▎       | 270/1156 [04:34<14:52,  1.01s/it]

{'loss': 0.0394, 'learning_rate': 7.664359861591696e-06, 'epoch': 0.93}


 24%|██▍       | 280/1156 [04:44<14:44,  1.01s/it]

{'loss': 0.1005, 'learning_rate': 7.577854671280277e-06, 'epoch': 0.97}


                                                  
 25%|██▌       | 289/1156 [05:22<13:17,  1.09it/s]

{'eval_loss': 1.860872507095337, 'eval_accuracy': 0.6979020979020979, 'eval_precision': 0.6944623239119703, 'eval_recall': 0.6979020979020979, 'eval_f1': 0.6949399839006734, 'eval_runtime': 28.4751, 'eval_samples_per_second': 25.11, 'eval_steps_per_second': 3.161, 'epoch': 1.0}


 25%|██▌       | 290/1156 [05:29<2:46:57, 11.57s/it]

{'loss': 0.3832, 'learning_rate': 7.491349480968859e-06, 'epoch': 1.0}


 26%|██▌       | 300/1156 [05:40<18:46,  1.32s/it]  

{'loss': 0.1318, 'learning_rate': 7.40484429065744e-06, 'epoch': 1.04}


 27%|██▋       | 310/1156 [05:50<14:15,  1.01s/it]

{'loss': 0.086, 'learning_rate': 7.318339100346021e-06, 'epoch': 1.07}


 28%|██▊       | 320/1156 [06:00<13:59,  1.00s/it]

{'loss': 0.1088, 'learning_rate': 7.2318339100346025e-06, 'epoch': 1.11}


 29%|██▊       | 330/1156 [06:10<13:51,  1.01s/it]

{'loss': 0.1163, 'learning_rate': 7.145328719723184e-06, 'epoch': 1.14}


 29%|██▉       | 340/1156 [06:20<13:37,  1.00s/it]

{'loss': 0.0497, 'learning_rate': 7.058823529411766e-06, 'epoch': 1.18}


 30%|███       | 350/1156 [06:30<13:31,  1.01s/it]

{'loss': 0.0006, 'learning_rate': 6.972318339100347e-06, 'epoch': 1.21}


 31%|███       | 360/1156 [06:40<13:23,  1.01s/it]

{'loss': 0.0811, 'learning_rate': 6.885813148788928e-06, 'epoch': 1.25}


 32%|███▏      | 370/1156 [06:50<13:20,  1.02s/it]

{'loss': 0.0069, 'learning_rate': 6.799307958477509e-06, 'epoch': 1.28}


 33%|███▎      | 380/1156 [07:01<14:08,  1.09s/it]

{'loss': 0.2096, 'learning_rate': 6.71280276816609e-06, 'epoch': 1.31}


 34%|███▎      | 390/1156 [07:11<13:09,  1.03s/it]

{'loss': 0.1966, 'learning_rate': 6.626297577854672e-06, 'epoch': 1.35}


 35%|███▍      | 400/1156 [07:21<12:42,  1.01s/it]

{'loss': 0.0144, 'learning_rate': 6.539792387543253e-06, 'epoch': 1.38}


 35%|███▌      | 410/1156 [07:31<12:33,  1.01s/it]

{'loss': 0.0845, 'learning_rate': 6.453287197231834e-06, 'epoch': 1.42}


 36%|███▋      | 420/1156 [07:41<12:24,  1.01s/it]

{'loss': 0.0252, 'learning_rate': 6.3667820069204156e-06, 'epoch': 1.45}


 37%|███▋      | 430/1156 [07:51<12:08,  1.00s/it]

{'loss': 0.0021, 'learning_rate': 6.280276816608997e-06, 'epoch': 1.49}


 38%|███▊      | 440/1156 [08:01<12:00,  1.01s/it]

{'loss': 0.0497, 'learning_rate': 6.193771626297579e-06, 'epoch': 1.52}


 39%|███▉      | 450/1156 [08:12<11:50,  1.01s/it]

{'loss': 0.0398, 'learning_rate': 6.10726643598616e-06, 'epoch': 1.56}


 40%|███▉      | 460/1156 [08:22<11:40,  1.01s/it]

{'loss': 0.0003, 'learning_rate': 6.020761245674741e-06, 'epoch': 1.59}


 41%|████      | 470/1156 [08:32<11:30,  1.01s/it]

{'loss': 0.0936, 'learning_rate': 5.9342560553633225e-06, 'epoch': 1.63}


 42%|████▏     | 480/1156 [08:42<11:23,  1.01s/it]

{'loss': 0.2283, 'learning_rate': 5.847750865051903e-06, 'epoch': 1.66}


 42%|████▏     | 490/1156 [08:52<11:11,  1.01s/it]

{'loss': 0.1401, 'learning_rate': 5.761245674740484e-06, 'epoch': 1.7}


 43%|████▎     | 500/1156 [09:02<10:56,  1.00s/it]

{'loss': 0.1669, 'learning_rate': 5.674740484429066e-06, 'epoch': 1.73}


 44%|████▍     | 510/1156 [09:12<10:50,  1.01s/it]

{'loss': 0.1246, 'learning_rate': 5.588235294117647e-06, 'epoch': 1.76}


 45%|████▍     | 520/1156 [09:22<10:40,  1.01s/it]

{'loss': 0.0014, 'learning_rate': 5.501730103806229e-06, 'epoch': 1.8}


 46%|████▌     | 530/1156 [09:33<10:56,  1.05s/it]

{'loss': 0.0118, 'learning_rate': 5.41522491349481e-06, 'epoch': 1.83}


 47%|████▋     | 540/1156 [09:43<10:30,  1.02s/it]

{'loss': 0.125, 'learning_rate': 5.328719723183391e-06, 'epoch': 1.87}


 48%|████▊     | 550/1156 [09:53<10:14,  1.01s/it]

{'loss': 0.1058, 'learning_rate': 5.242214532871973e-06, 'epoch': 1.9}


 48%|████▊     | 560/1156 [10:03<10:00,  1.01s/it]

{'loss': 0.0312, 'learning_rate': 5.155709342560554e-06, 'epoch': 1.94}


 49%|████▉     | 570/1156 [10:13<09:49,  1.01s/it]

{'loss': 0.0936, 'learning_rate': 5.069204152249136e-06, 'epoch': 1.97}


                                                  
 50%|█████     | 578/1156 [10:50<08:49,  1.09it/s]

{'eval_loss': 2.054884433746338, 'eval_accuracy': 0.6895104895104895, 'eval_precision': 0.685695979599723, 'eval_recall': 0.6895104895104895, 'eval_f1': 0.6823208700060442, 'eval_runtime': 29.5416, 'eval_samples_per_second': 24.203, 'eval_steps_per_second': 3.047, 'epoch': 2.0}


 50%|█████     | 580/1156 [11:00<1:23:10,  8.66s/it]

{'loss': 0.0308, 'learning_rate': 4.982698961937717e-06, 'epoch': 2.01}


 51%|█████     | 590/1156 [11:10<11:32,  1.22s/it]  

{'loss': 0.0012, 'learning_rate': 4.896193771626298e-06, 'epoch': 2.04}


 52%|█████▏    | 600/1156 [11:20<09:24,  1.02s/it]

{'loss': 0.002, 'learning_rate': 4.809688581314879e-06, 'epoch': 2.08}


 53%|█████▎    | 610/1156 [11:30<09:10,  1.01s/it]

{'loss': 0.0004, 'learning_rate': 4.7231833910034605e-06, 'epoch': 2.11}


 54%|█████▎    | 620/1156 [11:40<08:58,  1.00s/it]

{'loss': 0.1098, 'learning_rate': 4.636678200692042e-06, 'epoch': 2.15}


 54%|█████▍    | 630/1156 [11:50<08:50,  1.01s/it]

{'loss': 0.0522, 'learning_rate': 4.550173010380623e-06, 'epoch': 2.18}


 55%|█████▌    | 640/1156 [12:00<08:37,  1.00s/it]

{'loss': 0.0014, 'learning_rate': 4.463667820069205e-06, 'epoch': 2.21}


 56%|█████▌    | 650/1156 [12:10<08:27,  1.00s/it]

{'loss': 0.0341, 'learning_rate': 4.377162629757785e-06, 'epoch': 2.25}


 57%|█████▋    | 660/1156 [12:20<08:21,  1.01s/it]

{'loss': 0.0003, 'learning_rate': 4.2906574394463675e-06, 'epoch': 2.28}


 58%|█████▊    | 670/1156 [12:30<08:22,  1.03s/it]

{'loss': 0.0005, 'learning_rate': 4.204152249134949e-06, 'epoch': 2.32}


 59%|█████▉    | 680/1156 [12:40<08:00,  1.01s/it]

{'loss': 0.0603, 'learning_rate': 4.11764705882353e-06, 'epoch': 2.35}


 60%|█████▉    | 690/1156 [12:51<07:50,  1.01s/it]

{'loss': 0.0102, 'learning_rate': 4.031141868512111e-06, 'epoch': 2.39}


 61%|██████    | 700/1156 [13:01<07:38,  1.01s/it]

{'loss': 0.1715, 'learning_rate': 3.944636678200692e-06, 'epoch': 2.42}


 61%|██████▏   | 710/1156 [13:11<07:29,  1.01s/it]

{'loss': 0.0005, 'learning_rate': 3.8581314878892736e-06, 'epoch': 2.46}


 62%|██████▏   | 720/1156 [13:21<07:19,  1.01s/it]

{'loss': 0.0275, 'learning_rate': 3.7716262975778552e-06, 'epoch': 2.49}


 63%|██████▎   | 730/1156 [13:31<07:08,  1.01s/it]

{'loss': 0.088, 'learning_rate': 3.685121107266436e-06, 'epoch': 2.53}


 64%|██████▍   | 740/1156 [13:41<07:01,  1.01s/it]

{'loss': 0.0201, 'learning_rate': 3.5986159169550177e-06, 'epoch': 2.56}


 65%|██████▍   | 750/1156 [13:51<06:49,  1.01s/it]

{'loss': 0.0005, 'learning_rate': 3.512110726643599e-06, 'epoch': 2.6}


 66%|██████▌   | 760/1156 [14:01<06:38,  1.01s/it]

{'loss': 0.0624, 'learning_rate': 3.42560553633218e-06, 'epoch': 2.63}


 67%|██████▋   | 770/1156 [14:11<06:28,  1.01s/it]

{'loss': 0.0331, 'learning_rate': 3.3391003460207618e-06, 'epoch': 2.66}


 67%|██████▋   | 780/1156 [14:21<06:19,  1.01s/it]

{'loss': 0.0012, 'learning_rate': 3.2525951557093425e-06, 'epoch': 2.7}


 68%|██████▊   | 790/1156 [14:31<06:08,  1.01s/it]

{'loss': 0.0003, 'learning_rate': 3.166089965397924e-06, 'epoch': 2.73}


 69%|██████▉   | 800/1156 [14:42<05:58,  1.01s/it]

{'loss': 0.0495, 'learning_rate': 3.0795847750865054e-06, 'epoch': 2.77}


 70%|███████   | 810/1156 [14:52<05:48,  1.01s/it]

{'loss': 0.0353, 'learning_rate': 2.9930795847750866e-06, 'epoch': 2.8}


 71%|███████   | 820/1156 [15:02<05:38,  1.01s/it]

{'loss': 0.0003, 'learning_rate': 2.9065743944636683e-06, 'epoch': 2.84}


 72%|███████▏  | 830/1156 [15:12<05:28,  1.01s/it]

{'loss': 0.0003, 'learning_rate': 2.820069204152249e-06, 'epoch': 2.87}


 73%|███████▎  | 840/1156 [15:22<05:17,  1.00s/it]

{'loss': 0.0024, 'learning_rate': 2.7335640138408307e-06, 'epoch': 2.91}


 74%|███████▎  | 850/1156 [15:32<05:13,  1.02s/it]

{'loss': 0.0004, 'learning_rate': 2.647058823529412e-06, 'epoch': 2.94}


 74%|███████▍  | 860/1156 [15:42<05:00,  1.01s/it]

{'loss': 0.0122, 'learning_rate': 2.560553633217993e-06, 'epoch': 2.98}


                                                  
 75%|███████▌  | 867/1156 [16:17<04:25,  1.09it/s]

{'eval_loss': 2.243972063064575, 'eval_accuracy': 0.6965034965034965, 'eval_precision': 0.7037035695559342, 'eval_recall': 0.6965034965034965, 'eval_f1': 0.6959707766541656, 'eval_runtime': 28.4759, 'eval_samples_per_second': 25.109, 'eval_steps_per_second': 3.161, 'epoch': 3.0}


 75%|███████▌  | 870/1156 [16:28<29:33,  6.20s/it]

{'loss': 0.032, 'learning_rate': 2.4740484429065744e-06, 'epoch': 3.01}


 76%|███████▌  | 880/1156 [16:38<05:17,  1.15s/it]

{'loss': 0.0003, 'learning_rate': 2.387543252595156e-06, 'epoch': 3.04}


 77%|███████▋  | 890/1156 [16:48<04:28,  1.01s/it]

{'loss': 0.0463, 'learning_rate': 2.3010380622837373e-06, 'epoch': 3.08}


 78%|███████▊  | 900/1156 [16:58<04:17,  1.01s/it]

{'loss': 0.0003, 'learning_rate': 2.2145328719723185e-06, 'epoch': 3.11}


 79%|███████▊  | 910/1156 [17:08<04:08,  1.01s/it]

{'loss': 0.0385, 'learning_rate': 2.1280276816609e-06, 'epoch': 3.15}


 80%|███████▉  | 920/1156 [17:18<03:56,  1.00s/it]

{'loss': 0.0022, 'learning_rate': 2.041522491349481e-06, 'epoch': 3.18}


 80%|████████  | 930/1156 [17:28<03:48,  1.01s/it]

{'loss': 0.004, 'learning_rate': 1.9550173010380626e-06, 'epoch': 3.22}


 81%|████████▏ | 940/1156 [17:38<03:38,  1.01s/it]

{'loss': 0.0841, 'learning_rate': 1.8685121107266438e-06, 'epoch': 3.25}


 82%|████████▏ | 950/1156 [17:48<03:27,  1.01s/it]

{'loss': 0.0013, 'learning_rate': 1.7820069204152252e-06, 'epoch': 3.29}


 83%|████████▎ | 960/1156 [17:58<03:17,  1.01s/it]

{'loss': 0.1005, 'learning_rate': 1.6955017301038063e-06, 'epoch': 3.32}


 84%|████████▍ | 970/1156 [18:08<03:07,  1.01s/it]

{'loss': 0.0008, 'learning_rate': 1.6089965397923877e-06, 'epoch': 3.36}


 85%|████████▍ | 980/1156 [18:18<02:56,  1.00s/it]

{'loss': 0.0042, 'learning_rate': 1.522491349480969e-06, 'epoch': 3.39}


 86%|████████▌ | 990/1156 [18:28<02:47,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 1.4359861591695503e-06, 'epoch': 3.43}


 87%|████████▋ | 1000/1156 [18:38<02:37,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 1.3494809688581318e-06, 'epoch': 3.46}


 87%|████████▋ | 1010/1156 [18:49<02:27,  1.01s/it]

{'loss': 0.0003, 'learning_rate': 1.2629757785467128e-06, 'epoch': 3.49}


 88%|████████▊ | 1020/1156 [18:59<02:18,  1.02s/it]

{'loss': 0.0098, 'learning_rate': 1.1764705882352942e-06, 'epoch': 3.53}


 89%|████████▉ | 1030/1156 [19:09<02:06,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 1.0899653979238757e-06, 'epoch': 3.56}


 90%|████████▉ | 1040/1156 [19:19<01:57,  1.01s/it]

{'loss': 0.0387, 'learning_rate': 1.0034602076124569e-06, 'epoch': 3.6}


 91%|█████████ | 1050/1156 [19:29<01:46,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 9.169550173010382e-07, 'epoch': 3.63}


 92%|█████████▏| 1060/1156 [19:39<01:37,  1.01s/it]

{'loss': 0.0003, 'learning_rate': 8.304498269896194e-07, 'epoch': 3.67}


 93%|█████████▎| 1070/1156 [19:49<01:26,  1.00s/it]

{'loss': 0.0204, 'learning_rate': 7.439446366782008e-07, 'epoch': 3.7}


 93%|█████████▎| 1080/1156 [19:59<01:16,  1.00s/it]

{'loss': 0.0023, 'learning_rate': 6.57439446366782e-07, 'epoch': 3.74}


 94%|█████████▍| 1090/1156 [20:09<01:07,  1.03s/it]

{'loss': 0.0002, 'learning_rate': 5.709342560553634e-07, 'epoch': 3.77}


 95%|█████████▌| 1100/1156 [20:20<00:56,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 4.844290657439446e-07, 'epoch': 3.81}


 96%|█████████▌| 1110/1156 [20:30<00:46,  1.01s/it]

{'loss': 0.0004, 'learning_rate': 3.9792387543252597e-07, 'epoch': 3.84}


 97%|█████████▋| 1120/1156 [20:40<00:36,  1.01s/it]

{'loss': 0.0329, 'learning_rate': 3.114186851211073e-07, 'epoch': 3.88}


 98%|█████████▊| 1130/1156 [20:50<00:26,  1.01s/it]

{'loss': 0.0012, 'learning_rate': 2.249134948096886e-07, 'epoch': 3.91}


 99%|█████████▊| 1140/1156 [21:00<00:16,  1.01s/it]

{'loss': 0.0023, 'learning_rate': 1.384083044982699e-07, 'epoch': 3.94}


 99%|█████████▉| 1150/1156 [21:10<00:06,  1.01s/it]

{'loss': 0.0003, 'learning_rate': 5.1903114186851215e-08, 'epoch': 3.98}


                                                   
100%|██████████| 1156/1156 [21:45<00:00,  1.09it/s]

{'eval_loss': 2.2877697944641113, 'eval_accuracy': 0.6979020979020979, 'eval_precision': 0.7016821332040011, 'eval_recall': 0.6979020979020979, 'eval_f1': 0.696159746401241, 'eval_runtime': 29.2587, 'eval_samples_per_second': 24.437, 'eval_steps_per_second': 3.076, 'epoch': 4.0}


100%|██████████| 1156/1156 [21:55<00:00,  1.14s/it]


{'train_runtime': 1315.2366, 'train_samples_per_second': 7.019, 'train_steps_per_second': 0.879, 'train_loss': 0.06556770994805708, 'epoch': 4.0}


100%|██████████| 90/90 [00:28<00:00,  3.17it/s]
100%|██████████| 90/90 [00:27<00:00,  3.28it/s]
100%|██████████| 90/90 [00:28<00:00,  3.17it/s]


{'accuracy': 0.6979020979020979, 'precision': 0.6944623239119703, 'recall': 0.6979020979020979, 'f1': 0.6949399839006734}
{'accuracy': 0.6708683473389355, 'precision': 0.667680444770228, 'recall': 0.6708683473389355, 'f1': 0.6659629504906237}


  1%|          | 10/1156 [00:11<19:23,  1.02s/it]

{'loss': 0.1737, 'learning_rate': 9.913494809688582e-06, 'epoch': 0.03}


  2%|▏         | 20/1156 [00:21<19:30,  1.03s/it]

{'loss': 0.0009, 'learning_rate': 9.826989619377163e-06, 'epoch': 0.07}


  3%|▎         | 30/1156 [00:31<18:53,  1.01s/it]

{'loss': 0.002, 'learning_rate': 9.740484429065744e-06, 'epoch': 0.1}


  3%|▎         | 40/1156 [00:41<18:42,  1.01s/it]

{'loss': 0.1036, 'learning_rate': 9.653979238754326e-06, 'epoch': 0.14}


  4%|▍         | 50/1156 [00:51<19:25,  1.05s/it]

{'loss': 0.2878, 'learning_rate': 9.567474048442907e-06, 'epoch': 0.17}


  5%|▌         | 60/1156 [01:02<18:56,  1.04s/it]

{'loss': 0.0204, 'learning_rate': 9.480968858131488e-06, 'epoch': 0.21}


  6%|▌         | 70/1156 [01:12<18:15,  1.01s/it]

{'loss': 0.0035, 'learning_rate': 9.39446366782007e-06, 'epoch': 0.24}


  7%|▋         | 80/1156 [01:22<18:05,  1.01s/it]

{'loss': 0.1792, 'learning_rate': 9.307958477508652e-06, 'epoch': 0.28}


  8%|▊         | 90/1156 [01:32<17:53,  1.01s/it]

{'loss': 0.1246, 'learning_rate': 9.221453287197234e-06, 'epoch': 0.31}


  9%|▊         | 100/1156 [01:42<17:42,  1.01s/it]

{'loss': 0.1243, 'learning_rate': 9.134948096885815e-06, 'epoch': 0.35}


 10%|▉         | 110/1156 [01:52<17:37,  1.01s/it]

{'loss': 0.0033, 'learning_rate': 9.048442906574394e-06, 'epoch': 0.38}


 10%|█         | 120/1156 [02:03<18:07,  1.05s/it]

{'loss': 0.0464, 'learning_rate': 8.961937716262975e-06, 'epoch': 0.42}


 11%|█         | 130/1156 [02:13<17:08,  1.00s/it]

{'loss': 0.0492, 'learning_rate': 8.875432525951558e-06, 'epoch': 0.45}


 12%|█▏        | 140/1156 [02:23<16:58,  1.00s/it]

{'loss': 0.0986, 'learning_rate': 8.78892733564014e-06, 'epoch': 0.48}


 13%|█▎        | 150/1156 [02:33<16:50,  1.00s/it]

{'loss': 0.011, 'learning_rate': 8.702422145328721e-06, 'epoch': 0.52}


 14%|█▍        | 160/1156 [02:43<16:45,  1.01s/it]

{'loss': 0.2466, 'learning_rate': 8.615916955017302e-06, 'epoch': 0.55}


 15%|█▍        | 170/1156 [02:53<16:28,  1.00s/it]

{'loss': 0.0007, 'learning_rate': 8.529411764705883e-06, 'epoch': 0.59}


 16%|█▌        | 180/1156 [03:03<16:22,  1.01s/it]

{'loss': 0.0659, 'learning_rate': 8.442906574394465e-06, 'epoch': 0.62}


 16%|█▋        | 190/1156 [03:13<16:15,  1.01s/it]

{'loss': 0.0408, 'learning_rate': 8.356401384083046e-06, 'epoch': 0.66}


 17%|█▋        | 200/1156 [03:23<16:06,  1.01s/it]

{'loss': 0.0634, 'learning_rate': 8.269896193771627e-06, 'epoch': 0.69}


 18%|█▊        | 210/1156 [03:33<15:50,  1.00s/it]

{'loss': 0.0225, 'learning_rate': 8.183391003460208e-06, 'epoch': 0.73}


 19%|█▉        | 220/1156 [03:43<15:37,  1.00s/it]

{'loss': 0.1946, 'learning_rate': 8.09688581314879e-06, 'epoch': 0.76}


 20%|█▉        | 230/1156 [03:53<15:41,  1.02s/it]

{'loss': 0.0336, 'learning_rate': 8.01038062283737e-06, 'epoch': 0.8}


 21%|██        | 240/1156 [04:04<15:32,  1.02s/it]

{'loss': 0.16, 'learning_rate': 7.923875432525952e-06, 'epoch': 0.83}


 22%|██▏       | 250/1156 [04:14<15:12,  1.01s/it]

{'loss': 0.1148, 'learning_rate': 7.837370242214533e-06, 'epoch': 0.87}


 22%|██▏       | 260/1156 [04:24<15:03,  1.01s/it]

{'loss': 0.0042, 'learning_rate': 7.750865051903114e-06, 'epoch': 0.9}


 23%|██▎       | 270/1156 [04:34<14:52,  1.01s/it]

{'loss': 0.001, 'learning_rate': 7.664359861591696e-06, 'epoch': 0.93}


 24%|██▍       | 280/1156 [04:44<14:36,  1.00s/it]

{'loss': 0.1432, 'learning_rate': 7.577854671280277e-06, 'epoch': 0.97}


                                                  
 25%|██▌       | 289/1156 [05:22<13:13,  1.09it/s]

{'eval_loss': 1.9029861688613892, 'eval_accuracy': 0.6979020979020979, 'eval_precision': 0.7008202323991798, 'eval_recall': 0.6979020979020979, 'eval_f1': 0.6988221741414398, 'eval_runtime': 29.1755, 'eval_samples_per_second': 24.507, 'eval_steps_per_second': 3.085, 'epoch': 1.0}


 25%|██▌       | 290/1156 [05:29<2:48:33, 11.68s/it]

{'loss': 0.3301, 'learning_rate': 7.491349480968859e-06, 'epoch': 1.0}


 26%|██▌       | 300/1156 [05:39<18:38,  1.31s/it]  

{'loss': 0.014, 'learning_rate': 7.40484429065744e-06, 'epoch': 1.04}


 27%|██▋       | 310/1156 [05:50<14:17,  1.01s/it]

{'loss': 0.1619, 'learning_rate': 7.318339100346021e-06, 'epoch': 1.07}


 28%|██▊       | 320/1156 [06:00<14:00,  1.00s/it]

{'loss': 0.0082, 'learning_rate': 7.2318339100346025e-06, 'epoch': 1.11}


 29%|██▊       | 330/1156 [06:10<13:50,  1.01s/it]

{'loss': 0.2735, 'learning_rate': 7.145328719723184e-06, 'epoch': 1.14}


 29%|██▉       | 340/1156 [06:20<13:39,  1.00s/it]

{'loss': 0.1082, 'learning_rate': 7.058823529411766e-06, 'epoch': 1.18}


 30%|███       | 350/1156 [06:30<13:28,  1.00s/it]

{'loss': 0.0004, 'learning_rate': 6.972318339100347e-06, 'epoch': 1.21}


 31%|███       | 360/1156 [06:40<13:21,  1.01s/it]

{'loss': 0.1511, 'learning_rate': 6.885813148788928e-06, 'epoch': 1.25}


 32%|███▏      | 370/1156 [06:50<13:09,  1.00s/it]

{'loss': 0.0197, 'learning_rate': 6.799307958477509e-06, 'epoch': 1.28}


 33%|███▎      | 380/1156 [07:00<13:06,  1.01s/it]

{'loss': 0.2297, 'learning_rate': 6.71280276816609e-06, 'epoch': 1.31}


 34%|███▎      | 390/1156 [07:10<12:50,  1.01s/it]

{'loss': 0.0961, 'learning_rate': 6.626297577854672e-06, 'epoch': 1.35}


 35%|███▍      | 400/1156 [07:20<12:41,  1.01s/it]

{'loss': 0.0067, 'learning_rate': 6.539792387543253e-06, 'epoch': 1.38}


 35%|███▌      | 410/1156 [07:30<12:28,  1.00s/it]

{'loss': 0.0069, 'learning_rate': 6.453287197231834e-06, 'epoch': 1.42}


 36%|███▋      | 420/1156 [07:40<12:20,  1.01s/it]

{'loss': 0.0462, 'learning_rate': 6.3667820069204156e-06, 'epoch': 1.45}


 37%|███▋      | 430/1156 [07:50<12:09,  1.00s/it]

{'loss': 0.0007, 'learning_rate': 6.280276816608997e-06, 'epoch': 1.49}


 38%|███▊      | 440/1156 [08:00<12:02,  1.01s/it]

{'loss': 0.0597, 'learning_rate': 6.193771626297579e-06, 'epoch': 1.52}


 39%|███▉      | 450/1156 [08:11<11:49,  1.01s/it]

{'loss': 0.1947, 'learning_rate': 6.10726643598616e-06, 'epoch': 1.56}


 40%|███▉      | 460/1156 [08:21<11:41,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 6.020761245674741e-06, 'epoch': 1.59}


 41%|████      | 470/1156 [08:31<11:31,  1.01s/it]

{'loss': 0.0959, 'learning_rate': 5.9342560553633225e-06, 'epoch': 1.63}


 42%|████▏     | 480/1156 [08:41<11:21,  1.01s/it]

{'loss': 0.0299, 'learning_rate': 5.847750865051903e-06, 'epoch': 1.66}


 42%|████▏     | 490/1156 [08:51<11:09,  1.01s/it]

{'loss': 0.0951, 'learning_rate': 5.761245674740484e-06, 'epoch': 1.7}


 43%|████▎     | 500/1156 [09:01<11:01,  1.01s/it]

{'loss': 0.2139, 'learning_rate': 5.674740484429066e-06, 'epoch': 1.73}


 44%|████▍     | 510/1156 [09:11<10:51,  1.01s/it]

{'loss': 0.0191, 'learning_rate': 5.588235294117647e-06, 'epoch': 1.76}


 45%|████▍     | 520/1156 [09:21<10:43,  1.01s/it]

{'loss': 0.0317, 'learning_rate': 5.501730103806229e-06, 'epoch': 1.8}


 46%|████▌     | 530/1156 [09:31<10:31,  1.01s/it]

{'loss': 0.1402, 'learning_rate': 5.41522491349481e-06, 'epoch': 1.83}


 47%|████▋     | 540/1156 [09:41<10:20,  1.01s/it]

{'loss': 0.0446, 'learning_rate': 5.328719723183391e-06, 'epoch': 1.87}


 48%|████▊     | 550/1156 [09:51<10:15,  1.02s/it]

{'loss': 0.186, 'learning_rate': 5.242214532871973e-06, 'epoch': 1.9}


 48%|████▊     | 560/1156 [10:02<10:02,  1.01s/it]

{'loss': 0.0015, 'learning_rate': 5.155709342560554e-06, 'epoch': 1.94}


 49%|████▉     | 570/1156 [10:12<09:49,  1.01s/it]

{'loss': 0.0218, 'learning_rate': 5.069204152249136e-06, 'epoch': 1.97}


                                                  
 50%|█████     | 578/1156 [10:48<08:50,  1.09it/s]

{'eval_loss': 2.1383326053619385, 'eval_accuracy': 0.7104895104895105, 'eval_precision': 0.7206129001739353, 'eval_recall': 0.7104895104895105, 'eval_f1': 0.7061557383331794, 'eval_runtime': 29.0371, 'eval_samples_per_second': 24.624, 'eval_steps_per_second': 3.099, 'epoch': 2.0}


 50%|█████     | 580/1156 [10:58<1:22:41,  8.61s/it]

{'loss': 0.0007, 'learning_rate': 4.982698961937717e-06, 'epoch': 2.01}


 51%|█████     | 590/1156 [11:08<11:34,  1.23s/it]  

{'loss': 0.0003, 'learning_rate': 4.896193771626298e-06, 'epoch': 2.04}


 52%|█████▏    | 600/1156 [11:18<09:22,  1.01s/it]

{'loss': 0.0011, 'learning_rate': 4.809688581314879e-06, 'epoch': 2.08}


 53%|█████▎    | 610/1156 [11:28<09:09,  1.01s/it]

{'loss': 0.0457, 'learning_rate': 4.7231833910034605e-06, 'epoch': 2.11}


 54%|█████▎    | 620/1156 [11:38<08:57,  1.00s/it]

{'loss': 0.092, 'learning_rate': 4.636678200692042e-06, 'epoch': 2.15}


 54%|█████▍    | 630/1156 [11:48<08:51,  1.01s/it]

{'loss': 0.0226, 'learning_rate': 4.550173010380623e-06, 'epoch': 2.18}


 55%|█████▌    | 640/1156 [11:58<08:43,  1.01s/it]

{'loss': 0.0015, 'learning_rate': 4.463667820069205e-06, 'epoch': 2.21}


 56%|█████▌    | 650/1156 [12:08<08:27,  1.00s/it]

{'loss': 0.0136, 'learning_rate': 4.377162629757785e-06, 'epoch': 2.25}


 57%|█████▋    | 660/1156 [12:18<08:19,  1.01s/it]

{'loss': 0.0166, 'learning_rate': 4.2906574394463675e-06, 'epoch': 2.28}


 58%|█████▊    | 670/1156 [12:28<08:09,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 4.204152249134949e-06, 'epoch': 2.32}


 59%|█████▉    | 680/1156 [12:39<07:58,  1.01s/it]

{'loss': 0.0276, 'learning_rate': 4.11764705882353e-06, 'epoch': 2.35}


 60%|█████▉    | 690/1156 [12:49<07:49,  1.01s/it]

{'loss': 0.0018, 'learning_rate': 4.031141868512111e-06, 'epoch': 2.39}


 61%|██████    | 700/1156 [12:59<07:45,  1.02s/it]

{'loss': 0.022, 'learning_rate': 3.944636678200692e-06, 'epoch': 2.42}


 61%|██████▏   | 710/1156 [13:10<07:34,  1.02s/it]

{'loss': 0.0003, 'learning_rate': 3.8581314878892736e-06, 'epoch': 2.46}


 62%|██████▏   | 720/1156 [13:20<07:19,  1.01s/it]

{'loss': 0.0003, 'learning_rate': 3.7716262975778552e-06, 'epoch': 2.49}


 63%|██████▎   | 730/1156 [13:30<07:09,  1.01s/it]

{'loss': 0.0222, 'learning_rate': 3.685121107266436e-06, 'epoch': 2.53}


 64%|██████▍   | 740/1156 [13:40<06:57,  1.00s/it]

{'loss': 0.004, 'learning_rate': 3.5986159169550177e-06, 'epoch': 2.56}


 65%|██████▍   | 750/1156 [13:50<06:50,  1.01s/it]

{'loss': 0.0003, 'learning_rate': 3.512110726643599e-06, 'epoch': 2.6}


 66%|██████▌   | 760/1156 [14:00<06:38,  1.01s/it]

{'loss': 0.0094, 'learning_rate': 3.42560553633218e-06, 'epoch': 2.63}


 67%|██████▋   | 770/1156 [14:10<06:30,  1.01s/it]

{'loss': 0.0003, 'learning_rate': 3.3391003460207618e-06, 'epoch': 2.66}


 67%|██████▋   | 780/1156 [14:20<06:19,  1.01s/it]

{'loss': 0.1193, 'learning_rate': 3.2525951557093425e-06, 'epoch': 2.7}


 68%|██████▊   | 790/1156 [14:30<06:07,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 3.166089965397924e-06, 'epoch': 2.73}


 69%|██████▉   | 800/1156 [14:40<05:58,  1.01s/it]

{'loss': 0.0403, 'learning_rate': 3.0795847750865054e-06, 'epoch': 2.77}


 70%|███████   | 810/1156 [14:51<05:48,  1.01s/it]

{'loss': 0.0845, 'learning_rate': 2.9930795847750866e-06, 'epoch': 2.8}


 71%|███████   | 820/1156 [15:01<05:40,  1.01s/it]

{'loss': 0.1069, 'learning_rate': 2.9065743944636683e-06, 'epoch': 2.84}


 72%|███████▏  | 830/1156 [15:11<05:29,  1.01s/it]

{'loss': 0.0009, 'learning_rate': 2.820069204152249e-06, 'epoch': 2.87}


 73%|███████▎  | 840/1156 [15:21<05:18,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 2.7335640138408307e-06, 'epoch': 2.91}


 74%|███████▎  | 850/1156 [15:31<05:06,  1.00s/it]

{'loss': 0.0924, 'learning_rate': 2.647058823529412e-06, 'epoch': 2.94}


 74%|███████▍  | 860/1156 [15:41<04:58,  1.01s/it]

{'loss': 0.0098, 'learning_rate': 2.560553633217993e-06, 'epoch': 2.98}


                                                  
 75%|███████▌  | 867/1156 [16:17<04:25,  1.09it/s]

{'eval_loss': 2.3196675777435303, 'eval_accuracy': 0.7160839160839161, 'eval_precision': 0.734100570061155, 'eval_recall': 0.7160839160839161, 'eval_f1': 0.7156627114820663, 'eval_runtime': 28.7838, 'eval_samples_per_second': 24.84, 'eval_steps_per_second': 3.127, 'epoch': 3.0}


 75%|███████▌  | 870/1156 [16:26<29:07,  6.11s/it]

{'loss': 0.0006, 'learning_rate': 2.4740484429065744e-06, 'epoch': 3.01}


 76%|███████▌  | 880/1156 [16:36<05:17,  1.15s/it]

{'loss': 0.0002, 'learning_rate': 2.387543252595156e-06, 'epoch': 3.04}


 77%|███████▋  | 890/1156 [16:46<04:28,  1.01s/it]

{'loss': 0.0014, 'learning_rate': 2.3010380622837373e-06, 'epoch': 3.08}


 78%|███████▊  | 900/1156 [16:56<04:17,  1.01s/it]

{'loss': 0.0022, 'learning_rate': 2.2145328719723185e-06, 'epoch': 3.11}


 79%|███████▊  | 910/1156 [17:06<04:07,  1.00s/it]

{'loss': 0.0318, 'learning_rate': 2.1280276816609e-06, 'epoch': 3.15}


 80%|███████▉  | 920/1156 [17:16<03:56,  1.00s/it]

{'loss': 0.0016, 'learning_rate': 2.041522491349481e-06, 'epoch': 3.18}


 80%|████████  | 930/1156 [17:26<03:48,  1.01s/it]

{'loss': 0.0003, 'learning_rate': 1.9550173010380626e-06, 'epoch': 3.22}


 81%|████████▏ | 940/1156 [17:36<03:38,  1.01s/it]

{'loss': 0.0006, 'learning_rate': 1.8685121107266438e-06, 'epoch': 3.25}


 82%|████████▏ | 950/1156 [17:46<03:28,  1.01s/it]

{'loss': 0.0003, 'learning_rate': 1.7820069204152252e-06, 'epoch': 3.29}


 83%|████████▎ | 960/1156 [17:56<03:17,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 1.6955017301038063e-06, 'epoch': 3.32}


 84%|████████▍ | 970/1156 [18:06<03:06,  1.00s/it]

{'loss': 0.0003, 'learning_rate': 1.6089965397923877e-06, 'epoch': 3.36}


 85%|████████▍ | 980/1156 [18:16<02:57,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 1.522491349480969e-06, 'epoch': 3.39}


 86%|████████▌ | 990/1156 [18:26<02:46,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.4359861591695503e-06, 'epoch': 3.43}


 87%|████████▋ | 1000/1156 [18:37<02:36,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 1.3494809688581318e-06, 'epoch': 3.46}


 87%|████████▋ | 1010/1156 [18:47<02:26,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 1.2629757785467128e-06, 'epoch': 3.49}


 88%|████████▊ | 1020/1156 [18:57<02:19,  1.03s/it]

{'loss': 0.0076, 'learning_rate': 1.1764705882352942e-06, 'epoch': 3.53}


 89%|████████▉ | 1030/1156 [19:08<02:09,  1.03s/it]

{'loss': 0.0008, 'learning_rate': 1.0899653979238757e-06, 'epoch': 3.56}


 90%|████████▉ | 1040/1156 [19:18<01:56,  1.01s/it]

{'loss': 0.0186, 'learning_rate': 1.0034602076124569e-06, 'epoch': 3.6}


 91%|█████████ | 1050/1156 [19:28<01:46,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 9.169550173010382e-07, 'epoch': 3.63}


 92%|█████████▏| 1060/1156 [19:38<01:36,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 8.304498269896194e-07, 'epoch': 3.67}


 93%|█████████▎| 1070/1156 [19:48<01:26,  1.01s/it]

{'loss': 0.0121, 'learning_rate': 7.439446366782008e-07, 'epoch': 3.7}


 93%|█████████▎| 1080/1156 [19:58<01:16,  1.01s/it]

{'loss': 0.0113, 'learning_rate': 6.57439446366782e-07, 'epoch': 3.74}


 94%|█████████▍| 1090/1156 [20:08<01:06,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 5.709342560553634e-07, 'epoch': 3.77}


 95%|█████████▌| 1100/1156 [20:18<00:56,  1.01s/it]

{'loss': 0.0021, 'learning_rate': 4.844290657439446e-07, 'epoch': 3.81}


 96%|█████████▌| 1110/1156 [20:28<00:46,  1.01s/it]

{'loss': 0.0435, 'learning_rate': 3.9792387543252597e-07, 'epoch': 3.84}


 97%|█████████▋| 1120/1156 [20:38<00:36,  1.00s/it]

{'loss': 0.0013, 'learning_rate': 3.114186851211073e-07, 'epoch': 3.88}


 98%|█████████▊| 1130/1156 [20:48<00:26,  1.01s/it]

{'loss': 0.0012, 'learning_rate': 2.249134948096886e-07, 'epoch': 3.91}


 99%|█████████▊| 1140/1156 [20:58<00:16,  1.01s/it]

{'loss': 0.001, 'learning_rate': 1.384083044982699e-07, 'epoch': 3.94}


 99%|█████████▉| 1150/1156 [21:08<00:06,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 5.1903114186851215e-08, 'epoch': 3.98}


                                                   
100%|██████████| 1156/1156 [21:42<00:00,  1.10it/s]

{'eval_loss': 2.170813798904419, 'eval_accuracy': 0.7118881118881119, 'eval_precision': 0.7103600163037759, 'eval_recall': 0.7118881118881119, 'eval_f1': 0.7089078715588183, 'eval_runtime': 27.9081, 'eval_samples_per_second': 25.62, 'eval_steps_per_second': 3.225, 'epoch': 4.0}


100%|██████████| 1156/1156 [21:52<00:00,  1.14s/it]


{'train_runtime': 1312.4316, 'train_samples_per_second': 7.034, 'train_steps_per_second': 0.881, 'train_loss': 0.05004586384619206, 'epoch': 4.0}


100%|██████████| 90/90 [00:27<00:00,  3.22it/s]
100%|██████████| 90/90 [00:27<00:00,  3.22it/s]
100%|██████████| 90/90 [00:27<00:00,  3.26it/s]


{'accuracy': 0.6979020979020979, 'precision': 0.7008202323991798, 'recall': 0.6979020979020979, 'f1': 0.6988221741414398}
{'accuracy': 0.6918767507002801, 'precision': 0.6910124850126986, 'recall': 0.6918767507002801, 'f1': 0.6908958308785171}


  1%|          | 10/1156 [00:11<19:55,  1.04s/it]

{'loss': 0.045, 'learning_rate': 9.913494809688582e-06, 'epoch': 0.03}


  2%|▏         | 20/1156 [00:21<19:05,  1.01s/it]

{'loss': 0.0003, 'learning_rate': 9.826989619377163e-06, 'epoch': 0.07}


  3%|▎         | 30/1156 [00:31<18:54,  1.01s/it]

{'loss': 0.0692, 'learning_rate': 9.740484429065744e-06, 'epoch': 0.1}


  3%|▎         | 40/1156 [00:41<18:38,  1.00s/it]

{'loss': 0.1511, 'learning_rate': 9.653979238754326e-06, 'epoch': 0.14}


  4%|▍         | 50/1156 [00:51<18:32,  1.01s/it]

{'loss': 0.1114, 'learning_rate': 9.567474048442907e-06, 'epoch': 0.17}


  5%|▌         | 60/1156 [01:01<18:25,  1.01s/it]

{'loss': 0.0297, 'learning_rate': 9.480968858131488e-06, 'epoch': 0.21}


  6%|▌         | 70/1156 [01:11<18:16,  1.01s/it]

{'loss': 0.0008, 'learning_rate': 9.39446366782007e-06, 'epoch': 0.24}


  7%|▋         | 80/1156 [01:22<18:21,  1.02s/it]

{'loss': 0.1003, 'learning_rate': 9.307958477508652e-06, 'epoch': 0.28}


  8%|▊         | 90/1156 [01:32<18:05,  1.02s/it]

{'loss': 0.0273, 'learning_rate': 9.221453287197234e-06, 'epoch': 0.31}


  9%|▊         | 100/1156 [01:42<17:44,  1.01s/it]

{'loss': 0.0012, 'learning_rate': 9.134948096885815e-06, 'epoch': 0.35}


 10%|▉         | 110/1156 [01:52<17:38,  1.01s/it]

{'loss': 0.0013, 'learning_rate': 9.048442906574394e-06, 'epoch': 0.38}


 10%|█         | 120/1156 [02:02<17:23,  1.01s/it]

{'loss': 0.0006, 'learning_rate': 8.961937716262975e-06, 'epoch': 0.42}


 11%|█         | 130/1156 [02:12<17:10,  1.00s/it]

{'loss': 0.0078, 'learning_rate': 8.875432525951558e-06, 'epoch': 0.45}


 12%|█▏        | 140/1156 [02:22<16:58,  1.00s/it]

{'loss': 0.0555, 'learning_rate': 8.78892733564014e-06, 'epoch': 0.48}


 13%|█▎        | 150/1156 [02:32<16:48,  1.00s/it]

{'loss': 0.0009, 'learning_rate': 8.702422145328721e-06, 'epoch': 0.52}


 14%|█▍        | 160/1156 [02:42<16:39,  1.00s/it]

{'loss': 0.0232, 'learning_rate': 8.615916955017302e-06, 'epoch': 0.55}


 15%|█▍        | 170/1156 [02:52<16:32,  1.01s/it]

{'loss': 0.0027, 'learning_rate': 8.529411764705883e-06, 'epoch': 0.59}


 16%|█▌        | 180/1156 [03:02<16:20,  1.01s/it]

{'loss': 0.0003, 'learning_rate': 8.442906574394465e-06, 'epoch': 0.62}


 16%|█▋        | 190/1156 [03:12<16:07,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 8.356401384083046e-06, 'epoch': 0.66}


 17%|█▋        | 200/1156 [03:22<16:01,  1.01s/it]

{'loss': 0.0876, 'learning_rate': 8.269896193771627e-06, 'epoch': 0.69}


 18%|█▊        | 210/1156 [03:33<15:53,  1.01s/it]

{'loss': 0.1946, 'learning_rate': 8.183391003460208e-06, 'epoch': 0.73}


 19%|█▉        | 220/1156 [03:43<15:43,  1.01s/it]

{'loss': 0.0636, 'learning_rate': 8.09688581314879e-06, 'epoch': 0.76}


 20%|█▉        | 230/1156 [03:53<15:30,  1.00s/it]

{'loss': 0.0151, 'learning_rate': 8.01038062283737e-06, 'epoch': 0.8}


 21%|██        | 240/1156 [04:03<15:18,  1.00s/it]

{'loss': 0.0012, 'learning_rate': 7.923875432525952e-06, 'epoch': 0.83}


 22%|██▏       | 250/1156 [04:13<15:11,  1.01s/it]

{'loss': 0.1443, 'learning_rate': 7.837370242214533e-06, 'epoch': 0.87}


 22%|██▏       | 260/1156 [04:23<15:47,  1.06s/it]

{'loss': 0.0503, 'learning_rate': 7.750865051903114e-06, 'epoch': 0.9}


 23%|██▎       | 270/1156 [04:34<15:03,  1.02s/it]

{'loss': 0.0061, 'learning_rate': 7.664359861591696e-06, 'epoch': 0.93}


 24%|██▍       | 280/1156 [04:44<14:41,  1.01s/it]

{'loss': 0.1759, 'learning_rate': 7.577854671280277e-06, 'epoch': 0.97}


                                                  
 25%|██▌       | 289/1156 [05:21<13:20,  1.08it/s]

{'eval_loss': 2.209754705429077, 'eval_accuracy': 0.6979020979020979, 'eval_precision': 0.7100900077796256, 'eval_recall': 0.6979020979020979, 'eval_f1': 0.6976076237404896, 'eval_runtime': 28.848, 'eval_samples_per_second': 24.785, 'eval_steps_per_second': 3.12, 'epoch': 1.0}


 25%|██▌       | 290/1156 [05:29<2:48:53, 11.70s/it]

{'loss': 0.1812, 'learning_rate': 7.491349480968859e-06, 'epoch': 1.0}


 26%|██▌       | 300/1156 [05:39<18:38,  1.31s/it]  

{'loss': 0.0423, 'learning_rate': 7.40484429065744e-06, 'epoch': 1.04}


 27%|██▋       | 310/1156 [05:49<14:15,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 7.318339100346021e-06, 'epoch': 1.07}


 28%|██▊       | 320/1156 [05:59<13:56,  1.00s/it]

{'loss': 0.0033, 'learning_rate': 7.2318339100346025e-06, 'epoch': 1.11}


 29%|██▊       | 330/1156 [06:09<13:45,  1.00it/s]

{'loss': 0.1896, 'learning_rate': 7.145328719723184e-06, 'epoch': 1.14}


 29%|██▉       | 340/1156 [06:19<13:37,  1.00s/it]

{'loss': 0.1594, 'learning_rate': 7.058823529411766e-06, 'epoch': 1.18}


 30%|███       | 350/1156 [06:29<13:29,  1.00s/it]

{'loss': 0.0006, 'learning_rate': 6.972318339100347e-06, 'epoch': 1.21}


 31%|███       | 360/1156 [06:40<13:19,  1.00s/it]

{'loss': 0.02, 'learning_rate': 6.885813148788928e-06, 'epoch': 1.25}


 32%|███▏      | 370/1156 [06:50<13:12,  1.01s/it]

{'loss': 0.0006, 'learning_rate': 6.799307958477509e-06, 'epoch': 1.28}


 33%|███▎      | 380/1156 [07:00<13:02,  1.01s/it]

{'loss': 0.0404, 'learning_rate': 6.71280276816609e-06, 'epoch': 1.31}


 34%|███▎      | 390/1156 [07:10<12:49,  1.00s/it]

{'loss': 0.0158, 'learning_rate': 6.626297577854672e-06, 'epoch': 1.35}


 35%|███▍      | 400/1156 [07:20<12:41,  1.01s/it]

{'loss': 0.001, 'learning_rate': 6.539792387543253e-06, 'epoch': 1.38}


 35%|███▌      | 410/1156 [07:30<12:36,  1.01s/it]

{'loss': 0.0006, 'learning_rate': 6.453287197231834e-06, 'epoch': 1.42}


 36%|███▋      | 420/1156 [07:40<12:17,  1.00s/it]

{'loss': 0.0193, 'learning_rate': 6.3667820069204156e-06, 'epoch': 1.45}


 37%|███▋      | 430/1156 [07:50<12:22,  1.02s/it]

{'loss': 0.0006, 'learning_rate': 6.280276816608997e-06, 'epoch': 1.49}


 38%|███▊      | 440/1156 [08:00<12:01,  1.01s/it]

{'loss': 0.1245, 'learning_rate': 6.193771626297579e-06, 'epoch': 1.52}


 39%|███▉      | 450/1156 [08:10<11:49,  1.01s/it]

{'loss': 0.0995, 'learning_rate': 6.10726643598616e-06, 'epoch': 1.56}


 40%|███▉      | 460/1156 [08:20<11:42,  1.01s/it]

{'loss': 0.0003, 'learning_rate': 6.020761245674741e-06, 'epoch': 1.59}


 41%|████      | 470/1156 [08:30<11:33,  1.01s/it]

{'loss': 0.0054, 'learning_rate': 5.9342560553633225e-06, 'epoch': 1.63}


 42%|████▏     | 480/1156 [08:40<11:16,  1.00s/it]

{'loss': 0.0058, 'learning_rate': 5.847750865051903e-06, 'epoch': 1.66}


 42%|████▏     | 490/1156 [08:50<11:08,  1.00s/it]

{'loss': 0.0642, 'learning_rate': 5.761245674740484e-06, 'epoch': 1.7}


 43%|████▎     | 500/1156 [09:00<10:55,  1.00it/s]

{'loss': 0.0709, 'learning_rate': 5.674740484429066e-06, 'epoch': 1.73}


 44%|████▍     | 510/1156 [09:11<10:49,  1.01s/it]

{'loss': 0.006, 'learning_rate': 5.588235294117647e-06, 'epoch': 1.76}


 45%|████▍     | 520/1156 [09:21<10:41,  1.01s/it]

{'loss': 0.0945, 'learning_rate': 5.501730103806229e-06, 'epoch': 1.8}


 46%|████▌     | 530/1156 [09:31<10:29,  1.01s/it]

{'loss': 0.0018, 'learning_rate': 5.41522491349481e-06, 'epoch': 1.83}


 47%|████▋     | 540/1156 [09:41<10:25,  1.01s/it]

{'loss': 0.064, 'learning_rate': 5.328719723183391e-06, 'epoch': 1.87}


 48%|████▊     | 550/1156 [09:51<10:08,  1.00s/it]

{'loss': 0.079, 'learning_rate': 5.242214532871973e-06, 'epoch': 1.9}


 48%|████▊     | 560/1156 [10:01<10:00,  1.01s/it]

{'loss': 0.0298, 'learning_rate': 5.155709342560554e-06, 'epoch': 1.94}


 49%|████▉     | 570/1156 [10:11<09:47,  1.00s/it]

{'loss': 0.0003, 'learning_rate': 5.069204152249136e-06, 'epoch': 1.97}


                                                  
 50%|█████     | 578/1156 [10:47<09:45,  1.01s/it]

{'eval_loss': 2.326932668685913, 'eval_accuracy': 0.7034965034965035, 'eval_precision': 0.7011079302381438, 'eval_recall': 0.7034965034965035, 'eval_f1': 0.701388372998209, 'eval_runtime': 27.9709, 'eval_samples_per_second': 25.562, 'eval_steps_per_second': 3.218, 'epoch': 2.0}


 50%|█████     | 580/1156 [10:56<1:20:04,  8.34s/it]

{'loss': 0.0599, 'learning_rate': 4.982698961937717e-06, 'epoch': 2.01}


 51%|█████     | 590/1156 [11:06<11:26,  1.21s/it]  

{'loss': 0.0001, 'learning_rate': 4.896193771626298e-06, 'epoch': 2.04}


 52%|█████▏    | 600/1156 [11:16<09:19,  1.01s/it]

{'loss': 0.1068, 'learning_rate': 4.809688581314879e-06, 'epoch': 2.08}


 53%|█████▎    | 610/1156 [11:26<09:08,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 4.7231833910034605e-06, 'epoch': 2.11}


 54%|█████▎    | 620/1156 [11:36<09:01,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 4.636678200692042e-06, 'epoch': 2.15}


 54%|█████▍    | 630/1156 [11:46<08:47,  1.00s/it]

{'loss': 0.0003, 'learning_rate': 4.550173010380623e-06, 'epoch': 2.18}


 55%|█████▌    | 640/1156 [11:56<08:37,  1.00s/it]

{'loss': 0.001, 'learning_rate': 4.463667820069205e-06, 'epoch': 2.21}


 56%|█████▌    | 650/1156 [12:06<08:28,  1.01s/it]

{'loss': 0.0097, 'learning_rate': 4.377162629757785e-06, 'epoch': 2.25}


 57%|█████▋    | 660/1156 [12:16<08:16,  1.00s/it]

{'loss': 0.1087, 'learning_rate': 4.2906574394463675e-06, 'epoch': 2.28}


 58%|█████▊    | 670/1156 [12:26<08:07,  1.00s/it]

{'loss': 0.0776, 'learning_rate': 4.204152249134949e-06, 'epoch': 2.32}


 59%|█████▉    | 680/1156 [12:37<07:57,  1.00s/it]

{'loss': 0.003, 'learning_rate': 4.11764705882353e-06, 'epoch': 2.35}


 60%|█████▉    | 690/1156 [12:47<07:48,  1.00s/it]

{'loss': 0.0013, 'learning_rate': 4.031141868512111e-06, 'epoch': 2.39}


 61%|██████    | 700/1156 [12:57<07:38,  1.01s/it]

{'loss': 0.0395, 'learning_rate': 3.944636678200692e-06, 'epoch': 2.42}


 61%|██████▏   | 710/1156 [13:07<07:28,  1.01s/it]

{'loss': 0.0003, 'learning_rate': 3.8581314878892736e-06, 'epoch': 2.46}


 62%|██████▏   | 720/1156 [13:17<07:19,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 3.7716262975778552e-06, 'epoch': 2.49}


 63%|██████▎   | 730/1156 [13:27<07:08,  1.00s/it]

{'loss': 0.0107, 'learning_rate': 3.685121107266436e-06, 'epoch': 2.53}


 64%|██████▍   | 740/1156 [13:37<06:57,  1.00s/it]

{'loss': 0.0004, 'learning_rate': 3.5986159169550177e-06, 'epoch': 2.56}


 65%|██████▍   | 750/1156 [13:47<06:46,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 3.512110726643599e-06, 'epoch': 2.6}


 66%|██████▌   | 760/1156 [13:57<06:55,  1.05s/it]

{'loss': 0.0026, 'learning_rate': 3.42560553633218e-06, 'epoch': 2.63}


 67%|██████▋   | 770/1156 [14:07<06:28,  1.01s/it]

{'loss': 0.0005, 'learning_rate': 3.3391003460207618e-06, 'epoch': 2.66}


 67%|██████▋   | 780/1156 [14:17<06:19,  1.01s/it]

{'loss': 0.001, 'learning_rate': 3.2525951557093425e-06, 'epoch': 2.7}


 68%|██████▊   | 790/1156 [14:27<06:06,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 3.166089965397924e-06, 'epoch': 2.73}


 69%|██████▉   | 800/1156 [14:38<05:58,  1.01s/it]

{'loss': 0.0058, 'learning_rate': 3.0795847750865054e-06, 'epoch': 2.77}


 70%|███████   | 810/1156 [14:48<05:48,  1.01s/it]

{'loss': 0.0003, 'learning_rate': 2.9930795847750866e-06, 'epoch': 2.8}


 71%|███████   | 820/1156 [14:58<05:37,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 2.9065743944636683e-06, 'epoch': 2.84}


 72%|███████▏  | 830/1156 [15:08<05:27,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 2.820069204152249e-06, 'epoch': 2.87}


 73%|███████▎  | 840/1156 [15:18<05:17,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 2.7335640138408307e-06, 'epoch': 2.91}


 74%|███████▎  | 850/1156 [15:28<05:07,  1.01s/it]

{'loss': 0.0004, 'learning_rate': 2.647058823529412e-06, 'epoch': 2.94}


 74%|███████▍  | 860/1156 [15:38<04:57,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 2.560553633217993e-06, 'epoch': 2.98}


                                                  
 75%|███████▌  | 867/1156 [16:13<04:23,  1.10it/s]

{'eval_loss': 2.5237162113189697, 'eval_accuracy': 0.7132867132867133, 'eval_precision': 0.7128779075652023, 'eval_recall': 0.7132867132867133, 'eval_f1': 0.712933422610842, 'eval_runtime': 28.3324, 'eval_samples_per_second': 25.236, 'eval_steps_per_second': 3.177, 'epoch': 3.0}


 75%|███████▌  | 870/1156 [16:25<30:37,  6.42s/it]

{'loss': 0.0001, 'learning_rate': 2.4740484429065744e-06, 'epoch': 3.01}


 76%|███████▌  | 880/1156 [16:35<05:24,  1.17s/it]

{'loss': 0.0001, 'learning_rate': 2.387543252595156e-06, 'epoch': 3.04}


 77%|███████▋  | 890/1156 [16:45<04:31,  1.02s/it]

{'loss': 0.0646, 'learning_rate': 2.3010380622837373e-06, 'epoch': 3.08}


 78%|███████▊  | 900/1156 [16:55<04:20,  1.02s/it]

{'loss': 0.0001, 'learning_rate': 2.2145328719723185e-06, 'epoch': 3.11}


 79%|███████▊  | 910/1156 [17:05<04:07,  1.01s/it]

{'loss': 0.0014, 'learning_rate': 2.1280276816609e-06, 'epoch': 3.15}


 80%|███████▉  | 920/1156 [17:15<03:57,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 2.041522491349481e-06, 'epoch': 3.18}


 80%|████████  | 930/1156 [17:26<03:47,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 1.9550173010380626e-06, 'epoch': 3.22}


 81%|████████▏ | 940/1156 [17:36<03:39,  1.02s/it]

{'loss': 0.0001, 'learning_rate': 1.8685121107266438e-06, 'epoch': 3.25}


 82%|████████▏ | 950/1156 [17:46<03:27,  1.01s/it]

{'loss': 0.0624, 'learning_rate': 1.7820069204152252e-06, 'epoch': 3.29}


 83%|████████▎ | 960/1156 [17:56<03:17,  1.01s/it]

{'loss': 0.1231, 'learning_rate': 1.6955017301038063e-06, 'epoch': 3.32}


 84%|████████▍ | 970/1156 [18:06<03:07,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 1.6089965397923877e-06, 'epoch': 3.36}


 85%|████████▍ | 980/1156 [18:16<02:57,  1.01s/it]

{'loss': 0.0781, 'learning_rate': 1.522491349480969e-06, 'epoch': 3.39}


 86%|████████▌ | 990/1156 [18:26<02:47,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 1.4359861591695503e-06, 'epoch': 3.43}


 87%|████████▋ | 1000/1156 [18:36<02:37,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 1.3494809688581318e-06, 'epoch': 3.46}


 87%|████████▋ | 1010/1156 [18:46<02:27,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 1.2629757785467128e-06, 'epoch': 3.49}


 88%|████████▊ | 1020/1156 [18:56<02:17,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 1.1764705882352942e-06, 'epoch': 3.53}


 89%|████████▉ | 1030/1156 [19:06<02:06,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.0899653979238757e-06, 'epoch': 3.56}


 90%|████████▉ | 1040/1156 [19:17<01:56,  1.01s/it]

{'loss': 0.0062, 'learning_rate': 1.0034602076124569e-06, 'epoch': 3.6}


 91%|█████████ | 1050/1156 [19:27<01:48,  1.02s/it]

{'loss': 0.0001, 'learning_rate': 9.169550173010382e-07, 'epoch': 3.63}


 92%|█████████▏| 1060/1156 [19:37<01:36,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 8.304498269896194e-07, 'epoch': 3.67}


 93%|█████████▎| 1070/1156 [19:47<01:26,  1.01s/it]

{'loss': 0.0003, 'learning_rate': 7.439446366782008e-07, 'epoch': 3.7}


 93%|█████████▎| 1080/1156 [19:57<01:16,  1.01s/it]

{'loss': 0.0005, 'learning_rate': 6.57439446366782e-07, 'epoch': 3.74}


 94%|█████████▍| 1090/1156 [20:07<01:06,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 5.709342560553634e-07, 'epoch': 3.77}


 95%|█████████▌| 1100/1156 [20:17<00:56,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 4.844290657439446e-07, 'epoch': 3.81}


 96%|█████████▌| 1110/1156 [20:28<00:46,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 3.9792387543252597e-07, 'epoch': 3.84}


 97%|█████████▋| 1120/1156 [20:38<00:36,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 3.114186851211073e-07, 'epoch': 3.88}


 98%|█████████▊| 1130/1156 [20:48<00:26,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 2.249134948096886e-07, 'epoch': 3.91}


 99%|█████████▊| 1140/1156 [20:58<00:16,  1.01s/it]

{'loss': 0.0006, 'learning_rate': 1.384083044982699e-07, 'epoch': 3.94}


 99%|█████████▉| 1150/1156 [21:08<00:06,  1.01s/it]

{'loss': 0.0008, 'learning_rate': 5.1903114186851215e-08, 'epoch': 3.98}


                                                   
100%|██████████| 1156/1156 [21:43<00:00,  1.08it/s]

{'eval_loss': 2.5502054691314697, 'eval_accuracy': 0.7104895104895105, 'eval_precision': 0.7175039446926421, 'eval_recall': 0.7104895104895105, 'eval_f1': 0.7100093058396281, 'eval_runtime': 28.873, 'eval_samples_per_second': 24.764, 'eval_steps_per_second': 3.117, 'epoch': 4.0}


100%|██████████| 1156/1156 [21:51<00:00,  1.13s/it]


{'train_runtime': 1311.8761, 'train_samples_per_second': 7.037, 'train_steps_per_second': 0.881, 'train_loss': 0.029924218900881185, 'epoch': 4.0}


100%|██████████| 90/90 [00:28<00:00,  3.11it/s]
100%|██████████| 90/90 [00:27<00:00,  3.23it/s]
100%|██████████| 90/90 [00:28<00:00,  3.21it/s]


{'accuracy': 0.6979020979020979, 'precision': 0.7100900077796256, 'recall': 0.6979020979020979, 'f1': 0.6976076237404896}
{'accuracy': 0.6876750700280112, 'precision': 0.6932600406580898, 'recall': 0.6876750700280112, 'f1': 0.6855243320405557}


  1%|          | 10/1156 [00:11<19:40,  1.03s/it]

{'loss': 0.0291, 'learning_rate': 9.913494809688582e-06, 'epoch': 0.03}


  2%|▏         | 20/1156 [00:21<19:03,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 9.826989619377163e-06, 'epoch': 0.07}


  3%|▎         | 30/1156 [00:31<18:58,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 9.740484429065744e-06, 'epoch': 0.1}


  3%|▎         | 40/1156 [00:41<18:41,  1.00s/it]

{'loss': 0.0521, 'learning_rate': 9.653979238754326e-06, 'epoch': 0.14}


  4%|▍         | 50/1156 [00:51<18:42,  1.01s/it]

{'loss': 0.1498, 'learning_rate': 9.567474048442907e-06, 'epoch': 0.17}


  5%|▌         | 60/1156 [01:02<18:27,  1.01s/it]

{'loss': 0.0147, 'learning_rate': 9.480968858131488e-06, 'epoch': 0.21}


  6%|▌         | 70/1156 [01:12<18:47,  1.04s/it]

{'loss': 0.0709, 'learning_rate': 9.39446366782007e-06, 'epoch': 0.24}


  7%|▋         | 80/1156 [01:22<18:15,  1.02s/it]

{'loss': 0.1507, 'learning_rate': 9.307958477508652e-06, 'epoch': 0.28}


  8%|▊         | 90/1156 [01:32<17:54,  1.01s/it]

{'loss': 0.0208, 'learning_rate': 9.221453287197234e-06, 'epoch': 0.31}


  9%|▊         | 100/1156 [01:42<17:46,  1.01s/it]

{'loss': 0.0257, 'learning_rate': 9.134948096885815e-06, 'epoch': 0.35}


 10%|▉         | 110/1156 [01:53<17:49,  1.02s/it]

{'loss': 0.3336, 'learning_rate': 9.048442906574394e-06, 'epoch': 0.38}


 10%|█         | 120/1156 [02:03<17:30,  1.01s/it]

{'loss': 0.0468, 'learning_rate': 8.961937716262975e-06, 'epoch': 0.42}


 11%|█         | 130/1156 [02:13<17:12,  1.01s/it]

{'loss': 0.0006, 'learning_rate': 8.875432525951558e-06, 'epoch': 0.45}


 12%|█▏        | 140/1156 [02:23<17:02,  1.01s/it]

{'loss': 0.1014, 'learning_rate': 8.78892733564014e-06, 'epoch': 0.48}


 13%|█▎        | 150/1156 [02:33<16:52,  1.01s/it]

{'loss': 0.0011, 'learning_rate': 8.702422145328721e-06, 'epoch': 0.52}


 14%|█▍        | 160/1156 [02:43<16:40,  1.00s/it]

{'loss': 0.1331, 'learning_rate': 8.615916955017302e-06, 'epoch': 0.55}


 15%|█▍        | 170/1156 [02:53<16:29,  1.00s/it]

{'loss': 0.0007, 'learning_rate': 8.529411764705883e-06, 'epoch': 0.59}


 16%|█▌        | 180/1156 [03:03<16:26,  1.01s/it]

{'loss': 0.0011, 'learning_rate': 8.442906574394465e-06, 'epoch': 0.62}


 16%|█▋        | 190/1156 [03:13<16:10,  1.01s/it]

{'loss': 0.0007, 'learning_rate': 8.356401384083046e-06, 'epoch': 0.66}


 17%|█▋        | 200/1156 [03:23<16:04,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 8.269896193771627e-06, 'epoch': 0.69}


 18%|█▊        | 210/1156 [03:34<15:51,  1.01s/it]

{'loss': 0.0717, 'learning_rate': 8.183391003460208e-06, 'epoch': 0.73}


 19%|█▉        | 220/1156 [03:44<15:46,  1.01s/it]

{'loss': 0.0508, 'learning_rate': 8.09688581314879e-06, 'epoch': 0.76}


 20%|█▉        | 230/1156 [03:54<15:34,  1.01s/it]

{'loss': 0.0164, 'learning_rate': 8.01038062283737e-06, 'epoch': 0.8}


 21%|██        | 240/1156 [04:04<15:20,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 7.923875432525952e-06, 'epoch': 0.83}


 22%|██▏       | 250/1156 [04:14<15:13,  1.01s/it]

{'loss': 0.0527, 'learning_rate': 7.837370242214533e-06, 'epoch': 0.87}


 22%|██▏       | 260/1156 [04:24<15:00,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 7.750865051903114e-06, 'epoch': 0.9}


 23%|██▎       | 270/1156 [04:34<14:53,  1.01s/it]

{'loss': 0.0005, 'learning_rate': 7.664359861591696e-06, 'epoch': 0.93}


 24%|██▍       | 280/1156 [04:44<14:44,  1.01s/it]

{'loss': 0.1352, 'learning_rate': 7.577854671280277e-06, 'epoch': 0.97}


                                                  
 25%|██▌       | 289/1156 [05:21<13:13,  1.09it/s]

{'eval_loss': 2.3541903495788574, 'eval_accuracy': 0.7090909090909091, 'eval_precision': 0.7139717250480606, 'eval_recall': 0.7090909090909091, 'eval_f1': 0.7095146809351461, 'eval_runtime': 28.4403, 'eval_samples_per_second': 25.14, 'eval_steps_per_second': 3.165, 'epoch': 1.0}


 25%|██▌       | 290/1156 [05:29<2:46:59, 11.57s/it]

{'loss': 0.472, 'learning_rate': 7.491349480968859e-06, 'epoch': 1.0}


 26%|██▌       | 300/1156 [05:39<18:32,  1.30s/it]  

{'loss': 0.0005, 'learning_rate': 7.40484429065744e-06, 'epoch': 1.04}


 27%|██▋       | 310/1156 [05:50<14:18,  1.01s/it]

{'loss': 0.104, 'learning_rate': 7.318339100346021e-06, 'epoch': 1.07}


 28%|██▊       | 320/1156 [06:00<13:58,  1.00s/it]

{'loss': 0.0069, 'learning_rate': 7.2318339100346025e-06, 'epoch': 1.11}


 29%|██▊       | 330/1156 [06:10<13:58,  1.02s/it]

{'loss': 0.0625, 'learning_rate': 7.145328719723184e-06, 'epoch': 1.14}


 29%|██▉       | 340/1156 [06:20<13:40,  1.01s/it]

{'loss': 0.0766, 'learning_rate': 7.058823529411766e-06, 'epoch': 1.18}


 30%|███       | 350/1156 [06:30<13:27,  1.00s/it]

{'loss': 0.0379, 'learning_rate': 6.972318339100347e-06, 'epoch': 1.21}


 31%|███       | 360/1156 [06:40<13:19,  1.00s/it]

{'loss': 0.1034, 'learning_rate': 6.885813148788928e-06, 'epoch': 1.25}


 32%|███▏      | 370/1156 [06:50<13:06,  1.00s/it]

{'loss': 0.0003, 'learning_rate': 6.799307958477509e-06, 'epoch': 1.28}


 33%|███▎      | 380/1156 [07:00<13:03,  1.01s/it]

{'loss': 0.0071, 'learning_rate': 6.71280276816609e-06, 'epoch': 1.31}


 34%|███▎      | 390/1156 [07:10<12:47,  1.00s/it]

{'loss': 0.0064, 'learning_rate': 6.626297577854672e-06, 'epoch': 1.35}


 35%|███▍      | 400/1156 [07:20<12:42,  1.01s/it]

{'loss': 0.0005, 'learning_rate': 6.539792387543253e-06, 'epoch': 1.38}


 35%|███▌      | 410/1156 [07:30<12:28,  1.00s/it]

{'loss': 0.0003, 'learning_rate': 6.453287197231834e-06, 'epoch': 1.42}


 36%|███▋      | 420/1156 [07:40<12:19,  1.01s/it]

{'loss': 0.021, 'learning_rate': 6.3667820069204156e-06, 'epoch': 1.45}


 37%|███▋      | 430/1156 [07:50<12:26,  1.03s/it]

{'loss': 0.0351, 'learning_rate': 6.280276816608997e-06, 'epoch': 1.49}


 38%|███▊      | 440/1156 [08:01<12:08,  1.02s/it]

{'loss': 0.0002, 'learning_rate': 6.193771626297579e-06, 'epoch': 1.52}


 39%|███▉      | 450/1156 [08:11<11:50,  1.01s/it]

{'loss': 0.162, 'learning_rate': 6.10726643598616e-06, 'epoch': 1.56}


 40%|███▉      | 460/1156 [08:21<11:38,  1.00s/it]

{'loss': 0.0003, 'learning_rate': 6.020761245674741e-06, 'epoch': 1.59}


 41%|████      | 470/1156 [08:31<11:32,  1.01s/it]

{'loss': 0.0945, 'learning_rate': 5.9342560553633225e-06, 'epoch': 1.63}


 42%|████▏     | 480/1156 [08:41<11:20,  1.01s/it]

{'loss': 0.001, 'learning_rate': 5.847750865051903e-06, 'epoch': 1.66}


 42%|████▏     | 490/1156 [08:51<11:11,  1.01s/it]

{'loss': 0.0003, 'learning_rate': 5.761245674740484e-06, 'epoch': 1.7}


 43%|████▎     | 500/1156 [09:01<11:00,  1.01s/it]

{'loss': 0.0253, 'learning_rate': 5.674740484429066e-06, 'epoch': 1.73}


 44%|████▍     | 510/1156 [09:11<10:49,  1.01s/it]

{'loss': 0.0012, 'learning_rate': 5.588235294117647e-06, 'epoch': 1.76}


 45%|████▍     | 520/1156 [09:21<10:40,  1.01s/it]

{'loss': 0.0084, 'learning_rate': 5.501730103806229e-06, 'epoch': 1.8}


 46%|████▌     | 530/1156 [09:32<10:30,  1.01s/it]

{'loss': 0.0179, 'learning_rate': 5.41522491349481e-06, 'epoch': 1.83}


 47%|████▋     | 540/1156 [09:42<10:19,  1.01s/it]

{'loss': 0.0003, 'learning_rate': 5.328719723183391e-06, 'epoch': 1.87}


 48%|████▊     | 550/1156 [09:52<10:10,  1.01s/it]

{'loss': 0.0007, 'learning_rate': 5.242214532871973e-06, 'epoch': 1.9}


 48%|████▊     | 560/1156 [10:02<10:02,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 5.155709342560554e-06, 'epoch': 1.94}


 49%|████▉     | 570/1156 [10:12<09:48,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 5.069204152249136e-06, 'epoch': 1.97}


                                                  
 50%|█████     | 578/1156 [10:48<08:48,  1.09it/s]

{'eval_loss': 2.5606448650360107, 'eval_accuracy': 0.6951048951048951, 'eval_precision': 0.6969064711805567, 'eval_recall': 0.6951048951048951, 'eval_f1': 0.6915857364702185, 'eval_runtime': 28.5961, 'eval_samples_per_second': 25.003, 'eval_steps_per_second': 3.147, 'epoch': 2.0}


 50%|█████     | 580/1156 [10:58<1:21:53,  8.53s/it]

{'loss': 0.0193, 'learning_rate': 4.982698961937717e-06, 'epoch': 2.01}


 51%|█████     | 590/1156 [11:08<11:34,  1.23s/it]  

{'loss': 0.0001, 'learning_rate': 4.896193771626298e-06, 'epoch': 2.04}


 52%|█████▏    | 600/1156 [11:18<09:19,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 4.809688581314879e-06, 'epoch': 2.08}


 53%|█████▎    | 610/1156 [11:28<09:09,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 4.7231833910034605e-06, 'epoch': 2.11}


 54%|█████▎    | 620/1156 [11:38<08:57,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 4.636678200692042e-06, 'epoch': 2.15}


 54%|█████▍    | 630/1156 [11:48<08:46,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 4.550173010380623e-06, 'epoch': 2.18}


 55%|█████▌    | 640/1156 [11:58<08:41,  1.01s/it]

{'loss': 0.0006, 'learning_rate': 4.463667820069205e-06, 'epoch': 2.21}


 56%|█████▌    | 650/1156 [12:08<08:28,  1.00s/it]

{'loss': 0.0003, 'learning_rate': 4.377162629757785e-06, 'epoch': 2.25}


 57%|█████▋    | 660/1156 [12:18<08:18,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 4.2906574394463675e-06, 'epoch': 2.28}


 58%|█████▊    | 670/1156 [12:28<08:08,  1.00s/it]

{'loss': 0.1067, 'learning_rate': 4.204152249134949e-06, 'epoch': 2.32}


 59%|█████▉    | 680/1156 [12:38<07:59,  1.01s/it]

{'loss': 0.0009, 'learning_rate': 4.11764705882353e-06, 'epoch': 2.35}


 60%|█████▉    | 690/1156 [12:48<07:50,  1.01s/it]

{'loss': 0.0031, 'learning_rate': 4.031141868512111e-06, 'epoch': 2.39}


 61%|██████    | 700/1156 [12:58<07:38,  1.00s/it]

{'loss': 0.0144, 'learning_rate': 3.944636678200692e-06, 'epoch': 2.42}


 61%|██████▏   | 710/1156 [13:08<07:27,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 3.8581314878892736e-06, 'epoch': 2.46}


 62%|██████▏   | 720/1156 [13:18<07:20,  1.01s/it]

{'loss': 0.058, 'learning_rate': 3.7716262975778552e-06, 'epoch': 2.49}


 63%|██████▎   | 730/1156 [13:28<07:09,  1.01s/it]

{'loss': 0.002, 'learning_rate': 3.685121107266436e-06, 'epoch': 2.53}


 64%|██████▍   | 740/1156 [13:39<06:58,  1.01s/it]

{'loss': 0.0004, 'learning_rate': 3.5986159169550177e-06, 'epoch': 2.56}


 65%|██████▍   | 750/1156 [13:49<06:56,  1.03s/it]

{'loss': 0.0001, 'learning_rate': 3.512110726643599e-06, 'epoch': 2.6}


 66%|██████▌   | 760/1156 [13:59<06:42,  1.02s/it]

{'loss': 0.0002, 'learning_rate': 3.42560553633218e-06, 'epoch': 2.63}


 67%|██████▋   | 770/1156 [14:09<06:28,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 3.3391003460207618e-06, 'epoch': 2.66}


 67%|██████▋   | 780/1156 [14:19<06:17,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 3.2525951557093425e-06, 'epoch': 2.7}


 68%|██████▊   | 790/1156 [14:29<06:09,  1.01s/it]

{'loss': 0.0005, 'learning_rate': 3.166089965397924e-06, 'epoch': 2.73}


 69%|██████▉   | 800/1156 [14:39<05:57,  1.00s/it]

{'loss': 0.0006, 'learning_rate': 3.0795847750865054e-06, 'epoch': 2.77}


 70%|███████   | 810/1156 [14:49<05:48,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 2.9930795847750866e-06, 'epoch': 2.8}


 71%|███████   | 820/1156 [15:00<05:40,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 2.9065743944636683e-06, 'epoch': 2.84}


 72%|███████▏  | 830/1156 [15:10<05:28,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 2.820069204152249e-06, 'epoch': 2.87}


 73%|███████▎  | 840/1156 [15:20<05:17,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 2.7335640138408307e-06, 'epoch': 2.91}


 74%|███████▎  | 850/1156 [15:30<05:08,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 2.647058823529412e-06, 'epoch': 2.94}


 74%|███████▍  | 860/1156 [15:40<04:56,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 2.560553633217993e-06, 'epoch': 2.98}


                                                  
 75%|███████▌  | 867/1156 [16:15<04:26,  1.08it/s]

{'eval_loss': 2.548391342163086, 'eval_accuracy': 0.7062937062937062, 'eval_precision': 0.7078089689949246, 'eval_recall': 0.7062937062937062, 'eval_f1': 0.706649286516448, 'eval_runtime': 28.8366, 'eval_samples_per_second': 24.795, 'eval_steps_per_second': 3.121, 'epoch': 3.0}


 75%|███████▌  | 870/1156 [16:26<29:56,  6.28s/it]

{'loss': 0.0001, 'learning_rate': 2.4740484429065744e-06, 'epoch': 3.01}


 76%|███████▌  | 880/1156 [16:36<05:18,  1.15s/it]

{'loss': 0.0001, 'learning_rate': 2.387543252595156e-06, 'epoch': 3.04}


 77%|███████▋  | 890/1156 [16:46<04:32,  1.03s/it]

{'loss': 0.0037, 'learning_rate': 2.3010380622837373e-06, 'epoch': 3.08}


 78%|███████▊  | 900/1156 [16:56<04:27,  1.05s/it]

{'loss': 0.0001, 'learning_rate': 2.2145328719723185e-06, 'epoch': 3.11}


 79%|███████▊  | 910/1156 [17:06<04:06,  1.00s/it]

{'loss': 0.0005, 'learning_rate': 2.1280276816609e-06, 'epoch': 3.15}


 80%|███████▉  | 920/1156 [17:16<03:57,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 2.041522491349481e-06, 'epoch': 3.18}


 80%|████████  | 930/1156 [17:26<03:46,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.9550173010380626e-06, 'epoch': 3.22}


 81%|████████▏ | 940/1156 [17:36<03:37,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 1.8685121107266438e-06, 'epoch': 3.25}


 82%|████████▏ | 950/1156 [17:46<03:27,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 1.7820069204152252e-06, 'epoch': 3.29}


 83%|████████▎ | 960/1156 [17:56<03:17,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 1.6955017301038063e-06, 'epoch': 3.32}


 84%|████████▍ | 970/1156 [18:07<03:07,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 1.6089965397923877e-06, 'epoch': 3.36}


 85%|████████▍ | 980/1156 [18:17<02:56,  1.01s/it]

{'loss': 0.0668, 'learning_rate': 1.522491349480969e-06, 'epoch': 3.39}


 86%|████████▌ | 990/1156 [18:27<02:46,  1.01s/it]

{'loss': 0.0045, 'learning_rate': 1.4359861591695503e-06, 'epoch': 3.43}


 87%|████████▋ | 1000/1156 [18:37<02:37,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 1.3494809688581318e-06, 'epoch': 3.46}


 87%|████████▋ | 1010/1156 [18:47<02:26,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 1.2629757785467128e-06, 'epoch': 3.49}


 88%|████████▊ | 1020/1156 [18:57<02:17,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 1.1764705882352942e-06, 'epoch': 3.53}


 89%|████████▉ | 1030/1156 [19:07<02:06,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.0899653979238757e-06, 'epoch': 3.56}


 90%|████████▉ | 1040/1156 [19:17<01:56,  1.00s/it]

{'loss': 0.0003, 'learning_rate': 1.0034602076124569e-06, 'epoch': 3.6}


 91%|█████████ | 1050/1156 [19:27<01:46,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 9.169550173010382e-07, 'epoch': 3.63}


 92%|█████████▏| 1060/1156 [19:37<01:36,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 8.304498269896194e-07, 'epoch': 3.67}


 93%|█████████▎| 1070/1156 [19:47<01:29,  1.04s/it]

{'loss': 0.0003, 'learning_rate': 7.439446366782008e-07, 'epoch': 3.7}


 93%|█████████▎| 1080/1156 [19:57<01:16,  1.01s/it]

{'loss': 0.0005, 'learning_rate': 6.57439446366782e-07, 'epoch': 3.74}


 94%|█████████▍| 1090/1156 [20:08<01:06,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 5.709342560553634e-07, 'epoch': 3.77}


 95%|█████████▌| 1100/1156 [20:18<00:56,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 4.844290657439446e-07, 'epoch': 3.81}


 96%|█████████▌| 1110/1156 [20:28<00:46,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 3.9792387543252597e-07, 'epoch': 3.84}


 97%|█████████▋| 1120/1156 [20:38<00:36,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 3.114186851211073e-07, 'epoch': 3.88}


 98%|█████████▊| 1130/1156 [20:48<00:26,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 2.249134948096886e-07, 'epoch': 3.91}


 99%|█████████▊| 1140/1156 [20:58<00:16,  1.01s/it]

{'loss': 0.0006, 'learning_rate': 1.384083044982699e-07, 'epoch': 3.94}


 99%|█████████▉| 1150/1156 [21:08<00:06,  1.02s/it]

{'loss': 0.0001, 'learning_rate': 5.1903114186851215e-08, 'epoch': 3.98}


                                                   
100%|██████████| 1156/1156 [21:43<00:00,  1.09it/s]

{'eval_loss': 2.645047426223755, 'eval_accuracy': 0.7020979020979021, 'eval_precision': 0.7017633532637987, 'eval_recall': 0.7020979020979021, 'eval_f1': 0.6991211128343893, 'eval_runtime': 28.7851, 'eval_samples_per_second': 24.839, 'eval_steps_per_second': 3.127, 'epoch': 4.0}


100%|██████████| 1156/1156 [21:52<00:00,  1.14s/it]


{'train_runtime': 1312.4173, 'train_samples_per_second': 7.034, 'train_steps_per_second': 0.881, 'train_loss': 0.025919986201024647, 'epoch': 4.0}


100%|██████████| 90/90 [00:28<00:00,  3.11it/s]
100%|██████████| 90/90 [00:28<00:00,  3.19it/s]
100%|██████████| 90/90 [00:30<00:00,  3.00it/s]


{'accuracy': 0.7090909090909091, 'precision': 0.7139717250480606, 'recall': 0.7090909090909091, 'f1': 0.7095146809351461}
{'accuracy': 0.6848739495798319, 'precision': 0.6868246210531853, 'recall': 0.6848739495798319, 'f1': 0.6834952094932877}


  1%|          | 10/1156 [00:11<19:50,  1.04s/it]

{'loss': 0.0003, 'learning_rate': 9.913494809688582e-06, 'epoch': 0.03}


  2%|▏         | 20/1156 [00:21<19:13,  1.02s/it]

{'loss': 0.1331, 'learning_rate': 9.826989619377163e-06, 'epoch': 0.07}


  3%|▎         | 30/1156 [00:31<18:54,  1.01s/it]

{'loss': 0.0645, 'learning_rate': 9.740484429065744e-06, 'epoch': 0.1}


  3%|▎         | 40/1156 [00:41<18:45,  1.01s/it]

{'loss': 0.1569, 'learning_rate': 9.653979238754326e-06, 'epoch': 0.14}


  4%|▍         | 50/1156 [00:51<18:34,  1.01s/it]

{'loss': 0.1342, 'learning_rate': 9.567474048442907e-06, 'epoch': 0.17}


  5%|▌         | 60/1156 [01:02<18:28,  1.01s/it]

{'loss': 0.0032, 'learning_rate': 9.480968858131488e-06, 'epoch': 0.21}


  6%|▌         | 70/1156 [01:12<18:14,  1.01s/it]

{'loss': 0.1851, 'learning_rate': 9.39446366782007e-06, 'epoch': 0.24}


  7%|▋         | 80/1156 [01:22<18:15,  1.02s/it]

{'loss': 0.0057, 'learning_rate': 9.307958477508652e-06, 'epoch': 0.28}


  8%|▊         | 90/1156 [01:32<18:24,  1.04s/it]

{'loss': 0.2206, 'learning_rate': 9.221453287197234e-06, 'epoch': 0.31}


  9%|▊         | 100/1156 [01:42<17:47,  1.01s/it]

{'loss': 0.1041, 'learning_rate': 9.134948096885815e-06, 'epoch': 0.35}


 10%|▉         | 110/1156 [01:52<17:31,  1.01s/it]

{'loss': 0.2394, 'learning_rate': 9.048442906574394e-06, 'epoch': 0.38}


 10%|█         | 120/1156 [02:02<17:23,  1.01s/it]

{'loss': 0.2337, 'learning_rate': 8.961937716262975e-06, 'epoch': 0.42}


 11%|█         | 130/1156 [02:13<17:25,  1.02s/it]

{'loss': 0.089, 'learning_rate': 8.875432525951558e-06, 'epoch': 0.45}


 12%|█▏        | 140/1156 [02:23<17:13,  1.02s/it]

{'loss': 0.1505, 'learning_rate': 8.78892733564014e-06, 'epoch': 0.48}


 13%|█▎        | 150/1156 [02:33<16:57,  1.01s/it]

{'loss': 0.1337, 'learning_rate': 8.702422145328721e-06, 'epoch': 0.52}


 14%|█▍        | 160/1156 [02:43<16:40,  1.00s/it]

{'loss': 0.0951, 'learning_rate': 8.615916955017302e-06, 'epoch': 0.55}


 15%|█▍        | 170/1156 [02:53<16:31,  1.01s/it]

{'loss': 0.079, 'learning_rate': 8.529411764705883e-06, 'epoch': 0.59}


 16%|█▌        | 180/1156 [03:03<16:17,  1.00s/it]

{'loss': 0.0045, 'learning_rate': 8.442906574394465e-06, 'epoch': 0.62}


 16%|█▋        | 190/1156 [03:13<16:15,  1.01s/it]

{'loss': 0.0053, 'learning_rate': 8.356401384083046e-06, 'epoch': 0.66}


 17%|█▋        | 200/1156 [03:23<16:02,  1.01s/it]

{'loss': 0.0646, 'learning_rate': 8.269896193771627e-06, 'epoch': 0.69}


 18%|█▊        | 210/1156 [03:33<15:50,  1.00s/it]

{'loss': 0.082, 'learning_rate': 8.183391003460208e-06, 'epoch': 0.73}


 19%|█▉        | 220/1156 [03:43<15:42,  1.01s/it]

{'loss': 0.0138, 'learning_rate': 8.09688581314879e-06, 'epoch': 0.76}


 20%|█▉        | 230/1156 [03:53<15:30,  1.01s/it]

{'loss': 0.014, 'learning_rate': 8.01038062283737e-06, 'epoch': 0.8}


 21%|██        | 240/1156 [04:03<15:20,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 7.923875432525952e-06, 'epoch': 0.83}


 22%|██▏       | 250/1156 [04:14<15:13,  1.01s/it]

{'loss': 0.1245, 'learning_rate': 7.837370242214533e-06, 'epoch': 0.87}


 22%|██▏       | 260/1156 [04:24<15:04,  1.01s/it]

{'loss': 0.0028, 'learning_rate': 7.750865051903114e-06, 'epoch': 0.9}


 23%|██▎       | 270/1156 [04:34<14:49,  1.00s/it]

{'loss': 0.0314, 'learning_rate': 7.664359861591696e-06, 'epoch': 0.93}


 24%|██▍       | 280/1156 [04:44<14:42,  1.01s/it]

{'loss': 0.0726, 'learning_rate': 7.577854671280277e-06, 'epoch': 0.97}


                                                  
 25%|██▌       | 289/1156 [05:21<13:14,  1.09it/s]

{'eval_loss': 2.3725156784057617, 'eval_accuracy': 0.6811188811188811, 'eval_precision': 0.6855197674322578, 'eval_recall': 0.6811188811188811, 'eval_f1': 0.67257774778649, 'eval_runtime': 28.1506, 'eval_samples_per_second': 25.399, 'eval_steps_per_second': 3.197, 'epoch': 1.0}


 25%|██▌       | 290/1156 [05:28<2:42:38, 11.27s/it]

{'loss': 0.6164, 'learning_rate': 7.491349480968859e-06, 'epoch': 1.0}


 26%|██▌       | 300/1156 [05:38<18:28,  1.30s/it]  

{'loss': 0.1001, 'learning_rate': 7.40484429065744e-06, 'epoch': 1.04}


 27%|██▋       | 310/1156 [05:48<14:15,  1.01s/it]

{'loss': 0.0759, 'learning_rate': 7.318339100346021e-06, 'epoch': 1.07}


 28%|██▊       | 320/1156 [05:58<13:58,  1.00s/it]

{'loss': 0.0005, 'learning_rate': 7.2318339100346025e-06, 'epoch': 1.11}


 29%|██▊       | 330/1156 [06:08<13:51,  1.01s/it]

{'loss': 0.1147, 'learning_rate': 7.145328719723184e-06, 'epoch': 1.14}


 29%|██▉       | 340/1156 [06:18<13:45,  1.01s/it]

{'loss': 0.0007, 'learning_rate': 7.058823529411766e-06, 'epoch': 1.18}


 30%|███       | 350/1156 [06:28<13:30,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 6.972318339100347e-06, 'epoch': 1.21}


 31%|███       | 360/1156 [06:38<13:19,  1.00s/it]

{'loss': 0.2652, 'learning_rate': 6.885813148788928e-06, 'epoch': 1.25}


 32%|███▏      | 370/1156 [06:48<13:08,  1.00s/it]

{'loss': 0.0004, 'learning_rate': 6.799307958477509e-06, 'epoch': 1.28}


 33%|███▎      | 380/1156 [06:58<13:01,  1.01s/it]

{'loss': 0.0028, 'learning_rate': 6.71280276816609e-06, 'epoch': 1.31}


 34%|███▎      | 390/1156 [07:09<12:50,  1.01s/it]

{'loss': 0.0025, 'learning_rate': 6.626297577854672e-06, 'epoch': 1.35}


 35%|███▍      | 400/1156 [07:19<12:40,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 6.539792387543253e-06, 'epoch': 1.38}


 35%|███▌      | 410/1156 [07:29<12:29,  1.00s/it]

{'loss': 0.0003, 'learning_rate': 6.453287197231834e-06, 'epoch': 1.42}


 36%|███▋      | 420/1156 [07:39<12:22,  1.01s/it]

{'loss': 0.0006, 'learning_rate': 6.3667820069204156e-06, 'epoch': 1.45}


 37%|███▋      | 430/1156 [07:49<12:10,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 6.280276816608997e-06, 'epoch': 1.49}


 38%|███▊      | 440/1156 [07:59<12:00,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 6.193771626297579e-06, 'epoch': 1.52}


 39%|███▉      | 450/1156 [08:09<12:02,  1.02s/it]

{'loss': 0.0274, 'learning_rate': 6.10726643598616e-06, 'epoch': 1.56}


 40%|███▉      | 460/1156 [08:19<11:48,  1.02s/it]

{'loss': 0.0031, 'learning_rate': 6.020761245674741e-06, 'epoch': 1.59}


 41%|████      | 470/1156 [08:29<11:32,  1.01s/it]

{'loss': 0.0168, 'learning_rate': 5.9342560553633225e-06, 'epoch': 1.63}


 42%|████▏     | 480/1156 [08:39<11:18,  1.00s/it]

{'loss': 0.0006, 'learning_rate': 5.847750865051903e-06, 'epoch': 1.66}


 42%|████▏     | 490/1156 [08:49<11:11,  1.01s/it]

{'loss': 0.0003, 'learning_rate': 5.761245674740484e-06, 'epoch': 1.7}


 43%|████▎     | 500/1156 [09:00<10:59,  1.01s/it]

{'loss': 0.076, 'learning_rate': 5.674740484429066e-06, 'epoch': 1.73}


 44%|████▍     | 510/1156 [09:10<10:48,  1.00s/it]

{'loss': 0.001, 'learning_rate': 5.588235294117647e-06, 'epoch': 1.76}


 45%|████▍     | 520/1156 [09:20<10:40,  1.01s/it]

{'loss': 0.0459, 'learning_rate': 5.501730103806229e-06, 'epoch': 1.8}


 46%|████▌     | 530/1156 [09:30<10:30,  1.01s/it]

{'loss': 0.1825, 'learning_rate': 5.41522491349481e-06, 'epoch': 1.83}


 47%|████▋     | 540/1156 [09:40<10:19,  1.01s/it]

{'loss': 0.0989, 'learning_rate': 5.328719723183391e-06, 'epoch': 1.87}


 48%|████▊     | 550/1156 [09:50<10:10,  1.01s/it]

{'loss': 0.0105, 'learning_rate': 5.242214532871973e-06, 'epoch': 1.9}


 48%|████▊     | 560/1156 [10:00<10:01,  1.01s/it]

{'loss': 0.0046, 'learning_rate': 5.155709342560554e-06, 'epoch': 1.94}


 49%|████▉     | 570/1156 [10:10<09:51,  1.01s/it]

{'loss': 0.0004, 'learning_rate': 5.069204152249136e-06, 'epoch': 1.97}


                                                  
 50%|█████     | 578/1156 [10:46<08:47,  1.10it/s]

{'eval_loss': 2.8180506229400635, 'eval_accuracy': 0.6881118881118881, 'eval_precision': 0.7103887491388149, 'eval_recall': 0.6881118881118881, 'eval_f1': 0.678709661466995, 'eval_runtime': 28.6093, 'eval_samples_per_second': 24.992, 'eval_steps_per_second': 3.146, 'epoch': 2.0}


 50%|█████     | 580/1156 [10:56<1:21:46,  8.52s/it]

{'loss': 0.0972, 'learning_rate': 4.982698961937717e-06, 'epoch': 2.01}


 51%|█████     | 590/1156 [11:06<11:29,  1.22s/it]  

{'loss': 0.0845, 'learning_rate': 4.896193771626298e-06, 'epoch': 2.04}


 52%|█████▏    | 600/1156 [11:16<09:25,  1.02s/it]

{'loss': 0.0942, 'learning_rate': 4.809688581314879e-06, 'epoch': 2.08}


 53%|█████▎    | 610/1156 [11:26<09:08,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 4.7231833910034605e-06, 'epoch': 2.11}


 54%|█████▎    | 620/1156 [11:36<08:56,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 4.636678200692042e-06, 'epoch': 2.15}


 54%|█████▍    | 630/1156 [11:46<08:49,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 4.550173010380623e-06, 'epoch': 2.18}


 55%|█████▌    | 640/1156 [11:56<08:38,  1.01s/it]

{'loss': 0.0004, 'learning_rate': 4.463667820069205e-06, 'epoch': 2.21}


 56%|█████▌    | 650/1156 [12:06<08:27,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 4.377162629757785e-06, 'epoch': 2.25}


 57%|█████▋    | 660/1156 [12:16<08:16,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 4.2906574394463675e-06, 'epoch': 2.28}


 58%|█████▊    | 670/1156 [12:26<08:09,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 4.204152249134949e-06, 'epoch': 2.32}


 59%|█████▉    | 680/1156 [12:37<07:59,  1.01s/it]

{'loss': 0.0003, 'learning_rate': 4.11764705882353e-06, 'epoch': 2.35}


 60%|█████▉    | 690/1156 [12:47<07:46,  1.00s/it]

{'loss': 0.001, 'learning_rate': 4.031141868512111e-06, 'epoch': 2.39}


 61%|██████    | 700/1156 [12:57<07:38,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 3.944636678200692e-06, 'epoch': 2.42}


 61%|██████▏   | 710/1156 [13:07<07:28,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 3.8581314878892736e-06, 'epoch': 2.46}


 62%|██████▏   | 720/1156 [13:17<07:19,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 3.7716262975778552e-06, 'epoch': 2.49}


 63%|██████▎   | 730/1156 [13:27<07:09,  1.01s/it]

{'loss': 0.0004, 'learning_rate': 3.685121107266436e-06, 'epoch': 2.53}


 64%|██████▍   | 740/1156 [13:37<06:56,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 3.5986159169550177e-06, 'epoch': 2.56}


 65%|██████▍   | 750/1156 [13:47<06:48,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 3.512110726643599e-06, 'epoch': 2.6}


 66%|██████▌   | 760/1156 [13:57<06:37,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 3.42560553633218e-06, 'epoch': 2.63}


 67%|██████▋   | 770/1156 [14:07<06:30,  1.01s/it]

{'loss': 0.0005, 'learning_rate': 3.3391003460207618e-06, 'epoch': 2.66}


 67%|██████▋   | 780/1156 [14:17<06:17,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 3.2525951557093425e-06, 'epoch': 2.7}


 68%|██████▊   | 790/1156 [14:27<06:09,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 3.166089965397924e-06, 'epoch': 2.73}


 69%|██████▉   | 800/1156 [14:37<05:58,  1.01s/it]

{'loss': 0.0342, 'learning_rate': 3.0795847750865054e-06, 'epoch': 2.77}


 70%|███████   | 810/1156 [14:47<05:46,  1.00s/it]

{'loss': 0.0806, 'learning_rate': 2.9930795847750866e-06, 'epoch': 2.8}


 71%|███████   | 820/1156 [14:57<05:37,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 2.9065743944636683e-06, 'epoch': 2.84}


 72%|███████▏  | 830/1156 [15:08<05:28,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 2.820069204152249e-06, 'epoch': 2.87}


 73%|███████▎  | 840/1156 [15:18<05:18,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 2.7335640138408307e-06, 'epoch': 2.91}


 74%|███████▎  | 850/1156 [15:28<05:07,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 2.647058823529412e-06, 'epoch': 2.94}


 74%|███████▍  | 860/1156 [15:38<04:57,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 2.560553633217993e-06, 'epoch': 2.98}


                                                  
 75%|███████▌  | 867/1156 [16:14<04:26,  1.09it/s]

{'eval_loss': 2.662191390991211, 'eval_accuracy': 0.6951048951048951, 'eval_precision': 0.7021123528419345, 'eval_recall': 0.6951048951048951, 'eval_f1': 0.6950120243421192, 'eval_runtime': 29.3603, 'eval_samples_per_second': 24.353, 'eval_steps_per_second': 3.065, 'epoch': 3.0}


 75%|███████▌  | 870/1156 [16:24<30:25,  6.38s/it]

{'loss': 0.0002, 'learning_rate': 2.4740484429065744e-06, 'epoch': 3.01}


 76%|███████▌  | 880/1156 [16:34<05:19,  1.16s/it]

{'loss': 0.0001, 'learning_rate': 2.387543252595156e-06, 'epoch': 3.04}


 77%|███████▋  | 890/1156 [16:44<04:26,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 2.3010380622837373e-06, 'epoch': 3.08}


 78%|███████▊  | 900/1156 [16:54<04:15,  1.00it/s]

{'loss': 0.0001, 'learning_rate': 2.2145328719723185e-06, 'epoch': 3.11}


 79%|███████▊  | 910/1156 [17:04<04:06,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 2.1280276816609e-06, 'epoch': 3.15}


 80%|███████▉  | 920/1156 [17:15<03:58,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 2.041522491349481e-06, 'epoch': 3.18}


 80%|████████  | 930/1156 [17:25<03:47,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.9550173010380626e-06, 'epoch': 3.22}


 81%|████████▏ | 940/1156 [17:35<03:37,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 1.8685121107266438e-06, 'epoch': 3.25}


 82%|████████▏ | 950/1156 [17:45<03:27,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 1.7820069204152252e-06, 'epoch': 3.29}


 83%|████████▎ | 960/1156 [17:55<03:16,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 1.6955017301038063e-06, 'epoch': 3.32}


 84%|████████▍ | 970/1156 [18:05<03:06,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 1.6089965397923877e-06, 'epoch': 3.36}


 85%|████████▍ | 980/1156 [18:15<02:56,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 1.522491349480969e-06, 'epoch': 3.39}


 86%|████████▌ | 990/1156 [18:25<02:48,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 1.4359861591695503e-06, 'epoch': 3.43}


 87%|████████▋ | 1000/1156 [18:35<02:37,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 1.3494809688581318e-06, 'epoch': 3.46}


 87%|████████▋ | 1010/1156 [18:45<02:26,  1.00s/it]

{'loss': 0.001, 'learning_rate': 1.2629757785467128e-06, 'epoch': 3.49}


 88%|████████▊ | 1020/1156 [18:55<02:17,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 1.1764705882352942e-06, 'epoch': 3.53}


 89%|████████▉ | 1030/1156 [19:05<02:06,  1.00s/it]

{'loss': 0.0002, 'learning_rate': 1.0899653979238757e-06, 'epoch': 3.56}


 90%|████████▉ | 1040/1156 [19:15<01:56,  1.01s/it]

{'loss': 0.0982, 'learning_rate': 1.0034602076124569e-06, 'epoch': 3.6}


 91%|█████████ | 1050/1156 [19:25<01:46,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 9.169550173010382e-07, 'epoch': 3.63}


 92%|█████████▏| 1060/1156 [19:35<01:36,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 8.304498269896194e-07, 'epoch': 3.67}


 93%|█████████▎| 1070/1156 [19:45<01:26,  1.01s/it]

{'loss': 0.0003, 'learning_rate': 7.439446366782008e-07, 'epoch': 3.7}


 93%|█████████▎| 1080/1156 [19:56<01:16,  1.01s/it]

{'loss': 0.0003, 'learning_rate': 6.57439446366782e-07, 'epoch': 3.74}


 94%|█████████▍| 1090/1156 [20:06<01:06,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 5.709342560553634e-07, 'epoch': 3.77}


 95%|█████████▌| 1100/1156 [20:16<00:57,  1.03s/it]

{'loss': 0.0001, 'learning_rate': 4.844290657439446e-07, 'epoch': 3.81}


 96%|█████████▌| 1110/1156 [20:26<00:46,  1.02s/it]

{'loss': 0.0001, 'learning_rate': 3.9792387543252597e-07, 'epoch': 3.84}


 97%|█████████▋| 1120/1156 [20:36<00:36,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 3.114186851211073e-07, 'epoch': 3.88}


 98%|█████████▊| 1130/1156 [20:46<00:26,  1.00s/it]

{'loss': 0.0001, 'learning_rate': 2.249134948096886e-07, 'epoch': 3.91}


 99%|█████████▊| 1140/1156 [20:56<00:16,  1.01s/it]

{'loss': 0.0002, 'learning_rate': 1.384083044982699e-07, 'epoch': 3.94}


 99%|█████████▉| 1150/1156 [21:07<00:06,  1.01s/it]

{'loss': 0.0001, 'learning_rate': 5.1903114186851215e-08, 'epoch': 3.98}


                                                   
100%|██████████| 1156/1156 [21:41<00:00,  1.08it/s]

{'eval_loss': 2.6630849838256836, 'eval_accuracy': 0.7034965034965035, 'eval_precision': 0.7085279057212253, 'eval_recall': 0.7034965034965035, 'eval_f1': 0.7035441927614453, 'eval_runtime': 28.7008, 'eval_samples_per_second': 24.912, 'eval_steps_per_second': 3.136, 'epoch': 4.0}


100%|██████████| 1156/1156 [21:50<00:00,  1.13s/it]


{'train_runtime': 1310.8064, 'train_samples_per_second': 7.043, 'train_steps_per_second': 0.882, 'train_loss': 0.03971024376507784, 'epoch': 4.0}


100%|██████████| 90/90 [00:28<00:00,  3.21it/s]
100%|██████████| 90/90 [00:27<00:00,  3.25it/s]
100%|██████████| 90/90 [00:27<00:00,  3.29it/s]


{'accuracy': 0.6811188811188811, 'precision': 0.6855197674322578, 'recall': 0.6811188811188811, 'f1': 0.67257774778649}
{'accuracy': 0.6736694677871149, 'precision': 0.680845742429091, 'recall': 0.6736694677871149, 'f1': 0.6647245407601585}
