In [None]:
# !pip install transformers==4.46.2 datasets==3.1.0 numpy==1.26.4 sklearn-pandas==2.2.0 torch==2.5.1+cu121

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
# from sklearn.metrics import f1_score, precision_score, recall_score
import pandas as pd
import numpy as np
import datasets
import time
import wandb
import random
# import os
# from fvcore.nn import FlopCountAnalysis
# from torch.utils.data import DataLoader
import torch

key_file = r'C:\Development\TactitalTensorsFinalProject\WANDB_API_KEY.txt' 

with open(key_file, "r") as f:
    api_key = f.read().strip()

# Log into WandB with the API key
wandb.login(key=api_key)

# Initialize WandB (no need to manually set the WANDB_API_KEY env variable again)


  from .autonotebook import tqdm as notebook_tqdm
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mayoungren94[0m ([33mayoungren-colostate[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: C:\Users\ayoun\_netrc


True

The WANDB_API_KEY is nessesary to use the Trainer class from HuggingFace and is what's used in the example notebook. However, I think we may all have to put in our own api key secret into google colab for it to work.

In [2]:
# from google.colab import userdata
# os.environ["WANDB_API_KEY"] = userdata.get('WANDB_API_KEY')


In [3]:
langs = ['java', 'python', 'pharo']

# langs = ['java'] # Using Java as the only language for testing purposes.
# langs = ['python']
# langs = ['pharo']
labels = {
    'java': ['summary', 'Ownership', 'Expand', 'usage', 'Pointer', 'deprecation', 'rational'],
    'python': ['Usage', 'Parameters', 'DevelopmentNotes', 'Expand', 'Summary'],
    'pharo': ['Keyimplementationpoints', 'Example', 'Responsibilities', 'Classreferences', 'Intent', 'Keymessages', 'Collaborators']
}
ds = datasets.load_dataset('NLBSE/nlbse25-code-comment-classification')

In [4]:
ds

DatasetDict({
    java_train: Dataset({
        features: ['index', 'class', 'comment_sentence', 'partition', 'combo', 'labels'],
        num_rows: 7614
    })
    java_test: Dataset({
        features: ['index', 'class', 'comment_sentence', 'partition', 'combo', 'labels'],
        num_rows: 1725
    })
    python_train: Dataset({
        features: ['index', 'class', 'comment_sentence', 'partition', 'combo', 'labels'],
        num_rows: 1884
    })
    python_test: Dataset({
        features: ['index', 'class', 'comment_sentence', 'partition', 'combo', 'labels'],
        num_rows: 406
    })
    pharo_train: Dataset({
        features: ['index', 'class', 'comment_sentence', 'partition', 'combo', 'labels'],
        num_rows: 1298
    })
    pharo_test: Dataset({
        features: ['index', 'class', 'comment_sentence', 'partition', 'combo', 'labels'],
        num_rows: 289
    })
})

In [5]:
ds['java_test'][0]

{'index': 5,
 'class': 'AbstractContractGetFileStatusTest.java',
 'comment_sentence': 'accept everything.',
 'partition': 1,
 'combo': 'accept everything. | AbstractContractGetFileStatusTest.java',
 'labels': [0, 0, 1, 0, 0, 0, 0]}

In [6]:
model_name = "microsoft/codebert-base"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)

The next few functions are to preprocess the trainng and validation sets, and to let the Trainer class to evaluate how well the trainng is going after each epoch.

In [7]:
# To tokenize the text in the 'combo' column of the training dataset of each language.

def tokenize_dataset(examples):
  return tokenizer(examples['combo'], truncation=True, padding="max_length", max_length=128)

In [8]:
# Tokenizes the text 'combo' column of the dataset, changes the values of the
# labels column to float instead of int, and sets to format of the lables column
# to torch tesnsors, which is required by the Trainer class.

def preprocess_dataset(input_dataset):
  processed_dataset = input_dataset.map(tokenize_dataset, batched=True, load_from_cache_file=False)
  processed_dataset = processed_dataset.cast_column("labels", datasets.features.Sequence(datasets.features.Value("float32")))
  processed_dataset.set_format('pt')
  return processed_dataset

Evaluating the predictions.

Below is non-functional code I still haven't finnished. We need to implement a way for the logits returned from the model to be converted into binary (0 - 1) predictions for the labels, and for those labels to be compaired to the ground truth labels of the testing datasets for each language. The competition also wants us to complete this step using profiling that they have set up, and which is partially shown below. The link to the reference notebook for the competition is here. Also, another notebook that is fairly close to what I believe we are trying to do can be found here_for_multi_label_text_classification.ipynb).

In [None]:
# scores must be global so that it can later be referenced for printing the results
scores = pd.DataFrame(columns=['model', 'lan', 'cat', 'precision', 'recall', 'f1'])

def compute_metrics(eval_pred, lang, categories, mod_name):
    global scores
    #scores.drop(scores[scores['lan'] == lang].index, inplace=True)

    predictions, true_labels = eval_pred
    predictions = (predictions > 0.5).astype(int)
    num_classes = len(categories)

    metrics_list = []

    for i in range(num_classes):
        y_pred = predictions[:, i]
        y_true = true_labels[:, i]

        tp = np.sum((y_true == 1) & (y_pred == 1))
        fp = np.sum((y_true == 0) & (y_pred == 1))
        fn = np.sum((y_true == 1) & (y_pred == 0))

        precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
        f1 = (2 * precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0

        metrics_list.append({
            'model': mod_name,
            'lan': lang,
            'cat': categories[i],
            'precision': precision,
            'recall': recall,
            'f1': f1
        })

    temp_scores = pd.DataFrame([m for m in metrics_list if m['model'] == mod_name])
    scores = pd.concat([scores, temp_scores], ignore_index=True)

    avg_f1 = temp_scores['f1'].mean()
    avg_precision = temp_scores['precision'].mean()
    avg_recall = temp_scores['recall'].mean()

    return {'f1': avg_f1, 'precision': avg_precision, 'recall': avg_recall}

In [None]:
def measure_runtime_and_flops(total_time, total_flops, trainer, validation_dataset):

    for batch in validation_dataset:
        inputs = {key: val.unsqueeze(0).to(trainer.model.device) for key, val in batch.items() if key in ['input_ids', 'attention_mask']}
        with torch.profiler.profile(with_flops=True, activities=[torch.profiler.ProfilerActivity.CPU, torch.profiler.ProfilerActivity.CUDA]) as prof:
            start_time = time.time()
            _ = trainer.model(**inputs)
            end_time = time.time()

        total_time += (end_time - start_time)
        total_flops += sum(k.flops for k in prof.key_averages()) / 1e9 # Convert Flops to GFLOPs

    avg_runtime = total_time / len(validation_dataset)
    avg_flops = (total_flops / len(validation_dataset))
    
    return avg_runtime, avg_flops, total_flops, total_time

In [2]:
def compute_metrics_wrapper(mod_name):
    def inner(eval_pred):
        return compute_metrics(eval_pred, lang, labels[lang], mod_name)
    return inner


In [13]:
max_avg_runtime = 5
max_avg_flops = 5000

def score(avg_f1, avg_runtime, avg_flops):
    return (0.6 * avg_f1 +
            0.2 * ((max_avg_runtime - avg_runtime) / max_avg_runtime) +
            0.2 * ((max_avg_flops - avg_flops) / max_avg_flops))

In [None]:
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # for CUDA devices
    torch.backends.cudnn.deterministic = True  # Ensure deterministic behavior
    torch.backends.cudnn.benchmark = False  # Disable to ensure deterministic operations

In [None]:
# Training the models for each language and saving the the models and thier
# tokenizers after training.

# trying different pre-trained models such as roberta-base, distilbert-base, or codebert-large

seed = 44
set_seed(seed)

# learning_rates = [5e-5, 5e-7]
lr = 5e-5
epochs = [5, 10, 15, 20]
batch_sizes = [4, 8]
weights_of_decay = [0.01] #, 0.001]
final_model_stats = {}


for epoch in epochs:
  for bs in batch_sizes:
    for wd in weights_of_decay:
      print(f'------------------ Starting model ==> epochs: {epoch}, batch size: {bs}, weights of decay: {wd} ---------------------')
      fin_mod_name = f'epoch-{epoch}_batchSize-{bs}_weightsOfDecay-{wd}'
      total_flops = 0
      total_time = 0

      for lang in langs:
        wandb.init(
            project="NBSE2025GridSearch2", 
            entity="ayoungren-colostate",
            config={  # Log hyperparameters for each run
                "learning_rate": lr,
                "epochs": epoch,
                "batch_size": bs,
                "weight_decay": wd,
                "language": lang
            }
        )
        lang_mod_name = f'{lang}_epoch-{epoch}_batchSize-{bs}_weightsOfDecay-{wd}'
        num_labels = len(labels[lang])
        model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels, problem_type="multi_label_classification")

        dataset = preprocess_dataset(ds[f'{lang}_train'])
        train_valalidation_split = dataset.train_test_split(test_size=0.2, seed=seed)

        train_dataset =  train_valalidation_split['train']
        validation_dataset = train_valalidation_split['test']

        training_args = TrainingArguments(
          output_dir=f'./results_{lang_mod_name}',
          eval_strategy="epoch",
          save_strategy="epoch",
          logging_dir=f'./logs_{lang_mod_name}',
          per_device_train_batch_size=bs,
          per_device_eval_batch_size=bs,
          num_train_epochs=epoch,
          weight_decay=wd,
          learning_rate=lr,
          logging_steps=10000,
          save_total_limit=2,
          load_best_model_at_end=True,
          metric_for_best_model="f1",
          seed=seed
        )

        trainer = Trainer(
          model=model,
          args=training_args,
          train_dataset=train_dataset,
          eval_dataset= validation_dataset,
          processing_class=tokenizer,
          compute_metrics=compute_metrics_wrapper(fin_mod_name),
        )

        trainer.train()
        metrics = trainer.evaluate()

        # print("Evaluation Metrics:", metrics)

        avg_runtime, avg_flops, total_flops, total_time = measure_runtime_and_flops(total_time, total_flops, trainer, validation_dataset)

        trainer.model.save_pretrained(f'./models/{lang_mod_name}')
        tokenizer.save_pretrained(f'./tokenizers/{lang_mod_name}')

      model_stats = scores[scores['model'] == fin_mod_name]
      avg_f1 = model_stats['f1'].mean()
      final_model_stats[fin_mod_name] = round(score(avg_f1, avg_runtime, avg_flops), 2)

------------------ Starting model ==> epochs: 10, batch size: 4, weights of decay: 0.01 ---------------------


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 7614/7614 [00:00<00:00, 19296.04 examples/s]
  scores = pd.concat([scores, temp_scores], ignore_index=True)
                                                    
 10%|█         | 1523/15230 [01:44<14:44, 15.50it/s]

{'eval_loss': 0.10316664725542068, 'eval_f1': 0.655723682687799, 'eval_precision': 0.6747067424039914, 'eval_recall': 0.6392114900124053, 'eval_runtime': 5.7762, 'eval_samples_per_second': 263.67, 'eval_steps_per_second': 65.961, 'epoch': 1.0}


                                                      
 20%|██        | 3046/15230 [03:30<12:23, 16.39it/s]

{'eval_loss': 0.11878565698862076, 'eval_f1': 0.7229915427009209, 'eval_precision': 0.888890491336867, 'eval_recall': 0.7030742473390014, 'eval_runtime': 5.8774, 'eval_samples_per_second': 259.13, 'eval_steps_per_second': 64.825, 'epoch': 2.0}


                                                      
 30%|███       | 4569/15230 [05:12<10:27, 16.99it/s]

{'eval_loss': 0.10687720030546188, 'eval_f1': 0.8129402121740759, 'eval_precision': 0.8911977742641504, 'eval_recall': 0.7838805202745746, 'eval_runtime': 5.8732, 'eval_samples_per_second': 259.314, 'eval_steps_per_second': 64.871, 'epoch': 3.0}


                                                      
 40%|████      | 6092/15230 [06:57<09:39, 15.78it/s]

{'eval_loss': 0.09779678285121918, 'eval_f1': 0.8514725724168205, 'eval_precision': 0.902544537859756, 'eval_recall': 0.8264782227649754, 'eval_runtime': 6.3359, 'eval_samples_per_second': 240.376, 'eval_steps_per_second': 60.133, 'epoch': 4.0}


                                                      
 50%|█████     | 7615/15230 [08:44<08:39, 14.66it/s]

{'eval_loss': 0.10166020691394806, 'eval_f1': 0.8495805406373037, 'eval_precision': 0.860321690545643, 'eval_recall': 0.8421485157308091, 'eval_runtime': 6.8665, 'eval_samples_per_second': 221.803, 'eval_steps_per_second': 55.487, 'epoch': 5.0}


                                                      
 60%|██████    | 9138/15230 [10:30<06:19, 16.07it/s]

{'eval_loss': 0.0953066349029541, 'eval_f1': 0.874202122973024, 'eval_precision': 0.8722204270641888, 'eval_recall': 0.877004540039365, 'eval_runtime': 6.9955, 'eval_samples_per_second': 217.712, 'eval_steps_per_second': 54.464, 'epoch': 6.0}


 66%|██████▌   | 10003/15230 [11:30<05:29, 15.86it/s] 

{'loss': 0.0677, 'grad_norm': 0.016584930941462517, 'learning_rate': 1.717005909389363e-05, 'epoch': 6.57}


                                                     
 70%|███████   | 10661/15230 [12:19<04:39, 16.33it/s]

{'eval_loss': 0.10941113531589508, 'eval_f1': 0.8768720269359891, 'eval_precision': 0.9001108926795138, 'eval_recall': 0.8568333657636199, 'eval_runtime': 6.3699, 'eval_samples_per_second': 239.093, 'eval_steps_per_second': 59.812, 'epoch': 7.0}


                                                       
 80%|████████  | 12184/15230 [14:03<03:05, 16.38it/s]

{'eval_loss': 0.10673347860574722, 'eval_f1': 0.8698631926174227, 'eval_precision': 0.8727879676828794, 'eval_recall': 0.8684618402442376, 'eval_runtime': 5.382, 'eval_samples_per_second': 282.982, 'eval_steps_per_second': 70.792, 'epoch': 8.0}


                                                     
 90%|█████████ | 13707/15230 [15:47<01:38, 15.47it/s]

{'eval_loss': 0.11093059182167053, 'eval_f1': 0.8907227250421706, 'eval_precision': 0.9003500200369488, 'eval_recall': 0.8818766247299218, 'eval_runtime': 5.5552, 'eval_samples_per_second': 274.159, 'eval_steps_per_second': 68.585, 'epoch': 9.0}


                                                     
100%|██████████| 15230/15230 [17:33<00:00, 16.68it/s]

{'eval_loss': 0.11056418716907501, 'eval_f1': 0.8851123734932426, 'eval_precision': 0.9021709432896167, 'eval_recall': 0.8704591364380222, 'eval_runtime': 5.8644, 'eval_samples_per_second': 259.702, 'eval_steps_per_second': 64.968, 'epoch': 10.0}


100%|██████████| 15230/15230 [17:34<00:00, 14.44it/s]


{'train_runtime': 1054.7356, 'train_samples_per_second': 57.749, 'train_steps_per_second': 14.44, 'train_loss': 0.04754291817457834, 'epoch': 10.0}


100%|██████████| 381/381 [00:05<00:00, 68.73it/s]


0,1
eval/f1,▁▃▆▇▇██▇███
eval/loss,▃█▄▂▃▁▅▄▆▆▆
eval/precision,▁███▇▇█▇███
eval/recall,▁▃▅▆▇█▇████
eval/runtime,▃▃▃▅▇█▅▁▂▃▂
eval/samples_per_second,▆▅▅▃▁▁▃█▇▆▇
eval/steps_per_second,▆▅▅▃▁▁▃█▇▆▇
train/epoch,▁▂▃▃▄▅▅▆▆▇███
train/global_step,▁▂▃▃▄▅▅▆▆▇███
train/grad_norm,▁

0,1
eval/f1,0.89072
eval/loss,0.11093
eval/precision,0.90035
eval/recall,0.88188
eval/runtime,5.5602
eval/samples_per_second,273.912
eval/steps_per_second,68.523
total_flos,4006703460288000.0
train/epoch,10.0
train/global_step,15230.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1884/1884 [00:00<00:00, 15103.95 examples/s]
 10%|█         | 377/3770 [00:24<03:46, 15.01it/s]
 10%|█         | 377/3770 [00:25<03:46, 15.01it/s]

{'eval_loss': 0.3130854666233063, 'eval_f1': 0.45742347479048223, 'eval_precision': 0.5181602435123562, 'eval_recall': 0.4143465656385955, 'eval_runtime': 1.4247, 'eval_samples_per_second': 264.62, 'eval_steps_per_second': 66.682, 'epoch': 1.0}


 20%|█▉        | 753/3770 [00:50<03:23, 14.83it/s]
 20%|██        | 754/3770 [00:52<03:23, 14.83it/s]

{'eval_loss': 0.3027876019477844, 'eval_f1': 0.5929929833884614, 'eval_precision': 0.717800343433999, 'eval_recall': 0.5490691585924138, 'eval_runtime': 1.3598, 'eval_samples_per_second': 277.242, 'eval_steps_per_second': 69.862, 'epoch': 2.0}


 30%|███       | 1131/3770 [01:17<02:43, 16.19it/s]
 30%|███       | 1131/3770 [01:19<02:43, 16.19it/s]

{'eval_loss': 0.36772242188453674, 'eval_f1': 0.6621837050421475, 'eval_precision': 0.7448647583493957, 'eval_recall': 0.6143115203691034, 'eval_runtime': 1.5374, 'eval_samples_per_second': 245.214, 'eval_steps_per_second': 61.791, 'epoch': 3.0}


 40%|███▉      | 1507/3770 [01:44<02:20, 16.12it/s]
 40%|████      | 1508/3770 [01:46<02:20, 16.12it/s]

{'eval_loss': 0.4073038399219513, 'eval_f1': 0.6449398799374485, 'eval_precision': 0.7081621671944253, 'eval_recall': 0.6149018871918054, 'eval_runtime': 1.406, 'eval_samples_per_second': 268.138, 'eval_steps_per_second': 67.568, 'epoch': 4.0}


 50%|█████     | 1885/3770 [02:11<01:59, 15.78it/s]
 50%|█████     | 1885/3770 [02:12<01:59, 15.78it/s]

{'eval_loss': 0.3866775333881378, 'eval_f1': 0.6931975844056381, 'eval_precision': 0.7400881079704609, 'eval_recall': 0.6695377559073483, 'eval_runtime': 1.4019, 'eval_samples_per_second': 268.916, 'eval_steps_per_second': 67.764, 'epoch': 5.0}


 60%|█████▉    | 2261/3770 [02:38<01:32, 16.26it/s]
 60%|██████    | 2262/3770 [02:39<01:32, 16.26it/s]

{'eval_loss': 0.4216238856315613, 'eval_f1': 0.7185855315074967, 'eval_precision': 0.7356114349848323, 'eval_recall': 0.7172836154135901, 'eval_runtime': 1.4179, 'eval_samples_per_second': 265.886, 'eval_steps_per_second': 67.0, 'epoch': 6.0}


 70%|███████   | 2639/3770 [03:04<01:13, 15.29it/s]
 70%|███████   | 2639/3770 [03:06<01:13, 15.29it/s]

{'eval_loss': 0.41656509041786194, 'eval_f1': 0.721802204742884, 'eval_precision': 0.7409014123367454, 'eval_recall': 0.7198732859718836, 'eval_runtime': 1.4478, 'eval_samples_per_second': 260.404, 'eval_steps_per_second': 65.619, 'epoch': 7.0}


 80%|███████▉  | 3015/3770 [03:31<00:50, 15.03it/s]
 80%|████████  | 3016/3770 [03:33<00:50, 15.03it/s]

{'eval_loss': 0.4551067054271698, 'eval_f1': 0.7297628248421086, 'eval_precision': 0.7315132633152062, 'eval_recall': 0.7366503540857864, 'eval_runtime': 1.5558, 'eval_samples_per_second': 242.315, 'eval_steps_per_second': 61.061, 'epoch': 8.0}


 90%|█████████ | 3393/3770 [03:58<00:22, 16.62it/s]
 90%|█████████ | 3393/3770 [04:00<00:22, 16.62it/s]

{'eval_loss': 0.44599035382270813, 'eval_f1': 0.7276926825717396, 'eval_precision': 0.7298108529961691, 'eval_recall': 0.735552836987403, 'eval_runtime': 1.4633, 'eval_samples_per_second': 257.639, 'eval_steps_per_second': 64.922, 'epoch': 9.0}


100%|█████████▉| 3769/3770 [04:25<00:00, 15.81it/s]
100%|██████████| 3770/3770 [04:28<00:00, 15.81it/s]

{'eval_loss': 0.45260804891586304, 'eval_f1': 0.7451254137518004, 'eval_precision': 0.7329023486896454, 'eval_recall': 0.7698484215623747, 'eval_runtime': 1.3619, 'eval_samples_per_second': 276.824, 'eval_steps_per_second': 69.757, 'epoch': 10.0}


100%|██████████| 3770/3770 [04:30<00:00, 13.93it/s]


{'train_runtime': 270.5573, 'train_samples_per_second': 55.7, 'train_steps_per_second': 13.934, 'train_loss': 0.1277463149012558, 'epoch': 10.0}


100%|██████████| 95/95 [00:01<00:00, 67.19it/s]


0,1
eval/f1,▁▄▆▆▇▇▇████
eval/loss,▁▁▄▆▅▆▆████
eval/precision,▁▇█▇███████
eval/recall,▁▄▅▅▆▇▇▇▇██
eval/runtime,▃▁▇▃▃▃▄█▅▁▄
eval/samples_per_second,▅█▂▆▆▆▅▁▄█▅
eval/steps_per_second,▅█▂▆▆▆▅▁▄█▅
train/epoch,▁▂▃▃▄▅▆▆▇███
train/global_step,▁▂▃▃▄▅▆▆▇███

0,1
eval/f1,0.74513
eval/loss,0.45261
eval/precision,0.7329
eval/recall,0.76985
eval/runtime,1.4416
eval/samples_per_second,261.513
eval/steps_per_second,65.898
total_flos,991297601733120.0
train/epoch,10.0
train/global_step,3770.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1298/1298 [00:00<00:00, 13751.36 examples/s]
 10%|█         | 260/2600 [00:16<02:23, 16.30it/s]
 10%|█         | 260/2600 [00:17<02:23, 16.30it/s]

{'eval_loss': 0.3191966712474823, 'eval_f1': 0.15168249973003795, 'eval_precision': 0.33465608465608465, 'eval_recall': 0.13974242537035658, 'eval_runtime': 0.987, 'eval_samples_per_second': 263.433, 'eval_steps_per_second': 65.858, 'epoch': 1.0}


 20%|██        | 520/2600 [00:35<02:04, 16.66it/s]
 20%|██        | 520/2600 [00:36<02:04, 16.66it/s]

{'eval_loss': 0.26629212498664856, 'eval_f1': 0.46741762164621903, 'eval_precision': 0.5878945447910965, 'eval_recall': 0.4055582755981108, 'eval_runtime': 1.1298, 'eval_samples_per_second': 230.126, 'eval_steps_per_second': 57.531, 'epoch': 2.0}


 30%|███       | 780/2600 [00:54<01:57, 15.49it/s]
 30%|███       | 780/2600 [00:55<01:57, 15.49it/s]

{'eval_loss': 0.25247177481651306, 'eval_f1': 0.5171438632670223, 'eval_precision': 0.5689761821270982, 'eval_recall': 0.48523670969834826, 'eval_runtime': 0.9457, 'eval_samples_per_second': 274.937, 'eval_steps_per_second': 68.734, 'epoch': 3.0}


 40%|████      | 1040/2600 [01:13<01:39, 15.75it/s]
 40%|████      | 1040/2600 [01:14<01:39, 15.75it/s]

{'eval_loss': 0.23904836177825928, 'eval_f1': 0.5346717593424586, 'eval_precision': 0.5871113997835083, 'eval_recall': 0.49372794440583995, 'eval_runtime': 0.9874, 'eval_samples_per_second': 263.316, 'eval_steps_per_second': 65.829, 'epoch': 4.0}


 50%|█████     | 1300/2600 [01:32<01:23, 15.65it/s]
 50%|█████     | 1300/2600 [01:34<01:23, 15.65it/s]

{'eval_loss': 0.22758130729198456, 'eval_f1': 0.6328526730000003, 'eval_precision': 0.7609438833872602, 'eval_recall': 0.5673901642969562, 'eval_runtime': 1.2631, 'eval_samples_per_second': 205.848, 'eval_steps_per_second': 51.462, 'epoch': 5.0}


 60%|██████    | 1560/2600 [01:51<01:04, 16.13it/s]
 60%|██████    | 1560/2600 [01:52<01:04, 16.13it/s]

{'eval_loss': 0.2455279529094696, 'eval_f1': 0.6637868312888733, 'eval_precision': 0.8049953659709758, 'eval_recall': 0.6011089612743525, 'eval_runtime': 0.8803, 'eval_samples_per_second': 295.361, 'eval_steps_per_second': 73.84, 'epoch': 6.0}


 70%|███████   | 1820/2600 [02:11<00:50, 15.31it/s]
 70%|███████   | 1820/2600 [02:12<00:50, 15.31it/s]

{'eval_loss': 0.23813730478286743, 'eval_f1': 0.7021200965599722, 'eval_precision': 0.7991015844963876, 'eval_recall': 0.6489113685233276, 'eval_runtime': 0.9196, 'eval_samples_per_second': 282.719, 'eval_steps_per_second': 70.68, 'epoch': 7.0}


 80%|████████  | 2080/2600 [02:30<00:35, 14.79it/s]
 80%|████████  | 2080/2600 [02:31<00:35, 14.79it/s]

{'eval_loss': 0.2483331561088562, 'eval_f1': 0.7031045725370904, 'eval_precision': 0.8198533963199474, 'eval_recall': 0.636573837766344, 'eval_runtime': 1.0642, 'eval_samples_per_second': 244.313, 'eval_steps_per_second': 61.078, 'epoch': 8.0}


 90%|█████████ | 2340/2600 [02:49<00:16, 16.17it/s]
 90%|█████████ | 2340/2600 [02:50<00:16, 16.17it/s]

{'eval_loss': 0.24757996201515198, 'eval_f1': 0.7087232110322637, 'eval_precision': 0.8204565844783237, 'eval_recall': 0.6469527236792515, 'eval_runtime': 0.9512, 'eval_samples_per_second': 273.347, 'eval_steps_per_second': 68.337, 'epoch': 9.0}


100%|██████████| 2600/2600 [03:08<00:00, 15.86it/s]
100%|██████████| 2600/2600 [03:10<00:00, 15.86it/s]

{'eval_loss': 0.24937139451503754, 'eval_f1': 0.7066054401886355, 'eval_precision': 0.8165341004853096, 'eval_recall': 0.645871854845516, 'eval_runtime': 0.9552, 'eval_samples_per_second': 272.207, 'eval_steps_per_second': 68.052, 'epoch': 10.0}


100%|██████████| 2600/2600 [03:12<00:00, 13.51it/s]


{'train_runtime': 192.4425, 'train_samples_per_second': 53.938, 'train_steps_per_second': 13.511, 'train_loss': 0.11824987558218149, 'epoch': 10.0}


100%|██████████| 65/65 [00:00<00:00, 71.13it/s]


------------------ Starting model ==> epochs: 10, batch size: 4, weights of decay: 0.001 ---------------------


0,1
eval/f1,▁▅▆▆▇▇█████
eval/loss,█▄▃▂▁▂▂▃▃▃▃
eval/precision,▁▅▄▅▇██████
eval/recall,▁▅▆▆▇▇█████
eval/runtime,▃▆▂▃█▁▂▄▂▂▂
eval/samples_per_second,▆▃▆▅▁█▇▄▆▆▇
eval/steps_per_second,▆▃▆▅▁█▇▄▆▆▇
train/epoch,▁▂▃▃▄▅▆▆▇███
train/global_step,▁▂▃▃▄▅▆▆▇███

0,1
eval/f1,0.70872
eval/loss,0.24758
eval/precision,0.82046
eval/recall,0.64695
eval/runtime,0.9301
eval/samples_per_second,279.545
eval/steps_per_second,69.886
total_flos,682803840384000.0
train/epoch,10.0
train/global_step,2600.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 7614/7614 [00:00<00:00, 21192.36 examples/s]
 10%|▉         | 1522/15230 [01:36<13:42, 16.66it/s]
 10%|█         | 1523/15230 [01:42<13:42, 16.66it/s]

{'eval_loss': 0.11025039106607437, 'eval_f1': 0.6618349261112763, 'eval_precision': 0.8114119298560057, 'eval_recall': 0.6434066299054858, 'eval_runtime': 6.4664, 'eval_samples_per_second': 235.525, 'eval_steps_per_second': 58.92, 'epoch': 1.0}


 20%|██        | 3046/15230 [03:21<12:33, 16.17it/s]  
 20%|██        | 3046/15230 [03:27<12:33, 16.17it/s]

{'eval_loss': 0.14669211208820343, 'eval_f1': 0.7486795309375889, 'eval_precision': 0.8650372946221262, 'eval_recall': 0.7188569305581786, 'eval_runtime': 5.7233, 'eval_samples_per_second': 266.107, 'eval_steps_per_second': 66.57, 'epoch': 2.0}


 30%|██▉       | 4568/15230 [05:04<11:02, 16.10it/s]  
 30%|███       | 4569/15230 [05:10<11:02, 16.10it/s]

{'eval_loss': 0.10452292859554291, 'eval_f1': 0.8102741182954807, 'eval_precision': 0.8712801725343665, 'eval_recall': 0.774843946553271, 'eval_runtime': 5.7276, 'eval_samples_per_second': 265.904, 'eval_steps_per_second': 66.52, 'epoch': 3.0}


 40%|████      | 6092/15230 [06:49<09:38, 15.80it/s]  
 40%|████      | 6092/15230 [06:55<09:38, 15.80it/s]

{'eval_loss': 0.09475956112146378, 'eval_f1': 0.8341642313055838, 'eval_precision': 0.8779881601043181, 'eval_recall': 0.8024966926318771, 'eval_runtime': 5.7024, 'eval_samples_per_second': 267.08, 'eval_steps_per_second': 66.814, 'epoch': 4.0}


 50%|████▉     | 7614/15230 [08:32<08:12, 15.47it/s]  
 50%|█████     | 7615/15230 [08:38<08:12, 15.47it/s]

{'eval_loss': 0.10859733074903488, 'eval_f1': 0.8425916582412866, 'eval_precision': 0.8741970721092777, 'eval_recall': 0.8168839567355037, 'eval_runtime': 5.8071, 'eval_samples_per_second': 262.263, 'eval_steps_per_second': 65.609, 'epoch': 5.0}


 60%|██████    | 9138/15230 [10:16<07:02, 14.40it/s]  
 60%|██████    | 9138/15230 [10:22<07:02, 14.40it/s]

{'eval_loss': 0.10901514440774918, 'eval_f1': 0.8575601354455191, 'eval_precision': 0.9183719088033191, 'eval_recall': 0.8334394753909014, 'eval_runtime': 6.1859, 'eval_samples_per_second': 246.206, 'eval_steps_per_second': 61.592, 'epoch': 6.0}


 66%|██████▌   | 10002/15230 [11:18<05:28, 15.92it/s] 

{'loss': 0.0684, 'grad_norm': 0.10808705538511276, 'learning_rate': 1.717005909389363e-05, 'epoch': 6.57}


 70%|██████▉   | 10660/15230 [11:59<04:50, 15.76it/s]
 70%|███████   | 10661/15230 [12:06<04:49, 15.76it/s]

{'eval_loss': 0.12278147786855698, 'eval_f1': 0.8609456137039062, 'eval_precision': 0.897760298174462, 'eval_recall': 0.8372282983190047, 'eval_runtime': 6.3893, 'eval_samples_per_second': 238.366, 'eval_steps_per_second': 59.631, 'epoch': 7.0}


 80%|████████  | 12184/15230 [13:43<03:20, 15.17it/s]  
 80%|████████  | 12184/15230 [13:48<03:20, 15.17it/s]

{'eval_loss': 0.12079031020402908, 'eval_f1': 0.8660758431587224, 'eval_precision': 0.886256963858942, 'eval_recall': 0.8487203393843396, 'eval_runtime': 5.3049, 'eval_samples_per_second': 287.091, 'eval_steps_per_second': 71.82, 'epoch': 8.0}


 90%|████████▉ | 13706/15230 [15:26<01:31, 16.60it/s]
 90%|█████████ | 13707/15230 [15:32<01:31, 16.60it/s]

{'eval_loss': 0.12747521698474884, 'eval_f1': 0.8628168741511943, 'eval_precision': 0.8703544016125738, 'eval_recall': 0.85726765386785, 'eval_runtime': 5.7818, 'eval_samples_per_second': 263.412, 'eval_steps_per_second': 65.896, 'epoch': 9.0}


100%|██████████| 15230/15230 [17:09<00:00, 15.22it/s]
100%|██████████| 15230/15230 [17:16<00:00, 15.22it/s]

{'eval_loss': 0.12861327826976776, 'eval_f1': 0.8651428559498583, 'eval_precision': 0.8743876606567255, 'eval_recall': 0.8580160610639621, 'eval_runtime': 5.8454, 'eval_samples_per_second': 260.548, 'eval_steps_per_second': 65.18, 'epoch': 10.0}


100%|██████████| 15230/15230 [17:18<00:00, 14.66it/s]


{'train_runtime': 1038.676, 'train_samples_per_second': 58.642, 'train_steps_per_second': 14.663, 'train_loss': 0.04788424766775971, 'epoch': 10.0}


100%|██████████| 381/381 [00:05<00:00, 70.51it/s]


0,1
eval/f1,▁▄▆▇▇██████
eval/loss,▃█▂▁▃▃▅▅▅▆▅
eval/precision,▁▅▅▅▅█▇▆▅▅▆
eval/recall,▁▃▅▆▇▇▇████
eval/runtime,█▄▄▃▄▆█▁▄▄▂
eval/samples_per_second,▁▅▅▅▅▂▁█▅▄▇
eval/steps_per_second,▁▅▅▅▅▂▁█▅▄▇
train/epoch,▁▂▃▃▄▅▅▆▆▇███
train/global_step,▁▂▃▃▄▅▅▆▆▇███
train/grad_norm,▁

0,1
eval/f1,0.86608
eval/loss,0.12079
eval/precision,0.88626
eval/recall,0.84872
eval/runtime,5.4216
eval/samples_per_second,280.913
eval/steps_per_second,70.274
total_flos,4006703460288000.0
train/epoch,10.0
train/global_step,15230.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1884/1884 [00:00<00:00, 15063.41 examples/s]
 10%|█         | 377/3770 [00:23<03:27, 16.37it/s]
 10%|█         | 377/3770 [00:25<03:27, 16.37it/s]

{'eval_loss': 0.31621822714805603, 'eval_f1': 0.4547305834262357, 'eval_precision': 0.5266498416922866, 'eval_recall': 0.40404125762158144, 'eval_runtime': 1.3631, 'eval_samples_per_second': 276.57, 'eval_steps_per_second': 69.693, 'epoch': 1.0}


 20%|█▉        | 753/3770 [00:50<03:00, 16.72it/s]
 20%|██        | 754/3770 [00:52<03:00, 16.72it/s]

{'eval_loss': 0.3240654170513153, 'eval_f1': 0.5727261120228355, 'eval_precision': 0.7500348123981428, 'eval_recall': 0.5211474117504493, 'eval_runtime': 1.3821, 'eval_samples_per_second': 272.766, 'eval_steps_per_second': 68.734, 'epoch': 2.0}


 30%|███       | 1131/3770 [01:17<02:54, 15.10it/s]
 30%|███       | 1131/3770 [01:18<02:54, 15.10it/s]

{'eval_loss': 0.3470121920108795, 'eval_f1': 0.6686349206349206, 'eval_precision': 0.750091238005321, 'eval_recall': 0.6181351874522863, 'eval_runtime': 1.3693, 'eval_samples_per_second': 275.324, 'eval_steps_per_second': 69.379, 'epoch': 3.0}


 40%|███▉      | 1507/3770 [01:44<02:29, 15.10it/s]
 40%|████      | 1508/3770 [01:45<02:29, 15.10it/s]

{'eval_loss': 0.3640974462032318, 'eval_f1': 0.6928976802889847, 'eval_precision': 0.7425256430960603, 'eval_recall': 0.6674172440352255, 'eval_runtime': 1.3468, 'eval_samples_per_second': 279.914, 'eval_steps_per_second': 70.535, 'epoch': 4.0}


 50%|█████     | 1885/3770 [02:11<01:54, 16.51it/s]
 50%|█████     | 1885/3770 [02:12<01:54, 16.51it/s]

{'eval_loss': 0.3980275094509125, 'eval_f1': 0.7002427478901406, 'eval_precision': 0.7389317699291935, 'eval_recall': 0.677490226983838, 'eval_runtime': 1.3341, 'eval_samples_per_second': 282.591, 'eval_steps_per_second': 71.21, 'epoch': 5.0}


 60%|█████▉    | 2261/3770 [02:38<01:41, 14.87it/s]
 60%|██████    | 2262/3770 [02:39<01:41, 14.87it/s]

{'eval_loss': 0.42527446150779724, 'eval_f1': 0.7169775919246227, 'eval_precision': 0.7414729691617608, 'eval_recall': 0.7007366491504776, 'eval_runtime': 1.4278, 'eval_samples_per_second': 264.05, 'eval_steps_per_second': 66.538, 'epoch': 6.0}


 70%|███████   | 2639/3770 [03:04<01:12, 15.64it/s]
 70%|███████   | 2639/3770 [03:06<01:12, 15.64it/s]

{'eval_loss': 0.42957165837287903, 'eval_f1': 0.7381880899641031, 'eval_precision': 0.7499216267410338, 'eval_recall': 0.7372990578104552, 'eval_runtime': 1.3765, 'eval_samples_per_second': 273.884, 'eval_steps_per_second': 69.016, 'epoch': 7.0}


 80%|███████▉  | 3015/3770 [03:31<00:48, 15.66it/s]
 80%|████████  | 3016/3770 [03:33<00:48, 15.66it/s]

{'eval_loss': 0.456579327583313, 'eval_f1': 0.732399400707416, 'eval_precision': 0.7374409997854537, 'eval_recall': 0.7363450460687723, 'eval_runtime': 1.4196, 'eval_samples_per_second': 265.571, 'eval_steps_per_second': 66.921, 'epoch': 8.0}


 90%|█████████ | 3393/3770 [03:58<00:24, 15.16it/s]
 90%|█████████ | 3393/3770 [03:59<00:24, 15.16it/s]

{'eval_loss': 0.47136470675468445, 'eval_f1': 0.7245254664603495, 'eval_precision': 0.7205473279157489, 'eval_recall': 0.7385688045417862, 'eval_runtime': 1.3978, 'eval_samples_per_second': 269.704, 'eval_steps_per_second': 67.963, 'epoch': 9.0}


100%|█████████▉| 3769/3770 [04:25<00:00, 15.75it/s]
100%|██████████| 3770/3770 [04:28<00:00, 15.75it/s]

{'eval_loss': 0.4792849123477936, 'eval_f1': 0.7240350778835485, 'eval_precision': 0.7181820405485787, 'eval_recall': 0.7392263630480371, 'eval_runtime': 1.279, 'eval_samples_per_second': 294.767, 'eval_steps_per_second': 74.278, 'epoch': 10.0}


100%|██████████| 3770/3770 [04:30<00:00, 13.95it/s]


{'train_runtime': 270.1979, 'train_samples_per_second': 55.774, 'train_steps_per_second': 13.953, 'train_loss': 0.12907281769365467, 'epoch': 10.0}


100%|██████████| 95/95 [00:01<00:00, 60.35it/s]


0,1
eval/f1,▁▄▆▇▇▇█████
eval/loss,▁▁▂▃▅▆▆▇██▆
eval/precision,▁███████▇▇█
eval/recall,▁▃▅▇▇▇█████
eval/runtime,▃▃▃▂▂▄▃▄▄▁█
eval/samples_per_second,▆▅▆▆▇▄▆▅▅█▁
eval/steps_per_second,▆▅▆▆▇▄▆▅▅█▁
train/epoch,▁▂▃▃▄▅▆▆▇███
train/global_step,▁▂▃▃▄▅▆▆▇███

0,1
eval/f1,0.73819
eval/loss,0.42957
eval/precision,0.74992
eval/recall,0.7373
eval/runtime,1.6002
eval/samples_per_second,235.6
eval/steps_per_second,59.369
total_flos,991297601733120.0
train/epoch,10.0
train/global_step,3770.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1298/1298 [00:00<00:00, 14528.07 examples/s]
 10%|▉         | 259/2600 [00:16<02:33, 15.30it/s]
 10%|█         | 260/2600 [00:17<02:32, 15.30it/s]

{'eval_loss': 0.3195844292640686, 'eval_f1': 0.1764172335600907, 'eval_precision': 0.33955035789898175, 'eval_recall': 0.15529219087322263, 'eval_runtime': 1.0095, 'eval_samples_per_second': 257.553, 'eval_steps_per_second': 64.388, 'epoch': 1.0}


 20%|█▉        | 519/2600 [00:35<02:12, 15.75it/s]
 20%|██        | 520/2600 [00:36<02:12, 15.75it/s]

{'eval_loss': 0.24268695712089539, 'eval_f1': 0.40064023242786684, 'eval_precision': 0.6188902381078085, 'eval_recall': 0.33106530960704, 'eval_runtime': 1.4164, 'eval_samples_per_second': 183.569, 'eval_steps_per_second': 45.892, 'epoch': 2.0}


 30%|██▉       | 779/2600 [00:54<01:56, 15.65it/s]
 30%|███       | 780/2600 [00:55<01:56, 15.65it/s]

{'eval_loss': 0.23851080238819122, 'eval_f1': 0.5194019801789226, 'eval_precision': 0.5763460317631388, 'eval_recall': 0.4810559411861382, 'eval_runtime': 0.9719, 'eval_samples_per_second': 267.522, 'eval_steps_per_second': 66.88, 'epoch': 3.0}


 40%|███▉      | 1039/2600 [01:14<01:36, 16.20it/s]
 40%|████      | 1040/2600 [01:15<01:36, 16.20it/s]

{'eval_loss': 0.22474698722362518, 'eval_f1': 0.6137365914830092, 'eval_precision': 0.8791002536543715, 'eval_recall': 0.5560013828244813, 'eval_runtime': 0.9301, 'eval_samples_per_second': 279.53, 'eval_steps_per_second': 69.882, 'epoch': 4.0}


 50%|████▉     | 1299/2600 [01:33<01:26, 15.12it/s]
 50%|█████     | 1300/2600 [01:34<01:25, 15.12it/s]

{'eval_loss': 0.2438943088054657, 'eval_f1': 0.658090470782527, 'eval_precision': 0.8079328108054815, 'eval_recall': 0.5952547375541083, 'eval_runtime': 1.0058, 'eval_samples_per_second': 258.512, 'eval_steps_per_second': 64.628, 'epoch': 5.0}


 60%|█████▉    | 1559/2600 [01:51<01:01, 16.81it/s]
 60%|██████    | 1560/2600 [01:52<01:01, 16.81it/s]

{'eval_loss': 0.2535620629787445, 'eval_f1': 0.6636417405532092, 'eval_precision': 0.7237241949807185, 'eval_recall': 0.6212421955909969, 'eval_runtime': 0.9138, 'eval_samples_per_second': 284.515, 'eval_steps_per_second': 71.129, 'epoch': 6.0}


 70%|██████▉   | 1819/2600 [02:11<00:53, 14.68it/s]
 70%|███████   | 1820/2600 [02:11<00:53, 14.68it/s]

{'eval_loss': 0.2350355088710785, 'eval_f1': 0.7088316432133958, 'eval_precision': 0.7838068041709414, 'eval_recall': 0.6621266752906523, 'eval_runtime': 0.8824, 'eval_samples_per_second': 294.667, 'eval_steps_per_second': 73.667, 'epoch': 7.0}


 80%|███████▉  | 2079/2600 [02:30<00:31, 16.56it/s]
 80%|████████  | 2080/2600 [02:31<00:31, 16.56it/s]

{'eval_loss': 0.2275777906179428, 'eval_f1': 0.7145105578539102, 'eval_precision': 0.8388773091654244, 'eval_recall': 0.6545424939459003, 'eval_runtime': 0.9885, 'eval_samples_per_second': 263.033, 'eval_steps_per_second': 65.758, 'epoch': 8.0}


 90%|████████▉ | 2339/2600 [02:49<00:17, 14.67it/s]
 90%|█████████ | 2340/2600 [02:50<00:17, 14.67it/s]

{'eval_loss': 0.227298766374588, 'eval_f1': 0.7201237526500962, 'eval_precision': 0.8763537880559157, 'eval_recall': 0.6586713058489614, 'eval_runtime': 1.0926, 'eval_samples_per_second': 237.955, 'eval_steps_per_second': 59.489, 'epoch': 9.0}


100%|█████████▉| 2599/2600 [03:08<00:00, 16.47it/s]
100%|██████████| 2600/2600 [03:11<00:00, 16.47it/s]

{'eval_loss': 0.22397780418395996, 'eval_f1': 0.7179583463427918, 'eval_precision': 0.8184302719028587, 'eval_recall': 0.6673968166979185, 'eval_runtime': 0.9187, 'eval_samples_per_second': 283.013, 'eval_steps_per_second': 70.753, 'epoch': 10.0}


100%|██████████| 2600/2600 [03:13<00:00, 13.47it/s]


{'train_runtime': 193.0068, 'train_samples_per_second': 53.78, 'train_steps_per_second': 13.471, 'train_loss': 0.11523768498347356, 'epoch': 10.0}


100%|██████████| 65/65 [00:01<00:00, 55.13it/s]


------------------ Starting model ==> epochs: 10, batch size: 8, weights of decay: 0.01 ---------------------


0,1
eval/f1,▁▄▅▇▇▇█████
eval/loss,█▂▂▁▂▃▂▁▁▁▁
eval/precision,▁▅▄█▇▆▇▇█▇█
eval/recall,▁▃▅▆▇▇█████
eval/runtime,▃█▂▂▃▁▁▂▄▁▅
eval/samples_per_second,▆▁▆▇▆▇█▆▄▇▃
eval/steps_per_second,▆▁▆▇▆▇█▆▄▇▃
train/epoch,▁▂▃▃▄▅▆▆▇███
train/global_step,▁▂▃▃▄▅▆▆▇███

0,1
eval/f1,0.72012
eval/loss,0.2273
eval/precision,0.87635
eval/recall,0.65867
eval/runtime,1.2051
eval/samples_per_second,215.755
eval/steps_per_second,53.939
total_flos,682803840384000.0
train/epoch,10.0
train/global_step,2600.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 7614/7614 [00:00<00:00, 21248.92 examples/s]
 10%|▉         | 761/7620 [00:57<08:27, 13.51it/s]
 10%|█         | 762/7620 [01:01<08:27, 13.51it/s]

{'eval_loss': 0.10730195045471191, 'eval_f1': 0.6868425667220347, 'eval_precision': 0.8119428723820251, 'eval_recall': 0.6593352330521339, 'eval_runtime': 3.8378, 'eval_samples_per_second': 396.843, 'eval_steps_per_second': 49.768, 'epoch': 1.0}


 20%|█▉        | 1523/7620 [02:00<07:44, 13.13it/s] 
 20%|██        | 1524/7620 [02:04<07:44, 13.13it/s]

{'eval_loss': 0.09038858860731125, 'eval_f1': 0.7733446993922757, 'eval_precision': 0.8931295379987718, 'eval_recall': 0.7352476463816194, 'eval_runtime': 3.9162, 'eval_samples_per_second': 388.893, 'eval_steps_per_second': 48.771, 'epoch': 2.0}


 30%|███       | 2286/7620 [03:04<06:29, 13.69it/s]  
 30%|███       | 2286/7620 [03:08<06:29, 13.69it/s]

{'eval_loss': 0.09058745205402374, 'eval_f1': 0.8457057927418955, 'eval_precision': 0.9007435604047369, 'eval_recall': 0.8144451235734431, 'eval_runtime': 3.9195, 'eval_samples_per_second': 388.575, 'eval_steps_per_second': 48.731, 'epoch': 3.0}


 40%|████      | 3048/7620 [04:08<05:31, 13.81it/s]  
 40%|████      | 3048/7620 [04:12<05:31, 13.81it/s]

{'eval_loss': 0.08665385842323303, 'eval_f1': 0.8655991680601308, 'eval_precision': 0.8853211945897014, 'eval_recall': 0.8491932472991257, 'eval_runtime': 3.7859, 'eval_samples_per_second': 402.284, 'eval_steps_per_second': 50.451, 'epoch': 4.0}


 50%|█████     | 3810/7620 [05:11<04:35, 13.84it/s]  
 50%|█████     | 3810/7620 [05:15<04:35, 13.84it/s]

{'eval_loss': 0.09418103098869324, 'eval_f1': 0.8543205185439282, 'eval_precision': 0.8626686014688312, 'eval_recall': 0.8483839170318797, 'eval_runtime': 3.9342, 'eval_samples_per_second': 387.118, 'eval_steps_per_second': 48.549, 'epoch': 5.0}


 60%|██████    | 4572/7620 [06:14<03:40, 13.83it/s]
 60%|██████    | 4572/7620 [06:18<03:40, 13.83it/s]

{'eval_loss': 0.09725707769393921, 'eval_f1': 0.8644235981713406, 'eval_precision': 0.8896418075141378, 'eval_recall': 0.8449264530682912, 'eval_runtime': 3.7849, 'eval_samples_per_second': 402.394, 'eval_steps_per_second': 50.464, 'epoch': 6.0}


 70%|███████   | 5334/7620 [07:17<02:52, 13.22it/s]
 70%|███████   | 5334/7620 [07:21<02:52, 13.22it/s]

{'eval_loss': 0.10439416021108627, 'eval_f1': 0.872130942842805, 'eval_precision': 0.8940950037453248, 'eval_recall': 0.8532737487163194, 'eval_runtime': 3.9011, 'eval_samples_per_second': 390.403, 'eval_steps_per_second': 48.961, 'epoch': 7.0}


 80%|████████  | 6096/7620 [08:21<01:57, 13.00it/s]
 80%|████████  | 6096/7620 [08:25<01:57, 13.00it/s]

{'eval_loss': 0.11089707165956497, 'eval_f1': 0.8799919500100015, 'eval_precision': 0.8855089206119929, 'eval_recall': 0.8771500760885989, 'eval_runtime': 3.8107, 'eval_samples_per_second': 399.661, 'eval_steps_per_second': 50.122, 'epoch': 8.0}


 90%|█████████ | 6858/7620 [09:24<00:57, 13.28it/s]
 90%|█████████ | 6858/7620 [09:28<00:57, 13.28it/s]

{'eval_loss': 0.10455796122550964, 'eval_f1': 0.8812680567594018, 'eval_precision': 0.8902860123536136, 'eval_recall': 0.8729559723367774, 'eval_runtime': 3.8633, 'eval_samples_per_second': 394.225, 'eval_steps_per_second': 49.44, 'epoch': 9.0}


100%|██████████| 7620/7620 [10:33<00:00, 13.29it/s]

{'eval_loss': 0.10687035322189331, 'eval_f1': 0.8788193625738886, 'eval_precision': 0.892513518758747, 'eval_recall': 0.8669491251534597, 'eval_runtime': 3.7607, 'eval_samples_per_second': 404.978, 'eval_steps_per_second': 50.788, 'epoch': 10.0}


100%|██████████| 7620/7620 [10:35<00:00, 11.99it/s]


{'train_runtime': 635.3544, 'train_samples_per_second': 95.868, 'train_steps_per_second': 11.993, 'train_loss': 0.03948382280004306, 'epoch': 10.0}


100%|██████████| 191/191 [00:03<00:00, 49.49it/s]


0,1
eval/f1,▁▄▇▇▇▇█████
eval/loss,▇▂▂▁▃▄▆█▆▇▆
eval/precision,▁▇█▇▅▇▇▇▇▇▇
eval/recall,▁▃▆▇▇▇▇████
eval/runtime,▄▇▇▂█▂▇▃▅▁▇
eval/samples_per_second,▅▂▂▇▁▇▂▆▄█▂
eval/steps_per_second,▅▂▂▇▁▇▂▆▄█▂
train/epoch,▁▂▃▃▄▅▆▆▇███
train/global_step,▁▂▃▃▄▅▆▆▇███

0,1
eval/f1,0.88127
eval/loss,0.10456
eval/precision,0.89029
eval/recall,0.87296
eval/runtime,3.8993
eval/samples_per_second,390.583
eval/steps_per_second,48.983
total_flos,4006703460288000.0
train/epoch,10.0
train/global_step,7620.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1884/1884 [00:00<00:00, 16354.84 examples/s]
 10%|█         | 189/1890 [00:14<02:06, 13.49it/s]
 10%|█         | 189/1890 [00:15<02:06, 13.49it/s]

{'eval_loss': 0.312615305185318, 'eval_f1': 0.46191049322417477, 'eval_precision': 0.526274159216688, 'eval_recall': 0.4141019472432922, 'eval_runtime': 0.9007, 'eval_samples_per_second': 418.56, 'eval_steps_per_second': 53.291, 'epoch': 1.0}


 20%|█▉        | 377/1890 [00:30<01:55, 13.12it/s]
 20%|██        | 378/1890 [00:31<01:55, 13.12it/s]

{'eval_loss': 0.2904539108276367, 'eval_f1': 0.5677958010623834, 'eval_precision': 0.6494822199077518, 'eval_recall': 0.5126758679983002, 'eval_runtime': 0.9668, 'eval_samples_per_second': 389.942, 'eval_steps_per_second': 49.648, 'epoch': 2.0}


 30%|███       | 567/1890 [00:47<01:33, 14.10it/s]
 30%|███       | 567/1890 [00:48<01:33, 14.10it/s]

{'eval_loss': 0.277765154838562, 'eval_f1': 0.6295376825652289, 'eval_precision': 0.7686432748538012, 'eval_recall': 0.5778593842402115, 'eval_runtime': 0.9289, 'eval_samples_per_second': 405.847, 'eval_steps_per_second': 51.673, 'epoch': 3.0}


 40%|███▉      | 755/1890 [01:04<01:24, 13.39it/s]
 40%|████      | 756/1890 [01:05<01:24, 13.39it/s]

{'eval_loss': 0.3016514778137207, 'eval_f1': 0.745235052289043, 'eval_precision': 0.7898566255089566, 'eval_recall': 0.7195152442175832, 'eval_runtime': 0.9257, 'eval_samples_per_second': 407.274, 'eval_steps_per_second': 51.854, 'epoch': 4.0}


 50%|█████     | 945/1890 [01:20<01:10, 13.44it/s]
 50%|█████     | 945/1890 [01:21<01:10, 13.44it/s]

{'eval_loss': 0.3029501140117645, 'eval_f1': 0.7453474281844319, 'eval_precision': 0.7847641897208577, 'eval_recall': 0.7163414364165622, 'eval_runtime': 0.9716, 'eval_samples_per_second': 388.035, 'eval_steps_per_second': 49.405, 'epoch': 5.0}


 60%|█████▉    | 1133/1890 [01:37<00:55, 13.60it/s]
 60%|██████    | 1134/1890 [01:38<00:55, 13.60it/s]

{'eval_loss': 0.34749913215637207, 'eval_f1': 0.7527532116524789, 'eval_precision': 0.7828084404493504, 'eval_recall': 0.735362437209045, 'eval_runtime': 0.9213, 'eval_samples_per_second': 409.192, 'eval_steps_per_second': 52.099, 'epoch': 6.0}


 70%|███████   | 1323/1890 [01:54<00:41, 13.55it/s]
 70%|███████   | 1323/1890 [01:55<00:41, 13.55it/s]

{'eval_loss': 0.34307220578193665, 'eval_f1': 0.762050132354768, 'eval_precision': 0.7646012484247778, 'eval_recall': 0.763167697175494, 'eval_runtime': 0.9523, 'eval_samples_per_second': 395.904, 'eval_steps_per_second': 50.407, 'epoch': 7.0}


 80%|███████▉  | 1511/1890 [02:11<00:29, 12.82it/s]
 80%|████████  | 1512/1890 [02:12<00:29, 12.82it/s]

{'eval_loss': 0.3607766628265381, 'eval_f1': 0.758710937268931, 'eval_precision': 0.7701133274438703, 'eval_recall': 0.7587162658663552, 'eval_runtime': 0.9961, 'eval_samples_per_second': 378.478, 'eval_steps_per_second': 48.188, 'epoch': 8.0}


 90%|█████████ | 1701/1890 [02:27<00:13, 13.79it/s]
 90%|█████████ | 1701/1890 [02:28<00:13, 13.79it/s]

{'eval_loss': 0.3583037257194519, 'eval_f1': 0.771691851949985, 'eval_precision': 0.7719946332060484, 'eval_recall': 0.775369281484664, 'eval_runtime': 0.9269, 'eval_samples_per_second': 406.725, 'eval_steps_per_second': 51.785, 'epoch': 9.0}


100%|█████████▉| 1889/1890 [02:45<00:00, 13.12it/s]
100%|██████████| 1890/1890 [02:47<00:00, 13.12it/s]

{'eval_loss': 0.35964053869247437, 'eval_f1': 0.7726648749179711, 'eval_precision': 0.7723518152802764, 'eval_recall': 0.7757765158839933, 'eval_runtime': 0.9799, 'eval_samples_per_second': 384.718, 'eval_steps_per_second': 48.983, 'epoch': 10.0}


100%|██████████| 1890/1890 [02:49<00:00, 11.15it/s]


{'train_runtime': 169.5869, 'train_samples_per_second': 88.863, 'train_steps_per_second': 11.145, 'train_loss': 0.13288272272342097, 'epoch': 10.0}


100%|██████████| 48/48 [00:01<00:00, 47.84it/s]


0,1
eval/f1,▁▃▅▇▇██████
eval/loss,▄▂▁▃▃▇▇████
eval/precision,▁▄▇███▇▇███
eval/recall,▁▃▄▇▇▇█████
eval/runtime,▁▄▂▂▄▂▄▆▂▅█
eval/samples_per_second,█▅▆▇▄▇▅▃▇▄▁
eval/steps_per_second,█▅▆▇▄▇▅▃▇▄▁
train/epoch,▁▂▃▃▄▅▆▆▇███
train/global_step,▁▂▃▃▄▅▆▆▇███

0,1
eval/f1,0.77266
eval/loss,0.35964
eval/precision,0.77235
eval/recall,0.77578
eval/runtime,1.0445
eval/samples_per_second,360.955
eval/steps_per_second,45.957
total_flos,991297601733120.0
train/epoch,10.0
train/global_step,1890.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1298/1298 [00:00<00:00, 14831.89 examples/s]
 10%|▉         | 129/1300 [00:09<01:25, 13.75it/s]
 10%|█         | 130/1300 [00:10<01:25, 13.75it/s]

{'eval_loss': 0.3005034923553467, 'eval_f1': 0.11739745403111738, 'eval_precision': 0.11739745403111738, 'eval_recall': 0.11739745403111738, 'eval_runtime': 0.6358, 'eval_samples_per_second': 408.907, 'eval_steps_per_second': 51.9, 'epoch': 1.0}


 20%|█▉        | 259/1300 [00:22<01:16, 13.62it/s]
 20%|██        | 260/1300 [00:22<01:16, 13.62it/s]

{'eval_loss': 0.244866281747818, 'eval_f1': 0.4109391026666461, 'eval_precision': 0.5945163018185331, 'eval_recall': 0.35413087927619463, 'eval_runtime': 0.6439, 'eval_samples_per_second': 403.81, 'eval_steps_per_second': 51.253, 'epoch': 2.0}


 30%|██▉       | 389/1300 [00:34<01:07, 13.56it/s]
 30%|███       | 390/1300 [00:35<01:07, 13.56it/s]

{'eval_loss': 0.22831684350967407, 'eval_f1': 0.5100722417251065, 'eval_precision': 0.5621214927152184, 'eval_recall': 0.46802483351838625, 'eval_runtime': 0.6327, 'eval_samples_per_second': 410.934, 'eval_steps_per_second': 52.157, 'epoch': 3.0}


 40%|████      | 520/1300 [00:46<00:56, 13.77it/s]
 40%|████      | 520/1300 [00:47<00:56, 13.77it/s]

{'eval_loss': 0.2353920340538025, 'eval_f1': 0.5169209419551416, 'eval_precision': 0.5603513787500893, 'eval_recall': 0.48156988969459374, 'eval_runtime': 0.644, 'eval_samples_per_second': 403.738, 'eval_steps_per_second': 51.244, 'epoch': 4.0}


 50%|█████     | 650/1300 [01:01<00:46, 13.83it/s]
 50%|█████     | 650/1300 [01:01<00:46, 13.83it/s]

{'eval_loss': 0.22967183589935303, 'eval_f1': 0.5561724099444618, 'eval_precision': 0.660245604089589, 'eval_recall': 0.5013086479477759, 'eval_runtime': 0.6421, 'eval_samples_per_second': 404.924, 'eval_steps_per_second': 51.394, 'epoch': 5.0}


 60%|██████    | 780/1300 [01:13<00:37, 13.74it/s]
 60%|██████    | 780/1300 [01:14<00:37, 13.74it/s]

{'eval_loss': 0.24201880395412445, 'eval_f1': 0.6270748799846569, 'eval_precision': 0.7101855812344349, 'eval_recall': 0.5800128303846679, 'eval_runtime': 0.6497, 'eval_samples_per_second': 400.184, 'eval_steps_per_second': 50.793, 'epoch': 6.0}


 70%|███████   | 910/1300 [01:25<00:28, 13.70it/s]
 70%|███████   | 910/1300 [01:26<00:28, 13.70it/s]

{'eval_loss': 0.23701071739196777, 'eval_f1': 0.6013454360962042, 'eval_precision': 0.8139516587696874, 'eval_recall': 0.5501961930315572, 'eval_runtime': 0.6464, 'eval_samples_per_second': 402.247, 'eval_steps_per_second': 51.054, 'epoch': 7.0}


 80%|████████  | 1040/1300 [01:37<00:19, 13.00it/s]
 80%|████████  | 1040/1300 [01:38<00:19, 13.00it/s]

{'eval_loss': 0.2279946506023407, 'eval_f1': 0.6567017098803474, 'eval_precision': 0.8208311896187047, 'eval_recall': 0.5950782279361467, 'eval_runtime': 0.6727, 'eval_samples_per_second': 386.52, 'eval_steps_per_second': 49.058, 'epoch': 8.0}


 90%|█████████ | 1170/1300 [01:49<00:09, 13.28it/s]
 90%|█████████ | 1170/1300 [01:50<00:09, 13.28it/s]

{'eval_loss': 0.23628823459148407, 'eval_f1': 0.6734486386984971, 'eval_precision': 0.7766015515559034, 'eval_recall': 0.6179740459819687, 'eval_runtime': 0.6753, 'eval_samples_per_second': 385.007, 'eval_steps_per_second': 48.866, 'epoch': 9.0}


100%|██████████| 1300/1300 [02:01<00:00, 13.02it/s]
100%|██████████| 1300/1300 [02:04<00:00, 13.02it/s]

{'eval_loss': 0.2371763437986374, 'eval_f1': 0.6443428227681061, 'eval_precision': 0.8172440293949456, 'eval_recall': 0.5836458988815443, 'eval_runtime': 0.6498, 'eval_samples_per_second': 400.127, 'eval_steps_per_second': 50.785, 'epoch': 10.0}


100%|██████████| 1300/1300 [02:05<00:00, 10.32it/s]


{'train_runtime': 126.0034, 'train_samples_per_second': 82.379, 'train_steps_per_second': 10.317, 'train_loss': 0.14041971059945912, 'epoch': 10.0}


100%|██████████| 33/33 [00:00<00:00, 46.44it/s]


------------------ Starting model ==> epochs: 10, batch size: 8, weights of decay: 0.001 ---------------------


0,1
eval/f1,▁▅▆▆▇▇▇████
eval/loss,█▃▁▂▁▂▂▁▂▂▂
eval/precision,▁▆▅▅▆▇█████
eval/recall,▁▄▆▆▆▇▇████
eval/runtime,▁▂▁▂▂▂▂▃▃▂█
eval/samples_per_second,█▇█▇▇▇▇▅▅▇▁
eval/steps_per_second,█▇█▇▇▇▇▅▅▇▁
train/epoch,▁▂▃▃▄▅▆▆▇███
train/global_step,▁▂▃▃▄▅▆▆▇███

0,1
eval/f1,0.67345
eval/loss,0.23629
eval/precision,0.7766
eval/recall,0.61797
eval/runtime,0.7573
eval/samples_per_second,343.303
eval/steps_per_second,43.573
total_flos,682803840384000.0
train/epoch,10.0
train/global_step,1300.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 7614/7614 [00:00<00:00, 13060.62 examples/s]
 10%|█         | 762/7620 [00:57<08:46, 13.03it/s]
 10%|█         | 762/7620 [01:01<08:46, 13.03it/s]

{'eval_loss': 0.11105383932590485, 'eval_f1': 0.6801102299596302, 'eval_precision': 0.7952863700164946, 'eval_recall': 0.6469384903513704, 'eval_runtime': 3.7328, 'eval_samples_per_second': 407.999, 'eval_steps_per_second': 51.167, 'epoch': 1.0}


 20%|██        | 1524/7620 [02:00<07:27, 13.63it/s] 
 20%|██        | 1524/7620 [02:04<07:27, 13.63it/s]

{'eval_loss': 0.09524760395288467, 'eval_f1': 0.7687751146654028, 'eval_precision': 0.9004946462509456, 'eval_recall': 0.7289569693642537, 'eval_runtime': 3.8629, 'eval_samples_per_second': 394.259, 'eval_steps_per_second': 49.444, 'epoch': 2.0}


 30%|███       | 2286/7620 [03:04<06:27, 13.78it/s]  
 30%|███       | 2286/7620 [03:08<06:27, 13.78it/s]

{'eval_loss': 0.08561817556619644, 'eval_f1': 0.8411746473692174, 'eval_precision': 0.8925318446375808, 'eval_recall': 0.8052213877627797, 'eval_runtime': 3.8277, 'eval_samples_per_second': 397.892, 'eval_steps_per_second': 49.9, 'epoch': 3.0}


 40%|████      | 3048/7620 [04:08<05:30, 13.85it/s]  
 40%|████      | 3048/7620 [04:12<05:30, 13.85it/s]

{'eval_loss': 0.08797266334295273, 'eval_f1': 0.8682909367878346, 'eval_precision': 0.9009531698947004, 'eval_recall': 0.841456774022203, 'eval_runtime': 3.7765, 'eval_samples_per_second': 403.286, 'eval_steps_per_second': 50.576, 'epoch': 4.0}


 50%|█████     | 3810/7620 [05:11<04:36, 13.76it/s]  
 50%|█████     | 3810/7620 [05:15<04:36, 13.76it/s]

{'eval_loss': 0.09828367084264755, 'eval_f1': 0.8542184394157897, 'eval_precision': 0.8660212062894936, 'eval_recall': 0.8435361107149274, 'eval_runtime': 3.7859, 'eval_samples_per_second': 402.279, 'eval_steps_per_second': 50.45, 'epoch': 5.0}


 60%|██████    | 4572/7620 [06:15<03:40, 13.83it/s]
 60%|██████    | 4572/7620 [06:19<03:40, 13.83it/s]

{'eval_loss': 0.09567072242498398, 'eval_f1': 0.8740839090955871, 'eval_precision': 0.9053950975346883, 'eval_recall': 0.8504724071306768, 'eval_runtime': 3.8744, 'eval_samples_per_second': 393.091, 'eval_steps_per_second': 49.298, 'epoch': 6.0}


 70%|███████   | 5334/7620 [07:18<02:45, 13.82it/s]
 70%|███████   | 5334/7620 [07:22<02:45, 13.82it/s]

{'eval_loss': 0.1145443543791771, 'eval_f1': 0.8593774699232749, 'eval_precision': 0.8860595059463059, 'eval_recall': 0.8373280489639162, 'eval_runtime': 3.7752, 'eval_samples_per_second': 403.42, 'eval_steps_per_second': 50.593, 'epoch': 7.0}


 80%|████████  | 6096/7620 [08:21<01:50, 13.83it/s]
 80%|████████  | 6096/7620 [08:25<01:50, 13.83it/s]

{'eval_loss': 0.11234863102436066, 'eval_f1': 0.8672071539820111, 'eval_precision': 0.8929413347056973, 'eval_recall': 0.8464279760185766, 'eval_runtime': 3.8306, 'eval_samples_per_second': 397.584, 'eval_steps_per_second': 49.861, 'epoch': 8.0}


 90%|█████████ | 6858/7620 [09:25<00:55, 13.61it/s]
 90%|█████████ | 6858/7620 [09:28<00:55, 13.61it/s]

{'eval_loss': 0.10545245558023453, 'eval_f1': 0.8759251076689133, 'eval_precision': 0.8805203749888694, 'eval_recall': 0.8718321797971049, 'eval_runtime': 3.7623, 'eval_samples_per_second': 404.807, 'eval_steps_per_second': 50.767, 'epoch': 9.0}


100%|██████████| 7620/7620 [10:28<00:00, 13.61it/s]
100%|██████████| 7620/7620 [10:33<00:00, 13.61it/s]

{'eval_loss': 0.10683093219995499, 'eval_f1': 0.8767934473752824, 'eval_precision': 0.891426069952049, 'eval_recall': 0.8645939273999195, 'eval_runtime': 3.9154, 'eval_samples_per_second': 388.98, 'eval_steps_per_second': 48.782, 'epoch': 10.0}


100%|██████████| 7620/7620 [10:35<00:00, 11.99it/s]


{'train_runtime': 635.3209, 'train_samples_per_second': 95.873, 'train_steps_per_second': 11.994, 'train_loss': 0.03921744279035433, 'epoch': 10.0}


100%|██████████| 191/191 [00:03<00:00, 50.16it/s]


0,1
eval/f1,▁▄▇█▇█▇████
eval/loss,▇▃▁▂▄▃█▇▆▆▆
eval/precision,▁█▇█▅█▇▇▆▇▇
eval/recall,▁▄▆▇▇▇▇▇███
eval/runtime,▁▆▅▃▃▆▃▅▂█▆
eval/samples_per_second,█▃▄▆▆▃▆▄▇▁▃
eval/steps_per_second,█▃▄▆▆▃▆▄▇▁▃
train/epoch,▁▂▃▃▄▅▆▆▇███
train/global_step,▁▂▃▃▄▅▆▆▇███

0,1
eval/f1,0.87679
eval/loss,0.10683
eval/precision,0.89143
eval/recall,0.86459
eval/runtime,3.8531
eval/samples_per_second,395.27
eval/steps_per_second,49.571
total_flos,4006703460288000.0
train/epoch,10.0
train/global_step,7620.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1884/1884 [00:00<00:00, 15799.78 examples/s]
 10%|▉         | 188/1890 [00:14<02:13, 12.76it/s]
 10%|█         | 189/1890 [00:15<02:13, 12.76it/s]

{'eval_loss': 0.31312188506126404, 'eval_f1': 0.46191049322417477, 'eval_precision': 0.526274159216688, 'eval_recall': 0.4141019472432922, 'eval_runtime': 0.9746, 'eval_samples_per_second': 386.806, 'eval_steps_per_second': 49.249, 'epoch': 1.0}


 20%|██        | 378/1890 [00:30<01:45, 14.30it/s]
 20%|██        | 378/1890 [00:31<01:45, 14.30it/s]

{'eval_loss': 0.28135204315185547, 'eval_f1': 0.5783004141367882, 'eval_precision': 0.654169803335543, 'eval_recall': 0.5207034971904483, 'eval_runtime': 0.922, 'eval_samples_per_second': 408.904, 'eval_steps_per_second': 52.062, 'epoch': 2.0}


 30%|██▉       | 566/1890 [00:47<01:36, 13.68it/s]
 30%|███       | 567/1890 [00:48<01:36, 13.68it/s]

{'eval_loss': 0.29226839542388916, 'eval_f1': 0.6134903035968717, 'eval_precision': 0.7514043655532022, 'eval_recall': 0.555574679485151, 'eval_runtime': 0.9256, 'eval_samples_per_second': 407.301, 'eval_steps_per_second': 51.858, 'epoch': 3.0}


 40%|████      | 756/1890 [01:04<01:25, 13.28it/s]
 40%|████      | 756/1890 [01:05<01:25, 13.28it/s]

{'eval_loss': 0.29324084520339966, 'eval_f1': 0.7252667966948392, 'eval_precision': 0.7760501164733429, 'eval_recall': 0.6907107252846059, 'eval_runtime': 0.9852, 'eval_samples_per_second': 382.672, 'eval_steps_per_second': 48.722, 'epoch': 4.0}


 50%|████▉     | 944/1890 [01:21<01:10, 13.50it/s]
 50%|█████     | 945/1890 [01:22<01:09, 13.50it/s]

{'eval_loss': 0.2976195514202118, 'eval_f1': 0.737514359176462, 'eval_precision': 0.779344876520776, 'eval_recall': 0.7015916269194735, 'eval_runtime': 0.9256, 'eval_samples_per_second': 407.301, 'eval_steps_per_second': 51.858, 'epoch': 5.0}


 60%|██████    | 1134/1890 [01:37<00:55, 13.70it/s]
 60%|██████    | 1134/1890 [01:38<00:55, 13.70it/s]

{'eval_loss': 0.3598151206970215, 'eval_f1': 0.7251448261142246, 'eval_precision': 0.7534780828664047, 'eval_recall': 0.7103843922154335, 'eval_runtime': 0.9511, 'eval_samples_per_second': 396.377, 'eval_steps_per_second': 50.467, 'epoch': 6.0}


 70%|██████▉   | 1322/1890 [01:54<00:43, 12.91it/s]
 70%|███████   | 1323/1890 [01:55<00:43, 12.91it/s]

{'eval_loss': 0.36743471026420593, 'eval_f1': 0.7374415716257653, 'eval_precision': 0.7645662666150471, 'eval_recall': 0.7195430760424804, 'eval_runtime': 0.9755, 'eval_samples_per_second': 386.458, 'eval_steps_per_second': 49.204, 'epoch': 7.0}


 80%|████████  | 1512/1890 [02:11<00:27, 13.96it/s]
 80%|████████  | 1512/1890 [02:12<00:27, 13.96it/s]

{'eval_loss': 0.35510867834091187, 'eval_f1': 0.7514021578127538, 'eval_precision': 0.7729527361880303, 'eval_recall': 0.7350332117302445, 'eval_runtime': 0.935, 'eval_samples_per_second': 403.223, 'eval_steps_per_second': 51.339, 'epoch': 8.0}


 90%|████████▉ | 1700/1890 [02:27<00:14, 13.38it/s]
 90%|█████████ | 1701/1890 [02:28<00:14, 13.38it/s]

{'eval_loss': 0.3700663447380066, 'eval_f1': 0.751876252688566, 'eval_precision': 0.7652879500958838, 'eval_recall': 0.7430039801103475, 'eval_runtime': 0.9585, 'eval_samples_per_second': 393.311, 'eval_steps_per_second': 50.077, 'epoch': 9.0}


100%|██████████| 1890/1890 [02:44<00:00, 13.33it/s]
100%|██████████| 1890/1890 [02:46<00:00, 13.33it/s]

{'eval_loss': 0.3670259714126587, 'eval_f1': 0.7689452796449701, 'eval_precision': 0.7874533799533799, 'eval_recall': 0.7539493937307842, 'eval_runtime': 1.0102, 'eval_samples_per_second': 373.199, 'eval_steps_per_second': 47.516, 'epoch': 10.0}


100%|██████████| 1890/1890 [02:48<00:00, 11.20it/s]


{'train_runtime': 168.7837, 'train_samples_per_second': 89.286, 'train_steps_per_second': 11.198, 'train_loss': 0.13265076490937086, 'epoch': 10.0}


100%|██████████| 48/48 [00:01<00:00, 47.03it/s]


0,1
eval/f1,▁▄▄▇▇▇▇████
eval/loss,▄▁▂▂▂▇█▇███
eval/precision,▁▄▇██▇▇█▇██
eval/recall,▁▃▄▇▇▇▇████
eval/runtime,▄▁▁▄▁▂▄▂▃▅█
eval/samples_per_second,▅██▅█▆▅▇▆▃▁
eval/steps_per_second,▅██▅█▆▅▇▆▃▁
train/epoch,▁▂▃▃▄▅▆▆▇███
train/global_step,▁▂▃▃▄▅▆▆▇███

0,1
eval/f1,0.76895
eval/loss,0.36703
eval/precision,0.78745
eval/recall,0.75395
eval/runtime,1.0668
eval/samples_per_second,353.399
eval/steps_per_second,44.995
total_flos,991297601733120.0
train/epoch,10.0
train/global_step,1890.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1298/1298 [00:00<00:00, 10707.50 examples/s]
 10%|▉         | 129/1300 [00:09<01:29, 13.08it/s]
 10%|█         | 130/1300 [00:10<01:29, 13.08it/s]

{'eval_loss': 0.30279475450515747, 'eval_f1': 0.126413638041545, 'eval_precision': 0.2650957290132548, 'eval_recall': 0.12079881457533508, 'eval_runtime': 0.6493, 'eval_samples_per_second': 400.425, 'eval_steps_per_second': 50.823, 'epoch': 1.0}


 20%|█▉        | 259/1300 [00:21<01:18, 13.32it/s]
 20%|██        | 260/1300 [00:22<01:18, 13.32it/s]

{'eval_loss': 0.24871374666690826, 'eval_f1': 0.3820968685981855, 'eval_precision': 0.6146068586585828, 'eval_recall': 0.33222804370159803, 'eval_runtime': 0.6628, 'eval_samples_per_second': 392.293, 'eval_steps_per_second': 49.791, 'epoch': 2.0}


 30%|██▉       | 389/1300 [00:33<01:08, 13.36it/s]
 30%|███       | 390/1300 [00:34<01:08, 13.36it/s]

{'eval_loss': 0.2293790876865387, 'eval_f1': 0.5290039552958203, 'eval_precision': 0.5711152081583976, 'eval_recall': 0.49982093878652206, 'eval_runtime': 0.6642, 'eval_samples_per_second': 391.465, 'eval_steps_per_second': 49.686, 'epoch': 3.0}


 40%|████      | 520/1300 [00:45<00:58, 13.29it/s]
 40%|████      | 520/1300 [00:46<00:58, 13.29it/s]

{'eval_loss': 0.22556228935718536, 'eval_f1': 0.5339049115470752, 'eval_precision': 0.5857786780089211, 'eval_recall': 0.5021265493324935, 'eval_runtime': 0.6516, 'eval_samples_per_second': 399.011, 'eval_steps_per_second': 50.644, 'epoch': 4.0}


 50%|█████     | 650/1300 [00:57<00:48, 13.50it/s]
 50%|█████     | 650/1300 [00:58<00:48, 13.50it/s]

{'eval_loss': 0.21795769035816193, 'eval_f1': 0.5617556171937252, 'eval_precision': 0.7385683939534624, 'eval_recall': 0.5111030457663314, 'eval_runtime': 0.652, 'eval_samples_per_second': 398.787, 'eval_steps_per_second': 50.615, 'epoch': 5.0}


 60%|██████    | 780/1300 [01:09<00:37, 13.75it/s]
 60%|██████    | 780/1300 [01:10<00:37, 13.75it/s]

{'eval_loss': 0.2214464545249939, 'eval_f1': 0.6404124797781894, 'eval_precision': 0.7010681341560846, 'eval_recall': 0.598367024878094, 'eval_runtime': 0.6651, 'eval_samples_per_second': 390.946, 'eval_steps_per_second': 49.62, 'epoch': 6.0}


 70%|███████   | 910/1300 [01:21<00:28, 13.67it/s]
 70%|███████   | 910/1300 [01:22<00:28, 13.67it/s]

{'eval_loss': 0.21112819015979767, 'eval_f1': 0.6294995795741704, 'eval_precision': 0.8775155402956283, 'eval_recall': 0.5730882768328024, 'eval_runtime': 0.6527, 'eval_samples_per_second': 398.345, 'eval_steps_per_second': 50.559, 'epoch': 7.0}


 80%|████████  | 1040/1300 [01:33<00:18, 13.70it/s]
 80%|████████  | 1040/1300 [01:34<00:18, 13.70it/s]

{'eval_loss': 0.21689000725746155, 'eval_f1': 0.6179562018275169, 'eval_precision': 0.7075262194849824, 'eval_recall': 0.5698251255753818, 'eval_runtime': 0.6421, 'eval_samples_per_second': 404.944, 'eval_steps_per_second': 51.397, 'epoch': 8.0}


 90%|█████████ | 1170/1300 [01:45<00:09, 13.56it/s]
 90%|█████████ | 1170/1300 [01:46<00:09, 13.56it/s]

{'eval_loss': 0.2249705046415329, 'eval_f1': 0.684527205319459, 'eval_precision': 0.7867708505999988, 'eval_recall': 0.6299683686630884, 'eval_runtime': 0.6489, 'eval_samples_per_second': 400.681, 'eval_steps_per_second': 50.856, 'epoch': 9.0}


100%|██████████| 1300/1300 [01:57<00:00, 13.63it/s]
100%|██████████| 1300/1300 [01:59<00:00, 13.63it/s]

{'eval_loss': 0.22351716458797455, 'eval_f1': 0.6553213206183989, 'eval_precision': 0.8299229657392618, 'eval_recall': 0.5954013249440673, 'eval_runtime': 0.6643, 'eval_samples_per_second': 391.412, 'eval_steps_per_second': 49.679, 'epoch': 10.0}


100%|██████████| 1300/1300 [02:02<00:00, 10.63it/s]


{'train_runtime': 122.2481, 'train_samples_per_second': 84.909, 'train_steps_per_second': 10.634, 'train_loss': 0.14074048555814303, 'epoch': 10.0}


100%|██████████| 33/33 [00:00<00:00, 44.17it/s]


------------------ Starting model ==> epochs: 15, batch size: 4, weights of decay: 0.01 ---------------------


0,1
eval/f1,▁▄▆▆▆▇▇▇███
eval/loss,█▄▂▂▂▂▁▁▂▂▂
eval/precision,▁▅▄▅▆▆█▆▇▇▇
eval/recall,▁▄▆▆▆█▇▇███
eval/runtime,▁▂▂▁▁▂▁▁▁▂█
eval/samples_per_second,█▇▇▇▇▇▇██▇▁
eval/steps_per_second,█▇▇▇▇▇▇██▇▁
train/epoch,▁▂▃▃▄▅▆▆▇███
train/global_step,▁▂▃▃▄▅▆▆▇███

0,1
eval/f1,0.68453
eval/loss,0.22497
eval/precision,0.78677
eval/recall,0.62997
eval/runtime,0.7998
eval/samples_per_second,325.087
eval/steps_per_second,41.261
total_flos,682803840384000.0
train/epoch,10.0
train/global_step,1300.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 7614/7614 [00:00<00:00, 20802.08 examples/s]
  7%|▋         | 1523/22845 [01:37<21:29, 16.53it/s]
  7%|▋         | 1523/22845 [01:43<21:29, 16.53it/s]

{'eval_loss': 0.10882176458835602, 'eval_f1': 0.6528527193475488, 'eval_precision': 0.6713909875069598, 'eval_recall': 0.6366476596617165, 'eval_runtime': 5.8005, 'eval_samples_per_second': 262.564, 'eval_steps_per_second': 65.684, 'epoch': 1.0}


 13%|█▎        | 3045/22845 [03:21<22:12, 14.86it/s]  
 13%|█▎        | 3046/22845 [03:27<22:12, 14.86it/s]

{'eval_loss': 0.1155092641711235, 'eval_f1': 0.7001988069358199, 'eval_precision': 0.8661421624142137, 'eval_recall': 0.6977608275187261, 'eval_runtime': 6.4549, 'eval_samples_per_second': 235.946, 'eval_steps_per_second': 59.025, 'epoch': 2.0}


 20%|██        | 4569/22845 [05:05<18:06, 16.82it/s]  
 20%|██        | 4569/22845 [05:12<18:06, 16.82it/s]

{'eval_loss': 0.11170659214258194, 'eval_f1': 0.8068219566687654, 'eval_precision': 0.9017094128682095, 'eval_recall': 0.769480131165034, 'eval_runtime': 6.7312, 'eval_samples_per_second': 226.261, 'eval_steps_per_second': 56.603, 'epoch': 3.0}


 27%|██▋       | 6091/22845 [06:50<17:42, 15.77it/s]  
 27%|██▋       | 6092/22845 [06:56<17:42, 15.77it/s]

{'eval_loss': 0.09462782740592957, 'eval_f1': 0.835557246405931, 'eval_precision': 0.8701230442010183, 'eval_recall': 0.8092708673136725, 'eval_runtime': 5.9827, 'eval_samples_per_second': 254.569, 'eval_steps_per_second': 63.684, 'epoch': 4.0}


 33%|███▎      | 7615/22845 [08:34<17:13, 14.73it/s]  
 33%|███▎      | 7615/22845 [08:40<17:13, 14.73it/s]

{'eval_loss': 0.10347720235586166, 'eval_f1': 0.8461894893779299, 'eval_precision': 0.8686113691074553, 'eval_recall': 0.8272595871176723, 'eval_runtime': 5.5399, 'eval_samples_per_second': 274.914, 'eval_steps_per_second': 68.774, 'epoch': 5.0}


 40%|███▉      | 9137/22845 [10:19<13:56, 16.38it/s]  
 40%|████      | 9138/22845 [10:25<13:56, 16.38it/s]

{'eval_loss': 0.11023245751857758, 'eval_f1': 0.8413818272433726, 'eval_precision': 0.8486519767096292, 'eval_recall': 0.8358820035684833, 'eval_runtime': 6.2528, 'eval_samples_per_second': 243.572, 'eval_steps_per_second': 60.933, 'epoch': 6.0}


 44%|████▍     | 10003/22845 [11:21<13:19, 16.07it/s] 

{'loss': 0.0723, 'grad_norm': 0.014752350747585297, 'learning_rate': 2.8113372729262422e-05, 'epoch': 6.57}


 47%|████▋     | 10661/22845 [12:03<12:31, 16.22it/s]
 47%|████▋     | 10661/22845 [12:09<12:31, 16.22it/s]

{'eval_loss': 0.11966491490602493, 'eval_f1': 0.8594791598787592, 'eval_precision': 0.8558559169689612, 'eval_recall': 0.8656591551347622, 'eval_runtime': 5.7659, 'eval_samples_per_second': 264.14, 'eval_steps_per_second': 66.078, 'epoch': 7.0}


 53%|█████▎    | 12183/22845 [13:48<12:17, 14.46it/s]  
 53%|█████▎    | 12184/22845 [13:54<12:17, 14.46it/s]

{'eval_loss': 0.12755094468593597, 'eval_f1': 0.8604288882414505, 'eval_precision': 0.8729427630241957, 'eval_recall': 0.8521546689094716, 'eval_runtime': 6.0388, 'eval_samples_per_second': 252.203, 'eval_steps_per_second': 63.092, 'epoch': 8.0}


 60%|██████    | 13707/22845 [15:33<10:27, 14.55it/s]  
 60%|██████    | 13707/22845 [15:39<10:27, 14.55it/s]

{'eval_loss': 0.13892415165901184, 'eval_f1': 0.845755311883926, 'eval_precision': 0.8646781925080047, 'eval_recall': 0.8296682927127325, 'eval_runtime': 6.6288, 'eval_samples_per_second': 229.755, 'eval_steps_per_second': 57.477, 'epoch': 9.0}


 67%|██████▋   | 15229/22845 [17:18<07:52, 16.12it/s]  
 67%|██████▋   | 15230/22845 [17:25<07:52, 16.12it/s]

{'eval_loss': 0.1401943415403366, 'eval_f1': 0.8587515837066719, 'eval_precision': 0.874976729030171, 'eval_recall': 0.8471802758940038, 'eval_runtime': 6.6166, 'eval_samples_per_second': 230.177, 'eval_steps_per_second': 57.582, 'epoch': 10.0}


 73%|███████▎  | 16753/22845 [19:03<06:26, 15.78it/s]  
 73%|███████▎  | 16753/22845 [19:09<06:26, 15.78it/s]

{'eval_loss': 0.13779889047145844, 'eval_f1': 0.8616260122860696, 'eval_precision': 0.8719139769799205, 'eval_recall': 0.8545231450102323, 'eval_runtime': 5.824, 'eval_samples_per_second': 261.505, 'eval_steps_per_second': 65.419, 'epoch': 11.0}


 80%|███████▉  | 18275/22845 [20:48<04:40, 16.31it/s]  
 80%|████████  | 18276/22845 [20:53<04:40, 16.31it/s]

{'eval_loss': 0.1492016464471817, 'eval_f1': 0.8545003708398539, 'eval_precision': 0.8608567552576878, 'eval_recall': 0.8491246631623149, 'eval_runtime': 5.4935, 'eval_samples_per_second': 277.236, 'eval_steps_per_second': 69.354, 'epoch': 12.0}


 87%|████████▋ | 19799/22845 [22:31<03:05, 16.40it/s]  
 87%|████████▋ | 19799/22845 [22:37<03:05, 16.40it/s]

{'eval_loss': 0.15211698412895203, 'eval_f1': 0.8614601444829858, 'eval_precision': 0.8617149479142491, 'eval_recall': 0.8637915784368874, 'eval_runtime': 5.7276, 'eval_samples_per_second': 265.904, 'eval_steps_per_second': 66.52, 'epoch': 13.0}


 88%|████████▊ | 20003/22845 [22:52<02:54, 16.30it/s]

{'loss': 0.0114, 'grad_norm': 0.0022258427925407887, 'learning_rate': 6.2267454585248415e-06, 'epoch': 13.13}


 93%|█████████▎| 21321/22845 [24:15<01:34, 16.20it/s]
 93%|█████████▎| 21322/22845 [24:21<01:34, 16.20it/s]

{'eval_loss': 0.14903059601783752, 'eval_f1': 0.8593157813801965, 'eval_precision': 0.8599482583240491, 'eval_recall': 0.8610790964635937, 'eval_runtime': 6.1643, 'eval_samples_per_second': 247.069, 'eval_steps_per_second': 61.808, 'epoch': 14.0}


100%|██████████| 22845/22845 [26:00<00:00, 15.22it/s]
100%|██████████| 22845/22845 [26:07<00:00, 15.22it/s]

{'eval_loss': 0.1495642513036728, 'eval_f1': 0.8661269498564247, 'eval_precision': 0.8624165438732065, 'eval_recall': 0.8718947280261051, 'eval_runtime': 5.8763, 'eval_samples_per_second': 259.175, 'eval_steps_per_second': 64.836, 'epoch': 15.0}


100%|██████████| 22845/22845 [26:09<00:00, 14.55it/s]


{'train_runtime': 1569.5912, 'train_samples_per_second': 58.209, 'train_steps_per_second': 14.555, 'train_loss': 0.03697052089618525, 'epoch': 15.0}


100%|██████████| 381/381 [00:05<00:00, 67.72it/s]


0,1
eval/f1,▁▃▆▇▇▇██▇███████
eval/loss,▃▄▃▁▂▃▄▅▆▇▆█████
eval/precision,▁▇█▇▇▆▇▇▇▇▇▇▇▇▇▇
eval/recall,▁▃▅▆▇▇█▇▇▇▇▇████
eval/runtime,▃▆█▄▁▅▃▄▇▇▃▁▂▅▃▂
eval/samples_per_second,▆▂▁▅█▃▆▅▁▂▆█▆▄▆▇
eval/steps_per_second,▆▂▁▅█▃▆▅▁▂▆█▆▄▆▇
train/epoch,▁▁▂▃▃▃▄▄▅▅▅▆▇▇▇▇███
train/global_step,▁▁▂▂▃▃▄▄▄▅▅▆▆▇▇▇███
train/grad_norm,█▁

0,1
eval/f1,0.86613
eval/loss,0.14956
eval/precision,0.86242
eval/recall,0.87189
eval/runtime,5.6553
eval/samples_per_second,269.305
eval/steps_per_second,67.37
total_flos,6010055190432000.0
train/epoch,15.0
train/global_step,22845.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1884/1884 [00:00<00:00, 16186.23 examples/s]
  7%|▋         | 377/5655 [00:24<05:50, 15.06it/s]
  7%|▋         | 377/5655 [00:25<05:50, 15.06it/s]

{'eval_loss': 0.3235349953174591, 'eval_f1': 0.4460204902587076, 'eval_precision': 0.5249593495934959, 'eval_recall': 0.3887139945981789, 'eval_runtime': 1.458, 'eval_samples_per_second': 258.573, 'eval_steps_per_second': 65.158, 'epoch': 1.0}


 13%|█▎        | 753/5655 [00:51<04:53, 16.69it/s]
 13%|█▎        | 754/5655 [00:53<04:53, 16.69it/s]

{'eval_loss': 0.2981829345226288, 'eval_f1': 0.5579940665395211, 'eval_precision': 0.6624860123114298, 'eval_recall': 0.48293568267781806, 'eval_runtime': 1.4372, 'eval_samples_per_second': 262.317, 'eval_steps_per_second': 66.101, 'epoch': 2.0}


 20%|██        | 1131/5655 [01:19<04:43, 15.98it/s]
 20%|██        | 1131/5655 [01:20<04:43, 15.98it/s]

{'eval_loss': 0.2999751567840576, 'eval_f1': 0.6702867828362422, 'eval_precision': 0.7761856463818073, 'eval_recall': 0.6110070994124002, 'eval_runtime': 1.3397, 'eval_samples_per_second': 281.401, 'eval_steps_per_second': 70.91, 'epoch': 3.0}


 27%|██▋       | 1507/5655 [01:46<04:38, 14.87it/s]
 27%|██▋       | 1508/5655 [01:47<04:38, 14.87it/s]

{'eval_loss': 0.36814650893211365, 'eval_f1': 0.7011669842315064, 'eval_precision': 0.7299661865844552, 'eval_recall': 0.6806711462684986, 'eval_runtime': 1.3904, 'eval_samples_per_second': 271.143, 'eval_steps_per_second': 68.325, 'epoch': 4.0}


 33%|███▎      | 1885/5655 [02:13<03:58, 15.82it/s]
 33%|███▎      | 1885/5655 [02:14<03:58, 15.82it/s]

{'eval_loss': 0.4048359990119934, 'eval_f1': 0.6905361736346893, 'eval_precision': 0.7610961288038807, 'eval_recall': 0.6498190984120109, 'eval_runtime': 1.3641, 'eval_samples_per_second': 276.368, 'eval_steps_per_second': 69.642, 'epoch': 5.0}


 40%|███▉      | 2261/5655 [02:40<03:30, 16.13it/s]
 40%|████      | 2262/5655 [02:41<03:30, 16.13it/s]

{'eval_loss': 0.44288769364356995, 'eval_f1': 0.7010597245891363, 'eval_precision': 0.736106853578939, 'eval_recall': 0.6742328276069951, 'eval_runtime': 1.3334, 'eval_samples_per_second': 282.746, 'eval_steps_per_second': 71.249, 'epoch': 6.0}


 47%|████▋     | 2639/5655 [03:07<03:14, 15.47it/s]
 47%|████▋     | 2639/5655 [03:08<03:14, 15.47it/s]

{'eval_loss': 0.40334057807922363, 'eval_f1': 0.7309611524957939, 'eval_precision': 0.7565625769019357, 'eval_recall': 0.710128416893266, 'eval_runtime': 1.3298, 'eval_samples_per_second': 283.508, 'eval_steps_per_second': 71.441, 'epoch': 7.0}


 53%|█████▎    | 3015/5655 [03:33<02:49, 15.56it/s]
 53%|█████▎    | 3016/5655 [03:35<02:49, 15.56it/s]

{'eval_loss': 0.39760151505470276, 'eval_f1': 0.7534002381549508, 'eval_precision': 0.787810816634346, 'eval_recall': 0.7306407161971892, 'eval_runtime': 1.3503, 'eval_samples_per_second': 279.196, 'eval_steps_per_second': 70.354, 'epoch': 8.0}


 60%|██████    | 3393/5655 [04:00<02:22, 15.88it/s]
 60%|██████    | 3393/5655 [04:02<02:22, 15.88it/s]

{'eval_loss': 0.43891894817352295, 'eval_f1': 0.7332267223692588, 'eval_precision': 0.7563749847765193, 'eval_recall': 0.7145363923260669, 'eval_runtime': 1.4041, 'eval_samples_per_second': 268.499, 'eval_steps_per_second': 67.659, 'epoch': 9.0}


 67%|██████▋   | 3769/5655 [04:27<01:57, 16.01it/s]
 67%|██████▋   | 3770/5655 [04:28<01:57, 16.01it/s]

{'eval_loss': 0.4634981155395508, 'eval_f1': 0.7438326126329948, 'eval_precision': 0.7445983188811708, 'eval_recall': 0.7494730294521837, 'eval_runtime': 1.3359, 'eval_samples_per_second': 282.217, 'eval_steps_per_second': 71.116, 'epoch': 10.0}


 73%|███████▎  | 4147/5655 [04:53<01:32, 16.37it/s]
 73%|███████▎  | 4147/5655 [04:55<01:32, 16.37it/s]

{'eval_loss': 0.4781377911567688, 'eval_f1': 0.745832876971013, 'eval_precision': 0.7529979265361693, 'eval_recall': 0.7435122109377619, 'eval_runtime': 1.5965, 'eval_samples_per_second': 236.145, 'eval_steps_per_second': 59.506, 'epoch': 11.0}


 80%|███████▉  | 4523/5655 [05:20<01:08, 16.64it/s]
 80%|████████  | 4524/5655 [05:22<01:07, 16.64it/s]

{'eval_loss': 0.48816514015197754, 'eval_f1': 0.7542461967128823, 'eval_precision': 0.7598148892266539, 'eval_recall': 0.7538501950970141, 'eval_runtime': 1.4049, 'eval_samples_per_second': 268.338, 'eval_steps_per_second': 67.618, 'epoch': 12.0}


 87%|████████▋ | 4901/5655 [05:47<00:49, 15.38it/s]
 87%|████████▋ | 4901/5655 [05:48<00:49, 15.38it/s]

{'eval_loss': 0.48999160528182983, 'eval_f1': 0.7510800467244048, 'eval_precision': 0.7454241246304039, 'eval_recall': 0.7589657830753721, 'eval_runtime': 1.2286, 'eval_samples_per_second': 306.845, 'eval_steps_per_second': 77.322, 'epoch': 13.0}


 93%|█████████▎| 5277/5655 [06:13<00:22, 16.47it/s]
 93%|█████████▎| 5278/5655 [06:15<00:22, 16.47it/s]

{'eval_loss': 0.5003989338874817, 'eval_f1': 0.7452730751650821, 'eval_precision': 0.7443438075334627, 'eval_recall': 0.7476026225295271, 'eval_runtime': 1.4287, 'eval_samples_per_second': 263.884, 'eval_steps_per_second': 66.496, 'epoch': 14.0}


100%|██████████| 5655/5655 [06:40<00:00, 17.32it/s]
100%|██████████| 5655/5655 [06:43<00:00, 17.32it/s]

{'eval_loss': 0.5019848346710205, 'eval_f1': 0.7409822032113054, 'eval_precision': 0.7382608349504901, 'eval_recall': 0.7459497299675435, 'eval_runtime': 1.5068, 'eval_samples_per_second': 250.196, 'eval_steps_per_second': 63.047, 'epoch': 15.0}


100%|██████████| 5655/5655 [06:45<00:00, 13.94it/s]


{'train_runtime': 405.6781, 'train_samples_per_second': 55.722, 'train_steps_per_second': 13.94, 'train_loss': 0.09528045519382737, 'epoch': 15.0}


100%|██████████| 95/95 [00:01<00:00, 69.25it/s]


0,1
eval/f1,▁▄▆▇▇▇▇█████████
eval/loss,▂▁▁▃▅▆▅▄▆▇▇█████
eval/precision,▁▅█▆▇▇▇█▇▇▇▇▇▇▇▇
eval/recall,▁▃▅▇▆▆▇▇▇███████
eval/runtime,▅▅▃▄▄▃▃▃▄▃█▄▁▅▆▄
eval/samples_per_second,▃▄▅▄▅▆▆▅▄▆▁▄█▄▂▄
eval/steps_per_second,▃▄▅▄▅▆▆▅▄▆▁▄█▄▂▄
train/epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇███
train/global_step,▁▁▂▃▃▃▄▅▅▅▆▇▇▇███

0,1
eval/f1,0.75425
eval/loss,0.48817
eval/precision,0.75981
eval/recall,0.75385
eval/runtime,1.3919
eval/samples_per_second,270.844
eval/steps_per_second,68.25
total_flos,1486946402599680.0
train/epoch,15.0
train/global_step,5655.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1298/1298 [00:00<00:00, 15881.07 examples/s]
  7%|▋         | 260/3900 [00:16<04:00, 15.16it/s]
  7%|▋         | 260/3900 [00:17<04:00, 15.16it/s]

{'eval_loss': 0.29643505811691284, 'eval_f1': 0.25661907852044125, 'eval_precision': 0.48951701892878363, 'eval_recall': 0.2032549549262123, 'eval_runtime': 1.1819, 'eval_samples_per_second': 219.987, 'eval_steps_per_second': 54.997, 'epoch': 1.0}


 13%|█▎        | 520/3900 [00:35<03:15, 17.33it/s]
 13%|█▎        | 520/3900 [00:36<03:15, 17.33it/s]

{'eval_loss': 0.22587350010871887, 'eval_f1': 0.49241921278361145, 'eval_precision': 0.5865119529593213, 'eval_recall': 0.4325352971341653, 'eval_runtime': 0.8338, 'eval_samples_per_second': 311.808, 'eval_steps_per_second': 77.952, 'epoch': 2.0}


 20%|██        | 780/3900 [00:53<03:17, 15.80it/s]
 20%|██        | 780/3900 [00:54<03:17, 15.80it/s]

{'eval_loss': 0.2463422566652298, 'eval_f1': 0.5174932484742752, 'eval_precision': 0.5595888388573116, 'eval_recall': 0.49122416773523686, 'eval_runtime': 0.9594, 'eval_samples_per_second': 271.008, 'eval_steps_per_second': 67.752, 'epoch': 3.0}


 27%|██▋       | 1040/3900 [01:13<02:51, 16.63it/s]
 27%|██▋       | 1040/3900 [01:14<02:51, 16.63it/s]

{'eval_loss': 0.23256047070026398, 'eval_f1': 0.6255540048662871, 'eval_precision': 0.8583400971724722, 'eval_recall': 0.55866927392513, 'eval_runtime': 1.0055, 'eval_samples_per_second': 258.571, 'eval_steps_per_second': 64.643, 'epoch': 4.0}


 33%|███▎      | 1300/3900 [01:31<02:32, 17.04it/s]
 33%|███▎      | 1300/3900 [01:32<02:32, 17.04it/s]

{'eval_loss': 0.24217060208320618, 'eval_f1': 0.6753544990264027, 'eval_precision': 0.7732763235164196, 'eval_recall': 0.6132214877679919, 'eval_runtime': 1.0511, 'eval_samples_per_second': 247.359, 'eval_steps_per_second': 61.84, 'epoch': 5.0}


 40%|████      | 1560/3900 [01:50<02:27, 15.84it/s]
 40%|████      | 1560/3900 [01:51<02:27, 15.84it/s]

{'eval_loss': 0.2510354816913605, 'eval_f1': 0.6994754900234966, 'eval_precision': 0.8176407070495741, 'eval_recall': 0.6470698361058081, 'eval_runtime': 0.8412, 'eval_samples_per_second': 309.078, 'eval_steps_per_second': 77.27, 'epoch': 6.0}


 47%|████▋     | 1820/3900 [02:09<02:03, 16.84it/s]
 47%|████▋     | 1820/3900 [02:10<02:03, 16.84it/s]

{'eval_loss': 0.28768759965896606, 'eval_f1': 0.6610050262975395, 'eval_precision': 0.7355568044458047, 'eval_recall': 0.6272210748967576, 'eval_runtime': 1.0261, 'eval_samples_per_second': 253.383, 'eval_steps_per_second': 63.346, 'epoch': 7.0}


 53%|█████▎    | 2080/3900 [02:28<01:58, 15.40it/s]
 53%|█████▎    | 2080/3900 [02:29<01:58, 15.40it/s]

{'eval_loss': 0.25363850593566895, 'eval_f1': 0.6839205107540998, 'eval_precision': 0.772772969164898, 'eval_recall': 0.6378682607388192, 'eval_runtime': 1.1986, 'eval_samples_per_second': 216.911, 'eval_steps_per_second': 54.228, 'epoch': 8.0}


 60%|██████    | 2340/3900 [02:47<01:31, 17.09it/s]
 60%|██████    | 2340/3900 [02:48<01:31, 17.09it/s]

{'eval_loss': 0.2783795893192291, 'eval_f1': 0.6878829929769498, 'eval_precision': 0.7629829501400771, 'eval_recall': 0.6441483489940054, 'eval_runtime': 0.8543, 'eval_samples_per_second': 304.347, 'eval_steps_per_second': 76.087, 'epoch': 9.0}


 67%|██████▋   | 2600/3900 [03:06<01:21, 15.89it/s]
 67%|██████▋   | 2600/3900 [03:07<01:21, 15.89it/s]

{'eval_loss': 0.27694225311279297, 'eval_f1': 0.7029303051847958, 'eval_precision': 0.7617093236069555, 'eval_recall': 0.6694736140661623, 'eval_runtime': 1.0026, 'eval_samples_per_second': 259.318, 'eval_steps_per_second': 64.829, 'epoch': 10.0}


 73%|███████▎  | 2860/3900 [03:25<01:08, 15.23it/s]
 73%|███████▎  | 2860/3900 [03:26<01:08, 15.23it/s]

{'eval_loss': 0.28190022706985474, 'eval_f1': 0.7080163743977178, 'eval_precision': 0.7336418680087726, 'eval_recall': 0.6939954561537008, 'eval_runtime': 1.1566, 'eval_samples_per_second': 224.793, 'eval_steps_per_second': 56.198, 'epoch': 11.0}


 80%|████████  | 3120/3900 [03:43<00:48, 16.15it/s]
 80%|████████  | 3120/3900 [03:44<00:48, 16.15it/s]

{'eval_loss': 0.28589338064193726, 'eval_f1': 0.7183739227803215, 'eval_precision': 0.7597507549562198, 'eval_recall': 0.6947758016426, 'eval_runtime': 0.8679, 'eval_samples_per_second': 299.579, 'eval_steps_per_second': 74.895, 'epoch': 12.0}


 87%|████████▋ | 3380/3900 [04:03<00:30, 16.88it/s]
 87%|████████▋ | 3380/3900 [04:04<00:30, 16.88it/s]

{'eval_loss': 0.295258492231369, 'eval_f1': 0.7028557657880966, 'eval_precision': 0.7409438959985212, 'eval_recall': 0.6808548802865751, 'eval_runtime': 0.989, 'eval_samples_per_second': 262.903, 'eval_steps_per_second': 65.726, 'epoch': 13.0}


 93%|█████████▎| 3640/3900 [04:21<00:16, 15.61it/s]
 93%|█████████▎| 3640/3900 [04:22<00:16, 15.61it/s]

{'eval_loss': 0.29305583238601685, 'eval_f1': 0.706889221814489, 'eval_precision': 0.7414178829426508, 'eval_recall': 0.6865691660008608, 'eval_runtime': 1.2143, 'eval_samples_per_second': 214.11, 'eval_steps_per_second': 53.527, 'epoch': 14.0}


100%|██████████| 3900/3900 [04:41<00:00, 16.99it/s]
100%|██████████| 3900/3900 [04:43<00:00, 16.99it/s]

{'eval_loss': 0.2939419448375702, 'eval_f1': 0.7010940777650844, 'eval_precision': 0.7335738398947703, 'eval_recall': 0.6811884386098414, 'eval_runtime': 0.8676, 'eval_samples_per_second': 299.668, 'eval_steps_per_second': 74.917, 'epoch': 15.0}


100%|██████████| 3900/3900 [04:45<00:00, 13.66it/s]


{'train_runtime': 285.5045, 'train_samples_per_second': 54.535, 'train_steps_per_second': 13.66, 'train_loss': 0.07732536120292469, 'epoch': 15.0}


100%|██████████| 65/65 [00:01<00:00, 55.60it/s]


------------------ Starting model ==> epochs: 15, batch size: 4, weights of decay: 0.001 ---------------------


0,1
eval/f1,▁▅▅▇▇█▇▇████████
eval/loss,█▁▃▂▃▃▇▄▆▆▇▇███▇
eval/precision,▁▃▂█▆▇▆▆▆▆▆▆▆▆▆▆
eval/recall,▁▄▅▆▇▇▇▇▇███████
eval/runtime,▇▁▃▄▅▁▅█▁▄▇▂▄█▂█
eval/samples_per_second,▁█▅▄▃█▄▁▇▄▂▇▄▁▇▁
eval/steps_per_second,▁█▅▄▃█▄▁▇▄▂▇▄▁▇▁
train/epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇███
train/global_step,▁▁▂▃▃▃▄▅▅▅▆▇▇▇███

0,1
eval/f1,0.71837
eval/loss,0.28589
eval/precision,0.75975
eval/recall,0.69478
eval/runtime,1.2001
eval/samples_per_second,216.642
eval/steps_per_second,54.16
total_flos,1024205760576000.0
train/epoch,15.0
train/global_step,3900.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 7614/7614 [00:00<00:00, 22136.03 examples/s]
  7%|▋         | 1523/22845 [01:36<23:55, 14.85it/s]
  7%|▋         | 1523/22845 [01:42<23:55, 14.85it/s]

{'eval_loss': 0.09914836287498474, 'eval_f1': 0.6906309137488165, 'eval_precision': 0.8044253237879706, 'eval_recall': 0.6590937222189266, 'eval_runtime': 6.1301, 'eval_samples_per_second': 248.445, 'eval_steps_per_second': 62.152, 'epoch': 1.0}


 13%|█▎        | 3045/22845 [03:20<19:53, 16.59it/s]  
 13%|█▎        | 3046/22845 [03:26<19:53, 16.59it/s]

{'eval_loss': 0.10599664598703384, 'eval_f1': 0.7387575508466642, 'eval_precision': 0.8603060102541564, 'eval_recall': 0.7138388692933624, 'eval_runtime': 6.5195, 'eval_samples_per_second': 233.605, 'eval_steps_per_second': 58.44, 'epoch': 2.0}


 20%|██        | 4569/22845 [05:04<20:00, 15.23it/s]  
 20%|██        | 4569/22845 [05:10<20:00, 15.23it/s]

{'eval_loss': 0.10069762915372849, 'eval_f1': 0.8384335605644776, 'eval_precision': 0.8906814799009161, 'eval_recall': 0.805478082685167, 'eval_runtime': 5.291, 'eval_samples_per_second': 287.848, 'eval_steps_per_second': 72.009, 'epoch': 3.0}


 27%|██▋       | 6091/22845 [06:48<17:33, 15.91it/s]  
 27%|██▋       | 6092/22845 [06:54<17:33, 15.91it/s]

{'eval_loss': 0.10781493782997131, 'eval_f1': 0.8456951910838749, 'eval_precision': 0.8774959798647499, 'eval_recall': 0.8268829013728346, 'eval_runtime': 6.0286, 'eval_samples_per_second': 252.629, 'eval_steps_per_second': 63.199, 'epoch': 4.0}


 33%|███▎      | 7615/22845 [08:31<15:51, 16.01it/s]  
 33%|███▎      | 7615/22845 [08:37<15:51, 16.01it/s]

{'eval_loss': 0.10520164668560028, 'eval_f1': 0.846733636921224, 'eval_precision': 0.8610239974762214, 'eval_recall': 0.8349927732309518, 'eval_runtime': 6.5472, 'eval_samples_per_second': 232.617, 'eval_steps_per_second': 58.192, 'epoch': 5.0}


 40%|███▉      | 9137/22845 [10:15<14:24, 15.86it/s]  
 40%|████      | 9138/22845 [10:21<14:24, 15.86it/s]

{'eval_loss': 0.11803270131349564, 'eval_f1': 0.8494300754937683, 'eval_precision': 0.8613557122554115, 'eval_recall': 0.8444967288803223, 'eval_runtime': 5.8156, 'eval_samples_per_second': 261.884, 'eval_steps_per_second': 65.514, 'epoch': 6.0}


 44%|████▍     | 10003/22845 [11:16<13:44, 15.58it/s] 

{'loss': 0.0658, 'grad_norm': 0.025514749810099602, 'learning_rate': 2.8113372729262422e-05, 'epoch': 6.57}


 47%|████▋     | 10661/22845 [11:58<13:10, 15.40it/s]
 47%|████▋     | 10661/22845 [12:03<13:10, 15.40it/s]

{'eval_loss': 0.11758042126893997, 'eval_f1': 0.8640503687811076, 'eval_precision': 0.8624981011067688, 'eval_recall': 0.8671441094493612, 'eval_runtime': 5.4167, 'eval_samples_per_second': 281.167, 'eval_steps_per_second': 70.338, 'epoch': 7.0}


 53%|█████▎    | 12183/22845 [13:40<11:47, 15.08it/s]  
 53%|█████▎    | 12184/22845 [13:46<11:47, 15.08it/s]

{'eval_loss': 0.12483879923820496, 'eval_f1': 0.8626206168017377, 'eval_precision': 0.862765379996626, 'eval_recall': 0.8629478407452756, 'eval_runtime': 6.0597, 'eval_samples_per_second': 251.331, 'eval_steps_per_second': 62.874, 'epoch': 8.0}


 60%|██████    | 13707/22845 [15:23<08:55, 17.06it/s]  
 60%|██████    | 13707/22845 [15:29<08:55, 17.06it/s]

{'eval_loss': 0.13954612612724304, 'eval_f1': 0.8537258872077036, 'eval_precision': 0.8507890068869661, 'eval_recall': 0.8574940551826501, 'eval_runtime': 5.9962, 'eval_samples_per_second': 253.994, 'eval_steps_per_second': 63.54, 'epoch': 9.0}


 67%|██████▋   | 15229/22845 [17:06<08:10, 15.52it/s]  
 67%|██████▋   | 15230/22845 [17:12<08:10, 15.52it/s]

{'eval_loss': 0.14792783558368683, 'eval_f1': 0.8542406775441113, 'eval_precision': 0.8619768262402533, 'eval_recall': 0.8483055016524238, 'eval_runtime': 5.2451, 'eval_samples_per_second': 290.369, 'eval_steps_per_second': 72.64, 'epoch': 10.0}


 73%|███████▎  | 16753/22845 [18:48<06:17, 16.16it/s]  
 73%|███████▎  | 16753/22845 [18:54<06:17, 16.16it/s]

{'eval_loss': 0.14176605641841888, 'eval_f1': 0.8620916107201878, 'eval_precision': 0.8533867728505266, 'eval_recall': 0.8723153191568477, 'eval_runtime': 5.693, 'eval_samples_per_second': 267.523, 'eval_steps_per_second': 66.925, 'epoch': 11.0}


 80%|███████▉  | 18275/22845 [20:30<04:25, 17.22it/s]  
 80%|████████  | 18276/22845 [20:37<04:25, 17.22it/s]

{'eval_loss': 0.16503888368606567, 'eval_f1': 0.8570643822193765, 'eval_precision': 0.8622350014907924, 'eval_recall': 0.858051282442183, 'eval_runtime': 6.7231, 'eval_samples_per_second': 226.533, 'eval_steps_per_second': 56.67, 'epoch': 12.0}


 87%|████████▋ | 19799/22845 [22:14<03:13, 15.76it/s]  
 87%|████████▋ | 19799/22845 [22:19<03:13, 15.76it/s]

{'eval_loss': 0.15058019757270813, 'eval_f1': 0.8676770449295045, 'eval_precision': 0.8783424354254584, 'eval_recall': 0.8578792987173273, 'eval_runtime': 5.185, 'eval_samples_per_second': 293.734, 'eval_steps_per_second': 73.482, 'epoch': 13.0}


 88%|████████▊ | 20003/22845 [22:34<02:50, 16.66it/s]

{'loss': 0.0098, 'grad_norm': 0.002504088217392564, 'learning_rate': 6.2267454585248415e-06, 'epoch': 13.13}


 93%|█████████▎| 21321/22845 [23:55<01:35, 16.00it/s]
 93%|█████████▎| 21322/22845 [24:01<01:35, 16.00it/s]

{'eval_loss': 0.1472594141960144, 'eval_f1': 0.8738726125882812, 'eval_precision': 0.8816310751595919, 'eval_recall': 0.8679558891027461, 'eval_runtime': 5.562, 'eval_samples_per_second': 273.822, 'eval_steps_per_second': 68.5, 'epoch': 14.0}


100%|██████████| 22845/22845 [25:37<00:00, 17.25it/s]
100%|██████████| 22845/22845 [25:46<00:00, 17.25it/s]

{'eval_loss': 0.14884977042675018, 'eval_f1': 0.875243142182858, 'eval_precision': 0.8818121946312918, 'eval_recall': 0.8695019467782578, 'eval_runtime': 6.8807, 'eval_samples_per_second': 221.344, 'eval_steps_per_second': 55.372, 'epoch': 15.0}


100%|██████████| 22845/22845 [25:47<00:00, 14.76it/s]


{'train_runtime': 1547.8575, 'train_samples_per_second': 59.027, 'train_steps_per_second': 14.759, 'train_loss': 0.03336791821031243, 'epoch': 15.0}


100%|██████████| 381/381 [00:05<00:00, 67.06it/s]


0,1
eval/f1,▁▃▇▇▇▇██▇▇█▇████
eval/loss,▁▂▁▂▂▃▃▄▅▆▆█▆▆▆▆
eval/precision,▁▆█▇▆▆▆▆▅▆▅▆▇▇▇▇
eval/recall,▁▃▆▇▇▇███▇██████
eval/runtime,▅▇▁▄▇▄▂▅▄▁▃▇▁▃█▃
eval/samples_per_second,▄▂▇▄▂▅▇▄▄█▅▂█▆▁▅
eval/steps_per_second,▄▂▇▄▂▅▇▄▄█▅▂█▆▁▅
train/epoch,▁▁▂▃▃▃▄▄▅▅▅▆▇▇▇▇███
train/global_step,▁▁▂▂▃▃▄▄▄▅▅▆▆▇▇▇███
train/grad_norm,█▁

0,1
eval/f1,0.87524
eval/loss,0.14885
eval/precision,0.88181
eval/recall,0.8695
eval/runtime,5.7067
eval/samples_per_second,266.88
eval/steps_per_second,66.764
total_flos,6010055190432000.0
train/epoch,15.0
train/global_step,22845.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1884/1884 [00:00<00:00, 16461.55 examples/s]
  7%|▋         | 377/5655 [00:24<05:36, 15.69it/s]
  7%|▋         | 377/5655 [00:25<05:36, 15.69it/s]

{'eval_loss': 0.33982422947883606, 'eval_f1': 0.44299622071050637, 'eval_precision': 0.5190649350649351, 'eval_recall': 0.3949693519432, 'eval_runtime': 1.3282, 'eval_samples_per_second': 283.849, 'eval_steps_per_second': 71.527, 'epoch': 1.0}


 13%|█▎        | 753/5655 [00:50<04:59, 16.37it/s]
 13%|█▎        | 754/5655 [00:51<04:59, 16.37it/s]

{'eval_loss': 0.3216930329799652, 'eval_f1': 0.5597500538677009, 'eval_precision': 0.6292079071000984, 'eval_recall': 0.5104309133662375, 'eval_runtime': 1.2645, 'eval_samples_per_second': 298.149, 'eval_steps_per_second': 75.13, 'epoch': 2.0}


 20%|██        | 1131/5655 [01:16<04:28, 16.88it/s]
 20%|██        | 1131/5655 [01:18<04:28, 16.88it/s]

{'eval_loss': 0.3385504186153412, 'eval_f1': 0.590560678386105, 'eval_precision': 0.749781295093795, 'eval_recall': 0.5592485477595662, 'eval_runtime': 1.2501, 'eval_samples_per_second': 301.567, 'eval_steps_per_second': 75.992, 'epoch': 3.0}


 27%|██▋       | 1507/5655 [01:43<04:38, 14.87it/s]
 27%|██▋       | 1508/5655 [01:45<04:38, 14.87it/s]

{'eval_loss': 0.35457152128219604, 'eval_f1': 0.6812578561736478, 'eval_precision': 0.7396953171553553, 'eval_recall': 0.6340314084276941, 'eval_runtime': 1.3636, 'eval_samples_per_second': 276.468, 'eval_steps_per_second': 69.667, 'epoch': 4.0}


 33%|███▎      | 1885/5655 [02:10<03:55, 15.98it/s]
 33%|███▎      | 1885/5655 [02:11<03:55, 15.98it/s]

{'eval_loss': 0.44305533170700073, 'eval_f1': 0.6678800995333557, 'eval_precision': 0.7092673358302473, 'eval_recall': 0.6618510993852105, 'eval_runtime': 1.3723, 'eval_samples_per_second': 274.713, 'eval_steps_per_second': 69.225, 'epoch': 5.0}


 40%|███▉      | 2261/5655 [02:36<03:16, 17.27it/s]
 40%|████      | 2262/5655 [02:38<03:16, 17.27it/s]

{'eval_loss': 0.46431297063827515, 'eval_f1': 0.6903691779749475, 'eval_precision': 0.720180681021389, 'eval_recall': 0.6646902871525212, 'eval_runtime': 1.4156, 'eval_samples_per_second': 266.309, 'eval_steps_per_second': 67.107, 'epoch': 6.0}


 47%|████▋     | 2639/5655 [03:03<03:21, 14.97it/s]
 47%|████▋     | 2639/5655 [03:05<03:21, 14.97it/s]

{'eval_loss': 0.48256850242614746, 'eval_f1': 0.6889668262411419, 'eval_precision': 0.7181824076565207, 'eval_recall': 0.6833905750340046, 'eval_runtime': 1.2375, 'eval_samples_per_second': 304.653, 'eval_steps_per_second': 76.769, 'epoch': 7.0}


 53%|█████▎    | 3015/5655 [03:30<02:50, 15.51it/s]
 53%|█████▎    | 3016/5655 [03:32<02:50, 15.51it/s]

{'eval_loss': 0.4727003276348114, 'eval_f1': 0.7054620164417995, 'eval_precision': 0.7215885131666196, 'eval_recall': 0.6937220090808537, 'eval_runtime': 1.4302, 'eval_samples_per_second': 263.597, 'eval_steps_per_second': 66.424, 'epoch': 8.0}


 60%|██████    | 3393/5655 [03:57<02:21, 16.01it/s]
 60%|██████    | 3393/5655 [03:59<02:21, 16.01it/s]

{'eval_loss': 0.46053239703178406, 'eval_f1': 0.7407755451441463, 'eval_precision': 0.7524463109553446, 'eval_recall': 0.7372057295493865, 'eval_runtime': 1.3494, 'eval_samples_per_second': 279.385, 'eval_steps_per_second': 70.402, 'epoch': 9.0}


 67%|██████▋   | 3769/5655 [04:24<02:05, 15.03it/s]
 67%|██████▋   | 3770/5655 [04:26<02:05, 15.03it/s]

{'eval_loss': 0.5010688304901123, 'eval_f1': 0.7220239526396335, 'eval_precision': 0.7203196671740962, 'eval_recall': 0.7341089462182917, 'eval_runtime': 1.3202, 'eval_samples_per_second': 285.566, 'eval_steps_per_second': 71.96, 'epoch': 10.0}


 73%|███████▎  | 4147/5655 [04:51<01:31, 16.56it/s]
 73%|███████▎  | 4147/5655 [04:52<01:31, 16.56it/s]

{'eval_loss': 0.5118533968925476, 'eval_f1': 0.7331771368939538, 'eval_precision': 0.7279648623314392, 'eval_recall': 0.7413310469468085, 'eval_runtime': 1.3368, 'eval_samples_per_second': 282.015, 'eval_steps_per_second': 71.065, 'epoch': 11.0}


 80%|███████▉  | 4523/5655 [05:18<01:09, 16.24it/s]
 80%|████████  | 4524/5655 [05:19<01:09, 16.24it/s]

{'eval_loss': 0.5159872770309448, 'eval_f1': 0.7369964353563718, 'eval_precision': 0.7407662364053341, 'eval_recall': 0.738527567781182, 'eval_runtime': 1.3742, 'eval_samples_per_second': 274.337, 'eval_steps_per_second': 69.13, 'epoch': 12.0}


 87%|████████▋ | 4901/5655 [05:45<00:49, 15.20it/s]
 87%|████████▋ | 4901/5655 [05:46<00:49, 15.20it/s]

{'eval_loss': 0.5339096784591675, 'eval_f1': 0.7344746307288161, 'eval_precision': 0.7252235983581673, 'eval_recall': 0.7480252618228416, 'eval_runtime': 1.2462, 'eval_samples_per_second': 302.516, 'eval_steps_per_second': 76.231, 'epoch': 13.0}


 93%|█████████▎| 5277/5655 [06:11<00:23, 16.04it/s]
 93%|█████████▎| 5278/5655 [06:13<00:23, 16.04it/s]

{'eval_loss': 0.5362844467163086, 'eval_f1': 0.7412009093909121, 'eval_precision': 0.7357198077811776, 'eval_recall': 0.7530357743489208, 'eval_runtime': 1.278, 'eval_samples_per_second': 294.985, 'eval_steps_per_second': 74.333, 'epoch': 14.0}


100%|██████████| 5655/5655 [06:38<00:00, 16.12it/s]
100%|██████████| 5655/5655 [06:42<00:00, 16.12it/s]

{'eval_loss': 0.5368742346763611, 'eval_f1': 0.7314337142934326, 'eval_precision': 0.7256243023224155, 'eval_recall': 0.7428114532155086, 'eval_runtime': 1.5692, 'eval_samples_per_second': 240.249, 'eval_steps_per_second': 60.54, 'epoch': 15.0}


100%|██████████| 5655/5655 [06:44<00:00, 13.99it/s]


{'train_runtime': 404.1348, 'train_samples_per_second': 55.934, 'train_steps_per_second': 13.993, 'train_loss': 0.10093120976320734, 'epoch': 15.0}


100%|██████████| 95/95 [00:01<00:00, 74.36it/s]


0,1
eval/f1,▁▄▄▇▆▇▇▇████████
eval/loss,▂▁▂▂▅▆▆▆▆▇▇▇████
eval/precision,▁▄██▇▇▇▇█▇▇█▇▇▇▇
eval/recall,▁▃▄▆▆▆▇▇████████
eval/runtime,▃▂▁▄▄▅▁▅▃▃▃▄▁▂█▂
eval/samples_per_second,▆▇█▅▅▄█▄▅▆▆▅█▇▁▆
eval/steps_per_second,▆▇█▅▅▄█▄▅▆▆▅█▇▁▆
train/epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇███
train/global_step,▁▁▂▃▃▃▄▅▅▅▆▇▇▇███

0,1
eval/f1,0.7412
eval/loss,0.53628
eval/precision,0.73572
eval/recall,0.75304
eval/runtime,1.3024
eval/samples_per_second,289.459
eval/steps_per_second,72.941
total_flos,1486946402599680.0
train/epoch,15.0
train/global_step,5655.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1298/1298 [00:00<00:00, 16599.56 examples/s]
  7%|▋         | 260/3900 [00:16<03:45, 16.12it/s]
  7%|▋         | 260/3900 [00:17<03:45, 16.12it/s]

{'eval_loss': 0.2943302392959595, 'eval_f1': 0.175406449410594, 'eval_precision': 0.5473833097595474, 'eval_recall': 0.15048391842406583, 'eval_runtime': 0.9428, 'eval_samples_per_second': 275.777, 'eval_steps_per_second': 68.944, 'epoch': 1.0}


 13%|█▎        | 520/3900 [00:35<03:28, 16.22it/s]
 13%|█▎        | 520/3900 [00:36<03:28, 16.22it/s]

{'eval_loss': 0.22610215842723846, 'eval_f1': 0.49800632607231654, 'eval_precision': 0.6007980115122972, 'eval_recall': 0.4384975152679013, 'eval_runtime': 1.0998, 'eval_samples_per_second': 236.407, 'eval_steps_per_second': 59.102, 'epoch': 2.0}


 20%|██        | 780/3900 [00:54<03:16, 15.88it/s]
 20%|██        | 780/3900 [00:55<03:16, 15.88it/s]

{'eval_loss': 0.2284662127494812, 'eval_f1': 0.533965164777141, 'eval_precision': 0.5787337662337662, 'eval_recall': 0.500965139684204, 'eval_runtime': 1.0821, 'eval_samples_per_second': 240.282, 'eval_steps_per_second': 60.07, 'epoch': 3.0}


 27%|██▋       | 1040/3900 [01:13<02:51, 16.66it/s]
 27%|██▋       | 1040/3900 [01:14<02:51, 16.66it/s]

{'eval_loss': 0.23471252620220184, 'eval_f1': 0.5959901744322555, 'eval_precision': 0.8764796321920841, 'eval_recall': 0.5460087721183847, 'eval_runtime': 0.9847, 'eval_samples_per_second': 264.034, 'eval_steps_per_second': 66.009, 'epoch': 4.0}


 33%|███▎      | 1300/3900 [01:32<02:45, 15.69it/s]
 33%|███▎      | 1300/3900 [01:33<02:45, 15.69it/s]

{'eval_loss': 0.260710209608078, 'eval_f1': 0.6529589114820655, 'eval_precision': 0.7683639209501278, 'eval_recall': 0.5979757747011304, 'eval_runtime': 1.3741, 'eval_samples_per_second': 189.213, 'eval_steps_per_second': 47.303, 'epoch': 5.0}


 40%|████      | 1560/3900 [01:51<02:30, 15.58it/s]
 40%|████      | 1560/3900 [01:52<02:30, 15.58it/s]

{'eval_loss': 0.26331523060798645, 'eval_f1': 0.6496511425082854, 'eval_precision': 0.7119428473143643, 'eval_recall': 0.607636753414126, 'eval_runtime': 0.8583, 'eval_samples_per_second': 302.926, 'eval_steps_per_second': 75.732, 'epoch': 6.0}


 47%|████▋     | 1820/3900 [02:10<02:05, 16.61it/s]
 47%|████▋     | 1820/3900 [02:11<02:05, 16.61it/s]

{'eval_loss': 0.2689041793346405, 'eval_f1': 0.6577735694556096, 'eval_precision': 0.7873131932877667, 'eval_recall': 0.609634936297981, 'eval_runtime': 0.9442, 'eval_samples_per_second': 275.373, 'eval_steps_per_second': 68.843, 'epoch': 7.0}


 53%|█████▎    | 2080/3900 [02:29<01:57, 15.45it/s]
 53%|█████▎    | 2080/3900 [02:30<01:57, 15.45it/s]

{'eval_loss': 0.2510795593261719, 'eval_f1': 0.6841855743812199, 'eval_precision': 0.7772430951465047, 'eval_recall': 0.6312252825026544, 'eval_runtime': 1.2777, 'eval_samples_per_second': 203.493, 'eval_steps_per_second': 50.873, 'epoch': 8.0}


 60%|██████    | 2340/3900 [02:48<01:33, 16.75it/s]
 60%|██████    | 2340/3900 [02:49<01:33, 16.75it/s]

{'eval_loss': 0.2714879810810089, 'eval_f1': 0.6856066008872249, 'eval_precision': 0.7406931963343207, 'eval_recall': 0.6510664025014838, 'eval_runtime': 0.8856, 'eval_samples_per_second': 293.595, 'eval_steps_per_second': 73.399, 'epoch': 9.0}


 67%|██████▋   | 2600/3900 [03:07<01:26, 15.05it/s]
 67%|██████▋   | 2600/3900 [03:08<01:26, 15.05it/s]

{'eval_loss': 0.2807294428348541, 'eval_f1': 0.6979267692492465, 'eval_precision': 0.7545397431390048, 'eval_recall': 0.6614283753260487, 'eval_runtime': 1.0573, 'eval_samples_per_second': 245.917, 'eval_steps_per_second': 61.479, 'epoch': 10.0}


 73%|███████▎  | 2860/3900 [03:27<01:06, 15.71it/s]
 73%|███████▎  | 2860/3900 [03:28<01:06, 15.71it/s]

{'eval_loss': 0.2970771789550781, 'eval_f1': 0.6839282634590244, 'eval_precision': 0.7362569251722283, 'eval_recall': 0.6571492387714901, 'eval_runtime': 1.0241, 'eval_samples_per_second': 253.884, 'eval_steps_per_second': 63.471, 'epoch': 11.0}


 80%|████████  | 3120/3900 [03:45<00:46, 16.95it/s]
 80%|████████  | 3120/3900 [03:46<00:46, 16.95it/s]

{'eval_loss': 0.27982383966445923, 'eval_f1': 0.698103215241604, 'eval_precision': 0.7427364460870083, 'eval_recall': 0.6650121066909351, 'eval_runtime': 0.9662, 'eval_samples_per_second': 269.091, 'eval_steps_per_second': 67.273, 'epoch': 12.0}


 87%|████████▋ | 3380/3900 [04:05<00:33, 15.34it/s]
 87%|████████▋ | 3380/3900 [04:06<00:33, 15.34it/s]

{'eval_loss': 0.2911909818649292, 'eval_f1': 0.6948523770814757, 'eval_precision': 0.7503097184369899, 'eval_recall': 0.659433875398418, 'eval_runtime': 1.0355, 'eval_samples_per_second': 251.092, 'eval_steps_per_second': 62.773, 'epoch': 13.0}


 93%|█████████▎| 3640/3900 [04:24<00:15, 16.77it/s]
 93%|█████████▎| 3640/3900 [04:25<00:15, 16.77it/s]

{'eval_loss': 0.2930741608142853, 'eval_f1': 0.6915439414214705, 'eval_precision': 0.748717866749176, 'eval_recall': 0.6537195896841324, 'eval_runtime': 0.9837, 'eval_samples_per_second': 264.312, 'eval_steps_per_second': 66.078, 'epoch': 14.0}


100%|██████████| 3900/3900 [04:42<00:00, 15.01it/s]
100%|██████████| 3900/3900 [04:45<00:00, 15.01it/s]

{'eval_loss': 0.2944593131542206, 'eval_f1': 0.6985884758344257, 'eval_precision': 0.7600233368781756, 'eval_recall': 0.657665167915425, 'eval_runtime': 1.2147, 'eval_samples_per_second': 214.042, 'eval_steps_per_second': 53.51, 'epoch': 15.0}


100%|██████████| 3900/3900 [04:47<00:00, 13.55it/s]


{'train_runtime': 287.821, 'train_samples_per_second': 54.096, 'train_steps_per_second': 13.55, 'train_loss': 0.07776582375550882, 'epoch': 15.0}


100%|██████████| 65/65 [00:01<00:00, 58.19it/s]


------------------ Starting model ==> epochs: 15, batch size: 8, weights of decay: 0.01 ---------------------


0,1
eval/f1,▁▅▆▇▇▇▇█████████
eval/loss,█▁▁▂▄▅▅▃▅▆█▆▇███
eval/precision,▁▂▂█▆▅▆▆▅▅▅▅▅▅▆▆
eval/recall,▁▅▆▆▇▇▇█████████
eval/runtime,▂▄▄▃█▁▂▇▁▄▃▂▃▃▆▅
eval/samples_per_second,▆▄▄▆▁█▆▂▇▄▅▆▅▆▃▃
eval/steps_per_second,▆▄▄▆▁█▆▂▇▄▅▆▅▆▃▃
train/epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇███
train/global_step,▁▁▂▃▃▃▄▅▅▅▆▇▇▇███

0,1
eval/f1,0.69859
eval/loss,0.29446
eval/precision,0.76002
eval/recall,0.65767
eval/runtime,1.1504
eval/samples_per_second,226.018
eval/steps_per_second,56.504
total_flos,1024205760576000.0
train/epoch,15.0
train/global_step,3900.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 7614/7614 [00:00<00:00, 22226.74 examples/s]
  7%|▋         | 762/11430 [00:57<13:31, 13.14it/s]
  7%|▋         | 762/11430 [01:00<13:31, 13.14it/s]

{'eval_loss': 0.10108546912670135, 'eval_f1': 0.6569056952602341, 'eval_precision': 0.8190600602578222, 'eval_recall': 0.6330584355429076, 'eval_runtime': 3.7166, 'eval_samples_per_second': 409.778, 'eval_steps_per_second': 51.39, 'epoch': 1.0}


 13%|█▎        | 1524/11430 [02:00<11:59, 13.76it/s] 
 13%|█▎        | 1524/11430 [02:03<11:59, 13.76it/s]

{'eval_loss': 0.09416117519140244, 'eval_f1': 0.7668294631977587, 'eval_precision': 0.8703382971796655, 'eval_recall': 0.7334661829767882, 'eval_runtime': 3.8036, 'eval_samples_per_second': 400.411, 'eval_steps_per_second': 50.216, 'epoch': 2.0}


 20%|██        | 2286/11430 [03:03<11:32, 13.21it/s]  
 20%|██        | 2286/11430 [03:07<11:32, 13.21it/s]

{'eval_loss': 0.09499816596508026, 'eval_f1': 0.806773035935552, 'eval_precision': 0.8658142862086667, 'eval_recall': 0.7974450345967374, 'eval_runtime': 3.7857, 'eval_samples_per_second': 402.306, 'eval_steps_per_second': 50.453, 'epoch': 3.0}


 27%|██▋       | 3048/11430 [04:06<09:59, 13.98it/s]  
 27%|██▋       | 3048/11430 [04:09<09:59, 13.98it/s]

{'eval_loss': 0.09622645378112793, 'eval_f1': 0.8476160954944056, 'eval_precision': 0.8813963970395171, 'eval_recall': 0.8209780852301477, 'eval_runtime': 3.6913, 'eval_samples_per_second': 412.589, 'eval_steps_per_second': 51.743, 'epoch': 4.0}


 33%|███▎      | 3810/11430 [05:09<09:26, 13.46it/s]  
 33%|███▎      | 3810/11430 [05:12<09:26, 13.46it/s]

{'eval_loss': 0.0936262458562851, 'eval_f1': 0.8645045122104937, 'eval_precision': 0.8734029142644963, 'eval_recall': 0.8584099512084435, 'eval_runtime': 3.8044, 'eval_samples_per_second': 400.322, 'eval_steps_per_second': 50.205, 'epoch': 5.0}


 40%|████      | 4572/11430 [06:13<08:36, 13.28it/s]  
 40%|████      | 4572/11430 [06:16<08:36, 13.28it/s]

{'eval_loss': 0.10089780390262604, 'eval_f1': 0.8674751432308975, 'eval_precision': 0.8684369311812276, 'eval_recall': 0.8691383472665323, 'eval_runtime': 3.7384, 'eval_samples_per_second': 407.388, 'eval_steps_per_second': 51.091, 'epoch': 6.0}


 47%|████▋     | 5334/11430 [07:16<07:44, 13.14it/s]  
 47%|████▋     | 5334/11430 [07:20<07:44, 13.14it/s]

{'eval_loss': 0.10828304290771484, 'eval_f1': 0.8523823269792138, 'eval_precision': 0.8745227715364734, 'eval_recall': 0.8344759752293934, 'eval_runtime': 3.8523, 'eval_samples_per_second': 395.351, 'eval_steps_per_second': 49.581, 'epoch': 7.0}


 53%|█████▎    | 6096/11430 [08:19<06:37, 13.43it/s]  
 53%|█████▎    | 6096/11430 [08:23<06:37, 13.43it/s]

{'eval_loss': 0.11759807169437408, 'eval_f1': 0.8525870788091321, 'eval_precision': 0.8508733607630096, 'eval_recall': 0.8596397700372471, 'eval_runtime': 3.7156, 'eval_samples_per_second': 409.896, 'eval_steps_per_second': 51.405, 'epoch': 8.0}


 60%|██████    | 6858/11430 [09:22<05:34, 13.66it/s]  
 60%|██████    | 6858/11430 [09:26<05:34, 13.66it/s]

{'eval_loss': 0.12248149514198303, 'eval_f1': 0.8573696780440974, 'eval_precision': 0.8579947550113095, 'eval_recall': 0.8581279937432468, 'eval_runtime': 3.8499, 'eval_samples_per_second': 395.597, 'eval_steps_per_second': 49.612, 'epoch': 9.0}


 67%|██████▋   | 7620/11430 [10:25<04:33, 13.91it/s]  
 67%|██████▋   | 7620/11430 [10:29<04:33, 13.91it/s]

{'eval_loss': 0.12368674576282501, 'eval_f1': 0.8756812809599195, 'eval_precision': 0.8794311106559357, 'eval_recall': 0.8782194186901607, 'eval_runtime': 3.7114, 'eval_samples_per_second': 410.354, 'eval_steps_per_second': 51.463, 'epoch': 10.0}


 73%|███████▎  | 8382/11430 [11:28<03:42, 13.69it/s]
 73%|███████▎  | 8382/11430 [11:31<03:42, 13.69it/s]

{'eval_loss': 0.12462454289197922, 'eval_f1': 0.8692731798477789, 'eval_precision': 0.8758214620623613, 'eval_recall': 0.8644689486352467, 'eval_runtime': 3.7608, 'eval_samples_per_second': 404.968, 'eval_steps_per_second': 50.787, 'epoch': 11.0}


 80%|████████  | 9144/11430 [12:31<02:43, 13.94it/s]
 80%|████████  | 9144/11430 [12:34<02:43, 13.94it/s]

{'eval_loss': 0.12571784853935242, 'eval_f1': 0.8624061354393119, 'eval_precision': 0.8716827484378636, 'eval_recall': 0.8546180212314859, 'eval_runtime': 3.7118, 'eval_samples_per_second': 410.309, 'eval_steps_per_second': 51.457, 'epoch': 12.0}


 87%|████████▋ | 9906/11430 [13:34<01:49, 13.95it/s]
 87%|████████▋ | 9906/11430 [13:38<01:49, 13.95it/s]

{'eval_loss': 0.1248757392168045, 'eval_f1': 0.8747139716707303, 'eval_precision': 0.8709812753885338, 'eval_recall': 0.8795154818134261, 'eval_runtime': 3.802, 'eval_samples_per_second': 400.577, 'eval_steps_per_second': 50.236, 'epoch': 13.0}


 88%|████████▊ | 10002/11430 [13:47<01:47, 13.34it/s]

{'loss': 0.0351, 'grad_norm': 0.021629121154546738, 'learning_rate': 6.255468066491689e-06, 'epoch': 13.12}


 93%|█████████▎| 10668/11430 [14:37<00:54, 13.87it/s]
 93%|█████████▎| 10668/11430 [14:41<00:54, 13.87it/s]

{'eval_loss': 0.12466122210025787, 'eval_f1': 0.8730987941636911, 'eval_precision': 0.8701568098339172, 'eval_recall': 0.8765846124960023, 'eval_runtime': 3.7075, 'eval_samples_per_second': 410.791, 'eval_steps_per_second': 51.517, 'epoch': 14.0}


100%|██████████| 11430/11430 [15:45<00:00, 13.92it/s]

{'eval_loss': 0.12537136673927307, 'eval_f1': 0.8706059337915386, 'eval_precision': 0.8706196486148069, 'eval_recall': 0.871213075545518, 'eval_runtime': 3.7339, 'eval_samples_per_second': 407.88, 'eval_steps_per_second': 51.152, 'epoch': 15.0}


100%|██████████| 11430/11430 [15:47<00:00, 12.07it/s]


{'train_runtime': 947.2011, 'train_samples_per_second': 96.458, 'train_steps_per_second': 12.067, 'train_loss': 0.03093735230980687, 'epoch': 15.0}


100%|██████████| 191/191 [00:03<00:00, 51.27it/s]


0,1
eval/f1,▁▅▆▇██▇▇▇███████
eval/loss,▃▁▁▂▁▃▄▆▇███████
eval/precision,▁▇▆█▇▇▇▅▅█▇▇▇▇▇█
eval/recall,▁▄▆▆▇█▇▇▇██▇████
eval/runtime,▂▆▅▁▆▃█▂█▂▄▂▆▂▃▃
eval/samples_per_second,▇▃▄█▃▆▁▇▁▇▅▇▃▇▆▅
eval/steps_per_second,▇▃▄█▃▆▁▇▁▇▅▇▃▇▆▅
train/epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇▇███
train/global_step,▁▁▂▃▃▃▄▅▅▅▆▇▇▇▇███
train/grad_norm,▁

0,1
eval/f1,0.87568
eval/loss,0.12369
eval/precision,0.87943
eval/recall,0.87822
eval/runtime,3.7477
eval/samples_per_second,406.381
eval/steps_per_second,50.964
total_flos,6010055190432000.0
train/epoch,15.0
train/global_step,11430.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1884/1884 [00:00<00:00, 18229.25 examples/s]
  7%|▋         | 189/2835 [00:14<03:18, 13.33it/s]
  7%|▋         | 189/2835 [00:15<03:18, 13.33it/s]

{'eval_loss': 0.3167298138141632, 'eval_f1': 0.4519793129868471, 'eval_precision': 0.5304147465437788, 'eval_recall': 0.3989497582119002, 'eval_runtime': 0.9177, 'eval_samples_per_second': 410.789, 'eval_steps_per_second': 52.302, 'epoch': 1.0}


 13%|█▎        | 377/2835 [00:30<02:58, 13.80it/s]
 13%|█▎        | 378/2835 [00:31<02:58, 13.80it/s]

{'eval_loss': 0.27442774176597595, 'eval_f1': 0.6308598101361848, 'eval_precision': 0.7791152646191717, 'eval_recall': 0.5551326312794178, 'eval_runtime': 0.8987, 'eval_samples_per_second': 419.479, 'eval_steps_per_second': 53.408, 'epoch': 2.0}


 20%|██        | 567/2835 [00:46<02:36, 14.49it/s]
 20%|██        | 567/2835 [00:47<02:36, 14.49it/s]

{'eval_loss': 0.2974584698677063, 'eval_f1': 0.6701829897989129, 'eval_precision': 0.7665391556271564, 'eval_recall': 0.6108876451214964, 'eval_runtime': 0.8943, 'eval_samples_per_second': 421.544, 'eval_steps_per_second': 53.671, 'epoch': 3.0}


 27%|██▋       | 755/2835 [01:03<02:37, 13.21it/s]
 27%|██▋       | 756/2835 [01:04<02:37, 13.21it/s]

{'eval_loss': 0.29241663217544556, 'eval_f1': 0.7349969218456613, 'eval_precision': 0.783328746872385, 'eval_recall': 0.6998258676818697, 'eval_runtime': 0.9402, 'eval_samples_per_second': 400.974, 'eval_steps_per_second': 51.052, 'epoch': 4.0}


 33%|███▎      | 945/2835 [01:19<02:12, 14.22it/s]
 33%|███▎      | 945/2835 [01:20<02:12, 14.22it/s]

{'eval_loss': 0.33078068494796753, 'eval_f1': 0.7332142055064533, 'eval_precision': 0.7730342598577892, 'eval_recall': 0.7041741379435623, 'eval_runtime': 0.904, 'eval_samples_per_second': 417.051, 'eval_steps_per_second': 53.099, 'epoch': 5.0}


 40%|███▉      | 1133/2835 [01:36<02:04, 13.67it/s]
 40%|████      | 1134/2835 [01:37<02:04, 13.67it/s]

{'eval_loss': 0.33881324529647827, 'eval_f1': 0.7566427185635456, 'eval_precision': 0.7840037279185533, 'eval_recall': 0.7433078823553403, 'eval_runtime': 0.9177, 'eval_samples_per_second': 410.831, 'eval_steps_per_second': 52.307, 'epoch': 6.0}


 47%|████▋     | 1323/2835 [01:53<01:50, 13.65it/s]
 47%|████▋     | 1323/2835 [01:53<01:50, 13.65it/s]

{'eval_loss': 0.37538689374923706, 'eval_f1': 0.7583755296820955, 'eval_precision': 0.7599566189642889, 'eval_recall': 0.766575360631211, 'eval_runtime': 0.9471, 'eval_samples_per_second': 398.064, 'eval_steps_per_second': 50.682, 'epoch': 7.0}


 53%|█████▎    | 1511/2835 [02:11<01:37, 13.64it/s]
 53%|█████▎    | 1512/2835 [02:12<01:37, 13.64it/s]

{'eval_loss': 0.3790934085845947, 'eval_f1': 0.7592203988657579, 'eval_precision': 0.7808077672277097, 'eval_recall': 0.747670754714991, 'eval_runtime': 0.9103, 'eval_samples_per_second': 414.143, 'eval_steps_per_second': 52.729, 'epoch': 8.0}


 60%|██████    | 1701/2835 [02:28<01:20, 14.02it/s]
 60%|██████    | 1701/2835 [02:29<01:20, 14.02it/s]

{'eval_loss': 0.36026376485824585, 'eval_f1': 0.7767768258322596, 'eval_precision': 0.7852480689245395, 'eval_recall': 0.7736306807780629, 'eval_runtime': 0.9194, 'eval_samples_per_second': 410.041, 'eval_steps_per_second': 52.207, 'epoch': 9.0}


 67%|██████▋   | 1889/2835 [02:44<01:14, 12.76it/s]
 67%|██████▋   | 1890/2835 [02:45<01:14, 12.76it/s]

{'eval_loss': 0.382016658782959, 'eval_f1': 0.7772407110617435, 'eval_precision': 0.7806148765812632, 'eval_recall': 0.7788443182785054, 'eval_runtime': 0.9625, 'eval_samples_per_second': 391.684, 'eval_steps_per_second': 49.87, 'epoch': 10.0}


 73%|███████▎  | 2079/2835 [03:01<00:54, 13.83it/s]
 73%|███████▎  | 2079/2835 [03:02<00:54, 13.83it/s]

{'eval_loss': 0.39681166410446167, 'eval_f1': 0.773247909455508, 'eval_precision': 0.7798718456801682, 'eval_recall': 0.7744691045413878, 'eval_runtime': 0.9247, 'eval_samples_per_second': 407.692, 'eval_steps_per_second': 51.908, 'epoch': 11.0}


 80%|███████▉  | 2267/2835 [03:18<00:42, 13.36it/s]
 80%|████████  | 2268/2835 [03:19<00:42, 13.36it/s]

{'eval_loss': 0.40453624725341797, 'eval_f1': 0.7684435991650711, 'eval_precision': 0.766629031951493, 'eval_recall': 0.7794604689172614, 'eval_runtime': 0.9504, 'eval_samples_per_second': 396.68, 'eval_steps_per_second': 50.506, 'epoch': 12.0}


 87%|████████▋ | 2457/2835 [03:35<00:29, 13.03it/s]
 87%|████████▋ | 2457/2835 [03:36<00:29, 13.03it/s]

{'eval_loss': 0.40320974588394165, 'eval_f1': 0.7807863798474365, 'eval_precision': 0.782643511890757, 'eval_recall': 0.7873416522925158, 'eval_runtime': 0.9792, 'eval_samples_per_second': 385.018, 'eval_steps_per_second': 49.021, 'epoch': 13.0}


 93%|█████████▎| 2645/2835 [03:51<00:14, 13.28it/s]
 93%|█████████▎| 2646/2835 [03:52<00:14, 13.28it/s]

{'eval_loss': 0.4079759120941162, 'eval_f1': 0.7776194824961948, 'eval_precision': 0.7827090899095938, 'eval_recall': 0.7788048248158432, 'eval_runtime': 0.9508, 'eval_samples_per_second': 396.513, 'eval_steps_per_second': 50.484, 'epoch': 14.0}


100%|██████████| 2835/2835 [04:08<00:00, 13.05it/s]
100%|██████████| 2835/2835 [04:11<00:00, 13.05it/s]

{'eval_loss': 0.40963804721832275, 'eval_f1': 0.7766805153600447, 'eval_precision': 0.7809206576417095, 'eval_recall': 0.7788048248158432, 'eval_runtime': 0.9229, 'eval_samples_per_second': 408.48, 'eval_steps_per_second': 52.008, 'epoch': 15.0}


100%|██████████| 2835/2835 [04:13<00:00, 11.19it/s]


{'train_runtime': 253.2641, 'train_samples_per_second': 89.255, 'train_steps_per_second': 11.194, 'train_loss': 0.08778182741195437, 'epoch': 15.0}


100%|██████████| 48/48 [00:01<00:00, 47.79it/s]


0,1
eval/f1,▁▅▆▇▇▇██████████
eval/loss,▃▁▂▂▄▄▆▆▅▇▇█████
eval/precision,▁█▇███▇████▇████
eval/recall,▁▄▅▆▇▇█▇████████
eval/runtime,▂▁▁▃▁▂▃▂▂▄▂▄▅▄▂█
eval/samples_per_second,▇██▆▇▇▅▇▇▅▆▅▄▅▇▁
eval/steps_per_second,▇██▆▇▇▅▇▇▅▆▅▄▅▇▁
train/epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇███
train/global_step,▁▁▂▃▃▃▄▅▅▅▆▇▇▇███

0,1
eval/f1,0.78079
eval/loss,0.40321
eval/precision,0.78264
eval/recall,0.78734
eval/runtime,1.0463
eval/samples_per_second,360.31
eval/steps_per_second,45.875
total_flos,1486946402599680.0
train/epoch,15.0
train/global_step,2835.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1298/1298 [00:00<00:00, 12991.10 examples/s]
  7%|▋         | 129/1950 [00:09<02:11, 13.83it/s]
  7%|▋         | 130/1950 [00:10<02:11, 13.83it/s]

{'eval_loss': 0.2932186722755432, 'eval_f1': 0.19423558897243107, 'eval_precision': 0.260748959778086, 'eval_recall': 0.17124671650838552, 'eval_runtime': 0.6343, 'eval_samples_per_second': 409.927, 'eval_steps_per_second': 52.029, 'epoch': 1.0}


 13%|█▎        | 259/1950 [00:21<02:04, 13.63it/s]
 13%|█▎        | 260/1950 [00:22<02:03, 13.63it/s]

{'eval_loss': 0.23626990616321564, 'eval_f1': 0.4896028866091891, 'eval_precision': 0.6230827010257893, 'eval_recall': 0.409909638220261, 'eval_runtime': 0.6325, 'eval_samples_per_second': 411.092, 'eval_steps_per_second': 52.177, 'epoch': 2.0}


 20%|█▉        | 389/1950 [00:33<01:54, 13.64it/s]
 20%|██        | 390/1950 [00:34<01:54, 13.64it/s]

{'eval_loss': 0.22136500477790833, 'eval_f1': 0.5439710059510048, 'eval_precision': 0.5780488713839261, 'eval_recall': 0.5159150758415193, 'eval_runtime': 0.635, 'eval_samples_per_second': 409.425, 'eval_steps_per_second': 51.965, 'epoch': 3.0}


 27%|██▋       | 519/1950 [00:45<01:45, 13.58it/s]
 27%|██▋       | 520/1950 [00:46<01:45, 13.58it/s]

{'eval_loss': 0.2147180438041687, 'eval_f1': 0.5763014774623706, 'eval_precision': 0.7280379279765337, 'eval_recall': 0.5437639254475531, 'eval_runtime': 0.6415, 'eval_samples_per_second': 405.301, 'eval_steps_per_second': 51.442, 'epoch': 4.0}


 33%|███▎      | 649/1950 [00:57<01:35, 13.58it/s]
 33%|███▎      | 650/1950 [00:58<01:35, 13.58it/s]

{'eval_loss': 0.21054013073444366, 'eval_f1': 0.6600539865668938, 'eval_precision': 0.7299358186849847, 'eval_recall': 0.6073003516042809, 'eval_runtime': 0.6409, 'eval_samples_per_second': 405.663, 'eval_steps_per_second': 51.488, 'epoch': 5.0}


 40%|███▉      | 779/1950 [01:09<01:26, 13.53it/s]
 40%|████      | 780/1950 [01:10<01:26, 13.53it/s]

{'eval_loss': 0.2292068749666214, 'eval_f1': 0.638468099227443, 'eval_precision': 0.7207931224437828, 'eval_recall': 0.5862383221890533, 'eval_runtime': 0.6373, 'eval_samples_per_second': 407.962, 'eval_steps_per_second': 51.78, 'epoch': 6.0}


 47%|████▋     | 909/1950 [01:21<01:16, 13.59it/s]
 47%|████▋     | 910/1950 [01:22<01:16, 13.59it/s]

{'eval_loss': 0.21076330542564392, 'eval_f1': 0.6579140075132119, 'eval_precision': 0.771951559705519, 'eval_recall': 0.604074732861224, 'eval_runtime': 0.6298, 'eval_samples_per_second': 412.841, 'eval_steps_per_second': 52.399, 'epoch': 7.0}


 53%|█████▎    | 1039/1950 [01:33<01:07, 13.41it/s]
 53%|█████▎    | 1040/1950 [01:34<01:07, 13.41it/s]

{'eval_loss': 0.23158665001392365, 'eval_f1': 0.687198127267532, 'eval_precision': 0.7460354081217898, 'eval_recall': 0.6410210749118425, 'eval_runtime': 0.6428, 'eval_samples_per_second': 404.487, 'eval_steps_per_second': 51.339, 'epoch': 8.0}


 60%|█████▉    | 1169/1950 [01:45<00:57, 13.50it/s]
 60%|██████    | 1170/1950 [01:46<00:57, 13.50it/s]

{'eval_loss': 0.22534306347370148, 'eval_f1': 0.68617878234356, 'eval_precision': 0.7528253236778583, 'eval_recall': 0.6406866537374765, 'eval_runtime': 0.6358, 'eval_samples_per_second': 408.913, 'eval_steps_per_second': 51.901, 'epoch': 9.0}


 67%|██████▋   | 1299/1950 [01:57<00:49, 13.03it/s]
 67%|██████▋   | 1300/1950 [01:58<00:49, 13.03it/s]

{'eval_loss': 0.2556510865688324, 'eval_f1': 0.6884490417429713, 'eval_precision': 0.7476778460063356, 'eval_recall': 0.6484720219399503, 'eval_runtime': 0.6583, 'eval_samples_per_second': 394.984, 'eval_steps_per_second': 50.133, 'epoch': 10.0}


 73%|███████▎  | 1429/1950 [02:09<00:38, 13.39it/s]
 73%|███████▎  | 1430/1950 [02:10<00:38, 13.39it/s]

{'eval_loss': 0.23915962874889374, 'eval_f1': 0.7004152770954369, 'eval_precision': 0.792995718844834, 'eval_recall': 0.649559594463, 'eval_runtime': 0.6497, 'eval_samples_per_second': 400.202, 'eval_steps_per_second': 50.795, 'epoch': 11.0}


 80%|███████▉  | 1559/1950 [02:21<00:31, 12.60it/s]
 80%|████████  | 1560/1950 [02:22<00:30, 12.60it/s]

{'eval_loss': 0.26258692145347595, 'eval_f1': 0.6985638114768824, 'eval_precision': 0.7892980073099416, 'eval_recall': 0.6432981962867083, 'eval_runtime': 0.6917, 'eval_samples_per_second': 375.865, 'eval_steps_per_second': 47.706, 'epoch': 12.0}


 87%|████████▋ | 1689/1950 [02:33<00:20, 13.01it/s]
 87%|████████▋ | 1690/1950 [02:34<00:19, 13.01it/s]

{'eval_loss': 0.2662447392940521, 'eval_f1': 0.6922180463753876, 'eval_precision': 0.7456902063392119, 'eval_recall': 0.6527794470376488, 'eval_runtime': 0.6583, 'eval_samples_per_second': 394.932, 'eval_steps_per_second': 50.126, 'epoch': 13.0}


 93%|█████████▎| 1820/1950 [02:46<00:09, 13.26it/s]
 93%|█████████▎| 1820/1950 [02:46<00:09, 13.26it/s]

{'eval_loss': 0.25959691405296326, 'eval_f1': 0.6929499934455211, 'eval_precision': 0.75511800570464, 'eval_recall': 0.6533189929592407, 'eval_runtime': 0.6948, 'eval_samples_per_second': 374.234, 'eval_steps_per_second': 47.499, 'epoch': 14.0}


100%|██████████| 1950/1950 [02:58<00:00, 13.46it/s]
100%|██████████| 1950/1950 [03:00<00:00, 13.46it/s]

{'eval_loss': 0.26052847504615784, 'eval_f1': 0.7000594172805522, 'eval_precision': 0.7571038176815789, 'eval_recall': 0.6635230745918937, 'eval_runtime': 0.7522, 'eval_samples_per_second': 345.631, 'eval_steps_per_second': 43.869, 'epoch': 15.0}


100%|██████████| 1950/1950 [03:02<00:00, 10.68it/s]


{'train_runtime': 182.5872, 'train_samples_per_second': 85.274, 'train_steps_per_second': 10.68, 'train_loss': 0.08626627604166667, 'epoch': 15.0}


100%|██████████| 33/33 [00:00<00:00, 47.99it/s]


------------------ Starting model ==> epochs: 15, batch size: 8, weights of decay: 0.001 ---------------------


0,1
eval/f1,▁▅▆▆▇▇▇█████████
eval/loss,█▃▂▁▁▃▁▃▂▅▃▅▆▅▅▃
eval/precision,▁▆▅▇▇▇█▇▇▇██▇███
eval/recall,▁▄▆▆▇▇▇█████████
eval/runtime,▁▁▁▂▂▁▁▂▁▃▂▅▃▅█▇
eval/samples_per_second,███▇▇▇█▇█▆▇▄▆▄▁▂
eval/steps_per_second,███▇▇▇█▇█▆▇▄▆▄▁▂
train/epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇███
train/global_step,▁▁▂▃▃▃▄▅▅▅▆▇▇▇███

0,1
eval/f1,0.70042
eval/loss,0.23916
eval/precision,0.793
eval/recall,0.64956
eval/runtime,0.7286
eval/samples_per_second,356.834
eval/steps_per_second,45.291
total_flos,1024205760576000.0
train/epoch,15.0
train/global_step,1950.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 7614/7614 [00:00<00:00, 22014.73 examples/s]
  7%|▋         | 761/11430 [00:57<13:22, 13.30it/s]
  7%|▋         | 762/11430 [01:00<13:22, 13.30it/s]

{'eval_loss': 0.11075489223003387, 'eval_f1': 0.6560129446228855, 'eval_precision': 0.6796540687446287, 'eval_recall': 0.6356517728207821, 'eval_runtime': 3.6592, 'eval_samples_per_second': 416.213, 'eval_steps_per_second': 52.197, 'epoch': 1.0}


 13%|█▎        | 1523/11430 [02:00<13:06, 12.59it/s] 
 13%|█▎        | 1524/11430 [02:04<13:06, 12.59it/s]

{'eval_loss': 0.09506964683532715, 'eval_f1': 0.7304236903948436, 'eval_precision': 0.8807621204979504, 'eval_recall': 0.6953724622455272, 'eval_runtime': 3.8519, 'eval_samples_per_second': 395.387, 'eval_steps_per_second': 49.586, 'epoch': 2.0}


 20%|██        | 2286/11430 [03:04<11:21, 13.43it/s]  
 20%|██        | 2286/11430 [03:07<11:21, 13.43it/s]

{'eval_loss': 0.08499564975500107, 'eval_f1': 0.8365928901445893, 'eval_precision': 0.8887021959897592, 'eval_recall': 0.7999708711763786, 'eval_runtime': 3.6981, 'eval_samples_per_second': 411.838, 'eval_steps_per_second': 51.649, 'epoch': 3.0}


 27%|██▋       | 3048/11430 [04:07<10:11, 13.70it/s]  
 27%|██▋       | 3048/11430 [04:10<10:11, 13.70it/s]

{'eval_loss': 0.08016509562730789, 'eval_f1': 0.8625224950559254, 'eval_precision': 0.8887891727986832, 'eval_recall': 0.8402517904104352, 'eval_runtime': 3.7268, 'eval_samples_per_second': 408.665, 'eval_steps_per_second': 51.251, 'epoch': 4.0}


 33%|███▎      | 3810/11430 [05:09<09:10, 13.83it/s]  
 33%|███▎      | 3810/11430 [05:13<09:10, 13.83it/s]

{'eval_loss': 0.0987967848777771, 'eval_f1': 0.8435098264464678, 'eval_precision': 0.835535857373504, 'eval_recall': 0.8592923874561472, 'eval_runtime': 3.6903, 'eval_samples_per_second': 412.703, 'eval_steps_per_second': 51.757, 'epoch': 5.0}


 40%|████      | 4572/11430 [06:12<08:35, 13.31it/s]  
 40%|████      | 4572/11430 [06:16<08:35, 13.31it/s]

{'eval_loss': 0.09122855961322784, 'eval_f1': 0.8720594834043511, 'eval_precision': 0.8773082080941429, 'eval_recall': 0.8691390842247524, 'eval_runtime': 3.7469, 'eval_samples_per_second': 406.474, 'eval_steps_per_second': 50.976, 'epoch': 6.0}


 47%|████▋     | 5334/11430 [07:15<07:39, 13.26it/s]  
 47%|████▋     | 5334/11430 [07:19<07:39, 13.26it/s]

{'eval_loss': 0.1008591502904892, 'eval_f1': 0.8713333499868406, 'eval_precision': 0.871566089631033, 'eval_recall': 0.8744789558954127, 'eval_runtime': 3.7073, 'eval_samples_per_second': 410.808, 'eval_steps_per_second': 51.52, 'epoch': 7.0}


 53%|█████▎    | 6096/11430 [08:18<06:42, 13.24it/s]  
 53%|█████▎    | 6096/11430 [08:22<06:42, 13.24it/s]

{'eval_loss': 0.10844346135854721, 'eval_f1': 0.8763046673374922, 'eval_precision': 0.8984161627116523, 'eval_recall': 0.8607879605687424, 'eval_runtime': 3.7884, 'eval_samples_per_second': 402.015, 'eval_steps_per_second': 50.417, 'epoch': 8.0}


 60%|██████    | 6858/11430 [09:21<05:33, 13.72it/s]  
 60%|██████    | 6858/11430 [09:25<05:33, 13.72it/s]

{'eval_loss': 0.1092694029211998, 'eval_f1': 0.8811063700757622, 'eval_precision': 0.8914348684382425, 'eval_recall': 0.8728573546776344, 'eval_runtime': 3.7218, 'eval_samples_per_second': 409.213, 'eval_steps_per_second': 51.32, 'epoch': 9.0}


 67%|██████▋   | 7620/11430 [10:24<04:48, 13.21it/s]  
 67%|██████▋   | 7620/11430 [10:28<04:48, 13.21it/s]

{'eval_loss': 0.11466887593269348, 'eval_f1': 0.8787250161008062, 'eval_precision': 0.8858414587160289, 'eval_recall': 0.8744773662105355, 'eval_runtime': 3.8167, 'eval_samples_per_second': 399.036, 'eval_steps_per_second': 50.043, 'epoch': 10.0}


 73%|███████▎  | 8382/11430 [11:27<03:44, 13.55it/s]
 73%|███████▎  | 8382/11430 [11:30<03:44, 13.55it/s]

{'eval_loss': 0.11698335409164429, 'eval_f1': 0.8789194669283303, 'eval_precision': 0.8826977714336269, 'eval_recall': 0.8761438418592814, 'eval_runtime': 3.721, 'eval_samples_per_second': 409.301, 'eval_steps_per_second': 51.331, 'epoch': 11.0}


 80%|████████  | 9144/11430 [12:29<02:43, 13.99it/s]
 80%|████████  | 9144/11430 [12:33<02:43, 13.99it/s]

{'eval_loss': 0.11568009853363037, 'eval_f1': 0.8802828091248066, 'eval_precision': 0.8874895060566775, 'eval_recall': 0.8762014139703889, 'eval_runtime': 3.791, 'eval_samples_per_second': 401.736, 'eval_steps_per_second': 50.382, 'epoch': 12.0}


 87%|████████▋ | 9906/11430 [13:32<01:49, 13.96it/s]
 87%|████████▋ | 9906/11430 [13:35<01:49, 13.96it/s]

{'eval_loss': 0.11828453838825226, 'eval_f1': 0.8733156633360343, 'eval_precision': 0.8747378551858038, 'eval_recall': 0.8732284200161053, 'eval_runtime': 3.698, 'eval_samples_per_second': 411.841, 'eval_steps_per_second': 51.649, 'epoch': 13.0}


 88%|████████▊ | 10002/11430 [13:44<01:46, 13.39it/s]

{'loss': 0.0352, 'grad_norm': 0.013338720425963402, 'learning_rate': 6.255468066491689e-06, 'epoch': 13.12}


 93%|█████████▎| 10668/11430 [14:34<00:55, 13.72it/s]
 93%|█████████▎| 10668/11430 [14:38<00:55, 13.72it/s]

{'eval_loss': 0.12155476212501526, 'eval_f1': 0.8737624520206576, 'eval_precision': 0.8735506313902757, 'eval_recall': 0.8747185069147264, 'eval_runtime': 3.7913, 'eval_samples_per_second': 401.71, 'eval_steps_per_second': 50.379, 'epoch': 14.0}


100%|██████████| 11430/11430 [15:37<00:00, 13.82it/s]
100%|██████████| 11430/11430 [15:42<00:00, 13.82it/s]

{'eval_loss': 0.12099426239728928, 'eval_f1': 0.876494255153303, 'eval_precision': 0.877663788696097, 'eval_recall': 0.8759024302954462, 'eval_runtime': 3.7102, 'eval_samples_per_second': 410.486, 'eval_steps_per_second': 51.479, 'epoch': 15.0}


100%|██████████| 11430/11430 [15:44<00:00, 12.11it/s]


{'train_runtime': 944.0898, 'train_samples_per_second': 96.776, 'train_steps_per_second': 12.107, 'train_loss': 0.03109705629624094, 'epoch': 15.0}


100%|██████████| 191/191 [00:03<00:00, 51.53it/s]


0,1
eval/f1,▁▃▇▇▇███████████
eval/loss,▆▄▂▁▄▃▄▆▆▇▇▇▇██▆
eval/precision,▁▇██▆▇▇███▇█▇▇▇█
eval/recall,▁▃▆▇████████████
eval/runtime,▁█▂▃▂▄▃▆▃▇▃▆▂▆▃▄
eval/samples_per_second,█▁▇▅▇▅▆▃▆▂▆▃▇▃▆▅
eval/steps_per_second,█▁▇▅▇▅▆▃▆▂▆▃▇▃▆▅
train/epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇▇███
train/global_step,▁▁▂▃▃▃▄▅▅▅▆▇▇▇▇███
train/grad_norm,▁

0,1
eval/f1,0.88111
eval/loss,0.10927
eval/precision,0.89143
eval/recall,0.87286
eval/runtime,3.7472
eval/samples_per_second,406.439
eval/steps_per_second,50.972
total_flos,6010055190432000.0
train/epoch,15.0
train/global_step,11430.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1884/1884 [00:00<00:00, 16591.33 examples/s]
  7%|▋         | 189/2835 [00:14<03:05, 14.30it/s]
  7%|▋         | 189/2835 [00:15<03:05, 14.30it/s]

{'eval_loss': 0.3174854516983032, 'eval_f1': 0.4534204014809027, 'eval_precision': 0.5312014960261804, 'eval_recall': 0.4016894842392974, 'eval_runtime': 0.9425, 'eval_samples_per_second': 399.988, 'eval_steps_per_second': 50.927, 'epoch': 1.0}


 13%|█▎        | 377/2835 [00:30<03:00, 13.60it/s]
 13%|█▎        | 378/2835 [00:31<03:00, 13.60it/s]

{'eval_loss': 0.27279698848724365, 'eval_f1': 0.6374854924161963, 'eval_precision': 0.8053287280842021, 'eval_recall': 0.5555465868980406, 'eval_runtime': 0.9019, 'eval_samples_per_second': 417.998, 'eval_steps_per_second': 53.22, 'epoch': 2.0}


 20%|██        | 567/2835 [00:47<02:37, 14.43it/s]
 20%|██        | 567/2835 [00:47<02:37, 14.43it/s]

{'eval_loss': 0.3032306730747223, 'eval_f1': 0.6927055398830463, 'eval_precision': 0.8163183047223794, 'eval_recall': 0.6272509392713518, 'eval_runtime': 0.898, 'eval_samples_per_second': 419.816, 'eval_steps_per_second': 53.451, 'epoch': 3.0}


 27%|██▋       | 755/2835 [01:03<02:39, 13.01it/s]
 27%|██▋       | 756/2835 [01:04<02:39, 13.01it/s]

{'eval_loss': 0.30382099747657776, 'eval_f1': 0.7470980405464299, 'eval_precision': 0.7900980960956703, 'eval_recall': 0.7252676926282697, 'eval_runtime': 0.9337, 'eval_samples_per_second': 403.761, 'eval_steps_per_second': 51.407, 'epoch': 4.0}


 33%|███▎      | 945/2835 [01:20<02:20, 13.44it/s]
 33%|███▎      | 945/2835 [01:21<02:20, 13.44it/s]

{'eval_loss': 0.294567346572876, 'eval_f1': 0.7760503392441125, 'eval_precision': 0.8181042453621448, 'eval_recall': 0.7401217331768526, 'eval_runtime': 0.9255, 'eval_samples_per_second': 407.366, 'eval_steps_per_second': 51.866, 'epoch': 5.0}


 40%|███▉      | 1133/2835 [01:36<02:04, 13.66it/s]
 40%|████      | 1134/2835 [01:37<02:04, 13.66it/s]

{'eval_loss': 0.31734517216682434, 'eval_f1': 0.7527973082826301, 'eval_precision': 0.7772365537525037, 'eval_recall': 0.7350898908877141, 'eval_runtime': 0.9075, 'eval_samples_per_second': 415.435, 'eval_steps_per_second': 52.894, 'epoch': 6.0}


 47%|████▋     | 1323/2835 [01:53<01:49, 13.86it/s]
 47%|████▋     | 1323/2835 [01:54<01:49, 13.86it/s]

{'eval_loss': 0.3624469041824341, 'eval_f1': 0.7383954911692765, 'eval_precision': 0.7503350618444957, 'eval_recall': 0.7314048848438908, 'eval_runtime': 0.9288, 'eval_samples_per_second': 405.898, 'eval_steps_per_second': 51.679, 'epoch': 7.0}


 53%|█████▎    | 1511/2835 [02:10<01:39, 13.29it/s]
 53%|█████▎    | 1512/2835 [02:11<01:39, 13.29it/s]

{'eval_loss': 0.39258596301078796, 'eval_f1': 0.7677135152843009, 'eval_precision': 0.797477086735148, 'eval_recall': 0.7455028309986618, 'eval_runtime': 0.9499, 'eval_samples_per_second': 396.877, 'eval_steps_per_second': 50.531, 'epoch': 8.0}


 60%|██████    | 1701/2835 [02:26<01:20, 14.02it/s]
 60%|██████    | 1701/2835 [02:27<01:20, 14.02it/s]

{'eval_loss': 0.3966819643974304, 'eval_f1': 0.7644063105116953, 'eval_precision': 0.7755700980700981, 'eval_recall': 0.7581955290003913, 'eval_runtime': 0.9169, 'eval_samples_per_second': 411.15, 'eval_steps_per_second': 52.348, 'epoch': 9.0}


 67%|██████▋   | 1889/2835 [02:43<01:09, 13.55it/s]
 67%|██████▋   | 1890/2835 [02:44<01:09, 13.55it/s]

{'eval_loss': 0.4073052406311035, 'eval_f1': 0.76064968763279, 'eval_precision': 0.7735658316947157, 'eval_recall': 0.7522039006979989, 'eval_runtime': 0.9397, 'eval_samples_per_second': 401.193, 'eval_steps_per_second': 51.08, 'epoch': 10.0}


 73%|███████▎  | 2079/2835 [02:59<00:56, 13.33it/s]
 73%|███████▎  | 2079/2835 [03:00<00:56, 13.33it/s]

{'eval_loss': 0.4220474660396576, 'eval_f1': 0.7536959645714265, 'eval_precision': 0.7597789988675508, 'eval_recall': 0.7544102918216298, 'eval_runtime': 0.9777, 'eval_samples_per_second': 385.616, 'eval_steps_per_second': 49.097, 'epoch': 11.0}


 80%|███████▉  | 2267/2835 [03:16<00:42, 13.47it/s]
 80%|████████  | 2268/2835 [03:17<00:42, 13.47it/s]

{'eval_loss': 0.42226630449295044, 'eval_f1': 0.7585797798662034, 'eval_precision': 0.7711913116123642, 'eval_recall': 0.7549677152940729, 'eval_runtime': 0.9401, 'eval_samples_per_second': 401.039, 'eval_steps_per_second': 51.061, 'epoch': 12.0}


 87%|████████▋ | 2457/2835 [03:33<00:27, 13.83it/s]
 87%|████████▋ | 2457/2835 [03:34<00:27, 13.83it/s]

{'eval_loss': 0.4256786108016968, 'eval_f1': 0.7595099730987164, 'eval_precision': 0.7647398393768554, 'eval_recall': 0.7586893704414956, 'eval_runtime': 0.9486, 'eval_samples_per_second': 397.429, 'eval_steps_per_second': 50.601, 'epoch': 13.0}


 93%|█████████▎| 2645/2835 [03:49<00:14, 12.69it/s]
 93%|█████████▎| 2646/2835 [03:50<00:14, 12.69it/s]

{'eval_loss': 0.42734241485595703, 'eval_f1': 0.7688675405895435, 'eval_precision': 0.7771969696969697, 'eval_recall': 0.7629664010525183, 'eval_runtime': 1.0001, 'eval_samples_per_second': 376.95, 'eval_steps_per_second': 47.994, 'epoch': 14.0}


100%|██████████| 2835/2835 [04:06<00:00, 14.00it/s]
100%|██████████| 2835/2835 [04:09<00:00, 14.00it/s]

{'eval_loss': 0.42953065037727356, 'eval_f1': 0.7682535726887536, 'eval_precision': 0.775036231884058, 'eval_recall': 0.7646192936145018, 'eval_runtime': 0.9234, 'eval_samples_per_second': 408.288, 'eval_steps_per_second': 51.984, 'epoch': 15.0}


100%|██████████| 2835/2835 [04:11<00:00, 11.27it/s]


{'train_runtime': 251.5226, 'train_samples_per_second': 89.873, 'train_steps_per_second': 11.271, 'train_loss': 0.08637110257695381, 'epoch': 15.0}


100%|██████████| 48/48 [00:01<00:00, 47.49it/s]


0,1
eval/f1,▁▅▆▇█▇▇█████████
eval/loss,▃▁▂▂▂▃▅▆▇▇█████▂
eval/precision,▁██▇█▇▆▇▇▇▇▇▇▇▇█
eval/recall,▁▄▅▇█▇▇█████████
eval/runtime,▃▁▁▂▂▁▂▃▂▃▄▃▃▅▂█
eval/samples_per_second,▆██▆▇█▇▆▇▆▄▆▆▄▇▁
eval/steps_per_second,▆██▆▇█▇▆▇▆▄▆▆▄▇▁
train/epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇███
train/global_step,▁▁▂▃▃▃▄▅▅▅▆▇▇▇███

0,1
eval/f1,0.77605
eval/loss,0.29457
eval/precision,0.8181
eval/recall,0.74012
eval/runtime,1.0681
eval/samples_per_second,352.97
eval/steps_per_second,44.941
total_flos,1486946402599680.0
train/epoch,15.0
train/global_step,2835.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1298/1298 [00:00<00:00, 13501.42 examples/s]
  7%|▋         | 129/1950 [00:09<02:12, 13.73it/s]
  7%|▋         | 130/1950 [00:10<02:12, 13.73it/s]

{'eval_loss': 0.2931697964668274, 'eval_f1': 0.19423558897243107, 'eval_precision': 0.260748959778086, 'eval_recall': 0.17124671650838552, 'eval_runtime': 0.625, 'eval_samples_per_second': 415.987, 'eval_steps_per_second': 52.798, 'epoch': 1.0}


 13%|█▎        | 259/1950 [00:21<02:02, 13.77it/s]
 13%|█▎        | 260/1950 [00:22<02:02, 13.77it/s]

{'eval_loss': 0.236205592751503, 'eval_f1': 0.4896028866091891, 'eval_precision': 0.6230827010257893, 'eval_recall': 0.409909638220261, 'eval_runtime': 0.6255, 'eval_samples_per_second': 415.679, 'eval_steps_per_second': 52.759, 'epoch': 2.0}


 20%|██        | 390/1950 [00:34<01:53, 13.70it/s]
 20%|██        | 390/1950 [00:34<01:53, 13.70it/s]

{'eval_loss': 0.22133871912956238, 'eval_f1': 0.5426393674899256, 'eval_precision': 0.582153014729425, 'eval_recall': 0.5108886287764914, 'eval_runtime': 0.6293, 'eval_samples_per_second': 413.134, 'eval_steps_per_second': 52.436, 'epoch': 3.0}


 27%|██▋       | 519/1950 [00:46<01:46, 13.38it/s]
 27%|██▋       | 520/1950 [00:46<01:46, 13.38it/s]

{'eval_loss': 0.22743217647075653, 'eval_f1': 0.5590250164102352, 'eval_precision': 0.5895712724594825, 'eval_recall': 0.5346162330198786, 'eval_runtime': 0.6288, 'eval_samples_per_second': 413.458, 'eval_steps_per_second': 52.477, 'epoch': 4.0}


 33%|███▎      | 650/1950 [00:59<01:38, 13.24it/s]
 33%|███▎      | 650/1950 [00:59<01:38, 13.24it/s]

{'eval_loss': 0.2138938456773758, 'eval_f1': 0.6612878212335875, 'eval_precision': 0.8120221187890361, 'eval_recall': 0.5978028733695867, 'eval_runtime': 0.6785, 'eval_samples_per_second': 383.178, 'eval_steps_per_second': 48.634, 'epoch': 5.0}


 40%|███▉      | 779/1950 [01:11<01:26, 13.60it/s]
 40%|████      | 780/1950 [01:12<01:26, 13.60it/s]

{'eval_loss': 0.21441462635993958, 'eval_f1': 0.6766156366380496, 'eval_precision': 0.794357621361999, 'eval_recall': 0.6127498664805805, 'eval_runtime': 0.6588, 'eval_samples_per_second': 394.671, 'eval_steps_per_second': 50.093, 'epoch': 6.0}


 47%|████▋     | 909/1950 [01:23<01:18, 13.34it/s]
 47%|████▋     | 910/1950 [01:24<01:17, 13.34it/s]

{'eval_loss': 0.2227177619934082, 'eval_f1': 0.6137032484651478, 'eval_precision': 0.7618766252244427, 'eval_recall': 0.5694733108614927, 'eval_runtime': 0.675, 'eval_samples_per_second': 385.203, 'eval_steps_per_second': 48.891, 'epoch': 7.0}


 53%|█████▎    | 1039/1950 [01:35<01:07, 13.50it/s]
 53%|█████▎    | 1040/1950 [01:36<01:07, 13.50it/s]

{'eval_loss': 0.22793588042259216, 'eval_f1': 0.6734932486886003, 'eval_precision': 0.731427443231887, 'eval_recall': 0.6274543825934574, 'eval_runtime': 0.6394, 'eval_samples_per_second': 406.607, 'eval_steps_per_second': 51.608, 'epoch': 8.0}


 60%|█████▉    | 1169/1950 [01:47<00:59, 13.08it/s]
 60%|██████    | 1170/1950 [01:47<00:59, 13.08it/s]

{'eval_loss': 0.23680026829242706, 'eval_f1': 0.6723943174369371, 'eval_precision': 0.7160500453847228, 'eval_recall': 0.6475789868906218, 'eval_runtime': 0.6653, 'eval_samples_per_second': 390.8, 'eval_steps_per_second': 49.602, 'epoch': 9.0}


 67%|██████▋   | 1299/1950 [01:59<00:48, 13.29it/s]
 67%|██████▋   | 1300/1950 [02:00<00:48, 13.29it/s]

{'eval_loss': 0.24418295919895172, 'eval_f1': 0.6869066690416938, 'eval_precision': 0.8167886361434747, 'eval_recall': 0.622529049006096, 'eval_runtime': 0.65, 'eval_samples_per_second': 400.006, 'eval_steps_per_second': 50.77, 'epoch': 10.0}


 73%|███████▎  | 1429/1950 [02:11<00:39, 13.09it/s]
 73%|███████▎  | 1430/1950 [02:11<00:39, 13.09it/s]

{'eval_loss': 0.24062380194664001, 'eval_f1': 0.6865654937774197, 'eval_precision': 0.7845245458838567, 'eval_recall': 0.6257867885881304, 'eval_runtime': 0.6704, 'eval_samples_per_second': 387.825, 'eval_steps_per_second': 49.224, 'epoch': 11.0}


 80%|███████▉  | 1559/1950 [02:23<00:29, 13.28it/s]
 80%|████████  | 1560/1950 [02:23<00:29, 13.28it/s]

{'eval_loss': 0.24933157861232758, 'eval_f1': 0.6921407811951071, 'eval_precision': 0.7765169980946409, 'eval_recall': 0.644014272190754, 'eval_runtime': 0.6467, 'eval_samples_per_second': 402.07, 'eval_steps_per_second': 51.032, 'epoch': 12.0}


 87%|████████▋ | 1689/1950 [02:35<00:19, 13.21it/s]
 87%|████████▋ | 1690/1950 [02:35<00:19, 13.21it/s]

{'eval_loss': 0.2526836097240448, 'eval_f1': 0.6941036911038049, 'eval_precision': 0.7601731601731601, 'eval_recall': 0.6485040681091215, 'eval_runtime': 0.6648, 'eval_samples_per_second': 391.085, 'eval_steps_per_second': 49.638, 'epoch': 13.0}


 93%|█████████▎| 1819/1950 [02:47<00:09, 13.37it/s]
 93%|█████████▎| 1820/1950 [02:47<00:09, 13.37it/s]

{'eval_loss': 0.24934826791286469, 'eval_f1': 0.7095416649264693, 'eval_precision': 0.7925215003486421, 'eval_recall': 0.6585286736960371, 'eval_runtime': 0.6595, 'eval_samples_per_second': 394.266, 'eval_steps_per_second': 50.041, 'epoch': 14.0}


100%|█████████▉| 1949/1950 [02:59<00:00, 13.34it/s]
100%|██████████| 1950/1950 [03:02<00:00, 13.34it/s]

{'eval_loss': 0.25013747811317444, 'eval_f1': 0.7089182977130347, 'eval_precision': 0.8038065835518837, 'eval_recall': 0.6520877994740079, 'eval_runtime': 0.8752, 'eval_samples_per_second': 297.064, 'eval_steps_per_second': 37.704, 'epoch': 15.0}


100%|██████████| 1950/1950 [03:04<00:00, 10.57it/s]


{'train_runtime': 184.4982, 'train_samples_per_second': 84.391, 'train_steps_per_second': 10.569, 'train_loss': 0.08618448893229166, 'epoch': 15.0}


100%|██████████| 33/33 [00:00<00:00, 46.36it/s]


------------------ Starting model ==> epochs: 20, batch size: 4, weights of decay: 0.01 ---------------------


0,1
eval/f1,▁▅▆▆▇█▇█▇███████
eval/loss,█▃▂▂▁▁▂▂▃▄▃▄▄▄▄▄
eval/precision,▁▆▅▅██▇▇▇██▇▇███
eval/recall,▁▄▆▆▇▇▇██▇██████
eval/runtime,▁▁▁▁▂▂▂▁▂▂▂▂▂▂█▅
eval/samples_per_second,████▆▇▆▇▇▇▆▇▇▇▁▄
eval/steps_per_second,████▆▇▆▇▇▇▆▇▇▇▁▄
train/epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇███
train/global_step,▁▁▂▃▃▃▄▅▅▅▆▇▇▇███

0,1
eval/f1,0.70954
eval/loss,0.24935
eval/precision,0.79252
eval/recall,0.65853
eval/runtime,0.7514
eval/samples_per_second,346.021
eval/steps_per_second,43.918
total_flos,1024205760576000.0
train/epoch,15.0
train/global_step,1950.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 7614/7614 [00:00<00:00, 22028.57 examples/s]
  5%|▍         | 1522/30460 [01:35<31:38, 15.25it/s]
  5%|▌         | 1523/30460 [01:40<31:38, 15.25it/s]

{'eval_loss': 0.10448677837848663, 'eval_f1': 0.6582195890575006, 'eval_precision': 0.6770544910144284, 'eval_recall': 0.6420168311963016, 'eval_runtime': 5.6135, 'eval_samples_per_second': 271.309, 'eval_steps_per_second': 67.872, 'epoch': 1.0}


 10%|█         | 3046/30460 [03:17<28:46, 15.88it/s]  
 10%|█         | 3046/30460 [03:23<28:46, 15.88it/s]

{'eval_loss': 0.10550930351018906, 'eval_f1': 0.7682486879065407, 'eval_precision': 0.8203377258198651, 'eval_recall': 0.749920345128314, 'eval_runtime': 6.5871, 'eval_samples_per_second': 231.208, 'eval_steps_per_second': 57.84, 'epoch': 2.0}


 15%|█▍        | 4568/30460 [05:00<25:31, 16.90it/s]  
 15%|█▌        | 4569/30460 [05:05<25:31, 16.90it/s]

{'eval_loss': 0.10501212626695633, 'eval_f1': 0.8344555794425823, 'eval_precision': 0.9034524240304835, 'eval_recall': 0.7960834963104064, 'eval_runtime': 5.3424, 'eval_samples_per_second': 285.075, 'eval_steps_per_second': 71.316, 'epoch': 3.0}


 20%|██        | 6092/30460 [06:43<26:52, 15.11it/s]  
 20%|██        | 6092/30460 [06:49<26:52, 15.11it/s]

{'eval_loss': 0.10427603870630264, 'eval_f1': 0.8450080957087917, 'eval_precision': 0.871350976608932, 'eval_recall': 0.8246201709771119, 'eval_runtime': 5.703, 'eval_samples_per_second': 267.052, 'eval_steps_per_second': 66.807, 'epoch': 4.0}


 25%|██▍       | 7614/30460 [08:25<24:36, 15.48it/s]  
 25%|██▌       | 7615/30460 [08:31<24:35, 15.48it/s]

{'eval_loss': 0.11354140192270279, 'eval_f1': 0.8557278585550278, 'eval_precision': 0.8601369793256437, 'eval_recall': 0.8538569380790022, 'eval_runtime': 6.0818, 'eval_samples_per_second': 250.421, 'eval_steps_per_second': 62.646, 'epoch': 5.0}


 30%|███       | 9138/30460 [10:07<20:29, 17.34it/s]  
 30%|███       | 9138/30460 [10:12<20:29, 17.34it/s]

{'eval_loss': 0.11724736541509628, 'eval_f1': 0.8610599904813648, 'eval_precision': 0.8832733677987188, 'eval_recall': 0.8446927748910327, 'eval_runtime': 5.4966, 'eval_samples_per_second': 277.081, 'eval_steps_per_second': 69.316, 'epoch': 6.0}


 33%|███▎      | 10002/30460 [11:08<22:25, 15.20it/s] 

{'loss': 0.0683, 'grad_norm': 0.027898017317056656, 'learning_rate': 3.3585029546946817e-05, 'epoch': 6.57}


 35%|███▍      | 10660/30460 [11:49<19:48, 16.65it/s]
 35%|███▌      | 10661/30460 [11:55<19:48, 16.65it/s]

{'eval_loss': 0.1200980618596077, 'eval_f1': 0.8588555914038503, 'eval_precision': 0.8759683988483491, 'eval_recall': 0.8441305068422734, 'eval_runtime': 6.0508, 'eval_samples_per_second': 251.702, 'eval_steps_per_second': 62.967, 'epoch': 7.0}


 40%|████      | 12184/30460 [13:30<18:26, 16.52it/s]  
 40%|████      | 12184/30460 [13:36<18:26, 16.52it/s]

{'eval_loss': 0.12143341451883316, 'eval_f1': 0.8656531069259723, 'eval_precision': 0.873769890642845, 'eval_recall': 0.8606230683668524, 'eval_runtime': 5.6667, 'eval_samples_per_second': 268.765, 'eval_steps_per_second': 67.235, 'epoch': 8.0}


 45%|████▍     | 13706/30460 [15:11<17:11, 16.24it/s]  
 45%|████▌     | 13707/30460 [15:17<17:11, 16.24it/s]

{'eval_loss': 0.12828806042671204, 'eval_f1': 0.8768080722219288, 'eval_precision': 0.8867231742147477, 'eval_recall': 0.8694916664860707, 'eval_runtime': 5.4833, 'eval_samples_per_second': 277.75, 'eval_steps_per_second': 69.483, 'epoch': 9.0}


 50%|█████     | 15230/30460 [16:53<16:39, 15.23it/s]  
 50%|█████     | 15230/30460 [16:59<16:39, 15.23it/s]

{'eval_loss': 0.13105027377605438, 'eval_f1': 0.8623585346927438, 'eval_precision': 0.8709458368375006, 'eval_recall': 0.8546527514875495, 'eval_runtime': 6.7631, 'eval_samples_per_second': 225.191, 'eval_steps_per_second': 56.335, 'epoch': 10.0}


 55%|█████▍    | 16752/30460 [18:36<14:12, 16.08it/s]  
 55%|█████▌    | 16753/30460 [18:42<14:12, 16.08it/s]

{'eval_loss': 0.1447814404964447, 'eval_f1': 0.8633992337993929, 'eval_precision': 0.8705736177199965, 'eval_recall': 0.8626188124236315, 'eval_runtime': 6.0301, 'eval_samples_per_second': 252.567, 'eval_steps_per_second': 63.183, 'epoch': 11.0}


 60%|██████    | 18276/30460 [20:18<13:14, 15.34it/s]  
 60%|██████    | 18276/30460 [20:23<13:14, 15.34it/s]

{'eval_loss': 0.14371423423290253, 'eval_f1': 0.8772625410521903, 'eval_precision': 0.9069130910113011, 'eval_recall': 0.8543801676909383, 'eval_runtime': 5.2588, 'eval_samples_per_second': 289.61, 'eval_steps_per_second': 72.45, 'epoch': 12.0}


 65%|██████▍   | 19798/30460 [21:59<11:42, 15.18it/s]  
 65%|██████▌   | 19799/30460 [22:05<11:42, 15.18it/s]

{'eval_loss': 0.1555105596780777, 'eval_f1': 0.8617171484028601, 'eval_precision': 0.8535463002205722, 'eval_recall': 0.8721947866151085, 'eval_runtime': 5.9021, 'eval_samples_per_second': 258.044, 'eval_steps_per_second': 64.553, 'epoch': 13.0}


 66%|██████▌   | 20002/30460 [22:19<11:20, 15.37it/s]  

{'loss': 0.0138, 'grad_norm': 0.006395579315721989, 'learning_rate': 1.717005909389363e-05, 'epoch': 13.13}


 70%|███████   | 21322/30460 [23:41<08:51, 17.21it/s]
 70%|███████   | 21322/30460 [23:47<08:51, 17.21it/s]

{'eval_loss': 0.14977353811264038, 'eval_f1': 0.8658354418527383, 'eval_precision': 0.8878579049302887, 'eval_recall': 0.8497559544979888, 'eval_runtime': 5.6294, 'eval_samples_per_second': 270.542, 'eval_steps_per_second': 67.68, 'epoch': 14.0}


 75%|███████▍  | 22844/30460 [25:23<07:51, 16.15it/s]  
 75%|███████▌  | 22845/30460 [25:29<07:51, 16.15it/s]

{'eval_loss': 0.15697874128818512, 'eval_f1': 0.8549055894303917, 'eval_precision': 0.8627551740523438, 'eval_recall': 0.8479636421332092, 'eval_runtime': 5.3853, 'eval_samples_per_second': 282.809, 'eval_steps_per_second': 70.749, 'epoch': 15.0}


 80%|████████  | 24368/30460 [27:05<06:29, 15.66it/s]  
 80%|████████  | 24368/30460 [27:11<06:29, 15.66it/s]

{'eval_loss': 0.15626834332942963, 'eval_f1': 0.8643480236653099, 'eval_precision': 0.8779957004634424, 'eval_recall': 0.8525115982993414, 'eval_runtime': 6.4679, 'eval_samples_per_second': 235.471, 'eval_steps_per_second': 58.906, 'epoch': 16.0}


 85%|████████▍ | 25890/30460 [28:48<04:36, 16.52it/s]  
 85%|████████▌ | 25891/30460 [28:54<04:36, 16.52it/s]

{'eval_loss': 0.15183183550834656, 'eval_f1': 0.872776610653658, 'eval_precision': 0.8776581825968879, 'eval_recall': 0.8688495077664934, 'eval_runtime': 5.6515, 'eval_samples_per_second': 269.488, 'eval_steps_per_second': 67.416, 'epoch': 17.0}


 90%|█████████ | 27414/30460 [30:32<03:15, 15.57it/s]  
 90%|█████████ | 27414/30460 [30:37<03:15, 15.57it/s]

{'eval_loss': 0.16028772294521332, 'eval_f1': 0.8716712926015742, 'eval_precision': 0.8969973170286595, 'eval_recall': 0.8538070658063736, 'eval_runtime': 5.407, 'eval_samples_per_second': 281.671, 'eval_steps_per_second': 70.464, 'epoch': 18.0}


 95%|█████████▍| 28936/30460 [32:15<01:37, 15.57it/s]
 95%|█████████▌| 28937/30460 [32:20<01:37, 15.57it/s]

{'eval_loss': 0.15818195044994354, 'eval_f1': 0.8730547424459308, 'eval_precision': 0.8911255341801911, 'eval_recall': 0.8593773823875933, 'eval_runtime': 5.5312, 'eval_samples_per_second': 275.347, 'eval_steps_per_second': 68.882, 'epoch': 19.0}


 98%|█████████▊| 30002/30460 [33:29<00:29, 15.40it/s]

{'loss': 0.003, 'grad_norm': 0.0020209464710205793, 'learning_rate': 7.550886408404465e-07, 'epoch': 19.7}


100%|██████████| 30460/30460 [33:58<00:00, 14.64it/s]
100%|██████████| 30460/30460 [34:06<00:00, 14.64it/s]

{'eval_loss': 0.15919309854507446, 'eval_f1': 0.8759931487894514, 'eval_precision': 0.8925508993489871, 'eval_recall': 0.8625971762252362, 'eval_runtime': 6.4441, 'eval_samples_per_second': 236.341, 'eval_steps_per_second': 59.124, 'epoch': 20.0}


100%|██████████| 30460/30460 [34:08<00:00, 14.87it/s]


{'train_runtime': 2048.7434, 'train_samples_per_second': 59.461, 'train_steps_per_second': 14.868, 'train_loss': 0.027962893365991795, 'epoch': 20.0}


100%|██████████| 381/381 [00:05<00:00, 72.28it/s]


0,1
eval/f1,▁▅▇▇▇▇▇███████▇██████
eval/loss,▁▁▁▁▂▃▃▃▄▄▆▆▇▇█▇▇███▆
eval/precision,▁▅█▇▇▇▇▇▇▇▇█▆▇▇▇▇████
eval/recall,▁▄▆▇▇▇▇██▇█▇█▇▇▇█▇██▇
eval/runtime,▃▇▁▃▅▂▅▃▂█▅▁▄▃▂▇▃▂▂▇▁
eval/samples_per_second,▆▂█▆▄▇▄▆▇▁▄█▅▆▇▂▆▇▆▂█
eval/steps_per_second,▆▂█▆▄▇▄▆▇▁▄█▅▆▇▂▆▇▆▂█
train/epoch,▁▁▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▇▇▇█████
train/global_step,▁▁▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▇▇▇█████
train/grad_norm,█▂▁

0,1
eval/f1,0.87726
eval/loss,0.14371
eval/precision,0.90691
eval/recall,0.85438
eval/runtime,5.2994
eval/samples_per_second,287.393
eval/steps_per_second,71.895
total_flos,8013406920576000.0
train/epoch,20.0
train/global_step,30460.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1884/1884 [00:00<00:00, 16743.20 examples/s]
  5%|▌         | 377/7540 [00:23<08:05, 14.76it/s]
  5%|▌         | 377/7540 [00:25<08:05, 14.76it/s]

{'eval_loss': 0.30170226097106934, 'eval_f1': 0.4704222585924713, 'eval_precision': 0.5294986348969968, 'eval_recall': 0.427402112209086, 'eval_runtime': 1.7646, 'eval_samples_per_second': 213.65, 'eval_steps_per_second': 53.838, 'epoch': 1.0}


 10%|▉         | 753/7540 [00:49<07:37, 14.83it/s]  
 10%|█         | 754/7540 [00:51<07:37, 14.83it/s]

{'eval_loss': 0.30159834027290344, 'eval_f1': 0.566278331722187, 'eval_precision': 0.6462142238935147, 'eval_recall': 0.5100795553283506, 'eval_runtime': 1.9441, 'eval_samples_per_second': 193.916, 'eval_steps_per_second': 48.865, 'epoch': 2.0}


 15%|█▌        | 1131/7540 [01:16<06:09, 17.35it/s] 
 15%|█▌        | 1131/7540 [01:18<06:09, 17.35it/s]

{'eval_loss': 0.35925066471099854, 'eval_f1': 0.6848350433360892, 'eval_precision': 0.7612709192335678, 'eval_recall': 0.6458212872910474, 'eval_runtime': 1.5768, 'eval_samples_per_second': 239.095, 'eval_steps_per_second': 60.249, 'epoch': 3.0}


 20%|█▉        | 1507/7540 [01:43<06:19, 15.90it/s]
 20%|██        | 1508/7540 [01:45<06:19, 15.90it/s]

{'eval_loss': 0.34929943084716797, 'eval_f1': 0.727475845410628, 'eval_precision': 0.7513953854015172, 'eval_recall': 0.7138141987779218, 'eval_runtime': 1.3835, 'eval_samples_per_second': 272.496, 'eval_steps_per_second': 68.666, 'epoch': 4.0}


 25%|██▌       | 1885/7540 [02:10<05:44, 16.43it/s]
 25%|██▌       | 1885/7540 [02:11<05:44, 16.43it/s]

{'eval_loss': 0.4490513503551483, 'eval_f1': 0.6980172732083586, 'eval_precision': 0.7458844585631702, 'eval_recall': 0.6804484453767035, 'eval_runtime': 1.2831, 'eval_samples_per_second': 293.825, 'eval_steps_per_second': 74.041, 'epoch': 5.0}


 30%|██▉       | 2261/7540 [02:36<05:05, 17.27it/s]
 30%|███       | 2262/7540 [02:37<05:05, 17.27it/s]

{'eval_loss': 0.46956151723861694, 'eval_f1': 0.710560496737911, 'eval_precision': 0.7416010245013605, 'eval_recall': 0.7070033739700479, 'eval_runtime': 1.222, 'eval_samples_per_second': 308.522, 'eval_steps_per_second': 77.744, 'epoch': 6.0}


 35%|███▌      | 2639/7540 [03:03<05:08, 15.87it/s]
 35%|███▌      | 2639/7540 [03:04<05:08, 15.87it/s]

{'eval_loss': 0.47289812564849854, 'eval_f1': 0.7373324157593335, 'eval_precision': 0.7587638184805953, 'eval_recall': 0.7326311821446911, 'eval_runtime': 1.3102, 'eval_samples_per_second': 287.751, 'eval_steps_per_second': 72.51, 'epoch': 7.0}


 40%|███▉      | 3015/7540 [03:30<04:42, 15.99it/s]
 40%|████      | 3016/7540 [03:31<04:42, 15.99it/s]

{'eval_loss': 0.47716349363327026, 'eval_f1': 0.7246788264885231, 'eval_precision': 0.7401285023820235, 'eval_recall': 0.7182818793818306, 'eval_runtime': 1.2494, 'eval_samples_per_second': 301.743, 'eval_steps_per_second': 76.036, 'epoch': 8.0}


 45%|████▌     | 3393/7540 [03:56<03:59, 17.31it/s]
 45%|████▌     | 3393/7540 [03:58<03:59, 17.31it/s]

{'eval_loss': 0.5082098245620728, 'eval_f1': 0.7345280804587053, 'eval_precision': 0.7707075948904221, 'eval_recall': 0.7168975291227444, 'eval_runtime': 1.2871, 'eval_samples_per_second': 292.91, 'eval_steps_per_second': 73.81, 'epoch': 9.0}


 50%|████▉     | 3769/7540 [04:24<04:00, 15.70it/s]
 50%|█████     | 3770/7540 [04:25<04:00, 15.70it/s]

{'eval_loss': 0.5092110633850098, 'eval_f1': 0.7324751489718231, 'eval_precision': 0.744945232647553, 'eval_recall': 0.7314894438768227, 'eval_runtime': 1.2385, 'eval_samples_per_second': 304.397, 'eval_steps_per_second': 76.705, 'epoch': 10.0}


 55%|█████▌    | 4147/7540 [04:50<03:44, 15.13it/s]
 55%|█████▌    | 4147/7540 [04:52<03:44, 15.13it/s]

{'eval_loss': 0.5441771745681763, 'eval_f1': 0.7303070889627655, 'eval_precision': 0.7262560122552653, 'eval_recall': 0.7520901415710285, 'eval_runtime': 1.2933, 'eval_samples_per_second': 291.495, 'eval_steps_per_second': 73.454, 'epoch': 11.0}


 60%|█████▉    | 4523/7540 [05:17<03:06, 16.20it/s]
 60%|██████    | 4524/7540 [05:18<03:06, 16.20it/s]

{'eval_loss': 0.5627690553665161, 'eval_f1': 0.7389139028099357, 'eval_precision': 0.7499370598441806, 'eval_recall': 0.7433282171201118, 'eval_runtime': 1.3424, 'eval_samples_per_second': 280.835, 'eval_steps_per_second': 70.767, 'epoch': 12.0}


 65%|██████▌   | 4901/7540 [05:44<02:47, 15.74it/s]
 65%|██████▌   | 4901/7540 [05:45<02:47, 15.74it/s]

{'eval_loss': 0.5793030858039856, 'eval_f1': 0.7344713978978417, 'eval_precision': 0.7383852353687002, 'eval_recall': 0.7382166021031147, 'eval_runtime': 1.2631, 'eval_samples_per_second': 298.462, 'eval_steps_per_second': 75.209, 'epoch': 13.0}


 70%|██████▉   | 5277/7540 [06:10<02:22, 15.85it/s]
 70%|███████   | 5278/7540 [06:12<02:22, 15.85it/s]

{'eval_loss': 0.567049503326416, 'eval_f1': 0.7382580848701149, 'eval_precision': 0.740060410945284, 'eval_recall': 0.7396278917357482, 'eval_runtime': 1.4616, 'eval_samples_per_second': 257.931, 'eval_steps_per_second': 64.996, 'epoch': 14.0}


 75%|███████▌  | 5655/7540 [06:37<01:57, 15.98it/s]
 75%|███████▌  | 5655/7540 [06:39<01:57, 15.98it/s]

{'eval_loss': 0.5852558612823486, 'eval_f1': 0.7399312585122915, 'eval_precision': 0.7470321318095269, 'eval_recall': 0.735894660504006, 'eval_runtime': 1.4026, 'eval_samples_per_second': 268.785, 'eval_steps_per_second': 67.731, 'epoch': 15.0}


 80%|███████▉  | 6031/7540 [07:04<01:39, 15.16it/s]
 80%|████████  | 6032/7540 [07:05<01:39, 15.16it/s]

{'eval_loss': 0.6081244349479675, 'eval_f1': 0.741876657558347, 'eval_precision': 0.7610093634974018, 'eval_recall': 0.7281751461850547, 'eval_runtime': 1.3309, 'eval_samples_per_second': 283.276, 'eval_steps_per_second': 71.383, 'epoch': 16.0}


 85%|████████▌ | 6409/7540 [07:30<01:11, 15.84it/s]
 85%|████████▌ | 6409/7540 [07:32<01:11, 15.84it/s]

{'eval_loss': 0.5981758236885071, 'eval_f1': 0.7341433020123024, 'eval_precision': 0.7378979751077682, 'eval_recall': 0.7339896149837133, 'eval_runtime': 1.3969, 'eval_samples_per_second': 269.892, 'eval_steps_per_second': 68.01, 'epoch': 17.0}


 90%|████████▉ | 6785/7540 [07:57<00:47, 15.82it/s]
 90%|█████████ | 6786/7540 [07:58<00:47, 15.82it/s]

{'eval_loss': 0.6021186113357544, 'eval_f1': 0.7339682259429272, 'eval_precision': 0.7424835038356375, 'eval_recall': 0.7286689876261592, 'eval_runtime': 1.3883, 'eval_samples_per_second': 271.554, 'eval_steps_per_second': 68.429, 'epoch': 18.0}


 95%|█████████▌| 7163/7540 [08:24<00:23, 16.03it/s]
 95%|█████████▌| 7163/7540 [08:25<00:23, 16.03it/s]

{'eval_loss': 0.6080227494239807, 'eval_f1': 0.7430528373792251, 'eval_precision': 0.7490776180537182, 'eval_recall': 0.7403940978436079, 'eval_runtime': 1.3761, 'eval_samples_per_second': 273.962, 'eval_steps_per_second': 69.035, 'epoch': 19.0}


100%|█████████▉| 7539/7540 [08:50<00:00, 17.05it/s]
100%|██████████| 7540/7540 [08:53<00:00, 17.05it/s]

{'eval_loss': 0.6135283708572388, 'eval_f1': 0.7365398081591157, 'eval_precision': 0.742676266898287, 'eval_recall': 0.7349146457888133, 'eval_runtime': 1.4011, 'eval_samples_per_second': 269.068, 'eval_steps_per_second': 67.802, 'epoch': 20.0}


100%|██████████| 7540/7540 [08:58<00:00, 13.99it/s]


{'train_runtime': 538.9098, 'train_samples_per_second': 55.928, 'train_steps_per_second': 13.991, 'train_loss': 0.0699595727085751, 'epoch': 20.0}


100%|██████████| 95/95 [00:01<00:00, 66.55it/s]


0,1
eval/f1,▁▃▇█▇▇███████████████
eval/loss,▁▁▂▂▄▅▅▅▆▆▆▇▇▇▇██████
eval/precision,▁▄█▇▇▇█▇█▇▇▇▇▇▇█▇▇▇▇▇
eval/recall,▁▃▆▇▆▇█▇▇██████▇█▇███
eval/runtime,▆█▄▃▂▁▂▁▂▁▂▂▁▃▃▂▃▃▂▃▄
eval/samples_per_second,▂▁▄▆▇█▇█▇█▇▆▇▅▆▆▆▆▆▆▄
eval/steps_per_second,▂▁▄▆▇█▇█▇█▇▆▇▅▆▆▆▆▆▆▄
train/epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇████
train/global_step,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇████

0,1
eval/f1,0.74305
eval/loss,0.60802
eval/precision,0.74908
eval/recall,0.74039
eval/runtime,1.5068
eval/samples_per_second,250.206
eval/steps_per_second,63.049
total_flos,1982595203466240.0
train/epoch,20.0
train/global_step,7540.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1298/1298 [00:00<00:00, 13867.45 examples/s]
  5%|▍         | 259/5200 [00:16<05:11, 15.85it/s]
  5%|▌         | 260/5200 [00:17<05:11, 15.85it/s]

{'eval_loss': 0.2930745780467987, 'eval_f1': 0.2540457061020546, 'eval_precision': 0.4971072541165999, 'eval_recall': 0.20838992814426446, 'eval_runtime': 1.0143, 'eval_samples_per_second': 256.336, 'eval_steps_per_second': 64.084, 'epoch': 1.0}


 10%|▉         | 519/5200 [00:35<04:39, 16.76it/s]
 10%|█         | 520/5200 [00:35<04:39, 16.76it/s]

{'eval_loss': 0.22624893486499786, 'eval_f1': 0.518949893401782, 'eval_precision': 0.6061902766971892, 'eval_recall': 0.46221344386878294, 'eval_runtime': 0.8301, 'eval_samples_per_second': 313.233, 'eval_steps_per_second': 78.308, 'epoch': 2.0}


 15%|█▍        | 779/5200 [00:53<04:46, 15.41it/s]
 15%|█▌        | 780/5200 [00:54<04:46, 15.41it/s]

{'eval_loss': 0.2348979264497757, 'eval_f1': 0.520248991144353, 'eval_precision': 0.5844958823682228, 'eval_recall': 0.48602043041335924, 'eval_runtime': 0.9724, 'eval_samples_per_second': 267.392, 'eval_steps_per_second': 66.848, 'epoch': 3.0}


 20%|█▉        | 1039/5200 [01:13<04:10, 16.62it/s]
 20%|██        | 1040/5200 [01:14<04:10, 16.62it/s]

{'eval_loss': 0.24614502489566803, 'eval_f1': 0.5668328185012813, 'eval_precision': 0.7458512352309346, 'eval_recall': 0.5184570661056325, 'eval_runtime': 1.0179, 'eval_samples_per_second': 255.438, 'eval_steps_per_second': 63.86, 'epoch': 4.0}


 25%|██▍       | 1299/5200 [01:32<04:24, 14.76it/s]
 25%|██▌       | 1300/5200 [01:33<04:24, 14.76it/s]

{'eval_loss': 0.23555758595466614, 'eval_f1': 0.6606551569475145, 'eval_precision': 0.8485538912333724, 'eval_recall': 0.5919073346190116, 'eval_runtime': 1.1551, 'eval_samples_per_second': 225.09, 'eval_steps_per_second': 56.273, 'epoch': 5.0}


 30%|██▉       | 1559/5200 [01:51<03:37, 16.75it/s]
 30%|███       | 1560/5200 [01:52<03:37, 16.75it/s]

{'eval_loss': 0.22940517961978912, 'eval_f1': 0.685341510174952, 'eval_precision': 0.7679628183933265, 'eval_recall': 0.6248164544497963, 'eval_runtime': 0.9109, 'eval_samples_per_second': 285.429, 'eval_steps_per_second': 71.357, 'epoch': 6.0}


 35%|███▍      | 1819/5200 [02:11<03:25, 16.46it/s]
 35%|███▌      | 1820/5200 [02:12<03:25, 16.46it/s]

{'eval_loss': 0.24995973706245422, 'eval_f1': 0.67245332346239, 'eval_precision': 0.7706099257884972, 'eval_recall': 0.6319214788905817, 'eval_runtime': 0.944, 'eval_samples_per_second': 275.437, 'eval_steps_per_second': 68.859, 'epoch': 7.0}


 40%|███▉      | 2079/5200 [02:30<03:25, 15.20it/s]
 40%|████      | 2080/5200 [02:31<03:25, 15.20it/s]

{'eval_loss': 0.25283342599868774, 'eval_f1': 0.6644658085422728, 'eval_precision': 0.7513090944296483, 'eval_recall': 0.6080828753396628, 'eval_runtime': 1.2344, 'eval_samples_per_second': 210.622, 'eval_steps_per_second': 52.655, 'epoch': 8.0}


 45%|████▍     | 2339/5200 [02:48<03:03, 15.59it/s]
 45%|████▌     | 2340/5200 [02:49<03:03, 15.59it/s]

{'eval_loss': 0.25592273473739624, 'eval_f1': 0.706363243271422, 'eval_precision': 0.795963659108286, 'eval_recall': 0.6436459735513511, 'eval_runtime': 0.868, 'eval_samples_per_second': 299.533, 'eval_steps_per_second': 74.883, 'epoch': 9.0}


 50%|████▉     | 2599/5200 [03:08<02:43, 15.95it/s]
 50%|█████     | 2600/5200 [03:09<02:43, 15.95it/s]

{'eval_loss': 0.2595635652542114, 'eval_f1': 0.6692968374286367, 'eval_precision': 0.7683203261736012, 'eval_recall': 0.6187451195927464, 'eval_runtime': 0.889, 'eval_samples_per_second': 292.461, 'eval_steps_per_second': 73.115, 'epoch': 10.0}


 55%|█████▍    | 2859/5200 [03:27<02:30, 15.59it/s]
 55%|█████▌    | 2860/5200 [03:28<02:30, 15.59it/s]

{'eval_loss': 0.24804463982582092, 'eval_f1': 0.7354382967154692, 'eval_precision': 0.8139284129800989, 'eval_recall': 0.6934590056350061, 'eval_runtime': 1.0908, 'eval_samples_per_second': 238.352, 'eval_steps_per_second': 59.588, 'epoch': 11.0}


 60%|█████▉    | 3119/5200 [03:46<02:08, 16.16it/s]
 60%|██████    | 3120/5200 [03:47<02:08, 16.16it/s]

{'eval_loss': 0.30200228095054626, 'eval_f1': 0.6964869736248682, 'eval_precision': 0.8010361980779247, 'eval_recall': 0.6437337339557024, 'eval_runtime': 0.8596, 'eval_samples_per_second': 302.481, 'eval_steps_per_second': 75.62, 'epoch': 12.0}


 65%|██████▍   | 3379/5200 [04:05<01:51, 16.36it/s]
 65%|██████▌   | 3380/5200 [04:06<01:51, 16.36it/s]

{'eval_loss': 0.2575932443141937, 'eval_f1': 0.7372756685421871, 'eval_precision': 0.8242951189692468, 'eval_recall': 0.6942138826858896, 'eval_runtime': 0.9606, 'eval_samples_per_second': 270.664, 'eval_steps_per_second': 67.666, 'epoch': 13.0}


 70%|██████▉   | 3639/5200 [04:24<01:39, 15.71it/s]
 70%|███████   | 3640/5200 [04:25<01:39, 15.71it/s]

{'eval_loss': 0.26782307028770447, 'eval_f1': 0.7294239554161345, 'eval_precision': 0.8088234716328776, 'eval_recall': 0.6811573299615769, 'eval_runtime': 0.9617, 'eval_samples_per_second': 270.361, 'eval_steps_per_second': 67.59, 'epoch': 14.0}


 75%|███████▍  | 3899/5200 [04:43<01:18, 16.54it/s]
 75%|███████▌  | 3900/5200 [04:44<01:18, 16.54it/s]

{'eval_loss': 0.2781018912792206, 'eval_f1': 0.722791820160014, 'eval_precision': 0.7968813787480632, 'eval_recall': 0.6833190676290474, 'eval_runtime': 1.1045, 'eval_samples_per_second': 235.41, 'eval_steps_per_second': 58.853, 'epoch': 15.0}


 80%|███████▉  | 4159/5200 [05:02<01:09, 14.92it/s]
 80%|████████  | 4160/5200 [05:03<01:09, 14.92it/s]

{'eval_loss': 0.28384020924568176, 'eval_f1': 0.7129993044506074, 'eval_precision': 0.7802820154035641, 'eval_recall': 0.6736950587670235, 'eval_runtime': 0.9066, 'eval_samples_per_second': 286.772, 'eval_steps_per_second': 71.693, 'epoch': 16.0}


 85%|████████▍ | 4419/5200 [05:21<00:47, 16.60it/s]
 85%|████████▌ | 4420/5200 [05:22<00:46, 16.60it/s]

{'eval_loss': 0.2781849801540375, 'eval_f1': 0.7258203355422623, 'eval_precision': 0.8003873175102838, 'eval_recall': 0.6813397007822454, 'eval_runtime': 0.9566, 'eval_samples_per_second': 271.785, 'eval_steps_per_second': 67.946, 'epoch': 17.0}


 90%|████████▉ | 4679/5200 [05:40<00:35, 14.84it/s]
 90%|█████████ | 4680/5200 [05:41<00:35, 14.84it/s]

{'eval_loss': 0.2856522798538208, 'eval_f1': 0.7209787072723827, 'eval_precision': 0.7895292207792208, 'eval_recall': 0.6809778539158392, 'eval_runtime': 1.276, 'eval_samples_per_second': 203.767, 'eval_steps_per_second': 50.942, 'epoch': 18.0}


 95%|█████████▍| 4939/5200 [05:59<00:16, 15.91it/s]
 95%|█████████▌| 4940/5200 [06:00<00:16, 15.91it/s]

{'eval_loss': 0.2892530560493469, 'eval_f1': 0.7230244550650449, 'eval_precision': 0.796432436611008, 'eval_recall': 0.6807954830951707, 'eval_runtime': 0.9264, 'eval_samples_per_second': 280.667, 'eval_steps_per_second': 70.167, 'epoch': 19.0}


100%|█████████▉| 5199/5200 [06:19<00:00, 16.40it/s]
100%|██████████| 5200/5200 [06:21<00:00, 16.40it/s]

{'eval_loss': 0.2891383171081543, 'eval_f1': 0.7236135566850743, 'eval_precision': 0.7947657699443413, 'eval_recall': 0.6838349967729822, 'eval_runtime': 0.9639, 'eval_samples_per_second': 269.743, 'eval_steps_per_second': 67.436, 'epoch': 20.0}


100%|██████████| 5200/5200 [06:23<00:00, 13.56it/s]


{'train_runtime': 383.3463, 'train_samples_per_second': 54.155, 'train_steps_per_second': 13.565, 'train_loss': 0.05745114253117488, 'epoch': 20.0}


100%|██████████| 65/65 [00:00<00:00, 70.36it/s]


------------------ Starting model ==> epochs: 20, batch size: 4, weights of decay: 0.001 ---------------------


0,1
eval/f1,▁▅▅▆▇▇▇▇█▇█▇█████████
eval/loss,▇▁▂▃▂▁▃▃▄▄▃█▄▅▆▆▆▆▇▇▄
eval/precision,▁▃▃▆█▆▆▆▇▆▇▇█▇▇▇▇▇▇▇█
eval/recall,▁▅▅▅▇▇▇▇▇▇█▇█████████
eval/runtime,▄▁▃▄▆▂▃▇▂▂▅▁▃▃▅▂▃█▃▃▃
eval/samples_per_second,▄█▅▄▂▆▆▁▇▇▃▇▅▅▃▆▅▁▆▅▅
eval/steps_per_second,▄█▅▄▂▆▆▁▇▇▃▇▅▅▃▆▅▁▆▅▅
train/epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇████
train/global_step,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇████

0,1
eval/f1,0.73728
eval/loss,0.25759
eval/precision,0.8243
eval/recall,0.69421
eval/runtime,0.9491
eval/samples_per_second,273.951
eval/steps_per_second,68.488
total_flos,1365607680768000.0
train/epoch,20.0
train/global_step,5200.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 7614/7614 [00:00<00:00, 13286.77 examples/s]
  5%|▍         | 1522/30460 [01:35<28:16, 17.06it/s]
  5%|▌         | 1523/30460 [01:41<28:16, 17.06it/s]

{'eval_loss': 0.112165667116642, 'eval_f1': 0.6528301870677629, 'eval_precision': 0.6789549090952488, 'eval_recall': 0.6300368224199542, 'eval_runtime': 5.3356, 'eval_samples_per_second': 285.441, 'eval_steps_per_second': 71.407, 'epoch': 1.0}


 10%|█         | 3046/30460 [03:18<28:31, 16.01it/s]  
 10%|█         | 3046/30460 [03:23<28:31, 16.01it/s]

{'eval_loss': 0.11807902902364731, 'eval_f1': 0.7405199659962544, 'eval_precision': 0.893121511381488, 'eval_recall': 0.710428562406333, 'eval_runtime': 5.3943, 'eval_samples_per_second': 282.337, 'eval_steps_per_second': 70.631, 'epoch': 2.0}


 15%|█▍        | 4568/30460 [04:59<27:34, 15.65it/s]  
 15%|█▌        | 4569/30460 [05:06<27:34, 15.65it/s]

{'eval_loss': 0.10771065950393677, 'eval_f1': 0.7977554817294294, 'eval_precision': 0.8485756269901353, 'eval_recall': 0.7684273973468827, 'eval_runtime': 6.6344, 'eval_samples_per_second': 229.56, 'eval_steps_per_second': 57.428, 'epoch': 3.0}


 20%|██        | 6092/30460 [06:43<24:08, 16.82it/s]  
 20%|██        | 6092/30460 [06:49<24:08, 16.82it/s]

{'eval_loss': 0.11615699529647827, 'eval_f1': 0.8254013676016291, 'eval_precision': 0.8425977192866583, 'eval_recall': 0.8101056047655283, 'eval_runtime': 5.5094, 'eval_samples_per_second': 276.437, 'eval_steps_per_second': 69.155, 'epoch': 4.0}


 25%|██▍       | 7614/30460 [08:27<22:23, 17.00it/s]  
 25%|██▌       | 7615/30460 [08:32<22:23, 17.00it/s]

{'eval_loss': 0.1074962317943573, 'eval_f1': 0.8198978185430589, 'eval_precision': 0.8443336863992551, 'eval_recall': 0.8014376520462709, 'eval_runtime': 5.1348, 'eval_samples_per_second': 296.603, 'eval_steps_per_second': 74.199, 'epoch': 5.0}


 30%|███       | 9138/30460 [10:09<23:18, 15.25it/s]  
 30%|███       | 9138/30460 [10:15<23:18, 15.25it/s]

{'eval_loss': 0.10722072422504425, 'eval_f1': 0.8653192659018524, 'eval_precision': 0.8875556484809705, 'eval_recall': 0.8461713567169022, 'eval_runtime': 6.509, 'eval_samples_per_second': 233.983, 'eval_steps_per_second': 58.534, 'epoch': 6.0}


 33%|███▎      | 10002/30460 [11:11<20:26, 16.68it/s] 

{'loss': 0.0756, 'grad_norm': 0.21369463205337524, 'learning_rate': 3.3585029546946817e-05, 'epoch': 6.57}


 35%|███▌      | 10661/30460 [11:58<19:07, 17.26it/s]

{'eval_loss': 0.12457729130983353, 'eval_f1': 0.8546574866949811, 'eval_precision': 0.8678944827347103, 'eval_recall': 0.8429187913871491, 'eval_runtime': 5.8631, 'eval_samples_per_second': 259.758, 'eval_steps_per_second': 64.982, 'epoch': 7.0}


 40%|████      | 12184/30460 [13:36<18:30, 16.46it/s]  
 40%|████      | 12184/30460 [13:42<18:30, 16.46it/s]

{'eval_loss': 0.14172233641147614, 'eval_f1': 0.8517375701382128, 'eval_precision': 0.8536146256855351, 'eval_recall': 0.8514100704224352, 'eval_runtime': 5.3559, 'eval_samples_per_second': 284.359, 'eval_steps_per_second': 71.136, 'epoch': 8.0}


 45%|████▍     | 13706/30460 [15:19<18:31, 15.08it/s]  
 45%|████▌     | 13707/30460 [15:25<18:31, 15.08it/s]

{'eval_loss': 0.13705386221408844, 'eval_f1': 0.8377937819844323, 'eval_precision': 0.8440871727433678, 'eval_recall': 0.8405672358021261, 'eval_runtime': 5.9154, 'eval_samples_per_second': 257.463, 'eval_steps_per_second': 64.408, 'epoch': 9.0}


 50%|█████     | 15230/30460 [17:02<16:08, 15.73it/s]  
 50%|█████     | 15230/30460 [17:08<16:08, 15.73it/s]

{'eval_loss': 0.14506247639656067, 'eval_f1': 0.8616344668250514, 'eval_precision': 0.879582971966469, 'eval_recall': 0.8477032024731195, 'eval_runtime': 6.6489, 'eval_samples_per_second': 229.059, 'eval_steps_per_second': 57.302, 'epoch': 10.0}


 55%|█████▍    | 16752/30460 [18:45<13:16, 17.21it/s]  
 55%|█████▌    | 16753/30460 [18:51<13:16, 17.21it/s]

{'eval_loss': 0.14436917006969452, 'eval_f1': 0.8527807988406425, 'eval_precision': 0.8498208403682057, 'eval_recall': 0.857353080799805, 'eval_runtime': 5.3567, 'eval_samples_per_second': 284.319, 'eval_steps_per_second': 71.127, 'epoch': 11.0}


 60%|██████    | 18276/30460 [20:28<13:24, 15.14it/s]  
 60%|██████    | 18276/30460 [20:34<13:24, 15.14it/s]

{'eval_loss': 0.1415310800075531, 'eval_f1': 0.8721958252070338, 'eval_precision': 0.8813480223645226, 'eval_recall': 0.8684201990799688, 'eval_runtime': 5.7963, 'eval_samples_per_second': 262.753, 'eval_steps_per_second': 65.731, 'epoch': 12.0}


 65%|██████▍   | 19798/30460 [22:10<11:13, 15.83it/s]  
 65%|██████▌   | 19799/30460 [22:16<11:13, 15.83it/s]

{'eval_loss': 0.16352270543575287, 'eval_f1': 0.8533423252000266, 'eval_precision': 0.8346561931406643, 'eval_recall': 0.8821024045299569, 'eval_runtime': 6.2466, 'eval_samples_per_second': 243.815, 'eval_steps_per_second': 60.994, 'epoch': 13.0}


 66%|██████▌   | 20002/30460 [22:31<10:19, 16.88it/s]  

{'loss': 0.0158, 'grad_norm': 0.00574097502976656, 'learning_rate': 1.717005909389363e-05, 'epoch': 13.13}


 70%|███████   | 21322/30460 [23:52<08:49, 17.26it/s]
 70%|███████   | 21322/30460 [23:58<08:49, 17.26it/s]

{'eval_loss': 0.14636768400669098, 'eval_f1': 0.8683049396374657, 'eval_precision': 0.8626093024867973, 'eval_recall': 0.8751655886686558, 'eval_runtime': 5.159, 'eval_samples_per_second': 295.213, 'eval_steps_per_second': 73.852, 'epoch': 14.0}


 75%|███████▍  | 22844/30460 [25:35<08:25, 15.08it/s]  
 75%|███████▌  | 22845/30460 [25:41<08:25, 15.08it/s]

{'eval_loss': 0.16031865775585175, 'eval_f1': 0.8643689607940716, 'eval_precision': 0.8651061664416126, 'eval_recall': 0.8648311938487507, 'eval_runtime': 5.9237, 'eval_samples_per_second': 257.105, 'eval_steps_per_second': 64.318, 'epoch': 15.0}


 80%|████████  | 24368/30460 [27:17<06:32, 15.53it/s]  
 80%|████████  | 24368/30460 [27:23<06:32, 15.53it/s]

{'eval_loss': 0.16597707569599152, 'eval_f1': 0.8568094729090896, 'eval_precision': 0.8498046575051433, 'eval_recall': 0.8662787211351717, 'eval_runtime': 6.6522, 'eval_samples_per_second': 228.946, 'eval_steps_per_second': 57.274, 'epoch': 16.0}


 85%|████████▍ | 25890/30460 [29:00<04:52, 15.60it/s]  
 85%|████████▌ | 25891/30460 [29:05<04:52, 15.60it/s]

{'eval_loss': 0.1692919284105301, 'eval_f1': 0.8604363442063016, 'eval_precision': 0.8508966845792836, 'eval_recall': 0.874412208654464, 'eval_runtime': 5.2284, 'eval_samples_per_second': 291.291, 'eval_steps_per_second': 72.871, 'epoch': 17.0}


 90%|█████████ | 27414/30460 [30:41<03:07, 16.25it/s]  
 90%|█████████ | 27414/30460 [30:47<03:07, 16.25it/s]

{'eval_loss': 0.16739831864833832, 'eval_f1': 0.8468106551956543, 'eval_precision': 0.8501275013460388, 'eval_recall': 0.8452405097387118, 'eval_runtime': 5.5976, 'eval_samples_per_second': 272.083, 'eval_steps_per_second': 68.065, 'epoch': 18.0}


 95%|█████████▍| 28936/30460 [32:23<01:29, 17.05it/s]
 95%|█████████▌| 28937/30460 [32:29<01:29, 17.05it/s]

{'eval_loss': 0.16374903917312622, 'eval_f1': 0.84477367244091, 'eval_precision': 0.8384022147354514, 'eval_recall': 0.8530071006831964, 'eval_runtime': 6.0611, 'eval_samples_per_second': 251.275, 'eval_steps_per_second': 62.86, 'epoch': 19.0}


 98%|█████████▊| 30002/30460 [33:37<00:29, 15.58it/s]

{'loss': 0.0033, 'grad_norm': 0.0035960651002824306, 'learning_rate': 7.550886408404465e-07, 'epoch': 19.7}


100%|██████████| 30460/30460 [34:05<00:00, 15.43it/s]
100%|██████████| 30460/30460 [34:12<00:00, 15.43it/s]

{'eval_loss': 0.16563522815704346, 'eval_f1': 0.8428932970465469, 'eval_precision': 0.8351078540235152, 'eval_recall': 0.8524544535140078, 'eval_runtime': 5.4412, 'eval_samples_per_second': 279.902, 'eval_steps_per_second': 70.022, 'epoch': 20.0}


100%|██████████| 30460/30460 [34:14<00:00, 14.83it/s]


{'train_runtime': 2054.1797, 'train_samples_per_second': 59.303, 'train_steps_per_second': 14.828, 'train_loss': 0.031078187901910716, 'epoch': 20.0}


100%|██████████| 381/381 [00:06<00:00, 58.80it/s]


0,1
eval/f1,▁▄▆▇▆█▇▇▇█▇█▇████▇▇▇█
eval/loss,▂▂▁▂▁▁▃▅▄▅▅▅▇▅▇███▇█▅
eval/precision,▁█▇▆▆█▇▇▆█▇█▆▇▇▇▇▇▆▆█
eval/recall,▁▃▅▆▆▇▇▇▇▇▇██████▇▇▇█
eval/runtime,▂▂█▃▁▇▄▂▅█▂▄▆▁▅█▁▃▅▂▇
eval/samples_per_second,▇▇▁▆█▂▄▇▄▁▇▄▃█▄▁▇▅▃▆▂
eval/steps_per_second,▇▇▁▆█▂▄▇▄▁▇▄▃█▄▁▇▅▃▆▂
train/epoch,▁▁▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▇▇▇█████
train/global_step,▁▁▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▇▇▇█████
train/grad_norm,█▁▁

0,1
eval/f1,0.8722
eval/loss,0.14153
eval/precision,0.88135
eval/recall,0.86842
eval/runtime,6.5063
eval/samples_per_second,234.081
eval/steps_per_second,58.559
total_flos,8013406920576000.0
train/epoch,20.0
train/global_step,30460.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1884/1884 [00:00<00:00, 15992.92 examples/s]
  5%|▌         | 377/7540 [00:23<07:16, 16.42it/s]
  5%|▌         | 377/7540 [00:24<07:16, 16.42it/s]

{'eval_loss': 0.3014770746231079, 'eval_f1': 0.4720447460113699, 'eval_precision': 0.5253665399072385, 'eval_recall': 0.43127395642962263, 'eval_runtime': 1.3194, 'eval_samples_per_second': 285.74, 'eval_steps_per_second': 72.004, 'epoch': 1.0}


 10%|▉         | 753/7540 [00:50<06:47, 16.65it/s]  
 10%|█         | 754/7540 [00:51<06:47, 16.65it/s]

{'eval_loss': 0.30813533067703247, 'eval_f1': 0.5851454749058332, 'eval_precision': 0.6361676598953123, 'eval_recall': 0.5496187802938304, 'eval_runtime': 1.331, 'eval_samples_per_second': 283.252, 'eval_steps_per_second': 71.376, 'epoch': 2.0}


 15%|█▌        | 1131/7540 [01:16<07:00, 15.24it/s]
 15%|█▌        | 1131/7540 [01:18<07:00, 15.24it/s]

{'eval_loss': 0.326185017824173, 'eval_f1': 0.6917018963038292, 'eval_precision': 0.7496103896103895, 'eval_recall': 0.6509287482780903, 'eval_runtime': 1.3367, 'eval_samples_per_second': 282.039, 'eval_steps_per_second': 71.071, 'epoch': 3.0}


 20%|█▉        | 1507/7540 [01:43<06:28, 15.52it/s]  
 20%|██        | 1508/7540 [01:44<06:28, 15.52it/s]

{'eval_loss': 0.3441547751426697, 'eval_f1': 0.7215657721378544, 'eval_precision': 0.7795405371446791, 'eval_recall': 0.679926269923698, 'eval_runtime': 1.2843, 'eval_samples_per_second': 293.534, 'eval_steps_per_second': 73.968, 'epoch': 4.0}


 25%|██▌       | 1885/7540 [02:10<05:36, 16.80it/s]
 25%|██▌       | 1885/7540 [02:11<05:36, 16.80it/s]

{'eval_loss': 0.4649273753166199, 'eval_f1': 0.6725218945231719, 'eval_precision': 0.7148754399054199, 'eval_recall': 0.6509625735320143, 'eval_runtime': 1.4165, 'eval_samples_per_second': 266.142, 'eval_steps_per_second': 67.065, 'epoch': 5.0}


 30%|██▉       | 2261/7540 [02:36<05:28, 16.08it/s]
 30%|███       | 2262/7540 [02:38<05:28, 16.08it/s]

{'eval_loss': 0.43140503764152527, 'eval_f1': 0.7092997405713413, 'eval_precision': 0.738036040327062, 'eval_recall': 0.6993559981989242, 'eval_runtime': 1.4513, 'eval_samples_per_second': 259.773, 'eval_steps_per_second': 65.46, 'epoch': 6.0}


 35%|███▌      | 2639/7540 [03:03<05:05, 16.04it/s]
 35%|███▌      | 2639/7540 [03:05<05:05, 16.04it/s]

{'eval_loss': 0.44419240951538086, 'eval_f1': 0.7450407779523178, 'eval_precision': 0.7634779606081862, 'eval_recall': 0.737577176239394, 'eval_runtime': 1.4612, 'eval_samples_per_second': 257.999, 'eval_steps_per_second': 65.013, 'epoch': 7.0}


 40%|███▉      | 3015/7540 [03:30<04:34, 16.48it/s]
 40%|████      | 3016/7540 [03:32<04:34, 16.48it/s]

{'eval_loss': 0.4831363260746002, 'eval_f1': 0.7247030125961247, 'eval_precision': 0.7412975775072616, 'eval_recall': 0.7134147866590067, 'eval_runtime': 1.534, 'eval_samples_per_second': 245.771, 'eval_steps_per_second': 61.932, 'epoch': 8.0}


 45%|████▌     | 3393/7540 [03:57<04:06, 16.79it/s]
 45%|████▌     | 3393/7540 [03:58<04:06, 16.79it/s]

{'eval_loss': 0.5060276389122009, 'eval_f1': 0.7369308380398604, 'eval_precision': 0.7559989244340651, 'eval_recall': 0.7284109578493412, 'eval_runtime': 1.4349, 'eval_samples_per_second': 262.745, 'eval_steps_per_second': 66.209, 'epoch': 9.0}


 50%|████▉     | 3769/7540 [04:23<04:01, 15.60it/s]
 50%|█████     | 3770/7540 [04:24<04:01, 15.60it/s]

{'eval_loss': 0.5064071416854858, 'eval_f1': 0.7421039243373444, 'eval_precision': 0.7457792242650317, 'eval_recall': 0.7468756051734285, 'eval_runtime': 1.3902, 'eval_samples_per_second': 271.179, 'eval_steps_per_second': 68.334, 'epoch': 10.0}


 55%|█████▌    | 4147/7540 [04:50<03:39, 15.43it/s]
 55%|█████▌    | 4147/7540 [04:51<03:39, 15.43it/s]

{'eval_loss': 0.5333792567253113, 'eval_f1': 0.7444129549137608, 'eval_precision': 0.7469758219758219, 'eval_recall': 0.7500350405816321, 'eval_runtime': 1.5508, 'eval_samples_per_second': 243.105, 'eval_steps_per_second': 61.26, 'epoch': 11.0}


 60%|█████▉    | 4523/7540 [05:17<03:25, 14.70it/s]
 60%|██████    | 4524/7540 [05:18<03:25, 14.70it/s]

{'eval_loss': 0.558233916759491, 'eval_f1': 0.7361348797104348, 'eval_precision': 0.7405005633911603, 'eval_recall': 0.7387528669193892, 'eval_runtime': 1.711, 'eval_samples_per_second': 220.342, 'eval_steps_per_second': 55.524, 'epoch': 12.0}


 65%|██████▌   | 4901/7540 [05:43<02:57, 14.85it/s]
 65%|██████▌   | 4901/7540 [05:45<02:57, 14.85it/s]

{'eval_loss': 0.5698754787445068, 'eval_f1': 0.718833476865108, 'eval_precision': 0.7347438209000074, 'eval_recall': 0.7057599243754422, 'eval_runtime': 1.7877, 'eval_samples_per_second': 210.889, 'eval_steps_per_second': 53.142, 'epoch': 13.0}


 70%|██████▉   | 5277/7540 [06:11<02:30, 15.00it/s]
 70%|███████   | 5278/7540 [06:12<02:30, 15.00it/s]

{'eval_loss': 0.5721365213394165, 'eval_f1': 0.7488400295456746, 'eval_precision': 0.7594940683911338, 'eval_recall': 0.7440494057082664, 'eval_runtime': 1.6831, 'eval_samples_per_second': 223.991, 'eval_steps_per_second': 56.443, 'epoch': 14.0}


 75%|███████▌  | 5655/7540 [06:38<02:05, 14.99it/s]
 75%|███████▌  | 5655/7540 [06:39<02:05, 14.99it/s]

{'eval_loss': 0.6062642931938171, 'eval_f1': 0.7390011241187712, 'eval_precision': 0.7429426582938795, 'eval_recall': 0.7390724307187029, 'eval_runtime': 1.8688, 'eval_samples_per_second': 201.731, 'eval_steps_per_second': 50.834, 'epoch': 15.0}


 80%|████████  | 6032/7540 [07:06<01:42, 14.72it/s]

{'eval_loss': 0.608995258808136, 'eval_f1': 0.7430661656960627, 'eval_precision': 0.7413580793831149, 'eval_recall': 0.7502920859078475, 'eval_runtime': 1.6602, 'eval_samples_per_second': 227.086, 'eval_steps_per_second': 57.223, 'epoch': 16.0}


 85%|████████▌ | 6409/7540 [07:30<01:11, 15.88it/s]
 85%|████████▌ | 6409/7540 [07:32<01:11, 15.88it/s]

{'eval_loss': 0.6135256886482239, 'eval_f1': 0.7519157334420162, 'eval_precision': 0.7549326387788378, 'eval_recall': 0.7529038825794199, 'eval_runtime': 1.878, 'eval_samples_per_second': 200.75, 'eval_steps_per_second': 50.587, 'epoch': 17.0}


 90%|████████▉ | 6785/7540 [07:57<00:45, 16.68it/s]
 90%|█████████ | 6786/7540 [07:59<00:45, 16.68it/s]

{'eval_loss': 0.6012457609176636, 'eval_f1': 0.7589526196932275, 'eval_precision': 0.7585086082999498, 'eval_recall': 0.7635305457442552, 'eval_runtime': 1.4827, 'eval_samples_per_second': 254.265, 'eval_steps_per_second': 64.072, 'epoch': 18.0}


 95%|█████████▌| 7163/7540 [08:24<00:23, 15.87it/s]
 95%|█████████▌| 7163/7540 [08:26<00:23, 15.87it/s]

{'eval_loss': 0.6041615605354309, 'eval_f1': 0.7598614606468822, 'eval_precision': 0.7644779613185161, 'eval_recall': 0.7591379271548745, 'eval_runtime': 1.2619, 'eval_samples_per_second': 298.758, 'eval_steps_per_second': 75.284, 'epoch': 19.0}


100%|█████████▉| 7539/7540 [08:51<00:00, 16.99it/s]
100%|██████████| 7540/7540 [08:53<00:00, 16.99it/s]

{'eval_loss': 0.6091938018798828, 'eval_f1': 0.7615089702254263, 'eval_precision': 0.7649865685172641, 'eval_recall': 0.7618776531822717, 'eval_runtime': 1.4018, 'eval_samples_per_second': 268.943, 'eval_steps_per_second': 67.771, 'epoch': 20.0}


100%|██████████| 7540/7540 [08:55<00:00, 14.07it/s]


{'train_runtime': 535.792, 'train_samples_per_second': 56.253, 'train_steps_per_second': 14.073, 'train_loss': 0.07369719082860163, 'epoch': 20.0}


100%|██████████| 95/95 [00:01<00:00, 57.82it/s]


0,1
eval/f1,▁▄▆▇▆▇█▇▇██▇▇█▇██████
eval/loss,▁▁▂▂▅▄▄▅▆▆▆▇▇▇███████
eval/precision,▁▄▇█▆▇█▇▇▇▇▇▇▇▇▇▇▇███
eval/recall,▁▃▆▆▆▇▇▇▇██▇▇█▇██████
eval/runtime,▂▂▂▁▃▃▃▄▃▂▄▆▇▆█▆█▄▁▃▆
eval/samples_per_second,▇▇▇█▆▅▅▄▅▆▄▂▂▃▁▃▁▅█▆▃
eval/steps_per_second,▇▇▇█▆▅▅▄▅▆▄▂▂▃▁▃▁▅█▆▃
train/epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇████
train/global_step,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇████

0,1
eval/f1,0.76151
eval/loss,0.60919
eval/precision,0.76499
eval/recall,0.76188
eval/runtime,1.6709
eval/samples_per_second,225.629
eval/steps_per_second,56.856
total_flos,1982595203466240.0
train/epoch,20.0
train/global_step,7540.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1298/1298 [00:00<00:00, 15305.48 examples/s]
  5%|▍         | 259/5200 [00:16<05:25, 15.19it/s]
  5%|▌         | 260/5200 [00:17<05:25, 15.19it/s]

{'eval_loss': 0.3001478314399719, 'eval_f1': 0.24195483312042057, 'eval_precision': 0.5016172506738544, 'eval_recall': 0.19624152318561608, 'eval_runtime': 0.8843, 'eval_samples_per_second': 294.028, 'eval_steps_per_second': 73.507, 'epoch': 1.0}


 10%|▉         | 519/5200 [00:34<04:41, 16.66it/s]
 10%|█         | 520/5200 [00:36<04:40, 16.66it/s]

{'eval_loss': 0.23125353455543518, 'eval_f1': 0.5023671324199783, 'eval_precision': 0.6024131922151996, 'eval_recall': 0.4432488591547182, 'eval_runtime': 0.979, 'eval_samples_per_second': 265.566, 'eval_steps_per_second': 66.392, 'epoch': 2.0}


 15%|█▍        | 779/5200 [00:53<04:37, 15.96it/s]
 15%|█▌        | 780/5200 [00:54<04:36, 15.96it/s]

{'eval_loss': 0.2242853343486786, 'eval_f1': 0.5363766854712235, 'eval_precision': 0.5990560917020481, 'eval_recall': 0.49107856613757445, 'eval_runtime': 0.9406, 'eval_samples_per_second': 276.406, 'eval_steps_per_second': 69.101, 'epoch': 3.0}


 20%|█▉        | 1039/5200 [01:13<04:08, 16.75it/s]
 20%|██        | 1040/5200 [01:14<04:08, 16.75it/s]

{'eval_loss': 0.25434014201164246, 'eval_f1': 0.512874822740156, 'eval_precision': 0.580643962778327, 'eval_recall': 0.4749414645283346, 'eval_runtime': 0.8485, 'eval_samples_per_second': 306.429, 'eval_steps_per_second': 76.607, 'epoch': 4.0}


 25%|██▍       | 1299/5200 [01:32<04:07, 15.78it/s]
 25%|██▌       | 1300/5200 [01:33<04:07, 15.78it/s]

{'eval_loss': 0.2549494802951813, 'eval_f1': 0.6539836132160742, 'eval_precision': 0.776109798758213, 'eval_recall': 0.5959993539176117, 'eval_runtime': 1.0056, 'eval_samples_per_second': 258.554, 'eval_steps_per_second': 64.639, 'epoch': 5.0}


 30%|██▉       | 1559/5200 [01:50<03:40, 16.52it/s]
 30%|███       | 1560/5200 [01:51<03:40, 16.52it/s]

{'eval_loss': 0.2465965896844864, 'eval_f1': 0.6618580640536651, 'eval_precision': 0.7522160379303237, 'eval_recall': 0.6118581234451803, 'eval_runtime': 1.2336, 'eval_samples_per_second': 210.763, 'eval_steps_per_second': 52.691, 'epoch': 6.0}


 35%|███▍      | 1819/5200 [02:10<03:35, 15.69it/s]
 35%|███▌      | 1820/5200 [02:11<03:35, 15.69it/s]

{'eval_loss': 0.24963249266147614, 'eval_f1': 0.6523946070000856, 'eval_precision': 0.7539784490871447, 'eval_recall': 0.6099307878773342, 'eval_runtime': 0.9718, 'eval_samples_per_second': 267.533, 'eval_steps_per_second': 66.883, 'epoch': 7.0}


 40%|███▉      | 2079/5200 [02:29<03:05, 16.86it/s]
 40%|████      | 2080/5200 [02:30<03:04, 16.86it/s]

{'eval_loss': 0.2741676867008209, 'eval_f1': 0.664899858795124, 'eval_precision': 0.7480632806719764, 'eval_recall': 0.6231015552363106, 'eval_runtime': 1.004, 'eval_samples_per_second': 258.976, 'eval_steps_per_second': 64.744, 'epoch': 8.0}


 45%|████▍     | 2339/5200 [02:47<03:13, 14.82it/s]
 45%|████▌     | 2340/5200 [02:48<03:12, 14.82it/s]

{'eval_loss': 0.2696228623390198, 'eval_f1': 0.700660224498484, 'eval_precision': 0.7584770905423078, 'eval_recall': 0.6588736074741007, 'eval_runtime': 1.2146, 'eval_samples_per_second': 214.061, 'eval_steps_per_second': 53.515, 'epoch': 9.0}


 50%|████▉     | 2599/5200 [03:06<02:31, 17.13it/s]
 50%|█████     | 2600/5200 [03:07<02:31, 17.13it/s]

{'eval_loss': 0.26153793931007385, 'eval_f1': 0.7248471375816148, 'eval_precision': 0.774234474610106, 'eval_recall': 0.6883171514811285, 'eval_runtime': 0.887, 'eval_samples_per_second': 293.109, 'eval_steps_per_second': 73.277, 'epoch': 10.0}


 55%|█████▍    | 2859/5200 [03:24<02:19, 16.79it/s]
 55%|█████▌    | 2860/5200 [03:26<02:19, 16.79it/s]

{'eval_loss': 0.29385635256767273, 'eval_f1': 0.6980540474973663, 'eval_precision': 0.7739813076986259, 'eval_recall': 0.6576597796214935, 'eval_runtime': 1.0948, 'eval_samples_per_second': 237.491, 'eval_steps_per_second': 59.373, 'epoch': 11.0}


 60%|█████▉    | 3119/5200 [03:44<02:19, 14.94it/s]
 60%|██████    | 3120/5200 [03:46<02:19, 14.94it/s]

{'eval_loss': 0.2961101830005646, 'eval_f1': 0.6738003698199874, 'eval_precision': 0.7499205890782034, 'eval_recall': 0.6385014999203146, 'eval_runtime': 1.18, 'eval_samples_per_second': 220.345, 'eval_steps_per_second': 55.086, 'epoch': 12.0}


 65%|██████▍   | 3379/5200 [04:03<01:46, 17.07it/s]
 65%|██████▌   | 3380/5200 [04:03<01:46, 17.07it/s]

{'eval_loss': 0.2901892066001892, 'eval_f1': 0.7100454674446265, 'eval_precision': 0.762128944705226, 'eval_recall': 0.6760873866027735, 'eval_runtime': 0.8767, 'eval_samples_per_second': 296.56, 'eval_steps_per_second': 74.14, 'epoch': 13.0}


 70%|██████▉   | 3639/5200 [04:21<01:35, 16.30it/s]
 70%|███████   | 3640/5200 [04:22<01:35, 16.30it/s]

{'eval_loss': 0.284734845161438, 'eval_f1': 0.7115551527567875, 'eval_precision': 0.7660825948662159, 'eval_recall': 0.679510658586941, 'eval_runtime': 0.9062, 'eval_samples_per_second': 286.898, 'eval_steps_per_second': 71.724, 'epoch': 14.0}


 75%|███████▍  | 3899/5200 [04:40<01:19, 16.43it/s]
 75%|███████▌  | 3900/5200 [04:41<01:19, 16.43it/s]

{'eval_loss': 0.29958659410476685, 'eval_f1': 0.7205873765115715, 'eval_precision': 0.7794283853930393, 'eval_recall': 0.6861119921896891, 'eval_runtime': 1.2963, 'eval_samples_per_second': 200.577, 'eval_steps_per_second': 50.144, 'epoch': 15.0}


 80%|███████▉  | 4159/5200 [04:59<01:06, 15.67it/s]
 80%|████████  | 4160/5200 [05:00<01:06, 15.67it/s]

{'eval_loss': 0.2990255355834961, 'eval_f1': 0.7044566616321642, 'eval_precision': 0.7585045836516425, 'eval_recall': 0.6727928031279259, 'eval_runtime': 0.8471, 'eval_samples_per_second': 306.936, 'eval_steps_per_second': 76.734, 'epoch': 16.0}


 85%|████████▍ | 4419/5200 [05:18<00:46, 16.91it/s]
 85%|████████▌ | 4420/5200 [05:19<00:46, 16.91it/s]

{'eval_loss': 0.3073582351207733, 'eval_f1': 0.68765231276445, 'eval_precision': 0.7597063903931478, 'eval_recall': 0.6503977064754033, 'eval_runtime': 0.8326, 'eval_samples_per_second': 312.287, 'eval_steps_per_second': 78.072, 'epoch': 17.0}


 90%|████████▉ | 4679/5200 [05:36<00:34, 15.14it/s]
 90%|█████████ | 4680/5200 [05:37<00:34, 15.14it/s]

{'eval_loss': 0.30300119519233704, 'eval_f1': 0.706677646053142, 'eval_precision': 0.7714520759881583, 'eval_recall': 0.6679487268835667, 'eval_runtime': 1.0169, 'eval_samples_per_second': 255.68, 'eval_steps_per_second': 63.92, 'epoch': 18.0}


 95%|█████████▍| 4939/5200 [05:55<00:15, 17.20it/s]
 95%|█████████▌| 4940/5200 [05:56<00:15, 17.20it/s]

{'eval_loss': 0.3062427341938019, 'eval_f1': 0.696020475739443, 'eval_precision': 0.7653689960832818, 'eval_recall': 0.6563019295507722, 'eval_runtime': 0.8935, 'eval_samples_per_second': 290.987, 'eval_steps_per_second': 72.747, 'epoch': 19.0}


100%|█████████▉| 5199/5200 [06:13<00:00, 15.61it/s]
100%|██████████| 5200/5200 [06:16<00:00, 15.61it/s]

{'eval_loss': 0.306115984916687, 'eval_f1': 0.7058938838480601, 'eval_precision': 0.7713600288600289, 'eval_recall': 0.6665342997265652, 'eval_runtime': 1.0825, 'eval_samples_per_second': 240.178, 'eval_steps_per_second': 60.045, 'epoch': 20.0}


100%|██████████| 5200/5200 [06:18<00:00, 13.74it/s]


{'train_runtime': 378.3588, 'train_samples_per_second': 54.869, 'train_steps_per_second': 13.744, 'train_loss': 0.059059982299804685, 'epoch': 20.0}


100%|██████████| 65/65 [00:00<00:00, 69.01it/s]


------------------ Starting model ==> epochs: 20, batch size: 8, weights of decay: 0.01 ---------------------


0,1
eval/f1,▁▅▅▅▇▇▇▇███▇████▇████
eval/loss,▇▂▁▄▄▃▃▅▅▄▇▇▇▆▇▇████▄
eval/precision,▁▄▃▃█▇▇▇▇██▇███▇█████
eval/recall,▁▅▅▅▇▇▇▇███▇████▇████
eval/runtime,▂▃▃▁▄▇▃▄▇▂▅▆▂▂█▁▁▄▂▅▃
eval/samples_per_second,▇▅▆█▅▂▅▅▂▇▃▂▇▆▁██▄▇▃▅
eval/steps_per_second,▇▅▆█▅▂▅▅▂▇▃▂▇▆▁██▄▇▃▅
train/epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇████
train/global_step,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇████

0,1
eval/f1,0.72485
eval/loss,0.26154
eval/precision,0.77423
eval/recall,0.68832
eval/runtime,0.9698
eval/samples_per_second,268.091
eval/steps_per_second,67.023
total_flos,1365607680768000.0
train/epoch,20.0
train/global_step,5200.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 7614/7614 [00:00<00:00, 14006.17 examples/s]
  5%|▌         | 762/15240 [00:56<17:41, 13.64it/s]
  5%|▌         | 762/15240 [01:00<17:41, 13.64it/s]

{'eval_loss': 0.09960047155618668, 'eval_f1': 0.6526294605587877, 'eval_precision': 0.6758206621408039, 'eval_recall': 0.6334310352943894, 'eval_runtime': 3.6703, 'eval_samples_per_second': 414.952, 'eval_steps_per_second': 52.039, 'epoch': 1.0}


 10%|█         | 1524/15240 [01:59<17:56, 12.75it/s] 
 10%|█         | 1524/15240 [02:03<17:56, 12.75it/s]

{'eval_loss': 0.10114627331495285, 'eval_f1': 0.7762858171322395, 'eval_precision': 0.8427652914702569, 'eval_recall': 0.7470838988078379, 'eval_runtime': 3.8746, 'eval_samples_per_second': 393.075, 'eval_steps_per_second': 49.296, 'epoch': 2.0}


 15%|█▌        | 2286/15240 [03:02<15:52, 13.60it/s]  
 15%|█▌        | 2286/15240 [03:06<15:52, 13.60it/s]

{'eval_loss': 0.0922490656375885, 'eval_f1': 0.8285345693041629, 'eval_precision': 0.8989136069030615, 'eval_recall': 0.7917549763289432, 'eval_runtime': 3.7037, 'eval_samples_per_second': 411.209, 'eval_steps_per_second': 51.57, 'epoch': 3.0}


 20%|██        | 3048/15240 [04:05<15:31, 13.08it/s]  
 20%|██        | 3048/15240 [04:09<15:31, 13.08it/s]

{'eval_loss': 0.09031760692596436, 'eval_f1': 0.8605323336004836, 'eval_precision': 0.886008709403005, 'eval_recall': 0.8387819932365439, 'eval_runtime': 3.7529, 'eval_samples_per_second': 405.817, 'eval_steps_per_second': 50.894, 'epoch': 4.0}


 25%|██▌       | 3810/15240 [05:08<14:04, 13.53it/s]  
 25%|██▌       | 3810/15240 [05:11<14:04, 13.53it/s]

{'eval_loss': 0.08898472785949707, 'eval_f1': 0.8649655922305926, 'eval_precision': 0.871580204321233, 'eval_recall': 0.8611308259809308, 'eval_runtime': 3.6998, 'eval_samples_per_second': 411.639, 'eval_steps_per_second': 51.624, 'epoch': 5.0}


 30%|███       | 4572/15240 [06:10<13:19, 13.35it/s]  
 30%|███       | 4572/15240 [06:14<13:19, 13.35it/s]

{'eval_loss': 0.09357060492038727, 'eval_f1': 0.878639351496786, 'eval_precision': 0.8907988109556574, 'eval_recall': 0.868200115307013, 'eval_runtime': 3.8184, 'eval_samples_per_second': 398.856, 'eval_steps_per_second': 50.021, 'epoch': 6.0}


 35%|███▌      | 5334/15240 [07:13<12:19, 13.40it/s]  
 35%|███▌      | 5334/15240 [07:17<12:19, 13.40it/s]

{'eval_loss': 0.10773894190788269, 'eval_f1': 0.860165530330507, 'eval_precision': 0.870444544116934, 'eval_recall': 0.8565518573479654, 'eval_runtime': 3.7059, 'eval_samples_per_second': 410.964, 'eval_steps_per_second': 51.539, 'epoch': 7.0}


 40%|████      | 6096/15240 [08:16<11:11, 13.62it/s]  
 40%|████      | 6096/15240 [08:19<11:11, 13.62it/s]

{'eval_loss': 0.10815170407295227, 'eval_f1': 0.8682418464664364, 'eval_precision': 0.8776629791726095, 'eval_recall': 0.8602173685668869, 'eval_runtime': 3.8441, 'eval_samples_per_second': 396.189, 'eval_steps_per_second': 49.686, 'epoch': 8.0}


 45%|████▌     | 6858/15240 [09:19<10:04, 13.86it/s]  
 45%|████▌     | 6858/15240 [09:22<10:04, 13.86it/s]

{'eval_loss': 0.12215203791856766, 'eval_f1': 0.8659254140101026, 'eval_precision': 0.8840548035823552, 'eval_recall': 0.8504627054056896, 'eval_runtime': 3.7206, 'eval_samples_per_second': 409.343, 'eval_steps_per_second': 51.336, 'epoch': 9.0}


 50%|█████     | 7620/15240 [10:21<09:05, 13.98it/s]  
 50%|█████     | 7620/15240 [10:25<09:05, 13.98it/s]

{'eval_loss': 0.11187924444675446, 'eval_f1': 0.8876444837286334, 'eval_precision': 0.9020666659229654, 'eval_recall': 0.8758626851486879, 'eval_runtime': 3.8225, 'eval_samples_per_second': 398.431, 'eval_steps_per_second': 49.967, 'epoch': 10.0}


 55%|█████▌    | 8382/15240 [11:24<08:13, 13.88it/s]  
 55%|█████▌    | 8382/15240 [11:28<08:13, 13.88it/s]

{'eval_loss': 0.12455720454454422, 'eval_f1': 0.8814448576809808, 'eval_precision': 0.8943651248383235, 'eval_recall': 0.8702400585580402, 'eval_runtime': 3.6929, 'eval_samples_per_second': 412.411, 'eval_steps_per_second': 51.721, 'epoch': 11.0}


 60%|██████    | 9144/15240 [12:27<07:16, 13.98it/s]  
 60%|██████    | 9144/15240 [12:31<07:16, 13.98it/s]

{'eval_loss': 0.12809789180755615, 'eval_f1': 0.8816803685901834, 'eval_precision': 0.8948266574934409, 'eval_recall': 0.8702528673318819, 'eval_runtime': 3.7501, 'eval_samples_per_second': 406.127, 'eval_steps_per_second': 50.933, 'epoch': 12.0}


 65%|██████▌   | 9906/15240 [13:30<06:22, 13.95it/s]  
 65%|██████▌   | 9906/15240 [13:33<06:22, 13.95it/s]

{'eval_loss': 0.13246874511241913, 'eval_f1': 0.8761095565597671, 'eval_precision': 0.8910306186749318, 'eval_recall': 0.8636805642055341, 'eval_runtime': 3.7107, 'eval_samples_per_second': 410.431, 'eval_steps_per_second': 51.472, 'epoch': 13.0}


 66%|██████▌   | 10002/15240 [13:42<06:30, 13.42it/s] 

{'loss': 0.0345, 'grad_norm': 0.006119920872151852, 'learning_rate': 1.7191601049868766e-05, 'epoch': 13.12}


 70%|███████   | 10668/15240 [14:32<05:29, 13.89it/s]
 70%|███████   | 10668/15240 [14:36<05:29, 13.89it/s]

{'eval_loss': 0.12652014195919037, 'eval_f1': 0.8841300767698238, 'eval_precision': 0.8993275816956666, 'eval_recall': 0.8727477206430344, 'eval_runtime': 3.6829, 'eval_samples_per_second': 413.535, 'eval_steps_per_second': 51.862, 'epoch': 14.0}


 75%|███████▌  | 11430/15240 [15:35<04:38, 13.70it/s]  
 75%|███████▌  | 11430/15240 [15:39<04:38, 13.70it/s]

{'eval_loss': 0.12711726129055023, 'eval_f1': 0.8848482159376648, 'eval_precision': 0.8921500343157033, 'eval_recall': 0.8795786979795596, 'eval_runtime': 3.6901, 'eval_samples_per_second': 412.728, 'eval_steps_per_second': 51.76, 'epoch': 15.0}


 80%|████████  | 12192/15240 [16:38<03:36, 14.06it/s]
 80%|████████  | 12192/15240 [16:42<03:36, 14.06it/s]

{'eval_loss': 0.126966655254364, 'eval_f1': 0.8916408395747756, 'eval_precision': 0.9061767874742693, 'eval_recall': 0.8788046968844231, 'eval_runtime': 3.6773, 'eval_samples_per_second': 414.167, 'eval_steps_per_second': 51.941, 'epoch': 16.0}


 85%|████████▌ | 12954/15240 [17:41<02:45, 13.82it/s]
 85%|████████▌ | 12954/15240 [17:45<02:45, 13.82it/s]

{'eval_loss': 0.1332123875617981, 'eval_f1': 0.8766006615603429, 'eval_precision': 0.8888040788012541, 'eval_recall': 0.8684879304901226, 'eval_runtime': 3.7271, 'eval_samples_per_second': 408.632, 'eval_steps_per_second': 51.247, 'epoch': 17.0}


 90%|█████████ | 13716/15240 [18:44<01:51, 13.69it/s]
 90%|█████████ | 13716/15240 [18:47<01:51, 13.69it/s]

{'eval_loss': 0.1372414231300354, 'eval_f1': 0.8729453304078504, 'eval_precision': 0.8929318018946594, 'eval_recall': 0.8617711940520241, 'eval_runtime': 3.6752, 'eval_samples_per_second': 414.403, 'eval_steps_per_second': 51.97, 'epoch': 18.0}


 95%|█████████▌| 14478/15240 [19:47<00:57, 13.16it/s]
 95%|█████████▌| 14478/15240 [19:51<00:57, 13.16it/s]

{'eval_loss': 0.13420946896076202, 'eval_f1': 0.8743271414327707, 'eval_precision': 0.8942061378767266, 'eval_recall': 0.8624065042848574, 'eval_runtime': 3.7501, 'eval_samples_per_second': 406.124, 'eval_steps_per_second': 50.932, 'epoch': 19.0}


100%|██████████| 15240/15240 [20:50<00:00, 13.52it/s]
100%|██████████| 15240/15240 [20:55<00:00, 13.52it/s]

{'eval_loss': 0.13438700139522552, 'eval_f1': 0.8797859771577529, 'eval_precision': 0.8977303333611181, 'eval_recall': 0.8690499697518524, 'eval_runtime': 3.6709, 'eval_samples_per_second': 414.884, 'eval_steps_per_second': 52.031, 'epoch': 20.0}


100%|██████████| 15240/15240 [20:57<00:00, 12.12it/s]


{'train_runtime': 1257.1804, 'train_samples_per_second': 96.899, 'train_steps_per_second': 12.122, 'train_loss': 0.023151216231618966, 'epoch': 20.0}


100%|██████████| 191/191 [00:03<00:00, 49.97it/s]


0,1
eval/f1,▁▅▆▇▇█▇▇▇████████▇▇██
eval/loss,▃▃▁▁▁▂▄▄▆▄▆▇▇▆▇▇▇███▇
eval/precision,▁▆█▇▇█▇▇▇███████▇████
eval/recall,▁▄▆▇▇█▇▇▇████████▇███
eval/runtime,▁█▂▄▂▆▂▇▃▆▂▄▂▁▂▁▃▁▄▁█
eval/samples_per_second,█▁▇▅▇▃▇▂▆▃▇▅▇█▇█▆█▅█▁
eval/steps_per_second,█▁▇▅▇▃▇▂▆▃▇▅▇█▇█▆█▅█▁
train/epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▅▆▆▇▇▇████
train/global_step,▁▁▂▂▂▃▃▄▄▄▅▅▅▅▆▆▇▇▇████
train/grad_norm,▁

0,1
eval/f1,0.89164
eval/loss,0.12697
eval/precision,0.90618
eval/recall,0.8788
eval/runtime,3.8622
eval/samples_per_second,394.332
eval/steps_per_second,49.453
total_flos,8013406920576000.0
train/epoch,20.0
train/global_step,15240.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1884/1884 [00:00<00:00, 14889.76 examples/s]
  5%|▌         | 189/3780 [00:14<04:21, 13.72it/s]
  5%|▌         | 189/3780 [00:15<04:21, 13.72it/s]

{'eval_loss': 0.3370732069015503, 'eval_f1': 0.4613946154964128, 'eval_precision': 0.5153256704980842, 'eval_recall': 0.4328257670100758, 'eval_runtime': 0.9669, 'eval_samples_per_second': 389.902, 'eval_steps_per_second': 49.643, 'epoch': 1.0}


 10%|▉         | 377/3780 [00:30<04:05, 13.85it/s]
 10%|█         | 378/3780 [00:31<04:05, 13.85it/s]

{'eval_loss': 0.3166757822036743, 'eval_f1': 0.5264233187419706, 'eval_precision': 0.635614973262032, 'eval_recall': 0.4614885092347326, 'eval_runtime': 0.8881, 'eval_samples_per_second': 424.503, 'eval_steps_per_second': 54.048, 'epoch': 2.0}


 15%|█▌        | 567/3780 [00:46<03:45, 14.22it/s]
 15%|█▌        | 567/3780 [00:47<03:45, 14.22it/s]

{'eval_loss': 0.3097614347934723, 'eval_f1': 0.6707337698404802, 'eval_precision': 0.77864364090568, 'eval_recall': 0.6154401894692381, 'eval_runtime': 0.907, 'eval_samples_per_second': 415.639, 'eval_steps_per_second': 52.92, 'epoch': 3.0}


 20%|█▉        | 755/3780 [01:03<03:54, 12.92it/s]
 20%|██        | 756/3780 [01:04<03:54, 12.92it/s]

{'eval_loss': 0.3805956542491913, 'eval_f1': 0.6428123773115603, 'eval_precision': 0.7117336296440028, 'eval_recall': 0.621129279107919, 'eval_runtime': 0.9556, 'eval_samples_per_second': 394.533, 'eval_steps_per_second': 50.232, 'epoch': 4.0}


 25%|██▌       | 945/3780 [01:20<03:27, 13.63it/s]
 25%|██▌       | 945/3780 [01:21<03:27, 13.63it/s]

{'eval_loss': 0.3381912410259247, 'eval_f1': 0.6992382024826721, 'eval_precision': 0.7527432029015213, 'eval_recall': 0.6716065665433526, 'eval_runtime': 0.9147, 'eval_samples_per_second': 412.167, 'eval_steps_per_second': 52.477, 'epoch': 5.0}


 30%|██▉       | 1133/3780 [01:36<03:14, 13.63it/s]
 30%|███       | 1134/3780 [01:37<03:14, 13.63it/s]

{'eval_loss': 0.33635658025741577, 'eval_f1': 0.7267949023502152, 'eval_precision': 0.7529224813576054, 'eval_recall': 0.7156923424275473, 'eval_runtime': 0.9202, 'eval_samples_per_second': 409.713, 'eval_steps_per_second': 52.165, 'epoch': 6.0}


 35%|███▌      | 1323/3780 [01:53<03:03, 13.36it/s]
 35%|███▌      | 1323/3780 [01:54<03:03, 13.36it/s]

{'eval_loss': 0.3788832724094391, 'eval_f1': 0.729168916176101, 'eval_precision': 0.7610366958622772, 'eval_recall': 0.714591253179834, 'eval_runtime': 0.9672, 'eval_samples_per_second': 389.797, 'eval_steps_per_second': 49.629, 'epoch': 7.0}


 40%|███▉      | 1511/3780 [02:09<02:48, 13.44it/s]
 40%|████      | 1512/3780 [02:10<02:48, 13.44it/s]

{'eval_loss': 0.4063006043434143, 'eval_f1': 0.7394728515403102, 'eval_precision': 0.766582072114782, 'eval_recall': 0.7252053077591818, 'eval_runtime': 0.925, 'eval_samples_per_second': 407.583, 'eval_steps_per_second': 51.894, 'epoch': 8.0}


 45%|████▍     | 1700/3780 [02:27<02:33, 13.51it/s]
 45%|████▌     | 1701/3780 [02:28<02:33, 13.51it/s]

{'eval_loss': 0.3978836238384247, 'eval_f1': 0.7304263016628034, 'eval_precision': 0.7559412722802046, 'eval_recall': 0.7155335177303069, 'eval_runtime': 0.9343, 'eval_samples_per_second': 403.497, 'eval_steps_per_second': 51.374, 'epoch': 9.0}


 50%|█████     | 1890/3780 [02:44<02:20, 13.48it/s]
 50%|█████     | 1890/3780 [02:45<02:20, 13.48it/s]

{'eval_loss': 0.4085858166217804, 'eval_f1': 0.7567641438546213, 'eval_precision': 0.760549618146967, 'eval_recall': 0.7569555765493553, 'eval_runtime': 0.9664, 'eval_samples_per_second': 390.123, 'eval_steps_per_second': 49.671, 'epoch': 10.0}


 55%|█████▍    | 2078/3780 [03:00<02:08, 13.25it/s]
 55%|█████▌    | 2079/3780 [03:01<02:08, 13.25it/s]

{'eval_loss': 0.4387723505496979, 'eval_f1': 0.7543248977901982, 'eval_precision': 0.769140671046481, 'eval_recall': 0.7436325576800156, 'eval_runtime': 0.9274, 'eval_samples_per_second': 406.523, 'eval_steps_per_second': 51.759, 'epoch': 11.0}


 60%|██████    | 2268/3780 [03:18<01:49, 13.86it/s]
 60%|██████    | 2268/3780 [03:19<01:49, 13.86it/s]

{'eval_loss': 0.440563827753067, 'eval_f1': 0.7543394682478154, 'eval_precision': 0.7641618677402725, 'eval_recall': 0.7499917558121847, 'eval_runtime': 0.9421, 'eval_samples_per_second': 400.151, 'eval_steps_per_second': 50.948, 'epoch': 12.0}


 65%|██████▌   | 2457/3780 [03:35<01:39, 13.35it/s]
 65%|██████▌   | 2457/3780 [03:36<01:39, 13.35it/s]

{'eval_loss': 0.46991127729415894, 'eval_f1': 0.7418855253131442, 'eval_precision': 0.7574674531994396, 'eval_recall': 0.7319968133099597, 'eval_runtime': 0.9953, 'eval_samples_per_second': 378.769, 'eval_steps_per_second': 48.225, 'epoch': 13.0}


 70%|██████▉   | 2645/3780 [03:51<01:28, 12.76it/s]
 70%|███████   | 2646/3780 [03:52<01:28, 12.76it/s]

{'eval_loss': 0.4663825035095215, 'eval_f1': 0.7545268043716068, 'eval_precision': 0.7593343467084323, 'eval_recall': 0.7560824661378291, 'eval_runtime': 0.9689, 'eval_samples_per_second': 389.111, 'eval_steps_per_second': 49.542, 'epoch': 14.0}


 75%|███████▍  | 2834/3780 [04:09<01:11, 13.29it/s]
 75%|███████▌  | 2835/3780 [04:10<01:11, 13.29it/s]

{'eval_loss': 0.4562954306602478, 'eval_f1': 0.7668765488995785, 'eval_precision': 0.77153996816715, 'eval_recall': 0.7653200888396351, 'eval_runtime': 0.9663, 'eval_samples_per_second': 390.135, 'eval_steps_per_second': 49.672, 'epoch': 15.0}


 80%|████████  | 3024/3780 [04:26<00:57, 13.19it/s]
 80%|████████  | 3024/3780 [04:27<00:57, 13.19it/s]

{'eval_loss': 0.4656699597835541, 'eval_f1': 0.7549486529998166, 'eval_precision': 0.7581023095092292, 'eval_recall': 0.7547781183117421, 'eval_runtime': 1.0098, 'eval_samples_per_second': 373.335, 'eval_steps_per_second': 47.533, 'epoch': 16.0}


 85%|████████▍ | 3212/3780 [04:42<00:42, 13.28it/s]
 85%|████████▌ | 3213/3780 [04:43<00:42, 13.28it/s]

{'eval_loss': 0.458935409784317, 'eval_f1': 0.7672005198711466, 'eval_precision': 0.7757623321107285, 'eval_recall': 0.7625371260933554, 'eval_runtime': 0.947, 'eval_samples_per_second': 398.11, 'eval_steps_per_second': 50.688, 'epoch': 17.0}


 90%|█████████ | 3402/3780 [04:59<00:27, 13.83it/s]
 90%|█████████ | 3402/3780 [05:00<00:27, 13.83it/s]

{'eval_loss': 0.48320022225379944, 'eval_f1': 0.7600571732221484, 'eval_precision': 0.7725842311846367, 'eval_recall': 0.750899552871912, 'eval_runtime': 0.9403, 'eval_samples_per_second': 400.948, 'eval_steps_per_second': 51.049, 'epoch': 18.0}


 95%|█████████▍| 3590/3780 [05:16<00:14, 12.80it/s]
 95%|█████████▌| 3591/3780 [05:17<00:14, 12.80it/s]

{'eval_loss': 0.4901028275489807, 'eval_f1': 0.759414372348836, 'eval_precision': 0.7698263096039307, 'eval_recall': 0.7521452110345661, 'eval_runtime': 0.9721, 'eval_samples_per_second': 387.818, 'eval_steps_per_second': 49.377, 'epoch': 19.0}


100%|██████████| 3780/3780 [05:33<00:00, 13.49it/s]
100%|██████████| 3780/3780 [05:35<00:00, 13.49it/s]

{'eval_loss': 0.4883478581905365, 'eval_f1': 0.7611799940632791, 'eval_precision': 0.7749444796318418, 'eval_recall': 0.750899552871912, 'eval_runtime': 0.9208, 'eval_samples_per_second': 409.435, 'eval_steps_per_second': 52.13, 'epoch': 20.0}


100%|██████████| 3780/3780 [05:37<00:00, 11.19it/s]


{'train_runtime': 337.8181, 'train_samples_per_second': 89.22, 'train_steps_per_second': 11.189, 'train_loss': 0.07426943501467427, 'epoch': 20.0}


100%|██████████| 48/48 [00:01<00:00, 46.92it/s]


0,1
eval/f1,▁▂▆▅▆▇▇▇▇███▇████████
eval/loss,▂▁▁▄▂▂▄▅▄▅▆▆▇▇▇▇▇███▇
eval/precision,▁▄█▆▇▇██▇███▇▇█▇█████
eval/recall,▁▂▅▅▆▇▇▇▇███▇████████
eval/runtime,▄▁▂▄▂▂▄▂▃▄▂▃▅▄▄▆▃▃▄▂█
eval/samples_per_second,▅█▇▅▇▇▅▆▆▅▆▆▄▅▅▃▅▆▄▇▁
eval/steps_per_second,▅█▇▅▇▇▅▆▆▅▆▆▄▅▅▃▅▆▄▇▁
train/epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇████
train/global_step,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇████

0,1
eval/f1,0.7672
eval/loss,0.45894
eval/precision,0.77576
eval/recall,0.76254
eval/runtime,1.0732
eval/samples_per_second,351.296
eval/steps_per_second,44.727
total_flos,1982595203466240.0
train/epoch,20.0
train/global_step,3780.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1298/1298 [00:00<00:00, 13403.60 examples/s]
  5%|▍         | 129/2600 [00:09<03:04, 13.42it/s]
  5%|▌         | 130/2600 [00:10<03:03, 13.42it/s]

{'eval_loss': 0.297756552696228, 'eval_f1': 0.12244897959183673, 'eval_precision': 0.12631578947368421, 'eval_recall': 0.1188118811881188, 'eval_runtime': 0.6815, 'eval_samples_per_second': 381.51, 'eval_steps_per_second': 48.422, 'epoch': 1.0}


 10%|▉         | 259/2600 [00:21<02:50, 13.74it/s]
 10%|█         | 260/2600 [00:22<02:50, 13.74it/s]

{'eval_loss': 0.23350033164024353, 'eval_f1': 0.45992537313432835, 'eval_precision': 0.6176900468410176, 'eval_recall': 0.3821576315823414, 'eval_runtime': 0.6248, 'eval_samples_per_second': 416.156, 'eval_steps_per_second': 52.82, 'epoch': 2.0}


 15%|█▍        | 389/2600 [00:33<02:43, 13.49it/s]
 15%|█▌        | 390/2600 [00:33<02:43, 13.49it/s]

{'eval_loss': 0.22931469976902008, 'eval_f1': 0.5370688936712188, 'eval_precision': 0.5727992333487179, 'eval_recall': 0.5115898748043783, 'eval_runtime': 0.6578, 'eval_samples_per_second': 395.278, 'eval_steps_per_second': 50.17, 'epoch': 3.0}


 20%|█▉        | 519/2600 [00:45<02:34, 13.45it/s]
 20%|██        | 520/2600 [00:45<02:34, 13.45it/s]

{'eval_loss': 0.22790901362895966, 'eval_f1': 0.5744799762354991, 'eval_precision': 0.6824665606178212, 'eval_recall': 0.525990669592818, 'eval_runtime': 0.6425, 'eval_samples_per_second': 404.672, 'eval_steps_per_second': 51.362, 'epoch': 4.0}


 25%|██▍       | 649/2600 [00:57<02:30, 12.95it/s]
 25%|██▌       | 650/2600 [00:57<02:30, 12.95it/s]

{'eval_loss': 0.2206651270389557, 'eval_f1': 0.6362032951779302, 'eval_precision': 0.7131492829240013, 'eval_recall': 0.5836939206181515, 'eval_runtime': 0.6456, 'eval_samples_per_second': 402.74, 'eval_steps_per_second': 51.117, 'epoch': 5.0}


 30%|██▉       | 779/2600 [01:09<02:17, 13.22it/s]
 30%|███       | 780/2600 [01:09<02:17, 13.22it/s]

{'eval_loss': 0.23968560993671417, 'eval_f1': 0.6763793971983058, 'eval_precision': 0.7414435530921214, 'eval_recall': 0.6286597810545504, 'eval_runtime': 0.6315, 'eval_samples_per_second': 411.737, 'eval_steps_per_second': 52.259, 'epoch': 6.0}


 35%|███▍      | 909/2600 [01:20<02:05, 13.52it/s]
 35%|███▌      | 910/2600 [01:21<02:04, 13.52it/s]

{'eval_loss': 0.2511122524738312, 'eval_f1': 0.6640701059567699, 'eval_precision': 0.7667164408354525, 'eval_recall': 0.6024658738241674, 'eval_runtime': 0.6732, 'eval_samples_per_second': 386.195, 'eval_steps_per_second': 49.017, 'epoch': 7.0}


 40%|████      | 1040/2600 [01:33<01:54, 13.64it/s]
 40%|████      | 1040/2600 [01:34<01:54, 13.64it/s]

{'eval_loss': 0.24462585151195526, 'eval_f1': 0.6789680809133575, 'eval_precision': 0.772392290249433, 'eval_recall': 0.6195292023431838, 'eval_runtime': 0.6529, 'eval_samples_per_second': 398.2, 'eval_steps_per_second': 50.541, 'epoch': 8.0}


 45%|████▌     | 1170/2600 [01:46<01:45, 13.59it/s]
 45%|████▌     | 1170/2600 [01:46<01:45, 13.59it/s]

{'eval_loss': 0.2465810328722, 'eval_f1': 0.6951264997300803, 'eval_precision': 0.7728353140916807, 'eval_recall': 0.6461778411540904, 'eval_runtime': 0.6613, 'eval_samples_per_second': 393.169, 'eval_steps_per_second': 49.902, 'epoch': 9.0}


 50%|████▉     | 1299/2600 [01:58<01:36, 13.49it/s]
 50%|█████     | 1300/2600 [01:59<01:36, 13.49it/s]

{'eval_loss': 0.2426643967628479, 'eval_f1': 0.710910431689179, 'eval_precision': 0.792835531084379, 'eval_recall': 0.65104659463821, 'eval_runtime': 0.6514, 'eval_samples_per_second': 399.124, 'eval_steps_per_second': 50.658, 'epoch': 10.0}


 55%|█████▍    | 1429/2600 [02:10<01:26, 13.53it/s]
 55%|█████▌    | 1430/2600 [02:11<01:26, 13.53it/s]

{'eval_loss': 0.24510611593723297, 'eval_f1': 0.700107517557126, 'eval_precision': 0.7864721626869732, 'eval_recall': 0.6481962301401883, 'eval_runtime': 0.6338, 'eval_samples_per_second': 410.237, 'eval_steps_per_second': 52.069, 'epoch': 11.0}


 60%|█████▉    | 1559/2600 [02:22<01:17, 13.44it/s]
 60%|██████    | 1560/2600 [02:23<01:17, 13.44it/s]

{'eval_loss': 0.25952059030532837, 'eval_f1': 0.7012543212047088, 'eval_precision': 0.7613907044710773, 'eval_recall': 0.6563424563998096, 'eval_runtime': 0.6451, 'eval_samples_per_second': 403.022, 'eval_steps_per_second': 51.153, 'epoch': 12.0}


 65%|██████▍   | 1689/2600 [02:34<01:07, 13.47it/s]
 65%|██████▌   | 1690/2600 [02:34<01:07, 13.47it/s]

{'eval_loss': 0.24638281762599945, 'eval_f1': 0.7133139226085966, 'eval_precision': 0.7765579594496644, 'eval_recall': 0.6645907879633176, 'eval_runtime': 0.6354, 'eval_samples_per_second': 409.167, 'eval_steps_per_second': 51.933, 'epoch': 13.0}


 70%|██████▉   | 1819/2600 [02:46<00:58, 13.41it/s]
 70%|███████   | 1820/2600 [02:47<00:58, 13.41it/s]

{'eval_loss': 0.2623489499092102, 'eval_f1': 0.7029099842139237, 'eval_precision': 0.7615708152574513, 'eval_recall': 0.6700385537559629, 'eval_runtime': 0.6503, 'eval_samples_per_second': 399.8, 'eval_steps_per_second': 50.744, 'epoch': 14.0}


 75%|███████▌  | 1950/2600 [02:59<00:47, 13.74it/s]
 75%|███████▌  | 1950/2600 [03:00<00:47, 13.74it/s]

{'eval_loss': 0.25740814208984375, 'eval_f1': 0.7259121610123226, 'eval_precision': 0.7928558377825945, 'eval_recall': 0.6854714587485898, 'eval_runtime': 0.6431, 'eval_samples_per_second': 404.266, 'eval_steps_per_second': 51.311, 'epoch': 15.0}


 80%|████████  | 2080/2600 [03:11<00:38, 13.59it/s]
 80%|████████  | 2080/2600 [03:12<00:38, 13.59it/s]

{'eval_loss': 0.2634448707103729, 'eval_f1': 0.7273200633400293, 'eval_precision': 0.8083555233168385, 'eval_recall': 0.6769000301771613, 'eval_runtime': 0.6553, 'eval_samples_per_second': 396.759, 'eval_steps_per_second': 50.358, 'epoch': 16.0}


 85%|████████▌ | 2210/2600 [03:23<00:28, 13.56it/s]
 85%|████████▌ | 2210/2600 [03:24<00:28, 13.56it/s]

{'eval_loss': 0.26483967900276184, 'eval_f1': 0.7167550595976613, 'eval_precision': 0.7836505324095846, 'eval_recall': 0.6755138915632999, 'eval_runtime': 0.6475, 'eval_samples_per_second': 401.543, 'eval_steps_per_second': 50.965, 'epoch': 17.0}


 90%|█████████ | 2340/2600 [03:35<00:19, 13.52it/s]
 90%|█████████ | 2340/2600 [03:36<00:19, 13.52it/s]

{'eval_loss': 0.26663389801979065, 'eval_f1': 0.7252968723947933, 'eval_precision': 0.8051044331188043, 'eval_recall': 0.6755138915632999, 'eval_runtime': 0.6645, 'eval_samples_per_second': 391.254, 'eval_steps_per_second': 49.659, 'epoch': 18.0}


 95%|█████████▌| 2470/2600 [03:47<00:09, 13.54it/s]
 95%|█████████▌| 2470/2600 [03:48<00:09, 13.54it/s]

{'eval_loss': 0.267433762550354, 'eval_f1': 0.7192717270561312, 'eval_precision': 0.7917288108995189, 'eval_recall': 0.6726567487061571, 'eval_runtime': 0.6545, 'eval_samples_per_second': 397.271, 'eval_steps_per_second': 50.423, 'epoch': 19.0}


100%|██████████| 2600/2600 [03:59<00:00, 13.30it/s]
100%|██████████| 2600/2600 [04:01<00:00, 13.30it/s]

{'eval_loss': 0.2669370174407959, 'eval_f1': 0.7173761776100837, 'eval_precision': 0.7880156117746638, 'eval_recall': 0.6724743778854884, 'eval_runtime': 0.6423, 'eval_samples_per_second': 404.786, 'eval_steps_per_second': 51.377, 'epoch': 20.0}


100%|██████████| 2600/2600 [04:03<00:00, 10.67it/s]


{'train_runtime': 243.7395, 'train_samples_per_second': 85.173, 'train_steps_per_second': 10.667, 'train_loss': 0.0636680896465595, 'epoch': 20.0}


100%|██████████| 33/33 [00:00<00:00, 45.10it/s]


------------------ Starting model ==> epochs: 20, batch size: 8, weights of decay: 0.001 ---------------------


0,1
eval/f1,▁▅▆▆▇▇▇▇█████████████
eval/loss,█▂▂▂▁▃▄▃▃▃▃▅▃▅▄▅▅▅▅▅▅
eval/precision,▁▆▆▇▇▇███████████████
eval/recall,▁▄▆▆▇▇▇▇█████████████
eval/runtime,▄▁▃▂▂▁▃▂▃▂▁▂▂▂▂▂▂▃▂▂█
eval/samples_per_second,▅█▆▇▇█▅▆▆▇▇▇▇▇▇▆▇▆▆▇▁
eval/steps_per_second,▅█▆▇▇█▅▆▆▇▇▇▇▇▇▆▇▆▆▇▁
train/epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇████
train/global_step,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇████

0,1
eval/f1,0.72732
eval/loss,0.26344
eval/precision,0.80836
eval/recall,0.6769
eval/runtime,0.7729
eval/samples_per_second,336.384
eval/steps_per_second,42.695
total_flos,1365607680768000.0
train/epoch,20.0
train/global_step,2600.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 7614/7614 [00:00<00:00, 20084.54 examples/s]
  5%|▍         | 761/15240 [00:56<17:48, 13.56it/s]
  5%|▌         | 762/15240 [01:00<17:47, 13.56it/s]

{'eval_loss': 0.10241478681564331, 'eval_f1': 0.6950769235501346, 'eval_precision': 0.9695650063725971, 'eval_recall': 0.668518170525747, 'eval_runtime': 3.6771, 'eval_samples_per_second': 414.187, 'eval_steps_per_second': 51.943, 'epoch': 1.0}


 10%|▉         | 1523/15240 [01:59<17:12, 13.29it/s] 
 10%|█         | 1524/15240 [02:03<17:12, 13.29it/s]

{'eval_loss': 0.10158085823059082, 'eval_f1': 0.7571013993658889, 'eval_precision': 0.8703741486900997, 'eval_recall': 0.7246036207653805, 'eval_runtime': 3.8537, 'eval_samples_per_second': 395.201, 'eval_steps_per_second': 49.562, 'epoch': 2.0}


 15%|█▍        | 2285/15240 [03:02<16:03, 13.45it/s]  
 15%|█▌        | 2286/15240 [03:05<16:03, 13.45it/s]

{'eval_loss': 0.09736205637454987, 'eval_f1': 0.8017752028452367, 'eval_precision': 0.8588597941406715, 'eval_recall': 0.7689295733273965, 'eval_runtime': 3.7338, 'eval_samples_per_second': 407.894, 'eval_steps_per_second': 51.154, 'epoch': 3.0}


 20%|█▉        | 3047/15240 [04:04<15:05, 13.47it/s]  
 20%|██        | 3048/15240 [04:08<15:05, 13.47it/s]

{'eval_loss': 0.09321895986795425, 'eval_f1': 0.8521243600475764, 'eval_precision': 0.903784491092714, 'eval_recall': 0.813900441877674, 'eval_runtime': 3.7409, 'eval_samples_per_second': 407.123, 'eval_steps_per_second': 51.057, 'epoch': 4.0}


 25%|██▍       | 3809/15240 [05:07<14:17, 13.33it/s]  
 25%|██▌       | 3810/15240 [05:11<14:17, 13.33it/s]

{'eval_loss': 0.10162515938282013, 'eval_f1': 0.8534975307920669, 'eval_precision': 0.8854786158816529, 'eval_recall': 0.8283407174759596, 'eval_runtime': 3.7316, 'eval_samples_per_second': 408.135, 'eval_steps_per_second': 51.184, 'epoch': 5.0}


 30%|██▉       | 4571/15240 [06:09<13:14, 13.44it/s]  
 30%|███       | 4572/15240 [06:13<13:13, 13.44it/s]

{'eval_loss': 0.09715622663497925, 'eval_f1': 0.8772273787579455, 'eval_precision': 0.9019466863057204, 'eval_recall': 0.8572224632048104, 'eval_runtime': 3.6655, 'eval_samples_per_second': 415.493, 'eval_steps_per_second': 52.107, 'epoch': 6.0}


 35%|███▍      | 5333/15240 [07:12<12:37, 13.09it/s]  
 35%|███▌      | 5334/15240 [07:16<12:36, 13.09it/s]

{'eval_loss': 0.10592883825302124, 'eval_f1': 0.8721314196914921, 'eval_precision': 0.9082666336828442, 'eval_recall': 0.8460561671222594, 'eval_runtime': 3.7102, 'eval_samples_per_second': 410.494, 'eval_steps_per_second': 51.48, 'epoch': 7.0}


 40%|███▉      | 6095/15240 [08:15<11:19, 13.46it/s]  
 40%|████      | 6096/15240 [08:18<11:19, 13.46it/s]

{'eval_loss': 0.12334678322076797, 'eval_f1': 0.8517447187604333, 'eval_precision': 0.8695055056606261, 'eval_recall': 0.8381064618589376, 'eval_runtime': 3.6805, 'eval_samples_per_second': 413.807, 'eval_steps_per_second': 51.896, 'epoch': 8.0}


 45%|████▍     | 6857/15240 [09:17<10:36, 13.17it/s]  
 45%|████▌     | 6858/15240 [09:21<10:36, 13.17it/s]

{'eval_loss': 0.13233448565006256, 'eval_f1': 0.8578425722958267, 'eval_precision': 0.9032007451444141, 'eval_recall': 0.8251422750778558, 'eval_runtime': 3.7577, 'eval_samples_per_second': 405.303, 'eval_steps_per_second': 50.829, 'epoch': 9.0}


 50%|████▉     | 7619/15240 [10:20<09:41, 13.10it/s]  
 50%|█████     | 7620/15240 [10:24<09:41, 13.10it/s]

{'eval_loss': 0.12338266521692276, 'eval_f1': 0.8695752541484186, 'eval_precision': 0.8660244788566185, 'eval_recall': 0.8756092721652402, 'eval_runtime': 3.6775, 'eval_samples_per_second': 414.145, 'eval_steps_per_second': 51.938, 'epoch': 10.0}


 55%|█████▍    | 8381/15240 [11:23<08:54, 12.83it/s]  
 55%|█████▌    | 8382/15240 [11:26<08:54, 12.83it/s]

{'eval_loss': 0.11965570598840714, 'eval_f1': 0.8748433190615658, 'eval_precision': 0.8862016047229567, 'eval_recall': 0.864755821721766, 'eval_runtime': 3.7658, 'eval_samples_per_second': 404.433, 'eval_steps_per_second': 50.72, 'epoch': 11.0}


 60%|█████▉    | 9143/15240 [12:26<07:36, 13.35it/s]  
 60%|██████    | 9144/15240 [12:30<07:36, 13.35it/s]

{'eval_loss': 0.12427908182144165, 'eval_f1': 0.8756561915339552, 'eval_precision': 0.9071055237867137, 'eval_recall': 0.8515064508084141, 'eval_runtime': 3.6999, 'eval_samples_per_second': 411.633, 'eval_steps_per_second': 51.623, 'epoch': 12.0}


 65%|██████▍   | 9905/15240 [13:29<06:44, 13.20it/s]  
 65%|██████▌   | 9906/15240 [13:33<06:44, 13.20it/s]

{'eval_loss': 0.1307927370071411, 'eval_f1': 0.8761965594116293, 'eval_precision': 0.9132637199119145, 'eval_recall': 0.8518223311754454, 'eval_runtime': 3.8413, 'eval_samples_per_second': 396.483, 'eval_steps_per_second': 49.723, 'epoch': 13.0}


 66%|██████▌   | 10001/15240 [13:41<06:31, 13.37it/s] 

{'loss': 0.0351, 'grad_norm': 0.005065578036010265, 'learning_rate': 1.7191601049868766e-05, 'epoch': 13.12}


 70%|██████▉   | 10667/15240 [14:31<05:40, 13.42it/s]
 70%|███████   | 10668/15240 [14:35<05:40, 13.42it/s]

{'eval_loss': 0.11781496554613113, 'eval_f1': 0.886510451170908, 'eval_precision': 0.9293483350263381, 'eval_recall': 0.857692089005667, 'eval_runtime': 3.6824, 'eval_samples_per_second': 413.587, 'eval_steps_per_second': 51.868, 'epoch': 14.0}


 75%|███████▍  | 11429/15240 [15:33<04:43, 13.46it/s]  
 75%|███████▌  | 11430/15240 [15:37<04:43, 13.46it/s]

{'eval_loss': 0.12194004654884338, 'eval_f1': 0.8815286849685388, 'eval_precision': 0.90053905097723, 'eval_recall': 0.8663354589435778, 'eval_runtime': 3.7924, 'eval_samples_per_second': 401.596, 'eval_steps_per_second': 50.364, 'epoch': 15.0}


 80%|███████▉  | 12191/15240 [16:36<03:47, 13.39it/s]
 80%|████████  | 12192/15240 [16:40<03:47, 13.39it/s]

{'eval_loss': 0.13999466598033905, 'eval_f1': 0.8810279748548719, 'eval_precision': 0.9136443929484671, 'eval_recall': 0.8580489846724486, 'eval_runtime': 3.7305, 'eval_samples_per_second': 408.257, 'eval_steps_per_second': 51.2, 'epoch': 16.0}


 85%|████████▍ | 12953/15240 [17:38<02:49, 13.52it/s]
 85%|████████▌ | 12954/15240 [17:42<02:49, 13.52it/s]

{'eval_loss': 0.13442493975162506, 'eval_f1': 0.8813514098758477, 'eval_precision': 0.9036536492323848, 'eval_recall': 0.8624039683286265, 'eval_runtime': 3.7595, 'eval_samples_per_second': 405.106, 'eval_steps_per_second': 50.804, 'epoch': 17.0}


 90%|████████▉ | 13715/15240 [18:41<01:53, 13.45it/s]
 90%|█████████ | 13716/15240 [18:45<01:53, 13.45it/s]

{'eval_loss': 0.13662000000476837, 'eval_f1': 0.8827923631951622, 'eval_precision': 0.9125449152477414, 'eval_recall': 0.8596564162185378, 'eval_runtime': 3.7144, 'eval_samples_per_second': 410.023, 'eval_steps_per_second': 51.421, 'epoch': 18.0}


 95%|█████████▍| 14477/15240 [19:43<00:56, 13.46it/s]
 95%|█████████▌| 14478/15240 [19:47<00:56, 13.46it/s]

{'eval_loss': 0.13707999885082245, 'eval_f1': 0.8855046932686524, 'eval_precision': 0.9163723362703461, 'eval_recall': 0.8619057776971745, 'eval_runtime': 3.7033, 'eval_samples_per_second': 411.254, 'eval_steps_per_second': 51.575, 'epoch': 19.0}


100%|█████████▉| 15239/15240 [20:45<00:00, 13.41it/s]
100%|██████████| 15240/15240 [20:51<00:00, 13.41it/s]

{'eval_loss': 0.138478085398674, 'eval_f1': 0.8807892908341465, 'eval_precision': 0.9139254320603437, 'eval_recall': 0.8546296406977522, 'eval_runtime': 3.7027, 'eval_samples_per_second': 411.323, 'eval_steps_per_second': 51.584, 'epoch': 20.0}


100%|██████████| 15240/15240 [20:53<00:00, 12.16it/s]


{'train_runtime': 1253.2361, 'train_samples_per_second': 97.204, 'train_steps_per_second': 12.161, 'train_loss': 0.023589036345794758, 'epoch': 20.0}


100%|██████████| 191/191 [00:03<00:00, 51.77it/s]


0,1
eval/f1,▁▃▅▇▇█▇▇▇▇███████████
eval/loss,▂▂▂▁▂▂▃▆▇▆▅▆▇▅▅█▇▇██▅
eval/precision,█▂▁▄▃▄▄▂▄▁▃▄▄▅▄▄▄▄▅▄▅
eval/recall,▁▃▄▆▆▇▇▇▆██▇▇▇█▇█▇█▇▇
eval/runtime,▁█▄▄▃▁▃▂▄▁▅▂█▂▆▃▄▃▂▂▃
eval/samples_per_second,█▁▅▅▅█▆▇▄█▄▇▁▇▃▆▄▆▇▇▆
eval/steps_per_second,█▁▅▅▅█▆▇▄█▄▇▁▇▃▆▄▆▇▇▆
train/epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▅▆▆▇▇▇████
train/global_step,▁▁▂▂▂▃▃▄▄▄▅▅▅▅▆▆▇▇▇████
train/grad_norm,▁

0,1
eval/f1,0.88651
eval/loss,0.11781
eval/precision,0.92935
eval/recall,0.85769
eval/runtime,3.7286
eval/samples_per_second,408.46
eval/steps_per_second,51.225
total_flos,8013406920576000.0
train/epoch,20.0
train/global_step,15240.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1884/1884 [00:00<00:00, 17202.34 examples/s]
  5%|▌         | 189/3780 [00:13<04:08, 14.46it/s]
  5%|▌         | 189/3780 [00:14<04:08, 14.46it/s]

{'eval_loss': 0.33706367015838623, 'eval_f1': 0.4613946154964128, 'eval_precision': 0.5153256704980842, 'eval_recall': 0.4328257670100758, 'eval_runtime': 0.9365, 'eval_samples_per_second': 402.567, 'eval_steps_per_second': 51.255, 'epoch': 1.0}


 10%|▉         | 377/3780 [00:30<04:05, 13.85it/s]
 10%|█         | 378/3780 [00:31<04:05, 13.85it/s]

{'eval_loss': 0.3130533993244171, 'eval_f1': 0.540423167568564, 'eval_precision': 0.6438770785277804, 'eval_recall': 0.4757897139678774, 'eval_runtime': 0.9058, 'eval_samples_per_second': 416.185, 'eval_steps_per_second': 52.989, 'epoch': 2.0}


 15%|█▌        | 567/3780 [00:46<03:45, 14.28it/s]
 15%|█▌        | 567/3780 [00:47<03:45, 14.28it/s]

{'eval_loss': 0.30680373311042786, 'eval_f1': 0.6418775609972374, 'eval_precision': 0.8461430618293363, 'eval_recall': 0.587175774709886, 'eval_runtime': 0.9531, 'eval_samples_per_second': 395.566, 'eval_steps_per_second': 50.364, 'epoch': 3.0}


 20%|█▉        | 755/3780 [01:02<03:42, 13.60it/s]
 20%|██        | 756/3780 [01:03<03:42, 13.60it/s]

{'eval_loss': 0.36004766821861267, 'eval_f1': 0.6739880351117276, 'eval_precision': 0.7242654192654192, 'eval_recall': 0.645341530525352, 'eval_runtime': 0.9141, 'eval_samples_per_second': 412.419, 'eval_steps_per_second': 52.51, 'epoch': 4.0}


 25%|██▌       | 945/3780 [01:19<03:24, 13.90it/s]
 25%|██▌       | 945/3780 [01:20<03:24, 13.90it/s]

{'eval_loss': 0.3338938057422638, 'eval_f1': 0.7150521057889478, 'eval_precision': 0.7691975189762801, 'eval_recall': 0.6770688223556542, 'eval_runtime': 0.9226, 'eval_samples_per_second': 408.627, 'eval_steps_per_second': 52.027, 'epoch': 5.0}


 30%|██▉       | 1133/3780 [01:37<03:13, 13.67it/s]
 30%|███       | 1134/3780 [01:38<03:13, 13.67it/s]

{'eval_loss': 0.3551078140735626, 'eval_f1': 0.7288386575596778, 'eval_precision': 0.7726689342403629, 'eval_recall': 0.7040661806293406, 'eval_runtime': 0.9433, 'eval_samples_per_second': 399.662, 'eval_steps_per_second': 50.885, 'epoch': 6.0}


 35%|███▌      | 1323/3780 [01:53<02:52, 14.24it/s]
 35%|███▌      | 1323/3780 [01:54<02:52, 14.24it/s]

{'eval_loss': 0.4167894423007965, 'eval_f1': 0.7199634653417906, 'eval_precision': 0.7441544180936086, 'eval_recall': 0.708174889524719, 'eval_runtime': 0.9276, 'eval_samples_per_second': 406.412, 'eval_steps_per_second': 51.745, 'epoch': 7.0}


 40%|███▉      | 1511/3780 [02:10<02:48, 13.45it/s]
 40%|████      | 1512/3780 [02:11<02:48, 13.45it/s]

{'eval_loss': 0.4174942374229431, 'eval_f1': 0.72791324683192, 'eval_precision': 0.7496386531089492, 'eval_recall': 0.719473702746078, 'eval_runtime': 0.9189, 'eval_samples_per_second': 410.267, 'eval_steps_per_second': 52.236, 'epoch': 8.0}


 45%|████▌     | 1701/3780 [02:26<02:30, 13.83it/s]
 45%|████▌     | 1701/3780 [02:27<02:30, 13.83it/s]

{'eval_loss': 0.4380316734313965, 'eval_f1': 0.731202297314988, 'eval_precision': 0.7338150667674477, 'eval_recall': 0.7376158938612238, 'eval_runtime': 0.9234, 'eval_samples_per_second': 408.291, 'eval_steps_per_second': 51.984, 'epoch': 9.0}


 50%|████▉     | 1889/3780 [02:42<02:21, 13.37it/s]
 50%|█████     | 1890/3780 [02:43<02:21, 13.37it/s]

{'eval_loss': 0.4151136577129364, 'eval_f1': 0.7450098555244689, 'eval_precision': 0.7640900900900901, 'eval_recall': 0.7304335057528246, 'eval_runtime': 0.929, 'eval_samples_per_second': 405.802, 'eval_steps_per_second': 51.667, 'epoch': 10.0}


 55%|█████▍    | 2078/3780 [03:00<02:11, 12.92it/s]
 55%|█████▌    | 2079/3780 [03:01<02:11, 12.92it/s]

{'eval_loss': 0.4588054120540619, 'eval_f1': 0.7360109114669429, 'eval_precision': 0.7322644230583572, 'eval_recall': 0.752069030965425, 'eval_runtime': 0.9272, 'eval_samples_per_second': 406.613, 'eval_steps_per_second': 51.77, 'epoch': 11.0}


 60%|█████▉    | 2267/3780 [03:17<01:51, 13.51it/s]
 60%|██████    | 2268/3780 [03:18<01:51, 13.51it/s]

{'eval_loss': 0.442125141620636, 'eval_f1': 0.7605529509308474, 'eval_precision': 0.7668108497317422, 'eval_recall': 0.7574985625849235, 'eval_runtime': 0.9223, 'eval_samples_per_second': 408.781, 'eval_steps_per_second': 52.046, 'epoch': 12.0}


 65%|██████▌   | 2457/3780 [03:33<01:36, 13.73it/s]
 65%|██████▌   | 2457/3780 [03:34<01:36, 13.73it/s]

{'eval_loss': 0.475790411233902, 'eval_f1': 0.7423241485649127, 'eval_precision': 0.7619030552430468, 'eval_recall': 0.729549797261774, 'eval_runtime': 0.9384, 'eval_samples_per_second': 401.749, 'eval_steps_per_second': 51.151, 'epoch': 13.0}


 70%|██████▉   | 2645/3780 [03:50<01:26, 13.07it/s]
 70%|███████   | 2646/3780 [03:51<01:26, 13.07it/s]

{'eval_loss': 0.4786747694015503, 'eval_f1': 0.7560404853996306, 'eval_precision': 0.7685367384696711, 'eval_recall': 0.7510130072879816, 'eval_runtime': 0.9773, 'eval_samples_per_second': 385.769, 'eval_steps_per_second': 49.116, 'epoch': 14.0}


 75%|███████▌  | 2835/3780 [04:07<01:09, 13.60it/s]
 75%|███████▌  | 2835/3780 [04:08<01:09, 13.60it/s]

{'eval_loss': 0.49126890301704407, 'eval_f1': 0.7439778809997788, 'eval_precision': 0.7588337373170821, 'eval_recall': 0.7360013937007819, 'eval_runtime': 0.9507, 'eval_samples_per_second': 396.546, 'eval_steps_per_second': 50.489, 'epoch': 15.0}


 80%|███████▉  | 3023/3780 [04:25<00:57, 13.18it/s]
 80%|████████  | 3024/3780 [04:26<00:57, 13.18it/s]

{'eval_loss': 0.5088483095169067, 'eval_f1': 0.744391367860999, 'eval_precision': 0.7518091590478544, 'eval_recall': 0.7421913401630766, 'eval_runtime': 0.9404, 'eval_samples_per_second': 400.876, 'eval_steps_per_second': 51.04, 'epoch': 16.0}


 85%|████████▌ | 3213/3780 [04:42<00:43, 13.15it/s]
 85%|████████▌ | 3213/3780 [04:42<00:43, 13.15it/s]

{'eval_loss': 0.50049889087677, 'eval_f1': 0.749583145507785, 'eval_precision': 0.765085704419753, 'eval_recall': 0.7376591786306714, 'eval_runtime': 0.9697, 'eval_samples_per_second': 388.797, 'eval_steps_per_second': 49.502, 'epoch': 17.0}


 90%|████████▉ | 3401/3780 [04:58<00:28, 13.42it/s]
 90%|█████████ | 3402/3780 [04:59<00:28, 13.42it/s]

{'eval_loss': 0.523382842540741, 'eval_f1': 0.7419540503923129, 'eval_precision': 0.747397297079864, 'eval_recall': 0.7445498347646542, 'eval_runtime': 0.9507, 'eval_samples_per_second': 396.552, 'eval_steps_per_second': 50.489, 'epoch': 18.0}


 95%|█████████▌| 3591/3780 [05:15<00:13, 13.69it/s]
 95%|█████████▌| 3591/3780 [05:16<00:13, 13.69it/s]

{'eval_loss': 0.5099475383758545, 'eval_f1': 0.7495300400110563, 'eval_precision': 0.7629179583080621, 'eval_recall': 0.7411045066976628, 'eval_runtime': 0.9372, 'eval_samples_per_second': 402.273, 'eval_steps_per_second': 51.218, 'epoch': 19.0}


100%|█████████▉| 3779/3780 [05:32<00:00, 13.31it/s]
100%|██████████| 3780/3780 [05:34<00:00, 13.31it/s]

{'eval_loss': 0.511868417263031, 'eval_f1': 0.7550215715759243, 'eval_precision': 0.7654711771645801, 'eval_recall': 0.7496413341743354, 'eval_runtime': 0.9197, 'eval_samples_per_second': 409.895, 'eval_steps_per_second': 52.188, 'epoch': 20.0}


100%|██████████| 3780/3780 [05:36<00:00, 11.23it/s]


{'train_runtime': 336.4615, 'train_samples_per_second': 89.579, 'train_steps_per_second': 11.235, 'train_loss': 0.07386933987733549, 'epoch': 20.0}


100%|██████████| 48/48 [00:01<00:00, 47.50it/s]


0,1
eval/f1,▁▃▅▆▇▇▇▇▇█▇██████████
eval/loss,▂▁▁▃▂▃▅▅▅▅▆▅▆▇▇█▇███▅
eval/precision,▁▄█▅▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆
eval/recall,▁▂▄▆▆▇▇▇█▇██▇████████
eval/runtime,▂▁▃▁▂▃▂▂▂▂▂▂▃▄▃▃▄▃▂▂█
eval/samples_per_second,▆█▆█▇▆▇▇▇▇▇▇▆▄▆▆▅▆▆▇▁
eval/steps_per_second,▆█▆█▇▆▇▇▇▇▇▇▆▄▆▆▅▆▆▇▁
train/epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇████
train/global_step,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇████

0,1
eval/f1,0.76055
eval/loss,0.44213
eval/precision,0.76681
eval/recall,0.7575
eval/runtime,1.0549
eval/samples_per_second,357.381
eval/steps_per_second,45.502
total_flos,1982595203466240.0
train/epoch,20.0
train/global_step,3780.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1298/1298 [00:00<00:00, 14824.45 examples/s]
  5%|▍         | 129/2600 [00:09<02:59, 13.80it/s]
  5%|▌         | 130/2600 [00:10<02:59, 13.80it/s]

{'eval_loss': 0.2977910041809082, 'eval_f1': 0.12244897959183673, 'eval_precision': 0.12631578947368421, 'eval_recall': 0.1188118811881188, 'eval_runtime': 0.6211, 'eval_samples_per_second': 418.635, 'eval_steps_per_second': 53.134, 'epoch': 1.0}


 10%|▉         | 259/2600 [00:21<02:56, 13.26it/s]
 10%|█         | 260/2600 [00:22<02:56, 13.26it/s]

{'eval_loss': 0.2337455153465271, 'eval_f1': 0.44538530706563495, 'eval_precision': 0.6018249405025168, 'eval_recall': 0.36988338160095235, 'eval_runtime': 0.6646, 'eval_samples_per_second': 391.212, 'eval_steps_per_second': 49.654, 'epoch': 2.0}


 15%|█▍        | 389/2600 [00:33<02:41, 13.70it/s]
 15%|█▌        | 390/2600 [00:33<02:41, 13.70it/s]

{'eval_loss': 0.2353605180978775, 'eval_f1': 0.52694282753762, 'eval_precision': 0.5637832405689548, 'eval_recall': 0.5006429568157355, 'eval_runtime': 0.6435, 'eval_samples_per_second': 404.058, 'eval_steps_per_second': 51.284, 'epoch': 3.0}


 20%|█▉        | 519/2600 [00:44<02:33, 13.53it/s]
 20%|██        | 520/2600 [00:45<02:33, 13.53it/s]

{'eval_loss': 0.2217971384525299, 'eval_f1': 0.5833826596868495, 'eval_precision': 0.8643712476387053, 'eval_recall': 0.5360520677841947, 'eval_runtime': 0.6606, 'eval_samples_per_second': 393.563, 'eval_steps_per_second': 49.952, 'epoch': 4.0}


 25%|██▍       | 649/2600 [00:56<02:24, 13.50it/s]
 25%|██▌       | 650/2600 [00:57<02:24, 13.50it/s]

{'eval_loss': 0.23374953866004944, 'eval_f1': 0.628184001447182, 'eval_precision': 0.7512015634135449, 'eval_recall': 0.5719223183602269, 'eval_runtime': 0.6316, 'eval_samples_per_second': 411.635, 'eval_steps_per_second': 52.246, 'epoch': 5.0}


 30%|███       | 780/2600 [01:09<02:16, 13.30it/s]
 30%|███       | 780/2600 [01:10<02:16, 13.30it/s]

{'eval_loss': 0.22713860869407654, 'eval_f1': 0.6474653322867608, 'eval_precision': 0.7629737209569142, 'eval_recall': 0.5829220791905733, 'eval_runtime': 0.661, 'eval_samples_per_second': 393.331, 'eval_steps_per_second': 49.923, 'epoch': 6.0}


 35%|███▌      | 910/2600 [01:21<02:01, 13.95it/s]
 35%|███▌      | 910/2600 [01:22<02:01, 13.95it/s]

{'eval_loss': 0.23116470873355865, 'eval_f1': 0.6943859345631138, 'eval_precision': 0.7876696270510705, 'eval_recall': 0.6364350145278028, 'eval_runtime': 0.6388, 'eval_samples_per_second': 407.036, 'eval_steps_per_second': 51.662, 'epoch': 7.0}


 40%|████      | 1040/2600 [01:34<01:57, 13.33it/s]
 40%|████      | 1040/2600 [01:35<01:57, 13.33it/s]

{'eval_loss': 0.24788565933704376, 'eval_f1': 0.688791761329472, 'eval_precision': 0.7844369964891857, 'eval_recall': 0.6334691916525793, 'eval_runtime': 0.6551, 'eval_samples_per_second': 396.89, 'eval_steps_per_second': 50.374, 'epoch': 8.0}


 45%|████▌     | 1170/2600 [01:46<01:44, 13.63it/s]
 45%|████▌     | 1170/2600 [01:46<01:44, 13.63it/s]

{'eval_loss': 0.2534095048904419, 'eval_f1': 0.6798300246972383, 'eval_precision': 0.7552250319194211, 'eval_recall': 0.6387761769526588, 'eval_runtime': 0.6375, 'eval_samples_per_second': 407.819, 'eval_steps_per_second': 51.762, 'epoch': 9.0}


 50%|█████     | 1300/2600 [01:58<01:36, 13.41it/s]
 50%|█████     | 1300/2600 [01:58<01:36, 13.41it/s]

{'eval_loss': 0.2539650499820709, 'eval_f1': 0.7232660094350424, 'eval_precision': 0.7903906801596327, 'eval_recall': 0.6762261351715078, 'eval_runtime': 0.6591, 'eval_samples_per_second': 394.458, 'eval_steps_per_second': 50.066, 'epoch': 10.0}


 55%|█████▌    | 1430/2600 [02:10<01:25, 13.74it/s]
 55%|█████▌    | 1430/2600 [02:10<01:25, 13.74it/s]

{'eval_loss': 0.2670820951461792, 'eval_f1': 0.7089386843387474, 'eval_precision': 0.7699859906127402, 'eval_recall': 0.6658511328427904, 'eval_runtime': 0.6315, 'eval_samples_per_second': 411.747, 'eval_steps_per_second': 52.26, 'epoch': 11.0}


 60%|██████    | 1560/2600 [02:22<01:16, 13.63it/s]
 60%|██████    | 1560/2600 [02:23<01:16, 13.63it/s]

{'eval_loss': 0.2789032459259033, 'eval_f1': 0.7045702663349722, 'eval_precision': 0.7740482135103282, 'eval_recall': 0.6584889350282926, 'eval_runtime': 0.6509, 'eval_samples_per_second': 399.436, 'eval_steps_per_second': 50.698, 'epoch': 12.0}


 65%|██████▌   | 1690/2600 [02:34<01:06, 13.63it/s]
 65%|██████▌   | 1690/2600 [02:35<01:06, 13.63it/s]

{'eval_loss': 0.27644145488739014, 'eval_f1': 0.7345874347009973, 'eval_precision': 0.8163785925777509, 'eval_recall': 0.6785107717984359, 'eval_runtime': 0.6499, 'eval_samples_per_second': 400.054, 'eval_steps_per_second': 50.776, 'epoch': 13.0}


 70%|███████   | 1820/2600 [02:46<00:57, 13.63it/s]
 70%|███████   | 1820/2600 [02:47<00:57, 13.63it/s]

{'eval_loss': 0.276349812746048, 'eval_f1': 0.7069468228975511, 'eval_precision': 0.7838257502731187, 'eval_recall': 0.6594697304819724, 'eval_runtime': 0.6468, 'eval_samples_per_second': 401.949, 'eval_steps_per_second': 51.017, 'epoch': 14.0}


 75%|███████▌  | 1950/2600 [03:00<00:47, 13.72it/s]
 75%|███████▌  | 1950/2600 [03:00<00:47, 13.72it/s]

{'eval_loss': 0.29612550139427185, 'eval_f1': 0.7172506958561918, 'eval_precision': 0.7981130622699005, 'eval_recall': 0.6625536545930115, 'eval_runtime': 0.6541, 'eval_samples_per_second': 397.501, 'eval_steps_per_second': 50.452, 'epoch': 15.0}


 80%|████████  | 2080/2600 [03:12<00:37, 13.73it/s]
 80%|████████  | 2080/2600 [03:12<00:37, 13.73it/s]

{'eval_loss': 0.291095495223999, 'eval_f1': 0.7262221687442386, 'eval_precision': 0.8023050887021475, 'eval_recall': 0.6736599918647623, 'eval_runtime': 0.6501, 'eval_samples_per_second': 399.92, 'eval_steps_per_second': 50.759, 'epoch': 16.0}


 85%|████████▌ | 2210/2600 [03:23<00:28, 13.63it/s]
 85%|████████▌ | 2210/2600 [03:24<00:28, 13.63it/s]

{'eval_loss': 0.2970832586288452, 'eval_f1': 0.7195359237185878, 'eval_precision': 0.7780124170880472, 'eval_recall': 0.6765171347219051, 'eval_runtime': 0.6619, 'eval_samples_per_second': 392.837, 'eval_steps_per_second': 49.86, 'epoch': 17.0}


 90%|█████████ | 2340/2600 [03:35<00:19, 13.60it/s]
 90%|█████████ | 2340/2600 [03:36<00:19, 13.60it/s]

{'eval_loss': 0.29760926961898804, 'eval_f1': 0.7251137701638463, 'eval_precision': 0.799745446565723, 'eval_recall': 0.6733019539127394, 'eval_runtime': 0.6564, 'eval_samples_per_second': 396.077, 'eval_steps_per_second': 50.271, 'epoch': 18.0}


 95%|█████████▌| 2470/2600 [03:48<00:09, 13.61it/s]
 95%|█████████▌| 2470/2600 [03:48<00:09, 13.61it/s]

{'eval_loss': 0.2984340488910675, 'eval_f1': 0.7252811111432214, 'eval_precision': 0.7950192198091358, 'eval_recall': 0.67706135240898, 'eval_runtime': 0.6655, 'eval_samples_per_second': 390.691, 'eval_steps_per_second': 49.588, 'epoch': 19.0}


100%|██████████| 2600/2600 [04:00<00:00, 13.69it/s]
100%|██████████| 2600/2600 [04:02<00:00, 13.69it/s]

{'eval_loss': 0.2993040084838867, 'eval_f1': 0.7252811111432214, 'eval_precision': 0.7950192198091358, 'eval_recall': 0.67706135240898, 'eval_runtime': 0.6421, 'eval_samples_per_second': 404.898, 'eval_steps_per_second': 51.391, 'epoch': 20.0}


100%|██████████| 2600/2600 [04:04<00:00, 10.65it/s]


{'train_runtime': 244.1652, 'train_samples_per_second': 85.024, 'train_steps_per_second': 10.649, 'train_loss': 0.06484808114858774, 'epoch': 20.0}


100%|██████████| 33/33 [00:00<00:00, 46.92it/s]


------------------ Starting model ==> epochs: 25, batch size: 4, weights of decay: 0.01 ---------------------


0,1
eval/f1,▁▅▆▆▇▇█▇▇████████████
eval/loss,█▂▂▁▂▁▂▃▄▄▅▆▆▆█▇████▆
eval/precision,▁▆▅█▇▇▇▇▇▇▇▇█▇▇▇▇▇▇▇█
eval/recall,▁▄▆▆▇▇▇▇█████████████
eval/runtime,▁▃▂▃▂▃▂▃▂▃▂▃▃▂▃▃▃▃▄▂█
eval/samples_per_second,█▅▇▅▇▅▇▆▇▆▇▆▆▆▆▆▅▆▅▇▁
eval/steps_per_second,█▅▇▅▇▅▇▆▇▆▇▆▆▆▆▆▅▆▅▇▁
train/epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇████
train/global_step,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇████

0,1
eval/f1,0.73459
eval/loss,0.27644
eval/precision,0.81638
eval/recall,0.67851
eval/runtime,0.7439
eval/samples_per_second,349.532
eval/steps_per_second,44.364
total_flos,1365607680768000.0
train/epoch,20.0
train/global_step,2600.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 7614/7614 [00:00<00:00, 21368.42 examples/s]
  4%|▍         | 1523/38075 [01:34<39:59, 15.23it/s]
  4%|▍         | 1523/38075 [01:40<39:59, 15.23it/s]

{'eval_loss': 0.11835820972919464, 'eval_f1': 0.6006663534269971, 'eval_precision': 0.6845485468274984, 'eval_recall': 0.5618575517562114, 'eval_runtime': 6.4423, 'eval_samples_per_second': 236.407, 'eval_steps_per_second': 59.14, 'epoch': 1.0}


  8%|▊         | 3045/38075 [03:17<34:20, 17.00it/s]   
  8%|▊         | 3046/38075 [03:23<34:20, 17.00it/s]

{'eval_loss': 0.10390178114175797, 'eval_f1': 0.7147121648840439, 'eval_precision': 0.771649681863236, 'eval_recall': 0.6986124820115167, 'eval_runtime': 5.3151, 'eval_samples_per_second': 286.545, 'eval_steps_per_second': 71.683, 'epoch': 2.0}


 12%|█▏        | 4569/38075 [05:00<33:46, 16.53it/s]   
 12%|█▏        | 4569/38075 [05:05<33:46, 16.53it/s]

{'eval_loss': 0.10442852973937988, 'eval_f1': 0.8288191215994392, 'eval_precision': 0.9107390158081292, 'eval_recall': 0.78938123500168, 'eval_runtime': 5.4166, 'eval_samples_per_second': 281.173, 'eval_steps_per_second': 70.339, 'epoch': 3.0}


 16%|█▌        | 6091/38075 [06:43<35:32, 15.00it/s]   
 16%|█▌        | 6092/38075 [06:49<35:32, 15.00it/s]

{'eval_loss': 0.09618503600358963, 'eval_f1': 0.8552894308601413, 'eval_precision': 0.8941116858443546, 'eval_recall': 0.8310816078137195, 'eval_runtime': 6.4785, 'eval_samples_per_second': 235.086, 'eval_steps_per_second': 58.81, 'epoch': 4.0}


 20%|██        | 7615/38075 [08:26<29:44, 17.07it/s]   
 20%|██        | 7615/38075 [08:31<29:44, 17.07it/s]

{'eval_loss': 0.1104944497346878, 'eval_f1': 0.8366160804294775, 'eval_precision': 0.8538178159794806, 'eval_recall': 0.8221018624706874, 'eval_runtime': 5.4322, 'eval_samples_per_second': 280.363, 'eval_steps_per_second': 70.137, 'epoch': 5.0}


 24%|██▍       | 9137/38075 [10:08<28:44, 16.78it/s]  
 24%|██▍       | 9138/38075 [10:13<28:44, 16.78it/s]

{'eval_loss': 0.10024699568748474, 'eval_f1': 0.865799185118802, 'eval_precision': 0.8721620931017985, 'eval_recall': 0.8602979339494541, 'eval_runtime': 5.259, 'eval_samples_per_second': 289.601, 'eval_steps_per_second': 72.448, 'epoch': 6.0}


 26%|██▋       | 10003/38075 [11:09<29:41, 15.76it/s] 

{'loss': 0.0712, 'grad_norm': 0.018366245552897453, 'learning_rate': 3.6868023637557454e-05, 'epoch': 6.57}


 28%|██▊       | 10661/38075 [11:50<28:53, 15.81it/s]
 28%|██▊       | 10661/38075 [11:56<28:53, 15.81it/s]

{'eval_loss': 0.11304350942373276, 'eval_f1': 0.8639128846983294, 'eval_precision': 0.9043765035627702, 'eval_recall': 0.838940967223951, 'eval_runtime': 6.2497, 'eval_samples_per_second': 243.693, 'eval_steps_per_second': 60.963, 'epoch': 7.0}


 32%|███▏      | 12183/38075 [13:33<26:58, 16.00it/s]  
 32%|███▏      | 12184/38075 [13:38<26:58, 16.00it/s]

{'eval_loss': 0.11525370925664902, 'eval_f1': 0.8551527811483456, 'eval_precision': 0.8923597700261015, 'eval_recall': 0.8299196347885847, 'eval_runtime': 5.3887, 'eval_samples_per_second': 282.63, 'eval_steps_per_second': 70.704, 'epoch': 8.0}


 36%|███▌      | 13707/38075 [15:15<24:33, 16.54it/s]  
 36%|███▌      | 13707/38075 [15:21<24:33, 16.54it/s]

{'eval_loss': 0.12141165882349014, 'eval_f1': 0.8675977306320204, 'eval_precision': 0.8928191340530376, 'eval_recall': 0.8483488166467995, 'eval_runtime': 5.7224, 'eval_samples_per_second': 266.145, 'eval_steps_per_second': 66.58, 'epoch': 9.0}


 40%|███▉      | 15229/38075 [16:58<25:21, 15.02it/s]  
 40%|████      | 15230/38075 [17:05<25:21, 15.02it/s]

{'eval_loss': 0.13904878497123718, 'eval_f1': 0.8751562446704033, 'eval_precision': 0.89873421515257, 'eval_recall': 0.8579742180611362, 'eval_runtime': 6.5501, 'eval_samples_per_second': 232.514, 'eval_steps_per_second': 58.167, 'epoch': 10.0}


 44%|████▍     | 16753/38075 [18:42<22:17, 15.94it/s]  
 44%|████▍     | 16753/38075 [18:47<22:17, 15.94it/s]

{'eval_loss': 0.1353825479745865, 'eval_f1': 0.8618716046371088, 'eval_precision': 0.8836806327600183, 'eval_recall': 0.8455772031258062, 'eval_runtime': 5.7705, 'eval_samples_per_second': 263.927, 'eval_steps_per_second': 66.025, 'epoch': 11.0}


 48%|████▊     | 18275/38075 [20:24<19:56, 16.55it/s]  
 48%|████▊     | 18276/38075 [20:30<19:56, 16.55it/s]

{'eval_loss': 0.13291563093662262, 'eval_f1': 0.8676573446118427, 'eval_precision': 0.9097826362287235, 'eval_recall': 0.8410490606863893, 'eval_runtime': 5.3932, 'eval_samples_per_second': 282.39, 'eval_steps_per_second': 70.644, 'epoch': 12.0}


 52%|█████▏    | 19799/38075 [22:07<18:45, 16.24it/s]  
 52%|█████▏    | 19799/38075 [22:13<18:45, 16.24it/s]

{'eval_loss': 0.1531919538974762, 'eval_f1': 0.8608357726557403, 'eval_precision': 0.8601502725140715, 'eval_recall': 0.8626270145086368, 'eval_runtime': 6.4726, 'eval_samples_per_second': 235.299, 'eval_steps_per_second': 58.863, 'epoch': 13.0}


 53%|█████▎    | 20003/38075 [22:27<17:47, 16.92it/s]  

{'loss': 0.0172, 'grad_norm': 0.0017248104559257627, 'learning_rate': 2.3736047275114905e-05, 'epoch': 13.13}


 56%|█████▌    | 21321/38075 [23:50<16:44, 16.69it/s]
 56%|█████▌    | 21322/38075 [23:57<16:44, 16.69it/s]

{'eval_loss': 0.14761556684970856, 'eval_f1': 0.8649612106913508, 'eval_precision': 0.8621062459961918, 'eval_recall': 0.8714102460174803, 'eval_runtime': 6.4516, 'eval_samples_per_second': 236.066, 'eval_steps_per_second': 59.055, 'epoch': 14.0}


 60%|██████    | 22845/38075 [25:35<15:33, 16.31it/s]  
 60%|██████    | 22845/38075 [25:40<15:33, 16.31it/s]

{'eval_loss': 0.15071675181388855, 'eval_f1': 0.874672954414824, 'eval_precision': 0.8987106346922141, 'eval_recall': 0.8537620552581229, 'eval_runtime': 5.1578, 'eval_samples_per_second': 295.279, 'eval_steps_per_second': 73.868, 'epoch': 15.0}


 64%|██████▍   | 24367/38075 [27:18<13:59, 16.33it/s]  
 64%|██████▍   | 24368/38075 [27:23<13:59, 16.33it/s]

{'eval_loss': 0.15302154421806335, 'eval_f1': 0.8633168921520032, 'eval_precision': 0.8569464892937376, 'eval_recall': 0.8798567952635988, 'eval_runtime': 5.4615, 'eval_samples_per_second': 278.862, 'eval_steps_per_second': 69.761, 'epoch': 16.0}


 68%|██████▊   | 25891/38075 [29:01<13:34, 14.96it/s]  
 68%|██████▊   | 25891/38075 [29:07<13:34, 14.96it/s]

{'eval_loss': 0.13695719838142395, 'eval_f1': 0.8737516084926528, 'eval_precision': 0.8888427673672215, 'eval_recall': 0.86377261936172, 'eval_runtime': 6.1778, 'eval_samples_per_second': 246.528, 'eval_steps_per_second': 61.672, 'epoch': 17.0}


 72%|███████▏  | 27413/38075 [30:45<10:23, 17.10it/s]  
 72%|███████▏  | 27414/38075 [30:51<10:23, 17.10it/s]

{'eval_loss': 0.15706858038902283, 'eval_f1': 0.8649692535171359, 'eval_precision': 0.8887070389187933, 'eval_recall': 0.8485827926321745, 'eval_runtime': 6.3621, 'eval_samples_per_second': 239.387, 'eval_steps_per_second': 59.886, 'epoch': 18.0}


 76%|███████▌  | 28937/38075 [32:29<09:55, 15.35it/s]  
 76%|███████▌  | 28937/38075 [32:34<09:55, 15.35it/s]

{'eval_loss': 0.1567983776330948, 'eval_f1': 0.8753863678040676, 'eval_precision': 0.8671943689309195, 'eval_recall': 0.8852858633025162, 'eval_runtime': 5.5248, 'eval_samples_per_second': 275.668, 'eval_steps_per_second': 68.962, 'epoch': 19.0}


 79%|███████▉  | 30003/38075 [33:43<08:45, 15.35it/s]  

{'loss': 0.0053, 'grad_norm': 0.0035644464660435915, 'learning_rate': 1.0604070912672358e-05, 'epoch': 19.7}


 80%|███████▉  | 30459/38075 [34:12<07:35, 16.73it/s]
 80%|████████  | 30460/38075 [34:18<07:35, 16.73it/s]

{'eval_loss': 0.15190498530864716, 'eval_f1': 0.8729162834282379, 'eval_precision': 0.8760879218747863, 'eval_recall': 0.8703653686789276, 'eval_runtime': 5.7865, 'eval_samples_per_second': 263.197, 'eval_steps_per_second': 65.842, 'epoch': 20.0}


 84%|████████▍ | 31983/38075 [35:55<06:34, 15.45it/s]  
 84%|████████▍ | 31983/38075 [36:01<06:34, 15.45it/s]

{'eval_loss': 0.15894067287445068, 'eval_f1': 0.8724082842541021, 'eval_precision': 0.8845431519520719, 'eval_recall': 0.8615567837074288, 'eval_runtime': 6.353, 'eval_samples_per_second': 239.728, 'eval_steps_per_second': 59.971, 'epoch': 21.0}


 88%|████████▊ | 33505/38075 [37:39<04:54, 15.54it/s]  
 88%|████████▊ | 33506/38075 [37:45<04:53, 15.54it/s]

{'eval_loss': 0.15950804948806763, 'eval_f1': 0.8715301670575804, 'eval_precision': 0.8645124121946652, 'eval_recall': 0.8807726761652497, 'eval_runtime': 5.8796, 'eval_samples_per_second': 259.03, 'eval_steps_per_second': 64.8, 'epoch': 22.0}


 92%|█████████▏| 35029/38075 [39:23<03:07, 16.20it/s]  
 92%|█████████▏| 35029/38075 [39:28<03:07, 16.20it/s]

{'eval_loss': 0.1654374748468399, 'eval_f1': 0.8688375593109037, 'eval_precision': 0.8792620830964099, 'eval_recall': 0.8593875568107825, 'eval_runtime': 5.4368, 'eval_samples_per_second': 280.127, 'eval_steps_per_second': 70.078, 'epoch': 23.0}


 96%|█████████▌| 36551/38075 [41:06<01:32, 16.49it/s]
 96%|█████████▌| 36552/38075 [41:12<01:32, 16.49it/s]

{'eval_loss': 0.16705293953418732, 'eval_f1': 0.8752784092822551, 'eval_precision': 0.8946821123086026, 'eval_recall': 0.859215653056393, 'eval_runtime': 5.7092, 'eval_samples_per_second': 266.764, 'eval_steps_per_second': 66.735, 'epoch': 24.0}


100%|██████████| 38075/38075 [42:49<00:00, 15.16it/s]
100%|██████████| 38075/38075 [42:57<00:00, 15.16it/s]

{'eval_loss': 0.16937501728534698, 'eval_f1': 0.8772985156331129, 'eval_precision': 0.8972585636778624, 'eval_recall': 0.8616328842681168, 'eval_runtime': 6.5114, 'eval_samples_per_second': 233.897, 'eval_steps_per_second': 58.513, 'epoch': 25.0}


100%|██████████| 38075/38075 [42:59<00:00, 14.76it/s]


{'train_runtime': 2579.0765, 'train_samples_per_second': 59.042, 'train_steps_per_second': 14.763, 'train_loss': 0.024939380352699044, 'epoch': 25.0}


100%|██████████| 381/381 [00:05<00:00, 70.01it/s]


0,1
eval/f1,▁▄▇▇▇██▇██████████████████
eval/loss,▃▂▂▁▂▁▃▃▃▅▅▅▆▆▆▆▅▇▇▆▇▇████
eval/precision,▁▄█▇▆▇█▇▇█▇█▆▆█▆▇▇▇▇▇▇▇███
eval/recall,▁▄▆▇▇▇▇▇▇▇▇▇██▇██▇██▇█▇▇▇▇
eval/runtime,▇▂▂█▂▂▆▂▄█▄▂██▁▃▆▇▃▄▇▅▂▄█▃
eval/samples_per_second,▁▇▆▁▆▇▂▇▅▁▅▇▁▁█▆▃▂▆▄▂▄▆▅▁▆
eval/steps_per_second,▁▇▆▁▆▇▂▇▅▁▅▇▁▁█▆▃▂▆▄▂▄▆▅▁▆
train/epoch,▁▁▂▂▂▂▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇████
train/grad_norm,█▁▂

0,1
eval/f1,0.8773
eval/loss,0.16938
eval/precision,0.89726
eval/recall,0.86163
eval/runtime,5.469
eval/samples_per_second,278.48
eval/steps_per_second,69.666
total_flos,1.001675865072e+16
train/epoch,25.0
train/global_step,38075.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1884/1884 [00:00<00:00, 15158.56 examples/s]
  4%|▍         | 376/9425 [00:24<09:22, 16.09it/s]
  4%|▍         | 377/9425 [00:26<09:22, 16.09it/s]

{'eval_loss': 0.34859809279441833, 'eval_f1': 0.3128462826955289, 'eval_precision': 0.38251467408093914, 'eval_recall': 0.2651711924439197, 'eval_runtime': 1.6245, 'eval_samples_per_second': 232.077, 'eval_steps_per_second': 58.481, 'epoch': 1.0}


  8%|▊         | 754/9425 [00:51<09:22, 15.42it/s]  
  8%|▊         | 754/9425 [00:53<09:22, 15.42it/s]

{'eval_loss': 0.31932148337364197, 'eval_f1': 0.516860275116104, 'eval_precision': 0.6247809940963626, 'eval_recall': 0.456200890271495, 'eval_runtime': 1.9989, 'eval_samples_per_second': 188.601, 'eval_steps_per_second': 47.526, 'epoch': 2.0}


 12%|█▏        | 1130/9425 [01:17<09:14, 14.97it/s] 
 12%|█▏        | 1131/9425 [01:19<09:14, 14.97it/s]

{'eval_loss': 0.32955020666122437, 'eval_f1': 0.5714003867834819, 'eval_precision': 0.8085644218072062, 'eval_recall': 0.539751527011801, 'eval_runtime': 1.7168, 'eval_samples_per_second': 219.591, 'eval_steps_per_second': 55.334, 'epoch': 3.0}


 16%|█▌        | 1508/9425 [01:45<08:53, 14.85it/s]  
 16%|█▌        | 1508/9425 [01:46<08:53, 14.85it/s]

{'eval_loss': 0.32104095816612244, 'eval_f1': 0.6892531174281429, 'eval_precision': 0.7502250437721156, 'eval_recall': 0.6431873270033159, 'eval_runtime': 1.7821, 'eval_samples_per_second': 211.547, 'eval_steps_per_second': 53.308, 'epoch': 4.0}


 20%|█▉        | 1884/9425 [02:12<07:59, 15.74it/s]  
 20%|██        | 1885/9425 [02:13<07:59, 15.74it/s]

{'eval_loss': 0.37986961007118225, 'eval_f1': 0.7086943954186687, 'eval_precision': 0.7609033253980062, 'eval_recall': 0.6796810110495205, 'eval_runtime': 1.6419, 'eval_samples_per_second': 229.617, 'eval_steps_per_second': 57.861, 'epoch': 5.0}


 24%|██▍       | 2262/9425 [02:38<07:58, 14.98it/s]  
 24%|██▍       | 2262/9425 [02:40<07:58, 14.98it/s]

{'eval_loss': 0.42388713359832764, 'eval_f1': 0.7257798783648602, 'eval_precision': 0.7589302809731487, 'eval_recall': 0.7046537716529324, 'eval_runtime': 1.5715, 'eval_samples_per_second': 239.9, 'eval_steps_per_second': 60.452, 'epoch': 6.0}


 28%|██▊       | 2638/9425 [03:06<07:20, 15.40it/s]  
 28%|██▊       | 2639/9425 [03:08<07:20, 15.40it/s]

{'eval_loss': 0.43899860978126526, 'eval_f1': 0.7221631240696921, 'eval_precision': 0.743875720673904, 'eval_recall': 0.7107800130275631, 'eval_runtime': 1.9273, 'eval_samples_per_second': 195.612, 'eval_steps_per_second': 49.292, 'epoch': 7.0}


 32%|███▏      | 3016/9425 [03:33<06:42, 15.91it/s]  
 32%|███▏      | 3016/9425 [03:35<06:42, 15.91it/s]

{'eval_loss': 0.41661885380744934, 'eval_f1': 0.7548364854392003, 'eval_precision': 0.7597441777978617, 'eval_recall': 0.75590155283019, 'eval_runtime': 1.8097, 'eval_samples_per_second': 208.318, 'eval_steps_per_second': 52.494, 'epoch': 8.0}


 36%|███▌      | 3392/9425 [04:00<06:45, 14.89it/s]  
 36%|███▌      | 3393/9425 [04:02<06:45, 14.89it/s]

{'eval_loss': 0.4611753523349762, 'eval_f1': 0.7496196611927948, 'eval_precision': 0.753974457552857, 'eval_recall': 0.7476487516580376, 'eval_runtime': 1.759, 'eval_samples_per_second': 214.325, 'eval_steps_per_second': 54.008, 'epoch': 9.0}


 40%|████      | 3770/9425 [04:27<06:04, 15.51it/s]
 40%|████      | 3770/9425 [04:29<06:04, 15.51it/s]

{'eval_loss': 0.4817638099193573, 'eval_f1': 0.7426059091320745, 'eval_precision': 0.736393104411576, 'eval_recall': 0.7580021032083949, 'eval_runtime': 1.8655, 'eval_samples_per_second': 202.086, 'eval_steps_per_second': 50.924, 'epoch': 10.0}


 44%|████▍     | 4146/9425 [04:54<05:30, 15.99it/s]
 44%|████▍     | 4147/9425 [04:56<05:30, 15.99it/s]

{'eval_loss': 0.5467368364334106, 'eval_f1': 0.7220955802946027, 'eval_precision': 0.7315925455884484, 'eval_recall': 0.7342150477259308, 'eval_runtime': 1.6412, 'eval_samples_per_second': 229.705, 'eval_steps_per_second': 57.883, 'epoch': 11.0}


 48%|████▊     | 4524/9425 [05:21<04:57, 16.45it/s]
 48%|████▊     | 4524/9425 [05:22<04:57, 16.45it/s]

{'eval_loss': 0.5433095693588257, 'eval_f1': 0.7304120293998577, 'eval_precision': 0.7224561014337085, 'eval_recall': 0.7457782591819356, 'eval_runtime': 1.7019, 'eval_samples_per_second': 221.52, 'eval_steps_per_second': 55.821, 'epoch': 12.0}


 52%|█████▏    | 4900/9425 [05:48<04:30, 16.75it/s]
 52%|█████▏    | 4901/9425 [05:50<04:30, 16.75it/s]

{'eval_loss': 0.528772234916687, 'eval_f1': 0.7516856474591657, 'eval_precision': 0.7521658278103229, 'eval_recall': 0.755877426758633, 'eval_runtime': 1.4842, 'eval_samples_per_second': 254.013, 'eval_steps_per_second': 64.009, 'epoch': 13.0}


 56%|█████▌    | 5278/9425 [06:15<04:19, 16.00it/s]
 56%|█████▌    | 5278/9425 [06:16<04:19, 16.00it/s]

{'eval_loss': 0.5617451667785645, 'eval_f1': 0.7599266846117246, 'eval_precision': 0.7606669377531123, 'eval_recall': 0.7618795675871045, 'eval_runtime': 1.3809, 'eval_samples_per_second': 273.013, 'eval_steps_per_second': 68.796, 'epoch': 14.0}


 60%|█████▉    | 5654/9425 [06:41<03:44, 16.81it/s]
 60%|██████    | 5655/9425 [06:42<03:44, 16.81it/s]

{'eval_loss': 0.5971536040306091, 'eval_f1': 0.7520715962794697, 'eval_precision': 0.7411747804776867, 'eval_recall': 0.7766330722898485, 'eval_runtime': 1.3492, 'eval_samples_per_second': 279.42, 'eval_steps_per_second': 70.411, 'epoch': 15.0}


 64%|██████▍   | 6032/9425 [07:08<03:20, 16.94it/s]
 64%|██████▍   | 6032/9425 [07:09<03:20, 16.94it/s]

{'eval_loss': 0.5873567461967468, 'eval_f1': 0.7579879610730123, 'eval_precision': 0.7498603338594729, 'eval_recall': 0.7712837997636849, 'eval_runtime': 1.2864, 'eval_samples_per_second': 293.061, 'eval_steps_per_second': 73.848, 'epoch': 16.0}


 68%|██████▊   | 6408/9425 [07:35<03:07, 16.08it/s]
 68%|██████▊   | 6409/9425 [07:36<03:07, 16.08it/s]

{'eval_loss': 0.6062362790107727, 'eval_f1': 0.7563350462873104, 'eval_precision': 0.7395978930798522, 'eval_recall': 0.783951308135617, 'eval_runtime': 1.3411, 'eval_samples_per_second': 281.113, 'eval_steps_per_second': 70.837, 'epoch': 17.0}


 72%|███████▏  | 6786/9425 [08:03<02:35, 17.00it/s]
 72%|███████▏  | 6786/9425 [08:04<02:35, 17.00it/s]

{'eval_loss': 0.6583302021026611, 'eval_f1': 0.7394240315945584, 'eval_precision': 0.7453538226326506, 'eval_recall': 0.7551603277423571, 'eval_runtime': 1.2704, 'eval_samples_per_second': 296.762, 'eval_steps_per_second': 74.781, 'epoch': 18.0}


 76%|███████▌  | 7162/9425 [08:30<02:13, 17.00it/s]
 76%|███████▌  | 7163/9425 [08:31<02:13, 17.00it/s]

{'eval_loss': 0.6569766998291016, 'eval_f1': 0.7488596257423674, 'eval_precision': 0.7345271081183398, 'eval_recall': 0.7713741981122456, 'eval_runtime': 1.2343, 'eval_samples_per_second': 305.441, 'eval_steps_per_second': 76.968, 'epoch': 19.0}


 80%|████████  | 7540/9425 [08:57<01:54, 16.43it/s]
 80%|████████  | 7540/9425 [08:58<01:54, 16.43it/s]

{'eval_loss': 0.6240456104278564, 'eval_f1': 0.772231454049636, 'eval_precision': 0.7693874789914394, 'eval_recall': 0.7807475349474104, 'eval_runtime': 1.2682, 'eval_samples_per_second': 297.262, 'eval_steps_per_second': 74.907, 'epoch': 20.0}


 84%|████████▍ | 7916/9425 [09:25<01:28, 17.05it/s]
 84%|████████▍ | 7917/9425 [09:26<01:28, 17.05it/s]

{'eval_loss': 0.665627658367157, 'eval_f1': 0.7462825611375861, 'eval_precision': 0.7468609651218346, 'eval_recall': 0.7517169140801985, 'eval_runtime': 1.2785, 'eval_samples_per_second': 294.887, 'eval_steps_per_second': 74.308, 'epoch': 21.0}


 88%|████████▊ | 8294/9425 [09:52<01:05, 17.31it/s]
 88%|████████▊ | 8294/9425 [09:53<01:05, 17.31it/s]

{'eval_loss': 0.6666761636734009, 'eval_f1': 0.7472026412658745, 'eval_precision': 0.7463674611776131, 'eval_recall': 0.7556040776412751, 'eval_runtime': 1.2533, 'eval_samples_per_second': 300.814, 'eval_steps_per_second': 75.802, 'epoch': 22.0}


 92%|█████████▏| 8670/9425 [10:19<00:48, 15.52it/s]
 92%|█████████▏| 8671/9425 [10:20<00:48, 15.52it/s]

{'eval_loss': 0.6697038412094116, 'eval_f1': 0.7485053947144559, 'eval_precision': 0.7485532484769412, 'eval_recall': 0.7556040776412751, 'eval_runtime': 1.3143, 'eval_samples_per_second': 286.846, 'eval_steps_per_second': 72.282, 'epoch': 23.0}


 96%|█████████▌| 9048/9425 [10:46<00:23, 16.30it/s]
 96%|█████████▌| 9048/9425 [10:47<00:23, 16.30it/s]

{'eval_loss': 0.6727623343467712, 'eval_f1': 0.7478888553307158, 'eval_precision': 0.7453854989781847, 'eval_recall': 0.7573897919269894, 'eval_runtime': 1.2094, 'eval_samples_per_second': 311.725, 'eval_steps_per_second': 78.551, 'epoch': 24.0}


100%|█████████▉| 9424/9425 [11:14<00:00, 15.40it/s]
100%|██████████| 9425/9425 [11:17<00:00, 15.40it/s]

{'eval_loss': 0.6654099225997925, 'eval_f1': 0.7555839371427815, 'eval_precision': 0.7535867752750043, 'eval_recall': 0.7643343309099441, 'eval_runtime': 1.2695, 'eval_samples_per_second': 296.965, 'eval_steps_per_second': 74.832, 'epoch': 25.0}


100%|██████████| 9425/9425 [11:19<00:00, 13.88it/s]


{'train_runtime': 679.2625, 'train_samples_per_second': 55.465, 'train_steps_per_second': 13.875, 'train_loss': 0.07193247989887268, 'epoch': 25.0}


100%|██████████| 95/95 [00:01<00:00, 65.64it/s]


0,1
eval/f1,▁▄▅▇▇▇▇███▇▇██████████████
eval/loss,▂▁▁▁▂▃▃▃▄▄▆▅▅▆▇▆▇██▇█████▇
eval/precision,▁▅█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
eval/recall,▁▄▅▆▇▇▇███▇▇██████████████
eval/runtime,▅█▅▆▅▄▇▆▆▇▅▅▃▃▂▂▂▂▁▂▂▁▂▁▂▃
eval/samples_per_second,▃▁▃▂▃▄▁▂▂▂▃▃▅▆▆▇▆▇█▇▇▇▇█▇▅
eval/steps_per_second,▃▁▃▂▃▄▁▂▂▂▃▃▅▆▆▇▆▇█▇▇▇▇█▇▅
train/epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇████

0,1
eval/f1,0.77223
eval/loss,0.62405
eval/precision,0.76939
eval/recall,0.78075
eval/runtime,1.4635
eval/samples_per_second,257.602
eval/steps_per_second,64.913
total_flos,2478244004332800.0
train/epoch,25.0
train/global_step,9425.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1298/1298 [00:00<00:00, 12820.18 examples/s]
  4%|▍         | 259/6500 [00:16<06:15, 16.62it/s]
  4%|▍         | 260/6500 [00:17<06:15, 16.62it/s]

{'eval_loss': 0.27709975838661194, 'eval_f1': 0.1836833427129923, 'eval_precision': 0.399043062200957, 'eval_recall': 0.1565848851265363, 'eval_runtime': 0.9886, 'eval_samples_per_second': 263.007, 'eval_steps_per_second': 65.752, 'epoch': 1.0}


  8%|▊         | 519/6500 [00:35<06:39, 14.96it/s]
  8%|▊         | 520/6500 [00:36<06:39, 14.96it/s]

{'eval_loss': 0.22686268389225006, 'eval_f1': 0.48849057612759517, 'eval_precision': 0.600084058713091, 'eval_recall': 0.41877521303333565, 'eval_runtime': 1.2673, 'eval_samples_per_second': 205.155, 'eval_steps_per_second': 51.289, 'epoch': 2.0}


 12%|█▏        | 779/6500 [00:53<05:32, 17.21it/s]
 12%|█▏        | 780/6500 [00:54<05:32, 17.21it/s]

{'eval_loss': 0.23782841861248016, 'eval_f1': 0.5418313586992556, 'eval_precision': 0.5765976473729741, 'eval_recall': 0.516254475003, 'eval_runtime': 0.8853, 'eval_samples_per_second': 293.688, 'eval_steps_per_second': 73.422, 'epoch': 3.0}


 16%|█▌        | 1039/6500 [01:12<05:54, 15.39it/s]
 16%|█▌        | 1040/6500 [01:13<05:54, 15.39it/s]

{'eval_loss': 0.2398318350315094, 'eval_f1': 0.5389914282330782, 'eval_precision': 0.6009911088450978, 'eval_recall': 0.49378675102695496, 'eval_runtime': 0.9376, 'eval_samples_per_second': 277.298, 'eval_steps_per_second': 69.324, 'epoch': 4.0}


 20%|█▉        | 1299/6500 [01:33<05:12, 16.63it/s]  
 20%|██        | 1300/6500 [01:34<05:12, 16.63it/s]

{'eval_loss': 0.23954743146896362, 'eval_f1': 0.616038180983253, 'eval_precision': 0.720074729841557, 'eval_recall': 0.5679278555412275, 'eval_runtime': 1.0357, 'eval_samples_per_second': 251.028, 'eval_steps_per_second': 62.757, 'epoch': 5.0}


 24%|██▍       | 1559/6500 [01:52<05:15, 15.67it/s]
 24%|██▍       | 1560/6500 [01:53<05:15, 15.67it/s]

{'eval_loss': 0.2734355032444, 'eval_f1': 0.6562010299092365, 'eval_precision': 0.7822451350124489, 'eval_recall': 0.5943665748982284, 'eval_runtime': 0.8994, 'eval_samples_per_second': 289.084, 'eval_steps_per_second': 72.271, 'epoch': 6.0}


 28%|██▊       | 1819/6500 [02:11<05:03, 15.41it/s]
 28%|██▊       | 1820/6500 [02:12<05:03, 15.41it/s]

{'eval_loss': 0.2673289477825165, 'eval_f1': 0.6467506331724303, 'eval_precision': 0.6886685164120595, 'eval_recall': 0.6172901422052528, 'eval_runtime': 0.9586, 'eval_samples_per_second': 271.228, 'eval_steps_per_second': 67.807, 'epoch': 7.0}


 32%|███▏      | 2079/6500 [02:29<04:30, 16.36it/s]
 32%|███▏      | 2080/6500 [02:30<04:30, 16.36it/s]

{'eval_loss': 0.2843894362449646, 'eval_f1': 0.674147278830576, 'eval_precision': 0.784030632877375, 'eval_recall': 0.6124081276651558, 'eval_runtime': 0.9185, 'eval_samples_per_second': 283.056, 'eval_steps_per_second': 70.764, 'epoch': 8.0}


 36%|███▌      | 2339/6500 [02:49<04:06, 16.86it/s]
 36%|███▌      | 2340/6500 [02:50<04:06, 16.86it/s]

{'eval_loss': 0.2870059013366699, 'eval_f1': 0.6961599066991427, 'eval_precision': 0.8039672745555099, 'eval_recall': 0.6468402317298256, 'eval_runtime': 1.1965, 'eval_samples_per_second': 217.309, 'eval_steps_per_second': 54.327, 'epoch': 9.0}


 40%|███▉      | 2599/6500 [03:08<04:04, 15.95it/s]
 40%|████      | 2600/6500 [03:09<04:04, 15.95it/s]

{'eval_loss': 0.27708199620246887, 'eval_f1': 0.7032856349609151, 'eval_precision': 0.7733999680581239, 'eval_recall': 0.6654712581206244, 'eval_runtime': 1.0611, 'eval_samples_per_second': 245.026, 'eval_steps_per_second': 61.256, 'epoch': 10.0}


 44%|████▍     | 2859/6500 [03:27<03:37, 16.75it/s]
 44%|████▍     | 2860/6500 [03:28<03:37, 16.75it/s]

{'eval_loss': 0.28466057777404785, 'eval_f1': 0.6977980588371454, 'eval_precision': 0.7471081222955915, 'eval_recall': 0.6635268322179249, 'eval_runtime': 0.9818, 'eval_samples_per_second': 264.822, 'eval_steps_per_second': 66.206, 'epoch': 11.0}


 48%|████▊     | 3119/6500 [03:46<03:40, 15.31it/s]
 48%|████▊     | 3120/6500 [03:48<03:40, 15.31it/s]

{'eval_loss': 0.2747170627117157, 'eval_f1': 0.7077962345719209, 'eval_precision': 0.7323584287120122, 'eval_recall': 0.6926245765788271, 'eval_runtime': 1.2684, 'eval_samples_per_second': 204.979, 'eval_steps_per_second': 51.245, 'epoch': 12.0}


 52%|█████▏    | 3379/6500 [04:06<03:16, 15.85it/s]
 52%|█████▏    | 3380/6500 [04:07<03:16, 15.85it/s]

{'eval_loss': 0.28763464093208313, 'eval_f1': 0.7040893864198533, 'eval_precision': 0.7330672128991456, 'eval_recall': 0.6902831109493013, 'eval_runtime': 0.8347, 'eval_samples_per_second': 311.471, 'eval_steps_per_second': 77.868, 'epoch': 13.0}


 56%|█████▌    | 3639/6500 [04:24<02:50, 16.83it/s]
 56%|█████▌    | 3640/6500 [04:25<02:49, 16.83it/s]

{'eval_loss': 0.2788948118686676, 'eval_f1': 0.7128643784240435, 'eval_precision': 0.7423193022315994, 'eval_recall': 0.6913935090517531, 'eval_runtime': 0.9887, 'eval_samples_per_second': 262.979, 'eval_steps_per_second': 65.745, 'epoch': 14.0}


 60%|█████▉    | 3899/6500 [04:43<02:49, 15.32it/s]
 60%|██████    | 3900/6500 [04:44<02:49, 15.32it/s]

{'eval_loss': 0.3036247789859772, 'eval_f1': 0.6943479627650132, 'eval_precision': 0.7178008816603034, 'eval_recall': 0.6874695156742862, 'eval_runtime': 1.1553, 'eval_samples_per_second': 225.041, 'eval_steps_per_second': 56.26, 'epoch': 15.0}


 64%|██████▍   | 4159/6500 [05:01<02:15, 17.25it/s]
 64%|██████▍   | 4160/6500 [05:02<02:15, 17.25it/s]

{'eval_loss': 0.31002548336982727, 'eval_f1': 0.6914660975118034, 'eval_precision': 0.7200427772432381, 'eval_recall': 0.6801575178428783, 'eval_runtime': 0.8906, 'eval_samples_per_second': 291.938, 'eval_steps_per_second': 72.984, 'epoch': 16.0}


 68%|██████▊   | 4419/6500 [05:22<02:23, 14.53it/s]
 68%|██████▊   | 4420/6500 [05:23<02:23, 14.53it/s]

{'eval_loss': 0.2920832335948944, 'eval_f1': 0.7054347643862152, 'eval_precision': 0.7262638797641353, 'eval_recall': 0.6915438337032506, 'eval_runtime': 1.1391, 'eval_samples_per_second': 228.255, 'eval_steps_per_second': 57.064, 'epoch': 17.0}


 72%|███████▏  | 4679/6500 [05:45<01:54, 15.97it/s]
 72%|███████▏  | 4680/6500 [05:46<01:53, 15.97it/s]

{'eval_loss': 0.3308905363082886, 'eval_f1': 0.7001855973301417, 'eval_precision': 0.7490963370735857, 'eval_recall': 0.6795605832722587, 'eval_runtime': 1.1138, 'eval_samples_per_second': 233.43, 'eval_steps_per_second': 58.358, 'epoch': 18.0}


 76%|███████▌  | 4939/6500 [06:05<01:39, 15.71it/s]
 76%|███████▌  | 4940/6500 [06:06<01:39, 15.71it/s]

{'eval_loss': 0.30333760380744934, 'eval_f1': 0.7108138833342242, 'eval_precision': 0.7397953558391419, 'eval_recall': 0.6879921485075353, 'eval_runtime': 1.2966, 'eval_samples_per_second': 200.528, 'eval_steps_per_second': 50.132, 'epoch': 19.0}


 80%|███████▉  | 5199/6500 [06:25<01:25, 15.29it/s]
 80%|████████  | 5200/6500 [06:26<01:25, 15.29it/s]

{'eval_loss': 0.3100723326206207, 'eval_f1': 0.7059343566693538, 'eval_precision': 0.7362380794344563, 'eval_recall': 0.6837886057042754, 'eval_runtime': 1.0007, 'eval_samples_per_second': 259.829, 'eval_steps_per_second': 64.957, 'epoch': 20.0}


 84%|████████▍ | 5459/6500 [06:45<01:04, 16.03it/s]
 84%|████████▍ | 5460/6500 [06:47<01:04, 16.03it/s]

{'eval_loss': 0.3101125657558441, 'eval_f1': 0.7110400964239094, 'eval_precision': 0.7562220295018618, 'eval_recall': 0.6777880668748822, 'eval_runtime': 1.1185, 'eval_samples_per_second': 232.45, 'eval_steps_per_second': 58.112, 'epoch': 21.0}


 88%|████████▊ | 5719/6500 [07:07<00:48, 16.10it/s]
 88%|████████▊ | 5720/6500 [07:08<00:48, 16.10it/s]

{'eval_loss': 0.3172200322151184, 'eval_f1': 0.6994001919135481, 'eval_precision': 0.7334711470795214, 'eval_recall': 0.6729722791736632, 'eval_runtime': 0.9991, 'eval_samples_per_second': 260.228, 'eval_steps_per_second': 65.057, 'epoch': 22.0}


 92%|█████████▏| 5979/6500 [07:26<00:34, 15.15it/s]
 92%|█████████▏| 5980/6500 [07:27<00:34, 15.15it/s]

{'eval_loss': 0.3223993182182312, 'eval_f1': 0.6977924110120272, 'eval_precision': 0.7389069264069263, 'eval_recall': 0.6690267009423706, 'eval_runtime': 1.0823, 'eval_samples_per_second': 240.23, 'eval_steps_per_second': 60.057, 'epoch': 23.0}


 96%|█████████▌| 6239/6500 [07:46<00:15, 16.33it/s]
 96%|█████████▌| 6240/6500 [07:47<00:15, 16.33it/s]

{'eval_loss': 0.32470956444740295, 'eval_f1': 0.6977924110120272, 'eval_precision': 0.7389069264069263, 'eval_recall': 0.6690267009423706, 'eval_runtime': 1.1536, 'eval_samples_per_second': 225.389, 'eval_steps_per_second': 56.347, 'epoch': 24.0}


100%|█████████▉| 6499/6500 [08:06<00:00, 15.30it/s]
100%|██████████| 6500/6500 [08:08<00:00, 15.30it/s]

{'eval_loss': 0.32652878761291504, 'eval_f1': 0.6963509695705857, 'eval_precision': 0.7351190476190476, 'eval_recall': 0.6690267009423706, 'eval_runtime': 0.9776, 'eval_samples_per_second': 265.965, 'eval_steps_per_second': 66.491, 'epoch': 25.0}


100%|██████████| 6500/6500 [08:10<00:00, 13.24it/s]


{'train_runtime': 490.7951, 'train_samples_per_second': 52.873, 'train_steps_per_second': 13.244, 'train_loss': 0.049015559269831734, 'epoch': 25.0}


100%|██████████| 65/65 [00:01<00:00, 63.11it/s]


------------------ Starting model ==> epochs: 25, batch size: 4, weights of decay: 0.001 ---------------------


0,1
eval/f1,▁▅▆▆▇▇▇▇██████████████████
eval/loss,▄▁▂▂▂▄▄▅▅▄▅▄▅▅▆▇▅█▆▇▇▇▇██▅
eval/precision,▁▄▄▄▇█▆██▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
eval/recall,▁▄▆▅▆▇▇▇▇█████████████████
eval/runtime,▃█▂▃▄▂▃▂▆▄▃█▁▃▆▂▆▅█▄▅▃▅▆▃▄
eval/samples_per_second,▅▁▇▆▄▇▅▆▂▄▅▁█▅▃▇▃▃▁▅▃▅▄▃▅▄
eval/steps_per_second,▅▁▇▆▄▇▅▆▂▄▅▁█▅▃▇▃▃▁▅▃▅▄▃▅▄
train/epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▇▇▇▇████

0,1
eval/f1,0.71286
eval/loss,0.27889
eval/precision,0.74232
eval/recall,0.69139
eval/runtime,1.0594
eval/samples_per_second,245.419
eval/steps_per_second,61.355
total_flos,1707009600960000.0
train/epoch,25.0
train/global_step,6500.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 7614/7614 [00:00<00:00, 11598.66 examples/s]
  4%|▍         | 1522/38075 [01:38<39:43, 15.34it/s]
  4%|▍         | 1523/38075 [01:44<39:43, 15.34it/s]

{'eval_loss': 0.11137732118368149, 'eval_f1': 0.6438160346643113, 'eval_precision': 0.6720540547206708, 'eval_recall': 0.6207506764224646, 'eval_runtime': 5.9051, 'eval_samples_per_second': 257.914, 'eval_steps_per_second': 64.521, 'epoch': 1.0}


  8%|▊         | 3046/38075 [03:24<38:15, 15.26it/s]   
  8%|▊         | 3046/38075 [03:30<38:15, 15.26it/s]

{'eval_loss': 0.12052269279956818, 'eval_f1': 0.7459586360773062, 'eval_precision': 0.8341961613382767, 'eval_recall': 0.7218124946256016, 'eval_runtime': 6.0661, 'eval_samples_per_second': 251.069, 'eval_steps_per_second': 62.808, 'epoch': 2.0}


 12%|█▏        | 4568/38075 [05:11<34:46, 16.06it/s]   
 12%|█▏        | 4569/38075 [05:17<34:46, 16.06it/s]

{'eval_loss': 0.11408853530883789, 'eval_f1': 0.8172322117203559, 'eval_precision': 0.8520283542030659, 'eval_recall': 0.7944843076970071, 'eval_runtime': 6.1579, 'eval_samples_per_second': 247.325, 'eval_steps_per_second': 61.872, 'epoch': 3.0}


 16%|█▌        | 6092/38075 [06:57<33:52, 15.74it/s]   
 16%|█▌        | 6092/38075 [07:03<33:52, 15.74it/s]

{'eval_loss': 0.10021704435348511, 'eval_f1': 0.8235132057139445, 'eval_precision': 0.886280741964132, 'eval_recall': 0.7883366770200222, 'eval_runtime': 5.9839, 'eval_samples_per_second': 254.518, 'eval_steps_per_second': 63.671, 'epoch': 4.0}


 20%|█▉        | 7614/38075 [08:40<32:04, 15.83it/s]   
 20%|██        | 7615/38075 [08:47<32:04, 15.83it/s]

{'eval_loss': 0.11785938590765, 'eval_f1': 0.8173881127578136, 'eval_precision': 0.8421303117677137, 'eval_recall': 0.8008983468456962, 'eval_runtime': 6.7426, 'eval_samples_per_second': 225.877, 'eval_steps_per_second': 56.506, 'epoch': 5.0}


 24%|██▍       | 9138/38075 [10:24<29:11, 16.52it/s]   
 24%|██▍       | 9138/38075 [10:31<29:11, 16.52it/s]

{'eval_loss': 0.09463755786418915, 'eval_f1': 0.8643478553798971, 'eval_precision': 0.8890102441766254, 'eval_recall': 0.8443887073043085, 'eval_runtime': 6.6049, 'eval_samples_per_second': 230.587, 'eval_steps_per_second': 57.685, 'epoch': 6.0}


 26%|██▋       | 10002/38075 [11:28<29:35, 15.81it/s]  

{'loss': 0.0758, 'grad_norm': 11.339011192321777, 'learning_rate': 3.6868023637557454e-05, 'epoch': 6.57}


 28%|██▊       | 10660/38075 [12:09<27:25, 16.66it/s]
 28%|██▊       | 10661/38075 [12:15<27:25, 16.66it/s]

{'eval_loss': 0.11127214133739471, 'eval_f1': 0.8567003176742229, 'eval_precision': 0.8837787328386757, 'eval_recall': 0.8361419512314653, 'eval_runtime': 5.6418, 'eval_samples_per_second': 269.95, 'eval_steps_per_second': 67.532, 'epoch': 7.0}


 32%|███▏      | 12184/38075 [13:57<28:40, 15.05it/s]  
 32%|███▏      | 12184/38075 [14:04<28:40, 15.05it/s]

{'eval_loss': 0.1231653019785881, 'eval_f1': 0.8481848214222166, 'eval_precision': 0.8615491602473643, 'eval_recall': 0.8376909678392286, 'eval_runtime': 6.3817, 'eval_samples_per_second': 238.65, 'eval_steps_per_second': 59.702, 'epoch': 8.0}


 36%|███▌      | 13706/38075 [15:46<27:55, 14.54it/s]  
 36%|███▌      | 13707/38075 [15:53<27:55, 14.54it/s]

{'eval_loss': 0.13191348314285278, 'eval_f1': 0.8503921351579325, 'eval_precision': 0.880457939250885, 'eval_recall': 0.8310200319295948, 'eval_runtime': 6.5219, 'eval_samples_per_second': 233.522, 'eval_steps_per_second': 58.419, 'epoch': 9.0}


 40%|████      | 15230/38075 [17:36<24:46, 15.37it/s]  
 40%|████      | 15230/38075 [17:43<24:46, 15.37it/s]

{'eval_loss': 0.13960276544094086, 'eval_f1': 0.8562773020611958, 'eval_precision': 0.8832811948346594, 'eval_recall': 0.837877194431624, 'eval_runtime': 7.1438, 'eval_samples_per_second': 213.193, 'eval_steps_per_second': 53.333, 'epoch': 10.0}


 44%|████▍     | 16752/38075 [19:23<22:24, 15.86it/s]  
 44%|████▍     | 16753/38075 [19:30<22:24, 15.86it/s]

{'eval_loss': 0.13756270706653595, 'eval_f1': 0.8575884855598652, 'eval_precision': 0.8816953513082287, 'eval_recall': 0.8382973483371537, 'eval_runtime': 6.7307, 'eval_samples_per_second': 226.278, 'eval_steps_per_second': 56.607, 'epoch': 11.0}


 48%|████▊     | 18276/38075 [21:10<20:55, 15.77it/s]  
 48%|████▊     | 18276/38075 [21:17<20:55, 15.77it/s]

{'eval_loss': 0.13351556658744812, 'eval_f1': 0.8619782323303208, 'eval_precision': 0.8663426143558105, 'eval_recall': 0.8613703349276297, 'eval_runtime': 6.5819, 'eval_samples_per_second': 231.393, 'eval_steps_per_second': 57.886, 'epoch': 12.0}


 52%|█████▏    | 19798/38075 [22:57<19:02, 16.00it/s]  
 52%|█████▏    | 19799/38075 [23:03<19:02, 16.00it/s]

{'eval_loss': 0.13274002075195312, 'eval_f1': 0.8766768117943121, 'eval_precision': 0.890889582223469, 'eval_recall': 0.8646230372496166, 'eval_runtime': 6.1097, 'eval_samples_per_second': 249.274, 'eval_steps_per_second': 62.359, 'epoch': 13.0}


 53%|█████▎    | 20002/38075 [23:18<20:01, 15.05it/s]  

{'loss': 0.0206, 'grad_norm': 0.0027923251036554575, 'learning_rate': 2.3736047275114905e-05, 'epoch': 13.13}


 56%|█████▌    | 21322/38075 [24:43<18:24, 15.17it/s]
 56%|█████▌    | 21322/38075 [24:49<18:24, 15.17it/s]

{'eval_loss': 0.1417851746082306, 'eval_f1': 0.8698909044674926, 'eval_precision': 0.8903168406633817, 'eval_recall': 0.8537016081850035, 'eval_runtime': 5.8005, 'eval_samples_per_second': 262.563, 'eval_steps_per_second': 65.684, 'epoch': 14.0}


 60%|█████▉    | 22844/38075 [26:27<16:50, 15.08it/s]  
 60%|██████    | 22845/38075 [26:33<16:50, 15.08it/s]

{'eval_loss': 0.1446545273065567, 'eval_f1': 0.868191092578285, 'eval_precision': 0.8751450126835062, 'eval_recall': 0.8629795211492421, 'eval_runtime': 5.741, 'eval_samples_per_second': 265.284, 'eval_steps_per_second': 66.365, 'epoch': 15.0}


 64%|██████▍   | 24368/38075 [28:13<14:06, 16.20it/s]  
 64%|██████▍   | 24368/38075 [28:18<14:06, 16.20it/s]

{'eval_loss': 0.15280760824680328, 'eval_f1': 0.851992130396365, 'eval_precision': 0.8534537859907267, 'eval_recall': 0.8523491320099208, 'eval_runtime': 5.735, 'eval_samples_per_second': 265.564, 'eval_steps_per_second': 66.435, 'epoch': 16.0}


 68%|██████▊   | 25890/38075 [29:57<12:26, 16.33it/s]  
 68%|██████▊   | 25891/38075 [30:03<12:26, 16.33it/s]

{'eval_loss': 0.15259391069412231, 'eval_f1': 0.8597861476597773, 'eval_precision': 0.8774778756069265, 'eval_recall': 0.8460459558511594, 'eval_runtime': 6.1756, 'eval_samples_per_second': 246.615, 'eval_steps_per_second': 61.694, 'epoch': 17.0}


 72%|███████▏  | 27414/38075 [31:42<10:57, 16.21it/s]  
 72%|███████▏  | 27414/38075 [31:48<10:57, 16.21it/s]

{'eval_loss': 0.15688441693782806, 'eval_f1': 0.8659037324565528, 'eval_precision': 0.8843689135932689, 'eval_recall': 0.8580756383613911, 'eval_runtime': 5.9917, 'eval_samples_per_second': 254.186, 'eval_steps_per_second': 63.588, 'epoch': 18.0}


 76%|███████▌  | 28936/38075 [33:25<09:53, 15.41it/s]  
 76%|███████▌  | 28937/38075 [33:32<09:52, 15.41it/s]

{'eval_loss': 0.16250616312026978, 'eval_f1': 0.8615790537605361, 'eval_precision': 0.8699514483100319, 'eval_recall': 0.8544774547005188, 'eval_runtime': 6.0991, 'eval_samples_per_second': 249.709, 'eval_steps_per_second': 62.468, 'epoch': 19.0}


 79%|███████▉  | 30002/38075 [34:40<08:10, 16.46it/s]  

{'loss': 0.0064, 'grad_norm': 0.0014061147812753916, 'learning_rate': 1.0604070912672358e-05, 'epoch': 19.7}


 80%|████████  | 30460/38075 [35:09<07:58, 15.90it/s]
 80%|████████  | 30460/38075 [35:16<07:58, 15.90it/s]

{'eval_loss': 0.17374463379383087, 'eval_f1': 0.8463493781538479, 'eval_precision': 0.8606807293688694, 'eval_recall': 0.8365179860882032, 'eval_runtime': 6.5047, 'eval_samples_per_second': 234.139, 'eval_steps_per_second': 58.573, 'epoch': 20.0}


 84%|████████▍ | 31982/38075 [36:53<06:40, 15.21it/s]  
 84%|████████▍ | 31983/38075 [37:00<06:40, 15.21it/s]

{'eval_loss': 0.17203885316848755, 'eval_f1': 0.8403505919250664, 'eval_precision': 0.8573921797698862, 'eval_recall': 0.8284515081449874, 'eval_runtime': 6.1835, 'eval_samples_per_second': 246.301, 'eval_steps_per_second': 61.616, 'epoch': 21.0}


 88%|████████▊ | 33506/38075 [38:38<04:53, 15.58it/s]  
 88%|████████▊ | 33506/38075 [38:43<04:53, 15.58it/s]

{'eval_loss': 0.18065187335014343, 'eval_f1': 0.8494342851569401, 'eval_precision': 0.8622642118530777, 'eval_recall': 0.8383113043459562, 'eval_runtime': 5.5874, 'eval_samples_per_second': 272.576, 'eval_steps_per_second': 68.189, 'epoch': 22.0}


 92%|█████████▏| 35028/38075 [40:21<03:06, 16.36it/s]  
 92%|█████████▏| 35029/38075 [40:27<03:06, 16.36it/s]

{'eval_loss': 0.1785220205783844, 'eval_f1': 0.856374163262451, 'eval_precision': 0.8624621715408832, 'eval_recall': 0.8514566616310748, 'eval_runtime': 5.5778, 'eval_samples_per_second': 273.048, 'eval_steps_per_second': 68.307, 'epoch': 23.0}


 96%|█████████▌| 36552/38075 [42:05<01:34, 16.18it/s]
 96%|█████████▌| 36552/38075 [42:11<01:34, 16.18it/s]

{'eval_loss': 0.18226516246795654, 'eval_f1': 0.8554889607962635, 'eval_precision': 0.8659512918281447, 'eval_recall': 0.8466066408146674, 'eval_runtime': 5.969, 'eval_samples_per_second': 255.153, 'eval_steps_per_second': 63.83, 'epoch': 24.0}


100%|█████████▉| 38074/38075 [43:49<00:00, 15.84it/s]
100%|██████████| 38075/38075 [43:56<00:00, 15.84it/s]

{'eval_loss': 0.1826571822166443, 'eval_f1': 0.8568015862593346, 'eval_precision': 0.8672826403230457, 'eval_recall': 0.848007201038757, 'eval_runtime': 5.8292, 'eval_samples_per_second': 261.269, 'eval_steps_per_second': 65.36, 'epoch': 25.0}


100%|██████████| 38075/38075 [43:58<00:00, 14.43it/s]


{'train_runtime': 2638.8182, 'train_samples_per_second': 57.706, 'train_steps_per_second': 14.429, 'train_loss': 0.027350619034945612, 'epoch': 25.0}


100%|██████████| 381/381 [00:05<00:00, 65.23it/s]


0,1
eval/f1,▁▄▆▆▆█▇▇▇▇▇████▇▇██▇▇▇▇▇▇█
eval/loss,▂▃▃▁▃▁▂▃▄▅▄▄▄▅▅▆▆▆▆▇▇████▄
eval/precision,▁▆▇█▆██▇███▇██▇▇██▇▇▇▇▇▇▇█
eval/recall,▁▄▆▆▆▇▇▇▇▇▇█████▇██▇▇▇█▇██
eval/runtime,▂▃▄▃▆▆▁▅▅█▆▅▃▂▂▂▄▃▃▅▄▁▁▃▂▂
eval/samples_per_second,▆▅▅▆▂▃█▄▃▁▃▃▅▇▇▇▅▆▅▃▅██▆▇▆
eval/steps_per_second,▆▅▅▆▂▃█▄▃▁▃▃▅▇▇▇▅▆▅▃▅██▆▇▆
train/epoch,▁▁▂▂▂▂▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇████
train/grad_norm,█▁▁

0,1
eval/f1,0.87668
eval/loss,0.13274
eval/precision,0.89089
eval/recall,0.86462
eval/runtime,5.8715
eval/samples_per_second,259.388
eval/steps_per_second,64.89
total_flos,1.001675865072e+16
train/epoch,25.0
train/global_step,38075.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1884/1884 [00:00<00:00, 12612.84 examples/s]
  4%|▍         | 376/9425 [00:23<09:29, 15.90it/s]
  4%|▍         | 377/9425 [00:25<09:29, 15.90it/s]

{'eval_loss': 0.3581213653087616, 'eval_f1': 0.29674390428593067, 'eval_precision': 0.3582256169212691, 'eval_recall': 0.25379279811097993, 'eval_runtime': 1.4402, 'eval_samples_per_second': 261.777, 'eval_steps_per_second': 65.965, 'epoch': 1.0}


  8%|▊         | 754/9425 [00:50<08:57, 16.14it/s]  
  8%|▊         | 754/9425 [00:52<08:57, 16.14it/s]

{'eval_loss': 0.33525359630584717, 'eval_f1': 0.5193399780672773, 'eval_precision': 0.615679765892484, 'eval_recall': 0.45600893412361276, 'eval_runtime': 1.4248, 'eval_samples_per_second': 264.601, 'eval_steps_per_second': 66.677, 'epoch': 2.0}


 12%|█▏        | 1130/9425 [01:17<08:48, 15.68it/s] 
 12%|█▏        | 1131/9425 [01:19<08:48, 15.68it/s]

{'eval_loss': 0.3051663041114807, 'eval_f1': 0.6654675824955254, 'eval_precision': 0.800635090191338, 'eval_recall': 0.5899045668897853, 'eval_runtime': 1.5184, 'eval_samples_per_second': 248.28, 'eval_steps_per_second': 62.564, 'epoch': 3.0}


 16%|█▌        | 1508/9425 [01:44<08:15, 15.99it/s]  
 16%|█▌        | 1508/9425 [01:46<08:15, 15.99it/s]

{'eval_loss': 0.36141902208328247, 'eval_f1': 0.7093972999835334, 'eval_precision': 0.755350486221754, 'eval_recall': 0.6723056559734235, 'eval_runtime': 1.4388, 'eval_samples_per_second': 262.03, 'eval_steps_per_second': 66.029, 'epoch': 4.0}


 20%|█▉        | 1884/9425 [02:11<07:36, 16.53it/s]  
 20%|██        | 1885/9425 [02:13<07:36, 16.53it/s]

{'eval_loss': 0.41128745675086975, 'eval_f1': 0.6979407664118022, 'eval_precision': 0.7452110013378295, 'eval_recall': 0.6687629891811222, 'eval_runtime': 1.5429, 'eval_samples_per_second': 244.345, 'eval_steps_per_second': 61.572, 'epoch': 5.0}


 24%|██▍       | 2262/9425 [02:39<07:43, 15.46it/s]  
 24%|██▍       | 2262/9425 [02:40<07:43, 15.46it/s]

{'eval_loss': 0.44528084993362427, 'eval_f1': 0.7189369287990932, 'eval_precision': 0.7712461223234126, 'eval_recall': 0.6892599796921903, 'eval_runtime': 1.5126, 'eval_samples_per_second': 249.232, 'eval_steps_per_second': 62.804, 'epoch': 6.0}


 28%|██▊       | 2638/9425 [03:05<07:13, 15.66it/s]  
 28%|██▊       | 2639/9425 [03:07<07:13, 15.66it/s]

{'eval_loss': 0.458030104637146, 'eval_f1': 0.7261365711305112, 'eval_precision': 0.7433564861567321, 'eval_recall': 0.7127890704418941, 'eval_runtime': 1.4484, 'eval_samples_per_second': 260.285, 'eval_steps_per_second': 65.589, 'epoch': 7.0}


 32%|███▏      | 3016/9425 [03:32<06:27, 16.53it/s]
 32%|███▏      | 3016/9425 [03:34<06:27, 16.53it/s]

{'eval_loss': 0.5884878039360046, 'eval_f1': 0.6671480935578681, 'eval_precision': 0.7196006229542815, 'eval_recall': 0.6586312790661971, 'eval_runtime': 1.5359, 'eval_samples_per_second': 245.459, 'eval_steps_per_second': 61.853, 'epoch': 8.0}


 36%|███▌      | 3392/9425 [03:59<06:33, 15.33it/s]
 36%|███▌      | 3393/9425 [04:01<06:33, 15.33it/s]

{'eval_loss': 0.5411632061004639, 'eval_f1': 0.6981563715328676, 'eval_precision': 0.7416814420168436, 'eval_recall': 0.6695801482254458, 'eval_runtime': 1.3941, 'eval_samples_per_second': 270.426, 'eval_steps_per_second': 68.144, 'epoch': 9.0}


 40%|████      | 3770/9425 [04:26<06:37, 14.22it/s]
 40%|████      | 3770/9425 [04:28<06:37, 14.22it/s]

{'eval_loss': 0.5336940884590149, 'eval_f1': 0.7133743507888384, 'eval_precision': 0.720290400000825, 'eval_recall': 0.7273773649960774, 'eval_runtime': 1.4844, 'eval_samples_per_second': 253.979, 'eval_steps_per_second': 64.0, 'epoch': 10.0}


 44%|████▍     | 4146/9425 [04:53<05:27, 16.12it/s]
 44%|████▍     | 4147/9425 [04:55<05:27, 16.12it/s]

{'eval_loss': 0.549667239189148, 'eval_f1': 0.701577091276413, 'eval_precision': 0.7180499144707635, 'eval_recall': 0.6976622046443098, 'eval_runtime': 1.4768, 'eval_samples_per_second': 255.283, 'eval_steps_per_second': 64.329, 'epoch': 11.0}


 48%|████▊     | 4524/9425 [05:21<05:36, 14.58it/s]
 48%|████▊     | 4524/9425 [05:22<05:36, 14.58it/s]

{'eval_loss': 0.5357143878936768, 'eval_f1': 0.7388139056657108, 'eval_precision': 0.7430609246983592, 'eval_recall': 0.737649650555195, 'eval_runtime': 1.4399, 'eval_samples_per_second': 261.829, 'eval_steps_per_second': 65.978, 'epoch': 12.0}


 52%|█████▏    | 4900/9425 [05:48<04:54, 15.35it/s]
 52%|█████▏    | 4901/9425 [05:49<04:54, 15.35it/s]

{'eval_loss': 0.5276204347610474, 'eval_f1': 0.7696324254713127, 'eval_precision': 0.7781108801108803, 'eval_recall': 0.7659977735008867, 'eval_runtime': 1.4775, 'eval_samples_per_second': 255.153, 'eval_steps_per_second': 64.296, 'epoch': 13.0}


 56%|█████▌    | 5278/9425 [06:15<04:21, 15.84it/s]
 56%|█████▌    | 5278/9425 [06:16<04:21, 15.84it/s]

{'eval_loss': 0.5548107624053955, 'eval_f1': 0.7571088679099397, 'eval_precision': 0.7714681397034339, 'eval_recall': 0.7437037598278055, 'eval_runtime': 1.4036, 'eval_samples_per_second': 268.592, 'eval_steps_per_second': 67.682, 'epoch': 14.0}


 60%|█████▉    | 5654/9425 [06:42<04:03, 15.47it/s]
 60%|██████    | 5655/9425 [06:44<04:03, 15.47it/s]

{'eval_loss': 0.5438772439956665, 'eval_f1': 0.7602889250601433, 'eval_precision': 0.7757671049589678, 'eval_recall': 0.7465465988520837, 'eval_runtime': 1.4431, 'eval_samples_per_second': 261.244, 'eval_steps_per_second': 65.831, 'epoch': 15.0}


 64%|██████▍   | 6032/9425 [07:10<03:44, 15.08it/s]
 64%|██████▍   | 6032/9425 [07:11<03:44, 15.08it/s]

{'eval_loss': 0.5755258202552795, 'eval_f1': 0.7622497423409126, 'eval_precision': 0.7648880983862252, 'eval_recall': 0.7669921776023892, 'eval_runtime': 1.5273, 'eval_samples_per_second': 246.835, 'eval_steps_per_second': 62.2, 'epoch': 16.0}


 68%|██████▊   | 6408/9425 [07:37<03:11, 15.72it/s]
 68%|██████▊   | 6409/9425 [07:38<03:11, 15.72it/s]

{'eval_loss': 0.6007586121559143, 'eval_f1': 0.7454233393711676, 'eval_precision': 0.7525796758661231, 'eval_recall': 0.7465551114198848, 'eval_runtime': 1.4174, 'eval_samples_per_second': 265.976, 'eval_steps_per_second': 67.023, 'epoch': 17.0}


 72%|███████▏  | 6786/9425 [08:04<02:49, 15.56it/s]
 72%|███████▏  | 6786/9425 [08:05<02:49, 15.56it/s]

{'eval_loss': 0.616004467010498, 'eval_f1': 0.7633913388602891, 'eval_precision': 0.7821701068390678, 'eval_recall': 0.7528711583866168, 'eval_runtime': 1.4329, 'eval_samples_per_second': 263.103, 'eval_steps_per_second': 66.299, 'epoch': 18.0}


 76%|███████▌  | 7162/9425 [08:31<02:31, 14.96it/s]
 76%|███████▌  | 7163/9425 [08:32<02:31, 14.96it/s]

{'eval_loss': 0.5770158171653748, 'eval_f1': 0.7714689579676358, 'eval_precision': 0.7830427114259757, 'eval_recall': 0.7619402572088154, 'eval_runtime': 1.3705, 'eval_samples_per_second': 275.083, 'eval_steps_per_second': 69.318, 'epoch': 19.0}


 80%|████████  | 7540/9425 [08:58<01:57, 16.02it/s]
 80%|████████  | 7540/9425 [08:59<01:57, 16.02it/s]

{'eval_loss': 0.6024102568626404, 'eval_f1': 0.764523376539936, 'eval_precision': 0.7774342119816574, 'eval_recall': 0.7565887345098457, 'eval_runtime': 1.4229, 'eval_samples_per_second': 264.957, 'eval_steps_per_second': 66.766, 'epoch': 20.0}


 84%|████████▍ | 7916/9425 [09:25<01:36, 15.72it/s]
 84%|████████▍ | 7917/9425 [09:27<01:35, 15.72it/s]

{'eval_loss': 0.5993843674659729, 'eval_f1': 0.7677580572562211, 'eval_precision': 0.7931648948929283, 'eval_recall': 0.7456877922734223, 'eval_runtime': 1.3555, 'eval_samples_per_second': 278.124, 'eval_steps_per_second': 70.084, 'epoch': 21.0}


 88%|████████▊ | 8294/9425 [09:54<01:11, 15.73it/s]

{'eval_loss': 0.618737518787384, 'eval_f1': 0.7600442919938711, 'eval_precision': 0.7749267399267399, 'eval_recall': 0.7500927522402697, 'eval_runtime': 1.349, 'eval_samples_per_second': 279.476, 'eval_steps_per_second': 70.425, 'epoch': 22.0}


 92%|█████████▏| 8670/9425 [10:20<00:47, 15.88it/s]
 92%|█████████▏| 8671/9425 [10:21<00:47, 15.88it/s]

{'eval_loss': 0.648483157157898, 'eval_f1': 0.7563123913863135, 'eval_precision': 0.7788117227304577, 'eval_recall': 0.7396692140497978, 'eval_runtime': 1.3889, 'eval_samples_per_second': 271.43, 'eval_steps_per_second': 68.397, 'epoch': 23.0}


 96%|█████████▌| 9048/9425 [10:47<00:23, 16.34it/s]
 96%|█████████▌| 9048/9425 [10:49<00:23, 16.34it/s]

{'eval_loss': 0.6379270553588867, 'eval_f1': 0.7660611995986047, 'eval_precision': 0.7775218342357524, 'eval_recall': 0.7571038887205166, 'eval_runtime': 1.431, 'eval_samples_per_second': 263.448, 'eval_steps_per_second': 66.386, 'epoch': 24.0}


100%|█████████▉| 9424/9425 [11:14<00:00, 16.00it/s]
100%|██████████| 9425/9425 [11:17<00:00, 16.00it/s]

{'eval_loss': 0.63825923204422, 'eval_f1': 0.7629977887254217, 'eval_precision': 0.7772642738947708, 'eval_recall': 0.7516244366657221, 'eval_runtime': 1.4025, 'eval_samples_per_second': 268.803, 'eval_steps_per_second': 67.735, 'epoch': 25.0}


100%|██████████| 9425/9425 [11:19<00:00, 13.86it/s]


{'train_runtime': 679.8332, 'train_samples_per_second': 55.418, 'train_steps_per_second': 13.864, 'train_loss': 0.07035020904136273, 'epoch': 25.0}


100%|██████████| 95/95 [00:01<00:00, 52.50it/s]


0,1
eval/f1,▁▄▆▇▇▇▇▆▇▇▇███████████████
eval/loss,▂▂▁▂▃▄▄▇▆▆▆▆▆▆▆▇▇▇▇▇▇▇███▇
eval/precision,▁▅█▇▇█▇▇▇▇▇▇███▇▇█████████
eval/recall,▁▄▆▇▇▇▇▇▇▇▇███████████████
eval/runtime,▂▂▃▂▄▃▂▄▂▃▃▂▃▂▂▄▂▂▁▂▁▁▂▂▂█
eval/samples_per_second,▆▇▅▆▅▅▆▅▇▆▆▆▆▇▆▅▇▆█▇██▇▆▇▁
eval/steps_per_second,▆▇▅▆▅▅▆▅▇▆▆▆▆▇▆▅▇▆█▇██▇▆▇▁
train/epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇████

0,1
eval/f1,0.77147
eval/loss,0.57702
eval/precision,0.78304
eval/recall,0.76194
eval/runtime,1.8378
eval/samples_per_second,205.141
eval/steps_per_second,51.693
total_flos,2478244004332800.0
train/epoch,25.0
train/global_step,9425.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1298/1298 [00:00<00:00, 12076.93 examples/s]
  4%|▍         | 259/6500 [00:17<06:32, 15.89it/s]
  4%|▍         | 260/6500 [00:18<06:32, 15.89it/s]

{'eval_loss': 0.277275413274765, 'eval_f1': 0.18478664192949906, 'eval_precision': 0.25818392134181606, 'eval_recall': 0.15962820771873104, 'eval_runtime': 0.9209, 'eval_samples_per_second': 282.329, 'eval_steps_per_second': 70.582, 'epoch': 1.0}


  8%|▊         | 519/6500 [00:35<06:09, 16.20it/s]
  8%|▊         | 520/6500 [00:37<06:09, 16.20it/s]

{'eval_loss': 0.2425008863210678, 'eval_f1': 0.48525646318823284, 'eval_precision': 0.5986609472931662, 'eval_recall': 0.4279841030244213, 'eval_runtime': 1.0005, 'eval_samples_per_second': 259.877, 'eval_steps_per_second': 64.969, 'epoch': 2.0}


 12%|█▏        | 779/6500 [00:55<06:16, 15.21it/s]
 12%|█▏        | 780/6500 [00:56<06:15, 15.21it/s]

{'eval_loss': 0.24548006057739258, 'eval_f1': 0.5243971623875655, 'eval_precision': 0.5787239121105703, 'eval_recall': 0.4879062473059474, 'eval_runtime': 1.2987, 'eval_samples_per_second': 200.201, 'eval_steps_per_second': 50.05, 'epoch': 3.0}


 16%|█▌        | 1039/6500 [01:14<06:00, 15.15it/s]
 16%|█▌        | 1040/6500 [01:15<06:00, 15.15it/s]

{'eval_loss': 0.2689289450645447, 'eval_f1': 0.5153954155064444, 'eval_precision': 0.5752610343435094, 'eval_recall': 0.4941663768497652, 'eval_runtime': 0.987, 'eval_samples_per_second': 263.42, 'eval_steps_per_second': 65.855, 'epoch': 4.0}


 20%|█▉        | 1299/6500 [01:34<05:14, 16.54it/s]
 20%|██        | 1300/6500 [01:35<05:14, 16.54it/s]

{'eval_loss': 0.2495645433664322, 'eval_f1': 0.6773181200058251, 'eval_precision': 0.8034228912364527, 'eval_recall': 0.6176971394147893, 'eval_runtime': 1.1035, 'eval_samples_per_second': 235.606, 'eval_steps_per_second': 58.901, 'epoch': 5.0}


 24%|██▍       | 1559/6500 [01:53<05:19, 15.47it/s]
 24%|██▍       | 1560/6500 [01:54<05:19, 15.47it/s]

{'eval_loss': 0.2694318890571594, 'eval_f1': 0.6196998387099828, 'eval_precision': 0.7798145505447219, 'eval_recall': 0.5692477469517182, 'eval_runtime': 1.1272, 'eval_samples_per_second': 230.654, 'eval_steps_per_second': 57.664, 'epoch': 6.0}


 28%|██▊       | 1819/6500 [02:13<04:47, 16.26it/s]
 28%|██▊       | 1820/6500 [02:14<04:47, 16.26it/s]

{'eval_loss': 0.2867196798324585, 'eval_f1': 0.6761826513628721, 'eval_precision': 0.8089371980676329, 'eval_recall': 0.6186467002620457, 'eval_runtime': 1.0047, 'eval_samples_per_second': 258.787, 'eval_steps_per_second': 64.697, 'epoch': 7.0}


 32%|███▏      | 2079/6500 [02:32<04:38, 15.86it/s]
 32%|███▏      | 2080/6500 [02:33<04:38, 15.86it/s]

{'eval_loss': 0.29958415031433105, 'eval_f1': 0.6738998902439138, 'eval_precision': 0.7419916109799528, 'eval_recall': 0.629127814420037, 'eval_runtime': 1.0889, 'eval_samples_per_second': 238.771, 'eval_steps_per_second': 59.693, 'epoch': 8.0}


 36%|███▌      | 2339/6500 [02:51<04:24, 15.73it/s]
 36%|███▌      | 2340/6500 [02:52<04:24, 15.73it/s]

{'eval_loss': 0.2821155786514282, 'eval_f1': 0.6628065320841914, 'eval_precision': 0.764029581230354, 'eval_recall': 0.6077663326814432, 'eval_runtime': 0.9844, 'eval_samples_per_second': 264.128, 'eval_steps_per_second': 66.032, 'epoch': 9.0}


 40%|███▉      | 2599/6500 [03:10<04:25, 14.67it/s]
 40%|████      | 2600/6500 [03:12<04:25, 14.67it/s]

{'eval_loss': 0.30194002389907837, 'eval_f1': 0.6840953473975373, 'eval_precision': 0.7234872261373523, 'eval_recall': 0.6537357628625718, 'eval_runtime': 1.3228, 'eval_samples_per_second': 196.548, 'eval_steps_per_second': 49.137, 'epoch': 10.0}


 44%|████▍     | 2859/6500 [03:30<03:52, 15.63it/s]
 44%|████▍     | 2860/6500 [03:31<03:52, 15.63it/s]

{'eval_loss': 0.2916073203086853, 'eval_f1': 0.6972364528333216, 'eval_precision': 0.756381202305325, 'eval_recall': 0.6575441206163262, 'eval_runtime': 0.9263, 'eval_samples_per_second': 280.674, 'eval_steps_per_second': 70.169, 'epoch': 11.0}


 48%|████▊     | 3119/6500 [03:49<03:39, 15.40it/s]
 48%|████▊     | 3120/6500 [03:50<03:39, 15.40it/s]

{'eval_loss': 0.28918522596359253, 'eval_f1': 0.7224720742525322, 'eval_precision': 0.7706653365867214, 'eval_recall': 0.68547783585178, 'eval_runtime': 1.053, 'eval_samples_per_second': 246.921, 'eval_steps_per_second': 61.73, 'epoch': 12.0}


 52%|█████▏    | 3379/6500 [04:08<03:24, 15.28it/s]
 52%|█████▏    | 3380/6500 [04:09<03:24, 15.28it/s]

{'eval_loss': 0.3139324486255646, 'eval_f1': 0.7041422583230627, 'eval_precision': 0.7112771872314322, 'eval_recall': 0.7043394524108579, 'eval_runtime': 1.005, 'eval_samples_per_second': 258.707, 'eval_steps_per_second': 64.677, 'epoch': 13.0}


 56%|█████▌    | 3639/6500 [04:27<02:54, 16.38it/s]
 56%|█████▌    | 3640/6500 [04:29<02:54, 16.38it/s]

{'eval_loss': 0.32728227972984314, 'eval_f1': 0.7107143130891761, 'eval_precision': 0.7478470723198029, 'eval_recall': 0.6805412527677797, 'eval_runtime': 1.07, 'eval_samples_per_second': 242.997, 'eval_steps_per_second': 60.749, 'epoch': 14.0}


 60%|█████▉    | 3899/6500 [04:47<02:54, 14.94it/s]
 60%|██████    | 3900/6500 [04:48<02:53, 14.94it/s]

{'eval_loss': 0.32696419954299927, 'eval_f1': 0.7087986705794417, 'eval_precision': 0.7705363928329801, 'eval_recall': 0.663276485474724, 'eval_runtime': 1.1028, 'eval_samples_per_second': 235.762, 'eval_steps_per_second': 58.94, 'epoch': 15.0}


 64%|██████▍   | 4159/6500 [05:06<02:22, 16.43it/s]
 64%|██████▍   | 4160/6500 [05:07<02:22, 16.43it/s]

{'eval_loss': 0.32695260643959045, 'eval_f1': 0.7188298735267405, 'eval_precision': 0.745838401883112, 'eval_recall': 0.6988291223710664, 'eval_runtime': 1.055, 'eval_samples_per_second': 246.443, 'eval_steps_per_second': 61.611, 'epoch': 16.0}


 68%|██████▊   | 4419/6500 [05:25<02:14, 15.41it/s]
 68%|██████▊   | 4420/6500 [05:27<02:14, 15.41it/s]

{'eval_loss': 0.34260788559913635, 'eval_f1': 0.7196123072660928, 'eval_precision': 0.7625485689009287, 'eval_recall': 0.6874875948183982, 'eval_runtime': 1.0587, 'eval_samples_per_second': 245.595, 'eval_steps_per_second': 61.399, 'epoch': 17.0}


 72%|███████▏  | 4679/6500 [05:45<01:51, 16.32it/s]
 72%|███████▏  | 4680/6500 [05:46<01:51, 16.32it/s]

{'eval_loss': 0.3576156497001648, 'eval_f1': 0.7148092667880216, 'eval_precision': 0.7440079365079365, 'eval_recall': 0.6925140418834259, 'eval_runtime': 1.0209, 'eval_samples_per_second': 254.669, 'eval_steps_per_second': 63.667, 'epoch': 18.0}


 76%|███████▌  | 4939/6500 [06:04<01:44, 14.99it/s]
 76%|███████▌  | 4940/6500 [06:06<01:44, 14.99it/s]

{'eval_loss': 0.3567768335342407, 'eval_f1': 0.7218582606525606, 'eval_precision': 0.7544818250147285, 'eval_recall': 0.6967856118975702, 'eval_runtime': 1.0995, 'eval_samples_per_second': 236.48, 'eval_steps_per_second': 59.12, 'epoch': 19.0}


 80%|███████▉  | 5199/6500 [06:24<01:21, 15.92it/s]
 80%|████████  | 5200/6500 [06:25<01:21, 15.92it/s]

{'eval_loss': 0.361420214176178, 'eval_f1': 0.723175765130262, 'eval_precision': 0.7630650500669477, 'eval_recall': 0.6910713261832845, 'eval_runtime': 1.0831, 'eval_samples_per_second': 240.059, 'eval_steps_per_second': 60.015, 'epoch': 20.0}


 84%|████████▍ | 5459/6500 [06:43<01:07, 15.52it/s]
 84%|████████▍ | 5460/6500 [06:44<01:07, 15.52it/s]

{'eval_loss': 0.3494110405445099, 'eval_f1': 0.7282886568472327, 'eval_precision': 0.7630286501575753, 'eval_recall': 0.7001869724417878, 'eval_runtime': 1.0552, 'eval_samples_per_second': 246.397, 'eval_steps_per_second': 61.599, 'epoch': 21.0}


 88%|████████▊ | 5719/6500 [07:02<00:49, 15.68it/s]
 88%|████████▊ | 5720/6500 [07:03<00:49, 15.68it/s]

{'eval_loss': 0.3578433692455292, 'eval_f1': 0.7269203380534618, 'eval_precision': 0.7577110707116427, 'eval_recall': 0.7032264861195994, 'eval_runtime': 0.9358, 'eval_samples_per_second': 277.837, 'eval_steps_per_second': 69.459, 'epoch': 22.0}


 92%|█████████▏| 5979/6500 [07:21<00:33, 15.78it/s]
 92%|█████████▏| 5980/6500 [07:22<00:32, 15.78it/s]

{'eval_loss': 0.35889434814453125, 'eval_f1': 0.72579798737054, 'eval_precision': 0.7535274249559963, 'eval_recall': 0.7032264861195994, 'eval_runtime': 0.9992, 'eval_samples_per_second': 260.215, 'eval_steps_per_second': 65.054, 'epoch': 23.0}


 96%|█████████▌| 6239/6500 [07:41<00:17, 14.90it/s]
 96%|█████████▌| 6240/6500 [07:42<00:17, 14.90it/s]

{'eval_loss': 0.36400875449180603, 'eval_f1': 0.7291466772635122, 'eval_precision': 0.7643827578680239, 'eval_recall': 0.7018120589625979, 'eval_runtime': 1.0762, 'eval_samples_per_second': 241.596, 'eval_steps_per_second': 60.399, 'epoch': 24.0}


100%|█████████▉| 6499/6500 [08:00<00:00, 16.51it/s]
100%|██████████| 6500/6500 [08:02<00:00, 16.51it/s]

{'eval_loss': 0.3650988042354584, 'eval_f1': 0.7241511051471228, 'eval_precision': 0.7557561188066023, 'eval_recall': 0.6987725452847864, 'eval_runtime': 1.1512, 'eval_samples_per_second': 225.857, 'eval_steps_per_second': 56.464, 'epoch': 25.0}


100%|██████████| 6500/6500 [08:04<00:00, 13.41it/s]


{'train_runtime': 484.7114, 'train_samples_per_second': 53.537, 'train_steps_per_second': 13.41, 'train_loss': 0.0496825937124399, 'epoch': 25.0}


100%|██████████| 65/65 [00:01<00:00, 59.81it/s]


------------------ Starting model ==> epochs: 25, batch size: 8, weights of decay: 0.01 ---------------------


0,1
eval/f1,▁▅▅▅▇▇▇▇▇▇████████████████
eval/loss,▃▁▁▃▁▃▄▄▃▄▄▄▅▆▆▆▇███▇█████
eval/precision,▁▅▅▅███▇▇▇▇█▇▇█▇▇▇▇▇▇▇▇▇▇▇
eval/recall,▁▄▅▅▇▆▇▇▇▇▇███▇███████████
eval/runtime,▁▂█▂▄▅▂▄▂█▁▃▂▄▄▃▃▃▄▄▃▁▂▄▅▄
eval/samples_per_second,█▆▁▆▄▄▆▄▇▁█▅▆▅▄▅▅▆▄▅▅█▆▅▃▄
eval/steps_per_second,█▆▁▆▄▄▆▄▇▁█▅▆▅▄▅▅▆▄▅▅█▆▅▃▄
train/epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▇▇▇▇████

0,1
eval/f1,0.72915
eval/loss,0.36401
eval/precision,0.76438
eval/recall,0.70181
eval/runtime,1.1147
eval/samples_per_second,233.257
eval/steps_per_second,58.314
total_flos,1707009600960000.0
train/epoch,25.0
train/global_step,6500.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 7614/7614 [00:00<00:00, 20743.69 examples/s]
  4%|▍         | 761/19050 [00:57<23:25, 13.01it/s]
  4%|▍         | 762/19050 [01:01<23:25, 13.01it/s]

{'eval_loss': 0.1031247079372406, 'eval_f1': 0.7005717129736123, 'eval_precision': 0.90183869570986, 'eval_recall': 0.67398688339211, 'eval_runtime': 3.7477, 'eval_samples_per_second': 406.387, 'eval_steps_per_second': 50.965, 'epoch': 1.0}


  8%|▊         | 1523/19050 [01:59<22:18, 13.09it/s] 
  8%|▊         | 1524/19050 [02:03<22:18, 13.09it/s]

{'eval_loss': 0.10045843571424484, 'eval_f1': 0.7553639921425718, 'eval_precision': 0.8537088233296825, 'eval_recall': 0.7236335973081929, 'eval_runtime': 3.8314, 'eval_samples_per_second': 397.503, 'eval_steps_per_second': 49.851, 'epoch': 2.0}


 12%|█▏        | 2285/19050 [03:02<21:43, 12.87it/s]  
 12%|█▏        | 2286/19050 [03:06<21:43, 12.87it/s]

{'eval_loss': 0.09915340691804886, 'eval_f1': 0.8241029635220137, 'eval_precision': 0.8724410648937544, 'eval_recall': 0.7889989849972928, 'eval_runtime': 3.8178, 'eval_samples_per_second': 398.921, 'eval_steps_per_second': 50.029, 'epoch': 3.0}


 16%|█▌        | 3047/19050 [04:05<19:51, 13.43it/s]  
 16%|█▌        | 3048/19050 [04:08<19:51, 13.43it/s]

{'eval_loss': 0.09309061616659164, 'eval_f1': 0.8586893896540023, 'eval_precision': 0.8859000182202555, 'eval_recall': 0.8374281226155157, 'eval_runtime': 3.711, 'eval_samples_per_second': 410.405, 'eval_steps_per_second': 51.469, 'epoch': 4.0}


 20%|█▉        | 3809/19050 [05:07<19:38, 12.93it/s]  
 20%|██        | 3810/19050 [05:11<19:38, 12.93it/s]

{'eval_loss': 0.11039870232343674, 'eval_f1': 0.8624339359175146, 'eval_precision': 0.8868699561476733, 'eval_recall': 0.8491336766013227, 'eval_runtime': 3.8516, 'eval_samples_per_second': 395.419, 'eval_steps_per_second': 49.59, 'epoch': 5.0}


 24%|██▍       | 4571/19050 [06:10<17:59, 13.41it/s]  
 24%|██▍       | 4572/19050 [06:14<17:59, 13.41it/s]

{'eval_loss': 0.09643596410751343, 'eval_f1': 0.8609724216653188, 'eval_precision': 0.8683764896073579, 'eval_recall': 0.8603468660503831, 'eval_runtime': 3.6928, 'eval_samples_per_second': 412.42, 'eval_steps_per_second': 51.722, 'epoch': 6.0}


 28%|██▊       | 5333/19050 [07:12<17:01, 13.43it/s]  
 28%|██▊       | 5334/19050 [07:16<17:01, 13.43it/s]

{'eval_loss': 0.10023246705532074, 'eval_f1': 0.8739583135961254, 'eval_precision': 0.8847157496996196, 'eval_recall': 0.8650168559481484, 'eval_runtime': 3.7912, 'eval_samples_per_second': 401.725, 'eval_steps_per_second': 50.38, 'epoch': 7.0}


 32%|███▏      | 6095/19050 [08:15<15:59, 13.50it/s]  
 32%|███▏      | 6096/19050 [08:19<15:59, 13.50it/s]

{'eval_loss': 0.11301874369382858, 'eval_f1': 0.8611917628424131, 'eval_precision': 0.9090465940024754, 'eval_recall': 0.8260994562131035, 'eval_runtime': 3.7497, 'eval_samples_per_second': 406.161, 'eval_steps_per_second': 50.937, 'epoch': 8.0}


 36%|███▌      | 6857/19050 [09:17<15:06, 13.46it/s]  
 36%|███▌      | 6858/19050 [09:21<15:05, 13.46it/s]

{'eval_loss': 0.1178220584988594, 'eval_f1': 0.8808568649326336, 'eval_precision': 0.8955854505302678, 'eval_recall': 0.8694678759348106, 'eval_runtime': 3.7442, 'eval_samples_per_second': 406.765, 'eval_steps_per_second': 51.013, 'epoch': 9.0}


 40%|███▉      | 7619/19050 [10:20<14:09, 13.46it/s]  
 40%|████      | 7620/19050 [10:24<14:09, 13.46it/s]

{'eval_loss': 0.12684401869773865, 'eval_f1': 0.8603504842417352, 'eval_precision': 0.8770954844736144, 'eval_recall': 0.8456594927606217, 'eval_runtime': 3.7264, 'eval_samples_per_second': 408.704, 'eval_steps_per_second': 51.256, 'epoch': 10.0}


 44%|████▍     | 8381/19050 [11:22<13:11, 13.48it/s]  
 44%|████▍     | 8382/19050 [11:26<13:11, 13.48it/s]

{'eval_loss': 0.12427019327878952, 'eval_f1': 0.8742885861248322, 'eval_precision': 0.8922528135733864, 'eval_recall': 0.8631407258101873, 'eval_runtime': 3.6843, 'eval_samples_per_second': 413.378, 'eval_steps_per_second': 51.842, 'epoch': 11.0}


 48%|████▊     | 9143/19050 [12:25<12:33, 13.15it/s]  
 48%|████▊     | 9144/19050 [12:29<12:33, 13.15it/s]

{'eval_loss': 0.12834841012954712, 'eval_f1': 0.8690010693508999, 'eval_precision': 0.9008343993761524, 'eval_recall': 0.8512352699822653, 'eval_runtime': 3.7225, 'eval_samples_per_second': 409.138, 'eval_steps_per_second': 51.31, 'epoch': 12.0}


 52%|█████▏    | 9905/19050 [13:27<11:16, 13.53it/s]  
 52%|█████▏    | 9906/19050 [13:31<11:15, 13.53it/s]

{'eval_loss': 0.13615407049655914, 'eval_f1': 0.8594099042790776, 'eval_precision': 0.8778453625938115, 'eval_recall': 0.8440948982860028, 'eval_runtime': 3.6859, 'eval_samples_per_second': 413.193, 'eval_steps_per_second': 51.819, 'epoch': 13.0}


 52%|█████▏    | 10001/19050 [13:40<11:37, 12.97it/s] 

{'loss': 0.0351, 'grad_norm': 0.009500537998974323, 'learning_rate': 2.3753280839895015e-05, 'epoch': 13.12}


 56%|█████▌    | 10667/19050 [14:30<10:39, 13.12it/s]
 56%|█████▌    | 10668/19050 [14:33<10:39, 13.12it/s]

{'eval_loss': 0.13836096227169037, 'eval_f1': 0.8621467924640015, 'eval_precision': 0.8980887774548417, 'eval_recall': 0.8373359346418593, 'eval_runtime': 3.7257, 'eval_samples_per_second': 408.78, 'eval_steps_per_second': 51.265, 'epoch': 14.0}


 60%|█████▉    | 11429/19050 [15:32<09:25, 13.48it/s]  
 60%|██████    | 11430/19050 [15:36<09:25, 13.48it/s]

{'eval_loss': 0.13634946942329407, 'eval_f1': 0.8718282413493107, 'eval_precision': 0.8905957907810153, 'eval_recall': 0.8559027896450738, 'eval_runtime': 3.6765, 'eval_samples_per_second': 414.256, 'eval_steps_per_second': 51.952, 'epoch': 15.0}


 64%|██████▍   | 12191/19050 [16:35<08:46, 13.03it/s]  
 64%|██████▍   | 12192/19050 [16:39<08:46, 13.03it/s]

{'eval_loss': 0.13937628269195557, 'eval_f1': 0.8559465703175257, 'eval_precision': 0.854711368773603, 'eval_recall': 0.8605126832998302, 'eval_runtime': 3.8154, 'eval_samples_per_second': 399.175, 'eval_steps_per_second': 50.061, 'epoch': 16.0}


 68%|██████▊   | 12953/19050 [17:37<07:33, 13.44it/s]  
 68%|██████▊   | 12954/19050 [17:41<07:33, 13.44it/s]

{'eval_loss': 0.15058469772338867, 'eval_f1': 0.8601655584033917, 'eval_precision': 0.8553677809022926, 'eval_recall': 0.8682997355593167, 'eval_runtime': 3.6843, 'eval_samples_per_second': 413.375, 'eval_steps_per_second': 51.841, 'epoch': 17.0}


 72%|███████▏  | 13715/19050 [18:39<06:50, 12.99it/s]  
 72%|███████▏  | 13716/19050 [18:43<06:50, 12.99it/s]

{'eval_loss': 0.1486213505268097, 'eval_f1': 0.8701350833876464, 'eval_precision': 0.8963746359918873, 'eval_recall': 0.8505641914272185, 'eval_runtime': 3.8294, 'eval_samples_per_second': 397.709, 'eval_steps_per_second': 49.877, 'epoch': 18.0}


 76%|███████▌  | 14477/19050 [19:42<05:39, 13.46it/s]  
 76%|███████▌  | 14478/19050 [19:46<05:39, 13.46it/s]

{'eval_loss': 0.14237476885318756, 'eval_f1': 0.8703290068934262, 'eval_precision': 0.8864290578594031, 'eval_recall': 0.856999992292187, 'eval_runtime': 3.7109, 'eval_samples_per_second': 410.407, 'eval_steps_per_second': 51.469, 'epoch': 19.0}


 80%|███████▉  | 15239/19050 [20:44<04:42, 13.48it/s]  
 80%|████████  | 15240/19050 [20:48<04:42, 13.48it/s]

{'eval_loss': 0.14973445236682892, 'eval_f1': 0.8678863502238979, 'eval_precision': 0.8641576934128381, 'eval_recall': 0.8742995626656186, 'eval_runtime': 3.8129, 'eval_samples_per_second': 399.438, 'eval_steps_per_second': 50.094, 'epoch': 20.0}


 84%|████████▍ | 16002/19050 [21:47<03:39, 13.88it/s]
 84%|████████▍ | 16002/19050 [21:51<03:39, 13.88it/s]

{'eval_loss': 0.15280073881149292, 'eval_f1': 0.8646682639504863, 'eval_precision': 0.8683154475622767, 'eval_recall': 0.8631395279780951, 'eval_runtime': 3.6981, 'eval_samples_per_second': 411.83, 'eval_steps_per_second': 51.648, 'epoch': 21.0}


 88%|████████▊ | 16764/19050 [22:49<02:41, 14.11it/s]
 88%|████████▊ | 16764/19050 [22:53<02:41, 14.11it/s]

{'eval_loss': 0.1588459610939026, 'eval_f1': 0.8643047797558365, 'eval_precision': 0.8657126278502282, 'eval_recall': 0.8654547097252324, 'eval_runtime': 3.7168, 'eval_samples_per_second': 409.765, 'eval_steps_per_second': 51.389, 'epoch': 22.0}


 92%|█████████▏| 17526/19050 [23:52<01:49, 13.93it/s]
 92%|█████████▏| 17526/19050 [23:55<01:49, 13.93it/s]

{'eval_loss': 0.15663333237171173, 'eval_f1': 0.8664290109873717, 'eval_precision': 0.8628214008922869, 'eval_recall': 0.8713653566373603, 'eval_runtime': 3.7016, 'eval_samples_per_second': 411.449, 'eval_steps_per_second': 51.6, 'epoch': 23.0}


 96%|█████████▌| 18288/19050 [24:54<00:54, 13.91it/s]
 96%|█████████▌| 18288/19050 [24:58<00:54, 13.91it/s]

{'eval_loss': 0.16101029515266418, 'eval_f1': 0.867137061795945, 'eval_precision': 0.8698300675365355, 'eval_recall': 0.8658611311842856, 'eval_runtime': 3.6969, 'eval_samples_per_second': 411.963, 'eval_steps_per_second': 51.664, 'epoch': 24.0}


100%|██████████| 19050/19050 [25:56<00:00, 13.89it/s]
100%|██████████| 19050/19050 [26:02<00:00, 13.89it/s]

{'eval_loss': 0.15975473821163177, 'eval_f1': 0.866438682727055, 'eval_precision': 0.8736850410364924, 'eval_recall': 0.861488164097629, 'eval_runtime': 3.7282, 'eval_samples_per_second': 408.512, 'eval_steps_per_second': 51.232, 'epoch': 25.0}


100%|██████████| 19050/19050 [26:04<00:00, 12.18it/s]


{'train_runtime': 1564.1183, 'train_samples_per_second': 97.355, 'train_steps_per_second': 12.179, 'train_loss': 0.01948837210187136, 'epoch': 25.0}


100%|██████████| 191/191 [00:03<00:00, 51.31it/s]


0,1
eval/f1,▁▃▆▇▇▇█▇█▇██▇▇█▇▇██▇▇▇▇▇▇█
eval/loss,▂▂▂▁▃▁▂▃▄▄▄▅▅▆▅▆▇▇▆▇▇████▄
eval/precision,▇▁▃▅▅▃▅█▆▄▆▇▄▇▆▁▁▆▅▂▃▃▂▃▄▆
eval/recall,▁▃▅▇▇██▆█▇█▇▇▇▇██▇▇███████
eval/runtime,▄▇▇▂█▂▆▄▄▃▁▃▁▃▁▇▁▇▂▆▂▃▂▂▃▄
eval/samples_per_second,▅▂▂▇▁▇▃▅▅▆█▆█▆█▂█▂▇▂▇▆▇▇▆▄
eval/steps_per_second,▅▂▂▇▁▇▃▅▅▆█▆█▆█▂█▂▇▂▇▆▇▇▆▄
train/epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇████
train/grad_norm,▁

0,1
eval/f1,0.88086
eval/loss,0.11782
eval/precision,0.89559
eval/recall,0.86947
eval/runtime,3.7627
eval/samples_per_second,404.757
eval/steps_per_second,50.761
total_flos,1.001675865072e+16
train/epoch,25.0
train/global_step,19050.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1884/1884 [00:00<00:00, 16043.67 examples/s]
  4%|▍         | 189/4725 [00:13<05:21, 14.13it/s]
  4%|▍         | 189/4725 [00:14<05:21, 14.13it/s]

{'eval_loss': 0.33860665559768677, 'eval_f1': 0.361503670838751, 'eval_precision': 0.5501350135013501, 'eval_recall': 0.3031046319807217, 'eval_runtime': 0.9763, 'eval_samples_per_second': 386.143, 'eval_steps_per_second': 49.164, 'epoch': 1.0}


  8%|▊         | 377/4725 [00:29<05:13, 13.85it/s]
  8%|▊         | 378/4725 [00:30<05:13, 13.85it/s]

{'eval_loss': 0.2918270528316498, 'eval_f1': 0.5708080596207659, 'eval_precision': 0.6437523906668366, 'eval_recall': 0.5286732020123108, 'eval_runtime': 0.9233, 'eval_samples_per_second': 408.313, 'eval_steps_per_second': 51.987, 'epoch': 2.0}


 12%|█▏        | 566/4725 [00:46<05:02, 13.77it/s]
 12%|█▏        | 567/4725 [00:47<05:02, 13.77it/s]

{'eval_loss': 0.27048635482788086, 'eval_f1': 0.6620546077662836, 'eval_precision': 0.8397455176402545, 'eval_recall': 0.5812226618653604, 'eval_runtime': 0.9127, 'eval_samples_per_second': 413.038, 'eval_steps_per_second': 52.588, 'epoch': 3.0}


 16%|█▌        | 756/4725 [01:03<04:38, 14.24it/s]
 16%|█▌        | 756/4725 [01:04<04:38, 14.24it/s]

{'eval_loss': 0.3160361647605896, 'eval_f1': 0.7049755989000677, 'eval_precision': 0.7447183409904203, 'eval_recall': 0.6803696670611501, 'eval_runtime': 0.9016, 'eval_samples_per_second': 418.127, 'eval_steps_per_second': 53.236, 'epoch': 4.0}


 20%|█▉        | 944/4725 [01:19<04:35, 13.71it/s]
 20%|██        | 945/4725 [01:20<04:35, 13.71it/s]

{'eval_loss': 0.34531038999557495, 'eval_f1': 0.7389574949717111, 'eval_precision': 0.7795071824857669, 'eval_recall': 0.7097480673711114, 'eval_runtime': 0.91, 'eval_samples_per_second': 414.305, 'eval_steps_per_second': 52.75, 'epoch': 5.0}


 24%|██▍       | 1134/4725 [01:36<04:19, 13.83it/s]
 24%|██▍       | 1134/4725 [01:37<04:19, 13.83it/s]

{'eval_loss': 0.34012070298194885, 'eval_f1': 0.7384643546194212, 'eval_precision': 0.7526709977263002, 'eval_recall': 0.7411063355490504, 'eval_runtime': 0.9217, 'eval_samples_per_second': 409.009, 'eval_steps_per_second': 52.075, 'epoch': 6.0}


 28%|██▊       | 1322/4725 [01:52<04:09, 13.63it/s]
 28%|██▊       | 1323/4725 [01:53<04:09, 13.63it/s]

{'eval_loss': 0.3891563415527344, 'eval_f1': 0.7305292919575385, 'eval_precision': 0.7476926665471473, 'eval_recall': 0.7196143156931505, 'eval_runtime': 0.9192, 'eval_samples_per_second': 410.161, 'eval_steps_per_second': 52.222, 'epoch': 7.0}


 32%|███▏      | 1512/4725 [02:09<03:50, 13.93it/s]
 32%|███▏      | 1512/4725 [02:10<03:50, 13.93it/s]

{'eval_loss': 0.38974758982658386, 'eval_f1': 0.75547684906476, 'eval_precision': 0.766229980058892, 'eval_recall': 0.7499195381567194, 'eval_runtime': 0.9257, 'eval_samples_per_second': 407.248, 'eval_steps_per_second': 51.851, 'epoch': 8.0}


 36%|███▌      | 1700/4725 [02:25<03:44, 13.45it/s]
 36%|███▌      | 1701/4725 [02:26<03:44, 13.45it/s]

{'eval_loss': 0.38887348771095276, 'eval_f1': 0.7755820593215949, 'eval_precision': 0.8068733153638815, 'eval_recall': 0.7552229317689007, 'eval_runtime': 0.9523, 'eval_samples_per_second': 395.885, 'eval_steps_per_second': 50.404, 'epoch': 9.0}


 40%|████      | 1890/4725 [02:42<03:22, 13.99it/s]
 40%|████      | 1890/4725 [02:43<03:22, 13.99it/s]

{'eval_loss': 0.436879962682724, 'eval_f1': 0.7472199742134504, 'eval_precision': 0.7479411764405729, 'eval_recall': 0.759591242632149, 'eval_runtime': 0.9149, 'eval_samples_per_second': 412.075, 'eval_steps_per_second': 52.466, 'epoch': 10.0}


 44%|████▍     | 2078/4725 [02:58<03:17, 13.43it/s]
 44%|████▍     | 2079/4725 [02:59<03:16, 13.43it/s]

{'eval_loss': 0.4148733615875244, 'eval_f1': 0.7493215024703067, 'eval_precision': 0.7612270742776029, 'eval_recall': 0.7543709800135432, 'eval_runtime': 0.9434, 'eval_samples_per_second': 399.609, 'eval_steps_per_second': 50.879, 'epoch': 11.0}


 48%|████▊     | 2268/4725 [03:15<03:05, 13.26it/s]
 48%|████▊     | 2268/4725 [03:16<03:05, 13.26it/s]

{'eval_loss': 0.4558809995651245, 'eval_f1': 0.755635709431921, 'eval_precision': 0.7737927946343801, 'eval_recall': 0.7531694469192438, 'eval_runtime': 0.9632, 'eval_samples_per_second': 391.413, 'eval_steps_per_second': 49.835, 'epoch': 12.0}


 52%|█████▏    | 2456/4725 [03:31<02:48, 13.50it/s]
 52%|█████▏    | 2457/4725 [03:32<02:47, 13.50it/s]

{'eval_loss': 0.41290533542633057, 'eval_f1': 0.7821187171759822, 'eval_precision': 0.7912792867509848, 'eval_recall': 0.7748888994756393, 'eval_runtime': 0.9293, 'eval_samples_per_second': 405.666, 'eval_steps_per_second': 51.65, 'epoch': 13.0}


 56%|█████▌    | 2646/4725 [03:48<02:30, 13.79it/s]
 56%|█████▌    | 2646/4725 [03:49<02:30, 13.79it/s]

{'eval_loss': 0.4208346903324127, 'eval_f1': 0.778171071572524, 'eval_precision': 0.7908921782303611, 'eval_recall': 0.7666987023300306, 'eval_runtime': 0.9523, 'eval_samples_per_second': 395.884, 'eval_steps_per_second': 50.404, 'epoch': 14.0}


 60%|█████▉    | 2834/4725 [04:05<02:27, 12.81it/s]
 60%|██████    | 2835/4725 [04:06<02:27, 12.81it/s]

{'eval_loss': 0.43987101316452026, 'eval_f1': 0.7777414819579461, 'eval_precision': 0.7777795571989917, 'eval_recall': 0.7780908438124585, 'eval_runtime': 0.9967, 'eval_samples_per_second': 378.244, 'eval_steps_per_second': 48.158, 'epoch': 15.0}


 64%|██████▍   | 3024/4725 [04:21<02:01, 13.99it/s]
 64%|██████▍   | 3024/4725 [04:22<02:01, 13.99it/s]

{'eval_loss': 0.4414869546890259, 'eval_f1': 0.7837337287077222, 'eval_precision': 0.7927449708558578, 'eval_recall': 0.7772060717631674, 'eval_runtime': 0.946, 'eval_samples_per_second': 398.537, 'eval_steps_per_second': 50.742, 'epoch': 16.0}


 68%|██████▊   | 3212/4725 [04:38<01:53, 13.38it/s]
 68%|██████▊   | 3213/4725 [04:39<01:53, 13.38it/s]

{'eval_loss': 0.4427254796028137, 'eval_f1': 0.7813874650562487, 'eval_precision': 0.7939177440301322, 'eval_recall': 0.7703221368484783, 'eval_runtime': 0.9453, 'eval_samples_per_second': 398.819, 'eval_steps_per_second': 50.778, 'epoch': 17.0}


 72%|███████▏  | 3402/4725 [04:55<01:39, 13.25it/s]
 72%|███████▏  | 3402/4725 [04:56<01:39, 13.25it/s]

{'eval_loss': 0.46472108364105225, 'eval_f1': 0.7813988947289606, 'eval_precision': 0.7884005427534994, 'eval_recall': 0.7813453882825533, 'eval_runtime': 0.9674, 'eval_samples_per_second': 389.696, 'eval_steps_per_second': 49.616, 'epoch': 18.0}


 76%|███████▌  | 3590/4725 [05:11<01:24, 13.48it/s]
 76%|███████▌  | 3591/4725 [05:12<01:24, 13.48it/s]

{'eval_loss': 0.4626315236091614, 'eval_f1': 0.7816689094836822, 'eval_precision': 0.7910238539514702, 'eval_recall': 0.7751083282411455, 'eval_runtime': 0.9327, 'eval_samples_per_second': 404.193, 'eval_steps_per_second': 51.462, 'epoch': 19.0}


 80%|████████  | 3780/4725 [05:28<01:07, 13.99it/s]
 80%|████████  | 3780/4725 [05:29<01:07, 13.99it/s]

{'eval_loss': 0.46720343828201294, 'eval_f1': 0.7797940965841903, 'eval_precision': 0.7917430292840129, 'eval_recall': 0.7707812060879362, 'eval_runtime': 0.9392, 'eval_samples_per_second': 401.392, 'eval_steps_per_second': 51.106, 'epoch': 20.0}


 84%|████████▍ | 3968/4725 [05:46<00:59, 12.77it/s]
 84%|████████▍ | 3969/4725 [05:47<00:59, 12.77it/s]

{'eval_loss': 0.47045591473579407, 'eval_f1': 0.7768897066543634, 'eval_precision': 0.7862047119846972, 'eval_recall': 0.7695663577132525, 'eval_runtime': 0.9699, 'eval_samples_per_second': 388.692, 'eval_steps_per_second': 49.489, 'epoch': 21.0}


 88%|████████▊ | 4158/4725 [06:03<00:39, 14.19it/s]
 88%|████████▊ | 4158/4725 [06:04<00:39, 14.19it/s]

{'eval_loss': 0.47358715534210205, 'eval_f1': 0.7768897066543634, 'eval_precision': 0.7862047119846972, 'eval_recall': 0.7695663577132525, 'eval_runtime': 0.9178, 'eval_samples_per_second': 410.783, 'eval_steps_per_second': 52.301, 'epoch': 22.0}


 92%|█████████▏| 4346/4725 [06:19<00:28, 13.42it/s]
 92%|█████████▏| 4347/4725 [06:20<00:28, 13.42it/s]

{'eval_loss': 0.47604450583457947, 'eval_f1': 0.7800574886681104, 'eval_precision': 0.7869034194075425, 'eval_recall': 0.7753634591625279, 'eval_runtime': 0.9422, 'eval_samples_per_second': 400.107, 'eval_steps_per_second': 50.942, 'epoch': 23.0}


 96%|█████████▌| 4536/4725 [06:36<00:14, 13.27it/s]
 96%|█████████▌| 4536/4725 [06:37<00:14, 13.27it/s]

{'eval_loss': 0.4763515591621399, 'eval_f1': 0.7786145765422046, 'eval_precision': 0.7885011524544973, 'eval_recall': 0.7706791941521758, 'eval_runtime': 0.9748, 'eval_samples_per_second': 386.762, 'eval_steps_per_second': 49.243, 'epoch': 24.0}


100%|█████████▉| 4724/4725 [06:52<00:00, 13.51it/s]
100%|██████████| 4725/4725 [06:55<00:00, 13.51it/s]

{'eval_loss': 0.4767909348011017, 'eval_f1': 0.7796105161729409, 'eval_precision': 0.7886569156943726, 'eval_recall': 0.7723320867141594, 'eval_runtime': 0.9237, 'eval_samples_per_second': 408.129, 'eval_steps_per_second': 51.963, 'epoch': 25.0}


100%|██████████| 4725/4725 [06:57<00:00, 11.31it/s]


{'train_runtime': 417.8244, 'train_samples_per_second': 90.169, 'train_steps_per_second': 11.309, 'train_loss': 0.06020607277199074, 'epoch': 25.0}


100%|██████████| 48/48 [00:01<00:00, 45.03it/s]


0,1
eval/f1,▁▄▆▇▇▇▇██▇▇███████████████
eval/loss,▃▂▁▃▄▃▅▅▅▇▆▇▆▆▇▇▇████████▇
eval/precision,▁▃█▆▇▆▆▆▇▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇
eval/recall,▁▄▅▇▇▇▇███████████████████
eval/runtime,▃▂▁▁▁▂▂▂▃▁▂▃▂▃▄▂▂▃▂▂▃▂▂▃▂█
eval/samples_per_second,▅▇███▇▇▇▆▇▆▆▇▆▅▆▆▆▇▇▅▇▆▅▇▁
eval/steps_per_second,▅▇███▇▇▇▆▇▆▆▇▆▅▆▆▆▇▇▅▇▆▅▇▁
train/epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇████

0,1
eval/f1,0.78373
eval/loss,0.44149
eval/precision,0.79274
eval/recall,0.77721
eval/runtime,1.1219
eval/samples_per_second,336.035
eval/steps_per_second,42.784
total_flos,2478244004332800.0
train/epoch,25.0
train/global_step,4725.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1298/1298 [00:00<00:00, 15562.97 examples/s]
  4%|▍         | 130/3250 [00:09<03:43, 13.98it/s]
  4%|▍         | 130/3250 [00:10<03:43, 13.98it/s]

{'eval_loss': 0.2954673767089844, 'eval_f1': 0.20051759834368532, 'eval_precision': 0.26745075540256263, 'eval_recall': 0.1667340203408096, 'eval_runtime': 0.6286, 'eval_samples_per_second': 413.609, 'eval_steps_per_second': 52.496, 'epoch': 1.0}


  8%|▊         | 260/3250 [00:21<03:32, 14.05it/s]
  8%|▊         | 260/3250 [00:21<03:32, 14.05it/s]

{'eval_loss': 0.23760104179382324, 'eval_f1': 0.46385804534645686, 'eval_precision': 0.6088634947436129, 'eval_recall': 0.3875520152513458, 'eval_runtime': 0.6291, 'eval_samples_per_second': 413.267, 'eval_steps_per_second': 52.453, 'epoch': 2.0}


 12%|█▏        | 390/3250 [00:33<03:23, 14.03it/s]
 12%|█▏        | 390/3250 [00:33<03:23, 14.03it/s]

{'eval_loss': 0.2228594273328781, 'eval_f1': 0.5206188705454535, 'eval_precision': 0.5837456463501197, 'eval_recall': 0.4735351635581777, 'eval_runtime': 0.6348, 'eval_samples_per_second': 409.607, 'eval_steps_per_second': 51.989, 'epoch': 3.0}


 16%|█▌        | 520/3250 [00:45<03:16, 13.91it/s]
 16%|█▌        | 520/3250 [00:45<03:16, 13.91it/s]

{'eval_loss': 0.24211876094341278, 'eval_f1': 0.5311161031892311, 'eval_precision': 0.5771976245972297, 'eval_recall': 0.4996417629284925, 'eval_runtime': 0.6286, 'eval_samples_per_second': 413.635, 'eval_steps_per_second': 52.5, 'epoch': 4.0}


 20%|██        | 650/3250 [00:57<03:11, 13.59it/s]
 20%|██        | 650/3250 [00:57<03:11, 13.59it/s]

{'eval_loss': 0.23447000980377197, 'eval_f1': 0.644985969426423, 'eval_precision': 0.7947384108282488, 'eval_recall': 0.5745050127198883, 'eval_runtime': 0.6434, 'eval_samples_per_second': 404.124, 'eval_steps_per_second': 51.293, 'epoch': 5.0}


 24%|██▍       | 780/3250 [01:08<03:04, 13.39it/s]
 24%|██▍       | 780/3250 [01:09<03:04, 13.39it/s]

{'eval_loss': 0.2537279427051544, 'eval_f1': 0.6379461626028545, 'eval_precision': 0.7569751676894533, 'eval_recall': 0.5750781766477947, 'eval_runtime': 0.6357, 'eval_samples_per_second': 408.995, 'eval_steps_per_second': 51.911, 'epoch': 6.0}


 28%|██▊       | 910/3250 [01:20<02:49, 13.82it/s]
 28%|██▊       | 910/3250 [01:21<02:49, 13.82it/s]

{'eval_loss': 0.23287852108478546, 'eval_f1': 0.661420325478972, 'eval_precision': 0.7680088702147525, 'eval_recall': 0.6008878405951982, 'eval_runtime': 0.6435, 'eval_samples_per_second': 404.057, 'eval_steps_per_second': 51.284, 'epoch': 7.0}


 32%|███▏      | 1040/3250 [01:32<02:40, 13.78it/s]
 32%|███▏      | 1040/3250 [01:33<02:40, 13.78it/s]

{'eval_loss': 0.24012769758701324, 'eval_f1': 0.6779045923698018, 'eval_precision': 0.7491151392467181, 'eval_recall': 0.6311496887513556, 'eval_runtime': 0.6507, 'eval_samples_per_second': 399.553, 'eval_steps_per_second': 50.712, 'epoch': 8.0}


 36%|███▌      | 1170/3250 [01:44<02:34, 13.48it/s]
 36%|███▌      | 1170/3250 [01:44<02:34, 13.48it/s]

{'eval_loss': 0.24302911758422852, 'eval_f1': 0.6696360316625397, 'eval_precision': 0.7519112356836214, 'eval_recall': 0.6218535233325422, 'eval_runtime': 0.6716, 'eval_samples_per_second': 387.131, 'eval_steps_per_second': 49.136, 'epoch': 9.0}


 40%|████      | 1300/3250 [01:55<02:22, 13.68it/s]
 40%|████      | 1300/3250 [01:56<02:22, 13.68it/s]

{'eval_loss': 0.2639148533344269, 'eval_f1': 0.6876198049037693, 'eval_precision': 0.7647181595892115, 'eval_recall': 0.6359089506546468, 'eval_runtime': 0.6473, 'eval_samples_per_second': 401.675, 'eval_steps_per_second': 50.982, 'epoch': 10.0}


 44%|████▍     | 1430/3250 [02:07<02:16, 13.32it/s]
 44%|████▍     | 1430/3250 [02:08<02:16, 13.32it/s]

{'eval_loss': 0.26957184076309204, 'eval_f1': 0.6987290971660813, 'eval_precision': 0.7448164023224556, 'eval_recall': 0.6653930961805187, 'eval_runtime': 0.6528, 'eval_samples_per_second': 398.309, 'eval_steps_per_second': 50.555, 'epoch': 11.0}


 48%|████▊     | 1560/3250 [02:19<02:03, 13.73it/s]
 48%|████▊     | 1560/3250 [02:20<02:03, 13.73it/s]

{'eval_loss': 0.2738127112388611, 'eval_f1': 0.7143168476121542, 'eval_precision': 0.7788900668779688, 'eval_recall': 0.6686081510315257, 'eval_runtime': 0.6562, 'eval_samples_per_second': 396.219, 'eval_steps_per_second': 50.289, 'epoch': 12.0}


 52%|█████▏    | 1689/3250 [02:32<01:59, 13.03it/s]
 52%|█████▏    | 1690/3250 [02:32<01:59, 13.03it/s]

{'eval_loss': 0.2750310003757477, 'eval_f1': 0.7065203959398533, 'eval_precision': 0.7340528626909701, 'eval_recall': 0.6882736038990008, 'eval_runtime': 0.6784, 'eval_samples_per_second': 383.263, 'eval_steps_per_second': 48.645, 'epoch': 13.0}


 56%|█████▌    | 1819/3250 [02:44<01:46, 13.40it/s]
 56%|█████▌    | 1820/3250 [02:44<01:46, 13.40it/s]

{'eval_loss': 0.28252702951431274, 'eval_f1': 0.7195934428689927, 'eval_precision': 0.7631277896265823, 'eval_recall': 0.6902246822026628, 'eval_runtime': 0.6389, 'eval_samples_per_second': 406.969, 'eval_steps_per_second': 51.654, 'epoch': 14.0}


 60%|█████▉    | 1949/3250 [02:56<01:36, 13.47it/s]
 60%|██████    | 1950/3250 [02:56<01:36, 13.47it/s]

{'eval_loss': 0.2834254503250122, 'eval_f1': 0.6953353457199525, 'eval_precision': 0.7494518401230224, 'eval_recall': 0.6559012068677216, 'eval_runtime': 0.6753, 'eval_samples_per_second': 385.002, 'eval_steps_per_second': 48.866, 'epoch': 15.0}


 64%|██████▍   | 2079/3250 [03:08<01:27, 13.34it/s]
 64%|██████▍   | 2080/3250 [03:08<01:27, 13.34it/s]

{'eval_loss': 0.30150124430656433, 'eval_f1': 0.6918030717441616, 'eval_precision': 0.7292098543836586, 'eval_recall': 0.6654381718538424, 'eval_runtime': 0.6541, 'eval_samples_per_second': 397.485, 'eval_steps_per_second': 50.45, 'epoch': 16.0}


 68%|██████▊   | 2209/3250 [03:20<01:18, 13.31it/s]
 68%|██████▊   | 2210/3250 [03:20<01:18, 13.31it/s]

{'eval_loss': 0.29844793677330017, 'eval_f1': 0.6970394975127367, 'eval_precision': 0.740812009863368, 'eval_recall': 0.6649015207071819, 'eval_runtime': 0.6496, 'eval_samples_per_second': 400.272, 'eval_steps_per_second': 50.804, 'epoch': 17.0}


 72%|███████▏  | 2339/3250 [03:32<01:08, 13.38it/s]
 72%|███████▏  | 2340/3250 [03:32<01:07, 13.38it/s]

{'eval_loss': 0.3065604269504547, 'eval_f1': 0.6917751320621914, 'eval_precision': 0.7326196104629895, 'eval_recall': 0.6661335770435146, 'eval_runtime': 0.6568, 'eval_samples_per_second': 395.871, 'eval_steps_per_second': 50.245, 'epoch': 18.0}


 76%|███████▌  | 2469/3250 [03:43<00:58, 13.29it/s]
 76%|███████▌  | 2470/3250 [03:44<00:58, 13.29it/s]

{'eval_loss': 0.30849742889404297, 'eval_f1': 0.7086174104736832, 'eval_precision': 0.7478714928400603, 'eval_recall': 0.6839708505667854, 'eval_runtime': 0.649, 'eval_samples_per_second': 400.594, 'eval_steps_per_second': 50.845, 'epoch': 19.0}


 80%|███████▉  | 2599/3250 [03:56<00:48, 13.40it/s]
 80%|████████  | 2600/3250 [03:56<00:48, 13.40it/s]

{'eval_loss': 0.30701562762260437, 'eval_f1': 0.7000228920411671, 'eval_precision': 0.754729825852136, 'eval_recall': 0.6647191498865131, 'eval_runtime': 0.6585, 'eval_samples_per_second': 394.821, 'eval_steps_per_second': 50.112, 'epoch': 20.0}


 84%|████████▍ | 2729/3250 [04:08<00:39, 13.32it/s]
 84%|████████▍ | 2730/3250 [04:08<00:39, 13.32it/s]

{'eval_loss': 0.3141945004463196, 'eval_f1': 0.694497910646979, 'eval_precision': 0.7393947709107282, 'eval_recall': 0.6633047227295118, 'eval_runtime': 0.6448, 'eval_samples_per_second': 403.238, 'eval_steps_per_second': 51.18, 'epoch': 21.0}


 88%|████████▊ | 2859/3250 [04:20<00:29, 13.40it/s]
 88%|████████▊ | 2860/3250 [04:20<00:29, 13.40it/s]

{'eval_loss': 0.31695497035980225, 'eval_f1': 0.6949222480278381, 'eval_precision': 0.7351288978948552, 'eval_recall': 0.6663442364073233, 'eval_runtime': 0.6537, 'eval_samples_per_second': 397.746, 'eval_steps_per_second': 50.483, 'epoch': 22.0}


 92%|█████████▏| 2989/3250 [04:32<00:19, 13.47it/s]
 92%|█████████▏| 2990/3250 [04:32<00:19, 13.47it/s]

{'eval_loss': 0.31742265820503235, 'eval_f1': 0.6987590045887416, 'eval_precision': 0.7426259681361722, 'eval_recall': 0.6705875178783275, 'eval_runtime': 0.6459, 'eval_samples_per_second': 402.554, 'eval_steps_per_second': 51.093, 'epoch': 23.0}


 96%|█████████▌| 3119/3250 [04:44<00:09, 13.35it/s]
 96%|█████████▌| 3120/3250 [04:44<00:09, 13.35it/s]

{'eval_loss': 0.3177088797092438, 'eval_f1': 0.7030646186869066, 'eval_precision': 0.7459047507892045, 'eval_recall': 0.6745613846527602, 'eval_runtime': 0.646, 'eval_samples_per_second': 402.462, 'eval_steps_per_second': 51.082, 'epoch': 24.0}


100%|█████████▉| 3249/3250 [04:56<00:00, 13.39it/s]
100%|██████████| 3250/3250 [04:59<00:00, 13.39it/s]

{'eval_loss': 0.3184692859649658, 'eval_f1': 0.7006084783360293, 'eval_precision': 0.7450134851920566, 'eval_recall': 0.6711600241085424, 'eval_runtime': 0.7676, 'eval_samples_per_second': 338.728, 'eval_steps_per_second': 42.992, 'epoch': 25.0}


100%|██████████| 3250/3250 [05:00<00:00, 10.80it/s]


{'train_runtime': 300.9085, 'train_samples_per_second': 86.239, 'train_steps_per_second': 10.801, 'train_loss': 0.05466150841346154, 'epoch': 25.0}


100%|██████████| 33/33 [00:00<00:00, 46.63it/s]


------------------ Starting model ==> epochs: 25, batch size: 8, weights of decay: 0.001 ---------------------


0,1
eval/f1,▁▅▅▅▇▇▇▇▇█████████████████
eval/loss,▆▂▁▂▂▃▂▂▂▄▄▅▅▅▅▇▇▇▇▇█████▅
eval/precision,▁▆▅▅█▇█▇▇█▇█▇█▇▇▇▇▇▇▇▇▇▇▇█
eval/recall,▁▄▅▅▆▆▇▇▇▇████████████████
eval/runtime,▁▁▁▁▂▁▂▂▃▂▂▂▄▂▃▂▂▂▂▃▂▂▂▂█▇
eval/samples_per_second,████▇█▇▇▆▇▇▆▅▇▅▆▇▆▇▆▇▇▇▇▁▂
eval/steps_per_second,████▇█▇▇▆▇▇▆▅▇▅▆▇▆▇▆▇▇▇▇▁▂
train/epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▇▇▇▇████

0,1
eval/f1,0.71959
eval/loss,0.28253
eval/precision,0.76313
eval/recall,0.69022
eval/runtime,0.7477
eval/samples_per_second,347.755
eval/steps_per_second,44.138
total_flos,1707009600960000.0
train/epoch,25.0
train/global_step,3250.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 7614/7614 [00:00<00:00, 20556.87 examples/s]
  4%|▍         | 762/19050 [00:56<22:14, 13.71it/s]
  4%|▍         | 762/19050 [01:00<22:14, 13.71it/s]

{'eval_loss': 0.10392778366804123, 'eval_f1': 0.6729081836871288, 'eval_precision': 0.7479769613359061, 'eval_recall': 0.6580637914830173, 'eval_runtime': 3.8036, 'eval_samples_per_second': 400.405, 'eval_steps_per_second': 50.215, 'epoch': 1.0}


  8%|▊         | 1524/19050 [01:59<21:18, 13.71it/s] 
  8%|▊         | 1524/19050 [02:03<21:18, 13.71it/s]

{'eval_loss': 0.09424516558647156, 'eval_f1': 0.7688956014188274, 'eval_precision': 0.8995762366274909, 'eval_recall': 0.7269112643648727, 'eval_runtime': 3.8694, 'eval_samples_per_second': 393.604, 'eval_steps_per_second': 49.362, 'epoch': 2.0}


 12%|█▏        | 2286/19050 [03:02<20:02, 13.94it/s]  
 12%|█▏        | 2286/19050 [03:06<20:02, 13.94it/s]

{'eval_loss': 0.0866229310631752, 'eval_f1': 0.8462195100261293, 'eval_precision': 0.8908812827489251, 'eval_recall': 0.8146858218277175, 'eval_runtime': 3.8554, 'eval_samples_per_second': 395.032, 'eval_steps_per_second': 49.541, 'epoch': 3.0}


 16%|█▌        | 3048/19050 [04:04<19:05, 13.97it/s]  
 16%|█▌        | 3048/19050 [04:08<19:05, 13.97it/s]

{'eval_loss': 0.08937394618988037, 'eval_f1': 0.8615492492086065, 'eval_precision': 0.8934780230643842, 'eval_recall': 0.8376628152102644, 'eval_runtime': 3.6992, 'eval_samples_per_second': 411.715, 'eval_steps_per_second': 51.633, 'epoch': 4.0}


 20%|██        | 3810/19050 [05:07<18:30, 13.72it/s]  
 20%|██        | 3810/19050 [05:11<18:30, 13.72it/s]

{'eval_loss': 0.102632075548172, 'eval_f1': 0.8540856079679069, 'eval_precision': 0.8475606207792081, 'eval_recall': 0.8653202461982726, 'eval_runtime': 3.8107, 'eval_samples_per_second': 399.667, 'eval_steps_per_second': 50.122, 'epoch': 5.0}


 24%|██▍       | 4571/19050 [06:10<18:00, 13.40it/s]  
 24%|██▍       | 4572/19050 [06:14<18:00, 13.40it/s]

{'eval_loss': 0.09654296189546585, 'eval_f1': 0.8645012887493673, 'eval_precision': 0.8811877221633482, 'eval_recall': 0.8541094510384744, 'eval_runtime': 3.7157, 'eval_samples_per_second': 409.881, 'eval_steps_per_second': 51.403, 'epoch': 6.0}


 28%|██▊       | 5334/19050 [07:14<16:22, 13.96it/s]  
 28%|██▊       | 5334/19050 [07:18<16:22, 13.96it/s]

{'eval_loss': 0.1257154643535614, 'eval_f1': 0.8310309643942878, 'eval_precision': 0.8721706445957171, 'eval_recall': 0.8005018180008084, 'eval_runtime': 3.7414, 'eval_samples_per_second': 407.062, 'eval_steps_per_second': 51.05, 'epoch': 7.0}


 32%|███▏      | 6096/19050 [08:17<15:43, 13.73it/s]  
 32%|███▏      | 6096/19050 [08:20<15:43, 13.73it/s]

{'eval_loss': 0.11588722467422485, 'eval_f1': 0.8533425925102305, 'eval_precision': 0.8453092753104021, 'eval_recall': 0.8630424309827421, 'eval_runtime': 3.7131, 'eval_samples_per_second': 410.173, 'eval_steps_per_second': 51.44, 'epoch': 8.0}


 36%|███▌      | 6858/19050 [09:19<14:50, 13.69it/s]  
 36%|███▌      | 6858/19050 [09:23<14:50, 13.69it/s]

{'eval_loss': 0.11362927407026291, 'eval_f1': 0.8707743107678422, 'eval_precision': 0.8698881683107932, 'eval_recall': 0.8747728447091945, 'eval_runtime': 3.6894, 'eval_samples_per_second': 412.8, 'eval_steps_per_second': 51.769, 'epoch': 9.0}


 40%|████      | 7620/19050 [10:22<13:44, 13.86it/s]  
 40%|████      | 7620/19050 [10:26<13:44, 13.86it/s]

{'eval_loss': 0.12544740736484528, 'eval_f1': 0.8721912593996402, 'eval_precision': 0.8974532589260544, 'eval_recall': 0.8626599153434144, 'eval_runtime': 3.7171, 'eval_samples_per_second': 409.73, 'eval_steps_per_second': 51.384, 'epoch': 10.0}


 44%|████▍     | 8382/19050 [11:25<12:51, 13.83it/s]  
 44%|████▍     | 8382/19050 [11:29<12:51, 13.83it/s]

{'eval_loss': 0.12594832479953766, 'eval_f1': 0.8572037601378383, 'eval_precision': 0.8609228518554851, 'eval_recall': 0.8545078498273506, 'eval_runtime': 3.6766, 'eval_samples_per_second': 414.24, 'eval_steps_per_second': 51.95, 'epoch': 11.0}


 48%|████▊     | 9144/19050 [12:28<12:27, 13.25it/s]  
 48%|████▊     | 9144/19050 [12:32<12:27, 13.25it/s]

{'eval_loss': 0.1361003816127777, 'eval_f1': 0.8655413486851918, 'eval_precision': 0.8741255412824699, 'eval_recall': 0.8609215296214124, 'eval_runtime': 3.7163, 'eval_samples_per_second': 409.815, 'eval_steps_per_second': 51.395, 'epoch': 12.0}


 52%|█████▏    | 9906/19050 [13:31<10:57, 13.91it/s]  
 52%|█████▏    | 9906/19050 [13:35<10:57, 13.91it/s]

{'eval_loss': 0.12838101387023926, 'eval_f1': 0.8620974821302096, 'eval_precision': 0.867092121826018, 'eval_recall': 0.8599738247239352, 'eval_runtime': 3.7086, 'eval_samples_per_second': 410.667, 'eval_steps_per_second': 51.502, 'epoch': 13.0}


 53%|█████▎    | 10002/19050 [13:44<11:38, 12.94it/s] 

{'loss': 0.0354, 'grad_norm': 0.03513708710670471, 'learning_rate': 2.3753280839895015e-05, 'epoch': 13.12}


 56%|█████▌    | 10668/19050 [14:34<10:35, 13.19it/s]
 56%|█████▌    | 10668/19050 [14:38<10:35, 13.19it/s]

{'eval_loss': 0.12462300807237625, 'eval_f1': 0.8738338273463769, 'eval_precision': 0.8814835283186101, 'eval_recall': 0.8696720287609274, 'eval_runtime': 3.7618, 'eval_samples_per_second': 404.862, 'eval_steps_per_second': 50.774, 'epoch': 14.0}


 60%|██████    | 11430/19050 [15:37<09:46, 13.00it/s]  
 60%|██████    | 11430/19050 [15:41<09:46, 13.00it/s]

{'eval_loss': 0.14071008563041687, 'eval_f1': 0.857808558361853, 'eval_precision': 0.8660151810336627, 'eval_recall': 0.8509888661264543, 'eval_runtime': 3.688, 'eval_samples_per_second': 412.957, 'eval_steps_per_second': 51.789, 'epoch': 15.0}


 64%|██████▍   | 12192/19050 [16:40<08:28, 13.50it/s]  
 64%|██████▍   | 12192/19050 [16:43<08:28, 13.50it/s]

{'eval_loss': 0.140594020485878, 'eval_f1': 0.8690418522941619, 'eval_precision': 0.87980642767606, 'eval_recall': 0.861367078887222, 'eval_runtime': 3.7825, 'eval_samples_per_second': 402.643, 'eval_steps_per_second': 50.496, 'epoch': 16.0}


 68%|██████▊   | 12954/19050 [17:43<07:38, 13.30it/s]  
 68%|██████▊   | 12954/19050 [17:46<07:38, 13.30it/s]

{'eval_loss': 0.1417272984981537, 'eval_f1': 0.8656296992362978, 'eval_precision': 0.8759971048728871, 'eval_recall': 0.8569579474508037, 'eval_runtime': 3.6964, 'eval_samples_per_second': 412.022, 'eval_steps_per_second': 51.672, 'epoch': 17.0}


 72%|███████▏  | 13716/19050 [18:47<06:38, 13.37it/s]  
 72%|███████▏  | 13716/19050 [18:50<06:38, 13.37it/s]

{'eval_loss': 0.13999241590499878, 'eval_f1': 0.8724453548128828, 'eval_precision': 0.8836312821224297, 'eval_recall': 0.8646886859278453, 'eval_runtime': 3.8084, 'eval_samples_per_second': 399.907, 'eval_steps_per_second': 50.153, 'epoch': 18.0}


 76%|███████▌  | 14477/19050 [19:50<05:44, 13.26it/s]  
 76%|███████▌  | 14478/19050 [19:53<05:44, 13.26it/s]

{'eval_loss': 0.1604202687740326, 'eval_f1': 0.8623498884694808, 'eval_precision': 0.8834900028765142, 'eval_recall': 0.845941293759689, 'eval_runtime': 3.7093, 'eval_samples_per_second': 410.589, 'eval_steps_per_second': 51.492, 'epoch': 19.0}


 80%|███████▉  | 15239/19050 [20:53<04:52, 13.05it/s]  
 80%|████████  | 15240/19050 [20:57<04:51, 13.05it/s]

{'eval_loss': 0.1401103436946869, 'eval_f1': 0.8768673448282014, 'eval_precision': 0.8848256892289301, 'eval_recall': 0.8701732389961917, 'eval_runtime': 3.8373, 'eval_samples_per_second': 396.892, 'eval_steps_per_second': 49.774, 'epoch': 20.0}


 84%|████████▍ | 16001/19050 [21:56<03:45, 13.50it/s]
 84%|████████▍ | 16002/19050 [21:59<03:45, 13.50it/s]

{'eval_loss': 0.15591759979724884, 'eval_f1': 0.8651872971296989, 'eval_precision': 0.8856348976140261, 'eval_recall': 0.8492641514794083, 'eval_runtime': 3.6893, 'eval_samples_per_second': 412.816, 'eval_steps_per_second': 51.771, 'epoch': 21.0}


 88%|████████▊ | 16763/19050 [22:58<02:54, 13.12it/s]
 88%|████████▊ | 16764/19050 [23:02<02:54, 13.12it/s]

{'eval_loss': 0.145138218998909, 'eval_f1': 0.8779313591891421, 'eval_precision': 0.888011486694504, 'eval_recall': 0.870313901496718, 'eval_runtime': 3.7765, 'eval_samples_per_second': 403.287, 'eval_steps_per_second': 50.576, 'epoch': 22.0}


 92%|█████████▏| 17525/19050 [24:01<01:53, 13.43it/s]
 92%|█████████▏| 17526/19050 [24:05<01:53, 13.43it/s]

{'eval_loss': 0.15135043859481812, 'eval_f1': 0.8716211961008106, 'eval_precision': 0.8882396142086703, 'eval_recall': 0.8575594879481194, 'eval_runtime': 3.6941, 'eval_samples_per_second': 412.275, 'eval_steps_per_second': 51.704, 'epoch': 23.0}


 96%|█████████▌| 18287/19050 [25:05<00:56, 13.46it/s]
 96%|█████████▌| 18288/19050 [25:09<00:56, 13.46it/s]

{'eval_loss': 0.15652596950531006, 'eval_f1': 0.8673812856880695, 'eval_precision': 0.8923421337237393, 'eval_recall': 0.8495403169479665, 'eval_runtime': 3.8296, 'eval_samples_per_second': 397.689, 'eval_steps_per_second': 49.874, 'epoch': 24.0}


100%|██████████| 19050/19050 [26:09<00:00, 13.86it/s]
100%|██████████| 19050/19050 [26:14<00:00, 13.86it/s]

{'eval_loss': 0.15452265739440918, 'eval_f1': 0.8692792735945071, 'eval_precision': 0.8945383525386259, 'eval_recall': 0.8505820486398531, 'eval_runtime': 3.7257, 'eval_samples_per_second': 408.781, 'eval_steps_per_second': 51.265, 'epoch': 25.0}


100%|██████████| 19050/19050 [26:16<00:00, 12.08it/s]


{'train_runtime': 1576.7367, 'train_samples_per_second': 96.576, 'train_steps_per_second': 12.082, 'train_loss': 0.019608923644218543, 'epoch': 25.0}


100%|██████████| 191/191 [00:03<00:00, 51.58it/s]


0,1
eval/f1,▁▄▇▇▇█▆▇██▇█▇█▇███▇███████
eval/loss,▃▂▁▁▃▂▅▄▄▅▅▆▅▅▆▆▆▆█▆█▇▇█▇▇
eval/precision,▁███▆▇▇▅▇█▆▇▇▇▆▇▇▇▇▇▇▇▇██▇
eval/recall,▁▃▆▇█▇▆███▇███▇█▇█▇█▇█▇▇▇█
eval/runtime,▆█▇▂▆▂▃▂▁▂▁▂▂▄▁▅▂▆▂▇▁▅▂▇▃▄
eval/samples_per_second,▃▁▁▇▃▇▆▇█▆█▆▇▅█▄▇▃▇▂█▄▇▂▆▅
eval/steps_per_second,▃▁▁▇▃▇▆▇█▆█▆▇▅█▄▇▃▇▂█▄▇▂▆▅
train/epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇████
train/grad_norm,▁

0,1
eval/f1,0.87793
eval/loss,0.14514
eval/precision,0.88801
eval/recall,0.87031
eval/runtime,3.7486
eval/samples_per_second,406.29
eval/steps_per_second,50.953
total_flos,1.001675865072e+16
train/epoch,25.0
train/global_step,19050.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1884/1884 [00:00<00:00, 16743.76 examples/s]
  4%|▍         | 189/4725 [00:13<05:36, 13.49it/s]
  4%|▍         | 189/4725 [00:14<05:36, 13.49it/s]

{'eval_loss': 0.3387203812599182, 'eval_f1': 0.35637529568717136, 'eval_precision': 0.5498181818181818, 'eval_recall': 0.29857919166761004, 'eval_runtime': 0.9563, 'eval_samples_per_second': 394.236, 'eval_steps_per_second': 50.194, 'epoch': 1.0}


  8%|▊         | 378/4725 [00:30<05:15, 13.80it/s]
  8%|▊         | 378/4725 [00:31<05:15, 13.80it/s]

{'eval_loss': 0.2921161651611328, 'eval_f1': 0.5734033149275888, 'eval_precision': 0.6462629987410548, 'eval_recall': 0.52908043641164, 'eval_runtime': 0.8965, 'eval_samples_per_second': 420.511, 'eval_steps_per_second': 53.54, 'epoch': 2.0}


 12%|█▏        | 566/4725 [00:47<05:01, 13.81it/s]
 12%|█▏        | 567/4725 [00:48<05:01, 13.81it/s]

{'eval_loss': 0.2655353844165802, 'eval_f1': 0.6723044720807119, 'eval_precision': 0.8123370711472344, 'eval_recall': 0.6043678309435795, 'eval_runtime': 0.9194, 'eval_samples_per_second': 410.031, 'eval_steps_per_second': 52.206, 'epoch': 3.0}


 16%|█▌        | 756/4725 [01:03<04:50, 13.67it/s]
 16%|█▌        | 756/4725 [01:04<04:50, 13.67it/s]

{'eval_loss': 0.30607858300209045, 'eval_f1': 0.7054764332590044, 'eval_precision': 0.7522985357404819, 'eval_recall': 0.6684012586669742, 'eval_runtime': 0.9564, 'eval_samples_per_second': 394.189, 'eval_steps_per_second': 50.189, 'epoch': 4.0}


 20%|██        | 945/4725 [01:20<04:29, 14.04it/s]
 20%|██        | 945/4725 [01:21<04:29, 14.04it/s]

{'eval_loss': 0.35271185636520386, 'eval_f1': 0.7254971422259802, 'eval_precision': 0.7656837509778687, 'eval_recall': 0.698997993947504, 'eval_runtime': 0.9179, 'eval_samples_per_second': 410.707, 'eval_steps_per_second': 52.292, 'epoch': 5.0}


 24%|██▍       | 1133/4725 [01:37<04:23, 13.64it/s]
 24%|██▍       | 1134/4725 [01:38<04:23, 13.64it/s]

{'eval_loss': 0.35919323563575745, 'eval_f1': 0.7334489178391209, 'eval_precision': 0.7440369521903138, 'eval_recall': 0.7363893463458953, 'eval_runtime': 0.9216, 'eval_samples_per_second': 409.079, 'eval_steps_per_second': 52.084, 'epoch': 6.0}


 28%|██▊       | 1323/4725 [01:53<04:14, 13.38it/s]
 28%|██▊       | 1323/4725 [01:54<04:14, 13.38it/s]

{'eval_loss': 0.37131887674331665, 'eval_f1': 0.7426467187431043, 'eval_precision': 0.7629978194235119, 'eval_recall': 0.7308820624662034, 'eval_runtime': 0.958, 'eval_samples_per_second': 393.519, 'eval_steps_per_second': 50.103, 'epoch': 7.0}


 32%|███▏      | 1511/4725 [02:10<04:01, 13.33it/s]
 32%|███▏      | 1512/4725 [02:11<04:01, 13.33it/s]

{'eval_loss': 0.38917839527130127, 'eval_f1': 0.7538719866253939, 'eval_precision': 0.7659406460105609, 'eval_recall': 0.7458699777490724, 'eval_runtime': 0.92, 'eval_samples_per_second': 409.767, 'eval_steps_per_second': 52.172, 'epoch': 8.0}


 36%|███▌      | 1701/4725 [02:26<03:36, 13.97it/s]
 36%|███▌      | 1701/4725 [02:27<03:36, 13.97it/s]

{'eval_loss': 0.41207870841026306, 'eval_f1': 0.7624501652569897, 'eval_precision': 0.7922011550835019, 'eval_recall': 0.7510658576357856, 'eval_runtime': 0.9309, 'eval_samples_per_second': 404.963, 'eval_steps_per_second': 51.56, 'epoch': 9.0}


 40%|███▉      | 1889/4725 [02:43<03:28, 13.57it/s]
 40%|████      | 1890/4725 [02:44<03:28, 13.57it/s]

{'eval_loss': 0.3892602026462555, 'eval_f1': 0.7689826582644754, 'eval_precision': 0.778269933485111, 'eval_recall': 0.7716464772255557, 'eval_runtime': 0.9791, 'eval_samples_per_second': 385.038, 'eval_steps_per_second': 49.023, 'epoch': 10.0}


 44%|████▍     | 2079/4725 [02:59<03:08, 14.06it/s]
 44%|████▍     | 2079/4725 [03:00<03:08, 14.06it/s]

{'eval_loss': 0.41938236355781555, 'eval_f1': 0.7601253842506378, 'eval_precision': 0.7633080690996306, 'eval_recall': 0.762412902490871, 'eval_runtime': 0.9369, 'eval_samples_per_second': 402.41, 'eval_steps_per_second': 51.235, 'epoch': 11.0}


 48%|████▊     | 2267/4725 [03:16<03:04, 13.35it/s]
 48%|████▊     | 2268/4725 [03:17<03:04, 13.35it/s]

{'eval_loss': 0.4181179404258728, 'eval_f1': 0.7829145827598245, 'eval_precision': 0.7899525416147257, 'eval_recall': 0.7796915968234123, 'eval_runtime': 0.9383, 'eval_samples_per_second': 401.773, 'eval_steps_per_second': 51.154, 'epoch': 12.0}


 52%|█████▏    | 2457/4725 [03:33<02:43, 13.91it/s]
 52%|█████▏    | 2457/4725 [03:34<02:43, 13.91it/s]

{'eval_loss': 0.44032832980155945, 'eval_f1': 0.758685084764636, 'eval_precision': 0.7623473492060684, 'eval_recall': 0.7603557534926315, 'eval_runtime': 0.9631, 'eval_samples_per_second': 391.442, 'eval_steps_per_second': 49.839, 'epoch': 13.0}


 56%|█████▌    | 2645/4725 [03:50<02:39, 13.02it/s]
 56%|█████▌    | 2646/4725 [03:51<02:39, 13.02it/s]

{'eval_loss': 0.4314117133617401, 'eval_f1': 0.7813508599495395, 'eval_precision': 0.7900023613370415, 'eval_recall': 0.780010230668496, 'eval_runtime': 0.9475, 'eval_samples_per_second': 397.893, 'eval_steps_per_second': 50.66, 'epoch': 14.0}


 60%|██████    | 2835/4725 [04:07<02:16, 13.85it/s]
 60%|██████    | 2835/4725 [04:08<02:16, 13.85it/s]

{'eval_loss': 0.4329001009464264, 'eval_f1': 0.7899388323229383, 'eval_precision': 0.786933089796182, 'eval_recall': 0.7947986722339693, 'eval_runtime': 0.9581, 'eval_samples_per_second': 393.5, 'eval_steps_per_second': 50.101, 'epoch': 15.0}


 64%|██████▍   | 3023/4725 [04:23<02:07, 13.34it/s]
 64%|██████▍   | 3024/4725 [04:24<02:07, 13.34it/s]

{'eval_loss': 0.43014809489250183, 'eval_f1': 0.7845248002510214, 'eval_precision': 0.7907330036803721, 'eval_recall': 0.782548739680555, 'eval_runtime': 0.9864, 'eval_samples_per_second': 382.192, 'eval_steps_per_second': 48.661, 'epoch': 16.0}


 68%|██████▊   | 3213/4725 [04:40<01:50, 13.63it/s]
 68%|██████▊   | 3213/4725 [04:41<01:50, 13.63it/s]

{'eval_loss': 0.43073245882987976, 'eval_f1': 0.7971864498778809, 'eval_precision': 0.8088437271633993, 'eval_recall': 0.7881784663619088, 'eval_runtime': 0.9445, 'eval_samples_per_second': 399.172, 'eval_steps_per_second': 50.823, 'epoch': 17.0}


 72%|███████▏  | 3401/4725 [04:57<01:38, 13.40it/s]
 72%|███████▏  | 3402/4725 [04:58<01:38, 13.40it/s]

{'eval_loss': 0.45025205612182617, 'eval_f1': 0.7878072817342889, 'eval_precision': 0.7849331544734981, 'eval_recall': 0.7963685779540727, 'eval_runtime': 0.9429, 'eval_samples_per_second': 399.816, 'eval_steps_per_second': 50.905, 'epoch': 18.0}


 76%|███████▌  | 3591/4725 [05:13<01:20, 14.05it/s]
 76%|███████▌  | 3591/4725 [05:14<01:20, 14.05it/s]

{'eval_loss': 0.4527928829193115, 'eval_f1': 0.7855564502738825, 'eval_precision': 0.7803393283957651, 'eval_recall': 0.7939773966625718, 'eval_runtime': 0.9615, 'eval_samples_per_second': 392.101, 'eval_steps_per_second': 49.923, 'epoch': 19.0}


 80%|███████▉  | 3779/4725 [05:30<01:11, 13.24it/s]
 80%|████████  | 3780/4725 [05:31<01:11, 13.24it/s]

{'eval_loss': 0.4507671594619751, 'eval_f1': 0.7874947108029815, 'eval_precision': 0.7905919483915635, 'eval_recall': 0.7870414558008635, 'eval_runtime': 0.9365, 'eval_samples_per_second': 402.549, 'eval_steps_per_second': 51.253, 'epoch': 20.0}


 84%|████████▍ | 3969/4725 [05:47<00:54, 13.95it/s]
 84%|████████▍ | 3969/4725 [05:48<00:54, 13.95it/s]

{'eval_loss': 0.45419538021087646, 'eval_f1': 0.794184320016216, 'eval_precision': 0.7955942649146532, 'eval_recall': 0.7961856646881553, 'eval_runtime': 0.9358, 'eval_samples_per_second': 402.874, 'eval_steps_per_second': 51.294, 'epoch': 21.0}


 88%|████████▊ | 4157/4725 [06:03<00:42, 13.47it/s]
 88%|████████▊ | 4158/4725 [06:04<00:42, 13.47it/s]

{'eval_loss': 0.45876988768577576, 'eval_f1': 0.7893201137670905, 'eval_precision': 0.7923343180076475, 'eval_recall': 0.789399950402441, 'eval_runtime': 0.9605, 'eval_samples_per_second': 392.493, 'eval_steps_per_second': 49.973, 'epoch': 22.0}


 92%|█████████▏| 4347/4725 [06:20<00:28, 13.49it/s]
 92%|█████████▏| 4347/4725 [06:21<00:28, 13.49it/s]

{'eval_loss': 0.46405017375946045, 'eval_f1': 0.7906850179078152, 'eval_precision': 0.7950088032005104, 'eval_recall': 0.789399950402441, 'eval_runtime': 0.9277, 'eval_samples_per_second': 406.374, 'eval_steps_per_second': 51.74, 'epoch': 23.0}


 96%|█████████▌| 4535/4725 [06:37<00:14, 13.40it/s]
 96%|█████████▌| 4536/4725 [06:38<00:14, 13.40it/s]

{'eval_loss': 0.461450457572937, 'eval_f1': 0.7902997985971645, 'eval_precision': 0.7925805877454428, 'eval_recall': 0.7911856646881553, 'eval_runtime': 0.936, 'eval_samples_per_second': 402.786, 'eval_steps_per_second': 51.283, 'epoch': 24.0}


100%|██████████| 4725/4725 [06:53<00:00, 13.93it/s]
100%|██████████| 4725/4725 [06:56<00:00, 13.93it/s]

{'eval_loss': 0.4616656005382538, 'eval_f1': 0.7877270038450163, 'eval_precision': 0.7901152989272127, 'eval_recall': 0.7884459386607581, 'eval_runtime': 0.9628, 'eval_samples_per_second': 391.583, 'eval_steps_per_second': 49.857, 'epoch': 25.0}


100%|██████████| 4725/4725 [06:58<00:00, 11.28it/s]


{'train_runtime': 418.9735, 'train_samples_per_second': 89.922, 'train_steps_per_second': 11.278, 'train_loss': 0.06047852208374669, 'epoch': 25.0}


100%|██████████| 48/48 [00:01<00:00, 46.07it/s]


0,1
eval/f1,▁▄▆▇▇▇▇▇▇█▇█▇█████████████
eval/loss,▄▂▁▂▄▄▅▅▆▅▆▆▇▇▇▇▇████████▇
eval/precision,▁▄█▆▇▆▇▇▇▇▇▇▇▇▇▇█▇▇▇█▇█▇▇█
eval/recall,▁▄▅▆▇▇▇▇▇███▇█████████████
eval/runtime,▃▁▂▃▂▂▃▂▂▄▂▂▃▃▃▄▃▃▃▂▂▃▂▂▃█
eval/samples_per_second,▆█▇▆▇▇▆▇▇▅▆▆▅▆▆▄▆▆▅▆▆▅▇▆▅▁
eval/steps_per_second,▆█▇▆▇▇▆▇▇▅▆▆▅▆▆▄▆▆▅▆▆▅▇▆▅▁
train/epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇████

0,1
eval/f1,0.79719
eval/loss,0.43073
eval/precision,0.80884
eval/recall,0.78818
eval/runtime,1.0938
eval/samples_per_second,344.656
eval/steps_per_second,43.882
total_flos,2478244004332800.0
train/epoch,25.0
train/global_step,4725.0


  return self.fget.__get__(instance, owner)()
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1298/1298 [00:00<00:00, 11852.96 examples/s]
  4%|▍         | 130/3250 [00:09<03:41, 14.09it/s]
  4%|▍         | 130/3250 [00:10<03:41, 14.09it/s]

{'eval_loss': 0.2955192029476166, 'eval_f1': 0.20051759834368532, 'eval_precision': 0.26745075540256263, 'eval_recall': 0.1667340203408096, 'eval_runtime': 0.6218, 'eval_samples_per_second': 418.122, 'eval_steps_per_second': 53.069, 'epoch': 1.0}


  8%|▊         | 260/3250 [00:20<03:36, 13.81it/s]
  8%|▊         | 260/3250 [00:21<03:36, 13.81it/s]

{'eval_loss': 0.23722675442695618, 'eval_f1': 0.45246679121468986, 'eval_precision': 0.6151753008895866, 'eval_recall': 0.3742839916004765, 'eval_runtime': 0.6535, 'eval_samples_per_second': 397.886, 'eval_steps_per_second': 50.501, 'epoch': 2.0}


 12%|█▏        | 390/3250 [00:32<03:27, 13.81it/s]
 12%|█▏        | 390/3250 [00:33<03:27, 13.81it/s]

{'eval_loss': 0.2268332839012146, 'eval_f1': 0.5076220265121486, 'eval_precision': 0.5683665217977397, 'eval_recall': 0.47288140300192233, 'eval_runtime': 0.6222, 'eval_samples_per_second': 417.871, 'eval_steps_per_second': 53.037, 'epoch': 3.0}


 16%|█▌        | 520/3250 [00:44<03:16, 13.89it/s]
 16%|█▌        | 520/3250 [00:45<03:16, 13.89it/s]

{'eval_loss': 0.22507095336914062, 'eval_f1': 0.5364201906722924, 'eval_precision': 0.5810007185535434, 'eval_recall': 0.5112632690697824, 'eval_runtime': 0.6556, 'eval_samples_per_second': 396.6, 'eval_steps_per_second': 50.338, 'epoch': 4.0}


 20%|██        | 650/3250 [00:56<03:08, 13.80it/s]
 20%|██        | 650/3250 [00:57<03:08, 13.80it/s]

{'eval_loss': 0.21693478524684906, 'eval_f1': 0.6480944179115806, 'eval_precision': 0.7328119713751968, 'eval_recall': 0.5915653438872693, 'eval_runtime': 0.6366, 'eval_samples_per_second': 408.409, 'eval_steps_per_second': 51.837, 'epoch': 5.0}


 24%|██▍       | 780/3250 [01:08<02:58, 13.83it/s]
 24%|██▍       | 780/3250 [01:09<02:58, 13.83it/s]

{'eval_loss': 0.2542686462402344, 'eval_f1': 0.6317876080601462, 'eval_precision': 0.7212521339580835, 'eval_recall': 0.5741644708840928, 'eval_runtime': 0.6395, 'eval_samples_per_second': 406.585, 'eval_steps_per_second': 51.605, 'epoch': 6.0}


 28%|██▊       | 910/3250 [01:20<02:50, 13.72it/s]
 28%|██▊       | 910/3250 [01:21<02:50, 13.72it/s]

{'eval_loss': 0.24570801854133606, 'eval_f1': 0.6686973066496186, 'eval_precision': 0.760291662300377, 'eval_recall': 0.6151322368336201, 'eval_runtime': 0.6376, 'eval_samples_per_second': 407.759, 'eval_steps_per_second': 51.754, 'epoch': 7.0}


 32%|███▏      | 1040/3250 [01:32<02:41, 13.70it/s]
 32%|███▏      | 1040/3250 [01:32<02:41, 13.70it/s]

{'eval_loss': 0.2410241812467575, 'eval_f1': 0.6877996576215665, 'eval_precision': 0.7552268903651392, 'eval_recall': 0.6404561352220419, 'eval_runtime': 0.6426, 'eval_samples_per_second': 404.585, 'eval_steps_per_second': 51.351, 'epoch': 8.0}


 36%|███▌      | 1170/3250 [01:44<02:32, 13.67it/s]
 36%|███▌      | 1170/3250 [01:44<02:32, 13.67it/s]

{'eval_loss': 0.247065469622612, 'eval_f1': 0.6716410563957325, 'eval_precision': 0.7599256931488991, 'eval_recall': 0.6136334833291359, 'eval_runtime': 0.6442, 'eval_samples_per_second': 403.633, 'eval_steps_per_second': 51.23, 'epoch': 9.0}


 40%|████      | 1300/3250 [01:56<02:20, 13.86it/s]
 40%|████      | 1300/3250 [01:56<02:20, 13.86it/s]

{'eval_loss': 0.2660147547721863, 'eval_f1': 0.6586982173552602, 'eval_precision': 0.7455912129729855, 'eval_recall': 0.6063142191450217, 'eval_runtime': 0.6358, 'eval_samples_per_second': 408.924, 'eval_steps_per_second': 51.902, 'epoch': 10.0}


 44%|████▍     | 1430/3250 [02:08<02:12, 13.71it/s]
 44%|████▍     | 1430/3250 [02:08<02:12, 13.71it/s]

{'eval_loss': 0.2719353139400482, 'eval_f1': 0.6872074956108569, 'eval_precision': 0.7177799963829894, 'eval_recall': 0.6742498637276028, 'eval_runtime': 0.6466, 'eval_samples_per_second': 402.125, 'eval_steps_per_second': 51.039, 'epoch': 11.0}


 48%|████▊     | 1560/3250 [02:19<02:02, 13.79it/s]
 48%|████▊     | 1560/3250 [02:20<02:02, 13.79it/s]

{'eval_loss': 0.25168201327323914, 'eval_f1': 0.6865662356399653, 'eval_precision': 0.7234358895073181, 'eval_recall': 0.6558218042020396, 'eval_runtime': 0.6361, 'eval_samples_per_second': 408.753, 'eval_steps_per_second': 51.88, 'epoch': 12.0}


 52%|█████▏    | 1690/3250 [02:32<02:00, 12.91it/s]
 52%|█████▏    | 1690/3250 [02:32<02:00, 12.91it/s]

{'eval_loss': 0.271687775850296, 'eval_f1': 0.6892272087102579, 'eval_precision': 0.7266780045351473, 'eval_recall': 0.6663547723924756, 'eval_runtime': 0.6449, 'eval_samples_per_second': 403.187, 'eval_steps_per_second': 51.174, 'epoch': 13.0}


 56%|█████▌    | 1820/3250 [02:43<01:46, 13.48it/s]
 56%|█████▌    | 1820/3250 [02:44<01:46, 13.48it/s]

{'eval_loss': 0.27895841002464294, 'eval_f1': 0.6876086259251382, 'eval_precision': 0.7296550684349713, 'eval_recall': 0.6620132692017745, 'eval_runtime': 0.6395, 'eval_samples_per_second': 406.592, 'eval_steps_per_second': 51.606, 'epoch': 14.0}


 60%|██████    | 1950/3250 [02:55<01:39, 13.01it/s]
 60%|██████    | 1950/3250 [02:56<01:39, 13.01it/s]

{'eval_loss': 0.2866038978099823, 'eval_f1': 0.6999524830116801, 'eval_precision': 0.7399990193600037, 'eval_recall': 0.6764889208485718, 'eval_runtime': 0.6868, 'eval_samples_per_second': 378.562, 'eval_steps_per_second': 48.048, 'epoch': 15.0}


 64%|██████▍   | 2080/3250 [03:08<01:31, 12.78it/s]
 64%|██████▍   | 2080/3250 [03:08<01:31, 12.78it/s]

{'eval_loss': 0.29692596197128296, 'eval_f1': 0.6793931183158568, 'eval_precision': 0.7073253543841779, 'eval_recall': 0.6636129619543759, 'eval_runtime': 0.6629, 'eval_samples_per_second': 392.205, 'eval_steps_per_second': 49.78, 'epoch': 16.0}


 68%|██████▊   | 2210/3250 [03:20<01:21, 12.82it/s]
 68%|██████▊   | 2210/3250 [03:20<01:21, 12.82it/s]

{'eval_loss': 0.3052338659763336, 'eval_f1': 0.659692734446429, 'eval_precision': 0.6897702804065348, 'eval_recall': 0.6416835944626982, 'eval_runtime': 0.6857, 'eval_samples_per_second': 379.192, 'eval_steps_per_second': 48.128, 'epoch': 17.0}


 72%|███████▏  | 2340/3250 [03:32<01:07, 13.51it/s]
 72%|███████▏  | 2340/3250 [03:32<01:07, 13.51it/s]

{'eval_loss': 0.29117244482040405, 'eval_f1': 0.6890581760926345, 'eval_precision': 0.7137896284054747, 'eval_recall': 0.673022427868401, 'eval_runtime': 0.6602, 'eval_samples_per_second': 393.845, 'eval_steps_per_second': 49.988, 'epoch': 18.0}


 76%|███████▌  | 2470/3250 [03:44<01:00, 12.90it/s]
 76%|███████▌  | 2470/3250 [03:44<01:00, 12.90it/s]

{'eval_loss': 0.2954113781452179, 'eval_f1': 0.6848155547406292, 'eval_precision': 0.7089366917938346, 'eval_recall': 0.6709599007298861, 'eval_runtime': 0.6901, 'eval_samples_per_second': 376.757, 'eval_steps_per_second': 47.819, 'epoch': 19.0}


 80%|████████  | 2600/3250 [03:56<00:47, 13.64it/s]
 80%|████████  | 2600/3250 [03:56<00:47, 13.64it/s]

{'eval_loss': 0.30089303851127625, 'eval_f1': 0.6854880552338419, 'eval_precision': 0.7140892742933559, 'eval_recall': 0.6675868287288085, 'eval_runtime': 0.6531, 'eval_samples_per_second': 398.112, 'eval_steps_per_second': 50.53, 'epoch': 20.0}


 84%|████████▍ | 2730/3250 [04:07<00:39, 13.30it/s]
 84%|████████▍ | 2730/3250 [04:08<00:39, 13.30it/s]

{'eval_loss': 0.3016676604747772, 'eval_f1': 0.6805742994539242, 'eval_precision': 0.7089518386997379, 'eval_recall': 0.6627427524844494, 'eval_runtime': 0.6808, 'eval_samples_per_second': 381.929, 'eval_steps_per_second': 48.476, 'epoch': 21.0}


 88%|████████▊ | 2860/3250 [04:19<00:28, 13.66it/s]
 88%|████████▊ | 2860/3250 [04:20<00:28, 13.66it/s]

{'eval_loss': 0.30449166893959045, 'eval_f1': 0.6971069041810412, 'eval_precision': 0.7371555490883223, 'eval_recall': 0.673817043587029, 'eval_runtime': 0.6693, 'eval_samples_per_second': 388.441, 'eval_steps_per_second': 49.302, 'epoch': 22.0}


 92%|█████████▏| 2990/3250 [04:31<00:19, 13.02it/s]
 92%|█████████▏| 2990/3250 [04:32<00:19, 13.02it/s]

{'eval_loss': 0.30453163385391235, 'eval_f1': 0.6873877233054589, 'eval_precision': 0.7161742270906691, 'eval_recall': 0.6690012558858099, 'eval_runtime': 0.6707, 'eval_samples_per_second': 387.649, 'eval_steps_per_second': 49.202, 'epoch': 23.0}


 96%|█████████▌| 3120/3250 [04:43<00:09, 13.63it/s]
 96%|█████████▌| 3120/3250 [04:44<00:09, 13.63it/s]

{'eval_loss': 0.30667078495025635, 'eval_f1': 0.6945783869369926, 'eval_precision': 0.7349296290173236, 'eval_recall': 0.6709599007298861, 'eval_runtime': 0.6527, 'eval_samples_per_second': 398.364, 'eval_steps_per_second': 50.562, 'epoch': 24.0}


100%|██████████| 3250/3250 [04:55<00:00, 13.78it/s]
100%|██████████| 3250/3250 [04:57<00:00, 13.78it/s]

{'eval_loss': 0.30710625648498535, 'eval_f1': 0.6952067844147053, 'eval_precision': 0.7423616927519082, 'eval_recall': 0.6672005022336455, 'eval_runtime': 0.6661, 'eval_samples_per_second': 390.309, 'eval_steps_per_second': 49.539, 'epoch': 25.0}


100%|██████████| 3250/3250 [04:59<00:00, 10.84it/s]


{'train_runtime': 299.6886, 'train_samples_per_second': 86.59, 'train_steps_per_second': 10.845, 'train_loss': 0.05475941819411058, 'epoch': 25.0}


100%|██████████| 33/33 [00:00<00:00, 46.63it/s]


Evaluating the predictions.

In [15]:
scores

Unnamed: 0,model,lan,cat,precision,recall,f1
0,java_epoch-25_batchSize-8_weightsOfDecay-0.001,java,summary,0.949224,0.957326,0.953258
1,java_epoch-25_batchSize-8_weightsOfDecay-0.001,java,Ownership,0.964912,1.0,0.982143
2,java_epoch-25_batchSize-8_weightsOfDecay-0.001,java,Expand,0.681818,0.735294,0.707547
3,java_epoch-25_batchSize-8_weightsOfDecay-0.001,java,usage,0.964953,0.94508,0.954913
4,java_epoch-25_batchSize-8_weightsOfDecay-0.001,java,Pointer,0.988506,0.910053,0.947658
5,java_epoch-25_batchSize-8_weightsOfDecay-0.001,java,deprecation,0.933333,0.933333,0.933333
6,java_epoch-25_batchSize-8_weightsOfDecay-0.001,java,rational,0.733333,0.611111,0.666667
7,python_epoch-25_batchSize-8_weightsOfDecay-0.001,python,Usage,0.883929,0.818182,0.849785
8,python_epoch-25_batchSize-8_weightsOfDecay-0.001,python,Parameters,0.85,0.910714,0.87931
9,python_epoch-25_batchSize-8_weightsOfDecay-0.001,python,DevelopmentNotes,0.783784,0.725,0.753247


In [16]:
final_model_stats

{'epoch-10_batchSize-4_weightsOfDecay-0.01': nan,
 'epoch-10_batchSize-4_weightsOfDecay-0.001': nan,
 'epoch-10_batchSize-8_weightsOfDecay-0.01': nan,
 'epoch-10_batchSize-8_weightsOfDecay-0.001': nan,
 'epoch-15_batchSize-4_weightsOfDecay-0.01': nan,
 'epoch-15_batchSize-4_weightsOfDecay-0.001': nan,
 'epoch-15_batchSize-8_weightsOfDecay-0.01': nan,
 'epoch-15_batchSize-8_weightsOfDecay-0.001': nan,
 'epoch-20_batchSize-4_weightsOfDecay-0.01': nan,
 'epoch-20_batchSize-4_weightsOfDecay-0.001': nan,
 'epoch-20_batchSize-8_weightsOfDecay-0.01': nan,
 'epoch-20_batchSize-8_weightsOfDecay-0.001': nan,
 'epoch-25_batchSize-4_weightsOfDecay-0.01': nan,
 'epoch-25_batchSize-4_weightsOfDecay-0.001': nan,
 'epoch-25_batchSize-8_weightsOfDecay-0.01': nan,
 'epoch-25_batchSize-8_weightsOfDecay-0.001': nan}