In [1]:
# !pip install transformers==4.46.2 datasets==3.1.0 numpy==1.26.4 sklearn-pandas==2.2.0 torch==2.5.1+cu121

# DistilBERT Hyperparameter Grid Search for Classifying Code Comment Data 

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
import pandas as pd
import numpy as np
import datasets
import time
import wandb
import random
import torch

key_file = r'C:\Development\TactitalTensorsFinalProject\WANDB_API_KEY.txt' # Set the variable to a txt file containing your WandB API key

with open(key_file, "r") as f: # Retrieve key from file
    api_key = f.read().strip()

# Log into WandB with the API key
wandb.login(key=api_key)

# Initialize WandB (no need to manually set the WANDB_API_KEY env variable again)


  from .autonotebook import tqdm as notebook_tqdm
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mayoungren94[0m ([33mayoungren-colostate[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: C:\Users\ayoun\_netrc


True

If using Google Colab, utilize the cell below in replacement of the code following the imports in the cell above.

In [3]:
# from google.colab import userdata
# os.environ["WANDB_API_KEY"] = userdata.get('WANDB_API_KEY')


## Data

Below, we use the code provided by the NLBSE 2025 competition, to create the classification labels for each language represented in the data set provided data set before using a data loader to retrieve the data set.  

In [4]:
langs = ['java', 'python', 'pharo']

# langs = ['java'] # Using Java as the only language for testing purposes.
# langs = ['python']
# langs = ['pharo']
labels = {
    'java': ['summary', 'Ownership', 'Expand', 'usage', 'Pointer', 'deprecation', 'rational'],
    'python': ['Usage', 'Parameters', 'DevelopmentNotes', 'Expand', 'Summary'],
    'pharo': ['Keyimplementationpoints', 'Example', 'Responsibilities', 'Classreferences', 'Intent', 'Keymessages', 'Collaborators']
}
ds = datasets.load_dataset('NLBSE/nlbse25-code-comment-classification')

Below we display the loaded dataset to gain an idea of the data we will be working with. Here we can see the the number of samples for each language, along with the sample features. The data set seems to be pre-split into train and test features for each language. In the following cell, we print one sample from the data set for the Java language.   

In [5]:
ds

DatasetDict({
    java_train: Dataset({
        features: ['index', 'class', 'comment_sentence', 'partition', 'combo', 'labels'],
        num_rows: 7614
    })
    java_test: Dataset({
        features: ['index', 'class', 'comment_sentence', 'partition', 'combo', 'labels'],
        num_rows: 1725
    })
    python_train: Dataset({
        features: ['index', 'class', 'comment_sentence', 'partition', 'combo', 'labels'],
        num_rows: 1884
    })
    python_test: Dataset({
        features: ['index', 'class', 'comment_sentence', 'partition', 'combo', 'labels'],
        num_rows: 406
    })
    pharo_train: Dataset({
        features: ['index', 'class', 'comment_sentence', 'partition', 'combo', 'labels'],
        num_rows: 1298
    })
    pharo_test: Dataset({
        features: ['index', 'class', 'comment_sentence', 'partition', 'combo', 'labels'],
        num_rows: 289
    })
})

In [6]:
ds['java_test'][0]

{'index': 5,
 'class': 'AbstractContractGetFileStatusTest.java',
 'comment_sentence': 'accept everything.',
 'partition': 1,
 'combo': 'accept everything. | AbstractContractGetFileStatusTest.java',
 'labels': [0, 0, 1, 0, 0, 0, 0]}

In [7]:
model_name = 'distilbert/distilbert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)

The next few functions are to preprocess the trainng and validation sets, and to let the Trainer class to evaluate how well the trainng is going after each epoch.

In [8]:
# To tokenize the text in the 'combo' column of the training dataset of each language.

def tokenize_dataset(examples):
  return tokenizer(examples['combo'], truncation=True, padding="max_length", max_length=128)

In [9]:
# Tokenizes the text 'combo' column of the dataset, changes the values of the
# labels column to float instead of int, and sets to format of the lables column
# to torch tesnsors, which is required by the Trainer class.

def preprocess_dataset(input_dataset):
  processed_dataset = input_dataset.map(tokenize_dataset, batched=True, load_from_cache_file=False)
  processed_dataset = processed_dataset.cast_column("labels", datasets.features.Sequence(datasets.features.Value("float32")))
  processed_dataset.set_format('pt')
  return processed_dataset

## Evaluating Model Predictions

The compute_metrics function calculates precision, recall, and F1 score for multi-label classification tasks. It takes the predictions and true labels, computes the metrics for each class, and appends the results to a global DataFrame, scores. It returns the average precision, recall, and F1 score for all categories.

In [None]:
scores = pd.DataFrame(columns=['model', 'lan', 'cat', 'precision', 'recall', 'f1'])

def compute_metrics(eval_pred, lang, categories, mod_name):
    global scores
    scores.drop(scores[scores['lan'] == lang].index, inplace=True)

    predictions, true_labels = eval_pred
    predictions = (predictions > 0.5).astype(int)
    num_classes = len(categories)

    metrics_list = []

    for i in range(num_classes):
        y_pred = predictions[:, i]
        y_true = true_labels[:, i]

        tp = np.sum((y_true == 1) & (y_pred == 1)) # True Positive
        fp = np.sum((y_true == 0) & (y_pred == 1)) # False Positive
        fn = np.sum((y_true == 1) & (y_pred == 0)) # False Negative

        precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
        f1 = (2 * precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0

        metrics_list.append({
            'model': mod_name,
            'lan': lang,
            'cat': categories[i],
            'precision': precision,
            'recall': recall,
            'f1': f1
        })

    temp_scores = pd.DataFrame([m for m in metrics_list if m['model'] == mod_name])
    scores = pd.concat([scores, temp_scores], ignore_index=True)
    
    avg_f1 = temp_scores['f1'].mean()
    avg_precision = temp_scores['precision'].mean()
    avg_recall = temp_scores['recall'].mean()

    return {'f1': avg_f1, 'precision': avg_precision, 'recall': avg_recall}

The function, measure_runtime_and_flops measures the runtime and FLOPs of the model during inference. It uses PyTorch's profiler to calculate the FLOPs and tracks the time taken for each batch in the dataset, returning the average runtime and FLOPs.

In [11]:
def measure_runtime_and_flops(trainer, dataset):
    flops = 0
    runtime = 0

    for i in range(10):
        for batch in dataset:
            inputs = {key: val.unsqueeze(0).to(trainer.model.device) for key, val in batch.items() if key in ['input_ids', 'attention_mask']}
            with torch.profiler.profile(with_flops=True, activities=[torch.profiler.ProfilerActivity.CPU, torch.profiler.ProfilerActivity.CUDA]) as prof:
                start_time = time.time()
                _ = trainer.model(**inputs)
                end_time = time.time()

            runtime += (end_time - start_time)
            flops += sum(k.flops for k in prof.key_averages()) / 1e9 # Convert Flops to GFLOPs

    avg_runtime = runtime / 10
    avg_flops = flops / 10
    
    return avg_runtime, avg_flops, flops, runtime

In [12]:
def compute_metrics_wrapper(mod_name):
    def inner(eval_pred):
        return compute_metrics(eval_pred, lang, labels[lang], mod_name)
    return inner

The score function calculates a weighted score for model performance based on the average F1 score, runtime, and FLOPs. It applies a weighted sum, prioritizing F1 score, while penalizing models for exceeding the maximum allowed runtime and FLOPs.

In [13]:
max_avg_runtime = 5
max_avg_flops = 5000

def score(avg_f1, avg_runtime, avg_flops):
    return (0.6 * avg_f1 +
            0.2 * ((max_avg_runtime - avg_runtime) / max_avg_runtime) +
            0.2 * ((max_avg_flops - avg_flops) / max_avg_flops))

## Ensuring Reprodicability

Below, we create a function to set the seed in multiple locations to help ensure all calculated scores and models are reproducable for future work. 

In [14]:
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # for CUDA devices
    torch.backends.cudnn.deterministic = True  # Ensure deterministic behavior
    torch.backends.cudnn.benchmark = False  # Disable to ensure deterministic operations

## Grid Search

Below we test for the best hyperparameters for DistilBERT, by testing different combinations of values for learning rate, number of epochs, batch sizes, and weight of decay. Due to the size of the Java dataset out weighing Python or Pharo's datasets, we use half the number of epochs, rounded down, to save time and ensure each model gets roughly the same amount of training per language. To choose the best model, we seperately evaluate each f1 score, per language per model, and evaluate the f1 score averaged across all three languages per model. The model with the highest f1 score will be evaluated again, against a validation set and a test set of the model, before calculating it's submission score (weighted composite score), using a formula provided by the NBSE2025 competition, and re evaluating it's f1 performance with the test set.

The code performs a grid search to train and evaluate a model on multiple language datasets using different hyperparameter combinations (learning rate, epochs, batch size, weight decay). For each combination, it initializes a model, preprocesses the dataset, splits it into training and validation sets, and trains the model using the Trainer class. Evaluation metrics, particularly F1 scores, are calculated and logged with Weights & Biases. After training, the models and tokenizers are saved, and the average F1 score for each language is stored. The final F1 score across all languages for each model configuration is then recorded in final_model_stats.

In [None]:
# Training the models for each language and saving the the models and thier
# tokenizers after training.

seed = 27
set_seed(seed)

learning_rates = [5e-5, 5e-6]
epochs = [10, 15, 20]
batch_sizes = [4, 8]
weight_of_decay = [0.01, 0.001]

gs_metrics_dict = {}
final_model_stats = {}


for wd in weight_of_decay:
  for epoch in epochs:
    for bs in batch_sizes:
      for lr in learning_rates:
        print(f'------------------ Starting model ==> learning rate: {lr} epochs: {epoch}, batch size: {bs}, weights of decay: {wd} ---------------------')
        fin_mod_name = f'lr-{lr}_epoch-{epoch}_batchSize-{bs}_weightsOfDecay-{wd}'

        total_flops = 0
        total_time = 0
        total_avg_runtime = 0
        total_avg_flops = 0

        for lang in langs:
          lang_mod_name = f'{lang}_lr-{lr}_epoch-{epoch}_batchSize-{bs}_weightsOfDecay-{wd}'

          wandb.init(
              project="NBSE2025_Distilbert", 
              entity="ayoungren-colostate",
              name=lang_mod_name,
              config={  # Log hyperparameters for each run
                  "learning_rate": lr,
                  "epochs": epoch,
                  "batch_size": bs,
                  "weight_decay": wd,
                  "language": lang
              }
          )
          
          num_labels = len(labels[lang])
          model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)

          dataset = preprocess_dataset(ds[f'{lang}_train'])
          train_validation_split = dataset.train_test_split(test_size=0.2, seed=seed)

          train_dataset =  train_validation_split['train']
          validation_dataset = train_validation_split['test']

          training_args = TrainingArguments(
            output_dir=f'./results_{lang_mod_name}',
            eval_strategy="epoch",
            save_strategy="epoch",
            logging_dir=f'./logs_{lang_mod_name}',
            per_device_train_batch_size=bs,
            per_device_eval_batch_size=bs,
              # half as many epochs for Java.
            num_train_epochs=epoch,
            weight_decay=wd,
            learning_rate=lr,
            logging_steps=1000,
            save_total_limit=2,
            load_best_model_at_end=True,
            metric_for_best_model="f1",
            seed=seed
          )
          
          trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset= validation_dataset,
            processing_class=tokenizer,
            compute_metrics=compute_metrics_wrapper(fin_mod_name),
          )

          trainer.train()
          metrics = trainer.evaluate()

          print("Evaluation Metrics:", metrics)

          # avg_runtime, avg_flops, total_flops, total_time = measure_runtime_and_flops(trainer, validation_dataset)

            # store each model and succession score by individual language
          lang_mod_scores = scores[(scores['lan'] == lang) & (scores['model'] == fin_mod_name)]
          print('CHECK SCORE', scores[(scores['lan'] == lang) & (scores['model'] == fin_mod_name)])

          lang_avg_f1 = lang_mod_scores['f1'].mean()

          print("Scores: ", scores)

          # sc = round(score(lang_avg_f1, avg_runtime, avg_flops), 2)

          gs_metrics_dict[lang_mod_name] = {
            'lang': lang,
            'avg_f1': lang_avg_f1,
            # 'model': fin_mod_name,
            # 'avg_runtime': avg_runtime,
            # 'avg_flops': avg_flops,
            # 'total_flops': total_flops,
            # 'total_time': total_time,
            # 'succ_score': sc
          }

          print("Metrics Dict:", gs_metrics_dict)
          # total_avg_runtime += avg_runtime
          # total_avg_flops += avg_flops

          trainer.model.save_pretrained(f'./models/{lang_mod_name}')
          tokenizer.save_pretrained(f'./tokenizers/{lang_mod_name}')
          
        
        # store combined average succession score of all languages trained on. (Average Combined score of Java, Python, and Pharo)
        model_stats = scores[scores['model'] == fin_mod_name]
        fin_avg_f1 = model_stats['f1'].mean()
        final_model_stats[fin_mod_name] = fin_avg_f1

------------------ Starting model ==> learning rate: 5e-05 epochs: 10, batch size: 4, weights of decay: 0.01 ---------------------


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 7614/7614 [00:00<00:00, 17531.02 examples/s]
  7%|▋         | 1003/15230 [00:36<08:06, 29.24it/s]

{'loss': 0.1608, 'grad_norm': 1.0593228340148926, 'learning_rate': 4.6717005909389365e-05, 'epoch': 0.66}


  scores = pd.concat([scores, temp_scores], ignore_index=True)
                                                    
 10%|█         | 1523/15230 [00:59<08:34, 26.62it/s]

{'eval_loss': 0.1295263171195984, 'eval_f1': 0.6404807062022011, 'eval_precision': 0.6601292916925239, 'eval_recall': 0.6283423946058794, 'eval_runtime': 3.9416, 'eval_samples_per_second': 386.393, 'eval_steps_per_second': 96.662, 'epoch': 1.0}


 13%|█▎        | 2006/15230 [01:17<07:28, 29.46it/s]  

{'loss': 0.1013, 'grad_norm': 0.04986262321472168, 'learning_rate': 4.343401181877873e-05, 'epoch': 1.31}


 20%|█▉        | 3005/15230 [01:53<07:27, 27.30it/s]

{'loss': 0.0874, 'grad_norm': 0.794562578201294, 'learning_rate': 4.015101772816809e-05, 'epoch': 1.97}


                                                    
 20%|██        | 3046/15230 [01:58<06:54, 29.41it/s]

{'eval_loss': 0.09012085199356079, 'eval_f1': 0.7782227708035412, 'eval_precision': 0.8865635741130587, 'eval_recall': 0.7313404137914262, 'eval_runtime': 3.4386, 'eval_samples_per_second': 442.916, 'eval_steps_per_second': 110.802, 'epoch': 2.0}


 26%|██▋       | 4004/15230 [02:33<06:24, 29.20it/s]  

{'loss': 0.0626, 'grad_norm': 0.03471510857343674, 'learning_rate': 3.6868023637557454e-05, 'epoch': 2.63}


                                                    
 30%|███       | 4569/15230 [02:58<06:30, 27.28it/s]

{'eval_loss': 0.08777686953544617, 'eval_f1': 0.8470477952373051, 'eval_precision': 0.89890306085653, 'eval_recall': 0.8077602987512031, 'eval_runtime': 3.531, 'eval_samples_per_second': 431.326, 'eval_steps_per_second': 107.902, 'epoch': 3.0}


 33%|███▎      | 5003/15230 [03:14<05:56, 28.68it/s]  

{'loss': 0.0524, 'grad_norm': 2.932372570037842, 'learning_rate': 3.3585029546946817e-05, 'epoch': 3.28}


 39%|███▉      | 6004/15230 [03:51<05:21, 28.72it/s]

{'loss': 0.0408, 'grad_norm': 0.12557421624660492, 'learning_rate': 3.030203545633618e-05, 'epoch': 3.94}


                                                    
 40%|████      | 6092/15230 [03:57<05:13, 29.14it/s]

{'eval_loss': 0.10356926918029785, 'eval_f1': 0.8532767595693523, 'eval_precision': 0.8796589943980021, 'eval_recall': 0.8324022201702336, 'eval_runtime': 3.3215, 'eval_samples_per_second': 458.523, 'eval_steps_per_second': 114.706, 'epoch': 4.0}


 46%|████▌     | 7003/15230 [04:31<05:17, 25.91it/s]  

{'loss': 0.0239, 'grad_norm': 0.00821049977093935, 'learning_rate': 2.7019041365725546e-05, 'epoch': 4.6}


                                                    
 50%|█████     | 7615/15230 [04:57<04:24, 28.83it/s]

{'eval_loss': 0.1228063777089119, 'eval_f1': 0.8497166918602098, 'eval_precision': 0.8722625800515942, 'eval_recall': 0.8296504118514386, 'eval_runtime': 3.6155, 'eval_samples_per_second': 421.237, 'eval_steps_per_second': 105.378, 'epoch': 5.0}


 53%|█████▎    | 8005/15230 [05:11<04:07, 29.23it/s]

{'loss': 0.023, 'grad_norm': 0.005763672757893801, 'learning_rate': 2.3736047275114905e-05, 'epoch': 5.25}


 59%|█████▉    | 9004/15230 [05:45<03:23, 30.57it/s]

{'loss': 0.0161, 'grad_norm': 3.2562286853790283, 'learning_rate': 2.045305318450427e-05, 'epoch': 5.91}


                                                    
 60%|██████    | 9138/15230 [05:54<03:21, 30.26it/s]

{'eval_loss': 0.13462309539318085, 'eval_f1': 0.8099515348068175, 'eval_precision': 0.8394996194926847, 'eval_recall': 0.7950987983101515, 'eval_runtime': 4.172, 'eval_samples_per_second': 365.048, 'eval_steps_per_second': 91.322, 'epoch': 6.0}


 66%|██████▌   | 10006/15230 [06:25<02:53, 30.06it/s]

{'loss': 0.0154, 'grad_norm': 0.014368973672389984, 'learning_rate': 1.717005909389363e-05, 'epoch': 6.57}


                                                     
 70%|███████   | 10661/15230 [06:51<02:45, 27.66it/s]

{'eval_loss': 0.12995536625385284, 'eval_f1': 0.8614530647193451, 'eval_precision': 0.8799754453977338, 'eval_recall': 0.8456276558905254, 'eval_runtime': 3.9075, 'eval_samples_per_second': 389.765, 'eval_steps_per_second': 97.505, 'epoch': 7.0}


 72%|███████▏  | 11005/15230 [07:04<02:23, 29.47it/s]

{'loss': 0.0098, 'grad_norm': 0.0072998711839318275, 'learning_rate': 1.3887065003282995e-05, 'epoch': 7.22}


 79%|███████▉  | 12005/15230 [07:39<01:57, 27.37it/s]

{'loss': 0.0093, 'grad_norm': 0.0013591448077932, 'learning_rate': 1.0604070912672358e-05, 'epoch': 7.88}


                                                     
 80%|████████  | 12184/15230 [07:48<01:43, 29.48it/s]

{'eval_loss': 0.1588105708360672, 'eval_f1': 0.8376003629554694, 'eval_precision': 0.8832603917280649, 'eval_recall': 0.8072850156960688, 'eval_runtime': 3.4168, 'eval_samples_per_second': 445.741, 'eval_steps_per_second': 111.508, 'epoch': 8.0}


 85%|████████▌ | 13004/15230 [08:18<01:18, 28.22it/s]

{'loss': 0.0039, 'grad_norm': 0.1332862675189972, 'learning_rate': 7.321076822061721e-06, 'epoch': 8.54}


                                                     
 90%|█████████ | 13707/15230 [08:45<00:50, 29.92it/s]

{'eval_loss': 0.14228519797325134, 'eval_f1': 0.8446597531398368, 'eval_precision': 0.8525542510904607, 'eval_recall': 0.8400502771596747, 'eval_runtime': 3.5279, 'eval_samples_per_second': 431.702, 'eval_steps_per_second': 107.996, 'epoch': 9.0}


 92%|█████████▏| 14003/15230 [08:56<00:41, 29.27it/s]

{'loss': 0.0049, 'grad_norm': 0.004233178682625294, 'learning_rate': 4.038082731451084e-06, 'epoch': 9.19}


 99%|█████████▊| 15002/15230 [09:31<00:08, 27.16it/s]

{'loss': 0.0035, 'grad_norm': 0.0028131103608757257, 'learning_rate': 7.550886408404465e-07, 'epoch': 9.85}


                                                     
100%|██████████| 15230/15230 [09:43<00:00, 30.38it/s]

{'eval_loss': 0.14409562945365906, 'eval_f1': 0.8493397563913418, 'eval_precision': 0.8657394911049733, 'eval_recall': 0.8360042142900824, 'eval_runtime': 3.513, 'eval_samples_per_second': 433.536, 'eval_steps_per_second': 108.455, 'epoch': 10.0}


100%|██████████| 15230/15230 [09:44<00:00, 26.04it/s]


{'train_runtime': 584.7878, 'train_samples_per_second': 104.157, 'train_steps_per_second': 26.044, 'train_loss': 0.040420388243970165, 'epoch': 10.0}


100%|██████████| 381/381 [00:03<00:00, 117.44it/s]


Evaluation Metrics: {'eval_loss': 0.12995536625385284, 'eval_f1': 0.8614530647193451, 'eval_precision': 0.8799754453977338, 'eval_recall': 0.8456276558905254, 'eval_runtime': 3.2549, 'eval_samples_per_second': 467.909, 'eval_steps_per_second': 117.054, 'epoch': 10.0}
CHECK SCORE                                                model   lan          cat  \
0  lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0.01  java      summary   
1  lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0.01  java    Ownership   
2  lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0.01  java       Expand   
3  lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0.01  java        usage   
4  lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0.01  java      Pointer   
5  lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0.01  java  deprecation   
6  lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0.01  java     rational   

   precision    recall        f1  
0   0.928669  0.963016  0.945531  
1   0.964912  1.000000  0.982143  
2   0.724138  

0,1
eval/f1,▁▅███▆█▇▇██
eval/loss,▅▁▁▃▄▆▅█▆▇▅
eval/precision,▁██▇▇▆▇█▇▇▇
eval/recall,▁▄▇█▇▆█▇███
eval/runtime,▆▂▃▂▄█▆▂▃▃▁
eval/samples_per_second,▂▆▆▇▅▁▃▆▆▆█
eval/steps_per_second,▂▆▆▇▅▁▃▆▆▆█
train/epoch,▁▁▁▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▇▇▇▇████
train/global_step,▁▁▁▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▇▇▇▇████
train/grad_norm,▃▁▃▁▇▁▁▁█▁▁▁▁▁▁

0,1
eval/f1,0.86145
eval/loss,0.12996
eval/precision,0.87998
eval/recall,0.84563
eval/runtime,3.2549
eval/samples_per_second,467.909
eval/steps_per_second,117.054
total_flos,2017327177827840.0
train/epoch,10.0
train/global_step,15230.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1884/1884 [00:00<00:00, 16168.01 examples/s]
 10%|▉         | 375/3770 [00:13<01:51, 30.39it/s]
 10%|█         | 377/3770 [00:14<01:51, 30.39it/s]

{'eval_loss': 0.32168370485305786, 'eval_f1': 0.40281893497316335, 'eval_precision': 0.524993317294841, 'eval_recall': 0.3353172356908347, 'eval_runtime': 0.8518, 'eval_samples_per_second': 442.587, 'eval_steps_per_second': 111.527, 'epoch': 1.0}


 20%|██        | 754/3770 [00:28<01:43, 29.16it/s]
 20%|██        | 754/3770 [00:28<01:43, 29.16it/s]

{'eval_loss': 0.3131145238876343, 'eval_f1': 0.6106441403126813, 'eval_precision': 0.787876440828248, 'eval_recall': 0.5462511669841519, 'eval_runtime': 0.8168, 'eval_samples_per_second': 461.54, 'eval_steps_per_second': 116.303, 'epoch': 2.0}


 27%|██▋       | 1003/3770 [00:38<01:33, 29.45it/s]

{'loss': 0.3077, 'grad_norm': 4.1457295417785645, 'learning_rate': 3.673740053050398e-05, 'epoch': 2.65}


 30%|███       | 1131/3770 [00:42<01:25, 30.84it/s]
 30%|███       | 1131/3770 [00:43<01:25, 30.84it/s]

{'eval_loss': 0.33945727348327637, 'eval_f1': 0.6823102075338172, 'eval_precision': 0.782731631399183, 'eval_recall': 0.6225720272111522, 'eval_runtime': 0.8472, 'eval_samples_per_second': 444.998, 'eval_steps_per_second': 112.135, 'epoch': 3.0}


 40%|███▉      | 1507/3770 [00:57<01:15, 29.80it/s]
 40%|████      | 1508/3770 [00:58<01:15, 29.80it/s]

{'eval_loss': 0.3939705491065979, 'eval_f1': 0.7093602100242153, 'eval_precision': 0.7433944765841826, 'eval_recall': 0.6819225956962163, 'eval_runtime': 0.9766, 'eval_samples_per_second': 386.039, 'eval_steps_per_second': 97.278, 'epoch': 4.0}


 50%|████▉     | 1884/3770 [01:12<01:11, 26.54it/s]
 50%|█████     | 1885/3770 [01:13<01:11, 26.54it/s]

{'eval_loss': 0.40446802973747253, 'eval_f1': 0.7242639661430127, 'eval_precision': 0.7644324473975637, 'eval_recall': 0.6985756218622101, 'eval_runtime': 0.9065, 'eval_samples_per_second': 415.863, 'eval_steps_per_second': 104.793, 'epoch': 5.0}


 53%|█████▎    | 2004/3770 [01:18<01:03, 27.75it/s]

{'loss': 0.0874, 'grad_norm': 1.5689811706542969, 'learning_rate': 2.347480106100796e-05, 'epoch': 5.31}


 60%|██████    | 2262/3770 [01:27<00:55, 27.26it/s]
 60%|██████    | 2262/3770 [01:28<00:55, 27.26it/s]

{'eval_loss': 0.44816774129867554, 'eval_f1': 0.7347766420507832, 'eval_precision': 0.7684693120474837, 'eval_recall': 0.716918852439996, 'eval_runtime': 1.102, 'eval_samples_per_second': 342.108, 'eval_steps_per_second': 86.208, 'epoch': 6.0}


 70%|██████▉   | 2638/3770 [01:42<00:41, 27.31it/s]
 70%|███████   | 2639/3770 [01:43<00:41, 27.31it/s]

{'eval_loss': 0.4876149594783783, 'eval_f1': 0.7115775668666445, 'eval_precision': 0.7269454298567977, 'eval_recall': 0.7027542982402476, 'eval_runtime': 0.8835, 'eval_samples_per_second': 426.727, 'eval_steps_per_second': 107.531, 'epoch': 7.0}


 80%|███████▉  | 3004/3770 [01:57<00:28, 26.83it/s]

{'loss': 0.0245, 'grad_norm': 0.2245696783065796, 'learning_rate': 1.0212201591511936e-05, 'epoch': 7.96}


 80%|████████  | 3016/3770 [01:57<00:28, 26.74it/s]
 80%|████████  | 3016/3770 [01:58<00:28, 26.74it/s]

{'eval_loss': 0.45008763670921326, 'eval_f1': 0.7499024741736606, 'eval_precision': 0.7742212717750012, 'eval_recall': 0.7359707175168217, 'eval_runtime': 0.8628, 'eval_samples_per_second': 436.95, 'eval_steps_per_second': 110.107, 'epoch': 8.0}


 90%|████████▉ | 3390/3770 [02:11<00:12, 30.28it/s]
 90%|█████████ | 3393/3770 [02:12<00:12, 30.28it/s]

{'eval_loss': 0.4858294129371643, 'eval_f1': 0.7292396571422299, 'eval_precision': 0.744281893869313, 'eval_recall': 0.7218022009033975, 'eval_runtime': 0.8055, 'eval_samples_per_second': 468.052, 'eval_steps_per_second': 117.944, 'epoch': 9.0}


100%|██████████| 3770/3770 [02:27<00:00, 29.00it/s]
100%|██████████| 3770/3770 [02:28<00:00, 29.00it/s]

{'eval_loss': 0.47284120321273804, 'eval_f1': 0.7447094868955334, 'eval_precision': 0.7650293305341194, 'eval_recall': 0.7299944460578496, 'eval_runtime': 0.8798, 'eval_samples_per_second': 428.517, 'eval_steps_per_second': 107.982, 'epoch': 10.0}


100%|██████████| 3770/3770 [02:29<00:00, 25.17it/s]


{'train_runtime': 149.784, 'train_samples_per_second': 100.612, 'train_steps_per_second': 25.17, 'train_loss': 0.11264981565804318, 'epoch': 10.0}


100%|██████████| 95/95 [00:00<00:00, 118.98it/s]


Evaluation Metrics: {'eval_loss': 0.45008763670921326, 'eval_f1': 0.7499024741736606, 'eval_precision': 0.7742212717750012, 'eval_recall': 0.7359707175168217, 'eval_runtime': 0.809, 'eval_samples_per_second': 465.979, 'eval_steps_per_second': 117.422, 'epoch': 10.0}
CHECK SCORE                                                 model     lan  \
7   lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0.01  python   
8   lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0.01  python   
9   lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0.01  python   
10  lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0.01  python   
11  lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0.01  python   

                 cat  precision    recall        f1  
7              Usage   0.872727  0.793388  0.831169  
8         Parameters   0.867257  0.875000  0.871111  
9   DevelopmentNotes   0.625000  0.750000  0.681818  
10            Expand   0.734694  0.521739  0.610169  
11           Summary   0.771429  0.739726  0.755245  
Scores

0,1
eval/f1,▁▅▇▇▇█▇████
eval/loss,▁▁▂▄▅▆█▆█▇▆
eval/precision,▁██▇▇▇▆█▇▇█
eval/recall,▁▅▆▇▇█▇████
eval/runtime,▂▁▂▅▃█▃▂▁▃▁
eval/samples_per_second,▇█▇▃▅▁▆▆█▆█
eval/steps_per_second,▇█▇▃▅▁▆▆█▆█
train/epoch,▁▂▂▃▃▄▄▅▆▆▆▇███
train/global_step,▁▂▂▃▃▄▄▅▆▆▆▇███
train/grad_norm,█▃▁

0,1
eval/f1,0.7499
eval/loss,0.45009
eval/precision,0.77422
eval/recall,0.73597
eval/runtime,0.809
eval/samples_per_second,465.979
eval/steps_per_second,117.422
total_flos,499097625100800.0
train/epoch,10.0
train/global_step,3770.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1298/1298 [00:00<00:00, 13965.20 examples/s]
 10%|█         | 260/2600 [00:09<01:20, 28.94it/s]
 10%|█         | 260/2600 [00:09<01:20, 28.94it/s]

{'eval_loss': 0.26525795459747314, 'eval_f1': 0.3776287438052144, 'eval_precision': 0.6582718193660443, 'eval_recall': 0.3049047729920969, 'eval_runtime': 0.5415, 'eval_samples_per_second': 480.152, 'eval_steps_per_second': 120.038, 'epoch': 1.0}


 20%|█▉        | 517/2600 [00:20<01:09, 29.90it/s]
 20%|██        | 520/2600 [00:20<01:09, 29.90it/s]

{'eval_loss': 0.2282593846321106, 'eval_f1': 0.501759445196843, 'eval_precision': 0.5753458482473685, 'eval_recall': 0.454210712613186, 'eval_runtime': 0.5941, 'eval_samples_per_second': 437.607, 'eval_steps_per_second': 109.402, 'epoch': 2.0}


 30%|███       | 780/2600 [00:30<01:04, 28.15it/s]
 30%|███       | 780/2600 [00:30<01:04, 28.15it/s]

{'eval_loss': 0.25611430406570435, 'eval_f1': 0.4939331302001157, 'eval_precision': 0.7012712155569298, 'eval_recall': 0.44263445184932604, 'eval_runtime': 0.5504, 'eval_samples_per_second': 472.391, 'eval_steps_per_second': 118.098, 'epoch': 3.0}


 39%|███▊      | 1004/2600 [00:39<00:55, 28.93it/s]

{'loss': 0.1955, 'grad_norm': 0.1492239236831665, 'learning_rate': 3.0769230769230774e-05, 'epoch': 3.85}


 40%|████      | 1040/2600 [00:40<00:55, 27.98it/s]
 40%|████      | 1040/2600 [00:41<00:55, 27.98it/s]

{'eval_loss': 0.24688608944416046, 'eval_f1': 0.6117652728323767, 'eval_precision': 0.8196148386446237, 'eval_recall': 0.54779469589439, 'eval_runtime': 0.7619, 'eval_samples_per_second': 341.267, 'eval_steps_per_second': 85.317, 'epoch': 4.0}


 50%|████▉     | 1298/2600 [00:51<00:44, 29.53it/s]
 50%|█████     | 1300/2600 [00:52<00:44, 29.53it/s]

{'eval_loss': 0.2495628297328949, 'eval_f1': 0.6367414200708431, 'eval_precision': 0.8359522552982546, 'eval_recall': 0.5769781921172669, 'eval_runtime': 0.5448, 'eval_samples_per_second': 477.213, 'eval_steps_per_second': 119.303, 'epoch': 5.0}


 60%|█████▉    | 1559/2600 [01:02<00:33, 30.74it/s]
 60%|██████    | 1560/2600 [01:02<00:33, 30.74it/s]

{'eval_loss': 0.25884369015693665, 'eval_f1': 0.6462498598095883, 'eval_precision': 0.807757972754319, 'eval_recall': 0.5881913577467398, 'eval_runtime': 0.5494, 'eval_samples_per_second': 473.283, 'eval_steps_per_second': 118.321, 'epoch': 6.0}


 70%|███████   | 1820/2600 [01:12<00:26, 29.55it/s]
 70%|███████   | 1820/2600 [01:13<00:26, 29.55it/s]

{'eval_loss': 0.28539514541625977, 'eval_f1': 0.66037534426326, 'eval_precision': 0.8246214101205586, 'eval_recall': 0.5895065743747855, 'eval_runtime': 0.6966, 'eval_samples_per_second': 373.255, 'eval_steps_per_second': 93.314, 'epoch': 7.0}


 77%|███████▋  | 2003/2600 [01:20<00:21, 28.10it/s]

{'loss': 0.0428, 'grad_norm': 0.026332279667258263, 'learning_rate': 1.153846153846154e-05, 'epoch': 7.69}


 80%|███████▉  | 2079/2600 [01:22<00:18, 27.89it/s]
 80%|████████  | 2080/2600 [01:23<00:18, 27.89it/s]

{'eval_loss': 0.2774313688278198, 'eval_f1': 0.6554374250431493, 'eval_precision': 0.7955507526346997, 'eval_recall': 0.6080241200068998, 'eval_runtime': 0.634, 'eval_samples_per_second': 410.123, 'eval_steps_per_second': 102.531, 'epoch': 8.0}


 90%|████████▉ | 2338/2600 [01:33<00:09, 27.25it/s]
 90%|█████████ | 2340/2600 [01:34<00:09, 27.25it/s]

{'eval_loss': 0.28064897656440735, 'eval_f1': 0.6710891777919857, 'eval_precision': 0.8000751563251562, 'eval_recall': 0.6172106882512188, 'eval_runtime': 0.7846, 'eval_samples_per_second': 331.399, 'eval_steps_per_second': 82.85, 'epoch': 9.0}


100%|█████████▉| 2599/2600 [01:44<00:00, 29.68it/s]
100%|██████████| 2600/2600 [01:46<00:00, 29.68it/s]

{'eval_loss': 0.28468838334083557, 'eval_f1': 0.6760992595162517, 'eval_precision': 0.8059458189449316, 'eval_recall': 0.6188716298555833, 'eval_runtime': 0.6197, 'eval_samples_per_second': 419.567, 'eval_steps_per_second': 104.892, 'epoch': 10.0}


100%|██████████| 2600/2600 [01:47<00:00, 24.22it/s]


{'train_runtime': 107.3724, 'train_samples_per_second': 96.673, 'train_steps_per_second': 24.215, 'train_loss': 0.0950811085334191, 'epoch': 10.0}


100%|██████████| 65/65 [00:00<00:00, 100.84it/s]


Evaluation Metrics: {'eval_loss': 0.28468838334083557, 'eval_f1': 0.6760992595162517, 'eval_precision': 0.8059458189449316, 'eval_recall': 0.6188716298555833, 'eval_runtime': 0.6553, 'eval_samples_per_second': 396.765, 'eval_steps_per_second': 99.191, 'epoch': 10.0}
CHECK SCORE                                                 model    lan  \
12  lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0.01  pharo   
13  lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0.01  pharo   
14  lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0.01  pharo   
15  lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0.01  pharo   
16  lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0.01  pharo   
17  lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0.01  pharo   
18  lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0.01  pharo   

                        cat  precision    recall        f1  
12  Keyimplementationpoints   0.833333  0.595238  0.694444  
13                  Example   0.923913  0.841584  0.880829  
14         Responsibiliti

0,1
eval/f1,▁▄▄▆▇▇█████
eval/loss,▆▁▄▃▄▅█▇▇██
eval/precision,▃▁▄██▇█▇▇▇▇
eval/recall,▁▄▄▆▇▇▇████
eval/runtime,▁▃▁▇▁▁▅▄█▃▄
eval/samples_per_second,█▆█▁██▃▅▁▅▄
eval/steps_per_second,█▆█▁██▃▅▁▅▄
train/epoch,▁▂▃▃▃▄▅▆▆▆▇███
train/global_step,▁▂▃▃▃▄▅▆▆▆▇███
train/grad_norm,█▁

0,1
eval/f1,0.6761
eval/loss,0.28469
eval/precision,0.80595
eval/recall,0.61887
eval/runtime,0.6553
eval/samples_per_second,396.765
eval/steps_per_second,99.191
total_flos,343783551237120.0
train/epoch,10.0
train/global_step,2600.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 7614/7614 [00:00<00:00, 21614.75 examples/s]
  7%|▋         | 1003/15230 [00:34<07:59, 29.70it/s]

{'loss': 0.2727, 'grad_norm': 0.6654465794563293, 'learning_rate': 4.671700590938937e-06, 'epoch': 0.66}


 10%|▉         | 1522/15230 [00:52<07:52, 29.02it/s]
 10%|█         | 1523/15230 [00:55<07:52, 29.02it/s]

{'eval_loss': 0.13916493952274323, 'eval_f1': 0.5240221839192052, 'eval_precision': 0.5297823616377006, 'eval_recall': 0.5194583943848409, 'eval_runtime': 3.6112, 'eval_samples_per_second': 421.746, 'eval_steps_per_second': 105.506, 'epoch': 1.0}


 13%|█▎        | 2005/15230 [01:13<07:38, 28.86it/s]  

{'loss': 0.1421, 'grad_norm': 2.341257333755493, 'learning_rate': 4.343401181877873e-06, 'epoch': 1.31}


 20%|█▉        | 3003/15230 [01:48<07:26, 27.41it/s]

{'loss': 0.1112, 'grad_norm': 4.752479553222656, 'learning_rate': 4.015101772816809e-06, 'epoch': 1.97}


 20%|██        | 3046/15230 [01:49<07:29, 27.13it/s]
 20%|██        | 3046/15230 [01:53<07:29, 27.13it/s]

{'eval_loss': 0.11430926620960236, 'eval_f1': 0.5909436124724898, 'eval_precision': 0.8112670539107866, 'eval_recall': 0.5633420622459716, 'eval_runtime': 3.4001, 'eval_samples_per_second': 447.926, 'eval_steps_per_second': 112.055, 'epoch': 2.0}


 26%|██▋       | 4003/15230 [02:27<07:07, 26.28it/s]  

{'loss': 0.0891, 'grad_norm': 0.07874835282564163, 'learning_rate': 3.6868023637557455e-06, 'epoch': 2.63}


 30%|██▉       | 4568/15230 [02:47<05:51, 30.36it/s]
 30%|███       | 4569/15230 [02:50<05:51, 30.36it/s]

{'eval_loss': 0.09479905664920807, 'eval_f1': 0.72597568063839, 'eval_precision': 0.9321722177241965, 'eval_recall': 0.6560710157579568, 'eval_runtime': 3.2001, 'eval_samples_per_second': 475.927, 'eval_steps_per_second': 119.06, 'epoch': 3.0}


 33%|███▎      | 5004/15230 [03:06<06:09, 27.67it/s]  

{'loss': 0.0826, 'grad_norm': 8.861104011535645, 'learning_rate': 3.358502954694682e-06, 'epoch': 3.28}


 39%|███▉      | 6005/15230 [03:41<05:34, 27.56it/s]

{'loss': 0.0733, 'grad_norm': 2.035860061645508, 'learning_rate': 3.030203545633618e-06, 'epoch': 3.94}


 40%|███▉      | 6090/15230 [03:43<05:17, 28.77it/s]
 40%|████      | 6092/15230 [03:47<05:17, 28.77it/s]

{'eval_loss': 0.09817290306091309, 'eval_f1': 0.7790877276993465, 'eval_precision': 0.9131951745870264, 'eval_recall': 0.7204286292550431, 'eval_runtime': 3.7229, 'eval_samples_per_second': 409.088, 'eval_steps_per_second': 102.339, 'epoch': 4.0}


 46%|████▌     | 7003/15230 [04:20<05:11, 26.40it/s]  

{'loss': 0.0625, 'grad_norm': 0.04355984553694725, 'learning_rate': 2.7019041365725546e-06, 'epoch': 4.6}


 50%|████▉     | 7612/15230 [04:41<04:09, 30.50it/s]
 50%|█████     | 7615/15230 [04:45<04:09, 30.50it/s]

{'eval_loss': 0.09297066926956177, 'eval_f1': 0.8308799718023597, 'eval_precision': 0.8975546266923764, 'eval_recall': 0.7864276642962488, 'eval_runtime': 3.9569, 'eval_samples_per_second': 384.899, 'eval_steps_per_second': 96.288, 'epoch': 5.0}


 53%|█████▎    | 8003/15230 [04:59<04:19, 27.89it/s]

{'loss': 0.0634, 'grad_norm': 0.046921879053115845, 'learning_rate': 2.3736047275114905e-06, 'epoch': 5.25}


 59%|█████▉    | 9006/15230 [05:34<03:35, 28.94it/s]

{'loss': 0.0527, 'grad_norm': 13.174018859863281, 'learning_rate': 2.045305318450427e-06, 'epoch': 5.91}


 60%|█████▉    | 9136/15230 [05:38<03:45, 27.02it/s]
 60%|██████    | 9138/15230 [05:43<03:45, 27.02it/s]

{'eval_loss': 0.10016866773366928, 'eval_f1': 0.8328330149186557, 'eval_precision': 0.9095474254806312, 'eval_recall': 0.7848242752959788, 'eval_runtime': 4.2792, 'eval_samples_per_second': 355.906, 'eval_steps_per_second': 89.035, 'epoch': 6.0}


 66%|██████▌   | 10004/15230 [06:13<02:57, 29.37it/s]

{'loss': 0.0479, 'grad_norm': 0.22608231008052826, 'learning_rate': 1.7170059093893632e-06, 'epoch': 6.57}


 70%|███████   | 10661/15230 [06:37<02:49, 26.90it/s]
 70%|███████   | 10661/15230 [06:40<02:49, 26.90it/s]

{'eval_loss': 0.09630576521158218, 'eval_f1': 0.8399127879986926, 'eval_precision': 0.8898763998132365, 'eval_recall': 0.8021138362368676, 'eval_runtime': 3.6332, 'eval_samples_per_second': 419.193, 'eval_steps_per_second': 104.867, 'epoch': 7.0}


 72%|███████▏  | 11006/15230 [06:53<02:18, 30.41it/s]

{'loss': 0.0472, 'grad_norm': 0.1435578614473343, 'learning_rate': 1.3887065003282996e-06, 'epoch': 7.22}


 79%|███████▉  | 12003/15230 [07:27<01:53, 28.41it/s]

{'loss': 0.0416, 'grad_norm': 0.03205009177327156, 'learning_rate': 1.0604070912672358e-06, 'epoch': 7.88}


 80%|███████▉  | 12182/15230 [07:33<01:44, 29.03it/s]
 80%|████████  | 12184/15230 [07:37<01:44, 29.03it/s]

{'eval_loss': 0.09651077538728714, 'eval_f1': 0.8357196685096061, 'eval_precision': 0.8869243729914533, 'eval_recall': 0.7984088321318865, 'eval_runtime': 3.6489, 'eval_samples_per_second': 417.392, 'eval_steps_per_second': 104.416, 'epoch': 8.0}


 85%|████████▌ | 13003/15230 [08:06<01:18, 28.39it/s]

{'loss': 0.0362, 'grad_norm': 7.736608028411865, 'learning_rate': 7.32107682206172e-07, 'epoch': 8.54}


 90%|████████▉ | 13706/15230 [08:31<00:50, 30.20it/s]
 90%|█████████ | 13707/15230 [08:34<00:50, 30.20it/s]

{'eval_loss': 0.09817993640899658, 'eval_f1': 0.847175679083368, 'eval_precision': 0.8814203651665347, 'eval_recall': 0.8197327608553885, 'eval_runtime': 3.4416, 'eval_samples_per_second': 442.53, 'eval_steps_per_second': 110.705, 'epoch': 9.0}


 92%|█████████▏| 14003/15230 [08:45<00:42, 28.93it/s]

{'loss': 0.0401, 'grad_norm': 0.3693678677082062, 'learning_rate': 4.038082731451084e-07, 'epoch': 9.19}


 99%|█████████▊| 15003/15230 [09:20<00:08, 27.37it/s]

{'loss': 0.0348, 'grad_norm': 0.08364568650722504, 'learning_rate': 7.550886408404465e-08, 'epoch': 9.85}


100%|█████████▉| 15228/15230 [09:28<00:00, 27.99it/s]
100%|██████████| 15230/15230 [09:32<00:00, 27.99it/s]

{'eval_loss': 0.09730931371450424, 'eval_f1': 0.8420395603915399, 'eval_precision': 0.8800432094596179, 'eval_recall': 0.8111127626674808, 'eval_runtime': 3.6026, 'eval_samples_per_second': 422.751, 'eval_steps_per_second': 105.757, 'epoch': 10.0}


100%|██████████| 15230/15230 [09:33<00:00, 26.56it/s]


{'train_runtime': 573.4673, 'train_samples_per_second': 106.214, 'train_steps_per_second': 26.558, 'train_loss': 0.07900882784817771, 'epoch': 10.0}


100%|██████████| 381/381 [00:03<00:00, 120.74it/s]


Evaluation Metrics: {'eval_loss': 0.09817993640899658, 'eval_f1': 0.847175679083368, 'eval_precision': 0.8814203651665347, 'eval_recall': 0.8197327608553885, 'eval_runtime': 3.168, 'eval_samples_per_second': 480.745, 'eval_steps_per_second': 120.265, 'epoch': 10.0}
CHECK SCORE                                                 model   lan          cat  \
12  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0.01  java      summary   
13  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0.01  java    Ownership   
14  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0.01  java       Expand   
15  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0.01  java        usage   
16  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0.01  java      Pointer   
17  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0.01  java  deprecation   
18  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0.01  java     rational   

    precision    recall        f1  
12   0.936111  0.958748  0.947294  
13   0.964912  1.000000  0.982143  
14   

0,1
eval/f1,▁▂▅▇███████
eval/loss,█▄▁▂▁▂▂▂▂▂▂
eval/precision,▁▆██▇█▇▇▇▇▇
eval/recall,▁▂▄▆▇▇█████
eval/runtime,▄▂▁▄▆█▄▄▃▄▁
eval/samples_per_second,▅▆█▄▃▁▅▄▆▅█
eval/steps_per_second,▅▆█▄▃▁▅▄▆▅█
train/epoch,▁▁▁▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▇▇▇▇████
train/global_step,▁▁▁▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▇▇▇▇████
train/grad_norm,▁▂▄▁▆▂▁▁█▁▁▁▅▁▁

0,1
eval/f1,0.84718
eval/loss,0.09818
eval/precision,0.88142
eval/recall,0.81973
eval/runtime,3.168
eval/samples_per_second,480.745
eval/steps_per_second,120.265
total_flos,2017327177827840.0
train/epoch,10.0
train/global_step,15230.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1884/1884 [00:00<00:00, 16329.83 examples/s]
 10%|▉         | 375/3770 [00:13<01:52, 30.10it/s]
 10%|█         | 377/3770 [00:14<01:52, 30.10it/s]

{'eval_loss': 0.47246357798576355, 'eval_f1': 0.03308270676691729, 'eval_precision': 0.18333333333333332, 'eval_recall': 0.01818181818181818, 'eval_runtime': 0.9757, 'eval_samples_per_second': 386.382, 'eval_steps_per_second': 97.364, 'epoch': 1.0}


 20%|██        | 754/3770 [00:28<01:43, 29.14it/s]
 20%|██        | 754/3770 [00:29<01:43, 29.14it/s]

{'eval_loss': 0.4195431172847748, 'eval_f1': 0.12358897243107769, 'eval_precision': 0.3888888888888889, 'eval_recall': 0.08786894923258559, 'eval_runtime': 1.1806, 'eval_samples_per_second': 319.318, 'eval_steps_per_second': 80.465, 'epoch': 2.0}


 27%|██▋       | 1004/3770 [00:38<01:30, 30.45it/s]

{'loss': 0.4636, 'grad_norm': 3.4785094261169434, 'learning_rate': 3.673740053050398e-06, 'epoch': 2.65}


 30%|██▉       | 1129/3770 [00:43<01:36, 27.49it/s]
 30%|███       | 1131/3770 [00:44<01:35, 27.49it/s]

{'eval_loss': 0.37720075249671936, 'eval_f1': 0.3210689791691519, 'eval_precision': 0.5669330289193303, 'eval_recall': 0.25051875272921353, 'eval_runtime': 0.774, 'eval_samples_per_second': 487.074, 'eval_steps_per_second': 122.737, 'epoch': 3.0}


 40%|███▉      | 1506/3770 [00:58<01:21, 27.85it/s]
 40%|████      | 1508/3770 [00:59<01:21, 27.85it/s]

{'eval_loss': 0.3514925539493561, 'eval_f1': 0.3831325941878706, 'eval_precision': 0.5390752587507152, 'eval_recall': 0.3071305251411104, 'eval_runtime': 1.1442, 'eval_samples_per_second': 329.495, 'eval_steps_per_second': 83.029, 'epoch': 4.0}


 50%|████▉     | 1884/3770 [01:13<01:02, 30.16it/s]
 50%|█████     | 1885/3770 [01:14<01:02, 30.16it/s]

{'eval_loss': 0.33306682109832764, 'eval_f1': 0.43252828281321837, 'eval_precision': 0.6192484934420419, 'eval_recall': 0.3700509500065513, 'eval_runtime': 0.893, 'eval_samples_per_second': 422.151, 'eval_steps_per_second': 106.378, 'epoch': 5.0}


 53%|█████▎    | 2006/3770 [01:19<00:58, 30.08it/s]

{'loss': 0.3284, 'grad_norm': 5.579546928405762, 'learning_rate': 2.347480106100796e-06, 'epoch': 5.31}


 60%|█████▉    | 2260/3770 [01:28<00:55, 27.05it/s]
 60%|██████    | 2262/3770 [01:29<00:55, 27.05it/s]

{'eval_loss': 0.3248472213745117, 'eval_f1': 0.4966815999287557, 'eval_precision': 0.6974575854438161, 'eval_recall': 0.4277447010188361, 'eval_runtime': 0.9154, 'eval_samples_per_second': 411.853, 'eval_steps_per_second': 103.782, 'epoch': 6.0}


 70%|██████▉   | 2638/3770 [01:43<00:37, 30.17it/s]
 70%|███████   | 2639/3770 [01:43<00:37, 30.17it/s]

{'eval_loss': 0.3175298571586609, 'eval_f1': 0.4949608743659087, 'eval_precision': 0.6813200602375861, 'eval_recall': 0.43789118433860974, 'eval_runtime': 0.8005, 'eval_samples_per_second': 470.947, 'eval_steps_per_second': 118.674, 'epoch': 7.0}


 80%|███████▉  | 3004/3770 [01:57<00:25, 30.00it/s]

{'loss': 0.2616, 'grad_norm': 1.6872822046279907, 'learning_rate': 1.0212201591511937e-06, 'epoch': 7.96}


 80%|████████  | 3016/3770 [01:57<00:24, 30.24it/s]
 80%|████████  | 3016/3770 [01:58<00:24, 30.24it/s]

{'eval_loss': 0.31550440192222595, 'eval_f1': 0.50911159102067, 'eval_precision': 0.6707464120578874, 'eval_recall': 0.4531829163130262, 'eval_runtime': 0.8248, 'eval_samples_per_second': 457.065, 'eval_steps_per_second': 115.176, 'epoch': 8.0}


 90%|████████▉ | 3391/3770 [02:12<00:13, 28.35it/s]
 90%|█████████ | 3393/3770 [02:13<00:13, 28.35it/s]

{'eval_loss': 0.31358736753463745, 'eval_f1': 0.5156491329432653, 'eval_precision': 0.6585356860935458, 'eval_recall': 0.45861806809069794, 'eval_runtime': 0.8063, 'eval_samples_per_second': 467.547, 'eval_steps_per_second': 117.817, 'epoch': 9.0}


100%|██████████| 3770/3770 [02:27<00:00, 30.08it/s]
100%|██████████| 3770/3770 [02:29<00:00, 30.08it/s]

{'eval_loss': 0.31343743205070496, 'eval_f1': 0.5185931839398215, 'eval_precision': 0.6624289916818827, 'eval_recall': 0.4607917350215255, 'eval_runtime': 0.9683, 'eval_samples_per_second': 389.33, 'eval_steps_per_second': 98.107, 'epoch': 10.0}


100%|██████████| 3770/3770 [02:30<00:00, 25.12it/s]


{'train_runtime': 150.0654, 'train_samples_per_second': 100.423, 'train_steps_per_second': 25.122, 'train_loss': 0.3274936018318966, 'epoch': 10.0}


100%|██████████| 95/95 [00:00<00:00, 110.16it/s]


Evaluation Metrics: {'eval_loss': 0.31343743205070496, 'eval_f1': 0.5185931839398215, 'eval_precision': 0.6624289916818827, 'eval_recall': 0.4607917350215255, 'eval_runtime': 0.8794, 'eval_samples_per_second': 428.682, 'eval_steps_per_second': 108.023, 'epoch': 10.0}
CHECK SCORE                                                 model     lan  \
14  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0.01  python   
15  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0.01  python   
16  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0.01  python   
17  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0.01  python   
18  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0.01  python   

                 cat  precision    recall        f1  
14             Usage   0.881720  0.677686  0.766355  
15        Parameters   0.867925  0.821429  0.844037  
16  DevelopmentNotes   0.000000  0.000000  0.000000  
17            Expand   0.812500  0.188406  0.305882  
18           Summary   0.750000  0.616438  0.676692  
Score

0,1
eval/f1,▁▂▅▆▇██████
eval/loss,█▆▄▃▂▂▁▁▁▁▁
eval/precision,▁▄▆▆▇███▇██
eval/recall,▁▂▅▆▇▇█████
eval/runtime,▄█▁▇▃▃▁▂▂▄▃
eval/samples_per_second,▄▁█▁▅▅▇▇▇▄▆
eval/steps_per_second,▄▁█▁▅▅▇▇▇▄▆
train/epoch,▁▂▂▃▃▄▄▅▆▆▆▇███
train/global_step,▁▂▂▃▃▄▄▅▆▆▆▇███
train/grad_norm,▄█▁

0,1
eval/f1,0.51859
eval/loss,0.31344
eval/precision,0.66243
eval/recall,0.46079
eval/runtime,0.8794
eval/samples_per_second,428.682
eval/steps_per_second,108.023
total_flos,499097625100800.0
train/epoch,10.0
train/global_step,3770.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1298/1298 [00:00<00:00, 14622.39 examples/s]
 10%|▉         | 258/2600 [00:08<01:16, 30.76it/s]
 10%|█         | 260/2600 [00:09<01:16, 30.76it/s]

{'eval_loss': 0.3987762928009033, 'eval_f1': 0.0, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_runtime': 0.7108, 'eval_samples_per_second': 365.784, 'eval_steps_per_second': 91.446, 'epoch': 1.0}


 20%|██        | 520/2600 [00:19<01:10, 29.53it/s]
 20%|██        | 520/2600 [00:20<01:10, 29.53it/s]

{'eval_loss': 0.34300878643989563, 'eval_f1': 0.11330049261083744, 'eval_precision': 0.1350293542074364, 'eval_recall': 0.0975954738330976, 'eval_runtime': 0.6327, 'eval_samples_per_second': 410.915, 'eval_steps_per_second': 102.729, 'epoch': 2.0}


 30%|███       | 780/2600 [00:29<01:01, 29.41it/s]
 30%|███       | 780/2600 [00:30<01:01, 29.41it/s]

{'eval_loss': 0.30889931321144104, 'eval_f1': 0.12454212454212454, 'eval_precision': 0.12917933130699089, 'eval_recall': 0.12022630834512023, 'eval_runtime': 0.5255, 'eval_samples_per_second': 494.805, 'eval_steps_per_second': 123.701, 'epoch': 3.0}


 39%|███▊      | 1005/2600 [00:39<00:55, 28.85it/s]

{'loss': 0.3563, 'grad_norm': 1.1454435586929321, 'learning_rate': 3.0769230769230774e-06, 'epoch': 3.85}


 40%|███▉      | 1038/2600 [00:40<00:55, 28.32it/s]
 40%|████      | 1040/2600 [00:41<00:55, 28.32it/s]

{'eval_loss': 0.2861657440662384, 'eval_f1': 0.148990743838519, 'eval_precision': 0.2787456445993031, 'eval_recall': 0.12536291223107274, 'eval_runtime': 0.8678, 'eval_samples_per_second': 299.602, 'eval_steps_per_second': 74.9, 'epoch': 4.0}


 50%|█████     | 1300/2600 [00:51<00:44, 29.35it/s]
 50%|█████     | 1300/2600 [00:51<00:44, 29.35it/s]

{'eval_loss': 0.27219146490097046, 'eval_f1': 0.2120432872602593, 'eval_precision': 0.3876306620209059, 'eval_recall': 0.16918335442566815, 'eval_runtime': 0.5717, 'eval_samples_per_second': 454.811, 'eval_steps_per_second': 113.703, 'epoch': 5.0}


 60%|█████▉    | 1559/2600 [01:03<00:40, 25.90it/s]
 60%|██████    | 1560/2600 [01:03<00:40, 25.90it/s]

{'eval_loss': 0.25737568736076355, 'eval_f1': 0.27234636568652965, 'eval_precision': 0.5360353931782503, 'eval_recall': 0.21629505163190862, 'eval_runtime': 0.6997, 'eval_samples_per_second': 371.58, 'eval_steps_per_second': 92.895, 'epoch': 6.0}


 70%|██████▉   | 1819/2600 [01:14<00:27, 28.85it/s]
 70%|███████   | 1820/2600 [01:14<00:27, 28.85it/s]

{'eval_loss': 0.25029897689819336, 'eval_f1': 0.3123954369856009, 'eval_precision': 0.5321997590038827, 'eval_recall': 0.24810725540867726, 'eval_runtime': 0.6024, 'eval_samples_per_second': 431.628, 'eval_steps_per_second': 107.907, 'epoch': 7.0}


 77%|███████▋  | 2002/2600 [01:22<00:22, 26.40it/s]

{'loss': 0.2311, 'grad_norm': 1.238744854927063, 'learning_rate': 1.153846153846154e-06, 'epoch': 7.69}


 80%|███████▉  | 2079/2600 [01:25<00:18, 28.91it/s]
 80%|████████  | 2080/2600 [01:25<00:17, 28.91it/s]

{'eval_loss': 0.2468331903219223, 'eval_f1': 0.3253556139436907, 'eval_precision': 0.6536861197055372, 'eval_recall': 0.26255080959795135, 'eval_runtime': 0.5886, 'eval_samples_per_second': 441.744, 'eval_steps_per_second': 110.436, 'epoch': 8.0}


 90%|█████████ | 2340/2600 [01:36<00:09, 28.54it/s]
 90%|█████████ | 2340/2600 [01:36<00:09, 28.54it/s]

{'eval_loss': 0.24127556383609772, 'eval_f1': 0.34123931623931625, 'eval_precision': 0.6605243161094225, 'eval_recall': 0.2711825728816911, 'eval_runtime': 0.6209, 'eval_samples_per_second': 418.74, 'eval_steps_per_second': 104.685, 'epoch': 9.0}


100%|█████████▉| 2598/2600 [01:46<00:00, 26.95it/s]
100%|██████████| 2600/2600 [01:48<00:00, 26.95it/s]

{'eval_loss': 0.24075563251972198, 'eval_f1': 0.3464381423565097, 'eval_precision': 0.657860824742268, 'eval_recall': 0.2756365137165041, 'eval_runtime': 0.665, 'eval_samples_per_second': 390.976, 'eval_steps_per_second': 97.744, 'epoch': 10.0}


100%|██████████| 2600/2600 [01:49<00:00, 23.73it/s]


{'train_runtime': 109.5721, 'train_samples_per_second': 94.732, 'train_steps_per_second': 23.729, 'train_loss': 0.2715848130446214, 'epoch': 10.0}


100%|██████████| 65/65 [00:00<00:00, 107.87it/s]


Evaluation Metrics: {'eval_loss': 0.24075563251972198, 'eval_f1': 0.3464381423565097, 'eval_precision': 0.657860824742268, 'eval_recall': 0.2756365137165041, 'eval_runtime': 0.6199, 'eval_samples_per_second': 419.427, 'eval_steps_per_second': 104.857, 'epoch': 10.0}
CHECK SCORE                                                 model    lan  \
12  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0.01  pharo   
13  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0.01  pharo   
14  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0.01  pharo   
15  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0.01  pharo   
16  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0.01  pharo   
17  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0.01  pharo   
18  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0.01  pharo   

                        cat  precision    recall        f1  
12  Keyimplementationpoints   1.000000  0.285714  0.444444  
13                  Example   0.917526  0.881188  0.898990  
14         Responsibiliti

0,1
eval/f1,▁▃▄▄▅▇▇████
eval/loss,█▆▄▃▂▂▁▁▁▁▁
eval/precision,▁▂▂▄▅▇▇████
eval/recall,▁▃▄▄▅▆▇████
eval/runtime,▅▃▁█▂▅▃▂▃▄▃
eval/samples_per_second,▃▅█▁▇▄▆▆▅▄▅
eval/steps_per_second,▃▅█▁▇▄▆▆▅▄▅
train/epoch,▁▂▃▃▃▄▅▆▆▆▇███
train/global_step,▁▂▃▃▃▄▅▆▆▆▇███
train/grad_norm,▁█

0,1
eval/f1,0.34644
eval/loss,0.24076
eval/precision,0.65786
eval/recall,0.27564
eval/runtime,0.6199
eval/samples_per_second,419.427
eval/steps_per_second,104.857
total_flos,343783551237120.0
train/epoch,10.0
train/global_step,2600.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 7614/7614 [00:00<00:00, 20499.76 examples/s]
 10%|▉         | 761/7620 [00:32<04:52, 23.42it/s]
 10%|█         | 762/7620 [00:35<04:52, 23.42it/s]

{'eval_loss': 0.10050240159034729, 'eval_f1': 0.6572994463345975, 'eval_precision': 0.6828109720966864, 'eval_recall': 0.6378768157780907, 'eval_runtime': 2.3194, 'eval_samples_per_second': 656.645, 'eval_steps_per_second': 82.35, 'epoch': 1.0}


 13%|█▎        | 1004/7620 [00:46<04:38, 23.79it/s]

{'loss': 0.1302, 'grad_norm': 1.9515794515609741, 'learning_rate': 4.343832020997376e-05, 'epoch': 1.31}


 20%|█▉        | 1523/7620 [01:07<04:30, 22.56it/s]
 20%|██        | 1524/7620 [01:10<04:30, 22.56it/s]

{'eval_loss': 0.08148975670337677, 'eval_f1': 0.8219044227250503, 'eval_precision': 0.8937850573529359, 'eval_recall': 0.7813139931517717, 'eval_runtime': 2.5935, 'eval_samples_per_second': 587.229, 'eval_steps_per_second': 73.645, 'epoch': 2.0}


 26%|██▋       | 2003/7620 [01:31<04:03, 23.03it/s]

{'loss': 0.0644, 'grad_norm': 0.03616556152701378, 'learning_rate': 3.6876640419947505e-05, 'epoch': 2.62}


 30%|██▉       | 2285/7620 [01:44<03:41, 24.14it/s]
 30%|███       | 2286/7620 [01:46<03:40, 24.14it/s]

{'eval_loss': 0.08742769807577133, 'eval_f1': 0.8550247032151171, 'eval_precision': 0.876501909612122, 'eval_recall': 0.8359927191167696, 'eval_runtime': 2.3567, 'eval_samples_per_second': 646.236, 'eval_steps_per_second': 81.045, 'epoch': 3.0}


 39%|███▉      | 3002/7620 [02:17<03:10, 24.22it/s]

{'loss': 0.036, 'grad_norm': 0.06224724277853966, 'learning_rate': 3.0314960629921263e-05, 'epoch': 3.94}


 40%|███▉      | 3047/7620 [02:19<03:11, 23.87it/s]
 40%|████      | 3048/7620 [02:21<03:11, 23.87it/s]

{'eval_loss': 0.10144107788801193, 'eval_f1': 0.8507993648116915, 'eval_precision': 0.8860074343720126, 'eval_recall': 0.8267106846861861, 'eval_runtime': 2.296, 'eval_samples_per_second': 663.34, 'eval_steps_per_second': 83.19, 'epoch': 4.0}


 50%|████▉     | 3809/7620 [02:54<02:35, 24.49it/s]
 50%|█████     | 3810/7620 [02:56<02:35, 24.49it/s]

{'eval_loss': 0.10723888128995895, 'eval_f1': 0.8263738989894855, 'eval_precision': 0.8387988518977758, 'eval_recall': 0.8214574831520007, 'eval_runtime': 2.1232, 'eval_samples_per_second': 717.317, 'eval_steps_per_second': 89.959, 'epoch': 5.0}


 53%|█████▎    | 4004/7620 [03:05<02:34, 23.43it/s]

{'loss': 0.0168, 'grad_norm': 0.03193296119570732, 'learning_rate': 2.3753280839895015e-05, 'epoch': 5.25}


 60%|█████▉    | 4571/7620 [03:29<02:11, 23.19it/s]
 60%|██████    | 4572/7620 [03:31<02:11, 23.19it/s]

{'eval_loss': 0.09529615938663483, 'eval_f1': 0.8602605221177615, 'eval_precision': 0.8926940564918401, 'eval_recall': 0.8361379172101631, 'eval_runtime': 2.4586, 'eval_samples_per_second': 619.446, 'eval_steps_per_second': 77.685, 'epoch': 6.0}


 66%|██████▌   | 5003/7620 [03:50<01:53, 22.96it/s]

{'loss': 0.0101, 'grad_norm': 0.021050285547971725, 'learning_rate': 1.7191601049868766e-05, 'epoch': 6.56}


 70%|██████▉   | 5333/7620 [04:04<01:33, 24.34it/s]
 70%|███████   | 5334/7620 [04:07<01:33, 24.34it/s]

{'eval_loss': 0.1144045889377594, 'eval_f1': 0.838623531163971, 'eval_precision': 0.8381261395765452, 'eval_recall': 0.8397670495414856, 'eval_runtime': 2.2064, 'eval_samples_per_second': 690.276, 'eval_steps_per_second': 86.568, 'epoch': 7.0}


 79%|███████▉  | 6002/7620 [04:36<01:07, 24.06it/s]

{'loss': 0.0056, 'grad_norm': 0.013392729684710503, 'learning_rate': 1.062992125984252e-05, 'epoch': 7.87}


 80%|███████▉  | 6095/7620 [04:39<01:02, 24.35it/s]
 80%|████████  | 6096/7620 [04:42<01:02, 24.35it/s]

{'eval_loss': 0.10820001363754272, 'eval_f1': 0.8612716333442287, 'eval_precision': 0.8716327272683547, 'eval_recall': 0.8554922381951593, 'eval_runtime': 2.2693, 'eval_samples_per_second': 671.132, 'eval_steps_per_second': 84.167, 'epoch': 8.0}


 90%|████████▉ | 6857/7620 [05:14<00:31, 24.45it/s]
 90%|█████████ | 6858/7620 [05:17<00:31, 24.45it/s]

{'eval_loss': 0.11538326740264893, 'eval_f1': 0.8532405661771184, 'eval_precision': 0.8707051401947421, 'eval_recall': 0.8384679214419808, 'eval_runtime': 2.1479, 'eval_samples_per_second': 709.081, 'eval_steps_per_second': 88.926, 'epoch': 9.0}


 92%|█████████▏| 7004/7620 [05:24<00:26, 23.00it/s]

{'loss': 0.0029, 'grad_norm': 0.021750453859567642, 'learning_rate': 4.068241469816273e-06, 'epoch': 9.19}


100%|█████████▉| 7619/7620 [05:49<00:00, 22.95it/s]
100%|██████████| 7620/7620 [05:53<00:00, 22.95it/s]

{'eval_loss': 0.11686110496520996, 'eval_f1': 0.85905016000366, 'eval_precision': 0.8790405670522992, 'eval_recall': 0.8424820599964341, 'eval_runtime': 2.2465, 'eval_samples_per_second': 677.939, 'eval_steps_per_second': 85.021, 'epoch': 10.0}


100%|██████████| 7620/7620 [05:54<00:00, 21.52it/s]


{'train_runtime': 354.1004, 'train_samples_per_second': 172.013, 'train_steps_per_second': 21.519, 'train_loss': 0.03503173078138997, 'epoch': 10.0}


100%|██████████| 191/191 [00:02<00:00, 86.97it/s]


Evaluation Metrics: {'eval_loss': 0.10820001363754272, 'eval_f1': 0.8612716333442287, 'eval_precision': 0.8716327272683547, 'eval_recall': 0.8554922381951593, 'eval_runtime': 2.211, 'eval_samples_per_second': 688.839, 'eval_steps_per_second': 86.388, 'epoch': 10.0}
CHECK SCORE                                                 model   lan          cat  \
12  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0.01  java      summary   
13  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0.01  java    Ownership   
14  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0.01  java       Expand   
15  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0.01  java        usage   
16  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0.01  java      Pointer   
17  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0.01  java  deprecation   
18  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0.01  java     rational   

    precision    recall        f1  
12   0.941011  0.953058  0.946996  
13   0.932203  1.000000  0.964912  
14   

0,1
eval/f1,▁▇██▇█▇████
eval/loss,▅▁▂▅▆▄█▆██▆
eval/precision,▁█▇█▆█▆▇▇█▇
eval/recall,▁▆▇▇▇▇▇█▇██
eval/runtime,▄█▄▄▁▆▂▃▁▃▂
eval/samples_per_second,▅▁▄▅█▃▇▆█▆▆
eval/steps_per_second,▅▁▄▅█▃▇▆█▆▆
train/epoch,▁▁▂▂▃▃▃▄▄▅▅▆▆▆▇▇███
train/global_step,▁▁▂▂▃▃▃▄▄▅▅▆▆▆▇▇███
train/grad_norm,█▁▁▁▁▁▁

0,1
eval/f1,0.86127
eval/loss,0.1082
eval/precision,0.87163
eval/recall,0.85549
eval/runtime,2.211
eval/samples_per_second,688.839
eval/steps_per_second,86.388
total_flos,2017327177827840.0
train/epoch,10.0
train/global_step,7620.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1884/1884 [00:00<00:00, 16722.19 examples/s]
 10%|▉         | 187/1890 [00:08<01:08, 24.74it/s]
 10%|█         | 189/1890 [00:08<01:08, 24.74it/s]

{'eval_loss': 0.3638896048069, 'eval_f1': 0.2844869750132908, 'eval_precision': 0.35621045621045616, 'eval_recall': 0.23699822904368356, 'eval_runtime': 0.54, 'eval_samples_per_second': 698.157, 'eval_steps_per_second': 88.89, 'epoch': 1.0}


 20%|█▉        | 376/1890 [00:17<01:02, 24.29it/s]
 20%|██        | 378/1890 [00:17<01:02, 24.29it/s]

{'eval_loss': 0.30212539434432983, 'eval_f1': 0.5500532304233865, 'eval_precision': 0.6527631578947368, 'eval_recall': 0.4797597893650739, 'eval_runtime': 0.5356, 'eval_samples_per_second': 703.855, 'eval_steps_per_second': 89.615, 'epoch': 2.0}


 30%|██▉       | 565/1890 [00:26<00:55, 23.97it/s]
 30%|███       | 567/1890 [00:27<00:55, 23.97it/s]

{'eval_loss': 0.3207625448703766, 'eval_f1': 0.5758642401684911, 'eval_precision': 0.8187276187896844, 'eval_recall': 0.5333874740146058, 'eval_runtime': 0.5791, 'eval_samples_per_second': 651.063, 'eval_steps_per_second': 82.894, 'epoch': 3.0}


 40%|███▉      | 754/1890 [00:35<00:48, 23.61it/s]
 40%|████      | 756/1890 [00:36<00:48, 23.61it/s]

{'eval_loss': 0.34280160069465637, 'eval_f1': 0.6625519261419853, 'eval_precision': 0.7313293506807657, 'eval_recall': 0.6093011478928421, 'eval_runtime': 0.5537, 'eval_samples_per_second': 680.879, 'eval_steps_per_second': 86.69, 'epoch': 4.0}


 50%|████▉     | 943/1890 [00:45<00:38, 24.54it/s]
 50%|█████     | 945/1890 [00:45<00:38, 24.54it/s]

{'eval_loss': 0.3687432110309601, 'eval_f1': 0.6720844887591149, 'eval_precision': 0.7358752025517599, 'eval_recall': 0.638795857525621, 'eval_runtime': 0.5341, 'eval_samples_per_second': 705.916, 'eval_steps_per_second': 89.878, 'epoch': 5.0}


 53%|█████▎    | 1003/1890 [00:49<00:37, 23.36it/s]

{'loss': 0.2249, 'grad_norm': 3.0419366359710693, 'learning_rate': 2.3544973544973546e-05, 'epoch': 5.29}


 60%|█████▉    | 1132/1890 [00:54<00:31, 23.97it/s]
 60%|██████    | 1134/1890 [00:55<00:31, 23.97it/s]

{'eval_loss': 0.39055177569389343, 'eval_f1': 0.6826516788563654, 'eval_precision': 0.743964628176712, 'eval_recall': 0.6497257806986271, 'eval_runtime': 0.5378, 'eval_samples_per_second': 700.944, 'eval_steps_per_second': 89.245, 'epoch': 6.0}


 70%|██████▉   | 1321/1890 [01:04<00:23, 23.86it/s]
 70%|███████   | 1323/1890 [01:04<00:23, 23.86it/s]

{'eval_loss': 0.39334535598754883, 'eval_f1': 0.6883943750583328, 'eval_precision': 0.7487083684734401, 'eval_recall': 0.6513862078236163, 'eval_runtime': 0.5692, 'eval_samples_per_second': 662.29, 'eval_steps_per_second': 84.323, 'epoch': 7.0}


 80%|███████▉  | 1510/1890 [01:13<00:15, 25.07it/s]
 80%|████████  | 1512/1890 [01:14<00:15, 25.07it/s]

{'eval_loss': 0.3803950846195221, 'eval_f1': 0.7202505734514754, 'eval_precision': 0.7585588972431078, 'eval_recall': 0.6880442869167218, 'eval_runtime': 0.5599, 'eval_samples_per_second': 673.297, 'eval_steps_per_second': 85.725, 'epoch': 8.0}


 90%|████████▉ | 1699/1890 [01:22<00:07, 25.16it/s]
 90%|█████████ | 1701/1890 [01:23<00:07, 25.16it/s]

{'eval_loss': 0.39687657356262207, 'eval_f1': 0.7103121231070967, 'eval_precision': 0.7500602593686532, 'eval_recall': 0.6814297012028344, 'eval_runtime': 0.5212, 'eval_samples_per_second': 723.275, 'eval_steps_per_second': 92.088, 'epoch': 9.0}


100%|█████████▉| 1888/1890 [01:31<00:00, 23.75it/s]
100%|██████████| 1890/1890 [01:33<00:00, 23.75it/s]

{'eval_loss': 0.40181660652160645, 'eval_f1': 0.7121184647391067, 'eval_precision': 0.7613154925922554, 'eval_recall': 0.6738536922405839, 'eval_runtime': 0.5176, 'eval_samples_per_second': 728.394, 'eval_steps_per_second': 92.74, 'epoch': 10.0}


100%|██████████| 1890/1890 [01:34<00:00, 20.10it/s]


{'train_runtime': 94.0322, 'train_samples_per_second': 160.264, 'train_steps_per_second': 20.1, 'train_loss': 0.13281160687643384, 'epoch': 10.0}


100%|██████████| 48/48 [00:00<00:00, 95.50it/s] 


Evaluation Metrics: {'eval_loss': 0.3803950846195221, 'eval_f1': 0.7202505734514754, 'eval_precision': 0.7585588972431078, 'eval_recall': 0.6880442869167218, 'eval_runtime': 0.5158, 'eval_samples_per_second': 730.918, 'eval_steps_per_second': 93.061, 'epoch': 10.0}
CHECK SCORE                                                 model     lan  \
14  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0.01  python   
15  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0.01  python   
16  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0.01  python   
17  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0.01  python   
18  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0.01  python   

                 cat  precision    recall        f1  
14             Usage   0.885714  0.768595  0.823009  
15        Parameters   0.895238  0.839286  0.866359  
16  DevelopmentNotes   0.625000  0.500000  0.555556  
17            Expand   0.650000  0.565217  0.604651  
18           Summary   0.736842  0.767123  0.751678  
Scores:

0,1
eval/f1,▁▅▆▇▇▇▇████
eval/loss,▅▁▂▄▆▇▇▆██▆
eval/precision,▁▅█▇▇▇▇▇▇▇▇
eval/recall,▁▅▆▇▇▇▇████
eval/runtime,▄▃█▅▃▃▇▆▂▁▁
eval/samples_per_second,▅▆▁▄▆▅▂▃▇██
eval/steps_per_second,▅▆▁▄▆▅▂▃▇██
train/epoch,▁▂▃▃▄▄▅▆▆▇███
train/global_step,▁▂▃▃▄▄▅▆▆▇███
train/grad_norm,▁

0,1
eval/f1,0.72025
eval/loss,0.3804
eval/precision,0.75856
eval/recall,0.68804
eval/runtime,0.5158
eval/samples_per_second,730.918
eval/steps_per_second,93.061
total_flos,499097625100800.0
train/epoch,10.0
train/global_step,1890.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1298/1298 [00:00<00:00, 12686.50 examples/s]
 10%|▉         | 129/1300 [00:05<00:48, 24.12it/s]
 10%|█         | 130/1300 [00:05<00:48, 24.12it/s]

{'eval_loss': 0.2844652533531189, 'eval_f1': 0.1918782463404889, 'eval_precision': 0.3848314606741573, 'eval_recall': 0.15745849772947218, 'eval_runtime': 0.403, 'eval_samples_per_second': 645.099, 'eval_steps_per_second': 81.878, 'epoch': 1.0}


 20%|█▉        | 258/1300 [00:12<00:41, 25.19it/s]
 20%|██        | 260/1300 [00:12<00:41, 25.19it/s]

{'eval_loss': 0.22459401190280914, 'eval_f1': 0.45598796554530957, 'eval_precision': 0.6441490784347927, 'eval_recall': 0.37168169302246856, 'eval_runtime': 0.3631, 'eval_samples_per_second': 716.14, 'eval_steps_per_second': 90.895, 'epoch': 2.0}


 30%|███       | 390/1300 [00:18<00:36, 25.13it/s]
 30%|███       | 390/1300 [00:18<00:36, 25.13it/s]

{'eval_loss': 0.2202502340078354, 'eval_f1': 0.4847293574190239, 'eval_precision': 0.5928412281954261, 'eval_recall': 0.416161279541808, 'eval_runtime': 0.3738, 'eval_samples_per_second': 695.538, 'eval_steps_per_second': 88.28, 'epoch': 3.0}


 40%|███▉      | 518/1300 [00:26<00:30, 25.26it/s]
 40%|████      | 520/1300 [00:26<00:30, 25.26it/s]

{'eval_loss': 0.2268470823764801, 'eval_f1': 0.5600167460496384, 'eval_precision': 0.7914391205995405, 'eval_recall': 0.5076047585333069, 'eval_runtime': 0.3591, 'eval_samples_per_second': 724.052, 'eval_steps_per_second': 91.899, 'epoch': 4.0}


 50%|█████     | 650/1300 [00:32<00:25, 25.32it/s]
 50%|█████     | 650/1300 [00:33<00:25, 25.32it/s]

{'eval_loss': 0.22687281668186188, 'eval_f1': 0.5711384295278658, 'eval_precision': 0.6862107512717788, 'eval_recall': 0.517408318110875, 'eval_runtime': 0.4045, 'eval_samples_per_second': 642.774, 'eval_steps_per_second': 81.583, 'epoch': 5.0}


 60%|█████▉    | 779/1300 [00:41<00:21, 24.05it/s]
 60%|██████    | 780/1300 [00:41<00:21, 24.05it/s]

{'eval_loss': 0.23244015872478485, 'eval_f1': 0.6138360074556716, 'eval_precision': 0.8300039862766564, 'eval_recall': 0.5510232327938327, 'eval_runtime': 0.377, 'eval_samples_per_second': 689.674, 'eval_steps_per_second': 87.536, 'epoch': 6.0}


 70%|██████▉   | 908/1300 [00:48<00:15, 25.45it/s]
 70%|███████   | 910/1300 [00:48<00:15, 25.45it/s]

{'eval_loss': 0.2302369326353073, 'eval_f1': 0.6566590085917817, 'eval_precision': 0.8393818542146809, 'eval_recall': 0.5858502699916907, 'eval_runtime': 0.3551, 'eval_samples_per_second': 732.255, 'eval_steps_per_second': 92.94, 'epoch': 7.0}


 77%|███████▋  | 1004/1300 [00:53<00:12, 24.31it/s]

{'loss': 0.1319, 'grad_norm': 0.28007733821868896, 'learning_rate': 1.153846153846154e-05, 'epoch': 7.69}


 80%|████████  | 1040/1300 [00:54<00:10, 24.33it/s]
 80%|████████  | 1040/1300 [00:54<00:10, 24.33it/s]

{'eval_loss': 0.2370995581150055, 'eval_f1': 0.6498424219156259, 'eval_precision': 0.8074462608505162, 'eval_recall': 0.5801823006763047, 'eval_runtime': 0.388, 'eval_samples_per_second': 670.114, 'eval_steps_per_second': 85.053, 'epoch': 8.0}


 90%|████████▉ | 1169/1300 [01:02<00:05, 25.24it/s]
 90%|█████████ | 1170/1300 [01:02<00:05, 25.24it/s]

{'eval_loss': 0.2320442646741867, 'eval_f1': 0.6557315233785823, 'eval_precision': 0.8207569664818244, 'eval_recall': 0.580716057048034, 'eval_runtime': 0.3643, 'eval_samples_per_second': 713.606, 'eval_steps_per_second': 90.573, 'epoch': 9.0}


100%|█████████▉| 1298/1300 [01:09<00:00, 24.79it/s]
100%|██████████| 1300/1300 [01:11<00:00, 24.79it/s]

{'eval_loss': 0.2371280938386917, 'eval_f1': 0.6646768174062629, 'eval_precision': 0.8538886476576446, 'eval_recall': 0.5866127135829885, 'eval_runtime': 0.3633, 'eval_samples_per_second': 715.605, 'eval_steps_per_second': 90.827, 'epoch': 10.0}


100%|██████████| 1300/1300 [01:12<00:00, 17.98it/s]


{'train_runtime': 72.3098, 'train_samples_per_second': 143.549, 'train_steps_per_second': 17.978, 'train_loss': 0.10683276433211107, 'epoch': 10.0}


100%|██████████| 33/33 [00:00<00:00, 97.74it/s] 


Evaluation Metrics: {'eval_loss': 0.2371280938386917, 'eval_f1': 0.6646768174062629, 'eval_precision': 0.8538886476576446, 'eval_recall': 0.5866127135829885, 'eval_runtime': 0.353, 'eval_samples_per_second': 736.475, 'eval_steps_per_second': 93.476, 'epoch': 10.0}
CHECK SCORE                                                 model    lan  \
12  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0.01  pharo   
13  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0.01  pharo   
14  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0.01  pharo   
15  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0.01  pharo   
16  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0.01  pharo   
17  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0.01  pharo   
18  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0.01  pharo   

                        cat  precision    recall        f1  
12  Keyimplementationpoints   0.821429  0.547619  0.657143  
13                  Example   0.936170  0.871287  0.902564  
14         Responsibilities

0,1
eval/f1,▁▅▅▆▇▇█████
eval/loss,█▁▁▂▂▂▂▃▂▃▃
eval/precision,▁▅▄▇▅██▇███
eval/recall,▁▄▅▇▇▇█████
eval/runtime,█▂▄▂█▄▁▆▃▂▁
eval/samples_per_second,▁▆▅▇▁▅█▃▆▆█
eval/steps_per_second,▁▆▅▇▁▅█▃▆▆█
train/epoch,▁▂▃▃▄▅▆▆▆▇███
train/global_step,▁▂▃▃▄▅▆▆▆▇███
train/grad_norm,▁

0,1
eval/f1,0.66468
eval/loss,0.23713
eval/precision,0.85389
eval/recall,0.58661
eval/runtime,0.353
eval/samples_per_second,736.475
eval/steps_per_second,93.476
total_flos,343783551237120.0
train/epoch,10.0
train/global_step,1300.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 7614/7614 [00:00<00:00, 20775.13 examples/s]
 10%|▉         | 761/7620 [00:31<04:37, 24.73it/s]
 10%|█         | 762/7620 [00:33<04:37, 24.73it/s]

{'eval_loss': 0.16648782789707184, 'eval_f1': 0.427186283877066, 'eval_precision': 0.5227970914226047, 'eval_recall': 0.4001249276175723, 'eval_runtime': 2.282, 'eval_samples_per_second': 667.4, 'eval_steps_per_second': 83.699, 'epoch': 1.0}


 13%|█▎        | 1004/7620 [00:45<04:31, 24.35it/s]

{'loss': 0.2528, 'grad_norm': 2.616354465484619, 'learning_rate': 4.343832020997376e-06, 'epoch': 1.31}


 20%|█▉        | 1523/7620 [01:06<04:04, 24.94it/s]
 20%|██        | 1524/7620 [01:08<04:04, 24.94it/s]

{'eval_loss': 0.12337208539247513, 'eval_f1': 0.5345809540300082, 'eval_precision': 0.540659249876952, 'eval_recall': 0.5295251352786494, 'eval_runtime': 2.1605, 'eval_samples_per_second': 704.94, 'eval_steps_per_second': 88.407, 'epoch': 2.0}


 26%|██▋       | 2003/7620 [01:29<03:45, 24.88it/s]

{'loss': 0.121, 'grad_norm': 1.2595150470733643, 'learning_rate': 3.6876640419947506e-06, 'epoch': 2.62}


 30%|██▉       | 2285/7620 [01:40<03:33, 24.99it/s]
 30%|███       | 2286/7620 [01:42<03:33, 24.99it/s]

{'eval_loss': 0.10360140353441238, 'eval_f1': 0.5515734309794681, 'eval_precision': 0.6905632514587537, 'eval_recall': 0.5332979378899807, 'eval_runtime': 2.0675, 'eval_samples_per_second': 736.622, 'eval_steps_per_second': 92.38, 'epoch': 3.0}


 39%|███▉      | 3005/7620 [02:12<03:09, 24.34it/s]

{'loss': 0.0928, 'grad_norm': 1.202949047088623, 'learning_rate': 3.0314960629921263e-06, 'epoch': 3.94}


 40%|███▉      | 3047/7620 [02:14<03:11, 23.88it/s]
 40%|████      | 3048/7620 [02:16<03:11, 23.88it/s]

{'eval_loss': 0.10289280116558075, 'eval_f1': 0.6031257549803934, 'eval_precision': 0.9235083943606069, 'eval_recall': 0.5692864150972337, 'eval_runtime': 2.1261, 'eval_samples_per_second': 716.337, 'eval_steps_per_second': 89.836, 'epoch': 4.0}


 50%|████▉     | 3809/7620 [02:48<02:36, 24.30it/s]
 50%|█████     | 3810/7620 [02:50<02:36, 24.30it/s]

{'eval_loss': 0.09244699031114578, 'eval_f1': 0.7488013850015474, 'eval_precision': 0.908276121064472, 'eval_recall': 0.6907230016339106, 'eval_runtime': 2.1121, 'eval_samples_per_second': 721.071, 'eval_steps_per_second': 90.43, 'epoch': 5.0}


 53%|█████▎    | 4004/7620 [02:58<02:23, 25.15it/s]

{'loss': 0.0744, 'grad_norm': 1.7525253295898438, 'learning_rate': 2.3753280839895016e-06, 'epoch': 5.25}


 60%|█████▉    | 4571/7620 [03:21<01:59, 25.48it/s]
 60%|██████    | 4572/7620 [03:23<01:59, 25.48it/s]

{'eval_loss': 0.09366071969270706, 'eval_f1': 0.7501122717942516, 'eval_precision': 0.888070873213425, 'eval_recall': 0.6947998718319661, 'eval_runtime': 2.0643, 'eval_samples_per_second': 737.798, 'eval_steps_per_second': 92.527, 'epoch': 6.0}


 66%|██████▌   | 5003/7620 [03:42<01:47, 24.32it/s]

{'loss': 0.064, 'grad_norm': 1.0639724731445312, 'learning_rate': 1.7191601049868767e-06, 'epoch': 6.56}


 70%|██████▉   | 5333/7620 [03:55<01:37, 23.46it/s]
 70%|███████   | 5334/7620 [03:57<01:37, 23.46it/s]

{'eval_loss': 0.09112562984228134, 'eval_f1': 0.7778198531419849, 'eval_precision': 0.9039005809604598, 'eval_recall': 0.7220125864819441, 'eval_runtime': 2.1361, 'eval_samples_per_second': 712.989, 'eval_steps_per_second': 89.416, 'epoch': 7.0}


 79%|███████▉  | 6002/7620 [04:25<01:05, 24.72it/s]

{'loss': 0.0579, 'grad_norm': 0.16131126880645752, 'learning_rate': 1.062992125984252e-06, 'epoch': 7.87}


 80%|███████▉  | 6095/7620 [04:29<01:03, 24.16it/s]
 80%|████████  | 6096/7620 [04:31<01:03, 24.16it/s]

{'eval_loss': 0.09135416150093079, 'eval_f1': 0.8009252600378867, 'eval_precision': 0.9066920488956702, 'eval_recall': 0.7488126031536988, 'eval_runtime': 2.164, 'eval_samples_per_second': 703.784, 'eval_steps_per_second': 88.262, 'epoch': 8.0}


 90%|████████▉ | 6857/7620 [05:03<00:29, 25.56it/s]
 90%|█████████ | 6858/7620 [05:05<00:29, 25.56it/s]

{'eval_loss': 0.08975690603256226, 'eval_f1': 0.8150691618385736, 'eval_precision': 0.9040532050128872, 'eval_recall': 0.7635646560766266, 'eval_runtime': 2.1024, 'eval_samples_per_second': 724.409, 'eval_steps_per_second': 90.848, 'epoch': 9.0}


 92%|█████████▏| 7004/7620 [05:12<00:24, 24.66it/s]

{'loss': 0.0529, 'grad_norm': 1.161339521408081, 'learning_rate': 4.068241469816273e-07, 'epoch': 9.19}


100%|█████████▉| 7619/7620 [05:37<00:00, 24.27it/s]
100%|██████████| 7620/7620 [05:40<00:00, 24.27it/s]

{'eval_loss': 0.08940153568983078, 'eval_f1': 0.8181074795409444, 'eval_precision': 0.9066341051616977, 'eval_recall': 0.7656130493236045, 'eval_runtime': 2.0789, 'eval_samples_per_second': 732.583, 'eval_steps_per_second': 91.873, 'epoch': 10.0}


100%|██████████| 7620/7620 [05:41<00:00, 22.32it/s]


{'train_runtime': 341.342, 'train_samples_per_second': 178.443, 'train_steps_per_second': 22.324, 'train_loss': 0.09787746489517332, 'epoch': 10.0}


100%|██████████| 191/191 [00:02<00:00, 92.50it/s]


Evaluation Metrics: {'eval_loss': 0.08940153568983078, 'eval_f1': 0.8181074795409444, 'eval_precision': 0.9066341051616977, 'eval_recall': 0.7656130493236045, 'eval_runtime': 2.0783, 'eval_samples_per_second': 732.827, 'eval_steps_per_second': 91.904, 'epoch': 10.0}
CHECK SCORE                                                 model   lan          cat  \
12  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0.01  java      summary   
13  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0.01  java    Ownership   
14  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0.01  java       Expand   
15  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0.01  java        usage   
16  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0.01  java      Pointer   
17  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0.01  java  deprecation   
18  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0.01  java     rational   

    precision    recall        f1  
12   0.929363  0.954481  0.941754  
13   0.964912  1.000000  0.982143  
14  

0,1
eval/f1,▁▃▃▄▇▇▇████
eval/loss,█▄▂▂▁▁▁▁▁▁▁
eval/precision,▁▁▄██▇█████
eval/recall,▁▃▄▄▇▇▇████
eval/runtime,█▄▁▃▃▁▃▄▂▁▁
eval/samples_per_second,▁▅█▆▆█▆▅▇▇█
eval/steps_per_second,▁▅█▆▆█▆▅▇▇█
train/epoch,▁▁▂▂▃▃▃▄▄▅▅▆▆▆▇▇███
train/global_step,▁▁▂▂▃▃▃▄▄▅▅▆▆▆▇▇███
train/grad_norm,█▄▄▆▄▁▄

0,1
eval/f1,0.81811
eval/loss,0.0894
eval/precision,0.90663
eval/recall,0.76561
eval/runtime,2.0783
eval/samples_per_second,732.827
eval/steps_per_second,91.904
total_flos,2017327177827840.0
train/epoch,10.0
train/global_step,7620.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1884/1884 [00:00<00:00, 14842.38 examples/s]
 10%|▉         | 187/1890 [00:07<01:07, 25.21it/s]
 10%|█         | 189/1890 [00:08<01:07, 25.21it/s]

{'eval_loss': 0.4918714463710785, 'eval_f1': 0.0, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_runtime': 0.5217, 'eval_samples_per_second': 722.608, 'eval_steps_per_second': 92.003, 'epoch': 1.0}


 20%|█▉        | 376/1890 [00:16<01:01, 24.78it/s]
 20%|██        | 378/1890 [00:17<01:01, 24.78it/s]

{'eval_loss': 0.45451512932777405, 'eval_f1': 0.09113924050632911, 'eval_precision': 0.1945945945945946, 'eval_recall': 0.05950413223140496, 'eval_runtime': 0.5156, 'eval_samples_per_second': 731.16, 'eval_steps_per_second': 93.092, 'epoch': 2.0}


 30%|██▉       | 565/1890 [00:25<00:54, 24.46it/s]
 30%|███       | 567/1890 [00:26<00:54, 24.46it/s]

{'eval_loss': 0.42044398188591003, 'eval_f1': 0.14238655462184874, 'eval_precision': 0.39259259259259255, 'eval_recall': 0.09845041322314049, 'eval_runtime': 0.5126, 'eval_samples_per_second': 735.446, 'eval_steps_per_second': 93.638, 'epoch': 3.0}


 40%|███▉      | 754/1890 [00:35<00:44, 25.37it/s]
 40%|████      | 756/1890 [00:35<00:44, 25.37it/s]

{'eval_loss': 0.389670729637146, 'eval_f1': 0.2532259502511219, 'eval_precision': 0.5850092764378478, 'eval_recall': 0.17988387701961797, 'eval_runtime': 0.521, 'eval_samples_per_second': 723.572, 'eval_steps_per_second': 92.126, 'epoch': 4.0}


 50%|████▉     | 943/1890 [00:44<00:38, 24.88it/s]
 50%|█████     | 945/1890 [00:44<00:37, 24.88it/s]

{'eval_loss': 0.36631467938423157, 'eval_f1': 0.3217351656923778, 'eval_precision': 0.5739304278288684, 'eval_recall': 0.24958900066309778, 'eval_runtime': 0.5307, 'eval_samples_per_second': 710.444, 'eval_steps_per_second': 90.454, 'epoch': 5.0}


 53%|█████▎    | 1003/1890 [00:48<00:35, 24.97it/s]

{'loss': 0.4376, 'grad_norm': 2.6306777000427246, 'learning_rate': 2.3544973544973545e-06, 'epoch': 5.29}


 60%|█████▉    | 1132/1890 [00:53<00:31, 23.93it/s]
 60%|██████    | 1134/1890 [00:54<00:31, 23.93it/s]

{'eval_loss': 0.35431474447250366, 'eval_f1': 0.3839008563018464, 'eval_precision': 0.5491248863041261, 'eval_recall': 0.2998443337484433, 'eval_runtime': 0.5476, 'eval_samples_per_second': 688.403, 'eval_steps_per_second': 87.648, 'epoch': 6.0}


 70%|██████▉   | 1321/1890 [01:02<00:23, 24.64it/s]
 70%|███████   | 1323/1890 [01:02<00:23, 24.64it/s]

{'eval_loss': 0.3441721796989441, 'eval_f1': 0.4093075666458657, 'eval_precision': 0.5481097524957174, 'eval_recall': 0.3304703546764568, 'eval_runtime': 0.5547, 'eval_samples_per_second': 679.608, 'eval_steps_per_second': 86.528, 'epoch': 7.0}


 80%|███████▉  | 1510/1890 [01:11<00:15, 24.62it/s]
 80%|████████  | 1512/1890 [01:12<00:15, 24.62it/s]

{'eval_loss': 0.3411194682121277, 'eval_f1': 0.421001182036859, 'eval_precision': 0.6299910356872382, 'eval_recall': 0.3498282866176363, 'eval_runtime': 0.5173, 'eval_samples_per_second': 728.787, 'eval_steps_per_second': 92.79, 'epoch': 8.0}


 90%|████████▉ | 1699/1890 [01:20<00:07, 24.64it/s]
 90%|█████████ | 1701/1890 [01:21<00:07, 24.64it/s]

{'eval_loss': 0.3374147415161133, 'eval_f1': 0.4235556374824035, 'eval_precision': 0.6304911138244471, 'eval_recall': 0.3531340717416033, 'eval_runtime': 0.5196, 'eval_samples_per_second': 725.599, 'eval_steps_per_second': 92.384, 'epoch': 9.0}


100%|█████████▉| 1888/1890 [01:29<00:00, 24.15it/s]
100%|██████████| 1890/1890 [01:31<00:00, 24.15it/s]

{'eval_loss': 0.33699777722358704, 'eval_f1': 0.42173906040148335, 'eval_precision': 0.6239014376695536, 'eval_recall': 0.3531340717416033, 'eval_runtime': 0.5835, 'eval_samples_per_second': 646.132, 'eval_steps_per_second': 82.266, 'epoch': 10.0}


100%|██████████| 1890/1890 [01:33<00:00, 20.31it/s]


{'train_runtime': 93.055, 'train_samples_per_second': 161.947, 'train_steps_per_second': 20.311, 'train_loss': 0.3779780635127315, 'epoch': 10.0}


100%|██████████| 48/48 [00:00<00:00, 91.11it/s]


Evaluation Metrics: {'eval_loss': 0.3374147415161133, 'eval_f1': 0.4235556374824035, 'eval_precision': 0.6304911138244471, 'eval_recall': 0.3531340717416033, 'eval_runtime': 0.5419, 'eval_samples_per_second': 695.718, 'eval_steps_per_second': 88.579, 'epoch': 10.0}
CHECK SCORE                                                 model     lan  \
14  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0.01  python   
15  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0.01  python   
16  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0.01  python   
17  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0.01  python   
18  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0.01  python   

                 cat  precision    recall        f1  
14             Usage   0.901235  0.603306  0.722772  
15        Parameters   0.890110  0.723214  0.798030  
16  DevelopmentNotes   0.000000  0.000000  0.000000  
17            Expand   0.500000  0.014493  0.028169  
18           Summary   0.861111  0.424658  0.568807  
Scores:

0,1
eval/f1,▁▃▃▅▆▇█████
eval/loss,█▆▅▃▂▂▁▁▁▁▁
eval/precision,▁▃▅▇▇▇▇████
eval/recall,▁▂▃▅▆▇█████
eval/runtime,▂▁▁▂▃▄▅▁▂█▄
eval/samples_per_second,▇██▇▆▄▄▇▇▁▅
eval/steps_per_second,▇██▇▆▄▄▇▇▁▅
train/epoch,▁▂▃▃▄▄▅▆▆▇███
train/global_step,▁▂▃▃▄▄▅▆▆▇███
train/grad_norm,▁

0,1
eval/f1,0.42356
eval/loss,0.33741
eval/precision,0.63049
eval/recall,0.35313
eval/runtime,0.5419
eval/samples_per_second,695.718
eval/steps_per_second,88.579
total_flos,499097625100800.0
train/epoch,10.0
train/global_step,1890.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1298/1298 [00:00<00:00, 13064.20 examples/s]
 10%|▉         | 128/1300 [00:05<00:47, 24.90it/s]
 10%|█         | 130/1300 [00:05<00:46, 24.90it/s]

{'eval_loss': 0.42935189604759216, 'eval_f1': 0.0, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_runtime': 0.3647, 'eval_samples_per_second': 712.841, 'eval_steps_per_second': 90.476, 'epoch': 1.0}


 20%|██        | 260/1300 [00:12<00:40, 25.67it/s]
 20%|██        | 260/1300 [00:12<00:40, 25.67it/s]

{'eval_loss': 0.3827970623970032, 'eval_f1': 0.05110336817653891, 'eval_precision': 0.14285714285714285, 'eval_recall': 0.031117397454031116, 'eval_runtime': 0.361, 'eval_samples_per_second': 720.181, 'eval_steps_per_second': 91.408, 'epoch': 2.0}


 30%|██▉       | 389/1300 [00:18<00:37, 24.56it/s]
 30%|███       | 390/1300 [00:18<00:37, 24.56it/s]

{'eval_loss': 0.347027063369751, 'eval_f1': 0.11299435028248588, 'eval_precision': 0.13157894736842105, 'eval_recall': 0.099009900990099, 'eval_runtime': 0.3929, 'eval_samples_per_second': 661.802, 'eval_steps_per_second': 83.998, 'epoch': 3.0}


 40%|███▉      | 518/1300 [00:25<00:30, 25.32it/s]
 40%|████      | 520/1300 [00:25<00:30, 25.32it/s]

{'eval_loss': 0.32352975010871887, 'eval_f1': 0.11494252873563217, 'eval_precision': 0.136986301369863, 'eval_recall': 0.099009900990099, 'eval_runtime': 0.3664, 'eval_samples_per_second': 709.642, 'eval_steps_per_second': 90.07, 'epoch': 4.0}


 50%|█████     | 650/1300 [00:31<00:26, 24.44it/s]
 50%|█████     | 650/1300 [00:31<00:26, 24.44it/s]

{'eval_loss': 0.30800193548202515, 'eval_f1': 0.12063492063492064, 'eval_precision': 0.13743218806509946, 'eval_recall': 0.1074964639321075, 'eval_runtime': 0.3593, 'eval_samples_per_second': 723.583, 'eval_steps_per_second': 91.839, 'epoch': 5.0}


 60%|█████▉    | 779/1300 [00:38<00:20, 25.19it/s]
 60%|██████    | 780/1300 [00:38<00:20, 25.19it/s]

{'eval_loss': 0.29598286747932434, 'eval_f1': 0.12396069538926681, 'eval_precision': 0.1331168831168831, 'eval_recall': 0.11598302687411599, 'eval_runtime': 0.3592, 'eval_samples_per_second': 723.886, 'eval_steps_per_second': 91.878, 'epoch': 6.0}


 70%|██████▉   | 908/1300 [00:45<00:16, 23.81it/s]
 70%|███████   | 910/1300 [00:45<00:16, 23.81it/s]

{'eval_loss': 0.28794384002685547, 'eval_f1': 0.12462006079027356, 'eval_precision': 0.13464696223316913, 'eval_recall': 0.11598302687411599, 'eval_runtime': 0.3972, 'eval_samples_per_second': 654.51, 'eval_steps_per_second': 83.072, 'epoch': 7.0}


 77%|███████▋  | 1004/1300 [00:50<00:12, 24.61it/s]

{'loss': 0.3404, 'grad_norm': 1.8712176084518433, 'learning_rate': 1.153846153846154e-06, 'epoch': 7.69}


 80%|████████  | 1040/1300 [00:51<00:10, 24.78it/s]
 80%|████████  | 1040/1300 [00:52<00:10, 24.78it/s]

{'eval_loss': 0.2826854884624481, 'eval_f1': 0.15216261450308635, 'eval_precision': 0.41904761904761906, 'eval_recall': 0.13294721953398347, 'eval_runtime': 0.3692, 'eval_samples_per_second': 704.31, 'eval_steps_per_second': 89.393, 'epoch': 8.0}


 90%|████████▉ | 1169/1300 [00:58<00:05, 25.07it/s]
 90%|█████████ | 1170/1300 [00:58<00:05, 25.07it/s]

{'eval_loss': 0.2794322371482849, 'eval_f1': 0.1708790693310817, 'eval_precision': 0.42054574638844305, 'eval_recall': 0.14422541502270528, 'eval_runtime': 0.3714, 'eval_samples_per_second': 700.083, 'eval_steps_per_second': 88.857, 'epoch': 9.0}


100%|█████████▉| 1298/1300 [01:04<00:00, 25.07it/s]
100%|██████████| 1300/1300 [01:05<00:00, 25.07it/s]

{'eval_loss': 0.27836477756500244, 'eval_f1': 0.17670353364713584, 'eval_precision': 0.41781874039938555, 'eval_recall': 0.15081366783294872, 'eval_runtime': 0.3542, 'eval_samples_per_second': 734.097, 'eval_steps_per_second': 93.174, 'epoch': 10.0}


100%|██████████| 1300/1300 [01:06<00:00, 19.42it/s]


{'train_runtime': 66.9587, 'train_samples_per_second': 155.021, 'train_steps_per_second': 19.415, 'train_loss': 0.3198290311373197, 'epoch': 10.0}


100%|██████████| 33/33 [00:00<00:00, 96.23it/s] 


Evaluation Metrics: {'eval_loss': 0.27836477756500244, 'eval_f1': 0.17670353364713584, 'eval_precision': 0.41781874039938555, 'eval_recall': 0.15081366783294872, 'eval_runtime': 0.3563, 'eval_samples_per_second': 729.667, 'eval_steps_per_second': 92.612, 'epoch': 10.0}
CHECK SCORE                                                 model    lan  \
12  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0.01  pharo   
13  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0.01  pharo   
14  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0.01  pharo   
15  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0.01  pharo   
16  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0.01  pharo   
17  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0.01  pharo   
18  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0.01  pharo   

                        cat  precision    recall        f1  
12  Keyimplementationpoints   0.000000  0.000000  0.000000  
13                  Example   0.924731  0.851485  0.886598  
14         Responsibil

0,1
eval/f1,▁▃▅▆▆▆▆▇███
eval/loss,█▆▄▃▂▂▁▁▁▁▁
eval/precision,▁▃▃▃▃▃▃████
eval/recall,▁▂▆▆▆▆▆▇███
eval/runtime,▃▂▇▃▂▂█▃▄▁▁
eval/samples_per_second,▆▇▂▆▇▇▁▅▅██
eval/steps_per_second,▆▇▂▆▇▇▁▅▅██
train/epoch,▁▂▃▃▄▅▆▆▆▇███
train/global_step,▁▂▃▃▄▅▆▆▆▇███
train/grad_norm,▁

0,1
eval/f1,0.1767
eval/loss,0.27836
eval/precision,0.41782
eval/recall,0.15081
eval/runtime,0.3563
eval/samples_per_second,729.667
eval/steps_per_second,92.612
total_flos,343783551237120.0
train/epoch,10.0
train/global_step,1300.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 7614/7614 [00:00<00:00, 20340.55 examples/s]
  4%|▍         | 1004/22845 [00:35<12:16, 29.66it/s]

{'loss': 0.1625, 'grad_norm': 1.6320780515670776, 'learning_rate': 4.781133727292624e-05, 'epoch': 0.66}


  7%|▋         | 1521/22845 [00:52<12:25, 28.59it/s]
  7%|▋         | 1523/22845 [00:56<12:25, 28.59it/s]

{'eval_loss': 0.10881763696670532, 'eval_f1': 0.6593358823280312, 'eval_precision': 0.6789460432077983, 'eval_recall': 0.6424578737426073, 'eval_runtime': 3.8209, 'eval_samples_per_second': 398.598, 'eval_steps_per_second': 99.715, 'epoch': 1.0}


  9%|▉         | 2006/22845 [01:14<11:21, 30.59it/s]  

{'loss': 0.1015, 'grad_norm': 6.592591762542725, 'learning_rate': 4.562267454585248e-05, 'epoch': 1.31}


 13%|█▎        | 3004/22845 [01:48<11:57, 27.66it/s]

{'loss': 0.0901, 'grad_norm': 0.5484303832054138, 'learning_rate': 4.343401181877873e-05, 'epoch': 1.97}


 13%|█▎        | 3044/22845 [01:50<11:39, 28.29it/s]
 13%|█▎        | 3046/22845 [01:54<11:39, 28.29it/s]

{'eval_loss': 0.08420589566230774, 'eval_f1': 0.8033344002268606, 'eval_precision': 0.8787932714920915, 'eval_recall': 0.7582669199922051, 'eval_runtime': 4.0855, 'eval_samples_per_second': 372.786, 'eval_steps_per_second': 93.258, 'epoch': 2.0}


 18%|█▊        | 4004/22845 [02:28<11:05, 28.29it/s]  

{'loss': 0.0596, 'grad_norm': 0.022636815905570984, 'learning_rate': 4.1245349091704974e-05, 'epoch': 2.63}


 20%|██        | 4569/22845 [02:47<11:07, 27.37it/s]
 20%|██        | 4569/22845 [02:51<11:07, 27.37it/s]

{'eval_loss': 0.10023856163024902, 'eval_f1': 0.8251525117405729, 'eval_precision': 0.8769224532388984, 'eval_recall': 0.788525770049913, 'eval_runtime': 3.6282, 'eval_samples_per_second': 419.768, 'eval_steps_per_second': 105.011, 'epoch': 3.0}


 22%|██▏       | 5003/22845 [03:07<10:13, 29.10it/s]  

{'loss': 0.051, 'grad_norm': 0.8399732708930969, 'learning_rate': 3.9056686364631214e-05, 'epoch': 3.28}


 26%|██▋       | 6005/22845 [03:42<09:52, 28.43it/s]

{'loss': 0.0422, 'grad_norm': 0.20007315278053284, 'learning_rate': 3.6868023637557454e-05, 'epoch': 3.94}


 27%|██▋       | 6089/22845 [03:45<09:25, 29.63it/s]
 27%|██▋       | 6092/22845 [03:49<09:25, 29.63it/s]

{'eval_loss': 0.11754463613033295, 'eval_f1': 0.8388966717007101, 'eval_precision': 0.866717695506731, 'eval_recall': 0.8151301463129625, 'eval_runtime': 3.5485, 'eval_samples_per_second': 429.192, 'eval_steps_per_second': 107.369, 'epoch': 4.0}


 31%|███       | 7005/22845 [04:21<09:36, 27.50it/s]  

{'loss': 0.0259, 'grad_norm': 0.010465878993272781, 'learning_rate': 3.467936091048369e-05, 'epoch': 4.6}


 33%|███▎      | 7613/22845 [04:42<09:06, 27.88it/s]
 33%|███▎      | 7615/22845 [04:46<09:06, 27.88it/s]

{'eval_loss': 0.1244703009724617, 'eval_f1': 0.8296309603820015, 'eval_precision': 0.8553443187893395, 'eval_recall': 0.8141307066734446, 'eval_runtime': 3.6416, 'eval_samples_per_second': 418.22, 'eval_steps_per_second': 104.624, 'epoch': 5.0}


 35%|███▌      | 8004/22845 [05:00<09:11, 26.89it/s]  

{'loss': 0.028, 'grad_norm': 0.004122333135455847, 'learning_rate': 3.249069818340994e-05, 'epoch': 5.25}


 39%|███▉      | 9004/22845 [05:35<08:05, 28.51it/s]

{'loss': 0.0179, 'grad_norm': 2.127842664718628, 'learning_rate': 3.030203545633618e-05, 'epoch': 5.91}


 40%|███▉      | 9136/22845 [05:39<08:08, 28.08it/s]
 40%|████      | 9138/22845 [05:43<08:08, 28.08it/s]

{'eval_loss': 0.1534069925546646, 'eval_f1': 0.8306319932108838, 'eval_precision': 0.87684606881293, 'eval_recall': 0.8008102283663311, 'eval_runtime': 3.474, 'eval_samples_per_second': 438.401, 'eval_steps_per_second': 109.672, 'epoch': 6.0}


 44%|████▍     | 10002/22845 [06:14<07:22, 29.05it/s] 

{'loss': 0.0151, 'grad_norm': 0.0027550009544938803, 'learning_rate': 2.8113372729262422e-05, 'epoch': 6.57}


 47%|████▋     | 10659/22845 [06:36<06:45, 30.07it/s]
 47%|████▋     | 10661/22845 [06:40<06:45, 30.07it/s]

{'eval_loss': 0.13376574218273163, 'eval_f1': 0.8314661069479377, 'eval_precision': 0.8520052752603936, 'eval_recall': 0.8177167882704103, 'eval_runtime': 3.292, 'eval_samples_per_second': 462.632, 'eval_steps_per_second': 115.734, 'epoch': 7.0}


 48%|████▊     | 11004/22845 [06:53<07:03, 27.98it/s]  

{'loss': 0.0147, 'grad_norm': 0.004796348512172699, 'learning_rate': 2.5924710002188662e-05, 'epoch': 7.22}


 53%|█████▎    | 12004/22845 [07:27<06:25, 28.11it/s]

{'loss': 0.0133, 'grad_norm': 0.0018354071071371436, 'learning_rate': 2.3736047275114905e-05, 'epoch': 7.88}


 53%|█████▎    | 12182/22845 [07:34<06:01, 29.48it/s]
 53%|█████▎    | 12184/22845 [07:37<06:01, 29.48it/s]

{'eval_loss': 0.14738483726978302, 'eval_f1': 0.8491215461845173, 'eval_precision': 0.8618986481382309, 'eval_recall': 0.8378021503610411, 'eval_runtime': 3.2354, 'eval_samples_per_second': 470.733, 'eval_steps_per_second': 117.761, 'epoch': 8.0}


 57%|█████▋    | 13004/22845 [08:06<05:55, 27.69it/s]  

{'loss': 0.0082, 'grad_norm': 0.004374058451503515, 'learning_rate': 2.1547384548041148e-05, 'epoch': 8.54}


 60%|█████▉    | 13704/22845 [08:30<04:59, 30.50it/s]
 60%|██████    | 13707/22845 [08:34<04:59, 30.50it/s]

{'eval_loss': 0.1422530710697174, 'eval_f1': 0.8461547066968552, 'eval_precision': 0.8616497713055807, 'eval_recall': 0.8361845529162799, 'eval_runtime': 3.8246, 'eval_samples_per_second': 398.216, 'eval_steps_per_second': 99.619, 'epoch': 9.0}


 61%|██████▏   | 14005/22845 [08:45<05:26, 27.11it/s]

{'loss': 0.0107, 'grad_norm': 0.0035904471296817064, 'learning_rate': 1.935872182096739e-05, 'epoch': 9.19}


 66%|██████▌   | 15004/22845 [09:19<04:38, 28.11it/s]

{'loss': 0.0059, 'grad_norm': 0.008601104840636253, 'learning_rate': 1.717005909389363e-05, 'epoch': 9.85}


 67%|██████▋   | 15230/22845 [09:27<04:33, 27.84it/s]
 67%|██████▋   | 15230/22845 [09:31<04:33, 27.84it/s]

{'eval_loss': 0.16609293222427368, 'eval_f1': 0.8564557119757568, 'eval_precision': 0.8938516479921997, 'eval_recall': 0.8275945764263746, 'eval_runtime': 4.2325, 'eval_samples_per_second': 359.836, 'eval_steps_per_second': 90.018, 'epoch': 10.0}


 70%|███████   | 16003/22845 [09:58<04:11, 27.17it/s]  

{'loss': 0.0039, 'grad_norm': 0.0025699164252728224, 'learning_rate': 1.4981396366819875e-05, 'epoch': 10.51}


 73%|███████▎  | 16751/22845 [10:24<03:37, 28.06it/s]
 73%|███████▎  | 16753/22845 [10:28<03:37, 28.06it/s]

{'eval_loss': 0.16831183433532715, 'eval_f1': 0.8508256076954618, 'eval_precision': 0.8633238079134938, 'eval_recall': 0.8406157331027051, 'eval_runtime': 3.9783, 'eval_samples_per_second': 382.826, 'eval_steps_per_second': 95.769, 'epoch': 11.0}


 74%|███████▍  | 17004/22845 [10:37<03:16, 29.71it/s]

{'loss': 0.0032, 'grad_norm': 0.0015455440152436495, 'learning_rate': 1.2792733639746115e-05, 'epoch': 11.16}


 79%|███████▉  | 18006/22845 [11:12<02:37, 30.75it/s]

{'loss': 0.0037, 'grad_norm': 0.0013005448272451758, 'learning_rate': 1.0604070912672358e-05, 'epoch': 11.82}


 80%|███████▉  | 18274/22845 [11:21<02:48, 27.18it/s]
 80%|████████  | 18276/22845 [11:25<02:48, 27.18it/s]

{'eval_loss': 0.1670171618461609, 'eval_f1': 0.8460377238380007, 'eval_precision': 0.8625286977983391, 'eval_recall': 0.8328744132255467, 'eval_runtime': 3.5316, 'eval_samples_per_second': 431.246, 'eval_steps_per_second': 107.882, 'epoch': 12.0}


 83%|████████▎ | 19004/22845 [11:51<02:08, 29.90it/s]

{'loss': 0.0015, 'grad_norm': 0.014015790075063705, 'learning_rate': 8.4154081855986e-06, 'epoch': 12.48}


 87%|████████▋ | 19796/22845 [12:18<01:40, 30.40it/s]
 87%|████████▋ | 19799/22845 [12:22<01:40, 30.40it/s]

{'eval_loss': 0.17298288643360138, 'eval_f1': 0.8380740683365874, 'eval_precision': 0.8561859766189338, 'eval_recall': 0.823138660292286, 'eval_runtime': 3.3747, 'eval_samples_per_second': 451.301, 'eval_steps_per_second': 112.899, 'epoch': 13.0}


 88%|████████▊ | 20003/22845 [12:30<01:32, 30.60it/s]

{'loss': 0.002, 'grad_norm': 0.0005419133231043816, 'learning_rate': 6.2267454585248415e-06, 'epoch': 13.13}


 92%|█████████▏| 21005/22845 [13:04<01:05, 28.28it/s]

{'loss': 0.0014, 'grad_norm': 0.00045534136006608605, 'learning_rate': 4.038082731451084e-06, 'epoch': 13.79}


 93%|█████████▎| 21320/22845 [13:15<00:50, 30.14it/s]
 93%|█████████▎| 21322/22845 [13:19<00:50, 30.14it/s]

{'eval_loss': 0.177291601896286, 'eval_f1': 0.8482842047876137, 'eval_precision': 0.8571442964936805, 'eval_recall': 0.8410828876186643, 'eval_runtime': 3.472, 'eval_samples_per_second': 438.648, 'eval_steps_per_second': 109.734, 'epoch': 14.0}


 96%|█████████▋| 22004/22845 [13:43<00:27, 30.94it/s]

{'loss': 0.0005, 'grad_norm': 0.03117884136736393, 'learning_rate': 1.8494200043773256e-06, 'epoch': 14.45}


100%|█████████▉| 22842/22845 [14:12<00:00, 29.80it/s]
100%|██████████| 22845/22845 [14:17<00:00, 29.80it/s]

{'eval_loss': 0.17924188077449799, 'eval_f1': 0.8498429443774365, 'eval_precision': 0.8596902550691926, 'eval_recall': 0.8417366960527932, 'eval_runtime': 3.5927, 'eval_samples_per_second': 423.91, 'eval_steps_per_second': 106.047, 'epoch': 15.0}


100%|██████████| 22845/22845 [14:18<00:00, 26.62it/s]


{'train_runtime': 858.0935, 'train_samples_per_second': 106.474, 'train_steps_per_second': 26.623, 'train_loss': 0.02905994662833595, 'epoch': 15.0}


100%|██████████| 381/381 [00:03<00:00, 120.04it/s]


Evaluation Metrics: {'eval_loss': 0.16609293222427368, 'eval_f1': 0.8564557119757568, 'eval_precision': 0.8938516479921997, 'eval_recall': 0.8275945764263746, 'eval_runtime': 3.1868, 'eval_samples_per_second': 477.907, 'eval_steps_per_second': 119.555, 'epoch': 15.0}
CHECK SCORE                                                 model   lan          cat  \
12  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0.01  java      summary   
13  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0.01  java    Ownership   
14  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0.01  java       Expand   
15  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0.01  java        usage   
16  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0.01  java      Pointer   
17  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0.01  java  deprecation   
18  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0.01  java     rational   

    precision    recall        f1  
12   0.930041  0.964438  0.946927  
13   0.948276  1.000000  0.973451  
14 

0,1
eval/f1,▁▆▇▇▇▇▇█████▇███
eval/loss,▃▁▂▃▄▆▅▆▅▇▇▇███▇
eval/precision,▁█▇▇▇▇▇▇▇█▇▇▇▇▇█
eval/recall,▁▅▆▇▇▇▇█████▇███
eval/runtime,▅▇▄▃▄▃▂▁▅█▆▃▂▃▄▁
eval/samples_per_second,▃▂▅▅▄▆▇█▃▁▂▅▆▆▅█
eval/steps_per_second,▃▂▅▅▄▆▇█▃▁▂▅▆▆▅█
train/epoch,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
train/global_step,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
train/grad_norm,▃█▂▁▂▁▁▁▃▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/f1,0.85646
eval/loss,0.16609
eval/precision,0.89385
eval/recall,0.82759
eval/runtime,3.1868
eval/samples_per_second,477.907
eval/steps_per_second,119.555
total_flos,3025990766741760.0
train/epoch,15.0
train/global_step,22845.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1884/1884 [00:00<00:00, 16516.74 examples/s]
  7%|▋         | 375/5655 [00:12<02:53, 30.46it/s]
  7%|▋         | 377/5655 [00:13<02:53, 30.46it/s]

{'eval_loss': 0.3238903284072876, 'eval_f1': 0.322274987100417, 'eval_precision': 0.5662212040218912, 'eval_recall': 0.2839823470427455, 'eval_runtime': 0.8319, 'eval_samples_per_second': 453.184, 'eval_steps_per_second': 114.198, 'epoch': 1.0}


 13%|█▎        | 752/5655 [00:27<02:56, 27.73it/s]
 13%|█▎        | 754/5655 [00:28<02:56, 27.73it/s]

{'eval_loss': 0.3401316702365875, 'eval_f1': 0.5313712906298672, 'eval_precision': 0.7900674338220057, 'eval_recall': 0.48433893087258184, 'eval_runtime': 1.1233, 'eval_samples_per_second': 335.626, 'eval_steps_per_second': 84.574, 'epoch': 2.0}


 18%|█▊        | 1004/5655 [00:38<02:33, 30.30it/s]

{'loss': 0.3256, 'grad_norm': 1.9842981100082397, 'learning_rate': 4.1158267020335985e-05, 'epoch': 2.65}


 20%|██        | 1131/5655 [00:42<02:32, 29.70it/s]
 20%|██        | 1131/5655 [00:43<02:32, 29.70it/s]

{'eval_loss': 0.3673070967197418, 'eval_f1': 0.6035850750556632, 'eval_precision': 0.7238514998716403, 'eval_recall': 0.5420009526903511, 'eval_runtime': 0.8636, 'eval_samples_per_second': 436.522, 'eval_steps_per_second': 109.999, 'epoch': 3.0}


 27%|██▋       | 1508/5655 [00:57<02:30, 27.54it/s]
 27%|██▋       | 1508/5655 [00:58<02:30, 27.54it/s]

{'eval_loss': 0.40413522720336914, 'eval_f1': 0.6331951161330917, 'eval_precision': 0.6848406320048112, 'eval_recall': 0.5959778243126015, 'eval_runtime': 1.0494, 'eval_samples_per_second': 359.268, 'eval_steps_per_second': 90.532, 'epoch': 4.0}


 33%|███▎      | 1883/5655 [01:12<02:18, 27.20it/s]
 33%|███▎      | 1885/5655 [01:13<02:18, 27.20it/s]

{'eval_loss': 0.47719040513038635, 'eval_f1': 0.6635585090882545, 'eval_precision': 0.689203497960052, 'eval_recall': 0.6424182284858283, 'eval_runtime': 0.87, 'eval_samples_per_second': 433.351, 'eval_steps_per_second': 109.2, 'epoch': 5.0}


 35%|███▌      | 2003/5655 [01:18<02:07, 28.71it/s]

{'loss': 0.1142, 'grad_norm': 3.966982841491699, 'learning_rate': 3.2316534040671975e-05, 'epoch': 5.31}


 40%|███▉      | 2261/5655 [01:27<01:56, 29.03it/s]
 40%|████      | 2262/5655 [01:28<01:56, 29.03it/s]

{'eval_loss': 0.5295835733413696, 'eval_f1': 0.6713503700795455, 'eval_precision': 0.6989925377985079, 'eval_recall': 0.6593316529699117, 'eval_runtime': 0.8981, 'eval_samples_per_second': 419.766, 'eval_steps_per_second': 105.777, 'epoch': 6.0}


 47%|████▋     | 2639/5655 [01:41<01:40, 29.92it/s]
 47%|████▋     | 2639/5655 [01:42<01:40, 29.92it/s]

{'eval_loss': 0.5339688062667847, 'eval_f1': 0.7016033639563052, 'eval_precision': 0.7210853634383045, 'eval_recall': 0.6945312913483902, 'eval_runtime': 0.825, 'eval_samples_per_second': 456.971, 'eval_steps_per_second': 115.152, 'epoch': 7.0}


 53%|█████▎    | 3006/5655 [01:56<01:29, 29.59it/s]

{'loss': 0.0356, 'grad_norm': 0.2988766133785248, 'learning_rate': 2.347480106100796e-05, 'epoch': 7.96}


 53%|█████▎    | 3013/5655 [01:56<01:29, 29.57it/s]
 53%|█████▎    | 3016/5655 [01:57<01:29, 29.57it/s]

{'eval_loss': 0.5379941463470459, 'eval_f1': 0.7094472934472935, 'eval_precision': 0.7232396137800527, 'eval_recall': 0.7153186936152987, 'eval_runtime': 0.8821, 'eval_samples_per_second': 427.4, 'eval_steps_per_second': 107.7, 'epoch': 8.0}


 60%|█████▉    | 3392/5655 [02:10<01:13, 30.73it/s]
 60%|██████    | 3393/5655 [02:11<01:13, 30.73it/s]

{'eval_loss': 0.6180746555328369, 'eval_f1': 0.7022910142668823, 'eval_precision': 0.7257123704182528, 'eval_recall': 0.689535933501768, 'eval_runtime': 0.8291, 'eval_samples_per_second': 454.734, 'eval_steps_per_second': 114.588, 'epoch': 9.0}


 67%|██████▋   | 3769/5655 [02:25<01:06, 28.40it/s]
 67%|██████▋   | 3770/5655 [02:26<01:06, 28.40it/s]

{'eval_loss': 0.5968746542930603, 'eval_f1': 0.7048901046517777, 'eval_precision': 0.710151455979986, 'eval_recall': 0.7065711585506919, 'eval_runtime': 0.8503, 'eval_samples_per_second': 443.347, 'eval_steps_per_second': 111.719, 'epoch': 10.0}


 71%|███████   | 4004/5655 [02:35<00:57, 28.65it/s]

{'loss': 0.0089, 'grad_norm': 1.1311196088790894, 'learning_rate': 1.4633068081343945e-05, 'epoch': 10.61}


 73%|███████▎  | 4145/5655 [02:40<00:50, 29.90it/s]
 73%|███████▎  | 4147/5655 [02:41<00:50, 29.90it/s]

{'eval_loss': 0.6143102049827576, 'eval_f1': 0.7146001078114497, 'eval_precision': 0.7157325983075735, 'eval_recall': 0.7183136361175236, 'eval_runtime': 1.0328, 'eval_samples_per_second': 365.024, 'eval_steps_per_second': 91.982, 'epoch': 11.0}


 80%|███████▉  | 4522/5655 [02:55<00:38, 29.41it/s]
 80%|████████  | 4524/5655 [02:56<00:38, 29.41it/s]

{'eval_loss': 0.6496825218200684, 'eval_f1': 0.7010564526278271, 'eval_precision': 0.7115821178959596, 'eval_recall': 0.6928100954940524, 'eval_runtime': 0.7879, 'eval_samples_per_second': 478.508, 'eval_steps_per_second': 120.579, 'epoch': 12.0}


 87%|████████▋ | 4900/5655 [03:09<00:27, 27.67it/s]
 87%|████████▋ | 4901/5655 [03:11<00:27, 27.67it/s]

{'eval_loss': 0.6488313674926758, 'eval_f1': 0.7116483596695746, 'eval_precision': 0.7177511899251028, 'eval_recall': 0.7073510262545943, 'eval_runtime': 1.0489, 'eval_samples_per_second': 359.426, 'eval_steps_per_second': 90.572, 'epoch': 13.0}


 88%|████████▊ | 5003/5655 [03:15<00:22, 28.95it/s]

{'loss': 0.0025, 'grad_norm': 0.006491248495876789, 'learning_rate': 5.7913351016799295e-06, 'epoch': 13.26}


 93%|█████████▎| 5276/5655 [03:24<00:12, 30.85it/s]
 93%|█████████▎| 5278/5655 [03:25<00:12, 30.85it/s]

{'eval_loss': 0.6599661707878113, 'eval_f1': 0.7125447444293551, 'eval_precision': 0.7219597406766164, 'eval_recall': 0.7056740451239342, 'eval_runtime': 0.8876, 'eval_samples_per_second': 424.752, 'eval_steps_per_second': 107.033, 'epoch': 14.0}


100%|█████████▉| 5654/5655 [03:39<00:00, 27.81it/s]
100%|██████████| 5655/5655 [03:40<00:00, 27.81it/s]

{'eval_loss': 0.6623179912567139, 'eval_f1': 0.7093901141242525, 'eval_precision': 0.7218824055443773, 'eval_recall': 0.7003775063350568, 'eval_runtime': 0.93, 'eval_samples_per_second': 405.387, 'eval_steps_per_second': 102.153, 'epoch': 15.0}


100%|██████████| 5655/5655 [03:41<00:00, 25.49it/s]


{'train_runtime': 221.8176, 'train_samples_per_second': 101.908, 'train_steps_per_second': 25.494, 'train_loss': 0.0861867634518604, 'epoch': 15.0}


100%|██████████| 95/95 [00:00<00:00, 110.06it/s]


Evaluation Metrics: {'eval_loss': 0.6143102049827576, 'eval_f1': 0.7146001078114497, 'eval_precision': 0.7157325983075735, 'eval_recall': 0.7183136361175236, 'eval_runtime': 0.8771, 'eval_samples_per_second': 429.827, 'eval_steps_per_second': 108.312, 'epoch': 15.0}
CHECK SCORE                                                 model     lan  \
14  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0.01  python   
15  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0.01  python   
16  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0.01  python   
17  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0.01  python   
18  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0.01  python   

                 cat  precision    recall        f1  
14             Usage   0.849057  0.743802  0.792952  
15        Parameters   0.859649  0.875000  0.867257  
16  DevelopmentNotes   0.580000  0.725000  0.644444  
17            Expand   0.553846  0.521739  0.537313  
18           Summary   0.736111  0.726027  0.731034  
Scores

0,1
eval/f1,▁▅▆▇▇▇██████████
eval/loss,▁▁▂▃▄▅▅▅▇▇▇████▇
eval/precision,▁█▆▅▅▅▆▆▆▆▆▆▆▆▆▆
eval/recall,▁▄▅▆▇▇██████████
eval/runtime,▂█▃▆▃▃▂▃▂▂▆▁▆▃▄▃
eval/samples_per_second,▇▁▆▂▆▅▇▅▇▆▂█▂▅▄▆
eval/steps_per_second,▇▁▆▂▆▅▇▅▇▆▂█▂▅▄▆
train/epoch,▁▁▂▂▃▃▃▃▄▄▅▅▅▆▆▇▇▇▇███
train/global_step,▁▁▂▂▃▃▃▃▄▄▅▅▅▆▆▇▇▇▇███
train/grad_norm,▄█▂▃▁

0,1
eval/f1,0.7146
eval/loss,0.61431
eval/precision,0.71573
eval/recall,0.71831
eval/runtime,0.8771
eval/samples_per_second,429.827
eval/steps_per_second,108.312
total_flos,748646437651200.0
train/epoch,15.0
train/global_step,5655.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1298/1298 [00:00<00:00, 14600.07 examples/s]
  7%|▋         | 260/3900 [00:09<02:12, 27.42it/s]
  7%|▋         | 260/3900 [00:10<02:12, 27.42it/s]

{'eval_loss': 0.2639153003692627, 'eval_f1': 0.34797151614690514, 'eval_precision': 0.6239480198019801, 'eval_recall': 0.2966630425411991, 'eval_runtime': 0.5801, 'eval_samples_per_second': 448.167, 'eval_steps_per_second': 112.042, 'epoch': 1.0}


 13%|█▎        | 519/3900 [00:20<02:06, 26.78it/s]
 13%|█▎        | 520/3900 [00:21<02:06, 26.78it/s]

{'eval_loss': 0.2184857875108719, 'eval_f1': 0.5156541586150779, 'eval_precision': 0.6076294969398418, 'eval_recall': 0.45323943793698335, 'eval_runtime': 0.6866, 'eval_samples_per_second': 378.701, 'eval_steps_per_second': 94.675, 'epoch': 2.0}


 20%|█▉        | 778/3900 [00:30<01:43, 30.11it/s]
 20%|██        | 780/3900 [00:31<01:43, 30.11it/s]

{'eval_loss': 0.22514760494232178, 'eval_f1': 0.4995086683297137, 'eval_precision': 0.5816820896014653, 'eval_recall': 0.455943561995663, 'eval_runtime': 0.5817, 'eval_samples_per_second': 446.998, 'eval_steps_per_second': 111.75, 'epoch': 3.0}


 26%|██▌       | 1005/3900 [00:39<01:38, 29.45it/s]

{'loss': 0.1976, 'grad_norm': 0.13365519046783447, 'learning_rate': 3.717948717948718e-05, 'epoch': 3.85}


 27%|██▋       | 1040/3900 [00:41<01:34, 30.27it/s]
 27%|██▋       | 1040/3900 [00:41<01:34, 30.27it/s]

{'eval_loss': 0.217050701379776, 'eval_f1': 0.6282736664730668, 'eval_precision': 0.8215910754545721, 'eval_recall': 0.5681678156388895, 'eval_runtime': 0.5667, 'eval_samples_per_second': 458.808, 'eval_steps_per_second': 114.702, 'epoch': 4.0}


 33%|███▎      | 1298/3900 [00:51<01:26, 30.06it/s]
 33%|███▎      | 1300/3900 [00:52<01:26, 30.06it/s]

{'eval_loss': 0.24631676077842712, 'eval_f1': 0.6313784371816775, 'eval_precision': 0.8406086079510603, 'eval_recall': 0.5700319760248073, 'eval_runtime': 0.6482, 'eval_samples_per_second': 401.101, 'eval_steps_per_second': 100.275, 'epoch': 5.0}


 40%|███▉      | 1559/3900 [01:02<01:25, 27.34it/s]
 40%|████      | 1560/3900 [01:03<01:25, 27.34it/s]

{'eval_loss': 0.24779146909713745, 'eval_f1': 0.639334781117611, 'eval_precision': 0.8110495739485101, 'eval_recall': 0.5732623412031538, 'eval_runtime': 0.6227, 'eval_samples_per_second': 417.512, 'eval_steps_per_second': 104.378, 'epoch': 6.0}


 47%|████▋     | 1820/3900 [01:12<01:12, 28.52it/s]
 47%|████▋     | 1820/3900 [01:13<01:12, 28.52it/s]

{'eval_loss': 0.24679894745349884, 'eval_f1': 0.6708820314726387, 'eval_precision': 0.7878589144198821, 'eval_recall': 0.6238065588902286, 'eval_runtime': 0.6446, 'eval_samples_per_second': 403.35, 'eval_steps_per_second': 100.837, 'epoch': 7.0}


 51%|█████▏    | 2005/3900 [01:20<01:05, 29.08it/s]

{'loss': 0.0416, 'grad_norm': 0.029730312526226044, 'learning_rate': 2.435897435897436e-05, 'epoch': 7.69}


 53%|█████▎    | 2079/3900 [01:23<01:02, 29.27it/s]
 53%|█████▎    | 2080/3900 [01:23<01:02, 29.27it/s]

{'eval_loss': 0.275287389755249, 'eval_f1': 0.6532787100348874, 'eval_precision': 0.7958660387231815, 'eval_recall': 0.608286882302509, 'eval_runtime': 0.5918, 'eval_samples_per_second': 439.374, 'eval_steps_per_second': 109.844, 'epoch': 8.0}


 60%|██████    | 2340/3900 [01:33<00:54, 28.45it/s]
 60%|██████    | 2340/3900 [01:34<00:54, 28.45it/s]

{'eval_loss': 0.25924283266067505, 'eval_f1': 0.6568399931634387, 'eval_precision': 0.8154249794257534, 'eval_recall': 0.5958048164438904, 'eval_runtime': 0.5479, 'eval_samples_per_second': 474.537, 'eval_steps_per_second': 118.634, 'epoch': 9.0}


 67%|██████▋   | 2598/3900 [01:44<00:45, 28.73it/s]
 67%|██████▋   | 2600/3900 [01:45<00:45, 28.73it/s]

{'eval_loss': 0.2889151871204376, 'eval_f1': 0.6451641774320155, 'eval_precision': 0.7848750875403118, 'eval_recall': 0.6021710857238797, 'eval_runtime': 0.5932, 'eval_samples_per_second': 438.299, 'eval_steps_per_second': 109.575, 'epoch': 10.0}


 73%|███████▎  | 2858/3900 [01:54<00:35, 29.26it/s]
 73%|███████▎  | 2860/3900 [01:55<00:35, 29.26it/s]

{'eval_loss': 0.28610852360725403, 'eval_f1': 0.655049102185276, 'eval_precision': 0.7175373593054984, 'eval_recall': 0.618849001887185, 'eval_runtime': 0.6748, 'eval_samples_per_second': 385.28, 'eval_steps_per_second': 96.32, 'epoch': 11.0}


 77%|███████▋  | 3003/3900 [02:00<00:29, 30.49it/s]

{'loss': 0.0109, 'grad_norm': 0.008419076912105083, 'learning_rate': 1.153846153846154e-05, 'epoch': 11.54}


 80%|███████▉  | 3118/3900 [02:04<00:27, 27.98it/s]
 80%|████████  | 3120/3900 [02:05<00:27, 27.98it/s]

{'eval_loss': 0.2993468940258026, 'eval_f1': 0.6555183581489755, 'eval_precision': 0.8206395963364332, 'eval_recall': 0.6018450939410279, 'eval_runtime': 0.783, 'eval_samples_per_second': 332.075, 'eval_steps_per_second': 83.019, 'epoch': 12.0}


 87%|████████▋ | 3380/3900 [02:15<00:17, 29.60it/s]
 87%|████████▋ | 3380/3900 [02:16<00:17, 29.60it/s]

{'eval_loss': 0.29642462730407715, 'eval_f1': 0.6744210070179335, 'eval_precision': 0.8284955789680473, 'eval_recall': 0.6190313727078538, 'eval_runtime': 0.5945, 'eval_samples_per_second': 437.325, 'eval_steps_per_second': 109.331, 'epoch': 13.0}


 93%|█████████▎| 3639/3900 [02:26<00:09, 28.99it/s]
 93%|█████████▎| 3640/3900 [02:26<00:08, 28.99it/s]

{'eval_loss': 0.3025836646556854, 'eval_f1': 0.675305853566723, 'eval_precision': 0.8530507007525746, 'eval_recall': 0.6150886892514924, 'eval_runtime': 0.5671, 'eval_samples_per_second': 458.45, 'eval_steps_per_second': 114.613, 'epoch': 14.0}


100%|█████████▉| 3898/3900 [02:36<00:00, 29.75it/s]
100%|██████████| 3900/3900 [02:37<00:00, 29.75it/s]

{'eval_loss': 0.30390530824661255, 'eval_f1': 0.6742837527371032, 'eval_precision': 0.8519458322279438, 'eval_recall': 0.6150886892514924, 'eval_runtime': 0.6136, 'eval_samples_per_second': 423.698, 'eval_steps_per_second': 105.924, 'epoch': 15.0}


100%|██████████| 3900/3900 [02:38<00:00, 24.54it/s]


{'train_runtime': 158.9451, 'train_samples_per_second': 97.958, 'train_steps_per_second': 24.537, 'train_loss': 0.0648883354358184, 'epoch': 15.0}


100%|██████████| 65/65 [00:00<00:00, 104.20it/s]


Evaluation Metrics: {'eval_loss': 0.3025836646556854, 'eval_f1': 0.675305853566723, 'eval_precision': 0.8530507007525746, 'eval_recall': 0.6150886892514924, 'eval_runtime': 0.6368, 'eval_samples_per_second': 408.315, 'eval_steps_per_second': 102.079, 'epoch': 15.0}
CHECK SCORE                                                 model    lan  \
12  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0.01  pharo   
13  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0.01  pharo   
14  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0.01  pharo   
15  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0.01  pharo   
16  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0.01  pharo   
17  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0.01  pharo   
18  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0.01  pharo   

                        cat  precision    recall        f1  
12  Keyimplementationpoints   0.800000  0.666667  0.727273  
13                  Example   0.938144  0.900990  0.919192  
14         Responsibilitie

0,1
eval/f1,▁▅▄▇▇▇███▇██████
eval/loss,▅▁▂▁▃▃▃▆▄▇▇█▇███
eval/precision,▂▂▁▇█▇▆▇▇▆▅▇▇███
eval/recall,▁▄▄▇▇▇██▇███████
eval/runtime,▂▅▂▂▄▃▄▂▁▂▅█▂▂▃▄
eval/samples_per_second,▇▃▇▇▄▅▅▆█▆▄▁▆▇▆▅
eval/steps_per_second,▇▃▇▇▄▅▅▆█▆▄▁▆▇▆▅
train/epoch,▁▁▂▂▃▃▃▄▄▅▅▅▆▆▇▇▇███
train/global_step,▁▁▂▂▃▃▃▄▄▅▅▅▆▆▇▇▇███
train/grad_norm,█▂▁

0,1
eval/f1,0.67531
eval/loss,0.30258
eval/precision,0.85305
eval/recall,0.61509
eval/runtime,0.6368
eval/samples_per_second,408.315
eval/steps_per_second,102.079
total_flos,515675326855680.0
train/epoch,15.0
train/global_step,3900.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 7614/7614 [00:00<00:00, 20257.96 examples/s]
  4%|▍         | 1003/22845 [00:35<12:33, 28.97it/s]

{'loss': 0.2771, 'grad_norm': 0.5761655569076538, 'learning_rate': 4.781133727292624e-06, 'epoch': 0.66}


  7%|▋         | 1520/22845 [00:52<12:12, 29.10it/s]
  7%|▋         | 1523/22845 [00:56<12:12, 29.10it/s]

{'eval_loss': 0.14447197318077087, 'eval_f1': 0.5256945393806732, 'eval_precision': 0.5270225870252195, 'eval_recall': 0.5250400746968252, 'eval_runtime': 3.3907, 'eval_samples_per_second': 449.176, 'eval_steps_per_second': 112.368, 'epoch': 1.0}


  9%|▉         | 2004/22845 [01:14<12:07, 28.64it/s]  

{'loss': 0.1466, 'grad_norm': 1.4978907108306885, 'learning_rate': 4.562267454585248e-06, 'epoch': 1.31}


 13%|█▎        | 3004/22845 [01:48<11:08, 29.70it/s]

{'loss': 0.115, 'grad_norm': 0.9867639541625977, 'learning_rate': 4.343401181877873e-06, 'epoch': 1.97}


 13%|█▎        | 3046/22845 [01:50<11:13, 29.41it/s]
 13%|█▎        | 3046/22845 [01:53<11:13, 29.41it/s]

{'eval_loss': 0.11473934352397919, 'eval_f1': 0.5471973862687426, 'eval_precision': 0.655117699112914, 'eval_recall': 0.539425898107128, 'eval_runtime': 3.4326, 'eval_samples_per_second': 443.69, 'eval_steps_per_second': 110.995, 'epoch': 2.0}


 18%|█▊        | 4004/22845 [02:27<10:27, 30.04it/s]  

{'loss': 0.093, 'grad_norm': 0.10720562189817429, 'learning_rate': 4.124534909170497e-06, 'epoch': 2.63}


 20%|█▉        | 4567/22845 [02:47<11:02, 27.59it/s]
 20%|██        | 4569/22845 [02:50<11:02, 27.59it/s]

{'eval_loss': 0.09868783503770828, 'eval_f1': 0.7563306818049019, 'eval_precision': 0.9083364867947292, 'eval_recall': 0.69252063659694, 'eval_runtime': 3.3576, 'eval_samples_per_second': 453.597, 'eval_steps_per_second': 113.474, 'epoch': 3.0}


 22%|██▏       | 5005/22845 [03:06<10:08, 29.32it/s]  

{'loss': 0.0844, 'grad_norm': 10.29241943359375, 'learning_rate': 3.905668636463121e-06, 'epoch': 3.28}


 26%|██▋       | 6003/22845 [03:41<10:10, 27.57it/s]

{'loss': 0.0754, 'grad_norm': 2.1655008792877197, 'learning_rate': 3.6868023637557455e-06, 'epoch': 3.94}


 27%|██▋       | 6091/22845 [03:44<09:20, 29.90it/s]
 27%|██▋       | 6092/22845 [03:47<09:20, 29.90it/s]

{'eval_loss': 0.10489629209041595, 'eval_f1': 0.7786445726971618, 'eval_precision': 0.9225330722011449, 'eval_recall': 0.7216563587522955, 'eval_runtime': 3.2931, 'eval_samples_per_second': 462.484, 'eval_steps_per_second': 115.697, 'epoch': 4.0}


 31%|███       | 7005/22845 [04:20<09:40, 27.27it/s]  

{'loss': 0.063, 'grad_norm': 0.04300951957702637, 'learning_rate': 3.4679360910483694e-06, 'epoch': 4.6}


 33%|███▎      | 7615/22845 [04:40<08:19, 30.49it/s]
 33%|███▎      | 7615/22845 [04:44<08:19, 30.49it/s]

{'eval_loss': 0.0969235897064209, 'eval_f1': 0.8218413064672133, 'eval_precision': 0.8994585009312102, 'eval_recall': 0.773058856104065, 'eval_runtime': 3.5995, 'eval_samples_per_second': 423.111, 'eval_steps_per_second': 105.847, 'epoch': 5.0}


 35%|███▌      | 8003/22845 [04:59<08:47, 28.15it/s]  

{'loss': 0.063, 'grad_norm': 0.05805671215057373, 'learning_rate': 3.2490698183409942e-06, 'epoch': 5.25}


 39%|███▉      | 9005/22845 [05:33<07:55, 29.08it/s]

{'loss': 0.0505, 'grad_norm': 11.14932918548584, 'learning_rate': 3.030203545633618e-06, 'epoch': 5.91}


 40%|███▉      | 9135/22845 [05:38<08:11, 27.88it/s]
 40%|████      | 9138/22845 [05:42<08:11, 27.88it/s]

{'eval_loss': 0.09434119611978531, 'eval_f1': 0.8334304028524652, 'eval_precision': 0.9095940744103006, 'eval_recall': 0.7844243591431254, 'eval_runtime': 4.0365, 'eval_samples_per_second': 377.311, 'eval_steps_per_second': 94.39, 'epoch': 6.0}


 44%|████▍     | 10004/22845 [06:13<07:21, 29.09it/s] 

{'loss': 0.0456, 'grad_norm': 0.04525031894445419, 'learning_rate': 2.811337272926242e-06, 'epoch': 6.57}


 47%|████▋     | 10659/22845 [06:35<07:20, 27.66it/s]
 47%|████▋     | 10661/22845 [06:39<07:20, 27.66it/s]

{'eval_loss': 0.10049441456794739, 'eval_f1': 0.8370817118886839, 'eval_precision': 0.9015807285246783, 'eval_recall': 0.7922084036231298, 'eval_runtime': 3.7659, 'eval_samples_per_second': 404.416, 'eval_steps_per_second': 101.17, 'epoch': 7.0}


 48%|████▊     | 11003/22845 [06:52<06:51, 28.75it/s]  

{'loss': 0.0434, 'grad_norm': 0.04738566279411316, 'learning_rate': 2.5924710002188665e-06, 'epoch': 7.22}


 53%|█████▎    | 12004/22845 [07:26<06:31, 27.71it/s]

{'loss': 0.0385, 'grad_norm': 0.02939128316938877, 'learning_rate': 2.3736047275114905e-06, 'epoch': 7.88}


 53%|█████▎    | 12183/22845 [07:33<05:56, 29.93it/s]
 53%|█████▎    | 12184/22845 [07:36<05:56, 29.93it/s]

{'eval_loss': 0.09896978735923767, 'eval_f1': 0.8528345321782504, 'eval_precision': 0.8892910441821422, 'eval_recall': 0.8269041180700124, 'eval_runtime': 3.4751, 'eval_samples_per_second': 438.262, 'eval_steps_per_second': 109.637, 'epoch': 8.0}


 57%|█████▋    | 13003/22845 [08:05<06:04, 26.97it/s]  

{'loss': 0.0304, 'grad_norm': 6.387925624847412, 'learning_rate': 2.154738454804115e-06, 'epoch': 8.54}


 60%|█████▉    | 13706/22845 [08:30<05:01, 30.34it/s]
 60%|██████    | 13707/22845 [08:34<05:01, 30.34it/s]

{'eval_loss': 0.10250886529684067, 'eval_f1': 0.8521786192881112, 'eval_precision': 0.8789377860622779, 'eval_recall': 0.8307743641570535, 'eval_runtime': 3.4482, 'eval_samples_per_second': 441.678, 'eval_steps_per_second': 110.492, 'epoch': 9.0}


 61%|██████▏   | 14005/22845 [08:44<05:09, 28.55it/s]

{'loss': 0.0335, 'grad_norm': 0.11393134295940399, 'learning_rate': 1.9358721820967393e-06, 'epoch': 9.19}


 66%|██████▌   | 15005/22845 [09:19<04:37, 28.30it/s]

{'loss': 0.0263, 'grad_norm': 0.018971897661685944, 'learning_rate': 1.7170059093893632e-06, 'epoch': 9.85}


 67%|██████▋   | 15228/22845 [09:27<04:39, 27.27it/s]
 67%|██████▋   | 15230/22845 [09:31<04:39, 27.27it/s]

{'eval_loss': 0.10149437934160233, 'eval_f1': 0.8508550021875266, 'eval_precision': 0.8843547817334033, 'eval_recall': 0.8252958555210349, 'eval_runtime': 3.5501, 'eval_samples_per_second': 429.002, 'eval_steps_per_second': 107.321, 'epoch': 10.0}


 70%|███████   | 16006/22845 [09:58<03:55, 28.99it/s]

{'loss': 0.023, 'grad_norm': 0.16762614250183105, 'learning_rate': 1.4981396366819876e-06, 'epoch': 10.51}


 73%|███████▎  | 16751/22845 [10:24<03:22, 30.06it/s]
 73%|███████▎  | 16753/22845 [10:28<03:22, 30.06it/s]

{'eval_loss': 0.10273660719394684, 'eval_f1': 0.8438491152266951, 'eval_precision': 0.8658849068073303, 'eval_recall': 0.8251903506023461, 'eval_runtime': 3.3407, 'eval_samples_per_second': 455.89, 'eval_steps_per_second': 114.047, 'epoch': 11.0}


 74%|███████▍  | 17002/22845 [10:37<03:25, 28.49it/s]

{'loss': 0.0234, 'grad_norm': 0.012165950611233711, 'learning_rate': 1.2792733639746116e-06, 'epoch': 11.16}


 79%|███████▉  | 18003/22845 [11:12<02:41, 29.91it/s]

{'loss': 0.0214, 'grad_norm': 0.033902548253536224, 'learning_rate': 1.0604070912672358e-06, 'epoch': 11.82}


 80%|███████▉  | 18275/22845 [11:21<02:36, 29.28it/s]
 80%|████████  | 18276/22845 [11:24<02:36, 29.28it/s]

{'eval_loss': 0.10549398511648178, 'eval_f1': 0.8476135497297038, 'eval_precision': 0.8714255340859579, 'eval_recall': 0.827094444404984, 'eval_runtime': 3.2444, 'eval_samples_per_second': 469.421, 'eval_steps_per_second': 117.432, 'epoch': 12.0}


 83%|████████▎ | 19003/22845 [11:50<02:08, 29.80it/s]

{'loss': 0.0195, 'grad_norm': 0.21027763187885284, 'learning_rate': 8.4154081855986e-07, 'epoch': 12.48}


 87%|████████▋ | 19796/22845 [12:18<01:39, 30.73it/s]
 87%|████████▋ | 19799/22845 [12:22<01:39, 30.73it/s]

{'eval_loss': 0.10666470974683762, 'eval_f1': 0.8465127498085633, 'eval_precision': 0.8667593663285952, 'eval_recall': 0.8290610779121057, 'eval_runtime': 3.4815, 'eval_samples_per_second': 437.457, 'eval_steps_per_second': 109.436, 'epoch': 13.0}


 88%|████████▊ | 20006/22845 [12:30<01:35, 29.79it/s]

{'loss': 0.0182, 'grad_norm': 15.266670227050781, 'learning_rate': 6.226745458524842e-07, 'epoch': 13.13}


 92%|█████████▏| 21003/22845 [13:04<01:00, 30.23it/s]

{'loss': 0.0181, 'grad_norm': 0.01398453302681446, 'learning_rate': 4.038082731451084e-07, 'epoch': 13.79}


 93%|█████████▎| 21321/22845 [13:14<00:49, 30.79it/s]
 93%|█████████▎| 21322/22845 [13:19<00:49, 30.79it/s]

{'eval_loss': 0.10866724699735641, 'eval_f1': 0.8532847748579167, 'eval_precision': 0.8784333243580978, 'eval_recall': 0.8340572295190298, 'eval_runtime': 4.2713, 'eval_samples_per_second': 356.57, 'eval_steps_per_second': 89.201, 'epoch': 14.0}


 96%|█████████▋| 22002/22845 [13:43<00:30, 27.67it/s]

{'loss': 0.0181, 'grad_norm': 10.351978302001953, 'learning_rate': 1.8494200043773256e-07, 'epoch': 14.45}


100%|██████████| 22845/22845 [14:12<00:00, 28.44it/s]
100%|██████████| 22845/22845 [14:17<00:00, 28.44it/s]

{'eval_loss': 0.10818362981081009, 'eval_f1': 0.8464762392140279, 'eval_precision': 0.8684786170549315, 'eval_recall': 0.8274959199944297, 'eval_runtime': 4.0153, 'eval_samples_per_second': 379.301, 'eval_steps_per_second': 94.888, 'epoch': 15.0}


100%|██████████| 22845/22845 [14:18<00:00, 26.61it/s]


{'train_runtime': 858.6113, 'train_samples_per_second': 106.41, 'train_steps_per_second': 26.607, 'train_loss': 0.05865270389616163, 'epoch': 15.0}


100%|██████████| 381/381 [00:03<00:00, 110.39it/s]


Evaluation Metrics: {'eval_loss': 0.10866724699735641, 'eval_f1': 0.8532847748579167, 'eval_precision': 0.8784333243580978, 'eval_recall': 0.8340572295190298, 'eval_runtime': 3.4614, 'eval_samples_per_second': 440.0, 'eval_steps_per_second': 110.072, 'epoch': 15.0}
CHECK SCORE                                                 model   lan          cat  \
12  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0.01  java      summary   
13  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0.01  java    Ownership   
14  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0.01  java       Expand   
15  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0.01  java        usage   
16  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0.01  java      Pointer   
17  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0.01  java  deprecation   
18  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0.01  java     rational   

    precision    recall        f1  
12   0.941341  0.958748  0.949965  
13   0.964912  1.000000  0.982143  
14   

0,1
eval/f1,▁▁▆▆▇███████████
eval/loss,█▄▂▂▁▁▂▂▂▂▂▃▃▃▃▃
eval/precision,▁▃█████▇▇▇▇▇▇▇▇▇
eval/recall,▁▁▅▅▇▇▇█████████
eval/runtime,▂▂▂▁▃▆▅▃▂▃▂▁▃█▆▂
eval/samples_per_second,▇▆▇█▅▂▄▆▆▅▇█▆▁▂▆
eval/steps_per_second,▇▆▇█▅▂▄▆▆▅▇█▆▁▂▆
train/epoch,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
train/global_step,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
train/grad_norm,▁▂▁▁▆▂▁▁▆▁▁▁▄▁▁▁▁▁▁█▁▆

0,1
eval/f1,0.85328
eval/loss,0.10867
eval/precision,0.87843
eval/recall,0.83406
eval/runtime,3.4614
eval/samples_per_second,440.0
eval/steps_per_second,110.072
total_flos,3025990766741760.0
train/epoch,15.0
train/global_step,22845.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1884/1884 [00:00<00:00, 15222.60 examples/s]
  7%|▋         | 376/5655 [00:12<03:04, 28.56it/s]
  7%|▋         | 377/5655 [00:14<03:04, 28.56it/s]

{'eval_loss': 0.4678226411342621, 'eval_f1': 0.003278688524590164, 'eval_precision': 0.2, 'eval_recall': 0.001652892561983471, 'eval_runtime': 1.0793, 'eval_samples_per_second': 349.296, 'eval_steps_per_second': 88.019, 'epoch': 1.0}


 13%|█▎        | 753/5655 [00:27<02:57, 27.65it/s]
 13%|█▎        | 754/5655 [00:28<02:57, 27.65it/s]

{'eval_loss': 0.4107454717159271, 'eval_f1': 0.161015873015873, 'eval_precision': 0.3888888888888889, 'eval_recall': 0.10929752066115701, 'eval_runtime': 0.8195, 'eval_samples_per_second': 460.038, 'eval_steps_per_second': 115.925, 'epoch': 2.0}


 18%|█▊        | 1002/5655 [00:38<02:47, 27.76it/s]

{'loss': 0.4566, 'grad_norm': 2.526475429534912, 'learning_rate': 4.115826702033599e-06, 'epoch': 2.65}


 20%|██        | 1131/5655 [00:42<02:43, 27.59it/s]
 20%|██        | 1131/5655 [00:43<02:43, 27.59it/s]

{'eval_loss': 0.3678071200847626, 'eval_f1': 0.3826934673366834, 'eval_precision': 0.5548097871627283, 'eval_recall': 0.2977824634891883, 'eval_runtime': 1.0502, 'eval_samples_per_second': 358.983, 'eval_steps_per_second': 90.46, 'epoch': 3.0}


 27%|██▋       | 1506/5655 [00:57<02:25, 28.46it/s]
 27%|██▋       | 1508/5655 [00:58<02:25, 28.46it/s]

{'eval_loss': 0.3464376628398895, 'eval_f1': 0.4272533355774938, 'eval_precision': 0.6957693263504744, 'eval_recall': 0.342559328970342, 'eval_runtime': 0.828, 'eval_samples_per_second': 455.292, 'eval_steps_per_second': 114.729, 'epoch': 4.0}


 33%|███▎      | 1883/5655 [01:12<02:14, 28.09it/s]
 33%|███▎      | 1885/5655 [01:13<02:14, 28.09it/s]

{'eval_loss': 0.33032411336898804, 'eval_f1': 0.45730823443083946, 'eval_precision': 0.6237892603850052, 'eval_recall': 0.4038566555540945, 'eval_runtime': 0.8387, 'eval_samples_per_second': 449.513, 'eval_steps_per_second': 113.273, 'epoch': 5.0}


 35%|███▌      | 2003/5655 [01:18<02:09, 28.18it/s]

{'loss': 0.3147, 'grad_norm': 4.024868965148926, 'learning_rate': 3.2316534040671975e-06, 'epoch': 5.31}


 40%|███▉      | 2259/5655 [01:27<01:49, 30.94it/s]
 40%|████      | 2262/5655 [01:28<01:49, 30.94it/s]

{'eval_loss': 0.32194623351097107, 'eval_f1': 0.5081342438674269, 'eval_precision': 0.6211995593909554, 'eval_recall': 0.4565552133808401, 'eval_runtime': 0.7969, 'eval_samples_per_second': 473.058, 'eval_steps_per_second': 119.206, 'epoch': 6.0}


 47%|████▋     | 2639/5655 [01:41<01:39, 30.43it/s]
 47%|████▋     | 2639/5655 [01:42<01:39, 30.43it/s]

{'eval_loss': 0.31744080781936646, 'eval_f1': 0.500845007678323, 'eval_precision': 0.6280708061185006, 'eval_recall': 0.45696244778016937, 'eval_runtime': 0.8623, 'eval_samples_per_second': 437.222, 'eval_steps_per_second': 110.175, 'epoch': 7.0}


 53%|█████▎    | 3004/5655 [01:56<01:32, 28.80it/s]

{'loss': 0.2335, 'grad_norm': 4.056074142456055, 'learning_rate': 2.347480106100796e-06, 'epoch': 7.96}


 53%|█████▎    | 3013/5655 [01:56<01:34, 28.00it/s]
 53%|█████▎    | 3016/5655 [01:57<01:34, 28.00it/s]

{'eval_loss': 0.30935192108154297, 'eval_f1': 0.5247998095703765, 'eval_precision': 0.6388636282267015, 'eval_recall': 0.47863662205464685, 'eval_runtime': 0.8075, 'eval_samples_per_second': 466.863, 'eval_steps_per_second': 117.644, 'epoch': 8.0}


 60%|█████▉    | 3390/5655 [02:11<01:15, 29.83it/s]
 60%|██████    | 3393/5655 [02:12<01:15, 29.83it/s]

{'eval_loss': 0.31748175621032715, 'eval_f1': 0.5277298474945534, 'eval_precision': 0.6443141952596904, 'eval_recall': 0.4868663126629178, 'eval_runtime': 0.8451, 'eval_samples_per_second': 446.117, 'eval_steps_per_second': 112.417, 'epoch': 9.0}


 67%|██████▋   | 3769/5655 [02:26<01:04, 29.31it/s]
 67%|██████▋   | 3770/5655 [02:27<01:04, 29.31it/s]

{'eval_loss': 0.3186889886856079, 'eval_f1': 0.5273749989729998, 'eval_precision': 0.642719971283708, 'eval_recall': 0.4865043629913141, 'eval_runtime': 1.0979, 'eval_samples_per_second': 343.396, 'eval_steps_per_second': 86.532, 'epoch': 10.0}


 71%|███████   | 4005/5655 [02:36<00:56, 29.03it/s]

{'loss': 0.1829, 'grad_norm': 14.238274574279785, 'learning_rate': 1.4633068081343946e-06, 'epoch': 10.61}


 73%|███████▎  | 4145/5655 [02:41<00:54, 27.49it/s]
 73%|███████▎  | 4147/5655 [02:42<00:54, 27.49it/s]

{'eval_loss': 0.312686562538147, 'eval_f1': 0.5481632781305261, 'eval_precision': 0.8306502743344849, 'eval_recall': 0.5001788190061783, 'eval_runtime': 0.8752, 'eval_samples_per_second': 430.769, 'eval_steps_per_second': 108.549, 'epoch': 11.0}


 80%|███████▉  | 4523/5655 [02:56<00:40, 27.95it/s]
 80%|████████  | 4524/5655 [02:57<00:40, 27.95it/s]

{'eval_loss': 0.31043490767478943, 'eval_f1': 0.5747291388821937, 'eval_precision': 0.8111501831501832, 'eval_recall': 0.5215005236339635, 'eval_runtime': 0.9647, 'eval_samples_per_second': 390.796, 'eval_steps_per_second': 98.476, 'epoch': 12.0}


 87%|████████▋ | 4899/5655 [03:10<00:27, 27.75it/s]
 87%|████████▋ | 4901/5655 [03:11<00:27, 27.75it/s]

{'eval_loss': 0.31467142701148987, 'eval_f1': 0.5919782695077342, 'eval_precision': 0.8232745618141155, 'eval_recall': 0.5332776022955513, 'eval_runtime': 0.8391, 'eval_samples_per_second': 449.302, 'eval_steps_per_second': 113.219, 'epoch': 13.0}


 88%|████████▊ | 5003/5655 [03:15<00:22, 29.15it/s]

{'loss': 0.1475, 'grad_norm': 5.150198936462402, 'learning_rate': 5.79133510167993e-07, 'epoch': 13.26}


 93%|█████████▎| 5277/5655 [03:25<00:13, 28.39it/s]
 93%|█████████▎| 5278/5655 [03:26<00:13, 28.39it/s]

{'eval_loss': 0.3125326931476593, 'eval_f1': 0.5967898954244027, 'eval_precision': 0.8151111772142485, 'eval_recall': 0.5403117637862346, 'eval_runtime': 0.8491, 'eval_samples_per_second': 444.018, 'eval_steps_per_second': 111.888, 'epoch': 14.0}


100%|██████████| 5655/5655 [03:40<00:00, 30.95it/s]
100%|██████████| 5655/5655 [03:41<00:00, 30.95it/s]

{'eval_loss': 0.3131919503211975, 'eval_f1': 0.5951766597256221, 'eval_precision': 0.8167474682180564, 'eval_recall': 0.535503275173398, 'eval_runtime': 0.8132, 'eval_samples_per_second': 463.575, 'eval_steps_per_second': 116.816, 'epoch': 15.0}


100%|██████████| 5655/5655 [03:42<00:00, 25.36it/s]


{'train_runtime': 222.9792, 'train_samples_per_second': 101.377, 'train_steps_per_second': 25.361, 'train_loss': 0.25191891212362194, 'epoch': 15.0}


100%|██████████| 95/95 [00:00<00:00, 115.53it/s]


Evaluation Metrics: {'eval_loss': 0.3125326931476593, 'eval_f1': 0.5967898954244027, 'eval_precision': 0.8151111772142485, 'eval_recall': 0.5403117637862346, 'eval_runtime': 0.8358, 'eval_samples_per_second': 451.049, 'eval_steps_per_second': 113.659, 'epoch': 15.0}
CHECK SCORE                                                 model     lan  \
14  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0.01  python   
15  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0.01  python   
16  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0.01  python   
17  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0.01  python   
18  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0.01  python   

                 cat  precision    recall        f1  
14             Usage   0.852941  0.719008  0.780269  
15        Parameters   0.831858  0.839286  0.835556  
16  DevelopmentNotes   1.000000  0.125000  0.222222  
17            Expand   0.676471  0.333333  0.446602  
18           Summary   0.714286  0.684932  0.699301  
Scores

0,1
eval/f1,▁▃▅▆▆▇▇▇▇▇▇█████
eval/loss,█▅▄▃▂▂▁▁▁▁▁▁▁▁▁▁
eval/precision,▁▃▅▇▆▆▆▆▆▆██████
eval/recall,▁▂▅▅▆▇▇▇▇▇▇█████
eval/runtime,█▂▇▂▂▁▃▁▂█▃▅▂▂▁▂
eval/samples_per_second,▁▇▂▇▇█▆█▇▁▆▄▇▆▇▇
eval/steps_per_second,▁▇▂▇▇█▆█▇▁▆▄▇▆▇▇
train/epoch,▁▁▂▂▃▃▃▃▄▄▅▅▅▆▆▇▇▇▇███
train/global_step,▁▁▂▂▃▃▃▃▄▄▅▅▅▆▆▇▇▇▇███
train/grad_norm,▁▂▂█▃

0,1
eval/f1,0.59679
eval/loss,0.31253
eval/precision,0.81511
eval/recall,0.54031
eval/runtime,0.8358
eval/samples_per_second,451.049
eval/steps_per_second,113.659
total_flos,748646437651200.0
train/epoch,15.0
train/global_step,5655.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1298/1298 [00:00<00:00, 12369.32 examples/s]
  7%|▋         | 260/3900 [00:09<02:01, 29.87it/s]
  7%|▋         | 260/3900 [00:09<02:01, 29.87it/s]

{'eval_loss': 0.40901485085487366, 'eval_f1': 0.0, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_runtime': 0.6335, 'eval_samples_per_second': 410.415, 'eval_steps_per_second': 102.604, 'epoch': 1.0}


 13%|█▎        | 520/3900 [00:19<01:59, 28.25it/s]
 13%|█▎        | 520/3900 [00:20<01:59, 28.25it/s]

{'eval_loss': 0.3517684042453766, 'eval_f1': 0.11065235342691991, 'eval_precision': 0.13293650793650794, 'eval_recall': 0.09476661951909478, 'eval_runtime': 0.5464, 'eval_samples_per_second': 475.862, 'eval_steps_per_second': 118.965, 'epoch': 2.0}


 20%|█▉        | 777/3900 [00:30<01:48, 28.80it/s]
 20%|██        | 780/3900 [00:30<01:48, 28.80it/s]

{'eval_loss': 0.30836021900177, 'eval_f1': 0.12956293628562535, 'eval_precision': 0.2759740259740259, 'eval_recall': 0.11884016973125885, 'eval_runtime': 0.6692, 'eval_samples_per_second': 388.521, 'eval_steps_per_second': 97.13, 'epoch': 3.0}


 26%|██▌       | 1005/3900 [00:39<01:38, 29.54it/s]

{'loss': 0.363, 'grad_norm': 1.366237998008728, 'learning_rate': 3.7179487179487184e-06, 'epoch': 3.85}


 27%|██▋       | 1038/3900 [00:40<01:36, 29.72it/s]
 27%|██▋       | 1040/3900 [00:41<01:36, 29.72it/s]

{'eval_loss': 0.27770838141441345, 'eval_f1': 0.17227089346905017, 'eval_precision': 0.3521008403361345, 'eval_recall': 0.14264969354078266, 'eval_runtime': 0.5349, 'eval_samples_per_second': 486.111, 'eval_steps_per_second': 121.528, 'epoch': 4.0}


 33%|███▎      | 1298/3900 [00:51<01:28, 29.24it/s]
 33%|███▎      | 1300/3900 [00:51<01:28, 29.24it/s]

{'eval_loss': 0.25728198885917664, 'eval_f1': 0.33600379020323257, 'eval_precision': 0.6790553619821912, 'eval_recall': 0.2449991345090585, 'eval_runtime': 0.5286, 'eval_samples_per_second': 491.831, 'eval_steps_per_second': 122.958, 'epoch': 5.0}


 40%|███▉      | 1557/3900 [01:01<01:22, 28.45it/s]
 40%|████      | 1560/3900 [01:02<01:22, 28.45it/s]

{'eval_loss': 0.23844178020954132, 'eval_f1': 0.4112727078561152, 'eval_precision': 0.6705980877939842, 'eval_recall': 0.3146372597357549, 'eval_runtime': 0.6302, 'eval_samples_per_second': 412.555, 'eval_steps_per_second': 103.139, 'epoch': 6.0}


 47%|████▋     | 1819/3900 [01:12<01:13, 28.22it/s]
 47%|████▋     | 1820/3900 [01:13<01:13, 28.22it/s]

{'eval_loss': 0.2265237420797348, 'eval_f1': 0.44722025420528627, 'eval_precision': 0.6746742743383282, 'eval_recall': 0.35039123282943285, 'eval_runtime': 0.6011, 'eval_samples_per_second': 432.507, 'eval_steps_per_second': 108.127, 'epoch': 7.0}


 51%|█████▏    | 2005/3900 [01:20<01:06, 28.48it/s]

{'loss': 0.2152, 'grad_norm': 0.6480087637901306, 'learning_rate': 2.435897435897436e-06, 'epoch': 7.69}


 53%|█████▎    | 2078/3900 [01:22<01:02, 29.22it/s]
 53%|█████▎    | 2080/3900 [01:23<01:02, 29.22it/s]

{'eval_loss': 0.21884754300117493, 'eval_f1': 0.4815876186680674, 'eval_precision': 0.6413528960112812, 'eval_recall': 0.3981132719849219, 'eval_runtime': 0.7494, 'eval_samples_per_second': 346.943, 'eval_steps_per_second': 86.736, 'epoch': 8.0}


 60%|█████▉    | 2337/3900 [01:33<00:57, 27.36it/s]
 60%|██████    | 2340/3900 [01:33<00:57, 27.36it/s]

{'eval_loss': 0.2078242152929306, 'eval_f1': 0.4971667898497167, 'eval_precision': 0.6463551329622759, 'eval_recall': 0.4101738727932715, 'eval_runtime': 0.5389, 'eval_samples_per_second': 482.467, 'eval_steps_per_second': 120.617, 'epoch': 9.0}


 67%|██████▋   | 2599/3900 [01:43<00:44, 29.35it/s]
 67%|██████▋   | 2600/3900 [01:44<00:44, 29.35it/s]

{'eval_loss': 0.20918521285057068, 'eval_f1': 0.5000574557582023, 'eval_precision': 0.6354173776534024, 'eval_recall': 0.42209084509703343, 'eval_runtime': 0.574, 'eval_samples_per_second': 452.966, 'eval_steps_per_second': 113.242, 'epoch': 10.0}


 73%|███████▎  | 2857/3900 [01:54<00:36, 28.67it/s]
 73%|███████▎  | 2860/3900 [01:55<00:36, 28.67it/s]

{'eval_loss': 0.1999826282262802, 'eval_f1': 0.5138826575758383, 'eval_precision': 0.6389861751152074, 'eval_recall': 0.435172791589555, 'eval_runtime': 0.5786, 'eval_samples_per_second': 449.363, 'eval_steps_per_second': 112.341, 'epoch': 11.0}


 77%|███████▋  | 3003/3900 [02:01<00:31, 28.74it/s]

{'loss': 0.1522, 'grad_norm': 0.3268507719039917, 'learning_rate': 1.153846153846154e-06, 'epoch': 11.54}


 80%|███████▉  | 3119/3900 [02:05<00:26, 29.25it/s]
 80%|████████  | 3120/3900 [02:05<00:26, 29.25it/s]

{'eval_loss': 0.19987210631370544, 'eval_f1': 0.5204924146786121, 'eval_precision': 0.6431978594556897, 'eval_recall': 0.4428457221479169, 'eval_runtime': 0.6663, 'eval_samples_per_second': 390.213, 'eval_steps_per_second': 97.553, 'epoch': 12.0}


 87%|████████▋ | 3379/3900 [02:15<00:18, 28.75it/s]
 87%|████████▋ | 3380/3900 [02:16<00:18, 28.75it/s]

{'eval_loss': 0.1975764036178589, 'eval_f1': 0.5193543375937766, 'eval_precision': 0.6450783897788506, 'eval_recall': 0.44161656058651555, 'eval_runtime': 0.7188, 'eval_samples_per_second': 361.71, 'eval_steps_per_second': 90.427, 'epoch': 13.0}


 93%|█████████▎| 3638/3900 [02:26<00:08, 29.68it/s]
 93%|█████████▎| 3640/3900 [02:26<00:08, 29.68it/s]

{'eval_loss': 0.1942129284143448, 'eval_f1': 0.5278254447469232, 'eval_precision': 0.6517006802721088, 'eval_recall': 0.44877356200094265, 'eval_runtime': 0.6179, 'eval_samples_per_second': 420.786, 'eval_steps_per_second': 105.197, 'epoch': 14.0}


100%|██████████| 3900/3900 [02:36<00:00, 29.20it/s]
100%|██████████| 3900/3900 [02:38<00:00, 29.20it/s]

{'eval_loss': 0.19450749456882477, 'eval_f1': 0.5228717201550481, 'eval_precision': 0.6461484381395394, 'eval_recall': 0.4450112174414187, 'eval_runtime': 0.6225, 'eval_samples_per_second': 417.669, 'eval_steps_per_second': 104.417, 'epoch': 15.0}


100%|██████████| 3900/3900 [02:39<00:00, 24.48it/s]


{'train_runtime': 159.3321, 'train_samples_per_second': 97.72, 'train_steps_per_second': 24.477, 'train_loss': 0.21670300997220554, 'epoch': 15.0}


100%|██████████| 65/65 [00:00<00:00, 100.37it/s]


Evaluation Metrics: {'eval_loss': 0.1942129284143448, 'eval_f1': 0.5278254447469232, 'eval_precision': 0.6517006802721088, 'eval_recall': 0.44877356200094265, 'eval_runtime': 0.658, 'eval_samples_per_second': 395.159, 'eval_steps_per_second': 98.79, 'epoch': 15.0}
CHECK SCORE                                                 model    lan  \
12  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0.01  pharo   
13  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0.01  pharo   
14  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0.01  pharo   
15  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0.01  pharo   
16  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0.01  pharo   
17  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0.01  pharo   
18  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0.01  pharo   

                        cat  precision    recall        f1  
12  Keyimplementationpoints   1.000000  0.571429  0.727273  
13                  Example   0.966667  0.861386  0.910995  
14         Responsibilities

0,1
eval/f1,▁▂▃▃▅▆▇▇████████
eval/loss,█▆▅▄▃▂▂▂▁▁▁▁▁▁▁▁
eval/precision,▁▂▄▅████████████
eval/recall,▁▂▃▃▅▆▆▇▇███████
eval/runtime,▄▂▅▁▁▄▃█▁▂▃▅▇▄▄▅
eval/samples_per_second,▄▇▃██▄▅▁█▆▆▃▂▅▄▃
eval/steps_per_second,▄▇▃██▄▅▁█▆▆▃▂▅▄▃
train/epoch,▁▁▂▂▃▃▃▄▄▅▅▅▆▆▇▇▇███
train/global_step,▁▁▂▂▃▃▃▄▄▅▅▅▆▆▇▇▇███
train/grad_norm,█▃▁

0,1
eval/f1,0.52783
eval/loss,0.19421
eval/precision,0.6517
eval/recall,0.44877
eval/runtime,0.658
eval/samples_per_second,395.159
eval/steps_per_second,98.79
total_flos,515675326855680.0
train/epoch,15.0
train/global_step,3900.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 7614/7614 [00:00<00:00, 19398.52 examples/s]
  7%|▋         | 762/11430 [00:30<07:01, 25.29it/s]
  7%|▋         | 762/11430 [00:33<07:01, 25.29it/s]

{'eval_loss': 0.1162429079413414, 'eval_f1': 0.6398214082203131, 'eval_precision': 0.6601029538653945, 'eval_recall': 0.6271064741479909, 'eval_runtime': 2.1018, 'eval_samples_per_second': 724.606, 'eval_steps_per_second': 90.873, 'epoch': 1.0}


  9%|▉         | 1002/11430 [00:43<07:00, 24.77it/s]

{'loss': 0.1342, 'grad_norm': 2.4384546279907227, 'learning_rate': 4.562554680664917e-05, 'epoch': 1.31}


 13%|█▎        | 1524/11430 [01:04<06:35, 25.04it/s]
 13%|█▎        | 1524/11430 [01:07<06:35, 25.04it/s]

{'eval_loss': 0.08601590991020203, 'eval_f1': 0.8119579684354497, 'eval_precision': 0.8917489068681954, 'eval_recall': 0.7677777524517134, 'eval_runtime': 2.1782, 'eval_samples_per_second': 699.188, 'eval_steps_per_second': 87.685, 'epoch': 2.0}


 18%|█▊        | 2004/11430 [01:27<06:29, 24.18it/s]

{'loss': 0.066, 'grad_norm': 0.059006333351135254, 'learning_rate': 4.125109361329834e-05, 'epoch': 2.62}


 20%|██        | 2286/11430 [01:38<05:59, 25.43it/s]
 20%|██        | 2286/11430 [01:40<05:59, 25.43it/s]

{'eval_loss': 0.08620494604110718, 'eval_f1': 0.8405165076908012, 'eval_precision': 0.8670119843636819, 'eval_recall': 0.8165848297936832, 'eval_runtime': 2.164, 'eval_samples_per_second': 703.79, 'eval_steps_per_second': 88.263, 'epoch': 3.0}


 26%|██▋       | 3003/11430 [02:10<05:37, 24.97it/s]

{'loss': 0.039, 'grad_norm': 0.10754668712615967, 'learning_rate': 3.6876640419947505e-05, 'epoch': 3.94}


 27%|██▋       | 3048/11430 [02:12<05:37, 24.83it/s]
 27%|██▋       | 3048/11430 [02:14<05:37, 24.83it/s]

{'eval_loss': 0.09988516569137573, 'eval_f1': 0.854434034554755, 'eval_precision': 0.9023177005757771, 'eval_recall': 0.8273483064379744, 'eval_runtime': 2.0975, 'eval_samples_per_second': 726.088, 'eval_steps_per_second': 91.059, 'epoch': 4.0}


 33%|███▎      | 3810/11430 [02:46<05:02, 25.22it/s]
 33%|███▎      | 3810/11430 [02:48<05:02, 25.22it/s]

{'eval_loss': 0.09944228082895279, 'eval_f1': 0.8715469366560297, 'eval_precision': 0.886542037006312, 'eval_recall': 0.85786479351466, 'eval_runtime': 2.0993, 'eval_samples_per_second': 725.492, 'eval_steps_per_second': 90.984, 'epoch': 5.0}


 35%|███▌      | 4002/11430 [02:57<05:06, 24.26it/s]

{'loss': 0.0194, 'grad_norm': 0.03077687881886959, 'learning_rate': 3.2502187226596675e-05, 'epoch': 5.25}


 40%|████      | 4572/11430 [03:20<04:30, 25.40it/s]
 40%|████      | 4572/11430 [03:22<04:30, 25.40it/s]

{'eval_loss': 0.11835508048534393, 'eval_f1': 0.7953369261079898, 'eval_precision': 0.8658076211220104, 'eval_recall': 0.765132667232546, 'eval_runtime': 2.2979, 'eval_samples_per_second': 662.777, 'eval_steps_per_second': 83.119, 'epoch': 6.0}


 44%|████▍     | 5004/11430 [03:40<04:17, 24.92it/s]

{'loss': 0.0115, 'grad_norm': 0.004815181251615286, 'learning_rate': 2.8127734033245845e-05, 'epoch': 6.56}


 47%|████▋     | 5334/11430 [03:54<03:59, 25.45it/s]
 47%|████▋     | 5334/11430 [03:56<03:59, 25.45it/s]

{'eval_loss': 0.11870657652616501, 'eval_f1': 0.8555308362924237, 'eval_precision': 0.8602836022766364, 'eval_recall': 0.8512299421593117, 'eval_runtime': 2.1002, 'eval_samples_per_second': 725.165, 'eval_steps_per_second': 90.943, 'epoch': 7.0}


 53%|█████▎    | 6003/11430 [04:24<03:40, 24.63it/s]

{'loss': 0.008, 'grad_norm': 0.009758083149790764, 'learning_rate': 2.3753280839895015e-05, 'epoch': 7.87}


 53%|█████▎    | 6096/11430 [04:27<03:32, 25.06it/s]
 53%|█████▎    | 6096/11430 [04:29<03:32, 25.06it/s]

{'eval_loss': 0.118619903922081, 'eval_f1': 0.8617260102928853, 'eval_precision': 0.8744823562869618, 'eval_recall': 0.8502492531567778, 'eval_runtime': 2.0714, 'eval_samples_per_second': 735.247, 'eval_steps_per_second': 92.208, 'epoch': 8.0}


 60%|██████    | 6858/11430 [05:01<03:02, 25.09it/s]
 60%|██████    | 6858/11430 [05:03<03:02, 25.09it/s]

{'eval_loss': 0.13138365745544434, 'eval_f1': 0.856239065943206, 'eval_precision': 0.8728661113628025, 'eval_recall': 0.8419109146259504, 'eval_runtime': 2.287, 'eval_samples_per_second': 665.949, 'eval_steps_per_second': 83.517, 'epoch': 9.0}


 61%|██████▏   | 7002/11430 [05:10<02:58, 24.79it/s]

{'loss': 0.0042, 'grad_norm': 0.15777164697647095, 'learning_rate': 1.9378827646544184e-05, 'epoch': 9.19}


 67%|██████▋   | 7620/11430 [05:35<02:28, 25.72it/s]
 67%|██████▋   | 7620/11430 [05:37<02:28, 25.72it/s]

{'eval_loss': 0.13412493467330933, 'eval_f1': 0.8722117760602498, 'eval_precision': 0.885704279748601, 'eval_recall': 0.8619565164456807, 'eval_runtime': 2.0823, 'eval_samples_per_second': 731.39, 'eval_steps_per_second': 91.724, 'epoch': 10.0}


 70%|███████   | 8004/11430 [05:53<02:19, 24.58it/s]

{'loss': 0.0037, 'grad_norm': 0.031348250806331635, 'learning_rate': 1.500437445319335e-05, 'epoch': 10.5}


 73%|███████▎  | 8382/11430 [06:08<01:59, 25.48it/s]
 73%|███████▎  | 8382/11430 [06:10<01:59, 25.48it/s]

{'eval_loss': 0.14141134917736053, 'eval_f1': 0.8652962265561923, 'eval_precision': 0.8705310285520668, 'eval_recall': 0.8613541903179041, 'eval_runtime': 2.0724, 'eval_samples_per_second': 734.881, 'eval_steps_per_second': 92.162, 'epoch': 11.0}


 79%|███████▉  | 9003/11430 [06:36<01:37, 24.93it/s]

{'loss': 0.0022, 'grad_norm': 0.005909424275159836, 'learning_rate': 1.062992125984252e-05, 'epoch': 11.81}


 80%|████████  | 9144/11430 [06:42<01:31, 24.95it/s]
 80%|████████  | 9144/11430 [06:44<01:31, 24.95it/s]

{'eval_loss': 0.14714953303337097, 'eval_f1': 0.864634713067823, 'eval_precision': 0.8735988055886506, 'eval_recall': 0.8597052438253908, 'eval_runtime': 2.2364, 'eval_samples_per_second': 681.007, 'eval_steps_per_second': 85.405, 'epoch': 12.0}


 87%|████████▋ | 9906/11430 [07:15<00:58, 25.86it/s]
 87%|████████▋ | 9906/11430 [07:17<00:58, 25.86it/s]

{'eval_loss': 0.145726278424263, 'eval_f1': 0.8713596353722838, 'eval_precision': 0.8804609405765259, 'eval_recall': 0.8637140900298795, 'eval_runtime': 2.1222, 'eval_samples_per_second': 717.639, 'eval_steps_per_second': 89.999, 'epoch': 13.0}


 88%|████████▊ | 10002/11430 [07:22<01:01, 23.13it/s]

{'loss': 0.0016, 'grad_norm': 0.00481717474758625, 'learning_rate': 6.255468066491689e-06, 'epoch': 13.12}


 93%|█████████▎| 10668/11430 [07:49<00:29, 25.51it/s]
 93%|█████████▎| 10668/11430 [07:51<00:29, 25.51it/s]

{'eval_loss': 0.14902564883232117, 'eval_f1': 0.8745717867788306, 'eval_precision': 0.8898896588504221, 'eval_recall': 0.8610047387540993, 'eval_runtime': 2.064, 'eval_samples_per_second': 737.903, 'eval_steps_per_second': 92.541, 'epoch': 14.0}


 96%|█████████▋| 11004/11430 [08:05<00:17, 25.02it/s]

{'loss': 0.001, 'grad_norm': 0.000773697393015027, 'learning_rate': 1.8810148731408575e-06, 'epoch': 14.44}


100%|██████████| 11430/11430 [08:22<00:00, 25.22it/s]
100%|██████████| 11430/11430 [08:26<00:00, 25.22it/s]

{'eval_loss': 0.15033559501171112, 'eval_f1': 0.8748918516616965, 'eval_precision': 0.8931525227001933, 'eval_recall': 0.8593533748736333, 'eval_runtime': 2.2716, 'eval_samples_per_second': 670.452, 'eval_steps_per_second': 84.082, 'epoch': 15.0}


100%|██████████| 11430/11430 [08:26<00:00, 22.55it/s]


{'train_runtime': 506.9835, 'train_samples_per_second': 180.213, 'train_steps_per_second': 22.545, 'train_loss': 0.025485083275684012, 'epoch': 15.0}


100%|██████████| 191/191 [00:02<00:00, 93.27it/s]


Evaluation Metrics: {'eval_loss': 0.15033559501171112, 'eval_f1': 0.8748918516616965, 'eval_precision': 0.8931525227001933, 'eval_recall': 0.8593533748736333, 'eval_runtime': 2.0612, 'eval_samples_per_second': 738.895, 'eval_steps_per_second': 92.665, 'epoch': 15.0}
CHECK SCORE                                                 model   lan          cat  \
12  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0.01  java      summary   
13  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0.01  java    Ownership   
14  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0.01  java       Expand   
15  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0.01  java        usage   
16  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0.01  java      Pointer   
17  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0.01  java  deprecation   
18  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0.01  java     rational   

    precision    recall        f1  
12   0.937238  0.955903  0.946479  
13   0.964912  1.000000  0.982143  
14  

0,1
eval/f1,▁▆▇▇█▆▇█▇███████
eval/loss,▄▁▁▃▂▅▅▅▆▆▇█▇███
eval/precision,▁█▇██▇▇▇▇█▇▇▇███
eval/recall,▁▅▇▇█▅██▇███████
eval/runtime,▂▄▄▂▂█▂▁█▂▁▆▃▁▇▁
eval/samples_per_second,▇▄▅▇▇▁▇█▁▇█▃▆█▂█
eval/steps_per_second,▇▄▅▇▇▁▇█▁▇█▃▆█▂█
train/epoch,▁▁▁▂▂▂▃▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇████
train/global_step,▁▁▁▂▂▂▃▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇████
train/grad_norm,█▁▁▁▁▁▁▁▁▁▁

0,1
eval/f1,0.87489
eval/loss,0.15034
eval/precision,0.89315
eval/recall,0.85935
eval/runtime,2.0612
eval/samples_per_second,738.895
eval/steps_per_second,92.665
total_flos,3025990766741760.0
train/epoch,15.0
train/global_step,11430.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1884/1884 [00:00<00:00, 16337.76 examples/s]
  7%|▋         | 188/2835 [00:07<01:48, 24.50it/s]
  7%|▋         | 189/2835 [00:08<01:48, 24.50it/s]

{'eval_loss': 0.3603341579437256, 'eval_f1': 0.3333601389923058, 'eval_precision': 0.5582694414019715, 'eval_recall': 0.27942536915139654, 'eval_runtime': 0.5233, 'eval_samples_per_second': 720.454, 'eval_steps_per_second': 91.729, 'epoch': 1.0}


 13%|█▎        | 377/2835 [00:16<01:42, 23.98it/s]
 13%|█▎        | 378/2835 [00:17<01:42, 23.98it/s]

{'eval_loss': 0.31497082114219666, 'eval_f1': 0.5239916967397884, 'eval_precision': 0.637710672999225, 'eval_recall': 0.4509637829998976, 'eval_runtime': 0.5538, 'eval_samples_per_second': 680.806, 'eval_steps_per_second': 86.681, 'epoch': 2.0}


 20%|█▉        | 566/2835 [00:25<01:29, 25.25it/s]
 20%|██        | 567/2835 [00:26<01:29, 25.25it/s]

{'eval_loss': 0.3522469997406006, 'eval_f1': 0.6031249940958997, 'eval_precision': 0.752593110683686, 'eval_recall': 0.5400553126462759, 'eval_runtime': 0.5198, 'eval_samples_per_second': 725.302, 'eval_steps_per_second': 92.346, 'epoch': 3.0}


 27%|██▋       | 755/2835 [00:34<01:21, 25.48it/s]
 27%|██▋       | 756/2835 [00:35<01:21, 25.48it/s]

{'eval_loss': 0.33179351687431335, 'eval_f1': 0.6950585244722159, 'eval_precision': 0.7537079530481107, 'eval_recall': 0.6465789122711534, 'eval_runtime': 0.5267, 'eval_samples_per_second': 715.79, 'eval_steps_per_second': 91.135, 'epoch': 4.0}


 33%|███▎      | 944/2835 [00:43<01:16, 24.85it/s]
 33%|███▎      | 945/2835 [00:44<01:16, 24.85it/s]

{'eval_loss': 0.3779992163181305, 'eval_f1': 0.6771824794042762, 'eval_precision': 0.718242263347569, 'eval_recall': 0.647745577062676, 'eval_runtime': 0.596, 'eval_samples_per_second': 632.535, 'eval_steps_per_second': 80.535, 'epoch': 5.0}


 35%|███▌      | 1004/2835 [00:47<01:14, 24.59it/s]

{'loss': 0.2173, 'grad_norm': 1.3158479928970337, 'learning_rate': 3.2363315696649034e-05, 'epoch': 5.29}


 40%|███▉      | 1133/2835 [00:52<01:10, 24.15it/s]
 40%|████      | 1134/2835 [00:53<01:10, 24.15it/s]

{'eval_loss': 0.4032663106918335, 'eval_f1': 0.6998622165860995, 'eval_precision': 0.7256649195690885, 'eval_recall': 0.6838678624694381, 'eval_runtime': 0.5735, 'eval_samples_per_second': 657.317, 'eval_steps_per_second': 83.69, 'epoch': 6.0}


 47%|████▋     | 1322/2835 [01:02<01:01, 24.77it/s]
 47%|████▋     | 1323/2835 [01:02<01:01, 24.77it/s]

{'eval_loss': 0.42447230219841003, 'eval_f1': 0.7004860236395792, 'eval_precision': 0.7262719289705796, 'eval_recall': 0.6813140276710313, 'eval_runtime': 0.538, 'eval_samples_per_second': 700.749, 'eval_steps_per_second': 89.22, 'epoch': 7.0}


 53%|█████▎    | 1511/2835 [01:11<00:54, 24.22it/s]
 53%|█████▎    | 1512/2835 [01:12<00:54, 24.22it/s]

{'eval_loss': 0.48205578327178955, 'eval_f1': 0.6955272762467335, 'eval_precision': 0.7119557216411094, 'eval_recall': 0.6891721860358495, 'eval_runtime': 0.5327, 'eval_samples_per_second': 707.669, 'eval_steps_per_second': 90.101, 'epoch': 8.0}


 60%|█████▉    | 1700/2835 [01:20<00:46, 24.18it/s]
 60%|██████    | 1701/2835 [01:21<00:46, 24.18it/s]

{'eval_loss': 0.47734543681144714, 'eval_f1': 0.7139344239113138, 'eval_precision': 0.7271626950266963, 'eval_recall': 0.7064346310736207, 'eval_runtime': 0.5573, 'eval_samples_per_second': 676.439, 'eval_steps_per_second': 86.125, 'epoch': 9.0}


 67%|██████▋   | 1889/2835 [01:29<00:38, 24.59it/s]
 67%|██████▋   | 1890/2835 [01:30<00:38, 24.59it/s]

{'eval_loss': 0.4735375642776489, 'eval_f1': 0.7235940952026223, 'eval_precision': 0.7414094252785222, 'eval_recall': 0.7101197332185746, 'eval_runtime': 0.5572, 'eval_samples_per_second': 676.635, 'eval_steps_per_second': 86.15, 'epoch': 10.0}


 71%|███████   | 2003/2835 [01:35<00:34, 23.99it/s]

{'loss': 0.0226, 'grad_norm': 0.14429301023483276, 'learning_rate': 1.472663139329806e-05, 'epoch': 10.58}


 73%|███████▎  | 2078/2835 [01:38<00:30, 24.84it/s]
 73%|███████▎  | 2079/2835 [01:39<00:30, 24.84it/s]

{'eval_loss': 0.495787650346756, 'eval_f1': 0.7220193788493361, 'eval_precision': 0.7475492187137757, 'eval_recall': 0.7023540042176677, 'eval_runtime': 0.5274, 'eval_samples_per_second': 714.868, 'eval_steps_per_second': 91.018, 'epoch': 11.0}


 80%|███████▉  | 2267/2835 [01:47<00:23, 23.96it/s]
 80%|████████  | 2268/2835 [01:48<00:23, 23.96it/s]

{'eval_loss': 0.5113682150840759, 'eval_f1': 0.7253296705372749, 'eval_precision': 0.7446883559789963, 'eval_recall': 0.7100041452402165, 'eval_runtime': 0.5332, 'eval_samples_per_second': 707.05, 'eval_steps_per_second': 90.022, 'epoch': 12.0}


 87%|████████▋ | 2456/2835 [01:56<00:15, 23.77it/s]
 87%|████████▋ | 2457/2835 [01:57<00:15, 23.77it/s]

{'eval_loss': 0.5239384770393372, 'eval_f1': 0.729623705033541, 'eval_precision': 0.7498254587876341, 'eval_recall': 0.7141483541275083, 'eval_runtime': 0.5591, 'eval_samples_per_second': 674.291, 'eval_steps_per_second': 85.851, 'epoch': 13.0}


 93%|█████████▎| 2645/2835 [02:05<00:07, 24.52it/s]
 93%|█████████▎| 2646/2835 [02:06<00:07, 24.52it/s]

{'eval_loss': 0.5225372910499573, 'eval_f1': 0.7196628830487098, 'eval_precision': 0.7430148501347823, 'eval_recall': 0.7024868259413986, 'eval_runtime': 0.5303, 'eval_samples_per_second': 710.871, 'eval_steps_per_second': 90.509, 'epoch': 14.0}


100%|█████████▉| 2834/2835 [02:14<00:00, 24.74it/s]
100%|██████████| 2835/2835 [02:16<00:00, 24.74it/s]

{'eval_loss': 0.5190646052360535, 'eval_f1': 0.7263868228307884, 'eval_precision': 0.7482006959839607, 'eval_recall': 0.7092985432006225, 'eval_runtime': 0.5228, 'eval_samples_per_second': 721.091, 'eval_steps_per_second': 91.81, 'epoch': 15.0}


100%|██████████| 2835/2835 [02:17<00:00, 20.67it/s]


{'train_runtime': 137.1774, 'train_samples_per_second': 164.787, 'train_steps_per_second': 20.667, 'train_loss': 0.08565940730786198, 'epoch': 15.0}


100%|██████████| 48/48 [00:00<00:00, 96.30it/s] 


Evaluation Metrics: {'eval_loss': 0.5239384770393372, 'eval_f1': 0.729623705033541, 'eval_precision': 0.7498254587876341, 'eval_recall': 0.7141483541275083, 'eval_runtime': 0.5126, 'eval_samples_per_second': 735.4, 'eval_steps_per_second': 93.632, 'epoch': 15.0}
CHECK SCORE                                                 model     lan  \
14  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0.01  python   
15  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0.01  python   
16  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0.01  python   
17  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0.01  python   
18  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0.01  python   

                 cat  precision    recall        f1  
14             Usage   0.916667  0.727273  0.811060  
15        Parameters   0.834783  0.857143  0.845815  
16  DevelopmentNotes   0.657895  0.625000  0.641026  
17            Expand   0.602941  0.594203  0.598540  
18           Summary   0.736842  0.767123  0.751678  
Scores:   

0,1
eval/f1,▁▄▆▇▇▇▇▇████████
eval/loss,▃▁▂▂▃▄▅▇▆▆▇█████
eval/precision,▁▄██▇▇▇▇▇███████
eval/recall,▁▄▅▇▇█▇█████████
eval/runtime,▂▄▂▂█▆▃▃▅▅▂▃▅▂▂▁
eval/samples_per_second,▇▄▇▇▁▃▆▆▄▄▇▆▄▆▇█
eval/steps_per_second,▇▄▇▇▁▃▆▆▄▄▇▆▄▆▇█
train/epoch,▁▁▂▃▃▃▃▄▅▅▅▆▆▇▇▇███
train/global_step,▁▁▂▃▃▃▃▄▅▅▅▆▆▇▇▇███
train/grad_norm,█▁

0,1
eval/f1,0.72962
eval/loss,0.52394
eval/precision,0.74983
eval/recall,0.71415
eval/runtime,0.5126
eval/samples_per_second,735.4
eval/steps_per_second,93.632
total_flos,748646437651200.0
train/epoch,15.0
train/global_step,2835.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1298/1298 [00:00<00:00, 14520.98 examples/s]
  7%|▋         | 130/1950 [00:05<01:14, 24.41it/s]
  7%|▋         | 130/1950 [00:05<01:14, 24.41it/s]

{'eval_loss': 0.2993982434272766, 'eval_f1': 0.19399592297652046, 'eval_precision': 0.2714285714285714, 'eval_recall': 0.1634407801682424, 'eval_runtime': 0.3667, 'eval_samples_per_second': 709.056, 'eval_steps_per_second': 89.996, 'epoch': 1.0}


 13%|█▎        | 259/1950 [00:13<01:07, 25.08it/s]
 13%|█▎        | 260/1950 [00:13<01:07, 25.08it/s]

{'eval_loss': 0.23016348481178284, 'eval_f1': 0.4657413457280005, 'eval_precision': 0.640336991939633, 'eval_recall': 0.3865124365580055, 'eval_runtime': 0.3595, 'eval_samples_per_second': 723.306, 'eval_steps_per_second': 91.804, 'epoch': 2.0}


 20%|█▉        | 388/1950 [00:19<01:04, 24.35it/s]
 20%|██        | 390/1950 [00:20<01:04, 24.35it/s]

{'eval_loss': 0.22965580224990845, 'eval_f1': 0.4750492387344862, 'eval_precision': 0.619920745067537, 'eval_recall': 0.4176007429570345, 'eval_runtime': 0.3605, 'eval_samples_per_second': 721.166, 'eval_steps_per_second': 91.533, 'epoch': 3.0}


 27%|██▋       | 520/1950 [00:26<00:56, 25.48it/s]
 27%|██▋       | 520/1950 [00:26<00:56, 25.48it/s]

{'eval_loss': 0.20161834359169006, 'eval_f1': 0.579001662626963, 'eval_precision': 0.7625486819035207, 'eval_recall': 0.5118980906477877, 'eval_runtime': 0.3608, 'eval_samples_per_second': 720.656, 'eval_steps_per_second': 91.468, 'epoch': 4.0}


 33%|███▎      | 649/1950 [00:33<00:53, 24.26it/s]
 33%|███▎      | 650/1950 [00:33<00:53, 24.26it/s]

{'eval_loss': 0.2001257985830307, 'eval_f1': 0.634394669535294, 'eval_precision': 0.8554890791267714, 'eval_recall': 0.563777332826182, 'eval_runtime': 0.3978, 'eval_samples_per_second': 653.604, 'eval_steps_per_second': 82.957, 'epoch': 5.0}


 40%|███▉      | 778/1950 [00:40<00:46, 25.22it/s]
 40%|████      | 780/1950 [00:40<00:46, 25.22it/s]

{'eval_loss': 0.2330123782157898, 'eval_f1': 0.6010538531934841, 'eval_precision': 0.8244530724279464, 'eval_recall': 0.5443962301251036, 'eval_runtime': 0.3648, 'eval_samples_per_second': 712.796, 'eval_steps_per_second': 90.47, 'epoch': 6.0}


 47%|████▋     | 910/1950 [00:46<00:41, 25.21it/s]
 47%|████▋     | 910/1950 [00:47<00:41, 25.21it/s]

{'eval_loss': 0.24669785797595978, 'eval_f1': 0.6578260505867481, 'eval_precision': 0.8007861010563607, 'eval_recall': 0.5980989768652574, 'eval_runtime': 0.3948, 'eval_samples_per_second': 658.561, 'eval_steps_per_second': 83.587, 'epoch': 7.0}


 51%|█████▏    | 1003/1950 [00:51<00:38, 24.70it/s]

{'loss': 0.1411, 'grad_norm': 1.341634750366211, 'learning_rate': 2.435897435897436e-05, 'epoch': 7.69}


 53%|█████▎    | 1039/1950 [00:53<00:36, 24.95it/s]
 53%|█████▎    | 1040/1950 [00:53<00:36, 24.95it/s]

{'eval_loss': 0.21885287761688232, 'eval_f1': 0.6870112306253909, 'eval_precision': 0.8385758799024104, 'eval_recall': 0.6187518232820608, 'eval_runtime': 0.3608, 'eval_samples_per_second': 720.658, 'eval_steps_per_second': 91.468, 'epoch': 8.0}


 60%|█████▉    | 1168/1950 [00:59<00:32, 24.05it/s]
 60%|██████    | 1170/1950 [01:00<00:32, 24.05it/s]

{'eval_loss': 0.24507392942905426, 'eval_f1': 0.6853676237190307, 'eval_precision': 0.8533765446949337, 'eval_recall': 0.6094538062028887, 'eval_runtime': 0.3658, 'eval_samples_per_second': 710.751, 'eval_steps_per_second': 90.211, 'epoch': 9.0}


 67%|██████▋   | 1300/1950 [01:06<00:25, 25.02it/s]
 67%|██████▋   | 1300/1950 [01:06<00:25, 25.02it/s]

{'eval_loss': 0.23640018701553345, 'eval_f1': 0.6809075254892577, 'eval_precision': 0.8429500921480039, 'eval_recall': 0.6119802855116967, 'eval_runtime': 0.3832, 'eval_samples_per_second': 678.45, 'eval_steps_per_second': 86.111, 'epoch': 10.0}


 73%|███████▎  | 1429/1950 [01:12<00:20, 25.14it/s]
 73%|███████▎  | 1430/1950 [01:13<00:20, 25.14it/s]

{'eval_loss': 0.24507224559783936, 'eval_f1': 0.6856357701217979, 'eval_precision': 0.8214350621609562, 'eval_recall': 0.6261252736101592, 'eval_runtime': 0.3647, 'eval_samples_per_second': 712.831, 'eval_steps_per_second': 90.475, 'epoch': 11.0}


 80%|███████▉  | 1558/1950 [01:19<00:16, 23.69it/s]
 80%|████████  | 1560/1950 [01:19<00:16, 23.69it/s]

{'eval_loss': 0.2451728880405426, 'eval_f1': 0.6875669136354521, 'eval_precision': 0.8311800658615622, 'eval_recall': 0.621640149457275, 'eval_runtime': 0.3964, 'eval_samples_per_second': 655.972, 'eval_steps_per_second': 83.258, 'epoch': 12.0}


 87%|████████▋ | 1690/1950 [01:25<00:10, 25.23it/s]
 87%|████████▋ | 1690/1950 [01:26<00:10, 25.23it/s]

{'eval_loss': 0.25851571559906006, 'eval_f1': 0.6627392306607056, 'eval_precision': 0.8013939228913705, 'eval_recall': 0.5998251255753818, 'eval_runtime': 0.3683, 'eval_samples_per_second': 705.897, 'eval_steps_per_second': 89.595, 'epoch': 13.0}


 93%|█████████▎| 1819/1950 [01:32<00:05, 24.64it/s]
 93%|█████████▎| 1820/1950 [01:32<00:05, 24.64it/s]

{'eval_loss': 0.25303953886032104, 'eval_f1': 0.6890077660945533, 'eval_precision': 0.825014116318464, 'eval_recall': 0.6277191768128979, 'eval_runtime': 0.3706, 'eval_samples_per_second': 701.501, 'eval_steps_per_second': 89.037, 'epoch': 14.0}


100%|█████████▉| 1948/1950 [01:38<00:00, 25.05it/s]
100%|██████████| 1950/1950 [01:40<00:00, 25.05it/s]

{'eval_loss': 0.2553471326828003, 'eval_f1': 0.6826374890591878, 'eval_precision': 0.825943660039318, 'eval_recall': 0.6175150951802449, 'eval_runtime': 0.3585, 'eval_samples_per_second': 725.232, 'eval_steps_per_second': 92.049, 'epoch': 15.0}


100%|██████████| 1950/1950 [01:41<00:00, 19.29it/s]


{'train_runtime': 101.0867, 'train_samples_per_second': 154.026, 'train_steps_per_second': 19.29, 'train_loss': 0.07878094061827048, 'epoch': 15.0}


100%|██████████| 33/33 [00:00<00:00, 95.89it/s] 


Evaluation Metrics: {'eval_loss': 0.25303953886032104, 'eval_f1': 0.6890077660945533, 'eval_precision': 0.825014116318464, 'eval_recall': 0.6277191768128979, 'eval_runtime': 0.3581, 'eval_samples_per_second': 726.078, 'eval_steps_per_second': 92.156, 'epoch': 15.0}
CHECK SCORE                                                 model    lan  \
12  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0.01  pharo   
13  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0.01  pharo   
14  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0.01  pharo   
15  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0.01  pharo   
16  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0.01  pharo   
17  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0.01  pharo   
18  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0.01  pharo   

                        cat  precision    recall        f1  
12  Keyimplementationpoints   0.818182  0.642857  0.720000  
13                  Example   0.909091  0.891089  0.900000  
14         Responsibilitie

0,1
eval/f1,▁▅▅▆▇▇██████████
eval/loss,█▃▃▁▁▃▄▂▄▄▄▄▅▅▅▅
eval/precision,▁▅▅▇██▇█████▇███
eval/recall,▁▄▅▆▇▇██████████
eval/runtime,▃▁▁▁█▂▇▁▂▅▂█▃▃▁▁
eval/samples_per_second,▆██▇▁▇▁▇▇▃▇▁▆▆██
eval/steps_per_second,▆██▇▁▇▁▇▇▃▇▁▆▆██
train/epoch,▁▁▂▃▃▃▄▄▅▅▅▆▇▇▇███
train/global_step,▁▁▂▃▃▃▄▄▅▅▅▆▇▇▇███
train/grad_norm,▁

0,1
eval/f1,0.68901
eval/loss,0.25304
eval/precision,0.82501
eval/recall,0.62772
eval/runtime,0.3581
eval/samples_per_second,726.078
eval/steps_per_second,92.156
total_flos,515675326855680.0
train/epoch,15.0
train/global_step,1950.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 7614/7614 [00:00<00:00, 19395.93 examples/s]
  7%|▋         | 760/11430 [00:31<07:18, 24.31it/s]
  7%|▋         | 762/11430 [00:33<07:18, 24.31it/s]

{'eval_loss': 0.16807402670383453, 'eval_f1': 0.38171117968115553, 'eval_precision': 0.38823987150706935, 'eval_recall': 0.3765697234150977, 'eval_runtime': 2.1217, 'eval_samples_per_second': 717.834, 'eval_steps_per_second': 90.024, 'epoch': 1.0}


  9%|▉         | 1003/11430 [00:43<07:02, 24.69it/s]

{'loss': 0.2561, 'grad_norm': 3.099538803100586, 'learning_rate': 4.5625546806649176e-06, 'epoch': 1.31}


 13%|█▎        | 1522/11430 [01:05<06:46, 24.35it/s]
 13%|█▎        | 1524/11430 [01:07<06:46, 24.35it/s]

{'eval_loss': 0.12503378093242645, 'eval_f1': 0.5304585789164797, 'eval_precision': 0.5340751086011533, 'eval_recall': 0.5274696075611408, 'eval_runtime': 2.1811, 'eval_samples_per_second': 698.27, 'eval_steps_per_second': 87.57, 'epoch': 2.0}


 18%|█▊        | 2002/11430 [01:27<06:33, 23.93it/s]

{'loss': 0.1244, 'grad_norm': 0.3336063623428345, 'learning_rate': 4.125109361329835e-06, 'epoch': 2.62}


 20%|█▉        | 2284/11430 [01:39<06:04, 25.10it/s]
 20%|██        | 2286/11430 [01:41<06:04, 25.10it/s]

{'eval_loss': 0.10198411345481873, 'eval_f1': 0.5363349765614336, 'eval_precision': 0.5908529102319706, 'eval_recall': 0.5264541712535097, 'eval_runtime': 2.223, 'eval_samples_per_second': 685.098, 'eval_steps_per_second': 85.918, 'epoch': 3.0}


 26%|██▋       | 3004/11430 [02:11<05:41, 24.64it/s]

{'loss': 0.0935, 'grad_norm': 0.8775229454040527, 'learning_rate': 3.6876640419947506e-06, 'epoch': 3.94}


 27%|██▋       | 3046/11430 [02:13<05:33, 25.16it/s]
 27%|██▋       | 3048/11430 [02:15<05:33, 25.16it/s]

{'eval_loss': 0.10001513361930847, 'eval_f1': 0.6579502687989834, 'eval_precision': 0.9292534628691496, 'eval_recall': 0.6087446147981117, 'eval_runtime': 2.1059, 'eval_samples_per_second': 723.212, 'eval_steps_per_second': 90.698, 'epoch': 4.0}


 33%|███▎      | 3808/11430 [02:47<05:16, 24.10it/s]
 33%|███▎      | 3810/11430 [02:49<05:16, 24.10it/s]

{'eval_loss': 0.09051789343357086, 'eval_f1': 0.7811163439107337, 'eval_precision': 0.9200690509685263, 'eval_recall': 0.7234517209691719, 'eval_runtime': 2.1645, 'eval_samples_per_second': 703.631, 'eval_steps_per_second': 88.243, 'epoch': 5.0}


 35%|███▌      | 4003/11430 [02:58<05:07, 24.14it/s]

{'loss': 0.0737, 'grad_norm': 3.5871269702911377, 'learning_rate': 3.2502187226596677e-06, 'epoch': 5.25}


 40%|███▉      | 4570/11430 [03:21<04:45, 24.03it/s]
 40%|████      | 4572/11430 [03:23<04:45, 24.03it/s]

{'eval_loss': 0.09147614985704422, 'eval_f1': 0.7681286134357723, 'eval_precision': 0.9396203892605323, 'eval_recall': 0.710165431198833, 'eval_runtime': 2.2955, 'eval_samples_per_second': 663.484, 'eval_steps_per_second': 83.208, 'epoch': 6.0}


 44%|████▍     | 5005/11430 [03:42<04:18, 24.88it/s]

{'loss': 0.0613, 'grad_norm': 5.003730297088623, 'learning_rate': 2.8127734033245845e-06, 'epoch': 6.56}


 47%|████▋     | 5332/11430 [03:55<04:05, 24.87it/s]
 47%|████▋     | 5334/11430 [03:57<04:05, 24.87it/s]

{'eval_loss': 0.08651707321405411, 'eval_f1': 0.8100846468015043, 'eval_precision': 0.9207686470356246, 'eval_recall': 0.7558519010300638, 'eval_runtime': 2.0919, 'eval_samples_per_second': 728.045, 'eval_steps_per_second': 91.304, 'epoch': 7.0}


 53%|█████▎    | 6004/11430 [04:25<03:47, 23.83it/s]

{'loss': 0.0541, 'grad_norm': 0.1250491738319397, 'learning_rate': 2.3753280839895016e-06, 'epoch': 7.87}


 53%|█████▎    | 6094/11430 [04:29<03:38, 24.39it/s]
 53%|█████▎    | 6096/11430 [04:31<03:38, 24.39it/s]

{'eval_loss': 0.08727654814720154, 'eval_f1': 0.8268788017901486, 'eval_precision': 0.9120603100156767, 'eval_recall': 0.7745637234147511, 'eval_runtime': 2.1015, 'eval_samples_per_second': 724.715, 'eval_steps_per_second': 90.887, 'epoch': 8.0}


 60%|█████▉    | 6856/11430 [05:03<03:07, 24.40it/s]
 60%|██████    | 6858/11430 [05:05<03:07, 24.40it/s]

{'eval_loss': 0.08915341645479202, 'eval_f1': 0.8327515580831486, 'eval_precision': 0.8902579260016401, 'eval_recall': 0.7916784819905793, 'eval_runtime': 2.3071, 'eval_samples_per_second': 660.15, 'eval_steps_per_second': 82.79, 'epoch': 9.0}


 61%|██████▏   | 7003/11430 [05:12<02:58, 24.84it/s]

{'loss': 0.045, 'grad_norm': 1.5776854753494263, 'learning_rate': 1.9378827646544183e-06, 'epoch': 9.19}


 67%|██████▋   | 7618/11430 [05:37<02:31, 25.08it/s]
 67%|██████▋   | 7620/11430 [05:39<02:31, 25.08it/s]

{'eval_loss': 0.08784846216440201, 'eval_f1': 0.8409593024406277, 'eval_precision': 0.9119114634225544, 'eval_recall': 0.7969247929654061, 'eval_runtime': 2.0955, 'eval_samples_per_second': 726.785, 'eval_steps_per_second': 91.146, 'epoch': 10.0}


 70%|███████   | 8002/11430 [05:55<02:23, 23.88it/s]

{'loss': 0.038, 'grad_norm': 1.7397581338882446, 'learning_rate': 1.500437445319335e-06, 'epoch': 10.5}


 73%|███████▎  | 8380/11430 [06:11<02:05, 24.35it/s]
 73%|███████▎  | 8382/11430 [06:13<02:05, 24.35it/s]

{'eval_loss': 0.08667561411857605, 'eval_f1': 0.8578459840979088, 'eval_precision': 0.911010783883997, 'eval_recall': 0.818987767040764, 'eval_runtime': 2.0675, 'eval_samples_per_second': 736.639, 'eval_steps_per_second': 92.382, 'epoch': 11.0}


 79%|███████▉  | 9004/11430 [06:39<01:36, 25.15it/s]

{'loss': 0.035, 'grad_norm': 3.197036027908325, 'learning_rate': 1.062992125984252e-06, 'epoch': 11.81}


 80%|███████▉  | 9142/11430 [06:44<01:34, 24.12it/s]
 80%|████████  | 9144/11430 [06:47<01:34, 24.12it/s]

{'eval_loss': 0.09035903960466385, 'eval_f1': 0.8468570350290443, 'eval_precision': 0.8933642243853415, 'eval_recall': 0.8104902040117316, 'eval_runtime': 2.2296, 'eval_samples_per_second': 683.091, 'eval_steps_per_second': 85.667, 'epoch': 12.0}


 87%|████████▋ | 9904/11430 [07:18<01:00, 25.08it/s]
 87%|████████▋ | 9906/11430 [07:20<01:00, 25.08it/s]

{'eval_loss': 0.0902199000120163, 'eval_f1': 0.8566485983093507, 'eval_precision': 0.9090741012432488, 'eval_recall': 0.8180372746263175, 'eval_runtime': 2.1163, 'eval_samples_per_second': 719.66, 'eval_steps_per_second': 90.253, 'epoch': 13.0}


 88%|████████▊ | 10003/11430 [07:25<00:57, 24.70it/s]

{'loss': 0.0309, 'grad_norm': 0.07816463708877563, 'learning_rate': 6.25546806649169e-07, 'epoch': 13.12}


 93%|█████████▎| 10666/11430 [07:52<00:30, 25.03it/s]
 93%|█████████▎| 10668/11430 [07:54<00:30, 25.03it/s]

{'eval_loss': 0.08909045159816742, 'eval_f1': 0.8544013924904865, 'eval_precision': 0.9071194779233357, 'eval_recall': 0.8156475366309415, 'eval_runtime': 2.0783, 'eval_samples_per_second': 732.814, 'eval_steps_per_second': 91.902, 'epoch': 14.0}


 96%|█████████▋| 11002/11430 [08:08<00:17, 24.56it/s]

{'loss': 0.03, 'grad_norm': 0.028124937787652016, 'learning_rate': 1.8810148731408576e-07, 'epoch': 14.44}


100%|█████████▉| 11428/11430 [08:26<00:00, 23.49it/s]
100%|██████████| 11430/11430 [08:29<00:00, 23.49it/s]

{'eval_loss': 0.08876147121191025, 'eval_f1': 0.8531859021532104, 'eval_precision': 0.9062776892794709, 'eval_recall': 0.8135136507307345, 'eval_runtime': 2.2253, 'eval_samples_per_second': 684.402, 'eval_steps_per_second': 85.831, 'epoch': 15.0}


100%|██████████| 11430/11430 [08:30<00:00, 22.39it/s]


{'train_runtime': 510.4632, 'train_samples_per_second': 178.984, 'train_steps_per_second': 22.391, 'train_loss': 0.0746552111074159, 'epoch': 15.0}


100%|██████████| 191/191 [00:02<00:00, 92.68it/s]


Evaluation Metrics: {'eval_loss': 0.08667561411857605, 'eval_f1': 0.8578459840979088, 'eval_precision': 0.911010783883997, 'eval_recall': 0.818987767040764, 'eval_runtime': 2.0742, 'eval_samples_per_second': 734.241, 'eval_steps_per_second': 92.081, 'epoch': 15.0}
CHECK SCORE                                                 model   lan          cat  \
12  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0.01  java      summary   
13  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0.01  java    Ownership   
14  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0.01  java       Expand   
15  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0.01  java        usage   
16  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0.01  java      Pointer   
17  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0.01  java  deprecation   
18  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0.01  java     rational   

    precision    recall        f1  
12   0.951567  0.950213  0.950890  
13   0.964912  1.000000  0.982143  
14   0

0,1
eval/f1,▁▃▃▅▇▇▇█████████
eval/loss,█▄▂▂▁▁▁▁▁▁▁▁▁▁▁▁
eval/precision,▁▃▄█████▇██▇████
eval/recall,▁▃▃▅▆▆▇▇████████
eval/runtime,▃▄▆▂▄█▂▂█▂▁▆▂▁▆▁
eval/samples_per_second,▆▄▃▇▅▁▇▇▁▇█▃▆█▃█
eval/steps_per_second,▆▄▃▇▅▁▇▇▁▇█▃▆█▃█
train/epoch,▁▁▁▂▂▂▃▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇████
train/global_step,▁▁▁▂▂▂▃▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇████
train/grad_norm,▅▁▂▆█▁▃▃▅▁▁

0,1
eval/f1,0.85785
eval/loss,0.08668
eval/precision,0.91101
eval/recall,0.81899
eval/runtime,2.0742
eval/samples_per_second,734.241
eval/steps_per_second,92.081
total_flos,3025990766741760.0
train/epoch,15.0
train/global_step,11430.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1884/1884 [00:00<00:00, 16990.77 examples/s]
  7%|▋         | 188/2835 [00:07<01:47, 24.63it/s]
  7%|▋         | 189/2835 [00:08<01:47, 24.63it/s]

{'eval_loss': 0.4913784861564636, 'eval_f1': 0.0, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_runtime': 0.5237, 'eval_samples_per_second': 719.874, 'eval_steps_per_second': 91.655, 'epoch': 1.0}


 13%|█▎        | 377/2835 [00:17<01:40, 24.53it/s]
 13%|█▎        | 378/2835 [00:17<01:40, 24.53it/s]

{'eval_loss': 0.44751712679862976, 'eval_f1': 0.09876543209876543, 'eval_precision': 0.1951219512195122, 'eval_recall': 0.06611570247933884, 'eval_runtime': 0.5767, 'eval_samples_per_second': 653.708, 'eval_steps_per_second': 83.231, 'epoch': 2.0}


 20%|█▉        | 566/2835 [00:26<01:30, 25.04it/s]
 20%|██        | 567/2835 [00:26<01:30, 25.04it/s]

{'eval_loss': 0.40828755497932434, 'eval_f1': 0.19903474133460972, 'eval_precision': 0.3823225806451613, 'eval_recall': 0.14037780401416763, 'eval_runtime': 0.5451, 'eval_samples_per_second': 691.613, 'eval_steps_per_second': 88.057, 'epoch': 3.0}


 27%|██▋       | 755/2835 [00:35<01:22, 25.21it/s]
 27%|██▋       | 756/2835 [00:36<01:22, 25.21it/s]

{'eval_loss': 0.3825637996196747, 'eval_f1': 0.2859602560682849, 'eval_precision': 0.5833800186741362, 'eval_recall': 0.20598445763451992, 'eval_runtime': 0.5239, 'eval_samples_per_second': 719.616, 'eval_steps_per_second': 91.622, 'epoch': 4.0}


 33%|███▎      | 944/2835 [00:44<01:18, 24.19it/s]
 33%|███▎      | 945/2835 [00:45<01:18, 24.19it/s]

{'eval_loss': 0.35902395844459534, 'eval_f1': 0.3608580008580009, 'eval_precision': 0.5488687086347545, 'eval_recall': 0.28826660574792584, 'eval_runtime': 0.565, 'eval_samples_per_second': 667.272, 'eval_steps_per_second': 84.958, 'epoch': 5.0}


 35%|███▌      | 1004/2835 [00:48<01:14, 24.57it/s]

{'loss': 0.428, 'grad_norm': 2.584009885787964, 'learning_rate': 3.2363315696649034e-06, 'epoch': 5.29}


 40%|███▉      | 1133/2835 [00:53<01:09, 24.39it/s]
 40%|████      | 1134/2835 [00:54<01:09, 24.39it/s]

{'eval_loss': 0.3495561480522156, 'eval_f1': 0.440107127814337, 'eval_precision': 0.7343965975544923, 'eval_recall': 0.3648285743350447, 'eval_runtime': 0.5535, 'eval_samples_per_second': 681.112, 'eval_steps_per_second': 86.72, 'epoch': 6.0}


 47%|████▋     | 1322/2835 [01:02<00:59, 25.36it/s]
 47%|████▋     | 1323/2835 [01:03<00:59, 25.36it/s]

{'eval_loss': 0.33787477016448975, 'eval_f1': 0.4540411020149988, 'eval_precision': 0.6637043189368771, 'eval_recall': 0.3900011461817732, 'eval_runtime': 0.5303, 'eval_samples_per_second': 710.96, 'eval_steps_per_second': 90.52, 'epoch': 7.0}


 53%|█████▎    | 1511/2835 [01:11<00:52, 25.07it/s]
 53%|█████▎    | 1512/2835 [01:12<00:52, 25.07it/s]

{'eval_loss': 0.32847729325294495, 'eval_f1': 0.47331387272943637, 'eval_precision': 0.6471563088512241, 'eval_recall': 0.4063952020690339, 'eval_runtime': 0.5456, 'eval_samples_per_second': 690.936, 'eval_steps_per_second': 87.971, 'epoch': 8.0}


 60%|█████▉    | 1700/2835 [01:20<00:48, 23.52it/s]
 60%|██████    | 1701/2835 [01:21<00:48, 23.52it/s]

{'eval_loss': 0.32482001185417175, 'eval_f1': 0.4901235134645575, 'eval_precision': 0.6404715821812595, 'eval_recall': 0.418310845668851, 'eval_runtime': 0.5808, 'eval_samples_per_second': 649.091, 'eval_steps_per_second': 82.643, 'epoch': 9.0}


 67%|██████▋   | 1889/2835 [01:29<00:39, 24.16it/s]
 67%|██████▋   | 1890/2835 [01:30<00:39, 24.16it/s]

{'eval_loss': 0.32176655530929565, 'eval_f1': 0.5055070069792185, 'eval_precision': 0.6504037629037629, 'eval_recall': 0.44852465427618393, 'eval_runtime': 0.5756, 'eval_samples_per_second': 654.962, 'eval_steps_per_second': 83.39, 'epoch': 10.0}


 71%|███████   | 2003/2835 [01:36<00:34, 24.24it/s]

{'loss': 0.2753, 'grad_norm': 3.605976104736328, 'learning_rate': 1.472663139329806e-06, 'epoch': 10.58}


 73%|███████▎  | 2078/2835 [01:39<00:30, 24.84it/s]
 73%|███████▎  | 2079/2835 [01:39<00:30, 24.84it/s]

{'eval_loss': 0.3207177519798279, 'eval_f1': 0.5155886753028128, 'eval_precision': 0.6386438024960519, 'eval_recall': 0.46581194672997156, 'eval_runtime': 0.5376, 'eval_samples_per_second': 701.288, 'eval_steps_per_second': 89.289, 'epoch': 11.0}


 80%|███████▉  | 2267/2835 [01:48<00:23, 24.23it/s]
 80%|████████  | 2268/2835 [01:48<00:23, 24.23it/s]

{'eval_loss': 0.3159741461277008, 'eval_f1': 0.5211238787982975, 'eval_precision': 0.64950182943991, 'eval_recall': 0.4661123623790796, 'eval_runtime': 0.5174, 'eval_samples_per_second': 728.6, 'eval_steps_per_second': 92.766, 'epoch': 12.0}


 87%|████████▋ | 2456/2835 [01:57<00:15, 23.86it/s]
 87%|████████▋ | 2457/2835 [01:57<00:15, 23.86it/s]

{'eval_loss': 0.3148028552532196, 'eval_f1': 0.5161563717818897, 'eval_precision': 0.6235095822052343, 'eval_recall': 0.46238883032970424, 'eval_runtime': 0.6074, 'eval_samples_per_second': 620.692, 'eval_steps_per_second': 79.027, 'epoch': 13.0}


 93%|█████████▎| 2645/2835 [02:06<00:07, 25.06it/s]
 93%|█████████▎| 2646/2835 [02:07<00:07, 25.06it/s]

{'eval_loss': 0.3126058578491211, 'eval_f1': 0.5213171321316087, 'eval_precision': 0.642767623564762, 'eval_recall': 0.46843434148106844, 'eval_runtime': 0.524, 'eval_samples_per_second': 719.4, 'eval_steps_per_second': 91.595, 'epoch': 14.0}


100%|█████████▉| 2834/2835 [02:15<00:00, 24.97it/s]
100%|██████████| 2835/2835 [02:16<00:00, 24.97it/s]

{'eval_loss': 0.3116725981235504, 'eval_f1': 0.5229177933777438, 'eval_precision': 0.6429998514557338, 'eval_recall': 0.4705204714158907, 'eval_runtime': 0.5179, 'eval_samples_per_second': 727.981, 'eval_steps_per_second': 92.687, 'epoch': 15.0}


100%|██████████| 2835/2835 [02:18<00:00, 20.54it/s]


{'train_runtime': 138.0206, 'train_samples_per_second': 163.78, 'train_steps_per_second': 20.54, 'train_loss': 0.31311298888406636, 'epoch': 15.0}


100%|██████████| 48/48 [00:00<00:00, 95.15it/s] 


Evaluation Metrics: {'eval_loss': 0.3116725981235504, 'eval_f1': 0.5229177933777438, 'eval_precision': 0.6429998514557338, 'eval_recall': 0.4705204714158907, 'eval_runtime': 0.5182, 'eval_samples_per_second': 727.505, 'eval_steps_per_second': 92.627, 'epoch': 15.0}
CHECK SCORE                                                 model     lan  \
14  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0.01  python   
15  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0.01  python   
16  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0.01  python   
17  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0.01  python   
18  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0.01  python   

                 cat  precision    recall        f1  
14             Usage   0.858586  0.702479  0.772727  
15        Parameters   0.852941  0.776786  0.813084  
16  DevelopmentNotes   0.000000  0.000000  0.000000  
17            Expand   0.722222  0.188406  0.298851  
18           Summary   0.781250  0.684932  0.729927  
Scores:

0,1
eval/f1,▁▂▄▅▆▇▇▇████████
eval/loss,█▆▅▄▃▂▂▂▂▁▁▁▁▁▁▁
eval/precision,▁▃▅▇▆█▇▇▇▇▇▇▇▇▇▇
eval/recall,▁▂▃▄▅▆▇▇▇███████
eval/runtime,▁▆▃▂▅▄▂▃▆▆▃▁█▂▁▁
eval/samples_per_second,▇▃▆▇▄▅▇▆▃▃▆█▁▇██
eval/steps_per_second,▇▃▆▇▄▅▇▆▃▃▆█▁▇██
train/epoch,▁▁▂▃▃▃▃▄▅▅▅▆▆▇▇▇███
train/global_step,▁▁▂▃▃▃▃▄▅▅▅▆▆▇▇▇███
train/grad_norm,▁█

0,1
eval/f1,0.52292
eval/loss,0.31167
eval/precision,0.643
eval/recall,0.47052
eval/runtime,0.5182
eval/samples_per_second,727.505
eval/steps_per_second,92.627
total_flos,748646437651200.0
train/epoch,15.0
train/global_step,2835.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1298/1298 [00:00<00:00, 11513.46 examples/s]
  7%|▋         | 128/1950 [00:05<01:15, 24.22it/s]
  7%|▋         | 130/1950 [00:05<01:15, 24.22it/s]

{'eval_loss': 0.43564853072166443, 'eval_f1': 0.0, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_runtime': 0.381, 'eval_samples_per_second': 682.489, 'eval_steps_per_second': 86.624, 'epoch': 1.0}


 13%|█▎        | 260/1950 [00:12<01:06, 25.59it/s]
 13%|█▎        | 260/1950 [00:12<01:06, 25.59it/s]

{'eval_loss': 0.39387354254722595, 'eval_f1': 0.0, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_runtime': 0.3609, 'eval_samples_per_second': 720.425, 'eval_steps_per_second': 91.439, 'epoch': 2.0}


 20%|█▉        | 389/1950 [00:18<01:03, 24.52it/s]
 20%|██        | 390/1950 [00:19<01:03, 24.52it/s]

{'eval_loss': 0.3559569716453552, 'eval_f1': 0.09463148316651501, 'eval_precision': 0.1326530612244898, 'eval_recall': 0.07355021216407355, 'eval_runtime': 0.3851, 'eval_samples_per_second': 675.175, 'eval_steps_per_second': 85.695, 'epoch': 3.0}


 27%|██▋       | 518/1950 [00:25<00:56, 25.27it/s]
 27%|██▋       | 520/1950 [00:25<00:56, 25.27it/s]

{'eval_loss': 0.32405221462249756, 'eval_f1': 0.11187607573149741, 'eval_precision': 0.14285714285714285, 'eval_recall': 0.09193776520509193, 'eval_runtime': 0.3793, 'eval_samples_per_second': 685.429, 'eval_steps_per_second': 86.997, 'epoch': 4.0}


 33%|███▎      | 650/1950 [00:31<00:51, 25.08it/s]
 33%|███▎      | 650/1950 [00:32<00:51, 25.08it/s]

{'eval_loss': 0.30009925365448, 'eval_f1': 0.12470402525651146, 'eval_precision': 0.14107142857142857, 'eval_recall': 0.11173974540311174, 'eval_runtime': 0.3788, 'eval_samples_per_second': 686.446, 'eval_steps_per_second': 87.126, 'epoch': 5.0}


 40%|███▉      | 779/1950 [00:38<00:47, 24.87it/s]
 40%|████      | 780/1950 [00:38<00:47, 24.87it/s]

{'eval_loss': 0.2819616198539734, 'eval_f1': 0.1324166030048383, 'eval_precision': 0.2807308970099668, 'eval_recall': 0.12025459688826025, 'eval_runtime': 0.3641, 'eval_samples_per_second': 714.039, 'eval_steps_per_second': 90.628, 'epoch': 6.0}


 47%|████▋     | 908/1950 [00:45<00:41, 25.14it/s]
 47%|████▋     | 910/1950 [00:45<00:41, 25.14it/s]

{'eval_loss': 0.26720282435417175, 'eval_f1': 0.26206244735656503, 'eval_precision': 0.5378737541528239, 'eval_recall': 0.2004336158642717, 'eval_runtime': 0.3654, 'eval_samples_per_second': 711.606, 'eval_steps_per_second': 90.319, 'epoch': 7.0}


 51%|█████▏    | 1004/1950 [00:50<00:39, 24.25it/s]

{'loss': 0.3407, 'grad_norm': 2.1007895469665527, 'learning_rate': 2.435897435897436e-06, 'epoch': 7.69}


 53%|█████▎    | 1040/1950 [00:51<00:36, 24.71it/s]
 53%|█████▎    | 1040/1950 [00:52<00:36, 24.71it/s]

{'eval_loss': 0.2571287155151367, 'eval_f1': 0.3202944789775749, 'eval_precision': 0.6649350649350649, 'eval_recall': 0.23921867324424695, 'eval_runtime': 0.3758, 'eval_samples_per_second': 691.935, 'eval_steps_per_second': 87.822, 'epoch': 8.0}


 60%|█████▉    | 1169/1950 [00:58<00:30, 25.26it/s]
 60%|██████    | 1170/1950 [00:58<00:30, 25.26it/s]

{'eval_loss': 0.24900488555431366, 'eval_f1': 0.3801034384028178, 'eval_precision': 0.6741041244083841, 'eval_recall': 0.2844623807856938, 'eval_runtime': 0.3703, 'eval_samples_per_second': 702.179, 'eval_steps_per_second': 89.123, 'epoch': 9.0}


 67%|██████▋   | 1298/1950 [01:04<00:26, 24.41it/s]
 67%|██████▋   | 1300/1950 [01:05<00:26, 24.41it/s]

{'eval_loss': 0.24336445331573486, 'eval_f1': 0.384923602007549, 'eval_precision': 0.6580453149001536, 'eval_recall': 0.29352434473284844, 'eval_runtime': 0.3594, 'eval_samples_per_second': 723.403, 'eval_steps_per_second': 91.817, 'epoch': 10.0}


 73%|███████▎  | 1430/1950 [01:11<00:20, 25.39it/s]
 73%|███████▎  | 1430/1950 [01:11<00:20, 25.39it/s]

{'eval_loss': 0.23837818205356598, 'eval_f1': 0.4073644322091527, 'eval_precision': 0.6488827588451649, 'eval_recall': 0.3121769559774727, 'eval_runtime': 0.364, 'eval_samples_per_second': 714.234, 'eval_steps_per_second': 90.653, 'epoch': 11.0}


 80%|███████▉  | 1559/1950 [01:18<00:15, 24.52it/s]
 80%|████████  | 1560/1950 [01:18<00:15, 24.52it/s]

{'eval_loss': 0.2337077409029007, 'eval_f1': 0.4087880473325217, 'eval_precision': 0.6399969078540507, 'eval_recall': 0.31539213678663847, 'eval_runtime': 0.4168, 'eval_samples_per_second': 623.848, 'eval_steps_per_second': 79.181, 'epoch': 12.0}


 87%|████████▋ | 1688/1950 [01:24<00:10, 24.92it/s]
 87%|████████▋ | 1690/1950 [01:25<00:10, 24.92it/s]

{'eval_loss': 0.2311081886291504, 'eval_f1': 0.43551491916299606, 'eval_precision': 0.6418790604697652, 'eval_recall': 0.34133130080610957, 'eval_runtime': 0.3589, 'eval_samples_per_second': 724.446, 'eval_steps_per_second': 91.949, 'epoch': 13.0}


 93%|█████████▎| 1820/1950 [01:31<00:05, 24.83it/s]
 93%|█████████▎| 1820/1950 [01:32<00:05, 24.83it/s]

{'eval_loss': 0.229455828666687, 'eval_f1': 0.444186369478282, 'eval_precision': 0.6459593280282936, 'eval_recall': 0.35008510019820677, 'eval_runtime': 0.3708, 'eval_samples_per_second': 701.242, 'eval_steps_per_second': 89.004, 'epoch': 14.0}


100%|█████████▉| 1949/1950 [01:38<00:00, 25.12it/s]
100%|██████████| 1950/1950 [01:39<00:00, 25.12it/s]

{'eval_loss': 0.22885540127754211, 'eval_f1': 0.4354734524338979, 'eval_precision': 0.6398264656691622, 'eval_recall': 0.34292809878377967, 'eval_runtime': 0.3575, 'eval_samples_per_second': 727.201, 'eval_steps_per_second': 92.299, 'epoch': 15.0}


100%|██████████| 1950/1950 [01:40<00:00, 19.37it/s]


{'train_runtime': 100.6797, 'train_samples_per_second': 154.649, 'train_steps_per_second': 19.368, 'train_loss': 0.273633555876903, 'epoch': 15.0}


100%|██████████| 33/33 [00:00<00:00, 92.64it/s] 


Evaluation Metrics: {'eval_loss': 0.229455828666687, 'eval_f1': 0.444186369478282, 'eval_precision': 0.6459593280282936, 'eval_recall': 0.35008510019820677, 'eval_runtime': 0.3707, 'eval_samples_per_second': 701.38, 'eval_steps_per_second': 89.021, 'epoch': 15.0}
CHECK SCORE                                                 model    lan  \
12  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0.01  pharo   
13  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0.01  pharo   
14  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0.01  pharo   
15  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0.01  pharo   
16  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0.01  pharo   
17  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0.01  pharo   
18  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0.01  pharo   

                        cat  precision    recall        f1  
12  Keyimplementationpoints   1.000000  0.380952  0.551724  
13                  Example   0.954023  0.821782  0.882979  
14         Responsibilities 

0,1
eval/f1,▁▁▂▃▃▃▅▆▇▇▇▇████
eval/loss,█▇▅▄▃▃▂▂▂▁▁▁▁▁▁▁
eval/precision,▁▁▂▂▂▄▇█████████
eval/recall,▁▁▂▃▃▃▅▆▇▇▇▇████
eval/runtime,▄▁▄▄▄▂▂▃▃▁▂█▁▃▁▃
eval/samples_per_second,▅█▄▅▅▇▇▆▆█▇▁█▆█▆
eval/steps_per_second,▅█▄▅▅▇▇▆▆█▇▁█▆█▆
train/epoch,▁▁▂▃▃▃▄▄▅▅▅▆▇▇▇███
train/global_step,▁▁▂▃▃▃▄▄▅▅▅▆▇▇▇███
train/grad_norm,▁

0,1
eval/f1,0.44419
eval/loss,0.22946
eval/precision,0.64596
eval/recall,0.35009
eval/runtime,0.3707
eval/samples_per_second,701.38
eval/steps_per_second,89.021
total_flos,515675326855680.0
train/epoch,15.0
train/global_step,1950.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 7614/7614 [00:00<00:00, 19604.63 examples/s]
  3%|▎         | 1005/30460 [00:35<17:04, 28.76it/s]

{'loss': 0.161, 'grad_norm': 0.3549679219722748, 'learning_rate': 4.8358502954694684e-05, 'epoch': 0.66}


  5%|▍         | 1520/30460 [00:53<18:11, 26.50it/s]
  5%|▌         | 1523/30460 [00:57<18:11, 26.50it/s]

{'eval_loss': 0.11682207137346268, 'eval_f1': 0.6615796605353623, 'eval_precision': 0.6692466938531052, 'eval_recall': 0.6548023592809471, 'eval_runtime': 4.1273, 'eval_samples_per_second': 369.003, 'eval_steps_per_second': 92.311, 'epoch': 1.0}


  7%|▋         | 2005/30460 [01:14<16:04, 29.51it/s]  

{'loss': 0.1034, 'grad_norm': 0.4843895137310028, 'learning_rate': 4.6717005909389365e-05, 'epoch': 1.31}


 10%|▉         | 3002/30460 [01:49<16:59, 26.93it/s]

{'loss': 0.0897, 'grad_norm': 0.6416785717010498, 'learning_rate': 4.507550886408405e-05, 'epoch': 1.97}


 10%|▉         | 3044/30460 [01:51<16:16, 28.08it/s]
 10%|█         | 3046/30460 [01:54<16:16, 28.08it/s]

{'eval_loss': 0.09182408452033997, 'eval_f1': 0.8000248945307027, 'eval_precision': 0.8477872240401124, 'eval_recall': 0.7682586696624348, 'eval_runtime': 3.6424, 'eval_samples_per_second': 418.136, 'eval_steps_per_second': 104.603, 'epoch': 2.0}


 13%|█▎        | 4004/30460 [02:28<14:59, 29.40it/s]  

{'loss': 0.059, 'grad_norm': 0.010915370658040047, 'learning_rate': 4.343401181877873e-05, 'epoch': 2.63}


 15%|█▍        | 4567/30460 [02:48<14:52, 29.02it/s]
 15%|█▌        | 4569/30460 [02:52<14:52, 29.02it/s]

{'eval_loss': 0.09842108190059662, 'eval_f1': 0.8285205196377552, 'eval_precision': 0.8690846526889985, 'eval_recall': 0.7950447941217705, 'eval_runtime': 3.4843, 'eval_samples_per_second': 437.107, 'eval_steps_per_second': 109.349, 'epoch': 3.0}


 16%|█▋        | 5004/30460 [03:08<15:06, 28.08it/s]  

{'loss': 0.054, 'grad_norm': 1.148534893989563, 'learning_rate': 4.179251477347341e-05, 'epoch': 3.28}


 20%|█▉        | 6004/30460 [03:43<13:47, 29.56it/s]

{'loss': 0.0456, 'grad_norm': 0.22328919172286987, 'learning_rate': 4.015101772816809e-05, 'epoch': 3.94}


 20%|██        | 6092/30460 [03:45<13:42, 29.64it/s]
 20%|██        | 6092/30460 [03:49<13:42, 29.64it/s]

{'eval_loss': 0.10673418641090393, 'eval_f1': 0.8311518360801647, 'eval_precision': 0.8762496881836185, 'eval_recall': 0.8000285425568584, 'eval_runtime': 3.5983, 'eval_samples_per_second': 423.257, 'eval_steps_per_second': 105.884, 'epoch': 4.0}


 23%|██▎       | 7006/30460 [04:22<12:56, 30.21it/s]  

{'loss': 0.0325, 'grad_norm': 0.009296189062297344, 'learning_rate': 3.850952068286277e-05, 'epoch': 4.6}


 25%|██▍       | 7614/30460 [04:43<13:52, 27.45it/s]
 25%|██▌       | 7615/30460 [04:47<13:52, 27.45it/s]

{'eval_loss': 0.12154172360897064, 'eval_f1': 0.8555570713038405, 'eval_precision': 0.9053942718854836, 'eval_recall': 0.8189238007171589, 'eval_runtime': 3.6927, 'eval_samples_per_second': 412.435, 'eval_steps_per_second': 103.177, 'epoch': 5.0}


 26%|██▋       | 8003/30460 [05:02<12:36, 29.70it/s]  

{'loss': 0.026, 'grad_norm': 0.007011891808360815, 'learning_rate': 3.6868023637557454e-05, 'epoch': 5.25}


 30%|██▉       | 9004/30460 [05:36<13:02, 27.42it/s]

{'loss': 0.0216, 'grad_norm': 0.36752161383628845, 'learning_rate': 3.5226526592252135e-05, 'epoch': 5.91}


 30%|██▉       | 9135/30460 [05:41<11:46, 30.18it/s]
 30%|███       | 9138/30460 [05:44<11:46, 30.18it/s]

{'eval_loss': 0.1513126641511917, 'eval_f1': 0.8277630238705801, 'eval_precision': 0.9097971674479668, 'eval_recall': 0.7800824366964783, 'eval_runtime': 3.2634, 'eval_samples_per_second': 466.688, 'eval_steps_per_second': 116.749, 'epoch': 6.0}


 33%|███▎      | 10002/30460 [06:15<12:14, 27.85it/s] 

{'loss': 0.0146, 'grad_norm': 11.401954650878906, 'learning_rate': 3.3585029546946817e-05, 'epoch': 6.57}


 35%|███▍      | 10660/30460 [06:38<11:41, 28.22it/s]
 35%|███▌      | 10661/30460 [06:42<11:41, 28.22it/s]

{'eval_loss': 0.12660424411296844, 'eval_f1': 0.8674833096036192, 'eval_precision': 0.8938749017510798, 'eval_recall': 0.8467066278961747, 'eval_runtime': 3.7231, 'eval_samples_per_second': 409.07, 'eval_steps_per_second': 102.335, 'epoch': 7.0}


 36%|███▌      | 11005/30460 [06:55<10:45, 30.15it/s]  

{'loss': 0.0191, 'grad_norm': 0.0071038855239748955, 'learning_rate': 3.19435325016415e-05, 'epoch': 7.22}


 39%|███▉      | 12004/30460 [07:29<10:14, 30.04it/s]

{'loss': 0.0126, 'grad_norm': 0.0012351435143500566, 'learning_rate': 3.030203545633618e-05, 'epoch': 7.88}


 40%|████      | 12184/30460 [07:35<10:38, 28.62it/s]
 40%|████      | 12184/30460 [07:39<10:38, 28.62it/s]

{'eval_loss': 0.15498292446136475, 'eval_f1': 0.8515446889332493, 'eval_precision': 0.8414897334810432, 'eval_recall': 0.8746511673573575, 'eval_runtime': 4.1623, 'eval_samples_per_second': 365.901, 'eval_steps_per_second': 91.535, 'epoch': 8.0}


 43%|████▎     | 13004/30460 [08:09<09:37, 30.25it/s]  

{'loss': 0.0084, 'grad_norm': 0.007071142550557852, 'learning_rate': 2.8660538411030864e-05, 'epoch': 8.54}


 45%|████▌     | 13707/30460 [08:33<10:12, 27.37it/s]
 45%|████▌     | 13707/30460 [08:37<10:12, 27.37it/s]

{'eval_loss': 0.16701507568359375, 'eval_f1': 0.853066315316073, 'eval_precision': 0.8527149263202617, 'eval_recall': 0.8552832453684329, 'eval_runtime': 3.7943, 'eval_samples_per_second': 401.396, 'eval_steps_per_second': 100.415, 'epoch': 9.0}


 46%|████▌     | 14005/30460 [08:48<09:19, 29.41it/s]  

{'loss': 0.0115, 'grad_norm': 0.007011132314801216, 'learning_rate': 2.7019041365725546e-05, 'epoch': 9.19}


 49%|████▉     | 15005/30460 [09:23<09:14, 27.89it/s]

{'loss': 0.0063, 'grad_norm': 0.0014107017777860165, 'learning_rate': 2.5377544320420227e-05, 'epoch': 9.85}


 50%|█████     | 15230/30460 [09:31<08:36, 29.51it/s]
 50%|█████     | 15230/30460 [09:34<08:36, 29.51it/s]

{'eval_loss': 0.17314893007278442, 'eval_f1': 0.8535335918734788, 'eval_precision': 0.8712905955629004, 'eval_recall': 0.8382036528989589, 'eval_runtime': 3.5491, 'eval_samples_per_second': 429.122, 'eval_steps_per_second': 107.351, 'epoch': 10.0}


 53%|█████▎    | 16004/30460 [10:02<08:55, 27.02it/s]  

{'loss': 0.0082, 'grad_norm': 0.015803273767232895, 'learning_rate': 2.3736047275114905e-05, 'epoch': 10.51}


 55%|█████▍    | 16751/30460 [10:28<08:07, 28.09it/s]
 55%|█████▌    | 16753/30460 [10:31<08:07, 28.09it/s]

{'eval_loss': 0.16472375392913818, 'eval_f1': 0.8603173381927895, 'eval_precision': 0.8935534240758518, 'eval_recall': 0.8354909572627781, 'eval_runtime': 3.4437, 'eval_samples_per_second': 442.258, 'eval_steps_per_second': 110.637, 'epoch': 11.0}


 56%|█████▌    | 17006/30460 [10:41<08:07, 27.62it/s]  

{'loss': 0.0058, 'grad_norm': 0.0004934301250614226, 'learning_rate': 2.2094550229809586e-05, 'epoch': 11.16}


 59%|█████▉    | 18004/30460 [11:16<07:06, 29.23it/s]

{'loss': 0.0054, 'grad_norm': 0.0005783144733868539, 'learning_rate': 2.045305318450427e-05, 'epoch': 11.82}


 60%|█████▉    | 18275/30460 [11:25<06:53, 29.48it/s]
 60%|██████    | 18276/30460 [11:28<06:53, 29.48it/s]

{'eval_loss': 0.182911217212677, 'eval_f1': 0.8514224593178353, 'eval_precision': 0.876753794171221, 'eval_recall': 0.8302699022311353, 'eval_runtime': 3.5257, 'eval_samples_per_second': 431.973, 'eval_steps_per_second': 108.064, 'epoch': 12.0}


 62%|██████▏   | 19003/30460 [11:55<06:44, 28.33it/s]  

{'loss': 0.0034, 'grad_norm': 0.0008297091699205339, 'learning_rate': 1.881155613919895e-05, 'epoch': 12.48}


 65%|██████▍   | 19798/30460 [12:22<06:21, 27.95it/s]
 65%|██████▌   | 19799/30460 [12:26<06:21, 27.95it/s]

{'eval_loss': 0.16999013721942902, 'eval_f1': 0.8626620898191274, 'eval_precision': 0.8806579204740104, 'eval_recall': 0.8474691357428893, 'eval_runtime': 3.3609, 'eval_samples_per_second': 453.149, 'eval_steps_per_second': 113.362, 'epoch': 13.0}


 66%|██████▌   | 20006/30460 [12:34<06:18, 27.62it/s]  

{'loss': 0.0032, 'grad_norm': 0.0006091349641792476, 'learning_rate': 1.717005909389363e-05, 'epoch': 13.13}


 69%|██████▉   | 21004/30460 [13:08<05:24, 29.13it/s]

{'loss': 0.0027, 'grad_norm': 0.00039557606214657426, 'learning_rate': 1.5528562048588312e-05, 'epoch': 13.79}


 70%|██████▉   | 21320/30460 [13:19<05:03, 30.10it/s]
 70%|███████   | 21322/30460 [13:23<05:03, 30.10it/s]

{'eval_loss': 0.1829359233379364, 'eval_f1': 0.8568449369542194, 'eval_precision': 0.8705921443816713, 'eval_recall': 0.846902581237139, 'eval_runtime': 3.5963, 'eval_samples_per_second': 423.487, 'eval_steps_per_second': 105.941, 'epoch': 14.0}


 72%|███████▏  | 22004/30460 [13:47<04:52, 28.90it/s]

{'loss': 0.002, 'grad_norm': 0.009894183836877346, 'learning_rate': 1.3887065003282995e-05, 'epoch': 14.45}


 75%|███████▍  | 22843/30460 [14:16<04:36, 27.56it/s]
 75%|███████▌  | 22845/30460 [14:20<04:36, 27.56it/s]

{'eval_loss': 0.20743593573570251, 'eval_f1': 0.8641431684337822, 'eval_precision': 0.9055447498250091, 'eval_recall': 0.8350583127403696, 'eval_runtime': 4.1889, 'eval_samples_per_second': 363.583, 'eval_steps_per_second': 90.955, 'epoch': 15.0}


 76%|███████▌  | 23005/30460 [14:27<04:15, 29.15it/s]  

{'loss': 0.0028, 'grad_norm': 0.0003520978498272598, 'learning_rate': 1.2245567957977677e-05, 'epoch': 15.1}


 79%|███████▉  | 24004/30460 [15:01<03:57, 27.19it/s]

{'loss': 0.002, 'grad_norm': 0.0011449467856436968, 'learning_rate': 1.0604070912672358e-05, 'epoch': 15.76}


 80%|███████▉  | 24366/30460 [15:14<03:50, 26.49it/s]
 80%|████████  | 24368/30460 [15:18<03:49, 26.49it/s]

{'eval_loss': 0.2014881670475006, 'eval_f1': 0.8621658766891406, 'eval_precision': 0.902527808111828, 'eval_recall': 0.8327947242182959, 'eval_runtime': 3.7748, 'eval_samples_per_second': 403.462, 'eval_steps_per_second': 100.932, 'epoch': 16.0}


 82%|████████▏ | 25004/30460 [15:40<03:15, 27.84it/s]

{'loss': 0.0018, 'grad_norm': 0.00044060469372197986, 'learning_rate': 8.96257386736704e-06, 'epoch': 16.41}


 85%|████████▍ | 25888/30460 [16:11<02:32, 29.92it/s]
 85%|████████▌ | 25891/30460 [16:15<02:32, 29.92it/s]

{'eval_loss': 0.21037325263023376, 'eval_f1': 0.8627496929996842, 'eval_precision': 0.8807637727270379, 'eval_recall': 0.8484008709207256, 'eval_runtime': 3.5021, 'eval_samples_per_second': 434.878, 'eval_steps_per_second': 108.791, 'epoch': 17.0}


 85%|████████▌ | 26004/30460 [16:20<02:41, 27.51it/s]

{'loss': 0.0008, 'grad_norm': 0.0007994017214514315, 'learning_rate': 7.321076822061721e-06, 'epoch': 17.07}


 89%|████████▊ | 27004/30460 [16:54<01:58, 29.20it/s]

{'loss': 0.0008, 'grad_norm': 0.002988239284604788, 'learning_rate': 5.679579776756402e-06, 'epoch': 17.73}


 90%|████████▉ | 27413/30460 [17:09<01:44, 29.09it/s]
 90%|█████████ | 27414/30460 [17:12<01:44, 29.09it/s]

{'eval_loss': 0.21451881527900696, 'eval_f1': 0.8669786328225337, 'eval_precision': 0.8809042765747226, 'eval_recall': 0.8544446034129322, 'eval_runtime': 3.7113, 'eval_samples_per_second': 410.374, 'eval_steps_per_second': 102.661, 'epoch': 18.0}


 92%|█████████▏| 28004/30460 [17:34<01:23, 29.49it/s]

{'loss': 0.0003, 'grad_norm': 0.00041232327930629253, 'learning_rate': 4.038082731451084e-06, 'epoch': 18.38}


 95%|█████████▍| 28936/30460 [18:06<00:52, 28.89it/s]
 95%|█████████▌| 28937/30460 [18:10<00:52, 28.89it/s]

{'eval_loss': 0.21605724096298218, 'eval_f1': 0.8668344854743785, 'eval_precision': 0.8825835125492134, 'eval_recall': 0.852840832459316, 'eval_runtime': 3.6135, 'eval_samples_per_second': 421.473, 'eval_steps_per_second': 105.438, 'epoch': 19.0}


 95%|█████████▌| 29004/30460 [18:13<00:49, 29.63it/s]

{'loss': 0.0007, 'grad_norm': 0.0001009036714094691, 'learning_rate': 2.396585686145765e-06, 'epoch': 19.04}


 98%|█████████▊| 30003/30460 [18:47<00:16, 27.97it/s]

{'loss': 0.0006, 'grad_norm': 9.702933311928064e-05, 'learning_rate': 7.550886408404465e-07, 'epoch': 19.7}


100%|██████████| 30460/30460 [19:03<00:00, 28.40it/s]
100%|██████████| 30460/30460 [19:07<00:00, 28.40it/s]

{'eval_loss': 0.21646733582019806, 'eval_f1': 0.8669646528790652, 'eval_precision': 0.8875834060861489, 'eval_recall': 0.8492011910487768, 'eval_runtime': 3.2718, 'eval_samples_per_second': 465.494, 'eval_steps_per_second': 116.45, 'epoch': 20.0}


100%|██████████| 30460/30460 [19:08<00:00, 26.51it/s]


{'train_runtime': 1148.8718, 'train_samples_per_second': 106.034, 'train_steps_per_second': 26.513, 'train_loss': 0.023175487905320022, 'epoch': 20.0}


100%|██████████| 381/381 [00:04<00:00, 93.69it/s] 


Evaluation Metrics: {'eval_loss': 0.12660424411296844, 'eval_f1': 0.8674833096036192, 'eval_precision': 0.8938749017510798, 'eval_recall': 0.8467066278961747, 'eval_runtime': 4.0766, 'eval_samples_per_second': 373.595, 'eval_steps_per_second': 93.46, 'epoch': 20.0}
CHECK SCORE                                                 model   lan          cat  \
12  lr-5e-05_epoch-20_batchSize-4_weightsOfDecay-0.01  java      summary   
13  lr-5e-05_epoch-20_batchSize-4_weightsOfDecay-0.01  java    Ownership   
14  lr-5e-05_epoch-20_batchSize-4_weightsOfDecay-0.01  java       Expand   
15  lr-5e-05_epoch-20_batchSize-4_weightsOfDecay-0.01  java        usage   
16  lr-5e-05_epoch-20_batchSize-4_weightsOfDecay-0.01  java      Pointer   
17  lr-5e-05_epoch-20_batchSize-4_weightsOfDecay-0.01  java  deprecation   
18  lr-5e-05_epoch-20_batchSize-4_weightsOfDecay-0.01  java     rational   

    precision    recall        f1  
12   0.939860  0.955903  0.947814  
13   0.964912  1.000000  0.982143  
14   

0,1
eval/f1,▁▆▇▇█▇█▇███▇█████████
eval/loss,▂▁▁▂▃▄▃▅▅▆▅▆▅▆▇▇████▃
eval/precision,▁▆▇▇███▆▆▇█▇▇▇██▇▇▇▇█
eval/recall,▁▅▅▆▆▅▇█▇▇▇▇▇▇▇▇▇▇▇▇▇
eval/runtime,█▄▃▄▄▁▄█▅▃▂▃▂▄█▅▃▄▄▁▇
eval/samples_per_second,▁▅▆▅▄█▄▁▄▅▆▆▇▅▁▄▆▄▅█▂
eval/steps_per_second,▁▅▆▅▄█▄▁▄▅▆▆▇▅▁▄▆▄▅█▂
train/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
train/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇█████
train/grad_norm,▁▁▁▁▂▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/f1,0.86748
eval/loss,0.1266
eval/precision,0.89387
eval/recall,0.84671
eval/runtime,4.0766
eval/samples_per_second,373.595
eval/steps_per_second,93.46
total_flos,4034654355655680.0
train/epoch,20.0
train/global_step,30460.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1884/1884 [00:00<00:00, 16273.56 examples/s]
  5%|▍         | 374/7540 [00:13<04:04, 29.32it/s]
  5%|▌         | 377/7540 [00:14<04:04, 29.32it/s]

{'eval_loss': 0.3395572006702423, 'eval_f1': 0.3255435547540405, 'eval_precision': 0.5654320987654321, 'eval_recall': 0.2681555368666203, 'eval_runtime': 0.9015, 'eval_samples_per_second': 418.209, 'eval_steps_per_second': 105.384, 'epoch': 1.0}


 10%|█         | 754/7540 [00:27<04:06, 27.55it/s]
 10%|█         | 754/7540 [00:28<04:06, 27.55it/s]

{'eval_loss': 0.32879841327667236, 'eval_f1': 0.5320615801298528, 'eval_precision': 0.5890874310129133, 'eval_recall': 0.4975278540922322, 'eval_runtime': 0.928, 'eval_samples_per_second': 406.245, 'eval_steps_per_second': 102.37, 'epoch': 2.0}


 13%|█▎        | 1003/7540 [00:38<04:01, 27.03it/s]

{'loss': 0.324, 'grad_norm': 3.53851318359375, 'learning_rate': 4.3368700265251996e-05, 'epoch': 2.65}


 15%|█▍        | 1128/7540 [00:42<03:43, 28.65it/s]
 15%|█▌        | 1131/7540 [00:43<03:43, 28.65it/s]

{'eval_loss': 0.3374228775501251, 'eval_f1': 0.6194591011330328, 'eval_precision': 0.7913035240621447, 'eval_recall': 0.5637082244284854, 'eval_runtime': 0.8012, 'eval_samples_per_second': 470.542, 'eval_steps_per_second': 118.572, 'epoch': 3.0}


 20%|█▉        | 1507/7540 [00:58<03:20, 30.06it/s]
 20%|██        | 1508/7540 [00:58<03:20, 30.06it/s]

{'eval_loss': 0.35621052980422974, 'eval_f1': 0.6980776942655098, 'eval_precision': 0.7570722411451725, 'eval_recall': 0.6579926665461886, 'eval_runtime': 0.9062, 'eval_samples_per_second': 416.038, 'eval_steps_per_second': 104.837, 'epoch': 4.0}


 25%|██▍       | 1883/7540 [01:13<03:17, 28.67it/s]
 25%|██▌       | 1885/7540 [01:13<03:17, 28.67it/s]

{'eval_loss': 0.39074066281318665, 'eval_f1': 0.7173128451208113, 'eval_precision': 0.75967995334843, 'eval_recall': 0.6859434423753012, 'eval_runtime': 0.8012, 'eval_samples_per_second': 470.538, 'eval_steps_per_second': 118.571, 'epoch': 5.0}


 27%|██▋       | 2005/7540 [01:19<03:13, 28.61it/s]

{'loss': 0.1127, 'grad_norm': 0.1349160075187683, 'learning_rate': 3.673740053050398e-05, 'epoch': 5.31}


 30%|██▉       | 2261/7540 [01:28<03:01, 29.02it/s]
 30%|███       | 2262/7540 [01:29<03:01, 29.02it/s]

{'eval_loss': 0.4148860573768616, 'eval_f1': 0.7206173202333706, 'eval_precision': 0.7530027239266369, 'eval_recall': 0.7010630613272879, 'eval_runtime': 0.947, 'eval_samples_per_second': 398.083, 'eval_steps_per_second': 100.313, 'epoch': 6.0}


 35%|███▍      | 2637/7540 [01:42<02:46, 29.47it/s]
 35%|███▌      | 2639/7540 [01:43<02:46, 29.47it/s]

{'eval_loss': 0.4655418395996094, 'eval_f1': 0.7144446378793515, 'eval_precision': 0.7360229063010422, 'eval_recall': 0.7078506044643899, 'eval_runtime': 0.981, 'eval_samples_per_second': 384.298, 'eval_steps_per_second': 96.839, 'epoch': 7.0}


 40%|███▉      | 3004/7540 [01:57<02:44, 27.66it/s]

{'loss': 0.0387, 'grad_norm': 0.18613705039024353, 'learning_rate': 3.010610079575597e-05, 'epoch': 7.96}


 40%|███▉      | 3013/7540 [01:57<02:41, 27.95it/s]
 40%|████      | 3016/7540 [01:58<02:41, 27.95it/s]

{'eval_loss': 0.4795076549053192, 'eval_f1': 0.7337510938535358, 'eval_precision': 0.756511405616265, 'eval_recall': 0.717203900697999, 'eval_runtime': 0.8682, 'eval_samples_per_second': 434.251, 'eval_steps_per_second': 109.427, 'epoch': 8.0}


 45%|████▌     | 3393/7540 [02:12<02:22, 29.20it/s]
 45%|████▌     | 3393/7540 [02:14<02:22, 29.20it/s]

{'eval_loss': 0.4842349886894226, 'eval_f1': 0.7391435051829471, 'eval_precision': 0.7523559190031153, 'eval_recall': 0.7292658565106992, 'eval_runtime': 1.1634, 'eval_samples_per_second': 324.043, 'eval_steps_per_second': 81.656, 'epoch': 9.0}


 50%|████▉     | 3768/7540 [02:27<02:11, 28.79it/s]
 50%|█████     | 3770/7540 [02:28<02:10, 28.79it/s]

{'eval_loss': 0.5249894261360168, 'eval_f1': 0.7299115630356865, 'eval_precision': 0.7354159375371018, 'eval_recall': 0.7353644371673231, 'eval_runtime': 0.8254, 'eval_samples_per_second': 456.769, 'eval_steps_per_second': 115.101, 'epoch': 10.0}


 53%|█████▎    | 4005/7540 [02:37<02:08, 27.59it/s]

{'loss': 0.0133, 'grad_norm': 0.07263433188199997, 'learning_rate': 2.347480106100796e-05, 'epoch': 10.61}


 55%|█████▌    | 4147/7540 [02:42<01:59, 28.40it/s]
 55%|█████▌    | 4147/7540 [02:43<01:59, 28.40it/s]

{'eval_loss': 0.5765374898910522, 'eval_f1': 0.7219334552641107, 'eval_precision': 0.7501786684291549, 'eval_recall': 0.7045140095524524, 'eval_runtime': 0.89, 'eval_samples_per_second': 423.582, 'eval_steps_per_second': 106.738, 'epoch': 11.0}


 60%|█████▉    | 4522/7540 [02:57<01:39, 30.40it/s]
 60%|██████    | 4524/7540 [02:58<01:39, 30.40it/s]

{'eval_loss': 0.5909558534622192, 'eval_f1': 0.7113667554624782, 'eval_precision': 0.743551021677604, 'eval_recall': 0.6896228763115166, 'eval_runtime': 0.8282, 'eval_samples_per_second': 455.186, 'eval_steps_per_second': 114.702, 'epoch': 12.0}


 65%|██████▍   | 4898/7540 [03:12<01:29, 29.43it/s]
 65%|██████▌   | 4901/7540 [03:13<01:29, 29.43it/s]

{'eval_loss': 0.5905212759971619, 'eval_f1': 0.7317996371256809, 'eval_precision': 0.7591873424626037, 'eval_recall': 0.7105442013632766, 'eval_runtime': 0.8533, 'eval_samples_per_second': 441.808, 'eval_steps_per_second': 111.331, 'epoch': 13.0}


 66%|██████▋   | 5004/7540 [03:17<01:34, 26.78it/s]

{'loss': 0.0024, 'grad_norm': 0.00399342505261302, 'learning_rate': 1.6843501326259946e-05, 'epoch': 13.26}


 70%|██████▉   | 5277/7540 [03:27<01:17, 29.04it/s]
 70%|███████   | 5278/7540 [03:28<01:17, 29.04it/s]

{'eval_loss': 0.597848653793335, 'eval_f1': 0.741239188620691, 'eval_precision': 0.7713872534443453, 'eval_recall': 0.717421415058672, 'eval_runtime': 0.8455, 'eval_samples_per_second': 445.866, 'eval_steps_per_second': 112.354, 'epoch': 14.0}


 75%|███████▍  | 5653/7540 [03:42<01:05, 28.65it/s]
 75%|███████▌  | 5655/7540 [03:43<01:05, 28.65it/s]

{'eval_loss': 0.5939534902572632, 'eval_f1': 0.7361372172849067, 'eval_precision': 0.7527801849087565, 'eval_recall': 0.7236930761948359, 'eval_runtime': 0.9178, 'eval_samples_per_second': 410.756, 'eval_steps_per_second': 103.506, 'epoch': 15.0}


 80%|███████▉  | 6002/7540 [03:55<00:56, 27.14it/s]

{'loss': 0.0006, 'grad_norm': 0.017145980149507523, 'learning_rate': 1.0212201591511936e-05, 'epoch': 15.92}


 80%|███████▉  | 6031/7540 [03:56<00:54, 27.72it/s]
 80%|████████  | 6032/7540 [03:57<00:54, 27.72it/s]

{'eval_loss': 0.6176096200942993, 'eval_f1': 0.7435576976412348, 'eval_precision': 0.7597982266584344, 'eval_recall': 0.7333167371369222, 'eval_runtime': 0.901, 'eval_samples_per_second': 418.415, 'eval_steps_per_second': 105.436, 'epoch': 16.0}


 85%|████████▍ | 6407/7540 [04:11<00:40, 28.08it/s]
 85%|████████▌ | 6409/7540 [04:12<00:40, 28.08it/s]

{'eval_loss': 0.6203736066818237, 'eval_f1': 0.7265867182976999, 'eval_precision': 0.7479522348715629, 'eval_recall': 0.7124455036273489, 'eval_runtime': 0.8949, 'eval_samples_per_second': 421.288, 'eval_steps_per_second': 106.16, 'epoch': 17.0}


 90%|████████▉ | 6784/7540 [04:27<00:28, 26.53it/s]
 90%|█████████ | 6786/7540 [04:28<00:28, 26.53it/s]

{'eval_loss': 0.6406990885734558, 'eval_f1': 0.7318226388105893, 'eval_precision': 0.755650648537131, 'eval_recall': 0.7149368199526573, 'eval_runtime': 0.9746, 'eval_samples_per_second': 386.817, 'eval_steps_per_second': 97.474, 'epoch': 18.0}


 93%|█████████▎| 7005/7540 [04:36<00:19, 27.66it/s]

{'loss': 0.0004, 'grad_norm': 0.004116511438041925, 'learning_rate': 3.580901856763926e-06, 'epoch': 18.57}


 95%|█████████▌| 7163/7540 [04:42<00:12, 29.02it/s]
 95%|█████████▌| 7163/7540 [04:42<00:12, 29.02it/s]

{'eval_loss': 0.6406493782997131, 'eval_f1': 0.7339869953331668, 'eval_precision': 0.7552514663375318, 'eval_recall': 0.7189482071162183, 'eval_runtime': 0.8259, 'eval_samples_per_second': 456.471, 'eval_steps_per_second': 115.026, 'epoch': 19.0}


100%|█████████▉| 7537/7540 [04:57<00:00, 29.62it/s]
100%|██████████| 7540/7540 [04:58<00:00, 29.62it/s]

{'eval_loss': 0.6454920172691345, 'eval_f1': 0.7327850722562437, 'eval_precision': 0.7563073151844338, 'eval_recall': 0.7160496563915807, 'eval_runtime': 0.9055, 'eval_samples_per_second': 416.334, 'eval_steps_per_second': 104.912, 'epoch': 20.0}


100%|██████████| 7540/7540 [04:59<00:00, 25.13it/s]


{'train_runtime': 299.9946, 'train_samples_per_second': 100.468, 'train_steps_per_second': 25.134, 'train_loss': 0.06529424050402895, 'epoch': 20.0}


100%|██████████| 95/95 [00:00<00:00, 111.47it/s]


Evaluation Metrics: {'eval_loss': 0.6176096200942993, 'eval_f1': 0.7435576976412348, 'eval_precision': 0.7597982266584344, 'eval_recall': 0.7333167371369222, 'eval_runtime': 0.8643, 'eval_samples_per_second': 436.185, 'eval_steps_per_second': 109.914, 'epoch': 20.0}
CHECK SCORE                                                 model     lan  \
14  lr-5e-05_epoch-20_batchSize-4_weightsOfDecay-0.01  python   
15  lr-5e-05_epoch-20_batchSize-4_weightsOfDecay-0.01  python   
16  lr-5e-05_epoch-20_batchSize-4_weightsOfDecay-0.01  python   
17  lr-5e-05_epoch-20_batchSize-4_weightsOfDecay-0.01  python   
18  lr-5e-05_epoch-20_batchSize-4_weightsOfDecay-0.01  python   

                 cat  precision    recall        f1  
14             Usage   0.947917  0.752066  0.838710  
15        Parameters   0.867257  0.875000  0.871111  
16  DevelopmentNotes   0.604651  0.650000  0.626506  
17            Expand   0.666667  0.608696  0.636364  
18           Summary   0.712500  0.780822  0.745098  
Scores

0,1
eval/f1,▁▄▆▇███████▇█████████
eval/loss,▁▁▁▂▂▃▄▄▄▅▆▇▇▇▇▇▇███▇
eval/precision,▁▂█▇▇▇▆▇▇▆▇▇▇▇▇▇▇▇▇▇▇
eval/recall,▁▄▅▇▇▇█████▇█████████
eval/runtime,▃▃▁▃▁▄▄▂█▁▃▂▂▂▃▃▃▄▁▃▂
eval/samples_per_second,▅▅█▅█▅▄▆▁▇▆▇▇▇▅▆▆▄▇▅▆
eval/steps_per_second,▅▅█▅█▅▄▆▁▇▆▇▇▇▅▆▆▄▇▅▆
train/epoch,▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇████
train/grad_norm,█▁▁▁▁▁▁

0,1
eval/f1,0.74356
eval/loss,0.61761
eval/precision,0.7598
eval/recall,0.73332
eval/runtime,0.8643
eval/samples_per_second,436.185
eval/steps_per_second,109.914
total_flos,998195250201600.0
train/epoch,20.0
train/global_step,7540.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1298/1298 [00:00<00:00, 13392.85 examples/s]
  5%|▍         | 258/5200 [00:08<02:45, 29.93it/s]
  5%|▌         | 260/5200 [00:09<02:45, 29.93it/s]

{'eval_loss': 0.26819658279418945, 'eval_f1': 0.3099733059975648, 'eval_precision': 0.6558503401360544, 'eval_recall': 0.2509629290053808, 'eval_runtime': 0.5382, 'eval_samples_per_second': 483.07, 'eval_steps_per_second': 120.768, 'epoch': 1.0}


 10%|▉         | 517/5200 [00:19<02:51, 27.24it/s]
 10%|█         | 520/5200 [00:20<02:51, 27.24it/s]

{'eval_loss': 0.23664571344852448, 'eval_f1': 0.48742452227869854, 'eval_precision': 0.6132037075433302, 'eval_recall': 0.4249074098167866, 'eval_runtime': 0.6184, 'eval_samples_per_second': 420.47, 'eval_steps_per_second': 105.118, 'epoch': 2.0}


 15%|█▍        | 777/5200 [00:30<02:38, 27.94it/s]
 15%|█▌        | 780/5200 [00:31<02:38, 27.94it/s]

{'eval_loss': 0.22219790518283844, 'eval_f1': 0.5459526986674754, 'eval_precision': 0.7371235806150418, 'eval_recall': 0.4893212574908335, 'eval_runtime': 0.585, 'eval_samples_per_second': 444.473, 'eval_steps_per_second': 111.118, 'epoch': 3.0}


 19%|█▉        | 1003/5200 [00:39<02:30, 27.80it/s]

{'loss': 0.1961, 'grad_norm': 0.07677570730447769, 'learning_rate': 4.038461538461539e-05, 'epoch': 3.85}


 20%|█▉        | 1038/5200 [00:41<02:23, 29.05it/s]
 20%|██        | 1040/5200 [00:41<02:23, 29.05it/s]

{'eval_loss': 0.21900954842567444, 'eval_f1': 0.6685852815563967, 'eval_precision': 0.8285626432705955, 'eval_recall': 0.5961581592489756, 'eval_runtime': 0.7714, 'eval_samples_per_second': 337.05, 'eval_steps_per_second': 84.263, 'epoch': 4.0}


 25%|██▍       | 1299/5200 [00:51<02:10, 29.83it/s]
 25%|██▌       | 1300/5200 [00:52<02:10, 29.83it/s]

{'eval_loss': 0.22935806214809418, 'eval_f1': 0.6808328923200774, 'eval_precision': 0.7691746475856087, 'eval_recall': 0.6215928442491163, 'eval_runtime': 0.5907, 'eval_samples_per_second': 440.166, 'eval_steps_per_second': 110.041, 'epoch': 5.0}


 30%|███       | 1560/5200 [01:02<02:05, 29.08it/s]
 30%|███       | 1560/5200 [01:02<02:05, 29.08it/s]

{'eval_loss': 0.28589725494384766, 'eval_f1': 0.6075966122695543, 'eval_precision': 0.6664345139954897, 'eval_recall': 0.562514041883426, 'eval_runtime': 0.6045, 'eval_samples_per_second': 430.13, 'eval_steps_per_second': 107.532, 'epoch': 6.0}


 35%|███▍      | 1818/5200 [01:12<02:01, 27.92it/s]
 35%|███▌      | 1820/5200 [01:13<02:01, 27.92it/s]

{'eval_loss': 0.2807515561580658, 'eval_f1': 0.6635880162765162, 'eval_precision': 0.7759824740402641, 'eval_recall': 0.6049669563478047, 'eval_runtime': 0.6184, 'eval_samples_per_second': 420.428, 'eval_steps_per_second': 105.107, 'epoch': 7.0}


 39%|███▊      | 2003/5200 [01:20<02:02, 26.01it/s]

{'loss': 0.0381, 'grad_norm': 0.03134394437074661, 'learning_rate': 3.0769230769230774e-05, 'epoch': 7.69}


 40%|███▉      | 2078/5200 [01:23<01:49, 28.45it/s]
 40%|████      | 2080/5200 [01:24<01:49, 28.45it/s]

{'eval_loss': 0.2995927035808563, 'eval_f1': 0.6812679312482925, 'eval_precision': 0.8167734031655269, 'eval_recall': 0.6276426455406927, 'eval_runtime': 0.6389, 'eval_samples_per_second': 406.919, 'eval_steps_per_second': 101.73, 'epoch': 8.0}


 45%|████▍     | 2339/5200 [01:33<01:40, 28.52it/s]
 45%|████▌     | 2340/5200 [01:34<01:40, 28.52it/s]

{'eval_loss': 0.3356500566005707, 'eval_f1': 0.6621878285166941, 'eval_precision': 0.7672420634920635, 'eval_recall': 0.6180801065228902, 'eval_runtime': 0.7961, 'eval_samples_per_second': 326.596, 'eval_steps_per_second': 81.649, 'epoch': 9.0}


 50%|████▉     | 2599/5200 [01:44<01:28, 29.42it/s]
 50%|█████     | 2600/5200 [01:45<01:28, 29.42it/s]

{'eval_loss': 0.29751160740852356, 'eval_f1': 0.6896999584395866, 'eval_precision': 0.7561118879283216, 'eval_recall': 0.6530034112033823, 'eval_runtime': 0.5865, 'eval_samples_per_second': 443.297, 'eval_steps_per_second': 110.824, 'epoch': 10.0}


 55%|█████▍    | 2857/5200 [01:55<01:20, 28.94it/s]
 55%|█████▌    | 2860/5200 [01:55<01:20, 28.94it/s]

{'eval_loss': 0.3203778862953186, 'eval_f1': 0.7103206135352297, 'eval_precision': 0.8090803607030788, 'eval_recall': 0.6587780829183314, 'eval_runtime': 0.5672, 'eval_samples_per_second': 458.369, 'eval_steps_per_second': 114.592, 'epoch': 11.0}


 58%|█████▊    | 3004/5200 [02:01<01:21, 27.11it/s]

{'loss': 0.0098, 'grad_norm': 0.008327344432473183, 'learning_rate': 2.1153846153846154e-05, 'epoch': 11.54}


 60%|█████▉    | 3119/5200 [02:06<01:12, 28.54it/s]
 60%|██████    | 3120/5200 [02:06<01:12, 28.54it/s]

{'eval_loss': 0.3289725184440613, 'eval_f1': 0.6544097025540324, 'eval_precision': 0.8075485678142224, 'eval_recall': 0.6184135419049596, 'eval_runtime': 0.5938, 'eval_samples_per_second': 437.881, 'eval_steps_per_second': 109.47, 'epoch': 12.0}


 65%|██████▍   | 3379/5200 [02:16<01:03, 28.52it/s]
 65%|██████▌   | 3380/5200 [02:16<01:03, 28.52it/s]

{'eval_loss': 0.3310617208480835, 'eval_f1': 0.6927015048735942, 'eval_precision': 0.7458798995788792, 'eval_recall': 0.6660570691527639, 'eval_runtime': 0.6255, 'eval_samples_per_second': 415.645, 'eval_steps_per_second': 103.911, 'epoch': 13.0}


 70%|██████▉   | 3638/5200 [02:26<00:57, 27.33it/s]
 70%|███████   | 3640/5200 [02:27<00:57, 27.33it/s]

{'eval_loss': 0.33103108406066895, 'eval_f1': 0.675509747700759, 'eval_precision': 0.7658492830869088, 'eval_recall': 0.6319159646384913, 'eval_runtime': 0.5978, 'eval_samples_per_second': 434.947, 'eval_steps_per_second': 108.737, 'epoch': 14.0}


 75%|███████▍  | 3897/5200 [02:37<00:45, 28.53it/s]
 75%|███████▌  | 3900/5200 [02:37<00:45, 28.53it/s]

{'eval_loss': 0.3523886501789093, 'eval_f1': 0.6773465007376077, 'eval_precision': 0.7406424110400369, 'eval_recall': 0.6409948182925339, 'eval_runtime': 0.5857, 'eval_samples_per_second': 443.935, 'eval_steps_per_second': 110.984, 'epoch': 15.0}


 77%|███████▋  | 4005/5200 [02:42<00:43, 27.38it/s]

{'loss': 0.0028, 'grad_norm': 0.005396177526563406, 'learning_rate': 1.153846153846154e-05, 'epoch': 15.38}


 80%|████████  | 4160/5200 [02:48<00:34, 30.12it/s]

{'eval_loss': 0.3481789529323578, 'eval_f1': 0.6846014531008082, 'eval_precision': 0.7792244461887318, 'eval_recall': 0.6396171837399934, 'eval_runtime': 0.5375, 'eval_samples_per_second': 483.711, 'eval_steps_per_second': 120.928, 'epoch': 16.0}


 85%|████████▍ | 4419/5200 [02:58<00:27, 28.15it/s]
 85%|████████▌ | 4420/5200 [02:59<00:27, 28.15it/s]

{'eval_loss': 0.3538217842578888, 'eval_f1': 0.6806028437070557, 'eval_precision': 0.7813708645301849, 'eval_recall': 0.6302976330953479, 'eval_runtime': 0.6133, 'eval_samples_per_second': 423.959, 'eval_steps_per_second': 105.99, 'epoch': 17.0}


 90%|████████▉ | 4679/5200 [03:08<00:18, 28.56it/s]
 90%|█████████ | 4680/5200 [03:09<00:18, 28.56it/s]

{'eval_loss': 0.3556845486164093, 'eval_f1': 0.6818758921057687, 'eval_precision': 0.7564602572520363, 'eval_recall': 0.6383568901488728, 'eval_runtime': 0.5622, 'eval_samples_per_second': 462.474, 'eval_steps_per_second': 115.618, 'epoch': 18.0}


 95%|█████████▍| 4938/5200 [03:19<00:09, 27.73it/s]
 95%|█████████▌| 4940/5200 [03:19<00:09, 27.73it/s]

{'eval_loss': 0.3583716154098511, 'eval_f1': 0.6843943346875292, 'eval_precision': 0.7894162320948036, 'eval_recall': 0.6326426044345871, 'eval_runtime': 0.7256, 'eval_samples_per_second': 358.304, 'eval_steps_per_second': 89.576, 'epoch': 19.0}


 96%|█████████▌| 5002/5200 [03:22<00:07, 26.30it/s]

{'loss': 0.0011, 'grad_norm': 0.004475237336009741, 'learning_rate': 1.9230769230769234e-06, 'epoch': 19.23}


100%|██████████| 5200/5200 [03:29<00:00, 29.64it/s]
100%|██████████| 5200/5200 [03:31<00:00, 29.64it/s]

{'eval_loss': 0.3584434986114502, 'eval_f1': 0.6790167550116556, 'eval_precision': 0.7791416718202432, 'eval_recall': 0.6292412438903694, 'eval_runtime': 0.5481, 'eval_samples_per_second': 474.357, 'eval_steps_per_second': 118.589, 'epoch': 20.0}


100%|██████████| 5200/5200 [03:32<00:00, 24.45it/s]


{'train_runtime': 212.6578, 'train_samples_per_second': 97.622, 'train_steps_per_second': 24.452, 'train_loss': 0.047709544421388556, 'epoch': 20.0}


100%|██████████| 65/65 [00:00<00:00, 106.74it/s]


Evaluation Metrics: {'eval_loss': 0.3203778862953186, 'eval_f1': 0.7103206135352297, 'eval_precision': 0.8090803607030788, 'eval_recall': 0.6587780829183314, 'eval_runtime': 0.619, 'eval_samples_per_second': 420.066, 'eval_steps_per_second': 105.017, 'epoch': 20.0}
CHECK SCORE                                                 model    lan  \
12  lr-5e-05_epoch-20_batchSize-4_weightsOfDecay-0.01  pharo   
13  lr-5e-05_epoch-20_batchSize-4_weightsOfDecay-0.01  pharo   
14  lr-5e-05_epoch-20_batchSize-4_weightsOfDecay-0.01  pharo   
15  lr-5e-05_epoch-20_batchSize-4_weightsOfDecay-0.01  pharo   
16  lr-5e-05_epoch-20_batchSize-4_weightsOfDecay-0.01  pharo   
17  lr-5e-05_epoch-20_batchSize-4_weightsOfDecay-0.01  pharo   
18  lr-5e-05_epoch-20_batchSize-4_weightsOfDecay-0.01  pharo   

                        cat  precision    recall        f1  
12  Keyimplementationpoints   0.769231  0.714286  0.740741  
13                  Example   0.977011  0.841584  0.904255  
14         Responsibilitie

0,1
eval/f1,▁▄▅▇▇▆▇▇▇██▇█▇▇█▇██▇█
eval/loss,▃▂▁▁▂▄▄▅▇▅▆▇▇▇█▇████▆
eval/precision,▂▁▅█▆▃▆█▆▆▇▇▅▆▅▆▆▆▇▆▇
eval/recall,▁▄▅▇▇▆▇▇▇██▇█▇██▇█▇▇█
eval/runtime,▁▃▂▇▂▃▃▄█▂▂▃▃▃▂▁▃▂▆▁▃
eval/samples_per_second,█▅▆▁▆▆▅▅▁▆▇▆▅▆▆█▅▇▂█▅
eval/steps_per_second,█▅▆▁▆▆▅▅▁▆▇▆▅▆▆█▅▇▂█▅
train/epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇█████
train/global_step,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇█████
train/grad_norm,█▄▁▁▁

0,1
eval/f1,0.71032
eval/loss,0.32038
eval/precision,0.80908
eval/recall,0.65878
eval/runtime,0.619
eval/samples_per_second,420.066
eval/steps_per_second,105.017
total_flos,687567102474240.0
train/epoch,20.0
train/global_step,5200.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 7614/7614 [00:00<00:00, 20012.64 examples/s]
  3%|▎         | 1004/30460 [00:35<17:06, 28.69it/s]

{'loss': 0.2763, 'grad_norm': 0.5995298624038696, 'learning_rate': 4.835850295469468e-06, 'epoch': 0.66}


  5%|▌         | 1523/30460 [00:53<16:23, 29.43it/s]
  5%|▌         | 1523/30460 [00:56<16:23, 29.43it/s]

{'eval_loss': 0.1423070728778839, 'eval_f1': 0.5235667611760607, 'eval_precision': 0.530002972736636, 'eval_recall': 0.5182715463199281, 'eval_runtime': 3.5428, 'eval_samples_per_second': 429.885, 'eval_steps_per_second': 107.542, 'epoch': 1.0}


  7%|▋         | 2005/30460 [01:14<16:08, 29.39it/s]  

{'loss': 0.1473, 'grad_norm': 2.576875925064087, 'learning_rate': 4.671700590938937e-06, 'epoch': 1.31}


 10%|▉         | 3005/30460 [01:49<16:45, 27.31it/s]

{'loss': 0.114, 'grad_norm': 2.509891986846924, 'learning_rate': 4.507550886408405e-06, 'epoch': 1.97}


 10%|█         | 3046/30460 [01:50<16:23, 27.88it/s]
 10%|█         | 3046/30460 [01:53<16:23, 27.88it/s]

{'eval_loss': 0.10964614152908325, 'eval_f1': 0.5499204569391762, 'eval_precision': 0.6416266749491297, 'eval_recall': 0.5429800619885806, 'eval_runtime': 3.354, 'eval_samples_per_second': 454.085, 'eval_steps_per_second': 113.596, 'epoch': 2.0}


 13%|█▎        | 4005/30460 [02:28<15:30, 28.43it/s]  

{'loss': 0.091, 'grad_norm': 0.07790869474411011, 'learning_rate': 4.343401181877873e-06, 'epoch': 2.63}


 15%|█▍        | 4568/30460 [02:47<14:24, 29.96it/s]
 15%|█▌        | 4569/30460 [02:51<14:24, 29.96it/s]

{'eval_loss': 0.09301864355802536, 'eval_f1': 0.7236287553633056, 'eval_precision': 0.93606500018564, 'eval_recall': 0.6691839836793011, 'eval_runtime': 3.6368, 'eval_samples_per_second': 418.771, 'eval_steps_per_second': 104.762, 'epoch': 3.0}


 16%|█▋        | 5006/30460 [03:07<15:22, 27.60it/s]  

{'loss': 0.0839, 'grad_norm': 4.520733833312988, 'learning_rate': 4.179251477347341e-06, 'epoch': 3.28}


 20%|█▉        | 6005/30460 [03:42<14:00, 29.11it/s]

{'loss': 0.0737, 'grad_norm': 1.7289667129516602, 'learning_rate': 4.015101772816809e-06, 'epoch': 3.94}


 20%|█▉        | 6091/30460 [03:45<14:49, 27.40it/s]
 20%|██        | 6092/30460 [03:49<14:49, 27.40it/s]

{'eval_loss': 0.09045727550983429, 'eval_f1': 0.7817302610562169, 'eval_precision': 0.8916477146353632, 'eval_recall': 0.7344682872152442, 'eval_runtime': 4.3734, 'eval_samples_per_second': 348.238, 'eval_steps_per_second': 87.117, 'epoch': 4.0}


 23%|██▎       | 7004/30460 [04:22<14:03, 27.81it/s]  

{'loss': 0.0606, 'grad_norm': 0.034619204699993134, 'learning_rate': 3.850952068286277e-06, 'epoch': 4.6}


 25%|██▌       | 7615/30460 [04:43<13:47, 27.62it/s]
 25%|██▌       | 7615/30460 [04:47<13:47, 27.62it/s]

{'eval_loss': 0.08600937575101852, 'eval_f1': 0.8319055075175877, 'eval_precision': 0.906203804946993, 'eval_recall': 0.7844372553620096, 'eval_runtime': 3.5672, 'eval_samples_per_second': 426.94, 'eval_steps_per_second': 106.805, 'epoch': 5.0}


 26%|██▋       | 8004/30460 [05:01<12:55, 28.95it/s]  

{'loss': 0.0585, 'grad_norm': 0.07075486332178116, 'learning_rate': 3.6868023637557455e-06, 'epoch': 5.25}


 30%|██▉       | 9002/30460 [05:36<13:30, 26.48it/s]

{'loss': 0.0468, 'grad_norm': 16.438405990600586, 'learning_rate': 3.5226526592252134e-06, 'epoch': 5.91}


 30%|██▉       | 9137/30460 [05:41<12:33, 28.29it/s]
 30%|███       | 9138/30460 [05:44<12:33, 28.29it/s]

{'eval_loss': 0.0884387418627739, 'eval_f1': 0.8491825535659885, 'eval_precision': 0.89425307040636, 'eval_recall': 0.8165078765063382, 'eval_runtime': 3.5934, 'eval_samples_per_second': 423.832, 'eval_steps_per_second': 106.028, 'epoch': 6.0}


 33%|███▎      | 10003/30460 [06:15<12:28, 27.32it/s] 

{'loss': 0.0413, 'grad_norm': 0.06964392960071564, 'learning_rate': 3.358502954694682e-06, 'epoch': 6.57}


 35%|███▌      | 10661/30460 [06:38<10:58, 30.04it/s]
 35%|███▌      | 10661/30460 [06:42<10:58, 30.04it/s]

{'eval_loss': 0.09325408190488815, 'eval_f1': 0.8574622900605401, 'eval_precision': 0.9177703230011419, 'eval_recall': 0.8163248917051582, 'eval_runtime': 3.6026, 'eval_samples_per_second': 422.749, 'eval_steps_per_second': 105.757, 'epoch': 7.0}


 36%|███▌      | 11004/30460 [06:54<12:01, 26.97it/s]  

{'loss': 0.0427, 'grad_norm': 0.10241290926933289, 'learning_rate': 3.19435325016415e-06, 'epoch': 7.22}


 39%|███▉      | 12004/30460 [07:29<10:14, 30.05it/s]

{'loss': 0.0344, 'grad_norm': 0.015298320911824703, 'learning_rate': 3.030203545633618e-06, 'epoch': 7.88}


 40%|███▉      | 12182/30460 [07:35<10:38, 28.62it/s]
 40%|████      | 12184/30460 [07:39<10:38, 28.62it/s]

{'eval_loss': 0.09692960977554321, 'eval_f1': 0.8625648802310005, 'eval_precision': 0.8857710934351157, 'eval_recall': 0.8441177769889575, 'eval_runtime': 3.3185, 'eval_samples_per_second': 458.941, 'eval_steps_per_second': 114.811, 'epoch': 8.0}


 43%|████▎     | 13004/30460 [08:08<09:46, 29.78it/s]  

{'loss': 0.0238, 'grad_norm': 1.068877935409546, 'learning_rate': 2.8660538411030866e-06, 'epoch': 8.54}


 45%|████▍     | 13706/30460 [08:33<09:23, 29.72it/s]
 45%|████▌     | 13707/30460 [08:36<09:23, 29.72it/s]

{'eval_loss': 0.10803873091936111, 'eval_f1': 0.8548596267224707, 'eval_precision': 0.875773077386496, 'eval_recall': 0.8368715624181656, 'eval_runtime': 3.2125, 'eval_samples_per_second': 474.079, 'eval_steps_per_second': 118.598, 'epoch': 9.0}


 46%|████▌     | 14005/30460 [08:47<09:14, 29.67it/s]  

{'loss': 0.0281, 'grad_norm': 0.18690919876098633, 'learning_rate': 2.7019041365725546e-06, 'epoch': 9.19}


 49%|████▉     | 15004/30460 [09:22<09:05, 28.33it/s]

{'loss': 0.0201, 'grad_norm': 0.06332346796989441, 'learning_rate': 2.537754432042023e-06, 'epoch': 9.85}


 50%|████▉     | 15228/30460 [09:29<08:47, 28.90it/s]
 50%|█████     | 15230/30460 [09:34<08:47, 28.90it/s]

{'eval_loss': 0.10297109931707382, 'eval_f1': 0.8774644678177916, 'eval_precision': 0.9145092645095377, 'eval_recall': 0.8559634839291783, 'eval_runtime': 4.1018, 'eval_samples_per_second': 371.302, 'eval_steps_per_second': 92.887, 'epoch': 10.0}


 53%|█████▎    | 16004/30460 [10:01<08:21, 28.83it/s]  

{'loss': 0.0169, 'grad_norm': 0.1719648838043213, 'learning_rate': 2.3736047275114905e-06, 'epoch': 10.51}


 55%|█████▍    | 16751/30460 [10:27<08:16, 27.61it/s]
 55%|█████▌    | 16753/30460 [10:32<08:16, 27.61it/s]

{'eval_loss': 0.10630276054143906, 'eval_f1': 0.8690993623501436, 'eval_precision': 0.8969537809301414, 'eval_recall': 0.8475332467082943, 'eval_runtime': 4.1686, 'eval_samples_per_second': 365.35, 'eval_steps_per_second': 91.397, 'epoch': 11.0}


 56%|█████▌    | 17005/30460 [10:41<07:38, 29.36it/s]  

{'loss': 0.0173, 'grad_norm': 0.007404767442494631, 'learning_rate': 2.209455022980959e-06, 'epoch': 11.16}


 59%|█████▉    | 18006/30460 [11:16<06:52, 30.22it/s]

{'loss': 0.0164, 'grad_norm': 0.11455921828746796, 'learning_rate': 2.045305318450427e-06, 'epoch': 11.82}


 60%|██████    | 18276/30460 [11:25<06:55, 29.31it/s]
 60%|██████    | 18276/30460 [11:29<06:55, 29.31it/s]

{'eval_loss': 0.1119573712348938, 'eval_f1': 0.8635697663959007, 'eval_precision': 0.8878336620256155, 'eval_recall': 0.8424118960741848, 'eval_runtime': 3.8307, 'eval_samples_per_second': 397.58, 'eval_steps_per_second': 99.46, 'epoch': 12.0}


 62%|██████▏   | 19004/30460 [11:55<06:21, 30.01it/s]  

{'loss': 0.0124, 'grad_norm': 0.2645890712738037, 'learning_rate': 1.881155613919895e-06, 'epoch': 12.48}


 65%|██████▍   | 19797/30460 [12:22<06:13, 28.53it/s]
 65%|██████▌   | 19799/30460 [12:26<06:13, 28.53it/s]

{'eval_loss': 0.11996947228908539, 'eval_f1': 0.8598279349010041, 'eval_precision': 0.8946008602865904, 'eval_recall': 0.8330166687107632, 'eval_runtime': 3.4628, 'eval_samples_per_second': 439.815, 'eval_steps_per_second': 110.026, 'epoch': 13.0}


 66%|██████▌   | 20006/30460 [12:34<05:41, 30.58it/s]  

{'loss': 0.012, 'grad_norm': 0.022230589762330055, 'learning_rate': 1.7170059093893632e-06, 'epoch': 13.13}


 69%|██████▉   | 21005/30460 [13:09<05:54, 26.71it/s]

{'loss': 0.011, 'grad_norm': 0.008437014184892178, 'learning_rate': 1.5528562048588314e-06, 'epoch': 13.79}


 70%|██████▉   | 21319/30460 [13:19<04:55, 30.93it/s]
 70%|███████   | 21322/30460 [13:23<04:55, 30.93it/s]

{'eval_loss': 0.11513375490903854, 'eval_f1': 0.874396383938847, 'eval_precision': 0.8979525196487688, 'eval_recall': 0.8569345185040433, 'eval_runtime': 3.3472, 'eval_samples_per_second': 455.007, 'eval_steps_per_second': 113.826, 'epoch': 14.0}


 72%|███████▏  | 22003/30460 [13:48<05:04, 27.77it/s]

{'loss': 0.0101, 'grad_norm': 7.249629020690918, 'learning_rate': 1.3887065003282996e-06, 'epoch': 14.45}


 75%|███████▍  | 22843/30460 [14:16<04:32, 28.00it/s]
 75%|███████▌  | 22845/30460 [14:20<04:31, 28.00it/s]

{'eval_loss': 0.11740756779909134, 'eval_f1': 0.8741232421734961, 'eval_precision': 0.8987734767667889, 'eval_recall': 0.8545476672950088, 'eval_runtime': 3.4323, 'eval_samples_per_second': 443.725, 'eval_steps_per_second': 111.004, 'epoch': 15.0}


 76%|███████▌  | 23004/30460 [14:26<04:27, 27.92it/s]

{'loss': 0.0093, 'grad_norm': 0.0036961426958441734, 'learning_rate': 1.2245567957977676e-06, 'epoch': 15.1}


 79%|███████▉  | 24005/30460 [15:02<03:48, 28.23it/s]

{'loss': 0.0097, 'grad_norm': 0.014676831662654877, 'learning_rate': 1.0604070912672358e-06, 'epoch': 15.76}


 80%|████████  | 24368/30460 [15:14<03:30, 28.89it/s]
 80%|████████  | 24368/30460 [15:17<03:30, 28.89it/s]

{'eval_loss': 0.11714282631874084, 'eval_f1': 0.8806694034600173, 'eval_precision': 0.8883714426484862, 'eval_recall': 0.8739751481574068, 'eval_runtime': 3.3347, 'eval_samples_per_second': 456.716, 'eval_steps_per_second': 114.254, 'epoch': 16.0}


 82%|████████▏ | 25004/30460 [15:41<03:11, 28.45it/s]

{'loss': 0.0089, 'grad_norm': 0.002502932446077466, 'learning_rate': 8.96257386736704e-07, 'epoch': 16.41}


 85%|████████▍ | 25889/30460 [16:11<02:33, 29.84it/s]
 85%|████████▌ | 25891/30460 [16:15<02:33, 29.84it/s]

{'eval_loss': 0.11894141882658005, 'eval_f1': 0.8817292765348507, 'eval_precision': 0.9026589572984723, 'eval_recall': 0.8676167331302613, 'eval_runtime': 3.6299, 'eval_samples_per_second': 419.572, 'eval_steps_per_second': 104.962, 'epoch': 17.0}


 85%|████████▌ | 26004/30460 [16:20<02:47, 26.56it/s]

{'loss': 0.0067, 'grad_norm': 0.0545472651720047, 'learning_rate': 7.32107682206172e-07, 'epoch': 17.07}


 89%|████████▊ | 27003/30460 [16:54<02:02, 28.20it/s]

{'loss': 0.0079, 'grad_norm': 0.11060985177755356, 'learning_rate': 5.679579776756403e-07, 'epoch': 17.73}


 90%|████████▉ | 27413/30460 [17:08<01:48, 28.19it/s]
 90%|█████████ | 27414/30460 [17:13<01:48, 28.19it/s]

{'eval_loss': 0.11849165707826614, 'eval_f1': 0.8778538019637915, 'eval_precision': 0.8831885040032155, 'eval_recall': 0.873060526634359, 'eval_runtime': 4.3287, 'eval_samples_per_second': 351.834, 'eval_steps_per_second': 88.016, 'epoch': 18.0}


 92%|█████████▏| 28003/30460 [17:34<01:27, 28.06it/s]

{'loss': 0.0065, 'grad_norm': 0.041777387261390686, 'learning_rate': 4.038082731451084e-07, 'epoch': 18.38}


 95%|█████████▍| 28936/30460 [18:06<00:54, 27.96it/s]
 95%|█████████▌| 28937/30460 [18:10<00:54, 27.96it/s]

{'eval_loss': 0.11888343095779419, 'eval_f1': 0.8838231230593722, 'eval_precision': 0.8972693222282142, 'eval_recall': 0.8731541109788518, 'eval_runtime': 3.798, 'eval_samples_per_second': 401.005, 'eval_steps_per_second': 100.317, 'epoch': 19.0}


 95%|█████████▌| 29003/30460 [18:13<00:50, 28.61it/s]

{'loss': 0.0078, 'grad_norm': 0.025529900565743446, 'learning_rate': 2.396585686145765e-07, 'epoch': 19.04}


 99%|█████████▊| 30004/30460 [18:48<00:16, 28.30it/s]

{'loss': 0.0064, 'grad_norm': 0.0038647835608571768, 'learning_rate': 7.550886408404465e-08, 'epoch': 19.7}


100%|█████████▉| 30457/30460 [19:04<00:00, 30.10it/s]
100%|██████████| 30460/30460 [19:08<00:00, 30.10it/s]

{'eval_loss': 0.12041997909545898, 'eval_f1': 0.8826798609838991, 'eval_precision': 0.8927855873064512, 'eval_recall': 0.8751468196077161, 'eval_runtime': 3.6235, 'eval_samples_per_second': 420.315, 'eval_steps_per_second': 105.148, 'epoch': 20.0}


100%|██████████| 30460/30460 [19:09<00:00, 26.49it/s]


{'train_runtime': 1149.9766, 'train_samples_per_second': 105.933, 'train_steps_per_second': 26.487, 'train_loss': 0.042819329765542744, 'epoch': 20.0}


100%|██████████| 381/381 [00:03<00:00, 114.80it/s]


Evaluation Metrics: {'eval_loss': 0.11888343095779419, 'eval_f1': 0.8838231230593722, 'eval_precision': 0.8972693222282142, 'eval_recall': 0.8731541109788518, 'eval_runtime': 3.3301, 'eval_samples_per_second': 457.344, 'eval_steps_per_second': 114.411, 'epoch': 20.0}
CHECK SCORE                                                 model   lan          cat  \
12  lr-5e-06_epoch-20_batchSize-4_weightsOfDecay-0.01  java      summary   
13  lr-5e-06_epoch-20_batchSize-4_weightsOfDecay-0.01  java    Ownership   
14  lr-5e-06_epoch-20_batchSize-4_weightsOfDecay-0.01  java       Expand   
15  lr-5e-06_epoch-20_batchSize-4_weightsOfDecay-0.01  java        usage   
16  lr-5e-06_epoch-20_batchSize-4_weightsOfDecay-0.01  java      Pointer   
17  lr-5e-06_epoch-20_batchSize-4_weightsOfDecay-0.01  java  deprecation   
18  lr-5e-06_epoch-20_batchSize-4_weightsOfDecay-0.01  java     rational   

    precision    recall        f1  
12   0.944993  0.953058  0.949008  
13   0.964912  1.000000  0.982143  
14 

0,1
eval/f1,▁▂▅▆▇▇▇█▇████████████
eval/loss,█▄▂▂▁▁▂▂▄▃▄▄▅▅▅▅▅▅▅▅▅
eval/precision,▁▃█▇▇▇█▇▇█▇▇▇▇▇▇▇▇▇▇▇
eval/recall,▁▁▄▅▆▇▇▇▇█▇▇▇████████
eval/runtime,▃▂▄█▃▃▃▂▁▆▇▅▃▂▂▂▄█▅▃▂
eval/samples_per_second,▆▇▅▁▅▅▅▇█▂▂▄▆▇▆▇▅▁▄▅▇
eval/steps_per_second,▆▇▅▁▅▅▅▇█▂▂▄▆▇▆▇▅▁▄▅▇
train/epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
train/global_step,▁▁▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
train/grad_norm,▁▂▂▁▃▂▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▄▁▁▁▁▁▁▁▁

0,1
eval/f1,0.88382
eval/loss,0.11888
eval/precision,0.89727
eval/recall,0.87315
eval/runtime,3.3301
eval/samples_per_second,457.344
eval/steps_per_second,114.411
total_flos,4034654355655680.0
train/epoch,20.0
train/global_step,30460.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1884/1884 [00:00<00:00, 15886.11 examples/s]
  5%|▌         | 377/7540 [00:13<04:03, 29.47it/s]
  5%|▌         | 377/7540 [00:13<04:03, 29.47it/s]

{'eval_loss': 0.468020498752594, 'eval_f1': 0.03582089552238806, 'eval_precision': 0.18461538461538463, 'eval_recall': 0.019834710743801654, 'eval_runtime': 0.8926, 'eval_samples_per_second': 422.352, 'eval_steps_per_second': 106.428, 'epoch': 1.0}


 10%|▉         | 753/7540 [00:27<03:56, 28.66it/s]
 10%|█         | 754/7540 [00:28<03:56, 28.66it/s]

{'eval_loss': 0.4154733121395111, 'eval_f1': 0.16374474053295934, 'eval_precision': 0.37063492063492065, 'eval_recall': 0.11716351829988195, 'eval_runtime': 0.8092, 'eval_samples_per_second': 465.911, 'eval_steps_per_second': 117.405, 'epoch': 2.0}


 13%|█▎        | 1004/7540 [00:38<03:51, 28.24it/s]

{'loss': 0.4612, 'grad_norm': 2.817878007888794, 'learning_rate': 4.3368700265252e-06, 'epoch': 2.65}


 15%|█▌        | 1131/7540 [00:42<03:39, 29.16it/s]
 15%|█▌        | 1131/7540 [00:43<03:39, 29.16it/s]

{'eval_loss': 0.36940687894821167, 'eval_f1': 0.3636448855803695, 'eval_precision': 0.5530910220565393, 'eval_recall': 0.2900523200336401, 'eval_runtime': 0.8776, 'eval_samples_per_second': 429.603, 'eval_steps_per_second': 108.255, 'epoch': 3.0}


 20%|█▉        | 1507/7540 [00:57<03:31, 28.58it/s]
 20%|██        | 1508/7540 [00:58<03:31, 28.58it/s]

{'eval_loss': 0.34388381242752075, 'eval_f1': 0.40707451877263195, 'eval_precision': 0.5333333333333333, 'eval_recall': 0.33527195096310913, 'eval_runtime': 0.8317, 'eval_samples_per_second': 453.279, 'eval_steps_per_second': 114.222, 'epoch': 4.0}


 25%|██▌       | 1885/7540 [01:13<03:23, 27.84it/s]
 25%|██▌       | 1885/7540 [01:14<03:23, 27.84it/s]

{'eval_loss': 0.32425934076309204, 'eval_f1': 0.4539674534171322, 'eval_precision': 0.5201514322203977, 'eval_recall': 0.40503307402435673, 'eval_runtime': 0.9827, 'eval_samples_per_second': 383.64, 'eval_steps_per_second': 96.673, 'epoch': 5.0}


 27%|██▋       | 2004/7540 [01:19<03:27, 26.71it/s]

{'loss': 0.3134, 'grad_norm': 5.547484397888184, 'learning_rate': 3.673740053050398e-06, 'epoch': 5.31}


 30%|██▉       | 2260/7540 [01:27<03:13, 27.26it/s]
 30%|███       | 2262/7540 [01:28<03:13, 27.26it/s]

{'eval_loss': 0.32117193937301636, 'eval_f1': 0.5117778026889825, 'eval_precision': 0.6530871663307067, 'eval_recall': 0.450068680079103, 'eval_runtime': 1.0134, 'eval_samples_per_second': 372.031, 'eval_steps_per_second': 93.748, 'epoch': 6.0}


 35%|███▌      | 2639/7540 [01:42<02:55, 28.00it/s]
 35%|███▌      | 2639/7540 [01:43<02:55, 28.00it/s]

{'eval_loss': 0.31100767850875854, 'eval_f1': 0.5254593755753703, 'eval_precision': 0.6381947622679889, 'eval_recall': 0.4688550183189849, 'eval_runtime': 0.826, 'eval_samples_per_second': 456.438, 'eval_steps_per_second': 115.017, 'epoch': 7.0}


 40%|███▉      | 3003/7540 [01:57<02:41, 28.12it/s]

{'loss': 0.2262, 'grad_norm': 3.9439947605133057, 'learning_rate': 3.0106100795755973e-06, 'epoch': 7.96}


 40%|███▉      | 3015/7540 [01:57<02:47, 27.04it/s]
 40%|████      | 3016/7540 [01:58<02:47, 27.04it/s]

{'eval_loss': 0.3050437271595001, 'eval_f1': 0.5446463477011422, 'eval_precision': 0.6370426065162907, 'eval_recall': 0.49271834997172037, 'eval_runtime': 1.0525, 'eval_samples_per_second': 358.19, 'eval_steps_per_second': 90.26, 'epoch': 8.0}


 45%|████▌     | 3393/7540 [02:12<02:23, 28.90it/s]
 45%|████▌     | 3393/7540 [02:13<02:23, 28.90it/s]

{'eval_loss': 0.3180551826953888, 'eval_f1': 0.5492578483443686, 'eval_precision': 0.6308444062797085, 'eval_recall': 0.508508644648257, 'eval_runtime': 0.8759, 'eval_samples_per_second': 430.42, 'eval_steps_per_second': 108.461, 'epoch': 9.0}


 50%|████▉     | 3769/7540 [02:27<02:12, 28.56it/s]
 50%|█████     | 3770/7540 [02:28<02:12, 28.56it/s]

{'eval_loss': 0.3118119239807129, 'eval_f1': 0.5719513680838089, 'eval_precision': 0.8202639026812314, 'eval_recall': 0.5162416014057956, 'eval_runtime': 0.8903, 'eval_samples_per_second': 423.469, 'eval_steps_per_second': 106.71, 'epoch': 10.0}


 53%|█████▎    | 4005/7540 [02:37<02:07, 27.68it/s]

{'loss': 0.1716, 'grad_norm': 5.788593292236328, 'learning_rate': 2.347480106100796e-06, 'epoch': 10.61}


 55%|█████▍    | 4144/7540 [02:42<01:51, 30.34it/s]
 55%|█████▌    | 4147/7540 [02:43<01:51, 30.34it/s]

{'eval_loss': 0.32673919200897217, 'eval_f1': 0.5750002869817554, 'eval_precision': 0.7574407015522843, 'eval_recall': 0.5338774010925995, 'eval_runtime': 0.8408, 'eval_samples_per_second': 448.362, 'eval_steps_per_second': 112.982, 'epoch': 11.0}


 60%|█████▉    | 4522/7540 [02:56<01:37, 30.83it/s]
 60%|██████    | 4524/7540 [02:57<01:37, 30.83it/s]

{'eval_loss': 0.3207625150680542, 'eval_f1': 0.5885524821314854, 'eval_precision': 0.7538011695906432, 'eval_recall': 0.527654853493827, 'eval_runtime': 0.8572, 'eval_samples_per_second': 439.792, 'eval_steps_per_second': 110.823, 'epoch': 12.0}


 65%|██████▍   | 4900/7540 [03:11<01:31, 28.95it/s]
 65%|██████▌   | 4901/7540 [03:12<01:31, 28.95it/s]

{'eval_loss': 0.33450713753700256, 'eval_f1': 0.590273315497307, 'eval_precision': 0.7259057853910795, 'eval_recall': 0.5468731088879716, 'eval_runtime': 0.9229, 'eval_samples_per_second': 408.473, 'eval_steps_per_second': 102.931, 'epoch': 13.0}


 66%|██████▋   | 5004/7540 [03:17<01:34, 26.91it/s]

{'loss': 0.1295, 'grad_norm': 22.92491340637207, 'learning_rate': 1.6843501326259947e-06, 'epoch': 13.26}


 70%|██████▉   | 5276/7540 [03:26<01:19, 28.65it/s]
 70%|███████   | 5278/7540 [03:27<01:18, 28.65it/s]

{'eval_loss': 0.3335685431957245, 'eval_f1': 0.6088286394449881, 'eval_precision': 0.7382197560598682, 'eval_recall': 0.5688466663103894, 'eval_runtime': 0.9318, 'eval_samples_per_second': 404.585, 'eval_steps_per_second': 101.951, 'epoch': 14.0}


 75%|███████▍  | 5652/7540 [03:41<01:07, 27.92it/s]
 75%|███████▌  | 5655/7540 [03:42<01:07, 27.92it/s]

{'eval_loss': 0.33375677466392517, 'eval_f1': 0.6035744933532545, 'eval_precision': 0.7320014210058668, 'eval_recall': 0.552877212171841, 'eval_runtime': 1.0139, 'eval_samples_per_second': 371.826, 'eval_steps_per_second': 93.696, 'epoch': 15.0}


 80%|███████▉  | 6002/7540 [03:55<00:56, 27.15it/s]

{'loss': 0.1058, 'grad_norm': 1.2441747188568115, 'learning_rate': 1.0212201591511937e-06, 'epoch': 15.92}


 80%|████████  | 6032/7540 [03:56<00:55, 26.95it/s]
 80%|████████  | 6032/7540 [03:57<00:55, 26.95it/s]

{'eval_loss': 0.3369327485561371, 'eval_f1': 0.6169972278899278, 'eval_precision': 0.7387890850589885, 'eval_recall': 0.5675510021589939, 'eval_runtime': 0.9749, 'eval_samples_per_second': 386.695, 'eval_steps_per_second': 97.443, 'epoch': 16.0}


 85%|████████▌ | 6409/7540 [04:11<00:39, 28.44it/s]
 85%|████████▌ | 6409/7540 [04:12<00:39, 28.44it/s]

{'eval_loss': 0.3398837745189667, 'eval_f1': 0.6220518740080356, 'eval_precision': 0.7466560859348299, 'eval_recall': 0.5722044198784954, 'eval_runtime': 1.0111, 'eval_samples_per_second': 372.862, 'eval_steps_per_second': 93.957, 'epoch': 17.0}


 90%|█████████ | 6786/7540 [04:26<00:26, 28.44it/s]
 90%|█████████ | 6786/7540 [04:27<00:26, 28.44it/s]

{'eval_loss': 0.33592650294303894, 'eval_f1': 0.6404550811485152, 'eval_precision': 0.7682636707098827, 'eval_recall': 0.5869138745186447, 'eval_runtime': 0.8698, 'eval_samples_per_second': 433.452, 'eval_steps_per_second': 109.225, 'epoch': 18.0}


 93%|█████████▎| 7004/7540 [04:36<00:19, 27.21it/s]

{'loss': 0.0859, 'grad_norm': 7.278031349182129, 'learning_rate': 3.5809018567639264e-07, 'epoch': 18.57}


 95%|█████████▍| 7161/7540 [04:41<00:12, 30.42it/s]
 95%|█████████▌| 7163/7540 [04:42<00:12, 30.42it/s]

{'eval_loss': 0.3409707546234131, 'eval_f1': 0.6326539896145689, 'eval_precision': 0.7515532354968975, 'eval_recall': 0.5841846610173265, 'eval_runtime': 0.8674, 'eval_samples_per_second': 434.635, 'eval_steps_per_second': 109.523, 'epoch': 19.0}


100%|█████████▉| 7537/7540 [04:55<00:00, 30.88it/s]
100%|██████████| 7540/7540 [04:57<00:00, 30.88it/s]

{'eval_loss': 0.33929094672203064, 'eval_f1': 0.650923853934118, 'eval_precision': 0.7710220147542453, 'eval_recall': 0.5980026223888912, 'eval_runtime': 0.8234, 'eval_samples_per_second': 457.869, 'eval_steps_per_second': 115.378, 'epoch': 20.0}


100%|██████████| 7540/7540 [04:58<00:00, 25.25it/s]


{'train_runtime': 298.6411, 'train_samples_per_second': 100.924, 'train_steps_per_second': 25.248, 'train_loss': 0.20409511545924988, 'epoch': 20.0}


100%|██████████| 95/95 [00:01<00:00, 94.65it/s] 


Evaluation Metrics: {'eval_loss': 0.33929094672203064, 'eval_f1': 0.650923853934118, 'eval_precision': 0.7710220147542453, 'eval_recall': 0.5980026223888912, 'eval_runtime': 1.0144, 'eval_samples_per_second': 371.643, 'eval_steps_per_second': 93.65, 'epoch': 20.0}
CHECK SCORE                                                 model     lan  \
14  lr-5e-06_epoch-20_batchSize-4_weightsOfDecay-0.01  python   
15  lr-5e-06_epoch-20_batchSize-4_weightsOfDecay-0.01  python   
16  lr-5e-06_epoch-20_batchSize-4_weightsOfDecay-0.01  python   
17  lr-5e-06_epoch-20_batchSize-4_weightsOfDecay-0.01  python   
18  lr-5e-06_epoch-20_batchSize-4_weightsOfDecay-0.01  python   

                 cat  precision    recall        f1  
14             Usage   0.848485  0.694215  0.763636  
15        Parameters   0.851852  0.821429  0.836364  
16  DevelopmentNotes   0.727273  0.200000  0.313725  
17            Expand   0.660377  0.507246  0.573770  
18           Summary   0.767123  0.767123  0.767123  
Scores: 

0,1
eval/f1,▁▂▅▅▆▆▇▇▇▇▇▇▇█▇██████
eval/loss,█▆▄▃▂▂▁▁▂▁▂▂▂▂▂▂▂▂▃▂▂
eval/precision,▁▃▅▅▅▆▆▆▆█▇▇▇▇▇▇▇▇▇▇▇
eval/recall,▁▂▄▅▆▆▆▇▇▇▇▇▇█▇██████
eval/runtime,▃▁▃▂▆▇▁█▃▃▂▂▄▅▇▆▇▃▃▁▇
eval/samples_per_second,▅█▆▇▃▂▇▁▆▅▇▆▄▄▂▃▂▆▆▇▂
eval/steps_per_second,▅█▆▇▃▂▇▁▆▅▇▆▄▄▂▃▂▆▆▇▂
train/epoch,▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇████
train/grad_norm,▂▂▂▂█▁▃

0,1
eval/f1,0.65092
eval/loss,0.33929
eval/precision,0.77102
eval/recall,0.598
eval/runtime,1.0144
eval/samples_per_second,371.643
eval/steps_per_second,93.65
total_flos,998195250201600.0
train/epoch,20.0
train/global_step,7540.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1298/1298 [00:00<00:00, 14686.13 examples/s]
  5%|▍         | 259/5200 [00:09<02:51, 28.87it/s]
  5%|▌         | 260/5200 [00:09<02:51, 28.87it/s]

{'eval_loss': 0.40379711985588074, 'eval_f1': 0.0, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_runtime': 0.6044, 'eval_samples_per_second': 430.161, 'eval_steps_per_second': 107.54, 'epoch': 1.0}


 10%|▉         | 518/5200 [00:19<02:41, 29.03it/s]
 10%|█         | 520/5200 [00:20<02:41, 29.03it/s]

{'eval_loss': 0.34325435757637024, 'eval_f1': 0.11330049261083744, 'eval_precision': 0.1350293542074364, 'eval_recall': 0.0975954738330976, 'eval_runtime': 0.5926, 'eval_samples_per_second': 438.739, 'eval_steps_per_second': 109.685, 'epoch': 2.0}


 15%|█▍        | 779/5200 [00:30<02:45, 26.71it/s]
 15%|█▌        | 780/5200 [00:31<02:45, 26.71it/s]

{'eval_loss': 0.3011396527290344, 'eval_f1': 0.14902210884353742, 'eval_precision': 0.24934589220303507, 'eval_recall': 0.13168316831683166, 'eval_runtime': 0.644, 'eval_samples_per_second': 403.732, 'eval_steps_per_second': 100.933, 'epoch': 3.0}


 19%|█▉        | 1003/5200 [00:39<02:26, 28.71it/s]

{'loss': 0.3568, 'grad_norm': 1.268946886062622, 'learning_rate': 4.0384615384615385e-06, 'epoch': 3.85}


 20%|█▉        | 1039/5200 [00:40<02:22, 29.22it/s]
 20%|██        | 1040/5200 [00:41<02:22, 29.22it/s]

{'eval_loss': 0.27474117279052734, 'eval_f1': 0.28410393732974376, 'eval_precision': 0.5408163265306122, 'eval_recall': 0.2133331915361246, 'eval_runtime': 0.6487, 'eval_samples_per_second': 400.811, 'eval_steps_per_second': 100.203, 'epoch': 4.0}


 25%|██▍       | 1298/5200 [00:51<02:22, 27.34it/s]
 25%|██▌       | 1300/5200 [00:52<02:22, 27.34it/s]

{'eval_loss': 0.2595542371273041, 'eval_f1': 0.38361609152350656, 'eval_precision': 0.6451465201465201, 'eval_recall': 0.2849440342256224, 'eval_runtime': 0.809, 'eval_samples_per_second': 321.398, 'eval_steps_per_second': 80.349, 'epoch': 5.0}


 30%|██▉       | 1559/5200 [01:02<02:08, 28.42it/s]
 30%|███       | 1560/5200 [01:02<02:08, 28.42it/s]

{'eval_loss': 0.2419772446155548, 'eval_f1': 0.4277154906259312, 'eval_precision': 0.6454577612409601, 'eval_recall': 0.3378637647113661, 'eval_runtime': 0.6416, 'eval_samples_per_second': 405.229, 'eval_steps_per_second': 101.307, 'epoch': 6.0}


 35%|███▍      | 1818/5200 [01:12<01:56, 29.02it/s]
 35%|███▌      | 1820/5200 [01:13<01:56, 29.02it/s]

{'eval_loss': 0.23475803434848785, 'eval_f1': 0.45606781759405735, 'eval_precision': 0.6398556998556998, 'eval_recall': 0.3686224740580875, 'eval_runtime': 0.5497, 'eval_samples_per_second': 472.944, 'eval_steps_per_second': 118.236, 'epoch': 7.0}


 39%|███▊      | 2004/5200 [01:21<01:53, 28.16it/s]

{'loss': 0.2034, 'grad_norm': 0.590850830078125, 'learning_rate': 3.0769230769230774e-06, 'epoch': 7.69}


 40%|███▉      | 2078/5200 [01:23<01:53, 27.52it/s]
 40%|████      | 2080/5200 [01:24<01:53, 27.52it/s]

{'eval_loss': 0.2332942932844162, 'eval_f1': 0.49492158327109786, 'eval_precision': 0.6345164152617568, 'eval_recall': 0.4193743771387903, 'eval_runtime': 0.7633, 'eval_samples_per_second': 340.645, 'eval_steps_per_second': 85.161, 'epoch': 8.0}


 45%|████▍     | 2338/5200 [01:34<01:47, 26.66it/s]
 45%|████▌     | 2340/5200 [01:35<01:47, 26.66it/s]

{'eval_loss': 0.22548142075538635, 'eval_f1': 0.4989483668169448, 'eval_precision': 0.6237699498569064, 'eval_recall': 0.4245794373984557, 'eval_runtime': 0.7168, 'eval_samples_per_second': 362.706, 'eval_steps_per_second': 90.676, 'epoch': 9.0}


 50%|████▉     | 2598/5200 [01:45<01:35, 27.29it/s]
 50%|█████     | 2600/5200 [01:46<01:35, 27.29it/s]

{'eval_loss': 0.22870981693267822, 'eval_f1': 0.4998935454399546, 'eval_precision': 0.6240111429187059, 'eval_recall': 0.4270784910152235, 'eval_runtime': 0.7776, 'eval_samples_per_second': 334.342, 'eval_steps_per_second': 83.586, 'epoch': 10.0}


 55%|█████▍    | 2857/5200 [01:55<01:18, 29.77it/s]
 55%|█████▌    | 2860/5200 [01:56<01:18, 29.77it/s]

{'eval_loss': 0.21644631028175354, 'eval_f1': 0.5126419238469953, 'eval_precision': 0.6023227898227898, 'eval_recall': 0.45109019350868973, 'eval_runtime': 0.6234, 'eval_samples_per_second': 417.085, 'eval_steps_per_second': 104.271, 'epoch': 11.0}


 58%|█████▊    | 3002/5200 [02:02<01:14, 29.40it/s]

{'loss': 0.1358, 'grad_norm': 0.4065839946269989, 'learning_rate': 2.1153846153846155e-06, 'epoch': 11.54}


 60%|█████▉    | 3117/5200 [02:06<01:08, 30.40it/s]
 60%|██████    | 3120/5200 [02:06<01:08, 30.40it/s]

{'eval_loss': 0.22263005375862122, 'eval_f1': 0.511118572097274, 'eval_precision': 0.6093410068742137, 'eval_recall': 0.4467553940073031, 'eval_runtime': 0.5686, 'eval_samples_per_second': 457.276, 'eval_steps_per_second': 114.319, 'epoch': 12.0}


 65%|██████▍   | 3378/5200 [02:16<01:04, 28.21it/s]
 65%|██████▌   | 3380/5200 [02:17<01:04, 28.21it/s]

{'eval_loss': 0.2198844701051712, 'eval_f1': 0.5357873421086766, 'eval_precision': 0.7466335192820761, 'eval_recall': 0.46600618054812315, 'eval_runtime': 0.7483, 'eval_samples_per_second': 347.433, 'eval_steps_per_second': 86.858, 'epoch': 13.0}


 70%|███████   | 3640/5200 [02:27<00:55, 27.86it/s]
 70%|███████   | 3640/5200 [02:27<00:55, 27.86it/s]

{'eval_loss': 0.21824708580970764, 'eval_f1': 0.5605730503431812, 'eval_precision': 0.8908185642678124, 'eval_recall': 0.4809323023167778, 'eval_runtime': 0.6398, 'eval_samples_per_second': 406.385, 'eval_steps_per_second': 101.596, 'epoch': 14.0}


 75%|███████▍  | 3897/5200 [02:37<00:42, 30.72it/s]
 75%|███████▌  | 3900/5200 [02:38<00:42, 30.72it/s]

{'eval_loss': 0.21852520108222961, 'eval_f1': 0.5297198227528981, 'eval_precision': 0.752140055106711, 'eval_recall': 0.45550439567575834, 'eval_runtime': 0.646, 'eval_samples_per_second': 402.469, 'eval_steps_per_second': 100.617, 'epoch': 15.0}


 77%|███████▋  | 4005/5200 [02:42<00:42, 28.41it/s]

{'loss': 0.1037, 'grad_norm': 0.8445443511009216, 'learning_rate': 1.153846153846154e-06, 'epoch': 15.38}


 80%|███████▉  | 4159/5200 [02:47<00:34, 29.96it/s]
 80%|████████  | 4160/5200 [02:48<00:34, 29.96it/s]

{'eval_loss': 0.2178392857313156, 'eval_f1': 0.5772830132113785, 'eval_precision': 0.8890607059520723, 'eval_recall': 0.49411932054096236, 'eval_runtime': 0.6849, 'eval_samples_per_second': 379.608, 'eval_steps_per_second': 94.902, 'epoch': 16.0}


 85%|████████▍ | 4418/5200 [02:58<00:26, 29.34it/s]
 85%|████████▌ | 4420/5200 [02:59<00:26, 29.34it/s]

{'eval_loss': 0.22069740295410156, 'eval_f1': 0.571155596155369, 'eval_precision': 0.8868917424512076, 'eval_recall': 0.48590598120990874, 'eval_runtime': 0.5769, 'eval_samples_per_second': 450.704, 'eval_steps_per_second': 112.676, 'epoch': 17.0}


 90%|█████████ | 4680/5200 [03:09<00:19, 27.17it/s]
 90%|█████████ | 4680/5200 [03:10<00:19, 27.17it/s]

{'eval_loss': 0.21756716072559357, 'eval_f1': 0.5931211455824946, 'eval_precision': 0.899172971675657, 'eval_recall': 0.5121606756968862, 'eval_runtime': 0.6166, 'eval_samples_per_second': 421.7, 'eval_steps_per_second': 105.425, 'epoch': 18.0}


 95%|█████████▌| 4940/5200 [03:19<00:09, 28.83it/s]
 95%|█████████▌| 4940/5200 [03:20<00:09, 28.83it/s]

{'eval_loss': 0.21909017860889435, 'eval_f1': 0.5845813660343117, 'eval_precision': 0.8891061919273203, 'eval_recall': 0.5041334648125323, 'eval_runtime': 0.6403, 'eval_samples_per_second': 406.09, 'eval_steps_per_second': 101.523, 'epoch': 19.0}


 96%|█████████▋| 5005/5200 [03:23<00:06, 30.17it/s]

{'loss': 0.0859, 'grad_norm': 0.40932366251945496, 'learning_rate': 1.9230769230769234e-07, 'epoch': 19.23}


100%|█████████▉| 5197/5200 [03:30<00:00, 30.45it/s]
100%|██████████| 5200/5200 [03:31<00:00, 30.45it/s]

{'eval_loss': 0.21954675018787384, 'eval_f1': 0.5783480031239302, 'eval_precision': 0.8907801948618275, 'eval_recall': 0.49501781855402893, 'eval_runtime': 0.7066, 'eval_samples_per_second': 367.946, 'eval_steps_per_second': 91.987, 'epoch': 20.0}


100%|██████████| 5200/5200 [03:32<00:00, 24.44it/s]


{'train_runtime': 212.7736, 'train_samples_per_second': 97.569, 'train_steps_per_second': 24.439, 'train_loss': 0.17354190973135142, 'epoch': 20.0}


100%|██████████| 65/65 [00:00<00:00, 101.12it/s]


Evaluation Metrics: {'eval_loss': 0.21756716072559357, 'eval_f1': 0.5931211455824946, 'eval_precision': 0.899172971675657, 'eval_recall': 0.5121606756968862, 'eval_runtime': 0.6551, 'eval_samples_per_second': 396.86, 'eval_steps_per_second': 99.215, 'epoch': 20.0}
CHECK SCORE                                                 model    lan  \
12  lr-5e-06_epoch-20_batchSize-4_weightsOfDecay-0.01  pharo   
13  lr-5e-06_epoch-20_batchSize-4_weightsOfDecay-0.01  pharo   
14  lr-5e-06_epoch-20_batchSize-4_weightsOfDecay-0.01  pharo   
15  lr-5e-06_epoch-20_batchSize-4_weightsOfDecay-0.01  pharo   
16  lr-5e-06_epoch-20_batchSize-4_weightsOfDecay-0.01  pharo   
17  lr-5e-06_epoch-20_batchSize-4_weightsOfDecay-0.01  pharo   
18  lr-5e-06_epoch-20_batchSize-4_weightsOfDecay-0.01  pharo   

                        cat  precision    recall        f1  
12  Keyimplementationpoints   0.923077  0.571429  0.705882  
13                  Example   0.918367  0.891089  0.904523  
14         Responsibilities

0,1
eval/f1,▁▂▃▄▆▆▆▇▇▇▇▇▇█▇██████
eval/loss,█▆▄▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/precision,▁▂▃▅▆▆▆▆▆▆▆▆▇█▇██████
eval/recall,▁▂▃▄▅▆▆▇▇▇▇▇▇█▇██████
eval/runtime,▂▂▄▄█▃▁▇▆▇▃▂▆▃▄▅▂▃▃▅▄
eval/samples_per_second,▆▆▅▅▁▅█▂▃▂▅▇▂▅▅▄▇▆▅▃▄
eval/steps_per_second,▆▆▅▅▁▅█▂▃▂▅▇▂▅▅▄▇▆▅▃▄
train/epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇█████
train/global_step,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇█████
train/grad_norm,█▂▁▅▁

0,1
eval/f1,0.59312
eval/loss,0.21757
eval/precision,0.89917
eval/recall,0.51216
eval/runtime,0.6551
eval/samples_per_second,396.86
eval/steps_per_second,99.215
total_flos,687567102474240.0
train/epoch,20.0
train/global_step,5200.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 7614/7614 [00:00<00:00, 20645.73 examples/s]
  5%|▌         | 762/15240 [00:31<09:28, 25.47it/s]
  5%|▌         | 762/15240 [00:33<09:28, 25.47it/s]

{'eval_loss': 0.09841886907815933, 'eval_f1': 0.6670160591377915, 'eval_precision': 0.8189670316927435, 'eval_recall': 0.6554233543253407, 'eval_runtime': 2.1364, 'eval_samples_per_second': 712.874, 'eval_steps_per_second': 89.402, 'epoch': 1.0}


  7%|▋         | 1002/15240 [00:43<09:32, 24.85it/s] 

{'loss': 0.1302, 'grad_norm': 0.744046688079834, 'learning_rate': 4.671916010498688e-05, 'epoch': 1.31}


 10%|█         | 1524/15240 [01:05<09:05, 25.16it/s]
 10%|█         | 1524/15240 [01:07<09:05, 25.16it/s]

{'eval_loss': 0.07624907791614532, 'eval_f1': 0.8106299259960282, 'eval_precision': 0.8774177406444971, 'eval_recall': 0.7716063172267623, 'eval_runtime': 2.1237, 'eval_samples_per_second': 717.142, 'eval_steps_per_second': 89.937, 'epoch': 2.0}


 13%|█▎        | 2004/15240 [01:27<09:17, 23.75it/s]  

{'loss': 0.0638, 'grad_norm': 0.03232757747173309, 'learning_rate': 4.343832020997376e-05, 'epoch': 2.62}


 15%|█▌        | 2286/15240 [01:39<08:36, 25.06it/s]
 15%|█▌        | 2286/15240 [01:41<08:36, 25.06it/s]

{'eval_loss': 0.07747872918844223, 'eval_f1': 0.8586427300634268, 'eval_precision': 0.8899558203996334, 'eval_recall': 0.8317553358435615, 'eval_runtime': 2.1914, 'eval_samples_per_second': 695.004, 'eval_steps_per_second': 87.161, 'epoch': 3.0}


 20%|█▉        | 3003/15240 [02:11<08:18, 24.54it/s]  

{'loss': 0.0358, 'grad_norm': 0.6835840940475464, 'learning_rate': 4.015748031496063e-05, 'epoch': 3.94}


 20%|██        | 3048/15240 [02:13<08:18, 24.45it/s]
 20%|██        | 3048/15240 [02:15<08:18, 24.45it/s]

{'eval_loss': 0.09683237224817276, 'eval_f1': 0.8625749772608845, 'eval_precision': 0.8958147762714811, 'eval_recall': 0.8373729527682184, 'eval_runtime': 2.1395, 'eval_samples_per_second': 711.846, 'eval_steps_per_second': 89.273, 'epoch': 4.0}


 25%|██▌       | 3810/15240 [02:47<07:33, 25.18it/s]  
 25%|██▌       | 3810/15240 [02:49<07:33, 25.18it/s]

{'eval_loss': 0.11687274277210236, 'eval_f1': 0.842976539842838, 'eval_precision': 0.8425713704260539, 'eval_recall': 0.8473365327604506, 'eval_runtime': 2.0914, 'eval_samples_per_second': 728.21, 'eval_steps_per_second': 91.325, 'epoch': 5.0}


 26%|██▋       | 4002/15240 [02:58<07:33, 24.79it/s]  

{'loss': 0.0203, 'grad_norm': 0.08141324669122696, 'learning_rate': 3.6876640419947505e-05, 'epoch': 5.25}


 30%|███       | 4572/15240 [03:21<07:20, 24.21it/s]
 30%|███       | 4572/15240 [03:23<07:20, 24.21it/s]

{'eval_loss': 0.12277287989854813, 'eval_f1': 0.8388188160671383, 'eval_precision': 0.8681596768538163, 'eval_recall': 0.8170649935471197, 'eval_runtime': 2.1526, 'eval_samples_per_second': 707.53, 'eval_steps_per_second': 88.732, 'epoch': 6.0}


 33%|███▎      | 5004/15240 [03:41<07:01, 24.31it/s]  

{'loss': 0.0123, 'grad_norm': 0.012271490879356861, 'learning_rate': 3.3595800524934386e-05, 'epoch': 6.56}


 35%|███▌      | 5334/15240 [03:55<06:42, 24.60it/s]
 35%|███▌      | 5334/15240 [03:57<06:42, 24.60it/s]

{'eval_loss': 0.13711780309677124, 'eval_f1': 0.8418873787171132, 'eval_precision': 0.8354005499198038, 'eval_recall': 0.8541219752034515, 'eval_runtime': 2.1526, 'eval_samples_per_second': 707.527, 'eval_steps_per_second': 88.731, 'epoch': 7.0}


 39%|███▉      | 6003/15240 [04:25<06:15, 24.59it/s]

{'loss': 0.0096, 'grad_norm': 0.013406051322817802, 'learning_rate': 3.0314960629921263e-05, 'epoch': 7.87}


 40%|████      | 6096/15240 [04:29<05:57, 25.61it/s]
 40%|████      | 6096/15240 [04:31<05:57, 25.61it/s]

{'eval_loss': 0.13386206328868866, 'eval_f1': 0.845073382708834, 'eval_precision': 0.849362808783991, 'eval_recall': 0.8451524011587852, 'eval_runtime': 2.1411, 'eval_samples_per_second': 711.32, 'eval_steps_per_second': 89.207, 'epoch': 8.0}


 45%|████▌     | 6858/15240 [05:03<05:29, 25.47it/s]
 45%|████▌     | 6858/15240 [05:05<05:29, 25.47it/s]

{'eval_loss': 0.1397489458322525, 'eval_f1': 0.8584570132465273, 'eval_precision': 0.8736419015308502, 'eval_recall': 0.84593469248393, 'eval_runtime': 2.1003, 'eval_samples_per_second': 725.144, 'eval_steps_per_second': 90.941, 'epoch': 9.0}


 46%|████▌     | 7002/15240 [05:11<05:42, 24.02it/s]

{'loss': 0.007, 'grad_norm': 0.047936394810676575, 'learning_rate': 2.7034120734908137e-05, 'epoch': 9.19}


 50%|█████     | 7620/15240 [05:37<05:04, 25.03it/s]
 50%|█████     | 7620/15240 [05:39<05:04, 25.03it/s]

{'eval_loss': 0.1415857970714569, 'eval_f1': 0.8623742543009482, 'eval_precision': 0.8790217208036128, 'eval_recall': 0.8474520977088138, 'eval_runtime': 2.109, 'eval_samples_per_second': 722.143, 'eval_steps_per_second': 90.564, 'epoch': 10.0}


 53%|█████▎    | 8004/15240 [05:55<04:53, 24.62it/s]

{'loss': 0.0054, 'grad_norm': 0.12736649811267853, 'learning_rate': 2.3753280839895015e-05, 'epoch': 10.5}


 55%|█████▌    | 8382/15240 [06:10<04:25, 25.83it/s]
 55%|█████▌    | 8382/15240 [06:12<04:25, 25.83it/s]

{'eval_loss': 0.1493144929409027, 'eval_f1': 0.8534158309688264, 'eval_precision': 0.8550728447171637, 'eval_recall': 0.8523549088824168, 'eval_runtime': 2.1816, 'eval_samples_per_second': 698.101, 'eval_steps_per_second': 87.549, 'epoch': 11.0}


 59%|█████▉    | 9003/15240 [06:39<04:19, 24.04it/s]

{'loss': 0.003, 'grad_norm': 0.0010294559178873897, 'learning_rate': 2.0472440944881892e-05, 'epoch': 11.81}


 60%|██████    | 9144/15240 [06:44<04:03, 25.01it/s]
 60%|██████    | 9144/15240 [06:46<04:03, 25.01it/s]

{'eval_loss': 0.15224218368530273, 'eval_f1': 0.8576035918124371, 'eval_precision': 0.8764039500801317, 'eval_recall': 0.842175998872728, 'eval_runtime': 2.0959, 'eval_samples_per_second': 726.655, 'eval_steps_per_second': 91.13, 'epoch': 12.0}


 65%|██████▌   | 9906/15240 [07:18<03:28, 25.57it/s]
 65%|██████▌   | 9906/15240 [07:20<03:28, 25.57it/s]

{'eval_loss': 0.16379684209823608, 'eval_f1': 0.8459130947944827, 'eval_precision': 0.8462858772214678, 'eval_recall': 0.84781340412554, 'eval_runtime': 2.1281, 'eval_samples_per_second': 715.678, 'eval_steps_per_second': 89.753, 'epoch': 13.0}


 66%|██████▌   | 10002/15240 [07:25<03:30, 24.92it/s]

{'loss': 0.0023, 'grad_norm': 0.022600263357162476, 'learning_rate': 1.7191601049868766e-05, 'epoch': 13.12}


 70%|███████   | 10668/15240 [07:52<03:03, 24.86it/s]
 70%|███████   | 10668/15240 [07:54<03:03, 24.86it/s]

{'eval_loss': 0.16004855930805206, 'eval_f1': 0.8588389258475957, 'eval_precision': 0.8818280733349818, 'eval_recall': 0.8411489215500524, 'eval_runtime': 2.1845, 'eval_samples_per_second': 697.189, 'eval_steps_per_second': 87.435, 'epoch': 14.0}


 72%|███████▏  | 11004/15240 [08:09<02:56, 23.98it/s]

{'loss': 0.0012, 'grad_norm': 0.0007254068623296916, 'learning_rate': 1.3910761154855645e-05, 'epoch': 14.44}


 75%|███████▌  | 11430/15240 [08:26<02:29, 25.40it/s]
 75%|███████▌  | 11430/15240 [08:28<02:29, 25.40it/s]

{'eval_loss': 0.16115707159042358, 'eval_f1': 0.8478207686917408, 'eval_precision': 0.8586330444029739, 'eval_recall': 0.8403490774200156, 'eval_runtime': 2.1, 'eval_samples_per_second': 725.245, 'eval_steps_per_second': 90.953, 'epoch': 15.0}


 79%|███████▉  | 12003/15240 [08:52<02:10, 24.78it/s]

{'loss': 0.0015, 'grad_norm': 0.008853329345583916, 'learning_rate': 1.062992125984252e-05, 'epoch': 15.75}


 80%|████████  | 12192/15240 [09:00<01:59, 25.58it/s]
 80%|████████  | 12192/15240 [09:02<01:59, 25.58it/s]

{'eval_loss': 0.17491105198860168, 'eval_f1': 0.8506850839928431, 'eval_precision': 0.8786470084394516, 'eval_recall': 0.8281755175665142, 'eval_runtime': 2.0711, 'eval_samples_per_second': 735.347, 'eval_steps_per_second': 92.22, 'epoch': 16.0}


 85%|████████▌ | 12954/15240 [09:34<01:31, 24.95it/s]
 85%|████████▌ | 12954/15240 [09:36<01:31, 24.95it/s]

{'eval_loss': 0.16947688162326813, 'eval_f1': 0.8502379267838595, 'eval_precision': 0.8633338675953578, 'eval_recall': 0.839532501588537, 'eval_runtime': 2.2023, 'eval_samples_per_second': 691.545, 'eval_steps_per_second': 86.727, 'epoch': 17.0}


 85%|████████▌ | 13002/15240 [09:39<01:34, 23.79it/s]

{'loss': 0.0009, 'grad_norm': 0.000994542962871492, 'learning_rate': 7.349081364829396e-06, 'epoch': 17.06}


 90%|█████████ | 13716/15240 [10:08<00:59, 25.47it/s]
 90%|█████████ | 13716/15240 [10:10<00:59, 25.47it/s]

{'eval_loss': 0.1746087223291397, 'eval_f1': 0.8525526199768627, 'eval_precision': 0.8648813290745737, 'eval_recall': 0.8423721088548117, 'eval_runtime': 2.1765, 'eval_samples_per_second': 699.75, 'eval_steps_per_second': 87.756, 'epoch': 18.0}


 92%|█████████▏| 14004/15240 [10:22<00:50, 24.33it/s]

{'loss': 0.0005, 'grad_norm': 0.00024446132010780275, 'learning_rate': 4.068241469816273e-06, 'epoch': 18.37}


 95%|█████████▌| 14478/15240 [10:41<00:29, 25.61it/s]
 95%|█████████▌| 14478/15240 [10:43<00:29, 25.61it/s]

{'eval_loss': 0.17458797991275787, 'eval_f1': 0.8545820186143966, 'eval_precision': 0.8662486324835154, 'eval_recall': 0.8445285269776165, 'eval_runtime': 2.0807, 'eval_samples_per_second': 731.983, 'eval_steps_per_second': 91.798, 'epoch': 19.0}


 98%|█████████▊| 15003/15240 [11:06<00:09, 25.02it/s]

{'loss': 0.0006, 'grad_norm': 0.0011768144322559237, 'learning_rate': 7.874015748031496e-07, 'epoch': 19.69}


100%|██████████| 15240/15240 [11:15<00:00, 25.24it/s]
100%|██████████| 15240/15240 [11:18<00:00, 25.24it/s]

{'eval_loss': 0.1742468774318695, 'eval_f1': 0.8533964289993012, 'eval_precision': 0.8669954173329403, 'eval_recall': 0.8417809748675277, 'eval_runtime': 2.2018, 'eval_samples_per_second': 691.717, 'eval_steps_per_second': 86.748, 'epoch': 20.0}


100%|██████████| 15240/15240 [11:19<00:00, 22.42it/s]


{'train_runtime': 679.9009, 'train_samples_per_second': 179.173, 'train_steps_per_second': 22.415, 'train_loss': 0.019319367009334832, 'epoch': 20.0}


100%|██████████| 191/191 [00:02<00:00, 92.95it/s]


Evaluation Metrics: {'eval_loss': 0.09683237224817276, 'eval_f1': 0.8625749772608845, 'eval_precision': 0.8958147762714811, 'eval_recall': 0.8373729527682184, 'eval_runtime': 2.0687, 'eval_samples_per_second': 736.202, 'eval_steps_per_second': 92.327, 'epoch': 20.0}
CHECK SCORE                                                 model   lan          cat  \
12  lr-5e-05_epoch-20_batchSize-8_weightsOfDecay-0.01  java      summary   
13  lr-5e-05_epoch-20_batchSize-8_weightsOfDecay-0.01  java    Ownership   
14  lr-5e-05_epoch-20_batchSize-8_weightsOfDecay-0.01  java       Expand   
15  lr-5e-05_epoch-20_batchSize-8_weightsOfDecay-0.01  java        usage   
16  lr-5e-05_epoch-20_batchSize-8_weightsOfDecay-0.01  java      Pointer   
17  lr-5e-05_epoch-20_batchSize-8_weightsOfDecay-0.01  java  deprecation   
18  lr-5e-05_epoch-20_batchSize-8_weightsOfDecay-0.01  java     rational   

    precision    recall        f1  
12   0.939437  0.948791  0.944091  
13   0.964912  1.000000  0.982143  
14  

0,1
eval/f1,▁▆██▇▇▇▇████▇█▇██████
eval/loss,▃▁▁▂▄▄▅▅▆▆▆▆▇▇▇█████▂
eval/precision,▁▆▇█▃▅▂▄▆▆▄▆▃▇▅▆▅▅▅▅█
eval/recall,▁▅▇▇█▇█████████▇▇███▇
eval/runtime,▅▄▇▅▂▅▅▅▃▃▇▂▄▇▃▁█▇▂█▁
eval/samples_per_second,▄▅▂▄▇▄▄▄▆▆▂▇▅▂▆█▁▂▇▁█
eval/steps_per_second,▄▅▂▄▇▄▄▄▆▆▂▇▅▂▆█▁▂▇▁█
train/epoch,▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇█████
train/global_step,▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇█████
train/grad_norm,█▁▇▂▁▁▁▂▁▁▁▁▁▁▁

0,1
eval/f1,0.86257
eval/loss,0.09683
eval/precision,0.89581
eval/recall,0.83737
eval/runtime,2.0687
eval/samples_per_second,736.202
eval/steps_per_second,92.327
total_flos,4034654355655680.0
train/epoch,20.0
train/global_step,15240.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1884/1884 [00:00<00:00, 15595.69 examples/s]
  5%|▍         | 187/3780 [00:07<02:23, 25.07it/s]
  5%|▌         | 189/3780 [00:08<02:23, 25.07it/s]

{'eval_loss': 0.3606899678707123, 'eval_f1': 0.29780131600064197, 'eval_precision': 0.3741891891891892, 'eval_recall': 0.24731404958677686, 'eval_runtime': 0.524, 'eval_samples_per_second': 719.493, 'eval_steps_per_second': 91.607, 'epoch': 1.0}


 10%|▉         | 376/3780 [00:16<02:22, 23.89it/s]
 10%|█         | 378/3780 [00:17<02:22, 23.89it/s]

{'eval_loss': 0.29777130484580994, 'eval_f1': 0.537989893608785, 'eval_precision': 0.6950096711798839, 'eval_recall': 0.45253118657471897, 'eval_runtime': 0.5629, 'eval_samples_per_second': 669.758, 'eval_steps_per_second': 85.274, 'epoch': 2.0}


 15%|█▍        | 565/3780 [00:25<02:12, 24.33it/s]
 15%|█▌        | 567/3780 [00:26<02:12, 24.33it/s]

{'eval_loss': 0.31601452827453613, 'eval_f1': 0.5594615430231868, 'eval_precision': 0.8505121652967904, 'eval_recall': 0.5140169920861891, 'eval_runtime': 0.5374, 'eval_samples_per_second': 701.539, 'eval_steps_per_second': 89.321, 'epoch': 3.0}


 20%|█▉        | 754/3780 [00:34<02:00, 25.05it/s]
 20%|██        | 756/3780 [00:35<02:00, 25.05it/s]

{'eval_loss': 0.3278109133243561, 'eval_f1': 0.6753285629664141, 'eval_precision': 0.7433812518718178, 'eval_recall': 0.6207462252765192, 'eval_runtime': 0.5237, 'eval_samples_per_second': 719.86, 'eval_steps_per_second': 91.653, 'epoch': 4.0}


 25%|██▍       | 943/3780 [00:44<01:53, 24.97it/s]
 25%|██▌       | 945/3780 [00:44<01:53, 24.97it/s]

{'eval_loss': 0.37552860379219055, 'eval_f1': 0.6724249422368851, 'eval_precision': 0.7484487243693537, 'eval_recall': 0.62862157988383, 'eval_runtime': 0.5482, 'eval_samples_per_second': 687.665, 'eval_steps_per_second': 87.554, 'epoch': 5.0}


 27%|██▋       | 1003/3780 [00:48<01:52, 24.68it/s]

{'loss': 0.2284, 'grad_norm': 2.632556676864624, 'learning_rate': 3.677248677248677e-05, 'epoch': 5.29}


 30%|██▉       | 1132/3780 [00:53<01:50, 23.90it/s]
 30%|███       | 1134/3780 [00:53<01:50, 23.90it/s]

{'eval_loss': 0.3931500315666199, 'eval_f1': 0.6817278592491276, 'eval_precision': 0.7378589322554839, 'eval_recall': 0.6364670300461357, 'eval_runtime': 0.5488, 'eval_samples_per_second': 686.894, 'eval_steps_per_second': 87.456, 'epoch': 6.0}


 35%|███▍      | 1321/3780 [01:02<01:39, 24.84it/s]
 35%|███▌      | 1323/3780 [01:03<01:38, 24.84it/s]

{'eval_loss': 0.3984311521053314, 'eval_f1': 0.7168577173538943, 'eval_precision': 0.737815675872471, 'eval_recall': 0.711225762884759, 'eval_runtime': 0.6061, 'eval_samples_per_second': 622.052, 'eval_steps_per_second': 79.2, 'epoch': 7.0}


 40%|███▉      | 1510/3780 [01:11<01:31, 24.82it/s]
 40%|████      | 1512/3780 [01:12<01:31, 24.82it/s]

{'eval_loss': 0.44211384654045105, 'eval_f1': 0.7200081937778707, 'eval_precision': 0.7475556543947913, 'eval_recall': 0.7015903922543426, 'eval_runtime': 0.5455, 'eval_samples_per_second': 691.167, 'eval_steps_per_second': 88.0, 'epoch': 8.0}


 45%|████▍     | 1699/3780 [01:21<01:27, 23.89it/s]
 45%|████▌     | 1701/3780 [01:21<01:27, 23.89it/s]

{'eval_loss': 0.46178117394447327, 'eval_f1': 0.7230359211775433, 'eval_precision': 0.7312611475590429, 'eval_recall': 0.718109649748883, 'eval_runtime': 0.544, 'eval_samples_per_second': 693.011, 'eval_steps_per_second': 88.235, 'epoch': 9.0}


 50%|████▉     | 1888/3780 [01:30<01:19, 23.80it/s]
 50%|█████     | 1890/3780 [01:30<01:19, 23.80it/s]

{'eval_loss': 0.47544702887535095, 'eval_f1': 0.7231281948720436, 'eval_precision': 0.7406733791493938, 'eval_recall': 0.70981336164332, 'eval_runtime': 0.5565, 'eval_samples_per_second': 677.499, 'eval_steps_per_second': 86.26, 'epoch': 10.0}


 53%|█████▎    | 2002/3780 [01:36<01:12, 24.62it/s]

{'loss': 0.0256, 'grad_norm': 0.2200814038515091, 'learning_rate': 2.3544973544973546e-05, 'epoch': 10.58}


 55%|█████▍    | 2077/3780 [01:39<01:08, 24.79it/s]
 55%|█████▌    | 2079/3780 [01:39<01:08, 24.79it/s]

{'eval_loss': 0.4871915280818939, 'eval_f1': 0.731770170453385, 'eval_precision': 0.757965958703669, 'eval_recall': 0.7104238065704577, 'eval_runtime': 0.5522, 'eval_samples_per_second': 682.704, 'eval_steps_per_second': 86.923, 'epoch': 11.0}


 60%|█████▉    | 2266/3780 [01:48<01:00, 25.15it/s]
 60%|██████    | 2268/3780 [01:48<01:00, 25.15it/s]

{'eval_loss': 0.4911639988422394, 'eval_f1': 0.7332978020482894, 'eval_precision': 0.7662721136599642, 'eval_recall': 0.7090568547178271, 'eval_runtime': 0.5476, 'eval_samples_per_second': 688.48, 'eval_steps_per_second': 87.658, 'epoch': 12.0}


 65%|██████▍   | 2455/3780 [01:57<00:56, 23.50it/s]
 65%|██████▌   | 2457/3780 [01:58<00:56, 23.50it/s]

{'eval_loss': 0.541502058506012, 'eval_f1': 0.7221538365447675, 'eval_precision': 0.7342587364326495, 'eval_recall': 0.7179575462709363, 'eval_runtime': 0.5582, 'eval_samples_per_second': 675.44, 'eval_steps_per_second': 85.998, 'epoch': 13.0}


 70%|██████▉   | 2644/3780 [02:06<00:47, 23.88it/s]
 70%|███████   | 2646/3780 [02:07<00:47, 23.88it/s]

{'eval_loss': 0.504812479019165, 'eval_f1': 0.7471574130108192, 'eval_precision': 0.7672148856847195, 'eval_recall': 0.7373213175277444, 'eval_runtime': 0.5384, 'eval_samples_per_second': 700.224, 'eval_steps_per_second': 89.153, 'epoch': 14.0}


 75%|███████▍  | 2833/3780 [02:15<00:38, 24.67it/s]
 75%|███████▌  | 2835/3780 [02:16<00:38, 24.67it/s]

{'eval_loss': 0.5137917399406433, 'eval_f1': 0.7414645534721482, 'eval_precision': 0.7695791250760824, 'eval_recall': 0.7178372850821277, 'eval_runtime': 0.5226, 'eval_samples_per_second': 721.39, 'eval_steps_per_second': 91.848, 'epoch': 15.0}


 79%|███████▉  | 3004/3780 [02:24<00:31, 24.32it/s]

{'loss': 0.0027, 'grad_norm': 0.07523898035287857, 'learning_rate': 1.0317460317460318e-05, 'epoch': 15.87}


 80%|███████▉  | 3022/3780 [02:25<00:30, 24.67it/s]
 80%|████████  | 3024/3780 [02:25<00:30, 24.67it/s]

{'eval_loss': 0.5232807993888855, 'eval_f1': 0.7437265618727747, 'eval_precision': 0.7689968111244708, 'eval_recall': 0.7265001275097923, 'eval_runtime': 0.5383, 'eval_samples_per_second': 700.364, 'eval_steps_per_second': 89.171, 'epoch': 16.0}


 85%|████████▍ | 3211/3780 [02:34<00:23, 24.03it/s]
 85%|████████▌ | 3213/3780 [02:34<00:23, 24.03it/s]

{'eval_loss': 0.5277856588363647, 'eval_f1': 0.7469557469557471, 'eval_precision': 0.7773486547431502, 'eval_recall': 0.7242398535371896, 'eval_runtime': 0.6047, 'eval_samples_per_second': 623.5, 'eval_steps_per_second': 79.385, 'epoch': 17.0}


 90%|████████▉ | 3400/3780 [02:43<00:15, 23.95it/s]
 90%|█████████ | 3402/3780 [02:43<00:15, 23.95it/s]

{'eval_loss': 0.5297295451164246, 'eval_f1': 0.7434193516973405, 'eval_precision': 0.7738144572946737, 'eval_recall': 0.7187912968238108, 'eval_runtime': 0.5885, 'eval_samples_per_second': 640.562, 'eval_steps_per_second': 81.557, 'epoch': 18.0}


 95%|█████████▍| 3589/3780 [02:52<00:07, 24.80it/s]
 95%|█████████▌| 3591/3780 [02:52<00:07, 24.80it/s]

{'eval_loss': 0.5323410630226135, 'eval_f1': 0.750202037998099, 'eval_precision': 0.777671519121106, 'eval_recall': 0.7287912968238108, 'eval_runtime': 0.5301, 'eval_samples_per_second': 711.186, 'eval_steps_per_second': 90.549, 'epoch': 19.0}


100%|█████████▉| 3778/3780 [03:01<00:00, 24.80it/s]
100%|██████████| 3780/3780 [03:02<00:00, 24.80it/s]

{'eval_loss': 0.5332509875297546, 'eval_f1': 0.7514963584983017, 'eval_precision': 0.7782955699234769, 'eval_recall': 0.7304441893857943, 'eval_runtime': 0.5212, 'eval_samples_per_second': 723.381, 'eval_steps_per_second': 92.102, 'epoch': 20.0}


100%|██████████| 3780/3780 [03:03<00:00, 20.57it/s]


{'train_runtime': 183.7418, 'train_samples_per_second': 164.035, 'train_steps_per_second': 20.572, 'train_loss': 0.06807219608435555, 'epoch': 20.0}


100%|██████████| 48/48 [00:00<00:00, 96.17it/s] 


Evaluation Metrics: {'eval_loss': 0.5332509875297546, 'eval_f1': 0.7514963584983017, 'eval_precision': 0.7782955699234769, 'eval_recall': 0.7304441893857943, 'eval_runtime': 0.5118, 'eval_samples_per_second': 736.544, 'eval_steps_per_second': 93.777, 'epoch': 20.0}
CHECK SCORE                                                 model     lan  \
14  lr-5e-05_epoch-20_batchSize-8_weightsOfDecay-0.01  python   
15  lr-5e-05_epoch-20_batchSize-8_weightsOfDecay-0.01  python   
16  lr-5e-05_epoch-20_batchSize-8_weightsOfDecay-0.01  python   
17  lr-5e-05_epoch-20_batchSize-8_weightsOfDecay-0.01  python   
18  lr-5e-05_epoch-20_batchSize-8_weightsOfDecay-0.01  python   

                 cat  precision    recall        f1  
14             Usage   0.884615  0.760331  0.817778  
15        Parameters   0.881818  0.866071  0.873874  
16  DevelopmentNotes   0.604651  0.650000  0.626506  
17            Expand   0.763636  0.608696  0.677419  
18           Summary   0.756757  0.767123  0.761905  
Scores:

0,1
eval/f1,▁▅▅▇▇▇▇██████████████
eval/loss,▃▁▂▂▃▄▄▅▆▆▆▇█▇▇▇█████
eval/precision,▁▆█▆▇▆▆▆▆▆▇▇▆▇▇▇▇▇▇▇▇
eval/recall,▁▄▅▆▆▇█▇█████████████
eval/runtime,▂▅▃▂▄▄█▄▃▄▄▄▄▃▂▃█▇▂▂▁
eval/samples_per_second,▇▄▆▇▅▅▁▅▅▄▅▅▄▆▇▆▁▂▆▇█
eval/steps_per_second,▇▄▆▇▅▅▁▅▅▄▅▅▄▆▇▆▁▂▆▇█
train/epoch,▁▁▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇████
train/global_step,▁▁▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇████
train/grad_norm,█▁▁

0,1
eval/f1,0.7515
eval/loss,0.53325
eval/precision,0.7783
eval/recall,0.73044
eval/runtime,0.5118
eval/samples_per_second,736.544
eval/steps_per_second,93.777
total_flos,998195250201600.0
train/epoch,20.0
train/global_step,3780.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1298/1298 [00:00<00:00, 15046.39 examples/s]
  5%|▍         | 129/2600 [00:05<01:41, 24.33it/s]
  5%|▌         | 130/2600 [00:05<01:41, 24.33it/s]

{'eval_loss': 0.2836136519908905, 'eval_f1': 0.23644788793767704, 'eval_precision': 0.3697278911564626, 'eval_recall': 0.20822898831236505, 'eval_runtime': 0.379, 'eval_samples_per_second': 685.98, 'eval_steps_per_second': 87.067, 'epoch': 1.0}


 10%|▉         | 258/2600 [00:11<01:33, 25.10it/s]
 10%|█         | 260/2600 [00:12<01:33, 25.10it/s]

{'eval_loss': 0.22834518551826477, 'eval_f1': 0.4574941634159967, 'eval_precision': 0.6276991441737986, 'eval_recall': 0.37468337098124643, 'eval_runtime': 0.3726, 'eval_samples_per_second': 697.89, 'eval_steps_per_second': 88.578, 'epoch': 2.0}


 15%|█▍        | 388/2600 [00:19<01:37, 22.61it/s]
 15%|█▌        | 390/2600 [00:20<01:37, 22.61it/s]

{'eval_loss': 0.22899915277957916, 'eval_f1': 0.5157859372207306, 'eval_precision': 0.727172096908939, 'eval_recall': 0.4416928912307552, 'eval_runtime': 0.7357, 'eval_samples_per_second': 353.415, 'eval_steps_per_second': 44.856, 'epoch': 3.0}


 20%|█▉        | 519/2600 [00:27<01:31, 22.71it/s]
 20%|██        | 520/2600 [00:27<01:31, 22.71it/s]

{'eval_loss': 0.22246912121772766, 'eval_f1': 0.6450560067251985, 'eval_precision': 0.8419035325729036, 'eval_recall': 0.5746313691176691, 'eval_runtime': 0.4224, 'eval_samples_per_second': 615.509, 'eval_steps_per_second': 78.122, 'epoch': 4.0}


 25%|██▍       | 648/2600 [00:34<01:21, 23.84it/s]
 25%|██▌       | 650/2600 [00:34<01:21, 23.84it/s]

{'eval_loss': 0.25379836559295654, 'eval_f1': 0.6166922935424262, 'eval_precision': 0.8637548179791322, 'eval_recall': 0.5494972278268574, 'eval_runtime': 0.3718, 'eval_samples_per_second': 699.342, 'eval_steps_per_second': 88.763, 'epoch': 5.0}


 30%|███       | 780/2600 [00:41<01:15, 24.12it/s]
 30%|███       | 780/2600 [00:41<01:15, 24.12it/s]

{'eval_loss': 0.22860164940357208, 'eval_f1': 0.6398228417635675, 'eval_precision': 0.8188169690829267, 'eval_recall': 0.5783237292122839, 'eval_runtime': 0.3731, 'eval_samples_per_second': 696.851, 'eval_steps_per_second': 88.446, 'epoch': 6.0}


 35%|███▍      | 909/2600 [00:48<01:10, 23.96it/s]
 35%|███▌      | 910/2600 [00:48<01:10, 23.96it/s]

{'eval_loss': 0.254475861787796, 'eval_f1': 0.653191856452726, 'eval_precision': 0.8126576500584476, 'eval_recall': 0.5853532315546156, 'eval_runtime': 0.384, 'eval_samples_per_second': 677.009, 'eval_steps_per_second': 85.928, 'epoch': 7.0}


 39%|███▊      | 1002/2600 [00:53<01:10, 22.65it/s]

{'loss': 0.1288, 'grad_norm': 0.28774532675743103, 'learning_rate': 3.0769230769230774e-05, 'epoch': 7.69}


 40%|███▉      | 1038/2600 [00:54<01:08, 22.85it/s]
 40%|████      | 1040/2600 [00:55<01:08, 22.85it/s]

{'eval_loss': 0.24846391379833221, 'eval_f1': 0.6628671786333785, 'eval_precision': 0.8056475161645995, 'eval_recall': 0.6177558947475523, 'eval_runtime': 0.3966, 'eval_samples_per_second': 655.519, 'eval_steps_per_second': 83.201, 'epoch': 8.0}


 45%|████▍     | 1169/2600 [01:02<00:59, 23.92it/s]
 45%|████▌     | 1170/2600 [01:03<00:59, 23.92it/s]

{'eval_loss': 0.2630316913127899, 'eval_f1': 0.6503270247653674, 'eval_precision': 0.8066630130795629, 'eval_recall': 0.5915332464297461, 'eval_runtime': 0.3859, 'eval_samples_per_second': 673.755, 'eval_steps_per_second': 85.515, 'epoch': 9.0}


 50%|████▉     | 1298/2600 [01:09<00:55, 23.47it/s]
 50%|█████     | 1300/2600 [01:10<00:55, 23.47it/s]

{'eval_loss': 0.2595427632331848, 'eval_f1': 0.6862530530112433, 'eval_precision': 0.8230155191147384, 'eval_recall': 0.6243781509809915, 'eval_runtime': 0.3966, 'eval_samples_per_second': 655.588, 'eval_steps_per_second': 83.209, 'epoch': 10.0}


 55%|█████▌    | 1430/2600 [01:16<00:48, 24.25it/s]
 55%|█████▌    | 1430/2600 [01:17<00:48, 24.25it/s]

{'eval_loss': 0.2718086838722229, 'eval_f1': 0.6798884421853258, 'eval_precision': 0.8189405702992659, 'eval_recall': 0.6158029134951797, 'eval_runtime': 0.3909, 'eval_samples_per_second': 665.145, 'eval_steps_per_second': 84.422, 'epoch': 11.0}


 60%|██████    | 1560/2600 [01:25<00:44, 23.12it/s]
 60%|██████    | 1560/2600 [01:26<00:44, 23.12it/s]

{'eval_loss': 0.27561667561531067, 'eval_f1': 0.6863116549028367, 'eval_precision': 0.7814051684503588, 'eval_recall': 0.6331328132241888, 'eval_runtime': 0.6314, 'eval_samples_per_second': 411.802, 'eval_steps_per_second': 52.267, 'epoch': 12.0}


 65%|██████▍   | 1689/2600 [01:33<00:39, 23.02it/s]
 65%|██████▌   | 1690/2600 [01:33<00:39, 23.02it/s]

{'eval_loss': 0.28543904423713684, 'eval_f1': 0.6958469818023776, 'eval_precision': 0.82946982730146, 'eval_recall': 0.6347342829673419, 'eval_runtime': 0.4285, 'eval_samples_per_second': 606.711, 'eval_steps_per_second': 77.006, 'epoch': 13.0}


 70%|███████   | 1820/2600 [01:40<00:32, 23.65it/s]
 70%|███████   | 1820/2600 [01:40<00:32, 23.65it/s]

{'eval_loss': 0.28732314705848694, 'eval_f1': 0.7077031326257053, 'eval_precision': 0.8341387361396173, 'eval_recall': 0.651589898185833, 'eval_runtime': 0.3955, 'eval_samples_per_second': 657.371, 'eval_steps_per_second': 83.436, 'epoch': 14.0}


 75%|███████▍  | 1949/2600 [01:47<00:29, 21.84it/s]
 75%|███████▌  | 1950/2600 [01:47<00:29, 21.84it/s]

{'eval_loss': 0.29320818185806274, 'eval_f1': 0.6841226276424441, 'eval_precision': 0.821333465507264, 'eval_recall': 0.6206196153358509, 'eval_runtime': 0.4466, 'eval_samples_per_second': 582.112, 'eval_steps_per_second': 73.884, 'epoch': 15.0}


 77%|███████▋  | 2003/2600 [01:51<00:25, 23.03it/s]

{'loss': 0.008, 'grad_norm': 0.02149008959531784, 'learning_rate': 1.153846153846154e-05, 'epoch': 15.38}


 80%|███████▉  | 2078/2600 [01:54<00:21, 23.77it/s]
 80%|████████  | 2080/2600 [01:54<00:21, 23.77it/s]

{'eval_loss': 0.2968083620071411, 'eval_f1': 0.6729328299448156, 'eval_precision': 0.8079322894983834, 'eval_recall': 0.6123703696328908, 'eval_runtime': 0.3767, 'eval_samples_per_second': 690.156, 'eval_steps_per_second': 87.597, 'epoch': 16.0}


 85%|████████▌ | 2210/2600 [02:01<00:16, 23.86it/s]
 85%|████████▌ | 2210/2600 [02:01<00:16, 23.86it/s]

{'eval_loss': 0.2992773652076721, 'eval_f1': 0.6865795323186369, 'eval_precision': 0.8187537656652275, 'eval_recall': 0.6268810135121426, 'eval_runtime': 0.3855, 'eval_samples_per_second': 674.486, 'eval_steps_per_second': 85.608, 'epoch': 17.0}


 90%|████████▉ | 2339/2600 [02:08<00:11, 22.55it/s]
 90%|█████████ | 2340/2600 [02:08<00:11, 22.55it/s]

{'eval_loss': 0.3022212088108063, 'eval_f1': 0.6760422024559745, 'eval_precision': 0.8104774064120568, 'eval_recall': 0.6157717301771085, 'eval_runtime': 0.4244, 'eval_samples_per_second': 612.645, 'eval_steps_per_second': 77.759, 'epoch': 18.0}


 95%|█████████▍| 2468/2600 [02:15<00:05, 22.78it/s]
 95%|█████████▌| 2470/2600 [02:15<00:05, 22.78it/s]

{'eval_loss': 0.3027402460575104, 'eval_f1': 0.6706258201843898, 'eval_precision': 0.8077882802817934, 'eval_recall': 0.6086109711366502, 'eval_runtime': 0.4307, 'eval_samples_per_second': 603.642, 'eval_steps_per_second': 76.616, 'epoch': 19.0}


100%|██████████| 2600/2600 [02:22<00:00, 23.11it/s]
100%|██████████| 2600/2600 [02:23<00:00, 23.11it/s]

{'eval_loss': 0.3034374415874481, 'eval_f1': 0.6790409042176188, 'eval_precision': 0.8122377905823674, 'eval_recall': 0.6188150527693033, 'eval_runtime': 0.4122, 'eval_samples_per_second': 630.779, 'eval_steps_per_second': 80.06, 'epoch': 20.0}


100%|██████████| 2600/2600 [02:24<00:00, 17.98it/s]


{'train_runtime': 144.6168, 'train_samples_per_second': 143.552, 'train_steps_per_second': 17.979, 'train_loss': 0.05318234980106354, 'epoch': 20.0}


100%|██████████| 33/33 [00:00<00:00, 85.43it/s]


Evaluation Metrics: {'eval_loss': 0.28732314705848694, 'eval_f1': 0.7077031326257053, 'eval_precision': 0.8341387361396173, 'eval_recall': 0.651589898185833, 'eval_runtime': 0.4014, 'eval_samples_per_second': 647.762, 'eval_steps_per_second': 82.216, 'epoch': 20.0}
CHECK SCORE                                                 model    lan  \
12  lr-5e-05_epoch-20_batchSize-8_weightsOfDecay-0.01  pharo   
13  lr-5e-05_epoch-20_batchSize-8_weightsOfDecay-0.01  pharo   
14  lr-5e-05_epoch-20_batchSize-8_weightsOfDecay-0.01  pharo   
15  lr-5e-05_epoch-20_batchSize-8_weightsOfDecay-0.01  pharo   
16  lr-5e-05_epoch-20_batchSize-8_weightsOfDecay-0.01  pharo   
17  lr-5e-05_epoch-20_batchSize-8_weightsOfDecay-0.01  pharo   
18  lr-5e-05_epoch-20_batchSize-8_weightsOfDecay-0.01  pharo   

                        cat  precision    recall        f1  
12  Keyimplementationpoints   0.735294  0.595238  0.657895  
13                  Example   0.946237  0.871287  0.907216  
14         Responsibilitie

0,1
eval/f1,▁▄▅▇▇▇▇▇▇██████▇██▇██
eval/loss,▆▂▂▁▄▂▄▃▅▄▅▆▆▇▇▇████▇
eval/precision,▁▅▆██▇▇▇▇▇▇▇██▇▇▇▇▇▇█
eval/recall,▁▄▅▇▆▇▇▇▇█▇████▇█▇▇▇█
eval/runtime,▁▁█▂▁▁▁▁▁▁▁▆▂▁▂▁▁▂▂▂▂
eval/samples_per_second,██▁▆███▇▇▇▇▂▆▇▆█▇▆▆▇▇
eval/steps_per_second,██▁▆███▇▇▇▇▂▆▇▆█▇▆▆▇▇
train/epoch,▁▁▂▂▂▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇████
train/global_step,▁▁▂▂▂▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇████
train/grad_norm,█▁

0,1
eval/f1,0.7077
eval/loss,0.28732
eval/precision,0.83414
eval/recall,0.65159
eval/runtime,0.4014
eval/samples_per_second,647.762
eval/steps_per_second,82.216
total_flos,687567102474240.0
train/epoch,20.0
train/global_step,2600.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 7614/7614 [00:00<00:00, 20064.47 examples/s]
  5%|▍         | 761/15240 [00:33<10:45, 22.43it/s]
  5%|▌         | 762/15240 [00:36<10:45, 22.43it/s]

{'eval_loss': 0.16755156219005585, 'eval_f1': 0.37924541271644807, 'eval_precision': 0.39436186587299943, 'eval_recall': 0.36777598595186045, 'eval_runtime': 2.4585, 'eval_samples_per_second': 619.48, 'eval_steps_per_second': 77.689, 'epoch': 1.0}


  7%|▋         | 1004/15240 [00:48<11:17, 21.00it/s] 

{'loss': 0.2544, 'grad_norm': 2.0353598594665527, 'learning_rate': 4.6719160104986885e-06, 'epoch': 1.31}


 10%|▉         | 1523/15240 [01:11<10:32, 21.70it/s]
 10%|█         | 1524/15240 [01:13<10:32, 21.70it/s]

{'eval_loss': 0.12093935161828995, 'eval_f1': 0.5321945316688897, 'eval_precision': 0.5381519714098244, 'eval_recall': 0.5269178485915982, 'eval_runtime': 2.333, 'eval_samples_per_second': 652.794, 'eval_steps_per_second': 81.867, 'epoch': 2.0}


 13%|█▎        | 2003/15240 [01:35<09:36, 22.98it/s]  

{'loss': 0.1225, 'grad_norm': 0.8947542309761047, 'learning_rate': 4.343832020997376e-06, 'epoch': 2.62}


 15%|█▍        | 2285/15240 [01:47<09:48, 22.01it/s]
 15%|█▌        | 2286/15240 [01:50<09:48, 22.01it/s]

{'eval_loss': 0.10301802307367325, 'eval_f1': 0.5534449660201279, 'eval_precision': 0.6703608861400978, 'eval_recall': 0.5362535019574036, 'eval_runtime': 2.4934, 'eval_samples_per_second': 610.8, 'eval_steps_per_second': 76.601, 'epoch': 3.0}


 20%|█▉        | 3005/15240 [02:21<08:28, 24.05it/s]  

{'loss': 0.0924, 'grad_norm': 1.2372949123382568, 'learning_rate': 4.015748031496064e-06, 'epoch': 3.94}


 20%|█▉        | 3047/15240 [02:23<08:39, 23.49it/s]
 20%|██        | 3048/15240 [02:25<08:39, 23.49it/s]

{'eval_loss': 0.10189609229564667, 'eval_f1': 0.6335459528221771, 'eval_precision': 0.9488010882338366, 'eval_recall': 0.5906479726874704, 'eval_runtime': 2.2994, 'eval_samples_per_second': 662.338, 'eval_steps_per_second': 83.064, 'epoch': 4.0}


 25%|██▍       | 3809/15240 [03:00<08:06, 23.50it/s]  
 25%|██▌       | 3810/15240 [03:02<08:06, 23.50it/s]

{'eval_loss': 0.08792456239461899, 'eval_f1': 0.7657889944288161, 'eval_precision': 0.8988930271269389, 'eval_recall': 0.7156621671765028, 'eval_runtime': 2.4561, 'eval_samples_per_second': 620.08, 'eval_steps_per_second': 77.764, 'epoch': 5.0}


 26%|██▋       | 4002/15240 [03:11<08:19, 22.49it/s]  

{'loss': 0.0727, 'grad_norm': 1.3648899793624878, 'learning_rate': 3.6876640419947506e-06, 'epoch': 5.25}


 30%|███       | 4572/15240 [03:37<07:39, 23.22it/s]
 30%|███       | 4572/15240 [03:39<07:39, 23.22it/s]

{'eval_loss': 0.0891655683517456, 'eval_f1': 0.7725809399341163, 'eval_precision': 0.9070329939524985, 'eval_recall': 0.7235850202121378, 'eval_runtime': 2.3903, 'eval_samples_per_second': 637.152, 'eval_steps_per_second': 79.906, 'epoch': 6.0}


 33%|███▎      | 5004/15240 [03:59<07:35, 22.48it/s]  

{'loss': 0.0583, 'grad_norm': 8.441051483154297, 'learning_rate': 3.3595800524934387e-06, 'epoch': 6.56}


 35%|███▌      | 5334/15240 [04:13<07:15, 22.73it/s]
 35%|███▌      | 5334/15240 [04:16<07:15, 22.73it/s]

{'eval_loss': 0.08463134616613388, 'eval_f1': 0.8004937382464407, 'eval_precision': 0.9174369481584892, 'eval_recall': 0.7491144703151703, 'eval_runtime': 2.558, 'eval_samples_per_second': 595.398, 'eval_steps_per_second': 74.669, 'epoch': 7.0}


 39%|███▉      | 6003/15240 [04:46<07:02, 21.86it/s]  

{'loss': 0.0502, 'grad_norm': 0.09263981878757477, 'learning_rate': 3.0314960629921263e-06, 'epoch': 7.87}


 40%|████      | 6096/15240 [04:50<06:30, 23.42it/s]
 40%|████      | 6096/15240 [04:52<06:30, 23.42it/s]

{'eval_loss': 0.08638796955347061, 'eval_f1': 0.8366358204792215, 'eval_precision': 0.8971786618939195, 'eval_recall': 0.8063798803102306, 'eval_runtime': 2.2895, 'eval_samples_per_second': 665.212, 'eval_steps_per_second': 83.424, 'epoch': 8.0}


 45%|████▌     | 6858/15240 [05:27<05:53, 23.71it/s]
 45%|████▌     | 6858/15240 [05:29<05:53, 23.71it/s]

{'eval_loss': 0.0870068147778511, 'eval_f1': 0.8493179205911376, 'eval_precision': 0.9067296830025361, 'eval_recall': 0.8146920595740857, 'eval_runtime': 2.3477, 'eval_samples_per_second': 648.728, 'eval_steps_per_second': 81.357, 'epoch': 9.0}


 46%|████▌     | 7002/15240 [05:36<06:09, 22.30it/s]

{'loss': 0.0399, 'grad_norm': 1.912459135055542, 'learning_rate': 2.7034120734908135e-06, 'epoch': 9.19}


 50%|█████     | 7620/15240 [06:03<05:17, 23.97it/s]
 50%|█████     | 7620/15240 [06:05<05:17, 23.97it/s]

{'eval_loss': 0.08234455436468124, 'eval_f1': 0.8659121170950069, 'eval_precision': 0.9025654766747241, 'eval_recall': 0.8422212383829983, 'eval_runtime': 2.321, 'eval_samples_per_second': 656.185, 'eval_steps_per_second': 82.292, 'epoch': 10.0}


 53%|█████▎    | 8004/15240 [06:23<05:18, 22.72it/s]

{'loss': 0.0314, 'grad_norm': 2.1973373889923096, 'learning_rate': 2.3753280839895016e-06, 'epoch': 10.5}


 55%|█████▌    | 8382/15240 [06:39<04:48, 23.81it/s]
 55%|█████▌    | 8382/15240 [06:42<04:48, 23.81it/s]

{'eval_loss': 0.08453369140625, 'eval_f1': 0.8600281363255311, 'eval_precision': 0.8897726242592964, 'eval_recall': 0.8367852269377182, 'eval_runtime': 2.2499, 'eval_samples_per_second': 676.911, 'eval_steps_per_second': 84.892, 'epoch': 11.0}


 59%|█████▉    | 9003/15240 [07:09<04:29, 23.18it/s]

{'loss': 0.0286, 'grad_norm': 0.9746435284614563, 'learning_rate': 2.0472440944881893e-06, 'epoch': 11.81}


 60%|██████    | 9144/15240 [07:15<04:32, 22.36it/s]
 60%|██████    | 9144/15240 [07:18<04:32, 22.36it/s]

{'eval_loss': 0.08846893906593323, 'eval_f1': 0.862502391836033, 'eval_precision': 0.9052837205444106, 'eval_recall': 0.8301665707892273, 'eval_runtime': 2.392, 'eval_samples_per_second': 636.711, 'eval_steps_per_second': 79.85, 'epoch': 12.0}


 65%|██████▌   | 9906/15240 [07:51<03:45, 23.60it/s]
 65%|██████▌   | 9906/15240 [07:54<03:45, 23.60it/s]

{'eval_loss': 0.09040336310863495, 'eval_f1': 0.8593170436244542, 'eval_precision': 0.905005023264205, 'eval_recall': 0.8269618584715792, 'eval_runtime': 2.397, 'eval_samples_per_second': 635.384, 'eval_steps_per_second': 79.684, 'epoch': 13.0}


 66%|██████▌   | 10002/15240 [07:59<03:45, 23.24it/s]

{'loss': 0.0222, 'grad_norm': 0.07683604210615158, 'learning_rate': 1.7191601049868767e-06, 'epoch': 13.12}


 70%|███████   | 10668/15240 [08:27<03:17, 23.17it/s]
 70%|███████   | 10668/15240 [08:29<03:17, 23.17it/s]

{'eval_loss': 0.08907173573970795, 'eval_f1': 0.8616040646050148, 'eval_precision': 0.9058691895143328, 'eval_recall': 0.8298992103214937, 'eval_runtime': 2.4024, 'eval_samples_per_second': 633.951, 'eval_steps_per_second': 79.504, 'epoch': 14.0}


 72%|███████▏  | 11004/15240 [08:44<02:58, 23.76it/s]

{'loss': 0.0205, 'grad_norm': 0.025957755744457245, 'learning_rate': 1.3910761154855646e-06, 'epoch': 14.44}


 75%|███████▌  | 11430/15240 [09:02<02:39, 23.83it/s]
 75%|███████▌  | 11430/15240 [09:05<02:39, 23.83it/s]

{'eval_loss': 0.09339232742786407, 'eval_f1': 0.8595928033901713, 'eval_precision': 0.8850396441480834, 'eval_recall': 0.8425210951334444, 'eval_runtime': 2.2804, 'eval_samples_per_second': 667.863, 'eval_steps_per_second': 83.757, 'epoch': 15.0}


 79%|███████▉  | 12003/15240 [09:30<02:18, 23.40it/s]

{'loss': 0.017, 'grad_norm': 0.14946973323822021, 'learning_rate': 1.062992125984252e-06, 'epoch': 15.75}


 80%|████████  | 12192/15240 [09:38<02:08, 23.75it/s]
 80%|████████  | 12192/15240 [09:40<02:08, 23.75it/s]

{'eval_loss': 0.0922912061214447, 'eval_f1': 0.867288437875392, 'eval_precision': 0.8989257619163605, 'eval_recall': 0.8418817811275938, 'eval_runtime': 2.2864, 'eval_samples_per_second': 666.125, 'eval_steps_per_second': 83.539, 'epoch': 16.0}


 85%|████████▌ | 12954/15240 [10:13<01:31, 24.92it/s]
 85%|████████▌ | 12954/15240 [10:15<01:31, 24.92it/s]

{'eval_loss': 0.09376412630081177, 'eval_f1': 0.8674090978510105, 'eval_precision': 0.9022035004991327, 'eval_recall': 0.8402971105911321, 'eval_runtime': 2.1766, 'eval_samples_per_second': 699.72, 'eval_steps_per_second': 87.752, 'epoch': 17.0}


 85%|████████▌ | 13002/15240 [10:18<01:43, 21.68it/s]

{'loss': 0.0154, 'grad_norm': 0.4118811786174774, 'learning_rate': 7.349081364829397e-07, 'epoch': 17.06}


 90%|█████████ | 13716/15240 [10:48<01:04, 23.62it/s]
 90%|█████████ | 13716/15240 [10:51<01:04, 23.62it/s]

{'eval_loss': 0.09383133053779602, 'eval_f1': 0.8732060828256467, 'eval_precision': 0.8982431991516358, 'eval_recall': 0.8541414207260409, 'eval_runtime': 2.3137, 'eval_samples_per_second': 658.253, 'eval_steps_per_second': 82.552, 'epoch': 18.0}


 92%|█████████▏| 14004/15240 [11:04<00:51, 23.80it/s]

{'loss': 0.0144, 'grad_norm': 0.049577414989471436, 'learning_rate': 4.068241469816273e-07, 'epoch': 18.37}


 95%|█████████▌| 14478/15240 [11:24<00:33, 22.47it/s]
 95%|█████████▌| 14478/15240 [11:26<00:33, 22.47it/s]

{'eval_loss': 0.09291346371173859, 'eval_f1': 0.8720049986181376, 'eval_precision': 0.9057877245887529, 'eval_recall': 0.846127638045412, 'eval_runtime': 2.2514, 'eval_samples_per_second': 676.454, 'eval_steps_per_second': 84.834, 'epoch': 19.0}


 98%|█████████▊| 15003/15240 [11:49<00:10, 23.06it/s]

{'loss': 0.0138, 'grad_norm': 0.09085608273744583, 'learning_rate': 7.874015748031497e-08, 'epoch': 19.69}


100%|██████████| 15240/15240 [11:59<00:00, 24.02it/s]
100%|██████████| 15240/15240 [12:03<00:00, 24.02it/s]

{'eval_loss': 0.09312953054904938, 'eval_f1': 0.8725385142730945, 'eval_precision': 0.902922889476313, 'eval_recall': 0.8483316490950666, 'eval_runtime': 2.4101, 'eval_samples_per_second': 631.926, 'eval_steps_per_second': 79.25, 'epoch': 20.0}


100%|██████████| 15240/15240 [12:04<00:00, 21.04it/s]


{'train_runtime': 724.3288, 'train_samples_per_second': 168.183, 'train_steps_per_second': 21.04, 'train_loss': 0.05623838815476325, 'epoch': 20.0}


100%|██████████| 191/191 [00:02<00:00, 91.06it/s]


Evaluation Metrics: {'eval_loss': 0.09383133053779602, 'eval_f1': 0.8732060828256467, 'eval_precision': 0.8982431991516358, 'eval_recall': 0.8541414207260409, 'eval_runtime': 2.1113, 'eval_samples_per_second': 721.368, 'eval_steps_per_second': 90.467, 'epoch': 20.0}
CHECK SCORE                                                 model   lan          cat  \
12  lr-5e-06_epoch-20_batchSize-8_weightsOfDecay-0.01  java      summary   
13  lr-5e-06_epoch-20_batchSize-8_weightsOfDecay-0.01  java    Ownership   
14  lr-5e-06_epoch-20_batchSize-8_weightsOfDecay-0.01  java       Expand   
15  lr-5e-06_epoch-20_batchSize-8_weightsOfDecay-0.01  java        usage   
16  lr-5e-06_epoch-20_batchSize-8_weightsOfDecay-0.01  java      Pointer   
17  lr-5e-06_epoch-20_batchSize-8_weightsOfDecay-0.01  java  deprecation   
18  lr-5e-06_epoch-20_batchSize-8_weightsOfDecay-0.01  java     rational   

    precision    recall        f1  
12   0.941011  0.953058  0.946996  
13   0.964912  1.000000  0.982143  
14  

0,1
eval/f1,▁▃▃▅▆▇▇▇█████████████
eval/loss,█▄▃▃▁▂▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂
eval/precision,▁▃▄█▇▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇
eval/recall,▁▃▃▄▆▆▆▇▇████████████
eval/runtime,▆▄▇▄▆▅█▄▅▄▃▅▅▆▄▄▂▄▃▆▁
eval/samples_per_second,▂▄▂▅▂▃▁▅▄▄▆▃▃▃▅▅▇▄▆▃█
eval/steps_per_second,▂▄▂▅▂▃▁▅▄▄▆▃▃▃▅▅▇▄▆▃█
train/epoch,▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇█████
train/global_step,▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇█████
train/grad_norm,▃▂▂▂█▁▃▃▂▁▁▁▁▁▁

0,1
eval/f1,0.87321
eval/loss,0.09383
eval/precision,0.89824
eval/recall,0.85414
eval/runtime,2.1113
eval/samples_per_second,721.368
eval/steps_per_second,90.467
total_flos,4034654355655680.0
train/epoch,20.0
train/global_step,15240.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1884/1884 [00:00<00:00, 17724.68 examples/s]
  5%|▌         | 189/3780 [00:08<02:26, 24.59it/s]
  5%|▌         | 189/3780 [00:08<02:26, 24.59it/s]

{'eval_loss': 0.4935694932937622, 'eval_f1': 0.0, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_runtime': 0.5633, 'eval_samples_per_second': 669.306, 'eval_steps_per_second': 85.217, 'epoch': 1.0}


 10%|█         | 378/3780 [00:17<02:22, 23.92it/s]
 10%|█         | 378/3780 [00:18<02:22, 23.92it/s]

{'eval_loss': 0.44945162534713745, 'eval_f1': 0.10538922155688624, 'eval_precision': 0.19130434782608696, 'eval_recall': 0.07272727272727272, 'eval_runtime': 0.5561, 'eval_samples_per_second': 677.897, 'eval_steps_per_second': 86.31, 'epoch': 2.0}


 15%|█▌        | 567/3780 [00:27<02:12, 24.20it/s]
 15%|█▌        | 567/3780 [00:27<02:12, 24.20it/s]

{'eval_loss': 0.4084324538707733, 'eval_f1': 0.1902178125726688, 'eval_precision': 0.3522504892367906, 'eval_recall': 0.1412337662337662, 'eval_runtime': 0.6341, 'eval_samples_per_second': 594.547, 'eval_steps_per_second': 75.698, 'epoch': 3.0}


 20%|██        | 756/3780 [00:36<02:01, 24.89it/s]
 20%|██        | 756/3780 [00:37<02:01, 24.89it/s]

{'eval_loss': 0.3830506503582001, 'eval_f1': 0.29947542280224254, 'eval_precision': 0.5357864357864358, 'eval_recall': 0.2231322071452831, 'eval_runtime': 0.5511, 'eval_samples_per_second': 684.132, 'eval_steps_per_second': 87.104, 'epoch': 4.0}


 25%|██▌       | 945/3780 [00:46<01:55, 24.46it/s]
 25%|██▌       | 945/3780 [00:46<01:55, 24.46it/s]

{'eval_loss': 0.35729509592056274, 'eval_f1': 0.3869773162133094, 'eval_precision': 0.5216346153846153, 'eval_recall': 0.31984663841762223, 'eval_runtime': 0.544, 'eval_samples_per_second': 693.052, 'eval_steps_per_second': 88.24, 'epoch': 5.0}


 27%|██▋       | 1005/3780 [00:50<01:55, 23.97it/s]

{'loss': 0.4299, 'grad_norm': 2.3053503036499023, 'learning_rate': 3.6772486772486774e-06, 'epoch': 5.29}


 30%|███       | 1134/3780 [00:56<01:55, 22.91it/s]
 30%|███       | 1134/3780 [00:57<01:55, 22.91it/s]

{'eval_loss': 0.34251824021339417, 'eval_f1': 0.4361781027398404, 'eval_precision': 0.5235776823279982, 'eval_recall': 0.3760569131988808, 'eval_runtime': 0.6286, 'eval_samples_per_second': 599.709, 'eval_steps_per_second': 76.355, 'epoch': 6.0}


 35%|███▌      | 1323/3780 [01:06<02:13, 18.46it/s]
 35%|███▌      | 1323/3780 [01:06<02:13, 18.46it/s]

{'eval_loss': 0.3322286903858185, 'eval_f1': 0.45103277674706244, 'eval_precision': 0.50705189941145, 'eval_recall': 0.4087585515356375, 'eval_runtime': 0.5699, 'eval_samples_per_second': 661.559, 'eval_steps_per_second': 84.23, 'epoch': 7.0}


 40%|████      | 1512/3780 [01:15<01:31, 24.82it/s]
 40%|████      | 1512/3780 [01:16<01:31, 24.82it/s]

{'eval_loss': 0.322258859872818, 'eval_f1': 0.48382100227137637, 'eval_precision': 0.6557641459545215, 'eval_recall': 0.42729248660268204, 'eval_runtime': 0.5383, 'eval_samples_per_second': 700.365, 'eval_steps_per_second': 89.171, 'epoch': 8.0}


 45%|████▌     | 1701/3780 [01:25<01:22, 25.24it/s]
 45%|████▌     | 1701/3780 [01:25<01:22, 25.24it/s]

{'eval_loss': 0.31956958770751953, 'eval_f1': 0.49980406933871446, 'eval_precision': 0.665502239695788, 'eval_recall': 0.4389337175269007, 'eval_runtime': 0.5291, 'eval_samples_per_second': 712.567, 'eval_steps_per_second': 90.725, 'epoch': 9.0}


 50%|█████     | 1890/3780 [01:34<01:15, 24.94it/s]
 50%|█████     | 1890/3780 [01:34<01:15, 24.94it/s]

{'eval_loss': 0.31078416109085083, 'eval_f1': 0.5156740090149052, 'eval_precision': 0.6836809375682271, 'eval_recall': 0.45736189740196453, 'eval_runtime': 0.559, 'eval_samples_per_second': 674.442, 'eval_steps_per_second': 85.871, 'epoch': 10.0}


 53%|█████▎    | 2004/3780 [01:40<01:11, 24.72it/s]

{'loss': 0.262, 'grad_norm': 3.1787140369415283, 'learning_rate': 2.3544973544973545e-06, 'epoch': 10.58}


 55%|█████▌    | 2079/3780 [01:43<01:12, 23.61it/s]
 55%|█████▌    | 2079/3780 [01:43<01:12, 23.61it/s]

{'eval_loss': 0.30824723839759827, 'eval_f1': 0.5259485072971719, 'eval_precision': 0.671597828401107, 'eval_recall': 0.46985810609517903, 'eval_runtime': 0.5422, 'eval_samples_per_second': 695.349, 'eval_steps_per_second': 88.532, 'epoch': 11.0}


 60%|██████    | 2268/3780 [01:52<00:59, 25.39it/s]
 60%|██████    | 2268/3780 [01:52<00:59, 25.39it/s]

{'eval_loss': 0.30379247665405273, 'eval_f1': 0.5484665184891339, 'eval_precision': 0.66221632996633, 'eval_recall': 0.48433157796415144, 'eval_runtime': 0.5382, 'eval_samples_per_second': 700.431, 'eval_steps_per_second': 89.18, 'epoch': 12.0}


 65%|██████▌   | 2457/3780 [02:01<00:53, 24.94it/s]
 65%|██████▌   | 2457/3780 [02:01<00:53, 24.94it/s]

{'eval_loss': 0.30436772108078003, 'eval_f1': 0.5402518505913384, 'eval_precision': 0.6542398705896689, 'eval_recall': 0.48386561639850934, 'eval_runtime': 0.5305, 'eval_samples_per_second': 710.622, 'eval_steps_per_second': 90.477, 'epoch': 13.0}


 70%|███████   | 2646/3780 [02:10<00:45, 24.91it/s]
 70%|███████   | 2646/3780 [02:11<00:45, 24.91it/s]

{'eval_loss': 0.2994460165500641, 'eval_f1': 0.5501972773885245, 'eval_precision': 0.6557806637806637, 'eval_recall': 0.49505463290657403, 'eval_runtime': 0.5729, 'eval_samples_per_second': 658.031, 'eval_steps_per_second': 83.781, 'epoch': 14.0}


 75%|███████▌  | 2835/3780 [02:19<00:38, 24.84it/s]
 75%|███████▌  | 2835/3780 [02:20<00:38, 24.84it/s]

{'eval_loss': 0.3004390299320221, 'eval_f1': 0.5474343592093527, 'eval_precision': 0.6355913154159667, 'eval_recall': 0.492587271545932, 'eval_runtime': 0.5519, 'eval_samples_per_second': 683.1, 'eval_steps_per_second': 86.973, 'epoch': 15.0}


 79%|███████▉  | 3003/3780 [02:27<00:31, 24.64it/s]

{'loss': 0.1876, 'grad_norm': 4.302180290222168, 'learning_rate': 1.0317460317460317e-06, 'epoch': 15.87}


 80%|████████  | 3024/3780 [02:28<00:29, 25.48it/s]
 80%|████████  | 3024/3780 [02:29<00:29, 25.48it/s]

{'eval_loss': 0.29887086153030396, 'eval_f1': 0.5526570458622423, 'eval_precision': 0.6327100179694519, 'eval_recall': 0.5020973925185036, 'eval_runtime': 0.5294, 'eval_samples_per_second': 712.131, 'eval_steps_per_second': 90.669, 'epoch': 16.0}


 85%|████████▌ | 3213/3780 [02:37<00:23, 24.00it/s]
 85%|████████▌ | 3213/3780 [02:38<00:23, 24.00it/s]

{'eval_loss': 0.2966679632663727, 'eval_f1': 0.5563048788205566, 'eval_precision': 0.654333212025257, 'eval_recall': 0.49947997568875796, 'eval_runtime': 0.5831, 'eval_samples_per_second': 646.564, 'eval_steps_per_second': 82.321, 'epoch': 17.0}


 90%|█████████ | 3402/3780 [02:46<00:15, 25.18it/s]
 90%|█████████ | 3402/3780 [02:47<00:15, 25.18it/s]

{'eval_loss': 0.29767656326293945, 'eval_f1': 0.5542038228220142, 'eval_precision': 0.652861452273217, 'eval_recall': 0.4983671392498346, 'eval_runtime': 0.5297, 'eval_samples_per_second': 711.699, 'eval_steps_per_second': 90.614, 'epoch': 18.0}


 95%|█████████▌| 3591/3780 [02:55<00:07, 25.07it/s]
 95%|█████████▌| 3591/3780 [02:56<00:07, 25.07it/s]

{'eval_loss': 0.29718828201293945, 'eval_f1': 0.5532712587457114, 'eval_precision': 0.6445086600835149, 'eval_recall': 0.5005860909083878, 'eval_runtime': 0.5239, 'eval_samples_per_second': 719.648, 'eval_steps_per_second': 91.626, 'epoch': 19.0}


100%|██████████| 3780/3780 [03:05<00:00, 25.33it/s]
100%|██████████| 3780/3780 [03:06<00:00, 25.33it/s]

{'eval_loss': 0.2975972890853882, 'eval_f1': 0.5499391595232012, 'eval_precision': 0.6436600037893141, 'eval_recall': 0.4956274132224373, 'eval_runtime': 0.5401, 'eval_samples_per_second': 698.019, 'eval_steps_per_second': 88.872, 'epoch': 20.0}


100%|██████████| 3780/3780 [03:07<00:00, 20.17it/s]


{'train_runtime': 187.4135, 'train_samples_per_second': 160.821, 'train_steps_per_second': 20.169, 'train_loss': 0.26516435209405487, 'epoch': 20.0}


100%|██████████| 48/48 [00:00<00:00, 94.12it/s] 


Evaluation Metrics: {'eval_loss': 0.2966679632663727, 'eval_f1': 0.5563048788205566, 'eval_precision': 0.654333212025257, 'eval_recall': 0.49947997568875796, 'eval_runtime': 0.5228, 'eval_samples_per_second': 721.157, 'eval_steps_per_second': 91.818, 'epoch': 20.0}
CHECK SCORE                                                 model     lan  \
14  lr-5e-06_epoch-20_batchSize-8_weightsOfDecay-0.01  python   
15  lr-5e-06_epoch-20_batchSize-8_weightsOfDecay-0.01  python   
16  lr-5e-06_epoch-20_batchSize-8_weightsOfDecay-0.01  python   
17  lr-5e-06_epoch-20_batchSize-8_weightsOfDecay-0.01  python   
18  lr-5e-06_epoch-20_batchSize-8_weightsOfDecay-0.01  python   

                 cat  precision    recall        f1  
14             Usage   0.857143  0.694215  0.767123  
15        Parameters   0.871287  0.785714  0.826291  
16  DevelopmentNotes   0.000000  0.000000  0.000000  
17            Expand   0.758621  0.318841  0.448980  
18           Summary   0.784615  0.698630  0.739130  
Scores:

0,1
eval/f1,▁▂▃▅▆▆▇▇▇▇███████████
eval/loss,█▆▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
eval/precision,▁▃▅▆▆▆▆████████▇█████
eval/recall,▁▂▃▄▅▆▇▇▇▇███████████
eval/runtime,▄▃█▃▂█▄▂▁▃▂▂▁▄▃▁▅▁▁▂▁
eval/samples_per_second,▅▆▁▆▆▁▅▇█▅▇▇▇▅▆█▄▇█▇█
eval/steps_per_second,▅▆▁▆▆▁▅▇█▅▇▇▇▅▆█▄▇█▇█
train/epoch,▁▁▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇████
train/global_step,▁▁▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇████
train/grad_norm,▁▄█

0,1
eval/f1,0.5563
eval/loss,0.29667
eval/precision,0.65433
eval/recall,0.49948
eval/runtime,0.5228
eval/samples_per_second,721.157
eval/steps_per_second,91.818
total_flos,998195250201600.0
train/epoch,20.0
train/global_step,3780.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1298/1298 [00:00<00:00, 15117.63 examples/s]
  5%|▍         | 128/2600 [00:05<01:42, 24.15it/s]
  5%|▌         | 130/2600 [00:05<01:42, 24.15it/s]

{'eval_loss': 0.43183258175849915, 'eval_f1': 0.0, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_runtime': 0.3913, 'eval_samples_per_second': 664.522, 'eval_steps_per_second': 84.343, 'epoch': 1.0}


 10%|█         | 260/2600 [00:12<01:33, 25.10it/s]
 10%|█         | 260/2600 [00:12<01:33, 25.10it/s]

{'eval_loss': 0.38541874289512634, 'eval_f1': 0.0, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_runtime': 0.3658, 'eval_samples_per_second': 710.733, 'eval_steps_per_second': 90.208, 'epoch': 2.0}


 15%|█▍        | 389/2600 [00:18<01:33, 23.62it/s]
 15%|█▌        | 390/2600 [00:19<01:33, 23.62it/s]

{'eval_loss': 0.34619438648223877, 'eval_f1': 0.11165845648604271, 'eval_precision': 0.13307240704500978, 'eval_recall': 0.09618104667609619, 'eval_runtime': 0.413, 'eval_samples_per_second': 629.563, 'eval_steps_per_second': 79.906, 'epoch': 3.0}


 20%|█▉        | 518/2600 [00:25<01:23, 24.83it/s]
 20%|██        | 520/2600 [00:25<01:23, 24.83it/s]

{'eval_loss': 0.31524401903152466, 'eval_f1': 0.11820728291316526, 'eval_precision': 0.28157349896480327, 'eval_recall': 0.09762376237623763, 'eval_runtime': 0.3848, 'eval_samples_per_second': 675.627, 'eval_steps_per_second': 85.753, 'epoch': 4.0}


 25%|██▌       | 650/2600 [00:31<01:18, 24.84it/s]
 25%|██▌       | 650/2600 [00:32<01:18, 24.84it/s]

{'eval_loss': 0.2934728264808655, 'eval_f1': 0.15160589265268581, 'eval_precision': 0.4010025062656642, 'eval_recall': 0.12235468444803664, 'eval_runtime': 0.3664, 'eval_samples_per_second': 709.657, 'eval_steps_per_second': 90.072, 'epoch': 5.0}


 30%|██▉       | 779/2600 [00:38<01:15, 24.07it/s]
 30%|███       | 780/2600 [00:39<01:15, 24.07it/s]

{'eval_loss': 0.2755201458930969, 'eval_f1': 0.27128559109011885, 'eval_precision': 0.5213564213564214, 'eval_recall': 0.20810278879660252, 'eval_runtime': 0.4246, 'eval_samples_per_second': 612.295, 'eval_steps_per_second': 77.714, 'epoch': 6.0}


 35%|███▍      | 908/2600 [00:45<01:08, 24.76it/s]
 35%|███▌      | 910/2600 [00:46<01:08, 24.76it/s]

{'eval_loss': 0.26226806640625, 'eval_f1': 0.3179099696856669, 'eval_precision': 0.6537414965986394, 'eval_recall': 0.24405309973600828, 'eval_runtime': 0.373, 'eval_samples_per_second': 697.003, 'eval_steps_per_second': 88.466, 'epoch': 7.0}


 39%|███▊      | 1004/2600 [00:50<01:06, 24.08it/s]

{'loss': 0.3303, 'grad_norm': 1.7916492223739624, 'learning_rate': 3.0769230769230774e-06, 'epoch': 7.69}


 40%|████      | 1040/2600 [00:52<01:04, 24.03it/s]
 40%|████      | 1040/2600 [00:52<01:04, 24.03it/s]

{'eval_loss': 0.2535727322101593, 'eval_f1': 0.38114042346898314, 'eval_precision': 0.6476190476190476, 'eval_recall': 0.29018042412601064, 'eval_runtime': 0.3688, 'eval_samples_per_second': 704.91, 'eval_steps_per_second': 89.469, 'epoch': 8.0}


 45%|████▍     | 1169/2600 [00:58<00:58, 24.42it/s]
 45%|████▌     | 1170/2600 [00:59<00:58, 24.42it/s]

{'eval_loss': 0.2465989887714386, 'eval_f1': 0.41493037113104425, 'eval_precision': 0.6438783950304687, 'eval_recall': 0.32429798653243297, 'eval_runtime': 0.3826, 'eval_samples_per_second': 679.489, 'eval_steps_per_second': 86.243, 'epoch': 9.0}


 50%|█████     | 1300/2600 [01:06<00:53, 24.10it/s]
 50%|█████     | 1300/2600 [01:07<00:53, 24.10it/s]

{'eval_loss': 0.2402130365371704, 'eval_f1': 0.4231587884731901, 'eval_precision': 0.6315345100339501, 'eval_recall': 0.33435077808578445, 'eval_runtime': 0.3766, 'eval_samples_per_second': 690.433, 'eval_steps_per_second': 87.632, 'epoch': 10.0}


 55%|█████▍    | 1429/2600 [01:13<00:47, 24.85it/s]
 55%|█████▌    | 1430/2600 [01:13<00:47, 24.85it/s]

{'eval_loss': 0.23468662798404694, 'eval_f1': 0.4603384584451131, 'eval_precision': 0.6450691244239631, 'eval_recall': 0.3699110561923482, 'eval_runtime': 0.3791, 'eval_samples_per_second': 685.851, 'eval_steps_per_second': 87.05, 'epoch': 11.0}


 60%|█████▉    | 1558/2600 [01:19<00:43, 23.74it/s]
 60%|██████    | 1560/2600 [01:20<00:43, 23.74it/s]

{'eval_loss': 0.2306138575077057, 'eval_f1': 0.45364395020940557, 'eval_precision': 0.6422514619883042, 'eval_recall': 0.3659372407062678, 'eval_runtime': 0.4219, 'eval_samples_per_second': 616.228, 'eval_steps_per_second': 78.214, 'epoch': 12.0}


 65%|██████▌   | 1690/2600 [01:26<00:35, 25.32it/s]
 65%|██████▌   | 1690/2600 [01:27<00:35, 25.32it/s]

{'eval_loss': 0.22617478668689728, 'eval_f1': 0.4820706850585222, 'eval_precision': 0.6376161528441164, 'eval_recall': 0.39780124722712923, 'eval_runtime': 0.3626, 'eval_samples_per_second': 717.052, 'eval_steps_per_second': 91.01, 'epoch': 13.0}


 70%|██████▉   | 1819/2600 [01:33<00:32, 23.82it/s]
 70%|███████   | 1820/2600 [01:34<00:32, 23.82it/s]

{'eval_loss': 0.22519123554229736, 'eval_f1': 0.49574608232373935, 'eval_precision': 0.6489239926739927, 'eval_recall': 0.41263112791156614, 'eval_runtime': 0.3959, 'eval_samples_per_second': 656.681, 'eval_steps_per_second': 83.348, 'epoch': 14.0}


 75%|███████▍  | 1948/2600 [01:40<00:26, 24.36it/s]
 75%|███████▌  | 1950/2600 [01:40<00:26, 24.36it/s]

{'eval_loss': 0.22592179477214813, 'eval_f1': 0.48959725234708174, 'eval_precision': 0.6405494505494506, 'eval_recall': 0.4095633256906147, 'eval_runtime': 0.3728, 'eval_samples_per_second': 697.367, 'eval_steps_per_second': 88.512, 'epoch': 15.0}


 77%|███████▋  | 2002/2600 [01:43<00:25, 23.73it/s]

{'loss': 0.175, 'grad_norm': 1.0227973461151123, 'learning_rate': 1.153846153846154e-06, 'epoch': 15.38}


 80%|████████  | 2080/2600 [01:47<00:21, 23.94it/s]
 80%|████████  | 2080/2600 [01:47<00:21, 23.94it/s]

{'eval_loss': 0.22400401532649994, 'eval_f1': 0.4968041415731704, 'eval_precision': 0.6407559058516349, 'eval_recall': 0.41603248845578383, 'eval_runtime': 0.426, 'eval_samples_per_second': 610.349, 'eval_steps_per_second': 77.467, 'epoch': 16.0}


 85%|████████▍ | 2209/2600 [01:53<00:15, 24.91it/s]
 85%|████████▌ | 2210/2600 [01:53<00:15, 24.91it/s]

{'eval_loss': 0.2214789241552353, 'eval_f1': 0.4953918125417739, 'eval_precision': 0.627463362840599, 'eval_recall': 0.417657574976594, 'eval_runtime': 0.3652, 'eval_samples_per_second': 712.006, 'eval_steps_per_second': 90.37, 'epoch': 17.0}


 90%|█████████ | 2340/2600 [02:01<00:10, 24.27it/s]
 90%|█████████ | 2340/2600 [02:02<00:10, 24.27it/s]

{'eval_loss': 0.22067363560199738, 'eval_f1': 0.501098421255618, 'eval_precision': 0.6375425170068028, 'eval_recall': 0.42283140062983604, 'eval_runtime': 0.395, 'eval_samples_per_second': 658.22, 'eval_steps_per_second': 83.543, 'epoch': 18.0}


 95%|█████████▍| 2469/2600 [02:08<00:05, 24.75it/s]
 95%|█████████▌| 2470/2600 [02:08<00:05, 24.75it/s]

{'eval_loss': 0.22128716111183167, 'eval_f1': 0.49862700451329073, 'eval_precision': 0.6362973760932944, 'eval_recall': 0.4204864292905968, 'eval_runtime': 0.3863, 'eval_samples_per_second': 673.013, 'eval_steps_per_second': 85.421, 'epoch': 19.0}


100%|█████████▉| 2598/2600 [02:15<00:00, 24.60it/s]
100%|██████████| 2600/2600 [02:16<00:00, 24.60it/s]

{'eval_loss': 0.22058522701263428, 'eval_f1': 0.49632944531203915, 'eval_precision': 0.6263278710498978, 'eval_recall': 0.4204864292905968, 'eval_runtime': 0.3676, 'eval_samples_per_second': 707.292, 'eval_steps_per_second': 89.772, 'epoch': 20.0}


100%|██████████| 2600/2600 [02:17<00:00, 18.92it/s]


{'train_runtime': 137.4027, 'train_samples_per_second': 151.089, 'train_steps_per_second': 18.922, 'train_loss': 0.2265792465209961, 'epoch': 20.0}


100%|██████████| 33/33 [00:00<00:00, 94.45it/s] 


Evaluation Metrics: {'eval_loss': 0.22067363560199738, 'eval_f1': 0.501098421255618, 'eval_precision': 0.6375425170068028, 'eval_recall': 0.42283140062983604, 'eval_runtime': 0.3622, 'eval_samples_per_second': 717.741, 'eval_steps_per_second': 91.098, 'epoch': 20.0}
CHECK SCORE                                                 model    lan  \
12  lr-5e-06_epoch-20_batchSize-8_weightsOfDecay-0.01  pharo   
13  lr-5e-06_epoch-20_batchSize-8_weightsOfDecay-0.01  pharo   
14  lr-5e-06_epoch-20_batchSize-8_weightsOfDecay-0.01  pharo   
15  lr-5e-06_epoch-20_batchSize-8_weightsOfDecay-0.01  pharo   
16  lr-5e-06_epoch-20_batchSize-8_weightsOfDecay-0.01  pharo   
17  lr-5e-06_epoch-20_batchSize-8_weightsOfDecay-0.01  pharo   
18  lr-5e-06_epoch-20_batchSize-8_weightsOfDecay-0.01  pharo   

                        cat  precision    recall        f1  
12  Keyimplementationpoints   1.000000  0.476190  0.645161  
13                  Example   0.927083  0.881188  0.903553  
14         Responsibiliti

0,1
eval/f1,▁▁▃▃▃▅▅▆▇▇▇▇█████████
eval/loss,█▆▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
eval/precision,▁▁▂▄▅▇███████████████
eval/recall,▁▁▃▃▃▄▅▆▆▇▇▇█████████
eval/runtime,▄▁▇▃▁█▂▂▃▃▃█▁▅▂█▁▅▄▂▁
eval/samples_per_second,▅█▂▅▇▁▇▇▆▆▆▁█▄▇▁█▄▅▇█
eval/steps_per_second,▅█▂▅▇▁▇▇▆▆▆▁█▄▇▁█▄▅▇█
train/epoch,▁▁▂▂▂▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇████
train/global_step,▁▁▂▂▂▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇████
train/grad_norm,█▁

0,1
eval/f1,0.5011
eval/loss,0.22067
eval/precision,0.63754
eval/recall,0.42283
eval/runtime,0.3622
eval/samples_per_second,717.741
eval/steps_per_second,91.098
total_flos,687567102474240.0
train/epoch,20.0
train/global_step,2600.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 7614/7614 [00:00<00:00, 20252.37 examples/s]
  7%|▋         | 1005/15230 [00:35<08:50, 26.83it/s]

{'loss': 0.1599, 'grad_norm': 0.09071271121501923, 'learning_rate': 4.6717005909389365e-05, 'epoch': 0.66}


 10%|▉         | 1522/15230 [00:53<07:46, 29.38it/s]
 10%|█         | 1523/15230 [00:57<07:46, 29.38it/s]

{'eval_loss': 0.10108744353055954, 'eval_f1': 0.6647943459459696, 'eval_precision': 0.6763384814582529, 'eval_recall': 0.6548082430723816, 'eval_runtime': 3.7019, 'eval_samples_per_second': 411.406, 'eval_steps_per_second': 102.919, 'epoch': 1.0}


 13%|█▎        | 2003/15230 [01:14<08:09, 27.00it/s]  

{'loss': 0.0982, 'grad_norm': 7.115480422973633, 'learning_rate': 4.343401181877873e-05, 'epoch': 1.31}


 20%|█▉        | 3003/15230 [01:49<06:58, 29.18it/s]

{'loss': 0.0827, 'grad_norm': 0.8813915848731995, 'learning_rate': 4.015101772816809e-05, 'epoch': 1.97}


 20%|██        | 3046/15230 [01:51<07:07, 28.51it/s]
 20%|██        | 3046/15230 [01:55<07:07, 28.51it/s]

{'eval_loss': 0.08754793554544449, 'eval_f1': 0.822366234067407, 'eval_precision': 0.8893908586297401, 'eval_recall': 0.7820263729903558, 'eval_runtime': 4.2129, 'eval_samples_per_second': 361.512, 'eval_steps_per_second': 90.437, 'epoch': 2.0}


 26%|██▋       | 4005/15230 [02:29<06:30, 28.76it/s]  

{'loss': 0.0567, 'grad_norm': 0.021005909889936447, 'learning_rate': 3.6868023637557454e-05, 'epoch': 2.63}


 30%|███       | 4569/15230 [02:49<06:16, 28.35it/s]
 30%|███       | 4569/15230 [02:53<06:16, 28.35it/s]

{'eval_loss': 0.0878656655550003, 'eval_f1': 0.8590531117899476, 'eval_precision': 0.87700526960552, 'eval_recall': 0.8432557008619929, 'eval_runtime': 3.6369, 'eval_samples_per_second': 418.762, 'eval_steps_per_second': 104.759, 'epoch': 3.0}


 33%|███▎      | 5002/15230 [03:09<06:16, 27.14it/s]  

{'loss': 0.0484, 'grad_norm': 0.275297075510025, 'learning_rate': 3.3585029546946817e-05, 'epoch': 3.28}


 39%|███▉      | 6002/15230 [03:44<05:14, 29.38it/s]

{'loss': 0.0361, 'grad_norm': 0.32234159111976624, 'learning_rate': 3.030203545633618e-05, 'epoch': 3.94}


 40%|███▉      | 6091/15230 [03:47<05:07, 29.68it/s]
 40%|████      | 6092/15230 [03:51<05:07, 29.68it/s]

{'eval_loss': 0.10308148711919785, 'eval_f1': 0.8565910123097984, 'eval_precision': 0.8939973997709547, 'eval_recall': 0.82925522130568, 'eval_runtime': 3.7357, 'eval_samples_per_second': 407.691, 'eval_steps_per_second': 101.99, 'epoch': 4.0}


 46%|████▌     | 7005/15230 [04:24<04:47, 28.62it/s]  

{'loss': 0.023, 'grad_norm': 0.00722371693700552, 'learning_rate': 2.7019041365725546e-05, 'epoch': 4.6}


 50%|█████     | 7615/15230 [04:45<04:30, 28.10it/s]
 50%|█████     | 7615/15230 [04:48<04:30, 28.10it/s]

{'eval_loss': 0.10961873829364777, 'eval_f1': 0.8582130449011995, 'eval_precision': 0.8838410474662719, 'eval_recall': 0.8397087910692896, 'eval_runtime': 3.4782, 'eval_samples_per_second': 437.865, 'eval_steps_per_second': 109.538, 'epoch': 5.0}


 53%|█████▎    | 8005/15230 [05:03<04:14, 28.42it/s]

{'loss': 0.0247, 'grad_norm': 0.009280776605010033, 'learning_rate': 2.3736047275114905e-05, 'epoch': 5.25}


 59%|█████▉    | 9002/15230 [05:38<03:50, 27.06it/s]

{'loss': 0.0156, 'grad_norm': 3.3318634033203125, 'learning_rate': 2.045305318450427e-05, 'epoch': 5.91}


 60%|█████▉    | 9137/15230 [05:43<03:25, 29.58it/s]
 60%|██████    | 9138/15230 [05:46<03:25, 29.58it/s]

{'eval_loss': 0.12748989462852478, 'eval_f1': 0.8190632740693219, 'eval_precision': 0.8509568283763475, 'eval_recall': 0.7998732795840654, 'eval_runtime': 3.3465, 'eval_samples_per_second': 455.096, 'eval_steps_per_second': 113.849, 'epoch': 6.0}


 66%|██████▌   | 10002/15230 [06:17<03:10, 27.42it/s]

{'loss': 0.0123, 'grad_norm': 0.009155207313597202, 'learning_rate': 1.717005909389363e-05, 'epoch': 6.57}


 70%|██████▉   | 10660/15230 [06:40<02:33, 29.74it/s]
 70%|███████   | 10661/15230 [06:43<02:33, 29.74it/s]

{'eval_loss': 0.12165864557027817, 'eval_f1': 0.8635635256747902, 'eval_precision': 0.8778237762819774, 'eval_recall': 0.8555352586812309, 'eval_runtime': 3.9082, 'eval_samples_per_second': 389.697, 'eval_steps_per_second': 97.488, 'epoch': 7.0}


 72%|███████▏  | 11004/15230 [06:57<02:35, 27.22it/s]

{'loss': 0.0115, 'grad_norm': 0.0038499324582517147, 'learning_rate': 1.3887065003282995e-05, 'epoch': 7.22}


 79%|███████▉  | 12003/15230 [07:31<01:50, 29.12it/s]

{'loss': 0.0073, 'grad_norm': 0.0029074945487082005, 'learning_rate': 1.0604070912672358e-05, 'epoch': 7.88}


 80%|███████▉  | 12182/15230 [07:38<01:54, 26.62it/s]
 80%|████████  | 12184/15230 [07:42<01:54, 26.62it/s]

{'eval_loss': 0.1367521435022354, 'eval_f1': 0.860868824344436, 'eval_precision': 0.8943267627124939, 'eval_recall': 0.8374637730694111, 'eval_runtime': 3.8159, 'eval_samples_per_second': 399.121, 'eval_steps_per_second': 99.846, 'epoch': 8.0}


 85%|████████▌ | 13005/15230 [08:11<01:17, 28.82it/s]

{'loss': 0.0041, 'grad_norm': 0.00602265540510416, 'learning_rate': 7.321076822061721e-06, 'epoch': 8.54}


 90%|████████▉ | 13706/15230 [08:36<00:52, 28.96it/s]
 90%|█████████ | 13707/15230 [08:39<00:52, 28.96it/s]

{'eval_loss': 0.14003893733024597, 'eval_f1': 0.8625307900193325, 'eval_precision': 0.8810033455818618, 'eval_recall': 0.8491561417277242, 'eval_runtime': 3.5729, 'eval_samples_per_second': 426.266, 'eval_steps_per_second': 106.637, 'epoch': 9.0}


 92%|█████████▏| 14002/15230 [08:50<00:41, 29.30it/s]

{'loss': 0.0039, 'grad_norm': 0.00840635783970356, 'learning_rate': 4.038082731451084e-06, 'epoch': 9.19}


 99%|█████████▊| 15003/15230 [09:25<00:08, 25.87it/s]

{'loss': 0.0032, 'grad_norm': 0.0010077784536406398, 'learning_rate': 7.550886408404465e-07, 'epoch': 9.85}


100%|█████████▉| 15228/15230 [09:33<00:00, 28.69it/s]
100%|██████████| 15230/15230 [09:38<00:00, 28.69it/s]

{'eval_loss': 0.14376841485500336, 'eval_f1': 0.8590310451080688, 'eval_precision': 0.8751937176834769, 'eval_recall': 0.8462532426746031, 'eval_runtime': 3.6638, 'eval_samples_per_second': 415.688, 'eval_steps_per_second': 103.99, 'epoch': 10.0}


100%|██████████| 15230/15230 [09:39<00:00, 26.28it/s]


{'train_runtime': 579.491, 'train_samples_per_second': 105.109, 'train_steps_per_second': 26.282, 'train_loss': 0.03861008402515381, 'epoch': 10.0}


100%|██████████| 381/381 [00:03<00:00, 118.14it/s]


Evaluation Metrics: {'eval_loss': 0.12165864557027817, 'eval_f1': 0.8635635256747902, 'eval_precision': 0.8778237762819774, 'eval_recall': 0.8555352586812309, 'eval_runtime': 3.2368, 'eval_samples_per_second': 470.522, 'eval_steps_per_second': 117.708, 'epoch': 10.0}
CHECK SCORE                                                 model   lan          cat  \
12  lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0...  java      summary   
13  lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0...  java    Ownership   
14  lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0...  java       Expand   
15  lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0...  java        usage   
16  lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0...  java      Pointer   
17  lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0...  java  deprecation   
18  lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0...  java     rational   

    precision    recall        f1  
12   0.944915  0.951636  0.948264  
13   0.964912  1.000000  0.982143  
14 

0,1
eval/f1,▁▇███▆█████
eval/loss,▃▁▁▃▄▆▅▇██▅
eval/precision,▁█▇██▇▇██▇▇
eval/recall,▁▅█▇▇▆█▇███
eval/runtime,▄█▄▅▃▂▆▅▃▄▁
eval/samples_per_second,▄▁▅▄▆▇▃▃▅▄█
eval/steps_per_second,▄▁▅▄▆▇▃▃▅▄█
train/epoch,▁▁▁▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▇▇▇▇████
train/global_step,▁▁▁▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▇▇▇▇████
train/grad_norm,▁█▂▁▁▁▁▁▄▁▁▁▁▁▁

0,1
eval/f1,0.86356
eval/loss,0.12166
eval/precision,0.87782
eval/recall,0.85554
eval/runtime,3.2368
eval/samples_per_second,470.522
eval/steps_per_second,117.708
total_flos,2017327177827840.0
train/epoch,10.0
train/global_step,15230.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1884/1884 [00:00<00:00, 15659.94 examples/s]
 10%|▉         | 376/3770 [00:13<01:52, 30.18it/s]
 10%|█         | 377/3770 [00:14<01:52, 30.18it/s]

{'eval_loss': 0.3218485414981842, 'eval_f1': 0.42526980959507776, 'eval_precision': 0.5187829514658783, 'eval_recall': 0.363280555061377, 'eval_runtime': 0.8504, 'eval_samples_per_second': 443.295, 'eval_steps_per_second': 111.706, 'epoch': 1.0}


 20%|█▉        | 752/3770 [00:28<01:45, 28.54it/s]
 20%|██        | 754/3770 [00:29<01:45, 28.54it/s]

{'eval_loss': 0.3140501081943512, 'eval_f1': 0.5959207445368279, 'eval_precision': 0.7713467375026336, 'eval_recall': 0.5321828815056656, 'eval_runtime': 1.0345, 'eval_samples_per_second': 364.414, 'eval_steps_per_second': 91.828, 'epoch': 2.0}


 27%|██▋       | 1003/3770 [00:38<01:34, 29.21it/s]

{'loss': 0.3085, 'grad_norm': 3.3537614345550537, 'learning_rate': 3.673740053050398e-05, 'epoch': 2.65}


 30%|██▉       | 1129/3770 [00:43<01:35, 27.78it/s]
 30%|███       | 1131/3770 [00:44<01:35, 27.78it/s]

{'eval_loss': 0.35615912079811096, 'eval_f1': 0.6606670656264716, 'eval_precision': 0.7636725957052821, 'eval_recall': 0.5968719222734059, 'eval_runtime': 0.8289, 'eval_samples_per_second': 454.814, 'eval_steps_per_second': 114.608, 'epoch': 3.0}


 40%|████      | 1508/3770 [00:58<01:21, 27.86it/s]
 40%|████      | 1508/3770 [00:59<01:21, 27.86it/s]

{'eval_loss': 0.36680588126182556, 'eval_f1': 0.7132460186100511, 'eval_precision': 0.7647237112829897, 'eval_recall': 0.670546758004931, 'eval_runtime': 1.0707, 'eval_samples_per_second': 352.1, 'eval_steps_per_second': 88.726, 'epoch': 4.0}


 50%|████▉     | 1883/3770 [01:13<01:12, 26.19it/s]
 50%|█████     | 1885/3770 [01:14<01:11, 26.19it/s]

{'eval_loss': 0.41247057914733887, 'eval_f1': 0.7249397898948168, 'eval_precision': 0.7659102016540641, 'eval_recall': 0.7114238243843257, 'eval_runtime': 0.8811, 'eval_samples_per_second': 427.871, 'eval_steps_per_second': 107.819, 'epoch': 5.0}


 53%|█████▎    | 2004/3770 [01:19<01:02, 28.47it/s]

{'loss': 0.0911, 'grad_norm': 1.0043270587921143, 'learning_rate': 2.347480106100796e-05, 'epoch': 5.31}


 60%|█████▉    | 2259/3770 [01:28<00:52, 28.91it/s]
 60%|██████    | 2262/3770 [01:29<00:52, 28.91it/s]

{'eval_loss': 0.43313363194465637, 'eval_f1': 0.7322790304325464, 'eval_precision': 0.7518327977281826, 'eval_recall': 0.7196958592601291, 'eval_runtime': 0.9066, 'eval_samples_per_second': 415.827, 'eval_steps_per_second': 104.784, 'epoch': 6.0}


 70%|███████   | 2639/3770 [01:43<00:38, 29.74it/s]
 70%|███████   | 2639/3770 [01:44<00:38, 29.74it/s]

{'eval_loss': 0.45557090640068054, 'eval_f1': 0.7161200946717224, 'eval_precision': 0.7552839278280826, 'eval_recall': 0.6934310154443892, 'eval_runtime': 0.8189, 'eval_samples_per_second': 460.397, 'eval_steps_per_second': 116.015, 'epoch': 7.0}


 80%|███████▉  | 3003/3770 [01:58<00:26, 28.51it/s]

{'loss': 0.0276, 'grad_norm': 0.0793687030673027, 'learning_rate': 1.0212201591511936e-05, 'epoch': 7.96}


 80%|███████▉  | 3014/3770 [01:58<00:25, 29.73it/s]
 80%|████████  | 3016/3770 [01:59<00:25, 29.73it/s]

{'eval_loss': 0.46527764201164246, 'eval_f1': 0.7191328900844203, 'eval_precision': 0.7436371141705921, 'eval_recall': 0.7086504817127753, 'eval_runtime': 0.8377, 'eval_samples_per_second': 450.059, 'eval_steps_per_second': 113.41, 'epoch': 8.0}


 90%|█████████ | 3393/3770 [02:13<00:12, 29.90it/s]
 90%|█████████ | 3393/3770 [02:14<00:12, 29.90it/s]

{'eval_loss': 0.4696854054927826, 'eval_f1': 0.7267325647949924, 'eval_precision': 0.7384156684353907, 'eval_recall': 0.7209878657087494, 'eval_runtime': 0.856, 'eval_samples_per_second': 440.436, 'eval_steps_per_second': 110.985, 'epoch': 9.0}


100%|█████████▉| 3769/3770 [02:28<00:00, 27.57it/s]
100%|██████████| 3770/3770 [02:30<00:00, 27.57it/s]

{'eval_loss': 0.47691595554351807, 'eval_f1': 0.7272490433860571, 'eval_precision': 0.7420363048784681, 'eval_recall': 0.7207327347873669, 'eval_runtime': 0.8558, 'eval_samples_per_second': 440.508, 'eval_steps_per_second': 111.003, 'epoch': 10.0}


100%|██████████| 3770/3770 [02:31<00:00, 24.90it/s]


{'train_runtime': 151.44, 'train_samples_per_second': 99.511, 'train_steps_per_second': 24.894, 'train_loss': 0.11478354178309756, 'epoch': 10.0}


100%|██████████| 95/95 [00:00<00:00, 97.30it/s] 


Evaluation Metrics: {'eval_loss': 0.43313363194465637, 'eval_f1': 0.7322790304325464, 'eval_precision': 0.7518327977281826, 'eval_recall': 0.7196958592601291, 'eval_runtime': 0.9869, 'eval_samples_per_second': 381.991, 'eval_steps_per_second': 96.258, 'epoch': 10.0}
CHECK SCORE                                                 model     lan  \
14  lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0...  python   
15  lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0...  python   
16  lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0...  python   
17  lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0...  python   
18  lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0...  python   

                 cat  precision    recall        f1  
14             Usage   0.894737  0.702479  0.787037  
15        Parameters   0.858407  0.866071  0.862222  
16  DevelopmentNotes   0.652174  0.750000  0.697674  
17            Expand   0.600000  0.608696  0.604317  
18           Summary   0.753846  0.671233  0.710145  
Scores

0,1
eval/f1,▁▅▆████████
eval/loss,▁▁▃▃▅▆▇▇██▆
eval/precision,▁████▇█▇▇▇▇
eval/recall,▁▄▆▇██▇████
eval/runtime,▂▇▁█▃▃▁▂▂▂▆
eval/samples_per_second,▇▂█▁▆▅█▇▇▇▃
eval/steps_per_second,▇▂█▁▆▅█▇▇▇▃
train/epoch,▁▂▂▃▃▄▄▅▆▆▆▇███
train/global_step,▁▂▂▃▃▄▄▅▆▆▆▇███
train/grad_norm,█▃▁

0,1
eval/f1,0.73228
eval/loss,0.43313
eval/precision,0.75183
eval/recall,0.7197
eval/runtime,0.9869
eval/samples_per_second,381.991
eval/steps_per_second,96.258
total_flos,499097625100800.0
train/epoch,10.0
train/global_step,3770.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1298/1298 [00:00<00:00, 14784.32 examples/s]
 10%|▉         | 259/2600 [00:09<01:24, 27.74it/s]
 10%|█         | 260/2600 [00:10<01:24, 27.74it/s]

{'eval_loss': 0.2616629898548126, 'eval_f1': 0.3839842802949272, 'eval_precision': 0.6582346225203368, 'eval_recall': 0.3043955475374765, 'eval_runtime': 0.6819, 'eval_samples_per_second': 381.311, 'eval_steps_per_second': 95.328, 'epoch': 1.0}


 20%|█▉        | 518/2600 [00:19<01:09, 29.92it/s]
 20%|██        | 520/2600 [00:20<01:09, 29.92it/s]

{'eval_loss': 0.22315768897533417, 'eval_f1': 0.5062682412422583, 'eval_precision': 0.6011866932459639, 'eval_recall': 0.450991722889637, 'eval_runtime': 0.5507, 'eval_samples_per_second': 472.089, 'eval_steps_per_second': 118.022, 'epoch': 2.0}


 30%|██▉       | 777/2600 [00:30<01:01, 29.88it/s]
 30%|███       | 780/2600 [00:30<01:00, 29.88it/s]

{'eval_loss': 0.25044184923171997, 'eval_f1': 0.4988160918436643, 'eval_precision': 0.6970335893879582, 'eval_recall': 0.45330373090329124, 'eval_runtime': 0.5424, 'eval_samples_per_second': 479.346, 'eval_steps_per_second': 119.836, 'epoch': 3.0}


 39%|███▊      | 1004/2600 [00:39<01:00, 26.26it/s]

{'loss': 0.1929, 'grad_norm': 0.1400100439786911, 'learning_rate': 3.0769230769230774e-05, 'epoch': 3.85}


 40%|███▉      | 1038/2600 [00:41<00:54, 28.49it/s]
 40%|████      | 1040/2600 [00:41<00:54, 28.49it/s]

{'eval_loss': 0.24837438762187958, 'eval_f1': 0.61383877169656, 'eval_precision': 0.82859866926761, 'eval_recall': 0.5497891445336686, 'eval_runtime': 0.6205, 'eval_samples_per_second': 419.033, 'eval_steps_per_second': 104.758, 'epoch': 4.0}


 50%|█████     | 1300/2600 [00:52<00:45, 28.36it/s]
 50%|█████     | 1300/2600 [00:52<00:45, 28.36it/s]

{'eval_loss': 0.24834130704402924, 'eval_f1': 0.6167282016195745, 'eval_precision': 0.8289116651834422, 'eval_recall': 0.5504665218675809, 'eval_runtime': 0.593, 'eval_samples_per_second': 438.423, 'eval_steps_per_second': 109.606, 'epoch': 5.0}


 60%|█████▉    | 1559/2600 [01:02<00:35, 29.49it/s]
 60%|██████    | 1560/2600 [01:03<00:35, 29.49it/s]

{'eval_loss': 0.2605675458908081, 'eval_f1': 0.6398051157559651, 'eval_precision': 0.8081615847920195, 'eval_recall': 0.5831252979721263, 'eval_runtime': 0.5742, 'eval_samples_per_second': 452.841, 'eval_steps_per_second': 113.21, 'epoch': 6.0}


 70%|██████▉   | 1819/2600 [01:13<00:27, 28.73it/s]
 70%|███████   | 1820/2600 [01:13<00:27, 28.73it/s]

{'eval_loss': 0.27624258399009705, 'eval_f1': 0.648851580810255, 'eval_precision': 0.8182284024854968, 'eval_recall': 0.5807198146740652, 'eval_runtime': 0.6185, 'eval_samples_per_second': 420.364, 'eval_steps_per_second': 105.091, 'epoch': 7.0}


 77%|███████▋  | 2005/2600 [01:21<00:20, 29.24it/s]

{'loss': 0.0415, 'grad_norm': 0.091462641954422, 'learning_rate': 1.153846153846154e-05, 'epoch': 7.69}


 80%|███████▉  | 2078/2600 [01:23<00:18, 28.08it/s]
 80%|████████  | 2080/2600 [01:24<00:18, 28.08it/s]

{'eval_loss': 0.2700762152671814, 'eval_f1': 0.6662663519465288, 'eval_precision': 0.8254016733666699, 'eval_recall': 0.609405586855278, 'eval_runtime': 0.5513, 'eval_samples_per_second': 471.577, 'eval_steps_per_second': 117.894, 'epoch': 8.0}


 90%|█████████ | 2340/2600 [01:34<00:09, 28.09it/s]
 90%|█████████ | 2340/2600 [01:35<00:09, 28.09it/s]

{'eval_loss': 0.27840539813041687, 'eval_f1': 0.6649337306483678, 'eval_precision': 0.8034986362966239, 'eval_recall': 0.6050555283149038, 'eval_runtime': 0.5815, 'eval_samples_per_second': 447.119, 'eval_steps_per_second': 111.78, 'epoch': 9.0}


100%|█████████▉| 2598/2600 [01:45<00:00, 29.71it/s]
100%|██████████| 2600/2600 [01:46<00:00, 29.71it/s]

{'eval_loss': 0.282045841217041, 'eval_f1': 0.6652976438998918, 'eval_precision': 0.8036455296404277, 'eval_recall': 0.6075508243056406, 'eval_runtime': 0.59, 'eval_samples_per_second': 440.644, 'eval_steps_per_second': 110.161, 'epoch': 10.0}


100%|██████████| 2600/2600 [01:47<00:00, 24.16it/s]


{'train_runtime': 107.6262, 'train_samples_per_second': 96.445, 'train_steps_per_second': 24.158, 'train_loss': 0.0933464266703679, 'epoch': 10.0}


100%|██████████| 65/65 [00:00<00:00, 120.18it/s]


Evaluation Metrics: {'eval_loss': 0.2700762152671814, 'eval_f1': 0.6662663519465288, 'eval_precision': 0.8254016733666699, 'eval_recall': 0.609405586855278, 'eval_runtime': 0.5519, 'eval_samples_per_second': 471.068, 'eval_steps_per_second': 117.767, 'epoch': 10.0}
CHECK SCORE                                                 model    lan  \
12  lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0...  pharo   
13  lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0...  pharo   
14  lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0...  pharo   
15  lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0...  pharo   
16  lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0...  pharo   
17  lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0...  pharo   
18  lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0...  pharo   

                        cat  precision    recall        f1  
12  Keyimplementationpoints   0.870968  0.642857  0.739726  
13                  Example   0.890000  0.881188  0.885572  
14         Responsibilitie

0,1
eval/f1,▁▄▄▇▇▇█████
eval/loss,▆▁▄▄▄▅▇▇██▇
eval/precision,▃▁▄██▇██▇▇█
eval/recall,▁▄▄▇▇▇▇████
eval/runtime,█▁▁▅▄▃▅▁▃▃▁
eval/samples_per_second,▁▇█▄▅▆▄▇▆▅▇
eval/steps_per_second,▁▇█▄▅▆▄▇▆▅▇
train/epoch,▁▂▃▃▃▄▅▆▆▆▇███
train/global_step,▁▂▃▃▃▄▅▆▆▆▇███
train/grad_norm,█▁

0,1
eval/f1,0.66627
eval/loss,0.27008
eval/precision,0.8254
eval/recall,0.60941
eval/runtime,0.5519
eval/samples_per_second,471.068
eval/steps_per_second,117.767
total_flos,343783551237120.0
train/epoch,10.0
train/global_step,2600.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 7614/7614 [00:00<00:00, 19855.30 examples/s]
  7%|▋         | 1005/15230 [00:35<08:20, 28.44it/s]

{'loss': 0.2726, 'grad_norm': 0.6676028966903687, 'learning_rate': 4.671700590938937e-06, 'epoch': 0.66}


 10%|▉         | 1521/15230 [00:53<07:49, 29.18it/s]
 10%|█         | 1523/15230 [00:57<07:49, 29.18it/s]

{'eval_loss': 0.1391284316778183, 'eval_f1': 0.5245632031900174, 'eval_precision': 0.5300081870052935, 'eval_recall': 0.520214252283556, 'eval_runtime': 3.7053, 'eval_samples_per_second': 411.03, 'eval_steps_per_second': 102.825, 'epoch': 1.0}


 13%|█▎        | 2005/15230 [01:15<07:39, 28.75it/s]  

{'loss': 0.142, 'grad_norm': 2.3553309440612793, 'learning_rate': 4.343401181877873e-06, 'epoch': 1.31}


 20%|█▉        | 3005/15230 [01:49<07:27, 27.35it/s]

{'loss': 0.1113, 'grad_norm': 4.655873775482178, 'learning_rate': 4.015101772816809e-06, 'epoch': 1.97}


 20%|█▉        | 3044/15230 [01:51<07:19, 27.70it/s]
 20%|██        | 3046/15230 [01:55<07:19, 27.70it/s]

{'eval_loss': 0.11446011066436768, 'eval_f1': 0.5908528166565653, 'eval_precision': 0.8110978618778291, 'eval_recall': 0.5633420622459716, 'eval_runtime': 4.1675, 'eval_samples_per_second': 365.444, 'eval_steps_per_second': 91.421, 'epoch': 2.0}


 26%|██▋       | 4002/15230 [02:29<06:47, 27.56it/s]  

{'loss': 0.0891, 'grad_norm': 0.07892239838838577, 'learning_rate': 3.6868023637557455e-06, 'epoch': 2.63}


 30%|██▉       | 4567/15230 [02:49<06:19, 28.12it/s]
 30%|███       | 4569/15230 [02:52<06:19, 28.12it/s]

{'eval_loss': 0.09483808279037476, 'eval_f1': 0.7258658466630704, 'eval_precision': 0.9321612092366526, 'eval_recall': 0.6558678050284303, 'eval_runtime': 3.4049, 'eval_samples_per_second': 447.302, 'eval_steps_per_second': 111.899, 'epoch': 3.0}


 33%|███▎      | 5003/15230 [03:09<06:10, 27.59it/s]  

{'loss': 0.0825, 'grad_norm': 9.369587898254395, 'learning_rate': 3.358502954694682e-06, 'epoch': 3.28}


 39%|███▉      | 6005/15230 [03:44<05:19, 28.91it/s]

{'loss': 0.0733, 'grad_norm': 2.0635440349578857, 'learning_rate': 3.030203545633618e-06, 'epoch': 3.94}


 40%|███▉      | 6090/15230 [03:47<05:26, 28.03it/s]
 40%|████      | 6092/15230 [03:51<05:26, 28.03it/s]

{'eval_loss': 0.09802518784999847, 'eval_f1': 0.7794079318841135, 'eval_precision': 0.9132090527009458, 'eval_recall': 0.7209812764242318, 'eval_runtime': 3.5561, 'eval_samples_per_second': 428.272, 'eval_steps_per_second': 107.138, 'epoch': 4.0}


 46%|████▌     | 7004/15230 [04:24<04:58, 27.59it/s]  

{'loss': 0.0625, 'grad_norm': 0.04358445480465889, 'learning_rate': 2.7019041365725546e-06, 'epoch': 4.6}


 50%|████▉     | 7614/15230 [04:45<04:28, 28.32it/s]
 50%|█████     | 7615/15230 [04:49<04:28, 28.32it/s]

{'eval_loss': 0.09301789849996567, 'eval_f1': 0.8301937096581368, 'eval_precision': 0.8978493717690831, 'eval_recall': 0.7852303148016856, 'eval_runtime': 3.7397, 'eval_samples_per_second': 407.247, 'eval_steps_per_second': 101.878, 'epoch': 5.0}


 53%|█████▎    | 8004/15230 [05:04<04:15, 28.27it/s]  

{'loss': 0.0635, 'grad_norm': 0.04746953025460243, 'learning_rate': 2.3736047275114905e-06, 'epoch': 5.25}


 59%|█████▉    | 9004/15230 [05:39<03:34, 28.99it/s]

{'loss': 0.0526, 'grad_norm': 13.371354103088379, 'learning_rate': 2.045305318450427e-06, 'epoch': 5.91}


 60%|██████    | 9138/15230 [05:43<03:36, 28.11it/s]
 60%|██████    | 9138/15230 [05:47<03:36, 28.11it/s]

{'eval_loss': 0.09999188035726547, 'eval_f1': 0.8315659892613861, 'eval_precision': 0.9088890976333632, 'eval_recall': 0.7834237150718891, 'eval_runtime': 3.4487, 'eval_samples_per_second': 441.618, 'eval_steps_per_second': 110.477, 'epoch': 6.0}


 66%|██████▌   | 10005/15230 [06:18<03:03, 28.53it/s]

{'loss': 0.0478, 'grad_norm': 0.22351530194282532, 'learning_rate': 1.7170059093893632e-06, 'epoch': 6.57}


 70%|██████▉   | 10660/15230 [06:41<02:29, 30.61it/s]
 70%|███████   | 10661/15230 [06:44<02:29, 30.61it/s]

{'eval_loss': 0.09607526659965515, 'eval_f1': 0.8401021259946388, 'eval_precision': 0.8902466964927253, 'eval_recall': 0.8021138362368676, 'eval_runtime': 3.5086, 'eval_samples_per_second': 434.071, 'eval_steps_per_second': 108.589, 'epoch': 7.0}


 72%|███████▏  | 11004/15230 [06:57<02:36, 26.99it/s]

{'loss': 0.0472, 'grad_norm': 0.14011569321155548, 'learning_rate': 1.3887065003282996e-06, 'epoch': 7.22}


 79%|███████▉  | 12003/15230 [07:32<01:52, 28.56it/s]

{'loss': 0.0415, 'grad_norm': 0.032236676663160324, 'learning_rate': 1.0604070912672358e-06, 'epoch': 7.88}


 80%|████████  | 12184/15230 [07:39<01:52, 27.09it/s]
 80%|████████  | 12184/15230 [07:43<01:52, 27.09it/s]

{'eval_loss': 0.09649597853422165, 'eval_f1': 0.8376543462322262, 'eval_precision': 0.887225217520328, 'eval_recall': 0.8010543347773893, 'eval_runtime': 4.0961, 'eval_samples_per_second': 371.813, 'eval_steps_per_second': 93.014, 'epoch': 8.0}


 85%|████████▌ | 13005/15230 [08:12<01:18, 28.51it/s]

{'loss': 0.0362, 'grad_norm': 7.577348709106445, 'learning_rate': 7.32107682206172e-07, 'epoch': 8.54}


 90%|████████▉ | 13705/15230 [08:37<00:55, 27.55it/s]
 90%|█████████ | 13707/15230 [08:40<00:55, 27.55it/s]

{'eval_loss': 0.09824404865503311, 'eval_f1': 0.8464798543890334, 'eval_precision': 0.8809155965568066, 'eval_recall': 0.8188623155778897, 'eval_runtime': 3.3835, 'eval_samples_per_second': 450.121, 'eval_steps_per_second': 112.604, 'epoch': 9.0}


 92%|█████████▏| 14006/15230 [08:52<00:42, 28.89it/s]

{'loss': 0.04, 'grad_norm': 0.34970220923423767, 'learning_rate': 4.038082731451084e-07, 'epoch': 9.19}


 99%|█████████▊| 15002/15230 [09:27<00:08, 25.71it/s]

{'loss': 0.0347, 'grad_norm': 0.08663284778594971, 'learning_rate': 7.550886408404465e-08, 'epoch': 9.85}


100%|█████████▉| 15227/15230 [09:35<00:00, 29.81it/s]
100%|██████████| 15230/15230 [09:39<00:00, 29.81it/s]

{'eval_loss': 0.09744656085968018, 'eval_f1': 0.8420306566337726, 'eval_precision': 0.8819363124387322, 'eval_recall': 0.8100391066604554, 'eval_runtime': 3.828, 'eval_samples_per_second': 397.853, 'eval_steps_per_second': 99.529, 'epoch': 10.0}


100%|██████████| 15230/15230 [09:40<00:00, 26.22it/s]


{'train_runtime': 580.9488, 'train_samples_per_second': 104.846, 'train_steps_per_second': 26.216, 'train_loss': 0.07896781906984546, 'epoch': 10.0}


100%|██████████| 381/381 [00:03<00:00, 112.21it/s]


Evaluation Metrics: {'eval_loss': 0.09824404865503311, 'eval_f1': 0.8464798543890334, 'eval_precision': 0.8809155965568066, 'eval_recall': 0.8188623155778897, 'eval_runtime': 3.4053, 'eval_samples_per_second': 447.24, 'eval_steps_per_second': 111.883, 'epoch': 10.0}
CHECK SCORE                                                 model   lan          cat  \
12  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0...  java      summary   
13  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0...  java    Ownership   
14  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0...  java       Expand   
15  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0...  java        usage   
16  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0...  java      Pointer   
17  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0...  java  deprecation   
18  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0...  java     rational   

    precision    recall        f1  
12   0.936200  0.960171  0.948034  
13   0.964912  1.000000  0.982143  
14  

0,1
eval/f1,▁▂▅▇███████
eval/loss,█▄▁▂▁▂▁▂▂▂▂
eval/precision,▁▆██▇█▇▇▇▇▇
eval/recall,▁▂▄▆▇▇█████
eval/runtime,▄█▁▃▄▂▂▇▁▅▁
eval/samples_per_second,▅▁█▆▄▇▇▂█▄█
eval/steps_per_second,▅▁█▆▄▇▇▂█▄█
train/epoch,▁▁▁▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▇▇▇▇████
train/global_step,▁▁▁▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▇▇▇▇████
train/grad_norm,▁▂▃▁▆▂▁▁█▁▁▁▅▁▁

0,1
eval/f1,0.84648
eval/loss,0.09824
eval/precision,0.88092
eval/recall,0.81886
eval/runtime,3.4053
eval/samples_per_second,447.24
eval/steps_per_second,111.883
total_flos,2017327177827840.0
train/epoch,10.0
train/global_step,15230.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1884/1884 [00:00<00:00, 13989.92 examples/s]
 10%|█         | 377/3770 [00:13<01:53, 29.92it/s]
 10%|█         | 377/3770 [00:14<01:53, 29.92it/s]

{'eval_loss': 0.47244417667388916, 'eval_f1': 0.03308270676691729, 'eval_precision': 0.18333333333333332, 'eval_recall': 0.01818181818181818, 'eval_runtime': 0.869, 'eval_samples_per_second': 433.832, 'eval_steps_per_second': 109.321, 'epoch': 1.0}


 20%|█▉        | 752/3770 [00:27<01:48, 27.92it/s]
 20%|██        | 754/3770 [00:28<01:48, 27.92it/s]

{'eval_loss': 0.419569194316864, 'eval_f1': 0.12358897243107769, 'eval_precision': 0.3888888888888889, 'eval_recall': 0.08786894923258559, 'eval_runtime': 0.8654, 'eval_samples_per_second': 435.657, 'eval_steps_per_second': 109.781, 'epoch': 2.0}


 27%|██▋       | 1004/3770 [00:38<01:34, 29.21it/s]

{'loss': 0.4636, 'grad_norm': 3.4435739517211914, 'learning_rate': 3.673740053050398e-06, 'epoch': 2.65}


 30%|██▉       | 1130/3770 [00:43<01:37, 26.99it/s]
 30%|███       | 1131/3770 [00:44<01:37, 26.99it/s]

{'eval_loss': 0.3773764967918396, 'eval_f1': 0.32044277919868963, 'eval_precision': 0.5692097026604068, 'eval_recall': 0.24873303844349923, 'eval_runtime': 1.0042, 'eval_samples_per_second': 375.435, 'eval_steps_per_second': 94.606, 'epoch': 3.0}


 40%|███▉      | 1506/3770 [00:58<01:17, 29.03it/s]
 40%|████      | 1508/3770 [00:59<01:17, 29.03it/s]

{'eval_loss': 0.3517577350139618, 'eval_f1': 0.3835426044381268, 'eval_precision': 0.5372413442472926, 'eval_recall': 0.3089162394268247, 'eval_runtime': 1.1113, 'eval_samples_per_second': 339.255, 'eval_steps_per_second': 85.489, 'epoch': 4.0}


 50%|████▉     | 1883/3770 [01:14<01:10, 26.79it/s]
 50%|█████     | 1885/3770 [01:14<01:10, 26.79it/s]

{'eval_loss': 0.33322229981422424, 'eval_f1': 0.43136115543837744, 'eval_precision': 0.6189679885332059, 'eval_recall': 0.3683980574445679, 'eval_runtime': 0.8538, 'eval_samples_per_second': 441.53, 'eval_steps_per_second': 111.261, 'epoch': 5.0}


 53%|█████▎    | 2005/3770 [01:20<01:00, 29.25it/s]

{'loss': 0.3285, 'grad_norm': 5.572543621063232, 'learning_rate': 2.347480106100796e-06, 'epoch': 5.31}


 60%|██████    | 2262/3770 [01:29<00:53, 28.37it/s]
 60%|██████    | 2262/3770 [01:30<00:53, 28.37it/s]

{'eval_loss': 0.3249717950820923, 'eval_f1': 0.4955697293432009, 'eval_precision': 0.6972471920080913, 'eval_recall': 0.42595898673312177, 'eval_runtime': 0.9551, 'eval_samples_per_second': 394.705, 'eval_steps_per_second': 99.462, 'epoch': 6.0}


 70%|███████   | 2639/3770 [01:44<00:38, 29.49it/s]
 70%|███████   | 2639/3770 [01:45<00:38, 29.49it/s]

{'eval_loss': 0.3175201714038849, 'eval_f1': 0.49383831271703127, 'eval_precision': 0.6811052836052836, 'eval_recall': 0.4361054700528955, 'eval_runtime': 0.834, 'eval_samples_per_second': 452.054, 'eval_steps_per_second': 113.913, 'epoch': 7.0}


 80%|███████▉  | 3004/3770 [01:58<00:26, 29.16it/s]

{'loss': 0.2617, 'grad_norm': 1.682710886001587, 'learning_rate': 1.0212201591511937e-06, 'epoch': 7.96}


 80%|███████▉  | 3014/3770 [01:59<00:26, 29.04it/s]
 80%|████████  | 3016/3770 [02:00<00:25, 29.04it/s]

{'eval_loss': 0.3156692683696747, 'eval_f1': 0.5107613067971206, 'eval_precision': 0.6884115884115884, 'eval_recall': 0.4531829163130262, 'eval_runtime': 0.9006, 'eval_samples_per_second': 418.622, 'eval_steps_per_second': 105.488, 'epoch': 8.0}


 90%|█████████ | 3393/3770 [02:14<00:12, 29.96it/s]
 90%|█████████ | 3393/3770 [02:14<00:12, 29.96it/s]

{'eval_loss': 0.31368333101272583, 'eval_f1': 0.5156491329432653, 'eval_precision': 0.6585356860935458, 'eval_recall': 0.45861806809069794, 'eval_runtime': 0.8366, 'eval_samples_per_second': 450.619, 'eval_steps_per_second': 113.551, 'epoch': 9.0}


100%|█████████▉| 3769/3770 [02:28<00:00, 29.05it/s]
100%|██████████| 3770/3770 [02:30<00:00, 29.05it/s]

{'eval_loss': 0.31362032890319824, 'eval_f1': 0.5178223741706454, 'eval_precision': 0.6608067028439353, 'eval_recall': 0.4607917350215255, 'eval_runtime': 0.93, 'eval_samples_per_second': 405.37, 'eval_steps_per_second': 102.149, 'epoch': 10.0}


100%|██████████| 3770/3770 [02:31<00:00, 24.87it/s]


{'train_runtime': 151.6198, 'train_samples_per_second': 99.393, 'train_steps_per_second': 24.865, 'train_loss': 0.32754077456041736, 'epoch': 10.0}


100%|██████████| 95/95 [00:00<00:00, 114.82it/s]


Evaluation Metrics: {'eval_loss': 0.31362032890319824, 'eval_f1': 0.5178223741706454, 'eval_precision': 0.6608067028439353, 'eval_recall': 0.4607917350215255, 'eval_runtime': 0.8383, 'eval_samples_per_second': 449.715, 'eval_steps_per_second': 113.323, 'epoch': 10.0}
CHECK SCORE                                                 model     lan  \
14  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0...  python   
15  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0...  python   
16  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0...  python   
17  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0...  python   
18  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0...  python   

                 cat  precision    recall        f1  
14             Usage   0.881720  0.677686  0.766355  
15        Parameters   0.859813  0.821429  0.840183  
16  DevelopmentNotes   0.000000  0.000000  0.000000  
17            Expand   0.812500  0.188406  0.305882  
18           Summary   0.750000  0.616438  0.676692  
Score

0,1
eval/f1,▁▂▅▆▇██████
eval/loss,█▆▄▃▂▂▁▁▁▁▁
eval/precision,▁▄▆▆▇███▇██
eval/recall,▁▂▅▆▇▇█████
eval/runtime,▂▂▅█▁▄▁▃▁▃▁
eval/samples_per_second,▇▇▃▁▇▄█▆█▅█
eval/steps_per_second,▇▇▃▁▇▄█▆█▅█
train/epoch,▁▂▂▃▃▄▄▅▆▆▆▇███
train/global_step,▁▂▂▃▃▄▄▅▆▆▆▇███
train/grad_norm,▄█▁

0,1
eval/f1,0.51782
eval/loss,0.31362
eval/precision,0.66081
eval/recall,0.46079
eval/runtime,0.8383
eval/samples_per_second,449.715
eval/steps_per_second,113.323
total_flos,499097625100800.0
train/epoch,10.0
train/global_step,3770.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1298/1298 [00:00<00:00, 14277.04 examples/s]
 10%|█         | 260/2600 [00:10<01:23, 28.08it/s]

{'eval_loss': 0.39877578616142273, 'eval_f1': 0.0, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_runtime': 0.7214, 'eval_samples_per_second': 360.399, 'eval_steps_per_second': 90.1, 'epoch': 1.0}


 20%|█▉        | 518/2600 [00:19<01:10, 29.51it/s]
 20%|██        | 520/2600 [00:20<01:10, 29.51it/s]

{'eval_loss': 0.34293892979621887, 'eval_f1': 0.11330049261083744, 'eval_precision': 0.1350293542074364, 'eval_recall': 0.0975954738330976, 'eval_runtime': 0.6177, 'eval_samples_per_second': 420.919, 'eval_steps_per_second': 105.23, 'epoch': 2.0}


 30%|██▉       | 778/2600 [00:30<01:00, 30.03it/s]
 30%|███       | 780/2600 [00:31<01:00, 30.03it/s]

{'eval_loss': 0.30876806378364563, 'eval_f1': 0.12454212454212454, 'eval_precision': 0.12917933130699089, 'eval_recall': 0.12022630834512023, 'eval_runtime': 0.5617, 'eval_samples_per_second': 462.919, 'eval_steps_per_second': 115.73, 'epoch': 3.0}


 39%|███▊      | 1005/2600 [00:39<00:56, 28.14it/s]

{'loss': 0.3562, 'grad_norm': 1.1501833200454712, 'learning_rate': 3.0769230769230774e-06, 'epoch': 3.85}


 40%|███▉      | 1037/2600 [00:41<00:54, 28.57it/s]
 40%|████      | 1040/2600 [00:41<00:54, 28.57it/s]

{'eval_loss': 0.2859659194946289, 'eval_f1': 0.148990743838519, 'eval_precision': 0.2787456445993031, 'eval_recall': 0.12536291223107274, 'eval_runtime': 0.5938, 'eval_samples_per_second': 437.879, 'eval_steps_per_second': 109.47, 'epoch': 4.0}


 50%|████▉     | 1299/2600 [00:52<00:46, 28.26it/s]
 50%|█████     | 1300/2600 [00:52<00:45, 28.26it/s]

{'eval_loss': 0.2722059190273285, 'eval_f1': 0.21145061798304227, 'eval_precision': 0.42328042328042326, 'eval_recall': 0.1677689272686667, 'eval_runtime': 0.5983, 'eval_samples_per_second': 434.56, 'eval_steps_per_second': 108.64, 'epoch': 5.0}


 60%|█████▉    | 1558/2600 [01:02<00:36, 28.32it/s]
 60%|██████    | 1560/2600 [01:03<00:36, 28.32it/s]

{'eval_loss': 0.25731992721557617, 'eval_f1': 0.2644934860736748, 'eval_precision': 0.5334379905808477, 'eval_recall': 0.20967851027852516, 'eval_runtime': 0.6394, 'eval_samples_per_second': 406.637, 'eval_steps_per_second': 101.659, 'epoch': 6.0}


 70%|██████▉   | 1818/2600 [01:13<00:25, 30.21it/s]
 70%|███████   | 1820/2600 [01:14<00:25, 30.21it/s]

{'eval_loss': 0.25026610493659973, 'eval_f1': 0.31158970013925497, 'eval_precision': 0.5320616883116883, 'eval_recall': 0.24669282825167585, 'eval_runtime': 0.5858, 'eval_samples_per_second': 443.865, 'eval_steps_per_second': 110.966, 'epoch': 7.0}


 77%|███████▋  | 2003/2600 [01:21<00:20, 28.89it/s]

{'loss': 0.2311, 'grad_norm': 1.2417654991149902, 'learning_rate': 1.153846153846154e-06, 'epoch': 7.69}


 80%|███████▉  | 2078/2600 [01:24<00:18, 28.42it/s]
 80%|████████  | 2080/2600 [01:24<00:18, 28.42it/s]

{'eval_loss': 0.2467828243970871, 'eval_f1': 0.3253556139436907, 'eval_precision': 0.6536861197055372, 'eval_recall': 0.26255080959795135, 'eval_runtime': 0.5528, 'eval_samples_per_second': 470.34, 'eval_steps_per_second': 117.585, 'epoch': 8.0}


 90%|████████▉ | 2338/2600 [01:35<00:09, 28.11it/s]
 90%|█████████ | 2340/2600 [01:35<00:09, 28.11it/s]

{'eval_loss': 0.24123412370681763, 'eval_f1': 0.34123931623931625, 'eval_precision': 0.6605243161094225, 'eval_recall': 0.2711825728816911, 'eval_runtime': 0.581, 'eval_samples_per_second': 447.507, 'eval_steps_per_second': 111.877, 'epoch': 9.0}


100%|█████████▉| 2598/2600 [01:45<00:00, 28.31it/s]
100%|██████████| 2600/2600 [01:47<00:00, 28.31it/s]

{'eval_loss': 0.24071058630943298, 'eval_f1': 0.3464381423565097, 'eval_precision': 0.657860824742268, 'eval_recall': 0.2756365137165041, 'eval_runtime': 0.6744, 'eval_samples_per_second': 385.546, 'eval_steps_per_second': 96.386, 'epoch': 10.0}


100%|██████████| 2600/2600 [01:48<00:00, 24.02it/s]


{'train_runtime': 108.2681, 'train_samples_per_second': 95.873, 'train_steps_per_second': 24.014, 'train_loss': 0.2715282440185547, 'epoch': 10.0}


100%|██████████| 65/65 [00:00<00:00, 120.14it/s]


Evaluation Metrics: {'eval_loss': 0.24071058630943298, 'eval_f1': 0.3464381423565097, 'eval_precision': 0.657860824742268, 'eval_recall': 0.2756365137165041, 'eval_runtime': 0.5526, 'eval_samples_per_second': 470.51, 'eval_steps_per_second': 117.628, 'epoch': 10.0}
CHECK SCORE                                                 model    lan  \
12  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0...  pharo   
13  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0...  pharo   
14  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0...  pharo   
15  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0...  pharo   
16  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0...  pharo   
17  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0...  pharo   
18  lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0...  pharo   

                        cat  precision    recall        f1  
12  Keyimplementationpoints   1.000000  0.285714  0.444444  
13                  Example   0.917526  0.881188  0.898990  
14         Responsibilitie

0,1
eval/f1,▁▃▄▄▅▆▇████
eval/loss,█▆▄▃▂▂▁▁▁▁▁
eval/precision,▁▂▂▄▅▇▇████
eval/recall,▁▃▄▄▅▆▇████
eval/runtime,█▄▁▃▃▅▂▁▂▆▁
eval/samples_per_second,▁▅█▆▆▄▆█▇▃█
eval/steps_per_second,▁▅█▆▆▄▆█▇▃█
train/epoch,▁▂▃▃▃▄▅▆▆▆▇███
train/global_step,▁▂▃▃▃▄▅▆▆▆▇███
train/grad_norm,▁█

0,1
eval/f1,0.34644
eval/loss,0.24071
eval/precision,0.65786
eval/recall,0.27564
eval/runtime,0.5526
eval/samples_per_second,470.51
eval/steps_per_second,117.628
total_flos,343783551237120.0
train/epoch,10.0
train/global_step,2600.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 7614/7614 [00:00<00:00, 19981.22 examples/s]
 10%|█         | 762/7620 [00:31<04:37, 24.69it/s]
 10%|█         | 762/7620 [00:33<04:37, 24.69it/s]

{'eval_loss': 0.1015646830201149, 'eval_f1': 0.6524690085952666, 'eval_precision': 0.6778889300157169, 'eval_recall': 0.6348395609918983, 'eval_runtime': 2.2925, 'eval_samples_per_second': 664.342, 'eval_steps_per_second': 83.315, 'epoch': 1.0}


 13%|█▎        | 1002/7620 [00:44<04:31, 24.33it/s]

{'loss': 0.1298, 'grad_norm': 0.3760480284690857, 'learning_rate': 4.343832020997376e-05, 'epoch': 1.31}


 20%|██        | 1524/7620 [01:05<04:03, 25.08it/s]
 20%|██        | 1524/7620 [01:07<04:03, 25.08it/s]

{'eval_loss': 0.07745072990655899, 'eval_f1': 0.8234782498327127, 'eval_precision': 0.8725145199993208, 'eval_recall': 0.7933584021771242, 'eval_runtime': 2.214, 'eval_samples_per_second': 687.883, 'eval_steps_per_second': 86.268, 'epoch': 2.0}


 26%|██▋       | 2004/7620 [01:28<03:52, 24.15it/s]

{'loss': 0.0641, 'grad_norm': 0.0429794043302536, 'learning_rate': 3.6876640419947505e-05, 'epoch': 2.62}


 30%|███       | 2286/7620 [01:40<03:30, 25.37it/s]
 30%|███       | 2286/7620 [01:42<03:30, 25.37it/s]

{'eval_loss': 0.09159431606531143, 'eval_f1': 0.8430576105961849, 'eval_precision': 0.869705667555172, 'eval_recall': 0.8196733347993853, 'eval_runtime': 2.1227, 'eval_samples_per_second': 717.494, 'eval_steps_per_second': 89.981, 'epoch': 3.0}


 39%|███▉      | 3003/7620 [02:12<03:13, 23.84it/s]

{'loss': 0.0351, 'grad_norm': 0.43561094999313354, 'learning_rate': 3.0314960629921263e-05, 'epoch': 3.94}


 40%|████      | 3048/7620 [02:14<03:04, 24.80it/s]
 40%|████      | 3048/7620 [02:17<03:04, 24.80it/s]

{'eval_loss': 0.08914182335138321, 'eval_f1': 0.8684226519853556, 'eval_precision': 0.8887567142517445, 'eval_recall': 0.8528542888214445, 'eval_runtime': 2.1511, 'eval_samples_per_second': 708.01, 'eval_steps_per_second': 88.792, 'epoch': 4.0}


 50%|█████     | 3810/7620 [02:49<02:36, 24.40it/s]
 50%|█████     | 3810/7620 [02:51<02:36, 24.40it/s]

{'eval_loss': 0.1030140295624733, 'eval_f1': 0.8499595345268445, 'eval_precision': 0.8727408071204505, 'eval_recall': 0.8342189673747467, 'eval_runtime': 2.1686, 'eval_samples_per_second': 702.298, 'eval_steps_per_second': 88.075, 'epoch': 5.0}


 53%|█████▎    | 4005/7620 [03:00<02:30, 24.08it/s]

{'loss': 0.0154, 'grad_norm': 0.037584055215120316, 'learning_rate': 2.3753280839895015e-05, 'epoch': 5.25}


 60%|██████    | 4572/7620 [03:23<02:04, 24.43it/s]
 60%|██████    | 4572/7620 [03:25<02:04, 24.43it/s]

{'eval_loss': 0.1099642813205719, 'eval_f1': 0.8442320462845554, 'eval_precision': 0.8798407934517366, 'eval_recall': 0.8189662306367111, 'eval_runtime': 2.2342, 'eval_samples_per_second': 681.671, 'eval_steps_per_second': 85.489, 'epoch': 6.0}


 66%|██████▌   | 5004/7620 [03:44<01:44, 24.96it/s]

{'loss': 0.0092, 'grad_norm': 0.005762810353189707, 'learning_rate': 1.7191601049868766e-05, 'epoch': 6.56}


 70%|███████   | 5334/7620 [03:57<01:30, 25.26it/s]
 70%|███████   | 5334/7620 [04:00<01:30, 25.26it/s]

{'eval_loss': 0.1050337627530098, 'eval_f1': 0.8560508171241495, 'eval_precision': 0.8582183084648333, 'eval_recall': 0.8564102763995078, 'eval_runtime': 2.1389, 'eval_samples_per_second': 712.057, 'eval_steps_per_second': 89.299, 'epoch': 7.0}


 79%|███████▉  | 6003/7620 [04:28<01:06, 24.32it/s]

{'loss': 0.0046, 'grad_norm': 0.004799437243491411, 'learning_rate': 1.062992125984252e-05, 'epoch': 7.87}


 80%|████████  | 6096/7620 [04:32<01:00, 25.24it/s]
 80%|████████  | 6096/7620 [04:34<01:00, 25.24it/s]

{'eval_loss': 0.11434672772884369, 'eval_f1': 0.8554302718737054, 'eval_precision': 0.8692459050093966, 'eval_recall': 0.8437683186505369, 'eval_runtime': 2.0966, 'eval_samples_per_second': 726.404, 'eval_steps_per_second': 91.099, 'epoch': 8.0}


 90%|█████████ | 6858/7620 [05:06<00:30, 24.71it/s]
 90%|█████████ | 6858/7620 [05:08<00:30, 24.71it/s]

{'eval_loss': 0.1144174337387085, 'eval_f1': 0.8555389605243348, 'eval_precision': 0.8734243272483292, 'eval_recall': 0.8405688686095586, 'eval_runtime': 2.2417, 'eval_samples_per_second': 679.401, 'eval_steps_per_second': 85.204, 'epoch': 9.0}


 92%|█████████▏| 7002/7620 [05:15<00:25, 24.07it/s]

{'loss': 0.0028, 'grad_norm': 0.02011386677622795, 'learning_rate': 4.068241469816273e-06, 'epoch': 9.19}


100%|██████████| 7620/7620 [05:40<00:00, 25.11it/s]
100%|██████████| 7620/7620 [05:43<00:00, 25.11it/s]

{'eval_loss': 0.11736174672842026, 'eval_f1': 0.8529758896354397, 'eval_precision': 0.8696593282566216, 'eval_recall': 0.8389970219710812, 'eval_runtime': 2.1803, 'eval_samples_per_second': 698.525, 'eval_steps_per_second': 87.602, 'epoch': 10.0}


100%|██████████| 7620/7620 [05:44<00:00, 22.10it/s]


{'train_runtime': 344.8566, 'train_samples_per_second': 176.624, 'train_steps_per_second': 22.096, 'train_loss': 0.03438628049973115, 'epoch': 10.0}


100%|██████████| 191/191 [00:02<00:00, 90.19it/s]


Evaluation Metrics: {'eval_loss': 0.08914182335138321, 'eval_f1': 0.8684226519853556, 'eval_precision': 0.8887567142517445, 'eval_recall': 0.8528542888214445, 'eval_runtime': 2.1316, 'eval_samples_per_second': 714.485, 'eval_steps_per_second': 89.604, 'epoch': 10.0}
CHECK SCORE                                                 model   lan          cat  \
12  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0...  java      summary   
13  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0...  java    Ownership   
14  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0...  java       Expand   
15  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0...  java        usage   
16  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0...  java      Pointer   
17  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0...  java  deprecation   
18  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0...  java     rational   

    precision    recall        f1  
12   0.947067  0.941679  0.944365  
13   0.982143  1.000000  0.990991  
14  

0,1
eval/f1,▁▇▇█▇▇███▇█
eval/loss,▅▁▃▃▅▇▆▇▇█▃
eval/precision,▁▇▇█▇█▇▇▇▇█
eval/recall,▁▆▇█▇▇██▇▇█
eval/runtime,█▅▂▃▄▆▃▁▆▄▂
eval/samples_per_second,▁▄▇▆▅▃▆█▃▅▇
eval/steps_per_second,▁▄▇▆▅▃▆█▃▅▇
train/epoch,▁▁▂▂▃▃▃▄▄▅▅▆▆▆▇▇███
train/global_step,▁▁▂▂▃▃▃▄▄▅▅▆▆▆▇▇███
train/grad_norm,▇▂█▂▁▁▁

0,1
eval/f1,0.86842
eval/loss,0.08914
eval/precision,0.88876
eval/recall,0.85285
eval/runtime,2.1316
eval/samples_per_second,714.485
eval/steps_per_second,89.604
total_flos,2017327177827840.0
train/epoch,10.0
train/global_step,7620.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1884/1884 [00:00<00:00, 15209.65 examples/s]
 10%|▉         | 188/1890 [00:07<01:10, 24.00it/s]
 10%|█         | 189/1890 [00:08<01:10, 24.00it/s]

{'eval_loss': 0.3622388243675232, 'eval_f1': 0.2844869750132908, 'eval_precision': 0.35621045621045616, 'eval_recall': 0.23699822904368356, 'eval_runtime': 0.6009, 'eval_samples_per_second': 627.387, 'eval_steps_per_second': 79.88, 'epoch': 1.0}


 20%|█▉        | 377/1890 [00:17<01:01, 24.71it/s]
 20%|██        | 378/1890 [00:18<01:01, 24.71it/s]

{'eval_loss': 0.30409398674964905, 'eval_f1': 0.5509036758764526, 'eval_precision': 0.6344083694083695, 'eval_recall': 0.4971387148325531, 'eval_runtime': 0.5797, 'eval_samples_per_second': 650.316, 'eval_steps_per_second': 82.799, 'epoch': 2.0}


 30%|██▉       | 566/1890 [00:26<00:53, 24.94it/s]
 30%|███       | 567/1890 [00:27<00:53, 24.94it/s]

{'eval_loss': 0.3316703140735626, 'eval_f1': 0.5647254018771047, 'eval_precision': 0.8219252873563218, 'eval_recall': 0.5197350210566952, 'eval_runtime': 0.5298, 'eval_samples_per_second': 711.604, 'eval_steps_per_second': 90.602, 'epoch': 3.0}


 40%|███▉      | 755/1890 [00:36<00:47, 24.01it/s]
 40%|████      | 756/1890 [00:36<00:47, 24.01it/s]

{'eval_loss': 0.3434963524341583, 'eval_f1': 0.6628111935113892, 'eval_precision': 0.7656520085897979, 'eval_recall': 0.5982846176780607, 'eval_runtime': 0.5341, 'eval_samples_per_second': 705.884, 'eval_steps_per_second': 89.874, 'epoch': 4.0}


 50%|████▉     | 944/1890 [00:45<00:38, 24.59it/s]
 50%|█████     | 945/1890 [00:46<00:38, 24.59it/s]

{'eval_loss': 0.3764269948005676, 'eval_f1': 0.6707974896525556, 'eval_precision': 0.7368866162932083, 'eval_recall': 0.6325521512706797, 'eval_runtime': 0.562, 'eval_samples_per_second': 670.815, 'eval_steps_per_second': 85.409, 'epoch': 5.0}


 53%|█████▎    | 1004/1890 [00:49<00:36, 24.34it/s]

{'loss': 0.225, 'grad_norm': 2.9294703006744385, 'learning_rate': 2.3544973544973546e-05, 'epoch': 5.29}


 60%|█████▉    | 1133/1890 [00:55<00:30, 24.83it/s]
 60%|██████    | 1134/1890 [00:55<00:30, 24.83it/s]

{'eval_loss': 0.37451082468032837, 'eval_f1': 0.6924205532931229, 'eval_precision': 0.7480495101262834, 'eval_recall': 0.6483585786314683, 'eval_runtime': 0.57, 'eval_samples_per_second': 661.454, 'eval_steps_per_second': 84.217, 'epoch': 6.0}


 70%|██████▉   | 1322/1890 [01:04<00:22, 24.78it/s]
 70%|███████   | 1323/1890 [01:04<00:22, 24.78it/s]

{'eval_loss': 0.4070133566856384, 'eval_f1': 0.7092494404001293, 'eval_precision': 0.7471929436040934, 'eval_recall': 0.6813170911875497, 'eval_runtime': 0.5366, 'eval_samples_per_second': 702.515, 'eval_steps_per_second': 89.445, 'epoch': 7.0}


 80%|███████▉  | 1511/1890 [01:13<00:15, 24.11it/s]
 80%|████████  | 1512/1890 [01:14<00:15, 24.11it/s]

{'eval_loss': 0.41225090622901917, 'eval_f1': 0.7011132540553711, 'eval_precision': 0.732504684184199, 'eval_recall': 0.6803458457004941, 'eval_runtime': 0.58, 'eval_samples_per_second': 650.032, 'eval_steps_per_second': 82.763, 'epoch': 8.0}


 90%|████████▉ | 1700/1890 [01:23<00:07, 23.86it/s]
 90%|█████████ | 1701/1890 [01:23<00:07, 23.86it/s]

{'eval_loss': 0.41704267263412476, 'eval_f1': 0.7278250610667543, 'eval_precision': 0.7510163524869407, 'eval_recall': 0.7099974240209229, 'eval_runtime': 0.5643, 'eval_samples_per_second': 668.044, 'eval_steps_per_second': 85.056, 'epoch': 9.0}


100%|█████████▉| 1889/1890 [01:32<00:00, 24.76it/s]
100%|██████████| 1890/1890 [01:33<00:00, 24.76it/s]

{'eval_loss': 0.4135846495628357, 'eval_f1': 0.729577059238548, 'eval_precision': 0.7583790980373672, 'eval_recall': 0.7048405137285152, 'eval_runtime': 0.5322, 'eval_samples_per_second': 708.4, 'eval_steps_per_second': 90.194, 'epoch': 10.0}


100%|██████████| 1890/1890 [01:34<00:00, 19.96it/s]


{'train_runtime': 94.7066, 'train_samples_per_second': 159.123, 'train_steps_per_second': 19.956, 'train_loss': 0.13296544917676814, 'epoch': 10.0}


100%|██████████| 48/48 [00:00<00:00, 92.04it/s]


Evaluation Metrics: {'eval_loss': 0.4135846495628357, 'eval_f1': 0.729577059238548, 'eval_precision': 0.7583790980373672, 'eval_recall': 0.7048405137285152, 'eval_runtime': 0.5341, 'eval_samples_per_second': 705.851, 'eval_steps_per_second': 89.87, 'epoch': 10.0}
CHECK SCORE                                                 model     lan  \
14  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0...  python   
15  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0...  python   
16  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0...  python   
17  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0...  python   
18  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0...  python   

                 cat  precision    recall        f1  
14             Usage   0.901961  0.760331  0.825112  
15        Parameters   0.883929  0.883929  0.883929  
16  DevelopmentNotes   0.605263  0.575000  0.589744  
17            Expand   0.661017  0.565217  0.609375  
18           Summary   0.739726  0.739726  0.739726  
Scores:  

0,1
eval/f1,▁▅▅▇▇▇█████
eval/loss,▅▁▃▃▅▅▇████
eval/precision,▁▅█▇▇▇▇▇▇▇▇
eval/recall,▁▅▅▆▇▇█████
eval/runtime,█▆▁▁▄▅▂▆▄▁▁
eval/samples_per_second,▁▃██▅▄▇▃▄██
eval/steps_per_second,▁▃██▅▄▇▃▄██
train/epoch,▁▂▃▃▄▄▅▆▆▇███
train/global_step,▁▂▃▃▄▄▅▆▆▇███
train/grad_norm,▁

0,1
eval/f1,0.72958
eval/loss,0.41358
eval/precision,0.75838
eval/recall,0.70484
eval/runtime,0.5341
eval/samples_per_second,705.851
eval/steps_per_second,89.87
total_flos,499097625100800.0
train/epoch,10.0
train/global_step,1890.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1298/1298 [00:00<00:00, 13379.52 examples/s]
 10%|▉         | 128/1300 [00:05<00:46, 25.20it/s]
 10%|█         | 130/1300 [00:05<00:46, 25.20it/s]

{'eval_loss': 0.28356966376304626, 'eval_f1': 0.19690005154730797, 'eval_precision': 0.3848314606741573, 'eval_recall': 0.16121789622571278, 'eval_runtime': 0.372, 'eval_samples_per_second': 698.952, 'eval_steps_per_second': 88.713, 'epoch': 1.0}


 20%|██        | 260/1300 [00:11<00:42, 24.34it/s]
 20%|██        | 260/1300 [00:12<00:42, 24.34it/s]

{'eval_loss': 0.2216731756925583, 'eval_f1': 0.4648511965169745, 'eval_precision': 0.6436932632584806, 'eval_recall': 0.38238743356932764, 'eval_runtime': 0.3955, 'eval_samples_per_second': 657.404, 'eval_steps_per_second': 83.44, 'epoch': 2.0}


 30%|██▉       | 389/1300 [00:18<00:36, 24.74it/s]
 30%|███       | 390/1300 [00:18<00:36, 24.74it/s]

{'eval_loss': 0.21828529238700867, 'eval_f1': 0.48567354701077753, 'eval_precision': 0.6098868224303765, 'eval_recall': 0.4164814817747972, 'eval_runtime': 0.3749, 'eval_samples_per_second': 693.459, 'eval_steps_per_second': 88.016, 'epoch': 3.0}


 40%|███▉      | 518/1300 [00:25<00:32, 23.94it/s]
 40%|████      | 520/1300 [00:25<00:32, 23.94it/s]

{'eval_loss': 0.21932640671730042, 'eval_f1': 0.578875120049698, 'eval_precision': 0.8250702772766504, 'eval_recall': 0.5057026907755108, 'eval_runtime': 0.3722, 'eval_samples_per_second': 698.491, 'eval_steps_per_second': 88.655, 'epoch': 4.0}


 50%|█████     | 650/1300 [00:31<00:25, 25.08it/s]
 50%|█████     | 650/1300 [00:32<00:25, 25.08it/s]

{'eval_loss': 0.2250576615333557, 'eval_f1': 0.6091410211489822, 'eval_precision': 0.8416821313706383, 'eval_recall': 0.5414246177000176, 'eval_runtime': 0.3728, 'eval_samples_per_second': 697.381, 'eval_steps_per_second': 88.514, 'epoch': 5.0}


 60%|█████▉    | 779/1300 [00:38<00:22, 23.50it/s]
 60%|██████    | 780/1300 [00:38<00:22, 23.50it/s]

{'eval_loss': 0.22325600683689117, 'eval_f1': 0.6332884292096496, 'eval_precision': 0.8331247883768892, 'eval_recall': 0.5618649530926537, 'eval_runtime': 0.3874, 'eval_samples_per_second': 671.101, 'eval_steps_per_second': 85.178, 'epoch': 6.0}


 70%|██████▉   | 908/1300 [00:45<00:15, 24.61it/s]
 70%|███████   | 910/1300 [00:45<00:15, 24.61it/s]

{'eval_loss': 0.2334321290254593, 'eval_f1': 0.6619832068032169, 'eval_precision': 0.8298613915312384, 'eval_recall': 0.5932322009996552, 'eval_runtime': 0.376, 'eval_samples_per_second': 691.518, 'eval_steps_per_second': 87.77, 'epoch': 7.0}


 77%|███████▋  | 1004/1300 [00:51<00:12, 23.66it/s]

{'loss': 0.1325, 'grad_norm': 1.1105527877807617, 'learning_rate': 1.153846153846154e-05, 'epoch': 7.69}


 80%|████████  | 1040/1300 [00:52<00:10, 24.44it/s]
 80%|████████  | 1040/1300 [00:53<00:10, 24.44it/s]

{'eval_loss': 0.2239556461572647, 'eval_f1': 0.6603520272528175, 'eval_precision': 0.8388299654228747, 'eval_recall': 0.5851312480243613, 'eval_runtime': 0.3964, 'eval_samples_per_second': 655.831, 'eval_steps_per_second': 83.24, 'epoch': 8.0}


 90%|████████▉ | 1169/1300 [00:59<00:05, 24.16it/s]
 90%|█████████ | 1170/1300 [00:59<00:05, 24.16it/s]

{'eval_loss': 0.22875961661338806, 'eval_f1': 0.6675820390267909, 'eval_precision': 0.8134948482316903, 'eval_recall': 0.6021692340635211, 'eval_runtime': 0.3829, 'eval_samples_per_second': 678.95, 'eval_steps_per_second': 86.174, 'epoch': 9.0}


100%|█████████▉| 1298/1300 [01:05<00:00, 23.95it/s]
100%|██████████| 1300/1300 [01:07<00:00, 23.95it/s]

{'eval_loss': 0.2318565994501114, 'eval_f1': 0.6572132195583639, 'eval_precision': 0.8074986676698329, 'eval_recall': 0.594134456638753, 'eval_runtime': 0.3654, 'eval_samples_per_second': 711.583, 'eval_steps_per_second': 90.316, 'epoch': 10.0}


100%|██████████| 1300/1300 [01:08<00:00, 19.04it/s]


{'train_runtime': 68.2845, 'train_samples_per_second': 152.011, 'train_steps_per_second': 19.038, 'train_loss': 0.10723604825826792, 'epoch': 10.0}


100%|██████████| 33/33 [00:00<00:00, 94.33it/s] 


Evaluation Metrics: {'eval_loss': 0.22875961661338806, 'eval_f1': 0.6675820390267909, 'eval_precision': 0.8134948482316903, 'eval_recall': 0.6021692340635211, 'eval_runtime': 0.3657, 'eval_samples_per_second': 710.887, 'eval_steps_per_second': 90.228, 'epoch': 10.0}
CHECK SCORE                                                 model    lan  \
12  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0...  pharo   
13  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0...  pharo   
14  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0...  pharo   
15  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0...  pharo   
16  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0...  pharo   
17  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0...  pharo   
18  lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0...  pharo   

                        cat  precision    recall        f1  
12  Keyimplementationpoints   0.814815  0.523810  0.637681  
13                  Example   0.926316  0.871287  0.897959  
14         Responsibiliti

0,1
eval/f1,▁▅▅▇▇▇█████
eval/loss,█▁▁▁▂▂▃▂▂▂▂
eval/precision,▁▅▄██████▇█
eval/recall,▁▅▅▆▇▇█████
eval/runtime,▂█▃▃▃▆▃█▅▁▁
eval/samples_per_second,▆▁▆▆▆▃▅▁▄██
eval/steps_per_second,▆▁▆▆▆▃▅▁▄██
train/epoch,▁▂▃▃▄▅▆▆▆▇███
train/global_step,▁▂▃▃▄▅▆▆▆▇███
train/grad_norm,▁

0,1
eval/f1,0.66758
eval/loss,0.22876
eval/precision,0.81349
eval/recall,0.60217
eval/runtime,0.3657
eval/samples_per_second,710.887
eval/steps_per_second,90.228
total_flos,343783551237120.0
train/epoch,10.0
train/global_step,1300.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 7614/7614 [00:00<00:00, 20267.38 examples/s]
 10%|▉         | 760/7620 [00:31<04:51, 23.57it/s]
 10%|█         | 762/7620 [00:33<04:50, 23.57it/s]

{'eval_loss': 0.16646847128868103, 'eval_f1': 0.41983840787247356, 'eval_precision': 0.5204161390416523, 'eval_recall': 0.3949301224227671, 'eval_runtime': 2.2044, 'eval_samples_per_second': 690.885, 'eval_steps_per_second': 86.644, 'epoch': 1.0}


 13%|█▎        | 1003/7620 [00:44<04:40, 23.59it/s]

{'loss': 0.2527, 'grad_norm': 2.619013547897339, 'learning_rate': 4.343832020997376e-06, 'epoch': 1.31}


 20%|█▉        | 1522/7620 [01:05<04:13, 24.07it/s]
 20%|██        | 1524/7620 [01:08<04:13, 24.07it/s]

{'eval_loss': 0.12346293032169342, 'eval_f1': 0.5347346042092337, 'eval_precision': 0.5409842632033339, 'eval_recall': 0.5295251352786494, 'eval_runtime': 2.2489, 'eval_samples_per_second': 677.207, 'eval_steps_per_second': 84.929, 'epoch': 2.0}


 26%|██▋       | 2002/7620 [01:28<03:51, 24.31it/s]

{'loss': 0.121, 'grad_norm': 1.267940640449524, 'learning_rate': 3.6876640419947506e-06, 'epoch': 2.62}


 30%|██▉       | 2284/7620 [01:40<03:35, 24.74it/s]
 30%|███       | 2286/7620 [01:42<03:35, 24.74it/s]

{'eval_loss': 0.1036229208111763, 'eval_f1': 0.551098548289791, 'eval_precision': 0.6897247300035856, 'eval_recall': 0.5330947271604541, 'eval_runtime': 2.1522, 'eval_samples_per_second': 707.636, 'eval_steps_per_second': 88.745, 'epoch': 3.0}


 39%|███▉      | 3004/7620 [02:13<03:12, 23.92it/s]

{'loss': 0.0928, 'grad_norm': 1.1909810304641724, 'learning_rate': 3.0314960629921263e-06, 'epoch': 3.94}


 40%|███▉      | 3046/7620 [02:15<03:03, 24.99it/s]
 40%|████      | 3048/7620 [02:17<03:02, 24.99it/s]

{'eval_loss': 0.10297688096761703, 'eval_f1': 0.6031257549803934, 'eval_precision': 0.9235083943606069, 'eval_recall': 0.5692864150972337, 'eval_runtime': 2.0962, 'eval_samples_per_second': 726.546, 'eval_steps_per_second': 91.116, 'epoch': 4.0}


 50%|████▉     | 3808/7620 [02:49<02:40, 23.77it/s]
 50%|█████     | 3810/7620 [02:51<02:40, 23.77it/s]

{'eval_loss': 0.09245611727237701, 'eval_f1': 0.7529699013637723, 'eval_precision': 0.9114577183787222, 'eval_recall': 0.6933685042794132, 'eval_runtime': 2.2085, 'eval_samples_per_second': 689.609, 'eval_steps_per_second': 86.484, 'epoch': 5.0}


 53%|█████▎    | 4003/7620 [03:00<02:30, 24.06it/s]

{'loss': 0.0744, 'grad_norm': 1.7561709880828857, 'learning_rate': 2.3753280839895016e-06, 'epoch': 5.25}


 60%|█████▉    | 4570/7620 [03:23<02:04, 24.52it/s]
 60%|██████    | 4572/7620 [03:25<02:04, 24.52it/s]

{'eval_loss': 0.09369064122438431, 'eval_f1': 0.7502179662395686, 'eval_precision': 0.8880854489304963, 'eval_recall': 0.6950030825614926, 'eval_runtime': 2.1937, 'eval_samples_per_second': 694.251, 'eval_steps_per_second': 87.066, 'epoch': 6.0}


 66%|██████▌   | 5002/7620 [03:44<01:45, 24.71it/s]

{'loss': 0.0641, 'grad_norm': 1.0465375185012817, 'learning_rate': 1.7191601049868767e-06, 'epoch': 6.56}


 70%|██████▉   | 5332/7620 [03:57<01:31, 25.03it/s]
 70%|███████   | 5334/7620 [04:00<01:31, 25.03it/s]

{'eval_loss': 0.09117134660482407, 'eval_f1': 0.7778198531419849, 'eval_precision': 0.9039005809604598, 'eval_recall': 0.7220125864819441, 'eval_runtime': 2.1747, 'eval_samples_per_second': 700.329, 'eval_steps_per_second': 87.829, 'epoch': 7.0}


 79%|███████▉  | 6004/7620 [04:28<01:06, 24.24it/s]

{'loss': 0.0579, 'grad_norm': 0.15864567458629608, 'learning_rate': 1.062992125984252e-06, 'epoch': 7.87}


 80%|███████▉  | 6094/7620 [04:32<01:01, 24.84it/s]
 80%|████████  | 6096/7620 [04:34<01:01, 24.84it/s]

{'eval_loss': 0.0913877859711647, 'eval_f1': 0.8010193154010136, 'eval_precision': 0.9068753672543451, 'eval_recall': 0.7488126031536988, 'eval_runtime': 2.1177, 'eval_samples_per_second': 719.169, 'eval_steps_per_second': 90.191, 'epoch': 8.0}


 90%|████████▉ | 6856/7620 [05:06<00:30, 24.68it/s]
 90%|█████████ | 6858/7620 [05:08<00:30, 24.68it/s]

{'eval_loss': 0.08979693055152893, 'eval_f1': 0.8150691618385736, 'eval_precision': 0.9040532050128872, 'eval_recall': 0.7635646560766266, 'eval_runtime': 2.1904, 'eval_samples_per_second': 695.304, 'eval_steps_per_second': 87.198, 'epoch': 9.0}


 92%|█████████▏| 7003/7620 [05:15<00:25, 24.02it/s]

{'loss': 0.0529, 'grad_norm': 1.1499711275100708, 'learning_rate': 4.068241469816273e-07, 'epoch': 9.19}


100%|█████████▉| 7618/7620 [05:40<00:00, 24.66it/s]
100%|██████████| 7620/7620 [05:43<00:00, 24.66it/s]

{'eval_loss': 0.08942916989326477, 'eval_f1': 0.8181074795409444, 'eval_precision': 0.9066341051616977, 'eval_recall': 0.7656130493236045, 'eval_runtime': 2.2668, 'eval_samples_per_second': 671.883, 'eval_steps_per_second': 84.261, 'epoch': 10.0}


100%|██████████| 7620/7620 [05:45<00:00, 22.09it/s]


{'train_runtime': 345.0171, 'train_samples_per_second': 176.542, 'train_steps_per_second': 22.086, 'train_loss': 0.09790120037209017, 'epoch': 10.0}


100%|██████████| 191/191 [00:02<00:00, 86.86it/s]


Evaluation Metrics: {'eval_loss': 0.08942916989326477, 'eval_f1': 0.8181074795409444, 'eval_precision': 0.9066341051616977, 'eval_recall': 0.7656130493236045, 'eval_runtime': 2.2159, 'eval_samples_per_second': 687.306, 'eval_steps_per_second': 86.195, 'epoch': 10.0}
CHECK SCORE                                                 model   lan          cat  \
12  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0...  java      summary   
13  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0...  java    Ownership   
14  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0...  java       Expand   
15  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0...  java        usage   
16  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0...  java      Pointer   
17  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0...  java  deprecation   
18  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0...  java     rational   

    precision    recall        f1  
12   0.929363  0.954481  0.941754  
13   0.964912  1.000000  0.982143  
14  

0,1
eval/f1,▁▃▃▄▇▇▇████
eval/loss,█▄▂▂▁▁▁▁▁▁▁
eval/precision,▁▁▄██▇█████
eval/recall,▁▄▄▄▇▇▇████
eval/runtime,▅▇▃▁▆▅▄▂▅█▆
eval/samples_per_second,▃▂▆█▃▄▅▇▄▁▃
eval/steps_per_second,▃▂▆█▃▄▅▇▄▁▃
train/epoch,▁▁▂▂▃▃▃▄▄▅▅▆▆▆▇▇███
train/global_step,▁▁▂▂▃▃▃▄▄▅▅▆▆▆▇▇███
train/grad_norm,█▄▄▆▄▁▄

0,1
eval/f1,0.81811
eval/loss,0.08943
eval/precision,0.90663
eval/recall,0.76561
eval/runtime,2.2159
eval/samples_per_second,687.306
eval/steps_per_second,86.195
total_flos,2017327177827840.0
train/epoch,10.0
train/global_step,7620.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1884/1884 [00:00<00:00, 18028.95 examples/s]
 10%|▉         | 188/1890 [00:07<01:09, 24.32it/s]
 10%|█         | 189/1890 [00:08<01:09, 24.32it/s]

{'eval_loss': 0.49190446734428406, 'eval_f1': 0.0, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_runtime': 0.5586, 'eval_samples_per_second': 674.868, 'eval_steps_per_second': 85.925, 'epoch': 1.0}


 20%|█▉        | 377/1890 [00:16<01:02, 24.23it/s]
 20%|██        | 378/1890 [00:17<01:02, 24.23it/s]

{'eval_loss': 0.45459288358688354, 'eval_f1': 0.09113924050632911, 'eval_precision': 0.1945945945945946, 'eval_recall': 0.05950413223140496, 'eval_runtime': 0.5838, 'eval_samples_per_second': 645.737, 'eval_steps_per_second': 82.216, 'epoch': 2.0}


 30%|██▉       | 566/1890 [00:25<00:53, 24.98it/s]
 30%|███       | 567/1890 [00:26<00:52, 24.98it/s]

{'eval_loss': 0.42039549350738525, 'eval_f1': 0.1421904761904762, 'eval_precision': 0.3675925925925926, 'eval_recall': 0.09845041322314049, 'eval_runtime': 0.5304, 'eval_samples_per_second': 710.828, 'eval_steps_per_second': 90.503, 'epoch': 3.0}


 40%|███▉      | 755/1890 [00:35<00:45, 24.68it/s]
 40%|████      | 756/1890 [00:36<00:45, 24.68it/s]

{'eval_loss': 0.3895494341850281, 'eval_f1': 0.25147129186602873, 'eval_precision': 0.5849242424242425, 'eval_recall': 0.1780981627339037, 'eval_runtime': 0.5446, 'eval_samples_per_second': 692.314, 'eval_steps_per_second': 88.146, 'epoch': 4.0}


 50%|████▉     | 944/1890 [00:45<00:39, 23.78it/s]
 50%|█████     | 945/1890 [00:45<00:39, 23.78it/s]

{'eval_loss': 0.3661344051361084, 'eval_f1': 0.3219752286803472, 'eval_precision': 0.5539364501113135, 'eval_recall': 0.24958900066309778, 'eval_runtime': 0.6063, 'eval_samples_per_second': 621.768, 'eval_steps_per_second': 79.164, 'epoch': 5.0}


 53%|█████▎    | 1004/1890 [00:49<00:36, 24.09it/s]

{'loss': 0.4375, 'grad_norm': 2.632704019546509, 'learning_rate': 2.3544973544973545e-06, 'epoch': 5.29}


 60%|█████▉    | 1133/1890 [00:54<00:30, 24.68it/s]
 60%|██████    | 1134/1890 [00:55<00:30, 24.68it/s]

{'eval_loss': 0.3538739085197449, 'eval_f1': 0.3898222251377591, 'eval_precision': 0.551029648208888, 'eval_recall': 0.3053237858032379, 'eval_runtime': 0.534, 'eval_samples_per_second': 705.939, 'eval_steps_per_second': 89.881, 'epoch': 6.0}


 70%|██████▉   | 1321/1890 [01:04<00:23, 24.25it/s]
 70%|███████   | 1323/1890 [01:05<00:23, 24.25it/s]

{'eval_loss': 0.34401506185531616, 'eval_f1': 0.4123148329731494, 'eval_precision': 0.5461187983571436, 'eval_recall': 0.3356946758098688, 'eval_runtime': 0.5359, 'eval_samples_per_second': 703.489, 'eval_steps_per_second': 89.569, 'epoch': 7.0}


 80%|███████▉  | 1510/1890 [01:13<00:15, 23.77it/s]
 80%|████████  | 1512/1890 [01:14<00:15, 23.77it/s]

{'eval_loss': 0.34102702140808105, 'eval_f1': 0.421001182036859, 'eval_precision': 0.6299910356872382, 'eval_recall': 0.3498282866176363, 'eval_runtime': 0.546, 'eval_samples_per_second': 690.434, 'eval_steps_per_second': 87.907, 'epoch': 8.0}


 90%|████████▉ | 1699/1890 [01:22<00:07, 24.30it/s]
 90%|█████████ | 1701/1890 [01:23<00:07, 24.30it/s]

{'eval_loss': 0.3372706472873688, 'eval_f1': 0.4237797947559968, 'eval_precision': 0.6260773504675943, 'eval_recall': 0.35478696430358675, 'eval_runtime': 0.5818, 'eval_samples_per_second': 647.943, 'eval_steps_per_second': 82.497, 'epoch': 9.0}


100%|█████████▉| 1888/1890 [01:32<00:00, 24.57it/s]
100%|██████████| 1890/1890 [01:33<00:00, 24.57it/s]

{'eval_loss': 0.33692097663879395, 'eval_f1': 0.42213883196059243, 'eval_precision': 0.6222825662541974, 'eval_recall': 0.35491978602731755, 'eval_runtime': 0.5823, 'eval_samples_per_second': 647.424, 'eval_steps_per_second': 82.431, 'epoch': 10.0}


100%|██████████| 1890/1890 [01:34<00:00, 19.95it/s]


{'train_runtime': 94.7307, 'train_samples_per_second': 159.083, 'train_steps_per_second': 19.951, 'train_loss': 0.3778680549096809, 'epoch': 10.0}


100%|██████████| 48/48 [00:00<00:00, 85.93it/s]


Evaluation Metrics: {'eval_loss': 0.3372706472873688, 'eval_f1': 0.4237797947559968, 'eval_precision': 0.6260773504675943, 'eval_recall': 0.35478696430358675, 'eval_runtime': 0.5722, 'eval_samples_per_second': 658.828, 'eval_steps_per_second': 83.883, 'epoch': 10.0}
CHECK SCORE                                                 model     lan  \
14  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0...  python   
15  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0...  python   
16  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0...  python   
17  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0...  python   
18  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0...  python   

                 cat  precision    recall        f1  
14             Usage   0.902439  0.611570  0.729064  
15        Parameters   0.890110  0.723214  0.798030  
16  DevelopmentNotes   0.000000  0.000000  0.000000  
17            Expand   0.500000  0.014493  0.028169  
18           Summary   0.837838  0.424658  0.563636  
Scores

0,1
eval/f1,▁▃▃▅▆▇█████
eval/loss,█▆▅▃▂▂▁▁▁▁▁
eval/precision,▁▃▅▇▇▇▇████
eval/recall,▁▂▃▅▆▇█████
eval/runtime,▄▆▁▂█▁▂▂▆▆▅
eval/samples_per_second,▅▃█▇▁█▇▆▃▃▄
eval/steps_per_second,▅▃█▇▁█▇▆▃▃▄
train/epoch,▁▂▃▃▄▄▅▆▆▇███
train/global_step,▁▂▃▃▄▄▅▆▆▇███
train/grad_norm,▁

0,1
eval/f1,0.42378
eval/loss,0.33727
eval/precision,0.62608
eval/recall,0.35479
eval/runtime,0.5722
eval/samples_per_second,658.828
eval/steps_per_second,83.883
total_flos,499097625100800.0
train/epoch,10.0
train/global_step,1890.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1298/1298 [00:00<00:00, 14411.47 examples/s]
 10%|█         | 130/1300 [00:05<00:47, 24.78it/s]
 10%|█         | 130/1300 [00:05<00:47, 24.78it/s]

{'eval_loss': 0.4293648898601532, 'eval_f1': 0.0, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_runtime': 0.3702, 'eval_samples_per_second': 702.383, 'eval_steps_per_second': 89.149, 'epoch': 1.0}


 20%|█▉        | 259/1300 [00:11<00:41, 24.96it/s]
 20%|██        | 260/1300 [00:12<00:41, 24.96it/s]

{'eval_loss': 0.38273584842681885, 'eval_f1': 0.05110336817653891, 'eval_precision': 0.14285714285714285, 'eval_recall': 0.031117397454031116, 'eval_runtime': 0.3741, 'eval_samples_per_second': 694.99, 'eval_steps_per_second': 88.21, 'epoch': 2.0}


 30%|██▉       | 388/1300 [00:18<00:36, 24.97it/s]
 30%|███       | 390/1300 [00:18<00:36, 24.97it/s]

{'eval_loss': 0.3470132648944855, 'eval_f1': 0.11299435028248588, 'eval_precision': 0.13157894736842105, 'eval_recall': 0.099009900990099, 'eval_runtime': 0.369, 'eval_samples_per_second': 704.537, 'eval_steps_per_second': 89.422, 'epoch': 3.0}


 40%|████      | 520/1300 [00:25<00:31, 24.47it/s]
 40%|████      | 520/1300 [00:25<00:31, 24.47it/s]

{'eval_loss': 0.32350653409957886, 'eval_f1': 0.11494252873563217, 'eval_precision': 0.136986301369863, 'eval_recall': 0.099009900990099, 'eval_runtime': 0.4121, 'eval_samples_per_second': 630.887, 'eval_steps_per_second': 80.074, 'epoch': 4.0}


 50%|████▉     | 649/1300 [00:31<00:26, 24.62it/s]
 50%|█████     | 650/1300 [00:32<00:26, 24.62it/s]

{'eval_loss': 0.3080587387084961, 'eval_f1': 0.12063492063492064, 'eval_precision': 0.13743218806509946, 'eval_recall': 0.1074964639321075, 'eval_runtime': 0.3756, 'eval_samples_per_second': 692.236, 'eval_steps_per_second': 87.861, 'epoch': 5.0}


 60%|█████▉    | 778/1300 [00:38<00:21, 24.27it/s]
 60%|██████    | 780/1300 [00:38<00:21, 24.27it/s]

{'eval_loss': 0.29606011509895325, 'eval_f1': 0.12396069538926681, 'eval_precision': 0.1331168831168831, 'eval_recall': 0.11598302687411599, 'eval_runtime': 0.3807, 'eval_samples_per_second': 683.015, 'eval_steps_per_second': 86.69, 'epoch': 6.0}


 70%|███████   | 910/1300 [00:45<00:16, 23.91it/s]
 70%|███████   | 910/1300 [00:45<00:16, 23.91it/s]

{'eval_loss': 0.28803253173828125, 'eval_f1': 0.12462006079027356, 'eval_precision': 0.13464696223316913, 'eval_recall': 0.11598302687411599, 'eval_runtime': 0.3965, 'eval_samples_per_second': 655.729, 'eval_steps_per_second': 83.227, 'epoch': 7.0}


 77%|███████▋  | 1003/1300 [00:50<00:12, 23.56it/s]

{'loss': 0.3404, 'grad_norm': 1.8726283311843872, 'learning_rate': 1.153846153846154e-06, 'epoch': 7.69}


 80%|███████▉  | 1039/1300 [00:52<00:10, 24.66it/s]
 80%|████████  | 1040/1300 [00:52<00:10, 24.66it/s]

{'eval_loss': 0.28277406096458435, 'eval_f1': 0.15846757551188012, 'eval_precision': 0.41904761904761906, 'eval_recall': 0.13670661803022408, 'eval_runtime': 0.3753, 'eval_samples_per_second': 692.692, 'eval_steps_per_second': 87.919, 'epoch': 8.0}


 90%|████████▉ | 1168/1300 [00:58<00:05, 23.74it/s]
 90%|█████████ | 1170/1300 [00:59<00:05, 23.74it/s]

{'eval_loss': 0.27951472997665405, 'eval_f1': 0.1708790693310817, 'eval_precision': 0.42054574638844305, 'eval_recall': 0.14422541502270528, 'eval_runtime': 0.3735, 'eval_samples_per_second': 696.162, 'eval_steps_per_second': 88.359, 'epoch': 9.0}


100%|██████████| 1300/1300 [01:05<00:00, 24.31it/s]
100%|██████████| 1300/1300 [01:06<00:00, 24.31it/s]

{'eval_loss': 0.27845460176467896, 'eval_f1': 0.17587940184709464, 'eval_precision': 0.41770186335403725, 'eval_recall': 0.1493992406759473, 'eval_runtime': 0.3697, 'eval_samples_per_second': 703.211, 'eval_steps_per_second': 89.254, 'epoch': 10.0}


100%|██████████| 1300/1300 [01:07<00:00, 19.21it/s]


{'train_runtime': 67.6656, 'train_samples_per_second': 153.401, 'train_steps_per_second': 19.212, 'train_loss': 0.3198738274207482, 'epoch': 10.0}


100%|██████████| 33/33 [00:00<00:00, 94.75it/s] 


Evaluation Metrics: {'eval_loss': 0.27845460176467896, 'eval_f1': 0.17587940184709464, 'eval_precision': 0.41770186335403725, 'eval_recall': 0.1493992406759473, 'eval_runtime': 0.3624, 'eval_samples_per_second': 717.535, 'eval_steps_per_second': 91.072, 'epoch': 10.0}
CHECK SCORE                                                 model    lan  \
12  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0...  pharo   
13  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0...  pharo   
14  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0...  pharo   
15  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0...  pharo   
16  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0...  pharo   
17  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0...  pharo   
18  lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0...  pharo   

                        cat  precision    recall        f1  
12  Keyimplementationpoints   0.000000  0.000000  0.000000  
13                  Example   0.923913  0.841584  0.880829  
14         Responsibili

0,1
eval/f1,▁▃▅▆▆▆▆▇███
eval/loss,█▆▄▃▂▂▁▁▁▁▁
eval/precision,▁▃▃▃▃▃▃████
eval/recall,▁▂▆▆▆▆▆▇███
eval/runtime,▂▃▂█▃▄▆▃▃▂▁
eval/samples_per_second,▇▆▇▁▆▅▃▆▆▇█
eval/steps_per_second,▇▆▇▁▆▅▃▆▆▇█
train/epoch,▁▂▃▃▄▅▆▆▆▇███
train/global_step,▁▂▃▃▄▅▆▆▆▇███
train/grad_norm,▁

0,1
eval/f1,0.17588
eval/loss,0.27845
eval/precision,0.4177
eval/recall,0.1494
eval/runtime,0.3624
eval/samples_per_second,717.535
eval/steps_per_second,91.072
total_flos,343783551237120.0
train/epoch,10.0
train/global_step,1300.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 7614/7614 [00:00<00:00, 19430.90 examples/s]
  4%|▍         | 1004/22845 [00:35<12:20, 29.49it/s]

{'loss': 0.1601, 'grad_norm': 0.12920837104320526, 'learning_rate': 4.781133727292624e-05, 'epoch': 0.66}


  7%|▋         | 1521/22845 [00:53<12:34, 28.24it/s]
  7%|▋         | 1523/22845 [00:57<12:34, 28.24it/s]

{'eval_loss': 0.11151058226823807, 'eval_f1': 0.6584518293992669, 'eval_precision': 0.6752259247259614, 'eval_recall': 0.6435976208777777, 'eval_runtime': 4.253, 'eval_samples_per_second': 358.096, 'eval_steps_per_second': 89.583, 'epoch': 1.0}


  9%|▉         | 2005/22845 [01:15<11:43, 29.62it/s]  

{'loss': 0.1023, 'grad_norm': 3.532294273376465, 'learning_rate': 4.562267454585248e-05, 'epoch': 1.31}


 13%|█▎        | 3003/22845 [01:50<11:59, 27.58it/s]

{'loss': 0.0895, 'grad_norm': 0.3956603407859802, 'learning_rate': 4.343401181877873e-05, 'epoch': 1.97}


 13%|█▎        | 3046/22845 [01:51<12:38, 26.09it/s]
 13%|█▎        | 3046/22845 [01:56<12:38, 26.09it/s]

{'eval_loss': 0.08851809054613113, 'eval_f1': 0.8084939772416712, 'eval_precision': 0.8750078706966896, 'eval_recall': 0.7679712364326713, 'eval_runtime': 4.9299, 'eval_samples_per_second': 308.934, 'eval_steps_per_second': 77.284, 'epoch': 2.0}


 18%|█▊        | 4003/22845 [02:35<12:18, 25.53it/s]  

{'loss': 0.0598, 'grad_norm': 0.058342114090919495, 'learning_rate': 4.1245349091704974e-05, 'epoch': 2.63}


 20%|█▉        | 4566/22845 [02:56<11:42, 26.02it/s]
 20%|██        | 4569/22845 [03:00<11:42, 26.02it/s]

{'eval_loss': 0.08558880537748337, 'eval_f1': 0.8393522147376317, 'eval_precision': 0.8660505419843672, 'eval_recall': 0.8152951542832897, 'eval_runtime': 3.6565, 'eval_samples_per_second': 416.52, 'eval_steps_per_second': 104.198, 'epoch': 3.0}


 22%|██▏       | 5003/22845 [03:17<10:39, 27.92it/s]  

{'loss': 0.0542, 'grad_norm': 0.24034219980239868, 'learning_rate': 3.9056686364631214e-05, 'epoch': 3.28}


 26%|██▋       | 6005/22845 [03:53<09:59, 28.09it/s]

{'loss': 0.0386, 'grad_norm': 1.5765352249145508, 'learning_rate': 3.6868023637557454e-05, 'epoch': 3.94}


 27%|██▋       | 6090/22845 [03:56<09:37, 29.04it/s]
 27%|██▋       | 6092/22845 [04:00<09:36, 29.04it/s]

{'eval_loss': 0.10659456253051758, 'eval_f1': 0.8487157302477133, 'eval_precision': 0.8836386600435694, 'eval_recall': 0.8215523494738155, 'eval_runtime': 3.6237, 'eval_samples_per_second': 420.292, 'eval_steps_per_second': 105.142, 'epoch': 4.0}


 31%|███       | 7002/22845 [04:35<09:22, 28.18it/s]  

{'loss': 0.0283, 'grad_norm': 0.00720627186819911, 'learning_rate': 3.467936091048369e-05, 'epoch': 4.6}


 33%|███▎      | 7614/22845 [04:57<09:18, 27.27it/s]
 33%|███▎      | 7615/22845 [05:01<09:18, 27.27it/s]

{'eval_loss': 0.11453932523727417, 'eval_f1': 0.8367126561206503, 'eval_precision': 0.8611674010022129, 'eval_recall': 0.8174048393554961, 'eval_runtime': 4.0533, 'eval_samples_per_second': 375.743, 'eval_steps_per_second': 93.997, 'epoch': 5.0}


 35%|███▌      | 8005/22845 [05:16<08:27, 29.25it/s]  

{'loss': 0.0257, 'grad_norm': 0.009455591440200806, 'learning_rate': 3.249069818340994e-05, 'epoch': 5.25}


 39%|███▉      | 9003/22845 [05:52<08:26, 27.31it/s]

{'loss': 0.0187, 'grad_norm': 0.7638431191444397, 'learning_rate': 3.030203545633618e-05, 'epoch': 5.91}


 40%|███▉      | 9136/22845 [05:57<08:04, 28.27it/s]
 40%|████      | 9138/22845 [06:01<08:04, 28.27it/s]

{'eval_loss': 0.14248275756835938, 'eval_f1': 0.8333644032453807, 'eval_precision': 0.8700161514851205, 'eval_recall': 0.8189793330439062, 'eval_runtime': 3.6913, 'eval_samples_per_second': 412.597, 'eval_steps_per_second': 103.217, 'epoch': 6.0}


 44%|████▍     | 10003/22845 [06:33<07:32, 28.41it/s] 

{'loss': 0.0151, 'grad_norm': 0.0033053294755518436, 'learning_rate': 2.8113372729262422e-05, 'epoch': 6.57}


 47%|████▋     | 10659/22845 [06:57<07:28, 27.20it/s]
 47%|████▋     | 10661/22845 [07:00<07:27, 27.20it/s]

{'eval_loss': 0.12866900861263275, 'eval_f1': 0.852085440515066, 'eval_precision': 0.8633416806181415, 'eval_recall': 0.8419200122101878, 'eval_runtime': 3.502, 'eval_samples_per_second': 434.898, 'eval_steps_per_second': 108.796, 'epoch': 7.0}


 48%|████▊     | 11005/22845 [07:14<06:47, 29.06it/s]  

{'loss': 0.0148, 'grad_norm': 0.014193602837622166, 'learning_rate': 2.5924710002188662e-05, 'epoch': 7.22}


 53%|█████▎    | 12003/22845 [07:50<06:46, 26.70it/s]

{'loss': 0.01, 'grad_norm': 0.0010869849938899279, 'learning_rate': 2.3736047275114905e-05, 'epoch': 7.88}


 53%|█████▎    | 12184/22845 [07:57<05:57, 29.86it/s]
 53%|█████▎    | 12184/22845 [08:00<05:57, 29.86it/s]

{'eval_loss': 0.14601512253284454, 'eval_f1': 0.8345584874603966, 'eval_precision': 0.8428006917784856, 'eval_recall': 0.8305481031342999, 'eval_runtime': 3.8756, 'eval_samples_per_second': 392.974, 'eval_steps_per_second': 98.308, 'epoch': 8.0}


 57%|█████▋    | 13004/22845 [08:30<06:00, 27.28it/s]  

{'loss': 0.0075, 'grad_norm': 0.01669994369149208, 'learning_rate': 2.1547384548041148e-05, 'epoch': 8.54}


 60%|█████▉    | 13706/22845 [08:55<05:56, 25.66it/s]
 60%|██████    | 13707/22845 [08:59<05:56, 25.66it/s]

{'eval_loss': 0.15257112681865692, 'eval_f1': 0.8376737911566808, 'eval_precision': 0.855840498819442, 'eval_recall': 0.825823339416786, 'eval_runtime': 3.8547, 'eval_samples_per_second': 395.104, 'eval_steps_per_second': 98.841, 'epoch': 9.0}


 61%|██████▏   | 14006/22845 [09:10<04:58, 29.57it/s]  

{'loss': 0.0068, 'grad_norm': 0.0023783445358276367, 'learning_rate': 1.935872182096739e-05, 'epoch': 9.19}


 66%|██████▌   | 15002/22845 [09:45<04:41, 27.86it/s]

{'loss': 0.0045, 'grad_norm': 0.0005478914245031774, 'learning_rate': 1.717005909389363e-05, 'epoch': 9.85}


 67%|██████▋   | 15230/22845 [09:53<04:16, 29.67it/s]
 67%|██████▋   | 15230/22845 [09:56<04:16, 29.67it/s]

{'eval_loss': 0.18673062324523926, 'eval_f1': 0.844163724440339, 'eval_precision': 0.877720627174062, 'eval_recall': 0.8195666004253969, 'eval_runtime': 3.4935, 'eval_samples_per_second': 435.948, 'eval_steps_per_second': 109.059, 'epoch': 10.0}


 70%|███████   | 16003/22845 [10:24<03:53, 29.26it/s]

{'loss': 0.0042, 'grad_norm': 0.003737423103302717, 'learning_rate': 1.4981396366819875e-05, 'epoch': 10.51}


 73%|███████▎  | 16752/22845 [10:50<03:27, 29.42it/s]
 73%|███████▎  | 16753/22845 [10:54<03:27, 29.42it/s]

{'eval_loss': 0.16447414457798004, 'eval_f1': 0.8505680980084082, 'eval_precision': 0.8559313128849658, 'eval_recall': 0.8470083215424831, 'eval_runtime': 3.6487, 'eval_samples_per_second': 417.405, 'eval_steps_per_second': 104.42, 'epoch': 11.0}


 74%|███████▍  | 17005/22845 [11:04<03:29, 27.91it/s]

{'loss': 0.0025, 'grad_norm': 0.00022830399393569678, 'learning_rate': 1.2792733639746115e-05, 'epoch': 11.16}


 79%|███████▉  | 18005/22845 [11:39<02:54, 27.77it/s]

{'loss': 0.0034, 'grad_norm': 0.0005694431019946933, 'learning_rate': 1.0604070912672358e-05, 'epoch': 11.82}


 80%|███████▉  | 18275/22845 [11:48<02:35, 29.36it/s]
 80%|████████  | 18276/22845 [11:51<02:35, 29.36it/s]

{'eval_loss': 0.17819248139858246, 'eval_f1': 0.8549057056089453, 'eval_precision': 0.8760372681537324, 'eval_recall': 0.8378446107038687, 'eval_runtime': 3.3646, 'eval_samples_per_second': 452.659, 'eval_steps_per_second': 113.239, 'epoch': 12.0}


 83%|████████▎ | 19005/22845 [12:18<02:21, 27.21it/s]

{'loss': 0.0012, 'grad_norm': 0.0009510120144113898, 'learning_rate': 8.4154081855986e-06, 'epoch': 12.48}


 87%|████████▋ | 19797/22845 [12:45<01:46, 28.49it/s]
 87%|████████▋ | 19799/22845 [12:49<01:46, 28.49it/s]

{'eval_loss': 0.17703597247600555, 'eval_f1': 0.8485179174397331, 'eval_precision': 0.8635768448176757, 'eval_recall': 0.8368887989909854, 'eval_runtime': 3.4755, 'eval_samples_per_second': 438.216, 'eval_steps_per_second': 109.626, 'epoch': 13.0}


 88%|████████▊ | 20003/22845 [12:57<01:38, 28.82it/s]

{'loss': 0.0017, 'grad_norm': 0.006810968741774559, 'learning_rate': 6.2267454585248415e-06, 'epoch': 13.13}


 92%|█████████▏| 21004/22845 [13:32<01:08, 26.92it/s]

{'loss': 0.0018, 'grad_norm': 0.00027597829466685653, 'learning_rate': 4.038082731451084e-06, 'epoch': 13.79}


 93%|█████████▎| 21322/22845 [13:43<00:54, 28.19it/s]
 93%|█████████▎| 21322/22845 [13:47<00:54, 28.19it/s]

{'eval_loss': 0.17714670300483704, 'eval_f1': 0.8563309282756144, 'eval_precision': 0.8705699943795182, 'eval_recall': 0.8455423920243258, 'eval_runtime': 3.7603, 'eval_samples_per_second': 405.026, 'eval_steps_per_second': 101.323, 'epoch': 14.0}


 96%|█████████▋| 22004/22845 [14:12<00:31, 26.55it/s]

{'loss': 0.0009, 'grad_norm': 0.03775255009531975, 'learning_rate': 1.8494200043773256e-06, 'epoch': 14.45}


100%|██████████| 22845/22845 [14:41<00:00, 27.58it/s]
100%|██████████| 22845/22845 [14:45<00:00, 27.58it/s]

{'eval_loss': 0.1811661720275879, 'eval_f1': 0.8543819618143547, 'eval_precision': 0.8815826634514277, 'eval_recall': 0.8333299180269547, 'eval_runtime': 3.5601, 'eval_samples_per_second': 427.797, 'eval_steps_per_second': 107.019, 'epoch': 15.0}


100%|██████████| 22845/22845 [14:46<00:00, 25.76it/s]


{'train_runtime': 886.9381, 'train_samples_per_second': 103.012, 'train_steps_per_second': 25.757, 'train_loss': 0.028558281287762916, 'epoch': 15.0}


100%|██████████| 381/381 [00:03<00:00, 113.32it/s]


Evaluation Metrics: {'eval_loss': 0.17714670300483704, 'eval_f1': 0.8563309282756144, 'eval_precision': 0.8705699943795182, 'eval_recall': 0.8455423920243258, 'eval_runtime': 3.3726, 'eval_samples_per_second': 451.575, 'eval_steps_per_second': 112.968, 'epoch': 15.0}
CHECK SCORE                                                 model   lan          cat  \
12  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0...  java      summary   
13  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0...  java    Ownership   
14  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0...  java       Expand   
15  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0...  java        usage   
16  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0...  java      Pointer   
17  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0...  java  deprecation   
18  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0...  java     rational   

    precision    recall        f1  
12   0.931944  0.954481  0.943078  
13   0.916667  1.000000  0.956522  
14 

0,1
eval/f1,▁▆▇█▇▇█▇▇███████
eval/loss,▃▁▁▂▃▅▄▅▆█▆▇▇▇█▇
eval/precision,▁█▇█▇█▇▇▇█▇█▇███
eval/recall,▁▅▇▇▇▇█▇▇▇██████
eval/runtime,▅█▂▂▄▂▂▃▃▂▂▁▁▃▂▁
eval/samples_per_second,▃▁▆▆▄▆▇▅▅▇▆█▇▆▇█
eval/steps_per_second,▃▁▆▆▄▆▇▅▅▇▆█▇▆▇█
train/epoch,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
train/global_step,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
train/grad_norm,▁█▂▁▁▄▁▁▃▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/f1,0.85633
eval/loss,0.17715
eval/precision,0.87057
eval/recall,0.84554
eval/runtime,3.3726
eval/samples_per_second,451.575
eval/steps_per_second,112.968
total_flos,3025990766741760.0
train/epoch,15.0
train/global_step,22845.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1884/1884 [00:00<00:00, 14846.84 examples/s]
  7%|▋         | 375/5655 [00:13<03:12, 27.37it/s]
  7%|▋         | 377/5655 [00:14<03:12, 27.37it/s]

{'eval_loss': 0.3238682150840759, 'eval_f1': 0.38529465231705995, 'eval_precision': 0.5579487179487179, 'eval_recall': 0.3227612362730669, 'eval_runtime': 0.9539, 'eval_samples_per_second': 395.237, 'eval_steps_per_second': 99.595, 'epoch': 1.0}


 13%|█▎        | 752/5655 [00:28<02:51, 28.55it/s]
 13%|█▎        | 754/5655 [00:28<02:51, 28.55it/s]

{'eval_loss': 0.3317931294441223, 'eval_f1': 0.5410402097902097, 'eval_precision': 0.6066349367390785, 'eval_recall': 0.5061623472724508, 'eval_runtime': 0.834, 'eval_samples_per_second': 452.046, 'eval_steps_per_second': 113.911, 'epoch': 2.0}


 18%|█▊        | 1003/5655 [00:38<02:59, 25.89it/s]

{'loss': 0.3253, 'grad_norm': 11.042821884155273, 'learning_rate': 4.1158267020335985e-05, 'epoch': 2.65}


 20%|█▉        | 1130/5655 [00:43<02:36, 28.97it/s]
 20%|██        | 1131/5655 [00:44<02:36, 28.97it/s]

{'eval_loss': 0.3285452425479889, 'eval_f1': 0.618053570215415, 'eval_precision': 0.7523946586175296, 'eval_recall': 0.5513021093729307, 'eval_runtime': 0.862, 'eval_samples_per_second': 437.336, 'eval_steps_per_second': 110.204, 'epoch': 3.0}


 27%|██▋       | 1505/5655 [00:58<02:18, 29.94it/s]
 27%|██▋       | 1508/5655 [00:59<02:18, 29.94it/s]

{'eval_loss': 0.39022260904312134, 'eval_f1': 0.6718574830231943, 'eval_precision': 0.7241842943284376, 'eval_recall': 0.6380871439189432, 'eval_runtime': 0.8579, 'eval_samples_per_second': 439.437, 'eval_steps_per_second': 110.733, 'epoch': 4.0}


 33%|███▎      | 1885/5655 [01:13<02:05, 29.93it/s]
 33%|███▎      | 1885/5655 [01:14<02:05, 29.93it/s]

{'eval_loss': 0.4640596807003021, 'eval_f1': 0.6529154831898142, 'eval_precision': 0.7054243904243904, 'eval_recall': 0.6314351127512371, 'eval_runtime': 0.8417, 'eval_samples_per_second': 447.915, 'eval_steps_per_second': 112.87, 'epoch': 5.0}


 35%|███▌      | 2002/5655 [01:19<02:10, 27.90it/s]

{'loss': 0.1161, 'grad_norm': 4.507205486297607, 'learning_rate': 3.2316534040671975e-05, 'epoch': 5.31}


 40%|███▉      | 2261/5655 [01:28<02:08, 26.37it/s]
 40%|████      | 2262/5655 [01:29<02:08, 26.37it/s]

{'eval_loss': 0.49445608258247375, 'eval_f1': 0.6974220080271201, 'eval_precision': 0.7293407761828814, 'eval_recall': 0.6751067994127986, 'eval_runtime': 1.0285, 'eval_samples_per_second': 366.548, 'eval_steps_per_second': 92.366, 'epoch': 6.0}


 47%|████▋     | 2639/5655 [01:43<01:47, 28.03it/s]
 47%|████▋     | 2639/5655 [01:44<01:47, 28.03it/s]

{'eval_loss': 0.5737125873565674, 'eval_f1': 0.6757507034721285, 'eval_precision': 0.6972004958308926, 'eval_recall': 0.6604427461108927, 'eval_runtime': 0.9102, 'eval_samples_per_second': 414.195, 'eval_steps_per_second': 104.373, 'epoch': 7.0}


 53%|█████▎    | 3003/5655 [01:57<01:35, 27.78it/s]

{'loss': 0.032, 'grad_norm': 0.11612539738416672, 'learning_rate': 2.347480106100796e-05, 'epoch': 7.96}


 53%|█████▎    | 3016/5655 [01:58<01:34, 27.94it/s]
 53%|█████▎    | 3016/5655 [01:59<01:34, 27.94it/s]

{'eval_loss': 0.5705423355102539, 'eval_f1': 0.6993982063740349, 'eval_precision': 0.7201285714955679, 'eval_recall': 0.684152193484859, 'eval_runtime': 1.094, 'eval_samples_per_second': 344.623, 'eval_steps_per_second': 86.841, 'epoch': 8.0}


 60%|██████    | 3393/5655 [02:13<01:21, 27.67it/s]
 60%|██████    | 3393/5655 [02:14<01:21, 27.67it/s]

{'eval_loss': 0.5936078429222107, 'eval_f1': 0.677015119375519, 'eval_precision': 0.6796909033549162, 'eval_recall': 0.680525781003338, 'eval_runtime': 0.8157, 'eval_samples_per_second': 462.159, 'eval_steps_per_second': 116.459, 'epoch': 9.0}


 67%|██████▋   | 3767/5655 [02:28<01:03, 29.57it/s]
 67%|██████▋   | 3770/5655 [02:29<01:03, 29.57it/s]

{'eval_loss': 0.6414613723754883, 'eval_f1': 0.6950142425210509, 'eval_precision': 0.7005614222182842, 'eval_recall': 0.707777285747804, 'eval_runtime': 0.8974, 'eval_samples_per_second': 420.117, 'eval_steps_per_second': 105.865, 'epoch': 10.0}


 71%|███████   | 4003/5655 [02:38<00:57, 28.58it/s]

{'loss': 0.0082, 'grad_norm': 0.019265083596110344, 'learning_rate': 1.4633068081343945e-05, 'epoch': 10.61}


 73%|███████▎  | 4147/5655 [02:43<00:50, 29.60it/s]
 73%|███████▎  | 4147/5655 [02:44<00:50, 29.60it/s]

{'eval_loss': 0.6573078036308289, 'eval_f1': 0.6941551399906325, 'eval_precision': 0.6879016144417678, 'eval_recall': 0.7078978035969481, 'eval_runtime': 0.9373, 'eval_samples_per_second': 402.226, 'eval_steps_per_second': 101.357, 'epoch': 11.0}


 80%|████████  | 4524/5655 [02:58<00:39, 28.53it/s]
 80%|████████  | 4524/5655 [02:59<00:39, 28.53it/s]

{'eval_loss': 0.6659700870513916, 'eval_f1': 0.6949505792931051, 'eval_precision': 0.6951121755554256, 'eval_recall': 0.702207520897894, 'eval_runtime': 0.8635, 'eval_samples_per_second': 436.591, 'eval_steps_per_second': 110.016, 'epoch': 12.0}


 87%|████████▋ | 4899/5655 [03:13<00:27, 27.73it/s]
 87%|████████▋ | 4901/5655 [03:14<00:27, 27.73it/s]

{'eval_loss': 0.6892114877700806, 'eval_f1': 0.6982925658971466, 'eval_precision': 0.7000180401710098, 'eval_recall': 0.7003544813246205, 'eval_runtime': 1.0266, 'eval_samples_per_second': 367.23, 'eval_steps_per_second': 92.538, 'epoch': 13.0}


 89%|████████▊ | 5005/5655 [03:18<00:22, 29.16it/s]

{'loss': 0.002, 'grad_norm': 0.006003796122968197, 'learning_rate': 5.7913351016799295e-06, 'epoch': 13.26}


 93%|█████████▎| 5276/5655 [03:28<00:13, 27.42it/s]
 93%|█████████▎| 5278/5655 [03:29<00:13, 27.42it/s]

{'eval_loss': 0.7000097632408142, 'eval_f1': 0.6960380314292427, 'eval_precision': 0.7003456081568527, 'eval_recall': 0.6970486962006535, 'eval_runtime': 0.8439, 'eval_samples_per_second': 446.741, 'eval_steps_per_second': 112.574, 'epoch': 14.0}


100%|█████████▉| 5654/5655 [03:43<00:00, 27.09it/s]
100%|██████████| 5655/5655 [03:45<00:00, 27.09it/s]

{'eval_loss': 0.7028425931930542, 'eval_f1': 0.6946157756563603, 'eval_precision': 0.6986840505519116, 'eval_recall': 0.6949472469252912, 'eval_runtime': 0.9637, 'eval_samples_per_second': 391.185, 'eval_steps_per_second': 98.574, 'epoch': 15.0}


100%|██████████| 5655/5655 [03:46<00:00, 24.98it/s]


{'train_runtime': 226.3754, 'train_samples_per_second': 99.856, 'train_steps_per_second': 24.981, 'train_loss': 0.08562114469568673, 'epoch': 15.0}


100%|██████████| 95/95 [00:00<00:00, 115.47it/s]


Evaluation Metrics: {'eval_loss': 0.5705423355102539, 'eval_f1': 0.6993982063740349, 'eval_precision': 0.7201285714955679, 'eval_recall': 0.684152193484859, 'eval_runtime': 0.833, 'eval_samples_per_second': 452.59, 'eval_steps_per_second': 114.048, 'epoch': 15.0}
CHECK SCORE                                                 model     lan  \
14  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0...  python   
15  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0...  python   
16  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0...  python   
17  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0...  python   
18  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0...  python   

                 cat  precision    recall        f1  
14             Usage   0.840708  0.785124  0.811966  
15        Parameters   0.864865  0.857143  0.860987  
16  DevelopmentNotes   0.547619  0.575000  0.560976  
17            Expand   0.627451  0.463768  0.533333  
18           Summary   0.720000  0.739726  0.729730  
Scores:  

0,1
eval/f1,▁▄▆▇▇█▇█████████
eval/loss,▁▁▁▂▄▄▆▆▆▇▇▇███▆
eval/precision,▁▃█▇▆▇▆▇▅▆▆▆▆▆▆▇
eval/recall,▁▄▅▇▇▇▇█████████
eval/runtime,▄▁▂▂▂▆▃█▁▃▄▂▆▂▅▁
eval/samples_per_second,▄▇▇▇▇▂▅▁█▅▄▆▂▇▄▇
eval/steps_per_second,▄▇▇▇▇▂▅▁█▅▄▆▂▇▄▇
train/epoch,▁▁▂▂▃▃▃▃▄▄▅▅▅▆▆▇▇▇▇███
train/global_step,▁▁▂▂▃▃▃▃▄▄▅▅▅▆▆▇▇▇▇███
train/grad_norm,█▄▁▁▁

0,1
eval/f1,0.6994
eval/loss,0.57054
eval/precision,0.72013
eval/recall,0.68415
eval/runtime,0.833
eval/samples_per_second,452.59
eval/steps_per_second,114.048
total_flos,748646437651200.0
train/epoch,15.0
train/global_step,5655.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1298/1298 [00:00<00:00, 15822.59 examples/s]
  7%|▋         | 260/3900 [00:09<02:02, 29.73it/s]
  7%|▋         | 260/3900 [00:09<02:02, 29.73it/s]

{'eval_loss': 0.2642990052700043, 'eval_f1': 0.34943490208234074, 'eval_precision': 0.5913731350093535, 'eval_recall': 0.2961255285434387, 'eval_runtime': 0.6382, 'eval_samples_per_second': 407.387, 'eval_steps_per_second': 101.847, 'epoch': 1.0}


 13%|█▎        | 518/3900 [00:19<02:02, 27.64it/s]
 13%|█▎        | 520/3900 [00:20<02:02, 27.64it/s]

{'eval_loss': 0.23337507247924805, 'eval_f1': 0.4865387869994952, 'eval_precision': 0.6186820209776537, 'eval_recall': 0.4223951026864462, 'eval_runtime': 0.6658, 'eval_samples_per_second': 390.489, 'eval_steps_per_second': 97.622, 'epoch': 2.0}


 20%|█▉        | 778/3900 [00:30<01:47, 29.05it/s]
 20%|██        | 780/3900 [00:31<01:47, 29.05it/s]

{'eval_loss': 0.2569299638271332, 'eval_f1': 0.5040518049380335, 'eval_precision': 0.5859392323172463, 'eval_recall': 0.4691551111369564, 'eval_runtime': 0.6649, 'eval_samples_per_second': 391.025, 'eval_steps_per_second': 97.756, 'epoch': 3.0}


 26%|██▌       | 1004/3900 [00:40<01:40, 28.78it/s]

{'loss': 0.1987, 'grad_norm': 8.646258354187012, 'learning_rate': 3.717948717948718e-05, 'epoch': 3.85}


 27%|██▋       | 1038/3900 [00:41<01:39, 28.80it/s]
 27%|██▋       | 1040/3900 [00:42<01:39, 28.80it/s]

{'eval_loss': 0.22595712542533875, 'eval_f1': 0.6248285320186183, 'eval_precision': 0.8078081978996614, 'eval_recall': 0.5539755223322684, 'eval_runtime': 0.6309, 'eval_samples_per_second': 412.132, 'eval_steps_per_second': 103.033, 'epoch': 4.0}


 33%|███▎      | 1300/3900 [00:52<01:28, 29.42it/s]
 33%|███▎      | 1300/3900 [00:52<01:28, 29.42it/s]

{'eval_loss': 0.23143519461154938, 'eval_f1': 0.6177463596523582, 'eval_precision': 0.8386320764581633, 'eval_recall': 0.5611675906489568, 'eval_runtime': 0.6083, 'eval_samples_per_second': 427.427, 'eval_steps_per_second': 106.857, 'epoch': 5.0}


 40%|████      | 1560/3900 [01:02<01:22, 28.36it/s]
 40%|████      | 1560/3900 [01:03<01:22, 28.36it/s]

{'eval_loss': 0.2496916800737381, 'eval_f1': 0.6356153167396492, 'eval_precision': 0.821723072055298, 'eval_recall': 0.5632170136135125, 'eval_runtime': 0.5759, 'eval_samples_per_second': 451.471, 'eval_steps_per_second': 112.868, 'epoch': 6.0}


 47%|████▋     | 1820/3900 [01:13<01:16, 27.23it/s]
 47%|████▋     | 1820/3900 [01:14<01:16, 27.23it/s]

{'eval_loss': 0.24539302289485931, 'eval_f1': 0.6521905881857775, 'eval_precision': 0.7963283855310433, 'eval_recall': 0.6027861507269634, 'eval_runtime': 0.6969, 'eval_samples_per_second': 373.056, 'eval_steps_per_second': 93.264, 'epoch': 7.0}


 51%|█████▏    | 2004/3900 [01:21<01:04, 29.28it/s]

{'loss': 0.0434, 'grad_norm': 0.029476424679160118, 'learning_rate': 2.435897435897436e-05, 'epoch': 7.69}


 53%|█████▎    | 2079/3900 [01:24<01:04, 28.30it/s]
 53%|█████▎    | 2080/3900 [01:25<01:04, 28.30it/s]

{'eval_loss': 0.27531179785728455, 'eval_f1': 0.638164906166656, 'eval_precision': 0.8698798397863818, 'eval_recall': 0.5746777571694139, 'eval_runtime': 0.5976, 'eval_samples_per_second': 435.087, 'eval_steps_per_second': 108.772, 'epoch': 8.0}


 60%|██████    | 2340/3900 [01:34<00:51, 30.35it/s]
 60%|██████    | 2340/3900 [01:35<00:51, 30.35it/s]

{'eval_loss': 0.2623283863067627, 'eval_f1': 0.6606069442569515, 'eval_precision': 0.8446172660039043, 'eval_recall': 0.5953721735498274, 'eval_runtime': 0.6607, 'eval_samples_per_second': 393.497, 'eval_steps_per_second': 98.374, 'epoch': 9.0}


 67%|██████▋   | 2599/3900 [01:45<00:44, 29.36it/s]
 67%|██████▋   | 2600/3900 [01:46<00:44, 29.36it/s]

{'eval_loss': 0.2900409698486328, 'eval_f1': 0.6605933891405077, 'eval_precision': 0.8271995815183123, 'eval_recall': 0.6026841200928832, 'eval_runtime': 0.6561, 'eval_samples_per_second': 396.29, 'eval_steps_per_second': 99.073, 'epoch': 10.0}


 73%|███████▎  | 2860/3900 [01:56<00:37, 27.55it/s]
 73%|███████▎  | 2860/3900 [01:57<00:37, 27.55it/s]

{'eval_loss': 0.28578275442123413, 'eval_f1': 0.6680327857767806, 'eval_precision': 0.7496283413608282, 'eval_recall': 0.628143261340326, 'eval_runtime': 0.5989, 'eval_samples_per_second': 434.102, 'eval_steps_per_second': 108.526, 'epoch': 11.0}


 77%|███████▋  | 3004/3900 [02:03<00:32, 27.68it/s]

{'loss': 0.0107, 'grad_norm': 0.011515134945511818, 'learning_rate': 1.153846153846154e-05, 'epoch': 11.54}


 80%|███████▉  | 3119/3900 [02:07<00:27, 28.30it/s]
 80%|████████  | 3120/3900 [02:08<00:27, 28.30it/s]

{'eval_loss': 0.30038899183273315, 'eval_f1': 0.6466141838071228, 'eval_precision': 0.8098173817117916, 'eval_recall': 0.588068656398286, 'eval_runtime': 0.6921, 'eval_samples_per_second': 375.654, 'eval_steps_per_second': 93.913, 'epoch': 12.0}


 87%|████████▋ | 3379/3900 [02:18<00:17, 28.97it/s]
 87%|████████▋ | 3380/3900 [02:18<00:17, 28.97it/s]

{'eval_loss': 0.28413400053977966, 'eval_f1': 0.6747882074081311, 'eval_precision': 0.8384010239273396, 'eval_recall': 0.6122248939933872, 'eval_runtime': 0.581, 'eval_samples_per_second': 447.513, 'eval_steps_per_second': 111.878, 'epoch': 13.0}


 93%|█████████▎| 3640/3900 [02:28<00:09, 27.97it/s]
 93%|█████████▎| 3640/3900 [02:29<00:09, 27.97it/s]

{'eval_loss': 0.2941231429576874, 'eval_f1': 0.6708130887360905, 'eval_precision': 0.8335843955686942, 'eval_recall': 0.6100660511008479, 'eval_runtime': 0.6422, 'eval_samples_per_second': 404.881, 'eval_steps_per_second': 101.22, 'epoch': 14.0}


100%|██████████| 3900/3900 [02:39<00:00, 28.59it/s]
100%|██████████| 3900/3900 [02:43<00:00, 28.59it/s]

{'eval_loss': 0.2938629686832428, 'eval_f1': 0.6735293267464462, 'eval_precision': 0.8333601300383062, 'eval_recall': 0.6157803368151334, 'eval_runtime': 0.7856, 'eval_samples_per_second': 330.952, 'eval_steps_per_second': 82.738, 'epoch': 15.0}


100%|██████████| 3900/3900 [02:44<00:00, 23.68it/s]


{'train_runtime': 164.6903, 'train_samples_per_second': 94.541, 'train_steps_per_second': 23.681, 'train_loss': 0.06565825107770089, 'epoch': 15.0}


100%|██████████| 65/65 [00:00<00:00, 111.58it/s]


Evaluation Metrics: {'eval_loss': 0.28413400053977966, 'eval_f1': 0.6747882074081311, 'eval_precision': 0.8384010239273396, 'eval_recall': 0.6122248939933872, 'eval_runtime': 0.5944, 'eval_samples_per_second': 437.411, 'eval_steps_per_second': 109.353, 'epoch': 15.0}
CHECK SCORE                                                 model    lan  \
12  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0...  pharo   
13  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0...  pharo   
14  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0...  pharo   
15  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0...  pharo   
16  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0...  pharo   
17  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0...  pharo   
18  lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0...  pharo   

                        cat  precision    recall        f1  
12  Keyimplementationpoints   0.875000  0.666667  0.756757  
13                  Example   0.947368  0.891089  0.918367  
14         Responsibilit

0,1
eval/f1,▁▄▄▇▇▇█▇███▇████
eval/loss,▅▂▄▁▂▃▃▆▄▇▇█▆▇▇▆
eval/precision,▁▂▁▆▇▇▆█▇▇▅▇▇▇▇▇
eval/recall,▁▄▅▆▇▇▇▇▇▇█▇████
eval/runtime,▃▄▄▃▂▁▅▂▄▄▂▅▁▃█▂
eval/samples_per_second,▅▄▄▆▇█▃▇▅▅▇▄█▅▁▇
eval/steps_per_second,▅▄▄▆▇█▃▇▅▅▇▄█▅▁▇
train/epoch,▁▁▂▂▃▃▃▄▄▅▅▅▆▆▇▇▇███
train/global_step,▁▁▂▂▃▃▃▄▄▅▅▅▆▆▇▇▇███
train/grad_norm,█▁▁

0,1
eval/f1,0.67479
eval/loss,0.28413
eval/precision,0.8384
eval/recall,0.61222
eval/runtime,0.5944
eval/samples_per_second,437.411
eval/steps_per_second,109.353
total_flos,515675326855680.0
train/epoch,15.0
train/global_step,3900.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 7614/7614 [00:00<00:00, 20194.85 examples/s]
  4%|▍         | 1005/22845 [00:35<12:48, 28.41it/s]

{'loss': 0.2771, 'grad_norm': 0.5760248303413391, 'learning_rate': 4.781133727292624e-06, 'epoch': 0.66}


  7%|▋         | 1521/22845 [00:53<11:56, 29.78it/s]
  7%|▋         | 1523/22845 [00:56<11:56, 29.78it/s]

{'eval_loss': 0.1444179117679596, 'eval_f1': 0.5256945393806732, 'eval_precision': 0.5270225870252195, 'eval_recall': 0.5250400746968252, 'eval_runtime': 3.5515, 'eval_samples_per_second': 428.838, 'eval_steps_per_second': 107.28, 'epoch': 1.0}


  9%|▉         | 2005/22845 [01:14<12:15, 28.33it/s]  

{'loss': 0.1466, 'grad_norm': 1.5200804471969604, 'learning_rate': 4.562267454585248e-06, 'epoch': 1.31}


 13%|█▎        | 3004/22845 [01:50<11:32, 28.66it/s]

{'loss': 0.115, 'grad_norm': 1.001747965812683, 'learning_rate': 4.343401181877873e-06, 'epoch': 1.97}


 13%|█▎        | 3043/22845 [01:51<11:24, 28.94it/s]
 13%|█▎        | 3046/22845 [01:54<11:24, 28.94it/s]

{'eval_loss': 0.11495280265808105, 'eval_f1': 0.5471973862687426, 'eval_precision': 0.655117699112914, 'eval_recall': 0.539425898107128, 'eval_runtime': 3.4406, 'eval_samples_per_second': 442.651, 'eval_steps_per_second': 110.735, 'epoch': 2.0}


 18%|█▊        | 4004/22845 [02:29<10:54, 28.78it/s]  

{'loss': 0.0931, 'grad_norm': 0.10689665377140045, 'learning_rate': 4.124534909170497e-06, 'epoch': 2.63}


 20%|██        | 4569/22845 [02:49<10:20, 29.45it/s]
 20%|██        | 4569/22845 [02:53<10:20, 29.45it/s]

{'eval_loss': 0.09857282787561417, 'eval_f1': 0.7562487139111396, 'eval_precision': 0.9085116522447603, 'eval_recall': 0.6921937323798755, 'eval_runtime': 4.0205, 'eval_samples_per_second': 378.812, 'eval_steps_per_second': 94.765, 'epoch': 3.0}


 22%|██▏       | 5003/22845 [03:09<10:41, 27.83it/s]  

{'loss': 0.0844, 'grad_norm': 10.439777374267578, 'learning_rate': 3.905668636463121e-06, 'epoch': 3.28}


 26%|██▋       | 6003/22845 [03:44<10:33, 26.59it/s]

{'loss': 0.0754, 'grad_norm': 2.199554920196533, 'learning_rate': 3.6868023637557455e-06, 'epoch': 3.94}


 27%|██▋       | 6090/22845 [03:47<10:13, 27.32it/s]
 27%|██▋       | 6092/22845 [03:51<10:13, 27.32it/s]

{'eval_loss': 0.10491222888231277, 'eval_f1': 0.7786445726971618, 'eval_precision': 0.9225330722011449, 'eval_recall': 0.7216563587522955, 'eval_runtime': 3.5658, 'eval_samples_per_second': 427.109, 'eval_steps_per_second': 106.847, 'epoch': 4.0}


 31%|███       | 7003/22845 [04:25<09:53, 26.69it/s]  

{'loss': 0.0631, 'grad_norm': 0.04264414310455322, 'learning_rate': 3.4679360910483694e-06, 'epoch': 4.6}


 33%|███▎      | 7613/22845 [04:46<08:33, 29.65it/s]
 33%|███▎      | 7615/22845 [04:50<08:33, 29.65it/s]

{'eval_loss': 0.0970206931233406, 'eval_f1': 0.8221026556974003, 'eval_precision': 0.8997973239012114, 'eval_recall': 0.7732620668335917, 'eval_runtime': 3.5284, 'eval_samples_per_second': 431.645, 'eval_steps_per_second': 107.982, 'epoch': 5.0}


 35%|███▌      | 8003/22845 [05:04<08:44, 28.29it/s]  

{'loss': 0.0631, 'grad_norm': 0.05786847323179245, 'learning_rate': 3.2490698183409942e-06, 'epoch': 5.25}


 39%|███▉      | 9004/22845 [05:39<08:18, 27.74it/s]

{'loss': 0.0506, 'grad_norm': 6.353844165802002, 'learning_rate': 3.030203545633618e-06, 'epoch': 5.91}


 40%|███▉      | 9136/22845 [05:44<07:54, 28.87it/s]
 40%|████      | 9138/22845 [05:48<07:54, 28.87it/s]

{'eval_loss': 0.09472591429948807, 'eval_f1': 0.8314685349331504, 'eval_precision': 0.9092868408057877, 'eval_recall': 0.7819820672271494, 'eval_runtime': 3.5741, 'eval_samples_per_second': 426.119, 'eval_steps_per_second': 106.6, 'epoch': 6.0}


 44%|████▍     | 10002/22845 [06:19<07:35, 28.20it/s] 

{'loss': 0.0458, 'grad_norm': 0.04580308496952057, 'learning_rate': 2.811337272926242e-06, 'epoch': 6.57}


 47%|████▋     | 10660/22845 [06:42<06:59, 29.03it/s]
 47%|████▋     | 10661/22845 [06:45<06:59, 29.03it/s]

{'eval_loss': 0.10047825425863266, 'eval_f1': 0.8348005199738487, 'eval_precision': 0.9007754552748926, 'eval_recall': 0.7893596902481005, 'eval_runtime': 3.513, 'eval_samples_per_second': 433.532, 'eval_steps_per_second': 108.454, 'epoch': 7.0}


 48%|████▊     | 11005/22845 [06:58<06:53, 28.61it/s]  

{'loss': 0.0434, 'grad_norm': 0.04639441892504692, 'learning_rate': 2.5924710002188665e-06, 'epoch': 7.22}


 53%|█████▎    | 12003/22845 [07:33<06:13, 29.03it/s]

{'loss': 0.0385, 'grad_norm': 0.029593179002404213, 'learning_rate': 2.3736047275114905e-06, 'epoch': 7.88}


 53%|█████▎    | 12182/22845 [07:40<06:12, 28.64it/s]
 53%|█████▎    | 12184/22845 [07:44<06:12, 28.64it/s]

{'eval_loss': 0.09895577281713486, 'eval_f1': 0.8533523678745991, 'eval_precision': 0.8890746457356553, 'eval_recall': 0.828304678294102, 'eval_runtime': 3.9883, 'eval_samples_per_second': 381.867, 'eval_steps_per_second': 95.529, 'epoch': 8.0}


 57%|█████▋    | 13005/22845 [08:13<05:55, 27.64it/s]  

{'loss': 0.0305, 'grad_norm': 5.995142459869385, 'learning_rate': 2.154738454804115e-06, 'epoch': 8.54}


 60%|██████    | 13707/22845 [08:38<05:25, 28.08it/s]
 60%|██████    | 13707/22845 [08:41<05:25, 28.08it/s]

{'eval_loss': 0.10295379161834717, 'eval_f1': 0.8517622688355893, 'eval_precision': 0.8821875225675729, 'eval_recall': 0.8269089797943902, 'eval_runtime': 3.6949, 'eval_samples_per_second': 412.189, 'eval_steps_per_second': 103.115, 'epoch': 9.0}


 61%|██████▏   | 14005/22845 [08:53<05:01, 29.35it/s]  

{'loss': 0.0337, 'grad_norm': 0.11025979369878769, 'learning_rate': 1.9358721820967393e-06, 'epoch': 9.19}


 66%|██████▌   | 15003/22845 [09:27<04:44, 27.52it/s]

{'loss': 0.0264, 'grad_norm': 0.019726043567061424, 'learning_rate': 1.7170059093893632e-06, 'epoch': 9.85}


 67%|██████▋   | 15229/22845 [09:35<04:25, 28.67it/s]
 67%|██████▋   | 15230/22845 [09:39<04:25, 28.67it/s]

{'eval_loss': 0.10247515887022018, 'eval_f1': 0.853430039455365, 'eval_precision': 0.8871394816300312, 'eval_recall': 0.8284539893293236, 'eval_runtime': 3.3953, 'eval_samples_per_second': 448.556, 'eval_steps_per_second': 112.213, 'epoch': 10.0}


 70%|███████   | 16005/22845 [10:07<03:57, 28.76it/s]

{'loss': 0.0231, 'grad_norm': 0.17144431173801422, 'learning_rate': 1.4981396366819876e-06, 'epoch': 10.51}


 73%|███████▎  | 16750/22845 [10:33<03:30, 28.96it/s]
 73%|███████▎  | 16753/22845 [10:37<03:30, 28.96it/s]

{'eval_loss': 0.102994404733181, 'eval_f1': 0.8428605841379512, 'eval_precision': 0.8635578356891358, 'eval_recall': 0.8251678183797484, 'eval_runtime': 3.5628, 'eval_samples_per_second': 427.474, 'eval_steps_per_second': 106.939, 'epoch': 11.0}


 74%|███████▍  | 17004/22845 [10:46<03:22, 28.88it/s]

{'loss': 0.0238, 'grad_norm': 0.012645172886550426, 'learning_rate': 1.2792733639746116e-06, 'epoch': 11.16}


 79%|███████▉  | 18004/22845 [11:21<02:44, 29.34it/s]

{'loss': 0.0217, 'grad_norm': 0.03638828173279762, 'learning_rate': 1.0604070912672358e-06, 'epoch': 11.82}


 80%|███████▉  | 18274/22845 [11:31<02:38, 28.87it/s]
 80%|████████  | 18276/22845 [11:34<02:38, 28.87it/s]

{'eval_loss': 0.10571939498186111, 'eval_f1': 0.8446217533863722, 'eval_precision': 0.8684378704890506, 'eval_recall': 0.8240901132272783, 'eval_runtime': 3.3711, 'eval_samples_per_second': 451.776, 'eval_steps_per_second': 113.018, 'epoch': 12.0}


 83%|████████▎ | 19003/22845 [12:00<02:14, 28.48it/s]

{'loss': 0.0196, 'grad_norm': 0.1868448108434677, 'learning_rate': 8.4154081855986e-07, 'epoch': 12.48}


 87%|████████▋ | 19799/22845 [12:28<01:39, 30.63it/s]
 87%|████████▋ | 19799/22845 [12:32<01:39, 30.63it/s]

{'eval_loss': 0.10680585354566574, 'eval_f1': 0.8476804358436504, 'eval_precision': 0.8651234517965323, 'eval_recall': 0.8324224224499208, 'eval_runtime': 3.5972, 'eval_samples_per_second': 423.387, 'eval_steps_per_second': 105.916, 'epoch': 13.0}


 88%|████████▊ | 20004/22845 [12:40<01:36, 29.50it/s]

{'loss': 0.0181, 'grad_norm': 12.680695533752441, 'learning_rate': 6.226745458524842e-07, 'epoch': 13.13}


 92%|█████████▏| 21002/22845 [13:15<01:05, 28.20it/s]

{'loss': 0.0182, 'grad_norm': 0.013420798815786839, 'learning_rate': 4.038082731451084e-07, 'epoch': 13.79}


 93%|█████████▎| 21320/22845 [13:26<00:57, 26.70it/s]
 93%|█████████▎| 21322/22845 [13:30<00:57, 26.70it/s]

{'eval_loss': 0.10899107903242111, 'eval_f1': 0.8526832971726466, 'eval_precision': 0.8792134762712788, 'eval_recall': 0.8321390922625467, 'eval_runtime': 3.9403, 'eval_samples_per_second': 386.516, 'eval_steps_per_second': 96.693, 'epoch': 14.0}


 96%|█████████▋| 22002/22845 [13:55<00:29, 28.24it/s]

{'loss': 0.0182, 'grad_norm': 10.440232276916504, 'learning_rate': 1.8494200043773256e-07, 'epoch': 14.45}


100%|█████████▉| 22843/22845 [14:24<00:00, 29.22it/s]
100%|██████████| 22845/22845 [14:29<00:00, 29.22it/s]

{'eval_loss': 0.10848498344421387, 'eval_f1': 0.8471560353209455, 'eval_precision': 0.8688759452415874, 'eval_recall': 0.8284675265368687, 'eval_runtime': 3.5184, 'eval_samples_per_second': 432.873, 'eval_steps_per_second': 108.289, 'epoch': 15.0}


100%|██████████| 22845/22845 [14:30<00:00, 26.23it/s]


{'train_runtime': 870.882, 'train_samples_per_second': 104.911, 'train_steps_per_second': 26.232, 'train_loss': 0.05874842687361766, 'epoch': 15.0}


100%|██████████| 381/381 [00:03<00:00, 111.29it/s]


Evaluation Metrics: {'eval_loss': 0.10247515887022018, 'eval_f1': 0.853430039455365, 'eval_precision': 0.8871394816300312, 'eval_recall': 0.8284539893293236, 'eval_runtime': 3.4407, 'eval_samples_per_second': 442.645, 'eval_steps_per_second': 110.734, 'epoch': 15.0}
CHECK SCORE                                                 model   lan          cat  \
12  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0...  java      summary   
13  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0...  java    Ownership   
14  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0...  java       Expand   
15  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0...  java        usage   
16  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0...  java      Pointer   
17  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0...  java  deprecation   
18  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0...  java     rational   

    precision    recall        f1  
12   0.944837  0.950213  0.947518  
13   0.964912  1.000000  0.982143  
14  

0,1
eval/f1,▁▁▆▆▇███████████
eval/loss,█▄▂▂▁▁▂▂▂▂▂▃▃▃▃▂
eval/precision,▁▃█████▇▇▇▇▇▇▇▇▇
eval/recall,▁▁▅▅▇▇▇█████████
eval/runtime,▃▂█▃▃▃▃█▄▁▃▁▃▇▃▂
eval/samples_per_second,▆▇▁▆▆▆▆▁▄█▆█▅▂▆▇
eval/steps_per_second,▆▇▁▆▆▆▆▁▄█▆█▅▂▆▇
train/epoch,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
train/global_step,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
train/grad_norm,▁▂▂▁▇▂▁▁▅▁▁▁▄▁▁▁▁▁▁█▁▇

0,1
eval/f1,0.85343
eval/loss,0.10248
eval/precision,0.88714
eval/recall,0.82845
eval/runtime,3.4407
eval/samples_per_second,442.645
eval/steps_per_second,110.734
total_flos,3025990766741760.0
train/epoch,15.0
train/global_step,22845.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1884/1884 [00:00<00:00, 9193.07 examples/s]
  7%|▋         | 377/5655 [00:13<03:12, 27.42it/s]
  7%|▋         | 377/5655 [00:14<03:12, 27.42it/s]

{'eval_loss': 0.4676857590675354, 'eval_f1': 0.003278688524590164, 'eval_precision': 0.2, 'eval_recall': 0.001652892561983471, 'eval_runtime': 0.8774, 'eval_samples_per_second': 429.688, 'eval_steps_per_second': 108.277, 'epoch': 1.0}


 13%|█▎        | 753/5655 [00:28<02:55, 27.91it/s]
 13%|█▎        | 754/5655 [00:29<02:55, 27.91it/s]

{'eval_loss': 0.4110015034675598, 'eval_f1': 0.161015873015873, 'eval_precision': 0.3888888888888889, 'eval_recall': 0.10929752066115701, 'eval_runtime': 0.8541, 'eval_samples_per_second': 441.414, 'eval_steps_per_second': 111.232, 'epoch': 2.0}


 18%|█▊        | 1002/5655 [00:39<02:42, 28.57it/s]

{'loss': 0.4565, 'grad_norm': 2.515568256378174, 'learning_rate': 4.115826702033599e-06, 'epoch': 2.65}


 20%|█▉        | 1129/5655 [00:43<02:33, 29.47it/s]
 20%|██        | 1131/5655 [00:44<02:33, 29.47it/s]

{'eval_loss': 0.3678452968597412, 'eval_f1': 0.3816637643663864, 'eval_precision': 0.5479314802844215, 'eval_recall': 0.2977824634891883, 'eval_runtime': 0.9004, 'eval_samples_per_second': 418.683, 'eval_steps_per_second': 105.504, 'epoch': 3.0}


 27%|██▋       | 1508/5655 [01:01<02:30, 27.54it/s]
 27%|██▋       | 1508/5655 [01:02<02:30, 27.54it/s]

{'eval_loss': 0.3465121388435364, 'eval_f1': 0.4272533355774938, 'eval_precision': 0.6957693263504744, 'eval_recall': 0.342559328970342, 'eval_runtime': 1.0357, 'eval_samples_per_second': 364.0, 'eval_steps_per_second': 91.724, 'epoch': 4.0}


 33%|███▎      | 1883/5655 [01:16<02:13, 28.22it/s]
 33%|███▎      | 1885/5655 [01:17<02:13, 28.22it/s]

{'eval_loss': 0.3301655650138855, 'eval_f1': 0.45806598958120653, 'eval_precision': 0.6256195051802464, 'eval_recall': 0.4038566555540945, 'eval_runtime': 0.8742, 'eval_samples_per_second': 431.27, 'eval_steps_per_second': 108.676, 'epoch': 5.0}


 35%|███▌      | 2003/5655 [01:22<02:04, 29.24it/s]

{'loss': 0.3146, 'grad_norm': 4.057652473449707, 'learning_rate': 3.2316534040671975e-06, 'epoch': 5.31}


 40%|███▉      | 2261/5655 [01:31<01:58, 28.58it/s]
 40%|████      | 2262/5655 [01:32<01:58, 28.58it/s]

{'eval_loss': 0.3221471607685089, 'eval_f1': 0.5100299784645833, 'eval_precision': 0.6232197614111573, 'eval_recall': 0.45834092766655443, 'eval_runtime': 1.0026, 'eval_samples_per_second': 376.02, 'eval_steps_per_second': 94.753, 'epoch': 6.0}


 47%|████▋     | 2637/5655 [01:46<01:54, 26.41it/s]
 47%|████▋     | 2639/5655 [01:47<01:54, 26.41it/s]

{'eval_loss': 0.31720855832099915, 'eval_f1': 0.5018464247779922, 'eval_precision': 0.6300875488886681, 'eval_recall': 0.45696244778016937, 'eval_runtime': 0.8467, 'eval_samples_per_second': 445.263, 'eval_steps_per_second': 112.202, 'epoch': 7.0}


 53%|█████▎    | 3003/5655 [02:01<01:30, 29.19it/s]

{'loss': 0.2334, 'grad_norm': 4.178502559661865, 'learning_rate': 2.347480106100796e-06, 'epoch': 7.96}


 53%|█████▎    | 3016/5655 [02:02<01:30, 29.30it/s]
 53%|█████▎    | 3016/5655 [02:02<01:30, 29.30it/s]

{'eval_loss': 0.30935806035995483, 'eval_f1': 0.5243889061909369, 'eval_precision': 0.6451781970649895, 'eval_recall': 0.47863662205464685, 'eval_runtime': 0.8661, 'eval_samples_per_second': 435.301, 'eval_steps_per_second': 109.691, 'epoch': 8.0}


 60%|█████▉    | 3392/5655 [02:17<01:15, 30.15it/s]
 60%|██████    | 3393/5655 [02:18<01:15, 30.15it/s]

{'eval_loss': 0.3177170753479004, 'eval_f1': 0.5277298474945534, 'eval_precision': 0.6443141952596904, 'eval_recall': 0.4868663126629178, 'eval_runtime': 0.8648, 'eval_samples_per_second': 435.917, 'eval_steps_per_second': 109.846, 'epoch': 9.0}


 67%|██████▋   | 3769/5655 [02:32<01:03, 29.83it/s]
 67%|██████▋   | 3770/5655 [02:33<01:03, 29.83it/s]

{'eval_loss': 0.3186541199684143, 'eval_f1': 0.5273749989729998, 'eval_precision': 0.642719971283708, 'eval_recall': 0.4865043629913141, 'eval_runtime': 0.9581, 'eval_samples_per_second': 393.476, 'eval_steps_per_second': 99.152, 'epoch': 10.0}


 71%|███████   | 4005/5655 [02:42<00:57, 28.56it/s]

{'loss': 0.1827, 'grad_norm': 14.173871994018555, 'learning_rate': 1.4633068081343946e-06, 'epoch': 10.61}


 73%|███████▎  | 4144/5655 [02:47<00:51, 29.36it/s]
 73%|███████▎  | 4147/5655 [02:48<00:51, 29.36it/s]

{'eval_loss': 0.3127506673336029, 'eval_f1': 0.5471443988976823, 'eval_precision': 0.8284763612910065, 'eval_recall': 0.5001788190061783, 'eval_runtime': 0.9118, 'eval_samples_per_second': 413.457, 'eval_steps_per_second': 104.187, 'epoch': 11.0}


 80%|███████▉  | 4522/5655 [03:02<00:41, 27.07it/s]
 80%|████████  | 4524/5655 [03:02<00:41, 27.07it/s]

{'eval_loss': 0.310415655374527, 'eval_f1': 0.5786855548348013, 'eval_precision': 0.8166309523809524, 'eval_recall': 0.5242402496613607, 'eval_runtime': 0.8274, 'eval_samples_per_second': 455.67, 'eval_steps_per_second': 114.824, 'epoch': 12.0}


 87%|████████▋ | 4901/5655 [03:17<00:28, 26.78it/s]
 87%|████████▋ | 4901/5655 [03:18<00:28, 26.78it/s]

{'eval_loss': 0.3147800862789154, 'eval_f1': 0.5842488009087005, 'eval_precision': 0.8232745618141155, 'eval_recall': 0.5282776022955513, 'eval_runtime': 1.0457, 'eval_samples_per_second': 360.531, 'eval_steps_per_second': 90.85, 'epoch': 13.0}


 89%|████████▊ | 5005/5655 [03:22<00:23, 27.90it/s]

{'loss': 0.1473, 'grad_norm': 5.1299214363098145, 'learning_rate': 5.79133510167993e-07, 'epoch': 13.26}


 93%|█████████▎| 5278/5655 [03:32<00:12, 29.04it/s]
 93%|█████████▎| 5278/5655 [03:33<00:12, 29.04it/s]

{'eval_loss': 0.31268468499183655, 'eval_f1': 0.5978203772336457, 'eval_precision': 0.8154061624649861, 'eval_recall': 0.5420974780719489, 'eval_runtime': 0.9691, 'eval_samples_per_second': 389.008, 'eval_steps_per_second': 98.026, 'epoch': 14.0}


100%|█████████▉| 5654/5655 [03:47<00:00, 29.09it/s]
100%|██████████| 5655/5655 [03:48<00:00, 29.09it/s]

{'eval_loss': 0.31328362226486206, 'eval_f1': 0.5951766597256221, 'eval_precision': 0.8167474682180564, 'eval_recall': 0.535503275173398, 'eval_runtime': 0.8938, 'eval_samples_per_second': 421.782, 'eval_steps_per_second': 106.285, 'epoch': 15.0}


100%|██████████| 5655/5655 [03:49<00:00, 24.59it/s]


{'train_runtime': 229.9336, 'train_samples_per_second': 98.311, 'train_steps_per_second': 24.594, 'train_loss': 0.2517797913623004, 'epoch': 15.0}


100%|██████████| 95/95 [00:00<00:00, 108.69it/s]


Evaluation Metrics: {'eval_loss': 0.31268468499183655, 'eval_f1': 0.5978203772336457, 'eval_precision': 0.8154061624649861, 'eval_recall': 0.5420974780719489, 'eval_runtime': 0.8845, 'eval_samples_per_second': 426.235, 'eval_steps_per_second': 107.407, 'epoch': 15.0}
CHECK SCORE                                                 model     lan  \
14  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0...  python   
15  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0...  python   
16  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0...  python   
17  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0...  python   
18  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0...  python   

                 cat  precision    recall        f1  
14             Usage   0.852941  0.719008  0.780269  
15        Parameters   0.833333  0.848214  0.840708  
16  DevelopmentNotes   1.000000  0.125000  0.222222  
17            Expand   0.676471  0.333333  0.446602  
18           Summary   0.714286  0.684932  0.699301  
Score

0,1
eval/f1,▁▃▅▆▆▇▇▇▇▇▇█████
eval/loss,█▅▄▃▂▂▁▁▁▁▁▁▁▁▁▁
eval/precision,▁▃▅▇▆▆▆▆▆▆██████
eval/recall,▁▂▅▅▆▇▇▇▇▇▇█████
eval/runtime,▃▂▃█▃▇▂▂▂▅▄▁█▆▃▃
eval/samples_per_second,▆▇▅▁▆▂▇▇▇▃▅█▁▃▆▆
eval/steps_per_second,▆▇▅▁▆▂▇▇▇▃▅█▁▃▆▆
train/epoch,▁▁▂▂▃▃▃▃▄▄▅▅▅▆▆▇▇▇▇███
train/global_step,▁▁▂▂▃▃▃▃▄▄▅▅▅▆▆▇▇▇▇███
train/grad_norm,▁▂▂█▃

0,1
eval/f1,0.59782
eval/loss,0.31268
eval/precision,0.81541
eval/recall,0.5421
eval/runtime,0.8845
eval/samples_per_second,426.235
eval/steps_per_second,107.407
total_flos,748646437651200.0
train/epoch,15.0
train/global_step,5655.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1298/1298 [00:00<00:00, 11939.76 examples/s]
  7%|▋         | 260/3900 [00:09<02:11, 27.72it/s]
  7%|▋         | 260/3900 [00:09<02:11, 27.72it/s]

{'eval_loss': 0.4090248644351959, 'eval_f1': 0.0, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_runtime': 0.5543, 'eval_samples_per_second': 469.068, 'eval_steps_per_second': 117.267, 'epoch': 1.0}


 13%|█▎        | 519/3900 [00:20<01:56, 29.05it/s]
 13%|█▎        | 520/3900 [00:20<01:56, 29.05it/s]

{'eval_loss': 0.3517792224884033, 'eval_f1': 0.11065235342691991, 'eval_precision': 0.13293650793650794, 'eval_recall': 0.09476661951909478, 'eval_runtime': 0.6349, 'eval_samples_per_second': 409.546, 'eval_steps_per_second': 102.386, 'epoch': 2.0}


 20%|█▉        | 778/3900 [00:30<01:46, 29.24it/s]
 20%|██        | 780/3900 [00:31<01:46, 29.24it/s]

{'eval_loss': 0.3083249032497406, 'eval_f1': 0.12956293628562535, 'eval_precision': 0.2759740259740259, 'eval_recall': 0.11884016973125885, 'eval_runtime': 0.6858, 'eval_samples_per_second': 379.143, 'eval_steps_per_second': 94.786, 'epoch': 3.0}


 26%|██▌       | 1004/3900 [00:40<01:46, 27.30it/s]

{'loss': 0.3629, 'grad_norm': 1.35538649559021, 'learning_rate': 3.7179487179487184e-06, 'epoch': 3.85}


 27%|██▋       | 1038/3900 [00:41<01:42, 27.83it/s]
 27%|██▋       | 1040/3900 [00:42<01:42, 27.83it/s]

{'eval_loss': 0.2776077389717102, 'eval_f1': 0.17227089346905017, 'eval_precision': 0.3521008403361345, 'eval_recall': 0.14264969354078266, 'eval_runtime': 0.5975, 'eval_samples_per_second': 435.12, 'eval_steps_per_second': 108.78, 'epoch': 4.0}


 33%|███▎      | 1300/3900 [00:52<01:33, 27.87it/s]
 33%|███▎      | 1300/3900 [00:52<01:33, 27.87it/s]

{'eval_loss': 0.25724199414253235, 'eval_f1': 0.3406199223703852, 'eval_precision': 0.6790553619821912, 'eval_recall': 0.2487585330052991, 'eval_runtime': 0.5914, 'eval_samples_per_second': 439.616, 'eval_steps_per_second': 109.904, 'epoch': 5.0}


 40%|███▉      | 1557/3900 [01:02<01:19, 29.59it/s]
 40%|████      | 1560/3900 [01:03<01:19, 29.59it/s]

{'eval_loss': 0.23836103081703186, 'eval_f1': 0.4112727078561152, 'eval_precision': 0.6705980877939842, 'eval_recall': 0.3146372597357549, 'eval_runtime': 0.5986, 'eval_samples_per_second': 434.352, 'eval_steps_per_second': 108.588, 'epoch': 6.0}


 47%|████▋     | 1817/3900 [01:13<01:10, 29.42it/s]
 47%|████▋     | 1820/3900 [01:13<01:10, 29.42it/s]

{'eval_loss': 0.22647753357887268, 'eval_f1': 0.44722025420528627, 'eval_precision': 0.6746742743383282, 'eval_recall': 0.35039123282943285, 'eval_runtime': 0.5972, 'eval_samples_per_second': 435.366, 'eval_steps_per_second': 108.841, 'epoch': 7.0}


 51%|█████▏    | 2002/3900 [01:21<01:06, 28.43it/s]

{'loss': 0.2152, 'grad_norm': 0.6488374471664429, 'learning_rate': 2.435897435897436e-06, 'epoch': 7.69}


 53%|█████▎    | 2078/3900 [01:24<01:06, 27.33it/s]
 53%|█████▎    | 2080/3900 [01:24<01:06, 27.33it/s]

{'eval_loss': 0.21898463368415833, 'eval_f1': 0.4815876186680674, 'eval_precision': 0.6413528960112812, 'eval_recall': 0.3981132719849219, 'eval_runtime': 0.6015, 'eval_samples_per_second': 432.244, 'eval_steps_per_second': 108.061, 'epoch': 8.0}


 60%|█████▉    | 2339/3900 [01:34<00:57, 27.38it/s]
 60%|██████    | 2340/3900 [01:35<00:56, 27.38it/s]

{'eval_loss': 0.20789922773838043, 'eval_f1': 0.4959136570176365, 'eval_precision': 0.6419568219137185, 'eval_recall': 0.4101738727932715, 'eval_runtime': 0.6193, 'eval_samples_per_second': 419.857, 'eval_steps_per_second': 104.964, 'epoch': 9.0}


 67%|██████▋   | 2598/3900 [01:45<00:44, 29.09it/s]
 67%|██████▋   | 2600/3900 [01:45<00:44, 29.09it/s]

{'eval_loss': 0.20930959284305573, 'eval_f1': 0.5000574557582023, 'eval_precision': 0.6354173776534024, 'eval_recall': 0.42209084509703343, 'eval_runtime': 0.6352, 'eval_samples_per_second': 409.334, 'eval_steps_per_second': 102.334, 'epoch': 10.0}


 73%|███████▎  | 2858/3900 [01:55<00:37, 28.13it/s]
 73%|███████▎  | 2860/3900 [01:56<00:36, 28.13it/s]

{'eval_loss': 0.20001240074634552, 'eval_f1': 0.5138826575758383, 'eval_precision': 0.6389861751152074, 'eval_recall': 0.435172791589555, 'eval_runtime': 0.6474, 'eval_samples_per_second': 401.617, 'eval_steps_per_second': 100.404, 'epoch': 11.0}


 77%|███████▋  | 3004/3900 [02:02<00:31, 28.48it/s]

{'loss': 0.1523, 'grad_norm': 0.3274906277656555, 'learning_rate': 1.153846153846154e-06, 'epoch': 11.54}


 80%|████████  | 3120/3900 [02:06<00:27, 28.87it/s]
 80%|████████  | 3120/3900 [02:06<00:27, 28.87it/s]

{'eval_loss': 0.19984593987464905, 'eval_f1': 0.5211577357520965, 'eval_precision': 0.6446216316521384, 'eval_recall': 0.4428457221479169, 'eval_runtime': 0.6107, 'eval_samples_per_second': 425.725, 'eval_steps_per_second': 106.431, 'epoch': 12.0}


 87%|████████▋ | 3378/3900 [02:17<00:18, 28.84it/s]
 87%|████████▋ | 3380/3900 [02:17<00:18, 28.84it/s]

{'eval_loss': 0.19751858711242676, 'eval_f1': 0.5193543375937766, 'eval_precision': 0.6450783897788506, 'eval_recall': 0.44161656058651555, 'eval_runtime': 0.5652, 'eval_samples_per_second': 460.021, 'eval_steps_per_second': 115.005, 'epoch': 13.0}


 93%|█████████▎| 3637/3900 [02:27<00:09, 28.79it/s]
 93%|█████████▎| 3640/3900 [02:28<00:09, 28.79it/s]

{'eval_loss': 0.1941778063774109, 'eval_f1': 0.5278254447469232, 'eval_precision': 0.6517006802721088, 'eval_recall': 0.44877356200094265, 'eval_runtime': 0.6127, 'eval_samples_per_second': 424.381, 'eval_steps_per_second': 106.095, 'epoch': 14.0}


100%|█████████▉| 3898/3900 [02:37<00:00, 27.46it/s]
100%|██████████| 3900/3900 [02:39<00:00, 27.46it/s]

{'eval_loss': 0.19447548687458038, 'eval_f1': 0.5228717201550481, 'eval_precision': 0.6461484381395394, 'eval_recall': 0.4450112174414187, 'eval_runtime': 0.5943, 'eval_samples_per_second': 437.466, 'eval_steps_per_second': 109.367, 'epoch': 15.0}


100%|██████████| 3900/3900 [02:40<00:00, 24.29it/s]


{'train_runtime': 160.5449, 'train_samples_per_second': 96.982, 'train_steps_per_second': 24.292, 'train_loss': 0.2167291553203876, 'epoch': 15.0}


100%|██████████| 65/65 [00:00<00:00, 112.01it/s]


Evaluation Metrics: {'eval_loss': 0.1941778063774109, 'eval_f1': 0.5278254447469232, 'eval_precision': 0.6517006802721088, 'eval_recall': 0.44877356200094265, 'eval_runtime': 0.5921, 'eval_samples_per_second': 439.104, 'eval_steps_per_second': 109.776, 'epoch': 15.0}
CHECK SCORE                                                 model    lan  \
12  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0...  pharo   
13  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0...  pharo   
14  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0...  pharo   
15  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0...  pharo   
16  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0...  pharo   
17  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0...  pharo   
18  lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0...  pharo   

                        cat  precision    recall        f1  
12  Keyimplementationpoints   1.000000  0.571429  0.727273  
13                  Example   0.966667  0.861386  0.910995  
14         Responsibilit

0,1
eval/f1,▁▂▃▃▆▆▇▇████████
eval/loss,█▆▅▄▃▂▂▂▁▁▁▁▁▁▁▁
eval/precision,▁▂▄▅████████████
eval/recall,▁▂▃▃▅▆▆▇▇███████
eval/runtime,▁▅█▃▃▃▃▄▄▅▆▄▂▄▃▃
eval/samples_per_second,█▃▁▅▆▅▅▅▄▃▃▅▇▅▆▆
eval/steps_per_second,█▃▁▅▆▅▅▅▄▃▃▅▇▅▆▆
train/epoch,▁▁▂▂▃▃▃▄▄▅▅▅▆▆▇▇▇███
train/global_step,▁▁▂▂▃▃▃▄▄▅▅▅▆▆▇▇▇███
train/grad_norm,█▃▁

0,1
eval/f1,0.52783
eval/loss,0.19418
eval/precision,0.6517
eval/recall,0.44877
eval/runtime,0.5921
eval/samples_per_second,439.104
eval/steps_per_second,109.776
total_flos,515675326855680.0
train/epoch,15.0
train/global_step,3900.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 7614/7614 [00:00<00:00, 19838.88 examples/s]
  7%|▋         | 761/11430 [00:32<07:17, 24.41it/s]
  7%|▋         | 762/11430 [00:34<07:17, 24.41it/s]

{'eval_loss': 0.10812810808420181, 'eval_f1': 0.5644114959833723, 'eval_precision': 0.6806856558412839, 'eval_recall': 0.5445862805784348, 'eval_runtime': 2.2748, 'eval_samples_per_second': 669.505, 'eval_steps_per_second': 83.963, 'epoch': 1.0}


  9%|▉         | 1004/11430 [00:45<07:26, 23.37it/s] 

{'loss': 0.1341, 'grad_norm': 1.1252930164337158, 'learning_rate': 4.562554680664917e-05, 'epoch': 1.31}


 13%|█▎        | 1523/11430 [01:06<06:47, 24.29it/s]
 13%|█▎        | 1524/11430 [01:09<06:47, 24.29it/s]

{'eval_loss': 0.09045516699552536, 'eval_f1': 0.7783473890120696, 'eval_precision': 0.8687934422320419, 'eval_recall': 0.7368316641050906, 'eval_runtime': 2.2139, 'eval_samples_per_second': 687.911, 'eval_steps_per_second': 86.271, 'epoch': 2.0}


 18%|█▊        | 2003/11430 [01:29<06:30, 24.11it/s]

{'loss': 0.0655, 'grad_norm': 0.3322427272796631, 'learning_rate': 4.125109361329834e-05, 'epoch': 2.62}


 20%|█▉        | 2285/11430 [01:41<06:12, 24.58it/s]
 20%|██        | 2286/11430 [01:43<06:12, 24.58it/s]

{'eval_loss': 0.08286954462528229, 'eval_f1': 0.8442820297447856, 'eval_precision': 0.9011093441688118, 'eval_recall': 0.801027145968784, 'eval_runtime': 2.1483, 'eval_samples_per_second': 708.938, 'eval_steps_per_second': 88.908, 'epoch': 3.0}


 26%|██▋       | 3002/11430 [02:14<05:53, 23.87it/s]

{'loss': 0.0394, 'grad_norm': 0.0373474545776844, 'learning_rate': 3.6876640419947505e-05, 'epoch': 3.94}


 27%|██▋       | 3047/11430 [02:16<05:42, 24.45it/s]
 27%|██▋       | 3048/11430 [02:18<05:42, 24.45it/s]

{'eval_loss': 0.09290573745965958, 'eval_f1': 0.8607093934573594, 'eval_precision': 0.9029577376502197, 'eval_recall': 0.8278576083645023, 'eval_runtime': 2.1413, 'eval_samples_per_second': 711.264, 'eval_steps_per_second': 89.2, 'epoch': 4.0}


 33%|███▎      | 3809/11430 [02:50<05:13, 24.29it/s]
 33%|███▎      | 3810/11430 [02:52<05:13, 24.29it/s]

{'eval_loss': 0.10730293393135071, 'eval_f1': 0.842593013910143, 'eval_precision': 0.904139368957033, 'eval_recall': 0.7989969701640182, 'eval_runtime': 2.2385, 'eval_samples_per_second': 680.378, 'eval_steps_per_second': 85.327, 'epoch': 5.0}


 35%|███▌      | 4004/11430 [03:01<05:11, 23.88it/s]

{'loss': 0.0217, 'grad_norm': 0.07963282614946365, 'learning_rate': 3.2502187226596675e-05, 'epoch': 5.25}


 40%|███▉      | 4571/11430 [03:24<04:42, 24.31it/s]
 40%|████      | 4572/11430 [03:27<04:42, 24.31it/s]

{'eval_loss': 0.1257534772157669, 'eval_f1': 0.8284905523798137, 'eval_precision': 0.8431239448436905, 'eval_recall': 0.832239260833765, 'eval_runtime': 2.2898, 'eval_samples_per_second': 665.125, 'eval_steps_per_second': 83.414, 'epoch': 6.0}


 44%|████▍     | 5003/11430 [03:45<04:24, 24.32it/s]

{'loss': 0.0111, 'grad_norm': 0.3979550898075104, 'learning_rate': 2.8127734033245845e-05, 'epoch': 6.56}


 47%|████▋     | 5333/11430 [03:59<04:07, 24.67it/s]
 47%|████▋     | 5334/11430 [04:01<04:07, 24.67it/s]

{'eval_loss': 0.11799376457929611, 'eval_f1': 0.8646358339790388, 'eval_precision': 0.8658467256794896, 'eval_recall': 0.8662849016847824, 'eval_runtime': 2.1646, 'eval_samples_per_second': 703.58, 'eval_steps_per_second': 88.236, 'epoch': 7.0}


 53%|█████▎    | 6002/11430 [04:29<03:39, 24.78it/s]

{'loss': 0.0077, 'grad_norm': 0.007315295282751322, 'learning_rate': 2.3753280839895015e-05, 'epoch': 7.87}


 53%|█████▎    | 6095/11430 [04:33<03:38, 24.44it/s]
 53%|█████▎    | 6096/11430 [04:35<03:38, 24.44it/s]

{'eval_loss': 0.12653052806854248, 'eval_f1': 0.8592334639789249, 'eval_precision': 0.8804290748735699, 'eval_recall': 0.8437110239986998, 'eval_runtime': 2.151, 'eval_samples_per_second': 708.039, 'eval_steps_per_second': 88.795, 'epoch': 8.0}


 60%|█████▉    | 6857/11430 [05:08<03:11, 23.93it/s]
 60%|██████    | 6858/11430 [05:10<03:11, 23.93it/s]

{'eval_loss': 0.13196179270744324, 'eval_f1': 0.863484425137612, 'eval_precision': 0.875161873902222, 'eval_recall': 0.8600073581978472, 'eval_runtime': 2.24, 'eval_samples_per_second': 679.923, 'eval_steps_per_second': 85.269, 'epoch': 9.0}


 61%|██████▏   | 7004/11430 [05:17<02:59, 24.70it/s]

{'loss': 0.0049, 'grad_norm': 0.06601132452487946, 'learning_rate': 1.9378827646544184e-05, 'epoch': 9.19}


 67%|██████▋   | 7619/11430 [05:42<02:36, 24.40it/s]
 67%|██████▋   | 7620/11430 [05:44<02:36, 24.40it/s]

{'eval_loss': 0.13273124396800995, 'eval_f1': 0.868755511877495, 'eval_precision': 0.8865236043891669, 'eval_recall': 0.8533171318568316, 'eval_runtime': 2.2028, 'eval_samples_per_second': 691.386, 'eval_steps_per_second': 86.707, 'epoch': 10.0}


 70%|███████   | 8003/11430 [06:01<02:20, 24.43it/s]

{'loss': 0.0032, 'grad_norm': 0.010951867327094078, 'learning_rate': 1.500437445319335e-05, 'epoch': 10.5}


 73%|███████▎  | 8381/11430 [06:16<02:02, 24.96it/s]
 73%|███████▎  | 8382/11430 [06:19<02:02, 24.96it/s]

{'eval_loss': 0.1360919326543808, 'eval_f1': 0.8689467796992055, 'eval_precision': 0.8874336964592754, 'eval_recall': 0.8546409807798574, 'eval_runtime': 2.1292, 'eval_samples_per_second': 715.297, 'eval_steps_per_second': 89.706, 'epoch': 11.0}


 79%|███████▉  | 9002/11430 [06:45<01:42, 23.67it/s]

{'loss': 0.0026, 'grad_norm': 0.001281701261177659, 'learning_rate': 1.062992125984252e-05, 'epoch': 11.81}


 80%|███████▉  | 9143/11430 [06:51<01:33, 24.55it/s]
 80%|████████  | 9144/11430 [06:53<01:33, 24.55it/s]

{'eval_loss': 0.14031115174293518, 'eval_f1': 0.861507433305422, 'eval_precision': 0.8701393969721315, 'eval_recall': 0.8546998682413947, 'eval_runtime': 2.1314, 'eval_samples_per_second': 714.566, 'eval_steps_per_second': 89.614, 'epoch': 12.0}


 87%|████████▋ | 9905/11430 [07:25<01:03, 23.85it/s]
 87%|████████▋ | 9906/11430 [07:27<01:03, 23.85it/s]

{'eval_loss': 0.13685138523578644, 'eval_f1': 0.8598167055701867, 'eval_precision': 0.8661042406410474, 'eval_recall': 0.8547965834583915, 'eval_runtime': 2.2221, 'eval_samples_per_second': 685.381, 'eval_steps_per_second': 85.954, 'epoch': 13.0}


 88%|████████▊ | 10004/11430 [07:32<00:57, 24.70it/s]

{'loss': 0.0015, 'grad_norm': 0.002701384015381336, 'learning_rate': 6.255468066491689e-06, 'epoch': 13.12}


 93%|█████████▎| 10667/11430 [07:59<00:32, 23.70it/s]
 93%|█████████▎| 10668/11430 [08:02<00:32, 23.70it/s]

{'eval_loss': 0.13971085846424103, 'eval_f1': 0.8651992216160346, 'eval_precision': 0.8795354939186915, 'eval_recall': 0.8524836593332642, 'eval_runtime': 2.2627, 'eval_samples_per_second': 673.09, 'eval_steps_per_second': 84.412, 'epoch': 14.0}


 96%|█████████▋| 11003/11430 [08:17<00:17, 24.38it/s]

{'loss': 0.001, 'grad_norm': 0.0011027086293324828, 'learning_rate': 1.8810148731408575e-06, 'epoch': 14.44}


100%|█████████▉| 11429/11430 [08:34<00:00, 24.10it/s]
100%|██████████| 11430/11430 [08:37<00:00, 24.10it/s]

{'eval_loss': 0.14240702986717224, 'eval_f1': 0.8651194170997788, 'eval_precision': 0.8835870515430029, 'eval_recall': 0.8487553945719821, 'eval_runtime': 2.1923, 'eval_samples_per_second': 694.715, 'eval_steps_per_second': 87.124, 'epoch': 15.0}


100%|██████████| 11430/11430 [08:38<00:00, 22.04it/s]


{'train_runtime': 518.6411, 'train_samples_per_second': 176.162, 'train_steps_per_second': 22.038, 'train_loss': 0.025646604288594305, 'epoch': 15.0}


100%|██████████| 191/191 [00:02<00:00, 89.54it/s]


Evaluation Metrics: {'eval_loss': 0.1360919326543808, 'eval_f1': 0.8689467796992055, 'eval_precision': 0.8874336964592754, 'eval_recall': 0.8546409807798574, 'eval_runtime': 2.1466, 'eval_samples_per_second': 709.49, 'eval_steps_per_second': 88.977, 'epoch': 15.0}
CHECK SCORE                                                 model   lan          cat  \
12  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0...  java      summary   
13  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0...  java    Ownership   
14  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0...  java       Expand   
15  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0...  java        usage   
16  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0...  java      Pointer   
17  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0...  java  deprecation   
18  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0...  java     rational   

    precision    recall        f1  
12   0.942172  0.950213  0.946176  
13   0.964912  1.000000  0.982143  
14   0

0,1
eval/f1,▁▆▇█▇▇██████████
eval/loss,▄▂▁▂▄▆▅▆▇▇▇█▇██▇
eval/precision,▁▇███▆▇▇▇▇▇▇▇▇▇▇
eval/recall,▁▅▇▇▇▇██████████
eval/runtime,▇▅▂▂▆█▃▂▆▄▁▁▅▇▄▂
eval/samples_per_second,▂▄▇▇▃▁▆▇▃▅██▄▂▅▇
eval/steps_per_second,▂▄▇▇▃▁▆▇▃▅██▄▂▅▇
train/epoch,▁▁▁▂▂▂▃▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇████
train/global_step,▁▁▁▂▂▂▃▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇████
train/grad_norm,█▃▁▁▃▁▁▁▁▁▁

0,1
eval/f1,0.86895
eval/loss,0.13609
eval/precision,0.88743
eval/recall,0.85464
eval/runtime,2.1466
eval/samples_per_second,709.49
eval/steps_per_second,88.977
total_flos,3025990766741760.0
train/epoch,15.0
train/global_step,11430.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1884/1884 [00:00<00:00, 17092.61 examples/s]
  7%|▋         | 188/2835 [00:07<01:50, 24.05it/s]
  7%|▋         | 189/2835 [00:08<01:50, 24.05it/s]

{'eval_loss': 0.35980650782585144, 'eval_f1': 0.33237974683544297, 'eval_precision': 0.5626006904487917, 'eval_recall': 0.27611958402742964, 'eval_runtime': 0.5524, 'eval_samples_per_second': 682.499, 'eval_steps_per_second': 86.896, 'epoch': 1.0}


 13%|█▎        | 377/2835 [00:16<01:37, 25.11it/s]
 13%|█▎        | 378/2835 [00:17<01:37, 25.11it/s]

{'eval_loss': 0.31461000442504883, 'eval_f1': 0.5163776371275911, 'eval_precision': 0.6456258532394721, 'eval_recall': 0.43493938245413694, 'eval_runtime': 0.5278, 'eval_samples_per_second': 714.322, 'eval_steps_per_second': 90.948, 'epoch': 2.0}


 20%|█▉        | 566/2835 [00:25<01:30, 24.94it/s]
 20%|██        | 567/2835 [00:26<01:30, 24.94it/s]

{'eval_loss': 0.34263962507247925, 'eval_f1': 0.5943554780402727, 'eval_precision': 0.7504219839399455, 'eval_recall': 0.5404049209404128, 'eval_runtime': 0.5307, 'eval_samples_per_second': 710.388, 'eval_steps_per_second': 90.447, 'epoch': 3.0}


 27%|██▋       | 755/2835 [00:35<01:25, 24.30it/s]
 27%|██▋       | 756/2835 [00:36<01:25, 24.30it/s]

{'eval_loss': 0.3604891896247864, 'eval_f1': 0.6485032186675916, 'eval_precision': 0.7294053402464618, 'eval_recall': 0.5981709066016554, 'eval_runtime': 0.5756, 'eval_samples_per_second': 655.008, 'eval_steps_per_second': 83.396, 'epoch': 4.0}


 33%|███▎      | 944/2835 [00:44<01:15, 25.00it/s]
 33%|███▎      | 945/2835 [00:45<01:15, 25.00it/s]

{'eval_loss': 0.38334885239601135, 'eval_f1': 0.6588461668049808, 'eval_precision': 0.7513831994502627, 'eval_recall': 0.630964851245998, 'eval_runtime': 0.5542, 'eval_samples_per_second': 680.314, 'eval_steps_per_second': 86.618, 'epoch': 5.0}


 35%|███▌      | 1004/2835 [00:48<01:14, 24.45it/s]

{'loss': 0.2183, 'grad_norm': 1.3220583200454712, 'learning_rate': 3.2363315696649034e-05, 'epoch': 5.29}


 40%|███▉      | 1133/2835 [00:54<01:09, 24.55it/s]
 40%|████      | 1134/2835 [00:54<01:09, 24.55it/s]

{'eval_loss': 0.4077567756175995, 'eval_f1': 0.7070064616174968, 'eval_precision': 0.7367987930455693, 'eval_recall': 0.6907941832564173, 'eval_runtime': 0.5432, 'eval_samples_per_second': 694.012, 'eval_steps_per_second': 88.362, 'epoch': 6.0}


 47%|████▋     | 1322/2835 [01:03<01:02, 24.29it/s]
 47%|████▋     | 1323/2835 [01:04<01:02, 24.29it/s]

{'eval_loss': 0.424058735370636, 'eval_f1': 0.7183201014593419, 'eval_precision': 0.7613651962012191, 'eval_recall': 0.6939220810863835, 'eval_runtime': 0.5305, 'eval_samples_per_second': 710.664, 'eval_steps_per_second': 90.482, 'epoch': 7.0}


 53%|█████▎    | 1511/2835 [01:12<00:53, 24.74it/s]
 53%|█████▎    | 1512/2835 [01:13<00:53, 24.74it/s]

{'eval_loss': 0.42899855971336365, 'eval_f1': 0.7108722629857955, 'eval_precision': 0.725812655566876, 'eval_recall': 0.7010257014269142, 'eval_runtime': 0.5314, 'eval_samples_per_second': 709.406, 'eval_steps_per_second': 90.322, 'epoch': 8.0}


 60%|█████▉    | 1699/2835 [01:23<00:48, 23.54it/s]
 60%|██████    | 1701/2835 [01:24<00:48, 23.54it/s]

{'eval_loss': 0.4851558208465576, 'eval_f1': 0.7079635386540757, 'eval_precision': 0.7406793794926984, 'eval_recall': 0.6879402750226834, 'eval_runtime': 0.5915, 'eval_samples_per_second': 637.309, 'eval_steps_per_second': 81.143, 'epoch': 9.0}


 67%|██████▋   | 1888/2835 [01:32<00:39, 24.28it/s]
 67%|██████▋   | 1890/2835 [01:33<00:38, 24.28it/s]

{'eval_loss': 0.47934624552726746, 'eval_f1': 0.7136495539750227, 'eval_precision': 0.7251958191090038, 'eval_recall': 0.7039309733708456, 'eval_runtime': 0.5375, 'eval_samples_per_second': 701.415, 'eval_steps_per_second': 89.305, 'epoch': 10.0}


 71%|███████   | 2002/2835 [01:39<00:35, 23.73it/s]

{'loss': 0.0238, 'grad_norm': 0.98296058177948, 'learning_rate': 1.472663139329806e-05, 'epoch': 10.58}


 73%|███████▎  | 2077/2835 [01:42<00:31, 24.36it/s]
 73%|███████▎  | 2079/2835 [01:42<00:31, 24.36it/s]

{'eval_loss': 0.5249906778335571, 'eval_f1': 0.7072765430679617, 'eval_precision': 0.7281213029809691, 'eval_recall': 0.6918794445307792, 'eval_runtime': 0.5507, 'eval_samples_per_second': 684.591, 'eval_steps_per_second': 87.163, 'epoch': 11.0}


 80%|███████▉  | 2266/2835 [01:51<00:24, 23.43it/s]
 80%|████████  | 2268/2835 [01:52<00:24, 23.43it/s]

{'eval_loss': 0.5253384709358215, 'eval_f1': 0.710712897992563, 'eval_precision': 0.7256777281875646, 'eval_recall': 0.7007320069970999, 'eval_runtime': 0.6115, 'eval_samples_per_second': 616.516, 'eval_steps_per_second': 78.495, 'epoch': 12.0}


 87%|████████▋ | 2455/2835 [02:00<00:15, 23.91it/s]
 87%|████████▋ | 2457/2835 [02:01<00:15, 23.91it/s]

{'eval_loss': 0.5223578810691833, 'eval_f1': 0.706339893991852, 'eval_precision': 0.713967491255933, 'eval_recall': 0.7003700573254961, 'eval_runtime': 0.6105, 'eval_samples_per_second': 617.489, 'eval_steps_per_second': 78.619, 'epoch': 13.0}


 93%|█████████▎| 2644/2835 [02:09<00:07, 24.03it/s]
 93%|█████████▎| 2646/2835 [02:10<00:07, 24.03it/s]

{'eval_loss': 0.5315414071083069, 'eval_f1': 0.7075934093874597, 'eval_precision': 0.7182207535051113, 'eval_recall': 0.70127206312016, 'eval_runtime': 0.5351, 'eval_samples_per_second': 704.592, 'eval_steps_per_second': 89.709, 'epoch': 14.0}


100%|█████████▉| 2833/2835 [02:19<00:00, 24.30it/s]
100%|██████████| 2835/2835 [02:20<00:00, 24.30it/s]

{'eval_loss': 0.5311994552612305, 'eval_f1': 0.7157167066645388, 'eval_precision': 0.7292032825191218, 'eval_recall': 0.7058235064067812, 'eval_runtime': 0.5437, 'eval_samples_per_second': 693.362, 'eval_steps_per_second': 88.28, 'epoch': 15.0}


100%|██████████| 2835/2835 [02:21<00:00, 20.02it/s]


{'train_runtime': 141.6313, 'train_samples_per_second': 159.605, 'train_steps_per_second': 20.017, 'train_loss': 0.08644138290768578, 'epoch': 15.0}


100%|██████████| 48/48 [00:00<00:00, 92.16it/s] 


Evaluation Metrics: {'eval_loss': 0.424058735370636, 'eval_f1': 0.7183201014593419, 'eval_precision': 0.7613651962012191, 'eval_recall': 0.6939220810863835, 'eval_runtime': 0.5346, 'eval_samples_per_second': 705.166, 'eval_steps_per_second': 89.782, 'epoch': 15.0}
CHECK SCORE                                                 model     lan  \
14  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0...  python   
15  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0...  python   
16  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0...  python   
17  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0...  python   
18  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0...  python   

                 cat  precision    recall        f1  
14             Usage   0.898990  0.735537  0.809091  
15        Parameters   0.858407  0.866071  0.862222  
16  DevelopmentNotes   0.641026  0.625000  0.632911  
17            Expand   0.714286  0.434783  0.540541  
18           Summary   0.694118  0.808219  0.746835  
Scores: 

0,1
eval/f1,▁▄▆▇▇███████████
eval/loss,▂▁▂▂▃▄▅▅▇▆█████▅
eval/precision,▁▄█▇█▇█▇▇▇▇▇▆▆▇█
eval/recall,▁▄▅▆▇███████████
eval/runtime,▃▁▁▅▃▂▁▁▆▂▃██▂▂▂
eval/samples_per_second,▆██▄▆▇██▂▇▆▁▁▇▆▇
eval/steps_per_second,▆██▄▆▇██▂▇▆▁▁▇▇▇
train/epoch,▁▁▂▃▃▃▃▄▅▅▅▆▆▇▇▇███
train/global_step,▁▁▂▃▃▃▃▄▅▅▅▆▆▇▇▇███
train/grad_norm,█▁

0,1
eval/f1,0.71832
eval/loss,0.42406
eval/precision,0.76137
eval/recall,0.69392
eval/runtime,0.5346
eval/samples_per_second,705.166
eval/steps_per_second,89.782
total_flos,748646437651200.0
train/epoch,15.0
train/global_step,2835.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1298/1298 [00:00<00:00, 14638.78 examples/s]
  7%|▋         | 128/1950 [00:05<01:16, 23.95it/s]
  7%|▋         | 130/1950 [00:05<01:16, 23.95it/s]

{'eval_loss': 0.2991098165512085, 'eval_f1': 0.19399592297652046, 'eval_precision': 0.2714285714285714, 'eval_recall': 0.1634407801682424, 'eval_runtime': 0.375, 'eval_samples_per_second': 693.277, 'eval_steps_per_second': 87.993, 'epoch': 1.0}


 13%|█▎        | 260/1950 [00:12<01:06, 25.57it/s]
 13%|█▎        | 260/1950 [00:12<01:06, 25.57it/s]

{'eval_loss': 0.23167914152145386, 'eval_f1': 0.4488201402303306, 'eval_precision': 0.6369253762110905, 'eval_recall': 0.36987215246880634, 'eval_runtime': 0.3658, 'eval_samples_per_second': 710.705, 'eval_steps_per_second': 90.205, 'epoch': 2.0}


 20%|█▉        | 389/1950 [00:18<01:06, 23.59it/s]
 20%|██        | 390/1950 [00:19<01:06, 23.59it/s]

{'eval_loss': 0.23042745888233185, 'eval_f1': 0.49002561549465345, 'eval_precision': 0.6253571472855034, 'eval_recall': 0.42849180038784557, 'eval_runtime': 0.3741, 'eval_samples_per_second': 694.929, 'eval_steps_per_second': 88.203, 'epoch': 3.0}


 27%|██▋       | 518/1950 [00:25<00:58, 24.61it/s]
 27%|██▋       | 520/1950 [00:26<00:58, 24.61it/s]

{'eval_loss': 0.22222143411636353, 'eval_f1': 0.5587188395810475, 'eval_precision': 0.7180145696514083, 'eval_recall': 0.5083314729992093, 'eval_runtime': 0.3851, 'eval_samples_per_second': 675.068, 'eval_steps_per_second': 85.682, 'epoch': 4.0}


 33%|███▎      | 650/1950 [00:32<00:52, 24.88it/s]
 33%|███▎      | 650/1950 [00:33<00:52, 24.88it/s]

{'eval_loss': 0.20477184653282166, 'eval_f1': 0.6298666434623948, 'eval_precision': 0.8815363507583187, 'eval_recall': 0.5495284111449287, 'eval_runtime': 0.3709, 'eval_samples_per_second': 701.085, 'eval_steps_per_second': 88.984, 'epoch': 5.0}


 40%|███▉      | 779/1950 [00:39<00:49, 23.82it/s]
 40%|████      | 780/1950 [00:40<00:49, 23.82it/s]

{'eval_loss': 0.21803726255893707, 'eval_f1': 0.6049648085059708, 'eval_precision': 0.8307847776791876, 'eval_recall': 0.5502786164303292, 'eval_runtime': 0.383, 'eval_samples_per_second': 678.919, 'eval_steps_per_second': 86.171, 'epoch': 6.0}


 47%|████▋     | 908/1950 [00:46<00:42, 24.61it/s]
 47%|████▋     | 910/1950 [00:46<00:42, 24.61it/s]

{'eval_loss': 0.24409469962120056, 'eval_f1': 0.6522386874624028, 'eval_precision': 0.8252525982050753, 'eval_recall': 0.5820398290257528, 'eval_runtime': 0.3747, 'eval_samples_per_second': 693.819, 'eval_steps_per_second': 88.062, 'epoch': 7.0}


 51%|█████▏    | 1004/1950 [00:51<00:39, 23.83it/s]

{'loss': 0.1404, 'grad_norm': 0.41294482350349426, 'learning_rate': 2.435897435897436e-05, 'epoch': 7.69}


 53%|█████▎    | 1040/1950 [00:53<00:36, 24.80it/s]
 53%|█████▎    | 1040/1950 [00:53<00:36, 24.80it/s]

{'eval_loss': 0.21567708253860474, 'eval_f1': 0.6750410265116148, 'eval_precision': 0.7866534961863271, 'eval_recall': 0.6178664807313056, 'eval_runtime': 0.3716, 'eval_samples_per_second': 699.719, 'eval_steps_per_second': 88.811, 'epoch': 8.0}


 60%|█████▉    | 1169/1950 [00:59<00:32, 24.32it/s]
 60%|██████    | 1170/1950 [01:00<00:32, 24.32it/s]

{'eval_loss': 0.22838358581066132, 'eval_f1': 0.6807303252928559, 'eval_precision': 0.82755588097203, 'eval_recall': 0.6143497877680447, 'eval_runtime': 0.3757, 'eval_samples_per_second': 692.019, 'eval_steps_per_second': 87.833, 'epoch': 9.0}


 67%|██████▋   | 1298/1950 [01:06<00:28, 23.11it/s]
 67%|██████▋   | 1300/1950 [01:06<00:28, 23.11it/s]

{'eval_loss': 0.24094827473163605, 'eval_f1': 0.6817052577854964, 'eval_precision': 0.8226532863860542, 'eval_recall': 0.6167960732129157, 'eval_runtime': 0.3988, 'eval_samples_per_second': 651.961, 'eval_steps_per_second': 82.749, 'epoch': 10.0}


 73%|███████▎  | 1430/1950 [01:13<00:20, 25.20it/s]
 73%|███████▎  | 1430/1950 [01:13<00:20, 25.20it/s]

{'eval_loss': 0.24242058396339417, 'eval_f1': 0.6643942785062246, 'eval_precision': 0.8081833142180116, 'eval_recall': 0.6054959896656987, 'eval_runtime': 0.3803, 'eval_samples_per_second': 683.662, 'eval_steps_per_second': 86.772, 'epoch': 11.0}


 80%|███████▉  | 1559/1950 [01:19<00:15, 24.57it/s]
 80%|████████  | 1560/1950 [01:20<00:15, 24.57it/s]

{'eval_loss': 0.24324928224086761, 'eval_f1': 0.6909445546089463, 'eval_precision': 0.8385255747396567, 'eval_recall': 0.6235912277609367, 'eval_runtime': 0.385, 'eval_samples_per_second': 675.389, 'eval_steps_per_second': 85.722, 'epoch': 12.0}


 87%|████████▋ | 1688/1950 [01:26<00:10, 24.72it/s]
 87%|████████▋ | 1690/1950 [01:26<00:10, 24.72it/s]

{'eval_loss': 0.25186559557914734, 'eval_f1': 0.6847820285371341, 'eval_precision': 0.8278462948129277, 'eval_recall': 0.6192884744287213, 'eval_runtime': 0.3706, 'eval_samples_per_second': 701.572, 'eval_steps_per_second': 89.046, 'epoch': 13.0}


 93%|█████████▎| 1820/1950 [01:33<00:05, 23.70it/s]
 93%|█████████▎| 1820/1950 [01:33<00:05, 23.70it/s]

{'eval_loss': 0.2533136010169983, 'eval_f1': 0.6911456454654036, 'eval_precision': 0.8343325830897077, 'eval_recall': 0.6268102174844857, 'eval_runtime': 0.3862, 'eval_samples_per_second': 673.293, 'eval_steps_per_second': 85.456, 'epoch': 14.0}


100%|█████████▉| 1949/1950 [01:39<00:00, 24.50it/s]
100%|██████████| 1950/1950 [01:41<00:00, 24.50it/s]

{'eval_loss': 0.25548967719078064, 'eval_f1': 0.6914769728100951, 'eval_precision': 0.8360468034381078, 'eval_recall': 0.6268102174844857, 'eval_runtime': 0.3976, 'eval_samples_per_second': 653.902, 'eval_steps_per_second': 82.995, 'epoch': 15.0}


100%|██████████| 1950/1950 [01:42<00:00, 19.07it/s]


{'train_runtime': 102.2575, 'train_samples_per_second': 152.263, 'train_steps_per_second': 19.07, 'train_loss': 0.07830077146872497, 'epoch': 15.0}


100%|██████████| 33/33 [00:00<00:00, 84.81it/s]


Evaluation Metrics: {'eval_loss': 0.25548967719078064, 'eval_f1': 0.6914769728100951, 'eval_precision': 0.8360468034381078, 'eval_recall': 0.6268102174844857, 'eval_runtime': 0.4031, 'eval_samples_per_second': 645.08, 'eval_steps_per_second': 81.876, 'epoch': 15.0}
CHECK SCORE                                                 model    lan  \
12  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0...  pharo   
13  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0...  pharo   
14  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0...  pharo   
15  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0...  pharo   
16  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0...  pharo   
17  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0...  pharo   
18  lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0...  pharo   

                        cat  precision    recall        f1  
12  Keyimplementationpoints   0.866667  0.619048  0.722222  
13                  Example   0.909091  0.891089  0.900000  
14         Responsibilitie

0,1
eval/f1,▁▅▅▆▇▇▇█████████
eval/loss,█▃▃▂▁▂▄▂▃▄▄▄▄▅▅▅
eval/precision,▁▅▅▆█▇▇▇▇▇▇█▇▇▇▇
eval/recall,▁▄▅▆▇▇▇█████████
eval/runtime,▃▁▃▅▂▄▃▂▃▇▄▅▂▅▇█
eval/samples_per_second,▆█▆▄▇▅▆▇▆▂▅▄▇▄▂▁
eval/steps_per_second,▆█▆▄▇▅▆▇▆▂▅▄▇▄▂▁
train/epoch,▁▁▂▃▃▃▄▄▅▅▅▆▇▇▇███
train/global_step,▁▁▂▃▃▃▄▄▅▅▅▆▇▇▇███
train/grad_norm,▁

0,1
eval/f1,0.69148
eval/loss,0.25549
eval/precision,0.83605
eval/recall,0.62681
eval/runtime,0.4031
eval/samples_per_second,645.08
eval/steps_per_second,81.876
total_flos,515675326855680.0
train/epoch,15.0
train/global_step,1950.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 7614/7614 [00:00<00:00, 19584.29 examples/s]
  7%|▋         | 761/11430 [00:31<07:09, 24.86it/s]
  7%|▋         | 762/11430 [00:33<07:09, 24.86it/s]

{'eval_loss': 0.16799940168857574, 'eval_f1': 0.3818554346434144, 'eval_precision': 0.38852739563457855, 'eval_recall': 0.3765697234150977, 'eval_runtime': 2.2449, 'eval_samples_per_second': 678.426, 'eval_steps_per_second': 85.082, 'epoch': 1.0}


  9%|▉         | 1004/11430 [00:44<07:16, 23.90it/s] 

{'loss': 0.2561, 'grad_norm': 3.1030735969543457, 'learning_rate': 4.5625546806649176e-06, 'epoch': 1.31}


 13%|█▎        | 1523/11430 [01:06<06:46, 24.38it/s]
 13%|█▎        | 1524/11430 [01:08<06:46, 24.38it/s]

{'eval_loss': 0.12502725422382355, 'eval_f1': 0.5302873434738667, 'eval_precision': 0.5335489312793958, 'eval_recall': 0.5276728182906673, 'eval_runtime': 2.1712, 'eval_samples_per_second': 701.444, 'eval_steps_per_second': 87.968, 'epoch': 2.0}


 18%|█▊        | 2003/11430 [01:29<06:32, 24.04it/s]

{'loss': 0.1244, 'grad_norm': 0.3309935927391052, 'learning_rate': 4.125109361329835e-06, 'epoch': 2.62}


 20%|█▉        | 2285/11430 [01:40<06:17, 24.22it/s]
 20%|██        | 2286/11430 [01:43<06:17, 24.22it/s]

{'eval_loss': 0.10195602476596832, 'eval_f1': 0.5392950125676188, 'eval_precision': 0.6146954830317609, 'eval_recall': 0.5283848464241904, 'eval_runtime': 2.1878, 'eval_samples_per_second': 696.121, 'eval_steps_per_second': 87.301, 'epoch': 3.0}


 26%|██▋       | 3005/11430 [02:13<05:51, 23.98it/s]

{'loss': 0.0934, 'grad_norm': 0.8760379552841187, 'learning_rate': 3.6876640419947506e-06, 'epoch': 3.94}


 27%|██▋       | 3047/11430 [02:15<05:44, 24.35it/s]
 27%|██▋       | 3048/11430 [02:17<05:44, 24.35it/s]

{'eval_loss': 0.09996819496154785, 'eval_f1': 0.6579502687989834, 'eval_precision': 0.9292534628691496, 'eval_recall': 0.6087446147981117, 'eval_runtime': 2.2511, 'eval_samples_per_second': 676.561, 'eval_steps_per_second': 84.848, 'epoch': 4.0}


 33%|███▎      | 3809/11430 [02:49<05:15, 24.17it/s]
 33%|███▎      | 3810/11430 [02:52<05:15, 24.17it/s]

{'eval_loss': 0.09038887917995453, 'eval_f1': 0.7812108051365184, 'eval_precision': 0.9202536658436836, 'eval_recall': 0.7234517209691719, 'eval_runtime': 2.2064, 'eval_samples_per_second': 690.26, 'eval_steps_per_second': 86.566, 'epoch': 5.0}


 35%|███▌      | 4004/11430 [03:00<05:04, 24.38it/s]

{'loss': 0.0737, 'grad_norm': 3.6132287979125977, 'learning_rate': 3.2502187226596677e-06, 'epoch': 5.25}


 40%|███▉      | 4571/11430 [03:24<04:37, 24.71it/s]
 40%|████      | 4572/11430 [03:26<04:37, 24.71it/s]

{'eval_loss': 0.09138267487287521, 'eval_f1': 0.7672764831099579, 'eval_precision': 0.9343637411467414, 'eval_recall': 0.710165431198833, 'eval_runtime': 2.1542, 'eval_samples_per_second': 706.993, 'eval_steps_per_second': 88.664, 'epoch': 6.0}


 44%|████▍     | 5003/11430 [03:44<04:21, 24.60it/s]

{'loss': 0.0612, 'grad_norm': 4.906905174255371, 'learning_rate': 2.8127734033245845e-06, 'epoch': 6.56}


 47%|████▋     | 5333/11430 [03:58<04:10, 24.29it/s]
 47%|████▋     | 5334/11430 [04:00<04:10, 24.29it/s]

{'eval_loss': 0.08655381202697754, 'eval_f1': 0.809537254352749, 'eval_precision': 0.9193688537647737, 'eval_recall': 0.7558519010300638, 'eval_runtime': 2.1364, 'eval_samples_per_second': 712.874, 'eval_steps_per_second': 89.402, 'epoch': 7.0}


 53%|█████▎    | 6002/11430 [04:29<03:48, 23.73it/s]

{'loss': 0.0541, 'grad_norm': 0.12985028326511383, 'learning_rate': 2.3753280839895016e-06, 'epoch': 7.87}


 53%|█████▎    | 6095/11430 [04:32<03:34, 24.91it/s]
 53%|█████▎    | 6096/11430 [04:35<03:34, 24.91it/s]

{'eval_loss': 0.08723849803209305, 'eval_f1': 0.8266163559141085, 'eval_precision': 0.911723954443247, 'eval_recall': 0.7743605126852247, 'eval_runtime': 2.1879, 'eval_samples_per_second': 696.113, 'eval_steps_per_second': 87.3, 'epoch': 8.0}


 60%|█████▉    | 6857/11430 [05:07<03:11, 23.89it/s]
 60%|██████    | 6858/11430 [05:09<03:11, 23.89it/s]

{'eval_loss': 0.08911211043596268, 'eval_f1': 0.8331688896693114, 'eval_precision': 0.8902766921287807, 'eval_recall': 0.7924343398892945, 'eval_runtime': 2.2619, 'eval_samples_per_second': 673.315, 'eval_steps_per_second': 84.441, 'epoch': 9.0}


 61%|██████▏   | 7004/11430 [05:16<03:02, 24.26it/s]

{'loss': 0.0449, 'grad_norm': 1.5539093017578125, 'learning_rate': 1.9378827646544183e-06, 'epoch': 9.19}


 67%|██████▋   | 7619/11430 [05:41<02:38, 24.10it/s]
 67%|██████▋   | 7620/11430 [05:43<02:38, 24.10it/s]

{'eval_loss': 0.08783511817455292, 'eval_f1': 0.8413674897523143, 'eval_precision': 0.9119336324391367, 'eval_recall': 0.7976806508641211, 'eval_runtime': 2.1987, 'eval_samples_per_second': 692.67, 'eval_steps_per_second': 86.868, 'epoch': 10.0}


 70%|███████   | 8003/11430 [06:00<02:22, 24.13it/s]

{'loss': 0.038, 'grad_norm': 1.7312058210372925, 'learning_rate': 1.500437445319335e-06, 'epoch': 10.5}


 73%|███████▎  | 8381/11430 [06:16<02:04, 24.50it/s]
 73%|███████▎  | 8382/11430 [06:18<02:04, 24.50it/s]

{'eval_loss': 0.08663550019264221, 'eval_f1': 0.8578459840979088, 'eval_precision': 0.911010783883997, 'eval_recall': 0.818987767040764, 'eval_runtime': 2.1081, 'eval_samples_per_second': 722.464, 'eval_steps_per_second': 90.604, 'epoch': 11.0}


 79%|███████▉  | 9002/11430 [06:44<01:39, 24.47it/s]

{'loss': 0.0349, 'grad_norm': 3.174954891204834, 'learning_rate': 1.062992125984252e-06, 'epoch': 11.81}


 80%|███████▉  | 9143/11430 [06:50<01:34, 24.12it/s]
 80%|████████  | 9144/11430 [06:52<01:34, 24.12it/s]

{'eval_loss': 0.09021012485027313, 'eval_f1': 0.8469286199351851, 'eval_precision': 0.8933628732970839, 'eval_recall': 0.8106138974992695, 'eval_runtime': 2.2036, 'eval_samples_per_second': 691.149, 'eval_steps_per_second': 86.677, 'epoch': 12.0}


 87%|████████▋ | 9905/11430 [07:24<01:02, 24.29it/s]
 87%|████████▋ | 9906/11430 [07:26<01:02, 24.29it/s]

{'eval_loss': 0.09016630053520203, 'eval_f1': 0.8565428191665587, 'eval_precision': 0.9090621355397379, 'eval_recall': 0.817834063896791, 'eval_runtime': 2.212, 'eval_samples_per_second': 688.523, 'eval_steps_per_second': 86.348, 'epoch': 13.0}


 88%|████████▊ | 10002/11430 [07:31<01:28, 16.07it/s]

{'loss': 0.0308, 'grad_norm': 0.07793806493282318, 'learning_rate': 6.25546806649169e-07, 'epoch': 13.12}


 93%|█████████▎| 10668/11430 [08:01<00:31, 23.82it/s]
 93%|█████████▎| 10668/11430 [08:03<00:31, 23.82it/s]

{'eval_loss': 0.08900251984596252, 'eval_f1': 0.8539300240591681, 'eval_precision': 0.9059860012768632, 'eval_recall': 0.8156475366309415, 'eval_runtime': 2.3877, 'eval_samples_per_second': 637.864, 'eval_steps_per_second': 79.995, 'epoch': 14.0}


 96%|█████████▋| 11004/11430 [08:19<00:17, 24.24it/s]

{'loss': 0.0299, 'grad_norm': 0.027864918112754822, 'learning_rate': 1.8810148731408576e-07, 'epoch': 14.44}


100%|██████████| 11430/11430 [08:38<00:00, 24.65it/s]
100%|██████████| 11430/11430 [08:41<00:00, 24.65it/s]

{'eval_loss': 0.08870829641819, 'eval_f1': 0.8564400063136052, 'eval_precision': 0.9070578393911131, 'eval_recall': 0.8182755554926392, 'eval_runtime': 2.2865, 'eval_samples_per_second': 666.071, 'eval_steps_per_second': 83.532, 'epoch': 15.0}


100%|██████████| 11430/11430 [08:42<00:00, 21.87it/s]


{'train_runtime': 522.5261, 'train_samples_per_second': 174.853, 'train_steps_per_second': 21.875, 'train_loss': 0.07460576914322553, 'epoch': 15.0}


100%|██████████| 191/191 [00:02<00:00, 85.29it/s]


Evaluation Metrics: {'eval_loss': 0.08663550019264221, 'eval_f1': 0.8578459840979088, 'eval_precision': 0.911010783883997, 'eval_recall': 0.818987767040764, 'eval_runtime': 2.2541, 'eval_samples_per_second': 675.652, 'eval_steps_per_second': 84.734, 'epoch': 15.0}
CHECK SCORE                                                 model   lan          cat  \
12  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0...  java      summary   
13  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0...  java    Ownership   
14  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0...  java       Expand   
15  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0...  java        usage   
16  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0...  java      Pointer   
17  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0...  java  deprecation   
18  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0...  java     rational   

    precision    recall        f1  
12   0.951567  0.950213  0.950890  
13   0.964912  1.000000  0.982143  
14   0

0,1
eval/f1,▁▃▃▅▇▇▇█████████
eval/loss,█▄▂▂▁▁▁▁▁▁▁▁▁▁▁▁
eval/precision,▁▃▄█████▇██▇████
eval/recall,▁▃▃▅▆▆▇▇████████
eval/runtime,▄▃▃▅▃▂▂▃▅▃▁▃▄█▅▅
eval/samples_per_second,▄▆▆▄▅▇▇▆▄▆█▅▅▁▃▄
eval/steps_per_second,▄▆▆▄▅▇▇▆▄▆█▅▅▁▃▄
train/epoch,▁▁▁▂▂▂▃▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇████
train/global_step,▁▁▁▂▂▂▃▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇████
train/grad_norm,▅▁▂▆█▁▃▃▆▁▁

0,1
eval/f1,0.85785
eval/loss,0.08664
eval/precision,0.91101
eval/recall,0.81899
eval/runtime,2.2541
eval/samples_per_second,675.652
eval/steps_per_second,84.734
total_flos,3025990766741760.0
train/epoch,15.0
train/global_step,11430.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1884/1884 [00:00<00:00, 17388.09 examples/s]
  7%|▋         | 187/2835 [00:08<01:57, 22.59it/s]
  7%|▋         | 189/2835 [00:09<01:57, 22.59it/s]

{'eval_loss': 0.49137017130851746, 'eval_f1': 0.0, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_runtime': 0.5532, 'eval_samples_per_second': 681.477, 'eval_steps_per_second': 86.766, 'epoch': 1.0}


 13%|█▎        | 376/2835 [00:17<01:43, 23.71it/s]
 13%|█▎        | 378/2835 [00:18<01:43, 23.71it/s]

{'eval_loss': 0.4474322199821472, 'eval_f1': 0.09876543209876543, 'eval_precision': 0.1951219512195122, 'eval_recall': 0.06611570247933884, 'eval_runtime': 0.5789, 'eval_samples_per_second': 651.264, 'eval_steps_per_second': 82.92, 'epoch': 2.0}


 20%|█▉        | 565/2835 [00:27<01:37, 23.35it/s]
 20%|██        | 567/2835 [00:27<01:37, 23.35it/s]

{'eval_loss': 0.4081132411956787, 'eval_f1': 0.19903474133460972, 'eval_precision': 0.3823225806451613, 'eval_recall': 0.14037780401416763, 'eval_runtime': 0.6115, 'eval_samples_per_second': 616.553, 'eval_steps_per_second': 78.5, 'epoch': 3.0}


 27%|██▋       | 754/2835 [00:36<01:28, 23.59it/s]
 27%|██▋       | 756/2835 [00:37<01:28, 23.59it/s]

{'eval_loss': 0.3823331296443939, 'eval_f1': 0.2859602560682849, 'eval_precision': 0.5833800186741362, 'eval_recall': 0.20598445763451992, 'eval_runtime': 0.5796, 'eval_samples_per_second': 650.415, 'eval_steps_per_second': 82.811, 'epoch': 4.0}


 33%|███▎      | 943/2835 [00:46<01:19, 23.79it/s]
 33%|███▎      | 945/2835 [00:47<01:19, 23.79it/s]

{'eval_loss': 0.35885414481163025, 'eval_f1': 0.3679111409546193, 'eval_precision': 0.5488687086347545, 'eval_recall': 0.29374605780272034, 'eval_runtime': 0.5488, 'eval_samples_per_second': 687.006, 'eval_steps_per_second': 87.47, 'epoch': 5.0}


 35%|███▌      | 1003/2835 [00:50<01:17, 23.61it/s]

{'loss': 0.4279, 'grad_norm': 2.583125591278076, 'learning_rate': 3.2363315696649034e-06, 'epoch': 5.29}


 40%|███▉      | 1132/2835 [00:56<01:13, 23.30it/s]
 40%|████      | 1134/2835 [00:56<01:13, 23.30it/s]

{'eval_loss': 0.34949880838394165, 'eval_f1': 0.4418853962283015, 'eval_precision': 0.7377171717171718, 'eval_recall': 0.3659154078004584, 'eval_runtime': 0.5602, 'eval_samples_per_second': 672.971, 'eval_steps_per_second': 85.683, 'epoch': 6.0}


 47%|████▋     | 1321/2835 [01:05<01:05, 22.97it/s]
 47%|████▋     | 1323/2835 [01:06<01:05, 22.97it/s]

{'eval_loss': 0.33773472905158997, 'eval_f1': 0.454269412517282, 'eval_precision': 0.713704318936877, 'eval_recall': 0.3900011461817732, 'eval_runtime': 0.5722, 'eval_samples_per_second': 658.877, 'eval_steps_per_second': 83.889, 'epoch': 7.0}


 53%|█████▎    | 1510/2835 [01:15<00:54, 24.16it/s]
 53%|█████▎    | 1512/2835 [01:15<00:54, 24.16it/s]

{'eval_loss': 0.3284076452255249, 'eval_f1': 0.4752803799973611, 'eval_precision': 0.6496715713664866, 'eval_recall': 0.4080480946310174, 'eval_runtime': 0.5481, 'eval_samples_per_second': 687.799, 'eval_steps_per_second': 87.571, 'epoch': 8.0}


 60%|█████▉    | 1699/2835 [01:24<00:48, 23.37it/s]
 60%|██████    | 1701/2835 [01:25<00:48, 23.37it/s]

{'eval_loss': 0.32482221722602844, 'eval_f1': 0.49057527364718395, 'eval_precision': 0.6389354838709678, 'eval_recall': 0.42009655995456524, 'eval_runtime': 0.5839, 'eval_samples_per_second': 645.685, 'eval_steps_per_second': 82.209, 'epoch': 9.0}


 67%|██████▋   | 1888/2835 [01:34<00:39, 23.76it/s]
 67%|██████▋   | 1890/2835 [01:34<00:39, 23.76it/s]

{'eval_loss': 0.3216770887374878, 'eval_f1': 0.504430817667735, 'eval_precision': 0.6500701515609772, 'eval_recall': 0.4467389399904696, 'eval_runtime': 0.577, 'eval_samples_per_second': 653.333, 'eval_steps_per_second': 83.183, 'epoch': 10.0}


 71%|███████   | 2002/2835 [01:40<00:35, 23.21it/s]

{'loss': 0.2752, 'grad_norm': 3.6284539699554443, 'learning_rate': 1.472663139329806e-06, 'epoch': 10.58}


 73%|███████▎  | 2077/2835 [01:43<00:32, 23.10it/s]
 73%|███████▎  | 2079/2835 [01:44<00:32, 23.10it/s]

{'eval_loss': 0.3207869529724121, 'eval_f1': 0.5155886753028128, 'eval_precision': 0.6386438024960519, 'eval_recall': 0.46581194672997156, 'eval_runtime': 0.5898, 'eval_samples_per_second': 639.211, 'eval_steps_per_second': 81.385, 'epoch': 11.0}


 80%|███████▉  | 2266/2835 [01:53<00:23, 23.84it/s]
 80%|████████  | 2268/2835 [01:53<00:23, 23.84it/s]

{'eval_loss': 0.31594908237457275, 'eval_f1': 0.525080040252454, 'eval_precision': 0.6527698032961191, 'eval_recall': 0.4690109131037172, 'eval_runtime': 0.5685, 'eval_samples_per_second': 663.104, 'eval_steps_per_second': 84.427, 'epoch': 12.0}


 87%|████████▋ | 2455/2835 [02:02<00:16, 23.56it/s]
 87%|████████▋ | 2457/2835 [02:03<00:16, 23.56it/s]

{'eval_loss': 0.31481677293777466, 'eval_f1': 0.5161563717818897, 'eval_precision': 0.6235095822052343, 'eval_recall': 0.46238883032970424, 'eval_runtime': 0.5646, 'eval_samples_per_second': 667.703, 'eval_steps_per_second': 85.013, 'epoch': 13.0}


 93%|█████████▎| 2644/2835 [02:12<00:08, 23.48it/s]
 93%|█████████▎| 2646/2835 [02:12<00:08, 23.48it/s]

{'eval_loss': 0.3126277029514313, 'eval_f1': 0.5213171321316087, 'eval_precision': 0.642767623564762, 'eval_recall': 0.46843434148106844, 'eval_runtime': 0.5539, 'eval_samples_per_second': 680.59, 'eval_steps_per_second': 86.653, 'epoch': 14.0}


100%|█████████▉| 2833/2835 [02:21<00:00, 23.86it/s]
100%|██████████| 2835/2835 [02:23<00:00, 23.86it/s]

{'eval_loss': 0.31170654296875, 'eval_f1': 0.5240219011803954, 'eval_precision': 0.6432854025693832, 'eval_recall': 0.47230618570160504, 'eval_runtime': 0.5398, 'eval_samples_per_second': 698.451, 'eval_steps_per_second': 88.927, 'epoch': 15.0}


100%|██████████| 2835/2835 [02:24<00:00, 19.67it/s]


{'train_runtime': 144.1668, 'train_samples_per_second': 156.798, 'train_steps_per_second': 19.665, 'train_loss': 0.3130357261267499, 'epoch': 15.0}


100%|██████████| 48/48 [00:00<00:00, 89.78it/s]


Evaluation Metrics: {'eval_loss': 0.31594908237457275, 'eval_f1': 0.525080040252454, 'eval_precision': 0.6527698032961191, 'eval_recall': 0.4690109131037172, 'eval_runtime': 0.5479, 'eval_samples_per_second': 688.071, 'eval_steps_per_second': 87.606, 'epoch': 15.0}
CHECK SCORE                                                 model     lan  \
14  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0...  python   
15  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0...  python   
16  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0...  python   
17  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0...  python   
18  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0...  python   

                 cat  precision    recall        f1  
14             Usage   0.863158  0.677686  0.759259  
15        Parameters   0.836364  0.821429  0.828829  
16  DevelopmentNotes   0.000000  0.000000  0.000000  
17            Expand   0.722222  0.188406  0.298851  
18           Summary   0.842105  0.657534  0.738462  
Scores:

0,1
eval/f1,▁▂▄▅▆▇▇▇████████
eval/loss,█▆▅▄▃▂▂▂▂▁▁▁▁▁▁▁
eval/precision,▁▃▅▇▆██▇▇▇▇▇▇▇▇▇
eval/recall,▁▂▃▄▅▆▇▇▇███████
eval/runtime,▂▅█▅▂▃▄▂▅▅▆▄▃▂▁▂
eval/samples_per_second,▇▄▁▄▇▆▅▇▃▄▃▅▅▆█▇
eval/steps_per_second,▇▄▁▄▇▆▅▇▃▄▃▅▅▆█▇
train/epoch,▁▁▂▃▃▃▃▄▅▅▅▆▆▇▇▇███
train/global_step,▁▁▂▃▃▃▃▄▅▅▅▆▆▇▇▇███
train/grad_norm,▁█

0,1
eval/f1,0.52508
eval/loss,0.31595
eval/precision,0.65277
eval/recall,0.46901
eval/runtime,0.5479
eval/samples_per_second,688.071
eval/steps_per_second,87.606
total_flos,748646437651200.0
train/epoch,15.0
train/global_step,2835.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1298/1298 [00:00<00:00, 14197.59 examples/s]
  7%|▋         | 129/1950 [00:05<01:20, 22.59it/s]
  7%|▋         | 130/1950 [00:06<01:20, 22.59it/s]

{'eval_loss': 0.43571698665618896, 'eval_f1': 0.0, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_runtime': 0.4251, 'eval_samples_per_second': 611.651, 'eval_steps_per_second': 77.633, 'epoch': 1.0}


 13%|█▎        | 258/1950 [00:12<01:09, 24.22it/s]
 13%|█▎        | 260/1950 [00:13<01:09, 24.22it/s]

{'eval_loss': 0.39389365911483765, 'eval_f1': 0.0, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_runtime': 0.3862, 'eval_samples_per_second': 673.206, 'eval_steps_per_second': 85.445, 'epoch': 2.0}


 20%|██        | 390/1950 [00:19<01:07, 23.08it/s]
 20%|██        | 390/1950 [00:19<01:07, 23.08it/s]

{'eval_loss': 0.3559388518333435, 'eval_f1': 0.09463148316651501, 'eval_precision': 0.1326530612244898, 'eval_recall': 0.07355021216407355, 'eval_runtime': 0.3877, 'eval_samples_per_second': 670.694, 'eval_steps_per_second': 85.127, 'epoch': 3.0}


 27%|██▋       | 519/1950 [00:26<00:59, 24.11it/s]
 27%|██▋       | 520/1950 [00:26<00:59, 24.11it/s]

{'eval_loss': 0.3240354657173157, 'eval_f1': 0.11187607573149741, 'eval_precision': 0.14285714285714285, 'eval_recall': 0.09193776520509193, 'eval_runtime': 0.3795, 'eval_samples_per_second': 685.095, 'eval_steps_per_second': 86.954, 'epoch': 4.0}


 33%|███▎      | 648/1950 [00:33<00:59, 21.76it/s]
 33%|███▎      | 650/1950 [00:33<00:59, 21.76it/s]

{'eval_loss': 0.3001062870025635, 'eval_f1': 0.1238095238095238, 'eval_precision': 0.1410488245931284, 'eval_recall': 0.11032531824611033, 'eval_runtime': 0.4191, 'eval_samples_per_second': 620.419, 'eval_steps_per_second': 78.746, 'epoch': 5.0}


 40%|████      | 780/1950 [00:40<00:48, 24.16it/s]
 40%|████      | 780/1950 [00:40<00:48, 24.16it/s]

{'eval_loss': 0.2820037305355072, 'eval_f1': 0.1324166030048383, 'eval_precision': 0.2807308970099668, 'eval_recall': 0.12025459688826025, 'eval_runtime': 0.3911, 'eval_samples_per_second': 664.841, 'eval_steps_per_second': 84.384, 'epoch': 6.0}


 47%|████▋     | 908/1950 [00:48<00:44, 23.23it/s]
 47%|████▋     | 910/1950 [00:49<00:44, 23.23it/s]

{'eval_loss': 0.2672044038772583, 'eval_f1': 0.2528316781257958, 'eval_precision': 0.5378737541528239, 'eval_recall': 0.19363089477583634, 'eval_runtime': 0.4088, 'eval_samples_per_second': 636.019, 'eval_steps_per_second': 80.726, 'epoch': 7.0}


 51%|█████▏    | 1004/1950 [00:54<00:42, 22.48it/s]

{'loss': 0.3407, 'grad_norm': 2.112144947052002, 'learning_rate': 2.435897435897436e-06, 'epoch': 7.69}


 53%|█████▎    | 1040/1950 [00:55<00:39, 23.13it/s]
 53%|█████▎    | 1040/1950 [00:56<00:39, 23.13it/s]

{'eval_loss': 0.257151335477829, 'eval_f1': 0.3202944789775749, 'eval_precision': 0.6649350649350649, 'eval_recall': 0.23921867324424695, 'eval_runtime': 0.4084, 'eval_samples_per_second': 636.584, 'eval_steps_per_second': 80.797, 'epoch': 8.0}


 60%|█████▉    | 1169/1950 [01:02<00:33, 23.66it/s]
 60%|██████    | 1170/1950 [01:03<00:32, 23.66it/s]

{'eval_loss': 0.24903267621994019, 'eval_f1': 0.3801034384028178, 'eval_precision': 0.6741041244083841, 'eval_recall': 0.2844623807856938, 'eval_runtime': 0.3981, 'eval_samples_per_second': 653.149, 'eval_steps_per_second': 82.9, 'epoch': 9.0}


 67%|██████▋   | 1298/1950 [01:10<00:28, 23.17it/s]
 67%|██████▋   | 1300/1950 [01:10<00:28, 23.17it/s]

{'eval_loss': 0.2434477061033249, 'eval_f1': 0.38573172858887145, 'eval_precision': 0.6581433637284702, 'eval_recall': 0.29493877188984985, 'eval_runtime': 0.3776, 'eval_samples_per_second': 688.504, 'eval_steps_per_second': 87.387, 'epoch': 10.0}


 73%|███████▎  | 1430/1950 [01:17<00:22, 23.55it/s]
 73%|███████▎  | 1430/1950 [01:17<00:22, 23.55it/s]

{'eval_loss': 0.23839882016181946, 'eval_f1': 0.406477120762835, 'eval_precision': 0.643243661100804, 'eval_recall': 0.3121769559774727, 'eval_runtime': 0.3975, 'eval_samples_per_second': 654.049, 'eval_steps_per_second': 83.014, 'epoch': 11.0}


 80%|███████▉  | 1559/1950 [01:23<00:17, 22.73it/s]
 80%|████████  | 1560/1950 [01:24<00:17, 22.73it/s]

{'eval_loss': 0.23368829488754272, 'eval_f1': 0.4016502963198059, 'eval_precision': 0.6386363636363637, 'eval_recall': 0.3085932246125863, 'eval_runtime': 0.4219, 'eval_samples_per_second': 616.265, 'eval_steps_per_second': 78.218, 'epoch': 12.0}


 87%|████████▋ | 1688/1950 [01:31<00:11, 21.90it/s]
 87%|████████▋ | 1690/1950 [01:31<00:11, 21.90it/s]

{'eval_loss': 0.231072336435318, 'eval_f1': 0.4318851550976603, 'eval_precision': 0.6418790604697652, 'eval_recall': 0.3379299402618919, 'eval_runtime': 0.4088, 'eval_samples_per_second': 636.025, 'eval_steps_per_second': 80.726, 'epoch': 13.0}


 93%|█████████▎| 1820/1950 [01:38<00:05, 22.51it/s]
 93%|█████████▎| 1820/1950 [01:38<00:05, 22.51it/s]

{'eval_loss': 0.22941967844963074, 'eval_f1': 0.444186369478282, 'eval_precision': 0.6459593280282936, 'eval_recall': 0.35008510019820677, 'eval_runtime': 0.421, 'eval_samples_per_second': 617.557, 'eval_steps_per_second': 78.382, 'epoch': 14.0}


100%|█████████▉| 1949/1950 [01:45<00:00, 22.90it/s]
100%|██████████| 1950/1950 [01:46<00:00, 22.90it/s]

{'eval_loss': 0.22883041203022003, 'eval_f1': 0.43851759246433936, 'eval_precision': 0.6415204238848596, 'eval_recall': 0.34578524164092256, 'eval_runtime': 0.407, 'eval_samples_per_second': 638.894, 'eval_steps_per_second': 81.09, 'epoch': 15.0}


100%|██████████| 1950/1950 [01:47<00:00, 18.11it/s]


{'train_runtime': 107.7042, 'train_samples_per_second': 144.563, 'train_steps_per_second': 18.105, 'train_loss': 0.27361298389923877, 'epoch': 15.0}


100%|██████████| 33/33 [00:00<00:00, 79.49it/s]


Evaluation Metrics: {'eval_loss': 0.22941967844963074, 'eval_f1': 0.444186369478282, 'eval_precision': 0.6459593280282936, 'eval_recall': 0.35008510019820677, 'eval_runtime': 0.429, 'eval_samples_per_second': 605.994, 'eval_steps_per_second': 76.915, 'epoch': 15.0}
CHECK SCORE                                                 model    lan  \
12  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0...  pharo   
13  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0...  pharo   
14  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0...  pharo   
15  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0...  pharo   
16  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0...  pharo   
17  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0...  pharo   
18  lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0...  pharo   

                        cat  precision    recall        f1  
12  Keyimplementationpoints   1.000000  0.380952  0.551724  
13                  Example   0.954023  0.821782  0.882979  
14         Responsibilitie

0,1
eval/f1,▁▁▂▃▃▃▅▆▇▇▇▇████
eval/loss,█▇▅▄▃▃▂▂▂▁▁▁▁▁▁▁
eval/precision,▁▁▂▂▂▄▇█████████
eval/recall,▁▁▂▃▃▃▅▆▇▇▇▇████
eval/runtime,▇▂▂▁▇▃▅▅▄▁▄▇▅▇▅█
eval/samples_per_second,▁▇▆█▂▆▄▄▅█▅▂▄▂▄▁
eval/steps_per_second,▁▇▆█▂▆▄▄▅█▅▂▄▂▄▁
train/epoch,▁▁▂▃▃▃▄▄▅▅▅▆▇▇▇███
train/global_step,▁▁▂▃▃▃▄▄▅▅▅▆▇▇▇███
train/grad_norm,▁

0,1
eval/f1,0.44419
eval/loss,0.22942
eval/precision,0.64596
eval/recall,0.35009
eval/runtime,0.429
eval/samples_per_second,605.994
eval/steps_per_second,76.915
total_flos,515675326855680.0
train/epoch,15.0
train/global_step,1950.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 7614/7614 [00:00<00:00, 18295.48 examples/s]
  3%|▎         | 1003/30460 [00:37<19:03, 25.75it/s]

{'loss': 0.162, 'grad_norm': 0.09967369586229324, 'learning_rate': 4.8358502954694684e-05, 'epoch': 0.66}


  5%|▍         | 1522/30460 [00:56<16:49, 28.68it/s]
  5%|▌         | 1523/30460 [01:00<16:49, 28.68it/s]

{'eval_loss': 0.12026465684175491, 'eval_f1': 0.6593426660645493, 'eval_precision': 0.6649457112176049, 'eval_recall': 0.6543683832340257, 'eval_runtime': 3.7419, 'eval_samples_per_second': 407.012, 'eval_steps_per_second': 101.82, 'epoch': 1.0}


  7%|▋         | 2003/30460 [01:17<16:06, 29.43it/s]  

{'loss': 0.1022, 'grad_norm': 1.0586469173431396, 'learning_rate': 4.6717005909389365e-05, 'epoch': 1.31}


 10%|▉         | 3005/30460 [01:52<16:00, 28.60it/s]

{'loss': 0.0887, 'grad_norm': 0.48389390110969543, 'learning_rate': 4.507550886408405e-05, 'epoch': 1.97}


 10%|█         | 3046/30460 [01:54<17:58, 25.42it/s]
 10%|█         | 3046/30460 [01:58<17:58, 25.42it/s]

{'eval_loss': 0.1057291030883789, 'eval_f1': 0.8007744246777003, 'eval_precision': 0.8693133719427085, 'eval_recall': 0.7594187086947121, 'eval_runtime': 3.8667, 'eval_samples_per_second': 393.873, 'eval_steps_per_second': 98.533, 'epoch': 2.0}


 13%|█▎        | 4004/30460 [02:32<14:51, 29.69it/s]  

{'loss': 0.059, 'grad_norm': 0.04167123883962631, 'learning_rate': 4.343401181877873e-05, 'epoch': 2.63}


 15%|█▍        | 4566/30460 [02:53<14:17, 30.19it/s]
 15%|█▌        | 4569/30460 [02:57<14:17, 30.19it/s]

{'eval_loss': 0.10679897665977478, 'eval_f1': 0.8297808316994164, 'eval_precision': 0.8785908562315667, 'eval_recall': 0.7951668223037184, 'eval_runtime': 3.7961, 'eval_samples_per_second': 401.203, 'eval_steps_per_second': 100.367, 'epoch': 3.0}


 16%|█▋        | 5005/30460 [03:14<14:56, 28.39it/s]  

{'loss': 0.0534, 'grad_norm': 1.4389077425003052, 'learning_rate': 4.179251477347341e-05, 'epoch': 3.28}


 20%|█▉        | 6005/30460 [03:50<14:21, 28.38it/s]

{'loss': 0.0402, 'grad_norm': 21.928180694580078, 'learning_rate': 4.015101772816809e-05, 'epoch': 3.94}


 20%|█▉        | 6090/30460 [03:53<14:01, 28.96it/s]
 20%|██        | 6092/30460 [03:57<14:01, 28.96it/s]

{'eval_loss': 0.12087718397378922, 'eval_f1': 0.8286735073552063, 'eval_precision': 0.8962576750950628, 'eval_recall': 0.785491506544138, 'eval_runtime': 3.9341, 'eval_samples_per_second': 387.127, 'eval_steps_per_second': 96.845, 'epoch': 4.0}


 23%|██▎       | 7002/30460 [04:31<13:36, 28.71it/s]  

{'loss': 0.0286, 'grad_norm': 0.009530054405331612, 'learning_rate': 3.850952068286277e-05, 'epoch': 4.6}


 25%|██▍       | 7614/30460 [04:53<12:57, 29.40it/s]
 25%|██▌       | 7615/30460 [04:56<12:57, 29.40it/s]

{'eval_loss': 0.11069469153881073, 'eval_f1': 0.8378092456979493, 'eval_precision': 0.8370925946833809, 'eval_recall': 0.840652727580405, 'eval_runtime': 3.5026, 'eval_samples_per_second': 434.822, 'eval_steps_per_second': 108.777, 'epoch': 5.0}


 26%|██▋       | 8003/30460 [05:11<13:37, 27.47it/s]  

{'loss': 0.0308, 'grad_norm': 0.020569834858179092, 'learning_rate': 3.6868023637557454e-05, 'epoch': 5.25}


 30%|██▉       | 9003/30460 [05:46<12:01, 29.74it/s]

{'loss': 0.0215, 'grad_norm': 2.674649238586426, 'learning_rate': 3.5226526592252135e-05, 'epoch': 5.91}


 30%|██▉       | 9136/30460 [05:51<14:00, 25.39it/s]
 30%|███       | 9138/30460 [05:55<13:59, 25.39it/s]

{'eval_loss': 0.14053061604499817, 'eval_f1': 0.8253107281703749, 'eval_precision': 0.9031806407922229, 'eval_recall': 0.7789267845130007, 'eval_runtime': 3.6688, 'eval_samples_per_second': 415.117, 'eval_steps_per_second': 103.847, 'epoch': 6.0}


 33%|███▎      | 10006/30460 [06:26<11:38, 29.27it/s] 

{'loss': 0.017, 'grad_norm': 0.0019758816342800856, 'learning_rate': 3.3585029546946817e-05, 'epoch': 6.57}


 35%|███▌      | 10661/30460 [06:49<11:26, 28.83it/s]
 35%|███▌      | 10661/30460 [06:53<11:26, 28.83it/s]

{'eval_loss': 0.12640200555324554, 'eval_f1': 0.8655893329298842, 'eval_precision': 0.8838290535789132, 'eval_recall': 0.8571791556850111, 'eval_runtime': 3.561, 'eval_samples_per_second': 427.692, 'eval_steps_per_second': 106.993, 'epoch': 7.0}


 36%|███▌      | 11004/30460 [07:05<10:50, 29.89it/s]  

{'loss': 0.0162, 'grad_norm': 0.00643825251609087, 'learning_rate': 3.19435325016415e-05, 'epoch': 7.22}


 39%|███▉      | 12003/30460 [07:40<10:34, 29.11it/s]

{'loss': 0.0125, 'grad_norm': 0.0009932167595252395, 'learning_rate': 3.030203545633618e-05, 'epoch': 7.88}


 40%|███▉      | 12182/30460 [07:46<10:24, 29.26it/s]
 40%|████      | 12184/30460 [07:50<10:24, 29.26it/s]

{'eval_loss': 0.17169050872325897, 'eval_f1': 0.8271424152411131, 'eval_precision': 0.8658290952344385, 'eval_recall': 0.8044948229533734, 'eval_runtime': 3.5149, 'eval_samples_per_second': 433.294, 'eval_steps_per_second': 108.395, 'epoch': 8.0}


 43%|████▎     | 13005/30460 [08:20<09:52, 29.46it/s]  

{'loss': 0.0112, 'grad_norm': 0.004404356703162193, 'learning_rate': 2.8660538411030864e-05, 'epoch': 8.54}


 45%|████▍     | 13705/30460 [08:44<09:47, 28.51it/s]
 45%|████▌     | 13707/30460 [08:49<09:47, 28.51it/s]

{'eval_loss': 0.15031658113002777, 'eval_f1': 0.8473257195499445, 'eval_precision': 0.8599160654946861, 'eval_recall': 0.8375576350481645, 'eval_runtime': 4.2576, 'eval_samples_per_second': 357.71, 'eval_steps_per_second': 89.486, 'epoch': 9.0}


 46%|████▌     | 14003/30460 [09:00<09:28, 28.94it/s]  

{'loss': 0.011, 'grad_norm': 0.002885515335947275, 'learning_rate': 2.7019041365725546e-05, 'epoch': 9.19}


 49%|████▉     | 15004/30460 [09:35<08:36, 29.91it/s]

{'loss': 0.009, 'grad_norm': 0.08132471889257431, 'learning_rate': 2.5377544320420227e-05, 'epoch': 9.85}


 50%|█████     | 15230/30460 [09:47<08:41, 29.18it/s]

{'eval_loss': 0.17682351171970367, 'eval_f1': 0.8339190672161994, 'eval_precision': 0.879352390475017, 'eval_recall': 0.8074641952629761, 'eval_runtime': 3.5084, 'eval_samples_per_second': 434.096, 'eval_steps_per_second': 108.595, 'epoch': 10.0}


 53%|█████▎    | 16005/30460 [10:15<08:10, 29.46it/s]  

{'loss': 0.0063, 'grad_norm': 0.0023422956001013517, 'learning_rate': 2.3736047275114905e-05, 'epoch': 10.51}


 55%|█████▍    | 16751/30460 [10:41<07:42, 29.67it/s]
 55%|█████▌    | 16753/30460 [10:45<07:41, 29.67it/s]

{'eval_loss': 0.18122583627700806, 'eval_f1': 0.8512443898566336, 'eval_precision': 0.8559551155129242, 'eval_recall': 0.8482066552062514, 'eval_runtime': 3.73, 'eval_samples_per_second': 408.31, 'eval_steps_per_second': 102.145, 'epoch': 11.0}


 56%|█████▌    | 17002/30460 [10:54<07:54, 28.35it/s]  

{'loss': 0.0056, 'grad_norm': 0.00034241756657138467, 'learning_rate': 2.2094550229809586e-05, 'epoch': 11.16}


 59%|█████▉    | 18002/30460 [11:29<06:57, 29.84it/s]

{'loss': 0.0047, 'grad_norm': 0.001601303811185062, 'learning_rate': 2.045305318450427e-05, 'epoch': 11.82}


 60%|██████    | 18276/30460 [11:38<06:50, 29.67it/s]
 60%|██████    | 18276/30460 [11:42<06:50, 29.67it/s]

{'eval_loss': 0.1996276080608368, 'eval_f1': 0.852535260854997, 'eval_precision': 0.8568432281281453, 'eval_recall': 0.8501244105093884, 'eval_runtime': 3.6266, 'eval_samples_per_second': 419.949, 'eval_steps_per_second': 105.056, 'epoch': 12.0}


 62%|██████▏   | 19005/30460 [12:08<06:31, 29.24it/s]  

{'loss': 0.0044, 'grad_norm': 0.0004328441573306918, 'learning_rate': 1.881155613919895e-05, 'epoch': 12.48}


 65%|██████▍   | 19798/30460 [12:35<05:52, 30.24it/s]
 65%|██████▌   | 19799/30460 [12:39<05:52, 30.24it/s]

{'eval_loss': 0.18219459056854248, 'eval_f1': 0.8497933252589791, 'eval_precision': 0.8612361795768189, 'eval_recall': 0.8405615842725239, 'eval_runtime': 3.3789, 'eval_samples_per_second': 450.744, 'eval_steps_per_second': 112.76, 'epoch': 13.0}


 66%|██████▌   | 20005/30460 [12:47<06:25, 27.15it/s]  

{'loss': 0.0056, 'grad_norm': 0.0021067941561341286, 'learning_rate': 1.717005909389363e-05, 'epoch': 13.13}


 69%|██████▉   | 21003/30460 [13:21<05:35, 28.21it/s]

{'loss': 0.0027, 'grad_norm': 0.0004402909253258258, 'learning_rate': 1.5528562048588312e-05, 'epoch': 13.79}


 70%|███████   | 21322/30460 [13:32<05:20, 28.55it/s]
 70%|███████   | 21322/30460 [13:36<05:20, 28.55it/s]

{'eval_loss': 0.21277838945388794, 'eval_f1': 0.840164768917701, 'eval_precision': 0.8762145747412697, 'eval_recall': 0.8136349117061767, 'eval_runtime': 3.9252, 'eval_samples_per_second': 388.01, 'eval_steps_per_second': 97.066, 'epoch': 14.0}


 72%|███████▏  | 22005/30460 [14:00<04:33, 30.94it/s]  

{'loss': 0.0011, 'grad_norm': 0.029633864760398865, 'learning_rate': 1.3887065003282995e-05, 'epoch': 14.45}


 75%|███████▍  | 22844/30460 [14:29<04:38, 27.39it/s]
 75%|███████▌  | 22845/30460 [14:34<04:38, 27.39it/s]

{'eval_loss': 0.20980305969715118, 'eval_f1': 0.8418560873692156, 'eval_precision': 0.8747183482863669, 'eval_recall': 0.8197425953978461, 'eval_runtime': 4.3061, 'eval_samples_per_second': 353.683, 'eval_steps_per_second': 88.479, 'epoch': 15.0}


 76%|███████▌  | 23005/30460 [14:40<04:04, 30.47it/s]  

{'loss': 0.0023, 'grad_norm': 0.0002356151962885633, 'learning_rate': 1.2245567957977677e-05, 'epoch': 15.1}


 79%|███████▉  | 24004/30460 [15:14<03:27, 31.16it/s]

{'loss': 0.002, 'grad_norm': 0.0013431791448965669, 'learning_rate': 1.0604070912672358e-05, 'epoch': 15.76}


 80%|████████  | 24368/30460 [15:27<03:40, 27.65it/s]
 80%|████████  | 24368/30460 [15:31<03:40, 27.65it/s]

{'eval_loss': 0.2089308500289917, 'eval_f1': 0.8591022274898332, 'eval_precision': 0.8837791521903532, 'eval_recall': 0.837938995802965, 'eval_runtime': 3.8486, 'eval_samples_per_second': 395.729, 'eval_steps_per_second': 98.997, 'epoch': 16.0}


 82%|████████▏ | 25004/30460 [15:54<03:07, 29.06it/s]

{'loss': 0.0017, 'grad_norm': 0.00041336490539833903, 'learning_rate': 8.96257386736704e-06, 'epoch': 16.41}


 85%|████████▍ | 25888/30460 [16:24<02:31, 30.15it/s]
 85%|████████▌ | 25891/30460 [16:28<02:31, 30.15it/s]

{'eval_loss': 0.2099430412054062, 'eval_f1': 0.8487752951742509, 'eval_precision': 0.8560438013483816, 'eval_recall': 0.8446270184189152, 'eval_runtime': 3.3284, 'eval_samples_per_second': 457.575, 'eval_steps_per_second': 114.469, 'epoch': 17.0}


 85%|████████▌ | 26003/30460 [16:32<02:36, 28.49it/s]

{'loss': 0.0018, 'grad_norm': 0.0007414313149638474, 'learning_rate': 7.321076822061721e-06, 'epoch': 17.07}


 89%|████████▊ | 27002/30460 [17:07<02:37, 21.98it/s]

{'loss': 0.0011, 'grad_norm': 0.000889127841219306, 'learning_rate': 5.679579776756402e-06, 'epoch': 17.73}


 90%|████████▉ | 27413/30460 [17:23<01:49, 27.89it/s]
 90%|█████████ | 27414/30460 [17:26<01:49, 27.89it/s]

{'eval_loss': 0.2177903652191162, 'eval_f1': 0.8431084684812495, 'eval_precision': 0.8809778074334403, 'eval_recall': 0.8149425285744344, 'eval_runtime': 3.763, 'eval_samples_per_second': 404.727, 'eval_steps_per_second': 101.248, 'epoch': 18.0}


 92%|█████████▏| 28004/30460 [17:48<01:25, 28.70it/s]

{'loss': 0.0005, 'grad_norm': 0.0012387237511575222, 'learning_rate': 4.038082731451084e-06, 'epoch': 18.38}


 95%|█████████▍| 28936/30460 [18:22<00:54, 27.96it/s]
 95%|█████████▌| 28937/30460 [18:26<00:54, 27.96it/s]

{'eval_loss': 0.2125779390335083, 'eval_f1': 0.8481972707037396, 'eval_precision': 0.8666383425555677, 'eval_recall': 0.8339329489397753, 'eval_runtime': 3.6361, 'eval_samples_per_second': 418.85, 'eval_steps_per_second': 104.781, 'epoch': 19.0}


 95%|█████████▌| 29004/30460 [18:30<00:51, 28.11it/s]

{'loss': 0.0013, 'grad_norm': 0.00013574033800978214, 'learning_rate': 2.396585686145765e-06, 'epoch': 19.04}


 98%|█████████▊| 30003/30460 [19:06<00:15, 29.59it/s]

{'loss': 0.0008, 'grad_norm': 0.0002661168691702187, 'learning_rate': 7.550886408404465e-07, 'epoch': 19.7}


100%|█████████▉| 30459/30460 [19:22<00:00, 26.29it/s]
100%|██████████| 30460/30460 [19:28<00:00, 26.29it/s]

{'eval_loss': 0.2127913236618042, 'eval_f1': 0.8499278331897072, 'eval_precision': 0.869241437219986, 'eval_recall': 0.8346888068384904, 'eval_runtime': 3.9377, 'eval_samples_per_second': 386.771, 'eval_steps_per_second': 96.756, 'epoch': 20.0}


100%|██████████| 30460/30460 [19:29<00:00, 26.04it/s]


{'train_runtime': 1169.7114, 'train_samples_per_second': 104.145, 'train_steps_per_second': 26.041, 'train_loss': 0.02315477223642469, 'epoch': 20.0}


100%|██████████| 381/381 [00:04<00:00, 94.46it/s] 


Evaluation Metrics: {'eval_loss': 0.12640200555324554, 'eval_f1': 0.8655893329298842, 'eval_precision': 0.8838290535789132, 'eval_recall': 0.8571791556850111, 'eval_runtime': 4.0494, 'eval_samples_per_second': 376.108, 'eval_steps_per_second': 94.089, 'epoch': 20.0}
CHECK SCORE                                                 model   lan          cat  \
12  lr-5e-05_epoch-20_batchSize-4_weightsOfDecay-0...  java      summary   
13  lr-5e-05_epoch-20_batchSize-4_weightsOfDecay-0...  java    Ownership   
14  lr-5e-05_epoch-20_batchSize-4_weightsOfDecay-0...  java       Expand   
15  lr-5e-05_epoch-20_batchSize-4_weightsOfDecay-0...  java        usage   
16  lr-5e-05_epoch-20_batchSize-4_weightsOfDecay-0...  java      Pointer   
17  lr-5e-05_epoch-20_batchSize-4_weightsOfDecay-0...  java  deprecation   
18  lr-5e-05_epoch-20_batchSize-4_weightsOfDecay-0...  java     rational   

    precision    recall        f1  
12   0.944126  0.937411  0.940757  
13   0.964286  0.981818  0.972973  
14  

0,1
eval/f1,▁▆▇▇▇▇█▇▇▇██▇▇▇█▇▇▇▇█
eval/loss,▂▁▁▂▁▃▂▅▄▅▆▇▆██▇████▂
eval/precision,▁▇▇█▆█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
eval/recall,▁▅▆▆▇▅█▆▇▆██▇▆▇▇█▇▇▇█
eval/runtime,▄▅▄▅▂▃▃▂█▂▄▃▁▅█▅▁▄▃▅▆
eval/samples_per_second,▅▄▄▃▆▅▆▆▁▆▅▅█▃▁▄█▄▅▃▃
eval/steps_per_second,▅▄▄▃▆▅▆▆▁▆▅▅█▃▁▄█▄▅▃▃
train/epoch,▁▁▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇█████
train/global_step,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
train/grad_norm,▁▁▁▁▁█▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/f1,0.86559
eval/loss,0.1264
eval/precision,0.88383
eval/recall,0.85718
eval/runtime,4.0494
eval/samples_per_second,376.108
eval/steps_per_second,94.089
total_flos,4034654355655680.0
train/epoch,20.0
train/global_step,30460.0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1884/1884 [00:00<00:00, 16840.82 examples/s]
  5%|▌         | 377/7540 [00:14<04:46, 24.97it/s]
  5%|▌         | 377/7540 [00:15<04:46, 24.97it/s]

{'eval_loss': 0.34614893794059753, 'eval_f1': 0.32863612075874965, 'eval_precision': 0.56889338731444, 'eval_recall': 0.26331916837832153, 'eval_runtime': 0.9664, 'eval_samples_per_second': 390.127, 'eval_steps_per_second': 98.308, 'epoch': 1.0}


RuntimeError: [enforce fail at ..\caffe2\serialize\inline_container.cc:337] . unexpected pos 474380672 vs 474380560

  5%|▌         | 377/7540 [00:27<04:46, 24.97it/s]

## Viewing Model Performance Scores

The first cell shows an error due to an accidental run with a new kernel, but will normally show show the results for the best model for each language, along with their label scores.

In [None]:
scores 

NameError: name 'scores' is not defined

Below we calculate the average f1, average precision, and average recal scores for the best model in each language.

In [17]:
best_f1_scores = []
best_f1_java_row = scores[scores['lan'] == 'java'].loc[scores[scores['lan'] == 'java']['f1'].idxmax()]
best_f1_scores.append(best_f1_java_row)

best_f1_python_row = scores[scores['lan'] == 'python'].loc[scores[scores['lan'] == 'python']['f1'].idxmax()]
best_f1_scores.append(best_f1_python_row)

best_f1_pharo_row = scores[scores['lan'] == 'pharo'].loc[scores[scores['lan'] == 'pharo']['f1'].idxmax()]
best_f1_scores.append(best_f1_pharo_row)
best_f1_scores

[model        lr-5e-05_epoch-20_batchSize-4_weightsOfDecay-0...
 lan                                                       java
 cat                                                  Ownership
 precision                                             0.964286
 recall                                                0.981818
 f1                                                    0.972973
 Name: 8, dtype: object,
 model        lr-5e-05_epoch-20_batchSize-4_weightsOfDecay-0...
 lan                                                     python
 cat                                                 Parameters
 precision                                             0.910256
 recall                                                0.633929
 f1                                                    0.747368
 Name: 15, dtype: object,
 model        lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0...
 lan                                                      pharo
 cat                                                 

gs_metric_dicts will show the avg F1 score for every language, per trained model.

In [18]:
gs_metrics_dict

{'java_lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0.01': {'lang': 'java',
  'avg_f1': 0.8614530647193451},
 'python_lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0.01': {'lang': 'python',
  'avg_f1': 0.7499024741736606},
 'pharo_lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0.01': {'lang': 'pharo',
  'avg_f1': 0.6760992595162517},
 'java_lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0.01': {'lang': 'java',
  'avg_f1': 0.847175679083368},
 'python_lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0.01': {'lang': 'python',
  'avg_f1': 0.5185931839398215},
 'pharo_lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0.01': {'lang': 'pharo',
  'avg_f1': 0.3464381423565097},
 'java_lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0.01': {'lang': 'java',
  'avg_f1': 0.8612716333442287},
 'python_lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0.01': {'lang': 'python',
  'avg_f1': 0.7202505734514754},
 'pharo_lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0.01': {'lang': 'pharo',
  'avg_f1': 0.6646768174062629},
 '

final_model_stats shows the average F1 score per trained model architecture.

In [19]:
final_model_stats

{'lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0.01': 0.763809402132499,
 'lr-5e-06_epoch-10_batchSize-4_weightsOfDecay-0.01': 0.5762243510409606,
 'lr-5e-05_epoch-10_batchSize-8_weightsOfDecay-0.01': 0.7517311590795167,
 'lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0.01': 0.47797133051203045,
 'lr-5e-05_epoch-15_batchSize-4_weightsOfDecay-0.01': 0.7523858683081371,
 'lr-5e-06_epoch-15_batchSize-4_weightsOfDecay-0.01': 0.6658800533871522,
 'lr-5e-05_epoch-15_batchSize-8_weightsOfDecay-0.01': 0.7681797815506027,
 'lr-5e-06_epoch-15_batchSize-8_weightsOfDecay-0.01': 0.6173060758906344,
 'lr-5e-05_epoch-20_batchSize-4_weightsOfDecay-0.01': 0.7769692605356904,
 'lr-5e-06_epoch-20_batchSize-4_weightsOfDecay-0.01': 0.7154331131665084,
 'lr-5e-05_epoch-20_batchSize-8_weightsOfDecay-0.01': 0.7762857137735598,
 'lr-5e-06_epoch-20_batchSize-8_weightsOfDecay-0.01': 0.6527187327721913,
 'lr-5e-05_epoch-10_batchSize-4_weightsOfDecay-0.001': 0.7563265418690509,
 'lr-5e-06_epoch-10_batchSize-4_weigh

In [25]:
best_f1_score = max(final_model_stats.values())
best_key = [key for key, value in final_model_stats.items() if value == best_f1_score]
(best_key, best_f1_score)

(['lr-5e-05_epoch-20_batchSize-4_weightsOfDecay-0.01'], 0.7769692605356904)

In [26]:
best_f1_score = min(final_model_stats.values())
best_key = [key for key, value in final_model_stats.items() if value == best_f1_score]
(best_key, best_f1_score)

(['lr-5e-06_epoch-10_batchSize-8_weightsOfDecay-0.001'], 0.4777266917629609)