### Fine-Tune Language Models

- Joel Stremmel
- 01-07-24

##### About

Fine-Tune pretrained language models on the formatted data using K-Fold Cross-Validation and save the scores.

##### Set Parameters
Choose the outcome to predict and provide the models to fine-tune, along with the training, evaluation, and I/O parameters they share.

In [1]:
# Outcome whose fold files are loaded and whose results are saved, e.g. 'cohesion' or 'Alliance'
outcome = "cohesion"

# All settings for the notebook, grouped by the stage that reads them
params = {
    # Runtime checks and Colab-specific setup
    "env": {"colab": False, "require_high_ram": True},
    # Which formatted input files to load
    "data": {"add_summaries": False},
    # Optimizer and schedule settings shared by every model
    "training": {
        "lr": 5e-6,
        "weight_decay": 0.01,
        "adam_beta1": 0.9,
        "adam_beta2": 0.999,
        "adam_epsilon": 1e-8,
        "warmup_steps": 50,
        "num_workers": 2,
        "epochs": 1000,
        "early_stopping_patience": 10,
        "logging_strategy": "epoch",
    },
    # Evaluation and checkpointing settings shared by every model
    "evaluation": {
        "evaluation_strategy": "epoch",
        "save_strategy": "epoch",
        "save_total_limit": 1,
        "fp16_full_eval": False,
        "eval_accumulation_steps": 100,
    },
    # One entry per model to fine-tune; the key is also the key used in the saved results
    "models": {
        "mental_roberta_base": {
            "path": "./models/mental-roberta-base",
            "max_seq_len": 512,
            "fp16": True,
            "batch_size": 1,
            "accumulation_steps": 16,
            "gradient_checkpointing": True,
            "type": "mlm",
        },
        "roberta_base": {
            "path": "roberta-base",
            "fp16": True,
            "max_seq_len": 512,
            "batch_size": 1,
            "accumulation_steps": 16,
            "gradient_checkpointing": True,
            "type": "mlm",
        },
        "roberta_pysch": {
            "path": "mlaricheva/roberta-psych",
            "fp16": True,
            "max_seq_len": 512,
            "batch_size": 1,
            "accumulation_steps": 16,
            "gradient_checkpointing": True,
            "type": "mlm",
        },
    },
    # Directories for inputs, saved scores, and model checkpoints
    "io": {
        "results_dir": "./results",
        "input_dir": "./data",
        "model_output_dir": "./model_output",
    },
    # Data augmentation settings
    "augmentation": {
        "add_synthetic": False,
        "aug_p": 0.2,
        "glove_file": "data/glove.6B.50d.txt",
        "glove_zip": "data/glove.6B.zip",
        "glove_url": "http://nlp.stanford.edu/data/glove.6B.zip",
    },
    "random": {"seed": 42},
}

In [2]:
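# # Could use PEFT to save memory
# # NOTE(review): the draft below looks copied from a different setup - "q_lin"/"v_lin" are
# # DistilBERT module names and "CAUSAL_LM" is not the sequence-classification task type.
# # For the RoBERTa classifiers trained in this notebook it would presumably need
# # task_type="SEQ_CLS" and target_modules=["query", "value"]; confirm against the PEFT
# # documentation before enabling.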

# from peft import LoraConfig, get_peft_model 

# config = LoraConfig(
#     r=16,
#     lora_alpha=32,
#     lora_dropout=0.05,
#     bias="none",
#     task_type="CAUSAL_LM",
#     target_modules=["q_lin", "v_lin"],
    
# )

# model = get_peft_model(model, config)

##### Mount Google Drive, Install Requirements, and set Cache if Using Colab

In [3]:
if params["env"]["colab"]:

    import os
    from google.colab import drive

    # Mount
    drive.mount("/content/drive")

    # Install packages
    !pip install -q -r "/content/drive/MyDrive/nlp4psychotherapy/requirements.txt"

    # Set HF cache
    os.environ['TRANSFORMERS_CACHE'] = '/content/drive/MyDrive/hf_cache'
    os.environ['HF_DATASETS_CACHE'] = '/content/drive/MyDrive/hf_cache'

##### Check Colab Runtime

In [4]:
if params["env"]["colab"]:
  
    gpu_info = !nvidia-smi
    gpu_info = "\n".join(gpu_info)
    if gpu_info.find("failed") >= 0:
        print("Not connected to a GPU")
    else:
        print(gpu_info)

if params["env"]["require_high_ram"]:

    from psutil import virtual_memory
    ram_gb = virtual_memory().total / 1e9
    print("Your runtime has {:.1f} gigabytes of available RAM\n".format(ram_gb))

    if ram_gb < 20:
        print("Not using a high-RAM runtime")
    else:
        print("You are using a high-RAM runtime!")

Your runtime has 33.6 gigabytes of available RAM

You are using a high-RAM runtime!


##### Imports

In [5]:
import os
import re
import glob
import pickle
import torch
import numpy as np
import pandas as pd
from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    GPT2ForSequenceClassification,
    GPTNeoForSequenceClassification,
    AutoModelForSeq2SeqLM,
    AutoModelForCausalLM,
    Seq2SeqTrainer,
    Seq2SeqTrainingArguments,
    Trainer,
    TrainingArguments,
    EarlyStoppingCallback,
)

##### Disable Tokenizer Parallelism
This is mostly to avoid warnings.

In [6]:
# Switch off parallelism in the tokenizers library via its environment variable
os.environ.update({"TOKENIZERS_PARALLELISM": "false"})

##### Load Formatted Data

In [7]:
# Load the pickled feature folds (X_folds) and label folds (y_folds) for `outcome`.
# NOTE: pickle.load can execute arbitrary code; only load fold files produced by this project.

# Pick the feature file: the "Xwsum" folds when add_summaries is set, otherwise the plain X folds
if params["data"]["add_summaries"]:
    X_folds_file = f"{outcome}_Xwsum_folds.pkl"
else:
    X_folds_file = f"{outcome}_X_folds.pkl"

with open(os.path.join(params["io"]["input_dir"], X_folds_file), "rb") as f:
    X_folds = pickle.load(f)

# The labels are needed whichever feature file is used, so always load them
# (previously they were only loaded when add_summaries was False)
with open(os.path.join(params["io"]["input_dir"], f"{outcome}_y_folds.pkl"), "rb") as f:
    y_folds = pickle.load(f)

##### Check Data Shape

In [8]:
# The feature and label containers must hold the same number of folds
n_X_folds, n_y_folds = len(X_folds), len(y_folds)
assert n_X_folds == n_y_folds, "Expected the same number of folds in X and y."

# Drop the fold keys and keep the per-fold values, in container order
X = [fold for fold in X_folds.values()]
y = [fold for fold in y_folds.values()]

##### Check Target Prevalence

In [9]:
# Mean of the labels pooled across all folds
target_mean = np.mean(np.concatenate(y))
print(f"Target prevalance: {round(target_mean, 3)}.")

Target prevalance: 0.607.


##### Check that GPU is Available

In [10]:
# Stop early when CUDA is not available, then report the installed torch version
cuda_ready = torch.cuda.is_available()
assert cuda_ready, "Run this script on a GPU."
print(torch.__version__)

1.8.1+cu101


##### Select and Preprocess Text and Fit Model to Each Data Fold

In [11]:
# Fit every configured model with K-fold cross-validation. Each fold serves once as the
# held-out test set, one of the remaining folds is drawn at random as the validation set
# used for early stopping, and the other folds are concatenated into the training set.
# Test-set scores and labels are collected per model in y_probs / y_trues.

# Seed NumPy's global RNG so the validation-fold draw and the training shuffle are repeatable
np.random.seed(params["random"]["seed"])

y_probs, y_trues = {}, {}
for model in params["models"].keys():

    # Settings for this model (path, type, sequence length, batch size, ...)
    model_params = params["models"][model]
    model_type = model_params["type"]

    y_probs[model], y_trues[model] = [], []
    for i in range(len(X)):

        # Print model and fold
        print(f"Fitting model: {model} using fold {i} as out of fold test data.")

        # Identify train and test folds
        # (list concatenation builds new lists, so the deletions below do not touch X or y)
        X_train_temp, y_train_temp = X[0:i] + X[i + 1 :], y[0:i] + y[i + 1 :]
        X_test, y_test = X[i], y[i]

        # Select a validation fold at random
        indices_temp = np.arange(len(y_train_temp))
        val_index = np.random.choice(indices_temp)
        X_val, y_val = X_train_temp[val_index], y_train_temp[val_index]

        # Identify the training folds as the indices not including the validation index
        # Concatenate all examples in the training folds to form the full training set
        del X_train_temp[val_index]
        del y_train_temp[val_index]
        X_train = np.concatenate(X_train_temp, axis=0)
        y_train = np.concatenate(y_train_temp, axis=0)

        # Shuffle training data
        indices = np.arange(len(y_train))
        np.random.shuffle(indices)
        X_train, y_train = X_train[indices], y_train[indices]

        # Print data shapes
        print(f"Train data sizes: {len(X_train), len(y_train)}.")
        print(f"Val data sizes: {len(X_val), len(y_val)}.")
        print(f"Test data sizes: {len(X_test), len(y_test)}.")

        # Format text and label data as HuggingFace dataset
        # Seq2seq models get their labels as strings so they can be tokenized as target text
        if model_type == "seq2seq":
            train_dataset = Dataset.from_dict(
                {"text": X_train, "label_ids": [str(label) for label in y_train]}
            )
            val_dataset = Dataset.from_dict(
                {"text": X_val, "label_ids": [str(label) for label in y_val]}
            )
            test_dataset = Dataset.from_dict(
                {"text": X_test, "label_ids": [str(label) for label in y_test]}
            )

        else:
            train_dataset = Dataset.from_dict({"text": X_train, "label": y_train})
            val_dataset = Dataset.from_dict({"text": X_val, "label": y_val})
            test_dataset = Dataset.from_dict({"text": X_test, "label": y_test})

        # Load tokenizer
        tokenizer = AutoTokenizer.from_pretrained(model_params["path"])

        # Load model by model type
        if model_type == "mlm":

            # Load masked language model with a sequence classification head
            lm = AutoModelForSequenceClassification.from_pretrained(
                model_params["path"],
                num_labels=2,
                return_dict=True,
                problem_type="single_label_classification",
            )

        elif model_type == "gpt":

            # Use the end of sentence token as a pad token for GPT models
            tokenizer.pad_token = tokenizer.eos_token

            if model == "gpt2":

                # Load GPT-2
                lm = GPT2ForSequenceClassification.from_pretrained(
                    model_params["path"],
                    num_labels=2,
                    return_dict=True,
                    problem_type="single_label_classification",
                )

            elif "gpt_neo" in model:

                # Load a GPT Neo version
                lm = GPTNeoForSequenceClassification.from_pretrained(
                    model_params["path"],
                    num_labels=2,
                    return_dict=True,
                    problem_type="single_label_classification",
                )

            else:
                raise ValueError("Expected GPT model to be gpt2 or a gpt_neo version.")

        elif model_type == "seq2seq":
            lm = AutoModelForSeq2SeqLM.from_pretrained(model_params["path"])
        elif model_type == "causal":
            lm = AutoModelForCausalLM.from_pretrained(model_params["path"])
        else:
            # Report the offending type (the previous message referenced an undefined
            # name and so raised NameError instead of this ValueError)
            raise ValueError(f"Unexpected model type: {model_type}.")

        # Define function to preprocess and tokenize text
        if model_type == "seq2seq":

            def preprocess_function(
                sample, padding="max_length", output_max_seq_len=20
            ):
                """Tokenize prompted inputs and string labels for a seq2seq model.

                NOTE(review): `output_max_seq_len` is accepted but not read; the target
                length comes from the model's "output_max_seq_len" setting instead.
                """

                # Add prefix to the input for t5
                inputs = [
                    "Classify this text as either 1 or 0: " + item
                    for item in sample["text"]
                ]

                # tokenize inputs
                model_inputs = tokenizer(
                    inputs,
                    max_length=model_params["max_seq_len"],
                    padding=padding,
                    truncation=True,
                )

                # Tokenize targets with the `text_target` keyword argument
                labels = tokenizer(
                    text_target=sample["label_ids"],
                    max_length=model_params["output_max_seq_len"],
                    padding=padding,
                    truncation=True,
                )

                # If we are padding here, replace all tokenizer.pad_token_id in the labels by -100 when we want to ignore
                # padding in the loss.
                if padding == "max_length":
                    labels["input_ids"] = [
                        [(l if l != tokenizer.pad_token_id else -100) for l in label]
                        for label in labels["input_ids"]
                    ]

                model_inputs["label_ids"] = labels["input_ids"]

                return model_inputs

        else:

            def preprocess_function(batch):
                """Tokenize a batch of texts, padded/truncated to the model's max_seq_len."""
                return tokenizer(
                    batch["text"],
                    padding="max_length",
                    truncation=True,
                    max_length=model_params["max_seq_len"],
                )

        # Preprocess datasets
        train_dataset = train_dataset.map(
            preprocess_function,
            batched=True,
            remove_columns=["text"],
            batch_size=model_params["batch_size"],
        )
        train_dataset.set_format("pt")
        val_dataset = val_dataset.map(
            preprocess_function,
            batched=True,
            remove_columns=["text"],
            batch_size=model_params["batch_size"],
        )
        val_dataset.set_format("pt")
        test_dataset = test_dataset.map(
            preprocess_function,
            batched=True,
            remove_columns=["text"],
            batch_size=model_params["batch_size"],
        )
        test_dataset.set_format("pt")

        # Define training arguments
        training_args = TrainingArguments(
            output_dir=params["io"]["model_output_dir"],
            num_train_epochs=params["training"]["epochs"],
            warmup_steps=params["training"]["warmup_steps"],
            weight_decay=params["training"]["weight_decay"],
            learning_rate=params["training"]["lr"],
            adam_beta1=params["training"]["adam_beta1"],
            adam_beta2=params["training"]["adam_beta2"],
            adam_epsilon=params["training"]["adam_epsilon"],
            dataloader_num_workers=params["training"]["num_workers"],
            logging_strategy=params["training"]["logging_strategy"],
            seed=params["random"]["seed"],
            # run_name must be a string; the per-model settings dict was passed here before
            run_name=f"{model}_fold_{i}",
            fp16=model_params["fp16"],
            gradient_checkpointing=model_params["gradient_checkpointing"],
            per_device_train_batch_size=model_params["batch_size"],
            per_device_eval_batch_size=model_params["batch_size"],
            gradient_accumulation_steps=model_params["accumulation_steps"],
            evaluation_strategy=params["evaluation"]["evaluation_strategy"],
            save_strategy=params["evaluation"]["save_strategy"],
            fp16_full_eval=params["evaluation"]["fp16_full_eval"],
            eval_accumulation_steps=params["evaluation"]["eval_accumulation_steps"],
            save_total_limit=params["evaluation"]["save_total_limit"],
            lr_scheduler_type="linear",
            optim="adamw_torch",
            prediction_loss_only=False,
            load_best_model_at_end=True,
            disable_tqdm=True,
            logging_dir=None,
        )

        # Define special training arguments
        if model_type == "seq2seq":
            training_args.generation_max_length = model_params["output_max_seq_len"]
            training_args.predict_with_generate = True
            training_args.generation_num_beams = None

        # Define early stopping callback
        early_stopping = EarlyStoppingCallback(
            early_stopping_patience=params["training"]["early_stopping_patience"]
        )

        # Define trainer
        if model_type == "seq2seq":
            trainer = Seq2SeqTrainer(
                model=lm,
                args=training_args,
                train_dataset=train_dataset,
                eval_dataset=val_dataset,
                callbacks=[early_stopping],
            )
        else:
            trainer = Trainer(
                model=lm,
                args=training_args,
                train_dataset=train_dataset,
                eval_dataset=val_dataset,
                callbacks=[early_stopping],
            )

        # Train model
        trainer.train()

        # Predict on test dataset for seq2seq models
        if model_type == "seq2seq":

            # Predict on test dataset with greedy generation
            output = trainer.predict(
                test_dataset,
                do_sample=False,
                max_length=model_params["output_max_seq_len"],
                early_stopping=True,
            )
            preds_decoded = tokenizer.batch_decode(
                output.predictions, skip_special_tokens=True
            )
            # Put the pad token back where -100 was used so the labels can be decoded
            labels = np.where(
                output.label_ids != -100, output.label_ids, tokenizer.pad_token_id
            )
            labels_decoded = tokenizer.batch_decode(labels, skip_special_tokens=False)

            # Convert preds to ints
            # We allow additional characters to be generated but check
            # that the first one is a 1 or 0
            preds = []
            for pred in preds_decoded:
                # Slice rather than index so an empty generation falls through to the
                # fallback below instead of raising IndexError
                first_char = pred[:1]
                if first_char == "1":
                    preds.append(1)
                elif first_char == "0":
                    preds.append(0)
                else:
                    print(f"Got unexpected pred: {pred}.")
                    preds.append(np.random.choice([0, 1]))

            # Save scores and labels
            # The labels may contain additional characters, but the first should be
            # a 1 or 0
            y_probs[model].append(preds)
            y_trues[model].append([int(label[0]) for label in labels_decoded])

        # Predict on test set for other model types
        else:
            # Generate scores
            output = trainer.predict(test_dataset)
            labels = output.label_ids

            # The classification head emits one logit per class, so the probability of
            # class 1 is the softmax over both logits (an element-wise sigmoid of the
            # class-1 logit ignores the class-0 logit)
            logits = torch.tensor(output.predictions).double()
            y_prob = torch.softmax(logits, dim=-1).numpy()[:, 1]

            # Save scores and labels
            y_probs[model].append(y_prob)
            y_trues[model].append(labels)

        # Empty cuda cache
        torch.cuda.empty_cache()
        
# Write the collected labels and scores to the results directory as pickles.

# Results from runs with summaries get a "sum_" marker in their file names
sums = "sum_" if params["data"]["add_summaries"] else ""

# Create the results directory if needed so the writes below cannot fail
# on a missing folder after training has finished
os.makedirs(params["io"]["results_dir"], exist_ok=True)

# Save results
with open(os.path.join(params["io"]["results_dir"], f"{outcome}_{sums}lm_y_trues.pkl"), "wb") as f:
    pickle.dump(y_trues, f)

with open(os.path.join(params["io"]["results_dir"], f"{outcome}_{sums}lm_y_probs.pkl"), "wb") as f:
    pickle.dump(y_probs, f)

Fitting model: mental_roberta_base using fold 0 as out of fold test data.
Train data sizes: (96, 96).
Val data sizes: (10, 10).
Test data sizes: (11, 11).


Some weights of the model checkpoint at ./models/mental-roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at ./models/mental-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classif

Map:   0%|          | 0/96 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Map:   0%|          | 0/11 [00:00<?, ? examples/s]

Using cuda_amp half precision backend
***** Running training *****
  Num examples = 96
  Num Epochs = 1000
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 16
  Total optimization steps = 6000
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6896, 'learning_rate': 6.000000000000001e-07, 'epoch': 1.0}


Saving model checkpoint to ./model_output/checkpoint-6
Configuration saved in ./model_output/checkpoint-6/config.json


{'eval_loss': 0.6713536381721497, 'eval_runtime': 1.6963, 'eval_samples_per_second': 5.895, 'eval_steps_per_second': 5.895, 'epoch': 1.0}


Model weights saved in ./model_output/checkpoint-6/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-150] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6906, 'learning_rate': 1.2000000000000002e-06, 'epoch': 2.0}


Saving model checkpoint to ./model_output/checkpoint-12
Configuration saved in ./model_output/checkpoint-12/config.json


{'eval_loss': 0.6677708625793457, 'eval_runtime': 1.6893, 'eval_samples_per_second': 5.92, 'eval_steps_per_second': 5.92, 'epoch': 2.0}


Model weights saved in ./model_output/checkpoint-12/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-155] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6855, 'learning_rate': 1.8000000000000001e-06, 'epoch': 3.0}


Saving model checkpoint to ./model_output/checkpoint-18
Configuration saved in ./model_output/checkpoint-18/config.json


{'eval_loss': 0.660723090171814, 'eval_runtime': 1.6907, 'eval_samples_per_second': 5.915, 'eval_steps_per_second': 5.915, 'epoch': 3.0}


Model weights saved in ./model_output/checkpoint-18/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-6] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6849, 'learning_rate': 2.4000000000000003e-06, 'epoch': 4.0}


Saving model checkpoint to ./model_output/checkpoint-24
Configuration saved in ./model_output/checkpoint-24/config.json


{'eval_loss': 0.6509863138198853, 'eval_runtime': 1.6907, 'eval_samples_per_second': 5.915, 'eval_steps_per_second': 5.915, 'epoch': 4.0}


Model weights saved in ./model_output/checkpoint-24/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-12] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6906, 'learning_rate': 3e-06, 'epoch': 5.0}


Saving model checkpoint to ./model_output/checkpoint-30
Configuration saved in ./model_output/checkpoint-30/config.json


{'eval_loss': 0.6407142877578735, 'eval_runtime': 1.6892, 'eval_samples_per_second': 5.92, 'eval_steps_per_second': 5.92, 'epoch': 5.0}


Model weights saved in ./model_output/checkpoint-30/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-18] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6864, 'learning_rate': 3.6000000000000003e-06, 'epoch': 6.0}


Saving model checkpoint to ./model_output/checkpoint-36
Configuration saved in ./model_output/checkpoint-36/config.json


{'eval_loss': 0.629675567150116, 'eval_runtime': 1.691, 'eval_samples_per_second': 5.914, 'eval_steps_per_second': 5.914, 'epoch': 6.0}


Model weights saved in ./model_output/checkpoint-36/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-24] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6811, 'learning_rate': 4.2000000000000004e-06, 'epoch': 7.0}


Saving model checkpoint to ./model_output/checkpoint-42
Configuration saved in ./model_output/checkpoint-42/config.json


{'eval_loss': 0.6235064268112183, 'eval_runtime': 1.6859, 'eval_samples_per_second': 5.931, 'eval_steps_per_second': 5.931, 'epoch': 7.0}


Model weights saved in ./model_output/checkpoint-42/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-30] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6734, 'learning_rate': 4.800000000000001e-06, 'epoch': 8.0}


Saving model checkpoint to ./model_output/checkpoint-48
Configuration saved in ./model_output/checkpoint-48/config.json


{'eval_loss': 0.6082785129547119, 'eval_runtime': 1.6878, 'eval_samples_per_second': 5.925, 'eval_steps_per_second': 5.925, 'epoch': 8.0}


Model weights saved in ./model_output/checkpoint-48/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-36] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.67, 'learning_rate': 4.996638655462185e-06, 'epoch': 9.0}


Saving model checkpoint to ./model_output/checkpoint-54
Configuration saved in ./model_output/checkpoint-54/config.json


{'eval_loss': 0.5948424935340881, 'eval_runtime': 1.689, 'eval_samples_per_second': 5.921, 'eval_steps_per_second': 5.921, 'epoch': 9.0}


Model weights saved in ./model_output/checkpoint-54/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-42] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.666, 'learning_rate': 4.9915966386554625e-06, 'epoch': 10.0}


Saving model checkpoint to ./model_output/checkpoint-60
Configuration saved in ./model_output/checkpoint-60/config.json


{'eval_loss': 0.5828467607498169, 'eval_runtime': 1.6882, 'eval_samples_per_second': 5.924, 'eval_steps_per_second': 5.924, 'epoch': 10.0}


Model weights saved in ./model_output/checkpoint-60/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-48] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6591, 'learning_rate': 4.98655462184874e-06, 'epoch': 11.0}


Saving model checkpoint to ./model_output/checkpoint-66
Configuration saved in ./model_output/checkpoint-66/config.json


{'eval_loss': 0.5638015866279602, 'eval_runtime': 1.6892, 'eval_samples_per_second': 5.92, 'eval_steps_per_second': 5.92, 'epoch': 11.0}


Model weights saved in ./model_output/checkpoint-66/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-54] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6556, 'learning_rate': 4.9815126050420174e-06, 'epoch': 12.0}


Saving model checkpoint to ./model_output/checkpoint-72
Configuration saved in ./model_output/checkpoint-72/config.json


{'eval_loss': 0.5388167500495911, 'eval_runtime': 1.685, 'eval_samples_per_second': 5.935, 'eval_steps_per_second': 5.935, 'epoch': 12.0}


Model weights saved in ./model_output/checkpoint-72/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-60] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6457, 'learning_rate': 4.976470588235294e-06, 'epoch': 13.0}


Saving model checkpoint to ./model_output/checkpoint-78
Configuration saved in ./model_output/checkpoint-78/config.json


{'eval_loss': 0.5166257619857788, 'eval_runtime': 1.6888, 'eval_samples_per_second': 5.921, 'eval_steps_per_second': 5.921, 'epoch': 13.0}


Model weights saved in ./model_output/checkpoint-78/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-66] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.647, 'learning_rate': 4.971428571428572e-06, 'epoch': 14.0}


Saving model checkpoint to ./model_output/checkpoint-84
Configuration saved in ./model_output/checkpoint-84/config.json


{'eval_loss': 0.4984477162361145, 'eval_runtime': 1.6858, 'eval_samples_per_second': 5.932, 'eval_steps_per_second': 5.932, 'epoch': 14.0}


Model weights saved in ./model_output/checkpoint-84/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-72] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6117, 'learning_rate': 4.966386554621849e-06, 'epoch': 15.0}


Saving model checkpoint to ./model_output/checkpoint-90
Configuration saved in ./model_output/checkpoint-90/config.json


{'eval_loss': 0.4846232533454895, 'eval_runtime': 1.6866, 'eval_samples_per_second': 5.929, 'eval_steps_per_second': 5.929, 'epoch': 15.0}


Model weights saved in ./model_output/checkpoint-90/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-78] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5842, 'learning_rate': 4.961344537815126e-06, 'epoch': 16.0}


Saving model checkpoint to ./model_output/checkpoint-96
Configuration saved in ./model_output/checkpoint-96/config.json


{'eval_loss': 0.45817700028419495, 'eval_runtime': 1.6859, 'eval_samples_per_second': 5.931, 'eval_steps_per_second': 5.931, 'epoch': 16.0}


Model weights saved in ./model_output/checkpoint-96/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-84] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5493, 'learning_rate': 4.9571428571428575e-06, 'epoch': 17.0}


Saving model checkpoint to ./model_output/checkpoint-102
Configuration saved in ./model_output/checkpoint-102/config.json


{'eval_loss': 0.4283390939235687, 'eval_runtime': 1.6997, 'eval_samples_per_second': 5.883, 'eval_steps_per_second': 5.883, 'epoch': 17.0}


Model weights saved in ./model_output/checkpoint-102/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-90] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.537, 'learning_rate': 4.952100840336135e-06, 'epoch': 18.0}


Saving model checkpoint to ./model_output/checkpoint-108
Configuration saved in ./model_output/checkpoint-108/config.json


{'eval_loss': 0.4197138845920563, 'eval_runtime': 1.6876, 'eval_samples_per_second': 5.926, 'eval_steps_per_second': 5.926, 'epoch': 18.0}


Model weights saved in ./model_output/checkpoint-108/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-96] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5113, 'learning_rate': 4.947058823529412e-06, 'epoch': 19.0}


Saving model checkpoint to ./model_output/checkpoint-114
Configuration saved in ./model_output/checkpoint-114/config.json


{'eval_loss': 0.4291664659976959, 'eval_runtime': 1.6929, 'eval_samples_per_second': 5.907, 'eval_steps_per_second': 5.907, 'epoch': 19.0}


Model weights saved in ./model_output/checkpoint-114/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-102] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4577, 'learning_rate': 4.942016806722689e-06, 'epoch': 20.0}


Saving model checkpoint to ./model_output/checkpoint-120
Configuration saved in ./model_output/checkpoint-120/config.json


{'eval_loss': 0.40057888627052307, 'eval_runtime': 1.6887, 'eval_samples_per_second': 5.922, 'eval_steps_per_second': 5.922, 'epoch': 20.0}


Model weights saved in ./model_output/checkpoint-120/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-108] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4228, 'learning_rate': 4.936974789915967e-06, 'epoch': 21.0}


Saving model checkpoint to ./model_output/checkpoint-126
Configuration saved in ./model_output/checkpoint-126/config.json


{'eval_loss': 0.38784924149513245, 'eval_runtime': 1.6837, 'eval_samples_per_second': 5.939, 'eval_steps_per_second': 5.939, 'epoch': 21.0}


Model weights saved in ./model_output/checkpoint-126/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-114] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4011, 'learning_rate': 4.931932773109244e-06, 'epoch': 22.0}


Saving model checkpoint to ./model_output/checkpoint-132
Configuration saved in ./model_output/checkpoint-132/config.json


{'eval_loss': 0.38083213567733765, 'eval_runtime': 1.6885, 'eval_samples_per_second': 5.923, 'eval_steps_per_second': 5.923, 'epoch': 22.0}


Model weights saved in ./model_output/checkpoint-132/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-120] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.3535, 'learning_rate': 4.926890756302521e-06, 'epoch': 23.0}


Saving model checkpoint to ./model_output/checkpoint-138
Configuration saved in ./model_output/checkpoint-138/config.json


{'eval_loss': 0.38070255517959595, 'eval_runtime': 1.6874, 'eval_samples_per_second': 5.926, 'eval_steps_per_second': 5.926, 'epoch': 23.0}


Model weights saved in ./model_output/checkpoint-138/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-126] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.292, 'learning_rate': 4.921848739495799e-06, 'epoch': 24.0}


Saving model checkpoint to ./model_output/checkpoint-144
Configuration saved in ./model_output/checkpoint-144/config.json


{'eval_loss': 0.35015642642974854, 'eval_runtime': 1.686, 'eval_samples_per_second': 5.931, 'eval_steps_per_second': 5.931, 'epoch': 24.0}


Model weights saved in ./model_output/checkpoint-144/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-132] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2394, 'learning_rate': 4.916806722689076e-06, 'epoch': 25.0}


Saving model checkpoint to ./model_output/checkpoint-150
Configuration saved in ./model_output/checkpoint-150/config.json


{'eval_loss': 0.37663906812667847, 'eval_runtime': 1.6876, 'eval_samples_per_second': 5.926, 'eval_steps_per_second': 5.926, 'epoch': 25.0}


Model weights saved in ./model_output/checkpoint-150/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-138] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1788, 'learning_rate': 4.911764705882353e-06, 'epoch': 26.0}


Saving model checkpoint to ./model_output/checkpoint-156
Configuration saved in ./model_output/checkpoint-156/config.json


{'eval_loss': 0.3070192039012909, 'eval_runtime': 1.694, 'eval_samples_per_second': 5.903, 'eval_steps_per_second': 5.903, 'epoch': 26.0}


Model weights saved in ./model_output/checkpoint-156/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-144] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1264, 'learning_rate': 4.907563025210084e-06, 'epoch': 27.0}


Saving model checkpoint to ./model_output/checkpoint-162
Configuration saved in ./model_output/checkpoint-162/config.json


{'eval_loss': 0.3602561056613922, 'eval_runtime': 1.6992, 'eval_samples_per_second': 5.885, 'eval_steps_per_second': 5.885, 'epoch': 27.0}


Model weights saved in ./model_output/checkpoint-162/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-150] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0887, 'learning_rate': 4.902521008403362e-06, 'epoch': 28.0}


Saving model checkpoint to ./model_output/checkpoint-168
Configuration saved in ./model_output/checkpoint-168/config.json


{'eval_loss': 0.2659120261669159, 'eval_runtime': 1.6863, 'eval_samples_per_second': 5.93, 'eval_steps_per_second': 5.93, 'epoch': 28.0}


Model weights saved in ./model_output/checkpoint-168/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-156] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0628, 'learning_rate': 4.897478991596639e-06, 'epoch': 29.0}


Saving model checkpoint to ./model_output/checkpoint-174
Configuration saved in ./model_output/checkpoint-174/config.json


{'eval_loss': 0.5449453592300415, 'eval_runtime': 1.6879, 'eval_samples_per_second': 5.925, 'eval_steps_per_second': 5.925, 'epoch': 29.0}


Model weights saved in ./model_output/checkpoint-174/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-162] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0324, 'learning_rate': 4.892436974789916e-06, 'epoch': 30.0}


Saving model checkpoint to ./model_output/checkpoint-180
Configuration saved in ./model_output/checkpoint-180/config.json


{'eval_loss': 0.2957504689693451, 'eval_runtime': 1.687, 'eval_samples_per_second': 5.928, 'eval_steps_per_second': 5.928, 'epoch': 30.0}


Model weights saved in ./model_output/checkpoint-180/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-174] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0254, 'learning_rate': 4.887394957983194e-06, 'epoch': 31.0}


Saving model checkpoint to ./model_output/checkpoint-186
Configuration saved in ./model_output/checkpoint-186/config.json


{'eval_loss': 0.949724018573761, 'eval_runtime': 1.6886, 'eval_samples_per_second': 5.922, 'eval_steps_per_second': 5.922, 'epoch': 31.0}


Model weights saved in ./model_output/checkpoint-186/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-180] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0162, 'learning_rate': 4.882352941176471e-06, 'epoch': 32.0}


Saving model checkpoint to ./model_output/checkpoint-192
Configuration saved in ./model_output/checkpoint-192/config.json


{'eval_loss': 0.2845109701156616, 'eval_runtime': 1.6872, 'eval_samples_per_second': 5.927, 'eval_steps_per_second': 5.927, 'epoch': 32.0}


Model weights saved in ./model_output/checkpoint-192/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-186] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0114, 'learning_rate': 4.877310924369748e-06, 'epoch': 33.0}


Saving model checkpoint to ./model_output/checkpoint-198
Configuration saved in ./model_output/checkpoint-198/config.json


{'eval_loss': 0.5194439888000488, 'eval_runtime': 1.6864, 'eval_samples_per_second': 5.93, 'eval_steps_per_second': 5.93, 'epoch': 33.0}


Model weights saved in ./model_output/checkpoint-198/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-192] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0077, 'learning_rate': 4.872268907563026e-06, 'epoch': 34.0}


Saving model checkpoint to ./model_output/checkpoint-204
Configuration saved in ./model_output/checkpoint-204/config.json


{'eval_loss': 0.9283390045166016, 'eval_runtime': 1.691, 'eval_samples_per_second': 5.913, 'eval_steps_per_second': 5.913, 'epoch': 34.0}


Model weights saved in ./model_output/checkpoint-204/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-198] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0061, 'learning_rate': 4.867226890756303e-06, 'epoch': 35.0}


Saving model checkpoint to ./model_output/checkpoint-210
Configuration saved in ./model_output/checkpoint-210/config.json


{'eval_loss': 0.44658365845680237, 'eval_runtime': 1.688, 'eval_samples_per_second': 5.924, 'eval_steps_per_second': 5.924, 'epoch': 35.0}


Model weights saved in ./model_output/checkpoint-210/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-204] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0043, 'learning_rate': 4.86218487394958e-06, 'epoch': 36.0}


Saving model checkpoint to ./model_output/checkpoint-216
Configuration saved in ./model_output/checkpoint-216/config.json


{'eval_loss': 0.6537527441978455, 'eval_runtime': 1.7098, 'eval_samples_per_second': 5.849, 'eval_steps_per_second': 5.849, 'epoch': 36.0}


Model weights saved in ./model_output/checkpoint-216/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-210] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0036, 'learning_rate': 4.857142857142858e-06, 'epoch': 37.0}


Saving model checkpoint to ./model_output/checkpoint-222
Configuration saved in ./model_output/checkpoint-222/config.json


{'eval_loss': 0.4692930579185486, 'eval_runtime': 1.6864, 'eval_samples_per_second': 5.93, 'eval_steps_per_second': 5.93, 'epoch': 37.0}


Model weights saved in ./model_output/checkpoint-222/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-216] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.003, 'learning_rate': 4.852100840336135e-06, 'epoch': 38.0}


Saving model checkpoint to ./model_output/checkpoint-228
Configuration saved in ./model_output/checkpoint-228/config.json


{'eval_loss': 0.4641410708427429, 'eval_runtime': 1.6884, 'eval_samples_per_second': 5.923, 'eval_steps_per_second': 5.923, 'epoch': 38.0}


Model weights saved in ./model_output/checkpoint-228/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-222] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./model_output/checkpoint-168 (score: 0.2659120261669159).
***** Running Prediction *****
  Num examples = 11
  Batch size = 1


{'train_runtime': 2392.5024, 'train_samples_per_second': 40.125, 'train_steps_per_second': 2.508, 'train_loss': 0.39348444250297915, 'epoch': 38.0}


loading file vocab.json
loading file merges.txt
loading file tokenizer.json
loading file added_tokens.json
loading file special_tokens_map.json
loading file tokenizer_config.json
loading configuration file ./models/mental-roberta-base/config.json
Model config RobertaConfig {
  "_name_or_path": "./models/mental-roberta-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,

Fitting model: mental_roberta_base using fold 1 as out of fold test data.
Train data sizes: (92, 92).
Val data sizes: (10, 10).
Test data sizes: (15, 15).


Some weights of the model checkpoint at ./models/mental-roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at ./models/mental-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classif

Map:   0%|          | 0/92 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Map:   0%|          | 0/15 [00:00<?, ? examples/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using cuda_amp half precision backend
***** Running training *****
  Num examples = 92
  Num Epochs = 1000
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 16
  Total optimization steps = 5000
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.811, 'learning_rate': 5.000000000000001e-07, 'epoch': 0.87}


Saving model checkpoint to ./model_output/checkpoint-5
Configuration saved in ./model_output/checkpoint-5/config.json


{'eval_loss': 0.7299544811248779, 'eval_runtime': 1.7013, 'eval_samples_per_second': 5.878, 'eval_steps_per_second': 5.878, 'epoch': 0.87}


Model weights saved in ./model_output/checkpoint-5/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-168] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.811, 'learning_rate': 1.0000000000000002e-06, 'epoch': 1.87}


Saving model checkpoint to ./model_output/checkpoint-10
Configuration saved in ./model_output/checkpoint-10/config.json


{'eval_loss': 0.7259731888771057, 'eval_runtime': 1.7036, 'eval_samples_per_second': 5.87, 'eval_steps_per_second': 5.87, 'epoch': 1.87}


Model weights saved in ./model_output/checkpoint-10/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-228] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8121, 'learning_rate': 1.5e-06, 'epoch': 2.87}


Saving model checkpoint to ./model_output/checkpoint-15
Configuration saved in ./model_output/checkpoint-15/config.json


{'eval_loss': 0.719746470451355, 'eval_runtime': 1.7015, 'eval_samples_per_second': 5.877, 'eval_steps_per_second': 5.877, 'epoch': 2.87}


Model weights saved in ./model_output/checkpoint-15/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-5] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7966, 'learning_rate': 2.0000000000000003e-06, 'epoch': 3.87}


Saving model checkpoint to ./model_output/checkpoint-20
Configuration saved in ./model_output/checkpoint-20/config.json


{'eval_loss': 0.7103256583213806, 'eval_runtime': 1.7035, 'eval_samples_per_second': 5.87, 'eval_steps_per_second': 5.87, 'epoch': 3.87}


Model weights saved in ./model_output/checkpoint-20/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-10] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8001, 'learning_rate': 2.5e-06, 'epoch': 4.87}


Saving model checkpoint to ./model_output/checkpoint-25
Configuration saved in ./model_output/checkpoint-25/config.json


{'eval_loss': 0.6989002823829651, 'eval_runtime': 1.7002, 'eval_samples_per_second': 5.882, 'eval_steps_per_second': 5.882, 'epoch': 4.87}


Model weights saved in ./model_output/checkpoint-25/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-15] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7968, 'learning_rate': 3e-06, 'epoch': 5.87}


Saving model checkpoint to ./model_output/checkpoint-30
Configuration saved in ./model_output/checkpoint-30/config.json


{'eval_loss': 0.6877397298812866, 'eval_runtime': 1.7007, 'eval_samples_per_second': 5.88, 'eval_steps_per_second': 5.88, 'epoch': 5.87}


Model weights saved in ./model_output/checkpoint-30/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-20] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7835, 'learning_rate': 3.5e-06, 'epoch': 6.87}


Saving model checkpoint to ./model_output/checkpoint-35
Configuration saved in ./model_output/checkpoint-35/config.json


{'eval_loss': 0.6749366521835327, 'eval_runtime': 1.7023, 'eval_samples_per_second': 5.874, 'eval_steps_per_second': 5.874, 'epoch': 6.87}


Model weights saved in ./model_output/checkpoint-35/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-25] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7716, 'learning_rate': 4.000000000000001e-06, 'epoch': 7.87}


Saving model checkpoint to ./model_output/checkpoint-40
Configuration saved in ./model_output/checkpoint-40/config.json


{'eval_loss': 0.6579686403274536, 'eval_runtime': 1.7026, 'eval_samples_per_second': 5.873, 'eval_steps_per_second': 5.873, 'epoch': 7.87}


Model weights saved in ./model_output/checkpoint-40/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-30] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7758, 'learning_rate': 4.5e-06, 'epoch': 8.87}


Saving model checkpoint to ./model_output/checkpoint-45
Configuration saved in ./model_output/checkpoint-45/config.json


{'eval_loss': 0.6380402445793152, 'eval_runtime': 1.7052, 'eval_samples_per_second': 5.864, 'eval_steps_per_second': 5.864, 'epoch': 8.87}


Model weights saved in ./model_output/checkpoint-45/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-35] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.768, 'learning_rate': 5e-06, 'epoch': 9.87}


Saving model checkpoint to ./model_output/checkpoint-50
Configuration saved in ./model_output/checkpoint-50/config.json


{'eval_loss': 0.6204364895820618, 'eval_runtime': 1.6992, 'eval_samples_per_second': 5.885, 'eval_steps_per_second': 5.885, 'epoch': 9.87}


Model weights saved in ./model_output/checkpoint-50/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-40] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7495, 'learning_rate': 4.9949494949494956e-06, 'epoch': 10.87}


Saving model checkpoint to ./model_output/checkpoint-55
Configuration saved in ./model_output/checkpoint-55/config.json


{'eval_loss': 0.6070041656494141, 'eval_runtime': 1.7004, 'eval_samples_per_second': 5.881, 'eval_steps_per_second': 5.881, 'epoch': 10.87}


Model weights saved in ./model_output/checkpoint-55/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-45] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7422, 'learning_rate': 4.98989898989899e-06, 'epoch': 11.87}


Saving model checkpoint to ./model_output/checkpoint-60
Configuration saved in ./model_output/checkpoint-60/config.json


{'eval_loss': 0.5899472236633301, 'eval_runtime': 1.7051, 'eval_samples_per_second': 5.865, 'eval_steps_per_second': 5.865, 'epoch': 11.87}


Model weights saved in ./model_output/checkpoint-60/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-50] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7594, 'learning_rate': 4.984848484848485e-06, 'epoch': 12.87}


Saving model checkpoint to ./model_output/checkpoint-65
Configuration saved in ./model_output/checkpoint-65/config.json


{'eval_loss': 0.577194333076477, 'eval_runtime': 1.6964, 'eval_samples_per_second': 5.895, 'eval_steps_per_second': 5.895, 'epoch': 12.87}


Model weights saved in ./model_output/checkpoint-65/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-55] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7242, 'learning_rate': 4.97979797979798e-06, 'epoch': 13.87}


Saving model checkpoint to ./model_output/checkpoint-70
Configuration saved in ./model_output/checkpoint-70/config.json


{'eval_loss': 0.5660297274589539, 'eval_runtime': 1.7037, 'eval_samples_per_second': 5.87, 'eval_steps_per_second': 5.87, 'epoch': 13.87}


Model weights saved in ./model_output/checkpoint-70/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-60] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7117, 'learning_rate': 4.974747474747475e-06, 'epoch': 14.87}


Saving model checkpoint to ./model_output/checkpoint-75
Configuration saved in ./model_output/checkpoint-75/config.json


{'eval_loss': 0.550052285194397, 'eval_runtime': 1.6982, 'eval_samples_per_second': 5.889, 'eval_steps_per_second': 5.889, 'epoch': 14.87}


Model weights saved in ./model_output/checkpoint-75/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-65] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7158, 'learning_rate': 4.970707070707071e-06, 'epoch': 15.87}


Saving model checkpoint to ./model_output/checkpoint-80
Configuration saved in ./model_output/checkpoint-80/config.json


{'eval_loss': 0.5474663972854614, 'eval_runtime': 1.7009, 'eval_samples_per_second': 5.879, 'eval_steps_per_second': 5.879, 'epoch': 15.87}


Model weights saved in ./model_output/checkpoint-80/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-70] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7351, 'learning_rate': 4.965656565656566e-06, 'epoch': 16.87}


Saving model checkpoint to ./model_output/checkpoint-85
Configuration saved in ./model_output/checkpoint-85/config.json


{'eval_loss': 0.5388776659965515, 'eval_runtime': 1.6928, 'eval_samples_per_second': 5.907, 'eval_steps_per_second': 5.907, 'epoch': 16.87}


Model weights saved in ./model_output/checkpoint-85/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-75] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6895, 'learning_rate': 4.9606060606060605e-06, 'epoch': 17.87}


Saving model checkpoint to ./model_output/checkpoint-90
Configuration saved in ./model_output/checkpoint-90/config.json


{'eval_loss': 0.5303799510002136, 'eval_runtime': 1.7015, 'eval_samples_per_second': 5.877, 'eval_steps_per_second': 5.877, 'epoch': 17.87}


Model weights saved in ./model_output/checkpoint-90/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-80] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6621, 'learning_rate': 4.9555555555555565e-06, 'epoch': 18.87}


Saving model checkpoint to ./model_output/checkpoint-95
Configuration saved in ./model_output/checkpoint-95/config.json


{'eval_loss': 0.5192182660102844, 'eval_runtime': 1.7021, 'eval_samples_per_second': 5.875, 'eval_steps_per_second': 5.875, 'epoch': 18.87}


Model weights saved in ./model_output/checkpoint-95/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-85] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6197, 'learning_rate': 4.950505050505051e-06, 'epoch': 19.87}


Saving model checkpoint to ./model_output/checkpoint-100
Configuration saved in ./model_output/checkpoint-100/config.json


{'eval_loss': 0.5092406272888184, 'eval_runtime': 1.7059, 'eval_samples_per_second': 5.862, 'eval_steps_per_second': 5.862, 'epoch': 19.87}


Model weights saved in ./model_output/checkpoint-100/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-90] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6118, 'learning_rate': 4.945454545454546e-06, 'epoch': 20.87}


Saving model checkpoint to ./model_output/checkpoint-105
Configuration saved in ./model_output/checkpoint-105/config.json


{'eval_loss': 0.5173724293708801, 'eval_runtime': 1.7038, 'eval_samples_per_second': 5.869, 'eval_steps_per_second': 5.869, 'epoch': 20.87}


Model weights saved in ./model_output/checkpoint-105/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-95] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5737, 'learning_rate': 4.940404040404041e-06, 'epoch': 21.87}


Saving model checkpoint to ./model_output/checkpoint-110
Configuration saved in ./model_output/checkpoint-110/config.json


{'eval_loss': 0.48944276571273804, 'eval_runtime': 1.7074, 'eval_samples_per_second': 5.857, 'eval_steps_per_second': 5.857, 'epoch': 21.87}


Model weights saved in ./model_output/checkpoint-110/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-100] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5652, 'learning_rate': 4.935353535353536e-06, 'epoch': 22.87}


Saving model checkpoint to ./model_output/checkpoint-115
Configuration saved in ./model_output/checkpoint-115/config.json


{'eval_loss': 0.4785131812095642, 'eval_runtime': 1.7027, 'eval_samples_per_second': 5.873, 'eval_steps_per_second': 5.873, 'epoch': 22.87}


Model weights saved in ./model_output/checkpoint-115/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-105] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5238, 'learning_rate': 4.9303030303030305e-06, 'epoch': 23.87}


Saving model checkpoint to ./model_output/checkpoint-120
Configuration saved in ./model_output/checkpoint-120/config.json


{'eval_loss': 0.48876363039016724, 'eval_runtime': 1.7057, 'eval_samples_per_second': 5.863, 'eval_steps_per_second': 5.863, 'epoch': 23.87}


Model weights saved in ./model_output/checkpoint-120/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-110] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4711, 'learning_rate': 4.925252525252526e-06, 'epoch': 24.87}


Saving model checkpoint to ./model_output/checkpoint-125
Configuration saved in ./model_output/checkpoint-125/config.json


{'eval_loss': 0.4816505014896393, 'eval_runtime': 1.7004, 'eval_samples_per_second': 5.881, 'eval_steps_per_second': 5.881, 'epoch': 24.87}


Model weights saved in ./model_output/checkpoint-125/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-120] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4519, 'learning_rate': 4.920202020202021e-06, 'epoch': 25.87}


Saving model checkpoint to ./model_output/checkpoint-130
Configuration saved in ./model_output/checkpoint-130/config.json


{'eval_loss': 0.46302223205566406, 'eval_runtime': 1.7048, 'eval_samples_per_second': 5.866, 'eval_steps_per_second': 5.866, 'epoch': 25.87}


Model weights saved in ./model_output/checkpoint-130/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-115] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4001, 'learning_rate': 4.915151515151516e-06, 'epoch': 26.87}


Saving model checkpoint to ./model_output/checkpoint-135
Configuration saved in ./model_output/checkpoint-135/config.json


{'eval_loss': 0.5254676938056946, 'eval_runtime': 1.7006, 'eval_samples_per_second': 5.88, 'eval_steps_per_second': 5.88, 'epoch': 26.87}


Model weights saved in ./model_output/checkpoint-135/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-125] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.3611, 'learning_rate': 4.91010101010101e-06, 'epoch': 27.87}


Saving model checkpoint to ./model_output/checkpoint-140
Configuration saved in ./model_output/checkpoint-140/config.json


{'eval_loss': 0.4763363301753998, 'eval_runtime': 1.7063, 'eval_samples_per_second': 5.861, 'eval_steps_per_second': 5.861, 'epoch': 27.87}


Model weights saved in ./model_output/checkpoint-140/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-135] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.3349, 'learning_rate': 4.905050505050505e-06, 'epoch': 28.87}


Saving model checkpoint to ./model_output/checkpoint-145
Configuration saved in ./model_output/checkpoint-145/config.json


{'eval_loss': 0.5255115032196045, 'eval_runtime': 1.702, 'eval_samples_per_second': 5.875, 'eval_steps_per_second': 5.875, 'epoch': 28.87}


Model weights saved in ./model_output/checkpoint-145/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-140] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2792, 'learning_rate': 4.9000000000000005e-06, 'epoch': 29.87}


Saving model checkpoint to ./model_output/checkpoint-150
Configuration saved in ./model_output/checkpoint-150/config.json


{'eval_loss': 0.6150530576705933, 'eval_runtime': 1.6991, 'eval_samples_per_second': 5.886, 'eval_steps_per_second': 5.886, 'epoch': 29.87}


Model weights saved in ./model_output/checkpoint-150/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-145] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2429, 'learning_rate': 4.894949494949495e-06, 'epoch': 30.87}


Saving model checkpoint to ./model_output/checkpoint-155
Configuration saved in ./model_output/checkpoint-155/config.json


{'eval_loss': 0.47305622696876526, 'eval_runtime': 1.6978, 'eval_samples_per_second': 5.89, 'eval_steps_per_second': 5.89, 'epoch': 30.87}


Model weights saved in ./model_output/checkpoint-155/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-150] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1826, 'learning_rate': 4.88989898989899e-06, 'epoch': 31.87}


Saving model checkpoint to ./model_output/checkpoint-160
Configuration saved in ./model_output/checkpoint-160/config.json


{'eval_loss': 0.5918101072311401, 'eval_runtime': 1.7057, 'eval_samples_per_second': 5.863, 'eval_steps_per_second': 5.863, 'epoch': 31.87}


Model weights saved in ./model_output/checkpoint-160/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-155] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1021, 'learning_rate': 4.884848484848485e-06, 'epoch': 32.87}


Saving model checkpoint to ./model_output/checkpoint-165
Configuration saved in ./model_output/checkpoint-165/config.json


{'eval_loss': 0.5880281329154968, 'eval_runtime': 1.6995, 'eval_samples_per_second': 5.884, 'eval_steps_per_second': 5.884, 'epoch': 32.87}


Model weights saved in ./model_output/checkpoint-165/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-160] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0586, 'learning_rate': 4.87979797979798e-06, 'epoch': 33.87}


Saving model checkpoint to ./model_output/checkpoint-170
Configuration saved in ./model_output/checkpoint-170/config.json


{'eval_loss': 0.6922687292098999, 'eval_runtime': 1.706, 'eval_samples_per_second': 5.861, 'eval_steps_per_second': 5.861, 'epoch': 33.87}


Model weights saved in ./model_output/checkpoint-170/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-165] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0371, 'learning_rate': 4.8747474747474745e-06, 'epoch': 34.87}


Saving model checkpoint to ./model_output/checkpoint-175
Configuration saved in ./model_output/checkpoint-175/config.json


{'eval_loss': 0.6166763305664062, 'eval_runtime': 1.7021, 'eval_samples_per_second': 5.875, 'eval_steps_per_second': 5.875, 'epoch': 34.87}


Model weights saved in ./model_output/checkpoint-175/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-170] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0234, 'learning_rate': 4.8696969696969705e-06, 'epoch': 35.87}


Saving model checkpoint to ./model_output/checkpoint-180
Configuration saved in ./model_output/checkpoint-180/config.json


{'eval_loss': 0.6721949577331543, 'eval_runtime': 1.7029, 'eval_samples_per_second': 5.872, 'eval_steps_per_second': 5.872, 'epoch': 35.87}


Model weights saved in ./model_output/checkpoint-180/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-175] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./model_output/checkpoint-130 (score: 0.46302223205566406).
***** Running Prediction *****
  Num examples = 15
  Batch size = 1


{'train_runtime': 2169.4836, 'train_samples_per_second': 42.406, 'train_steps_per_second': 2.305, 'train_loss': 0.562647665457593, 'epoch': 35.87}


loading file vocab.json
loading file merges.txt
loading file tokenizer.json
loading file added_tokens.json
loading file special_tokens_map.json
loading file tokenizer_config.json
loading configuration file ./models/mental-roberta-base/config.json
Model config RobertaConfig {
  "_name_or_path": "./models/mental-roberta-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,

Fitting model: mental_roberta_base using fold 2 as out of fold test data.
Train data sizes: (93, 93).
Val data sizes: (10, 10).
Test data sizes: (14, 14).


Some weights of the model checkpoint at ./models/mental-roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at ./models/mental-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classif

Map:   0%|          | 0/93 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Map:   0%|          | 0/14 [00:00<?, ? examples/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using cuda_amp half precision backend
***** Running training *****
  Num examples = 93
  Num Epochs = 1000
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 16
  Total optimization steps = 5000
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7999, 'learning_rate': 5.000000000000001e-07, 'epoch': 0.86}


Saving model checkpoint to ./model_output/checkpoint-5
Configuration saved in ./model_output/checkpoint-5/config.json


{'eval_loss': 0.671142578125, 'eval_runtime': 1.7, 'eval_samples_per_second': 5.882, 'eval_steps_per_second': 5.882, 'epoch': 0.86}


Model weights saved in ./model_output/checkpoint-5/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-130] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8009, 'learning_rate': 1.0000000000000002e-06, 'epoch': 1.86}


Saving model checkpoint to ./model_output/checkpoint-10
Configuration saved in ./model_output/checkpoint-10/config.json


{'eval_loss': 0.669681191444397, 'eval_runtime': 1.7062, 'eval_samples_per_second': 5.861, 'eval_steps_per_second': 5.861, 'epoch': 1.86}


Model weights saved in ./model_output/checkpoint-10/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-180] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7921, 'learning_rate': 1.5e-06, 'epoch': 2.86}


Saving model checkpoint to ./model_output/checkpoint-15
Configuration saved in ./model_output/checkpoint-15/config.json


{'eval_loss': 0.6674554944038391, 'eval_runtime': 1.7061, 'eval_samples_per_second': 5.861, 'eval_steps_per_second': 5.861, 'epoch': 2.86}


Model weights saved in ./model_output/checkpoint-15/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-5] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8002, 'learning_rate': 2.0000000000000003e-06, 'epoch': 3.86}


Saving model checkpoint to ./model_output/checkpoint-20
Configuration saved in ./model_output/checkpoint-20/config.json


{'eval_loss': 0.663364052772522, 'eval_runtime': 1.7081, 'eval_samples_per_second': 5.855, 'eval_steps_per_second': 5.855, 'epoch': 3.86}


Model weights saved in ./model_output/checkpoint-20/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-10] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8026, 'learning_rate': 2.5e-06, 'epoch': 4.86}


Saving model checkpoint to ./model_output/checkpoint-25
Configuration saved in ./model_output/checkpoint-25/config.json


{'eval_loss': 0.658602774143219, 'eval_runtime': 1.7094, 'eval_samples_per_second': 5.85, 'eval_steps_per_second': 5.85, 'epoch': 4.86}


Model weights saved in ./model_output/checkpoint-25/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-15] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7899, 'learning_rate': 3e-06, 'epoch': 5.86}


Saving model checkpoint to ./model_output/checkpoint-30
Configuration saved in ./model_output/checkpoint-30/config.json


{'eval_loss': 0.6541932821273804, 'eval_runtime': 1.7016, 'eval_samples_per_second': 5.877, 'eval_steps_per_second': 5.877, 'epoch': 5.86}


Model weights saved in ./model_output/checkpoint-30/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-20] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7965, 'learning_rate': 3.5e-06, 'epoch': 6.86}


Saving model checkpoint to ./model_output/checkpoint-35
Configuration saved in ./model_output/checkpoint-35/config.json


{'eval_loss': 0.6501284837722778, 'eval_runtime': 1.7034, 'eval_samples_per_second': 5.871, 'eval_steps_per_second': 5.871, 'epoch': 6.86}


Model weights saved in ./model_output/checkpoint-35/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-25] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7917, 'learning_rate': 4.000000000000001e-06, 'epoch': 7.86}


Saving model checkpoint to ./model_output/checkpoint-40
Configuration saved in ./model_output/checkpoint-40/config.json


{'eval_loss': 0.6446101069450378, 'eval_runtime': 1.7056, 'eval_samples_per_second': 5.863, 'eval_steps_per_second': 5.863, 'epoch': 7.86}


Model weights saved in ./model_output/checkpoint-40/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-30] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7907, 'learning_rate': 4.5e-06, 'epoch': 8.86}


Saving model checkpoint to ./model_output/checkpoint-45
Configuration saved in ./model_output/checkpoint-45/config.json


{'eval_loss': 0.6355565786361694, 'eval_runtime': 1.7061, 'eval_samples_per_second': 5.861, 'eval_steps_per_second': 5.861, 'epoch': 8.86}


Model weights saved in ./model_output/checkpoint-45/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-35] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7887, 'learning_rate': 5e-06, 'epoch': 9.86}


Saving model checkpoint to ./model_output/checkpoint-50
Configuration saved in ./model_output/checkpoint-50/config.json


{'eval_loss': 0.6248744130134583, 'eval_runtime': 1.7084, 'eval_samples_per_second': 5.854, 'eval_steps_per_second': 5.854, 'epoch': 9.86}


Model weights saved in ./model_output/checkpoint-50/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-40] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7789, 'learning_rate': 4.9949494949494956e-06, 'epoch': 10.86}


Saving model checkpoint to ./model_output/checkpoint-55
Configuration saved in ./model_output/checkpoint-55/config.json


{'eval_loss': 0.613082230091095, 'eval_runtime': 1.7048, 'eval_samples_per_second': 5.866, 'eval_steps_per_second': 5.866, 'epoch': 10.86}


Model weights saved in ./model_output/checkpoint-55/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-45] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7729, 'learning_rate': 4.98989898989899e-06, 'epoch': 11.86}


Saving model checkpoint to ./model_output/checkpoint-60
Configuration saved in ./model_output/checkpoint-60/config.json


{'eval_loss': 0.6037832498550415, 'eval_runtime': 1.7066, 'eval_samples_per_second': 5.86, 'eval_steps_per_second': 5.86, 'epoch': 11.86}


Model weights saved in ./model_output/checkpoint-60/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-50] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7793, 'learning_rate': 4.984848484848485e-06, 'epoch': 12.86}


Saving model checkpoint to ./model_output/checkpoint-65
Configuration saved in ./model_output/checkpoint-65/config.json


{'eval_loss': 0.588408887386322, 'eval_runtime': 1.708, 'eval_samples_per_second': 5.855, 'eval_steps_per_second': 5.855, 'epoch': 12.86}


Model weights saved in ./model_output/checkpoint-65/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-55] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7749, 'learning_rate': 4.97979797979798e-06, 'epoch': 13.86}


Saving model checkpoint to ./model_output/checkpoint-70
Configuration saved in ./model_output/checkpoint-70/config.json


{'eval_loss': 0.5807090997695923, 'eval_runtime': 1.7103, 'eval_samples_per_second': 5.847, 'eval_steps_per_second': 5.847, 'epoch': 13.86}


Model weights saved in ./model_output/checkpoint-70/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-60] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.759, 'learning_rate': 4.974747474747475e-06, 'epoch': 14.86}


Saving model checkpoint to ./model_output/checkpoint-75
Configuration saved in ./model_output/checkpoint-75/config.json


{'eval_loss': 0.573157787322998, 'eval_runtime': 1.7092, 'eval_samples_per_second': 5.851, 'eval_steps_per_second': 5.851, 'epoch': 14.86}


Model weights saved in ./model_output/checkpoint-75/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-65] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7568, 'learning_rate': 4.9696969696969696e-06, 'epoch': 15.86}


Saving model checkpoint to ./model_output/checkpoint-80
Configuration saved in ./model_output/checkpoint-80/config.json


{'eval_loss': 0.5731016993522644, 'eval_runtime': 1.7088, 'eval_samples_per_second': 5.852, 'eval_steps_per_second': 5.852, 'epoch': 15.86}


Model weights saved in ./model_output/checkpoint-80/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-70] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7604, 'learning_rate': 4.964646464646465e-06, 'epoch': 16.86}


Saving model checkpoint to ./model_output/checkpoint-85
Configuration saved in ./model_output/checkpoint-85/config.json


{'eval_loss': 0.5707632303237915, 'eval_runtime': 1.7085, 'eval_samples_per_second': 5.853, 'eval_steps_per_second': 5.853, 'epoch': 16.86}


Model weights saved in ./model_output/checkpoint-85/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-75] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7417, 'learning_rate': 4.95959595959596e-06, 'epoch': 17.86}


Saving model checkpoint to ./model_output/checkpoint-90
Configuration saved in ./model_output/checkpoint-90/config.json


{'eval_loss': 0.5466575622558594, 'eval_runtime': 1.7059, 'eval_samples_per_second': 5.862, 'eval_steps_per_second': 5.862, 'epoch': 17.86}


Model weights saved in ./model_output/checkpoint-90/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-80] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.722, 'learning_rate': 4.954545454545455e-06, 'epoch': 18.86}


Saving model checkpoint to ./model_output/checkpoint-95
Configuration saved in ./model_output/checkpoint-95/config.json


{'eval_loss': 0.5230148434638977, 'eval_runtime': 1.7031, 'eval_samples_per_second': 5.872, 'eval_steps_per_second': 5.872, 'epoch': 18.86}


Model weights saved in ./model_output/checkpoint-95/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-85] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7228, 'learning_rate': 4.94949494949495e-06, 'epoch': 19.86}


Saving model checkpoint to ./model_output/checkpoint-100
Configuration saved in ./model_output/checkpoint-100/config.json


{'eval_loss': 0.5163125991821289, 'eval_runtime': 1.7108, 'eval_samples_per_second': 5.845, 'eval_steps_per_second': 5.845, 'epoch': 19.86}


Model weights saved in ./model_output/checkpoint-100/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-90] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6738, 'learning_rate': 4.944444444444445e-06, 'epoch': 20.86}


Saving model checkpoint to ./model_output/checkpoint-105
Configuration saved in ./model_output/checkpoint-105/config.json


{'eval_loss': 0.5160431265830994, 'eval_runtime': 1.7068, 'eval_samples_per_second': 5.859, 'eval_steps_per_second': 5.859, 'epoch': 20.86}


Model weights saved in ./model_output/checkpoint-105/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-95] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6483, 'learning_rate': 4.93939393939394e-06, 'epoch': 21.86}


Saving model checkpoint to ./model_output/checkpoint-110
Configuration saved in ./model_output/checkpoint-110/config.json


{'eval_loss': 0.4741686284542084, 'eval_runtime': 1.7056, 'eval_samples_per_second': 5.863, 'eval_steps_per_second': 5.863, 'epoch': 21.86}


Model weights saved in ./model_output/checkpoint-110/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-100] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6156, 'learning_rate': 4.934343434343435e-06, 'epoch': 22.86}


Saving model checkpoint to ./model_output/checkpoint-115
Configuration saved in ./model_output/checkpoint-115/config.json


{'eval_loss': 0.4534914493560791, 'eval_runtime': 1.7094, 'eval_samples_per_second': 5.85, 'eval_steps_per_second': 5.85, 'epoch': 22.86}


Model weights saved in ./model_output/checkpoint-115/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-105] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5576, 'learning_rate': 4.92929292929293e-06, 'epoch': 23.86}


Saving model checkpoint to ./model_output/checkpoint-120
Configuration saved in ./model_output/checkpoint-120/config.json


{'eval_loss': 0.45001786947250366, 'eval_runtime': 1.7048, 'eval_samples_per_second': 5.866, 'eval_steps_per_second': 5.866, 'epoch': 23.86}


Model weights saved in ./model_output/checkpoint-120/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-110] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5079, 'learning_rate': 4.924242424242425e-06, 'epoch': 24.86}


Saving model checkpoint to ./model_output/checkpoint-125
Configuration saved in ./model_output/checkpoint-125/config.json


{'eval_loss': 0.43632155656814575, 'eval_runtime': 1.7116, 'eval_samples_per_second': 5.842, 'eval_steps_per_second': 5.842, 'epoch': 24.86}


Model weights saved in ./model_output/checkpoint-125/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-115] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5159, 'learning_rate': 4.919191919191919e-06, 'epoch': 25.86}


Saving model checkpoint to ./model_output/checkpoint-130
Configuration saved in ./model_output/checkpoint-130/config.json


{'eval_loss': 0.4256761074066162, 'eval_runtime': 1.7044, 'eval_samples_per_second': 5.867, 'eval_steps_per_second': 5.867, 'epoch': 25.86}


Model weights saved in ./model_output/checkpoint-130/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-120] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4534, 'learning_rate': 4.915151515151516e-06, 'epoch': 26.86}


Saving model checkpoint to ./model_output/checkpoint-135
Configuration saved in ./model_output/checkpoint-135/config.json


{'eval_loss': 0.42188748717308044, 'eval_runtime': 1.7076, 'eval_samples_per_second': 5.856, 'eval_steps_per_second': 5.856, 'epoch': 26.86}


Model weights saved in ./model_output/checkpoint-135/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-125] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4083, 'learning_rate': 4.91010101010101e-06, 'epoch': 27.86}


Saving model checkpoint to ./model_output/checkpoint-140
Configuration saved in ./model_output/checkpoint-140/config.json


{'eval_loss': 0.4378748834133148, 'eval_runtime': 1.7112, 'eval_samples_per_second': 5.844, 'eval_steps_per_second': 5.844, 'epoch': 27.86}


Model weights saved in ./model_output/checkpoint-140/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-130] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.3844, 'learning_rate': 4.905050505050505e-06, 'epoch': 28.86}


Saving model checkpoint to ./model_output/checkpoint-145
Configuration saved in ./model_output/checkpoint-145/config.json


{'eval_loss': 0.4189760684967041, 'eval_runtime': 1.7049, 'eval_samples_per_second': 5.865, 'eval_steps_per_second': 5.865, 'epoch': 28.86}


Model weights saved in ./model_output/checkpoint-145/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-135] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.3243, 'learning_rate': 4.9000000000000005e-06, 'epoch': 29.86}


Saving model checkpoint to ./model_output/checkpoint-150
Configuration saved in ./model_output/checkpoint-150/config.json


{'eval_loss': 0.4231477677822113, 'eval_runtime': 1.7091, 'eval_samples_per_second': 5.851, 'eval_steps_per_second': 5.851, 'epoch': 29.86}


Model weights saved in ./model_output/checkpoint-150/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-140] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2708, 'learning_rate': 4.895959595959596e-06, 'epoch': 30.86}


Saving model checkpoint to ./model_output/checkpoint-155
Configuration saved in ./model_output/checkpoint-155/config.json


{'eval_loss': 0.4368928074836731, 'eval_runtime': 1.7095, 'eval_samples_per_second': 5.85, 'eval_steps_per_second': 5.85, 'epoch': 30.86}


Model weights saved in ./model_output/checkpoint-155/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-150] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1999, 'learning_rate': 4.8909090909090914e-06, 'epoch': 31.86}


Saving model checkpoint to ./model_output/checkpoint-160
Configuration saved in ./model_output/checkpoint-160/config.json


{'eval_loss': 0.48898616433143616, 'eval_runtime': 1.708, 'eval_samples_per_second': 5.855, 'eval_steps_per_second': 5.855, 'epoch': 31.86}


Model weights saved in ./model_output/checkpoint-160/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-155] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1961, 'learning_rate': 4.885858585858586e-06, 'epoch': 32.86}


Saving model checkpoint to ./model_output/checkpoint-165
Configuration saved in ./model_output/checkpoint-165/config.json


{'eval_loss': 0.5663710832595825, 'eval_runtime': 1.7084, 'eval_samples_per_second': 5.853, 'eval_steps_per_second': 5.853, 'epoch': 32.86}


Model weights saved in ./model_output/checkpoint-165/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-160] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1583, 'learning_rate': 4.880808080808081e-06, 'epoch': 33.86}


Saving model checkpoint to ./model_output/checkpoint-170
Configuration saved in ./model_output/checkpoint-170/config.json


{'eval_loss': 0.6179537773132324, 'eval_runtime': 1.7076, 'eval_samples_per_second': 5.856, 'eval_steps_per_second': 5.856, 'epoch': 33.86}


Model weights saved in ./model_output/checkpoint-170/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-165] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1169, 'learning_rate': 4.875757575757576e-06, 'epoch': 34.86}


Saving model checkpoint to ./model_output/checkpoint-175
Configuration saved in ./model_output/checkpoint-175/config.json


{'eval_loss': 0.7154393196105957, 'eval_runtime': 1.7085, 'eval_samples_per_second': 5.853, 'eval_steps_per_second': 5.853, 'epoch': 34.86}


Model weights saved in ./model_output/checkpoint-175/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-170] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0705, 'learning_rate': 4.870707070707071e-06, 'epoch': 35.86}


Saving model checkpoint to ./model_output/checkpoint-180
Configuration saved in ./model_output/checkpoint-180/config.json


{'eval_loss': 0.7140611410140991, 'eval_runtime': 1.703, 'eval_samples_per_second': 5.872, 'eval_steps_per_second': 5.872, 'epoch': 35.86}


Model weights saved in ./model_output/checkpoint-180/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-175] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0624, 'learning_rate': 4.865656565656566e-06, 'epoch': 36.86}


Saving model checkpoint to ./model_output/checkpoint-185
Configuration saved in ./model_output/checkpoint-185/config.json


{'eval_loss': 0.7764999866485596, 'eval_runtime': 1.7241, 'eval_samples_per_second': 5.8, 'eval_steps_per_second': 5.8, 'epoch': 36.86}


Model weights saved in ./model_output/checkpoint-185/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-180] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0502, 'learning_rate': 4.8606060606060615e-06, 'epoch': 37.86}


Saving model checkpoint to ./model_output/checkpoint-190
Configuration saved in ./model_output/checkpoint-190/config.json


{'eval_loss': 0.8558410406112671, 'eval_runtime': 1.7111, 'eval_samples_per_second': 5.844, 'eval_steps_per_second': 5.844, 'epoch': 37.86}


Model weights saved in ./model_output/checkpoint-190/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-185] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0363, 'learning_rate': 4.855555555555556e-06, 'epoch': 38.86}


Saving model checkpoint to ./model_output/checkpoint-195
Configuration saved in ./model_output/checkpoint-195/config.json


{'eval_loss': 0.868028998374939, 'eval_runtime': 1.7202, 'eval_samples_per_second': 5.813, 'eval_steps_per_second': 5.813, 'epoch': 38.86}


Model weights saved in ./model_output/checkpoint-195/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-190] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./model_output/checkpoint-145 (score: 0.4189760684967041).
***** Running Prediction *****
  Num examples = 14
  Batch size = 1


{'train_runtime': 2378.1926, 'train_samples_per_second': 39.105, 'train_steps_per_second': 2.102, 'train_loss': 0.5582796327578716, 'epoch': 38.86}


loading file vocab.json
loading file merges.txt
loading file tokenizer.json
loading file added_tokens.json
loading file special_tokens_map.json
loading file tokenizer_config.json
loading configuration file ./models/mental-roberta-base/config.json
Model config RobertaConfig {
  "_name_or_path": "./models/mental-roberta-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,

Fitting model: mental_roberta_base using fold 3 as out of fold test data.
Train data sizes: (97, 97).
Val data sizes: (10, 10).
Test data sizes: (10, 10).


Some weights of the model checkpoint at ./models/mental-roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at ./models/mental-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classif

Map:   0%|          | 0/97 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using cuda_amp half precision backend
***** Running training *****
  Num examples = 97
  Num Epochs = 1000
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 16
  Total optimization steps = 6000
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7178, 'learning_rate': 6.000000000000001e-07, 'epoch': 0.99}


Saving model checkpoint to ./model_output/checkpoint-6
Configuration saved in ./model_output/checkpoint-6/config.json


{'eval_loss': 0.7330659031867981, 'eval_runtime': 1.7105, 'eval_samples_per_second': 5.846, 'eval_steps_per_second': 5.846, 'epoch': 0.99}


Model weights saved in ./model_output/checkpoint-6/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-145] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6961, 'learning_rate': 1.2000000000000002e-06, 'epoch': 1.99}


Saving model checkpoint to ./model_output/checkpoint-12
Configuration saved in ./model_output/checkpoint-12/config.json


{'eval_loss': 0.7292950749397278, 'eval_runtime': 1.7065, 'eval_samples_per_second': 5.86, 'eval_steps_per_second': 5.86, 'epoch': 1.99}


Model weights saved in ./model_output/checkpoint-12/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-195] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7055, 'learning_rate': 1.8000000000000001e-06, 'epoch': 2.99}


Saving model checkpoint to ./model_output/checkpoint-18
Configuration saved in ./model_output/checkpoint-18/config.json


{'eval_loss': 0.7253116965293884, 'eval_runtime': 1.7085, 'eval_samples_per_second': 5.853, 'eval_steps_per_second': 5.853, 'epoch': 2.99}


Model weights saved in ./model_output/checkpoint-18/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-6] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7047, 'learning_rate': 2.4000000000000003e-06, 'epoch': 3.99}


Saving model checkpoint to ./model_output/checkpoint-24
Configuration saved in ./model_output/checkpoint-24/config.json


{'eval_loss': 0.7193354368209839, 'eval_runtime': 1.7043, 'eval_samples_per_second': 5.868, 'eval_steps_per_second': 5.868, 'epoch': 3.99}


Model weights saved in ./model_output/checkpoint-24/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-12] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7062, 'learning_rate': 3e-06, 'epoch': 4.99}


Saving model checkpoint to ./model_output/checkpoint-30
Configuration saved in ./model_output/checkpoint-30/config.json


{'eval_loss': 0.7097674012184143, 'eval_runtime': 1.7071, 'eval_samples_per_second': 5.858, 'eval_steps_per_second': 5.858, 'epoch': 4.99}


Model weights saved in ./model_output/checkpoint-30/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-18] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7004, 'learning_rate': 3.6000000000000003e-06, 'epoch': 5.99}


Saving model checkpoint to ./model_output/checkpoint-36
Configuration saved in ./model_output/checkpoint-36/config.json


{'eval_loss': 0.7010634541511536, 'eval_runtime': 1.7104, 'eval_samples_per_second': 5.846, 'eval_steps_per_second': 5.846, 'epoch': 5.99}


Model weights saved in ./model_output/checkpoint-36/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-24] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.695, 'learning_rate': 4.2000000000000004e-06, 'epoch': 6.99}


Saving model checkpoint to ./model_output/checkpoint-42
Configuration saved in ./model_output/checkpoint-42/config.json


{'eval_loss': 0.6939781904220581, 'eval_runtime': 1.7076, 'eval_samples_per_second': 5.856, 'eval_steps_per_second': 5.856, 'epoch': 6.99}


Model weights saved in ./model_output/checkpoint-42/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-30] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6976, 'learning_rate': 4.800000000000001e-06, 'epoch': 7.99}


Saving model checkpoint to ./model_output/checkpoint-48
Configuration saved in ./model_output/checkpoint-48/config.json


{'eval_loss': 0.6848143935203552, 'eval_runtime': 1.711, 'eval_samples_per_second': 5.844, 'eval_steps_per_second': 5.844, 'epoch': 7.99}


Model weights saved in ./model_output/checkpoint-48/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-36] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6987, 'learning_rate': 4.996638655462185e-06, 'epoch': 8.99}


Saving model checkpoint to ./model_output/checkpoint-54
Configuration saved in ./model_output/checkpoint-54/config.json


{'eval_loss': 0.6733771562576294, 'eval_runtime': 1.7055, 'eval_samples_per_second': 5.863, 'eval_steps_per_second': 5.863, 'epoch': 8.99}


Model weights saved in ./model_output/checkpoint-54/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-42] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6971, 'learning_rate': 4.9915966386554625e-06, 'epoch': 9.99}


Saving model checkpoint to ./model_output/checkpoint-60
Configuration saved in ./model_output/checkpoint-60/config.json


{'eval_loss': 0.6637199521064758, 'eval_runtime': 1.7091, 'eval_samples_per_second': 5.851, 'eval_steps_per_second': 5.851, 'epoch': 9.99}


Model weights saved in ./model_output/checkpoint-60/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-48] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6931, 'learning_rate': 4.98655462184874e-06, 'epoch': 10.99}


Saving model checkpoint to ./model_output/checkpoint-66
Configuration saved in ./model_output/checkpoint-66/config.json


{'eval_loss': 0.6547878384590149, 'eval_runtime': 1.7094, 'eval_samples_per_second': 5.85, 'eval_steps_per_second': 5.85, 'epoch': 10.99}


Model weights saved in ./model_output/checkpoint-66/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-54] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6919, 'learning_rate': 4.9815126050420174e-06, 'epoch': 11.99}


Saving model checkpoint to ./model_output/checkpoint-72
Configuration saved in ./model_output/checkpoint-72/config.json


{'eval_loss': 0.64787757396698, 'eval_runtime': 1.7061, 'eval_samples_per_second': 5.861, 'eval_steps_per_second': 5.861, 'epoch': 11.99}


Model weights saved in ./model_output/checkpoint-72/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-60] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6835, 'learning_rate': 4.976470588235294e-06, 'epoch': 12.99}


Saving model checkpoint to ./model_output/checkpoint-78
Configuration saved in ./model_output/checkpoint-78/config.json


{'eval_loss': 0.6398080587387085, 'eval_runtime': 1.7088, 'eval_samples_per_second': 5.852, 'eval_steps_per_second': 5.852, 'epoch': 12.99}


Model weights saved in ./model_output/checkpoint-78/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-66] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6786, 'learning_rate': 4.971428571428572e-06, 'epoch': 13.99}


Saving model checkpoint to ./model_output/checkpoint-84
Configuration saved in ./model_output/checkpoint-84/config.json


{'eval_loss': 0.6329507827758789, 'eval_runtime': 1.7105, 'eval_samples_per_second': 5.846, 'eval_steps_per_second': 5.846, 'epoch': 13.99}


Model weights saved in ./model_output/checkpoint-84/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-72] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6677, 'learning_rate': 4.966386554621849e-06, 'epoch': 14.99}


Saving model checkpoint to ./model_output/checkpoint-90
Configuration saved in ./model_output/checkpoint-90/config.json


{'eval_loss': 0.6256433725357056, 'eval_runtime': 1.727, 'eval_samples_per_second': 5.79, 'eval_steps_per_second': 5.79, 'epoch': 14.99}


Model weights saved in ./model_output/checkpoint-90/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-78] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6637, 'learning_rate': 4.961344537815126e-06, 'epoch': 15.99}


Saving model checkpoint to ./model_output/checkpoint-96
Configuration saved in ./model_output/checkpoint-96/config.json


{'eval_loss': 0.6178944110870361, 'eval_runtime': 1.7072, 'eval_samples_per_second': 5.858, 'eval_steps_per_second': 5.858, 'epoch': 15.99}


Model weights saved in ./model_output/checkpoint-96/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-84] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6538, 'learning_rate': 4.956302521008404e-06, 'epoch': 16.99}


Saving model checkpoint to ./model_output/checkpoint-102
Configuration saved in ./model_output/checkpoint-102/config.json


{'eval_loss': 0.6037731170654297, 'eval_runtime': 1.7086, 'eval_samples_per_second': 5.853, 'eval_steps_per_second': 5.853, 'epoch': 16.99}


Model weights saved in ./model_output/checkpoint-102/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-90] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6365, 'learning_rate': 4.951260504201681e-06, 'epoch': 17.99}


Saving model checkpoint to ./model_output/checkpoint-108
Configuration saved in ./model_output/checkpoint-108/config.json


{'eval_loss': 0.5767148733139038, 'eval_runtime': 1.7093, 'eval_samples_per_second': 5.85, 'eval_steps_per_second': 5.85, 'epoch': 17.99}


Model weights saved in ./model_output/checkpoint-108/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-96] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6168, 'learning_rate': 4.946218487394958e-06, 'epoch': 18.99}


Saving model checkpoint to ./model_output/checkpoint-114
Configuration saved in ./model_output/checkpoint-114/config.json


{'eval_loss': 0.565248966217041, 'eval_runtime': 1.708, 'eval_samples_per_second': 5.855, 'eval_steps_per_second': 5.855, 'epoch': 18.99}


Model weights saved in ./model_output/checkpoint-114/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-102] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5891, 'learning_rate': 4.941176470588236e-06, 'epoch': 19.99}


Saving model checkpoint to ./model_output/checkpoint-120
Configuration saved in ./model_output/checkpoint-120/config.json


{'eval_loss': 0.5209082365036011, 'eval_runtime': 1.7068, 'eval_samples_per_second': 5.859, 'eval_steps_per_second': 5.859, 'epoch': 19.99}


Model weights saved in ./model_output/checkpoint-120/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-108] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5337, 'learning_rate': 4.936134453781513e-06, 'epoch': 20.99}


Saving model checkpoint to ./model_output/checkpoint-126
Configuration saved in ./model_output/checkpoint-126/config.json


{'eval_loss': 0.4947306513786316, 'eval_runtime': 1.7092, 'eval_samples_per_second': 5.851, 'eval_steps_per_second': 5.851, 'epoch': 20.99}


Model weights saved in ./model_output/checkpoint-126/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-114] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4949, 'learning_rate': 4.93109243697479e-06, 'epoch': 21.99}


Saving model checkpoint to ./model_output/checkpoint-132
Configuration saved in ./model_output/checkpoint-132/config.json


{'eval_loss': 0.4696773588657379, 'eval_runtime': 1.7141, 'eval_samples_per_second': 5.834, 'eval_steps_per_second': 5.834, 'epoch': 21.99}


Model weights saved in ./model_output/checkpoint-132/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-120] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4447, 'learning_rate': 4.926050420168068e-06, 'epoch': 22.99}


Saving model checkpoint to ./model_output/checkpoint-138
Configuration saved in ./model_output/checkpoint-138/config.json


{'eval_loss': 0.4655284285545349, 'eval_runtime': 1.7062, 'eval_samples_per_second': 5.861, 'eval_steps_per_second': 5.861, 'epoch': 22.99}


Model weights saved in ./model_output/checkpoint-138/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-126] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.3745, 'learning_rate': 4.921848739495799e-06, 'epoch': 23.99}


Saving model checkpoint to ./model_output/checkpoint-144
Configuration saved in ./model_output/checkpoint-144/config.json


{'eval_loss': 0.47424134612083435, 'eval_runtime': 1.7121, 'eval_samples_per_second': 5.841, 'eval_steps_per_second': 5.841, 'epoch': 23.99}


Model weights saved in ./model_output/checkpoint-144/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-132] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.3184, 'learning_rate': 4.916806722689076e-06, 'epoch': 24.99}


Saving model checkpoint to ./model_output/checkpoint-150
Configuration saved in ./model_output/checkpoint-150/config.json


{'eval_loss': 0.5017299056053162, 'eval_runtime': 1.7105, 'eval_samples_per_second': 5.846, 'eval_steps_per_second': 5.846, 'epoch': 24.99}


Model weights saved in ./model_output/checkpoint-150/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-144] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2192, 'learning_rate': 4.911764705882353e-06, 'epoch': 25.99}


Saving model checkpoint to ./model_output/checkpoint-156
Configuration saved in ./model_output/checkpoint-156/config.json


{'eval_loss': 0.5670968294143677, 'eval_runtime': 1.7019, 'eval_samples_per_second': 5.876, 'eval_steps_per_second': 5.876, 'epoch': 25.99}


Model weights saved in ./model_output/checkpoint-156/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-150] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.167, 'learning_rate': 4.906722689075631e-06, 'epoch': 26.99}


Saving model checkpoint to ./model_output/checkpoint-162
Configuration saved in ./model_output/checkpoint-162/config.json


{'eval_loss': 0.5955225825309753, 'eval_runtime': 1.7134, 'eval_samples_per_second': 5.836, 'eval_steps_per_second': 5.836, 'epoch': 26.99}


Model weights saved in ./model_output/checkpoint-162/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-156] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1185, 'learning_rate': 4.901680672268908e-06, 'epoch': 27.99}


Saving model checkpoint to ./model_output/checkpoint-168
Configuration saved in ./model_output/checkpoint-168/config.json


{'eval_loss': 0.6191055178642273, 'eval_runtime': 1.71, 'eval_samples_per_second': 5.848, 'eval_steps_per_second': 5.848, 'epoch': 27.99}


Model weights saved in ./model_output/checkpoint-168/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-162] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0756, 'learning_rate': 4.896638655462185e-06, 'epoch': 28.99}


Saving model checkpoint to ./model_output/checkpoint-174
Configuration saved in ./model_output/checkpoint-174/config.json


{'eval_loss': 0.7000854015350342, 'eval_runtime': 1.7142, 'eval_samples_per_second': 5.834, 'eval_steps_per_second': 5.834, 'epoch': 28.99}


Model weights saved in ./model_output/checkpoint-174/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-168] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0522, 'learning_rate': 4.891596638655463e-06, 'epoch': 29.99}


Saving model checkpoint to ./model_output/checkpoint-180
Configuration saved in ./model_output/checkpoint-180/config.json


{'eval_loss': 0.6771276593208313, 'eval_runtime': 1.7102, 'eval_samples_per_second': 5.847, 'eval_steps_per_second': 5.847, 'epoch': 29.99}


Model weights saved in ./model_output/checkpoint-180/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-174] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0342, 'learning_rate': 4.88655462184874e-06, 'epoch': 30.99}


Saving model checkpoint to ./model_output/checkpoint-186
Configuration saved in ./model_output/checkpoint-186/config.json


{'eval_loss': 0.7732697129249573, 'eval_runtime': 1.7075, 'eval_samples_per_second': 5.856, 'eval_steps_per_second': 5.856, 'epoch': 30.99}


Model weights saved in ./model_output/checkpoint-186/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-180] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.02, 'learning_rate': 4.881512605042017e-06, 'epoch': 31.99}


Saving model checkpoint to ./model_output/checkpoint-192
Configuration saved in ./model_output/checkpoint-192/config.json


{'eval_loss': 0.8448383212089539, 'eval_runtime': 1.7135, 'eval_samples_per_second': 5.836, 'eval_steps_per_second': 5.836, 'epoch': 31.99}


Model weights saved in ./model_output/checkpoint-192/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-186] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0135, 'learning_rate': 4.876470588235295e-06, 'epoch': 32.99}


Saving model checkpoint to ./model_output/checkpoint-198
Configuration saved in ./model_output/checkpoint-198/config.json


{'eval_loss': 0.9500395655632019, 'eval_runtime': 1.7069, 'eval_samples_per_second': 5.859, 'eval_steps_per_second': 5.859, 'epoch': 32.99}


Model weights saved in ./model_output/checkpoint-198/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-192] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./model_output/checkpoint-138 (score: 0.4655284285545349).
***** Running Prediction *****
  Num examples = 10
  Batch size = 1


{'train_runtime': 2104.4539, 'train_samples_per_second': 46.093, 'train_steps_per_second': 2.851, 'train_loss': 0.4987959552819681, 'epoch': 32.99}


loading file vocab.json
loading file merges.txt
loading file tokenizer.json
loading file added_tokens.json
loading file special_tokens_map.json
loading file tokenizer_config.json
loading configuration file ./models/mental-roberta-base/config.json
Model config RobertaConfig {
  "_name_or_path": "./models/mental-roberta-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,

Fitting model: mental_roberta_base using fold 4 as out of fold test data.
Train data sizes: (94, 94).
Val data sizes: (10, 10).
Test data sizes: (13, 13).


Some weights of the model checkpoint at ./models/mental-roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at ./models/mental-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classif

Map:   0%|          | 0/94 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Map:   0%|          | 0/13 [00:00<?, ? examples/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using cuda_amp half precision backend
***** Running training *****
  Num examples = 94
  Num Epochs = 1000
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 16
  Total optimization steps = 5000
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8412, 'learning_rate': 5.000000000000001e-07, 'epoch': 0.85}


Saving model checkpoint to ./model_output/checkpoint-5
Configuration saved in ./model_output/checkpoint-5/config.json


{'eval_loss': 0.7500547170639038, 'eval_runtime': 1.7096, 'eval_samples_per_second': 5.849, 'eval_steps_per_second': 5.849, 'epoch': 0.85}


Model weights saved in ./model_output/checkpoint-5/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-138] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8445, 'learning_rate': 1.0000000000000002e-06, 'epoch': 1.85}


Saving model checkpoint to ./model_output/checkpoint-10
Configuration saved in ./model_output/checkpoint-10/config.json


{'eval_loss': 0.7474017143249512, 'eval_runtime': 1.7128, 'eval_samples_per_second': 5.838, 'eval_steps_per_second': 5.838, 'epoch': 1.85}


Model weights saved in ./model_output/checkpoint-10/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-198] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8437, 'learning_rate': 1.5e-06, 'epoch': 2.85}


Saving model checkpoint to ./model_output/checkpoint-15
Configuration saved in ./model_output/checkpoint-15/config.json


{'eval_loss': 0.7427513003349304, 'eval_runtime': 1.7115, 'eval_samples_per_second': 5.843, 'eval_steps_per_second': 5.843, 'epoch': 2.85}


Model weights saved in ./model_output/checkpoint-15/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-5] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8323, 'learning_rate': 2.0000000000000003e-06, 'epoch': 3.85}


Saving model checkpoint to ./model_output/checkpoint-20
Configuration saved in ./model_output/checkpoint-20/config.json


{'eval_loss': 0.7347853183746338, 'eval_runtime': 1.7069, 'eval_samples_per_second': 5.859, 'eval_steps_per_second': 5.859, 'epoch': 3.85}


Model weights saved in ./model_output/checkpoint-20/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-10] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8331, 'learning_rate': 2.5e-06, 'epoch': 4.85}


Saving model checkpoint to ./model_output/checkpoint-25
Configuration saved in ./model_output/checkpoint-25/config.json


{'eval_loss': 0.7244176864624023, 'eval_runtime': 1.7095, 'eval_samples_per_second': 5.85, 'eval_steps_per_second': 5.85, 'epoch': 4.85}


Model weights saved in ./model_output/checkpoint-25/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-15] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8112, 'learning_rate': 3e-06, 'epoch': 5.85}


Saving model checkpoint to ./model_output/checkpoint-30
Configuration saved in ./model_output/checkpoint-30/config.json


{'eval_loss': 0.7123066186904907, 'eval_runtime': 1.7084, 'eval_samples_per_second': 5.853, 'eval_steps_per_second': 5.853, 'epoch': 5.85}


Model weights saved in ./model_output/checkpoint-30/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-20] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.82, 'learning_rate': 3.5e-06, 'epoch': 6.85}


Saving model checkpoint to ./model_output/checkpoint-35
Configuration saved in ./model_output/checkpoint-35/config.json


{'eval_loss': 0.7013880610466003, 'eval_runtime': 1.7049, 'eval_samples_per_second': 5.865, 'eval_steps_per_second': 5.865, 'epoch': 6.85}


Model weights saved in ./model_output/checkpoint-35/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-25] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8036, 'learning_rate': 4.000000000000001e-06, 'epoch': 7.85}


Saving model checkpoint to ./model_output/checkpoint-40
Configuration saved in ./model_output/checkpoint-40/config.json


{'eval_loss': 0.6876393556594849, 'eval_runtime': 1.7136, 'eval_samples_per_second': 5.836, 'eval_steps_per_second': 5.836, 'epoch': 7.85}


Model weights saved in ./model_output/checkpoint-40/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-30] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8097, 'learning_rate': 4.5e-06, 'epoch': 8.85}


Saving model checkpoint to ./model_output/checkpoint-45
Configuration saved in ./model_output/checkpoint-45/config.json


{'eval_loss': 0.6730210185050964, 'eval_runtime': 1.707, 'eval_samples_per_second': 5.858, 'eval_steps_per_second': 5.858, 'epoch': 8.85}


Model weights saved in ./model_output/checkpoint-45/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-35] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8038, 'learning_rate': 5e-06, 'epoch': 9.85}


Saving model checkpoint to ./model_output/checkpoint-50
Configuration saved in ./model_output/checkpoint-50/config.json


{'eval_loss': 0.6598197221755981, 'eval_runtime': 1.7073, 'eval_samples_per_second': 5.857, 'eval_steps_per_second': 5.857, 'epoch': 9.85}


Model weights saved in ./model_output/checkpoint-50/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-40] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.785, 'learning_rate': 4.9949494949494956e-06, 'epoch': 10.85}


Saving model checkpoint to ./model_output/checkpoint-55
Configuration saved in ./model_output/checkpoint-55/config.json


{'eval_loss': 0.6453492045402527, 'eval_runtime': 1.7125, 'eval_samples_per_second': 5.839, 'eval_steps_per_second': 5.839, 'epoch': 10.85}


Model weights saved in ./model_output/checkpoint-55/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-45] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8025, 'learning_rate': 4.990909090909091e-06, 'epoch': 11.85}


Saving model checkpoint to ./model_output/checkpoint-60
Configuration saved in ./model_output/checkpoint-60/config.json


{'eval_loss': 0.6395010948181152, 'eval_runtime': 1.7071, 'eval_samples_per_second': 5.858, 'eval_steps_per_second': 5.858, 'epoch': 11.85}


Model weights saved in ./model_output/checkpoint-60/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-50] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7898, 'learning_rate': 4.9858585858585865e-06, 'epoch': 12.85}


Saving model checkpoint to ./model_output/checkpoint-65
Configuration saved in ./model_output/checkpoint-65/config.json


{'eval_loss': 0.6346058249473572, 'eval_runtime': 1.7137, 'eval_samples_per_second': 5.835, 'eval_steps_per_second': 5.835, 'epoch': 12.85}


Model weights saved in ./model_output/checkpoint-65/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-55] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7796, 'learning_rate': 4.980808080808081e-06, 'epoch': 13.85}


Saving model checkpoint to ./model_output/checkpoint-70
Configuration saved in ./model_output/checkpoint-70/config.json


{'eval_loss': 0.6280836462974548, 'eval_runtime': 1.7071, 'eval_samples_per_second': 5.858, 'eval_steps_per_second': 5.858, 'epoch': 13.85}


Model weights saved in ./model_output/checkpoint-70/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-60] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7609, 'learning_rate': 4.975757575757576e-06, 'epoch': 14.85}


Saving model checkpoint to ./model_output/checkpoint-75
Configuration saved in ./model_output/checkpoint-75/config.json


{'eval_loss': 0.6209983229637146, 'eval_runtime': 1.7083, 'eval_samples_per_second': 5.854, 'eval_steps_per_second': 5.854, 'epoch': 14.85}


Model weights saved in ./model_output/checkpoint-75/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-65] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.769, 'learning_rate': 4.970707070707071e-06, 'epoch': 15.85}


Saving model checkpoint to ./model_output/checkpoint-80
Configuration saved in ./model_output/checkpoint-80/config.json


{'eval_loss': 0.608446478843689, 'eval_runtime': 1.7102, 'eval_samples_per_second': 5.847, 'eval_steps_per_second': 5.847, 'epoch': 15.85}


Model weights saved in ./model_output/checkpoint-80/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-70] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.763, 'learning_rate': 4.966666666666667e-06, 'epoch': 16.85}


Saving model checkpoint to ./model_output/checkpoint-85
Configuration saved in ./model_output/checkpoint-85/config.json


{'eval_loss': 0.5911577939987183, 'eval_runtime': 1.7034, 'eval_samples_per_second': 5.87, 'eval_steps_per_second': 5.87, 'epoch': 16.85}


Model weights saved in ./model_output/checkpoint-85/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-75] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7263, 'learning_rate': 4.961616161616162e-06, 'epoch': 17.85}


Saving model checkpoint to ./model_output/checkpoint-90
Configuration saved in ./model_output/checkpoint-90/config.json


{'eval_loss': 0.5620695352554321, 'eval_runtime': 1.7097, 'eval_samples_per_second': 5.849, 'eval_steps_per_second': 5.849, 'epoch': 17.85}


Model weights saved in ./model_output/checkpoint-90/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-80] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6946, 'learning_rate': 4.956565656565657e-06, 'epoch': 18.85}


Saving model checkpoint to ./model_output/checkpoint-95
Configuration saved in ./model_output/checkpoint-95/config.json


{'eval_loss': 0.5432630777359009, 'eval_runtime': 1.7116, 'eval_samples_per_second': 5.843, 'eval_steps_per_second': 5.843, 'epoch': 18.85}


Model weights saved in ./model_output/checkpoint-95/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-85] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6621, 'learning_rate': 4.951515151515152e-06, 'epoch': 19.85}


Saving model checkpoint to ./model_output/checkpoint-100
Configuration saved in ./model_output/checkpoint-100/config.json


{'eval_loss': 0.5330218076705933, 'eval_runtime': 1.7051, 'eval_samples_per_second': 5.865, 'eval_steps_per_second': 5.865, 'epoch': 19.85}


Model weights saved in ./model_output/checkpoint-100/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-90] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6184, 'learning_rate': 4.946464646464647e-06, 'epoch': 20.85}


Saving model checkpoint to ./model_output/checkpoint-105
Configuration saved in ./model_output/checkpoint-105/config.json


{'eval_loss': 0.5204116702079773, 'eval_runtime': 1.7045, 'eval_samples_per_second': 5.867, 'eval_steps_per_second': 5.867, 'epoch': 20.85}


Model weights saved in ./model_output/checkpoint-105/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-95] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6179, 'learning_rate': 4.941414141414142e-06, 'epoch': 21.85}


Saving model checkpoint to ./model_output/checkpoint-110
Configuration saved in ./model_output/checkpoint-110/config.json


{'eval_loss': 0.5283652544021606, 'eval_runtime': 1.7066, 'eval_samples_per_second': 5.86, 'eval_steps_per_second': 5.86, 'epoch': 21.85}


Model weights saved in ./model_output/checkpoint-110/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-100] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5439, 'learning_rate': 4.936363636363637e-06, 'epoch': 22.85}


Saving model checkpoint to ./model_output/checkpoint-115
Configuration saved in ./model_output/checkpoint-115/config.json


{'eval_loss': 0.5411518812179565, 'eval_runtime': 1.712, 'eval_samples_per_second': 5.841, 'eval_steps_per_second': 5.841, 'epoch': 22.85}


Model weights saved in ./model_output/checkpoint-115/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-110] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5146, 'learning_rate': 4.931313131313132e-06, 'epoch': 23.85}


Saving model checkpoint to ./model_output/checkpoint-120
Configuration saved in ./model_output/checkpoint-120/config.json


{'eval_loss': 0.5534762144088745, 'eval_runtime': 1.7079, 'eval_samples_per_second': 5.855, 'eval_steps_per_second': 5.855, 'epoch': 23.85}


Model weights saved in ./model_output/checkpoint-120/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-115] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4634, 'learning_rate': 4.926262626262627e-06, 'epoch': 24.85}


Saving model checkpoint to ./model_output/checkpoint-125
Configuration saved in ./model_output/checkpoint-125/config.json


{'eval_loss': 0.5867754220962524, 'eval_runtime': 1.7173, 'eval_samples_per_second': 5.823, 'eval_steps_per_second': 5.823, 'epoch': 24.85}


Model weights saved in ./model_output/checkpoint-125/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-120] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4233, 'learning_rate': 4.9212121212121214e-06, 'epoch': 25.85}


Saving model checkpoint to ./model_output/checkpoint-130
Configuration saved in ./model_output/checkpoint-130/config.json


{'eval_loss': 0.5325106382369995, 'eval_runtime': 1.7142, 'eval_samples_per_second': 5.834, 'eval_steps_per_second': 5.834, 'epoch': 25.85}


Model weights saved in ./model_output/checkpoint-130/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-125] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.3531, 'learning_rate': 4.9161616161616166e-06, 'epoch': 26.85}


Saving model checkpoint to ./model_output/checkpoint-135
Configuration saved in ./model_output/checkpoint-135/config.json


{'eval_loss': 0.6449037790298462, 'eval_runtime': 1.7122, 'eval_samples_per_second': 5.841, 'eval_steps_per_second': 5.841, 'epoch': 26.85}


Model weights saved in ./model_output/checkpoint-135/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-130] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2879, 'learning_rate': 4.911111111111112e-06, 'epoch': 27.85}


Saving model checkpoint to ./model_output/checkpoint-140
Configuration saved in ./model_output/checkpoint-140/config.json


{'eval_loss': 0.6427692174911499, 'eval_runtime': 1.7205, 'eval_samples_per_second': 5.812, 'eval_steps_per_second': 5.812, 'epoch': 27.85}


Model weights saved in ./model_output/checkpoint-140/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-135] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2221, 'learning_rate': 4.906060606060606e-06, 'epoch': 28.85}


Saving model checkpoint to ./model_output/checkpoint-145
Configuration saved in ./model_output/checkpoint-145/config.json


{'eval_loss': 0.6607556939125061, 'eval_runtime': 1.7107, 'eval_samples_per_second': 5.846, 'eval_steps_per_second': 5.846, 'epoch': 28.85}


Model weights saved in ./model_output/checkpoint-145/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-140] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1617, 'learning_rate': 4.901010101010101e-06, 'epoch': 29.85}


Saving model checkpoint to ./model_output/checkpoint-150
Configuration saved in ./model_output/checkpoint-150/config.json


{'eval_loss': 0.7738850712776184, 'eval_runtime': 1.7132, 'eval_samples_per_second': 5.837, 'eval_steps_per_second': 5.837, 'epoch': 29.85}


Model weights saved in ./model_output/checkpoint-150/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-145] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1207, 'learning_rate': 4.895959595959596e-06, 'epoch': 30.85}


Saving model checkpoint to ./model_output/checkpoint-155
Configuration saved in ./model_output/checkpoint-155/config.json


{'eval_loss': 0.8667291402816772, 'eval_runtime': 1.7137, 'eval_samples_per_second': 5.835, 'eval_steps_per_second': 5.835, 'epoch': 30.85}


Model weights saved in ./model_output/checkpoint-155/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-150] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./model_output/checkpoint-105 (score: 0.5204116702079773).
***** Running Prediction *****
  Num examples = 13
  Batch size = 1


{'train_runtime': 1913.2575, 'train_samples_per_second': 49.131, 'train_steps_per_second': 2.613, 'train_loss': 0.648483339048201, 'epoch': 30.85}


loading file vocab.json
loading file merges.txt
loading file tokenizer.json
loading file added_tokens.json
loading file special_tokens_map.json
loading file tokenizer_config.json
loading configuration file ./models/mental-roberta-base/config.json
Model config RobertaConfig {
  "_name_or_path": "./models/mental-roberta-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,

Fitting model: mental_roberta_base using fold 5 as out of fold test data.
Train data sizes: (98, 98).
Val data sizes: (10, 10).
Test data sizes: (9, 9).


Some weights of the model checkpoint at ./models/mental-roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at ./models/mental-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classif

Map:   0%|          | 0/98 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Map:   0%|          | 0/9 [00:00<?, ? examples/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using cuda_amp half precision backend
***** Running training *****
  Num examples = 98
  Num Epochs = 1000
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 16
  Total optimization steps = 6000
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7235, 'learning_rate': 6.000000000000001e-07, 'epoch': 0.98}


Saving model checkpoint to ./model_output/checkpoint-6
Configuration saved in ./model_output/checkpoint-6/config.json


{'eval_loss': 0.7135900855064392, 'eval_runtime': 1.7071, 'eval_samples_per_second': 5.858, 'eval_steps_per_second': 5.858, 'epoch': 0.98}


Model weights saved in ./model_output/checkpoint-6/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-105] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7254, 'learning_rate': 1.2000000000000002e-06, 'epoch': 1.98}


Saving model checkpoint to ./model_output/checkpoint-12
Configuration saved in ./model_output/checkpoint-12/config.json


{'eval_loss': 0.7102487683296204, 'eval_runtime': 1.7042, 'eval_samples_per_second': 5.868, 'eval_steps_per_second': 5.868, 'epoch': 1.98}


Model weights saved in ./model_output/checkpoint-12/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-155] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7068, 'learning_rate': 1.8000000000000001e-06, 'epoch': 2.98}


Saving model checkpoint to ./model_output/checkpoint-18
Configuration saved in ./model_output/checkpoint-18/config.json


{'eval_loss': 0.7052162885665894, 'eval_runtime': 1.7097, 'eval_samples_per_second': 5.849, 'eval_steps_per_second': 5.849, 'epoch': 2.98}


Model weights saved in ./model_output/checkpoint-18/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-6] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7102, 'learning_rate': 2.4000000000000003e-06, 'epoch': 3.98}


Saving model checkpoint to ./model_output/checkpoint-24
Configuration saved in ./model_output/checkpoint-24/config.json


{'eval_loss': 0.6999767422676086, 'eval_runtime': 1.7078, 'eval_samples_per_second': 5.855, 'eval_steps_per_second': 5.855, 'epoch': 3.98}


Model weights saved in ./model_output/checkpoint-24/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-12] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7126, 'learning_rate': 3e-06, 'epoch': 4.98}


Saving model checkpoint to ./model_output/checkpoint-30
Configuration saved in ./model_output/checkpoint-30/config.json


{'eval_loss': 0.6936699748039246, 'eval_runtime': 1.7106, 'eval_samples_per_second': 5.846, 'eval_steps_per_second': 5.846, 'epoch': 4.98}


Model weights saved in ./model_output/checkpoint-30/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-18] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7076, 'learning_rate': 3.6000000000000003e-06, 'epoch': 5.98}


Saving model checkpoint to ./model_output/checkpoint-36
Configuration saved in ./model_output/checkpoint-36/config.json


{'eval_loss': 0.6856588125228882, 'eval_runtime': 1.7214, 'eval_samples_per_second': 5.809, 'eval_steps_per_second': 5.809, 'epoch': 5.98}


Model weights saved in ./model_output/checkpoint-36/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-24] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7023, 'learning_rate': 4.2000000000000004e-06, 'epoch': 6.98}


Saving model checkpoint to ./model_output/checkpoint-42
Configuration saved in ./model_output/checkpoint-42/config.json


{'eval_loss': 0.6755739450454712, 'eval_runtime': 1.7054, 'eval_samples_per_second': 5.864, 'eval_steps_per_second': 5.864, 'epoch': 6.98}


Model weights saved in ./model_output/checkpoint-42/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-30] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6963, 'learning_rate': 4.800000000000001e-06, 'epoch': 7.98}


Saving model checkpoint to ./model_output/checkpoint-48
Configuration saved in ./model_output/checkpoint-48/config.json


{'eval_loss': 0.6662107110023499, 'eval_runtime': 1.7034, 'eval_samples_per_second': 5.871, 'eval_steps_per_second': 5.871, 'epoch': 7.98}


Model weights saved in ./model_output/checkpoint-48/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-36] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7045, 'learning_rate': 4.996638655462185e-06, 'epoch': 8.98}


Saving model checkpoint to ./model_output/checkpoint-54
Configuration saved in ./model_output/checkpoint-54/config.json


{'eval_loss': 0.6552952527999878, 'eval_runtime': 1.7119, 'eval_samples_per_second': 5.841, 'eval_steps_per_second': 5.841, 'epoch': 8.98}


Model weights saved in ./model_output/checkpoint-54/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-42] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6996, 'learning_rate': 4.9915966386554625e-06, 'epoch': 9.98}


Saving model checkpoint to ./model_output/checkpoint-60
Configuration saved in ./model_output/checkpoint-60/config.json


{'eval_loss': 0.6452484130859375, 'eval_runtime': 1.7132, 'eval_samples_per_second': 5.837, 'eval_steps_per_second': 5.837, 'epoch': 9.98}


Model weights saved in ./model_output/checkpoint-60/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-48] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6901, 'learning_rate': 4.98655462184874e-06, 'epoch': 10.98}


Saving model checkpoint to ./model_output/checkpoint-66
Configuration saved in ./model_output/checkpoint-66/config.json


{'eval_loss': 0.6313053369522095, 'eval_runtime': 1.7096, 'eval_samples_per_second': 5.849, 'eval_steps_per_second': 5.849, 'epoch': 10.98}


Model weights saved in ./model_output/checkpoint-66/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-54] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6891, 'learning_rate': 4.982352941176471e-06, 'epoch': 11.98}


Saving model checkpoint to ./model_output/checkpoint-72
Configuration saved in ./model_output/checkpoint-72/config.json


{'eval_loss': 0.6169389486312866, 'eval_runtime': 1.7082, 'eval_samples_per_second': 5.854, 'eval_steps_per_second': 5.854, 'epoch': 11.98}


Model weights saved in ./model_output/checkpoint-72/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-60] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6793, 'learning_rate': 4.9773109243697485e-06, 'epoch': 12.98}


Saving model checkpoint to ./model_output/checkpoint-78
Configuration saved in ./model_output/checkpoint-78/config.json


{'eval_loss': 0.6043844223022461, 'eval_runtime': 1.7064, 'eval_samples_per_second': 5.86, 'eval_steps_per_second': 5.86, 'epoch': 12.98}


Model weights saved in ./model_output/checkpoint-78/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-66] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6798, 'learning_rate': 4.972268907563025e-06, 'epoch': 13.98}


Saving model checkpoint to ./model_output/checkpoint-84
Configuration saved in ./model_output/checkpoint-84/config.json


{'eval_loss': 0.6031926870346069, 'eval_runtime': 1.71, 'eval_samples_per_second': 5.848, 'eval_steps_per_second': 5.848, 'epoch': 13.98}


Model weights saved in ./model_output/checkpoint-84/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-72] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.674, 'learning_rate': 4.967226890756303e-06, 'epoch': 14.98}


Saving model checkpoint to ./model_output/checkpoint-90
Configuration saved in ./model_output/checkpoint-90/config.json


{'eval_loss': 0.5931833386421204, 'eval_runtime': 1.7055, 'eval_samples_per_second': 5.863, 'eval_steps_per_second': 5.863, 'epoch': 14.98}


Model weights saved in ./model_output/checkpoint-90/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-78] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6597, 'learning_rate': 4.96218487394958e-06, 'epoch': 15.98}


Saving model checkpoint to ./model_output/checkpoint-96
Configuration saved in ./model_output/checkpoint-96/config.json


{'eval_loss': 0.5835180878639221, 'eval_runtime': 1.7093, 'eval_samples_per_second': 5.851, 'eval_steps_per_second': 5.851, 'epoch': 15.98}


Model weights saved in ./model_output/checkpoint-96/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-84] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6709, 'learning_rate': 4.9571428571428575e-06, 'epoch': 16.98}


Saving model checkpoint to ./model_output/checkpoint-102
Configuration saved in ./model_output/checkpoint-102/config.json


{'eval_loss': 0.5835957527160645, 'eval_runtime': 1.7072, 'eval_samples_per_second': 5.858, 'eval_steps_per_second': 5.858, 'epoch': 16.98}


Model weights saved in ./model_output/checkpoint-102/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-90] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6379, 'learning_rate': 4.952100840336135e-06, 'epoch': 17.98}


Saving model checkpoint to ./model_output/checkpoint-108
Configuration saved in ./model_output/checkpoint-108/config.json


{'eval_loss': 0.5557677149772644, 'eval_runtime': 1.7095, 'eval_samples_per_second': 5.85, 'eval_steps_per_second': 5.85, 'epoch': 17.98}


Model weights saved in ./model_output/checkpoint-108/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-96] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6279, 'learning_rate': 4.947058823529412e-06, 'epoch': 18.98}


Saving model checkpoint to ./model_output/checkpoint-114
Configuration saved in ./model_output/checkpoint-114/config.json


{'eval_loss': 0.5177955627441406, 'eval_runtime': 1.706, 'eval_samples_per_second': 5.862, 'eval_steps_per_second': 5.862, 'epoch': 18.98}


Model weights saved in ./model_output/checkpoint-114/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-102] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5943, 'learning_rate': 4.942016806722689e-06, 'epoch': 19.98}


Saving model checkpoint to ./model_output/checkpoint-120
Configuration saved in ./model_output/checkpoint-120/config.json


{'eval_loss': 0.5263456106185913, 'eval_runtime': 1.7144, 'eval_samples_per_second': 5.833, 'eval_steps_per_second': 5.833, 'epoch': 19.98}


Model weights saved in ./model_output/checkpoint-120/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-108] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5717, 'learning_rate': 4.936974789915967e-06, 'epoch': 20.98}


Saving model checkpoint to ./model_output/checkpoint-126
Configuration saved in ./model_output/checkpoint-126/config.json


{'eval_loss': 0.4601455330848694, 'eval_runtime': 1.7141, 'eval_samples_per_second': 5.834, 'eval_steps_per_second': 5.834, 'epoch': 20.98}


Model weights saved in ./model_output/checkpoint-126/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-114] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5417, 'learning_rate': 4.931932773109244e-06, 'epoch': 21.98}


Saving model checkpoint to ./model_output/checkpoint-132
Configuration saved in ./model_output/checkpoint-132/config.json


{'eval_loss': 0.42605361342430115, 'eval_runtime': 1.7089, 'eval_samples_per_second': 5.852, 'eval_steps_per_second': 5.852, 'epoch': 21.98}


Model weights saved in ./model_output/checkpoint-132/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-120] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5087, 'learning_rate': 4.926890756302521e-06, 'epoch': 22.98}


Saving model checkpoint to ./model_output/checkpoint-138
Configuration saved in ./model_output/checkpoint-138/config.json


{'eval_loss': 0.44188007712364197, 'eval_runtime': 1.7057, 'eval_samples_per_second': 5.863, 'eval_steps_per_second': 5.863, 'epoch': 22.98}


Model weights saved in ./model_output/checkpoint-138/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-126] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4393, 'learning_rate': 4.921848739495799e-06, 'epoch': 23.98}


Saving model checkpoint to ./model_output/checkpoint-144
Configuration saved in ./model_output/checkpoint-144/config.json


{'eval_loss': 0.40283823013305664, 'eval_runtime': 1.7115, 'eval_samples_per_second': 5.843, 'eval_steps_per_second': 5.843, 'epoch': 23.98}


Model weights saved in ./model_output/checkpoint-144/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-132] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4255, 'learning_rate': 4.916806722689076e-06, 'epoch': 24.98}


Saving model checkpoint to ./model_output/checkpoint-150
Configuration saved in ./model_output/checkpoint-150/config.json


{'eval_loss': 0.38550445437431335, 'eval_runtime': 1.7101, 'eval_samples_per_second': 5.848, 'eval_steps_per_second': 5.848, 'epoch': 24.98}


Model weights saved in ./model_output/checkpoint-150/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-138] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.3514, 'learning_rate': 4.911764705882353e-06, 'epoch': 25.98}


Saving model checkpoint to ./model_output/checkpoint-156
Configuration saved in ./model_output/checkpoint-156/config.json


{'eval_loss': 0.4174306392669678, 'eval_runtime': 1.708, 'eval_samples_per_second': 5.855, 'eval_steps_per_second': 5.855, 'epoch': 25.98}


Model weights saved in ./model_output/checkpoint-156/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-144] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2986, 'learning_rate': 4.906722689075631e-06, 'epoch': 26.98}


Saving model checkpoint to ./model_output/checkpoint-162
Configuration saved in ./model_output/checkpoint-162/config.json


{'eval_loss': 0.35337597131729126, 'eval_runtime': 1.7117, 'eval_samples_per_second': 5.842, 'eval_steps_per_second': 5.842, 'epoch': 26.98}


Model weights saved in ./model_output/checkpoint-162/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-150] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2238, 'learning_rate': 4.901680672268908e-06, 'epoch': 27.98}


Saving model checkpoint to ./model_output/checkpoint-168
Configuration saved in ./model_output/checkpoint-168/config.json


{'eval_loss': 0.3400033414363861, 'eval_runtime': 1.7077, 'eval_samples_per_second': 5.856, 'eval_steps_per_second': 5.856, 'epoch': 27.98}


Model weights saved in ./model_output/checkpoint-168/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-156] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1623, 'learning_rate': 4.897478991596639e-06, 'epoch': 28.98}


Saving model checkpoint to ./model_output/checkpoint-174
Configuration saved in ./model_output/checkpoint-174/config.json


{'eval_loss': 0.31604382395744324, 'eval_runtime': 1.7136, 'eval_samples_per_second': 5.836, 'eval_steps_per_second': 5.836, 'epoch': 28.98}


Model weights saved in ./model_output/checkpoint-174/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-162] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1118, 'learning_rate': 4.892436974789916e-06, 'epoch': 29.98}


Saving model checkpoint to ./model_output/checkpoint-180
Configuration saved in ./model_output/checkpoint-180/config.json


{'eval_loss': 0.3651197552680969, 'eval_runtime': 1.7123, 'eval_samples_per_second': 5.84, 'eval_steps_per_second': 5.84, 'epoch': 29.98}


Model weights saved in ./model_output/checkpoint-180/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-168] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0772, 'learning_rate': 4.888235294117647e-06, 'epoch': 30.98}


Saving model checkpoint to ./model_output/checkpoint-186
Configuration saved in ./model_output/checkpoint-186/config.json


{'eval_loss': 0.3302740156650543, 'eval_runtime': 1.7094, 'eval_samples_per_second': 5.85, 'eval_steps_per_second': 5.85, 'epoch': 30.98}


Model weights saved in ./model_output/checkpoint-186/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-180] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0499, 'learning_rate': 4.883193277310925e-06, 'epoch': 31.98}


Saving model checkpoint to ./model_output/checkpoint-192
Configuration saved in ./model_output/checkpoint-192/config.json


{'eval_loss': 0.38727742433547974, 'eval_runtime': 1.7036, 'eval_samples_per_second': 5.87, 'eval_steps_per_second': 5.87, 'epoch': 31.98}


Model weights saved in ./model_output/checkpoint-192/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-186] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0213, 'learning_rate': 4.878151260504202e-06, 'epoch': 32.98}


Saving model checkpoint to ./model_output/checkpoint-198
Configuration saved in ./model_output/checkpoint-198/config.json


{'eval_loss': 0.45443248748779297, 'eval_runtime': 1.7046, 'eval_samples_per_second': 5.866, 'eval_steps_per_second': 5.866, 'epoch': 32.98}


Model weights saved in ./model_output/checkpoint-198/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-192] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0122, 'learning_rate': 4.873109243697479e-06, 'epoch': 33.98}


Saving model checkpoint to ./model_output/checkpoint-204
Configuration saved in ./model_output/checkpoint-204/config.json


{'eval_loss': 0.5582516193389893, 'eval_runtime': 1.7034, 'eval_samples_per_second': 5.871, 'eval_steps_per_second': 5.871, 'epoch': 33.98}


Model weights saved in ./model_output/checkpoint-204/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-198] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0078, 'learning_rate': 4.868067226890757e-06, 'epoch': 34.98}


Saving model checkpoint to ./model_output/checkpoint-210
Configuration saved in ./model_output/checkpoint-210/config.json


{'eval_loss': 0.6855109930038452, 'eval_runtime': 1.7102, 'eval_samples_per_second': 5.847, 'eval_steps_per_second': 5.847, 'epoch': 34.98}


Model weights saved in ./model_output/checkpoint-210/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-204] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0056, 'learning_rate': 4.863025210084034e-06, 'epoch': 35.98}


Saving model checkpoint to ./model_output/checkpoint-216
Configuration saved in ./model_output/checkpoint-216/config.json


{'eval_loss': 0.7210736870765686, 'eval_runtime': 1.7115, 'eval_samples_per_second': 5.843, 'eval_steps_per_second': 5.843, 'epoch': 35.98}


Model weights saved in ./model_output/checkpoint-216/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-210] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0041, 'learning_rate': 4.857983193277311e-06, 'epoch': 36.98}


Saving model checkpoint to ./model_output/checkpoint-222
Configuration saved in ./model_output/checkpoint-222/config.json


{'eval_loss': 0.6688322424888611, 'eval_runtime': 1.7047, 'eval_samples_per_second': 5.866, 'eval_steps_per_second': 5.866, 'epoch': 36.98}


Model weights saved in ./model_output/checkpoint-222/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-216] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0045, 'learning_rate': 4.852941176470589e-06, 'epoch': 37.98}


Saving model checkpoint to ./model_output/checkpoint-228
Configuration saved in ./model_output/checkpoint-228/config.json


{'eval_loss': 0.6371610164642334, 'eval_runtime': 1.7083, 'eval_samples_per_second': 5.854, 'eval_steps_per_second': 5.854, 'epoch': 37.98}


Model weights saved in ./model_output/checkpoint-228/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-222] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0034, 'learning_rate': 4.847899159663866e-06, 'epoch': 38.98}


Saving model checkpoint to ./model_output/checkpoint-234
Configuration saved in ./model_output/checkpoint-234/config.json


{'eval_loss': 0.4881536066532135, 'eval_runtime': 1.7058, 'eval_samples_per_second': 5.862, 'eval_steps_per_second': 5.862, 'epoch': 38.98}


Model weights saved in ./model_output/checkpoint-234/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-228] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./model_output/checkpoint-174 (score: 0.31604382395744324).
***** Running Prediction *****
  Num examples = 9
  Batch size = 1


{'train_runtime': 2501.376, 'train_samples_per_second': 39.178, 'train_steps_per_second': 2.399, 'train_loss': 0.4490444120338075, 'epoch': 38.98}


loading file vocab.json
loading file merges.txt
loading file tokenizer.json
loading file added_tokens.json
loading file special_tokens_map.json
loading file tokenizer_config.json
loading configuration file ./models/mental-roberta-base/config.json
Model config RobertaConfig {
  "_name_or_path": "./models/mental-roberta-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,

Fitting model: mental_roberta_base using fold 6 as out of fold test data.
Train data sizes: (94, 94).
Val data sizes: (10, 10).
Test data sizes: (13, 13).


Some weights of the model checkpoint at ./models/mental-roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at ./models/mental-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classif

Map:   0%|          | 0/94 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Map:   0%|          | 0/13 [00:00<?, ? examples/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using cuda_amp half precision backend
***** Running training *****
  Num examples = 94
  Num Epochs = 1000
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 16
  Total optimization steps = 5000
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8372, 'learning_rate': 5.000000000000001e-07, 'epoch': 0.85}


Saving model checkpoint to ./model_output/checkpoint-5
Configuration saved in ./model_output/checkpoint-5/config.json


{'eval_loss': 0.7336952090263367, 'eval_runtime': 1.7093, 'eval_samples_per_second': 5.85, 'eval_steps_per_second': 5.85, 'epoch': 0.85}


Model weights saved in ./model_output/checkpoint-5/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-174] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8336, 'learning_rate': 1.0000000000000002e-06, 'epoch': 1.85}


Saving model checkpoint to ./model_output/checkpoint-10
Configuration saved in ./model_output/checkpoint-10/config.json


{'eval_loss': 0.7309082746505737, 'eval_runtime': 1.7064, 'eval_samples_per_second': 5.86, 'eval_steps_per_second': 5.86, 'epoch': 1.85}


Model weights saved in ./model_output/checkpoint-10/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-234] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8318, 'learning_rate': 1.5e-06, 'epoch': 2.85}


Saving model checkpoint to ./model_output/checkpoint-15
Configuration saved in ./model_output/checkpoint-15/config.json


{'eval_loss': 0.725797176361084, 'eval_runtime': 1.7233, 'eval_samples_per_second': 5.803, 'eval_steps_per_second': 5.803, 'epoch': 2.85}


Model weights saved in ./model_output/checkpoint-15/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-5] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8346, 'learning_rate': 2.0000000000000003e-06, 'epoch': 3.85}


Saving model checkpoint to ./model_output/checkpoint-20
Configuration saved in ./model_output/checkpoint-20/config.json


{'eval_loss': 0.7183441519737244, 'eval_runtime': 1.7095, 'eval_samples_per_second': 5.85, 'eval_steps_per_second': 5.85, 'epoch': 3.85}


Model weights saved in ./model_output/checkpoint-20/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-10] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8276, 'learning_rate': 2.5e-06, 'epoch': 4.85}


Saving model checkpoint to ./model_output/checkpoint-25
Configuration saved in ./model_output/checkpoint-25/config.json


{'eval_loss': 0.7096090912818909, 'eval_runtime': 1.7106, 'eval_samples_per_second': 5.846, 'eval_steps_per_second': 5.846, 'epoch': 4.85}


Model weights saved in ./model_output/checkpoint-25/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-15] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7999, 'learning_rate': 3e-06, 'epoch': 5.85}


Saving model checkpoint to ./model_output/checkpoint-30
Configuration saved in ./model_output/checkpoint-30/config.json


{'eval_loss': 0.6983339190483093, 'eval_runtime': 1.7091, 'eval_samples_per_second': 5.851, 'eval_steps_per_second': 5.851, 'epoch': 5.85}


Model weights saved in ./model_output/checkpoint-30/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-20] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.816, 'learning_rate': 3.5e-06, 'epoch': 6.85}


Saving model checkpoint to ./model_output/checkpoint-35
Configuration saved in ./model_output/checkpoint-35/config.json


{'eval_loss': 0.6874772310256958, 'eval_runtime': 1.7072, 'eval_samples_per_second': 5.858, 'eval_steps_per_second': 5.858, 'epoch': 6.85}


Model weights saved in ./model_output/checkpoint-35/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-25] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8062, 'learning_rate': 4.000000000000001e-06, 'epoch': 7.85}


Saving model checkpoint to ./model_output/checkpoint-40
Configuration saved in ./model_output/checkpoint-40/config.json


{'eval_loss': 0.6747021079063416, 'eval_runtime': 1.7083, 'eval_samples_per_second': 5.854, 'eval_steps_per_second': 5.854, 'epoch': 7.85}


Model weights saved in ./model_output/checkpoint-40/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-30] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8058, 'learning_rate': 4.5e-06, 'epoch': 8.85}


Saving model checkpoint to ./model_output/checkpoint-45
Configuration saved in ./model_output/checkpoint-45/config.json


{'eval_loss': 0.6615234017372131, 'eval_runtime': 1.7031, 'eval_samples_per_second': 5.872, 'eval_steps_per_second': 5.872, 'epoch': 8.85}


Model weights saved in ./model_output/checkpoint-45/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-35] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8053, 'learning_rate': 5e-06, 'epoch': 9.85}


Saving model checkpoint to ./model_output/checkpoint-50
Configuration saved in ./model_output/checkpoint-50/config.json


{'eval_loss': 0.6522213220596313, 'eval_runtime': 1.7094, 'eval_samples_per_second': 5.85, 'eval_steps_per_second': 5.85, 'epoch': 9.85}


Model weights saved in ./model_output/checkpoint-50/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-40] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.79, 'learning_rate': 4.9949494949494956e-06, 'epoch': 10.85}


Saving model checkpoint to ./model_output/checkpoint-55
Configuration saved in ./model_output/checkpoint-55/config.json


{'eval_loss': 0.6448199152946472, 'eval_runtime': 1.7052, 'eval_samples_per_second': 5.864, 'eval_steps_per_second': 5.864, 'epoch': 10.85}


Model weights saved in ./model_output/checkpoint-55/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-45] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7927, 'learning_rate': 4.98989898989899e-06, 'epoch': 11.85}


Saving model checkpoint to ./model_output/checkpoint-60
Configuration saved in ./model_output/checkpoint-60/config.json


{'eval_loss': 0.6376631855964661, 'eval_runtime': 1.7034, 'eval_samples_per_second': 5.871, 'eval_steps_per_second': 5.871, 'epoch': 11.85}


Model weights saved in ./model_output/checkpoint-60/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-50] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7951, 'learning_rate': 4.984848484848485e-06, 'epoch': 12.85}


Saving model checkpoint to ./model_output/checkpoint-65
Configuration saved in ./model_output/checkpoint-65/config.json


{'eval_loss': 0.6299856901168823, 'eval_runtime': 1.7052, 'eval_samples_per_second': 5.864, 'eval_steps_per_second': 5.864, 'epoch': 12.85}


Model weights saved in ./model_output/checkpoint-65/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-55] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7952, 'learning_rate': 4.97979797979798e-06, 'epoch': 13.85}


Saving model checkpoint to ./model_output/checkpoint-70
Configuration saved in ./model_output/checkpoint-70/config.json


{'eval_loss': 0.6195706725120544, 'eval_runtime': 1.7062, 'eval_samples_per_second': 5.861, 'eval_steps_per_second': 5.861, 'epoch': 13.85}


Model weights saved in ./model_output/checkpoint-70/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-60] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7705, 'learning_rate': 4.974747474747475e-06, 'epoch': 14.85}


Saving model checkpoint to ./model_output/checkpoint-75
Configuration saved in ./model_output/checkpoint-75/config.json


{'eval_loss': 0.6114852428436279, 'eval_runtime': 1.7097, 'eval_samples_per_second': 5.849, 'eval_steps_per_second': 5.849, 'epoch': 14.85}


Model weights saved in ./model_output/checkpoint-75/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-65] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7656, 'learning_rate': 4.9696969696969696e-06, 'epoch': 15.85}


Saving model checkpoint to ./model_output/checkpoint-80
Configuration saved in ./model_output/checkpoint-80/config.json


{'eval_loss': 0.6050575971603394, 'eval_runtime': 1.7072, 'eval_samples_per_second': 5.858, 'eval_steps_per_second': 5.858, 'epoch': 15.85}


Model weights saved in ./model_output/checkpoint-80/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-70] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.771, 'learning_rate': 4.964646464646465e-06, 'epoch': 16.85}


Saving model checkpoint to ./model_output/checkpoint-85
Configuration saved in ./model_output/checkpoint-85/config.json


{'eval_loss': 0.5941473841667175, 'eval_runtime': 1.7043, 'eval_samples_per_second': 5.868, 'eval_steps_per_second': 5.868, 'epoch': 16.85}


Model weights saved in ./model_output/checkpoint-85/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-75] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7468, 'learning_rate': 4.95959595959596e-06, 'epoch': 17.85}


Saving model checkpoint to ./model_output/checkpoint-90
Configuration saved in ./model_output/checkpoint-90/config.json


{'eval_loss': 0.5847631096839905, 'eval_runtime': 1.7048, 'eval_samples_per_second': 5.866, 'eval_steps_per_second': 5.866, 'epoch': 17.85}


Model weights saved in ./model_output/checkpoint-90/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-80] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7213, 'learning_rate': 4.954545454545455e-06, 'epoch': 18.85}


Saving model checkpoint to ./model_output/checkpoint-95
Configuration saved in ./model_output/checkpoint-95/config.json


{'eval_loss': 0.5597463846206665, 'eval_runtime': 1.7038, 'eval_samples_per_second': 5.869, 'eval_steps_per_second': 5.869, 'epoch': 18.85}


Model weights saved in ./model_output/checkpoint-95/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-85] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6984, 'learning_rate': 4.94949494949495e-06, 'epoch': 19.85}


Saving model checkpoint to ./model_output/checkpoint-100
Configuration saved in ./model_output/checkpoint-100/config.json


{'eval_loss': 0.5221564769744873, 'eval_runtime': 1.7083, 'eval_samples_per_second': 5.854, 'eval_steps_per_second': 5.854, 'epoch': 19.85}


Model weights saved in ./model_output/checkpoint-100/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-90] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6817, 'learning_rate': 4.944444444444445e-06, 'epoch': 20.85}


Saving model checkpoint to ./model_output/checkpoint-105
Configuration saved in ./model_output/checkpoint-105/config.json


{'eval_loss': 0.48870235681533813, 'eval_runtime': 1.7096, 'eval_samples_per_second': 5.849, 'eval_steps_per_second': 5.849, 'epoch': 20.85}


Model weights saved in ./model_output/checkpoint-105/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-95] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6308, 'learning_rate': 4.93939393939394e-06, 'epoch': 21.85}


Saving model checkpoint to ./model_output/checkpoint-110
Configuration saved in ./model_output/checkpoint-110/config.json


{'eval_loss': 0.4675827920436859, 'eval_runtime': 1.7084, 'eval_samples_per_second': 5.854, 'eval_steps_per_second': 5.854, 'epoch': 21.85}


Model weights saved in ./model_output/checkpoint-110/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-100] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5789, 'learning_rate': 4.934343434343435e-06, 'epoch': 22.85}


Saving model checkpoint to ./model_output/checkpoint-115
Configuration saved in ./model_output/checkpoint-115/config.json


{'eval_loss': 0.43334728479385376, 'eval_runtime': 1.7101, 'eval_samples_per_second': 5.848, 'eval_steps_per_second': 5.848, 'epoch': 22.85}


Model weights saved in ./model_output/checkpoint-115/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-105] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5743, 'learning_rate': 4.9303030303030305e-06, 'epoch': 23.85}


Saving model checkpoint to ./model_output/checkpoint-120
Configuration saved in ./model_output/checkpoint-120/config.json


{'eval_loss': 0.4089147448539734, 'eval_runtime': 1.7086, 'eval_samples_per_second': 5.853, 'eval_steps_per_second': 5.853, 'epoch': 23.85}


Model weights saved in ./model_output/checkpoint-120/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-110] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5302, 'learning_rate': 4.925252525252526e-06, 'epoch': 24.85}


Saving model checkpoint to ./model_output/checkpoint-125
Configuration saved in ./model_output/checkpoint-125/config.json


{'eval_loss': 0.381712943315506, 'eval_runtime': 1.7119, 'eval_samples_per_second': 5.842, 'eval_steps_per_second': 5.842, 'epoch': 24.85}


Model weights saved in ./model_output/checkpoint-125/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-115] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4743, 'learning_rate': 4.920202020202021e-06, 'epoch': 25.85}


Saving model checkpoint to ./model_output/checkpoint-130
Configuration saved in ./model_output/checkpoint-130/config.json


{'eval_loss': 0.32979756593704224, 'eval_runtime': 1.7109, 'eval_samples_per_second': 5.845, 'eval_steps_per_second': 5.845, 'epoch': 25.85}


Model weights saved in ./model_output/checkpoint-130/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-120] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4212, 'learning_rate': 4.915151515151516e-06, 'epoch': 26.85}


Saving model checkpoint to ./model_output/checkpoint-135
Configuration saved in ./model_output/checkpoint-135/config.json


{'eval_loss': 0.30519339442253113, 'eval_runtime': 1.7109, 'eval_samples_per_second': 5.845, 'eval_steps_per_second': 5.845, 'epoch': 26.85}


Model weights saved in ./model_output/checkpoint-135/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-125] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.3587, 'learning_rate': 4.91010101010101e-06, 'epoch': 27.85}


Saving model checkpoint to ./model_output/checkpoint-140
Configuration saved in ./model_output/checkpoint-140/config.json


{'eval_loss': 0.2578052580356598, 'eval_runtime': 1.7048, 'eval_samples_per_second': 5.866, 'eval_steps_per_second': 5.866, 'epoch': 27.85}


Model weights saved in ./model_output/checkpoint-140/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-130] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2695, 'learning_rate': 4.905050505050505e-06, 'epoch': 28.85}


Saving model checkpoint to ./model_output/checkpoint-145
Configuration saved in ./model_output/checkpoint-145/config.json


{'eval_loss': 0.22870364785194397, 'eval_runtime': 1.7047, 'eval_samples_per_second': 5.866, 'eval_steps_per_second': 5.866, 'epoch': 28.85}


Model weights saved in ./model_output/checkpoint-145/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-135] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1935, 'learning_rate': 4.9000000000000005e-06, 'epoch': 29.85}


Saving model checkpoint to ./model_output/checkpoint-150
Configuration saved in ./model_output/checkpoint-150/config.json


{'eval_loss': 0.22259831428527832, 'eval_runtime': 1.7047, 'eval_samples_per_second': 5.866, 'eval_steps_per_second': 5.866, 'epoch': 29.85}


Model weights saved in ./model_output/checkpoint-150/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-140] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1327, 'learning_rate': 4.894949494949495e-06, 'epoch': 30.85}


Saving model checkpoint to ./model_output/checkpoint-155
Configuration saved in ./model_output/checkpoint-155/config.json


{'eval_loss': 0.1668390929698944, 'eval_runtime': 1.7011, 'eval_samples_per_second': 5.879, 'eval_steps_per_second': 5.879, 'epoch': 30.85}


Model weights saved in ./model_output/checkpoint-155/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-145] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0734, 'learning_rate': 4.88989898989899e-06, 'epoch': 31.85}


Saving model checkpoint to ./model_output/checkpoint-160
Configuration saved in ./model_output/checkpoint-160/config.json


{'eval_loss': 0.22546128928661346, 'eval_runtime': 1.7067, 'eval_samples_per_second': 5.859, 'eval_steps_per_second': 5.859, 'epoch': 31.85}


Model weights saved in ./model_output/checkpoint-160/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-150] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0498, 'learning_rate': 4.884848484848485e-06, 'epoch': 32.85}


Saving model checkpoint to ./model_output/checkpoint-165
Configuration saved in ./model_output/checkpoint-165/config.json


{'eval_loss': 0.14494413137435913, 'eval_runtime': 1.7119, 'eval_samples_per_second': 5.841, 'eval_steps_per_second': 5.841, 'epoch': 32.85}


Model weights saved in ./model_output/checkpoint-165/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-155] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0381, 'learning_rate': 4.87979797979798e-06, 'epoch': 33.85}


Saving model checkpoint to ./model_output/checkpoint-170
Configuration saved in ./model_output/checkpoint-170/config.json


{'eval_loss': 0.3007768988609314, 'eval_runtime': 1.706, 'eval_samples_per_second': 5.862, 'eval_steps_per_second': 5.862, 'epoch': 33.85}


Model weights saved in ./model_output/checkpoint-170/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-160] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0207, 'learning_rate': 4.8747474747474745e-06, 'epoch': 34.85}


Saving model checkpoint to ./model_output/checkpoint-175
Configuration saved in ./model_output/checkpoint-175/config.json


{'eval_loss': 0.1625436544418335, 'eval_runtime': 1.7031, 'eval_samples_per_second': 5.872, 'eval_steps_per_second': 5.872, 'epoch': 34.85}


Model weights saved in ./model_output/checkpoint-175/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-170] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.013, 'learning_rate': 4.8696969696969705e-06, 'epoch': 35.85}


Saving model checkpoint to ./model_output/checkpoint-180
Configuration saved in ./model_output/checkpoint-180/config.json


{'eval_loss': 0.2843451201915741, 'eval_runtime': 1.707, 'eval_samples_per_second': 5.858, 'eval_steps_per_second': 5.858, 'epoch': 35.85}


Model weights saved in ./model_output/checkpoint-180/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-175] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0112, 'learning_rate': 4.864646464646466e-06, 'epoch': 36.85}


Saving model checkpoint to ./model_output/checkpoint-185
Configuration saved in ./model_output/checkpoint-185/config.json


{'eval_loss': 0.16184820234775543, 'eval_runtime': 1.7091, 'eval_samples_per_second': 5.851, 'eval_steps_per_second': 5.851, 'epoch': 36.85}


Model weights saved in ./model_output/checkpoint-185/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-180] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.007, 'learning_rate': 4.85959595959596e-06, 'epoch': 37.85}


Saving model checkpoint to ./model_output/checkpoint-190
Configuration saved in ./model_output/checkpoint-190/config.json


{'eval_loss': 0.25183388590812683, 'eval_runtime': 1.7098, 'eval_samples_per_second': 5.849, 'eval_steps_per_second': 5.849, 'epoch': 37.85}


Model weights saved in ./model_output/checkpoint-190/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-185] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0053, 'learning_rate': 4.854545454545455e-06, 'epoch': 38.85}


Saving model checkpoint to ./model_output/checkpoint-195
Configuration saved in ./model_output/checkpoint-195/config.json


{'eval_loss': 0.15589436888694763, 'eval_runtime': 1.7102, 'eval_samples_per_second': 5.847, 'eval_steps_per_second': 5.847, 'epoch': 38.85}


Model weights saved in ./model_output/checkpoint-195/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-190] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0052, 'learning_rate': 4.84949494949495e-06, 'epoch': 39.85}


Saving model checkpoint to ./model_output/checkpoint-200
Configuration saved in ./model_output/checkpoint-200/config.json


{'eval_loss': 0.15980294346809387, 'eval_runtime': 1.7098, 'eval_samples_per_second': 5.849, 'eval_steps_per_second': 5.849, 'epoch': 39.85}


Model weights saved in ./model_output/checkpoint-200/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-195] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0041, 'learning_rate': 4.8444444444444446e-06, 'epoch': 40.85}


Saving model checkpoint to ./model_output/checkpoint-205
Configuration saved in ./model_output/checkpoint-205/config.json


{'eval_loss': 0.2695322036743164, 'eval_runtime': 1.7074, 'eval_samples_per_second': 5.857, 'eval_steps_per_second': 5.857, 'epoch': 40.85}


Model weights saved in ./model_output/checkpoint-205/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-200] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0029, 'learning_rate': 4.83939393939394e-06, 'epoch': 41.85}


Saving model checkpoint to ./model_output/checkpoint-210
Configuration saved in ./model_output/checkpoint-210/config.json


{'eval_loss': 0.310590922832489, 'eval_runtime': 1.7219, 'eval_samples_per_second': 5.807, 'eval_steps_per_second': 5.807, 'epoch': 41.85}


Model weights saved in ./model_output/checkpoint-210/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-205] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0034, 'learning_rate': 4.834343434343435e-06, 'epoch': 42.85}


Saving model checkpoint to ./model_output/checkpoint-215
Configuration saved in ./model_output/checkpoint-215/config.json


{'eval_loss': 0.15127791464328766, 'eval_runtime': 1.7098, 'eval_samples_per_second': 5.849, 'eval_steps_per_second': 5.849, 'epoch': 42.85}


Model weights saved in ./model_output/checkpoint-215/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-210] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./model_output/checkpoint-165 (score: 0.14494413137435913).
***** Running Prediction *****
  Num examples = 13
  Batch size = 1


{'train_runtime': 2646.2256, 'train_samples_per_second': 35.522, 'train_steps_per_second': 1.889, 'train_loss': 0.4866121710862878, 'epoch': 42.85}


loading file vocab.json
loading file merges.txt
loading file tokenizer.json
loading file added_tokens.json
loading file special_tokens_map.json
loading file tokenizer_config.json
loading configuration file ./models/mental-roberta-base/config.json
Model config RobertaConfig {
  "_name_or_path": "./models/mental-roberta-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,

Fitting model: mental_roberta_base using fold 7 as out of fold test data.
Train data sizes: (94, 94).
Val data sizes: (13, 13).
Test data sizes: (10, 10).


Some weights of the model checkpoint at ./models/mental-roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at ./models/mental-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classif

Map:   0%|          | 0/94 [00:00<?, ? examples/s]

Map:   0%|          | 0/13 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using cuda_amp half precision backend
***** Running training *****
  Num examples = 94
  Num Epochs = 1000
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 16
  Total optimization steps = 5000
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8119, 'learning_rate': 5.000000000000001e-07, 'epoch': 0.85}


Saving model checkpoint to ./model_output/checkpoint-5
Configuration saved in ./model_output/checkpoint-5/config.json


{'eval_loss': 0.69505774974823, 'eval_runtime': 2.1939, 'eval_samples_per_second': 5.925, 'eval_steps_per_second': 5.925, 'epoch': 0.85}


Model weights saved in ./model_output/checkpoint-5/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-165] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8034, 'learning_rate': 1.0000000000000002e-06, 'epoch': 1.85}


Saving model checkpoint to ./model_output/checkpoint-10
Configuration saved in ./model_output/checkpoint-10/config.json


{'eval_loss': 0.6953591704368591, 'eval_runtime': 2.1938, 'eval_samples_per_second': 5.926, 'eval_steps_per_second': 5.926, 'epoch': 1.85}


Model weights saved in ./model_output/checkpoint-10/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-215] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8083, 'learning_rate': 1.5e-06, 'epoch': 2.85}


Saving model checkpoint to ./model_output/checkpoint-15
Configuration saved in ./model_output/checkpoint-15/config.json


{'eval_loss': 0.695907473564148, 'eval_runtime': 2.1955, 'eval_samples_per_second': 5.921, 'eval_steps_per_second': 5.921, 'epoch': 2.85}


Model weights saved in ./model_output/checkpoint-15/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-10] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.806, 'learning_rate': 2.0000000000000003e-06, 'epoch': 3.85}


Saving model checkpoint to ./model_output/checkpoint-20
Configuration saved in ./model_output/checkpoint-20/config.json


{'eval_loss': 0.6968543529510498, 'eval_runtime': 2.1888, 'eval_samples_per_second': 5.939, 'eval_steps_per_second': 5.939, 'epoch': 3.85}


Model weights saved in ./model_output/checkpoint-20/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-15] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8005, 'learning_rate': 2.5e-06, 'epoch': 4.85}


Saving model checkpoint to ./model_output/checkpoint-25
Configuration saved in ./model_output/checkpoint-25/config.json


{'eval_loss': 0.6978814601898193, 'eval_runtime': 2.1983, 'eval_samples_per_second': 5.914, 'eval_steps_per_second': 5.914, 'epoch': 4.85}


Model weights saved in ./model_output/checkpoint-25/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-20] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8121, 'learning_rate': 3e-06, 'epoch': 5.85}


Saving model checkpoint to ./model_output/checkpoint-30
Configuration saved in ./model_output/checkpoint-30/config.json


{'eval_loss': 0.6994186639785767, 'eval_runtime': 2.1891, 'eval_samples_per_second': 5.938, 'eval_steps_per_second': 5.938, 'epoch': 5.85}


Model weights saved in ./model_output/checkpoint-30/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-25] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8021, 'learning_rate': 3.5e-06, 'epoch': 6.85}


Saving model checkpoint to ./model_output/checkpoint-35
Configuration saved in ./model_output/checkpoint-35/config.json


{'eval_loss': 0.7008711695671082, 'eval_runtime': 2.1893, 'eval_samples_per_second': 5.938, 'eval_steps_per_second': 5.938, 'epoch': 6.85}


Model weights saved in ./model_output/checkpoint-35/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-30] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.796, 'learning_rate': 4.000000000000001e-06, 'epoch': 7.85}


Saving model checkpoint to ./model_output/checkpoint-40
Configuration saved in ./model_output/checkpoint-40/config.json


{'eval_loss': 0.702957034111023, 'eval_runtime': 2.1884, 'eval_samples_per_second': 5.94, 'eval_steps_per_second': 5.94, 'epoch': 7.85}


Model weights saved in ./model_output/checkpoint-40/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-35] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7936, 'learning_rate': 4.5e-06, 'epoch': 8.85}


Saving model checkpoint to ./model_output/checkpoint-45
Configuration saved in ./model_output/checkpoint-45/config.json


{'eval_loss': 0.7050777077674866, 'eval_runtime': 2.1964, 'eval_samples_per_second': 5.919, 'eval_steps_per_second': 5.919, 'epoch': 8.85}


Model weights saved in ./model_output/checkpoint-45/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-40] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8023, 'learning_rate': 5e-06, 'epoch': 9.85}


Saving model checkpoint to ./model_output/checkpoint-50
Configuration saved in ./model_output/checkpoint-50/config.json


{'eval_loss': 0.7065998315811157, 'eval_runtime': 2.1929, 'eval_samples_per_second': 5.928, 'eval_steps_per_second': 5.928, 'epoch': 9.85}


Model weights saved in ./model_output/checkpoint-50/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-45] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7855, 'learning_rate': 4.9949494949494956e-06, 'epoch': 10.85}


Saving model checkpoint to ./model_output/checkpoint-55
Configuration saved in ./model_output/checkpoint-55/config.json


{'eval_loss': 0.7075112462043762, 'eval_runtime': 2.2003, 'eval_samples_per_second': 5.908, 'eval_steps_per_second': 5.908, 'epoch': 10.85}


Model weights saved in ./model_output/checkpoint-55/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-50] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./model_output/checkpoint-5 (score: 0.69505774974823).
***** Running Prediction *****
  Num examples = 10
  Batch size = 1


{'train_runtime': 681.6606, 'train_samples_per_second': 137.899, 'train_steps_per_second': 7.335, 'train_loss': 0.8019859834150834, 'epoch': 10.85}


loading file vocab.json
loading file merges.txt
loading file tokenizer.json
loading file added_tokens.json
loading file special_tokens_map.json
loading file tokenizer_config.json
loading configuration file ./models/mental-roberta-base/config.json
Model config RobertaConfig {
  "_name_or_path": "./models/mental-roberta-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,

Fitting model: mental_roberta_base using fold 8 as out of fold test data.
Train data sizes: (100, 100).
Val data sizes: (13, 13).
Test data sizes: (4, 4).


Some weights of the model checkpoint at ./models/mental-roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at ./models/mental-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classif

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/13 [00:00<?, ? examples/s]

Map:   0%|          | 0/4 [00:00<?, ? examples/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using cuda_amp half precision backend
***** Running training *****
  Num examples = 100
  Num Epochs = 1000
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 16
  Total optimization steps = 6000
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7432, 'learning_rate': 6.000000000000001e-07, 'epoch': 0.96}


Saving model checkpoint to ./model_output/checkpoint-6
Configuration saved in ./model_output/checkpoint-6/config.json


{'eval_loss': 0.6932361125946045, 'eval_runtime': 2.1969, 'eval_samples_per_second': 5.917, 'eval_steps_per_second': 5.917, 'epoch': 0.96}


Model weights saved in ./model_output/checkpoint-6/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-5] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7483, 'learning_rate': 1.2000000000000002e-06, 'epoch': 1.96}


Saving model checkpoint to ./model_output/checkpoint-12
Configuration saved in ./model_output/checkpoint-12/config.json


{'eval_loss': 0.6932200193405151, 'eval_runtime': 2.1971, 'eval_samples_per_second': 5.917, 'eval_steps_per_second': 5.917, 'epoch': 1.96}


Model weights saved in ./model_output/checkpoint-12/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-55] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7312, 'learning_rate': 1.7000000000000002e-06, 'epoch': 2.96}


Saving model checkpoint to ./model_output/checkpoint-18
Configuration saved in ./model_output/checkpoint-18/config.json


{'eval_loss': 0.6933632493019104, 'eval_runtime': 2.1913, 'eval_samples_per_second': 5.932, 'eval_steps_per_second': 5.932, 'epoch': 2.96}


Model weights saved in ./model_output/checkpoint-18/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-6] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.726, 'learning_rate': 2.3000000000000004e-06, 'epoch': 3.96}


Saving model checkpoint to ./model_output/checkpoint-24
Configuration saved in ./model_output/checkpoint-24/config.json


{'eval_loss': 0.6938534379005432, 'eval_runtime': 2.1931, 'eval_samples_per_second': 5.928, 'eval_steps_per_second': 5.928, 'epoch': 3.96}


Model weights saved in ./model_output/checkpoint-24/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-18] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7201, 'learning_rate': 2.9e-06, 'epoch': 4.96}


Saving model checkpoint to ./model_output/checkpoint-30
Configuration saved in ./model_output/checkpoint-30/config.json


{'eval_loss': 0.6950899958610535, 'eval_runtime': 2.2193, 'eval_samples_per_second': 5.858, 'eval_steps_per_second': 5.858, 'epoch': 4.96}


Model weights saved in ./model_output/checkpoint-30/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-24] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7156, 'learning_rate': 3.5e-06, 'epoch': 5.96}


Saving model checkpoint to ./model_output/checkpoint-36
Configuration saved in ./model_output/checkpoint-36/config.json


{'eval_loss': 0.6970611214637756, 'eval_runtime': 2.2009, 'eval_samples_per_second': 5.907, 'eval_steps_per_second': 5.907, 'epoch': 5.96}


Model weights saved in ./model_output/checkpoint-36/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-30] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7144, 'learning_rate': 4.1e-06, 'epoch': 6.96}


Saving model checkpoint to ./model_output/checkpoint-42
Configuration saved in ./model_output/checkpoint-42/config.json


{'eval_loss': 0.6992820501327515, 'eval_runtime': 2.1963, 'eval_samples_per_second': 5.919, 'eval_steps_per_second': 5.919, 'epoch': 6.96}


Model weights saved in ./model_output/checkpoint-42/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-36] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.6924, 'learning_rate': 4.7e-06, 'epoch': 7.96}


Saving model checkpoint to ./model_output/checkpoint-48
Configuration saved in ./model_output/checkpoint-48/config.json


{'eval_loss': 0.7038742899894714, 'eval_runtime': 2.1928, 'eval_samples_per_second': 5.928, 'eval_steps_per_second': 5.928, 'epoch': 7.96}


Model weights saved in ./model_output/checkpoint-48/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-42] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.711, 'learning_rate': 4.9974789915966396e-06, 'epoch': 8.96}


Saving model checkpoint to ./model_output/checkpoint-54
Configuration saved in ./model_output/checkpoint-54/config.json


{'eval_loss': 0.7107869386672974, 'eval_runtime': 2.1946, 'eval_samples_per_second': 5.924, 'eval_steps_per_second': 5.924, 'epoch': 8.96}


Model weights saved in ./model_output/checkpoint-54/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-48] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.6924, 'learning_rate': 4.992436974789916e-06, 'epoch': 9.96}


Saving model checkpoint to ./model_output/checkpoint-60
Configuration saved in ./model_output/checkpoint-60/config.json


{'eval_loss': 0.7196356654167175, 'eval_runtime': 2.1917, 'eval_samples_per_second': 5.931, 'eval_steps_per_second': 5.931, 'epoch': 9.96}


Model weights saved in ./model_output/checkpoint-60/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-54] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7132, 'learning_rate': 4.987394957983194e-06, 'epoch': 10.96}


Saving model checkpoint to ./model_output/checkpoint-66
Configuration saved in ./model_output/checkpoint-66/config.json


{'eval_loss': 0.72201007604599, 'eval_runtime': 2.1926, 'eval_samples_per_second': 5.929, 'eval_steps_per_second': 5.929, 'epoch': 10.96}


Model weights saved in ./model_output/checkpoint-66/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-60] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.6923, 'learning_rate': 4.982352941176471e-06, 'epoch': 11.96}


Saving model checkpoint to ./model_output/checkpoint-72
Configuration saved in ./model_output/checkpoint-72/config.json


{'eval_loss': 0.7247807383537292, 'eval_runtime': 2.1886, 'eval_samples_per_second': 5.94, 'eval_steps_per_second': 5.94, 'epoch': 11.96}


Model weights saved in ./model_output/checkpoint-72/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-66] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./model_output/checkpoint-12 (score: 0.6932200193405151).
***** Running Prediction *****
  Num examples = 4
  Batch size = 1


{'train_runtime': 789.7296, 'train_samples_per_second': 126.626, 'train_steps_per_second': 7.598, 'train_loss': 0.7166870501306322, 'epoch': 11.96}


loading file vocab.json
loading file merges.txt
loading file tokenizer.json
loading file added_tokens.json
loading file special_tokens_map.json
loading file tokenizer_config.json
loading configuration file ./models/mental-roberta-base/config.json
Model config RobertaConfig {
  "_name_or_path": "./models/mental-roberta-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,

Fitting model: mental_roberta_base using fold 9 as out of fold test data.
Train data sizes: (93, 93).
Val data sizes: (13, 13).
Test data sizes: (11, 11).


Some weights of the model checkpoint at ./models/mental-roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at ./models/mental-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classif

Map:   0%|          | 0/93 [00:00<?, ? examples/s]

Map:   0%|          | 0/13 [00:00<?, ? examples/s]

Map:   0%|          | 0/11 [00:00<?, ? examples/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using cuda_amp half precision backend
***** Running training *****
  Num examples = 93
  Num Epochs = 1000
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 16
  Total optimization steps = 5000
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.809, 'learning_rate': 5.000000000000001e-07, 'epoch': 0.86}


Saving model checkpoint to ./model_output/checkpoint-5
Configuration saved in ./model_output/checkpoint-5/config.json


{'eval_loss': 0.6930135488510132, 'eval_runtime': 2.1946, 'eval_samples_per_second': 5.924, 'eval_steps_per_second': 5.924, 'epoch': 0.86}


Model weights saved in ./model_output/checkpoint-5/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-12] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.807, 'learning_rate': 1.0000000000000002e-06, 'epoch': 1.86}


Saving model checkpoint to ./model_output/checkpoint-10
Configuration saved in ./model_output/checkpoint-10/config.json


{'eval_loss': 0.6932476758956909, 'eval_runtime': 2.1915, 'eval_samples_per_second': 5.932, 'eval_steps_per_second': 5.932, 'epoch': 1.86}


Model weights saved in ./model_output/checkpoint-10/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-72] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8065, 'learning_rate': 1.5e-06, 'epoch': 2.86}


Saving model checkpoint to ./model_output/checkpoint-15
Configuration saved in ./model_output/checkpoint-15/config.json


{'eval_loss': 0.6935715079307556, 'eval_runtime': 2.1924, 'eval_samples_per_second': 5.93, 'eval_steps_per_second': 5.93, 'epoch': 2.86}


Model weights saved in ./model_output/checkpoint-15/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-10] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8041, 'learning_rate': 2.0000000000000003e-06, 'epoch': 3.86}


Saving model checkpoint to ./model_output/checkpoint-20
Configuration saved in ./model_output/checkpoint-20/config.json


{'eval_loss': 0.6941434144973755, 'eval_runtime': 2.1889, 'eval_samples_per_second': 5.939, 'eval_steps_per_second': 5.939, 'epoch': 3.86}


Model weights saved in ./model_output/checkpoint-20/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-15] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8038, 'learning_rate': 2.5e-06, 'epoch': 4.86}


Saving model checkpoint to ./model_output/checkpoint-25
Configuration saved in ./model_output/checkpoint-25/config.json


{'eval_loss': 0.6948557496070862, 'eval_runtime': 2.1962, 'eval_samples_per_second': 5.919, 'eval_steps_per_second': 5.919, 'epoch': 4.86}


Model weights saved in ./model_output/checkpoint-25/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-20] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7995, 'learning_rate': 3e-06, 'epoch': 5.86}


Saving model checkpoint to ./model_output/checkpoint-30
Configuration saved in ./model_output/checkpoint-30/config.json


{'eval_loss': 0.6957405805587769, 'eval_runtime': 2.1889, 'eval_samples_per_second': 5.939, 'eval_steps_per_second': 5.939, 'epoch': 5.86}


Model weights saved in ./model_output/checkpoint-30/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-25] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7922, 'learning_rate': 3.5e-06, 'epoch': 6.86}


Saving model checkpoint to ./model_output/checkpoint-35
Configuration saved in ./model_output/checkpoint-35/config.json


{'eval_loss': 0.6970681548118591, 'eval_runtime': 2.1966, 'eval_samples_per_second': 5.918, 'eval_steps_per_second': 5.918, 'epoch': 6.86}


Model weights saved in ./model_output/checkpoint-35/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-30] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8018, 'learning_rate': 4.000000000000001e-06, 'epoch': 7.86}


Saving model checkpoint to ./model_output/checkpoint-40
Configuration saved in ./model_output/checkpoint-40/config.json


{'eval_loss': 0.6989257335662842, 'eval_runtime': 2.1968, 'eval_samples_per_second': 5.918, 'eval_steps_per_second': 5.918, 'epoch': 7.86}


Model weights saved in ./model_output/checkpoint-40/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-35] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7968, 'learning_rate': 4.5e-06, 'epoch': 8.86}


Saving model checkpoint to ./model_output/checkpoint-45
Configuration saved in ./model_output/checkpoint-45/config.json


{'eval_loss': 0.7008813619613647, 'eval_runtime': 2.1942, 'eval_samples_per_second': 5.925, 'eval_steps_per_second': 5.925, 'epoch': 8.86}


Model weights saved in ./model_output/checkpoint-45/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-40] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7819, 'learning_rate': 5e-06, 'epoch': 9.86}


Saving model checkpoint to ./model_output/checkpoint-50
Configuration saved in ./model_output/checkpoint-50/config.json


{'eval_loss': 0.7036038041114807, 'eval_runtime': 2.1972, 'eval_samples_per_second': 5.916, 'eval_steps_per_second': 5.916, 'epoch': 9.86}


Model weights saved in ./model_output/checkpoint-50/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-45] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7752, 'learning_rate': 4.9949494949494956e-06, 'epoch': 10.86}


Saving model checkpoint to ./model_output/checkpoint-55
Configuration saved in ./model_output/checkpoint-55/config.json


{'eval_loss': 0.7083276510238647, 'eval_runtime': 2.193, 'eval_samples_per_second': 5.928, 'eval_steps_per_second': 5.928, 'epoch': 10.86}


Model weights saved in ./model_output/checkpoint-55/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-50] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./model_output/checkpoint-5 (score: 0.6930135488510132).
***** Running Prediction *****
  Num examples = 11
  Batch size = 1


{'train_runtime': 674.9854, 'train_samples_per_second': 137.781, 'train_steps_per_second': 7.408, 'train_loss': 0.7979852329600942, 'epoch': 10.86}


loading file vocab.json
loading file merges.txt
loading file tokenizer.json
loading file added_tokens.json
loading file special_tokens_map.json
loading file tokenizer_config.json
loading configuration file ./models/mental-roberta-base/config.json
Model config RobertaConfig {
  "_name_or_path": "./models/mental-roberta-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,

Fitting model: mental_roberta_base using fold 10 as out of fold test data.
Train data sizes: (97, 97).
Val data sizes: (13, 13).
Test data sizes: (7, 7).


Some weights of the model checkpoint at ./models/mental-roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at ./models/mental-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classif

Map:   0%|          | 0/97 [00:00<?, ? examples/s]

Map:   0%|          | 0/13 [00:00<?, ? examples/s]

Map:   0%|          | 0/7 [00:00<?, ? examples/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using cuda_amp half precision backend
***** Running training *****
  Num examples = 97
  Num Epochs = 1000
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 16
  Total optimization steps = 6000
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7137, 'learning_rate': 6.000000000000001e-07, 'epoch': 0.99}


Saving model checkpoint to ./model_output/checkpoint-6
Configuration saved in ./model_output/checkpoint-6/config.json


{'eval_loss': 0.6932439804077148, 'eval_runtime': 2.1905, 'eval_samples_per_second': 5.935, 'eval_steps_per_second': 5.935, 'epoch': 0.99}


Model weights saved in ./model_output/checkpoint-6/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-5] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7143, 'learning_rate': 1.2000000000000002e-06, 'epoch': 1.99}


Saving model checkpoint to ./model_output/checkpoint-12
Configuration saved in ./model_output/checkpoint-12/config.json


{'eval_loss': 0.6933339834213257, 'eval_runtime': 2.1984, 'eval_samples_per_second': 5.913, 'eval_steps_per_second': 5.913, 'epoch': 1.99}


Model weights saved in ./model_output/checkpoint-12/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-55] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7061, 'learning_rate': 1.8000000000000001e-06, 'epoch': 2.99}


Saving model checkpoint to ./model_output/checkpoint-18
Configuration saved in ./model_output/checkpoint-18/config.json


{'eval_loss': 0.693656861782074, 'eval_runtime': 2.1909, 'eval_samples_per_second': 5.934, 'eval_steps_per_second': 5.934, 'epoch': 2.99}


Model weights saved in ./model_output/checkpoint-18/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-12] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7138, 'learning_rate': 2.4000000000000003e-06, 'epoch': 3.99}


Saving model checkpoint to ./model_output/checkpoint-24
Configuration saved in ./model_output/checkpoint-24/config.json


{'eval_loss': 0.6944303512573242, 'eval_runtime': 2.1932, 'eval_samples_per_second': 5.927, 'eval_steps_per_second': 5.927, 'epoch': 3.99}


Model weights saved in ./model_output/checkpoint-24/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-18] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.6981, 'learning_rate': 3e-06, 'epoch': 4.99}


Saving model checkpoint to ./model_output/checkpoint-30
Configuration saved in ./model_output/checkpoint-30/config.json


{'eval_loss': 0.6952941417694092, 'eval_runtime': 2.1937, 'eval_samples_per_second': 5.926, 'eval_steps_per_second': 5.926, 'epoch': 4.99}


Model weights saved in ./model_output/checkpoint-30/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-24] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7094, 'learning_rate': 3.6000000000000003e-06, 'epoch': 5.99}


Saving model checkpoint to ./model_output/checkpoint-36
Configuration saved in ./model_output/checkpoint-36/config.json


{'eval_loss': 0.6967604160308838, 'eval_runtime': 2.1935, 'eval_samples_per_second': 5.927, 'eval_steps_per_second': 5.927, 'epoch': 5.99}


Model weights saved in ./model_output/checkpoint-36/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-30] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.6935, 'learning_rate': 4.2000000000000004e-06, 'epoch': 6.99}


Saving model checkpoint to ./model_output/checkpoint-42
Configuration saved in ./model_output/checkpoint-42/config.json


{'eval_loss': 0.6981602311134338, 'eval_runtime': 2.1901, 'eval_samples_per_second': 5.936, 'eval_steps_per_second': 5.936, 'epoch': 6.99}


Model weights saved in ./model_output/checkpoint-42/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-36] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.6902, 'learning_rate': 4.800000000000001e-06, 'epoch': 7.99}


Saving model checkpoint to ./model_output/checkpoint-48
Configuration saved in ./model_output/checkpoint-48/config.json


{'eval_loss': 0.700526773929596, 'eval_runtime': 2.1901, 'eval_samples_per_second': 5.936, 'eval_steps_per_second': 5.936, 'epoch': 7.99}


Model weights saved in ./model_output/checkpoint-48/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-42] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.6883, 'learning_rate': 4.996638655462185e-06, 'epoch': 8.99}


Saving model checkpoint to ./model_output/checkpoint-54
Configuration saved in ./model_output/checkpoint-54/config.json


{'eval_loss': 0.7048832178115845, 'eval_runtime': 2.1925, 'eval_samples_per_second': 5.929, 'eval_steps_per_second': 5.929, 'epoch': 8.99}


Model weights saved in ./model_output/checkpoint-54/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-48] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.6847, 'learning_rate': 4.9915966386554625e-06, 'epoch': 9.99}


Saving model checkpoint to ./model_output/checkpoint-60
Configuration saved in ./model_output/checkpoint-60/config.json


{'eval_loss': 0.7089776992797852, 'eval_runtime': 2.1919, 'eval_samples_per_second': 5.931, 'eval_steps_per_second': 5.931, 'epoch': 9.99}


Model weights saved in ./model_output/checkpoint-60/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-54] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.6711, 'learning_rate': 4.98655462184874e-06, 'epoch': 10.99}


Saving model checkpoint to ./model_output/checkpoint-66
Configuration saved in ./model_output/checkpoint-66/config.json


{'eval_loss': 0.716315507888794, 'eval_runtime': 2.1911, 'eval_samples_per_second': 5.933, 'eval_steps_per_second': 5.933, 'epoch': 10.99}


Model weights saved in ./model_output/checkpoint-66/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-60] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./model_output/checkpoint-6 (score: 0.6932439804077148).
***** Running Prediction *****
  Num examples = 7
  Batch size = 1


{'train_runtime': 703.4313, 'train_samples_per_second': 137.895, 'train_steps_per_second': 8.53, 'train_loss': 0.6984632376468543, 'epoch': 10.99}
Fitting model: roberta_base using fold 0 as out of fold test data.
Train data sizes: (96, 96).
Val data sizes: (10, 10).
Test data sizes: (11, 11).


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file config.json from cache at /home/joel_stremmel/.cache/huggingface/hub/models--roberta-base/snapshots/bc2764f8af2e92b6eb5679868df33e224075ca68/config.json
Model config RobertaConfig {
  "_name_or_path": "roberta-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

loading file vocab.json from cache at /home/joel_stremm

Map:   0%|          | 0/96 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Map:   0%|          | 0/11 [00:00<?, ? examples/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using cuda_amp half precision backend
***** Running training *****
  Num examples = 96
  Num Epochs = 1000
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 16
  Total optimization steps = 6000
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7085, 'learning_rate': 6.000000000000001e-07, 'epoch': 1.0}


Saving model checkpoint to ./model_output/checkpoint-6
Configuration saved in ./model_output/checkpoint-6/config.json


{'eval_loss': 0.7339306473731995, 'eval_runtime': 1.7065, 'eval_samples_per_second': 5.86, 'eval_steps_per_second': 5.86, 'epoch': 1.0}


Model weights saved in ./model_output/checkpoint-6/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7155, 'learning_rate': 1.2000000000000002e-06, 'epoch': 2.0}


Saving model checkpoint to ./model_output/checkpoint-12
Configuration saved in ./model_output/checkpoint-12/config.json


{'eval_loss': 0.7306431531906128, 'eval_runtime': 1.7049, 'eval_samples_per_second': 5.865, 'eval_steps_per_second': 5.865, 'epoch': 2.0}


Model weights saved in ./model_output/checkpoint-12/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-6] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7101, 'learning_rate': 1.8000000000000001e-06, 'epoch': 3.0}


Saving model checkpoint to ./model_output/checkpoint-18
Configuration saved in ./model_output/checkpoint-18/config.json


{'eval_loss': 0.7222596406936646, 'eval_runtime': 1.7088, 'eval_samples_per_second': 5.852, 'eval_steps_per_second': 5.852, 'epoch': 3.0}


Model weights saved in ./model_output/checkpoint-18/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-66] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7023, 'learning_rate': 2.4000000000000003e-06, 'epoch': 4.0}


Saving model checkpoint to ./model_output/checkpoint-24
Configuration saved in ./model_output/checkpoint-24/config.json


{'eval_loss': 0.7121529579162598, 'eval_runtime': 1.7023, 'eval_samples_per_second': 5.874, 'eval_steps_per_second': 5.874, 'epoch': 4.0}


Model weights saved in ./model_output/checkpoint-24/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-12] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6947, 'learning_rate': 3e-06, 'epoch': 5.0}


Saving model checkpoint to ./model_output/checkpoint-30
Configuration saved in ./model_output/checkpoint-30/config.json


{'eval_loss': 0.7003856897354126, 'eval_runtime': 1.7047, 'eval_samples_per_second': 5.866, 'eval_steps_per_second': 5.866, 'epoch': 5.0}


Model weights saved in ./model_output/checkpoint-30/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-18] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6864, 'learning_rate': 3.6000000000000003e-06, 'epoch': 6.0}


Saving model checkpoint to ./model_output/checkpoint-36
Configuration saved in ./model_output/checkpoint-36/config.json


{'eval_loss': 0.6860710382461548, 'eval_runtime': 1.7091, 'eval_samples_per_second': 5.851, 'eval_steps_per_second': 5.851, 'epoch': 6.0}


Model weights saved in ./model_output/checkpoint-36/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-24] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6797, 'learning_rate': 4.2000000000000004e-06, 'epoch': 7.0}


Saving model checkpoint to ./model_output/checkpoint-42
Configuration saved in ./model_output/checkpoint-42/config.json


{'eval_loss': 0.6690887808799744, 'eval_runtime': 1.7072, 'eval_samples_per_second': 5.858, 'eval_steps_per_second': 5.858, 'epoch': 7.0}


Model weights saved in ./model_output/checkpoint-42/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-30] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6801, 'learning_rate': 4.800000000000001e-06, 'epoch': 8.0}


Saving model checkpoint to ./model_output/checkpoint-48
Configuration saved in ./model_output/checkpoint-48/config.json


{'eval_loss': 0.6477526426315308, 'eval_runtime': 1.7016, 'eval_samples_per_second': 5.877, 'eval_steps_per_second': 5.877, 'epoch': 8.0}


Model weights saved in ./model_output/checkpoint-48/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-36] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6798, 'learning_rate': 4.996638655462185e-06, 'epoch': 9.0}


Saving model checkpoint to ./model_output/checkpoint-54
Configuration saved in ./model_output/checkpoint-54/config.json


{'eval_loss': 0.6231408715248108, 'eval_runtime': 1.7017, 'eval_samples_per_second': 5.876, 'eval_steps_per_second': 5.876, 'epoch': 9.0}


Model weights saved in ./model_output/checkpoint-54/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-42] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.674, 'learning_rate': 4.9915966386554625e-06, 'epoch': 10.0}


Saving model checkpoint to ./model_output/checkpoint-60
Configuration saved in ./model_output/checkpoint-60/config.json


{'eval_loss': 0.5996628403663635, 'eval_runtime': 1.7069, 'eval_samples_per_second': 5.859, 'eval_steps_per_second': 5.859, 'epoch': 10.0}


Model weights saved in ./model_output/checkpoint-60/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-48] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6426, 'learning_rate': 4.98655462184874e-06, 'epoch': 11.0}


Saving model checkpoint to ./model_output/checkpoint-66
Configuration saved in ./model_output/checkpoint-66/config.json


{'eval_loss': 0.5905945301055908, 'eval_runtime': 1.7057, 'eval_samples_per_second': 5.863, 'eval_steps_per_second': 5.863, 'epoch': 11.0}


Model weights saved in ./model_output/checkpoint-66/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-54] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6666, 'learning_rate': 4.9815126050420174e-06, 'epoch': 12.0}


Saving model checkpoint to ./model_output/checkpoint-72
Configuration saved in ./model_output/checkpoint-72/config.json


{'eval_loss': 0.5858544111251831, 'eval_runtime': 1.7069, 'eval_samples_per_second': 5.859, 'eval_steps_per_second': 5.859, 'epoch': 12.0}


Model weights saved in ./model_output/checkpoint-72/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-60] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6588, 'learning_rate': 4.976470588235294e-06, 'epoch': 13.0}


Saving model checkpoint to ./model_output/checkpoint-78
Configuration saved in ./model_output/checkpoint-78/config.json


{'eval_loss': 0.5732324123382568, 'eval_runtime': 1.7037, 'eval_samples_per_second': 5.87, 'eval_steps_per_second': 5.87, 'epoch': 13.0}


Model weights saved in ./model_output/checkpoint-78/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-66] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.635, 'learning_rate': 4.971428571428572e-06, 'epoch': 14.0}


Saving model checkpoint to ./model_output/checkpoint-84
Configuration saved in ./model_output/checkpoint-84/config.json


{'eval_loss': 0.5628629922866821, 'eval_runtime': 1.7056, 'eval_samples_per_second': 5.863, 'eval_steps_per_second': 5.863, 'epoch': 14.0}


Model weights saved in ./model_output/checkpoint-84/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-72] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.634, 'learning_rate': 4.966386554621849e-06, 'epoch': 15.0}


Saving model checkpoint to ./model_output/checkpoint-90
Configuration saved in ./model_output/checkpoint-90/config.json


{'eval_loss': 0.5356292724609375, 'eval_runtime': 1.7024, 'eval_samples_per_second': 5.874, 'eval_steps_per_second': 5.874, 'epoch': 15.0}


Model weights saved in ./model_output/checkpoint-90/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-78] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5905, 'learning_rate': 4.96218487394958e-06, 'epoch': 16.0}


Saving model checkpoint to ./model_output/checkpoint-96
Configuration saved in ./model_output/checkpoint-96/config.json


{'eval_loss': 0.5219812393188477, 'eval_runtime': 1.7135, 'eval_samples_per_second': 5.836, 'eval_steps_per_second': 5.836, 'epoch': 16.0}


Model weights saved in ./model_output/checkpoint-96/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-84] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5698, 'learning_rate': 4.9571428571428575e-06, 'epoch': 17.0}


Saving model checkpoint to ./model_output/checkpoint-102
Configuration saved in ./model_output/checkpoint-102/config.json


{'eval_loss': 0.47679656744003296, 'eval_runtime': 1.7129, 'eval_samples_per_second': 5.838, 'eval_steps_per_second': 5.838, 'epoch': 17.0}


Model weights saved in ./model_output/checkpoint-102/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-90] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5656, 'learning_rate': 4.952100840336135e-06, 'epoch': 18.0}


Saving model checkpoint to ./model_output/checkpoint-108
Configuration saved in ./model_output/checkpoint-108/config.json


{'eval_loss': 0.5419512987136841, 'eval_runtime': 1.7057, 'eval_samples_per_second': 5.863, 'eval_steps_per_second': 5.863, 'epoch': 18.0}


Model weights saved in ./model_output/checkpoint-108/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-96] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5305, 'learning_rate': 4.947058823529412e-06, 'epoch': 19.0}


Saving model checkpoint to ./model_output/checkpoint-114
Configuration saved in ./model_output/checkpoint-114/config.json


{'eval_loss': 0.4379139840602875, 'eval_runtime': 1.7037, 'eval_samples_per_second': 5.87, 'eval_steps_per_second': 5.87, 'epoch': 19.0}


Model weights saved in ./model_output/checkpoint-114/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-102] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5248, 'learning_rate': 4.9428571428571435e-06, 'epoch': 20.0}


Saving model checkpoint to ./model_output/checkpoint-120
Configuration saved in ./model_output/checkpoint-120/config.json


{'eval_loss': 0.3832527697086334, 'eval_runtime': 1.707, 'eval_samples_per_second': 5.858, 'eval_steps_per_second': 5.858, 'epoch': 20.0}


Model weights saved in ./model_output/checkpoint-120/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-108] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4988, 'learning_rate': 4.93781512605042e-06, 'epoch': 21.0}


Saving model checkpoint to ./model_output/checkpoint-126
Configuration saved in ./model_output/checkpoint-126/config.json


{'eval_loss': 0.5067498683929443, 'eval_runtime': 1.7022, 'eval_samples_per_second': 5.875, 'eval_steps_per_second': 5.875, 'epoch': 21.0}


Model weights saved in ./model_output/checkpoint-126/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-114] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.47, 'learning_rate': 4.932773109243698e-06, 'epoch': 22.0}


Saving model checkpoint to ./model_output/checkpoint-132
Configuration saved in ./model_output/checkpoint-132/config.json


{'eval_loss': 0.37637263536453247, 'eval_runtime': 1.704, 'eval_samples_per_second': 5.868, 'eval_steps_per_second': 5.868, 'epoch': 22.0}


Model weights saved in ./model_output/checkpoint-132/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-120] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.3982, 'learning_rate': 4.927731092436975e-06, 'epoch': 23.0}


Saving model checkpoint to ./model_output/checkpoint-138
Configuration saved in ./model_output/checkpoint-138/config.json


{'eval_loss': 0.3203802704811096, 'eval_runtime': 1.7033, 'eval_samples_per_second': 5.871, 'eval_steps_per_second': 5.871, 'epoch': 23.0}


Model weights saved in ./model_output/checkpoint-138/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-126] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.3966, 'learning_rate': 4.922689075630252e-06, 'epoch': 24.0}


Saving model checkpoint to ./model_output/checkpoint-144
Configuration saved in ./model_output/checkpoint-144/config.json


{'eval_loss': 0.4095514714717865, 'eval_runtime': 1.7003, 'eval_samples_per_second': 5.881, 'eval_steps_per_second': 5.881, 'epoch': 24.0}


Model weights saved in ./model_output/checkpoint-144/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-132] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.3546, 'learning_rate': 4.91764705882353e-06, 'epoch': 25.0}


Saving model checkpoint to ./model_output/checkpoint-150
Configuration saved in ./model_output/checkpoint-150/config.json


{'eval_loss': 0.25197237730026245, 'eval_runtime': 1.7035, 'eval_samples_per_second': 5.87, 'eval_steps_per_second': 5.87, 'epoch': 25.0}


Model weights saved in ./model_output/checkpoint-150/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-138] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.3093, 'learning_rate': 4.912605042016807e-06, 'epoch': 26.0}


Saving model checkpoint to ./model_output/checkpoint-156
Configuration saved in ./model_output/checkpoint-156/config.json


{'eval_loss': 0.2783512473106384, 'eval_runtime': 1.7072, 'eval_samples_per_second': 5.857, 'eval_steps_per_second': 5.857, 'epoch': 26.0}


Model weights saved in ./model_output/checkpoint-156/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-144] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2773, 'learning_rate': 4.907563025210084e-06, 'epoch': 27.0}


Saving model checkpoint to ./model_output/checkpoint-162
Configuration saved in ./model_output/checkpoint-162/config.json


{'eval_loss': 0.2148953229188919, 'eval_runtime': 1.7123, 'eval_samples_per_second': 5.84, 'eval_steps_per_second': 5.84, 'epoch': 27.0}


Model weights saved in ./model_output/checkpoint-162/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-150] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2628, 'learning_rate': 4.902521008403362e-06, 'epoch': 28.0}


Saving model checkpoint to ./model_output/checkpoint-168
Configuration saved in ./model_output/checkpoint-168/config.json


{'eval_loss': 0.2113349735736847, 'eval_runtime': 1.7049, 'eval_samples_per_second': 5.865, 'eval_steps_per_second': 5.865, 'epoch': 28.0}


Model weights saved in ./model_output/checkpoint-168/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-156] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2019, 'learning_rate': 4.897478991596639e-06, 'epoch': 29.0}


Saving model checkpoint to ./model_output/checkpoint-174
Configuration saved in ./model_output/checkpoint-174/config.json


{'eval_loss': 0.15094228088855743, 'eval_runtime': 1.7058, 'eval_samples_per_second': 5.862, 'eval_steps_per_second': 5.862, 'epoch': 29.0}


Model weights saved in ./model_output/checkpoint-174/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-162] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1359, 'learning_rate': 4.892436974789916e-06, 'epoch': 30.0}


Saving model checkpoint to ./model_output/checkpoint-180
Configuration saved in ./model_output/checkpoint-180/config.json


{'eval_loss': 0.10198293626308441, 'eval_runtime': 1.7091, 'eval_samples_per_second': 5.851, 'eval_steps_per_second': 5.851, 'epoch': 30.0}


Model weights saved in ./model_output/checkpoint-180/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-168] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0777, 'learning_rate': 4.888235294117647e-06, 'epoch': 31.0}


Saving model checkpoint to ./model_output/checkpoint-186
Configuration saved in ./model_output/checkpoint-186/config.json


{'eval_loss': 0.08717751502990723, 'eval_runtime': 1.7061, 'eval_samples_per_second': 5.861, 'eval_steps_per_second': 5.861, 'epoch': 31.0}


Model weights saved in ./model_output/checkpoint-186/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-174] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0522, 'learning_rate': 4.883193277310925e-06, 'epoch': 32.0}


Saving model checkpoint to ./model_output/checkpoint-192
Configuration saved in ./model_output/checkpoint-192/config.json


{'eval_loss': 0.06058831140398979, 'eval_runtime': 1.7026, 'eval_samples_per_second': 5.873, 'eval_steps_per_second': 5.873, 'epoch': 32.0}


Model weights saved in ./model_output/checkpoint-192/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-180] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0451, 'learning_rate': 4.878991596638656e-06, 'epoch': 33.0}


Saving model checkpoint to ./model_output/checkpoint-198
Configuration saved in ./model_output/checkpoint-198/config.json


{'eval_loss': 0.12470312416553497, 'eval_runtime': 1.7061, 'eval_samples_per_second': 5.861, 'eval_steps_per_second': 5.861, 'epoch': 33.0}


Model weights saved in ./model_output/checkpoint-198/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-186] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0144, 'learning_rate': 4.873949579831933e-06, 'epoch': 34.0}


Saving model checkpoint to ./model_output/checkpoint-204
Configuration saved in ./model_output/checkpoint-204/config.json


{'eval_loss': 0.04020223766565323, 'eval_runtime': 1.7029, 'eval_samples_per_second': 5.872, 'eval_steps_per_second': 5.872, 'epoch': 34.0}


Model weights saved in ./model_output/checkpoint-204/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-192] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.008, 'learning_rate': 4.86890756302521e-06, 'epoch': 35.0}


Saving model checkpoint to ./model_output/checkpoint-210
Configuration saved in ./model_output/checkpoint-210/config.json


{'eval_loss': 0.05129339173436165, 'eval_runtime': 1.7045, 'eval_samples_per_second': 5.867, 'eval_steps_per_second': 5.867, 'epoch': 35.0}


Model weights saved in ./model_output/checkpoint-210/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-198] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0093, 'learning_rate': 4.863865546218488e-06, 'epoch': 36.0}


Saving model checkpoint to ./model_output/checkpoint-216
Configuration saved in ./model_output/checkpoint-216/config.json


{'eval_loss': 0.038834791630506516, 'eval_runtime': 1.707, 'eval_samples_per_second': 5.858, 'eval_steps_per_second': 5.858, 'epoch': 36.0}


Model weights saved in ./model_output/checkpoint-216/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-204] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0069, 'learning_rate': 4.858823529411766e-06, 'epoch': 37.0}


Saving model checkpoint to ./model_output/checkpoint-222
Configuration saved in ./model_output/checkpoint-222/config.json


{'eval_loss': 0.01795816794037819, 'eval_runtime': 1.7071, 'eval_samples_per_second': 5.858, 'eval_steps_per_second': 5.858, 'epoch': 37.0}


Model weights saved in ./model_output/checkpoint-222/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-210] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0042, 'learning_rate': 4.853781512605042e-06, 'epoch': 38.0}


Saving model checkpoint to ./model_output/checkpoint-228
Configuration saved in ./model_output/checkpoint-228/config.json


{'eval_loss': 0.0050369128584861755, 'eval_runtime': 1.7087, 'eval_samples_per_second': 5.853, 'eval_steps_per_second': 5.853, 'epoch': 38.0}


Model weights saved in ./model_output/checkpoint-228/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-216] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0027, 'learning_rate': 4.84873949579832e-06, 'epoch': 39.0}


Saving model checkpoint to ./model_output/checkpoint-234
Configuration saved in ./model_output/checkpoint-234/config.json


{'eval_loss': 0.33474454283714294, 'eval_runtime': 1.7083, 'eval_samples_per_second': 5.854, 'eval_steps_per_second': 5.854, 'epoch': 39.0}


Model weights saved in ./model_output/checkpoint-234/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-222] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0023, 'learning_rate': 4.843697478991597e-06, 'epoch': 40.0}


Saving model checkpoint to ./model_output/checkpoint-240
Configuration saved in ./model_output/checkpoint-240/config.json


{'eval_loss': 0.08415950834751129, 'eval_runtime': 1.7057, 'eval_samples_per_second': 5.863, 'eval_steps_per_second': 5.863, 'epoch': 40.0}


Model weights saved in ./model_output/checkpoint-240/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-234] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0018, 'learning_rate': 4.838655462184874e-06, 'epoch': 41.0}


Saving model checkpoint to ./model_output/checkpoint-246
Configuration saved in ./model_output/checkpoint-246/config.json


{'eval_loss': 0.00435337470844388, 'eval_runtime': 1.7035, 'eval_samples_per_second': 5.87, 'eval_steps_per_second': 5.87, 'epoch': 41.0}


Model weights saved in ./model_output/checkpoint-246/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-228] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0019, 'learning_rate': 4.833613445378152e-06, 'epoch': 42.0}


Saving model checkpoint to ./model_output/checkpoint-252
Configuration saved in ./model_output/checkpoint-252/config.json


{'eval_loss': 0.005055816378444433, 'eval_runtime': 1.7063, 'eval_samples_per_second': 5.861, 'eval_steps_per_second': 5.861, 'epoch': 42.0}


Model weights saved in ./model_output/checkpoint-252/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-240] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0016, 'learning_rate': 4.8285714285714295e-06, 'epoch': 43.0}


Saving model checkpoint to ./model_output/checkpoint-258
Configuration saved in ./model_output/checkpoint-258/config.json


{'eval_loss': 0.03969958797097206, 'eval_runtime': 1.7058, 'eval_samples_per_second': 5.862, 'eval_steps_per_second': 5.862, 'epoch': 43.0}


Model weights saved in ./model_output/checkpoint-258/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-252] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0015, 'learning_rate': 4.823529411764706e-06, 'epoch': 44.0}


Saving model checkpoint to ./model_output/checkpoint-264
Configuration saved in ./model_output/checkpoint-264/config.json


{'eval_loss': 0.16956493258476257, 'eval_runtime': 1.7058, 'eval_samples_per_second': 5.862, 'eval_steps_per_second': 5.862, 'epoch': 44.0}


Model weights saved in ./model_output/checkpoint-264/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-258] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0014, 'learning_rate': 4.818487394957984e-06, 'epoch': 45.0}


Saving model checkpoint to ./model_output/checkpoint-270
Configuration saved in ./model_output/checkpoint-270/config.json


{'eval_loss': 0.20559552311897278, 'eval_runtime': 1.7092, 'eval_samples_per_second': 5.851, 'eval_steps_per_second': 5.851, 'epoch': 45.0}


Model weights saved in ./model_output/checkpoint-270/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-264] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0014, 'learning_rate': 4.813445378151261e-06, 'epoch': 46.0}


Saving model checkpoint to ./model_output/checkpoint-276
Configuration saved in ./model_output/checkpoint-276/config.json


{'eval_loss': 0.014735138043761253, 'eval_runtime': 1.7056, 'eval_samples_per_second': 5.863, 'eval_steps_per_second': 5.863, 'epoch': 46.0}


Model weights saved in ./model_output/checkpoint-276/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-270] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0011, 'learning_rate': 4.808403361344538e-06, 'epoch': 47.0}


Saving model checkpoint to ./model_output/checkpoint-282
Configuration saved in ./model_output/checkpoint-282/config.json


{'eval_loss': 0.0052832383662462234, 'eval_runtime': 1.705, 'eval_samples_per_second': 5.865, 'eval_steps_per_second': 5.865, 'epoch': 47.0}


Model weights saved in ./model_output/checkpoint-282/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-276] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.001, 'learning_rate': 4.803361344537816e-06, 'epoch': 48.0}


Saving model checkpoint to ./model_output/checkpoint-288
Configuration saved in ./model_output/checkpoint-288/config.json


{'eval_loss': 0.005863143131136894, 'eval_runtime': 1.7073, 'eval_samples_per_second': 5.857, 'eval_steps_per_second': 5.857, 'epoch': 48.0}


Model weights saved in ./model_output/checkpoint-288/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-282] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.001, 'learning_rate': 4.798319327731093e-06, 'epoch': 49.0}


Saving model checkpoint to ./model_output/checkpoint-294
Configuration saved in ./model_output/checkpoint-294/config.json


{'eval_loss': 0.008261564187705517, 'eval_runtime': 1.704, 'eval_samples_per_second': 5.869, 'eval_steps_per_second': 5.869, 'epoch': 49.0}


Model weights saved in ./model_output/checkpoint-294/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-288] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0009, 'learning_rate': 4.79327731092437e-06, 'epoch': 50.0}


Saving model checkpoint to ./model_output/checkpoint-300
Configuration saved in ./model_output/checkpoint-300/config.json


{'eval_loss': 0.01368875801563263, 'eval_runtime': 1.7082, 'eval_samples_per_second': 5.854, 'eval_steps_per_second': 5.854, 'epoch': 50.0}


Model weights saved in ./model_output/checkpoint-300/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-294] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0009, 'learning_rate': 4.7882352941176475e-06, 'epoch': 51.0}


Saving model checkpoint to ./model_output/checkpoint-306
Configuration saved in ./model_output/checkpoint-306/config.json


{'eval_loss': 0.035729557275772095, 'eval_runtime': 1.7065, 'eval_samples_per_second': 5.86, 'eval_steps_per_second': 5.86, 'epoch': 51.0}


Model weights saved in ./model_output/checkpoint-306/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-300] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./model_output/checkpoint-246 (score: 0.00435337470844388).
***** Running Prediction *****
  Num examples = 11
  Batch size = 1


{'train_runtime': 3207.0298, 'train_samples_per_second': 29.934, 'train_steps_per_second': 1.871, 'train_loss': 0.3233795498796051, 'epoch': 51.0}
Fitting model: roberta_base using fold 1 as out of fold test data.
Train data sizes: (92, 92).
Val data sizes: (10, 10).
Test data sizes: (15, 15).


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file config.json from cache at /home/joel_stremmel/.cache/huggingface/hub/models--roberta-base/snapshots/bc2764f8af2e92b6eb5679868df33e224075ca68/config.json
Model config RobertaConfig {
  "_name_or_path": "roberta-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

loading file vocab.json from cache at /home/joel_stremm

Map:   0%|          | 0/92 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Map:   0%|          | 0/15 [00:00<?, ? examples/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using cuda_amp half precision backend
***** Running training *****
  Num examples = 92
  Num Epochs = 1000
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 16
  Total optimization steps = 5000
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7842, 'learning_rate': 5.000000000000001e-07, 'epoch': 0.87}


Saving model checkpoint to ./model_output/checkpoint-5
Configuration saved in ./model_output/checkpoint-5/config.json


{'eval_loss': 0.6579311490058899, 'eval_runtime': 1.7041, 'eval_samples_per_second': 5.868, 'eval_steps_per_second': 5.868, 'epoch': 0.87}


Model weights saved in ./model_output/checkpoint-5/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-246] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7767, 'learning_rate': 1.0000000000000002e-06, 'epoch': 1.87}


Saving model checkpoint to ./model_output/checkpoint-10
Configuration saved in ./model_output/checkpoint-10/config.json


{'eval_loss': 0.6570192575454712, 'eval_runtime': 1.7069, 'eval_samples_per_second': 5.859, 'eval_steps_per_second': 5.859, 'epoch': 1.87}


Model weights saved in ./model_output/checkpoint-10/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-306] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7626, 'learning_rate': 1.5e-06, 'epoch': 2.87}


Saving model checkpoint to ./model_output/checkpoint-15
Configuration saved in ./model_output/checkpoint-15/config.json


{'eval_loss': 0.6546112895011902, 'eval_runtime': 1.7085, 'eval_samples_per_second': 5.853, 'eval_steps_per_second': 5.853, 'epoch': 2.87}


Model weights saved in ./model_output/checkpoint-15/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-5] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7885, 'learning_rate': 2.0000000000000003e-06, 'epoch': 3.87}
{'eval_loss': 0.649377167224884, 'eval_runtime': 1.7115, 'eval_samples_per_second': 5.843, 'eval_steps_per_second': 5.843, 'epoch': 3.87}


Saving model checkpoint to ./model_output/checkpoint-20
Configuration saved in ./model_output/checkpoint-20/config.json
Model weights saved in ./model_output/checkpoint-20/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-10] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7629, 'learning_rate': 2.5e-06, 'epoch': 4.87}


Saving model checkpoint to ./model_output/checkpoint-25
Configuration saved in ./model_output/checkpoint-25/config.json


{'eval_loss': 0.6423848271369934, 'eval_runtime': 1.7114, 'eval_samples_per_second': 5.843, 'eval_steps_per_second': 5.843, 'epoch': 4.87}


Model weights saved in ./model_output/checkpoint-25/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-15] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7664, 'learning_rate': 3e-06, 'epoch': 5.87}
{'eval_loss': 0.6367286443710327, 'eval_runtime': 1.7098, 'eval_samples_per_second': 5.849, 'eval_steps_per_second': 5.849, 'epoch': 5.87}


Saving model checkpoint to ./model_output/checkpoint-30
Configuration saved in ./model_output/checkpoint-30/config.json
Model weights saved in ./model_output/checkpoint-30/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-20] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7762, 'learning_rate': 3.5e-06, 'epoch': 6.87}


Saving model checkpoint to ./model_output/checkpoint-35
Configuration saved in ./model_output/checkpoint-35/config.json


{'eval_loss': 0.6303929090499878, 'eval_runtime': 1.7109, 'eval_samples_per_second': 5.845, 'eval_steps_per_second': 5.845, 'epoch': 6.87}


Model weights saved in ./model_output/checkpoint-35/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-25] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7606, 'learning_rate': 4.000000000000001e-06, 'epoch': 7.87}


Saving model checkpoint to ./model_output/checkpoint-40
Configuration saved in ./model_output/checkpoint-40/config.json


{'eval_loss': 0.6210466623306274, 'eval_runtime': 1.7118, 'eval_samples_per_second': 5.842, 'eval_steps_per_second': 5.842, 'epoch': 7.87}


Model weights saved in ./model_output/checkpoint-40/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-30] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7611, 'learning_rate': 4.5e-06, 'epoch': 8.87}


Saving model checkpoint to ./model_output/checkpoint-45
Configuration saved in ./model_output/checkpoint-45/config.json


{'eval_loss': 0.6107296943664551, 'eval_runtime': 1.71, 'eval_samples_per_second': 5.848, 'eval_steps_per_second': 5.848, 'epoch': 8.87}


Model weights saved in ./model_output/checkpoint-45/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-35] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7722, 'learning_rate': 5e-06, 'epoch': 9.87}


Saving model checkpoint to ./model_output/checkpoint-50
Configuration saved in ./model_output/checkpoint-50/config.json


{'eval_loss': 0.6011605858802795, 'eval_runtime': 1.7079, 'eval_samples_per_second': 5.855, 'eval_steps_per_second': 5.855, 'epoch': 9.87}


Model weights saved in ./model_output/checkpoint-50/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-40] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7506, 'learning_rate': 4.9949494949494956e-06, 'epoch': 10.87}


Saving model checkpoint to ./model_output/checkpoint-55
Configuration saved in ./model_output/checkpoint-55/config.json


{'eval_loss': 0.5925348997116089, 'eval_runtime': 1.7063, 'eval_samples_per_second': 5.861, 'eval_steps_per_second': 5.861, 'epoch': 10.87}


Model weights saved in ./model_output/checkpoint-55/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-45] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7468, 'learning_rate': 4.98989898989899e-06, 'epoch': 11.87}


Saving model checkpoint to ./model_output/checkpoint-60
Configuration saved in ./model_output/checkpoint-60/config.json


{'eval_loss': 0.5804692506790161, 'eval_runtime': 1.7044, 'eval_samples_per_second': 5.867, 'eval_steps_per_second': 5.867, 'epoch': 11.87}


Model weights saved in ./model_output/checkpoint-60/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-50] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7573, 'learning_rate': 4.984848484848485e-06, 'epoch': 12.87}


Saving model checkpoint to ./model_output/checkpoint-65
Configuration saved in ./model_output/checkpoint-65/config.json


{'eval_loss': 0.5724747776985168, 'eval_runtime': 1.7079, 'eval_samples_per_second': 5.855, 'eval_steps_per_second': 5.855, 'epoch': 12.87}


Model weights saved in ./model_output/checkpoint-65/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-55] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7471, 'learning_rate': 4.97979797979798e-06, 'epoch': 13.87}


Saving model checkpoint to ./model_output/checkpoint-70
Configuration saved in ./model_output/checkpoint-70/config.json


{'eval_loss': 0.5655926465988159, 'eval_runtime': 1.7101, 'eval_samples_per_second': 5.848, 'eval_steps_per_second': 5.848, 'epoch': 13.87}


Model weights saved in ./model_output/checkpoint-70/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-60] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.721, 'learning_rate': 4.974747474747475e-06, 'epoch': 14.87}


Saving model checkpoint to ./model_output/checkpoint-75
Configuration saved in ./model_output/checkpoint-75/config.json


{'eval_loss': 0.5511415004730225, 'eval_runtime': 1.7088, 'eval_samples_per_second': 5.852, 'eval_steps_per_second': 5.852, 'epoch': 14.87}


Model weights saved in ./model_output/checkpoint-75/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-65] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7183, 'learning_rate': 4.9696969696969696e-06, 'epoch': 15.87}


Saving model checkpoint to ./model_output/checkpoint-80
Configuration saved in ./model_output/checkpoint-80/config.json


{'eval_loss': 0.5418311357498169, 'eval_runtime': 1.7064, 'eval_samples_per_second': 5.86, 'eval_steps_per_second': 5.86, 'epoch': 15.87}


Model weights saved in ./model_output/checkpoint-80/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-70] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7353, 'learning_rate': 4.964646464646465e-06, 'epoch': 16.87}


Saving model checkpoint to ./model_output/checkpoint-85
Configuration saved in ./model_output/checkpoint-85/config.json


{'eval_loss': 0.5272390842437744, 'eval_runtime': 1.703, 'eval_samples_per_second': 5.872, 'eval_steps_per_second': 5.872, 'epoch': 16.87}


Model weights saved in ./model_output/checkpoint-85/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-75] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7175, 'learning_rate': 4.95959595959596e-06, 'epoch': 17.87}


Saving model checkpoint to ./model_output/checkpoint-90
Configuration saved in ./model_output/checkpoint-90/config.json


{'eval_loss': 0.5268691778182983, 'eval_runtime': 1.7092, 'eval_samples_per_second': 5.851, 'eval_steps_per_second': 5.851, 'epoch': 17.87}


Model weights saved in ./model_output/checkpoint-90/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-80] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7244, 'learning_rate': 4.954545454545455e-06, 'epoch': 18.87}


Saving model checkpoint to ./model_output/checkpoint-95
Configuration saved in ./model_output/checkpoint-95/config.json


{'eval_loss': 0.5366165041923523, 'eval_runtime': 1.709, 'eval_samples_per_second': 5.851, 'eval_steps_per_second': 5.851, 'epoch': 18.87}


Model weights saved in ./model_output/checkpoint-95/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-85] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6776, 'learning_rate': 4.950505050505051e-06, 'epoch': 19.87}
{'eval_loss': 0.5239057540893555, 'eval_runtime': 1.7117, 'eval_samples_per_second': 5.842, 'eval_steps_per_second': 5.842, 'epoch': 19.87}


Saving model checkpoint to ./model_output/checkpoint-100
Configuration saved in ./model_output/checkpoint-100/config.json
Model weights saved in ./model_output/checkpoint-100/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-90] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6799, 'learning_rate': 4.945454545454546e-06, 'epoch': 20.87}


Saving model checkpoint to ./model_output/checkpoint-105
Configuration saved in ./model_output/checkpoint-105/config.json


{'eval_loss': 0.5048260688781738, 'eval_runtime': 1.7062, 'eval_samples_per_second': 5.861, 'eval_steps_per_second': 5.861, 'epoch': 20.87}


Model weights saved in ./model_output/checkpoint-105/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-95] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6424, 'learning_rate': 4.941414141414142e-06, 'epoch': 21.87}
{'eval_loss': 0.4867885112762451, 'eval_runtime': 1.7019, 'eval_samples_per_second': 5.876, 'eval_steps_per_second': 5.876, 'epoch': 21.87}


Saving model checkpoint to ./model_output/checkpoint-110
Configuration saved in ./model_output/checkpoint-110/config.json
Model weights saved in ./model_output/checkpoint-110/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-100] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6539, 'learning_rate': 4.936363636363637e-06, 'epoch': 22.87}


Saving model checkpoint to ./model_output/checkpoint-115
Configuration saved in ./model_output/checkpoint-115/config.json


{'eval_loss': 0.4756527841091156, 'eval_runtime': 1.7064, 'eval_samples_per_second': 5.86, 'eval_steps_per_second': 5.86, 'epoch': 22.87}


Model weights saved in ./model_output/checkpoint-115/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-105] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6116, 'learning_rate': 4.931313131313132e-06, 'epoch': 23.87}
{'eval_loss': 0.471251904964447, 'eval_runtime': 1.7071, 'eval_samples_per_second': 5.858, 'eval_steps_per_second': 5.858, 'epoch': 23.87}


Saving model checkpoint to ./model_output/checkpoint-120
Configuration saved in ./model_output/checkpoint-120/config.json
Model weights saved in ./model_output/checkpoint-120/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-110] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5828, 'learning_rate': 4.926262626262627e-06, 'epoch': 24.87}


Saving model checkpoint to ./model_output/checkpoint-125
Configuration saved in ./model_output/checkpoint-125/config.json


{'eval_loss': 0.4700135588645935, 'eval_runtime': 1.709, 'eval_samples_per_second': 5.851, 'eval_steps_per_second': 5.851, 'epoch': 24.87}


Model weights saved in ./model_output/checkpoint-125/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-115] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5747, 'learning_rate': 4.9212121212121214e-06, 'epoch': 25.87}


Saving model checkpoint to ./model_output/checkpoint-130
Configuration saved in ./model_output/checkpoint-130/config.json


{'eval_loss': 0.41876888275146484, 'eval_runtime': 1.711, 'eval_samples_per_second': 5.844, 'eval_steps_per_second': 5.844, 'epoch': 25.87}


Model weights saved in ./model_output/checkpoint-130/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-120] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5574, 'learning_rate': 4.9161616161616166e-06, 'epoch': 26.87}


Saving model checkpoint to ./model_output/checkpoint-135
Configuration saved in ./model_output/checkpoint-135/config.json


{'eval_loss': 0.3988551199436188, 'eval_runtime': 1.7056, 'eval_samples_per_second': 5.863, 'eval_steps_per_second': 5.863, 'epoch': 26.87}


Model weights saved in ./model_output/checkpoint-135/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-125] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5102, 'learning_rate': 4.911111111111112e-06, 'epoch': 27.87}


Saving model checkpoint to ./model_output/checkpoint-140
Configuration saved in ./model_output/checkpoint-140/config.json


{'eval_loss': 0.41922348737716675, 'eval_runtime': 1.7012, 'eval_samples_per_second': 5.878, 'eval_steps_per_second': 5.878, 'epoch': 27.87}


Model weights saved in ./model_output/checkpoint-140/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-130] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.488, 'learning_rate': 4.906060606060606e-06, 'epoch': 28.87}


Saving model checkpoint to ./model_output/checkpoint-145
Configuration saved in ./model_output/checkpoint-145/config.json


{'eval_loss': 0.4055508077144623, 'eval_runtime': 1.7018, 'eval_samples_per_second': 5.876, 'eval_steps_per_second': 5.876, 'epoch': 28.87}


Model weights saved in ./model_output/checkpoint-145/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-140] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4545, 'learning_rate': 4.901010101010101e-06, 'epoch': 29.87}


Saving model checkpoint to ./model_output/checkpoint-150
Configuration saved in ./model_output/checkpoint-150/config.json


{'eval_loss': 0.3899385333061218, 'eval_runtime': 1.7073, 'eval_samples_per_second': 5.857, 'eval_steps_per_second': 5.857, 'epoch': 29.87}


Model weights saved in ./model_output/checkpoint-150/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-135] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4383, 'learning_rate': 4.895959595959596e-06, 'epoch': 30.87}


Saving model checkpoint to ./model_output/checkpoint-155
Configuration saved in ./model_output/checkpoint-155/config.json


{'eval_loss': 0.4101191461086273, 'eval_runtime': 1.7049, 'eval_samples_per_second': 5.866, 'eval_steps_per_second': 5.866, 'epoch': 30.87}


Model weights saved in ./model_output/checkpoint-155/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-145] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.3996, 'learning_rate': 4.8909090909090914e-06, 'epoch': 31.87}


Saving model checkpoint to ./model_output/checkpoint-160
Configuration saved in ./model_output/checkpoint-160/config.json


{'eval_loss': 0.371519535779953, 'eval_runtime': 1.7041, 'eval_samples_per_second': 5.868, 'eval_steps_per_second': 5.868, 'epoch': 31.87}


Model weights saved in ./model_output/checkpoint-160/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-150] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.355, 'learning_rate': 4.885858585858586e-06, 'epoch': 32.87}


Saving model checkpoint to ./model_output/checkpoint-165
Configuration saved in ./model_output/checkpoint-165/config.json


{'eval_loss': 0.3706355690956116, 'eval_runtime': 1.707, 'eval_samples_per_second': 5.858, 'eval_steps_per_second': 5.858, 'epoch': 32.87}


Model weights saved in ./model_output/checkpoint-165/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-155] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2993, 'learning_rate': 4.880808080808081e-06, 'epoch': 33.87}


Saving model checkpoint to ./model_output/checkpoint-170
Configuration saved in ./model_output/checkpoint-170/config.json


{'eval_loss': 0.33951351046562195, 'eval_runtime': 1.7085, 'eval_samples_per_second': 5.853, 'eval_steps_per_second': 5.853, 'epoch': 33.87}


Model weights saved in ./model_output/checkpoint-170/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-160] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2463, 'learning_rate': 4.875757575757576e-06, 'epoch': 34.87}


Saving model checkpoint to ./model_output/checkpoint-175
Configuration saved in ./model_output/checkpoint-175/config.json


{'eval_loss': 0.3506735563278198, 'eval_runtime': 1.7054, 'eval_samples_per_second': 5.864, 'eval_steps_per_second': 5.864, 'epoch': 34.87}


Model weights saved in ./model_output/checkpoint-175/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-165] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2033, 'learning_rate': 4.870707070707071e-06, 'epoch': 35.87}


Saving model checkpoint to ./model_output/checkpoint-180
Configuration saved in ./model_output/checkpoint-180/config.json


{'eval_loss': 0.30751490592956543, 'eval_runtime': 1.7035, 'eval_samples_per_second': 5.87, 'eval_steps_per_second': 5.87, 'epoch': 35.87}


Model weights saved in ./model_output/checkpoint-180/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-170] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1456, 'learning_rate': 4.865656565656566e-06, 'epoch': 36.87}


Saving model checkpoint to ./model_output/checkpoint-185
Configuration saved in ./model_output/checkpoint-185/config.json


{'eval_loss': 0.3467809855937958, 'eval_runtime': 1.7088, 'eval_samples_per_second': 5.852, 'eval_steps_per_second': 5.852, 'epoch': 36.87}


Model weights saved in ./model_output/checkpoint-185/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-175] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.097, 'learning_rate': 4.8606060606060615e-06, 'epoch': 37.87}


Saving model checkpoint to ./model_output/checkpoint-190
Configuration saved in ./model_output/checkpoint-190/config.json


{'eval_loss': 0.29775550961494446, 'eval_runtime': 1.7045, 'eval_samples_per_second': 5.867, 'eval_steps_per_second': 5.867, 'epoch': 37.87}


Model weights saved in ./model_output/checkpoint-190/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-180] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0683, 'learning_rate': 4.855555555555556e-06, 'epoch': 38.87}


Saving model checkpoint to ./model_output/checkpoint-195
Configuration saved in ./model_output/checkpoint-195/config.json


{'eval_loss': 0.36382028460502625, 'eval_runtime': 1.7112, 'eval_samples_per_second': 5.844, 'eval_steps_per_second': 5.844, 'epoch': 38.87}


Model weights saved in ./model_output/checkpoint-195/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-185] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0417, 'learning_rate': 4.850505050505051e-06, 'epoch': 39.87}


Saving model checkpoint to ./model_output/checkpoint-200
Configuration saved in ./model_output/checkpoint-200/config.json


{'eval_loss': 0.27270740270614624, 'eval_runtime': 1.7061, 'eval_samples_per_second': 5.861, 'eval_steps_per_second': 5.861, 'epoch': 39.87}


Model weights saved in ./model_output/checkpoint-200/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-190] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0278, 'learning_rate': 4.845454545454546e-06, 'epoch': 40.87}


Saving model checkpoint to ./model_output/checkpoint-205
Configuration saved in ./model_output/checkpoint-205/config.json


{'eval_loss': 0.40516334772109985, 'eval_runtime': 1.7085, 'eval_samples_per_second': 5.853, 'eval_steps_per_second': 5.853, 'epoch': 40.87}


Model weights saved in ./model_output/checkpoint-205/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-195] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0168, 'learning_rate': 4.840404040404041e-06, 'epoch': 41.87}


Saving model checkpoint to ./model_output/checkpoint-210
Configuration saved in ./model_output/checkpoint-210/config.json


{'eval_loss': 0.18032947182655334, 'eval_runtime': 1.7127, 'eval_samples_per_second': 5.839, 'eval_steps_per_second': 5.839, 'epoch': 41.87}


Model weights saved in ./model_output/checkpoint-210/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-200] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0098, 'learning_rate': 4.8353535353535355e-06, 'epoch': 42.87}


Saving model checkpoint to ./model_output/checkpoint-215
Configuration saved in ./model_output/checkpoint-215/config.json


{'eval_loss': 0.1555151492357254, 'eval_runtime': 1.709, 'eval_samples_per_second': 5.851, 'eval_steps_per_second': 5.851, 'epoch': 42.87}


Model weights saved in ./model_output/checkpoint-215/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-205] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0078, 'learning_rate': 4.830303030303031e-06, 'epoch': 43.87}
{'eval_loss': 0.542934775352478, 'eval_runtime': 1.7073, 'eval_samples_per_second': 5.857, 'eval_steps_per_second': 5.857, 'epoch': 43.87}


Saving model checkpoint to ./model_output/checkpoint-220
Configuration saved in ./model_output/checkpoint-220/config.json
Model weights saved in ./model_output/checkpoint-220/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-210] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0062, 'learning_rate': 4.825252525252526e-06, 'epoch': 44.87}


Saving model checkpoint to ./model_output/checkpoint-225
Configuration saved in ./model_output/checkpoint-225/config.json


{'eval_loss': 0.0773853212594986, 'eval_runtime': 1.7066, 'eval_samples_per_second': 5.86, 'eval_steps_per_second': 5.86, 'epoch': 44.87}


Model weights saved in ./model_output/checkpoint-225/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-215] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0054, 'learning_rate': 4.820202020202021e-06, 'epoch': 45.87}


Saving model checkpoint to ./model_output/checkpoint-230
Configuration saved in ./model_output/checkpoint-230/config.json


{'eval_loss': 0.7842360734939575, 'eval_runtime': 1.7085, 'eval_samples_per_second': 5.853, 'eval_steps_per_second': 5.853, 'epoch': 45.87}


Model weights saved in ./model_output/checkpoint-230/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-220] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0044, 'learning_rate': 4.815151515151515e-06, 'epoch': 46.87}


Saving model checkpoint to ./model_output/checkpoint-235
Configuration saved in ./model_output/checkpoint-235/config.json


{'eval_loss': 0.2291712760925293, 'eval_runtime': 1.7075, 'eval_samples_per_second': 5.857, 'eval_steps_per_second': 5.857, 'epoch': 46.87}


Model weights saved in ./model_output/checkpoint-235/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-230] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0033, 'learning_rate': 4.81010101010101e-06, 'epoch': 47.87}


Saving model checkpoint to ./model_output/checkpoint-240
Configuration saved in ./model_output/checkpoint-240/config.json


{'eval_loss': 0.15722595155239105, 'eval_runtime': 1.7167, 'eval_samples_per_second': 5.825, 'eval_steps_per_second': 5.825, 'epoch': 47.87}


Model weights saved in ./model_output/checkpoint-240/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-235] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0025, 'learning_rate': 4.8050505050505055e-06, 'epoch': 48.87}


Saving model checkpoint to ./model_output/checkpoint-245
Configuration saved in ./model_output/checkpoint-245/config.json


{'eval_loss': 0.8570529818534851, 'eval_runtime': 1.707, 'eval_samples_per_second': 5.858, 'eval_steps_per_second': 5.858, 'epoch': 48.87}


Model weights saved in ./model_output/checkpoint-245/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-240] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0023, 'learning_rate': 4.800000000000001e-06, 'epoch': 49.87}


Saving model checkpoint to ./model_output/checkpoint-250
Configuration saved in ./model_output/checkpoint-250/config.json


{'eval_loss': 0.8118997812271118, 'eval_runtime': 1.707, 'eval_samples_per_second': 5.858, 'eval_steps_per_second': 5.858, 'epoch': 49.87}


Model weights saved in ./model_output/checkpoint-250/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-245] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0022, 'learning_rate': 4.794949494949495e-06, 'epoch': 50.87}


Saving model checkpoint to ./model_output/checkpoint-255
Configuration saved in ./model_output/checkpoint-255/config.json


{'eval_loss': 0.12030847370624542, 'eval_runtime': 1.7113, 'eval_samples_per_second': 5.843, 'eval_steps_per_second': 5.843, 'epoch': 50.87}


Model weights saved in ./model_output/checkpoint-255/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-250] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0018, 'learning_rate': 4.78989898989899e-06, 'epoch': 51.87}


Saving model checkpoint to ./model_output/checkpoint-260
Configuration saved in ./model_output/checkpoint-260/config.json


{'eval_loss': 0.2628176808357239, 'eval_runtime': 1.7084, 'eval_samples_per_second': 5.853, 'eval_steps_per_second': 5.853, 'epoch': 51.87}


Model weights saved in ./model_output/checkpoint-260/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-255] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0016, 'learning_rate': 4.784848484848485e-06, 'epoch': 52.87}


Saving model checkpoint to ./model_output/checkpoint-265
Configuration saved in ./model_output/checkpoint-265/config.json


{'eval_loss': 0.481389582157135, 'eval_runtime': 1.7123, 'eval_samples_per_second': 5.84, 'eval_steps_per_second': 5.84, 'epoch': 52.87}


Model weights saved in ./model_output/checkpoint-265/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-260] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0015, 'learning_rate': 4.77979797979798e-06, 'epoch': 53.87}


Saving model checkpoint to ./model_output/checkpoint-270
Configuration saved in ./model_output/checkpoint-270/config.json


{'eval_loss': 0.7033248543739319, 'eval_runtime': 1.7098, 'eval_samples_per_second': 5.849, 'eval_steps_per_second': 5.849, 'epoch': 53.87}


Model weights saved in ./model_output/checkpoint-270/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-265] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0015, 'learning_rate': 4.7747474747474755e-06, 'epoch': 54.87}


Saving model checkpoint to ./model_output/checkpoint-275
Configuration saved in ./model_output/checkpoint-275/config.json


{'eval_loss': 0.46342453360557556, 'eval_runtime': 1.7131, 'eval_samples_per_second': 5.837, 'eval_steps_per_second': 5.837, 'epoch': 54.87}


Model weights saved in ./model_output/checkpoint-275/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-270] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./model_output/checkpoint-225 (score: 0.0773853212594986).
***** Running Prediction *****
  Num examples = 15
  Batch size = 1


{'train_runtime': 3317.2404, 'train_samples_per_second': 27.734, 'train_steps_per_second': 1.507, 'train_loss': 0.42094660832631314, 'epoch': 54.87}
Fitting model: roberta_base using fold 2 as out of fold test data.
Train data sizes: (93, 93).
Val data sizes: (10, 10).
Test data sizes: (14, 14).


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file config.json from cache at /home/joel_stremmel/.cache/huggingface/hub/models--roberta-base/snapshots/bc2764f8af2e92b6eb5679868df33e224075ca68/config.json
Model config RobertaConfig {
  "_name_or_path": "roberta-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

loading file vocab.json from cache at /home/joel_stremm

Map:   0%|          | 0/93 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Map:   0%|          | 0/14 [00:00<?, ? examples/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using cuda_amp half precision backend
***** Running training *****
  Num examples = 93
  Num Epochs = 1000
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 16
  Total optimization steps = 5000
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8196, 'learning_rate': 5.000000000000001e-07, 'epoch': 0.86}


Saving model checkpoint to ./model_output/checkpoint-5
Configuration saved in ./model_output/checkpoint-5/config.json


{'eval_loss': 0.715583324432373, 'eval_runtime': 1.7072, 'eval_samples_per_second': 5.857, 'eval_steps_per_second': 5.857, 'epoch': 0.86}


Model weights saved in ./model_output/checkpoint-5/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-225] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8061, 'learning_rate': 1.0000000000000002e-06, 'epoch': 1.86}


Saving model checkpoint to ./model_output/checkpoint-10
Configuration saved in ./model_output/checkpoint-10/config.json


{'eval_loss': 0.7143620848655701, 'eval_runtime': 1.7056, 'eval_samples_per_second': 5.863, 'eval_steps_per_second': 5.863, 'epoch': 1.86}


Model weights saved in ./model_output/checkpoint-10/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-275] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8109, 'learning_rate': 1.5e-06, 'epoch': 2.86}


Saving model checkpoint to ./model_output/checkpoint-15
Configuration saved in ./model_output/checkpoint-15/config.json


{'eval_loss': 0.7121192812919617, 'eval_runtime': 1.708, 'eval_samples_per_second': 5.855, 'eval_steps_per_second': 5.855, 'epoch': 2.86}


Model weights saved in ./model_output/checkpoint-15/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-5] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8056, 'learning_rate': 2.0000000000000003e-06, 'epoch': 3.86}


Saving model checkpoint to ./model_output/checkpoint-20
Configuration saved in ./model_output/checkpoint-20/config.json


{'eval_loss': 0.7074457406997681, 'eval_runtime': 1.7091, 'eval_samples_per_second': 5.851, 'eval_steps_per_second': 5.851, 'epoch': 3.86}


Model weights saved in ./model_output/checkpoint-20/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-10] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8185, 'learning_rate': 2.5e-06, 'epoch': 4.86}


Saving model checkpoint to ./model_output/checkpoint-25
Configuration saved in ./model_output/checkpoint-25/config.json


{'eval_loss': 0.7008460760116577, 'eval_runtime': 1.7125, 'eval_samples_per_second': 5.839, 'eval_steps_per_second': 5.839, 'epoch': 4.86}


Model weights saved in ./model_output/checkpoint-25/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-15] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8221, 'learning_rate': 3e-06, 'epoch': 5.86}


Saving model checkpoint to ./model_output/checkpoint-30
Configuration saved in ./model_output/checkpoint-30/config.json


{'eval_loss': 0.6942236423492432, 'eval_runtime': 1.7071, 'eval_samples_per_second': 5.858, 'eval_steps_per_second': 5.858, 'epoch': 5.86}


Model weights saved in ./model_output/checkpoint-30/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-20] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8197, 'learning_rate': 3.5e-06, 'epoch': 6.86}


Saving model checkpoint to ./model_output/checkpoint-35
Configuration saved in ./model_output/checkpoint-35/config.json


{'eval_loss': 0.6884362697601318, 'eval_runtime': 1.7112, 'eval_samples_per_second': 5.844, 'eval_steps_per_second': 5.844, 'epoch': 6.86}


Model weights saved in ./model_output/checkpoint-35/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-25] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7861, 'learning_rate': 4.000000000000001e-06, 'epoch': 7.86}


Saving model checkpoint to ./model_output/checkpoint-40
Configuration saved in ./model_output/checkpoint-40/config.json


{'eval_loss': 0.6801483631134033, 'eval_runtime': 1.7066, 'eval_samples_per_second': 5.86, 'eval_steps_per_second': 5.86, 'epoch': 7.86}


Model weights saved in ./model_output/checkpoint-40/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-30] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7986, 'learning_rate': 4.5e-06, 'epoch': 8.86}


Saving model checkpoint to ./model_output/checkpoint-45
Configuration saved in ./model_output/checkpoint-45/config.json


{'eval_loss': 0.6679173707962036, 'eval_runtime': 1.7052, 'eval_samples_per_second': 5.864, 'eval_steps_per_second': 5.864, 'epoch': 8.86}


Model weights saved in ./model_output/checkpoint-45/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-35] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7893, 'learning_rate': 5e-06, 'epoch': 9.86}


Saving model checkpoint to ./model_output/checkpoint-50
Configuration saved in ./model_output/checkpoint-50/config.json


{'eval_loss': 0.6539319753646851, 'eval_runtime': 1.7058, 'eval_samples_per_second': 5.863, 'eval_steps_per_second': 5.863, 'epoch': 9.86}


Model weights saved in ./model_output/checkpoint-50/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-40] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7816, 'learning_rate': 4.9949494949494956e-06, 'epoch': 10.86}


Saving model checkpoint to ./model_output/checkpoint-55
Configuration saved in ./model_output/checkpoint-55/config.json


{'eval_loss': 0.6369115114212036, 'eval_runtime': 1.7062, 'eval_samples_per_second': 5.861, 'eval_steps_per_second': 5.861, 'epoch': 10.86}


Model weights saved in ./model_output/checkpoint-55/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-45] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7953, 'learning_rate': 4.98989898989899e-06, 'epoch': 11.86}


Saving model checkpoint to ./model_output/checkpoint-60
Configuration saved in ./model_output/checkpoint-60/config.json


{'eval_loss': 0.6246939301490784, 'eval_runtime': 1.7126, 'eval_samples_per_second': 5.839, 'eval_steps_per_second': 5.839, 'epoch': 11.86}


Model weights saved in ./model_output/checkpoint-60/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-50] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7746, 'learning_rate': 4.9858585858585865e-06, 'epoch': 12.86}


Saving model checkpoint to ./model_output/checkpoint-65
Configuration saved in ./model_output/checkpoint-65/config.json


{'eval_loss': 0.6105351448059082, 'eval_runtime': 1.7109, 'eval_samples_per_second': 5.845, 'eval_steps_per_second': 5.845, 'epoch': 12.86}


Model weights saved in ./model_output/checkpoint-65/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-55] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7757, 'learning_rate': 4.980808080808081e-06, 'epoch': 13.86}


Saving model checkpoint to ./model_output/checkpoint-70
Configuration saved in ./model_output/checkpoint-70/config.json


{'eval_loss': 0.6003686189651489, 'eval_runtime': 1.709, 'eval_samples_per_second': 5.851, 'eval_steps_per_second': 5.851, 'epoch': 13.86}


Model weights saved in ./model_output/checkpoint-70/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-60] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7829, 'learning_rate': 4.975757575757576e-06, 'epoch': 14.86}


Saving model checkpoint to ./model_output/checkpoint-75
Configuration saved in ./model_output/checkpoint-75/config.json


{'eval_loss': 0.5925291776657104, 'eval_runtime': 1.7053, 'eval_samples_per_second': 5.864, 'eval_steps_per_second': 5.864, 'epoch': 14.86}


Model weights saved in ./model_output/checkpoint-75/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-65] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7732, 'learning_rate': 4.970707070707071e-06, 'epoch': 15.86}


Saving model checkpoint to ./model_output/checkpoint-80
Configuration saved in ./model_output/checkpoint-80/config.json


{'eval_loss': 0.5936004519462585, 'eval_runtime': 1.7156, 'eval_samples_per_second': 5.829, 'eval_steps_per_second': 5.829, 'epoch': 15.86}


Model weights saved in ./model_output/checkpoint-80/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-70] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7819, 'learning_rate': 4.965656565656566e-06, 'epoch': 16.86}


Saving model checkpoint to ./model_output/checkpoint-85
Configuration saved in ./model_output/checkpoint-85/config.json


{'eval_loss': 0.5955921411514282, 'eval_runtime': 1.7098, 'eval_samples_per_second': 5.849, 'eval_steps_per_second': 5.849, 'epoch': 16.86}


Model weights saved in ./model_output/checkpoint-85/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-80] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7717, 'learning_rate': 4.9606060606060605e-06, 'epoch': 17.86}


Saving model checkpoint to ./model_output/checkpoint-90
Configuration saved in ./model_output/checkpoint-90/config.json


{'eval_loss': 0.5894137024879456, 'eval_runtime': 1.7126, 'eval_samples_per_second': 5.839, 'eval_steps_per_second': 5.839, 'epoch': 17.86}


Model weights saved in ./model_output/checkpoint-90/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-75] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7605, 'learning_rate': 4.9555555555555565e-06, 'epoch': 18.86}


Saving model checkpoint to ./model_output/checkpoint-95
Configuration saved in ./model_output/checkpoint-95/config.json


{'eval_loss': 0.579001247882843, 'eval_runtime': 1.7124, 'eval_samples_per_second': 5.84, 'eval_steps_per_second': 5.84, 'epoch': 18.86}


Model weights saved in ./model_output/checkpoint-95/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-85] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7796, 'learning_rate': 4.951515151515152e-06, 'epoch': 19.86}


Saving model checkpoint to ./model_output/checkpoint-100
Configuration saved in ./model_output/checkpoint-100/config.json


{'eval_loss': 0.5745317935943604, 'eval_runtime': 1.7154, 'eval_samples_per_second': 5.83, 'eval_steps_per_second': 5.83, 'epoch': 19.86}


Model weights saved in ./model_output/checkpoint-100/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-90] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7536, 'learning_rate': 4.946464646464647e-06, 'epoch': 20.86}


Saving model checkpoint to ./model_output/checkpoint-105
Configuration saved in ./model_output/checkpoint-105/config.json


{'eval_loss': 0.5915088057518005, 'eval_runtime': 1.7115, 'eval_samples_per_second': 5.843, 'eval_steps_per_second': 5.843, 'epoch': 20.86}


Model weights saved in ./model_output/checkpoint-105/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-95] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7154, 'learning_rate': 4.941414141414142e-06, 'epoch': 21.86}


Saving model checkpoint to ./model_output/checkpoint-110
Configuration saved in ./model_output/checkpoint-110/config.json


{'eval_loss': 0.5638577938079834, 'eval_runtime': 1.7172, 'eval_samples_per_second': 5.823, 'eval_steps_per_second': 5.823, 'epoch': 21.86}


Model weights saved in ./model_output/checkpoint-110/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-100] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6871, 'learning_rate': 4.936363636363637e-06, 'epoch': 22.86}


Saving model checkpoint to ./model_output/checkpoint-115
Configuration saved in ./model_output/checkpoint-115/config.json


{'eval_loss': 0.541456937789917, 'eval_runtime': 1.7088, 'eval_samples_per_second': 5.852, 'eval_steps_per_second': 5.852, 'epoch': 22.86}


Model weights saved in ./model_output/checkpoint-115/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-105] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6613, 'learning_rate': 4.931313131313132e-06, 'epoch': 23.86}


Saving model checkpoint to ./model_output/checkpoint-120
Configuration saved in ./model_output/checkpoint-120/config.json


{'eval_loss': 0.5479859113693237, 'eval_runtime': 1.7239, 'eval_samples_per_second': 5.801, 'eval_steps_per_second': 5.801, 'epoch': 23.86}


Model weights saved in ./model_output/checkpoint-120/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-110] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6132, 'learning_rate': 4.926262626262627e-06, 'epoch': 24.86}


Saving model checkpoint to ./model_output/checkpoint-125
Configuration saved in ./model_output/checkpoint-125/config.json


{'eval_loss': 0.5164966583251953, 'eval_runtime': 1.7117, 'eval_samples_per_second': 5.842, 'eval_steps_per_second': 5.842, 'epoch': 24.86}


Model weights saved in ./model_output/checkpoint-125/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-115] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6095, 'learning_rate': 4.9212121212121214e-06, 'epoch': 25.86}


Saving model checkpoint to ./model_output/checkpoint-130
Configuration saved in ./model_output/checkpoint-130/config.json


{'eval_loss': 0.5045573115348816, 'eval_runtime': 1.7144, 'eval_samples_per_second': 5.833, 'eval_steps_per_second': 5.833, 'epoch': 25.86}


Model weights saved in ./model_output/checkpoint-130/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-120] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5582, 'learning_rate': 4.9161616161616166e-06, 'epoch': 26.86}


Saving model checkpoint to ./model_output/checkpoint-135
Configuration saved in ./model_output/checkpoint-135/config.json


{'eval_loss': 0.5271428823471069, 'eval_runtime': 1.7063, 'eval_samples_per_second': 5.861, 'eval_steps_per_second': 5.861, 'epoch': 26.86}


Model weights saved in ./model_output/checkpoint-135/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-125] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5337, 'learning_rate': 4.911111111111112e-06, 'epoch': 27.86}


Saving model checkpoint to ./model_output/checkpoint-140
Configuration saved in ./model_output/checkpoint-140/config.json


{'eval_loss': 0.4947234094142914, 'eval_runtime': 1.7082, 'eval_samples_per_second': 5.854, 'eval_steps_per_second': 5.854, 'epoch': 27.86}


Model weights saved in ./model_output/checkpoint-140/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-130] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5302, 'learning_rate': 4.906060606060606e-06, 'epoch': 28.86}


Saving model checkpoint to ./model_output/checkpoint-145
Configuration saved in ./model_output/checkpoint-145/config.json


{'eval_loss': 0.5064793825149536, 'eval_runtime': 1.7139, 'eval_samples_per_second': 5.835, 'eval_steps_per_second': 5.835, 'epoch': 28.86}


Model weights saved in ./model_output/checkpoint-145/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-135] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4793, 'learning_rate': 4.901010101010101e-06, 'epoch': 29.86}


Saving model checkpoint to ./model_output/checkpoint-150
Configuration saved in ./model_output/checkpoint-150/config.json


{'eval_loss': 0.4842459261417389, 'eval_runtime': 1.7123, 'eval_samples_per_second': 5.84, 'eval_steps_per_second': 5.84, 'epoch': 29.86}


Model weights saved in ./model_output/checkpoint-150/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-140] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4338, 'learning_rate': 4.895959595959596e-06, 'epoch': 30.86}


Saving model checkpoint to ./model_output/checkpoint-155
Configuration saved in ./model_output/checkpoint-155/config.json


{'eval_loss': 0.4819222092628479, 'eval_runtime': 1.709, 'eval_samples_per_second': 5.851, 'eval_steps_per_second': 5.851, 'epoch': 30.86}


Model weights saved in ./model_output/checkpoint-155/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-145] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.3664, 'learning_rate': 4.8909090909090914e-06, 'epoch': 31.86}


Saving model checkpoint to ./model_output/checkpoint-160
Configuration saved in ./model_output/checkpoint-160/config.json


{'eval_loss': 0.4998552203178406, 'eval_runtime': 1.7074, 'eval_samples_per_second': 5.857, 'eval_steps_per_second': 5.857, 'epoch': 31.86}


Model weights saved in ./model_output/checkpoint-160/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-150] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.3613, 'learning_rate': 4.886868686868687e-06, 'epoch': 32.86}


Saving model checkpoint to ./model_output/checkpoint-165
Configuration saved in ./model_output/checkpoint-165/config.json


{'eval_loss': 0.5961325168609619, 'eval_runtime': 1.7178, 'eval_samples_per_second': 5.821, 'eval_steps_per_second': 5.821, 'epoch': 32.86}


Model weights saved in ./model_output/checkpoint-165/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-160] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2869, 'learning_rate': 4.881818181818182e-06, 'epoch': 33.86}


Saving model checkpoint to ./model_output/checkpoint-170
Configuration saved in ./model_output/checkpoint-170/config.json


{'eval_loss': 0.5526305437088013, 'eval_runtime': 1.7095, 'eval_samples_per_second': 5.85, 'eval_steps_per_second': 5.85, 'epoch': 33.86}


Model weights saved in ./model_output/checkpoint-170/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-165] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2427, 'learning_rate': 4.876767676767677e-06, 'epoch': 34.86}


Saving model checkpoint to ./model_output/checkpoint-175
Configuration saved in ./model_output/checkpoint-175/config.json


{'eval_loss': 0.6539033651351929, 'eval_runtime': 1.7066, 'eval_samples_per_second': 5.86, 'eval_steps_per_second': 5.86, 'epoch': 34.86}


Model weights saved in ./model_output/checkpoint-175/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-170] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1825, 'learning_rate': 4.871717171717172e-06, 'epoch': 35.86}


Saving model checkpoint to ./model_output/checkpoint-180
Configuration saved in ./model_output/checkpoint-180/config.json


{'eval_loss': 0.7393666505813599, 'eval_runtime': 1.7081, 'eval_samples_per_second': 5.855, 'eval_steps_per_second': 5.855, 'epoch': 35.86}


Model weights saved in ./model_output/checkpoint-180/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-175] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1166, 'learning_rate': 4.866666666666667e-06, 'epoch': 36.86}


Saving model checkpoint to ./model_output/checkpoint-185
Configuration saved in ./model_output/checkpoint-185/config.json


{'eval_loss': 0.8549965620040894, 'eval_runtime': 1.7147, 'eval_samples_per_second': 5.832, 'eval_steps_per_second': 5.832, 'epoch': 36.86}


Model weights saved in ./model_output/checkpoint-185/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-180] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0815, 'learning_rate': 4.861616161616162e-06, 'epoch': 37.86}


Saving model checkpoint to ./model_output/checkpoint-190
Configuration saved in ./model_output/checkpoint-190/config.json


{'eval_loss': 0.9398485422134399, 'eval_runtime': 1.7059, 'eval_samples_per_second': 5.862, 'eval_steps_per_second': 5.862, 'epoch': 37.86}


Model weights saved in ./model_output/checkpoint-190/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-185] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0596, 'learning_rate': 4.856565656565657e-06, 'epoch': 38.86}


Saving model checkpoint to ./model_output/checkpoint-195
Configuration saved in ./model_output/checkpoint-195/config.json


{'eval_loss': 1.0473873615264893, 'eval_runtime': 1.7105, 'eval_samples_per_second': 5.846, 'eval_steps_per_second': 5.846, 'epoch': 38.86}


Model weights saved in ./model_output/checkpoint-195/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-190] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0463, 'learning_rate': 4.851515151515152e-06, 'epoch': 39.86}


Saving model checkpoint to ./model_output/checkpoint-200
Configuration saved in ./model_output/checkpoint-200/config.json


{'eval_loss': 1.1114550828933716, 'eval_runtime': 1.7099, 'eval_samples_per_second': 5.848, 'eval_steps_per_second': 5.848, 'epoch': 39.86}


Model weights saved in ./model_output/checkpoint-200/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-195] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0387, 'learning_rate': 4.846464646464647e-06, 'epoch': 40.86}


Saving model checkpoint to ./model_output/checkpoint-205
Configuration saved in ./model_output/checkpoint-205/config.json


{'eval_loss': 1.315035104751587, 'eval_runtime': 1.7144, 'eval_samples_per_second': 5.833, 'eval_steps_per_second': 5.833, 'epoch': 40.86}


Model weights saved in ./model_output/checkpoint-205/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-200] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./model_output/checkpoint-155 (score: 0.4819222092628479).
***** Running Prediction *****
  Num examples = 14
  Batch size = 1


{'train_runtime': 2495.8044, 'train_samples_per_second': 37.263, 'train_steps_per_second': 2.003, 'train_loss': 0.590519639631597, 'epoch': 40.86}
Fitting model: roberta_base using fold 3 as out of fold test data.
Train data sizes: (97, 97).
Val data sizes: (10, 10).
Test data sizes: (10, 10).


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file config.json from cache at /home/joel_stremmel/.cache/huggingface/hub/models--roberta-base/snapshots/bc2764f8af2e92b6eb5679868df33e224075ca68/config.json
Model config RobertaConfig {
  "_name_or_path": "roberta-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

loading file vocab.json from cache at /home/joel_stremm

Map:   0%|          | 0/97 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using cuda_amp half precision backend
***** Running training *****
  Num examples = 97
  Num Epochs = 1000
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 16
  Total optimization steps = 6000
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.703, 'learning_rate': 6.000000000000001e-07, 'epoch': 0.99}


Saving model checkpoint to ./model_output/checkpoint-6
Configuration saved in ./model_output/checkpoint-6/config.json


{'eval_loss': 0.7130951285362244, 'eval_runtime': 1.7067, 'eval_samples_per_second': 5.859, 'eval_steps_per_second': 5.859, 'epoch': 0.99}


Model weights saved in ./model_output/checkpoint-6/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-155] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7044, 'learning_rate': 1.2000000000000002e-06, 'epoch': 1.99}


Saving model checkpoint to ./model_output/checkpoint-12
Configuration saved in ./model_output/checkpoint-12/config.json


{'eval_loss': 0.7112053036689758, 'eval_runtime': 1.7119, 'eval_samples_per_second': 5.841, 'eval_steps_per_second': 5.841, 'epoch': 1.99}


Model weights saved in ./model_output/checkpoint-12/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-205] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6943, 'learning_rate': 1.8000000000000001e-06, 'epoch': 2.99}


Saving model checkpoint to ./model_output/checkpoint-18
Configuration saved in ./model_output/checkpoint-18/config.json


{'eval_loss': 0.7083170413970947, 'eval_runtime': 1.709, 'eval_samples_per_second': 5.851, 'eval_steps_per_second': 5.851, 'epoch': 2.99}


Model weights saved in ./model_output/checkpoint-18/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-6] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6958, 'learning_rate': 2.4000000000000003e-06, 'epoch': 3.99}


Saving model checkpoint to ./model_output/checkpoint-24
Configuration saved in ./model_output/checkpoint-24/config.json


{'eval_loss': 0.7032161951065063, 'eval_runtime': 1.7058, 'eval_samples_per_second': 5.862, 'eval_steps_per_second': 5.862, 'epoch': 3.99}


Model weights saved in ./model_output/checkpoint-24/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-12] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7029, 'learning_rate': 3e-06, 'epoch': 4.99}


Saving model checkpoint to ./model_output/checkpoint-30
Configuration saved in ./model_output/checkpoint-30/config.json


{'eval_loss': 0.6947170495986938, 'eval_runtime': 1.7078, 'eval_samples_per_second': 5.855, 'eval_steps_per_second': 5.855, 'epoch': 4.99}


Model weights saved in ./model_output/checkpoint-30/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-18] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7057, 'learning_rate': 3.6000000000000003e-06, 'epoch': 5.99}


Saving model checkpoint to ./model_output/checkpoint-36
Configuration saved in ./model_output/checkpoint-36/config.json


{'eval_loss': 0.6868613958358765, 'eval_runtime': 1.7081, 'eval_samples_per_second': 5.855, 'eval_steps_per_second': 5.855, 'epoch': 5.99}


Model weights saved in ./model_output/checkpoint-36/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-24] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6953, 'learning_rate': 4.2000000000000004e-06, 'epoch': 6.99}


Saving model checkpoint to ./model_output/checkpoint-42
Configuration saved in ./model_output/checkpoint-42/config.json


{'eval_loss': 0.6802624464035034, 'eval_runtime': 1.7098, 'eval_samples_per_second': 5.849, 'eval_steps_per_second': 5.849, 'epoch': 6.99}


Model weights saved in ./model_output/checkpoint-42/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-30] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6911, 'learning_rate': 4.800000000000001e-06, 'epoch': 7.99}


Saving model checkpoint to ./model_output/checkpoint-48
Configuration saved in ./model_output/checkpoint-48/config.json


{'eval_loss': 0.6714159250259399, 'eval_runtime': 1.713, 'eval_samples_per_second': 5.838, 'eval_steps_per_second': 5.838, 'epoch': 7.99}


Model weights saved in ./model_output/checkpoint-48/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-36] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6936, 'learning_rate': 4.996638655462185e-06, 'epoch': 8.99}


Saving model checkpoint to ./model_output/checkpoint-54
Configuration saved in ./model_output/checkpoint-54/config.json


{'eval_loss': 0.6593478322029114, 'eval_runtime': 1.708, 'eval_samples_per_second': 5.855, 'eval_steps_per_second': 5.855, 'epoch': 8.99}


Model weights saved in ./model_output/checkpoint-54/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-42] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6869, 'learning_rate': 4.9915966386554625e-06, 'epoch': 9.99}


Saving model checkpoint to ./model_output/checkpoint-60
Configuration saved in ./model_output/checkpoint-60/config.json


{'eval_loss': 0.6469243168830872, 'eval_runtime': 1.7075, 'eval_samples_per_second': 5.856, 'eval_steps_per_second': 5.856, 'epoch': 9.99}


Model weights saved in ./model_output/checkpoint-60/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-48] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6845, 'learning_rate': 4.98655462184874e-06, 'epoch': 10.99}


Saving model checkpoint to ./model_output/checkpoint-66
Configuration saved in ./model_output/checkpoint-66/config.json


{'eval_loss': 0.6359285116195679, 'eval_runtime': 1.7056, 'eval_samples_per_second': 5.863, 'eval_steps_per_second': 5.863, 'epoch': 10.99}


Model weights saved in ./model_output/checkpoint-66/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-54] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6872, 'learning_rate': 4.9815126050420174e-06, 'epoch': 11.99}


Saving model checkpoint to ./model_output/checkpoint-72
Configuration saved in ./model_output/checkpoint-72/config.json


{'eval_loss': 0.6252457499504089, 'eval_runtime': 1.7118, 'eval_samples_per_second': 5.842, 'eval_steps_per_second': 5.842, 'epoch': 11.99}


Model weights saved in ./model_output/checkpoint-72/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-60] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6793, 'learning_rate': 4.976470588235294e-06, 'epoch': 12.99}


Saving model checkpoint to ./model_output/checkpoint-78
Configuration saved in ./model_output/checkpoint-78/config.json


{'eval_loss': 0.6123930215835571, 'eval_runtime': 1.7066, 'eval_samples_per_second': 5.86, 'eval_steps_per_second': 5.86, 'epoch': 12.99}


Model weights saved in ./model_output/checkpoint-78/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-66] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6724, 'learning_rate': 4.971428571428572e-06, 'epoch': 13.99}


Saving model checkpoint to ./model_output/checkpoint-84
Configuration saved in ./model_output/checkpoint-84/config.json


{'eval_loss': 0.5985330939292908, 'eval_runtime': 1.7108, 'eval_samples_per_second': 5.845, 'eval_steps_per_second': 5.845, 'epoch': 13.99}


Model weights saved in ./model_output/checkpoint-84/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-72] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6599, 'learning_rate': 4.967226890756303e-06, 'epoch': 14.99}


Saving model checkpoint to ./model_output/checkpoint-90
Configuration saved in ./model_output/checkpoint-90/config.json


{'eval_loss': 0.5862657427787781, 'eval_runtime': 1.7094, 'eval_samples_per_second': 5.85, 'eval_steps_per_second': 5.85, 'epoch': 14.99}


Model weights saved in ./model_output/checkpoint-90/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-78] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6467, 'learning_rate': 4.96218487394958e-06, 'epoch': 15.99}


Saving model checkpoint to ./model_output/checkpoint-96
Configuration saved in ./model_output/checkpoint-96/config.json


{'eval_loss': 0.5739560723304749, 'eval_runtime': 1.711, 'eval_samples_per_second': 5.845, 'eval_steps_per_second': 5.845, 'epoch': 15.99}


Model weights saved in ./model_output/checkpoint-96/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-84] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.618, 'learning_rate': 4.9571428571428575e-06, 'epoch': 16.99}


Saving model checkpoint to ./model_output/checkpoint-102
Configuration saved in ./model_output/checkpoint-102/config.json


{'eval_loss': 0.5581048130989075, 'eval_runtime': 1.7087, 'eval_samples_per_second': 5.852, 'eval_steps_per_second': 5.852, 'epoch': 16.99}


Model weights saved in ./model_output/checkpoint-102/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-90] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6168, 'learning_rate': 4.952100840336135e-06, 'epoch': 17.99}


Saving model checkpoint to ./model_output/checkpoint-108
Configuration saved in ./model_output/checkpoint-108/config.json


{'eval_loss': 0.5437602996826172, 'eval_runtime': 1.7046, 'eval_samples_per_second': 5.867, 'eval_steps_per_second': 5.867, 'epoch': 17.99}


Model weights saved in ./model_output/checkpoint-108/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-96] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5951, 'learning_rate': 4.947058823529412e-06, 'epoch': 18.99}


Saving model checkpoint to ./model_output/checkpoint-114
Configuration saved in ./model_output/checkpoint-114/config.json


{'eval_loss': 0.5836354494094849, 'eval_runtime': 1.7148, 'eval_samples_per_second': 5.832, 'eval_steps_per_second': 5.832, 'epoch': 18.99}


Model weights saved in ./model_output/checkpoint-114/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-102] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5622, 'learning_rate': 4.942016806722689e-06, 'epoch': 19.99}


Saving model checkpoint to ./model_output/checkpoint-120
Configuration saved in ./model_output/checkpoint-120/config.json


{'eval_loss': 0.5106474161148071, 'eval_runtime': 1.7083, 'eval_samples_per_second': 5.854, 'eval_steps_per_second': 5.854, 'epoch': 19.99}


Model weights saved in ./model_output/checkpoint-120/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-108] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5141, 'learning_rate': 4.936974789915967e-06, 'epoch': 20.99}


Saving model checkpoint to ./model_output/checkpoint-126
Configuration saved in ./model_output/checkpoint-126/config.json


{'eval_loss': 0.5096293687820435, 'eval_runtime': 1.7112, 'eval_samples_per_second': 5.844, 'eval_steps_per_second': 5.844, 'epoch': 20.99}


Model weights saved in ./model_output/checkpoint-126/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-114] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4859, 'learning_rate': 4.931932773109244e-06, 'epoch': 21.99}


Saving model checkpoint to ./model_output/checkpoint-132
Configuration saved in ./model_output/checkpoint-132/config.json


{'eval_loss': 0.47425031661987305, 'eval_runtime': 1.7064, 'eval_samples_per_second': 5.86, 'eval_steps_per_second': 5.86, 'epoch': 21.99}


Model weights saved in ./model_output/checkpoint-132/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-120] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4335, 'learning_rate': 4.926890756302521e-06, 'epoch': 22.99}


Saving model checkpoint to ./model_output/checkpoint-138
Configuration saved in ./model_output/checkpoint-138/config.json


{'eval_loss': 0.5616108775138855, 'eval_runtime': 1.7101, 'eval_samples_per_second': 5.848, 'eval_steps_per_second': 5.848, 'epoch': 22.99}


Model weights saved in ./model_output/checkpoint-138/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-126] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.3424, 'learning_rate': 4.921848739495799e-06, 'epoch': 23.99}


Saving model checkpoint to ./model_output/checkpoint-144
Configuration saved in ./model_output/checkpoint-144/config.json


{'eval_loss': 0.4499285817146301, 'eval_runtime': 1.7106, 'eval_samples_per_second': 5.846, 'eval_steps_per_second': 5.846, 'epoch': 23.99}


Model weights saved in ./model_output/checkpoint-144/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-132] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.313, 'learning_rate': 4.91764705882353e-06, 'epoch': 24.99}


Saving model checkpoint to ./model_output/checkpoint-150
Configuration saved in ./model_output/checkpoint-150/config.json


{'eval_loss': 0.5546931028366089, 'eval_runtime': 1.7073, 'eval_samples_per_second': 5.857, 'eval_steps_per_second': 5.857, 'epoch': 24.99}


Model weights saved in ./model_output/checkpoint-150/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-138] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2453, 'learning_rate': 4.913445378151261e-06, 'epoch': 25.99}


Saving model checkpoint to ./model_output/checkpoint-156
Configuration saved in ./model_output/checkpoint-156/config.json


{'eval_loss': 0.4756585955619812, 'eval_runtime': 1.7045, 'eval_samples_per_second': 5.867, 'eval_steps_per_second': 5.867, 'epoch': 25.99}


Model weights saved in ./model_output/checkpoint-156/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-150] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2643, 'learning_rate': 4.908403361344538e-06, 'epoch': 26.99}


Saving model checkpoint to ./model_output/checkpoint-162
Configuration saved in ./model_output/checkpoint-162/config.json


{'eval_loss': 0.5704103112220764, 'eval_runtime': 1.7267, 'eval_samples_per_second': 5.791, 'eval_steps_per_second': 5.791, 'epoch': 26.99}


Model weights saved in ./model_output/checkpoint-162/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-156] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.133, 'learning_rate': 4.903361344537815e-06, 'epoch': 27.99}


Saving model checkpoint to ./model_output/checkpoint-168
Configuration saved in ./model_output/checkpoint-168/config.json


{'eval_loss': 0.6406546235084534, 'eval_runtime': 1.7103, 'eval_samples_per_second': 5.847, 'eval_steps_per_second': 5.847, 'epoch': 27.99}


Model weights saved in ./model_output/checkpoint-168/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-162] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1156, 'learning_rate': 4.899159663865546e-06, 'epoch': 28.99}


Saving model checkpoint to ./model_output/checkpoint-174
Configuration saved in ./model_output/checkpoint-174/config.json


{'eval_loss': 0.6120837926864624, 'eval_runtime': 1.7102, 'eval_samples_per_second': 5.847, 'eval_steps_per_second': 5.847, 'epoch': 28.99}


Model weights saved in ./model_output/checkpoint-174/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-168] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0614, 'learning_rate': 4.894117647058824e-06, 'epoch': 29.99}


Saving model checkpoint to ./model_output/checkpoint-180
Configuration saved in ./model_output/checkpoint-180/config.json


{'eval_loss': 1.0863978862762451, 'eval_runtime': 1.712, 'eval_samples_per_second': 5.841, 'eval_steps_per_second': 5.841, 'epoch': 29.99}


Model weights saved in ./model_output/checkpoint-180/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-174] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0408, 'learning_rate': 4.889075630252102e-06, 'epoch': 30.99}


Saving model checkpoint to ./model_output/checkpoint-186
Configuration saved in ./model_output/checkpoint-186/config.json


{'eval_loss': 0.81060391664505, 'eval_runtime': 1.7056, 'eval_samples_per_second': 5.863, 'eval_steps_per_second': 5.863, 'epoch': 30.99}


Model weights saved in ./model_output/checkpoint-186/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-180] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0285, 'learning_rate': 4.884033613445378e-06, 'epoch': 31.99}


Saving model checkpoint to ./model_output/checkpoint-192
Configuration saved in ./model_output/checkpoint-192/config.json


{'eval_loss': 1.391750454902649, 'eval_runtime': 1.7124, 'eval_samples_per_second': 5.84, 'eval_steps_per_second': 5.84, 'epoch': 31.99}


Model weights saved in ./model_output/checkpoint-192/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-186] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0214, 'learning_rate': 4.878991596638656e-06, 'epoch': 32.99}


Saving model checkpoint to ./model_output/checkpoint-198
Configuration saved in ./model_output/checkpoint-198/config.json


{'eval_loss': 1.149079442024231, 'eval_runtime': 1.7078, 'eval_samples_per_second': 5.855, 'eval_steps_per_second': 5.855, 'epoch': 32.99}


Model weights saved in ./model_output/checkpoint-198/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-192] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0104, 'learning_rate': 4.873949579831933e-06, 'epoch': 33.99}


Saving model checkpoint to ./model_output/checkpoint-204
Configuration saved in ./model_output/checkpoint-204/config.json


{'eval_loss': 1.545870065689087, 'eval_runtime': 1.7106, 'eval_samples_per_second': 5.846, 'eval_steps_per_second': 5.846, 'epoch': 33.99}


Model weights saved in ./model_output/checkpoint-204/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-198] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./model_output/checkpoint-144 (score: 0.4499285817146301).
***** Running Prediction *****
  Num examples = 10
  Batch size = 1


{'train_runtime': 2163.8152, 'train_samples_per_second': 44.828, 'train_steps_per_second': 2.773, 'train_loss': 0.4824897688904814, 'epoch': 33.99}
Fitting model: roberta_base using fold 4 as out of fold test data.
Train data sizes: (94, 94).
Val data sizes: (10, 10).
Test data sizes: (13, 13).


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file config.json from cache at /home/joel_stremmel/.cache/huggingface/hub/models--roberta-base/snapshots/bc2764f8af2e92b6eb5679868df33e224075ca68/config.json
Model config RobertaConfig {
  "_name_or_path": "roberta-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

loading file vocab.json from cache at /home/joel_stremm

Map:   0%|          | 0/94 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Map:   0%|          | 0/13 [00:00<?, ? examples/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using cuda_amp half precision backend
***** Running training *****
  Num examples = 94
  Num Epochs = 1000
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 16
  Total optimization steps = 5000
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8151, 'learning_rate': 5.000000000000001e-07, 'epoch': 0.85}


Saving model checkpoint to ./model_output/checkpoint-5
Configuration saved in ./model_output/checkpoint-5/config.json


{'eval_loss': 0.6826565861701965, 'eval_runtime': 1.7094, 'eval_samples_per_second': 5.85, 'eval_steps_per_second': 5.85, 'epoch': 0.85}


Model weights saved in ./model_output/checkpoint-5/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-144] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8123, 'learning_rate': 1.0000000000000002e-06, 'epoch': 1.85}


Saving model checkpoint to ./model_output/checkpoint-10
Configuration saved in ./model_output/checkpoint-10/config.json


{'eval_loss': 0.6818407773971558, 'eval_runtime': 1.7127, 'eval_samples_per_second': 5.839, 'eval_steps_per_second': 5.839, 'epoch': 1.85}


Model weights saved in ./model_output/checkpoint-10/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-204] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8028, 'learning_rate': 1.4000000000000001e-06, 'epoch': 2.85}


Saving model checkpoint to ./model_output/checkpoint-15
Configuration saved in ./model_output/checkpoint-15/config.json


{'eval_loss': 0.6804622411727905, 'eval_runtime': 1.7145, 'eval_samples_per_second': 5.833, 'eval_steps_per_second': 5.833, 'epoch': 2.85}


Model weights saved in ./model_output/checkpoint-15/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-5] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8164, 'learning_rate': 1.9000000000000002e-06, 'epoch': 3.85}


Saving model checkpoint to ./model_output/checkpoint-20
Configuration saved in ./model_output/checkpoint-20/config.json


{'eval_loss': 0.6764990091323853, 'eval_runtime': 1.71, 'eval_samples_per_second': 5.848, 'eval_steps_per_second': 5.848, 'epoch': 3.85}


Model weights saved in ./model_output/checkpoint-20/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-10] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7988, 'learning_rate': 2.4000000000000003e-06, 'epoch': 4.85}


Saving model checkpoint to ./model_output/checkpoint-25
Configuration saved in ./model_output/checkpoint-25/config.json


{'eval_loss': 0.6707668304443359, 'eval_runtime': 1.7054, 'eval_samples_per_second': 5.864, 'eval_steps_per_second': 5.864, 'epoch': 4.85}


Model weights saved in ./model_output/checkpoint-25/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-15] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8043, 'learning_rate': 2.9e-06, 'epoch': 5.85}


Saving model checkpoint to ./model_output/checkpoint-30
Configuration saved in ./model_output/checkpoint-30/config.json


{'eval_loss': 0.6627445816993713, 'eval_runtime': 1.7076, 'eval_samples_per_second': 5.856, 'eval_steps_per_second': 5.856, 'epoch': 5.85}


Model weights saved in ./model_output/checkpoint-30/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-20] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8016, 'learning_rate': 3.4000000000000005e-06, 'epoch': 6.85}


Saving model checkpoint to ./model_output/checkpoint-35
Configuration saved in ./model_output/checkpoint-35/config.json


{'eval_loss': 0.6559419631958008, 'eval_runtime': 1.7129, 'eval_samples_per_second': 5.838, 'eval_steps_per_second': 5.838, 'epoch': 6.85}


Model weights saved in ./model_output/checkpoint-35/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-25] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8039, 'learning_rate': 3.900000000000001e-06, 'epoch': 7.85}


Saving model checkpoint to ./model_output/checkpoint-40
Configuration saved in ./model_output/checkpoint-40/config.json


{'eval_loss': 0.6473942995071411, 'eval_runtime': 1.7131, 'eval_samples_per_second': 5.838, 'eval_steps_per_second': 5.838, 'epoch': 7.85}


Model weights saved in ./model_output/checkpoint-40/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-30] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7886, 'learning_rate': 4.4e-06, 'epoch': 8.85}


Saving model checkpoint to ./model_output/checkpoint-45
Configuration saved in ./model_output/checkpoint-45/config.json


{'eval_loss': 0.6380153894424438, 'eval_runtime': 1.7125, 'eval_samples_per_second': 5.84, 'eval_steps_per_second': 5.84, 'epoch': 8.85}


Model weights saved in ./model_output/checkpoint-45/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-35] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8167, 'learning_rate': 4.9000000000000005e-06, 'epoch': 9.85}


Saving model checkpoint to ./model_output/checkpoint-50
Configuration saved in ./model_output/checkpoint-50/config.json


{'eval_loss': 0.6318706274032593, 'eval_runtime': 1.7096, 'eval_samples_per_second': 5.849, 'eval_steps_per_second': 5.849, 'epoch': 9.85}


Model weights saved in ./model_output/checkpoint-50/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-40] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7742, 'learning_rate': 4.995959595959596e-06, 'epoch': 10.85}


Saving model checkpoint to ./model_output/checkpoint-55
Configuration saved in ./model_output/checkpoint-55/config.json


{'eval_loss': 0.6265465617179871, 'eval_runtime': 1.7092, 'eval_samples_per_second': 5.851, 'eval_steps_per_second': 5.851, 'epoch': 10.85}


Model weights saved in ./model_output/checkpoint-55/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-45] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8058, 'learning_rate': 4.990909090909091e-06, 'epoch': 11.85}


Saving model checkpoint to ./model_output/checkpoint-60
Configuration saved in ./model_output/checkpoint-60/config.json


{'eval_loss': 0.6217307448387146, 'eval_runtime': 1.709, 'eval_samples_per_second': 5.851, 'eval_steps_per_second': 5.851, 'epoch': 11.85}


Model weights saved in ./model_output/checkpoint-60/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-50] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7899, 'learning_rate': 4.9858585858585865e-06, 'epoch': 12.85}


Saving model checkpoint to ./model_output/checkpoint-65
Configuration saved in ./model_output/checkpoint-65/config.json


{'eval_loss': 0.6210581660270691, 'eval_runtime': 1.7132, 'eval_samples_per_second': 5.837, 'eval_steps_per_second': 5.837, 'epoch': 12.85}


Model weights saved in ./model_output/checkpoint-65/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-55] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7907, 'learning_rate': 4.980808080808081e-06, 'epoch': 13.85}


Saving model checkpoint to ./model_output/checkpoint-70
Configuration saved in ./model_output/checkpoint-70/config.json


{'eval_loss': 0.6170462369918823, 'eval_runtime': 1.7079, 'eval_samples_per_second': 5.855, 'eval_steps_per_second': 5.855, 'epoch': 13.85}


Model weights saved in ./model_output/checkpoint-70/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-60] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7783, 'learning_rate': 4.975757575757576e-06, 'epoch': 14.85}


Saving model checkpoint to ./model_output/checkpoint-75
Configuration saved in ./model_output/checkpoint-75/config.json


{'eval_loss': 0.6155937910079956, 'eval_runtime': 1.7097, 'eval_samples_per_second': 5.849, 'eval_steps_per_second': 5.849, 'epoch': 14.85}


Model weights saved in ./model_output/checkpoint-75/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-65] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7947, 'learning_rate': 4.970707070707071e-06, 'epoch': 15.85}


Saving model checkpoint to ./model_output/checkpoint-80
Configuration saved in ./model_output/checkpoint-80/config.json


{'eval_loss': 0.6150460243225098, 'eval_runtime': 1.7103, 'eval_samples_per_second': 5.847, 'eval_steps_per_second': 5.847, 'epoch': 15.85}


Model weights saved in ./model_output/checkpoint-80/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-70] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7938, 'learning_rate': 4.965656565656566e-06, 'epoch': 16.85}


Saving model checkpoint to ./model_output/checkpoint-85
Configuration saved in ./model_output/checkpoint-85/config.json


{'eval_loss': 0.6126841306686401, 'eval_runtime': 1.7113, 'eval_samples_per_second': 5.844, 'eval_steps_per_second': 5.844, 'epoch': 16.85}


Model weights saved in ./model_output/checkpoint-85/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-75] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7746, 'learning_rate': 4.9606060606060605e-06, 'epoch': 17.85}


Saving model checkpoint to ./model_output/checkpoint-90
Configuration saved in ./model_output/checkpoint-90/config.json


{'eval_loss': 0.6115588545799255, 'eval_runtime': 1.7131, 'eval_samples_per_second': 5.837, 'eval_steps_per_second': 5.837, 'epoch': 17.85}


Model weights saved in ./model_output/checkpoint-90/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-80] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7595, 'learning_rate': 4.9555555555555565e-06, 'epoch': 18.85}


Saving model checkpoint to ./model_output/checkpoint-95
Configuration saved in ./model_output/checkpoint-95/config.json


{'eval_loss': 0.5993784070014954, 'eval_runtime': 1.7087, 'eval_samples_per_second': 5.852, 'eval_steps_per_second': 5.852, 'epoch': 18.85}


Model weights saved in ./model_output/checkpoint-95/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-85] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7401, 'learning_rate': 4.950505050505051e-06, 'epoch': 19.85}


Saving model checkpoint to ./model_output/checkpoint-100
Configuration saved in ./model_output/checkpoint-100/config.json


{'eval_loss': 0.5679976940155029, 'eval_runtime': 1.7163, 'eval_samples_per_second': 5.827, 'eval_steps_per_second': 5.827, 'epoch': 19.85}


Model weights saved in ./model_output/checkpoint-100/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-90] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7235, 'learning_rate': 4.945454545454546e-06, 'epoch': 20.85}


Saving model checkpoint to ./model_output/checkpoint-105
Configuration saved in ./model_output/checkpoint-105/config.json


{'eval_loss': 0.5493675470352173, 'eval_runtime': 1.708, 'eval_samples_per_second': 5.855, 'eval_steps_per_second': 5.855, 'epoch': 20.85}


Model weights saved in ./model_output/checkpoint-105/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-95] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7147, 'learning_rate': 4.940404040404041e-06, 'epoch': 21.85}


Saving model checkpoint to ./model_output/checkpoint-110
Configuration saved in ./model_output/checkpoint-110/config.json


{'eval_loss': 0.5499065518379211, 'eval_runtime': 1.7106, 'eval_samples_per_second': 5.846, 'eval_steps_per_second': 5.846, 'epoch': 21.85}


Model weights saved in ./model_output/checkpoint-110/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-100] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6794, 'learning_rate': 4.935353535353536e-06, 'epoch': 22.85}


Saving model checkpoint to ./model_output/checkpoint-115
Configuration saved in ./model_output/checkpoint-115/config.json


{'eval_loss': 0.5327490568161011, 'eval_runtime': 1.7115, 'eval_samples_per_second': 5.843, 'eval_steps_per_second': 5.843, 'epoch': 22.85}


Model weights saved in ./model_output/checkpoint-115/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-105] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.653, 'learning_rate': 4.9303030303030305e-06, 'epoch': 23.85}


Saving model checkpoint to ./model_output/checkpoint-120
Configuration saved in ./model_output/checkpoint-120/config.json


{'eval_loss': 0.5086909532546997, 'eval_runtime': 1.7102, 'eval_samples_per_second': 5.847, 'eval_steps_per_second': 5.847, 'epoch': 23.85}


Model weights saved in ./model_output/checkpoint-120/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-110] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6222, 'learning_rate': 4.925252525252526e-06, 'epoch': 24.85}


Saving model checkpoint to ./model_output/checkpoint-125
Configuration saved in ./model_output/checkpoint-125/config.json


{'eval_loss': 0.4951629638671875, 'eval_runtime': 1.7093, 'eval_samples_per_second': 5.85, 'eval_steps_per_second': 5.85, 'epoch': 24.85}


Model weights saved in ./model_output/checkpoint-125/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-115] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6109, 'learning_rate': 4.920202020202021e-06, 'epoch': 25.85}


Saving model checkpoint to ./model_output/checkpoint-130
Configuration saved in ./model_output/checkpoint-130/config.json


{'eval_loss': 0.48983797430992126, 'eval_runtime': 1.7091, 'eval_samples_per_second': 5.851, 'eval_steps_per_second': 5.851, 'epoch': 25.85}


Model weights saved in ./model_output/checkpoint-130/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-120] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5731, 'learning_rate': 4.915151515151516e-06, 'epoch': 26.85}


Saving model checkpoint to ./model_output/checkpoint-135
Configuration saved in ./model_output/checkpoint-135/config.json


{'eval_loss': 0.4659351706504822, 'eval_runtime': 1.7154, 'eval_samples_per_second': 5.829, 'eval_steps_per_second': 5.829, 'epoch': 26.85}


Model weights saved in ./model_output/checkpoint-135/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-125] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5406, 'learning_rate': 4.91010101010101e-06, 'epoch': 27.85}


Saving model checkpoint to ./model_output/checkpoint-140
Configuration saved in ./model_output/checkpoint-140/config.json


{'eval_loss': 0.4622679650783539, 'eval_runtime': 1.7126, 'eval_samples_per_second': 5.839, 'eval_steps_per_second': 5.839, 'epoch': 27.85}


Model weights saved in ./model_output/checkpoint-140/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-130] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4846, 'learning_rate': 4.905050505050505e-06, 'epoch': 28.85}


Saving model checkpoint to ./model_output/checkpoint-145
Configuration saved in ./model_output/checkpoint-145/config.json


{'eval_loss': 0.44915133714675903, 'eval_runtime': 1.7133, 'eval_samples_per_second': 5.837, 'eval_steps_per_second': 5.837, 'epoch': 28.85}


Model weights saved in ./model_output/checkpoint-145/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-135] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4367, 'learning_rate': 4.9000000000000005e-06, 'epoch': 29.85}


Saving model checkpoint to ./model_output/checkpoint-150
Configuration saved in ./model_output/checkpoint-150/config.json


{'eval_loss': 0.4470670819282532, 'eval_runtime': 1.7158, 'eval_samples_per_second': 5.828, 'eval_steps_per_second': 5.828, 'epoch': 29.85}


Model weights saved in ./model_output/checkpoint-150/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-140] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4024, 'learning_rate': 4.894949494949495e-06, 'epoch': 30.85}


Saving model checkpoint to ./model_output/checkpoint-155
Configuration saved in ./model_output/checkpoint-155/config.json


{'eval_loss': 0.4225495457649231, 'eval_runtime': 1.7072, 'eval_samples_per_second': 5.858, 'eval_steps_per_second': 5.858, 'epoch': 30.85}


Model weights saved in ./model_output/checkpoint-155/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-145] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.3223, 'learning_rate': 4.88989898989899e-06, 'epoch': 31.85}


Saving model checkpoint to ./model_output/checkpoint-160
Configuration saved in ./model_output/checkpoint-160/config.json


{'eval_loss': 0.3829755187034607, 'eval_runtime': 1.7107, 'eval_samples_per_second': 5.846, 'eval_steps_per_second': 5.846, 'epoch': 31.85}


Model weights saved in ./model_output/checkpoint-160/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-150] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2781, 'learning_rate': 4.884848484848485e-06, 'epoch': 32.85}


Saving model checkpoint to ./model_output/checkpoint-165
Configuration saved in ./model_output/checkpoint-165/config.json


{'eval_loss': 0.36947134137153625, 'eval_runtime': 1.7113, 'eval_samples_per_second': 5.843, 'eval_steps_per_second': 5.843, 'epoch': 32.85}


Model weights saved in ./model_output/checkpoint-165/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-155] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2153, 'learning_rate': 4.87979797979798e-06, 'epoch': 33.85}


Saving model checkpoint to ./model_output/checkpoint-170
Configuration saved in ./model_output/checkpoint-170/config.json


{'eval_loss': 0.36031609773635864, 'eval_runtime': 1.7072, 'eval_samples_per_second': 5.857, 'eval_steps_per_second': 5.857, 'epoch': 33.85}


Model weights saved in ./model_output/checkpoint-170/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-160] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1505, 'learning_rate': 4.875757575757576e-06, 'epoch': 34.85}


Saving model checkpoint to ./model_output/checkpoint-175
Configuration saved in ./model_output/checkpoint-175/config.json


{'eval_loss': 0.4639316499233246, 'eval_runtime': 1.708, 'eval_samples_per_second': 5.855, 'eval_steps_per_second': 5.855, 'epoch': 34.85}


Model weights saved in ./model_output/checkpoint-175/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-165] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1247, 'learning_rate': 4.870707070707071e-06, 'epoch': 35.85}


Saving model checkpoint to ./model_output/checkpoint-180
Configuration saved in ./model_output/checkpoint-180/config.json


{'eval_loss': 0.3396797478199005, 'eval_runtime': 1.7126, 'eval_samples_per_second': 5.839, 'eval_steps_per_second': 5.839, 'epoch': 35.85}


Model weights saved in ./model_output/checkpoint-180/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-170] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1479, 'learning_rate': 4.865656565656566e-06, 'epoch': 36.85}


Saving model checkpoint to ./model_output/checkpoint-185
Configuration saved in ./model_output/checkpoint-185/config.json


{'eval_loss': 0.34129759669303894, 'eval_runtime': 1.7141, 'eval_samples_per_second': 5.834, 'eval_steps_per_second': 5.834, 'epoch': 36.85}


Model weights saved in ./model_output/checkpoint-185/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-175] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0899, 'learning_rate': 4.861616161616162e-06, 'epoch': 37.85}


Saving model checkpoint to ./model_output/checkpoint-190
Configuration saved in ./model_output/checkpoint-190/config.json


{'eval_loss': 0.36826688051223755, 'eval_runtime': 1.7117, 'eval_samples_per_second': 5.842, 'eval_steps_per_second': 5.842, 'epoch': 37.85}


Model weights saved in ./model_output/checkpoint-190/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-185] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0587, 'learning_rate': 4.856565656565657e-06, 'epoch': 38.85}


Saving model checkpoint to ./model_output/checkpoint-195
Configuration saved in ./model_output/checkpoint-195/config.json


{'eval_loss': 0.3533196449279785, 'eval_runtime': 1.7121, 'eval_samples_per_second': 5.841, 'eval_steps_per_second': 5.841, 'epoch': 38.85}


Model weights saved in ./model_output/checkpoint-195/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-190] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0453, 'learning_rate': 4.851515151515152e-06, 'epoch': 39.85}


Saving model checkpoint to ./model_output/checkpoint-200
Configuration saved in ./model_output/checkpoint-200/config.json


{'eval_loss': 0.39888739585876465, 'eval_runtime': 1.7105, 'eval_samples_per_second': 5.846, 'eval_steps_per_second': 5.846, 'epoch': 39.85}


Model weights saved in ./model_output/checkpoint-200/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-195] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0244, 'learning_rate': 4.846464646464647e-06, 'epoch': 40.85}


Saving model checkpoint to ./model_output/checkpoint-205
Configuration saved in ./model_output/checkpoint-205/config.json


{'eval_loss': 0.4763520359992981, 'eval_runtime': 1.7165, 'eval_samples_per_second': 5.826, 'eval_steps_per_second': 5.826, 'epoch': 40.85}


Model weights saved in ./model_output/checkpoint-205/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-200] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0155, 'learning_rate': 4.841414141414142e-06, 'epoch': 41.85}


Saving model checkpoint to ./model_output/checkpoint-210
Configuration saved in ./model_output/checkpoint-210/config.json


{'eval_loss': 0.5548121929168701, 'eval_runtime': 1.7122, 'eval_samples_per_second': 5.84, 'eval_steps_per_second': 5.84, 'epoch': 41.85}


Model weights saved in ./model_output/checkpoint-210/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-205] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0122, 'learning_rate': 4.836363636363637e-06, 'epoch': 42.85}


Saving model checkpoint to ./model_output/checkpoint-215
Configuration saved in ./model_output/checkpoint-215/config.json


{'eval_loss': 0.552181601524353, 'eval_runtime': 1.7149, 'eval_samples_per_second': 5.831, 'eval_steps_per_second': 5.831, 'epoch': 42.85}


Model weights saved in ./model_output/checkpoint-215/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-210] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0086, 'learning_rate': 4.831313131313132e-06, 'epoch': 43.85}


Saving model checkpoint to ./model_output/checkpoint-220
Configuration saved in ./model_output/checkpoint-220/config.json


{'eval_loss': 0.4602546691894531, 'eval_runtime': 1.7119, 'eval_samples_per_second': 5.841, 'eval_steps_per_second': 5.841, 'epoch': 43.85}


Model weights saved in ./model_output/checkpoint-220/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-215] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0061, 'learning_rate': 4.826262626262626e-06, 'epoch': 44.85}


Saving model checkpoint to ./model_output/checkpoint-225
Configuration saved in ./model_output/checkpoint-225/config.json


{'eval_loss': 0.6531530618667603, 'eval_runtime': 1.7098, 'eval_samples_per_second': 5.849, 'eval_steps_per_second': 5.849, 'epoch': 44.85}


Model weights saved in ./model_output/checkpoint-225/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-220] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0046, 'learning_rate': 4.8212121212121215e-06, 'epoch': 45.85}


Saving model checkpoint to ./model_output/checkpoint-230
Configuration saved in ./model_output/checkpoint-230/config.json


{'eval_loss': 0.5664567351341248, 'eval_runtime': 1.7186, 'eval_samples_per_second': 5.819, 'eval_steps_per_second': 5.819, 'epoch': 45.85}


Model weights saved in ./model_output/checkpoint-230/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-225] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./model_output/checkpoint-180 (score: 0.3396797478199005).
***** Running Prediction *****
  Num examples = 13
  Batch size = 1


{'train_runtime': 2833.6329, 'train_samples_per_second': 33.173, 'train_steps_per_second': 1.765, 'train_loss': 0.5175497006058045, 'epoch': 45.85}
Fitting model: roberta_base using fold 5 as out of fold test data.
Train data sizes: (98, 98).
Val data sizes: (10, 10).
Test data sizes: (9, 9).


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file config.json from cache at /home/joel_stremmel/.cache/huggingface/hub/models--roberta-base/snapshots/bc2764f8af2e92b6eb5679868df33e224075ca68/config.json
Model config RobertaConfig {
  "_name_or_path": "roberta-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

loading file vocab.json from cache at /home/joel_stremm

Map:   0%|          | 0/98 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Map:   0%|          | 0/9 [00:00<?, ? examples/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using cuda_amp half precision backend
***** Running training *****
  Num examples = 98
  Num Epochs = 1000
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 16
  Total optimization steps = 6000
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7552, 'learning_rate': 6.000000000000001e-07, 'epoch': 0.98}


Saving model checkpoint to ./model_output/checkpoint-6
Configuration saved in ./model_output/checkpoint-6/config.json


{'eval_loss': 0.7961438894271851, 'eval_runtime': 1.7063, 'eval_samples_per_second': 5.861, 'eval_steps_per_second': 5.861, 'epoch': 0.98}


Model weights saved in ./model_output/checkpoint-6/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-180] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7506, 'learning_rate': 1.2000000000000002e-06, 'epoch': 1.98}


Saving model checkpoint to ./model_output/checkpoint-12
Configuration saved in ./model_output/checkpoint-12/config.json


{'eval_loss': 0.7922282218933105, 'eval_runtime': 1.708, 'eval_samples_per_second': 5.855, 'eval_steps_per_second': 5.855, 'epoch': 1.98}


Model weights saved in ./model_output/checkpoint-12/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-230] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7264, 'learning_rate': 1.8000000000000001e-06, 'epoch': 2.98}


Saving model checkpoint to ./model_output/checkpoint-18
Configuration saved in ./model_output/checkpoint-18/config.json


{'eval_loss': 0.7832130789756775, 'eval_runtime': 1.7204, 'eval_samples_per_second': 5.813, 'eval_steps_per_second': 5.813, 'epoch': 2.98}


Model weights saved in ./model_output/checkpoint-18/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-6] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7524, 'learning_rate': 2.4000000000000003e-06, 'epoch': 3.98}


Saving model checkpoint to ./model_output/checkpoint-24
Configuration saved in ./model_output/checkpoint-24/config.json


{'eval_loss': 0.7710002660751343, 'eval_runtime': 1.7093, 'eval_samples_per_second': 5.85, 'eval_steps_per_second': 5.85, 'epoch': 3.98}


Model weights saved in ./model_output/checkpoint-24/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-12] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7368, 'learning_rate': 3e-06, 'epoch': 4.98}


Saving model checkpoint to ./model_output/checkpoint-30
Configuration saved in ./model_output/checkpoint-30/config.json


{'eval_loss': 0.7564960718154907, 'eval_runtime': 1.7107, 'eval_samples_per_second': 5.845, 'eval_steps_per_second': 5.845, 'epoch': 4.98}


Model weights saved in ./model_output/checkpoint-30/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-18] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7348, 'learning_rate': 3.6000000000000003e-06, 'epoch': 5.98}


Saving model checkpoint to ./model_output/checkpoint-36
Configuration saved in ./model_output/checkpoint-36/config.json


{'eval_loss': 0.7402726411819458, 'eval_runtime': 1.7125, 'eval_samples_per_second': 5.839, 'eval_steps_per_second': 5.839, 'epoch': 5.98}


Model weights saved in ./model_output/checkpoint-36/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-24] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7192, 'learning_rate': 4.2000000000000004e-06, 'epoch': 6.98}


Saving model checkpoint to ./model_output/checkpoint-42
Configuration saved in ./model_output/checkpoint-42/config.json


{'eval_loss': 0.7212113738059998, 'eval_runtime': 1.7083, 'eval_samples_per_second': 5.854, 'eval_steps_per_second': 5.854, 'epoch': 6.98}


Model weights saved in ./model_output/checkpoint-42/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-30] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6989, 'learning_rate': 4.800000000000001e-06, 'epoch': 7.98}


Saving model checkpoint to ./model_output/checkpoint-48
Configuration saved in ./model_output/checkpoint-48/config.json


{'eval_loss': 0.7046206593513489, 'eval_runtime': 1.7093, 'eval_samples_per_second': 5.85, 'eval_steps_per_second': 5.85, 'epoch': 7.98}


Model weights saved in ./model_output/checkpoint-48/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-36] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7141, 'learning_rate': 4.996638655462185e-06, 'epoch': 8.98}


Saving model checkpoint to ./model_output/checkpoint-54
Configuration saved in ./model_output/checkpoint-54/config.json


{'eval_loss': 0.6838214993476868, 'eval_runtime': 1.7241, 'eval_samples_per_second': 5.8, 'eval_steps_per_second': 5.8, 'epoch': 8.98}


Model weights saved in ./model_output/checkpoint-54/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-42] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7109, 'learning_rate': 4.9915966386554625e-06, 'epoch': 9.98}


Saving model checkpoint to ./model_output/checkpoint-60
Configuration saved in ./model_output/checkpoint-60/config.json


{'eval_loss': 0.6656161546707153, 'eval_runtime': 1.7084, 'eval_samples_per_second': 5.853, 'eval_steps_per_second': 5.853, 'epoch': 9.98}


Model weights saved in ./model_output/checkpoint-60/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-48] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6879, 'learning_rate': 4.98655462184874e-06, 'epoch': 10.98}


Saving model checkpoint to ./model_output/checkpoint-66
Configuration saved in ./model_output/checkpoint-66/config.json


{'eval_loss': 0.6444641351699829, 'eval_runtime': 1.7075, 'eval_samples_per_second': 5.856, 'eval_steps_per_second': 5.856, 'epoch': 10.98}


Model weights saved in ./model_output/checkpoint-66/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-54] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7077, 'learning_rate': 4.9815126050420174e-06, 'epoch': 11.98}


Saving model checkpoint to ./model_output/checkpoint-72
Configuration saved in ./model_output/checkpoint-72/config.json


{'eval_loss': 0.621229887008667, 'eval_runtime': 1.7109, 'eval_samples_per_second': 5.845, 'eval_steps_per_second': 5.845, 'epoch': 11.98}


Model weights saved in ./model_output/checkpoint-72/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-60] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6893, 'learning_rate': 4.976470588235294e-06, 'epoch': 12.98}


Saving model checkpoint to ./model_output/checkpoint-78
Configuration saved in ./model_output/checkpoint-78/config.json


{'eval_loss': 0.6072287559509277, 'eval_runtime': 1.7123, 'eval_samples_per_second': 5.84, 'eval_steps_per_second': 5.84, 'epoch': 12.98}


Model weights saved in ./model_output/checkpoint-78/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-66] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6715, 'learning_rate': 4.971428571428572e-06, 'epoch': 13.98}


Saving model checkpoint to ./model_output/checkpoint-84
Configuration saved in ./model_output/checkpoint-84/config.json


{'eval_loss': 0.6052583456039429, 'eval_runtime': 1.708, 'eval_samples_per_second': 5.855, 'eval_steps_per_second': 5.855, 'epoch': 13.98}


Model weights saved in ./model_output/checkpoint-84/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-72] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6748, 'learning_rate': 4.966386554621849e-06, 'epoch': 14.98}


Saving model checkpoint to ./model_output/checkpoint-90
Configuration saved in ./model_output/checkpoint-90/config.json


{'eval_loss': 0.5962575674057007, 'eval_runtime': 1.7095, 'eval_samples_per_second': 5.85, 'eval_steps_per_second': 5.85, 'epoch': 14.98}


Model weights saved in ./model_output/checkpoint-90/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-78] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6534, 'learning_rate': 4.961344537815126e-06, 'epoch': 15.98}


Saving model checkpoint to ./model_output/checkpoint-96
Configuration saved in ./model_output/checkpoint-96/config.json


{'eval_loss': 0.5869784951210022, 'eval_runtime': 1.7113, 'eval_samples_per_second': 5.843, 'eval_steps_per_second': 5.843, 'epoch': 15.98}


Model weights saved in ./model_output/checkpoint-96/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-84] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6764, 'learning_rate': 4.956302521008404e-06, 'epoch': 16.98}


Saving model checkpoint to ./model_output/checkpoint-102
Configuration saved in ./model_output/checkpoint-102/config.json


{'eval_loss': 0.5975548028945923, 'eval_runtime': 1.7182, 'eval_samples_per_second': 5.82, 'eval_steps_per_second': 5.82, 'epoch': 16.98}


Model weights saved in ./model_output/checkpoint-102/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-90] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6511, 'learning_rate': 4.951260504201681e-06, 'epoch': 17.98}


Saving model checkpoint to ./model_output/checkpoint-108
Configuration saved in ./model_output/checkpoint-108/config.json


{'eval_loss': 0.5858069062232971, 'eval_runtime': 1.7121, 'eval_samples_per_second': 5.841, 'eval_steps_per_second': 5.841, 'epoch': 17.98}


Model weights saved in ./model_output/checkpoint-108/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-96] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6291, 'learning_rate': 4.946218487394958e-06, 'epoch': 18.98}


Saving model checkpoint to ./model_output/checkpoint-114
Configuration saved in ./model_output/checkpoint-114/config.json


{'eval_loss': 0.5345587730407715, 'eval_runtime': 1.7176, 'eval_samples_per_second': 5.822, 'eval_steps_per_second': 5.822, 'epoch': 18.98}


Model weights saved in ./model_output/checkpoint-114/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-102] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5932, 'learning_rate': 4.941176470588236e-06, 'epoch': 19.98}


Saving model checkpoint to ./model_output/checkpoint-120
Configuration saved in ./model_output/checkpoint-120/config.json


{'eval_loss': 0.5058766603469849, 'eval_runtime': 1.7102, 'eval_samples_per_second': 5.847, 'eval_steps_per_second': 5.847, 'epoch': 19.98}


Model weights saved in ./model_output/checkpoint-120/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-108] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5987, 'learning_rate': 4.936134453781513e-06, 'epoch': 20.98}


Saving model checkpoint to ./model_output/checkpoint-126
Configuration saved in ./model_output/checkpoint-126/config.json


{'eval_loss': 0.4758751392364502, 'eval_runtime': 1.7107, 'eval_samples_per_second': 5.845, 'eval_steps_per_second': 5.845, 'epoch': 20.98}


Model weights saved in ./model_output/checkpoint-126/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-114] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5545, 'learning_rate': 4.93109243697479e-06, 'epoch': 21.98}


Saving model checkpoint to ./model_output/checkpoint-132
Configuration saved in ./model_output/checkpoint-132/config.json


{'eval_loss': 0.44654083251953125, 'eval_runtime': 1.7132, 'eval_samples_per_second': 5.837, 'eval_steps_per_second': 5.837, 'epoch': 21.98}


Model weights saved in ./model_output/checkpoint-132/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-120] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5133, 'learning_rate': 4.926050420168068e-06, 'epoch': 22.98}


Saving model checkpoint to ./model_output/checkpoint-138
Configuration saved in ./model_output/checkpoint-138/config.json


{'eval_loss': 0.417229562997818, 'eval_runtime': 1.7122, 'eval_samples_per_second': 5.84, 'eval_steps_per_second': 5.84, 'epoch': 22.98}


Model weights saved in ./model_output/checkpoint-138/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-126] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4858, 'learning_rate': 4.921008403361345e-06, 'epoch': 23.98}


Saving model checkpoint to ./model_output/checkpoint-144
Configuration saved in ./model_output/checkpoint-144/config.json


{'eval_loss': 0.4115889072418213, 'eval_runtime': 1.7125, 'eval_samples_per_second': 5.839, 'eval_steps_per_second': 5.839, 'epoch': 23.98}


Model weights saved in ./model_output/checkpoint-144/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-132] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4752, 'learning_rate': 4.916806722689076e-06, 'epoch': 24.98}


Saving model checkpoint to ./model_output/checkpoint-150
Configuration saved in ./model_output/checkpoint-150/config.json


{'eval_loss': 0.40211740136146545, 'eval_runtime': 1.7077, 'eval_samples_per_second': 5.856, 'eval_steps_per_second': 5.856, 'epoch': 24.98}


Model weights saved in ./model_output/checkpoint-150/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-138] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4156, 'learning_rate': 4.911764705882353e-06, 'epoch': 25.98}


Saving model checkpoint to ./model_output/checkpoint-156
Configuration saved in ./model_output/checkpoint-156/config.json


{'eval_loss': 0.4664889872074127, 'eval_runtime': 1.7155, 'eval_samples_per_second': 5.829, 'eval_steps_per_second': 5.829, 'epoch': 25.98}


Model weights saved in ./model_output/checkpoint-156/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-144] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4277, 'learning_rate': 4.906722689075631e-06, 'epoch': 26.98}


Saving model checkpoint to ./model_output/checkpoint-162
Configuration saved in ./model_output/checkpoint-162/config.json


{'eval_loss': 0.36264145374298096, 'eval_runtime': 1.7132, 'eval_samples_per_second': 5.837, 'eval_steps_per_second': 5.837, 'epoch': 26.98}


Model weights saved in ./model_output/checkpoint-162/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-150] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.3621, 'learning_rate': 4.901680672268908e-06, 'epoch': 27.98}


Saving model checkpoint to ./model_output/checkpoint-168
Configuration saved in ./model_output/checkpoint-168/config.json


{'eval_loss': 0.32630568742752075, 'eval_runtime': 1.717, 'eval_samples_per_second': 5.824, 'eval_steps_per_second': 5.824, 'epoch': 27.98}


Model weights saved in ./model_output/checkpoint-168/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-156] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.322, 'learning_rate': 4.896638655462185e-06, 'epoch': 28.98}


Saving model checkpoint to ./model_output/checkpoint-174
Configuration saved in ./model_output/checkpoint-174/config.json


{'eval_loss': 0.28947174549102783, 'eval_runtime': 1.7137, 'eval_samples_per_second': 5.835, 'eval_steps_per_second': 5.835, 'epoch': 28.98}


Model weights saved in ./model_output/checkpoint-174/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-162] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2685, 'learning_rate': 4.892436974789916e-06, 'epoch': 29.98}


Saving model checkpoint to ./model_output/checkpoint-180
Configuration saved in ./model_output/checkpoint-180/config.json


{'eval_loss': 0.2699694037437439, 'eval_runtime': 1.7178, 'eval_samples_per_second': 5.821, 'eval_steps_per_second': 5.821, 'epoch': 29.98}


Model weights saved in ./model_output/checkpoint-180/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-168] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2061, 'learning_rate': 4.887394957983194e-06, 'epoch': 30.98}


Saving model checkpoint to ./model_output/checkpoint-186
Configuration saved in ./model_output/checkpoint-186/config.json


{'eval_loss': 0.26832348108291626, 'eval_runtime': 1.7203, 'eval_samples_per_second': 5.813, 'eval_steps_per_second': 5.813, 'epoch': 30.98}


Model weights saved in ./model_output/checkpoint-186/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-174] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1915, 'learning_rate': 4.883193277310925e-06, 'epoch': 31.98}


Saving model checkpoint to ./model_output/checkpoint-192
Configuration saved in ./model_output/checkpoint-192/config.json


{'eval_loss': 0.22625501453876495, 'eval_runtime': 1.7123, 'eval_samples_per_second': 5.84, 'eval_steps_per_second': 5.84, 'epoch': 31.98}


Model weights saved in ./model_output/checkpoint-192/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-180] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2129, 'learning_rate': 4.878151260504202e-06, 'epoch': 32.98}


Saving model checkpoint to ./model_output/checkpoint-198
Configuration saved in ./model_output/checkpoint-198/config.json


{'eval_loss': 0.1888808310031891, 'eval_runtime': 1.7162, 'eval_samples_per_second': 5.827, 'eval_steps_per_second': 5.827, 'epoch': 32.98}


Model weights saved in ./model_output/checkpoint-198/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-186] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1244, 'learning_rate': 4.873109243697479e-06, 'epoch': 33.98}


Saving model checkpoint to ./model_output/checkpoint-204
Configuration saved in ./model_output/checkpoint-204/config.json


{'eval_loss': 0.19809553027153015, 'eval_runtime': 1.7133, 'eval_samples_per_second': 5.837, 'eval_steps_per_second': 5.837, 'epoch': 33.98}


Model weights saved in ./model_output/checkpoint-204/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-192] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0745, 'learning_rate': 4.868067226890757e-06, 'epoch': 34.98}


Saving model checkpoint to ./model_output/checkpoint-210
Configuration saved in ./model_output/checkpoint-210/config.json


{'eval_loss': 0.21146929264068604, 'eval_runtime': 1.7187, 'eval_samples_per_second': 5.818, 'eval_steps_per_second': 5.818, 'epoch': 34.98}


Model weights saved in ./model_output/checkpoint-210/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-204] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0558, 'learning_rate': 4.863025210084034e-06, 'epoch': 35.98}


Saving model checkpoint to ./model_output/checkpoint-216
Configuration saved in ./model_output/checkpoint-216/config.json


{'eval_loss': 0.22134152054786682, 'eval_runtime': 1.712, 'eval_samples_per_second': 5.841, 'eval_steps_per_second': 5.841, 'epoch': 35.98}


Model weights saved in ./model_output/checkpoint-216/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-210] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0423, 'learning_rate': 4.858823529411766e-06, 'epoch': 36.98}


Saving model checkpoint to ./model_output/checkpoint-222
Configuration saved in ./model_output/checkpoint-222/config.json


{'eval_loss': 0.23914571106433868, 'eval_runtime': 1.7055, 'eval_samples_per_second': 5.864, 'eval_steps_per_second': 5.864, 'epoch': 36.98}


Model weights saved in ./model_output/checkpoint-222/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-216] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0244, 'learning_rate': 4.853781512605042e-06, 'epoch': 37.98}


Saving model checkpoint to ./model_output/checkpoint-228
Configuration saved in ./model_output/checkpoint-228/config.json


{'eval_loss': 0.2873031795024872, 'eval_runtime': 1.7114, 'eval_samples_per_second': 5.843, 'eval_steps_per_second': 5.843, 'epoch': 37.98}


Model weights saved in ./model_output/checkpoint-228/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-222] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0171, 'learning_rate': 4.84873949579832e-06, 'epoch': 38.98}


Saving model checkpoint to ./model_output/checkpoint-234
Configuration saved in ./model_output/checkpoint-234/config.json


{'eval_loss': 0.32221633195877075, 'eval_runtime': 1.7121, 'eval_samples_per_second': 5.841, 'eval_steps_per_second': 5.841, 'epoch': 38.98}


Model weights saved in ./model_output/checkpoint-234/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-228] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0092, 'learning_rate': 4.843697478991597e-06, 'epoch': 39.98}


Saving model checkpoint to ./model_output/checkpoint-240
Configuration saved in ./model_output/checkpoint-240/config.json


{'eval_loss': 0.6390388011932373, 'eval_runtime': 1.7079, 'eval_samples_per_second': 5.855, 'eval_steps_per_second': 5.855, 'epoch': 39.98}


Model weights saved in ./model_output/checkpoint-240/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-234] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0062, 'learning_rate': 4.838655462184874e-06, 'epoch': 40.98}


Saving model checkpoint to ./model_output/checkpoint-246
Configuration saved in ./model_output/checkpoint-246/config.json


{'eval_loss': 0.25597819685935974, 'eval_runtime': 1.7091, 'eval_samples_per_second': 5.851, 'eval_steps_per_second': 5.851, 'epoch': 40.98}


Model weights saved in ./model_output/checkpoint-246/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-240] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0043, 'learning_rate': 4.833613445378152e-06, 'epoch': 41.98}


Saving model checkpoint to ./model_output/checkpoint-252
Configuration saved in ./model_output/checkpoint-252/config.json


{'eval_loss': 0.6602343320846558, 'eval_runtime': 1.7067, 'eval_samples_per_second': 5.859, 'eval_steps_per_second': 5.859, 'epoch': 41.98}


Model weights saved in ./model_output/checkpoint-252/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-246] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0039, 'learning_rate': 4.8285714285714295e-06, 'epoch': 42.98}


Saving model checkpoint to ./model_output/checkpoint-258
Configuration saved in ./model_output/checkpoint-258/config.json


{'eval_loss': 0.25660353899002075, 'eval_runtime': 1.7075, 'eval_samples_per_second': 5.856, 'eval_steps_per_second': 5.856, 'epoch': 42.98}


Model weights saved in ./model_output/checkpoint-258/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-252] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./model_output/checkpoint-198 (score: 0.1888808310031891).
***** Running Prediction *****
  Num examples = 9
  Batch size = 1


{'train_runtime': 2759.2269, 'train_samples_per_second': 35.517, 'train_steps_per_second': 2.175, 'train_loss': 0.44952426976416, 'epoch': 42.98}
Fitting model: roberta_base using fold 6 as out of fold test data.
Train data sizes: (94, 94).
Val data sizes: (10, 10).
Test data sizes: (13, 13).


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file config.json from cache at /home/joel_stremmel/.cache/huggingface/hub/models--roberta-base/snapshots/bc2764f8af2e92b6eb5679868df33e224075ca68/config.json
Model config RobertaConfig {
  "_name_or_path": "roberta-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

loading file vocab.json from cache at /home/joel_stremm

Map:   0%|          | 0/94 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Map:   0%|          | 0/13 [00:00<?, ? examples/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using cuda_amp half precision backend
***** Running training *****
  Num examples = 94
  Num Epochs = 1000
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 16
  Total optimization steps = 5000
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8097, 'learning_rate': 5.000000000000001e-07, 'epoch': 0.85}


Saving model checkpoint to ./model_output/checkpoint-5
Configuration saved in ./model_output/checkpoint-5/config.json


{'eval_loss': 0.6651713252067566, 'eval_runtime': 1.7129, 'eval_samples_per_second': 5.838, 'eval_steps_per_second': 5.838, 'epoch': 0.85}


Model weights saved in ./model_output/checkpoint-5/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-198] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8021, 'learning_rate': 1.0000000000000002e-06, 'epoch': 1.85}


Saving model checkpoint to ./model_output/checkpoint-10
Configuration saved in ./model_output/checkpoint-10/config.json


{'eval_loss': 0.6644352674484253, 'eval_runtime': 1.714, 'eval_samples_per_second': 5.834, 'eval_steps_per_second': 5.834, 'epoch': 1.85}


Model weights saved in ./model_output/checkpoint-10/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-258] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7992, 'learning_rate': 1.5e-06, 'epoch': 2.85}


Saving model checkpoint to ./model_output/checkpoint-15
Configuration saved in ./model_output/checkpoint-15/config.json


{'eval_loss': 0.6624475717544556, 'eval_runtime': 1.7161, 'eval_samples_per_second': 5.827, 'eval_steps_per_second': 5.827, 'epoch': 2.85}


Model weights saved in ./model_output/checkpoint-15/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-5] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8006, 'learning_rate': 2.0000000000000003e-06, 'epoch': 3.85}


Saving model checkpoint to ./model_output/checkpoint-20
Configuration saved in ./model_output/checkpoint-20/config.json


{'eval_loss': 0.6580768823623657, 'eval_runtime': 1.7138, 'eval_samples_per_second': 5.835, 'eval_steps_per_second': 5.835, 'epoch': 3.85}


Model weights saved in ./model_output/checkpoint-20/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-10] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7967, 'learning_rate': 2.5e-06, 'epoch': 4.85}


Saving model checkpoint to ./model_output/checkpoint-25
Configuration saved in ./model_output/checkpoint-25/config.json


{'eval_loss': 0.6524919867515564, 'eval_runtime': 1.7374, 'eval_samples_per_second': 5.756, 'eval_steps_per_second': 5.756, 'epoch': 4.85}


Model weights saved in ./model_output/checkpoint-25/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-15] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8045, 'learning_rate': 3e-06, 'epoch': 5.85}


Saving model checkpoint to ./model_output/checkpoint-30
Configuration saved in ./model_output/checkpoint-30/config.json


{'eval_loss': 0.6452988982200623, 'eval_runtime': 1.7114, 'eval_samples_per_second': 5.843, 'eval_steps_per_second': 5.843, 'epoch': 5.85}


Model weights saved in ./model_output/checkpoint-30/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-20] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.799, 'learning_rate': 3.5e-06, 'epoch': 6.85}


Saving model checkpoint to ./model_output/checkpoint-35
Configuration saved in ./model_output/checkpoint-35/config.json


{'eval_loss': 0.6388819813728333, 'eval_runtime': 1.726, 'eval_samples_per_second': 5.794, 'eval_steps_per_second': 5.794, 'epoch': 6.85}


Model weights saved in ./model_output/checkpoint-35/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-25] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7961, 'learning_rate': 4.000000000000001e-06, 'epoch': 7.85}


Saving model checkpoint to ./model_output/checkpoint-40
Configuration saved in ./model_output/checkpoint-40/config.json


{'eval_loss': 0.6308039426803589, 'eval_runtime': 1.7105, 'eval_samples_per_second': 5.846, 'eval_steps_per_second': 5.846, 'epoch': 7.85}


Model weights saved in ./model_output/checkpoint-40/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-30] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7935, 'learning_rate': 4.5e-06, 'epoch': 8.85}


Saving model checkpoint to ./model_output/checkpoint-45
Configuration saved in ./model_output/checkpoint-45/config.json


{'eval_loss': 0.6230034232139587, 'eval_runtime': 1.7106, 'eval_samples_per_second': 5.846, 'eval_steps_per_second': 5.846, 'epoch': 8.85}


Model weights saved in ./model_output/checkpoint-45/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-35] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8161, 'learning_rate': 5e-06, 'epoch': 9.85}


Saving model checkpoint to ./model_output/checkpoint-50
Configuration saved in ./model_output/checkpoint-50/config.json


{'eval_loss': 0.6183845400810242, 'eval_runtime': 1.7152, 'eval_samples_per_second': 5.83, 'eval_steps_per_second': 5.83, 'epoch': 9.85}


Model weights saved in ./model_output/checkpoint-50/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-40] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7836, 'learning_rate': 4.9949494949494956e-06, 'epoch': 10.85}


Saving model checkpoint to ./model_output/checkpoint-55
Configuration saved in ./model_output/checkpoint-55/config.json


{'eval_loss': 0.6161572933197021, 'eval_runtime': 1.7121, 'eval_samples_per_second': 5.841, 'eval_steps_per_second': 5.841, 'epoch': 10.85}


Model weights saved in ./model_output/checkpoint-55/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-45] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.781, 'learning_rate': 4.98989898989899e-06, 'epoch': 11.85}


Saving model checkpoint to ./model_output/checkpoint-60
Configuration saved in ./model_output/checkpoint-60/config.json


{'eval_loss': 0.6147540807723999, 'eval_runtime': 1.7173, 'eval_samples_per_second': 5.823, 'eval_steps_per_second': 5.823, 'epoch': 11.85}


Model weights saved in ./model_output/checkpoint-60/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-50] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7794, 'learning_rate': 4.9858585858585865e-06, 'epoch': 12.85}


Saving model checkpoint to ./model_output/checkpoint-65
Configuration saved in ./model_output/checkpoint-65/config.json


{'eval_loss': 0.6133472323417664, 'eval_runtime': 1.7154, 'eval_samples_per_second': 5.83, 'eval_steps_per_second': 5.83, 'epoch': 12.85}


Model weights saved in ./model_output/checkpoint-65/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-55] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7842, 'learning_rate': 4.980808080808081e-06, 'epoch': 13.85}


Saving model checkpoint to ./model_output/checkpoint-70
Configuration saved in ./model_output/checkpoint-70/config.json


{'eval_loss': 0.607837975025177, 'eval_runtime': 1.7158, 'eval_samples_per_second': 5.828, 'eval_steps_per_second': 5.828, 'epoch': 13.85}


Model weights saved in ./model_output/checkpoint-70/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-60] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.787, 'learning_rate': 4.975757575757576e-06, 'epoch': 14.85}


Saving model checkpoint to ./model_output/checkpoint-75
Configuration saved in ./model_output/checkpoint-75/config.json


{'eval_loss': 0.6041581034660339, 'eval_runtime': 1.7125, 'eval_samples_per_second': 5.84, 'eval_steps_per_second': 5.84, 'epoch': 14.85}


Model weights saved in ./model_output/checkpoint-75/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-65] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7778, 'learning_rate': 4.970707070707071e-06, 'epoch': 15.85}


Saving model checkpoint to ./model_output/checkpoint-80
Configuration saved in ./model_output/checkpoint-80/config.json


{'eval_loss': 0.6017223596572876, 'eval_runtime': 1.7164, 'eval_samples_per_second': 5.826, 'eval_steps_per_second': 5.826, 'epoch': 15.85}


Model weights saved in ./model_output/checkpoint-80/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-70] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7746, 'learning_rate': 4.965656565656566e-06, 'epoch': 16.85}


Saving model checkpoint to ./model_output/checkpoint-85
Configuration saved in ./model_output/checkpoint-85/config.json


{'eval_loss': 0.594582736492157, 'eval_runtime': 1.7102, 'eval_samples_per_second': 5.847, 'eval_steps_per_second': 5.847, 'epoch': 16.85}


Model weights saved in ./model_output/checkpoint-85/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-75] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7749, 'learning_rate': 4.9606060606060605e-06, 'epoch': 17.85}


Saving model checkpoint to ./model_output/checkpoint-90
Configuration saved in ./model_output/checkpoint-90/config.json


{'eval_loss': 0.5900936722755432, 'eval_runtime': 1.7098, 'eval_samples_per_second': 5.849, 'eval_steps_per_second': 5.849, 'epoch': 17.85}


Model weights saved in ./model_output/checkpoint-90/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-80] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7454, 'learning_rate': 4.9555555555555565e-06, 'epoch': 18.85}


Saving model checkpoint to ./model_output/checkpoint-95
Configuration saved in ./model_output/checkpoint-95/config.json


{'eval_loss': 0.5747913718223572, 'eval_runtime': 1.7134, 'eval_samples_per_second': 5.836, 'eval_steps_per_second': 5.836, 'epoch': 18.85}


Model weights saved in ./model_output/checkpoint-95/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-85] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7494, 'learning_rate': 4.950505050505051e-06, 'epoch': 19.85}


Saving model checkpoint to ./model_output/checkpoint-100
Configuration saved in ./model_output/checkpoint-100/config.json


{'eval_loss': 0.5425484776496887, 'eval_runtime': 1.706, 'eval_samples_per_second': 5.862, 'eval_steps_per_second': 5.862, 'epoch': 19.85}


Model weights saved in ./model_output/checkpoint-100/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-90] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7455, 'learning_rate': 4.945454545454546e-06, 'epoch': 20.85}


Saving model checkpoint to ./model_output/checkpoint-105
Configuration saved in ./model_output/checkpoint-105/config.json


{'eval_loss': 0.5138496160507202, 'eval_runtime': 1.7111, 'eval_samples_per_second': 5.844, 'eval_steps_per_second': 5.844, 'epoch': 20.85}


Model weights saved in ./model_output/checkpoint-105/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-95] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7175, 'learning_rate': 4.940404040404041e-06, 'epoch': 21.85}


Saving model checkpoint to ./model_output/checkpoint-110
Configuration saved in ./model_output/checkpoint-110/config.json


{'eval_loss': 0.5123583078384399, 'eval_runtime': 1.7163, 'eval_samples_per_second': 5.827, 'eval_steps_per_second': 5.827, 'epoch': 21.85}


Model weights saved in ./model_output/checkpoint-110/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-100] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7042, 'learning_rate': 4.935353535353536e-06, 'epoch': 22.85}


Saving model checkpoint to ./model_output/checkpoint-115
Configuration saved in ./model_output/checkpoint-115/config.json


{'eval_loss': 0.5011835098266602, 'eval_runtime': 1.7144, 'eval_samples_per_second': 5.833, 'eval_steps_per_second': 5.833, 'epoch': 22.85}


Model weights saved in ./model_output/checkpoint-115/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-105] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6848, 'learning_rate': 4.9303030303030305e-06, 'epoch': 23.85}


Saving model checkpoint to ./model_output/checkpoint-120
Configuration saved in ./model_output/checkpoint-120/config.json


{'eval_loss': 0.4427456855773926, 'eval_runtime': 1.711, 'eval_samples_per_second': 5.845, 'eval_steps_per_second': 5.845, 'epoch': 23.85}


Model weights saved in ./model_output/checkpoint-120/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-110] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6428, 'learning_rate': 4.925252525252526e-06, 'epoch': 24.85}


Saving model checkpoint to ./model_output/checkpoint-125
Configuration saved in ./model_output/checkpoint-125/config.json


{'eval_loss': 0.4010460376739502, 'eval_runtime': 1.7147, 'eval_samples_per_second': 5.832, 'eval_steps_per_second': 5.832, 'epoch': 24.85}


Model weights saved in ./model_output/checkpoint-125/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-115] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5836, 'learning_rate': 4.920202020202021e-06, 'epoch': 25.85}


Saving model checkpoint to ./model_output/checkpoint-130
Configuration saved in ./model_output/checkpoint-130/config.json


{'eval_loss': 0.39840245246887207, 'eval_runtime': 1.7083, 'eval_samples_per_second': 5.854, 'eval_steps_per_second': 5.854, 'epoch': 25.85}


Model weights saved in ./model_output/checkpoint-130/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-120] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4994, 'learning_rate': 4.915151515151516e-06, 'epoch': 26.85}


Saving model checkpoint to ./model_output/checkpoint-135
Configuration saved in ./model_output/checkpoint-135/config.json


{'eval_loss': 0.3174002170562744, 'eval_runtime': 1.7209, 'eval_samples_per_second': 5.811, 'eval_steps_per_second': 5.811, 'epoch': 26.85}


Model weights saved in ./model_output/checkpoint-135/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-125] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4343, 'learning_rate': 4.91010101010101e-06, 'epoch': 27.85}


Saving model checkpoint to ./model_output/checkpoint-140
Configuration saved in ./model_output/checkpoint-140/config.json


{'eval_loss': 0.2646715044975281, 'eval_runtime': 1.7133, 'eval_samples_per_second': 5.837, 'eval_steps_per_second': 5.837, 'epoch': 27.85}


Model weights saved in ./model_output/checkpoint-140/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-130] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.42, 'learning_rate': 4.905050505050505e-06, 'epoch': 28.85}


Saving model checkpoint to ./model_output/checkpoint-145
Configuration saved in ./model_output/checkpoint-145/config.json


{'eval_loss': 0.30647674202919006, 'eval_runtime': 1.7121, 'eval_samples_per_second': 5.841, 'eval_steps_per_second': 5.841, 'epoch': 28.85}


Model weights saved in ./model_output/checkpoint-145/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-135] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.3052, 'learning_rate': 4.9000000000000005e-06, 'epoch': 29.85}


Saving model checkpoint to ./model_output/checkpoint-150
Configuration saved in ./model_output/checkpoint-150/config.json


{'eval_loss': 0.21954818069934845, 'eval_runtime': 1.7172, 'eval_samples_per_second': 5.824, 'eval_steps_per_second': 5.824, 'epoch': 29.85}


Model weights saved in ./model_output/checkpoint-150/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-140] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.3948, 'learning_rate': 4.895959595959596e-06, 'epoch': 30.85}


Saving model checkpoint to ./model_output/checkpoint-155
Configuration saved in ./model_output/checkpoint-155/config.json


{'eval_loss': 0.31717774271965027, 'eval_runtime': 1.7169, 'eval_samples_per_second': 5.824, 'eval_steps_per_second': 5.824, 'epoch': 30.85}


Model weights saved in ./model_output/checkpoint-155/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-145] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2484, 'learning_rate': 4.891919191919192e-06, 'epoch': 31.85}


Saving model checkpoint to ./model_output/checkpoint-160
Configuration saved in ./model_output/checkpoint-160/config.json


{'eval_loss': 0.2472379207611084, 'eval_runtime': 1.7077, 'eval_samples_per_second': 5.856, 'eval_steps_per_second': 5.856, 'epoch': 31.85}


Model weights saved in ./model_output/checkpoint-160/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-155] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.3503, 'learning_rate': 4.886868686868687e-06, 'epoch': 32.85}


Saving model checkpoint to ./model_output/checkpoint-165
Configuration saved in ./model_output/checkpoint-165/config.json


{'eval_loss': 0.25720471143722534, 'eval_runtime': 1.7146, 'eval_samples_per_second': 5.832, 'eval_steps_per_second': 5.832, 'epoch': 32.85}


Model weights saved in ./model_output/checkpoint-165/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-160] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2159, 'learning_rate': 4.881818181818182e-06, 'epoch': 33.85}


Saving model checkpoint to ./model_output/checkpoint-170
Configuration saved in ./model_output/checkpoint-170/config.json


{'eval_loss': 0.28762388229370117, 'eval_runtime': 1.7079, 'eval_samples_per_second': 5.855, 'eval_steps_per_second': 5.855, 'epoch': 33.85}


Model weights saved in ./model_output/checkpoint-170/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-165] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1206, 'learning_rate': 4.876767676767677e-06, 'epoch': 34.85}


Saving model checkpoint to ./model_output/checkpoint-175
Configuration saved in ./model_output/checkpoint-175/config.json


{'eval_loss': 0.27494990825653076, 'eval_runtime': 1.7161, 'eval_samples_per_second': 5.827, 'eval_steps_per_second': 5.827, 'epoch': 34.85}


Model weights saved in ./model_output/checkpoint-175/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-170] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1387, 'learning_rate': 4.871717171717172e-06, 'epoch': 35.85}


Saving model checkpoint to ./model_output/checkpoint-180
Configuration saved in ./model_output/checkpoint-180/config.json


{'eval_loss': 0.1818559765815735, 'eval_runtime': 1.7095, 'eval_samples_per_second': 5.85, 'eval_steps_per_second': 5.85, 'epoch': 35.85}


Model weights saved in ./model_output/checkpoint-180/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-150] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0872, 'learning_rate': 4.866666666666667e-06, 'epoch': 36.85}


Saving model checkpoint to ./model_output/checkpoint-185
Configuration saved in ./model_output/checkpoint-185/config.json


{'eval_loss': 0.15894363820552826, 'eval_runtime': 1.7105, 'eval_samples_per_second': 5.846, 'eval_steps_per_second': 5.846, 'epoch': 36.85}


Model weights saved in ./model_output/checkpoint-185/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-175] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1427, 'learning_rate': 4.861616161616162e-06, 'epoch': 37.85}


Saving model checkpoint to ./model_output/checkpoint-190
Configuration saved in ./model_output/checkpoint-190/config.json


{'eval_loss': 0.2783893048763275, 'eval_runtime': 1.7134, 'eval_samples_per_second': 5.836, 'eval_steps_per_second': 5.836, 'epoch': 37.85}


Model weights saved in ./model_output/checkpoint-190/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-180] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0758, 'learning_rate': 4.856565656565657e-06, 'epoch': 38.85}


Saving model checkpoint to ./model_output/checkpoint-195
Configuration saved in ./model_output/checkpoint-195/config.json


{'eval_loss': 0.3868536353111267, 'eval_runtime': 1.7086, 'eval_samples_per_second': 5.853, 'eval_steps_per_second': 5.853, 'epoch': 38.85}


Model weights saved in ./model_output/checkpoint-195/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-190] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0752, 'learning_rate': 4.852525252525253e-06, 'epoch': 39.85}


Saving model checkpoint to ./model_output/checkpoint-200
Configuration saved in ./model_output/checkpoint-200/config.json


{'eval_loss': 0.1540166735649109, 'eval_runtime': 1.7106, 'eval_samples_per_second': 5.846, 'eval_steps_per_second': 5.846, 'epoch': 39.85}


Model weights saved in ./model_output/checkpoint-200/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-185] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0289, 'learning_rate': 4.847474747474748e-06, 'epoch': 40.85}


Saving model checkpoint to ./model_output/checkpoint-205
Configuration saved in ./model_output/checkpoint-205/config.json


{'eval_loss': 0.15542033314704895, 'eval_runtime': 1.71, 'eval_samples_per_second': 5.848, 'eval_steps_per_second': 5.848, 'epoch': 40.85}


Model weights saved in ./model_output/checkpoint-205/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-195] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0168, 'learning_rate': 4.842424242424243e-06, 'epoch': 41.85}


Saving model checkpoint to ./model_output/checkpoint-210
Configuration saved in ./model_output/checkpoint-210/config.json


{'eval_loss': 0.15819098055362701, 'eval_runtime': 1.712, 'eval_samples_per_second': 5.841, 'eval_steps_per_second': 5.841, 'epoch': 41.85}


Model weights saved in ./model_output/checkpoint-210/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-205] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0153, 'learning_rate': 4.837373737373738e-06, 'epoch': 42.85}


Saving model checkpoint to ./model_output/checkpoint-215
Configuration saved in ./model_output/checkpoint-215/config.json


{'eval_loss': 0.20970723032951355, 'eval_runtime': 1.7157, 'eval_samples_per_second': 5.829, 'eval_steps_per_second': 5.829, 'epoch': 42.85}


Model weights saved in ./model_output/checkpoint-215/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-210] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0107, 'learning_rate': 4.832323232323233e-06, 'epoch': 43.85}


Saving model checkpoint to ./model_output/checkpoint-220
Configuration saved in ./model_output/checkpoint-220/config.json


{'eval_loss': 0.23469209671020508, 'eval_runtime': 1.7292, 'eval_samples_per_second': 5.783, 'eval_steps_per_second': 5.783, 'epoch': 43.85}


Model weights saved in ./model_output/checkpoint-220/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-215] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0082, 'learning_rate': 4.827272727272728e-06, 'epoch': 44.85}


Saving model checkpoint to ./model_output/checkpoint-225
Configuration saved in ./model_output/checkpoint-225/config.json


{'eval_loss': 0.13127224147319794, 'eval_runtime': 1.7154, 'eval_samples_per_second': 5.829, 'eval_steps_per_second': 5.829, 'epoch': 44.85}


Model weights saved in ./model_output/checkpoint-225/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-200] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0052, 'learning_rate': 4.822222222222222e-06, 'epoch': 45.85}


Saving model checkpoint to ./model_output/checkpoint-230
Configuration saved in ./model_output/checkpoint-230/config.json


{'eval_loss': 0.06222741678357124, 'eval_runtime': 1.71, 'eval_samples_per_second': 5.848, 'eval_steps_per_second': 5.848, 'epoch': 45.85}


Model weights saved in ./model_output/checkpoint-230/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-220] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0042, 'learning_rate': 4.817171717171717e-06, 'epoch': 46.85}


Saving model checkpoint to ./model_output/checkpoint-235
Configuration saved in ./model_output/checkpoint-235/config.json


{'eval_loss': 0.09652518481016159, 'eval_runtime': 1.7097, 'eval_samples_per_second': 5.849, 'eval_steps_per_second': 5.849, 'epoch': 46.85}


Model weights saved in ./model_output/checkpoint-235/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-225] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0035, 'learning_rate': 4.8121212121212125e-06, 'epoch': 47.85}


Saving model checkpoint to ./model_output/checkpoint-240
Configuration saved in ./model_output/checkpoint-240/config.json


{'eval_loss': 0.07679904252290726, 'eval_runtime': 1.7137, 'eval_samples_per_second': 5.835, 'eval_steps_per_second': 5.835, 'epoch': 47.85}


Model weights saved in ./model_output/checkpoint-240/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-235] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0029, 'learning_rate': 4.807070707070708e-06, 'epoch': 48.85}


Saving model checkpoint to ./model_output/checkpoint-245
Configuration saved in ./model_output/checkpoint-245/config.json


{'eval_loss': 0.06834007054567337, 'eval_runtime': 1.7096, 'eval_samples_per_second': 5.849, 'eval_steps_per_second': 5.849, 'epoch': 48.85}


Model weights saved in ./model_output/checkpoint-245/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-240] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0028, 'learning_rate': 4.802020202020202e-06, 'epoch': 49.85}


Saving model checkpoint to ./model_output/checkpoint-250
Configuration saved in ./model_output/checkpoint-250/config.json


{'eval_loss': 0.06834469735622406, 'eval_runtime': 1.7161, 'eval_samples_per_second': 5.827, 'eval_steps_per_second': 5.827, 'epoch': 49.85}


Model weights saved in ./model_output/checkpoint-250/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-245] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0024, 'learning_rate': 4.796969696969697e-06, 'epoch': 50.85}


Saving model checkpoint to ./model_output/checkpoint-255
Configuration saved in ./model_output/checkpoint-255/config.json


{'eval_loss': 0.06604542583227158, 'eval_runtime': 1.7179, 'eval_samples_per_second': 5.821, 'eval_steps_per_second': 5.821, 'epoch': 50.85}


Model weights saved in ./model_output/checkpoint-255/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-250] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0022, 'learning_rate': 4.791919191919192e-06, 'epoch': 51.85}


Saving model checkpoint to ./model_output/checkpoint-260
Configuration saved in ./model_output/checkpoint-260/config.json


{'eval_loss': 0.08034075796604156, 'eval_runtime': 1.7144, 'eval_samples_per_second': 5.833, 'eval_steps_per_second': 5.833, 'epoch': 51.85}


Model weights saved in ./model_output/checkpoint-260/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-255] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0018, 'learning_rate': 4.786868686868687e-06, 'epoch': 52.85}


Saving model checkpoint to ./model_output/checkpoint-265
Configuration saved in ./model_output/checkpoint-265/config.json


{'eval_loss': 0.057468481361866, 'eval_runtime': 1.7132, 'eval_samples_per_second': 5.837, 'eval_steps_per_second': 5.837, 'epoch': 52.85}


Model weights saved in ./model_output/checkpoint-265/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-230] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0019, 'learning_rate': 4.7818181818181825e-06, 'epoch': 53.85}


Saving model checkpoint to ./model_output/checkpoint-270
Configuration saved in ./model_output/checkpoint-270/config.json


{'eval_loss': 0.06204597279429436, 'eval_runtime': 1.7115, 'eval_samples_per_second': 5.843, 'eval_steps_per_second': 5.843, 'epoch': 53.85}


Model weights saved in ./model_output/checkpoint-270/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-260] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0018, 'learning_rate': 4.776767676767678e-06, 'epoch': 54.85}


Saving model checkpoint to ./model_output/checkpoint-275
Configuration saved in ./model_output/checkpoint-275/config.json


{'eval_loss': 0.051285307854413986, 'eval_runtime': 1.7143, 'eval_samples_per_second': 5.833, 'eval_steps_per_second': 5.833, 'epoch': 54.85}


Model weights saved in ./model_output/checkpoint-275/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-265] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0016, 'learning_rate': 4.771717171717172e-06, 'epoch': 55.85}


Saving model checkpoint to ./model_output/checkpoint-280
Configuration saved in ./model_output/checkpoint-280/config.json


{'eval_loss': 0.04843466356396675, 'eval_runtime': 1.7145, 'eval_samples_per_second': 5.833, 'eval_steps_per_second': 5.833, 'epoch': 55.85}


Model weights saved in ./model_output/checkpoint-280/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-270] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0014, 'learning_rate': 4.766666666666667e-06, 'epoch': 56.85}


Saving model checkpoint to ./model_output/checkpoint-285
Configuration saved in ./model_output/checkpoint-285/config.json


{'eval_loss': 0.04712741822004318, 'eval_runtime': 1.7131, 'eval_samples_per_second': 5.837, 'eval_steps_per_second': 5.837, 'epoch': 56.85}


Model weights saved in ./model_output/checkpoint-285/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-275] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0015, 'learning_rate': 4.761616161616162e-06, 'epoch': 57.85}


Saving model checkpoint to ./model_output/checkpoint-290
Configuration saved in ./model_output/checkpoint-290/config.json


{'eval_loss': 0.054077327251434326, 'eval_runtime': 1.7056, 'eval_samples_per_second': 5.863, 'eval_steps_per_second': 5.863, 'epoch': 57.85}


Model weights saved in ./model_output/checkpoint-290/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-280] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0014, 'learning_rate': 4.756565656565657e-06, 'epoch': 58.85}


Saving model checkpoint to ./model_output/checkpoint-295
Configuration saved in ./model_output/checkpoint-295/config.json


{'eval_loss': 0.048106059432029724, 'eval_runtime': 1.7066, 'eval_samples_per_second': 5.86, 'eval_steps_per_second': 5.86, 'epoch': 58.85}


Model weights saved in ./model_output/checkpoint-295/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-290] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0013, 'learning_rate': 4.751515151515152e-06, 'epoch': 59.85}


Saving model checkpoint to ./model_output/checkpoint-300
Configuration saved in ./model_output/checkpoint-300/config.json


{'eval_loss': 0.04729504510760307, 'eval_runtime': 1.7124, 'eval_samples_per_second': 5.84, 'eval_steps_per_second': 5.84, 'epoch': 59.85}


Model weights saved in ./model_output/checkpoint-300/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-295] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0013, 'learning_rate': 4.746464646464647e-06, 'epoch': 60.85}


Saving model checkpoint to ./model_output/checkpoint-305
Configuration saved in ./model_output/checkpoint-305/config.json


{'eval_loss': 0.049565982073545456, 'eval_runtime': 1.7131, 'eval_samples_per_second': 5.837, 'eval_steps_per_second': 5.837, 'epoch': 60.85}


Model weights saved in ./model_output/checkpoint-305/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-300] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0012, 'learning_rate': 4.741414141414142e-06, 'epoch': 61.85}


Saving model checkpoint to ./model_output/checkpoint-310
Configuration saved in ./model_output/checkpoint-310/config.json


{'eval_loss': 0.04636603221297264, 'eval_runtime': 1.7165, 'eval_samples_per_second': 5.826, 'eval_steps_per_second': 5.826, 'epoch': 61.85}


Model weights saved in ./model_output/checkpoint-310/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-285] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0012, 'learning_rate': 4.736363636363637e-06, 'epoch': 62.85}


Saving model checkpoint to ./model_output/checkpoint-315
Configuration saved in ./model_output/checkpoint-315/config.json


{'eval_loss': 0.044989120215177536, 'eval_runtime': 1.7164, 'eval_samples_per_second': 5.826, 'eval_steps_per_second': 5.826, 'epoch': 62.85}


Model weights saved in ./model_output/checkpoint-315/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-305] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0011, 'learning_rate': 4.731313131313131e-06, 'epoch': 63.85}


Saving model checkpoint to ./model_output/checkpoint-320
Configuration saved in ./model_output/checkpoint-320/config.json


{'eval_loss': 0.05822797492146492, 'eval_runtime': 1.7107, 'eval_samples_per_second': 5.846, 'eval_steps_per_second': 5.846, 'epoch': 63.85}


Model weights saved in ./model_output/checkpoint-320/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-310] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.001, 'learning_rate': 4.7262626262626265e-06, 'epoch': 64.85}


Saving model checkpoint to ./model_output/checkpoint-325
Configuration saved in ./model_output/checkpoint-325/config.json


{'eval_loss': 0.05631992220878601, 'eval_runtime': 1.712, 'eval_samples_per_second': 5.841, 'eval_steps_per_second': 5.841, 'epoch': 64.85}


Model weights saved in ./model_output/checkpoint-325/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-320] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0011, 'learning_rate': 4.721212121212122e-06, 'epoch': 65.85}


Saving model checkpoint to ./model_output/checkpoint-330
Configuration saved in ./model_output/checkpoint-330/config.json


{'eval_loss': 0.04396820068359375, 'eval_runtime': 1.7158, 'eval_samples_per_second': 5.828, 'eval_steps_per_second': 5.828, 'epoch': 65.85}


Model weights saved in ./model_output/checkpoint-330/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-315] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.001, 'learning_rate': 4.716161616161617e-06, 'epoch': 66.85}


Saving model checkpoint to ./model_output/checkpoint-335
Configuration saved in ./model_output/checkpoint-335/config.json


{'eval_loss': 0.043130841106176376, 'eval_runtime': 1.7104, 'eval_samples_per_second': 5.846, 'eval_steps_per_second': 5.846, 'epoch': 66.85}


Model weights saved in ./model_output/checkpoint-335/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-325] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.001, 'learning_rate': 4.711111111111111e-06, 'epoch': 67.85}


Saving model checkpoint to ./model_output/checkpoint-340
Configuration saved in ./model_output/checkpoint-340/config.json


{'eval_loss': 0.04854653775691986, 'eval_runtime': 1.7177, 'eval_samples_per_second': 5.822, 'eval_steps_per_second': 5.822, 'epoch': 67.85}


Model weights saved in ./model_output/checkpoint-340/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-330] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0009, 'learning_rate': 4.706060606060606e-06, 'epoch': 68.85}


Saving model checkpoint to ./model_output/checkpoint-345
Configuration saved in ./model_output/checkpoint-345/config.json


{'eval_loss': 0.05062275379896164, 'eval_runtime': 1.7145, 'eval_samples_per_second': 5.833, 'eval_steps_per_second': 5.833, 'epoch': 68.85}


Model weights saved in ./model_output/checkpoint-345/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-340] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0009, 'learning_rate': 4.701010101010101e-06, 'epoch': 69.85}


Saving model checkpoint to ./model_output/checkpoint-350
Configuration saved in ./model_output/checkpoint-350/config.json


{'eval_loss': 0.04748789593577385, 'eval_runtime': 1.7161, 'eval_samples_per_second': 5.827, 'eval_steps_per_second': 5.827, 'epoch': 69.85}


Model weights saved in ./model_output/checkpoint-350/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-345] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0009, 'learning_rate': 4.6959595959595965e-06, 'epoch': 70.85}


Saving model checkpoint to ./model_output/checkpoint-355
Configuration saved in ./model_output/checkpoint-355/config.json


{'eval_loss': 0.04756093770265579, 'eval_runtime': 1.7109, 'eval_samples_per_second': 5.845, 'eval_steps_per_second': 5.845, 'epoch': 70.85}


Model weights saved in ./model_output/checkpoint-355/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-350] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0009, 'learning_rate': 4.690909090909092e-06, 'epoch': 71.85}


Saving model checkpoint to ./model_output/checkpoint-360
Configuration saved in ./model_output/checkpoint-360/config.json


{'eval_loss': 0.039269573986530304, 'eval_runtime': 1.7069, 'eval_samples_per_second': 5.858, 'eval_steps_per_second': 5.858, 'epoch': 71.85}


Model weights saved in ./model_output/checkpoint-360/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-335] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0008, 'learning_rate': 4.685858585858587e-06, 'epoch': 72.85}


Saving model checkpoint to ./model_output/checkpoint-365
Configuration saved in ./model_output/checkpoint-365/config.json


{'eval_loss': 0.04151821881532669, 'eval_runtime': 1.7134, 'eval_samples_per_second': 5.836, 'eval_steps_per_second': 5.836, 'epoch': 72.85}


Model weights saved in ./model_output/checkpoint-365/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-355] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0008, 'learning_rate': 4.680808080808081e-06, 'epoch': 73.85}


Saving model checkpoint to ./model_output/checkpoint-370
Configuration saved in ./model_output/checkpoint-370/config.json


{'eval_loss': 0.03924575820565224, 'eval_runtime': 1.7093, 'eval_samples_per_second': 5.85, 'eval_steps_per_second': 5.85, 'epoch': 73.85}


Model weights saved in ./model_output/checkpoint-370/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-360] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0008, 'learning_rate': 4.675757575757576e-06, 'epoch': 74.85}


Saving model checkpoint to ./model_output/checkpoint-375
Configuration saved in ./model_output/checkpoint-375/config.json


{'eval_loss': 0.03749901056289673, 'eval_runtime': 1.7113, 'eval_samples_per_second': 5.843, 'eval_steps_per_second': 5.843, 'epoch': 74.85}


Model weights saved in ./model_output/checkpoint-375/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-365] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0007, 'learning_rate': 4.670707070707071e-06, 'epoch': 75.85}


Saving model checkpoint to ./model_output/checkpoint-380
Configuration saved in ./model_output/checkpoint-380/config.json


{'eval_loss': 0.03716227412223816, 'eval_runtime': 1.7157, 'eval_samples_per_second': 5.828, 'eval_steps_per_second': 5.828, 'epoch': 75.85}


Model weights saved in ./model_output/checkpoint-380/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-370] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0007, 'learning_rate': 4.6656565656565665e-06, 'epoch': 76.85}


Saving model checkpoint to ./model_output/checkpoint-385
Configuration saved in ./model_output/checkpoint-385/config.json


{'eval_loss': 0.037132881581783295, 'eval_runtime': 1.7105, 'eval_samples_per_second': 5.846, 'eval_steps_per_second': 5.846, 'epoch': 76.85}


Model weights saved in ./model_output/checkpoint-385/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-375] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0007, 'learning_rate': 4.660606060606061e-06, 'epoch': 77.85}


Saving model checkpoint to ./model_output/checkpoint-390
Configuration saved in ./model_output/checkpoint-390/config.json


{'eval_loss': 0.036664191633462906, 'eval_runtime': 1.7186, 'eval_samples_per_second': 5.819, 'eval_steps_per_second': 5.819, 'epoch': 77.85}


Model weights saved in ./model_output/checkpoint-390/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-380] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0007, 'learning_rate': 4.655555555555556e-06, 'epoch': 78.85}


Saving model checkpoint to ./model_output/checkpoint-395
Configuration saved in ./model_output/checkpoint-395/config.json


{'eval_loss': 0.036401040852069855, 'eval_runtime': 1.7115, 'eval_samples_per_second': 5.843, 'eval_steps_per_second': 5.843, 'epoch': 78.85}


Model weights saved in ./model_output/checkpoint-395/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-385] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0007, 'learning_rate': 4.650505050505051e-06, 'epoch': 79.85}


Saving model checkpoint to ./model_output/checkpoint-400
Configuration saved in ./model_output/checkpoint-400/config.json


{'eval_loss': 0.03934039920568466, 'eval_runtime': 1.7159, 'eval_samples_per_second': 5.828, 'eval_steps_per_second': 5.828, 'epoch': 79.85}


Model weights saved in ./model_output/checkpoint-400/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-390] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0007, 'learning_rate': 4.645454545454545e-06, 'epoch': 80.85}


Saving model checkpoint to ./model_output/checkpoint-405
Configuration saved in ./model_output/checkpoint-405/config.json


{'eval_loss': 0.04245776683092117, 'eval_runtime': 1.7125, 'eval_samples_per_second': 5.839, 'eval_steps_per_second': 5.839, 'epoch': 80.85}


Model weights saved in ./model_output/checkpoint-405/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-400] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0006, 'learning_rate': 4.6404040404040406e-06, 'epoch': 81.85}


Saving model checkpoint to ./model_output/checkpoint-410
Configuration saved in ./model_output/checkpoint-410/config.json


{'eval_loss': 0.048627935349941254, 'eval_runtime': 1.7178, 'eval_samples_per_second': 5.821, 'eval_steps_per_second': 5.821, 'epoch': 81.85}


Model weights saved in ./model_output/checkpoint-410/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-405] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0006, 'learning_rate': 4.635353535353536e-06, 'epoch': 82.85}


Saving model checkpoint to ./model_output/checkpoint-415
Configuration saved in ./model_output/checkpoint-415/config.json


{'eval_loss': 0.041124455630779266, 'eval_runtime': 1.714, 'eval_samples_per_second': 5.834, 'eval_steps_per_second': 5.834, 'epoch': 82.85}


Model weights saved in ./model_output/checkpoint-415/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-410] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0007, 'learning_rate': 4.630303030303031e-06, 'epoch': 83.85}


Saving model checkpoint to ./model_output/checkpoint-420
Configuration saved in ./model_output/checkpoint-420/config.json


{'eval_loss': 0.034401439130306244, 'eval_runtime': 1.7159, 'eval_samples_per_second': 5.828, 'eval_steps_per_second': 5.828, 'epoch': 83.85}


Model weights saved in ./model_output/checkpoint-420/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-395] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0006, 'learning_rate': 4.625252525252525e-06, 'epoch': 84.85}


Saving model checkpoint to ./model_output/checkpoint-425
Configuration saved in ./model_output/checkpoint-425/config.json


{'eval_loss': 0.031063854694366455, 'eval_runtime': 1.735, 'eval_samples_per_second': 5.764, 'eval_steps_per_second': 5.764, 'epoch': 84.85}


Model weights saved in ./model_output/checkpoint-425/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-415] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0006, 'learning_rate': 4.62020202020202e-06, 'epoch': 85.85}


Saving model checkpoint to ./model_output/checkpoint-430
Configuration saved in ./model_output/checkpoint-430/config.json


{'eval_loss': 0.030511682853102684, 'eval_runtime': 1.707, 'eval_samples_per_second': 5.858, 'eval_steps_per_second': 5.858, 'epoch': 85.85}


Model weights saved in ./model_output/checkpoint-430/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-420] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0006, 'learning_rate': 4.615151515151515e-06, 'epoch': 86.85}


Saving model checkpoint to ./model_output/checkpoint-435
Configuration saved in ./model_output/checkpoint-435/config.json


{'eval_loss': 0.02997261844575405, 'eval_runtime': 1.7304, 'eval_samples_per_second': 5.779, 'eval_steps_per_second': 5.779, 'epoch': 86.85}


Model weights saved in ./model_output/checkpoint-435/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-425] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0006, 'learning_rate': 4.6101010101010106e-06, 'epoch': 87.85}


Saving model checkpoint to ./model_output/checkpoint-440
Configuration saved in ./model_output/checkpoint-440/config.json


{'eval_loss': 0.029242131859064102, 'eval_runtime': 1.7107, 'eval_samples_per_second': 5.846, 'eval_steps_per_second': 5.846, 'epoch': 87.85}


Model weights saved in ./model_output/checkpoint-440/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-430] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0006, 'learning_rate': 4.605050505050506e-06, 'epoch': 88.85}


Saving model checkpoint to ./model_output/checkpoint-445
Configuration saved in ./model_output/checkpoint-445/config.json


{'eval_loss': 0.02939123846590519, 'eval_runtime': 1.7089, 'eval_samples_per_second': 5.852, 'eval_steps_per_second': 5.852, 'epoch': 88.85}


Model weights saved in ./model_output/checkpoint-445/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-435] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0005, 'learning_rate': 4.600000000000001e-06, 'epoch': 89.85}


Saving model checkpoint to ./model_output/checkpoint-450
Configuration saved in ./model_output/checkpoint-450/config.json


{'eval_loss': 0.03148222714662552, 'eval_runtime': 1.719, 'eval_samples_per_second': 5.817, 'eval_steps_per_second': 5.817, 'epoch': 89.85}


Model weights saved in ./model_output/checkpoint-450/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-445] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0005, 'learning_rate': 4.594949494949495e-06, 'epoch': 90.85}


Saving model checkpoint to ./model_output/checkpoint-455
Configuration saved in ./model_output/checkpoint-455/config.json


{'eval_loss': 0.031515248119831085, 'eval_runtime': 1.7138, 'eval_samples_per_second': 5.835, 'eval_steps_per_second': 5.835, 'epoch': 90.85}


Model weights saved in ./model_output/checkpoint-455/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-450] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0006, 'learning_rate': 4.58989898989899e-06, 'epoch': 91.85}


Saving model checkpoint to ./model_output/checkpoint-460
Configuration saved in ./model_output/checkpoint-460/config.json


{'eval_loss': 0.028005117550492287, 'eval_runtime': 1.7194, 'eval_samples_per_second': 5.816, 'eval_steps_per_second': 5.816, 'epoch': 91.85}


Model weights saved in ./model_output/checkpoint-460/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-440] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0006, 'learning_rate': 4.5848484848484854e-06, 'epoch': 92.85}


Saving model checkpoint to ./model_output/checkpoint-465
Configuration saved in ./model_output/checkpoint-465/config.json


{'eval_loss': 0.029714643955230713, 'eval_runtime': 1.7109, 'eval_samples_per_second': 5.845, 'eval_steps_per_second': 5.845, 'epoch': 92.85}


Model weights saved in ./model_output/checkpoint-465/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-455] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0005, 'learning_rate': 4.579797979797981e-06, 'epoch': 93.85}


Saving model checkpoint to ./model_output/checkpoint-470
Configuration saved in ./model_output/checkpoint-470/config.json


{'eval_loss': 0.030949890613555908, 'eval_runtime': 1.7084, 'eval_samples_per_second': 5.854, 'eval_steps_per_second': 5.854, 'epoch': 93.85}


Model weights saved in ./model_output/checkpoint-470/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-465] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0005, 'learning_rate': 4.574747474747475e-06, 'epoch': 94.85}


Saving model checkpoint to ./model_output/checkpoint-475
Configuration saved in ./model_output/checkpoint-475/config.json


{'eval_loss': 0.03228236734867096, 'eval_runtime': 1.7126, 'eval_samples_per_second': 5.839, 'eval_steps_per_second': 5.839, 'epoch': 94.85}


Model weights saved in ./model_output/checkpoint-475/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-470] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0005, 'learning_rate': 4.56969696969697e-06, 'epoch': 95.85}


Saving model checkpoint to ./model_output/checkpoint-480
Configuration saved in ./model_output/checkpoint-480/config.json


{'eval_loss': 0.027259796857833862, 'eval_runtime': 1.7131, 'eval_samples_per_second': 5.838, 'eval_steps_per_second': 5.838, 'epoch': 95.85}


Model weights saved in ./model_output/checkpoint-480/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-460] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0005, 'learning_rate': 4.564646464646465e-06, 'epoch': 96.85}


Saving model checkpoint to ./model_output/checkpoint-485
Configuration saved in ./model_output/checkpoint-485/config.json


{'eval_loss': 0.02676115557551384, 'eval_runtime': 1.7105, 'eval_samples_per_second': 5.846, 'eval_steps_per_second': 5.846, 'epoch': 96.85}


Model weights saved in ./model_output/checkpoint-485/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-475] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0005, 'learning_rate': 4.55959595959596e-06, 'epoch': 97.85}


Saving model checkpoint to ./model_output/checkpoint-490
Configuration saved in ./model_output/checkpoint-490/config.json


{'eval_loss': 0.02630072459578514, 'eval_runtime': 1.7161, 'eval_samples_per_second': 5.827, 'eval_steps_per_second': 5.827, 'epoch': 97.85}


Model weights saved in ./model_output/checkpoint-490/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-480] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0005, 'learning_rate': 4.554545454545455e-06, 'epoch': 98.85}


Saving model checkpoint to ./model_output/checkpoint-495
Configuration saved in ./model_output/checkpoint-495/config.json


{'eval_loss': 0.024703821167349815, 'eval_runtime': 1.7112, 'eval_samples_per_second': 5.844, 'eval_steps_per_second': 5.844, 'epoch': 98.85}


Model weights saved in ./model_output/checkpoint-495/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-485] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0005, 'learning_rate': 4.54949494949495e-06, 'epoch': 99.85}


Saving model checkpoint to ./model_output/checkpoint-500
Configuration saved in ./model_output/checkpoint-500/config.json


{'eval_loss': 0.02612021006643772, 'eval_runtime': 1.7166, 'eval_samples_per_second': 5.825, 'eval_steps_per_second': 5.825, 'epoch': 99.85}


Model weights saved in ./model_output/checkpoint-500/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-490] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0005, 'learning_rate': 4.544444444444445e-06, 'epoch': 100.85}


Saving model checkpoint to ./model_output/checkpoint-505
Configuration saved in ./model_output/checkpoint-505/config.json


{'eval_loss': 0.026247549802064896, 'eval_runtime': 1.7078, 'eval_samples_per_second': 5.856, 'eval_steps_per_second': 5.856, 'epoch': 100.85}


Model weights saved in ./model_output/checkpoint-505/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-500] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0005, 'learning_rate': 4.539393939393939e-06, 'epoch': 101.85}


Saving model checkpoint to ./model_output/checkpoint-510
Configuration saved in ./model_output/checkpoint-510/config.json


{'eval_loss': 0.023485563695430756, 'eval_runtime': 1.7105, 'eval_samples_per_second': 5.846, 'eval_steps_per_second': 5.846, 'epoch': 101.85}


Model weights saved in ./model_output/checkpoint-510/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-495] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0005, 'learning_rate': 4.534343434343434e-06, 'epoch': 102.85}


Saving model checkpoint to ./model_output/checkpoint-515
Configuration saved in ./model_output/checkpoint-515/config.json


{'eval_loss': 0.023964999243617058, 'eval_runtime': 1.7153, 'eval_samples_per_second': 5.83, 'eval_steps_per_second': 5.83, 'epoch': 102.85}


Model weights saved in ./model_output/checkpoint-515/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-505] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0004, 'learning_rate': 4.5292929292929295e-06, 'epoch': 103.85}


Saving model checkpoint to ./model_output/checkpoint-520
Configuration saved in ./model_output/checkpoint-520/config.json


{'eval_loss': 0.02304719388484955, 'eval_runtime': 1.7121, 'eval_samples_per_second': 5.841, 'eval_steps_per_second': 5.841, 'epoch': 103.85}


Model weights saved in ./model_output/checkpoint-520/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-510] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0005, 'learning_rate': 4.524242424242425e-06, 'epoch': 104.85}


Saving model checkpoint to ./model_output/checkpoint-525
Configuration saved in ./model_output/checkpoint-525/config.json


{'eval_loss': 0.02204766497015953, 'eval_runtime': 1.7194, 'eval_samples_per_second': 5.816, 'eval_steps_per_second': 5.816, 'epoch': 104.85}


Model weights saved in ./model_output/checkpoint-525/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-515] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0005, 'learning_rate': 4.51919191919192e-06, 'epoch': 105.85}


Saving model checkpoint to ./model_output/checkpoint-530
Configuration saved in ./model_output/checkpoint-530/config.json


{'eval_loss': 0.021862637251615524, 'eval_runtime': 1.705, 'eval_samples_per_second': 5.865, 'eval_steps_per_second': 5.865, 'epoch': 105.85}


Model weights saved in ./model_output/checkpoint-530/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-520] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0004, 'learning_rate': 4.514141414141415e-06, 'epoch': 106.85}


Saving model checkpoint to ./model_output/checkpoint-535
Configuration saved in ./model_output/checkpoint-535/config.json


{'eval_loss': 0.022097285836935043, 'eval_runtime': 1.7097, 'eval_samples_per_second': 5.849, 'eval_steps_per_second': 5.849, 'epoch': 106.85}


Model weights saved in ./model_output/checkpoint-535/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-525] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0005, 'learning_rate': 4.50909090909091e-06, 'epoch': 107.85}


Saving model checkpoint to ./model_output/checkpoint-540
Configuration saved in ./model_output/checkpoint-540/config.json


{'eval_loss': 0.022251952439546585, 'eval_runtime': 1.7116, 'eval_samples_per_second': 5.842, 'eval_steps_per_second': 5.842, 'epoch': 107.85}


Model weights saved in ./model_output/checkpoint-540/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-535] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0004, 'learning_rate': 4.504040404040404e-06, 'epoch': 108.85}


Saving model checkpoint to ./model_output/checkpoint-545
Configuration saved in ./model_output/checkpoint-545/config.json


{'eval_loss': 0.0220603309571743, 'eval_runtime': 1.7117, 'eval_samples_per_second': 5.842, 'eval_steps_per_second': 5.842, 'epoch': 108.85}


Model weights saved in ./model_output/checkpoint-545/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-540] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0004, 'learning_rate': 4.4989898989898995e-06, 'epoch': 109.85}


Saving model checkpoint to ./model_output/checkpoint-550
Configuration saved in ./model_output/checkpoint-550/config.json


{'eval_loss': 0.021635999903082848, 'eval_runtime': 1.7152, 'eval_samples_per_second': 5.83, 'eval_steps_per_second': 5.83, 'epoch': 109.85}


Model weights saved in ./model_output/checkpoint-550/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-530] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0004, 'learning_rate': 4.493939393939395e-06, 'epoch': 110.85}


Saving model checkpoint to ./model_output/checkpoint-555
Configuration saved in ./model_output/checkpoint-555/config.json


{'eval_loss': 0.021509090438485146, 'eval_runtime': 1.7148, 'eval_samples_per_second': 5.832, 'eval_steps_per_second': 5.832, 'epoch': 110.85}


Model weights saved in ./model_output/checkpoint-555/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-545] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0004, 'learning_rate': 4.488888888888889e-06, 'epoch': 111.85}


Saving model checkpoint to ./model_output/checkpoint-560
Configuration saved in ./model_output/checkpoint-560/config.json


{'eval_loss': 0.02102566882967949, 'eval_runtime': 1.714, 'eval_samples_per_second': 5.834, 'eval_steps_per_second': 5.834, 'epoch': 111.85}


Model weights saved in ./model_output/checkpoint-560/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-550] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0004, 'learning_rate': 4.483838383838384e-06, 'epoch': 112.85}


Saving model checkpoint to ./model_output/checkpoint-565
Configuration saved in ./model_output/checkpoint-565/config.json


{'eval_loss': 0.02090492844581604, 'eval_runtime': 1.712, 'eval_samples_per_second': 5.841, 'eval_steps_per_second': 5.841, 'epoch': 112.85}


Model weights saved in ./model_output/checkpoint-565/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-555] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0004, 'learning_rate': 4.478787878787879e-06, 'epoch': 113.85}


Saving model checkpoint to ./model_output/checkpoint-570
Configuration saved in ./model_output/checkpoint-570/config.json


{'eval_loss': 0.021096227690577507, 'eval_runtime': 1.7091, 'eval_samples_per_second': 5.851, 'eval_steps_per_second': 5.851, 'epoch': 113.85}


Model weights saved in ./model_output/checkpoint-570/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-560] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0004, 'learning_rate': 4.473737373737374e-06, 'epoch': 114.85}


Saving model checkpoint to ./model_output/checkpoint-575
Configuration saved in ./model_output/checkpoint-575/config.json


{'eval_loss': 0.022418130189180374, 'eval_runtime': 1.7111, 'eval_samples_per_second': 5.844, 'eval_steps_per_second': 5.844, 'epoch': 114.85}


Model weights saved in ./model_output/checkpoint-575/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-570] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0004, 'learning_rate': 4.468686868686869e-06, 'epoch': 115.85}


Saving model checkpoint to ./model_output/checkpoint-580
Configuration saved in ./model_output/checkpoint-580/config.json


{'eval_loss': 0.02179756760597229, 'eval_runtime': 1.7152, 'eval_samples_per_second': 5.83, 'eval_steps_per_second': 5.83, 'epoch': 115.85}


Model weights saved in ./model_output/checkpoint-580/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-575] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0004, 'learning_rate': 4.463636363636364e-06, 'epoch': 116.85}


Saving model checkpoint to ./model_output/checkpoint-585
Configuration saved in ./model_output/checkpoint-585/config.json


{'eval_loss': 0.020462822169065475, 'eval_runtime': 1.7195, 'eval_samples_per_second': 5.816, 'eval_steps_per_second': 5.816, 'epoch': 116.85}


Model weights saved in ./model_output/checkpoint-585/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-565] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0004, 'learning_rate': 4.458585858585859e-06, 'epoch': 117.85}


Saving model checkpoint to ./model_output/checkpoint-590
Configuration saved in ./model_output/checkpoint-590/config.json


{'eval_loss': 0.021172653883695602, 'eval_runtime': 1.7111, 'eval_samples_per_second': 5.844, 'eval_steps_per_second': 5.844, 'epoch': 117.85}


Model weights saved in ./model_output/checkpoint-590/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-580] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0004, 'learning_rate': 4.453535353535354e-06, 'epoch': 118.85}


Saving model checkpoint to ./model_output/checkpoint-595
Configuration saved in ./model_output/checkpoint-595/config.json


{'eval_loss': 0.023584920912981033, 'eval_runtime': 1.7195, 'eval_samples_per_second': 5.816, 'eval_steps_per_second': 5.816, 'epoch': 118.85}


Model weights saved in ./model_output/checkpoint-595/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-590] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0003, 'learning_rate': 4.448484848484848e-06, 'epoch': 119.85}


Saving model checkpoint to ./model_output/checkpoint-600
Configuration saved in ./model_output/checkpoint-600/config.json


{'eval_loss': 0.02241574414074421, 'eval_runtime': 1.7174, 'eval_samples_per_second': 5.823, 'eval_steps_per_second': 5.823, 'epoch': 119.85}


Model weights saved in ./model_output/checkpoint-600/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-595] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0003, 'learning_rate': 4.4434343434343435e-06, 'epoch': 120.85}


Saving model checkpoint to ./model_output/checkpoint-605
Configuration saved in ./model_output/checkpoint-605/config.json


{'eval_loss': 0.021469229832291603, 'eval_runtime': 1.714, 'eval_samples_per_second': 5.834, 'eval_steps_per_second': 5.834, 'epoch': 120.85}


Model weights saved in ./model_output/checkpoint-605/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-600] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0003, 'learning_rate': 4.438383838383839e-06, 'epoch': 121.85}


Saving model checkpoint to ./model_output/checkpoint-610
Configuration saved in ./model_output/checkpoint-610/config.json


{'eval_loss': 0.021992091089487076, 'eval_runtime': 1.7205, 'eval_samples_per_second': 5.812, 'eval_steps_per_second': 5.812, 'epoch': 121.85}


Model weights saved in ./model_output/checkpoint-610/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-605] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0003, 'learning_rate': 4.433333333333334e-06, 'epoch': 122.85}


Saving model checkpoint to ./model_output/checkpoint-615
Configuration saved in ./model_output/checkpoint-615/config.json


{'eval_loss': 0.021015429869294167, 'eval_runtime': 1.7107, 'eval_samples_per_second': 5.846, 'eval_steps_per_second': 5.846, 'epoch': 122.85}


Model weights saved in ./model_output/checkpoint-615/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-610] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0003, 'learning_rate': 4.428282828282829e-06, 'epoch': 123.85}


Saving model checkpoint to ./model_output/checkpoint-620
Configuration saved in ./model_output/checkpoint-620/config.json


{'eval_loss': 0.02061854861676693, 'eval_runtime': 1.7138, 'eval_samples_per_second': 5.835, 'eval_steps_per_second': 5.835, 'epoch': 123.85}


Model weights saved in ./model_output/checkpoint-620/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-615] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0003, 'learning_rate': 4.423232323232324e-06, 'epoch': 124.85}


Saving model checkpoint to ./model_output/checkpoint-625
Configuration saved in ./model_output/checkpoint-625/config.json


{'eval_loss': 0.019181448966264725, 'eval_runtime': 1.7084, 'eval_samples_per_second': 5.853, 'eval_steps_per_second': 5.853, 'epoch': 124.85}


Model weights saved in ./model_output/checkpoint-625/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-585] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0003, 'learning_rate': 4.418181818181818e-06, 'epoch': 125.85}


Saving model checkpoint to ./model_output/checkpoint-630
Configuration saved in ./model_output/checkpoint-630/config.json


{'eval_loss': 0.018768401816487312, 'eval_runtime': 1.7187, 'eval_samples_per_second': 5.818, 'eval_steps_per_second': 5.818, 'epoch': 125.85}


Model weights saved in ./model_output/checkpoint-630/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-620] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0003, 'learning_rate': 4.4131313131313135e-06, 'epoch': 126.85}


Saving model checkpoint to ./model_output/checkpoint-635
Configuration saved in ./model_output/checkpoint-635/config.json


{'eval_loss': 0.019670424982905388, 'eval_runtime': 1.7121, 'eval_samples_per_second': 5.841, 'eval_steps_per_second': 5.841, 'epoch': 126.85}


Model weights saved in ./model_output/checkpoint-635/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-625] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0003, 'learning_rate': 4.408080808080809e-06, 'epoch': 127.85}


Saving model checkpoint to ./model_output/checkpoint-640
Configuration saved in ./model_output/checkpoint-640/config.json


{'eval_loss': 0.016966965049505234, 'eval_runtime': 1.7281, 'eval_samples_per_second': 5.787, 'eval_steps_per_second': 5.787, 'epoch': 127.85}


Model weights saved in ./model_output/checkpoint-640/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-630] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0003, 'learning_rate': 4.403030303030304e-06, 'epoch': 128.85}


Saving model checkpoint to ./model_output/checkpoint-645
Configuration saved in ./model_output/checkpoint-645/config.json


{'eval_loss': 0.01622496172785759, 'eval_runtime': 1.7117, 'eval_samples_per_second': 5.842, 'eval_steps_per_second': 5.842, 'epoch': 128.85}


Model weights saved in ./model_output/checkpoint-645/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-635] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0003, 'learning_rate': 4.397979797979798e-06, 'epoch': 129.85}


Saving model checkpoint to ./model_output/checkpoint-650
Configuration saved in ./model_output/checkpoint-650/config.json


{'eval_loss': 0.016123898327350616, 'eval_runtime': 1.7104, 'eval_samples_per_second': 5.847, 'eval_steps_per_second': 5.847, 'epoch': 129.85}


Model weights saved in ./model_output/checkpoint-650/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-640] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0003, 'learning_rate': 4.392929292929293e-06, 'epoch': 130.85}


Saving model checkpoint to ./model_output/checkpoint-655
Configuration saved in ./model_output/checkpoint-655/config.json


{'eval_loss': 0.015862921252846718, 'eval_runtime': 1.7168, 'eval_samples_per_second': 5.825, 'eval_steps_per_second': 5.825, 'epoch': 130.85}


Model weights saved in ./model_output/checkpoint-655/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-645] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0003, 'learning_rate': 4.387878787878788e-06, 'epoch': 131.85}


Saving model checkpoint to ./model_output/checkpoint-660
Configuration saved in ./model_output/checkpoint-660/config.json


{'eval_loss': 0.0160547886043787, 'eval_runtime': 1.7119, 'eval_samples_per_second': 5.841, 'eval_steps_per_second': 5.841, 'epoch': 131.85}


Model weights saved in ./model_output/checkpoint-660/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-650] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0003, 'learning_rate': 4.382828282828283e-06, 'epoch': 132.85}


Saving model checkpoint to ./model_output/checkpoint-665
Configuration saved in ./model_output/checkpoint-665/config.json


{'eval_loss': 0.015748152509331703, 'eval_runtime': 1.7134, 'eval_samples_per_second': 5.836, 'eval_steps_per_second': 5.836, 'epoch': 132.85}


Model weights saved in ./model_output/checkpoint-665/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-655] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0003, 'learning_rate': 4.377777777777778e-06, 'epoch': 133.85}


Saving model checkpoint to ./model_output/checkpoint-670
Configuration saved in ./model_output/checkpoint-670/config.json


{'eval_loss': 0.01597701944410801, 'eval_runtime': 1.7181, 'eval_samples_per_second': 5.82, 'eval_steps_per_second': 5.82, 'epoch': 133.85}


Model weights saved in ./model_output/checkpoint-670/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-660] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0003, 'learning_rate': 4.372727272727273e-06, 'epoch': 134.85}


Saving model checkpoint to ./model_output/checkpoint-675
Configuration saved in ./model_output/checkpoint-675/config.json


{'eval_loss': 0.017956096678972244, 'eval_runtime': 1.7145, 'eval_samples_per_second': 5.833, 'eval_steps_per_second': 5.833, 'epoch': 134.85}


Model weights saved in ./model_output/checkpoint-675/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-670] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0003, 'learning_rate': 4.367676767676768e-06, 'epoch': 135.85}


Saving model checkpoint to ./model_output/checkpoint-680
Configuration saved in ./model_output/checkpoint-680/config.json


{'eval_loss': 0.020959952846169472, 'eval_runtime': 1.7138, 'eval_samples_per_second': 5.835, 'eval_steps_per_second': 5.835, 'epoch': 135.85}


Model weights saved in ./model_output/checkpoint-680/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-675] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0003, 'learning_rate': 4.362626262626262e-06, 'epoch': 136.85}


Saving model checkpoint to ./model_output/checkpoint-685
Configuration saved in ./model_output/checkpoint-685/config.json


{'eval_loss': 0.024603435769677162, 'eval_runtime': 1.7119, 'eval_samples_per_second': 5.841, 'eval_steps_per_second': 5.841, 'epoch': 136.85}


Model weights saved in ./model_output/checkpoint-685/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-680] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0003, 'learning_rate': 4.3575757575757576e-06, 'epoch': 137.85}


Saving model checkpoint to ./model_output/checkpoint-690
Configuration saved in ./model_output/checkpoint-690/config.json


{'eval_loss': 0.028783634305000305, 'eval_runtime': 1.7181, 'eval_samples_per_second': 5.82, 'eval_steps_per_second': 5.82, 'epoch': 137.85}


Model weights saved in ./model_output/checkpoint-690/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-685] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0003, 'learning_rate': 4.3525252525252536e-06, 'epoch': 138.85}


Saving model checkpoint to ./model_output/checkpoint-695
Configuration saved in ./model_output/checkpoint-695/config.json


{'eval_loss': 0.025708818808197975, 'eval_runtime': 1.7073, 'eval_samples_per_second': 5.857, 'eval_steps_per_second': 5.857, 'epoch': 138.85}


Model weights saved in ./model_output/checkpoint-695/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-690] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0003, 'learning_rate': 4.347474747474748e-06, 'epoch': 139.85}


Saving model checkpoint to ./model_output/checkpoint-700
Configuration saved in ./model_output/checkpoint-700/config.json


{'eval_loss': 0.018236931413412094, 'eval_runtime': 1.714, 'eval_samples_per_second': 5.834, 'eval_steps_per_second': 5.834, 'epoch': 139.85}


Model weights saved in ./model_output/checkpoint-700/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-695] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0003, 'learning_rate': 4.342424242424243e-06, 'epoch': 140.85}


Saving model checkpoint to ./model_output/checkpoint-705
Configuration saved in ./model_output/checkpoint-705/config.json


{'eval_loss': 0.016728129237890244, 'eval_runtime': 1.7111, 'eval_samples_per_second': 5.844, 'eval_steps_per_second': 5.844, 'epoch': 140.85}


Model weights saved in ./model_output/checkpoint-705/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-700] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0003, 'learning_rate': 4.337373737373738e-06, 'epoch': 141.85}


Saving model checkpoint to ./model_output/checkpoint-710
Configuration saved in ./model_output/checkpoint-710/config.json


{'eval_loss': 0.01591055653989315, 'eval_runtime': 1.7119, 'eval_samples_per_second': 5.841, 'eval_steps_per_second': 5.841, 'epoch': 141.85}


Model weights saved in ./model_output/checkpoint-710/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-705] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0003, 'learning_rate': 4.3323232323232324e-06, 'epoch': 142.85}


Saving model checkpoint to ./model_output/checkpoint-715
Configuration saved in ./model_output/checkpoint-715/config.json


{'eval_loss': 0.014235051348805428, 'eval_runtime': 1.7106, 'eval_samples_per_second': 5.846, 'eval_steps_per_second': 5.846, 'epoch': 142.85}


Model weights saved in ./model_output/checkpoint-715/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-665] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0003, 'learning_rate': 4.327272727272728e-06, 'epoch': 143.85}


Saving model checkpoint to ./model_output/checkpoint-720
Configuration saved in ./model_output/checkpoint-720/config.json


{'eval_loss': 0.014297349378466606, 'eval_runtime': 1.7124, 'eval_samples_per_second': 5.84, 'eval_steps_per_second': 5.84, 'epoch': 143.85}


Model weights saved in ./model_output/checkpoint-720/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-710] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0003, 'learning_rate': 4.322222222222223e-06, 'epoch': 144.85}


Saving model checkpoint to ./model_output/checkpoint-725
Configuration saved in ./model_output/checkpoint-725/config.json


{'eval_loss': 0.015635624527931213, 'eval_runtime': 1.7104, 'eval_samples_per_second': 5.847, 'eval_steps_per_second': 5.847, 'epoch': 144.85}


Model weights saved in ./model_output/checkpoint-725/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-720] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0003, 'learning_rate': 4.317171717171718e-06, 'epoch': 145.85}


Saving model checkpoint to ./model_output/checkpoint-730
Configuration saved in ./model_output/checkpoint-730/config.json


{'eval_loss': 0.01833307556807995, 'eval_runtime': 1.7104, 'eval_samples_per_second': 5.847, 'eval_steps_per_second': 5.847, 'epoch': 145.85}


Model weights saved in ./model_output/checkpoint-730/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-725] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0003, 'learning_rate': 4.312121212121212e-06, 'epoch': 146.85}


Saving model checkpoint to ./model_output/checkpoint-735
Configuration saved in ./model_output/checkpoint-735/config.json


{'eval_loss': 0.025125358253717422, 'eval_runtime': 1.7099, 'eval_samples_per_second': 5.848, 'eval_steps_per_second': 5.848, 'epoch': 146.85}


Model weights saved in ./model_output/checkpoint-735/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-730] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0003, 'learning_rate': 4.307070707070707e-06, 'epoch': 147.85}


Saving model checkpoint to ./model_output/checkpoint-740
Configuration saved in ./model_output/checkpoint-740/config.json


{'eval_loss': 0.02861500345170498, 'eval_runtime': 1.7069, 'eval_samples_per_second': 5.859, 'eval_steps_per_second': 5.859, 'epoch': 147.85}


Model weights saved in ./model_output/checkpoint-740/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-735] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0003, 'learning_rate': 4.3020202020202024e-06, 'epoch': 148.85}


Saving model checkpoint to ./model_output/checkpoint-745
Configuration saved in ./model_output/checkpoint-745/config.json


{'eval_loss': 0.024344250559806824, 'eval_runtime': 1.7165, 'eval_samples_per_second': 5.826, 'eval_steps_per_second': 5.826, 'epoch': 148.85}


Model weights saved in ./model_output/checkpoint-745/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-740] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0002, 'learning_rate': 4.296969696969698e-06, 'epoch': 149.85}


Saving model checkpoint to ./model_output/checkpoint-750
Configuration saved in ./model_output/checkpoint-750/config.json


{'eval_loss': 0.02032971940934658, 'eval_runtime': 1.7137, 'eval_samples_per_second': 5.835, 'eval_steps_per_second': 5.835, 'epoch': 149.85}


Model weights saved in ./model_output/checkpoint-750/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-745] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0002, 'learning_rate': 4.291919191919192e-06, 'epoch': 150.85}


Saving model checkpoint to ./model_output/checkpoint-755
Configuration saved in ./model_output/checkpoint-755/config.json


{'eval_loss': 0.01849498599767685, 'eval_runtime': 1.7108, 'eval_samples_per_second': 5.845, 'eval_steps_per_second': 5.845, 'epoch': 150.85}


Model weights saved in ./model_output/checkpoint-755/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-750] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0002, 'learning_rate': 4.286868686868687e-06, 'epoch': 151.85}


Saving model checkpoint to ./model_output/checkpoint-760
Configuration saved in ./model_output/checkpoint-760/config.json


{'eval_loss': 0.01630324125289917, 'eval_runtime': 1.7147, 'eval_samples_per_second': 5.832, 'eval_steps_per_second': 5.832, 'epoch': 151.85}


Model weights saved in ./model_output/checkpoint-760/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-755] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0002, 'learning_rate': 4.281818181818182e-06, 'epoch': 152.85}


Saving model checkpoint to ./model_output/checkpoint-765
Configuration saved in ./model_output/checkpoint-765/config.json


{'eval_loss': 0.015387753024697304, 'eval_runtime': 1.7149, 'eval_samples_per_second': 5.831, 'eval_steps_per_second': 5.831, 'epoch': 152.85}


Model weights saved in ./model_output/checkpoint-765/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-760] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./model_output/checkpoint-715 (score: 0.014235051348805428).
***** Running Prediction *****
  Num examples = 13
  Batch size = 1


{'train_runtime': 9405.5184, 'train_samples_per_second': 9.994, 'train_steps_per_second': 0.532, 'train_loss': 0.15362895796002418, 'epoch': 152.85}
Fitting model: roberta_base using fold 7 as out of fold test data.
Train data sizes: (94, 94).
Val data sizes: (13, 13).
Test data sizes: (10, 10).


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file config.json from cache at /home/joel_stremmel/.cache/huggingface/hub/models--roberta-base/snapshots/bc2764f8af2e92b6eb5679868df33e224075ca68/config.json
Model config RobertaConfig {
  "_name_or_path": "roberta-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

loading file vocab.json from cache at /home/joel_stremm

Map:   0%|          | 0/94 [00:00<?, ? examples/s]

Map:   0%|          | 0/13 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using cuda_amp half precision backend
***** Running training *****
  Num examples = 94
  Num Epochs = 1000
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 16
  Total optimization steps = 5000
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8026, 'learning_rate': 5.000000000000001e-07, 'epoch': 0.85}


Saving model checkpoint to ./model_output/checkpoint-5
Configuration saved in ./model_output/checkpoint-5/config.json


{'eval_loss': 0.6956660747528076, 'eval_runtime': 2.1942, 'eval_samples_per_second': 5.925, 'eval_steps_per_second': 5.925, 'epoch': 0.85}


Model weights saved in ./model_output/checkpoint-5/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-715] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8053, 'learning_rate': 1.0000000000000002e-06, 'epoch': 1.85}


Saving model checkpoint to ./model_output/checkpoint-10
Configuration saved in ./model_output/checkpoint-10/config.json


{'eval_loss': 0.695811927318573, 'eval_runtime': 2.1971, 'eval_samples_per_second': 5.917, 'eval_steps_per_second': 5.917, 'epoch': 1.85}


Model weights saved in ./model_output/checkpoint-10/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-765] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7955, 'learning_rate': 1.5e-06, 'epoch': 2.85}


Saving model checkpoint to ./model_output/checkpoint-15
Configuration saved in ./model_output/checkpoint-15/config.json


{'eval_loss': 0.6962449550628662, 'eval_runtime': 2.1985, 'eval_samples_per_second': 5.913, 'eval_steps_per_second': 5.913, 'epoch': 2.85}


Model weights saved in ./model_output/checkpoint-15/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-10] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8064, 'learning_rate': 2.0000000000000003e-06, 'epoch': 3.85}


Saving model checkpoint to ./model_output/checkpoint-20
Configuration saved in ./model_output/checkpoint-20/config.json


{'eval_loss': 0.6972169876098633, 'eval_runtime': 2.1968, 'eval_samples_per_second': 5.918, 'eval_steps_per_second': 5.918, 'epoch': 3.85}


Model weights saved in ./model_output/checkpoint-20/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-15] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8078, 'learning_rate': 2.5e-06, 'epoch': 4.85}


Saving model checkpoint to ./model_output/checkpoint-25
Configuration saved in ./model_output/checkpoint-25/config.json


{'eval_loss': 0.6986137628555298, 'eval_runtime': 2.2003, 'eval_samples_per_second': 5.908, 'eval_steps_per_second': 5.908, 'epoch': 4.85}


Model weights saved in ./model_output/checkpoint-25/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-20] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7988, 'learning_rate': 3e-06, 'epoch': 5.85}


Saving model checkpoint to ./model_output/checkpoint-30
Configuration saved in ./model_output/checkpoint-30/config.json


{'eval_loss': 0.7007021307945251, 'eval_runtime': 2.1941, 'eval_samples_per_second': 5.925, 'eval_steps_per_second': 5.925, 'epoch': 5.85}


Model weights saved in ./model_output/checkpoint-30/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-25] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7889, 'learning_rate': 3.5e-06, 'epoch': 6.85}


Saving model checkpoint to ./model_output/checkpoint-35
Configuration saved in ./model_output/checkpoint-35/config.json


{'eval_loss': 0.7026857733726501, 'eval_runtime': 2.2003, 'eval_samples_per_second': 5.908, 'eval_steps_per_second': 5.908, 'epoch': 6.85}


Model weights saved in ./model_output/checkpoint-35/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-30] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.796, 'learning_rate': 4.000000000000001e-06, 'epoch': 7.85}


Saving model checkpoint to ./model_output/checkpoint-40
Configuration saved in ./model_output/checkpoint-40/config.json


{'eval_loss': 0.7053810954093933, 'eval_runtime': 2.201, 'eval_samples_per_second': 5.906, 'eval_steps_per_second': 5.906, 'epoch': 7.85}


Model weights saved in ./model_output/checkpoint-40/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-35] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7955, 'learning_rate': 4.5e-06, 'epoch': 8.85}


Saving model checkpoint to ./model_output/checkpoint-45
Configuration saved in ./model_output/checkpoint-45/config.json


{'eval_loss': 0.7086734771728516, 'eval_runtime': 2.1989, 'eval_samples_per_second': 5.912, 'eval_steps_per_second': 5.912, 'epoch': 8.85}


Model weights saved in ./model_output/checkpoint-45/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-40] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7986, 'learning_rate': 5e-06, 'epoch': 9.85}


Saving model checkpoint to ./model_output/checkpoint-50
Configuration saved in ./model_output/checkpoint-50/config.json


{'eval_loss': 0.7114807963371277, 'eval_runtime': 2.2187, 'eval_samples_per_second': 5.859, 'eval_steps_per_second': 5.859, 'epoch': 9.85}


Model weights saved in ./model_output/checkpoint-50/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-45] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7787, 'learning_rate': 4.9949494949494956e-06, 'epoch': 10.85}


Saving model checkpoint to ./model_output/checkpoint-55
Configuration saved in ./model_output/checkpoint-55/config.json


{'eval_loss': 0.713131308555603, 'eval_runtime': 2.205, 'eval_samples_per_second': 5.896, 'eval_steps_per_second': 5.896, 'epoch': 10.85}


Model weights saved in ./model_output/checkpoint-55/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-50] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./model_output/checkpoint-5 (score: 0.6956660747528076).
***** Running Prediction *****
  Num examples = 10
  Batch size = 1


{'train_runtime': 682.1061, 'train_samples_per_second': 137.808, 'train_steps_per_second': 7.33, 'train_loss': 0.797645447470925, 'epoch': 10.85}
Fitting model: roberta_base using fold 8 as out of fold test data.
Train data sizes: (100, 100).
Val data sizes: (13, 13).
Test data sizes: (4, 4).


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file config.json from cache at /home/joel_stremmel/.cache/huggingface/hub/models--roberta-base/snapshots/bc2764f8af2e92b6eb5679868df33e224075ca68/config.json
Model config RobertaConfig {
  "_name_or_path": "roberta-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

loading file vocab.json from cache at /home/joel_stremm

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/13 [00:00<?, ? examples/s]

Map:   0%|          | 0/4 [00:00<?, ? examples/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using cuda_amp half precision backend
***** Running training *****
  Num examples = 100
  Num Epochs = 1000
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 16
  Total optimization steps = 6000
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7429, 'learning_rate': 6.000000000000001e-07, 'epoch': 0.96}


Saving model checkpoint to ./model_output/checkpoint-6
Configuration saved in ./model_output/checkpoint-6/config.json


{'eval_loss': 0.6909574270248413, 'eval_runtime': 2.1944, 'eval_samples_per_second': 5.924, 'eval_steps_per_second': 5.924, 'epoch': 0.96}


Model weights saved in ./model_output/checkpoint-6/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-5] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7499, 'learning_rate': 1.2000000000000002e-06, 'epoch': 1.96}


Saving model checkpoint to ./model_output/checkpoint-12
Configuration saved in ./model_output/checkpoint-12/config.json


{'eval_loss': 0.6910583972930908, 'eval_runtime': 2.1972, 'eval_samples_per_second': 5.917, 'eval_steps_per_second': 5.917, 'epoch': 1.96}


Model weights saved in ./model_output/checkpoint-12/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-55] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7344, 'learning_rate': 1.8000000000000001e-06, 'epoch': 2.96}


Saving model checkpoint to ./model_output/checkpoint-18
Configuration saved in ./model_output/checkpoint-18/config.json


{'eval_loss': 0.6914329528808594, 'eval_runtime': 2.206, 'eval_samples_per_second': 5.893, 'eval_steps_per_second': 5.893, 'epoch': 2.96}


Model weights saved in ./model_output/checkpoint-18/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-12] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7268, 'learning_rate': 2.4000000000000003e-06, 'epoch': 3.96}


Saving model checkpoint to ./model_output/checkpoint-24
Configuration saved in ./model_output/checkpoint-24/config.json


{'eval_loss': 0.6925208568572998, 'eval_runtime': 2.2063, 'eval_samples_per_second': 5.892, 'eval_steps_per_second': 5.892, 'epoch': 3.96}


Model weights saved in ./model_output/checkpoint-24/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-18] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7201, 'learning_rate': 3e-06, 'epoch': 4.96}


Saving model checkpoint to ./model_output/checkpoint-30
Configuration saved in ./model_output/checkpoint-30/config.json


{'eval_loss': 0.694681704044342, 'eval_runtime': 2.1996, 'eval_samples_per_second': 5.91, 'eval_steps_per_second': 5.91, 'epoch': 4.96}


Model weights saved in ./model_output/checkpoint-30/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-24] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7152, 'learning_rate': 3.6000000000000003e-06, 'epoch': 5.96}


Saving model checkpoint to ./model_output/checkpoint-36
Configuration saved in ./model_output/checkpoint-36/config.json


{'eval_loss': 0.6978209018707275, 'eval_runtime': 2.1967, 'eval_samples_per_second': 5.918, 'eval_steps_per_second': 5.918, 'epoch': 5.96}


Model weights saved in ./model_output/checkpoint-36/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-30] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7107, 'learning_rate': 4.2000000000000004e-06, 'epoch': 6.96}


Saving model checkpoint to ./model_output/checkpoint-42
Configuration saved in ./model_output/checkpoint-42/config.json


{'eval_loss': 0.7019720673561096, 'eval_runtime': 2.1993, 'eval_samples_per_second': 5.911, 'eval_steps_per_second': 5.911, 'epoch': 6.96}


Model weights saved in ./model_output/checkpoint-42/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-36] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.6917, 'learning_rate': 4.7e-06, 'epoch': 7.96}


Saving model checkpoint to ./model_output/checkpoint-48
Configuration saved in ./model_output/checkpoint-48/config.json


{'eval_loss': 0.7066317200660706, 'eval_runtime': 2.2, 'eval_samples_per_second': 5.909, 'eval_steps_per_second': 5.909, 'epoch': 7.96}


Model weights saved in ./model_output/checkpoint-48/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-42] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7067, 'learning_rate': 4.9974789915966396e-06, 'epoch': 8.96}


Saving model checkpoint to ./model_output/checkpoint-54
Configuration saved in ./model_output/checkpoint-54/config.json


{'eval_loss': 0.7152487635612488, 'eval_runtime': 2.2064, 'eval_samples_per_second': 5.892, 'eval_steps_per_second': 5.892, 'epoch': 8.96}


Model weights saved in ./model_output/checkpoint-54/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-48] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.6894, 'learning_rate': 4.992436974789916e-06, 'epoch': 9.96}


Saving model checkpoint to ./model_output/checkpoint-60
Configuration saved in ./model_output/checkpoint-60/config.json


{'eval_loss': 0.7303069233894348, 'eval_runtime': 2.1984, 'eval_samples_per_second': 5.913, 'eval_steps_per_second': 5.913, 'epoch': 9.96}


Model weights saved in ./model_output/checkpoint-60/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-54] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7067, 'learning_rate': 4.987394957983194e-06, 'epoch': 10.96}


Saving model checkpoint to ./model_output/checkpoint-66
Configuration saved in ./model_output/checkpoint-66/config.json


{'eval_loss': 0.7364035844802856, 'eval_runtime': 2.2003, 'eval_samples_per_second': 5.908, 'eval_steps_per_second': 5.908, 'epoch': 10.96}


Model weights saved in ./model_output/checkpoint-66/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-60] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./model_output/checkpoint-6 (score: 0.6909574270248413).
***** Running Prediction *****
  Num examples = 4
  Batch size = 1


{'train_runtime': 724.6708, 'train_samples_per_second': 137.994, 'train_steps_per_second': 8.28, 'train_loss': 0.7176795872774991, 'epoch': 10.96}
Fitting model: roberta_base using fold 9 as out of fold test data.
Train data sizes: (93, 93).
Val data sizes: (13, 13).
Test data sizes: (11, 11).


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file config.json from cache at /home/joel_stremmel/.cache/huggingface/hub/models--roberta-base/snapshots/bc2764f8af2e92b6eb5679868df33e224075ca68/config.json
Model config RobertaConfig {
  "_name_or_path": "roberta-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

loading file vocab.json from cache at /home/joel_stremm

Map:   0%|          | 0/93 [00:00<?, ? examples/s]

Map:   0%|          | 0/13 [00:00<?, ? examples/s]

Map:   0%|          | 0/11 [00:00<?, ? examples/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using cuda_amp half precision backend
***** Running training *****
  Num examples = 93
  Num Epochs = 1000
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 16
  Total optimization steps = 5000
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8183, 'learning_rate': 5.000000000000001e-07, 'epoch': 0.86}


Saving model checkpoint to ./model_output/checkpoint-5
Configuration saved in ./model_output/checkpoint-5/config.json


{'eval_loss': 0.6909653544425964, 'eval_runtime': 2.1985, 'eval_samples_per_second': 5.913, 'eval_steps_per_second': 5.913, 'epoch': 0.86}


Model weights saved in ./model_output/checkpoint-5/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-6] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8226, 'learning_rate': 1.0000000000000002e-06, 'epoch': 1.86}


Saving model checkpoint to ./model_output/checkpoint-10
Configuration saved in ./model_output/checkpoint-10/config.json


{'eval_loss': 0.6910161375999451, 'eval_runtime': 2.1945, 'eval_samples_per_second': 5.924, 'eval_steps_per_second': 5.924, 'epoch': 1.86}


Model weights saved in ./model_output/checkpoint-10/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-66] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8127, 'learning_rate': 1.5e-06, 'epoch': 2.86}


Saving model checkpoint to ./model_output/checkpoint-15
Configuration saved in ./model_output/checkpoint-15/config.json


{'eval_loss': 0.6911754608154297, 'eval_runtime': 2.1971, 'eval_samples_per_second': 5.917, 'eval_steps_per_second': 5.917, 'epoch': 2.86}


Model weights saved in ./model_output/checkpoint-15/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-10] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8153, 'learning_rate': 2.0000000000000003e-06, 'epoch': 3.86}


Saving model checkpoint to ./model_output/checkpoint-20
Configuration saved in ./model_output/checkpoint-20/config.json


{'eval_loss': 0.6915452480316162, 'eval_runtime': 2.2007, 'eval_samples_per_second': 5.907, 'eval_steps_per_second': 5.907, 'epoch': 3.86}


Model weights saved in ./model_output/checkpoint-20/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-15] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8137, 'learning_rate': 2.5e-06, 'epoch': 4.86}


Saving model checkpoint to ./model_output/checkpoint-25
Configuration saved in ./model_output/checkpoint-25/config.json


{'eval_loss': 0.6922451853752136, 'eval_runtime': 2.2015, 'eval_samples_per_second': 5.905, 'eval_steps_per_second': 5.905, 'epoch': 4.86}


Model weights saved in ./model_output/checkpoint-25/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-20] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8099, 'learning_rate': 3e-06, 'epoch': 5.86}


Saving model checkpoint to ./model_output/checkpoint-30
Configuration saved in ./model_output/checkpoint-30/config.json


{'eval_loss': 0.6933318376541138, 'eval_runtime': 2.2106, 'eval_samples_per_second': 5.881, 'eval_steps_per_second': 5.881, 'epoch': 5.86}


Model weights saved in ./model_output/checkpoint-30/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-25] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8113, 'learning_rate': 3.4000000000000005e-06, 'epoch': 6.86}


Saving model checkpoint to ./model_output/checkpoint-35
Configuration saved in ./model_output/checkpoint-35/config.json


{'eval_loss': 0.6945732831954956, 'eval_runtime': 2.1969, 'eval_samples_per_second': 5.917, 'eval_steps_per_second': 5.917, 'epoch': 6.86}


Model weights saved in ./model_output/checkpoint-35/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-30] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8052, 'learning_rate': 3.900000000000001e-06, 'epoch': 7.86}


Saving model checkpoint to ./model_output/checkpoint-40
Configuration saved in ./model_output/checkpoint-40/config.json


{'eval_loss': 0.6968659162521362, 'eval_runtime': 2.2054, 'eval_samples_per_second': 5.895, 'eval_steps_per_second': 5.895, 'epoch': 7.86}


Model weights saved in ./model_output/checkpoint-40/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-35] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.796, 'learning_rate': 4.4e-06, 'epoch': 8.86}


Saving model checkpoint to ./model_output/checkpoint-45
Configuration saved in ./model_output/checkpoint-45/config.json


{'eval_loss': 0.6998268961906433, 'eval_runtime': 2.1978, 'eval_samples_per_second': 5.915, 'eval_steps_per_second': 5.915, 'epoch': 8.86}


Model weights saved in ./model_output/checkpoint-45/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-40] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7871, 'learning_rate': 4.9000000000000005e-06, 'epoch': 9.86}


Saving model checkpoint to ./model_output/checkpoint-50
Configuration saved in ./model_output/checkpoint-50/config.json


{'eval_loss': 0.7042269110679626, 'eval_runtime': 2.1981, 'eval_samples_per_second': 5.914, 'eval_steps_per_second': 5.914, 'epoch': 9.86}


Model weights saved in ./model_output/checkpoint-50/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-45] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7783, 'learning_rate': 4.995959595959596e-06, 'epoch': 10.86}


Saving model checkpoint to ./model_output/checkpoint-55
Configuration saved in ./model_output/checkpoint-55/config.json


{'eval_loss': 0.7112866640090942, 'eval_runtime': 2.2, 'eval_samples_per_second': 5.909, 'eval_steps_per_second': 5.909, 'epoch': 10.86}


Model weights saved in ./model_output/checkpoint-55/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-50] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./model_output/checkpoint-5 (score: 0.6909653544425964).
***** Running Prediction *****
  Num examples = 11
  Batch size = 1


{'train_runtime': 676.0187, 'train_samples_per_second': 137.57, 'train_steps_per_second': 7.396, 'train_loss': 0.8064024795185436, 'epoch': 10.86}
Fitting model: roberta_base using fold 10 as out of fold test data.
Train data sizes: (97, 97).
Val data sizes: (13, 13).
Test data sizes: (7, 7).


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file config.json from cache at /home/joel_stremmel/.cache/huggingface/hub/models--roberta-base/snapshots/bc2764f8af2e92b6eb5679868df33e224075ca68/config.json
Model config RobertaConfig {
  "_name_or_path": "roberta-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

loading file vocab.json from cache at /home/joel_stremm

Map:   0%|          | 0/97 [00:00<?, ? examples/s]

Map:   0%|          | 0/13 [00:00<?, ? examples/s]

Map:   0%|          | 0/7 [00:00<?, ? examples/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using cuda_amp half precision backend
***** Running training *****
  Num examples = 97
  Num Epochs = 1000
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 16
  Total optimization steps = 6000
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7152, 'learning_rate': 6.000000000000001e-07, 'epoch': 0.99}


Saving model checkpoint to ./model_output/checkpoint-6
Configuration saved in ./model_output/checkpoint-6/config.json


{'eval_loss': 0.6909691095352173, 'eval_runtime': 2.2018, 'eval_samples_per_second': 5.904, 'eval_steps_per_second': 5.904, 'epoch': 0.99}


Model weights saved in ./model_output/checkpoint-6/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-5] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7172, 'learning_rate': 1.2000000000000002e-06, 'epoch': 1.99}


Saving model checkpoint to ./model_output/checkpoint-12
Configuration saved in ./model_output/checkpoint-12/config.json


{'eval_loss': 0.691095232963562, 'eval_runtime': 2.1932, 'eval_samples_per_second': 5.927, 'eval_steps_per_second': 5.927, 'epoch': 1.99}


Model weights saved in ./model_output/checkpoint-12/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-55] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7112, 'learning_rate': 1.8000000000000001e-06, 'epoch': 2.99}


Saving model checkpoint to ./model_output/checkpoint-18
Configuration saved in ./model_output/checkpoint-18/config.json


{'eval_loss': 0.6915649771690369, 'eval_runtime': 2.1949, 'eval_samples_per_second': 5.923, 'eval_steps_per_second': 5.923, 'epoch': 2.99}


Model weights saved in ./model_output/checkpoint-18/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-12] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7137, 'learning_rate': 2.4000000000000003e-06, 'epoch': 3.99}


Saving model checkpoint to ./model_output/checkpoint-24
Configuration saved in ./model_output/checkpoint-24/config.json


{'eval_loss': 0.6927883625030518, 'eval_runtime': 2.1919, 'eval_samples_per_second': 5.931, 'eval_steps_per_second': 5.931, 'epoch': 3.99}


Model weights saved in ./model_output/checkpoint-24/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-18] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.6982, 'learning_rate': 3e-06, 'epoch': 4.99}


Saving model checkpoint to ./model_output/checkpoint-30
Configuration saved in ./model_output/checkpoint-30/config.json


{'eval_loss': 0.6942161321640015, 'eval_runtime': 2.1968, 'eval_samples_per_second': 5.918, 'eval_steps_per_second': 5.918, 'epoch': 4.99}


Model weights saved in ./model_output/checkpoint-30/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-24] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7085, 'learning_rate': 3.6000000000000003e-06, 'epoch': 5.99}


Saving model checkpoint to ./model_output/checkpoint-36
Configuration saved in ./model_output/checkpoint-36/config.json


{'eval_loss': 0.696209192276001, 'eval_runtime': 2.1962, 'eval_samples_per_second': 5.919, 'eval_steps_per_second': 5.919, 'epoch': 5.99}


Model weights saved in ./model_output/checkpoint-36/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-30] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.694, 'learning_rate': 4.2000000000000004e-06, 'epoch': 6.99}


Saving model checkpoint to ./model_output/checkpoint-42
Configuration saved in ./model_output/checkpoint-42/config.json


{'eval_loss': 0.6981986165046692, 'eval_runtime': 2.2139, 'eval_samples_per_second': 5.872, 'eval_steps_per_second': 5.872, 'epoch': 6.99}


Model weights saved in ./model_output/checkpoint-42/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-36] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.6931, 'learning_rate': 4.800000000000001e-06, 'epoch': 7.99}


Saving model checkpoint to ./model_output/checkpoint-48
Configuration saved in ./model_output/checkpoint-48/config.json


{'eval_loss': 0.7019988298416138, 'eval_runtime': 2.2043, 'eval_samples_per_second': 5.898, 'eval_steps_per_second': 5.898, 'epoch': 7.99}


Model weights saved in ./model_output/checkpoint-48/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-42] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.6884, 'learning_rate': 4.996638655462185e-06, 'epoch': 8.99}


Saving model checkpoint to ./model_output/checkpoint-54
Configuration saved in ./model_output/checkpoint-54/config.json


{'eval_loss': 0.7089552283287048, 'eval_runtime': 2.1974, 'eval_samples_per_second': 5.916, 'eval_steps_per_second': 5.916, 'epoch': 8.99}


Model weights saved in ./model_output/checkpoint-54/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-48] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.683, 'learning_rate': 4.9915966386554625e-06, 'epoch': 9.99}


Saving model checkpoint to ./model_output/checkpoint-60
Configuration saved in ./model_output/checkpoint-60/config.json


{'eval_loss': 0.713958740234375, 'eval_runtime': 2.2009, 'eval_samples_per_second': 5.907, 'eval_steps_per_second': 5.907, 'epoch': 9.99}


Model weights saved in ./model_output/checkpoint-60/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-54] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.672, 'learning_rate': 4.98655462184874e-06, 'epoch': 10.99}


Saving model checkpoint to ./model_output/checkpoint-66
Configuration saved in ./model_output/checkpoint-66/config.json


{'eval_loss': 0.7250712513923645, 'eval_runtime': 2.1985, 'eval_samples_per_second': 5.913, 'eval_steps_per_second': 5.913, 'epoch': 10.99}


Model weights saved in ./model_output/checkpoint-66/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-60] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./model_output/checkpoint-6 (score: 0.6909691095352173).
***** Running Prediction *****
  Num examples = 7
  Batch size = 1


{'train_runtime': 704.0017, 'train_samples_per_second': 137.784, 'train_steps_per_second': 8.523, 'train_loss': 0.6994951927300656, 'epoch': 10.99}
Fitting model: roberta_pysch using fold 0 as out of fold test data.
Train data sizes: (96, 96).
Val data sizes: (10, 10).
Test data sizes: (11, 11).


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file config.json from cache at /home/joel_stremmel/.cache/huggingface/hub/models--mlaricheva--roberta-psych/snapshots/18f4eb3ec5e26053f262d6e19f98ede75673ff33/config.json
Model config RobertaConfig {
  "_name_or_path": "mlaricheva/roberta-psych",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

loading file vocab.json from c

Map:   0%|          | 0/96 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Map:   0%|          | 0/11 [00:00<?, ? examples/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using cuda_amp half precision backend
***** Running training *****
  Num examples = 96
  Num Epochs = 1000
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 16
  Total optimization steps = 6000
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.708, 'learning_rate': 6.000000000000001e-07, 'epoch': 1.0}


Saving model checkpoint to ./model_output/checkpoint-6
Configuration saved in ./model_output/checkpoint-6/config.json


{'eval_loss': 0.7312226891517639, 'eval_runtime': 1.7092, 'eval_samples_per_second': 5.851, 'eval_steps_per_second': 5.851, 'epoch': 1.0}


Model weights saved in ./model_output/checkpoint-6/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.714, 'learning_rate': 1.2000000000000002e-06, 'epoch': 2.0}


Saving model checkpoint to ./model_output/checkpoint-12
Configuration saved in ./model_output/checkpoint-12/config.json


{'eval_loss': 0.7256516218185425, 'eval_runtime': 1.7184, 'eval_samples_per_second': 5.82, 'eval_steps_per_second': 5.82, 'epoch': 2.0}


Model weights saved in ./model_output/checkpoint-12/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-6] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7087, 'learning_rate': 1.8000000000000001e-06, 'epoch': 3.0}


Saving model checkpoint to ./model_output/checkpoint-18
Configuration saved in ./model_output/checkpoint-18/config.json


{'eval_loss': 0.7166228890419006, 'eval_runtime': 1.7076, 'eval_samples_per_second': 5.856, 'eval_steps_per_second': 5.856, 'epoch': 3.0}


Model weights saved in ./model_output/checkpoint-18/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-66] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7017, 'learning_rate': 2.4000000000000003e-06, 'epoch': 4.0}


Saving model checkpoint to ./model_output/checkpoint-24
Configuration saved in ./model_output/checkpoint-24/config.json


{'eval_loss': 0.7077690958976746, 'eval_runtime': 1.7099, 'eval_samples_per_second': 5.848, 'eval_steps_per_second': 5.848, 'epoch': 4.0}


Model weights saved in ./model_output/checkpoint-24/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-12] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.694, 'learning_rate': 3e-06, 'epoch': 5.0}


Saving model checkpoint to ./model_output/checkpoint-30
Configuration saved in ./model_output/checkpoint-30/config.json


{'eval_loss': 0.6967397928237915, 'eval_runtime': 1.7071, 'eval_samples_per_second': 5.858, 'eval_steps_per_second': 5.858, 'epoch': 5.0}


Model weights saved in ./model_output/checkpoint-30/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-18] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6854, 'learning_rate': 3.6000000000000003e-06, 'epoch': 6.0}


Saving model checkpoint to ./model_output/checkpoint-36
Configuration saved in ./model_output/checkpoint-36/config.json


{'eval_loss': 0.6824719309806824, 'eval_runtime': 1.7117, 'eval_samples_per_second': 5.842, 'eval_steps_per_second': 5.842, 'epoch': 6.0}


Model weights saved in ./model_output/checkpoint-36/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-24] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6808, 'learning_rate': 4.2000000000000004e-06, 'epoch': 7.0}


Saving model checkpoint to ./model_output/checkpoint-42
Configuration saved in ./model_output/checkpoint-42/config.json


{'eval_loss': 0.6665642857551575, 'eval_runtime': 1.7096, 'eval_samples_per_second': 5.849, 'eval_steps_per_second': 5.849, 'epoch': 7.0}


Model weights saved in ./model_output/checkpoint-42/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-30] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6801, 'learning_rate': 4.800000000000001e-06, 'epoch': 8.0}


Saving model checkpoint to ./model_output/checkpoint-48
Configuration saved in ./model_output/checkpoint-48/config.json


{'eval_loss': 0.6454712748527527, 'eval_runtime': 1.7162, 'eval_samples_per_second': 5.827, 'eval_steps_per_second': 5.827, 'epoch': 8.0}


Model weights saved in ./model_output/checkpoint-48/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-36] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6807, 'learning_rate': 4.996638655462185e-06, 'epoch': 9.0}


Saving model checkpoint to ./model_output/checkpoint-54
Configuration saved in ./model_output/checkpoint-54/config.json


{'eval_loss': 0.6200535893440247, 'eval_runtime': 1.7078, 'eval_samples_per_second': 5.855, 'eval_steps_per_second': 5.855, 'epoch': 9.0}


Model weights saved in ./model_output/checkpoint-54/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-42] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6756, 'learning_rate': 4.9915966386554625e-06, 'epoch': 10.0}


Saving model checkpoint to ./model_output/checkpoint-60
Configuration saved in ./model_output/checkpoint-60/config.json


{'eval_loss': 0.5963512063026428, 'eval_runtime': 1.7136, 'eval_samples_per_second': 5.836, 'eval_steps_per_second': 5.836, 'epoch': 10.0}


Model weights saved in ./model_output/checkpoint-60/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-48] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6453, 'learning_rate': 4.98655462184874e-06, 'epoch': 11.0}


Saving model checkpoint to ./model_output/checkpoint-66
Configuration saved in ./model_output/checkpoint-66/config.json


{'eval_loss': 0.5863860845565796, 'eval_runtime': 1.7108, 'eval_samples_per_second': 5.845, 'eval_steps_per_second': 5.845, 'epoch': 11.0}


Model weights saved in ./model_output/checkpoint-66/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-54] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6716, 'learning_rate': 4.9815126050420174e-06, 'epoch': 12.0}


Saving model checkpoint to ./model_output/checkpoint-72
Configuration saved in ./model_output/checkpoint-72/config.json


{'eval_loss': 0.5793854594230652, 'eval_runtime': 1.7143, 'eval_samples_per_second': 5.833, 'eval_steps_per_second': 5.833, 'epoch': 12.0}


Model weights saved in ./model_output/checkpoint-72/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-60] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6647, 'learning_rate': 4.976470588235294e-06, 'epoch': 13.0}


Saving model checkpoint to ./model_output/checkpoint-78
Configuration saved in ./model_output/checkpoint-78/config.json


{'eval_loss': 0.5741570591926575, 'eval_runtime': 1.7216, 'eval_samples_per_second': 5.809, 'eval_steps_per_second': 5.809, 'epoch': 13.0}


Model weights saved in ./model_output/checkpoint-78/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-66] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6519, 'learning_rate': 4.972268907563025e-06, 'epoch': 14.0}


Saving model checkpoint to ./model_output/checkpoint-84
Configuration saved in ./model_output/checkpoint-84/config.json


{'eval_loss': 0.5700925588607788, 'eval_runtime': 1.71, 'eval_samples_per_second': 5.848, 'eval_steps_per_second': 5.848, 'epoch': 14.0}


Model weights saved in ./model_output/checkpoint-84/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-72] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.649, 'learning_rate': 4.967226890756303e-06, 'epoch': 15.0}


Saving model checkpoint to ./model_output/checkpoint-90
Configuration saved in ./model_output/checkpoint-90/config.json


{'eval_loss': 0.5544783473014832, 'eval_runtime': 1.7136, 'eval_samples_per_second': 5.836, 'eval_steps_per_second': 5.836, 'epoch': 15.0}


Model weights saved in ./model_output/checkpoint-90/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-78] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6181, 'learning_rate': 4.96218487394958e-06, 'epoch': 16.0}


Saving model checkpoint to ./model_output/checkpoint-96
Configuration saved in ./model_output/checkpoint-96/config.json


{'eval_loss': 0.5303832292556763, 'eval_runtime': 1.7175, 'eval_samples_per_second': 5.822, 'eval_steps_per_second': 5.822, 'epoch': 16.0}


Model weights saved in ./model_output/checkpoint-96/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-84] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6115, 'learning_rate': 4.9571428571428575e-06, 'epoch': 17.0}


Saving model checkpoint to ./model_output/checkpoint-102
Configuration saved in ./model_output/checkpoint-102/config.json


{'eval_loss': 0.4921371042728424, 'eval_runtime': 1.707, 'eval_samples_per_second': 5.858, 'eval_steps_per_second': 5.858, 'epoch': 17.0}


Model weights saved in ./model_output/checkpoint-102/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-90] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6003, 'learning_rate': 4.952100840336135e-06, 'epoch': 18.0}


Saving model checkpoint to ./model_output/checkpoint-108
Configuration saved in ./model_output/checkpoint-108/config.json


{'eval_loss': 0.48270383477211, 'eval_runtime': 1.714, 'eval_samples_per_second': 5.834, 'eval_steps_per_second': 5.834, 'epoch': 18.0}


Model weights saved in ./model_output/checkpoint-108/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-96] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5719, 'learning_rate': 4.947058823529412e-06, 'epoch': 19.0}


Saving model checkpoint to ./model_output/checkpoint-114
Configuration saved in ./model_output/checkpoint-114/config.json


{'eval_loss': 0.4581567347049713, 'eval_runtime': 1.7063, 'eval_samples_per_second': 5.861, 'eval_steps_per_second': 5.861, 'epoch': 19.0}


Model weights saved in ./model_output/checkpoint-114/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-102] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.55, 'learning_rate': 4.942016806722689e-06, 'epoch': 20.0}


Saving model checkpoint to ./model_output/checkpoint-120
Configuration saved in ./model_output/checkpoint-120/config.json


{'eval_loss': 0.4321006238460541, 'eval_runtime': 1.7099, 'eval_samples_per_second': 5.848, 'eval_steps_per_second': 5.848, 'epoch': 20.0}


Model weights saved in ./model_output/checkpoint-120/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-108] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5261, 'learning_rate': 4.936974789915967e-06, 'epoch': 21.0}


Saving model checkpoint to ./model_output/checkpoint-126
Configuration saved in ./model_output/checkpoint-126/config.json


{'eval_loss': 0.4274124205112457, 'eval_runtime': 1.7099, 'eval_samples_per_second': 5.848, 'eval_steps_per_second': 5.848, 'epoch': 21.0}


Model weights saved in ./model_output/checkpoint-126/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-114] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4982, 'learning_rate': 4.931932773109244e-06, 'epoch': 22.0}


Saving model checkpoint to ./model_output/checkpoint-132
Configuration saved in ./model_output/checkpoint-132/config.json


{'eval_loss': 0.37452834844589233, 'eval_runtime': 1.715, 'eval_samples_per_second': 5.831, 'eval_steps_per_second': 5.831, 'epoch': 22.0}


Model weights saved in ./model_output/checkpoint-132/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-120] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4362, 'learning_rate': 4.926890756302521e-06, 'epoch': 23.0}


Saving model checkpoint to ./model_output/checkpoint-138
Configuration saved in ./model_output/checkpoint-138/config.json


{'eval_loss': 0.36640578508377075, 'eval_runtime': 1.708, 'eval_samples_per_second': 5.855, 'eval_steps_per_second': 5.855, 'epoch': 23.0}


Model weights saved in ./model_output/checkpoint-138/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-126] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4028, 'learning_rate': 4.921848739495799e-06, 'epoch': 24.0}


Saving model checkpoint to ./model_output/checkpoint-144
Configuration saved in ./model_output/checkpoint-144/config.json


{'eval_loss': 0.31947797536849976, 'eval_runtime': 1.7086, 'eval_samples_per_second': 5.853, 'eval_steps_per_second': 5.853, 'epoch': 24.0}


Model weights saved in ./model_output/checkpoint-144/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-132] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.3484, 'learning_rate': 4.916806722689076e-06, 'epoch': 25.0}


Saving model checkpoint to ./model_output/checkpoint-150
Configuration saved in ./model_output/checkpoint-150/config.json


{'eval_loss': 0.2938688397407532, 'eval_runtime': 1.7084, 'eval_samples_per_second': 5.853, 'eval_steps_per_second': 5.853, 'epoch': 25.0}


Model weights saved in ./model_output/checkpoint-150/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-138] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2798, 'learning_rate': 4.911764705882353e-06, 'epoch': 26.0}


Saving model checkpoint to ./model_output/checkpoint-156
Configuration saved in ./model_output/checkpoint-156/config.json


{'eval_loss': 0.2759801149368286, 'eval_runtime': 1.7117, 'eval_samples_per_second': 5.842, 'eval_steps_per_second': 5.842, 'epoch': 26.0}


Model weights saved in ./model_output/checkpoint-156/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-144] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2113, 'learning_rate': 4.906722689075631e-06, 'epoch': 27.0}


Saving model checkpoint to ./model_output/checkpoint-162
Configuration saved in ./model_output/checkpoint-162/config.json


{'eval_loss': 0.21068572998046875, 'eval_runtime': 1.7115, 'eval_samples_per_second': 5.843, 'eval_steps_per_second': 5.843, 'epoch': 27.0}


Model weights saved in ./model_output/checkpoint-162/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-150] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2135, 'learning_rate': 4.902521008403362e-06, 'epoch': 28.0}


Saving model checkpoint to ./model_output/checkpoint-168
Configuration saved in ./model_output/checkpoint-168/config.json


{'eval_loss': 0.19328202307224274, 'eval_runtime': 1.7091, 'eval_samples_per_second': 5.851, 'eval_steps_per_second': 5.851, 'epoch': 28.0}


Model weights saved in ./model_output/checkpoint-168/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-156] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1718, 'learning_rate': 4.897478991596639e-06, 'epoch': 29.0}


Saving model checkpoint to ./model_output/checkpoint-174
Configuration saved in ./model_output/checkpoint-174/config.json


{'eval_loss': 0.380149781703949, 'eval_runtime': 1.7132, 'eval_samples_per_second': 5.837, 'eval_steps_per_second': 5.837, 'epoch': 29.0}


Model weights saved in ./model_output/checkpoint-174/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-162] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0739, 'learning_rate': 4.89327731092437e-06, 'epoch': 30.0}


Saving model checkpoint to ./model_output/checkpoint-180
Configuration saved in ./model_output/checkpoint-180/config.json


{'eval_loss': 0.1867682784795761, 'eval_runtime': 1.7109, 'eval_samples_per_second': 5.845, 'eval_steps_per_second': 5.845, 'epoch': 30.0}


Model weights saved in ./model_output/checkpoint-180/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-168] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1169, 'learning_rate': 4.888235294117647e-06, 'epoch': 31.0}


Saving model checkpoint to ./model_output/checkpoint-186
Configuration saved in ./model_output/checkpoint-186/config.json


{'eval_loss': 0.2879996597766876, 'eval_runtime': 1.7134, 'eval_samples_per_second': 5.836, 'eval_steps_per_second': 5.836, 'epoch': 31.0}


Model weights saved in ./model_output/checkpoint-186/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-174] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0375, 'learning_rate': 4.883193277310925e-06, 'epoch': 32.0}


Saving model checkpoint to ./model_output/checkpoint-192
Configuration saved in ./model_output/checkpoint-192/config.json


{'eval_loss': 0.2007053792476654, 'eval_runtime': 1.7126, 'eval_samples_per_second': 5.839, 'eval_steps_per_second': 5.839, 'epoch': 32.0}


Model weights saved in ./model_output/checkpoint-192/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-186] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0645, 'learning_rate': 4.878151260504202e-06, 'epoch': 33.0}


Saving model checkpoint to ./model_output/checkpoint-198
Configuration saved in ./model_output/checkpoint-198/config.json


{'eval_loss': 0.331966370344162, 'eval_runtime': 1.7092, 'eval_samples_per_second': 5.851, 'eval_steps_per_second': 5.851, 'epoch': 33.0}


Model weights saved in ./model_output/checkpoint-198/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-192] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0155, 'learning_rate': 4.873109243697479e-06, 'epoch': 34.0}


Saving model checkpoint to ./model_output/checkpoint-204
Configuration saved in ./model_output/checkpoint-204/config.json


{'eval_loss': 0.15250608325004578, 'eval_runtime': 1.7139, 'eval_samples_per_second': 5.834, 'eval_steps_per_second': 5.834, 'epoch': 34.0}


Model weights saved in ./model_output/checkpoint-204/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-180] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0175, 'learning_rate': 4.868067226890757e-06, 'epoch': 35.0}


Saving model checkpoint to ./model_output/checkpoint-210
Configuration saved in ./model_output/checkpoint-210/config.json


{'eval_loss': 0.4885866045951843, 'eval_runtime': 1.7117, 'eval_samples_per_second': 5.842, 'eval_steps_per_second': 5.842, 'epoch': 35.0}


Model weights saved in ./model_output/checkpoint-210/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-198] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0075, 'learning_rate': 4.863025210084034e-06, 'epoch': 36.0}


Saving model checkpoint to ./model_output/checkpoint-216
Configuration saved in ./model_output/checkpoint-216/config.json


{'eval_loss': 0.076222725212574, 'eval_runtime': 1.7114, 'eval_samples_per_second': 5.843, 'eval_steps_per_second': 5.843, 'epoch': 36.0}


Model weights saved in ./model_output/checkpoint-216/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-204] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0062, 'learning_rate': 4.857983193277311e-06, 'epoch': 37.0}


Saving model checkpoint to ./model_output/checkpoint-222
Configuration saved in ./model_output/checkpoint-222/config.json


{'eval_loss': 0.19077257812023163, 'eval_runtime': 1.7118, 'eval_samples_per_second': 5.842, 'eval_steps_per_second': 5.842, 'epoch': 37.0}


Model weights saved in ./model_output/checkpoint-222/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-210] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0038, 'learning_rate': 4.852941176470589e-06, 'epoch': 38.0}


Saving model checkpoint to ./model_output/checkpoint-228
Configuration saved in ./model_output/checkpoint-228/config.json


{'eval_loss': 0.07158254832029343, 'eval_runtime': 1.7136, 'eval_samples_per_second': 5.836, 'eval_steps_per_second': 5.836, 'epoch': 38.0}


Model weights saved in ./model_output/checkpoint-228/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-216] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0033, 'learning_rate': 4.847899159663866e-06, 'epoch': 39.0}


Saving model checkpoint to ./model_output/checkpoint-234
Configuration saved in ./model_output/checkpoint-234/config.json


{'eval_loss': 0.16148820519447327, 'eval_runtime': 1.7082, 'eval_samples_per_second': 5.854, 'eval_steps_per_second': 5.854, 'epoch': 39.0}


Model weights saved in ./model_output/checkpoint-234/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-222] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0025, 'learning_rate': 4.842857142857143e-06, 'epoch': 40.0}


Saving model checkpoint to ./model_output/checkpoint-240
Configuration saved in ./model_output/checkpoint-240/config.json


{'eval_loss': 0.1521514356136322, 'eval_runtime': 1.7112, 'eval_samples_per_second': 5.844, 'eval_steps_per_second': 5.844, 'epoch': 40.0}


Model weights saved in ./model_output/checkpoint-240/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-234] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0023, 'learning_rate': 4.837815126050421e-06, 'epoch': 41.0}


Saving model checkpoint to ./model_output/checkpoint-246
Configuration saved in ./model_output/checkpoint-246/config.json


{'eval_loss': 0.08767198026180267, 'eval_runtime': 1.7093, 'eval_samples_per_second': 5.851, 'eval_steps_per_second': 5.851, 'epoch': 41.0}


Model weights saved in ./model_output/checkpoint-246/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-240] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.002, 'learning_rate': 4.832773109243698e-06, 'epoch': 42.0}


Saving model checkpoint to ./model_output/checkpoint-252
Configuration saved in ./model_output/checkpoint-252/config.json


{'eval_loss': 0.11938942968845367, 'eval_runtime': 1.7101, 'eval_samples_per_second': 5.848, 'eval_steps_per_second': 5.848, 'epoch': 42.0}


Model weights saved in ./model_output/checkpoint-252/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-246] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0018, 'learning_rate': 4.827731092436975e-06, 'epoch': 43.0}


Saving model checkpoint to ./model_output/checkpoint-258
Configuration saved in ./model_output/checkpoint-258/config.json


{'eval_loss': 0.12766340374946594, 'eval_runtime': 1.7118, 'eval_samples_per_second': 5.842, 'eval_steps_per_second': 5.842, 'epoch': 43.0}


Model weights saved in ./model_output/checkpoint-258/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-252] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0016, 'learning_rate': 4.8226890756302525e-06, 'epoch': 44.0}


Saving model checkpoint to ./model_output/checkpoint-264
Configuration saved in ./model_output/checkpoint-264/config.json


{'eval_loss': 0.12340587377548218, 'eval_runtime': 1.7127, 'eval_samples_per_second': 5.839, 'eval_steps_per_second': 5.839, 'epoch': 44.0}


Model weights saved in ./model_output/checkpoint-264/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-258] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0016, 'learning_rate': 4.81764705882353e-06, 'epoch': 45.0}


Saving model checkpoint to ./model_output/checkpoint-270
Configuration saved in ./model_output/checkpoint-270/config.json


{'eval_loss': 0.2542673945426941, 'eval_runtime': 1.7101, 'eval_samples_per_second': 5.848, 'eval_steps_per_second': 5.848, 'epoch': 45.0}


Model weights saved in ./model_output/checkpoint-270/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-264] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0015, 'learning_rate': 4.812605042016807e-06, 'epoch': 46.0}


Saving model checkpoint to ./model_output/checkpoint-276
Configuration saved in ./model_output/checkpoint-276/config.json


{'eval_loss': 0.09353048354387283, 'eval_runtime': 1.7108, 'eval_samples_per_second': 5.845, 'eval_steps_per_second': 5.845, 'epoch': 46.0}


Model weights saved in ./model_output/checkpoint-276/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-270] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0013, 'learning_rate': 4.807563025210085e-06, 'epoch': 47.0}


Saving model checkpoint to ./model_output/checkpoint-282
Configuration saved in ./model_output/checkpoint-282/config.json


{'eval_loss': 0.054070599377155304, 'eval_runtime': 1.7151, 'eval_samples_per_second': 5.831, 'eval_steps_per_second': 5.831, 'epoch': 47.0}


Model weights saved in ./model_output/checkpoint-282/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-228] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0012, 'learning_rate': 4.8025210084033615e-06, 'epoch': 48.0}


Saving model checkpoint to ./model_output/checkpoint-288
Configuration saved in ./model_output/checkpoint-288/config.json


{'eval_loss': 0.14209052920341492, 'eval_runtime': 1.7107, 'eval_samples_per_second': 5.846, 'eval_steps_per_second': 5.846, 'epoch': 48.0}


Model weights saved in ./model_output/checkpoint-288/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-276] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0012, 'learning_rate': 4.797478991596639e-06, 'epoch': 49.0}


Saving model checkpoint to ./model_output/checkpoint-294
Configuration saved in ./model_output/checkpoint-294/config.json


{'eval_loss': 0.27427011728286743, 'eval_runtime': 1.714, 'eval_samples_per_second': 5.834, 'eval_steps_per_second': 5.834, 'epoch': 49.0}


Model weights saved in ./model_output/checkpoint-294/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-288] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0011, 'learning_rate': 4.792436974789916e-06, 'epoch': 50.0}


Saving model checkpoint to ./model_output/checkpoint-300
Configuration saved in ./model_output/checkpoint-300/config.json


{'eval_loss': 0.2310682237148285, 'eval_runtime': 1.7088, 'eval_samples_per_second': 5.852, 'eval_steps_per_second': 5.852, 'epoch': 50.0}


Model weights saved in ./model_output/checkpoint-300/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-294] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.001, 'learning_rate': 4.787394957983194e-06, 'epoch': 51.0}


Saving model checkpoint to ./model_output/checkpoint-306
Configuration saved in ./model_output/checkpoint-306/config.json


{'eval_loss': 0.1624028980731964, 'eval_runtime': 1.716, 'eval_samples_per_second': 5.827, 'eval_steps_per_second': 5.827, 'epoch': 51.0}


Model weights saved in ./model_output/checkpoint-306/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-300] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0009, 'learning_rate': 4.7823529411764704e-06, 'epoch': 52.0}


Saving model checkpoint to ./model_output/checkpoint-312
Configuration saved in ./model_output/checkpoint-312/config.json


{'eval_loss': 0.14062240719795227, 'eval_runtime': 1.7096, 'eval_samples_per_second': 5.849, 'eval_steps_per_second': 5.849, 'epoch': 52.0}


Model weights saved in ./model_output/checkpoint-312/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-306] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0009, 'learning_rate': 4.777310924369749e-06, 'epoch': 53.0}


Saving model checkpoint to ./model_output/checkpoint-318
Configuration saved in ./model_output/checkpoint-318/config.json


{'eval_loss': 0.16449251770973206, 'eval_runtime': 1.7318, 'eval_samples_per_second': 5.774, 'eval_steps_per_second': 5.774, 'epoch': 53.0}


Model weights saved in ./model_output/checkpoint-318/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-312] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0009, 'learning_rate': 4.772268907563025e-06, 'epoch': 54.0}


Saving model checkpoint to ./model_output/checkpoint-324
Configuration saved in ./model_output/checkpoint-324/config.json


{'eval_loss': 0.19455818831920624, 'eval_runtime': 1.7059, 'eval_samples_per_second': 5.862, 'eval_steps_per_second': 5.862, 'epoch': 54.0}


Model weights saved in ./model_output/checkpoint-324/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-318] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0009, 'learning_rate': 4.767226890756303e-06, 'epoch': 55.0}


Saving model checkpoint to ./model_output/checkpoint-330
Configuration saved in ./model_output/checkpoint-330/config.json


{'eval_loss': 0.1545795053243637, 'eval_runtime': 1.7087, 'eval_samples_per_second': 5.852, 'eval_steps_per_second': 5.852, 'epoch': 55.0}


Model weights saved in ./model_output/checkpoint-330/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-324] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0008, 'learning_rate': 4.76218487394958e-06, 'epoch': 56.0}


Saving model checkpoint to ./model_output/checkpoint-336
Configuration saved in ./model_output/checkpoint-336/config.json


{'eval_loss': 0.10491830110549927, 'eval_runtime': 1.7104, 'eval_samples_per_second': 5.847, 'eval_steps_per_second': 5.847, 'epoch': 56.0}


Model weights saved in ./model_output/checkpoint-336/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-330] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0008, 'learning_rate': 4.757142857142858e-06, 'epoch': 57.0}


Saving model checkpoint to ./model_output/checkpoint-342
Configuration saved in ./model_output/checkpoint-342/config.json


{'eval_loss': 0.13879308104515076, 'eval_runtime': 1.7116, 'eval_samples_per_second': 5.842, 'eval_steps_per_second': 5.842, 'epoch': 57.0}


Model weights saved in ./model_output/checkpoint-342/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-336] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./model_output/checkpoint-282 (score: 0.054070599377155304).
***** Running Prediction *****
  Num examples = 11
  Batch size = 1


{'train_runtime': 3590.5432, 'train_samples_per_second': 26.737, 'train_steps_per_second': 1.671, 'train_loss': 0.2916170827341838, 'epoch': 57.0}
Fitting model: roberta_pysch using fold 1 as out of fold test data.
Train data sizes: (92, 92).
Val data sizes: (10, 10).
Test data sizes: (15, 15).


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file config.json from cache at /home/joel_stremmel/.cache/huggingface/hub/models--mlaricheva--roberta-psych/snapshots/18f4eb3ec5e26053f262d6e19f98ede75673ff33/config.json
Model config RobertaConfig {
  "_name_or_path": "mlaricheva/roberta-psych",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

loading file vocab.json from c

Map:   0%|          | 0/92 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Map:   0%|          | 0/15 [00:00<?, ? examples/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using cuda_amp half precision backend
***** Running training *****
  Num examples = 92
  Num Epochs = 1000
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 16
  Total optimization steps = 5000
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8386, 'learning_rate': 5.000000000000001e-07, 'epoch': 0.87}


Saving model checkpoint to ./model_output/checkpoint-5
Configuration saved in ./model_output/checkpoint-5/config.json


{'eval_loss': 0.7700704336166382, 'eval_runtime': 1.7118, 'eval_samples_per_second': 5.842, 'eval_steps_per_second': 5.842, 'epoch': 0.87}


Model weights saved in ./model_output/checkpoint-5/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-282] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8289, 'learning_rate': 1.0000000000000002e-06, 'epoch': 1.87}


Saving model checkpoint to ./model_output/checkpoint-10
Configuration saved in ./model_output/checkpoint-10/config.json


{'eval_loss': 0.7652588486671448, 'eval_runtime': 1.7142, 'eval_samples_per_second': 5.834, 'eval_steps_per_second': 5.834, 'epoch': 1.87}


Model weights saved in ./model_output/checkpoint-10/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-342] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8225, 'learning_rate': 1.5e-06, 'epoch': 2.87}


Saving model checkpoint to ./model_output/checkpoint-15
Configuration saved in ./model_output/checkpoint-15/config.json


{'eval_loss': 0.7572612762451172, 'eval_runtime': 1.7174, 'eval_samples_per_second': 5.823, 'eval_steps_per_second': 5.823, 'epoch': 2.87}


Model weights saved in ./model_output/checkpoint-15/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-5] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8233, 'learning_rate': 2.0000000000000003e-06, 'epoch': 3.87}


Saving model checkpoint to ./model_output/checkpoint-20
Configuration saved in ./model_output/checkpoint-20/config.json


{'eval_loss': 0.7458813786506653, 'eval_runtime': 1.711, 'eval_samples_per_second': 5.844, 'eval_steps_per_second': 5.844, 'epoch': 3.87}


Model weights saved in ./model_output/checkpoint-20/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-10] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8186, 'learning_rate': 2.5e-06, 'epoch': 4.87}


Saving model checkpoint to ./model_output/checkpoint-25
Configuration saved in ./model_output/checkpoint-25/config.json


{'eval_loss': 0.7327383756637573, 'eval_runtime': 1.7082, 'eval_samples_per_second': 5.854, 'eval_steps_per_second': 5.854, 'epoch': 4.87}


Model weights saved in ./model_output/checkpoint-25/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-15] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8312, 'learning_rate': 3e-06, 'epoch': 5.87}


Saving model checkpoint to ./model_output/checkpoint-30
Configuration saved in ./model_output/checkpoint-30/config.json


{'eval_loss': 0.7198632955551147, 'eval_runtime': 1.7187, 'eval_samples_per_second': 5.818, 'eval_steps_per_second': 5.818, 'epoch': 5.87}


Model weights saved in ./model_output/checkpoint-30/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-20] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7926, 'learning_rate': 3.5e-06, 'epoch': 6.87}


Saving model checkpoint to ./model_output/checkpoint-35
Configuration saved in ./model_output/checkpoint-35/config.json


{'eval_loss': 0.7058134078979492, 'eval_runtime': 1.7125, 'eval_samples_per_second': 5.839, 'eval_steps_per_second': 5.839, 'epoch': 6.87}


Model weights saved in ./model_output/checkpoint-35/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-25] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.78, 'learning_rate': 4.000000000000001e-06, 'epoch': 7.87}


Saving model checkpoint to ./model_output/checkpoint-40
Configuration saved in ./model_output/checkpoint-40/config.json


{'eval_loss': 0.6881093382835388, 'eval_runtime': 1.7088, 'eval_samples_per_second': 5.852, 'eval_steps_per_second': 5.852, 'epoch': 7.87}


Model weights saved in ./model_output/checkpoint-40/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-30] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7873, 'learning_rate': 4.5e-06, 'epoch': 8.87}


Saving model checkpoint to ./model_output/checkpoint-45
Configuration saved in ./model_output/checkpoint-45/config.json


{'eval_loss': 0.6677449345588684, 'eval_runtime': 1.716, 'eval_samples_per_second': 5.828, 'eval_steps_per_second': 5.828, 'epoch': 8.87}


Model weights saved in ./model_output/checkpoint-45/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-35] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7844, 'learning_rate': 5e-06, 'epoch': 9.87}


Saving model checkpoint to ./model_output/checkpoint-50
Configuration saved in ./model_output/checkpoint-50/config.json


{'eval_loss': 0.6482932567596436, 'eval_runtime': 1.7174, 'eval_samples_per_second': 5.823, 'eval_steps_per_second': 5.823, 'epoch': 9.87}


Model weights saved in ./model_output/checkpoint-50/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-40] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7533, 'learning_rate': 4.9949494949494956e-06, 'epoch': 10.87}


Saving model checkpoint to ./model_output/checkpoint-55
Configuration saved in ./model_output/checkpoint-55/config.json


{'eval_loss': 0.632499635219574, 'eval_runtime': 1.7069, 'eval_samples_per_second': 5.858, 'eval_steps_per_second': 5.858, 'epoch': 10.87}


Model weights saved in ./model_output/checkpoint-55/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-45] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7525, 'learning_rate': 4.98989898989899e-06, 'epoch': 11.87}


Saving model checkpoint to ./model_output/checkpoint-60
Configuration saved in ./model_output/checkpoint-60/config.json


{'eval_loss': 0.6136974096298218, 'eval_runtime': 1.7117, 'eval_samples_per_second': 5.842, 'eval_steps_per_second': 5.842, 'epoch': 11.87}


Model weights saved in ./model_output/checkpoint-60/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-50] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.762, 'learning_rate': 4.984848484848485e-06, 'epoch': 12.87}


Saving model checkpoint to ./model_output/checkpoint-65
Configuration saved in ./model_output/checkpoint-65/config.json


{'eval_loss': 0.5992754697799683, 'eval_runtime': 1.7065, 'eval_samples_per_second': 5.86, 'eval_steps_per_second': 5.86, 'epoch': 12.87}


Model weights saved in ./model_output/checkpoint-65/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-55] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7444, 'learning_rate': 4.980808080808081e-06, 'epoch': 13.87}


Saving model checkpoint to ./model_output/checkpoint-70
Configuration saved in ./model_output/checkpoint-70/config.json


{'eval_loss': 0.5861923098564148, 'eval_runtime': 1.7185, 'eval_samples_per_second': 5.819, 'eval_steps_per_second': 5.819, 'epoch': 13.87}


Model weights saved in ./model_output/checkpoint-70/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-60] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7309, 'learning_rate': 4.975757575757576e-06, 'epoch': 14.87}


Saving model checkpoint to ./model_output/checkpoint-75
Configuration saved in ./model_output/checkpoint-75/config.json


{'eval_loss': 0.5643514394760132, 'eval_runtime': 1.7158, 'eval_samples_per_second': 5.828, 'eval_steps_per_second': 5.828, 'epoch': 14.87}


Model weights saved in ./model_output/checkpoint-75/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-65] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7523, 'learning_rate': 4.970707070707071e-06, 'epoch': 15.87}


Saving model checkpoint to ./model_output/checkpoint-80
Configuration saved in ./model_output/checkpoint-80/config.json


{'eval_loss': 0.5581814646720886, 'eval_runtime': 1.7154, 'eval_samples_per_second': 5.829, 'eval_steps_per_second': 5.829, 'epoch': 15.87}


Model weights saved in ./model_output/checkpoint-80/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-70] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7742, 'learning_rate': 4.965656565656566e-06, 'epoch': 16.87}


Saving model checkpoint to ./model_output/checkpoint-85
Configuration saved in ./model_output/checkpoint-85/config.json


{'eval_loss': 0.548109769821167, 'eval_runtime': 1.7189, 'eval_samples_per_second': 5.818, 'eval_steps_per_second': 5.818, 'epoch': 16.87}


Model weights saved in ./model_output/checkpoint-85/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-75] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7671, 'learning_rate': 4.9606060606060605e-06, 'epoch': 17.87}


Saving model checkpoint to ./model_output/checkpoint-90
Configuration saved in ./model_output/checkpoint-90/config.json


{'eval_loss': 0.5441025495529175, 'eval_runtime': 1.7106, 'eval_samples_per_second': 5.846, 'eval_steps_per_second': 5.846, 'epoch': 17.87}


Model weights saved in ./model_output/checkpoint-90/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-80] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7375, 'learning_rate': 4.9555555555555565e-06, 'epoch': 18.87}


Saving model checkpoint to ./model_output/checkpoint-95
Configuration saved in ./model_output/checkpoint-95/config.json


{'eval_loss': 0.551042377948761, 'eval_runtime': 1.7074, 'eval_samples_per_second': 5.857, 'eval_steps_per_second': 5.857, 'epoch': 18.87}


Model weights saved in ./model_output/checkpoint-95/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-85] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.71, 'learning_rate': 4.950505050505051e-06, 'epoch': 19.87}


Saving model checkpoint to ./model_output/checkpoint-100
Configuration saved in ./model_output/checkpoint-100/config.json


{'eval_loss': 0.5410429239273071, 'eval_runtime': 1.7146, 'eval_samples_per_second': 5.832, 'eval_steps_per_second': 5.832, 'epoch': 19.87}


Model weights saved in ./model_output/checkpoint-100/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-90] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6926, 'learning_rate': 4.945454545454546e-06, 'epoch': 20.87}


Saving model checkpoint to ./model_output/checkpoint-105
Configuration saved in ./model_output/checkpoint-105/config.json


{'eval_loss': 0.5253686308860779, 'eval_runtime': 1.7087, 'eval_samples_per_second': 5.852, 'eval_steps_per_second': 5.852, 'epoch': 20.87}


Model weights saved in ./model_output/checkpoint-105/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-95] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6824, 'learning_rate': 4.940404040404041e-06, 'epoch': 21.87}


Saving model checkpoint to ./model_output/checkpoint-110
Configuration saved in ./model_output/checkpoint-110/config.json


{'eval_loss': 0.49997812509536743, 'eval_runtime': 1.7167, 'eval_samples_per_second': 5.825, 'eval_steps_per_second': 5.825, 'epoch': 21.87}


Model weights saved in ./model_output/checkpoint-110/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-100] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6827, 'learning_rate': 4.935353535353536e-06, 'epoch': 22.87}


Saving model checkpoint to ./model_output/checkpoint-115
Configuration saved in ./model_output/checkpoint-115/config.json


{'eval_loss': 0.4815644323825836, 'eval_runtime': 1.7139, 'eval_samples_per_second': 5.835, 'eval_steps_per_second': 5.835, 'epoch': 22.87}


Model weights saved in ./model_output/checkpoint-115/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-105] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6339, 'learning_rate': 4.9303030303030305e-06, 'epoch': 23.87}


Saving model checkpoint to ./model_output/checkpoint-120
Configuration saved in ./model_output/checkpoint-120/config.json


{'eval_loss': 0.47906333208084106, 'eval_runtime': 1.7093, 'eval_samples_per_second': 5.851, 'eval_steps_per_second': 5.851, 'epoch': 23.87}


Model weights saved in ./model_output/checkpoint-120/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-110] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6061, 'learning_rate': 4.925252525252526e-06, 'epoch': 24.87}


Saving model checkpoint to ./model_output/checkpoint-125
Configuration saved in ./model_output/checkpoint-125/config.json


{'eval_loss': 0.4836800694465637, 'eval_runtime': 1.7108, 'eval_samples_per_second': 5.845, 'eval_steps_per_second': 5.845, 'epoch': 24.87}


Model weights saved in ./model_output/checkpoint-125/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-115] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5687, 'learning_rate': 4.9212121212121214e-06, 'epoch': 25.87}


Saving model checkpoint to ./model_output/checkpoint-130
Configuration saved in ./model_output/checkpoint-130/config.json


{'eval_loss': 0.4382542669773102, 'eval_runtime': 1.7192, 'eval_samples_per_second': 5.817, 'eval_steps_per_second': 5.817, 'epoch': 25.87}


Model weights saved in ./model_output/checkpoint-130/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-120] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5616, 'learning_rate': 4.9161616161616166e-06, 'epoch': 26.87}


Saving model checkpoint to ./model_output/checkpoint-135
Configuration saved in ./model_output/checkpoint-135/config.json


{'eval_loss': 0.4144876003265381, 'eval_runtime': 1.7234, 'eval_samples_per_second': 5.803, 'eval_steps_per_second': 5.803, 'epoch': 26.87}


Model weights saved in ./model_output/checkpoint-135/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-125] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5118, 'learning_rate': 4.911111111111112e-06, 'epoch': 27.87}


Saving model checkpoint to ./model_output/checkpoint-140
Configuration saved in ./model_output/checkpoint-140/config.json


{'eval_loss': 0.47702091932296753, 'eval_runtime': 1.7143, 'eval_samples_per_second': 5.833, 'eval_steps_per_second': 5.833, 'epoch': 27.87}


Model weights saved in ./model_output/checkpoint-140/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-130] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.463, 'learning_rate': 4.906060606060606e-06, 'epoch': 28.87}


Saving model checkpoint to ./model_output/checkpoint-145
Configuration saved in ./model_output/checkpoint-145/config.json


{'eval_loss': 0.4128323197364807, 'eval_runtime': 1.7182, 'eval_samples_per_second': 5.82, 'eval_steps_per_second': 5.82, 'epoch': 28.87}


Model weights saved in ./model_output/checkpoint-145/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-135] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4524, 'learning_rate': 4.901010101010101e-06, 'epoch': 29.87}


Saving model checkpoint to ./model_output/checkpoint-150
Configuration saved in ./model_output/checkpoint-150/config.json


{'eval_loss': 0.39599183201789856, 'eval_runtime': 1.7166, 'eval_samples_per_second': 5.826, 'eval_steps_per_second': 5.826, 'epoch': 29.87}


Model weights saved in ./model_output/checkpoint-150/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-140] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.3976, 'learning_rate': 4.895959595959596e-06, 'epoch': 30.87}


Saving model checkpoint to ./model_output/checkpoint-155
Configuration saved in ./model_output/checkpoint-155/config.json


{'eval_loss': 0.5042617917060852, 'eval_runtime': 1.713, 'eval_samples_per_second': 5.838, 'eval_steps_per_second': 5.838, 'epoch': 30.87}


Model weights saved in ./model_output/checkpoint-155/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-145] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.3433, 'learning_rate': 4.8909090909090914e-06, 'epoch': 31.87}


Saving model checkpoint to ./model_output/checkpoint-160
Configuration saved in ./model_output/checkpoint-160/config.json


{'eval_loss': 0.40787965059280396, 'eval_runtime': 1.7118, 'eval_samples_per_second': 5.842, 'eval_steps_per_second': 5.842, 'epoch': 31.87}


Model weights saved in ./model_output/checkpoint-160/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-155] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2819, 'learning_rate': 4.885858585858586e-06, 'epoch': 32.87}


Saving model checkpoint to ./model_output/checkpoint-165
Configuration saved in ./model_output/checkpoint-165/config.json


{'eval_loss': 0.3994213938713074, 'eval_runtime': 1.7149, 'eval_samples_per_second': 5.831, 'eval_steps_per_second': 5.831, 'epoch': 32.87}


Model weights saved in ./model_output/checkpoint-165/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-160] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2198, 'learning_rate': 4.880808080808081e-06, 'epoch': 33.87}


Saving model checkpoint to ./model_output/checkpoint-170
Configuration saved in ./model_output/checkpoint-170/config.json


{'eval_loss': 0.44503116607666016, 'eval_runtime': 1.7137, 'eval_samples_per_second': 5.835, 'eval_steps_per_second': 5.835, 'epoch': 33.87}


Model weights saved in ./model_output/checkpoint-170/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-165] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2023, 'learning_rate': 4.875757575757576e-06, 'epoch': 34.87}


Saving model checkpoint to ./model_output/checkpoint-175
Configuration saved in ./model_output/checkpoint-175/config.json


{'eval_loss': 0.4110877513885498, 'eval_runtime': 1.7134, 'eval_samples_per_second': 5.836, 'eval_steps_per_second': 5.836, 'epoch': 34.87}


Model weights saved in ./model_output/checkpoint-175/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-170] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1246, 'learning_rate': 4.870707070707071e-06, 'epoch': 35.87}


Saving model checkpoint to ./model_output/checkpoint-180
Configuration saved in ./model_output/checkpoint-180/config.json


{'eval_loss': 0.7774795293807983, 'eval_runtime': 1.7138, 'eval_samples_per_second': 5.835, 'eval_steps_per_second': 5.835, 'epoch': 35.87}


Model weights saved in ./model_output/checkpoint-180/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-175] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1655, 'learning_rate': 4.866666666666667e-06, 'epoch': 36.87}


Saving model checkpoint to ./model_output/checkpoint-185
Configuration saved in ./model_output/checkpoint-185/config.json


{'eval_loss': 0.5862048864364624, 'eval_runtime': 1.7116, 'eval_samples_per_second': 5.843, 'eval_steps_per_second': 5.843, 'epoch': 36.87}


Model weights saved in ./model_output/checkpoint-185/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-180] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1747, 'learning_rate': 4.861616161616162e-06, 'epoch': 37.87}


Saving model checkpoint to ./model_output/checkpoint-190
Configuration saved in ./model_output/checkpoint-190/config.json


{'eval_loss': 0.4524086117744446, 'eval_runtime': 1.7201, 'eval_samples_per_second': 5.813, 'eval_steps_per_second': 5.813, 'epoch': 37.87}


Model weights saved in ./model_output/checkpoint-190/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-185] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1509, 'learning_rate': 4.857575757575758e-06, 'epoch': 38.87}


Saving model checkpoint to ./model_output/checkpoint-195
Configuration saved in ./model_output/checkpoint-195/config.json


{'eval_loss': 0.5347481966018677, 'eval_runtime': 1.7181, 'eval_samples_per_second': 5.82, 'eval_steps_per_second': 5.82, 'epoch': 38.87}


Model weights saved in ./model_output/checkpoint-195/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-190] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0352, 'learning_rate': 4.852525252525253e-06, 'epoch': 39.87}


Saving model checkpoint to ./model_output/checkpoint-200
Configuration saved in ./model_output/checkpoint-200/config.json


{'eval_loss': 0.542687177658081, 'eval_runtime': 1.7135, 'eval_samples_per_second': 5.836, 'eval_steps_per_second': 5.836, 'epoch': 39.87}


Model weights saved in ./model_output/checkpoint-200/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-195] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./model_output/checkpoint-150 (score: 0.39599183201789856).
***** Running Prediction *****
  Num examples = 15
  Batch size = 1


{'train_runtime': 2414.1314, 'train_samples_per_second': 38.109, 'train_steps_per_second': 2.071, 'train_loss': 0.5885627550631761, 'epoch': 39.87}
Fitting model: roberta_pysch using fold 2 as out of fold test data.
Train data sizes: (93, 93).
Val data sizes: (10, 10).
Test data sizes: (14, 14).


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file config.json from cache at /home/joel_stremmel/.cache/huggingface/hub/models--mlaricheva--roberta-psych/snapshots/18f4eb3ec5e26053f262d6e19f98ede75673ff33/config.json
Model config RobertaConfig {
  "_name_or_path": "mlaricheva/roberta-psych",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

loading file vocab.json from c

Map:   0%|          | 0/93 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Map:   0%|          | 0/14 [00:00<?, ? examples/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using cuda_amp half precision backend
***** Running training *****
  Num examples = 93
  Num Epochs = 1000
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 16
  Total optimization steps = 5000
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8087, 'learning_rate': 5.000000000000001e-07, 'epoch': 0.86}


Saving model checkpoint to ./model_output/checkpoint-5
Configuration saved in ./model_output/checkpoint-5/config.json


{'eval_loss': 0.6310148239135742, 'eval_runtime': 1.7125, 'eval_samples_per_second': 5.839, 'eval_steps_per_second': 5.839, 'epoch': 0.86}


Model weights saved in ./model_output/checkpoint-5/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-150] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.812, 'learning_rate': 1.0000000000000002e-06, 'epoch': 1.86}


Saving model checkpoint to ./model_output/checkpoint-10
Configuration saved in ./model_output/checkpoint-10/config.json


{'eval_loss': 0.6308525204658508, 'eval_runtime': 1.7168, 'eval_samples_per_second': 5.825, 'eval_steps_per_second': 5.825, 'epoch': 1.86}


Model weights saved in ./model_output/checkpoint-10/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-200] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8018, 'learning_rate': 1.4000000000000001e-06, 'epoch': 2.86}


Saving model checkpoint to ./model_output/checkpoint-15
Configuration saved in ./model_output/checkpoint-15/config.json


{'eval_loss': 0.6305049657821655, 'eval_runtime': 1.7142, 'eval_samples_per_second': 5.834, 'eval_steps_per_second': 5.834, 'epoch': 2.86}


Model weights saved in ./model_output/checkpoint-15/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-5] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7914, 'learning_rate': 1.9000000000000002e-06, 'epoch': 3.86}


Saving model checkpoint to ./model_output/checkpoint-20
Configuration saved in ./model_output/checkpoint-20/config.json


{'eval_loss': 0.6286649107933044, 'eval_runtime': 1.7204, 'eval_samples_per_second': 5.812, 'eval_steps_per_second': 5.812, 'epoch': 3.86}


Model weights saved in ./model_output/checkpoint-20/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-10] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7846, 'learning_rate': 2.4000000000000003e-06, 'epoch': 4.86}


Saving model checkpoint to ./model_output/checkpoint-25
Configuration saved in ./model_output/checkpoint-25/config.json


{'eval_loss': 0.626733660697937, 'eval_runtime': 1.7171, 'eval_samples_per_second': 5.824, 'eval_steps_per_second': 5.824, 'epoch': 4.86}


Model weights saved in ./model_output/checkpoint-25/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-15] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7801, 'learning_rate': 2.9e-06, 'epoch': 5.86}


Saving model checkpoint to ./model_output/checkpoint-30
Configuration saved in ./model_output/checkpoint-30/config.json


{'eval_loss': 0.625951886177063, 'eval_runtime': 1.717, 'eval_samples_per_second': 5.824, 'eval_steps_per_second': 5.824, 'epoch': 5.86}


Model weights saved in ./model_output/checkpoint-30/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-20] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7915, 'learning_rate': 3.4000000000000005e-06, 'epoch': 6.86}


Saving model checkpoint to ./model_output/checkpoint-35
Configuration saved in ./model_output/checkpoint-35/config.json


{'eval_loss': 0.625688374042511, 'eval_runtime': 1.7231, 'eval_samples_per_second': 5.804, 'eval_steps_per_second': 5.804, 'epoch': 6.86}


Model weights saved in ./model_output/checkpoint-35/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-25] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8025, 'learning_rate': 3.900000000000001e-06, 'epoch': 7.86}


Saving model checkpoint to ./model_output/checkpoint-40
Configuration saved in ./model_output/checkpoint-40/config.json


{'eval_loss': 0.6239124536514282, 'eval_runtime': 1.713, 'eval_samples_per_second': 5.838, 'eval_steps_per_second': 5.838, 'epoch': 7.86}


Model weights saved in ./model_output/checkpoint-40/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-30] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7746, 'learning_rate': 4.4e-06, 'epoch': 8.86}


Saving model checkpoint to ./model_output/checkpoint-45
Configuration saved in ./model_output/checkpoint-45/config.json


{'eval_loss': 0.6189640164375305, 'eval_runtime': 1.7238, 'eval_samples_per_second': 5.801, 'eval_steps_per_second': 5.801, 'epoch': 8.86}


Model weights saved in ./model_output/checkpoint-45/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-35] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7988, 'learning_rate': 4.9000000000000005e-06, 'epoch': 9.86}


Saving model checkpoint to ./model_output/checkpoint-50
Configuration saved in ./model_output/checkpoint-50/config.json


{'eval_loss': 0.6119517683982849, 'eval_runtime': 1.7108, 'eval_samples_per_second': 5.845, 'eval_steps_per_second': 5.845, 'epoch': 9.86}


Model weights saved in ./model_output/checkpoint-50/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-40] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7967, 'learning_rate': 4.995959595959596e-06, 'epoch': 10.86}


Saving model checkpoint to ./model_output/checkpoint-55
Configuration saved in ./model_output/checkpoint-55/config.json


{'eval_loss': 0.6031365990638733, 'eval_runtime': 1.7127, 'eval_samples_per_second': 5.839, 'eval_steps_per_second': 5.839, 'epoch': 10.86}


Model weights saved in ./model_output/checkpoint-55/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-45] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7909, 'learning_rate': 4.990909090909091e-06, 'epoch': 11.86}


Saving model checkpoint to ./model_output/checkpoint-60
Configuration saved in ./model_output/checkpoint-60/config.json


{'eval_loss': 0.5971507430076599, 'eval_runtime': 1.7127, 'eval_samples_per_second': 5.839, 'eval_steps_per_second': 5.839, 'epoch': 11.86}


Model weights saved in ./model_output/checkpoint-60/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-50] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7823, 'learning_rate': 4.986868686868687e-06, 'epoch': 12.86}


Saving model checkpoint to ./model_output/checkpoint-65
Configuration saved in ./model_output/checkpoint-65/config.json


{'eval_loss': 0.5916892886161804, 'eval_runtime': 1.7188, 'eval_samples_per_second': 5.818, 'eval_steps_per_second': 5.818, 'epoch': 12.86}


Model weights saved in ./model_output/checkpoint-65/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-55] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7672, 'learning_rate': 4.981818181818182e-06, 'epoch': 13.86}


Saving model checkpoint to ./model_output/checkpoint-70
Configuration saved in ./model_output/checkpoint-70/config.json


{'eval_loss': 0.5888892412185669, 'eval_runtime': 1.7152, 'eval_samples_per_second': 5.83, 'eval_steps_per_second': 5.83, 'epoch': 13.86}


Model weights saved in ./model_output/checkpoint-70/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-60] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7649, 'learning_rate': 4.976767676767677e-06, 'epoch': 14.86}


Saving model checkpoint to ./model_output/checkpoint-75
Configuration saved in ./model_output/checkpoint-75/config.json


{'eval_loss': 0.5837547183036804, 'eval_runtime': 1.7177, 'eval_samples_per_second': 5.822, 'eval_steps_per_second': 5.822, 'epoch': 14.86}


Model weights saved in ./model_output/checkpoint-75/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-65] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7747, 'learning_rate': 4.971717171717172e-06, 'epoch': 15.86}


Saving model checkpoint to ./model_output/checkpoint-80
Configuration saved in ./model_output/checkpoint-80/config.json


{'eval_loss': 0.5807288885116577, 'eval_runtime': 1.7142, 'eval_samples_per_second': 5.833, 'eval_steps_per_second': 5.833, 'epoch': 15.86}


Model weights saved in ./model_output/checkpoint-80/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-70] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7809, 'learning_rate': 4.966666666666667e-06, 'epoch': 16.86}


Saving model checkpoint to ./model_output/checkpoint-85
Configuration saved in ./model_output/checkpoint-85/config.json


{'eval_loss': 0.5790335536003113, 'eval_runtime': 1.7197, 'eval_samples_per_second': 5.815, 'eval_steps_per_second': 5.815, 'epoch': 16.86}


Model weights saved in ./model_output/checkpoint-85/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-75] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7754, 'learning_rate': 4.961616161616162e-06, 'epoch': 17.86}


Saving model checkpoint to ./model_output/checkpoint-90
Configuration saved in ./model_output/checkpoint-90/config.json


{'eval_loss': 0.5712524056434631, 'eval_runtime': 1.7129, 'eval_samples_per_second': 5.838, 'eval_steps_per_second': 5.838, 'epoch': 17.86}


Model weights saved in ./model_output/checkpoint-90/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-80] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7694, 'learning_rate': 4.956565656565657e-06, 'epoch': 18.86}


Saving model checkpoint to ./model_output/checkpoint-95
Configuration saved in ./model_output/checkpoint-95/config.json


{'eval_loss': 0.5566432476043701, 'eval_runtime': 1.7207, 'eval_samples_per_second': 5.811, 'eval_steps_per_second': 5.811, 'epoch': 18.86}


Model weights saved in ./model_output/checkpoint-95/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-85] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7793, 'learning_rate': 4.951515151515152e-06, 'epoch': 19.86}


Saving model checkpoint to ./model_output/checkpoint-100
Configuration saved in ./model_output/checkpoint-100/config.json


{'eval_loss': 0.5429231524467468, 'eval_runtime': 1.7169, 'eval_samples_per_second': 5.824, 'eval_steps_per_second': 5.824, 'epoch': 19.86}


Model weights saved in ./model_output/checkpoint-100/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-90] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7611, 'learning_rate': 4.946464646464647e-06, 'epoch': 20.86}


Saving model checkpoint to ./model_output/checkpoint-105
Configuration saved in ./model_output/checkpoint-105/config.json


{'eval_loss': 0.544977068901062, 'eval_runtime': 1.7228, 'eval_samples_per_second': 5.805, 'eval_steps_per_second': 5.805, 'epoch': 20.86}


Model weights saved in ./model_output/checkpoint-105/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-95] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7434, 'learning_rate': 4.941414141414142e-06, 'epoch': 21.86}


Saving model checkpoint to ./model_output/checkpoint-110
Configuration saved in ./model_output/checkpoint-110/config.json


{'eval_loss': 0.5353567600250244, 'eval_runtime': 1.7183, 'eval_samples_per_second': 5.82, 'eval_steps_per_second': 5.82, 'epoch': 21.86}


Model weights saved in ./model_output/checkpoint-110/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-100] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7169, 'learning_rate': 4.936363636363637e-06, 'epoch': 22.86}


Saving model checkpoint to ./model_output/checkpoint-115
Configuration saved in ./model_output/checkpoint-115/config.json


{'eval_loss': 0.5080159902572632, 'eval_runtime': 1.7339, 'eval_samples_per_second': 5.767, 'eval_steps_per_second': 5.767, 'epoch': 22.86}


Model weights saved in ./model_output/checkpoint-115/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-105] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7027, 'learning_rate': 4.931313131313132e-06, 'epoch': 23.86}


Saving model checkpoint to ./model_output/checkpoint-120
Configuration saved in ./model_output/checkpoint-120/config.json


{'eval_loss': 0.48193445801734924, 'eval_runtime': 1.7178, 'eval_samples_per_second': 5.821, 'eval_steps_per_second': 5.821, 'epoch': 23.86}


Model weights saved in ./model_output/checkpoint-120/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-110] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6734, 'learning_rate': 4.926262626262627e-06, 'epoch': 24.86}


Saving model checkpoint to ./model_output/checkpoint-125
Configuration saved in ./model_output/checkpoint-125/config.json


{'eval_loss': 0.47143322229385376, 'eval_runtime': 1.7237, 'eval_samples_per_second': 5.802, 'eval_steps_per_second': 5.802, 'epoch': 24.86}


Model weights saved in ./model_output/checkpoint-125/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-115] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6555, 'learning_rate': 4.9212121212121214e-06, 'epoch': 25.86}


Saving model checkpoint to ./model_output/checkpoint-130
Configuration saved in ./model_output/checkpoint-130/config.json


{'eval_loss': 0.4592442512512207, 'eval_runtime': 1.7177, 'eval_samples_per_second': 5.822, 'eval_steps_per_second': 5.822, 'epoch': 25.86}


Model weights saved in ./model_output/checkpoint-130/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-120] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6168, 'learning_rate': 4.9161616161616166e-06, 'epoch': 26.86}


Saving model checkpoint to ./model_output/checkpoint-135
Configuration saved in ./model_output/checkpoint-135/config.json


{'eval_loss': 0.44125810265541077, 'eval_runtime': 1.7167, 'eval_samples_per_second': 5.825, 'eval_steps_per_second': 5.825, 'epoch': 26.86}


Model weights saved in ./model_output/checkpoint-135/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-125] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6147, 'learning_rate': 4.911111111111112e-06, 'epoch': 27.86}


Saving model checkpoint to ./model_output/checkpoint-140
Configuration saved in ./model_output/checkpoint-140/config.json


{'eval_loss': 0.400911420583725, 'eval_runtime': 1.7154, 'eval_samples_per_second': 5.83, 'eval_steps_per_second': 5.83, 'epoch': 27.86}


Model weights saved in ./model_output/checkpoint-140/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-130] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5707, 'learning_rate': 4.906060606060606e-06, 'epoch': 28.86}


Saving model checkpoint to ./model_output/checkpoint-145
Configuration saved in ./model_output/checkpoint-145/config.json


{'eval_loss': 0.36714038252830505, 'eval_runtime': 1.7102, 'eval_samples_per_second': 5.847, 'eval_steps_per_second': 5.847, 'epoch': 28.86}


Model weights saved in ./model_output/checkpoint-145/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-135] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5358, 'learning_rate': 4.901010101010101e-06, 'epoch': 29.86}


Saving model checkpoint to ./model_output/checkpoint-150
Configuration saved in ./model_output/checkpoint-150/config.json


{'eval_loss': 0.35622477531433105, 'eval_runtime': 1.7189, 'eval_samples_per_second': 5.818, 'eval_steps_per_second': 5.818, 'epoch': 29.86}


Model weights saved in ./model_output/checkpoint-150/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-140] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4919, 'learning_rate': 4.895959595959596e-06, 'epoch': 30.86}


Saving model checkpoint to ./model_output/checkpoint-155
Configuration saved in ./model_output/checkpoint-155/config.json


{'eval_loss': 0.35064390301704407, 'eval_runtime': 1.7115, 'eval_samples_per_second': 5.843, 'eval_steps_per_second': 5.843, 'epoch': 30.86}


Model weights saved in ./model_output/checkpoint-155/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-145] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4536, 'learning_rate': 4.8909090909090914e-06, 'epoch': 31.86}


Saving model checkpoint to ./model_output/checkpoint-160
Configuration saved in ./model_output/checkpoint-160/config.json


{'eval_loss': 0.345704048871994, 'eval_runtime': 1.719, 'eval_samples_per_second': 5.817, 'eval_steps_per_second': 5.817, 'epoch': 31.86}


Model weights saved in ./model_output/checkpoint-160/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-150] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.3658, 'learning_rate': 4.885858585858586e-06, 'epoch': 32.86}


Saving model checkpoint to ./model_output/checkpoint-165
Configuration saved in ./model_output/checkpoint-165/config.json


{'eval_loss': 0.3252559304237366, 'eval_runtime': 1.7202, 'eval_samples_per_second': 5.813, 'eval_steps_per_second': 5.813, 'epoch': 32.86}


Model weights saved in ./model_output/checkpoint-165/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-155] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.3468, 'learning_rate': 4.880808080808081e-06, 'epoch': 33.86}


Saving model checkpoint to ./model_output/checkpoint-170
Configuration saved in ./model_output/checkpoint-170/config.json


{'eval_loss': 0.36978381872177124, 'eval_runtime': 1.7134, 'eval_samples_per_second': 5.836, 'eval_steps_per_second': 5.836, 'epoch': 33.86}


Model weights saved in ./model_output/checkpoint-170/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-160] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2651, 'learning_rate': 4.875757575757576e-06, 'epoch': 34.86}


Saving model checkpoint to ./model_output/checkpoint-175
Configuration saved in ./model_output/checkpoint-175/config.json


{'eval_loss': 0.38046789169311523, 'eval_runtime': 1.716, 'eval_samples_per_second': 5.827, 'eval_steps_per_second': 5.827, 'epoch': 34.86}


Model weights saved in ./model_output/checkpoint-175/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-170] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2251, 'learning_rate': 4.870707070707071e-06, 'epoch': 35.86}


Saving model checkpoint to ./model_output/checkpoint-180
Configuration saved in ./model_output/checkpoint-180/config.json


{'eval_loss': 0.38892579078674316, 'eval_runtime': 1.7157, 'eval_samples_per_second': 5.829, 'eval_steps_per_second': 5.829, 'epoch': 35.86}


Model weights saved in ./model_output/checkpoint-180/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-175] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1397, 'learning_rate': 4.865656565656566e-06, 'epoch': 36.86}


Saving model checkpoint to ./model_output/checkpoint-185
Configuration saved in ./model_output/checkpoint-185/config.json


{'eval_loss': 0.4165034294128418, 'eval_runtime': 1.7247, 'eval_samples_per_second': 5.798, 'eval_steps_per_second': 5.798, 'epoch': 36.86}


Model weights saved in ./model_output/checkpoint-185/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-180] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1128, 'learning_rate': 4.8606060606060615e-06, 'epoch': 37.86}


Saving model checkpoint to ./model_output/checkpoint-190
Configuration saved in ./model_output/checkpoint-190/config.json


{'eval_loss': 0.44958844780921936, 'eval_runtime': 1.7106, 'eval_samples_per_second': 5.846, 'eval_steps_per_second': 5.846, 'epoch': 37.86}


Model weights saved in ./model_output/checkpoint-190/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-185] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.111, 'learning_rate': 4.855555555555556e-06, 'epoch': 38.86}


Saving model checkpoint to ./model_output/checkpoint-195
Configuration saved in ./model_output/checkpoint-195/config.json


{'eval_loss': 0.480097234249115, 'eval_runtime': 1.716, 'eval_samples_per_second': 5.828, 'eval_steps_per_second': 5.828, 'epoch': 38.86}


Model weights saved in ./model_output/checkpoint-195/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-190] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0713, 'learning_rate': 4.850505050505051e-06, 'epoch': 39.86}


Saving model checkpoint to ./model_output/checkpoint-200
Configuration saved in ./model_output/checkpoint-200/config.json


{'eval_loss': 0.8886852264404297, 'eval_runtime': 1.7135, 'eval_samples_per_second': 5.836, 'eval_steps_per_second': 5.836, 'epoch': 39.86}


Model weights saved in ./model_output/checkpoint-200/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-195] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0652, 'learning_rate': 4.846464646464647e-06, 'epoch': 40.86}


Saving model checkpoint to ./model_output/checkpoint-205
Configuration saved in ./model_output/checkpoint-205/config.json


{'eval_loss': 0.7621414661407471, 'eval_runtime': 1.7157, 'eval_samples_per_second': 5.829, 'eval_steps_per_second': 5.829, 'epoch': 40.86}


Model weights saved in ./model_output/checkpoint-205/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-200] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0252, 'learning_rate': 4.841414141414142e-06, 'epoch': 41.86}


Saving model checkpoint to ./model_output/checkpoint-210
Configuration saved in ./model_output/checkpoint-210/config.json


{'eval_loss': 0.8682456016540527, 'eval_runtime': 1.7148, 'eval_samples_per_second': 5.831, 'eval_steps_per_second': 5.831, 'epoch': 41.86}


Model weights saved in ./model_output/checkpoint-210/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-205] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0236, 'learning_rate': 4.836363636363637e-06, 'epoch': 42.86}


Saving model checkpoint to ./model_output/checkpoint-215
Configuration saved in ./model_output/checkpoint-215/config.json


{'eval_loss': 0.9936383366584778, 'eval_runtime': 1.7079, 'eval_samples_per_second': 5.855, 'eval_steps_per_second': 5.855, 'epoch': 42.86}


Model weights saved in ./model_output/checkpoint-215/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-210] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./model_output/checkpoint-165 (score: 0.3252559304237366).
***** Running Prediction *****
  Num examples = 14
  Batch size = 1


{'train_runtime': 2623.1156, 'train_samples_per_second': 35.454, 'train_steps_per_second': 1.906, 'train_loss': 0.5817648036881935, 'epoch': 42.86}
Fitting model: roberta_pysch using fold 3 as out of fold test data.
Train data sizes: (97, 97).
Val data sizes: (10, 10).
Test data sizes: (10, 10).


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file config.json from cache at /home/joel_stremmel/.cache/huggingface/hub/models--mlaricheva--roberta-psych/snapshots/18f4eb3ec5e26053f262d6e19f98ede75673ff33/config.json
Model config RobertaConfig {
  "_name_or_path": "mlaricheva/roberta-psych",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

loading file vocab.json from c

Map:   0%|          | 0/97 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using cuda_amp half precision backend
***** Running training *****
  Num examples = 97
  Num Epochs = 1000
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 16
  Total optimization steps = 6000
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7001, 'learning_rate': 6.000000000000001e-07, 'epoch': 0.99}


Saving model checkpoint to ./model_output/checkpoint-6
Configuration saved in ./model_output/checkpoint-6/config.json


{'eval_loss': 0.6649903059005737, 'eval_runtime': 1.7144, 'eval_samples_per_second': 5.833, 'eval_steps_per_second': 5.833, 'epoch': 0.99}


Model weights saved in ./model_output/checkpoint-6/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-165] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6992, 'learning_rate': 1.2000000000000002e-06, 'epoch': 1.99}


Saving model checkpoint to ./model_output/checkpoint-12
Configuration saved in ./model_output/checkpoint-12/config.json


{'eval_loss': 0.6625173091888428, 'eval_runtime': 1.7072, 'eval_samples_per_second': 5.858, 'eval_steps_per_second': 5.858, 'epoch': 1.99}


Model weights saved in ./model_output/checkpoint-12/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-215] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6937, 'learning_rate': 1.8000000000000001e-06, 'epoch': 2.99}


Saving model checkpoint to ./model_output/checkpoint-18
Configuration saved in ./model_output/checkpoint-18/config.json


{'eval_loss': 0.661206841468811, 'eval_runtime': 1.7168, 'eval_samples_per_second': 5.825, 'eval_steps_per_second': 5.825, 'epoch': 2.99}


Model weights saved in ./model_output/checkpoint-18/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-6] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7079, 'learning_rate': 2.4000000000000003e-06, 'epoch': 3.99}


Saving model checkpoint to ./model_output/checkpoint-24
Configuration saved in ./model_output/checkpoint-24/config.json


{'eval_loss': 0.6590290665626526, 'eval_runtime': 1.712, 'eval_samples_per_second': 5.841, 'eval_steps_per_second': 5.841, 'epoch': 3.99}


Model weights saved in ./model_output/checkpoint-24/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-12] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6931, 'learning_rate': 3e-06, 'epoch': 4.99}


Saving model checkpoint to ./model_output/checkpoint-30
Configuration saved in ./model_output/checkpoint-30/config.json


{'eval_loss': 0.6554297208786011, 'eval_runtime': 1.7129, 'eval_samples_per_second': 5.838, 'eval_steps_per_second': 5.838, 'epoch': 4.99}


Model weights saved in ./model_output/checkpoint-30/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-18] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6944, 'learning_rate': 3.6000000000000003e-06, 'epoch': 5.99}


Saving model checkpoint to ./model_output/checkpoint-36
Configuration saved in ./model_output/checkpoint-36/config.json


{'eval_loss': 0.6534672975540161, 'eval_runtime': 1.7165, 'eval_samples_per_second': 5.826, 'eval_steps_per_second': 5.826, 'epoch': 5.99}


Model weights saved in ./model_output/checkpoint-36/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-24] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7025, 'learning_rate': 4.2000000000000004e-06, 'epoch': 6.99}


Saving model checkpoint to ./model_output/checkpoint-42
Configuration saved in ./model_output/checkpoint-42/config.json


{'eval_loss': 0.6530888676643372, 'eval_runtime': 1.7184, 'eval_samples_per_second': 5.819, 'eval_steps_per_second': 5.819, 'epoch': 6.99}


Model weights saved in ./model_output/checkpoint-42/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-30] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6935, 'learning_rate': 4.800000000000001e-06, 'epoch': 7.99}


Saving model checkpoint to ./model_output/checkpoint-48
Configuration saved in ./model_output/checkpoint-48/config.json


{'eval_loss': 0.6502567529678345, 'eval_runtime': 1.7192, 'eval_samples_per_second': 5.817, 'eval_steps_per_second': 5.817, 'epoch': 7.99}


Model weights saved in ./model_output/checkpoint-48/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-36] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6982, 'learning_rate': 4.996638655462185e-06, 'epoch': 8.99}


Saving model checkpoint to ./model_output/checkpoint-54
Configuration saved in ./model_output/checkpoint-54/config.json


{'eval_loss': 0.6449960470199585, 'eval_runtime': 1.7257, 'eval_samples_per_second': 5.795, 'eval_steps_per_second': 5.795, 'epoch': 8.99}


Model weights saved in ./model_output/checkpoint-54/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-42] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6893, 'learning_rate': 4.9915966386554625e-06, 'epoch': 9.99}


Saving model checkpoint to ./model_output/checkpoint-60
Configuration saved in ./model_output/checkpoint-60/config.json


{'eval_loss': 0.6383037567138672, 'eval_runtime': 1.7146, 'eval_samples_per_second': 5.832, 'eval_steps_per_second': 5.832, 'epoch': 9.99}


Model weights saved in ./model_output/checkpoint-60/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-48] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6925, 'learning_rate': 4.98655462184874e-06, 'epoch': 10.99}


Saving model checkpoint to ./model_output/checkpoint-66
Configuration saved in ./model_output/checkpoint-66/config.json


{'eval_loss': 0.6324759721755981, 'eval_runtime': 1.7191, 'eval_samples_per_second': 5.817, 'eval_steps_per_second': 5.817, 'epoch': 10.99}


Model weights saved in ./model_output/checkpoint-66/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-54] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6878, 'learning_rate': 4.9815126050420174e-06, 'epoch': 11.99}


Saving model checkpoint to ./model_output/checkpoint-72
Configuration saved in ./model_output/checkpoint-72/config.json


{'eval_loss': 0.6268978118896484, 'eval_runtime': 1.7195, 'eval_samples_per_second': 5.816, 'eval_steps_per_second': 5.816, 'epoch': 11.99}


Model weights saved in ./model_output/checkpoint-72/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-60] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6876, 'learning_rate': 4.976470588235294e-06, 'epoch': 12.99}


Saving model checkpoint to ./model_output/checkpoint-78
Configuration saved in ./model_output/checkpoint-78/config.json


{'eval_loss': 0.6190072298049927, 'eval_runtime': 1.7153, 'eval_samples_per_second': 5.83, 'eval_steps_per_second': 5.83, 'epoch': 12.99}


Model weights saved in ./model_output/checkpoint-78/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-66] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6871, 'learning_rate': 4.971428571428572e-06, 'epoch': 13.99}


Saving model checkpoint to ./model_output/checkpoint-84
Configuration saved in ./model_output/checkpoint-84/config.json


{'eval_loss': 0.6105816960334778, 'eval_runtime': 1.7138, 'eval_samples_per_second': 5.835, 'eval_steps_per_second': 5.835, 'epoch': 13.99}


Model weights saved in ./model_output/checkpoint-84/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-72] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6718, 'learning_rate': 4.966386554621849e-06, 'epoch': 14.99}


Saving model checkpoint to ./model_output/checkpoint-90
Configuration saved in ./model_output/checkpoint-90/config.json


{'eval_loss': 0.6041171550750732, 'eval_runtime': 1.7152, 'eval_samples_per_second': 5.83, 'eval_steps_per_second': 5.83, 'epoch': 14.99}


Model weights saved in ./model_output/checkpoint-90/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-78] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6828, 'learning_rate': 4.96218487394958e-06, 'epoch': 15.99}


Saving model checkpoint to ./model_output/checkpoint-96
Configuration saved in ./model_output/checkpoint-96/config.json


{'eval_loss': 0.599514365196228, 'eval_runtime': 1.716, 'eval_samples_per_second': 5.827, 'eval_steps_per_second': 5.827, 'epoch': 15.99}


Model weights saved in ./model_output/checkpoint-96/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-84] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6765, 'learning_rate': 4.9571428571428575e-06, 'epoch': 16.99}


Saving model checkpoint to ./model_output/checkpoint-102
Configuration saved in ./model_output/checkpoint-102/config.json


{'eval_loss': 0.600234866142273, 'eval_runtime': 1.7152, 'eval_samples_per_second': 5.83, 'eval_steps_per_second': 5.83, 'epoch': 16.99}


Model weights saved in ./model_output/checkpoint-102/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-90] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6671, 'learning_rate': 4.952100840336135e-06, 'epoch': 17.99}


Saving model checkpoint to ./model_output/checkpoint-108
Configuration saved in ./model_output/checkpoint-108/config.json


{'eval_loss': 0.5858156085014343, 'eval_runtime': 1.7212, 'eval_samples_per_second': 5.81, 'eval_steps_per_second': 5.81, 'epoch': 17.99}


Model weights saved in ./model_output/checkpoint-108/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-96] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6438, 'learning_rate': 4.947058823529412e-06, 'epoch': 18.99}


Saving model checkpoint to ./model_output/checkpoint-114
Configuration saved in ./model_output/checkpoint-114/config.json


{'eval_loss': 0.5804284811019897, 'eval_runtime': 1.7167, 'eval_samples_per_second': 5.825, 'eval_steps_per_second': 5.825, 'epoch': 18.99}


Model weights saved in ./model_output/checkpoint-114/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-102] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6295, 'learning_rate': 4.942016806722689e-06, 'epoch': 19.99}


Saving model checkpoint to ./model_output/checkpoint-120
Configuration saved in ./model_output/checkpoint-120/config.json


{'eval_loss': 0.5527266263961792, 'eval_runtime': 1.716, 'eval_samples_per_second': 5.827, 'eval_steps_per_second': 5.827, 'epoch': 19.99}


Model weights saved in ./model_output/checkpoint-120/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-108] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5787, 'learning_rate': 4.936974789915967e-06, 'epoch': 20.99}


Saving model checkpoint to ./model_output/checkpoint-126
Configuration saved in ./model_output/checkpoint-126/config.json


{'eval_loss': 0.5058836936950684, 'eval_runtime': 1.717, 'eval_samples_per_second': 5.824, 'eval_steps_per_second': 5.824, 'epoch': 20.99}


Model weights saved in ./model_output/checkpoint-126/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-114] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5282, 'learning_rate': 4.931932773109244e-06, 'epoch': 21.99}


Saving model checkpoint to ./model_output/checkpoint-132
Configuration saved in ./model_output/checkpoint-132/config.json


{'eval_loss': 0.4545834958553314, 'eval_runtime': 1.7125, 'eval_samples_per_second': 5.839, 'eval_steps_per_second': 5.839, 'epoch': 21.99}


Model weights saved in ./model_output/checkpoint-132/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-120] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4674, 'learning_rate': 4.926890756302521e-06, 'epoch': 22.99}


Saving model checkpoint to ./model_output/checkpoint-138
Configuration saved in ./model_output/checkpoint-138/config.json


{'eval_loss': 0.550338864326477, 'eval_runtime': 1.7143, 'eval_samples_per_second': 5.833, 'eval_steps_per_second': 5.833, 'epoch': 22.99}


Model weights saved in ./model_output/checkpoint-138/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-126] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4278, 'learning_rate': 4.922689075630252e-06, 'epoch': 23.99}


Saving model checkpoint to ./model_output/checkpoint-144
Configuration saved in ./model_output/checkpoint-144/config.json


{'eval_loss': 0.43914595246315, 'eval_runtime': 1.7251, 'eval_samples_per_second': 5.797, 'eval_steps_per_second': 5.797, 'epoch': 23.99}


Model weights saved in ./model_output/checkpoint-144/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-132] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.3407, 'learning_rate': 4.91764705882353e-06, 'epoch': 24.99}


Saving model checkpoint to ./model_output/checkpoint-150
Configuration saved in ./model_output/checkpoint-150/config.json


{'eval_loss': 0.5544610619544983, 'eval_runtime': 1.7165, 'eval_samples_per_second': 5.826, 'eval_steps_per_second': 5.826, 'epoch': 24.99}


Model weights saved in ./model_output/checkpoint-150/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-138] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.3066, 'learning_rate': 4.912605042016807e-06, 'epoch': 25.99}


Saving model checkpoint to ./model_output/checkpoint-156
Configuration saved in ./model_output/checkpoint-156/config.json


{'eval_loss': 0.4224693775177002, 'eval_runtime': 1.7273, 'eval_samples_per_second': 5.789, 'eval_steps_per_second': 5.789, 'epoch': 25.99}


Model weights saved in ./model_output/checkpoint-156/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-144] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2751, 'learning_rate': 4.907563025210084e-06, 'epoch': 26.99}


Saving model checkpoint to ./model_output/checkpoint-162
Configuration saved in ./model_output/checkpoint-162/config.json


{'eval_loss': 0.6031414270401001, 'eval_runtime': 1.7206, 'eval_samples_per_second': 5.812, 'eval_steps_per_second': 5.812, 'epoch': 26.99}


Model weights saved in ./model_output/checkpoint-162/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-150] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1921, 'learning_rate': 4.902521008403362e-06, 'epoch': 27.99}


Saving model checkpoint to ./model_output/checkpoint-168
Configuration saved in ./model_output/checkpoint-168/config.json


{'eval_loss': 0.573469340801239, 'eval_runtime': 1.7194, 'eval_samples_per_second': 5.816, 'eval_steps_per_second': 5.816, 'epoch': 27.99}


Model weights saved in ./model_output/checkpoint-168/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-162] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1459, 'learning_rate': 4.898319327731093e-06, 'epoch': 28.99}


Saving model checkpoint to ./model_output/checkpoint-174
Configuration saved in ./model_output/checkpoint-174/config.json


{'eval_loss': 0.7804755568504333, 'eval_runtime': 1.7169, 'eval_samples_per_second': 5.825, 'eval_steps_per_second': 5.825, 'epoch': 28.99}


Model weights saved in ./model_output/checkpoint-174/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-168] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0951, 'learning_rate': 4.89327731092437e-06, 'epoch': 29.99}


Saving model checkpoint to ./model_output/checkpoint-180
Configuration saved in ./model_output/checkpoint-180/config.json


{'eval_loss': 0.8332098722457886, 'eval_runtime': 1.7176, 'eval_samples_per_second': 5.822, 'eval_steps_per_second': 5.822, 'epoch': 29.99}


Model weights saved in ./model_output/checkpoint-180/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-174] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1008, 'learning_rate': 4.889075630252102e-06, 'epoch': 30.99}


Saving model checkpoint to ./model_output/checkpoint-186
Configuration saved in ./model_output/checkpoint-186/config.json


{'eval_loss': 1.0689421892166138, 'eval_runtime': 1.7221, 'eval_samples_per_second': 5.807, 'eval_steps_per_second': 5.807, 'epoch': 30.99}


Model weights saved in ./model_output/checkpoint-186/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-180] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0454, 'learning_rate': 4.884033613445378e-06, 'epoch': 31.99}


Saving model checkpoint to ./model_output/checkpoint-192
Configuration saved in ./model_output/checkpoint-192/config.json


{'eval_loss': 1.0063122510910034, 'eval_runtime': 1.7186, 'eval_samples_per_second': 5.819, 'eval_steps_per_second': 5.819, 'epoch': 31.99}


Model weights saved in ./model_output/checkpoint-192/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-186] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.032, 'learning_rate': 4.878991596638656e-06, 'epoch': 32.99}


Saving model checkpoint to ./model_output/checkpoint-198
Configuration saved in ./model_output/checkpoint-198/config.json


{'eval_loss': 1.1050448417663574, 'eval_runtime': 1.7106, 'eval_samples_per_second': 5.846, 'eval_steps_per_second': 5.846, 'epoch': 32.99}


Model weights saved in ./model_output/checkpoint-198/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-192] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0172, 'learning_rate': 4.873949579831933e-06, 'epoch': 33.99}


Saving model checkpoint to ./model_output/checkpoint-204
Configuration saved in ./model_output/checkpoint-204/config.json


{'eval_loss': 1.23089599609375, 'eval_runtime': 1.7167, 'eval_samples_per_second': 5.825, 'eval_steps_per_second': 5.825, 'epoch': 33.99}


Model weights saved in ./model_output/checkpoint-204/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-198] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.012, 'learning_rate': 4.86890756302521e-06, 'epoch': 34.99}


Saving model checkpoint to ./model_output/checkpoint-210
Configuration saved in ./model_output/checkpoint-210/config.json


{'eval_loss': 1.380677342414856, 'eval_runtime': 1.7181, 'eval_samples_per_second': 5.82, 'eval_steps_per_second': 5.82, 'epoch': 34.99}


Model weights saved in ./model_output/checkpoint-210/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-204] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0084, 'learning_rate': 4.863865546218488e-06, 'epoch': 35.99}


Saving model checkpoint to ./model_output/checkpoint-216
Configuration saved in ./model_output/checkpoint-216/config.json


{'eval_loss': 1.3584096431732178, 'eval_runtime': 1.7172, 'eval_samples_per_second': 5.824, 'eval_steps_per_second': 5.824, 'epoch': 35.99}


Model weights saved in ./model_output/checkpoint-216/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-210] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./model_output/checkpoint-156 (score: 0.4224693775177002).
***** Running Prediction *****
  Num examples = 10
  Batch size = 1


{'train_runtime': 2296.977, 'train_samples_per_second': 42.229, 'train_steps_per_second': 2.612, 'train_loss': 0.4797689572528557, 'epoch': 35.99}
Fitting model: roberta_pysch using fold 4 as out of fold test data.
Train data sizes: (94, 94).
Val data sizes: (10, 10).
Test data sizes: (13, 13).


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file config.json from cache at /home/joel_stremmel/.cache/huggingface/hub/models--mlaricheva--roberta-psych/snapshots/18f4eb3ec5e26053f262d6e19f98ede75673ff33/config.json
Model config RobertaConfig {
  "_name_or_path": "mlaricheva/roberta-psych",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

loading file vocab.json from c

Map:   0%|          | 0/94 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Map:   0%|          | 0/13 [00:00<?, ? examples/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using cuda_amp half precision backend
***** Running training *****
  Num examples = 94
  Num Epochs = 1000
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 16
  Total optimization steps = 5000
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8149, 'learning_rate': 5.000000000000001e-07, 'epoch': 0.85}


Saving model checkpoint to ./model_output/checkpoint-5
Configuration saved in ./model_output/checkpoint-5/config.json


{'eval_loss': 0.6824673414230347, 'eval_runtime': 1.7168, 'eval_samples_per_second': 5.825, 'eval_steps_per_second': 5.825, 'epoch': 0.85}


Model weights saved in ./model_output/checkpoint-5/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-156] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8103, 'learning_rate': 1.0000000000000002e-06, 'epoch': 1.85}


Saving model checkpoint to ./model_output/checkpoint-10
Configuration saved in ./model_output/checkpoint-10/config.json


{'eval_loss': 0.6806558966636658, 'eval_runtime': 1.7218, 'eval_samples_per_second': 5.808, 'eval_steps_per_second': 5.808, 'epoch': 1.85}


Model weights saved in ./model_output/checkpoint-10/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-216] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8068, 'learning_rate': 1.5e-06, 'epoch': 2.85}


Saving model checkpoint to ./model_output/checkpoint-15
Configuration saved in ./model_output/checkpoint-15/config.json


{'eval_loss': 0.6778399348258972, 'eval_runtime': 1.7192, 'eval_samples_per_second': 5.817, 'eval_steps_per_second': 5.817, 'epoch': 2.85}


Model weights saved in ./model_output/checkpoint-15/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-5] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8071, 'learning_rate': 2.0000000000000003e-06, 'epoch': 3.85}


Saving model checkpoint to ./model_output/checkpoint-20
Configuration saved in ./model_output/checkpoint-20/config.json


{'eval_loss': 0.6726669073104858, 'eval_runtime': 1.7234, 'eval_samples_per_second': 5.802, 'eval_steps_per_second': 5.802, 'epoch': 3.85}


Model weights saved in ./model_output/checkpoint-20/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-10] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8027, 'learning_rate': 2.5e-06, 'epoch': 4.85}


Saving model checkpoint to ./model_output/checkpoint-25
Configuration saved in ./model_output/checkpoint-25/config.json


{'eval_loss': 0.6665602922439575, 'eval_runtime': 1.7182, 'eval_samples_per_second': 5.82, 'eval_steps_per_second': 5.82, 'epoch': 4.85}


Model weights saved in ./model_output/checkpoint-25/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-15] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8082, 'learning_rate': 3e-06, 'epoch': 5.85}


Saving model checkpoint to ./model_output/checkpoint-30
Configuration saved in ./model_output/checkpoint-30/config.json


{'eval_loss': 0.6587536931037903, 'eval_runtime': 1.717, 'eval_samples_per_second': 5.824, 'eval_steps_per_second': 5.824, 'epoch': 5.85}


Model weights saved in ./model_output/checkpoint-30/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-20] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8002, 'learning_rate': 3.5e-06, 'epoch': 6.85}


Saving model checkpoint to ./model_output/checkpoint-35
Configuration saved in ./model_output/checkpoint-35/config.json


{'eval_loss': 0.6513532996177673, 'eval_runtime': 1.7194, 'eval_samples_per_second': 5.816, 'eval_steps_per_second': 5.816, 'epoch': 6.85}


Model weights saved in ./model_output/checkpoint-35/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-25] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7947, 'learning_rate': 4.000000000000001e-06, 'epoch': 7.85}


Saving model checkpoint to ./model_output/checkpoint-40
Configuration saved in ./model_output/checkpoint-40/config.json


{'eval_loss': 0.6418934464454651, 'eval_runtime': 1.7164, 'eval_samples_per_second': 5.826, 'eval_steps_per_second': 5.826, 'epoch': 7.85}


Model weights saved in ./model_output/checkpoint-40/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-30] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7869, 'learning_rate': 4.5e-06, 'epoch': 8.85}


Saving model checkpoint to ./model_output/checkpoint-45
Configuration saved in ./model_output/checkpoint-45/config.json


{'eval_loss': 0.6323486566543579, 'eval_runtime': 1.7198, 'eval_samples_per_second': 5.815, 'eval_steps_per_second': 5.815, 'epoch': 8.85}


Model weights saved in ./model_output/checkpoint-45/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-35] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8062, 'learning_rate': 5e-06, 'epoch': 9.85}


Saving model checkpoint to ./model_output/checkpoint-50
Configuration saved in ./model_output/checkpoint-50/config.json


{'eval_loss': 0.6248337030410767, 'eval_runtime': 1.7186, 'eval_samples_per_second': 5.819, 'eval_steps_per_second': 5.819, 'epoch': 9.85}


Model weights saved in ./model_output/checkpoint-50/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-40] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.782, 'learning_rate': 4.995959595959596e-06, 'epoch': 10.85}


Saving model checkpoint to ./model_output/checkpoint-55
Configuration saved in ./model_output/checkpoint-55/config.json


{'eval_loss': 0.6206249594688416, 'eval_runtime': 1.7094, 'eval_samples_per_second': 5.85, 'eval_steps_per_second': 5.85, 'epoch': 10.85}


Model weights saved in ./model_output/checkpoint-55/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-45] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7943, 'learning_rate': 4.990909090909091e-06, 'epoch': 11.85}


Saving model checkpoint to ./model_output/checkpoint-60
Configuration saved in ./model_output/checkpoint-60/config.json


{'eval_loss': 0.6152738332748413, 'eval_runtime': 1.7219, 'eval_samples_per_second': 5.807, 'eval_steps_per_second': 5.807, 'epoch': 11.85}


Model weights saved in ./model_output/checkpoint-60/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-50] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7878, 'learning_rate': 4.9858585858585865e-06, 'epoch': 12.85}


Saving model checkpoint to ./model_output/checkpoint-65
Configuration saved in ./model_output/checkpoint-65/config.json


{'eval_loss': 0.6146801710128784, 'eval_runtime': 1.7147, 'eval_samples_per_second': 5.832, 'eval_steps_per_second': 5.832, 'epoch': 12.85}


Model weights saved in ./model_output/checkpoint-65/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-55] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7799, 'learning_rate': 4.980808080808081e-06, 'epoch': 13.85}


Saving model checkpoint to ./model_output/checkpoint-70
Configuration saved in ./model_output/checkpoint-70/config.json


{'eval_loss': 0.611150324344635, 'eval_runtime': 1.7133, 'eval_samples_per_second': 5.837, 'eval_steps_per_second': 5.837, 'epoch': 13.85}


Model weights saved in ./model_output/checkpoint-70/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-60] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7825, 'learning_rate': 4.975757575757576e-06, 'epoch': 14.85}


Saving model checkpoint to ./model_output/checkpoint-75
Configuration saved in ./model_output/checkpoint-75/config.json


{'eval_loss': 0.6101422309875488, 'eval_runtime': 1.7196, 'eval_samples_per_second': 5.815, 'eval_steps_per_second': 5.815, 'epoch': 14.85}


Model weights saved in ./model_output/checkpoint-75/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-65] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7984, 'learning_rate': 4.970707070707071e-06, 'epoch': 15.85}


Saving model checkpoint to ./model_output/checkpoint-80
Configuration saved in ./model_output/checkpoint-80/config.json


{'eval_loss': 0.6088457107543945, 'eval_runtime': 1.715, 'eval_samples_per_second': 5.831, 'eval_steps_per_second': 5.831, 'epoch': 15.85}


Model weights saved in ./model_output/checkpoint-80/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-70] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7876, 'learning_rate': 4.965656565656566e-06, 'epoch': 16.85}


Saving model checkpoint to ./model_output/checkpoint-85
Configuration saved in ./model_output/checkpoint-85/config.json


{'eval_loss': 0.605900764465332, 'eval_runtime': 1.7136, 'eval_samples_per_second': 5.836, 'eval_steps_per_second': 5.836, 'epoch': 16.85}


Model weights saved in ./model_output/checkpoint-85/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-75] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7827, 'learning_rate': 4.9606060606060605e-06, 'epoch': 17.85}


Saving model checkpoint to ./model_output/checkpoint-90
Configuration saved in ./model_output/checkpoint-90/config.json


{'eval_loss': 0.6038755774497986, 'eval_runtime': 1.7179, 'eval_samples_per_second': 5.821, 'eval_steps_per_second': 5.821, 'epoch': 17.85}


Model weights saved in ./model_output/checkpoint-90/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-80] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7715, 'learning_rate': 4.9555555555555565e-06, 'epoch': 18.85}


Saving model checkpoint to ./model_output/checkpoint-95
Configuration saved in ./model_output/checkpoint-95/config.json


{'eval_loss': 0.5973131656646729, 'eval_runtime': 1.7176, 'eval_samples_per_second': 5.822, 'eval_steps_per_second': 5.822, 'epoch': 18.85}


Model weights saved in ./model_output/checkpoint-95/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-85] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7566, 'learning_rate': 4.950505050505051e-06, 'epoch': 19.85}


Saving model checkpoint to ./model_output/checkpoint-100
Configuration saved in ./model_output/checkpoint-100/config.json


{'eval_loss': 0.580156147480011, 'eval_runtime': 1.7166, 'eval_samples_per_second': 5.825, 'eval_steps_per_second': 5.825, 'epoch': 19.85}


Model weights saved in ./model_output/checkpoint-100/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-90] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7484, 'learning_rate': 4.945454545454546e-06, 'epoch': 20.85}


Saving model checkpoint to ./model_output/checkpoint-105
Configuration saved in ./model_output/checkpoint-105/config.json


{'eval_loss': 0.5568384528160095, 'eval_runtime': 1.7197, 'eval_samples_per_second': 5.815, 'eval_steps_per_second': 5.815, 'epoch': 20.85}


Model weights saved in ./model_output/checkpoint-105/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-95] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7432, 'learning_rate': 4.940404040404041e-06, 'epoch': 21.85}


Saving model checkpoint to ./model_output/checkpoint-110
Configuration saved in ./model_output/checkpoint-110/config.json


{'eval_loss': 0.5368273854255676, 'eval_runtime': 1.7242, 'eval_samples_per_second': 5.8, 'eval_steps_per_second': 5.8, 'epoch': 21.85}


Model weights saved in ./model_output/checkpoint-110/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-100] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7257, 'learning_rate': 4.935353535353536e-06, 'epoch': 22.85}


Saving model checkpoint to ./model_output/checkpoint-115
Configuration saved in ./model_output/checkpoint-115/config.json


{'eval_loss': 0.525854229927063, 'eval_runtime': 1.7144, 'eval_samples_per_second': 5.833, 'eval_steps_per_second': 5.833, 'epoch': 22.85}


Model weights saved in ./model_output/checkpoint-115/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-105] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7208, 'learning_rate': 4.9303030303030305e-06, 'epoch': 23.85}


Saving model checkpoint to ./model_output/checkpoint-120
Configuration saved in ./model_output/checkpoint-120/config.json


{'eval_loss': 0.5219119787216187, 'eval_runtime': 1.7164, 'eval_samples_per_second': 5.826, 'eval_steps_per_second': 5.826, 'epoch': 23.85}


Model weights saved in ./model_output/checkpoint-120/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-110] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6906, 'learning_rate': 4.925252525252526e-06, 'epoch': 24.85}


Saving model checkpoint to ./model_output/checkpoint-125
Configuration saved in ./model_output/checkpoint-125/config.json


{'eval_loss': 0.5075762271881104, 'eval_runtime': 1.7169, 'eval_samples_per_second': 5.824, 'eval_steps_per_second': 5.824, 'epoch': 24.85}


Model weights saved in ./model_output/checkpoint-125/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-115] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6762, 'learning_rate': 4.920202020202021e-06, 'epoch': 25.85}


Saving model checkpoint to ./model_output/checkpoint-130
Configuration saved in ./model_output/checkpoint-130/config.json


{'eval_loss': 0.48214831948280334, 'eval_runtime': 1.7188, 'eval_samples_per_second': 5.818, 'eval_steps_per_second': 5.818, 'epoch': 25.85}


Model weights saved in ./model_output/checkpoint-130/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-120] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6616, 'learning_rate': 4.915151515151516e-06, 'epoch': 26.85}


Saving model checkpoint to ./model_output/checkpoint-135
Configuration saved in ./model_output/checkpoint-135/config.json


{'eval_loss': 0.47606945037841797, 'eval_runtime': 1.7195, 'eval_samples_per_second': 5.816, 'eval_steps_per_second': 5.816, 'epoch': 26.85}


Model weights saved in ./model_output/checkpoint-135/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-125] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6369, 'learning_rate': 4.91010101010101e-06, 'epoch': 27.85}


Saving model checkpoint to ./model_output/checkpoint-140
Configuration saved in ./model_output/checkpoint-140/config.json


{'eval_loss': 0.45215949416160583, 'eval_runtime': 1.7131, 'eval_samples_per_second': 5.837, 'eval_steps_per_second': 5.837, 'epoch': 27.85}


Model weights saved in ./model_output/checkpoint-140/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-130] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6041, 'learning_rate': 4.905050505050505e-06, 'epoch': 28.85}


Saving model checkpoint to ./model_output/checkpoint-145
Configuration saved in ./model_output/checkpoint-145/config.json


{'eval_loss': 0.4545782506465912, 'eval_runtime': 1.7174, 'eval_samples_per_second': 5.823, 'eval_steps_per_second': 5.823, 'epoch': 28.85}


Model weights saved in ./model_output/checkpoint-145/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-135] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5802, 'learning_rate': 4.9000000000000005e-06, 'epoch': 29.85}


Saving model checkpoint to ./model_output/checkpoint-150
Configuration saved in ./model_output/checkpoint-150/config.json


{'eval_loss': 0.4850766658782959, 'eval_runtime': 1.7232, 'eval_samples_per_second': 5.803, 'eval_steps_per_second': 5.803, 'epoch': 29.85}


Model weights saved in ./model_output/checkpoint-150/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-145] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5522, 'learning_rate': 4.895959595959596e-06, 'epoch': 30.85}


Saving model checkpoint to ./model_output/checkpoint-155
Configuration saved in ./model_output/checkpoint-155/config.json


{'eval_loss': 0.4340236186981201, 'eval_runtime': 1.7179, 'eval_samples_per_second': 5.821, 'eval_steps_per_second': 5.821, 'epoch': 30.85}


Model weights saved in ./model_output/checkpoint-155/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-140] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5287, 'learning_rate': 4.8909090909090914e-06, 'epoch': 31.85}


Saving model checkpoint to ./model_output/checkpoint-160
Configuration saved in ./model_output/checkpoint-160/config.json


{'eval_loss': 0.39734917879104614, 'eval_runtime': 1.7139, 'eval_samples_per_second': 5.835, 'eval_steps_per_second': 5.835, 'epoch': 31.85}


Model weights saved in ./model_output/checkpoint-160/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-150] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4707, 'learning_rate': 4.885858585858586e-06, 'epoch': 32.85}


Saving model checkpoint to ./model_output/checkpoint-165
Configuration saved in ./model_output/checkpoint-165/config.json


{'eval_loss': 0.39493563771247864, 'eval_runtime': 1.7268, 'eval_samples_per_second': 5.791, 'eval_steps_per_second': 5.791, 'epoch': 32.85}


Model weights saved in ./model_output/checkpoint-165/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-155] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4325, 'learning_rate': 4.880808080808081e-06, 'epoch': 33.85}


Saving model checkpoint to ./model_output/checkpoint-170
Configuration saved in ./model_output/checkpoint-170/config.json


{'eval_loss': 0.3654637932777405, 'eval_runtime': 1.7207, 'eval_samples_per_second': 5.812, 'eval_steps_per_second': 5.812, 'epoch': 33.85}


Model weights saved in ./model_output/checkpoint-170/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-160] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.3898, 'learning_rate': 4.875757575757576e-06, 'epoch': 34.85}


Saving model checkpoint to ./model_output/checkpoint-175
Configuration saved in ./model_output/checkpoint-175/config.json


{'eval_loss': 0.35563015937805176, 'eval_runtime': 1.721, 'eval_samples_per_second': 5.811, 'eval_steps_per_second': 5.811, 'epoch': 34.85}


Model weights saved in ./model_output/checkpoint-175/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-165] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.3185, 'learning_rate': 4.870707070707071e-06, 'epoch': 35.85}


Saving model checkpoint to ./model_output/checkpoint-180
Configuration saved in ./model_output/checkpoint-180/config.json


{'eval_loss': 0.312919557094574, 'eval_runtime': 1.7198, 'eval_samples_per_second': 5.815, 'eval_steps_per_second': 5.815, 'epoch': 35.85}


Model weights saved in ./model_output/checkpoint-180/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-170] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2895, 'learning_rate': 4.865656565656566e-06, 'epoch': 36.85}


Saving model checkpoint to ./model_output/checkpoint-185
Configuration saved in ./model_output/checkpoint-185/config.json


{'eval_loss': 0.28761106729507446, 'eval_runtime': 1.7171, 'eval_samples_per_second': 5.824, 'eval_steps_per_second': 5.824, 'epoch': 36.85}


Model weights saved in ./model_output/checkpoint-185/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-175] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.249, 'learning_rate': 4.8606060606060615e-06, 'epoch': 37.85}


Saving model checkpoint to ./model_output/checkpoint-190
Configuration saved in ./model_output/checkpoint-190/config.json


{'eval_loss': 0.262675017118454, 'eval_runtime': 1.7161, 'eval_samples_per_second': 5.827, 'eval_steps_per_second': 5.827, 'epoch': 37.85}


Model weights saved in ./model_output/checkpoint-190/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-180] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2113, 'learning_rate': 4.855555555555556e-06, 'epoch': 38.85}


Saving model checkpoint to ./model_output/checkpoint-195
Configuration saved in ./model_output/checkpoint-195/config.json


{'eval_loss': 0.24693556129932404, 'eval_runtime': 1.7191, 'eval_samples_per_second': 5.817, 'eval_steps_per_second': 5.817, 'epoch': 38.85}


Model weights saved in ./model_output/checkpoint-195/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-185] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1498, 'learning_rate': 4.850505050505051e-06, 'epoch': 39.85}


Saving model checkpoint to ./model_output/checkpoint-200
Configuration saved in ./model_output/checkpoint-200/config.json


{'eval_loss': 0.30475398898124695, 'eval_runtime': 1.7217, 'eval_samples_per_second': 5.808, 'eval_steps_per_second': 5.808, 'epoch': 39.85}


Model weights saved in ./model_output/checkpoint-200/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-190] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1355, 'learning_rate': 4.845454545454546e-06, 'epoch': 40.85}


Saving model checkpoint to ./model_output/checkpoint-205
Configuration saved in ./model_output/checkpoint-205/config.json


{'eval_loss': 0.2399677336215973, 'eval_runtime': 1.7189, 'eval_samples_per_second': 5.818, 'eval_steps_per_second': 5.818, 'epoch': 40.85}


Model weights saved in ./model_output/checkpoint-205/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-195] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1158, 'learning_rate': 4.840404040404041e-06, 'epoch': 41.85}


Saving model checkpoint to ./model_output/checkpoint-210
Configuration saved in ./model_output/checkpoint-210/config.json


{'eval_loss': 0.2500469982624054, 'eval_runtime': 1.7177, 'eval_samples_per_second': 5.822, 'eval_steps_per_second': 5.822, 'epoch': 41.85}


Model weights saved in ./model_output/checkpoint-210/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-200] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0902, 'learning_rate': 4.836363636363637e-06, 'epoch': 42.85}


Saving model checkpoint to ./model_output/checkpoint-215
Configuration saved in ./model_output/checkpoint-215/config.json


{'eval_loss': 0.29348263144493103, 'eval_runtime': 1.7226, 'eval_samples_per_second': 5.805, 'eval_steps_per_second': 5.805, 'epoch': 42.85}


Model weights saved in ./model_output/checkpoint-215/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-210] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0742, 'learning_rate': 4.831313131313132e-06, 'epoch': 43.85}


Saving model checkpoint to ./model_output/checkpoint-220
Configuration saved in ./model_output/checkpoint-220/config.json


{'eval_loss': 0.27049511671066284, 'eval_runtime': 1.7166, 'eval_samples_per_second': 5.825, 'eval_steps_per_second': 5.825, 'epoch': 43.85}


Model weights saved in ./model_output/checkpoint-220/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-215] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0501, 'learning_rate': 4.826262626262626e-06, 'epoch': 44.85}


Saving model checkpoint to ./model_output/checkpoint-225
Configuration saved in ./model_output/checkpoint-225/config.json


{'eval_loss': 0.3901243507862091, 'eval_runtime': 1.7154, 'eval_samples_per_second': 5.83, 'eval_steps_per_second': 5.83, 'epoch': 44.85}


Model weights saved in ./model_output/checkpoint-225/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-220] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0329, 'learning_rate': 4.8212121212121215e-06, 'epoch': 45.85}


Saving model checkpoint to ./model_output/checkpoint-230
Configuration saved in ./model_output/checkpoint-230/config.json


{'eval_loss': 0.3825642168521881, 'eval_runtime': 1.7341, 'eval_samples_per_second': 5.767, 'eval_steps_per_second': 5.767, 'epoch': 45.85}


Model weights saved in ./model_output/checkpoint-230/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-225] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0258, 'learning_rate': 4.816161616161617e-06, 'epoch': 46.85}


Saving model checkpoint to ./model_output/checkpoint-235
Configuration saved in ./model_output/checkpoint-235/config.json


{'eval_loss': 0.6113086938858032, 'eval_runtime': 1.7134, 'eval_samples_per_second': 5.836, 'eval_steps_per_second': 5.836, 'epoch': 46.85}


Model weights saved in ./model_output/checkpoint-235/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-230] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0207, 'learning_rate': 4.811111111111111e-06, 'epoch': 47.85}


Saving model checkpoint to ./model_output/checkpoint-240
Configuration saved in ./model_output/checkpoint-240/config.json


{'eval_loss': 0.6433030366897583, 'eval_runtime': 1.7204, 'eval_samples_per_second': 5.813, 'eval_steps_per_second': 5.813, 'epoch': 47.85}


Model weights saved in ./model_output/checkpoint-240/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-235] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0138, 'learning_rate': 4.806060606060606e-06, 'epoch': 48.85}


Saving model checkpoint to ./model_output/checkpoint-245
Configuration saved in ./model_output/checkpoint-245/config.json


{'eval_loss': 0.6069953441619873, 'eval_runtime': 1.7217, 'eval_samples_per_second': 5.808, 'eval_steps_per_second': 5.808, 'epoch': 48.85}


Model weights saved in ./model_output/checkpoint-245/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-240] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0107, 'learning_rate': 4.801010101010101e-06, 'epoch': 49.85}


Saving model checkpoint to ./model_output/checkpoint-250
Configuration saved in ./model_output/checkpoint-250/config.json


{'eval_loss': 1.0155766010284424, 'eval_runtime': 1.7271, 'eval_samples_per_second': 5.79, 'eval_steps_per_second': 5.79, 'epoch': 49.85}


Model weights saved in ./model_output/checkpoint-250/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-245] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0067, 'learning_rate': 4.795959595959596e-06, 'epoch': 50.85}


Saving model checkpoint to ./model_output/checkpoint-255
Configuration saved in ./model_output/checkpoint-255/config.json


{'eval_loss': 0.6710972785949707, 'eval_runtime': 1.7185, 'eval_samples_per_second': 5.819, 'eval_steps_per_second': 5.819, 'epoch': 50.85}


Model weights saved in ./model_output/checkpoint-255/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-250] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./model_output/checkpoint-205 (score: 0.2399677336215973).
***** Running Prediction *****
  Num examples = 13
  Batch size = 1


{'train_runtime': 3143.6178, 'train_samples_per_second': 29.902, 'train_steps_per_second': 1.591, 'train_loss': 0.525834383800918, 'epoch': 50.85}
Fitting model: roberta_pysch using fold 5 as out of fold test data.
Train data sizes: (98, 98).
Val data sizes: (10, 10).
Test data sizes: (9, 9).


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file config.json from cache at /home/joel_stremmel/.cache/huggingface/hub/models--mlaricheva--roberta-psych/snapshots/18f4eb3ec5e26053f262d6e19f98ede75673ff33/config.json
Model config RobertaConfig {
  "_name_or_path": "mlaricheva/roberta-psych",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

loading file vocab.json from c

Map:   0%|          | 0/98 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Map:   0%|          | 0/9 [00:00<?, ? examples/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using cuda_amp half precision backend
***** Running training *****
  Num examples = 98
  Num Epochs = 1000
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 16
  Total optimization steps = 6000
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6991, 'learning_rate': 6.000000000000001e-07, 'epoch': 0.98}


Saving model checkpoint to ./model_output/checkpoint-6
Configuration saved in ./model_output/checkpoint-6/config.json


{'eval_loss': 0.6533288955688477, 'eval_runtime': 1.7172, 'eval_samples_per_second': 5.823, 'eval_steps_per_second': 5.823, 'epoch': 0.98}


Model weights saved in ./model_output/checkpoint-6/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-205] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7018, 'learning_rate': 1.2000000000000002e-06, 'epoch': 1.98}


Saving model checkpoint to ./model_output/checkpoint-12
Configuration saved in ./model_output/checkpoint-12/config.json


{'eval_loss': 0.6521288752555847, 'eval_runtime': 1.7149, 'eval_samples_per_second': 5.831, 'eval_steps_per_second': 5.831, 'epoch': 1.98}


Model weights saved in ./model_output/checkpoint-12/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-255] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7058, 'learning_rate': 1.8000000000000001e-06, 'epoch': 2.98}


Saving model checkpoint to ./model_output/checkpoint-18
Configuration saved in ./model_output/checkpoint-18/config.json


{'eval_loss': 0.6502513885498047, 'eval_runtime': 1.7229, 'eval_samples_per_second': 5.804, 'eval_steps_per_second': 5.804, 'epoch': 2.98}


Model weights saved in ./model_output/checkpoint-18/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-6] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6997, 'learning_rate': 2.4000000000000003e-06, 'epoch': 3.98}


Saving model checkpoint to ./model_output/checkpoint-24
Configuration saved in ./model_output/checkpoint-24/config.json


{'eval_loss': 0.6486254334449768, 'eval_runtime': 1.7197, 'eval_samples_per_second': 5.815, 'eval_steps_per_second': 5.815, 'epoch': 3.98}


Model weights saved in ./model_output/checkpoint-24/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-12] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6979, 'learning_rate': 3e-06, 'epoch': 4.98}


Saving model checkpoint to ./model_output/checkpoint-30
Configuration saved in ./model_output/checkpoint-30/config.json


{'eval_loss': 0.646836519241333, 'eval_runtime': 1.7208, 'eval_samples_per_second': 5.811, 'eval_steps_per_second': 5.811, 'epoch': 4.98}


Model weights saved in ./model_output/checkpoint-30/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-18] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6873, 'learning_rate': 3.6000000000000003e-06, 'epoch': 5.98}


Saving model checkpoint to ./model_output/checkpoint-36
Configuration saved in ./model_output/checkpoint-36/config.json


{'eval_loss': 0.6444528102874756, 'eval_runtime': 1.717, 'eval_samples_per_second': 5.824, 'eval_steps_per_second': 5.824, 'epoch': 5.98}


Model weights saved in ./model_output/checkpoint-36/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-24] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6982, 'learning_rate': 4.2000000000000004e-06, 'epoch': 6.98}


Saving model checkpoint to ./model_output/checkpoint-42
Configuration saved in ./model_output/checkpoint-42/config.json


{'eval_loss': 0.6395242810249329, 'eval_runtime': 1.7224, 'eval_samples_per_second': 5.806, 'eval_steps_per_second': 5.806, 'epoch': 6.98}


Model weights saved in ./model_output/checkpoint-42/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-30] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6879, 'learning_rate': 4.800000000000001e-06, 'epoch': 7.98}


Saving model checkpoint to ./model_output/checkpoint-48
Configuration saved in ./model_output/checkpoint-48/config.json


{'eval_loss': 0.6341791152954102, 'eval_runtime': 1.7118, 'eval_samples_per_second': 5.842, 'eval_steps_per_second': 5.842, 'epoch': 7.98}


Model weights saved in ./model_output/checkpoint-48/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-36] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6887, 'learning_rate': 4.996638655462185e-06, 'epoch': 8.98}


Saving model checkpoint to ./model_output/checkpoint-54
Configuration saved in ./model_output/checkpoint-54/config.json


{'eval_loss': 0.6262345910072327, 'eval_runtime': 1.7149, 'eval_samples_per_second': 5.831, 'eval_steps_per_second': 5.831, 'epoch': 8.98}


Model weights saved in ./model_output/checkpoint-54/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-42] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6842, 'learning_rate': 4.9915966386554625e-06, 'epoch': 9.98}


Saving model checkpoint to ./model_output/checkpoint-60
Configuration saved in ./model_output/checkpoint-60/config.json


{'eval_loss': 0.6201871633529663, 'eval_runtime': 1.7163, 'eval_samples_per_second': 5.827, 'eval_steps_per_second': 5.827, 'epoch': 9.98}


Model weights saved in ./model_output/checkpoint-60/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-48] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6819, 'learning_rate': 4.98655462184874e-06, 'epoch': 10.98}


Saving model checkpoint to ./model_output/checkpoint-66
Configuration saved in ./model_output/checkpoint-66/config.json


{'eval_loss': 0.6112709045410156, 'eval_runtime': 1.7173, 'eval_samples_per_second': 5.823, 'eval_steps_per_second': 5.823, 'epoch': 10.98}


Model weights saved in ./model_output/checkpoint-66/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-54] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.689, 'learning_rate': 4.9815126050420174e-06, 'epoch': 11.98}


Saving model checkpoint to ./model_output/checkpoint-72
Configuration saved in ./model_output/checkpoint-72/config.json


{'eval_loss': 0.6008720993995667, 'eval_runtime': 1.7198, 'eval_samples_per_second': 5.815, 'eval_steps_per_second': 5.815, 'epoch': 11.98}


Model weights saved in ./model_output/checkpoint-72/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-60] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6866, 'learning_rate': 4.976470588235294e-06, 'epoch': 12.98}


Saving model checkpoint to ./model_output/checkpoint-78
Configuration saved in ./model_output/checkpoint-78/config.json


{'eval_loss': 0.5934730768203735, 'eval_runtime': 1.7162, 'eval_samples_per_second': 5.827, 'eval_steps_per_second': 5.827, 'epoch': 12.98}


Model weights saved in ./model_output/checkpoint-78/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-66] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6701, 'learning_rate': 4.971428571428572e-06, 'epoch': 13.98}


Saving model checkpoint to ./model_output/checkpoint-84
Configuration saved in ./model_output/checkpoint-84/config.json


{'eval_loss': 0.591976523399353, 'eval_runtime': 1.7156, 'eval_samples_per_second': 5.829, 'eval_steps_per_second': 5.829, 'epoch': 13.98}


Model weights saved in ./model_output/checkpoint-84/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-72] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6662, 'learning_rate': 4.966386554621849e-06, 'epoch': 14.98}


Saving model checkpoint to ./model_output/checkpoint-90
Configuration saved in ./model_output/checkpoint-90/config.json


{'eval_loss': 0.5846008658409119, 'eval_runtime': 1.719, 'eval_samples_per_second': 5.817, 'eval_steps_per_second': 5.817, 'epoch': 14.98}


Model weights saved in ./model_output/checkpoint-90/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-78] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6652, 'learning_rate': 4.961344537815126e-06, 'epoch': 15.98}


Saving model checkpoint to ./model_output/checkpoint-96
Configuration saved in ./model_output/checkpoint-96/config.json


{'eval_loss': 0.5779520273208618, 'eval_runtime': 1.7185, 'eval_samples_per_second': 5.819, 'eval_steps_per_second': 5.819, 'epoch': 15.98}


Model weights saved in ./model_output/checkpoint-96/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-84] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6661, 'learning_rate': 4.956302521008404e-06, 'epoch': 16.98}


Saving model checkpoint to ./model_output/checkpoint-102
Configuration saved in ./model_output/checkpoint-102/config.json


{'eval_loss': 0.5841462016105652, 'eval_runtime': 1.7083, 'eval_samples_per_second': 5.854, 'eval_steps_per_second': 5.854, 'epoch': 16.98}


Model weights saved in ./model_output/checkpoint-102/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-90] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6428, 'learning_rate': 4.951260504201681e-06, 'epoch': 17.98}


Saving model checkpoint to ./model_output/checkpoint-108
Configuration saved in ./model_output/checkpoint-108/config.json


{'eval_loss': 0.5746496319770813, 'eval_runtime': 1.7186, 'eval_samples_per_second': 5.819, 'eval_steps_per_second': 5.819, 'epoch': 17.98}


Model weights saved in ./model_output/checkpoint-108/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-96] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6169, 'learning_rate': 4.946218487394958e-06, 'epoch': 18.98}


Saving model checkpoint to ./model_output/checkpoint-114
Configuration saved in ./model_output/checkpoint-114/config.json


{'eval_loss': 0.5305923819541931, 'eval_runtime': 1.7233, 'eval_samples_per_second': 5.803, 'eval_steps_per_second': 5.803, 'epoch': 18.98}


Model weights saved in ./model_output/checkpoint-114/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-102] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6042, 'learning_rate': 4.941176470588236e-06, 'epoch': 19.98}


Saving model checkpoint to ./model_output/checkpoint-120
Configuration saved in ./model_output/checkpoint-120/config.json


{'eval_loss': 0.5187739133834839, 'eval_runtime': 1.7233, 'eval_samples_per_second': 5.803, 'eval_steps_per_second': 5.803, 'epoch': 19.98}


Model weights saved in ./model_output/checkpoint-120/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-108] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5693, 'learning_rate': 4.936134453781513e-06, 'epoch': 20.98}


Saving model checkpoint to ./model_output/checkpoint-126
Configuration saved in ./model_output/checkpoint-126/config.json


{'eval_loss': 0.4909462034702301, 'eval_runtime': 1.7191, 'eval_samples_per_second': 5.817, 'eval_steps_per_second': 5.817, 'epoch': 20.98}


Model weights saved in ./model_output/checkpoint-126/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-114] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5211, 'learning_rate': 4.93109243697479e-06, 'epoch': 21.98}


Saving model checkpoint to ./model_output/checkpoint-132
Configuration saved in ./model_output/checkpoint-132/config.json


{'eval_loss': 0.4629693925380707, 'eval_runtime': 1.717, 'eval_samples_per_second': 5.824, 'eval_steps_per_second': 5.824, 'epoch': 21.98}


Model weights saved in ./model_output/checkpoint-132/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-120] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4637, 'learning_rate': 4.926050420168068e-06, 'epoch': 22.98}


Saving model checkpoint to ./model_output/checkpoint-138
Configuration saved in ./model_output/checkpoint-138/config.json


{'eval_loss': 0.4289852976799011, 'eval_runtime': 1.7178, 'eval_samples_per_second': 5.821, 'eval_steps_per_second': 5.821, 'epoch': 22.98}


Model weights saved in ./model_output/checkpoint-138/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-126] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.3917, 'learning_rate': 4.921008403361345e-06, 'epoch': 23.98}


Saving model checkpoint to ./model_output/checkpoint-144
Configuration saved in ./model_output/checkpoint-144/config.json


{'eval_loss': 0.4214620590209961, 'eval_runtime': 1.719, 'eval_samples_per_second': 5.817, 'eval_steps_per_second': 5.817, 'epoch': 23.98}


Model weights saved in ./model_output/checkpoint-144/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-132] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.3461, 'learning_rate': 4.916806722689076e-06, 'epoch': 24.98}


Saving model checkpoint to ./model_output/checkpoint-150
Configuration saved in ./model_output/checkpoint-150/config.json


{'eval_loss': 0.4101842939853668, 'eval_runtime': 1.7312, 'eval_samples_per_second': 5.776, 'eval_steps_per_second': 5.776, 'epoch': 24.98}


Model weights saved in ./model_output/checkpoint-150/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-138] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2535, 'learning_rate': 4.911764705882353e-06, 'epoch': 25.98}


Saving model checkpoint to ./model_output/checkpoint-156
Configuration saved in ./model_output/checkpoint-156/config.json


{'eval_loss': 0.5585514307022095, 'eval_runtime': 1.7265, 'eval_samples_per_second': 5.792, 'eval_steps_per_second': 5.792, 'epoch': 25.98}


Model weights saved in ./model_output/checkpoint-156/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-144] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.209, 'learning_rate': 4.907563025210084e-06, 'epoch': 26.98}


Saving model checkpoint to ./model_output/checkpoint-162
Configuration saved in ./model_output/checkpoint-162/config.json


{'eval_loss': 0.48451828956604004, 'eval_runtime': 1.7189, 'eval_samples_per_second': 5.818, 'eval_steps_per_second': 5.818, 'epoch': 26.98}


Model weights saved in ./model_output/checkpoint-162/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-156] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1925, 'learning_rate': 4.902521008403362e-06, 'epoch': 27.98}


Saving model checkpoint to ./model_output/checkpoint-168
Configuration saved in ./model_output/checkpoint-168/config.json


{'eval_loss': 0.6057392358779907, 'eval_runtime': 1.7257, 'eval_samples_per_second': 5.795, 'eval_steps_per_second': 5.795, 'epoch': 27.98}


Model weights saved in ./model_output/checkpoint-168/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-162] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1271, 'learning_rate': 4.897478991596639e-06, 'epoch': 28.98}


Saving model checkpoint to ./model_output/checkpoint-174
Configuration saved in ./model_output/checkpoint-174/config.json


{'eval_loss': 0.6187590956687927, 'eval_runtime': 1.7199, 'eval_samples_per_second': 5.814, 'eval_steps_per_second': 5.814, 'epoch': 28.98}


Model weights saved in ./model_output/checkpoint-174/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-168] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0765, 'learning_rate': 4.892436974789916e-06, 'epoch': 29.98}


Saving model checkpoint to ./model_output/checkpoint-180
Configuration saved in ./model_output/checkpoint-180/config.json


{'eval_loss': 0.7058144211769104, 'eval_runtime': 1.7176, 'eval_samples_per_second': 5.822, 'eval_steps_per_second': 5.822, 'epoch': 29.98}


Model weights saved in ./model_output/checkpoint-180/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-174] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0458, 'learning_rate': 4.887394957983194e-06, 'epoch': 30.98}


Saving model checkpoint to ./model_output/checkpoint-186
Configuration saved in ./model_output/checkpoint-186/config.json


{'eval_loss': 0.9022008180618286, 'eval_runtime': 1.7195, 'eval_samples_per_second': 5.816, 'eval_steps_per_second': 5.816, 'epoch': 30.98}


Model weights saved in ./model_output/checkpoint-186/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-180] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0267, 'learning_rate': 4.882352941176471e-06, 'epoch': 31.98}


Saving model checkpoint to ./model_output/checkpoint-192
Configuration saved in ./model_output/checkpoint-192/config.json


{'eval_loss': 1.1096497774124146, 'eval_runtime': 1.7187, 'eval_samples_per_second': 5.818, 'eval_steps_per_second': 5.818, 'epoch': 31.98}


Model weights saved in ./model_output/checkpoint-192/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-186] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0158, 'learning_rate': 4.877310924369748e-06, 'epoch': 32.98}


Saving model checkpoint to ./model_output/checkpoint-198
Configuration saved in ./model_output/checkpoint-198/config.json


{'eval_loss': 1.178260326385498, 'eval_runtime': 1.7226, 'eval_samples_per_second': 5.805, 'eval_steps_per_second': 5.805, 'epoch': 32.98}


Model weights saved in ./model_output/checkpoint-198/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-192] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0097, 'learning_rate': 4.872268907563026e-06, 'epoch': 33.98}


Saving model checkpoint to ./model_output/checkpoint-204
Configuration saved in ./model_output/checkpoint-204/config.json


{'eval_loss': 1.2475597858428955, 'eval_runtime': 1.7154, 'eval_samples_per_second': 5.829, 'eval_steps_per_second': 5.829, 'epoch': 33.98}


Model weights saved in ./model_output/checkpoint-204/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-198] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0063, 'learning_rate': 4.867226890756303e-06, 'epoch': 34.98}


Saving model checkpoint to ./model_output/checkpoint-210
Configuration saved in ./model_output/checkpoint-210/config.json


{'eval_loss': 1.2951124906539917, 'eval_runtime': 1.7165, 'eval_samples_per_second': 5.826, 'eval_steps_per_second': 5.826, 'epoch': 34.98}


Model weights saved in ./model_output/checkpoint-210/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-204] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./model_output/checkpoint-150 (score: 0.4101842939853668).
***** Running Prediction *****
  Num examples = 9
  Batch size = 1


{'train_runtime': 2250.6601, 'train_samples_per_second': 43.543, 'train_steps_per_second': 2.666, 'train_loss': 0.4798459627266441, 'epoch': 34.98}
Fitting model: roberta_pysch using fold 6 as out of fold test data.
Train data sizes: (94, 94).
Val data sizes: (10, 10).
Test data sizes: (13, 13).


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file config.json from cache at /home/joel_stremmel/.cache/huggingface/hub/models--mlaricheva--roberta-psych/snapshots/18f4eb3ec5e26053f262d6e19f98ede75673ff33/config.json
Model config RobertaConfig {
  "_name_or_path": "mlaricheva/roberta-psych",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

loading file vocab.json from c

Map:   0%|          | 0/94 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Map:   0%|          | 0/13 [00:00<?, ? examples/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using cuda_amp half precision backend
***** Running training *****
  Num examples = 94
  Num Epochs = 1000
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 16
  Total optimization steps = 5000
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8046, 'learning_rate': 5.000000000000001e-07, 'epoch': 0.85}


Saving model checkpoint to ./model_output/checkpoint-5
Configuration saved in ./model_output/checkpoint-5/config.json


{'eval_loss': 0.693391740322113, 'eval_runtime': 1.7189, 'eval_samples_per_second': 5.818, 'eval_steps_per_second': 5.818, 'epoch': 0.85}


Model weights saved in ./model_output/checkpoint-5/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-150] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.812, 'learning_rate': 1.0000000000000002e-06, 'epoch': 1.85}


Saving model checkpoint to ./model_output/checkpoint-10
Configuration saved in ./model_output/checkpoint-10/config.json


{'eval_loss': 0.6915411949157715, 'eval_runtime': 1.7172, 'eval_samples_per_second': 5.823, 'eval_steps_per_second': 5.823, 'epoch': 1.85}


Model weights saved in ./model_output/checkpoint-10/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-210] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8027, 'learning_rate': 1.5e-06, 'epoch': 2.85}


Saving model checkpoint to ./model_output/checkpoint-15
Configuration saved in ./model_output/checkpoint-15/config.json


{'eval_loss': 0.6878174543380737, 'eval_runtime': 1.7126, 'eval_samples_per_second': 5.839, 'eval_steps_per_second': 5.839, 'epoch': 2.85}


Model weights saved in ./model_output/checkpoint-15/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-5] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.813, 'learning_rate': 2.0000000000000003e-06, 'epoch': 3.85}


Saving model checkpoint to ./model_output/checkpoint-20
Configuration saved in ./model_output/checkpoint-20/config.json


{'eval_loss': 0.6821228265762329, 'eval_runtime': 1.7203, 'eval_samples_per_second': 5.813, 'eval_steps_per_second': 5.813, 'epoch': 3.85}


Model weights saved in ./model_output/checkpoint-20/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-10] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8038, 'learning_rate': 2.5e-06, 'epoch': 4.85}


Saving model checkpoint to ./model_output/checkpoint-25
Configuration saved in ./model_output/checkpoint-25/config.json


{'eval_loss': 0.6760494112968445, 'eval_runtime': 1.7203, 'eval_samples_per_second': 5.813, 'eval_steps_per_second': 5.813, 'epoch': 4.85}


Model weights saved in ./model_output/checkpoint-25/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-15] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7913, 'learning_rate': 3e-06, 'epoch': 5.85}


Saving model checkpoint to ./model_output/checkpoint-30
Configuration saved in ./model_output/checkpoint-30/config.json


{'eval_loss': 0.6677549481391907, 'eval_runtime': 1.7176, 'eval_samples_per_second': 5.822, 'eval_steps_per_second': 5.822, 'epoch': 5.85}


Model weights saved in ./model_output/checkpoint-30/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-20] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7959, 'learning_rate': 3.5e-06, 'epoch': 6.85}


Saving model checkpoint to ./model_output/checkpoint-35
Configuration saved in ./model_output/checkpoint-35/config.json


{'eval_loss': 0.6597200632095337, 'eval_runtime': 1.7228, 'eval_samples_per_second': 5.805, 'eval_steps_per_second': 5.805, 'epoch': 6.85}


Model weights saved in ./model_output/checkpoint-35/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-25] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7994, 'learning_rate': 4.000000000000001e-06, 'epoch': 7.85}


Saving model checkpoint to ./model_output/checkpoint-40
Configuration saved in ./model_output/checkpoint-40/config.json


{'eval_loss': 0.648749053478241, 'eval_runtime': 1.7204, 'eval_samples_per_second': 5.813, 'eval_steps_per_second': 5.813, 'epoch': 7.85}


Model weights saved in ./model_output/checkpoint-40/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-30] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.803, 'learning_rate': 4.5e-06, 'epoch': 8.85}


Saving model checkpoint to ./model_output/checkpoint-45
Configuration saved in ./model_output/checkpoint-45/config.json


{'eval_loss': 0.637180745601654, 'eval_runtime': 1.72, 'eval_samples_per_second': 5.814, 'eval_steps_per_second': 5.814, 'epoch': 8.85}


Model weights saved in ./model_output/checkpoint-45/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-35] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.8217, 'learning_rate': 5e-06, 'epoch': 9.85}


Saving model checkpoint to ./model_output/checkpoint-50
Configuration saved in ./model_output/checkpoint-50/config.json


{'eval_loss': 0.629450798034668, 'eval_runtime': 1.7264, 'eval_samples_per_second': 5.792, 'eval_steps_per_second': 5.792, 'epoch': 9.85}


Model weights saved in ./model_output/checkpoint-50/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-40] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7853, 'learning_rate': 4.9949494949494956e-06, 'epoch': 10.85}


Saving model checkpoint to ./model_output/checkpoint-55
Configuration saved in ./model_output/checkpoint-55/config.json


{'eval_loss': 0.6251024007797241, 'eval_runtime': 1.7237, 'eval_samples_per_second': 5.801, 'eval_steps_per_second': 5.801, 'epoch': 10.85}


Model weights saved in ./model_output/checkpoint-55/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-45] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7917, 'learning_rate': 4.98989898989899e-06, 'epoch': 11.85}


Saving model checkpoint to ./model_output/checkpoint-60
Configuration saved in ./model_output/checkpoint-60/config.json


{'eval_loss': 0.6228569746017456, 'eval_runtime': 1.7203, 'eval_samples_per_second': 5.813, 'eval_steps_per_second': 5.813, 'epoch': 11.85}


Model weights saved in ./model_output/checkpoint-60/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-50] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7809, 'learning_rate': 4.984848484848485e-06, 'epoch': 12.85}


Saving model checkpoint to ./model_output/checkpoint-65
Configuration saved in ./model_output/checkpoint-65/config.json


{'eval_loss': 0.621414065361023, 'eval_runtime': 1.7183, 'eval_samples_per_second': 5.82, 'eval_steps_per_second': 5.82, 'epoch': 12.85}


Model weights saved in ./model_output/checkpoint-65/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-55] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7925, 'learning_rate': 4.97979797979798e-06, 'epoch': 13.85}


Saving model checkpoint to ./model_output/checkpoint-70
Configuration saved in ./model_output/checkpoint-70/config.json


{'eval_loss': 0.6154623031616211, 'eval_runtime': 1.7196, 'eval_samples_per_second': 5.815, 'eval_steps_per_second': 5.815, 'epoch': 13.85}


Model weights saved in ./model_output/checkpoint-70/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-60] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7738, 'learning_rate': 4.974747474747475e-06, 'epoch': 14.85}


Saving model checkpoint to ./model_output/checkpoint-75
Configuration saved in ./model_output/checkpoint-75/config.json


{'eval_loss': 0.6082673072814941, 'eval_runtime': 1.7192, 'eval_samples_per_second': 5.817, 'eval_steps_per_second': 5.817, 'epoch': 14.85}


Model weights saved in ./model_output/checkpoint-75/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-65] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7721, 'learning_rate': 4.9696969696969696e-06, 'epoch': 15.85}


Saving model checkpoint to ./model_output/checkpoint-80
Configuration saved in ./model_output/checkpoint-80/config.json


{'eval_loss': 0.6016637086868286, 'eval_runtime': 1.7161, 'eval_samples_per_second': 5.827, 'eval_steps_per_second': 5.827, 'epoch': 15.85}


Model weights saved in ./model_output/checkpoint-80/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-70] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7815, 'learning_rate': 4.964646464646465e-06, 'epoch': 16.85}


Saving model checkpoint to ./model_output/checkpoint-85
Configuration saved in ./model_output/checkpoint-85/config.json


{'eval_loss': 0.5878671407699585, 'eval_runtime': 1.7097, 'eval_samples_per_second': 5.849, 'eval_steps_per_second': 5.849, 'epoch': 16.85}


Model weights saved in ./model_output/checkpoint-85/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-75] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7617, 'learning_rate': 4.95959595959596e-06, 'epoch': 17.85}


Saving model checkpoint to ./model_output/checkpoint-90
Configuration saved in ./model_output/checkpoint-90/config.json


{'eval_loss': 0.5763230919837952, 'eval_runtime': 1.7213, 'eval_samples_per_second': 5.81, 'eval_steps_per_second': 5.81, 'epoch': 17.85}


Model weights saved in ./model_output/checkpoint-90/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-80] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7406, 'learning_rate': 4.954545454545455e-06, 'epoch': 18.85}


Saving model checkpoint to ./model_output/checkpoint-95
Configuration saved in ./model_output/checkpoint-95/config.json


{'eval_loss': 0.5435547232627869, 'eval_runtime': 1.7167, 'eval_samples_per_second': 5.825, 'eval_steps_per_second': 5.825, 'epoch': 18.85}


Model weights saved in ./model_output/checkpoint-95/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-85] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7244, 'learning_rate': 4.94949494949495e-06, 'epoch': 19.85}


Saving model checkpoint to ./model_output/checkpoint-100
Configuration saved in ./model_output/checkpoint-100/config.json


{'eval_loss': 0.504464864730835, 'eval_runtime': 1.7195, 'eval_samples_per_second': 5.816, 'eval_steps_per_second': 5.816, 'epoch': 19.85}


Model weights saved in ./model_output/checkpoint-100/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-90] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.7164, 'learning_rate': 4.944444444444445e-06, 'epoch': 20.85}


Saving model checkpoint to ./model_output/checkpoint-105
Configuration saved in ./model_output/checkpoint-105/config.json


{'eval_loss': 0.4851831793785095, 'eval_runtime': 1.7186, 'eval_samples_per_second': 5.819, 'eval_steps_per_second': 5.819, 'epoch': 20.85}


Model weights saved in ./model_output/checkpoint-105/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-95] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6879, 'learning_rate': 4.93939393939394e-06, 'epoch': 21.85}


Saving model checkpoint to ./model_output/checkpoint-110
Configuration saved in ./model_output/checkpoint-110/config.json


{'eval_loss': 0.47896963357925415, 'eval_runtime': 1.7183, 'eval_samples_per_second': 5.82, 'eval_steps_per_second': 5.82, 'epoch': 21.85}


Model weights saved in ./model_output/checkpoint-110/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-100] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6679, 'learning_rate': 4.934343434343435e-06, 'epoch': 22.85}


Saving model checkpoint to ./model_output/checkpoint-115
Configuration saved in ./model_output/checkpoint-115/config.json


{'eval_loss': 0.4420349597930908, 'eval_runtime': 1.7177, 'eval_samples_per_second': 5.822, 'eval_steps_per_second': 5.822, 'epoch': 22.85}


Model weights saved in ./model_output/checkpoint-115/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-105] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6623, 'learning_rate': 4.92929292929293e-06, 'epoch': 23.85}


Saving model checkpoint to ./model_output/checkpoint-120
Configuration saved in ./model_output/checkpoint-120/config.json


{'eval_loss': 0.3947533965110779, 'eval_runtime': 1.7188, 'eval_samples_per_second': 5.818, 'eval_steps_per_second': 5.818, 'epoch': 23.85}


Model weights saved in ./model_output/checkpoint-120/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-110] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.6103, 'learning_rate': 4.924242424242425e-06, 'epoch': 24.85}


Saving model checkpoint to ./model_output/checkpoint-125
Configuration saved in ./model_output/checkpoint-125/config.json


{'eval_loss': 0.35459399223327637, 'eval_runtime': 1.718, 'eval_samples_per_second': 5.821, 'eval_steps_per_second': 5.821, 'epoch': 24.85}


Model weights saved in ./model_output/checkpoint-125/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-115] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5653, 'learning_rate': 4.920202020202021e-06, 'epoch': 25.85}


Saving model checkpoint to ./model_output/checkpoint-130
Configuration saved in ./model_output/checkpoint-130/config.json


{'eval_loss': 0.3279782831668854, 'eval_runtime': 1.7196, 'eval_samples_per_second': 5.815, 'eval_steps_per_second': 5.815, 'epoch': 25.85}


Model weights saved in ./model_output/checkpoint-130/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-120] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5514, 'learning_rate': 4.915151515151516e-06, 'epoch': 26.85}


Saving model checkpoint to ./model_output/checkpoint-135
Configuration saved in ./model_output/checkpoint-135/config.json


{'eval_loss': 0.3282676637172699, 'eval_runtime': 1.7347, 'eval_samples_per_second': 5.765, 'eval_steps_per_second': 5.765, 'epoch': 26.85}


Model weights saved in ./model_output/checkpoint-135/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-125] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.5123, 'learning_rate': 4.91010101010101e-06, 'epoch': 27.85}


Saving model checkpoint to ./model_output/checkpoint-140
Configuration saved in ./model_output/checkpoint-140/config.json


{'eval_loss': 0.30171483755111694, 'eval_runtime': 1.715, 'eval_samples_per_second': 5.831, 'eval_steps_per_second': 5.831, 'epoch': 27.85}


Model weights saved in ./model_output/checkpoint-140/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-130] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4773, 'learning_rate': 4.905050505050505e-06, 'epoch': 28.85}


Saving model checkpoint to ./model_output/checkpoint-145
Configuration saved in ./model_output/checkpoint-145/config.json


{'eval_loss': 0.29136964678764343, 'eval_runtime': 1.7178, 'eval_samples_per_second': 5.821, 'eval_steps_per_second': 5.821, 'epoch': 28.85}


Model weights saved in ./model_output/checkpoint-145/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-135] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4528, 'learning_rate': 4.9000000000000005e-06, 'epoch': 29.85}


Saving model checkpoint to ./model_output/checkpoint-150
Configuration saved in ./model_output/checkpoint-150/config.json


{'eval_loss': 0.26000064611434937, 'eval_runtime': 1.7152, 'eval_samples_per_second': 5.83, 'eval_steps_per_second': 5.83, 'epoch': 29.85}


Model weights saved in ./model_output/checkpoint-150/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-140] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.4252, 'learning_rate': 4.894949494949495e-06, 'epoch': 30.85}


Saving model checkpoint to ./model_output/checkpoint-155
Configuration saved in ./model_output/checkpoint-155/config.json


{'eval_loss': 0.2555277347564697, 'eval_runtime': 1.7191, 'eval_samples_per_second': 5.817, 'eval_steps_per_second': 5.817, 'epoch': 30.85}


Model weights saved in ./model_output/checkpoint-155/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-145] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.3547, 'learning_rate': 4.88989898989899e-06, 'epoch': 31.85}


Saving model checkpoint to ./model_output/checkpoint-160
Configuration saved in ./model_output/checkpoint-160/config.json


{'eval_loss': 0.20929622650146484, 'eval_runtime': 1.711, 'eval_samples_per_second': 5.845, 'eval_steps_per_second': 5.845, 'epoch': 31.85}


Model weights saved in ./model_output/checkpoint-160/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-150] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.3124, 'learning_rate': 4.884848484848485e-06, 'epoch': 32.85}


Saving model checkpoint to ./model_output/checkpoint-165
Configuration saved in ./model_output/checkpoint-165/config.json


{'eval_loss': 0.18336831033229828, 'eval_runtime': 1.7174, 'eval_samples_per_second': 5.823, 'eval_steps_per_second': 5.823, 'epoch': 32.85}


Model weights saved in ./model_output/checkpoint-165/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-155] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2907, 'learning_rate': 4.87979797979798e-06, 'epoch': 33.85}


Saving model checkpoint to ./model_output/checkpoint-170
Configuration saved in ./model_output/checkpoint-170/config.json


{'eval_loss': 0.25089412927627563, 'eval_runtime': 1.715, 'eval_samples_per_second': 5.831, 'eval_steps_per_second': 5.831, 'epoch': 33.85}


Model weights saved in ./model_output/checkpoint-170/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-160] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.2124, 'learning_rate': 4.875757575757576e-06, 'epoch': 34.85}


Saving model checkpoint to ./model_output/checkpoint-175
Configuration saved in ./model_output/checkpoint-175/config.json


{'eval_loss': 0.14517459273338318, 'eval_runtime': 1.7118, 'eval_samples_per_second': 5.842, 'eval_steps_per_second': 5.842, 'epoch': 34.85}


Model weights saved in ./model_output/checkpoint-175/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-165] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1648, 'learning_rate': 4.870707070707071e-06, 'epoch': 35.85}


Saving model checkpoint to ./model_output/checkpoint-180
Configuration saved in ./model_output/checkpoint-180/config.json


{'eval_loss': 0.1367662101984024, 'eval_runtime': 1.7169, 'eval_samples_per_second': 5.824, 'eval_steps_per_second': 5.824, 'epoch': 35.85}


Model weights saved in ./model_output/checkpoint-180/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-170] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1627, 'learning_rate': 4.865656565656566e-06, 'epoch': 36.85}


Saving model checkpoint to ./model_output/checkpoint-185
Configuration saved in ./model_output/checkpoint-185/config.json


{'eval_loss': 0.2581808269023895, 'eval_runtime': 1.7177, 'eval_samples_per_second': 5.822, 'eval_steps_per_second': 5.822, 'epoch': 36.85}


Model weights saved in ./model_output/checkpoint-185/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-175] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.1477, 'learning_rate': 4.8606060606060615e-06, 'epoch': 37.85}


Saving model checkpoint to ./model_output/checkpoint-190
Configuration saved in ./model_output/checkpoint-190/config.json


{'eval_loss': 0.0924629420042038, 'eval_runtime': 1.726, 'eval_samples_per_second': 5.794, 'eval_steps_per_second': 5.794, 'epoch': 37.85}


Model weights saved in ./model_output/checkpoint-190/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-180] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0763, 'learning_rate': 4.855555555555556e-06, 'epoch': 38.85}


Saving model checkpoint to ./model_output/checkpoint-195
Configuration saved in ./model_output/checkpoint-195/config.json


{'eval_loss': 0.09531363844871521, 'eval_runtime': 1.7186, 'eval_samples_per_second': 5.819, 'eval_steps_per_second': 5.819, 'epoch': 38.85}


Model weights saved in ./model_output/checkpoint-195/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-185] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0742, 'learning_rate': 4.850505050505051e-06, 'epoch': 39.85}


Saving model checkpoint to ./model_output/checkpoint-200
Configuration saved in ./model_output/checkpoint-200/config.json


{'eval_loss': 0.21695928275585175, 'eval_runtime': 1.7208, 'eval_samples_per_second': 5.811, 'eval_steps_per_second': 5.811, 'epoch': 39.85}


Model weights saved in ./model_output/checkpoint-200/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-195] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0471, 'learning_rate': 4.845454545454546e-06, 'epoch': 40.85}


Saving model checkpoint to ./model_output/checkpoint-205
Configuration saved in ./model_output/checkpoint-205/config.json


{'eval_loss': 0.16855238378047943, 'eval_runtime': 1.7212, 'eval_samples_per_second': 5.81, 'eval_steps_per_second': 5.81, 'epoch': 40.85}


Model weights saved in ./model_output/checkpoint-205/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-200] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0373, 'learning_rate': 4.840404040404041e-06, 'epoch': 41.85}


Saving model checkpoint to ./model_output/checkpoint-210
Configuration saved in ./model_output/checkpoint-210/config.json


{'eval_loss': 0.35985520482063293, 'eval_runtime': 1.7169, 'eval_samples_per_second': 5.824, 'eval_steps_per_second': 5.824, 'epoch': 41.85}


Model weights saved in ./model_output/checkpoint-210/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-205] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0308, 'learning_rate': 4.8353535353535355e-06, 'epoch': 42.85}


Saving model checkpoint to ./model_output/checkpoint-215
Configuration saved in ./model_output/checkpoint-215/config.json


{'eval_loss': 0.22289517521858215, 'eval_runtime': 1.7204, 'eval_samples_per_second': 5.813, 'eval_steps_per_second': 5.813, 'epoch': 42.85}


Model weights saved in ./model_output/checkpoint-215/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-210] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0228, 'learning_rate': 4.830303030303031e-06, 'epoch': 43.85}


Saving model checkpoint to ./model_output/checkpoint-220
Configuration saved in ./model_output/checkpoint-220/config.json


{'eval_loss': 0.28007519245147705, 'eval_runtime': 1.7313, 'eval_samples_per_second': 5.776, 'eval_steps_per_second': 5.776, 'epoch': 43.85}


Model weights saved in ./model_output/checkpoint-220/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-215] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0174, 'learning_rate': 4.825252525252526e-06, 'epoch': 44.85}


Saving model checkpoint to ./model_output/checkpoint-225
Configuration saved in ./model_output/checkpoint-225/config.json


{'eval_loss': 0.3361595571041107, 'eval_runtime': 1.7198, 'eval_samples_per_second': 5.815, 'eval_steps_per_second': 5.815, 'epoch': 44.85}


Model weights saved in ./model_output/checkpoint-225/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-220] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0088, 'learning_rate': 4.820202020202021e-06, 'epoch': 45.85}


Saving model checkpoint to ./model_output/checkpoint-230
Configuration saved in ./model_output/checkpoint-230/config.json


{'eval_loss': 0.4326241910457611, 'eval_runtime': 1.7148, 'eval_samples_per_second': 5.831, 'eval_steps_per_second': 5.831, 'epoch': 45.85}


Model weights saved in ./model_output/checkpoint-230/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-225] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0084, 'learning_rate': 4.815151515151515e-06, 'epoch': 46.85}


Saving model checkpoint to ./model_output/checkpoint-235
Configuration saved in ./model_output/checkpoint-235/config.json


{'eval_loss': 0.5389736890792847, 'eval_runtime': 1.7161, 'eval_samples_per_second': 5.827, 'eval_steps_per_second': 5.827, 'epoch': 46.85}


Model weights saved in ./model_output/checkpoint-235/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-230] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 10
  Batch size = 1


{'loss': 0.0053, 'learning_rate': 4.81010101010101e-06, 'epoch': 47.85}


Saving model checkpoint to ./model_output/checkpoint-240
Configuration saved in ./model_output/checkpoint-240/config.json


{'eval_loss': 0.6803290247917175, 'eval_runtime': 1.721, 'eval_samples_per_second': 5.811, 'eval_steps_per_second': 5.811, 'epoch': 47.85}


Model weights saved in ./model_output/checkpoint-240/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-235] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./model_output/checkpoint-190 (score: 0.0924629420042038).
***** Running Prediction *****
  Num examples = 13
  Batch size = 1


{'train_runtime': 2960.1062, 'train_samples_per_second': 31.756, 'train_steps_per_second': 1.689, 'train_loss': 0.50114557306127, 'epoch': 47.85}
Fitting model: roberta_pysch using fold 7 as out of fold test data.
Train data sizes: (94, 94).
Val data sizes: (13, 13).
Test data sizes: (10, 10).


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file config.json from cache at /home/joel_stremmel/.cache/huggingface/hub/models--mlaricheva--roberta-psych/snapshots/18f4eb3ec5e26053f262d6e19f98ede75673ff33/config.json
Model config RobertaConfig {
  "_name_or_path": "mlaricheva/roberta-psych",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

loading file vocab.json from c

Map:   0%|          | 0/94 [00:00<?, ? examples/s]

Map:   0%|          | 0/13 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using cuda_amp half precision backend
***** Running training *****
  Num examples = 94
  Num Epochs = 1000
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 16
  Total optimization steps = 5000
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8104, 'learning_rate': 5.000000000000001e-07, 'epoch': 0.85}


Saving model checkpoint to ./model_output/checkpoint-5
Configuration saved in ./model_output/checkpoint-5/config.json


{'eval_loss': 0.6958620548248291, 'eval_runtime': 2.2036, 'eval_samples_per_second': 5.899, 'eval_steps_per_second': 5.899, 'epoch': 0.85}


Model weights saved in ./model_output/checkpoint-5/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-190] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8002, 'learning_rate': 1.0000000000000002e-06, 'epoch': 1.85}


Saving model checkpoint to ./model_output/checkpoint-10
Configuration saved in ./model_output/checkpoint-10/config.json


{'eval_loss': 0.696140706539154, 'eval_runtime': 2.2035, 'eval_samples_per_second': 5.9, 'eval_steps_per_second': 5.9, 'epoch': 1.85}


Model weights saved in ./model_output/checkpoint-10/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-240] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8077, 'learning_rate': 1.5e-06, 'epoch': 2.85}


Saving model checkpoint to ./model_output/checkpoint-15
Configuration saved in ./model_output/checkpoint-15/config.json


{'eval_loss': 0.696751594543457, 'eval_runtime': 2.21, 'eval_samples_per_second': 5.882, 'eval_steps_per_second': 5.882, 'epoch': 2.85}


Model weights saved in ./model_output/checkpoint-15/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-10] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8016, 'learning_rate': 2.0000000000000003e-06, 'epoch': 3.85}


Saving model checkpoint to ./model_output/checkpoint-20
Configuration saved in ./model_output/checkpoint-20/config.json


{'eval_loss': 0.6978152990341187, 'eval_runtime': 2.2055, 'eval_samples_per_second': 5.894, 'eval_steps_per_second': 5.894, 'epoch': 3.85}


Model weights saved in ./model_output/checkpoint-20/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-15] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8041, 'learning_rate': 2.5e-06, 'epoch': 4.85}


Saving model checkpoint to ./model_output/checkpoint-25
Configuration saved in ./model_output/checkpoint-25/config.json


{'eval_loss': 0.6991551518440247, 'eval_runtime': 2.204, 'eval_samples_per_second': 5.898, 'eval_steps_per_second': 5.898, 'epoch': 4.85}


Model weights saved in ./model_output/checkpoint-25/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-20] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7941, 'learning_rate': 3e-06, 'epoch': 5.85}


Saving model checkpoint to ./model_output/checkpoint-30
Configuration saved in ./model_output/checkpoint-30/config.json


{'eval_loss': 0.7010244727134705, 'eval_runtime': 2.2098, 'eval_samples_per_second': 5.883, 'eval_steps_per_second': 5.883, 'epoch': 5.85}


Model weights saved in ./model_output/checkpoint-30/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-25] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8011, 'learning_rate': 3.5e-06, 'epoch': 6.85}


Saving model checkpoint to ./model_output/checkpoint-35
Configuration saved in ./model_output/checkpoint-35/config.json


{'eval_loss': 0.7027925252914429, 'eval_runtime': 2.2022, 'eval_samples_per_second': 5.903, 'eval_steps_per_second': 5.903, 'epoch': 6.85}


Model weights saved in ./model_output/checkpoint-35/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-30] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8054, 'learning_rate': 4.000000000000001e-06, 'epoch': 7.85}


Saving model checkpoint to ./model_output/checkpoint-40
Configuration saved in ./model_output/checkpoint-40/config.json


{'eval_loss': 0.7054169774055481, 'eval_runtime': 2.2095, 'eval_samples_per_second': 5.884, 'eval_steps_per_second': 5.884, 'epoch': 7.85}


Model weights saved in ./model_output/checkpoint-40/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-35] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7959, 'learning_rate': 4.5e-06, 'epoch': 8.85}


Saving model checkpoint to ./model_output/checkpoint-45
Configuration saved in ./model_output/checkpoint-45/config.json


{'eval_loss': 0.7085803747177124, 'eval_runtime': 2.204, 'eval_samples_per_second': 5.898, 'eval_steps_per_second': 5.898, 'epoch': 8.85}


Model weights saved in ./model_output/checkpoint-45/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-40] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8064, 'learning_rate': 5e-06, 'epoch': 9.85}


Saving model checkpoint to ./model_output/checkpoint-50
Configuration saved in ./model_output/checkpoint-50/config.json


{'eval_loss': 0.7110198140144348, 'eval_runtime': 2.206, 'eval_samples_per_second': 5.893, 'eval_steps_per_second': 5.893, 'epoch': 9.85}


Model weights saved in ./model_output/checkpoint-50/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-45] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7749, 'learning_rate': 4.9949494949494956e-06, 'epoch': 10.85}


Saving model checkpoint to ./model_output/checkpoint-55
Configuration saved in ./model_output/checkpoint-55/config.json


{'eval_loss': 0.7131219506263733, 'eval_runtime': 2.208, 'eval_samples_per_second': 5.888, 'eval_steps_per_second': 5.888, 'epoch': 10.85}


Model weights saved in ./model_output/checkpoint-55/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-50] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./model_output/checkpoint-5 (score: 0.6958620548248291).
***** Running Prediction *****
  Num examples = 10
  Batch size = 1


{'train_runtime': 684.4672, 'train_samples_per_second': 137.333, 'train_steps_per_second': 7.305, 'train_loss': 0.8001632603732023, 'epoch': 10.85}


Could not locate the tokenizer configuration file, will try to use the model config instead.


Fitting model: roberta_pysch using fold 8 as out of fold test data.
Train data sizes: (100, 100).
Val data sizes: (13, 13).
Test data sizes: (4, 4).


loading configuration file config.json from cache at /home/joel_stremmel/.cache/huggingface/hub/models--mlaricheva--roberta-psych/snapshots/18f4eb3ec5e26053f262d6e19f98ede75673ff33/config.json
Model config RobertaConfig {
  "_name_or_path": "mlaricheva/roberta-psych",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

loading file vocab.json from cache at /home/joel_stremmel/.cache/huggingface/hub/models--mlaricheva--roberta-psych/snapshot

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/13 [00:00<?, ? examples/s]

Map:   0%|          | 0/4 [00:00<?, ? examples/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using cuda_amp half precision backend
***** Running training *****
  Num examples = 100
  Num Epochs = 1000
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 16
  Total optimization steps = 6000
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.743, 'learning_rate': 6.000000000000001e-07, 'epoch': 0.96}


Saving model checkpoint to ./model_output/checkpoint-6
Configuration saved in ./model_output/checkpoint-6/config.json


{'eval_loss': 0.6916720271110535, 'eval_runtime': 2.2064, 'eval_samples_per_second': 5.892, 'eval_steps_per_second': 5.892, 'epoch': 0.96}


Model weights saved in ./model_output/checkpoint-6/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-5] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7492, 'learning_rate': 1.2000000000000002e-06, 'epoch': 1.96}


Saving model checkpoint to ./model_output/checkpoint-12
Configuration saved in ./model_output/checkpoint-12/config.json


{'eval_loss': 0.6917991638183594, 'eval_runtime': 2.2072, 'eval_samples_per_second': 5.89, 'eval_steps_per_second': 5.89, 'epoch': 1.96}


Model weights saved in ./model_output/checkpoint-12/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-55] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7339, 'learning_rate': 1.8000000000000001e-06, 'epoch': 2.96}


Saving model checkpoint to ./model_output/checkpoint-18
Configuration saved in ./model_output/checkpoint-18/config.json


{'eval_loss': 0.6922093033790588, 'eval_runtime': 2.2032, 'eval_samples_per_second': 5.901, 'eval_steps_per_second': 5.901, 'epoch': 2.96}


Model weights saved in ./model_output/checkpoint-18/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-12] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7254, 'learning_rate': 2.4000000000000003e-06, 'epoch': 3.96}


Saving model checkpoint to ./model_output/checkpoint-24
Configuration saved in ./model_output/checkpoint-24/config.json


{'eval_loss': 0.6932648420333862, 'eval_runtime': 2.208, 'eval_samples_per_second': 5.888, 'eval_steps_per_second': 5.888, 'epoch': 3.96}


Model weights saved in ./model_output/checkpoint-24/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-18] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.718, 'learning_rate': 3e-06, 'epoch': 4.96}


Saving model checkpoint to ./model_output/checkpoint-30
Configuration saved in ./model_output/checkpoint-30/config.json


{'eval_loss': 0.6954661011695862, 'eval_runtime': 2.2139, 'eval_samples_per_second': 5.872, 'eval_steps_per_second': 5.872, 'epoch': 4.96}


Model weights saved in ./model_output/checkpoint-30/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-24] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7132, 'learning_rate': 3.6000000000000003e-06, 'epoch': 5.96}


Saving model checkpoint to ./model_output/checkpoint-36
Configuration saved in ./model_output/checkpoint-36/config.json


{'eval_loss': 0.6988455653190613, 'eval_runtime': 2.206, 'eval_samples_per_second': 5.893, 'eval_steps_per_second': 5.893, 'epoch': 5.96}


Model weights saved in ./model_output/checkpoint-36/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-30] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7097, 'learning_rate': 4.2000000000000004e-06, 'epoch': 6.96}


Saving model checkpoint to ./model_output/checkpoint-42
Configuration saved in ./model_output/checkpoint-42/config.json


{'eval_loss': 0.7027458548545837, 'eval_runtime': 2.2045, 'eval_samples_per_second': 5.897, 'eval_steps_per_second': 5.897, 'epoch': 6.96}


Model weights saved in ./model_output/checkpoint-42/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-36] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.6915, 'learning_rate': 4.7e-06, 'epoch': 7.96}


Saving model checkpoint to ./model_output/checkpoint-48
Configuration saved in ./model_output/checkpoint-48/config.json


{'eval_loss': 0.7082982659339905, 'eval_runtime': 2.2079, 'eval_samples_per_second': 5.888, 'eval_steps_per_second': 5.888, 'epoch': 7.96}


Model weights saved in ./model_output/checkpoint-48/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-42] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7078, 'learning_rate': 4.9974789915966396e-06, 'epoch': 8.96}


Saving model checkpoint to ./model_output/checkpoint-54
Configuration saved in ./model_output/checkpoint-54/config.json


{'eval_loss': 0.7170908451080322, 'eval_runtime': 2.209, 'eval_samples_per_second': 5.885, 'eval_steps_per_second': 5.885, 'epoch': 8.96}


Model weights saved in ./model_output/checkpoint-54/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-48] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.6916, 'learning_rate': 4.992436974789916e-06, 'epoch': 9.96}


Saving model checkpoint to ./model_output/checkpoint-60
Configuration saved in ./model_output/checkpoint-60/config.json


{'eval_loss': 0.731153130531311, 'eval_runtime': 2.2033, 'eval_samples_per_second': 5.9, 'eval_steps_per_second': 5.9, 'epoch': 9.96}


Model weights saved in ./model_output/checkpoint-60/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-54] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7103, 'learning_rate': 4.987394957983194e-06, 'epoch': 10.96}


Saving model checkpoint to ./model_output/checkpoint-66
Configuration saved in ./model_output/checkpoint-66/config.json


{'eval_loss': 0.7376512885093689, 'eval_runtime': 2.2042, 'eval_samples_per_second': 5.898, 'eval_steps_per_second': 5.898, 'epoch': 10.96}


Model weights saved in ./model_output/checkpoint-66/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-60] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./model_output/checkpoint-6 (score: 0.6916720271110535).
***** Running Prediction *****
  Num examples = 4
  Batch size = 1


{'train_runtime': 727.7991, 'train_samples_per_second': 137.401, 'train_steps_per_second': 8.244, 'train_loss': 0.717597383441347, 'epoch': 10.96}
Fitting model: roberta_pysch using fold 9 as out of fold test data.
Train data sizes: (93, 93).
Val data sizes: (13, 13).
Test data sizes: (11, 11).


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file config.json from cache at /home/joel_stremmel/.cache/huggingface/hub/models--mlaricheva--roberta-psych/snapshots/18f4eb3ec5e26053f262d6e19f98ede75673ff33/config.json
Model config RobertaConfig {
  "_name_or_path": "mlaricheva/roberta-psych",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

loading file vocab.json from c

Map:   0%|          | 0/93 [00:00<?, ? examples/s]

Map:   0%|          | 0/13 [00:00<?, ? examples/s]

Map:   0%|          | 0/11 [00:00<?, ? examples/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using cuda_amp half precision backend
***** Running training *****
  Num examples = 93
  Num Epochs = 1000
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 16
  Total optimization steps = 5000
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8191, 'learning_rate': 5.000000000000001e-07, 'epoch': 0.86}


Saving model checkpoint to ./model_output/checkpoint-5
Configuration saved in ./model_output/checkpoint-5/config.json


{'eval_loss': 0.6916837096214294, 'eval_runtime': 2.2084, 'eval_samples_per_second': 5.887, 'eval_steps_per_second': 5.887, 'epoch': 0.86}


Model weights saved in ./model_output/checkpoint-5/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-6] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8205, 'learning_rate': 1.0000000000000002e-06, 'epoch': 1.86}


Saving model checkpoint to ./model_output/checkpoint-10
Configuration saved in ./model_output/checkpoint-10/config.json


{'eval_loss': 0.6917852163314819, 'eval_runtime': 2.2072, 'eval_samples_per_second': 5.89, 'eval_steps_per_second': 5.89, 'epoch': 1.86}


Model weights saved in ./model_output/checkpoint-10/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-66] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8112, 'learning_rate': 1.5e-06, 'epoch': 2.86}


Saving model checkpoint to ./model_output/checkpoint-15
Configuration saved in ./model_output/checkpoint-15/config.json


{'eval_loss': 0.6920098662376404, 'eval_runtime': 2.2077, 'eval_samples_per_second': 5.888, 'eval_steps_per_second': 5.888, 'epoch': 2.86}


Model weights saved in ./model_output/checkpoint-15/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-10] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8148, 'learning_rate': 2.0000000000000003e-06, 'epoch': 3.86}


Saving model checkpoint to ./model_output/checkpoint-20
Configuration saved in ./model_output/checkpoint-20/config.json


{'eval_loss': 0.692436158657074, 'eval_runtime': 2.2084, 'eval_samples_per_second': 5.887, 'eval_steps_per_second': 5.887, 'epoch': 3.86}


Model weights saved in ./model_output/checkpoint-20/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-15] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8115, 'learning_rate': 2.5e-06, 'epoch': 4.86}


Saving model checkpoint to ./model_output/checkpoint-25
Configuration saved in ./model_output/checkpoint-25/config.json


{'eval_loss': 0.6930796504020691, 'eval_runtime': 2.2067, 'eval_samples_per_second': 5.891, 'eval_steps_per_second': 5.891, 'epoch': 4.86}


Model weights saved in ./model_output/checkpoint-25/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-20] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8088, 'learning_rate': 3e-06, 'epoch': 5.86}


Saving model checkpoint to ./model_output/checkpoint-30
Configuration saved in ./model_output/checkpoint-30/config.json


{'eval_loss': 0.6940994262695312, 'eval_runtime': 2.2211, 'eval_samples_per_second': 5.853, 'eval_steps_per_second': 5.853, 'epoch': 5.86}


Model weights saved in ./model_output/checkpoint-30/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-25] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8085, 'learning_rate': 3.5e-06, 'epoch': 6.86}


Saving model checkpoint to ./model_output/checkpoint-35
Configuration saved in ./model_output/checkpoint-35/config.json


{'eval_loss': 0.6957129240036011, 'eval_runtime': 2.1998, 'eval_samples_per_second': 5.91, 'eval_steps_per_second': 5.91, 'epoch': 6.86}


Model weights saved in ./model_output/checkpoint-35/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-30] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.8023, 'learning_rate': 4.000000000000001e-06, 'epoch': 7.86}


Saving model checkpoint to ./model_output/checkpoint-40
Configuration saved in ./model_output/checkpoint-40/config.json


{'eval_loss': 0.6984496712684631, 'eval_runtime': 2.2076, 'eval_samples_per_second': 5.889, 'eval_steps_per_second': 5.889, 'epoch': 7.86}


Model weights saved in ./model_output/checkpoint-40/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-35] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7925, 'learning_rate': 4.5e-06, 'epoch': 8.86}


Saving model checkpoint to ./model_output/checkpoint-45
Configuration saved in ./model_output/checkpoint-45/config.json


{'eval_loss': 0.7018773555755615, 'eval_runtime': 2.2165, 'eval_samples_per_second': 5.865, 'eval_steps_per_second': 5.865, 'epoch': 8.86}


Model weights saved in ./model_output/checkpoint-45/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-40] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7842, 'learning_rate': 5e-06, 'epoch': 9.86}


Saving model checkpoint to ./model_output/checkpoint-50
Configuration saved in ./model_output/checkpoint-50/config.json


{'eval_loss': 0.7071641683578491, 'eval_runtime': 2.2074, 'eval_samples_per_second': 5.889, 'eval_steps_per_second': 5.889, 'epoch': 9.86}


Model weights saved in ./model_output/checkpoint-50/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-45] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.775, 'learning_rate': 4.9949494949494956e-06, 'epoch': 10.86}


Saving model checkpoint to ./model_output/checkpoint-55
Configuration saved in ./model_output/checkpoint-55/config.json


{'eval_loss': 0.7163309454917908, 'eval_runtime': 2.2057, 'eval_samples_per_second': 5.894, 'eval_steps_per_second': 5.894, 'epoch': 10.86}


Model weights saved in ./model_output/checkpoint-55/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-50] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./model_output/checkpoint-5 (score: 0.6916837096214294).
***** Running Prediction *****
  Num examples = 11
  Batch size = 1


{'train_runtime': 678.9034, 'train_samples_per_second': 136.986, 'train_steps_per_second': 7.365, 'train_loss': 0.8043957970359109, 'epoch': 10.86}
Fitting model: roberta_pysch using fold 10 as out of fold test data.
Train data sizes: (97, 97).
Val data sizes: (13, 13).
Test data sizes: (7, 7).


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file config.json from cache at /home/joel_stremmel/.cache/huggingface/hub/models--mlaricheva--roberta-psych/snapshots/18f4eb3ec5e26053f262d6e19f98ede75673ff33/config.json
Model config RobertaConfig {
  "_name_or_path": "mlaricheva/roberta-psych",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

loading file vocab.json from c

Map:   0%|          | 0/97 [00:00<?, ? examples/s]

Map:   0%|          | 0/13 [00:00<?, ? examples/s]

Map:   0%|          | 0/7 [00:00<?, ? examples/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using cuda_amp half precision backend
***** Running training *****
  Num examples = 97
  Num Epochs = 1000
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 16
  Total optimization steps = 6000
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7154, 'learning_rate': 6.000000000000001e-07, 'epoch': 0.99}


Saving model checkpoint to ./model_output/checkpoint-6
Configuration saved in ./model_output/checkpoint-6/config.json


{'eval_loss': 0.6917096376419067, 'eval_runtime': 2.203, 'eval_samples_per_second': 5.901, 'eval_steps_per_second': 5.901, 'epoch': 0.99}


Model weights saved in ./model_output/checkpoint-6/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-5] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7166, 'learning_rate': 1.2000000000000002e-06, 'epoch': 1.99}


Saving model checkpoint to ./model_output/checkpoint-12
Configuration saved in ./model_output/checkpoint-12/config.json


{'eval_loss': 0.6918760538101196, 'eval_runtime': 2.2044, 'eval_samples_per_second': 5.897, 'eval_steps_per_second': 5.897, 'epoch': 1.99}


Model weights saved in ./model_output/checkpoint-12/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-55] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7088, 'learning_rate': 1.8000000000000001e-06, 'epoch': 2.99}


Saving model checkpoint to ./model_output/checkpoint-18
Configuration saved in ./model_output/checkpoint-18/config.json


{'eval_loss': 0.6923965215682983, 'eval_runtime': 2.206, 'eval_samples_per_second': 5.893, 'eval_steps_per_second': 5.893, 'epoch': 2.99}


Model weights saved in ./model_output/checkpoint-18/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-12] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7118, 'learning_rate': 2.4000000000000003e-06, 'epoch': 3.99}


Saving model checkpoint to ./model_output/checkpoint-24
Configuration saved in ./model_output/checkpoint-24/config.json


{'eval_loss': 0.693493127822876, 'eval_runtime': 2.2099, 'eval_samples_per_second': 5.883, 'eval_steps_per_second': 5.883, 'epoch': 3.99}


Model weights saved in ./model_output/checkpoint-24/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-18] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.6963, 'learning_rate': 3e-06, 'epoch': 4.99}


Saving model checkpoint to ./model_output/checkpoint-30
Configuration saved in ./model_output/checkpoint-30/config.json


{'eval_loss': 0.694878876209259, 'eval_runtime': 2.2097, 'eval_samples_per_second': 5.883, 'eval_steps_per_second': 5.883, 'epoch': 4.99}


Model weights saved in ./model_output/checkpoint-30/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-24] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.7075, 'learning_rate': 3.6000000000000003e-06, 'epoch': 5.99}


Saving model checkpoint to ./model_output/checkpoint-36
Configuration saved in ./model_output/checkpoint-36/config.json


{'eval_loss': 0.6969022154808044, 'eval_runtime': 2.2033, 'eval_samples_per_second': 5.9, 'eval_steps_per_second': 5.9, 'epoch': 5.99}


Model weights saved in ./model_output/checkpoint-36/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-30] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.6942, 'learning_rate': 4.1e-06, 'epoch': 6.99}


Saving model checkpoint to ./model_output/checkpoint-42
Configuration saved in ./model_output/checkpoint-42/config.json


{'eval_loss': 0.6984695792198181, 'eval_runtime': 2.2027, 'eval_samples_per_second': 5.902, 'eval_steps_per_second': 5.902, 'epoch': 6.99}


Model weights saved in ./model_output/checkpoint-42/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-36] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.6928, 'learning_rate': 4.7e-06, 'epoch': 7.99}


Saving model checkpoint to ./model_output/checkpoint-48
Configuration saved in ./model_output/checkpoint-48/config.json


{'eval_loss': 0.7020854949951172, 'eval_runtime': 2.2116, 'eval_samples_per_second': 5.878, 'eval_steps_per_second': 5.878, 'epoch': 7.99}


Model weights saved in ./model_output/checkpoint-48/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-42] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.687, 'learning_rate': 4.9974789915966396e-06, 'epoch': 8.99}


Saving model checkpoint to ./model_output/checkpoint-54
Configuration saved in ./model_output/checkpoint-54/config.json


{'eval_loss': 0.7090712189674377, 'eval_runtime': 2.2122, 'eval_samples_per_second': 5.877, 'eval_steps_per_second': 5.877, 'epoch': 8.99}


Model weights saved in ./model_output/checkpoint-54/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-48] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.6842, 'learning_rate': 4.992436974789916e-06, 'epoch': 9.99}


Saving model checkpoint to ./model_output/checkpoint-60
Configuration saved in ./model_output/checkpoint-60/config.json


{'eval_loss': 0.7147993445396423, 'eval_runtime': 2.2169, 'eval_samples_per_second': 5.864, 'eval_steps_per_second': 5.864, 'epoch': 9.99}


Model weights saved in ./model_output/checkpoint-60/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-54] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 13
  Batch size = 1


{'loss': 0.6727, 'learning_rate': 4.987394957983194e-06, 'epoch': 10.99}


Saving model checkpoint to ./model_output/checkpoint-66
Configuration saved in ./model_output/checkpoint-66/config.json


{'eval_loss': 0.7258511781692505, 'eval_runtime': 2.2088, 'eval_samples_per_second': 5.885, 'eval_steps_per_second': 5.885, 'epoch': 10.99}


Model weights saved in ./model_output/checkpoint-66/pytorch_model.bin
Deleting older checkpoint [model_output/checkpoint-60] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./model_output/checkpoint-6 (score: 0.6917096376419067).
***** Running Prediction *****
  Num examples = 7
  Batch size = 1


{'train_runtime': 707.5408, 'train_samples_per_second': 137.095, 'train_steps_per_second': 8.48, 'train_loss': 0.6988435586293539, 'epoch': 10.99}


##### Unassign Runtime if Running on Colab

In [12]:
# Read the Colab switch from the notebook-level `params` configuration.
use_colab = params["env"]["colab"]

if use_colab:
    # The import lives inside the guard so it is only attempted when the
    # Colab flag is set; runs with the flag off never touch google.colab.
    from google.colab import runtime

    # Ask Colab to unassign the runtime backing this notebook.
    # NOTE(review): presumably this ends the session and frees the VM, so
    # nothing should be scheduled after this cell — confirm against Colab docs.
    runtime.unassign()