In [1]:
# Checkpoint identifier reused later for both AutoTokenizer and the classification model.
model_name = "roberta-base"

In [2]:
from torch import cuda
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = 'cuda' if cuda.is_available() else 'cpu'

In [3]:
from kaggle_secrets import UserSecretsClient
import wandb

# Fetch the W&B API key from the Kaggle secrets client so it is never written into the notebook.
user_secrets = UserSecretsClient()

wandb_api = user_secrets.get_secret("wandb_api") 

# Authenticate this kernel with W&B before any sweep or run is created.
wandb.login(key=wandb_api)

[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [4]:
# Sweep definition: 'value' entries are held fixed, 'values' entries are swept.
# Two parameters are swept (4 learning rates x 3 weight decays = 12 combinations),
# which matches the count=12 passed to wandb.agent further down.
sweep_config = {
   'method': 'grid',
   'parameters': {
       'epochs': {
           'value': 3
       },
       'learning_rate': {
           'values': [5e-5, 4e-5, 3e-5, 2e-5]
       },
       'batch_size': {
           'value': 16
       },
       'weight_decay': {
           'values': [0.1, 0.01, 0.0]
       },
       'warmup_ratio': {
           'value': 0.1
       }
   }
}
# W&B project that collects every run of this sweep.
project = 'RoBERTa-base-goemotions-seed-2'
# Registers the sweep and returns the id that the agent later polls for jobs.
sweep_id = wandb.sweep(sweep_config, project=project)

Create sweep with ID: j20ua06q
Sweep URL: https://wandb.ai/martinsit288/RoBERTa-base-goemotions-seed-2/sweeps/j20ua06q


In [5]:
import transformers
import datasets

print(f"Running on transformers v{transformers.__version__} and datasets v{datasets.__version__}")

Running on transformers v4.20.1 and datasets v2.1.0


In [6]:
!pip3 install evaluate

Collecting evaluate
  Downloading evaluate-0.4.0-py3-none-any.whl (81 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.4/81.4 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: evaluate
Successfully installed evaluate-0.4.0
[0m

In [7]:
import torch
from pathlib import Path
import evaluate
from datasets import load_dataset
from transformers import (AutoTokenizer, AutoModelForSequenceClassification, 
                          TrainingArguments, Trainer)
import numpy as np
import random
import os

## Seed everything

In [8]:
# Single seed shared by seed_everything(...) and TrainingArguments(seed=...).
SEED = 2

In [9]:
def seed_everything(seed):
    """Seed every RNG this notebook uses and put cuDNN into a reproducible mode.

    Args:
        seed: Integer seed applied to Python's ``random``, NumPy and PyTorch
            (CPU and all CUDA devices).

    Note:
        ``PYTHONHASHSEED`` is only read when an interpreter starts, so the
        assignment below affects child processes, not the running kernel.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seeds every CUDA device (current one included); a silent no-op without CUDA,
    # which makes a separate single-device torch.cuda.manual_seed call redundant.
    torch.cuda.manual_seed_all(seed)
    # Restrict cuDNN to deterministic algorithms ...
    torch.backends.cudnn.deterministic = True
    # ... and disable its autotuner, which may otherwise pick a different
    # algorithm from one run to the next.
    torch.backends.cudnn.benchmark = False

seed_everything(SEED)

## Load dataset

In [10]:
# Load the "simplified" configuration of go_emotions; the repr below lists the
# train / validation / test splits and their columns.
ds = load_dataset("go_emotions", "simplified")
ds

Downloading builder script:   0%|          | 0.00/2.02k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/1.67k [00:00<?, ?B/s]

Downloading and preparing dataset go_emotions/simplified (download: 4.19 MiB, generated: 5.03 MiB, post-processed: Unknown size, total: 9.22 MiB) to /root/.cache/huggingface/datasets/go_emotions/simplified/0.0.0/2637cfdd4e64d30249c3ed2150fa2b9d279766bfcd6a809b9f085c61a90d776d...


Downloading data:   0%|          | 0.00/1.61M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/203k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/201k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/43410 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/5426 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/5427 [00:00<?, ? examples/s]

Dataset go_emotions downloaded and prepared to /root/.cache/huggingface/datasets/go_emotions/simplified/0.0.0/2637cfdd4e64d30249c3ed2150fa2b9d279766bfcd6a809b9f085c61a90d776d. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['text', 'labels', 'id'],
        num_rows: 43410
    })
    validation: Dataset({
        features: ['text', 'labels', 'id'],
        num_rows: 5426
    })
    test: Dataset({
        features: ['text', 'labels', 'id'],
        num_rows: 5427
    })
})

In [11]:
ds["train"][0]

{'text': "My favourite food is anything I didn't have to cook myself.",
 'labels': [27],
 'id': 'eebbqej'}

In [12]:
def one_hot_encode(example, num_labels=28):
    """Replace an example's list of label ids with a multi-hot 0/1 vector.

    Args:
        example: Mapping whose ``"labels"`` entry is a list of integer class
            ids; an empty list produces an all-zero vector.
        num_labels: Length of the output vector. Defaults to 28, the value
            that used to be hard-coded here.

    Returns:
        The same ``example`` object, with ``"labels"`` overwritten by a list of
        ``num_labels`` ints in which every listed id is set to 1.

    Raises:
        IndexError: If a label id falls outside the vector's index range.
    """
    multi_hot = [0] * num_labels
    for label_id in example["labels"]:
        multi_hot[label_id] = 1
    example["labels"] = multi_hot
    return example

In [13]:
# NOTE(review): this rebinds `ds`, so the cell is not idempotent. Running it a second
# time would feed the 0/1 vectors back into one_hot_encode as if they were label ids.
# Re-run the load_dataset cell first if this cell has to be executed again.
ds = ds.map(one_hot_encode)

  0%|          | 0/43410 [00:00<?, ?ex/s]

  0%|          | 0/5426 [00:00<?, ?ex/s]

  0%|          | 0/5427 [00:00<?, ?ex/s]

In [14]:
ds["train"][0]

{'text': "My favourite food is anything I didn't have to cook myself.",
 'labels': [0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1],
 'id': 'eebbqej'}

In [15]:
# Class index -> emotion name for the 28 labels.
id2label = {
    0: 'admiration', 1: 'amusement', 2: 'anger', 3: 'annoyance',
    4: 'approval', 5: 'caring', 6: 'confusion', 7: 'curiosity',
    8: 'desire', 9: 'disappointment', 10: 'disapproval', 11: 'disgust',
    12: 'embarrassment', 13: 'excitement', 14: 'fear', 15: 'gratitude',
    16: 'grief', 17: 'joy', 18: 'love', 19: 'nervousness',
    20: 'optimism', 21: 'pride', 22: 'realization', 23: 'relief',
    24: 'remorse', 25: 'sadness', 26: 'surprise', 27: 'neutral',
}
# Derived from id2label so the two mappings can never drift apart.
label2id = {name: idx for idx, name in id2label.items()}

## Tokenize and encode

In [16]:
# Tokenizer for the checkpoint named in model_name; also handed to the Trainer later.
tokenizer = AutoTokenizer.from_pretrained(model_name)

Downloading:   0%|          | 0.00/481 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.29M [00:00<?, ?B/s]

In [17]:
def tokenize_func(examples):
    """Tokenize the "text" field, truncating each sequence to at most 50 tokens.

    Written for ``Dataset.map(..., batched=True)``: whatever ``examples["text"]``
    holds is passed to the module-level ``tokenizer`` and its output is returned.
    """
    texts = examples["text"]
    encoded = tokenizer(texts, truncation=True, max_length=50)
    return encoded

In [18]:
# Tokenize in batches and drop the raw "text" / "id" columns, leaving labels plus model inputs.
ds_enc = ds.map(tokenize_func, batched=True, remove_columns=["text", "id"])
# Rename "labels" -> "label"; to_float_labels reads the column under this name.
ds_enc = ds_enc.rename_column("labels", "label")
ds_enc

  0%|          | 0/44 [00:00<?, ?ba/s]

  0%|          | 0/6 [00:00<?, ?ba/s]

  0%|          | 0/6 [00:00<?, ?ba/s]

DatasetDict({
    train: Dataset({
        features: ['label', 'input_ids', 'attention_mask'],
        num_rows: 43410
    })
    validation: Dataset({
        features: ['label', 'input_ids', 'attention_mask'],
        num_rows: 5426
    })
    test: Dataset({
        features: ['label', 'input_ids', 'attention_mask'],
        num_rows: 5427
    })
})

In [19]:
def to_float_labels(example):
    """Store a float copy of the example's "label" tensor under "float_label".

    The incoming mapping is updated in place and returned; the original
    "label" entry is left as it was.
    """
    example["float_label"] = example["label"].to(torch.float)
    return example

In [20]:
# Torch format makes columns come back as tensors, which to_float_labels needs for `.to(...)`.
ds_enc.set_format("torch")
# Swap the "label" column for its float copy while keeping the column name "label".
# Presumably the multi-label loss requires float targets; TODO confirm against the model's loss.
ds_enc = ds_enc.map(to_float_labels).remove_columns("label").rename_column("float_label", "label")

  0%|          | 0/43410 [00:00<?, ?ex/s]

  0%|          | 0/5426 [00:00<?, ?ex/s]

  0%|          | 0/5427 [00:00<?, ?ex/s]

In [21]:
ds_enc["train"][0]

{'input_ids': tensor([   0, 2387, 5548,  689,   16,  932,   38,  399,   75,   33,    7, 7142,
         2185,    4,    2]),
 'attention_mask': tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]),
 'label': tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 1.])}

## Load model

In [22]:
def model_init():
    """Build a fresh 28-label multi-label classifier from ``model_name``.

    Used as the Trainer's ``model_init`` callback, so every call loads the
    checkpoint again. The returned model is already moved to ``device``.
    """
    num_labels = 28
    classifier = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=num_labels,
        id2label=id2label,
        label2id=label2id,
        ignore_mismatched_sizes=True,
        problem_type='multi_label_classification',
    )
    return classifier.to(device)

## Define metrics and training loop

In [23]:
# Load each metric with its "multilabel" configuration (the second argument);
# compute_metrics passes 2-D 0/1 prediction and reference arrays to them.
acc = evaluate.load("accuracy", "multilabel")
f1 = evaluate.load("f1", "multilabel")
precision = evaluate.load("precision", "multilabel")
recall = evaluate.load("recall", "multilabel")

Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/6.77k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/7.55k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/7.36k [00:00<?, ?B/s]

In [24]:
from sklearn.metrics import hamming_loss
def hamming_score(predictions, references):
    """Return ``1 - hamming_loss`` for the given predictions and references.

    ``hamming_loss`` takes the references first, so the arguments are passed
    in the opposite order from this function's signature.
    """
    loss = hamming_loss(references, predictions)
    return 1 - loss

In [25]:
def compute_metrics(eval_pred, threshold=0.3):
    """Threshold sigmoid probabilities into 0/1 predictions and score them.

    Args:
        eval_pred: Pair ``(prediction_scores, labels)``. The scores are passed
            through a sigmoid before thresholding.
        threshold: A label counts as predicted when its probability is at or
            above this value.

    Returns:
        Dict with the Hamming score, exact-match accuracy, and F1 / precision /
        recall under macro, micro and weighted averaging.
    """
    logits, labels = eval_pred

    probs = torch.sigmoid(torch.tensor(logits))
    # float64 array shaped like probs: 1.0 where the threshold is met, else 0.0.
    predictions = np.where(probs >= threshold, 1.0, 0.0)

    def averaged(metric, key, average):
        """Compute one metric under the given averaging mode and unwrap its value."""
        return metric.compute(predictions=predictions, references=labels, average=average)[key]

    return {
        'hamming': hamming_score(predictions=predictions, references=labels),
        'exact_match': acc.compute(predictions=predictions, references=labels)['accuracy'],
        'macro_f1': averaged(f1, 'f1', "macro"),
        'macro_precision': averaged(precision, 'precision', "macro"),
        'macro_recall': averaged(recall, 'recall', "macro"),
        'micro_f1': averaged(f1, 'f1', "micro"),
        'micro_precision': averaged(precision, 'precision', "micro"),
        'micro_recall': averaged(recall, 'recall', "micro"),
        'weighted_f1': averaged(f1, 'f1', "weighted"),
        'weighted_precision': averaged(precision, 'precision', "weighted"),
        'weighted_recall': averaged(recall, 'recall', "weighted"),
    }

In [26]:
def train(config=None):
    """Run one sweep trial: fine-tune, validate every epoch, then score the test split.

    Args:
        config: Optional defaults forwarded to ``wandb.init``. The hyperparameters
            actually used are read back from ``wandb.config`` inside the run.

    The run is opened as a context manager, so it is closed when the block exits.

    NOTE(review): every trial writes checkpoints to the same "results" directory.
    NOTE(review): the test split is evaluated in every trial; choosing
    hyperparameters from those numbers would leak test information.
    """
    with wandb.init(config=config):
        hparams = wandb.config

        arg_values = dict(
            output_dir="results",
            report_to="wandb",
            num_train_epochs=hparams.epochs,
            learning_rate=hparams.learning_rate,
            weight_decay=hparams.weight_decay,
            warmup_ratio=hparams.warmup_ratio,
            per_device_train_batch_size=hparams.batch_size,
            per_device_eval_batch_size=16,
            # No metric_for_best_model is given, so the Trainer's default
            # criterion decides which checkpoint is reloaded at the end.
            load_best_model_at_end=True,
            logging_strategy="steps",
            evaluation_strategy="epoch",
            save_strategy="epoch",
            seed=SEED,
        )
        training_args = TrainingArguments(**arg_values)

        # model_init (not a model instance) so each trial starts from a fresh model.
        trainer = Trainer(
            model_init=model_init,
            args=training_args,
            compute_metrics=compute_metrics,
            train_dataset=ds_enc["train"],
            eval_dataset=ds_enc["validation"],
            tokenizer=tokenizer,
        )

        trainer.train()

        # Metrics from this call are reported under the "test" key prefix.
        trainer.evaluate(eval_dataset=ds_enc["test"], metric_key_prefix="test")

In [27]:
# Run up to 12 trials in this process: one per combination of the 4 learning rates
# and 3 weight decays listed in sweep_config.
wandb.agent(sweep_id, train, count=12, project=project)

[34m[1mwandb[0m: Agent Starting Run: w236as4p with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 3
[34m[1mwandb[0m: 	learning_rate: 5e-05
[34m[1mwandb[0m: 	warmup_ratio: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: Currently logged in as: [33mmartinsit288[0m. Use [1m`wandb login --relogin`[0m to force relogin


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


loading configuration file https://huggingface.co/roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/733bade19e5f0ce98e6531021dd5180994bb2f7b8bd7e80c7968805834ba351e.35205c6cfc956461d8515139f0f8dd5d207a2f336c0c3a83b4bc8dca3518e37b
Model config RobertaConfig {
  "_name_or_path": "roberta-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "admiration",
    "1": "amusement",
    "2": "anger",
    "3": "annoyance",
    "4": "approval",
    "5": "caring",
    "6": "confusion",
    "7": "curiosity",
    "8": "desire",
    "9": "disappointment",
    "10": "disapproval",
    "11": "disgust",
    "12": "embarrassment",
    "13": "excitement",
    "14": "fear",
    "15": "gratitude",
    "16": "grief",
    "17": "joy",
    "18": "love",
    "

Downloading:   0%|          | 0.00/478M [00:00<?, ?B/s]

storing https://huggingface.co/roberta-base/resolve/main/pytorch_model.bin in cache at /root/.cache/huggingface/transformers/51ba668f7ff34e7cdfa9561e8361747738113878850a7d717dbc69de8683aaad.c7efaa30a0d80b2958b876969faa180e485944a849deee4ad482332de65365a7
creating metadata file for /root/.cache/huggingface/transformers/51ba668f7ff34e7cdfa9561e8361747738113878850a7d717dbc69de8683aaad.c7efaa30a0d80b2958b876969faa180e485944a849deee4ad482332de65365a7
loading weights file https://huggingface.co/roberta-base/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/51ba668f7ff34e7cdfa9561e8361747738113878850a7d717dbc69de8683aaad.c7efaa30a0d80b2958b876969faa180e485944a849deee4ad482332de65365a7
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'roberta.poole

Epoch,Training Loss,Validation Loss,Hamming,Exact Match,Macro F1,Macro Precision,Macro Recall,Micro F1,Micro Precision,Micro Recall,Weighted F1,Weighted Precision,Weighted Recall
1,0.0945,0.089734,0.964417,0.428308,0.39562,0.461143,0.405658,0.585111,0.573233,0.597492,0.546408,0.556428,0.597492
2,0.0827,0.082739,0.966346,0.457059,0.471286,0.49139,0.480645,0.611622,0.593368,0.631034,0.592246,0.58075,0.631034
3,0.0691,0.082659,0.966004,0.452451,0.510223,0.530673,0.517721,0.611742,0.587751,0.637774,0.602227,0.580164,0.637774


***** Running Evaluation *****
  Num examples = 5426
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to results/checkpoint-2714
Configuration saved in results/checkpoint-2714/config.json
Model weights saved in results/checkpoint-2714/pytorch_model.bin
tokenizer config file saved in results/checkpoint-2714/tokenizer_config.json
Special tokens file saved in results/checkpoint-2714/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 5426
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to results/checkpoint-5428
Configuration saved in results/checkpoint-5428/config.json
Model weights saved in results/checkpoint-5428/pytorch_model.bin
tokenizer config file saved in results/checkpoint-5428/tokenizer_config.json
Special tokens file saved in results/checkpoint-5428/spec

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/exact_match,▁█▇
eval/hamming,▁█▇
eval/loss,█▁▁
eval/macro_f1,▁▆█
eval/macro_precision,▁▄█
eval/macro_recall,▁▆█
eval/micro_f1,▁██
eval/micro_precision,▁█▆
eval/micro_recall,▁▇█
eval/runtime,▂▁█

0,1
eval/exact_match,0.45245
eval/hamming,0.966
eval/loss,0.08266
eval/macro_f1,0.51022
eval/macro_precision,0.53067
eval/macro_recall,0.51772
eval/micro_f1,0.61174
eval/micro_precision,0.58775
eval/micro_recall,0.63777
eval/runtime,18.5635


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 2jhxfmg5 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 3
[34m[1mwandb[0m: 	learning_rate: 5e-05
[34m[1mwandb[0m: 	warmup_ratio: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.01


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


PyTorch: setting up devices
loading configuration file https://huggingface.co/roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/733bade19e5f0ce98e6531021dd5180994bb2f7b8bd7e80c7968805834ba351e.35205c6cfc956461d8515139f0f8dd5d207a2f336c0c3a83b4bc8dca3518e37b
Model config RobertaConfig {
  "_name_or_path": "roberta-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "admiration",
    "1": "amusement",
    "2": "anger",
    "3": "annoyance",
    "4": "approval",
    "5": "caring",
    "6": "confusion",
    "7": "curiosity",
    "8": "desire",
    "9": "disappointment",
    "10": "disapproval",
    "11": "disgust",
    "12": "embarrassment",
    "13": "excitement",
    "14": "fear",
    "15": "gratitude",
    "16": "grief",
    "17": "j

Epoch,Training Loss,Validation Loss,Hamming,Exact Match,Macro F1,Macro Precision,Macro Recall,Micro F1,Micro Precision,Micro Recall,Weighted F1,Weighted Precision,Weighted Recall
1,0.0949,0.089184,0.965155,0.432731,0.387276,0.43492,0.392491,0.588847,0.58359,0.594201,0.545082,0.549028,0.594201
2,0.0833,0.083734,0.965984,0.445632,0.474737,0.526252,0.487728,0.607831,0.589144,0.627743,0.591677,0.58503,0.627743
3,0.0699,0.082364,0.966372,0.455953,0.513074,0.536211,0.522406,0.615315,0.592088,0.640439,0.606235,0.584922,0.640439


***** Running Evaluation *****
  Num examples = 5426
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to results/checkpoint-2714
Configuration saved in results/checkpoint-2714/config.json
Model weights saved in results/checkpoint-2714/pytorch_model.bin
tokenizer config file saved in results/checkpoint-2714/tokenizer_config.json
Special tokens file saved in results/checkpoint-2714/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 5426
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to results/checkpoint-5428
Configuration saved in results/checkpoint-5428/config.json
Model weights saved in results/checkpoint-5428/pytorch_model.bin
tokenizer config file saved in results/checkpoint-5428/tokenizer_config.json
Special tokens file saved in results/checkpoint-5428/spec

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/exact_match,▁▅█
eval/hamming,▁▆█
eval/loss,█▂▁
eval/macro_f1,▁▆█
eval/macro_precision,▁▇█
eval/macro_recall,▁▆█
eval/micro_f1,▁▆█
eval/micro_precision,▁▆█
eval/micro_recall,▁▆█
eval/runtime,█▁▁

0,1
eval/exact_match,0.45595
eval/hamming,0.96637
eval/loss,0.08236
eval/macro_f1,0.51307
eval/macro_precision,0.53621
eval/macro_recall,0.52241
eval/micro_f1,0.61532
eval/micro_precision,0.59209
eval/micro_recall,0.64044
eval/runtime,18.2235


[34m[1mwandb[0m: Agent Starting Run: 4qmkhj7h with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 3
[34m[1mwandb[0m: 	learning_rate: 5e-05
[34m[1mwandb[0m: 	warmup_ratio: 0.1
[34m[1mwandb[0m: 	weight_decay: 0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


PyTorch: setting up devices
loading configuration file https://huggingface.co/roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/733bade19e5f0ce98e6531021dd5180994bb2f7b8bd7e80c7968805834ba351e.35205c6cfc956461d8515139f0f8dd5d207a2f336c0c3a83b4bc8dca3518e37b
Model config RobertaConfig {
  "_name_or_path": "roberta-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "admiration",
    "1": "amusement",
    "2": "anger",
    "3": "annoyance",
    "4": "approval",
    "5": "caring",
    "6": "confusion",
    "7": "curiosity",
    "8": "desire",
    "9": "disappointment",
    "10": "disapproval",
    "11": "disgust",
    "12": "embarrassment",
    "13": "excitement",
    "14": "fear",
    "15": "gratitude",
    "16": "grief",
    "17": "j

Epoch,Training Loss,Validation Loss,Hamming,Exact Match,Macro F1,Macro Precision,Macro Recall,Micro F1,Micro Precision,Micro Recall,Weighted F1,Weighted Precision,Weighted Recall
1,0.095,0.08974,0.9647,0.426097,0.393593,0.459426,0.406701,0.586029,0.577338,0.594984,0.5486,0.569577,0.594984
2,0.0831,0.083009,0.966208,0.455031,0.472336,0.493985,0.480205,0.608689,0.592433,0.625862,0.590471,0.582038,0.625862
3,0.0697,0.082237,0.965931,0.451345,0.507351,0.539454,0.514957,0.610769,0.587019,0.63652,0.602046,0.581533,0.63652


***** Running Evaluation *****
  Num examples = 5426
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to results/checkpoint-2714
Configuration saved in results/checkpoint-2714/config.json
Model weights saved in results/checkpoint-2714/pytorch_model.bin
tokenizer config file saved in results/checkpoint-2714/tokenizer_config.json
Special tokens file saved in results/checkpoint-2714/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 5426
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to results/checkpoint-5428
Configuration saved in results/checkpoint-5428/config.json
Model weights saved in results/checkpoint-5428/pytorch_model.bin
tokenizer config file saved in results/checkpoint-5428/tokenizer_config.json
Special tokens file saved in results/checkpoint-5428/spec

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/exact_match,▁█▇
eval/hamming,▁█▇
eval/loss,█▂▁
eval/macro_f1,▁▆█
eval/macro_precision,▁▄█
eval/macro_recall,▁▆█
eval/micro_f1,▁▇█
eval/micro_precision,▁█▅
eval/micro_recall,▁▆█
eval/runtime,█▁▂

0,1
eval/exact_match,0.45135
eval/hamming,0.96593
eval/loss,0.08224
eval/macro_f1,0.50735
eval/macro_precision,0.53945
eval/macro_recall,0.51496
eval/micro_f1,0.61077
eval/micro_precision,0.58702
eval/micro_recall,0.63652
eval/runtime,18.2478


[34m[1mwandb[0m: Agent Starting Run: 1x16lxsv with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 3
[34m[1mwandb[0m: 	learning_rate: 4e-05
[34m[1mwandb[0m: 	warmup_ratio: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.1


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


PyTorch: setting up devices
loading configuration file https://huggingface.co/roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/733bade19e5f0ce98e6531021dd5180994bb2f7b8bd7e80c7968805834ba351e.35205c6cfc956461d8515139f0f8dd5d207a2f336c0c3a83b4bc8dca3518e37b
Model config RobertaConfig {
  "_name_or_path": "roberta-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "admiration",
    "1": "amusement",
    "2": "anger",
    "3": "annoyance",
    "4": "approval",
    "5": "caring",
    "6": "confusion",
    "7": "curiosity",
    "8": "desire",
    "9": "disappointment",
    "10": "disapproval",
    "11": "disgust",
    "12": "embarrassment",
    "13": "excitement",
    "14": "fear",
    "15": "gratitude",
    "16": "grief",
    "17": "j

Epoch,Training Loss,Validation Loss,Hamming,Exact Match,Macro F1,Macro Precision,Macro Recall,Micro F1,Micro Precision,Micro Recall,Weighted F1,Weighted Precision,Weighted Recall
1,0.095,0.089371,0.964898,0.426465,0.409869,0.494609,0.417429,0.592496,0.578053,0.60768,0.562242,0.587606,0.60768
2,0.0826,0.083466,0.966116,0.452635,0.474123,0.494459,0.482475,0.607682,0.591368,0.624922,0.591161,0.58092,0.624922
3,0.0696,0.082612,0.965905,0.451161,0.496953,0.545272,0.505861,0.609057,0.587336,0.632445,0.600173,0.582733,0.632445


***** Running Evaluation *****
  Num examples = 5426
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to results/checkpoint-2714
Configuration saved in results/checkpoint-2714/config.json
Model weights saved in results/checkpoint-2714/pytorch_model.bin
tokenizer config file saved in results/checkpoint-2714/tokenizer_config.json
Special tokens file saved in results/checkpoint-2714/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 5426
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to results/checkpoint-5428
Configuration saved in results/checkpoint-5428/config.json
Model weights saved in results/checkpoint-5428/pytorch_model.bin
tokenizer config file saved in results/checkpoint-5428/tokenizer_config.json
Special tokens file saved in results/checkpoint-5428/spec

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/exact_match,▁██
eval/hamming,▁█▇
eval/loss,█▂▁
eval/macro_f1,▁▆█
eval/macro_precision,▁▁█
eval/macro_recall,▁▆█
eval/micro_f1,▁▇█
eval/micro_precision,▁█▆
eval/micro_recall,▁▆█
eval/runtime,█▄▁

0,1
eval/exact_match,0.45116
eval/hamming,0.9659
eval/loss,0.08261
eval/macro_f1,0.49695
eval/macro_precision,0.54527
eval/macro_recall,0.50586
eval/micro_f1,0.60906
eval/micro_precision,0.58734
eval/micro_recall,0.63245
eval/runtime,18.0119


[34m[1mwandb[0m: Agent Starting Run: 5cugbloz with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 3
[34m[1mwandb[0m: 	learning_rate: 4e-05
[34m[1mwandb[0m: 	warmup_ratio: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.01


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


PyTorch: setting up devices
loading configuration file https://huggingface.co/roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/733bade19e5f0ce98e6531021dd5180994bb2f7b8bd7e80c7968805834ba351e.35205c6cfc956461d8515139f0f8dd5d207a2f336c0c3a83b4bc8dca3518e37b
Model config RobertaConfig {
  "_name_or_path": "roberta-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "admiration",
    "1": "amusement",
    "2": "anger",
    "3": "annoyance",
    "4": "approval",
    "5": "caring",
    "6": "confusion",
    "7": "curiosity",
    "8": "desire",
    "9": "disappointment",
    "10": "disapproval",
    "11": "disgust",
    "12": "embarrassment",
    "13": "excitement",
    "14": "fear",
    "15": "gratitude",
    "16": "grief",
    "17": "j

Epoch,Training Loss,Validation Loss,Hamming,Exact Match,Macro F1,Macro Precision,Macro Recall,Micro F1,Micro Precision,Micro Recall,Weighted F1,Weighted Precision,Weighted Recall
1,0.0949,0.089611,0.964477,0.420752,0.399173,0.466437,0.40247,0.586722,0.573589,0.60047,0.551561,0.562959,0.60047
2,0.0827,0.083254,0.965865,0.447475,0.472253,0.489423,0.48464,0.606107,0.587975,0.625392,0.589686,0.578578,0.625392
3,0.07,0.082334,0.966056,0.450055,0.508577,0.552036,0.51275,0.609348,0.58965,0.630408,0.600078,0.583712,0.630408


***** Running Evaluation *****
  Num examples = 5426
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to results/checkpoint-2714
Configuration saved in results/checkpoint-2714/config.json
Model weights saved in results/checkpoint-2714/pytorch_model.bin
tokenizer config file saved in results/checkpoint-2714/tokenizer_config.json
Special tokens file saved in results/checkpoint-2714/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 5426
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to results/checkpoint-5428
Configuration saved in results/checkpoint-5428/config.json
Model weights saved in results/checkpoint-5428/pytorch_model.bin
tokenizer config file saved in results/checkpoint-5428/tokenizer_config.json
Special tokens file saved in results/checkpoint-5428/spec

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/exact_match,▁▇█
eval/hamming,▁▇█
eval/loss,█▂▁
eval/macro_f1,▁▆█
eval/macro_precision,▁▃█
eval/macro_recall,▁▆█
eval/micro_f1,▁▇█
eval/micro_precision,▁▇█
eval/micro_recall,▁▇█
eval/runtime,▅█▁

0,1
eval/exact_match,0.45006
eval/hamming,0.96606
eval/loss,0.08233
eval/macro_f1,0.50858
eval/macro_precision,0.55204
eval/macro_recall,0.51275
eval/micro_f1,0.60935
eval/micro_precision,0.58965
eval/micro_recall,0.63041
eval/runtime,18.2043


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: d9lksiun with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 3
[34m[1mwandb[0m: 	learning_rate: 4e-05
[34m[1mwandb[0m: 	warmup_ratio: 0.1
[34m[1mwandb[0m: 	weight_decay: 0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


PyTorch: setting up devices
loading configuration file https://huggingface.co/roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/733bade19e5f0ce98e6531021dd5180994bb2f7b8bd7e80c7968805834ba351e.35205c6cfc956461d8515139f0f8dd5d207a2f336c0c3a83b4bc8dca3518e37b
Model config RobertaConfig {
  "_name_or_path": "roberta-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "admiration",
    "1": "amusement",
    "2": "anger",
    "3": "annoyance",
    "4": "approval",
    "5": "caring",
    "6": "confusion",
    "7": "curiosity",
    "8": "desire",
    "9": "disappointment",
    "10": "disapproval",
    "11": "disgust",
    "12": "embarrassment",
    "13": "excitement",
    "14": "fear",
    "15": "gratitude",
    "16": "grief",
    "17": "j

Epoch,Training Loss,Validation Loss,Hamming,Exact Match,Macro F1,Macro Precision,Macro Recall,Micro F1,Micro Precision,Micro Recall,Weighted F1,Weighted Precision,Weighted Recall
1,0.0947,0.089594,0.965543,0.434574,0.403078,0.476824,0.403919,0.59194,0.588773,0.595141,0.554129,0.568948,0.595141
2,0.0829,0.083827,0.966069,0.44895,0.47113,0.489527,0.48146,0.606158,0.591295,0.621787,0.589947,0.581647,0.621787
3,0.0704,0.082474,0.9658,0.449687,0.499112,0.541853,0.507503,0.609323,0.585549,0.63511,0.600334,0.579866,0.63511


***** Running Evaluation *****
  Num examples = 5426
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to results/checkpoint-2714
Configuration saved in results/checkpoint-2714/config.json
Model weights saved in results/checkpoint-2714/pytorch_model.bin
tokenizer config file saved in results/checkpoint-2714/tokenizer_config.json
Special tokens file saved in results/checkpoint-2714/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 5426
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to results/checkpoint-5428
Configuration saved in results/checkpoint-5428/config.json
Model weights saved in results/checkpoint-5428/pytorch_model.bin
tokenizer config file saved in results/checkpoint-5428/tokenizer_config.json
Special tokens file saved in results/checkpoint-5428/spec

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/exact_match,▁██
eval/hamming,▁█▄
eval/loss,█▂▁
eval/macro_f1,▁▆█
eval/macro_precision,▁▂█
eval/macro_recall,▁▆█
eval/micro_f1,▁▇█
eval/micro_precision,▅█▁
eval/micro_recall,▁▆█
eval/runtime,▁▅█

0,1
eval/exact_match,0.44969
eval/hamming,0.9658
eval/loss,0.08247
eval/macro_f1,0.49911
eval/macro_precision,0.54185
eval/macro_recall,0.5075
eval/micro_f1,0.60932
eval/micro_precision,0.58555
eval/micro_recall,0.63511
eval/runtime,18.9226


[34m[1mwandb[0m: Agent Starting Run: h7p9mj31 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 3
[34m[1mwandb[0m: 	learning_rate: 3e-05
[34m[1mwandb[0m: 	warmup_ratio: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.1


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


PyTorch: setting up devices
loading configuration file https://huggingface.co/roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/733bade19e5f0ce98e6531021dd5180994bb2f7b8bd7e80c7968805834ba351e.35205c6cfc956461d8515139f0f8dd5d207a2f336c0c3a83b4bc8dca3518e37b
Model config RobertaConfig {
  "_name_or_path": "roberta-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "admiration",
    "1": "amusement",
    "2": "anger",
    "3": "annoyance",
    "4": "approval",
    "5": "caring",
    "6": "confusion",
    "7": "curiosity",
    "8": "desire",
    "9": "disappointment",
    "10": "disapproval",
    "11": "disgust",
    "12": "embarrassment",
    "13": "excitement",
    "14": "fear",
    "15": "gratitude",
    "16": "grief",
    "17": "j

Epoch,Training Loss,Validation Loss,Hamming,Exact Match,Macro F1,Macro Precision,Macro Recall,Micro F1,Micro Precision,Micro Recall,Weighted F1,Weighted Precision,Weighted Recall
1,0.0955,0.090455,0.964918,0.424438,0.393877,0.45733,0.395541,0.587078,0.580423,0.593887,0.551192,0.555139,0.593887
2,0.083,0.084028,0.966148,0.452451,0.467667,0.492757,0.471858,0.606232,0.592576,0.620533,0.589713,0.582299,0.620533
3,0.0718,0.083027,0.965885,0.453004,0.474191,0.477308,0.484406,0.606484,0.588131,0.626019,0.595288,0.57678,0.626019


***** Running Evaluation *****
  Num examples = 5426
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to results/checkpoint-2714
Configuration saved in results/checkpoint-2714/config.json
Model weights saved in results/checkpoint-2714/pytorch_model.bin
tokenizer config file saved in results/checkpoint-2714/tokenizer_config.json
Special tokens file saved in results/checkpoint-2714/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 5426
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to results/checkpoint-5428
Configuration saved in results/checkpoint-5428/config.json
Model weights saved in results/checkpoint-5428/pytorch_model.bin
tokenizer config file saved in results/checkpoint-5428/tokenizer_config.json
Special tokens file saved in results/checkpoint-5428/spec

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/exact_match,▁██
eval/hamming,▁█▇
eval/loss,█▂▁
eval/macro_f1,▁▇█
eval/macro_precision,▁█▅
eval/macro_recall,▁▇█
eval/micro_f1,▁██
eval/micro_precision,▁█▅
eval/micro_recall,▁▇█
eval/runtime,▁█▄

0,1
eval/exact_match,0.453
eval/hamming,0.96589
eval/loss,0.08303
eval/macro_f1,0.47419
eval/macro_precision,0.47731
eval/macro_recall,0.48441
eval/micro_f1,0.60648
eval/micro_precision,0.58813
eval/micro_recall,0.62602
eval/runtime,18.1288


[34m[1mwandb[0m: Agent Starting Run: pzm9vdnz with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 3
[34m[1mwandb[0m: 	learning_rate: 3e-05
[34m[1mwandb[0m: 	warmup_ratio: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.01


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


PyTorch: setting up devices
loading configuration file https://huggingface.co/roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/733bade19e5f0ce98e6531021dd5180994bb2f7b8bd7e80c7968805834ba351e.35205c6cfc956461d8515139f0f8dd5d207a2f336c0c3a83b4bc8dca3518e37b
Model config RobertaConfig {
  "_name_or_path": "roberta-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "admiration",
    "1": "amusement",
    "2": "anger",
    "3": "annoyance",
    "4": "approval",
    "5": "caring",
    "6": "confusion",
    "7": "curiosity",
    "8": "desire",
    "9": "disappointment",
    "10": "disapproval",
    "11": "disgust",
    "12": "embarrassment",
    "13": "excitement",
    "14": "fear",
    "15": "gratitude",
    "16": "grief",
    "17": "j

Epoch,Training Loss,Validation Loss,Hamming,Exact Match,Macro F1,Macro Precision,Macro Recall,Micro F1,Micro Precision,Micro Recall,Weighted F1,Weighted Precision,Weighted Recall
1,0.0955,0.090701,0.965306,0.431994,0.392196,0.461985,0.39144,0.586815,0.586953,0.586677,0.548705,0.56242,0.586677
2,0.0835,0.084369,0.966162,0.450608,0.471354,0.49463,0.476882,0.605358,0.5932,0.618025,0.590437,0.584931,0.618025
3,0.072,0.082908,0.966096,0.451898,0.475546,0.476485,0.488897,0.609506,0.590222,0.630094,0.598213,0.579018,0.630094


***** Running Evaluation *****
  Num examples = 5426
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to results/checkpoint-2714
Configuration saved in results/checkpoint-2714/config.json
Model weights saved in results/checkpoint-2714/pytorch_model.bin
tokenizer config file saved in results/checkpoint-2714/tokenizer_config.json
Special tokens file saved in results/checkpoint-2714/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 5426
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to results/checkpoint-5428
Configuration saved in results/checkpoint-5428/config.json
Model weights saved in results/checkpoint-5428/pytorch_model.bin
tokenizer config file saved in results/checkpoint-5428/tokenizer_config.json
Special tokens file saved in results/checkpoint-5428/spec

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/exact_match,▁██
eval/hamming,▁█▇
eval/loss,█▂▁
eval/macro_f1,▁██
eval/macro_precision,▁█▄
eval/macro_recall,▁▇█
eval/micro_f1,▁▇█
eval/micro_precision,▁█▅
eval/micro_recall,▁▆█
eval/runtime,█▁▇

0,1
eval/exact_match,0.4519
eval/hamming,0.9661
eval/loss,0.08291
eval/macro_f1,0.47555
eval/macro_precision,0.47648
eval/macro_recall,0.4889
eval/micro_f1,0.60951
eval/micro_precision,0.59022
eval/micro_recall,0.63009
eval/runtime,18.4261


[34m[1mwandb[0m: Agent Starting Run: hbc4oxwi with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 3
[34m[1mwandb[0m: 	learning_rate: 3e-05
[34m[1mwandb[0m: 	warmup_ratio: 0.1
[34m[1mwandb[0m: 	weight_decay: 0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


PyTorch: setting up devices
loading configuration file https://huggingface.co/roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/733bade19e5f0ce98e6531021dd5180994bb2f7b8bd7e80c7968805834ba351e.35205c6cfc956461d8515139f0f8dd5d207a2f336c0c3a83b4bc8dca3518e37b
Model config RobertaConfig {
  "_name_or_path": "roberta-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "admiration",
    "1": "amusement",
    "2": "anger",
    "3": "annoyance",
    "4": "approval",
    "5": "caring",
    "6": "confusion",
    "7": "curiosity",
    "8": "desire",
    "9": "disappointment",
    "10": "disapproval",
    "11": "disgust",
    "12": "embarrassment",
    "13": "excitement",
    "14": "fear",
    "15": "gratitude",
    "16": "grief",
    "17": "j

Epoch,Training Loss,Validation Loss,Hamming,Exact Match,Macro F1,Macro Precision,Macro Recall,Micro F1,Micro Precision,Micro Recall,Weighted F1,Weighted Precision,Weighted Recall
1,0.0953,0.09017,0.965648,0.431441,0.395667,0.479588,0.396449,0.590313,0.591288,0.589342,0.552394,0.573745,0.589342
2,0.0832,0.084219,0.966004,0.44895,0.469568,0.496762,0.477419,0.605514,0.590496,0.621317,0.590318,0.586044,0.621317
3,0.0719,0.082524,0.966372,0.45282,0.487278,0.518833,0.499713,0.612514,0.593387,0.632915,0.602104,0.585669,0.632915


***** Running Evaluation *****
  Num examples = 5426
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to results/checkpoint-2714
Configuration saved in results/checkpoint-2714/config.json
Model weights saved in results/checkpoint-2714/pytorch_model.bin
tokenizer config file saved in results/checkpoint-2714/tokenizer_config.json
Special tokens file saved in results/checkpoint-2714/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 5426
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to results/checkpoint-5428
Configuration saved in results/checkpoint-5428/config.json
Model weights saved in results/checkpoint-5428/pytorch_model.bin
tokenizer config file saved in results/checkpoint-5428/tokenizer_config.json
Special tokens file saved in results/checkpoint-5428/spec

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/exact_match,▁▇█
eval/hamming,▁▄█
eval/loss,█▃▁
eval/macro_f1,▁▇█
eval/macro_precision,▁▄█
eval/macro_recall,▁▆█
eval/micro_f1,▁▆█
eval/micro_precision,▃▁█
eval/micro_recall,▁▆█
eval/runtime,█▇▁

0,1
eval/exact_match,0.45282
eval/hamming,0.96637
eval/loss,0.08252
eval/macro_f1,0.48728
eval/macro_precision,0.51883
eval/macro_recall,0.49971
eval/micro_f1,0.61251
eval/micro_precision,0.59339
eval/micro_recall,0.63292
eval/runtime,18.1646


[34m[1mwandb[0m: Agent Starting Run: 9d2b0i4t with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 3
[34m[1mwandb[0m: 	learning_rate: 2e-05
[34m[1mwandb[0m: 	warmup_ratio: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.1


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


PyTorch: setting up devices
loading configuration file https://huggingface.co/roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/733bade19e5f0ce98e6531021dd5180994bb2f7b8bd7e80c7968805834ba351e.35205c6cfc956461d8515139f0f8dd5d207a2f336c0c3a83b4bc8dca3518e37b
Model config RobertaConfig {
  "_name_or_path": "roberta-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "admiration",
    "1": "amusement",
    "2": "anger",
    "3": "annoyance",
    "4": "approval",
    "5": "caring",
    "6": "confusion",
    "7": "curiosity",
    "8": "desire",
    "9": "disappointment",
    "10": "disapproval",
    "11": "disgust",
    "12": "embarrassment",
    "13": "excitement",
    "14": "fear",
    "15": "gratitude",
    "16": "grief",
    "17": "j

Epoch,Training Loss,Validation Loss,Hamming,Exact Match,Macro F1,Macro Precision,Macro Recall,Micro F1,Micro Precision,Micro Recall,Weighted F1,Weighted Precision,Weighted Recall
1,0.0978,0.09212,0.965648,0.435864,0.380876,0.426847,0.380474,0.587137,0.592717,0.581661,0.545056,0.548088,0.581661
2,0.0849,0.084672,0.966517,0.457427,0.461652,0.500154,0.460431,0.606908,0.598537,0.615517,0.589712,0.58883,0.615517
3,0.0751,0.083061,0.966458,0.458349,0.472725,0.483962,0.478871,0.609442,0.596281,0.623197,0.597357,0.584637,0.623197


***** Running Evaluation *****
  Num examples = 5426
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to results/checkpoint-2714
Configuration saved in results/checkpoint-2714/config.json
Model weights saved in results/checkpoint-2714/pytorch_model.bin
tokenizer config file saved in results/checkpoint-2714/tokenizer_config.json
Special tokens file saved in results/checkpoint-2714/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 5426
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to results/checkpoint-5428
Configuration saved in results/checkpoint-5428/config.json
Model weights saved in results/checkpoint-5428/pytorch_model.bin
tokenizer config file saved in results/checkpoint-5428/tokenizer_config.json
Special tokens file saved in results/checkpoint-5428/spec

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/exact_match,▁██
eval/hamming,▁██
eval/loss,█▂▁
eval/macro_f1,▁▇█
eval/macro_precision,▁█▆
eval/macro_recall,▁▇█
eval/micro_f1,▁▇█
eval/micro_precision,▁█▅
eval/micro_recall,▁▇█
eval/runtime,▁▇█

0,1
eval/exact_match,0.45835
eval/hamming,0.96646
eval/loss,0.08306
eval/macro_f1,0.47272
eval/macro_precision,0.48396
eval/macro_recall,0.47887
eval/micro_f1,0.60944
eval/micro_precision,0.59628
eval/micro_recall,0.6232
eval/runtime,18.8571


[34m[1mwandb[0m: Agent Starting Run: n2c5j9k1 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 3
[34m[1mwandb[0m: 	learning_rate: 2e-05
[34m[1mwandb[0m: 	warmup_ratio: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.01


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


PyTorch: setting up devices
loading configuration file https://huggingface.co/roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/733bade19e5f0ce98e6531021dd5180994bb2f7b8bd7e80c7968805834ba351e.35205c6cfc956461d8515139f0f8dd5d207a2f336c0c3a83b4bc8dca3518e37b
Model config RobertaConfig {
  "_name_or_path": "roberta-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "admiration",
    "1": "amusement",
    "2": "anger",
    "3": "annoyance",
    "4": "approval",
    "5": "caring",
    "6": "confusion",
    "7": "curiosity",
    "8": "desire",
    "9": "disappointment",
    "10": "disapproval",
    "11": "disgust",
    "12": "embarrassment",
    "13": "excitement",
    "14": "fear",
    "15": "gratitude",
    "16": "grief",
    "17": "j

Epoch,Training Loss,Validation Loss,Hamming,Exact Match,Macro F1,Macro Precision,Macro Recall,Micro F1,Micro Precision,Micro Recall,Weighted F1,Weighted Precision,Weighted Recall
1,0.0978,0.091845,0.966083,0.436786,0.385122,0.473524,0.380743,0.5921,0.598113,0.586207,0.550028,0.570893,0.586207
2,0.085,0.084805,0.966385,0.454847,0.464326,0.499723,0.46874,0.607908,0.595786,0.620533,0.591685,0.588753,0.620533
3,0.0751,0.083359,0.966425,0.454294,0.475089,0.488576,0.481768,0.610283,0.59532,0.626019,0.598108,0.584411,0.626019


***** Running Evaluation *****
  Num examples = 5426
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to results/checkpoint-2714
Configuration saved in results/checkpoint-2714/config.json
Model weights saved in results/checkpoint-2714/pytorch_model.bin
tokenizer config file saved in results/checkpoint-2714/tokenizer_config.json
Special tokens file saved in results/checkpoint-2714/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 5426
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to results/checkpoint-5428
Configuration saved in results/checkpoint-5428/config.json
Model weights saved in results/checkpoint-5428/pytorch_model.bin
tokenizer config file saved in results/checkpoint-5428/tokenizer_config.json
Special tokens file saved in results/checkpoint-5428/spec

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/exact_match,▁██
eval/hamming,▁▇█
eval/loss,█▂▁
eval/macro_f1,▁▇█
eval/macro_precision,▁█▅
eval/macro_recall,▁▇█
eval/micro_f1,▁▇█
eval/micro_precision,█▂▁
eval/micro_recall,▁▇█
eval/runtime,▁▂█

0,1
eval/exact_match,0.45429
eval/hamming,0.96642
eval/loss,0.08336
eval/macro_f1,0.47509
eval/macro_precision,0.48858
eval/macro_recall,0.48177
eval/micro_f1,0.61028
eval/micro_precision,0.59532
eval/micro_recall,0.62602
eval/runtime,18.5794


[34m[1mwandb[0m: Agent Starting Run: y9grwlgg with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 3
[34m[1mwandb[0m: 	learning_rate: 2e-05
[34m[1mwandb[0m: 	warmup_ratio: 0.1
[34m[1mwandb[0m: 	weight_decay: 0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


PyTorch: setting up devices
loading configuration file https://huggingface.co/roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/733bade19e5f0ce98e6531021dd5180994bb2f7b8bd7e80c7968805834ba351e.35205c6cfc956461d8515139f0f8dd5d207a2f336c0c3a83b4bc8dca3518e37b
Model config RobertaConfig {
  "_name_or_path": "roberta-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "admiration",
    "1": "amusement",
    "2": "anger",
    "3": "annoyance",
    "4": "approval",
    "5": "caring",
    "6": "confusion",
    "7": "curiosity",
    "8": "desire",
    "9": "disappointment",
    "10": "disapproval",
    "11": "disgust",
    "12": "embarrassment",
    "13": "excitement",
    "14": "fear",
    "15": "gratitude",
    "16": "grief",
    "17": "j

Epoch,Training Loss,Validation Loss,Hamming,Exact Match,Macro F1,Macro Precision,Macro Recall,Micro F1,Micro Precision,Micro Recall,Weighted F1,Weighted Precision,Weighted Recall
1,0.0981,0.092345,0.965918,0.437154,0.376869,0.469098,0.373794,0.587739,0.597249,0.578527,0.544653,0.570067,0.578527
2,0.0851,0.085032,0.966438,0.457427,0.459314,0.496478,0.463883,0.605798,0.597712,0.614107,0.588603,0.589064,0.614107
3,0.0749,0.083345,0.96653,0.457796,0.473533,0.488148,0.478172,0.610315,0.597091,0.624138,0.59769,0.584915,0.624138


***** Running Evaluation *****
  Num examples = 5426
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to results/checkpoint-2714
Configuration saved in results/checkpoint-2714/config.json
Model weights saved in results/checkpoint-2714/pytorch_model.bin
tokenizer config file saved in results/checkpoint-2714/tokenizer_config.json
Special tokens file saved in results/checkpoint-2714/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 5426
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to results/checkpoint-5428
Configuration saved in results/checkpoint-5428/config.json
Model weights saved in results/checkpoint-5428/pytorch_model.bin
tokenizer config file saved in results/checkpoint-5428/tokenizer_config.json
Special tokens file saved in results/checkpoint-5428/spec

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/exact_match,▁██
eval/hamming,▁▇█
eval/loss,█▂▁
eval/macro_f1,▁▇█
eval/macro_precision,▁█▆
eval/macro_recall,▁▇█
eval/micro_f1,▁▇█
eval/micro_precision,▃█▁
eval/micro_recall,▁▆█
eval/runtime,█▇▁

0,1
eval/exact_match,0.4578
eval/hamming,0.96653
eval/loss,0.08335
eval/macro_f1,0.47353
eval/macro_precision,0.48815
eval/macro_recall,0.47817
eval/micro_f1,0.61031
eval/micro_precision,0.59709
eval/micro_recall,0.62414
eval/runtime,18.2306
