In [1]:
from datasets import load_dataset
from transformers import BertTokenizer
dataset = load_dataset('dair-ai/emotion')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.


In [2]:
dataset


DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 16000
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 2000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 2000
    })
})

In [3]:
tokenizer

BertTokenizer(name_or_path='bert-base-uncased', vocab_size=30522, model_max_length=512, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'}, clean_up_tokenization_spaces=True),  added_tokens_decoder={
	0: AddedToken("[PAD]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	100: AddedToken("[UNK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	101: AddedToken("[CLS]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	102: AddedToken("[SEP]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	103: AddedToken("[MASK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}

In [4]:
import re
import unidecode
def clean_keywords(keyword):
    cleaned = re.sub(r'%20', ' ', keyword)
    cleaned = unidecode.unidecode(keyword)
    cleaned = re.sub(r"[!\"#$%&()*+-./:;<=>?@[\]^_`{|}~\n -' ]"," ",keyword)
    return cleaned

In [5]:
def tokenize_function(example):
    return tokenizer(example["text"], padding="max_length", truncation=True)
tokenized_datasets = dataset.map(tokenize_function, batched=True)

In [6]:
tokenized_datasets["train"]

Dataset({
    features: ['text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
    num_rows: 16000
})

In [7]:
tokenized_datasets

DatasetDict({
    train: Dataset({
        features: ['text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 16000
    })
    validation: Dataset({
        features: ['text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 2000
    })
    test: Dataset({
        features: ['text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 2000
    })
})

In [22]:
!pip install accelerate -U
!pip install transformers[torch] -U



In [8]:
from transformers import  Trainer, TrainingArguments
from transformers import BertTokenizer, BertForSequenceClassification
import torch
import numpy as np
from sklearn.metrics import accuracy_score

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

model = BertForSequenceClassification.from_pretrained("bert-base-uncased",num_labels=6)

training_args = TrainingArguments(
output_dir="./results",
# Directory for saving outputs
learning_rate=5e-5,
# Learning rate for optimization
per_device_train_batch_size=16,
# Batch size for training
per_device_eval_batch_size=16,
# Batch size for evaluation
num_train_epochs=3,
# Number of training epochs
weight_decay=0.01,
# Weight decay for regularization
evaluation_strategy="epoch",
save_strategy = "no"
# Evaluation is done at the end of each epoch
)
train_dataset = tokenized_datasets["train"]
eval_dataset = tokenized_datasets["test"]
val_dataset = tokenized_datasets["validation"]
trainer = Trainer(
model=model,
args=training_args,
train_dataset=train_dataset,
eval_dataset=eval_dataset,
compute_metrics=lambda p: {"accuracy": accuracy_score(p.label_ids,
np.argmax(p.predictions,
axis=1))}
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
model

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [10]:
import os
os.environ["WANDB_DISABLED"] = "true"

In [11]:
eval_results = trainer.evaluate(val_dataset)
print(eval_results)

{'eval_loss': 1.8156042098999023, 'eval_accuracy': 0.09, 'eval_runtime': 67.9655, 'eval_samples_per_second': 29.427, 'eval_steps_per_second': 1.839}


In [13]:
!pip install ray


Collecting ray
  Downloading ray-2.9.3-cp310-cp310-manylinux2014_x86_64.whl (64.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.9/64.9 MB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: ray
Successfully installed ray-2.9.3


In [14]:
from ray import tune
def ray_hp_space(trial):
    return {
        "learning_rate": tune.loguniform(1e-6, 1e-4),
        "per_device_train_batch_size": tune.choice([1,4 ,8]),
        "num_train_epochs": tune.choice([1,3,5]),

        "weight_decay": tune.loguniform(1e-4, 0.1)
    }

In [15]:
small_train_dataset = tokenized_datasets["train"].shard(index=1, num_shards=10)
small_eval_dataset = tokenized_datasets["test"].shard(index=1, num_shards=10)


In [16]:
num_labels = 6
from transformers import  Trainer, TrainingArguments
import numpy as np
from sklearn.metrics import accuracy_score
def model_init():
    return  BertForSequenceClassification.from_pretrained("bert-base-uncased",num_labels=num_labels)

In [17]:




num_labels = num_labels
trainer = Trainer(

    model_init= model_init,

    args=training_args,

    train_dataset=small_train_dataset,

    eval_dataset=small_eval_dataset,

    compute_metrics=lambda p: {"accuracy": accuracy_score(p.label_ids,
np.argmax(p.predictions,
axis=1))}




)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [18]:
best_trial = trainer.hyperparameter_search(

    direction="maximize",

    backend="ray",

    hp_space=ray_hp_space,

    n_trials=30

)

2024-03-11 01:05:09,059	INFO worker.py:1724 -- Started a local Ray instance.
2024-03-11 01:05:13,244	INFO tune.py:220 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `tune.run(...)`.
2024-03-11 01:05:13,254	INFO tune.py:592 -- [output] This will use the new output engine with verbosity 2. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949
2024-03-11 01:05:13,295	INFO tensorboardx.py:178 -- pip install "ray[tune]" to see TensorBoard files.


+-------------------------------------------------------------------+
| Configuration for experiment     _objective_2024-03-11_01-05-13   |
+-------------------------------------------------------------------+
| Search algorithm                 BasicVariantGenerator            |
| Scheduler                        FIFOScheduler                    |
| Number of trials                 30                               |
+-------------------------------------------------------------------+

View detailed results here: /root/ray_results/_objective_2024-03-11_01-05-13

Trial status: 16 PENDING
Current time: 2024-03-11 01:05:13. Total running time: 0s
Logical resource usage: 0/2 CPUs, 0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay |
+------------------------------------

[36m(_objective pid=3656)[0m 2024-03-11 01:05:34.278362: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(_objective pid=3656)[0m 2024-03-11 01:05:34.278420: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[36m(_objective pid=3656)[0m 2024-03-11 01:05:34.280232: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[36m(_objective pid=3656)[0m Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
[36m(_objective pid=3656)[0m You should probably TRAIN th


Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:05:43. Total running time: 30s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay |
+--------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703    |
| _objective_6a385_00001   PENDING        1.57513e-05                        8                    3      0.000293754 |
| _objective_6a385_00002   PENDING        1.30667e-06                        1                    5      0.00011528  |
| _objective_6a385_00003   PENDING        8.70602e-05                        4                    3      0.00010

[36m(_objective pid=3656)[0m   0%|          | 33/8000 [00:05<19:09,  6.93it/s]
  0%|          | 34/8000 [00:05<19:09,  6.93it/s]
  0%|          | 35/8000 [00:05<19:04,  6.96it/s]
  0%|          | 36/8000 [00:06<19:00,  6.98it/s]
  0%|          | 37/8000 [00:06<19:10,  6.92it/s]
  0%|          | 38/8000 [00:06<19:22,  6.85it/s]
  0%|          | 39/8000 [00:06<19:04,  6.96it/s]
  0%|          | 40/8000 [00:06<19:03,  6.96it/s]
  1%|          | 41/8000 [00:06<19:04,  6.95it/s]
  1%|          | 42/8000 [00:06<19:08,  6.93it/s]
  1%|          | 43/8000 [00:07<19:04,  6.95it/s]
  1%|          | 44/8000 [00:07<18:52,  7.03it/s]
  1%|          | 45/8000 [00:07<18:46,  7.06it/s]
  1%|          | 46/8000 [00:07<18:43,  7.08it/s]
  1%|          | 47/8000 [00:07<18:43,  7.08it/s]
  1%|          | 48/8000 [00:07<18:51,  7.03it/s]
  1%|          | 49/8000 [00:07<18:50,  7.03it/s]
  1%|          | 50/8000 [00:08<18:42,  7.08it/s]
  1%|          | 51/8000 [00:08<18:53,  7.01it/s]
  1%|          | 5

Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:06:13. Total running time: 1min 0s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay |
+--------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703    |
| _objective_6a385_00001   PENDING        1.57513e-05                        8                    3      0.000293754 |
| _objective_6a385_00002   PENDING        1.30667e-06                        1                    5      0.00011528  |
| _objective_6a385_00003   PENDING        8.70602e-05                        4                    3      0.00

  3%|▎         | 241/8000 [00:35<18:28,  7.00it/s]
  3%|▎         | 242/8000 [00:35<18:27,  7.00it/s]
  3%|▎         | 243/8000 [00:35<18:23,  7.03it/s]
  3%|▎         | 244/8000 [00:36<18:25,  7.01it/s]
  3%|▎         | 245/8000 [00:36<18:19,  7.05it/s]
  3%|▎         | 246/8000 [00:36<18:15,  7.08it/s]
  3%|▎         | 247/8000 [00:36<18:16,  7.07it/s]
  3%|▎         | 248/8000 [00:36<18:22,  7.03it/s]
  3%|▎         | 249/8000 [00:36<18:15,  7.07it/s]
  3%|▎         | 250/8000 [00:36<18:14,  7.08it/s]
  3%|▎         | 251/8000 [00:37<18:15,  7.08it/s]
  3%|▎         | 252/8000 [00:37<18:16,  7.07it/s]
  3%|▎         | 253/8000 [00:37<18:12,  7.09it/s]
  3%|▎         | 254/8000 [00:37<18:20,  7.04it/s]
  3%|▎         | 255/8000 [00:37<18:23,  7.02it/s]
  3%|▎         | 256/8000 [00:37<18:20,  7.03it/s]
  3%|▎         | 257/8000 [00:37<18:19,  7.04it/s]
  3%|▎         | 258/8000 [00:38<18:13,  7.08it/s]
  3%|▎         | 259/8000 [00:38<18:17,  7.05it/s]
  3%|▎         | 260/8000 [00:3

Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:06:43. Total running time: 1min 30s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay |
+--------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703    |
| _objective_6a385_00001   PENDING        1.57513e-05                        8                    3      0.000293754 |
| _objective_6a385_00002   PENDING        1.30667e-06                        1                    5      0.00011528  |
| _objective_6a385_00003   PENDING        8.70602e-05                        4                    3      0.0

[36m(_objective pid=3656)[0m   6%|▌         | 452/8000 [01:05<17:27,  7.21it/s]
  6%|▌         | 453/8000 [01:05<17:23,  7.24it/s]
  6%|▌         | 454/8000 [01:05<17:36,  7.14it/s]
  6%|▌         | 455/8000 [01:05<17:41,  7.11it/s]
  6%|▌         | 456/8000 [01:06<17:47,  7.07it/s]
  6%|▌         | 457/8000 [01:06<17:53,  7.03it/s]
  6%|▌         | 458/8000 [01:06<17:47,  7.07it/s]
  6%|▌         | 459/8000 [01:06<17:45,  7.08it/s]
  6%|▌         | 460/8000 [01:06<17:40,  7.11it/s]
  6%|▌         | 461/8000 [01:06<17:34,  7.15it/s]
  6%|▌         | 462/8000 [01:06<17:27,  7.20it/s]
  6%|▌         | 463/8000 [01:07<17:25,  7.21it/s]
  6%|▌         | 464/8000 [01:07<17:34,  7.15it/s]
  6%|▌         | 465/8000 [01:07<17:39,  7.12it/s]
  6%|▌         | 466/8000 [01:07<17:34,  7.14it/s]
  6%|▌         | 467/8000 [01:07<17:40,  7.10it/s]
  6%|▌         | 468/8000 [01:07<17:45,  7.07it/s]
  6%|▌         | 469/8000 [01:07<17:33,  7.15it/s]
  6%|▌         | 470/8000 [01:08<17:26,  7.20it/s]

[36m(_objective pid=3656)[0m {'loss': 1.559, 'grad_norm': 24.076812744140625, 'learning_rate': 5.260796639376103e-06, 'epoch': 0.31}


[36m(_objective pid=3656)[0m   6%|▋         | 502/8000 [01:12<17:25,  7.17it/s]
  6%|▋         | 503/8000 [01:12<17:36,  7.10it/s]
  6%|▋         | 504/8000 [01:12<17:26,  7.17it/s]
  6%|▋         | 505/8000 [01:12<17:24,  7.18it/s]
  6%|▋         | 506/8000 [01:13<17:28,  7.15it/s]
  6%|▋         | 507/8000 [01:13<17:29,  7.14it/s]
  6%|▋         | 508/8000 [01:13<17:38,  7.08it/s]
  6%|▋         | 509/8000 [01:13<17:35,  7.10it/s]
  6%|▋         | 510/8000 [01:13<17:29,  7.14it/s]
  6%|▋         | 511/8000 [01:13<17:30,  7.13it/s]
  6%|▋         | 512/8000 [01:13<17:23,  7.17it/s]
  6%|▋         | 513/8000 [01:14<17:23,  7.17it/s]
  6%|▋         | 514/8000 [01:14<17:25,  7.16it/s]
  6%|▋         | 515/8000 [01:14<17:30,  7.13it/s]
  6%|▋         | 516/8000 [01:14<17:36,  7.09it/s]
  6%|▋         | 517/8000 [01:14<17:36,  7.08it/s]
  6%|▋         | 518/8000 [01:14<17:40,  7.06it/s]
  6%|▋         | 519/8000 [01:14<17:39,  7.06it/s]
  6%|▋         | 520/8000 [01:15<17:37,  7.07it/s]

Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:07:13. Total running time: 2min 0s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay |
+--------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703    |
| _objective_6a385_00001   PENDING        1.57513e-05                        8                    3      0.000293754 |
| _objective_6a385_00002   PENDING        1.30667e-06                        1                    5      0.00011528  |
| _objective_6a385_00003   PENDING        8.70602e-05                        4                    3      0.00

  8%|▊         | 636/8000 [01:35<54:11,  2.26it/s]
  8%|▊         | 637/8000 [01:36<47:13,  2.60it/s]
  8%|▊         | 638/8000 [01:36<47:32,  2.58it/s]
  8%|▊         | 639/8000 [01:36<46:52,  2.62it/s]
  8%|▊         | 640/8000 [01:37<39:02,  3.14it/s]
  8%|▊         | 641/8000 [01:37<42:15,  2.90it/s]
  8%|▊         | 642/8000 [01:37<48:19,  2.54it/s]
  8%|▊         | 643/8000 [01:38<43:34,  2.81it/s]
  8%|▊         | 644/8000 [01:38<35:51,  3.42it/s]
  8%|▊         | 645/8000 [01:38<31:06,  3.94it/s]
  8%|▊         | 646/8000 [01:38<28:28,  4.30it/s]
  8%|▊         | 647/8000 [01:38<25:11,  4.86it/s]
  8%|▊         | 648/8000 [01:38<22:59,  5.33it/s]
  8%|▊         | 649/8000 [01:39<21:20,  5.74it/s]
  8%|▊         | 650/8000 [01:39<20:21,  6.02it/s]
  8%|▊         | 651/8000 [01:39<19:48,  6.18it/s]
  8%|▊         | 652/8000 [01:39<20:31,  5.96it/s]
  8%|▊         | 653/8000 [01:39<20:08,  6.08it/s]
  8%|▊         | 654/8000 [01:39<21:16,  5.75it/s]
  8%|▊         | 655/8000 [01:4

Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:07:43. Total running time: 2min 30s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay |
+--------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703    |
| _objective_6a385_00001   PENDING        1.57513e-05                        8                    3      0.000293754 |
| _objective_6a385_00002   PENDING        1.30667e-06                        1                    5      0.00011528  |
| _objective_6a385_00003   PENDING        8.70602e-05                        4                    3      0.0

 10%|▉         | 798/8000 [02:05<16:56,  7.09it/s]
 10%|▉         | 799/8000 [02:05<16:57,  7.08it/s]
 10%|█         | 800/8000 [02:06<17:02,  7.04it/s]
 10%|█         | 801/8000 [02:06<17:00,  7.05it/s]
 10%|█         | 802/8000 [02:06<16:59,  7.06it/s]
 10%|█         | 803/8000 [02:06<17:04,  7.02it/s]
 10%|█         | 804/8000 [02:06<17:02,  7.04it/s]
 10%|█         | 805/8000 [02:06<16:53,  7.10it/s]
 10%|█         | 806/8000 [02:06<17:02,  7.03it/s]
 10%|█         | 807/8000 [02:07<17:02,  7.03it/s]
 10%|█         | 808/8000 [02:07<17:02,  7.03it/s]
 10%|█         | 809/8000 [02:07<17:05,  7.01it/s]
 10%|█         | 810/8000 [02:07<16:57,  7.07it/s]
 10%|█         | 811/8000 [02:07<17:04,  7.02it/s]
 10%|█         | 812/8000 [02:07<17:01,  7.04it/s]
 10%|█         | 813/8000 [02:07<17:08,  6.99it/s]
 10%|█         | 814/8000 [02:08<17:00,  7.04it/s]
 10%|█         | 815/8000 [02:08<16:56,  7.07it/s]
 10%|█         | 816/8000 [02:08<17:00,  7.04it/s]
 10%|█         | 817/8000 [02:0

[36m(_objective pid=3656)[0m {'loss': 1.3352, 'grad_norm': 1.3184014558792114, 'learning_rate': 4.910076863417696e-06, 'epoch': 0.62}


 13%|█▎        | 1001/8000 [02:34<16:25,  7.10it/s]
 13%|█▎        | 1002/8000 [02:34<16:26,  7.10it/s]
 13%|█▎        | 1003/8000 [02:34<16:30,  7.07it/s]
 13%|█▎        | 1004/8000 [02:35<16:27,  7.09it/s]
 13%|█▎        | 1005/8000 [02:35<16:30,  7.07it/s]
 13%|█▎        | 1006/8000 [02:35<16:26,  7.09it/s]
 13%|█▎        | 1007/8000 [02:35<16:38,  7.00it/s]
 13%|█▎        | 1008/8000 [02:35<16:23,  7.11it/s]


Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:08:14. Total running time: 3min 0s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay |
+--------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703    |
| _objective_6a385_00001   PENDING        1.57513e-05                        8                    3      0.000293754 |
| _objective_6a385_00002   PENDING        1.30667e-06                        1                    5      0.00011528  |
| _objective_6a385_00003   PENDING        8.70602e-05                        4                    3      0.00

 13%|█▎        | 1009/8000 [02:35<16:31,  7.05it/s]
 13%|█▎        | 1010/8000 [02:35<16:27,  7.08it/s]
 13%|█▎        | 1011/8000 [02:36<16:35,  7.02it/s]
 13%|█▎        | 1012/8000 [02:36<16:23,  7.10it/s]
 13%|█▎        | 1013/8000 [02:36<16:27,  7.08it/s]
 13%|█▎        | 1014/8000 [02:36<16:27,  7.07it/s]
 13%|█▎        | 1015/8000 [02:36<16:25,  7.09it/s]
 13%|█▎        | 1016/8000 [02:36<16:26,  7.08it/s]
 13%|█▎        | 1017/8000 [02:36<16:24,  7.10it/s]
 13%|█▎        | 1018/8000 [02:37<16:25,  7.08it/s]
 13%|█▎        | 1019/8000 [02:37<16:20,  7.12it/s]
 13%|█▎        | 1020/8000 [02:37<16:17,  7.14it/s]
 13%|█▎        | 1021/8000 [02:37<16:29,  7.05it/s]
 13%|█▎        | 1022/8000 [02:37<16:19,  7.13it/s]
 13%|█▎        | 1023/8000 [02:37<16:20,  7.12it/s]
 13%|█▎        | 1024/8000 [02:37<16:17,  7.14it/s]
 13%|█▎        | 1025/8000 [02:38<16:25,  7.08it/s]
 13%|█▎        | 1026/8000 [02:38<16:27,  7.06it/s]
 13%|█▎        | 1027/8000 [02:38<16:28,  7.06it/s]
 13%|█▎     

Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:08:44. Total running time: 3min 30s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay |
+--------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703    |
| _objective_6a385_00001   PENDING        1.57513e-05                        8                    3      0.000293754 |
| _objective_6a385_00002   PENDING        1.30667e-06                        1                    5      0.00011528  |
| _objective_6a385_00003   PENDING        8.70602e-05                        4                    3      0.0

[36m(_objective pid=3656)[0m  15%|█▌        | 1207/8000 [03:05<16:11,  6.99it/s]
 15%|█▌        | 1208/8000 [03:05<16:03,  7.05it/s]
 15%|█▌        | 1209/8000 [03:06<15:57,  7.09it/s]
 15%|█▌        | 1210/8000 [03:06<16:02,  7.05it/s]
 15%|█▌        | 1211/8000 [03:06<16:17,  6.95it/s]
 15%|█▌        | 1212/8000 [03:06<16:12,  6.98it/s]
 15%|█▌        | 1213/8000 [03:06<16:16,  6.95it/s]
 15%|█▌        | 1214/8000 [03:06<16:08,  7.01it/s]
 15%|█▌        | 1215/8000 [03:06<16:22,  6.91it/s]
 15%|█▌        | 1216/8000 [03:07<16:11,  6.98it/s]
 15%|█▌        | 1217/8000 [03:07<16:11,  6.99it/s]
 15%|█▌        | 1218/8000 [03:07<16:11,  6.98it/s]
 15%|█▌        | 1219/8000 [03:07<16:12,  6.97it/s]
 15%|█▌        | 1220/8000 [03:07<16:26,  6.87it/s]
 15%|█▌        | 1221/8000 [03:07<16:16,  6.94it/s]
 15%|█▌        | 1222/8000 [03:07<16:14,  6.95it/s]
 15%|█▌        | 1223/8000 [03:08<16:14,  6.95it/s]
 15%|█▌        | 1224/8000 [03:08<16:14,  6.95it/s]
 15%|█▌        | 1225/8000 [03:0

Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:09:14. Total running time: 4min 0s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay |
+--------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703    |
| _objective_6a385_00001   PENDING        1.57513e-05                        8                    3      0.000293754 |
| _objective_6a385_00002   PENDING        1.30667e-06                        1                    5      0.00011528  |
| _objective_6a385_00003   PENDING        8.70602e-05                        4                    3      0.00

[36m(_objective pid=3656)[0m  18%|█▊        | 1418/8000 [03:35<15:36,  7.03it/s]
 18%|█▊        | 1419/8000 [03:36<15:28,  7.09it/s]
 18%|█▊        | 1420/8000 [03:36<15:28,  7.09it/s]
 18%|█▊        | 1421/8000 [03:36<15:34,  7.04it/s]
 18%|█▊        | 1422/8000 [03:36<15:36,  7.02it/s]
 18%|█▊        | 1423/8000 [03:36<15:32,  7.05it/s]
 18%|█▊        | 1424/8000 [03:36<15:28,  7.08it/s]
 18%|█▊        | 1425/8000 [03:36<15:41,  6.98it/s]
 18%|█▊        | 1426/8000 [03:37<15:29,  7.07it/s]
 18%|█▊        | 1427/8000 [03:37<15:30,  7.06it/s]
 18%|█▊        | 1428/8000 [03:37<15:32,  7.05it/s]
 18%|█▊        | 1429/8000 [03:37<15:36,  7.01it/s]
 18%|█▊        | 1430/8000 [03:37<15:31,  7.05it/s]
 18%|█▊        | 1431/8000 [03:37<15:25,  7.10it/s]
 18%|█▊        | 1432/8000 [03:37<15:34,  7.03it/s]
 18%|█▊        | 1433/8000 [03:38<15:28,  7.08it/s]
 18%|█▊        | 1434/8000 [03:38<15:35,  7.02it/s]
 18%|█▊        | 1435/8000 [03:38<15:32,  7.04it/s]
 18%|█▊        | 1436/8000 [03:3

[36m(_objective pid=3656)[0m {'loss': 1.3566, 'grad_norm': 131.41705322265625, 'learning_rate': 4.559357087459289e-06, 'epoch': 0.94}


[36m(_objective pid=3656)[0m  19%|█▉        | 1502/8000 [03:47<15:19,  7.07it/s]
 19%|█▉        | 1503/8000 [03:48<15:19,  7.06it/s]
 19%|█▉        | 1504/8000 [03:48<15:17,  7.08it/s]
 19%|█▉        | 1505/8000 [03:48<15:14,  7.10it/s]
 19%|█▉        | 1506/8000 [03:48<15:24,  7.03it/s]
 19%|█▉        | 1507/8000 [03:48<15:17,  7.08it/s]
 19%|█▉        | 1508/8000 [03:48<15:19,  7.06it/s]
 19%|█▉        | 1509/8000 [03:48<15:15,  7.09it/s]
 19%|█▉        | 1510/8000 [03:49<15:17,  7.07it/s]
 19%|█▉        | 1511/8000 [03:49<15:16,  7.08it/s]
 19%|█▉        | 1512/8000 [03:49<15:20,  7.05it/s]
 19%|█▉        | 1513/8000 [03:49<15:24,  7.02it/s]
 19%|█▉        | 1514/8000 [03:49<15:19,  7.05it/s]
 19%|█▉        | 1515/8000 [03:49<15:17,  7.07it/s]
 19%|█▉        | 1516/8000 [03:49<15:20,  7.05it/s]
 19%|█▉        | 1517/8000 [03:50<15:19,  7.05it/s]
 19%|█▉        | 1518/8000 [03:50<15:20,  7.04it/s]
 19%|█▉        | 1519/8000 [03:50<15:14,  7.09it/s]
 19%|█▉        | 1520/8000 [03:5

Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:09:44. Total running time: 4min 30s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay |
+--------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703    |
| _objective_6a385_00001   PENDING        1.57513e-05                        8                    3      0.000293754 |
| _objective_6a385_00002   PENDING        1.30667e-06                        1                    5      0.00011528  |
| _objective_6a385_00003   PENDING        8.70602e-05                        4                    3      0.0

[36m(_objective pid=3656)[0m 
 54%|█████▍    | 7/13 [00:03<00:03,  1.80it/s][A
[36m(_objective pid=3656)[0m 
 62%|██████▏   | 8/13 [00:04<00:02,  1.76it/s][A
[36m(_objective pid=3656)[0m 
 69%|██████▉   | 9/13 [00:04<00:02,  1.74it/s][A
[36m(_objective pid=3656)[0m 
 77%|███████▋  | 10/13 [00:05<00:01,  1.73it/s][A
[36m(_objective pid=3656)[0m 
 85%|████████▍ | 11/13 [00:05<00:01,  1.72it/s][A
[36m(_objective pid=3656)[0m 
 92%|█████████▏| 12/13 [00:06<00:00,  1.72it/s][A



Trial _objective_6a385_00000 finished iteration 1 at 2024-03-11 01:09:47. Total running time: 4min 34s
+-------------------------------------------------+
| Trial _objective_6a385_00000 result             |
+-------------------------------------------------+
| checkpoint_dir_name                             |
| time_this_iter_s                        258.657 |
| time_total_s                            258.657 |
| training_iteration                            1 |
| epoch                                         1 |
| eval_accuracy                             0.695 |
| eval_loss                               1.23422 |
| eval_runtime                             7.3693 |
| eval_samples_per_second                   27.14 |
| eval_steps_per_second                     1.764 |
| objective                                 0.695 |
+-------------------------------------------------+
[36m(_objective pid=3656)[0m {'eval_loss': 1.2342238426208496, 'eval_accuracy': 0.695, 'eval_runtime': 7.3693, 'ev

[36m(_objective pid=3656)[0m 
[36m(_objective pid=3656)[0m 100%|██████████| 13/13 [00:06<00:00,  2.04it/s][A                                                   
[36m(_objective pid=3656)[0m                                                [A 20%|██        | 1600/8000 [04:09<15:01,  7.10it/s]
[36m(_objective pid=3656)[0m 100%|██████████| 13/13 [00:06<00:00,  2.04it/s][A
[36m(_objective pid=3656)[0m                                                [A
 20%|██        | 1601/8000 [04:09<4:10:16,  2.35s/it]
 20%|██        | 1602/8000 [04:09<2:59:41,  1.69s/it]
 20%|██        | 1603/8000 [04:09<2:10:05,  1.22s/it]
 20%|██        | 1604/8000 [04:09<1:35:31,  1.12it/s]
 20%|██        | 1605/8000 [04:09<1:11:27,  1.49it/s]
 20%|██        | 1606/8000 [04:10<54:27,  1.96it/s]  
 20%|██        | 1607/8000 [04:10<42:43,  2.49it/s]
 20%|██        | 1608/8000 [04:10<34:32,  3.08it/s]
 20%|██        | 1609/8000 [04:10<28:43,  3.71it/s]
 20%|██        | 1610/8000 [04:10<24:57,  4.27it/s]


Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:10:14. Total running time: 5min 0s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703           1            258.657       1.23422 

 22%|██▏       | 1790/8000 [04:36<14:39,  7.06it/s]
 22%|██▏       | 1791/8000 [04:36<14:53,  6.95it/s]
 22%|██▏       | 1792/8000 [04:36<14:58,  6.91it/s]
 22%|██▏       | 1793/8000 [04:36<14:53,  6.95it/s]
 22%|██▏       | 1794/8000 [04:36<14:47,  6.99it/s]
 22%|██▏       | 1795/8000 [04:36<14:46,  7.00it/s]
 22%|██▏       | 1796/8000 [04:36<14:39,  7.05it/s]
 22%|██▏       | 1797/8000 [04:37<14:46,  7.00it/s]
 22%|██▏       | 1798/8000 [04:37<14:47,  6.99it/s]
 22%|██▏       | 1799/8000 [04:37<14:44,  7.01it/s]
 22%|██▎       | 1800/8000 [04:37<14:44,  7.01it/s]
 23%|██▎       | 1801/8000 [04:37<14:47,  6.98it/s]
 23%|██▎       | 1802/8000 [04:37<14:50,  6.96it/s]
 23%|██▎       | 1803/8000 [04:37<15:01,  6.87it/s]
 23%|██▎       | 1804/8000 [04:38<15:01,  6.87it/s]
 23%|██▎       | 1805/8000 [04:38<15:01,  6.87it/s]
 23%|██▎       | 1806/8000 [04:38<14:52,  6.94it/s]
 23%|██▎       | 1807/8000 [04:38<14:48,  6.97it/s]
 23%|██▎       | 1808/8000 [04:38<14:44,  7.00it/s]
 23%|██▎    

Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:10:44. Total running time: 5min 30s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703           1            258.657       1.23422 

[36m(_objective pid=3656)[0m  25%|██▌       | 2000/8000 [05:06<14:18,  6.99it/s]
 25%|██▌       | 2000/8000 [05:06<14:18,  6.99it/s]
 25%|██▌       | 2001/8000 [05:06<14:22,  6.96it/s]
 25%|██▌       | 2002/8000 [05:06<14:09,  7.06it/s]
 25%|██▌       | 2003/8000 [05:06<14:12,  7.03it/s]
 25%|██▌       | 2004/8000 [05:06<14:10,  7.05it/s]
 25%|██▌       | 2005/8000 [05:06<14:03,  7.11it/s]
 25%|██▌       | 2006/8000 [05:06<14:09,  7.05it/s]
 25%|██▌       | 2007/8000 [05:07<14:09,  7.06it/s]
 25%|██▌       | 2008/8000 [05:07<14:09,  7.06it/s]
 25%|██▌       | 2009/8000 [05:07<14:19,  6.97it/s]
 25%|██▌       | 2010/8000 [05:07<14:12,  7.03it/s]
 25%|██▌       | 2011/8000 [05:07<14:16,  6.99it/s]
 25%|██▌       | 2012/8000 [05:07<14:05,  7.08it/s]
 25%|██▌       | 2013/8000 [05:07<14:08,  7.06it/s]
 25%|██▌       | 2014/8000 [05:08<14:11,  7.03it/s]
 25%|██▌       | 2015/8000 [05:08<14:06,  7.07it/s]
 25%|██▌       | 2016/8000 [05:08<14:11,  7.03it/s]
 25%|██▌       | 2017/8000 [05:0

Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:11:14. Total running time: 6min 1s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703           1            258.657       1.23422  

[36m(_objective pid=3656)[0m  27%|██▋       | 2177/8000 [05:36<13:52,  6.99it/s]
 27%|██▋       | 2178/8000 [05:36<13:52,  6.99it/s]
 27%|██▋       | 2179/8000 [05:36<13:41,  7.08it/s]
 27%|██▋       | 2180/8000 [05:36<13:46,  7.05it/s]
 27%|██▋       | 2181/8000 [05:36<13:49,  7.01it/s]
 27%|██▋       | 2182/8000 [05:36<13:51,  7.00it/s]
 27%|██▋       | 2183/8000 [05:36<13:46,  7.04it/s]
 27%|██▋       | 2184/8000 [05:37<13:44,  7.06it/s]
 27%|██▋       | 2185/8000 [05:37<13:47,  7.03it/s]
 27%|██▋       | 2186/8000 [05:37<13:42,  7.07it/s]
 27%|██▋       | 2187/8000 [05:37<13:42,  7.06it/s]
 27%|██▋       | 2188/8000 [05:37<13:46,  7.03it/s]
 27%|██▋       | 2189/8000 [05:37<13:49,  7.01it/s]
 27%|██▋       | 2190/8000 [05:37<13:46,  7.03it/s]
 27%|██▋       | 2191/8000 [05:38<13:48,  7.01it/s]
 27%|██▋       | 2192/8000 [05:38<13:43,  7.05it/s]
 27%|██▋       | 2193/8000 [05:38<13:40,  7.07it/s]
 27%|██▋       | 2194/8000 [05:38<13:43,  7.05it/s]
 27%|██▋       | 2195/8000 [05:3

Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:11:44. Total running time: 6min 31s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703           1            258.657       1.23422 

[36m(_objective pid=3656)[0m  30%|██▉       | 2387/8000 [06:06<13:16,  7.05it/s]
 30%|██▉       | 2388/8000 [06:06<13:17,  7.04it/s]
 30%|██▉       | 2389/8000 [06:06<13:15,  7.06it/s]
 30%|██▉       | 2390/8000 [06:06<13:17,  7.04it/s]
 30%|██▉       | 2391/8000 [06:06<13:16,  7.04it/s]
 30%|██▉       | 2392/8000 [06:06<13:15,  7.05it/s]
 30%|██▉       | 2393/8000 [06:06<13:18,  7.02it/s]
 30%|██▉       | 2394/8000 [06:07<13:11,  7.08it/s]
 30%|██▉       | 2395/8000 [06:07<13:23,  6.98it/s]
 30%|██▉       | 2396/8000 [06:07<13:23,  6.97it/s]
 30%|██▉       | 2397/8000 [06:07<13:21,  6.99it/s]
 30%|██▉       | 2398/8000 [06:07<13:23,  6.97it/s]
 30%|██▉       | 2399/8000 [06:07<13:24,  6.97it/s]
 30%|███       | 2400/8000 [06:07<13:26,  6.94it/s]
 30%|███       | 2401/8000 [06:08<13:32,  6.89it/s]
 30%|███       | 2402/8000 [06:08<13:27,  6.93it/s]
 30%|███       | 2403/8000 [06:08<13:30,  6.90it/s]
 30%|███       | 2404/8000 [06:08<13:22,  6.97it/s]
 30%|███       | 2405/8000 [06:0

[36m(_objective pid=3656)[0m {'loss': 0.9424, 'grad_norm': 0.28083518147468567, 'learning_rate': 3.857917535542475e-06, 'epoch': 1.56}


[36m(_objective pid=3656)[0m  31%|███▏      | 2502/8000 [06:22<13:21,  6.86it/s]
 31%|███▏      | 2503/8000 [06:22<13:10,  6.95it/s]
 31%|███▏      | 2504/8000 [06:22<13:08,  6.97it/s]
 31%|███▏      | 2505/8000 [06:22<12:56,  7.08it/s]
 31%|███▏      | 2506/8000 [06:23<12:58,  7.05it/s]
 31%|███▏      | 2507/8000 [06:23<12:57,  7.07it/s]
 31%|███▏      | 2508/8000 [06:23<13:01,  7.03it/s]
 31%|███▏      | 2509/8000 [06:23<13:05,  6.99it/s]
 31%|███▏      | 2510/8000 [06:23<13:02,  7.02it/s]
 31%|███▏      | 2511/8000 [06:23<12:53,  7.10it/s]
 31%|███▏      | 2512/8000 [06:23<12:53,  7.10it/s]
 31%|███▏      | 2513/8000 [06:24<12:52,  7.11it/s]
 31%|███▏      | 2514/8000 [06:24<12:49,  7.12it/s]
 31%|███▏      | 2515/8000 [06:24<12:48,  7.14it/s]
 31%|███▏      | 2516/8000 [06:24<12:52,  7.10it/s]
 31%|███▏      | 2517/8000 [06:24<12:55,  7.07it/s]
 31%|███▏      | 2518/8000 [06:24<12:51,  7.10it/s]
 31%|███▏      | 2519/8000 [06:24<12:48,  7.13it/s]
 32%|███▏      | 2520/8000 [06:2

Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:12:14. Total running time: 7min 1s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703           1            258.657       1.23422  

[36m(_objective pid=3656)[0m  32%|███▏      | 2598/8000 [06:36<12:44,  7.07it/s]
 32%|███▏      | 2599/8000 [06:36<12:50,  7.01it/s]
 32%|███▎      | 2600/8000 [06:36<12:47,  7.03it/s]
 33%|███▎      | 2601/8000 [06:36<12:42,  7.08it/s]
 33%|███▎      | 2602/8000 [06:36<12:40,  7.10it/s]
 33%|███▎      | 2603/8000 [06:36<12:52,  6.99it/s]
 33%|███▎      | 2604/8000 [06:36<12:43,  7.07it/s]
 33%|███▎      | 2605/8000 [06:37<12:43,  7.07it/s]
 33%|███▎      | 2606/8000 [06:37<12:43,  7.06it/s]
 33%|███▎      | 2607/8000 [06:37<12:42,  7.07it/s]
 33%|███▎      | 2608/8000 [06:37<12:44,  7.05it/s]
 33%|███▎      | 2609/8000 [06:37<12:37,  7.11it/s]
 33%|███▎      | 2610/8000 [06:37<12:38,  7.11it/s]
 33%|███▎      | 2611/8000 [06:37<12:40,  7.09it/s]
 33%|███▎      | 2612/8000 [06:38<12:35,  7.13it/s]
 33%|███▎      | 2613/8000 [06:38<12:35,  7.13it/s]
 33%|███▎      | 2614/8000 [06:38<12:32,  7.16it/s]
 33%|███▎      | 2615/8000 [06:38<12:40,  7.09it/s]
 33%|███▎      | 2616/8000 [06:3

Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:12:44. Total running time: 7min 31s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703           1            258.657       1.23422 

 35%|███▌      | 2810/8000 [07:06<12:26,  6.95it/s]
 35%|███▌      | 2811/8000 [07:06<12:25,  6.96it/s]
 35%|███▌      | 2812/8000 [07:06<12:37,  6.85it/s]
 35%|███▌      | 2813/8000 [07:06<12:30,  6.91it/s]
 35%|███▌      | 2814/8000 [07:06<12:21,  6.99it/s]
 35%|███▌      | 2815/8000 [07:06<12:35,  6.86it/s]
 35%|███▌      | 2816/8000 [07:07<12:26,  6.95it/s]
 35%|███▌      | 2817/8000 [07:07<12:26,  6.94it/s]
 35%|███▌      | 2818/8000 [07:07<12:20,  7.00it/s]
 35%|███▌      | 2819/8000 [07:07<12:16,  7.03it/s]
 35%|███▌      | 2820/8000 [07:07<12:13,  7.06it/s]
 35%|███▌      | 2821/8000 [07:07<12:07,  7.12it/s]
 35%|███▌      | 2822/8000 [07:07<12:11,  7.08it/s]
 35%|███▌      | 2823/8000 [07:08<12:08,  7.10it/s]
 35%|███▌      | 2824/8000 [07:08<12:06,  7.12it/s]
 35%|███▌      | 2825/8000 [07:08<12:11,  7.08it/s]
 35%|███▌      | 2826/8000 [07:08<12:11,  7.07it/s]
 35%|███▌      | 2827/8000 [07:08<12:11,  7.07it/s]
 35%|███▌      | 2828/8000 [07:08<12:12,  7.07it/s]
 35%|███▌   

[36m(_objective pid=3656)[0m {'loss': 1.0282, 'grad_norm': 0.20138107240200043, 'learning_rate': 3.5071977595840687e-06, 'epoch': 1.88}


[36m(_objective pid=3656)[0m  38%|███▊      | 3001/8000 [07:33<12:01,  6.93it/s]
 38%|███▊      | 3002/8000 [07:33<11:43,  7.10it/s]
 38%|███▊      | 3003/8000 [07:33<11:50,  7.03it/s]
 38%|███▊      | 3004/8000 [07:33<11:50,  7.03it/s]
 38%|███▊      | 3005/8000 [07:34<11:46,  7.07it/s]
 38%|███▊      | 3006/8000 [07:34<11:42,  7.11it/s]
 38%|███▊      | 3007/8000 [07:34<11:48,  7.05it/s]
 38%|███▊      | 3008/8000 [07:34<11:51,  7.01it/s]
 38%|███▊      | 3009/8000 [07:34<11:43,  7.09it/s]
 38%|███▊      | 3010/8000 [07:34<11:52,  7.00it/s]
 38%|███▊      | 3011/8000 [07:34<11:48,  7.04it/s]
 38%|███▊      | 3012/8000 [07:35<11:51,  7.01it/s]
 38%|███▊      | 3013/8000 [07:35<11:46,  7.06it/s]
 38%|███▊      | 3014/8000 [07:35<11:43,  7.08it/s]
 38%|███▊      | 3015/8000 [07:35<11:48,  7.03it/s]
 38%|███▊      | 3016/8000 [07:35<11:49,  7.03it/s]
 38%|███▊      | 3017/8000 [07:35<11:51,  7.01it/s]
 38%|███▊      | 3018/8000 [07:35<11:46,  7.05it/s]
 38%|███▊      | 3019/8000 [07:3

Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:13:14. Total running time: 8min 1s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703           1            258.657       1.23422  

[36m(_objective pid=3656)[0m  38%|███▊      | 3022/8000 [07:36<11:49,  7.01it/s]
 38%|███▊      | 3023/8000 [07:36<11:43,  7.07it/s]
 38%|███▊      | 3024/8000 [07:36<11:44,  7.07it/s]
 38%|███▊      | 3025/8000 [07:36<11:47,  7.03it/s]
 38%|███▊      | 3026/8000 [07:37<11:54,  6.97it/s]
 38%|███▊      | 3027/8000 [07:37<11:49,  7.01it/s]
 38%|███▊      | 3028/8000 [07:37<11:51,  6.99it/s]
 38%|███▊      | 3029/8000 [07:37<11:48,  7.01it/s]
 38%|███▊      | 3030/8000 [07:37<12:00,  6.89it/s]
 38%|███▊      | 3031/8000 [07:37<11:54,  6.96it/s]
 38%|███▊      | 3032/8000 [07:37<11:57,  6.92it/s]
 38%|███▊      | 3033/8000 [07:38<11:58,  6.91it/s]
 38%|███▊      | 3034/8000 [07:38<11:55,  6.94it/s]
 38%|███▊      | 3035/8000 [07:38<11:52,  6.97it/s]
 38%|███▊      | 3036/8000 [07:38<11:53,  6.96it/s]
 38%|███▊      | 3037/8000 [07:38<11:56,  6.93it/s]
 38%|███▊      | 3038/8000 [07:38<12:01,  6.88it/s]
 38%|███▊      | 3039/8000 [07:38<11:51,  6.97it/s]
 38%|███▊      | 3040/8000 [07:3

Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:13:44. Total running time: 8min 31s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703           1            258.657       1.23422 

[36m(_objective pid=3656)[0m 
 62%|██████▏   | 8/13 [00:04<00:02,  1.75it/s][A
[36m(_objective pid=3656)[0m 
 69%|██████▉   | 9/13 [00:04<00:02,  1.73it/s][A
[36m(_objective pid=3656)[0m 
 77%|███████▋  | 10/13 [00:05<00:01,  1.72it/s][A
[36m(_objective pid=3656)[0m 
 85%|████████▍ | 11/13 [00:05<00:01,  1.71it/s][A
[36m(_objective pid=3656)[0m 
 92%|█████████▏| 12/13 [00:06<00:00,  1.72it/s][A



Trial _objective_6a385_00000 finished iteration 2 at 2024-03-11 01:13:47. Total running time: 8min 34s
+-------------------------------------------------+
| Trial _objective_6a385_00000 result             |
+-------------------------------------------------+
| checkpoint_dir_name                             |
| time_this_iter_s                        239.988 |
| time_total_s                            498.645 |
| training_iteration                            2 |
| epoch                                         2 |
| eval_accuracy                             0.745 |
| eval_loss                               1.21072 |
| eval_runtime                             7.4094 |
| eval_samples_per_second                  26.993 |
| eval_steps_per_second                     1.755 |
| objective                                 0.745 |
+-------------------------------------------------+
[36m(_objective pid=3656)[0m {'eval_loss': 1.2107211351394653, 'eval_accuracy': 0.745, 'eval_runtime': 7.4094, 'ev

[36m(_objective pid=3656)[0m 
[36m(_objective pid=3656)[0m 100%|██████████| 13/13 [00:06<00:00,  2.03it/s][A                                                   
[36m(_objective pid=3656)[0m                                                [A 40%|████      | 3200/8000 [08:09<11:20,  7.05it/s]
[36m(_objective pid=3656)[0m 100%|██████████| 13/13 [00:06<00:00,  2.03it/s][A
[36m(_objective pid=3656)[0m                                                [A
 40%|████      | 3201/8000 [08:09<3:08:40,  2.36s/it]
 40%|████      | 3202/8000 [08:09<2:15:24,  1.69s/it]
 40%|████      | 3203/8000 [08:09<1:38:06,  1.23s/it]
 40%|████      | 3204/8000 [08:09<1:11:56,  1.11it/s]
 40%|████      | 3205/8000 [08:09<53:44,  1.49it/s]  
 40%|████      | 3206/8000 [08:09<40:58,  1.95it/s]
 40%|████      | 3207/8000 [08:10<32:10,  2.48it/s]
 40%|████      | 3208/8000 [08:10<25:57,  3.08it/s]
 40%|████      | 3209/8000 [08:10<21:36,  3.70it/s]
 40%|████      | 3210/8000 [08:10<18:28,  4.32it/s]
 


Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:14:14. Total running time: 9min 1s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703           2            498.645       1.21072 

[36m(_objective pid=3656)[0m  42%|████▏     | 3392/8000 [08:36<11:04,  6.93it/s]
 42%|████▏     | 3393/8000 [08:36<10:52,  7.06it/s]
 42%|████▏     | 3394/8000 [08:36<10:56,  7.02it/s]
 42%|████▏     | 3395/8000 [08:36<10:51,  7.06it/s]
 42%|████▏     | 3396/8000 [08:37<10:53,  7.05it/s]
 42%|████▏     | 3397/8000 [08:37<10:51,  7.06it/s]
 42%|████▏     | 3398/8000 [08:37<10:55,  7.02it/s]
 42%|████▏     | 3399/8000 [08:37<10:53,  7.04it/s]
 42%|████▎     | 3400/8000 [08:37<10:56,  7.01it/s]
 43%|████▎     | 3401/8000 [08:37<10:57,  7.00it/s]
 43%|████▎     | 3402/8000 [08:37<10:49,  7.08it/s]
 43%|████▎     | 3403/8000 [08:38<10:52,  7.05it/s]
 43%|████▎     | 3404/8000 [08:38<10:54,  7.02it/s]
 43%|████▎     | 3405/8000 [08:38<10:57,  6.99it/s]
 43%|████▎     | 3406/8000 [08:38<10:48,  7.09it/s]
 43%|████▎     | 3407/8000 [08:38<10:52,  7.04it/s]
 43%|████▎     | 3408/8000 [08:38<10:59,  6.96it/s]
 43%|████▎     | 3409/8000 [08:38<10:45,  7.11it/s]
 43%|████▎     | 3410/8000 [08:3

[36m(_objective pid=3656)[0m {'loss': 0.7704, 'grad_norm': 0.13399852812290192, 'learning_rate': 3.1564779836256618e-06, 'epoch': 2.19}


[36m(_objective pid=3656)[0m  44%|████▍     | 3501/8000 [08:52<10:35,  7.07it/s]
 44%|████▍     | 3502/8000 [08:52<10:34,  7.09it/s]
 44%|████▍     | 3503/8000 [08:52<10:34,  7.09it/s]
 44%|████▍     | 3504/8000 [08:52<10:38,  7.04it/s]
 44%|████▍     | 3505/8000 [08:52<10:41,  7.00it/s]
 44%|████▍     | 3506/8000 [08:52<10:48,  6.93it/s]
 44%|████▍     | 3507/8000 [08:52<10:37,  7.05it/s]
 44%|████▍     | 3508/8000 [08:53<10:36,  7.05it/s]
 44%|████▍     | 3509/8000 [08:53<10:35,  7.07it/s]
 44%|████▍     | 3510/8000 [08:53<10:32,  7.09it/s]
 44%|████▍     | 3511/8000 [08:53<10:31,  7.11it/s]
 44%|████▍     | 3512/8000 [08:53<10:35,  7.06it/s]
 44%|████▍     | 3513/8000 [08:53<10:35,  7.06it/s]
 44%|████▍     | 3514/8000 [08:53<10:33,  7.08it/s]
 44%|████▍     | 3515/8000 [08:54<10:33,  7.08it/s]
 44%|████▍     | 3516/8000 [08:54<10:31,  7.10it/s]
 44%|████▍     | 3517/8000 [08:54<10:28,  7.13it/s]
 44%|████▍     | 3518/8000 [08:54<10:33,  7.07it/s]
 44%|████▍     | 3519/8000 [08:5

Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:14:44. Total running time: 9min 31s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703           2            498.645       1.21072 

 45%|████▌     | 3603/8000 [09:06<10:30,  6.97it/s]
 45%|████▌     | 3604/8000 [09:06<10:24,  7.04it/s]
 45%|████▌     | 3605/8000 [09:06<10:20,  7.08it/s]
 45%|████▌     | 3606/8000 [09:06<10:32,  6.94it/s]
 45%|████▌     | 3607/8000 [09:07<10:19,  7.09it/s]
 45%|████▌     | 3608/8000 [09:07<10:31,  6.95it/s]
 45%|████▌     | 3609/8000 [09:07<10:32,  6.94it/s]
 45%|████▌     | 3610/8000 [09:07<10:34,  6.92it/s]
 45%|████▌     | 3611/8000 [09:07<10:35,  6.91it/s]
 45%|████▌     | 3612/8000 [09:07<10:31,  6.95it/s]
 45%|████▌     | 3613/8000 [09:07<10:30,  6.96it/s]
 45%|████▌     | 3614/8000 [09:08<10:31,  6.95it/s]
 45%|████▌     | 3615/8000 [09:08<10:37,  6.88it/s]
 45%|████▌     | 3616/8000 [09:08<10:45,  6.79it/s]
 45%|████▌     | 3617/8000 [09:08<10:31,  6.95it/s]
 45%|████▌     | 3618/8000 [09:08<10:24,  7.02it/s]
 45%|████▌     | 3619/8000 [09:08<10:27,  6.98it/s]
 45%|████▌     | 3620/8000 [09:08<10:26,  7.00it/s]
 45%|████▌     | 3621/8000 [09:09<10:27,  6.98it/s]
 45%|████▌  

Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:15:14. Total running time: 10min 1s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703           2            498.645       1.21072 

 48%|████▊     | 3811/8000 [09:36<09:55,  7.04it/s]
 48%|████▊     | 3812/8000 [09:36<09:59,  6.98it/s]
 48%|████▊     | 3813/8000 [09:36<09:49,  7.10it/s]
 48%|████▊     | 3814/8000 [09:36<09:49,  7.10it/s]
 48%|████▊     | 3815/8000 [09:37<09:56,  7.02it/s]
 48%|████▊     | 3816/8000 [09:37<10:03,  6.93it/s]
 48%|████▊     | 3817/8000 [09:37<09:51,  7.07it/s]
 48%|████▊     | 3818/8000 [09:37<09:52,  7.05it/s]
 48%|████▊     | 3819/8000 [09:37<09:51,  7.07it/s]
 48%|████▊     | 3820/8000 [09:37<09:55,  7.02it/s]
 48%|████▊     | 3821/8000 [09:37<09:48,  7.10it/s]
 48%|████▊     | 3822/8000 [09:38<09:53,  7.05it/s]
 48%|████▊     | 3823/8000 [09:38<09:57,  6.99it/s]
 48%|████▊     | 3824/8000 [09:38<09:49,  7.09it/s]
 48%|████▊     | 3825/8000 [09:38<09:52,  7.05it/s]
 48%|████▊     | 3826/8000 [09:38<09:56,  7.00it/s]
 48%|████▊     | 3827/8000 [09:38<09:48,  7.09it/s]
 48%|████▊     | 3828/8000 [09:38<09:50,  7.07it/s]
 48%|████▊     | 3829/8000 [09:39<09:54,  7.02it/s]
 48%|████▊  

[36m(_objective pid=3656)[0m {'loss': 0.7789, 'grad_norm': 0.04183417186141014, 'learning_rate': 2.805758207667255e-06, 'epoch': 2.5}


 50%|█████     | 4002/8000 [10:03<09:23,  7.10it/s]
 50%|█████     | 4003/8000 [10:03<09:23,  7.09it/s]
 50%|█████     | 4004/8000 [10:03<09:23,  7.09it/s]
 50%|█████     | 4005/8000 [10:04<09:23,  7.08it/s]
 50%|█████     | 4006/8000 [10:04<09:25,  7.06it/s]
 50%|█████     | 4007/8000 [10:04<09:25,  7.06it/s]
 50%|█████     | 4008/8000 [10:04<09:22,  7.10it/s]
 50%|█████     | 4009/8000 [10:04<09:19,  7.14it/s]
 50%|█████     | 4010/8000 [10:04<09:17,  7.16it/s]
 50%|█████     | 4011/8000 [10:04<09:19,  7.13it/s]
 50%|█████     | 4012/8000 [10:05<09:27,  7.02it/s]
 50%|█████     | 4013/8000 [10:05<09:20,  7.12it/s]
 50%|█████     | 4014/8000 [10:05<09:25,  7.05it/s]
 50%|█████     | 4015/8000 [10:05<09:24,  7.06it/s]
 50%|█████     | 4016/8000 [10:05<09:27,  7.03it/s]
 50%|█████     | 4017/8000 [10:05<09:19,  7.12it/s]
 50%|█████     | 4018/8000 [10:05<09:20,  7.10it/s]
 50%|█████     | 4019/8000 [10:06<09:26,  7.03it/s]
 50%|█████     | 4020/8000 [10:06<09:24,  7.04it/s]
 50%|█████  

Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:15:44. Total running time: 10min 31s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703           2            498.645       1.21072

[36m(_objective pid=3656)[0m  50%|█████     | 4022/8000 [10:06<09:23,  7.06it/s]
 50%|█████     | 4023/8000 [10:06<09:24,  7.05it/s]
 50%|█████     | 4024/8000 [10:06<09:28,  7.00it/s]
 50%|█████     | 4025/8000 [10:06<09:24,  7.05it/s]
 50%|█████     | 4026/8000 [10:07<09:29,  6.98it/s]
 50%|█████     | 4027/8000 [10:07<09:23,  7.05it/s]
 50%|█████     | 4028/8000 [10:07<09:25,  7.03it/s]
 50%|█████     | 4029/8000 [10:07<09:25,  7.02it/s]
 50%|█████     | 4030/8000 [10:07<09:25,  7.02it/s]
 50%|█████     | 4031/8000 [10:07<09:21,  7.07it/s]
 50%|█████     | 4032/8000 [10:07<09:25,  7.02it/s]
 50%|█████     | 4033/8000 [10:08<09:24,  7.03it/s]
 50%|█████     | 4034/8000 [10:08<09:24,  7.02it/s]
 50%|█████     | 4035/8000 [10:08<09:29,  6.97it/s]
 50%|█████     | 4036/8000 [10:08<09:19,  7.09it/s]
 50%|█████     | 4037/8000 [10:08<09:23,  7.04it/s]
 50%|█████     | 4038/8000 [10:08<09:19,  7.08it/s]
 50%|█████     | 4039/8000 [10:08<09:26,  6.99it/s]
 50%|█████     | 4040/8000 [10:0

Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:16:14. Total running time: 11min 1s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703           2            498.645       1.21072 

 53%|█████▎    | 4234/8000 [10:36<08:52,  7.07it/s]
 53%|█████▎    | 4235/8000 [10:36<08:53,  7.06it/s]
 53%|█████▎    | 4236/8000 [10:37<08:55,  7.03it/s]
 53%|█████▎    | 4237/8000 [10:37<08:53,  7.05it/s]
 53%|█████▎    | 4238/8000 [10:37<08:52,  7.06it/s]
 53%|█████▎    | 4239/8000 [10:37<08:49,  7.11it/s]
 53%|█████▎    | 4240/8000 [10:37<09:01,  6.94it/s]
 53%|█████▎    | 4241/8000 [10:37<08:57,  7.00it/s]
 53%|█████▎    | 4242/8000 [10:37<08:55,  7.02it/s]
 53%|█████▎    | 4243/8000 [10:38<08:57,  6.99it/s]
 53%|█████▎    | 4244/8000 [10:38<09:03,  6.91it/s]
 53%|█████▎    | 4245/8000 [10:38<08:57,  6.98it/s]
 53%|█████▎    | 4246/8000 [10:38<09:02,  6.93it/s]
 53%|█████▎    | 4247/8000 [10:38<09:00,  6.94it/s]
 53%|█████▎    | 4248/8000 [10:38<09:02,  6.92it/s]
 53%|█████▎    | 4249/8000 [10:38<08:59,  6.95it/s]
 53%|█████▎    | 4250/8000 [10:39<09:00,  6.93it/s]
 53%|█████▎    | 4251/8000 [10:39<08:58,  6.97it/s]
 53%|█████▎    | 4252/8000 [10:39<08:56,  6.99it/s]
 53%|█████▎ 

Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:16:44. Total running time: 11min 31s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703           2            498.645       1.21072

 55%|█████▌    | 4429/8000 [11:06<16:57,  3.51it/s]
 55%|█████▌    | 4430/8000 [11:06<15:26,  3.85it/s]
 55%|█████▌    | 4431/8000 [11:07<13:20,  4.46it/s]
 55%|█████▌    | 4432/8000 [11:07<12:14,  4.86it/s]
 55%|█████▌    | 4433/8000 [11:07<11:09,  5.33it/s]
 55%|█████▌    | 4434/8000 [11:07<10:26,  5.69it/s]
 55%|█████▌    | 4435/8000 [11:07<09:54,  5.99it/s]
 55%|█████▌    | 4436/8000 [11:07<09:43,  6.11it/s]
 55%|█████▌    | 4437/8000 [11:07<09:19,  6.37it/s]
 55%|█████▌    | 4438/8000 [11:08<09:17,  6.39it/s]
 55%|█████▌    | 4439/8000 [11:08<08:46,  6.76it/s]
 56%|█████▌    | 4440/8000 [11:08<08:38,  6.87it/s]
 56%|█████▌    | 4441/8000 [11:08<09:11,  6.45it/s]
 56%|█████▌    | 4442/8000 [11:08<12:31,  4.73it/s]
 56%|█████▌    | 4443/8000 [11:09<11:44,  5.05it/s]
 56%|█████▌    | 4444/8000 [11:09<10:47,  5.49it/s]
 56%|█████▌    | 4445/8000 [11:09<10:14,  5.78it/s]
 56%|█████▌    | 4446/8000 [11:09<09:41,  6.11it/s]
 56%|█████▌    | 4447/8000 [11:09<09:19,  6.35it/s]
 56%|█████▌ 

[36m(_objective pid=3656)[0m {'loss': 0.5803, 'grad_norm': 0.09716051071882248, 'learning_rate': 2.455038431708848e-06, 'epoch': 2.81}


[36m(_objective pid=3656)[0m  56%|█████▋    | 4501/8000 [11:17<08:37,  6.77it/s]
 56%|█████▋    | 4502/8000 [11:17<08:22,  6.96it/s]
 56%|█████▋    | 4503/8000 [11:18<08:22,  6.96it/s]
 56%|█████▋    | 4504/8000 [11:18<08:19,  7.00it/s]
 56%|█████▋    | 4505/8000 [11:18<08:19,  7.00it/s]
 56%|█████▋    | 4506/8000 [11:18<08:25,  6.91it/s]
 56%|█████▋    | 4507/8000 [11:18<08:18,  7.01it/s]
 56%|█████▋    | 4508/8000 [11:18<08:22,  6.94it/s]
 56%|█████▋    | 4509/8000 [11:18<08:22,  6.94it/s]
 56%|█████▋    | 4510/8000 [11:19<08:22,  6.94it/s]
 56%|█████▋    | 4511/8000 [11:19<08:24,  6.92it/s]
 56%|█████▋    | 4512/8000 [11:19<08:24,  6.91it/s]
 56%|█████▋    | 4513/8000 [11:19<08:22,  6.94it/s]
 56%|█████▋    | 4514/8000 [11:19<08:20,  6.96it/s]
 56%|█████▋    | 4515/8000 [11:19<08:18,  6.99it/s]
 56%|█████▋    | 4516/8000 [11:19<08:23,  6.92it/s]
 56%|█████▋    | 4517/8000 [11:20<08:17,  7.00it/s]
 56%|█████▋    | 4518/8000 [11:20<08:14,  7.04it/s]
 56%|█████▋    | 4519/8000 [11:2

Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:17:14. Total running time: 12min 1s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703           2            498.645       1.21072 

 58%|█████▊    | 4634/8000 [11:36<07:56,  7.06it/s]
 58%|█████▊    | 4635/8000 [11:36<08:00,  7.00it/s]
 58%|█████▊    | 4636/8000 [11:37<07:58,  7.03it/s]
 58%|█████▊    | 4637/8000 [11:37<07:57,  7.05it/s]
 58%|█████▊    | 4638/8000 [11:37<07:53,  7.10it/s]
 58%|█████▊    | 4639/8000 [11:37<07:54,  7.08it/s]
 58%|█████▊    | 4640/8000 [11:37<07:55,  7.07it/s]
 58%|█████▊    | 4641/8000 [11:37<07:58,  7.03it/s]
 58%|█████▊    | 4642/8000 [11:37<07:59,  7.01it/s]
 58%|█████▊    | 4643/8000 [11:38<07:57,  7.03it/s]
 58%|█████▊    | 4644/8000 [11:38<07:55,  7.06it/s]
 58%|█████▊    | 4645/8000 [11:38<07:54,  7.08it/s]
 58%|█████▊    | 4646/8000 [11:38<07:54,  7.07it/s]
 58%|█████▊    | 4647/8000 [11:38<07:53,  7.09it/s]
 58%|█████▊    | 4648/8000 [11:38<07:55,  7.05it/s]
 58%|█████▊    | 4649/8000 [11:38<07:58,  7.00it/s]
 58%|█████▊    | 4650/8000 [11:39<07:58,  7.00it/s]
 58%|█████▊    | 4651/8000 [11:39<07:57,  7.01it/s]
 58%|█████▊    | 4652/8000 [11:39<07:53,  7.06it/s]
 58%|█████▊ 

Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:17:45. Total running time: 12min 31s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703           2            498.645       1.21072

[36m(_objective pid=3656)[0m 
 85%|████████▍ | 11/13 [00:05<00:01,  1.73it/s][A
[36m(_objective pid=3656)[0m 
 92%|█████████▏| 12/13 [00:06<00:00,  1.72it/s][A



Trial _objective_6a385_00000 finished iteration 3 at 2024-03-11 01:17:46. Total running time: 12min 32s
+-------------------------------------------------+
| Trial _objective_6a385_00000 result             |
+-------------------------------------------------+
| checkpoint_dir_name                             |
| time_this_iter_s                        238.668 |
| time_total_s                            737.313 |
| training_iteration                            3 |
| epoch                                         3 |
| eval_accuracy                             0.785 |
| eval_loss                               0.97747 |
| eval_runtime                             7.3649 |
| eval_samples_per_second                  27.156 |
| eval_steps_per_second                     1.765 |
| objective                                 0.785 |
+-------------------------------------------------+
[36m(_objective pid=3656)[0m {'eval_loss': 0.9774714708328247, 'eval_accuracy': 0.785, 'eval_runtime': 7.3649, 'e

[36m(_objective pid=3656)[0m 
[36m(_objective pid=3656)[0m 100%|██████████| 13/13 [00:06<00:00,  2.04it/s][A                                                   
[36m(_objective pid=3656)[0m                                                [A 60%|██████    | 4800/8000 [12:07<07:34,  7.05it/s]
[36m(_objective pid=3656)[0m 100%|██████████| 13/13 [00:06<00:00,  2.04it/s][A
[36m(_objective pid=3656)[0m                                                [A
 60%|██████    | 4801/8000 [12:07<2:05:03,  2.35s/it]
 60%|██████    | 4802/8000 [12:08<1:29:44,  1.68s/it]
 60%|██████    | 4803/8000 [12:08<1:05:01,  1.22s/it]
 60%|██████    | 4804/8000 [12:08<47:44,  1.12it/s]  
 60%|██████    | 4805/8000 [12:08<35:37,  1.50it/s]
 60%|██████    | 4806/8000 [12:08<27:18,  1.95it/s]
 60%|██████    | 4807/8000 [12:08<21:24,  2.49it/s]
 60%|██████    | 4808/8000 [12:08<17:21,  3.07it/s]
 60%|██████    | 4809/8000 [12:09<14:34,  3.65it/s]
 60%|██████    | 4810/8000 [12:09<12:26,  4.27it/s]
 60

[36m(_objective pid=3656)[0m {'loss': 0.4996, 'grad_norm': 0.0713769719004631, 'learning_rate': 2.104318655750441e-06, 'epoch': 3.12}


[36m(_objective pid=3656)[0m  63%|██████▎   | 5002/8000 [12:36<07:15,  6.88it/s]
[36m(_objective pid=3656)[0m  63%|██████▎   | 5003/8000 [12:36<07:18,  6.84it/s]



Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:18:15. Total running time: 13min 1s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703           3            737.313      0.977471

[36m(_objective pid=3656)[0m  63%|██████▎   | 5004/8000 [12:36<07:26,  6.70it/s]
 63%|██████▎   | 5005/8000 [12:37<07:22,  6.77it/s]
 63%|██████▎   | 5006/8000 [12:37<07:18,  6.83it/s]
 63%|██████▎   | 5007/8000 [12:37<07:16,  6.86it/s]
 63%|██████▎   | 5008/8000 [12:37<07:08,  6.99it/s]
 63%|██████▎   | 5009/8000 [12:37<07:06,  7.02it/s]
 63%|██████▎   | 5010/8000 [12:37<07:10,  6.95it/s]
 63%|██████▎   | 5011/8000 [12:37<07:12,  6.90it/s]
 63%|██████▎   | 5012/8000 [12:38<07:11,  6.92it/s]
 63%|██████▎   | 5013/8000 [12:38<07:06,  7.01it/s]
 63%|██████▎   | 5014/8000 [12:38<07:03,  7.06it/s]
 63%|██████▎   | 5015/8000 [12:38<07:02,  7.06it/s]
 63%|██████▎   | 5016/8000 [12:38<07:02,  7.06it/s]
 63%|██████▎   | 5017/8000 [12:38<07:05,  7.01it/s]
 63%|██████▎   | 5018/8000 [12:38<07:05,  7.01it/s]
 63%|██████▎   | 5019/8000 [12:39<07:04,  7.02it/s]
 63%|██████▎   | 5020/8000 [12:39<07:03,  7.03it/s]
 63%|██████▎   | 5021/8000 [12:39<07:02,  7.05it/s]
 63%|██████▎   | 5022/8000 [12:3

Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:18:45. Total running time: 13min 31s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703           3            737.313      0.977471

 65%|██████▌   | 5215/8000 [13:07<06:29,  7.15it/s]
 65%|██████▌   | 5216/8000 [13:07<06:31,  7.11it/s]
 65%|██████▌   | 5217/8000 [13:07<06:32,  7.09it/s]
 65%|██████▌   | 5218/8000 [13:07<06:34,  7.05it/s]
 65%|██████▌   | 5219/8000 [13:07<06:33,  7.06it/s]
 65%|██████▌   | 5220/8000 [13:07<06:32,  7.09it/s]
 65%|██████▌   | 5221/8000 [13:07<06:37,  7.00it/s]
 65%|██████▌   | 5222/8000 [13:08<06:32,  7.07it/s]
 65%|██████▌   | 5223/8000 [13:08<06:35,  7.02it/s]
 65%|██████▌   | 5224/8000 [13:08<06:32,  7.08it/s]
 65%|██████▌   | 5225/8000 [13:08<06:33,  7.05it/s]
 65%|██████▌   | 5226/8000 [13:08<06:30,  7.09it/s]
 65%|██████▌   | 5227/8000 [13:08<06:32,  7.06it/s]
 65%|██████▌   | 5228/8000 [13:08<06:34,  7.02it/s]
 65%|██████▌   | 5229/8000 [13:09<06:31,  7.08it/s]
 65%|██████▌   | 5230/8000 [13:09<06:35,  7.01it/s]
 65%|██████▌   | 5231/8000 [13:09<06:33,  7.04it/s]
 65%|██████▌   | 5232/8000 [13:09<06:33,  7.04it/s]
 65%|██████▌   | 5233/8000 [13:09<06:34,  7.01it/s]
 65%|██████▌

Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:19:15. Total running time: 14min 1s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703           3            737.313      0.977471 

[36m(_objective pid=3656)[0m  68%|██████▊   | 5425/8000 [13:36<06:08,  6.99it/s]
 68%|██████▊   | 5426/8000 [13:37<06:05,  7.04it/s]
 68%|██████▊   | 5427/8000 [13:37<06:12,  6.90it/s]
 68%|██████▊   | 5428/8000 [13:37<06:06,  7.01it/s]
 68%|██████▊   | 5429/8000 [13:37<06:05,  7.03it/s]
 68%|██████▊   | 5430/8000 [13:37<06:10,  6.93it/s]
 68%|██████▊   | 5431/8000 [13:37<06:09,  6.94it/s]
 68%|██████▊   | 5432/8000 [13:37<06:11,  6.91it/s]
 68%|██████▊   | 5433/8000 [13:38<06:11,  6.90it/s]
 68%|██████▊   | 5434/8000 [13:38<06:13,  6.87it/s]
 68%|██████▊   | 5435/8000 [13:38<06:15,  6.84it/s]
 68%|██████▊   | 5436/8000 [13:38<06:07,  6.98it/s]
 68%|██████▊   | 5437/8000 [13:38<06:04,  7.03it/s]
 68%|██████▊   | 5438/8000 [13:38<06:06,  7.00it/s]
 68%|██████▊   | 5439/8000 [13:38<06:05,  7.01it/s]
 68%|██████▊   | 5440/8000 [13:39<06:08,  6.95it/s]
 68%|██████▊   | 5441/8000 [13:39<06:09,  6.92it/s]
 68%|██████▊   | 5442/8000 [13:39<06:17,  6.78it/s]
 68%|██████▊   | 5443/8000 [13:3

[36m(_objective pid=3656)[0m {'loss': 0.5341, 'grad_norm': 0.11145833879709244, 'learning_rate': 1.7535988797920343e-06, 'epoch': 3.44}


 69%|██████▉   | 5501/8000 [13:47<05:53,  7.07it/s]
 69%|██████▉   | 5502/8000 [13:48<05:54,  7.06it/s]
 69%|██████▉   | 5503/8000 [13:48<05:55,  7.02it/s]
 69%|██████▉   | 5504/8000 [13:48<05:53,  7.06it/s]
 69%|██████▉   | 5505/8000 [13:48<05:52,  7.09it/s]
 69%|██████▉   | 5506/8000 [13:48<05:55,  7.02it/s]
 69%|██████▉   | 5507/8000 [13:48<05:53,  7.06it/s]
 69%|██████▉   | 5508/8000 [13:48<05:51,  7.09it/s]
 69%|██████▉   | 5509/8000 [13:49<05:54,  7.03it/s]
 69%|██████▉   | 5510/8000 [13:49<06:01,  6.89it/s]
 69%|██████▉   | 5511/8000 [13:49<05:50,  7.10it/s]
 69%|██████▉   | 5512/8000 [13:49<05:52,  7.06it/s]
 69%|██████▉   | 5513/8000 [13:49<05:53,  7.04it/s]
 69%|██████▉   | 5514/8000 [13:49<05:54,  7.02it/s]
 69%|██████▉   | 5515/8000 [13:49<05:51,  7.08it/s]
 69%|██████▉   | 5516/8000 [13:50<05:51,  7.07it/s]
 69%|██████▉   | 5517/8000 [13:50<05:54,  7.00it/s]
 69%|██████▉   | 5518/8000 [13:50<05:55,  6.98it/s]
 69%|██████▉   | 5519/8000 [13:50<05:52,  7.04it/s]
 69%|██████▉

Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:19:45. Total running time: 14min 31s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703           3            737.313      0.977471

[36m(_objective pid=3656)[0m  70%|███████   | 5636/8000 [14:07<05:35,  7.04it/s]
 70%|███████   | 5637/8000 [14:07<05:41,  6.92it/s]
 70%|███████   | 5638/8000 [14:07<05:41,  6.91it/s]
 70%|███████   | 5639/8000 [14:07<05:44,  6.85it/s]
 70%|███████   | 5640/8000 [14:07<05:45,  6.84it/s]
 71%|███████   | 5641/8000 [14:07<05:45,  6.83it/s]
 71%|███████   | 5642/8000 [14:07<05:40,  6.93it/s]
 71%|███████   | 5643/8000 [14:08<05:39,  6.94it/s]
 71%|███████   | 5644/8000 [14:08<05:41,  6.90it/s]
 71%|███████   | 5645/8000 [14:08<05:49,  6.74it/s]
 71%|███████   | 5646/8000 [14:08<05:42,  6.87it/s]
 71%|███████   | 5647/8000 [14:08<05:36,  7.00it/s]
 71%|███████   | 5648/8000 [14:08<05:34,  7.02it/s]
 71%|███████   | 5649/8000 [14:08<05:32,  7.08it/s]
 71%|███████   | 5650/8000 [14:09<05:33,  7.06it/s]
 71%|███████   | 5651/8000 [14:09<05:34,  7.02it/s]
 71%|███████   | 5652/8000 [14:09<05:33,  7.05it/s]
 71%|███████   | 5653/8000 [14:09<05:35,  7.00it/s]
 71%|███████   | 5654/8000 [14:0

Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:20:15. Total running time: 15min 1s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703           3            737.313      0.977471 

[36m(_objective pid=3656)[0m  73%|███████▎  | 5847/8000 [14:37<05:05,  7.04it/s]
 73%|███████▎  | 5848/8000 [14:37<05:04,  7.06it/s]
 73%|███████▎  | 5849/8000 [14:37<05:05,  7.04it/s]
 73%|███████▎  | 5850/8000 [14:37<05:05,  7.04it/s]
 73%|███████▎  | 5851/8000 [14:37<05:07,  7.00it/s]
 73%|███████▎  | 5852/8000 [14:37<05:05,  7.04it/s]
 73%|███████▎  | 5853/8000 [14:37<05:05,  7.02it/s]
 73%|███████▎  | 5854/8000 [14:38<05:03,  7.07it/s]
 73%|███████▎  | 5855/8000 [14:38<05:03,  7.06it/s]
 73%|███████▎  | 5856/8000 [14:38<05:05,  7.03it/s]
 73%|███████▎  | 5857/8000 [14:38<05:04,  7.04it/s]
 73%|███████▎  | 5858/8000 [14:38<05:07,  6.98it/s]
 73%|███████▎  | 5859/8000 [14:38<05:03,  7.06it/s]
 73%|███████▎  | 5860/8000 [14:38<05:06,  6.99it/s]
 73%|███████▎  | 5861/8000 [14:39<05:04,  7.03it/s]
 73%|███████▎  | 5862/8000 [14:39<05:02,  7.06it/s]
 73%|███████▎  | 5863/8000 [14:39<05:03,  7.05it/s]
 73%|███████▎  | 5864/8000 [14:39<05:03,  7.04it/s]
 73%|███████▎  | 5865/8000 [14:3

[36m(_objective pid=3656)[0m {'loss': 0.4023, 'grad_norm': 58.72669219970703, 'learning_rate': 1.4028791038336274e-06, 'epoch': 3.75}


[36m(_objective pid=3656)[0m  75%|███████▌  | 6001/8000 [14:59<04:52,  6.84it/s]
 75%|███████▌  | 6002/8000 [14:59<04:49,  6.90it/s]
 75%|███████▌  | 6003/8000 [14:59<04:52,  6.83it/s]
 75%|███████▌  | 6004/8000 [14:59<04:53,  6.81it/s]
 75%|███████▌  | 6005/8000 [14:59<04:50,  6.88it/s]
 75%|███████▌  | 6006/8000 [14:59<04:52,  6.81it/s]
 75%|███████▌  | 6007/8000 [14:59<04:52,  6.81it/s]
 75%|███████▌  | 6008/8000 [15:00<04:52,  6.82it/s]
 75%|███████▌  | 6009/8000 [15:00<04:49,  6.87it/s]
 75%|███████▌  | 6010/8000 [15:00<04:44,  7.00it/s]
 75%|███████▌  | 6011/8000 [15:00<04:42,  7.04it/s]
 75%|███████▌  | 6012/8000 [15:00<04:42,  7.03it/s]
 75%|███████▌  | 6013/8000 [15:00<04:41,  7.06it/s]
 75%|███████▌  | 6014/8000 [15:00<04:40,  7.09it/s]
 75%|███████▌  | 6015/8000 [15:01<04:42,  7.02it/s]
 75%|███████▌  | 6016/8000 [15:01<04:42,  7.02it/s]
 75%|███████▌  | 6017/8000 [15:01<04:42,  7.02it/s]
 75%|███████▌  | 6018/8000 [15:01<04:41,  7.04it/s]
 75%|███████▌  | 6019/8000 [15:0

Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:20:45. Total running time: 15min 32s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703           3            737.313      0.977471

[36m(_objective pid=3656)[0m  76%|███████▌  | 6059/8000 [15:07<04:35,  7.04it/s]
 76%|███████▌  | 6060/8000 [15:07<04:34,  7.06it/s]
 76%|███████▌  | 6061/8000 [15:07<04:37,  6.98it/s]
 76%|███████▌  | 6062/8000 [15:07<04:34,  7.06it/s]
 76%|███████▌  | 6063/8000 [15:07<04:35,  7.04it/s]
 76%|███████▌  | 6064/8000 [15:08<04:33,  7.09it/s]
 76%|███████▌  | 6065/8000 [15:08<04:36,  7.01it/s]
 76%|███████▌  | 6066/8000 [15:08<04:34,  7.03it/s]
 76%|███████▌  | 6067/8000 [15:08<04:33,  7.07it/s]
 76%|███████▌  | 6068/8000 [15:08<04:36,  6.99it/s]
 76%|███████▌  | 6069/8000 [15:08<04:36,  6.99it/s]
 76%|███████▌  | 6070/8000 [15:08<04:32,  7.08it/s]
 76%|███████▌  | 6071/8000 [15:09<04:32,  7.09it/s]
 76%|███████▌  | 6072/8000 [15:09<04:34,  7.02it/s]
 76%|███████▌  | 6073/8000 [15:09<04:32,  7.06it/s]
 76%|███████▌  | 6074/8000 [15:09<04:33,  7.05it/s]
 76%|███████▌  | 6075/8000 [15:09<04:35,  6.99it/s]
 76%|███████▌  | 6076/8000 [15:09<04:33,  7.04it/s]
 76%|███████▌  | 6077/8000 [15:0

Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:21:15. Total running time: 16min 2s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703           3            737.313      0.977471 

[36m(_objective pid=3656)[0m  78%|███████▊  | 6268/8000 [15:37<04:06,  7.02it/s]
 78%|███████▊  | 6269/8000 [15:37<04:07,  6.99it/s]
 78%|███████▊  | 6270/8000 [15:37<04:13,  6.82it/s]
 78%|███████▊  | 6271/8000 [15:37<04:10,  6.91it/s]
 78%|███████▊  | 6272/8000 [15:37<04:08,  6.94it/s]
 78%|███████▊  | 6273/8000 [15:37<04:08,  6.95it/s]
 78%|███████▊  | 6274/8000 [15:38<04:07,  6.99it/s]
 78%|███████▊  | 6275/8000 [15:38<04:11,  6.87it/s]
 78%|███████▊  | 6276/8000 [15:38<04:05,  7.02it/s]
 78%|███████▊  | 6277/8000 [15:38<04:06,  7.00it/s]
 78%|███████▊  | 6278/8000 [15:38<04:13,  6.80it/s]
 78%|███████▊  | 6279/8000 [15:38<04:09,  6.90it/s]
 78%|███████▊  | 6280/8000 [15:38<04:13,  6.80it/s]
 79%|███████▊  | 6281/8000 [15:39<04:08,  6.91it/s]
 79%|███████▊  | 6282/8000 [15:39<04:04,  7.03it/s]
 79%|███████▊  | 6283/8000 [15:39<04:03,  7.05it/s]
 79%|███████▊  | 6284/8000 [15:39<04:05,  6.99it/s]
 79%|███████▊  | 6285/8000 [15:39<04:04,  7.02it/s]
 79%|███████▊  | 6286/8000 [15:3


Trial _objective_6a385_00000 finished iteration 4 at 2024-03-11 01:21:41. Total running time: 16min 28s
+-------------------------------------------------+
| Trial _objective_6a385_00000 result             |
+-------------------------------------------------+
| checkpoint_dir_name                             |
| time_this_iter_s                        235.714 |
| time_total_s                            973.027 |
| training_iteration                            4 |
| epoch                                         4 |
| eval_accuracy                             0.815 |
| eval_loss                               0.86708 |
| eval_runtime                             7.3629 |
| eval_samples_per_second                  27.163 |
| eval_steps_per_second                     1.766 |
| objective                                 0.815 |
+-------------------------------------------------+
[36m(_objective pid=3656)[0m {'eval_loss': 0.867075502872467, 'eval_accuracy': 0.815, 'eval_runtime': 7.3629, 'ev

[36m(_objective pid=3656)[0m 
[36m(_objective pid=3656)[0m 100%|██████████| 13/13 [00:06<00:00,  2.04it/s][A                                                   
[36m(_objective pid=3656)[0m                                                [A 80%|████████  | 6400/8000 [16:03<03:47,  7.03it/s]
[36m(_objective pid=3656)[0m 100%|██████████| 13/13 [00:06<00:00,  2.04it/s][A
[36m(_objective pid=3656)[0m                                                [A
 80%|████████  | 6401/8000 [16:03<1:02:35,  2.35s/it]
 80%|████████  | 6402/8000 [16:03<44:53,  1.69s/it]  
 80%|████████  | 6403/8000 [16:03<32:35,  1.22s/it]
 80%|████████  | 6404/8000 [16:04<23:57,  1.11it/s]
 80%|████████  | 6405/8000 [16:04<17:55,  1.48it/s]
 80%|████████  | 6406/8000 [16:04<13:39,  1.95it/s]
 80%|████████  | 6407/8000 [16:04<10:41,  2.48it/s]
 80%|████████  | 6408/8000 [16:04<08:36,  3.08it/s]
 80%|████████  | 6409/8000 [16:04<07:09,  3.70it/s]
 80%|████████  | 6410/8000 [16:04<06:11,  4.28it/s]
 80%|██


Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:21:45. Total running time: 16min 32s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703           4            973.027      0.86707

[36m(_objective pid=3656)[0m  80%|████████  | 6426/8000 [16:07<03:43,  7.05it/s]
[36m(_objective pid=3656)[0m  80%|████████  | 6427/8000 [16:07<03:45,  6.96it/s]
 80%|████████  | 6428/8000 [16:07<03:42,  7.07it/s]
 80%|████████  | 6429/8000 [16:07<03:42,  7.07it/s]
 80%|████████  | 6430/8000 [16:07<03:42,  7.05it/s]
 80%|████████  | 6431/8000 [16:07<03:44,  6.99it/s]
 80%|████████  | 6432/8000 [16:08<03:42,  7.06it/s]
 80%|████████  | 6433/8000 [16:08<03:41,  7.06it/s]
 80%|████████  | 6434/8000 [16:08<03:41,  7.08it/s]
 80%|████████  | 6435/8000 [16:08<03:43,  7.00it/s]
 80%|████████  | 6436/8000 [16:08<03:40,  7.08it/s]
 80%|████████  | 6437/8000 [16:08<03:40,  7.10it/s]
 80%|████████  | 6438/8000 [16:08<03:42,  7.02it/s]
 80%|████████  | 6439/8000 [16:09<03:40,  7.09it/s]
 80%|████████  | 6440/8000 [16:09<03:41,  7.06it/s]
 81%|████████  | 6441/8000 [16:09<03:42,  7.02it/s]
 81%|████████  | 6442/8000 [16:09<03:41,  7.02it/s]
 81%|████████  | 6443/8000 [16:09<03:41,  7.04it/s]


[36m(_objective pid=3656)[0m {'loss': 0.4677, 'grad_norm': 33.89179229736328, 'learning_rate': 1.0521593278752205e-06, 'epoch': 4.06}


[36m(_objective pid=3656)[0m  81%|████████▏ | 6502/8000 [16:18<03:39,  6.81it/s]
 81%|████████▏ | 6503/8000 [16:18<03:36,  6.92it/s]
 81%|████████▏ | 6504/8000 [16:18<03:34,  6.99it/s]
 81%|████████▏ | 6505/8000 [16:18<03:36,  6.92it/s]
 81%|████████▏ | 6506/8000 [16:18<03:35,  6.94it/s]
 81%|████████▏ | 6507/8000 [16:18<03:37,  6.87it/s]
 81%|████████▏ | 6508/8000 [16:18<03:32,  7.01it/s]
 81%|████████▏ | 6509/8000 [16:19<03:32,  7.02it/s]
 81%|████████▏ | 6510/8000 [16:19<03:32,  7.01it/s]
 81%|████████▏ | 6511/8000 [16:19<03:32,  7.00it/s]
 81%|████████▏ | 6512/8000 [16:19<03:32,  7.01it/s]
 81%|████████▏ | 6513/8000 [16:19<03:30,  7.06it/s]
 81%|████████▏ | 6514/8000 [16:19<03:30,  7.07it/s]
 81%|████████▏ | 6515/8000 [16:19<03:30,  7.05it/s]
 81%|████████▏ | 6516/8000 [16:20<03:30,  7.06it/s]
 81%|████████▏ | 6517/8000 [16:20<03:30,  7.05it/s]
 81%|████████▏ | 6518/8000 [16:20<03:29,  7.09it/s]
 81%|████████▏ | 6519/8000 [16:20<03:28,  7.09it/s]
 82%|████████▏ | 6520/8000 [16:2

Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:22:15. Total running time: 17min 2s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703           4            973.027      0.867076 

[36m(_objective pid=3656)[0m  83%|████████▎ | 6637/8000 [16:37<03:13,  7.05it/s]
 83%|████████▎ | 6638/8000 [16:37<03:13,  7.05it/s]
 83%|████████▎ | 6639/8000 [16:37<03:12,  7.06it/s]
 83%|████████▎ | 6640/8000 [16:37<03:13,  7.03it/s]
 83%|████████▎ | 6641/8000 [16:37<03:12,  7.07it/s]
 83%|████████▎ | 6642/8000 [16:38<03:12,  7.04it/s]
 83%|████████▎ | 6643/8000 [16:38<03:12,  7.06it/s]
 83%|████████▎ | 6644/8000 [16:38<03:12,  7.04it/s]
 83%|████████▎ | 6645/8000 [16:38<03:13,  7.02it/s]
 83%|████████▎ | 6646/8000 [16:38<03:12,  7.03it/s]
 83%|████████▎ | 6647/8000 [16:38<03:11,  7.06it/s]
 83%|████████▎ | 6648/8000 [16:38<03:12,  7.02it/s]
 83%|████████▎ | 6649/8000 [16:39<03:12,  7.01it/s]
 83%|████████▎ | 6650/8000 [16:39<03:11,  7.05it/s]
 83%|████████▎ | 6651/8000 [16:39<03:10,  7.09it/s]
 83%|████████▎ | 6652/8000 [16:39<03:11,  7.05it/s]
 83%|████████▎ | 6653/8000 [16:39<03:10,  7.06it/s]
 83%|████████▎ | 6654/8000 [16:39<03:11,  7.04it/s]
 83%|████████▎ | 6655/8000 [16:3

Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:22:45. Total running time: 17min 32s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703           4            973.027      0.867076

 86%|████████▌ | 6848/8000 [17:07<02:42,  7.08it/s]
 86%|████████▌ | 6849/8000 [17:07<02:43,  7.06it/s]
 86%|████████▌ | 6850/8000 [17:07<02:42,  7.10it/s]
 86%|████████▌ | 6851/8000 [17:07<02:48,  6.84it/s]
 86%|████████▌ | 6852/8000 [17:07<02:43,  7.02it/s]
 86%|████████▌ | 6853/8000 [17:08<02:43,  7.03it/s]
 86%|████████▌ | 6854/8000 [17:08<02:44,  6.97it/s]
 86%|████████▌ | 6855/8000 [17:08<02:45,  6.92it/s]
 86%|████████▌ | 6856/8000 [17:08<02:46,  6.89it/s]
 86%|████████▌ | 6857/8000 [17:08<02:44,  6.93it/s]
 86%|████████▌ | 6858/8000 [17:08<02:44,  6.95it/s]
 86%|████████▌ | 6859/8000 [17:08<02:44,  6.94it/s]
 86%|████████▌ | 6860/8000 [17:09<02:43,  6.99it/s]
 86%|████████▌ | 6861/8000 [17:09<02:48,  6.75it/s]
 86%|████████▌ | 6862/8000 [17:09<02:46,  6.82it/s]
 86%|████████▌ | 6863/8000 [17:09<02:47,  6.80it/s]
 86%|████████▌ | 6864/8000 [17:09<02:43,  6.95it/s]
 86%|████████▌ | 6865/8000 [17:09<02:42,  7.00it/s]
 86%|████████▌ | 6866/8000 [17:09<02:45,  6.87it/s]
 86%|███████

[36m(_objective pid=3656)[0m {'loss': 0.2328, 'grad_norm': 0.1885831207036972, 'learning_rate': 7.014395519168137e-07, 'epoch': 4.38}


 88%|████████▊ | 7001/8000 [17:29<02:21,  7.08it/s]
 88%|████████▊ | 7002/8000 [17:29<02:21,  7.03it/s]
 88%|████████▊ | 7003/8000 [17:29<02:21,  7.04it/s]
 88%|████████▊ | 7004/8000 [17:29<02:20,  7.11it/s]
 88%|████████▊ | 7005/8000 [17:29<02:20,  7.07it/s]
 88%|████████▊ | 7006/8000 [17:29<02:21,  7.00it/s]
 88%|████████▊ | 7007/8000 [17:30<02:20,  7.05it/s]
 88%|████████▊ | 7008/8000 [17:30<02:20,  7.06it/s]
 88%|████████▊ | 7009/8000 [17:30<02:20,  7.07it/s]
 88%|████████▊ | 7010/8000 [17:30<02:20,  7.03it/s]
 88%|████████▊ | 7011/8000 [17:30<02:19,  7.11it/s]
 88%|████████▊ | 7012/8000 [17:30<02:18,  7.11it/s]
 88%|████████▊ | 7013/8000 [17:30<02:20,  7.04it/s]
 88%|████████▊ | 7014/8000 [17:31<02:21,  6.96it/s]
 88%|████████▊ | 7015/8000 [17:31<02:19,  7.05it/s]
 88%|████████▊ | 7016/8000 [17:31<02:19,  7.07it/s]
 88%|████████▊ | 7017/8000 [17:31<02:19,  7.03it/s]
 88%|████████▊ | 7018/8000 [17:31<02:20,  7.00it/s]
 88%|████████▊ | 7019/8000 [17:31<02:18,  7.06it/s]
 88%|███████

Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:23:15. Total running time: 18min 2s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703           4            973.027      0.867076 

[36m(_objective pid=3656)[0m  88%|████████▊ | 7059/8000 [17:37<02:14,  6.98it/s]
 88%|████████▊ | 7060/8000 [17:37<02:12,  7.10it/s]
 88%|████████▊ | 7061/8000 [17:37<02:12,  7.08it/s]
 88%|████████▊ | 7062/8000 [17:37<02:11,  7.13it/s]
 88%|████████▊ | 7063/8000 [17:38<02:12,  7.09it/s]
 88%|████████▊ | 7064/8000 [17:38<02:12,  7.08it/s]
 88%|████████▊ | 7065/8000 [17:38<02:12,  7.06it/s]
 88%|████████▊ | 7066/8000 [17:38<02:13,  7.02it/s]
 88%|████████▊ | 7067/8000 [17:38<02:12,  7.06it/s]
 88%|████████▊ | 7068/8000 [17:38<02:11,  7.08it/s]
 88%|████████▊ | 7069/8000 [17:38<02:11,  7.08it/s]
 88%|████████▊ | 7070/8000 [17:39<02:11,  7.08it/s]
 88%|████████▊ | 7071/8000 [17:39<02:11,  7.08it/s]
 88%|████████▊ | 7072/8000 [17:39<02:12,  6.98it/s]
 88%|████████▊ | 7073/8000 [17:39<02:11,  7.03it/s]
 88%|████████▊ | 7074/8000 [17:39<02:11,  7.02it/s]
 88%|████████▊ | 7075/8000 [17:39<02:11,  7.04it/s]
 88%|████████▊ | 7076/8000 [17:39<02:10,  7.07it/s]
 88%|████████▊ | 7077/8000 [17:4

Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:23:45. Total running time: 18min 32s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703           4            973.027      0.867076

 91%|█████████ | 7270/8000 [18:07<01:43,  7.09it/s]
 91%|█████████ | 7271/8000 [18:07<01:43,  7.02it/s]
 91%|█████████ | 7272/8000 [18:07<01:43,  7.06it/s]
 91%|█████████ | 7273/8000 [18:08<01:43,  7.00it/s]
 91%|█████████ | 7274/8000 [18:08<01:43,  7.04it/s]
 91%|█████████ | 7275/8000 [18:08<01:42,  7.04it/s]
 91%|█████████ | 7276/8000 [18:08<01:43,  7.00it/s]
 91%|█████████ | 7277/8000 [18:08<01:42,  7.05it/s]
 91%|█████████ | 7278/8000 [18:08<01:42,  7.03it/s]
 91%|█████████ | 7279/8000 [18:08<01:42,  7.02it/s]
 91%|█████████ | 7280/8000 [18:09<01:42,  7.00it/s]
 91%|█████████ | 7281/8000 [18:09<01:42,  7.03it/s]
 91%|█████████ | 7282/8000 [18:09<01:41,  7.05it/s]
 91%|█████████ | 7283/8000 [18:09<01:42,  7.02it/s]
 91%|█████████ | 7284/8000 [18:09<01:41,  7.02it/s]
 91%|█████████ | 7285/8000 [18:09<01:41,  7.04it/s]
 91%|█████████ | 7286/8000 [18:09<01:41,  7.01it/s]
 91%|█████████ | 7287/8000 [18:10<01:42,  6.97it/s]
 91%|█████████ | 7288/8000 [18:10<01:41,  7.05it/s]
 91%|███████

Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:24:15. Total running time: 19min 2s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703           4            973.027      0.867076 

 93%|█████████▎| 7479/8000 [18:37<01:13,  7.08it/s]
 94%|█████████▎| 7480/8000 [18:37<01:13,  7.03it/s]
 94%|█████████▎| 7481/8000 [18:37<01:13,  7.05it/s]
 94%|█████████▎| 7482/8000 [18:37<01:13,  7.03it/s]
 94%|█████████▎| 7483/8000 [18:38<01:13,  7.01it/s]
 94%|█████████▎| 7484/8000 [18:38<01:13,  7.06it/s]
 94%|█████████▎| 7485/8000 [18:38<01:13,  7.00it/s]
 94%|█████████▎| 7486/8000 [18:38<01:12,  7.10it/s]
 94%|█████████▎| 7487/8000 [18:38<01:12,  7.07it/s]
 94%|█████████▎| 7488/8000 [18:38<01:12,  7.05it/s]
 94%|█████████▎| 7489/8000 [18:38<01:12,  7.01it/s]
 94%|█████████▎| 7490/8000 [18:39<01:12,  7.02it/s]
 94%|█████████▎| 7491/8000 [18:39<01:12,  7.05it/s]
 94%|█████████▎| 7492/8000 [18:39<01:12,  7.05it/s]
 94%|█████████▎| 7493/8000 [18:39<01:14,  6.82it/s]
 94%|█████████▎| 7494/8000 [18:39<01:12,  6.99it/s]
 94%|█████████▎| 7495/8000 [18:39<01:12,  6.96it/s]
 94%|█████████▎| 7496/8000 [18:39<01:12,  6.97it/s]
 94%|█████████▎| 7497/8000 [18:40<01:12,  6.97it/s]
 94%|███████

[36m(_objective pid=3656)[0m {'loss': 0.325, 'grad_norm': 0.7596622705459595, 'learning_rate': 3.5071977595840686e-07, 'epoch': 4.69}


 94%|█████████▍| 7502/8000 [18:40<01:12,  6.90it/s]
 94%|█████████▍| 7503/8000 [18:40<01:12,  6.83it/s]
 94%|█████████▍| 7504/8000 [18:41<01:12,  6.88it/s]
 94%|█████████▍| 7505/8000 [18:41<01:11,  6.88it/s]
 94%|█████████▍| 7506/8000 [18:41<01:12,  6.86it/s]
 94%|█████████▍| 7507/8000 [18:41<01:11,  6.92it/s]
 94%|█████████▍| 7508/8000 [18:41<01:11,  6.84it/s]
 94%|█████████▍| 7509/8000 [18:41<01:10,  6.94it/s]
 94%|█████████▍| 7510/8000 [18:41<01:11,  6.88it/s]
 94%|█████████▍| 7511/8000 [18:42<01:10,  6.92it/s]
 94%|█████████▍| 7512/8000 [18:42<01:11,  6.85it/s]
 94%|█████████▍| 7513/8000 [18:42<01:09,  7.04it/s]
 94%|█████████▍| 7514/8000 [18:42<01:09,  7.03it/s]
 94%|█████████▍| 7515/8000 [18:42<01:09,  7.02it/s]
 94%|█████████▍| 7516/8000 [18:42<01:08,  7.02it/s]
 94%|█████████▍| 7517/8000 [18:42<01:08,  7.04it/s]
 94%|█████████▍| 7518/8000 [18:43<01:08,  7.02it/s]
 94%|█████████▍| 7519/8000 [18:43<01:08,  7.05it/s]
 94%|█████████▍| 7520/8000 [18:43<01:08,  7.02it/s]
 94%|███████

Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:24:45. Total running time: 19min 32s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703           4            973.027      0.867076

[36m(_objective pid=3656)[0m  96%|█████████▌| 7690/8000 [19:07<00:44,  6.93it/s]
 96%|█████████▌| 7691/8000 [19:07<00:44,  6.91it/s]
 96%|█████████▌| 7692/8000 [19:07<00:45,  6.80it/s]
 96%|█████████▌| 7693/8000 [19:08<00:45,  6.82it/s]
 96%|█████████▌| 7694/8000 [19:08<00:45,  6.73it/s]
 96%|█████████▌| 7695/8000 [19:08<00:44,  6.90it/s]
 96%|█████████▌| 7696/8000 [19:08<00:43,  6.99it/s]
 96%|█████████▌| 7697/8000 [19:08<00:43,  7.04it/s]
 96%|█████████▌| 7698/8000 [19:08<00:43,  7.02it/s]
 96%|█████████▌| 7699/8000 [19:08<00:42,  7.06it/s]
 96%|█████████▋| 7700/8000 [19:09<00:42,  7.07it/s]
 96%|█████████▋| 7701/8000 [19:09<00:42,  6.98it/s]
 96%|█████████▋| 7702/8000 [19:09<00:42,  7.00it/s]
 96%|█████████▋| 7703/8000 [19:09<00:42,  7.01it/s]
 96%|█████████▋| 7704/8000 [19:09<00:41,  7.07it/s]
 96%|█████████▋| 7705/8000 [19:09<00:41,  7.06it/s]
 96%|█████████▋| 7706/8000 [19:09<00:41,  7.06it/s]
 96%|█████████▋| 7707/8000 [19:10<00:41,  7.03it/s]
 96%|█████████▋| 7708/8000 [19:1

Trial status: 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:25:15. Total running time: 20min 2s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00000   RUNNING        5.61152e-06                        1                    5      0.015703           4            973.027      0.867076 

 99%|█████████▊| 7898/8000 [19:37<00:14,  7.10it/s]
 99%|█████████▊| 7899/8000 [19:37<00:14,  7.04it/s]
 99%|█████████▉| 7900/8000 [19:37<00:14,  7.09it/s]
 99%|█████████▉| 7901/8000 [19:38<00:14,  7.06it/s]
 99%|█████████▉| 7902/8000 [19:38<00:13,  7.01it/s]
 99%|█████████▉| 7903/8000 [19:38<00:13,  6.99it/s]
 99%|█████████▉| 7904/8000 [19:38<00:13,  6.98it/s]
 99%|█████████▉| 7905/8000 [19:38<00:13,  7.10it/s]
 99%|█████████▉| 7906/8000 [19:38<00:13,  7.04it/s]
 99%|█████████▉| 7907/8000 [19:38<00:13,  7.04it/s]
 99%|█████████▉| 7908/8000 [19:39<00:13,  7.07it/s]
 99%|█████████▉| 7909/8000 [19:39<00:12,  7.07it/s]
 99%|█████████▉| 7910/8000 [19:39<00:12,  7.05it/s]
 99%|█████████▉| 7911/8000 [19:39<00:12,  7.09it/s]
 99%|█████████▉| 7912/8000 [19:39<00:12,  7.11it/s]
 99%|█████████▉| 7913/8000 [19:39<00:12,  7.09it/s]
 99%|█████████▉| 7914/8000 [19:39<00:12,  7.11it/s]
 99%|█████████▉| 7915/8000 [19:40<00:11,  7.11it/s]
 99%|█████████▉| 7916/8000 [19:40<00:11,  7.13it/s]
 99%|███████

[36m(_objective pid=3656)[0m {'loss': 0.2091, 'grad_norm': 0.02878394164144993, 'learning_rate': 0.0, 'epoch': 5.0}


[36m(_objective pid=3656)[0m 
  0%|          | 0/13 [00:00<?, ?it/s][A
[36m(_objective pid=3656)[0m 
 15%|█▌        | 2/13 [00:00<00:03,  3.38it/s][A
[36m(_objective pid=3656)[0m 
 23%|██▎       | 3/13 [00:01<00:04,  2.42it/s][A
[36m(_objective pid=3656)[0m 
 31%|███       | 4/13 [00:01<00:04,  2.09it/s][A
[36m(_objective pid=3656)[0m 
 38%|███▊      | 5/13 [00:02<00:04,  1.93it/s][A
[36m(_objective pid=3656)[0m 
 46%|████▌     | 6/13 [00:02<00:03,  1.86it/s][A
[36m(_objective pid=3656)[0m 
 54%|█████▍    | 7/13 [00:03<00:03,  1.80it/s][A
[36m(_objective pid=3656)[0m 
 62%|██████▏   | 8/13 [00:04<00:02,  1.77it/s][A
[36m(_objective pid=3656)[0m 
 69%|██████▉   | 9/13 [00:04<00:02,  1.75it/s][A
[36m(_objective pid=3656)[0m 
 77%|███████▋  | 10/13 [00:05<00:01,  1.73it/s][A
[36m(_objective pid=3656)[0m 
 85%|████████▍ | 11/13 [00:05<00:01,  1.72it/s][A
[36m(_objective pid=3656)[0m 
 92%|█████████▏| 12/13 [00:06<00:00,  1.71it/s][A



Trial _objective_6a385_00000 finished iteration 5 at 2024-03-11 01:25:37. Total running time: 20min 24s
+-------------------------------------------------+
| Trial _objective_6a385_00000 result             |
+-------------------------------------------------+
| checkpoint_dir_name                             |
| time_this_iter_s                        236.061 |
| time_total_s                            1209.09 |
| training_iteration                            5 |
| epoch                                         5 |
| eval_accuracy                              0.84 |
| eval_loss                               0.83819 |
| eval_runtime                             7.3562 |
| eval_samples_per_second                  27.188 |
| eval_steps_per_second                     1.767 |
| objective                                  0.84 |
+-------------------------------------------------+

Trial _objective_6a385_00000 completed after 5 iterations at 2024-03-11 01:25:37. Total running time: 20min 24s
[

[36m(_objective pid=3656)[0m 
[36m(_objective pid=3656)[0m 100%|██████████| 13/13 [00:06<00:00,  2.02it/s][A                                                   
[36m(_objective pid=3656)[0m                                                [A100%|██████████| 8000/8000 [19:59<00:00,  7.03it/s]
[36m(_objective pid=3656)[0m 100%|██████████| 13/13 [00:06<00:00,  2.02it/s][A
[36m(_objective pid=3656)[0m                                                [A                                                   100%|██████████| 8000/8000 [19:59<00:00,  7.03it/s]100%|██████████| 8000/8000 [19:59<00:00,  6.67it/s]



Trial _objective_6a385_00001 started with configuration:
+-------------------------------------------------+
| Trial _objective_6a385_00001 config             |
+-------------------------------------------------+
| learning_rate                             2e-05 |
| num_train_epochs                              3 |
| per_device_train_batch_size                   8 |
| weight_decay                            0.00029 |
+-------------------------------------------------+


[36m(_objective pid=8866)[0m 2024-03-11 01:25:44.374455: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(_objective pid=8866)[0m 2024-03-11 01:25:44.374518: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[36m(_objective pid=8866)[0m 2024-03-11 01:25:44.375857: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered



Trial status: 1 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:25:46. Total running time: 20min 32s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00001   RUNNING          1.57513e-05                        8                    3      0.000293754                  

[36m(_objective pid=8866)[0m Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
[36m(_objective pid=8866)[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 0/600 [00:00<?, ?it/s]
  0%|          | 1/600 [00:02<21:25,  2.15s/it]
  0%|          | 2/600 [00:02<13:05,  1.31s/it]
  0%|          | 3/600 [00:03<10:42,  1.08s/it]
  1%|          | 4/600 [00:04<09:30,  1.04it/s]
  1%|          | 5/600 [00:05<08:47,  1.13it/s]
  1%|          | 6/600 [00:05<08:20,  1.19it/s]
  1%|          | 7/600 [00:06<08:04,  1.22it/s]
  1%|▏         | 8/600 [00:07<07:53,  1.25it/s]
  2%|▏         | 9/600 [00:08<07:46,  1.27it/s]
  2%|▏         | 10/600 [00:09<07:40,  1.28it/s]
  2%|▏         | 11/600 [00:09<07:37,  1.29it/s]
  2%|▏         | 12/600 [00:10<07:35,  1.29it/s]
  2%|▏         | 13/600 [0

Trial status: 1 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:26:16. Total running time: 21min 2s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00001   RUNNING          1.57513e-05                        8                    3      0.000293754                    

  6%|▌         | 34/600 [00:27<07:34,  1.25it/s]
  6%|▌         | 35/600 [00:28<07:34,  1.24it/s]
  6%|▌         | 36/600 [00:29<07:35,  1.24it/s]
  6%|▌         | 37/600 [00:30<07:34,  1.24it/s]
  6%|▋         | 38/600 [00:31<07:33,  1.24it/s]
  6%|▋         | 39/600 [00:31<07:31,  1.24it/s]
  7%|▋         | 40/600 [00:32<07:30,  1.24it/s]
  7%|▋         | 41/600 [00:33<07:28,  1.25it/s]
  7%|▋         | 42/600 [00:34<07:26,  1.25it/s]
  7%|▋         | 43/600 [00:35<07:25,  1.25it/s]
  7%|▋         | 44/600 [00:35<07:23,  1.25it/s]
  8%|▊         | 45/600 [00:36<07:21,  1.26it/s]
  8%|▊         | 46/600 [00:37<07:19,  1.26it/s]
  8%|▊         | 47/600 [00:38<07:17,  1.26it/s]
  8%|▊         | 48/600 [00:39<07:16,  1.26it/s]
  8%|▊         | 49/600 [00:39<07:15,  1.27it/s]
  8%|▊         | 50/600 [00:40<07:14,  1.27it/s]
  8%|▊         | 51/600 [00:41<07:13,  1.27it/s]
  9%|▊         | 52/600 [00:42<07:12,  1.27it/s]
  9%|▉         | 53/600 [00:43<07:12,  1.27it/s]
  9%|▉         | 54/

Trial status: 1 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:26:46. Total running time: 21min 32s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00001   RUNNING          1.57513e-05                        8                    3      0.000293754                   

 12%|█▏        | 72/600 [00:57<06:48,  1.29it/s]
 12%|█▏        | 73/600 [00:58<06:46,  1.30it/s]
 12%|█▏        | 74/600 [00:59<06:45,  1.30it/s]
 12%|█▎        | 75/600 [01:00<06:44,  1.30it/s]
 13%|█▎        | 76/600 [01:00<06:43,  1.30it/s]
 13%|█▎        | 77/600 [01:01<06:41,  1.30it/s]
 13%|█▎        | 78/600 [01:02<06:40,  1.30it/s]
 13%|█▎        | 79/600 [01:03<06:39,  1.30it/s]
 13%|█▎        | 80/600 [01:03<06:39,  1.30it/s]
 14%|█▎        | 81/600 [01:04<06:38,  1.30it/s]
 14%|█▎        | 82/600 [01:05<06:38,  1.30it/s]
 14%|█▍        | 83/600 [01:06<06:38,  1.30it/s]
 14%|█▍        | 84/600 [01:07<06:38,  1.30it/s]
 14%|█▍        | 85/600 [01:07<06:37,  1.30it/s]
 14%|█▍        | 86/600 [01:08<06:36,  1.30it/s]
 14%|█▍        | 87/600 [01:09<06:34,  1.30it/s]
 15%|█▍        | 88/600 [01:10<06:33,  1.30it/s]
 15%|█▍        | 89/600 [01:10<06:32,  1.30it/s]
 15%|█▌        | 90/600 [01:11<06:30,  1.31it/s]
 15%|█▌        | 91/600 [01:12<06:29,  1.31it/s]
 15%|█▌        | 92/

Trial status: 1 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:27:16. Total running time: 22min 2s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00001   RUNNING          1.57513e-05                        8                    3      0.000293754                    

 18%|█▊        | 111/600 [01:27<06:15,  1.30it/s]
 19%|█▊        | 112/600 [01:28<06:15,  1.30it/s]
 19%|█▉        | 113/600 [01:29<06:14,  1.30it/s]
 19%|█▉        | 114/600 [01:30<06:13,  1.30it/s]
 19%|█▉        | 115/600 [01:30<06:12,  1.30it/s]
 19%|█▉        | 116/600 [01:31<06:12,  1.30it/s]
 20%|█▉        | 117/600 [01:32<06:12,  1.30it/s]
 20%|█▉        | 118/600 [01:33<06:12,  1.29it/s]
 20%|█▉        | 119/600 [01:33<06:12,  1.29it/s]
 20%|██        | 120/600 [01:34<06:11,  1.29it/s]
 20%|██        | 121/600 [01:35<06:10,  1.29it/s]
 20%|██        | 122/600 [01:36<06:09,  1.29it/s]
 20%|██        | 123/600 [01:37<06:09,  1.29it/s]
 21%|██        | 124/600 [01:37<06:08,  1.29it/s]
 21%|██        | 125/600 [01:38<06:07,  1.29it/s]
 21%|██        | 126/600 [01:39<06:05,  1.30it/s]
 21%|██        | 127/600 [01:40<06:04,  1.30it/s]
 21%|██▏       | 128/600 [01:40<06:03,  1.30it/s]
 22%|██▏       | 129/600 [01:41<06:02,  1.30it/s]
 22%|██▏       | 130/600 [01:42<06:02,  1.30it/s]


Trial status: 1 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:27:46. Total running time: 22min 33s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00001   RUNNING          1.57513e-05                        8                    3      0.000293754                   

 25%|██▌       | 150/600 [01:58<05:50,  1.28it/s]
 25%|██▌       | 151/600 [01:58<05:50,  1.28it/s]
 25%|██▌       | 152/600 [01:59<05:50,  1.28it/s]
 26%|██▌       | 153/600 [02:00<05:50,  1.28it/s]
 26%|██▌       | 154/600 [02:01<05:49,  1.28it/s]
 26%|██▌       | 155/600 [02:01<05:47,  1.28it/s]
 26%|██▌       | 156/600 [02:02<05:46,  1.28it/s]
 26%|██▌       | 157/600 [02:03<05:46,  1.28it/s]
 26%|██▋       | 158/600 [02:04<05:44,  1.28it/s]
 26%|██▋       | 159/600 [02:05<05:43,  1.28it/s]
 27%|██▋       | 160/600 [02:05<05:44,  1.28it/s]
 27%|██▋       | 161/600 [02:06<05:41,  1.28it/s]
 27%|██▋       | 162/600 [02:07<05:41,  1.28it/s]
 27%|██▋       | 163/600 [02:08<05:39,  1.29it/s]
 27%|██▋       | 164/600 [02:08<05:38,  1.29it/s]
 28%|██▊       | 165/600 [02:09<05:37,  1.29it/s]
 28%|██▊       | 166/600 [02:10<05:38,  1.28it/s]
 28%|██▊       | 167/600 [02:11<05:38,  1.28it/s]
 28%|██▊       | 168/600 [02:12<05:39,  1.27it/s]
 28%|██▊       | 169/600 [02:12<05:37,  1.28it/s]


Trial status: 1 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:28:16. Total running time: 23min 3s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00001   RUNNING          1.57513e-05                        8                    3      0.000293754                    

 32%|███▏      | 189/600 [02:28<05:18,  1.29it/s]
 32%|███▏      | 190/600 [02:29<05:17,  1.29it/s]
 32%|███▏      | 191/600 [02:29<05:15,  1.30it/s]
 32%|███▏      | 192/600 [02:30<05:14,  1.30it/s]
 32%|███▏      | 193/600 [02:31<05:13,  1.30it/s]
 32%|███▏      | 194/600 [02:32<05:12,  1.30it/s]
 32%|███▎      | 195/600 [02:33<05:11,  1.30it/s]
 33%|███▎      | 196/600 [02:33<05:10,  1.30it/s]
 33%|███▎      | 197/600 [02:34<05:09,  1.30it/s]
 33%|███▎      | 198/600 [02:35<05:09,  1.30it/s]
 33%|███▎      | 199/600 [02:36<05:08,  1.30it/s]
 33%|███▎      | 200/600 [02:36<05:08,  1.30it/s]
[36m(_objective pid=8866)[0m 
  0%|          | 0/13 [00:00<?, ?it/s][A
[36m(_objective pid=8866)[0m 
 15%|█▌        | 2/13 [00:00<00:03,  3.37it/s][A
[36m(_objective pid=8866)[0m 
 23%|██▎       | 3/13 [00:01<00:04,  2.39it/s][A
[36m(_objective pid=8866)[0m 
 31%|███       | 4/13 [00:01<00:04,  2.07it/s][A
[36m(_objective pid=8866)[0m 
 38%|███▊      | 5/13 [00:02<00:04,  1.92it/s][


Trial _objective_6a385_00001 finished iteration 1 at 2024-03-11 01:28:32. Total running time: 23min 19s
+-------------------------------------------------+
| Trial _objective_6a385_00001 result             |
+-------------------------------------------------+
| checkpoint_dir_name                             |
| time_this_iter_s                        169.455 |
| time_total_s                            169.455 |
| training_iteration                            1 |
| epoch                                         1 |
| eval_accuracy                             0.615 |
| eval_loss                               1.06991 |
| eval_runtime                             7.3842 |
| eval_samples_per_second                  27.085 |
| eval_steps_per_second                     1.761 |
| objective                                 0.615 |
+-------------------------------------------------+
[36m(_objective pid=8866)[0m {'eval_loss': 1.0699081420898438, 'eval_accuracy': 0.615, 'eval_runtime': 7.3842, 'e

[36m(_objective pid=8866)[0m 
[36m(_objective pid=8866)[0m 100%|██████████| 13/13 [00:06<00:00,  2.04it/s][A                                                 
[36m(_objective pid=8866)[0m                                                [A 33%|███▎      | 200/600 [02:44<05:08,  1.30it/s]
[36m(_objective pid=8866)[0m 100%|██████████| 13/13 [00:06<00:00,  2.04it/s][A
                                               [A
 34%|███▎      | 201/600 [02:45<19:49,  2.98s/it]
 34%|███▎      | 202/600 [02:45<15:22,  2.32s/it]
 34%|███▍      | 203/600 [02:46<12:16,  1.85s/it]
 34%|███▍      | 204/600 [02:47<10:04,  1.53s/it]
 34%|███▍      | 205/600 [02:48<08:33,  1.30s/it]
 34%|███▍      | 206/600 [02:48<07:29,  1.14s/it]
 34%|███▍      | 207/600 [02:49<06:45,  1.03s/it]
 35%|███▍      | 208/600 [02:50<06:14,  1.05it/s]
 35%|███▍      | 209/600 [02:51<05:52,  1.11it/s]
 35%|███▌      | 210/600 [02:51<05:37,  1.16it/s]



Trial status: 1 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2024-03-11 01:28:40. Total running time: 23min 27s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:T4)
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     ..._train_batch_size     num_train_epochs     weight_decay     iter     total time (s)     eval_loss     eval_accuracy     eval_runtime     ...amples_per_second |
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6a385_00001   RUNNING          1.57513e-05                        8                    3      0.000293754        1         

 35%|███▌      | 211/600 [02:52<05:25,  1.19it/s]
 35%|███▌      | 212/600 [02:53<05:16,  1.22it/s]
 36%|███▌      | 213/600 [02:54<05:10,  1.25it/s]
 36%|███▌      | 214/600 [02:55<05:06,  1.26it/s]
 36%|███▌      | 215/600 [02:55<05:02,  1.27it/s]
 36%|███▌      | 216/600 [02:56<04:59,  1.28it/s]
 36%|███▌      | 217/600 [02:57<04:57,  1.29it/s]
 36%|███▋      | 218/600 [02:58<04:56,  1.29it/s]
 36%|███▋      | 219/600 [02:58<04:54,  1.30it/s]
 37%|███▋      | 220/600 [02:59<04:53,  1.30it/s]
 37%|███▋      | 221/600 [03:00<04:52,  1.30it/s]
 37%|███▋      | 222/600 [03:01<04:51,  1.30it/s]
 37%|███▋      | 223/600 [03:01<04:49,  1.30it/s]
Resume experiment with: tune.run(..., resume=True)
- _objective_6a385_00002: FileNotFoundError('Could not fetch metrics for _objective_6a385_00002: both result.json and progress.csv were not found at /root/ray_results/_objective_2024-03-11_01-05-13/_objective_6a385_00002_2_learning_rate=0.0000,num_train_epochs=5,per_device_train_batch_size=1,weight




In [19]:
best_trial

BestRun(run_id='6a385_00000', objective=0.84, hyperparameters={'learning_rate': 5.61151641533451e-06, 'per_device_train_batch_size': 1, 'num_train_epochs': 5, 'weight_decay': 0.015702970884055385}, run_summary=<ray.tune.analysis.experiment_analysis.ExperimentAnalysis object at 0x7d636692a8c0>)

{'learning_rate': 8.70602e-05,
 'per_device_train_batch_size': 4,
 'num_train_epochs': 3,
 'weight_decay': 0.000100539}

In [20]:

train_dataset = tokenized_datasets["train"]
eval_dataset = tokenized_datasets["test"]
val_dataset = tokenized_datasets["validation"]
trainer = Trainer(

model=model,
args=training_args,
train_dataset=train_dataset,
eval_dataset=eval_dataset,
compute_metrics=lambda p: {"accuracy": accuracy_score(p.label_ids,
np.argmax(p.predictions,
axis=1))}
)

In [21]:
for n, v in best_trial.hyperparameters.items():
    setattr(trainer.args, n, v)



In [22]:
trainer.args

TrainingArguments(
_n_gpu=1,
accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True},
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_persistent_workers=False,
dataloader_pin_memory=True,
dataloader_prefetch_factor=None,
ddp_backend=None,
ddp_broadcast_buffers=None,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
dispatch_batches=None,
do_eval=True,
do_predict=False,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=None,
evaluation_strategy=epoch,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},
fsdp_min_num_params=0,
fsdp_transformer_

In [23]:
trainer.train()

Epoch,Training Loss,Validation Loss


KeyboardInterrupt: 

In [None]:
trainer.save_model('/home/ai/Downloads/results/last-bert-emotion')

In [27]:
predictions = trainer.evaluate(train_dataset)
print(predictions)

Epoch,Training Loss,Validation Loss,Accuracy
0,0.4934,0.324572,0.935


{'eval_loss': 0.3245723247528076, 'eval_accuracy': 0.935}


In [25]:
predictions = trainer.evaluate(eval_dataset)
print(predictions)

Epoch,Training Loss,Validation Loss,Accuracy
0,0.4934,0.443169,0.918


{'eval_loss': 0.4431690573692322, 'eval_accuracy': 0.918}


In [26]:
predictions = trainer.evaluate(val_dataset)
print(predictions)

Epoch,Training Loss,Validation Loss,Accuracy
0,0.4934,0.399073,0.9225


{'eval_loss': 0.39907336235046387, 'eval_accuracy': 0.9225}


In [28]:
import random


In [35]:
import random

num_examples = 10
picks = []
lable = []
input = []
prediction = []
for i in range(num_examples):
        pick = random.randint(0, len(dataset['validation'])-1)
        while pick in picks:
            pick = random.randint(0, len(dataset)-1)
        picks.append(pick)
        input.append(dataset['validation'][picks[i]]['text'])
        #input_token = tokenizer(dataset['validation'][picks[i]]['text'])
        lable.append(dataset['validation'][picks[i]]['label'])
        encoded_input = tokenizer(dataset['validation'][pick]['text'],return_tensors='pt').to(model.device)
        prediction.append(torch.argmax(trainer.model.forward(**encoded_input).logits, dim=1)[0].cpu().item())





In [36]:
print(input)
print(lable,"lable")
print(prediction,"prediction")



['i feel grouchy and i cannot think properly when i am deprived of food for more than two hours', 'i feel not worthwhile', 'i feel unusually mellow not having to worry about any of the aforementioned things not having to rely on tylenol pm or nyquil to lull me to sleep', 'i was questioning myself and feeling nervous about being able to hit the targets', 'i get the feeling he was as surprised as everybody else when people started getting sick', 'i accept the medication until i dont feel too troubled by those i will never have the full benefices from them', 'i get up to refill my coffee and feel that pleasant and familiar ache it reminds me how much i miss the whole body conversations you can have when you re sitting on a good good horse', 'ive explained that he is very creative and loves to makes things and i feel that he is very smart and intelligent and he is lacking in some areas that i agree with', 'i was feeling sentimental', 'when i was walking around all alone at night']
[3, 1, 1