In [1]:
import torch
from datasets import load_dataset
dataset = load_dataset('dair-ai/emotion')

from transformers import AutoTokenizer, AlbertForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("textattack/albert-base-v2-imdb",use_fast=False)



You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.


In [2]:
dataset


DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 16000
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 2000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 2000
    })
})

In [3]:
tokenizer

AlbertTokenizer(name_or_path='textattack/albert-base-v2-imdb', vocab_size=30000, model_max_length=512, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'bos_token': '[CLS]', 'eos_token': '[SEP]', 'unk_token': '<unk>', 'sep_token': '[SEP]', 'pad_token': '<pad>', 'cls_token': '[CLS]', 'mask_token': '[MASK]'}, clean_up_tokenization_spaces=True),  added_tokens_decoder={
	0: AddedToken("<pad>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	1: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	2: AddedToken("[CLS]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	3: AddedToken("[SEP]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	4: AddedToken("[MASK]", rstrip=False, lstrip=True, single_word=False, normalized=False, special=True),
}

In [4]:
def tokenize_function(example):
    return tokenizer(example["text"], padding="max_length", truncation=True)
tokenized_datasets = dataset.map(tokenize_function, batched=True)

In [5]:
tokenized_datasets["train"]

Dataset({
    features: ['text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
    num_rows: 16000
})

In [6]:
tokenized_datasets

DatasetDict({
    train: Dataset({
        features: ['text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 16000
    })
    validation: Dataset({
        features: ['text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 2000
    })
    test: Dataset({
        features: ['text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 2000
    })
})

In [8]:
from transformers import  Trainer, TrainingArguments
import numpy as np
from sklearn.metrics import accuracy_score
model = AlbertForSequenceClassification.from_pretrained("textattack/albert-base-v2-imdb", num_labels=6,ignore_mismatched_sizes=True)
training_args = TrainingArguments(
output_dir="./results",
# Directory for saving outputs
learning_rate=5e-5,
# Learning rate for optimization
per_device_train_batch_size=16,
# Batch size for training
per_device_eval_batch_size=16,
# Batch size for evaluation
num_train_epochs=3,
# Number of training epochs
weight_decay=0.01,
# Weight decay for regularization
evaluation_strategy="epoch",
save_strategy = "no"
# Evaluation is done at the end of each epoch
)
train_dataset = tokenized_datasets["train"]
eval_dataset = tokenized_datasets["test"]
val_dataset = tokenized_datasets["validation"]
trainer = Trainer(
model=model,
args=training_args,
train_dataset=train_dataset,
eval_dataset=eval_dataset,
compute_metrics=lambda p: {"accuracy": accuracy_score(p.label_ids,
np.argmax(p.predictions,
axis=1))}
)

Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at textattack/albert-base-v2-imdb and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([2, 768]) in the checkpoint and torch.Size([6, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([2]) in the checkpoint and torch.Size([6]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
model

AlbertForSequenceClassification(
  (albert): AlbertModel(
    (embeddings): AlbertEmbeddings(
      (word_embeddings): Embedding(30000, 128, padding_idx=0)
      (position_embeddings): Embedding(512, 128)
      (token_type_embeddings): Embedding(2, 128)
      (LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0, inplace=False)
    )
    (encoder): AlbertTransformer(
      (embedding_hidden_mapping_in): Linear(in_features=128, out_features=768, bias=True)
      (albert_layer_groups): ModuleList(
        (0): AlbertLayerGroup(
          (albert_layers): ModuleList(
            (0): AlbertLayer(
              (full_layer_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (attention): AlbertAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768,

In [9]:
import os
os.environ["WANDB_DISABLED"] = "true"

In [10]:
eval_results = trainer.evaluate(eval_dataset)
print(eval_results)

{'eval_loss': 1.802198052406311, 'eval_accuracy': 0.0975, 'eval_runtime': 34.9302, 'eval_samples_per_second': 57.257, 'eval_steps_per_second': 3.579}


In [11]:
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,0.2393,0.209393,0.921
2,0.1346,0.160831,0.931
3,0.0921,0.178811,0.9295


TrainOutput(global_step=3000, training_loss=0.2122759043375651, metrics={'train_runtime': 2277.1766, 'train_samples_per_second': 21.079, 'train_steps_per_second': 1.317, 'total_flos': 6358888710144000.0, 'train_loss': 0.2122759043375651, 'epoch': 3.0})

In [8]:
from ray import tune
def ray_hp_space(trial):
    return {
        "learning_rate": tune.loguniform(1e-6, 1e-4),
        "per_device_train_batch_size": tune.choice([1,4 ,8 ,16 ]),
        "num_train_epochs": tune.choice([1,3,5]),

        "weight_decay": tune.loguniform(1e-4, 0.1)
    }

In [9]:
small_train_dataset = tokenized_datasets["train"].shard(index=1, num_shards=10) 
small_eval_dataset = tokenized_datasets["test"].shard(index=1, num_shards=10) 


In [10]:
num_labels = 6
from transformers import DistilBertForSequenceClassification, Trainer, TrainingArguments
import numpy as np
from sklearn.metrics import accuracy_score
def model_init():
    return  DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased',num_labels=6)

In [11]:




num_labels = 6
trainer = Trainer(

    model_init= model_init,

    args=training_args,

    train_dataset=small_train_dataset,

    eval_dataset=small_eval_dataset,

    compute_metrics=lambda p: {"accuracy": accuracy_score(p.label_ids,
np.argmax(p.predictions,
axis=1))}

    

  
)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [12]:
best_trial = trainer.hyperparameter_search(

    direction="maximize",

    backend="ray",

    hp_space=ray_hp_space,

    n_trials=10

)

  self.pid = _fork_exec(
2024-03-07 06:19:50,072	INFO worker.py:1724 -- Started a local Ray instance.
2024-03-07 06:19:51,726	INFO tune.py:220 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `tune.run(...)`.
2024-03-07 06:19:51,729	INFO tune.py:583 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


== Status ==
Current time: 2024-03-07 06:19:51 (running for 00:00:00.18)
Using FIFO scheduling algorithm.
Logical resource usage: 0/12 CPUs, 0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /home/ai/ray_results/_objective_2024-03-07_06-19-51
Number of trials: 10/10 (10 PENDING)
+------------------------+----------+-------+-----------------+--------------------+------------------------+----------------+
| Trial name             | status   | loc   |   learning_rate |   num_train_epochs |   per_device_train_bat |   weight_decay |
|                        |          |       |                 |                    |                ch_size |                |
|------------------------+----------+-------+-----------------+--------------------+------------------------+----------------|
| _objective_5ed05_00000 | PENDING  |       |     5.61152e-06 |                  5 |                      1 |    0.015703    |
| _objective_5ed05_00001 | PENDING  |       |     1.57513e-05 |                  3

[36m(_objective pid=77585)[0m 2024-03-07 06:19:57.586483: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(_objective pid=77585)[0m 2024-03-07 06:19:57.586532: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[36m(_objective pid=77585)[0m 2024-03-07 06:19:57.587850: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[36m(_objective pid=77585)[0m Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weig

== Status ==
Current time: 2024-03-07 06:20:02 (running for 00:00:10.31)
Using FIFO scheduling algorithm.
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /home/ai/ray_results/_objective_2024-03-07_06-19-51
Number of trials: 10/10 (9 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------------+--------------------+------------------------+----------------+
| Trial name             | status   | loc              |   learning_rate |   num_train_epochs |   per_device_train_bat |   weight_decay |
|                        |          |                  |                 |                    |                ch_size |                |
|------------------------+----------+------------------+-----------------+--------------------+------------------------+----------------|
| _objective_5ed05_00000 | RUNNING  | 172.19.0.1:77585 |     5.61152e-06 |                  5 |                      1 |    0.015703    |
| _objective_

  0%|          | 7/8000 [00:00<11:56, 11.16it/s]
  0%|          | 9/8000 [00:00<11:12, 11.89it/s]
  0%|          | 11/8000 [00:01<11:01, 12.07it/s]
  0%|          | 13/8000 [00:01<10:52, 12.25it/s]
  0%|          | 15/8000 [00:01<10:39, 12.49it/s]
  0%|          | 17/8000 [00:01<10:27, 12.72it/s]
  0%|          | 19/8000 [00:01<10:26, 12.74it/s]
  0%|          | 21/8000 [00:01<10:26, 12.73it/s]
  0%|          | 23/8000 [00:01<10:23, 12.79it/s]
  0%|          | 25/8000 [00:02<10:23, 12.80it/s]
  0%|          | 27/8000 [00:02<10:19, 12.87it/s]
  0%|          | 29/8000 [00:02<10:14, 12.97it/s]
  0%|          | 31/8000 [00:02<10:16, 12.92it/s]
  0%|          | 33/8000 [00:02<10:10, 13.06it/s]
  0%|          | 35/8000 [00:02<10:09, 13.07it/s]
  0%|          | 37/8000 [00:03<10:08, 13.08it/s]
  0%|          | 39/8000 [00:03<10:14, 12.96it/s]
  1%|          | 41/8000 [00:03<10:09, 13.06it/s]
  1%|          | 43/8000 [00:03<10:05, 13.14it/s]
  1%|          | 45/8000 [00:03<10:01, 13.23it/s]
  

== Status ==
Current time: 2024-03-07 06:20:07 (running for 00:00:15.34)
Using FIFO scheduling algorithm.
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /home/ai/ray_results/_objective_2024-03-07_06-19-51
Number of trials: 10/10 (9 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------------+--------------------+------------------------+----------------+
| Trial name             | status   | loc              |   learning_rate |   num_train_epochs |   per_device_train_bat |   weight_decay |
|                        |          |                  |                 |                    |                ch_size |                |
|------------------------+----------+------------------+-----------------+--------------------+------------------------+----------------|
| _objective_5ed05_00000 | RUNNING  | 172.19.0.1:77585 |     5.61152e-06 |                  5 |                      1 |    0.015703    |
| _objective_

  1%|          | 75/8000 [00:05<09:56, 13.28it/s]
  1%|          | 77/8000 [00:06<09:57, 13.26it/s]
  1%|          | 79/8000 [00:06<09:56, 13.27it/s]
  1%|          | 81/8000 [00:06<10:03, 13.12it/s]
  1%|          | 83/8000 [00:06<10:06, 13.06it/s]
  1%|          | 85/8000 [00:06<10:06, 13.04it/s]
  1%|          | 87/8000 [00:06<10:03, 13.11it/s]
  1%|          | 89/8000 [00:06<10:02, 13.13it/s]
  1%|          | 91/8000 [00:07<09:55, 13.28it/s]
  1%|          | 93/8000 [00:07<09:57, 13.24it/s]
  1%|          | 95/8000 [00:07<09:54, 13.31it/s]
  1%|          | 97/8000 [00:07<09:54, 13.29it/s]
  1%|          | 99/8000 [00:07<09:54, 13.29it/s]
  1%|▏         | 101/8000 [00:07<09:51, 13.36it/s]
  1%|▏         | 103/8000 [00:08<09:52, 13.32it/s]
  1%|▏         | 105/8000 [00:08<09:51, 13.36it/s]
  1%|▏         | 107/8000 [00:08<09:50, 13.36it/s]
  1%|▏         | 109/8000 [00:08<09:54, 13.27it/s]
  1%|▏         | 111/8000 [00:08<10:02, 13.10it/s]
  1%|▏         | 113/8000 [00:08<10:06, 13.0

== Status ==
Current time: 2024-03-07 06:20:12 (running for 00:00:20.38)
Using FIFO scheduling algorithm.
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /home/ai/ray_results/_objective_2024-03-07_06-19-51
Number of trials: 10/10 (9 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------------+--------------------+------------------------+----------------+
| Trial name             | status   | loc              |   learning_rate |   num_train_epochs |   per_device_train_bat |   weight_decay |
|                        |          |                  |                 |                    |                ch_size |                |
|------------------------+----------+------------------+-----------------+--------------------+------------------------+----------------|
| _objective_5ed05_00000 | RUNNING  | 172.19.0.1:77585 |     5.61152e-06 |                  5 |                      1 |    0.015703    |
| _objective_

  2%|▏         | 139/8000 [00:10<09:59, 13.10it/s]
  2%|▏         | 141/8000 [00:10<10:04, 13.00it/s]
  2%|▏         | 143/8000 [00:11<10:02, 13.05it/s]
  2%|▏         | 145/8000 [00:11<09:58, 13.13it/s]
  2%|▏         | 147/8000 [00:11<09:58, 13.12it/s]
  2%|▏         | 149/8000 [00:11<10:05, 12.97it/s]
  2%|▏         | 151/8000 [00:11<10:06, 12.94it/s]
  2%|▏         | 153/8000 [00:11<10:07, 12.91it/s]
  2%|▏         | 155/8000 [00:11<10:09, 12.87it/s]
  2%|▏         | 157/8000 [00:12<10:10, 12.85it/s]
  2%|▏         | 159/8000 [00:12<10:05, 12.95it/s]
  2%|▏         | 161/8000 [00:12<09:59, 13.08it/s]
  2%|▏         | 163/8000 [00:12<10:00, 13.05it/s]
  2%|▏         | 165/8000 [00:12<10:05, 12.95it/s]
  2%|▏         | 167/8000 [00:12<10:03, 12.98it/s]
  2%|▏         | 169/8000 [00:13<10:01, 13.03it/s]
  2%|▏         | 171/8000 [00:13<09:58, 13.09it/s]
  2%|▏         | 173/8000 [00:13<09:54, 13.16it/s]
  2%|▏         | 175/8000 [00:13<09:59, 13.06it/s]
  2%|▏         | 177/8000 [00:1

== Status ==
Current time: 2024-03-07 06:20:17 (running for 00:00:25.41)
Using FIFO scheduling algorithm.
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /home/ai/ray_results/_objective_2024-03-07_06-19-51
Number of trials: 10/10 (9 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------------+--------------------+------------------------+----------------+
| Trial name             | status   | loc              |   learning_rate |   num_train_epochs |   per_device_train_bat |   weight_decay |
|                        |          |                  |                 |                    |                ch_size |                |
|------------------------+----------+------------------+-----------------+--------------------+------------------------+----------------|
| _objective_5ed05_00000 | RUNNING  | 172.19.0.1:77585 |     5.61152e-06 |                  5 |                      1 |    0.015703    |
| _objective_

  3%|▎         | 205/8000 [00:15<09:42, 13.39it/s]
  3%|▎         | 207/8000 [00:15<09:46, 13.29it/s]
  3%|▎         | 209/8000 [00:16<09:45, 13.31it/s]
  3%|▎         | 211/8000 [00:16<09:45, 13.30it/s]
  3%|▎         | 213/8000 [00:16<09:52, 13.13it/s]
  3%|▎         | 215/8000 [00:16<09:50, 13.18it/s]
  3%|▎         | 217/8000 [00:16<09:50, 13.17it/s]
  3%|▎         | 219/8000 [00:16<09:48, 13.22it/s]
  3%|▎         | 221/8000 [00:17<09:46, 13.26it/s]
  3%|▎         | 223/8000 [00:17<09:44, 13.31it/s]
  3%|▎         | 225/8000 [00:17<09:42, 13.36it/s]
  3%|▎         | 227/8000 [00:17<09:41, 13.37it/s]
  3%|▎         | 229/8000 [00:17<09:38, 13.42it/s]
  3%|▎         | 231/8000 [00:17<09:38, 13.42it/s]
  3%|▎         | 233/8000 [00:17<09:42, 13.34it/s]
  3%|▎         | 235/8000 [00:18<09:43, 13.31it/s]
  3%|▎         | 237/8000 [00:18<09:49, 13.16it/s]
  3%|▎         | 239/8000 [00:18<09:49, 13.16it/s]
  3%|▎         | 241/8000 [00:18<09:51, 13.12it/s]
  3%|▎         | 243/8000 [00:1

== Status ==
Current time: 2024-03-07 06:20:22 (running for 00:00:30.44)
Using FIFO scheduling algorithm.
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /home/ai/ray_results/_objective_2024-03-07_06-19-51
Number of trials: 10/10 (9 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------------+--------------------+------------------------+----------------+
| Trial name             | status   | loc              |   learning_rate |   num_train_epochs |   per_device_train_bat |   weight_decay |
|                        |          |                  |                 |                    |                ch_size |                |
|------------------------+----------+------------------+-----------------+--------------------+------------------------+----------------|
| _objective_5ed05_00000 | RUNNING  | 172.19.0.1:77585 |     5.61152e-06 |                  5 |                      1 |    0.015703    |
| _objective_

  3%|▎         | 275/8000 [00:21<09:33, 13.46it/s]
  3%|▎         | 277/8000 [00:21<09:37, 13.38it/s]
  3%|▎         | 279/8000 [00:21<09:36, 13.40it/s]
  4%|▎         | 281/8000 [00:21<09:36, 13.38it/s]
  4%|▎         | 283/8000 [00:21<09:35, 13.40it/s]
  4%|▎         | 285/8000 [00:21<09:42, 13.26it/s]
  4%|▎         | 287/8000 [00:21<09:48, 13.12it/s]
  4%|▎         | 289/8000 [00:22<09:44, 13.20it/s]
  4%|▎         | 291/8000 [00:22<09:41, 13.25it/s]
  4%|▎         | 293/8000 [00:22<09:40, 13.28it/s]
  4%|▎         | 295/8000 [00:22<09:40, 13.27it/s]
  4%|▎         | 297/8000 [00:22<09:40, 13.28it/s]
  4%|▎         | 299/8000 [00:22<09:38, 13.30it/s]
  4%|▍         | 301/8000 [00:23<09:39, 13.29it/s]
  4%|▍         | 303/8000 [00:23<09:38, 13.30it/s]
  4%|▍         | 305/8000 [00:23<09:47, 13.10it/s]
  4%|▍         | 307/8000 [00:23<09:46, 13.11it/s]
  4%|▍         | 309/8000 [00:23<09:42, 13.21it/s]
  4%|▍         | 311/8000 [00:23<09:41, 13.21it/s]
  4%|▍         | 313/8000 [00:2

== Status ==
Current time: 2024-03-07 06:20:27 (running for 00:00:35.48)
Using FIFO scheduling algorithm.
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /home/ai/ray_results/_objective_2024-03-07_06-19-51
Number of trials: 10/10 (9 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------------+--------------------+------------------------+----------------+
| Trial name             | status   | loc              |   learning_rate |   num_train_epochs |   per_device_train_bat |   weight_decay |
|                        |          |                  |                 |                    |                ch_size |                |
|------------------------+----------+------------------+-----------------+--------------------+------------------------+----------------|
| _objective_5ed05_00000 | RUNNING  | 172.19.0.1:77585 |     5.61152e-06 |                  5 |                      1 |    0.015703    |
| _objective_

  4%|▍         | 339/8000 [00:25<09:47, 13.04it/s]
  4%|▍         | 341/8000 [00:26<09:46, 13.05it/s]
  4%|▍         | 343/8000 [00:26<09:42, 13.15it/s]
  4%|▍         | 345/8000 [00:26<09:37, 13.24it/s]
  4%|▍         | 347/8000 [00:26<09:45, 13.08it/s]
  4%|▍         | 349/8000 [00:26<09:47, 13.02it/s]
  4%|▍         | 351/8000 [00:26<09:41, 13.15it/s]
  4%|▍         | 353/8000 [00:27<09:39, 13.20it/s]
  4%|▍         | 355/8000 [00:27<09:38, 13.22it/s]
  4%|▍         | 357/8000 [00:27<09:37, 13.24it/s]
  4%|▍         | 359/8000 [00:27<09:38, 13.21it/s]
  5%|▍         | 361/8000 [00:27<09:35, 13.28it/s]
  5%|▍         | 363/8000 [00:27<09:34, 13.29it/s]
  5%|▍         | 365/8000 [00:27<09:33, 13.31it/s]
  5%|▍         | 367/8000 [00:28<09:33, 13.30it/s]
  5%|▍         | 369/8000 [00:28<09:30, 13.38it/s]
  5%|▍         | 371/8000 [00:28<09:32, 13.33it/s]
  5%|▍         | 373/8000 [00:28<09:34, 13.27it/s]
  5%|▍         | 375/8000 [00:28<09:31, 13.34it/s]
  5%|▍         | 377/8000 [00:2

== Status ==
Current time: 2024-03-07 06:20:32 (running for 00:00:40.52)
Using FIFO scheduling algorithm.
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /home/ai/ray_results/_objective_2024-03-07_06-19-51
Number of trials: 10/10 (9 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------------+--------------------+------------------------+----------------+
| Trial name             | status   | loc              |   learning_rate |   num_train_epochs |   per_device_train_bat |   weight_decay |
|                        |          |                  |                 |                    |                ch_size |                |
|------------------------+----------+------------------+-----------------+--------------------+------------------------+----------------|
| _objective_5ed05_00000 | RUNNING  | 172.19.0.1:77585 |     5.61152e-06 |                  5 |                      1 |    0.015703    |
| _objective_

  5%|▌         | 405/8000 [00:30<09:31, 13.29it/s]
  5%|▌         | 407/8000 [00:31<09:31, 13.30it/s]
  5%|▌         | 409/8000 [00:31<09:34, 13.22it/s]
  5%|▌         | 411/8000 [00:31<09:34, 13.21it/s]
  5%|▌         | 413/8000 [00:31<09:40, 13.07it/s]
  5%|▌         | 415/8000 [00:31<09:37, 13.13it/s]
  5%|▌         | 417/8000 [00:31<09:35, 13.17it/s]
  5%|▌         | 419/8000 [00:31<09:34, 13.18it/s]
  5%|▌         | 421/8000 [00:32<09:33, 13.21it/s]
  5%|▌         | 423/8000 [00:32<09:32, 13.24it/s]
  5%|▌         | 425/8000 [00:32<09:31, 13.26it/s]
  5%|▌         | 427/8000 [00:32<09:36, 13.15it/s]
  5%|▌         | 429/8000 [00:32<09:39, 13.07it/s]
  5%|▌         | 431/8000 [00:32<09:41, 13.01it/s]
  5%|▌         | 433/8000 [00:33<09:37, 13.09it/s]
  5%|▌         | 435/8000 [00:33<09:36, 13.12it/s]
  5%|▌         | 437/8000 [00:33<09:35, 13.14it/s]
  5%|▌         | 439/8000 [00:33<09:30, 13.24it/s]
  6%|▌         | 441/8000 [00:33<09:31, 13.24it/s]
  6%|▌         | 443/8000 [00:3

== Status ==
Current time: 2024-03-07 06:20:37 (running for 00:00:45.55)
Using FIFO scheduling algorithm.
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /home/ai/ray_results/_objective_2024-03-07_06-19-51
Number of trials: 10/10 (9 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------------+--------------------+------------------------+----------------+
| Trial name             | status   | loc              |   learning_rate |   num_train_epochs |   per_device_train_bat |   weight_decay |
|                        |          |                  |                 |                    |                ch_size |                |
|------------------------+----------+------------------+-----------------+--------------------+------------------------+----------------|
| _objective_5ed05_00000 | RUNNING  | 172.19.0.1:77585 |     5.61152e-06 |                  5 |                      1 |    0.015703    |
| _objective_

  6%|▌         | 473/8000 [00:36<09:37, 13.03it/s]
  6%|▌         | 475/8000 [00:36<09:33, 13.12it/s]
  6%|▌         | 477/8000 [00:36<09:29, 13.21it/s]
  6%|▌         | 479/8000 [00:36<09:27, 13.25it/s]
  6%|▌         | 481/8000 [00:36<09:30, 13.19it/s]
  6%|▌         | 483/8000 [00:36<09:25, 13.29it/s]
  6%|▌         | 485/8000 [00:36<09:24, 13.32it/s]
  6%|▌         | 487/8000 [00:37<09:24, 13.31it/s]
  6%|▌         | 489/8000 [00:37<09:22, 13.36it/s]
  6%|▌         | 491/8000 [00:37<09:20, 13.40it/s]
  6%|▌         | 493/8000 [00:37<09:21, 13.37it/s]
  6%|▌         | 495/8000 [00:37<09:27, 13.23it/s]
  6%|▌         | 497/8000 [00:37<09:33, 13.09it/s]
  6%|▌         | 499/8000 [00:38<09:34, 13.06it/s]
  6%|▋         | 500/8000 [00:38<09:34, 13.06it/s]


[36m(_objective pid=77585)[0m {'loss': 1.5836, 'grad_norm': 15.947199821472168, 'learning_rate': 5.260796639376103e-06, 'epoch': 0.31}


  6%|▋         | 501/8000 [00:39<40:57,  3.05it/s]
  6%|▋         | 503/8000 [00:40<31:34,  3.96it/s]
  6%|▋         | 505/8000 [00:40<24:56,  5.01it/s]
  6%|▋         | 507/8000 [00:40<20:13,  6.17it/s]
  6%|▋         | 509/8000 [00:40<16:55,  7.38it/s]
  6%|▋         | 511/8000 [00:40<14:39,  8.51it/s]
  6%|▋         | 513/8000 [00:40<13:07,  9.51it/s]
  6%|▋         | 515/8000 [00:40<11:57, 10.44it/s]


== Status ==
Current time: 2024-03-07 06:20:42 (running for 00:00:50.59)
Using FIFO scheduling algorithm.
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /home/ai/ray_results/_objective_2024-03-07_06-19-51
Number of trials: 10/10 (9 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------------+--------------------+------------------------+----------------+
| Trial name             | status   | loc              |   learning_rate |   num_train_epochs |   per_device_train_bat |   weight_decay |
|                        |          |                  |                 |                    |                ch_size |                |
|------------------------+----------+------------------+-----------------+--------------------+------------------------+----------------|
| _objective_5ed05_00000 | RUNNING  | 172.19.0.1:77585 |     5.61152e-06 |                  5 |                      1 |    0.015703    |
| _objective_

  6%|▋         | 517/8000 [00:41<11:12, 11.13it/s]
  6%|▋         | 519/8000 [00:41<10:38, 11.72it/s]
  7%|▋         | 521/8000 [00:41<10:16, 12.12it/s]
  7%|▋         | 523/8000 [00:41<10:00, 12.45it/s]
  7%|▋         | 525/8000 [00:41<09:48, 12.70it/s]
  7%|▋         | 527/8000 [00:41<09:39, 12.89it/s]
  7%|▋         | 529/8000 [00:41<09:33, 13.02it/s]
  7%|▋         | 531/8000 [00:42<09:28, 13.15it/s]
  7%|▋         | 533/8000 [00:42<09:25, 13.20it/s]
  7%|▋         | 535/8000 [00:42<09:21, 13.30it/s]
  7%|▋         | 537/8000 [00:42<09:23, 13.25it/s]
  7%|▋         | 539/8000 [00:42<09:22, 13.27it/s]
  7%|▋         | 541/8000 [00:42<09:19, 13.34it/s]
  7%|▋         | 543/8000 [00:43<09:19, 13.33it/s]
  7%|▋         | 545/8000 [00:43<09:19, 13.33it/s]
  7%|▋         | 547/8000 [00:43<09:17, 13.37it/s]
  7%|▋         | 549/8000 [00:43<09:17, 13.36it/s]
  7%|▋         | 551/8000 [00:43<09:21, 13.26it/s]
  7%|▋         | 553/8000 [00:43<09:20, 13.28it/s]
  7%|▋         | 555/8000 [00:4

== Status ==
Current time: 2024-03-07 06:20:47 (running for 00:00:55.63)
Using FIFO scheduling algorithm.
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /home/ai/ray_results/_objective_2024-03-07_06-19-51
Number of trials: 10/10 (9 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------------+--------------------+------------------------+----------------+
| Trial name             | status   | loc              |   learning_rate |   num_train_epochs |   per_device_train_bat |   weight_decay |
|                        |          |                  |                 |                    |                ch_size |                |
|------------------------+----------+------------------+-----------------+--------------------+------------------------+----------------|
| _objective_5ed05_00000 | RUNNING  | 172.19.0.1:77585 |     5.61152e-06 |                  5 |                      1 |    0.015703    |
| _objective_

  7%|▋         | 583/8000 [00:46<09:17, 13.30it/s]
  7%|▋         | 585/8000 [00:46<09:16, 13.31it/s]
  7%|▋         | 587/8000 [00:46<09:18, 13.27it/s]
  7%|▋         | 589/8000 [00:46<09:19, 13.24it/s]
  7%|▋         | 591/8000 [00:46<09:24, 13.13it/s]
  7%|▋         | 593/8000 [00:46<09:23, 13.14it/s]
  7%|▋         | 595/8000 [00:46<09:25, 13.09it/s]
  7%|▋         | 597/8000 [00:47<09:25, 13.09it/s]
  7%|▋         | 599/8000 [00:47<09:22, 13.17it/s]
  8%|▊         | 601/8000 [00:47<09:18, 13.24it/s]
  8%|▊         | 603/8000 [00:47<09:16, 13.30it/s]
  8%|▊         | 605/8000 [00:47<09:17, 13.27it/s]
  8%|▊         | 607/8000 [00:47<09:14, 13.33it/s]
  8%|▊         | 609/8000 [00:47<09:14, 13.33it/s]
  8%|▊         | 611/8000 [00:48<09:14, 13.33it/s]
  8%|▊         | 613/8000 [00:48<09:13, 13.36it/s]
  8%|▊         | 615/8000 [00:48<09:16, 13.28it/s]
  8%|▊         | 617/8000 [00:48<09:21, 13.15it/s]
  8%|▊         | 619/8000 [00:48<09:19, 13.18it/s]
  8%|▊         | 621/8000 [00:4

== Status ==
Current time: 2024-03-07 06:20:52 (running for 00:01:00.67)
Using FIFO scheduling algorithm.
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /home/ai/ray_results/_objective_2024-03-07_06-19-51
Number of trials: 10/10 (9 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------------+--------------------+------------------------+----------------+
| Trial name             | status   | loc              |   learning_rate |   num_train_epochs |   per_device_train_bat |   weight_decay |
|                        |          |                  |                 |                    |                ch_size |                |
|------------------------+----------+------------------+-----------------+--------------------+------------------------+----------------|
| _objective_5ed05_00000 | RUNNING  | 172.19.0.1:77585 |     5.61152e-06 |                  5 |                      1 |    0.015703    |
| _objective_

  8%|▊         | 653/8000 [00:51<09:27, 12.96it/s]
  8%|▊         | 655/8000 [00:51<09:24, 13.00it/s]
  8%|▊         | 657/8000 [00:51<09:24, 13.00it/s]
  8%|▊         | 659/8000 [00:51<09:28, 12.90it/s]
  8%|▊         | 661/8000 [00:51<09:25, 12.98it/s]
  8%|▊         | 663/8000 [00:52<09:23, 13.02it/s]
  8%|▊         | 665/8000 [00:52<09:17, 13.16it/s]
  8%|▊         | 667/8000 [00:52<09:18, 13.13it/s]
  8%|▊         | 669/8000 [00:52<09:20, 13.07it/s]
  8%|▊         | 671/8000 [00:52<09:24, 12.98it/s]
  8%|▊         | 673/8000 [00:52<09:18, 13.12it/s]
  8%|▊         | 675/8000 [00:53<09:16, 13.17it/s]
  8%|▊         | 677/8000 [00:53<09:12, 13.25it/s]
  8%|▊         | 679/8000 [00:53<09:18, 13.12it/s]
  9%|▊         | 681/8000 [00:53<09:20, 13.06it/s]
  9%|▊         | 683/8000 [00:53<09:21, 13.03it/s]
  9%|▊         | 685/8000 [00:53<09:18, 13.09it/s]
  9%|▊         | 687/8000 [00:53<09:17, 13.12it/s]
  9%|▊         | 689/8000 [00:54<09:18, 13.08it/s]
  9%|▊         | 691/8000 [00:5

== Status ==
Current time: 2024-03-07 06:20:57 (running for 00:01:05.70)
Using FIFO scheduling algorithm.
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /home/ai/ray_results/_objective_2024-03-07_06-19-51
Number of trials: 10/10 (9 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------------+--------------------+------------------------+----------------+
| Trial name             | status   | loc              |   learning_rate |   num_train_epochs |   per_device_train_bat |   weight_decay |
|                        |          |                  |                 |                    |                ch_size |                |
|------------------------+----------+------------------+-----------------+--------------------+------------------------+----------------|
| _objective_5ed05_00000 | RUNNING  | 172.19.0.1:77585 |     5.61152e-06 |                  5 |                      1 |    0.015703    |
| _objective_

  9%|▉         | 717/8000 [00:56<09:37, 12.60it/s]
  9%|▉         | 719/8000 [00:56<09:42, 12.50it/s]
  9%|▉         | 721/8000 [00:56<09:40, 12.54it/s]
  9%|▉         | 723/8000 [00:56<09:43, 12.47it/s]
  9%|▉         | 725/8000 [00:56<09:42, 12.50it/s]
  9%|▉         | 727/8000 [00:57<09:44, 12.45it/s]
  9%|▉         | 729/8000 [00:57<09:47, 12.38it/s]
  9%|▉         | 731/8000 [00:57<09:44, 12.43it/s]
  9%|▉         | 733/8000 [00:57<09:43, 12.45it/s]
  9%|▉         | 735/8000 [00:57<09:52, 12.25it/s]


== Status ==
Current time: 2024-03-07 06:20:59 (running for 00:01:07.32)
Using FIFO scheduling algorithm.
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /home/ai/ray_results/_objective_2024-03-07_06-19-51
Number of trials: 10/10 (9 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------------+--------------------+------------------------+----------------+
| Trial name             | status   | loc              |   learning_rate |   num_train_epochs |   per_device_train_bat |   weight_decay |
|                        |          |                  |                 |                    |                ch_size |                |
|------------------------+----------+------------------+-----------------+--------------------+------------------------+----------------|
| _objective_5ed05_00000 | RUNNING  | 172.19.0.1:77585 |     5.61152e-06 |                  5 |                      1 |    0.015703    |
| _objective_

  9%|▉         | 737/8000 [00:57<09:54, 12.21it/s]
  9%|▉         | 739/8000 [00:58<09:52, 12.25it/s]
  9%|▉         | 741/8000 [00:58<09:50, 12.29it/s]
  9%|▉         | 743/8000 [00:58<09:46, 12.37it/s]
  9%|▉         | 745/8000 [00:58<09:45, 12.39it/s]
  9%|▉         | 747/8000 [00:58<09:46, 12.37it/s]
  9%|▉         | 749/8000 [00:58<09:46, 12.36it/s]
  9%|▉         | 751/8000 [00:59<09:45, 12.39it/s]
  9%|▉         | 753/8000 [00:59<09:45, 12.39it/s]
  9%|▉         | 755/8000 [00:59<09:43, 12.42it/s]
  9%|▉         | 757/8000 [00:59<09:45, 12.38it/s]
  9%|▉         | 759/8000 [00:59<09:45, 12.37it/s]
 10%|▉         | 761/8000 [00:59<09:46, 12.34it/s]
 10%|▉         | 763/8000 [00:59<09:45, 12.35it/s]
 10%|▉         | 765/8000 [01:00<09:44, 12.37it/s]
 10%|▉         | 767/8000 [01:00<09:43, 12.39it/s]
 10%|▉         | 769/8000 [01:00<09:41, 12.43it/s]
 10%|▉         | 771/8000 [01:00<09:46, 12.33it/s]
 10%|▉         | 773/8000 [01:00<09:53, 12.18it/s]
 10%|▉         | 775/8000 [01:0

In [14]:
best_trial

BestRun(run_id='0d889_00009', objective=0.89, hyperparameters={'learning_rate': 8.580760619921325e-06, 'per_device_train_batch_size': 1, 'num_train_epochs': 5, 'weight_decay': 0.010988100318524612}, run_summary=<ray.tune.analysis.experiment_analysis.ExperimentAnalysis object at 0x7fded96352d0>)

In [11]:

train_dataset = tokenized_datasets["train"]
eval_dataset = tokenized_datasets["test"]
val_dataset = tokenized_datasets["validation"]
trainer = Trainer(
model=model,
args=training_args,
train_dataset=train_dataset,
eval_dataset=eval_dataset,
compute_metrics=lambda p: {"accuracy": accuracy_score(p.label_ids,
np.argmax(p.predictions,
axis=1))}
)

In [15]:
for n, v in best_trial.hyperparameters.items():
    setattr(trainer.args, n, v)



 19%|█▉        | 1559/8000 [02:06<08:10, 13.14it/s]


In [18]:
trainer.args

TrainingArguments(
_n_gpu=1,
accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True},
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_persistent_workers=False,
dataloader_pin_memory=True,
dataloader_prefetch_factor=None,
ddp_backend=None,
ddp_broadcast_buffers=None,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
dispatch_batches=None,
do_eval=True,
do_predict=False,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=None,
evaluation_strategy=IntervalStrategy.EPOCH,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},
fsdp_min_num_params=0,


In [13]:
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,0.4968,0.423107,0.916
2,0.258,0.413957,0.9225
3,0.3116,0.391027,0.929
4,0.2294,0.346086,0.9275
5,0.2254,0.472098,0.922


TrainOutput(global_step=80000, training_loss=0.3704045206546783, metrics={'train_runtime': 9324.0715, 'train_samples_per_second': 8.58, 'train_steps_per_second': 8.58, 'total_flos': 1912604098560000.0, 'train_loss': 0.3704045206546783, 'epoch': 5.0})

In [14]:
trainer.save_model('/home/ai/Downloads/results/last-albert-emotion')

In [15]:
predictions = trainer.evaluate(train_dataset)
print(predictions)

{'eval_loss': 0.14407125115394592, 'eval_accuracy': 0.9651875, 'eval_runtime': 624.1207, 'eval_samples_per_second': 25.636, 'eval_steps_per_second': 1.602, 'epoch': 5.0}


In [16]:
predictions = trainer.evaluate(eval_dataset)
print(predictions)

{'eval_loss': 0.47209835052490234, 'eval_accuracy': 0.922, 'eval_runtime': 81.1871, 'eval_samples_per_second': 24.634, 'eval_steps_per_second': 1.54, 'epoch': 5.0}


In [17]:
predictions = trainer.evaluate(val_dataset)
print(predictions)

{'eval_loss': 0.36796051263809204, 'eval_accuracy': 0.939, 'eval_runtime': 85.1369, 'eval_samples_per_second': 23.492, 'eval_steps_per_second': 1.468, 'epoch': 5.0}


In [33]:
import random
import torch
num_examples = 10
picks = []
lable = []
input = []
prediction = [] 
for i in range(num_examples):
        pick = random.randint(0, len(dataset['validation'])-1)
        while pick in picks:
            pick = random.randint(0, len(dataset)-1)
        picks.append(pick)
        input.append(dataset['validation'][picks[i]]['text'])
        #input_token = tokenizer(dataset['validation'][picks[i]]['text'])
        lable.append(dataset['validation'][picks[i]]['label'])
        encoded_input = tokenizer(dataset['validation'][pick]['text'],return_tensors='pt').to(model.device)
        prediction.append(torch.argmax(trainer.model.forward(**encoded_input).logits, dim=1)[0].cpu().item())
    
    
    


In [34]:
print(input)
print(lable,"lable")
print(prediction,"prediction")



['i eat or sleep i cant get myself to feel the life loving energy i felt so easily before', 'i love those kiddos and yet am left feeling so helpless', 'im just feeling personally devastated that this happened at my college in the school im studying under', 'im still feeling a little shocked over yesterdays news that pope benedict xvi has decided to resign', 'i want to feel pretty or handsome or something', 'i feel this way i do not just get to appreciate the amazing things i have right here and now i also get to dig up happy memories hidden back of my mind and i get to become inspired with hope for the future', 'i feel so terrified to tell her', 'i am feeling pretty relaxed though', 'i dont know whats wrong with me i try studying but i just feel like im fluffing around and getting distracted all the time', 'i am feeling much better and thought i should get going on the blogging front']
[2, 0, 0, 5, 1, 1, 4, 1, 3, 1] lable
[2, 4, 0, 5, 1, 1, 4, 1, 3, 1] prediction
