In [1]:
# https://huggingface.co/docs/transformers/v4.44.2/en/main_classes/trainer#trainer
from transformers import AutoModelForSequenceClassification, AutoTokenizer

In [None]:
# https://blog.gopenai.com/how-to-resolve-runtimeerror-cuda-out-of-memory-d48995452a0

In [2]:
tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-cased")
model = AutoModelForSequenceClassification.from_pretrained("google-bert/bert-base-cased", num_labels=2)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [3]:
from datasets import load_dataset

dataset = load_dataset("rotten_tomatoes")

In [4]:
def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=512)

tokenized_datasets = dataset.map(tokenize_function, batched=True)

Map:   0%|          | 0/1066 [00:00<?, ? examples/s]

In [5]:
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./output",
    learning_rate=2e-5,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    num_train_epochs=1,
    weight_decay=0.01,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    push_to_hub=False,
)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
2024-08-26 22:15:34.288525: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [6]:
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    tokenizer=tokenizer,
    #data_collator=data_collator,
    #compute_metrics=compute_metrics,
)

trainer.train()

  0%|          | 0/8530 [00:00<?, ?it/s]

{'loss': 0.9524, 'grad_norm': 0.6996574997901917, 'learning_rate': 1.8827667057444317e-05, 'epoch': 0.06}
{'loss': 1.092, 'grad_norm': 0.3950474262237549, 'learning_rate': 1.7655334114888628e-05, 'epoch': 0.12}
{'loss': 1.1472, 'grad_norm': 38.29890823364258, 'learning_rate': 1.6483001172332943e-05, 'epoch': 0.18}
{'loss': 0.9417, 'grad_norm': 195.2352294921875, 'learning_rate': 1.5310668229777258e-05, 'epoch': 0.23}
{'loss': 0.8948, 'grad_norm': 0.15859559178352356, 'learning_rate': 1.4138335287221572e-05, 'epoch': 0.29}
{'loss': 0.811, 'grad_norm': 0.293508380651474, 'learning_rate': 1.2966002344665887e-05, 'epoch': 0.35}
{'loss': 0.9282, 'grad_norm': 0.12572962045669556, 'learning_rate': 1.17936694021102e-05, 'epoch': 0.41}
{'loss': 0.7846, 'grad_norm': 0.08798622339963913, 'learning_rate': 1.0621336459554515e-05, 'epoch': 0.47}
{'loss': 0.9664, 'grad_norm': 0.25048723816871643, 'learning_rate': 9.449003516998828e-06, 'epoch': 0.53}
{'loss': 0.7778, 'grad_norm': 0.4948570430278778, 

  0%|          | 0/1066 [00:00<?, ?it/s]

{'eval_loss': 0.8523743152618408, 'eval_runtime': 173.253, 'eval_samples_per_second': 6.153, 'eval_steps_per_second': 6.153, 'epoch': 1.0}
{'train_runtime': 6241.3634, 'train_samples_per_second': 1.367, 'train_steps_per_second': 1.367, 'train_loss': 0.8723370741009013, 'epoch': 1.0}


TrainOutput(global_step=8530, training_loss=0.8723370741009013, metrics={'train_runtime': 6241.3634, 'train_samples_per_second': 1.367, 'train_steps_per_second': 1.367, 'total_flos': 2244337302220800.0, 'train_loss': 0.8723370741009013, 'epoch': 1.0})

In [7]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [9]:
model.push_to_hub("bert-base-cased")

model.safetensors:   0%|          | 0.00/433M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/thiagoads/bert-base-cased/commit/099e202f3c24fb195c89efa3d9289c149be9eaef', commit_message='Upload BertForSequenceClassification', commit_description='', oid='099e202f3c24fb195c89efa3d9289c149be9eaef', pr_url=None, pr_revision=None, pr_num=None)