https://discuss.huggingface.co/t/multilabel-sequence-classification-with-roberta-value-error-expected-input-batch-size-to-match-target-batch-size/1653/2

In [1]:
from transformers import Trainer
import torch
class CustomTrainer(Trainer):
    """``Trainer`` subclass that computes the loss with ``BCEWithLogitsLoss``.

    The model is called without labels, so any loss the model would compute
    internally is bypassed; the loss is instead the binary cross-entropy
    between the returned logits and ``inputs['labels']``, both cast to float.
    """

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Return the BCE-with-logits loss for one batch.

        Args:
            model: Callable model; its output must expose ``['logits']``.
            inputs: Mapping with ``'input_ids'``, ``'attention_mask'`` and
                ``'labels'``. ``'token_type_ids'`` is forwarded only when
                present, so tokenizers that do not produce it also work.
            return_outputs: When true, return ``(loss, outputs)`` instead of
                just the loss.
            num_items_in_batch: Unused. Accepted only so that ``Trainer``
                versions which pass this keyword to ``compute_loss`` do not
                raise ``TypeError``.

        Returns:
            The loss tensor, or a ``(loss, outputs)`` tuple when
            ``return_outputs`` is true.

        Raises:
            KeyError: If ``'input_ids'``, ``'attention_mask'`` or ``'labels'``
                is missing from ``inputs``.
        """
        model_inputs = {
            'input_ids': inputs['input_ids'],
            'attention_mask': inputs['attention_mask'],
        }
        # Segment ids are optional: not every tokenizer emits them, and
        # indexing them unconditionally would raise KeyError in that case.
        if 'token_type_ids' in inputs:
            model_inputs['token_type_ids'] = inputs['token_type_ids']

        outputs = model(**model_inputs)

        # BCEWithLogitsLoss needs floating-point targets, hence the casts.
        # NOTE(review): assumes labels and logits have the same shape — confirm.
        loss = torch.nn.BCEWithLogitsLoss()(outputs['logits'].float(), inputs['labels'].float())
        return (loss, outputs) if return_outputs else loss

In [2]:
from transformers import DataCollatorWithPadding, TrainingArguments, BertTokenizer, BertForSequenceClassification
from datasets import load_dataset
# load dataset, tokenize, adapt columns, and apply datacollator
# Name of the pretrained checkpoint; the tokenizer below is loaded from it.
checkpoint = "bert-base-cased"
transformers_tokenizer = BertTokenizer.from_pretrained(
    pretrained_model_name_or_path=checkpoint
)
def transformers_tokenize_function(item):
    """Tokenize the ``"text"`` field of ``item`` with ``transformers_tokenizer``.

    Sequences are truncated but deliberately not padded here: this file
    builds a ``DataCollatorWithPadding`` from the same tokenizer and hands it
    to the trainer, so padding happens when batches are collated. Padding at
    map time as well would pad every example to the longest one in its map
    batch and store the extra tokens for no benefit.

    Args:
        item: Mapping with a ``"text"`` entry (a batch of examples when used
            with ``map(..., batched=True)``).

    Returns:
        The tokenizer output for ``item["text"]``.
    """
    return transformers_tokenizer(item["text"], truncation=True)
# Stage 1: load the dataset.
transformers_raw_datasets = load_dataset("mdroth/transformers_issues_labels")
# Stage 2: tokenize in batches.
transformers_encoded_datasets = transformers_raw_datasets.map(
    transformers_tokenize_function, batched=True
)
# Stage 3: drop the columns that are not fed to the model (including the
# original "labels" column), then expose "arr_labels" under the name "labels".
transformers_tokenized_datasets = transformers_encoded_datasets.remove_columns(
    column_names=["url", "text", "num_labels", "labels"]
).rename_column("arr_labels", "labels")
# Collator built from the same tokenizer; it is passed to the trainer below.
transformers_data_collator = DataCollatorWithPadding(tokenizer=transformers_tokenizer)
# Training configuration: output directory, three epochs, batches of four for
# both training and evaluation, and evaluation_strategy set to "epoch".
training_args = TrainingArguments(
    output_dir="5_try_transformers_dataset",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    evaluation_strategy="epoch",
)
# Sequence-classification model loaded from the same checkpoint as the tokenizer.
# NOTE(review): 57 is presumably the number of distinct labels in the dataset
# (i.e. the length of each "labels" vector) — confirm against the data.
num_labels = 57
transformers_model = BertForSequenceClassification.from_pretrained(
    checkpoint,
    num_labels=num_labels,
)
# Wire the model, arguments, data, collator and tokenizer into the custom trainer.
# NOTE(review): train_dataset and eval_dataset are both the "dev" split, so the
# reported validation loss is measured on the training examples — confirm this
# is intentional (e.g. a smoke test).
trainer = CustomTrainer(
    model=transformers_model,
    args=training_args,
    data_collator=transformers_data_collator,
    train_dataset=transformers_tokenized_datasets["dev"],
    eval_dataset=transformers_tokenized_datasets["dev"],
    tokenizer=transformers_tokenizer,
)
# Run training; the returned value is displayed as the cell output.
trainer.train()

Using custom data configuration mdroth--transformers_issues_labels-e1a55ed64424aafd
Reusing dataset parquet (/Users/matthias/.cache/huggingface/datasets/parquet/mdroth--transformers_issues_labels-e1a55ed64424aafd/0.0.0/0b6d5799bb726b24ad7fc7be720c170d8e497f575d02d47537de9a5bac074901)


  0%|          | 0/4 [00:00<?, ?it/s]

Loading cached processed dataset at /Users/matthias/.cache/huggingface/datasets/parquet/mdroth--transformers_issues_labels-e1a55ed64424aafd/0.0.0/0b6d5799bb726b24ad7fc7be720c170d8e497f575d02d47537de9a5bac074901/cache-6abb82432ea4f160.arrow
Loading cached processed dataset at /Users/matthias/.cache/huggingface/datasets/parquet/mdroth--transformers_issues_labels-e1a55ed64424aafd/0.0.0/0b6d5799bb726b24ad7fc7be720c170d8e497f575d02d47537de9a5bac074901/cache-71378fd7107bbfbb.arrow
Loading cached processed dataset at /Users/matthias/.cache/huggingface/datasets/parquet/mdroth--transformers_issues_labels-e1a55ed64424aafd/0.0.0/0b6d5799bb726b24ad7fc7be720c170d8e497f575d02d47537de9a5bac074901/cache-697e1bed1cdd157e.arrow
Loading cached processed dataset at /Users/matthias/.cache/huggingface/datasets/parquet/mdroth--transformers_issues_labels-e1a55ed64424aafd/0.0.0/0b6d5799bb726b24ad7fc7be720c170d8e497f575d02d47537de9a5bac074901/cache-eb557e2d389372ac.arrow
Some weights of the model checkpoint at 

Epoch,Training Loss,Validation Loss
1,No log,0.712996
2,No log,0.664925
3,No log,0.650512


***** Running Evaluation *****
  Num examples = 5
  Batch size = 4
***** Running Evaluation *****
  Num examples = 5
  Batch size = 4
***** Running Evaluation *****
  Num examples = 5
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)




TrainOutput(global_step=6, training_loss=0.6987737019856771, metrics={'train_runtime': 148.5928, 'train_samples_per_second': 0.101, 'train_steps_per_second': 0.04, 'total_flos': 3948614784000.0, 'train_loss': 0.6987737019856771, 'epoch': 3.0})