In [10]:
from datasets import load_dataset
from transformers import BertTokenizer

# Assuming you're using a dataset that has a clear label and text structure
dataset = load_dataset("glue", "mrpc")  # Example dataset from Hugging Face
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

def tokenize_function(examples):
    # This assumes your dataset has text in 'sentence1' and 'sentence2' fields
    return tokenizer(examples['sentence1'], examples['sentence2'], padding="max_length", truncation=True, max_length=512)

# Apply tokenizer
dataset = dataset.map(tokenize_function, batched=True)


Downloading readme:   0%|          | 0.00/35.3k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/649k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/75.7k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/308k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/3668 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/408 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1725 [00:00<?, ? examples/s]

Map:   0%|          | 0/3668 [00:00<?, ? examples/s]

Map:   0%|          | 0/408 [00:00<?, ? examples/s]

Map:   0%|          | 0/1725 [00:00<?, ? examples/s]

In [11]:
from transformers import BertForSequenceClassification

# Number of labels should match your dataset's label count
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)  # For binary classification


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [12]:
from transformers import Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    logging_dir='./logs',
    logging_steps=10,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset['train'],
    eval_dataset=dataset['validation']
)

trainer.train()


  0%|          | 0/1377 [00:00<?, ?it/s]

{'loss': 0.7142, 'grad_norm': 9.401176452636719, 'learning_rate': 4.963689179375454e-05, 'epoch': 0.02}
{'loss': 0.6641, 'grad_norm': 3.9398577213287354, 'learning_rate': 4.927378358750908e-05, 'epoch': 0.04}
{'loss': 0.6788, 'grad_norm': 4.859483242034912, 'learning_rate': 4.891067538126362e-05, 'epoch': 0.07}
{'loss': 0.6468, 'grad_norm': 2.0564959049224854, 'learning_rate': 4.854756717501816e-05, 'epoch': 0.09}
{'loss': 0.6431, 'grad_norm': 1.882811188697815, 'learning_rate': 4.8184458968772694e-05, 'epoch': 0.11}
{'loss': 0.682, 'grad_norm': 5.581890106201172, 'learning_rate': 4.7821350762527234e-05, 'epoch': 0.13}
{'loss': 0.6835, 'grad_norm': 11.823446273803711, 'learning_rate': 4.7458242556281774e-05, 'epoch': 0.15}
{'loss': 0.6493, 'grad_norm': 6.311870098114014, 'learning_rate': 4.709513435003631e-05, 'epoch': 0.17}
{'loss': 0.5961, 'grad_norm': 11.150650024414062, 'learning_rate': 4.673202614379085e-05, 'epoch': 0.2}
{'loss': 0.6786, 'grad_norm': 7.35840368270874, 'learning_r

TrainOutput(global_step=1377, training_loss=0.5137444956197025, metrics={'train_runtime': 6613.0703, 'train_samples_per_second': 1.664, 'train_steps_per_second': 0.208, 'train_loss': 0.5137444956197025, 'epoch': 3.0})