In [6]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

In [7]:
model_name = 'bert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

Downloading model.safetensors: 100%|██████████| 440M/440M [00:14<00:00, 29.7MB/s] 
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification 

In [15]:
# Train the model on the Sentiment Classification task in GLUE
from datasets import load_dataset, load_metric
# Load the dataset and metric
dataset = load_dataset('glue', 'sst2')
metric = load_metric('glue', 'sst2')

# Split the dataset
train_dataset = dataset['train']
dev_dataset = dataset['validation']

# Print a description of the dataset
print("Dataset Description: ", train_dataset.description)

# Print the label space
print("Label Space: ", train_dataset.features["label"].names)

Dataset Description:  GLUE, the General Language Understanding Evaluation benchmark
(https://gluebenchmark.com/) is a collection of resources for training,
evaluating, and analyzing natural language understanding systems.


Label Space:  ['negative', 'positive']


In [23]:
train_dataset

Dataset({
    features: ['sentence', 'label', 'idx'],
    num_rows: 67349
})

In [24]:
# Encode the datasets
train_dataset = train_dataset.map(lambda examples: tokenizer(examples['sentence'], truncation=True, padding='max_length'), batched=True)
dev_dataset = dev_dataset.map(lambda examples: tokenizer(examples['sentence'], truncation=True, padding='max_length'), batched=True)



[A[A



[A[A

[A[A

  0%|          | 0/67349 [02:39<?, ?it/s]
  0%|          | 0/67349 [02:29<?, ?it/s]


[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

Map: 100%|██████████| 67349/67349 [00:27<00:00, 2455.09 examples/s]
Map: 100%|██████████| 872/872 [00:00<00:00, 1679.26 examples/s]


In [25]:
# Train the model using the Trainer class
from transformers import TrainingArguments, Trainer
# Define the training arguments
training_args = TrainingArguments(
    output_dir='./results',          # output directory
    num_train_epochs=1,            # total number of training steps
    per_device_train_batch_size=1,  # batch size per device during training
    per_device_eval_batch_size=64,   # batch size for evaluation
    warmup_steps=500,                # number of warmup steps for learning rate scheduler
    weight_decay=0.01,               # strength of weight decay
    logging_dir='./logs',            # directory for storing logs
)

# Initialize the trainer
trainer = Trainer(
    model=model,
    tokenizer=tokenizer,                 # the instantiated 🤗 Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    train_dataset=train_dataset,         # training dataset
    eval_dataset=dev_dataset,           # evaluation dataset
)

trainer.train()

  0%|          | 0/67349 [05:22<?, ?it/s]
  0%|          | 0/67349 [00:00<?, ?it/s]

  0%|          | 2/67349 [00:12<110:29:56,  5.91s/it]

KeyboardInterrupt: 

In [None]:
# Evaluate the model on the test dataset
evaluation_results = trainer.evaluate(test_dataset)

# Print the evaluation results
print(evaluation_results)