In [8]:
from datasets import load_dataset
import pandas as pd
dataset = load_dataset("mteb/tweet_sentiment_extraction")

In [9]:
dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'text', 'label', 'label_text'],
        num_rows: 27481
    })
    test: Dataset({
        features: ['id', 'text', 'label', 'label_text'],
        num_rows: 3534
    })
})

In [12]:
from transformers import GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token
def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)



In [13]:
tokenized_datasets = dataset.map(tokenize_function, batched=True)

Map: 100%|██████████| 27481/27481 [00:06<00:00, 3936.55 examples/s]
Map: 100%|██████████| 3534/3534 [00:00<00:00, 3592.93 examples/s]


In [14]:
small_train_dataset = tokenized_datasets["train"].shuffle(seed=42).select(range(1000))
small_eval_dataset = tokenized_datasets["test"].shuffle(seed=42).select(range(1000))

In [15]:
from transformers import GPT2ForSequenceClassification

model = GPT2ForSequenceClassification.from_pretrained("gpt2", num_labels=3)

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [16]:
import evaluate
import numpy as np
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
   logits, labels = eval_pred
   predictions = np.argmax(logits, axis=-1)
   return metric.compute(predictions=predictions, references=labels)

In [17]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
   output_dir="test_trainer",
   #evaluation_strategy="epoch",
   per_device_train_batch_size=1,  # Reduce batch size here
   per_device_eval_batch_size=1,    # Optionally, reduce for evaluation as well
   gradient_accumulation_steps=10
   )


trainer = Trainer(
   model=model,
   args=training_args,
   train_dataset=small_train_dataset,
   eval_dataset=small_eval_dataset,
   compute_metrics=compute_metrics,

)

trainer.train()

Step,Training Loss


TrainOutput(global_step=300, training_loss=0.7337846374511718, metrics={'train_runtime': 228.5552, 'train_samples_per_second': 13.126, 'train_steps_per_second': 1.313, 'total_flos': 1567794659328000.0, 'train_loss': 0.7337846374511718, 'epoch': 3.0})

In [18]:
import evaluate

trainer.evaluate()

{'eval_loss': 0.6462212204933167,
 'eval_accuracy': 0.739,
 'eval_runtime': 24.0868,
 'eval_samples_per_second': 41.517,
 'eval_steps_per_second': 41.517,
 'epoch': 3.0}