In [19]:
# The code below is for 1 epoch. I ran it for 10 epochs.
# The model is retrained using the parameters below in the code.

In [1]:
!pip install torch transformers datasets

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.28.1-py3-none-any.whl (7.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m28.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets
  Downloading datasets-2.11.0-py3-none-any.whl (468 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m468.7/468.7 kB[0m [31m26.5 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m87.1 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.14.1-py3-none-any.whl (224 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m15.5 MB/s[0m eta [36m0:00:00[0m
Collect

In [9]:
# Import required libraries
from datasets import load_dataset
from transformers import GPT2ForSequenceClassification, GPT2Tokenizer, Trainer, TrainingArguments

In [10]:
# Load the BoolQ task from the SuperGLUE benchmark
dataset = load_dataset('super_glue', 'boolq')

# Load the GPT-2 tokenizer and model
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2ForSequenceClassification.from_pretrained('gpt2', num_labels=2)
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = model.config.eos_token_id



  0%|          | 0/3 [00:00<?, ?it/s]

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [11]:
# Tokenize the dataset
def tokenize(batch):
    return tokenizer(batch['question'], batch['passage'], truncation=True, padding=True, max_length=512)

train_dataset = dataset['train'].map(tokenize, batched=True, batch_size=len(dataset['train']))
val_dataset = dataset['validation'].map(tokenize, batched=True, batch_size=len(dataset['validation']))

train_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'label'])
val_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'label'])



In [12]:
# Define a compute_metrics function
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    correct = (preds == labels)
    acc = correct.sum() / correct.shape[0]
    return {'accuracy': acc}

In [13]:
# Define the training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=1,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    evaluation_strategy='steps',
    eval_steps=500,
    logging_steps=500,
)

In [14]:
# Create a Trainer instance
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
)

In [15]:
# Train the model
trainer.train()

Step,Training Loss,Validation Loss,Accuracy
500,0.6933,0.657501,0.610398


TrainOutput(global_step=590, training_loss=0.6878035464529264, metrics={'train_runtime': 264.2233, 'train_samples_per_second': 35.678, 'train_steps_per_second': 2.233, 'total_flos': 2463244467830784.0, 'train_loss': 0.6878035464529264, 'epoch': 1.0})

In [16]:
# Evaluate the model
eval_result = trainer.evaluate()
print(f"Evaluation result: {eval_result}")

Evaluation result: {'eval_loss': 0.6341205835342407, 'eval_accuracy': 0.6370030581039755, 'eval_runtime': 25.2555, 'eval_samples_per_second': 129.477, 'eval_steps_per_second': 2.059, 'epoch': 1.0}
