In [1]:
from datasets import load_dataset
from transformers import AutoTokenizer, DataCollatorWithPadding
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
torch.cuda.is_available()

True

In [3]:
raw_dataset = load_dataset("glue", "mrpc")
checkpoint = "bert-base-cased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

Found cached dataset glue (C:/Users/Raj/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)
100%|██████████| 3/3 [00:00<00:00, 1000.07it/s]


In [4]:
def tokenize_function(examples):
    return tokenizer(examples["sentence1"], examples["sentence2"], truncation=True)

In [5]:
tokenized_datasets = raw_dataset.map(tokenize_function, batched=True)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Loading cached processed dataset at C:\Users\Raj\.cache\huggingface\datasets\glue\mrpc\1.0.0\dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad\cache-8c84bcc161b9e9cc.arrow
Loading cached processed dataset at C:\Users\Raj\.cache\huggingface\datasets\glue\mrpc\1.0.0\dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad\cache-6f4a5090965d06b8.arrow


In [8]:
# Training
from transformers import TrainingArguments

# 1. Using the default Trainer hyperparameters the Trainer will use for training and evaluation
# test-trainer is the directory where the model will be saved, as well as checkpoints along the way
training_args = TrainingArguments("test-trainer")

In [9]:
# Define the model
from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

In [10]:
# once we have our model, we can define a Trainer by passing all the objects constructed so far - the model, training args, training and validation datasets, data collator, and tokenizer

from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    tokenizer=tokenizer,
)
    
    

In [11]:
import torch
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [12]:
trainer.train()

  0%|          | 0/1377 [00:00<?, ?it/s]You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
 36%|███▋      | 500/1377 [00:19<00:30, 28.51it/s]

{'loss': 0.4841, 'learning_rate': 3.184458968772695e-05, 'epoch': 1.09}


 73%|███████▎  | 1000/1377 [00:38<00:13, 28.65it/s]

{'loss': 0.2483, 'learning_rate': 1.3689179375453886e-05, 'epoch': 2.18}


100%|██████████| 1377/1377 [00:52<00:00, 26.37it/s]

{'train_runtime': 52.2152, 'train_samples_per_second': 210.743, 'train_steps_per_second': 26.372, 'train_loss': 0.2874641556971819, 'epoch': 3.0}





TrainOutput(global_step=1377, training_loss=0.2874641556971819, metrics={'train_runtime': 52.2152, 'train_samples_per_second': 210.743, 'train_steps_per_second': 26.372, 'train_loss': 0.2874641556971819, 'epoch': 3.0})

### Evaluation

In [13]:
# build a useful compute_metrics function to pass to the Trainer

predictions = trainer.predict(tokenized_datasets["validation"])
print(predictions.predictions.shape, predictions.label_ids.shape)

100%|██████████| 51/51 [00:00<00:00, 115.11it/s]

(408, 2) (408,)





In [15]:
# all transformers return logits for each element of the dataset we passed to predict()
# to transform them into predictions that we can compare to our labels, we need to take the index of the maximum value in each logits tuple (the second axis)
import numpy as np
preds = np.argmax(predictions.predictions, axis=-1)
print(preds)

[1 0 0 1 0 1 1 1 1 1 1 0 0 1 1 0 1 0 1 0 0 1 0 1 1 0 1 1 1 1 0 1 1 1 1 1 0
 0 1 1 0 1 0 0 1 1 1 0 1 1 1 1 1 1 1 1 0 1 1 1 0 0 1 0 1 0 1 1 0 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 0 1 0 1 0 1 1 0 1 1 1 1 1 0 1 1 1 1 0 1 1 0 1 0 0 0 1 1
 1 1 1 1 0 1 1 1 1 1 0 0 1 1 1 1 0 1 1 1 0 1 0 1 1 0 0 1 1 1 1 0 1 0 1 1 1
 0 0 0 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 0 1 0 1 0 1 1 1 1 1 0 1 1 0 1 1 1 1
 1 0 1 0 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 0 0 1 1 1 1 1 1 1 0 1 1 1
 1 0 1 1 1 1 1 1 1 1 1 0 0 0 0 0 1 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 1
 0 1 0 1 1 1 1 0 0 0 1 0 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 1 1 1 0 0 0 1 1 1 0
 0 1 1 0 1 1 1 0 1 1 0 1 0 1 0 1 1 0 1 0 1 1 0 0 0 0 0 1 0 1 1 1 1 1 1 1 1
 1 1 1 1 1 0 1 1 1 0 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 1 1 0 0
 0 1 1 1 0 0 1 0 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 0
 1]


In [21]:
# compare predictions to labels
import evaluate

metric = evaluate.load("glue", "mrpc")
metric.compute(predictions=preds, references=predictions.label_ids)

{'accuracy': 0.8504901960784313, 'f1': 0.893169877408056}

In [23]:
# putting all this together, out compute_metrics function looks like this
def compute_metrics(eval_preds):
    metric = evaluate.load("glue", "mrpc")
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

In [24]:
# using the above compute_metrics function, we can now pass it to the Trainer
# define a new trainer with the compute_metrics function
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

In [25]:
trainer.train()

 36%|███▋      | 500/1377 [00:17<00:31, 27.76it/s]

{'loss': 0.0984, 'learning_rate': 3.184458968772695e-05, 'epoch': 1.09}


 73%|███████▎  | 1000/1377 [00:36<00:13, 28.07it/s]

{'loss': 0.0741, 'learning_rate': 1.3689179375453886e-05, 'epoch': 2.18}


100%|██████████| 1377/1377 [00:51<00:00, 26.96it/s]

{'train_runtime': 51.0764, 'train_samples_per_second': 215.442, 'train_steps_per_second': 26.96, 'train_loss': 0.07160873385382979, 'epoch': 3.0}





TrainOutput(global_step=1377, training_loss=0.07160873385382979, metrics={'train_runtime': 51.0764, 'train_samples_per_second': 215.442, 'train_steps_per_second': 26.96, 'train_loss': 0.07160873385382979, 'epoch': 3.0})