# Fine-tuning a model with the Trainer API or Keras

Install the Transformers, Datasets, and Evaluate libraries to run this notebook.

In [12]:
!pip install datasets evaluate transformers[sentencepiece]
!pip install transformers[torch]
!pip install accelerate>=0.20.1
!
!
!




In [1]:

from datasets import load_dataset
from transformers import AutoTokenizer, DataCollatorWithPadding

# Load the "mrpc" configuration of the "glue" dataset; later cells index the
# result by split name ("train", "validation").
raw_datasets = load_dataset("glue", "mrpc")
# Single checkpoint name shared by the tokenizer here and the model loaded later,
# so both always come from the same pretrained weights/vocabulary.
checkpoint = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)


def tokenize_function(example):
    """Tokenize the two sentences of an example as a pair.

    Args:
        example: Mapping that provides "sentence1" and "sentence2" entries.

    Returns:
        Whatever the module-level ``tokenizer`` returns for the pair, with
        truncation enabled. No padding is requested here.
    """
    first_sentence = example["sentence1"]
    second_sentence = example["sentence2"]
    return tokenizer(first_sentence, second_sentence, truncation=True)


# Apply tokenize_function across the dataset; batched=True hands the function
# several examples per call instead of one at a time.
tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)
# Collator built on the same tokenizer; tokenize_function does not pad, so
# padding is presumably applied here, per batch — confirm against the
# DataCollatorWithPadding documentation.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [2]:
from transformers import TrainingArguments

# Only the first positional argument ("test-trainer") is given; every other
# training setting is left at the library default.
training_args = TrainingArguments("test-trainer")

In [3]:
from transformers import AutoModelForSequenceClassification

# Load the pretrained checkpoint with a 2-label sequence-classification head.
# As the warning printed by this cell says, the classifier weights are newly
# initialized, so the model must be fine-tuned before its predictions are useful.
model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:
from transformers import Trainer

# Bundle the model, training arguments, data splits, collator and tokenizer
# into a Trainer. No compute_metrics is supplied at this point.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    tokenizer=tokenizer,
)

In [5]:
# Fine-tune the model. This Trainer was built without compute_metrics, so the
# progress table only reports the training loss.
trainer.train()

You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
500,0.5314
1000,0.2944


TrainOutput(global_step=1377, training_loss=0.3430345323350694, metrics={'train_runtime': 185.9915, 'train_samples_per_second': 59.164, 'train_steps_per_second': 7.404, 'total_flos': 405626802939840.0, 'train_loss': 0.3430345323350694, 'epoch': 3.0})

In [6]:
# Run the fine-tuned model over the validation split. The returned object
# exposes .predictions and .label_ids, which the following cells inspect.
predictions = trainer.predict(tokenized_datasets["validation"])
# NOTE(review): this prints the whole PredictionOutput, including the full
# predictions array, which makes for a very long cell output.
print(predictions)

PredictionOutput(predictions=array([[-3.5377598e+00,  3.3340709e+00],
       [ 2.7995000e+00, -2.4626062e+00],
       [ 2.9296474e+00, -2.2873292e+00],
       [-3.4763479e+00,  3.2088585e+00],
       [ 3.1318784e+00, -2.4079506e+00],
       [-3.2755902e+00,  2.8216560e+00],
       [-3.1412613e+00,  2.9662383e+00],
       [-3.5109191e+00,  3.2910612e+00],
       [-3.3006535e+00,  2.9145014e+00],
       [-3.5434501e+00,  3.3016124e+00],
       [-3.5727370e+00,  3.3866472e+00],
       [ 3.0305238e+00, -2.2737606e+00],
       [ 3.2531025e+00, -2.5248668e+00],
       [-3.4340384e+00,  3.1408288e+00],
       [-3.5478685e+00,  3.3045759e+00],
       [-3.0059202e+00,  2.6471314e+00],
       [-3.5121267e+00,  3.2739735e+00],
       [ 2.6917236e+00, -1.9245321e+00],
       [-3.4602931e+00,  3.2415371e+00],
       [ 3.2315264e+00, -2.5515862e+00],
       [ 3.1024835e+00, -2.4621830e+00],
       [-3.1948385e+00,  2.7219119e+00],
       [ 2.3100913e+00, -1.6857921e+00],
       [-3.4702427e+00,  3.1

In [7]:
# Compare shapes: predictions has one row per validation example and one
# column per label, while label_ids is a flat vector of the same length.
print(predictions.predictions.shape, predictions.label_ids.shape)

(408, 2) (408,)


In [9]:
print(predictions.predictions)   # raw logits: one row per example, one column per label (2 labels)

[[-3.5377598e+00  3.3340709e+00]
 [ 2.7995000e+00 -2.4626062e+00]
 [ 2.9296474e+00 -2.2873292e+00]
 [-3.4763479e+00  3.2088585e+00]
 [ 3.1318784e+00 -2.4079506e+00]
 [-3.2755902e+00  2.8216560e+00]
 [-3.1412613e+00  2.9662383e+00]
 [-3.5109191e+00  3.2910612e+00]
 [-3.3006535e+00  2.9145014e+00]
 [-3.5434501e+00  3.3016124e+00]
 [-3.5727370e+00  3.3866472e+00]
 [ 3.0305238e+00 -2.2737606e+00]
 [ 3.2531025e+00 -2.5248668e+00]
 [-3.4340384e+00  3.1408288e+00]
 [-3.5478685e+00  3.3045759e+00]
 [-3.0059202e+00  2.6471314e+00]
 [-3.5121267e+00  3.2739735e+00]
 [ 2.6917236e+00 -1.9245321e+00]
 [-3.4602931e+00  3.2415371e+00]
 [ 3.2315264e+00 -2.5515862e+00]
 [ 3.1024835e+00 -2.4621830e+00]
 [-3.1948385e+00  2.7219119e+00]
 [ 2.3100913e+00 -1.6857921e+00]
 [-3.4702427e+00  3.1625893e+00]
 [-3.5358052e+00  3.2974005e+00]
 [-2.1110663e+00  1.8658023e+00]
 [-2.0103116e+00  1.6176392e+00]
 [-3.5106394e+00  3.2849784e+00]
 [-3.4398427e+00  3.2527585e+00]
 [-3.5450552e+00  3.2854195e+00]
 [-1.69642

In [10]:
import numpy as np

preds = np.argmax(predictions.predictions, axis=-1)  # for each row of logits above, take the index of the largest logit as the predicted label

In [11]:
import evaluate

# Load the metric associated with the "mrpc" configuration of "glue".
metric = evaluate.load("glue", "mrpc")
# Score the argmax predictions against the reference labels; the cell output
# shows that this yields both accuracy and F1.
metric.compute(predictions=preds, references=predictions.label_ids)

Downloading builder script:   0%|          | 0.00/5.75k [00:00<?, ?B/s]

{'accuracy': 0.8676470588235294, 'f1': 0.9087837837837838}

In [12]:
def compute_metrics(eval_preds):
    """Score a batch of evaluation outputs with the GLUE/MRPC metric.

    Args:
        eval_preds: A pair that unpacks into ``(logits, labels)``.

    Returns:
        The result of the metric's ``compute`` call on the argmax of the
        logits versus the labels.
    """
    mrpc_metric = evaluate.load("glue", "mrpc")
    raw_logits, gold_labels = eval_preds
    # The highest-scoring column of each row is the predicted class.
    predicted_labels = np.argmax(raw_logits, axis=-1)
    return mrpc_metric.compute(
        predictions=predicted_labels,
        references=gold_labels,
    )

In [13]:
# Rebuild the training setup so that metrics are reported while training.
# eval_strategy="epoch" evaluates on eval_dataset at the end of every epoch.
# The argument used to be spelled `evaluation_strategy`; that name is deprecated
# in current transformers releases, and since the install cell does not pin a
# version, the current spelling is the one that works on a fresh install.
training_args = TrainingArguments("test-trainer", eval_strategy="epoch")
# Start again from the pretrained checkpoint: the existing `model` object has
# already been fine-tuned by the earlier trainer.train() call.
model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)

trainer = Trainer(
    model,
    training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    # NOTE(review): recent transformers releases deprecate `tokenizer=` in favor
    # of `processing_class=` — confirm against the installed version.
    tokenizer=tokenizer,
    # Turns each evaluation's (logits, labels) into the metrics shown in the
    # per-epoch progress table.
    compute_metrics=compute_metrics,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [14]:
# Train the freshly re-initialized model. With per-epoch evaluation and
# compute_metrics configured, the progress table now also reports validation
# loss, accuracy and F1 for each epoch.
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.415209,0.82598,0.884553
2,0.520300,0.41166,0.865196,0.905983
3,0.277800,0.642819,0.865196,0.90566


TrainOutput(global_step=1377, training_loss=0.3265021274984965, metrics={'train_runtime': 208.0674, 'train_samples_per_second': 52.887, 'train_steps_per_second': 6.618, 'total_flos': 405626802939840.0, 'train_loss': 0.3265021274984965, 'epoch': 3.0})