## Fine-tuning a model with the Trainer API

In [None]:
# pip install transformers datasets evaluate torch

In [2]:
from datasets import load_dataset
from transformers import AutoTokenizer, DataCollatorWithPadding

# Load the GLUE "mrpc" configuration; its splits are tokenized further down.
raw_datasets = load_dataset(path="glue", name="mrpc")

# The checkpoint name is kept in a variable because the model cell reuses it.
checkpoint = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(
    checkpoint,
)

def tokenize_function(example):
    """Tokenize the two sentences of an example together as one pair.

    Args:
        example: A dataset example (or a batch of examples, since the
            notebook maps this function with ``batched=True``) exposing
            the "sentence1" and "sentence2" entries.

    Returns:
        Whatever the module-level ``tokenizer`` returns for the pair,
        called with ``truncation=True``.
    """
    first_sentence = example["sentence1"]
    second_sentence = example["sentence2"]
    return tokenizer(first_sentence, second_sentence, truncation=True)

# Apply the tokenizer to the raw datasets in batches. No padding is requested
# here; a DataCollatorWithPadding is built for the Trainer instead.
tokenized_datasets = raw_datasets.map(
    tokenize_function,
    batched=True,
)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

## Training

In [3]:
from transformers import TrainingArguments

# Only the output directory is specified; every other training argument
# is left at its library default.
training_args = TrainingArguments(output_dir="test-trainer")

In [4]:
# Define the model: load the checkpoint with a 2-label sequence-classification head
from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained(
    checkpoint,
    num_labels=2,
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
from transformers import Trainer

# Wire the model, arguments, tokenized splits, collator and tokenizer
# into a single Trainer instance.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    processing_class=tokenizer,
)

In [6]:
import torch

# Report whether a CUDA device is visible. The device name is only queried
# when one exists, because torch.cuda.get_device_name raises on CPU-only
# machines and would otherwise break a full re-run of the notebook.
cuda_available = torch.cuda.is_available()
print(cuda_available)  # Should be True
if cuda_available:
    print(torch.cuda.get_device_name(0))  # See your GPU name
else:
    print("No CUDA device detected; training will run on CPU.")


True
NVIDIA GeForce RTX 4050 Laptop GPU


In [7]:
# Start fine-tuning on the train split with the Trainer configured above
# (only the output directory was customised in the training arguments).
trainer.train()

Step,Training Loss
500,0.5308
1000,0.2777


TrainOutput(global_step=1377, training_loss=0.32818500737652195, metrics={'train_runtime': 163.8058, 'train_samples_per_second': 67.177, 'train_steps_per_second': 8.406, 'total_flos': 405114969714960.0, 'train_loss': 0.32818500737652195, 'epoch': 3.0})

## Evaluation

In [11]:
# Run the trained model over the validation split, then print the shapes
# of the returned prediction scores and label ids as a quick sanity check.
validation_split = tokenized_datasets['validation']
predictions = trainer.predict(validation_split)
print(predictions.predictions.shape, predictions.label_ids.shape)

(408, 2) (408,)


In [12]:
import numpy as np

# Convert the per-class scores into a class index by taking the argmax
# along the last axis.
scores = predictions.predictions
preds = np.argmax(scores, axis=-1)

In [13]:
import evaluate

# Load the GLUE "mrpc" metric and score the predicted classes against the
# reference label ids; the result is displayed as the cell's last expression.
metric = evaluate.load("glue", "mrpc")
metric.compute(
    predictions=preds,
    references=predictions.label_ids,
)

{'accuracy': 0.8553921568627451, 'f1': 0.899488926746167}

## Using a `compute_metrics` function

In [18]:
# NOTE(review): this whole cell is commented out, so `compute_metrics` is never
# defined when the notebook is run as saved. The recorded output below still shows
# a model-initialisation warning, so the cell was presumably executed uncommented
# at some point — confirm whether it should be live.
# If enabled, it rebinds `training_args`, `model` and `trainer`, so the later
# save/push cells would act on this second trainer rather than the first one.
#
# def compute_metrics(eval_preds):
#     metric = evaluate.load("glue", "mrpc")
#     logits, labels = eval_preds
#     predictions = np.argmax(logits, axis=-1)
#     return metric.compute(predictions=predictions, references=labels)


# training_args = TrainingArguments("test-trainer", eval_strategy="epoch")

# model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)

# trainer = Trainer(
#     model, 
#     training_args,
#     train_dataset = tokenized_datasets["train"],
#     eval_dataset = tokenized_datasets["validation"],
#     data_collator = data_collator,
#     processing_class = tokenizer,
#     compute_metrics = compute_metrics,
# )
# trainer.train()

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [15]:
# Save the fine-tuned model to a local directory. This writes the full model
# (not an adapter): the directory listing below shows the weights, config,
# tokenizer files and training arguments.
trainer.save_model('fine_tuning_bert_on_sentence_pair_classification_with_glue_mrpc')

In [17]:
import os
# List the contents of the export directory to confirm what was written.
os.listdir('fine_tuning_bert_on_sentence_pair_classification_with_glue_mrpc')

['config.json',
 'model.safetensors',
 'special_tokens_map.json',
 'tokenizer.json',
 'tokenizer_config.json',
 'training_args.bin',
 'vocab.txt']

In [19]:
from huggingface_hub import login
# Authenticate with the Hugging Face Hub before uploading; in the notebook
# this renders an interactive login widget.
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [20]:
# Push the fine-tuned model to the Hugging Face Hub. This uploads the full
# model weights (model.safetensors) and training_args.bin, not an adapter.
trainer.push_to_hub()

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.71k [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

CommitInfo(commit_url='https://huggingface.co/Ravindra31/test-trainer/commit/15d5fe0c151ac704406392844b4ee76a2ed3e6c7', commit_message='End of training', commit_description='', oid='15d5fe0c151ac704406392844b4ee76a2ed3e6c7', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Ravindra31/test-trainer', endpoint='https://huggingface.co', repo_type='model', repo_id='Ravindra31/test-trainer'), pr_revision=None, pr_num=None)