In [1]:
from datasets import load_dataset
from transformers import AutoTokenizer, DataCollatorWithPadding

# Checkpoint name shared by the tokenizer here and the models built later.
checkpoint = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# Load the "mrpc" configuration of the "glue" dataset.
raw_datasets = load_dataset(path="glue", name="mrpc")

def tokenize_function(example):
    """Tokenize the ``sentence1``/``sentence2`` fields of ``example`` as a pair.

    Both fields are handed to the notebook-level ``tokenizer`` in a single
    call with ``truncation=True``; whatever the tokenizer returns is
    returned unchanged.
    """
    first_sentences = example["sentence1"]
    second_sentences = example["sentence2"]
    return tokenizer(first_sentences, second_sentences, truncation=True)

# Collator wrapping the same tokenizer; it is passed to the Trainer later on.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Apply tokenize_function to every split, in batched mode.
tokenized_datasets = raw_datasets.map(function=tokenize_function, batched=True)

# Last expression of the cell, so the resulting DatasetDict is displayed.
tokenized_datasets


  from .autonotebook import tqdm as notebook_tqdm
Map: 100%|██████████| 1725/1725 [00:00<00:00, 12575.54 examples/s]


DatasetDict({
    train: Dataset({
        features: ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 3668
    })
    validation: Dataset({
        features: ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 408
    })
    test: Dataset({
        features: ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 1725
    })
})

In [2]:
# Training arguments: the hyperparameters the Trainer will use.
from transformers import TrainingArguments

# Only the output directory is specified; nothing else is overridden here.
training_args = TrainingArguments(output_dir="test-trainer")

In [3]:
# Model: a sequence-classification model built from the same checkpoint,
# configured with two labels.
from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained(
    checkpoint,
    num_labels=2,
)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:
# Trainer: ties together the model, training arguments, datasets,
# collator and tokenizer, then runs training.
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    processing_class=tokenizer,
)

# Last expression of the cell, so the value returned by train() is displayed.
trainer.train()

Step,Training Loss
500,0.5537
1000,0.2891


TrainOutput(global_step=1377, training_loss=0.34457680031105326, metrics={'train_runtime': 227.1516, 'train_samples_per_second': 48.443, 'train_steps_per_second': 6.062, 'total_flos': 405114969714960.0, 'train_loss': 0.34457680031105326, 'epoch': 3.0})

In [5]:
# Run prediction on the validation split, then print the shapes of the
# returned predictions and label ids side by side.
predictions = trainer.predict(test_dataset=tokenized_datasets["validation"])
print(*(arr.shape for arr in (predictions.predictions, predictions.label_ids)))


(408, 2) (408,)


In [7]:
# Predicted class = index of the largest logit along the last axis.
import numpy as np

preds = predictions.predictions.argmax(axis=-1)

In [None]:
import evaluate

# Requires evaluate, scikit-learn and scipy to be installed.
metric = evaluate.load("glue", "mrpc")

# Last expression of the cell, so the computed metric values are displayed.
metric.compute(
    references=predictions.label_ids,
    predictions=preds,
)


{'accuracy': 0.8357843137254902, 'f1': 0.8850771869639794}

In [None]:
# Custom metric function handed to Trainer via `compute_metrics=`.
from functools import lru_cache


@lru_cache(maxsize=None)
def _load_mrpc_metric():
    """Load the GLUE/MRPC metric once and reuse it on every later call."""
    return evaluate.load("glue", "mrpc")


def compute_metrics(eval_preds):
    """Turn raw evaluation outputs into GLUE/MRPC metric values.

    Args:
        eval_preds: A pair that unpacks into ``(logits, labels)``.

    Returns:
        Whatever the GLUE/MRPC metric's ``compute`` returns for the
        argmax-decoded predictions and the reference labels.
    """
    logits, labels = eval_preds
    # The Transformers example scripts take the first element when the
    # predictions arrive as a tuple of outputs; a plain array is untouched.
    if isinstance(logits, tuple):
        logits = logits[0]
    predictions = np.argmax(logits, axis=-1)

    # The metric is loaded lazily and cached, so repeated evaluation passes
    # (e.g. one per epoch) do not reload it each time.
    return _load_mrpc_metric().compute(predictions=predictions, references=labels)


In [25]:
# Report metrics at the end of each epoch: evaluation is scheduled per
# epoch and compute_metrics converts the predictions into metric values.
training_args = TrainingArguments(
    output_dir="test-trainer",
    eval_strategy="epoch",
)
# Build a new model from the checkpoint for this run.
model = AutoModelForSequenceClassification.from_pretrained(
    checkpoint,
    num_labels=2,
)

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
)

# Last expression of the cell, so the value returned by train() is displayed.
trainer.train()

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.510988,0.801471,0.871224
2,0.501800,0.527909,0.848039,0.896321
3,0.296800,0.690178,0.857843,0.901361


TrainOutput(global_step=1377, training_loss=0.32872253063370893, metrics={'train_runtime': 169.4181, 'train_samples_per_second': 64.952, 'train_steps_per_second': 8.128, 'total_flos': 405114969714960.0, 'train_loss': 0.32872253063370893, 'epoch': 3.0})

In [None]:
# More Trainer features: mixed precision training
# (faster training, reduced memory usage).

training_args = TrainingArguments(
    output_dir="test-trainer",
    fp16=True,  # enable mixed precision (half-precision floating point)
    eval_strategy="epoch",
)



In [None]:
# Gradient accumulation: reach a larger batch size when GPU memory is limited.
# Effective batch size = 4 x 4 = 16.

training_args = TrainingArguments(
    output_dir="test-trainer",
    gradient_accumulation_steps=4,
    per_device_train_batch_size=4,
    eval_strategy="epoch",
)



In [None]:
# Learning rate scheduling (linear decay is used by default);
# this configuration switches to the "cosine" scheduler type.

training_args = TrainingArguments(
    output_dir="test-trainer",
    lr_scheduler_type="cosine",
    learning_rate=2e-5,
    eval_strategy="epoch",
)