# Fine-tuning a model with the Trainer API

In [1]:
# code from previous section, as we'll need the variables

from datasets import load_dataset
from transformers import AutoTokenizer, DataCollatorWithPadding

# Load the "mrpc" configuration of the "glue" dataset (sentence pairs with a binary label).
raw_datasets = load_dataset("glue", "mrpc")
# Name of the pretrained checkpoint; the tokenizer here and the model further down are both loaded from it.
checkpoint = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)


def tokenize_function(example):
    """Tokenize the two sentences of an MRPC example (or batch of examples) as one paired input.

    Args:
        example: Mapping that provides the "sentence1" and "sentence2" entries.

    Returns:
        The output of the module-level ``tokenizer`` for the sentence pair, with truncation enabled.
    """
    first, second = example["sentence1"], example["sentence2"]
    return tokenizer(first, second, truncation=True)


# Tokenize the dataset; batched=True hands tokenize_function batches of examples instead of one at a time.
tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)
# Collator built around the tokenizer; it is passed to the Trainer further down to assemble padded batches.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Found cached dataset glue (C:/Users/joamart/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


  0%|          | 0/3 [00:00<?, ?it/s]

Loading cached processed dataset at C:\Users\joamart\.cache\huggingface\datasets\glue\mrpc\1.0.0\dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad\cache-5137f2fb4737d116.arrow
Loading cached processed dataset at C:\Users\joamart\.cache\huggingface\datasets\glue\mrpc\1.0.0\dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad\cache-60ba0ce5ca5c6a38.arrow
Loading cached processed dataset at C:\Users\joamart\.cache\huggingface\datasets\glue\mrpc\1.0.0\dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad\cache-b26904918f30dd5d.arrow


In [22]:
# Peek at one raw training example (index 1) to see the fields of the dataset.
raw_datasets['train'][1]

{'sentence1': "Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .",
 'sentence2': "Yucaipa bought Dominick 's in 1995 for $ 693 million and sold it to Safeway for $ 1.8 billion in 1998 .",
 'label': 0,
 'idx': 1}

In [2]:
from transformers import TrainingArguments

# TrainingArguments holds the hyperparameters for fine-tuning. The only argument given here is the
# name of the directory where the trained model and the checkpoints along the way will be saved;
# no other hyperparameter is set explicitly.
training_args = TrainingArguments("test-trainer")

In [17]:
#training_args

In [4]:
# now define the model
from transformers import AutoModelForSequenceClassification

# Sequence-classification model initialised from the pretrained checkpoint, with two output labels.
model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Note the warnings printed above after instantiating this pretrained model. They appear because **BERT has not been pretrained on classifying pairs of sentences**, so **the head of the pretrained model has been discarded and a new head suitable for sequence classification has been added instead**. The warnings indicate that some weights were not used (the ones corresponding to the dropped pretraining head) and that some others were randomly initialized (the ones for the new head). The message concludes by encouraging you to train the model, which is exactly what we are going to do now.

In [5]:
import torch
from torch import cuda

# Pick the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Debugging aid for GPU issues: release cached GPU memory and move the model explicitly.
# The explicit move should not be needed.
cuda.empty_cache()
model = model.to(device)
print(device)  # printed "cuda" on the author's machine, i.e. PyTorch detected CUDA


cuda


In [6]:
# Once we have our model, we can define a Trainer by passing it all the objects constructed up to now — 
# the model, the training_args, the training and validation datasets, our data_collator, and our tokenizer
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    tokenizer=tokenizer,
)

# Passing the tokenizer already makes the Trainer default to a DataCollatorWithPadding like the one
# defined earlier, so the explicit data_collator=data_collator argument above could be omitted.



In [7]:
# To fine-tune the model on our dataset, we just have to call the train() method of our Trainer.
#
# Resume from the newest checkpoint in the Trainer's output directory when one exists, otherwise
# train from scratch. Passing resume_from_checkpoint=True unconditionally raises a ValueError when
# the output directory holds no checkpoint yet (first run, or after the directory was deleted).
import os

from transformers.trainer_utils import get_last_checkpoint

last_checkpoint = (
    get_last_checkpoint(trainer.args.output_dir)
    if os.path.isdir(trainer.args.output_dir)
    else None
)
trainer.train(resume_from_checkpoint=last_checkpoint)

# Took about 5 minutes to run on the author's laptop. NOTE(review): a much shorter runtime in the
# recorded output presumably comes from a run that resumed from an existing checkpoint.



  0%|          | 0/1377 [00:00<?, ?it/s]

You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


{'train_runtime': 69.0861, 'train_samples_per_second': 159.279, 'train_steps_per_second': 19.932, 'train_loss': 0.05268104680656261, 'epoch': 3.0}


TrainOutput(global_step=1377, training_loss=0.05268104680656261, metrics={'train_runtime': 69.0861, 'train_samples_per_second': 159.279, 'train_steps_per_second': 19.932, 'train_loss': 0.05268104680656261, 'epoch': 3.0})

In [8]:
# Run the model on the validation split and print the shapes of the returned
# predictions and label arrays.
predictions = trainer.predict(tokenized_datasets["validation"])
print(predictions.predictions.shape, predictions.label_ids.shape)

  0%|          | 0/51 [00:00<?, ?it/s]

(408, 2) (408,)


The output of the `predict()` method is another named tuple with three fields: `predictions`, `label_ids`, and `metrics`. The `metrics` field will just contain the loss on the dataset passed, as well as some time metrics (how long it took to predict, in total and on average). Once we complete our `compute_metrics()` function and pass it to the Trainer, that field will also contain the metrics returned by `compute_metrics()`. 

In [9]:
# Raw model outputs for the validation examples. Remember these are logits, not probabilities.
predictions.predictions

array([[-3.2535424 ,  3.214699  ],
       [ 2.3342998 , -2.2142668 ],
       [ 1.841418  , -1.7557342 ],
       [-3.1135614 ,  3.0576544 ],
       [ 2.1885965 , -2.1124992 ],
       [-3.1099048 ,  3.098265  ],
       [-3.0268288 ,  2.9717078 ],
       [-3.2591035 ,  3.2337382 ],
       [-3.1470149 ,  3.1429627 ],
       [-3.2277832 ,  3.1927621 ],
       [-3.270707  ,  3.2157836 ],
       [ 2.1633573 , -1.8815385 ],
       [ 1.7756401 , -1.7210639 ],
       [-3.2479305 ,  3.228663  ],
       [-3.2727935 ,  3.243384  ],
       [-3.0075758 ,  2.9595397 ],
       [-3.1894376 ,  3.1814132 ],
       [ 1.7170898 , -1.6417385 ],
       [-3.2475433 ,  3.2217438 ],
       [ 2.171003  , -2.0685265 ],
       [ 2.236793  , -2.092103  ],
       [-3.02765   ,  2.9652088 ],
       [ 2.3683083 , -2.2850518 ],
       [-3.2705934 ,  3.199697  ],
       [ 2.2196317 , -2.0901704 ],
       [-0.79386127,  0.7055756 ],
       [ 1.9045945 , -2.1052516 ],
       [-3.3121195 ,  3.2918031 ],
       [-2.8737507 ,

In [10]:
# Reference labels that the predictions will be compared against.
predictions.label_ids

array([1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1,
       0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0,
       0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0,
       1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0,
       1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1,
       1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0,
       1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1,
       1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1,
       1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
       1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1,
       0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1,

In [11]:
# Loss and timing statistics gathered during predict().
predictions.metrics

{'test_loss': 0.7007440328598022,
 'test_runtime': 2.2516,
 'test_samples_per_second': 181.207,
 'test_steps_per_second': 22.651}

In [12]:
import numpy as np

# np.argmax returns the index of the largest value along the last axis (0 or 1 here), which gives
# us the predicted class directly from the logits, without having to convert them to
# probabilities first.
preds = np.argmax(predictions.predictions, axis=-1)

In [13]:
# Predicted class index for every validation example.
preds

array([1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1,
       0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0,
       0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0,
       1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1,
       1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0,
       1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1,
       1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1,
       1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1,

We can now compare those `preds` to the labels. To build our `compute_metrics()` function, we will rely on the metrics from the 🤗 Evaluate library. **We can load the metrics associated with the MRPC dataset as easily as we loaded the dataset, this time with the `evaluate.load()` function. The object returned has a `compute()` method we can use to do the metric calculation:**

In [14]:
import evaluate

# Load the metric associated with GLUE/MRPC (this needs scikit-learn to be installed) and score
# the predicted classes against the reference labels.
metric = evaluate.load("glue", "mrpc")
metric.compute(predictions=preds, references=predictions.label_ids)

{'accuracy': 0.8382352941176471, 'f1': 0.8858131487889273}

The exact results you get may vary, as the random initialization of the model head might change the metrics it achieved. **Accuracy and F1 score are the two metrics used to evaluate results on the MRPC dataset for the GLUE benchmark**.

The table in the BERT paper reported an F1 score of 88.9 for the base model. That was the uncased model, which is also the checkpoint used in this notebook (`bert-base-uncased`), so the F1 score of about 88.6 obtained above is in line with the published result.

In [15]:
# putting the function together
def compute_metrics(eval_preds):
    """Score evaluation outputs with the metric registered for GLUE/MRPC.

    Args:
        eval_preds: Pair ``(logits, labels)``; the logits are reduced to class
            indices with an argmax over the last axis.

    Returns:
        The result of the metric's ``compute()`` call (accuracy and F1 for MRPC).
    """
    mrpc_metric = evaluate.load("glue", "mrpc")
    raw_scores, gold_labels = eval_preds
    predicted_classes = np.argmax(raw_scores, axis=-1)  # logits -> predicted class per example
    return mrpc_metric.compute(predictions=predicted_classes, references=gold_labels)

In [16]:
# Build a fresh model and Trainer, this time passing in the compute_metrics function.

# evaluation_strategy="epoch" means: evaluate (and so compute the metrics) at the end of every epoch.
training_args = TrainingArguments(output_dir="test-trainer", evaluation_strategy="epoch")
model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,  # new compared with the first Trainer
)

trainer.train()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  0%|          | 0/1377 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

{'eval_loss': 0.37218421697616577, 'eval_accuracy': 0.8529411764705882, 'eval_f1': 0.8958333333333334, 'eval_runtime': 4.0861, 'eval_samples_per_second': 99.851, 'eval_steps_per_second': 12.481, 'epoch': 1.0}
{'loss': 0.5089, 'learning_rate': 3.184458968772695e-05, 'epoch': 1.09}


  0%|          | 0/51 [00:00<?, ?it/s]

{'eval_loss': 0.5396929979324341, 'eval_accuracy': 0.8406862745098039, 'eval_f1': 0.8914858096828046, 'eval_runtime': 4.5412, 'eval_samples_per_second': 89.844, 'eval_steps_per_second': 11.23, 'epoch': 2.0}
{'loss': 0.2862, 'learning_rate': 1.3689179375453886e-05, 'epoch': 2.18}


  0%|          | 0/51 [00:00<?, ?it/s]

{'eval_loss': 0.6872920393943787, 'eval_accuracy': 0.8455882352941176, 'eval_f1': 0.8904347826086957, 'eval_runtime': 5.6965, 'eval_samples_per_second': 71.623, 'eval_steps_per_second': 8.953, 'epoch': 3.0}
{'train_runtime': 346.3752, 'train_samples_per_second': 31.769, 'train_steps_per_second': 3.975, 'train_loss': 0.321917495782945, 'epoch': 3.0}


TrainOutput(global_step=1377, training_loss=0.321917495782945, metrics={'train_runtime': 346.3752, 'train_samples_per_second': 31.769, 'train_steps_per_second': 3.975, 'train_loss': 0.321917495782945, 'epoch': 3.0})

This time, it will report the validation loss and metrics at the end of each epoch on top of the training loss.

# Do this with glue/sst-2

In [26]:
from datasets import load_dataset
from transformers import AutoTokenizer, DataCollatorWithPadding

# Load the "sst2" configuration of the "glue" dataset.
sst2_raw_dataset = load_dataset("glue", "sst2")
# Same pretrained checkpoint as in the MRPC part; this rebinds `checkpoint` and `tokenizer`.
checkpoint = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)


def tokenize_function(example):
    """Tokenize the single "sentence" field of an SST-2 example (or batch of examples).

    Note: this replaces the earlier two-sentence ``tokenize_function`` defined for MRPC.

    Args:
        example: Mapping that provides the "sentence" entry.

    Returns:
        The output of the module-level ``tokenizer`` for the sentence, with truncation enabled.
    """
    text = example["sentence"]
    return tokenizer(text, truncation=True)


# Tokenize the SST-2 dataset in batches; this rebinds `tokenized_datasets`, replacing the MRPC version.
tokenized_datasets = sst2_raw_dataset.map(tokenize_function, batched=True)
# Collator built around the tokenizer; it is passed to the Trainer further down to assemble padded batches.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Found cached dataset glue (C:/Users/joamart/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


  0%|          | 0/3 [00:00<?, ?it/s]

Map:   0%|          | 0/67349 [00:00<?, ? examples/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1821 [00:00<?, ? examples/s]

In [29]:
from transformers import TrainingArguments, AutoModelForSequenceClassification

# The SST-2 run gets its own output directory and is evaluated at the end of every epoch.
# Other hyperparameters can be added to this call.
training_args = TrainingArguments("sst2-test-trainer", evaluation_strategy="epoch")
# Fresh sequence-classification model with two output labels, loaded from the same checkpoint.
model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [28]:
def compute_metrics(eval_preds):
    """Score evaluation outputs with the metric registered for GLUE/SST-2.

    Relies on ``evaluate`` and ``np`` having been imported in earlier cells.

    Args:
        eval_preds: Pair ``(logits, labels)``; the logits are reduced to class
            indices with an argmax over the last axis.

    Returns:
        The result of the metric's ``compute()`` call (accuracy for SST-2).
    """
    sst2_metric = evaluate.load("glue", "sst2")
    raw_scores, gold_labels = eval_preds
    predicted_classes = np.argmax(raw_scores, axis=-1)  # logits -> predicted class per example
    return sst2_metric.compute(predictions=predicted_classes, references=gold_labels)

In [30]:
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,  # SST-2 version of the metric function
)

# trainer.train() -- left commented out so the long training run is not repeated by mistake;
# the log shown below this cell comes from the original training run.



  0%|          | 0/25257 [00:00<?, ?it/s]

You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


{'loss': 0.4321, 'learning_rate': 4.9010175396919665e-05, 'epoch': 0.06}
{'loss': 0.3793, 'learning_rate': 4.8020350793839334e-05, 'epoch': 0.12}
{'loss': 0.3352, 'learning_rate': 4.7030526190759e-05, 'epoch': 0.18}
{'loss': 0.3184, 'learning_rate': 4.6040701587678666e-05, 'epoch': 0.24}
{'loss': 0.3096, 'learning_rate': 4.5050876984598335e-05, 'epoch': 0.3}
{'loss': 0.3261, 'learning_rate': 4.4061052381518e-05, 'epoch': 0.36}
{'loss': 0.3066, 'learning_rate': 4.307122777843766e-05, 'epoch': 0.42}
{'loss': 0.2846, 'learning_rate': 4.208140317535733e-05, 'epoch': 0.48}
{'loss': 0.2776, 'learning_rate': 4.109157857227699e-05, 'epoch': 0.53}
{'loss': 0.3158, 'learning_rate': 4.010175396919666e-05, 'epoch': 0.59}
{'loss': 0.284, 'learning_rate': 3.9111929366116324e-05, 'epoch': 0.65}
{'loss': 0.2825, 'learning_rate': 3.812210476303599e-05, 'epoch': 0.71}
{'loss': 0.2891, 'learning_rate': 3.7132280159955656e-05, 'epoch': 0.77}
{'loss': 0.2641, 'learning_rate': 3.614245555687532e-05, 'epoch'

  0%|          | 0/109 [00:00<?, ?it/s]

{'eval_loss': 0.3546741306781769, 'eval_accuracy': 0.9059633027522935, 'eval_runtime': 14.0778, 'eval_samples_per_second': 61.942, 'eval_steps_per_second': 7.743, 'epoch': 1.0}
{'loss': 0.2442, 'learning_rate': 3.317298174763432e-05, 'epoch': 1.01}
{'loss': 0.1845, 'learning_rate': 3.218315714455399e-05, 'epoch': 1.07}
{'loss': 0.1771, 'learning_rate': 3.119333254147365e-05, 'epoch': 1.13}
{'loss': 0.1833, 'learning_rate': 3.0203507938393317e-05, 'epoch': 1.19}
{'loss': 0.1839, 'learning_rate': 2.9213683335312986e-05, 'epoch': 1.25}
{'loss': 0.1898, 'learning_rate': 2.822385873223265e-05, 'epoch': 1.31}
{'loss': 0.1774, 'learning_rate': 2.7234034129152314e-05, 'epoch': 1.37}
{'loss': 0.1804, 'learning_rate': 2.6244209526071984e-05, 'epoch': 1.43}
{'loss': 0.1743, 'learning_rate': 2.5254384922991646e-05, 'epoch': 1.48}
{'loss': 0.1653, 'learning_rate': 2.4264560319911315e-05, 'epoch': 1.54}
{'loss': 0.1732, 'learning_rate': 2.3274735716830978e-05, 'epoch': 1.6}
{'loss': 0.1683, 'learnin

  0%|          | 0/109 [00:00<?, ?it/s]

{'eval_loss': 0.42381051182746887, 'eval_accuracy': 0.8979357798165137, 'eval_runtime': 7.8802, 'eval_samples_per_second': 110.657, 'eval_steps_per_second': 13.832, 'epoch': 2.0}
{'loss': 0.1525, 'learning_rate': 1.634596349526864e-05, 'epoch': 2.02}
{'loss': 0.1063, 'learning_rate': 1.5356138892188305e-05, 'epoch': 2.08}
{'loss': 0.1133, 'learning_rate': 1.4366314289107971e-05, 'epoch': 2.14}
{'loss': 0.1214, 'learning_rate': 1.3376489686027638e-05, 'epoch': 2.2}
{'loss': 0.1172, 'learning_rate': 1.2386665082947303e-05, 'epoch': 2.26}
{'loss': 0.1247, 'learning_rate': 1.1396840479866969e-05, 'epoch': 2.32}
{'loss': 0.1021, 'learning_rate': 1.0407015876786634e-05, 'epoch': 2.38}
{'loss': 0.1192, 'learning_rate': 9.417191273706299e-06, 'epoch': 2.43}
{'loss': 0.0999, 'learning_rate': 8.427366670625965e-06, 'epoch': 2.49}
{'loss': 0.1095, 'learning_rate': 7.437542067545632e-06, 'epoch': 2.55}
{'loss': 0.1064, 'learning_rate': 6.447717464465297e-06, 'epoch': 2.61}
{'loss': 0.1236, 'learni

  0%|          | 0/109 [00:00<?, ?it/s]

{'eval_loss': 0.4058356285095215, 'eval_accuracy': 0.9071100917431193, 'eval_runtime': 24.1128, 'eval_samples_per_second': 36.163, 'eval_steps_per_second': 4.52, 'epoch': 3.0}
{'train_runtime': 6344.0032, 'train_samples_per_second': 31.849, 'train_steps_per_second': 3.981, 'train_loss': 0.19839191531164607, 'epoch': 3.0}


TrainOutput(global_step=25257, training_loss=0.19839191531164607, metrics={'train_runtime': 6344.0032, 'train_samples_per_second': 31.849, 'train_steps_per_second': 3.981, 'train_loss': 0.19839191531164607, 'epoch': 3.0})

In [41]:
# A few hand-written sentences to try the fine-tuned SST-2 model on.
my_sequences = [
    "I've been waiting for a HuggingFace course my whole life.",
    "This course is amazing!",
    "I hate my life",
    "Why don't they respond, this is frustrating",
    "This is a sunny day!",
    "This is a rainy day!"
]

# Tokenize all sentences together as one padded, truncated batch of PyTorch tensors ("pt").
batch = tokenizer(my_sequences, padding=True, truncation=True, return_tensors="pt")


In [36]:
# trainer.model.save_pretrained("sst2-finetuned-bert-jota")

In [39]:
import torch

# Load the fine-tuned weights from "sst2-finetuned-bert-jota".
# NOTE(review): this is presumably the local directory written by the (commented-out)
# save_pretrained call in the previous cell, so that call must have been run once beforehand.
my_model = AutoModelForSequenceClassification.from_pretrained("sst2-finetuned-bert-jota")

# Inference only: disable gradient tracking so no autograd graph is built for the forward pass.
with torch.no_grad():
    outputs = my_model(**batch)

# Index of the largest logit for each sentence, i.e. the predicted class.
predictions = outputs.logits.argmax(-1)

In [40]:
# Predicted class index for each sentence in my_sequences, in order.
# NOTE(review): label 1 presumably means positive sentiment in SST-2 -- confirm against the dataset's label names.
predictions

tensor([1, 1, 0, 0, 1, 0])