## Training

In [1]:
import numpy as np
import evaluate # pip install evaluate

from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, DataCollatorWithPadding, TrainingArguments, Trainer

In [2]:
training_args = TrainingArguments(output_dir='./trained_model', num_train_epochs=10)

In [3]:
raw_datasets = load_dataset("glue", "mrpc")
checkpoint = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

def tokenize_function(example):
    return tokenizer(example["sentence1"], example["sentence2"], truncation=True)

tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Found cached dataset glue (C:/Users/epdls/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


  0%|          | 0/3 [00:00<?, ?it/s]

Loading cached processed dataset at C:\Users\epdls\.cache\huggingface\datasets\glue\mrpc\1.0.0\dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad\cache-16ad00c953871f0f.arrow
Loading cached processed dataset at C:\Users\epdls\.cache\huggingface\datasets\glue\mrpc\1.0.0\dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad\cache-9690c23dbc2848e5.arrow
Loading cached processed dataset at C:\Users\epdls\.cache\huggingface\datasets\glue\mrpc\1.0.0\dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad\cache-372ad965bbe7ba4b.arrow


In [4]:
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [5]:
trainer = Trainer(
    model,
    training_args,
    train_dataset = tokenized_datasets['train'],
    eval_dataset = tokenized_datasets['validation'],
    data_collator = data_collator,
    tokenizer = tokenizer
)

In [6]:
trainer.train()

The following columns in the training set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: sentence1, idx, sentence2. If sentence1, idx, sentence2 are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 3668
  Num Epochs = 10
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 4590


Step,Training Loss
500,0.5254
1000,0.3775
1500,0.2312
2000,0.1491
2500,0.0595
3000,0.0299
3500,0.0176
4000,0.013
4500,0.0077


Saving model checkpoint to ./trained_model\checkpoint-500
Configuration saved in ./trained_model\checkpoint-500\config.json
Model weights saved in ./trained_model\checkpoint-500\pytorch_model.bin
tokenizer config file saved in ./trained_model\checkpoint-500\tokenizer_config.json
Special tokens file saved in ./trained_model\checkpoint-500\special_tokens_map.json
Saving model checkpoint to ./trained_model\checkpoint-1000
Configuration saved in ./trained_model\checkpoint-1000\config.json
Model weights saved in ./trained_model\checkpoint-1000\pytorch_model.bin
tokenizer config file saved in ./trained_model\checkpoint-1000\tokenizer_config.json
Special tokens file saved in ./trained_model\checkpoint-1000\special_tokens_map.json
Saving model checkpoint to ./trained_model\checkpoint-1500
Configuration saved in ./trained_model\checkpoint-1500\config.json
Model weights saved in ./trained_model\checkpoint-1500\pytorch_model.bin
tokenizer config file saved in ./trained_model\checkpoint-1500\token

TrainOutput(global_step=4590, training_loss=0.1536856976708028, metrics={'train_runtime': 270.8374, 'train_samples_per_second': 135.432, 'train_steps_per_second': 16.947, 'total_flos': 1353749546484720.0, 'train_loss': 0.1536856976708028, 'epoch': 10.0})

In [7]:
pred = trainer.predict(tokenized_datasets['validation'])

The following columns in the test set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: sentence1, idx, sentence2. If sentence1, idx, sentence2 are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 408
  Batch size = 8


In [8]:
preds = np.argmax(pred.predictions, axis=-1)

metric = evaluate.load('glue', 'mrpc')
metric.compute(predictions=preds, references=pred.label_ids)

{'accuracy': 0.8455882352941176, 'f1': 0.891566265060241}

In [9]:
def compute_metrics(eval_preds):
    metric = evaluate.load("glue", "mrpc")
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)
    
    return metric.compute(predictions=predictions, references=labels)

In [10]:
training_args = TrainingArguments(output_dir='./trained_model', num_train_epochs=10, evaluation_strategy='epoch')

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [11]:
trainer = Trainer(
    model,
    training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

In [12]:
trainer.train()

The following columns in the training set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: sentence1, idx, sentence2. If sentence1, idx, sentence2 are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 3668
  Num Epochs = 10
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 4590


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,1.055631,0.821078,0.874786
2,0.118100,0.828091,0.855392,0.898799
3,0.066700,1.056531,0.845588,0.891566
4,0.050200,1.101945,0.82598,0.877797
5,0.031400,1.15337,0.848039,0.893471
6,0.020900,1.358433,0.82598,0.87389
7,0.011700,1.384753,0.835784,0.88547
8,0.006600,1.448529,0.838235,0.887755
9,0.011500,1.541422,0.835784,0.886633
10,0.000300,1.584729,0.833333,0.885135


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: sentence1, idx, sentence2. If sentence1, idx, sentence2 are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 408
  Batch size = 8
Saving model checkpoint to ./trained_model\checkpoint-500
Configuration saved in ./trained_model\checkpoint-500\config.json
Model weights saved in ./trained_model\checkpoint-500\pytorch_model.bin
tokenizer config file saved in ./trained_model\checkpoint-500\tokenizer_config.json
Special tokens file saved in ./trained_model\checkpoint-500\special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: sentence1, idx, sentence2. If sentence1, idx, sentence2 are not expected by `BertForSequenceClassification.forward`,  you

TrainOutput(global_step=4590, training_loss=0.034586332761428945, metrics={'train_runtime': 299.037, 'train_samples_per_second': 122.66, 'train_steps_per_second': 15.349, 'total_flos': 1353749546484720.0, 'train_loss': 0.034586332761428945, 'epoch': 10.0})

In [14]:
pred = trainer.predict(tokenized_datasets['validation'])

The following columns in the test set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: sentence, idx. If sentence, idx are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 872
  Batch size = 8


In [16]:
trainer.evaluate()

The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: sentence1, idx, sentence2. If sentence1, idx, sentence2 are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 408
  Batch size = 8


{'eval_loss': 1.5847285985946655,
 'eval_accuracy': 0.8333333333333334,
 'eval_f1': 0.8851351351351352,
 'eval_runtime': 3.1198,
 'eval_samples_per_second': 130.776,
 'eval_steps_per_second': 16.347,
 'epoch': 10.0}

✏️ **Try it out!** Fine-tune a model on the GLUE SST-2 dataset, using the data processing you did in section 2.

In [13]:
raw_datasets = load_dataset("glue", "sst2")
checkpoint = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

def tokenize_function(example):
    return tokenizer(example["sentence"], truncation=True)

tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Found cached dataset glue (C:/Users/epdls/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


  0%|          | 0/3 [00:00<?, ?it/s]

loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\epdls/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.20.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading file https://huggingface.co/bert-base-uncased/r

  0%|          | 0/68 [00:00<?, ?ba/s]

Loading cached processed dataset at C:\Users\epdls\.cache\huggingface\datasets\glue\sst2\1.0.0\dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad\cache-9813c7bd64b24f69.arrow
Loading cached processed dataset at C:\Users\epdls\.cache\huggingface\datasets\glue\sst2\1.0.0\dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad\cache-aa1f46b739d75720.arrow


In [17]:
training_args = TrainingArguments(output_dir='./trained_model', num_train_epochs=10)

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [18]:
trainer = Trainer(
    model,
    training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    per_device_train_batch_size=32,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

In [19]:
trainer.train()

The following columns in the training set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: sentence, idx. If sentence, idx are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 67349
  Num Epochs = 10
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 84190


Step,Training Loss
500,0.5117
1000,0.3672
1500,0.3321
2000,0.3347
2500,0.3478
3000,0.3444
3500,0.335
4000,0.3186
4500,0.2987
5000,0.3167


Saving model checkpoint to ./trained_model\checkpoint-500
Configuration saved in ./trained_model\checkpoint-500\config.json
Model weights saved in ./trained_model\checkpoint-500\pytorch_model.bin
tokenizer config file saved in ./trained_model\checkpoint-500\tokenizer_config.json
Special tokens file saved in ./trained_model\checkpoint-500\special_tokens_map.json
Saving model checkpoint to ./trained_model\checkpoint-1000
Configuration saved in ./trained_model\checkpoint-1000\config.json
Model weights saved in ./trained_model\checkpoint-1000\pytorch_model.bin
tokenizer config file saved in ./trained_model\checkpoint-1000\tokenizer_config.json
Special tokens file saved in ./trained_model\checkpoint-1000\special_tokens_map.json
Saving model checkpoint to ./trained_model\checkpoint-1500
Configuration saved in ./trained_model\checkpoint-1500\config.json
Model weights saved in ./trained_model\checkpoint-1500\pytorch_model.bin
tokenizer config file saved in ./trained_model\checkpoint-1500\token

Model weights saved in ./trained_model\checkpoint-11500\pytorch_model.bin
tokenizer config file saved in ./trained_model\checkpoint-11500\tokenizer_config.json
Special tokens file saved in ./trained_model\checkpoint-11500\special_tokens_map.json
Saving model checkpoint to ./trained_model\checkpoint-12000
Configuration saved in ./trained_model\checkpoint-12000\config.json
Model weights saved in ./trained_model\checkpoint-12000\pytorch_model.bin
tokenizer config file saved in ./trained_model\checkpoint-12000\tokenizer_config.json
Special tokens file saved in ./trained_model\checkpoint-12000\special_tokens_map.json
Saving model checkpoint to ./trained_model\checkpoint-12500
Configuration saved in ./trained_model\checkpoint-12500\config.json
Model weights saved in ./trained_model\checkpoint-12500\pytorch_model.bin
tokenizer config file saved in ./trained_model\checkpoint-12500\tokenizer_config.json
Special tokens file saved in ./trained_model\checkpoint-12500\special_tokens_map.json
Saving

Model weights saved in ./trained_model\checkpoint-22500\pytorch_model.bin
tokenizer config file saved in ./trained_model\checkpoint-22500\tokenizer_config.json
Special tokens file saved in ./trained_model\checkpoint-22500\special_tokens_map.json
Saving model checkpoint to ./trained_model\checkpoint-23000
Configuration saved in ./trained_model\checkpoint-23000\config.json
Model weights saved in ./trained_model\checkpoint-23000\pytorch_model.bin
tokenizer config file saved in ./trained_model\checkpoint-23000\tokenizer_config.json
Special tokens file saved in ./trained_model\checkpoint-23000\special_tokens_map.json
Saving model checkpoint to ./trained_model\checkpoint-23500
Configuration saved in ./trained_model\checkpoint-23500\config.json
Model weights saved in ./trained_model\checkpoint-23500\pytorch_model.bin
tokenizer config file saved in ./trained_model\checkpoint-23500\tokenizer_config.json
Special tokens file saved in ./trained_model\checkpoint-23500\special_tokens_map.json
Saving

KeyboardInterrupt: 