In [1]:
from datasets import load_dataset
from transformers import TrainingArguments, Trainer
from sklearn.metrics import matthews_corrcoef, accuracy_score, f1_score, balanced_accuracy_score
from transformers import RobertaForSequenceClassification, RobertaModel, RobertaConfig, RobertaTokenizer

In [2]:
mnli = load_dataset("multi_nli")

Found cached dataset multi_nli (/home/imger/.cache/huggingface/datasets/multi_nli/default/0.0.0/591f72eb6263d1ab527561777936b199b714cda156d35716881158a2bd144f39)


  0%|          | 0/3 [00:00<?, ?it/s]

In [3]:
roberta = RobertaModel.from_pretrained("roberta-base")

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [4]:
config = RobertaConfig.from_json_file("../models/sequence_classification.json")
model = RobertaForSequenceClassification(config)

In [5]:
state_dict = roberta.state_dict()
del state_dict["pooler.dense.weight"]
del state_dict["pooler.dense.bias"]
model.roberta.load_state_dict(state_dict)

<All keys matched successfully>

In [6]:
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

In [7]:
mnli_train = mnli["train"]
mnli_train = mnli_train.map(lambda d: {"x": [f"{p}</s></s>{h}" for p, h in zip(d["premise"], d["hypothesis"])]}, batched=True)
mnli_train = mnli_train.map(lambda d: tokenizer(d["x"], padding="max_length", truncation=True), batched=True)
mnli_train.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

Loading cached processed dataset at /home/imger/.cache/huggingface/datasets/multi_nli/default/0.0.0/591f72eb6263d1ab527561777936b199b714cda156d35716881158a2bd144f39/cache-7265d058a7b86bbf.arrow
Loading cached processed dataset at /home/imger/.cache/huggingface/datasets/multi_nli/default/0.0.0/591f72eb6263d1ab527561777936b199b714cda156d35716881158a2bd144f39/cache-6a90ef943f1f7ccc.arrow


In [8]:
mnli_val = mnli["validation_matched"]
mnli_val = mnli_val.map(lambda d: {"x": [f"{p}</s></s>{h}" for p, h in zip(d["premise"], d["hypothesis"])]}, batched=True) \
    .map(lambda d: tokenizer(d["x"], padding="max_length", truncation=True), batched=True)
mnli_val.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

Loading cached processed dataset at /home/imger/.cache/huggingface/datasets/multi_nli/default/0.0.0/591f72eb6263d1ab527561777936b199b714cda156d35716881158a2bd144f39/cache-974b93da97f091a2.arrow
Loading cached processed dataset at /home/imger/.cache/huggingface/datasets/multi_nli/default/0.0.0/591f72eb6263d1ab527561777936b199b714cda156d35716881158a2bd144f39/cache-1b429a8927c1e5bf.arrow


In [9]:
def compute_metrics(pred):
    """
    Shows a few helpful metrics and saves them in specified directory
    :param pred: list
    """

    true = pred.label_ids
    predicted = pred.predictions.argmax(-1)

    return {
        "MCC": matthews_corrcoef(true, predicted),
        "F1": f1_score(true, predicted, average='macro'),
        "Acc": accuracy_score(true, predicted),
        "BAcc": balanced_accuracy_score(true, predicted),
    }

In [10]:
training_args = TrainingArguments(
    output_dir='./results',          # output directory
    num_train_epochs=3,              # total number of training epochs
    per_device_train_batch_size=2,  # batch size per device during training
    per_device_eval_batch_size=8,   # batch size for evaluation
    warmup_steps=500,                # number of warmup steps for learning rate scheduler
    weight_decay=0.01,               # strength of weight decay
    logging_dir='./logs',            # directory for storing logs
    load_best_model_at_end=True,     # load the best model when finished training (default metric is loss)
    # but you can specify `metric_for_best_model` argument to change to accuracy or other metric
    logging_steps=1000,               # log & save weights each logging_steps
    save_steps=1000,
    evaluation_strategy="steps",     # evaluate each `logging_steps`
)

trainer = Trainer(
    model=model,                         # the instantiated Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    train_dataset=mnli_train,         # training dataset
    eval_dataset=mnli_val,          # evaluation dataset
    compute_metrics=compute_metrics,     # the callback that computes metrics of interest
)

In [11]:
trainer.train()

Step,Training Loss,Validation Loss,Mcc,F1,Acc,Bacc
1000,1.124,1.102177,0.0,0.164415,0.327356,0.333333
2000,1.1224,1.105609,0.0,0.160921,0.318186,0.333333
3000,1.1182,1.101572,0.0,0.174465,0.354457,0.333333
4000,1.1211,1.100234,0.0,0.174465,0.354457,0.333333
5000,1.1163,1.107134,0.0,0.164415,0.327356,0.333333
6000,1.115,1.101544,0.0,0.164415,0.327356,0.333333
7000,1.1163,1.102512,0.0,0.174465,0.354457,0.333333
8000,1.1103,1.105491,0.0,0.164415,0.327356,0.333333
9000,1.1122,1.105091,0.0,0.164415,0.327356,0.333333


KeyboardInterrupt: 