In [1]:
import pandas as pd
from transformers import AutoAdapterModel, AdapterConfig, RobertaTokenizer, BertTokenizer, AutoTokenizer
import torch

In [2]:
# Read the pre-split corpora from the working directory (train first, then
# test). The encoding step in a later cell reads the `text` and `labels`
# attributes of every row, so both files need those columns.
train_data, test_data = (
    pd.read_csv(csv_path) for csv_path in ('train_all.csv', 'test_all.csv')
)

In [3]:

# Sentiment classes; a class's position in this tuple is its integer id.
SENTIMENT_CLASSES = ("positive", "neutral", "negative")

# Class name -> integer id, plus the inverse lookup (id -> class name).
# Both maps are derived from the single tuple above so they cannot drift apart.
label2id = {name: idx for idx, name in enumerate(SENTIMENT_CLASSES)}
id2label = {idx: name for name, idx in label2id.items()}
# Tokenizer checkpoint. The base model loaded further down uses the same
# checkpoint name, so the two must be changed together.
TOKENIZER_CHECKPOINT = "xlm-roberta-base"
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_CHECKPOINT)

def encode_batch(row, max_length=80):
    """Tokenize one row's text and attach its integer class label.

    Despite the name, this encodes a single example; it is written to be used
    as ``df.apply(encode_batch, axis=1)``.

    Args:
        row: Object exposing a ``text`` string and a ``labels`` value that is
            a key of the module-level ``label2id`` (e.g. one DataFrame row).
        max_length: Length every example is truncated / padded to. Defaults
            to 80, the value that used to be hard-coded; override with
            ``df.apply(encode_batch, axis=1, max_length=...)``.

    Returns:
        The tokenizer output with an added ``labels`` entry holding the class
        id as a 0-dim ``torch.long`` tensor.

    Raises:
        KeyError: If ``row.labels`` is not a key of ``label2id``.
    """
    # Drop every whitespace-separated token that begins with '@'.
    # str.split() without arguments never yields empty strings, so the
    # startswith test cannot hit an empty token.
    kept_tokens = [tok for tok in row.text.split() if not tok.startswith('@')]
    text = ' '.join(kept_tokens)

    # NOTE(review): with return_tensors='pt' and a single string, each tensor
    # keeps a leading dimension of size 1 (visible when an encoded example is
    # displayed). Left unchanged here because downstream code has only been
    # run with that shape -- confirm before squeezing it away.
    out = tokenizer(
        text,
        max_length=max_length,
        truncation=True,
        padding="max_length",
        return_tensors='pt',
    )
    out['labels'] = torch.tensor(label2id[row.labels], dtype=torch.long)
    return out

# Encode the training set: apply(axis=1) calls encode_batch once per row of
# train_data and collects the per-row results, which are later handed to the
# trainer as train_dataset.
train = train_data.apply(encode_batch, axis=1)

# Number of encoded training examples (sanity check).
len(train)

Downloading config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

Downloading sentencepiece.bpe.model:   0%|          | 0.00/4.83M [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/8.68M [00:00<?, ?B/s]

9583

In [4]:
# Inspect the encoded example stored under index label 0 to eyeball the token
# ids, padding and attention mask.
train[0]

{'input_ids': tensor([[     0,  32506, 101207,  12610, 168044, 145739,      5,   4804,    257,
          31949,     47,     10,  26267,  13379,     23,  18982,      5,     87,
           5351,  95041,   7603,  22225, 123142,   2806,  11177,     70,  54727,
          26267,    538,      5,      2,      1,      1,      1,      1,      1,
              1,      1,      1,      1,      1,      1,      1,      1,      1,
              1,      1,      1,      1,      1,      1,      1,      1,      1,
              1,      1,      1,      1,      1,      1,      1,      1,      1,
              1,      1,      1,      1,      1,      1,      1,      1,      1,
              1,      1,      1,      1,      1,      1,      1,      1]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 

In [5]:
# Encode the held-out set with the same per-row function used for training;
# the result is later handed to the trainer as eval_dataset.
test = test_data.apply(encode_batch, axis=1)

# Number of encoded test examples (sanity check).
len(test)

3138

In [6]:
# Base checkpoint, and the one name shared by the adapter and its
# classification head.
# NOTE(review): the name "sst-2" looks carried over from a tutorial; the head
# below has three classes -- confirm before renaming, since checkpoint
# directories are written under this name.
BASE_CHECKPOINT = 'xlm-roberta-base'
ADAPTER_NAME = "sst-2"
NUM_CLASSES = 3  # one per entry of label2id

model = AutoAdapterModel.from_pretrained(BASE_CHECKPOINT)

# Register the adapter, switch the model into adapter-training mode, attach
# the classification head, then activate the adapter. The sequence of calls
# is deliberate; check the adapter library docs before reordering.
model.add_adapter(ADAPTER_NAME)
model.train_adapter(ADAPTER_NAME)
model.add_classification_head(ADAPTER_NAME, num_labels=NUM_CLASSES)
model.set_active_adapters(ADAPTER_NAME)

NOTE: Redirects are currently not supported in Windows or MacOs.


Downloading pytorch_model.bin:   0%|          | 0.00/1.04G [00:00<?, ?B/s]

Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaAdapterModel: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing XLMRobertaAdapterModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaAdapterModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaAdapterModel were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['roberta.embeddings.position_ids']
You should probably TRAIN this model on a down-stream task to be able to use it for prediction

In [7]:
import numpy as np
from transformers import TrainingArguments, AdapterTrainer, EvalPrediction

# Hyper-parameters for the fine-tuning run. No evaluation or checkpoint
# schedule is passed, so the library defaults apply to both.
training_args = TrainingArguments(
    # where checkpoints go
    output_dir="./training_output",
    overwrite_output_dir=True,
    # optimisation
    num_train_epochs=6,
    learning_rate=1e-4,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    # progress reporting
    logging_steps=200,
)

def compute_accuracy(p: EvalPrediction):
    """Return the share of examples whose top-scoring class equals the label.

    ``p.predictions`` is reduced with an argmax over axis 1, compared
    element-wise with ``p.label_ids``, and the mean of the matches is
    returned under the key ``"acc"``.
    """
    predicted_ids = np.argmax(p.predictions, axis=1)
    is_correct = predicted_ids == p.label_ids
    return {"acc": is_correct.mean()}

# Wire the model, hyper-parameters, data and metric into the adapter-aware
# trainer. `test` is the evaluation set that trainer.evaluate() will score.
trainer = AdapterTrainer(
    args=training_args,
    model=model,
    compute_metrics=compute_accuracy,
    train_dataset=train,
    eval_dataset=test,
)

In [8]:

# Run fine-tuning with the settings in training_args. The recorded run below
# reports a train_runtime of roughly 22971 s on the original machine, so
# expect this cell to take a long time there.
trainer.train()

***** Running training *****
  Num examples = 9583
  Num Epochs = 6
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 1800


Step,Training Loss
200,0.9315
400,0.6579
600,0.6097
800,0.5847
1000,0.5404
1200,0.5364
1400,0.5216
1600,0.4952
1800,0.5033


Saving model checkpoint to ./training_output/checkpoint-500
Configuration saved in ./training_output/checkpoint-500/sst-2/adapter_config.json
Module weights saved in ./training_output/checkpoint-500/sst-2/pytorch_adapter.bin
Configuration saved in ./training_output/checkpoint-500/sst-2/head_config.json
Module weights saved in ./training_output/checkpoint-500/sst-2/pytorch_model_head.bin
Configuration saved in ./training_output/checkpoint-500/sst-2/head_config.json
Module weights saved in ./training_output/checkpoint-500/sst-2/pytorch_model_head.bin
Saving model checkpoint to ./training_output/checkpoint-1000
Configuration saved in ./training_output/checkpoint-1000/sst-2/adapter_config.json
Module weights saved in ./training_output/checkpoint-1000/sst-2/pytorch_adapter.bin
Configuration saved in ./training_output/checkpoint-1000/sst-2/head_config.json
Module weights saved in ./training_output/checkpoint-1000/sst-2/pytorch_model_head.bin
Configuration saved in ./training_output/checkpoin

TrainOutput(global_step=1800, training_loss=0.597850341796875, metrics={'train_runtime': 22971.314, 'train_samples_per_second': 2.503, 'train_steps_per_second': 0.078, 'total_flos': 2404815275741760.0, 'train_loss': 0.597850341796875, 'epoch': 6.0})

In [9]:
# Score the trained model on the eval_dataset given to the trainer (`test`).
# The returned dict reports compute_accuracy's "acc" value as 'eval_acc'.
trainer.evaluate()

***** Running Evaluation *****
  Num examples = 3138
  Batch size = 32


{'eval_loss': 0.5285979509353638,
 'eval_acc': 0.7794773741236456,
 'eval_runtime': 574.5506,
 'eval_samples_per_second': 5.462,
 'eval_steps_per_second': 0.172,
 'epoch': 6.0}