In [1]:
# (Re)load IPython's autoreload extension and switch it to mode 2, so edits to
# imported modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [2]:
import torch
import random
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from transformers.adapters.composition import Stack

In [3]:
from utils import io
from utils import plot
from utils import metric
from model import train_evaluate

from model import xlmr_xnli_model
from model import xlmr_xnli_dataset

from transformers import AutoTokenizer
from transformers import XLMRobertaTokenizer, XLMRobertaModel

In [4]:
# Run on the GPU when CUDA is visible to PyTorch; otherwise stay on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [5]:
# One seed shared by every random number generator the notebook uses.
seed = 144

# Request deterministic cuDNN behaviour before any GPU work starts.
torch.backends.cudnn.deterministic = True

# Seed PyTorch, NumPy and the standard-library generator with the same value.
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

## Load data

In [6]:
# Path of the training CSV, relative to the notebook's working directory.
# NOTE(review): the file name suggests English plus hi/sw/zh/es data — confirm contents.
train_file = "data/train/extended_train_en_hi_sw_zh_es.csv"
# Parse the CSV with the project loader. Later cells read `.cat.categories` from
# the `language` and `gold_label` columns, so both are expected to be categorical.
data = io.load_xnli_dataset_csv(train_file)

In [7]:
# Languages of interest, referenced by their short codes.
languages = ['zh', 'es', 'hi', 'sw']

# Category name -> integer position within the `language` categorical column.
lang_code_map = {
    name: code
    for code, name in enumerate(data.language.cat.categories)
}
# Inverse view restricted to `languages`: integer position -> language name.
lang_codes = {lang_code_map[name]: name for name in languages}

In [8]:
# Category vocabularies of the two categorical columns, kept as plain arrays so
# integer codes can be mapped back to their names later.
dataset_info = dict(
    language=data.language.cat.categories.values,
    gold_labels=data.gold_label.cat.categories.values,
)

In [9]:
# Partition the loaded frame into train / validation / test parts using the
# project helper.
# NOTE(review): how `lang_codes` steers the split is defined in utils.io — confirm there.
train_data, valid_data, test_data = io.split_dataset(data, lang_codes=lang_codes)

## Dataloader

In [10]:
# Tokenizer for the "xlm-roberta-base" checkpoint, the same checkpoint name the
# model cell loads.
tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")

In [11]:
# Wrap the train and validation splits in the project's dataset class.
# The notebook-wide `device` is passed instead of a hard-coded CUDA device, so
# the datasets follow the same CPU/GPU choice as the model and the notebook
# still runs on machines without a GPU.
# NOTE(review): presumably the third argument is the device tensors are placed
# on — confirm in XLMRXNLIDataset.
train_dataset = xlmr_xnli_dataset.XLMRXNLIDataset(train_data, tokenizer, device)
valid_dataset = xlmr_xnli_dataset.XLMRXNLIDataset(valid_data, tokenizer, device)

## Model

In [12]:
from transformers import AutoConfig, AutoAdapterModel

# Load the XLM-R base configuration, build the adapter-capable model from the
# same checkpoint, and move it to the selected device in one step.
config = AutoConfig.from_pretrained("xlm-roberta-base")
model = AutoAdapterModel.from_pretrained("xlm-roberta-base", config=config).to(device)

Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaAdapterModel: ['lm_head.dense.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing XLMRobertaAdapterModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaAdapterModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaAdapterModel were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['roberta.embeddings.position_ids']
You should probably TRAIN this model on a down-stream task to be able to use it for prediction

In [13]:
from transformers import AdapterConfig

# Configuration shared by every language adapter loaded below.
lang_adapter_config = AdapterConfig.load("pfeiffer", reduction_factor=2)

# Load one pretrained language adapter per language used in this notebook:
# English plus the zh / hi / es / sw languages listed earlier.
model.load_adapter("en/wiki@ukp", config=lang_adapter_config)
model.load_adapter("zh/wiki@ukp", config=lang_adapter_config)
model.load_adapter("hi/wiki@ukp", config=lang_adapter_config)
model.load_adapter("es/wiki@ukp", config=lang_adapter_config)
model.load_adapter("sw/wiki@ukp", config=lang_adapter_config)

# Fresh task adapter that will be trained on XNLI.
model.add_adapter("xnli")

# XNLI is a single-sequence classification task, so it needs a classification
# head with one output per gold label. A multiple-choice head with two choices
# scores alternative candidate sequences and does not match NLI labels.
model.add_classification_head("xnli", num_labels=len(dataset_info['gold_labels']))

In [14]:
# Select the "xnli" task adapter as the component to train.
# NOTE(review): in adapter-transformers this is expected to freeze the base
# model and the other adapters — confirm against the installed version.
model.train_adapter(["xnli"])

In [15]:
# Activate the English language adapter stacked with the "xnli" task adapter.
# NOTE(review): assumes the adapter loaded from "en/wiki@ukp" is registered
# under the name "en" — confirm.
model.active_adapters = Stack("en", "xnli")

In [16]:
from transformers import TrainingArguments, AdapterTrainer
from datasets import concatenate_datasets

# Hyper-parameters and bookkeeping options for the Trainer.
training_args = TrainingArguments(
    learning_rate=1e-4,
    num_train_epochs=8,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    logging_steps=100,
    output_dir="./training_output",
    overwrite_output_dir=True,
    # Keep every field the custom dataset yields; the Trainer would otherwise
    # drop columns that the model's forward signature does not name.
    remove_unused_columns=False,
    # The Trainer seeds the RNGs from this argument when it is constructed.
    # Passing the notebook seed keeps training consistent with the manual
    # seeding cell instead of silently falling back to the library default.
    seed=seed,
)

In [17]:
# Wire the trainer together: the hyper-parameters, the adapter-equipped model,
# the tokenizer's `pad` method as the batch collator, and the training split.
trainer = AdapterTrainer(
    args=training_args,
    model=model,
    data_collator=tokenizer.pad,
    train_dataset=train_dataset,
)

In [17]:
# Start adapter training with the trainer configured above.
# NOTE(review): the saved output of this cell ends in a traceback raised during
# `on_train_begin`; re-run and clear the stale output before sharing.
trainer.train()

***** Running training *****
  Num examples = 396485
  Num Epochs = 8
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 99128
Exception in thread Thread-5:
Traceback (most recent call last):
  File "/home/scai/phd/aiz218323/scratch/anaconda3/envs/xnli/lib/python3.7/threading.py", line 926, in _bootstrap_inner
    self.run()
  File "/home/scai/phd/aiz218323/scratch/anaconda3/envs/xnli/lib/python3.7/site-packages/tensorboard/summary/writer/event_file_writer.py", line 233, in run
    self._record_writer.write(data)
  File "/home/scai/phd/aiz218323/scratch/anaconda3/envs/xnli/lib/python3.7/site-packages/tensorboard/summary/writer/record_writer.py", line 40, in write
    self._writer.write(header + header_crc + data + footer_crc)
  File "/home/scai/phd/aiz218323/scratch/anaconda3/envs/xnli/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/io/gfile.py",

Traceback (most recent call last):
  File "/home/scai/phd/aiz218323/scratch/anaconda3/envs/xnli/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3553, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_19127/4032920361.py", line 1, in <module>
    trainer.train()
  File "/home/scai/phd/aiz218323/scratch/anaconda3/envs/xnli/lib/python3.7/site-packages/transformers/trainer.py", line 1502, in train
    ignore_keys_for_eval=ignore_keys_for_eval,
  File "/home/scai/phd/aiz218323/scratch/anaconda3/envs/xnli/lib/python3.7/site-packages/transformers/trainer.py", line 1669, in _inner_training_loop
    self.control = self.callback_handler.on_train_begin(args, self.state, self.control)
  File "/home/scai/phd/aiz218323/scratch/anaconda3/envs/xnli/lib/python3.7/site-packages/transformers/trainer_callback.py", line 353, in on_train_begin
    return self.call_event("on_train_begin", args, state, control)
  File "/home/scai/phd/aiz218323/scratch

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/scai/phd/aiz218323/scratch/anaconda3/envs/xnli/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3553, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_19127/4032920361.py", line 1, in <module>
    trainer.train()
  File "/home/scai/phd/aiz218323/scratch/anaconda3/envs/xnli/lib/python3.7/site-packages/transformers/trainer.py", line 1502, in train
    ignore_keys_for_eval=ignore_keys_for_eval,
  File "/home/scai/phd/aiz218323/scratch/anaconda3/envs/xnli/lib/python3.7/site-packages/transformers/trainer.py", line 1669, in _inner_training_loop
    self.control = self.callback_handler.on_train_begin(args, self.state, self.control)
  File "/home/scai/phd/aiz218323/scratch/anaconda3/envs/xnli/lib/python3.7/site-packages/transformers/trainer_callback.py", line 353, in on_train_begin
    return self.call_event("on_train_begin", args, state, control)
  File "/home/scai/phd/aiz218323/scratch

TypeError: object of type 'NoneType' has no len()