In [18]:
%reload_ext autoreload
%autoreload 2

In [19]:
import torch
import numpy as np
import pandas as pd

from tqdm.notebook import tqdm

from transformers import AutoTokenizer, AutoModelForSequenceClassification

from torch.utils.data import Dataset
from torch.utils.data import DataLoader

from utils import io
from utils import miscellaneous as ms

In [20]:
# Use CUDA when PyTorch reports an available GPU; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

## Load data

In [4]:
# Path to the training split (TSV), relative to the notebook's working directory.
# Plain string literal: there are no placeholders, so no f-prefix is needed.
train_file = 'data/train/train.tsv'

In [21]:
# Read the training TSV through the project loader. `data` is used below as a
# pandas DataFrame (gold_label / premise / hypothesis / language columns);
# `vocab` is returned alongside it but is not used in the cells shown here.
data, vocab = io.load_xnli_dataset(train_file)

In [6]:
data.head()

Unnamed: 0,gold_label,premise,hypothesis,language
0,neutral,"At ground level, the asymmetrical cathedral is...",It's hard to find a dramatic view of the cathe...,en
1,contradiction,Hanuman is a beneficent deity predating classi...,Hanuman declared that all the lemurs here need...,en
2,contradiction,All other spending as well as federal revenue ...,None of the federal spending is assumed to grow,en
3,neutral,uh-huh that's interesting well it sounds as th...,That information about graduation rates is int...,en
4,neutral,Some kind of instant recognition on his father...,Did his father recognize him?,en


In [7]:
pd.DataFrame(data['language'].unique()).T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,en,vi,de,ar,es,bg,el,th,ru,tr,sw,ur,zh,hi,fr


In [18]:
# Language codes to spot-check; all four appear in data['language'].unique().
languages = ['zh', 'hi', 'sw', 'es']
# NOTE(review): ms.filter_data presumably returns one subset of `data` per
# requested language, in the same order -- confirm in utils/miscellaneous.py.
lang_list = ms.filter_data(data, languages)

In [19]:
ms.display_random_text(lang_list)

contradiction
他们中的一些愚蠢的家伙可能会相信这个故事。他对腰间的男人嗤笑，并竖起轻蔑的大拇指，他们的队伍正因艏楼的其他人的到来而稳步增加。
人群开始减少，因为大家都走向甲板就餐。
zh

neutral
हाँ मेरे पास वीसीआर है और मुझे इसे एक हि विफलता के कारण कई बार वापस करना पड़ा है और यह अभी भी एक असली अच्छी तस्वीर नहीं दिखा रहा है
वीसीआर केवल एक सप्ताह तक काम करता है और फिर इसे फिर से तोड़ दिया जाता है।
hi

entailment
Lakini kazi yangu ilikuwa kuweka parashuti juu yake na vyombo vya kuokoa maisha wakati tulipakia mzigo na kuanza kuenda mahali nje ya nchi.
Nilizituma nje.
sw

contradiction
No pienses que lo acepto de buena gana.
Lo acepto de buena gana.
es



## Model

In [11]:
# Load the tokenizer and the sequence-classification model from the same
# checkpoint, then move the model onto the selected device.
tokenizer = AutoTokenizer.from_pretrained(
    "joeddav/xlm-roberta-large-xnli"
)
model = AutoModelForSequenceClassification.from_pretrained(
    "joeddav/xlm-roberta-large-xnli"
)
model = model.to(device)

Some weights of the model checkpoint at joeddav/xlm-roberta-large-xnli were not used when initializing XLMRobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [12]:
model

XLMRobertaForSequenceClassification(
  (roberta): XLMRobertaModel(
    (embeddings): XLMRobertaEmbeddings(
      (word_embeddings): Embedding(250002, 1024, padding_idx=1)
      (position_embeddings): Embedding(514, 1024, padding_idx=1)
      (token_type_embeddings): Embedding(1, 1024)
      (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): XLMRobertaEncoder(
      (layer): ModuleList(
        (0): XLMRobertaLayer(
          (attention): XLMRobertaAttention(
            (self): XLMRobertaSelfAttention(
              (query): Linear(in_features=1024, out_features=1024, bias=True)
              (key): Linear(in_features=1024, out_features=1024, bias=True)
              (value): Linear(in_features=1024, out_features=1024, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): XLMRobertaSelfOutput(
              (dense): Linear(in_features=1024, out_features=1

## Dataset

In [13]:
from model import xlmr_xnli_dataset

In [14]:
# Wrap the loaded frame in the project's dataset class, handing it the tokenizer
# and the target device.
# NOTE(review): the dataset receives `device`, so it presumably places tensors on
# it; if it does so eagerly for the whole frame, that also consumes GPU memory --
# confirm in model/xlmr_xnli_dataset.py.
dataset = xlmr_xnli_dataset.XLMRXNLIDataset(data, tokenizer, device)

## DataLoader

In [15]:
batch_size = 16

In [16]:
# Batch the dataset in the main process (num_workers=0 means no worker subprocesses).
dataloader = DataLoader(dataset, batch_size=batch_size, num_workers=0)

In [15]:
batch = next(iter(dataloader))

## Evaluation

In [17]:
from utils import metric

In [18]:
# Metric name -> scoring callable; evaluate() calls each one as fn(y_preds, y_trues).
metric_params = dict(
    accuracy=metric.accuracy,
    macro_f1=metric.macro_f1,
    average_f1=metric.average_f1,
)

In [21]:
def evaluate(model, dataloader, metrics):
    """Run `model` over `dataloader` and report the mean loss plus custom metrics.

    The model is switched to eval mode (and left there). Every batch is fed
    through the model with gradient tracking disabled.

    Args:
        model: Callable module accepting `input_ids`, `attention_mask` and
            `labels` keyword arguments and returning an object exposing
            `.logits` and `.loss`.
        dataloader: Sized iterable of batches; each batch is indexed with the
            keys 'input_ids', 'attention_mask' and 'label'.
        metrics: Mapping of metric name -> callable invoked as
            `fn(y_preds, y_trues)` (predictions first), where both arguments
            are flat Python lists of class indices.

    Returns:
        dict with key 'loss' (unweighted mean of the per-batch losses) and one
        entry per name in `metrics`.
    """
    model.eval()

    loss_history = []
    y_preds, y_trues = [], []

    # Evaluation never calls backward(). Without no_grad() autograd records the
    # graph and retains the activations of each forward pass, which greatly
    # raises peak GPU memory for a large model.
    with torch.no_grad():
        for batch in tqdm(dataloader, total=len(dataloader)):
            output = model(
                input_ids=batch['input_ids'],
                attention_mask=batch['attention_mask'],
                labels=batch['label'],
            )

            loss_history.append(output.loss.item())

            # Reduce to class indices before copying to the host so only one
            # integer per example is transferred.
            y_preds.extend(output.logits.argmax(dim=1).cpu().tolist())
            y_trues.extend(batch['label'].cpu().tolist())

    valid_metrics = {'loss': np.mean(loss_history)}
    # `metric_fn` (not `metric`) avoids shadowing the imported `metric` module.
    for name, metric_fn in metrics.items():
        valid_metrics[name] = metric_fn(y_preds, y_trues)

    return valid_metrics

In [22]:
evaluate(model, dataloader, metric_params)

  0%|          | 0/7187 [00:00<?, ?it/s]

OutOfMemoryError: CUDA out of memory. Tried to allocate 122.00 MiB (GPU 0; 31.75 GiB total capacity; 29.95 GiB already allocated; 41.50 MiB free; 30.75 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF