In [2]:
import torch
import torch.nn.functional as F

In [5]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = 'cpu'

In [1]:
import torch.nn as nn
from transformers import AutoModelForTokenClassification


class QASpanDetector(nn.Module):
    """Thin ``nn.Module`` wrapper around a Hugging Face token-classification model.

    Args:
        model_name: Checkpoint identifier handed to
            ``AutoModelForTokenClassification.from_pretrained``. Defaults to
            ``'roberta-base'``; ``'YituTech/conv-bert-base'`` is another
            checkpoint this notebook has been pointed at.
        num_labels: Number of logits produced per token. Defaults to 2.
    """

    def __init__(self, model_name='roberta-base', num_labels=2):
        super().__init__()
        self.model = AutoModelForTokenClassification.from_pretrained(model_name, num_labels=num_labels)

    def forward(self, input_ids, attention_mask):
        """Run the wrapped model and return its output object unchanged.

        Args:
            input_ids: Token ids, forwarded as the ``input_ids`` keyword.
            attention_mask: Attention mask, forwarded as the ``attention_mask`` keyword.

        Returns:
            Whatever the wrapped model returns; callers in this notebook read
            its ``'logits'`` entry.
        """
        return self.model(input_ids=input_ids, attention_mask=attention_mask)

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# Restore the saved model object from disk, remapping its tensors onto `device`.
# NOTE(review): torch.load unpickles the file, so only point it at checkpoints
# from a trusted source.
#
# Other sources that have been used in this cell:
#   '../input/robertayolo/yolo_qa_v6.pth' (with and without map_location=torch.device('cpu'))
#   '../input/robertayolo/roberta_yolo_2_epochs.pth'
#   '../input/xlmyoloqamodel/yolo_qa.pth'
#   '../input/conv-bert-yolo-model/yolo_qa.pth'
#   a freshly constructed QASpanDetector()
model = torch.load(
    'models/yolo_qa_v6.pth',
    map_location=torch.device(device),
)

In [7]:
from transformers.data.metrics.squad_metrics import compute_f1

In [8]:
import json

class SquadDataset(torch.utils.data.Dataset):
    """Answerable questions from a SQuAD-format JSON file, tokenized on access.

    The file is expected to hold ``data -> paragraphs -> qas`` nesting, where
    each ``qa`` has a ``question`` and a list of ``answers`` with a ``text``
    field. Questions whose ``answers`` list is empty are skipped.

    Args:
        file_path: Path to the JSON file.
        tokenizer: Callable invoked as
            ``tokenizer(context, question, truncation=True, padding=True)``
            that returns a mapping of list-like encodings.

    Attributes are parallel lists indexed by example: ``contexts``,
    ``questions`` and ``answers`` (each entry a list of answer strings).
    The parsed JSON is kept on ``data``.
    """

    def __init__(self, file_path, tokenizer):
        # Pin UTF-8 so parsing does not depend on the platform's default
        # encoding; SQuAD text contains non-ASCII characters.
        with open(file_path, 'r', encoding='utf-8') as f:
            self.data = json.load(f)

        self.tokenizer = tokenizer
        self.contexts = []
        self.questions = []
        self.answers = []
        for group in self.data['data']:
            for passage in group['paragraphs']:
                context = passage['context']
                for qa in passage['qas']:
                    # Entries without any reference answer are left out.
                    if len(qa['answers']) == 0:
                        continue
                    self.questions.append(qa['question'])
                    self.contexts.append(context)
                    self.answers.append([answer['text'] for answer in qa['answers']])

    def __getitem__(self, index):
        """Tokenize example ``index`` and return its encodings plus raw text.

        Returns:
            dict with one tensor per tokenizer output key, and the untokenized
            ``'answers'`` (list of str), ``'question'`` and ``'context'``.
        """
        context = self.contexts[index]
        question = self.questions[index]
        answer = self.answers[index]

        # The context is passed first and the question second.
        encodings = self.tokenizer(context, question, truncation=True, padding=True)
        encodings = {key: torch.tensor(val) for key, val in encodings.items()}
        encodings['answers'] = answer
        encodings['question'] = question
        encodings['context'] = context

        return encodings

    def __len__(self):
        """Number of retained (answerable) questions."""
        return len(self.questions)

In [9]:
class CustomCollator:
    """Collate a single dataset sample into fixed-length, padded batch tensors.

    Only ``batch_size == 1`` is supported. The one sample is right-padded to
    the tokenizer's ``model_max_length``.

    Args:
        tokenizer: Object exposing ``pad_token_id`` and ``model_max_length``.
    """

    def __init__(self, tokenizer):
        self.pad_token_id = tokenizer.pad_token_id
        # Captured here so __call__ uses the tokenizer this collator was built
        # with rather than whatever `tokenizer` happens to be a global.
        self.max_seq_length = tokenizer.model_max_length

    def __call__(self, samples):
        """Pad the single sample in ``samples`` and pass its text fields through.

        Args:
            samples: List holding exactly one dict with 1-D ``'input_ids'`` and
                ``'attention_mask'`` tensors plus ``'answers'``, ``'question'``
                and ``'context'`` entries.

        Returns:
            dict with ``input_ids`` and ``attention_mask`` of shape
            ``(1, max_seq_length)`` (dtype long), and the sample's ``answers``,
            ``question`` and ``context`` unchanged.

        Raises:
            AssertionError: If ``samples`` does not contain exactly one item.
        """
        batch_size = len(samples)
        assert batch_size == 1, f'Only batch_size=1 supported, got batch_size={batch_size}.'

        sample = samples[0]

        input_shape = (1, self.max_seq_length)
        # Start fully padded / fully masked, then copy the real tokens in.
        input_ids = torch.full(input_shape,
                               self.pad_token_id,
                               dtype=torch.long)
        attention_mask = torch.zeros(input_shape, dtype=torch.long)

        seq_length = len(sample['input_ids'])
        input_ids[0, :seq_length] = sample['input_ids']
        attention_mask[0, :seq_length] = sample['attention_mask']

        return dict(input_ids=input_ids,
                    attention_mask=attention_mask,
                    answers=sample['answers'],
                    question=sample['question'],
                    context=sample['context'])

In [10]:
from transformers import AutoTokenizer
# Tokenizer for the "roberta-base" checkpoint.
# Other checkpoints that have been used here: "xlm-roberta-base", "YituTech/conv-bert-base".
tokenizer = AutoTokenizer.from_pretrained(
    "roberta-base",
)

Downloading: 100%|██████████| 481/481 [00:00<?, ?B/s] 
Downloading: 100%|██████████| 878k/878k [01:22<00:00, 10.9kB/s] 
Downloading: 100%|██████████| 446k/446k [00:10<00:00, 43.2kB/s] 
Downloading: 100%|██████████| 1.29M/1.29M [00:36<00:00, 36.9kB/s]


In [11]:
# Evaluation examples come from the SQuAD 2.0 dev file.
# ('../input/squad-20/train-v2.0.json' has also been used as the source here.)
dev_data = SquadDataset(
    file_path='data/dev-v2.0.json',
    tokenizer=tokenizer,
)

In [12]:
from torch.utils.data import DataLoader

# One example per batch, in file order; the collator pads each example to a fixed length.
collate = CustomCollator(tokenizer)
dev_loader = DataLoader(
    dev_data,
    collate_fn=collate,
    shuffle=False,
    batch_size=1,
)

In [13]:
# Sanity check: pull the first collated batch so the notebook displays it.
next(iter(dev_loader))

{'input_ids': tensor([[    0,   133, 20336,  1253,    36,   487, 16803,    35,   234,  2126,
            119,  8771,   131,  1515,    35, 20336,  8771,   131,  5862,    35,
          20336, 28867,    43,    58,     5,    82,    54,    11,     5,   158,
            212,     8,   365,   212, 11505,   851,    49,   766,     7, 37741,
              6,    10,   976,    11,  1470,     4,   252,    58, 22306,    31,
          41498,  6697,   487, 16803,   113,   606,    31,    22,   487, 27209,
            397,  8070, 10369,   268,     8, 34941,    31, 10060,     6, 14605,
              8,  8683,    54,     6,   223,    49,   884, 13065,   139,     6,
           1507,     7, 24909, 10668, 12107,     7,  1745,  3163,  6395,     9,
            580, 17932,   493,     4,  6278,  6808,     9,  8446, 43616,     8,
          17793,    19,     5,  3763,  3848,  1173,     8,  7733,    12,   534,
           6695,  1173,  9883,     6,    49, 29285,    74,  9097, 19388,    19,
              5,  9347,   1

In [14]:
# Display the tokenizer's maximum sequence length (the collator pads every example to this size).
tokenizer.model_max_length

512

In [15]:
import csv

# Evaluate `model` on `dev_loader`: decode one predicted span per example, score it
# against every reference answer with `compute_f1`, write one CSV row per example,
# and report the mean of the per-example best F1.
device = "cuda" if torch.cuda.is_available() else 'cpu'

model.eval()
f1 = 0
num_examples = 0

# `with` closes the file even if the loop raises. newline='' is what the csv
# module requires for files handed to csv.writer, and UTF-8 keeps non-ASCII
# question/context text writable regardless of the platform default.
with open('results.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(['Question', 'Context', 'Ground Truth', 'Prediction', 'F1'])

    print("############Evaluate############")
    with torch.no_grad():
        for batch in dev_loader:
            outputs = model(batch['input_ids'].to(device), batch['attention_mask'].to(device))
            logits = outputs['logits']

            # Channel 0 is a per-token score; its argmax is taken as the first
            # token of the predicted answer.
            obj_pred = logits[0, :, 0]
            answer_start_pred = int(torch.argmax(obj_pred))
            # Channel 1 at that token is exponentiated and rounded to give the
            # predicted span length in tokens.
            answer_length_pred = int(logits[0, :, 1][answer_start_pred].exp().round())
            answer_end_pred = answer_start_pred + answer_length_pred
            answer_pred = tokenizer.decode(batch['input_ids'][0, answer_start_pred:answer_end_pred],
                                           skip_special_tokens=True)

            answers = batch['answers']
            max_f1 = 0
            best_fit_target = ''
            for answer_target in answers:
                f1_score = compute_f1(answer_target, answer_pred)
                # `>=` (not `>`) so a reference answer is still recorded when
                # every candidate scores 0.
                if f1_score >= max_f1:
                    max_f1 = f1_score
                    best_fit_target = answer_target
            writer.writerow([batch['question'], batch['context'], best_fit_target, answer_pred, max_f1])

            f1 += max_f1
            num_examples += 1

# An empty loader would otherwise raise ZeroDivisionError.
average_f1 = f1 / num_examples if num_examples else float('nan')
print('Average F1:', average_f1)


############Evaluate############
Average F1: 1.0
