In [1]:
import transformers
from transformers import RobertaTokenizer, RobertaForQuestionAnswering
# Checkpoint id shared by the tokenizer and the QA model, named once so both
# are always loaded from the same place.
MODEL_NAME = 'ngnigel99/RoBERTa_QA-FineTuned'

tokenizer = RobertaTokenizer.from_pretrained(MODEL_NAME)
model = RobertaForQuestionAnswering.from_pretrained(MODEL_NAME)

In [2]:
# reading from dev-v1.1.json
import json
# Open the file in a context manager so the handle is closed as soon as parsing
# finishes, and read it as UTF-8 explicitly instead of relying on the platform
# default encoding.
with open("./dev-v1.1.json", encoding="utf-8") as dev_file:
    data = json.load(dev_file)

In [14]:
import time
from tqdm import tqdm
import torch

def parse_data(data):
    """Flatten a nested QA dataset into (id, question, context) triples.

    Walks ``data["data"]`` -> ``"paragraphs"`` -> ``"qas"`` and pairs every
    question with the context of the paragraph it belongs to.

    Args:
        data: dict with a top-level ``"data"`` list; each entry holds
            ``"paragraphs"``, each paragraph a ``"context"`` and a ``"qas"``
            list whose items carry ``"id"`` and ``"question"``.

    Returns:
        A ``zip`` iterator of ``(id, question, context)`` tuples in document
        order. It is single-pass: wrap it in ``list`` to iterate more than once.
    """
    # One flat pass over the three nesting levels; each record keeps the
    # question, its paragraph context and its id together.
    records = [
        (qa["question"], paragraph["context"], qa["id"])
        for article in data["data"]
        for paragraph in article["paragraphs"]
        for qa in paragraph["qas"]
    ]

    question_list = [record[0] for record in records]
    context_list = [record[1] for record in records]
    id_list = [record[2] for record in records]

    return zip(id_list, question_list, context_list)

def answer(model, data, output_file):
    """Predict an answer span for every question in ``data`` and write them to JSON.

    Each (question, context) pair is encoded with the notebook-level
    ``tokenizer``, run through ``model``, and the predicted span is decoded
    back to text. The span start and end are chosen independently as the
    argmax of the start and end logits.

    Args:
        model: question-answering model whose output exposes ``start_logits``
            and ``end_logits``. It is moved to CUDA when available, else CPU.
        data: dataset dict in the nested layout consumed by ``parse_data``.
        output_file: path of the JSON file to write; it maps each question id
            to the predicted answer string.

    Side effects:
        Prints the elapsed time and a completion message, and writes
        ``output_file``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    model.eval()  # inference only: make sure dropout is disabled

    start_time = time.time()

    # Materialise the examples so tqdm knows the total and can show progress/ETA.
    examples = list(parse_data(data))
    predictions = {}

    # No gradients are needed for prediction; skipping autograd bookkeeping
    # saves memory and time on every forward pass.
    with torch.no_grad():
        for id_, question, context in tqdm(examples):
            # Overflowing tokens are not requested because they were never used;
            # only the context (second segment) is truncated to fit 512 tokens.
            inputs = tokenizer.encode_plus(question, context, add_special_tokens=True, return_tensors="pt",
                                           max_length=512, truncation='only_second', padding='max_length')
            inputs = {k: v.to(device) for k, v in inputs.items()}

            outputs = model(**inputs)

            # Batch size is 1, so the flattened argmax is the token position.
            answer_start = int(torch.argmax(outputs.start_logits))
            answer_end = int(torch.argmax(outputs.end_logits)) + 1  # exclusive end

            # If the predicted end precedes the start the slice is empty and the
            # decoded answer is an empty string.
            answer_ids = inputs["input_ids"][0, answer_start:answer_end].tolist()
            answer_text = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(answer_ids))

            predictions[id_] = answer_text

    print("Time taken: ", time.time() - start_time, "seconds")

    with open(output_file, 'w') as f:
        json.dump(predictions, f)

    print("Done writing to file")
            
# Run inference over the loaded dev set and save the id -> answer mapping.
answer(model=model, data=data, output_file="predictions.json")

0it [00:00, ?it/s]

10570it [01:20, 131.37it/s]

Time taken:  80.46471047401428 seconds
Done writing to file





In [2]:
# Score predictions.json against dev-v1.1.json with the evaluate-v2.0.py script
# (shell escape; the script and both files must be in the working directory).
!python evaluate-v2.0.py dev-v1.1.json predictions.json

{
  "exact": 69.11069063386944,
  "f1": 77.08073476189746,
  "total": 10570,
  "HasAns_exact": 69.11069063386944,
  "HasAns_f1": 77.08073476189746,
  "HasAns_total": 10570
}
