In [1]:
import transformers
from transformers import RobertaTokenizer, RobertaForQuestionAnswering
# Single source of truth for the Hugging Face Hub checkpoint, so the tokenizer
# and the model are always loaded from the same repository.
MODEL_NAME = 'ngnigel99/RoBERTa_QA-FineTuned_2'
tokenizer = RobertaTokenizer.from_pretrained(MODEL_NAME)
model = RobertaForQuestionAnswering.from_pretrained(MODEL_NAME)

Downloading (…)okenizer_config.json:   0%|          | 0.00/1.22k [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/681 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/496M [00:00<?, ?B/s]

In [2]:
# Load the evaluation set from dev-v1.1.json.
import json

# The context manager closes the file handle deterministically, and the explicit
# encoding keeps the read independent of the platform's default locale.
with open("./dev-v1.1.json", encoding="utf-8") as dataset_file:
    data = json.load(dataset_file)

In [4]:
import time
from tqdm import tqdm
import torch

def parse_data(data):
    """Flatten a nested question-answering dataset into (id, question, context) triples.

    Walks ``data["data"]`` -> ``"paragraphs"`` -> ``"qas"`` and, for every
    question entry, pairs its ``"id"`` and ``"question"`` with the ``"context"``
    of the paragraph it belongs to.

    Args:
        data: Dictionary with a top-level ``"data"`` list whose items carry
            ``"paragraphs"``, each holding a ``"context"`` string and a
            ``"qas"`` list of ``{"id", "question", ...}`` entries.

    Returns:
        A ``zip`` iterator of ``(id, question, context)`` tuples in document
        order. The traversal itself is done eagerly, before returning.
    """
    # Collect one row per question; a paragraph's context is repeated for each
    # of its questions.
    rows = [
        (qa["question"], paragraph["context"], qa["id"])
        for article in data["data"]
        for paragraph in article["paragraphs"]
        for qa in paragraph["qas"]
    ]

    # Split the rows back into parallel columns for the final zip.
    ids = [qa_id for _, _, qa_id in rows]
    questions = [question for question, _, _ in rows]
    contexts = [context for _, context, _ in rows]

    return zip(ids, questions, contexts)

def answer(model, data, output_file):
    """Run extractive question answering over ``data`` and save the predictions.

    Every (id, question, context) triple produced by ``parse_data`` is encoded
    with the module-level ``tokenizer`` and passed through ``model``. The
    predicted answer is the text decoded from the tokens between the
    highest-scoring start position and the highest-scoring end position
    (inclusive). If the end position precedes the start position the slice is
    empty and the prediction is an empty string.

    Args:
        model: Question-answering model whose output exposes ``start_logits``
            and ``end_logits``. It is moved to the GPU when one is available
            and put into evaluation mode.
        data: Dataset dictionary in the layout expected by ``parse_data``.
        output_file: Path of the JSON file that receives the
            ``{question id: answer text}`` mapping.

    Returns:
        None. Prints the elapsed time and a completion message, and writes
        ``output_file``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    # Make sure dropout and similar train-time layers are disabled so the
    # predictions are deterministic.
    model.eval()

    start_time = time.time()

    # Materialise the examples so tqdm can show a total and an ETA instead of
    # a bare iteration counter.
    examples = list(parse_data(data))

    res = {}
    # Inference only: skipping autograd bookkeeping saves memory and time.
    with torch.no_grad():
        for id_, question, context in tqdm(examples):
            encoding = tokenizer.encode_plus(question, context, add_special_tokens=True, return_tensors="pt",
                                             max_length=512, truncation='only_second', padding='max_length',
                                             return_overflowing_tokens=True)

            # The truncation bookkeeping entries are not model inputs; keep
            # everything else and move it to the model's device.
            inputs = {
                k: v.to(device)
                for k, v in encoding.items()
                if k not in ("overflowing_tokens", "num_truncated_tokens")
            }

            outputs = model(**inputs)

            # Most likely start token, and one past the most likely end token
            # so the slice below includes the end token itself.
            answer_start = int(torch.argmax(outputs.start_logits))
            answer_end = int(torch.argmax(outputs.end_logits)) + 1

            # Plain Python ints are what the tokenizer expects for id lookup.
            span_ids = inputs["input_ids"][0][answer_start:answer_end].tolist()
            answer_text = tokenizer.convert_tokens_to_string(
                tokenizer.convert_ids_to_tokens(span_ids)
            )

            res[id_] = answer_text

    print("Time taken: ", time.time() - start_time, "seconds")

    with open(output_file, 'w') as f:
        json.dump(res, f)

    print("Done writing to file")
            
# Run inference on the loaded dataset and write the predictions to predictions_2.json.
answer(model, data, "predictions_2.json")

0it [00:00, ?it/s]

10570it [01:13, 143.34it/s]


Time taken:  73.74936938285828 seconds
Done writing to file


In [5]:
# Score predictions_2.json against dev-v1.1.json with the evaluation script.
!python evaluate-v2.0.py dev-v1.1.json predictions_2.json

{
  "exact": 72.57332071901608,
  "f1": 80.79021807607467,
  "total": 10570,
  "HasAns_exact": 72.57332071901608,
  "HasAns_f1": 80.79021807607467,
  "HasAns_total": 10570
}
