In [27]:
import json
import glob
import collections
from itertools import chain
from typing import Any, Dict, Iterator, List, Tuple, Union
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline

In [28]:
def read_json_file(file_path):
    '''Load a JSON file and return the parsed object.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    '''
    # Decode explicitly as UTF-8: without an encoding argument open() falls back
    # to the platform default, which can fail or garble non-ASCII text.
    with open(file_path, encoding="utf-8") as f:
        return json.load(f)

In [29]:
def data_preprocessing(dataset):
    '''Flatten a nested QA dataset into three aligned lists.

    Walks ``dataset`` -> ``'paragraphs'`` -> ``'qas'`` -> ``'answers'`` and
    emits one entry per answer, so a context and question are repeated for
    every answer attached to them. Questions with no answers contribute nothing.

    Args:
        dataset: Iterable of groups, each indexable by ``'paragraphs'``.

    Returns:
        A tuple ``(contexts, questions, answers)`` of equally long lists, where
        ``answers`` holds the answer objects exactly as found in the input.
    '''
    def _iter_triples():
        # Yield one (context, question, answer) triple per answer entry.
        for group in dataset:
            for passage in group['paragraphs']:
                passage_text = passage['context']
                for qa in passage['qas']:
                    query = qa['question']
                    for gold in qa['answers']:
                        yield passage_text, query, gold

    triples = list(_iter_triples())
    contexts = [triple[0] for triple in triples]
    questions = [triple[1] for triple in triples]
    answers = [triple[2] for triple in triples]
    return contexts, questions, answers

In [30]:
# Single source of truth for the checkpoint, so the model and its tokenizer are
# always loaded from the same repository.
MODEL_NAME = "saiful9379/Bangla_Roberta_Question_and_Answer"

model = AutoModelForQuestionAnswering.from_pretrained(MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)

In [None]:
bn_val_data_path = "./bn_dataset/bn/val"
# sorted() makes the load order reproducible; glob.glob returns matches in
# arbitrary, filesystem-dependent order.
bn_val_files = sorted(glob.glob(bn_val_data_path + "/*.json"))
# Parse the files first: bn_val_list below reads bn_val_data_list, so it has to
# exist before that line runs on a fresh kernel.
bn_val_data_list = list(map(read_json_file, bn_val_files))
# Take the value stored under the top-level "data" key of every parsed file.
bn_val_list = [i["data"] for i in bn_val_data_list]
# Concatenate the per-file "data" values into one flat list.
bn_val_data = list(chain(*bn_val_list))

In [19]:
# Flatten the validation data into three aligned lists (one entry per answer).
test_contexts, test_questions, test_answers = data_preprocessing(bn_val_data)

In [20]:
# Bundle the three aligned lists under the keys the evaluation loop reads.
squad_test = dict(
    answers=test_answers,
    context=test_contexts,
    question=test_questions,
)

In [24]:
# Build the question-answering pipeline once: it does not depend on the loop
# variables, so constructing it per example only repeats the setup work.
QA = pipeline('question-answering', model=model, tokenizer=tokenizer)

ground_truth_values, prediction_values = [], []
for answer, context, question in zip(squad_test["answers"], squad_test["context"], squad_test["question"]):
    QA_input = {'question': question, 'context': context}
    prediction = QA(QA_input)
    # Keep only the strings that get compared: the reference's "text" field and
    # the pipeline result's "answer" field.
    gt = answer["text"]
    pt = prediction["answer"]
    ground_truth_values.append(gt)
    prediction_values.append(pt)

In [31]:
def compute_f1_score(ground_truth_values: List[str],
                     prediction_values: List[str]) -> float:
    '''Compute an F1 score from the multiset overlap of two lists of values.

    Every list element is treated as one item and compared by equality, so the
    position of an element within its list does not matter. Precision is the
    overlap divided by the number of predictions, recall is the overlap divided
    by the number of ground-truth values, and the result is their harmonic mean.

    Args:
        ground_truth_values: Reference values.
        prediction_values: Predicted values.

    Returns:
        The F1 score as a float in [0.0, 1.0]. When either input is empty the
        result is 1.0 if the two inputs are equal and 0.0 otherwise.
    '''
    # No answer case: handled first so no Counter is built for empty input, and
    # returned as a float to match the annotated return type.
    if not ground_truth_values or not prediction_values:
        return float(ground_truth_values == prediction_values)

    # Counter intersection keeps, per value, the smaller of the two counts.
    overlap = (
        collections.Counter(prediction_values) &
        collections.Counter(ground_truth_values))
    num_same = sum(overlap.values())

    # Nothing in common: return early to avoid a 0 / 0 in the harmonic mean.
    if num_same == 0:
        return 0.0

    precision = num_same / len(prediction_values)
    recall = num_same / len(ground_truth_values)
    return (2 * precision * recall) / (precision + recall)

In [26]:
# Score the collected predictions against the reference answer strings.
# The two lists are compared as multisets of whole strings, not token by token.
f1_score = compute_f1_score(ground_truth_values, prediction_values)
print("f1_score : ", f1_score)

f1_score :  0.7072072072072072
