In [1]:
import json

# Update Task Dict
def update_task_dict_from_file(file_name, task_dict):
    """Append the examples stored in a JSON Lines file to ``task_dict['examples']``.

    Each non-blank line of ``file_name`` is parsed as one JSON object that must
    provide the keys ``context``, ``category``, ``question`` and
    ``answer['text']`` (a list of answer strings).

    Args:
        file_name: Path of the JSON Lines file to read.
        task_dict: Dictionary that accumulates the examples. Its ``'examples'``
            list is created when missing and extended in place.

    Returns:
        The same ``task_dict`` object, so that calls can be chained.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks one of the required keys.
    """
    examples = task_dict.setdefault('examples', [])

    # Decode explicitly as UTF-8 (assumes the data files are UTF-8, like the
    # files this notebook writes) instead of relying on the platform default,
    # which on Windows is the locale code page.
    with open(file_name, 'r', encoding='utf-8') as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue

            record = json.loads(line)
            answer_texts = record['answer']['text']

            examples.append({
                'context': record['context'],
                'category': record['category'],
                'question': record['question'],
                # Only a single unambiguous answer is kept; zero or several
                # answers are mapped to the "no answer" sentence.
                'answer': answer_texts[0] if len(answer_texts) == 1 else 'I do not know.',
            })

    return task_dict

In [3]:
import json

# Empty containers that will accumulate the examples of each set
task_dict_positive = {'examples': []}
task_dict_negative = {'examples': []}

# Fill the positive set from the train file first, then from the eval file
for positives_path in (
    'C:/Users/ADMIN/Desktop/DATN/Extract_Information/data/aveqa_mave/train/mave_positives.jsonl',
    'C:/Users/ADMIN/Desktop/DATN/Extract_Information/data/aveqa_mave/eval/mave_positives.jsonl',
):
    task_dict_positive = update_task_dict_from_file(positives_path, task_dict_positive)

In [4]:
# Persist the positive task examples as pretty-printed JSON
with open('task_aveqa_positive.json', 'w', encoding='utf-8') as out_file:
    out_file.write(json.dumps(task_dict_positive, indent=4))

In [None]:
from transformers import pipeline

# Local checkpoint directory to load the model from; point it at your own checkpoint
model_checkpoint = "C:/Users/ADMIN/Desktop/DATN/Extract_Information/baseline/bert_fine_turn/checkpoint"

# Question-answering pipeline used by the inference cells below
question_answerer = pipeline(task="question-answering", model=model_checkpoint)

# context = """
# Deolux 32GB for Nikon D3200 DSLR Camera 32GB SD SDHC Class 10 High Speed SD Secure Digital MEMORY CARD for Camera Camcorder Video and HD Video
# """
# question = "What is the SD Format of the Flash Memory Card?"
# question_answerer(question=question, context=context)

In [6]:
from tqdm import tqdm

# Run the question-answering pipeline over every positive example and store
# the predicted answer together with its score.
result_dict = {'examples': []}

for example in tqdm(task_dict_positive['examples']):
    pred = question_answerer(question=example['question'], context=example['context'])
    # Build a new record instead of writing 'pred'/'score' into the dict owned
    # by task_dict_positive: the input examples stay untouched, so re-running
    # any earlier cell (e.g. the task file dump) is not affected by this cell.
    result_dict['examples'].append({
        **example,
        'pred': pred['answer'],
        'score': pred['score'],
    })

with open('result_aveqa_positive.json', 'w', encoding='utf-8') as f:
    json.dump(result_dict, f, indent=4)
    

100%|██████████| 34418/34418 [1:31:37<00:00,  6.26it/s]  


In [17]:
# Smallest score among the positive predictions, starting from an upper bound of 1
positive_scores = [example['score'] for example in result_dict['examples']]
min_score_positive = min([1] + positive_scores)

print(min_score_positive)

1.1937552135829462e-17


In [13]:
# vc: Correct values when the ground truth has attribute Values (TP)
# vw: Wrong values when the ground truth has attribute Values (FN-FP)
positive_results = result_dict['examples']

# A prediction counts as correct only on an exact string match with the answer
vc = sum(1 for example in positive_results if example['answer'] == example['pred'])
vw = len(positive_results) - vc

In [8]:
# Fill the negative set from the train file first, then from the eval file
for negatives_path in (
    'C:/Users/ADMIN/Desktop/DATN/Extract_Information/data/aveqa_mave/train/mave_negatives.jsonl',
    'C:/Users/ADMIN/Desktop/DATN/Extract_Information/data/aveqa_mave/eval/mave_negatives.jsonl',
):
    task_dict_negative = update_task_dict_from_file(negatives_path, task_dict_negative)

In [10]:
# Persist the negative task examples as pretty-printed JSON
with open('task_aveqa_negative.json', 'w', encoding='utf-8') as out_file:
    out_file.write(json.dumps(task_dict_negative, indent=4))

In [16]:
from tqdm import tqdm

# Run the question-answering pipeline over every negative example and store
# the predicted answer together with its score.
result_dict_negative = {'examples': []}

for example in tqdm(task_dict_negative['examples']):
    pred = question_answerer(question=example['question'], context=example['context'])
    # Build a new record instead of writing 'pred'/'score' into the dict owned
    # by task_dict_negative: the input examples stay untouched, so re-running
    # any earlier cell (e.g. the task file dump) is not affected by this cell.
    result_dict_negative['examples'].append({
        **example,
        'pred': pred['answer'],
        'score': pred['score'],
    })

with open('result_aveqa_negative.json', 'w', encoding='utf-8') as f:
    json.dump(result_dict_negative, f, indent=4)

100%|██████████| 35075/35075 [1:19:08<00:00,  7.39it/s]


In [19]:
# Largest score among the negative predictions, starting from a lower bound of 0
negative_scores = [example['score'] for example in result_dict_negative['examples']]
max_score_negative = max([0] + negative_scores)

print(max_score_negative)

1.0


In [33]:
# nn: the model can predict No value (I do not know.) when ground truth is No attribute value (TN)
# nv: some incorrect Value when ground truth is No attribute value (FP)
negative_results = result_dict_negative['examples']

# A prediction scoring below 0.1 is treated as "no value"
nn = sum(1 for example in negative_results if example['score'] < 0.1)
nv = len(negative_results) - nn

In [34]:
# Show both counters, one per line: nn first, then nv
print(nn, nv, sep='\n')

35068
7


In [36]:
# Precision: correct values over every value the model produced (nv + vc + vw).
# Recall: correct values over every example whose ground truth has a value (vc + vw).
predicted_value_count = nv + vc + vw
ground_truth_value_count = vc + vw

# Keep the unrounded ratios so that F1 is not derived from already-rounded
# percentages (which would round twice and can shift the last digit).
precision_ratio = vc / predicted_value_count if predicted_value_count > 0 else 0
recall_ratio = vc / ground_truth_value_count if ground_truth_value_count > 0 else 0
ratio_sum = precision_ratio + recall_ratio
f1_ratio = 2 * precision_ratio * recall_ratio / ratio_sum if ratio_sum > 0 else 0

# Reported metrics are percentages rounded to two decimals
precision = round(precision_ratio * 100, 2)
recall = round(recall_ratio * 100, 2)
f1 = round(f1_ratio * 100, 2)

print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1: {f1}")

Precision: 99.16
Recall: 99.18
F1: 99.17
