In [308]:
from lxml import etree as et
from sklearn.metrics import classification_report,f1_score,average_precision_score, label_ranking_average_precision_score
from sklearn.preprocessing import MultiLabelBinarizer
import numpy as np
import pandas as pd
import json
import csv

from utils import *

def _child_text(parent, tag):
    """Return the text of the first `tag` child of `parent`, or None if there is no such child."""
    child = parent.find(tag)
    return child.text if child is not None else None


def parse_XML(xml_file):
    """Parse a question XML file into a DataFrame with one row per question.

    Each child element of the document root is treated as one question. For
    every question the following columns are produced:

    - ``id``: the question's ``id`` attribute (None if absent).
    - ``human_concepts``: text of every ``QP/Entities`` element.
    - ``documents``: ``PMID`` attribute of every ``IR/Result`` element.
    - ``full_abstracts``: text of each result's ``Abstract`` child.
    - ``titles``: text of each result's ``Title`` child.
    - ``type``: text of ``QP/Type``.

    Missing ``IR``/``QP`` sections yield empty lists (and a None type), and a
    result without ``Title``/``Abstract`` contributes None, instead of raising
    ``AttributeError``. The three per-result lists always have the same length
    and order, so position i refers to the same result in each of them.

    Parameters
    ----------
    xml_file : path or file object accepted by ``et.parse``.

    Returns
    -------
    pandas.DataFrame with columns
    ``['id', 'human_concepts', 'documents', 'full_abstracts', 'titles', 'type']``.
    """
    df_cols = ['id', 'human_concepts', 'documents', 'full_abstracts', 'titles', 'type']
    xroot = et.parse(xml_file).getroot()
    rows = []
    for question in xroot:
        # Iterating an lxml element also yields comments and processing
        # instructions; their .tag is not a string, so skip them.
        if not isinstance(question.tag, str):
            continue

        ir = question.find("IR")
        qp = question.find("QP")
        # Compare against None explicitly: an element without children is falsy.
        results = ir.findall("Result") if ir is not None else []
        concepts = [e.text for e in qp.findall("Entities")] if qp is not None else []
        qa_type = _child_text(qp, "Type") if qp is not None else None

        rows.append({
            "id": question.attrib.get("id"),
            "human_concepts": concepts,
            "documents": [result.get("PMID") for result in results],
            "full_abstracts": [_child_text(result, "Abstract") for result in results],
            "titles": [_child_text(result, "Title") for result in results],
            "type": qa_type,
        })

    return pd.DataFrame(rows, columns=df_cols)

In [285]:
def add_answers(df, answers):
    """Attach the `exact_answer` column from answer files to `df`.

    Parameters
    ----------
    df : DataFrame with an ``id`` column.
    answers : iterable of paths to JSON files. Each file is read as JSON with
        a ``questions`` record list from which the ``id`` and ``exact_answer``
        fields are taken.

    Returns
    -------
    DataFrame: `df` outer-joined on ``id`` with the stacked answers, so ids
    found on only one side are kept. Answer ids are passed through
    `trim_big` before joining.
    """
    answer_frames = []
    for answer_path in answers:
        with open(answer_path, 'r') as handle:
            payload = json.load(handle)
        questions = pd.json_normalize(payload, record_path="questions")
        # Copy so the later id rewrite never touches the normalized frame.
        answer_frames.append(questions.loc[:, ['id', 'exact_answer']].copy())

    all_answers = pd.concat(answer_frames)
    all_answers['id'] = all_answers['id'].apply(trim_big)
    return df.merge(all_answers, on='id', how='outer')

def trim_big(id):
    """Shorten a 24-character id to its first 20 characters.

    Ids of any other length are returned unchanged.
    NOTE(review): presumably the 24-character form carries a 4-character
    suffix that must be dropped to match ids elsewhere - confirm.
    """
    return id[:20] if len(id) == 24 else id

def get_pmid(docs):
    """Return, for each entry in `docs`, the text after its last "/".

    Entries without a "/" are returned unchanged; an entry ending in "/"
    yields an empty string.
    NOTE(review): presumably the entries are document URLs whose final path
    segment is the PMID - confirm against the dataset.
    """
    pmids = []
    for doc in docs:
        pmids.append(doc.rsplit("/", 1)[-1])
    return pmids

In [None]:
#Takes in two numpy arrays
def get_f1_score(gold, gen):
    """Placeholder: no score is computed yet.

    Currently ignores both arguments, prints an empty line and returns None.
    TODO: implement the F1 computation, or remove this stub if it is unused.
    """
    print("")

In [286]:
# Input locations: the gold-standard dataset and the generated question XML.
golden_dataset_path = "testing_datasets/augmented_concepts_abstracts_titles.json"
generated_qu = "tmp/ir/output/bioasq_qa.xml"

with open(golden_dataset_path, 'r') as f:
    gold_data = json.load(f)

# Flatten the "questions" records into one row per question, then normalise
# the ids with trim_big and reduce each document entry to the text after its
# last "/" with get_pmid.
gold_df = pd.json_normalize(gold_data, record_path="questions")
gold_df = gold_df.assign(
    id=gold_df['id'].apply(trim_big),
    documents=gold_df['documents'].apply(get_pmid),
)


In [287]:
# Answer files to join onto the generated questions.
answers = [
    "tmp/qa/yesno/BioASQform_BioASQ-answer.json",
    "tmp/qa/factoid/BioASQform_BioASQ-answer.json",
    "tmp/qa/list/BioASQform_BioASQ-answer.json",
]

# Parse the generated questions, normalise their ids with trim_big, then
# attach the exact answers.
gen_df = add_answers(
    parse_XML(generated_qu).assign(id=lambda questions: questions['id'].apply(trim_big)),
    answers,
)

Average QU concepts f1, precision, recall score:
(0.1727500898601726, 0.16211627033544848, 0.2278786552576016)

Number of question's with type correctly predicted
2983/3243

Average IR PMID f1, precision, recall score:
(0.024398537596114104, 0.045651429549734605, 0.022827095765177285)

Average F1,precision,recall scores for Yes Questions
(0, 0, 0)

Average F1,precision,recall scores for No Questions
(0.15430861723446893, 0.15430861723446893, 0.15430861723446893)

Average F1,precision,recall scores for ALL Yes/No Questions
(0.07715430861723446, 0.07715430861723446, 0.07715430861723446)

Strict Accuracy for factoid questions:
0.0

Lenient Accuracy for factoid questions:
0.0

Average MRR for factoid questions:
0.0

Average F1,precision,recall scores for ALL List Questions
(0.0, 0.0, 0.0)

In [338]:
# Keep one row per question id on each side.
gen_df_trim = gen_df.drop_duplicates(subset="id")
gold_df_trim = gold_df.drop_duplicates(subset='id')

# Split the gold yes/no questions by their expected answer.
yes_no_gold_df = gold_df[gold_df['type'] == 'yesno']
yes_gold_df = yes_no_gold_df[yes_no_gold_df['exact_answer'] == 'yes']
no_gold_df = yes_no_gold_df[yes_no_gold_df['exact_answer'] == 'no']
print(len(yes_gold_df))
print(len(no_gold_df))

# Generated rows (one per id) for the questions whose gold answer is yes / no.
# Filtering the de-duplicated frame gives the same rows as filtering gen_df
# and de-duplicating afterwards, because the filter depends only on the id.
should_be_yes_gen_df = gen_df_trim[gen_df_trim.id.isin(yes_gold_df.id.unique())]
should_be_no_gen_df = gen_df_trim[gen_df_trim.id.isin(no_gold_df.id.unique())]

print (len(should_be_yes_gen_df))
print (len(should_be_no_gen_df))


factoid_gold_df = gold_df[gold_df['type'] == 'factoid']
list_gold_df = gold_df[gold_df['type'] == 'list']


# Get classification reports.
# The gold and generated frames must be aligned on id before comparing types:
# gen_df comes out of an outer merge, so neither its row order nor its length
# is guaranteed to match gold_df, and a positional comparison would pair the
# labels of different questions (or fail on a length mismatch).
type_eval_df = gold_df_trim[['id', 'type']].merge(
    gen_df_trim[['id', 'type']],
    on='id',
    how='left',
    suffixes=('_gold', '_gen'),
)
# A gold question with no generated type counts as a wrong prediction rather
# than being silently dropped from the evaluation.
type_eval_df = type_eval_df.assign(type_gen=type_eval_df['type_gen'].fillna('missing'))
type_report = classification_report(
    type_eval_df['type_gold'].to_numpy(),
    type_eval_df['type_gen'].to_numpy(),
    output_dict=True,
)


704
177
<class 'numpy.ndarray'>
696
176


In [322]:
# Displays whether the literal string 'i' occurs among the unique gold "yes" ids.
# NOTE(review): looks like a leftover ad-hoc debugging probe - confirm whether
# this cell is still needed.
'i' in yes_gold_df.id.unique()

False

In [None]:
# Keep one row per question id on each side.
gen_df_trim = gen_df.drop_duplicates(subset="id")
gold_df_trim = gold_df.drop_duplicates(subset='id')

# Split the gold questions by question type.
yes_no_gold_df = gold_df[gold_df['type'] == 'yesno']
# Backward-compatible alias for the earlier misspelled name of this frame.
yesn_no_gold_df = yes_no_gold_df
factoid_gold_df = gold_df[gold_df['type'] == 'factoid']
list_gold_df = gold_df[gold_df['type'] == 'list']
