In [1]:
import json
import pandas as pd
import numpy as np

In [2]:
from dataLoader import DataLoader
from ner import evaluator, pubtator

# Location of the question dataset (a JSON file, going by its extension).
input_path = 'input/BioASQ-trainingDataset6b.json'
# Parsed dataset; later cells look questions up via `data.questions[qid]`.
data = DataLoader(input_path)

In [3]:
# Get the candidate entities from the pickled per-tagger result frames.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
pubtator_results = pd.read_pickle('pubtator_results')
cnn_gram_results = evaluator.clean_entities(pd.read_pickle('cnn_gram_results'), data)
lingpipe_results = pd.read_pickle('lingpipe_results')

results = [pubtator_results, lingpipe_results, cnn_gram_results]

# Both ensembles follow the same recipe: combine the taggers' output with the
# given strategy, then pass the combined frame through clean_entities.
cleaned_ensembles = {}
for typ in ('union', 'intersection'):
    ensemble = evaluator.ensemble_tags(results, typ=typ)
    cleaned_ensembles[typ] = evaluator.clean_entities(ensemble, data)

ensemble_union = cleaned_ensembles['union']
ensemble_intersection = cleaned_ensembles['intersection']

In [66]:
def entity_score_by_type(sentence_scores, entity):
    """Total the per-sentence scores of every sentence that mentions `entity`.

    Args:
        sentence_scores: iterable of mappings, each with the keys
            'sentence', 'bm25' and 'indri'.
        entity: value looked for in each 'sentence' with the `in` operator
            (a plain substring test when both are strings).

    Returns:
        dict with keys 'BM25' and 'INDRI' holding the summed scores of the
        matching sentences; both are 0 when nothing matches.
    """
    mentioning = [scored for scored in sentence_scores if entity in scored['sentence']]
    return {
        'BM25': sum(scored['bm25'] for scored in mentioning),
        'INDRI': sum(scored['indri'] for scored in mentioning),
    }

def entity_score(sentence_scores, entity):
    """Return the entity's accumulated scores as a (BM25, INDRI) tuple.

    Tuples compare element by element, so when these values are sorted BM25
    decides the order and INDRI only breaks ties. Swap the two elements to
    rank by INDRI first instead.
    """
    totals = entity_score_by_type(sentence_scores, entity)
    bm25_total = totals['BM25']
    indri_total = totals['INDRI']
    return (bm25_total, indri_total)

def top_entities(entities, score_list, n):
    """Return at most `n` entities, highest-scoring first.

    Args:
        entities: iterable of candidate entities.
        score_list: per-sentence scores, passed through to entity_score.
        n: maximum number of entities to return.

    Returns:
        list of entities ordered by descending (score, entity) pair, so
        entities with equal scores are ordered by descending entity value.
    """
    ranked = []
    for candidate in entities:
        ranked.append((entity_score(score_list, candidate), candidate))
    ranked.sort(reverse=True)
    return [candidate for _, candidate in ranked[:n]]

In [67]:
run exact_answer_evaluator.py

In [68]:
import FactoidScoreEvaluator
import time

# Candidate-entity frames to evaluate, keyed by the label that is stored in
# the 'name' field of each evaluation result.
# NOTE(review): the recorded output of this cell lists only 'lingpipe', so it
# was presumably produced with a reduced version of this dict -- confirm.
dfs = {
    'pubtator': pubtator_results,
    'lingpipe': lingpipe_results,
    'Gram CNN': cnn_gram_results,
    'Ensemble Union': ensemble_union,
    'Ensemble Intersection': ensemble_intersection
}

# One evaluation result (a dict of metrics plus the tagger name) per entry
# of `dfs`.
all_results = []

for name, df in dfs.items():
    # A fresh DataLoader per tagger, presumably so that answers written during
    # one iteration do not carry over into the next. `data` stays a
    # notebook-level name on purpose: later cells inspect the last run.
    data = DataLoader(input_path)
    start = time.time()

    for _, row in df.iterrows():
        qid = row.qid
        question = data.questions[qid]
        if not question.snippets:
            # No snippets to score the candidate entities against.
            continue

        if row.type == 'factoid':
            # Scores are only computed where they are consumed
            # (assumes getScoreList has no side effects).
            score_list = FactoidScoreEvaluator.getScoreList(question.question, question.snippets)
            question.exact_answer = top_entities(row.entities, score_list, 5)
        elif row.type == 'list':
            # List questions are not answered yet. The disabled variant took
            # the top 10 entities and would need a score_list computed as in
            # the factoid branch.
            pass
        else:
            # Raising a plain string is itself a TypeError on Python 2.6+;
            # raise a real exception that names the offending row instead.
            raise ValueError('Unexpected question type %r for qid %r' % (row.type, qid))

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('Time taken to compute for %s = %.2f seconds' % (name, time.time() - start))
    # Not named `evaluator`: that name is the module imported from `ner`,
    # which the candidate-loading cell still needs when it is re-run.
    answer_evaluator = ExactAnswerEvaluator()
    result = answer_evaluator.eval(data)
    result['name'] = name
    all_results.append(result)

print(pd.DataFrame(all_results))

Time taken to compute for lingpipe = 18.24 seconds
   Exact Matches  MRR Exact  MRR Soft  Soft Matches      name
0       0.045827   0.026978  0.446427      0.607201  lingpipe


In [69]:
# Evaluate the exact answers currently stored on `data`.
# The evaluator instance is used inline rather than bound to `evaluator`,
# because that name is the module imported from `ner`; rebinding it breaks
# any re-run of the cell that calls evaluator.clean_entities.
ExactAnswerEvaluator().eval(data)

{'Exact Matches': 0.04582651391162029,
 'MRR Exact': 0.026977632296781233,
 'MRR Soft': 0.4464266230223676,
 'Soft Matches': 0.6072013093289689}

In [19]:
# pubtator_results[pubtator_results.type == 'list']['answers'].apply(lambda x: len(x)).value_counts()
# pubtator_results[pubtator_results.type == 'factoid'].answers

In [72]:
# Spot-check one question: show its text, the predicted exact answer and the
# reference answer side by side.
qid = 2232
# qid = 2238
q = data.questions[qid]
# A bare tuple as the last expression makes the notebook display all three.
q.question, q.exact_answer, q.exact_answer_ref

(u'Which disease is treated with lucinactant?',
 ['neonates', 'oxygenation', 'rds.', 'rds', 'b.'],
 [u'respiratory distress syndrome'])