In [1]:
import json
import numpy as np
import os
from pprint import pprint
import simplejson
from tqdm import tqdm

# Root directory for every dataset this notebook reads or writes.
# os.path.expanduser('~') is used instead of os.getenv('HOME'): getenv returns None
# when HOME is unset, which would silently yield the relative path 'None/research/data'.
DATA_DIR = '{}/research/data'.format(os.path.expanduser('~'))

In [2]:
# Load the original HotpotQA train and dev-distractor splits from
# DATA_DIR/hotpot-orig/ into a dict keyed by file name (e.g. 'hotpot_train_v1.json').
# Note: later cells rebind `data_hotpot` to a single list loaded from one file.
data_hotpot = {}
for split in ['train', 'dev_distractor']:
    filename = 'hotpot_{}_v1.json'.format(split)
    with open('{}/hotpot-orig/{}'.format(DATA_DIR, filename), 'r') as f:
        data_hotpot[filename] = json.load(f)

In [26]:
use_title = False  # When True, each paragraph's title (plus '/') is prepended to its context.


def clean(text):
    """Return `text` with leading and trailing whitespace removed."""
    return text.strip()


def convert_example(example, bad_answer_example=False):
    """Convert one HotpotQA example into a SQuAD-style article.

    Every entry of example['context'] becomes one paragraph. A paragraph's
    context string is the prefix 'yes no' (followed by the cleaned title and
    '/' when the module-level `use_title` flag is set) and then the stripped
    sentences, all joined by single spaces.

    A paragraph is marked answerable only when its title is one of the
    supporting-fact titles and the answer can be located in it:
      * yes/no answers point at the 'yes' / 'no' token of the prefix;
      * span answers point at the first occurrence inside a supporting-fact
        sentence, falling back to the first occurrence anywhere in the context.

    Args:
        example: dict with the keys '_id', 'type', 'level', 'question',
            'answer', 'supporting_facts' ([title, sentence_index] pairs) and
            'context' ([title, sentences] pairs).
        bad_answer_example: when True, surrounding double quotes are stripped
            from the answer and the hard-coded '3OH!3' -> '3OH! 3' replacement
            is applied before searching.

    Returns:
        (new_example, num_answerable, missing_sfs): the converted article, the
        number of paragraphs that received an answer, and the number of
        supporting facts that were not found in the context.

    Raises:
        AssertionError: if a recorded answer offset does not reproduce the
            answer text, or an answer was recorded for a non-supporting title.
    """
    normalized_answer = example['answer'].lower().strip()
    yes_q = normalized_answer == 'yes'
    no_q = normalized_answer == 'no'
    span_q = (not yes_q) and (not no_q)
    if span_q:
        clean_answer = clean(example['answer'])
    else:
        # Yes/no questions are detected case-insensitively, and their answer
        # points at the lowercase 'yes'/'no' prefix token. Store the lowercase
        # form so that e.g. 'Yes' does not trip the span verification below.
        clean_answer = normalized_answer
    if bad_answer_example:
        clean_answer = clean_answer.strip('"')
        clean_answer = clean_answer.replace('3OH!3', '3OH! 3')  # Hard-coded special case to match text
    num_answerable = 0
    supporting_fact_titles = {sf[0] for sf in example['supporting_facts']}
    # Set of (title, sentence_index) tuples: constant-time membership instead
    # of scanning the supporting-fact list for every sentence.
    supporting_fact_keys = {tuple(sf) for sf in example['supporting_facts']}
    new_example = {"title": example['question'], 'paragraphs': []}
    found_sfs = 0
    for paragraph_index, (title, sents) in enumerate(example['context']):
        paragraph_id = example['_id'] + '.' + str(paragraph_index)
        new_context_start_tokens = ['yes', 'no']
        if use_title:
            new_context_start_tokens += [clean(title), '/']
        new_context = ' '.join(new_context_start_tokens)
        is_supporting_paragraph = title in supporting_fact_titles

        answer_start = None
        if is_supporting_paragraph:
            if yes_q:
                answer_start = new_context.index('yes')
            elif no_q:
                answer_start = new_context.index('no')

        supporting_facts = []
        for sent_index, sent in enumerate(sents):
            clean_sent = clean(sent)
            if (title, sent_index) in supporting_fact_keys:
                supporting_facts.append(clean_sent)  # Found supporting fact
                # Find span if possible (only the first span is used).
                if span_q and (answer_start is None) and (clean_answer in clean_sent):
                    # +1 accounts for the joining space added just below.
                    answer_start = len(new_context) + 1 + clean_sent.index(clean_answer)
            new_context += ' ' + clean_sent

        # Fallback: the answer is not inside a supporting-fact sentence but
        # does occur somewhere in this supporting paragraph's context.
        if span_q and is_supporting_paragraph and (answer_start is None) and (clean_answer in new_context):
            answer_start = new_context.index(clean_answer)

        new_example['paragraphs'].append({
            # HotpotQA info
            "_id": example['_id'],
            "type": example['type'],
            "level": example['level'],
            "supporting_facts": supporting_facts,
            # SQuAD info
            "context": new_context,
            "qas": [
                {
                    "question": example['question'],
                    "id": paragraph_id,
                    "answers": [
                        {
                            "text": clean_answer,
                            "answer_start": answer_start
                        }
                    ] if answer_start is not None else [],
                    "is_impossible": answer_start is None
                }
            ]
        })

        # Verification
        if answer_start is not None:
            num_answerable += 1
            found_span = new_context[answer_start: answer_start + len(clean_answer)]
            assert clean_answer == found_span, \
                '[{}] answer {} != span {}'.format(paragraph_id, clean_answer, found_span)
            assert title in supporting_fact_titles, 'title {} not in supporting fact titles: {} {} for new example {}'.format(
                title, supporting_fact_titles, example['supporting_facts'], new_example['paragraphs'][-1])
        found_sfs += len(supporting_facts)
    missing_sfs = len(example['supporting_facts']) - found_sfs
    return new_example, num_answerable, missing_sfs

# Convert the loaded HotpotQA splits to SQuAD format with convert_example() and
# print summary statistics per split. Output directory: DATA_DIR/hotpot.
new_data_dir = '{}/hotpot'.format(DATA_DIR)
os.makedirs(new_data_dir, exist_ok=True)
for filename, data_hotpot_split in data_hotpot.items():
    new_data = {'data': [], 'version': filename}
    num_answerable_per_q = []   # answerable paragraphs per question
    num_bad_answer_examples = 0  # questions that needed the relaxed answer clean-up
    total_missing_sfs = 0
    num_answer_words = []        # whitespace-separated word counts
    num_question_words = []
    for example_index, example in enumerate(tqdm(data_hotpot_split)):
        new_example, num_answerable, missing_sfs = convert_example(example)
        if num_answerable == 0:
            # No paragraph contained the answer verbatim: retry with the
            # relaxed answer clean-up (bad_answer_example=True).
            num_bad_answer_examples += 1
            new_example, num_answerable, missing_sfs = convert_example(example, bad_answer_example=True)
            if num_answerable == 0:
                # Still unanswerable: dump the raw example for manual inspection.
                pprint(example)
        new_data['data'].append(new_example)
        num_answerable_per_q.append(num_answerable)
        total_missing_sfs += missing_sfs
        num_answer_words.append(len(clean(example['answer']).split()))
        num_question_words.append(len(clean(example['question']).split()))

    # Convert to arrays so later cells can call .mean() / .sort() on them.
    num_answerable_per_q = np.array(num_answerable_per_q)
    num_answer_words = np.array(num_answer_words)
    num_question_words = np.array(num_question_words)
    print('{}: # Bad Answers: {}'.format(filename, num_bad_answer_examples))
    print('{}: # Unanswerable: {}'.format(filename, (num_answerable_per_q == 0).sum()))
    print('{}: # Missing SFs: {}'.format(filename, total_missing_sfs))
    # NOTE(review): this `break` stops after the first split in data_hotpot, so the
    # remaining splits are neither converted nor reported. It looks like a debugging
    # leftover; confirm before relying on this cell for the dev split.
    break

# NOTE(review): the write below is commented out, so this cell does not save
# `new_data` to new_data_dir.
#     with open(os.path.join(new_data_dir, filename), 'w') as f:
#         f.write(simplejson.dumps(new_data, indent=2, sort_keys=False))

100%|██████████| 90447/90447 [00:11<00:00, 7636.23it/s] 

hotpot_train_v1.json: # Bad Answers: 5
hotpot_train_v1.json: # Unanswerable: 0
hotpot_train_v1.json: # Missing SFs: 22





In [25]:
# Show the 100 largest question lengths (in whitespace-separated words).
# Note: .sort() sorts num_question_words in place, so the array stays sorted
# (and no longer aligned with example order) for any cell run afterwards.
num_question_words.sort()
num_question_words[-100:]

array([ 76,  76,  77,  77,  77,  77,  77,  77,  77,  77,  77,  77,  77,
        77,  78,  78,  78,  78,  78,  78,  78,  79,  79,  79,  79,  79,
        79,  79,  79,  79,  79,  80,  80,  80,  80,  80,  81,  81,  81,
        81,  81,  81,  82,  82,  82,  82,  82,  82,  82,  83,  83,  83,
        83,  83,  83,  83,  84,  84,  84,  85,  85,  85,  85,  85,  85,
        85,  85,  85,  86,  86,  87,  87,  87,  88,  88,  90,  90,  93,
        93,  93,  93,  94,  94,  94,  94,  94,  95,  95,  95,  98,  98,
        99, 100, 101, 101, 104, 104, 104, 106, 108])

In [78]:
num_answerable_per_q.mean()

2.5078775415436665

In [5]:
example

{'_id': '5ac132a755429964131be17c',
 'answer': 'Norwood, Massachusetts',
 'question': 'Blackfin is a family of processors developed by the company that is headquartered in what city?',
 'supporting_facts': [['Blackfin', 0], ['Analog Devices', 0]],
 'context': [['1st Word/1st Word Plus',
   ['1st Word and 1st Word Plus are word processors developed by GST Computer Systems in the 1980s.',
    ' The original package, 1st Word, was given away free with all Atari STs.',
    ' The later 1st Word Plus was sold by GST and was more advanced.',
    ' Atari ST disk magazine ST News was written entirely and exclusively using 1st Word and, later, 1st Word Plus.',
    ' The first Volume (1986) was distributed as a plain 1st Word .',
    'DOC file, after that a custom shell was produced that enabled the 1st Word documents to be displayed in a userfriendly disk magazine shell.']],
  ['Arm Holdings',
   ['Arm Holdings (Arm) is a British multinational semiconductor and software design company, owned by 

In [53]:
example

{'supporting_facts': [['Vietnam national cricket team', 0],
  ['Vietnam national cricket team', 1],
  ['Cricket at the 2017 Southeast Asian Games', 0]],
 'level': 'medium',
 'question': 'Vietnam national cricket team will debut at what competitions at  Kinrara Oval',
 'context': [['Vietnam national cricket team',
   ['The Vietnam national cricket team represents Vietnam in international cricket.',
    ' It will debut in the cricket tournament at the 2017 Southeast Asian Games in Kuala Lumpur, Malaysia.']],
  ['Pickwick Cricket Club',
   ['Pickwick Cricket Club is a Barbados cricket club.',
    ' The club was founded on 23 November 1882, the second oldest cricket club in Barbados after Wanderers Cricket Club.',
    " The club's home from its foundation until 2005 was Kensington Oval in Bridgetown, the main venue for matches involving the Barbados national cricket team and the Barbados venue for Test cricket involving the West Indies cricket team.",
    ' The ground was built on land on 

In [None]:
### Minimal SQuAD-format example, kept for reference only (the body below is JSON, not runnable Python)
{
  "data": [
    {
      "title": "Pitch_(music)",
      "paragraphs": [
        {
          "qas": [
            {
              "question": "Pitch perception  has inherent octave what?",
              "id": "57098332ed30961900e8425a",
              "answers": [
                {
                  "text": "ambiguities",
                  "answer_start": 1213
                }
              ],
              "is_impossible": false
            }
          ],
          "context": "Temporal theories offer an alternative that appeals to the temporal structure of action potentials, mostly the phase-locking and mode-locking of action potentials to frequencies in a stimulus. The precise way this temporal structure helps code for pitch at higher levels is still debated, but the processing seems to be based on an autocorrelation of action potentials in the auditory nerve. However, it has long been noted that a neural mechanism that may accomplish a delay\u2014a necessary operation of a true autocorrelation\u2014has not been found. At least one model shows that a temporal delay is unnecessary to produce an autocorrelation model of pitch perception, appealing to phase shifts between cochlear filters; however, earlier work has shown that certain sounds with a prominent peak in their autocorrelation function do not elicit a corresponding pitch percept, and that certain sounds without a peak in their autocorrelation function nevertheless elicit a pitch. To be a more complete model, autocorrelation must therefore apply to signals that represent the output of the cochlea, as via auditory-nerve interspike-interval histograms. Some theories of pitch perception hold that pitch has inherent octave ambiguities, and therefore is best decomposed into a pitch chroma, a periodic value around the octave, like the note names in western music\u2014and a pitch height, which may be ambiguous, that indicates the octave the pitch is in."
        }
      ]
    }
  ]
}

In [None]:
# Load the training-set file from new_data_dir so that a slice of it can be
# written out by the next cell.
# NOTE(review): this expects new_data_dir/hotpot_train_v1.json to exist already; the
# write in the conversion cell is commented out, so presumably the file was produced
# by an earlier run. Confirm.
filename = 'hotpot_train_v1.json'
with open(os.path.join(new_data_dir, filename), 'r') as f:
    saved_data_hotpot = json.load(f)

In [36]:
# Write a small slice of the loaded training set to DATA_DIR/hotpot-slice/<filename>:
# every 100th article ([0::100]), truncated to the first 10 of those ([:10]).
slice_data_dir = '{}/hotpot-slice'.format(DATA_DIR)
os.makedirs(slice_data_dir, exist_ok=True)
with open(os.path.join(slice_data_dir, filename), 'w') as f:
    f.write(simplejson.dumps({'data': saved_data_hotpot['data'][0::100][:10], 'version': filename}, indent=2, sort_keys=False))

In [156]:
# Load the model's per-paragraph outputs for one training job; they are merged into
# HotpotQA-format answer files by the next cell.
# NOTE(review): the checkpoint root is a hard-coded absolute path for one user's
# machine; it must be edited to run anywhere else.
# job_name = 'hotpot.bs=16.lr=0.00002.nte=4'
job_name = 'tn=hotpot.mn=bert-base-uncased.2'  # For debugging
model_checkpoint_dir = os.path.join('/Users/ethanperez/research/pytorch-transformers/checkpoint/', job_name)

# Read below as a dict keyed by per-paragraph id (the aggregation cell only calls .keys() on it).
with open('{}/predictions_.json'.format(model_checkpoint_dir), 'r') as f:
    predictions = json.load(f)

# Per-paragraph id -> list of candidate dicts with 'text', 'start_logit', 'end_logit'
# (the fields read by the aggregation cell).
with open('{}/nbest_predictions_.json'.format(model_checkpoint_dir), 'r') as f:
    nbest_predictions = json.load(f)

# Per-paragraph id -> score; the aggregation cell picks the paragraph with the lowest value.
with open('{}/null_odds_.json'.format(model_checkpoint_dir), 'r') as f:
    null_odds = json.load(f)

In [157]:
# Merge per-paragraph predictions (ids of the form '<qid>.<paragraph_no>') into one
# answer per question, using two selection strategies, and write both to disk.
# Question ids are the part of each per-paragraph id before the first '.'.
qids = {single_hop_qid.split('.')[0] for single_hop_qid in nbest_predictions.keys()}
# Strategy 1: answer taken from the paragraph with the lowest null odds.
pred_answers_and_sps = {'answer': {}, 'sp': {}}
# Strategy 2: the non-empty candidate with the highest start_logit + end_logit
# across all paragraphs of the question.
globally_normed_pred_answers_and_sps = {'answer': {}, 'sp': {}}
# Paragraph suffixes 0..9 are probed per question.
# NOTE(review): presumably an upper bound on paragraphs per question; confirm.
max_num_paragraphs = 10
for qid in qids:
    # Find paragraph with answer prediction
    min_null_odds = float('inf')
    max_logit_sum = float('-inf')
    best_single_hop_qid = None
    for paragraph_no in range(max_num_paragraphs):
        single_hop_qid = qid + '.' + str(paragraph_no)
        if (single_hop_qid in null_odds) and (null_odds[single_hop_qid] < min_null_odds):
            best_single_hop_qid = single_hop_qid
            min_null_odds = null_odds[single_hop_qid]
        if (single_hop_qid in nbest_predictions):
            for nbest_prediction in nbest_predictions[single_hop_qid]:
                # Empty text is skipped so a non-empty answer is always chosen.
                if len(nbest_prediction['text']) > 0:
                    logit_sum = nbest_prediction['start_logit'] + nbest_prediction['end_logit']
                    if logit_sum > max_logit_sum:
                        globally_normed_pred_answers_and_sps['answer'][qid] = nbest_prediction['text']
                        max_logit_sum = logit_sum

    # Find/store answer and supporting fact
    pred_answers_and_sps['sp'][qid] = []  # NB: Dummy supporting fact for now
    globally_normed_pred_answers_and_sps['sp'][qid] = []  # NB: Dummy supporting fact for now
    # NOTE(review): if none of this question's paragraph ids appear in null_odds,
    # best_single_hop_qid is still None here and the lookup raises KeyError.
    # Takes the first non-empty candidate; assumes the n-best list is ordered
    # best-first. TODO confirm.
    for nbest_prediction in nbest_predictions[best_single_hop_qid]:
        if len(nbest_prediction['text']) > 0:
            pred_answers_and_sps['answer'][qid] = nbest_prediction['text']
            break
    assert qid in pred_answers_and_sps['answer'], 'Error: No predicted answer found.'
    assert qid in globally_normed_pred_answers_and_sps['answer'], 'Error: No globally normed predicted answer found.'

# Both files use the {'answer': {...}, 'sp': {...}} layout that eval() reads.
with open('{}/hotpot_predictions.json'.format(model_checkpoint_dir), "w") as writer:
    writer.write(json.dumps(pred_answers_and_sps, indent=2))

with open('{}/hotpot_predictions_globally_normed.json'.format(model_checkpoint_dir), "w") as writer:
    writer.write(json.dumps(globally_normed_pred_answers_and_sps, indent=2))

In [158]:
max_logit_sum

1.087890625

In [159]:
globally_normed_pred_answers_and_sps

{'answer': {'5a7a06935542990198eaf050': "yes no Arthur's Magazine (1844–1846) was an American literary periodical published in Philadelphia in the 19th century. Edited",
  '5ab30bbb55429976abd1bc39': 'no Prince Georg of Hanover ("Georg Paul Christian Prinz von Hannover"), Duke of Brunswick-Lüneburg (born 9 December 1949',
  '5ae3032f55429928c42395ae': 'Putnam City North High School',
  '5ac141dd5542991316484a9b': 'no George Distel is a former member of the Ohio House of Representatives, representing the 99th District from 1999-2008. He was',
  '5ae0fd14554299422ee995a3': 'yes no River of Romance is a 1929 American drama film directed by Richard Wallace and',
  '5ae67c245542996d980e7b82': 'yes no Youth & Young Manhood is the debut album from American rock band Kings of Leon, released on July 7, 2003, in',
  '5ac14ee95542991316484aea': 'yes',
  '5ab3f9b95542992ade7c6f09': 'no The Japanese and Europe: Economic and Cultural Encounters is a',
  '5abb89fc5542993f40c73b19': 'Conner (born Marc

In [153]:
max_logit_sum

1.36474609375

In [152]:
# Dump the n-best candidates of paragraphs 0..9 for a single question.
# NOTE(review): `qid` is not set in this cell; it is whatever value an earlier cell
# (e.g. the aggregation loop) left in the kernel.
for i in range(10):
    pprint(nbest_predictions[qid + '.' + str(i)])

[{'end_logit': 1.025390625,
  'probability': 0.11381675313505601,
  'start_logit': 0.9394531250000001,
  'text': ''},
 {'end_logit': 0.5068359375,
  'probability': 0.053344838568228005,
  'start_logit': 0.7001953125,
  'text': "no Radio City is India's first private FM radio station and was "
          'started'},
 {'end_logit': 0.49072265624999994,
  'probability': 0.052492166303899,
  'start_logit': 0.7001953125,
  'text': "no Radio City is India's"},
 {'end_logit': 0.48779296874999994,
  'probability': 0.052338605712538004,
  'start_logit': 0.7001953125,
  'text': "no Radio City is India's first private FM radio station and"},
 {'end_logit': 0.465087890625,
  'probability': 0.051163642868491004,
  'start_logit': 0.7001953125,
  'text': "no Radio City is India's first"},
 {'end_logit': 0.5068359375,
  'probability': 0.049722856983962005,
  'start_logit': 0.6298828125,
  'text': "yes no Radio City is India's first private FM radio station and was "
          'started'},
 {'end_logit':

In [148]:
nbest_predictions[qid]

SyntaxError: invalid syntax (<ipython-input-148-1b0c95b6085b>, line 1)

In [138]:
nbest_predictions['5a7a06935542990198eaf050.0'][1]['start_logit'] + nbest_predictions['5a7a06935542990198eaf050.0'][1]['end_logit']

1.20703125

In [135]:
float('-inf') < -10000

True

In [121]:
len(pred_answers_and_sps['sp'])

7405

In [85]:
# hotpot_evaluate_v1.py
import sys
import ujson as json
import re
import string
from collections import Counter
import pickle

def normalize_answer(s):
    """Canonicalise an answer string for comparison.

    Steps, in order: lowercase, drop every character in string.punctuation,
    replace the stand-alone words 'a', 'an', 'the' with a space, then collapse
    all whitespace runs to single spaces (which also trims both ends).
    """
    lowered = s.lower()
    punctuation = set(string.punctuation)
    without_punc = ''.join(filter(lambda ch: ch not in punctuation, lowered))
    without_articles = re.sub(r'\b(a|an|the)\b', ' ', without_punc)
    return ' '.join(without_articles.split())


def f1_score(prediction, ground_truth):
    """Token-level F1 between a predicted and a gold answer.

    Both strings are passed through normalize_answer() first. If either side
    normalises to 'yes', 'no' or 'noanswer' and the two differ, the score is
    zero. Otherwise precision and recall are computed from the multiset
    overlap of whitespace-separated tokens.

    Returns:
        (f1, precision, recall); (0, 0, 0) when there is no token overlap.
    """
    pred_norm = normalize_answer(prediction)
    gold_norm = normalize_answer(ground_truth)

    zero = (0, 0, 0)
    special_answers = ('yes', 'no', 'noanswer')

    # Special answers only count on an exact match; no partial credit.
    if pred_norm != gold_norm and (pred_norm in special_answers or gold_norm in special_answers):
        return zero

    pred_tokens = pred_norm.split()
    gold_tokens = gold_norm.split()
    overlap = sum((Counter(pred_tokens) & Counter(gold_tokens)).values())
    if overlap == 0:
        return zero

    precision = 1.0 * overlap / len(pred_tokens)
    recall = 1.0 * overlap / len(gold_tokens)
    return (2 * precision * recall) / (precision + recall), precision, recall


def exact_match_score(prediction, ground_truth):
    """Return True when both strings are equal after normalize_answer()."""
    normalized_prediction = normalize_answer(prediction)
    normalized_ground_truth = normalize_answer(ground_truth)
    return normalized_prediction == normalized_ground_truth

def update_answer(metrics, prediction, gold):
    """Add one example's answer scores to the running totals in `metrics`.

    Increments metrics['em'], ['f1'], ['prec'] and ['recall'] in place.

    Returns:
        (em, prec, recall) for this example; `em` is the boolean returned by
        exact_match_score().
    """
    em = exact_match_score(prediction, gold)
    f1, prec, recall = f1_score(prediction, gold)
    increments = (('em', float(em)), ('f1', f1), ('prec', prec), ('recall', recall))
    for key, value in increments:
        metrics[key] += value
    return em, prec, recall

def update_sp(metrics, prediction, gold):
    """Add one example's supporting-fact scores to the running totals in `metrics`.

    `prediction` and `gold` are iterables of supporting facts; each fact is
    converted to a tuple and the two collections are compared as sets.
    Increments metrics['sp_em'], ['sp_f1'], ['sp_prec'] and ['sp_recall'] in place.

    Returns:
        (em, prec, recall) for this example, all floats.
    """
    predicted = {tuple(fact) for fact in prediction}
    expected = {tuple(fact) for fact in gold}

    tp = len(predicted & expected)
    fp = len(predicted - expected)
    fn = len(expected - predicted)

    prec = 1.0 * tp / (tp + fp) if tp + fp > 0 else 0.0
    recall = 1.0 * tp / (tp + fn) if tp + fn > 0 else 0.0
    f1 = 2 * prec * recall / (prec + recall) if prec + recall > 0 else 0.0
    em = 1.0 if fp + fn == 0 else 0.0

    for key, value in (('sp_em', em), ('sp_f1', f1), ('sp_prec', prec), ('sp_recall', recall)):
        metrics[key] += value
    return em, prec, recall

def eval(prediction_file, gold_file):
    """Score a prediction file against a gold HotpotQA file and print the averaged metrics.

    Note: this function shadows Python's built-in `eval` for the rest of the notebook.

    Args:
        prediction_file: path to a JSON file of the form
            {'answer': {id: text}, 'sp': {id: [supporting facts]}}.
        gold_file: path to a JSON list of examples, each with the keys
            '_id', 'answer' and 'supporting_facts'.

    Returns:
        None. The metrics dict is printed, not returned.

    Raises:
        ZeroDivisionError: if the gold file contains no examples (division by N below).
    """
    # `json` is whatever module that name is bound to when this runs; this cell
    # rebinds it with `import ujson as json`.
    with open(prediction_file) as f:
        prediction = json.load(f)
    with open(gold_file) as f:
        gold = json.load(f)

    metrics = {'em': 0, 'f1': 0, 'prec': 0, 'recall': 0,
        'sp_em': 0, 'sp_f1': 0, 'sp_prec': 0, 'sp_recall': 0,
        'joint_em': 0, 'joint_f1': 0, 'joint_prec': 0, 'joint_recall': 0}
    for dp in gold:
        cur_id = dp['_id']
        # Joint metrics need both an answer and supporting facts for this id.
        can_eval_joint = True
        if cur_id not in prediction['answer']:
            print('missing answer {}'.format(cur_id))
            can_eval_joint = False
        else:
            em, prec, recall = update_answer(
                metrics, prediction['answer'][cur_id], dp['answer'])
        if cur_id not in prediction['sp']:
            print('missing sp fact {}'.format(cur_id))
            can_eval_joint = False
        else:
            sp_em, sp_prec, sp_recall = update_sp(
                metrics, prediction['sp'][cur_id], dp['supporting_facts'])

        # em/prec/recall and sp_* are only read here, where both branches above
        # assigned them for the current example.
        if can_eval_joint:
            joint_prec = prec * sp_prec
            joint_recall = recall * sp_recall
            if joint_prec + joint_recall > 0:
                joint_f1 = 2 * joint_prec * joint_recall / (joint_prec + joint_recall)
            else:
                joint_f1 = 0.
            joint_em = em * sp_em

            metrics['joint_em'] += joint_em
            metrics['joint_f1'] += joint_f1
            metrics['joint_prec'] += joint_prec
            metrics['joint_recall'] += joint_recall

    # Average over ALL gold examples: ids missing from the predictions add
    # nothing to the sums but still count in the denominator.
    N = len(gold)
    for k in metrics.keys():
        metrics[k] /= N

    print(metrics)

# if __name__ == '__main__':
#     eval(sys.argv[1], sys.argv[2])


In [93]:
eval('{}/hotpot_predictions.json'.format(model_checkpoint_dir), '{}/hotpot-slice/hotpot_train_v1_orig.json'.format(DATA_DIR))

{'em': 0.0, 'f1': 0.06388888888888888, 'prec': 0.03678571428571429, 'recall': 0.25, 'sp_em': 0.0, 'sp_f1': 0.0, 'sp_prec': 0.0, 'sp_recall': 0.0, 'joint_em': 0.0, 'joint_f1': 0.0, 'joint_prec': 0.0, 'joint_recall': 0.0}


In [122]:
# Eval on sample dev answers (as a test)
eval('/Users/ethanperez/research/data/hotpot-orig/sample_dev_answers.json', '/Users/ethanperez/research/data/hotpot-orig/hotpot_dev_distractor_v1.json')

missing answer 5a87ab905542996e4f3088c1
missing sp fact 5a87ab905542996e4f3088c1
missing answer 5ab56e32554299637185c594
missing sp fact 5ab56e32554299637185c594
missing answer 5a760ab65542994ccc918697
missing sp fact 5a760ab65542994ccc918697
missing answer 5ab7f97a5542991d322237ef
missing sp fact 5ab7f97a5542991d322237ef
missing answer 5ab266b5554299340b5254b4
missing sp fact 5ab266b5554299340b5254b4
missing answer 5ae7eb3c5542994a481bbe20
missing sp fact 5ae7eb3c5542994a481bbe20
missing answer 5a8b595855429949d91db563
missing sp fact 5a8b595855429949d91db563
missing answer 5a80762a5542996402f6a536
missing sp fact 5a80762a5542996402f6a536
missing answer 5adbe2c65542996e68525274
missing sp fact 5adbe2c65542996e68525274
missing answer 5ab8f33155429919ba4e237f
missing sp fact 5ab8f33155429919ba4e237f
missing answer 5a8f495c5542997ba9cb3220
missing sp fact 5a8f495c5542997ba9cb3220
missing answer 5a753c8c55429916b01642ab
missing sp fact 5a753c8c55429916b01642ab
missing answer 5ae5be0255429

In [123]:
# Load the original dev (distractor) split. The path is built from DATA_DIR, like the
# other hotpot-orig reads in this notebook, instead of a hard-coded user-specific
# absolute path, so the cell runs on any machine with the same data layout.
with open('{}/hotpot-orig/hotpot_dev_distractor_v1.json'.format(DATA_DIR), 'r') as f:
    data_hotpot_dev = json.load(f)

In [124]:
data_hotpot_dev[0]

{'_id': '5a8b57f25542995d1e6f1371',
 'answer': 'yes',
 'question': 'Were Scott Derrickson and Ed Wood of the same nationality?',
 'supporting_facts': [['Scott Derrickson', 0], ['Ed Wood', 0]],
 'context': [['Ed Wood (film)',
   ['Ed Wood is a 1994 American biographical period comedy-drama film directed and produced by Tim Burton, and starring Johnny Depp as cult filmmaker Ed Wood.',
    " The film concerns the period in Wood's life when he made his best-known films as well as his relationship with actor Bela Lugosi, played by Martin Landau.",
    ' Sarah Jessica Parker, Patricia Arquette, Jeffrey Jones, Lisa Marie, and Bill Murray are among the supporting cast.']],
  ['Scott Derrickson',
   ['Scott Derrickson (born July 16, 1966) is an American director, screenwriter and producer.',
    ' He lives in Los Angeles, California.',
    ' He is best known for directing horror films such as "Sinister", "The Exorcism of Emily Rose", and "Deliver Us From Evil", as well as the 2016 Marvel Cinema

In [125]:
# Build a "perfect" prediction set from the gold dev data, in the
# {'answer': {id: text}, 'sp': {id: supporting_facts}} layout that eval() reads.
true_dev_answers = {'answer': {}, 'sp': {}}
for example in data_hotpot_dev:
    true_dev_answers['answer'][example['_id']] = example['answer']
    true_dev_answers['sp'][example['_id']] = example['supporting_facts']

In [129]:
# Save the gold-derived predictions to DATA_DIR/hotpot-orig/true_dev_answers.json so
# they can be passed to eval() as a prediction file (sanity check of the evaluator).
true_dev_answers_filepath = '{}/hotpot-orig/true_dev_answers.json'.format(DATA_DIR)
with open(true_dev_answers_filepath, 'w') as f:
    f.write(json.dumps(true_dev_answers, indent=2))

In [130]:
# Eval on true dev answers (as a test)
eval(true_dev_answers_filepath, '/Users/ethanperez/research/data/hotpot-orig/hotpot_dev_distractor_v1.json')

{'em': 1.0, 'f1': 0.999729912221472, 'prec': 0.999729912221472, 'recall': 0.999729912221472, 'sp_em': 1.0, 'sp_f1': 1.0, 'sp_prec': 1.0, 'sp_recall': 1.0, 'joint_em': 1.0, 'joint_f1': 0.999729912221472, 'joint_prec': 0.999729912221472, 'joint_recall': 0.999729912221472}


In [92]:
# Slice original format dataset using selected subset of examples
with open('{}/hotpot-orig/hotpot_train_v1.json'.format(DATA_DIR), 'r') as f:
    data_hotpot = json.load(f)

data_hotpot_slice = []
for example in data_hotpot:
    if example['_id'] in qids:
        data_hotpot_slice.append(example)
assert len(data_hotpot_slice) == len(qids), '# sliced hotpot examples {} != {}'.format(len(data_hotpot_slice), len(qids))

with open('{}/hotpot-slice/hotpot_train_v1_orig.json'.format(DATA_DIR), 'w') as f:
    f.write(json.dumps(data_hotpot_slice, indent=2))

In [111]:
# Pretty print data file
# Load the original training file (the in-place pretty-print rewrite is disabled below).
# The path is built from DATA_DIR, matching the other cell that opens this same file,
# instead of a hard-coded user-specific absolute path.
filepath = '{}/hotpot-orig/hotpot_train_v1.json'.format(DATA_DIR)
with open(filepath, 'r') as f:
    data_hotpot = json.load(f)
# with open(filepath, 'w') as f:
#     f.write(json.dumps(data_hotpot, indent=2))

In [112]:
# Number of context paragraphs per example, then the mean.
# The array is built in one pass: np.append copies the whole array on every call,
# so growing it inside the loop was quadratic in the number of examples.
# dtype=float keeps the float64 dtype the original np.array([]) accumulator had.
context_lens = np.array([len(example['context']) for example in data_hotpot], dtype=float)
print(context_lens.mean())

9.946897077846694


In [118]:
100. * (context_lens < 3).mean()

0.28967240483377005