In [1]:
import _pickle as pickle
import json
import pandas as pd
import numpy as np
from tqdm import tqdm

In [2]:
question_bank = pd.read_csv('../data/question_bank.tsv', sep='\t')

In [3]:
devset = pickle.load(open('../data/dev_synthetic.pkl', 'rb'))

In [4]:
trainset = pickle.load(open('../data/train_synthetic.pkl', 'rb'))

In [5]:
qid2question = {_id: _q for _id, _q in question_bank[['question_id', 'question']].values}

In [52]:
np.random.choice(list(trainset.keys()), size=10, replace=False)

array([ 842156,   29407,  105016,  430756,  130309,   90551,  736030,
        263829, 1214494,  669712])

In [57]:
trainset[669712]

{'topic_id': 137,
 'facet_id': 'F0149',
 'initial_request': 'tell me about rock and gem shows',
 'question': 'what local rock and gem shows are you looking for from this list',
 'answer': 'i dont know yet',
 'conversation_context': [{'question': 'are you curious about rock and gem shows',
   'answer': 'yes locations and dates'},
  {'question': 'would you like to learn about rock and gem shows in california',
   'answer': 'yes'}],
 'context_id': 101051}

In [7]:
# build eval_score
deveval = pickle.load(open('../data/multi_turn_dev_eval.pkl', 'rb'))

In [6]:
traineval = pickle.load(open('../data/multi_turn_train_eval.pkl', 'rb'))

In [8]:
def get_cid2maxq(evalset):
    cid2maxq = {}
    for cid, value in tqdm(evalset['NDCG3'].items()):
        cid2maxq[cid] = []
        if value['MAX']['with_answer'] == 0:
            continue
        for k in value.keys():
            if k == 'MAX' or k == 'MIN':
                continue
            if value[k]['with_answer'] == value['MAX']['with_answer']:
                cid2maxq[cid].append((qid2question[k], value[k]['with_answer']))
    return cid2maxq

In [10]:
dev_cid2maxq = get_cid2maxq(deveval)
len(dev_cid2maxq)

100%|██████████| 15066/15066 [00:00<00:00, 113784.54it/s]


15066

In [11]:
train_cid2maxq = get_cid2maxq(traineval)
len(train_cid2maxq)

100%|██████████| 61200/61200 [00:02<00:00, 20548.16it/s] 


61200

In [12]:
def get_max_keys(dataset, cid2maxq):
    max_keys = []
    for key, value in tqdm(dataset.items()):
        tmp = [_v[0] for _v in cid2maxq.get(value['context_id'], [])]
        if value['question'] in tmp:
            max_keys.append(key)
    return max_keys

In [13]:
dev_max_keys = get_max_keys(devset, dev_cid2maxq)
len(dev_max_keys)

100%|██████████| 182435/182435 [00:00<00:00, 479521.53it/s]


38703

In [14]:
train_max_keys = get_max_keys(trainset, train_cid2maxq)
len(train_max_keys)

100%|██████████| 744869/744869 [00:00<00:00, 765271.48it/s]


150968

In [18]:
new_devset = {_k: devset[_k] for _k in dev_max_keys}
new_trainset = {_k: trainset[_k] for _k in train_max_keys}

# add facet_desc

In [15]:
qulac_data = json.load(open('../data/qulac.json', 'r', encoding='utf-8'))

In [16]:
df = pd.DataFrame()
for key in qulac_data.keys():
    df[key] = list(qulac_data[key].values())

In [21]:
def build_desc(dataset):
    err_keys = []
    for key in tqdm(dataset.keys()):
        t1 = df['topic_id'] == dataset[key]['topic_id']
        t2 = df['question'] == dataset[key]['question']
        t3 = df['answer'] == dataset[key]['answer']
        tmp_df = df[t1 & t2]
        if len(tmp_df) < 1:
            err_keys.append(key)
            continue
        dataset[key]['topic_desc'] = tmp_df['topic_desc'].values[0]
        dataset[key]['facet_desc'] = tmp_df['facet_desc'].values[0]
    return dataset, err_keys

In [22]:
new_devset, dev_errkeys = build_desc(new_devset)
print(len(new_devset), len(dev_errkeys))

100%|██████████| 38703/38703 [01:35<00:00, 406.53it/s]

38703 1822





In [20]:
new_trainset, train_errkeys = build_desc(new_trainset)
print(len(new_trainset), len(train_errkeys))

100%|██████████| 150968/150968 [07:03<00:00, 356.31it/s]

150968 4185





In [39]:
tmpset = set(train_errkeys)
tmpkeys = [_k for _k in new_trainset.keys() if _k not in tmpset]

In [58]:
np.random.choice(tmpkeys, size=10, replace=False)

array([769481, 743694, 547838, 460796, 508755, 208171, 699225, 623451,
       704995, 432022])

In [63]:
new_trainset[704995]

{'topic_id': 149,
 'facet_id': 'F0189',
 'initial_request': 'Tell me about uplift at yellowstone national park',
 'question': 'would you like to know why some scientists think the yellowstone volcano is in the process of dying',
 'answer': 'yes',
 'conversation_context': [{'question': 'what would you like to know about uplift',
   'answer': 'i would like to know how likely it would be and what effects an eruption yellowstone national park would be'},
  {'question': 'would you like to see a ground map showing the uplift of yellowstone national park',
   'answer': 'yes please and maybe an airiel map as well'}],
 'context_id': 106974,
 'topic_desc': 'How likely is a volcanic eruption at Yellowstone National Park, and what would be the effects of such an eruption?',
 'facet_desc': 'How likely is a volcanic eruption at Yellowstone National Park, and what would be the effects of such an eruption?'}

In [64]:
new_trainset[704996]

{'topic_id': 149,
 'facet_id': 'F0190',
 'initial_request': 'Tell me about uplift at yellowstone national park',
 'question': 'would you like to know why some scientists think the yellowstone volcano is in the process of dying',
 'answer': 'no i need information on geothermal activity',
 'conversation_context': [{'question': 'what would you like to know about uplift',
   'answer': 'i would like to know about the ground elevation changes and movement resulting from ongoing lowlevel geothermal activity at yellowstone national park'},
  {'question': 'would you like to see a ground map showing the uplift of yellowstone national park',
   'answer': 'yes'}],
 'context_id': 106975,
 'topic_desc': 'How likely is a volcanic eruption at Yellowstone National Park, and what would be the effects of such an eruption?',
 'facet_desc': 'information on ground elevation changes and movement resulting from ongoing low-level geothermal activity at Yellowstone National Park'}

In [65]:
new_trainset[704997]

{'topic_id': 149,
 'facet_id': 'F0191',
 'initial_request': 'Tell me about uplift at yellowstone national park',
 'question': 'would you like to know why some scientists think the yellowstone volcano is in the process of dying',
 'answer': 'i am not interested in this topic',
 'conversation_context': [{'question': 'what would you like to know about uplift',
   'answer': 'i want the yellowstone volcano observatory homepage'},
  {'question': 'would you like to see a ground map showing the uplift of yellowstone national park',
   'answer': 'yes'}],
 'context_id': 106976,
 'topic_desc': 'How likely is a volcanic eruption at Yellowstone National Park, and what would be the effects of such an eruption?',
 'facet_desc': 'home page for the Yellowstone Volcano Observatory'}

In [68]:
new_trainset[704998]

{'topic_id': 149,
 'facet_id': 'F0189',
 'initial_request': 'Tell me about uplift at yellowstone national park',
 'question': 'would you like to know why some scientists think the yellowstone volcano is in the process of dying',
 'answer': 'yes',
 'conversation_context': [{'question': 'would you like more information of the course of the yellowstone volcano and the different hypothesizes scientest have',
   'answer': 'no i want to know what the likelihood is of an eruption and the effects of it'},
  {'question': 'would you like to know about yellowstones history of eruptions',
   'answer': 'i want to know about future eruption likelihood'}],
 'context_id': 106782,
 'topic_desc': 'How likely is a volcanic eruption at Yellowstone National Park, and what would be the effects of such an eruption?',
 'facet_desc': 'How likely is a volcanic eruption at Yellowstone National Park, and what would be the effects of such an eruption?'}

In [25]:
traineval['MRR100'][2]

{'Q00384': {'no_answer': 1.0, 'with_answer': 1.0},
 'Q00001': {'with_answer': 1.0, 'no_answer': 1.0},
 'Q03442': {'no_answer': 0.5, 'with_answer': 1.0},
 'Q03402': {'no_answer': 0.5, 'with_answer': 1.0},
 'Q03725': {'no_answer': 0.5, 'with_answer': 1.0},
 'Q00769': {'no_answer': 0.16666666666666666, 'with_answer': 1.0},
 'Q01286': {'no_answer': 1.0, 'with_answer': 1.0},
 'Q02152': {'no_answer': 1.0, 'with_answer': 0.5},
 'Q02527': {'no_answer': 0.5, 'with_answer': 1.0},
 'Q02950': {'no_answer': 0.5, 'with_answer': 0.5},
 'Q03023': {'no_answer': 0.3333333333333333, 'with_answer': 1.0},
 'Q03071': {'no_answer': 0.5, 'with_answer': 0.5},
 'Q03351': {'no_answer': 0.5, 'with_answer': 1.0},
 'MAX': {'with_answer': 1.0, 'no_answer': 1.0},
 'MIN': {'with_answer': 0.5, 'no_answer': 0.16666666666666666}}

In [38]:
deveval['NDCG3'][968]

{'Q00706': {'no_answer': 0.0, 'with_answer': 0.2960819109658653},
 'Q00001': {'with_answer': 0.2960819109658653,
  'no_answer': 0.2960819109658653},
 'Q03340': {'no_answer': 0.46927872602275655,
  'with_answer': 0.2960819109658653},
 'Q03826': {'no_answer': 0.0, 'with_answer': 0.46927872602275655},
 'Q03895': {'no_answer': 0.0, 'with_answer': 0.0},
 'Q00756': {'no_answer': 0.0, 'with_answer': 0.0},
 'Q01185': {'no_answer': 0.7653606369886218,
  'with_answer': 0.7653606369886218},
 'Q01578': {'no_answer': 0.0, 'with_answer': 0.0},
 'Q01596': {'no_answer': 0.0, 'with_answer': 0.0},
 'Q02191': {'no_answer': 0.0, 'with_answer': 0.0},
 'Q02762': {'no_answer': 0.0, 'with_answer': 0.2960819109658653},
 'Q02817': {'no_answer': 0.0, 'with_answer': 0.0},
 'Q02907': {'no_answer': 0.0, 'with_answer': 0.0},
 'MAX': {'with_answer': 0.7653606369886218, 'no_answer': 0.7653606369886218},
 'MIN': {'with_answer': 0.0, 'no_answer': 0.0}}

In [41]:
question_bank[question_bank['question_id'] == 'Q01185']

Unnamed: 0,question_id,question
1184,Q01185,are you offering appraisal services


In [46]:
len(cid2maxq)

15066

In [11]:
tidfid2data = {}
for key, value in devset.items():
    newkey = '{}-{}-{}'.format(value['topic_id'], value['facet_id'], len(value['conversation_context']))
    if newkey not in tidfid2data:
        tidfid2data[newkey] = []
    tidfid2data[newkey].append(key)

In [12]:
len(tidfid2data)

474

In [13]:
tidfid2data.keys()

dict_keys(['8-F0968-0', '8-F0969-0', '8-F0970-0', '8-F0971-0', '18-F0316-0', '18-F0317-0', '18-F0318-0', '18-F0319-0', '18-F0320-0', '20-F0395-0', '20-F0396-0', '20-F0397-0', '20-F0398-0', '24-F0539-0', '24-F0540-0', '24-F0541-0', '24-F0542-0', '25-F0572-0', '25-F0573-0', '25-F0574-0', '27-F0630-0', '27-F0631-0', '27-F0632-0', '27-F0633-0', '27-F0634-0', '35-F0774-0', '35-F0775-0', '35-F0776-0', '35-F0777-0', '35-F0778-0', '35-F0779-0', '37-F0782-0', '37-F0783-0', '37-F0784-0', '37-F0785-0', '44-F0813-0', '44-F0814-0', '44-F0815-0', '44-F0816-0', '44-F0817-0', '45-F0818-0', '45-F0819-0', '45-F0820-0', '51-F0841-0', '51-F0842-0', '51-F0843-0', '51-F0844-0', '51-F0845-0', '71-F0932-0', '71-F0933-0', '71-F0934-0', '71-F0935-0', '74-F0943-0', '74-F0944-0', '74-F0945-0', '79-F0964-0', '79-F0965-0', '79-F0966-0', '79-F0967-0', '83-F0981-0', '83-F0982-0', '83-F0983-0', '85-F0987-0', '85-F0988-0', '85-F0989-0', '85-F0990-0', '101-F0010-0', '101-F0011-0', '101-F0012-0', '101-F0013-0', '106-F002

In [21]:
tidfid2data['8-F0968-0']

[2287, 2291, 2295, 2299, 2303, 2307, 2311, 2315, 2319, 2323, 2327, 2331]

In [24]:
search_q = 'are you looking for appraisal companies near you'
match_keys = []
for key, value in tqdm(devset.items()):
    if value['question'] == search_q:
        match_keys.append(key)
    else:
        tmp = [_v['question'] for _v in value['conversation_context']]
        if search_q in tmp:
            match_keys.append(key)

100%|██████████| 182435/182435 [00:00<00:00, 353977.83it/s]


In [25]:
match_keys

[2291,
 2292,
 2293,
 2294,
 15755,
 15756,
 15757,
 15758,
 15799,
 15800,
 15801,
 15802,
 15803,
 15804,
 15805,
 15806,
 15807,
 15808,
 15809,
 15810,
 15811,
 15812,
 15813,
 15814,
 15815,
 15816,
 15817,
 15818,
 15819,
 15820,
 15821,
 15822,
 15823,
 15824,
 15825,
 15826,
 15827,
 15828,
 15829,
 15830,
 15831,
 15832,
 15833,
 15834,
 15835,
 15836,
 15837,
 15838,
 15839,
 15840,
 15841,
 15842,
 15847,
 15848,
 15849,
 15850,
 15891,
 15892,
 15893,
 15894,
 15935,
 15936,
 15937,
 15938,
 15979,
 15980,
 15981,
 15982,
 16023,
 16024,
 16025,
 16026,
 16067,
 16068,
 16069,
 16070,
 16111,
 16112,
 16113,
 16114,
 16155,
 16156,
 16157,
 16158,
 16199,
 16200,
 16201,
 16202,
 16243,
 16244,
 16245,
 16246,
 162160,
 162161,
 162162,
 162163,
 162164,
 162165,
 162166,
 162167,
 162168,
 162169,
 162170,
 162171,
 162172,
 162173,
 162174,
 162175,
 162176,
 162177,
 162178,
 162179,
 162180,
 162181,
 162182,
 162183,
 162184,
 162185,
 162186,
 162187,
 162188,
 162189

In [26]:
devset[2291]

{'topic_id': 8,
 'facet_id': 'F0968',
 'initial_request': 'I want to know about appraisals.',
 'question': 'are you looking for appraisal companies near you',
 'answer': 'yes',
 'conversation_context': [],
 'context_id': 968,
 'topic_desc': 'How are home values appraised? I want to know how home appraisals are done.',
 'facet_desc': "What companies can give an appraisal of my home\\'s value?"}

In [27]:
devset[2292]

{'topic_id': 8,
 'facet_id': 'F0969',
 'initial_request': 'I want to know about appraisals.',
 'question': 'are you looking for appraisal companies near you',
 'answer': 'i am looking for jewlery appraisal companies',
 'conversation_context': [],
 'context_id': 969,
 'topic_desc': 'How are home values appraised? I want to know how home appraisals are done.',
 'facet_desc': "I\\'m looking for companies that appraise jewelry."}

In [28]:
devset[15755]

{'topic_id': 8,
 'facet_id': 'F0968',
 'initial_request': 'I want to know about appraisals.',
 'question': 'are you looking for a type of appraiser',
 'answer': 'im looking for nearby companies that do home appraisals',
 'conversation_context': [{'question': 'are you looking for appraisal companies near you',
   'answer': 'yes'}],
 'context_id': 2291,
 'topic_desc': 'How are home values appraised? I want to know how home appraisals are done.',
 'facet_desc': "What companies can give an appraisal of my home\\'s value?"}

In [24]:
import pandas as pd
from rank_bm25 import BM25Okapi
import nltk
import pickle
from nltk.stem.porter import PorterStemmer

nltk.download('punkt')
nltk.download('stopwords')

def stem_tokenize(text, remove_stopwords=True):
  stemmer = PorterStemmer()
  tokens = [word for sent in nltk.sent_tokenize(text) \
                                      for word in nltk.word_tokenize(sent)]
  tokens = [word for word in tokens if word not in \
          nltk.corpus.stopwords.words('english')]
  return [stemmer.stem(word) for word in tokens]

[nltk_data] Downloading package punkt to /Users/moli/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /Users/moli/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [29]:
# Files paths

multi_turn_request_file_path = '../data/dev_synthetic.pkl'
question_bank_path = '../data/question_bank.tsv'
run_file_path = '../sample_runs/dev_bm25_multi_turn'

In [18]:
# Reads files and build bm25 corpus (index)
with open(multi_turn_request_file_path, 'rb') as fi:
    dev = pickle.load(fi)
question_bank = pd.read_csv(question_bank_path, sep='\t').fillna('')

question_bank['tokenized_question_list'] = question_bank['question'].map(stem_tokenize)
question_bank['tokenized_question_str'] = question_bank['tokenized_question_list'].map(lambda x: ' '.join(x))

bm25_corpus = question_bank['tokenized_question_list'].tolist()
bm25 = BM25Okapi(bm25_corpus)

In [19]:
# Reads the dev file and create the context_dict to make predictions
context_dict = dict()
for rec_id in dev:
    ctx_id = dev[rec_id]['context_id']
    if ctx_id not in context_dict:
        context_dict[ctx_id] = {'initial_request': dev[rec_id]['initial_request'],
                                'conversation_context': dev[rec_id]['conversation_context']}

In [22]:
# Runs bm25 for every query and stores output in file.

def build_query(context_info):
    query_str = context_info['initial_request']
    for ctx in context_info['conversation_context']:
        query_str += ctx['question'] + ' ' + ctx['answer']
    return query_str

def select_no_duplicate_questions(bm25_q_list, conv_context):
    prev_questions = [x['question'] for x in conv_context]
    bm25_preds = question_bank.set_index('tokenized_question_str').loc[bm25_q_list, 'question'].tolist()
    pred_list = []
    for q in bm25_preds:
        if q not in prev_questions:
            pred_list.append(q)
    return pred_list

with open(run_file_path, 'w') as fo:
  for ctx_id in context_dict:
    query = build_query(context_dict[ctx_id])
    bm25_ranked_list = bm25.get_top_n(stem_tokenize(query, True), 
                                    bm25_corpus,
                                    n=5)
    bm25_q_list = [' '.join(sent) for sent in bm25_ranked_list]
    preds = select_no_duplicate_questions(bm25_q_list, context_dict[ctx_id]['conversation_context'])
    for i, qid in enumerate(preds):
        fo.write('{} 0 "{}" {} {} bm25_multi_turn\n'.format(ctx_id, qid, i, len(preds)-i))
        break # we write only one result per context.

In [32]:
! python ../official_src/clariq_eval_tool.py    --eval_task document_relevance\
                                --data_dir ../data/ \
                                --multi_turn \
                                --experiment_type dev \
                                --run_file {run_file_path} #\
                                # --out_file {run_file_path}.eval

NDCG1: 0.21898645957785742
NDCG3: 0.201618860054938
NDCG5: 0.19652670322787674
NDCG10: 0.1856817702651898
NDCG20: 0.17112798502504814
P1: 0.2747245453338643
P3: 0.2423116067082614
P5: 0.2295632550112837
P10: 0.2003849727864065
P20: 0.15768949953537767
MRR100: 0.35986740195719213
