## Sentence-level online prompt mining: MLQA

In [1]:
import copy
import glob
import json
import os
import re
import sys
from collections import Counter, defaultdict

import jsonlines
import numpy as np
import pandas as pd

from exploring_sentence_level import (
    load_model,
    mine_prompt_gt,
    segment_sentence,
    run_online_prompt_mining
)

### 0. Download dataset

```bash
cd ../scripts
bash ./download_mlqa.sh
```

### 1. Process dataset

In [2]:
MLQA_BASE_DIR = '../data/mlqa/MLQA_V1/'
MLQA_LANGS = ['en', 'ar', 'de', 'es', 'hi', 'vi', 'zh']

# (name used on disk for the directory and file prefix, key suffix used in this notebook)
_MLQA_SPLITS = (('dev', 'val'), ('test', 'test'))

# Maps '<lang>_val' / '<lang>_test' to the 'data' field of the MLQA file with
# English contexts and questions in <lang>.
mlqa_xx = {}
for lang in MLQA_LANGS:
    for file_split, split_name in _MLQA_SPLITS:
        path = os.path.join(
            MLQA_BASE_DIR,
            file_split,
            f'{file_split}-context-en-question-{lang}.json',
        )
        # Context manager so the file handle is closed deterministically.
        with open(path, 'r', encoding='utf-8') as f:
            # Stored as a 1-tuple on purpose: later cells read mlqa_xx[key][0],
            # so the wrapper is kept to stay compatible with them.
            mlqa_xx[f'{lang}_{split_name}'] = (json.load(f)['data'],)


In [3]:
# Each mlqa_xx value is a 1-tuple, so [0] selects the list of `data` entries
# whose length is shown here.
len(mlqa_xx['ar_test'][0])

2389

In [4]:
def get_squad_answer_str(context, qas):
    """Flatten the QA entries of one paragraph into plain tuples.

    Args:
        context: The paragraph text; copied unchanged into every tuple.
        qas: Iterable of dicts, each with a 'question' key and an 'answers'
            list whose items have 'text' and 'answer_start' keys.

    Returns:
        A list of ``(context, question, answer, answer_start)`` tuples, one
        per entry in ``qas``. Only the first answer of each entry is used.
    """
    def _flatten(entry):
        question_text = entry['question']
        first_answer = entry['answers'][0]
        return (
            context,
            question_text,
            first_answer['text'],
            first_answer['answer_start'],
        )

    return [_flatten(entry) for entry in qas]

In [5]:
# lang -> {'val': [...], 'test': [...]}; every list element is one flattened QA example.
mlqa_xx_dataset = defaultdict(lambda: {'val': [], 'test': []})
for lang in MLQA_LANGS:
    for split_name in ['val', 'test']:
        # [0]: each mlqa_xx value is a 1-tuple wrapping the list of `data` entries.
        for item in mlqa_xx[f'{lang}_{split_name}'][0]:
            for paragraph in item['paragraphs']:
                context = paragraph['context']
                context_qa_pairs = get_squad_answer_str(context=context, qas=paragraph['qas'])

                for context_qa_pair in context_qa_pairs:
                    # The first tuple field is the same `context` as above, so it
                    # is not re-bound here.
                    _, question, answer, answer_start = context_qa_pair
                    # mine_prompt_gt receives the whole (context, question,
                    # answer, answer_start) tuple.
                    gt_sentence = mine_prompt_gt(context_qa_pair)
                    qa_item = {
                        'question': question,
                        'context': context,
                        'segmented_context': segment_sentence(context),
                        'answer': answer,
                        'answer_start': answer_start,
                        'gt_sentence': gt_sentence,
                    }
                    mlqa_xx_dataset[lang][split_name].append(qa_item)


In [6]:
# Number of flattened QA examples for 'ar', as a (val, test) pair.
tuple(len(mlqa_xx_dataset['ar'][split]) for split in ('val', 'test'))

(517, 5335)

In [7]:
# Inspect the first flattened example of the 'en' test split.
mlqa_xx_dataset['en']['test'][0]

{'question': 'Who analyzed the biopsies?',
 'context': 'In 1994, five unnamed civilian contractors and the widows of contractors Walter Kasza and Robert Frost sued the USAF and the United States Environmental Protection Agency. Their suit, in which they were represented by George Washington University law professor Jonathan Turley, alleged they had been present when large quantities of unknown chemicals had been burned in open pits and trenches at Groom. Biopsies taken from the complainants were analyzed by Rutgers University biochemists, who found high levels of dioxin, dibenzofuran, and trichloroethylene in their body fat. The complainants alleged they had sustained skin, liver, and respiratory injuries due to their work at Groom, and that this had contributed to the deaths of Frost and Kasza. The suit sought compensation for the injuries they had sustained, claiming the USAF had illegally handled toxic materials, and that the EPA had failed in its duty to enforce the Resource Conser

### 2. Compute question-sentence similarity


#### 2.1 Load models

##### a) Load mUSE_small (v3) model (as a baseline)

In [4]:
# Baseline encoder, identified by its TF Hub URL.
# NOTE(review): how load_model resolves or caches the URL is defined in
# exploring_sentence_level and is not visible here.
muse_small_v3_model = load_model('https://tfhub.dev/google/universal-sentence-encoder-multilingual/3')


KeyboardInterrupt: 

##### b) Load teacher models

In [None]:
# Teacher checkpoints, given as relative paths into the CL-ReLKT_store directory.
XQUAD_TEACHER_DIR = '../../../CL-ReLKT_store/models/XQUAD/teacher_model/'
MLQA_TEACHER_DIR = '../../../CL-ReLKT_store/models/MLQA/teacher_model/'

In [None]:
# Load one teacher model per training dataset (XQUAD, MLQA) from the directories above.
xquad_teacher_model = load_model(XQUAD_TEACHER_DIR)
mlqa_teacher_model = load_model(MLQA_TEACHER_DIR)

##### c) Load student models

In [None]:
# Student checkpoints: for each training dataset (XQUAD, XORQA, MLQA) there are
# two directories, "supported languages" and "unsupported languages".
# NOTE(review): what "supported"/"unsupported" refers to is not defined in this
# notebook — confirm against the model store.
XQUAD_STUDENT_SUPPORTED_LANGS_DIR = '../../../CL-ReLKT_store/models/XQUAD/student_best_supported_languages/'
XQUAD_STUDENT_UNSUPPORTED_LANGS_DIR = '../../../CL-ReLKT_store/models/XQUAD/student_best_unsupported_languages/'

XORQA_STUDENT_SUPPORTED_LANGS_DIR = '../../../CL-ReLKT_store/models/XORQA/student_best_supported_languages/'
XORQA_STUDENT_UNSUPPORTED_LANGS_DIR = '../../../CL-ReLKT_store/models/XORQA/student_best_unsupported_languages/'

MLQA_STUDENT_SUPPORTED_LANGS_DIR = '../../../CL-ReLKT_store/models/MLQA/student_best_supported_languages/'
MLQA_STUDENT_UNSUPPORTED_LANGS_DIR = '../../../CL-ReLKT_store/models/MLQA/student_best_unsupported_languages/'

In [None]:
# Student models from the "supported languages" directories.
xquad_student_supported_langs_model = load_model(XQUAD_STUDENT_SUPPORTED_LANGS_DIR)
xorqa_student_supported_langs_model = load_model(XORQA_STUDENT_SUPPORTED_LANGS_DIR)
mlqa_student_supported_langs_model = load_model(MLQA_STUDENT_SUPPORTED_LANGS_DIR)

# Student models from the "unsupported languages" directories.
xquad_student_unsupported_langs_model = load_model(XQUAD_STUDENT_UNSUPPORTED_LANGS_DIR)
xorqa_student_unsupported_langs_model = load_model(XORQA_STUDENT_UNSUPPORTED_LANGS_DIR)
mlqa_student_unsupported_langs_model = load_model(MLQA_STUDENT_UNSUPPORTED_LANGS_DIR)

In [None]:
# Registry of every loaded model, keyed by a 'model-…' prefix. The evaluation
# loop iterates over it and uses the key to label each run.
MODEL_MAPPING = {
  # mUSE_small (baseline)
  'model-muse_small_v3': muse_small_v3_model,
  # teacher models
  'model-xquad_teacher': xquad_teacher_model,
  'model-mlqa_teacher': mlqa_teacher_model,
  # student models (supported languages first, then unsupported)
  'model-xquad_student_supported_langs': xquad_student_supported_langs_model,
  'model-xorqa_student_supported_langs': xorqa_student_supported_langs_model,
  'model-mlqa_student_supported_langs': mlqa_student_supported_langs_model,
  'model-xquad_student_unsupported_langs': xquad_student_unsupported_langs_model,
  'model-xorqa_student_unsupported_langs': xorqa_student_unsupported_langs_model,
  'model-mlqa_student_unsupported_langs': mlqa_student_unsupported_langs_model,
}



In [8]:
# Registry of every processed split, keyed 'dataset-mlqa_<lang>_<split>'.
# Insertion order is val then test for each language, in MLQA_LANGS order.
DATASET_MAPPING = {}
for lang in MLQA_LANGS:
    for split_name in ('val', 'test'):
        DATASET_MAPPING[f'dataset-mlqa_{lang}_{split_name}'] = mlqa_xx_dataset[lang][split_name]

print(DATASET_MAPPING.keys())

dict_keys(['dataset-mlqa_en_val', 'dataset-mlqa_en_test', 'dataset-mlqa_ar_val', 'dataset-mlqa_ar_test', 'dataset-mlqa_de_val', 'dataset-mlqa_de_test', 'dataset-mlqa_es_val', 'dataset-mlqa_es_test', 'dataset-mlqa_hi_val', 'dataset-mlqa_hi_test', 'dataset-mlqa_vi_val', 'dataset-mlqa_vi_test', 'dataset-mlqa_zh_val', 'dataset-mlqa_zh_test'])


#### 2.2 Run inference and evaluate

The function `run_online_prompt_mining` iterates over question-answer-passage triplets $(q_i, a_i, p_i)$ and computes 
the cosine similarity scores between the question $q_i$ and the segmented sentences $s^i_j$, where $p_i = ( s^i_0, \ldots , s^i_{|p_i| - 1} )$, and ranks each question-sentence pair by similarity score. Then, it evaluates the sentence-level precision@k. Note: there is only one ground-truth sentence (i.e., the sentence that contains the answer span). 


In [None]:

# dataset_prefix -> model_prefix -> value returned by run_online_prompt_mining.
# Must exist before the loop assigns into it; defaultdict creates the inner
# dict on first access and is still JSON-serializable as a dict subclass.
results = defaultdict(dict)

# Evaluate every model on every dataset split.
for dataset_prefix, dataset in DATASET_MAPPING.items():
    print(f'\n\ndataset_prefix: {dataset_prefix}')
    for model_prefix, model in MODEL_MAPPING.items():
        print(f'\n - model_prefix: {model_prefix}')
        _result = run_online_prompt_mining(dataset,
                                           prefix=f'{dataset_prefix}_{model_prefix}',
                                           model=model)

        results[dataset_prefix][model_prefix] = _result
        print('--'*50)
    print('\n')
    print('=='*50)
    print('\n')
    

In [None]:
class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that also serializes NumPy arrays and NumPy scalars.

    Pass it as ``cls=NumpyEncoder`` to ``json.dump`` / ``json.dumps``.
    """

    def default(self, obj):
        """Convert objects the base encoder cannot handle.

        Args:
            obj: The object ``json`` could not serialize natively.

        Returns:
            A nested list for ``np.ndarray``, or the equivalent Python scalar
            for NumPy scalar types (e.g. ``np.float32``, ``np.int64``).

        Raises:
            TypeError: For any other unsupported type (raised by the base class).
        """
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        # NumPy scalars such as np.float32 / np.int64 are not subclasses of the
        # Python numeric types, so json rejects them unless converted.
        if isinstance(obj, np.generic):
            return obj.item()
        return super().default(obj)


#### 2.3 Write result as JSON file

In [None]:
# Persist the nested results dict. NumpyEncoder (defined in the previous cell)
# handles NumPy values that json cannot serialize natively; UTF-8 is set
# explicitly because ensure_ascii=False writes non-ASCII characters verbatim.
with open('./eval_results.dataset_name-mlqa.json', 'w', encoding='utf-8') as f:
    json.dump(results, f, cls=NumpyEncoder, ensure_ascii=False, indent=2)

### 3. Convert evaluation results to a pandas.DataFrame

In [None]:
# Reload the saved evaluation results; the context manager closes the file handle.
with open('./eval_results.dataset_name-mlqa.json', 'r', encoding='utf-8') as f:
    results = json.load(f)

In [None]:
# Show the dataset keys present in the results and how many there are.
list(results), len(results)

In [None]:
# Flatten results (dataset -> model -> (metric, raw_result)) into one row per
# (dataset, model) pair and export it as CSV.
result_objs = []
for dataset_name, result_model_group in results.items():
    for model_name, (metric, _raw_result) in result_model_group.items():
        top1, precision_at_k = metric

        row = {
            'dataset_name': dataset_name,
            'model_name': model_name,
            'precision_at_1': top1,
        }
        # Keys are looked up as strings ('2', '3', ...), matching the JSON file
        # loaded above. Building the columns from one list keeps every column
        # name in step with the k it reads.
        for k in (2, 3, 4, 5, 10):
            row[f'precision_at_{k}'] = precision_at_k[str(k)]
        result_objs.append(row)

df = pd.DataFrame(result_objs)
df.to_csv('./eval_results.dataset_name-mlqa.csv')