# Training MRC Model

In [1]:
import os

# Expose only GPUs 2 and 5 to this process (CUDA_VISIBLE_DEVICES = "2,5").
os.environ.update(CUDA_VISIBLE_DEVICES="2,5")

In [2]:
from simpletransformers.question_answering import QuestionAnsweringModel
import json

# Fine-tuning hyper-parameters passed to the simpletransformers QA model.
train_args = dict(
    learning_rate=3e-5,
    num_train_epochs=2,
    max_seq_length=384,
    doc_stride=128,
    overwrite_output_dir=True,
    reprocess_input_data=False,
    train_batch_size=2,
    gradient_accumulation_steps=8,
)

# Start from the pretrained 'xlm-roberta-base' checkpoint.
model = QuestionAnsweringModel('xlmroberta', 'xlm-roberta-base', train_args)

Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForQuestionAnswering: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing XLMRobertaForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForQuestionAnswering were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream tas

In [5]:
# Load the SQuAD-style training file. The encoding is passed explicitly
# because open() otherwise uses the platform's locale encoding, which breaks
# on non-ASCII text when that locale is not UTF-8.
with open('../datasets/XORQA/tydi_xor_gp/gp_squad_train_data.json', 'r', encoding='utf-8') as f:
    squad_train = json.load(f)

# Flatten data -> topics -> paragraphs into one list of paragraph entries.
# The raw payload keeps its own name so train_data is only ever the list.
train_data = [
    paragraph
    for topic in squad_train['data']
    for paragraph in topic['paragraphs']
]

In [8]:
# Fine-tune on the flattened training paragraphs. As the last expression of
# the cell, the value returned by train_model is echoed as the cell output.
model.train_model(train_data)

convert squad examples to features: 100%|██████████| 14003/14003 [00:09<00:00, 1462.94it/s]
add example index and unique id: 100%|██████████| 14003/14003 [00:00<00:00, 306678.08it/s]


HBox(children=(FloatProgress(value=0.0, description='Epoch', max=2.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 2', max=7330.0, style=ProgressStyle(de…

  model.parameters(), args.max_grad_norm





HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 2', max=7330.0, style=ProgressStyle(de…





(1832, 2.089423501302842)

# Retrieval

In [2]:
import json

In [3]:
class mydict(dict):
    """A ``dict`` whose ``str()`` form is its JSON serialisation."""

    def __str__(self) -> str:
        # Delegate to json.dumps with its default settings.
        as_json = json.dumps(self)
        return as_json

In [4]:
# Load the SQuAD-style dev file. The encoding is passed explicitly because
# open() otherwise uses the platform's locale encoding, which breaks on
# non-ASCII text when that locale is not UTF-8.
with open('../datasets/XORQA/tydi_xor_gp/gp_squad_dev_data.json', 'r', encoding='utf-8') as f:
    dev_data = json.load(f)

In [5]:
# Flatten data -> topics -> paragraphs into one list of paragraph entries.
dev_data = [
    paragraph
    for topic in dev_data['data']
    for paragraph in topic['paragraphs']
]

In [6]:
# Keep a paragraph once for every question in it whose language is neither
# Telugu ('te') nor Bengali ('bn').
dev_data_edited = [
    paragraph
    for paragraph in dev_data
    for qa in paragraph['qas']
    if qa['lang'] not in ('te', 'bn')
]

In [7]:
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm

In [8]:
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text

In [9]:
def batch_encode(encoder, context, bs=10):
    """Encode ``context`` in mini-batches and stack the results along axis 0.

    Parameters
    ----------
    encoder : callable
        Called with one slice of ``context`` at a time; the object it returns
        must provide a ``.numpy()`` method.
    context : sequence
        Items to encode. It is sliced as ``context[start:start + bs]``.
    bs : int, default 10
        Maximum number of items handed to ``encoder`` per call.

    Returns
    -------
    numpy.ndarray
        The per-batch arrays concatenated along axis 0, in input order.

    Raises
    ------
    ValueError
        If ``bs`` is smaller than 1.
    """
    if bs < 1:
        raise ValueError(f"bs must be a positive integer, got {bs!r}")

    # Step through the real batch boundaries only, so the encoder is never
    # handed a trailing empty slice when len(context) is a multiple of bs.
    # An empty context still triggers a single call on the empty slice.
    starts = range(0, len(context), bs) or range(1)
    encoded_batches = [
        encoder(context[start:start + bs]).numpy() for start in starts
    ]
    return np.concatenate(encoded_batches, 0)

In [10]:
def sim_search(question_encoded, doc_encoded):
    """Rank documents by inner-product similarity to a query, best first.

    Parameters
    ----------
    question_encoded : numpy.ndarray
        Query embedding; anything broadcastable to ``doc_encoded.shape``
        (for example shape ``(dim,)`` or ``(1, dim)``).
    doc_encoded : numpy.ndarray
        2-D array with one document embedding per row.

    Returns
    -------
    numpy.ndarray
        Row indices of ``doc_encoded`` ordered from highest to lowest score.
    """
    # A read-only broadcast view replaces a materialised N x dim copy of the query.
    query_view = np.broadcast_to(question_encoded, doc_encoded.shape)
    # Row-wise inner product in one vectorised call instead of a Python-level map.
    sim_score = np.einsum('ij,ij->i', query_view, doc_encoded)
    return np.argsort(sim_score)[::-1]

# Document retrieval

In [11]:
# Configuration for the document-level retrieval run.
mode = 'dev'
# Alternative language sets from other experiments are kept below, commented out.
# NOTE(review): q_lan is assigned but not read by any cell visible in this notebook.
# q_lan = ['ru_en','ru','ko','ko_en','bn','fi','te','ja']
q_lan = ['ru','ko','fi','ja']
# q_lan = ['ru_en']
# q_lan = ['ru_en','ko_en','bn_en','fi_en','te_en','ja']
corpus = 'XORQA'

# Candidate documents, read from a CSV whose path is built from corpus and mode.
# doc_raw is the list that the retrieval loop indexes with the ranking.
df_doc = pd.read_csv(f'../data_preprocess/{corpus}/{mode}/{corpus.lower()}_doc_en-en.csv') 
doc_raw = df_doc['doc'].to_list()

# Other checkpoints, left commented out for reference; none of them is loaded here.
# model = f'../models/{corpus}/best_model/finetuned_USE_XORQA_train_en-fi_top0-0_q-d-distillation_1000MSE_1e-4LR_TrueShuffle_teacher_best_teacher_batchsize_8_acc_metric'
# module_load = hub.load(model)
# model = f'../models/{corpus}/best_model/finetuned_USE_XORQA_train_en-ru_ko_ja_fi_top0-0_para_finetuned_USE_XORQA_train_en-ru_ko_ja_fi_top1-3_para_0.27margin_2WarmStep_cosLoss_RegFalse_FCFalse_small_0.27_1BatchUpdate_2hard_3shard_cos_HardUpdateFalse_SemiHardUpdateFalse_RegFalse'
# teacher_model = hub.load(model)


In [12]:
# Path of the encoder checkpoint used for retrieval.
# NOTE(review): `model` is a path string here, while the training section uses
# the same name for a QuestionAnsweringModel; confirm the two are never
# expected to coexist in one kernel session.
model = f'../models/{corpus}/finetuned_USE_XORQA_train_en-ru_ko_ja_fi_top0-0_q-d-distillation_1000MSE_0.1MSEq_1.0MSEd_0.01MSEqd_0.0005LR_teacher_best_teacher_batchsize_16_acc_metric_3term'
# module_load is the callable used below to embed both the corpus and the questions.
module_load = hub.load(model)


In [13]:
# Encode exactly the list that the ranking is later used to index (doc_raw),
# so row i of context_encoded always corresponds to doc_raw[i].
context_encoded = batch_encode(module_load, doc_raw)

In [14]:
# Attach the top-ranked document to every dev example.
# Each example is shallow-copied with its 'context' replaced, so the entries
# of dev_data_edited are left unmodified and the cell can be re-run safely.
# The loop variable is deliberately not called dev_data, so the list of that
# name is not overwritten.
final_data = []
for example in tqdm(dev_data_edited):
    # Only the first question of each paragraph entry is used as the query.
    question = example['qas'][0]['question']
    question_encoded = module_load(question).numpy()
    ranking = sim_search(question_encoded, context_encoded)
    # ranking[0] is the index of the best-scoring entry of doc_raw.
    final_data.append({**example, 'context': doc_raw[ranking[0]]})

HBox(children=(FloatProgress(value=0.0, max=2209.0), HTML(value='')))




In [15]:
# Persist the examples paired with their retrieved documents.
with open('results/final_data_doc_3term.json', 'w') as out_file:
    json.dump(final_data, out_file)

# Passage retrieval

In [16]:
# Configuration for the passage-level retrieval run.
mode = 'dev'
# Alternative language sets from other experiments are kept below, commented out.
# NOTE(review): q_lan is assigned but not read by any cell visible in this notebook.
# q_lan = ['ru_en','ru','ko','ko_en','bn','fi','te','ja']
q_lan = ['ru','ko','fi','ja']
# q_lan = ['ru_en']
# q_lan = ['ru_en','ko_en','bn_en','fi_en','te_en','ja']
corpus = 'XORQA'

# Candidate passages, read from a CSV whose path is built from corpus and mode.
# doc_raw is rebound here to the passage texts; it is the list that the
# retrieval loop indexes with the ranking.
df_para = pd.read_csv(f'../data_preprocess/{corpus}/{mode}/{corpus.lower()}_para_en-en.csv') 
doc_raw = df_para['para'].to_list()

# Other checkpoints, left commented out for reference; none of them is loaded here.
# model = f'../models/{corpus}/best_model/finetuned_USE_XORQA_train_en-fi_top0-0_q-d-distillation_1000MSE_1e-4LR_TrueShuffle_teacher_best_teacher_batchsize_8_acc_metric'
# module_load = hub.load(model)
# model = f'../models/{corpus}/best_model/finetuned_USE_XORQA_train_en-ru_ko_ja_fi_top0-0_para_finetuned_USE_XORQA_train_en-ru_ko_ja_fi_top1-3_para_0.27margin_2WarmStep_cosLoss_RegFalse_FCFalse_small_0.27_1BatchUpdate_2hard_3shard_cos_HardUpdateFalse_SemiHardUpdateFalse_RegFalse'
# teacher_model = hub.load(model)


In [17]:
# Encode exactly the list that the ranking is later used to index (doc_raw),
# so row i of context_encoded always corresponds to doc_raw[i].
context_encoded = batch_encode(module_load, doc_raw)

In [18]:
# Attach the top-ranked passage to every dev example.
# Each example is shallow-copied with its 'context' replaced, so the entries
# of dev_data_edited are left unmodified and the cell can be re-run safely.
# The loop variable is deliberately not called dev_data, so the list of that
# name is not overwritten.
final_data = []
for example in tqdm(dev_data_edited):
    # Only the first question of each paragraph entry is used as the query.
    question = example['qas'][0]['question']
    question_encoded = module_load(question).numpy()
    ranking = sim_search(question_encoded, context_encoded)
    # ranking[0] is the index of the best-scoring entry of doc_raw.
    final_data.append({**example, 'context': doc_raw[ranking[0]]})

HBox(children=(FloatProgress(value=0.0, max=2209.0), HTML(value='')))




In [19]:
# Persist the examples paired with their retrieved passages.
with open('results/final_data_para_3term.json', 'w') as out_file:
    json.dump(final_data, out_file)

# DPR

In [17]:
import pandas as pd

In [12]:
# Contexts retrieved by DPR, one row per question. The loop below reads the
# 'question' and 'context_ans' columns.
dpr_df = pd.read_csv('results/final_context_dpr_doc.csv')

In [13]:
# Display the frame as a visual sanity check of the loaded columns.
dpr_df

Unnamed: 0,question,context_ans
0,Где вручается Шно́белевская премия ?,The Nobel Prize in Chemistry () is awarded ann...
1,В каком театре работал Серге́й Ерва́ндович Кур...,Kurginyan was a member of the commission on ne...
2,Сколько длились Бои за город Дейр-эз-Зор?,"The Battle of Ganja (Armenian:, '; ) or Elizab..."
3,Сколько статуй героев на площади Героев в Буда...,"In Budapest, Hungary, the Heroes' Square, bett..."
4,Когда была основана сборная Швейцарии по футболу?,The first football club in Switzerland is the ...
...,...,...
1719,Mikä on Brasilian pinta-ala?,"Brazil ( ), officially the Federative Republic..."
1720,Missä Niue sijaitsee?,Mean sea level (MSL) (often shortened to sea l...
1721,Kuinka kauan puberteetti keskimäärin kestää?,"On average, girls begin puberty around ages 10..."
1722,Kuka suomalainen pelasi Cercle Bruggessa 80-lu...,Lanier was drafted number one overall by the N...


In [14]:
# Context used when DPR has no row for a question.
MISSING_CONTEXT = 'XXXXXXXXXXXX'

# Build one question -> context lookup instead of scanning the DataFrame for
# every question. For a repeated question the first row wins, which matches
# taking element [0] of the filtered frame.
dpr_context_by_question = (
    dpr_df.drop_duplicates(subset='question', keep='first')
    .set_index('question')['context_ans']
    .to_dict()
)

# Each example is shallow-copied with its 'context' replaced, so the entries
# of dev_data_edited are left unmodified and the cell can be re-run safely.
# The loop variable is deliberately not called dev_data, so the list of that
# name is not overwritten.
final_data = []
for example in tqdm(dev_data_edited):
    # Only the first question of each paragraph entry is looked up.
    question = example['qas'][0]['question']
    # A missing question is the only case that falls back to the placeholder;
    # any other problem (e.g. a missing column) surfaces as an error.
    context = dpr_context_by_question.get(question, MISSING_CONTEXT)
    final_data.append({**example, 'context': context})

HBox(children=(FloatProgress(value=0.0, max=2209.0), HTML(value='')))




In [15]:
# Persist the examples paired with their DPR contexts.
with open('results/final_data_dpr_doc.json', 'w') as out_file:
    json.dump(final_data, out_file)

# QA reader

In [1]:
import os
# The GPU selection is set before simpletransformers is imported.
# NOTE(review): presumably so the restriction is in place before any CUDA
# initialisation happens; keep this ordering unless confirmed otherwise.
os.environ["CUDA_VISIBLE_DEVICES"] = "2,5"
from simpletransformers.question_answering import QuestionAnsweringModel
import json
# Load the reader from the local 'outputs/' directory.
# NOTE(review): presumably where the training section saved the fine-tuned
# model (no output directory is configured there); confirm.
model_qa = QuestionAnsweringModel('xlmroberta', 'outputs/')

In [2]:
class mydict(dict):
    """A ``dict`` whose ``str()`` form is its JSON serialisation."""

    def __str__(self) -> str:
        # Delegate to json.dumps with its default settings.
        as_json = json.dumps(self)
        return as_json

In [3]:
# Reader input: the passage-level retrieval output.
# (Document-level alternative: final_data_doc_3term)
with open('results/final_data_para_3term.json', 'r') as in_file:
    dev_data = json.load(in_file)

In [4]:
# Run the reader over the retrieved contexts. The next cell reads preds[0] as
# a sequence of records that each carry an 'id' and an 'answer' list.
preds = model_qa.predict(dev_data)

convert squad examples to features: 100%|██████████| 2209/2209 [00:04<00:00, 481.09it/s]
add example index and unique id: 100%|██████████| 2209/2209 [00:00<00:00, 526193.64it/s]


HBox(children=(FloatProgress(value=0.0, description='Running Prediction', max=288.0, style=ProgressStyle(descr…




In [5]:
# One [question id, first answer] pair per prediction record.
submission_list = [
    [record['id'], record['answer'][0]]
    for record in preds[0]
]

In [6]:
# Turn the [id, answer] pairs into a mapping of id -> answer.
submission =  mydict(submission_list) 

In [7]:
# Write the id -> answer mapping that the evaluation script reads.
with open('results/submission_3term_para.json', 'w') as out_file:
    json.dump(submission, out_file)

In [8]:
# Score the passage-level predictions against the dev file with the evaluation script.
!python eval_xor_full.py \
--data_file ../datasets/XORQA/xor_dev_retrieve_eng_span_v1_1.jsonl.txt \
--pred_file results/submission_3term_para.json

loading examples from ../datasets/XORQA/xor_dev_retrieve_eng_span_v1_1.jsonl.txt
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no ans

no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
Evaluating the performance on te
F1: 0.0, EM:0.0, BLEU:0.0
Evaluating the performance o

In [9]:
# Score the document-level predictions against the dev file with the evaluation script.
# NOTE(review): no cell visible in this notebook writes results/submission_3term_doc.json;
# presumably it came from re-running the reader cells on final_data_doc_3term.json — confirm.
!python eval_xor_full.py \
--data_file ../datasets/XORQA/xor_dev_retrieve_eng_span_v1_1.jsonl.txt \
--pred_file results/submission_3term_doc.json

loading examples from ../datasets/XORQA/xor_dev_retrieve_eng_span_v1_1.jsonl.txt
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no ans

no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
no answers
Evaluating the performance on te
F1: 0.0, EM:0.0, BLEU

| Method / language | F1 | EM |
| --- | --- | --- |
| **CL-ReLTK para** | | |
| RU | 21.1 | 16.8 |
| KO | 27.8 | 20.4 |
| JA | 26.1 | 20.2 |
| FI | 20.7 | 16.1 |
| AVG | 23.9 | 18.4 |
| **CL-ReLTK doc** | | |
| RU | 24.3 | 18.8 |
| KO | 28.5 | 21.7 |
| JA | 26.3 | 20.6 |
| FI | 21.2 | 16.7 |
| AVG | 25.1 | 19.5 |