In [1]:
# Directory passed to TrainingArguments as its output_dir.
output_dir = "out"

# Absolute path of the JSON predictions file that is loaded with
# create_dataset_from_run_mrc_output further down in the notebook.
input_file = "/dccstor/jsmc-nmt-01/bool/expts/toolkit/b/b19/qtc/eval_predictions.json"

from transformers import (
    AutoConfig,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorWithPadding,
    HfArgumentParser,
    Trainer,
    TrainingArguments)
from transformers.trainer_utils import set_seed
from oneqa.boolqa.processors.postprocessors.nway import NWayClassifierPostProcessor
from oneqa.boolqa.processors.preprocessors.default import NWayPreProcessor

from examples.boolqa.mrc2dataset  import create_dataset_from_run_mrc_output, create_dataset_from_json_str


# Seed once up front; the same value is also handed to TrainingArguments below.
seed = 42
set_seed(seed)

# Training is switched off (do_train=False); only evaluation/prediction is enabled.
# NOTE(review): the optimisation settings are presumably irrelevant for a
# predict-only run -- confirm before tuning them.
training_args = TrainingArguments(
    # where to write, and what to run
    output_dir=output_dir,
    overwrite_output_dir=True,
    do_train=False,
    do_eval=True,
    evaluation_strategy="no",
    # batching
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    gradient_accumulation_steps=1,
    # optimisation schedule
    num_train_epochs=1,
    learning_rate=4e-05,
    warmup_ratio=0.1,
    weight_decay=0.1,
    # checkpointing and reproducibility
    save_steps=50000,
    seed=seed,
)

# Column names and label inventory used by the pre/post-processors in later cells.
id_key = "example_id"
sentence2_key = "passage"
label_list = ["False", "NONE", "True"]
output_label_prefix = "boolean_answer"



In [2]:
# Single source of truth for the checkpoint location, so that config, tokenizer
# and model can never be loaded from different directories by accident.
model_name_or_path = '/dccstor/jsmc-nmt-01/bool/git/IOTA-boolean-challenge/model/evc-c5'

# The classifier head must have one output per entry of label_list (defined in
# the setup cell); deriving the count keeps the model and the post-processor's
# label mapping in agreement.
config = AutoConfig.from_pretrained(model_name_or_path, num_labels=len(label_list))

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)

model = AutoModelForSequenceClassification.from_pretrained(model_name_or_path, config=config)



In [3]:
# Post-processor: built from the label inventory and key names of the setup cell.
postprocessor_class = NWayClassifierPostProcessor  # TODO # taskargs.
postprocessor = postprocessor_class(
    k=10,
    drop_label="NONE",
    label_list=label_list,
    id_key=id_key,
    output_label_prefix=output_label_prefix,
)

# Pre-processor: the second-sentence column is fixed to 'span_answer_text';
# sentence2_key only decides whether a second sentence is used at all.
preprocessor_class = NWayPreProcessor  # TODO task_args.preprocessor
if sentence2_key:
    second_sentence_column = 'span_answer_text'
else:
    second_sentence_column = None
preprocessor = preprocessor_class(
    sentence1_key='question',
    sentence2_key=second_sentence_column,
    tokenizer=tokenizer,
    load_from_cache_file=False,
    max_seq_len=500,
    padding=False,
)
    


In [4]:
def _is_english_boolean(example):
    """Return True for English examples whose predicted question type is boolean."""
    return example['language'] == 'english' and example['question_type_pred'] == 'boolean'


# Load the predictions file, keep only the English boolean questions, then
# run the pre-processor to obtain the evaluation examples and dataset.
examples = create_dataset_from_run_mrc_output(input_file, unpack=False)
examples = examples.filter(_is_english_boolean)
eval_examples, eval_dataset = preprocessor.process_eval(examples)
eval_examples


  0%|          | 0/19 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Dataset({
    features: ['example_id', 'cls_score', 'start_logit', 'end_logit', 'span_answer', 'span_answer_score', 'start_index', 'end_index', 'passage_index', 'target_type_logits', 'span_answer_text', 'yes_no_answer', 'start_stdev', 'end_stdev', 'query_passage_similarity', 'normalized_span_answer_score', 'confidence_score', 'question', 'language', 'order', 'rank', 'question_type_pred', 'question_type_scores', 'question_type_conf'],
    num_rows: 112
})

In [5]:
# The Trainer is used for prediction only: no training set, no metric function
# and no explicit collator are supplied.
trainer = Trainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=None,
    eval_dataset=eval_dataset,
    data_collator=None,
    compute_metrics=None,  # compute_metrics,
)

In [6]:
# Run the model over the evaluation dataset and keep only the prediction array.
prediction_output = trainer.predict(eval_dataset, metric_key_prefix="predict")
predictions = prediction_output.predictions


The following columns in the test set  don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: language, span_answer_text, question, example_id. If language, span_answer_text, question, example_id are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 112
  Batch size = 16


In [7]:
import pandas as pd

# Preview the prediction scores of the first five examples (one column per class).
first_rows = predictions[0:5, :]
pd.DataFrame.from_records(first_rows)

Unnamed: 0,0,1,2
0,-5.042398,3.993901,1.586379
1,-6.511352,5.316398,1.436958
2,4.817096,-1.596816,-2.872052
3,-4.071929,-0.438778,4.115036
4,-5.682469,6.056089,0.199635


In [9]:
# Combine the examples, the tokenized dataset and the model output, then wrap
# the post-processed predictions in a dataset for inspection.
eval_preds = postprocessor.process_references_and_predictions(
    eval_examples,
    eval_dataset,
    predictions,
)
predictions_json = eval_preds.predictions
eval_preds_ds = create_dataset_from_json_str(predictions_json, False)
print(eval_preds_ds)

in process_references_and_predictions
Dataset({
    features: ['example_id', 'cls_score', 'start_logit', 'end_logit', 'span_answer', 'span_answer_score', 'start_index', 'end_index', 'passage_index', 'target_type_logits', 'span_answer_text', 'yes_no_answer', 'start_stdev', 'end_stdev', 'query_passage_similarity', 'normalized_span_answer_score', 'confidence_score', 'question', 'language', 'order', 'rank', 'question_type_pred', 'question_type_scores', 'question_type_conf', 'boolean_answer_pred', 'boolean_answer_scores', 'boolean_answer_conf'],
    num_rows: 112
})


In [14]:
from datasets import ClassLabel, Sequence
import random
import pandas as pd
from IPython.display import display, HTML

# Based on https://github.com/huggingface/notebooks/blob/main/examples/question_answering.ipynb
def show_elements(dataset, cols):
    """Display the columns ``cols`` of ``dataset`` as an HTML table.

    The dataset is converted to a pandas DataFrame. Every column whose feature
    is a ClassLabel (or a Sequence of ClassLabel) has its integer ids replaced
    by the corresponding label names before rendering.

    Args:
        dataset: object accepted by ``pd.DataFrame`` that exposes a
            ``features`` mapping of column name to feature type.
        cols: list of column names to include in the rendered table.
    """
    frame = pd.DataFrame(dataset)
    for name, feature in dataset.features.items():
        if isinstance(feature, ClassLabel):
            label_names = feature.names
            frame[name] = frame[name].transform(lambda idx: label_names[idx])
        elif isinstance(feature, Sequence) and isinstance(feature.feature, ClassLabel):
            label_names = feature.feature.names
            frame[name] = frame[name].transform(
                lambda ids: [label_names[idx] for idx in ids]
            )
    table_html = frame[cols].to_html()
    display(HTML(table_html))
    
import random

# Show a random sample of the post-processed eval predictions. The sample size
# is capped at the dataset length because random.sample raises ValueError when
# asked for more items than the population contains (e.g. after a stricter
# filter leaves fewer than 20 rows).
num_samples = min(20, len(eval_preds_ds))
random_idxs = random.sample(range(len(eval_preds_ds)), num_samples)
random_eval_examples = eval_preds_ds.select(random_idxs)

cols = ['example_id', 'question', 'question_type_pred', 'span_answer', 'span_answer_text', 'boolean_answer_pred']
show_elements(random_eval_examples, cols)  # Show random eval predictions

Unnamed: 0,example_id,question,question_type_pred,span_answer,span_answer_text,boolean_answer_pred
0,466eb35c-762b-4d75-84d1-b3434b512555,Is the great horned owl endangered?,boolean,"{'end_position': 84861, 'start_position': 84752}",Hunting and trapping of great horned owls may continue on a small scale but is now illegal in most countries.,True
1,dbf4a953-7aaa-4aa1-a027-a16b14752750,Is Camp Maybry a working base?,boolean,"{'end_position': 875, 'start_position': 796}",The facility has served a variety of military purposes since its establishment.,True
2,dbcd04df-cb7b-47e0-8ae0-2f8d9f294061,Is Daydream Software still an active company?,boolean,"{'end_position': 722, 'start_position': 686}",Google acquired Waze Mobile in 2013.,True
3,2030a582-ebd2-45bf-9690-fe46007c71ee,Does Frankfurt have a regional dish?,boolean,"{'end_position': 243, 'start_position': 152}","Frankfurt am Main, Offenbach am Main, Darmstadt, Langen, and other parts of southern Hesse.",True
4,e8dc42b7-9931-424e-95d8-c4175a5b80f9,Does Yasumi Matsuno have a wife?,boolean,"{'end_position': 1175, 'start_position': 1103}",In 1989 Matsuno left his job as a reporter to work at Quest Corporation.,True
5,66318298-52bd-4f41-9c83-e492ab543eeb,Does France have a major park?,boolean,"{'end_position': 154, 'start_position': 26}","more than 421 municipal parks and gardens, covering more than three thousand hectares and containing more than 250,000 trees.[1]",True
6,54d56520-61cd-40a8-a96f-f7b5b887f5ba,Can you make your X-ray machine?,boolean,"{'end_position': 8494, 'start_position': 8391}",A prototype handheld x-ray scanner using the source could be manufactured in as soon as three years.[6],True
7,e5d8a0e9-0f22-4aa6-a039-568a44b662fa,Do fungus spread by spores?,boolean,"{'end_position': 5549, 'start_position': 5438}","The fungi produce asexual spores which disperse by wind, water or by insect vectors[9] spreading the infection.",True
8,f6e58e86-343a-43b2-b723-b070785b7373,Can you tell skin color from DNA?,boolean,"{'end_position': 599, 'start_position': 457}","The actual skin color of different humans is affected by many substances, although the single most important substance is the pigment melanin.",True
9,31f875b5-96a9-4425-9458-1819db0a3f66,Is Boris Yeltsin still alive?,boolean,"{'end_position': 3718, 'start_position': 3660}",Yeltsin died of congestive heart failure on 23 April 2007.,False
