In [1]:
# https://colab.research.google.com/drive/1uSlWtJdZmLrI3FCNIlUHFxwAJiSu2J0-#scrollTo=-ONLrgJK99TQ
# !pip install transformers
# !pip install statsmodels
# !pip install tqdm

### Models to run

bert-large-cased-whole-word-masking-finetuned-squad  
bert-large-uncased-whole-word-masking-finetuned-squad  
distilbert-base-cased-distilled-squad  
distilbert-base-uncased-distilled-squad  


In [2]:
from transformers import BertForQuestionAnswering, AutoModelForQuestionAnswering, AutoTokenizer
from transformers import DistilBertTokenizer, DistilBertForQuestionAnswering
from transformers import BertTokenizer

import os
import torch
import torch.nn as nn
import json
from tqdm import tqdm
import nltk

model_type = 'distilbert_uncased'

if model_type == 'bert_cased':
    huggingface_model_name = 'bert-large-cased-whole-word-masking-finetuned-squad'
    model = BertForQuestionAnswering.from_pretrained('bert-large-cased-whole-word-masking-finetuned-squad',
                                                    cache_dir="/data/models_cache")
    tokenizer = BertTokenizer.from_pretrained('bert-large-cased-whole-word-masking-finetuned-squad',
                                             cache_dir="/data/models_cache")

elif model_type == 'bert_uncased':
    huggingface_model_name = 'bert-large-uncased-whole-word-masking-finetuned-squad'
    model = BertForQuestionAnswering.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad',
                                                    cache_dir="/data/models_cache")
    tokenizer = BertTokenizer.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad',
                                             cache_dir="/data/models_cache")
    
elif model_type == 'distilbert_cased':
    huggingface_model_name = 'distilbert-base-cased-distilled-squad'
    model = DistilBertForQuestionAnswering.from_pretrained("distilbert-base-cased-distilled-squad",
                                                    cache_dir="/data/models_cache")
    tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-cased-distilled-squad",
                                             cache_dir="/data/models_cache")
    
elif model_type == 'distilbert_uncased':
    huggingface_model_name = 'distilbert-base-uncased-distilled-squad'
    model = DistilBertForQuestionAnswering.from_pretrained("distilbert-base-uncased-distilled-squad",
                                                    cache_dir="/data/models_cache")
    tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased-distilled-squad",
                                             cache_dir="/data/models_cache")


In [3]:
# Setting statically 
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if torch.cuda.device_count() > 1:
  model = nn.DataParallel(model, device_ids=[0,1])

model = model.to(device)

In [4]:
nltk.download('punkt')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [5]:
import nltk
from nltk.stem import WordNetLemmatizer
wordnet_lemmatizer = WordNetLemmatizer()

sentence = "He was running and eating at same time. He has bad habit of swimming after playing long hours in the Sun."
punctuations="?:!.,;"
sentence_words = nltk.word_tokenize(sentence)
# for word in sentence_words:
#     if word in punctuations:
#         sentence_words.remove(word)

# sentence_words
# print("{0:20}{1:20}".format("Word","Lemma"))
# for word in sentence_words:
#     print ("{0:20}{1:20}".format(word,wordnet_lemmatizer.lemmatize(word, pos="v")))

def lemmatize(sentence):
    results = ''
    for word in sentence_words:
        results = results + ' ' + wordnet_lemmatizer.lemmatize(word, pos="v")
    
    return results
    
lemmatize(sentence)
    

' He be run and eat at same time . He have bad habit of swim after play long hours in the Sun .'

In [6]:
wordnet_lemmatizer.lemmatize("was", pos="v")

'be'

In [7]:

def check_answer(answer, ground_truths):
    for ground_truth in ground_truths:
        if answer.lower() == ground_truth['text'].lower():
            return True
    return False

def predict_answer(question, context):
    input_ids = tokenizer.encode(question, context, max_length=512)

    # Search the input_ids for the first instance of the `[SEP]` token.
    sep_index = input_ids.index(tokenizer.sep_token_id)

    # The number of segment A tokens includes the [SEP] token istelf.
    num_seg_a = sep_index + 1

    # The remainder are segment B.
    num_seg_b = len(input_ids) - num_seg_a

    # Construct the list of 0s and 1s.
    segment_ids = [0]*num_seg_a + [1]*num_seg_b

    # There should be a segment_id for every input token.
    assert len(segment_ids) == len(input_ids)

#    start_scores, end_scores = model(torch.tensor([input_ids]).to(device), # The tokens representing our input text.
#                      token_type_ids=torch.tensor([segment_ids]).to(device)) # The segment IDs to differentiate question from answer_text

    start_scores, end_scores = model(torch.tensor([input_ids]).to(device))
    
    answer_start = torch.argmax(start_scores)
    answer_end = torch.argmax(end_scores)

    # Combine the tokens in the answer and print it out.
    tokens = tokenizer.convert_ids_to_tokens(input_ids)
    #answer = ' '.join(tokens[answer_start:answer_end+1])
    
    answer = tokens[answer_start]

    # Select the remaining answer tokens and join them with whitespace.
    for i in range(answer_start + 1, answer_end + 1):

        # If it's a subword token, then recombine it with the previous token.
        if tokens[i][0:2] == '##':
            answer += tokens[i][2:]

        # Otherwise, add a space then the token.
        else:
            answer += ' ' + tokens[i]
    
    return answer, answer_start

## JSON Format
# -data
#   -paragraphs
#     -context
#     -qas
#       -question
#       -id
#       -answers
#          -text
#          -answer_start
#   -title
#   -split

def predict_answers(qa_json_file):
    print("Predicting answers for: {}".format(qa_json_file))
    results = {}
    count = 0
    with open(qa_json_file) as json_file:
        amazon_qa = json.load(json_file)['data']
        for splits in tqdm(amazon_qa):

            paragraphs = splits['paragraphs']
            for paragraph in paragraphs:
                count += 1

                for qa in paragraph['qas']:

                    # Predict the anser, and then validate against ground truth. 
                    answer, answer_start = predict_answer(qa['question'], paragraph['context'])
                    result = check_answer(answer, qa['answers'])

                    results.update([(qa['id'], answer)])
    return results

def get_augmented_filename(input_dir, model_name, question_set, parts_of_speech=None, frequency_percentile=None):
    
    if model_name == 'orig':
        filename = input_dir+question_set+".json"
        return filename

    assert bool(parts_of_speech) ^ bool(frequency_percentile), "Can only pass one of parts_of_speech and frequency_percentile"
    
    if parts_of_speech:
        filename = input_dir+question_set+"_"+model_name+"_"+"_".join(parts_of_speech)+".json"
    elif frequency_percentile:
        filename = input_dir+question_set+"_"+model_name+"_Percentile_"+str(frequency_percentile)+".json"

    return filename


def get_prediction_filename(output_dir, model_name, question_set, parts_of_speech=None, frequency_percentile=None):
    
    if model_name == 'orig':
        filename = output_dir+question_set+"_"+huggingface_model_name+".json"
        return filename
    
    assert bool(parts_of_speech) ^ bool(frequency_percentile), "Can only pass one of parts_of_speech and frequency_percentile"
    
    if parts_of_speech:
        filename = output_dir+question_set+"_"+model_name+"_"+"_".join(parts_of_speech)+"_"+huggingface_model_name+".json"
    elif frequency_percentile:
        filename = output_dir+question_set+"_"+model_name+"_Percentile_"+str(frequency_percentile)+"_"+huggingface_model_name+".json"

    return filename


def write_prediction_file(results, output_dir, model_name, question_set, parts_of_speech=None, frequency_percentile=None):
    predictions_filename = get_prediction_filename(output_dir, model_name, question_set, parts_of_speech, frequency_percentile)

    print("Writting file: {}".format(predictions_filename))
    
    with open(predictions_filename, 'w') as fp:
        json.dump(results, fp)
    
    return predictions_filename



In [8]:
augmented_dir = '/data/distribution_shift/augmented_qa/'
predictions_dir = '/data/distribution_shift/augmented_qa/predictions/'

qa_files = {
    "amazon_reviews_v1_0": '/data/distribution_shift/new_qa/amazon_reviews_v1.0.json',
    "reddit_v1_0": '/data/distribution_shift/new_qa/reddit_v1.0.json',
    "new_wiki_v1.0": '/data/distribution_shift/new_qa/new_wiki_v1.0.json',
    "nyt_v1.0": '/data/distribution_shift/new_qa/nyt_v1.0.json',
}

parts_of_speech_list = [
    ['JJ', 'VB'],
    ['JJ'],
    ['VB', 'RB'],
    ['VB'],
    ['RB'],
    ['RB', 'RBR', 'RBZ'],
    ['VB', 'VBD', 'VBG', 'VBN', 'VBP'],
    ['RB', 'RBR', 'RBZ', 'VB', 'VBD', 'VBG' 'VBN', 'VBP']
]

augmenting_models = [
    'orig',
    'bert',
    'roberta'
]

frequency_percentiles = [
    0.10,
    0.20,
    0.30,
    0.50
]

for model_name in augmenting_models:
    for quesion_set, filename in qa_files.items():
        if model_name == 'orig':
#             out_filepath = augmented_dir+quesion_set+".json"
            out_filepath = get_prediction_filename(predictions_dir, model_name, quesion_set)
            if not os.path.exists(out_filepath):
                in_filename = filename
                predicted_answers = predict_answers(in_filename)
                write_prediction_file(predicted_answers, predictions_dir, model_name, quesion_set, parts_of_speech=None, frequency_percentile=None)
            else:
                print("Skipping exiting output: {}".format(out_filepath))
            continue

        for parts_of_speech in parts_of_speech_list:
            
            out_filepath = get_prediction_filename(predictions_dir, model_name, quesion_set, parts_of_speech=parts_of_speech)
            
            if not os.path.exists(out_filepath):
                in_filename = get_augmented_filename(augmented_dir, model_name, quesion_set, parts_of_speech=parts_of_speech)
                predicted_answers = predict_answers(in_filename)
                write_prediction_file(predicted_answers, predictions_dir, model_name, quesion_set, parts_of_speech=parts_of_speech)
            else:
                print("Skipping exiting output: {}".format(out_filepath))
                
        for frequency_percentile in frequency_percentiles:
            
            out_filepath = get_prediction_filename(predictions_dir, model_name, quesion_set, frequency_percentile=frequency_percentile)
            
            if not os.path.exists(out_filepath):
                in_filename = get_augmented_filename(augmented_dir, model_name, quesion_set, frequency_percentile=frequency_percentile)
                predicted_answers = predict_answers(in_filename)
                write_prediction_file(predicted_answers, predictions_dir, model_name, quesion_set, frequency_percentile=frequency_percentile)
            else:
                print("Skipping exiting output: {}".format(out_filepath))                

  0%|          | 0/413 [00:00<?, ?it/s]

Predicting answers for: /data/distribution_shift/new_qa/amazon_reviews_v1.0.json


100%|██████████| 413/413 [01:31<00:00,  4.50it/s]
  0%|          | 0/1 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/amazon_reviews_v1_0_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/new_qa/reddit_v1.0.json


100%|██████████| 1/1 [01:30<00:00, 90.69s/it]
  0%|          | 0/48 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/reddit_v1_0_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/new_qa/new_wiki_v1.0.json


100%|██████████| 48/48 [01:11<00:00,  1.48s/it]
  0%|          | 0/797 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/new_wiki_v1.0_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/new_qa/nyt_v1.0.json


100%|██████████| 797/797 [01:33<00:00,  8.55it/s]
  0%|          | 0/413 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/nyt_v1.0_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/amazon_reviews_v1_0_bert_JJ_VB.json


100%|██████████| 413/413 [01:25<00:00,  4.86it/s]
  0%|          | 0/413 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/amazon_reviews_v1_0_bert_JJ_VB_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/amazon_reviews_v1_0_bert_JJ.json


100%|██████████| 413/413 [01:25<00:00,  4.83it/s]
  0%|          | 0/413 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/amazon_reviews_v1_0_bert_JJ_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/amazon_reviews_v1_0_bert_VB_RB.json


100%|██████████| 413/413 [01:30<00:00,  4.57it/s]
  0%|          | 0/413 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/amazon_reviews_v1_0_bert_VB_RB_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/amazon_reviews_v1_0_bert_VB.json


100%|██████████| 413/413 [01:30<00:00,  4.56it/s]
  0%|          | 0/413 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/amazon_reviews_v1_0_bert_VB_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/amazon_reviews_v1_0_bert_RB.json


100%|██████████| 413/413 [01:30<00:00,  4.56it/s]
  0%|          | 0/413 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/amazon_reviews_v1_0_bert_RB_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/amazon_reviews_v1_0_bert_RB_RBR_RBZ.json


100%|██████████| 413/413 [01:30<00:00,  4.57it/s]
  0%|          | 0/413 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/amazon_reviews_v1_0_bert_RB_RBR_RBZ_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/amazon_reviews_v1_0_bert_VB_VBD_VBG_VBN_VBP.json


100%|██████████| 413/413 [01:30<00:00,  4.58it/s]
  0%|          | 0/413 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/amazon_reviews_v1_0_bert_VB_VBD_VBG_VBN_VBP_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/amazon_reviews_v1_0_bert_RB_RBR_RBZ_VB_VBD_VBGVBN_VBP.json


100%|██████████| 413/413 [01:29<00:00,  4.62it/s]
  0%|          | 0/413 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/amazon_reviews_v1_0_bert_RB_RBR_RBZ_VB_VBD_VBGVBN_VBP_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/amazon_reviews_v1_0_bert_Percentile_0.1.json


100%|██████████| 413/413 [01:27<00:00,  4.74it/s]
  0%|          | 0/413 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/amazon_reviews_v1_0_bert_Percentile_0.1_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/amazon_reviews_v1_0_bert_Percentile_0.2.json


100%|██████████| 413/413 [01:26<00:00,  4.77it/s]
  0%|          | 0/413 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/amazon_reviews_v1_0_bert_Percentile_0.2_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/amazon_reviews_v1_0_bert_Percentile_0.3.json


100%|██████████| 413/413 [01:26<00:00,  4.77it/s]
  0%|          | 0/413 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/amazon_reviews_v1_0_bert_Percentile_0.3_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/amazon_reviews_v1_0_bert_Percentile_0.5.json


100%|██████████| 413/413 [01:25<00:00,  4.84it/s]
  0%|          | 0/1 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/amazon_reviews_v1_0_bert_Percentile_0.5_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/reddit_v1_0_bert_JJ_VB.json


100%|██████████| 1/1 [01:23<00:00, 83.59s/it]
  0%|          | 0/1 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/reddit_v1_0_bert_JJ_VB_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/reddit_v1_0_bert_JJ.json


100%|██████████| 1/1 [01:23<00:00, 83.67s/it]
  0%|          | 0/1 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/reddit_v1_0_bert_JJ_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/reddit_v1_0_bert_VB_RB.json


100%|██████████| 1/1 [01:27<00:00, 87.43s/it]
  0%|          | 0/1 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/reddit_v1_0_bert_VB_RB_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/reddit_v1_0_bert_VB.json


100%|██████████| 1/1 [01:27<00:00, 87.03s/it]
  0%|          | 0/1 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/reddit_v1_0_bert_VB_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/reddit_v1_0_bert_RB.json


100%|██████████| 1/1 [01:27<00:00, 87.55s/it]
  0%|          | 0/1 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/reddit_v1_0_bert_RB_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/reddit_v1_0_bert_RB_RBR_RBZ.json


100%|██████████| 1/1 [01:27<00:00, 87.09s/it]
  0%|          | 0/1 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/reddit_v1_0_bert_RB_RBR_RBZ_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/reddit_v1_0_bert_VB_VBD_VBG_VBN_VBP.json


100%|██████████| 1/1 [01:26<00:00, 86.50s/it]
  0%|          | 0/1 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/reddit_v1_0_bert_VB_VBD_VBG_VBN_VBP_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/reddit_v1_0_bert_RB_RBR_RBZ_VB_VBD_VBGVBN_VBP.json


100%|██████████| 1/1 [01:26<00:00, 86.65s/it]
  0%|          | 0/1 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/reddit_v1_0_bert_RB_RBR_RBZ_VB_VBD_VBGVBN_VBP_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/reddit_v1_0_bert_Percentile_0.1.json


100%|██████████| 1/1 [01:25<00:00, 85.39s/it]
  0%|          | 0/1 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/reddit_v1_0_bert_Percentile_0.1_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/reddit_v1_0_bert_Percentile_0.2.json


100%|██████████| 1/1 [01:24<00:00, 84.83s/it]
  0%|          | 0/1 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/reddit_v1_0_bert_Percentile_0.2_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/reddit_v1_0_bert_Percentile_0.3.json


100%|██████████| 1/1 [01:24<00:00, 84.03s/it]
  0%|          | 0/1 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/reddit_v1_0_bert_Percentile_0.3_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/reddit_v1_0_bert_Percentile_0.5.json


100%|██████████| 1/1 [01:23<00:00, 83.63s/it]
  0%|          | 0/48 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/reddit_v1_0_bert_Percentile_0.5_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/new_wiki_v1.0_bert_JJ_VB.json


100%|██████████| 48/48 [01:04<00:00,  1.34s/it]
  0%|          | 0/48 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/new_wiki_v1.0_bert_JJ_VB_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/new_wiki_v1.0_bert_JJ.json


100%|██████████| 48/48 [01:04<00:00,  1.33s/it]
  0%|          | 0/48 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/new_wiki_v1.0_bert_JJ_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/new_wiki_v1.0_bert_VB_RB.json


100%|██████████| 48/48 [01:09<00:00,  1.44s/it]
  0%|          | 0/48 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/new_wiki_v1.0_bert_VB_RB_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/new_wiki_v1.0_bert_VB.json


100%|██████████| 48/48 [01:09<00:00,  1.44s/it]
  0%|          | 0/48 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/new_wiki_v1.0_bert_VB_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/new_wiki_v1.0_bert_RB.json


100%|██████████| 48/48 [01:09<00:00,  1.44s/it]
  0%|          | 0/48 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/new_wiki_v1.0_bert_RB_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/new_wiki_v1.0_bert_RB_RBR_RBZ.json


100%|██████████| 48/48 [01:10<00:00,  1.47s/it]
  0%|          | 0/48 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/new_wiki_v1.0_bert_RB_RBR_RBZ_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/new_wiki_v1.0_bert_VB_VBD_VBG_VBN_VBP.json


100%|██████████| 48/48 [01:11<00:00,  1.50s/it]
  0%|          | 0/48 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/new_wiki_v1.0_bert_VB_VBD_VBG_VBN_VBP_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/new_wiki_v1.0_bert_RB_RBR_RBZ_VB_VBD_VBGVBN_VBP.json


100%|██████████| 48/48 [01:14<00:00,  1.54s/it]
  0%|          | 0/48 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/new_wiki_v1.0_bert_RB_RBR_RBZ_VB_VBD_VBGVBN_VBP_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/new_wiki_v1.0_bert_Percentile_0.1.json


100%|██████████| 48/48 [01:14<00:00,  1.56s/it]
  0%|          | 0/48 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/new_wiki_v1.0_bert_Percentile_0.1_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/new_wiki_v1.0_bert_Percentile_0.2.json


100%|██████████| 48/48 [01:13<00:00,  1.52s/it]
  0%|          | 0/48 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/new_wiki_v1.0_bert_Percentile_0.2_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/new_wiki_v1.0_bert_Percentile_0.3.json


100%|██████████| 48/48 [01:11<00:00,  1.49s/it]
  0%|          | 0/48 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/new_wiki_v1.0_bert_Percentile_0.3_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/new_wiki_v1.0_bert_Percentile_0.5.json


100%|██████████| 48/48 [01:10<00:00,  1.46s/it]
  0%|          | 0/797 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/new_wiki_v1.0_bert_Percentile_0.5_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/nyt_v1.0_bert_JJ_VB.json


100%|██████████| 797/797 [01:29<00:00,  8.86it/s]
  0%|          | 0/797 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/nyt_v1.0_bert_JJ_VB_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/nyt_v1.0_bert_JJ.json


100%|██████████| 797/797 [01:32<00:00,  8.66it/s]
  0%|          | 0/797 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/nyt_v1.0_bert_JJ_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/nyt_v1.0_bert_VB_RB.json


100%|██████████| 797/797 [01:40<00:00,  7.93it/s]
  0%|          | 0/797 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/nyt_v1.0_bert_VB_RB_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/nyt_v1.0_bert_VB.json


100%|██████████| 797/797 [01:35<00:00,  8.38it/s]
  0%|          | 0/797 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/nyt_v1.0_bert_VB_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/nyt_v1.0_bert_RB.json


100%|██████████| 797/797 [01:36<00:00,  8.28it/s]
  0%|          | 0/797 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/nyt_v1.0_bert_RB_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/nyt_v1.0_bert_RB_RBR_RBZ.json


100%|██████████| 797/797 [01:35<00:00,  8.33it/s]
  0%|          | 0/797 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/nyt_v1.0_bert_RB_RBR_RBZ_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/nyt_v1.0_bert_VB_VBD_VBG_VBN_VBP.json


100%|██████████| 797/797 [01:35<00:00,  8.37it/s]
  0%|          | 0/797 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/nyt_v1.0_bert_VB_VBD_VBG_VBN_VBP_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/nyt_v1.0_bert_RB_RBR_RBZ_VB_VBD_VBGVBN_VBP.json


100%|██████████| 797/797 [01:35<00:00,  8.32it/s]
  0%|          | 0/797 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/nyt_v1.0_bert_RB_RBR_RBZ_VB_VBD_VBGVBN_VBP_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/nyt_v1.0_bert_Percentile_0.1.json


100%|██████████| 797/797 [01:31<00:00,  8.73it/s]
  0%|          | 0/797 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/nyt_v1.0_bert_Percentile_0.1_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/nyt_v1.0_bert_Percentile_0.2.json


100%|██████████| 797/797 [01:31<00:00,  8.75it/s]
  0%|          | 0/797 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/nyt_v1.0_bert_Percentile_0.2_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/nyt_v1.0_bert_Percentile_0.3.json


100%|██████████| 797/797 [01:30<00:00,  8.85it/s]
  0%|          | 0/797 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/nyt_v1.0_bert_Percentile_0.3_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/nyt_v1.0_bert_Percentile_0.5.json


100%|██████████| 797/797 [01:29<00:00,  8.90it/s]
  0%|          | 0/413 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/nyt_v1.0_bert_Percentile_0.5_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/amazon_reviews_v1_0_roberta_JJ_VB.json


100%|██████████| 413/413 [01:33<00:00,  4.42it/s]
  0%|          | 0/413 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/amazon_reviews_v1_0_roberta_JJ_VB_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/amazon_reviews_v1_0_roberta_JJ.json


100%|██████████| 413/413 [01:33<00:00,  4.42it/s]
  0%|          | 0/413 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/amazon_reviews_v1_0_roberta_JJ_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/amazon_reviews_v1_0_roberta_VB_RB.json


100%|██████████| 413/413 [01:38<00:00,  4.18it/s]
  0%|          | 0/413 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/amazon_reviews_v1_0_roberta_VB_RB_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/amazon_reviews_v1_0_roberta_VB.json


100%|██████████| 413/413 [01:38<00:00,  4.18it/s]
  0%|          | 0/413 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/amazon_reviews_v1_0_roberta_VB_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/amazon_reviews_v1_0_roberta_RB.json


100%|██████████| 413/413 [01:39<00:00,  4.16it/s]
  0%|          | 0/413 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/amazon_reviews_v1_0_roberta_RB_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/amazon_reviews_v1_0_roberta_RB_RBR_RBZ.json


100%|██████████| 413/413 [01:37<00:00,  4.23it/s]
  0%|          | 0/413 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/amazon_reviews_v1_0_roberta_RB_RBR_RBZ_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/amazon_reviews_v1_0_roberta_VB_VBD_VBG_VBN_VBP.json


100%|██████████| 413/413 [01:36<00:00,  4.27it/s]
  0%|          | 0/413 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/amazon_reviews_v1_0_roberta_VB_VBD_VBG_VBN_VBP_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/amazon_reviews_v1_0_roberta_RB_RBR_RBZ_VB_VBD_VBGVBN_VBP.json


100%|██████████| 413/413 [01:36<00:00,  4.26it/s]
  0%|          | 0/413 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/amazon_reviews_v1_0_roberta_RB_RBR_RBZ_VB_VBD_VBGVBN_VBP_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/amazon_reviews_v1_0_roberta_Percentile_0.1.json


100%|██████████| 413/413 [01:35<00:00,  4.35it/s]
  0%|          | 0/413 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/amazon_reviews_v1_0_roberta_Percentile_0.1_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/amazon_reviews_v1_0_roberta_Percentile_0.2.json


100%|██████████| 413/413 [01:33<00:00,  4.42it/s]
  0%|          | 0/413 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/amazon_reviews_v1_0_roberta_Percentile_0.2_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/amazon_reviews_v1_0_roberta_Percentile_0.3.json


100%|██████████| 413/413 [01:32<00:00,  4.48it/s]
  0%|          | 0/413 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/amazon_reviews_v1_0_roberta_Percentile_0.3_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/amazon_reviews_v1_0_roberta_Percentile_0.5.json


100%|██████████| 413/413 [01:31<00:00,  4.54it/s]
  0%|          | 0/1 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/amazon_reviews_v1_0_roberta_Percentile_0.5_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/reddit_v1_0_roberta_JJ_VB.json


100%|██████████| 1/1 [01:29<00:00, 89.59s/it]
  0%|          | 0/1 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/reddit_v1_0_roberta_JJ_VB_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/reddit_v1_0_roberta_JJ.json


100%|██████████| 1/1 [01:30<00:00, 90.04s/it]
  0%|          | 0/1 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/reddit_v1_0_roberta_JJ_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/reddit_v1_0_roberta_VB_RB.json


100%|██████████| 1/1 [01:34<00:00, 94.48s/it]
  0%|          | 0/1 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/reddit_v1_0_roberta_VB_RB_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/reddit_v1_0_roberta_VB.json


100%|██████████| 1/1 [01:35<00:00, 95.49s/it]
  0%|          | 0/1 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/reddit_v1_0_roberta_VB_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/reddit_v1_0_roberta_RB.json


100%|██████████| 1/1 [01:35<00:00, 95.73s/it]
  0%|          | 0/1 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/reddit_v1_0_roberta_RB_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/reddit_v1_0_roberta_RB_RBR_RBZ.json


100%|██████████| 1/1 [01:38<00:00, 98.52s/it]
  0%|          | 0/1 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/reddit_v1_0_roberta_RB_RBR_RBZ_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/reddit_v1_0_roberta_VB_VBD_VBG_VBN_VBP.json


100%|██████████| 1/1 [01:34<00:00, 94.10s/it]
  0%|          | 0/1 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/reddit_v1_0_roberta_VB_VBD_VBG_VBN_VBP_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/reddit_v1_0_roberta_RB_RBR_RBZ_VB_VBD_VBGVBN_VBP.json


100%|██████████| 1/1 [01:35<00:00, 95.04s/it]
  0%|          | 0/1 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/reddit_v1_0_roberta_RB_RBR_RBZ_VB_VBD_VBGVBN_VBP_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/reddit_v1_0_roberta_Percentile_0.1.json


100%|██████████| 1/1 [01:32<00:00, 92.87s/it]
  0%|          | 0/1 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/reddit_v1_0_roberta_Percentile_0.1_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/reddit_v1_0_roberta_Percentile_0.2.json


100%|██████████| 1/1 [01:30<00:00, 90.57s/it]
  0%|          | 0/1 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/reddit_v1_0_roberta_Percentile_0.2_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/reddit_v1_0_roberta_Percentile_0.3.json


100%|██████████| 1/1 [01:27<00:00, 87.80s/it]
  0%|          | 0/1 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/reddit_v1_0_roberta_Percentile_0.3_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/reddit_v1_0_roberta_Percentile_0.5.json


100%|██████████| 1/1 [01:26<00:00, 86.54s/it]
  0%|          | 0/48 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/reddit_v1_0_roberta_Percentile_0.5_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/new_wiki_v1.0_roberta_JJ_VB.json


100%|██████████| 48/48 [01:05<00:00,  1.36s/it]
  0%|          | 0/48 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/new_wiki_v1.0_roberta_JJ_VB_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/new_wiki_v1.0_roberta_JJ.json


100%|██████████| 48/48 [01:05<00:00,  1.36s/it]
  0%|          | 0/48 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/new_wiki_v1.0_roberta_JJ_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/new_wiki_v1.0_roberta_VB_RB.json


100%|██████████| 48/48 [01:11<00:00,  1.50s/it]
  0%|          | 0/48 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/new_wiki_v1.0_roberta_VB_RB_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/new_wiki_v1.0_roberta_VB.json


100%|██████████| 48/48 [01:12<00:00,  1.51s/it]
  0%|          | 0/48 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/new_wiki_v1.0_roberta_VB_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/new_wiki_v1.0_roberta_RB.json


100%|██████████| 48/48 [01:11<00:00,  1.50s/it]
  0%|          | 0/48 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/new_wiki_v1.0_roberta_RB_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/new_wiki_v1.0_roberta_RB_RBR_RBZ.json


100%|██████████| 48/48 [01:11<00:00,  1.49s/it]
  0%|          | 0/48 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/new_wiki_v1.0_roberta_RB_RBR_RBZ_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/new_wiki_v1.0_roberta_VB_VBD_VBG_VBN_VBP.json


100%|██████████| 48/48 [01:10<00:00,  1.46s/it]
  0%|          | 0/48 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/new_wiki_v1.0_roberta_VB_VBD_VBG_VBN_VBP_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/new_wiki_v1.0_roberta_RB_RBR_RBZ_VB_VBD_VBGVBN_VBP.json


100%|██████████| 48/48 [01:10<00:00,  1.48s/it]
  0%|          | 0/48 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/new_wiki_v1.0_roberta_RB_RBR_RBZ_VB_VBD_VBGVBN_VBP_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/new_wiki_v1.0_roberta_Percentile_0.1.json


100%|██████████| 48/48 [01:07<00:00,  1.40s/it]
  0%|          | 0/48 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/new_wiki_v1.0_roberta_Percentile_0.1_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/new_wiki_v1.0_roberta_Percentile_0.2.json


100%|██████████| 48/48 [01:05<00:00,  1.36s/it]
  0%|          | 0/48 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/new_wiki_v1.0_roberta_Percentile_0.2_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/new_wiki_v1.0_roberta_Percentile_0.3.json


100%|██████████| 48/48 [01:03<00:00,  1.33s/it]
  0%|          | 0/48 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/new_wiki_v1.0_roberta_Percentile_0.3_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/new_wiki_v1.0_roberta_Percentile_0.5.json


100%|██████████| 48/48 [01:03<00:00,  1.31s/it]
  0%|          | 0/797 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/new_wiki_v1.0_roberta_Percentile_0.5_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/nyt_v1.0_roberta_JJ_VB.json


100%|██████████| 797/797 [01:29<00:00,  8.94it/s]
  0%|          | 0/797 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/nyt_v1.0_roberta_JJ_VB_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/nyt_v1.0_roberta_JJ.json


100%|██████████| 797/797 [01:28<00:00,  9.00it/s]
  0%|          | 0/797 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/nyt_v1.0_roberta_JJ_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/nyt_v1.0_roberta_VB_RB.json


100%|██████████| 797/797 [01:35<00:00,  8.34it/s]
  0%|          | 0/797 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/nyt_v1.0_roberta_VB_RB_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/nyt_v1.0_roberta_VB.json


100%|██████████| 797/797 [01:35<00:00,  8.32it/s]
  0%|          | 0/797 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/nyt_v1.0_roberta_VB_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/nyt_v1.0_roberta_RB.json


100%|██████████| 797/797 [01:35<00:00,  8.37it/s]
  0%|          | 0/797 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/nyt_v1.0_roberta_RB_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/nyt_v1.0_roberta_RB_RBR_RBZ.json


100%|██████████| 797/797 [01:34<00:00,  8.45it/s]
  0%|          | 0/797 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/nyt_v1.0_roberta_RB_RBR_RBZ_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/nyt_v1.0_roberta_VB_VBD_VBG_VBN_VBP.json


100%|██████████| 797/797 [01:34<00:00,  8.46it/s]
  0%|          | 0/797 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/nyt_v1.0_roberta_VB_VBD_VBG_VBN_VBP_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/nyt_v1.0_roberta_RB_RBR_RBZ_VB_VBD_VBGVBN_VBP.json


100%|██████████| 797/797 [01:36<00:00,  8.29it/s]
  0%|          | 0/797 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/nyt_v1.0_roberta_RB_RBR_RBZ_VB_VBD_VBGVBN_VBP_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/nyt_v1.0_roberta_Percentile_0.1.json


100%|██████████| 797/797 [01:31<00:00,  8.73it/s]
  0%|          | 0/797 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/nyt_v1.0_roberta_Percentile_0.1_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/nyt_v1.0_roberta_Percentile_0.2.json


100%|██████████| 797/797 [01:29<00:00,  8.92it/s]
  0%|          | 0/797 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/nyt_v1.0_roberta_Percentile_0.2_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/nyt_v1.0_roberta_Percentile_0.3.json


100%|██████████| 797/797 [01:28<00:00,  9.02it/s]
  0%|          | 0/797 [00:00<?, ?it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/nyt_v1.0_roberta_Percentile_0.3_distilbert-base-uncased-distilled-squad.json
Predicting answers for: /data/distribution_shift/augmented_qa/nyt_v1.0_roberta_Percentile_0.5.json


100%|██████████| 797/797 [01:27<00:00,  9.14it/s]

Writting file: /data/distribution_shift/augmented_qa/predictions/nyt_v1.0_roberta_Percentile_0.5_distilbert-base-uncased-distilled-squad.json





In [9]:
write_prediction_file(predicted_answers, predictions_dir, model_name, quesion_set, parts_of_speech=parts_of_speech)

Writting file: /data/distribution_shift/augmented_qa/predictions/nyt_v1.0_roberta_RB_RBR_RBZ_VB_VBD_VBGVBN_VBP_distilbert-base-uncased-distilled-squad.json


'/data/distribution_shift/augmented_qa/predictions/nyt_v1.0_roberta_RB_RBR_RBZ_VB_VBD_VBGVBN_VBP_distilbert-base-uncased-distilled-squad.json'

## Results using Bert cased.
Original:  
```
{'exact_match': 61.29489124936773,
 'exact_match_ci': (60.32643155896314, 62.25669155646592),
 'f1': 76.44828148772298,
 'f1_ci': (75.73942758762348, 77.15713538782143)}
```
 
Augmented using parts of speech ('RB', 'VB', 'JJ'):
```
{'exact_match': 55.723443223443226,
 'exact_match_ci': (54.67436316808506, 56.768703886705765),
 'f1': 70.53511389475281,
 'f1_ci': (69.70373232753799, 71.36649546196658)}
 ```
 
Augmented using parts of speech ('JJ', 'VB'):
```
{'exact_match': 57.61699303020246,
 'exact_match_ci': (56.59041181574251, 58.638662021683864),
 'f1': 72.49160994326711,
 'f1_ci': (71.69580181605691, 73.28741807047638)}
```
 
Augmented using parts of speech ('JJ'):
```
{'exact_match': 59.672275376663364,
 'exact_match_ci': (58.655525891224094, 60.68282402622843),
 'f1': 74.34748354796466,
 'f1_ci': (73.57500291336866, 75.11996418255961)}
```

Augmented using parts of speech ('VB', 'RB'):
```
{'exact_match': 60.52954977097531,
 'exact_match_ci': (59.50808371658397, 61.54415775798525),
 'f1': 75.1355165930609,
 'f1_ci': (74.36753548765809, 75.90349769846254)}
```

Augmented using parts of speech ('VB'):
```
{'exact_match': 61.695209191415564,
 'exact_match_ci': (60.69426383659264, 62.68876479362644),
 'f1': 76.60951250625979,
 'f1_ci': (75.8739950555737, 77.34502995694473)}
```

Augmented using parts of speech ('RB'):
```
{'exact_match': 62.686401068328514,
 'exact_match_ci': (61.67688701776737, 63.68768392154747),
 'f1': 77.1423384101846,
 'f1_ci': (76.40242639581122, 77.88225042455691)}
```

Augmented using parts of speech ('RB', 'RBR', 'RBZ'):
```
{'exact_match': 62.690722797638934,
 'exact_match_ci': (61.68083367692271, 63.69237147400661),
 'f1': 77.07116898243828,
 'f1_ci': (76.32951557203064, 77.81282239284486)}
```

Augmented using parts of speech ('VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ'):
```
{'exact_match': 57.88402848423194,
 'exact_match_ci': (56.847068026993874, 58.915793750168575),
 'f1': 72.05739805303583,
 'f1_ci': (71.24414442866858, 72.87065167740224)}
```

Augmented using parts of speech ('VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ', 'RB', 'RBR', 'RBZ'):
```
{'exact_match': 54.86960535425802,
 'exact_match_ci': (53.814639701026, 55.921295105813854),
 'f1': 69.2809597048899,
 'f1_ci': (68.4326274186548, 70.1292919911243)}
```

## Results using DistilBert uncased.
Original:  
```
{'exact_match': 51.78553363682347,
 'exact_match_ci': (50.79503391873498, 52.77498080096132),
 'f1': 67.1402286661281,
 'f1_ci': (66.33834793758437, 67.94210939467114)}
```
 
Augmented using parts of speech ('RB', 'VB', 'JJ'):
```
{'exact_match': 47.27564102564103,
'exact_match_ci': (46.22404688046134, 48.32905313480492),
'f1': 61.976016403387256,
'f1_ci': (61.081630518715, 62.870402288058955)}
```
 
Augmented using parts of speech ('JJ', 'VB'):
```
{'exact_match': 48.7332669543091,
'exact_match_ci': (47.697853514895336, 49.7694972479201),
'f1': 63.639230738212405,
'f1_ci': (62.771313358234096, 64.50714811819024)}
```
 
Augmented using parts of speech ('JJ'):
```
{'exact_match': 49.950511382382054,
 'exact_match_ci': (48.917471939596425, 50.983582547731864),
 'f1': 65.2630401500315,
 'f1_ci': (64.4106603365326, 66.11541996352976)}
```

Augmented using parts of speech ('VB', 'RB'):
```
{'exact_match': 51.49145346888616,
 'exact_match_ci': (50.45016923740518, 52.531766446720496),
 'f1': 66.08929790660498,
 'f1_ci': (65.23191051755666, 66.946685295653)}
```

Augmented using parts of speech ('VB'):
```
{'exact_match': 52.21114242358552,
 'exact_match_ci': (51.18589672092638, 53.23499125822917),
 'f1': 66.99138249073286,
 'f1_ci': (66.15498117557172, 67.82778380589349)}
```

Augmented using parts of speech ('RB'):
```
{'exact_match': 52.65969285555308,
 'exact_match_ci': (51.621074174551694, 53.696586267438605),
 'f1': 67.50048486449307,
 'f1_ci': (66.65731834923861, 68.34365137974714)}
```
