In [1]:
import json
import os
import csv 
import pandas as pd

from eval_v1_1_question import evaluate

from proj_config import *
from proj_utils import *

In [2]:
show_configs()

parts_of_speech_list:
 [['JJ', 'VB'], ['JJ'], ['VB', 'RB'], ['VB'], ['RB'], ['RB', 'RBR', 'RBZ'], ['VB', 'VBD', 'VBG', 'VBN', 'VBP'], ['RB', 'RBR', 'RBZ', 'VB', 'VBD', 'VBGVBN', 'VBP']]

augmenting_models:
 ['orig', 'bert', 'roberta']

qa_models:
 ['bert-large-cased-whole-word-masking-finetuned-squad', 'bert-large-uncased-whole-word-masking-finetuned-squad', 'distilbert-base-cased-distilled-squad', 'distilbert-base-uncased-distilled-squad']

qa_urls:
 {'amazon_reviews_v1_0': 'https://ndownloader.figshare.com/files/21500109?private_link=2f119bea3e8d711047ec', 'reddit_v1_0': 'https://ndownloader.figshare.com/files/21500112?private_link=2f119bea3e8d711047ec', 'new_wiki_v1.0': 'https://ndownloader.figshare.com/files/21500115?private_link=2f119bea3e8d711047ec', 'nyt_v1.0': 'https://ndownloader.figshare.com/files/21500118?private_link=2f119bea3e8d711047ec'}



In [4]:

def get_augmented_url(base_url, model_name, question_set, parts_of_speech=None, frequency_percentile=None):
    if model_name == 'orig':
        filename = base_url+question_set+".json.gz"
        return filename

    assert bool(parts_of_speech) ^ bool(frequency_percentile), "Can only pass one of parts_of_speech and frequency_percentile"
    
    if parts_of_speech:
        filename = base_url+question_set+"_"+model_name+"_"+"_".join(parts_of_speech)+".json.gz"
    elif frequency_percentile:
        filename = base_url+question_set+"_"+model_name+"_Percentile_"+str(frequency_percentile)+".json.gz"

    return filename


def get_prediction_url(base_url, model_name, question_set, qa_model, parts_of_speech=None, frequency_percentile=None):
    if model_name == 'orig':
        filename = base_url+question_set+"_"+qa_model+".json.gz"
        return filename
    
    assert bool(parts_of_speech) ^ bool(frequency_percentile), "Can only pass one of parts_of_speech and frequency_percentile"
    
    if parts_of_speech:
        filename = base_url+question_set+"_"+model_name+"_"+"_".join(parts_of_speech)+"_"+qa_model+".json.gz"
    elif frequency_percentile:
        filename = base_url+question_set+"_"+model_name+"_Percentile_"+str(frequency_percentile)+"_"+qa_model+".json.gz"

    return filename

def strip_filename(filepath):
    filename = filepath.split('/')[-1:][0]
    return filename[:-5]


def get_eval_1_1(qa_json, preds_json):
    
#     with open(qa_json) as qa_json_file:
#         qa = json.load(qa_json_file)['data']
    
    qa = get_gzip_json_url(qa_json)['data']

#     with open(preds_json) as preds_json_file:
#         print(preds_json)
#         preds = json.load(preds_json_file)
    
    preds = get_gzip_json_url(preds_json)
    
    eval_results, question_results = evaluate(qa, preds)
    
    return eval_results, question_results

augmentation_test_base_url = 'https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/'
augmented_predictions_base_url = 'https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/'


# Columns for model scoring results
model_results = [["question_set", 
                  "questions", 
                  "predictions", 
                  "augmentation_model", 
                  "parts_of_speech", 
                  "frequency_percentile", 
                  "augmentation_name", 
                  "question_answering_model", 
                  "exact_match", "f1"
                 ]]

# Columns for question-level scoring results
model_question_results = [["question_set", 
                           "questions", 
                           "predictions", 
                           "augmentation_model", 
                           "augmentation_name", 
                           "question_answering_model", 
                           "question_id", 
                           "exact_match", 
                           "f1"]
                         ]

for model_name in augmenting_models:
    for question_set, orig_filename in qa_urls.items():
        for qa_model in qa_models:

            if model_name == 'orig':
                qa_filepath = orig_filename
                pred_filepath = get_prediction_url(augmented_predictions_base_url, model_name, question_set, qa_model)

                print("Predicting for\n QA: {}\n Pred:{}".format(qa_filepath, pred_filepath))

                model_eval, question_results = get_eval_1_1(qa_filepath, pred_filepath)

                model_results.append([question_set, 
                                      strip_filename(qa_filepath),
                                      strip_filename(pred_filepath),
                                      model_name,
                                      None,
                                      None,
                                      "Original",
                                      qa_model,
                                      model_eval['exact_match'],
                                      model_eval['f1'],
                                     ])

                # Add meta data and append
                for question_result in question_results:
                    model_question_results.append([question_set, 
                                                   strip_filename(qa_filepath),
                                                   strip_filename(pred_filepath),
                                                   model_name, 
                                                   "Original",
                                                   qa_model
                                                  ] + question_result
                                                 )

                continue

            for parts_of_speech in parts_of_speech_list:
                qa_filepath = get_augmented_url(augmentation_test_base_url, model_name, question_set, parts_of_speech=parts_of_speech)

                pred_filepath = get_prediction_url(augmented_predictions_base_url, model_name, question_set, qa_model, parts_of_speech=parts_of_speech)

                model_eval, question_results = get_eval_1_1(qa_filepath, pred_filepath)

                model_results.append([question_set, 
                                      strip_filename(qa_filepath),
                                      strip_filename(pred_filepath),
                                      model_name,
                                      parts_of_speech,
                                      None,
                                      "PoS_"+"_".join(parts_of_speech),
                                      qa_model,
                                      model_eval['exact_match'],
                                      model_eval['f1'],
                                     ])

                # Add meta data and append
                for question_result in question_results:
                    model_question_results.append([question_set, 
                                                   strip_filename(qa_filepath),
                                                   strip_filename(pred_filepath),
                                                   model_name,
                                                   "PoS_"+"_".join(parts_of_speech),
                                                   qa_model
                                                  ] + question_result
                                                 )


            for frequency_percentile in frequency_percentiles:
                qa_filepath = get_augmented_url(augmentation_test_base_url, model_name, question_set, frequency_percentile=frequency_percentile)
                pred_filepath = get_prediction_url(augmented_predictions_base_url, model_name, question_set, qa_model, frequency_percentile=frequency_percentile)


                model_eval, question_results = get_eval_1_1(qa_filepath, pred_filepath)

                model_results.append([question_set, 
                                      strip_filename(qa_filepath),
                                      strip_filename(pred_filepath),
                                      model_name,
                                      None,
                                      frequency_percentile,
                                      "Percentile_"+str(frequency_percentile),
                                      qa_model,
                                      model_eval['exact_match'],
                                      model_eval['f1'],
                                     ])

                for question_result in question_results:
                    model_question_results.append([question_set, 
                                                   strip_filename(qa_filepath),
                                                   strip_filename(pred_filepath),
                                                   model_name,
                                                   "Percentile_"+str(frequency_percentile),
                                                   qa_model
                                                  ] + question_result
                                                 )                    



Predicting for
 QA: https://ndownloader.figshare.com/files/21500109?private_link=2f119bea3e8d711047ec
 Pred:https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/amazon_reviews_v1_0_bert-large-cased-whole-word-masking-finetuned-squad.json.gz
Fetching: https://ndownloader.figshare.com/files/21500109?private_link=2f119bea3e8d711047ec
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/amazon_reviews_v1_0_bert-large-cased-whole-word-masking-finetuned-squad.json.gz
Predicting for
 QA: https://ndownloader.figshare.com/files/21500109?private_link=2f119bea3e8d711047ec
 Pred:https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/amazon_reviews_v1_0_bert-large-uncased-whole-word-masking-finetuned-squad.json.gz
Fetching: https://ndownloader.figshare.com/files/21500109?private_link=2f119bea3e8d711047ec
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/amazon_reviews_v1_

Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/amazon_reviews_v1_0_bert_JJ_VB_bert-large-cased-whole-word-masking-finetuned-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/amazon_reviews_v1_0_bert_JJ.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/amazon_reviews_v1_0_bert_JJ_bert-large-cased-whole-word-masking-finetuned-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/amazon_reviews_v1_0_bert_VB_RB.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/amazon_reviews_v1_0_bert_VB_RB_bert-large-cased-whole-word-masking-finetuned-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/amazon_reviews_v1_0_bert_VB.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/amazon_rev

Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/amazon_reviews_v1_0_bert_VB.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/amazon_reviews_v1_0_bert_VB_distilbert-base-cased-distilled-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/amazon_reviews_v1_0_bert_RB.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/amazon_reviews_v1_0_bert_RB_distilbert-base-cased-distilled-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/amazon_reviews_v1_0_bert_RB_RBR_RBZ.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/amazon_reviews_v1_0_bert_RB_RBR_RBZ_distilbert-base-cased-distilled-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/amazon_reviews_v1_0_bert_VB_VBD_VBG_VBN_VBP.json.gz


Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/reddit_v1_0_bert_RB_RBR_RBZ_VB_VBD_VBGVBN_VBP.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/reddit_v1_0_bert_RB_RBR_RBZ_VB_VBD_VBGVBN_VBP_bert-large-cased-whole-word-masking-finetuned-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/reddit_v1_0_bert_Percentile_0.1.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/reddit_v1_0_bert_Percentile_0.1_bert-large-cased-whole-word-masking-finetuned-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/reddit_v1_0_bert_Percentile_0.2.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/reddit_v1_0_bert_Percentile_0.2_bert-large-cased-whole-word-masking-finetuned-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazon

Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/reddit_v1_0_bert_Percentile_0.5.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/reddit_v1_0_bert_Percentile_0.5_distilbert-base-cased-distilled-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/reddit_v1_0_bert_JJ_VB.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/reddit_v1_0_bert_JJ_VB_distilbert-base-uncased-distilled-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/reddit_v1_0_bert_JJ.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/reddit_v1_0_bert_JJ_distilbert-base-uncased-distilled-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/reddit_v1_0_bert_VB_RB.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.

Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/new_wiki_v1.0_bert_VB.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/new_wiki_v1.0_bert_VB_bert-large-uncased-whole-word-masking-finetuned-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/new_wiki_v1.0_bert_RB.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/new_wiki_v1.0_bert_RB_bert-large-uncased-whole-word-masking-finetuned-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/new_wiki_v1.0_bert_RB_RBR_RBZ.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/new_wiki_v1.0_bert_RB_RBR_RBZ_bert-large-uncased-whole-word-masking-finetuned-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/new_wiki_v1.0_bert_VB_VBD_VBG_VBN_VBP.js

Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/new_wiki_v1.0_bert_RB_RBR_RBZ_VB_VBD_VBGVBN_VBP.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/new_wiki_v1.0_bert_RB_RBR_RBZ_VB_VBD_VBGVBN_VBP_distilbert-base-uncased-distilled-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/new_wiki_v1.0_bert_Percentile_0.1.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/new_wiki_v1.0_bert_Percentile_0.1_distilbert-base-uncased-distilled-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/new_wiki_v1.0_bert_Percentile_0.2.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/new_wiki_v1.0_bert_Percentile_0.2_distilbert-base-uncased-distilled-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/tes

Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/nyt_v1.0_bert_Percentile_0.5.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/nyt_v1.0_bert_Percentile_0.5_bert-large-uncased-whole-word-masking-finetuned-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/nyt_v1.0_bert_JJ_VB.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/nyt_v1.0_bert_JJ_VB_distilbert-base-cased-distilled-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/nyt_v1.0_bert_JJ.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/nyt_v1.0_bert_JJ_distilbert-base-cased-distilled-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/nyt_v1.0_bert_VB_RB.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws

Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/amazon_reviews_v1_0_roberta_RB.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/amazon_reviews_v1_0_roberta_RB_bert-large-cased-whole-word-masking-finetuned-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/amazon_reviews_v1_0_roberta_RB_RBR_RBZ.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/amazon_reviews_v1_0_roberta_RB_RBR_RBZ_bert-large-cased-whole-word-masking-finetuned-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/amazon_reviews_v1_0_roberta_VB_VBD_VBG_VBN_VBP.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/amazon_reviews_v1_0_roberta_VB_VBD_VBG_VBN_VBP_bert-large-cased-whole-word-masking-finetuned-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-ce

Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/amazon_reviews_v1_0_roberta_VB_VBD_VBG_VBN_VBP.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/amazon_reviews_v1_0_roberta_VB_VBD_VBG_VBN_VBP_distilbert-base-cased-distilled-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/amazon_reviews_v1_0_roberta_RB_RBR_RBZ_VB_VBD_VBGVBN_VBP.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/amazon_reviews_v1_0_roberta_RB_RBR_RBZ_VB_VBD_VBGVBN_VBP_distilbert-base-cased-distilled-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/amazon_reviews_v1_0_roberta_Percentile_0.1.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/amazon_reviews_v1_0_roberta_Percentile_0.1_distilbert-base-cased-distilled-squad.json.gz
Fetching: https://nlp-dis

Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/reddit_v1_0_roberta_Percentile_0.2.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/reddit_v1_0_roberta_Percentile_0.2_bert-large-cased-whole-word-masking-finetuned-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/reddit_v1_0_roberta_Percentile_0.3.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/reddit_v1_0_roberta_Percentile_0.3_bert-large-cased-whole-word-masking-finetuned-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/reddit_v1_0_roberta_Percentile_0.5.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/reddit_v1_0_roberta_Percentile_0.5_bert-large-cased-whole-word-masking-finetuned-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/au

Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/reddit_v1_0_roberta_JJ.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/reddit_v1_0_roberta_JJ_distilbert-base-uncased-distilled-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/reddit_v1_0_roberta_VB_RB.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/reddit_v1_0_roberta_VB_RB_distilbert-base-uncased-distilled-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/reddit_v1_0_roberta_VB.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/reddit_v1_0_roberta_VB_distilbert-base-uncased-distilled-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/reddit_v1_0_roberta_RB.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amaz

Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/new_wiki_v1.0_roberta_RB_RBR_RBZ.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/new_wiki_v1.0_roberta_RB_RBR_RBZ_bert-large-uncased-whole-word-masking-finetuned-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/new_wiki_v1.0_roberta_VB_VBD_VBG_VBN_VBP.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/new_wiki_v1.0_roberta_VB_VBD_VBG_VBN_VBP_bert-large-uncased-whole-word-masking-finetuned-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/new_wiki_v1.0_roberta_RB_RBR_RBZ_VB_VBD_VBGVBN_VBP.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/new_wiki_v1.0_roberta_RB_RBR_RBZ_VB_VBD_VBGVBN_VBP_bert-large-uncased-whole-word-masking-finetuned-squad.json.gz
Fetching: https://nlp

Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/new_wiki_v1.0_roberta_Percentile_0.2.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/new_wiki_v1.0_roberta_Percentile_0.2_distilbert-base-uncased-distilled-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/new_wiki_v1.0_roberta_Percentile_0.3.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/new_wiki_v1.0_roberta_Percentile_0.3_distilbert-base-uncased-distilled-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/new_wiki_v1.0_roberta_Percentile_0.5.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/new_wiki_v1.0_roberta_Percentile_0.5_distilbert-base-uncased-distilled-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/nyt_v1.0

Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/nyt_v1.0_roberta_JJ.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/nyt_v1.0_roberta_JJ_distilbert-base-cased-distilled-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/nyt_v1.0_roberta_VB_RB.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/nyt_v1.0_roberta_VB_RB_distilbert-base-cased-distilled-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/nyt_v1.0_roberta_VB.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/predictions/nyt_v1.0_roberta_VB_distilbert-base-cased-distilled-squad.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/test/nyt_v1.0_roberta_RB.json.gz
Fetching: https://nlp-distribution.s3.ca-central-1.amazonaws.com/augmentation/pred

### Write model results to CSV and parquet

In [5]:
def write_results_csv(model_results):
    filename = '/data/augmentation/results/model_results.csv'
    
    print("Writting: {}".format(filename))
    with open(filename, 'w', encoding='utf-8') as f:
        csv_writer = csv.writer(f, dialect='excel')
        csv_writer.writerows(model_results)
        
    return filename

write_results_csv(model_results)

Writting: /data/distribution_shift/augmented_qa/results/model_results.csv


'/data/distribution_shift/augmented_qa/results/model_results.csv'

In [6]:
filename = '/data/augmentation/results/model_results.parquet.gz'

model_results_df = pd.DataFrame(model_results[1:], columns=model_results[0])

model_results_df['question_set'] = model_results_df['question_set'].astype('category')
model_results_df['questions'] = model_results_df['questions'].astype('category')
model_results_df['predictions'] = model_results_df['predictions'].astype('category')
model_results_df['augmentation_model'] = model_results_df['augmentation_model'].astype('category')
model_results_df['question_answering_model'] = model_results_df['question_answering_model'].astype('category')
model_results_df['augmentation_name'] = model_results_df['augmentation_name'].astype('category')

print("Writting: {}".format(filename))
model_results_df.to_parquet(filename,
              compression='gzip') 

model_results_df.head()

Writting: /data/distribution_shift/augmented_qa/results/model_results.parquet.gzip


Unnamed: 0,question_set,questions,predictions,augmentation_model,parts_of_speech,frequency_percentile,augmentation_name,question_answering_model,exact_match,f1
0,amazon_reviews_v1_0,21500109?private_link=2f119bea3e8d711,amazon_reviews_v1_0_bert-large-cased-whole-wor...,orig,,,Original,bert-large-cased-whole-word-masking-finetuned-...,61.294891,76.448281
1,amazon_reviews_v1_0,21500109?private_link=2f119bea3e8d711,amazon_reviews_v1_0_bert-large-uncased-whole-w...,orig,,,Original,bert-large-uncased-whole-word-masking-finetune...,61.446636,77.010119
2,amazon_reviews_v1_0,21500109?private_link=2f119bea3e8d711,amazon_reviews_v1_0_distilbert-base-cased-dist...,orig,,,Original,distilbert-base-cased-distilled-squad,52.149722,67.433575
3,amazon_reviews_v1_0,21500109?private_link=2f119bea3e8d711,amazon_reviews_v1_0_distilbert-base-uncased-di...,orig,,,Original,distilbert-base-uncased-distilled-squad,51.785534,67.140229
4,reddit_v1_0,21500112?private_link=2f119bea3e8d711,reddit_v1_0_bert-large-cased-whole-word-maskin...,orig,,,Original,bert-large-cased-whole-word-masking-finetuned-...,61.215954,75.00533


### Write model question results to CSV and parquet

In [7]:
def write_question_results_csv(question_results):
    filename = '/data/augmentation/results/question_results.csv'
    
    print("Writting: {}".format(filename))
    with open(filename, 'w', encoding='utf-8') as f:
        csv_writer = csv.writer(f, dialect='excel')
        csv_writer.writerows(question_results)
        
    return filename

write_question_results_csv(model_question_results)

Writting: /data/distribution_shift/augmented_qa/results/question_results.csv


'/data/distribution_shift/augmented_qa/results/question_results.csv'

In [8]:
filename = '/data/augmentation/results/question_results.parquet.gz'

model_question_results_df = pd.DataFrame(model_question_results[1:],
                                        columns=model_question_results[0]
                                        )

model_question_results_df['question_set'] = model_question_results_df['question_set'].astype('category')
model_question_results_df['questions'] = model_question_results_df['questions'].astype('category')
model_question_results_df['predictions'] = model_question_results_df['predictions'].astype('category')
model_question_results_df['augmentation_model'] = model_question_results_df['augmentation_model'].astype('category')
model_question_results_df['augmentation_name'] = model_question_results_df['augmentation_name'].astype('category')
model_question_results_df['question_answering_model'] = model_question_results_df['question_answering_model'].astype('category')


print("Writting: {}".format(filename))
model_question_results_df.to_parquet(filename,
              compression='gzip') 

model_question_results_df.head()

Writting: /data/distribution_shift/augmented_qa/results/question_results.parquet.gzip


Unnamed: 0,question_set,questions,predictions,augmentation_model,augmentation_name,question_answering_model,question_id,exact_match,f1
0,amazon_reviews_v1_0,21500109?private_link=2f119bea3e8d711,amazon_reviews_v1_0_bert-large-cased-whole-wor...,orig,Original,bert-large-cased-whole-word-masking-finetuned-...,5dd465dacc027a086d65bc6c,True,1.0
1,amazon_reviews_v1_0,21500109?private_link=2f119bea3e8d711,amazon_reviews_v1_0_bert-large-cased-whole-wor...,orig,Original,bert-large-cased-whole-word-masking-finetuned-...,5dd465dacc027a086d65bc6d,False,0.888889
2,amazon_reviews_v1_0,21500109?private_link=2f119bea3e8d711,amazon_reviews_v1_0_bert-large-cased-whole-wor...,orig,Original,bert-large-cased-whole-word-masking-finetuned-...,5dd465dacc027a086d65bc6e,True,1.0
3,amazon_reviews_v1_0,21500109?private_link=2f119bea3e8d711,amazon_reviews_v1_0_bert-large-cased-whole-wor...,orig,Original,bert-large-cased-whole-word-masking-finetuned-...,5dd465dacc027a086d65bc6f,False,0.571429
4,amazon_reviews_v1_0,21500109?private_link=2f119bea3e8d711,amazon_reviews_v1_0_bert-large-cased-whole-wor...,orig,Original,bert-large-cased-whole-word-masking-finetuned-...,5dd465dacc027a086d65bc70,False,0.0
