In [1]:
import json
import pandas as pd
import numpy as np
import os
from tqdm import tqdm

# SQuAD / NQ / MuSiQue

### Zero Shot

In [28]:
# Zero-shot configuration: folder holding the base prompt files, the dataset
# to process, and the splits / instruction variants to generate.
indir = "generated_prompts/all/zero_shot"
data_name = "musique"  # alternatives left by the author: "NQ", "squad"
data_types = ["adversarial", "control_group"]
variants = [f"variant{i}" for i in (1, 2, 3)]

# Base prompt file of each split: <indir>/<data_name>_<split>_all.json
adversarial_path, control_group_path = (
    os.path.join(indir, f"{data_name}_{split}_all.json") for split in data_types
)

In [29]:
# Per-variant text substitution for the zero-shot prompts, stored as
# {text_to_find: replacement_text}, one pair per variant.
# The sentence searched for says "context" when data_name is "musique" and
# "passage" for any other dataset; everything else is identical.
# variant1 maps "" -> "", which leaves the text unchanged.
_source_word = "context" if data_name == "musique" else "passage"
_base_instruction = f"If it cannot be answered based on the {_source_word}, reply \"unanswerable\""

variants_replacemants = {
    "variant1": {"": ""},
    "variant2": {_base_instruction: "If you don't know the answer, reply \"IDK\""},
    "variant3": {_base_instruction: "If there is no correct answer, reply \"N/A\""},
}

In [30]:
def change_instance(instance, old_str, new_str):
    """Return a copy of ``instance`` with ``old_str`` replaced by ``new_str``.

    Every value is first converted with ``str()`` and then has all
    occurrences of ``old_str`` replaced; keys are kept as they are and the
    input dict is not modified.

    NOTE(review): non-string values (numbers, booleans, lists, ...) come back
    as their ``str()`` form - confirm this is what downstream code expects.
    NOTE: a function with the same name but a different signature is defined
    further down in this notebook and shadows this one once that cell runs.

    Args:
        instance: mapping of field name to value for a single prompt record.
        old_str: text to search for in every (stringified) value.
        new_str: text that replaces each occurrence of ``old_str``.

    Returns:
        A new dict with the same keys and the rewritten string values.
    """
    updated_instance = {}
    for field_name, field_value in instance.items():
        updated_instance[field_name] = str(field_value).replace(old_str, new_str)
    return updated_instance

In [31]:
# For every split, read the base zero-shot prompt file once and write one
# rewritten copy per instruction variant to <indir>/<variant>/<same file name>.
for data_type in tqdm(data_types):
    curr_indir = os.path.join(indir, f"{data_name}_{data_type}_all.json")
    with open(curr_indir, 'r') as f_in:
        curr_data = json.load(f_in)
    for variant in variants:
        # Only the first {old: new} pair of the variant is applied here.
        old_str, new_str = next(iter(variants_replacemants[variant].items()))
        curr_outdir = os.path.join(indir, variant, f"{data_name}_{data_type}_all.json")
        # open(..., 'w') does not create missing parent folders, so make sure
        # the per-variant output folder exists before writing.
        os.makedirs(os.path.dirname(curr_outdir), exist_ok=True)
        new_data = [change_instance(instance, old_str, new_str) for instance in curr_data]

        with open(curr_outdir, 'w') as f_out:
            json.dump(new_data, f_out, indent=2)


100%|██████████| 2/2 [00:00<00:00,  2.70it/s]


### Few-Shot with Instructions

In [8]:
# Few-shot (with instructions) configuration: folder holding the base prompt
# files, the dataset to process, and the splits / variants to generate.
indir = "../generated_prompts/all/few_shot_with_instructions"
data_name = "squad"  # alternatives left by the author: "NQ", "musique"
data_types = ["adversarial", "control_group"]
variants = [f"variant{i}" for i in (1, 2, 3)]
# Name fragments of the in-context-example sets; they appear in the file names.
icl_examples_vs = ["icl_examples_v1", "icl_examples_v2", "icl_examples_v3"]

In [5]:
# Per-variant text substitutions for the few-shot prompts, stored as
# {text_to_find: replacement_text}.
# The instruction sentence searched for says "context" when data_name is
# "musique" and "passage" for any other dataset; everything else is identical.
# Insertion order matters: the pairs are applied in dict order, so the full
# instruction sentence must be rewritten before the bare word "unanswerable"
# is replaced - otherwise the sentence would no longer match.
# variant1 maps "" -> "", which leaves the text unchanged.
_source_word = "context" if data_name == "musique" else "passage"
_base_instruction = f"If it cannot be answered based on the {_source_word}, reply \"unanswerable\""

variants_replacemants = {
    "variant1": {"": ""},
    "variant2": {
        _base_instruction: "If you don't know the answer, reply \"IDK\"",
        "unanswerable": "IDK",
    },
    "variant3": {
        _base_instruction: "If there is no correct answer, reply \"N/A\"",
        "unanswerable": "N/A",
    },
}

In [6]:
def change_instance(instance, variants_replacemants):
    """Return a copy of ``instance`` with a variant's substitutions applied.

    Fields whose key contains ``"Answerability"`` are copied through
    untouched. Every other value is converted with ``str()`` and then has each
    ``old -> new`` pair applied in the mapping's iteration order, so a later
    pair sees the result of the earlier ones.

    NOTE: this redefines ``change_instance`` with a different signature from
    the definition earlier in this notebook.

    Args:
        instance: mapping of field name to value for a single prompt record.
        variants_replacemants: ``{text_to_find: replacement_text}`` mapping of
            ONE variant (not the whole per-variant table).

    Returns:
        A new dict with the same keys in the same order.
    """
    updated_instance = {}
    for key, value in instance.items():
        if "Answerability" in key:
            # Label-like field: keep the original value and type.
            updated_instance[key] = value
            continue
        text = str(value)
        for old_str, new_str in variants_replacemants.items():
            text = text.replace(old_str, new_str)
        updated_instance[key] = text
    return updated_instance

In [9]:
# For every split and every in-context-example set, read the base few-shot
# prompt file once and write one rewritten copy per instruction variant to
# <indir>/<variant>/<same file name>.
for data_type in tqdm(data_types):
    for icl_examples_v in icl_examples_vs:
        # Input and output share the file name; only the folder differs.
        curr_file_name = f"{data_name}_{data_type}_{icl_examples_v}_all.json"
        curr_indir = os.path.join(indir, curr_file_name)
        with open(curr_indir, 'r') as f_in:
            curr_data = json.load(f_in)
        for variant in variants:
            curr_outdir = os.path.join(indir, variant, curr_file_name)
            # open(..., 'w') does not create missing parent folders, so make
            # sure the per-variant output folder exists before writing.
            os.makedirs(os.path.dirname(curr_outdir), exist_ok=True)
            new_data = [change_instance(instance, variants_replacemants[variant]) for instance in curr_data]

            with open(curr_outdir, 'w') as f_out:
                json.dump(new_data, f_out, indent=2)

100%|██████████| 2/2 [00:14<00:00,  7.46s/it]
