In [1]:
import json
from tqdm import tqdm
import os
from datasets import load_dataset
import re
import spacy
import random
import pandas as pd
import requests
from bs4 import BeautifulSoup

  from .autonotebook import tqdm as notebook_tqdm


# Unanswerable Questions - Old

## Read the JSON file

In [14]:
# Run configuration. All paths are relative to the notebook's working directory.
# IS_CONTROL_GROUP selects which questions the filtering cell keeps:
# False -> questions with an empty "answers" list, True -> questions with answers.
IS_CONTROL_GROUP = False
outdir = "generated_prompts"
indir_reasons = "data/squad/reasons"  # directory of tab-separated reason files
indir = "data/squad/dev-v2.0.json"  # SQuAD JSON file read by the loading cell

In [15]:
# One-character codes found in the "reason" column of the reasons files,
# mapped to the human-readable category name stored with each prompt.
shortcut_to_reason = dict(
    [
        ("E", "Entity Swap"),
        ("#", "Number Swap"),
        ("N", "Negation"),
        ("A", "Antonym"),
        ("X", "Mutual Exclusion"),
        ("I", "No Information"),
    ]
)

In [16]:
# Load the SQuAD file and keep only its top-level "data" list.
# JSON is UTF-8; passing the encoding explicitly keeps decoding independent of
# the platform's default locale encoding.
with open(indir, encoding="utf-8") as f:
    data = json.load(f)["data"]

# Read every tab-separated file in the reasons directory.
# The listing is sorted so the result is reproducible: os.listdir returns names
# in arbitrary order, and when a qid appears in several files the row read last
# is the one that ends up in question_to_reason_dict below.
reasons_frames = []
for reasons_filename in sorted(os.listdir(indir_reasons)):
    curr_reasons_df = pd.read_csv(os.path.join(indir_reasons, reasons_filename), sep='\t')
    reasons_frames.append(curr_reasons_df)

# Concatenate once instead of growing the frame inside the loop. An empty
# directory still yields an empty frame with the two expected columns.
if reasons_frames:
    reasons_df = pd.concat(reasons_frames, ignore_index=True)
else:
    reasons_df = pd.DataFrame(columns=["qid", "reason"])
reasons_df = reasons_df.drop_duplicates()

# qid -> readable reason name. An unknown reason code raises KeyError, which
# surfaces annotation typos instead of hiding them.
question_to_reason_dict = {
    qid: shortcut_to_reason[reason_code]
    for qid, reason_code in zip(reasons_df["qid"], reasons_df["reason"])
}

FileNotFoundError: [Errno 2] No such file or directory: 'data/squad/dev-v2.0.json'

## Filter only unanswerable questions

In [None]:
# Walk article -> paragraph -> question and keep one record per selected question.
# Each record holds the question id and the "Passage: ... Question: ..." text
# that every prompt template embeds.
filtered_data = list()
for elem in data:
    for paragraph in elem["paragraphs"]:
        for qa in paragraph["qas"]:
            sample = "Passage: {} Question: {}".format(paragraph["context"], qa["question"])
            has_answer = len(qa["answers"]) > 0
            # Control group keeps answered questions; otherwise keep unanswered ones.
            keep = has_answer if IS_CONTROL_GROUP else not has_answer
            if not keep:
                continue
            filtered_data.append(dict(id=qa["id"], input=sample))

## Generate Instructions (Zero-Shot) - Version 1

In [None]:
# Zero-shot prompts, wording version 1 ("N/A" / "A"-or-"B" answers).
# One dict per filtered question: its id, the unanswerability reason (skipped
# for the control group) and four prompt variants.
# The indentation inside the prompt strings is part of the prompt text.
prompt_list_zero_shot = []
for elem in filtered_data:
    prompt_elem = {"id": elem["id"]}
    if not IS_CONTROL_GROUP:
        prompt_elem["Unanswerablity-Reason"] = question_to_reason_dict[elem["id"]]

    # Plain question answering, no hint that the question may be unanswerable.
    prompt_elem["Adversarial"] = (
        "Given the following passage and question, answer the question:\n"
        f"        {elem['input']}"
    )

    # Same task, with an explicit "N/A" escape hatch.
    prompt_elem["Pseudo-Adversarial"] = (
        "Given the following passage and question, answer the question.\n"
        '    If it cannot be answered based on the passage, answer with "N/A" only, and nothing else:\n'
        f"        {elem['input']}"
    )

    # Escape hatch plus an instruction to check answerability first.
    prompt_elem["CoT-Adversarial"] = (
        "Given the following passage and question, answer the question.\n"
        "    First make sure if it can be answered by the passage.\n"
        '    If it cannot be answered based on the passage, answer with "N/A" only, and nothing else:\n'
        f"        {elem['input']}"
    )

    # Binary answerability classification.
    prompt_elem["Answerability"] = (
        "Given the following passage and question, choose one of the following options:\n"
        "    (A) the question can be answered by the passage (B) the question cannot be answered by the passage.\n"
        '    Please answer with "A" or "B" only, and nothing else:\n'
        f"        {elem['input']}"
    )

    prompt_list_zero_shot.append(prompt_elem)

## Generate Instructions (Zero-Shot) - Version 2

In [None]:
# Zero-shot prompts, wording version 2 ("answerable" / "unanswerable" answers).
# One dict per filtered question: its id, the unanswerability reason (skipped
# for the control group) and four prompt variants.
# The indentation inside the prompt strings is part of the prompt text.
prompt_list_zero_shot_version2 = []
for elem in filtered_data:
    prompt_elem = {"id": elem["id"]}
    if not IS_CONTROL_GROUP:
        prompt_elem["Unanswerablity-Reason"] = question_to_reason_dict[elem["id"]]

    # Plain question answering, no hint that the question may be unanswerable.
    prompt_elem["Adversarial"] = (
        "Given the following passage and question, answer the question:\n"
        f"        {elem['input']}"
    )

    # Same task, with an explicit "unanswerable" escape hatch.
    prompt_elem["Pseudo-Adversarial"] = (
        "Given the following passage and question, answer the question.\n"
        '    If it cannot be answered based on the passage, reply "unanswerable":\n'
        f"        {elem['input']}"
    )

    # Escape hatch plus an instruction to check answerability first.
    prompt_elem["CoT-Adversarial"] = (
        "Given the following passage and question, answer the question.\n"
        "    First make sure if it can be answered by the passage.\n"
        '    If it cannot be answered based on the passage, reply "unanswerable":\n'
        f"        {elem['input']}"
    )

    # Binary answerability classification.
    prompt_elem["Answerability"] = (
        'Given the following passage and question, decide if the question is answerable based on the passage. Reply only "answerable" or "unanswerable":\n'
        f"        {elem['input']}"
    )

    prompt_list_zero_shot_version2.append(prompt_elem)

## Generate Instructions (Few-Shot) - Version 1

In [None]:
# Few-shot prompts, wording version 1 (full-sentence answers, no task instruction).
#
# The worked examples are the same for every question, so they are built once
# here instead of being re-created on every loop iteration.
pos_example_1 = {
    'input': 'Passage: Madonna released the Material Girl clothing line, which she designed with her daughter, Lourdes. The 1980s inspired clothing line, borrowed from Madonna\'s punk-girl style when she rose to fame in the 1980s, was released under the Macy\'s label. Madonna also opened a series of fitness centers around the world named Hard Candy Fitness. In November 2011, Madonna and MG Icon announced the release of a second fashion brand called Truth or Dare by Madonna to include footwear, underclothing, and accessories. She also directed her second feature film, W.E., a biographic about the affair between King Edward VIII and Wallis Simpson; it was co-written with Alek Keshishian. Critical and commercial response to the film was negative. Madonna contributed the ballad "Masterpiece" for the film\'s soundtrack, which won her a Golden Globe Award for Best Original Song. Question: Material Girl clothing line is released under which brand?',
    'Answer': 'Macy\'s.',
    'CoT': '"Material Girl clothing line" is first mentioned in the first sentence of the passage. The second sentence further discusses the clothing line, saying that it "was released under the Macy\'s label".',
}

pos_example_2 = {
    'input': 'Passage: In 1919 Father James Burns became president of Notre Dame, and in three years he produced an academic revolution that brought the school up to national standards by adopting the elective system and moving away from the university\'s traditional scholastic and classical emphasis. By contrast, the Jesuit colleges, bastions of academic conservatism, were reluctant to move to a system of electives. Their graduates were shut out of Harvard Law School for that reason. Notre Dame continued to grow over the years, adding more colleges, programs, and sports teams. By 1921, with the addition of the College of Commerce, Notre Dame had grown from a small college to a university with five colleges and a professional law school. The university continued to expand and add new residence halls and buildings with each subsequent president. Question: Over how many years did the change to national standards undertaken at Notre Dame in the early 20th century take place?',
    'Answer': 'three years.',
    'CoT': 'The "national standards" are mentioned in the first sentence of the passage, where it is stated that "In 1919 Father James Burns became president of Notre Dame, and in three years he produced an academic revolution that brought the school up to national standards".',
}

neg_example_1 = {
    'input': 'Passage: The descendants of Rollo\'s Vikings and their Frankish wives would replace the Norse religion and Old Norse language with Catholicism (Christianity) and the Gallo-Romance language of the local people, blending their maternal Frankish heritage with Old Norse traditions and customs to synthesize a unique "Norman" culture in the north of France. The Norman language was forged by the adoption of the indigenous langue d\'oïl branch of Romance by a Norse-speaking ruling class, and it developed into the regional language that survives today. Question: What was replaced with the Norse religion?',
    'Answer': 'the given question cannot be answered in the context of the passage.',
    'CoT': 'In the passage, it is mentioned that Norse religion was replaced with Catholicism, whereas the question asks about the entity which was replaced by Norse religion. No such information is provided in the passage.',
}

# Kept as a defined name, although no template in this cell references it.
neg_example_2 = {
    'input': 'Passage: Exceptions to the restrictions faced by Windows Store apps are given to web browsers. The user\'s default browser can distribute a Metro-style web browser in same package as the desktop version, which has access to functionality unavailable to other apps, such as being able to permanently run in the background, use multiple background processes, and use Windows API code instead of WinRT (allowing for code to be re-used with the desktop version, while still taking advantage of features available to Windows Store apps, such as charms). Microsoft advertises this exception privilege "New experience enabled" (formerly "Metro-style enabled"). Question: What term did Microsoft give to its exception privilige for file browsing?',
    'Answer': 'the given question cannot be answered in the context of the passage.',
    'CoT': "The passage talks about exception privilege given with respect to the entity 'web browser', whereas the question asks about 'file browser'",
}

# One dict per filtered question: its id, the unanswerability reason (skipped
# for the control group) and six prompt variants.
# Templates are written one literal per output line so that the whitespace in
# the prompts is visible; note that some separator lines are empty ("\n") while
# others consist of eight spaces ("        \n").
prompt_list_few_shot = []
for elem in filtered_data:
    prompt_elem = {"id": elem["id"]}
    if not IS_CONTROL_GROUP:
        prompt_elem["Unanswerablity-Reason"] = question_to_reason_dict[elem["id"]]

    # Two answerable examples only.
    prompt_elem["Adversarial"] = (
        "\n"
        "        Example 1:\n"
        f"        {pos_example_1['input']}\n"
        f"        Output: The answer to the given question is: {pos_example_1['Answer']}\n"
        "\n"
        "        Example 2:\n"
        f"        {pos_example_2['input']}\n"
        f"        Output: The answer to the given question is: {pos_example_2['Answer']}\n"
        "\n"
        "        Now your turn:\n"
        f"        {elem['input']}\n"
        "        Output:"
    )

    # Two answerable examples, each preceded by its reasoning.
    prompt_elem["Adversarial-CoT"] = (
        "\n"
        "        Example 1:\n"
        f"        {pos_example_1['input']}\n"
        f"        Output: {pos_example_1['CoT']} Therefore, the answer to the given question is: {pos_example_1['Answer']}\n"
        "\n"
        "        Example 2:\n"
        f"        {pos_example_2['input']}\n"
        f"        Output: {pos_example_2['CoT']} Therefore, the answer to the given question is: {pos_example_2['Answer']}\n"
        "\n"
        "        Now your turn:\n"
        f"        {elem['input']}\n"
        "        Output:"
    )

    # Two answerable examples plus one unanswerable example.
    prompt_elem["Pseudo-Adversarial"] = (
        "\n"
        "        Example 1:\n"
        f"        {pos_example_1['input']}\n"
        f"        Output: The answer to the given question is {pos_example_1['Answer']}\n"
        "        \n"
        "        Example 2:\n"
        f"        {pos_example_2['input']}\n"
        f"        Output: The answer to the given question is {pos_example_2['Answer']}\n"
        "\n"
        "        Example 3:\n"
        f"        {neg_example_1['input']}\n"
        f"        Output: {neg_example_1['Answer'].capitalize()}\n"
        "\n"
        "        Now your turn:\n"
        f"        {elem['input']}\n"
        "        Output:"
    )

    # Same three examples, each preceded by its reasoning.
    prompt_elem["Pseudo-Adversarial-CoT"] = (
        "\n"
        "        Example 1:\n"
        f"        {pos_example_1['input']}\n"
        f"        Output: {pos_example_1['CoT']} Therefore, the answer to the given question is: {pos_example_1['Answer']}\n"
        "        \n"
        "        Example 2:\n"
        f"        {pos_example_2['input']}\n"
        f"        Output: {pos_example_2['CoT']} Therefore, the answer to the given question is: {pos_example_2['Answer']}\n"
        "\n"
        "        Example 3:\n"
        f"        {neg_example_1['input']}\n"
        f"        Output: {neg_example_1['CoT']} Therefore, {neg_example_1['Answer']}\n"
        "\n"
        "        Now your turn:\n"
        f"        {elem['input']}\n"
        "        Output:"
    )

    # Answerability classification with three labelled examples.
    prompt_elem["Answerability"] = (
        "\n"
        "        Example 1:\n"
        f"        {pos_example_1['input']}\n"
        "        Output: The question is answerable.\n"
        "        \n"
        "        Example 2:\n"
        f"        {pos_example_2['input']}\n"
        "        Output: The question is answerable.\n"
        "\n"
        "        Example 3:\n"
        f"        {neg_example_1['input']}\n"
        "        Output: The question is not answerable.\n"
        "\n"
        "        Now your turn:\n"
        f"        {elem['input']}\n"
        "        Output:"
    )

    # Answerability classification, each label preceded by its reasoning.
    prompt_elem["Answerability-CoT"] = (
        "\n"
        "        Example 1:\n"
        f"        {pos_example_1['input']}\n"
        f"        Output: {pos_example_1['CoT']} Therefore, The question is answerable.\n"
        "        \n"
        "        Example 2:\n"
        f"        {pos_example_2['input']}\n"
        f"        Output: {pos_example_2['CoT']} Therefore, The question is answerable.\n"
        "\n"
        "        Example 3:\n"
        f"        {neg_example_1['input']}\n"
        f"        Output: {neg_example_1['CoT']} Therefore, The question is not answerable.\n"
        "\n"
        "        Now your turn:\n"
        f"        {elem['input']}\n"
        "        Output:"
    )

    prompt_list_few_shot.append(prompt_elem)

## Generate Instructions (Few-Shot) - Version 2

In [None]:
# Few-shot prompts, wording version 2 (bare answers, "unanswerable." for the
# negative example, no task instruction).
#
# NOTE(review): this cell assigns to prompt_list_few_shot, the same name the
# "Few-Shot - Version 1" cell uses, so running both in order discards the
# version 1 list. Confirm this is intended before relying on either result.
#
# The worked examples are the same for every question, so they are built once
# here instead of being re-created on every loop iteration.
pos_example_1 = {
    'input': 'Passage: Madonna released the Material Girl clothing line, which she designed with her daughter, Lourdes. The 1980s inspired clothing line, borrowed from Madonna\'s punk-girl style when she rose to fame in the 1980s, was released under the Macy\'s label. Madonna also opened a series of fitness centers around the world named Hard Candy Fitness. In November 2011, Madonna and MG Icon announced the release of a second fashion brand called Truth or Dare by Madonna to include footwear, underclothing, and accessories. She also directed her second feature film, W.E., a biographic about the affair between King Edward VIII and Wallis Simpson; it was co-written with Alek Keshishian. Critical and commercial response to the film was negative. Madonna contributed the ballad "Masterpiece" for the film\'s soundtrack, which won her a Golden Globe Award for Best Original Song. Question: Material Girl clothing line is released under which brand?',
    'Answer': 'Macy\'s.',
    'CoT': '"Material Girl clothing line" is first mentioned in the first sentence of the passage. The second sentence further discusses the clothing line, saying that it "was released under the Macy\'s label".',
}

pos_example_2 = {
    'input': 'Passage: In 1919 Father James Burns became president of Notre Dame, and in three years he produced an academic revolution that brought the school up to national standards by adopting the elective system and moving away from the university\'s traditional scholastic and classical emphasis. By contrast, the Jesuit colleges, bastions of academic conservatism, were reluctant to move to a system of electives. Their graduates were shut out of Harvard Law School for that reason. Notre Dame continued to grow over the years, adding more colleges, programs, and sports teams. By 1921, with the addition of the College of Commerce, Notre Dame had grown from a small college to a university with five colleges and a professional law school. The university continued to expand and add new residence halls and buildings with each subsequent president. Question: Over how many years did the change to national standards undertaken at Notre Dame in the early 20th century take place?',
    'Answer': 'three years.',
    'CoT': 'The "national standards" are mentioned in the first sentence of the passage, where it is stated that "In 1919 Father James Burns became president of Notre Dame, and in three years he produced an academic revolution that brought the school up to national standards".',
}

neg_example_1 = {
    'input': 'Passage: The descendants of Rollo\'s Vikings and their Frankish wives would replace the Norse religion and Old Norse language with Catholicism (Christianity) and the Gallo-Romance language of the local people, blending their maternal Frankish heritage with Old Norse traditions and customs to synthesize a unique "Norman" culture in the north of France. The Norman language was forged by the adoption of the indigenous langue d\'oïl branch of Romance by a Norse-speaking ruling class, and it developed into the regional language that survives today. Question: What was replaced with the Norse religion?',
    'Answer': 'unanswerable.',
    'CoT': 'In the passage, it is mentioned that Norse religion was replaced with Catholicism, whereas the question asks about the entity which was replaced by Norse religion. No such information is provided in the passage.',
}

# Kept as a defined name, although no template in this cell references it.
neg_example_2 = {
    'input': 'Passage: Exceptions to the restrictions faced by Windows Store apps are given to web browsers. The user\'s default browser can distribute a Metro-style web browser in same package as the desktop version, which has access to functionality unavailable to other apps, such as being able to permanently run in the background, use multiple background processes, and use Windows API code instead of WinRT (allowing for code to be re-used with the desktop version, while still taking advantage of features available to Windows Store apps, such as charms). Microsoft advertises this exception privilege "New experience enabled" (formerly "Metro-style enabled"). Question: What term did Microsoft give to its exception privilige for file browsing?',
    'Answer': 'unanswerable.',
    'CoT': "The passage talks about exception privilege given with respect to the entity 'web browser', whereas the question asks about 'file browser'",
}

# One dict per filtered question: its id, the unanswerability reason (skipped
# for the control group) and six prompt variants.
# Templates are written one literal per output line so that the whitespace in
# the prompts is visible; note that some separator lines are empty ("\n") while
# others consist of eight spaces ("        \n").
prompt_list_few_shot = []
for elem in filtered_data:
    prompt_elem = {"id": elem["id"]}
    if not IS_CONTROL_GROUP:
        prompt_elem["Unanswerablity-Reason"] = question_to_reason_dict[elem["id"]]

    # Two answerable examples only.
    prompt_elem["Adversarial"] = (
        "\n"
        "        Example 1:\n"
        f"        {pos_example_1['input']}\n"
        f"        Output: {pos_example_1['Answer']}\n"
        "\n"
        "        Example 2:\n"
        f"        {pos_example_2['input']}\n"
        f"        Output: {pos_example_2['Answer']}\n"
        "\n"
        "        Now your turn:\n"
        f"        {elem['input']}\n"
        "        Output:"
    )

    # Two answerable examples, each preceded by its reasoning.
    prompt_elem["Adversarial-CoT"] = (
        "\n"
        "        Example 1:\n"
        f"        {pos_example_1['input']}\n"
        f"        Output: {pos_example_1['CoT']} Therefore, the answer is: {pos_example_1['Answer']}\n"
        "\n"
        "        Example 2:\n"
        f"        {pos_example_2['input']}\n"
        f"        Output: {pos_example_2['CoT']} Therefore, the answer is: {pos_example_2['Answer']}\n"
        "\n"
        "        Now your turn:\n"
        f"        {elem['input']}\n"
        "        Output:"
    )

    # Two answerable examples plus one unanswerable example.
    prompt_elem["Pseudo-Adversarial"] = (
        "\n"
        "        Example 1:\n"
        f"        {pos_example_1['input']}\n"
        f"        Output: {pos_example_1['Answer']}\n"
        "        \n"
        "        Example 2:\n"
        f"        {pos_example_2['input']}\n"
        f"        Output: {pos_example_2['Answer']}\n"
        "\n"
        "        Example 3:\n"
        f"        {neg_example_1['input']}\n"
        f"        Output: {neg_example_1['Answer'].capitalize()}\n"
        "\n"
        "        Now your turn:\n"
        f"        {elem['input']}\n"
        "        Output:"
    )

    # Same three examples, each preceded by its reasoning.
    prompt_elem["Pseudo-Adversarial-CoT"] = (
        "\n"
        "        Example 1:\n"
        f"        {pos_example_1['input']}\n"
        f"        Output: {pos_example_1['CoT']} Therefore, the answer is: {pos_example_1['Answer']}\n"
        "        \n"
        "        Example 2:\n"
        f"        {pos_example_2['input']}\n"
        f"        Output: {pos_example_2['CoT']} Therefore, the answer is: {pos_example_2['Answer']}\n"
        "\n"
        "        Example 3:\n"
        f"        {neg_example_1['input']}\n"
        f"        Output: {neg_example_1['CoT']} Therefore, the answer is: {neg_example_1['Answer']}\n"
        "\n"
        "        Now your turn:\n"
        f"        {elem['input']}\n"
        "        Output:"
    )

    # Answerability classification with three labelled examples.
    prompt_elem["Answerability"] = (
        "\n"
        "        Example 1:\n"
        f"        {pos_example_1['input']}\n"
        "        Output: answerable.\n"
        "        \n"
        "        Example 2:\n"
        f"        {pos_example_2['input']}\n"
        "        Output: answerable.\n"
        "\n"
        "        Example 3:\n"
        f"        {neg_example_1['input']}\n"
        "        Output: unanswerable.\n"
        "\n"
        "        Now your turn:\n"
        f"        {elem['input']}\n"
        "        Output:"
    )

    # Answerability classification, each label preceded by its reasoning.
    prompt_elem["Answerability-CoT"] = (
        "\n"
        "        Example 1:\n"
        f"        {pos_example_1['input']}\n"
        f"        Output: {pos_example_1['CoT']} Therefore, The question is answerable.\n"
        "        \n"
        "        Example 2:\n"
        f"        {pos_example_2['input']}\n"
        f"        Output: {pos_example_2['CoT']} Therefore, The question is answerable.\n"
        "\n"
        "        Example 3:\n"
        f"        {neg_example_1['input']}\n"
        f"        Output: {neg_example_1['CoT']} Therefore, The question is unanswerable.\n"
        "\n"
        "        Now your turn:\n"
        f"        {elem['input']}\n"
        "        Output:"
    )

    prompt_list_few_shot.append(prompt_elem)

## Generate Instructions (Few-Shot with Instructions) - Version 1

In [None]:
# Few-shot prompts with a task instruction, wording version 1 ("N/A" for the
# negative example, "(A)"/"(B)" for answerability).
#
# The worked examples are the same for every question, so they are built once
# here instead of being re-created on every loop iteration.
pos_example_1 = {
    'input': 'Passage: Madonna released the Material Girl clothing line, which she designed with her daughter, Lourdes. The 1980s inspired clothing line, borrowed from Madonna\'s punk-girl style when she rose to fame in the 1980s, was released under the Macy\'s label. Madonna also opened a series of fitness centers around the world named Hard Candy Fitness. In November 2011, Madonna and MG Icon announced the release of a second fashion brand called Truth or Dare by Madonna to include footwear, underclothing, and accessories. She also directed her second feature film, W.E., a biographic about the affair between King Edward VIII and Wallis Simpson; it was co-written with Alek Keshishian. Critical and commercial response to the film was negative. Madonna contributed the ballad "Masterpiece" for the film\'s soundtrack, which won her a Golden Globe Award for Best Original Song. Question: Material Girl clothing line is released under which brand?',
    'Answer': 'Macy\'s.',
    'CoT': '"Material Girl clothing line" is first mentioned in the first sentence of the passage. The second sentence further discusses the clothing line, saying that it "was released under the Macy\'s label".',
}

pos_example_2 = {
    'input': 'Passage: In 1919 Father James Burns became president of Notre Dame, and in three years he produced an academic revolution that brought the school up to national standards by adopting the elective system and moving away from the university\'s traditional scholastic and classical emphasis. By contrast, the Jesuit colleges, bastions of academic conservatism, were reluctant to move to a system of electives. Their graduates were shut out of Harvard Law School for that reason. Notre Dame continued to grow over the years, adding more colleges, programs, and sports teams. By 1921, with the addition of the College of Commerce, Notre Dame had grown from a small college to a university with five colleges and a professional law school. The university continued to expand and add new residence halls and buildings with each subsequent president. Question: Over how many years did the change to national standards undertaken at Notre Dame in the early 20th century take place?',
    'Answer': 'three years.',
    'CoT': 'The "national standards" are mentioned in the first sentence of the passage, where it is stated that "In 1919 Father James Burns became president of Notre Dame, and in three years he produced an academic revolution that brought the school up to national standards".',
}

neg_example_1 = {
    'input': 'Passage: The descendants of Rollo\'s Vikings and their Frankish wives would replace the Norse religion and Old Norse language with Catholicism (Christianity) and the Gallo-Romance language of the local people, blending their maternal Frankish heritage with Old Norse traditions and customs to synthesize a unique "Norman" culture in the north of France. The Norman language was forged by the adoption of the indigenous langue d\'oïl branch of Romance by a Norse-speaking ruling class, and it developed into the regional language that survives today. Question: What was replaced with the Norse religion?',
    'Answer': 'N/A',
    'CoT': 'In the passage, it is mentioned that Norse religion was replaced with Catholicism, whereas the question asks about the entity which was replaced by Norse religion. No such information is provided in the passage.',
}

# Kept as a defined name, although no template in this cell references it.
neg_example_2 = {
    'input': 'Passage: Exceptions to the restrictions faced by Windows Store apps are given to web browsers. The user\'s default browser can distribute a Metro-style web browser in same package as the desktop version, which has access to functionality unavailable to other apps, such as being able to permanently run in the background, use multiple background processes, and use Windows API code instead of WinRT (allowing for code to be re-used with the desktop version, while still taking advantage of features available to Windows Store apps, such as charms). Microsoft advertises this exception privilege "New experience enabled" (formerly "Metro-style enabled"). Question: What term did Microsoft give to its exception privilige for file browsing?',
    'Answer': 'N/A',
    'CoT': "The passage talks about exception privilege given with respect to the entity 'web browser', whereas the question asks about 'file browser'",
}

# One dict per filtered question: its id, the unanswerability reason (skipped
# for the control group) and ten prompt variants.
# Templates are written one literal per output line so that the whitespace in
# the prompts is visible. Several oddities of the original templates are kept
# on purpose so the generated text does not change: five trailing spaces after
# "answer the question.", separator lines that are empty ("\n"), four spaces
# ("    \n") or eight spaces ("        \n"), and instruction lines that start
# at column 0 while the examples are indented by eight spaces.
prompt_list_few_shot_with_instructions = []
for elem in filtered_data:
    prompt_elem = {"id": elem["id"]}
    if not IS_CONTROL_GROUP:
        prompt_elem["Unanswerablity-Reason"] = question_to_reason_dict[elem["id"]]

    # Instruction + two answerable examples.
    prompt_elem["Adversarial"] = (
        "Given the following passage and question, answer the question:\n"
        "\n"
        "        Example 1:\n"
        f"        {pos_example_1['input']}\n"
        f"        Output: {pos_example_1['Answer']}\n"
        "\n"
        "        Example 2:\n"
        f"        {pos_example_2['input']}\n"
        f"        Output: {pos_example_2['Answer']}\n"
        "\n"
        "        Now your turn:\n"
        f"        {elem['input']}\n"
        "        Output:"
    )

    # Instruction + two answerable examples with reasoning.
    prompt_elem["Adversarial-CoT"] = (
        "Given the following passage and question, answer the question:\n"
        "    \n"
        "        Example 1:\n"
        f"        {pos_example_1['input']}\n"
        f"        Output: {pos_example_1['CoT']} Therefore, the answer is: {pos_example_1['Answer']}\n"
        "\n"
        "        Example 2:\n"
        f"        {pos_example_2['input']}\n"
        f"        Output: {pos_example_2['CoT']} Therefore, the answer is: {pos_example_2['Answer']}\n"
        "\n"
        "        Now your turn:\n"
        f"        {elem['input']}\n"
        "        Output:"
    )

    # "N/A" instruction + two answerable examples + one unanswerable example.
    prompt_elem["Pseudo-Adversarial"] = (
        "Given the following passage and question, answer the question.     \n"
        'If it cannot be answered based on the passage, answer with "N/A" only, and nothing else:\n'
        "        \n"
        "        Example 1:\n"
        f"        {pos_example_1['input']}\n"
        f"        Output: {pos_example_1['Answer']}\n"
        "        \n"
        "        Example 2:\n"
        f"        {pos_example_2['input']}\n"
        f"        Output: {pos_example_2['Answer']}\n"
        "\n"
        "        Example 3:\n"
        f"        {neg_example_1['input']}\n"
        f"        Output: {neg_example_1['Answer']}\n"
        "\n"
        "        Now your turn:\n"
        f"        {elem['input']}\n"
        "        Output:"
    )

    # Same as Pseudo-Adversarial, each example preceded by its reasoning.
    prompt_elem["Pseudo-Adversarial-CoT"] = (
        "Given the following passage and question, answer the question.     \n"
        'If it cannot be answered based on the passage, answer with "N/A" only, and nothing else:\n'
        "\n"
        "        Example 1:\n"
        f"        {pos_example_1['input']}\n"
        f"        Output: {pos_example_1['CoT']} Therefore, the answer is: {pos_example_1['Answer']}\n"
        "        \n"
        "        Example 2:\n"
        f"        {pos_example_2['input']}\n"
        f"        Output: {pos_example_2['CoT']} Therefore, the answer is: {pos_example_2['Answer']}\n"
        "\n"
        "        Example 3:\n"
        f"        {neg_example_1['input']}\n"
        f"        Output: {neg_example_1['CoT']} Therefore, the answer is: {neg_example_1['Answer']}\n"
        "\n"
        "        Now your turn:\n"
        f"        {elem['input']}\n"
        "        Output:"
    )

    # Ablation 1: keeps the "N/A" instruction, drops the unanswerable example.
    prompt_elem["Pseudo-Adversarial-Ablation1"] = (
        "Given the following passage and question, answer the question.     \n"
        'If it cannot be answered based on the passage, answer with "N/A" only, and nothing else:\n'
        "        \n"
        "        Example 1:\n"
        f"        {pos_example_1['input']}\n"
        f"        Output: {pos_example_1['Answer']}\n"
        "        \n"
        "        Example 2:\n"
        f"        {pos_example_2['input']}\n"
        f"        Output: {pos_example_2['Answer']}\n"
        "\n"
        "        Now your turn:\n"
        f"        {elem['input']}\n"
        "        Output:"
    )

    # Ablation 1 with reasoning in the examples.
    prompt_elem["Pseudo-Adversarial-Ablation1-CoT"] = (
        "Given the following passage and question, answer the question.     \n"
        'If it cannot be answered based on the passage, answer with "N/A" only, and nothing else:\n'
        "\n"
        "        Example 1:\n"
        f"        {pos_example_1['input']}\n"
        f"        Output: {pos_example_1['CoT']} Therefore, the answer is: {pos_example_1['Answer']}\n"
        "        \n"
        "        Example 2:\n"
        f"        {pos_example_2['input']}\n"
        f"        Output: {pos_example_2['CoT']} Therefore, the answer is: {pos_example_2['Answer']}\n"
        "\n"
        "        Now your turn:\n"
        f"        {elem['input']}\n"
        "        Output:"
    )

    # Ablation 2: drops the "N/A" instruction, keeps the unanswerable example.
    prompt_elem["Pseudo-Adversarial-Ablation2"] = (
        "Given the following passage and question, answer the question.     \n"
        "        \n"
        "        Example 1:\n"
        f"        {pos_example_1['input']}\n"
        f"        Output: The answer is {pos_example_1['Answer']}\n"
        "        \n"
        "        Example 2:\n"
        f"        {pos_example_2['input']}\n"
        f"        Output: The answer is {pos_example_2['Answer']}\n"
        "\n"
        "        Example 3:\n"
        f"        {neg_example_1['input']}\n"
        f"        Output: {neg_example_1['Answer']}\n"
        "\n"
        "        Now your turn:\n"
        f"        {elem['input']}\n"
        "        Output:"
    )

    # Ablation 2 with reasoning in the examples.
    prompt_elem["Pseudo-Adversarial-Ablation2-CoT"] = (
        "Given the following passage and question, answer the question.     \n"
        "\n"
        "        Example 1:\n"
        f"        {pos_example_1['input']}\n"
        f"        Output: {pos_example_1['CoT']} Therefore, the answer is: {pos_example_1['Answer']}\n"
        "        \n"
        "        Example 2:\n"
        f"        {pos_example_2['input']}\n"
        f"        Output: {pos_example_2['CoT']} Therefore, the answer is: {pos_example_2['Answer']}\n"
        "\n"
        "        Example 3:\n"
        f"        {neg_example_1['input']}\n"
        f"        Output: {neg_example_1['CoT']} Therefore, the answer is: {neg_example_1['Answer']}\n"
        "\n"
        "        Now your turn:\n"
        f"        {elem['input']}\n"
        "        Output:"
    )

    # (A)/(B) answerability classification with three labelled examples.
    prompt_elem["Answerability"] = (
        "Given the following passage and question, choose one of the following options:\n"
        "(A) the question can be answered by the passage (B) the question cannot be answered by the passage.\n"
        'Please answer with "A" or "B" only, and nothing else:\n'
        "\n"
        "        Example 1:\n"
        f"        {pos_example_1['input']}\n"
        "        Output: (A)\n"
        "        \n"
        "        Example 2:\n"
        f"        {pos_example_2['input']}\n"
        "        Output: (A)\n"
        "\n"
        "        Example 3:\n"
        f"        {neg_example_1['input']}\n"
        "        Output: (B)\n"
        "\n"
        "        Now your turn:\n"
        f"        {elem['input']}\n"
        "        Output:"
    )

    # (A)/(B) answerability classification, each label preceded by its reasoning.
    prompt_elem["Answerability-CoT"] = (
        "Given the following passage and question, choose one of the following options:\n"
        "(A) the question can be answered by the passage (B) the question cannot be answered by the passage.\n"
        'Please answer with "A" or "B" only, and nothing else:\n'
        "\n"
        "        Example 1:\n"
        f"        {pos_example_1['input']}\n"
        f"        Output: {pos_example_1['CoT']} Therefore, the answer is (A).\n"
        "        \n"
        "        Example 2:\n"
        f"        {pos_example_2['input']}\n"
        f"        Output: {pos_example_2['CoT']} Therefore, the answer is (A).\n"
        "\n"
        "        Example 3:\n"
        f"        {neg_example_1['input']}\n"
        f"        Output: {neg_example_1['CoT']} Therefore, the answer is (B).\n"
        "\n"
        "        Now your turn:\n"
        f"        {elem['input']}\n"
        "        Output:"
    )

    prompt_list_few_shot_with_instructions.append(prompt_elem)

## Generate Instructions (Few-Shot with Instructions) - Version 2

In [None]:
prompt_list_few_shot_with_instructions_v2 = list()
for elem in filtered_data:


    pos_example_1 = {'input': 'Passage: Madonna released the Material Girl clothing line, which she designed with her daughter, Lourdes. The 1980s inspired clothing line, borrowed from Madonna\'s punk-girl style when she rose to fame in the 1980s, was released under the Macy\'s label. Madonna also opened a series of fitness centers around the world named Hard Candy Fitness. In November 2011, Madonna and MG Icon announced the release of a second fashion brand called Truth or Dare by Madonna to include footwear, underclothing, and accessories. She also directed her second feature film, W.E., a biographic about the affair between King Edward VIII and Wallis Simpson; it was co-written with Alek Keshishian. Critical and commercial response to the film was negative. Madonna contributed the ballad "Masterpiece" for the film\'s soundtrack, which won her a Golden Globe Award for Best Original Song. Question: Material Girl clothing line is released under which brand?',
                         'Answer': 'Macy\'s.',
                         'CoT':'"Material Girl clothing line" is first mentioned in the first sentence of the passage. The second sentence further discusses the clothing line, saying that it "was released under the Macy\'s label".'}
    
    pos_example_2 = {'input':'Passage: In 1919 Father James Burns became president of Notre Dame, and in three years he produced an academic revolution that brought the school up to national standards by adopting the elective system and moving away from the university\'s traditional scholastic and classical emphasis. By contrast, the Jesuit colleges, bastions of academic conservatism, were reluctant to move to a system of electives. Their graduates were shut out of Harvard Law School for that reason. Notre Dame continued to grow over the years, adding more colleges, programs, and sports teams. By 1921, with the addition of the College of Commerce, Notre Dame had grown from a small college to a university with five colleges and a professional law school. The university continued to expand and add new residence halls and buildings with each subsequent president. Question: Over how many years did the change to national standards undertaken at Notre Dame in the early 20th century take place?',
                         'Answer':'three years.',
                         'CoT':'The "national standards" are mentioned in the first sentence of the passage, where it is stated that "In 1919 Father James Burns became president of Notre Dame, and in three years he produced an academic revolution that brought the school up to national standards".'}

    
    neg_example_1 = {'input':'Passage: The descendants of Rollo\'s Vikings and their Frankish wives would replace the Norse religion and Old Norse language with Catholicism (Christianity) and the Gallo-Romance language of the local people, blending their maternal Frankish heritage with Old Norse traditions and customs to synthesize a unique "Norman" culture in the north of France. The Norman language was forged by the adoption of the indigenous langue d\'oïl branch of Romance by a Norse-speaking ruling class, and it developed into the regional language that survives today. Question: What was replaced with the Norse religion?',
                        'Answer':'unanswerable.',
                        'CoT':'In the passage, it is mentioned that Norse religion was replaced with Catholicism, whereas the question asks about the entity which was replaced by Norse religion. No such information is provided in the passage.'}

    
    neg_example_2 = {'input':'Passage: Exceptions to the restrictions faced by Windows Store apps are given to web browsers. The user\'s default browser can distribute a Metro-style web browser in same package as the desktop version, which has access to functionality unavailable to other apps, such as being able to permanently run in the background, use multiple background processes, and use Windows API code instead of WinRT (allowing for code to be re-used with the desktop version, while still taking advantage of features available to Windows Store apps, such as charms). Microsoft advertises this exception privilege "New experience enabled" (formerly "Metro-style enabled"). Question: What term did Microsoft give to its exception privilige for file browsing?',
                         'Answer':'unanswerable.',
                         'CoT':"The passage talks about exception privilege given with respect to the entity 'web browser', whereas the question asks about 'file browser'"}




    prompt_elem = dict()

    prompt_elem["id"] = elem["id"]
    
    if not IS_CONTROL_GROUP:
        prompt_elem["Unanswerablity-Reason"] = question_to_reason_dict[elem["id"]]

    prompt_elem["Adversarial"] = f"""Given the following passage and question, answer the question:

        Example 1:
        {pos_example_1["input"]}
        Output: {pos_example_1["Answer"]}

        Example 2:
        {pos_example_2["input"]}
        Output: {pos_example_2["Answer"]}

        Now your turn:
        {elem["input"]}
        Output:""" 
    
    prompt_elem["Adversarial-CoT"] = f"""Given the following passage and question, answer the question:
    
        Example 1:
        {pos_example_1["input"]}
        Output: {pos_example_1["CoT"]} Therefore, the answer is: {pos_example_1["Answer"]}

        Example 2:
        {pos_example_2["input"]}
        Output: {pos_example_2["CoT"]} Therefore, the answer is: {pos_example_2["Answer"]}

        Now your turn:
        {elem["input"]}
        Output:""" 

    
    prompt_elem["Pseudo-Adversarial"] = f"""Given the following passage and question, answer the question.     
If it cannot be answered based on the passage, reply "unanswerable":
        
        Example 1:
        {pos_example_1["input"]}
        Output: {pos_example_1["Answer"]}
        
        Example 2:
        {pos_example_2["input"]}
        Output: {pos_example_2["Answer"]}

        Example 3:
        {neg_example_1["input"]}
        Output: {neg_example_1["Answer"]}

        Now your turn:
        {elem["input"]}
        Output:""" 

    prompt_elem["Pseudo-Adversarial-CoT"] = f"""Given the following passage and question, answer the question.     
If it cannot be answered based on the passage, reply "unanswerable":

        Example 1:
        {pos_example_1["input"]}
        Output: {pos_example_1["CoT"]} Therefore, the answer is: {pos_example_1["Answer"]}
        
        Example 2:
        {pos_example_2["input"]}
        Output: {pos_example_2["CoT"]} Therefore, the answer is: {pos_example_2["Answer"]}

        Example 3:
        {neg_example_1["input"]}
        Output: {neg_example_1["CoT"]} Therefore, the answer is: {neg_example_1["Answer"]}

        Now your turn:
        {elem["input"]}
        Output:""" 


    prompt_elem["Pseudo-Adversarial-Ablation1"] = f"""Given the following passage and question, answer the question.     
If it cannot be answered based on the passage, reply "unanswerable":
        
        Example 1:
        {pos_example_1["input"]}
        Output: {pos_example_1["Answer"]}
        
        Example 2:
        {pos_example_2["input"]}
        Output: {pos_example_2["Answer"]}

        Now your turn:
        {elem["input"]}
        Output:""" 

    prompt_elem["Pseudo-Adversarial-Ablation1-CoT"] = f"""Given the following passage and question, answer the question.     
If it cannot be answered based on the passage, reply "unanswerable":

        Example 1:
        {pos_example_1["input"]}
        Output: {pos_example_1["CoT"]} Therefore, the answer is: {pos_example_1["Answer"]}
        
        Example 2:
        {pos_example_2["input"]}
        Output: {pos_example_2["CoT"]} Therefore, the answer is: {pos_example_2["Answer"]}

        Now your turn:
        {elem["input"]}
        Output:""" 



    prompt_elem["Pseudo-Adversarial-Ablation2"] = f"""Given the following passage and question, answer the question.     
        
        Example 1:
        {pos_example_1["input"]}
        Output: The answer is {pos_example_1["Answer"]}
        
        Example 2:
        {pos_example_2["input"]}
        Output: The answer is {pos_example_2["Answer"]}

        Example 3:
        {neg_example_1["input"]}
        Output: {neg_example_1["Answer"]}

        Now your turn:
        {elem["input"]}
        Output:""" 

    prompt_elem["Pseudo-Adversarial-Ablation2-CoT"] = f"""Given the following passage and question, answer the question.     

        Example 1:
        {pos_example_1["input"]}
        Output: {pos_example_1["CoT"]} Therefore, the answer is: {pos_example_1["Answer"]}
        
        Example 2:
        {pos_example_2["input"]}
        Output: {pos_example_2["CoT"]} Therefore, the answer is: {pos_example_2["Answer"]}

        Example 3:
        {neg_example_1["input"]}
        Output: {neg_example_1["CoT"]} Therefore, the answer is: {neg_example_1["Answer"]}

        Now your turn:
        {elem["input"]}
        Output:""" 


    
    prompt_elem["Answerability"] = f"""Given the following passage and question, decide if the question is answerable based on the passage. Reply only "answerable" or "unanswerable":

        Example 1:
        {pos_example_1["input"]}
        Output: answerable.
        
        Example 2:
        {pos_example_2["input"]}
        Output: answerable.

        Example 3:
        {neg_example_1["input"]}
        Output: unanswerable.

        Now your turn:
        {elem["input"]}
        Output:""" 

    prompt_elem["Answerability-CoT"] = f"""Given the following passage and question, decide if the question is answerable based on the passage. Reply only "answerable" or "unanswerable":
    
        Example 1:
        {pos_example_1["input"]}
        Output: {pos_example_1["CoT"]} Therefore, The question is answerable.
        
        Example 2:
        {pos_example_2["input"]}
        Output: {pos_example_2["CoT"]} Therefore, The question is answerable.

        Example 3:
        {neg_example_1["input"]}
        Output: {neg_example_1["CoT"]} Therefore, The question is unanswerable.

        Now your turn:
        {elem["input"]}
        Output:""" 

    prompt_list_few_shot_with_instructions_v2.append(prompt_elem)

## save to json

In [None]:
# Create all output directories up front. exist_ok=True replaces the separate
# "check, then create" steps (which can race) and also creates the parents
# `outdir` and `outdir/chatGPT`.
for prompt_setting in ("zero_shot", "few_shot", "few_shot_with_instructions"):
    os.makedirs(os.path.join(outdir, "chatGPT", prompt_setting), exist_ok=True)

# The split name becomes part of the output file name.
split_type = "control_group" if IS_CONTROL_GROUP else "adversarial"

# # zero shot
# with open(os.path.join(outdir, "chatGPT", "zero_shot", f"squad_{split_type}.json"), 'w') as f1:
#     f1.write(json.dumps(prompt_list_zero_shot, indent=2))

# few shot
# with open(os.path.join(outdir, "chatGPT", "few_shot", f"squad_{split_type}.json"), 'w') as f1:
#     f1.write(json.dumps(prompt_list_few_shot, indent=2))

# # few shot (with instructions)
# NOTE(review): the "Version 2" loop above appends to
# `prompt_list_few_shot_with_instructions_v2`, but this cell writes
# `prompt_list_few_shot_with_instructions` - confirm which list is meant to be saved.
with open(os.path.join(outdir, "chatGPT", "few_shot_with_instructions", f"squad_{split_type}.json"), 'w') as f1:
    json.dump(prompt_list_few_shot_with_instructions, f1, indent=2)

# Unanswerable Questions

## get reasons for unanswerability

In [2]:
# Directory holding the unanswerability-reason annotations; every file in it is
# read as a tab-separated table by the loading cell below.
indir_reasons = r"../files_not_for_git/data/squad/reasons"

In [3]:
# Translates the one-character codes found in the "reason" column of the
# annotation files into the full reason names stored with each prompt.
shortcut_to_reason = {
    "E": "Entity Swap",
    "#": "Number Swap",
    "N": "Negation",
    "A": "Antonym",
    "X": "Mutual Exclusion",
    "I": "No Information",
}

In [4]:
# Read every annotation file in `indir_reasons` as a tab-separated table.
# sorted() makes the merge order independent of the order in which the OS lists
# the directory, so the result is the same on every machine.
reason_frames = [
    pd.read_csv(os.path.join(indir_reasons, reasons_filename), sep='\t')
    for reasons_filename in sorted(os.listdir(indir_reasons))
]

# Concatenate once instead of growing a frame inside the loop; fall back to an
# empty frame with the expected columns when the directory contains no files.
if reason_frames:
    reasons_df = pd.concat(reason_frames, ignore_index=True)
else:
    reasons_df = pd.DataFrame(columns=["qid", "reason"])
reasons_df = reasons_df.drop_duplicates()

# Question id -> full reason name. An unknown shortcut code raises KeyError, and
# when a qid occurs with several reasons the last row wins.
question_to_reason_dict = {
    qid: shortcut_to_reason[reason_code]
    for qid, reason_code in zip(reasons_df["qid"], reasons_df["reason"])
}

## Few-Shot with Instructions

In [13]:
# Selects which set of in-context examples is used
# (get_icl_examples_squad_v1 / _v2 / _v3); the value is also embedded in the
# name of the generated output file.
ICL_examples_variant = 2 # any of 1,2,3

In [8]:
# SQuAD JSON file the prompts are generated from (the trailing comment keeps the
# alternative train-set path).
squad_indir = r"../files_not_for_git/data/squad/dev-v2.0.json" #r"../data/squad/train-v2.0.json"
# Directory the generated few-shot-with-instructions prompt files are written to.
squad_few_shot_with_instructions_outdir = "../generated_prompts/all/few_shot_with_instructions"

In [9]:
# Load the whole SQuAD file. The encoding is given explicitly because the
# passages contain non-ASCII characters and the platform default encoding is
# not UTF-8 everywhere.
with open(squad_indir, 'r', encoding="utf-8") as f1:
    examples = json.load(f1)


In [24]:
# # code to find ICL examples - needs to be used in debug mode
# for example in examples['data']:
#     for paragraph in example['paragraphs']:
#         curr_context = paragraph['context']
#         for qa in paragraph['qas']:
#             if qa['is_impossible']:
#                 adversarial_qa = qa
#             else:
#                 control_group_qa = qa

In [10]:
def get_icl_examples_squad_v1():
    """Return the hand-written in-context examples of variant 1.

    Returns:
        A 5-tuple of dicts, each with the keys 'input', 'Answer' and 'CoT':
        (pos_example_1, pos_example_2, neg_example_1,
        pos_answerability_example_1, neg_answerability_example_1).
        The two answerability examples reuse the passage, question and CoT of
        pos_example_1 / neg_example_1; their 'Answer' is the label
        'answerable.' / 'unanswerable.'.
    """
    madonna_example = {
        'input': 'Passage: Madonna released the Material Girl clothing line, which she designed with her daughter, Lourdes. The 1980s inspired clothing line, borrowed from Madonna\'s punk-girl style when she rose to fame in the 1980s, was released under the Macy\'s label. Madonna also opened a series of fitness centers around the world named Hard Candy Fitness. In November 2011, Madonna and MG Icon announced the release of a second fashion brand called Truth or Dare by Madonna to include footwear, underclothing, and accessories. She also directed her second feature film, W.E., a biographic about the affair between King Edward VIII and Wallis Simpson; it was co-written with Alek Keshishian. Critical and commercial response to the film was negative. Madonna contributed the ballad "Masterpiece" for the film\'s soundtrack, which won her a Golden Globe Award for Best Original Song. Question: Material Girl clothing line is released under which brand?',
        'Answer': 'Macy\'s.',
        'CoT': '"Material Girl clothing line" is first mentioned in the first sentence of the passage. The second sentence further discusses the clothing line, saying that it "was released under the Macy\'s label".',
    }

    notre_dame_example = {
        'input': 'Passage: In 1919 Father James Burns became president of Notre Dame, and in three years he produced an academic revolution that brought the school up to national standards by adopting the elective system and moving away from the university\'s traditional scholastic and classical emphasis. By contrast, the Jesuit colleges, bastions of academic conservatism, were reluctant to move to a system of electives. Their graduates were shut out of Harvard Law School for that reason. Notre Dame continued to grow over the years, adding more colleges, programs, and sports teams. By 1921, with the addition of the College of Commerce, Notre Dame had grown from a small college to a university with five colleges and a professional law school. The university continued to expand and add new residence halls and buildings with each subsequent president. Question: Over how many years did the change to national standards undertaken at Notre Dame in the early 20th century take place?',
        'Answer': 'three years.',
        'CoT': 'The "national standards" are mentioned in the first sentence of the passage, where it is stated that "In 1919 Father James Burns became president of Notre Dame, and in three years he produced an academic revolution that brought the school up to national standards".',
    }

    norse_example = {
        'input': 'Passage: The descendants of Rollo\'s Vikings and their Frankish wives would replace the Norse religion and Old Norse language with Catholicism (Christianity) and the Gallo-Romance language of the local people, blending their maternal Frankish heritage with Old Norse traditions and customs to synthesize a unique "Norman" culture in the north of France. The Norman language was forged by the adoption of the indigenous langue d\'oïl branch of Romance by a Norse-speaking ruling class, and it developed into the regional language that survives today. Question: What was replaced with the Norse religion?',
        'Answer': 'unanswerable.',
        'CoT': 'In the passage, it is mentioned that Norse religion was replaced with Catholicism, whereas the question asks about the entity which was replaced by Norse religion. No such information is provided in the passage.',
    }

    # Same passage, question and CoT as the QA examples; only the label differs
    # for the answerable case. Fresh dicts are built so the returned objects
    # stay independent of each other.
    madonna_answerability = {**madonna_example, 'Answer': 'answerable.'}
    norse_answerability = dict(norse_example)

    return (madonna_example, notre_dame_example, norse_example,
            madonna_answerability, norse_answerability)

In [11]:
def get_icl_examples_squad_v2():
    """Return the hand-written in-context examples of variant 2.

    Returns:
        A 5-tuple of dicts, each with the keys 'input', 'Answer' and 'CoT':
        (pos_example_1, pos_example_2, neg_example_1,
        pos_answerability_example_1, neg_answerability_example_1).
        The two answerability examples reuse the passage, question and CoT of
        pos_example_1 / neg_example_1; their 'Answer' is the label
        'answerable.' / 'unanswerable.'.
    """
    # The question asks about November 2013, matching the passage: with any
    # other year this "answerable" demonstration would itself be a number-swap
    # (unanswerable) question. The CoT quotes the passage ("McClory", "settled").
    pos_example_1 = {'input': 'Passage: In November 2013 MGM and the McClory estate formally settled the issue with Danjaq, LLC—sister company of Eon Productions—with MGM acquiring the full copyright film rights to the concept of Spectre and all of the characters associated with it. With the acquisition of the film rights and the organisation\'s re-introduction to the series\' continuity, the SPECTRE acronym was discarded and the organisation reimagined as "Spectre".  Question: Which two parties settled the issue in November 2013?',
                     'Answer': 'MGM and the McClory estate.',
                     'CoT':'The passage starts by saying that in November 2013 MGM and McClory estate formally settled the issue with Danjaq.'}

    # Every example separates passage and question with the "Question:" marker.
    pos_example_2 = {'input':'Passage: Genome composition is used to describe the make up of contents of a haploid genome, which should include genome size, proportions of non-repetitive DNA and repetitive DNA in details. By comparing the genome compositions between genomes, scientists can better understand the evolutionary history of a given genome. Question: What aspect of a genome can genome compositions help researchers in learning about?',
                     'Answer':'evolutionary history.',
                     'CoT':'The second paragraph sentence mentions that comparing genome composition can help scientists better understand the evolutionary history of a given genome. This evolutionary history is one aspect of a genome.'}

    neg_example_1 = {'input':'Passage: The story focuses on series protagonist Link, who tries to prevent Hyrule from being engulfed by a corrupted parallel dimension known as the Twilight Realm. To do so, he takes the form of both a Hylian and a wolf, and is assisted by a mysterious creature named Midna. The game takes place hundreds of years after Ocarina of Time and Majora\'s Mask, in an alternate timeline from The Wind Waker. Question: What land does Ocarina serve to protect?',
                     'Answer':'unanswerable.',
                     'CoT':'In the passage, the only character mentioned as trying to save some land is Link, where as the question asks what country does Ocarina serves to protect. No such information is provided in the passage.'}

    # Derive the answerability examples from the QA examples so the two copies
    # of each passage/question/CoT can never drift apart.
    pos_answerability_example_1 = {**pos_example_1, 'Answer': 'answerable.'}
    neg_answerability_example_1 = {**neg_example_1, 'Answer': 'unanswerable.'}

    return pos_example_1, pos_example_2, neg_example_1, pos_answerability_example_1, neg_answerability_example_1

In [12]:
def get_icl_examples_squad_v3():
    """Return the hand-written in-context examples of variant 3.

    Returns:
        A 5-tuple of dicts, each with the keys 'input', 'Answer' and 'CoT':
        (pos_example_1, pos_example_2, neg_example_1,
        pos_answerability_example_1, neg_answerability_example_1).
        The two answerability examples reuse the passage, question and CoT of
        pos_example_1 / neg_example_1; their 'Answer' is the label
        'answerable.' / 'unanswerable.'.
    """
    # The CoT names the film as the passage does ("Spectre").
    pos_example_1 = {'input': 'Passage: Thomas Newman returned as Spectre\'s composer. Rather than composing the score once the film had moved into post-production, Newman worked during filming. The theatrical trailer released in July 2015 contained a rendition of John Barry\'s On Her Majesty\'s Secret Service theme. Mendes revealed that the final film would have more than one hundred minutes of music. The soundtrack album was released on 23 October 2015 in the UK and 6 November 2015 in the USA on the Decca Records label.  Question: Who wrote the music for Spectre?',
                     'Answer': 'Thomas Newman.',
                     'CoT':'The passage starts by saying that Thomas Newman returned as Spectre\'s composer. That means that he was the one to write its music.'}

    # The answer ends with a period like every other demonstration answer, so
    # the demonstrated output format is uniform.
    pos_example_2 = {'input':'Passage: Between 64 and 104 major aftershocks, ranging in magnitude from 4.0 to 6.1, were recorded within 72 hours of the main quake. According to Chinese official counts, "by 12:00 CST, November 6, 2008 there had been 42,719 total aftershocks, of which 246 ranged from 4.0 MS to 4.9 MS, 34 from 5.0 MS to 5.9 MS, and 8 from 6.0 Ms to 6.4 MS; the strongest aftershock measured 6.4 MS." The latest aftershock exceeding M6 occurred on August 5, 2008. Question: What do the Chinese say is the total number of shocks after the quake?',
                     'Answer':'42,719.',
                     'CoT':'The first paragraph sentence discusses the number of aftershocks following the earthquake. The second paragraph sentence says that according to Chinese official counts, the total number was 42,719.'}

    neg_example_1 = {'input':'Passage: Both the number of base pairs and the number of genes vary widely from one species to another, and there is only a rough correlation between the two (an observation known as the C-value paradox). At present, the highest known number of genes is around 60,000, for the protozoan causing trichomoniasis (see List of sequenced eukaryotic genomes), almost three times as many as in the human genome. Question: What is the highest known number of species?',
                     'Answer':'unanswerable.',
                     'CoT':'In the passage, it is mentioned that the highest known number of genes is around 60,000, whereas the question asks about the highest number of species. No such information is provided in the passage.'}

    # Derive the answerability examples from the QA examples so the two copies
    # of each passage/question/CoT can never drift apart.
    pos_answerability_example_1 = {**pos_example_1, 'Answer': 'answerable.'}
    neg_answerability_example_1 = {**neg_example_1, 'Answer': 'unanswerable.'}

    return pos_example_1, pos_example_2, neg_example_1, pos_answerability_example_1, neg_answerability_example_1

In [14]:
def example_to_string_squad(example, instruction, isIcl, isCoT):
    """Render one sample as an "Instructions / passage+question / Answer:" block.

    Args:
        example: dict with an 'input' entry; 'Answer' (and 'CoT' when isCoT is
            set) are read only when isIcl is true.
        instruction: instruction text placed on the first line.
        isIcl: true for an in-context demonstration, whose answer is appended
            after "Answer:"; false for the query, which ends at "Answer:".
        isCoT: when isIcl is true, put the chain of thought before the answer.

    Returns:
        The formatted string.
    """
    query_part = f"Instructions: {instruction}\n {example['input']}\n Answer:"

    # The sample the model has to answer: leave the answer slot open.
    if not isIcl:
        return query_part

    if isCoT:
        return f"{query_part} {example['CoT']} So, the answer is: {example['Answer']}"
    return f"{query_part} {example['Answer']}"

In [15]:
def get_few_shot_with_instructions_prompts_squad(curr_data, data_type):
    """Build all few-shot prompt variants for every sample in ``curr_data``.

    Each prompt consists of two in-context demonstrations followed by the
    sample itself, every part rendered by ``example_to_string_squad``. The
    demonstrations come from the example set selected by the module-level
    ``ICL_examples_variant``.

    Args:
        curr_data: iterable of dicts with the keys 'id' and 'input'.
        data_type: when equal to "adversarial", the unanswerability reason of
            each sample is looked up in ``question_to_reason_dict`` (KeyError
            if the id has no annotated reason) and stored under
            'Unanswerability-Reason'.

    Returns:
        A list with one dict per sample holding 'id', optionally
        'Unanswerability-Reason', and the prompts 'Adversarial',
        'Pseudo-Adversarial', 'Ablation1', 'Ablation2' and 'Answerability',
        each followed by its '-CoT' counterpart. Prompt strings have runs of
        spaces collapsed to one space and start with a single space; 'id' and
        'Unanswerability-Reason' are stored unmodified.

    Raises:
        Exception: if ``ICL_examples_variant`` is not 1, 2 or 3.
    """
    if ICL_examples_variant == 1:
        get_icl_examples = get_icl_examples_squad_v1
    elif ICL_examples_variant == 2:
        get_icl_examples = get_icl_examples_squad_v2
    elif ICL_examples_variant == 3:
        get_icl_examples = get_icl_examples_squad_v3
    else:
        raise Exception(f"ICL_examples_variant can only be any of 1,2,3, but is currently {ICL_examples_variant}")
    pos_example_1, pos_example_2, neg_example_1, pos_answerability_example_1, neg_answerability_example_1 = get_icl_examples()

    instructions = {'Adversarial': 'Given the following passage and question, answer the question.',
                    'Pseudo-Adversarial': 'Given the following passage and question, answer the question. If it cannot be answered based on the passage, reply "unanswerable".',
                    'Answerability': 'Given the following passage and question, decide if the question is answerable based on the passage. Reply only "answerable" or "unanswerable".'}

    # (prompt key, instruction key, first demonstration, second demonstration).
    # The order fixes the key order of every output dict.
    prompt_specs = [
        ("Adversarial", 'Adversarial', pos_example_1, pos_example_2),
        ("Pseudo-Adversarial", 'Pseudo-Adversarial', pos_example_1, neg_example_1),
        # Ablation1: "unanswerable" hint in the instruction, but no unanswerable demonstration.
        ("Ablation1", 'Pseudo-Adversarial', pos_example_1, pos_example_2),
        # Ablation2: unanswerable demonstration, but no hint in the instruction.
        ("Ablation2", 'Adversarial', pos_example_1, neg_example_1),
        ("Answerability", 'Answerability', pos_answerability_example_1, neg_answerability_example_1),
    ]

    def build_prompt(first_example, second_example, instruction, is_cot, sample):
        """Join two demonstrations and the open query into one normalised prompt."""
        parts = [example_to_string_squad(first_example, instruction, True, is_cot),
                 example_to_string_squad(second_example, instruction, True, is_cot),
                 # The query itself is never rendered with an answer or CoT.
                 example_to_string_squad(sample, instruction, False, False)]
        # Blocks are separated by an empty line plus one space; runs of spaces
        # are collapsed and the prompt gets a single leading space. The
        # separator is the same for every variant, including 'Ablation1-CoT'.
        return f" {re.sub(' +', ' ', chr(10).join(['', ' ']).join(parts)).strip()}"

    prompt_list_few_shot_with_instructions = list()
    for elem in curr_data:
        # Metadata is stored as-is: only prompt texts go through the whitespace
        # normalisation, so ids stay usable as exact lookup keys.
        prompt_elem = {"id": elem["id"]}
        if data_type == "adversarial":
            prompt_elem["Unanswerability-Reason"] = question_to_reason_dict[elem["id"]]

        for prompt_key, instruction_key, first_example, second_example in prompt_specs:
            instruction = instructions[instruction_key]
            prompt_elem[prompt_key] = build_prompt(first_example, second_example, instruction, False, elem)
            prompt_elem[f"{prompt_key}-CoT"] = build_prompt(first_example, second_example, instruction, True, elem)

        prompt_list_few_shot_with_instructions.append(prompt_elem)
    return prompt_list_few_shot_with_instructions

In [16]:
def filter_adversarial_or_control_group(data, data_types):
    """Select the SQuAD questions that belong to one evaluation split.

    Args:
        data: iterable of SQuAD-style articles; each has a "paragraphs" list whose
            items carry a "context" string and a "qas" list.
        data_types: name of the split. "control_group" keeps questions that have at
            least one answer; any other value keeps questions whose "answers" list
            is empty.

    Returns:
        A list of ``{"id": ..., "input": "Passage: <context> Question: <question>"}``
        dicts, in the order the questions appear in ``data``.
    """
    want_answerable = data_types == "control_group"

    # Flatten article -> paragraph -> question into a single stream of pairs.
    paragraph_question_pairs = (
        (paragraph, qa)
        for article in data
        for paragraph in article["paragraphs"]
        for qa in paragraph["qas"]
    )

    selected = []
    for paragraph, qa in paragraph_question_pairs:
        prompt_input = f"Passage: {paragraph['context']} Question: {qa['question']}"
        if (len(qa["answers"]) > 0) == want_answerable:
            selected.append({"id": qa['id'], "input": prompt_input})
    return selected

In [17]:
# Build the few-shot-with-instructions SQuAD prompts for both splits and write one
# JSON file per split.

# The SQuAD file is the same for both splits, so it is parsed once; each split is
# only a different filter over the same articles.
with open(squad_indir, 'r') as f1:
    squad_articles = json.load(f1)["data"]

data_types = ["adversarial", "control_group"]
for data_type in data_types:
    curr_data = filter_adversarial_or_control_group(squad_articles, data_type)
    curr_output = get_few_shot_with_instructions_prompts_squad(curr_data, data_type)
    # NOTE: despite its name, curr_outdir is the path of the output *file*.
    curr_outdir = os.path.join(squad_few_shot_with_instructions_outdir, f"squad_{data_type}_icl_examples_v{ICL_examples_variant}_all.json")
    # Create the target folder if needed so a fresh checkout does not fail on open(..., 'w').
    os.makedirs(os.path.dirname(curr_outdir) or ".", exist_ok=True)
    with open(curr_outdir, 'w') as f1:
        json.dump(curr_output, f1)

## Train set (for training the classifier) - zero-shot

In [None]:
# Input file and output root for the SQuAD train-set prompts.
# NOTE(review): this rebinds `indir`, `outdir` and `IS_CONTROL_GROUP`, which the first
# section of the notebook also sets, and the paths carry no "../" prefix unlike the
# NQ / MuSiQue cells that have execution counts — confirm the working directory.
indir = r"data/squad/train-v2.0.json"
outdir = r"generated_prompts/train_set"
# True  -> the filter cell keeps questions that have answers (control group);
# False -> it keeps questions with an empty "answers" list (adversarial).
IS_CONTROL_GROUP = True

In [None]:
# Parse the SQuAD training file and keep only the value stored under its "data" key.
with open(indir) as f:
    data = json.load(f)["data"]

#### filter only (un)answerable questions

In [None]:
# Keep the questions of the selected split: those with at least one answer when
# IS_CONTROL_GROUP is truthy, those with an empty "answers" list otherwise.
# Every kept record pairs the question id with one "Passage: ... Question: ..." string.
filtered_data = []
for elem in data:
    for paragraph in elem["paragraphs"]:
        for qa in paragraph["qas"]:
            sample = f"Passage: {paragraph['context']} Question: {qa['question']}"
            has_answers = len(qa["answers"]) > 0
            if has_answers == bool(IS_CONTROL_GROUP):
                filtered_data.append(dict(id=qa['id'], input=sample))

#### generate prompts (zero shot)

In [None]:
# Build the four zero-shot prompt variants for every filtered SQuAD training question.
# The spaces that follow each newline are part of the prompt text and are spelled out
# explicitly here so they cannot be lost by re-indenting the cell.
rule_indent = " " * 4
sample_indent = " " * 8
answer_instruction = "Given the following passage and question, answer the question"
unanswerable_rule = f'{rule_indent}If it cannot be answered based on the passage, reply "unanswerable":'
check_first_rule = f"{rule_indent}First make sure if it can be answered by the passage."
answerability_instruction = 'Given the following passage and question, decide if the question is answerable based on the passage. Reply only "answerable" or "unanswerable":'

prompt_list_zero_shot_version2 = []
for elem in filtered_data:
    prompt_elem = {"id": elem["id"]}
    sample_block = f"\n{sample_indent}{elem['input']}"
    prompt_elem.update({
        "Adversarial": f"{answer_instruction}:{sample_block}",
        "Pseudo-Adversarial": f"{answer_instruction}.\n{unanswerable_rule}{sample_block}",
        "CoT-Adversarial": f"{answer_instruction}.\n{check_first_rule}\n{unanswerable_rule}{sample_block}",
        "Answerability": f"{answerability_instruction}{sample_block}",
    })
    prompt_list_zero_shot_version2.append(prompt_elem)

#### save to json

In [None]:
# Write the SQuAD train-set zero-shot prompts to
# <outdir>/all/zero_shot/squad_trainset_<split>.json.

# os.makedirs creates every missing intermediate folder, and exist_ok=True makes the
# call safe to re-run, so one call replaces the three check-then-create steps.
zero_shot_dir = os.path.join(outdir, "all", "zero_shot")
os.makedirs(zero_shot_dir, exist_ok=True)

split_type = "control_group" if IS_CONTROL_GROUP else "adversarial"

# zero shot
with open(os.path.join(zero_shot_dir, f"squad_trainset_{split_type}.json"), 'w') as f1:
    json.dump(prompt_list_zero_shot_version2, f1, indent=2)

# Natural Questions

## Zero-Shot

In [42]:
# Folder the Natural Questions split files ("<split>_NQ.jsonl") are read from, and the
# folder that receives the zero-shot prompt files.
NQ_indir = "../data/NQ"
NQ_outdir = "../generated_prompts/all/zero_shot"

In [None]:
def get_prompts_NQ(curr_data):
    """Build the zero-shot prompt variants for Natural Questions records.

    Args:
        curr_data: iterable of dicts with "example_id", "annotation_id",
            "Paragraphs" and "Question". A question that does not end with "?"
            gets " ?" appended, and the result is written back to the record.

    Returns:
        One dict per record holding the two ids plus the "Adversarial",
        "Pseudo-Adversarial", "CoT-Adversarial" and "Answerability" prompts.
    """
    # The spaces after each newline belong to the prompt text; they are spelled out
    # here instead of relying on the indentation of a multi-line literal.
    rule_indent = " " * 8
    field_indent = " " * 12
    answer_instruction = "Given the following passage and question, answer the question"
    unanswerable_rule = f'{rule_indent}If it cannot be answered based on the passage, reply "unanswerable":'
    check_first_rule = f"{rule_indent}First make sure if it can be answered by the passage."
    answerability_instruction = 'Given the following passage and question, decide if the question is answerable based on the passage. Reply only "answerable" or "unanswerable":'

    prompt_list_zero_shot = []
    for elem in curr_data:
        prompt_elem = {
            "example_id": elem["example_id"],
            "annotation_id": elem["annotation_id"],
        }

        question = elem["Question"]
        if not question.endswith("?"):
            question = f"{question} ?"
        elem["Question"] = question  # the input record is updated in place

        passage_and_question = (
            f"\n{field_indent}Passage: {elem['Paragraphs']}"
            f"\n{field_indent}Question:  {question}"
        )
        prompt_elem["Adversarial"] = f"{answer_instruction}:{passage_and_question}"
        prompt_elem["Pseudo-Adversarial"] = f"{answer_instruction}.\n{unanswerable_rule}{passage_and_question}"
        prompt_elem["CoT-Adversarial"] = f"{answer_instruction}.\n{check_first_rule}\n{unanswerable_rule}{passage_and_question}"
        prompt_elem["Answerability"] = f"{answerability_instruction}{passage_and_question}"

        prompt_list_zero_shot.append(prompt_elem)
    return prompt_list_zero_shot

In [None]:
# Generate the zero-shot NQ prompts for both splits and write one JSON file per split.
data_types = ["adversarial", "control_group"]
# Create the output folder up front so a fresh checkout does not fail on open(..., 'w').
os.makedirs(NQ_outdir, exist_ok=True)
for data_type in data_types:
    # NOTE(review): the file is named .jsonl but is parsed as a single JSON document —
    # confirm it is not line-delimited.
    with open(os.path.join(NQ_indir, f"{data_type}_NQ.jsonl"), 'r') as f1:
        curr_data = json.load(f1)
    curr_output = get_prompts_NQ(curr_data)
    # NOTE: despite its name, curr_outdir is the path of the output *file*.
    curr_outdir = os.path.join(NQ_outdir, f"NQ_{data_type}_all.json")
    with open(curr_outdir, 'w') as f1:
        json.dump(curr_output, f1)

## Few-Shot with Instructions

In [68]:
# Selects which set of hand-written in-context examples the NQ few-shot builder uses
# (get_icl_examples_NQ_v1 / _v2 / _v3); the value also appears in the output file name.
ICL_examples_variant = 3 # any of 1,2,3

In [69]:
# Folder the NQ split files are read from, and the folder that receives the
# few-shot-with-instructions prompt files.
NQ_indir = "../data/NQ"
NQ_few_shot_with_instructions_outdir = "../generated_prompts/all/few_shot_with_instructions"

In [70]:
def get_icl_examples_NQ_v1():
    """Return variant 1 of the hand-written in-context examples for Natural Questions.

    Returns:
        A 5-tuple ``(pos_example_1, pos_example_2, neg_example_1,
        pos_answerability_example_1, neg_answerability_example_1)``. Every item is a
        dict with the keys 'Paragraphs', 'Question', 'Answer' and 'CoT'. The two
        answerability examples reuse the passage, question and reasoning of
        ``pos_example_1`` / ``neg_example_1``; only the positive one changes its
        answer, to 'answerable.'.
    """
    # The texts below are prompt content and are kept verbatim, spelling included.
    pos_example_1 = {
        'Paragraphs': 'Hypoxia differs from hypoxemia and anoxemia in that hypoxia refers to a state in which oxygen supply is insufficient , whereas hypoxemia and anoxemia refer specifically to states that have low or zero arterial oxygen supply . Hypoxia in which there is complete deprivation of oxygen supply is referred to as anoxia .',
        'Question': 'a medical term which means a deficiency but not a total lack of oxygen ?',
        'Answer': 'hypoxia.',
        'CoT': 'The passage mentions that Hypoxia is a state in which oxygen supply is insufficient. It further describes that Hypoxia in which there is complete deprivation of oxygen supply is referred to as anoxia, meaning that Hypoxia itself is not a total lack of oxygen.',
    }

    pos_example_2 = {
        'Paragraphs': 'South Africa have played at six of the eight Rugby World Cup tournaments , having been unable to compete in the first two tournaments due to a sports boycott during the apartheid era . Following the end of apartheid , they hosted the 1995 Rugby World Cup and won the tournament , and were champions again at the 2007 tournament in France . With two tournament wins , they are one of the three best performing teams , along with Australia who have also won twice , and New Zealand with three wins , the only team to do better .',
        'Question': 'when did south africa first win the rugby world cup ?',
        'Answer': '1995.',
        'CoT': 'The passage mentions that South Africa won the 1995 Rugby World Cup. The passage further says that they won again in 2007. Lastly, the passsage says that they had two wins, meaning that the win of 1995 was their first.',
    }

    neg_example_1 = {
        'Paragraphs': 'The Act of Settlement is an Act of the Parliament of England that was passed in 1701 to settle the succession to the English and Irish crowns on Protestants only . The next Protestant in line to the throne was the Electress Sophia of Hanover , a granddaughter of James VI of Scotland and I of England . After her the crowns would descend only to her non-Roman Catholic heirs .',
        'Question': 'The next Roman in line to the throne ?',
        'Answer': 'unanswerable.',
        'CoT': 'In the passage, it is mentioned that the next person in line to the throne is Protestant, and not Roman. It is also said that the crown would descend to her non-Roman heirs.',
    }

    # Answerability examples: same passage / question / reasoning, fresh dict objects.
    pos_answerability_example_1 = {**pos_example_1, 'Answer': 'answerable.'}
    neg_answerability_example_1 = dict(neg_example_1)

    return (
        pos_example_1,
        pos_example_2,
        neg_example_1,
        pos_answerability_example_1,
        neg_answerability_example_1,
    )

In [71]:
def get_icl_examples_NQ_v2():
    """Return variant 2 of the hand-written in-context examples for Natural Questions.

    Returns:
        A 5-tuple ``(pos_example_1, pos_example_2, neg_example_1,
        pos_answerability_example_1, neg_answerability_example_1)``. Every item is a
        dict with the keys 'Paragraphs', 'Question', 'Answer' and 'CoT'. The two
        answerability examples reuse the passage, question and reasoning of
        ``pos_example_1`` / ``neg_example_1``; only the positive one changes its
        answer, to 'answerable.'.
    """
    # The texts below are prompt content and are kept verbatim, wording included.
    pos_example_1 = {
        'Paragraphs': 'Louise Joy Brown ( born 25 July 1978 ) is an English woman known for being the first human to have been born after conception by in vitro fertilisation , or IVF .',
        'Question': 'when was the first in vitro baby born ?',
        'Answer': '25 July 1978.',
        'CoT': 'The passage says that Louise Joy Brown was the first human to have been born by in vitro fertilisation. It also mentions that Louise Joy Brown was born on 25 July 1978.',
    }

    pos_example_2 = {
        'Paragraphs': 'The 2018 College Football Playoff National Championship was a college football bowl game that determined the national champion in the NCAA Division I Football Bowl Subdivision for the 2017 season . The Alabama Crimson Tide defeated the Georgia Bulldogs 26 -- 23 in overtime . Alabama overcame a 13 -- 0 deficit at halftime . Tua Tagovailoa and Da\'Ron Payne were respectively named the offensive and defensive players of the game .',
        'Question': 'who won the college football national championship tonight ?',
        'Answer': 'The Alabama Crimson Tide.',
        'CoT': 'The passage mentions starts by talking about the Football Playoff National Championship. It then says that the Alabama Crimson Tide defeated the Georgia Bulldogs.',
    }

    neg_example_1 = {
        'Paragraphs': 'The Ranch is an American comedy web television series starring Ashton Kutcher , Danny Masterson , Debra Winger , Elisha Cuthbert , and Sam Elliott that debuted in 2016 on Netflix . The show takes place on the fictional Iron River Ranch in the fictitious small town of Garrison , Colorado ; detailing the life of the Bennetts , a dysfunctional family consisting of two brothers , their rancher father , and his divorced wife and local bar owner . While the opening sequence shows scenes from Ouray , Colorado and surrounding Ouray County , The Ranch is filmed on a sound stage in front of a live audience in Burbank , California . Each season consists of 20 episodes broken up into two parts , each containing 10 episodes .',
        'Question': 'when does the next series of the ranch come out ?',
        'Answer': 'unanswerable.',
        'CoT': 'The passage portrays the "the Ranch", including when it debuted, but it doesn\'t say when is its next series.',
    }

    # Answerability examples: same passage / question / reasoning, fresh dict objects.
    pos_answerability_example_1 = {**pos_example_1, 'Answer': 'answerable.'}
    neg_answerability_example_1 = dict(neg_example_1)

    return (
        pos_example_1,
        pos_example_2,
        neg_example_1,
        pos_answerability_example_1,
        neg_answerability_example_1,
    )

In [72]:
def get_icl_examples_NQ_v3():
    """Return variant 3 of the hand-written in-context examples for Natural Questions.

    Returns:
        A 5-tuple ``(pos_example_1, pos_example_2, neg_example_1,
        pos_answerability_example_1, neg_answerability_example_1)``. Every item is a
        dict with the keys 'Paragraphs', 'Question', 'Answer' and 'CoT'. The two
        answerability examples reuse the passage, question and reasoning of
        ``pos_example_1`` / ``neg_example_1``; only the positive one changes its
        answer, to 'answerable.'.
    """
    # The texts below are prompt content and are kept verbatim, tokenisation included.
    pos_example_1 = {
        'Paragraphs': '`` Fool ( If You Think It \'s Over ) \'\' is the title of a popular song originally publicly released in 1978 by the British singer - songwriter Chris Rea . Rea also wrote the words and composed the music of the song , which appears on his 1978 debut album , Whatever Happened to Benny Santini ? . The single \'s charting success in the USA earned him a Grammy nomination as Best New Artist in 1979 .',
        'Question': 'who sang fool if you think it over ?',
        'Answer': 'Chris Rea.',
        'CoT': 'The passage says that ``Fool ( If You Think It \'s Over )\'\' is the title of a popular song. It also says that it was originally publicly released by Chris Rea.',
    }

    pos_example_2 = {
        'Paragraphs': 'The Mississippi Freedom Democratic Party ( MFDP ) was an American political party created in 1964 as a branch of the populist Freedom Democratic organization in the state of Mississippi during the Civil Rights Movement . It was organized by African Americans and whites from Mississippi to challenge the legitimacy of the regular Mississippi Democratic Party , which allowed participation only by whites , when African Americans made up 40 percent of the state population .',
        'Question': 'why did the mississippi freedom democratic party emerge at the democratic party convention in 1964 ?',
        'Answer': 'to challenge the legitimacy of the regular Mississippi Democratic Party , which allowed participation only by whites , when African Americans made up 40 percent of the state population.',
        'CoT': 'The passage says that the Mississippi Freedom Democratic Party was created in 1964. It then says that it was organized to challenge the legitimacy of the regular Mississippi Democratic Party , which allowed participation only by whites , when African Americans made up 40 percent of the state population .',
    }

    neg_example_1 = {
        'Paragraphs': 'Owing in part to the way in which the United Kingdom , and Northern Ireland , came into being , there is no legally defined term to describe what Northern Ireland \' is \' . There is also no uniform or guiding way to refer to Northern Ireland amongst the agencies of the UK government . For example , the websites of the Office of the Prime Minister of the United Kingdom and the UK Statistics Authority describe the United Kingdom as being made up of four countries , one of these being Northern Ireland . Other pages on the same websites refer to Northern Ireland specifically as a `` province \'\' as do publications of the UK Statistics Authority . The website of the Northern Ireland Statistics and Research Agency also refers to Northern Ireland as being a province as does the website of the Office of Public Sector Information and other agencies within Northern Ireland . Publications of HM Treasury and the Department of Finance and Personnel of the Northern Ireland Executive , on the other hand , describe Northern Ireland as being a `` region of the UK \'\' . The UK \'s submission to the 2007 United Nations Conference on the Standardization of Geographical Names defines the UK as being made up of two countries ( England and Scotland ) , one principality ( Wales ) and one province ( Northern Ireland ) .',
        'Question': 'why is northern ireland not part of ireland ?',
        'Answer': 'unanswerable.',
        'CoT': 'The passage discusses Northern Ireland\'s unclear legal status as part of the UK. However, it does not mention why is Northern Ireland a part of the UK and not Ireland.',
    }

    # Answerability examples: same passage / question / reasoning, fresh dict objects.
    pos_answerability_example_1 = {**pos_example_1, 'Answer': 'answerable.'}
    neg_answerability_example_1 = dict(neg_example_1)

    return (
        pos_example_1,
        pos_example_2,
        neg_example_1,
        pos_answerability_example_1,
        neg_answerability_example_1,
    )

In [73]:
def example_to_string(example, instruction, isIcl, isCoT):
    """Render one example as an "Instructions / Passage / Question / Answer:" block.

    Args:
        example: dict with 'Paragraphs' and 'Question'; 'Answer' (and 'CoT' when
            ``isCoT`` is set) are read only when ``isIcl`` is truthy.
        instruction: instruction text placed on the first line.
        isIcl: truthy for an in-context demonstration, whose answer is appended;
            falsy for the query, which ends at "Answer:".
        isCoT: with ``isIcl``, put the example's reasoning and
            "So, the answer is:" in front of the answer. Ignored otherwise.

    Returns:
        The rendered string; its lines are separated by a newline plus one space.
    """
    header = "\n ".join([
        f"Instructions: {instruction}",
        f"Passage: {example['Paragraphs']}",
        f"Question: {example['Question']}",
        "Answer:",
    ])

    if not isIcl:
        return header
    if isCoT:
        return f"{header} {example['CoT']} So, the answer is: {example['Answer']}"
    return f"{header} {example['Answer']}"

In [74]:
def _render_few_shot_prompt(first_demo, second_demo, query, last_separator="\n\n "):
    """Join two rendered demonstrations and the query into one normalised prompt.

    Runs of spaces are collapsed to a single space, surrounding whitespace is stripped
    and one leading space is prepended. ``last_separator`` is the text placed between
    the second demonstration and the query.
    """
    joined = f"{first_demo}\n\n {second_demo}{last_separator}{query}"
    return f" {re.sub(' +', ' ', joined).strip()}"


def get_few_shot_with_instructions_prompts_NQ(curr_data, icl_examples_variant=None):
    """Build the few-shot-with-instructions prompt variants for Natural Questions.

    Each prompt is two in-context demonstrations followed by the query, all rendered
    with ``example_to_string``. For every layout a plain and a "-CoT" version is
    produced; in the "-CoT" version only the demonstrations include their reasoning.

    Args:
        curr_data: iterable of dicts with "example_id", "annotation_id",
            "Paragraphs", "Question" and "answer". The records are not modified.
        icl_examples_variant: which example set to use (1, 2 or 3). When omitted,
            the notebook-level ``ICL_examples_variant`` is used.

    Returns:
        One dict per record with "example_id", "annotation_id", "Answer" (the
        record's "answer", passed through unchanged) and the prompts "Adversarial",
        "Pseudo-Adversarial", "Ablation1", "Ablation2" and "Answerability", each
        followed by its "-CoT" counterpart.

    Raises:
        ValueError: if the selected variant is not 1, 2 or 3.
    """
    if icl_examples_variant is None:
        icl_examples_variant = ICL_examples_variant

    # An if/elif chain (rather than a lookup table) so that only the selected getter
    # has to be defined in the running kernel.
    if icl_examples_variant == 1:
        icl_examples = get_icl_examples_NQ_v1()
    elif icl_examples_variant == 2:
        icl_examples = get_icl_examples_NQ_v2()
    elif icl_examples_variant == 3:
        icl_examples = get_icl_examples_NQ_v3()
    else:
        raise ValueError(f"ICL_examples_variant can only be any of 1,2,3, but is currently {icl_examples_variant}")
    pos_example_1, pos_example_2, neg_example_1, pos_answerability_example_1, neg_answerability_example_1 = icl_examples

    instructions = {'Adversarial': 'Given the following passage and question, answer the question.',
                    'Pseudo-Adversarial': 'Given the following passage and question, answer the question. If it cannot be answered based on the passage, reply "unanswerable".',
                    'Answerability': 'Given the following passage and question, decide if the question is answerable based on the passage. Reply only "answerable" or "unanswerable".'}

    # (output key, first demonstration, second demonstration, instruction key)
    prompt_layouts = [
        ("Adversarial", pos_example_1, pos_example_2, 'Adversarial'),
        ("Pseudo-Adversarial", pos_example_1, neg_example_1, 'Pseudo-Adversarial'),
        ("Ablation1", pos_example_1, pos_example_2, 'Pseudo-Adversarial'),
        ("Ablation2", pos_example_1, neg_example_1, 'Adversarial'),
        ("Answerability", pos_answerability_example_1, neg_answerability_example_1, 'Answerability'),
    ]

    # The demonstrations do not depend on the query, so they are rendered once here
    # instead of once per record.
    rendered_layouts = []
    for base_key, first_example, second_example, instruction_key in prompt_layouts:
        instruction = instructions[instruction_key]
        for with_cot in (False, True):
            prompt_key = f"{base_key}-CoT" if with_cot else base_key
            # NOTE(review): "Ablation1-CoT" has always been generated with a lone space
            # on the blank line before the query ("\n \n " instead of "\n\n "). It is
            # kept byte-for-byte so existing prompts stay reproducible — confirm whether
            # it should be aligned with the other layouts.
            separator = "\n \n " if prompt_key == "Ablation1-CoT" else "\n\n "
            rendered_layouts.append((
                prompt_key,
                instruction,
                example_to_string(first_example, instruction, True, with_cot),
                example_to_string(second_example, instruction, True, with_cot),
                separator,
            ))

    prompt_list_few_shot_with_instructions = list()
    for elem in curr_data:

        prompt_elem = dict()

        prompt_elem["example_id"] = elem["example_id"]
        prompt_elem["annotation_id"] = elem["annotation_id"]

        # Make sure the question ends with "?"; work on a copy so the caller's record
        # is left untouched.
        question = elem["Question"] if elem["Question"].endswith("?") else f"{elem['Question']} ?"
        query_example = {**elem, "Question": question}

        # The gold answer is metadata, not prompt text: it is stored as-is and is NOT
        # whitespace-normalised (that would prepend a space to a string and raise
        # TypeError for any non-string answer).
        prompt_elem["Answer"] = elem["answer"]

        for prompt_key, instruction, first_demo, second_demo, separator in rendered_layouts:
            # The query never carries an answer or reasoning, in plain and CoT prompts alike.
            query = example_to_string(query_example, instruction, False, False)
            prompt_elem[prompt_key] = _render_few_shot_prompt(first_demo, second_demo, query, separator)

        prompt_list_few_shot_with_instructions.append(prompt_elem)
    return prompt_list_few_shot_with_instructions

In [75]:
# Generate the few-shot-with-instructions NQ prompts for both splits and write one
# JSON file per split; the file name records the in-context example variant used.
data_types = ["adversarial", "control_group"]
# Create the output folder up front so a fresh checkout does not fail on open(..., 'w').
os.makedirs(NQ_few_shot_with_instructions_outdir, exist_ok=True)
for data_type in data_types:
    # NOTE(review): the file is named .jsonl but is parsed as a single JSON document —
    # confirm it is not line-delimited.
    with open(os.path.join(NQ_indir, f"{data_type}_NQ.jsonl"), 'r') as f1:
        curr_data = json.load(f1)
    curr_output = get_few_shot_with_instructions_prompts_NQ(curr_data)
    # NOTE: despite its name, curr_outdir is the path of the output *file*.
    curr_outdir = os.path.join(NQ_few_shot_with_instructions_outdir, f"NQ_{data_type}_icl_examples_v{ICL_examples_variant}_all.json")
    with open(curr_outdir, 'w') as f1:
        json.dump(curr_output, f1)

## Train set (for training the classifier) - zero-shot

In [None]:
# Input folder for the "<split>_NQ_train.jsonl" files and output folder for the
# train-set zero-shot prompts.
# NOTE(review): this rebinds NQ_indir / NQ_outdir from the evaluation sections, here
# without the "../" prefix used by the cells that carry execution counts — confirm
# the working directory before running.
NQ_indir = "data/NQ"
NQ_outdir = "generated_prompts/train_set/all/zero_shot"

In [None]:
# NOTE: a function with this name is also defined in the "Natural Questions /
# Zero-Shot" section of the notebook; running this cell rebinds the name.
def get_prompts_NQ(curr_data):
    """Build the zero-shot prompt variants for Natural Questions records.

    Args:
        curr_data: iterable of dicts with "example_id", "annotation_id",
            "Paragraphs" and "Question". A question that does not end with "?"
            gets " ?" appended, and the result is written back to the record.

    Returns:
        One dict per record holding the two ids plus the "Adversarial",
        "Pseudo-Adversarial", "CoT-Adversarial" and "Answerability" prompts.
    """
    # The spaces after each newline belong to the prompt text; they are spelled out
    # here instead of relying on the indentation of a multi-line literal.
    rule_indent = " " * 8
    field_indent = " " * 12
    answer_instruction = "Given the following passage and question, answer the question"
    unanswerable_rule = f'{rule_indent}If it cannot be answered based on the passage, reply "unanswerable":'
    check_first_rule = f"{rule_indent}First make sure if it can be answered by the passage."
    answerability_instruction = 'Given the following passage and question, decide if the question is answerable based on the passage. Reply only "answerable" or "unanswerable":'

    prompt_list_zero_shot = []
    for elem in curr_data:
        prompt_elem = {
            "example_id": elem["example_id"],
            "annotation_id": elem["annotation_id"],
        }

        question = elem["Question"]
        if not question.endswith("?"):
            question = f"{question} ?"
        elem["Question"] = question  # the input record is updated in place

        passage_and_question = (
            f"\n{field_indent}Passage: {elem['Paragraphs']}"
            f"\n{field_indent}Question:  {question}"
        )
        prompt_elem["Adversarial"] = f"{answer_instruction}:{passage_and_question}"
        prompt_elem["Pseudo-Adversarial"] = f"{answer_instruction}.\n{unanswerable_rule}{passage_and_question}"
        prompt_elem["CoT-Adversarial"] = f"{answer_instruction}.\n{check_first_rule}\n{unanswerable_rule}{passage_and_question}"
        prompt_elem["Answerability"] = f"{answerability_instruction}{passage_and_question}"

        prompt_list_zero_shot.append(prompt_elem)
    return prompt_list_zero_shot

In [None]:
# Generate the zero-shot prompts for the NQ training files of both splits and write
# one JSON file per split.
data_types = ["adversarial", "control_group"]
# Create the output folder up front so a fresh checkout does not fail on open(..., 'w').
os.makedirs(NQ_outdir, exist_ok=True)
for data_type in data_types:
    # NOTE(review): the file is named .jsonl but is parsed as a single JSON document —
    # confirm it is not line-delimited.
    with open(os.path.join(NQ_indir, f"{data_type}_NQ_train.jsonl"), 'r') as f1:
        curr_data = json.load(f1)
    curr_output = get_prompts_NQ(curr_data)
    # NOTE: despite its name, curr_outdir is the path of the output *file*.
    curr_outdir = os.path.join(NQ_outdir, f"NQ_trainset_{data_type}_all.json")
    with open(curr_outdir, 'w') as f1:
        json.dump(curr_output, f1)

# Musique

## Zero-Shot

In [None]:
# Folder the MuSiQue split files ("<split>_musique.jsonl") are read from, and the
# folder that receives the zero-shot prompt files.
# NOTE(review): no "../" prefix here, whereas the MuSiQue few-shot section uses
# "../data/musique" — confirm the working directory before running.
musique_indir = "data/musique"
musique_outdir = "generated_prompts/all/zero_shot"

In [None]:
def get_prompts_musique(curr_data):
    """Build the zero-shot prompt variants for MuSiQue records.

    Args:
        curr_data: iterable of dicts with "id", "Paragraphs", "Question" and
            "answer"; "actual_answer" is optional.

    Returns:
        One dict per record with "id", the "Adversarial", "Pseudo-Adversarial",
        "CoT-Adversarial" and "Answerability" prompts, the record's "answer" and,
        when the record has one, its "actual_answer".
    """
    # The spaces after each newline belong to the prompt text; they are spelled out
    # here instead of relying on the indentation of a multi-line literal.
    rule_indent = " " * 8
    field_indent = " " * 12
    answer_instruction = "Given the following context and question, answer the question"
    unanswerable_rule = f'{rule_indent}If it cannot be answered based on the context, reply "unanswerable":'
    check_first_rule = f"{rule_indent}First make sure if it can be answered by the context."
    answerability_instruction = 'Given the following context and question, decide if the question is answerable based on the context. Reply only "answerable" or "unanswerable":'

    prompt_list_zero_shot = []
    for elem in curr_data:
        prompt_elem = {"id": elem["id"]}

        context_and_question = (
            f"\n{field_indent}Context: {elem['Paragraphs']}"
            f"\n{field_indent}Question:  {elem['Question']}"
        )
        prompt_elem["Adversarial"] = f"{answer_instruction}:{context_and_question}"
        prompt_elem["Pseudo-Adversarial"] = f"{answer_instruction}.\n{unanswerable_rule}{context_and_question}"
        prompt_elem["CoT-Adversarial"] = f"{answer_instruction}.\n{check_first_rule}\n{unanswerable_rule}{context_and_question}"
        prompt_elem["Answerability"] = f"{answerability_instruction}{context_and_question}"

        prompt_elem["answer"] = elem["answer"]
        if "actual_answer" in elem:
            prompt_elem["actual_answer"] = elem["actual_answer"]

        prompt_list_zero_shot.append(prompt_elem)
    return prompt_list_zero_shot

In [None]:
# Generate the zero-shot MuSiQue prompts for both splits and write one JSON file per split.
data_types = ["adversarial", "control_group"]
# Create the output folder up front so a fresh checkout does not fail on open(..., 'w').
os.makedirs(musique_outdir, exist_ok=True)
for data_type in data_types:
    # NOTE(review): the file is named .jsonl but is parsed as a single JSON document —
    # confirm it is not line-delimited.
    with open(os.path.join(musique_indir, f"{data_type}_musique.jsonl"), 'r') as f1:
        curr_data = json.load(f1)
    curr_output = get_prompts_musique(curr_data)
    # NOTE: despite its name, curr_outdir is the path of the output *file*.
    curr_outdir = os.path.join(musique_outdir, f"musique_{data_type}_all.json")
    with open(curr_outdir, 'w') as f1:
        json.dump(curr_output, f1)

## Few-Shot with Instructions

In [98]:
# Which set of hand-written in-context examples to use for MuSiQue. This rebinds the
# same ICL_examples_variant name that the Natural Questions few-shot section sets.
# NOTE(review): presumably selects among get_icl_examples_musique_v1/_v2/_v3 — confirm
# against the builder defined further down.
ICL_examples_variant = 3 # any of 1,2,3

In [99]:
# MuSiQue input folder and the output folder for the few-shot-with-instructions prompts.
# NOTE(review): this rebinds musique_indir, which the MuSiQue zero-shot section sets to
# "data/musique" (no "../") — confirm which working directory is intended.
musique_indir = "../data/musique"
musique_few_shot_with_instructions_outdir = "../generated_prompts/all/few_shot_with_instructions"

In [87]:
def get_icl_examples_musique_v1():
    """Return in-context example set 1 for the MuSiQue few-shot prompts.

    Returns:
        A 5-tuple of dicts, each with the keys 'Paragraphs', 'Question', 'Answer' and 'CoT':
        (pos_example_1, pos_example_2, neg_example_1,
         pos_answerability_example_1, neg_answerability_example_1).
        The answerability examples reuse the text of pos_example_1 / neg_example_1;
        only the positive one differs, by having 'answerable.' as its 'Answer'.

    The texts are prompt material and are kept verbatim, including their typos
    (e.g. "Pargaraph").
    """
    pos_example_1 = {
        'Paragraphs': 'Paragraph 1: South Africa have played at six of the eight Rugby World Cup tournaments, having been unable to compete in the first two tournaments due to a sports boycott during the apartheid era. Following the end of apartheid, they hosted the 1995 Rugby World Cup and won the tournament.\n Paragraph 2: With two tournament wins, South Africa is one of the three best performing teams, along with Australia who have also won twice, and New Zealand with three wins, the only team to do better.',
        'Question': 'How many times did the winner of the 1995 Rugby World Cup win in total?',
        'Answer': 'two times.',
        'CoT': 'Paragraph 1 mentions that the winner of the 1995 Rugby World Cup was South Africa. Paragraph 2 mentions that South Africa had two tournament wins.',
    }

    pos_example_2 = {
        'Paragraphs': 'Paragraph 1: Barack Obama is an American politician who served as the 44th president of the United States from 2009 to 2017.\n Pargaraph 2: Obama married Michelle on October 3, 1992, after being engaged for almost a year.\n Paragraph 3: Barack Obama was born in Honolulu, Hawaii. After graduating from Columbia University in 1983, he worked as a community organizer in Chicago.',
        'Question': 'What is the name of the wife of the American president who was born in Hawaii?',
        'Answer': 'Michelle',
        'CoT': 'Paragraph 1 mentions that Barack Obama was an American President. Paragraph 3 mentions that Barack Obama was born in Hawaii, making him the American president born in Hawaii. Paragraph 2 mentions that he married Michelle.',
    }

    neg_example_1 = {
        'Paragraphs': 'Paragraph 1: Barack Obama is an American politician who served as the 44th president of the United States from 2009 to 2017.\n Pargaraph 2: Obama married Michelle on October 3, 1992, after being engaged for almost a year.',
        'Question': 'What is the name of the wife of the American president who was born in New York?',
        'Answer': 'unanswerable.',
        'CoT': 'Paragraph 1 mentions that Barack Obama was an American President. Paragraph 2 mentions that he married Michelle, but it doesn\'t say where he was born. The context doesn\'t have information on an american president born in New York.',
    }

    # Same passage/question/CoT as pos_example_1, but labelled for the answerability task.
    pos_answerability_example_1 = {**pos_example_1, 'Answer': 'answerable.'}

    # Identical content to neg_example_1; returned as an independent dict.
    neg_answerability_example_1 = dict(neg_example_1)

    return pos_example_1, pos_example_2, neg_example_1, pos_answerability_example_1, neg_answerability_example_1

In [88]:
def get_icl_examples_musique_v2():
    """Return in-context example set 2 for the MuSiQue few-shot prompts.

    Returns:
        A 5-tuple of dicts, each with the keys 'Paragraphs', 'Question', 'Answer' and 'CoT':
        (pos_example_1, pos_example_2, neg_example_1,
         pos_answerability_example_1, neg_answerability_example_1).
        The answerability examples reuse the text of pos_example_1 / neg_example_1;
        only the positive one differs, by having 'answerable.' as its 'Answer'.

    The texts are prompt material and are kept verbatim, including their typos
    (e.g. "Parkwary", "is location in Boston").
    """
    pos_example_1 = {
        'Paragraphs': 'Paragraph 1: Kaya toast is a well-known snack in Singapore. Kaya toast is prepared with kaya (coconut jam), a topping of sugar, coconut milk and eggs, pandan, and sometimes margarine or butter. Kaya is generally served on toast, and also sometimes on crackers. It is considered a breakfast staple, and remains popular in Singapore. The dish is sometimes dipped into soft-boiled egg with a little dark soy sauce and white pepper.\n Paragraph 2: A justice of the peace in Singapore derives his powers from statute law. He is appointed by the President of the Republic of Singapore, under the provisions of section 11 (l) of the Subordinate Courts Act (Cap. 321). The President may revoke the appointment of any justice of the peace. A newly appointed justice of the peace is required by section 17 of the Subordinate Courts Act, to take the oath of office and allegiance as set out in the schedule to the Subordinate Courts Act, before exercising the functions of his office.',
        'Question': 'How do you become a justice of peace in the country where Kaya toast is popular?',
        'Answer': 'appointed by the President of the Republic of Singapore.',
        'CoT': 'Paragraph 1 mentions that Kaya toast is a well-known snack in Singapore. Paragraph 2 says that a justice of peace in Singapore derives his powers from statute law. It also says that he is appointed by the President of the Republic of Singapore.',
    }

    pos_example_2 = {
        'Paragraphs': 'Paragraph 1: Mount Henry is located in the Lewis Range, Glacier National Park in the U.S. state of Montana. Mount Henry is just south of Appistoki Peak in the Two Medicine region of the park.\n Paragraph 2: KJRZ-LP (105.3 FM) was a radio station in Libby, Montana. It was owned and operated by the Libby Area Chamber of Commerce.\n Paragraph 3: The Lewis Range is a mountain range located in the Rocky Mountains of northern Montana, United States and extreme southern Alberta, Canada. It was formed as a result of the Lewis Overthrust, a geologic thrust fault resulted in the overlying of younger Cretaceous rocks by older Proterozoic rocks. The range is located within Waterton Lakes National Park in Alberta, Canada and Glacier National Park and the Bob Marshall Wilderness Complex in Montana, United States. The highest peak is Mount Cleveland at .',
        'Question': 'In what mountain group is the range of which Mount Henry from the state where KJRZ-LP is located is part?',
        'Answer': 'Rocky Mountains',
        'CoT': 'Paragraph 2 says that KJRZ-LP was in Libby, Montana. Paragraph 1 mentions that Mount Henry is located in the Lewis Range. It also says that Lewis Range is in the state of Montana. Paragraph 3 mentions that the Lewis Range is located in the Rocky Mountains.',
    }

    neg_example_1 = {
        'Paragraphs': 'Paragraph 1: WODS (103.3 MHz) - known on-air as 103.3 AMP Radio - is a commercial FM radio station in Boston, Massachusetts. WODS airs a Top 40 (CHR) radio format, and is owned by Entercom. Its studios and offices are located on Leo M. Birmingham Parkwary in Brighton.\n Paragraph 2: The Embassy of the United States to the Republic of Indonesia is located in Jakarta just south of the Monas at Jalan Medan Merdeka Selatan.\n Paragraph 3: Westminster College is a private liberal arts college located in the Sugar House neighborhood of Salt Lake City, Utah, United States. The college comprises four schools: the School of Arts and Sciences, the Bill and Vieve Gore School of Business, the School of Education, and the School of Nursing and Health Sciences. It is the only accredited liberal arts college in the state of Utah.\n Paragraph 4: The Shorter House is located at the end of Andrews Road in Thompson Ridge, a hamlet in the Town of Crawford in Orange County, New York, United States. It is a late 18th-century building later modified in the Greek Revival style.',
        'Question': 'What is the business category of Crawford House, located in the same city as WODS and the same state as Wellesley College in Mona Lisa Smile?',
        'Answer': 'unanswerable.',
        'CoT': 'Paragraph 1 says that WODS is location in Boston. However, no paragraph talks about which state Wellesley College.',
    }

    # Same passage/question/CoT as pos_example_1, but labelled for the answerability task.
    pos_answerability_example_1 = {**pos_example_1, 'Answer': 'answerable.'}

    # Identical content to neg_example_1; returned as an independent dict.
    neg_answerability_example_1 = dict(neg_example_1)

    return pos_example_1, pos_example_2, neg_example_1, pos_answerability_example_1, neg_answerability_example_1

In [89]:
def get_icl_examples_musique_v3():
    """Return in-context example set 3 for the MuSiQue few-shot prompts.

    Returns:
        A 5-tuple of dicts, each with the keys 'Paragraphs', 'Question', 'Answer' and 'CoT':
        (pos_example_1, pos_example_2, neg_example_1,
         pos_answerability_example_1, neg_answerability_example_1).
        The answerability examples reuse the text of pos_example_1 / neg_example_1;
        only the positive one differs, by having 'answerable.' as its 'Answer'.

    The texts are prompt material and are kept verbatim, including their typos
    (e.g. "Garcie", "sherriff").
    """
    pos_example_1 = {
        'Paragraphs': 'Paragraph 1: Meet Me in St. Louis is a musical film made by Metro - Goldwyn - Mayer and released in 1944. Divided into a series of seasonal vignettes, starting with Summer 1903, it relates the story of a year in the life of the Smith family in St. Louis, leading up to the opening of the Louisiana Purchase Exposition (more commonly referred to as the World\'s Fair) in the spring of 1904. The picture stars Judy Garland, Margaret O\'Brien, Mary Astor, Lucille Bremer, Tom Drake, Leon Ames, Marjorie Main, June Lockhart, and Joan Carroll.\n Paragraph 2: Gracie is a 2007 American sports drama film directed by Davis Guggenheim. It stars Carly Schroeder as Gracie Bowen, Dermot Mulroney as Bryan Bowen, Elisabeth Shue as Lindsay Bowen, Jesse Lee Soffer as Johnny Bowen, and Andrew Shue as Coach Owen Clark.\n Paragraph 3: He was born Philip Davis Guggenheim in St. Louis, Missouri, United States, the son of Marion Davis and film director and producer Charles Guggenheim. His father was Jewish, whereas his mother was Episcopalian. He graduated from the Potomac School (McLean, Virginia) (1979), from Sidwell Friends School (1982), and from Brown University (1986).',
        'Question': 'When does Meet Me in the birthplace of Gracie\'s director take place?',
        'Answer': 'starting with Summer 1903.',
        'CoT': 'Paragraph 2 mentions that Garcie was directed by Davis Guggenheim. Paragraph 3 says that Davis Guggenheim was born in St. Louis. Paragraph 1 says that Meet Me in St. Louis starts with Summer 1903.',
    }

    pos_example_2 = {
        'Paragraphs': 'Paragraph 1: The city has a Mayor and is one of the 16 cities and towns in England and Wales to have a ceremonial sheriff who acts as a deputy for the Mayor. The current and 793rd Mayor of Southampton is Linda Norris. Catherine McEwing is the current and 578th sherriff. The town crier from 2004 until his death in 2014 was John Melody, who acted as master of ceremonies in the city and who possessed a cry of 104 decibels.\n Paragraph 2: John May (born 26 September 1849 in Southampton, Hampshire; date of death unknown) was an English cricketer. May was a right-handed batsman who was a right-arm fast bowler.',
        'Question': 'Who is the current mayor of the birthplace of John May?',
        'Answer': 'Linda Norris',
        'CoT': 'Paragraph 2 says that John May was born in Southampton. Paragraph 1 mentions that the current Mayor of Southampton is Linda Norris.',
    }

    neg_example_1 = {
        'Paragraphs': 'Paragraph 1: Imran Khan has held the office of Prime Minister since 18 August 2018, following the outcome of nationwide general elections held on 25 July 2018.\n Paragraph 2: Hampi, also referred to as the Group of Monuments at Hampi, is a UNESCO World Heritage Site located in east - central Karnataka, India. It became the centre of the Hindu Vijayanagara Empire capital in the 14th century. Chronicles left by Persian and European travellers, particularly the Portuguese, state Hampi was a prosperous, wealthy and grand city near the Tungabhadra River, with numerous temples, farms and trading markets. By 1500 CE, Hampi - Vijayanagara was the world\'s second - largest medieval - era city after Beijing, and probably India\'s richest at that time, attracting traders from Persia and Portugal. The Vijayanagara Empire was defeated by a coalition of Muslim sultanates; its capital was conquered, pillaged and destroyed by sultanate armies in 1565, after which Hampi remained in ruins.\n Paragraph 3: As of June 2018, the Government of Karnataka consists of 27 ministers including Chief Minister and a Deputy Chief Minister.\n Paragraph 4: Thekkady (Idukki district) is the location of the Periyar National Park, which is an important tourist attraction in the Kerala state of India.',
        'Question': 'As of 2018, who is the minister of the state where hampi tourist place is located?',
        'Answer': 'unanswerable.',
        'CoT': 'Paragraph 2 mentions that Hampi is located in east - central Karnataka, India. Paragraph 3 says that as of June 2018, the Government of Karnataka consists of 27 ministers including Chief Minister and a Deputy Chief Minister, but it doesn\'t say who they are.',
    }

    # Same passage/question/CoT as pos_example_1, but labelled for the answerability task.
    pos_answerability_example_1 = {**pos_example_1, 'Answer': 'answerable.'}

    # Identical content to neg_example_1; returned as an independent dict.
    neg_answerability_example_1 = dict(neg_example_1)

    return pos_example_1, pos_example_2, neg_example_1, pos_answerability_example_1, neg_answerability_example_1

In [90]:
def example_to_string_musique(example, instruction, isIcl, isCoT):
    """Render one example as an "Instructions / Context / Question / Answer:" text block.

    Args:
        example: mapping with 'Paragraphs' and 'Question'; 'Answer' (and 'CoT' when
            isCoT is set) are additionally read when isIcl is truthy.
        instruction: instruction sentence placed on the first line.
        isIcl: truthy for a solved demonstration, i.e. the answer is appended after "Answer:".
        isCoT: when isIcl is truthy, prepend the chain-of-thought and the phrase
            "So, the answer is:" to the answer. Ignored when isIcl is falsy.

    Returns:
        The rendered string; for a non-demonstration it ends with "Answer:".
    """
    header = "\n ".join([
        f"Instructions: {instruction}",
        f"Context: {example['Paragraphs']}",
        f"Question: {example['Question']}",
        "Answer:",
    ])

    # The sample to be solved is left open-ended after "Answer:".
    if not isIcl:
        return header

    if isCoT:
        completion = f"{example['CoT']} So, the answer is: {example['Answer']}"
    else:
        completion = f"{example['Answer']}"
    return f"{header} {completion}"

In [91]:
def get_few_shot_with_instructions_prompts_musique(curr_data, icl_examples_variant=None):
    """Build the few-shot (two demonstrations + query) prompt variants for MuSiQue samples.

    Args:
        curr_data: iterable of sample dicts. Each must provide "id", "answer",
            "Paragraphs" and "Question"; "actual_answer" is copied when present.
        icl_examples_variant: which in-context example set to use (1, 2 or 3).
            Defaults to the notebook-level ``ICL_examples_variant``.

    Returns:
        A list with one dict per sample holding "id", "Answer", optionally
        "actual_answer", and the prompt fields "Adversarial", "Pseudo-Adversarial",
        "Ablation1", "Ablation2" and "Answerability", each followed by its "-CoT"
        counterpart. Every prompt has runs of spaces collapsed, is stripped, and is
        prefixed with a single space.

    Raises:
        ValueError: if the selected variant is not one of 1, 2, 3.

    Differences from the previous implementation:
        * "id", "Answer" and "actual_answer" are copied verbatim. They used to go
          through the whitespace normalisation as well (its skip-list named keys that
          never exist here), which prepended a space to them and failed on
          non-string values.
        * Every prompt uses the same "\\n\\n " separator between its three parts; the
          "Ablation1-CoT" template used to contain a stray whitespace-only line.
    """
    if icl_examples_variant is None:
        icl_examples_variant = ICL_examples_variant

    example_getters = {1: get_icl_examples_musique_v1,
                       2: get_icl_examples_musique_v2,
                       3: get_icl_examples_musique_v3}
    if icl_examples_variant not in example_getters:
        raise ValueError(f"ICL_examples_variant can only be any of 1,2,3, but is currently {icl_examples_variant}")
    (pos_example_1, pos_example_2, neg_example_1,
     pos_answerability_example_1, neg_answerability_example_1) = example_getters[icl_examples_variant]()

    instructions = {'Adversarial': 'Given the following context and question, answer the question.',
                    'Pseudo-Adversarial': 'Given the following context and question, answer the question. If it cannot be answered based on the context, reply "unanswerable".',
                    'Answerability': 'Given the following context and question, decide if the question is answerable based on the context. Reply only "answerable" or "unanswerable".'}

    # (output key, instruction key, the two demonstrations), listed in output order.
    # The ablations cross the instruction of one setting with the demonstrations of the other.
    prompt_recipes = [
        ("Adversarial", "Adversarial", (pos_example_1, pos_example_2)),
        ("Pseudo-Adversarial", "Pseudo-Adversarial", (pos_example_1, neg_example_1)),
        ("Ablation1", "Pseudo-Adversarial", (pos_example_1, pos_example_2)),
        ("Ablation2", "Adversarial", (pos_example_1, neg_example_1)),
        ("Answerability", "Answerability", (pos_answerability_example_1, neg_answerability_example_1)),
    ]

    def _normalize_prompt(text):
        # Collapse runs of spaces, trim, and keep the single leading space of the output format.
        return f" {re.sub(' +', ' ', text).strip()}"

    # The demonstrations do not depend on the sample, so render them once per prompt variant.
    rendered_recipes = []
    for prompt_key, instruction_key, demonstrations in prompt_recipes:
        instruction = instructions[instruction_key]
        for key_suffix, with_cot in (("", False), ("-CoT", True)):
            shots = [example_to_string_musique(demo, instruction, True, with_cot)
                     for demo in demonstrations]
            rendered_recipes.append((f"{prompt_key}{key_suffix}", instruction, shots))

    prompt_list_few_shot_with_instructions = list()
    for elem in curr_data:
        # Metadata / gold labels: copied as-is, never whitespace-normalised.
        prompt_elem = {"id": elem["id"], "Answer": elem["answer"]}
        if "actual_answer" in elem:
            prompt_elem["actual_answer"] = elem["actual_answer"]

        for prompt_key, instruction, shots in rendered_recipes:
            # The query is always rendered open-ended (no answer, no CoT), also for "-CoT" prompts.
            query = example_to_string_musique(elem, instruction, False, False)
            # "\n\n " is what the blank line plus indentation between the parts
            # normalises to once runs of spaces are collapsed.
            prompt_elem[prompt_key] = _normalize_prompt("\n\n ".join(shots + [query]))

        prompt_list_few_shot_with_instructions.append(prompt_elem)
    return prompt_list_few_shot_with_instructions

In [100]:
# Build the few-shot MuSiQue prompts for both splits and write one JSON file per split.
data_types = ["adversarial", "control_group"]
# Create the output directory up front: open(..., 'w') raises FileNotFoundError
# when the parent directory is missing (e.g. on a fresh checkout).
os.makedirs(musique_few_shot_with_instructions_outdir, exist_ok=True)
for data_type in data_types:
    # Despite the ".jsonl" extension the whole file is parsed as a single JSON document.
    # The encoding is pinned so the result does not depend on the platform default.
    with open(os.path.join(musique_indir, f"{data_type}_musique.jsonl"), 'r', encoding="utf-8") as f1:
        curr_data = json.load(f1)
    curr_output = get_few_shot_with_instructions_prompts_musique(curr_data)
    # NOTE: despite its name, curr_outdir is the path of the output *file*;
    # the ICL example variant is part of the file name.
    curr_outdir = os.path.join(musique_few_shot_with_instructions_outdir, f"musique_{data_type}_icl_examples_v{ICL_examples_variant}_all.json")
    with open(curr_outdir, 'w', encoding="utf-8") as f1:
        json.dump(curr_output, f1)

## Train set (for training the classifier) - zero-shot

In [None]:
# Directory the "<data_type>_musique_train.jsonl" input files are read from in this section.
# NOTE(review): this rebinds musique_indir, and uses "data/musique" whereas the few-shot
# section uses "../data/musique" — confirm which working directory is intended.
musique_indir = "data/musique"
# Directory the train-set zero-shot prompt files are written to.
musique_outdir = "generated_prompts/train_set/all/zero_shot"

In [None]:
def get_prompts_musique(curr_data):
    """Build the zero-shot prompt variants for every MuSiQue sample.

    Args:
        curr_data: iterable of sample dicts providing "id", "Paragraphs",
            "Question" and "answer"; "actual_answer" is copied when present.

    Returns:
        A list with one dict per sample, keyed (in this order) by "id",
        "Adversarial", "Pseudo-Adversarial", "CoT-Adversarial", "Answerability",
        "answer" and, when available, "actual_answer".

    The prompt texts keep the literal indentation of the original templates:
    8 spaces before the extra instruction lines, 12 before "Context:"/"Question:".
    """
    instruction_pad = " " * 8
    field_pad = " " * 12

    answer_instruction = "Given the following context and question, answer the question."
    unanswerable_line = f'\n{instruction_pad}If it cannot be answered based on the context, reply "unanswerable":'
    verify_line = f"\n{instruction_pad}First make sure if it can be answered by the context."
    answerability_instruction = 'Given the following context and question, decide if the question is answerable based on the context. Reply only "answerable" or "unanswerable":'

    def build_record(sample):
        record = {"id": sample["id"]}

        # Every variant ends with the same Context/Question block.
        context_and_question = (
            f"\n{field_pad}Context: {sample['Paragraphs']}"
            f"\n{field_pad}Question:  {sample['Question']}"
        )

        record["Adversarial"] = (
            "Given the following context and question, answer the question:" + context_and_question
        )
        record["Pseudo-Adversarial"] = answer_instruction + unanswerable_line + context_and_question
        record["CoT-Adversarial"] = (
            answer_instruction + verify_line + unanswerable_line + context_and_question
        )
        record["Answerability"] = answerability_instruction + context_and_question

        record["answer"] = sample["answer"]
        if "actual_answer" in sample.keys():
            record["actual_answer"] = sample["actual_answer"]
        return record

    return [build_record(sample) for sample in curr_data]

In [None]:
# Build the zero-shot prompts for the MuSiQue train split files and write one JSON file per split.
data_types = ["adversarial", "control_group"]
# Create the output directory up front: open(..., 'w') raises FileNotFoundError
# when the parent directory is missing (e.g. on a fresh checkout).
os.makedirs(musique_outdir, exist_ok=True)
for data_type in data_types:
    # Despite the ".jsonl" extension the whole file is parsed as a single JSON document.
    # The encoding is pinned so the result does not depend on the platform default.
    with open(os.path.join(musique_indir, f"{data_type}_musique_train.jsonl"), 'r', encoding="utf-8") as f1:
        curr_data = json.load(f1)
    curr_output = get_prompts_musique(curr_data)
    # NOTE: despite its name, curr_outdir is the path of the output *file*.
    curr_outdir = os.path.join(musique_outdir, f"musique_trainset_{data_type}_all.json")
    with open(curr_outdir, 'w', encoding="utf-8") as f1:
        json.dump(curr_output, f1)

## Read the JSON file (CosmosQA)

In [None]:
# Input file of this section (loaded with json.load in the next cell).
# NOTE(review): rebinds `indir`/`outdir`, which were already set at the top of the notebook;
# cells run out of order will pick up whichever assignment ran last.
indir = r"data/task025_cosmosqa_incorrect_answer_generation.json"
# Output directory; not used by the cells shown in this section — presumably by later save cells.
outdir = r"generated_prompts"

In [None]:
# Parse the whole task file; the following cells read its "Instances" list.
with open(indir) as f:
    data = json.load(f)

In [None]:
# Number of instances before filtering (displayed as the cell output).
len(data["Instances"])

## Filter only samples with more than two answers

In [None]:
# Keep only instances with at least three entries in "output": the prompt cells below
# index output[0], output[1] and output[2].
filtered_data = [elem for elem in data["Instances"] if len(elem["output"])>2]
# Number of instances that survive the filter (displayed as the cell output).
len(filtered_data)

## ChatGPT

### Generate Prompts (Zero-Shot) - Adversarial - Version 1

In [None]:
# Zero-shot prompts for the adversarial setting: every listed option comes from
# elem["output"]; the text after "\nCorrect Answer:" is cut off and never shown.
# Each template line carries the literal 4-space indentation of this cell.
prompt_list_zero_shot = list()
for elem in filtered_data:

    prompt_elem = dict()

    prompt_elem["id"] = elem["id"]

    # The input is "<context>\nCorrect Answer: <answer>"; keep only the context part.
    context_end = elem["input"].find("\nCorrect Answer:")
    if context_end == -1:
        raise Exception("Didn't find the beginning of the \"Correct Answer\" in the input")

    context = elem["input"][:context_end]

    # Three options, no way to abstain.
    prompt_elem["Adversarial"] = f"""{context}
    Given this context, which of the following options answers the question?
    (A) {elem["output"][0]}
    (B) {elem["output"][1]}
    (C) {elem["output"][2]}""" 

    # Same as above, with only the first two options.
    prompt_elem["Adversarial-2-options"] = f"""{context}
    Given this context, which of the following options answers the question?
    (A) {elem["output"][0]}
    (B) {elem["output"][1]}""" 

    
    # Three options plus an explicit instruction to answer "N/A" when none fits.
    prompt_elem["Pseudo-Adversarial"] = f"""{context}
    Given this context, which of the following options answers the question?
    If none of the options fit, answer "N/A".
    Keep your answer short - only "(A)", "(B)", "(C)" or "N/A" if none of the options fit:
    (A) {elem["output"][0]}
    (B) {elem["output"][1]}
    (C) {elem["output"][2]}"""  
    
    # Abstention offered as a regular fourth option instead of an instruction.
    prompt_elem["Adversarial-NA-fourth-option"] = f"""{context}
    Given this context, which of the following options answers the question? 
    Keep your answer short - only "(A)", "(B)", "(C)" or "(D)":
    (A) {elem["output"][0]}
    (B) {elem["output"][1]}
    (C) {elem["output"][2]}
    (D) None of the above.""" 

    # Binary question: does any of the options answer the question at all?
    prompt_elem["Answerability"] = f"""{context}
    Given this context, does any of the following options answer the question? 
    Answer with a single word - "yes" or "no":
    (A) {elem["output"][0]}
    (B) {elem["output"][1]}
    (C) {elem["output"][2]}""" 

    prompt_list_zero_shot.append(prompt_elem)

### Generate Prompts (Zero-Shot) - Control Group - Version 1

In [None]:
# Zero-shot prompts for the control group: same templates as the adversarial cell, but
# the last listed option is replaced by the text that follows "\nCorrect Answer:".
# NOTE(review): the correct answer therefore always sits in the last slot
# ((C), or (B) in the two-option prompt) — confirm this positional regularity is intended.
prompt_list_zero_shot_control_group = list()
for elem in filtered_data:

    prompt_elem = dict()

    prompt_elem["id"] = elem["id"]

    # The input is "<context>\nCorrect Answer: <answer>"; split it at that marker.
    context_end = elem["input"].find("\nCorrect Answer:")
    if context_end == -1:
        raise Exception("Didn't find the beginning of the \"Correct Answer\" in the input")

    context = elem["input"][:context_end]
    # Everything from the marker on, with the marker text removed and whitespace trimmed.
    correct_answer = elem["input"][elem["input"].index("\nCorrect Answer:"):].replace("\nCorrect Answer:", "").strip()

    # Three options, no way to abstain.
    prompt_elem["Adversarial"] = f"""{context}
    Given this context, which of the following options answers the question?
    (A) {elem["output"][0]}
    (B) {elem["output"][1]}
    (C) {correct_answer}""" 

    # Two options: the first entry of elem["output"] and the correct answer.
    prompt_elem["Adversarial-2-options"] = f"""{context}
    Given this context, which of the following options answers the question?
    (A) {elem["output"][0]}
    (B) {correct_answer}""" 

    
    # Three options plus an explicit instruction to answer "N/A" when none fits.
    prompt_elem["Pseudo-Adversarial"] = f"""{context}
    Given this context, which of the following options answers the question?
    If none of the options fit, answer "N/A".
    Keep your answer short - only "(A)", "(B)", "(C)" or "N/A" if none of the options fit:
    (A) {elem["output"][0]}
    (B) {elem["output"][1]}
    (C) {correct_answer}"""  
    
    # Abstention offered as a regular fourth option instead of an instruction.
    prompt_elem["Adversarial-NA-fourth-option"] = f"""{context}
    Given this context, which of the following options answers the question? 
    Keep your answer short - only "(A)", "(B)", "(C)" or "(D)":
    (A) {elem["output"][0]}
    (B) {elem["output"][1]}
    (C) {correct_answer}
    (D) None of the above.""" 

    # Binary question: does any of the options answer the question at all?
    prompt_elem["Answerability"] = f"""{context}
    Given this context, does any of the following options answer the question? 
    Answer with a single word - "yes" or "no":
    (A) {elem["output"][0]}
    (B) {elem["output"][1]}
    (C) {correct_answer}""" 

    prompt_list_zero_shot_control_group.append(prompt_elem)

### Generate Prompts (Zero-Shot) - Adversarial - Version 2

In [None]:
# Version 2 zero-shot prompts for the adversarial setting: all listed options
# come from elem["output"]; the correct answer found after the
# "\nCorrect Answer:" marker is cut off and never shown.
# Compared with the Version 1 wording, the "none fits" reply is "unanswerable"
# and the Answerability prompt asks for "answerable" / "unanswerable".
prompt_list_zero_shot_v2 = list()
for elem in filtered_data:

    prompt_elem = dict()

    prompt_elem["id"] = elem["id"]

    # Locate the marker that separates the context from the correct answer.
    context_end = elem["input"].find("\nCorrect Answer:")
    if context_end == -1:
        raise Exception("Didn't find the beginning of the \"Correct Answer\" in the input")

    context = elem["input"][:context_end]

    # In all templates below, the four-space indentation of the continuation
    # lines is inside the string literal and therefore part of the prompt text.

    # Three options, no instruction about the answer format.
    prompt_elem["Adversarial"] = f"""{context}
    Given this context, which of the following options answers the question?
    (A) {elem["output"][0]}
    (B) {elem["output"][1]}
    (C) {elem["output"][2]}"""

    # Two options only.
    prompt_elem["Adversarial-2-options"] = f"""{context}
    Given this context, which of the following options answers the question?
    (A) {elem["output"][0]}
    (B) {elem["output"][1]}"""


    # Three options plus an explicit instruction to reply "unanswerable" when none fits.
    prompt_elem["Pseudo-Adversarial"] = f"""{context}
    Given this context, which of the following options answers the question?
    If none of the options fit, reply "unanswerable".
    Keep your answer short - only "(A)", "(B)", "(C)" or "unanswerable" if none of the options fit:
    (A) {elem["output"][0]}
    (B) {elem["output"][1]}
    (C) {elem["output"][2]}"""

    # Three options plus a literal fourth option "(D) None of the above.".
    prompt_elem["Adversarial-NA-fourth-option"] = f"""{context}
    Given this context, which of the following options answers the question? 
    Keep your answer short - only "(A)", "(B)", "(C)" or "(D)":
    (A) {elem["output"][0]}
    (B) {elem["output"][1]}
    (C) {elem["output"][2]}
    (D) None of the above."""

    # Binary answerable/unanswerable question. The template ends directly after
    # option (C): the stray trailing newline it used to carry made this prompt
    # differ in format from every other template in this cell and from the
    # control-group Version 2 "Answerability" prompt it is compared against.
    prompt_elem["Answerability"] = f"""Determine whether the following context, question and three options are answerable. Reply only "answerable" or "unanswerable":
    {context}
    (A) {elem["output"][0]}
    (B) {elem["output"][1]}
    (C) {elem["output"][2]}"""

    prompt_list_zero_shot_v2.append(prompt_elem)

### Generate Prompts (Zero-Shot) - Control Group - Version 2

In [None]:
# Version 2 zero-shot prompts for the control group.
# For every sample the text before the "\nCorrect Answer:" marker becomes the
# prompt context, and the text after it becomes one of the listed options, so
# each prompt contains the correct answer next to options from elem["output"].
# NOTE(review): the correct answer always occupies the last slot ((C), or (B)
# in the two-option prompt) - confirm that this fixed position is intended.
prompt_list_zero_shot_control_group_v2 = list()
for elem in filtered_data:

    prompt_elem = dict()

    prompt_elem["id"] = elem["id"]

    # Locate the marker that separates the context from the correct answer.
    context_end = elem["input"].find("\nCorrect Answer:")
    if context_end == -1:
        raise Exception("Didn't find the beginning of the \"Correct Answer\" in the input")

    context = elem["input"][:context_end]
    # Text from the marker onwards, with the marker removed and surrounding whitespace stripped.
    correct_answer = elem["input"][elem["input"].index("\nCorrect Answer:"):].replace("\nCorrect Answer:", "").strip()

    # In all templates below, the four-space indentation of the continuation
    # lines is inside the string literal and therefore part of the prompt text.

    # Three options, no instruction about the answer format.
    prompt_elem["Adversarial"] = f"""{context}
    Given this context, which of the following options answers the question?
    (A) {elem["output"][0]}
    (B) {elem["output"][1]}
    (C) {correct_answer}"""

    # Two options only: the first entry of elem["output"] and the correct answer.
    prompt_elem["Adversarial-2-options"] = f"""{context}
    Given this context, which of the following options answers the question?
    (A) {elem["output"][0]}
    (B) {correct_answer}"""


    # Three options plus an explicit instruction to reply "unanswerable" when none fits.
    prompt_elem["Pseudo-Adversarial"] = f"""{context}
    Given this context, which of the following options answers the question?
    If none of the options fit, reply "unanswerable".
    Keep your answer short - only "(A)", "(B)", "(C)" or "unanswerable" if none of the options fit:
    (A) {elem["output"][0]}
    (B) {elem["output"][1]}
    (C) {correct_answer}"""

    # Three options plus a literal fourth option "(D) None of the above.".
    prompt_elem["Adversarial-NA-fourth-option"] = f"""{context}
    Given this context, which of the following options answers the question? 
    Keep your answer short - only "(A)", "(B)", "(C)" or "(D)":
    (A) {elem["output"][0]}
    (B) {elem["output"][1]}
    (C) {correct_answer}
    (D) None of the above."""

    # Binary answerable/unanswerable question; unlike the other templates the
    # instruction comes first (unindented) and the context follows it.
    prompt_elem["Answerability"] = f"""Determine whether the following context, question and three options are answerable. Reply only "answerable" or "unanswerable":
    {context}
    (A) {elem["output"][0]}
    (B) {elem["output"][1]}
    (C) {correct_answer}"""

    prompt_list_zero_shot_control_group_v2.append(prompt_elem)

### Generate Prompts (Few-Shot) - Adversarial + Control_Group - Version 1

In [None]:
def get_example(example, is_CoT):
    """Render one few-shot demonstration as plain text.

    The result consists of the example's input (followed by a single space),
    an "Options:" line, one " (X) ..." line per option, and an "Output:" line.

    Args:
        example: dict with the keys "input", "option A", "option B",
            "option C", "Answer", optionally "option D", and "CoT" (only read
            when ``is_CoT`` is truthy).
        is_CoT: when truthy, the output line holds the chain-of-thought text
            followed by "Therefore, ..."; otherwise it holds the answer alone.

    Returns:
        str: the rendered demonstration, without a trailing newline.
    """
    letters = ["A", "B", "C"]
    if "option D" in example:
        letters.append("D")

    rendered_lines = [f'{example["input"]} ', "Options:"]
    for letter in letters:
        rendered_lines.append(f' ({letter}) {example["option " + letter]}')

    answer = example["Answer"]
    if not is_CoT:
        rendered_lines.append(f"Output: {answer}")
    elif answer in ("(A)", "(B)", "(C)", "(D)"):
        # Option-letter answers are phrased as "the answer is (X)".
        rendered_lines.append(f'Output: {example["CoT"]} Therefore, the answer is {answer}.')
    else:
        # Free-text answers are appended to "Therefore," verbatim.
        rendered_lines.append(f'Output: {example["CoT"]} Therefore, {answer}.')

    return "\n".join(rendered_lines)

In [None]:
def get_example_dicts_cosmos():
    """Return the seven hand-written few-shot demonstrations (Version 1 wording).

    Two passages are reused: a "shoes" passage and a "relationship" passage.
    Every demonstration is a fresh dict with the keys 'input', 'option A',
    'option B', 'option C', optionally 'option D', then 'Answer' and 'CoT'.

    Returns:
        tuple: (pos_example_1, pos_example_2, neg_example_1, neg_example_2,
        neg_example_3, pos_example_1_answerability,
        neg_example_1_answerability). Note that neg_example_3 lists an
        'option D' but its 'Answer' is still '(B)'.
    """
    # --- shared building blocks -------------------------------------------
    shoes = {
        'input': "Context: I was told, in person over the phone, that my shoes were on their way. They have my money. I have no shoes. \nQuestion: What may happen before I called them?",
        'option A': 'I found the money was charged but I have not got shoes.',
        'option B': 'I found the shoes were still on the way after several days.',
        'option C': 'I felt happy though I paid money and have not got the shoes.',
    }
    # Same passage and options, except that option A is swapped out.
    shoes_without_answer = {**shoes, 'option A': 'I will pay them money after I receive the shoes.'}
    relationship = {
        'input': 'Context: you see , at my age relationship is kind of important and i thought i got the one after all these years . I noticed that once again i was wrong . i was good simply because i was good , i was caring , helping , supportive , bla bla blaaa . \nQuestion: What may happen to me?',
        'option A': 'I got one important relationship.',
        'option B': 'I broke up with my friend.',
        'option C': 'I got a friend who is good, caring, helping and supportive.',
    }
    none_of_the_above = {'option D': 'None of the above.'}

    shoes_cot = "You are told, in person over the phone, that your shoes were on their way. They have your money, which means your paid for the shoes. you haven't received the shoes yet."
    shoes_without_answer_cot = "They have your money, so you already paid, so answer (A) is incorrect. You were told that the shoes are on their way during the call, not before it, so answer (B) is incorrect. You paid money and haven't got your shoes yet, so it is implausible you were happy, so answer (C) is incorrect."
    relationship_cot = "You say that relationship is important, and that you thought you finally got one. Then you say that you were wrong, which means you don't have a relationship afterall."

    # --- demonstrations ----------------------------------------------------
    pos_example_1 = {**shoes, 'Answer': '(A)', 'CoT': shoes_cot}
    pos_example_2 = {**relationship, 'Answer': '(B)', 'CoT': relationship_cot}

    neg_example_1 = {**shoes_without_answer,
                     'Answer': 'none of the options answers the question',
                     'CoT': shoes_without_answer_cot}
    neg_example_2 = {**shoes_without_answer, **none_of_the_above,
                     'Answer': '(D)',
                     'CoT': shoes_without_answer_cot}
    neg_example_3 = {**relationship, **none_of_the_above,
                     'Answer': '(B)',
                     'CoT': relationship_cot}

    pos_example_1_answerability = {**relationship,
                                   'Answer': 'there is a correct option (option B)',
                                   'CoT': relationship_cot}
    neg_example_1_answerability = {**shoes_without_answer,
                                   'Answer': 'there is no correct option',
                                   'CoT': shoes_without_answer_cot}

    return (pos_example_1, pos_example_2, neg_example_1, neg_example_2, neg_example_3,
            pos_example_1_answerability, neg_example_1_answerability)

In [None]:
def get_prompt_list_cosmos_few_shot(filtered_data, is_control_group):
    """Build the Version 1 few-shot prompts (ten variants) for every sample.

    Every prompt consists of two rendered demonstrations ("Example 1:" and
    "Example 2:"), followed by "Now your turn:", the sample's context, an
    "Options:" list and a final "Output:" line.

    The literal whitespace of the hand-written templates this function
    replaces is reproduced exactly: each template line is indented by four
    spaces, "Example 2:" is followed by one trailing space, and the two
    "Adversarial-*-CoT" variants carry an extra "\\n " after their first
    demonstration.

    Args:
        filtered_data: iterable of samples; each sample provides "id",
            "input" (text containing a "\\nCorrect Answer:" marker) and
            "output" (indexable collection of option texts).
        is_control_group: when truthy, the last listed option taken from the
            sample ((C), or (B) in the two-option variants) is the correct
            answer parsed from "input"; otherwise it is read from "output".

    Returns:
        list[dict]: one dict per sample holding "id" plus one prompt string
        per variant, in the same key order as before.

    Raises:
        Exception: if a sample's "input" lacks the "\\nCorrect Answer:" marker.
    """
    answer_marker = "\nCorrect Answer:"

    # The demonstrations are the same for every sample, so they are fetched
    # and rendered once instead of on every loop iteration.
    (pos_example_1, pos_example_2, neg_example_1, neg_example_2, neg_example_3,
     pos_example_1_answerability, neg_example_1_answerability) = get_example_dicts_cosmos()

    # (prompt key, first demo, second demo, chain-of-thought?,
    #  number of options taken from the sample, append "(D) None of the above."?,
    #  literal text that follows the first demo)
    variant_specs = [
        ("Adversarial-3-options", pos_example_1, pos_example_2, False, 3, False, ""),
        ("Adversarial-3-options-CoT", pos_example_1, pos_example_2, True, 3, False, "\n "),
        ("Adversarial-2-options", pos_example_1, pos_example_2, False, 2, False, ""),
        ("Adversarial-2-options-CoT", pos_example_1, pos_example_2, True, 2, False, "\n "),
        ("Pseudo-Adversarial", pos_example_1, neg_example_1, False, 3, False, ""),
        ("Pseudo-Adversarial-CoT", pos_example_1, neg_example_1, True, 3, False, ""),
        ("Adversarial-NA-fourth-option", neg_example_2, neg_example_3, False, 3, True, ""),
        ("Adversarial-NA-fourth-option-CoT", neg_example_2, neg_example_3, True, 3, True, ""),
        ("Answerability", pos_example_1_answerability, neg_example_1_answerability, False, 3, False, ""),
        ("Answerability-CoT", pos_example_1_answerability, neg_example_1_answerability, True, 3, False, ""),
    ]

    # Sample-independent head of every variant, up to and including "Now your turn:".
    rendered_variants = []
    for key, first_demo, second_demo, is_cot, n_options, add_none_option, first_demo_suffix in variant_specs:
        preamble = (
            "\n    Example 1:\n"
            f"    {get_example(first_demo, is_cot)}{first_demo_suffix}\n"
            "\n"
            "    Example 2: \n"
            f"    {get_example(second_demo, is_cot)}\n"
            "\n"
            "    Now your turn:\n"
        )
        rendered_variants.append((key, preamble, n_options, add_none_option))

    prompt_list_few_shot = []
    for elem in filtered_data:
        prompt_elem = {"id": elem["id"]}

        context_end = elem["input"].find(answer_marker)
        if context_end == -1:
            raise Exception("Didn't find the beginning of the \"Correct Answer\" in the input")

        context = elem["input"][:context_end]
        # Reuse the position found above rather than searching for the marker again.
        correct_answer = elem["input"][context_end:].replace(answer_marker, "").strip()

        # elem["output"][2] is only read for the non-control group, as before.
        two_options = [elem["output"][0],
                       correct_answer if is_control_group else elem["output"][1]]
        three_options = [elem["output"][0],
                         elem["output"][1],
                         correct_answer if is_control_group else elem["output"][2]]

        for key, preamble, n_options, add_none_option in rendered_variants:
            options = list(two_options if n_options == 2 else three_options)
            if add_none_option:
                options.append("None of the above.")
            option_lines = "".join(
                f"    ({label}) {text}\n" for label, text in zip("ABCD", options)
            )
            prompt_elem[key] = f"{preamble}    {context}\n    Options:\n{option_lines}    Output:"

        prompt_list_few_shot.append(prompt_elem)
    return prompt_list_few_shot

In [None]:
# Build the Version 1 few-shot prompts twice from the same filtered_data:
# once with is_control_group=False and once with is_control_group=True.
prompt_list_few_shot = get_prompt_list_cosmos_few_shot(filtered_data, False)
prompt_list_few_shot_control_group = get_prompt_list_cosmos_few_shot(filtered_data, True)

### Generate Prompts (Few-Shot) - Adversarial + Control_Group - Version 2

In [None]:
def get_example(example, is_CoT):
    """Render one few-shot demonstration as plain text.

    NOTE: this cell re-defines ``get_example``; the notebook already defines a
    function of the same name and behaviour in the Version 1 few-shot section.

    Args:
        example: dict with the keys "input", "option A", "option B",
            "option C", "Answer", optionally "option D", and "CoT" (only read
            when ``is_CoT`` is truthy).
        is_CoT: when truthy, the output line holds the chain-of-thought text
            followed by "Therefore, ..."; otherwise it holds the answer alone.

    Returns:
        str: input text (followed by one space), "Options:", one " (X) ..."
        line per option and an "Output:" line, joined by newlines.
    """
    labels = ("A", "B", "C", "D") if "option D" in example else ("A", "B", "C")
    option_block = "\n".join(
        f' ({label}) {example["option " + label]}' for label in labels
    )
    body = f'{example["input"]} \nOptions:\n{option_block}'

    answer = example["Answer"]
    if is_CoT:
        # Option-letter answers read "the answer is (X)"; free-text answers
        # are appended to "Therefore," verbatim.
        is_letter_answer = answer in ("(A)", "(B)", "(C)", "(D)")
        conclusion = f"the answer is {answer}" if is_letter_answer else f"{answer}"
        output_line = f'Output: {example["CoT"]} Therefore, {conclusion}.'
    else:
        output_line = f"Output: {answer}"

    return body + "\n" + output_line

In [None]:
def get_example_dicts_cosmos_v2():
    """Return the seven hand-written few-shot demonstrations (Version 2 wording).

    Two passages are reused: a "shoes" passage and a "relationship" passage.
    Every demonstration is a fresh dict with the keys 'input', 'option A',
    'option B', 'option C', optionally 'option D', then 'Answer' and 'CoT'.
    In this version the free-text answers are the single words 'answerable'
    and 'unanswerable'.

    Returns:
        tuple: (pos_example_1, pos_example_2, neg_example_1, neg_example_2,
        neg_example_3, pos_example_1_answerability,
        neg_example_1_answerability). Note that neg_example_3 lists an
        'option D' but its 'Answer' is still '(B)'.
    """
    # --- shared building blocks -------------------------------------------
    shoes = {
        'input': "Context: I was told, in person over the phone, that my shoes were on their way. They have my money. I have no shoes. \nQuestion: What may happen before I called them?",
        'option A': 'I found the money was charged but I have not got shoes.',
        'option B': 'I found the shoes were still on the way after several days.',
        'option C': 'I felt happy though I paid money and have not got the shoes.',
    }
    # Same passage and options, except that option A is swapped out.
    shoes_without_answer = {**shoes, 'option A': 'I will pay them money after I receive the shoes.'}
    relationship = {
        'input': 'Context: you see , at my age relationship is kind of important and i thought i got the one after all these years . I noticed that once again i was wrong . i was good simply because i was good , i was caring , helping , supportive , bla bla blaaa . \nQuestion: What may happen to me?',
        'option A': 'I got one important relationship.',
        'option B': 'I broke up with my friend.',
        'option C': 'I got a friend who is good, caring, helping and supportive.',
    }
    none_of_the_above = {'option D': 'None of the above.'}

    shoes_cot = "You are told, in person over the phone, that your shoes were on their way. They have your money, which means your paid for the shoes. you haven't received the shoes yet."
    shoes_without_answer_cot = "They have your money, so you already paid, so answer (A) is incorrect. You were told that the shoes are on their way during the call, not before it, so answer (B) is incorrect. You paid money and haven't got your shoes yet, so it is implausible you were happy, so answer (C) is incorrect."
    relationship_cot = "You say that relationship is important, and that you thought you finally got one. Then you say that you were wrong, which means you don't have a relationship afterall."

    # --- demonstrations ----------------------------------------------------
    pos_example_1 = {**shoes, 'Answer': '(A)', 'CoT': shoes_cot}
    pos_example_2 = {**relationship, 'Answer': '(B)', 'CoT': relationship_cot}

    neg_example_1 = {**shoes_without_answer,
                     'Answer': 'unanswerable',
                     'CoT': shoes_without_answer_cot}
    neg_example_2 = {**shoes_without_answer, **none_of_the_above,
                     'Answer': '(D)',
                     'CoT': shoes_without_answer_cot}
    neg_example_3 = {**relationship, **none_of_the_above,
                     'Answer': '(B)',
                     'CoT': relationship_cot}

    pos_example_1_answerability = {**relationship,
                                   'Answer': 'answerable',
                                   'CoT': relationship_cot}
    neg_example_1_answerability = {**shoes_without_answer,
                                   'Answer': 'unanswerable',
                                   'CoT': shoes_without_answer_cot}

    return (pos_example_1, pos_example_2, neg_example_1, neg_example_2, neg_example_3,
            pos_example_1_answerability, neg_example_1_answerability)

In [None]:
def get_prompt_list_cosmos_few_shot_v2(filtered_data, is_control_group):
    """Build the Version 2 few-shot prompts (ten variants) for every sample.

    Every prompt consists of two rendered demonstrations ("Example 1:" and
    "Example 2:") taken from ``get_example_dicts_cosmos_v2``, followed by
    "Now your turn:", the sample's context, an "Options:" list and a final
    "Output:" line.

    The literal whitespace of the hand-written templates this function
    replaces is reproduced exactly: each template line is indented by four
    spaces, "Example 2:" is followed by one trailing space, and the two
    "Adversarial-*-CoT" variants carry an extra "\\n " after their first
    demonstration.

    Args:
        filtered_data: iterable of samples; each sample provides "id",
            "input" (text containing a "\\nCorrect Answer:" marker) and
            "output" (indexable collection of option texts).
        is_control_group: when truthy, the last listed option taken from the
            sample ((C), or (B) in the two-option variants) is the correct
            answer parsed from "input"; otherwise it is read from "output".

    Returns:
        list[dict]: one dict per sample holding "id" plus one prompt string
        per variant, in the same key order as before.

    Raises:
        Exception: if a sample's "input" lacks the "\\nCorrect Answer:" marker.
    """
    answer_marker = "\nCorrect Answer:"

    # The demonstrations are the same for every sample, so they are fetched
    # and rendered once instead of on every loop iteration.
    (pos_example_1, pos_example_2, neg_example_1, neg_example_2, neg_example_3,
     pos_example_1_answerability, neg_example_1_answerability) = get_example_dicts_cosmos_v2()

    # (prompt key, first demo, second demo, chain-of-thought?,
    #  number of options taken from the sample, append "(D) None of the above."?,
    #  literal text that follows the first demo)
    variant_specs = [
        ("Adversarial-3-options", pos_example_1, pos_example_2, False, 3, False, ""),
        ("Adversarial-3-options-CoT", pos_example_1, pos_example_2, True, 3, False, "\n "),
        ("Adversarial-2-options", pos_example_1, pos_example_2, False, 2, False, ""),
        ("Adversarial-2-options-CoT", pos_example_1, pos_example_2, True, 2, False, "\n "),
        ("Pseudo-Adversarial", pos_example_1, neg_example_1, False, 3, False, ""),
        ("Pseudo-Adversarial-CoT", pos_example_1, neg_example_1, True, 3, False, ""),
        ("Adversarial-NA-fourth-option", neg_example_2, neg_example_3, False, 3, True, ""),
        ("Adversarial-NA-fourth-option-CoT", neg_example_2, neg_example_3, True, 3, True, ""),
        ("Answerability", pos_example_1_answerability, neg_example_1_answerability, False, 3, False, ""),
        ("Answerability-CoT", pos_example_1_answerability, neg_example_1_answerability, True, 3, False, ""),
    ]

    # Sample-independent head of every variant, up to and including "Now your turn:".
    rendered_variants = []
    for key, first_demo, second_demo, is_cot, n_options, add_none_option, first_demo_suffix in variant_specs:
        preamble = (
            "\n    Example 1:\n"
            f"    {get_example(first_demo, is_cot)}{first_demo_suffix}\n"
            "\n"
            "    Example 2: \n"
            f"    {get_example(second_demo, is_cot)}\n"
            "\n"
            "    Now your turn:\n"
        )
        rendered_variants.append((key, preamble, n_options, add_none_option))

    prompt_list_few_shot = []
    for elem in filtered_data:
        prompt_elem = {"id": elem["id"]}

        context_end = elem["input"].find(answer_marker)
        if context_end == -1:
            raise Exception("Didn't find the beginning of the \"Correct Answer\" in the input")

        context = elem["input"][:context_end]
        # Reuse the position found above rather than searching for the marker again.
        correct_answer = elem["input"][context_end:].replace(answer_marker, "").strip()

        # elem["output"][2] is only read for the non-control group, as before.
        two_options = [elem["output"][0],
                       correct_answer if is_control_group else elem["output"][1]]
        three_options = [elem["output"][0],
                         elem["output"][1],
                         correct_answer if is_control_group else elem["output"][2]]

        for key, preamble, n_options, add_none_option in rendered_variants:
            options = list(two_options if n_options == 2 else three_options)
            if add_none_option:
                options.append("None of the above.")
            option_lines = "".join(
                f"    ({label}) {text}\n" for label, text in zip("ABCD", options)
            )
            prompt_elem[key] = f"{preamble}    {context}\n    Options:\n{option_lines}    Output:"

        prompt_list_few_shot.append(prompt_elem)
    return prompt_list_few_shot

In [None]:
# Build the Version 2 few-shot prompts twice from the same filtered_data:
# once with is_control_group=False and once with is_control_group=True.
prompt_list_few_shot_v2 = get_prompt_list_cosmos_few_shot_v2(filtered_data, False)
prompt_list_few_shot_control_group_v2 = get_prompt_list_cosmos_few_shot_v2(filtered_data, True)

### Generate Prompts (Few-Shot with Instructions) - Adversarial + Control_Group - Version 1

In [None]:
def get_example(example, is_CoT):
    """Render one few-shot demonstration as plain text.

    NOTE: this cell re-defines ``get_example``; the notebook already defines a
    function of the same name and behaviour in the earlier few-shot sections.

    Args:
        example: dict with the keys "input", "option A", "option B",
            "option C", "Answer", optionally "option D", and "CoT" (only read
            when ``is_CoT`` is truthy).
        is_CoT: when truthy, the output line holds the chain-of-thought text
            followed by "Therefore, ..."; otherwise it holds the answer alone.

    Returns:
        str: input text (followed by one space), "Options:", one " (X) ..."
        line per option and an "Output:" line, joined by newlines.
    """
    # Header: the input keeps a single trailing space before the line break.
    parts = [example["input"], " \nOptions:"]
    for letter in "ABC":
        parts.append(f'\n ({letter}) {example["option " + letter]}')
    if "option D" in example:
        parts.append(f'\n (D) {example["option D"]}')

    answer = example["Answer"]
    if not is_CoT:
        parts.append(f"\nOutput: {answer}")
        return "".join(f"{part}" for part in parts)

    # Chain-of-thought output: option-letter answers read "the answer is (X)",
    # any other answer text is appended to "Therefore," verbatim.
    if answer in ("(A)", "(B)", "(C)", "(D)"):
        parts.append(f'\nOutput: {example["CoT"]} Therefore, the answer is {answer}.')
    else:
        parts.append(f'\nOutput: {example["CoT"]} Therefore, {answer}.')
    return "".join(f"{part}" for part in parts)

In [None]:
def get_few_shot_with_instructions_example_dicts_cosmos():
    """Return the seven hand-written few-shot demonstrations (Version 1 wording).

    Two passages are reused across the demonstrations; the demonstrations differ
    in option (A) of the first passage, in whether a fourth "None of the above."
    option is present, and in the expected answer.

    Returns:
        A 7-tuple of dicts in this order: pos_example_1, pos_example_2,
        neg_example_1, neg_example_2, neg_example_3,
        pos_example_1_answerability, neg_example_1_answerability.
        Every dict has the keys "input", "option A", "option B", "option C",
        "Answer" and "CoT"; neg_example_2 and neg_example_3 also have "option D".
    """
    # --- shared building blocks (each text is written down exactly once) ---
    shoes_passage = "Context: I was told, in person over the phone, that my shoes were on their way. They have my money. I have no shoes. \nQuestion: What may happen before I called them?"
    relationship_passage = 'Context: you see , at my age relationship is kind of important and i thought i got the one after all these years . I noticed that once again i was wrong . i was good simply because i was good , i was caring , helping , supportive , bla bla blaaa . \nQuestion: What may happen to me?'

    shoes_option_b = 'I found the shoes were still on the way after several days.'
    shoes_option_c = 'I felt happy though I paid money and have not got the shoes.'
    shoes_options_with_answer = ('I found the money was charged but I have not got shoes.', shoes_option_b, shoes_option_c)
    shoes_options_without_answer = ('I will pay them money after I receive the shoes.', shoes_option_b, shoes_option_c)
    relationship_options = ('I got one important relationship.',
                            'I broke up with my friend.',
                            'I got a friend who is good, caring, helping and supportive.')
    none_of_the_above = ('None of the above.',)

    shoes_cot_with_answer = 'You are told, in person over the phone, that your shoes were on their way. They have your money, which means your paid for the shoes. you haven\'t received the shoes yet.'
    shoes_cot_without_answer = 'They have your money, so you already paid, so answer (A) is incorrect. You were told that the shoes are on their way during the call, not before it, so answer (B) is incorrect. You paid money and haven\'t got your shoes yet, so it is implausible you were happy, so answer (C) is incorrect.'
    relationship_cot = 'You say that relationship is important, and that you thought you finally got one. Then you say that you were wrong, which means you don\'t have a relationship afterall.'

    def make_example(passage, options, answer, cot):
        # Key order: input, option A..(D), Answer, CoT.
        example = {'input': passage}
        for letter, option_text in zip("ABCD", options):
            example[f'option {letter}'] = option_text
        example['Answer'] = answer
        example['CoT'] = cot
        return example

    return (
        make_example(shoes_passage, shoes_options_with_answer, '(A)', shoes_cot_with_answer),                          # pos_example_1
        make_example(relationship_passage, relationship_options, '(B)', relationship_cot),                             # pos_example_2
        make_example(shoes_passage, shoes_options_without_answer, 'N/A', shoes_cot_without_answer),                    # neg_example_1
        make_example(shoes_passage, shoes_options_without_answer + none_of_the_above, '(D)', shoes_cot_without_answer),  # neg_example_2
        make_example(relationship_passage, relationship_options + none_of_the_above, '(B)', relationship_cot),         # neg_example_3
        make_example(relationship_passage, relationship_options, 'yes', relationship_cot),                             # pos_example_1_answerability
        make_example(shoes_passage, shoes_options_without_answer, 'no', shoes_cot_without_answer),                     # neg_example_1_answerability
    )

In [None]:
def get_prompt_list_cosmos_few_shot_with_instructions(filtered_data, is_control_group):
    """Build the few-shot prompts with instructions (Version 1 wording) for every sample.

    For each sample, the text before the "Correct Answer:" marker (which must be
    preceded by a newline) in sample["input"] is used as the context/question,
    and the text after it as the correct answer. Twelve prompt variants are
    rendered per sample; each consists of an instruction, two demonstrations
    produced by get_example, the sample's context and its option list.

    Args:
        filtered_data: iterable of dicts with the keys "id", "input" and "output".
            "output" is indexed at positions 0, 1 and 2 for the listed options.
            "id" must be a string because the final whitespace normalisation is
            applied to every value of the result dicts.
        is_control_group: when truthy, the last listed option ((C), or (B) in the
            two-option variants) is replaced by the sample's correct answer;
            otherwise the options come from sample["output"] only.

    Returns:
        A list with one dict per sample. Each dict holds "id" plus the twelve
        prompt variants, with every run of spaces collapsed to a single space.

    Raises:
        Exception: if a sample's input does not contain the "Correct Answer" marker.
    """
    answer_marker = "\nCorrect Answer:"

    # The demonstrations do not depend on the sample, so build them once
    # instead of once per loop iteration.
    (pos_example_1, pos_example_2, neg_example_1, neg_example_2, neg_example_3,
     pos_example_1_answerability, neg_example_1_answerability) = get_few_shot_with_instructions_example_dicts_cosmos()

    # NOTE(review): the two-option variants reuse the "three options" instruction
    # even though only two options are listed - kept as is so the generated
    # prompts stay unchanged; confirm whether that wording is intended.
    choose_instruction = "Given the following context, question, and three options, which of the options answers the question?"
    pseudo_instruction = choose_instruction + "\nIf none of the options fit, answer \"N/A\"."
    several_instruction = "Given the following context, question, and several options, which of the options answers the question?"
    answerability_instruction = ("Given the following context, question, and three options, does any of the options answer the question? "
                                 "\nAnswer with a single word - \"yes\" or \"no\":")

    # Whitespace details of the original templates, kept so that the rendered
    # prompts are character-for-character the same as before:
    plain_gap = "\n\n"          # empty line between the instruction and "Example 1:"
    indented_gap = "\n    \n"   # same, but the separating line holds four spaces
    extra_break = "\n "         # extra line break + space after the first demonstration

    # (key, instruction, demonstration 1, demonstration 2, is_CoT, option set, gap, tail after demonstration 1)
    prompt_variants = (
        ("Adversarial-3-options", choose_instruction, pos_example_1, pos_example_2, False, "three", plain_gap, ""),
        ("Adversarial-3-options-CoT", choose_instruction, pos_example_1, pos_example_2, True, "three", plain_gap, extra_break),
        ("Adversarial-2-options", choose_instruction, pos_example_1, pos_example_2, False, "two", plain_gap, ""),
        ("Adversarial-2-options-CoT", choose_instruction, pos_example_1, pos_example_2, True, "two", plain_gap, extra_break),
        ("Pseudo-Adversarial", pseudo_instruction, pos_example_1, neg_example_1, False, "three", plain_gap, ""),
        ("Pseudo-Adversarial-CoT", pseudo_instruction, pos_example_1, neg_example_1, True, "three", plain_gap, ""),
        ("Adversarial-NA-fourth-option", several_instruction, neg_example_2, neg_example_3, False, "four", plain_gap, ""),
        ("Adversarial-NA-fourth-option-CoT", several_instruction, neg_example_2, neg_example_3, True, "four", indented_gap, ""),
        ("Adversarial-NA-fourth-option-Ablation1", several_instruction, pos_example_1, pos_example_2, False, "four", plain_gap, ""),
        ("Adversarial-NA-fourth-option-Ablation1-CoT", several_instruction, pos_example_1, pos_example_2, True, "four", indented_gap, ""),
        ("Answerability", answerability_instruction, pos_example_1_answerability, neg_example_1_answerability, False, "three", plain_gap, ""),
        ("Answerability-CoT", answerability_instruction, pos_example_1_answerability, neg_example_1_answerability, True, "three", plain_gap, ""),
    )

    def build_prompt(instruction, gap, first_example, second_example, is_CoT, first_tail, context, options):
        # One physical template line per f-string fragment.
        return (
            f"{instruction}{gap}"
            f"    Example 1:\n"
            f"    {get_example(first_example, is_CoT)}{first_tail}\n"
            f"\n"
            f"    Example 2: \n"
            f"    {get_example(second_example, is_CoT)}\n"
            f"\n"
            f"    Now your turn:\n"
            f"    {context}\n"
            f"Options:\n"
            f"{options}\n"
            f"Output:"
        )

    prompt_list_few_shot = list()
    for elem in filtered_data:
        prompt_elem = {"id": elem["id"]}

        context_end = elem["input"].find(answer_marker)
        if context_end == -1:
            raise Exception("Didn't find the beginning of the \"Correct Answer\" in the input")

        context = elem["input"][:context_end]
        # Reuse the position found above instead of searching for the marker again.
        correct_answer = elem["input"][context_end:].replace(answer_marker, "").strip()

        # In the control group the last listed option is the correct answer.
        three_options = (f"    (A) {elem['output'][0]}\n"
                         f"    (B) {elem['output'][1]}\n"
                         f"    (C) {correct_answer if is_control_group else elem['output'][2]}")
        options_by_size = {
            "two": (f"    (A) {elem['output'][0]}\n"
                    f"    (B) {correct_answer if is_control_group else elem['output'][1]}"),
            "three": three_options,
            "four": three_options + "\n    (D) None of the above.",
        }

        for key, instruction, first_example, second_example, is_CoT, option_set, gap, first_tail in prompt_variants:
            prompt_elem[key] = build_prompt(instruction, gap, first_example, second_example, is_CoT,
                                            first_tail, context, options_by_size[option_set])

        prompt_list_few_shot.append(prompt_elem)

    # replace consecutive spaces with a single space (applied to every value, including "id")
    prompt_list_few_shot = [{key: re.sub(' +', ' ', value) for key, value in sample.items()} for sample in prompt_list_few_shot]
    return prompt_list_few_shot

In [None]:
# Version 1 few-shot-with-instructions prompts: adversarial setting, then control group.
prompt_list_few_shot_with_instructions = get_prompt_list_cosmos_few_shot_with_instructions(
    filtered_data, is_control_group=False
)
prompt_list_few_shot_with_instructions_control_group = get_prompt_list_cosmos_few_shot_with_instructions(
    filtered_data, is_control_group=True
)

### Generate Prompts (Few-Shot with Instructions) - Adversarial + Control_Group - Version 2

In [None]:
def get_example(example, is_CoT):
    """Format a single demonstration (input, options, expected output) as text.

    NOTE(review): this re-defines get_example with the same behaviour as the
    definition in the "Version 1" section of this notebook; consider keeping a
    single definition.

    Args:
        example: dict providing "input", "option A", "option B", "option C" and
            "Answer"; "option D" is optional, and "CoT" is read only when
            is_CoT is truthy.
        is_CoT: truthy to emit the chain-of-thought text before the answer.

    Returns:
        The formatted demonstration string, ending with the "Output: ..." line.
    """
    # A space is kept between the input and the line break before "Options:".
    text = "{} \nOptions:\n (A) {}\n (B) {}\n (C) {}".format(
        example["input"], example["option A"], example["option B"], example["option C"]
    )
    if 'option D' in example:
        text += "\n (D) {}".format(example["option D"])

    answer = example["Answer"]
    if is_CoT:
        # Only letter answers are introduced with "the answer is".
        lead_in = "the answer is " if answer in ("(A)", "(B)", "(C)", "(D)") else ""
        text += "\nOutput: {} Therefore, {}{}.".format(example["CoT"], lead_in, answer)
    else:
        text += "\nOutput: {}".format(answer)
    return text

In [None]:
def get_few_shot_with_instructions_example_dicts_cosmos_v2():
    """Return the seven hand-written few-shot demonstrations (Version 2 wording).

    The negative and answerability demonstrations use "unanswerable" and
    "answerable" as their expected answers.

    Returns:
        A 7-tuple of dicts in this order: pos_example_1, pos_example_2,
        neg_example_1, neg_example_2, neg_example_3,
        pos_example_1_answerability, neg_example_1_answerability.
        Every dict has the keys "input", "option A", "option B", "option C",
        "Answer" and "CoT"; neg_example_2 and neg_example_3 also have "option D".
    """
    # Passages (context + question) shared by several demonstrations.
    passage_shoes = "Context: I was told, in person over the phone, that my shoes were on their way. They have my money. I have no shoes. \nQuestion: What may happen before I called them?"
    passage_relationship = 'Context: you see , at my age relationship is kind of important and i thought i got the one after all these years . I noticed that once again i was wrong . i was good simply because i was good , i was caring , helping , supportive , bla bla blaaa . \nQuestion: What may happen to me?'

    # Option lists; the two "shoes" lists differ only in option (A).
    still_on_the_way = 'I found the shoes were still on the way after several days.'
    felt_happy = 'I felt happy though I paid money and have not got the shoes.'
    options_shoes_pos = ['I found the money was charged but I have not got shoes.', still_on_the_way, felt_happy]
    options_shoes_neg = ['I will pay them money after I receive the shoes.', still_on_the_way, felt_happy]
    options_relationship = ['I got one important relationship.',
                            'I broke up with my friend.',
                            'I got a friend who is good, caring, helping and supportive.']
    fourth_option = 'None of the above.'

    # Chain-of-thought rationales.
    cot_shoes_pos = 'You are told, in person over the phone, that your shoes were on their way. They have your money, which means your paid for the shoes. you haven\'t received the shoes yet.'
    cot_shoes_neg = 'They have your money, so you already paid, so answer (A) is incorrect. You were told that the shoes are on their way during the call, not before it, so answer (B) is incorrect. You paid money and haven\'t got your shoes yet, so it is implausible you were happy, so answer (C) is incorrect.'
    cot_relationship = 'You say that relationship is important, and that you thought you finally got one. Then you say that you were wrong, which means you don\'t have a relationship afterall.'

    def assemble(passage, options, answer, cot, with_fourth_option=False):
        # Keys are inserted as: input, option A-C, (option D), Answer, CoT.
        listed = options + [fourth_option] if with_fourth_option else options
        demo = {'input': passage}
        demo.update(('option ' + letter, text) for letter, text in zip("ABCD", listed))
        demo['Answer'] = answer
        demo['CoT'] = cot
        return demo

    pos_example_1 = assemble(passage_shoes, options_shoes_pos, '(A)', cot_shoes_pos)
    pos_example_2 = assemble(passage_relationship, options_relationship, '(B)', cot_relationship)
    neg_example_1 = assemble(passage_shoes, options_shoes_neg, 'unanswerable', cot_shoes_neg)
    neg_example_2 = assemble(passage_shoes, options_shoes_neg, '(D)', cot_shoes_neg, with_fourth_option=True)
    neg_example_3 = assemble(passage_relationship, options_relationship, '(B)', cot_relationship, with_fourth_option=True)
    pos_example_1_answerability = assemble(passage_relationship, options_relationship, 'answerable', cot_relationship)
    neg_example_1_answerability = assemble(passage_shoes, options_shoes_neg, 'unanswerable', cot_shoes_neg)

    return pos_example_1, pos_example_2, neg_example_1, neg_example_2, neg_example_3, pos_example_1_answerability, neg_example_1_answerability

In [None]:
def get_prompt_list_cosmos_few_shot_with_instructions_v2(filtered_data, is_control_group):
    """Build the few-shot prompts with instructions (Version 2 wording) for every sample.

    Version 2 asks for "unanswerable" / "answerable" replies in the
    Pseudo-Adversarial and Answerability variants. For each sample, the text
    before the "Correct Answer:" marker (which must be preceded by a newline) in
    sample["input"] is used as the context/question, and the text after it as
    the correct answer. Twelve prompt variants are rendered per sample; each
    consists of an instruction, two demonstrations produced by get_example, the
    sample's context and its option list.

    Args:
        filtered_data: iterable of dicts with the keys "id", "input" and "output".
            "output" is indexed at positions 0, 1 and 2 for the listed options.
            "id" must be a string because the final whitespace normalisation is
            applied to every value of the result dicts.
        is_control_group: when truthy, the last listed option ((C), or (B) in the
            two-option variants) is replaced by the sample's correct answer;
            otherwise the options come from sample["output"] only.

    Returns:
        A list with one dict per sample. Each dict holds "id" plus the twelve
        prompt variants, with every run of spaces collapsed to a single space.

    Raises:
        Exception: if a sample's input does not contain the "Correct Answer" marker.
    """
    answer_marker = "\nCorrect Answer:"

    # The demonstrations do not depend on the sample, so build them once
    # instead of once per loop iteration.
    (pos_example_1, pos_example_2, neg_example_1, neg_example_2, neg_example_3,
     pos_example_1_answerability, neg_example_1_answerability) = get_few_shot_with_instructions_example_dicts_cosmos_v2()

    # NOTE(review): the two-option variants reuse the "three options" instruction
    # even though only two options are listed - kept as is so the generated
    # prompts stay unchanged; confirm whether that wording is intended.
    choose_instruction = "Given the following context, question, and three options, which of the options answers the question?"
    pseudo_instruction = choose_instruction + "\nIf none of the options fit, reply \"unanswerable\"."
    several_instruction = "Given the following context, question, and several options, which of the options answers the question?"
    answerability_instruction = "Determine whether the following context, question and three options are answerable. Reply only \"answerable\" or \"unanswerable\":"

    # Whitespace details of the original templates, kept so that the rendered
    # prompts are character-for-character the same as before:
    plain_gap = "\n\n"          # empty line between the instruction and "Example 1:"
    indented_gap = "\n    \n"   # same, but the separating line holds four spaces
    extra_break = "\n "         # extra line break + space after the first demonstration

    # (key, instruction, demonstration 1, demonstration 2, is_CoT, option set, gap, tail after demonstration 1)
    prompt_variants = (
        ("Adversarial-3-options", choose_instruction, pos_example_1, pos_example_2, False, "three", plain_gap, ""),
        ("Adversarial-3-options-CoT", choose_instruction, pos_example_1, pos_example_2, True, "three", plain_gap, extra_break),
        ("Adversarial-2-options", choose_instruction, pos_example_1, pos_example_2, False, "two", plain_gap, ""),
        ("Adversarial-2-options-CoT", choose_instruction, pos_example_1, pos_example_2, True, "two", plain_gap, extra_break),
        ("Pseudo-Adversarial", pseudo_instruction, pos_example_1, neg_example_1, False, "three", plain_gap, ""),
        ("Pseudo-Adversarial-CoT", pseudo_instruction, pos_example_1, neg_example_1, True, "three", plain_gap, ""),
        ("Adversarial-NA-fourth-option", several_instruction, neg_example_2, neg_example_3, False, "four", plain_gap, ""),
        ("Adversarial-NA-fourth-option-CoT", several_instruction, neg_example_2, neg_example_3, True, "four", indented_gap, ""),
        ("Adversarial-NA-fourth-option-Ablation1", several_instruction, pos_example_1, pos_example_2, False, "four", plain_gap, ""),
        ("Adversarial-NA-fourth-option-Ablation1-CoT", several_instruction, pos_example_1, pos_example_2, True, "four", indented_gap, ""),
        ("Answerability", answerability_instruction, pos_example_1_answerability, neg_example_1_answerability, False, "three", plain_gap, ""),
        ("Answerability-CoT", answerability_instruction, pos_example_1_answerability, neg_example_1_answerability, True, "three", plain_gap, ""),
    )

    def build_prompt(instruction, gap, first_example, second_example, is_CoT, first_tail, context, options):
        # One physical template line per f-string fragment.
        return (
            f"{instruction}{gap}"
            f"    Example 1:\n"
            f"    {get_example(first_example, is_CoT)}{first_tail}\n"
            f"\n"
            f"    Example 2: \n"
            f"    {get_example(second_example, is_CoT)}\n"
            f"\n"
            f"    Now your turn:\n"
            f"    {context}\n"
            f"Options:\n"
            f"{options}\n"
            f"Output:"
        )

    prompt_list_few_shot = list()
    for elem in filtered_data:
        prompt_elem = {"id": elem["id"]}

        context_end = elem["input"].find(answer_marker)
        if context_end == -1:
            raise Exception("Didn't find the beginning of the \"Correct Answer\" in the input")

        context = elem["input"][:context_end]
        # Reuse the position found above instead of searching for the marker again.
        correct_answer = elem["input"][context_end:].replace(answer_marker, "").strip()

        # In the control group the last listed option is the correct answer.
        three_options = (f"    (A) {elem['output'][0]}\n"
                         f"    (B) {elem['output'][1]}\n"
                         f"    (C) {correct_answer if is_control_group else elem['output'][2]}")
        options_by_size = {
            "two": (f"    (A) {elem['output'][0]}\n"
                    f"    (B) {correct_answer if is_control_group else elem['output'][1]}"),
            "three": three_options,
            "four": three_options + "\n    (D) None of the above.",
        }

        for key, instruction, first_example, second_example, is_CoT, option_set, gap, first_tail in prompt_variants:
            prompt_elem[key] = build_prompt(instruction, gap, first_example, second_example, is_CoT,
                                            first_tail, context, options_by_size[option_set])

        prompt_list_few_shot.append(prompt_elem)

    # replace consecutive spaces with a single space (applied to every value, including "id")
    prompt_list_few_shot = [{key: re.sub(' +', ' ', value) for key, value in sample.items()} for sample in prompt_list_few_shot]
    return prompt_list_few_shot

In [None]:
# Version 2 few-shot-with-instructions prompts: adversarial setting, then control group.
prompt_list_few_shot_with_instructions_v2 = get_prompt_list_cosmos_few_shot_with_instructions_v2(
    filtered_data, is_control_group=False
)
prompt_list_few_shot_with_instructions_control_group_v2 = get_prompt_list_cosmos_few_shot_with_instructions_v2(
    filtered_data, is_control_group=True
)

### Save to JSON

In [None]:
# Write the ChatGPT prompt lists to <outdir>/chatGPT/<setup>/ as indented JSON,
# one file for the adversarial prompts and one for the control group per setup.
# NOTE(review): only the Version 1 few-shot lists are written by this cell; the
# *_v2 lists built in the cells above are not saved here - confirm that is intended.
prompt_lists_by_setup = {
    "zero_shot": (prompt_list_zero_shot, prompt_list_zero_shot_control_group),
    "few_shot": (prompt_list_few_shot, prompt_list_few_shot_control_group),
    "few_shot_with_instructions": (prompt_list_few_shot_with_instructions,
                                   prompt_list_few_shot_with_instructions_control_group),
}

for setup_name, (adversarial_prompts, control_group_prompts) in prompt_lists_by_setup.items():
    setup_dir = os.path.join(outdir, "chatGPT", setup_name)
    # exist_ok=True creates missing parents (including outdir) and avoids the
    # check-then-create race of a separate os.path.exists() test.
    os.makedirs(setup_dir, exist_ok=True)

    files_to_write = (
        ("cosmosqa_incorrect_answers_adversarial.json", adversarial_prompts),
        ("cosmosqa_incorrect_answers_control_group.json", control_group_prompts),
    )
    for filename, prompts in files_to_write:
        with open(os.path.join(setup_dir, filename), 'w') as f1:
            json.dump(prompts, f1, indent=2)

## Tk-Instruction

### Generate Instructions (Zero-Shot) - Adversarial

In [None]:
# Zero-shot Tk-Instruct prompts (adversarial setting): every prompt starts with
# "Definition:" and lists the options taken from elem["output"].
prompt_list_zero_shot = []

answer_marker = "\nCorrect Answer:"
# Every prompt line after "Definition:" is indented by eight spaces in the prompt text.
prompt_line_break = "\n        "
choose_instruction = "Given the following context, question, and three options, which of the options answers the question?"

for elem in filtered_data:
    prompt_elem = {"id": elem["id"]}

    # Only the part of the input before the "Correct Answer" marker is shown to the model.
    context_end = elem["input"].find(answer_marker)
    if context_end == -1:
        raise Exception("Didn't find the beginning of the \"Correct Answer\" in the input")
    context = elem["input"][:context_end]

    option_lines = [f"({letter}) {elem['output'][position]}" for position, letter in enumerate("ABC")]

    prompt_elem.update({
        "Adversarial": prompt_line_break.join(
            ["Definition:", choose_instruction, context, "Options:", *option_lines]
        ),
        "Adversarial-2-options": prompt_line_break.join(
            ["Definition:", choose_instruction, context, "Options:", *option_lines[:2]]
        ),
        "Pseudo-Adversarial": prompt_line_break.join(
            ["Definition:", choose_instruction, "If none of the options fit, answer \"N/A\".",
             context, "Options:", *option_lines]
        ),
        "Adversarial-NA-fourth-option": prompt_line_break.join(
            ["Definition:", choose_instruction, context, "Options:", *option_lines, "(D) None of the above."]
        ),
        "Answerability": prompt_line_break.join(
            ["Definition:",
             # this instruction line ends with a space before the line break
             "Given the following context, question, and three options, does any of the options answer the question? ",
             "Answer with a single word - \"yes\" or \"no\":",
             context, "Options:", *option_lines]
        ),
    })

    prompt_list_zero_shot.append(prompt_elem)

### Generate Instructions (Zero-Shot) - Control Group

In [None]:
# Zero-shot Tk-Instruct prompts for the control group: same templates as the
# adversarial cell, but the last listed option is the answer parsed from the text
# that follows the "Correct Answer:" marker in the input.
# NOTE: the 8-space indentation inside the multi-line strings is part of the
# emitted prompt text and must stay exactly as written.
prompt_list_zero_shot_control_group = []
answer_marker = "\nCorrect Answer:"
for elem in filtered_data:

    sample_id = elem["id"]

    answer_start = elem["input"].find(answer_marker)
    if answer_start < 0:
        raise Exception("Didn't find the beginning of the \"Correct Answer\" in the input")

    # Split the input around the marker: context before it, answer after it.
    context = elem["input"][:answer_start]
    correct_answer = elem["input"][answer_start:].replace(answer_marker, "").strip()

    # Option listings shared by the prompt variants below.
    candidates = elem["output"]
    first_option = f"""Options:
        (A) {candidates[0]}"""
    three_options = f"""{first_option}
        (B) {candidates[1]}
        (C) {correct_answer}"""
    two_options = f"""{first_option}
        (B) {correct_answer}"""

    # Opening of every "pick an option" prompt.
    selection_task = f"""Definition:
        Given the following context, question, and three options, which of the options answers the question?"""

    prompt_elem = {
        "id": sample_id,
        "Adversarial": f"""{selection_task}
        {context}
        {three_options}""",
        "Adversarial-2-options": f"""{selection_task}
        {context}
        {two_options}""",
        "Pseudo-Adversarial": f"""{selection_task}
        If none of the options fit, answer "N/A".
        {context}
        {three_options}""",
        "Adversarial-NA-fourth-option": f"""{selection_task}
        {context}
        {three_options}
        (D) None of the above.""",
        # Written as explicit pieces because the first instruction line ends with a
        # trailing space that is part of the prompt.
        "Answerability": (
            "Definition:\n"
            "        Given the following context, question, and three options, does any of the options answer the question? \n"
            "        Answer with a single word - \"yes\" or \"no\":\n"
            f"        {context}\n"
            f"        {three_options}"
        ),
    }

    prompt_list_zero_shot_control_group.append(prompt_elem)

### save to json

In [None]:
# Save the Tk-Instruct zero-shot prompt lists as pretty-printed JSON.
# Create the directory the files are actually written to (outdir/tk-instruct/zero_shot),
# not just outdir; otherwise open() raises FileNotFoundError when it is missing.
tk_instruct_zero_shot_dir = os.path.join(outdir, "tk-instruct", "zero_shot")
os.makedirs(tk_instruct_zero_shot_dir, exist_ok=True)

with open(os.path.join(tk_instruct_zero_shot_dir, "cosmosqa_incorrect_answers_adversarial.json"), 'w') as f1:
    f1.write(json.dumps(prompt_list_zero_shot, indent=2))

with open(os.path.join(tk_instruct_zero_shot_dir, "cosmosqa_incorrect_answers_control_group.json"), 'w') as f1:
    f1.write(json.dumps(prompt_list_zero_shot_control_group, indent=2))

## Read the json files (MultiRC)

In [None]:
# Input/output locations for the MultiRC prompts (these reassign `indir`/`outdir`
# used by earlier sections of the notebook).
# Adversarial source file; its "Instances" list is what gets filtered below.
indir = r"data/task055_multirc_write_incorrect_answer.json"
# Control-group source files; each is read through its top-level "data" key.
# NOTE(review): the directory is spelled "mutlirc" - confirm it matches the folder name on disk.
indir_control_group = r"data/mutlirc-v2/splitv2/train_456-fixedIds.json"
indir_control_group_dev = r"data/mutlirc-v2/splitv2/dev_83-fixedIds.json"
# Root directory for the generated prompt files.
outdir = r"generated_prompts"

In [None]:
# Adversarial source: keep the whole JSON document.
with open(indir) as adversarial_file:
    data = json.load(adversarial_file)

# Control group: only the "data" list of each file is needed.
with open(indir_control_group) as control_group_file:
    data_control_group = json.load(control_group_file)["data"]

with open(indir_control_group_dev) as control_group_dev_file:
    data_control_group_dev = json.load(control_group_dev_file)["data"]

# Merge both control-group files into a single list.
data_control_group.extend(data_control_group_dev)

In [None]:
# Number of control-group entries after merging both control-group files.
len(data_control_group)

## Filter only samples with at least three answers

In [None]:
# Adversarial: keep only the instances that come with at least three outputs
filtered_data = list(filter(lambda instance: len(instance["output"]) >= 3, data["Instances"]))
len(filtered_data)

## ChatGPT

### Generate Instructions (Zero-Shot) - Adversarial - Version 1

In [None]:
# Version 1 of the zero-shot ChatGPT prompts for the adversarial samples.
# Only the 3-option listing is generated in this version.
# NOTE: the 4-space indentation inside the multi-line f-strings is part of the
# emitted prompt text and must stay exactly as written.
prompt_list_zero_shot = []
for elem in filtered_data:

    prompt_elem = {"id": elem["id"]}

    # if there is the right answer in the input - remove it
    context = elem["input"].partition("Correct Answer:")[0]
    # make all sentences into a single paragraph ("sent i:" may also start with a non-capital "s")
    for sentence_tag in (r"Sent \d+:", r"sent \d+:"):
        context = re.sub(sentence_tag, "", context)
    # replace "Paragraph-" with "Paragraph:" and remove all new lines
    context = context.replace("Paragraph-", "Paragraph:").replace("\n", " ")
    # replace consecutive spaces with a single space
    context = re.sub(' +', ' ', context)
    # move the Question to a new line, then drop the redundant "." that follows the question mark
    context = context.replace("Question:", "\nQuestion:").replace("?.", "?")

    # Pieces shared by the prompt variants below.
    options = elem["output"]
    abc_options = f"""(A) {options[0]}
    (B) {options[1]}
    (C) {options[2]}"""
    which_option = f"""{context}
    Based on the paragraph, which of the following options answers the question?"""

    prompt_elem["Adversarial-3-options"] = f"""{which_option}
    {abc_options}"""

    prompt_elem["Pseudo-Adversarial"] = f"""{which_option}
    If none of the options fit, answer "N/A".
    Keep your answer short - only "(A)", "(B)", "(C)" or "N/A" if none of the options fit:
    {abc_options}"""

    prompt_elem["Adversarial-NA-fourth-option"] = f"""{which_option}
    Keep your answer short - only "(A)", "(B)", "(C)", "(D)":
    {abc_options}
    (D) None of the above."""

    prompt_elem["Answerability"] = f"""{context}
    Based on the paragraph, does any of the following options answer the question?
    Answer with a single word - "yes" or "no":
    {abc_options}"""

    prompt_list_zero_shot.append(prompt_elem)

### Generate Instructions (Zero-Shot) - Adversarial - Version 2

In [None]:
# Version 2 of the zero-shot ChatGPT prompts for the adversarial samples.
# Besides the 3-option prompts it emits "Adversarial-k-options" for k = 7..2.
# filtered_data only guarantees three outputs per sample (len(elem["output"])>2),
# so a k-option prompt is generated only when the sample really has k outputs;
# indexing elem["output"][3..6] unconditionally raised IndexError on shorter samples.
# NOTE: the 4-space indentation inside the multi-line f-strings is part of the
# emitted prompt text and must stay exactly as written.
prompt_list_zero_shot_v2 = list()
option_letters = "ABCDEFG"
for elem in filtered_data:

    prompt_elem = dict()

    prompt_elem["id"] = elem["id"]

    context_end = elem["input"].find("Correct Answer:")
    context = elem["input"][:context_end] if context_end != -1 else elem["input"] # if there is the right answer in the input - remove it
    context = re.sub(r"Sent \d+:", "", context) # make all sentences into a single paragraph
    context = re.sub(r"sent \d+:", "", context) # make all sentences into a single paragraph (if "sent i:" starts with a non-capital "S")
    context = context.replace("Paragraph-", "Paragraph:").replace("\n", " ") # replace "Paragraph-" with "Paragraph:" and remove all new lines
    context = re.sub(' +', ' ', context) # replace consecutive spaces with a single space
    context = context.replace("Question:", "\nQuestion:") # move the Question to a new line

    context = context.replace("?.", "?") # the paragraph and question appear together, followed by a "." --> so remove this redundant dot.

    outputs = elem["output"]

    # Largest listing first, so the key order of the saved JSON stays 7, 6, ..., 2.
    for n_options in range(7, 1, -1):
        if len(outputs) < n_options:
            continue # not enough outputs for this variant - skip it instead of crashing
        option_lines = "\n".join(f"    ({letter}) {option}" for letter, option in zip(option_letters, outputs[:n_options]))
        prompt_elem[f"Adversarial-{n_options}-options"] = f"""{context}
    Based on the paragraph, which of the following options answers the question?
{option_lines}"""

    prompt_elem["Pseudo-Adversarial"] = f"""{context}
    Based on the paragraph, which of the following options answers the question?
    If none of the options fit, reply "unanswerable".
    Keep your answer short - only "(A)", "(B)", "(C)" or "unanswerable" if none of the options fit:
    (A) {outputs[0]}
    (B) {outputs[1]}
    (C) {outputs[2]}"""

    prompt_elem["Adversarial-NA-fourth-option"] = f"""{context}
    Based on the paragraph, which of the following options answers the question?
    Keep your answer short - only "(A)", "(B)", "(C)", "(D)":
    (A) {outputs[0]}
    (B) {outputs[1]}
    (C) {outputs[2]}
    (D) None of the above."""

    prompt_elem["Answerability"] = f"""Determine whether the following context, question and three options are answerable. Reply only "answerable" or "unanswerable":
    {context}
    (A) {outputs[0]}
    (B) {outputs[1]}
    (C) {outputs[2]}"""

    prompt_list_zero_shot_v2.append(prompt_elem)

### Generate Instructions (Zero-Shot) - Control Group - Version 1

In [None]:
# Version 1 of the zero-shot ChatGPT prompts for the control group: two incorrect
# answers are listed as (A)/(B) and a correct answer as (C).
# Only the 3-option listing is generated in this version.
# NOTE: the 8-space indentation inside the multi-line strings is part of the
# emitted prompt text and must stay exactly as written.
prompt_list_zero_shot_control_group = []
for elem in data_control_group:

    questions = elem['paragraph']['questions']

    # choose questions with at least one correct answer and at least 3 incorrect answers
    potential_questions = []
    for candidate_i, candidate in enumerate(questions):
        n_correct = sum(1 for answer in candidate["answers"] if answer["isAnswer"])
        if n_correct == 0:
            continue
        n_incorrect = sum(1 for answer in candidate["answers"] if not answer["isAnswer"])
        if n_incorrect > 2:
            potential_questions.append(candidate_i)

    # nothing to generate when no question qualifies
    if not potential_questions:
        continue

    for question_i in potential_questions:

        question = questions[question_i]

        prompt_elem = {"id": elem["id"]}

        paragraph = f"Paragraph: {elem['paragraph']['text']}"
        # make all sentences into a single paragraph ("sent i:" may also start with a non-capital "s")
        for sentence_tag in (r"<b>Sent \d+: </b>", r"<b>sent \d+: </b>"):
            paragraph = re.sub(sentence_tag, "", paragraph)
        # drop the "<br>" tags and remove all new lines
        paragraph = paragraph.replace("<br>", "").replace("\n", " ")
        # replace consecutive spaces with a single space
        paragraph = re.sub(' +', ' ', paragraph)

        context = f"{paragraph}\nQuestion: {question['question']}"

        correct_texts = [answer["text"] for answer in question["answers"] if answer["isAnswer"]]
        correct_answer = correct_texts[0]
        incorrect_answers = [answer["text"] for answer in question["answers"] if not answer["isAnswer"]]

        # Pieces shared by the prompt variants below.
        abc_options = f"""(A) {incorrect_answers[0]}
        (B) {incorrect_answers[1]}
        (C) {correct_answer}"""
        which_option = f"""{context}
        Based on the paragraph, which of the following options answers the question?"""

        prompt_elem["Adversarial-3-options"] = f"""{which_option}
        {abc_options}"""

        prompt_elem["Pseudo-Adversarial"] = f"""{which_option}
        If none of the options fit, answer "N/A".
        Keep your answer short - only "(A)", "(B)", "(C)" or "N/A" if none of the options fit:
        {abc_options}"""

        prompt_elem["Adversarial-NA-fourth-option"] = f"""{which_option}
        Keep your answer short - only "(A)", "(B)", "(C)" or "(D)":
        {abc_options}
        (D) None of the above."""

        # Written as explicit pieces because the first instruction line ends with a
        # trailing space that is part of the prompt.
        prompt_elem["Answerability"] = (
            f"{context}\n"
            "        Based on the paragraph, does any of the following options answer the question? \n"
            "        Answer with a single word - \"yes\" or \"no\":\n"
            f"        {abc_options}"
        )

        prompt_list_zero_shot_control_group.append(prompt_elem)

### Generate Instructions (Zero-Shot) - Control Group - Version 2

In [None]:
# Version 2 of the zero-shot ChatGPT prompts for the control group.
# Besides the 3-option prompts it emits "Adversarial-k-options" for k = 7..2, where
# the first k-1 options are incorrect answers and the last one is a correct answer.
# The question filter only guarantees three incorrect answers, so a k-option prompt
# is generated only when k-1 incorrect answers exist; indexing incorrect_answers[3..5]
# unconditionally raised IndexError on questions with fewer incorrect answers.
# NOTE: the 8-space indentation inside the multi-line f-strings is part of the
# emitted prompt text and must stay exactly as written.
prompt_list_zero_shot_control_group_v2 = list()
option_letters = "ABCDEFG"
for elem in data_control_group:

    # choose question with at least one correct answer and at least 3 incorrect answers
    potential_questions = [i for i,q in enumerate(elem['paragraph']['questions']) if len([answer for answer in q["answers"] if answer["isAnswer"]])>0 and len([answer for answer in q["answers"] if not answer["isAnswer"]])>2]

    # if empty, a.k.a there aren't questions with at least one correct answer and at least 3 incorrect answers
    if not potential_questions:
        continue

    for question_i in potential_questions:

        prompt_elem = dict()

        prompt_elem["id"] = elem["id"]

        context = f"Paragraph: {elem['paragraph']['text']}"
        context = re.sub(r"<b>Sent \d+: </b>", "", context) # make all sentences into a single paragraph
        context = re.sub(r"<b>sent \d+: </b>", "", context) # make all sentences into a single paragraph (if "sent i:" starts with a non-capital "S")
        context = context.replace("<br>", "").replace("\n", " ") # drop the "<br>" tags and remove all new lines
        context = re.sub(' +', ' ', context) # replace consecutive spaces with a single space

        context = f"{context}\nQuestion: {elem['paragraph']['questions'][question_i]['question']}"

        correct_answer = [answer["text"] for answer in elem['paragraph']['questions'][question_i]["answers"] if answer["isAnswer"]][0]
        incorrect_answers = [answer["text"] for answer in elem['paragraph']['questions'][question_i]["answers"] if not answer["isAnswer"]]

        # Largest listing first, so the key order of the saved JSON stays 7, 6, ..., 2.
        for n_options in range(7, 1, -1):
            n_incorrect = n_options - 1 # the last option is always the correct answer
            if len(incorrect_answers) < n_incorrect:
                continue # not enough incorrect answers for this variant - skip it instead of crashing
            listed_options = incorrect_answers[:n_incorrect] + [correct_answer]
            option_lines = "\n".join(f"        ({letter}) {option}" for letter, option in zip(option_letters, listed_options))
            prompt_elem[f"Adversarial-{n_options}-options"] = f"""{context}
        Based on the paragraph, which of the following options answers the question?
{option_lines}"""

        # NOTE(review): this prompt says 'answer "unanswerable"' while the adversarial
        # Version 2 cell says 'reply "unanswerable"' - confirm the difference is intended.
        prompt_elem["Pseudo-Adversarial"] = f"""{context}
        Based on the paragraph, which of the following options answers the question?
        If none of the options fit, answer "unanswerable".
        Keep your answer short - only "(A)", "(B)", "(C)" or "unanswerable" if none of the options fit:
        (A) {incorrect_answers[0]}
        (B) {incorrect_answers[1]}
        (C) {correct_answer}"""

        prompt_elem["Adversarial-NA-fourth-option"] = f"""{context}
        Based on the paragraph, which of the following options answers the question?
        Keep your answer short - only "(A)", "(B)", "(C)" or "(D)":
        (A) {incorrect_answers[0]}
        (B) {incorrect_answers[1]}
        (C) {correct_answer}
        (D) None of the above."""

        prompt_elem["Answerability"] = f"""Determine whether the following context, question and three options are answerable. Reply only "answerable" or "unanswerable":
        {context}
        (A) {incorrect_answers[0]}
        (B) {incorrect_answers[1]}
        (C) {correct_answer}"""

        prompt_list_zero_shot_control_group_v2.append(prompt_elem)

### Generate Instructions (Few-Shot) - Adversarial - Version 1

In [None]:
def get_example(example, is_CoT):
    """Render one in-context example as text.

    Args:
        example: dict with the keys 'input', 'option A', 'option B', 'option C' and
            'Answer'; 'option D' is optional and 'CoT' is read only when is_CoT is truthy.
        is_CoT: when truthy, the output line is the 'CoT' text followed by a
            "Therefore, ..." sentence; otherwise it is just the answer.

    Returns:
        The input, an "Options:" listing and an "Output:" line, joined by newlines.
    """
    # The input line keeps a trailing space before the line break.
    lines = [f'{example["input"]} ', "Options:"]
    for letter in "ABC":
        lines.append(f" ({letter}) {example['option ' + letter]}")

    if "option D" in example:
        lines.append(f' (D) {example["option D"]}')

    answer = example["Answer"]
    if not is_CoT:
        lines.append(f"Output: {answer}")
    else:
        # A bare option letter is introduced with "the answer is"; any other answer
        # text follows "Therefore," directly.
        lead_in = "the answer is " if answer in ("(A)", "(B)", "(C)", "(D)") else ""
        lines.append(f'Output: {example["CoT"]} Therefore, {lead_in}{answer}.')

    return "\n".join(lines)

In [None]:
def get_examples_dict_multirc_few_shot():
    """Return the seven hand-written in-context examples used by the few-shot prompts.

    Returns:
        tuple: (pos_example_1, pos_example_2, neg_example_1, neg_example_2,
        neg_example_3, pos_example_1_answerability, neg_example_1_answerability).
        Each item is a dict with the keys 'input', 'option A', 'option B',
        'option C', optionally 'option D', then 'Answer' and 'CoT'.
    """
    # Two paragraphs (with their questions) are reused across the examples.
    temperature_input = "Paragraph: It was hot that day. The temperature on the wall of the backyard was showing something well over 100 F. Meanwhile Tom, at home, was trying finish the remainder of carrots from last night, and packing for his trip to Chicago tomorrow. As employees of the Art Museum, Tom and his older cousin often had to travel to Chicago. \nQuestion: What was the temperature outside, when Tom was eating carrots?"
    temperature_options = {'option A': 'Well over 100 F.',
                           'option B': 'Not very hot.',
                           'option C': 'Far below 100 F.'}
    temperature_cot = 'The second sentence says that "The temperature on the wall of the backyard was showing something well over 100 F". Then, it says that at the same time, Tom was "trying to finish the remainder of carrots".'

    obama_input = "Paragraph: Obama was born on August 4, 1961, at Kapiʻolani Maternity & Gynecological Hospital in Honolulu, Hawaii. He is the only President to have been born in Hawaii. He was born to a white mother and a black father. His mother, Ann Dunham (1942-1995), was born in Wichita, Kansas, of mostly English descent, with some German, Irish, Scottish, Swiss, and Welsh ancestry. \nQuestion: How old was Obama's mother when he was born?"
    # Option set in which none of the choices is the right age.
    obama_wrong_options = {'option A': '4.',
                           'option B': '29.',
                           'option C': '1961.'}
    obama_cot = 'Obama\'s mother was born 1942. Obama was born in 1961. 1961 - 1942 = 19.'
    obama_cot_no_match = obama_cot + ' So, the correct answer is 19.'

    pos_example_1 = {'input': temperature_input,
                     **temperature_options,
                     'Answer': '(A)',
                     'CoT': temperature_cot}

    pos_example_2 = {'input': obama_input,
                     'option A': '4.',
                     'option B': '19.',
                     'option C': '1961.',
                     'Answer': '(B)',
                     'CoT': obama_cot}

    neg_example_1 = {'input': obama_input,
                     **obama_wrong_options,
                     'Answer': 'none of the options answers the question',
                     'CoT': obama_cot_no_match}

    neg_example_2 = {'input': obama_input,
                     **obama_wrong_options,
                     'option D': 'None of the above.',
                     'Answer': '(D)',
                     'CoT': obama_cot_no_match}

    neg_example_3 = {'input': temperature_input,
                     **temperature_options,
                     'option D': 'None of the above.',
                     'Answer': '(A)',
                     'CoT': temperature_cot}

    pos_example_1_answerability = {'input': temperature_input,
                                   **temperature_options,
                                   'Answer': 'there is a correct option (option A)',
                                   'CoT': temperature_cot}

    neg_example_1_answerability = {'input': obama_input,
                                   **obama_wrong_options,
                                   'Answer': 'there is no correct option',
                                   'CoT': obama_cot_no_match}

    return pos_example_1, pos_example_2, neg_example_1, neg_example_2, neg_example_3, pos_example_1_answerability, neg_example_1_answerability

In [None]:
def get_full_adversarial_prompt(example_1, example_2, is_CoT, context, options):
    """Assemble a two-demonstration few-shot prompt followed by the actual query.

    Args:
        example_1: first demonstration, rendered with get_example(example_1, is_CoT).
        example_2: second demonstration, rendered the same way.
        is_CoT: forwarded to get_example for both demonstrations.
        context: text of the query (the callers pass a paragraph followed by a question).
        options: candidate answers; they are labelled (A), (B), ... in order.

    Returns:
        The prompt string. Every run of spaces is collapsed to one space and the
        "Question:", "Options:" and "Output:" lines are given a single leading
        space. The prompt ends with an open " Output:" line.

    Raises:
        IndexError: when more than seven options are supplied.
    """
    labels = ["(A)", "(B)", "(C)", "(D)", "(E)", "(F)", "(G)"]

    # One entry per prompt line; a single leading space is enough here because
    # consecutive spaces are collapsed further down anyway.
    prompt_lines = [
        "",
        " Example 1:",
        f" {get_example(example_1, is_CoT)}",
        "",
        " Example 2: ",
        f" {get_example(example_2, is_CoT)}",
        " ",
        " Now your turn:",
        f" {context}",
        " Options:",
    ]
    for position, option in enumerate(options):
        prompt_lines.append(f" {labels[position]} {option}")
    prompt_lines.append(" Output:")

    assembled = "\n".join(prompt_lines)
    assembled = re.sub(' +', ' ', assembled)  # replace consecutive spaces with a single space

    # Lines coming from the rendered examples / context start at column 0 - indent them like the rest.
    for header in ("Question:", "Options:", "Output:"):
        assembled = assembled.replace(f"\n{header}", f"\n {header}")
    return assembled

In [None]:
# Few-shot prompts (Version 1): one dict of prompt variants per entry of `filtered_data`.
prompt_list_few_shot = []

(pos_example_1, pos_example_2,
 neg_example_1, neg_example_2, neg_example_3,
 pos_example_1_answerability, neg_example_1_answerability) = get_examples_dict_multirc_few_shot()

# One row per prompt family:
#   (key, first demonstration, second demonstration,
#    number of options taken from elem["output"], append "None of the above."?)
# Version 1 only generates the 3-option "Adversarial" family; the 7/6/5/4/2-option
# families are not generated by this cell.
few_shot_families_v1 = [
    ("Adversarial-3-options", pos_example_1, pos_example_2, 3, False),
    ("Pseudo-Adversarial", pos_example_1, neg_example_1, 3, False),
    ("Adversarial-NA-fourth-option", neg_example_2, neg_example_3, 3, True),
    ("Answerability", pos_example_1_answerability, neg_example_1_answerability, 3, False),
]

for elem in filtered_data:

    prompt_elem = {"id": elem["id"]}

    # if the right answer is part of the input - cut everything from there on
    context_end = elem["input"].find("Correct Answer:")
    context = elem["input"] if context_end == -1 else elem["input"][:context_end]

    # drop the "Sent i:" / "sent i:" markers so that the sentences read as a single paragraph
    for sentence_marker in (r"Sent \d+:", r"sent \d+:"):
        context = re.sub(sentence_marker, "", context)

    context = context.replace("Paragraph-", "Paragraph:")  # normalise the paragraph header
    context = context.replace("\n", " ")                   # remove all new lines
    context = re.sub(' +', ' ', context)                   # replace consecutive spaces with a single space
    context = context.replace("Question:", "\nQuestion:")  # move the Question to a new line
    context = context.replace("?.", "?")                   # the question is followed by a redundant "." - remove it

    for family_key, first_demo, second_demo, option_count, add_none_option in few_shot_families_v1:
        # slicing yields fewer options when elem["output"] is shorter than option_count
        options = list(elem["output"][:option_count])
        if add_none_option:
            options.append("None of the above.")

        # the plain prompt first, then its chain-of-thought twin stored under "<key>-CoT"
        for with_cot in (False, True):
            prompt_key = f"{family_key}-CoT" if with_cot else family_key
            prompt_elem[prompt_key] = get_full_adversarial_prompt(first_demo, second_demo, with_cot, context, options)

    prompt_list_few_shot.append(prompt_elem)

### Generate Instructions (Few-Shot) - Control Group - Version 1

In [None]:
# Few-shot prompts (Version 1) for the control group: every selected question has a correct option.
prompt_list_few_shot_control_group = []

(pos_example_1, pos_example_2,
 neg_example_1, neg_example_2, neg_example_3,
 pos_example_1_answerability, neg_example_1_answerability) = get_examples_dict_multirc_few_shot()

# One row per prompt family:
#   (key, first demonstration, second demonstration,
#    number of incorrect answers placed before the correct one, append "None of the above."?)
# Version 1 only generates the 3-option "Adversarial" family; the 7/6/5/4/2-option
# families are not generated by this cell.
control_families_v1 = [
    ("Adversarial-3-options", pos_example_1, pos_example_2, 2, False),
    ("Pseudo-Adversarial", pos_example_1, neg_example_1, 2, False),
    ("Adversarial-NA-fourth-option", neg_example_2, neg_example_3, 2, True),
    ("Answerability", pos_example_1_answerability, neg_example_1_answerability, 2, False),
]

for elem in data_control_group:

    # indices of the questions with at least one correct answer and at least 3 incorrect answers
    # (an empty selection simply means this element contributes no prompts)
    potential_questions = [
        i for i, q in enumerate(elem['paragraph']['questions'])
        if sum(1 for answer in q["answers"] if answer["isAnswer"]) >= 1
        and sum(1 for answer in q["answers"] if not answer["isAnswer"]) >= 3
    ]

    for question_i in potential_questions:

        prompt_elem = {"id": elem["id"]}

        context = f"Paragraph: {elem['paragraph']['text']}"
        # drop the "<b>Sent i: </b>" / "<b>sent i: </b>" markers so that the sentences read as a single paragraph
        for sentence_marker in (r"<b>Sent \d+: </b>", r"<b>sent \d+: </b>"):
            context = re.sub(sentence_marker, "", context)
        context = context.replace("<br>", "")  # remove the "<br>" tags
        context = context.replace("\n", " ")   # remove all new lines
        context = re.sub(' +', ' ', context)   # replace consecutive spaces with a single space

        context = f"{context}\nQuestion: {elem['paragraph']['questions'][question_i]['question']}"

        # the first answer flagged as correct, and every answer flagged as incorrect (original order)
        correct_answer = [answer["text"] for answer in elem['paragraph']['questions'][question_i]["answers"] if answer["isAnswer"]][0]
        incorrect_answers = [answer["text"] for answer in elem['paragraph']['questions'][question_i]["answers"] if not answer["isAnswer"]]

        # NOTE(review): these three instruction strings are not used by the prompts built in this cell.
        adversarial_instructions = "Given the following paragraph, question, and several options, which of the options answers the question?"
        pseudo_adversarial_instructions = f'{adversarial_instructions} \nIf none of the options fit, answer "N/A".'
        answerability_instructions = 'Based on the paragraph, does any of the following options answer the question? \nAnswer with a single word - "yes" or "no".'

        for family_key, first_demo, second_demo, distractor_count, add_none_option in control_families_v1:
            # NOTE(review): the correct answer always comes right after the incorrect ones.
            options = incorrect_answers[:distractor_count]
            options.append(correct_answer)
            if add_none_option:
                options.append("None of the above.")

            # the plain prompt first, then its chain-of-thought twin stored under "<key>-CoT"
            for with_cot in (False, True):
                prompt_key = f"{family_key}-CoT" if with_cot else family_key
                prompt_elem[prompt_key] = get_full_adversarial_prompt(first_demo, second_demo, with_cot, context, options)

        prompt_list_few_shot_control_group.append(prompt_elem)

### Generate Instructions (Few-Shot) - Adversarial - Version 2

In [None]:
def get_example(example, is_CoT):
    """Render one few-shot demonstration as text.

    Args:
        example: dict with the keys "input", "option A", "option B", "option C",
            "Answer", optionally "option D", and "CoT" (only read when is_CoT is truthy).
        is_CoT: when truthy, the output line holds the chain of thought followed by
            a "Therefore, ..." conclusion; otherwise it holds the bare answer.

    Returns:
        The input, an "Options:" list and a final "Output: ..." line, separated by newlines.
    """
    rendered_lines = [f'{example["input"]} ', "Options:"]
    for letter in ("A", "B", "C"):
        rendered_lines.append(f' ({letter}) {example["option " + letter]}')
    if 'option D' in example.keys():
        rendered_lines.append(f' (D) {example["option D"]}')

    answer = example["Answer"]
    if not is_CoT:
        output_line = f"Output: {answer}"
    elif answer in ["(A)", "(B)", "(C)", "(D)"]:
        # option letters are introduced with "the answer is"
        output_line = f'Output: {example["CoT"]} Therefore, the answer is {answer}.'
    else:
        # free-text verdicts are appended directly after "Therefore,"
        output_line = f'Output: {example["CoT"]} Therefore, {answer}.'

    rendered_lines.append(output_line)
    return "\n".join(rendered_lines)

In [None]:
def get_examples_dict_multirc_few_shot_v2():
    """Build the hand-written demonstrations used by the Version 2 few-shot prompts.

    Returns:
        A 7-tuple of dicts, in this order: pos_example_1, pos_example_2,
        neg_example_1, neg_example_2, neg_example_3, pos_example_1_answerability,
        neg_example_1_answerability. Every dict has the keys "input",
        "option A".."option C" (plus "option D" for neg_example_2 and
        neg_example_3), "Answer" and "CoT".
    """
    # Two passages are shared by all seven demonstrations.
    weather_input = "Paragraph: It was hot that day. The temperature on the wall of the backyard was showing something well over 100 F. Meanwhile Tom, at home, was trying finish the remainder of carrots from last night, and packing for his trip to Chicago tomorrow. As employees of the Art Museum, Tom and his older cousin often had to travel to Chicago. \nQuestion: What was the temperature outside, when Tom was eating carrots?"
    weather_options = ['Well over 100 F.', 'Not very hot.', 'Far below 100 F.']
    weather_cot = 'The second sentence says that "The temperature on the wall of the backyard was showing something well over 100 F". Then, it says that at the same time, Tom was "trying to finish the remainder of carrots".'

    obama_input = "Paragraph: Obama was born on August 4, 1961, at Kapiʻolani Maternity & Gynecological Hospital in Honolulu, Hawaii. He is the only President to have been born in Hawaii. He was born to a white mother and a black father. His mother, Ann Dunham (1942-1995), was born in Wichita, Kansas, of mostly English descent, with some German, Irish, Scottish, Swiss, and Welsh ancestry. \nQuestion: How old was Obama's mother when he was born?"
    obama_options_with_answer = ['4.', '19.', '1961.']
    obama_options_without_answer = ['4.', '29.', '1961.']  # "19." is swapped for "29."
    obama_cot = 'Obama\'s mother was born 1942. Obama was born in 1961. 1961 - 1942 = 19.'
    obama_cot_no_match = obama_cot + ' So, the correct answer is 19.'

    none_of_the_above = 'None of the above.'

    def build(input_text, option_texts, answer, cot):
        # Keys are inserted in the order: input, options, Answer, CoT.
        built = {'input': input_text}
        for letter, option_text in zip("ABCD", option_texts):
            built[f'option {letter}'] = option_text
        built['Answer'] = answer
        built['CoT'] = cot
        return built

    pos_example_1 = build(weather_input, weather_options, '(A)', weather_cot)
    pos_example_2 = build(obama_input, obama_options_with_answer, '(B)', obama_cot)

    neg_example_1 = build(obama_input, obama_options_without_answer, 'unanswerable', obama_cot_no_match)
    neg_example_2 = build(obama_input, obama_options_without_answer + [none_of_the_above], '(D)', obama_cot_no_match)
    neg_example_3 = build(weather_input, weather_options + [none_of_the_above], '(A)', weather_cot)

    pos_example_1_answerability = build(weather_input, weather_options, 'answerable', weather_cot)
    neg_example_1_answerability = build(obama_input, obama_options_without_answer, 'unanswerable', obama_cot_no_match)

    return (
        pos_example_1,
        pos_example_2,
        neg_example_1,
        neg_example_2,
        neg_example_3,
        pos_example_1_answerability,
        neg_example_1_answerability,
    )

In [None]:
def get_full_adversarial_prompt(example_1, example_2, is_CoT, context, options):
    """Assemble a two-demonstration few-shot prompt followed by the actual query.

    Args:
        example_1: first demonstration, rendered with get_example(example_1, is_CoT).
        example_2: second demonstration, rendered the same way.
        is_CoT: forwarded to get_example for both demonstrations.
        context: text of the query (the callers pass a paragraph followed by a question).
        options: candidate answers; they are labelled (A), (B), ... in order.

    Returns:
        The prompt string. Every run of spaces is collapsed to one space and the
        "Question:", "Options:" and "Output:" lines are given a single leading
        space. The prompt ends with an open " Output:" line.

    Raises:
        IndexError: when more than seven options are supplied.
    """
    labels = ["(A)", "(B)", "(C)", "(D)", "(E)", "(F)", "(G)"]

    # One entry per prompt line; a single leading space is enough here because
    # consecutive spaces are collapsed further down anyway.
    prompt_lines = [
        "",
        " Example 1:",
        f" {get_example(example_1, is_CoT)}",
        "",
        " Example 2: ",
        f" {get_example(example_2, is_CoT)}",
        " ",
        " Now your turn:",
        f" {context}",
        " Options:",
    ]
    for position, option in enumerate(options):
        prompt_lines.append(f" {labels[position]} {option}")
    prompt_lines.append(" Output:")

    assembled = "\n".join(prompt_lines)
    assembled = re.sub(' +', ' ', assembled)  # replace consecutive spaces with a single space

    # Lines coming from the rendered examples / context start at column 0 - indent them like the rest.
    for header in ("Question:", "Options:", "Output:"):
        assembled = assembled.replace(f"\n{header}", f"\n {header}")
    return assembled

In [None]:
# Few-shot prompts (Version 2): one dict of prompt variants per entry of `filtered_data`.
prompt_list_few_shot_v2 = []

(pos_example_1, pos_example_2,
 neg_example_1, neg_example_2, neg_example_3,
 pos_example_1_answerability, neg_example_1_answerability) = get_examples_dict_multirc_few_shot_v2()

# One row per prompt family:
#   (key, first demonstration, second demonstration,
#    number of options taken from elem["output"], append "None of the above."?)
# The "Adversarial-<n>-options" families come first (n = 7 down to 2), then the other three.
few_shot_families_v2 = [
    (f"Adversarial-{family_size}-options", pos_example_1, pos_example_2, family_size, False)
    for family_size in (7, 6, 5, 4, 3, 2)
]
few_shot_families_v2 += [
    ("Pseudo-Adversarial", pos_example_1, neg_example_1, 3, False),
    ("Adversarial-NA-fourth-option", neg_example_2, neg_example_3, 3, True),
    ("Answerability", pos_example_1_answerability, neg_example_1_answerability, 3, False),
]

for elem in filtered_data:

    prompt_elem = {"id": elem["id"]}

    # if the right answer is part of the input - cut everything from there on
    context_end = elem["input"].find("Correct Answer:")
    context = elem["input"] if context_end == -1 else elem["input"][:context_end]

    # drop the "Sent i:" / "sent i:" markers so that the sentences read as a single paragraph
    for sentence_marker in (r"Sent \d+:", r"sent \d+:"):
        context = re.sub(sentence_marker, "", context)

    context = context.replace("Paragraph-", "Paragraph:")  # normalise the paragraph header
    context = context.replace("\n", " ")                   # remove all new lines
    context = re.sub(' +', ' ', context)                   # replace consecutive spaces with a single space
    context = context.replace("Question:", "\nQuestion:")  # move the Question to a new line
    context = context.replace("?.", "?")                   # the question is followed by a redundant "." - remove it

    for family_key, first_demo, second_demo, option_count, add_none_option in few_shot_families_v2:
        # slicing yields fewer options when elem["output"] is shorter than option_count
        options = list(elem["output"][:option_count])
        if add_none_option:
            options.append("None of the above.")

        # the plain prompt first, then its chain-of-thought twin stored under "<key>-CoT"
        for with_cot in (False, True):
            prompt_key = f"{family_key}-CoT" if with_cot else family_key
            prompt_elem[prompt_key] = get_full_adversarial_prompt(first_demo, second_demo, with_cot, context, options)

    prompt_list_few_shot_v2.append(prompt_elem)

### Generate Instructions (Few-Shot) - Control Group - Version 2

In [None]:
# Few-shot prompts (Version 2) for the control group: every selected question has a correct option.
prompt_list_few_shot_control_group_v2 = []

(pos_example_1, pos_example_2,
 neg_example_1, neg_example_2, neg_example_3,
 pos_example_1_answerability, neg_example_1_answerability) = get_examples_dict_multirc_few_shot_v2()

# One row per prompt family:
#   (key, first demonstration, second demonstration,
#    number of incorrect answers placed before the correct one, append "None of the above."?)
# An "Adversarial-<n>-options" family uses up to n-1 incorrect answers plus the correct one.
# NOTE(review): the question filter below only guarantees 3 incorrect answers, so the
# 5/6/7-option families can end up with fewer options than their name says.
control_families_v2 = [
    (f"Adversarial-{family_size}-options", pos_example_1, pos_example_2, family_size - 1, False)
    for family_size in (7, 6, 5, 4, 3, 2)
]
control_families_v2 += [
    ("Pseudo-Adversarial", pos_example_1, neg_example_1, 2, False),
    ("Adversarial-NA-fourth-option", neg_example_2, neg_example_3, 2, True),
    ("Answerability", pos_example_1_answerability, neg_example_1_answerability, 2, False),
]

for elem in data_control_group:

    # indices of the questions with at least one correct answer and at least 3 incorrect answers
    # (an empty selection simply means this element contributes no prompts)
    potential_questions = [
        i for i, q in enumerate(elem['paragraph']['questions'])
        if sum(1 for answer in q["answers"] if answer["isAnswer"]) >= 1
        and sum(1 for answer in q["answers"] if not answer["isAnswer"]) >= 3
    ]

    for question_i in potential_questions:

        prompt_elem = {"id": elem["id"]}

        context = f"Paragraph: {elem['paragraph']['text']}"
        # drop the "<b>Sent i: </b>" / "<b>sent i: </b>" markers so that the sentences read as a single paragraph
        for sentence_marker in (r"<b>Sent \d+: </b>", r"<b>sent \d+: </b>"):
            context = re.sub(sentence_marker, "", context)
        context = context.replace("<br>", "")  # remove the "<br>" tags
        context = context.replace("\n", " ")   # remove all new lines
        context = re.sub(' +', ' ', context)   # replace consecutive spaces with a single space

        context = f"{context}\nQuestion: {elem['paragraph']['questions'][question_i]['question']}"

        # the first answer flagged as correct, and every answer flagged as incorrect (original order)
        correct_answer = [answer["text"] for answer in elem['paragraph']['questions'][question_i]["answers"] if answer["isAnswer"]][0]
        incorrect_answers = [answer["text"] for answer in elem['paragraph']['questions'][question_i]["answers"] if not answer["isAnswer"]]

        # NOTE(review): these three instruction strings are not used by the prompts built in this cell.
        adversarial_instructions = "Given the following paragraph, question, and several options, which of the options answers the question?"
        pseudo_adversarial_instructions = f'{adversarial_instructions} \nIf none of the options fit, answer "N/A".'
        answerability_instructions = 'Based on the paragraph, does any of the following options answer the question? \nAnswer with a single word - "yes" or "no".'

        for family_key, first_demo, second_demo, distractor_count, add_none_option in control_families_v2:
            # NOTE(review): the correct answer always comes right after the incorrect ones.
            options = incorrect_answers[:distractor_count]
            options.append(correct_answer)
            if add_none_option:
                options.append("None of the above.")

            # the plain prompt first, then its chain-of-thought twin stored under "<key>-CoT"
            for with_cot in (False, True):
                prompt_key = f"{family_key}-CoT" if with_cot else family_key
                prompt_elem[prompt_key] = get_full_adversarial_prompt(first_demo, second_demo, with_cot, context, options)

        prompt_list_few_shot_control_group_v2.append(prompt_elem)

### Generate Instructions (Few-Shot with Instructions) - Adversarial - Version 1

In [None]:
def get_example(example, is_CoT):
    """Render one few-shot example (input, lettered options, expected output) as text.

    Args:
        example: mapping with the keys "input", "option A", "option B", "option C"
            and "Answer"; "option D" is rendered only when present, and "CoT" is
            read only when ``is_CoT`` is true.
        is_CoT: when true, the output line holds the chain-of-thought text followed
            by a "Therefore, ..." conclusion; otherwise it holds only the answer.

    Returns:
        The rendered example as a single newline-separated string.
    """
    option_letters = ["A", "B", "C"]
    if "option D" in example:
        option_letters.append("D")

    # The trailing space after the input text is part of the rendered example.
    rendered_lines = [f'{example["input"]} ', "Options:"]
    for letter in option_letters:
        rendered_lines.append(f" ({letter}) {example['option ' + letter]}")

    if not is_CoT:
        output_text = f'{example["Answer"]}'
    elif example["Answer"] in ["(A)", "(B)", "(C)", "(D)"]:
        # Lettered answers are introduced with "the answer is".
        output_text = f'{example["CoT"]} Therefore, the answer is {example["Answer"]}.'
    else:
        # Free-form answers (anything that is not an option letter) follow "Therefore," directly.
        output_text = f'{example["CoT"]} Therefore, {example["Answer"]}.'
    rendered_lines.append(f"Output: {output_text}")

    return "\n".join(rendered_lines)

In [None]:
def get_examples_dict_multirc_few_shot_with_instructions():
    """Build the hand-written few-shot examples used by the Version 1 prompts.

    Two passage/question pairs are reused across all examples: a "temperature"
    one (whose listed options contain the right answer) and an "Obama" one
    (offered once with the right option "19." and otherwise with "29." instead).

    Returns:
        A 7-tuple of freshly built dicts, in this order: pos_example_1,
        pos_example_2, neg_example_1, neg_example_2, neg_example_3,
        pos_example_1_answerability, neg_example_1_answerability.
        Every dict has the keys "input", "option A", "option B", "option C",
        "Answer" and "CoT"; neg_example_2 and neg_example_3 also have "option D".
    """
    temperature_input = "Paragraph: It was hot that day. The temperature on the wall of the backyard was showing something well over 100 F. Meanwhile Tom, at home, was trying finish the remainder of carrots from last night, and packing for his trip to Chicago tomorrow. As employees of the Art Museum, Tom and his older cousin often had to travel to Chicago. \nQuestion: What was the temperature outside, when Tom was eating carrots?"
    temperature_options = ['Well over 100 F.', 'Not very hot.', 'Far below 100 F.']
    temperature_cot = 'The second sentence says that "The temperature on the wall of the backyard was showing something well over 100 F". Then, it says that at the same time, Tom was "trying to finish the remainder of carrots".'

    obama_input = "Paragraph: Obama was born on August 4, 1961, at Kapiʻolani Maternity & Gynecological Hospital in Honolulu, Hawaii. He is the only President to have been born in Hawaii. He was born to a white mother and a black father. His mother, Ann Dunham (1942-1995), was born in Wichita, Kansas, of mostly English descent, with some German, Irish, Scottish, Swiss, and Welsh ancestry. \nQuestion: How old was Obama's mother when he was born?"
    obama_options_with_answer = ['4.', '19.', '1961.']
    obama_options_without_answer = ['4.', '29.', '1961.']
    obama_cot = "Obama's mother was born 1942. Obama was born in 1961. 1961 - 1942 = 19."
    obama_cot_with_conclusion = obama_cot + " So, the correct answer is 19."

    none_of_the_above = 'None of the above.'

    def make_example(input_text, option_texts, answer, cot):
        # Keys are inserted as: input, option A.., Answer, CoT.
        example = {'input': input_text}
        for letter, option_text in zip("ABCD", option_texts):
            example['option ' + letter] = option_text
        example['Answer'] = answer
        example['CoT'] = cot
        return example

    pos_example_1 = make_example(temperature_input, temperature_options, '(A)', temperature_cot)
    pos_example_2 = make_example(obama_input, obama_options_with_answer, '(B)', obama_cot)

    # Negative examples: none of the three listed options is the right answer.
    neg_example_1 = make_example(obama_input, obama_options_without_answer, 'N/A', obama_cot_with_conclusion)
    neg_example_2 = make_example(obama_input, obama_options_without_answer + [none_of_the_above], '(D)', obama_cot_with_conclusion)
    # Four options including "None of the above.", but the answer is still option (A).
    neg_example_3 = make_example(temperature_input, temperature_options + [none_of_the_above], '(A)', temperature_cot)

    # Answerability examples answer with "yes"/"no" instead of an option letter.
    pos_example_1_answerability = make_example(temperature_input, temperature_options, 'yes', temperature_cot)
    neg_example_1_answerability = make_example(obama_input, obama_options_without_answer, 'no', obama_cot_with_conclusion)

    return pos_example_1, pos_example_2, neg_example_1, neg_example_2, neg_example_3, pos_example_1_answerability, neg_example_1_answerability

In [None]:
def get_full_adversarial_prompt(example_1, example_2, is_CoT, context, options, instructions):
    """Assemble a two-example few-shot prompt that ends with the target question.

    The prompt consists of the instructions, "Example 1" and "Example 2"
    (each rendered by ``get_example``), a "Now your turn:" section with the
    context and the lettered options, and a final "Output:" line.

    Args:
        example_1: first few-shot example, passed to ``get_example``.
        example_2: second few-shot example, passed to ``get_example``.
        is_CoT: forwarded to ``get_example`` for both examples.
        context: text of the target item, placed under "Now your turn:".
        options: answer options for the target item; at most seven are
            supported, labelled (A) through (G).
        instructions: text placed at the very top of the prompt.

    Returns:
        The prompt string. Every run of consecutive spaces anywhere in it
        (including inside the context and options) is collapsed to one space.

    Raises:
        IndexError: if more than seven options are given.
    """
    option_names = ["(A)", "(B)", "(C)", "(D)", "(E)", "(F)", "(G)"]

    # Each line carries the single leading space that remains once runs of
    # spaces are collapsed below.
    prompt_lines = [
        f"{instructions}",
        " ",
        " Example 1:",
        f" {get_example(example_1, is_CoT)}",
        "",
        " Example 2: ",
        f" {get_example(example_2, is_CoT)}",
        " ",
        " Now your turn:",
        f" {context}",
        " Options:",
    ]
    prompt_lines.extend(
        f" {option_names[position]} {option}" for position, option in enumerate(options)
    )
    prompt_lines.append(" Output:")

    # replace consecutive spaces with a single space
    full_prompt = re.sub(' +', ' ', "\n".join(prompt_lines))

    # Section labels that start at column 0 (inside the rendered examples and
    # the context) get the same one-space indent as the other prompt lines.
    for label in ("Question:", "Options:", "Output:"):
        full_prompt = full_prompt.replace("\n" + label, "\n " + label)
    return full_prompt

In [None]:
# Few-shot prompts with explicit instructions - Version 1 wording ("N/A" and "yes"/"no").
prompt_list_few_shot_with_instructions = []

(
    pos_example_1,
    pos_example_2,
    neg_example_1,
    neg_example_2,
    neg_example_3,
    pos_example_1_answerability,
    neg_example_1_answerability,
) = get_examples_dict_multirc_few_shot_with_instructions()

# Only the 3-option adversarial variant is generated in Version 1; the 7-, 6-, 5-, 4- and
# 2-option variants are disabled here (list their sizes, in order, to generate them too).
adversarial_option_counts = (3,)

for elem in filtered_data:

    prompt_elem = {"id": elem["id"]}

    # if there is the right answer in the input - remove it
    context_end = elem["input"].find("Correct Answer:")
    context = elem["input"] if context_end == -1 else elem["input"][:context_end]
    # make all sentences into a single paragraph ("Sent i:" and the non-capital "sent i:")
    for sentence_marker in (r"Sent \d+:", r"sent \d+:"):
        context = re.sub(sentence_marker, "", context)
    # replace "Paragraph-" with "Paragraph:" and remove all new lines
    context = context.replace("Paragraph-", "Paragraph:").replace("\n", " ")
    # replace consecutive spaces with a single space
    context = re.sub(' +', ' ', context)
    # move the Question to a new line
    context = context.replace("Question:", "\nQuestion:")
    # the paragraph and question appear together, followed by a "." --> so remove this redundant dot.
    context = context.replace("?.", "?")

    adversarial_instructions = "Given the following paragraph, question, and several options, which of the options answers the question?"
    pseudo_adversarial_instructions = f'{adversarial_instructions} \nIf none of the options fit, answer "N/A".'
    answerability_instructions = 'Based on the paragraph, does any of the following options answer the question? \nAnswer with a single word - "yes" or "no".'

    first_three_options = list(elem["output"][:3])
    options_with_none_of_the_above = first_three_options + ["None of the above."]

    # One row per prompt variant: (key, first example, second example, options, instructions).
    # Row order determines the key order inside prompt_elem.
    prompt_variants = [
        (f"Adversarial-{option_count}-options", pos_example_1, pos_example_2, list(elem["output"][:option_count]), adversarial_instructions)
        for option_count in adversarial_option_counts
    ]
    prompt_variants += [
        ("Pseudo-Adversarial", pos_example_1, neg_example_1, first_three_options, pseudo_adversarial_instructions),
        ("Adversarial-NA-fourth-option", neg_example_2, neg_example_3, options_with_none_of_the_above, adversarial_instructions),
        ("Adversarial-NA-fourth-option-Ablation1", pos_example_1, pos_example_2, options_with_none_of_the_above, adversarial_instructions),
        ("Answerability", pos_example_1_answerability, neg_example_1_answerability, first_three_options, answerability_instructions),
    ]

    # Every variant is stored twice: plain under its key, chain-of-thought under "<key>-CoT".
    for variant_key, example_1, example_2, options, instructions in prompt_variants:
        for is_CoT in (False, True):
            prompt_key = f"{variant_key}-CoT" if is_CoT else variant_key
            prompt_elem[prompt_key] = get_full_adversarial_prompt(example_1, example_2, is_CoT, context, options, instructions)

    prompt_list_few_shot_with_instructions.append(prompt_elem)

### Generate Instructions (Few-Shot with Instructions) - Adversarial - Version 2

In [None]:
def get_example(example, is_CoT):
    """Format a single few-shot example as prompt text.

    NOTE(review): this repeats the ``get_example`` definition from the
    Version 1 section earlier in the notebook; consider keeping one copy.

    Args:
        example: mapping providing "input", "option A", "option B",
            "option C" and "Answer". An "option D" entry is optional, and
            "CoT" is only looked up when ``is_CoT`` is true.
        is_CoT: whether the output line should contain the chain-of-thought
            text and a "Therefore, ..." conclusion instead of the bare answer.

    Returns:
        The formatted example: the input, an "Options:" list and an
        "Output:" line, separated by newlines.
    """
    # The space right after the input text belongs to the rendered format.
    rendered = "{} \nOptions:\n (A) {}\n (B) {}\n (C) {}".format(
        example["input"], example["option A"], example["option B"], example["option C"]
    )

    if "option D" in example:
        rendered += "\n (D) {}".format(example["option D"])

    if is_CoT:
        # Option-letter answers read "the answer is (X)"; any other answer is stated directly.
        answer_is_option_letter = example["Answer"] in ("(A)", "(B)", "(C)", "(D)")
        conclusion_template = "the answer is {}" if answer_is_option_letter else "{}"
        output_text = "{} Therefore, {}.".format(
            example["CoT"], conclusion_template.format(example["Answer"])
        )
    else:
        output_text = "{}".format(example["Answer"])

    return rendered + "\nOutput: " + output_text

In [None]:
def get_examples_dict_multirc_few_shot_with_instructions_v2():
    """Build the hand-written few-shot examples used by the Version 2 prompts.

    The non-lettered answers in this version are the words "answerable" and
    "unanswerable". All examples are built from two passage/question pairs
    (a "temperature" one and an "Obama" one).

    Returns:
        A 7-tuple of dicts, in this order: pos_example_1, pos_example_2,
        neg_example_1, neg_example_2, neg_example_3,
        pos_example_1_answerability, neg_example_1_answerability.
        Each dict has "input", "option A", "option B", "option C", "Answer"
        and "CoT"; neg_example_2 and neg_example_3 additionally have "option D".
    """
    temperature_base = {
        'input': "Paragraph: It was hot that day. The temperature on the wall of the backyard was showing something well over 100 F. Meanwhile Tom, at home, was trying finish the remainder of carrots from last night, and packing for his trip to Chicago tomorrow. As employees of the Art Museum, Tom and his older cousin often had to travel to Chicago. \nQuestion: What was the temperature outside, when Tom was eating carrots?",
        'option A': 'Well over 100 F.',
        'option B': 'Not very hot.',
        'option C': 'Far below 100 F.',
    }
    temperature_cot = 'The second sentence says that "The temperature on the wall of the backyard was showing something well over 100 F". Then, it says that at the same time, Tom was "trying to finish the remainder of carrots".'

    obama_input = "Paragraph: Obama was born on August 4, 1961, at Kapiʻolani Maternity & Gynecological Hospital in Honolulu, Hawaii. He is the only President to have been born in Hawaii. He was born to a white mother and a black father. His mother, Ann Dunham (1942-1995), was born in Wichita, Kansas, of mostly English descent, with some German, Irish, Scottish, Swiss, and Welsh ancestry. \nQuestion: How old was Obama's mother when he was born?"
    # Same question, but option B is "29." so no listed option is correct.
    obama_base_without_answer = {
        'input': obama_input,
        'option A': '4.',
        'option B': '29.',
        'option C': '1961.',
    }
    obama_cot = "Obama's mother was born 1942. Obama was born in 1961. 1961 - 1942 = 19."
    obama_cot_with_conclusion = f"{obama_cot} So, the correct answer is 19."

    fourth_option = {'option D': 'None of the above.'}

    # Unpacking builds a new dict for every example, so none of them share state.
    pos_example_1 = {**temperature_base, 'Answer': '(A)', 'CoT': temperature_cot}
    pos_example_2 = {
        'input': obama_input,
        'option A': '4.',
        'option B': '19.',
        'option C': '1961.',
        'Answer': '(B)',
        'CoT': obama_cot,
    }

    neg_example_1 = {**obama_base_without_answer, 'Answer': 'unanswerable', 'CoT': obama_cot_with_conclusion}
    neg_example_2 = {**obama_base_without_answer, **fourth_option, 'Answer': '(D)', 'CoT': obama_cot_with_conclusion}
    neg_example_3 = {**temperature_base, **fourth_option, 'Answer': '(A)', 'CoT': temperature_cot}

    pos_example_1_answerability = {**temperature_base, 'Answer': 'answerable', 'CoT': temperature_cot}
    neg_example_1_answerability = {**obama_base_without_answer, 'Answer': 'unanswerable', 'CoT': obama_cot_with_conclusion}

    return pos_example_1, pos_example_2, neg_example_1, neg_example_2, neg_example_3, pos_example_1_answerability, neg_example_1_answerability

In [None]:
def get_full_adversarial_prompt(example_1, example_2, is_CoT, context, options, instructions):
    """Build the full few-shot prompt: instructions, two examples, then the target item.

    NOTE(review): this repeats the ``get_full_adversarial_prompt`` definition
    from the Version 1 section earlier in the notebook; consider keeping one copy.

    Args:
        example_1: first demonstration, rendered through ``get_example``.
        example_2: second demonstration, rendered through ``get_example``.
        is_CoT: passed to ``get_example`` for both demonstrations.
        context: the target text shown under "Now your turn:".
        options: the target's answer options, labelled (A)-(G) in order.
        instructions: the opening text of the prompt.

    Returns:
        The prompt, ending in an "Output:" line. Consecutive spaces are
        collapsed to a single space throughout the whole prompt.

    Raises:
        IndexError: when ``options`` holds more than seven entries.
    """
    option_names = ["(A)", "(B)", "(C)", "(D)", "(E)", "(F)", "(G)"]

    # Template lines carry one leading space, which is what remains after the
    # space-collapsing step below.
    prompt_head = (
        "{}\n"
        " \n"
        " Example 1:\n"
        " {}\n"
        "\n"
        " Example 2: \n"
        " {}\n"
        " \n"
        " Now your turn:\n"
        " {}\n"
        " Options:"
    ).format(
        instructions,
        get_example(example_1, is_CoT),
        get_example(example_2, is_CoT),
        context,
    )

    # Indexing option_names (rather than zipping) keeps the IndexError on an eighth option.
    option_block = "".join(
        "\n {} {}".format(option_names[index], option) for index, option in enumerate(options)
    )

    collapsed_prompt = re.sub(' +', ' ', prompt_head + option_block + "\n Output:")  # replace consecutive spaces with a single space

    # Give column-0 section labels the same one-space indent as the other prompt lines.
    indented_prompt = collapsed_prompt.replace("\nQuestion:", "\n Question:")
    indented_prompt = indented_prompt.replace("\nOptions:", "\n Options:")
    return indented_prompt.replace("\nOutput:", "\n Output:")

In [None]:
# Few-shot prompts with explicit instructions - Version 2 wording ("answerable"/"unanswerable").
prompt_list_few_shot_with_instructions_v2 = []

(
    pos_example_1,
    pos_example_2,
    neg_example_1,
    neg_example_2,
    neg_example_3,
    pos_example_1_answerability,
    neg_example_1_answerability,
) = get_examples_dict_multirc_few_shot_with_instructions_v2()


def _store_prompt_pair(target, key, example_1, example_2, context, options, instructions):
    """Store the plain prompt under `key` and its chain-of-thought twin under `key`-CoT."""
    target[key] = get_full_adversarial_prompt(example_1, example_2, False, context, options, instructions)
    target[f"{key}-CoT"] = get_full_adversarial_prompt(example_1, example_2, True, context, options, instructions)


for elem in filtered_data:

    prompt_elem = {"id": elem["id"]}

    # if there is the right answer in the input - remove it
    context_end = elem["input"].find("Correct Answer:")
    context = elem["input"] if context_end == -1 else elem["input"][:context_end]
    # make all sentences into a single paragraph ("Sent i:" and the non-capital "sent i:")
    for sentence_marker in (r"Sent \d+:", r"sent \d+:"):
        context = re.sub(sentence_marker, "", context)
    # replace "Paragraph-" with "Paragraph:" and remove all new lines
    context = context.replace("Paragraph-", "Paragraph:").replace("\n", " ")
    # replace consecutive spaces with a single space
    context = re.sub(' +', ' ', context)
    # move the Question to a new line
    context = context.replace("Question:", "\nQuestion:")
    # the paragraph and question appear together, followed by a "." --> so remove this redundant dot.
    context = context.replace("?.", "?")

    adversarial_instructions = "Given the following paragraph, question, and several options, which of the options answers the question?"
    pseudo_adversarial_instructions = f'{adversarial_instructions} \nIf none of the options fit, answer "unanswerable".'
    answerability_instructions = 'Determine whether the following context, question and three options are answerable. Reply only "answerable" or "unanswerable":'

    ############## Adversarial-N-options (N = 7 down to 2) ##############
    for option_count in (7, 6, 5, 4, 3, 2):
        options = list(elem["output"][:option_count])
        _store_prompt_pair(prompt_elem, f"Adversarial-{option_count}-options", pos_example_1, pos_example_2, context, options, adversarial_instructions)

    ################# Pseudo-Adversarial ###############
    options = list(elem["output"][:3])
    _store_prompt_pair(prompt_elem, "Pseudo-Adversarial", pos_example_1, neg_example_1, context, options, pseudo_adversarial_instructions)

    ############ Adversarial-NA-fourth-option ##########
    options = list(elem["output"][:3]) + ["None of the above."]
    _store_prompt_pair(prompt_elem, "Adversarial-NA-fourth-option", neg_example_2, neg_example_3, context, options, adversarial_instructions)

    ############ Adversarial-NA-fourth-option-Ablation1 ##########
    # Same four options as above, but demonstrated with the two positive examples.
    _store_prompt_pair(prompt_elem, "Adversarial-NA-fourth-option-Ablation1", pos_example_1, pos_example_2, context, options, adversarial_instructions)

    ################### Answerability ##################
    options = list(elem["output"][:3])
    _store_prompt_pair(prompt_elem, "Answerability", pos_example_1_answerability, neg_example_1_answerability, context, options, answerability_instructions)

    prompt_list_few_shot_with_instructions_v2.append(prompt_elem)

### Generate Instructions (Few-Shot with Instructions) - Control Group - Version 1

In [None]:
# Few-shot-with-instructions prompts (Version 1) for the control group:
# MultiRC questions that have a correct answer among their candidate answers.
prompt_list_few_shot_with_instructions_control_group = []

# Example objects handed to get_full_adversarial_prompt below.
(
    pos_example_1,
    pos_example_2,
    neg_example_1,
    neg_example_2,
    neg_example_3,
    pos_example_1_answerability,
    neg_example_1_answerability,
) = get_examples_dict_multirc_few_shot_with_instructions()

# Instruction texts are the same for every question, so they are defined once.
adversarial_instructions = "Given the following paragraph, question, and several options, which of the options answers the question?"
pseudo_adversarial_instructions = f'{adversarial_instructions} \nIf none of the options fit, answer "N/A".'
answerability_instructions = 'Based on the paragraph, does any of the following options answer the question? \nAnswer with a single word - "yes" or "no".'

# One row per prompt family, in the order the keys are written to prompt_elem:
# (key, first example, second example, instructions, append "None of the above." as last option)
# Every family uses two incorrect answers followed by the correct answer.
# The Adversarial-7/6/5/4/2-options families were left commented out in this version and are not generated.
prompt_variants = [
    ("Adversarial-3-options", pos_example_1, pos_example_2, adversarial_instructions, False),
    ("Pseudo-Adversarial", pos_example_1, neg_example_1, pseudo_adversarial_instructions, False),
    ("Adversarial-NA-fourth-option", neg_example_2, neg_example_3, adversarial_instructions, True),
    ("Adversarial-NA-fourth-option-Ablation1", pos_example_1, pos_example_2, adversarial_instructions, True),
    ("Answerability", pos_example_1_answerability, neg_example_1_answerability, answerability_instructions, False),
]

for elem in data_control_group:

    # indices of questions with at least one correct answer and at least 3 incorrect answers
    # (an empty result simply means the inner loop does nothing for this element)
    potential_questions = [
        idx
        for idx, candidate in enumerate(elem['paragraph']['questions'])
        if any(answer["isAnswer"] for answer in candidate["answers"])
        and sum(not answer["isAnswer"] for answer in candidate["answers"]) > 2
    ]

    for question_i in potential_questions:
        question = elem['paragraph']['questions'][question_i]

        prompt_elem = {"id": elem["id"]}

        # Flatten the paragraph: strip the "<b>Sent N: </b>" / "<b>sent N: </b>" sentence markers,
        # drop "<br>" tags, turn new lines into spaces and collapse runs of spaces.
        context = f"Paragraph: {elem['paragraph']['text']}"
        for sentence_marker in (r"<b>Sent \d+: </b>", r"<b>sent \d+: </b>"):
            context = re.sub(sentence_marker, "", context)
        context = context.replace("<br>", "").replace("\n", " ")
        context = re.sub(' +', ' ', context)

        context = f"{context}\nQuestion: {question['question']}"

        # first correct answer, and all incorrect answers in their original order
        correct_answer = [answer["text"] for answer in question["answers"] if answer["isAnswer"]][0]
        incorrect_answers = [answer["text"] for answer in question["answers"] if not answer["isAnswer"]]

        for key, first_example, second_example, instructions, add_none_of_the_above in prompt_variants:
            # plain prompt first, then its "-CoT" twin (third argument True)
            for key_suffix, is_cot in (("", False), ("-CoT", True)):
                # a fresh options list is built for every call, as in the original cell
                options = incorrect_answers[:2] + [correct_answer]
                if add_none_of_the_above:
                    options.append("None of the above.")
                prompt_elem[key + key_suffix] = get_full_adversarial_prompt(first_example, second_example, is_cot, context, options, instructions)

        prompt_list_few_shot_with_instructions_control_group.append(prompt_elem)

### Generate Instructions (Few-Shot with Instructions) - Control Group - Version 2

In [None]:
# Few-shot-with-instructions prompts (Version 2) for the control group:
# MultiRC questions that have a correct answer among their candidate answers.
prompt_list_few_shot_with_instructions_control_group_v2 = []

# Example objects handed to get_full_adversarial_prompt below.
(
    pos_example_1,
    pos_example_2,
    neg_example_1,
    neg_example_2,
    neg_example_3,
    pos_example_1_answerability,
    neg_example_1_answerability,
) = get_examples_dict_multirc_few_shot_with_instructions_v2()

# Instruction texts are the same for every question, so they are defined once.
adversarial_instructions = "Given the following paragraph, question, and several options, which of the options answers the question?"
pseudo_adversarial_instructions = f'{adversarial_instructions} \nIf none of the options fit, answer "unanswerable".'
answerability_instructions = 'Determine whether the following context, question and three options are answerable. Reply only "answerable" or "unanswerable":'

# One row per prompt family, in the order the keys are written to prompt_elem:
# (key, number of incorrect answers, first example, second example, instructions,
#  append "None of the above." as last option)
# The correct answer is always placed right after the incorrect ones.
# NOTE(review): the question filter below only guarantees 3 incorrect answers, and slicing
# never raises, so e.g. "Adversarial-7-options" can end up with fewer than 7 options for
# questions with fewer than 6 incorrect answers - confirm this is intended.
prompt_variants = [
    ("Adversarial-7-options", 6, pos_example_1, pos_example_2, adversarial_instructions, False),
    ("Adversarial-6-options", 5, pos_example_1, pos_example_2, adversarial_instructions, False),
    ("Adversarial-5-options", 4, pos_example_1, pos_example_2, adversarial_instructions, False),
    ("Adversarial-4-options", 3, pos_example_1, pos_example_2, adversarial_instructions, False),
    ("Adversarial-3-options", 2, pos_example_1, pos_example_2, adversarial_instructions, False),
    ("Adversarial-2-options", 1, pos_example_1, pos_example_2, adversarial_instructions, False),
    ("Pseudo-Adversarial", 2, pos_example_1, neg_example_1, pseudo_adversarial_instructions, False),
    ("Adversarial-NA-fourth-option", 2, neg_example_2, neg_example_3, adversarial_instructions, True),
    ("Adversarial-NA-fourth-option-Ablation1", 2, pos_example_1, pos_example_2, adversarial_instructions, True),
    ("Answerability", 2, pos_example_1_answerability, neg_example_1_answerability, answerability_instructions, False),
]

for elem in data_control_group:

    # indices of questions with at least one correct answer and at least 3 incorrect answers
    # (an empty result simply means the inner loop does nothing for this element)
    potential_questions = [
        idx
        for idx, candidate in enumerate(elem['paragraph']['questions'])
        if any(answer["isAnswer"] for answer in candidate["answers"])
        and sum(not answer["isAnswer"] for answer in candidate["answers"]) > 2
    ]

    for question_i in potential_questions:
        question = elem['paragraph']['questions'][question_i]

        prompt_elem = {"id": elem["id"]}

        # Flatten the paragraph: strip the "<b>Sent N: </b>" / "<b>sent N: </b>" sentence markers,
        # drop "<br>" tags, turn new lines into spaces and collapse runs of spaces.
        context = f"Paragraph: {elem['paragraph']['text']}"
        for sentence_marker in (r"<b>Sent \d+: </b>", r"<b>sent \d+: </b>"):
            context = re.sub(sentence_marker, "", context)
        context = context.replace("<br>", "").replace("\n", " ")
        context = re.sub(' +', ' ', context)

        context = f"{context}\nQuestion: {question['question']}"

        # first correct answer, and all incorrect answers in their original order
        correct_answer = [answer["text"] for answer in question["answers"] if answer["isAnswer"]][0]
        incorrect_answers = [answer["text"] for answer in question["answers"] if not answer["isAnswer"]]

        for key, n_incorrect, first_example, second_example, instructions, add_none_of_the_above in prompt_variants:
            # plain prompt first, then its "-CoT" twin (third argument True)
            for key_suffix, is_cot in (("", False), ("-CoT", True)):
                # a fresh options list is built for every call, as in the original cell
                options = incorrect_answers[:n_incorrect] + [correct_answer]
                if add_none_of_the_above:
                    options.append("None of the above.")
                prompt_elem[key + key_suffix] = get_full_adversarial_prompt(first_example, second_example, is_cot, context, options, instructions)

        prompt_list_few_shot_with_instructions_control_group_v2.append(prompt_elem)

### Save to json

In [None]:
# Write every chatGPT prompt list to <outdir>/chatGPT/<setting>/<file name>.
# (setting sub-directory, file name, prompt list) - written in this order.
# NOTE(review): the "_v2" few-shot-with-instructions lists built earlier in the notebook
# are not written by this cell - confirm whether they should be saved as well.
chatgpt_outputs = [
    # zero shot
    ("zero_shot", "multirc_incorrect_answers_adversarial.json", prompt_list_zero_shot),
    ("zero_shot", "multirc_incorrect_answers_control_group.json", prompt_list_zero_shot_control_group),
    # few shot
    ("few_shot", "multirc_incorrect_answers_adversarial.json", prompt_list_few_shot),
    ("few_shot", "multirc_incorrect_answers_control_group.json", prompt_list_few_shot_control_group),
    # few shot with instructions
    ("few_shot_with_instructions", "multirc_incorrect_answers_adversarial.json", prompt_list_few_shot_with_instructions),
    ("few_shot_with_instructions", "multirc_incorrect_answers_control_group.json", prompt_list_few_shot_with_instructions_control_group),
]

for setting, filename, prompts in chatgpt_outputs:
    # Creating only `outdir` is not enough: open(..., 'w') fails with FileNotFoundError
    # when the nested "chatGPT/<setting>" directory does not exist yet.
    setting_dir = os.path.join(outdir, "chatGPT", setting)
    os.makedirs(setting_dir, exist_ok=True)
    with open(os.path.join(setting_dir, filename), 'w') as f1:
        f1.write(json.dumps(prompts, indent=2))

## Tk-Instruct

### Generate Instructions (Zero-Shot) - Adversarial

In [None]:
# Zero-shot Tk-Instruct prompts for the adversarial set (entries of filtered_data).
prompt_list = []

# "Definition" texts placed at the top of each prompt.
which_option_definition = "Given the following paragraph, question, and several options, which of the options answers the question?"
pseudo_adversarial_definition = which_option_definition + "\n" + 'If none of the options fit, answer "N/A".'
# note the space before the line break - it is part of the prompt text
answerability_definition = (
    "Given the following paragraph, question, and several options, does any of the options answer the question? "
    "\n"
    'Answer with a single word - "yes" or "no":'
)

# One row per prompt, in the order the keys are written to prompt_elem:
# (key, definition, number of options taken from elem["output"], append "(D) None of the above.")
tk_prompt_variants = [
    ("Adversarial-7-options", which_option_definition, 7, False),
    ("Adversarial-6-options", which_option_definition, 6, False),
    ("Adversarial-5-options", which_option_definition, 5, False),
    ("Adversarial-4-options", which_option_definition, 4, False),
    ("Adversarial-3-options", which_option_definition, 3, False),
    ("Adversarial-2-options", which_option_definition, 2, False),
    ("Pseudo-Adversarial", pseudo_adversarial_definition, 3, False),
    ("Adversarial-NA-fourth-option", which_option_definition, 3, True),
    ("Answerability", answerability_definition, 3, False),
]

for elem in filtered_data:

    prompt_elem = {"id": elem["id"]}

    # keep only the text before "Correct Answer:" (if the right answer is in the input - remove it)
    context = elem["input"].split("Correct Answer:", 1)[0]
    # make all sentences into a single paragraph: drop the "Sent N:" / "sent N:" markers
    for sentence_marker in (r"Sent \d+:", r"sent \d+:"):
        context = re.sub(sentence_marker, "", context)
    # replace "Paragraph-" with "Paragraph:" and remove all new lines
    context = context.replace("Paragraph-", "Paragraph:").replace("\n", " ")
    # replace consecutive spaces with a single space
    context = re.sub(' +', ' ', context)
    # move the Question to a new line, then remove the redundant "." that follows the question mark
    context = context.replace("Question:", "\nQuestion:").replace("?.", "?")

    # "    (A) ..." through "    (G) ..." - explicit indexing keeps the IndexError
    # for elements with fewer than 7 outputs
    option_lines = [f"    ({letter}) {elem['output'][position]}" for position, letter in enumerate("ABCDEFG")]

    for key, definition, n_options, add_none_of_the_above in tk_prompt_variants:
        shown_lines = option_lines[:n_options]
        if add_none_of_the_above:
            shown_lines = shown_lines + ["    (D) None of the above."]
        prompt_elem[key] = f"Definition:\n{definition}\n\n{context}\nOptions:\n" + "\n".join(shown_lines)

    prompt_list.append(prompt_elem)

### Generate Instructions (Zero-Shot) - Control Group

In [None]:
# Zero-shot Tk-Instruct prompts for the control group:
# MultiRC questions that have a correct answer among their candidate answers.
prompt_list_control_group = []

# "Definition" texts placed at the top of each prompt.
which_option_definition = "Given the following paragraph, question, and several options, which of the options answers the question?"
pseudo_adversarial_definition = which_option_definition + "\n" + 'If none of the options fit, answer "N/A".'
# note the space before the line break - it is part of the prompt text
answerability_definition = (
    "Given the following paragraph, question, and several options, does any of the options answer the question? "
    "\n"
    'Answer with a single word - "yes" or "no":'
)

# One row per prompt, in the order the keys are written to prompt_elem:
# (key, definition, total number of options incl. the correct one, append "(D) None of the above.")
tk_prompt_variants = [
    ("Adversarial-7-options", which_option_definition, 7, False),
    ("Adversarial-6-options", which_option_definition, 6, False),
    ("Adversarial-5-options", which_option_definition, 5, False),
    ("Adversarial-4-options", which_option_definition, 4, False),
    ("Adversarial-3-options", which_option_definition, 3, False),
    ("Adversarial-2-options", which_option_definition, 2, False),
    ("Pseudo-Adversarial", pseudo_adversarial_definition, 3, False),
    ("Adversarial-NA-fourth-option", which_option_definition, 3, True),
    ("Answerability", answerability_definition, 3, False),
]

option_letters = "ABCDEFG"

for elem in data_control_group:

    # indices of questions with at least one correct answer and more than 7 incorrect answers
    # (an empty result simply means the inner loop does nothing for this element)
    # NOTE(review): the original comment said "at least 7", but "> 7" requires 8, and the
    # largest prompt below only uses 6 incorrect answers - confirm the intended threshold.
    potential_questions = [
        idx
        for idx, candidate in enumerate(elem['paragraph']['questions'])
        if any(answer["isAnswer"] for answer in candidate["answers"])
        and sum(not answer["isAnswer"] for answer in candidate["answers"]) > 7
    ]

    for question_i in potential_questions:
        question = elem['paragraph']['questions'][question_i]

        prompt_elem = {"id": elem["id"]}

        # Flatten the paragraph: strip the "<b>Sent N: </b>" / "<b>sent N: </b>" sentence markers,
        # drop "<br>" tags, turn new lines into spaces and collapse runs of spaces.
        context = f"Paragraph: {elem['paragraph']['text']}"
        for sentence_marker in (r"<b>Sent \d+: </b>", r"<b>sent \d+: </b>"):
            context = re.sub(sentence_marker, "", context)
        context = context.replace("<br>", "").replace("\n", " ")
        context = re.sub(' +', ' ', context)

        context = f"{context}\nQuestion: {question['question']}"

        # first correct answer, and all incorrect answers in their original order
        correct_answer = [answer["text"] for answer in question["answers"] if answer["isAnswer"]][0]
        incorrect_answers = [answer["text"] for answer in question["answers"] if not answer["isAnswer"]]

        # "    (A) ..." through "    (F) ..." for the first six incorrect answers
        distractor_lines = [f"    ({letter}) {incorrect_answers[position]}" for position, letter in enumerate(option_letters[:6])]

        for key, definition, n_options, add_none_of_the_above in tk_prompt_variants:
            # incorrect answers first, the correct answer always takes the last letter
            shown_lines = distractor_lines[:n_options - 1] + [f"    ({option_letters[n_options - 1]}) {correct_answer}"]
            if add_none_of_the_above:
                shown_lines.append("    (D) None of the above.")
            prompt_elem[key] = f"Definition:\n{definition}\n\n{context}\nOptions:\n" + "\n".join(shown_lines)

        prompt_list_control_group.append(prompt_elem)

### Save to json

In [None]:
# Write the Tk-Instruct zero-shot prompt lists to <outdir>/tk-instruct/zero_shot/.
# Creating only `outdir` is not enough: open(..., 'w') fails with FileNotFoundError
# when the nested directory does not exist yet, so the full target directory is created.
tk_instruct_zero_shot_dir = os.path.join(outdir, "tk-instruct", "zero_shot")
os.makedirs(tk_instruct_zero_shot_dir, exist_ok=True)

with open(os.path.join(tk_instruct_zero_shot_dir, "multirc_incorrect_answers_adversarial.json"), 'w') as f1:
    f1.write(json.dumps(prompt_list, indent=2))

with open(os.path.join(tk_instruct_zero_shot_dir, "multirc_incorrect_answers_control_group.json"), 'w') as f1:
    f1.write(json.dumps(prompt_list_control_group, indent=2))

##### Delete

In [17]:
import filecmp


In [18]:
# The two directory trees whose files are compared in the next cell.
# NOTE(review): these are hard-coded absolute paths on one machine - adjust before re-running elsewhere.
indir1, indir2 = (
    "/home/nlp/sloboda1/projects/unanswerable_adversarial/generated_text_debugging",
    "/home/nlp/sloboda1/projects/unanswerable_adversarial/generated_text_debugging1",
)

In [19]:
# Compare every file under indir1 with the file at the same relative path under indir2
# and print the ones that differ or have no counterpart.
for subdir, dirs, files in os.walk(indir1):
    # Map the directory through its path relative to indir1 instead of a plain
    # string replace, so only the root prefix is swapped.
    counterpart_dir = os.path.normpath(os.path.join(indir2, os.path.relpath(subdir, indir1)))
    for file in files:
        file1 = os.path.join(subdir, file)
        file2 = os.path.join(counterpart_dir, file)
        # filecmp.cmp raises when one side is missing; report it and keep going.
        if not os.path.isfile(file2):
            print(f"missing in second directory: {file2}")
            continue
        # shallow=False compares file contents; the default (shallow=True) treats files
        # with identical os.stat signatures (type, size, mtime) as equal without reading them.
        if not filecmp.cmp(file1, file2, shallow=False):
            print(f"not equal: {file1}")