In [6]:
import pandas as pd
from datasets import load_dataset
import re
import numpy as np
from tqdm import tqdm


ds = load_dataset("lukaemon/bbh", "snarks")
data = ds['test'].to_pandas()

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Generating test split: 100%|██████████| 178/178 [00:00<00:00, 58795.57 examples/s]


In [11]:
def extract_question(text):
    """
    Extract the question part between the prompt and options.
    Assumes the question starts after the first newline and ends before "Options:"
    """
    # Split by newlines and remove the first line (the prompt)
    parts = text.split('\n', 1)
    if len(parts) < 2:
        return None
    
    # Take everything after the first line and before "Options:"
    question_part = parts[1].split('Options:', 1)[0]
    
    # Clean up any extra whitespace
    question_part = question_part.strip()
    
    return question_part

In [12]:
data['question'] = data['input'].apply(extract_question)

In [133]:
templates = pd.read_csv('../Multi-Prompt-LLM-Evaluation/data/automatic paraphrases/BBH/causal_judgement.csv')

In [45]:
def standardize_template(template):
    """
    Ensure 'words' and 'category' are surrounded by curly brackets in the template.
    Handles cases where they might already be bracketed or not.
    """
    # First remove any existing brackets around words/category
    # This prevents double bracketing
    template = re.sub(r'\{question\}', 'question', template)
    
    # Then add brackets around standalone 'words' and 'category'
    # Using word boundaries (\b) to ensure we match whole words only
    template = re.sub(r'\bquestion\b', '{question}', template)
    
    return template

templates['prompt template'] = templates['prompt template'].apply(standardize_template)

In [149]:
data['question_length'] = data['question'].apply(len)
data = data[data['question_length'] < 1100]
data = data.drop(columns=['question_length'])

In [153]:
rng = np.random.RandomState(42)

df = pd.DataFrame(columns=['question_id', 'perturbation_id', 'prompt', 'correct_answer'])

for idx, row in tqdm(data.iterrows(), total=len(data)):
    templates_buffer = templates.sample(frac=1, random_state=rng)
    for index, row1 in templates_buffer.iterrows():
        template = row1['prompt template']
        formatted = template.format(question=row['question'])

        df.loc[len(df)] = [idx, index, formatted, row['target']]

100%|██████████| 146/146 [00:14<00:00, 10.16it/s]


In [154]:
df['prompt'] = df['prompt'].apply(lambda x: x.strip())

In [155]:
df.to_csv('data/causal_judgement_perturbations.csv', index=False)

In [156]:
data = data.drop(columns=['question'])
data = data.rename(columns={'input': 'prompt', 'target': 'correct_answer'})
data['question_id'] = data.index
data = data[['question_id', 'prompt', 'correct_answer']]
data.to_csv('data/causal_judgement_original.csv', index=False)

In [158]:
filtered_df = df.groupby('question_id').head(20)
filtered_df.to_csv('data/causal_judgement_20.csv', index=False)

## Movie Recommendation

In [159]:
ds = load_dataset("lukaemon/bbh", "movie_recommendation")
data = ds['test'].to_pandas()

In [162]:
templates = pd.read_csv('../Multi-Prompt-LLM-Evaluation/data/automatic paraphrases/BBH/movie_recommendation.csv')

In [163]:
for idx, row in templates.iterrows():
    if '{options}' not in row['prompt template']:
        print(row['prompt template'])


In [103]:
def parse_template_detailed(text):
    """
    Extract movie list and options from the template.
    Returns tuple of (movies_list, options_list) where movies_list is split into individual movies
    """
    # Extract movie list
    movie_match = re.search(r'Find a movie similar to (.*?):\n', text)
    movie_list = movie_match.group(1) if movie_match else ""
    
    # Split movie list into individual movies and clean up whitespace
    movies = [movie.strip() for movie in movie_list.split(',')]
    
    # Extract options
    options = re.findall(r'\([A-D]\) (.*?)(?=\n|$)', text)
    
    return movies, options

In [85]:
data['movies'] = data['input'].apply(lambda x: parse_template_detailed(x)[0])
data['options'] = data['input'].apply(lambda x: parse_template_detailed(x)[1])

In [121]:
def shuffle_with_tracking(options, original_index):
    """
    Shuffle options while tracking where the item at original_index ends up
    
    Args:
        options: List of options
        original_index: The index to track through the shuffle
    
    Returns:
        shuffled_options: List of shuffled options
        new_index: New index of the tracked item after shuffling
    """
    # Get permutation indices
    rng = np.random.default_rng()
    perm = rng.permutation(len(options))
    
    # Shuffle the options
    shuffled_options = [options[i] for i in perm]
    
    # Find where the original index ended up
    new_index = np.where(perm == original_index)[0][0]
    
    return shuffled_options, new_index

In [100]:
print(formatted)

Q: Can you recommend a movie that is similar to The Shawshank Redemption, Terminator 2 Judgment Day, Schindler's List, The Lion King?
Please choose from the following (A) The Red Turtle
(B) Tarzan
(C) Aladdin
(D) Nothing But Trouble:
(A) The Red Turtle
(B) Tarzan
(C) Aladdin
(D) Nothing But Trouble


In [123]:
rng = np.random.RandomState(42)

df = pd.DataFrame(columns=['question_id', 'perturbation_id', 'prompt', 'correct_answer'])
target_indices = {'(A)': 0, '(B)': 1, '(C)': 2, '(D)': 3}
inv_target_indices = {0: '(A)', 1: '(B)', 2: '(C)', 3: '(D)'}
for idx, row in tqdm(data.iterrows(), total=len(data)):
    if row['target'] not in target_indices.keys():
        continue
    target_index = target_indices[row['target']]

    templates_buffer = templates.sample(frac=1, random_state=rng)
    for index, row1 in templates_buffer.iterrows():

        template = row1['prompt template']
        movie_list = list(rng.permutation(row['movies']))
        # print(row['target'], row['options'])
        options, new_index = shuffle_with_tracking(row['options'], target_index)

        # print(new_index, options)
        movie_list_str = ', '.join(movie_list)

        options_str = '\n'.join([f'({chr(65 + i)}) {option}' for i, option in enumerate(options)])
        formatted = template.format(movie_list=movie_list_str, options=options_str)

        df.loc[len(df)] = [idx, index, formatted, inv_target_indices[new_index]]

100%|██████████| 250/250 [00:28<00:00,  8.71it/s]


In [166]:
df['prompt'] = df['prompt'].apply(lambda x: x.strip())
df.to_csv('data/movie_recommendation_perturbations.csv', index=False)

In [128]:
data = data.drop(columns=['movies', 'options'])
data = data.rename(columns={'input': 'prompt', 'target': 'correct_answer'})
data['question_id'] = data.index
data = data[['question_id', 'prompt', 'correct_answer']]
data.to_csv('data/movie_recommendation_original.csv', index=False)

In [169]:
filtered_df = df.groupby('question_id').head(20)
filtered_df.to_csv('data/movie_recommendation_20.csv', index=False)

## Snarks

In [14]:
import pandas as pd
from datasets import load_dataset
import re
import numpy as np
from tqdm import tqdm


ds = load_dataset("lukaemon/bbh", "snarks")
data = ds['test'].to_pandas()

In [25]:
def extract_question(text):
    """
    Extract the question part between the prompt and options.
    Assumes the question starts after the first newline and ends before "Options:"
    """
    # Split by newlines and remove the first line (the prompt)
    options = text.split('Options:')[1].strip()
    options = re.findall(r'\([A-D]\) (.*?)(?=\n|$)', options)
    
    return options

In [26]:
data['options'] = data['input'].apply(extract_question)

In [61]:
templates = pd.read_csv('raw_data/perturbations/snarks.csv')

In [69]:
templates = templates[~(templates['prompt template'].str.contains('{question}') | templates['prompt template'].str.contains('{prompt}'))]

In [58]:
def shuffle_with_tracking(options, original_index, rng):
    """
    Shuffle options while tracking where the item at original_index ends up
    
    Args:
        options: List of options
        original_index: The index to track through the shuffle
    
    Returns:
        shuffled_options: List of shuffled options
        new_index: New index of the tracked item after shuffling
    """
    # Get permutation indices
    perm = rng.permutation(len(options))
    
    # Shuffle the options
    shuffled_options = [options[i] for i in perm]
    
    # Find where the original index ended up
    new_index = np.where(perm == original_index)[0][0]
    return shuffled_options, new_index

In [70]:
rng = np.random.RandomState(42)

df = pd.DataFrame(columns=['question_id', 'perturbation_id', 'prompt', 'correct_answer'])
target_indices = {'(A)': 0, '(B)': 1}
inv_target_indices = {0: '(A)', 1: '(B)'}
for idx, row in tqdm(data.iterrows(), total=len(data)):
    if row['target'] not in target_indices.keys():
        continue
    target_index = target_indices[row['target']]

    templates_buffer = templates.sample(frac=1, random_state=rng)
    df_buffer = pd.DataFrame(columns=['question_id', 'perturbation_id', 'prompt', 'correct_answer'])
    for index, row1 in templates_buffer.iterrows():

        template = row1['prompt template']
        # print(row['target'], row['options'])
        options, new_index = shuffle_with_tracking(row['options'], target_index, rng)
        # print(options, new_index)

        options_str = '\n'.join([f'({chr(65 + i)}) {option}' for i, option in enumerate(options)])
        # print(template)
        formatted = template.format(options=options_str)
        # print(formatted)

        df_buffer.loc[len(df_buffer)] = [idx, index, formatted, inv_target_indices[new_index]]
    df = pd.concat([df, df_buffer], ignore_index=True)
# df['prompt'] = df['prompt'].apply(lambda x: x.strip())
# df.to_csv('data/snarks_perturbations.csv', index=False)
# data = data.drop(columns=['options'])
# data = data.rename(columns={'input': 'prompt', 'target': 'correct_answer'})

100%|██████████| 178/178 [00:46<00:00,  3.81it/s]


In [72]:
df['prompt'] = df['prompt'].apply(lambda x: x.strip())
df.to_csv('data/snarks/perturbations.csv', index=False)

In [77]:
data = data.drop(columns=['options'])
data = data.rename(columns={'input': 'prompt', 'target': 'correct_answer'})
data['question_id'] = data.index
data = data[['question_id', 'prompt', 'correct_answer']]
data.to_csv('data/snarks/original.csv', index=False)

In [68]:
data = pd.read_csv('data/snarks/original.csv')
print(data)

     question_id                                             prompt  \
0              0  Which statement is sarcastic?\nOptions:\n(A) H...   
1              1  Which statement is sarcastic?\nOptions:\n(A) H...   
2              2  Which statement is sarcastic?\nOptions:\n(A) J...   
3              3  Which statement is sarcastic?\nOptions:\n(A) Y...   
4              4  Which statement is sarcastic?\nOptions:\n(A) H...   
..           ...                                                ...   
173          173  Which statement is sarcastic?\nOptions:\n(A) I...   
174          174  Which statement is sarcastic?\nOptions:\n(A) Q...   
175          175  Which statement is sarcastic?\nOptions:\n(A) W...   
176          176  Which statement is sarcastic?\nOptions:\n(A) G...   
177          177  Which statement is sarcastic?\nOptions:\n(A) W...   

    correct_answer  
0              (B)  
1              (A)  
2              (A)  
3              (B)  
4              (A)  
..             ...  


In [79]:
filtered_df = df.groupby('question_id').head(20)
filtered_df.to_csv('data/snarks/20.csv', index=False)

## Formal Fallacies

In [22]:
import pandas as pd
from datasets import load_dataset
import re
import numpy as np
from tqdm import tqdm


ds = load_dataset("lukaemon/bbh", "formal_fallacies")
data = ds['test'].to_pandas()

In [30]:
data['question'] = data['input'].apply(lambda x: x.split('\"')[1].strip())
data['question'] = data['question'].apply(lambda x: f"\"{x}\"")
data = data[data['question'].str.len() < 600]

In [58]:
templates = pd.read_csv('raw_data/perturbations/formal_fallacies.csv')
templates = templates[templates['correct'] == 1]

In [60]:
rng = np.random.RandomState(42)

df = pd.DataFrame(columns=['question_id', 'perturbation_id', 'prompt', 'correct_answer'])

for idx, row in tqdm(data.iterrows(), total=len(data)):
    templates_buffer = templates.sample(frac=1, random_state=rng)
    df_buffer = pd.DataFrame(columns=['question_id', 'perturbation_id', 'prompt', 'correct_answer'])
    for index, row1 in templates_buffer.iterrows():
        template = row1['prompt template']
        formatted = template.format(input=row['question'])

        df_buffer.loc[len(df_buffer)] = [idx, index, formatted, row['target']]
    df = pd.concat([df, df_buffer], ignore_index=True)
df['prompt'] = df['prompt'].apply(lambda x: x.strip())

100%|██████████| 235/235 [00:53<00:00,  4.36it/s]


In [62]:
df['prompt'] = df['prompt'].apply(lambda x: x.strip())
df.to_csv('data/formal_fallacies/perturbations.csv', index=False)

In [64]:
data = data.drop(columns=['question', 'length'])
data = data.rename(columns={'input': 'prompt', 'target': 'correct_answer'})
data['question_id'] = data.index
data = data[['question_id', 'prompt', 'correct_answer']]
data.to_csv('data/formal_fallacies/original.csv', index=False)

In [66]:
filtered_df = df.groupby('question_id').head(20)
filtered_df.to_csv('data/formal_fallacies/20.csv', index=False)

In [9]:
## taking 5 perturbations
import pandas as pd
import numpy as np

df = pd.read_csv('data/movie_recommendation/20.csv')

In [12]:
df

Unnamed: 0,question_id,perturbation_id,prompt,correct_answer
0,0,19,Q: Can you recommend a movie similar to The Ma...,(C)
1,0,42,Q: Can you recommend a movie similar to the fo...,(B)
2,0,153,"Given a list of movies, recommend a movie that...",(A)
3,0,78,I need your help to find movies that are simil...,(A)
4,0,145,"Based on your movie preferences, we can recomm...",(A)
...,...,...,...,...
4815,249,95,I am looking for a movie that is similar to Te...,(A)
4816,249,157,Recommend a movie similar to a given list of m...,(A)
4817,249,178,Recommend a movie similar to a given list of m...,(B)
4818,249,39,Q: Can you recommend a movie that is similar t...,(D)


In [15]:
df.groupby('question_id').head(5).iloc[0]['prompt']

'Q: Can you recommend a movie similar to The Mask, Batman, The Fugitive, Pretty Woman?\nChoose from the following options:\n(A) Maelstrom\n(B) Lamerica\n(C) The Lion King\n(D) The Front Page\nA:'

In [18]:
df.groupby('question_id').head(5).to_csv('data/movie_recommendation/5.csv', index=False)

In [19]:
df

Unnamed: 0,question_id,perturbation_id,prompt,correct_answer
0,0,19,Q: Can you recommend a movie similar to The Ma...,(C)
1,0,42,Q: Can you recommend a movie similar to the fo...,(B)
2,0,153,"Given a list of movies, recommend a movie that...",(A)
3,0,78,I need your help to find movies that are simil...,(A)
4,0,145,"Based on your movie preferences, we can recomm...",(A)
...,...,...,...,...
4815,249,95,I am looking for a movie that is similar to Te...,(A)
4816,249,157,Recommend a movie similar to a given list of m...,(A)
4817,249,178,Recommend a movie similar to a given list of m...,(B)
4818,249,39,Q: Can you recommend a movie that is similar t...,(D)
