In [4]:
# correct_prompt_templates = [
#     lambda n1, n2: f'What is {n1} plus {n2}?',
#     lambda n1, n2: f'What is the sum of {n1} and {n2}?',
#     lambda n1, n2: f'{n1}+{n2}=',
#     lambda n1, n2: f'Combien font {n1} plus {n2}?',
#     lambda n1, n2: f'What is {n1} times {n2}',
#     lambda n1, n2: f'{n1}⊕{n2}=',
# ]

def _plus_question(n1, n2):
    """Phrase the addition of n1 and n2 as an English question."""
    return f'What is {n1} plus {n2}?'


def _plus_equation(n1, n2):
    """Phrase the addition of n1 and n2 as an unfinished equation."""
    return f'{n1}+{n2}='


# Active prompt templates; each callable takes the two operands and returns
# the question text. The list position is what later cells use as template id.
correct_prompt_templates = [_plus_question, _plus_equation]

# Instruction placed in front of every generated question.
prefix_prompt = 'Solve the following and respond with only the final answer:'

In [6]:
import random

# Fixed seed so that Restart & Run All regenerates exactly the same examples.
RANDOM_SEED = 0
random.seed(RANDOM_SEED)

# Number of examples generated per prompt template for each split.
number_examples_train = 6000
number_examples_test = 500

# Rows collected for each split; every row is a dict describing one example.
results_train = []
results_test = []

# Deduplication keys shared by both splits.
seen = set()

def make_key(prompt, corrupted, answer):
    """Return the hashable tuple used to detect an already-generated example."""
    dedup_key = (prompt, corrupted, answer)
    return dedup_key

def _generate_split(rows, examples_per_template):
    """Append `examples_per_template` unique examples per template to `rows`.

    For every template in `correct_prompt_templates` this draws two-digit
    operands (10-99), builds a clean prompt from (a, b) and a corrupted prompt
    from (a, c), and appends a dict with keys 'id', 'prompt', 'answer' and
    'corrupted_prompt'. 'id' is the template's index in the list and 'answer'
    is a + b.

    Candidates whose key is already in the shared `seen` set are redrawn, so
    no example is repeated within or across the splits filled by this helper.
    """
    for template_id, template in enumerate(correct_prompt_templates):
        accepted = 0
        while accepted < examples_per_template:
            a = random.randint(10, 99)
            b = random.randint(10, 99)
            c = random.randint(10, 99)
            # If c equalled b the "corrupted" prompt would be identical to the
            # clean one, so redraw until the second operand actually differs.
            while c == b:
                c = random.randint(10, 99)
            answer = a + b

            full_prompt = prefix_prompt + ' ' + template(a, b)
            full_corrupted = prefix_prompt + ' ' + template(a, c)

            key = make_key(full_prompt, full_corrupted, answer)
            if key in seen:
                continue  # avoid duplicates across both sets
            seen.add(key)

            rows.append({
                'id': template_id,
                'prompt': full_prompt,
                'answer': answer,
                'corrupted_prompt': full_corrupted
            })
            accepted += 1


# Train is filled first so the test split is deduplicated against it.
_generate_split(results_train, number_examples_train)
_generate_split(results_test, number_examples_test)


In [7]:
# Distinct template ids that occur in the training rows (rows lacking an 'id' are ignored).
unique_ids_set = {row['id'] for row in results_train if 'id' in row}

In [8]:
import os
import csv

output_directory = 'datasets_csv/'


def _rows_for_template(results, template_id):
    """Return CSV rows (header first) for the examples of one template id."""
    header = ['clean', 'corrupted', 'answer']
    body = [
        [item['prompt'], item['corrupted_prompt'], item['answer']]
        for item in results
        if item['id'] == template_id
    ]
    return [header] + body


def _write_rows(path, rows):
    """Write `rows` to `path` as a UTF-8 CSV file, replacing any existing file."""
    # newline='' lets the csv module control line endings itself.
    with open(path, 'w', newline='', encoding='utf-8') as file:
        csv.writer(file).writerows(rows)


# One sub-folder per template id, each holding a train.csv and a test.csv.
# The loop variable is deliberately not called `id`: that would rebind the
# builtin `id` for the rest of the notebook session.
for template_id in unique_ids_set:
    data_train = _rows_for_template(results_train, template_id)
    data_test = _rows_for_template(results_test, template_id)

    subfolder = os.path.join(output_directory, f"prompts_id_{template_id}")
    os.makedirs(subfolder, exist_ok=True)

    filename_train = os.path.join(subfolder, "train.csv")
    filename_test = os.path.join(subfolder, "test.csv")

    _write_rows(filename_train, data_train)
    _write_rows(filename_test, data_test)

In [9]:
import random
import csv
from pathlib import Path

# Configuration
RANDOM_SEED = 0  # fixed so the generated splits are reproducible across runs
random.seed(RANDOM_SEED)
number_examples_train = 6000
number_examples_val = 500
output_dir = Path("datasets")
# parents=True keeps this working if output_dir is later changed to a nested path.
output_dir.mkdir(parents=True, exist_ok=True)

# Store data and deduplication keys
train_data = []
val_data = []
seen = set()  # fresh set: this dataset is deduplicated independently of earlier cells

def make_key(prompt, corrupted, answer):
    """Bundle the three fields of an example into a hashable deduplication key."""
    example_key = (prompt, corrupted, answer)
    return example_key

def generate_dataset(n, split_list):
    """Append `n` unique clean/corrupted addition examples to `split_list`.

    Each example uses a randomly chosen template from
    `correct_prompt_templates` and two-digit operands (10-99). The clean prompt
    is built from (a, b); the corrupted prompt keeps `a` and swaps the second
    operand for a different value `c`. Candidates whose key is already in the
    module-level `seen` set are rejected and redrawn, so examples are unique
    across every list filled while `seen` is not reset.

    Args:
        n: Number of examples to append.
        split_list: List mutated in place; receives dicts with the keys
            'prompt', 'corrupted_prompt' and 'answer' (answer is a + b).
    """
    count = 0
    while count < n:
        template = random.choice(correct_prompt_templates)
        a = random.randint(10, 99)
        b = random.randint(10, 99)
        c = random.randint(10, 99)
        # If c equalled b the "corrupted" prompt would be identical to the
        # clean one, so redraw until the second operand actually differs.
        while c == b:
            c = random.randint(10, 99)
        answer = a + b

        clean = f"{prefix_prompt} {template(a, b)}"
        corrupted = f"{prefix_prompt} {template(a, c)}"

        key = make_key(clean, corrupted, answer)
        if key in seen:
            continue
        seen.add(key)

        split_list.append({
            'prompt': clean,
            'corrupted_prompt': corrupted,
            'answer': answer
        })
        count += 1

# Fill the training split first, then the validation split
for target_size, split_rows in (
    (number_examples_train, train_data),
    (number_examples_val, val_data),
):
    generate_dataset(target_size, split_rows)

# Randomise the row order within each split
for split_rows in (train_data, val_data):
    random.shuffle(split_rows)

# Save to CSVs
def save_csv(data, filepath):
    """Write example dicts to `filepath` as a CSV file with a header row.

    Args:
        data: Iterable of dicts with exactly the keys 'prompt',
            'corrupted_prompt' and 'answer'.
        filepath: Destination path; an existing file is overwritten.

    Raises:
        ValueError: If a row contains a key outside the three expected
            columns (raised by csv.DictWriter).
    """
    # Explicit UTF-8 keeps the output independent of the platform's default
    # encoding, so templates with non-ASCII characters are written correctly.
    # newline='' lets the csv module control line endings itself.
    with open(filepath, "w", newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=['prompt', 'corrupted_prompt', 'answer'])
        writer.writeheader()
        writer.writerows(data)

# Write each split to its own CSV file inside the output directory
for split_rows, csv_name in ((train_data, "train.csv"), (val_data, "validation.csv")):
    save_csv(split_rows, output_dir / csv_name)

# Report how many examples ended up in each split
summary = f"Saved {len(train_data)} training examples and {len(val_data)} validation examples to {output_dir}"
print(summary)


Saved 6000 training examples and 500 validation examples to datasets
