In [None]:
# For licensing see accompanying LICENSE file.
# Copyright (C) 2025 Apple Inc. All Rights Reserved.
#
# Some of the code in this file is adapted from:
# https://github.com/google-deepmind/long-form-factuality
# Copyright (c) 2023 Sewon Min.
# Licensed under the MIT License.
#
# prompts from SAFE paper

from ageval.datasets.longfact import load_original_row_from_hashed_prompt
from longfact_hashed_prompts import hashed_prompts
import pathlib

# Directory holding the LongFact-Objects prompt files (third-party checkout).
longfact_objects_dir = pathlib.Path(
    "../../third_party/long_form_factuality/longfact/longfact-objects_gpt4_01-12-2024_noduplicates"
)

# Look up the original rows for the hashed prompts shipped with this notebook.
all_rows = load_original_row_from_hashed_prompt(
    hashed_prompts=hashed_prompts,
    path=longfact_objects_dir,
)
longfact_object_prompts = [loaded_row["prompt"] for loaded_row in all_rows]

# quick sanity checks
num_unique_prompts = len(set(longfact_object_prompts))

# all prompts should be unique
assert num_unique_prompts == len(longfact_object_prompts), "Not all prompts are unique"

# reported number of prompts in SAFE paper
assert num_unique_prompts == 250

[32m2025-06-24 13:54:54.884[0m | [34m[1mDEBUG   [0m | [36mageval.datasets.longfact[0m:[36m_load_longfact_data[0m:[36m31[0m - [34m[1mLoaded 38 topics with prompts from longfact.[0m


In [None]:
from ageval.models import get_model
from ageval.utils import setup_api_keys

# Presumably loads the API keys from the given TOML secrets file
# (see ageval.utils.setup_api_keys) — the keys themselves stay out of the notebook.
setup_api_keys("../../ageval_secrets.toml")

# Model handle used by create_responses() further down.
# NOTE(review): max_tokens is passed as the string "2048" rather than an int —
# confirm that get_model accepts/converts a string here.
model = get_model(model="openai/gpt-4o-mini-2024-07-18", max_tokens="2048", temp=0.1)

In [None]:
# getting the pre-existing prompts
# which have already been used to generate data
# (one file per prompt inside the "prompts" folder of the v7_100 generation run)
prompt_path = pathlib.Path("../../data/generated/longfact/individual_generations_v7_100/prompts")

def load_txt(path: str | pathlib.Path) -> None:
    with open(path, "r") as text_file:
        return text_file.read()

# Read every file in the prompt directory; each file holds one prompt that was already used.
pre_existing_prompts = [load_txt(prompt_file) for prompt_file in prompt_path.glob("*")]

# Keep only the LongFact prompts that have not been used before.
filtered_longfact_prompts = []
for candidate_prompt in longfact_object_prompts:
    if candidate_prompt in pre_existing_prompts:
        continue
    filtered_longfact_prompts.append(candidate_prompt)

# Every pre-existing prompt is expected to remove exactly one LongFact prompt.
expected_remaining = len(longfact_object_prompts) - len(pre_existing_prompts)
assert len(filtered_longfact_prompts) == expected_remaining


In [None]:
import random
import pathlib

# NOTE(review): NUM_GENERATIONS is not referenced anywhere else in the visible
# cells — confirm it is still needed.
NUM_GENERATIONS = 20
# Fixed seed so the shuffled prompt order below is reproducible across runs.
random.seed(42)
# Sampling len(...) items without replacement yields a shuffled copy of the
# filtered prompts; the filtered list itself is left untouched.
rand_longfact_prompts = random.sample(filtered_longfact_prompts, len(filtered_longfact_prompts))

def create_responses(start_idx, end_idx, prompts, num_sentences=8, save_path = "../../data/generated/longfact/individual_generations"):

    save_path = pathlib.Path(save_path)
    assert save_path.exists()


    generations = []
    postamble_base = "Provide as many specific details and examples as possible (such as names of people, numbers, events, locations, dates, times, etc.)." # from p.17 of SAFE paper
    postamble_sent = f" Respond in {num_sentences} sentences." # as described in appendix D.4 of SAFE paper
    


    def save_to_txt(text: str, path: str | pathlib.Path) -> None:
        with open(path, "w") as text_file:
            text_file.write(text)

    for i in range(start_idx, end_idx):
        prompt = prompts[i]
        full_prompt = f"{prompt} {postamble_base} {postamble_sent}"
        gen = {
            "prompt": prompt,
            "generation": model.invoke(full_prompt),
            "generation2": model.invoke(full_prompt),
        }

        save_to_txt(gen["prompt"], save_path / f"prompts/n{i}.txt")
        save_to_txt(gen["generation"], save_path / f"generations/n{i}.txt")
        save_to_txt(gen["generation2"], save_path / f"alt_generations/n{i}.txt")
        save_to_txt(gen["generation"], save_path / f"adapted_generations/n{i}.txt")

        generations.append(gen)

In [None]:
# Number of shuffled prompts available; index ranges passed to create_responses must stay below this.
len(rand_longfact_prompts)

In [None]:
# create_responses(start_idx=0,end_idx=20,num_sentences=8, prompts=rand_longfact_prompts)
# create_responses(start_idx=20,end_idx=40,num_sentences=5, prompts=rand_longfact_prompts)
# create_responses(start_idx=40,end_idx=250,num_sentences=5, prompts=rand_longfact_prompts)

In [1]:
# save annotated data to csv

import random
import pathlib
import pandas as pd

def load_txt(path: str | pathlib.Path) -> None:
    with open(path, "r") as text_file:
        return text_file.read()
     
random.seed(5)
# One coin flip per row: 0 places the alternative text in slot A, 1 places it in slot B.
preferred_idxs = random.choices([0, 1], k=10000)

save_path = pathlib.Path("../../data/generated/longfact/individual_generations_v8_fixflips")
rows = []

for i in range(100):
    # Load all five text files that belong to example i.
    prompt = load_txt(save_path / f"prompts/n{i}.txt")
    original = load_txt(save_path / f"generations/n{i}.txt")
    alt_text = load_txt(save_path / f"alt_generations/n{i}.txt")  # alternative text
    adapted_text = load_txt(save_path / f"adapted_generations/n{i}.txt")  # adapted text
    alt_text_original = load_txt(save_path / f"alt_generations_original/n{i}.txt")

    # The alternative text is always the preferred one; the flip only decides its slot.
    if preferred_idxs[i] == 0:
        text_a, text_b, preferred_text = alt_text, adapted_text, "text_a"
    else:
        text_a, text_b, preferred_text = adapted_text, alt_text, "text_b"

    rows.append(
        {
            "prompt": prompt,
            "text_a": text_a,
            "text_b": text_b,
            "original_text": original,
            "alt_text_original": alt_text_original,
            "preferred_text": preferred_text,
        }
    )

pref_frame = pd.DataFrame(rows)
pref_frame.to_csv(save_path / "pref_data.csv", index=True, index_label="index")

In [2]:
# Balance check: how many of the first 100 flips are 0, i.e. rows whose preferred text sits in slot A.
preferred_idxs[:100].count(0)

50

In [3]:
# generate pairwise data for POEM

# Path of the pref_data.csv file inside save_path, as a plain string for pd.read_csv.
DATA_PATH = str(save_path / "pref_data.csv")

In [4]:
df = pd.read_csv(DATA_PATH)

# to skip already annotated rows
start_index = 40

json_dicts = []

# read_csv assigns a 0..n-1 index, so slicing by position skips the same rows
# as comparing the index label against start_index.
for i, row in df.iloc[start_index:].iterrows():
    pref_text = row[row["preferred_text"]]

    # The non-preferred text lives in whichever slot is not the preferred one.
    if row["preferred_text"] == "text_b":
        nonpref_text_col = "text_a"
    else:
        nonpref_text_col = "text_b"
    nonpref_text = row[nonpref_text_col]

    assert row["preferred_text"] != nonpref_text_col, f"{row['preferred_text']} == {nonpref_text_col}"
    assert nonpref_text != pref_text, f"{nonpref_text} == {pref_text}"

    user_message = {
        "role": "user",
        "content": row["prompt"],
    }
    enrichments = {
        "task_type": "Open Q&A",
        "topic": "other_v1.1",
        "task_type_prob": 1,
        "topic_prob": 1,
        # The preferred text is always exported as text_1.
        "meta": {"longfact-preferred": "text_1"},
    }
    responses = {
        "text_1": {
            "response": pref_text,
            "metadata": {},
        },
        "text_2": {
            "response": nonpref_text,
            "metadata": {},
        },
    }

    json_dicts.append(
        {
            "prompt_id": f"longfact-pairwise-20240813-{i:07}",
            "messages": [user_message],
            "enrichments": enrichments,
            "responses": responses,
        }
    )


In [5]:
import json

# Write all collected records to a single file inside save_path.
# NOTE(review): json.dump with indent=4 writes ONE pretty-printed JSON array,
# which is not the JSON Lines format (one object per line) that the ".jsonl"
# suffix suggests — confirm which format the consumer expects.
# NOTE(review): the "40to99" in the file name is hard-coded; keep it in sync
# with start_index if that value changes.
with open(save_path / "longfact-40to99-20240813.jsonl", "w") as f:
    json.dump(json_dicts, f, indent=4)