In [1]:
import pathlib
import json

# NOTE(review): both locations are absolute, machine-specific paths; anyone
# re-running this notebook elsewhere has to edit them first.
# Directory holding the raw result files that are read below.
results_path = pathlib.Path("/Users/arduin/main/repos/github/feedback-forensics/feedback-forensics-results-paper")
# Directory that the sanitized copies are written to.
sanitize_path = pathlib.Path("/Users/arduin/main/repos/github/feedback-forensics/feedback-forensics-annotations")

# ensure sanitize path exists (no error if it is already there)
sanitize_path.mkdir(parents=True, exist_ok=True)

# get all json files directly inside the results path (non-recursive)
json_files = list(results_path.glob("./*.json"))

# load all json files, keyed by file name.
# Path.read_text opens *and closes* each file, unlike a bare open() whose
# handle is left for the garbage collector; the encoding is pinned to UTF-8
# so parsing does not depend on the machine's locale.
json_data = {
    file.name: json.loads(file.read_text(encoding="utf-8"))
    for file in json_files
}

In [2]:
# Show which result files were loaded (the dict is keyed by file name).
print(json_data.keys())

dict_keys(['llama4_arena_vs_public_version.json', 'chatbot_arena.json', 'allenai_multipref.json', 'model_comparison.json', 'prism.json'])


In [3]:
# Datasets loaded above (keys of json_data): 'llama4_arena_vs_public_version.json', 'chatbot_arena.json', 'allenai_multipref.json', 'model_comparison.json', 'prism.json'


# Per-dataset allow-list, keyed by results file name: the per-comparison
# metadata keys handed to sanitize_dataset as the ones to keep.
meta_data_to_keep = {
    "allenai_multipref.json": [
        "prompt_id",
        "comparison_id",
    ],
    "chatbot_arena.json": [
        "question_id",
        "judge_hash",
    ],
    "llama4_arena_vs_public_version.json": [
        "model_a",
        "model_b",
    ],
    "prism.json": [
        "conversation_id",
        "chosen_model",
        "rejected_model",
    ],
    "model_comparison.json": [
        "prompt_id",
    ],
}

def sanitize_dataset(data: dict, meta_data_to_keep: list):
    """Blank out prompt/response text in a dataset and prune its metadata.

    For every entry in ``data["comparisons"]`` the character lengths of both
    responses are stored in the comparison's metadata, the prompt and the
    response texts are replaced by ``None``, and the metadata is reduced to
    the keys in ``meta_data_to_keep`` plus the two length keys
    (``response_a_length`` / ``response_b_length``).  The resulting key list
    is also recorded under
    ``data["metadata"]["available_metadata_keys_per_comparison"]``.

    Two layouts are handled, selected by ``data["metadata"]["version"]``:
    version ``"1.0"`` keeps the texts in ``text_a`` / ``text_b``; any other
    version keeps them in ``response_a["text"]`` / ``response_b["text"]``.

    Note: ``data`` is modified in place (the same object is returned), and the
    function is not idempotent -- a second call on the same object fails
    because the texts are already ``None``.

    Args:
        data: Dataset dict with ``"metadata"`` and ``"comparisons"`` entries.
        meta_data_to_keep: Metadata keys every comparison must have and that
            survive sanitization.

    Returns:
        The same ``data`` object, sanitized.

    Raises:
        AssertionError: If a comparison lacks the expected response fields or
            one of the keys in ``meta_data_to_keep``.
    """
    length_keys = ["response_a_length", "response_b_length"]
    available_keys = list(meta_data_to_keep) + length_keys
    data["metadata"]["available_metadata_keys_per_comparison"] = available_keys

    # Keys allowed to survive the metadata filter.  This must include the
    # length keys: they are advertised as available above, and filtering on
    # meta_data_to_keep alone would silently throw the computed lengths away.
    keys_to_retain = set(available_keys)

    for comparison in data["comparisons"]:
        # v1.0
        if data["metadata"]["version"] == "1.0":
            assert "text_a" in comparison and "text_b" in comparison
            comparison["metadata"]["response_a_length"] = len(comparison["text_a"])
            comparison["metadata"]["response_b_length"] = len(comparison["text_b"])
            comparison["prompt"] = None
            comparison["text_a"] = None
            comparison["text_b"] = None
        else:
            assert "response_a" in comparison and "response_b" in comparison
            comparison["metadata"]["response_a_length"] = len(comparison["response_a"]["text"])
            comparison["metadata"]["response_b_length"] = len(comparison["response_b"]["text"])

            # delete text from response_a and response_b
            comparison["response_a"]["text"] = None
            comparison["response_b"]["text"] = None
            comparison["prompt"] = None

        # every requested key has to be present before we prune
        for key in meta_data_to_keep:
            assert key in comparison["metadata"], f"{key} not in {comparison['metadata'].keys()}"

        # drop every metadata key that is neither requested nor a length key
        comparison["metadata"] = {
            key: value
            for key, value in comparison["metadata"].items()
            if key in keys_to_retain
        }

    return data

# Sanitize every loaded dataset, keep the result in memory, and write it to
# sanitize_path under the same file name it was loaded from.
combined_sanitized_data = {}
for dataset_name, data in json_data.items():
    print(f"Sanitizing {dataset_name}...")

    # Look up this dataset's allow-list of metadata keys and strip the rest.
    sanitized_data = sanitize_dataset(data, meta_data_to_keep[dataset_name])
    combined_sanitized_data[dataset_name] = sanitized_data

    # Persist the sanitized copy as pretty-printed JSON.
    with (sanitize_path / dataset_name).open("w") as f:
        json.dump(sanitized_data, f, indent=4)




Sanitizing llama4_arena_vs_public_version.json...
Sanitizing chatbot_arena.json...
Sanitizing allenai_multipref.json...
Sanitizing model_comparison.json...
Sanitizing prism.json...
