In [None]:
import feedback_forensics as ff
import feedback_forensics.app.plotting.paper as paper_plot
import pathlib
from IPython.display import display, Latex

# Input directory with the published results, plus output directories for
# generated figures and LaTeX snippets.
data_path = pathlib.Path("../forensics-data/feedback-forensics-public-results")
fig_save_path = pathlib.Path("./output/figures")
tex_save_path = pathlib.Path("./output/tex")

# Create both output directories up front (no error if they already exist).
for output_dir in (fig_save_path, tex_save_path):
    output_dir.mkdir(parents=True, exist_ok=True)


# Save the general LaTeX preamble.
# The encoding is set explicitly so the output does not depend on the
# platform's default encoding, matching the other .tex files this notebook writes.
with open(tex_save_path / "000_preamble.tex", "w", encoding="utf-8") as f:
    f.write(paper_plot.get_latex_doc_preamble())

# Example LaTeX table, made of only the table preamble and postamble.
# The content is built *before* the file is opened so that a failure in one of
# the helpers does not leave an empty, truncated file behind.
latex = []
latex = paper_plot.add_table_preamble(latex, "Example Table")
latex = paper_plot.add_table_postamble(latex)
with open(tex_save_path / "999_example_table.tex", "w", encoding="utf-8") as f:
    f.write("\n".join(latex))

# Each entry: [results path relative to `data_path`, human-readable dataset name].
datasets = [
    ["multipref_10k_v3.json", "MultiPref"],
    ["llama4_arena_vs_public_version.json", "Llama 4 Arena vs Public"],
    ["arena", "Chatbot Arena"],
    ["prism", "PRISM"],
]

# One cache dict handed to every DatasetHandler created below
# (presumably so handlers can reuse already-loaded data -- confirm in ff docs).
cache = {}

# The metric is the same for every dataset, so it is set once outside the loop.
metric_name = "strength"

for dataset_path, dataset_name in datasets:
    # Path up to the first ".", e.g. "multipref_10k_v3.json" -> "multipref_10k_v3".
    # Used both as the key into the annotator metrics and in the output file name.
    dataset_key = dataset_path.split(".")[0]

    dataset = ff.DatasetHandler(cache=cache)
    dataset.add_data_from_path(data_path / dataset_path)

    # NOTE(review): `overall_metrics` is not used in this cell; the call is kept
    # in case it has side effects or later cells read it -- confirm and remove.
    overall_metrics = dataset.get_overall_metrics()
    annotator_metrics = dataset.get_annotator_metrics()

    strength_metrics = annotator_metrics[dataset_key]["metrics"][metric_name]

    # Only the Llama 4 comparison overrides the default table titles.
    kwargs = {}
    if dataset_path == "llama4_arena_vs_public_version.json":
        kwargs = {
            # Fixed: the title previously read "Most  changes" (word missing).
            "top_title": "Most notable changes between Chatbot Arena and Public Version",
            "bottom_title": "Least notable changes between Public Version and Chatbot Arena",
        }

    latex_table = paper_plot.get_latex_top_and_bottom_annotators(
        annotator_metrics=strength_metrics,
        metric_name=metric_name.capitalize(),
        **kwargs,
    )

    # Explicit UTF-8 so the output does not depend on the platform default,
    # matching the other .tex files this notebook writes.
    output_file = tex_save_path / f"001_top_and_bottom_annotators_{dataset_key}.tex"
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(latex_table)



In [None]:
# Analysis of Arena data
import pandas as pd

# Load the Chatbot Arena results into a new handler that uses the existing cache.
dataset_name = "arena"
dataset = ff.DatasetHandler(cache=cache)
arena_results_path = data_path / dataset_name
dataset.add_data_from_path(arena_results_path)

# Frame of the first handler, captured before the dataset is split below.
general_df = dataset.first_handler.df

# Values of the "narrower_category" column to split the dataset by.
values = [
    'Creative Writing Prompts',
    'Songwriting Prompts',
    'Resume and Cover Letter Writing',
    'Professional Email Communication',
]
dataset.split_by_col(selected_vals=values, col="narrower_category")

metrics_df = dataset.get_annotator_metrics_df(
    index_col_name="Generate a response that...",
    metric_name="strength",
)

# Only the first ten rows of the metrics frame go into the table.
top_rows = metrics_df.head(10)
latex_str = paper_plot.get_latex_table_from_metrics_df(
    title="Encouraged personality traits across writing domains in Chatbot Arena (Strength)",
    metrics_df=top_rows,
)

arena_table_file = tex_save_path / "002_writing_tasks_arena.tex"
with open(arena_table_file, "w", encoding="utf-8") as f:
    f.write(latex_str)



In [None]:
# Analysis of MultiPref data
import pandas as pd
import feedback_forensics as ff
import pathlib

# Start from an empty cache dict and point at the public results directory.
# NOTE(review): this rebinds `cache`, dropping anything stored in a previously
# created cache dict -- confirm that the reset is intended.
cache = dict()
data_path = pathlib.Path("../forensics-data/feedback-forensics-public-results")

# Load the MultiPref results file into a new handler.
dataset_name = "multipref_10k_v3.json"
multipref_results_path = data_path / dataset_name
dataset = ff.DatasetHandler(cache=cache)
dataset.add_data_from_path(multipref_results_path)

In [None]:
# Substrings that mark the annotators of interest in their visible name.
name_markers = ("normal", "expert", "gpt4")

# Keep annotators whose variant is "unknown" or "human" AND whose visible name
# contains at least one of the markers above.
annotator_metadata = dataset.get_available_annotators()
special_annotators = {
    annotator_key: metadata
    for annotator_key, metadata in annotator_metadata.items()
    if metadata["variant"] in ["unknown", "human"]
    and any(marker in metadata["annotator_visible_name"] for marker in name_markers)
}
# Shown explicitly: a bare expression that is not the last statement of a cell
# produces no output, so the previous `special_annotators` line was a no-op.
display(special_annotators)

# Restrict the handler to the selected annotators, then compute their metrics.
dataset.set_annotator_cols(annotator_keys=list(special_annotators.keys()))
df = dataset.get_annotator_metrics_df(metric_name="strength", index_col_name="Generate a response that...")


In [5]:
# Map the raw metric column labels to readable annotator names.
# The zero-based indices in the raw labels become one-based display names
# (expert_0 -> "Human Expert 1", expert_1 -> "Human Expert 2", ...).
# NOTE(review): the raw labels are assumed to match exactly what
# get_annotator_metrics_df produces -- any mismatch surfaces as a KeyError
# in the column selection below.
rename_dict = {
    'multipref_10k_v3\n(unknown: expert_1_preferred_text)': 'Human Expert 2',
    'multipref_10k_v3\n(unknown: preferred_text_gpt4)': 'GPT-4',
    'multipref_10k_v3\n(unknown: normal_0_preferred_text)': 'Human Regular 1',
    'multipref_10k_v3\n(unknown: normal_1_preferred_text)': 'Human Regular 2',
    'multipref_10k_v3\n(unknown: expert_0_preferred_text)': 'Human Expert 1',
}

# Final column order: index column, experts, regular annotators, GPT-4, max diff.
column_order = [
    'Generate a response that...',
    'Human Expert 1',
    'Human Expert 2',
    'Human Regular 1',
    'Human Regular 2',
    'GPT-4',
    'Max diff',
]

# Rename and reorder in one non-mutating step: the frame built in the previous
# cell is left untouched instead of being modified with `inplace=True`.
df = df.rename(columns=rename_dict)[column_order]

# Only the first five rows go into the table.
latex_str = paper_plot.get_latex_table_from_metrics_df(
    metrics_df=df.head(5),
    title="Personality traits encouraged by different annotators on MultiPref (Strength)",
    first_col_width=0.15,
)

with open(tex_save_path / "003_cross_annotator_comparison_multipref.tex", "w", encoding="utf-8") as f:
    f.write(latex_str)

