In [1]:
import sys; sys.path.append("../utils")
from constants import *
from prompt_templates import *
import pandas as pd

from tqdm import tqdm, trange

In [2]:
aspects = ["coherence", "consistency", "fluency", "relevance"]
columns = ["article_id", "system_id", "article", "summary1", "summary2"]
columns += aspects
columns += [f"prompt-{aspect}" for aspect in aspects]

### newsroom

In [3]:
path = f"{gdrive_path}/data/newsroom/newsroom-processed.jsonl"
summaries = pd.read_json(path, orient="records", lines=True)

In [4]:
comparisons = pd.DataFrame(columns=columns)
for article in summaries.article_id.unique():
    subset = summaries.loc[summaries.article_id == article, :]
    assert subset["system_id"].nunique() == 7
    assert subset["article"].nunique() == 1
    for summary1_id in subset["system_id"].unique():
        for summary2_id in subset["system_id"].unique():
            row = [article, (summary1_id, summary2_id), subset["article"].iloc[0]]
            summary1 = subset.loc[subset["system_id"] == summary1_id, "summary"].item()
            summary2 = subset.loc[subset["system_id"] == summary2_id, "summary"].item()
            row.append(summary1)
            row.append(summary2)
            prompts = []
            for aspect in aspects:
                s1 = subset.loc[subset["system_id"] == summary1_id, aspect].item()
                s2 = subset.loc[subset["system_id"] == summary2_id, aspect].item()
                row.append((round(s1, 2), round(s2, 2)))
                prompt = theirs_compare_llama3.format(
                    INSTRUCTION=instructions[aspect],
                    ARTICLE=subset["article"].iloc[0],
                    SUMMARY1=summary1,
                    SUMMARY2=summary2,
                    ASPECT=aspect
                )
                prompts.append(prompt)
            row.extend(prompts)
            comparisons.loc[len(comparisons)] = row
comparisons.to_json(f"{gdrive_path}/model_harvesting/prompts/newsroom-theirs-compare-llama3.jsonl", orient="records", lines=True)

In [5]:
for choice in ["1", "2"]:
    comparisons = pd.DataFrame(columns=columns)
    for article in summaries.article_id.unique():
        subset = summaries.loc[summaries.article_id == article, :]
        assert subset["system_id"].nunique() == 7
        assert subset["article"].nunique() == 1
        for summary1_id in subset["system_id"].unique():
            for summary2_id in subset["system_id"].unique():
                row = [article, (summary1_id, summary2_id), subset["article"].iloc[0]]
                summary1 = subset.loc[subset["system_id"] == summary1_id, "summary"].item()
                summary2 = subset.loc[subset["system_id"] == summary2_id, "summary"].item()
                row.append(summary1)
                row.append(summary2)
                prompts = []
                for aspect in aspects:
                    s1 = subset.loc[subset["system_id"] == summary1_id, aspect].item()
                    s2 = subset.loc[subset["system_id"] == summary2_id, aspect].item()
                    row.append((round(s1, 2), round(s2, 2)))
                    prompt = mine_compare_llama3.format(
                        INSTRUCTION=instructions[aspect],
                        ARTICLE=subset["article"].iloc[0],
                        SUMMARY1=summary1,
                        SUMMARY2=summary2,
                        ASPECT=aspect,
                        CHOICE=choice
                    )
                    prompts.append(prompt)
                row.extend(prompts)
                comparisons.loc[len(comparisons)] = row
    comparisons.to_json(f"{gdrive_path}/model_harvesting/prompts/newsroom-mine-compare-{choice}-llama3.jsonl", orient="records", lines=True)

In [6]:
for aspect in aspects:
    prompts = []
    for _, row in summaries.iterrows():
        prompt = theirs_score_llama3.format(
            INSTRUCTION=instructions[aspect],
            ARTICLE=row["article"],
            SUMMARY=row["summary"],
            ASPECT=aspect
        )
        prompts.append(prompt)
    summaries[f"prompt-{aspect}"] = prompts
summaries.to_json(f"{gdrive_path}/model_harvesting/prompts/newsroom-theirs-score-llama3.jsonl", orient="records", lines=True)

### summeval

In [7]:
path = f"{gdrive_path}/data/summeval/summeval-processed.jsonl"
summaries = pd.read_json(path, orient="records", lines=True)

In [8]:
comparisons = pd.DataFrame(columns=columns)
for article in summaries.article_id.unique():
    subset = summaries.loc[summaries.article_id == article, :]
    assert subset["model_id"].nunique() == 16
    assert subset["article"].nunique() == 1
    for summary1_id in subset["model_id"].unique():
        for summary2_id in subset["model_id"].unique():
            row = [article, (summary1_id, summary2_id), subset["article"].iloc[0]]
            summary1 = subset.loc[subset["model_id"] == summary1_id, "summary"].item()
            summary2 = subset.loc[subset["model_id"] == summary2_id, "summary"].item()
            row.append(summary1)
            row.append(summary2)
            prompts = []
            for aspect in aspects:
                s1 = subset.loc[subset["model_id"] == summary1_id, aspect].item()
                s2 = subset.loc[subset["model_id"] == summary2_id, aspect].item()
                row.append((round(s1, 2), round(s2, 2)))
                prompt = theirs_compare_llama3.format(
                    INSTRUCTION=instructions[aspect],
                    ARTICLE=subset["article"].iloc[0],
                    SUMMARY1=summary1,
                    SUMMARY2=summary2,
                    ASPECT=aspect
                )
                prompts.append(prompt)
            row.extend(prompts)
            comparisons.loc[len(comparisons)] = row
comparisons.to_json(f"{gdrive_path}/model_harvesting/prompts/summeval-theirs-compare-llama3.jsonl", orient="records", lines=True)

In [9]:
for choice in ["1", "2"]:
    comparisons = pd.DataFrame(columns=columns)
    for article in tqdm(summaries.article_id.unique(), desc=choice):
        subset = summaries.loc[summaries.article_id == article, :]
        assert subset["model_id"].nunique() == 16
        assert subset["article"].nunique() == 1
        for summary1_id in subset["model_id"].unique():
            for summary2_id in subset["model_id"].unique():
                row = [article, (summary1_id, summary2_id), subset["article"].iloc[0]]
                summary1 = subset.loc[subset["model_id"] == summary1_id, "summary"].item()
                summary2 = subset.loc[subset["model_id"] == summary2_id, "summary"].item()
                row.append(summary1)
                row.append(summary2)
                prompts = []
                for aspect in aspects:
                    s1 = subset.loc[subset["model_id"] == summary1_id, aspect].item()
                    s2 = subset.loc[subset["model_id"] == summary2_id, aspect].item()
                    row.append((round(s1, 2), round(s2, 2)))
                    prompt = mine_compare_llama3.format(
                        INSTRUCTION=instructions[aspect],
                        ARTICLE=subset["article"].iloc[0],
                        SUMMARY1=summary1,
                        SUMMARY2=summary2,
                        ASPECT=aspect,
                        CHOICE=choice
                    )
                    prompts.append(prompt)
                row.extend(prompts)
                comparisons.loc[len(comparisons)] = row
    comparisons.to_json(f"{gdrive_path}/model_harvesting/prompts/summeval-mine-compare-{choice}-llama3.jsonl", orient="records", lines=True)

1: 100%|██████████| 100/100 [07:00<00:00,  4.20s/it]
2: 100%|██████████| 100/100 [07:15<00:00,  4.35s/it]


In [10]:
for aspect in aspects:
    prompts = []
    for _, row in summaries.iterrows():
        prompt = theirs_score_llama3.format(
            INSTRUCTION=instructions[aspect],
            ARTICLE=row["article"],
            SUMMARY=row["summary"],
            ASPECT=aspect
        )
        prompts.append(prompt)
    summaries[f"prompt-{aspect}"] = prompts
summaries.to_json(f"{gdrive_path}/model_harvesting/prompts/summeval-theirs-score-llama3.jsonl", orient="records", lines=True)