### Generative agent-based peer review simulation

This notebook sets up a simple **agent-based simulation of ICLR peer review decisions**.

We use historical human reviews (ratings and decisions) to estimate paper-level quality, then simulate review panels where a varying proportion of reviewers are **LLM-based agents** instead of humans.

By changing the fraction of agent reviewers, we can study how simulated **accept/reject decisions across manuscripts** shift as the reviewer population becomes more agent-heavy.


In [2]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from pathlib import Path

%matplotlib inline

# Path to the CSV created in 01_get_human_review.ipynb.
# The path is relative, so it is looked up from the kernel's current working directory.
DATA_PATH = Path("ICLR2025_human_reviews.csv")
# Echo the absolute path as the cell's last expression so it is clear which file will be read.
DATA_PATH.resolve()


ModuleNotFoundError: No module named 'seaborn'

In [None]:
# Load the human review data produced by the previous notebook.
# Fail early with an actionable message when the CSV has not been generated yet,
# instead of letting pd.read_csv raise a less specific error.
if not DATA_PATH.exists():
    raise FileNotFoundError(
        f"Could not find {DATA_PATH.resolve()} — run 01_get_human_review.ipynb first to create it."
    )

# One row per review; later cells read the rating, decision, title,
# paper_id and review_id columns from this frame.
reviews_df = pd.read_csv(DATA_PATH)
print("Raw reviews shape:", reviews_df.shape)
reviews_df.head()


In [None]:
# Parse numeric rating from strings like "8: Strong accept"

def parse_rating(val):
    """Extract the numeric part of a review rating.

    Accepts missing values (returned as NaN), plain numbers (returned as float),
    and strings such as "8: Strong accept" or "8 Strong accept", where the number
    is whatever precedes the first ':' or, failing that, the first space.
    Anything that cannot be converted yields NaN.
    """
    if pd.isna(val):
        return np.nan
    if isinstance(val, (int, float)):
        return float(val)

    text = str(val).strip()
    # Prefer the "<number>: <label>" layout, then fall back to "<number> <label>".
    for delimiter in (":", " "):
        head, _, _ = text.partition(delimiter)
        try:
            return float(head)
        except ValueError:
            pass
    return np.nan


# Attach the parsed numeric rating to every review and report how many parsed successfully.
rating_num = reviews_df["rating"].apply(parse_rating)
reviews_df["rating_num"] = rating_num

parsed_fraction = rating_num.notna().mean()
print("Fraction of reviews with parsed numeric rating:", parsed_fraction)
reviews_df[["paper_id", "rating", "rating_num"]].head()


In [None]:
# Collapse the review-level table to one row per paper: review count, rating
# mean/std, the first recorded decision, and the title.


def _first_non_null(values):
    """Return the first non-missing entry of `values`, or NaN when every entry is missing."""
    present = values.dropna()
    return present.iloc[0] if len(present) else np.nan


paper_stats = reviews_df.groupby("paper_id").agg(
    n_reviews=("review_id", "count"),
    mean_rating=("rating_num", "mean"),
    std_rating=("rating_num", "std"),
    decision=("decision", _first_non_null),
    title=("title", "first"),
).reset_index()

print("Paper-level stats shape:", paper_stats.shape)
paper_stats.head()


In [None]:
# Map decisions to a simple accept / reject label and define a "quality" score

def is_accept(decision: str) -> float:
    """Map a free-text decision label to 1.0 (accept), 0.0 (reject) or NaN (unknown).

    Matching is case-insensitive. Any label mentioning "reject" counts as a
    rejection, even if it also mentions "accept"; a label mentioning only
    "accept" counts as an acceptance. Non-string inputs and labels mentioning
    neither word give NaN.
    """
    if not isinstance(decision, str):
        return np.nan

    label = decision.lower()
    # "reject" takes precedence, so only reject-free labels can be accepts.
    if "reject" in label:
        return 0.0
    if "accept" in label:
        return 1.0
    return np.nan


# Binary accept label derived from the free-text decision.
accept_labels = paper_stats["decision"].apply(is_accept)
paper_stats["accept_label"] = accept_labels

# The mean human rating stands in for latent paper quality; papers without any
# parsed rating fall back to the mean of the per-paper means.
# (A more sophisticated quality estimate can be swapped in later.)
mean_ratings = paper_stats["mean_rating"]
global_mean_rating = mean_ratings.mean()
paper_stats["quality"] = mean_ratings.fillna(global_mean_rating)

paper_stats[["paper_id", "title", "quality", "accept_label"]].head()


In [None]:
# Define human vs. agent reviewer behavior and panel decision rule

# Shared, seeded generator so that a fresh top-to-bottom run is reproducible.
rng = np.random.default_rng(0)


def sample_human_rating(quality: float, noise_std: float = 1.0, bias: float = 0.0) -> float:
    """Draw one synthetic human rating for a paper of the given latent quality.

    The rating is `quality + bias` plus zero-mean Gaussian noise with standard
    deviation `noise_std`, drawn from the module-level `rng`. Quality and
    ratings share the numeric scale of the parsed ratings.
    """
    noise = rng.normal(loc=0, scale=noise_std)
    return quality + bias + noise


def sample_agent_rating(quality: float, noise_std: float = 0.8, bias: float = 0.0) -> float:
    """Draw one synthetic agent (LLM reviewer) rating for a paper of the given quality.

    Same model as the human sampler, but the default noise is slightly lower;
    both `bias` and `noise_std` can be tuned by the caller.
    """
    noise = rng.normal(loc=0, scale=noise_std)
    return quality + bias + noise


def decide_from_ratings(ratings, threshold: float = 7.0) -> int:
    """Turn a panel's ratings into a decision: 1 (accept) or 0 (reject).

    The paper is accepted when the mean rating is at least `threshold`.
    An empty panel is treated as a rejection.
    """
    if len(ratings) == 0:
        return 0

    panel_mean = np.mean(ratings)
    return 1 if panel_mean >= threshold else 0


def simulate_paper_decisions(
    quality: float,
    n_reviewers: int = 3,
    agent_share: float = 0.0,
    n_trials: int = 100,
    human_kwargs: dict | None = None,
    agent_kwargs: dict | None = None,
    threshold: float = 7.0,
) -> float:
    """Return the *simulated acceptance probability* for one paper.

    We repeatedly form panels of size `n_reviewers` with a given fraction of agent
    reviewers, simulate ratings, and apply the decision rule.
    """

    human_kwargs = human_kwargs or {}
    agent_kwargs = agent_kwargs or {}

    n_agents = int(round(n_reviewers * agent_share))
    n_humans = n_reviewers - n_agents

    outcomes = []
    for _ in range(n_trials):
        human_ratings = [sample_human_rating(quality, **human_kwargs) for _ in range(n_humans)]
        agent_ratings = [sample_agent_rating(quality, **agent_kwargs) for _ in range(n_agents)]
        ratings = human_ratings + agent_ratings
        outcomes.append(decide_from_ratings(ratings, threshold=threshold))

    return float(np.mean(outcomes))


In [None]:
# Sweep the requested agent share for every paper and record the simulated
# acceptance probability of each (paper, share) pair.

agent_shares = np.linspace(0.0, 1.0, 5)  # requested shares: 0%, 25%, 50%, 75%, 100%
n_reviewers = 3
n_trials = 200

# Reviewer-model parameters, shared by every call below.
human_params = dict(noise_std=1.0, bias=0.0)
agent_params = dict(noise_std=0.8, bias=0.0)

sim_rows: list[dict] = []

# Papers in the outer loop and shares in the inner loop, in table order.
for pid, q in zip(paper_stats["paper_id"], paper_stats["quality"]):
    for s in agent_shares:
        sim_rows.append(
            {
                "paper_id": pid,
                "agent_share": s,
                "sim_accept_prob": simulate_paper_decisions(
                    q,
                    n_reviewers=n_reviewers,
                    agent_share=s,
                    n_trials=n_trials,
                    human_kwargs=human_params,
                    agent_kwargs=agent_params,
                    threshold=7.0,
                ),
            }
        )

sim_results = pd.DataFrame(sim_rows)
print("Simulation results shape:", sim_results.shape)
sim_results.head()


In [None]:
# Average the per-paper acceptance probabilities within each agent share to see
# how the overall acceptance rate moves as panels become more agent-heavy.

by_share = sim_results.groupby("agent_share")
summary = by_share.agg(mean_accept_prob=("sim_accept_prob", "mean")).reset_index()

summary


In [None]:
# Line plot of mean simulated acceptance probability against the agent share.

fig, ax = plt.subplots(figsize=(6, 4))
sns.lineplot(data=summary, x="agent_share", y="mean_accept_prob", marker="o", ax=ax)
ax.set_xlabel("Fraction of reviewers who are agents")
ax.set_ylabel("Mean simulated acceptance probability")
ax.set_title("Effect of agent share on simulated ICLR acceptance rates")
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()


In [None]:
# How much do individual manuscripts move when everyone becomes an agent reviewer?

# One row per paper, one column per simulated agent share.
pivot = sim_results.pivot(index="paper_id", columns="agent_share", values="sim_accept_prob")

if 0.0 in pivot.columns and 1.0 in pivot.columns:
    # Per-paper change in acceptance probability: all-agent panel minus all-human panel.
    pivot["delta_all_agents_vs_all_humans"] = pivot[1.0] - pivot[0.0]
    delta_summary = pivot["delta_all_agents_vs_all_humans"].describe()
else:
    delta_summary = None
    print("Expected agent_share values 0.0 and 1.0 not found in simulation results.")

# Jupyter only renders the last *top-level* expression of a cell; an expression
# nested inside the `if` body is evaluated and discarded, so surface it here.
delta_summary
