In [1]:
import os, csv
import getpass
import json

import openreview
import pandas as pd
import pdfplumber
import requests
from tqdm import tqdm

# Credentials must never be written into the notebook: anything typed here ends
# up in version control and in every shared copy of the file.
# Read them from the environment and, when a variable is missing or empty,
# prompt for it interactively (the password prompt does not echo).
# The environment variables are still populated afterwards, as before, so any
# later code that reads them keeps working.
# NOTE: the password that used to be hardcoded in this cell must be treated as
# compromised and rotated.
if not os.environ.get('OPENREVIEW_USERNAME'):
    os.environ['OPENREVIEW_USERNAME'] = input('OpenReview username: ')
if not os.environ.get('OPENREVIEW_PASSWORD'):
    os.environ['OPENREVIEW_PASSWORD'] = getpass.getpass('OpenReview password: ')

# Authenticated client for the OpenReview API v2 endpoint.
client = openreview.api.OpenReviewClient(
    baseurl='https://api2.openreview.net',
    username=os.environ['OPENREVIEW_USERNAME'],
    password=os.environ['OPENREVIEW_PASSWORD'],
)

In [2]:
# Invitation id under which ICLR 2025 submissions are posted
# (the same one used in 00_Get_PDF_url.ipynb).
iclr_invitation = "ICLR.cc/2025/Conference/-/Submission"

# Retrieve every note posted under that invitation, then report how many came back.
submissions = client.get_all_notes(
    invitation=iclr_invitation,
)
print(f"Retrieved {len(submissions)} submissions for {iclr_invitation}")

Retrieved 11672 submissions for ICLR.cc/2025/Conference/-/Submission


In [32]:
def extract_value(field):
    """Unwrap an OpenReview content field.

    Dict-shaped fields yield their ``"value"`` entry (``None`` when that key is
    absent); any other object is returned unchanged.
    """
    return field.get("value") if isinstance(field, dict) else field


def get_content_value(content: dict, key: str):
    """Fetch ``key`` from a note's content dict and unwrap it with ``extract_value``.

    Returns ``None`` when ``content`` is ``None`` or has no entry for ``key``.
    """
    if content is not None and key in content:
        return extract_value(content[key])
    return None


def get_submission_title(submission):
    """Return the unwrapped ``"title"`` field of a submission note.

    A missing/empty ``content`` or a content dict without ``"title"`` gives ``None``.
    """
    fields = submission.content or {}
    return get_content_value(fields, "title") if "title" in fields else None

def extract_rating_from_review(content: dict):
    """Pull the rating out of a review's content dict.

    The field name is not fixed across review forms, so the lookup runs in two
    stages:

    1. the well-known names ``recommendation``, ``overall_assessment``,
       ``overall_score`` and ``rating``, in that priority order;
    2. otherwise the first key (in dict order) whose lowercase form contains
       ``"rating"`` or ``"score"``.

    The chosen field is unwrapped with ``get_content_value``. Returns ``None``
    when ``content`` is empty/``None`` or no key qualifies.
    """
    if not content:
        return None

    preferred_names = ("recommendation", "overall_assessment", "overall_score", "rating")
    chosen = next((name for name in preferred_names if name in content), None)

    if chosen is None:
        # No well-known name present: fall back to anything that looks like a rating/score.
        chosen = next(
            (
                name
                for name in content.keys()
                if any(token in name.lower() for token in ("rating", "score"))
            ),
            None,
        )

    if chosen is None:
        return None
    return get_content_value(content, chosen)

def get_decision_for_forum(notes_in_forum):
    """Find the decision recorded among a forum's notes.

    Every note is first checked for a ``decision`` / ``Decision`` field; only
    if none of them has one are the notes checked again for a ``desk_reject``
    field. The first match is unwrapped with ``get_content_value`` and
    returned. Returns ``None`` when no note carries any of these fields.
    """
    # Field groups in priority order: a decision on any note wins over a
    # desk-reject field on an earlier note.
    field_groups = (("decision", "Decision"), ("desk_reject",))

    for group in field_groups:
        for note in notes_in_forum:
            fields = note.content or {}
            for name in group:
                if name in fields:
                    return get_content_value(fields, name)

    return None



In [33]:
rows = []

for submission in tqdm(submissions, desc="Collecting human reviews"):
    forum_id = submission.forum

    # Fields shared by every review row of this paper.
    paper_fields = {
        "paper_forum": forum_id,
        "paper_id": submission.id,
        "title": get_submission_title(submission),
    }

    # Everything posted in the submission's forum (reviews, decisions, etc.).
    forum_notes = client.get_all_notes(forum=forum_id)
    paper_fields["decision"] = get_decision_for_forum(forum_notes)

    # A note counts as a review when any of its invitations mentions 'Official_Review'.
    review_notes = [
        note
        for note in forum_notes
        if any(
            "Official_Review" in inv
            for inv in (getattr(note, "invitations", None) or [])
        )
    ]

    for review in review_notes:
        content = review.content or {}

        # Prefer an explicit reviewer_id field; otherwise use the first signature, if any.
        if "reviewer_id" in content:
            reviewer_id = get_content_value(content, "reviewer_id")
        else:
            signatures = review.signatures or []
            reviewer_id = signatures[0] if signatures else None

        text_fields = {
            name: get_content_value(content, name)
            for name in ("summary", "strengths", "weaknesses", "questions")
        }

        # Key order here determines the column order of the resulting table.
        rows.append(
            {
                **paper_fields,
                "review_id": review.id,
                "reviewer_id": reviewer_id,
                "rating": extract_rating_from_review(content),
                **text_fields,
            }
        )

len(rows)

Collecting human reviews: 100%|██████████| 11672/11672 [41:32<00:00,  4.68it/s] 


46748

In [None]:
# Turn the collected review dicts into a table: one row per dict.
reviews_df = pd.DataFrame(data=rows)
print("Reviews shape:", reviews_df.shape)

Reviews shape: (46748, 11)


In [43]:
# Count distinct papers that have a non-null decision, next to the count of
# all distinct papers that appear in the reviews table.
has_decision = reviews_df["decision"].notna()
papers_with_decision = reviews_df.loc[has_decision, "paper_id"].nunique()
total_papers_in_reviews = reviews_df["paper_id"].nunique()

# Displayed as (with decision, total, fraction with decision).
(
    papers_with_decision,
    total_papers_in_reviews,
    papers_with_decision / total_papers_in_reviews,
)

(8727, 11520, 0.7575520833333333)

In [52]:
# Reviews of papers that have a decision; the rating column is left out of this table.
decided_rows = reviews_df["decision"].notna()
df_with_decision = reviews_df.loc[decided_rows].drop(columns=["rating"])

# Paper-level coverage: how many distinct papers are kept vs. dropped by the filter.
total_papers = reviews_df["paper_id"].nunique()
papers_with_decision = df_with_decision["paper_id"].nunique()
papers_without_decision = total_papers - papers_with_decision

summary_lines = [
    ("Total manuscripts with reviews:", total_papers),
    ("Manuscripts with a decision:", papers_with_decision),
    ("Manuscripts WITHOUT a decision (neglected):", papers_without_decision),
    ("Fraction neglected:", papers_without_decision / total_papers),
]
for label, value in summary_lines:
    print(label, value)

Total manuscripts with reviews: 11520
Manuscripts with a decision: 8727
Manuscripts WITHOUT a decision (neglected): 2793
Fraction neglected: 0.24244791666666668


In [53]:
# Writer options shared by both exports.
# NOTE(review): with escapechar set, backslashes in the review text are
# presumably written escaped, so readers likely need the same escapechar in
# read_csv — confirm against the consumers of these files.
csv_options = dict(
    index=False,
    quoting=csv.QUOTE_MINIMAL,  # csv.QUOTE_ALL is the alternative
    escapechar="\\",
)

# Reviews restricted to papers with a decision (no rating column).
df_with_decision.to_csv("ICLR2025_human_reviews_with_decision.csv", **csv_options)

# The full reviews table.
reviews_df.to_csv("ICLR2025_human_reviews.csv", **csv_options)