In [26]:
import os, csv
import getpass
import json

import openreview
import pandas as pd
import pdfplumber
import requests
from tqdm import tqdm

# Credentials must never be stored in the notebook. They are taken from the
# environment when already set (export OPENREVIEW_USERNAME / OPENREVIEW_PASSWORD
# before starting the kernel) and prompted for interactively otherwise.
# getpass does not echo the password, so it cannot end up in the cell output.
# NOTE(review): a real password was previously committed in this cell; treat it
# as leaked, rotate it, and scrub it from version-control history.
if not os.environ.get('OPENREVIEW_USERNAME'):
    os.environ['OPENREVIEW_USERNAME'] = input('OpenReview username: ')
if not os.environ.get('OPENREVIEW_PASSWORD'):
    os.environ['OPENREVIEW_PASSWORD'] = getpass.getpass('OpenReview password: ')

# Authenticated client against the OpenReview API v2 endpoint.
client = openreview.api.OpenReviewClient(
    baseurl='https://api2.openreview.net',
    username=os.environ['OPENREVIEW_USERNAME'],
    password=os.environ['OPENREVIEW_PASSWORD'],
)

# Fetch ICLR 2025 submissions from the OpenReview API

In [16]:
# Invitation id used to query the ICLR 2025 submissions.
# NOTE(review): despite its name, this variable holds an invitation id, not a date.
iclr_date_to_get = 'ICLR.cc/2025/Conference/-/Submission'
# Retrieve every note posted under that invitation (one note per submission, presumably — confirm).
submissions = client.get_all_notes(invitation=iclr_date_to_get)

print(f"Retrieved {len(submissions)} submissions for {iclr_date_to_get}")

Retrieved 11672 submissions for ICLR.cc/2025/Conference/-/Submission


## Explore submission record

In [57]:
# Bind `first` in this cell: it was originally only assigned in a later cell,
# so running the notebook top-to-bottom on a fresh kernel raised NameError here.
first = submissions[0]
print(first)

{'cdate': 1727524853394,
 'content': {'_bibtex': {'value': '@misc{\n'
                                  'kachhi2024neuroacoustic,\n'
                                  'title={Neuroacoustic Patterns: Constant Q '
                                  'Cepstral Coefficients for the '
                                  'Classification of Neurodegenerative '
                                  'Disorders},\n'
                                  'author={Aastha Kachhi and Shashank Ojha and '
                                  'Megha Pandey and Ajay Kumar Sharma and '
                                  'Anurag Pandey},\n'
                                  'year={2024},\n'
                                  'url={https://openreview.net/forum?id=5sRnsubyAK}\n'
                                  '}'},
             'abstract': {'value': 'Early identification of neurodegenerative '
                                   'diseases is crucial for effective '
                                   'diagnosis in neurolog

In [58]:
# Top-level field names of the note once it is serialised with to_json().
print(first.to_json().keys()) 

dict_keys(['id', 'forum', 'content', 'invitations', 'cdate', 'odate', 'mdate', 'signatures', 'writers', 'readers', 'license'])


In [59]:
# Field names available inside the note's `content` mapping.
print(first.content.keys()) 

dict_keys(['title', 'authors', 'authorids', 'keywords', 'abstract', 'pdf', 'primary_area', 'code_of_ethics', 'submission_guidelines', 'reciprocal_reviewing', 'anonymous_url', 'no_acknowledgement_section', 'venue', 'venueid', '_bibtex', 'paperhash'])


## Explore review & decision

In [52]:
# Use the first submission as a worked example.
first = submissions[0]
forum_id = first.forum
# Fetch the notes attached to that submission's forum; the next cell filters
# them by invitation to find reviews and decisions.
forum_notes = client.get_all_notes(forum=forum_id)

In [54]:
def _notes_with_invitation(notes, keyword):
    """Return the notes that carry at least one invitation id containing `keyword`.

    Notes without an `invitations` attribute (or with a None/empty one) never match.
    """
    matched = []
    for note in notes:
        invitation_ids = getattr(note, "invitations", []) or []
        if any(keyword in invitation_id for invitation_id in invitation_ids):
            matched.append(note)
    return matched


# Reviews: notes posted under an "Official_Review" invitation
reviews = _notes_with_invitation(forum_notes, "Official_Review")
if not reviews:
    print("No reviews for this forum")
else:
    print("Review content keys:", reviews[0].content.keys())


# Decisions: notes posted under a "Decision" invitation
decisions = _notes_with_invitation(forum_notes, "Decision")
if not decisions:
    print("No decision note for this forum")
else:
    print("Decision content keys:", decisions[0].content.keys())

Review content keys: dict_keys(['summary', 'soundness', 'presentation', 'contribution', 'strengths', 'weaknesses', 'questions', 'flag_for_ethics_review', 'rating', 'confidence', 'code_of_conduct'])
No decision note for this forum


In [55]:
# 1. Decision invitation of one specific submission, hard-coded as an example.
#    NOTE(review): nothing earlier in this notebook prints decision invitations,
#    so this id has to be supplied by hand.
inv = "ICLR.cc/2025/Conference/Submission14290/-/Decision"  # example

# 2. Fetch the decision notes for that invitation
dec_notes = client.get_all_notes(invitation=inv)
print("Number of decision notes:", len(dec_notes))

# 3. Inspect the content of the first decision note
#    (assumes at least one note was returned; indexing [0] fails otherwise)
dec_content = dec_notes[0].content
# Bare tuple as last expression: shows both the keys and the full content mapping
dec_content.keys(), dec_content

Number of decision notes: 1


(dict_keys(['title', 'decision', 'comment']),
 {'title': {'value': 'Paper Decision'},
  'decision': {'value': 'Reject'},
  'comment': {'value': ''}})

# Get PDF URLs for ICLR 2025

In [21]:
# Raw `pdf` entry of the example submission; .get returns None if the key is absent.
first_pdf = first.content.get('pdf')
print(first_pdf)

{'value': '/pdf/1a25dea01f1af2e8a59a59679add4c066906eae3.pdf'}


In [22]:
# Directory where PDFs and parsed JSON will be stored

output_dir = 'ICLR2025_papers'
# exist_ok=True keeps this cell safe to re-run when the directory already exists
os.makedirs(output_dir, exist_ok=True)

# Bare last expression: displays the directory name as the cell output
output_dir

'ICLR2025_papers'

In [32]:
def get_pdf_url(note, base_url="https://openreview.net"):
    """Build an absolute URL for the PDF referenced by a note.

    Parameters
    ----------
    note
        Object exposing a ``content`` mapping. Its ``'pdf'`` entry may be a
        plain value or a ``{'value': ...}`` wrapper.
    base_url : str, optional
        Host prepended to relative paths and used for the id-based fallback.
        A trailing slash is tolerated.

    Returns
    -------
    str or None
        ``None`` when the note has no (or an empty) ``pdf`` entry; otherwise
        an absolute URL.
    """
    pdf_field = note.content.get('pdf')
    if isinstance(pdf_field, dict):
        pdf_field = pdf_field.get('value')

    if not pdf_field:
        return None

    host = base_url.rstrip("/")
    if isinstance(pdf_field, str):
        # Already a full URL: return it untouched. Matching on the scheme
        # separator avoids mistaking an id that merely begins with "http".
        if pdf_field.startswith(("http://", "https://")):
            return pdf_field
        # Any server-relative path ("/pdf/...", "/attachment/...") is resolved
        # against the host; a leading slash can never be a note id.
        if pdf_field.startswith("/"):
            return host + pdf_field
    # Fallback: treat the value as a note id
    return f"{host}/pdf?id={pdf_field}"

# Smoke-test the helper on the second submission and show the resulting URL.
test_pdf_url = get_pdf_url(submissions[1])
print(test_pdf_url)


https://openreview.net/pdf/11fa6b1cf03df801b7d9568dbd25367525da4761.pdf


In [33]:
# Destination file for the per-submission id / title / PDF-URL table written below.
csv_path = "2025_iclr_pdfs_urls.csv"  

def get_title(note):
    """Return the title stored in ``note.content``.

    The ``title`` entry may be a plain value or a ``{'value': ...}`` wrapper.
    "Untitled" is returned when the entry, or the wrapper's ``value`` key,
    is missing.
    """
    fallback = "Untitled"
    raw_title = note.content.get("title", fallback)
    if not isinstance(raw_title, dict):
        return raw_title
    return raw_title.get("value", fallback)

def get_pdf_url_from_forum(note):
    """Return the id-based OpenReview PDF URL built from ``note.forum``."""
    forum_id = note.forum
    return f"https://openreview.net/pdf?id={forum_id}"

# Export one row per submission: note id, forum id, title and forum-based PDF URL.
# newline="" lets the csv module control line endings itself.
header = ["paper_id", "forum", "title", "pdf_url"]
with open(csv_path, "w", newline="", encoding="utf-8") as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(header)
    csv_writer.writerows(
        [
            submission.id,
            submission.forum,
            get_title(submission),
            get_pdf_url_from_forum(submission),
        ]
        for submission in submissions
    )

# Quick Check


In [34]:
# Re-load the export through the same `csv_path` variable that was used to
# write it, so the reader and writer cannot drift apart if the file name changes.
# NOTE(review): `url` holds the whole table (a DataFrame), not a single URL.
url = pd.read_csv(csv_path)

In [37]:
# Preview the first ten title / URL pairs as a sanity check of the export.
url[['title','pdf_url']].head(10)

Unnamed: 0,title,pdf_url
0,Neuroacoustic Patterns: Constant Q Cepstral Co...,https://openreview.net/pdf?id=5sRnsubyAK
1,A Feature-Aware Federated Learning Framework f...,https://openreview.net/pdf?id=J1SGf2lyr6
2,UnoLoRA: Single Low-Rank Adaptation for Effici...,https://openreview.net/pdf?id=49ti6LOUw5
3,Synergistic Approach for Simultaneous Optimiza...,https://openreview.net/pdf?id=zkNCWtw2fd
4,EXecution-Eval: Can language models execute re...,https://openreview.net/pdf?id=viQ1bLqKY0
5,The Rate-Distortion-Perception Trade-Off with ...,https://openreview.net/pdf?id=vdUYa7N8Mt
6,Beyond Random Masking: When Dropout meets Grap...,https://openreview.net/pdf?id=PwxYoMvmvy
7,Defining Deception in Decision Making,https://openreview.net/pdf?id=YaRzuMaubS
8,Self-supervised contrastive learning performs ...,https://openreview.net/pdf?id=ONfWFluZBI
9,"MAC-CAFE: Multi-actor, Centralized Critic Arch...",https://openreview.net/pdf?id=Ql7msQBqoF
