# ICLR 2017 OpenReview data collection

This notebook uses the OpenReview **Legacy API v1** (api.openreview.net). Submissions are fetched in bulk by invitation ID rather than with one forum call per paper, so collection is fast and avoids rate-limit bottlenecks.

**Note:** The OpenReview API does not expose submission data for ICLR 2013–2017, so the API call in this notebook returns 0 submissions. When that happens, the final cell falls back to loading a pre-crawled CSV instead. See `ICLR/README.md` for details and alternatives.

In [1]:
# Install if needed (uncomment)
# !pip install openreview-py "urllib3<2.0"

import openreview
import pandas as pd
from pathlib import Path

# Directory that receives the exported CSV. An argument-less Path() is the
# current working directory, so the file is written next to the notebook.
OUTPUT_DIR = Path()
OUTPUT_DIR.mkdir(exist_ok=True, parents=True)

In [2]:
def get_iclr_2017_submissions():
    """Retrieve every ICLR 2017 blind-submission note via the OpenReview Legacy API.

    Prints a progress line, the number of notes collected, and the titles of
    the first five notes (using 'No Title' when a note's content has no title).

    Returns:
        The notes returned by ``client.get_all_notes`` for the
        ``ICLR.cc/2017/Conference/-/Blind_Submission`` invitation.
    """
    invitation_id = "ICLR.cc/2017/Conference/-/Blind_Submission"
    client = openreview.Client(baseurl="https://api.openreview.net")
    print(f"Collecting submissions from {invitation_id}...")

    # One request by invitation instead of one forum lookup per paper.
    notes = client.get_all_notes(invitation=invitation_id)
    print(f"Successfully collected {len(notes)} submissions.")

    # Show a short preview so the reader can sanity-check what came back.
    preview = notes[:5]
    for rank, note in enumerate(preview, start=1):
        title = note.content.get('title', 'No Title')
        print(f"  {rank}. {title}")

    return notes

In [3]:
submissions = get_iclr_2017_submissions()

if len(submissions) == 0:
    # The API returned nothing, so load a pre-crawled dataset instead.
    url = "https://raw.githubusercontent.com/ahmaurya/iclr2017-reviews-dataset/master/iclr2017_papers.csv"
    print(f"API returned 0. Fetching from {url}...")
    df = pd.read_csv(url)
    # If the alternative source has no "title" column, treat its first column
    # as the title so the output matches the schema built from the API.
    needs_title = len(df.columns) > 0 and "title" not in df.columns
    if needs_title:
        first_column = df.columns[0]
        df = df.rename(columns={first_column: "title"})
    print(f"Loaded {len(df)} rows from alternative source.")
else:
    # One row per submission, one column per key of the note's content.
    df = pd.DataFrame([note.content for note in submissions])

# Write the result (from either source) into the configured output directory.
out_path = OUTPUT_DIR / "iclr2017_submissions.csv"
df.to_csv(out_path, index=False)
print(f"Saved {len(df)} rows to {out_path}")

Collecting submissions from ICLR.cc/2017/Conference/-/Blind_Submission...
Retrying request: GET /notes?invitation=ICLR.cc%2F2017%2FConference%2F-%2FBlind_Submission&limit=1000&sort=id, response: <urllib3.response.HTTPResponse object at 0x11a09b2e0>, error: None
Successfully collected 0 submissions.
Saved 0 rows to iclr2017_submissions.csv
