# Combine ICLR Submissions (All Years)

Loads all `iclr*_submissions.csv` files, adds a `year` column, and concatenates into a single dataset. Uses outer join so columns are unioned (NaN where a year lacks a column).

**Format consistency:** Column schemas differ across years (OpenReview evolved). Core fields (title, abstract, authors, keywords, pdf, _bibtex) exist in most years. Years 2013–2017 may be empty or have different structures.

In [None]:
import pandas as pd
from pathlib import Path

# Root folder; each year's CSV is looked up under ICLR/<year>/.
ICLR_DIR = Path("ICLR")
# Destination of the merged dataset, placed directly inside the root folder.
OUTPUT_PATH = ICLR_DIR.joinpath("iclr_all_years_submissions.csv")

In [None]:
# Candidate conference years (2013 through 2026 inclusive). Years whose CSV
# is absent on disk are skipped without a message.
years = range(2013, 2027)
dfs = []

for year in years:
    path = ICLR_DIR / str(year) / f"iclr{year}_submissions.csv"
    if not path.exists():
        continue
    try:
        df = pd.read_csv(path)
    except pd.errors.EmptyDataError:
        # pandas raises this when the file has no columns to parse.
        print(f"  {year}: skipped (empty file)")
        continue
    # A frame with no rows, or with fewer than two columns, is treated as
    # unusable and left out of the combined dataset.
    if df.empty or len(df.columns) < 2:
        print(f"  {year}: skipped (empty or malformed)")
        continue
    df["year"] = year
    # Normalize TL;DR vs TLDR so both spellings land in a single column.
    # Skipped when the file already has a TLDR column, to avoid duplicates.
    if "TL;DR" in df.columns and "TLDR" not in df.columns:
        df = df.rename(columns={"TL;DR": "TLDR"})
    dfs.append(df)
    print(f"  {year}: {len(df)} rows, {len(df.columns)} columns")

# pd.concat raises an opaque "No objects to concatenate" ValueError on an
# empty list; fail with the same exception type but an actionable message.
if not dfs:
    raise ValueError(
        f"No usable iclr<year>_submissions.csv files found under {ICLR_DIR}"
    )

# Outer join: the result has the union of all columns, with NaN where a
# given year lacks a column.
combined = pd.concat(dfs, ignore_index=True, join="outer")
print(f"\nCombined: {len(combined)} rows, {len(combined.columns)} columns")

In [None]:
# Put "year" first; every other column keeps its existing relative order.
cols = ["year", *(c for c in combined.columns if c != "year")]
combined = combined[cols]

# Write the merged table without the positional index column.
combined.to_csv(OUTPUT_PATH, index=False)
print(f"Saved to {OUTPUT_PATH}")