In [18]:
# Run configuration: edit these values to target a different conference or year.

# OpenReview venue id; submissions are fetched by matching the `venueid` content field.
VENUE_ID = 'ICLR.cc/2025/Conference'
# Decision labels as they are spelled in each note's `venue` content field.
# These must belong to the same year as VENUE_ID, otherwise they match nothing.
VENUE_LS = ['ICLR 2025 Oral', 'ICLR 2025 Spotlight', 'ICLR 2025 Poster']
# Short conference tag appended to every generated BibTeX key.
CONFERENCE_NAME = 'ICLR'
# Output path prefix; the export cell writes `<OUT>.bib` and `<OUT>.md`.
OUT = 'output/iclr2025'

## Init API client

In [2]:
import os
from dotenv import load_dotenv
import openreview

# Load the variables defined in the local `.env` file into the process environment.
# NOTE(review): python-dotenv does not override variables that already exist unless
# asked to, and `username` can collide with an OS-provided variable (Windows exposes
# USERNAME case-insensitively) — confirm the intended account is the one picked up.
load_dotenv('.env')
# Client for the OpenReview API v2 endpoint, using the credentials read above.
# NOTE(review): os.getenv returns None for a missing variable; presumably the client
# then runs without authentication — verify before relying on restricted data.
client = openreview.api.OpenReviewClient(
    baseurl='https://api2.openreview.net',
    username=os.getenv("username"),
    password=os.getenv("password"),
)

## Scraping prep

In [3]:
# Show, in sorted order, every venue id that mentions one of the three
# conferences below and also contains the word 'Conference'.
top3 = ['ICML', 'ICLR', 'NeurIPS']
venues = client.get_group(id='venues').members
venue_ids = sorted(
    venue
    for venue in venues
    if 'Conference' in venue and any(tag in venue for tag in top3)
)
venue_ids

['ICLR.cc/2018/Conference',
 'ICLR.cc/2019/Conference',
 'ICLR.cc/2020/Conference',
 'ICLR.cc/2021/Conference',
 'ICLR.cc/2022/Conference',
 'ICLR.cc/2023/Conference',
 'ICLR.cc/2024/Conference',
 'ICLR.cc/2025/Conference',
 'ICML.cc/2020/Conference',
 'ICML.cc/2023/Conference',
 'ICML.cc/2024/Conference',
 'ICML.cc/2025/Conference',
 'NeurIPS.cc/2020/Conference',
 'NeurIPS.cc/2021/Conference',
 'NeurIPS.cc/2022/Conference',
 'NeurIPS.cc/2023/Conference',
 'NeurIPS.cc/2024/Conference',
 'NeurIPS.cc/2025/Conference']

In [9]:
# Fetch one known paper note by its id and print it to examine the structure of
# its `content` mapping.
# The field names seen here are what `client.get_all_notes(content=...)` is
# filtered on further down to match all papers.
sample_paper = client.get_notes(id='odjMSBSWRt')[0]
print(sample_paper)

{'cdate': 1727524390512,
 'content': {'TLDR': {'value': 'We introduce DarkBench, a benchmark revealing '
                               'that many large language models employ '
                               'manipulative dark design patterns. '
                               'Organizations developing LLMs should actively '
                               'recognize and mitigate the impact of dark '
                               'design patterns to promote ethical Al.'},
             '_bibtex': {'value': '@inproceedings{\n'
                                  'kran2025darkbench,\n'
                                  'title={DarkBench: Benchmarking Dark '
                                  'Patterns in Large Language Models},\n'
                                  'author={Esben Kran and Hieu Minh Nguyen and '
                                  'Akash Kundu and Sami Jawhar and Jinsuk Park '
                                  'and Mateusz Maria Jurewicz},\n'
                                  'b

## Paper scraping

In [5]:
# Fetch every note whose `venueid` content field equals VENUE_ID.
submissions = client.get_all_notes(content={'venueid': VENUE_ID})

# Report the count and preview only a few notes: echoing the whole list would
# write thousands of Note reprs into the notebook output.
print(f'{len(submissions)} submissions fetched')
submissions[:3]

Getting V2 Notes: 100%|█████████▉| 3703/3707 [00:02<00:00, 1326.19it/s]


[Note(id = 'zxg6601zoc',number = 1371,cdate = 1726597109281,pdate = 1737562226953,odate = 1728008565725,mdate = 1740889663811,tcdate = 1726597109281,tmdate = 1740889663811,ddate = None,content = {'title': {'value': 'Re-Imagining Multimodal Instruction Tuning: A Representation View'}, 'authors': {'value': ['Yiyang Liu', 'James Chenhao Liang', 'Ruixiang Tang', 'Yugyung Lee', 'MAJID RABBANI', 'Sohail Dianat', 'Raghuveer Rao', 'Lifu Huang', 'Dongfang Liu', 'Qifan Wang', 'Cheng Han']}, 'authorids': {'value': ['~Yiyang_Liu3', '~James_Chenhao_Liang1', '~Ruixiang_Tang1', '~Yugyung_Lee1', '~MAJID_RABBANI1', '~Sohail_Dianat1', '~Raghuveer_Rao1', '~Lifu_Huang1', '~Dongfang_Liu1', '~Qifan_Wang2', '~Cheng_Han1']}, 'keywords': {'value': ['Representation Tuning', 'Large Multimodal Models', 'Parameter-efficient Fine-tuning']}, 'TLDR': {'value': 'Multimodal Representation Tuning for Zero-shot Multimodal Instruction Learning'}, 'abstract': {'value': 'Multimodal instruction tuning has proven to be an eff

In [6]:
# Distinct primary areas that occur among the fetched submissions
areas = {note.content['primary_area']['value'] for note in submissions}

areas

{'alignment, fairness, safety, privacy, and societal considerations',
 'applications to computer vision, audio, language, and other modalities',
 'applications to neuroscience & cognitive science',
 'applications to physical sciences (physics, chemistry, biology, etc.)',
 'applications to robotics, autonomy, planning',
 'causal reasoning',
 'datasets and benchmarks',
 'foundation or frontier models, including LLMs',
 'generative models',
 'infrastructure, software libraries, hardware, systems, etc.',
 'interpretability and explainable AI',
 'learning on graphs and other geometries & topologies',
 'learning on time series and dynamical systems',
 'learning theory',
 'neurosymbolic & hybrid AI systems (physics-informed, logic & formal reasoning, etc.)',
 'optimization',
 'other topics in machine learning (i.e., none of the above)',
 'probabilistic methods (Bayesian methods, variational inference, sampling, UQ, etc.)',
 'reinforcement learning',
 'transfer learning, meta learning, and lif

In [7]:
from collections import defaultdict

# Group the submissions as {venue label: {primary area: [notes]}}.
# A nested defaultdict whose innermost factory is `list` creates each missing
# level with the right container type, so no manual membership checks are needed.
grouped = defaultdict(lambda: defaultdict(list))

for submission in submissions:
    venue = submission.content['venue']['value']
    area = submission.content['primary_area']['value']
    grouped[venue][area].append(submission)

# Freeze into plain dicts: a later lookup of an unknown venue/area then raises
# KeyError instead of silently inserting an empty group into the export.
allpaper = {venue_label: dict(by_area) for venue_label, by_area in grouped.items()}

allpaper.keys()

dict_keys(['ICLR 2025 Poster', 'ICLR 2025 Spotlight', 'ICLR 2025 Oral', 'ICLR 2025 conditionaloral', 'ICLR 2025 conditionalposter', 'ICLR 2025 conditionalspotlight'])

## Export .bib & .md

In [11]:
# Display the whole `content` mapping of the sample note.
sample_paper.content
# Individual fields that the grouping and export cells read
# (uncomment one to inspect just that value):
# sample_paper.content['venue']['value']
# sample_paper.content['primary_area']['value']
# sample_paper.content['_bibtex']['value']

{'title': {'value': 'DarkBench: Benchmarking Dark Patterns in Large Language Models'},
 'authors': {'value': ['Esben Kran',
   'Hieu Minh Nguyen',
   'Akash Kundu',
   'Sami Jawhar',
   'Jinsuk Park',
   'Mateusz Maria Jurewicz']},
 'authorids': {'value': ['~Esben_Kran1',
   '~Hieu_Minh_Nguyen2',
   '~Akash_Kundu2',
   '~Sami_Jawhar1',
   '~Jinsuk_Park1',
   '~Mateusz_Maria_Jurewicz1']},
 'keywords': {'value': ['Dark Patterns',
   'AI Deception',
   'Large Language Models']},
 'TLDR': {'value': 'We introduce DarkBench, a benchmark revealing that many large language models employ manipulative dark design patterns. Organizations developing LLMs should actively recognize and mitigate the impact of dark design patterns to promote ethical Al.'},
 'abstract': {'value': "We introduce DarkBench, a comprehensive benchmark for detecting dark design patterns—manipulative techniques that influence user behavior—in interactions with large language models (LLMs). Our benchmark comprises 660 prompts 

In [14]:
import re

def parse_bitex(bibtex_data, gen_id=False, id_surfix='', lower_case_type=True):
    """Parse a single BibTeX entry into a flat dict of strings.

    The result always holds ``'type'`` (entry type) and ``'id'`` (citation key),
    followed by one item per field, keyed by the lower-cased field name.

    Field values may be brace-delimited (nested braces are kept verbatim) or
    bare (terminated by ``,``, ``}`` or the end of the entry). Quoted values
    that contain commas are not supported by this scanner.

    Every ``": "`` inside a value is rewritten to ``"{:} "``, and a value that
    starts with ``{`` or ends with ``}`` is wrapped in double quotes.

    Args:
        bibtex_data: Text containing one ``@type{key, field = value, ...}`` entry.
        gen_id: When true and a non-empty ``author`` field exists, replace the
            citation key with ``<first name><last name><year>`` of the first
            author (non-alphanumeric characters removed) plus ``id_surfix``.
        id_surfix: Text appended to a generated key.
        lower_case_type: When true, lower-case the entry type.

    Returns:
        dict mapping ``'type'``, ``'id'`` and each field name to its string value.

    Raises:
        ValueError: If no BibTeX entry can be found in ``bibtex_data``.
    """
    entry_regex = re.compile(r'@([a-zA-Z]+){([^,]+),(.*)}', re.DOTALL)
    match = entry_regex.search(bibtex_data)

    if not match:
        raise ValueError("Invalid BibTeX data")

    fields = {
        'type': match.group(1),
        # The key may sit on its own line after '@type{'; drop that whitespace.
        'id': match.group(2).strip(),
    }
    fields_str = match.group(3)
    last_idx = len(fields_str) - 1

    mode = 'key'
    store = ''
    max_layer = 0   # number of '{' seen in the current value; 0 means a bare value
    stack = []      # currently open braces of the value being read
    keys = []
    values = []

    for idx, char in enumerate(fields_str):
        if mode == 'key':
            if char == '=':
                keys.append(store.strip())
                store = ''
                mode = 'value'
            else:
                store += char
        elif mode == 'value':
            store += char

            if char == '{':
                stack.append(char)
                max_layer += 1
                if max_layer == 1:
                    # Opening delimiter: discard it and any whitespace before it.
                    store = ''
            elif char == '}' and stack:
                stack.pop()
                if not stack:
                    # Closing delimiter of the value itself: not part of the text.
                    store = store[:-1]

            braced_done = max_layer > 0 and not stack
            bare_delimited = max_layer == 0 and char in ',}'
            bare_at_end = max_layer == 0 and idx == last_idx

            if braced_done or bare_delimited or bare_at_end:
                if bare_delimited:
                    # The ',' / '}' that ended a bare value is a separator, not content.
                    store = store[:-1]
                value = store.strip().replace(": ", "{:} ")
                if value.startswith('{') or value.endswith('}'):
                    value = f'"{value}"'
                values.append(value)
                store = ''
                max_layer = 0
                mode = 'key'

    for key, value in zip(keys, values):
        # A key still carries the separator that followed the previous value
        # (',' plus a newline or spaces); remove it before normalising the case.
        clean_key = key.replace('\n', '').replace(',', '').strip().lower()
        fields[clean_key] = value

    author_field = fields.get('author', '').strip()
    if gen_id and author_field:
        # Authors are separated by the word 'and', possibly across a line break.
        first_author = re.split(r'\s+and\s+', author_field)[0].strip()
        if ',' in first_author:
            # "Last, First" or "Last, Jr., First": the last name comes first and
            # the given name last.
            parts = [part.strip() for part in first_author.split(',')]
            last_name, first_name = parts[0], parts[-1]
        else:
            name_parts = first_author.split()
            last_name = name_parts.pop()
            first_name = ' '.join(name_parts)

        fields['id'] = re.sub(r'[^a-zA-Z0-9]', '', f"{first_name}{last_name}{fields.get('year', '')}") + id_surfix

    if lower_case_type:
        fields['type'] = fields['type'].lower()

    return fields

# Try the parser on the sample paper's BibTeX string, generating a citation key
# from the first author and year with CONFERENCE_NAME appended.
bib = sample_paper.content['_bibtex']['value']
fields = parse_bitex(bib, gen_id=True, id_surfix=CONFERENCE_NAME)
fields

{'type': 'inproceedings',
 'id': 'EsbenKran2025ICLR',
 'title': 'DarkBench{:} Benchmarking Dark Patterns in Large Language Models',
 'author': 'Esben Kran and Hieu Minh Nguyen and Akash Kundu and Sami Jawhar and Jinsuk Park and Mateusz Maria Jurewicz',
 'booktitle': 'The Thirteenth International Conference on Learning Representations',
 'year': '2025',
 'url': 'https://openreview.net/forum?id=odjMSBSWRt'}

In [15]:
def make_bibtex(fields):
    """Render a dict of BibTeX fields as the text of one entry.

    Args:
        fields: Mapping that must contain ``'type'`` (entry type) and ``'id'``
            (citation key). Every other item is written as ``key = {value}``
            in the mapping's iteration order. Values are converted with string
            formatting, so non-string values such as an integer year are
            accepted.

    Returns:
        The entry text: a ``@type{id,`` header, one two-space-indented line per
        field (each ending in a comma), and a closing ``}`` followed by a newline.

    Raises:
        KeyError: If ``'type'`` or ``'id'`` is missing.
    """
    lines = [f"@{fields['type']}{{{fields['id']},"]
    lines.extend(
        # '{{' and '}}' are literal braces; the inner '{value}' is the substitution.
        f'  {key} = {{{value}}},'
        for key, value in fields.items()
        if key not in ('type', 'id')
    )
    lines.append('}')
    return '\n'.join(lines) + '\n'

# Round-trip check: render the fields parsed from the sample paper back into BibTeX text.
print(make_bibtex(fields))

@inproceedings{EsbenKran2025ICLR,
  title = {DarkBench{:} Benchmarking Dark Patterns in Large Language Models},
  author = {Esben Kran and Hieu Minh Nguyen and Akash Kundu and Sami Jawhar and Jinsuk Park and Mateusz Maria Jurewicz},
  booktitle = {The Thirteenth International Conference on Learning Representations},
  year = {2025},
  url = {https://openreview.net/forum?id=odjMSBSWRt},
}



In [17]:
# Print, for each venue label, the number of papers in every primary area
separator = '-' * 50
for venue_label, papers_by_area in allpaper.items():
    print(separator)
    print(venue_label)
    for area_label, papers in papers_by_area.items():
        print(area_label, len(papers))

--------------------------------------------------
ICLR 2025 Poster
unsupervised, self-supervised, semi-supervised, and supervised representation learning 207
applications to neuroscience & cognitive science 47
applications to computer vision, audio, language, and other modalities 353
reinforcement learning 193
other topics in machine learning (i.e., none of the above) 124
interpretability and explainable AI 123
applications to physical sciences (physics, chemistry, biology, etc.) 145
datasets and benchmarks 170
foundation or frontier models, including LLMs 388
generative models 317
learning theory 115
alignment, fairness, safety, privacy, and societal considerations 271
learning on time series and dynamical systems 53
causal reasoning 27
neurosymbolic & hybrid AI systems (physics-informed, logic & formal reasoning, etc.) 23
optimization 140
applications to robotics, autonomy, planning 72
learning on graphs and other geometries & topologies 112
infrastructure, software libraries, hardw

In [21]:
# Build the .bib and .md exports: one section per venue label, one subsection
# per primary area, one BibTeX entry (with abstract) per paper.
bib = []        # lines/entries of the .bib file
mdbib = []      # lines/entries of the markdown file
bibkey = set()  # citation keys already emitted, used to keep keys unique

for venue_name, venue in allpaper.items():
    bib.append('% ---------------------------')
    bib.append(f'% {venue_name}')
    bib.append('% ---------------------------\n')

    mdbib.append(f'## {venue_name}\n')

    for area_name, area in venue.items():
        bib.append(f'% {area_name}\n')
        mdbib.append(f'### {area_name}\n')
        mdbib.append('```bibtex')

        for paper in area:
            bibtex = paper.content['_bibtex']['value']
            fields = parse_bitex(bibtex, gen_id=True, id_surfix=CONFERENCE_NAME)

            # Resolve duplicated keys (same first author and year) by appending '+'.
            while fields['id'] in bibkey:
                fields['id'] += '+'
            bibkey.add(fields['id'])

            # Add the abstract as a single line. Joining on whitespace keeps a
            # space where the source had a line break, so words do not fuse.
            fields['abstract'] = ' '.join(paper.content['abstract']['value'].split())

            # Render once and reuse the same text for both outputs.
            entry = make_bibtex(fields)
            bib.append(entry)
            mdbib.append(entry)

        mdbib.append('```\n')

# Make sure the target directory exists before writing.
out_dir = os.path.dirname(OUT)
if out_dir:
    os.makedirs(out_dir, exist_ok=True)

# Write as UTF-8 explicitly: abstracts contain non-ASCII characters, and the
# platform default encoding may not be able to represent them.
with open(f'{OUT}.bib', 'w', encoding='utf-8') as f:
    f.write('\n'.join(bib))

with open(f'{OUT}.md', 'w', encoding='utf-8') as f:
    f.write('\n'.join(mdbib))