## Scrape all CVPR 2025 BibTeX with abstracts

In [53]:
import requests
from bs4 import BeautifulSoup

In [54]:
# Proceedings index page; individual papers are later located in it via
# <dt class="ptitle"> elements.
url = "https://openaccess.thecvf.com/CVPR2025?day=all"
# Bound the wait (same 20 s limit scrape_paper uses) and fail loudly on an
# HTTP error instead of silently parsing an error page into an empty index.
response = requests.get(url, timeout=20)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

In [55]:
import requests
from bs4 import BeautifulSoup

def scrape_paper(paper_dt):
    """Fetch one paper's detail page and extract its abstract and BibTeX.

    Args:
        paper_dt: A parsed HTML element whose first ``<a>`` carries the paper
            title as text and a link to the paper page in ``href``.

    Returns:
        A ``(title, info)`` tuple, where ``info`` is a dict with the keys
        ``url``, ``abstract``, ``bibtex`` and ``pub_type`` (always
        ``'poster'`` here), or ``None`` when the element has no usable link
        or the page could not be fetched/parsed. Failures are printed, never
        raised, so one bad page does not abort a whole batch.
    """
    # Local import: keeps the function self-contained within its cell.
    from urllib.parse import urljoin

    try:
        title_tag = paper_dt.find('a')
        if not title_tag:
            return None

        href = title_tag.get('href')
        if not href:
            # Nothing to follow; report it explicitly rather than relying on
            # a TypeError from concatenating a string with None.
            print(f"Error scraping {paper_dt}: link has no href")
            return None

        paper_title = title_tag.text
        # urljoin copes with both site-relative and absolute hrefs.
        paper_url = urljoin("https://openaccess.thecvf.com", href)

        paper_response = requests.get(paper_url, timeout=20)
        # Without this an HTTP error page would be stored as a paper with an
        # empty abstract and empty BibTeX.
        paper_response.raise_for_status()
        paper_soup = BeautifulSoup(paper_response.text, 'html.parser')

        abstract_div = paper_soup.find('div', id='abstract')
        paper_abstract = abstract_div.text.strip() if abstract_div else ''

        paper_bibtex_div = paper_soup.find('div', class_='bibref')
        paper_bibtex = paper_bibtex_div.text.strip() if paper_bibtex_div else ''

        return paper_title, {
            'url': paper_url,
            'abstract': paper_abstract,
            'bibtex': paper_bibtex,
            'pub_type': 'poster',
        }
    except Exception as e:
        # Deliberately broad: this is the per-paper worker boundary and the
        # scrape is best-effort; the failure is reported and the paper skipped.
        print(f"Error scraping {paper_dt}: {e}")
    return None

In [56]:
from tqdm.auto import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed

# Every <dt class="ptitle"> element of the index page is handed to
# scrape_paper; successful results are keyed by paper title.
paper_dts = soup.find_all('dt', class_='ptitle')
paper_dict = {}

with ThreadPoolExecutor(max_workers=16) as pool:
    # Queue one scrape job per index entry.
    pending = [pool.submit(scrape_paper, paper_dt) for paper_dt in paper_dts]
    # Consume jobs as they finish so the progress bar advances per paper.
    for finished in tqdm(as_completed(pending), total=len(pending)):
        scraped = finished.result()
        if not scraped:
            # scrape_paper returned None: nothing to record for this entry.
            continue
        title, info = scraped
        paper_dict[title] = info

100%|██████████| 2871/2871 [02:02<00:00, 23.35it/s]


In [57]:
# Spot-check: show the scraped BibTeX stored for one known paper title.
print(paper_dict['Hyperbolic Category Discovery']['bibtex'])

@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Yuanpei and He, Zhenqi and Han, Kai},
    title     = {Hyperbolic Category Discovery},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = {June},
    year      = {2025},
    pages     = {9891-9900}
}


## Scrape oral paper titles

In [58]:
# Conference calendar page; oral sessions are later located in it via
# <div class="oral-session"> elements.
url = "https://cvpr.thecvf.com/virtual/2025/calendar"
# Bound the wait and fail loudly on an HTTP error instead of silently
# parsing an error page into an empty session list.
response = requests.get(url, timeout=20)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

In [59]:
# Map each oral session title to the list of paper titles found inside its
# <div class="oral-session"> block.
oral_session_divs = soup.find_all('div', class_='oral-session')
oral_dict = {}

for session_div in oral_session_divs:
    session_link = session_div.find('a')
    if not session_link:
        continue

    # Remove line breaks and double-space runs, then keep only the text
    # in front of the first '['.
    flattened = session_link.text.replace('\n', '').replace('  ', '').strip()
    session_name = flattened.split('[')[0]

    # Papers are the <div class="content oral"> children; a paper's title is
    # whatever follows the last ']' in its flattened text.
    oral_dict[session_name] = [
        entry.text.strip().replace('\n', '').replace('  ', '').strip().split(']')[-1].strip()
        for entry in session_div.find_all('div', class_='content oral')
    ]

oral_dict

{'Oral Session 1A: Image and Video Synthesis': ['Motion Prompting: Controlling Video Generation with Motion Trajectories',
  'Go-with-the-Flow: Motion-Controllable Video Diffusion Models Using Real-Time Warped Noise',
  'LookingGlass: Generative Anamorphoses via Laplacian Pyramid Warping',
  'Alias-Free Latent Diffusion Models: Improving Fractional Shift Equivariance of Diffusion Latent Space',
  'RandAR: Decoder-only Autoregressive Visual Generation in Random Orders'],
 'Oral Session 1B: Interpretability and Evaluation': ['OpenING: A Comprehensive Benchmark for Judging Open-ended Interleaved Image-Text Generation',
  'LibraGrad: Balancing Gradient Flow for Universally Better Vision Transformer Attributions',
  'Do We Always Need the Simplicity Bias? Looking for Optimal Inductive Biases in the Wild',
  'Molmo and PixMo: Open Weights and Open Data for State-of-the-Art Vision-Language Models',
  'Rethinking Vision-Language Model in Face Forensics: Multi-Modal Interpretable Forged Face De

In [60]:
# Inspect the paper titles collected for a single oral session.
oral_dict['Oral Session 1A: Image and Video Synthesis']

['Motion Prompting: Controlling Video Generation with Motion Trajectories',
 'Go-with-the-Flow: Motion-Controllable Video Diffusion Models Using Real-Time Warped Noise',
 'LookingGlass: Generative Anamorphoses via Laplacian Pyramid Warping',
 'Alias-Free Latent Diffusion Models: Improving Fractional Shift Equivariance of Diffusion Latent Space',
 'RandAR: Decoder-only Autoregressive Visual Generation in Random Orders']

In [61]:
# Report how many papers each oral session holds, followed by the grand total.
session_sizes = {session: len(papers) for session, papers in oral_dict.items()}

for session, size in session_sizes.items():
    print(f"{session} -> {size} papers")

total = sum(session_sizes.values())

print('-'*100)
print(f"total -> {total}")

Oral Session 1A: Image and Video Synthesis -> 5 papers
Oral Session 1B: Interpretability and Evaluation -> 5 papers
Oral Session 1C: Image Processing and Deep Architectures -> 5 papers
Oral Session 2A: 3D Computer Vision -> 6 papers
Oral Session 2B: Human Motion -> 6 papers
Oral Session 2C: Temporal Modeling and Action Recognition -> 6 papers
Oral Session 3A: 3D Computer Vision -> 5 papers
Oral Session 3B: Multimodal Computer Vision -> 5 papers
Oral Session 3C: Vision and Language -> 5 papers
Oral Session 4A: Image and Video Synthesis -> 5 papers
Oral Session 4B: Embodied Computer Vision -> 5 papers
Oral Session 4C: 3D Computer Vision -> 5 papers
Oral Session 5A: Generative AI -> 5 papers
Oral Session 5B: Learning Systems and Medical Applications -> 5 papers
Oral Session 5C: Visual and Spatial Computing -> 5 papers
Oral Session 6A: 3D from Single or Multi-View Sensors -> 5 papers
Oral Session 6B: Scene Understanding, Image Editing and Multimodal Learning -> 6 papers
Oral Session 6C: Vi

## Scrape highlight paper titles

In [62]:
# Awards page; highlight papers are later located in it via
# <div class="virtual-card"> elements.
url = "https://cvpr.thecvf.com/virtual/2025/awards_detail"
# Bound the wait and fail loudly on an HTTP error instead of silently
# parsing an error page into an empty highlight list.
response = requests.get(url, timeout=20)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

In [63]:
# Collect highlight paper titles: the stripped text of the first link inside
# each <div class="virtual-card">; cards without a link are skipped.
highlight_divs = soup.find_all('div', class_='virtual-card')
highlight_ls = [
    card_link.text.strip()
    for card_link in (card.find('a') for card in highlight_divs)
    if card_link
]

highlight_ls

['Learning Class Prototypes for Unified Sparse-Supervised 3D Object Detection',
 'Scene-Centric Unsupervised Panoptic Segmentation',
 'CASAGPT: Cuboid Arrangement and Scene Assembly for Interior Design',
 'Multi-modal Vision Pre-training for Medical Image Analysis',
 'SpecTRe-GS: Modeling Highly Specular Surfaces with Reflected Nearby Objects by Tracing Rays in 3D Gaussian Splatting',
 'High-fidelity 3D Object Generation from Single Image with RGBN-Volume Gaussian Reconstruction Model',
 'End-to-End HOI Reconstruction Transformer with Graph-based Encoding',
 'FoundHand: Large-Scale Domain-Specific Learning for Controllable Hand Image Generation',
 'Octopus: Alleviating Hallucination via Dynamic Contrastive Decoding',
 'SoMA: Singular Value Decomposed Minor Components Adaptation for Domain Generalizable Representation Learning',
 'Structure from Collision',
 'You See it, You Got it: Learning 3D Creation on Pose-Free Videos at Scale',
 'Volumetrically Consistent 3D Gaussian Rasterization

In [64]:
# Number of highlight titles collected.
len(highlight_ls)

388

## Export

### BibTeX utils

In [65]:
import re

def parse_bitex(bibtex_data, gen_id=False, id_surfix='', lower_case_type=True) -> dict:
    """Parse a single BibTeX entry into a flat dict of strings.

    The result always has ``'type'`` and ``'id'`` plus one lower-cased key per
    field. Braced values (``key = {...}``) keep nested braces verbatim;
    unbraced values (``year = 2025,``) end at the next comma, at a stray
    ``}`` or at the end of the entry, and the terminator is not part of the
    value. Double-quoted values get no special handling.

    Post-processing applied to every value: surrounding whitespace is
    stripped, ``": "`` becomes ``"{:} "``, and a value that starts with ``{``
    or ends with ``}`` is wrapped in double quotes.

    Args:
        bibtex_data: Text containing one ``@type{id, ...}`` entry.
        gen_id: When true and an author field is present, replace the id with
            ``<FirstName><LastName><year>`` of the first author, reduced to
            ASCII letters and digits. If the first author is empty the
            original id is kept.
        id_surfix: Text appended to a generated id.
        lower_case_type: Lower-case the entry type when true.

    Returns:
        Dict mapping field names to their string values.

    Raises:
        ValueError: If no ``@type{id, ...}`` entry is found.
    """
    entry_regex = re.compile(r'@([a-zA-Z]+){([^,]+),(.*)}', re.DOTALL)
    match = entry_regex.search(bibtex_data)

    if not match:
        raise ValueError("Invalid BibTeX data")

    fields = {
        'type': match.group(1),
        'id': match.group(2)
    }
    fields_str = match.group(3)

    mode = 'key'
    store = ''
    braces_seen = 0  # '{' count inside the current value; 0 means unbraced
    depth = 0        # current brace nesting inside the current value
    keys = []
    values = []
    last_idx = len(fields_str) - 1

    for idx, char in enumerate(fields_str):
        if mode == 'key':
            if char == '=':
                keys.append(store.strip())
                store = ''
                mode = 'value'
            else:
                store += char
            continue

        finished = False
        if char == '{':
            depth += 1
            braces_seen += 1
            # The first brace opens the value: drop it and anything before it.
            store = '' if braces_seen == 1 else store + char
        elif char == '}':
            if depth:
                depth -= 1
                if depth:
                    store += char      # inner closing brace belongs to the value
                else:
                    finished = True    # outermost brace closes the value
            else:
                # Stray '}' with nothing open ends an unbraced value
                # (previously this popped an empty stack and crashed).
                finished = True
        elif braces_seen == 0 and char == ',':
            # The comma terminates an unbraced value and is not part of it.
            finished = True
        else:
            store += char

        if not finished and braces_seen == 0 and idx == last_idx:
            # An unbraced value may also run to the very end of the entry.
            finished = True

        if finished:
            value = store.strip().replace(": ", "{:} ")
            if value.startswith('{') or value.endswith('}'):
                value = f'"{value}"'
            values.append(value)
            store = ''
            braces_seen = 0
            depth = 0
            mode = 'key'

    for key, value in zip(keys, values):
        # Keys collected after a braced value still carry the separating
        # comma and newline; strip them here.
        fields[key.lower().replace('\n', '').replace(',', '').strip()] = value

    if gen_id and 'author' in fields:
        first_author = fields['author'].split(' and ')[0]
        if ',' in first_author:
            # "Last, First" or "Last, Jr., First": the family name comes
            # first and the given name last, however many commas there are.
            name_parts = [part.strip() for part in first_author.split(',')]
            last_name, first_name = name_parts[0], name_parts[-1]
        else:
            name_parts = first_author.split()
            last_name = name_parts.pop() if name_parts else ''
            first_name = ' '.join(name_parts)

        if first_name or last_name:
            fields['id'] = re.sub(r'[^a-zA-Z0-9]', '', f"{first_name}{last_name}{fields.get('year', '')}") + id_surfix

    if lower_case_type:
        fields['type'] = fields['type'].lower()

    return fields

# Example: parse one scraped entry, generating an id with the 'CVPR' suffix.
fields = parse_bitex(paper_dict['Hyperbolic Category Discovery']['bibtex'], gen_id=True, id_surfix='CVPR')
fields

{'type': 'inproceedings',
 'id': 'YuanpeiLiu2025CVPR',
 'author': 'Liu, Yuanpei and He, Zhenqi and Han, Kai',
 'title': 'Hyperbolic Category Discovery',
 'booktitle': 'Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)',
 'month': 'June',
 'year': '2025',
 'pages': '9891-9900'}

In [66]:
def make_bibtex(fields):
    """Render a field dict as the text of one BibTeX entry.

    ``fields['type']`` and ``fields['id']`` form the ``@type{id,`` header;
    every other item becomes an indented ``key = {value},`` line in dict
    order. The text ends with a closing brace and a newline.
    """
    header_keys = ('type', 'id')

    chunks = [''.join(('@', fields['type'], '{', fields['id'], ',\n'))]
    chunks.extend(
        ''.join(('  ', name, ' = {', content, '},\n'))
        for name, content in fields.items()
        if name not in header_keys
    )
    chunks.append("}\n")
    return ''.join(chunks)

# Round-trip check: render the parsed fields back into BibTeX text.
print(make_bibtex(fields))

@inproceedings{YuanpeiLiu2025CVPR,
  author = {Liu, Yuanpei and He, Zhenqi and Han, Kai},
  title = {Hyperbolic Category Discovery},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month = {June},
  year = {2025},
  pages = {9891-9900},
}



### Oral

_P.S. Unmatched papers must be moved manually from the poster section to the oral section._

In [87]:
# Output accumulators shared by the oral, highlight and poster export cells:
#   bib    - lines of the plain .bib export (entries without abstracts)
#   mdbib  - lines of the markdown export (entries with abstracts)
#   bibkey - ids already emitted, used to keep generated ids unique
bib = []
mdbib = []
bibkey = set()

mdbib.append('## CVPR 2025 Oral\n')

bib.append('% ---------------------------')
bib.append('% CVPR 2025 Oral')
bib.append('% ---------------------------\n')

for session, papers in oral_dict.items():
    mdbib.append(f"### {session}\n")
    mdbib.append('```bibtex')

    for paper in papers:
        # Only the title lookup may legitimately fail (the calendar title is
        # not a key of paper_dict); any other KeyError is a real bug and
        # should surface instead of being reported as "unmatched".
        try:
            paper_info = paper_dict[paper]
        except KeyError:
            print(f"unmatched: {session} -> {paper}")
            print('-'*100)
            continue

        # get bibtex from paper_dict & mark publication type
        bibtex = paper_info['bibtex']
        paper_info['pub_type'] = 'oral'

        # parse bibtex
        fields = parse_bitex(bibtex, gen_id=True, id_surfix='CVPR')
        fields['url'] = paper_info['url']
        fields['abstract'] = paper_info['abstract']

        # resolve duplicated bibkey by appending '+' until the id is unused
        while fields['id'] in bibkey:
            fields['id'] += '+'
        bibkey.add(fields['id'])

        # append to mdbib
        mdbib.append(make_bibtex(fields))

        # append to bib (w/o abstract)
        bib.append(make_bibtex({
            key: value
            for key, value in fields.items() if key != 'abstract'
        }))

    mdbib.append('```\n')

unmatched: Oral Session 2C: Temporal Modeling and Action Recognition -> Rethinking Spiking Self-Attention Mechanism: Implementing α-XNOR Similarity Calculation in Spiking Transformers'
----------------------------------------------------------------------------------------------------
unmatched: Oral Session 4A: Image and Video Synthesis -> Infinity∞: Scaling Bitwise AutoRegressive Modeling for High-Resolution Image Synthesis'
----------------------------------------------------------------------------------------------------
unmatched: Oral Session 5C: Visual and Spatial Computing -> Gromov–Wasserstein Problem with Cyclic Symmetry'
----------------------------------------------------------------------------------------------------


### Highlight

_P.S. Unmatched papers must be moved manually from the poster section to the highlight section._

In [88]:
mdbib.append('## CVPR 2025 Highlight\n')

bib.append('% ---------------------------')
bib.append('% CVPR 2025 Highlight')
bib.append('% ---------------------------\n')

for paper in highlight_ls:
    # Only the title lookup may legitimately fail (the awards-page title is
    # not a key of paper_dict); any other KeyError is a real bug and should
    # surface instead of being reported as "unmatched".
    try:
        paper_info = paper_dict[paper]
    except KeyError:
        print(f"unmatched: {paper}")
        print('-'*100)
        continue

    # get bibtex from paper_dict & mark publication type
    bibtex = paper_info['bibtex']
    paper_info['pub_type'] = 'highlight'

    # parse bibtex
    fields = parse_bitex(bibtex, gen_id=True, id_surfix='CVPR')
    fields['url'] = paper_info['url']
    fields['abstract'] = paper_info['abstract']

    # resolve duplicated bibkey by appending '+' until the id is unused
    while fields['id'] in bibkey:
        fields['id'] += '+'
    bibkey.add(fields['id'])

    # append to mdbib (one fenced block per paper)
    mdbib.append('```bibtex')
    mdbib.append(make_bibtex(fields))
    mdbib.append('```\n')

    # append to bib (w/o abstract)
    bib.append(make_bibtex({
        key: value
        for key, value in fields.items() if key != 'abstract'
    }))

unmatched: Doppelgängers and Adversarial Vulnerability'
----------------------------------------------------------------------------------------------------
unmatched: Estimating Body and Hand Motion in an Ego‑sensed World'
----------------------------------------------------------------------------------------------------


### Poster

In [89]:
# Show the current value of `paper` (presumably the loop variable left over
# from the highlight export loop).
paper

'Less is More: Efficient Model Merging with Binary Task Switch'

In [90]:
mdbib.append('## CVPR 2025 Poster\n')

bib.extend([
    '% ---------------------------',
    '% CVPR 2025 Poster',
    '% ---------------------------\n',
])

for paper in paper_dict.values():
    # Entries whose pub_type is no longer the scraper default 'poster' are
    # not exported in this section.
    if paper['pub_type'] != 'poster':
        continue

    # Parse the scraped BibTeX and attach the page URL and abstract.
    bibtex = paper['bibtex']
    fields = parse_bitex(bibtex, gen_id=True, id_surfix='CVPR')
    fields.update(url=paper['url'], abstract=paper['abstract'])

    # Keep ids unique across all sections by appending '+' on collision.
    while fields['id'] in bibkey:
        fields['id'] += '+'
    bibkey.add(fields['id'])

    # Markdown export: one fenced block per paper, abstract included.
    mdbib.extend(['```bibtex', make_bibtex(fields), '```\n'])

    # Plain .bib export: same entry without the abstract.
    bib.append(make_bibtex({k: v for k, v in fields.items() if k != 'abstract'}))

## Export to disk

In [91]:
import os

# Make sure the target directory exists so a fresh checkout does not fail
# with FileNotFoundError.
os.makedirs('output', exist_ok=True)

# Pin the encoding: scraped text may contain non-ASCII characters, and the
# platform default encoding is not guaranteed to be able to represent them.
with open('output/cvpr2025.md', 'w', encoding='utf-8') as f:
    f.write('\n'.join(mdbib))

In [92]:
import os

# Make sure the target directory exists so a fresh checkout does not fail
# with FileNotFoundError.
os.makedirs('output', exist_ok=True)

# Pin the encoding: scraped text may contain non-ASCII characters, and the
# platform default encoding is not guaranteed to be able to represent them.
with open('output/cvpr2025.bib', 'w', encoding='utf-8') as f:
    f.write('\n'.join(bib))