In [1]:
import openreview
import os
import json

### Helper functions

In [2]:
def clean_list(papers_list):
    """Drop the ``.DS_Store`` entry from a list of file names, if present.

    The list is modified in place (one matching entry is removed) and the
    same list object is returned so the call can be used inline.
    """
    ds_store = ".DS_Store"
    if ds_store not in papers_list:
        # Nothing to strip; hand the list back untouched.
        return papers_list
    papers_list.remove(ds_store)
    return papers_list

def get_openreview_ids(venue):
    """Collect paper ids from the ``accepted`` and ``rejected`` folders of a venue.

    Each folder under ``venue`` is listed, the ``.DS_Store`` entry is filtered
    out via ``clean_list``, and the text before the first underscore of every
    remaining file name is taken as that paper's id.

    Returns:
        dict with keys ``"accepted"`` and ``"rejected"``, each mapping to the
        list of ids found in the corresponding folder.
    """
    ids_by_decision = {}
    for decision in ("accepted", "rejected"):
        file_names = clean_list(os.listdir(os.path.join(venue, decision)))
        ids_by_decision[decision] = [name.split("_")[0] for name in file_names]
    return ids_by_decision

def get_reviews(openreview_id, client=None):
    """Fetch the notes of one OpenReview forum with their replies attached.

    Args:
        openreview_id: Forum id of the paper; forwarded as ``forum=`` to
            ``get_notes``.
        client: Optional, already-constructed client exposing ``get_notes``.
            Passing one lets callers reuse a single client across many papers
            (or inject a stub). When omitted, a new
            ``openreview.api.OpenReviewClient`` pointed at
            ``https://api2.openreview.net`` is created without credentials.

    Returns:
        Whatever ``client.get_notes(forum=openreview_id, details='replies')``
        returns.
    """
    if client is None:
        client = openreview.api.OpenReviewClient(baseurl='https://api2.openreview.net')

    return client.get_notes(forum=openreview_id, details='replies')


In [3]:
def group_reviews_and_replies(paper_review):
    """Arrange a forum's replies into one discussion thread per review.

    Args:
        paper_review: dict that must contain ``details -> replies`` (a list of
            post dicts) and is read for its ``forum`` id.

    Returns:
        dict keyed by review id. Each value is
        ``{'review': <post>, 'replies': [<post>, ...]}`` where ``replies``
        holds every descendant of the review in depth-first order, siblings
        ordered by ``cdate`` (missing ``cdate`` counts as 0). A post counts as
        a review when it replies directly to the forum and its ``content``
        has a ``rating`` key. If the expected structure is missing, a message
        is printed and an empty dict is returned.
    """
    if 'details' not in paper_review or 'replies' not in paper_review['details']:
        print("The provided JSON does not have the expected structure ('details' -> 'replies').")
        return {}

    # Index every post that carries an id; a repeated id keeps the latest post.
    posts_by_id = {
        entry.get('id'): entry
        for entry in paper_review['details']['replies']
        if entry.get('id')
    }

    # Top-level reviews: direct replies to the forum that carry a rating.
    forum_id = paper_review.get('forum')
    threads = {
        entry_id: {'review': entry, 'replies': []}
        for entry_id, entry in posts_by_id.items()
        if entry.get('replyto') == forum_id and 'rating' in entry.get('content', {})
    }

    # Map each parent id to the posts that reply to it.
    children_of = {}
    for entry in posts_by_id.values():
        parent = entry.get('replyto')
        if not parent:
            continue
        if parent not in children_of:
            children_of[parent] = []
        children_of[parent].append(entry)

    # One shared set across all reviews so no post is emitted twice.
    seen = set()

    def gather(parent_id):
        """Return all unseen descendants of ``parent_id`` in pre-order."""
        ordered = []
        children = sorted(children_of.get(parent_id, []), key=lambda r: r.get('cdate', 0))
        for child in children:
            child_id = child['id']
            if child_id in seen:
                continue
            seen.add(child_id)
            ordered.append(child)
            ordered += gather(child_id)
        return ordered

    for review_id in threads:
        threads[review_id]['replies'] = gather(review_id)

    return threads


In [4]:
def save_review(paper, review_dict, review_dir):
    """Write one paper's grouped reviews to ``<review_dir>/<paper>.json``.

    Args:
        paper: Identifier used as the file stem.
        review_dict: JSON-serialisable object to dump.
        review_dir: Destination directory; created (with parents) if it does
            not exist yet. An empty string means the current directory.

    The file is written as UTF-8 with non-ASCII characters kept verbatim and
    two-space indentation. An existing file is overwritten.
    """
    # Without this, a fresh checkout with no output folder fails on every save.
    if review_dir:
        os.makedirs(review_dir, exist_ok=True)

    file_path = os.path.join(review_dir, f"{paper}.json")
    with open(file_path, 'w', encoding='utf-8') as f:
        json.dump(review_dict, f, ensure_ascii=False, indent=2)


def process_papers(papers, reviews_dir):
    """Fetch, thread, and save the reviews for each paper id.

    For every id in ``papers`` the forum notes are fetched with
    ``get_reviews``, the note whose ``replyto`` is ``None`` is taken as the
    forum's main note, its attributes are grouped with
    ``group_reviews_and_replies``, and the result is stored with
    ``save_review`` under ``reviews_dir``.

    Processing is best-effort: a failure on one paper is reported on stdout
    together with the paper id and the loop moves on to the next paper.

    Args:
        papers: Iterable of paper (forum) ids.
        reviews_dir: Directory passed through to ``save_review``.
    """
    for paper in papers:
        try:
            notes = get_reviews(paper)

            # The note that is not a reply to anything is treated as the main note.
            main_review = next((note for note in notes if note.replyto is None), None)
            if main_review is None:
                raise ValueError(f"Main review not found for {paper}")

            # Group from the main note itself, not from whichever note the
            # search happened to visit last.
            processed_review = group_reviews_and_replies(vars(main_review))
            save_review(paper, processed_review, reviews_dir)
        except Exception as e:
            # Keep going on errors, but say which paper failed and why.
            print(f"Failed to process {paper}: {e!r}")


### Run Code

In [5]:
# Fetch and store the raw reviews for every ICLR 2025 paper found on disk,
# keeping accepted and rejected papers in separate output folders.
VENUE = "ICLR2025"
REVIEWS_ROOT = "ICLR2025_Reviews_Raw"

iclr_2025 = get_openreview_ids(VENUE)
for decision in ("accepted", "rejected"):
    reviews_dir = os.path.join(REVIEWS_ROOT, decision)
    # process_papers swallows per-paper errors, so a missing output folder
    # would otherwise make every save fail without stopping the run.
    os.makedirs(reviews_dir, exist_ok=True)
    process_papers(iclr_2025[decision], reviews_dir=reviews_dir)