# AI-Generated ICLR-Style Reviews

This notebook uses the OpenAI GPT-4o API or the DeepSeek Reasoner API to generate ICLR-style reviews of papers.

Features:
- Read PDF paper files
- Generate professional reviews using AI models
- Batch-process multiple papers and save the reviews to a CSV file

Supported APIs:
- OpenAI GPT-4o
- DeepSeek Reasoner


## 1. Install Dependencies


In [1]:
%pip install openai PyPDF2

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 232.6/232.6 kB 3.9 MB/s eta 0:00:00
Installing collected packages: PyPDF2
Successfully installed PyPDF2-3.0.1


## 2. Import Libraries


In [2]:
import csv
import os
from getpass import getpass
from pathlib import Path

import PyPDF2
from openai import OpenAI


## 3. Mount Google Drive

**Important**: Run this cell to mount Google Drive. The notebook reads the PDFs from Drive and saves the generated reviews there.


In [None]:
from google.colab import drive

# Make Google Drive available under /content/drive.
drive.mount('/content/drive')

# Working directory on Drive (consistent with download.ipynb);
# the configuration cell builds the PDF folder and CSV paths from it.
WORK_DIR = '/content/drive/MyDrive/Notebooks/AI_review'

print("✓ Google Drive mounted", f"✓ Working directory: {WORK_DIR}", sep="\n")


Mounted at /content/drive
✓ Google Drive mounted
✓ Working directory: /content/drive/MyDrive/Notebooks/AI_review


## 4. Define Core Functions


In [4]:
def read_pdf(pdf_path):
    """Extract the text of every page of a PDF file.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        str: The text of all pages in document order, with a newline
        appended after each page. A page that yields no text contributes
        only that newline.
    """
    with open(pdf_path, 'rb') as file:
        pdf_reader = PyPDF2.PdfReader(file)
        # `or ""` treats a missing extraction result (None) as empty text, so a
        # single unreadable page cannot abort the whole paper with a TypeError.
        # Extraction happens inside the `with` because it reads from the open file.
        page_texts = [(page.extract_text() or "") + "\n" for page in pdf_reader.pages]
    return "".join(page_texts)


def get_prompt():
    """Return the default reviewing instructions sent ahead of the paper text.

    Returns:
        str: A multi-line prompt asking for a 200-400 word ICLR-style review.
    """
    # The four-space indent on the continuation lines is part of the literal
    # and is therefore part of the returned prompt.
    review_instructions = """As an ICLR reviewer, write a detailed review for this paper.

    Write a natural, flowing review that includes:
    - A brief summary of what the paper proposes and its main contributions
    - Discussion of strengths and weaknesses
    - Specific technical questions or concerns
    - Comments on clarity, novelty, experimental validation
    - Suggestions for improvement
    - Your overall assessment

    Be professional, constructive, and specific. Write in a natural style like real ICLR reviews,
    not in a rigid numbered format.

    Write the review in 200-400 words."""
    return review_instructions


def generate_review(api_key, pdf_path, prompt=None, api_name="deepseek",
                    max_chars=100000, max_tokens=4096, temperature=0.7):
    """Generate a review of one PDF using the GPT-4o or DeepSeek Reasoner API.

    Args:
        api_key: API key for the selected provider.
        pdf_path: Path of the paper's PDF file.
        prompt: Reviewing instructions placed before the paper text.
            Defaults to the text returned by get_prompt().
        api_name: "gpt" selects the "gpt-4o" model on the default OpenAI
            endpoint; any other value selects "deepseek-reasoner" at
            https://api.deepseek.com.
        max_chars: Maximum number of characters of extracted text sent to
            the model; longer text is cut off at this length. Pass None to
            send the full text.
        max_tokens: `max_tokens` value passed to the chat-completions call.
        temperature: `temperature` value passed to the chat-completions call.

    Returns:
        str: The content of the first returned choice, or "" when the
        response carries no content.
    """
    if api_name == "gpt":
        # OpenAI GPT-4o
        client = OpenAI(api_key=api_key)
        model = "gpt-4o"
    else:
        # DeepSeek Reasoner, reached through the same client class
        client = OpenAI(
            api_key=api_key,
            base_url="https://api.deepseek.com"
        )
        model = "deepseek-reasoner"

    # Read PDF
    print(f"Reading PDF: {pdf_path}")
    paper_text = read_pdf(pdf_path)

    # Limit text length
    if max_chars is not None and len(paper_text) > max_chars:
        paper_text = paper_text[:max_chars]

    print(f"Text length: {len(paper_text)} characters")

    # Get prompt
    if prompt is None:
        prompt = get_prompt()

    # Call API
    print(f"Calling {model} API...")
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are an experienced ML conference reviewer."},
            {"role": "user", "content": f"{prompt}\n\nPaper content:\n\n{paper_text}"}
        ],
        max_tokens=max_tokens,
        temperature=temperature
    )

    # Normalise a missing content field to "" so callers can always write the
    # result to a file or CSV without a None check.
    return response.choices[0].message.content or ""


def save_review(review, output_path):
    """Write a review to a UTF-8 text file and report where it was saved.

    Args:
        review: Review text to store.
        output_path: Destination file; it is opened in write mode, so an
            existing file is overwritten.
    """
    with open(output_path, 'w', encoding='utf-8') as review_file:
        review_file.write(review)
    print(f"Review saved to: {output_path}")


def batch_generate_reviews(api_key, pdf_folder, output_csv, api_name="deepseek"):
    """Generate a review for every PDF in a folder and save them to a CSV file.

    PDFs matching ``*.pdf`` directly inside `pdf_folder` are processed in
    sorted path order. The CSV is opened (and truncated) before the first
    paper, and each row is written and flushed as soon as its review is
    generated, so completed reviews are kept even if a later paper fails or
    the run is interrupted. A paper whose review raises an exception is
    reported and skipped; the remaining papers are still processed.

    Args:
        api_key: API key passed through to generate_review().
        pdf_folder: Folder containing the PDF files.
        output_csv: Path of the CSV file to write, with columns
            ``paper_title``, ``review_text`` and ``pdf_path``.
        api_name: Provider selector passed through to generate_review().
    """
    # Sorted so repeated runs process papers, and order CSV rows, identically.
    pdf_files = sorted(Path(pdf_folder).glob("*.pdf"))
    total = len(pdf_files)

    print(f"Found {total} PDF files in {pdf_folder}")

    failed = []

    with open(output_csv, 'w', encoding='utf-8', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=['paper_title', 'review_text', 'pdf_path'])
        writer.writeheader()

        for i, pdf_path in enumerate(pdf_files, 1):
            print(f"\n[{i}/{total}] Processing: {pdf_path.name}")

            try:
                review = generate_review(api_key, str(pdf_path), api_name=api_name)
            except Exception as exc:
                # One unreadable PDF or failed API call must not discard the
                # reviews already generated; record it and move on.
                failed.append(pdf_path.name)
                print(f"✗ Failed {i}/{total}: {exc}")
                continue

            writer.writerow({
                # Use PDF filename as paper title (without .pdf extension)
                'paper_title': pdf_path.stem,
                'review_text': review,
                'pdf_path': str(pdf_path)
            })
            # Push the row to disk now rather than when the file is closed.
            f.flush()

            print(f"✓ Completed {i}/{total}")

    if failed:
        print(f"\n✗ {len(failed)} of {total} papers failed: {', '.join(failed)}")
        print(f"✓ {total - len(failed)} reviews saved to: {output_csv}")
    else:
        print(f"\n✓ All reviews saved to: {output_csv}")


## 5. Configure Parameters and Run

**Important**: Please fill in your API key before running. The input and output paths are built from `WORK_DIR` (defined in Section 3) and `YEAR`, so they point to Google Drive.


In [None]:
# ========== Configure Parameters ==========

# Select API: "gpt" or "deepseek"
API_NAME = "deepseek"

# API key. Leave this empty to keep the key out of the saved notebook: it is
# then read from the environment variable named below, or requested with a
# hidden prompt.
API_KEY = ""
KEY_ENV_VAR = "OPENAI_API_KEY" if API_NAME == "gpt" else "DEEPSEEK_API_KEY"
if not API_KEY:
    API_KEY = os.environ.get(KEY_ENV_VAR) or getpass(f"Enter {KEY_ENV_VAR}: ")
API_KEY = API_KEY.strip()
if not API_KEY:
    # Fail before any PDF is read instead of at the first API call.
    raise ValueError(f"No API key provided: set API_KEY or the {KEY_ENV_VAR} environment variable")

# Data year (should match the year downloaded in download.ipynb)
YEAR = 2020

# PDF folder path (read from Google Drive)
PDF_FOLDER = f"{WORK_DIR}/iclr_{YEAR}_data/pdfs"

# Output CSV filename (save to Google Drive)
OUTPUT_CSV = f"{WORK_DIR}/ai_review_{YEAR}.csv"

print("="*60)
print("Configuration:")
print(f"PDF folder: {PDF_FOLDER}")
print(f"Output file: {OUTPUT_CSV}")
print(f"API: {API_NAME}")
print("="*60)

# ========== Run Batch Generation ==========
batch_generate_reviews(API_KEY, PDF_FOLDER, OUTPUT_CSV, api_name=API_NAME)
