# Summarize Research Papers

Get concise summaries of academic PDFs automatically.


In [2]:
%pip install -qU pixeltable anthropic


Note: you may need to restart the kernel to use updated packages.


In [3]:
import getpass
import os

# Prompt for the Anthropic key only when the environment does not already provide one,
# so the key is never written into the notebook itself.
if os.environ.get('ANTHROPIC_API_KEY') is None:
    os.environ['ANTHROPIC_API_KEY'] = getpass.getpass('Anthropic API Key:')


In [None]:
import pixeltable as pxt
from pixeltable.functions import anthropic


In [None]:
# Step 1: Create papers table
# `if_exists='ignore'` keeps directory and table creation safe to re-run.
pxt.create_dir('research', if_exists='ignore')
papers = pxt.create_table('research.papers', {'pdf': pxt.Document, 'title': pxt.String}, if_exists='ignore')

# Insert the sample paper only when it is not already in the table. Without this
# guard, every re-run of the cell would append a duplicate row (and trigger a
# duplicate summary once the computed column exists).
sample_paper = {
    'pdf': 'https://raw.githubusercontent.com/pixeltable/pixeltable/release/docs/resources/rag-demo/Zacks-Nvidia-Report.pdf',
    'title': 'Nvidia Report',
}
if papers.where(papers.title == sample_paper['title']).count() == 0:
    papers.insert([sample_paper])


Connected to Pixeltable database at: postgresql+psycopg://postgres:@/pixeltable?host=/Users/anushas-pxt/.pixeltable/pgdata
Created directory 'research'.
Created table 'papers'.


Error: Failed to download https://arxiv.org/pdf/sample.pdf: HTTP Error 404: Not Found

In [None]:
# Step 2: Auto-generate summaries with Claude
# NOTE(review): the f-string below is evaluated once, when this cell runs, so
# `{papers.pdf}` is replaced by the string form of the column reference at that
# moment, not by each row's document text. Presumably the prompt needs a
# per-row text expression instead — TODO confirm and rework.
summary_prompt = (
    f"Summarize this research paper in 3-4 sentences, focusing on: "
    f"1) Main contribution 2) Methodology 3) Key findings.\n\nDocument: {papers.pdf}"
)

# Single-turn request to Claude, capped at 300 output tokens.
claude_reply = anthropic.messages(
    messages=[{'role': 'user', 'content': summary_prompt}],
    model='claude-3-haiku-20240307',
    max_tokens=300,
)

# Store the text of the reply's first content block as the `summary` column;
# `if_exists='ignore'` lets the cell be re-run without failing on an existing column.
papers.add_computed_column(summary=claude_reply.content[0].text, if_exists='ignore')


NameError: name 'papers' is not defined

In [None]:
# Step 3: View summaries
# Build the query first, then fetch it; the fetched result is the cell's displayed output.
title_and_summary = papers.select(papers.title, papers.summary)
title_and_summary.collect()


**What's Happening:**
- PDF documents are stored in a Pixeltable table
- Claude processes each document directly
- The summary is computed on insert
- Incremental: add more papers and their summaries are generated automatically

**Variation:** Extract specific sections:
```python
# Get methodology only
# `model` and `max_tokens` are passed explicitly, matching the summary column in Step 2,
# so the snippet can be copied and run as-is.
papers.add_computed_column(
    methodology=anthropic.messages(
        model='claude-3-haiku-20240307',
        max_tokens=300,
        messages=[{'role': 'user', 'content': 
            f"Extract only the methodology section: {papers.pdf}"}]
    ).content[0].text
)
```

**Next:** `answer-questions-from-docs.ipynb` • `find-relevant-contract-clauses.ipynb`
