From 8c05e64f51460f2dd0587146c0e46be05a1aea51 Mon Sep 17 00:00:00 2001 From: Zeke Sikelianos Date: Mon, 22 Sep 2025 17:36:02 -0700 Subject: [PATCH 1/8] feat: add api_token parameter support for legacy compatibility Adds support for the legacy api_token parameter in both Replicate and AsyncReplicate client initialization as an alternative to bearer_token. This enables backward compatibility with v1.x client code that uses: - Client(api_token="...") - AsyncClient(api_token="...") The implementation: - Accepts both api_token and bearer_token parameters - Raises clear error if both are provided - Maps api_token to bearer_token internally - Maintains existing environment variable behavior - Includes comprehensive test coverage --- src/replicate/_client.py | 22 +++++++ tests/test_api_token_compatibility.py | 89 +++++++++++++++++++++++++++ 2 files changed, 111 insertions(+) create mode 100644 tests/test_api_token_compatibility.py diff --git a/src/replicate/_client.py b/src/replicate/_client.py index 390a552..237cd87 100644 --- a/src/replicate/_client.py +++ b/src/replicate/_client.py @@ -102,6 +102,7 @@ def __init__( self, *, bearer_token: str | None = None, + api_token: str | None = None, # Legacy compatibility parameter base_url: str | httpx.URL | None = None, timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN, max_retries: int = DEFAULT_MAX_RETRIES, @@ -124,7 +125,17 @@ def __init__( """Construct a new synchronous Replicate client instance. This automatically infers the `bearer_token` argument from the `REPLICATE_API_TOKEN` environment variable if it is not provided. + + For legacy compatibility, you can also pass `api_token` instead of `bearer_token`. """ + # Handle legacy api_token parameter + if api_token is not None and bearer_token is not None: + raise ReplicateError( + "Cannot specify both 'bearer_token' and 'api_token'. Please use 'bearer_token' (recommended) or 'api_token' for legacy compatibility." + ) + if api_token is not None: + bearer_token = api_token + if bearer_token is None: bearer_token = _get_api_token_from_environment() if bearer_token is None: @@ -477,6 +488,7 @@ def __init__( self, *, bearer_token: str | None = None, + api_token: str | None = None, # Legacy compatibility parameter base_url: str | httpx.URL | None = None, timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN, max_retries: int = DEFAULT_MAX_RETRIES, @@ -499,7 +511,17 @@ def __init__( """Construct a new async AsyncReplicate client instance. This automatically infers the `bearer_token` argument from the `REPLICATE_API_TOKEN` environment variable if it is not provided. + + For legacy compatibility, you can also pass `api_token` instead of `bearer_token`. """ + # Handle legacy api_token parameter + if api_token is not None and bearer_token is not None: + raise ReplicateError( + "Cannot specify both 'bearer_token' and 'api_token'. Please use 'bearer_token' (recommended) or 'api_token' for legacy compatibility." 
+ ) + if api_token is not None: + bearer_token = api_token + if bearer_token is None: bearer_token = _get_api_token_from_environment() if bearer_token is None: diff --git a/tests/test_api_token_compatibility.py b/tests/test_api_token_compatibility.py new file mode 100644 index 0000000..3d2ef51 --- /dev/null +++ b/tests/test_api_token_compatibility.py @@ -0,0 +1,89 @@ +"""Tests for api_token legacy compatibility during client instantiation.""" + +from __future__ import annotations + +import os +import pytest + +from replicate import Replicate, AsyncReplicate, ReplicateError +from replicate._client import Client + + +class TestApiTokenCompatibility: + """Test that api_token parameter works as a legacy compatibility option.""" + + def test_sync_client_with_api_token(self) -> None: + """Test that Replicate accepts api_token parameter.""" + client = Replicate(api_token="test_token_123") + assert client.bearer_token == "test_token_123" + + def test_async_client_with_api_token(self) -> None: + """Test that AsyncReplicate accepts api_token parameter.""" + client = AsyncReplicate(api_token="test_token_123") + assert client.bearer_token == "test_token_123" + + def test_sync_client_with_bearer_token(self) -> None: + """Test that Replicate still accepts bearer_token parameter.""" + client = Replicate(bearer_token="test_token_123") + assert client.bearer_token == "test_token_123" + + def test_async_client_with_bearer_token(self) -> None: + """Test that AsyncReplicate still accepts bearer_token parameter.""" + client = AsyncReplicate(bearer_token="test_token_123") + assert client.bearer_token == "test_token_123" + + def test_sync_client_both_tokens_error(self) -> None: + """Test that providing both api_token and bearer_token raises an error.""" + with pytest.raises(ReplicateError, match="Cannot specify both 'bearer_token' and 'api_token'"): + Replicate(api_token="test_api", bearer_token="test_bearer") + + def test_async_client_both_tokens_error(self) -> None: + """Test that providing both api_token and bearer_token raises an error.""" + with pytest.raises(ReplicateError, match="Cannot specify both 'bearer_token' and 'api_token'"): + AsyncReplicate(api_token="test_api", bearer_token="test_bearer") + + def test_sync_client_no_token_with_env(self, monkeypatch: pytest.MonkeyPatch) -> None: + """Test that client reads from environment when no token is provided.""" + monkeypatch.setenv("REPLICATE_API_TOKEN", "env_token_123") + client = Replicate() + assert client.bearer_token == "env_token_123" + + def test_async_client_no_token_with_env(self, monkeypatch: pytest.MonkeyPatch) -> None: + """Test that async client reads from environment when no token is provided.""" + monkeypatch.setenv("REPLICATE_API_TOKEN", "env_token_123") + client = AsyncReplicate() + assert client.bearer_token == "env_token_123" + + def test_sync_client_no_token_no_env(self, monkeypatch: pytest.MonkeyPatch) -> None: + """Test that client raises error when no token is provided and env is not set.""" + monkeypatch.delenv("REPLICATE_API_TOKEN", raising=False) + with pytest.raises(ReplicateError, match="The bearer_token client option must be set"): + Replicate() + + def test_async_client_no_token_no_env(self, monkeypatch: pytest.MonkeyPatch) -> None: + """Test that async client raises error when no token is provided and env is not set.""" + monkeypatch.delenv("REPLICATE_API_TOKEN", raising=False) + with pytest.raises(ReplicateError, match="The bearer_token client option must be set"): + AsyncReplicate() + + def test_legacy_client_alias(self) 
-> None: + """Test that legacy Client import still works as an alias.""" + assert Client is Replicate + + def test_legacy_client_with_api_token(self) -> None: + """Test that legacy Client alias works with api_token parameter.""" + client = Client(api_token="test_token_123") + assert client.bearer_token == "test_token_123" + assert isinstance(client, Replicate) + + def test_api_token_overrides_env(self, monkeypatch: pytest.MonkeyPatch) -> None: + """Test that explicit api_token overrides environment variable.""" + monkeypatch.setenv("REPLICATE_API_TOKEN", "env_token") + client = Replicate(api_token="explicit_token") + assert client.bearer_token == "explicit_token" + + def test_bearer_token_overrides_env(self, monkeypatch: pytest.MonkeyPatch) -> None: + """Test that explicit bearer_token overrides environment variable.""" + monkeypatch.setenv("REPLICATE_API_TOKEN", "env_token") + client = Replicate(bearer_token="explicit_token") + assert client.bearer_token == "explicit_token" \ No newline at end of file From 905252e587684f90aaeab2c69abc65c197b0f01a Mon Sep 17 00:00:00 2001 From: Zeke Sikelianos Date: Tue, 23 Sep 2025 09:03:56 -0700 Subject: [PATCH 2/8] docs: improve API reference documentation with comprehensive client examples This PR enhances the api.md documentation to provide more useful client reference information including: - Quick start guide with immediate examples - Detailed client initialization for both sync and async clients - Comprehensive coverage of all high-level methods (run, stream, use, search) - Complete documentation of core resources with code examples - File handling patterns for input and output - Error handling with full exception hierarchy - Pagination examples (automatic and manual) - Advanced features like raw response access and custom HTTP clients - Async/await support with examples - Common patterns for real-world usage - Migration guide from v0.x to v1.0+ Fixes https://linear.app/replicate/issue/DP-660/generate-new-apimd-with-more-useful-client-reference --- api.md | 802 ++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 703 insertions(+), 99 deletions(-) diff --git a/api.md b/api.md index d037762..298594b 100644 --- a/api.md +++ b/api.md @@ -1,193 +1,797 @@ -# Replicate +# Replicate Python SDK API Reference -Types: +## Installation -```python -from replicate.types import SearchResponse +```bash +pip install replicate ``` -Methods: +## Quick Start + +```python +import replicate + +# Initialize the client (uses REPLICATE_API_TOKEN env var by default) +client = replicate.Replicate() -- replicate.search(\*\*params) -> SearchResponse +# Run a model +output = client.run( + "black-forest-labs/flux-schnell", + input={"prompt": "astronaut on a horse"} +) +print(output) +``` -# Collections +## Client Initialization -Types: +### Synchronous Client ```python -from replicate.types import CollectionListResponse, CollectionGetResponse +from replicate import Replicate + +# Using environment variable (REPLICATE_API_TOKEN) +client = Replicate() + +# With explicit token +client = Replicate(bearer_token="your_api_token") + +# Legacy token parameter (for compatibility) +client = Replicate(api_token="your_api_token") + +# With custom configuration +client = Replicate( + bearer_token="your_api_token", + base_url="https://api.replicate.com/v1", # Optional custom base URL + timeout=120.0, # Request timeout in seconds + max_retries=5 # Maximum number of retries +) ``` -Methods: +### Asynchronous Client -- replicate.collections.list() -> 
SyncCursorURLPage[CollectionListResponse] -- replicate.collections.get(\*, collection_slug) -> CollectionGetResponse +```python +from replicate import AsyncReplicate +import asyncio + +async def main(): + client = AsyncReplicate(bearer_token="your_api_token") + output = await client.run( + "stability-ai/stable-diffusion", + input={"prompt": "a watercolor painting"} + ) + print(output) + +asyncio.run(main()) +``` + +## High-Level Methods -# Deployments +### run() - Run a Model -Types: +The simplest way to run a model and get output. ```python -from replicate.types import ( - DeploymentCreateResponse, - DeploymentUpdateResponse, - DeploymentListResponse, - DeploymentGetResponse, +# Basic usage - returns output when complete +output = client.run( + "stability-ai/sdxl:39ed52f2a78e934b3ba6e2a89f5b1c712de7dfea535525255b1aa35c5565e08b", + input={"prompt": "a 19th century portrait of a wombat gentleman"} +) + +# With options +output = client.run( + "meta/llama-2-70b-chat", + input={ + "prompt": "Write a poem about machine learning", + "max_new_tokens": 500, + "temperature": 0.7 + }, + wait=30, # Wait up to 30 seconds for completion (or True for unlimited) + use_file_output=True, # Return files as FileOutput objects + file_encoding_strategy="base64" # Encode input files as base64 (or "url") ) + +# Model reference formats +client.run("owner/name:version", input={}) # Specific version +client.run("owner/name", input={}) # Latest version +client.run("5c7d5dc6dd8bf75c1acaa8565735e7986bc5b66206b55cca93cb72c9bf15ccaa", input={}) # Version ID ``` -Methods: +### stream() - Stream Model Output + +For models that support streaming (like language models). + +```python +# Stream text output +for event in client.stream( + "meta/llama-2-70b-chat", + input={ + "prompt": "Tell me a story about a robot", + "max_new_tokens": 1000 + } +): + print(str(event), end="") + +# Async streaming +async for event in async_client.stream("meta/llama-2-70b-chat", input={"prompt": "Hello"}): + print(str(event), end="") +``` + +### use() - Create a Reusable Model Function + +Experimental feature for creating reusable model functions. + +```python +# Create a model function +stable_diffusion = client.use("stability-ai/stable-diffusion") + +# Use it multiple times +image1 = stable_diffusion(prompt="a cat in a hat") +image2 = stable_diffusion(prompt="a dog in sunglasses") + +# With streaming models +llama = client.use("meta/llama-2-70b-chat", streaming=True) +for chunk in llama(prompt="Explain quantum computing"): + print(chunk, end="") +``` + +### search() - Search Models + +```python +# Search for models +results = client.search(query="image generation", limit=10) + +for model in results: + print(f"{model.owner}/{model.name}: {model.description}") +``` + +## Core Resources + +### Predictions + +Create and manage model predictions. 
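+
+For example, a minimal create-and-wait round trip might look like the sketch
+below (the model reference and input are placeholders):
+
+```python
+prediction = client.predictions.create(
+    model="owner/model:version",
+    input={"prompt": "hello world"},
+)
+prediction = client.predictions.wait(prediction_id=prediction.id)
+print(prediction.status, prediction.output)
+```
+
+The full set of prediction operations: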
+ +```python +from replicate.types import Prediction + +# Create a prediction +prediction = client.predictions.create( + model="owner/model:version", + input={"prompt": "hello world"}, + webhook="https://example.com/webhook", # Optional webhook URL + webhook_events_filter=["start", "completed"] # Optional webhook events +) + +# Get prediction status +prediction = client.predictions.get(prediction_id="abc123") +print(f"Status: {prediction.status}") +print(f"Output: {prediction.output}") + +# Cancel a prediction +cancelled = client.predictions.cancel(prediction_id="abc123") -- replicate.deployments.create(\*\*params) -> DeploymentCreateResponse -- replicate.deployments.update(\*, deployment_owner, deployment_name, \*\*params) -> DeploymentUpdateResponse -- replicate.deployments.list() -> SyncCursorURLPage[DeploymentListResponse] -- replicate.deployments.delete(\*, deployment_owner, deployment_name) -> None -- replicate.deployments.get(\*, deployment_owner, deployment_name) -> DeploymentGetResponse +# List predictions +for prediction in client.predictions.list(): + print(f"{prediction.id}: {prediction.status}") -## Predictions +# Wait for a prediction to complete +completed = client.predictions.wait( + prediction_id="abc123", + timeout=60 # Optional timeout in seconds +) +``` + +### Models -Methods: +Interact with models and their versions. -- replicate.deployments.predictions.create(\*, deployment_owner, deployment_name, \*\*params) -> Prediction +```python +# Get a specific model +model = client.models.get(model_owner="stability-ai", model_name="stable-diffusion") +print(f"Model: {model.owner}/{model.name}") +print(f"Description: {model.description}") +print(f"Latest version: {model.latest_version.id}") + +# List all models (with pagination) +for model in client.models.list(): + print(f"{model.owner}/{model.name}") + +# Search models +for model in client.models.search(query="text generation"): + print(f"{model.owner}/{model.name}: {model.description}") + +# Create a new model +model = client.models.create( + owner="your-username", + name="my-model", + visibility="public", # or "private" + hardware="gpu-t4", # Specify hardware requirements + description="My custom model", + github_url="https://github.com/user/repo" +) -# Hardware +# Delete a model +client.models.delete(model_owner="your-username", model_name="my-model") +``` -Types: +#### Model Versions ```python -from replicate.types import HardwareListResponse +# List model versions +for version in client.models.versions.list( + model_owner="stability-ai", + model_name="stable-diffusion" +): + print(f"Version {version.id}: created at {version.created_at}") + +# Get a specific version +version = client.models.versions.get( + model_owner="stability-ai", + model_name="stable-diffusion", + version_id="db21e45d3f7023abc2a46ee38a23973f6dce16bb082a930b0c49861f96d1e5bf" +) + +# Delete a version +client.models.versions.delete( + model_owner="your-username", + model_name="my-model", + version_id="version-id" +) ``` -Methods: +#### Model Predictions -- replicate.hardware.list() -> HardwareListResponse +Run predictions directly through a model. 
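+
+Besides the dedicated route shown below, a similar prediction can be created
+through the top-level predictions API by passing a model reference (a sketch):
+
+```python
+prediction = client.predictions.create(
+    model="stability-ai/stable-diffusion",
+    input={"prompt": "a beautiful landscape"},
+)
+```
+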
-# Account +```python +# Create a prediction for a specific model +prediction = client.models.predictions.create( + model_owner="stability-ai", + model_name="stable-diffusion", + input={"prompt": "a beautiful landscape"} +) +``` -Types: +#### Model Examples ```python -from replicate.types import AccountGetResponse +# Get example predictions for a model +for example in client.models.examples.list( + model_owner="stability-ai", + model_name="stable-diffusion" +): + print(f"Example input: {example.input}") + print(f"Example output: {example.output}") ``` -Methods: +### Deployments + +Manage model deployments for production use. + +```python +# Create a deployment +deployment = client.deployments.create( + name="my-deployment", + model="owner/model:version", + hardware="gpu-a100-large", + min_instances=1, + max_instances=10 +) + +# List deployments +for deployment in client.deployments.list(): + print(f"{deployment.owner}/{deployment.name}") -- replicate.account.get() -> AccountGetResponse +# Get deployment details +deployment = client.deployments.get( + deployment_owner="your-username", + deployment_name="my-deployment" +) -# Models +# Update deployment +updated = client.deployments.update( + deployment_owner="your-username", + deployment_name="my-deployment", + min_instances=2, + max_instances=20 +) -Types: +# Delete deployment +client.deployments.delete( + deployment_owner="your-username", + deployment_name="my-deployment" +) + +# Run a prediction on a deployment +prediction = client.deployments.predictions.create( + deployment_owner="your-username", + deployment_name="my-deployment", + input={"prompt": "hello world"} +) +``` + +### Trainings + +Create and manage model training jobs. ```python -from replicate.types import ( - ModelCreateResponse, - ModelListResponse, - ModelGetResponse, - ModelSearchResponse, +# Start a training job +training = client.trainings.create( + model_owner="your-username", + model_name="my-model", + version_id="base-version-id", + input={ + "train_data": "https://example.com/training-data.zip", + "epochs": 100, + "batch_size": 32 + }, + webhook="https://example.com/training-webhook" ) + +# Get training status +training = client.trainings.get(training_id="training-abc123") +print(f"Status: {training.status}") + +# List trainings +for training in client.trainings.list(): + print(f"{training.id}: {training.status}") + +# Cancel a training +cancelled = client.trainings.cancel(training_id="training-abc123") +``` + +### Collections + +Browse curated model collections. + +```python +# List collections +for collection in client.collections.list(): + print(f"{collection.name}: {collection.description}") + +# Get a specific collection +collection = client.collections.get(collection_slug="awesome-sdxl-models") +for model in collection.models: + print(f"- {model.owner}/{model.name}") +``` + +### Files + +Upload and manage files for model inputs. 
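+
+A common pattern is to upload a file once and reuse its hosted URL across
+predictions, as sketched here (the model reference is a placeholder):
+
+```python
+with open("image.jpg", "rb") as f:
+    uploaded = client.files.create(file=f)
+
+# Reuse the hosted URL in any prediction input
+output = client.run("owner/model", input={"image": uploaded.urls.get})
+```
+
+The full file API: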
+ +```python +# Create/upload a file +with open("image.jpg", "rb") as f: + file_response = client.files.create(file=f) + file_url = file_response.urls.get + +# List files +for file in client.files.list(): + print(f"{file.id}: {file.name}") + +# Get file details +file = client.files.get(file_id="file-abc123") +print(f"File URL: {file.urls.get}") + +# Delete a file +client.files.delete(file_id="file-abc123") ``` -Methods: +### Hardware -- replicate.models.create(\*\*params) -> ModelCreateResponse -- replicate.models.list() -> SyncCursorURLPage[ModelListResponse] -- replicate.models.delete(\*, model_owner, model_name) -> None -- replicate.models.get(\*, model_owner, model_name) -> ModelGetResponse -- replicate.models.search(\*\*params) -> SyncCursorURLPage[ModelSearchResponse] +Get information about available hardware. -## Examples +```python +# List available hardware SKUs +hardware_list = client.hardware.list() +for sku in hardware_list: + print(f"{sku.name}: {sku.specs}") +``` -Methods: +### Account -- replicate.models.examples.list(\*, model_owner, model_name) -> SyncCursorURLPage[Prediction] +Manage account information. -## Predictions +```python +# Get account details +account = client.account.get() +print(f"Username: {account.username}") +print(f"Email: {account.email}") +``` -Methods: +### Webhooks -- replicate.models.predictions.create(\*, model_owner, model_name, \*\*params) -> Prediction +Configure webhooks for predictions. -## Readme +```python +# Get the default webhook secret +webhook_secret = client.webhooks.default.secret.get() +print(f"Webhook signing secret: {webhook_secret.key}") +``` -Types: +## File Handling + +### Input Files + +The SDK supports multiple ways to provide file inputs: ```python -from replicate.types.models import ReadmeGetResponse +# File object +with open("input.jpg", "rb") as f: + output = client.run("model:version", input={"image": f}) + +# File path (automatically opened) +output = client.run("model:version", input={"image": "path/to/image.jpg"}) + +# URL +output = client.run("model:version", input={"image": "https://example.com/image.jpg"}) + +# Base64 data URI +output = client.run("model:version", input={"image": "data:image/jpeg;base64,..."}) + +# Control encoding strategy +output = client.run( + "model:version", + input={"image": file_obj}, + file_encoding_strategy="base64" # or "url" (uploads to Replicate) +) ``` -Methods: +### Output Files + +File outputs are automatically converted to `FileOutput` objects: -- replicate.models.readme.get(\*, model_owner, model_name) -> str +```python +from replicate.helpers import FileOutput + +output = client.run("model:version", input={"prompt": "generate an image"}) + +# If output is a FileOutput +if isinstance(output, FileOutput): + # Get the URL + print(f"File URL: {output.url}") + + # Read the file content + content = output.read() + + # Save to disk + with open("output.jpg", "wb") as f: + for chunk in output: + f.write(chunk) +``` -## Versions +## Error Handling -Types: +The SDK provides detailed exception types for error handling: ```python -from replicate.types.models import VersionListResponse, VersionGetResponse +from replicate.exceptions import ( + ReplicateError, + ModelError, + RateLimitError, + AuthenticationError, + NotFoundError +) + +try: + output = client.run("model:version", input={"prompt": "test"}) +except ModelError as e: + # Model execution failed + print(f"Model error: {e}") + print(f"Prediction ID: {e.prediction.id}") + print(f"Prediction status: {e.prediction.status}") +except 
RateLimitError as e: + # Rate limited + print("Rate limit exceeded, retry after:", e.response.headers.get("retry-after")) +except AuthenticationError: + # Invalid API token + print("Invalid API token") +except NotFoundError: + # Model not found + print("Model not found") +except ReplicateError as e: + # Other Replicate API errors + print(f"API error: {e}") ``` -Methods: +## Pagination -- replicate.models.versions.list(\*, model_owner, model_name) -> SyncCursorURLPage[VersionListResponse] -- replicate.models.versions.delete(\*, model_owner, model_name, version_id) -> None -- replicate.models.versions.get(\*, model_owner, model_name, version_id) -> VersionGetResponse +The SDK automatically handles pagination for list operations: -# Predictions +```python +# Automatic pagination (iterates through all pages) +for model in client.models.list(): + print(model.name) -Types: +# Manual pagination +first_page = client.models.list() +print(f"Items in first page: {len(first_page.items)}") + +if first_page.has_next_page(): + next_page = first_page.get_next_page() + print(f"Items in second page: {len(next_page.items)}") + +# Get all items at once +all_models = list(client.models.list()) +``` + +## Advanced Features + +### Raw Response Access + +Access the underlying HTTP response: + +```python +# Get raw response +response = client.predictions.with_raw_response.create( + model="model:version", + input={"prompt": "test"} +) + +# Access response data +print(f"Status code: {response.status_code}") +print(f"Headers: {response.headers}") + +# Parse the response +prediction = response.parse() +``` + +### Custom HTTP Client + +Configure a custom HTTP client: ```python -from replicate.types import Prediction, PredictionOutput, PredictionRequest +import httpx +from replicate import DefaultHttpxClient + +# With proxy +client = Replicate( + http_client=DefaultHttpxClient( + proxy="http://proxy.example.com:8080" + ) +) + +# With custom timeout +client = Replicate( + http_client=DefaultHttpxClient( + timeout=httpx.Timeout(60.0) + ) +) ``` -Methods: +### Retries and Timeouts -- replicate.predictions.create(\*\*params) -> Prediction -- replicate.predictions.list(\*\*params) -> SyncCursorURLPageWithCreatedFilters[Prediction] -- replicate.predictions.cancel(\*, prediction_id) -> Prediction -- replicate.predictions.get(\*, prediction_id) -> Prediction +Configure retry behavior and timeouts: + +```python +client = Replicate( + max_retries=5, # Maximum number of retries + timeout=120.0 # Request timeout in seconds +) + +# Per-request timeout +output = client.run( + "model:version", + input={"prompt": "test"}, + wait=60 # Wait up to 60 seconds for completion +) +``` -# Trainings +### Client Copying -Types: +Create a new client with modified settings: ```python -from replicate.types import ( - TrainingCreateResponse, - TrainingListResponse, - TrainingCancelResponse, - TrainingGetResponse, +# Create a copy with different settings +new_client = client.copy( + bearer_token="different_token", + timeout=60.0, + max_retries=3 ) ``` -Methods: +## Async/Await Support + +All methods have async equivalents when using `AsyncReplicate`: + +```python +import asyncio +from replicate import AsyncReplicate + +async def main(): + client = AsyncReplicate() + + # Run a model + output = await client.run( + "stability-ai/stable-diffusion", + input={"prompt": "a futuristic city"} + ) + + # Stream output + async for event in client.stream( + "meta/llama-2-70b-chat", + input={"prompt": "Tell me a joke"} + ): + print(event, end="") + + # Pagination 
+ async for model in client.models.list(): + print(model.name) + + # Concurrent requests + tasks = [ + client.run("model1", input={"prompt": "test1"}), + client.run("model2", input={"prompt": "test2"}), + client.run("model3", input={"prompt": "test3"}) + ] + results = await asyncio.gather(*tasks) + +asyncio.run(main()) +``` -- replicate.trainings.create(\*, model_owner, model_name, version_id, \*\*params) -> TrainingCreateResponse -- replicate.trainings.list() -> SyncCursorURLPage[TrainingListResponse] -- replicate.trainings.cancel(\*, training_id) -> TrainingCancelResponse -- replicate.trainings.get(\*, training_id) -> TrainingGetResponse +## Environment Variables -# Webhooks +The SDK respects these environment variables: -## Default +- `REPLICATE_API_TOKEN` - API authentication token +- `REPLICATE_BASE_URL` - Override the API base URL (default: `https://api.replicate.com/v1`) -### Secret +## Type Hints -Types: +The SDK is fully typed with comprehensive type hints: ```python -from replicate.types.webhooks.default import SecretGetResponse +from replicate import Replicate +from replicate.types import Prediction, PredictionStatus +from replicate.pagination import SyncCursorURLPage + +client: Replicate = Replicate() + +# Type hints for responses +prediction: Prediction = client.predictions.get(prediction_id="abc123") +status: PredictionStatus = prediction.status + +# Type hints for pagination +page: SyncCursorURLPage[Prediction] = client.predictions.list() ``` -Methods: +## Common Patterns -- replicate.webhooks.default.secret.get() -> SecretGetResponse +### Wait for Completion with Polling -# Files +```python +import time + +def wait_for_prediction(client, prediction_id, timeout=300): + """Poll a prediction until it completes or times out.""" + start = time.time() + while time.time() - start < timeout: + prediction = client.predictions.get(prediction_id) + if prediction.status in ["succeeded", "failed", "canceled"]: + return prediction + time.sleep(2) # Poll every 2 seconds + raise TimeoutError(f"Prediction {prediction_id} timed out") + +# Usage +prediction = client.predictions.create(model="model:version", input={}) +result = wait_for_prediction(client, prediction.id) +``` -Types: +### Batch Processing ```python -from replicate.types import FileCreateResponse, FileListResponse, FileGetResponse +import asyncio +from replicate import AsyncReplicate + +async def batch_process(prompts): + """Process multiple prompts in parallel.""" + client = AsyncReplicate() + tasks = [ + client.run("model:version", input={"prompt": prompt}) + for prompt in prompts + ] + return await asyncio.gather(*tasks) + +# Usage +prompts = ["prompt 1", "prompt 2", "prompt 3"] +results = asyncio.run(batch_process(prompts)) ``` + +### Webhook Handling + +```python +from flask import Flask, request +import hmac +import hashlib + +app = Flask(__name__) + +def verify_webhook(payload, signature, secret): + """Verify webhook signature.""" + expected = hmac.new( + secret.encode(), + payload, + hashlib.sha256 + ).hexdigest() + return hmac.compare_digest(expected, signature) + +@app.route("/webhook", methods=["POST"]) +def webhook(): + # Get webhook secret + secret = "your_webhook_secret" # Get from client.webhooks.default.secret.get() + + # Verify signature + signature = request.headers.get("X-Replicate-Signature") + if not verify_webhook(request.data, signature, secret): + return "Unauthorized", 401 + + # Process prediction + data = request.json + print(f"Prediction {data['id']} status: {data['status']}") + + if data["status"] == 
"succeeded": + print(f"Output: {data['output']}") + + return "OK", 200 +``` + +## Migration Guide + +### From v0.x to v1.0+ + +The new SDK uses a different API structure. Here's how to migrate: + +**Old (v0.x):** +```python +import replicate + +# Run a model +output = replicate.run( + "stability-ai/stable-diffusion:version", + input={"prompt": "a cat"} +) + +# Get a model +model = replicate.models.get("stability-ai/stable-diffusion") +``` + +**New (v1.0+):** +```python +from replicate import Replicate + +client = Replicate() + +# Run a model +output = client.run( + "stability-ai/stable-diffusion:version", + input={"prompt": "a cat"} +) + +# Get a model +model = client.models.get( + model_owner="stability-ai", + model_name="stable-diffusion" +) +``` + +### Using Legacy Authentication + +For compatibility with older code: + +```python +# Old style (still supported) +client = Replicate(api_token="your_token") + +# New style (recommended) +client = Replicate(bearer_token="your_token") +``` + +## Support + +- **Documentation**: https://replicate.com/docs +- **GitHub**: https://github.com/replicate/replicate-python +- **Discord**: https://discord.gg/replicate +- **API Reference**: https://replicate.com/docs/api + +## License + +Apache License 2.0 \ No newline at end of file From cbd450a9c954a975d7c508f13e3b26e17ad01bea Mon Sep 17 00:00:00 2001 From: Zeke Sikelianos Date: Tue, 23 Sep 2025 09:08:55 -0700 Subject: [PATCH 3/8] docs: rename 'client' to 'replicate' in all code examples Update all documentation examples to use 'replicate' as the variable name instead of 'client' for better consistency with the library name and improved readability. --- api.md | 188 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 94 insertions(+), 94 deletions(-) diff --git a/api.md b/api.md index 298594b..2e64073 100644 --- a/api.md +++ b/api.md @@ -9,13 +9,13 @@ pip install replicate ## Quick Start ```python -import replicate +from replicate import Replicate -# Initialize the client (uses REPLICATE_API_TOKEN env var by default) -client = replicate.Replicate() +# Initialize with REPLICATE_API_TOKEN env var by default +replicate = Replicate() # Run a model -output = client.run( +output = replicate.run( "black-forest-labs/flux-schnell", input={"prompt": "astronaut on a horse"} ) @@ -30,16 +30,16 @@ print(output) from replicate import Replicate # Using environment variable (REPLICATE_API_TOKEN) -client = Replicate() +replicate = Replicate() # With explicit token -client = Replicate(bearer_token="your_api_token") +replicate = Replicate(bearer_token="your_api_token") # Legacy token parameter (for compatibility) -client = Replicate(api_token="your_api_token") +replicate = Replicate(api_token="your_api_token") # With custom configuration -client = Replicate( +replicate = Replicate( bearer_token="your_api_token", base_url="https://api.replicate.com/v1", # Optional custom base URL timeout=120.0, # Request timeout in seconds @@ -54,8 +54,8 @@ from replicate import AsyncReplicate import asyncio async def main(): - client = AsyncReplicate(bearer_token="your_api_token") - output = await client.run( + replicate = AsyncReplicate(bearer_token="your_api_token") + output = await replicate.run( "stability-ai/stable-diffusion", input={"prompt": "a watercolor painting"} ) @@ -72,13 +72,13 @@ The simplest way to run a model and get output. 
```python # Basic usage - returns output when complete -output = client.run( +output = replicate.run( "stability-ai/sdxl:39ed52f2a78e934b3ba6e2a89f5b1c712de7dfea535525255b1aa35c5565e08b", input={"prompt": "a 19th century portrait of a wombat gentleman"} ) # With options -output = client.run( +output = replicate.run( "meta/llama-2-70b-chat", input={ "prompt": "Write a poem about machine learning", @@ -91,9 +91,9 @@ output = client.run( ) # Model reference formats -client.run("owner/name:version", input={}) # Specific version -client.run("owner/name", input={}) # Latest version -client.run("5c7d5dc6dd8bf75c1acaa8565735e7986bc5b66206b55cca93cb72c9bf15ccaa", input={}) # Version ID +replicate.run("owner/name:version", input={}) # Specific version +replicate.run("owner/name", input={}) # Latest version +replicate.run("5c7d5dc6dd8bf75c1acaa8565735e7986bc5b66206b55cca93cb72c9bf15ccaa", input={}) # Version ID ``` ### stream() - Stream Model Output @@ -102,7 +102,7 @@ For models that support streaming (like language models). ```python # Stream text output -for event in client.stream( +for event in replicate.stream( "meta/llama-2-70b-chat", input={ "prompt": "Tell me a story about a robot", @@ -112,7 +112,7 @@ for event in client.stream( print(str(event), end="") # Async streaming -async for event in async_client.stream("meta/llama-2-70b-chat", input={"prompt": "Hello"}): +async for event in async_replicate.stream("meta/llama-2-70b-chat", input={"prompt": "Hello"}): print(str(event), end="") ``` @@ -122,14 +122,14 @@ Experimental feature for creating reusable model functions. ```python # Create a model function -stable_diffusion = client.use("stability-ai/stable-diffusion") +stable_diffusion = replicate.use("stability-ai/stable-diffusion") # Use it multiple times image1 = stable_diffusion(prompt="a cat in a hat") image2 = stable_diffusion(prompt="a dog in sunglasses") # With streaming models -llama = client.use("meta/llama-2-70b-chat", streaming=True) +llama = replicate.use("meta/llama-2-70b-chat", streaming=True) for chunk in llama(prompt="Explain quantum computing"): print(chunk, end="") ``` @@ -138,7 +138,7 @@ for chunk in llama(prompt="Explain quantum computing"): ```python # Search for models -results = client.search(query="image generation", limit=10) +results = replicate.search(query="image generation", limit=10) for model in results: print(f"{model.owner}/{model.name}: {model.description}") @@ -154,7 +154,7 @@ Create and manage model predictions. 
from replicate.types import Prediction # Create a prediction -prediction = client.predictions.create( +prediction = replicate.predictions.create( model="owner/model:version", input={"prompt": "hello world"}, webhook="https://example.com/webhook", # Optional webhook URL @@ -162,19 +162,19 @@ prediction = client.predictions.create( ) # Get prediction status -prediction = client.predictions.get(prediction_id="abc123") +prediction = replicate.predictions.get(prediction_id="abc123") print(f"Status: {prediction.status}") print(f"Output: {prediction.output}") # Cancel a prediction -cancelled = client.predictions.cancel(prediction_id="abc123") +cancelled = replicate.predictions.cancel(prediction_id="abc123") # List predictions -for prediction in client.predictions.list(): +for prediction in replicate.predictions.list(): print(f"{prediction.id}: {prediction.status}") # Wait for a prediction to complete -completed = client.predictions.wait( +completed = replicate.predictions.wait( prediction_id="abc123", timeout=60 # Optional timeout in seconds ) @@ -186,21 +186,21 @@ Interact with models and their versions. ```python # Get a specific model -model = client.models.get(model_owner="stability-ai", model_name="stable-diffusion") +model = replicate.models.get(model_owner="stability-ai", model_name="stable-diffusion") print(f"Model: {model.owner}/{model.name}") print(f"Description: {model.description}") print(f"Latest version: {model.latest_version.id}") # List all models (with pagination) -for model in client.models.list(): +for model in replicate.models.list(): print(f"{model.owner}/{model.name}") # Search models -for model in client.models.search(query="text generation"): +for model in replicate.models.search(query="text generation"): print(f"{model.owner}/{model.name}: {model.description}") # Create a new model -model = client.models.create( +model = replicate.models.create( owner="your-username", name="my-model", visibility="public", # or "private" @@ -210,28 +210,28 @@ model = client.models.create( ) # Delete a model -client.models.delete(model_owner="your-username", model_name="my-model") +replicate.models.delete(model_owner="your-username", model_name="my-model") ``` #### Model Versions ```python # List model versions -for version in client.models.versions.list( +for version in replicate.models.versions.list( model_owner="stability-ai", model_name="stable-diffusion" ): print(f"Version {version.id}: created at {version.created_at}") # Get a specific version -version = client.models.versions.get( +version = replicate.models.versions.get( model_owner="stability-ai", model_name="stable-diffusion", version_id="db21e45d3f7023abc2a46ee38a23973f6dce16bb082a930b0c49861f96d1e5bf" ) # Delete a version -client.models.versions.delete( +replicate.models.versions.delete( model_owner="your-username", model_name="my-model", version_id="version-id" @@ -244,7 +244,7 @@ Run predictions directly through a model. ```python # Create a prediction for a specific model -prediction = client.models.predictions.create( +prediction = replicate.models.predictions.create( model_owner="stability-ai", model_name="stable-diffusion", input={"prompt": "a beautiful landscape"} @@ -255,7 +255,7 @@ prediction = client.models.predictions.create( ```python # Get example predictions for a model -for example in client.models.examples.list( +for example in replicate.models.examples.list( model_owner="stability-ai", model_name="stable-diffusion" ): @@ -269,7 +269,7 @@ Manage model deployments for production use. 
```python # Create a deployment -deployment = client.deployments.create( +deployment = replicate.deployments.create( name="my-deployment", model="owner/model:version", hardware="gpu-a100-large", @@ -278,17 +278,17 @@ deployment = client.deployments.create( ) # List deployments -for deployment in client.deployments.list(): +for deployment in replicate.deployments.list(): print(f"{deployment.owner}/{deployment.name}") # Get deployment details -deployment = client.deployments.get( +deployment = replicate.deployments.get( deployment_owner="your-username", deployment_name="my-deployment" ) # Update deployment -updated = client.deployments.update( +updated = replicate.deployments.update( deployment_owner="your-username", deployment_name="my-deployment", min_instances=2, @@ -296,13 +296,13 @@ updated = client.deployments.update( ) # Delete deployment -client.deployments.delete( +replicate.deployments.delete( deployment_owner="your-username", deployment_name="my-deployment" ) # Run a prediction on a deployment -prediction = client.deployments.predictions.create( +prediction = replicate.deployments.predictions.create( deployment_owner="your-username", deployment_name="my-deployment", input={"prompt": "hello world"} @@ -315,7 +315,7 @@ Create and manage model training jobs. ```python # Start a training job -training = client.trainings.create( +training = replicate.trainings.create( model_owner="your-username", model_name="my-model", version_id="base-version-id", @@ -328,15 +328,15 @@ training = client.trainings.create( ) # Get training status -training = client.trainings.get(training_id="training-abc123") +training = replicate.trainings.get(training_id="training-abc123") print(f"Status: {training.status}") # List trainings -for training in client.trainings.list(): +for training in replicate.trainings.list(): print(f"{training.id}: {training.status}") # Cancel a training -cancelled = client.trainings.cancel(training_id="training-abc123") +cancelled = replicate.trainings.cancel(training_id="training-abc123") ``` ### Collections @@ -345,11 +345,11 @@ Browse curated model collections. ```python # List collections -for collection in client.collections.list(): +for collection in replicate.collections.list(): print(f"{collection.name}: {collection.description}") # Get a specific collection -collection = client.collections.get(collection_slug="awesome-sdxl-models") +collection = replicate.collections.get(collection_slug="awesome-sdxl-models") for model in collection.models: print(f"- {model.owner}/{model.name}") ``` @@ -361,19 +361,19 @@ Upload and manage files for model inputs. ```python # Create/upload a file with open("image.jpg", "rb") as f: - file_response = client.files.create(file=f) + file_response = replicate.files.create(file=f) file_url = file_response.urls.get # List files -for file in client.files.list(): +for file in replicate.files.list(): print(f"{file.id}: {file.name}") # Get file details -file = client.files.get(file_id="file-abc123") +file = replicate.files.get(file_id="file-abc123") print(f"File URL: {file.urls.get}") # Delete a file -client.files.delete(file_id="file-abc123") +replicate.files.delete(file_id="file-abc123") ``` ### Hardware @@ -382,7 +382,7 @@ Get information about available hardware. ```python # List available hardware SKUs -hardware_list = client.hardware.list() +hardware_list = replicate.hardware.list() for sku in hardware_list: print(f"{sku.name}: {sku.specs}") ``` @@ -393,7 +393,7 @@ Manage account information. 
```python # Get account details -account = client.account.get() +account = replicate.account.get() print(f"Username: {account.username}") print(f"Email: {account.email}") ``` @@ -404,7 +404,7 @@ Configure webhooks for predictions. ```python # Get the default webhook secret -webhook_secret = client.webhooks.default.secret.get() +webhook_secret = replicate.webhooks.default.secret.get() print(f"Webhook signing secret: {webhook_secret.key}") ``` @@ -417,19 +417,19 @@ The SDK supports multiple ways to provide file inputs: ```python # File object with open("input.jpg", "rb") as f: - output = client.run("model:version", input={"image": f}) + output = replicate.run("model:version", input={"image": f}) # File path (automatically opened) -output = client.run("model:version", input={"image": "path/to/image.jpg"}) +output = replicate.run("model:version", input={"image": "path/to/image.jpg"}) # URL -output = client.run("model:version", input={"image": "https://example.com/image.jpg"}) +output = replicate.run("model:version", input={"image": "https://example.com/image.jpg"}) # Base64 data URI -output = client.run("model:version", input={"image": "data:image/jpeg;base64,..."}) +output = replicate.run("model:version", input={"image": "data:image/jpeg;base64,..."}) # Control encoding strategy -output = client.run( +output = replicate.run( "model:version", input={"image": file_obj}, file_encoding_strategy="base64" # or "url" (uploads to Replicate) @@ -443,7 +443,7 @@ File outputs are automatically converted to `FileOutput` objects: ```python from replicate.helpers import FileOutput -output = client.run("model:version", input={"prompt": "generate an image"}) +output = replicate.run("model:version", input={"prompt": "generate an image"}) # If output is a FileOutput if isinstance(output, FileOutput): @@ -473,7 +473,7 @@ from replicate.exceptions import ( ) try: - output = client.run("model:version", input={"prompt": "test"}) + output = replicate.run("model:version", input={"prompt": "test"}) except ModelError as e: # Model execution failed print(f"Model error: {e}") @@ -499,11 +499,11 @@ The SDK automatically handles pagination for list operations: ```python # Automatic pagination (iterates through all pages) -for model in client.models.list(): +for model in replicate.models.list(): print(model.name) # Manual pagination -first_page = client.models.list() +first_page = replicate.models.list() print(f"Items in first page: {len(first_page.items)}") if first_page.has_next_page(): @@ -511,7 +511,7 @@ if first_page.has_next_page(): print(f"Items in second page: {len(next_page.items)}") # Get all items at once -all_models = list(client.models.list()) +all_models = list(replicate.models.list()) ``` ## Advanced Features @@ -522,7 +522,7 @@ Access the underlying HTTP response: ```python # Get raw response -response = client.predictions.with_raw_response.create( +response = replicate.predictions.with_raw_response.create( model="model:version", input={"prompt": "test"} ) @@ -537,21 +537,21 @@ prediction = response.parse() ### Custom HTTP Client -Configure a custom HTTP client: +Configure a custom HTTP client for Replicate: ```python import httpx from replicate import DefaultHttpxClient # With proxy -client = Replicate( +replicate = Replicate( http_client=DefaultHttpxClient( proxy="http://proxy.example.com:8080" ) ) # With custom timeout -client = Replicate( +replicate = Replicate( http_client=DefaultHttpxClient( timeout=httpx.Timeout(60.0) ) @@ -563,13 +563,13 @@ client = Replicate( Configure retry behavior and timeouts: 
```python -client = Replicate( +replicate = Replicate( max_retries=5, # Maximum number of retries timeout=120.0 # Request timeout in seconds ) # Per-request timeout -output = client.run( +output = replicate.run( "model:version", input={"prompt": "test"}, wait=60 # Wait up to 60 seconds for completion @@ -578,11 +578,11 @@ output = client.run( ### Client Copying -Create a new client with modified settings: +Create a new Replicate instance with modified settings: ```python # Create a copy with different settings -new_client = client.copy( +new_replicate = replicate.copy( bearer_token="different_token", timeout=60.0, max_retries=3 @@ -598,30 +598,30 @@ import asyncio from replicate import AsyncReplicate async def main(): - client = AsyncReplicate() + replicate = AsyncReplicate() # Run a model - output = await client.run( + output = await replicate.run( "stability-ai/stable-diffusion", input={"prompt": "a futuristic city"} ) # Stream output - async for event in client.stream( + async for event in replicate.stream( "meta/llama-2-70b-chat", input={"prompt": "Tell me a joke"} ): print(event, end="") # Pagination - async for model in client.models.list(): + async for model in replicate.models.list(): print(model.name) # Concurrent requests tasks = [ - client.run("model1", input={"prompt": "test1"}), - client.run("model2", input={"prompt": "test2"}), - client.run("model3", input={"prompt": "test3"}) + replicate.run("model1", input={"prompt": "test1"}), + replicate.run("model2", input={"prompt": "test2"}), + replicate.run("model3", input={"prompt": "test3"}) ] results = await asyncio.gather(*tasks) @@ -644,14 +644,14 @@ from replicate import Replicate from replicate.types import Prediction, PredictionStatus from replicate.pagination import SyncCursorURLPage -client: Replicate = Replicate() +replicate: Replicate = Replicate() # Type hints for responses -prediction: Prediction = client.predictions.get(prediction_id="abc123") +prediction: Prediction = replicate.predictions.get(prediction_id="abc123") status: PredictionStatus = prediction.status # Type hints for pagination -page: SyncCursorURLPage[Prediction] = client.predictions.list() +page: SyncCursorURLPage[Prediction] = replicate.predictions.list() ``` ## Common Patterns @@ -661,19 +661,19 @@ page: SyncCursorURLPage[Prediction] = client.predictions.list() ```python import time -def wait_for_prediction(client, prediction_id, timeout=300): +def wait_for_prediction(replicate, prediction_id, timeout=300): """Poll a prediction until it completes or times out.""" start = time.time() while time.time() - start < timeout: - prediction = client.predictions.get(prediction_id) + prediction = replicate.predictions.get(prediction_id) if prediction.status in ["succeeded", "failed", "canceled"]: return prediction time.sleep(2) # Poll every 2 seconds raise TimeoutError(f"Prediction {prediction_id} timed out") # Usage -prediction = client.predictions.create(model="model:version", input={}) -result = wait_for_prediction(client, prediction.id) +prediction = replicate.predictions.create(model="model:version", input={}) +result = wait_for_prediction(replicate, prediction.id) ``` ### Batch Processing @@ -684,9 +684,9 @@ from replicate import AsyncReplicate async def batch_process(prompts): """Process multiple prompts in parallel.""" - client = AsyncReplicate() + replicate = AsyncReplicate() tasks = [ - client.run("model:version", input={"prompt": prompt}) + replicate.run("model:version", input={"prompt": prompt}) for prompt in prompts ] return await 
asyncio.gather(*tasks) @@ -717,7 +717,7 @@ def verify_webhook(payload, signature, secret): @app.route("/webhook", methods=["POST"]) def webhook(): # Get webhook secret - secret = "your_webhook_secret" # Get from client.webhooks.default.secret.get() + secret = "your_webhook_secret" # From replicate.webhooks.default.secret.get() # Verify signature signature = request.headers.get("X-Replicate-Signature") @@ -758,16 +758,16 @@ model = replicate.models.get("stability-ai/stable-diffusion") ```python from replicate import Replicate -client = Replicate() +replicate = Replicate() # Run a model -output = client.run( +output = replicate.run( "stability-ai/stable-diffusion:version", input={"prompt": "a cat"} ) # Get a model -model = client.models.get( +model = replicate.models.get( model_owner="stability-ai", model_name="stable-diffusion" ) @@ -779,10 +779,10 @@ For compatibility with older code: ```python # Old style (still supported) -client = Replicate(api_token="your_token") +replicate = Replicate(api_token="your_token") # New style (recommended) -client = Replicate(bearer_token="your_token") +replicate = Replicate(bearer_token="your_token") ``` ## Support From 0d8b065a146a4abd8bf841e42c286a4386dc867b Mon Sep 17 00:00:00 2001 From: Zeke Sikelianos Date: Tue, 23 Sep 2025 09:14:02 -0700 Subject: [PATCH 4/8] docs: emphasize use() as the primary high-level method - Reorder high-level methods to show use() first as the recommended approach - Add "(Recommended)" label to use() method - Expand use() examples with more detailed usage patterns - Update Quick Start to demonstrate use() as the primary pattern - Clarify that use() creates Pythonic callable functions - Keep run() as secondary option for one-off predictions --- api.md | 70 +++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 47 insertions(+), 23 deletions(-) diff --git a/api.md b/api.md index 2e64073..b8c659d 100644 --- a/api.md +++ b/api.md @@ -14,12 +14,18 @@ from replicate import Replicate # Initialize with REPLICATE_API_TOKEN env var by default replicate = Replicate() -# Run a model +# Create a model function +flux = replicate.use("black-forest-labs/flux-schnell") + +# Call it like any Python function +output = flux(prompt="astronaut on a horse") +print(output) + +# Or use run() for one-off predictions output = replicate.run( "black-forest-labs/flux-schnell", input={"prompt": "astronaut on a horse"} ) -print(output) ``` ## Client Initialization @@ -66,9 +72,43 @@ asyncio.run(main()) ## High-Level Methods -### run() - Run a Model +### use() - Create a Reusable Model Function (Recommended) -The simplest way to run a model and get output. +The most Pythonic way to interact with models. Creates a callable function for any model. 
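+
+Beyond the basic patterns shown below, `use()` functions compose like ordinary
+Python callables, as in this sketch (the captioning model is hypothetical):
+
+```python
+caption = replicate.use("some-owner/image-captioner")  # hypothetical model name
+summarize = replicate.use("meta/llama-2-70b-chat")
+
+text = caption(image="https://example.com/photo.jpg")
+summary = summarize(prompt=f"Summarize this caption: {text}")
+```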
+ +```python +# Create a model function +sdxl = replicate.use("stability-ai/sdxl") + +# Call it like a regular function +image = sdxl(prompt="a 19th century portrait of a wombat gentleman") + +# Use it multiple times with different inputs +image1 = sdxl(prompt="a cat in a hat", negative_prompt="blurry, low quality") +image2 = sdxl(prompt="a dog in sunglasses", num_outputs=4) + +# Works great with language models too +llama = replicate.use("meta/llama-2-70b-chat") +response = llama( + prompt="Write a haiku about Python programming", + temperature=0.7, + max_new_tokens=100 +) + +# Enable streaming for models that support it +llama_stream = replicate.use("meta/llama-2-70b-chat", streaming=True) +for chunk in llama_stream(prompt="Explain quantum computing"): + print(chunk, end="") + +# Can accept model references in various formats +model = replicate.use("owner/name:version") # Specific version +model = replicate.use("owner/name") # Latest version +model = replicate.use("5c7d5dc6dd8bf75c1acaa8565735e7986bc5b66206b55cca93cb72c9bf15ccaa") # Version ID +``` + +### run() - Run a Model Once + +Direct method to run a model and get output. Good for one-off predictions. ```python # Basic usage - returns output when complete @@ -98,7 +138,7 @@ replicate.run("5c7d5dc6dd8bf75c1acaa8565735e7986bc5b66206b55cca93cb72c9bf15ccaa" ### stream() - Stream Model Output -For models that support streaming (like language models). +For models that support streaming (like language models). Returns an iterator of output chunks. ```python # Stream text output @@ -116,26 +156,10 @@ async for event in async_replicate.stream("meta/llama-2-70b-chat", input={"promp print(str(event), end="") ``` -### use() - Create a Reusable Model Function - -Experimental feature for creating reusable model functions. - -```python -# Create a model function -stable_diffusion = replicate.use("stability-ai/stable-diffusion") - -# Use it multiple times -image1 = stable_diffusion(prompt="a cat in a hat") -image2 = stable_diffusion(prompt="a dog in sunglasses") - -# With streaming models -llama = replicate.use("meta/llama-2-70b-chat", streaming=True) -for chunk in llama(prompt="Explain quantum computing"): - print(chunk, end="") -``` - ### search() - Search Models +Find models by keyword or description. + ```python # Search for models results = replicate.search(query="image generation", limit=10) From 4a356700bf45a0ca54a72b39d8b3cc151cb77762 Mon Sep 17 00:00:00 2001 From: Zeke Sikelianos Date: Tue, 23 Sep 2025 09:24:44 -0700 Subject: [PATCH 5/8] docs: update examples to use anthropic/claude-4-sonnet and google/nano-banana MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace all model references in examples: - stability-ai/sdxl → google/nano-banana - meta/llama-2-70b-chat → anthropic/claude-4-sonnet - black-forest-labs/flux-schnell → google/nano-banana - stability-ai/stable-diffusion → google/nano-banana This provides more whimsical and memorable example model names while maintaining the same API patterns and functionality demonstrations. 
--- api.md | 66 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/api.md b/api.md index b8c659d..116b2ff 100644 --- a/api.md +++ b/api.md @@ -15,15 +15,15 @@ from replicate import Replicate replicate = Replicate() # Create a model function -flux = replicate.use("black-forest-labs/flux-schnell") +banana = replicate.use("google/nano-banana") # Call it like any Python function -output = flux(prompt="astronaut on a horse") +output = banana(prompt="astronaut on a horse") print(output) # Or use run() for one-off predictions output = replicate.run( - "black-forest-labs/flux-schnell", + "google/nano-banana", input={"prompt": "astronaut on a horse"} ) ``` @@ -62,7 +62,7 @@ import asyncio async def main(): replicate = AsyncReplicate(bearer_token="your_api_token") output = await replicate.run( - "stability-ai/stable-diffusion", + "google/nano-banana", input={"prompt": "a watercolor painting"} ) print(output) @@ -78,26 +78,26 @@ The most Pythonic way to interact with models. Creates a callable function for a ```python # Create a model function -sdxl = replicate.use("stability-ai/sdxl") +banana = replicate.use("google/nano-banana") # Call it like a regular function -image = sdxl(prompt="a 19th century portrait of a wombat gentleman") +image = banana(prompt="a 19th century portrait of a wombat gentleman") # Use it multiple times with different inputs -image1 = sdxl(prompt="a cat in a hat", negative_prompt="blurry, low quality") -image2 = sdxl(prompt="a dog in sunglasses", num_outputs=4) +image1 = banana(prompt="a cat in a hat", negative_prompt="blurry, low quality") +image2 = banana(prompt="a dog in sunglasses", num_outputs=4) # Works great with language models too -llama = replicate.use("meta/llama-2-70b-chat") -response = llama( +claude = replicate.use("anthropic/claude-4-sonnet") +response = claude( prompt="Write a haiku about Python programming", temperature=0.7, max_new_tokens=100 ) # Enable streaming for models that support it -llama_stream = replicate.use("meta/llama-2-70b-chat", streaming=True) -for chunk in llama_stream(prompt="Explain quantum computing"): +claude_stream = replicate.use("anthropic/claude-4-sonnet", streaming=True) +for chunk in claude_stream(prompt="Explain quantum computing"): print(chunk, end="") # Can accept model references in various formats @@ -113,13 +113,13 @@ Direct method to run a model and get output. Good for one-off predictions. ```python # Basic usage - returns output when complete output = replicate.run( - "stability-ai/sdxl:39ed52f2a78e934b3ba6e2a89f5b1c712de7dfea535525255b1aa35c5565e08b", + "google/nano-banana:39ed52f2a78e934b3ba6e2a89f5b1c712de7dfea535525255b1aa35c5565e08b", input={"prompt": "a 19th century portrait of a wombat gentleman"} ) # With options output = replicate.run( - "meta/llama-2-70b-chat", + "anthropic/claude-4-sonnet", input={ "prompt": "Write a poem about machine learning", "max_new_tokens": 500, @@ -143,7 +143,7 @@ For models that support streaming (like language models). 
Returns an iterator of
 ```python
 # Stream text output
 for event in replicate.stream(
-    "meta/llama-2-70b-chat",
+    "anthropic/claude-4-sonnet",
     input={
         "prompt": "Tell me a story about a robot",
         "max_new_tokens": 1000
@@ -152,7 +152,7 @@ for event in replicate.stream(
     print(str(event), end="")
 
 # Async streaming
-async for event in async_replicate.stream("meta/llama-2-70b-chat", input={"prompt": "Hello"}):
+async for event in async_replicate.stream("anthropic/claude-4-sonnet", input={"prompt": "Hello"}):
     print(str(event), end="")
 ```
 
@@ -210,7 +210,7 @@ Interact with models and their versions.
 
 ```python
 # Get a specific model
-model = replicate.models.get(model_owner="stability-ai", model_name="stable-diffusion")
+model = replicate.models.get(model_owner="google", model_name="nano-banana")
 print(f"Model: {model.owner}/{model.name}")
 print(f"Description: {model.description}")
 print(f"Latest version: {model.latest_version.id}")
@@ -242,15 +242,15 @@ replicate.models.delete(model_owner="your-username", model_name="my-model")
 
 ```python
 # List model versions
 for version in replicate.models.versions.list(
-    model_owner="stability-ai",
-    model_name="stable-diffusion"
+    model_owner="google",
+    model_name="nano-banana"
 ):
     print(f"Version {version.id}: created at {version.created_at}")
 
 # Get a specific version
 version = replicate.models.versions.get(
-    model_owner="stability-ai",
-    model_name="stable-diffusion",
+    model_owner="google",
+    model_name="nano-banana",
     version_id="db21e45d3f7023abc2a46ee38a23973f6dce16bb082a930b0c49861f96d1e5bf"
 )
 
@@ -269,8 +269,8 @@ Run predictions directly through a model.
 
 ```python
 # Create a prediction for a specific model
 prediction = replicate.models.predictions.create(
-    model_owner="stability-ai",
-    model_name="stable-diffusion",
+    model_owner="google",
+    model_name="nano-banana",
     input={"prompt": "a beautiful landscape"}
 )
 ```
 
@@ -280,8 +280,8 @@ prediction = replicate.models.predictions.create(
 
 ```python
 # Get example predictions for a model
 for example in replicate.models.examples.list(
-    model_owner="stability-ai",
-    model_name="stable-diffusion"
+    model_owner="google",
+    model_name="nano-banana"
 ):
     print(f"Example input: {example.input}")
     print(f"Example output: {example.output}")
@@ -373,7 +373,7 @@ for collection in replicate.collections.list():
     print(f"{collection.name}: {collection.description}")
 
 # Get a specific collection
-collection = replicate.collections.get(collection_slug="awesome-sdxl-models")
+collection = replicate.collections.get(collection_slug="awesome-banana-models")
 for model in collection.models:
     print(f"- {model.owner}/{model.name}")
 ```
 
@@ -626,13 +626,13 @@ async def main():
 
     # Run a model
     output = await replicate.run(
-        "stability-ai/stable-diffusion",
+        "google/nano-banana",
         input={"prompt": "a futuristic city"}
     )
 
     # Stream output
     async for event in replicate.stream(
-        "meta/llama-2-70b-chat",
+        "anthropic/claude-4-sonnet",
         input={"prompt": "Tell me a joke"}
     ):
         print(event, end="")
@@ -770,12 +770,12 @@ import replicate
 
 # Run a model
 output = replicate.run(
-    "stability-ai/stable-diffusion:version",
+    "google/nano-banana:version",
     input={"prompt": "a cat"}
 )
 
 # Get a model
-model = replicate.models.get("stability-ai/stable-diffusion")
+model = replicate.models.get("google/nano-banana")
 ```
 
 **New (v1.0+):**
 
@@ -786,14 +786,14 @@ replicate = Replicate()
 
 # Run a model
 output = replicate.run(
-    "stability-ai/stable-diffusion:version",
+    "google/nano-banana:version",
     input={"prompt": "a cat"}
 )
 
 # Get a model
 model = replicate.models.get(
-    
model_owner="stability-ai", - model_name="stable-diffusion" + model_owner="google", + model_name="nano-banana" ) ``` From abc0da18be93ff155b0a93a391612bb5f16f7e94 Mon Sep 17 00:00:00 2001 From: Zeke Sikelianos Date: Tue, 23 Sep 2025 09:26:41 -0700 Subject: [PATCH 6/8] docs: simplify examples by using default import replicate pattern - Remove explicit `replicate = Replicate()` from most examples - Use `import replicate` pattern which automatically uses REPLICATE_API_TOKEN - Keep one comprehensive example showing custom client configuration with environment variables - Maintain explicit client instantiation only where showing advanced configuration - Update Quick Start, Type Hints, and Migration Guide sections accordingly This makes the examples cleaner and follows the most common usage pattern where users rely on the default environment variable configuration. --- api.md | 46 +++++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/api.md b/api.md index 116b2ff..f8ae546 100644 --- a/api.md +++ b/api.md @@ -9,10 +9,7 @@ pip install replicate ## Quick Start ```python -from replicate import Replicate - -# Initialize with REPLICATE_API_TOKEN env var by default -replicate = Replicate() +import replicate # Create a model function banana = replicate.use("google/nano-banana") @@ -30,27 +27,33 @@ output = replicate.run( ## Client Initialization -### Synchronous Client +By default, the SDK uses the `REPLICATE_API_TOKEN` environment variable: ```python -from replicate import Replicate +import replicate -# Using environment variable (REPLICATE_API_TOKEN) -replicate = Replicate() +# Uses REPLICATE_API_TOKEN from environment +output = replicate.run("google/nano-banana", input={"prompt": "hello"}) +``` + +### Custom Client Configuration -# With explicit token -replicate = Replicate(bearer_token="your_api_token") +For advanced use cases, you can create an explicit client instance: -# Legacy token parameter (for compatibility) -replicate = Replicate(api_token="your_api_token") +```python +from replicate import Replicate +import os -# With custom configuration +# Explicitly specify which environment variable to use replicate = Replicate( - bearer_token="your_api_token", + bearer_token=os.environ.get("MY_REPLICATE_TOKEN"), base_url="https://api.replicate.com/v1", # Optional custom base URL timeout=120.0, # Request timeout in seconds max_retries=5 # Maximum number of retries ) + +# Now use this configured client +output = replicate.run("google/nano-banana", input={"prompt": "hello"}) ``` ### Asynchronous Client @@ -58,9 +61,11 @@ replicate = Replicate( ```python from replicate import AsyncReplicate import asyncio +import os async def main(): - replicate = AsyncReplicate(bearer_token="your_api_token") + # Can specify token explicitly if needed + replicate = AsyncReplicate(bearer_token=os.environ.get("MY_REPLICATE_TOKEN")) output = await replicate.run( "google/nano-banana", input={"prompt": "a watercolor painting"} @@ -664,12 +669,10 @@ The SDK respects these environment variables: The SDK is fully typed with comprehensive type hints: ```python -from replicate import Replicate +import replicate from replicate.types import Prediction, PredictionStatus from replicate.pagination import SyncCursorURLPage -replicate: Replicate = Replicate() - # Type hints for responses prediction: Prediction = replicate.predictions.get(prediction_id="abc123") status: PredictionStatus = prediction.status @@ -708,6 +711,7 @@ from replicate import AsyncReplicate async def 
batch_process(prompts): """Process multiple prompts in parallel.""" + from replicate import AsyncReplicate replicate = AsyncReplicate() tasks = [ replicate.run("model:version", input={"prompt": prompt}) @@ -780,9 +784,7 @@ model = replicate.models.get("google/nano-banana") **New (v1.0+):** ```python -from replicate import Replicate - -replicate = Replicate() +import replicate # Run a model output = replicate.run( @@ -802,6 +804,8 @@ model = replicate.models.get( For compatibility with older code: ```python +from replicate import Replicate + # Old style (still supported) replicate = Replicate(api_token="your_token") From bfa0cc99d638c730f75c1da90f721027497a81df Mon Sep 17 00:00:00 2001 From: Zeke Sikelianos Date: Tue, 23 Sep 2025 09:39:51 -0700 Subject: [PATCH 7/8] docs: update examples to use minimum required inputs and correct output types Based on actual model schemas: - google/nano-banana: only requires 'prompt', returns image URL - anthropic/claude-4-sonnet: only requires 'prompt', returns text string Changes: - Simplified all examples to use only the required 'prompt' input - Updated variable names to clarify output types (image_url vs text) - Added comments showing what each model returns - Removed unnecessary optional parameters from examples - Updated async and streaming examples to show correct output types - Fixed batch processing example to be more specific about images --- api.md | 175 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 87 insertions(+), 88 deletions(-) diff --git a/api.md b/api.md index f8ae546..cba6258 100644 --- a/api.md +++ b/api.md @@ -11,18 +11,17 @@ pip install replicate ```python import replicate -# Create a model function +# Create a model function for image generation banana = replicate.use("google/nano-banana") -# Call it like any Python function -output = banana(prompt="astronaut on a horse") -print(output) +# Call it - returns an image URL +image_url = banana(prompt="astronaut on a horse") +print(image_url) # https://replicate.delivery/... -# Or use run() for one-off predictions -output = replicate.run( - "google/nano-banana", - input={"prompt": "astronaut on a horse"} -) +# Or for text generation +claude = replicate.use("anthropic/claude-4-sonnet") +text = claude(prompt="Write a haiku about Python") +print(text) # "Code flows like water..." ``` ## Client Initialization @@ -33,7 +32,7 @@ By default, the SDK uses the `REPLICATE_API_TOKEN` environment variable: import replicate # Uses REPLICATE_API_TOKEN from environment -output = replicate.run("google/nano-banana", input={"prompt": "hello"}) +image_url = replicate.run("google/nano-banana", input={"prompt": "hello"}) ``` ### Custom Client Configuration @@ -53,7 +52,7 @@ replicate = Replicate( ) # Now use this configured client -output = replicate.run("google/nano-banana", input={"prompt": "hello"}) +image_url = replicate.run("google/nano-banana", input={"prompt": "hello"}) ``` ### Asynchronous Client @@ -66,11 +65,11 @@ import os async def main(): # Can specify token explicitly if needed replicate = AsyncReplicate(bearer_token=os.environ.get("MY_REPLICATE_TOKEN")) - output = await replicate.run( + image_url = await replicate.run( "google/nano-banana", input={"prompt": "a watercolor painting"} ) - print(output) + print(image_url) # https://replicate.delivery/... asyncio.run(main()) ``` @@ -82,28 +81,28 @@ asyncio.run(main()) The most Pythonic way to interact with models. Creates a callable function for any model. 
```python -# Create a model function +# Image generation - returns file URLs banana = replicate.use("google/nano-banana") -# Call it like a regular function -image = banana(prompt="a 19th century portrait of a wombat gentleman") +# Simple call with just prompt (only required input) +image_url = banana(prompt="a 19th century portrait of a wombat gentleman") +print(image_url) # Returns: https://replicate.delivery/... -# Use it multiple times with different inputs -image1 = banana(prompt="a cat in a hat", negative_prompt="blurry, low quality") -image2 = banana(prompt="a dog in sunglasses", num_outputs=4) +# Use it multiple times +image1 = banana(prompt="a cat in a hat") +image2 = banana(prompt="a dog in sunglasses") -# Works great with language models too +# Text generation - returns text string claude = replicate.use("anthropic/claude-4-sonnet") -response = claude( - prompt="Write a haiku about Python programming", - temperature=0.7, - max_new_tokens=100 -) -# Enable streaming for models that support it +# Simple call with just prompt (only required input) +text = claude(prompt="Write a haiku about Python programming") +print(text) # Returns: "Code flows like water..." + +# Enable streaming for text models claude_stream = replicate.use("anthropic/claude-4-sonnet", streaming=True) for chunk in claude_stream(prompt="Explain quantum computing"): - print(chunk, end="") + print(chunk, end="") # Streams text chunks # Can accept model references in various formats model = replicate.use("owner/name:version") # Specific version @@ -116,24 +115,19 @@ model = replicate.use("5c7d5dc6dd8bf75c1acaa8565735e7986bc5b66206b55cca93cb72c9b Direct method to run a model and get output. Good for one-off predictions. ```python -# Basic usage - returns output when complete -output = replicate.run( - "google/nano-banana:39ed52f2a78e934b3ba6e2a89f5b1c712de7dfea535525255b1aa35c5565e08b", +# Image generation - returns a file URL +image_url = replicate.run( + "google/nano-banana", input={"prompt": "a 19th century portrait of a wombat gentleman"} ) +print(image_url) # https://replicate.delivery/... -# With options -output = replicate.run( +# Text generation - returns text string +text = replicate.run( "anthropic/claude-4-sonnet", - input={ - "prompt": "Write a poem about machine learning", - "max_new_tokens": 500, - "temperature": 0.7 - }, - wait=30, # Wait up to 30 seconds for completion (or True for unlimited) - use_file_output=True, # Return files as FileOutput objects - file_encoding_strategy="base64" # Encode input files as base64 (or "url") + input={"prompt": "Write a poem about machine learning"} ) +print(text) # "In silicon valleys deep and wide..." # Model reference formats replicate.run("owner/name:version", input={}) # Specific version @@ -146,19 +140,19 @@ replicate.run("5c7d5dc6dd8bf75c1acaa8565735e7986bc5b66206b55cca93cb72c9bf15ccaa" For models that support streaming (like language models). Returns an iterator of output chunks. 
```python -# Stream text output -for event in replicate.stream( +# Stream text output (only for text models like Claude) +for chunk in replicate.stream( "anthropic/claude-4-sonnet", - input={ - "prompt": "Tell me a story about a robot", - "max_new_tokens": 1000 - } + input={"prompt": "Tell me a story about a robot"} ): - print(str(event), end="") + print(chunk, end="") # Prints each text chunk as it arrives # Async streaming -async for event in async_replicate.stream("anthropic/claude-4-sonnet", input={"prompt": "Hello"}): - print(str(event), end="") +async for chunk in async_replicate.stream( + "anthropic/claude-4-sonnet", + input={"prompt": "Hello"} +): + print(chunk, end="") ``` ### search() - Search Models @@ -278,6 +272,7 @@ prediction = replicate.models.predictions.create( model_name="nano-banana", input={"prompt": "a beautiful landscape"} ) +# prediction.output will be an image URL when complete ``` #### Model Examples @@ -444,25 +439,24 @@ print(f"Webhook signing secret: {webhook_secret.key}") The SDK supports multiple ways to provide file inputs: ```python -# File object +# For models that accept image inputs (like Claude with vision) with open("input.jpg", "rb") as f: - output = replicate.run("model:version", input={"image": f}) + text = replicate.run("anthropic/claude-4-sonnet", input={ + "prompt": "What's in this image?", + "image": f + }) # File path (automatically opened) -output = replicate.run("model:version", input={"image": "path/to/image.jpg"}) +text = replicate.run("anthropic/claude-4-sonnet", input={ + "prompt": "Describe this", + "image": "path/to/image.jpg" +}) # URL -output = replicate.run("model:version", input={"image": "https://example.com/image.jpg"}) - -# Base64 data URI -output = replicate.run("model:version", input={"image": "data:image/jpeg;base64,..."}) - -# Control encoding strategy -output = replicate.run( - "model:version", - input={"image": file_obj}, - file_encoding_strategy="base64" # or "url" (uploads to Replicate) -) +text = replicate.run("anthropic/claude-4-sonnet", input={ + "prompt": "Analyze this image", + "image": "https://example.com/image.jpg" +}) ``` ### Output Files @@ -472,19 +466,23 @@ File outputs are automatically converted to `FileOutput` objects: ```python from replicate.helpers import FileOutput -output = replicate.run("model:version", input={"prompt": "generate an image"}) +# Image generation returns a file URL +image_url = replicate.run("google/nano-banana", input={"prompt": "sunset over mountains"}) +print(f"Generated image: {image_url}") + +# Text generation returns a string +text = replicate.run("anthropic/claude-4-sonnet", input={"prompt": "Tell me a joke"}) +print(f"Response: {text}") -# If output is a FileOutput -if isinstance(output, FileOutput): +# When using FileOutput wrapper +from replicate.helpers import FileOutput +if isinstance(image_url, FileOutput): # Get the URL - print(f"File URL: {output.url}") - - # Read the file content - content = output.read() + print(f"File URL: {image_url.url}") # Save to disk with open("output.jpg", "wb") as f: - for chunk in output: + for chunk in image_url: f.write(chunk) ``` @@ -502,7 +500,7 @@ from replicate.exceptions import ( ) try: - output = replicate.run("model:version", input={"prompt": "test"}) + image_url = replicate.run("google/nano-banana", input={"prompt": "test"}) except ModelError as e: # Model execution failed print(f"Model error: {e}") @@ -598,8 +596,8 @@ replicate = Replicate( ) # Per-request timeout -output = replicate.run( - "model:version", +image_url = 
replicate.run( + "google/nano-banana", input={"prompt": "test"}, wait=60 # Wait up to 60 seconds for completion ) @@ -629,18 +627,19 @@ from replicate import AsyncReplicate async def main(): replicate = AsyncReplicate() - # Run a model - output = await replicate.run( + # Image generation + image_url = await replicate.run( "google/nano-banana", input={"prompt": "a futuristic city"} ) + print(f"Image: {image_url}") - # Stream output - async for event in replicate.stream( + # Stream text generation + async for chunk in replicate.stream( "anthropic/claude-4-sonnet", input={"prompt": "Tell me a joke"} ): - print(event, end="") + print(chunk, end="") # Pagination async for model in replicate.models.list(): @@ -648,9 +647,9 @@ async def main(): # Concurrent requests tasks = [ - replicate.run("model1", input={"prompt": "test1"}), - replicate.run("model2", input={"prompt": "test2"}), - replicate.run("model3", input={"prompt": "test3"}) + replicate.run("google/nano-banana", input={"prompt": "cat"}), + replicate.run("google/nano-banana", input={"prompt": "dog"}), + replicate.run("anthropic/claude-4-sonnet", input={"prompt": "Hello"}) ] results = await asyncio.gather(*tasks) @@ -772,9 +771,9 @@ The new SDK uses a different API structure. Here's how to migrate: ```python import replicate -# Run a model -output = replicate.run( - "google/nano-banana:version", +# Run a model - image generation returns URL +image_url = replicate.run( + "google/nano-banana", input={"prompt": "a cat"} ) @@ -786,9 +785,9 @@ model = replicate.models.get("google/nano-banana") ```python import replicate -# Run a model -output = replicate.run( - "google/nano-banana:version", +# Run a model - image generation returns URL +image_url = replicate.run( + "google/nano-banana", input={"prompt": "a cat"} ) From 937ba61f1d7f65772268e7705637282fdad853de Mon Sep 17 00:00:00 2001 From: Zeke Sikelianos Date: Wed, 24 Sep 2025 09:31:17 -0700 Subject: [PATCH 8/8] docs: update headings to use sentence case instead of title case Changed all section headings throughout the API documentation to follow sentence case convention (only capitalizing the first word) for better consistency and modern documentation style. --- api.md | 62 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/api.md b/api.md index cba6258..ab66b5d 100644 --- a/api.md +++ b/api.md @@ -6,7 +6,7 @@ pip install replicate ``` -## Quick Start +## Quick start ```python import replicate @@ -24,7 +24,7 @@ text = claude(prompt="Write a haiku about Python") print(text) # "Code flows like water..." ``` -## Client Initialization +## Client initialization By default, the SDK uses the `REPLICATE_API_TOKEN` environment variable: @@ -35,7 +35,7 @@ import replicate image_url = replicate.run("google/nano-banana", input={"prompt": "hello"}) ``` -### Custom Client Configuration +### Custom client configuration For advanced use cases, you can create an explicit client instance: @@ -55,7 +55,7 @@ replicate = Replicate( image_url = replicate.run("google/nano-banana", input={"prompt": "hello"}) ``` -### Asynchronous Client +### Asynchronous client ```python from replicate import AsyncReplicate @@ -74,9 +74,9 @@ async def main(): asyncio.run(main()) ``` -## High-Level Methods +## High-level methods -### use() - Create a Reusable Model Function (Recommended) +### use() - Create a reusable model function (recommended) The most Pythonic way to interact with models. Creates a callable function for any model. 
@@ -110,7 +110,7 @@ model = replicate.use("owner/name") # Latest version model = replicate.use("5c7d5dc6dd8bf75c1acaa8565735e7986bc5b66206b55cca93cb72c9bf15ccaa") # Version ID ``` -### run() - Run a Model Once +### run() - Run a model once Direct method to run a model and get output. Good for one-off predictions. @@ -135,7 +135,7 @@ replicate.run("owner/name", input={}) # Latest version replicate.run("5c7d5dc6dd8bf75c1acaa8565735e7986bc5b66206b55cca93cb72c9bf15ccaa", input={}) # Version ID ``` -### stream() - Stream Model Output +### stream() - Stream model output For models that support streaming (like language models). Returns an iterator of output chunks. @@ -155,7 +155,7 @@ async for chunk in async_replicate.stream( print(chunk, end="") ``` -### search() - Search Models +### search() - Search models Find models by keyword or description. @@ -167,7 +167,7 @@ for model in results: print(f"{model.owner}/{model.name}: {model.description}") ``` -## Core Resources +## Core resources ### Predictions @@ -236,7 +236,7 @@ model = replicate.models.create( replicate.models.delete(model_owner="your-username", model_name="my-model") ``` -#### Model Versions +#### Model versions ```python # List model versions @@ -261,7 +261,7 @@ replicate.models.versions.delete( ) ``` -#### Model Predictions +#### Model predictions Run predictions directly through a model. @@ -275,7 +275,7 @@ prediction = replicate.models.predictions.create( # prediction.output will be an image URL when complete ``` -#### Model Examples +#### Model examples ```python # Get example predictions for a model @@ -432,9 +432,9 @@ webhook_secret = replicate.webhooks.default.secret.get() print(f"Webhook signing secret: {webhook_secret.key}") ``` -## File Handling +## File handling -### Input Files +### Input files The SDK supports multiple ways to provide file inputs: @@ -459,7 +459,7 @@ text = replicate.run("anthropic/claude-4-sonnet", input={ }) ``` -### Output Files +### Output files File outputs are automatically converted to `FileOutput` objects: @@ -486,7 +486,7 @@ if isinstance(image_url, FileOutput): f.write(chunk) ``` -## Error Handling +## Error handling The SDK provides detailed exception types for error handling: @@ -541,9 +541,9 @@ if first_page.has_next_page(): all_models = list(replicate.models.list()) ``` -## Advanced Features +## Advanced features -### Raw Response Access +### Raw response access Access the underlying HTTP response: @@ -562,7 +562,7 @@ print(f"Headers: {response.headers}") prediction = response.parse() ``` -### Custom HTTP Client +### Custom HTTP client Configure a custom HTTP client for Replicate: @@ -585,7 +585,7 @@ replicate = Replicate( ) ``` -### Retries and Timeouts +### Retries and timeouts Configure retry behavior and timeouts: @@ -603,7 +603,7 @@ image_url = replicate.run( ) ``` -### Client Copying +### Client copying Create a new Replicate instance with modified settings: @@ -616,7 +616,7 @@ new_replicate = replicate.copy( ) ``` -## Async/Await Support +## Async/await support All methods have async equivalents when using `AsyncReplicate`: @@ -656,14 +656,14 @@ async def main(): asyncio.run(main()) ``` -## Environment Variables +## Environment variables The SDK respects these environment variables: - `REPLICATE_API_TOKEN` - API authentication token - `REPLICATE_BASE_URL` - Override the API base URL (default: `https://api.replicate.com/v1`) -## Type Hints +## Type hints The SDK is fully typed with comprehensive type hints: @@ -680,9 +680,9 @@ status: PredictionStatus = prediction.status page: 
SyncCursorURLPage[Prediction] = replicate.predictions.list() ``` -## Common Patterns +## Common patterns -### Wait for Completion with Polling +### Wait for completion with polling ```python import time @@ -702,7 +702,7 @@ prediction = replicate.predictions.create(model="model:version", input={}) result = wait_for_prediction(replicate, prediction.id) ``` -### Batch Processing +### Batch processing ```python import asyncio @@ -723,7 +723,7 @@ prompts = ["prompt 1", "prompt 2", "prompt 3"] results = asyncio.run(batch_process(prompts)) ``` -### Webhook Handling +### Webhook handling ```python from flask import Flask, request @@ -761,7 +761,7 @@ def webhook(): return "OK", 200 ``` -## Migration Guide +## Migration guide ### From v0.x to v1.0+ @@ -798,7 +798,7 @@ model = replicate.models.get( ) ``` -### Using Legacy Authentication +### Using legacy authentication For compatibility with older code: