diff --git a/.gitignore b/.gitignore
index 78e07f63..f84925da 100644
--- a/.gitignore
+++ b/.gitignore
@@ -454,3 +454,4 @@ $RECYCLE.BIN/
 
 !pyrightconfig.json
 /coverage/
+/coverage.json
diff --git a/examples/README.md b/examples/README.md
index 6d168504..76571230 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -18,3 +18,10 @@ that via `.env` if desired), upload the checked-in sample assets under
 `examples/resources/`, and exercise the async client end-to-end. Use
 `uvx nox -s examples` when you want to execute every example across the
 supported interpreter matrix.
+
+## Available Examples
+
+- `examples/delete/delete_example.py` – demonstrate file deletion (sync + async
+  variants).
+- `examples/extract_text/extract_pdf_text_example.py` – run `extract_pdf_text`
+  with word coordinates/style enabled and render the output as a Rich table.
diff --git a/examples/extract_text/extract_pdf_text_example.py b/examples/extract_text/extract_pdf_text_example.py
new file mode 100644
index 00000000..15e55dee
--- /dev/null
+++ b/examples/extract_text/extract_pdf_text_example.py
@@ -0,0 +1,111 @@
+# /// script
+# requires-python = ">=3.10"
+# dependencies = ["pdfrest", "python-dotenv", "rich"]
+# ///
+"""Render extracted words with coordinates and style metadata.
+
+This sample demonstrates how to:
+
+1. Upload the bundled ``examples/resources/report.pdf`` resource.
+2. Request JSON output from :meth:`PdfRestClient.extract_pdf_text` while turning on
+   word-level coordinates and styling data.
+3. Display the returned metadata as a Rich table.
+
+Run with ``uv run --project ../.. python extract_pdf_text_example.py`` after
+setting ``PDFREST_API_KEY`` (``python-dotenv`` also loads ``.env`` if present).
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from dotenv import load_dotenv
+from rich.console import Console
+from rich.table import Table
+
+from pdfrest import PdfRestClient
+from pdfrest.models import (
+    ExtractedTextDocument,
+    ExtractedTextPoint,
+    ExtractedTextWord,
+    ExtractedTextWordCoordinates,
+)
+
+RESOURCE = Path(__file__).resolve().parents[1] / "resources" / "report.pdf"
+
+
+def _format_point(point: ExtractedTextPoint | None) -> str:
+    if point is None:  # caller passes None when the word has no coordinates
+        return "—"
+    return f"({point.x:.2f}, {point.y:.2f})"  # "(x, y)", two decimal places
+
+
+def _format_color(word: ExtractedTextWord) -> str:
+    style = word.style  # None when the word carries no style data
+    if style is None or style.color is None:  # nothing to show -> dash
+        return "—"
+    color = style.color  # model declares color required; None check is defensive
+    values = ", ".join(str(value) for value in color.values)
+    return f"{color.space}: {values}"  # "<color space>: v1, v2, ..."
+
+
+def _format_font(word: ExtractedTextWord) -> str:
+    style = word.style
+    if style is None:  # word carries no style data -> dash placeholder
+        return "—"
+    font = style.font
+    return f"{font.name} ({font.size:.1f} pt)"  # e.g. "Calibri (12.0 pt)"
+
+
+def _build_word_table(document: ExtractedTextDocument) -> Table:
+    table = Table(title="Extracted Words with Coordinates and Style")
+    table.add_column("Word", style="bold")
+    table.add_column("Page", justify="right")
+    table.add_column("Top Left")
+    table.add_column("Top Right")
+    table.add_column("Bottom Left")
+    table.add_column("Bottom Right")
+    table.add_column("Color")
+    table.add_column("Font")
+
+    for word in document.words or []:  # words may be None; treat as empty
+        coords: ExtractedTextWordCoordinates | None = word.coordinates
+        table.add_row(  # one row per word; cell order matches the columns above
+            word.text,
+            str(word.page),
+            _format_point(coords.top_left if coords else None),
+            _format_point(coords.top_right if coords else None),
+            _format_point(coords.bottom_left if coords else None),
+            _format_point(coords.bottom_right if coords else None),
+            _format_color(word),
+            _format_font(word),
+        )
+    return table
+
+
+def list_words_with_coordinates() -> None:
+    load_dotenv()  # pull settings such as PDFREST_API_KEY from .env, if present
+    console = Console()
+
+    with PdfRestClient() as client:  # presumably reads the API key from the env
+        uploaded = client.files.create_from_paths([RESOURCE])[0]
+        document = client.extract_pdf_text(
+            uploaded,
+            full_text="by_page",
+            preserve_line_breaks=True,
+            word_style=True,
+            word_coordinates=True,
+        )
+
+    words = document.words or []
+    console.print(f"Extracted {len(words)} words from [bold]{uploaded.name}[/bold].")
+    if not words:  # nothing to tabulate; say so instead of printing an empty table
+        console.print("[yellow]This document did not include word metadata.[/yellow]")
+        return
+
+    table = _build_word_table(document)
+    console.print(table)
+
+
+if __name__ == "__main__":  # pragma: no cover - manual example
+    list_words_with_coordinates()
diff --git a/pyproject.toml b/pyproject.toml
index c7c92469..b09c8c05 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -35,6 +35,7 @@ dev = [
     "basedpyright>=1.34.0",
     "python-dotenv>=1.0.1",
     "diff-cover>=10.2.0",
+    "rich>=14.1.0",
 ]
 
 [tool.pytest.ini_options]
diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py
index 0035227c..ecac10d4 100644
--- a/src/pdfrest/client.py
+++ b/src/pdfrest/client.py
@@ -60,6 +60,7 @@
     translate_httpx_error,
 )
 from .models import (
+    ExtractedTextDocument,
     PdfRestDeletionResponse,
     PdfRestErrorResponse,
     PdfRestFile,
@@ -2398,6 +2399,48 @@ def extract_images(
             timeout=timeout,
         )
 
+    def extract_pdf_text(
+        self,
+        file: PdfRestFile | Sequence[PdfRestFile],
+        *,
+        pages: PdfPageSelection | None = None,
+        full_text: Literal["off", "by_page", "document"] = "document",
+        preserve_line_breaks: bool = False,
+        word_style: bool = False,
+        word_coordinates: bool = False,
+        extra_query: Query | None = None,
+        extra_headers: AnyMapping | None = None,
+        extra_body: Body | None = None,
+        timeout: TimeoutTypes | None = None,
+    ) -> ExtractedTextDocument:
+        """Extract PDF text as inline JSON parsed into an ExtractedTextDocument."""
+
+        payload: dict[str, Any] = {
+            "files": file,
+            "full_text": full_text,
+            "preserve_line_breaks": preserve_line_breaks,
+            "word_style": word_style,
+            "word_coordinates": word_coordinates,
+            "output_type": "json",  # fixed: the reply is parsed inline below
+        }
+        if pages is not None:  # otherwise leave unset so exclude_unset omits it
+            payload["pages"] = pages
+
+        validated_payload = ExtractTextPayload.model_validate(payload)
+        request = self.prepare_request(
+            "POST",
+            "/extracted-text",
+            json_body=validated_payload.model_dump(
+                mode="json", by_alias=True, exclude_none=True, exclude_unset=True
+            ),
+            extra_query=extra_query,
+            extra_headers=extra_headers,
+            extra_body=extra_body,
+            timeout=timeout,
+        )
+        raw_payload = self._send_request(request)
+        return ExtractedTextDocument.model_validate(raw_payload)
+
     def extract_pdf_text_to_file(
         self,
         file: PdfRestFile | Sequence[PdfRestFile],
@@ -3393,6 +3436,48 @@ async def extract_images(
             timeout=timeout,
         )
 
+    async def extract_pdf_text(
+        self,
+        file: PdfRestFile | Sequence[PdfRestFile],
+        *,
+        pages: PdfPageSelection | None = None,
+        full_text: Literal["off", "by_page", "document"] = "document",
+        preserve_line_breaks: bool = False,
+        word_style: bool = False,
+        word_coordinates: bool = False,
+        extra_query: Query | None = None,
+        extra_headers: AnyMapping | None = None,
+        extra_body: Body | None = None,
+        timeout: TimeoutTypes | None = None,
+    ) -> ExtractedTextDocument:
+        """Extract PDF text as inline JSON parsed into an ExtractedTextDocument."""
+
+        payload: dict[str, Any] = {
+            "files": file,
+            "full_text": full_text,
+            "preserve_line_breaks": preserve_line_breaks,
+            "word_style": word_style,
+            "word_coordinates": word_coordinates,
+            "output_type": "json",  # fixed: the reply is parsed inline below
+        }
+        if pages is not None:  # otherwise leave unset so exclude_unset omits it
+            payload["pages"] = pages
+
+        validated_payload = ExtractTextPayload.model_validate(payload)
+        request = self.prepare_request(
+            "POST",
+            "/extracted-text",
+            json_body=validated_payload.model_dump(
+                mode="json", by_alias=True, exclude_none=True, exclude_unset=True
+            ),
+            extra_query=extra_query,
+            extra_headers=extra_headers,
+            extra_body=extra_body,
+            timeout=timeout,
+        )
+        raw_payload = await self._send_request(request)
+        return ExtractedTextDocument.model_validate(raw_payload)
+
     async def extract_pdf_text_to_file(
         self,
         file: PdfRestFile | Sequence[PdfRestFile],
diff --git a/src/pdfrest/models/__init__.py b/src/pdfrest/models/__init__.py
index ef10e565..2ee3d5a2 100644
--- a/src/pdfrest/models/__init__.py
+++ b/src/pdfrest/models/__init__.py
@@ -1,4 +1,15 @@
 from .public import (
+    ExtractedTextDocument,
+    ExtractedTextFullText,
+    ExtractedTextFullTextPage,
+    ExtractedTextFullTextPages,
+    ExtractedTextPoint,
+    ExtractedTextWord,
+    ExtractedTextWordColor,
+    ExtractedTextWordCoordinates,
+    ExtractedTextWordFont,
+    ExtractedTextWordStyle,
+    ExtractTextResponse,
     PdfRestDeletionResponse,
     PdfRestErrorResponse,
     PdfRestFile,
@@ -12,6 +23,17 @@
 )
 
 __all__ = [
+    "ExtractTextResponse",
+    "ExtractedTextDocument",
+    "ExtractedTextFullText",
+    "ExtractedTextFullTextPage",
+    "ExtractedTextFullTextPages",
+    "ExtractedTextPoint",
+    "ExtractedTextWord",
+    "ExtractedTextWordColor",
+    "ExtractedTextWordCoordinates",
+    "ExtractedTextWordFont",
+    "ExtractedTextWordStyle",
     "PdfRestDeletionResponse",
     "PdfRestErrorResponse",
     "PdfRestFile",
diff --git a/src/pdfrest/models/public.py b/src/pdfrest/models/public.py
index e4dc8a3a..99ef5257 100644
--- a/src/pdfrest/models/public.py
+++ b/src/pdfrest/models/public.py
@@ -14,12 +14,24 @@
     ConfigDict,
     Field,
     HttpUrl,
+    RootModel,
 )
 from pydantic.json_schema import JsonSchemaValue
 from pydantic_core import CoreSchema
 from typing_extensions import override
 
 __all__ = (
+    "ExtractTextResponse",
+    "ExtractedTextDocument",
+    "ExtractedTextFullText",
+    "ExtractedTextFullTextPage",
+    "ExtractedTextFullTextPages",
+    "ExtractedTextPoint",
+    "ExtractedTextWord",
+    "ExtractedTextWordColor",
+    "ExtractedTextWordCoordinates",
+    "ExtractedTextWordFont",
+    "ExtractedTextWordStyle",
     "PdfRestDeletionResponse",
     "PdfRestErrorResponse",
     "PdfRestFile",
@@ -402,6 +414,307 @@ class TranslatePdfTextFileResponse(PdfRestFileBasedResponse):
     ] = None
 
 
+class ExtractTextResponse(BaseModel):
+    """Response returned by the extracted-text tool."""
+
+    model_config = ConfigDict(extra="allow")
+
+    full_text: Annotated[
+        str | None,
+        Field(
+            alias="fullText",
+            validation_alias=AliasChoices("full_text", "fullText"),
+            description="Inline extracted text when output_type is json.",
+            default=None,
+        ),
+    ] = None
+    input_id: Annotated[
+        PdfRestFileID,
+        Field(
+            validation_alias=AliasChoices("input_id", "inputId"),
+            description="The id of the input file.",
+        ),
+    ]
+    warning: Annotated[
+        str | None,
+        Field(description="A warning that was generated during text extraction."),
+    ] = None
+
+
+class ExtractedTextPoint(BaseModel):
+    """A point in PDF coordinate space expressed in points."""
+
+    model_config = ConfigDict(extra="allow")
+
+    x: Annotated[
+        float,
+        Field(description="Horizontal position in PDF points."),
+    ]
+    y: Annotated[
+        float,
+        Field(description="Vertical position in PDF points."),
+    ]
+
+
+class ExtractedTextWordCoordinates(BaseModel):
+    """Bounding box describing where a word appears on the page."""
+
+    model_config = ConfigDict(extra="allow")
+
+    top_left: Annotated[
+        ExtractedTextPoint,
+        Field(
+            alias="topLeft",
+            validation_alias=AliasChoices("top_left", "topLeft"),
+            description="Upper-left corner of the word bounds.",
+        ),
+    ]
+    top_right: Annotated[
+        ExtractedTextPoint,
+        Field(
+            alias="topRight",
+            validation_alias=AliasChoices("top_right", "topRight"),
+            description="Upper-right corner of the word bounds.",
+        ),
+    ]
+    bottom_left: Annotated[
+        ExtractedTextPoint,
+        Field(
+            alias="bottomLeft",
+            validation_alias=AliasChoices("bottom_left", "bottomLeft"),
+            description="Lower-left corner of the word bounds.",
+        ),
+    ]
+    bottom_right: Annotated[
+        ExtractedTextPoint,
+        Field(
+            alias="bottomRight",
+            validation_alias=AliasChoices("bottom_right", "bottomRight"),
+            description="Lower-right corner of the word bounds.",
+        ),
+    ]
+
+
+class ExtractedTextWordColor(BaseModel):
+    """Font color applied to an extracted word."""
+
+    model_config = ConfigDict(extra="allow")
+
+    space: Annotated[
+        str,
+        Field(description="Color space name reported by pdfRest (e.g., DeviceRGB)."),
+    ]
+    values: Annotated[
+        list[float],
+        Field(
+            description="Numeric components in the reported color space.",
+            min_length=1,
+        ),
+    ]
+
+
+class ExtractedTextWordFont(BaseModel):
+    """Font metadata applied to an extracted word."""
+
+    model_config = ConfigDict(extra="allow")
+
+    name: Annotated[
+        str,
+        Field(description="Reported font face name."),
+    ]
+    size: Annotated[
+        float,
+        Field(description="Font size in points."),
+    ]
+
+
+class ExtractedTextWordStyle(BaseModel):
+    """Style information for an extracted word."""
+
+    model_config = ConfigDict(extra="allow")
+
+    color: Annotated[
+        ExtractedTextWordColor,
+        Field(description="Color information for the word."),
+    ]
+    font: Annotated[
+        ExtractedTextWordFont,
+        Field(description="Font information for the word."),
+    ]
+
+
+class ExtractedTextWord(BaseModel):
+    """A single word extracted from a PDF page."""
+
+    model_config = ConfigDict(extra="allow")
+
+    text: Annotated[
+        str,
+        Field(description="Word content as rendered by the PDF."),
+    ]
+    page: Annotated[
+        int,
+        Field(description="1-indexed page number containing the word.", ge=1),
+    ]
+    coordinates: Annotated[
+        ExtractedTextWordCoordinates | None,
+        Field(
+            description="Bounding box for the word when positional data is requested.",
+            default=None,
+        ),
+    ] = None
+    style: Annotated[
+        ExtractedTextWordStyle | None,
+        Field(
+            description="Font/color details captured for the word.",
+            default=None,
+        ),
+    ] = None
+
+
+class ExtractedTextFullTextPage(BaseModel):
+    """Per-page representation of the aggregated text content."""
+
+    model_config = ConfigDict(extra="allow")
+
+    page: Annotated[
+        int,
+        Field(description="1-indexed page number.", ge=1),
+    ]
+    text: Annotated[
+        str,
+        Field(description="Concatenated text for the page."),
+    ]
+
+
+class ExtractedTextFullTextPages(BaseModel):
+    """Container for per-page text output."""
+
+    model_config = ConfigDict(extra="allow")
+
+    pages: Annotated[
+        list[ExtractedTextFullTextPage],
+        Field(
+            description="Ordered text for each page present in the document.",
+            min_length=1,
+        ),
+    ]
+
+
+class ExtractedTextFullText(RootModel[str | ExtractedTextFullTextPages]):
+    """
+    Represents full-text extraction in either "document" (str) or "by_page" (object)
+    modes while providing convenience accessors for both forms.
+    """
+
+    root: str | ExtractedTextFullTextPages
+
+    @property
+    def document_text(self) -> str | None:
+        """
+        Return the document-level string, or the per-page texts joined with single
+        spaces when the payload is page-structured (never None as implemented).
+        """
+        if isinstance(self.root, str):
+            return self.root
+        return " ".join(page.text for page in self.root.pages)
+
+    @property
+    def pages(self) -> list[ExtractedTextFullTextPage]:
+        """
+        Return the page entries when the payload is page-structured.
+        Raises ValueError when the payload is a document-level string.
+        """
+        if isinstance(self.root, ExtractedTextFullTextPages):
+            return self.root.pages
+        msg = "full text payload was emitted in document mode; page data unavailable"
+        raise ValueError(msg)
+
+    def iter_pages(self) -> list[ExtractedTextFullTextPage]:
+        """
+        Return the per-page entries, or an empty list when the payload is a plain
+        document string, so callers need not handle the ValueError from ``pages``.
+        """
+        try:
+            return self.pages
+        except ValueError:
+            return []
+
+
+class ExtractedTextDocument(BaseModel):
+    """Structured representation of the JSON output returned by extract_pdf_text."""
+
+    model_config = ConfigDict(extra="allow")
+
+    input_id: Annotated[
+        PdfRestFileID,
+        Field(
+            alias="inputId",
+            validation_alias=AliasChoices("input_id", "inputId"),
+            description="Identifier of the uploaded PDF.",
+        ),
+    ]
+    words: Annotated[
+        list[ExtractedTextWord] | None,
+        Field(
+            description="Individual word records when word-level extraction is enabled.",
+            default=None,
+        ),
+    ] = None
+    full_text: Annotated[
+        ExtractedTextFullText | None,
+        Field(
+            alias="fullText",
+            validation_alias=AliasChoices("full_text", "fullText"),
+            description="Full text output (document string or per-page content).",
+            default=None,
+        ),
+    ] = None
+
+
+class ConvertToMarkdownResponse(BaseModel):
+    """Response returned by the markdown conversion tool."""
+
+    model_config = ConfigDict(extra="allow")
+
+    markdown: Annotated[
+        str | None,
+        Field(
+            description="Inline markdown content when output_type is json.",
+            default=None,
+        ),
+    ] = None
+    input_id: Annotated[
+        PdfRestFileID,
+        Field(
+            validation_alias=AliasChoices("input_id", "inputId"),
+            description="The id of the input file.",
+        ),
+    ]
+    output_url: Annotated[
+        HttpUrl | None,
+        Field(
+            alias="outputUrl",
+            validation_alias=AliasChoices("output_url", "outputUrl"),
+            description="Download URL for file output.",
+            default=None,
+        ),
+    ] = None
+    output_id: Annotated[
+        PdfRestFileID | None,
+        Field(
+            alias="outputId",
+            validation_alias=AliasChoices("output_id", "outputId"),
+            description="The id of the generated output when output_type is file.",
+            default=None,
+        ),
+    ] = None
+    warning: Annotated[
+        str | None,
+        Field(description="A warning that was generated during markdown conversion."),
+    ] = None
+
+
 class PdfRestInfoResponse(BaseModel):
     """A response containing the output from the /info route."""
 
diff --git a/tests/live/test_live_extract_pdf_text.py b/tests/live/test_live_extract_pdf_text.py
new file mode 100644
index 00000000..67b69f1a
--- /dev/null
+++ b/tests/live/test_live_extract_pdf_text.py
@@ -0,0 +1,122 @@
+from __future__ import annotations
+
+from itertools import product
+
+import pytest
+
+from pdfrest import AsyncPdfRestClient, PdfRestApiError, PdfRestClient
+from pdfrest.models import ExtractedTextDocument
+
+from ..resources import get_test_resource_path
+
+FULL_TEXT_OPTIONS = ("off", "by_page", "document")
+BOOL_OPTION_SETS = list(product([False, True], repeat=3))
+
+LIVE_OPTION_SETS = [
+    pytest.param(
+        {
+            "full_text": full_text,
+            "preserve_line_breaks": preserve,
+            "word_style": word_style,
+            "word_coordinates": word_coordinates,
+        },
+        id=f"{full_text}-plb-{int(preserve)}-ws-{int(word_style)}-wc-{int(word_coordinates)}",
+    )
+    for full_text in FULL_TEXT_OPTIONS
+    for preserve, word_style, word_coordinates in BOOL_OPTION_SETS
+]
+
+
+def _assert_live_full_text(
+    response: ExtractedTextDocument,
+    *,
+    full_text_mode: str,
+) -> None:
+    if full_text_mode == "off":
+        assert response.full_text is None
+    elif full_text_mode == "document":
+        assert response.full_text is not None
+        assert response.full_text.document_text is not None  # NOTE(review): never None
+    else:  # remaining mode is "by_page"
+        assert response.full_text is not None
+        assert response.full_text.pages is not None  # raises if not per-page
+
+
+@pytest.mark.parametrize("options", LIVE_OPTION_SETS)
+def test_live_extract_pdf_text_success(
+    options: dict[str, bool | str],
+    pdfrest_api_key: str,
+    pdfrest_live_base_url: str,
+) -> None:
+    resource = get_test_resource_path("report.pdf")
+    with PdfRestClient(
+        api_key=pdfrest_api_key,
+        base_url=pdfrest_live_base_url,
+    ) as client:
+        uploaded = client.files.create_from_paths([resource])[0]
+        response = client.extract_pdf_text(uploaded, **options)
+
+    assert isinstance(response, ExtractedTextDocument)
+    assert response.input_id == uploaded.id
+    _assert_live_full_text(response, full_text_mode=options["full_text"])
+    if options["word_style"] or options["word_coordinates"]:
+        assert response.words is not None
+        assert response.words
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("options", LIVE_OPTION_SETS)
+async def test_live_async_extract_pdf_text_success(
+    options: dict[str, bool | str],
+    pdfrest_api_key: str,
+    pdfrest_live_base_url: str,
+) -> None:
+    resource = get_test_resource_path("report.pdf")
+    async with AsyncPdfRestClient(
+        api_key=pdfrest_api_key,
+        base_url=pdfrest_live_base_url,
+    ) as client:
+        uploaded = (await client.files.create_from_paths([resource]))[0]
+        response = await client.extract_pdf_text(uploaded, **options)
+
+    assert isinstance(response, ExtractedTextDocument)
+    assert response.input_id == uploaded.id
+    _assert_live_full_text(response, full_text_mode=options["full_text"])
+    if options["word_style"] or options["word_coordinates"]:
+        assert response.words is not None
+        assert response.words
+
+
+def test_live_extract_pdf_text_invalid_pages(
+    pdfrest_api_key: str,
+    pdfrest_live_base_url: str,
+) -> None:
+    resource = get_test_resource_path("report.pdf")
+    with PdfRestClient(
+        api_key=pdfrest_api_key,
+        base_url=pdfrest_live_base_url,
+    ) as client:
+        uploaded = client.files.create_from_paths([resource])[0]
+        with pytest.raises(PdfRestApiError, match=r"(?i)page"):
+            client.extract_pdf_text(
+                uploaded,
+                extra_body={"pages": "last-1"},  # presumably skips client validation
+            )
+
+
+@pytest.mark.asyncio
+async def test_live_async_extract_pdf_text_invalid_pages(
+    pdfrest_api_key: str,
+    pdfrest_live_base_url: str,
+) -> None:
+    resource = get_test_resource_path("report.pdf")
+    async with AsyncPdfRestClient(
+        api_key=pdfrest_api_key,
+        base_url=pdfrest_live_base_url,
+    ) as client:
+        uploaded = (await client.files.create_from_paths([resource]))[0]
+        with pytest.raises(PdfRestApiError, match=r"(?i)page"):
+            await client.extract_pdf_text(
+                uploaded,
+                extra_body={"pages": "last-1"},  # presumably skips client validation
+            )
diff --git a/tests/test_extract_pdf_text.py b/tests/test_extract_pdf_text.py
new file mode 100644
index 00000000..4286d92c
--- /dev/null
+++ b/tests/test_extract_pdf_text.py
@@ -0,0 +1,432 @@
+from __future__ import annotations
+
+import json
+from collections.abc import Mapping
+from itertools import product
+
+import httpx
+import pytest
+from pydantic import ValidationError
+
+from pdfrest import AsyncPdfRestClient, PdfRestApiError, PdfRestClient
+from pdfrest.models import ExtractedTextDocument, PdfRestFileID
+from pdfrest.models._internal import ExtractTextPayload
+
+from .graphics_test_helpers import ASYNC_API_KEY, VALID_API_KEY, make_pdf_file
+
+
+def _make_extracted_text_document_payload(input_id: str) -> dict[str, object]:
+    return {
+        "inputId": input_id,
+        "words": [
+            {
+                "text": "Hello",
+                "page": 1,
+                "coordinates": {
+                    "topLeft": {"x": 1, "y": 2},
+                    "topRight": {"x": 3, "y": 4},
+                    "bottomLeft": {"x": 5, "y": 6},
+                    "bottomRight": {"x": 7, "y": 8},
+                },
+                "style": {
+                    "color": {"space": "DeviceRGB", "values": [0, 0, 0]},
+                    "font": {"name": "Calibri", "size": 12},
+                },
+            }
+        ],
+        "fullText": {
+            "pages": [
+                {"page": 1, "text": "Hello world"},
+                {"page": 2, "text": "Bye"},
+            ]
+        },
+    }
+
+
+FULL_TEXT_OPTIONS = ("off", "by_page", "document")
+BOOL_OPTION_SETS = list(product([False, True], repeat=3))
+
+EXTRACT_TEXT_OPTION_SETS = [
+    pytest.param(
+        {
+            "full_text": full_text,
+            "preserve_line_breaks": preserve,
+            "word_style": word_style,
+            "word_coordinates": word_coordinates,
+        },
+        id=f"{full_text}-plb-{int(preserve)}-ws-{int(word_style)}-wc-{int(word_coordinates)}",
+    )
+    for full_text in FULL_TEXT_OPTIONS
+    for preserve, word_style, word_coordinates in BOOL_OPTION_SETS
+]
+
+PAGES_OPTION_SETS = [
+    pytest.param(None, id="without-pages"),
+    pytest.param(["1-2"], id="with-pages"),
+]
+
+
+@pytest.mark.parametrize("options", EXTRACT_TEXT_OPTION_SETS)
+def test_extract_pdf_text_success(
+    options: Mapping[str, bool | str],
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.delenv("PDFREST_API_KEY", raising=False)
+    input_file = make_pdf_file(PdfRestFileID.generate(1))
+    base_payload: dict[str, object] = {
+        "files": [input_file],
+        "pages": ["1-2"],
+        "output_type": "json",
+    }
+    payload_input = base_payload | dict(options)
+    payload_dump = ExtractTextPayload.model_validate(payload_input).model_dump(
+        mode="json",
+        by_alias=True,
+        exclude_none=True,
+        exclude_unset=True,
+    )
+
+    expected_response = _make_extracted_text_document_payload(str(input_file.id))
+    seen: dict[str, int] = {"post": 0}  # number of POSTs the mock received
+
+    def handler(request: httpx.Request) -> httpx.Response:
+        if request.method == "POST" and request.url.path == "/extracted-text":
+            seen["post"] += 1
+            payload = json.loads(request.content.decode("utf-8"))
+            assert payload == payload_dump  # body must match the model's dump
+            return httpx.Response(200, json=expected_response)
+        msg = f"Unexpected request {request.method} {request.url}"
+        raise AssertionError(msg)
+
+    transport = httpx.MockTransport(handler)
+    with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client:
+        response = client.extract_pdf_text(
+            input_file,
+            pages=["1-2"],  # NOTE(review): async test also covers pages=None
+            full_text=options["full_text"],
+            preserve_line_breaks=options["preserve_line_breaks"],
+            word_style=options["word_style"],
+            word_coordinates=options["word_coordinates"],
+        )
+
+    assert seen == {"post": 1}
+    assert isinstance(response, ExtractedTextDocument)
+    assert response.input_id == input_file.id
+    assert response.model_dump(by_alias=True, exclude_none=True) == expected_response
+
+
+def test_extract_pdf_text_request_customization(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.delenv("PDFREST_API_KEY", raising=False)
+    input_file = make_pdf_file(PdfRestFileID.generate(1))
+    payload_dump = ExtractTextPayload.model_validate(
+        {
+            "files": [input_file],
+            "full_text": "document",
+            "preserve_line_breaks": False,
+            "word_style": False,
+            "word_coordinates": False,
+            "output_type": "json",
+        }
+    ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True)
+    expected_response = _make_extracted_text_document_payload(str(input_file.id))
+    captured_timeout: dict[str, float | dict[str, float] | None] = {}
+
+    def handler(request: httpx.Request) -> httpx.Response:
+        if request.method == "POST" and request.url.path == "/extracted-text":
+            assert request.url.params["trace"] == "true"
+            assert request.headers["X-Debug"] == "sync-json"
+            captured_timeout["post"] = request.extensions.get("timeout")
+            payload = json.loads(request.content.decode("utf-8"))
+            assert payload == payload_dump | {"debug": True}
+            return httpx.Response(200, json=expected_response)
+        msg = f"Unexpected request {request.method} {request.url}"
+        raise AssertionError(msg)
+
+    transport = httpx.MockTransport(handler)
+    with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client:
+        response = client.extract_pdf_text(
+            input_file,
+            extra_query={"trace": "true"},
+            extra_headers={"X-Debug": "sync-json"},
+            extra_body={"debug": True},
+            timeout=0.25,
+        )
+
+    assert isinstance(response, ExtractedTextDocument)
+    post_timeout = captured_timeout["post"]
+    assert post_timeout is not None
+    if isinstance(post_timeout, dict):
+        assert all(
+            component == pytest.approx(0.25) for component in post_timeout.values()
+        )
+    else:
+        assert post_timeout == pytest.approx(0.25)
+    assert response.model_dump(by_alias=True, exclude_none=True) == expected_response
+
+
+@pytest.mark.asyncio
+async def test_async_extract_pdf_text_request_customization(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """Check per-call query, header, body and timeout overrides on the async POST."""
+    monkeypatch.delenv("PDFREST_API_KEY", raising=False)
+    input_file = make_pdf_file(PdfRestFileID.generate(2))
+    validated_payload = ExtractTextPayload.model_validate(
+        {
+            "files": [input_file],
+            "full_text": "document",
+            "preserve_line_breaks": False,
+            "word_style": False,
+            "word_coordinates": False,
+            "output_type": "json",
+        }
+    )
+    expected_body = validated_payload.model_dump(
+        mode="json", by_alias=True, exclude_none=True, exclude_unset=True
+    ) | {"debug": True}
+    expected_response = _make_extracted_text_document_payload(str(input_file.id))
+    captured_timeout: dict[str, float | dict[str, float] | None] = {}
+
+    def handler(request: httpx.Request) -> httpx.Response:
+        if request.method != "POST" or request.url.path != "/extracted-text":
+            msg = f"Unexpected request {request.method} {request.url}"
+            raise AssertionError(msg)
+        assert request.url.params["trace"] == "true"
+        assert request.headers["X-Debug"] == "async-json"
+        captured_timeout["post"] = request.extensions.get("timeout")
+        assert json.loads(request.content.decode("utf-8")) == expected_body
+        return httpx.Response(200, json=expected_response)
+
+    async with AsyncPdfRestClient(
+        api_key=ASYNC_API_KEY, transport=httpx.MockTransport(handler)
+    ) as client:
+        response = await client.extract_pdf_text(
+            input_file,
+            extra_query={"trace": "true"},
+            extra_headers={"X-Debug": "async-json"},
+            extra_body={"debug": True},
+            timeout=0.25,
+        )
+
+    assert isinstance(response, ExtractedTextDocument)
+    post_timeout = captured_timeout["post"]
+    assert post_timeout is not None
+    # The captured timeout may be a mapping of components or a single number.
+    timeout_components = (
+        post_timeout.values() if isinstance(post_timeout, dict) else [post_timeout]
+    )
+    assert all(component == pytest.approx(0.25) for component in timeout_components)
+    assert response.model_dump(by_alias=True, exclude_none=True) == expected_response
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("options", EXTRACT_TEXT_OPTION_SETS)
+@pytest.mark.parametrize("pages", PAGES_OPTION_SETS)
+async def test_async_extract_pdf_text_success(
+    options: Mapping[str, bool | str],
+    pages: list[str] | None,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """Async extraction posts the validated payload once and parses the reply."""
+    monkeypatch.delenv("PDFREST_API_KEY", raising=False)
+    input_file = make_pdf_file(PdfRestFileID.generate(2))
+    # ``pages`` is only included when the parametrized case supplies a selection.
+    page_selection: dict[str, object] = {} if pages is None else {"pages": pages}
+    payload_input: dict[str, object] = {
+        "files": [input_file],
+        "output_type": "json",
+        **page_selection,
+        **options,
+    }
+    payload_dump = ExtractTextPayload.model_validate(payload_input).model_dump(
+        mode="json",
+        by_alias=True,
+        exclude_none=True,
+        exclude_unset=True,
+    )
+    expected_response = _make_extracted_text_document_payload(str(input_file.id))
+    post_count = 0
+
+    def handler(request: httpx.Request) -> httpx.Response:
+        nonlocal post_count
+        if request.method != "POST" or request.url.path != "/extracted-text":
+            msg = f"Unexpected request {request.method} {request.url}"
+            raise AssertionError(msg)
+        post_count += 1
+        assert json.loads(request.content.decode("utf-8")) == payload_dump
+        return httpx.Response(200, json=expected_response)
+
+    # Forward each extraction option explicitly as a keyword argument.
+    option_names = (
+        "full_text",
+        "preserve_line_breaks",
+        "word_style",
+        "word_coordinates",
+    )
+    request_kwargs: dict[str, object] = {name: options[name] for name in option_names}
+    request_kwargs.update(page_selection)
+    async with AsyncPdfRestClient(
+        api_key=ASYNC_API_KEY, transport=httpx.MockTransport(handler)
+    ) as client:
+        response = await client.extract_pdf_text(input_file, **request_kwargs)
+
+    assert post_count == 1
+    assert isinstance(response, ExtractedTextDocument)
+    assert response.input_id == input_file.id
+    assert response.model_dump(by_alias=True, exclude_none=True) == expected_response
+
+
+def test_extract_pdf_text_multi_file_guard(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Passing two files to the sync client fails validation."""
+    monkeypatch.delenv("PDFREST_API_KEY", raising=False)
+    files = [make_pdf_file(PdfRestFileID.generate(index)) for index in (1, 2)]
+
+    def reject(_request: httpx.Request) -> httpx.Response:
+        return httpx.Response(500)
+
+    transport = httpx.MockTransport(reject)
+    with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client:
+        with pytest.raises(ValidationError, match="at most 1 item"):
+            client.extract_pdf_text(files)
+
+
+@pytest.mark.asyncio
+async def test_async_extract_pdf_text_multi_file_guard(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """Passing two files to the async client fails validation."""
+    monkeypatch.delenv("PDFREST_API_KEY", raising=False)
+    files = [make_pdf_file(PdfRestFileID.generate(index)) for index in (1, 2)]
+    async with AsyncPdfRestClient(
+        api_key=ASYNC_API_KEY,
+        transport=httpx.MockTransport(lambda request: httpx.Response(500)),
+    ) as client:
+        with pytest.raises(ValidationError, match="at most 1 item"):
+            await client.extract_pdf_text(files)
+
+
+def test_extract_pdf_text_invalid_pages(monkeypatch: pytest.MonkeyPatch) -> None:
+    """A descending page range ("5-1") fails validation in the sync client."""
+    monkeypatch.delenv("PDFREST_API_KEY", raising=False)
+    input_file = make_pdf_file(PdfRestFileID.generate(1))
+    transport = httpx.MockTransport(lambda request: httpx.Response(500))
+    page_error = pytest.raises(
+        ValidationError,
+        match="The start page must be less than or equal to the end",
+    )
+    with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client:
+        with page_error:
+            client.extract_pdf_text(input_file, pages=["5-1"])
+
+
+@pytest.mark.asyncio
+async def test_async_extract_pdf_text_invalid_pages(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """A descending page range ("5-1") fails validation in the async client."""
+    monkeypatch.delenv("PDFREST_API_KEY", raising=False)
+    input_file = make_pdf_file(PdfRestFileID.generate(1))
+    transport = httpx.MockTransport(lambda request: httpx.Response(500))
+    expected_message = "The start page must be less than or equal to the end"
+
+    async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client:
+        with pytest.raises(ValidationError, match=expected_message):
+            await client.extract_pdf_text(input_file, pages=["5-1"])
+
+
+def test_extract_pdf_text_server_error(monkeypatch: pytest.MonkeyPatch) -> None:
+    """A 400 reply from the endpoint surfaces as ``PdfRestApiError`` (sync)."""
+    monkeypatch.delenv("PDFREST_API_KEY", raising=False)
+    input_file = make_pdf_file(PdfRestFileID.generate(1))
+    error_body = {"message": "Invalid option"}
+
+    def handler(request: httpx.Request) -> httpx.Response:
+        if request.method != "POST" or request.url.path != "/extracted-text":
+            msg = f"Unexpected request {request.method} {request.url}"
+            raise AssertionError(msg)
+        return httpx.Response(400, json=error_body)
+
+    transport = httpx.MockTransport(handler)
+    with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client:
+        with pytest.raises(PdfRestApiError, match="Invalid option"):
+            client.extract_pdf_text(input_file, full_text="off")
+
+
+@pytest.mark.asyncio
+async def test_async_extract_pdf_text_server_error(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.delenv("PDFREST_API_KEY", raising=False)
+    pdf_file = make_pdf_file(PdfRestFileID.generate(1))
+
+    def reply_with_error(request: httpx.Request) -> httpx.Response:
+        if request.method != "POST" or request.url.path != "/extracted-text":
+            msg = f"Unexpected request {request.method} {request.url}"
+            raise AssertionError(msg)
+        return httpx.Response(400, json={"message": "Invalid option"})
+
+    transport = httpx.MockTransport(reply_with_error)
+    async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client:
+        with pytest.raises(PdfRestApiError, match="Invalid option"):
+            await client.extract_pdf_text(pdf_file, full_text="off")
+
+
+@pytest.mark.parametrize(
+    ("invalid_kwargs", "match"),
+    [
+        pytest.param({"full_text": "pages"}, "full_text", id="bad-full-text"),
+        pytest.param(
+            {"preserve_line_breaks": "maybe"},
+            "preserve_line_breaks",
+            id="bad-preserve-line-breaks",
+        ),
+        pytest.param({"word_style": "maybe"}, "word_style", id="bad-word-style"),
+        pytest.param(
+            {"word_coordinates": "maybe"}, "word_coordinates", id="bad-word-coordinates"
+        ),
+    ],
+)
+def test_extract_pdf_text_invalid_option_values(
+    invalid_kwargs: Mapping[str, object],
+    match: str,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """Each malformed option value fails sync validation, naming the field."""
+    monkeypatch.delenv("PDFREST_API_KEY", raising=False)
+    input_file = make_pdf_file(PdfRestFileID.generate(1))
+    transport = httpx.MockTransport(lambda request: httpx.Response(500))
+
+    with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client:
+        with pytest.raises(ValidationError, match=match):
+            client.extract_pdf_text(input_file, **invalid_kwargs)
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    ("invalid_kwargs", "match"),
+    [
+        pytest.param({"full_text": "pages"}, "full_text", id="bad-full-text"),
+        pytest.param(
+            {"preserve_line_breaks": "maybe"},
+            "preserve_line_breaks",
+            id="bad-preserve-line-breaks",
+        ),
+        pytest.param({"word_style": "maybe"}, "word_style", id="bad-word-style"),
+        pytest.param(
+            {"word_coordinates": "maybe"}, "word_coordinates", id="bad-word-coordinates"
+        ),
+    ],
+)
+async def test_async_extract_pdf_text_invalid_option_values(
+    invalid_kwargs: Mapping[str, object],
+    match: str,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.delenv("PDFREST_API_KEY", raising=False)
+    stub = httpx.MockTransport(lambda _request: httpx.Response(500))
+    pdf_file = make_pdf_file(PdfRestFileID.generate(1))
+    async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=stub) as client:
+        with pytest.raises(ValidationError, match=match):
+            await client.extract_pdf_text(pdf_file, **invalid_kwargs)
diff --git a/tests/test_extracted_text_document.py b/tests/test_extracted_text_document.py
new file mode 100644
index 00000000..b9110672
--- /dev/null
+++ b/tests/test_extracted_text_document.py
@@ -0,0 +1,244 @@
+"""Tests for ExtractedTextDocument validation and serialization."""
+
+from __future__ import annotations
+
+import pytest
+
+from pdfrest.models import ExtractedTextDocument
+
+
+def test_extract_text_document_round_trip_document_mode() -> None:
+    """Document-mode ``fullText`` round-trips and exposes no per-page data."""
+    payload = {
+        "inputId": "153ec1a0f-07e4-4f42-bc64-05180f72a06c",
+        "fullText": "The lamb walks My Cow Eats!",
+    }
+
+    parsed = ExtractedTextDocument.model_validate(payload)
+    assert parsed.input_id == payload["inputId"]
+    assert parsed.words is None
+    assert parsed.full_text is not None
+    assert parsed.full_text.document_text == payload["fullText"]
+    assert parsed.full_text.iter_pages() == []
+
+    with pytest.raises(
+        ValueError,
+        match="full text payload was emitted in document mode; page data unavailable",
+    ):
+        _ = parsed.full_text.pages
+
+    assert parsed.model_dump(by_alias=True, exclude_none=True) == payload
+
+
+def test_extract_text_document_round_trip_page_mode() -> None:
+    """Page-mode ``fullText`` and a fully described word survive a round trip."""
+    # Corner values shared by the fixture and the coordinate assertions below.
+    left, right = 72, 90.12725830078125
+    top, bottom = 720.7918090820312, 704.72412109375
+    data = {
+        "inputId": "10559e808-4073-488b-b660-a0b1106dd98e",
+        "words": [
+            {
+                "text": "The",
+                "page": 1,
+                "coordinates": {
+                    "topLeft": {"x": left, "y": top},
+                    "topRight": {"x": right, "y": top},
+                    "bottomLeft": {"x": left, "y": bottom},
+                    "bottomRight": {"x": right, "y": bottom},
+                },
+                "style": {
+                    "color": {"space": "DeviceRGB", "values": [0, 0, 0]},
+                    "font": {"name": "Calibri", "size": 12},
+                },
+            }
+        ],
+        "fullText": {
+            "pages": [
+                {"page": 1, "text": "The lamb walks"},
+                {"page": 2, "text": "My Cow Eats!"},
+            ]
+        },
+    }
+
+    document = ExtractedTextDocument.model_validate(data)
+
+    assert document.input_id == data["inputId"]
+    assert document.full_text is not None
+    assert document.words is not None
+    assert len(document.words) == 1
+    assert document.full_text.document_text == "The lamb walks My Cow Eats!"
+    pages = document.full_text.pages
+    assert [(entry.page, entry.text) for entry in pages] == [
+        (1, "The lamb walks"),
+        (2, "My Cow Eats!"),
+    ]
+    assert document.full_text.iter_pages() == pages
+
+    word = document.words[0]
+    assert word.text == "The"
+    assert word.page == 1
+    style = word.style
+    assert style is not None
+    assert style.color.space == "DeviceRGB"
+    assert style.color.values == [0, 0, 0]
+    assert (style.font.name, style.font.size) == ("Calibri", 12)
+    box = word.coordinates
+    assert box is not None
+    assert (box.top_left.x, box.top_left.y) == (left, top)
+    assert (box.top_right.x, box.top_right.y) == (right, top)
+    assert (box.bottom_left.x, box.bottom_left.y) == (left, bottom)
+    assert (box.bottom_right.x, box.bottom_right.y) == (right, bottom)
+
+    assert document.model_dump(by_alias=True, exclude_none=True) == data
+
+
+def test_extract_text_document_round_trip_without_words_or_full_text() -> None:
+    """A payload holding only ``inputId`` leaves both optional sections unset."""
+    input_id = "3f59e808-4073-488b-b660-a0b1106dd9aa"
+    data = {"inputId": input_id}
+
+    document = ExtractedTextDocument.model_validate(data)
+
+    assert document.input_id == input_id
+    assert document.words is None
+    assert document.full_text is None
+    assert document.model_dump(by_alias=True, exclude_none=True) == data
+
+
+@pytest.mark.parametrize(
+    ("word_payload", "has_coordinates", "has_style"),
+    [
+        pytest.param(
+            {"text": "Simple", "page": 1},
+            False,
+            False,
+            id="minimal-word",
+        ),
+        pytest.param(
+            {
+                "text": "CoordsOnly",
+                "page": 2,
+                "coordinates": {
+                    "topLeft": {"x": 1, "y": 2},
+                    "topRight": {"x": 3, "y": 4},
+                    "bottomLeft": {"x": 5, "y": 6},
+                    "bottomRight": {"x": 7, "y": 8},
+                },
+            },
+            True,
+            False,
+            id="coordinates-only",
+        ),
+        pytest.param(
+            {
+                "text": "StyleOnly",
+                "page": 3,
+                "style": {
+                    "color": {"space": "DeviceRGB", "values": [0.1, 0.2, 0.3]},
+                    "font": {"name": "Calibri", "size": 10},
+                },
+            },
+            False,
+            True,
+            id="style-only",
+        ),
+        pytest.param(
+            {
+                "text": "Both",
+                "page": 4,
+                "coordinates": {
+                    "topLeft": {"x": 10, "y": 11},
+                    "topRight": {"x": 12, "y": 13},
+                    "bottomLeft": {"x": 14, "y": 15},
+                    "bottomRight": {"x": 16, "y": 17},
+                },
+                "style": {
+                    "color": {"space": "DeviceCMYK", "values": [0, 0, 0, 1]},
+                    "font": {"name": "Times", "size": 8.5},
+                },
+            },
+            True,
+            True,
+            id="coordinates-and-style",
+        ),
+    ],
+)
+def test_extracted_text_words_optional_fields(
+    word_payload: dict[str, object], has_coordinates: bool, has_style: bool
+) -> None:
+    """Words parse with any combination of optional coordinates and style."""
+    data = {
+        "inputId": "6f59e808-4073-488b-b660-a0b1106dd9bb",
+        "words": [word_payload],
+    }
+
+    document = ExtractedTextDocument.model_validate(data)
+
+    assert document.input_id == data["inputId"]
+    assert document.words is not None
+    word = document.words[0]
+    assert word.text == word_payload["text"]
+    assert word.page == word_payload["page"]
+
+    if not has_coordinates:
+        assert word.coordinates is None
+    else:
+        assert word.coordinates is not None
+        coord_payload = word_payload.get("coordinates")
+        assert isinstance(coord_payload, dict)
+        # Pair every parsed corner with the payload entry stored under its alias.
+        corners = {
+            "topLeft": word.coordinates.top_left,
+            "topRight": word.coordinates.top_right,
+            "bottomLeft": word.coordinates.bottom_left,
+            "bottomRight": word.coordinates.bottom_right,
+        }
+        for alias, point in corners.items():
+            assert point.x == coord_payload[alias]["x"]
+            assert point.y == coord_payload[alias]["y"]
+        assert word.coordinates.model_dump(by_alias=True) == coord_payload
+
+    if not has_style:
+        assert word.style is None
+    else:
+        assert word.style is not None
+        style_payload = word_payload.get("style")
+        assert isinstance(style_payload, dict)
+        color_payload = style_payload["color"]
+        font_payload = style_payload["font"]
+        assert isinstance(color_payload, dict)
+        assert isinstance(font_payload, dict)
+        assert word.style.color.space == color_payload["space"]
+        assert word.style.color.values == color_payload["values"]
+        assert word.style.font.name == font_payload["name"]
+        assert word.style.font.size == font_payload["size"]
+
+    # Dumping with aliases and without ``None`` fields must reproduce the input.
+    assert document.model_dump(by_alias=True, exclude_none=True) == data
+
+
+def test_extract_text_document_page_mode_without_words() -> None:
+    """Page-mode ``fullText`` parses and round-trips when no words are present."""
+    page_entries = [
+        {"page": 1, "text": "One"},
+        {"page": 2, "text": "Two"},
+    ]
+    data = {
+        "inputId": "9f59e808-4073-488b-b660-a0b1106dd9cc",
+        "fullText": {"pages": page_entries},
+    }
+
+    document = ExtractedTextDocument.model_validate(data)
+
+    assert document.input_id == data["inputId"]
+    assert document.words is None
+    full_text = document.full_text
+    assert full_text is not None
+    pages = full_text.pages
+    assert len(pages) == len(page_entries)
+    for parsed_page, entry in zip(pages, page_entries, strict=True):
+        assert parsed_page.page == entry["page"]
+        assert parsed_page.text == entry["text"]
+    assert full_text.document_text == "One Two"
+    assert document.model_dump(by_alias=True, exclude_none=True) == data
diff --git a/uv.lock b/uv.lock
index d78991ea..402cabeb 100644
--- a/uv.lock
+++ b/uv.lock
@@ -751,6 +751,7 @@ dev = [
     { name = "pytest-rerunfailures" },
     { name = "pytest-xdist" },
     { name = "python-dotenv" },
+    { name = "rich" },
     { name = "ruff" },
 ]
 
@@ -778,6 +779,7 @@ dev = [
     { name = "pytest-rerunfailures", specifier = ">=16.0.1" },
     { name = "pytest-xdist", specifier = ">=3.8.0" },
     { name = "python-dotenv", specifier = ">=1.0.1" },
+    { name = "rich", specifier = ">=14.1.0" },
     { name = "ruff", specifier = ">=0.6.9" },
 ]