From e69b07d4e1d63cc5f1339504ae36ee6ef050139d Mon Sep 17 00:00:00 2001 From: "Kevin A. Mitchell" Date: Fri, 19 Dec 2025 15:37:24 -0600 Subject: [PATCH 1/4] client.py: Add compress PDF support with comprehensive validation - Introduced `compress_pdf` method for both synchronous and asynchronous `PdfRestClient` instances. - Added `PdfCompressPayload` to model validation with constraints for files, compression levels, profiles, and output settings. - Included live and unit tests to validate presets, custom profiles, request customization, and error handling. - Added new live test resources and sample compression profile. Assisted-by: Codex --- src/pdfrest/client.py | 67 +++ src/pdfrest/models/_internal.py | 58 +++ tests/live/test_live_compress_pdf.py | 185 ++++++++ tests/resources/compression_profile.json | 83 ++++ tests/test_compress_pdf.py | 536 +++++++++++++++++++++++ 5 files changed, 929 insertions(+) create mode 100644 tests/live/test_live_compress_pdf.py create mode 100644 tests/resources/compression_profile.json create mode 100644 tests/test_compress_pdf.py diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index d2b136c9..5cf73dfa 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -73,6 +73,7 @@ BmpPdfRestPayload, GifPdfRestPayload, JpegPdfRestPayload, + PdfCompressPayload, PdfFlattenFormsPayload, PdfInfoPayload, PdfMergePayload, @@ -2197,6 +2198,39 @@ def flatten_pdf_forms( timeout=timeout, ) + def compress_pdf( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + compression_level: Literal["low", "medium", "high", "custom"], + profile: PdfRestFile | Sequence[PdfRestFile] | None = None, + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Compress a PDF using preset or custom compression profiles.""" + + payload: dict[str, Any] = { + "files": file, + 
"compression_level": compression_level, + } + if profile is not None: + payload["profile"] = profile + if output is not None: + payload["output"] = output + + return self._post_file_operation( + endpoint="/compressed-pdf", + payload=payload, + payload_model=PdfCompressPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + def convert_to_pdfx( self, file: PdfRestFile | Sequence[PdfRestFile], @@ -2707,6 +2741,39 @@ async def flatten_pdf_forms( timeout=timeout, ) + async def compress_pdf( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + compression_level: Literal["low", "medium", "high", "custom"], + profile: PdfRestFile | Sequence[PdfRestFile] | None = None, + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Asynchronously compress a PDF.""" + + payload: dict[str, Any] = { + "files": file, + "compression_level": compression_level, + } + if profile is not None: + payload["profile"] = profile + if output is not None: + payload["output"] = output + + return await self._post_file_operation( + endpoint="/compressed-pdf", + payload=payload, + payload_model=PdfCompressPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + async def convert_to_pdfx( self, file: PdfRestFile | Sequence[PdfRestFile], diff --git a/src/pdfrest/models/_internal.py b/src/pdfrest/models/_internal.py index 1d666824..c3d17830 100644 --- a/src/pdfrest/models/_internal.py +++ b/src/pdfrest/models/_internal.py @@ -549,6 +549,64 @@ class PdfFlattenFormsPayload(BaseModel): ] = None +class PdfCompressPayload(BaseModel): + """Adapt caller options into a pdfRest-ready compress request payload.""" + + files: Annotated[ + list[PdfRestFile], + Field( + min_length=1, + max_length=1, + 
validation_alias=AliasChoices("file", "files"), + serialization_alias="id", + ), + BeforeValidator(_ensure_list), + AfterValidator( + _allowed_mime_types("application/pdf", error_msg="Must be a PDF file") + ), + PlainSerializer(_serialize_as_first_file_id), + ] + compression_level: Annotated[ + Literal["low", "medium", "high", "custom"], + Field(serialization_alias="compression_level"), + ] + profile: Annotated[ + list[PdfRestFile] | None, + Field( + default=None, + min_length=1, + max_length=1, + validation_alias=AliasChoices("profile", "profiles"), + serialization_alias="profile_id", + ), + BeforeValidator(_ensure_list), + BeforeValidator( + _allowed_mime_types( + "application/json", + "text/json", + error_msg="Profile must be a JSON file", + ) + ), + PlainSerializer(_serialize_as_first_file_id), + ] = None + output: Annotated[ + str | None, + Field(serialization_alias="output", min_length=1, default=None), + AfterValidator(_validate_output_prefix), + ] = None + + @model_validator(mode="after") + def _validate_profile_dependency(self) -> PdfCompressPayload: + if self.compression_level == "custom": + if not self.profile: + msg = "compression_level 'custom' requires a profile to be provided." + raise ValueError(msg) + elif self.profile: + msg = "A profile can only be provided when compression_level is 'custom'." 
+ raise ValueError(msg) + return self + + class BmpPdfRestPayload(BasePdfRestGraphicPayload[Literal["rgb", "gray"]]): """Adapt caller options into a pdfRest-ready BMP request payload.""" diff --git a/tests/live/test_live_compress_pdf.py b/tests/live/test_live_compress_pdf.py new file mode 100644 index 00000000..6ee8b365 --- /dev/null +++ b/tests/live/test_live_compress_pdf.py @@ -0,0 +1,185 @@ +from __future__ import annotations + +from typing import Literal + +import pytest + +from pdfrest import AsyncPdfRestClient, PdfRestApiError, PdfRestClient +from pdfrest.models import PdfRestFile + +from ..resources import get_test_resource_path + + +@pytest.fixture(scope="module") +def uploaded_pdf_for_compression( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> PdfRestFile: + resource = get_test_resource_path("report.pdf") + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + return client.files.create_from_paths([resource])[0] + + +@pytest.fixture(scope="module") +def uploaded_compression_profile( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> PdfRestFile: + resource = get_test_resource_path("compression_profile.json") + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + return client.files.create_from_paths([resource])[0] + + +@pytest.mark.parametrize( + "compression_level", + [ + pytest.param("low", id="low"), + pytest.param("medium", id="medium"), + pytest.param("high", id="high"), + ], +) +def test_live_compress_pdf_presets( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_compression: PdfRestFile, + compression_level: Literal["low", "medium", "high"], +) -> None: + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = client.compress_pdf( + uploaded_pdf_for_compression, + compression_level=compression_level, + ) + + assert response.output_files + output_file = 
response.output_file + assert output_file.type == "application/pdf" + assert str(response.input_id) == str(uploaded_pdf_for_compression.id) + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "compression_level", + [ + pytest.param("low", id="low"), + pytest.param("medium", id="medium"), + pytest.param("high", id="high"), + ], +) +async def test_live_async_compress_pdf_presets( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_compression: PdfRestFile, + compression_level: Literal["low", "medium", "high"], +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = await client.compress_pdf( + uploaded_pdf_for_compression, + compression_level=compression_level, + ) + + assert response.output_files + output_file = response.output_file + assert output_file.type == "application/pdf" + assert str(response.input_id) == str(uploaded_pdf_for_compression.id) + + +def test_live_compress_pdf_custom( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_compression: PdfRestFile, + uploaded_compression_profile: PdfRestFile, +) -> None: + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = client.compress_pdf( + uploaded_pdf_for_compression, + compression_level="custom", + profile=uploaded_compression_profile, + output="compressed-custom", + ) + + assert ( + response.warning + == "The document could not be made smaller. No output was produced." 
+ ) + assert len(response.input_ids) == 2 + assert uploaded_pdf_for_compression.id in response.input_ids + assert uploaded_compression_profile.id in response.input_ids + + +@pytest.mark.asyncio +async def test_live_async_compress_pdf_custom( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_compression: PdfRestFile, + uploaded_compression_profile: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = await client.compress_pdf( + uploaded_pdf_for_compression, + compression_level="custom", + profile=uploaded_compression_profile, + output="compressed-custom", + ) + + assert ( + response.warning + == "The document could not be made smaller. No output was produced." + ) + assert len(response.input_ids) == 2 + assert uploaded_pdf_for_compression.id in response.input_ids + assert uploaded_compression_profile.id in response.input_ids + + +def test_live_compress_pdf_invalid_level( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_compression: PdfRestFile, +) -> None: + with ( + PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client, + pytest.raises(PdfRestApiError), + ): + client.compress_pdf( + uploaded_pdf_for_compression, + compression_level="low", + extra_body={"compression_level": "extreme"}, + ) + + +@pytest.mark.asyncio +async def test_live_async_compress_pdf_invalid_level( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_compression: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + with pytest.raises(PdfRestApiError): + await client.compress_pdf( + uploaded_pdf_for_compression, + compression_level="low", + extra_body={"compression_level": "extreme"}, + ) diff --git a/tests/resources/compression_profile.json b/tests/resources/compression_profile.json new file mode 100644 index 
00000000..1a894005 --- /dev/null +++ b/tests/resources/compression_profile.json @@ -0,0 +1,83 @@ +{ + "images": { + "color": { + "downsample": { + "trigger-dpi": 225, + "target-dpi": 150 + }, + "recompress": { + "type": "jpeg", + "quality": "medium" + } + }, + "grayscale": { + "downsample": { + "trigger-dpi": 220, + "target-dpi": 150 + }, + "recompress": { + "type": "jpeg", + "quality": "medium" + } + }, + "monochrome": { + "downsample": { + "trigger-dpi": 400, + "target-dpi": 300 + }, + "recompress": { + "type": "jbig2", + "quality": "lossy" + } + }, + "optimize-images-only-if-reduction-in-size": "on", + "consolidate-duplicate-image-and-forms": "on", + "down-convert-16-to-8-bpc-images": "on" + }, + "fonts": { + "subset-embedded-fonts": "on", + "consolidate-duplicate-fonts": "on", + "unembed-standard-14-fonts": "on", + "resubset-subset-fonts": "off", + "remove-unused-fonts": "on" + }, + "objects": { + "discard-javascript-actions": "off", + "discard-alternate-images": "on", + "discard-thumbnails": "off", + "discard-document-tags": "off", + "discard-bookmarks": "off", + "discard-output-intent": "off" + }, + "userdata": { + "discard-comments-forms-multimedia": "off", + "discard-xmp-metadata-padding": "off", + "discard-document-information-and-metadata": "off", + "discard-file-attachments": "off", + "discard-private-data": "off", + "discard-hidden-layer-content": "off" + }, + "cleanup": { + "compression": "compress-entire-file", + "flate-encode-uncompressed-streams": "on", + "convert-lzw-to-flate": "on", + "optimize-page-content": "on", + "optimize-for-fast-web-view": "on" + }, + "general": { + "write-output-even-if-increase-in-size": "off", + "preserve-version": "off" + }, + "color-conversion": { + "enabled": "off", + "color-convert-action": "convert", + "convert-intent": "profile-intent", + "convert-profile": "acrobat9-cmyk" + }, + "pdfa-conversion": { + "enabled": "off", + "type": "1b", + "pdfa-target-color-space": "rgb", + "rasterize-if-errors-encountered": "off" + 
} +} diff --git a/tests/test_compress_pdf.py b/tests/test_compress_pdf.py new file mode 100644 index 00000000..2e60fdae --- /dev/null +++ b/tests/test_compress_pdf.py @@ -0,0 +1,536 @@ +from __future__ import annotations + +import json + +import httpx +import pytest +from pydantic import ValidationError + +from pdfrest import AsyncPdfRestClient, PdfRestClient +from pdfrest.models import PdfRestFile, PdfRestFileBasedResponse, PdfRestFileID +from pdfrest.models._internal import PdfCompressPayload + +from .graphics_test_helpers import ( + ASYNC_API_KEY, + VALID_API_KEY, + build_file_info_payload, + make_pdf_file, +) + + +def make_profile_file(file_id: str) -> PdfRestFile: + return PdfRestFile.model_validate( + build_file_info_payload(file_id, "profile.json", "application/json") + ) + + +def test_compress_pdf_success(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + output_id = str(PdfRestFileID.generate()) + + payload_dump = PdfCompressPayload.model_validate( + {"files": [input_file], "compression_level": "low"} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/compressed-pdf": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + assert request.url.params["format"] == "info" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "compressed.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = 
httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.compress_pdf(input_file, compression_level="low") + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "compressed.pdf" + assert str(response.input_id) == str(input_file.id) + + +def test_compress_pdf_custom_profile(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + profile_file = make_profile_file(str(PdfRestFileID.generate())) + output_id = str(PdfRestFileID.generate()) + + payload_dump = PdfCompressPayload.model_validate( + { + "files": [input_file], + "compression_level": "custom", + "profile": [profile_file], + "output": "smaller", + } + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/compressed-pdf": + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["format"] == "info" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "smaller.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.compress_pdf( + input_file, + compression_level="custom", + profile=profile_file, + output="smaller", + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "smaller.pdf" + + +def 
test_compress_pdf_request_customization(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + profile_file = make_profile_file(str(PdfRestFileID.generate())) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/compressed-pdf": + assert request.url.params["trace"] == "sync" + assert request.headers["X-Debug"] == "sync" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["compression_level"] == "custom" + assert payload["profile_id"] == str(profile_file.id) + assert payload["output"] == "custom-name" + assert payload["diagnostics"] == "on" + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["format"] == "info" + assert request.url.params["trace"] == "sync" + assert request.headers["X-Debug"] == "sync" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "custom-name.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.compress_pdf( + input_file, + compression_level="custom", + profile=[profile_file], + output="custom-name", + extra_query={"trace": "sync"}, + extra_headers={"X-Debug": "sync"}, + extra_body={"diagnostics": "on"}, + timeout=0.42, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "custom-name.pdf" + timeout_value = captured_timeout["value"] + assert 
timeout_value is not None + if isinstance(timeout_value, dict): + assert all(pytest.approx(0.42) == value for value in timeout_value.values()) + else: + assert timeout_value == pytest.approx(0.42) + + +@pytest.mark.asyncio +async def test_async_compress_pdf_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(2)) + profile_file = make_profile_file(str(PdfRestFileID.generate())) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/compressed-pdf": + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["compression_level"] == "custom" + assert payload["profile_id"] == str(profile_file.id) + assert payload["output"] == "async-custom-name" + assert payload["diagnostics"] == "on" + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["format"] == "info" + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "async-custom-name.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.compress_pdf( + input_file, + compression_level="custom", + profile=[profile_file], + output="async-custom-name", + 
extra_query={"trace": "async"}, + extra_headers={"X-Debug": "async"}, + extra_body={"diagnostics": "on"}, + timeout=0.88, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async-custom-name.pdf" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all(pytest.approx(0.88) == value for value in timeout_value.values()) + else: + assert timeout_value == pytest.approx(0.88) + + +@pytest.mark.asyncio +async def test_async_compress_pdf_success(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(2)) + output_id = str(PdfRestFileID.generate()) + + payload_dump = PdfCompressPayload.model_validate( + {"files": [input_file], "compression_level": "medium"} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/compressed-pdf": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + assert request.url.params["format"] == "info" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "async.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.compress_pdf( + input_file, + compression_level="medium", + ) + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, 
PdfRestFileBasedResponse) + assert response.output_file.name == "async.pdf" + + +@pytest.mark.asyncio +async def test_async_compress_pdf_custom_profile( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(2)) + profile_file = make_profile_file(str(PdfRestFileID.generate())) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/compressed-pdf": + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["profile_id"] == str(profile_file.id) + assert payload["compression_level"] == "custom" + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["format"] == "info" + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "async-custom.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.compress_pdf( + input_file, + compression_level="custom", + profile=profile_file, + extra_query={"trace": "async"}, + extra_headers={"X-Debug": "async"}, + timeout=0.77, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async-custom.pdf" + timeout_value = captured_timeout["value"] + assert 
timeout_value is not None + if isinstance(timeout_value, dict): + assert all(pytest.approx(0.77) == value for value in timeout_value.values()) + else: + assert timeout_value == pytest.approx(0.77) + + +def test_compress_pdf_validation(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + pdf_file = make_pdf_file(PdfRestFileID.generate(1)) + non_pdf_file = PdfRestFile.model_validate( + build_file_info_payload( + PdfRestFileID.generate(), + "example.png", + "image/png", + ) + ) + profile_file = make_profile_file(str(PdfRestFileID.generate())) + bad_profile_file = make_pdf_file(PdfRestFileID.generate(1), name="example.pdf") + transport = httpx.MockTransport(lambda request: (_ for _ in ()).throw(RuntimeError)) + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises(ValidationError, match="Must be a PDF file"), + ): + client.compress_pdf(non_pdf_file, compression_level="low") + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises( + ValidationError, + match="compression_level 'custom' requires a profile", + ), + ): + client.compress_pdf(pdf_file, compression_level="custom") + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises( + ValidationError, + match="Profile must be a JSON file", + ), + ): + client.compress_pdf( + pdf_file, + compression_level="custom", + profile=bad_profile_file, + ) + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises( + ValidationError, + match="A profile can only be provided when compression_level is 'custom'", + ), + ): + client.compress_pdf( + pdf_file, + compression_level="low", + profile=profile_file, + ) + + +def test_compress_pdf_requires_profile_for_custom_sync( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + pdf_file = make_pdf_file(PdfRestFileID.generate(1)) + 
transport = httpx.MockTransport(lambda request: (_ for _ in ()).throw(RuntimeError)) + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises( + ValidationError, + match="compression_level 'custom' requires a profile", + ), + ): + client.compress_pdf(pdf_file, compression_level="custom") + + +@pytest.mark.asyncio +async def test_async_compress_pdf_requires_profile_for_custom( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + pdf_file = make_pdf_file(PdfRestFileID.generate(1)) + transport = httpx.MockTransport(lambda request: (_ for _ in ()).throw(RuntimeError)) + + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + with pytest.raises( + ValidationError, + match="compression_level 'custom' requires a profile", + ): + await client.compress_pdf(pdf_file, compression_level="custom") + + +def test_compress_pdf_rejects_profile_for_presets_sync( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + pdf_file = make_pdf_file(PdfRestFileID.generate(1)) + profile_file = make_profile_file(str(PdfRestFileID.generate())) + transport = httpx.MockTransport(lambda request: (_ for _ in ()).throw(RuntimeError)) + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises( + ValidationError, + match="A profile can only be provided when compression_level is 'custom'", + ), + ): + client.compress_pdf( + pdf_file, + compression_level="low", + profile=profile_file, + ) + + +@pytest.mark.asyncio +async def test_async_compress_pdf_rejects_profile_for_presets( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + pdf_file = make_pdf_file(PdfRestFileID.generate(1)) + profile_file = make_profile_file(str(PdfRestFileID.generate())) + transport = httpx.MockTransport(lambda request: (_ for _ in ()).throw(RuntimeError)) + + async with 
AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + with pytest.raises( + ValidationError, + match="A profile can only be provided when compression_level is 'custom'", + ): + await client.compress_pdf( + pdf_file, + compression_level="low", + profile=profile_file, + ) + + +@pytest.mark.asyncio +async def test_async_compress_pdf_validation(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + pdf_file = make_pdf_file(PdfRestFileID.generate(1)) + non_pdf_file = PdfRestFile.model_validate( + build_file_info_payload( + PdfRestFileID.generate(), + "example.png", + "image/png", + ) + ) + profile_file = make_profile_file(str(PdfRestFileID.generate())) + bad_profile_file = make_pdf_file(PdfRestFileID.generate(1), name="example.pdf") + transport = httpx.MockTransport(lambda request: (_ for _ in ()).throw(RuntimeError)) + + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + with pytest.raises(ValidationError, match="Must be a PDF file"): + await client.compress_pdf(non_pdf_file, compression_level="low") + + with pytest.raises( + ValidationError, + match="compression_level 'custom' requires a profile", + ): + await client.compress_pdf(pdf_file, compression_level="custom") + + with pytest.raises(ValidationError, match="Profile must be a JSON file"): + await client.compress_pdf( + pdf_file, + compression_level="custom", + profile=bad_profile_file, + ) + + with pytest.raises( + ValidationError, + match="A profile can only be provided when compression_level is 'custom'", + ): + await client.compress_pdf( + pdf_file, + compression_level="low", + profile=profile_file, + ) From 9a79177c875c9fc5ac4b9361a0be676ec42829c2 Mon Sep 17 00:00:00 2001 From: "Kevin A. Mitchell" Date: Fri, 19 Dec 2025 15:37:47 -0600 Subject: [PATCH 2/4] models: Allow pdfRest to return no files - Compression might return just a warning. 
--- src/pdfrest/models/_internal.py | 7 ------- src/pdfrest/models/public.py | 1 - 2 files changed, 8 deletions(-) diff --git a/src/pdfrest/models/_internal.py b/src/pdfrest/models/_internal.py index c3d17830..207bdf61 100644 --- a/src/pdfrest/models/_internal.py +++ b/src/pdfrest/models/_internal.py @@ -683,13 +683,6 @@ class PdfRestRawFileResponse(BaseModel): ), ] = None - @model_validator(mode="after") - def _check_output_id_or_files(self) -> Any: - if self.output_ids is None and self.files is None: - msg = "output_id or files must be specified" - raise ValueError(msg) - return self - @property def ids(self) -> list[PdfRestFileID] | None: if self.output_ids is not None: diff --git a/src/pdfrest/models/public.py b/src/pdfrest/models/public.py index da2c8893..108490ce 100644 --- a/src/pdfrest/models/public.py +++ b/src/pdfrest/models/public.py @@ -256,7 +256,6 @@ class PdfRestFileBasedResponse(BaseModel): list[PdfRestFile], Field( description="The list of files returned by the pdfRest operation", - min_length=1, validation_alias=AliasChoices("output_file", "outputFile"), ), ] From 56d748e26e6401294e173ecb004ed0d6b79533ec Mon Sep 17 00:00:00 2001 From: "Kevin A. Mitchell" Date: Fri, 19 Dec 2025 15:38:42 -0600 Subject: [PATCH 3/4] AGENTS.md: Update testing guidelines to ensure sync/async coverage - Require separate test cases for `PdfRestClient` and `AsyncPdfRestClient` in both unit and live test suites to verify synchronous and asynchronous behavior independently. Assisted-by: Codex --- AGENTS.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/AGENTS.md b/AGENTS.md index 11642296..043ec69f 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -138,6 +138,11 @@ - Write pytest tests: files named `test_*.py`, test functions `test_*`, fixtures in `conftest.py` where shared. 
+- Cover both client transports in every new test module (unit and live suites): + add distinct test cases (not parameterized branches) that exercise each + assertion through `PdfRestClient` and `AsyncPdfRestClient` so sync/async + behaviour stays independently verifiable. + - Ensure high-value coverage of public functions and edge cases; document intent in test docstrings when non-obvious. From 89af60151e2b17b1d69c64a2a41cc312c60ee2af Mon Sep 17 00:00:00 2001 From: "Kevin A. Mitchell" Date: Fri, 19 Dec 2025 15:39:07 -0600 Subject: [PATCH 4/4] docs: Add comprehensive testing guidelines for pdfRest APIs - Introduced `TESTING_GUIDELINES.md` to formalize expectations for unit and live tests across sync and async clients. - Detailed core principles, environment configuration, and validation patterns for robust API testing. - Covered transports, error handling, literal enumeration, and boundary value analysis. - Provided guidelines on mocking, file handling, request customization, and serialization to ensure complete coverage and consistency. - Added instructions for expanding tests as pdfRest evolves to support new APIs. Assisted-by: Codex --- TESTING_GUIDELINES.md | 278 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 278 insertions(+) create mode 100644 TESTING_GUIDELINES.md diff --git a/TESTING_GUIDELINES.md b/TESTING_GUIDELINES.md new file mode 100644 index 00000000..9df71bd5 --- /dev/null +++ b/TESTING_GUIDELINES.md @@ -0,0 +1,278 @@ +# Testing Guidelines + +The existing suite already exercises uploads, conversions, compression, +redaction, metadata queries, and file utilities through both synchronous and +asynchronous clients. The expectations below condense every technique we rely on +so new endpoints launch with complete coverage on the first pass—no reviewer +iteration required. 
+ +## Core Principles + +- **Cover both transports everywhere.** Write distinct sync (`PdfRestClient`) + and async (`AsyncPdfRestClient`) tests for every scenario—such as happy paths, + request customization, validation failures, file helpers, and live calls. Do + not hide the transport behind a parameter; the test name itself should reveal + which client is under test. +- **Exercise both sides of the contract.** Hermetic tests (via + `httpx.MockTransport`) validate serialization and local validation. Live + suites prove the server behaves the same way, including invalid literal + handling. +- **Reset global state per test.** Use + `monkeypatch.delenv("PDFREST_API_KEY", raising=False)` (or `setenv`) so + clients never inherit accidental API keys. Patch `importlib.metadata.version` + when asserting SDK headers. +- **Lean on shared helpers.** Reuse `tests/graphics_test_helpers.py` + (`make_pdf_file`, `build_file_info_payload`, `PdfRestFileID.generate()`), + `tests/resources/`, and fixtures from `tests/conftest.py` to keep payloads + deterministic. +- **Assert behaviour, not just invocation.** Validate outbound payloads, + headers, query params, response contents, warnings, and timeouts. Track + request counts (`seen = {"post": 0, "get": 0}`) so redundant HTTP calls fail + loudly. + +## Environment & Configuration Coverage + +- Verify API keys sourced from kwargs vs environment variables, and ensure + invalid/missing keys raise `PdfRestConfigurationError`. +- Confirm SDK identity headers (`wsn`, `User-Agent`, `Accept`) by patching + `importlib.metadata.version`. +- Assert `PdfRestClient` omits `Api-Key` when pointed at custom hosts and honors + caller-provided headers/query params even for control-plane calls like + `.up()`. + +## Mocked (Unit) Tests + +### Transports & Request Inspection + +- Use `httpx.MockTransport` handlers that assert: + - HTTP method + path (`/png`, `/compressed-pdf`, `/resource/{id}`, etc.). 
+ - Query parameters and headers (trace/debug flags, mode switches, custom auth + headers). + - JSON payloads obtained via `json.loads(request.content)` and compared to the + relevant Pydantic payload’s + `.model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True)`. +- For “should not happen” cases (invalid IDs, missing profiles), set the + transport to immediately raise + (`lambda request: (_ for _ in ()).throw(RuntimeError)` or `pytest.fail`) so + local validation is guaranteed. + +### Error Translation & Retries + +- Simulate 4xx/5xx responses and assert the correct exception surfaces: + `PdfRestAuthenticationError` (401/403), `PdfRestApiError` (other status codes, + include error text), `PdfRestTimeoutError` (raise `httpx.TimeoutException`), + `PdfRestTransportError` (raise `httpx.TransportError`). +- Cover retry logic by returning retriable responses multiple times and + confirming the client retries before raising (see `tests/test_client.py` + patterns for `Retry-After`). +- Include `pytest.raises(..., match="...")` to ensure exception messages capture + server warnings, retry hints, or timeout wording. + +### Sync vs Async Coverage + +- Sync tests wrap clients in `with PdfRestClient(...):`. +- Async tests use `@pytest.mark.asyncio` and + `async with AsyncPdfRestClient(...):`. +- When asserting async failures, place `pytest.raises` inside the `async with` + block. Python forbids mixing `with` and `async with` in a single statement. + +### Request Customization + +- For every endpoint that accepts `extra_query`, `extra_headers`, `extra_body`, + or `timeout`, add explicit tests (sync + async) proving those options + propagate. Capture `request.extensions["timeout"]` and assert every component + equals `pytest.approx(expected)`. + +### Validation & Payload Modeling + +- Use the payload models directly (`PngPdfRestPayload`, `PdfCompressPayload`, + `PdfMergePayload`, `PdfSplitPayload`, `PdfRedactionApplyPayload`, etc.) 
to + assert serialization, output-prefix validation, and range normalization in + isolation from the client. +- Through the client surface, pair calls with + `pytest.raises(ValidationError, match="...")` for MIME enforcement (“Must be a + PDF file”), dependency rules (“compression_level 'custom' requires a + profile”), profile MIME validation (“Profile must be a JSON file”), and list + length bounds. +- Cover all accepted literal shapes: single literal vs list vs tuple for + `PdfInfoQuery`; dict vs tuple vs sequence for `PdfMergeInput`; tuple/list/None + for `PdfRGBColor`; JSON-friendly dicts for redaction instructions. + +### Enumerations, Numeric Ranges, and Options + +- Use `pytest.mark.parametrize` with `pytest.param(..., id="friendly")` to + enumerate literals such as `color_model`, `smoothing`, `compression_level`, + `page_range`, merge selectors, JPEG quality boundaries, or any future literal + surfaced by new APIs. +- Include invalid literals (such as `"extreme"` compression levels, unsupported + `color_model` values, or smoothing arrays containing + duplicates/more-than-allowed entries) to ensure validation errors remain + descriptive—use `re.escape(...)` when asserting. +- For numeric fields (resolution, DPI, percentages, counts, radii, opacity, + etc.), exercise the extremes: the documented minimum/maximum, the first legal + value just inside each bound, and at least one value just outside the range. + Treat every `ge`, `le`, `gt`, `lt`, or `Annotated` constraint as requiring + explicit boundary tests. +- For textual ranges, cover ascending permutations, `"last"`, `"1-last"`, + descending segments (where allowed), and disallowed selectors (such as + `"even"`/`"odd"` when the server forbids them). +- When endpoints expose optional payload arguments (output prefixes, diagnostics + toggles, merge metadata, future knobs), include both defaulted and explicitly + provided cases so serialization doesn’t regress. 
+ +### Response Verification + +- Assert the concrete response types (`PdfRestFileBasedResponse`, `PdfRestFile`, + `PdfRestInfoResponse`, etc.). +- Inspect every relevant attribute: + - File metadata (`name`, `type`, `size`, `url`, `warning`). + - `input_id` echoes the uploaded file ID (string comparison). + - `output_files` count matches the number of IDs returned by the mock service. +- For file-service helpers, compare results against `_build_file_info_payload` + via `_assert_file_matches_payload`. + +### Files API Scenarios + +- Uploads: assert multipart bodies include the correct number of `name="file"` + parts and filenames, and that + `client.files.create`/`create_from_paths`/`create_from_urls` fetch info + documents afterward. +- Downloads: cover `download_file`, `files.read_bytes/text/json`, and + `files.write_bytes` with `tmp_path`. Confirm file contents match expected + bytes. +- Streaming: tests should enter `files.stream()` via `with`/`async with`, + iterate over `iter_raw`, `iter_bytes`, `iter_text`, and `iter_lines`, and join + chunks back to the original payload. Manage nested context managers using + `ExitStack` (sync clients) or `AsyncExitStack` (async clients). +- ID validation: ensure malformed IDs raise before sending HTTP requests + (transport should error if called). + +### Document, Compression, and Other Endpoint Examples + +- Conversions (such as `convert_to_png`, `convert_to_jpeg`, `convert_to_word`, + or any future format helper) must verify payload serialization, request + customization, MIME enforcement, multi-file guards, and smoothing/quality + enumerations. +- Compression helpers (such as `compress_pdf`) enforce the profile dependency + (custom requires JSON profile; presets reject profiles) and validate MIME + types for both PDFs and profiles in sync + async contexts.
+- Split/Merge style endpoints (such as `split_pdf` or `merge_pdfs`) should + exercise tuples/dicts/lists, ensure payload serializers emit the correct + parallel arrays, and include validation errors for insufficient sources or + invalid page groups. +- Redaction and metadata helpers (such as `preview_redactions`, + `apply_redactions`, `query_pdf_info`) must cover literal shapes, optional + parameters, and invalid presets. +- Treat these as templates for any future API from + `pdfrest_api_reference_guide.html`: identify its payload model, enumerate + literals/numeric bounds, and apply the same sync+async/unit+live layering. + +### File Fixtures & Helpers + +- Generate fake uploads with `make_pdf_file`, `build_file_info_payload`, and + `PdfRestFileID.generate()` to keep IDs valid. +- When triggering MIME validation, fabricate `PdfRestFile` objects with + deliberately incorrect `type` values (PNG for PDF-only endpoints, PDF for + JSON-only profiles). +- Use `_StaticStream` / `_StaticAsyncStream` from `tests/test_files.py` to + simulate streaming responses without touching disk. + +## Live Tests + +- **Location & structure:** Place suites under `tests/live/` with one module per + endpoint (`test_live_compress_pdf.py`, `test_live_convert_to_png.py`, + `test_live_files.py`, etc.). +- **Fixtures:** Reuse shared fixtures (`pdfrest_api_key`, + `pdfrest_live_base_url`). Upload deterministic assets from `tests/resources/` + via `create_from_paths` (or `client.files.create`) so responses are + predictable. Use `pytest.fixture` with `scope="module"` or `scope="class"`, and + `pytest_asyncio.fixture` to cache uploaded PDFs/profiles for both transports. +- **Sync + async parity:** Every live module should contain matching sync and + async tests for success, customization, streaming, and invalid paths + (compression levels, conversion options, file streaming helpers).
+- **Enumerate literals:** Parameterize over every accepted literal (compression + levels, `color_model`, `smoothing`, merge selectors, redaction presets). Each + literal should hit the server once per transport. +- **Optional arguments:** Exercise options such as custom output prefixes, + diagnostics toggles, merge metadata, and URL uploads. Validate the server + honors them (filenames start with the user-provided prefix, warnings appear + when expected). +- **Negative live cases:** Override JSON via `extra_body`/`extra_query` to + bypass local validation and assert `PdfRestApiError` (or the exact server + exception) surfaces—for example, sending an invalid compression literal or + smoothing option. +- **Streaming + downloads:** In live `files` suites, cover `write_bytes`, + `files.stream().iter_*`, and URL uploads. Manage nested `async with` blocks + using `AsyncExitStack` to ensure resources are released. +- **Assertions:** Verify file names, MIME types, sizes, warnings, and that + `input_id` matches the uploaded ID. When fixtures are deterministic + (`report.pdf`, `compression_profile.json`), assert exact values rather than + generic truthiness. +- **Resource reuse:** For `create_from_urls`, first upload files to retrieve + stable URLs, then call the URL endpoint—never rely on arbitrary third-party + hosts. + +## Error Handling Patterns + +- Always combine clients with `pytest.raises` (including descriptive `match=`) + when testing validation or HTTP errors. For sync contexts you can use a + compound `with (PdfRestClient(...) as client, pytest.raises(...)):`; for + async, place `pytest.raises` inside the `async with` block. +- Distinguish between: + - Local validation failures (`ValidationError`, `ValueError`) that should + prevent HTTP calls. + - Server/transport failures (`PdfRestApiError`, `PdfRestAuthenticationError`, + `PdfRestTimeoutError`, `PdfRestTransportError`). 
+- When behaviour should short-circuit locally (bad UUIDs, empty query lists, + missing profiles), configure the transport to raise if invoked so the test + proves no HTTP request occurs. + +## Additional Expectations + +- **Context managers everywhere:** Treat clients and file streams as context + managers so transports close cleanly. +- **pytest fixtures:** Use readable fixtures (such as `client`, + `uploaded_pdf_for_compression`, `live_async_file`) with appropriate scopes. + Prefer `pytest.param(..., id="...")` so parametrized IDs stay intelligible. +- **No real network in unit tests:** Hermetic tests must rely solely on + `httpx.MockTransport`. +- **ID serialization:** Confirm payloads serialize uploaded `PdfRestFile` + objects as IDs via `_serialize_as_first_file_id` rather than embedding nested + structures. +- **Timeout propagation:** Every endpoint that accepts `timeout` needs both sync + and async coverage that inspects `request.extensions["timeout"]`. +- **Multi-file safeguards:** Assert endpoints that accept exactly one + file/profile reject extra inputs (such as conversions or compression + profiles). Conversely, endpoints that require multiple sources (such as merge + operations) should test both valid (≥2) and invalid (\<2) cases. +- **Shared validation suites:** When new payload shapes or validators emerge, + add/update suites such as `tests/test_graphic_payload_validation.py` so every + endpoint inheriting the behaviour gains coverage automatically. + +## Planning for Future APIs + +pdfRest will keep expanding. When implementing a new helper from +`pdfrest_api_reference_guide.html`—whether it resembles existing conversions, +merges, inspections, or something entirely new—follow this playbook: + +1. **Capture inputs and constraints.** Translate every documented literal, + numeric range, dependency, and optional field into payload annotations/tests. 
+ Cover boundary values (minimum, maximum, first legal values inside the range, + and at least one outside value). +2. **Map outputs.** Determine whether the endpoint returns files, JSON, or both, + and assert every returned attribute or warning. +3. **Layer coverage.** For each behaviour, add sync + async unit tests (mocked) + plus sync + async live tests hitting the real service with both valid and + intentionally invalid requests. +4. **Reuse patterns.** If the endpoint resembles an existing suite (such as + conversions, redaction, compression, file uploads, metadata queries), mirror + the structure and assertions to stay consistent. +5. **Evolve shared tests.** Whenever a new validation rule becomes reusable—such + as a fresh output-prefix constraint or numeric range validator—extend the + shared helper modules and suites so future endpoints benefit automatically. + +Following these rules ensures new endpoints debut with deterministic unit tests +and fully instrumented live coverage. Treat the existing conversion, +compression, redaction, split/merge, and file suites as templates—if a behaviour +exists today (or will exist tomorrow), there should either be a matching test +pattern already or one added alongside the new API.