Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 106 additions & 0 deletions src/pdfrest/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,9 @@
SummarizePdfTextPayload,
TiffPdfRestPayload,
TranslatePdfTextPayload,
UnzipPayload,
UploadURLs,
ZipPayload,
)
from .types import (
ALL_PDF_INFO_QUERIES,
Expand Down Expand Up @@ -2602,6 +2604,58 @@ def merge_pdfs(
timeout=timeout,
)

def zip_files(
    self,
    files: PdfRestFile | Sequence[PdfRestFile],
    *,
    output: str | None = None,
    extra_query: Query | None = None,
    extra_headers: AnyMapping | None = None,
    extra_body: Body | None = None,
    timeout: TimeoutTypes | None = None,
) -> PdfRestFileBasedResponse:
    """Bundle one or more files into a single zip archive.

    Args:
        files: The file, or sequence of files, to send under the
            ``files`` payload key.
        output: Optional value sent under the ``output`` payload key. The
            key is left out of the payload entirely when this is ``None``.
        extra_query: Forwarded unchanged to ``_post_file_operation``.
        extra_headers: Forwarded unchanged to ``_post_file_operation``.
        extra_body: Forwarded unchanged to ``_post_file_operation``.
        timeout: Forwarded unchanged to ``_post_file_operation``.

    Returns:
        The result of ``_post_file_operation`` for the ``/zip`` endpoint,
        using ``ZipPayload`` as the payload model.
    """
    # "output" only appears in the payload when the caller supplied one.
    request_fields: dict[str, Any] = (
        {"files": files}
        if output is None
        else {"files": files, "output": output}
    )
    return self._post_file_operation(
        endpoint="/zip",
        payload_model=ZipPayload,
        payload=request_fields,
        timeout=timeout,
        extra_body=extra_body,
        extra_headers=extra_headers,
        extra_query=extra_query,
    )

def unzip_file(
    self,
    file: PdfRestFile | Sequence[PdfRestFile],
    *,
    password: str | None = None,
    extra_query: Query | None = None,
    extra_headers: AnyMapping | None = None,
    extra_body: Body | None = None,
    timeout: TimeoutTypes | None = None,
) -> PdfRestFileBasedResponse:
    """Unpack the contents of a zip archive.

    Args:
        file: The archive to extract; sent under the ``files`` payload key.
        password: Optional value sent under the ``password`` payload key.
            The key is omitted when this is ``None``.
        extra_query: Forwarded unchanged to ``_post_file_operation``.
        extra_headers: Forwarded unchanged to ``_post_file_operation``.
        extra_body: Forwarded unchanged to ``_post_file_operation``.
        timeout: Forwarded unchanged to ``_post_file_operation``.

    Returns:
        The result of ``_post_file_operation`` for the ``/unzip`` endpoint,
        using ``UnzipPayload`` as the payload model.
    """
    request_fields: dict[str, Any] = {"files": file}
    # Leave "password" out of the payload unless one was given.
    if password is not None:
        request_fields.update(password=password)

    return self._post_file_operation(
        endpoint="/unzip",
        payload_model=UnzipPayload,
        payload=request_fields,
        timeout=timeout,
        extra_body=extra_body,
        extra_headers=extra_headers,
        extra_query=extra_query,
    )

def convert_to_excel(
self,
file: PdfRestFile | Sequence[PdfRestFile],
Expand Down Expand Up @@ -3889,6 +3943,58 @@ async def merge_pdfs(
timeout=timeout,
)

async def zip_files(
    self,
    files: PdfRestFile | Sequence[PdfRestFile],
    *,
    output: str | None = None,
    extra_query: Query | None = None,
    extra_headers: AnyMapping | None = None,
    extra_body: Body | None = None,
    timeout: TimeoutTypes | None = None,
) -> PdfRestFileBasedResponse:
    """Asynchronously bundle one or more files into a single zip archive.

    Args:
        files: The file, or sequence of files, to send under the
            ``files`` payload key.
        output: Optional value sent under the ``output`` payload key. The
            key is left out of the payload entirely when this is ``None``.
        extra_query: Forwarded unchanged to ``_post_file_operation``.
        extra_headers: Forwarded unchanged to ``_post_file_operation``.
        extra_body: Forwarded unchanged to ``_post_file_operation``.
        timeout: Forwarded unchanged to ``_post_file_operation``.

    Returns:
        The awaited result of ``_post_file_operation`` for the ``/zip``
        endpoint, using ``ZipPayload`` as the payload model.
    """
    # "output" only appears in the payload when the caller supplied one.
    request_fields: dict[str, Any] = (
        {"files": files}
        if output is None
        else {"files": files, "output": output}
    )
    return await self._post_file_operation(
        endpoint="/zip",
        payload_model=ZipPayload,
        payload=request_fields,
        timeout=timeout,
        extra_body=extra_body,
        extra_headers=extra_headers,
        extra_query=extra_query,
    )

async def unzip_file(
    self,
    file: PdfRestFile | Sequence[PdfRestFile],
    *,
    password: str | None = None,
    extra_query: Query | None = None,
    extra_headers: AnyMapping | None = None,
    extra_body: Body | None = None,
    timeout: TimeoutTypes | None = None,
) -> PdfRestFileBasedResponse:
    """Asynchronously unpack the contents of a zip archive.

    Args:
        file: The archive to extract; sent under the ``files`` payload key.
        password: Optional value sent under the ``password`` payload key.
            The key is omitted when this is ``None``.
        extra_query: Forwarded unchanged to ``_post_file_operation``.
        extra_headers: Forwarded unchanged to ``_post_file_operation``.
        extra_body: Forwarded unchanged to ``_post_file_operation``.
        timeout: Forwarded unchanged to ``_post_file_operation``.

    Returns:
        The awaited result of ``_post_file_operation`` for the ``/unzip``
        endpoint, using ``UnzipPayload`` as the payload model.
    """
    request_fields: dict[str, Any] = {"files": file}
    # Leave "password" out of the payload unless one was given.
    if password is not None:
        request_fields.update(password=password)

    return await self._post_file_operation(
        endpoint="/unzip",
        payload_model=UnzipPayload,
        payload=request_fields,
        timeout=timeout,
        extra_body=extra_body,
        extra_headers=extra_headers,
        extra_query=extra_query,
    )

async def convert_to_excel(
self,
file: PdfRestFile | Sequence[PdfRestFile],
Expand Down
51 changes: 49 additions & 2 deletions src/pdfrest/models/_internal.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,10 @@ def _serialize_file_ids(value: list[PdfRestFile]) -> str:
return ",".join(str(file.id) for file in value)


def _serialize_file_id_list(value: list[PdfRestFile]) -> list[str]:
return [str(file.id) for file in value]


def _bool_to_on_off(value: Any) -> Any:
if isinstance(value, bool):
return "on" if value else "off"
Expand Down Expand Up @@ -239,6 +243,49 @@ class DeletePayload(BaseModel):
]


class ZipPayload(BaseModel):
    """Request model that shapes caller options for the pdfRest zip call.

    Input files may be supplied under either the ``file`` or ``files`` key
    and are serialized, under the ``id`` serialization alias, as a list of
    file-id strings.
    """

    # At least one file is required. The raw input passes through
    # ``_ensure_list`` before validation (presumably so a single file can be
    # given without wrapping it in a list -- confirm against that helper).
    files: Annotated[
        list[PdfRestFile],
        Field(
            validation_alias=AliasChoices("file", "files"),
            serialization_alias="id",
            min_length=1,
        ),
        BeforeValidator(_ensure_list),
        PlainSerializer(_serialize_file_id_list),
    ]
    # Optional, non-empty when present; additionally checked by
    # ``_validate_output_prefix`` after the standard validation.
    output: Annotated[
        str | None,
        Field(default=None, min_length=1, serialization_alias="output"),
        AfterValidator(_validate_output_prefix),
    ] = None


class UnzipPayload(BaseModel):
    """Request model that shapes caller options for the pdfRest unzip call.

    Exactly one input file is accepted, under either the ``file`` or
    ``files`` key, and it is serialized under the ``id`` serialization alias.
    """

    # Exactly one entry (min and max length are both 1). The raw input
    # passes through ``_ensure_list`` first, then the validator built by
    # ``_allowed_mime_types`` runs with "application/zip" as the accepted
    # type. ``_serialize_as_first_file_id`` presumably emits the id of that
    # single file -- confirm against the helper.
    files: Annotated[
        list[PdfRestFile],
        Field(
            validation_alias=AliasChoices("file", "files"),
            serialization_alias="id",
            min_length=1,
            max_length=1,
        ),
        BeforeValidator(_ensure_list),
        AfterValidator(
            _allowed_mime_types("application/zip", error_msg="Must be a ZIP file")
        ),
        PlainSerializer(_serialize_as_first_file_id),
    ]
    # Optional archive password; must be non-empty when provided.
    password: Annotated[
        str | None,
        Field(min_length=1, default=None),
    ] = None


# Integer constrained to be >= 1 (``Field(ge=1)``) whose plain serializer
# emits its ``str`` form.
PageNumber = Annotated[int, Field(ge=1), PlainSerializer(lambda x: str(x))]


Expand Down Expand Up @@ -1365,7 +1412,7 @@ class PdfRestRawUploadedFile(BaseModel):
name: Annotated[str, Field(description="The name of the file")]
id: Annotated[PdfRestFileID, Field(description="The id of the file")]
output_url: Annotated[
str | None,
list[HttpUrl] | HttpUrl | None,
Comment thread
datalogics-cgreen marked this conversation as resolved.
Field(description="The url of the unzipped file", alias="outputUrl"),
BeforeValidator(_ensure_list),
] = None
Expand All @@ -1384,7 +1431,7 @@ class PdfRestRawFileResponse(BaseModel):
BeforeValidator(_ensure_list),
]
output_urls: Annotated[
list[HttpUrl] | None,
list[HttpUrl] | HttpUrl | None,
Field(alias="outputUrl", description="The url of the file"),
BeforeValidator(_ensure_list),
] = None
Expand Down
89 changes: 89 additions & 0 deletions tests/live/test_live_unzip.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
from __future__ import annotations

import zipfile
from pathlib import Path

import pytest

from pdfrest import AsyncPdfRestClient, PdfRestApiError, PdfRestClient
from pdfrest.models import PdfRestFile

from ..resources import get_test_resource_path


def _build_zip_payload(tmp_path: Path) -> Path:
    """Write ``sample.zip`` under *tmp_path* and return its path.

    The archive holds a single member, ``report.pdf``, copied from the test
    resource of the same name.
    """
    archive_path = tmp_path / "sample.zip"
    with zipfile.ZipFile(archive_path, "w") as archive:
        archive.write(get_test_resource_path("report.pdf"), arcname="report.pdf")
    return archive_path


@pytest.fixture(scope="module")
def uploaded_zip_file(
    pdfrest_api_key: str,
    pdfrest_live_base_url: str,
    tmp_path_factory: pytest.TempPathFactory,
) -> PdfRestFile:
    """Upload a freshly built sample zip once per module and return the uploaded file."""
    staging_dir = tmp_path_factory.mktemp("zip-input")
    archive_path = _build_zip_payload(staging_dir)
    with PdfRestClient(
        base_url=pdfrest_live_base_url,
        api_key=pdfrest_api_key,
    ) as client:
        uploaded = client.files.create_from_paths([archive_path])
        # Only one path was uploaded, so the first entry is the archive.
        return uploaded[0]


def test_live_unzip_file(
    pdfrest_api_key: str,
    pdfrest_live_base_url: str,
    uploaded_zip_file: PdfRestFile,
) -> None:
    """Unzip the uploaded archive with the sync client and check the outputs."""
    with PdfRestClient(
        base_url=pdfrest_live_base_url,
        api_key=pdfrest_api_key,
    ) as client:
        response = client.unzip_file(uploaded_zip_file)

    extracted = response.output_files
    assert extracted
    assert all(item.size > 0 for item in extracted)
    assert all(item.name for item in extracted)
    # The response should report the uploaded archive as its input.
    assert str(response.input_id) == str(uploaded_zip_file.id)


def test_live_unzip_invalid_override(
    pdfrest_api_key: str,
    pdfrest_live_base_url: str,
    uploaded_zip_file: PdfRestFile,
) -> None:
    """Expect ``PdfRestApiError`` mentioning "id" when ``extra_body`` carries a non-UUID ``id``."""
    with PdfRestClient(
        base_url=pdfrest_live_base_url,
        api_key=pdfrest_api_key,
    ) as client:
        with pytest.raises(PdfRestApiError, match="id"):
            client.unzip_file(
                uploaded_zip_file,
                extra_body={"id": "not-a-uuid"},
            )


@pytest.mark.asyncio
async def test_live_unzip_file_async(
    pdfrest_api_key: str,
    pdfrest_live_base_url: str,
    uploaded_zip_file: PdfRestFile,
) -> None:
    """Unzip the uploaded archive with the async client, passing an extra query parameter."""
    async with AsyncPdfRestClient(
        base_url=pdfrest_live_base_url,
        api_key=pdfrest_api_key,
    ) as client:
        response = await client.unzip_file(
            uploaded_zip_file,
            extra_query={"trace": "async"},
        )

    extracted = response.output_files
    assert extracted
    assert all(item.size > 0 for item in extracted)
    assert all(item.name for item in extracted)
    # The response should report the uploaded archive as its input.
    assert str(response.input_id) == str(uploaded_zip_file.id)
90 changes: 90 additions & 0 deletions tests/live/test_live_zip.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
from __future__ import annotations

import pytest

from pdfrest import AsyncPdfRestClient, PdfRestApiError, PdfRestClient
from pdfrest.models import PdfRestFile

from ..resources import get_test_resource_path


@pytest.fixture(scope="module")
def uploaded_zip_inputs(
    pdfrest_api_key: str,
    pdfrest_live_base_url: str,
) -> list[PdfRestFile]:
    """Upload the ``report.pdf`` and ``report.docx`` test resources once per module."""
    resource_names = ("report.pdf", "report.docx")
    paths = [get_test_resource_path(name) for name in resource_names]
    with PdfRestClient(
        base_url=pdfrest_live_base_url,
        api_key=pdfrest_api_key,
    ) as client:
        uploaded = client.files.create_from_paths(paths)
        return uploaded


def test_live_zip_files(
    pdfrest_api_key: str,
    pdfrest_live_base_url: str,
    uploaded_zip_inputs: list[PdfRestFile],
) -> None:
    """Zip the uploaded inputs with the sync client and check the resulting archive."""
    with PdfRestClient(
        base_url=pdfrest_live_base_url,
        api_key=pdfrest_api_key,
    ) as client:
        response = client.zip_files(
            uploaded_zip_inputs,
            output="live-zip",
        )

    archive = response.output_file
    assert archive.name.startswith("live-zip")
    assert archive.name.endswith(".zip")
    assert archive.type == "application/zip"
    assert archive.size > 0
    # Every uploaded input must be reported back, regardless of order.
    uploaded_ids = set(map(str, (item.id for item in uploaded_zip_inputs)))
    reported_ids = set(map(str, response.input_ids))
    assert uploaded_ids == reported_ids


def test_live_zip_files_invalid_id_override(
    pdfrest_api_key: str,
    pdfrest_live_base_url: str,
    uploaded_zip_inputs: list[PdfRestFile],
) -> None:
    """Expect ``PdfRestApiError`` mentioning "id" when ``extra_body`` carries a non-UUID ``id``."""
    with PdfRestClient(
        base_url=pdfrest_live_base_url,
        api_key=pdfrest_api_key,
    ) as client:
        with pytest.raises(PdfRestApiError, match="id"):
            client.zip_files(
                uploaded_zip_inputs,
                extra_body={"id": "not-a-uuid"},
            )


@pytest.mark.asyncio
async def test_live_zip_files_async(
    pdfrest_api_key: str,
    pdfrest_live_base_url: str,
    uploaded_zip_inputs: list[PdfRestFile],
) -> None:
    """Zip the uploaded inputs with the async client, passing an extra query parameter."""
    async with AsyncPdfRestClient(
        base_url=pdfrest_live_base_url,
        api_key=pdfrest_api_key,
    ) as client:
        response = await client.zip_files(
            uploaded_zip_inputs,
            output="live-zip-async",
            extra_query={"trace": "async"},
        )

    archive = response.output_file
    assert archive.name.startswith("live-zip-async")
    assert archive.name.endswith(".zip")
    assert archive.type == "application/zip"
    assert archive.size > 0
    # Every uploaded input must be reported back, regardless of order.
    uploaded_ids = set(map(str, (item.id for item in uploaded_zip_inputs)))
    reported_ids = set(map(str, response.input_ids))
    assert uploaded_ids == reported_ids
Loading