From aeef672612d1a1a2400f3456afb5e08fe8f8259b Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Tue, 29 Jul 2025 10:36:49 +0200 Subject: [PATCH 1/3] :sparkles: add support for remote sources --- .github/workflows/_test-integrations.yml | 1 + mindee/client.py | 13 --------- mindee/client_mixin.py | 15 +++++++++- mindee/client_v2.py | 17 +++++++----- mindee/mindee_http/mindee_api_v2.py | 35 ++++++++++++++++-------- tests/data | 2 +- tests/test_client_v2_integration.py | 20 ++++++++++++++ 7 files changed, 70 insertions(+), 33 deletions(-) diff --git a/.github/workflows/_test-integrations.yml b/.github/workflows/_test-integrations.yml index af1a875f..d1bdcb8b 100644 --- a/.github/workflows/_test-integrations.yml +++ b/.github/workflows/_test-integrations.yml @@ -49,6 +49,7 @@ jobs: WORKFLOW_ID: ${{ secrets.WORKFLOW_ID_SE_TESTS }} MINDEE_V2_API_KEY: ${{ secrets.MINDEE_V2_SE_TESTS_API_KEY }} MINDEE_V2_FINDOC_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID }} + MINDEE_V2_SE_TESTS_BLANK_PDF_URL: ${{ secrets.MINDEE_V2_SE_TESTS_BLANK_PDF_URL }} run: | pytest -m integration diff --git a/mindee/client.py b/mindee/client.py index 1acf1aa6..6b8d3ba1 100644 --- a/mindee/client.py +++ b/mindee/client.py @@ -562,16 +562,3 @@ def create_endpoint( ) version = "1" return self._build_endpoint(endpoint_name, account_name, version) - - @staticmethod - def source_from_url( - url: str, - ) -> UrlInputSource: - """ - Load a document from a URL. 
- - :param url: Raw byte input - """ - return UrlInputSource( - url, - ) diff --git a/mindee/client_mixin.py b/mindee/client_mixin.py index 833f77ee..dde7fcc4 100644 --- a/mindee/client_mixin.py +++ b/mindee/client_mixin.py @@ -2,7 +2,7 @@ from typing import BinaryIO, Union from mindee.error import MindeeClientError -from mindee.input import Base64Input, BytesInput, FileInput, PathInput +from mindee.input import Base64Input, BytesInput, FileInput, PathInput, UrlInputSource class ClientMixin: @@ -89,3 +89,16 @@ def _validate_async_params( ) if max_retries < min_retries: raise MindeeClientError(f"Cannot set retries to less than {min_retries}.") + + @staticmethod + def source_from_url( + url: str, + ) -> UrlInputSource: + """ + Load a document from a URL. + + :param url: Raw byte input + """ + return UrlInputSource( + url, + ) diff --git a/mindee/client_v2.py b/mindee/client_v2.py index 288ec0e3..6819b2cf 100644 --- a/mindee/client_v2.py +++ b/mindee/client_v2.py @@ -1,9 +1,10 @@ from time import sleep -from typing import Optional +from typing import Optional, Union from mindee.client_mixin import ClientMixin from mindee.error.mindee_error import MindeeError from mindee.error.mindee_http_error_v2 import handle_error_v2 +from mindee.input import UrlInputSource from mindee.input.inference_parameters import InferenceParameters from mindee.input.polling_options import PollingOptions from mindee.input.sources.local_input_source import LocalInputSource @@ -38,13 +39,14 @@ def __init__(self, api_key: Optional[str] = None) -> None: self.mindee_api = MindeeApiV2(api_key) def enqueue_inference( - self, input_source: LocalInputSource, params: InferenceParameters + self, + input_source: Union[LocalInputSource, UrlInputSource], + params: InferenceParameters, ) -> JobResponse: """ Enqueues a document to a given model. - :param input_source: The document/source file to use. - Has to be created beforehand. + :param input_source: The document/source file to use. 
Can be local or remote. :param params: Parameters to set when sending a file. :return: A valid inference response. @@ -95,13 +97,14 @@ def get_inference(self, inference_id: str) -> InferenceResponse: return InferenceResponse(dict_response) def enqueue_and_get_inference( - self, input_source: LocalInputSource, params: InferenceParameters + self, + input_source: Union[LocalInputSource, UrlInputSource], + params: InferenceParameters, ) -> InferenceResponse: """ Enqueues to an asynchronous endpoint and automatically polls for a response. - :param input_source: The document/source file to use. - Has to be created beforehand. + :param input_source: The document/source file to use. Can be local or remote. :param params: Parameters to set when sending a file. diff --git a/mindee/mindee_http/mindee_api_v2.py b/mindee/mindee_http/mindee_api_v2.py index 0c5c1e62..7645abc8 100644 --- a/mindee/mindee_http/mindee_api_v2.py +++ b/mindee/mindee_http/mindee_api_v2.py @@ -1,10 +1,10 @@ import os -from typing import Dict, Optional +from typing import Dict, Optional, Union import requests from mindee.error.mindee_error import MindeeApiV2Error -from mindee.input import LocalInputSource +from mindee.input import LocalInputSource, UrlInputSource from mindee.input.inference_parameters import InferenceParameters from mindee.logger import logger from mindee.mindee_http.base_settings import USER_AGENT @@ -68,7 +68,9 @@ def set_from_env(self) -> None: logger.debug("Value was set from env: %s", name) def req_post_inference_enqueue( - self, input_source: LocalInputSource, params: InferenceParameters + self, + input_source: Union[LocalInputSource, UrlInputSource], + params: InferenceParameters, ) -> requests.Response: """ Make an asynchronous request to POST a document for prediction on the V2 API. 
@@ -87,14 +89,25 @@ def req_post_inference_enqueue( if params.alias and len(params.alias): data["alias"] = params.alias - files = {"file": input_source.read_contents(params.close_file)} - response = requests.post( - url=url, - files=files, - headers=self.base_headers, - data=data, - timeout=self.request_timeout, - ) + if isinstance(input_source, LocalInputSource): + files = {"file": input_source.read_contents(params.close_file)} + response = requests.post( + url=url, + files=files, + headers=self.base_headers, + data=data, + timeout=self.request_timeout, + ) + elif isinstance(input_source, UrlInputSource): + data["url"] = input_source.url + response = requests.post( + url=url, + headers=self.base_headers, + data=data, + timeout=self.request_timeout, + ) + else: + raise MindeeApiV2Error("Invalid input source.") return response def req_get_job(self, job_id: str) -> requests.Response: diff --git a/tests/data b/tests/data index 5e9068a7..e2912fbd 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit 5e9068a7eaed322d0738212ea624db7aa00e9a76 +Subproject commit e2912fbd362b7ccf595a5a8d6cc6a67f78901cde diff --git a/tests/test_client_v2_integration.py b/tests/test_client_v2_integration.py index 514dbd2c..f805466d 100644 --- a/tests/test_client_v2_integration.py +++ b/tests/test_client_v2_integration.py @@ -106,3 +106,23 @@ def test_invalid_uuid_must_throw_error_422(v2_client: ClientV2) -> None: exc: MindeeHTTPErrorV2 = exc_info.value assert exc.status == 422 + + +@pytest.mark.integration +@pytest.mark.v2 +def test_url_input_source_must_not_raise_errors( + v2_client: ClientV2, + findoc_model_id: str, +) -> None: + """ + Load a blank PDF from an HTTPS URL and make sure the inference call completes without raising any errors. 
+ """ + url = os.getenv("MINDEE_V2_SE_TESTS_BLANK_PDF_URL") + + input_doc = v2_client.source_from_url(url) + options = InferenceParameters(findoc_model_id) + response: InferenceResponse = v2_client.enqueue_and_get_inference( + input_doc, options + ) + assert response is not None + assert response.inference is not None From 702f800d51ee92d8b272c6e50638507f0c914912 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Tue, 29 Jul 2025 10:44:15 +0200 Subject: [PATCH 2/3] upgrade tests --- mindee/parsing/v2/simple_field.py | 4 ++++ tests/data | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/mindee/parsing/v2/simple_field.py b/mindee/parsing/v2/simple_field.py index 4e0ee4cb..94650f2f 100644 --- a/mindee/parsing/v2/simple_field.py +++ b/mindee/parsing/v2/simple_field.py @@ -15,4 +15,8 @@ def __init__(self, raw_response: StringDict, indent_level: int = 0): self.value = raw_response["value"] = raw_response.get("value", None) def __str__(self) -> str: + if type(self.value) is int: + return str(f"{self.value}.0") + if type(self.value) is bool: + return "True" if self.value else "False" return str(self.value if self.value is not None else "") diff --git a/tests/data b/tests/data index e2912fbd..02ace39f 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit e2912fbd362b7ccf595a5a8d6cc6a67f78901cde +Subproject commit 02ace39f3b8cdd99dcac4f060d5b24b67ff5f2ab From d3e310340757823c7feecce503d307e1ef05d292 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Tue, 29 Jul 2025 10:54:39 +0200 Subject: [PATCH 3/3] :recycle: refactor v2 internals into field submodule --- mindee/client_v2.py | 2 +- mindee/input/local_response.py | 2 +- mindee/parsing/v2/field/__init__.py | 0 mindee/parsing/v2/{ => field}/base_field.py | 2 +- mindee/parsing/v2/{ => field}/common_response.py | 0 mindee/parsing/v2/{ => field}/dynamic_field.py | 6 +++--- .../v2/{ => 
field}/inference_result_fields.py | 6 +++++- mindee/parsing/v2/{ => field}/list_field.py | 6 +++++- mindee/parsing/v2/{ => field}/object_field.py | 6 +++--- mindee/parsing/v2/{ => field}/simple_field.py | 14 ++++++++------ mindee/parsing/v2/inference_response.py | 2 +- mindee/parsing/v2/inference_result.py | 2 +- mindee/parsing/v2/job_response.py | 2 +- tests/v2/test_inference_response.py | 6 +++--- 14 files changed, 33 insertions(+), 23 deletions(-) create mode 100644 mindee/parsing/v2/field/__init__.py rename mindee/parsing/v2/{ => field}/base_field.py (71%) rename mindee/parsing/v2/{ => field}/common_response.py (100%) rename mindee/parsing/v2/{ => field}/dynamic_field.py (86%) rename mindee/parsing/v2/{ => field}/inference_result_fields.py (90%) rename mindee/parsing/v2/{ => field}/list_field.py (88%) rename mindee/parsing/v2/{ => field}/object_field.py (87%) rename mindee/parsing/v2/{ => field}/simple_field.py (56%) diff --git a/mindee/client_v2.py b/mindee/client_v2.py index 6819b2cf..600c7c68 100644 --- a/mindee/client_v2.py +++ b/mindee/client_v2.py @@ -14,7 +14,7 @@ is_valid_get_response, is_valid_post_response, ) -from mindee.parsing.v2.common_response import CommonStatus +from mindee.parsing.v2.field.common_response import CommonStatus from mindee.parsing.v2.inference_response import InferenceResponse from mindee.parsing.v2.job_response import JobResponse diff --git a/mindee/input/local_response.py b/mindee/input/local_response.py index c8a27fbb..b9b43c15 100644 --- a/mindee/input/local_response.py +++ b/mindee/input/local_response.py @@ -7,7 +7,7 @@ from typing import Any, BinaryIO, Dict, Type, TypeVar, Union from mindee.error.mindee_error import MindeeError -from mindee.parsing.v2.common_response import CommonResponse +from mindee.parsing.v2.field.common_response import CommonResponse class LocalResponse: diff --git a/mindee/parsing/v2/field/__init__.py b/mindee/parsing/v2/field/__init__.py new file mode 100644 index 00000000..e69de29b diff --git 
a/mindee/parsing/v2/base_field.py b/mindee/parsing/v2/field/base_field.py similarity index 71% rename from mindee/parsing/v2/base_field.py rename to mindee/parsing/v2/field/base_field.py index 6d965111..27393ab3 100644 --- a/mindee/parsing/v2/base_field.py +++ b/mindee/parsing/v2/field/base_field.py @@ -1,6 +1,6 @@ from typing import List, Optional -from mindee.parsing.v2.dynamic_field import DynamicField +from mindee.parsing.v2.field.dynamic_field import DynamicField class BaseField(DynamicField): diff --git a/mindee/parsing/v2/common_response.py b/mindee/parsing/v2/field/common_response.py similarity index 100% rename from mindee/parsing/v2/common_response.py rename to mindee/parsing/v2/field/common_response.py diff --git a/mindee/parsing/v2/dynamic_field.py b/mindee/parsing/v2/field/dynamic_field.py similarity index 86% rename from mindee/parsing/v2/dynamic_field.py rename to mindee/parsing/v2/field/dynamic_field.py index 17d323e7..53379aaa 100644 --- a/mindee/parsing/v2/dynamic_field.py +++ b/mindee/parsing/v2/field/dynamic_field.py @@ -34,13 +34,13 @@ def get_field_type(raw_response: StringDict, indent_level: int = 0) -> DynamicFi """Get appropriate field types.""" if isinstance(raw_response, dict): if "value" in raw_response: - field_file = import_module("mindee.parsing.v2.simple_field") + field_file = import_module("mindee.parsing.v2.field.simple_field") field_class = getattr(field_file, FieldType.SIMPLE.value) elif "items" in raw_response: - field_file = import_module("mindee.parsing.v2.list_field") + field_file = import_module("mindee.parsing.v2.field.list_field") field_class = getattr(field_file, FieldType.LIST.value) elif "fields" in raw_response: - field_file = import_module("mindee.parsing.v2.object_field") + field_file = import_module("mindee.parsing.v2.field.object_field") field_class = getattr(field_file, FieldType.OBJECT.value) else: raise MindeeApiV2Error(f"Unrecognized field format in {raw_response}.") diff --git 
a/mindee/parsing/v2/inference_result_fields.py b/mindee/parsing/v2/field/inference_result_fields.py similarity index 90% rename from mindee/parsing/v2/inference_result_fields.py rename to mindee/parsing/v2/field/inference_result_fields.py index 5ffbfba9..ad52d072 100644 --- a/mindee/parsing/v2/inference_result_fields.py +++ b/mindee/parsing/v2/field/inference_result_fields.py @@ -1,7 +1,11 @@ from typing import Dict from mindee.parsing.common.string_dict import StringDict -from mindee.parsing.v2.dynamic_field import DynamicField, FieldType, get_field_type +from mindee.parsing.v2.field.dynamic_field import ( + DynamicField, + FieldType, + get_field_type, +) class InferenceResultFields(Dict[str, DynamicField]): diff --git a/mindee/parsing/v2/list_field.py b/mindee/parsing/v2/field/list_field.py similarity index 88% rename from mindee/parsing/v2/list_field.py rename to mindee/parsing/v2/field/list_field.py index fc578086..ce99b389 100644 --- a/mindee/parsing/v2/list_field.py +++ b/mindee/parsing/v2/field/list_field.py @@ -1,7 +1,11 @@ from typing import List from mindee.parsing.common.string_dict import StringDict -from mindee.parsing.v2.dynamic_field import DynamicField, FieldType, get_field_type +from mindee.parsing.v2.field.dynamic_field import ( + DynamicField, + FieldType, + get_field_type, +) class ListField(DynamicField): diff --git a/mindee/parsing/v2/object_field.py b/mindee/parsing/v2/field/object_field.py similarity index 87% rename from mindee/parsing/v2/object_field.py rename to mindee/parsing/v2/field/object_field.py index 994a7b1f..5a751e34 100644 --- a/mindee/parsing/v2/object_field.py +++ b/mindee/parsing/v2/field/object_field.py @@ -1,7 +1,7 @@ from mindee.parsing.common.string_dict import StringDict -from mindee.parsing.v2.base_field import BaseField -from mindee.parsing.v2.dynamic_field import FieldType -from mindee.parsing.v2.inference_result_fields import InferenceResultFields +from mindee.parsing.v2.field.base_field import BaseField +from 
mindee.parsing.v2.field.dynamic_field import FieldType +from mindee.parsing.v2.field.inference_result_fields import InferenceResultFields class ObjectField(BaseField): diff --git a/mindee/parsing/v2/simple_field.py b/mindee/parsing/v2/field/simple_field.py similarity index 56% rename from mindee/parsing/v2/simple_field.py rename to mindee/parsing/v2/field/simple_field.py index 94650f2f..c9cbd296 100644 --- a/mindee/parsing/v2/simple_field.py +++ b/mindee/parsing/v2/field/simple_field.py @@ -1,8 +1,8 @@ from typing import Union from mindee.parsing.common.string_dict import StringDict -from mindee.parsing.v2.base_field import BaseField -from mindee.parsing.v2.dynamic_field import FieldType +from mindee.parsing.v2.field.base_field import BaseField +from mindee.parsing.v2.field.dynamic_field import FieldType class SimpleField(BaseField): @@ -12,11 +12,13 @@ class SimpleField(BaseField): def __init__(self, raw_response: StringDict, indent_level: int = 0): super().__init__(FieldType.SIMPLE, indent_level) - self.value = raw_response["value"] = raw_response.get("value", None) + value = raw_response.get("value", None) + if isinstance(value, int) and not isinstance(raw_response.get("value"), bool): + self.value = float(value) + else: + self.value = value def __str__(self) -> str: - if type(self.value) is int: - return str(f"{self.value}.0") - if type(self.value) is bool: + if isinstance(self.value, bool): return "True" if self.value else "False" return str(self.value if self.value is not None else "") diff --git a/mindee/parsing/v2/inference_response.py b/mindee/parsing/v2/inference_response.py index f1bb71c2..4a73cba7 100644 --- a/mindee/parsing/v2/inference_response.py +++ b/mindee/parsing/v2/inference_response.py @@ -1,5 +1,5 @@ from mindee.parsing.common.string_dict import StringDict -from mindee.parsing.v2.common_response import CommonResponse +from mindee.parsing.v2.field.common_response import CommonResponse from mindee.parsing.v2.inference import Inference diff --git 
a/mindee/parsing/v2/inference_result.py b/mindee/parsing/v2/inference_result.py index 2399752a..ae6511ac 100644 --- a/mindee/parsing/v2/inference_result.py +++ b/mindee/parsing/v2/inference_result.py @@ -1,7 +1,7 @@ from typing import Optional from mindee.parsing.common.string_dict import StringDict -from mindee.parsing.v2.inference_result_fields import InferenceResultFields +from mindee.parsing.v2.field.inference_result_fields import InferenceResultFields from mindee.parsing.v2.inference_result_options import InferenceResultOptions diff --git a/mindee/parsing/v2/job_response.py b/mindee/parsing/v2/job_response.py index ae1e3656..6ecfbb0d 100644 --- a/mindee/parsing/v2/job_response.py +++ b/mindee/parsing/v2/job_response.py @@ -1,5 +1,5 @@ from mindee.parsing.common.string_dict import StringDict -from mindee.parsing.v2.common_response import CommonResponse +from mindee.parsing.v2.field.common_response import CommonResponse from mindee.parsing.v2.job import Job diff --git a/tests/v2/test_inference_response.py b/tests/v2/test_inference_response.py index 55ddb3f6..070f175c 100644 --- a/tests/v2/test_inference_response.py +++ b/tests/v2/test_inference_response.py @@ -4,13 +4,13 @@ import pytest +from mindee.parsing.v2.field.list_field import ListField +from mindee.parsing.v2.field.object_field import ObjectField +from mindee.parsing.v2.field.simple_field import SimpleField from mindee.parsing.v2.inference import Inference from mindee.parsing.v2.inference_file import InferenceFile from mindee.parsing.v2.inference_model import InferenceModel from mindee.parsing.v2.inference_response import InferenceResponse -from mindee.parsing.v2.list_field import ListField -from mindee.parsing.v2.object_field import ObjectField -from mindee.parsing.v2.simple_field import SimpleField from tests.test_inputs import V2_DATA_DIR