diff --git a/mindee/parsing/v2/inference_result.py b/mindee/parsing/v2/inference_result.py index ae6511ac..1867fb69 100644 --- a/mindee/parsing/v2/inference_result.py +++ b/mindee/parsing/v2/inference_result.py @@ -2,7 +2,7 @@ from mindee.parsing.common.string_dict import StringDict from mindee.parsing.v2.field.inference_result_fields import InferenceResultFields -from mindee.parsing.v2.inference_result_options import InferenceResultOptions +from mindee.parsing.v2.raw_text import RawText class InferenceResult: @@ -10,19 +10,14 @@ class InferenceResult: fields: InferenceResultFields """Fields contained in the inference.""" - options: Optional[InferenceResultOptions] + raw_text: Optional[RawText] = None """Potential options retrieved alongside the inference.""" def __init__(self, raw_response: StringDict) -> None: self.fields = InferenceResultFields(raw_response["fields"]) - self.options = ( - InferenceResultOptions(raw_response["options"]) - if raw_response.get("options") - else None - ) + if raw_response.get("raw_text"): + self.raw_text = RawText(raw_response["raw_text"]) def __str__(self) -> str: out_str = f"\n\nFields\n======{self.fields}" - if self.options: - out_str += f"\n\nOptions\n====={self.options}" return out_str diff --git a/mindee/parsing/v2/inference_result_options.py b/mindee/parsing/v2/inference_result_options.py deleted file mode 100644 index 8eafd547..00000000 --- a/mindee/parsing/v2/inference_result_options.py +++ /dev/null @@ -1,14 +0,0 @@ -from typing import List - -from mindee.parsing.common.string_dict import StringDict -from mindee.parsing.v2.raw_text import RawText - - -class InferenceResultOptions: - """Optional information about the document.""" - - raw_texts: List[RawText] - """List of text found per page.""" - - def __init__(self, raw_response: StringDict): - self.raw_texts = [RawText(raw_text) for raw_text in raw_response["raw_texts"]] diff --git a/mindee/parsing/v2/job.py b/mindee/parsing/v2/job.py index 153eb06f..3b43f0de 100644 --- a/mindee/parsing/v2/job.py +++ b/mindee/parsing/v2/job.py @@ -44,6 +44,6 @@ def __init__(self, raw_response: StringDict) -> None: self.filename = raw_response["filename"] self.result_url = raw_response["result_url"] self.alias = raw_response["alias"] - self.webhooks = [] - for webhook in raw_response["webhooks"]: - self.webhooks.append(JobWebhook(webhook)) + self.webhooks = [ + JobWebhook(webhook) for webhook in raw_response.get("webhooks", []) + ] diff --git a/mindee/parsing/v2/raw_text.py b/mindee/parsing/v2/raw_text.py index 50113eb6..491c8ddb 100644 --- a/mindee/parsing/v2/raw_text.py +++ b/mindee/parsing/v2/raw_text.py @@ -1,14 +1,14 @@ +from typing import List + from mindee.parsing.common.string_dict import StringDict +from mindee.parsing.v2.raw_text_page import RawTextPage class RawText: """Raw text extracted from the document.""" - page: int + pages: List[RawTextPage] """Page the raw text was found on.""" - content: str - """Content of the raw text.""" def __init__(self, raw_response: StringDict): - self.page = raw_response["page"] - self.content = raw_response["content"] + self.pages = [RawTextPage(page) for page in raw_response.get("pages", [])] diff --git a/mindee/parsing/v2/raw_text_page.py b/mindee/parsing/v2/raw_text_page.py new file mode 100644 index 00000000..b10c68c7 --- /dev/null +++ b/mindee/parsing/v2/raw_text_page.py @@ -0,0 +1,11 @@ +from mindee.parsing.common.string_dict import StringDict + + +class RawTextPage: + """Raw text extracted from the page.""" + + content: str + """Content of the raw text.""" + + def __init__(self, raw_response: StringDict): + self.content = raw_response["content"] diff --git a/tests/data b/tests/data index f0175f0e..f6eb112b 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit f0175f0ee644b57b409e6ad7e1c030f28fbe57ef +Subproject commit f6eb112b6b5bd95b3f591b839b6c4920e5ffe80c diff --git a/tests/test_client_v2_integration.py b/tests/test_client_v2_integration.py index f805466d..e7bd364e 100644 --- a/tests/test_client_v2_integration.py +++ b/tests/test_client_v2_integration.py @@ -5,7 +5,7 @@ import pytest -from mindee import ClientV2, InferenceParameters +from mindee import ClientV2, InferenceParameters, PathInput, UrlInputSource from mindee.error.mindee_http_error_v2 import MindeeHTTPErrorV2 from mindee.parsing.v2.inference_response import InferenceResponse from tests.test_inputs import FILE_TYPES_DIR, PRODUCT_DATA_DIR @@ -39,7 +39,7 @@ def test_parse_file_empty_multiple_pages_must_succeed( input_path: Path = FILE_TYPES_DIR / "pdf" / "multipage_cut-2.pdf" assert input_path.exists(), f"sample file missing: {input_path}" - input_doc = v2_client.source_from_path(input_path) + input_doc = PathInput(input_path) options = InferenceParameters(findoc_model_id) response: InferenceResponse = v2_client.enqueue_and_get_inference( @@ -67,7 +67,7 @@ def test_parse_file_filled_single_page_must_succeed( input_path: Path = PRODUCT_DATA_DIR / "financial_document" / "default_sample.jpg" assert input_path.exists(), f"sample file missing: {input_path}" - input_doc = v2_client.source_from_path(input_path) + input_doc = PathInput(input_path) options = InferenceParameters(findoc_model_id) response: InferenceResponse = v2_client.enqueue_and_get_inference( @@ -98,7 +98,7 @@ def test_invalid_uuid_must_throw_error_422(v2_client: ClientV2) -> None: input_path: Path = FILE_TYPES_DIR / "pdf" / "multipage_cut-2.pdf" assert input_path.exists() - input_doc = v2_client.source_from_path(input_path) + input_doc = PathInput(input_path) options = InferenceParameters("INVALID MODEL ID") with pytest.raises(MindeeHTTPErrorV2) as exc_info: @@ -119,7 +119,7 @@ def test_url_input_source_must_not_raise_errors( """ url = os.getenv("MINDEE_V2_SE_TESTS_BLANK_PDF_URL") - input_doc = v2_client.source_from_url(url) + input_doc = UrlInputSource(url) options = InferenceParameters(findoc_model_id) response: InferenceResponse = v2_client.enqueue_and_get_inference( input_doc, options diff --git a/tests/v2/test_inference_response.py b/tests/v2/test_inference_response.py index 49b21ed4..9c4d9bce 100644 --- a/tests/v2/test_inference_response.py +++ b/tests/v2/test_inference_response.py @@ -41,62 +41,60 @@ def test_deep_nested_fields(): inference_result = InferenceResponse(json_sample) assert isinstance(inference_result.inference, Inference) assert isinstance( - inference_result.inference.result.fields.field_simple, SimpleField + inference_result.inference.result.fields["field_simple"], SimpleField ) assert isinstance( - inference_result.inference.result.fields.field_object, ObjectField + inference_result.inference.result.fields["field_object"], ObjectField ) assert isinstance( - inference_result.inference.result.fields.field_object.fields["sub_object_list"], + inference_result.inference.result.fields["field_object"].fields[ + "sub_object_list" + ], ListField, ) assert isinstance( - inference_result.inference.result.fields.field_object.fields[ + inference_result.inference.result.fields["field_object"].fields[ "sub_object_object" ], ObjectField, ) assert isinstance( - inference_result.inference.result.fields.field_object.fields[ - "sub_object_object" - ].fields, + inference_result.inference.result.fields["field_object"] + .fields["sub_object_object"] + .fields, dict, ) assert isinstance( - inference_result.inference.result.fields.field_object.fields[ - "sub_object_object" - ].fields["sub_object_object_sub_object_list"], + inference_result.inference.result.fields["field_object"] + .fields["sub_object_object"] + .fields["sub_object_object_sub_object_list"], ListField, ) assert isinstance( - inference_result.inference.result.fields.field_object.fields[ - "sub_object_object" - ] + inference_result.inference.result.fields["field_object"] + .fields["sub_object_object"] .fields["sub_object_object_sub_object_list"] .items, list, ) assert isinstance( - inference_result.inference.result.fields.field_object.fields[ - "sub_object_object" - ] + inference_result.inference.result.fields["field_object"] + .fields["sub_object_object"] .fields["sub_object_object_sub_object_list"] .items[0], ObjectField, ) assert isinstance( - inference_result.inference.result.fields.field_object.fields[ - "sub_object_object" - ] + inference_result.inference.result.fields["field_object"] + .fields["sub_object_object"] .fields["sub_object_object_sub_object_list"] .items[0] .fields["sub_object_object_sub_object_list_simple"], SimpleField, ) assert ( - inference_result.inference.result.fields.field_object.fields[ - "sub_object_object" - ] + inference_result.inference.result.fields["field_object"] + .fields["sub_object_object"] .fields["sub_object_object_sub_object_list"] .items[0] .fields["sub_object_object_sub_object_list_simple"] @@ -110,30 +108,32 @@ def test_standard_field_types(): json_sample, rst_sample = _get_inference_samples("standard_field_types") inference_result = InferenceResponse(json_sample) assert isinstance(inference_result.inference, Inference) - field_simple_string = inference_result.inference.result.fields.field_simple_string + field_simple_string = inference_result.inference.result.fields[ + "field_simple_string" + ] assert isinstance(field_simple_string, SimpleField) assert field_simple_string.value == "field_simple_string-value" assert field_simple_string.confidence == FieldConfidence.CERTAIN assert str(field_simple_string) == "field_simple_string-value" - field_simple_bool = inference_result.inference.result.fields.field_simple_bool + field_simple_bool = inference_result.inference.result.fields["field_simple_bool"] assert isinstance(field_simple_bool, SimpleField) assert field_simple_bool.value is True assert str(field_simple_bool) == "True" - field_simple_null = inference_result.inference.result.fields.field_simple_null + field_simple_null = inference_result.inference.result.fields["field_simple_null"] assert isinstance(field_simple_null, SimpleField) assert field_simple_null.value is None assert str(field_simple_null) == "" assert isinstance( - inference_result.inference.result.fields.field_object, ObjectField + inference_result.inference.result.fields["field_object"], ObjectField ) assert isinstance( - inference_result.inference.result.fields.field_simple_list, ListField + inference_result.inference.result.fields["field_simple_list"], ListField ) assert isinstance( - inference_result.inference.result.fields.field_object_list, ListField + inference_result.inference.result.fields["field_object_list"], ListField ) assert rst_sample == str(inference_result) @@ -144,11 +144,10 @@ def test_raw_texts(): inference_result = InferenceResponse(json_sample) assert isinstance(inference_result.inference, Inference) - assert inference_result.inference.result.options - assert len(inference_result.inference.result.options.raw_texts) == 2 - assert inference_result.inference.result.options.raw_texts[0].page == 0 + assert inference_result.inference.result.raw_text + assert len(inference_result.inference.result.raw_text.pages) == 2 assert ( - inference_result.inference.result.options.raw_texts[0].content + inference_result.inference.result.raw_text.pages[0].content == "This is the raw text of the first page..." ) @@ -161,13 +160,13 @@ def test_full_inference_response(): assert isinstance(inference_result.inference, Inference) assert inference_result.inference.id == "12345678-1234-1234-1234-123456789abc" assert isinstance(inference_result.inference.result.fields.date, SimpleField) - assert inference_result.inference.result.fields.date.value == "2019-11-02" + assert inference_result.inference.result.fields["date"].value == "2019-11-02" assert isinstance(inference_result.inference.result.fields.taxes, ListField) assert isinstance( - inference_result.inference.result.fields.taxes.items[0], ObjectField + inference_result.inference.result.fields["taxes"].items[0], ObjectField ) assert ( - inference_result.inference.result.fields.customer_address.fields.city.value + inference_result.inference.result.fields["customer_address"].fields.city.value == "New York" ) assert ( @@ -183,7 +182,7 @@ def test_full_inference_response(): assert inference_result.inference.file.page_count == 1 assert inference_result.inference.file.mime_type == "image/jpeg" assert not inference_result.inference.file.alias - assert not inference_result.inference.result.options + assert not inference_result.inference.result.raw_text @pytest.mark.v2 @@ -198,7 +197,7 @@ def test_field_locations_and_confidence() -> None: inference_result = InferenceResponse(json_sample) - date_field: SimpleField = inference_result.inference.result.fields.date + date_field: SimpleField = inference_result.inference.result.fields["date"] assert date_field.locations, "date field should expose locations" loc0 = date_field.locations[0]