Skip to content

Commit 7f28c6f

Browse files
authored
♻️ 💥 update raw text output from server (#352)
1 parent f81dff6 commit 7f28c6f

File tree

8 files changed

+65
-74
lines changed

8 files changed

+65
-74
lines changed

mindee/parsing/v2/inference_result.py

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,27 +2,22 @@
22

33
from mindee.parsing.common.string_dict import StringDict
44
from mindee.parsing.v2.field.inference_result_fields import InferenceResultFields
5-
from mindee.parsing.v2.inference_result_options import InferenceResultOptions
5+
from mindee.parsing.v2.raw_text import RawText
66

77

88
class InferenceResult:
99
"""Inference result info."""
1010

1111
fields: InferenceResultFields
1212
"""Fields contained in the inference."""
13-
options: Optional[InferenceResultOptions]
13+
raw_text: Optional[RawText] = None
1414
"""Raw text extracted from the document, if present in the server response."""
1515

1616
def __init__(self, raw_response: StringDict) -> None:
1717
self.fields = InferenceResultFields(raw_response["fields"])
18-
self.options = (
19-
InferenceResultOptions(raw_response["options"])
20-
if raw_response.get("options")
21-
else None
22-
)
18+
if raw_response.get("raw_text"):
19+
self.raw_text = RawText(raw_response["raw_text"])
2320

2421
def __str__(self) -> str:
2522
out_str = f"\n\nFields\n======{self.fields}"
26-
if self.options:
27-
out_str += f"\n\nOptions\n====={self.options}"
2823
return out_str

mindee/parsing/v2/inference_result_options.py

Lines changed: 0 additions & 14 deletions
This file was deleted.

mindee/parsing/v2/job.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,6 @@ def __init__(self, raw_response: StringDict) -> None:
4444
self.filename = raw_response["filename"]
4545
self.result_url = raw_response["result_url"]
4646
self.alias = raw_response["alias"]
47-
self.webhooks = []
48-
for webhook in raw_response["webhooks"]:
49-
self.webhooks.append(JobWebhook(webhook))
47+
self.webhooks = [
48+
JobWebhook(webhook) for webhook in raw_response.get("webhooks", [])
49+
]

mindee/parsing/v2/raw_text.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
1+
from typing import List
2+
13
from mindee.parsing.common.string_dict import StringDict
4+
from mindee.parsing.v2.raw_text_page import RawTextPage
25

36

47
class RawText:
58
"""Raw text extracted from the document."""
69

7-
page: int
10+
pages: List[RawTextPage]
811
"""Pages of raw text extracted from the document."""
9-
content: str
10-
"""Content of the raw text."""
1112

1213
def __init__(self, raw_response: StringDict):
13-
self.page = raw_response["page"]
14-
self.content = raw_response["content"]
14+
self.pages = [RawTextPage(page) for page in raw_response.get("pages", [])]

mindee/parsing/v2/raw_text_page.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
from mindee.parsing.common.string_dict import StringDict
2+
3+
4+
class RawTextPage:
5+
"""Raw text extracted from the page."""
6+
7+
content: str
8+
"""Content of the raw text."""
9+
10+
def __init__(self, raw_response: StringDict):
11+
self.content = raw_response["content"]

tests/test_client_v2_integration.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
import pytest
77

8-
from mindee import ClientV2, InferenceParameters
8+
from mindee import ClientV2, InferenceParameters, PathInput, UrlInputSource
99
from mindee.error.mindee_http_error_v2 import MindeeHTTPErrorV2
1010
from mindee.parsing.v2.inference_response import InferenceResponse
1111
from tests.test_inputs import FILE_TYPES_DIR, PRODUCT_DATA_DIR
@@ -39,7 +39,7 @@ def test_parse_file_empty_multiple_pages_must_succeed(
3939
input_path: Path = FILE_TYPES_DIR / "pdf" / "multipage_cut-2.pdf"
4040
assert input_path.exists(), f"sample file missing: {input_path}"
4141

42-
input_doc = v2_client.source_from_path(input_path)
42+
input_doc = PathInput(input_path)
4343
options = InferenceParameters(findoc_model_id)
4444

4545
response: InferenceResponse = v2_client.enqueue_and_get_inference(
@@ -67,7 +67,7 @@ def test_parse_file_filled_single_page_must_succeed(
6767
input_path: Path = PRODUCT_DATA_DIR / "financial_document" / "default_sample.jpg"
6868
assert input_path.exists(), f"sample file missing: {input_path}"
6969

70-
input_doc = v2_client.source_from_path(input_path)
70+
input_doc = PathInput(input_path)
7171
options = InferenceParameters(findoc_model_id)
7272

7373
response: InferenceResponse = v2_client.enqueue_and_get_inference(
@@ -98,7 +98,7 @@ def test_invalid_uuid_must_throw_error_422(v2_client: ClientV2) -> None:
9898
input_path: Path = FILE_TYPES_DIR / "pdf" / "multipage_cut-2.pdf"
9999
assert input_path.exists()
100100

101-
input_doc = v2_client.source_from_path(input_path)
101+
input_doc = PathInput(input_path)
102102
options = InferenceParameters("INVALID MODEL ID")
103103

104104
with pytest.raises(MindeeHTTPErrorV2) as exc_info:
@@ -119,7 +119,7 @@ def test_url_input_source_must_not_raise_errors(
119119
"""
120120
url = os.getenv("MINDEE_V2_SE_TESTS_BLANK_PDF_URL")
121121

122-
input_doc = v2_client.source_from_url(url)
122+
input_doc = UrlInputSource(url)
123123
options = InferenceParameters(findoc_model_id)
124124
response: InferenceResponse = v2_client.enqueue_and_get_inference(
125125
input_doc, options

tests/v2/test_inference_response.py

Lines changed: 36 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -41,62 +41,60 @@ def test_deep_nested_fields():
4141
inference_result = InferenceResponse(json_sample)
4242
assert isinstance(inference_result.inference, Inference)
4343
assert isinstance(
44-
inference_result.inference.result.fields.field_simple, SimpleField
44+
inference_result.inference.result.fields["field_simple"], SimpleField
4545
)
4646
assert isinstance(
47-
inference_result.inference.result.fields.field_object, ObjectField
47+
inference_result.inference.result.fields["field_object"], ObjectField
4848
)
4949
assert isinstance(
50-
inference_result.inference.result.fields.field_object.fields["sub_object_list"],
50+
inference_result.inference.result.fields["field_object"].fields[
51+
"sub_object_list"
52+
],
5153
ListField,
5254
)
5355
assert isinstance(
54-
inference_result.inference.result.fields.field_object.fields[
56+
inference_result.inference.result.fields["field_object"].fields[
5557
"sub_object_object"
5658
],
5759
ObjectField,
5860
)
5961
assert isinstance(
60-
inference_result.inference.result.fields.field_object.fields[
61-
"sub_object_object"
62-
].fields,
62+
inference_result.inference.result.fields["field_object"]
63+
.fields["sub_object_object"]
64+
.fields,
6365
dict,
6466
)
6567
assert isinstance(
66-
inference_result.inference.result.fields.field_object.fields[
67-
"sub_object_object"
68-
].fields["sub_object_object_sub_object_list"],
68+
inference_result.inference.result.fields["field_object"]
69+
.fields["sub_object_object"]
70+
.fields["sub_object_object_sub_object_list"],
6971
ListField,
7072
)
7173
assert isinstance(
72-
inference_result.inference.result.fields.field_object.fields[
73-
"sub_object_object"
74-
]
74+
inference_result.inference.result.fields["field_object"]
75+
.fields["sub_object_object"]
7576
.fields["sub_object_object_sub_object_list"]
7677
.items,
7778
list,
7879
)
7980
assert isinstance(
80-
inference_result.inference.result.fields.field_object.fields[
81-
"sub_object_object"
82-
]
81+
inference_result.inference.result.fields["field_object"]
82+
.fields["sub_object_object"]
8383
.fields["sub_object_object_sub_object_list"]
8484
.items[0],
8585
ObjectField,
8686
)
8787
assert isinstance(
88-
inference_result.inference.result.fields.field_object.fields[
89-
"sub_object_object"
90-
]
88+
inference_result.inference.result.fields["field_object"]
89+
.fields["sub_object_object"]
9190
.fields["sub_object_object_sub_object_list"]
9291
.items[0]
9392
.fields["sub_object_object_sub_object_list_simple"],
9493
SimpleField,
9594
)
9695
assert (
97-
inference_result.inference.result.fields.field_object.fields[
98-
"sub_object_object"
99-
]
96+
inference_result.inference.result.fields["field_object"]
97+
.fields["sub_object_object"]
10098
.fields["sub_object_object_sub_object_list"]
10199
.items[0]
102100
.fields["sub_object_object_sub_object_list_simple"]
@@ -110,30 +108,32 @@ def test_standard_field_types():
110108
json_sample, rst_sample = _get_inference_samples("standard_field_types")
111109
inference_result = InferenceResponse(json_sample)
112110
assert isinstance(inference_result.inference, Inference)
113-
field_simple_string = inference_result.inference.result.fields.field_simple_string
111+
field_simple_string = inference_result.inference.result.fields[
112+
"field_simple_string"
113+
]
114114
assert isinstance(field_simple_string, SimpleField)
115115
assert field_simple_string.value == "field_simple_string-value"
116116
assert field_simple_string.confidence == FieldConfidence.CERTAIN
117117
assert str(field_simple_string) == "field_simple_string-value"
118118

119-
field_simple_bool = inference_result.inference.result.fields.field_simple_bool
119+
field_simple_bool = inference_result.inference.result.fields["field_simple_bool"]
120120
assert isinstance(field_simple_bool, SimpleField)
121121
assert field_simple_bool.value is True
122122
assert str(field_simple_bool) == "True"
123123

124-
field_simple_null = inference_result.inference.result.fields.field_simple_null
124+
field_simple_null = inference_result.inference.result.fields["field_simple_null"]
125125
assert isinstance(field_simple_null, SimpleField)
126126
assert field_simple_null.value is None
127127
assert str(field_simple_null) == ""
128128

129129
assert isinstance(
130-
inference_result.inference.result.fields.field_object, ObjectField
130+
inference_result.inference.result.fields["field_object"], ObjectField
131131
)
132132
assert isinstance(
133-
inference_result.inference.result.fields.field_simple_list, ListField
133+
inference_result.inference.result.fields["field_simple_list"], ListField
134134
)
135135
assert isinstance(
136-
inference_result.inference.result.fields.field_object_list, ListField
136+
inference_result.inference.result.fields["field_object_list"], ListField
137137
)
138138
assert rst_sample == str(inference_result)
139139

@@ -144,11 +144,10 @@ def test_raw_texts():
144144
inference_result = InferenceResponse(json_sample)
145145
assert isinstance(inference_result.inference, Inference)
146146

147-
assert inference_result.inference.result.options
148-
assert len(inference_result.inference.result.options.raw_texts) == 2
149-
assert inference_result.inference.result.options.raw_texts[0].page == 0
147+
assert inference_result.inference.result.raw_text
148+
assert len(inference_result.inference.result.raw_text.pages) == 2
150149
assert (
151-
inference_result.inference.result.options.raw_texts[0].content
150+
inference_result.inference.result.raw_text.pages[0].content
152151
== "This is the raw text of the first page..."
153152
)
154153

@@ -161,13 +160,13 @@ def test_full_inference_response():
161160
assert isinstance(inference_result.inference, Inference)
162161
assert inference_result.inference.id == "12345678-1234-1234-1234-123456789abc"
163162
assert isinstance(inference_result.inference.result.fields.date, SimpleField)
164-
assert inference_result.inference.result.fields.date.value == "2019-11-02"
163+
assert inference_result.inference.result.fields["date"].value == "2019-11-02"
165164
assert isinstance(inference_result.inference.result.fields.taxes, ListField)
166165
assert isinstance(
167-
inference_result.inference.result.fields.taxes.items[0], ObjectField
166+
inference_result.inference.result.fields["taxes"].items[0], ObjectField
168167
)
169168
assert (
170-
inference_result.inference.result.fields.customer_address.fields.city.value
169+
inference_result.inference.result.fields["customer_address"].fields.city.value
171170
== "New York"
172171
)
173172
assert (
@@ -183,7 +182,7 @@ def test_full_inference_response():
183182
assert inference_result.inference.file.page_count == 1
184183
assert inference_result.inference.file.mime_type == "image/jpeg"
185184
assert not inference_result.inference.file.alias
186-
assert not inference_result.inference.result.options
185+
assert not inference_result.inference.result.raw_text
187186

188187

189188
@pytest.mark.v2
@@ -198,7 +197,7 @@ def test_field_locations_and_confidence() -> None:
198197

199198
inference_result = InferenceResponse(json_sample)
200199

201-
date_field: SimpleField = inference_result.inference.result.fields.date
200+
date_field: SimpleField = inference_result.inference.result.fields["date"]
202201

203202
assert date_field.locations, "date field should expose locations"
204203
loc0 = date_field.locations[0]

0 commit comments

Comments
 (0)