From 40e4459b47948d1c69c9fc8130f620d7cab0322b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ianar=C3=A9=20S=C3=A9vi?= <ianare@mindee.co>
Date: Mon, 1 Sep 2025 18:48:57 +0200
Subject: [PATCH 1/5] :sparkles: add inference options

---
 mindee/input/inference_parameters.py          | 11 ++++-
 mindee/mindee_http/mindee_api_v2.py           |  6 +++
 mindee/parsing/v2/inference.py                | 21 +++++----
 mindee/parsing/v2/inference_active_options.py | 25 ++++++++++
 mindee/parsing/v2/inference_result.py         |  2 +-
 mindee/parsing/v2/raw_text.py                 |  3 ++
 tests/test_client_v2_integration.py           | 46 +++++++++++++------
 7 files changed, 88 insertions(+), 26 deletions(-)
 create mode 100644 mindee/parsing/v2/inference_active_options.py

diff --git a/mindee/input/inference_parameters.py b/mindee/input/inference_parameters.py
index b8608f6c..13c4dcbc 100644
--- a/mindee/input/inference_parameters.py
+++ b/mindee/input/inference_parameters.py
@@ -11,7 +11,16 @@ class InferenceParameters:
     model_id: str
     """ID of the model, required."""
     rag: bool = False
-    """If set to `True`, will enable Retrieval-Augmented Generation."""
+    """Use Retrieval-Augmented Generation during inference."""
+    raw_text: bool = False
+    """Extract the entire text from the document as strings, and fill the ``raw_text`` attribute."""
+    polygon: bool = False
+    """Calculate bounding box polygons for values and fill the ``locations`` attribute of fields."""
+    confidence: bool = False
+    """
+    Calculate confidence scores for values, and fill the ``confidence`` attribute of fields.
+    Useful for automation.
+    """
     alias: Optional[str] = None
     """Use an alias to link the file to your own DB. If empty, no alias will be used."""
     webhook_ids: Optional[List[str]] = None
diff --git a/mindee/mindee_http/mindee_api_v2.py b/mindee/mindee_http/mindee_api_v2.py
index 7645abc8..fdfa5d5c 100644
--- a/mindee/mindee_http/mindee_api_v2.py
+++ b/mindee/mindee_http/mindee_api_v2.py
@@ -84,6 +84,12 @@ def req_post_inference_enqueue(
 
         if params.rag:
             data["rag"] = "true"
+        if params.raw_text:
+            data["raw_text"] = "true"
+        if params.confidence:
+            data["confidence"] = "true"
+        if params.polygon:
+            data["polygon"] = "true"
         if params.webhook_ids and len(params.webhook_ids) > 0:
             data["webhook_ids"] = ",".join(params.webhook_ids)
         if params.alias and len(params.alias):
diff --git a/mindee/parsing/v2/inference.py b/mindee/parsing/v2/inference.py
index 038746d6..4ff64468 100644
--- a/mindee/parsing/v2/inference.py
+++ b/mindee/parsing/v2/inference.py
@@ -1,6 +1,5 @@
-from typing import Optional
-
 from mindee.parsing.common.string_dict import StringDict
+from mindee.parsing.v2.inference_active_options import ActiveOptions
 from mindee.parsing.v2.inference_file import InferenceFile
 from mindee.parsing.v2.inference_model import InferenceModel
 from mindee.parsing.v2.inference_result import InferenceResult
@@ -9,25 +8,29 @@
 class Inference:
     """Inference object for a V2 API return."""
 
+    id: str
+    """ID of the inference."""
     model: InferenceModel
     """Model info for the inference."""
     file: InferenceFile
     """File info for the inference."""
     result: InferenceResult
     """Result of the inference."""
-    id: Optional[str]
-    """ID of the inference."""
+    active_options: ActiveOptions
+    """Active options for the inference."""
 
     def __init__(self, raw_response: StringDict):
+        self.id = raw_response["id"]
         self.model = InferenceModel(raw_response["model"])
         self.file = InferenceFile(raw_response["file"])
         self.result = InferenceResult(raw_response["result"])
-        self.id = raw_response["id"] if "id" in raw_response else None
+        self.active_options = ActiveOptions(raw_response["active_options"])
 
     def __str__(self) -> str:
         return (
-            f"Inference\n#########\n"
-            f"{self.model}\n\n"
-            f"{self.file}"
-            f"{self.result}\n"
+            f"Inference\n#########"
+            f"\n{self.model}"
+            f"\n\n{self.file}"
+            f"\n\n{self.active_options}"
+            f"\n\n{self.result}\n"
         )
diff --git a/mindee/parsing/v2/inference_active_options.py b/mindee/parsing/v2/inference_active_options.py
new file mode 100644
index 00000000..aae44ab4
--- /dev/null
+++ b/mindee/parsing/v2/inference_active_options.py
@@ -0,0 +1,25 @@
+from mindee.parsing.common.string_dict import StringDict
+
+
+class ActiveOptions:
+    """Active options for the inference."""
+
+    raw_text: bool
+    polygon: bool
+    confidence: bool
+    rag: bool
+
+    def __init__(self, raw_response: StringDict):
+        self.raw_text = raw_response["raw_text"]
+        self.polygon = raw_response["polygon"]
+        self.confidence = raw_response["confidence"]
+        self.rag = raw_response["rag"]
+
+    def __str__(self) -> str:
+        return (
+            f"Active Options\n=============="
+            f"\n:Raw Text: {self.raw_text}"
+            f"\n:Polygon: {self.polygon}"
+            f"\n:Confidence: {self.confidence}"
+            f"\n:RAG: {self.rag}"
+        )
diff --git a/mindee/parsing/v2/inference_result.py b/mindee/parsing/v2/inference_result.py
index 75a0abb8..0a89ef24 100644
--- a/mindee/parsing/v2/inference_result.py
+++ b/mindee/parsing/v2/inference_result.py
@@ -19,5 +19,5 @@ def __init__(self, raw_response: StringDict) -> None:
             self.raw_text = RawText(raw_response["raw_text"])
 
     def __str__(self) -> str:
-        out_str = f"\n\nFields\n======{self.fields}"
+        out_str = f"Fields\n======{self.fields}"
         return out_str
diff --git a/mindee/parsing/v2/raw_text.py b/mindee/parsing/v2/raw_text.py
index 491c8ddb..0d9c6329 100644
--- a/mindee/parsing/v2/raw_text.py
+++ b/mindee/parsing/v2/raw_text.py
@@ -12,3 +12,6 @@ class RawText:
 
     def __init__(self, raw_response: StringDict):
         self.pages = [RawTextPage(page) for page in raw_response.get("pages", [])]
+
+    def __str__(self) -> str:
+        return "\n\n".join([page.content for page in self.pages])
diff --git a/tests/test_client_v2_integration.py b/tests/test_client_v2_integration.py
index e7bd364e..955a4612 100644
--- a/tests/test_client_v2_integration.py
+++ b/tests/test_client_v2_integration.py
@@ -37,13 +37,18 @@ def test_parse_file_empty_multiple_pages_must_succeed(
     file & model metadata.
     """
     input_path: Path = FILE_TYPES_DIR / "pdf" / "multipage_cut-2.pdf"
-    assert input_path.exists(), f"sample file missing: {input_path}"
 
-    input_doc = PathInput(input_path)
-    options = InferenceParameters(findoc_model_id)
+    input_source = PathInput(input_path)
+    params = InferenceParameters(
+        model_id=findoc_model_id,
+        rag=False,
+        raw_text=False,
+        polygon=False,
+        confidence=False,
+    )
 
     response: InferenceResponse = v2_client.enqueue_and_get_inference(
-        input_doc, options
+        input_source, params
     )
 
     assert response is not None
@@ -65,13 +70,18 @@ def test_parse_file_filled_single_page_must_succeed(
     Upload a filled single-page JPEG and verify that common fields are present.
     """
     input_path: Path = PRODUCT_DATA_DIR / "financial_document" / "default_sample.jpg"
-    assert input_path.exists(), f"sample file missing: {input_path}"
 
-    input_doc = PathInput(input_path)
-    options = InferenceParameters(findoc_model_id)
+    input_source = PathInput(input_path)
+    params = InferenceParameters(
+        model_id=findoc_model_id,
+        rag=False,
+        raw_text=False,
+        polygon=False,
+        confidence=False,
+    )
 
     response: InferenceResponse = v2_client.enqueue_and_get_inference(
-        input_doc, options
+        input_source, params
     )
 
     assert response is not None
@@ -79,6 +89,7 @@ def test_parse_file_filled_single_page_must_succeed(
 
     assert response.inference.file is not None
     assert response.inference.file.name == "default_sample.jpg"
+    assert response.inference.file.page_count == 1
 
     assert response.inference.model is not None
     assert response.inference.model.id == findoc_model_id
@@ -96,13 +107,12 @@ def test_invalid_uuid_must_throw_error_422(v2_client: ClientV2) -> None:
     Using an invalid model identifier must trigger a 422 HTTP error.
     """
     input_path: Path = FILE_TYPES_DIR / "pdf" / "multipage_cut-2.pdf"
-    assert input_path.exists()
 
-    input_doc = PathInput(input_path)
-    options = InferenceParameters("INVALID MODEL ID")
+    input_source = PathInput(input_path)
+    params = InferenceParameters(model_id="INVALID MODEL ID")
 
     with pytest.raises(MindeeHTTPErrorV2) as exc_info:
-        v2_client.enqueue_inference(input_doc, options)
+        v2_client.enqueue_inference(input_source, params)
 
     exc: MindeeHTTPErrorV2 = exc_info.value
     assert exc.status == 422
@@ -119,10 +129,16 @@ def test_url_input_source_must_not_raise_errors(
     """
     url = os.getenv("MINDEE_V2_SE_TESTS_BLANK_PDF_URL")
 
-    input_doc = UrlInputSource(url)
-    options = InferenceParameters(findoc_model_id)
+    input_source = UrlInputSource(url)
+    params = InferenceParameters(
+        model_id=findoc_model_id,
+        rag=False,
+        raw_text=False,
+        polygon=False,
+        confidence=False,
+    )
     response: InferenceResponse = v2_client.enqueue_and_get_inference(
-        input_doc, options
+        input_source, params
     )
     assert response is not None
     assert response.inference is not None

From b51883032aa765320dba5d4794029e64f11ece6b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ianar=C3=A9=20S=C3=A9vi?= <ianare@mindee.co>
Date: Tue, 2 Sep 2025 14:53:40 +0200
Subject: [PATCH 2/5] rename class

---
 mindee/parsing/v2/__init__.py                 | 2 ++
 mindee/parsing/v2/inference.py                | 6 +++---
 mindee/parsing/v2/inference_active_options.py | 2 +-
 tests/data                                    | 2 +-
 4 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/mindee/parsing/v2/__init__.py b/mindee/parsing/v2/__init__.py
index a1e3b587..47557c9e 100644
--- a/mindee/parsing/v2/__init__.py
+++ b/mindee/parsing/v2/__init__.py
@@ -1,5 +1,6 @@
 from mindee.parsing.v2.error_response import ErrorResponse
 from mindee.parsing.v2.inference import Inference
+from mindee.parsing.v2.inference_active_options import InferenceActiveOptions
 from mindee.parsing.v2.inference_file import InferenceFile
 from mindee.parsing.v2.inference_model import InferenceModel
 from mindee.parsing.v2.inference_response import InferenceResponse
@@ -8,6 +9,7 @@
 
 __all__ = [
     "Inference",
+    "InferenceActiveOptions",
     "InferenceFile",
     "InferenceModel",
     "InferenceResponse",
diff --git a/mindee/parsing/v2/inference.py b/mindee/parsing/v2/inference.py
index 4ff64468..86c076c9 100644
--- a/mindee/parsing/v2/inference.py
+++ b/mindee/parsing/v2/inference.py
@@ -1,5 +1,5 @@
 from mindee.parsing.common.string_dict import StringDict
-from mindee.parsing.v2.inference_active_options import ActiveOptions
+from mindee.parsing.v2.inference_active_options import InferenceActiveOptions
 from mindee.parsing.v2.inference_file import InferenceFile
 from mindee.parsing.v2.inference_model import InferenceModel
 from mindee.parsing.v2.inference_result import InferenceResult
@@ -16,7 +16,7 @@ class Inference:
     """File info for the inference."""
     result: InferenceResult
     """Result of the inference."""
-    active_options: ActiveOptions
+    active_options: InferenceActiveOptions
     """Active options for the inference."""
 
     def __init__(self, raw_response: StringDict):
@@ -24,7 +24,7 @@ def __init__(self, raw_response: StringDict):
         self.model = InferenceModel(raw_response["model"])
         self.file = InferenceFile(raw_response["file"])
         self.result = InferenceResult(raw_response["result"])
-        self.active_options = ActiveOptions(raw_response["active_options"])
+        self.active_options = InferenceActiveOptions(raw_response["active_options"])
 
     def __str__(self) -> str:
         return (
diff --git a/mindee/parsing/v2/inference_active_options.py b/mindee/parsing/v2/inference_active_options.py
index aae44ab4..6a2d4656 100644
--- a/mindee/parsing/v2/inference_active_options.py
+++ b/mindee/parsing/v2/inference_active_options.py
@@ -1,7 +1,7 @@
 from mindee.parsing.common.string_dict import StringDict
 
 
-class ActiveOptions:
+class InferenceActiveOptions:
     """Active options for the inference."""
 
     raw_text: bool
diff --git a/tests/data b/tests/data
index f6eb112b..11c2edc3 160000
--- a/tests/data
+++ b/tests/data
@@ -1 +1 @@
-Subproject commit f6eb112b6b5bd95b3f591b839b6c4920e5ffe80c
+Subproject commit 11c2edc3d2778b121644317b0fc3efc0102ec83a

From d15d4f24df9dcc7d99f359e5a5adcdec56e1fcf4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?ianar=C3=A9?= <97107275+ianardee@users.noreply.github.com>
Date: Tue, 2 Sep 2025 14:55:12 +0200
Subject: [PATCH 3/5] Apply suggestions from code review

Co-authored-by: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com>
---
 tests/test_client_v2_integration.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tests/test_client_v2_integration.py b/tests/test_client_v2_integration.py
index 955a4612..9768a6d0 100644
--- a/tests/test_client_v2_integration.py
+++ b/tests/test_client_v2_integration.py
@@ -45,6 +45,8 @@ def test_parse_file_empty_multiple_pages_must_succeed(
         raw_text=False,
         polygon=False,
         confidence=False,
+        webhook_ids=[],
+        alias='py_integration_empty_multiple'
     )
 
     response: InferenceResponse = v2_client.enqueue_and_get_inference(
@@ -78,6 +80,8 @@ def test_parse_file_filled_single_page_must_succeed(
         raw_text=False,
         polygon=False,
         confidence=False,
+        webhook_ids=[],
+        alias='py_integration_empty_multipage'
     )
 
     response: InferenceResponse = v2_client.enqueue_and_get_inference(
@@ -136,6 +140,8 @@ def test_url_input_source_must_not_raise_errors(
         raw_text=False,
         polygon=False,
         confidence=False,
+        webhook_ids=[],
+        alias='py_integration_url_source'
     )
     response: InferenceResponse = v2_client.enqueue_and_get_inference(
         input_source, params

From cdaef2246e416343adbc5ea71c70ceb3ff9782e2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ianar=C3=A9=20S=C3=A9vi?= <ianare@mindee.co>
Date: Tue, 2 Sep 2025 15:00:17 +0200
Subject: [PATCH 4/5] check options in inference

---
 tests/test_client_v2_integration.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/tests/test_client_v2_integration.py b/tests/test_client_v2_integration.py
index 9768a6d0..c55b467f 100644
--- a/tests/test_client_v2_integration.py
+++ b/tests/test_client_v2_integration.py
@@ -46,7 +46,7 @@ def test_parse_file_empty_multiple_pages_must_succeed(
         polygon=False,
         confidence=False,
         webhook_ids=[],
-        alias='py_integration_empty_multiple'
+        alias="py_integration_empty_multiple",
     )
 
     response: InferenceResponse = v2_client.enqueue_and_get_inference(
@@ -81,7 +81,7 @@ def test_parse_file_filled_single_page_must_succeed(
         polygon=False,
         confidence=False,
         webhook_ids=[],
-        alias='py_integration_empty_multipage'
+        alias="py_integration_empty_multipage",
     )
 
     response: InferenceResponse = v2_client.enqueue_and_get_inference(
@@ -95,6 +95,12 @@ def test_parse_file_filled_single_page_must_succeed(
     assert response.inference.file.name == "default_sample.jpg"
     assert response.inference.file.page_count == 1
 
+    assert response.inference.active_options is not None
+    assert response.inference.active_options.rag is False
+    assert response.inference.active_options.raw_text is False
+    assert response.inference.active_options.polygon is False
+    assert response.inference.active_options.confidence is False
+
     assert response.inference.model is not None
     assert response.inference.model.id == findoc_model_id
 
@@ -141,7 +147,7 @@ def test_url_input_source_must_not_raise_errors(
         polygon=False,
         confidence=False,
         webhook_ids=[],
-        alias='py_integration_url_source'
+        alias="py_integration_url_source",
     )
     response: InferenceResponse = v2_client.enqueue_and_get_inference(
         input_source, params

From a753ae1f3a546c88dc6c061b4467f812386d7be2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ianar=C3=A9=20S=C3=A9vi?= <ianare@mindee.co>
Date: Tue, 2 Sep 2025 17:12:39 +0200
Subject: [PATCH 5/5] better functional tests

---
 tests/test_client_v2_integration.py | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/tests/test_client_v2_integration.py b/tests/test_client_v2_integration.py
index c55b467f..5b31fd3e 100644
--- a/tests/test_client_v2_integration.py
+++ b/tests/test_client_v2_integration.py
@@ -42,7 +42,7 @@ def test_parse_file_empty_multiple_pages_must_succeed(
     params = InferenceParameters(
         model_id=findoc_model_id,
         rag=False,
-        raw_text=False,
+        raw_text=True,
         polygon=False,
         confidence=False,
         webhook_ids=[],
@@ -58,10 +58,20 @@ def test_parse_file_empty_multiple_pages_must_succeed(
 
     assert response.inference.file is not None
     assert response.inference.file.name == "multipage_cut-2.pdf"
+    assert response.inference.file.page_count == 2
 
     assert response.inference.model is not None
     assert response.inference.model.id == findoc_model_id
 
+    assert response.inference.active_options is not None
+    assert response.inference.active_options.rag is False
+    assert response.inference.active_options.raw_text is True
+    assert response.inference.active_options.polygon is False
+    assert response.inference.active_options.confidence is False
+
+    assert response.inference.result.raw_text is not None
+    assert len(response.inference.result.raw_text.pages) == 2
+
 
 @pytest.mark.integration
 @pytest.mark.v2
@@ -81,7 +91,7 @@ def test_parse_file_filled_single_page_must_succeed(
         polygon=False,
         confidence=False,
         webhook_ids=[],
-        alias="py_integration_empty_multipage",
+        alias="py_integration_filled_single",
     )
 
     response: InferenceResponse = v2_client.enqueue_and_get_inference(
@@ -95,19 +105,23 @@ def test_parse_file_filled_single_page_must_succeed(
     assert response.inference.file.name == "default_sample.jpg"
     assert response.inference.file.page_count == 1
 
+    assert response.inference.model is not None
+    assert response.inference.model.id == findoc_model_id
+
     assert response.inference.active_options is not None
     assert response.inference.active_options.rag is False
     assert response.inference.active_options.raw_text is False
     assert response.inference.active_options.polygon is False
     assert response.inference.active_options.confidence is False
 
-    assert response.inference.model is not None
-    assert response.inference.model.id == findoc_model_id
+    assert response.inference.result.raw_text is None
 
     assert response.inference.result is not None
     supplier_name = response.inference.result.fields["supplier_name"]
     assert supplier_name is not None
     assert supplier_name.value == "John Smith"
+    assert supplier_name.confidence is None
+    assert len(supplier_name.locations) == 0
 
 
 @pytest.mark.integration