Merged
30 commits
9fc4717
base layout for functions, doesn't work yet
sebastianMindee May 2, 2023
fc2a1a1
added all basic needed support for async (untested)
sebastianMindee May 2, 2023
ef8fb56
fixed queue polling functions
sebastianMindee May 2, 2023
9768787
revamped inheritance and fixed AsyncPredictionResponse generation
sebastianMindee May 3, 2023
4ff8bfc
fixed polling for jobs, document access not working yet
sebastianMindee May 3, 2023
518d732
added some safety checks
sebastianMindee May 3, 2023
bc2b816
Fixes for draft PR
sebastianMindee May 3, 2023
e49c259
added test & fixed InvoiceSplitterV1 class
sebastianMindee May 4, 2023
61a0d1f
InvoiceSplitterV1 class cleanup
sebastianMindee May 4, 2023
8c5579c
Clarified DocString & added documentation support
sebastianMindee May 4, 2023
e969c28
Clarified DocString & added documentation support
sebastianMindee May 4, 2023
a9c21f4
Added unit testing for Async & fixed some obsolete AsyncPrediction no…
sebastianMindee May 4, 2023
2daaf18
renamed async unittest file to avoid confusion
sebastianMindee May 4, 2023
09ac89c
put api tests together
ianardee May 5, 2023
80df96e
rework a bit the structure
ianardee May 5, 2023
7920f6b
add empty files for docs
ianardee May 5, 2023
8aaf753
added doc support for async, refactored AsyncPredict class to fit wit…
sebastianMindee May 5, 2023
0fa7c37
tweaked import for retrocompatibility
sebastianMindee May 5, 2023
9c8c361
retrocompatibility tweak
sebastianMindee May 5, 2023
fbe2e45
further tweaking
sebastianMindee May 5, 2023
c37e8ec
fixed typo
sebastianMindee May 5, 2023
0d7347a
fixed typo... again...
sebastianMindee May 5, 2023
497d647
fixed txt test, again.
sebastianMindee May 5, 2023
f579852
fixed code sample for invoicesplitter
sebastianMindee May 5, 2023
2b46983
fixed typing in invoicesplitter
sebastianMindee May 5, 2023
5e38352
revamped sample code
sebastianMindee May 5, 2023
d9b6ffb
Update docs/extras/code_samples/invoice_splitter_v1_async.txt
sebastianMindee May 5, 2023
8119184
Update docs/extras/code_samples/invoice_splitter_v1_async.txt
sebastianMindee May 5, 2023
fc4c6aa
fixed code sample & added comments
sebastianMindee May 5, 2023
8c92a7a
removed needless import
sebastianMindee May 5, 2023
7 changes: 7 additions & 0 deletions docs/client.rst
@@ -21,3 +21,10 @@ PredictResponse
---------------
.. autoclass:: mindee.response.PredictResponse
:members:

AsyncPredictResponse
--------------------
.. autoclass:: mindee.response.AsyncPredictResponse
:members:
.. autoclass:: mindee.response.Job
:members:
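
For orientation, the new response classes surface in the async flow roughly as follows; this is an illustrative sketch only, reusing the attribute names from the code sample added further down in this PR.

# Illustrative sketch -- attribute names mirror the async code sample below.
response = input_doc.enqueue(documents.TypeInvoiceSplitterV1)
print(response.job.job_id)  # identifier later passed to parse_queued()
print(response.job.status)  # polled until it reads "completed"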
55 changes: 55 additions & 0 deletions docs/extras/code_samples/invoice_splitter_v1_async.txt
@@ -0,0 +1,55 @@
from mindee import Client, documents
from time import sleep

# Init a new client
mindee_client = Client(api_key="my-api-key")

# Load a file from disk
input_doc = mindee_client.doc_from_path("/path/to/the/file.ext")

# Put the document class in a local variable to keep the code DRY

doc_class = documents.TypeInvoiceSplitterV1

# Limit the amount of API calls to retrieve your document
MAX_RETRIES = 10

# How many seconds to wait in-between tries
INTERVAL_SECS = 6

# Counter to keep track of how many times we try to retrieve the document
times_tried = 1


queue_result = input_doc.enqueue(doc_class)

# Get the id of the queue (job)
queue_id = queue_result.job.job_id

# Recursive function that tries to retrieve the completed document.
# If the document is not "complete", try again
def get_doc_from_async_queue(queue_id, times_tried=0):

# Have we exceeded our retry count?
if times_tried >= MAX_RETRIES:
raise Exception(f"Maximum retries reached {times_tried}")

# Wait for a few seconds before fetching
sleep(INTERVAL_SECS)

# Fetch and parse the result, using the same type
parsed_result = input_doc.parse_queued(doc_class, queue_id)

# Check whether the result is ready
if parsed_result.job.status == "completed":

# Print a brief summary of the parsed data
print(parsed_result.document.document)
return

# Otherwise, try again...
else:
get_doc_from_async_queue(queue_id, times_tried+1)

# Start the recursion...
get_doc_from_async_queue(queue_id)
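
The recursive helper above is bounded by MAX_RETRIES, but the same polling can also be written as a plain loop, which avoids recursion entirely. A minimal loop-based sketch using exactly the same client calls and attribute names as the sample above, with nothing assumed beyond what the sample already uses:

# Loop-based variant of the polling shown above; a sketch, same assumptions as the sample.
from time import sleep

from mindee import Client, documents

mindee_client = Client(api_key="my-api-key")
input_doc = mindee_client.doc_from_path("/path/to/the/file.ext")
doc_class = documents.TypeInvoiceSplitterV1

MAX_RETRIES = 10     # maximum number of polling attempts
INTERVAL_SECS = 6    # seconds to wait between attempts

# Enqueue the document and keep the job identifier to poll with
queue_id = input_doc.enqueue(doc_class).job.job_id

for _ in range(MAX_RETRIES):
    sleep(INTERVAL_SECS)
    parsed_result = input_doc.parse_queued(doc_class, queue_id)
    if parsed_result.job.status == "completed":
        # Print a brief summary of the parsed data, as in the sample above
        print(parsed_result.document.document)
        break
else:
    raise RuntimeError(f"Maximum retries reached {MAX_RETRIES}")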
10 changes: 10 additions & 0 deletions docs/predictions/standard/documents/invoice_splitter_v1.rst
@@ -0,0 +1,10 @@
Invoice Splitter V1
-------------------

**Sample Code:**

.. literalinclude:: /extras/code_samples/invoice_splitter_v1_async.txt
:language: Python

.. autoclass:: mindee.documents.InvoiceSplitterV1
:members:
2 changes: 1 addition & 1 deletion mindee/__init__.py
@@ -1,2 +1,2 @@
from mindee.client import Client, PageOptions
from mindee.response import PredictResponse
from mindee.response import AsyncPredictResponse, Job, PredictResponse
208 changes: 184 additions & 24 deletions mindee/client.py
@@ -15,7 +15,7 @@
UrlInputSource,
)
from mindee.logger import logger
from mindee.response import PredictResponse
from mindee.response import AsyncPredictResponse, PredictResponse


def get_bound_classname(type_var) -> str:
@@ -84,41 +84,105 @@ def parse(

logger.debug("Parsing document as '%s'", endpoint_name)

found = []
for k in self.doc_configs.keys():
if k[1] == endpoint_name:
found.append(k)
doc_config = self._check_config(endpoint_name, account_name)
if not isinstance(self.input_doc, UrlInputSource):
if page_options and self.input_doc.is_pdf():
self.input_doc.process_pdf(
page_options.operation,
page_options.on_min_pages,
page_options.page_indexes,
)
return self._make_request(
document_class, doc_config, include_words, close_file, cropper
)

if len(found) == 0:
raise RuntimeError(f"Document type not configured: {endpoint_name}")
def enqueue(
self,
document_class: TypeDocument,
endpoint_name: Optional[str] = None,
account_name: Optional[str] = None,
include_words: bool = False,
close_file: bool = True,
page_options: Optional[PageOptions] = None,
cropper: bool = False,
) -> AsyncPredictResponse[TypeDocument]:
"""
Enqueueing to an async endpoint.

if account_name:
config_key = (account_name, endpoint_name)
elif len(found) == 1:
config_key = found[0]
else:
usernames = [k[0] for k in found]
:param document_class: The document class to use.
The response object will be instantiated based on this parameter.

:param endpoint_name: For custom endpoints, the "API name" field in the "Settings" page of the API Builder.
Do not set for standard (off the shelf) endpoints.

:param account_name: For custom endpoints, your account or organization username on the API Builder.
This is normally not required unless you have a custom endpoint which has the
same name as standard (off the shelf) endpoint.
Do not set for standard (off the shelf) endpoints.

:param include_words: Whether to include the full text for each page.
This performs a full OCR operation on the server and will increase response time.

:param close_file: Whether to ``close()`` the file after parsing it.
Set to ``False`` if you need to access the file after this operation.

:param page_options: If set, remove pages from the document as specified.
This is done before sending the file to the server and is useful to avoid page limitations.

:param cropper: Whether to include cropper results for each page.
This performs a cropping operation on the server and will increase response time.
"""
bound_classname = get_bound_classname(document_class)
if bound_classname != documents.CustomV1.__name__:
endpoint_name = get_bound_classname(document_class)
elif endpoint_name is None:
raise RuntimeError(
(
"Duplicate configuration detected.\n"
f"You specified a document_type '{endpoint_name}' in your custom config.\n"
"To avoid confusion, please add the 'account_name' attribute to "
f"the parse method, one of {usernames}."
)
f"endpoint_name is required when using {bound_classname} class"
)

doc_config = self.doc_configs[config_key]
doc_config.check_api_keys()
logger.debug("Enqueuing document as '%s'", endpoint_name)

doc_config = self._check_config(endpoint_name, account_name)
if not isinstance(self.input_doc, UrlInputSource):
if page_options and self.input_doc.is_pdf():
self.input_doc.process_pdf(
page_options.operation,
page_options.on_min_pages,
page_options.page_indexes,
)
return self._make_request(
document_class, doc_config, include_words, close_file, cropper
)
return self._predict_async(doc_config, include_words, close_file, cropper)

def parse_queued(
self,
document_class: TypeDocument,
queue_id: str,
endpoint_name: Optional[str] = None,
account_name: Optional[str] = None,
) -> AsyncPredictResponse[TypeDocument]:
"""
Parses a queued document.

:param queue_id: queue_id received from the API
:param endpoint_name: For custom endpoints, the "API name" field in the "Settings" page of the API Builder.
Do not set for standard (off the shelf) endpoints.
:param account_name: For custom endpoints, your account or organization username on the API Builder.
This is normally not required unless you have a custom endpoint which has the
same name as standard (off the shelf) endpoint.
Do not set for standard (off the shelf) endpoints.
"""
bound_classname = get_bound_classname(document_class)
if bound_classname != documents.CustomV1.__name__:
endpoint_name = get_bound_classname(document_class)
elif endpoint_name is None:
raise RuntimeError(
f"endpoint_name is required when using {bound_classname} class"
)

logger.debug("Fetching queued document as '%s'", endpoint_name)

doc_config = self._check_config(endpoint_name, account_name)

return self._get_queued_document(doc_config, queue_id)

def _make_request(
self,
@@ -145,18 +209,108 @@ def _make_request(
raise HTTPException(
f"API {response.status_code} HTTP error: {json.dumps(dict_response)}"
)

return PredictResponse[TypeDocument](
http_response=dict_response,
doc_config=doc_config,
input_source=self.input_doc,
response_ok=response.ok,
)

def _predict_async(
self,
doc_config: DocumentConfig,
include_words: bool = False,
close_file: bool = True,
cropper: bool = False,
) -> AsyncPredictResponse[TypeDocument]:
"""
Sends a document to the queue, and sends back an asynchronous predict response.

:param doc_config: Configuration of the document.
"""
response = doc_config.endpoints[0].predict_async_req_post(
self.input_doc, include_words, close_file, cropper
)

dict_response = response.json()

if not response.ok and self.raise_on_error:
raise HTTPException(
f"API {response.status_code} HTTP error: {json.dumps(dict_response)}"
)

return AsyncPredictResponse[TypeDocument](
http_response=dict_response,
doc_config=doc_config,
input_source=self.input_doc,
response_ok=response.ok,
)

def _get_queued_document(
self,
doc_config: DocumentConfig,
queue_id: str,
) -> AsyncPredictResponse[TypeDocument]:
"""
Fetches a document or a Job from a given queue.

:param queue_id: Queue_id received from the API
:param doc_config: Pre-checked document configuration.
"""
queue_response = doc_config.endpoints[0].document_queue_req_get(
queue_id=queue_id
)

if (
not queue_response.status_code
or queue_response.status_code < 200
or queue_response.status_code > 302
):
raise HTTPException(
f"API {queue_response.status_code} HTTP error: {json.dumps(queue_response)}"
)

return AsyncPredictResponse[TypeDocument](
http_response=queue_response.json(),
doc_config=doc_config,
input_source=self.input_doc,
response_ok=queue_response.ok,
)

def close(self) -> None:
"""Close the file object."""
if not isinstance(self.input_doc, UrlInputSource):
self.input_doc.file_object.close()

def _check_config(self, endpoint_name, account_name) -> DocumentConfig:
found = []
for k in self.doc_configs.keys():
if k[1] == endpoint_name:
found.append(k)

if len(found) == 0:
raise RuntimeError(f"Document type not configured: {endpoint_name}")

if account_name:
config_key = (account_name, endpoint_name)
elif len(found) == 1:
config_key = found[0]
else:
usernames = [k[0] for k in found]
raise RuntimeError(
(
"Duplicate configuration detected.\n"
f"You specified a document_type '{endpoint_name}' in your custom config.\n"
"To avoid confusion, please add the 'account_name' attribute to "
f"the parse method, one of {usernames}."
)
)

doc_config = self.doc_configs[config_key]
doc_config.check_api_keys()
return doc_config


class ConfigSpec(NamedTuple):
doc_class: Type[Document]
@@ -281,7 +435,13 @@ def _init_default_endpoints(self) -> None:
url_name="license_plates",
version="1",
),
ConfigSpec(
doc_class=documents.InvoiceSplitterV1,
url_name="invoice_splitter",
version="1",
),
]

for config in configs:
config_key = (OTS_OWNER, config.doc_class.__name__)
self._doc_configs[config_key] = self._standard_doc_config(
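
The enqueue and parse_queued docstrings above also cover custom (API Builder) endpoints, which the PR's code sample does not exercise. A hedged sketch of such a call, reusing input_doc from the sample and assuming the custom endpoint is already configured on the client; "my_endpoint" and "my-org" are placeholders, and documents.TypeCustomV1 is assumed to be exposed like the other document type vars:

# Illustrative only: enqueue on a hypothetical custom endpoint.
async_response = input_doc.enqueue(
    documents.TypeCustomV1,
    endpoint_name="my_endpoint",  # "API name" field in the API Builder settings
    account_name="my-org",        # only needed if the endpoint name is ambiguous
)
print(async_response.job.job_id)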
1 change: 1 addition & 0 deletions mindee/documents/__init__.py
@@ -8,6 +8,7 @@
TypeFinancialV1,
)
from mindee.documents.invoice import InvoiceV3, InvoiceV4, TypeInvoiceV3, TypeInvoiceV4
from mindee.documents.invoice_splitter import InvoiceSplitterV1, TypeInvoiceSplitterV1
from mindee.documents.passport import PassportV1, TypePassportV1
from mindee.documents.proof_of_address import ProofOfAddressV1, TypeProofOfAddressV1
from mindee.documents.receipt import (
2 changes: 1 addition & 1 deletion mindee/documents/config.py
@@ -28,7 +28,7 @@ def check_api_keys(self) -> None:
raise RuntimeError(
(
f"Missing API key for '{endpoint.url_name} v{endpoint.version}',"
"check your Client configuration.\n"
" check your Client configuration.\n"
"You can set this using the "
f"'{API_KEY_ENV_NAME}' environment variable."
)
1 change: 1 addition & 0 deletions mindee/documents/invoice_splitter/__init__.py
@@ -0,0 +1 @@
from .invoice_splitter_v1 import InvoiceSplitterV1, TypeInvoiceSplitterV1