From 068fb11b9ca2218e89dec0a572af2556e1a8899e Mon Sep 17 00:00:00 2001 From: Sebastian Olivera Date: Tue, 9 May 2023 10:15:04 +0200 Subject: [PATCH 01/14] added doc support for Invoice Splitter V1 --- docs/predictions/standard/international.rst | 1 + mindee/cli.py | 334 +++++++++++++++----- 2 files changed, 262 insertions(+), 73 deletions(-) diff --git a/docs/predictions/standard/international.rst b/docs/predictions/standard/international.rst index 02eb1b5c..90f27de5 100644 --- a/docs/predictions/standard/international.rst +++ b/docs/predictions/standard/international.rst @@ -9,5 +9,6 @@ International .. include:: ./documents/financial_document_v1.rst .. include:: ./documents/passport_v1.rst .. include:: ./documents/shipping_container_v1.rst +.. include:: ./documents/invoice_splitter_v1.rst .. include:: ./documents/proof_of_address_v1.rst .. include:: ./documents/cropper_v1.rst diff --git a/mindee/cli.py b/mindee/cli.py index e273aed7..732e7f72 100644 --- a/mindee/cli.py +++ b/mindee/cli.py @@ -1,6 +1,6 @@ import argparse import json -from argparse import Namespace +from argparse import Namespace, ArgumentParser from dataclasses import dataclass from typing import Dict, Generic, TypeVar @@ -15,6 +15,8 @@ class CommandConfig(Generic[TypeDoc]): help: str doc_class: TypeDoc + is_sync: bool = True + is_async: bool = False DOCUMENTS: Dict[str, CommandConfig] = { @@ -70,6 +72,12 @@ class CommandConfig(Generic[TypeDoc]): help="FR Bank Account Details", doc_class=documents.fr.TypeBankAccountDetailsV1, ), + "invoice-splitter": CommandConfig( + help="Invoice Splitter", + doc_class=documents.TypeInvoiceSplitterV1, + is_sync=False, + is_async=True, + ) } @@ -145,88 +153,268 @@ def _parse_args() -> Namespace: dest="product_name", required=True, ) + for name, info in DOCUMENTS.items(): + subp = subparsers.add_parser(name, help=info.help) + if info.is_sync: + if name == "custom": + add_sync_custom_options(subp) + else: + add_sync_default_options(subp) + add_sync_common_options(subp) + for name, info in DOCUMENTS.items(): subp = subparsers.add_parser(name, help=info.help) - if name == "custom": - subp.add_argument( - "-u", - "--user", - dest="username", - required=True, - help="API account name for the endpoint", - ) + if info.is_async: + subp = subparsers.add_parser("enqueue") + if name=="custom": + add_async_post_custom_options(subp) + else: + add_async_post_default_options(subp) + add_async_post_common_options(subp) + + subp = subparsers.add_parser("parse-queued") + if name=="custom": + add_async_get_custom_options(subp) + else: + add_async_get_default_options(subp) + add_async_get_common_options(subp) + subp.add_argument( - "-k", - "--key", - dest="api_key", - help="API key for the account", - ) - subp.add_argument( - "-v", - "--version", - default="1", - dest="api_version", - help="Version for the endpoint. If not set, use the latest version of the model.", - ) - subp.add_argument(dest="api_name", help="Name of the API") - else: - subp.add_argument( - "-k", - "--key", - dest="api_key", - help="API key for the account", - ) - subp.add_argument( - "-w", - "--with-words", + "-t", + "--full-text", dest="include_words", action="store_true", - help="Include words in response", + help="include full document text in response", ) - subp.add_argument( - "-i", - "--input-type", - dest="input_type", - choices=["path", "file", "base64", "bytes", "url"], - default="path", - help="Specify how to handle the input.\n" - "- path: open a path (default).\n" - "- file: open as a file handle.\n" - "- base64: open a base64 encoded text file.\n" - "- bytes: open the contents as raw bytes.\n" - "- url: open an URL.", - ) - subp.add_argument( - "-o", - "--output-type", - dest="output_type", - choices=["summary", "raw", "parsed"], - default="summary", - help="Specify how to output the data.\n" - "- summary: a basic summary (default)\n" - "- raw: the raw HTTP response\n" - "- parsed: the validated and parsed data fields\n", - ) - subp.add_argument( - "-c", - "--cut", - dest="cut_doc", - action="store_true", - help="Cut document pages", - ) - subp.add_argument( - "-p", - "--pages-keep", - dest="doc_pages", - type=int, - default=5, - help="Number of document pages to keep, default: 5", - ) - subp.add_argument(dest="path", help="Full path to the file") parsed_args = parser.parse_args() return parsed_args +def add_async_get_common_options( + subp:ArgumentParser +): + subp.add_argument( + "-c", + "--cut-doc", + dest="cut_doc", + action="store_true", + help="Cut document pages", + ) + subp.add_argument( + "-i", + "--input-type", + dest="input_type", + choices=["path", "file", "base64", "bytes", "url"], + default="path", + help="Specify how to handle the input.\n" + "- path: open a path (default).\n" + "- file: open as a file handle.\n" + "- base64: open a base64 encoded text file.\n" + "- bytes: open the contents as raw bytes.\n" + "- url: open an URL.", + ) + subp.add_argument( + "-p", + "--pages-keep", + dest="doc_pages", + type=int, + default=5, + help="Number of document pages to keep, default: 5", + ) + +def add_async_get_default_options( + subp:ArgumentParser +): + subp.add_argument( + "-k", + "--key", + dest="api_key", + help="API key for the account", + ) + subp.add_argument( + "-t", + "--full-text", + dest="include_words", + action="store_true", + help="include full document text in response", + ) + +def add_async_get_custom_options( + subp:ArgumentParser +): + subp.add_argument( + "-e", + "--endpoint", + dest="endpoint_name", + help="API endpoint name (required)", + required=True + ) + subp.add_argument( + "-a", + "--account-name", + dest="username", + required=True, + help="API account name for the endpoint (required)", + ) + subp.add_argument( + "-v", + "--version", + default="1", + dest="api_version", + help="Version for the endpoint. If not set, use the latest version of the model.", + ) + subp.add_argument(dest="api_name", help="Name of the API") + + +def add_async_post_common_options( + subp:ArgumentParser +): + subp.add_argument( + "-c", + "--cut-doc", + dest="cut_doc", + action="store_true", + help="Cut document pages", + ) + subp.add_argument( + "-i", + "--input-type", + dest="input_type", + choices=["path", "file", "base64", "bytes", "url"], + default="path", + help="Specify how to handle the input.\n" + "- path: open a path (default).\n" + "- file: open as a file handle.\n" + "- base64: open a base64 encoded text file.\n" + "- bytes: open the contents as raw bytes.\n" + "- url: open an URL.", + ) + subp.add_argument( + "-p", + "--pages-keep", + dest="doc_pages", + type=int, + default=5, + help="Number of document pages to keep, default: 5", + ) + +def add_async_post_default_options( + subp:ArgumentParser +): + subp.add_argument( + "-t", + "--full-text", + dest="include_words", + action="store_true", + help="include full document text in response", + ) + +def add_async_post_custom_options( + subp:ArgumentParser +): + subp.add_argument( + "-e", + "--endpoint", + dest="endpoint_name", + help="API endpoint name (required)", + required=True + ) + subp.add_argument( + "-a", + "--account-name", + dest="username", + required=True, + help="API account name for the endpoint (required)", + ) + subp.add_argument( + "-v", + "--version", + default="1", + dest="api_version", + help="Version for the endpoint. If not set, use the latest version of the model.", + ) + subp.add_argument(dest="api_name", help="Name of the API") + +def add_sync_common_options( + subp:ArgumentParser +): + subp.add_argument( + "-k", + "--key", + dest="api_key", + help="API key for the account", + ) + subp.add_argument( + "-i", + "--input-type", + dest="input_type", + choices=["path", "file", "base64", "bytes", "url"], + default="path", + help="Specify how to handle the input.\n" + "- path: open a path (default).\n" + "- file: open as a file handle.\n" + "- base64: open a base64 encoded text file.\n" + "- bytes: open the contents as raw bytes.\n" + "- url: open an URL.", + ) + subp.add_argument( + "-o", + "--output-type", + dest="output_type", + choices=["summary", "raw", "parsed"], + default="summary", + help="Specify how to output the data.\n" + "- summary: a basic summary (default)\n" + "- raw: the raw HTTP response\n" + "- parsed: the validated and parsed data fields\n", + ) + subp.add_argument( + "-c", + "--cut-doc", + dest="cut_doc", + action="store_true", + help="Cut document pages", + ) + subp.add_argument( + "-p", + "--pages-keep", + dest="doc_pages", + type=int, + default=5, + help="Number of document pages to keep, default: 5", + ) + subp.add_argument(dest="path", help="Full path to the file") + +def add_sync_default_options( + subp:ArgumentParser +): + subp.add_argument( + "-t", + "--full-text", + dest="include_words", + action="store_true", + help="include full document text in response", + ) + +def add_sync_custom_options( + subp:ArgumentParser +): + subp.add_argument( + "-a", + "--account-name", + dest="username", + required=True, + help="API account name for the endpoint (required)", + ) + subp.add_argument( + "-v", + "--version", + default="1", + dest="api_version", + help="Version for the endpoint. If not set, use the latest version of the model.", + ) + subp.add_argument(dest="api_name", help="Name of the API") + def main() -> None: """Run the Command Line Interface.""" From 03448e2bec414a292512114b5323cae63225c051 Mon Sep 17 00:00:00 2001 From: Sebastian Olivera Date: Tue, 9 May 2023 11:44:14 +0200 Subject: [PATCH 02/14] cli in progress --- mindee/cli.py | 415 ++++++++++++++++++-------------------------------- 1 file changed, 146 insertions(+), 269 deletions(-) diff --git a/mindee/cli.py b/mindee/cli.py index 732e7f72..a65f197c 100644 --- a/mindee/cli.py +++ b/mindee/cli.py @@ -103,12 +103,6 @@ def _get_input_doc(client, args) -> DocumentClient: def call_endpoint(args: Namespace): """Call the endpoint given passed arguments.""" client = Client(api_key=args.api_key, raise_on_error=args.raise_on_error) - if args.product_name == "custom": - client.add_endpoint( - endpoint_name=args.api_name, - account_name=args.username, - version=args.api_version, - ) info = DOCUMENTS[args.product_name] doc_class = info.doc_class @@ -118,17 +112,6 @@ def call_endpoint(args: Namespace): page_options = PageOptions(range(args.doc_pages), on_min_pages=0) else: page_options = None - if args.product_name == "custom": - parsed_data = input_doc.parse( - doc_class, - endpoint_name=args.api_name, - account_name=args.username, - page_options=page_options, - ) - else: - parsed_data = input_doc.parse( - doc_class, include_words=args.include_words, page_options=page_options - ) if args.output_type == "raw": print(json.dumps(parsed_data.http_response, indent=2)) @@ -137,6 +120,52 @@ def call_endpoint(args: Namespace): print(json.dumps(doc, indent=2, default=serialize_for_json)) else: print(parsed_data.document) + + if args.instruction_type == "enqueue": + if args.product_name == "custom": + client.add_endpoint( + endpoint_name=args.api_name, + account_name=args.username, + version=args.api_version, + ) + parsed_data = input_doc.enqueue( + doc_class, + endpoint_name=args.api_name, + account_name=args.username, + page_options=page_options + ) + else: + parsed_data = input_doc.parse( + doc_class, include_words=args.include_words, page_options=page_options + ) + elif args.instruction_type == "parse-queued": + if args.product_name == "custom": + parsed_data = input_doc.parse_queued( + doc_class, + endpoint_name=args.api_name, + account_name=args.username, + ) + else: + parsed_data = input_doc.parse_queued( + doc_class, queue_id=args.queue_id + ) + elif args.instruction_type == "parse": + if args.product_name == "custom": + client.add_endpoint( + endpoint_name=args.api_name, + account_name=args.username, + version=args.api_version, + ) + parsed_data = input_doc.parse( + doc_class, + endpoint_name=args.api_name, + account_name=args.username, + page_options=page_options, + ) + else: + parsed_data = input_doc.parse( + doc_class, include_words=args.include_words, page_options=page_options + ) def _parse_args() -> Namespace: @@ -149,273 +178,121 @@ def _parse_args() -> Namespace: dest="raise_on_error", help="don't raise errors", ) + parser.add_argument( + "-k", + "--key", + dest="api_key", + help="API key for the account", + ) subparsers = parser.add_subparsers( dest="product_name", required=True, ) - for name, info in DOCUMENTS.items(): - subp = subparsers.add_parser(name, help=info.help) - if info.is_sync: - if name == "custom": - add_sync_custom_options(subp) - else: - add_sync_default_options(subp) - add_sync_common_options(subp) - + for name, info in DOCUMENTS.items(): subp = subparsers.add_parser(name, help=info.help) - if info.is_async: - subp = subparsers.add_parser("enqueue") - if name=="custom": - add_async_post_custom_options(subp) - else: - add_async_post_default_options(subp) - add_async_post_common_options(subp) + parsers_instruction_type = subp.add_subparsers(dest="instruction_type") + + if info.is_sync: + subp_predict = parsers_instruction_type.add_parser("parse", help=f"Parse {name}") + _add_options(subp_predict, "predict", name) - subp = subparsers.add_parser("parse-queued") - if name=="custom": - add_async_get_custom_options(subp) - else: - add_async_get_default_options(subp) - add_async_get_common_options(subp) + if info.is_async: + parser_enqueue = parsers_instruction_type.add_parser("enqueue", help=f"Enqueue {name}") + _add_options(parser_enqueue, "enqueue", name) + + parser_parse_queued = parsers_instruction_type.add_parser("parse-queued", help=f"Parse (queued) {name} ") + _add_options(parser_parse_queued, "parse-queued", name) - subp.add_argument( + + parsed_args = parser.parse_args() + return parsed_args + + +def _add_options(parser:ArgumentParser, category: str, name:str): + """ + Adds options to a given command. + + :param parser: The argument parser object. + :param category: The category of the current command (Predict/Enqueue/Parse-Enqueued). + :param name: Name of the current command (Default/Custom). + """ + + + if category in ["predict", "enqueue"]: + parser.add_argument( + "-i", + "--input-type", + dest="input_type", + choices=["path", "file", "base64", "bytes", "url"], + default="path", + help="Specify how to handle the input.\n" + "- path: open a path (default).\n" + "- file: open as a file handle.\n" + "- base64: open a base64 encoded text file.\n" + "- bytes: open the contents as raw bytes.\n" + "- url: open an URL.", + ) + parser.add_argument( + "-c", + "--cut-doc", + dest="cut_doc", + action="store_true", + help="Cut document pages", + ) + parser.add_argument( + "-p", + "--pages-keep", + dest="doc_pages", + type=int, + default=5, + help="Number of document pages to keep, default: 5", + ) + + if name=="custom": + parser.add_argument( + "-a", + "--account-name", + dest="username", + required=True, + help="API account name for the endpoint (required)", + ) + parser.add_argument( + "-e", + "--endpoint", + dest="endpoint_name", + help="API endpoint name (required)", + required=True + ) + parser.add_argument( + "-v", + "--version", + default="1", + dest="api_version", + help="Version for the endpoint. If not set, use the latest version of the model.", + ) + else: + parser.add_argument( "-t", "--full-text", dest="include_words", action="store_true", help="include full document text in response", ) - - parsed_args = parser.parse_args() - return parsed_args - -def add_async_get_common_options( - subp:ArgumentParser -): - subp.add_argument( - "-c", - "--cut-doc", - dest="cut_doc", - action="store_true", - help="Cut document pages", - ) - subp.add_argument( - "-i", - "--input-type", - dest="input_type", - choices=["path", "file", "base64", "bytes", "url"], - default="path", - help="Specify how to handle the input.\n" - "- path: open a path (default).\n" - "- file: open as a file handle.\n" - "- base64: open a base64 encoded text file.\n" - "- bytes: open the contents as raw bytes.\n" - "- url: open an URL.", - ) - subp.add_argument( - "-p", - "--pages-keep", - dest="doc_pages", - type=int, - default=5, - help="Number of document pages to keep, default: 5", - ) - -def add_async_get_default_options( - subp:ArgumentParser -): - subp.add_argument( - "-k", - "--key", - dest="api_key", - help="API key for the account", - ) - subp.add_argument( - "-t", - "--full-text", - dest="include_words", - action="store_true", - help="include full document text in response", - ) - -def add_async_get_custom_options( - subp:ArgumentParser -): - subp.add_argument( - "-e", - "--endpoint", - dest="endpoint_name", - help="API endpoint name (required)", - required=True - ) - subp.add_argument( - "-a", - "--account-name", - dest="username", - required=True, - help="API account name for the endpoint (required)", - ) - subp.add_argument( - "-v", - "--version", - default="1", - dest="api_version", - help="Version for the endpoint. If not set, use the latest version of the model.", - ) - subp.add_argument(dest="api_name", help="Name of the API") - - -def add_async_post_common_options( - subp:ArgumentParser -): - subp.add_argument( - "-c", - "--cut-doc", - dest="cut_doc", - action="store_true", - help="Cut document pages", - ) - subp.add_argument( - "-i", - "--input-type", - dest="input_type", - choices=["path", "file", "base64", "bytes", "url"], - default="path", - help="Specify how to handle the input.\n" - "- path: open a path (default).\n" - "- file: open as a file handle.\n" - "- base64: open a base64 encoded text file.\n" - "- bytes: open the contents as raw bytes.\n" - "- url: open an URL.", - ) - subp.add_argument( - "-p", - "--pages-keep", - dest="doc_pages", - type=int, - default=5, - help="Number of document pages to keep, default: 5", - ) - -def add_async_post_default_options( - subp:ArgumentParser -): - subp.add_argument( - "-t", - "--full-text", - dest="include_words", - action="store_true", - help="include full document text in response", - ) - -def add_async_post_custom_options( - subp:ArgumentParser -): - subp.add_argument( - "-e", - "--endpoint", - dest="endpoint_name", - help="API endpoint name (required)", - required=True - ) - subp.add_argument( - "-a", - "--account-name", - dest="username", - required=True, - help="API account name for the endpoint (required)", - ) - subp.add_argument( - "-v", - "--version", - default="1", - dest="api_version", - help="Version for the endpoint. If not set, use the latest version of the model.", - ) - subp.add_argument(dest="api_name", help="Name of the API") - -def add_sync_common_options( - subp:ArgumentParser -): - subp.add_argument( - "-k", - "--key", - dest="api_key", - help="API key for the account", - ) - subp.add_argument( - "-i", - "--input-type", - dest="input_type", - choices=["path", "file", "base64", "bytes", "url"], - default="path", - help="Specify how to handle the input.\n" - "- path: open a path (default).\n" - "- file: open as a file handle.\n" - "- base64: open a base64 encoded text file.\n" - "- bytes: open the contents as raw bytes.\n" - "- url: open an URL.", - ) - subp.add_argument( - "-o", - "--output-type", - dest="output_type", - choices=["summary", "raw", "parsed"], - default="summary", - help="Specify how to output the data.\n" - "- summary: a basic summary (default)\n" - "- raw: the raw HTTP response\n" - "- parsed: the validated and parsed data fields\n", - ) - subp.add_argument( - "-c", - "--cut-doc", - dest="cut_doc", - action="store_true", - help="Cut document pages", - ) - subp.add_argument( - "-p", - "--pages-keep", - dest="doc_pages", - type=int, - default=5, - help="Number of document pages to keep, default: 5", - ) - subp.add_argument(dest="path", help="Full path to the file") - -def add_sync_default_options( - subp:ArgumentParser -): - subp.add_argument( - "-t", - "--full-text", - dest="include_words", - action="store_true", - help="include full document text in response", - ) + + if category in ["predict", "parse-queued"]: + parser.add_argument( + "-o", + "--output-type", + dest="output_type", + choices=["summary", "raw", "parsed"], + default="summary", + help="Specify how to output the data.\n" + "- summary: a basic summary (default)\n" + "- raw: the raw HTTP response\n" + "- parsed: the validated and parsed data fields\n", + ) -def add_sync_custom_options( - subp:ArgumentParser -): - subp.add_argument( - "-a", - "--account-name", - dest="username", - required=True, - help="API account name for the endpoint (required)", - ) - subp.add_argument( - "-v", - "--version", - default="1", - dest="api_version", - help="Version for the endpoint. If not set, use the latest version of the model.", - ) - subp.add_argument(dest="api_name", help="Name of the API") - - def main() -> None: """Run the Command Line Interface.""" call_endpoint(_parse_args()) From 9e4e147addf935e6b0f667ef37c2caec960bf3f2 Mon Sep 17 00:00:00 2001 From: Sebastian Olivera Date: Tue, 9 May 2023 11:52:13 +0200 Subject: [PATCH 03/14] added queue id --- mindee/cli.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mindee/cli.py b/mindee/cli.py index a65f197c..d316a9b9 100644 --- a/mindee/cli.py +++ b/mindee/cli.py @@ -191,7 +191,7 @@ def _parse_args() -> Namespace: for name, info in DOCUMENTS.items(): subp = subparsers.add_parser(name, help=info.help) - parsers_instruction_type = subp.add_subparsers(dest="instruction_type") + parsers_instruction_type = subp.add_subparsers(dest="instruction_type", required=True) if info.is_sync: subp_predict = parsers_instruction_type.add_parser("parse", help=f"Parse {name}") @@ -201,9 +201,11 @@ def _parse_args() -> Namespace: parser_enqueue = parsers_instruction_type.add_parser("enqueue", help=f"Enqueue {name}") _add_options(parser_enqueue, "enqueue", name) - parser_parse_queued = parsers_instruction_type.add_parser("parse-queued", help=f"Parse (queued) {name} ") + parser_parse_queued = parsers_instruction_type.add_parser("parse-queued", help=f"Parse (queued) {name}") _add_options(parser_parse_queued, "parse-queued", name) + parser_parse_queued.add_argument(dest="queue_id", help="Async queue ID for a document (required)") + subp.add_argument(dest="path", help="Full path to the file") parsed_args = parser.parse_args() return parsed_args From 9fc501d5a085036cc44880f0fe280084b0b8d759 Mon Sep 17 00:00:00 2001 From: Sebastian Olivera Date: Tue, 9 May 2023 14:29:57 +0200 Subject: [PATCH 04/14] fixed wrong millisecs_taken from queue parsing --- mindee/cli.py | 47 ++++++++++++++++++++++++++++++---------------- mindee/client.py | 10 ++++++++++ mindee/response.py | 11 ++++------- 3 files changed, 45 insertions(+), 23 deletions(-) diff --git a/mindee/cli.py b/mindee/cli.py index d316a9b9..daa5aba5 100644 --- a/mindee/cli.py +++ b/mindee/cli.py @@ -7,6 +7,7 @@ from mindee import Client, PageOptions, documents from mindee.client import DocumentClient from mindee.documents.base import Document, serialize_for_json +from mindee.input.sources import LocalInputSource TypeDoc = TypeVar("TypeDoc", bound=Document) @@ -81,7 +82,7 @@ class CommandConfig(Generic[TypeDoc]): } -def _get_input_doc(client, args) -> DocumentClient: +def _get_input_doc(client: Client, args: Namespace) -> DocumentClient: if args.input_type == "file": with open(args.path, "rb", buffering=30) as file_handle: return client.doc_from_file(input_file=file_handle) @@ -106,22 +107,13 @@ def call_endpoint(args: Namespace): info = DOCUMENTS[args.product_name] doc_class = info.doc_class - input_doc = _get_input_doc(client, args) - if args.cut_doc and args.doc_pages: - page_options = PageOptions(range(args.doc_pages), on_min_pages=0) - else: - page_options = None - - if args.output_type == "raw": - print(json.dumps(parsed_data.http_response, indent=2)) - elif args.output_type == "parsed": - doc = parsed_data.document - print(json.dumps(doc, indent=2, default=serialize_for_json)) - else: - print(parsed_data.document) + page_options = None if args.instruction_type == "enqueue": + if args.cut_doc and args.doc_pages: + page_options = PageOptions(range(args.doc_pages), on_min_pages=0) + input_doc = _get_input_doc(client, args) if args.product_name == "custom": client.add_endpoint( endpoint_name=args.api_name, @@ -135,10 +127,12 @@ def call_endpoint(args: Namespace): page_options=page_options ) else: - parsed_data = input_doc.parse( + parsed_data = input_doc.enqueue( doc_class, include_words=args.include_words, page_options=page_options ) + print(parsed_data.job) elif args.instruction_type == "parse-queued": + input_doc = client.no_doc() if args.product_name == "custom": parsed_data = input_doc.parse_queued( doc_class, @@ -149,7 +143,20 @@ def call_endpoint(args: Namespace): parsed_data = input_doc.parse_queued( doc_class, queue_id=args.queue_id ) + if parsed_data.job.status=="completed": + if args.output_type == "raw": + print(json.dumps(parsed_data.http_response, indent=2)) + elif args.output_type == "parsed": + doc = parsed_data.document.document + print(json.dumps(doc, indent=2, default=serialize_for_json)) + else: + print(parsed_data.job) + else: + print(parsed_data.job) elif args.instruction_type == "parse": + if args.cut_doc and args.doc_pages: + page_options = PageOptions(range(args.doc_pages), on_min_pages=0) + input_doc = _get_input_doc(client, args) if args.product_name == "custom": client.add_endpoint( endpoint_name=args.api_name, @@ -166,6 +173,13 @@ def call_endpoint(args: Namespace): parsed_data = input_doc.parse( doc_class, include_words=args.include_words, page_options=page_options ) + if args.output_type == "raw": + print(json.dumps(parsed_data.http_response, indent=2)) + elif args.output_type == "parsed": + doc = parsed_data.document + print(json.dumps(doc, indent=2, default=serialize_for_json)) + else: + print(parsed_data.document) def _parse_args() -> Namespace: @@ -196,16 +210,17 @@ def _parse_args() -> Namespace: if info.is_sync: subp_predict = parsers_instruction_type.add_parser("parse", help=f"Parse {name}") _add_options(subp_predict, "predict", name) + subp_predict.add_argument(dest="path", help="Full path to the file") if info.is_async: parser_enqueue = parsers_instruction_type.add_parser("enqueue", help=f"Enqueue {name}") _add_options(parser_enqueue, "enqueue", name) + parser_enqueue.add_argument(dest="path", help="Full path to the file") parser_parse_queued = parsers_instruction_type.add_parser("parse-queued", help=f"Parse (queued) {name}") _add_options(parser_parse_queued, "parse-queued", name) parser_parse_queued.add_argument(dest="queue_id", help="Async queue ID for a document (required)") - subp.add_argument(dest="path", help="Full path to the file") parsed_args = parser.parse_args() return parsed_args diff --git a/mindee/client.py b/mindee/client.py index d5dd2c7e..8351b352 100644 --- a/mindee/client.py +++ b/mindee/client.py @@ -583,3 +583,13 @@ def doc_from_url( doc_configs=self._doc_configs, raise_on_error=self.raise_on_error, ) + + def no_doc( + self + ): + return DocumentClient( + input_doc=None, + doc_configs=self._doc_configs, + raise_on_error=self.raise_on_error, + ) + \ No newline at end of file diff --git a/mindee/response.py b/mindee/response.py index 7968441d..868d2028 100644 --- a/mindee/response.py +++ b/mindee/response.py @@ -60,10 +60,7 @@ def __init__(self, json_response: dict) -> None: self.job_id = json_response.get("id") self.status = json_response.get("status") if self.available_at: - self.millisecs_taken = int( - (self.available_at.microsecond - self.issued_at.microsecond) / 1000 - ) - + self.millisecs_taken = int((self.available_at-self.issued_at).total_seconds()*1000) def __str__(self) -> str: return json.dumps(self.__dict__, indent=4, sort_keys=True, default=str) @@ -94,7 +91,7 @@ def __init__( self, doc_config: DocumentConfig, http_response: Dict[str, Any], - input_source: Union[LocalInputSource, UrlInputSource], + input_source: Optional[Union[LocalInputSource, UrlInputSource]], response_ok: bool, ) -> None: """ @@ -109,12 +106,12 @@ def __init__( self.document_type = doc_config.document_type self.pages = [] - if not isinstance(input_source, UrlInputSource): + if not isinstance(input_source, UrlInputSource) and input_source: self.input_path = input_source.filepath self.input_filename = input_source.filename self.input_mimetype = input_source.file_mimetype - if not response_ok: + if not response_ok or not input_source: self.document = None else: self._load_response(doc_config, input_source) From 9e1fc769bc93b51bfe109c1b86cb3ca3617f1132 Mon Sep 17 00:00:00 2001 From: Sebastian Olivera Date: Tue, 9 May 2023 14:53:19 +0200 Subject: [PATCH 05/14] fixed http error glitch on async --- mindee/cli.py | 54 ++++++++++++++++++++++++++-------------------- mindee/client.py | 2 +- mindee/response.py | 2 +- 3 files changed, 33 insertions(+), 25 deletions(-) diff --git a/mindee/cli.py b/mindee/cli.py index daa5aba5..d13e593b 100644 --- a/mindee/cli.py +++ b/mindee/cli.py @@ -2,7 +2,7 @@ import json from argparse import Namespace, ArgumentParser from dataclasses import dataclass -from typing import Dict, Generic, TypeVar +from typing import Dict, Generic, Optional, TypeVar from mindee import Client, PageOptions, documents from mindee.client import DocumentClient @@ -82,24 +82,30 @@ class CommandConfig(Generic[TypeDoc]): } -def _get_input_doc(client: Client, args: Namespace) -> DocumentClient: - if args.input_type == "file": - with open(args.path, "rb", buffering=30) as file_handle: - return client.doc_from_file(input_file=file_handle) - elif args.input_type == "base64": - with open(args.path, "rt", encoding="ascii") as base64_handle: - return client.doc_from_b64string( - input_string=base64_handle.read(), filename="test.jpg" - ) - elif args.input_type == "bytes": - with open(args.path, "rb") as bytes_handle: - return client.doc_from_bytes( - input_bytes=bytes_handle.read(), filename=bytes_handle.name - ) - elif args.input_type == "url": - return client.doc_from_url(url=args.path) - return client.doc_from_path(args.path) - +def _get_input_doc( + client: Client, + args: Namespace, + parsed_path:Optional[str]=None +) -> DocumentClient: + if not parsed_path: + if args.input_type == "file": + with open(args.path, "rb", buffering=30) as file_handle: + return client.doc_from_file(input_file=file_handle) + elif args.input_type == "base64": + with open(args.path, "rt", encoding="ascii") as base64_handle: + return client.doc_from_b64string( + input_string=base64_handle.read(), filename="test.jpg" + ) + elif args.input_type == "bytes": + with open(args.path, "rb") as bytes_handle: + return client.doc_from_bytes( + input_bytes=bytes_handle.read(), filename=bytes_handle.name + ) + elif args.input_type == "url": + return client.doc_from_url(url=args.path) + return client.doc_from_path(args.path) + else: + return client.doc_from_url(parsed_path) def call_endpoint(args: Namespace): """Call the endpoint given passed arguments.""" @@ -135,22 +141,24 @@ def call_endpoint(args: Namespace): input_doc = client.no_doc() if args.product_name == "custom": parsed_data = input_doc.parse_queued( - doc_class, + document_class=doc_class, + queue_id=args.queue_id, endpoint_name=args.api_name, account_name=args.username, ) else: parsed_data = input_doc.parse_queued( - doc_class, queue_id=args.queue_id + document_class=doc_class, queue_id=args.queue_id ) if parsed_data.job.status=="completed": + input_doc = _get_input_doc(client, args, parsed_data.api_request.url) if args.output_type == "raw": - print(json.dumps(parsed_data.http_response, indent=2)) + print(json.dumps(parsed_data.document.http_response, indent=2)) elif args.output_type == "parsed": doc = parsed_data.document.document print(json.dumps(doc, indent=2, default=serialize_for_json)) else: - print(parsed_data.job) + print(parsed_data.document.document_type) else: print(parsed_data.job) elif args.instruction_type == "parse": diff --git a/mindee/client.py b/mindee/client.py index 8351b352..1ad06320 100644 --- a/mindee/client.py +++ b/mindee/client.py @@ -268,7 +268,7 @@ def _get_queued_document( or queue_response.status_code > 302 ): raise HTTPException( - f"API {queue_response.status_code} HTTP error: {json.dumps(queue_response)}" + f"API {queue_response.status_code} HTTP error: {json.dumps(queue_response.json())}" ) return AsyncPredictResponse[TypeDocument]( diff --git a/mindee/response.py b/mindee/response.py index 868d2028..6788d21d 100644 --- a/mindee/response.py +++ b/mindee/response.py @@ -111,7 +111,7 @@ def __init__( self.input_filename = input_source.filename self.input_mimetype = input_source.file_mimetype - if not response_ok or not input_source: + if not response_ok: self.document = None else: self._load_response(doc_config, input_source) From 7df0195c58d6069b93a74c287bfa781ad9501a0f Mon Sep 17 00:00:00 2001 From: Sebastian Olivera Date: Tue, 9 May 2023 14:54:33 +0200 Subject: [PATCH 06/14] working + fixes --- mindee/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mindee/cli.py b/mindee/cli.py index d13e593b..31a35e8c 100644 --- a/mindee/cli.py +++ b/mindee/cli.py @@ -158,7 +158,7 @@ def call_endpoint(args: Namespace): doc = parsed_data.document.document print(json.dumps(doc, indent=2, default=serialize_for_json)) else: - print(parsed_data.document.document_type) + print(parsed_data.document.document) else: print(parsed_data.job) elif args.instruction_type == "parse": From 678757cd041182f26c2abd8b9fdd685f4ab6b5ce Mon Sep 17 00:00:00 2001 From: Sebastian Olivera Date: Tue, 9 May 2023 15:41:49 +0200 Subject: [PATCH 07/14] updated python-cli doc --- docs/guide/python-cli.md | 20 ++++++++++++++++---- mindee/cli.py | 12 ++++++------ 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/docs/guide/python-cli.md b/docs/guide/python-cli.md index 4a9bfe88..4680342f 100644 --- a/docs/guide/python-cli.md +++ b/docs/guide/python-cli.md @@ -10,20 +10,32 @@ python3 -m mindee --help ### Example command help ```shell -python3 -m mindee invoice --help +python3 -m mindee invoice parse --help ``` ### Example parse command for Off-the-Shelf document ```shell -python3 -m mindee invoice --invoice-key xxxxxxx /path/to/invoice.pdf +python3 -m mindee invoice parse --key xxxxxxx /path/to/invoice.pdf +``` + +### Example enqueue command for Off-the-Shelf document (async) + +```shell +python3 -m mindee invoice-splitter enqueue --key xxxxxxx /path/to/invoice-splitter.pdf +``` + +### Example parse-queued command for Off-the-Shelf document (async) + +```shell +python3 -m mindee invoice-splitter parse-queued --key xxxxxxx id-of-the-job ``` ### Works with environment variables ```shell export MINDEE_API_KEY=xxxxxx -python3 -m mindee invoice /path/to/invoice.pdf +python3 -m mindee invoice parse /path/to/invoice.pdf ``` ### Example parse command for a custom document @@ -35,7 +47,7 @@ python3 -m mindee custom -u pikachu -k xxxxxxx pokemon_card /path/to/card.jpg ### You can get the full parsed output as well ```shell -python3 -m mindee invoice -o parsed /path/to/invoice.pdf +python3 -m mindee invoice parse -o parsed /path/to/invoice.pdf ``` ### In the Git repo, there's a helper script for it diff --git a/mindee/cli.py b/mindee/cli.py index 31a35e8c..4e09c19a 100644 --- a/mindee/cli.py +++ b/mindee/cli.py @@ -200,12 +200,6 @@ def _parse_args() -> Namespace: dest="raise_on_error", help="don't raise errors", ) - parser.add_argument( - "-k", - "--key", - dest="api_key", - help="API key for the account", - ) subparsers = parser.add_subparsers( dest="product_name", required=True, @@ -243,6 +237,12 @@ def _add_options(parser:ArgumentParser, category: str, name:str): :param name: Name of the current command (Default/Custom). """ + parser.add_argument( + "-k", + "--key", + dest="api_key", + help="API key for the account", + ) if category in ["predict", "enqueue"]: parser.add_argument( From 62744d523d661149e651911da191a9a371e3e7e7 Mon Sep 17 00:00:00 2001 From: Sebastian Olivera Date: Tue, 9 May 2023 16:20:48 +0200 Subject: [PATCH 08/14] mid-refacto push --- mindee/cli.py | 220 ++++++++++++++++++++++++--------------------- mindee/client.py | 17 ++-- mindee/response.py | 11 ++- 3 files changed, 137 insertions(+), 111 deletions(-) diff --git a/mindee/cli.py b/mindee/cli.py index 4e09c19a..164d5a0e 100644 --- a/mindee/cli.py +++ b/mindee/cli.py @@ -1,13 +1,12 @@ import argparse import json -from argparse import Namespace, ArgumentParser +from argparse import ArgumentParser, Namespace from dataclasses import dataclass from typing import Dict, Generic, Optional, TypeVar from mindee import Client, PageOptions, documents from mindee.client import DocumentClient from mindee.documents.base import Document, serialize_for_json -from mindee.input.sources import LocalInputSource TypeDoc = TypeVar("TypeDoc", bound=Document) @@ -78,14 +77,12 @@ class CommandConfig(Generic[TypeDoc]): doc_class=documents.TypeInvoiceSplitterV1, is_sync=False, is_async=True, - ) + ), } def _get_input_doc( - client: Client, - args: Namespace, - parsed_path:Optional[str]=None + client: Client, args: Namespace, parsed_path: Optional[str] = None ) -> DocumentClient: if not parsed_path: if args.input_type == "file": @@ -104,8 +101,8 @@ def _get_input_doc( elif args.input_type == "url": return client.doc_from_url(url=args.path) return client.doc_from_path(args.path) - else: - return client.doc_from_url(parsed_path) + return client.doc_from_url(parsed_path) + def call_endpoint(args: Namespace): """Call the endpoint given passed arguments.""" @@ -113,81 +110,97 @@ def call_endpoint(args: Namespace): info = DOCUMENTS[args.product_name] doc_class = info.doc_class - - page_options = None - if args.instruction_type == "enqueue": - if args.cut_doc and args.doc_pages: - page_options = PageOptions(range(args.doc_pages), on_min_pages=0) - input_doc = _get_input_doc(client, args) - if args.product_name == "custom": - client.add_endpoint( - endpoint_name=args.api_name, - account_name=args.username, - version=args.api_version, - ) - parsed_data = input_doc.enqueue( - doc_class, - endpoint_name=args.api_name, - account_name=args.username, - page_options=page_options - ) - else: - parsed_data = input_doc.enqueue( - doc_class, include_words=args.include_words, page_options=page_options - ) - print(parsed_data.job) + process_parse_enqueue(args, client, doc_class) elif args.instruction_type == "parse-queued": - input_doc = client.no_doc() - if args.product_name == "custom": - parsed_data = input_doc.parse_queued( - document_class=doc_class, - queue_id=args.queue_id, - endpoint_name=args.api_name, - account_name=args.username, - ) - else: - parsed_data = input_doc.parse_queued( - document_class=doc_class, queue_id=args.queue_id - ) - if parsed_data.job.status=="completed": - input_doc = _get_input_doc(client, args, parsed_data.api_request.url) - if args.output_type == "raw": - print(json.dumps(parsed_data.document.http_response, indent=2)) - elif args.output_type == "parsed": - doc = parsed_data.document.document - print(json.dumps(doc, indent=2, default=serialize_for_json)) - else: - print(parsed_data.document.document) - else: - print(parsed_data.job) + process_parse_queued(args, client, doc_class) elif args.instruction_type == "parse": - if args.cut_doc and args.doc_pages: - page_options = PageOptions(range(args.doc_pages), on_min_pages=0) - input_doc = _get_input_doc(client, args) - if args.product_name == "custom": - client.add_endpoint( - endpoint_name=args.api_name, - account_name=args.username, - version=args.api_version, - ) - parsed_data = input_doc.parse( - doc_class, - endpoint_name=args.api_name, - account_name=args.username, - page_options=page_options, - ) - else: - parsed_data = input_doc.parse( - doc_class, include_words=args.include_words, page_options=page_options - ) + process_parse(args, client, doc_class) + + +def process_parse(args: Namespace, client: Client, doc_class) -> None: + """Processes the results of a parsing request.""" + if args.cut_doc and args.doc_pages: + page_options = PageOptions(range(args.doc_pages), on_min_pages=0) + else: + page_options = None + input_doc = _get_input_doc(client, args) + if args.product_name == "custom": + client.add_endpoint( + endpoint_name=args.api_name, + account_name=args.username, + version=args.api_version, + ) + parsed_data = input_doc.parse( + doc_class, + endpoint_name=args.api_name, + account_name=args.username, + page_options=page_options, + ) + else: + parsed_data = input_doc.parse( + doc_class, include_words=args.include_words, page_options=page_options + ) + if args.output_type == "raw": + print(json.dumps(parsed_data.http_response, indent=2)) + elif args.output_type == "parsed": + doc = parsed_data.document + print(json.dumps(doc, indent=2, default=serialize_for_json)) + else: + print(parsed_data.document) + + +def process_parse_queued(args: Namespace, client: Client, doc_class) -> None: + """Processes the results of a queued parsing request.""" + input_doc = client.no_doc() + if args.product_name == "custom": + parsed_data = input_doc.parse_queued( + document_class=doc_class, + queue_id=args.queue_id, + endpoint_name=args.api_name, + account_name=args.username, + ) + else: + parsed_data = input_doc.parse_queued( + document_class=doc_class, queue_id=args.queue_id + ) + if parsed_data.job.status == "completed": + input_doc = _get_input_doc(client, args, parsed_data.api_request.url) if args.output_type == "raw": - print(json.dumps(parsed_data.http_response, indent=2)) + print(json.dumps(parsed_data.document.http_response, indent=2)) elif args.output_type == "parsed": - doc = parsed_data.document + doc = parsed_data.document.document print(json.dumps(doc, indent=2, default=serialize_for_json)) else: - print(parsed_data.document) + print(parsed_data.document.document) + else: + print(parsed_data.job) + + +def process_parse_enqueue(args: Namespace, client: Client, doc_class) -> None: + """Processes the results of an enqueuing request.""" + if args.cut_doc and args.doc_pages: + page_options = PageOptions(range(args.doc_pages), on_min_pages=0) + else: + page_options = None + input_doc = _get_input_doc(client, args) + if args.product_name == "custom": + client.add_endpoint( + endpoint_name=args.api_name, + account_name=args.username, + version=args.api_version, + ) + parsed_data = input_doc.enqueue( + doc_class, + endpoint_name=args.api_name, + account_name=args.username, + page_options=page_options, + ) + else: + parsed_data = input_doc.enqueue( + doc_class, include_words=args.include_words, page_options=page_options + ) + print(parsed_data.job) def _parse_args() -> Namespace: @@ -204,46 +217,48 @@ def _parse_args() -> Namespace: dest="product_name", required=True, ) - + for name, info in DOCUMENTS.items(): subp = subparsers.add_parser(name, help=info.help) - parsers_instruction_type = subp.add_subparsers(dest="instruction_type", required=True) - + parsers_instruction_type = subp.add_subparsers( + dest="instruction_type", required=True + ) + if info.is_sync: - subp_predict = parsers_instruction_type.add_parser("parse", help=f"Parse {name}") + subp_predict = parsers_instruction_type.add_parser( + "parse", help=f"Parse {name}" + ) _add_options(subp_predict, "predict", name) subp_predict.add_argument(dest="path", help="Full path to the file") - + if info.is_async: - parser_enqueue = parsers_instruction_type.add_parser("enqueue", help=f"Enqueue {name}") + parser_enqueue = parsers_instruction_type.add_parser( + "enqueue", help=f"Enqueue {name}" + ) _add_options(parser_enqueue, "enqueue", name) parser_enqueue.add_argument(dest="path", help="Full path to the file") - - parser_parse_queued = parsers_instruction_type.add_parser("parse-queued", help=f"Parse (queued) {name}") + + parser_parse_queued = parsers_instruction_type.add_parser( + "parse-queued", help=f"Parse (queued) {name}" + ) _add_options(parser_parse_queued, "parse-queued", name) - parser_parse_queued.add_argument(dest="queue_id", help="Async queue ID for a document (required)") - + parser_parse_queued.add_argument( + dest="queue_id", help="Async queue ID for a document (required)" + ) parsed_args = parser.parse_args() return parsed_args - -def _add_options(parser:ArgumentParser, category: str, name:str): - """ - Adds options to a given command. - - :param parser: The argument parser object. - :param category: The category of the current command (Predict/Enqueue/Parse-Enqueued). - :param name: Name of the current command (Default/Custom). - """ - + +def _add_options(parser: ArgumentParser, category: str, name: str): + """Adds options to a given command.""" parser.add_argument( "-k", "--key", dest="api_key", help="API key for the account", ) - + if category in ["predict", "enqueue"]: parser.add_argument( "-i", @@ -273,8 +288,8 @@ def _add_options(parser:ArgumentParser, category: str, name:str): default=5, help="Number of document pages to keep, default: 5", ) - - if name=="custom": + + if name == "custom": parser.add_argument( "-a", "--account-name", @@ -287,7 +302,7 @@ def _add_options(parser:ArgumentParser, category: str, name:str): "--endpoint", dest="endpoint_name", help="API endpoint name (required)", - required=True + required=True, ) parser.add_argument( "-v", @@ -304,7 +319,7 @@ def _add_options(parser:ArgumentParser, category: str, name:str): action="store_true", help="include full document text in response", ) - + if category in ["predict", "parse-queued"]: parser.add_argument( "-o", @@ -317,7 +332,8 @@ def _add_options(parser:ArgumentParser, category: str, name:str): "- raw: the raw HTTP response\n" "- parsed: the validated and parsed data fields\n", ) - + + def main() -> None: """Run the Command Line Interface.""" call_endpoint(_parse_args()) diff --git a/mindee/client.py b/mindee/client.py index 1ad06320..d254aff7 100644 --- a/mindee/client.py +++ b/mindee/client.py @@ -584,12 +584,19 @@ def doc_from_url( raise_on_error=self.raise_on_error, ) - def no_doc( - self - ): + def no_doc(self) -> DocumentClient: + """ + Load an empty dummy document. + + Used when calling parse-queued to avoid having to use a formerly + created DocumentClientObject. + """ + input_doc = BytesInput( + bytearray(), + "", + ) return DocumentClient( - input_doc=None, + input_doc=input_doc, doc_configs=self._doc_configs, raise_on_error=self.raise_on_error, ) - \ No newline at end of file diff --git a/mindee/response.py b/mindee/response.py index 6788d21d..90f63efd 100644 --- a/mindee/response.py +++ b/mindee/response.py @@ -60,7 +60,10 @@ def __init__(self, json_response: dict) -> None: self.job_id = json_response.get("id") self.status = json_response.get("status") if self.available_at: - self.millisecs_taken = int((self.available_at-self.issued_at).total_seconds()*1000) + self.millisecs_taken = int( + (self.available_at - self.issued_at).total_seconds() * 1000 + ) + def __str__(self) -> str: return json.dumps(self.__dict__, indent=4, sort_keys=True, default=str) @@ -91,7 +94,7 @@ def __init__( self, doc_config: DocumentConfig, http_response: Dict[str, Any], - input_source: Optional[Union[LocalInputSource, UrlInputSource]], + input_source: Union[LocalInputSource, UrlInputSource], response_ok: bool, ) -> None: """ @@ -106,12 +109,12 @@ def __init__( self.document_type = doc_config.document_type self.pages = [] - if not isinstance(input_source, UrlInputSource) and input_source: + if not isinstance(input_source, UrlInputSource): self.input_path = input_source.filepath self.input_filename = input_source.filename self.input_mimetype = input_source.file_mimetype - if not response_ok: + if not response_ok or not input_source: self.document = None else: self._load_response(doc_config, input_source) From 61229d41a3215ac6444def2bb91c9e0a3246d55d Mon Sep 17 00:00:00 2001 From: Sebastian Olivera Date: Tue, 9 May 2023 17:56:46 +0200 Subject: [PATCH 09/14] fixed type mismatch interactions --- mindee/cli.py | 5 ++--- mindee/client.py | 42 ++++++++++++++++++++++++---------------- mindee/documents/base.py | 2 +- mindee/response.py | 12 ++++++------ 4 files changed, 34 insertions(+), 27 deletions(-) diff --git a/mindee/cli.py b/mindee/cli.py index 164d5a0e..770ec656 100644 --- a/mindee/cli.py +++ b/mindee/cli.py @@ -152,7 +152,7 @@ def process_parse(args: Namespace, client: Client, doc_class) -> None: def process_parse_queued(args: Namespace, client: Client, doc_class) -> None: """Processes the results of a queued parsing request.""" - input_doc = client.no_doc() + input_doc = client.doc_for_async() if args.product_name == "custom": parsed_data = input_doc.parse_queued( document_class=doc_class, @@ -164,8 +164,7 @@ def process_parse_queued(args: Namespace, client: Client, doc_class) -> None: parsed_data = input_doc.parse_queued( document_class=doc_class, queue_id=args.queue_id ) - if parsed_data.job.status == "completed": - input_doc = _get_input_doc(client, args, parsed_data.api_request.url) + if parsed_data.job.status == "completed" and parsed_data.document is not None: if args.output_type == "raw": print(json.dumps(parsed_data.document.http_response, indent=2)) elif args.output_type == "parsed": diff --git a/mindee/client.py b/mindee/client.py index d254aff7..a34043c6 100644 --- a/mindee/client.py +++ b/mindee/client.py @@ -24,13 +24,13 @@ def get_bound_classname(type_var) -> str: class DocumentClient: - input_doc: Union[LocalInputSource, UrlInputSource] + input_doc: Optional[Union[LocalInputSource, UrlInputSource]] doc_configs: DocumentConfigDict raise_on_error: bool = True def __init__( self, - input_doc: Union[LocalInputSource, UrlInputSource], + input_doc: Optional[Union[LocalInputSource, UrlInputSource]], doc_configs: DocumentConfigDict, raise_on_error: bool, ): @@ -74,6 +74,8 @@ def parse( :param cropper: Whether to include cropper results for each page. This performs a cropping operation on the server and will increase response time. """ + if self.input_doc is None: + raise RuntimeError("The 'parse' function requires an input document.") bound_classname = get_bound_classname(document_class) if bound_classname != documents.CustomV1.__name__: endpoint_name = get_bound_classname(document_class) @@ -93,7 +95,11 @@ def parse( page_options.page_indexes, ) return self._make_request( - document_class, doc_config, include_words, close_file, cropper + document_class, + doc_config, + include_words, + close_file, + cropper, ) def enqueue( @@ -132,6 +138,8 @@ def enqueue( :param cropper: Whether to include cropper results for each page. This performs a cropping operation on the server and will increase response time. """ + if self.input_doc is None: + raise RuntimeError("The 'enqueue' function requires an input document.") bound_classname = get_bound_classname(document_class) if bound_classname != documents.CustomV1.__name__: endpoint_name = get_bound_classname(document_class) @@ -194,7 +202,10 @@ def _make_request( ) -> PredictResponse[TypeDocument]: if get_bound_classname(document_class) != doc_config.document_class.__name__: raise RuntimeError("Document class mismatch!") - + if self.input_doc is None: + raise RuntimeError( + "The '_make_request' class method requires an input document." + ) response = doc_config.document_class.request( doc_config.endpoints, self.input_doc, @@ -229,6 +240,10 @@ def _predict_async( :param doc_config: Configuration of the document. """ + if self.input_doc is None: + raise RuntimeError( + "The '_predict_async' class method requires an input document." + ) response = doc_config.endpoints[0].predict_async_req_post( self.input_doc, include_words, close_file, cropper ) @@ -280,7 +295,7 @@ def _get_queued_document( def close(self) -> None: """Close the file object.""" - if not isinstance(self.input_doc, UrlInputSource): + if isinstance(self.input_doc, LocalInputSource): self.input_doc.file_object.close() def _check_config(self, endpoint_name, account_name) -> DocumentConfig: @@ -584,19 +599,12 @@ def doc_from_url( raise_on_error=self.raise_on_error, ) - def no_doc(self) -> DocumentClient: - """ - Load an empty dummy document. - - Used when calling parse-queued to avoid having to use a formerly - created DocumentClientObject. - """ - input_doc = BytesInput( - bytearray(), - "", - ) + def doc_for_async( + self, + ) -> DocumentClient: + """Creates an empty doc for asynchronous parsing requests.""" return DocumentClient( - input_doc=input_doc, + input_doc=None, doc_configs=self._doc_configs, raise_on_error=self.raise_on_error, ) diff --git a/mindee/documents/base.py b/mindee/documents/base.py index 75663a17..cf689cfd 100644 --- a/mindee/documents/base.py +++ b/mindee/documents/base.py @@ -46,7 +46,7 @@ class Document: def __init__( self, - input_source: Union[LocalInputSource, UrlInputSource], + input_source: Optional[Union[LocalInputSource, UrlInputSource]], document_type: Optional[str], api_prediction: TypeApiPrediction, page_n: Optional[int] = None, diff --git a/mindee/response.py b/mindee/response.py index 90f63efd..926efb37 100644 --- a/mindee/response.py +++ b/mindee/response.py @@ -94,7 +94,7 @@ def __init__( self, doc_config: DocumentConfig, http_response: Dict[str, Any], - input_source: Union[LocalInputSource, UrlInputSource], + input_source: Optional[Union[LocalInputSource, UrlInputSource]], response_ok: bool, ) -> None: """ @@ -109,12 +109,12 @@ def __init__( self.document_type = doc_config.document_type self.pages = [] - if not isinstance(input_source, UrlInputSource): + if isinstance(input_source, LocalInputSource): self.input_path = input_source.filepath self.input_filename = input_source.filename self.input_mimetype = input_source.file_mimetype - if not response_ok or not input_source: + if not response_ok: self.document = None else: self._load_response(doc_config, input_source) @@ -122,7 +122,7 @@ def __init__( def _load_response( self, doc_config: DocumentConfig, - input_source: Union[LocalInputSource, UrlInputSource], + input_source: Optional[Union[LocalInputSource, UrlInputSource]], ) -> None: # This is some seriously ugly stuff. # Simplify all this in V4, as we won't need to pass the document type anymore @@ -169,13 +169,13 @@ class AsyncPredictResponse(Generic[TypeDocument]): api_request: ApiRequest job: Job """Job object link to the prediction. As long as it isn't complete, the prediction doesn't exist.""" - document: PredictResponse[TypeDocument] + document: Optional[PredictResponse[TypeDocument]] def __init__( self, http_response: Dict[str, Any], doc_config: DocumentConfig, - input_source: Union[LocalInputSource, UrlInputSource], + input_source: Optional[Union[LocalInputSource, UrlInputSource]], response_ok: bool, ) -> None: """ From a9020f78288a295f8bddd35218c04e235d93b6f4 Mon Sep 17 00:00:00 2001 From: Sebastian Olivera Date: Tue, 9 May 2023 18:08:53 +0200 Subject: [PATCH 10/14] fixed CLI tests? --- tests/test_cli.py | 49 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/tests/test_cli.py b/tests/test_cli.py index b2c8b352..579e2ae8 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -23,6 +23,7 @@ def custom_doc(monkeypatch): output_type="summary", include_words=False, path="./tests/data/pdf/blank.pdf", + instruction_type="parse", ) @@ -38,6 +39,34 @@ def ots_doc(monkeypatch): output_type="summary", include_words=False, path="./tests/data/invoice/invoice.pdf", + instruction_type="parse", + ) + + +@pytest.fixture +def ots_doc_enqueue(monkeypatch): + clear_envvars(monkeypatch) + return Namespace( + api_key="dummy", + raise_on_error=True, + cut_doc=False, + doc_pages=3, + input_type="path", + include_words=False, + path="./tests/data/invoice_splitter/2_invoices.pdf", + instruction_type="enqueue", + ) + + +@pytest.fixture +def ots_doc_parse_queued(monkeypatch): + clear_envvars(monkeypatch) + return Namespace( + api_key="dummy", + raise_on_error=True, + output_type="summary", + queue_id="dummy-queue-id", + instruction_type="parse-queued", ) @@ -94,3 +123,23 @@ def test_cli_us_bank_check(ots_doc): ots_doc.api_key = "dummy" with pytest.raises(HTTPException): call_endpoint(ots_doc) + + +def test_cli_invoice_splitter_enqueue(ots_doc_enqueue): + ots_doc_enqueue.product_name = "invoice-splitter" + ots_doc_enqueue.api_key = "" + with pytest.raises(RuntimeError): + call_endpoint(ots_doc_enqueue) + ots_doc_enqueue.api_key = "dummy" + with pytest.raises(HTTPException): + call_endpoint(ots_doc_enqueue) + + +def test_cli_invoice_splitter_parse_queued(ots_doc_parse_queued): + ots_doc_parse_queued.product_name = "invoice-splitter" + ots_doc_parse_queued.api_key = "" + with pytest.raises(RuntimeError): + call_endpoint(ots_doc_parse_queued) + ots_doc_parse_queued.api_key = "dummy" + with pytest.raises(HTTPException): + call_endpoint(ots_doc_parse_queued) From 82bcfde3da95ec3f57c29500b200169d04d50ae3 Mon Sep 17 00:00:00 2001 From: Sebastian Olivera Date: Wed, 10 May 2023 10:26:26 +0200 Subject: [PATCH 11/14] fixes, P1 --- mindee/cli.py | 68 +++++++++++++++++++++++------------------------- mindee/client.py | 8 +++--- 2 files changed, 36 insertions(+), 40 deletions(-) diff --git a/mindee/cli.py b/mindee/cli.py index 770ec656..a99304fe 100644 --- a/mindee/cli.py +++ b/mindee/cli.py @@ -2,11 +2,12 @@ import json from argparse import ArgumentParser, Namespace from dataclasses import dataclass -from typing import Dict, Generic, Optional, TypeVar +from typing import Dict, Generic, TypeVar from mindee import Client, PageOptions, documents from mindee.client import DocumentClient from mindee.documents.base import Document, serialize_for_json +from mindee.response import PredictResponse TypeDoc = TypeVar("TypeDoc", bound=Document) @@ -81,27 +82,23 @@ class CommandConfig(Generic[TypeDoc]): } -def _get_input_doc( - client: Client, args: Namespace, parsed_path: Optional[str] = None -) -> DocumentClient: - if not parsed_path: - if args.input_type == "file": - with open(args.path, "rb", buffering=30) as file_handle: - return client.doc_from_file(input_file=file_handle) - elif args.input_type == "base64": - with open(args.path, "rt", encoding="ascii") as base64_handle: - return client.doc_from_b64string( - input_string=base64_handle.read(), filename="test.jpg" - ) - elif args.input_type == "bytes": - with open(args.path, "rb") as bytes_handle: - return client.doc_from_bytes( - input_bytes=bytes_handle.read(), filename=bytes_handle.name - ) - elif args.input_type == "url": - return client.doc_from_url(url=args.path) - return client.doc_from_path(args.path) - return client.doc_from_url(parsed_path) +def _get_input_doc(client: Client, args: Namespace) -> DocumentClient: + if args.input_type == "file": + with open(args.path, "rb", buffering=30) as file_handle: + return client.doc_from_file(input_file=file_handle) + elif args.input_type == "base64": + with open(args.path, "rt", encoding="ascii") as base64_handle: + return client.doc_from_b64string( + input_string=base64_handle.read(), filename="test.jpg" + ) + elif args.input_type == "bytes": + with open(args.path, "rb") as bytes_handle: + return client.doc_from_bytes( + input_bytes=bytes_handle.read(), filename=bytes_handle.name + ) + elif args.input_type == "url": + return client.doc_from_url(url=args.path) + return client.doc_from_path(args.path) def call_endpoint(args: Namespace): @@ -141,13 +138,7 @@ def process_parse(args: Namespace, client: Client, doc_class) -> None: parsed_data = input_doc.parse( doc_class, include_words=args.include_words, page_options=page_options ) - if args.output_type == "raw": - print(json.dumps(parsed_data.http_response, indent=2)) - elif args.output_type == "parsed": - doc = parsed_data.document - print(json.dumps(doc, indent=2, default=serialize_for_json)) - else: - print(parsed_data.document) + display_doc(args.output_type, parsed_data) def process_parse_queued(args: Namespace, client: Client, doc_class) -> None: @@ -165,17 +156,22 @@ def process_parse_queued(args: Namespace, client: Client, doc_class) -> None: document_class=doc_class, queue_id=args.queue_id ) if parsed_data.job.status == "completed" and parsed_data.document is not None: - if args.output_type == "raw": - print(json.dumps(parsed_data.document.http_response, indent=2)) - elif args.output_type == "parsed": - doc = parsed_data.document.document - print(json.dumps(doc, indent=2, default=serialize_for_json)) - else: - print(parsed_data.document.document) + display_doc(args.output_type, parsed_data.document) else: print(parsed_data.job) +def display_doc(output_type: str, document_response: PredictResponse): + """Display the parsed document.""" + if output_type == "raw": + print(json.dumps(document_response.http_response, indent=2)) + elif output_type == "parsed": + doc = document_response.document + print(json.dumps(doc, indent=2, default=serialize_for_json)) + else: + print(document_response.document) + + def process_parse_enqueue(args: Namespace, client: Client, doc_class) -> None: """Processes the results of an enqueuing request.""" if args.cut_doc and args.doc_pages: diff --git a/mindee/client.py b/mindee/client.py index a34043c6..38917fa3 100644 --- a/mindee/client.py +++ b/mindee/client.py @@ -75,7 +75,7 @@ def parse( This performs a cropping operation on the server and will increase response time. """ if self.input_doc is None: - raise RuntimeError("The 'parse' function requires an input document.") + raise TypeError("The 'parse' function requires an input document.") bound_classname = get_bound_classname(document_class) if bound_classname != documents.CustomV1.__name__: endpoint_name = get_bound_classname(document_class) @@ -139,7 +139,7 @@ def enqueue( This performs a cropping operation on the server and will increase response time. """ if self.input_doc is None: - raise RuntimeError("The 'enqueue' function requires an input document.") + raise TypeError("The 'enqueue' function requires an input document.") bound_classname = get_bound_classname(document_class) if bound_classname != documents.CustomV1.__name__: endpoint_name = get_bound_classname(document_class) @@ -203,7 +203,7 @@ def _make_request( if get_bound_classname(document_class) != doc_config.document_class.__name__: raise RuntimeError("Document class mismatch!") if self.input_doc is None: - raise RuntimeError( + raise TypeError( "The '_make_request' class method requires an input document." ) response = doc_config.document_class.request( @@ -241,7 +241,7 @@ def _predict_async( :param doc_config: Configuration of the document. """ if self.input_doc is None: - raise RuntimeError( + raise TypeError( "The '_predict_async' class method requires an input document." ) response = doc_config.endpoints[0].predict_async_req_post( From 7b278f3c17980b587826c643f1c2ec5c032d3d57 Mon Sep 17 00:00:00 2001 From: Sebastian Olivera Date: Wed, 10 May 2023 11:19:27 +0200 Subject: [PATCH 12/14] minor refactoring --- mindee/cli.py | 190 ++++++++++++++++++++++++++++---------------------- 1 file changed, 108 insertions(+), 82 deletions(-) diff --git a/mindee/cli.py b/mindee/cli.py index a99304fe..5478b1f1 100644 --- a/mindee/cli.py +++ b/mindee/cli.py @@ -220,23 +220,61 @@ def _parse_args() -> Namespace: ) if info.is_sync: - subp_predict = parsers_instruction_type.add_parser( + parser_predict = parsers_instruction_type.add_parser( "parse", help=f"Parse {name}" ) - _add_options(subp_predict, "predict", name) - subp_predict.add_argument(dest="path", help="Full path to the file") + _add_sending_options(parser_predict) + if name == "custom": + _add_custom_options(parser_predict) + else: + parser_predict.add_argument( + "-t", + "--full-text", + dest="include_words", + action="store_true", + help="include full document text in response", + ) + parser_predict.add_argument(dest="path", help="Full path to the file") + parser_predict.add_argument( + "-k", + "--key", + dest="api_key", + help="API key for the account", + ) if info.is_async: parser_enqueue = parsers_instruction_type.add_parser( "enqueue", help=f"Enqueue {name}" ) - _add_options(parser_enqueue, "enqueue", name) + parser_enqueue.add_argument( + "-k", + "--key", + dest="api_key", + help="API key for the account", + ) + _add_sending_options(parser_enqueue) + if name == "custom": + _add_custom_options(parser_enqueue) + else: + parser_enqueue.add_argument( + "-t", + "--full-text", + dest="include_words", + action="store_true", + help="include full document text in response", + ) parser_enqueue.add_argument(dest="path", help="Full path to the file") parser_parse_queued = parsers_instruction_type.add_parser( "parse-queued", help=f"Parse (queued) {name}" ) - _add_options(parser_parse_queued, "parse-queued", name) + parser_parse_queued.add_argument( + "-k", + "--key", + dest="api_key", + help="API key for the account", + ) + _add_display_options(parser_parse_queued) parser_parse_queued.add_argument( dest="queue_id", help="Async queue ID for a document (required)" ) @@ -245,88 +283,76 @@ def _parse_args() -> Namespace: return parsed_args -def _add_options(parser: ArgumentParser, category: str, name: str): - """Adds options to a given command.""" +def _add_display_options(parser: ArgumentParser): + """Adds options related to output/display of a document (parse, parse-queued).""" parser.add_argument( - "-k", - "--key", - dest="api_key", - help="API key for the account", + "-o", + "--output-type", + dest="output_type", + choices=["summary", "raw", "parsed"], + default="summary", + help="Specify how to output the data.\n" + "- summary: a basic summary (default)\n" + "- raw: the raw HTTP response\n" + "- parsed: the validated and parsed data fields\n", ) - if category in ["predict", "enqueue"]: - parser.add_argument( - "-i", - "--input-type", - dest="input_type", - choices=["path", "file", "base64", "bytes", "url"], - default="path", - help="Specify how to handle the input.\n" - "- path: open a path (default).\n" - "- file: open as a file handle.\n" - "- base64: open a base64 encoded text file.\n" - "- bytes: open the contents as raw bytes.\n" - "- url: open an URL.", - ) - parser.add_argument( - "-c", - "--cut-doc", - dest="cut_doc", - action="store_true", - help="Cut document pages", - ) - parser.add_argument( - "-p", - "--pages-keep", - dest="doc_pages", - type=int, - default=5, - help="Number of document pages to keep, default: 5", - ) - if name == "custom": - parser.add_argument( - "-a", - "--account-name", - dest="username", - required=True, - help="API account name for the endpoint (required)", - ) - parser.add_argument( - "-e", - "--endpoint", - dest="endpoint_name", - help="API endpoint name (required)", - required=True, - ) - parser.add_argument( - "-v", - "--version", - default="1", - dest="api_version", - help="Version for the endpoint. If not set, use the latest version of the model.", - ) - else: - parser.add_argument( - "-t", - "--full-text", - dest="include_words", - action="store_true", - help="include full document text in response", - ) +def _add_sending_options(parser: ArgumentParser): + """Adds options for sending requests (parse, enqueue).""" + parser.add_argument( + "-i", + "--input-type", + dest="input_type", + choices=["path", "file", "base64", "bytes", "url"], + default="path", + help="Specify how to handle the input.\n" + "- path: open a path (default).\n" + "- file: open as a file handle.\n" + "- base64: open a base64 encoded text file.\n" + "- bytes: open the contents as raw bytes.\n" + "- url: open an URL.", + ) + parser.add_argument( + "-c", + "--cut-doc", + dest="cut_doc", + action="store_true", + help="Cut document pages", + ) + parser.add_argument( + "-p", + "--pages-keep", + dest="doc_pages", + type=int, + default=5, + help="Number of document pages to keep, default: 5", + ) - if category in ["predict", "parse-queued"]: - parser.add_argument( - "-o", - "--output-type", - dest="output_type", - choices=["summary", "raw", "parsed"], - default="summary", - help="Specify how to output the data.\n" - "- summary: a basic summary (default)\n" - "- raw: the raw HTTP response\n" - "- parsed: the validated and parsed data fields\n", - ) + +def _add_custom_options(parser: ArgumentParser): + """Adds options to custom-type documents.""" + parser.add_argument( + "-a", + "--account-name", + dest="username", + required=True, + help="API account name for the endpoint (required)", + ) + parser.add_argument( + "-e", + "--endpoint", + dest="endpoint_name", + help="API endpoint name (required)", + required=True, + ) + parser.add_argument( + "-v", + "--version", + default="1", + dest="api_version", + help="Version for the endpoint. If not set, use the latest version of the model.", + ) def main() -> None: From d781010abc09166426783a7516572e7faf541f8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ianar=C3=A9=20S=C3=A9vi?= Date: Wed, 10 May 2023 13:51:32 +0200 Subject: [PATCH 13/14] minor fixes and rewording --- mindee/cli.py | 70 +++++++++++++++++++++++---------------------------- 1 file changed, 31 insertions(+), 39 deletions(-) diff --git a/mindee/cli.py b/mindee/cli.py index 5478b1f1..64489313 100644 --- a/mindee/cli.py +++ b/mindee/cli.py @@ -107,11 +107,11 @@ def call_endpoint(args: Namespace): info = DOCUMENTS[args.product_name] doc_class = info.doc_class - if args.instruction_type == "enqueue": + if args.call_method == "enqueue": process_parse_enqueue(args, client, doc_class) - elif args.instruction_type == "parse-queued": + elif args.call_method == "parse-queued": process_parse_queued(args, client, doc_class) - elif args.instruction_type == "parse": + elif args.call_method == "parse": process_parse(args, client, doc_class) @@ -124,14 +124,14 @@ def process_parse(args: Namespace, client: Client, doc_class) -> None: input_doc = _get_input_doc(client, args) if args.product_name == "custom": client.add_endpoint( - endpoint_name=args.api_name, - account_name=args.username, + endpoint_name=args.endpoint_name, + account_name=args.account_name, version=args.api_version, ) parsed_data = input_doc.parse( doc_class, - endpoint_name=args.api_name, - account_name=args.username, + endpoint_name=args.endpoint_name, + account_name=args.account_name, page_options=page_options, ) else: @@ -148,8 +148,8 @@ def process_parse_queued(args: Namespace, client: Client, doc_class) -> None: parsed_data = input_doc.parse_queued( document_class=doc_class, queue_id=args.queue_id, - endpoint_name=args.api_name, - account_name=args.username, + endpoint_name=args.endpoint_name, + account_name=args.account_name, ) else: parsed_data = input_doc.parse_queued( @@ -182,13 +182,13 @@ def process_parse_enqueue(args: Namespace, client: Client, doc_class) -> None: if args.product_name == "custom": client.add_endpoint( endpoint_name=args.api_name, - account_name=args.username, + account_name=args.account, version=args.api_version, ) parsed_data = input_doc.enqueue( doc_class, endpoint_name=args.api_name, - account_name=args.username, + account_name=args.account, page_options=page_options, ) else: @@ -215,15 +215,15 @@ def _parse_args() -> Namespace: for name, info in DOCUMENTS.items(): subp = subparsers.add_parser(name, help=info.help) - parsers_instruction_type = subp.add_subparsers( - dest="instruction_type", required=True - ) + parsers_call_method = subp.add_subparsers(dest="call_method", required=True) if info.is_sync: - parser_predict = parsers_instruction_type.add_parser( + parser_predict = parsers_call_method.add_parser( "parse", help=f"Parse {name}" ) + _add_main_options(parser_predict) _add_sending_options(parser_predict) + _add_display_options(parser_predict) if name == "custom": _add_custom_options(parser_predict) else: @@ -234,24 +234,12 @@ def _parse_args() -> Namespace: action="store_true", help="include full document text in response", ) - parser_predict.add_argument(dest="path", help="Full path to the file") - parser_predict.add_argument( - "-k", - "--key", - dest="api_key", - help="API key for the account", - ) if info.is_async: - parser_enqueue = parsers_instruction_type.add_parser( + parser_enqueue = parsers_call_method.add_parser( "enqueue", help=f"Enqueue {name}" ) - parser_enqueue.add_argument( - "-k", - "--key", - dest="api_key", - help="API key for the account", - ) + _add_main_options(parser_enqueue) _add_sending_options(parser_enqueue) if name == "custom": _add_custom_options(parser_enqueue) @@ -263,17 +251,11 @@ def _parse_args() -> Namespace: action="store_true", help="include full document text in response", ) - parser_enqueue.add_argument(dest="path", help="Full path to the file") - parser_parse_queued = parsers_instruction_type.add_parser( + parser_parse_queued = parsers_call_method.add_parser( "parse-queued", help=f"Parse (queued) {name}" ) - parser_parse_queued.add_argument( - "-k", - "--key", - dest="api_key", - help="API key for the account", - ) + _add_main_options(parser_parse_queued) _add_display_options(parser_parse_queued) parser_parse_queued.add_argument( dest="queue_id", help="Async queue ID for a document (required)" @@ -283,6 +265,15 @@ def _parse_args() -> Namespace: return parsed_args +def _add_main_options(parser: ArgumentParser): + parser.add_argument( + "-k", + "--key", + dest="api_key", + help="API key for the account", + ) + + def _add_display_options(parser: ArgumentParser): """Adds options related to output/display of a document (parse, parse-queued).""" parser.add_argument( @@ -328,14 +319,15 @@ def _add_sending_options(parser: ArgumentParser): default=5, help="Number of document pages to keep, default: 5", ) + parser.add_argument(dest="path", help="Full path to the file") def _add_custom_options(parser: ArgumentParser): """Adds options to custom-type documents.""" parser.add_argument( "-a", - "--account-name", - dest="username", + "--account", + dest="account_name", required=True, help="API account name for the endpoint (required)", ) From dee97c627e6b338076d7383d511793e663a110f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ianar=C3=A9=20S=C3=A9vi?= Date: Wed, 10 May 2023 13:56:40 +0200 Subject: [PATCH 14/14] fix tests :facepalm: --- mindee/cli.py | 8 ++++---- tests/test_cli.py | 12 ++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/mindee/cli.py b/mindee/cli.py index 64489313..6822c40c 100644 --- a/mindee/cli.py +++ b/mindee/cli.py @@ -181,14 +181,14 @@ def process_parse_enqueue(args: Namespace, client: Client, doc_class) -> None: input_doc = _get_input_doc(client, args) if args.product_name == "custom": client.add_endpoint( - endpoint_name=args.api_name, - account_name=args.account, + endpoint_name=args.endpoint_name, + account_name=args.account_name, version=args.api_version, ) parsed_data = input_doc.enqueue( doc_class, - endpoint_name=args.api_name, - account_name=args.account, + endpoint_name=args.endpoint_name, + account_name=args.account_name, page_options=page_options, ) else: diff --git a/tests/test_cli.py b/tests/test_cli.py index 579e2ae8..e9d9aad5 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -12,8 +12,8 @@ def custom_doc(monkeypatch): clear_envvars(monkeypatch) return Namespace( product_name="custom", - api_name="license_plate", - username="mindee", + endpoint_name="license_plate", + account_name="mindee", api_key="dummy", api_version="1", raise_on_error=True, @@ -23,7 +23,7 @@ def custom_doc(monkeypatch): output_type="summary", include_words=False, path="./tests/data/pdf/blank.pdf", - instruction_type="parse", + call_method="parse", ) @@ -39,7 +39,7 @@ def ots_doc(monkeypatch): output_type="summary", include_words=False, path="./tests/data/invoice/invoice.pdf", - instruction_type="parse", + call_method="parse", ) @@ -54,7 +54,7 @@ def ots_doc_enqueue(monkeypatch): input_type="path", include_words=False, path="./tests/data/invoice_splitter/2_invoices.pdf", - instruction_type="enqueue", + call_method="enqueue", ) @@ -66,7 +66,7 @@ def ots_doc_parse_queued(monkeypatch): raise_on_error=True, output_type="summary", queue_id="dummy-queue-id", - instruction_type="parse-queued", + call_method="parse-queued", )