From 8c43b0fe5753be4e16781f78343ec57a8aacdb13 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Thu, 30 Oct 2025 16:26:07 +0100 Subject: [PATCH 1/3] bump test lib & move test structure --- docs/extras/guide/bank_account_details_v2.md | 148 ----- docs/extras/guide/bank_check_v1.md | 186 ------ docs/extras/guide/barcode_reader_v1.md | 113 ---- docs/extras/guide/bill_of_lading_v1.md | 254 -------- docs/extras/guide/business_card_v1.md | 172 ------ docs/extras/guide/carte_grise_v1.md | 465 -------------- docs/extras/guide/cli.md | 65 -- docs/extras/guide/cropper_v1.md | 109 ---- docs/extras/guide/custom_v1.md | 155 ----- docs/extras/guide/delivery_notes_v1.md | 147 ----- docs/extras/guide/driver_license_v1.md | 191 ------ docs/extras/guide/energy_bill_fra_v1.md | 318 ---------- docs/extras/guide/expense_receipts_v5.md | 385 ------------ docs/extras/guide/financial_document_v1.md | 578 ------------------ docs/extras/guide/french_healthcard_v1.md | 120 ---- docs/extras/guide/generated_v1.md | 106 ---- docs/extras/guide/getting_started.md | 270 -------- docs/extras/guide/idcard_fr_v2.md | 270 -------- docs/extras/guide/ind_passport_v1.md | 285 --------- docs/extras/guide/international_id_v2.md | 240 -------- docs/extras/guide/invoice_splitter_v1.md | 105 ---- docs/extras/guide/invoices_v4.md | 539 ---------------- docs/extras/guide/material_certificate_v1.md | 85 --- .../guide/multi_receipts_detector_v1.md | 115 ---- docs/extras/guide/nutrition_facts_v1.md | 377 ------------ docs/extras/guide/passport_v1.md | 196 ------ docs/extras/guide/payslip_fra_v3.md | 321 ---------- docs/extras/guide/resume_v1.md | 353 ----------- docs/extras/guide/us_healthcare_cards_v1.md | 235 ------- docs/extras/guide/us_mail_v3.md | 130 ---- tests/data | 2 +- tests/{api => v1}/__init__.py | 0 tests/{extraction => v1/api}/__init__.py | 0 tests/{ => v1}/api/test_async_response.py | 0 tests/{ => v1}/api/test_feedback_response.py | 0 tests/{ 
=> v1}/api/test_response.py | 0 tests/{extras => v1/extraction}/__init__.py | 0 .../extraction/test_image_extractor.py | 0 .../test_invoice_splitter_auto_extraction.py | 2 +- .../test_multi_receipts_extractor.py | 0 .../{ => v1}/extraction/test_pdf_extractor.py | 0 tests/{fields => v1/extras}/__init__.py | 0 .../extras/test_extras_integration.py | 0 tests/{ => v1}/extras/test_full_text_ocr.py | 0 tests/{ => v1}/input/__init__.py | 0 .../{ => v1}/input/test_apply_page_options.py | 0 tests/{ => v1}/input/test_compression.py | 0 tests/{ => v1}/input/test_fix_pdf.py | 0 tests/{ => v1}/input/test_inputs.py | 0 tests/{ => v1}/input/test_local_response.py | 2 +- .../test_url_input_source_integration.py | 0 tests/v1/mindee_http/__init__.py | 1 + tests/{ => v1}/mindee_http/test_error.py | 2 +- tests/{mindee_http => v1/parsing}/__init__.py | 0 .../parsing/common}/__init__.py | 0 .../{fields => v1/parsing/common}/test_ocr.py | 0 .../parsing/common}/test_orientation.py | 0 .../parsing/standard}/__init__.py | 0 .../parsing/standard}/test_amount.py | 0 .../parsing/standard}/test_date.py | 0 .../parsing/standard}/test_field.py | 0 .../parsing/standard}/test_locale.py | 0 .../parsing/standard}/test_payment_details.py | 0 .../parsing/standard}/test_position.py | 0 .../parsing/standard/test_string.py} | 0 .../parsing/standard}/test_tax.py | 0 tests/{ => v1}/product/__init__.py | 0 .../product/barcode_reader}/__init__.py | 0 .../barcode_reader/test_barcode_reader_v1.py | 0 .../test_barcode_reader_v1_regression.py | 2 +- .../product/bill_of_lading}/__init__.py | 0 .../bill_of_lading/test_bill_of_lading_v1.py | 0 .../product/business_card}/__init__.py | 0 .../business_card/test_business_card_v1.py | 0 .../product/cropper}/__init__.py | 0 .../product/cropper/test_cropper_v1.py | 0 .../cropper/test_cropper_v1_regression.py | 2 +- .../product/custom}/__init__.py | 0 .../{ => v1}/product/custom/test_custom_v1.py | 0 .../custom/test_custom_v1_line_items.py | 0 
.../product/custom/test_custom_v1_v2.py | 0 .../product/delivery_note}/__init__.py | 0 .../delivery_note/test_delivery_note_v1.py | 0 .../product/driver_license}/__init__.py | 0 .../driver_license/test_driver_license_v1.py | 0 .../product/financial_document}/__init__.py | 0 .../test_financial_document_v1.py | 0 .../test_financial_document_v1_regression.py | 2 +- .../carte_grise => v1/product/fr}/__init__.py | 0 .../fr/bank_account_details}/__init__.py | 0 .../test_bank_account_details_v1.py | 0 ...test_bank_account_details_v1_regression.py | 2 +- .../test_bank_account_details_v2.py | 0 ...test_bank_account_details_v2_regression.py | 2 +- .../product/fr/carte_grise}/__init__.py | 0 .../fr/carte_grise/test_carte_grise_v1.py | 0 .../test_carte_grise_v1_regression.py | 2 +- .../product/fr/energy_bill}/__init__.py | 0 .../fr/energy_bill/test_energy_bill_v1.py | 0 .../product/fr/health_card}/__init__.py | 0 .../fr/health_card/test_health_card_v1.py | 0 .../product/fr/id_card}/__init__.py | 0 .../product/fr/id_card/test_id_card_v1.py | 0 .../fr/id_card/test_id_card_v1_regression.py | 2 +- .../product/fr/id_card/test_id_card_v2.py | 0 .../fr/id_card/test_id_card_v2_regression.py | 2 +- .../ind => v1/product/fr/payslip}/__init__.py | 0 .../product/fr/payslip/test_payslip_v2.py | 0 .../product/fr/payslip/test_payslip_v3.py | 0 .../product/generated}/__init__.py | 0 .../product/generated/test_generated_v1.py | 0 .../product/ind}/__init__.py | 0 .../product/ind/indian_passport}/__init__.py | 0 .../test_indian_passport_v1.py | 0 .../product/international_id}/__init__.py | 0 .../test_international_id_v2.py | 0 .../product/invoice}/__init__.py | 0 .../product/invoice/test_invoice_v4.py | 0 .../invoice/test_invoice_v4_regression.py | 2 +- .../product/invoice_splitter}/__init__.py | 0 .../test_invoice_splitter_v1.py | 0 .../test_invoice_splitter_v1_regression.py | 2 +- .../product/material_certificate}/__init__.py | 0 .../test_material_certificate_v1.py | 0 
.../multi_receipts_detector}/__init__.py | 0 .../test_multi_receipts_detector_v1.py | 0 ...t_multi_receipts_detector_v1_regression.py | 2 +- .../nutrition_facts_label}/__init__.py | 0 .../test_nutrition_facts_label_v1.py | 0 .../product/passport}/__init__.py | 0 .../product/passport/test_passport_v1.py | 0 .../passport/test_passport_v1_regression.py | 2 +- .../us => v1/product/receipt}/__init__.py | 0 .../product/receipt/test_receipt_v5.py | 0 .../receipt/test_receipt_v5_regression.py | 2 +- .../product/resume}/__init__.py | 0 .../{ => v1}/product/resume/test_resume_v1.py | 0 .../product/us}/__init__.py | 0 .../product/us/bank_check}/__init__.py | 0 .../us/bank_check/test_bank_check_v1.py | 0 .../test_bank_check_v1_regression.py | 2 +- .../product/us/healthcare_card}/__init__.py | 0 .../test_healthcare_card_v1.py | 0 tests/v1/product/us/us_mail/__init__.py | 0 .../product/us/us_mail/test_us_mail_v3.py | 0 tests/{ => v1}/test_cli.py | 0 tests/{ => v1}/test_client.py | 2 +- tests/v1/workflows/__init__.py | 0 tests/{ => v1}/workflows/test_workflow.py | 0 .../workflows/test_workflow_integration.py | 0 tests/{ => v2}/test_client_v2.py | 0 tests/{ => v2}/test_client_v2_integration.py | 0 152 files changed, 20 insertions(+), 7052 deletions(-) delete mode 100644 docs/extras/guide/bank_account_details_v2.md delete mode 100644 docs/extras/guide/bank_check_v1.md delete mode 100644 docs/extras/guide/barcode_reader_v1.md delete mode 100644 docs/extras/guide/bill_of_lading_v1.md delete mode 100644 docs/extras/guide/business_card_v1.md delete mode 100644 docs/extras/guide/carte_grise_v1.md delete mode 100644 docs/extras/guide/cli.md delete mode 100644 docs/extras/guide/cropper_v1.md delete mode 100644 docs/extras/guide/custom_v1.md delete mode 100644 docs/extras/guide/delivery_notes_v1.md delete mode 100644 docs/extras/guide/driver_license_v1.md delete mode 100644 docs/extras/guide/energy_bill_fra_v1.md delete mode 100644 docs/extras/guide/expense_receipts_v5.md delete mode 
100644 docs/extras/guide/financial_document_v1.md delete mode 100644 docs/extras/guide/french_healthcard_v1.md delete mode 100644 docs/extras/guide/generated_v1.md delete mode 100644 docs/extras/guide/getting_started.md delete mode 100644 docs/extras/guide/idcard_fr_v2.md delete mode 100644 docs/extras/guide/ind_passport_v1.md delete mode 100644 docs/extras/guide/international_id_v2.md delete mode 100644 docs/extras/guide/invoice_splitter_v1.md delete mode 100644 docs/extras/guide/invoices_v4.md delete mode 100644 docs/extras/guide/material_certificate_v1.md delete mode 100644 docs/extras/guide/multi_receipts_detector_v1.md delete mode 100644 docs/extras/guide/nutrition_facts_v1.md delete mode 100644 docs/extras/guide/passport_v1.md delete mode 100644 docs/extras/guide/payslip_fra_v3.md delete mode 100644 docs/extras/guide/resume_v1.md delete mode 100644 docs/extras/guide/us_healthcare_cards_v1.md delete mode 100644 docs/extras/guide/us_mail_v3.md rename tests/{api => v1}/__init__.py (100%) rename tests/{extraction => v1/api}/__init__.py (100%) rename tests/{ => v1}/api/test_async_response.py (100%) rename tests/{ => v1}/api/test_feedback_response.py (100%) rename tests/{ => v1}/api/test_response.py (100%) rename tests/{extras => v1/extraction}/__init__.py (100%) rename tests/{ => v1}/extraction/test_image_extractor.py (100%) rename tests/{ => v1}/extraction/test_invoice_splitter_auto_extraction.py (97%) rename tests/{ => v1}/extraction/test_multi_receipts_extractor.py (100%) rename tests/{ => v1}/extraction/test_pdf_extractor.py (100%) rename tests/{fields => v1/extras}/__init__.py (100%) rename tests/{ => v1}/extras/test_extras_integration.py (100%) rename tests/{ => v1}/extras/test_full_text_ocr.py (100%) rename tests/{ => v1}/input/__init__.py (100%) rename tests/{ => v1}/input/test_apply_page_options.py (100%) rename tests/{ => v1}/input/test_compression.py (100%) rename tests/{ => v1}/input/test_fix_pdf.py (100%) rename tests/{ => v1}/input/test_inputs.py 
(100%) rename tests/{ => v1}/input/test_local_response.py (97%) rename tests/{ => v1}/input/test_url_input_source_integration.py (100%) create mode 100644 tests/v1/mindee_http/__init__.py rename tests/{ => v1}/mindee_http/test_error.py (98%) rename tests/{mindee_http => v1/parsing}/__init__.py (100%) rename tests/{product/barcode_reader => v1/parsing/common}/__init__.py (100%) rename tests/{fields => v1/parsing/common}/test_ocr.py (100%) rename tests/{fields => v1/parsing/common}/test_orientation.py (100%) rename tests/{product/bill_of_lading => v1/parsing/standard}/__init__.py (100%) rename tests/{fields => v1/parsing/standard}/test_amount.py (100%) rename tests/{fields => v1/parsing/standard}/test_date.py (100%) rename tests/{fields => v1/parsing/standard}/test_field.py (100%) rename tests/{fields => v1/parsing/standard}/test_locale.py (100%) rename tests/{fields => v1/parsing/standard}/test_payment_details.py (100%) rename tests/{fields => v1/parsing/standard}/test_position.py (100%) rename tests/{fields/test_text.py => v1/parsing/standard/test_string.py} (100%) rename tests/{fields => v1/parsing/standard}/test_tax.py (100%) rename tests/{ => v1}/product/__init__.py (100%) rename tests/{product/business_card => v1/product/barcode_reader}/__init__.py (100%) rename tests/{ => v1}/product/barcode_reader/test_barcode_reader_v1.py (100%) rename tests/{ => v1}/product/barcode_reader/test_barcode_reader_v1_regression.py (93%) rename tests/{product/cropper => v1/product/bill_of_lading}/__init__.py (100%) rename tests/{ => v1}/product/bill_of_lading/test_bill_of_lading_v1.py (100%) rename tests/{product/custom => v1/product/business_card}/__init__.py (100%) rename tests/{ => v1}/product/business_card/test_business_card_v1.py (100%) rename tests/{product/delivery_note => v1/product/cropper}/__init__.py (100%) rename tests/{ => v1}/product/cropper/test_cropper_v1.py (100%) rename tests/{ => v1}/product/cropper/test_cropper_v1_regression.py (93%) rename 
tests/{product/driver_license => v1/product/custom}/__init__.py (100%) rename tests/{ => v1}/product/custom/test_custom_v1.py (100%) rename tests/{ => v1}/product/custom/test_custom_v1_line_items.py (100%) rename tests/{ => v1}/product/custom/test_custom_v1_v2.py (100%) rename tests/{product/financial_document => v1/product/delivery_note}/__init__.py (100%) rename tests/{ => v1}/product/delivery_note/test_delivery_note_v1.py (100%) rename tests/{product/fr => v1/product/driver_license}/__init__.py (100%) rename tests/{ => v1}/product/driver_license/test_driver_license_v1.py (100%) rename tests/{product/fr/bank_account_details => v1/product/financial_document}/__init__.py (100%) rename tests/{ => v1}/product/financial_document/test_financial_document_v1.py (100%) rename tests/{ => v1}/product/financial_document/test_financial_document_v1_regression.py (94%) rename tests/{product/fr/carte_grise => v1/product/fr}/__init__.py (100%) rename tests/{product/fr/energy_bill => v1/product/fr/bank_account_details}/__init__.py (100%) rename tests/{ => v1}/product/fr/bank_account_details/test_bank_account_details_v1.py (100%) rename tests/{ => v1}/product/fr/bank_account_details/test_bank_account_details_v1_regression.py (94%) rename tests/{ => v1}/product/fr/bank_account_details/test_bank_account_details_v2.py (100%) rename tests/{ => v1}/product/fr/bank_account_details/test_bank_account_details_v2_regression.py (94%) rename tests/{product/fr/health_card => v1/product/fr/carte_grise}/__init__.py (100%) rename tests/{ => v1}/product/fr/carte_grise/test_carte_grise_v1.py (100%) rename tests/{ => v1}/product/fr/carte_grise/test_carte_grise_v1_regression.py (93%) rename tests/{product/fr/id_card => v1/product/fr/energy_bill}/__init__.py (100%) rename tests/{ => v1}/product/fr/energy_bill/test_energy_bill_v1.py (100%) rename tests/{product/fr/payslip => v1/product/fr/health_card}/__init__.py (100%) rename tests/{ => v1}/product/fr/health_card/test_health_card_v1.py (100%) rename 
tests/{product/generated => v1/product/fr/id_card}/__init__.py (100%) rename tests/{ => v1}/product/fr/id_card/test_id_card_v1.py (100%) rename tests/{ => v1}/product/fr/id_card/test_id_card_v1_regression.py (93%) rename tests/{ => v1}/product/fr/id_card/test_id_card_v2.py (100%) rename tests/{ => v1}/product/fr/id_card/test_id_card_v2_regression.py (93%) rename tests/{product/ind => v1/product/fr/payslip}/__init__.py (100%) rename tests/{ => v1}/product/fr/payslip/test_payslip_v2.py (100%) rename tests/{ => v1}/product/fr/payslip/test_payslip_v3.py (100%) rename tests/{product/ind/indian_passport => v1/product/generated}/__init__.py (100%) rename tests/{ => v1}/product/generated/test_generated_v1.py (100%) rename tests/{product/international_id => v1/product/ind}/__init__.py (100%) rename tests/{product/invoice => v1/product/ind/indian_passport}/__init__.py (100%) rename tests/{ => v1}/product/ind/indian_passport/test_indian_passport_v1.py (100%) rename tests/{product/invoice_splitter => v1/product/international_id}/__init__.py (100%) rename tests/{ => v1}/product/international_id/test_international_id_v2.py (100%) rename tests/{product/material_certificate => v1/product/invoice}/__init__.py (100%) rename tests/{ => v1}/product/invoice/test_invoice_v4.py (100%) rename tests/{ => v1}/product/invoice/test_invoice_v4_regression.py (93%) rename tests/{product/multi_receipts_detector => v1/product/invoice_splitter}/__init__.py (100%) rename tests/{ => v1}/product/invoice_splitter/test_invoice_splitter_v1.py (100%) rename tests/{ => v1}/product/invoice_splitter/test_invoice_splitter_v1_regression.py (94%) rename tests/{product/nutrition_facts_label => v1/product/material_certificate}/__init__.py (100%) rename tests/{ => v1}/product/material_certificate/test_material_certificate_v1.py (100%) rename tests/{product/passport => v1/product/multi_receipts_detector}/__init__.py (100%) rename tests/{ => v1}/product/multi_receipts_detector/test_multi_receipts_detector_v1.py 
(100%) rename tests/{ => v1}/product/multi_receipts_detector/test_multi_receipts_detector_v1_regression.py (94%) rename tests/{product/receipt => v1/product/nutrition_facts_label}/__init__.py (100%) rename tests/{ => v1}/product/nutrition_facts_label/test_nutrition_facts_label_v1.py (100%) rename tests/{product/resume => v1/product/passport}/__init__.py (100%) rename tests/{ => v1}/product/passport/test_passport_v1.py (100%) rename tests/{ => v1}/product/passport/test_passport_v1_regression.py (93%) rename tests/{product/us => v1/product/receipt}/__init__.py (100%) rename tests/{ => v1}/product/receipt/test_receipt_v5.py (100%) rename tests/{ => v1}/product/receipt/test_receipt_v5_regression.py (93%) rename tests/{product/us/bank_check => v1/product/resume}/__init__.py (100%) rename tests/{ => v1}/product/resume/test_resume_v1.py (100%) rename tests/{product/us/healthcare_card => v1/product/us}/__init__.py (100%) rename tests/{product/us/us_mail => v1/product/us/bank_check}/__init__.py (100%) rename tests/{ => v1}/product/us/bank_check/test_bank_check_v1.py (100%) rename tests/{ => v1}/product/us/bank_check/test_bank_check_v1_regression.py (93%) rename tests/{workflows => v1/product/us/healthcare_card}/__init__.py (100%) rename tests/{ => v1}/product/us/healthcare_card/test_healthcare_card_v1.py (100%) create mode 100644 tests/v1/product/us/us_mail/__init__.py rename tests/{ => v1}/product/us/us_mail/test_us_mail_v3.py (100%) rename tests/{ => v1}/test_cli.py (100%) rename tests/{ => v1}/test_client.py (99%) create mode 100644 tests/v1/workflows/__init__.py rename tests/{ => v1}/workflows/test_workflow.py (100%) rename tests/{ => v1}/workflows/test_workflow_integration.py (100%) rename tests/{ => v2}/test_client_v2.py (100%) rename tests/{ => v2}/test_client_v2_integration.py (100%) diff --git a/docs/extras/guide/bank_account_details_v2.md b/docs/extras/guide/bank_account_details_v2.md deleted file mode 100644 index 419fca28..00000000 --- 
a/docs/extras/guide/bank_account_details_v2.md +++ /dev/null @@ -1,148 +0,0 @@ ---- -title: FR Bank Account Details OCR Python -category: 622b805aaec68102ea7fcbc2 -slug: python-fr-bank-account-details-ocr -parentDoc: 609808f773b0b90051d839de ---- -The Python OCR SDK supports the [Bank Account Details API](https://platform.mindee.com/mindee/bank_account_details). - -Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/bank_account_details/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK. -![Bank Account Details sample](https://github.com/mindee/client-lib-test-data/blob/main/products/bank_account_details/default_sample.jpg?raw=true) - -# Quick-Start -```py -# -# Install the Python client library by running: -# pip install mindee -# - -from mindee import Client, PredictResponse, product - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and parse it. 
-result: PredictResponse = mindee_client.parse( - product.fr.BankAccountDetailsV2, - input_doc, -) - -# Print a summary of the API result -print(result.document) - -# Print the document-level summary -# print(result.document.inference.prediction) - -``` - -**Output (RST):** -```rst -######## -Document -######## -:Mindee ID: bc8f7265-8dab-49fe-810c-d50049605578 -:Filename: default_sample.jpg - -Inference -######### -:Product: mindee/bank_account_details v2.0 -:Rotation applied: Yes - -Prediction -========== -:Account Holder's Names: MME HEGALALDIA L ENVOL -:Basic Bank Account Number: - :Bank Code: 13335 - :Branch Code: 00040 - :Key: 06 - :Account Number: 08932891361 -:IBAN: FR7613335000400893289136106 -:SWIFT Code: CEPAFRPP333 - -Page Predictions -================ - -Page 0 ------- -:Account Holder's Names: MME HEGALALDIA L ENVOL -:Basic Bank Account Number: - :Bank Code: 13335 - :Branch Code: 00040 - :Key: 06 - :Account Number: 08932891361 -:IBAN: FR7613335000400893289136106 -:SWIFT Code: CEPAFRPP333 -``` - -# Field Types -## Standard Fields -These fields are generic and used in several products. - -### BaseField -Each prediction object contains a set of fields that inherit from the generic `BaseField` class. -A typical `BaseField` object will have the following attributes: - -* **value** (`Union[float, str]`): corresponds to the field value. Can be `None` if no value was extracted. -* **confidence** (`float`): the confidence score of the field prediction. -* **bounding_box** (`[Point, Point, Point, Point]`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document. -* **polygon** (`List[Point]`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image. -* **page_id** (`int`): the ID of the page, always `None` when at document-level. -* **reconstructed** (`bool`): indicates whether an object was reconstructed (not extracted as the API gave it). 
- -> **Note:** A `Point` simply refers to a List of two numbers (`[float, float]`). - - -Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. - -### StringField -The text field `StringField` only has one constraint: its **value** is an `Optional[str]`. - -## Specific Fields -Fields which are specific to this product; they are not used in any other product. - -### Basic Bank Account Number Field -Full extraction of BBAN, including: branch code, bank code, account and key. - -A `BankAccountDetailsV2Bban` implements the following attributes: - -* **bban_bank_code** (`str`): The BBAN bank code outputted as a string. -* **bban_branch_code** (`str`): The BBAN branch code outputted as a string. -* **bban_key** (`str`): The BBAN key outputted as a string. -* **bban_number** (`str`): The BBAN Account number outputted as a string. - -# Attributes -The following fields are extracted for Bank Account Details V2: - -## Account Holder's Names -**account_holders_names** ([StringField](#stringfield)): Full extraction of the account holders names. - -```py -print(result.document.inference.prediction.account_holders_names.value) -``` - -## Basic Bank Account Number -**bban** ([BankAccountDetailsV2Bban](#basic-bank-account-number-field)): Full extraction of BBAN, including: branch code, bank code, account and key. - -```py -print(result.document.inference.prediction.bban.value) -``` - -## IBAN -**iban** ([StringField](#stringfield)): Full extraction of the IBAN number. - -```py -print(result.document.inference.prediction.iban.value) -``` - -## SWIFT Code -**swift_code** ([StringField](#stringfield)): Full extraction of the SWIFT code. - -```py -print(result.document.inference.prediction.swift_code.value) -``` - -# Questions? 
-[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/extras/guide/bank_check_v1.md b/docs/extras/guide/bank_check_v1.md deleted file mode 100644 index c1df5d12..00000000 --- a/docs/extras/guide/bank_check_v1.md +++ /dev/null @@ -1,186 +0,0 @@ ---- -title: US Bank Check OCR Python -category: 622b805aaec68102ea7fcbc2 -slug: python-us-bank-check-ocr -parentDoc: 609808f773b0b90051d839de ---- -The Python OCR SDK supports the [Bank Check API](https://platform.mindee.com/mindee/bank_check). - -Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/bank_check/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK. -![Bank Check sample](https://github.com/mindee/client-lib-test-data/blob/main/products/bank_check/default_sample.jpg?raw=true) - -# Quick-Start -```py -# -# Install the Python client library by running: -# pip install mindee -# - -from mindee import Client, PredictResponse, product - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and parse it. 
-result: PredictResponse = mindee_client.parse( - product.us.BankCheckV1, - input_doc, -) - -# Print a summary of the API result -print(result.document) - -# Print the document-level summary -# print(result.document.inference.prediction) - -``` - -**Output (RST):** -```rst -######## -Document -######## -:Mindee ID: b9809586-57ae-4f84-a35d-a85b2be1f2a2 -:Filename: default_sample.jpg - -Inference -######### -:Product: mindee/bank_check v1.0 -:Rotation applied: Yes - -Prediction -========== -:Check Issue Date: 2022-03-29 -:Amount: 15332.90 -:Payees: JOHN DOE - JANE DOE -:Routing Number: -:Account Number: 7789778136 -:Check Number: 0003401 - -Page Predictions -================ - -Page 0 ------- -:Check Position: Polygon with 21 points. -:Signature Positions: Polygon with 6 points. -:Check Issue Date: 2022-03-29 -:Amount: 15332.90 -:Payees: JOHN DOE - JANE DOE -:Routing Number: -:Account Number: 7789778136 -:Check Number: 0003401 -``` - -# Field Types -## Standard Fields -These fields are generic and used in several products. - -### BaseField -Each prediction object contains a set of fields that inherit from the generic `BaseField` class. -A typical `BaseField` object will have the following attributes: - -* **value** (`Union[float, str]`): corresponds to the field value. Can be `None` if no value was extracted. -* **confidence** (`float`): the confidence score of the field prediction. -* **bounding_box** (`[Point, Point, Point, Point]`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document. -* **polygon** (`List[Point]`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image. -* **page_id** (`int`): the ID of the page, always `None` when at document-level. -* **reconstructed** (`bool`): indicates whether an object was reconstructed (not extracted as the API gave it). - -> **Note:** A `Point` simply refers to a List of two numbers (`[float, float]`). 
- - -Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. - - -### AmountField -The amount field `AmountField` only has one constraint: its **value** is an `Optional[float]`. - -### DateField -Aside from the basic `BaseField` attributes, the date field `DateField` also implements the following: - -* **date_object** (`Date`): an accessible representation of the value as a python object. Can be `None`. - - -### PositionField -The position field `PositionField` does not implement all the basic `BaseField` attributes, only **bounding_box**, **polygon** and **page_id**. On top of these, it has access to: - -* **rectangle** (`[Point, Point, Point, Point]`): a Polygon with four points that may be oriented (even beyond canvas). -* **quadrangle** (`[Point, Point, Point, Point]`): a free polygon made up of four points. - -### StringField -The text field `StringField` only has one constraint: its **value** is an `Optional[str]`. - -## Page-Level Fields -Some fields are constrained to the page level, and so will not be retrievable at document level. - -# Attributes -The following fields are extracted for Bank Check V1: - -## Account Number -**account_number** ([StringField](#stringfield)): The check payer's account number. - -```py -print(result.document.inference.prediction.account_number.value) -``` - -## Amount -**amount** ([AmountField](#amountfield)): The amount of the check. - -```py -print(result.document.inference.prediction.amount.value) -``` - -## Check Number -**check_number** ([StringField](#stringfield)): The issuer's check number. - -```py -print(result.document.inference.prediction.check_number.value) -``` - -## Check Position -[📄](#page-level-fields "This field is only present on individual pages.")**check_position** ([PositionField](#positionfield)): The position of the check on the document. 
- -```py -for check_position_elem in result.document.check_position: - print(check_position_elem.polygon) -``` - -## Check Issue Date -**date** ([DateField](#datefield)): The date the check was issued. - -```py -print(result.document.inference.prediction.date.value) -``` - -## Payees -**payees** (List[[StringField](#stringfield)]): List of the check's payees (recipients). - -```py -for payees_elem in result.document.inference.prediction.payees: - print(payees_elem.value) -``` - -## Routing Number -**routing_number** ([StringField](#stringfield)): The check issuer's routing number. - -```py -print(result.document.inference.prediction.routing_number.value) -``` - -## Signature Positions -[📄](#page-level-fields "This field is only present on individual pages.")**signatures_positions** (List[[PositionField](#positionfield)]): List of signature positions - -```py -for page in result.document.inference.pages: - for signatures_positions_elem in page.prediction.signatures_positions): - print(signatures_positions_elem.polygon) -``` - -# Questions? -[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/extras/guide/barcode_reader_v1.md b/docs/extras/guide/barcode_reader_v1.md deleted file mode 100644 index f007220e..00000000 --- a/docs/extras/guide/barcode_reader_v1.md +++ /dev/null @@ -1,113 +0,0 @@ ---- -title: Barcode Reader OCR Python -category: 622b805aaec68102ea7fcbc2 -slug: python-barcode-reader-ocr -parentDoc: 609808f773b0b90051d839de ---- -The Python OCR SDK supports the [Barcode Reader API](https://platform.mindee.com/mindee/barcode_reader). - -Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/barcode_reader/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK. 
-![Barcode Reader sample](https://github.com/mindee/client-lib-test-data/blob/main/products/barcode_reader/default_sample.jpg?raw=true) - -# Quick-Start -```py -# -# Install the Python client library by running: -# pip install mindee -# - -from mindee import Client, PredictResponse, product - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and parse it. -result: PredictResponse = mindee_client.parse( - product.BarcodeReaderV1, - input_doc, -) - -# Print a summary of the API result -print(result.document) - -# Print the document-level summary -# print(result.document.inference.prediction) - -``` - -**Output (RST):** -```rst -######## -Document -######## -:Mindee ID: f9c48da1-a306-4805-8da8-f7231fda2d88 -:Filename: default_sample.jpg - -Inference -######### -:Product: mindee/barcode_reader v1.0 -:Rotation applied: Yes - -Prediction -========== -:Barcodes 1D: Mindee -:Barcodes 2D: https://developers.mindee.com/docs/barcode-reader-ocr - I love paperwork! - Said no one ever - -Page Predictions -================ - -Page 0 ------- -:Barcodes 1D: Mindee -:Barcodes 2D: https://developers.mindee.com/docs/barcode-reader-ocr - I love paperwork! - Said no one ever -``` - -# Field Types -## Standard Fields -These fields are generic and used in several products. - -### BaseField -Each prediction object contains a set of fields that inherit from the generic `BaseField` class. -A typical `BaseField` object will have the following attributes: - -* **value** (`Union[float, str]`): corresponds to the field value. Can be `None` if no value was extracted. -* **confidence** (`float`): the confidence score of the field prediction. -* **bounding_box** (`[Point, Point, Point, Point]`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document. 
-* **polygon** (`List[Point]`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image. -* **page_id** (`int`): the ID of the page, always `None` when at document-level. -* **reconstructed** (`bool`): indicates whether an object was reconstructed (not extracted as the API gave it). - -> **Note:** A `Point` simply refers to a List of two numbers (`[float, float]`). - - -Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. - -### StringField -The text field `StringField` only has one constraint: its **value** is an `Optional[str]`. - -# Attributes -The following fields are extracted for Barcode Reader V1: - -## Barcodes 1D -**codes_1d** (List[[StringField](#stringfield)]): List of decoded 1D barcodes. - -```py -for codes_1d_elem in result.document.inference.prediction.codes_1d: - print(codes_1d_elem.value) -``` - -## Barcodes 2D -**codes_2d** (List[[StringField](#stringfield)]): List of decoded 2D barcodes. - -```py -for codes_2d_elem in result.document.inference.prediction.codes_2d: - print(codes_2d_elem.value) -``` - -# Questions? -[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/extras/guide/bill_of_lading_v1.md b/docs/extras/guide/bill_of_lading_v1.md deleted file mode 100644 index 6ced8301..00000000 --- a/docs/extras/guide/bill_of_lading_v1.md +++ /dev/null @@ -1,254 +0,0 @@ ---- -title: Bill of Lading OCR Python -category: 622b805aaec68102ea7fcbc2 -slug: python-bill-of-lading-ocr -parentDoc: 609808f773b0b90051d839de ---- -The Python OCR SDK supports the [Bill of Lading API](https://platform.mindee.com/mindee/bill_of_lading). - -Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/bill_of_lading/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK. 
-![Bill of Lading sample](https://github.com/mindee/client-lib-test-data/blob/main/products/bill_of_lading/default_sample.jpg?raw=true) - -# Quick-Start -```py -# -# Install the Python client library by running: -# pip install mindee -# - -from mindee import Client, product, AsyncPredictResponse - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and enqueue it. -result: AsyncPredictResponse = mindee_client.enqueue_and_parse( - product.BillOfLadingV1, - input_doc, -) - -# Print a brief summary of the parsed data -print(result.document) - -``` - -**Output (RST):** -```rst -######## -Document -######## -:Mindee ID: 3b5250a1-b52c-4e0b-bc3e-2f0146b04e29 -:Filename: default_sample.jpg - -Inference -######### -:Product: mindee/bill_of_lading v1.1 -:Rotation applied: No - -Prediction -========== -:Bill of Lading Number: XYZ123456 -:Shipper: - :Address: 123 OCEAN DRIVE, SHANGHAI, CHINA - :Email: - :Name: GLOBAL FREIGHT SOLUTIONS INC. - :Phone: 86-21-12345678 -:Consignee: - :Address: 789 TRADE STREET, SINGAPORE 567890, SINGAPORE - :Email: - :Name: PACIFIC TRADING CO. - :Phone: 65-65432100 -:Notify Party: - :Address: 789 TRADE STREET, SINGAPORE 567890, SINGAPORE - :Email: - :Name: PACIFIC TRADING CO. - :Phone: 65-65432100 -:Carrier: - :Name: GLOBAL SHIPPING CO.,LTD. - :Professional Number: - :SCAC: -:Items: - +--------------------------------------+--------------+-------------+------------------+----------+-------------+ - | Description | Gross Weight | Measurement | Measurement Unit | Quantity | Weight Unit | - +======================================+==============+=============+==================+==========+=============+ - | ELECTRONIC COMPONENTS\nP/N: 12345... 
| 500.00 | 1.50 | cbm | 1.00 | kgs | - +--------------------------------------+--------------+-------------+------------------+----------+-------------+ -:Port of Loading: SHANGHAI, CHINA -:Port of Discharge: LOS ANGELES, USA -:Place of Delivery: LOS ANGELES, USA -:Date of issue: 2022-09-30 -:Departure Date: -``` - -# Field Types -## Standard Fields -These fields are generic and used in several products. - -### BaseField -Each prediction object contains a set of fields that inherit from the generic `BaseField` class. -A typical `BaseField` object will have the following attributes: - -* **value** (`Union[float, str]`): corresponds to the field value. Can be `None` if no value was extracted. -* **confidence** (`float`): the confidence score of the field prediction. -* **bounding_box** (`[Point, Point, Point, Point]`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document. -* **polygon** (`List[Point]`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image. -* **page_id** (`int`): the ID of the page, always `None` when at document-level. -* **reconstructed** (`bool`): indicates whether an object was reconstructed (not extracted as the API gave it). - -> **Note:** A `Point` simply refers to a List of two numbers (`[float, float]`). - - -Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. - -### DateField -Aside from the basic `BaseField` attributes, the date field `DateField` also implements the following: - -* **date_object** (`Date`): an accessible representation of the value as a python object. Can be `None`. - -### StringField -The text field `StringField` only has one constraint: its **value** is an `Optional[str]`. - -## Specific Fields -Fields which are specific to this product; they are not used in any other product. 
- -### Carrier Field -The shipping company responsible for transporting the goods. - -A `BillOfLadingV1Carrier` implements the following attributes: - -* **name** (`str`): The name of the carrier. -* **professional_number** (`str`): The professional number of the carrier. -* **scac** (`str`): The Standard Carrier Alpha Code (SCAC) of the carrier. -Fields which are specific to this product; they are not used in any other product. - -### Consignee Field -The party to whom the goods are being shipped. - -A `BillOfLadingV1Consignee` implements the following attributes: - -* **address** (`str`): The address of the consignee. -* **email** (`str`): The email of the shipper. -* **name** (`str`): The name of the consignee. -* **phone** (`str`): The phone number of the consignee. -Fields which are specific to this product; they are not used in any other product. - -### Items Field -The goods being shipped. - -A `BillOfLadingV1CarrierItem` implements the following attributes: - -* **description** (`str`): A description of the item. -* **gross_weight** (`float`): The gross weight of the item. -* **measurement** (`float`): The measurement of the item. -* **measurement_unit** (`str`): The unit of measurement for the measurement. -* **quantity** (`float`): The quantity of the item being shipped. -* **weight_unit** (`str`): The unit of measurement for weights. -Fields which are specific to this product; they are not used in any other product. - -### Notify Party Field -The party to be notified of the arrival of the goods. - -A `BillOfLadingV1NotifyParty` implements the following attributes: - -* **address** (`str`): The address of the notify party. -* **email** (`str`): The email of the shipper. -* **name** (`str`): The name of the notify party. -* **phone** (`str`): The phone number of the notify party. -Fields which are specific to this product; they are not used in any other product. - -### Shipper Field -The party responsible for shipping the goods. 
- -A `BillOfLadingV1Shipper` implements the following attributes: - -* **address** (`str`): The address of the shipper. -* **email** (`str`): The email of the shipper. -* **name** (`str`): The name of the shipper. -* **phone** (`str`): The phone number of the shipper. - -# Attributes -The following fields are extracted for Bill of Lading V1: - -## Bill of Lading Number -**bill_of_lading_number** ([StringField](#stringfield)): A unique identifier assigned to a Bill of Lading document. - -```py -print(result.document.inference.prediction.bill_of_lading_number.value) -``` - -## Carrier -**carrier** ([BillOfLadingV1Carrier](#carrier-field)): The shipping company responsible for transporting the goods. - -```py -print(result.document.inference.prediction.carrier.value) -``` - -## Items -**carrier_items** (List[[BillOfLadingV1CarrierItem](#items-field)]): The goods being shipped. - -```py -for carrier_items_elem in result.document.inference.prediction.carrier_items: - print(carrier_items_elem.value) -``` - -## Consignee -**consignee** ([BillOfLadingV1Consignee](#consignee-field)): The party to whom the goods are being shipped. - -```py -print(result.document.inference.prediction.consignee.value) -``` - -## Date of issue -**date_of_issue** ([DateField](#datefield)): The date when the bill of lading is issued. - -```py -print(result.document.inference.prediction.date_of_issue.value) -``` - -## Departure Date -**departure_date** ([DateField](#datefield)): The date when the vessel departs from the port of loading. - -```py -print(result.document.inference.prediction.departure_date.value) -``` - -## Notify Party -**notify_party** ([BillOfLadingV1NotifyParty](#notify-party-field)): The party to be notified of the arrival of the goods. - -```py -print(result.document.inference.prediction.notify_party.value) -``` - -## Place of Delivery -**place_of_delivery** ([StringField](#stringfield)): The place where the goods are to be delivered. 
- -```py -print(result.document.inference.prediction.place_of_delivery.value) -``` - -## Port of Discharge -**port_of_discharge** ([StringField](#stringfield)): The port where the goods are unloaded from the vessel. - -```py -print(result.document.inference.prediction.port_of_discharge.value) -``` - -## Port of Loading -**port_of_loading** ([StringField](#stringfield)): The port where the goods are loaded onto the vessel. - -```py -print(result.document.inference.prediction.port_of_loading.value) -``` - -## Shipper -**shipper** ([BillOfLadingV1Shipper](#shipper-field)): The party responsible for shipping the goods. - -```py -print(result.document.inference.prediction.shipper.value) -``` - -# Questions? -[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/extras/guide/business_card_v1.md b/docs/extras/guide/business_card_v1.md deleted file mode 100644 index c88f24b2..00000000 --- a/docs/extras/guide/business_card_v1.md +++ /dev/null @@ -1,172 +0,0 @@ ---- -title: Business Card OCR Python -category: 622b805aaec68102ea7fcbc2 -slug: python-business-card-ocr -parentDoc: 609808f773b0b90051d839de ---- -The Python OCR SDK supports the [Business Card API](https://platform.mindee.com/mindee/business_card). - -Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/business_card/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK. 
-![Business Card sample](https://github.com/mindee/client-lib-test-data/blob/main/products/business_card/default_sample.jpg?raw=true) - -# Quick-Start -```py -# -# Install the Python client library by running: -# pip install mindee -# - -from mindee import Client, product, AsyncPredictResponse - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and enqueue it. -result: AsyncPredictResponse = mindee_client.enqueue_and_parse( - product.BusinessCardV1, - input_doc, -) - -# Print a brief summary of the parsed data -print(result.document) - -``` - -**Output (RST):** -```rst -######## -Document -######## -:Mindee ID: 6f9a261f-7609-4687-9af0-46a45156566e -:Filename: default_sample.jpg - -Inference -######### -:Product: mindee/business_card v1.0 -:Rotation applied: Yes - -Prediction -========== -:Firstname: Andrew -:Lastname: Morin -:Job Title: Founder & CEO -:Company: RemoteGlobal -:Email: amorin@remoteglobalconsulting.com -:Phone Number: +14015555555 -:Mobile Number: +13015555555 -:Fax Number: +14015555556 -:Address: 178 Main Avenue, Providence, RI 02111 -:Website: www.remoteglobalconsulting.com -:Social Media: https://www.linkedin.com/in/johndoe - https://twitter.com/johndoe -``` - -# Field Types -## Standard Fields -These fields are generic and used in several products. - -### BaseField -Each prediction object contains a set of fields that inherit from the generic `BaseField` class. -A typical `BaseField` object will have the following attributes: - -* **value** (`Union[float, str]`): corresponds to the field value. Can be `None` if no value was extracted. -* **confidence** (`float`): the confidence score of the field prediction. -* **bounding_box** (`[Point, Point, Point, Point]`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document. 
-* **polygon** (`List[Point]`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image. -* **page_id** (`int`): the ID of the page, always `None` when at document-level. -* **reconstructed** (`bool`): indicates whether an object was reconstructed (not extracted as the API gave it). - -> **Note:** A `Point` simply refers to a List of two numbers (`[float, float]`). - - -Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. - -### StringField -The text field `StringField` only has one constraint: its **value** is an `Optional[str]`. - -# Attributes -The following fields are extracted for Business Card V1: - -## Address -**address** ([StringField](#stringfield)): The address of the person. - -```py -print(result.document.inference.prediction.address.value) -``` - -## Company -**company** ([StringField](#stringfield)): The company the person works for. - -```py -print(result.document.inference.prediction.company.value) -``` - -## Email -**email** ([StringField](#stringfield)): The email address of the person. - -```py -print(result.document.inference.prediction.email.value) -``` - -## Fax Number -**fax_number** ([StringField](#stringfield)): The Fax number of the person. - -```py -print(result.document.inference.prediction.fax_number.value) -``` - -## Firstname -**firstname** ([StringField](#stringfield)): The given name of the person. - -```py -print(result.document.inference.prediction.firstname.value) -``` - -## Job Title -**job_title** ([StringField](#stringfield)): The job title of the person. - -```py -print(result.document.inference.prediction.job_title.value) -``` - -## Lastname -**lastname** ([StringField](#stringfield)): The lastname of the person. - -```py -print(result.document.inference.prediction.lastname.value) -``` - -## Mobile Number -**mobile_number** ([StringField](#stringfield)): The mobile number of the person. 
- -```py -print(result.document.inference.prediction.mobile_number.value) -``` - -## Phone Number -**phone_number** ([StringField](#stringfield)): The phone number of the person. - -```py -print(result.document.inference.prediction.phone_number.value) -``` - -## Social Media -**social_media** (List[[StringField](#stringfield)]): The social media profiles of the person or company. - -```py -for social_media_elem in result.document.inference.prediction.social_media: - print(social_media_elem.value) -``` - -## Website -**website** ([StringField](#stringfield)): The website of the person or company. - -```py -print(result.document.inference.prediction.website.value) -``` - -# Questions? -[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/extras/guide/carte_grise_v1.md b/docs/extras/guide/carte_grise_v1.md deleted file mode 100644 index e21db7f3..00000000 --- a/docs/extras/guide/carte_grise_v1.md +++ /dev/null @@ -1,465 +0,0 @@ ---- -title: FR Carte Grise OCR Python -category: 622b805aaec68102ea7fcbc2 -slug: python-fr-carte-grise-ocr -parentDoc: 609808f773b0b90051d839de ---- -The Python OCR SDK supports the [Carte Grise API](https://platform.mindee.com/mindee/carte_grise). - -Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/carte_grise/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK. -![Carte Grise sample](https://github.com/mindee/client-lib-test-data/blob/main/products/carte_grise/default_sample.jpg?raw=true) - -# Quick-Start -```py -# -# Install the Python client library by running: -# pip install mindee -# - -from mindee import Client, PredictResponse, product - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and parse it. 
-result: PredictResponse = mindee_client.parse( - product.fr.CarteGriseV1, - input_doc, -) - -# Print a summary of the API result -print(result.document) - -# Print the document-level summary -# print(result.document.inference.prediction) - -``` - -**Output (RST):** -```rst -######## -Document -######## -:Mindee ID: 4443182b-57c1-4426-a288-01b94f226e84 -:Filename: default_sample.jpg - -Inference -######### -:Product: mindee/carte_grise v1.1 -:Rotation applied: Yes - -Prediction -========== -:a: AB-123-CD -:b: 1998-01-05 -:c1: DUPONT YVES -:c3: 27 RUE DES ROITELETS 59169 FERIN LES BAINS FRANCE -:c41: 2 DELAROCHE -:c4a: EST LE PROPRIETAIRE DU VEHICULE -:d1: -:d3: MODELE -:e: VFS1V2009AS1V2009 -:f1: 1915 -:f2: 1915 -:f3: 1915 -:g: 3030 -:g1: 1307 -:i: 2009-12-04 -:j: N1 -:j1: VP -:j2: AA -:j3: CI -:p1: 1900 -:p2: 90 -:p3: GO -:p6: 6 -:q: 006 -:s1: 5 -:s2: -:u1: 77 -:u2: 3000 -:v7: 155 -:x1: 2011-07-06 -:y1: 17835 -:y2: -:y3: 0 -:y4: 4 -:y5: 2.5 -:y6: 178.35 -:Formula Number: 2009AS05284 -:Owner's First Name: YVES -:Owner's Surname: DUPONT -:MRZ Line 1: -:MRZ Line 2: CI< **Note:** A `Point` simply refers to a List of two numbers (`[float, float]`). - - -Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. - -### DateField -Aside from the basic `BaseField` attributes, the date field `DateField` also implements the following: - -* **date_object** (`Date`): an accessible representation of the value as a python object. Can be `None`. - -### StringField -The text field `StringField` only has one constraint: its **value** is an `Optional[str]`. - -# Attributes -The following fields are extracted for Carte Grise V1: - -## a -**a** ([StringField](#stringfield)): The vehicle's license plate number. - -```py -print(result.document.inference.prediction.a.value) -``` - -## b -**b** ([DateField](#datefield)): The vehicle's first release date. 
- -```py -print(result.document.inference.prediction.b.value) -``` - -## c1 -**c1** ([StringField](#stringfield)): The vehicle owner's full name including maiden name. - -```py -print(result.document.inference.prediction.c1.value) -``` - -## c3 -**c3** ([StringField](#stringfield)): The vehicle owner's address. - -```py -print(result.document.inference.prediction.c3.value) -``` - -## c41 -**c41** ([StringField](#stringfield)): Number of owners of the license certificate. - -```py -print(result.document.inference.prediction.c41.value) -``` - -## c4a -**c4a** ([StringField](#stringfield)): Mentions about the ownership of the vehicle. - -```py -print(result.document.inference.prediction.c4a.value) -``` - -## d1 -**d1** ([StringField](#stringfield)): The vehicle's brand. - -```py -print(result.document.inference.prediction.d1.value) -``` - -## d3 -**d3** ([StringField](#stringfield)): The vehicle's commercial name. - -```py -print(result.document.inference.prediction.d3.value) -``` - -## e -**e** ([StringField](#stringfield)): The Vehicle Identification Number (VIN). - -```py -print(result.document.inference.prediction.e.value) -``` - -## f1 -**f1** ([StringField](#stringfield)): The vehicle's maximum admissible weight. - -```py -print(result.document.inference.prediction.f1.value) -``` - -## f2 -**f2** ([StringField](#stringfield)): The vehicle's maximum admissible weight within the license's state. - -```py -print(result.document.inference.prediction.f2.value) -``` - -## f3 -**f3** ([StringField](#stringfield)): The vehicle's maximum authorized weight with coupling. - -```py -print(result.document.inference.prediction.f3.value) -``` - -## Formula Number -**formula_number** ([StringField](#stringfield)): The document's formula number. - -```py -print(result.document.inference.prediction.formula_number.value) -``` - -## g -**g** ([StringField](#stringfield)): The vehicle's weight with coupling if tractor different than category M1. 
- -```py -print(result.document.inference.prediction.g.value) -``` - -## g1 -**g1** ([StringField](#stringfield)): The vehicle's national empty weight. - -```py -print(result.document.inference.prediction.g1.value) -``` - -## i -**i** ([DateField](#datefield)): The car registration date of the given certificate. - -```py -print(result.document.inference.prediction.i.value) -``` - -## j -**j** ([StringField](#stringfield)): The vehicle's category. - -```py -print(result.document.inference.prediction.j.value) -``` - -## j1 -**j1** ([StringField](#stringfield)): The vehicle's national type. - -```py -print(result.document.inference.prediction.j1.value) -``` - -## j2 -**j2** ([StringField](#stringfield)): The vehicle's body type (CE). - -```py -print(result.document.inference.prediction.j2.value) -``` - -## j3 -**j3** ([StringField](#stringfield)): The vehicle's body type (National designation). - -```py -print(result.document.inference.prediction.j3.value) -``` - -## MRZ Line 1 -**mrz1** ([StringField](#stringfield)): Machine Readable Zone, first line. - -```py -print(result.document.inference.prediction.mrz1.value) -``` - -## MRZ Line 2 -**mrz2** ([StringField](#stringfield)): Machine Readable Zone, second line. - -```py -print(result.document.inference.prediction.mrz2.value) -``` - -## Owner's First Name -**owner_first_name** ([StringField](#stringfield)): The vehicle's owner first name. - -```py -print(result.document.inference.prediction.owner_first_name.value) -``` - -## Owner's Surname -**owner_surname** ([StringField](#stringfield)): The vehicle's owner surname. - -```py -print(result.document.inference.prediction.owner_surname.value) -``` - -## p1 -**p1** ([StringField](#stringfield)): The vehicle engine's displacement (cm3). - -```py -print(result.document.inference.prediction.p1.value) -``` - -## p2 -**p2** ([StringField](#stringfield)): The vehicle's maximum net power (kW). 
- -```py -print(result.document.inference.prediction.p2.value) -``` - -## p3 -**p3** ([StringField](#stringfield)): The vehicle's fuel type or energy source. - -```py -print(result.document.inference.prediction.p3.value) -``` - -## p6 -**p6** ([StringField](#stringfield)): The vehicle's administrative power (fiscal horsepower). - -```py -print(result.document.inference.prediction.p6.value) -``` - -## q -**q** ([StringField](#stringfield)): The vehicle's power to weight ratio. - -```py -print(result.document.inference.prediction.q.value) -``` - -## s1 -**s1** ([StringField](#stringfield)): The vehicle's number of seats. - -```py -print(result.document.inference.prediction.s1.value) -``` - -## s2 -**s2** ([StringField](#stringfield)): The vehicle's number of standing rooms (person). - -```py -print(result.document.inference.prediction.s2.value) -``` - -## u1 -**u1** ([StringField](#stringfield)): The vehicle's sound level (dB). - -```py -print(result.document.inference.prediction.u1.value) -``` - -## u2 -**u2** ([StringField](#stringfield)): The vehicle engine's rotation speed (RPM). - -```py -print(result.document.inference.prediction.u2.value) -``` - -## v7 -**v7** ([StringField](#stringfield)): The vehicle's CO2 emission (g/km). - -```py -print(result.document.inference.prediction.v7.value) -``` - -## x1 -**x1** ([StringField](#stringfield)): Next technical control date. - -```py -print(result.document.inference.prediction.x1.value) -``` - -## y1 -**y1** ([StringField](#stringfield)): Amount of the regional proportional tax of the registration (in euros). - -```py -print(result.document.inference.prediction.y1.value) -``` - -## y2 -**y2** ([StringField](#stringfield)): Amount of the additional parafiscal tax of the registration (in euros). - -```py -print(result.document.inference.prediction.y2.value) -``` - -## y3 -**y3** ([StringField](#stringfield)): Amount of the additional CO2 tax of the registration (in euros). 
- -```py -print(result.document.inference.prediction.y3.value) -``` - -## y4 -**y4** ([StringField](#stringfield)): Amount of the fee for managing the registration (in euros). - -```py -print(result.document.inference.prediction.y4.value) -``` - -## y5 -**y5** ([StringField](#stringfield)): Amount of the fee for delivery of the registration certificate in euros. - -```py -print(result.document.inference.prediction.y5.value) -``` - -## y6 -**y6** ([StringField](#stringfield)): Total amount of registration fee to be paid in euros. - -```py -print(result.document.inference.prediction.y6.value) -``` - -# Questions? -[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/extras/guide/cli.md b/docs/extras/guide/cli.md deleted file mode 100644 index dd030729..00000000 --- a/docs/extras/guide/cli.md +++ /dev/null @@ -1,65 +0,0 @@ ---- -title: Command Line Interface -category: 622b805aaec68102ea7fcbc2 -slug: python-cli -parentDoc: 609808f773b0b90051d839de ---- -# Command Line Usage - -The CLI tool is provided mainly for quick tests and debugging. 
- -## General help - -```shell -python3 -m mindee --help -``` - -## Example command help - -```shell -python3 -m mindee parse --help -``` - -## Example parse command for Off-the-Shelf document - -```shell -python3 -m mindee parse invoice --key xxxxxxx /path/to/invoice.pdf -``` - -## Works with environment variables - -```shell -export MINDEE_API_KEY=xxxxxx -python3 -m mindee parse invoice /path/to/invoice.pdf -``` - -## Example parse command for a custom document - -```shell -python3 -m mindee parse custom -a pikachu -k xxxxxxx pokemon_card /path/to/card.jpg -``` - -## Example async parse command - -```shell -python3 -m mindee parse invoice-splitter -``` - - -## Full parsed output - -```shell -python3 -m mindee invoice -o parsed /path/to/invoice.pdf -``` - -## Running the script through shell - -A helper script allows you to start the command directly: - -```shell -./mindee-cli.sh -h -``` - - -**Questions?** -Slack Logo Icon  [Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/extras/guide/cropper_v1.md b/docs/extras/guide/cropper_v1.md deleted file mode 100644 index 32674167..00000000 --- a/docs/extras/guide/cropper_v1.md +++ /dev/null @@ -1,109 +0,0 @@ ---- -title: Cropper OCR Python -category: 622b805aaec68102ea7fcbc2 -slug: python-cropper-ocr -parentDoc: 609808f773b0b90051d839de ---- -The Python OCR SDK supports the [Cropper API](https://platform.mindee.com/mindee/cropper). - -Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/cropper/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK. 
-![Cropper sample](https://github.com/mindee/client-lib-test-data/blob/main/products/cropper/default_sample.jpg?raw=true) - -# Quick-Start -```py -# -# Install the Python client library by running: -# pip install mindee -# - -from mindee import Client, PredictResponse, product - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and parse it. -result: PredictResponse = mindee_client.parse( - product.CropperV1, - input_doc, -) - -# Print a summary of the API result -print(result.document) - -# Print the document-level summary -# print(result.document.inference.prediction) - -``` - -**Output (RST):** -```rst -######## -Document -######## -:Mindee ID: 149ce775-8302-4798-8649-7eda9fb84a1a -:Filename: default_sample.jpg - -Inference -######### -:Product: mindee/cropper v1.0 -:Rotation applied: No - -Prediction -========== - -Page Predictions -================ - -Page 0 ------- -:Document Cropper: Polygon with 26 points. - Polygon with 25 points. -``` - -# Field Types -## Standard Fields -These fields are generic and used in several products. - -### BaseField -Each prediction object contains a set of fields that inherit from the generic `BaseField` class. -A typical `BaseField` object will have the following attributes: - -* **value** (`Union[float, str]`): corresponds to the field value. Can be `None` if no value was extracted. -* **confidence** (`float`): the confidence score of the field prediction. -* **bounding_box** (`[Point, Point, Point, Point]`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document. -* **polygon** (`List[Point]`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image. -* **page_id** (`int`): the ID of the page, always `None` when at document-level. 
-* **reconstructed** (`bool`): indicates whether an object was reconstructed (not extracted as the API gave it). - -> **Note:** A `Point` simply refers to a List of two numbers (`[float, float]`). - - -Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. - - -### PositionField -The position field `PositionField` does not implement all the basic `BaseField` attributes, only **bounding_box**, **polygon** and **page_id**. On top of these, it has access to: - -* **rectangle** (`[Point, Point, Point, Point]`): a Polygon with four points that may be oriented (even beyond canvas). -* **quadrangle** (`[Point, Point, Point, Point]`): a free polygon made up of four points. - -## Page-Level Fields -Some fields are constrained to the page level, and so will not be retrievable at document level. - -# Attributes -The following fields are extracted for Cropper V1: - -## Document Cropper -[📄](#page-level-fields "This field is only present on individual pages.")**cropping** (List[[PositionField](#positionfield)]): List of documents found in the image. - -```py -for page in result.document.inference.pages: - for cropping_elem in page.prediction.cropping): - print(cropping_elem.polygon) -``` - -# Questions? -[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/extras/guide/custom_v1.md b/docs/extras/guide/custom_v1.md deleted file mode 100644 index 6adf73ba..00000000 --- a/docs/extras/guide/custom_v1.md +++ /dev/null @@ -1,155 +0,0 @@ ---- -title: Custom API Python (Deprecated) -category: 622b805aaec68102ea7fcbc2 -slug: python-api-builder -parentDoc: 609808f773b0b90051d839de ---- -> 🚧 This product is still supported, but is considered to be deprecated. If you are looking for the docTI API documentation, you can find it [here](https://developers.mindee.com/docs/generated-api-python). 
- -# Quick-Start - -```python -from mindee import Client, product - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Add your custom endpoint (document) -my_endpoint = mindee_client.create_endpoint( - account_name="my-account", - endpoint_name="my-endpoint", -) - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Parse the file. -# The endpoint must be specified since it cannot be determined from the class. -result = mindee_client.parse( - product.CustomV1, - input_doc, - endpoint=my_endpoint -) - -# Print a brief summary of the parsed data -print(result.document) - -# Iterate over all the fields in the document -for field_name, field_values in result.document.fields.items(): - print(field_name, "=", field_values) -``` - -# Custom Endpoints - -You may have noticed in the previous step that in order to access a custom build, you will need to provide an account and an endpoint name at the very least. - -Although it is optional, the version number should match the latest version of your build in most use-cases. -If it is not set, it will default to "1". - - -# Field Types - -## Custom Fields - -### List Field - -A `ListField` is a special type of custom list that implements the following: - -* **confidence** (`float`): the confidence score of the field prediction. -* **reconstructed** (`bool`): indicates whether or not an object was reconstructed (not extracted as the API gave it). -* **values** (`List[`[ListFieldValue](#list-field-value)`]`): list of value fields - -Since the inner contents can vary, the value isn't accessed through a property, but rather through the following functions: -* **contents_list()** (`-> List[Union[str, float]]`): returns a list of values for each element. -* **contents_string(separator=" ")** (`-> str`): returns a list of concatenated values, with an optional **separator** `str` between them. 
-> **Note:** the `str()` method returns a string representation of all values of this object, with an empty space between each of them. - - -#### List Field Value - -Values of `ListField`s are stored in a `ListFieldValue` structure, which is implemented as follows: -* **content** (`str`): extracted content of the prediction -* **confidence** (`float`): the confidence score of the prediction -* **bounding_box** (`BBox`): 4 relative vertices corrdinates of a rectangle containing the word in the document. -* **polygon** (`Polygon`): vertices of a polygon containing the word. -* **page_id** (`int`): the ID of the page, is `None` when at document-level. - - -### Classification Field - -A `ClassificationField` is a special type of custom classification that implements the following: - -* **value** (`str`): the value of the classification. Corresponds to one of the values specified during training. -* **confidence** (`float`): the confidence score of the field prediction. -> **Note:** the `str()` method returns a string representation of all values of this object, with an empty space between each of them. - -# Attributes - -Custom builds always have access to at least two attributes: - -## Fields - -**fields** (Dict[`str`: List[ListField](#list-field)]): - -```python -print(str(result.document.inference.prediction.fields["my-field"])) -``` - -## Classifications - -**classifications** ([`str`: List[ClassificationField](#classification-field)]): The purchase category among predefined classes. - -```python -print(str(result.document.inference.prediction.classifications["my-classification"])) -``` - - -# 🧪 Custom Line Items - -> **⚠️ Warning**: Custom Line Items are an **experimental** feature, results may vary. - - -Though not supported directly in the API, sometimes you might need to reconstitute line items by hand. 
-The library provides a tool for this very purpose: - -## columns_to_line_items() -The **columns_to_line_items()** function can be called from the document and page level prediction objects. - -It takes the following arguments: - -* **anchor_names** (`List[str]`): a list of the names of possible anchor (field) candidates for the horizontal placement of a line. If all provided anchors are invalid, the `CustomLine` won't be built. -* **field_names** (`List[str]`): a list of fields to retrieve the values from -* **height_tolerance** (`float`): Optional, the height tolerance used to build the line. It helps when the height of a line can vary unexpectedly. - -Example use: - -```python -# document-level -response.document.inference.prediction.columns_to_line_items( - anchor_names, - field_names, - 0.011 # optional, defaults to 0.01 -) - -# page-level -response.document.pages[0].prediction.columns_to_line_items( - anchor_names, - field_names, - 0.011 # optional, defaults to 0.01 -) -``` - -It returns a list of [CustomLine](#CustomLine) objects. - -## CustomLine - -`CustomLine` represents a line as it has been read from column fields. It has the following attributes: - -* **row_number** (`int`): Number of a given line. Starts at 1. -* **fields** (`Dict[str, ListFieldValue]`[]): List of the fields associated with the line, indexed by their column name. -* **bbox** (`BBox`): Simple bounding box of the current line representing the 4 minimum & maximum coordinates as `float` values. - - -# Questions? 
- -[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/extras/guide/delivery_notes_v1.md b/docs/extras/guide/delivery_notes_v1.md deleted file mode 100644 index 0bf18f20..00000000 --- a/docs/extras/guide/delivery_notes_v1.md +++ /dev/null @@ -1,147 +0,0 @@ ---- -title: Delivery note OCR Python -category: 622b805aaec68102ea7fcbc2 -slug: python-delivery-note-ocr -parentDoc: 609808f773b0b90051d839de ---- -The Python OCR SDK supports the [Delivery note API](https://platform.mindee.com/mindee/delivery_notes). - -Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/delivery_notes/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK. -![Delivery note sample](https://github.com/mindee/client-lib-test-data/blob/main/products/delivery_notes/default_sample.jpg?raw=true) - -# Quick-Start -```py -# -# Install the Python client library by running: -# pip install mindee -# - -from mindee import Client, product, AsyncPredictResponse - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and enqueue it. 
-result: AsyncPredictResponse = mindee_client.enqueue_and_parse( - product.DeliveryNoteV1, - input_doc, -) - -# Print a brief summary of the parsed data -print(result.document) - -``` - -**Output (RST):** -```rst -######## -Document -######## -:Mindee ID: d5ead821-edec-4d31-a69a-cf3998d9a506 -:Filename: default_sample.jpg - -Inference -######### -:Product: mindee/delivery_notes v1.0 -:Rotation applied: Yes - -Prediction -========== -:Delivery Date: 2019-10-02 -:Delivery Number: INT-001 -:Supplier Name: John Smith -:Supplier Address: 4490 Oak Drive, Albany, NY 12210 -:Customer Name: Jessie M Horne -:Customer Address: 4312 Wood Road, New York, NY 10031 -:Total Amount: 204.75 -``` - -# Field Types -## Standard Fields -These fields are generic and used in several products. - -### BaseField -Each prediction object contains a set of fields that inherit from the generic `BaseField` class. -A typical `BaseField` object will have the following attributes: - -* **value** (`Union[float, str]`): corresponds to the field value. Can be `None` if no value was extracted. -* **confidence** (`float`): the confidence score of the field prediction. -* **bounding_box** (`[Point, Point, Point, Point]`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document. -* **polygon** (`List[Point]`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image. -* **page_id** (`int`): the ID of the page, always `None` when at document-level. -* **reconstructed** (`bool`): indicates whether an object was reconstructed (not extracted as the API gave it). - -> **Note:** A `Point` simply refers to a List of two numbers (`[float, float]`). - - -Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. - - -### AmountField -The amount field `AmountField` only has one constraint: its **value** is an `Optional[float]`. 
- -### DateField -Aside from the basic `BaseField` attributes, the date field `DateField` also implements the following: - -* **date_object** (`Date`): an accessible representation of the value as a python object. Can be `None`. - -### StringField -The text field `StringField` only has one constraint: its **value** is an `Optional[str]`. - -# Attributes -The following fields are extracted for Delivery note V1: - -## Customer Address -**customer_address** ([StringField](#stringfield)): The address of the customer receiving the goods. - -```py -print(result.document.inference.prediction.customer_address.value) -``` - -## Customer Name -**customer_name** ([StringField](#stringfield)): The name of the customer receiving the goods. - -```py -print(result.document.inference.prediction.customer_name.value) -``` - -## Delivery Date -**delivery_date** ([DateField](#datefield)): The date on which the delivery is scheduled to arrive. - -```py -print(result.document.inference.prediction.delivery_date.value) -``` - -## Delivery Number -**delivery_number** ([StringField](#stringfield)): A unique identifier for the delivery note. - -```py -print(result.document.inference.prediction.delivery_number.value) -``` - -## Supplier Address -**supplier_address** ([StringField](#stringfield)): The address of the supplier providing the goods. - -```py -print(result.document.inference.prediction.supplier_address.value) -``` - -## Supplier Name -**supplier_name** ([StringField](#stringfield)): The name of the supplier providing the goods. - -```py -print(result.document.inference.prediction.supplier_name.value) -``` - -## Total Amount -**total_amount** ([AmountField](#amountfield)): The total monetary value of the goods being delivered. - -```py -print(result.document.inference.prediction.total_amount.value) -``` - -# Questions? 
-[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/extras/guide/driver_license_v1.md b/docs/extras/guide/driver_license_v1.md deleted file mode 100644 index 059b7795..00000000 --- a/docs/extras/guide/driver_license_v1.md +++ /dev/null @@ -1,191 +0,0 @@ ---- -title: Driver License OCR Python -category: 622b805aaec68102ea7fcbc2 -slug: python-driver-license-ocr -parentDoc: 609808f773b0b90051d839de ---- -The Python OCR SDK supports the [Driver License API](https://platform.mindee.com/mindee/driver_license). - -Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/driver_license/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK. -![Driver License sample](https://github.com/mindee/client-lib-test-data/blob/main/products/driver_license/default_sample.jpg?raw=true) - -# Quick-Start -```py -# -# Install the Python client library by running: -# pip install mindee -# - -from mindee import Client, product, AsyncPredictResponse - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and enqueue it. 
-result: AsyncPredictResponse = mindee_client.enqueue_and_parse( - product.DriverLicenseV1, - input_doc, -) - -# Print a brief summary of the parsed data -print(result.document) - -``` - -**Output (RST):** -```rst -######## -Document -######## -:Mindee ID: fbdeae38-ada3-43ac-aa58-e01a3d47e474 -:Filename: default_sample.jpg - -Inference -######### -:Product: mindee/driver_license v1.0 -:Rotation applied: Yes - -Prediction -========== -:Country Code: USA -:State: AZ -:ID: D12345678 -:Category: D -:Last Name: Sample -:First Name: Jelani -:Date of Birth: 1957-02-01 -:Place of Birth: -:Expiry Date: 2018-02-01 -:Issued Date: 2013-01-10 -:Issuing Authority: -:MRZ: -:DD Number: DD1234567890123456 -``` - -# Field Types -## Standard Fields -These fields are generic and used in several products. - -### BaseField -Each prediction object contains a set of fields that inherit from the generic `BaseField` class. -A typical `BaseField` object will have the following attributes: - -* **value** (`Union[float, str]`): corresponds to the field value. Can be `None` if no value was extracted. -* **confidence** (`float`): the confidence score of the field prediction. -* **bounding_box** (`[Point, Point, Point, Point]`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document. -* **polygon** (`List[Point]`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image. -* **page_id** (`int`): the ID of the page, always `None` when at document-level. -* **reconstructed** (`bool`): indicates whether an object was reconstructed (not extracted as the API gave it). - -> **Note:** A `Point` simply refers to a List of two numbers (`[float, float]`). - - -Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. 
- -### DateField -Aside from the basic `BaseField` attributes, the date field `DateField` also implements the following: - -* **date_object** (`Date`): an accessible representation of the value as a python object. Can be `None`. - -### StringField -The text field `StringField` only has one constraint: its **value** is an `Optional[str]`. - -# Attributes -The following fields are extracted for Driver License V1: - -## Category -**category** ([StringField](#stringfield)): The category or class of the driver license. - -```py -print(result.document.inference.prediction.category.value) -``` - -## Country Code -**country_code** ([StringField](#stringfield)): The alpha-3 ISO 3166 code of the country where the driver license was issued. - -```py -print(result.document.inference.prediction.country_code.value) -``` - -## Date of Birth -**date_of_birth** ([DateField](#datefield)): The date of birth of the driver license holder. - -```py -print(result.document.inference.prediction.date_of_birth.value) -``` - -## DD Number -**dd_number** ([StringField](#stringfield)): The DD number of the driver license. - -```py -print(result.document.inference.prediction.dd_number.value) -``` - -## Expiry Date -**expiry_date** ([DateField](#datefield)): The expiry date of the driver license. - -```py -print(result.document.inference.prediction.expiry_date.value) -``` - -## First Name -**first_name** ([StringField](#stringfield)): The first name of the driver license holder. - -```py -print(result.document.inference.prediction.first_name.value) -``` - -## ID -**id** ([StringField](#stringfield)): The unique identifier of the driver license. - -```py -print(result.document.inference.prediction.id.value) -``` - -## Issued Date -**issued_date** ([DateField](#datefield)): The date when the driver license was issued. 
- -```py -print(result.document.inference.prediction.issued_date.value) -``` - -## Issuing Authority -**issuing_authority** ([StringField](#stringfield)): The authority that issued the driver license. - -```py -print(result.document.inference.prediction.issuing_authority.value) -``` - -## Last Name -**last_name** ([StringField](#stringfield)): The last name of the driver license holder. - -```py -print(result.document.inference.prediction.last_name.value) -``` - -## MRZ -**mrz** ([StringField](#stringfield)): The Machine Readable Zone (MRZ) of the driver license. - -```py -print(result.document.inference.prediction.mrz.value) -``` - -## Place of Birth -**place_of_birth** ([StringField](#stringfield)): The place of birth of the driver license holder. - -```py -print(result.document.inference.prediction.place_of_birth.value) -``` - -## State -**state** ([StringField](#stringfield)): Second part of the ISO 3166-2 code, consisting of two letters indicating the US State. - -```py -print(result.document.inference.prediction.state.value) -``` - -# Questions? -[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/extras/guide/energy_bill_fra_v1.md b/docs/extras/guide/energy_bill_fra_v1.md deleted file mode 100644 index 7f3ad92b..00000000 --- a/docs/extras/guide/energy_bill_fra_v1.md +++ /dev/null @@ -1,318 +0,0 @@ ---- -title: FR Energy Bill OCR Python -category: 622b805aaec68102ea7fcbc2 -slug: python-fr-energy-bill-ocr -parentDoc: 609808f773b0b90051d839de ---- -The Python OCR SDK supports the [Energy Bill API](https://platform.mindee.com/mindee/energy_bill_fra). - -Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/energy_bill_fra/default_sample.pdf), we are going to illustrate how to extract the data that we want using the OCR SDK. 
-![Energy Bill sample](https://github.com/mindee/client-lib-test-data/blob/main/products/energy_bill_fra/default_sample.pdf?raw=true) - -# Quick-Start -```py -# -# Install the Python client library by running: -# pip install mindee -# - -from mindee import Client, product, AsyncPredictResponse - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and enqueue it. -result: AsyncPredictResponse = mindee_client.enqueue_and_parse( - product.fr.EnergyBillV1, - input_doc, -) - -# Print a brief summary of the parsed data -print(result.document) - -``` - -**Output (RST):** -```rst -######## -Document -######## -:Mindee ID: ff1f2ca8-4d29-44d8-a564-599a982a4ef7 -:Filename: default_sample.pdf - -Inference -######### -:Product: mindee/energy_bill_fra v1.2 -:Rotation applied: Yes - -Prediction -========== -:Invoice Number: 1234567890 -:Contract ID: 9876543210 -:Delivery Point: 98765432109876 -:Invoice Date: 2021-01-29 -:Due Date: 2021-02-15 -:Total Before Taxes: 1241.03 -:Total Taxes: 238.82 -:Total Amount: 1479.85 -:Energy Supplier: - :Address: TSA 12345, 12345 DEMOCITY CEDEX - :Name: EDF -:Energy Consumer: - :Address: 123 RUE DE L'IMAGINAIRE, 75001 PARIS - :Name: JOHN DOE -:Subscription: - +--------------------------------------+------------+------------+----------+-----------+------------+ - | Description | End Date | Start Date | Tax Rate | Total | Unit Price | - +======================================+============+============+==========+===========+============+ - | Abonnement électricité | 2021-02-28 | 2021-01-01 | 5.50 | 59.00 | 29.50 | - +--------------------------------------+------------+------------+----------+-----------+------------+ -:Energy Usage: - +-------------+--------------------------------------+------------+------------+----------+-----------+-----------------+------------+ - | Consumption | Description | End Date | 
Start Date | Tax Rate | Total | Unit of Measure | Unit Price | - +=============+======================================+============+============+==========+===========+=================+============+ - | 8581.00 | Consommation électricité | 2021-01-27 | 2020-11-28 | 20.00 | 898.43 | kWh | 0.1047 | - +-------------+--------------------------------------+------------+------------+----------+-----------+-----------------+------------+ -:Taxes and Contributions: - +--------------------------------------+------------+------------+----------+-----------+------------+ - | Description | End Date | Start Date | Tax Rate | Total | Unit Price | - +======================================+============+============+==========+===========+============+ - | Contribution au Service Public de... | 2021-01-27 | 2020-11-28 | 20.00 | 193.07 | 0.0225 | - +--------------------------------------+------------+------------+----------+-----------+------------+ - | Taxe Départementale sur la Conso ... | 2021-01-27 | 2020-11-28 | 20.00 | 13.98 | 0.003315 | - +--------------------------------------+------------+------------+----------+-----------+------------+ - | Taxe Communale sur la Conso Final... | 2021-01-27 | 2020-11-28 | 20.00 | 28.56 | 0.006545 | - +--------------------------------------+------------+------------+----------+-----------+------------+ - | Taxe Communale sur la Conso Final... | 2021-01-27 | 2020-11-28 | 20.00 | 27.96 | 0.00663 | - +--------------------------------------+------------+------------+----------+-----------+------------+ -:Meter Details: - :Meter Number: 620 - :Meter Type: electricity - :Unit of Power: 36kVA -``` - -# Field Types -## Standard Fields -These fields are generic and used in several products. - -### BaseField -Each prediction object contains a set of fields that inherit from the generic `BaseField` class. -A typical `BaseField` object will have the following attributes: - -* **value** (`Union[float, str]`): corresponds to the field value. 
Can be `None` if no value was extracted. -* **confidence** (`float`): the confidence score of the field prediction. -* **bounding_box** (`[Point, Point, Point, Point]`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document. -* **polygon** (`List[Point]`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image. -* **page_id** (`int`): the ID of the page, always `None` when at document-level. -* **reconstructed** (`bool`): indicates whether an object was reconstructed (not extracted as the API gave it). - -> **Note:** A `Point` simply refers to a List of two numbers (`[float, float]`). - - -Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. - - -### AmountField -The amount field `AmountField` only has one constraint: its **value** is an `Optional[float]`. - -### DateField -Aside from the basic `BaseField` attributes, the date field `DateField` also implements the following: - -* **date_object** (`Date`): an accessible representation of the value as a python object. Can be `None`. - -### StringField -The text field `StringField` only has one constraint: its **value** is an `Optional[str]`. - -## Specific Fields -Fields which are specific to this product; they are not used in any other product. - -### Energy Consumer Field -The entity that consumes the energy. - -A `EnergyBillV1EnergyConsumer` implements the following attributes: - -* **address** (`str`): The address of the energy consumer. -* **name** (`str`): The name of the energy consumer. -Fields which are specific to this product; they are not used in any other product. - -### Energy Supplier Field -The company that supplies the energy. - -A `EnergyBillV1EnergySupplier` implements the following attributes: - -* **address** (`str`): The address of the energy supplier. 
-* **name** (`str`): The name of the energy supplier. -Fields which are specific to this product; they are not used in any other product. - -### Energy Usage Field -Details of energy consumption. - -An `EnergyBillV1EnergyUsage` implements the following attributes: - -* **consumption** (`float`): The quantity of energy consumed. -* **description** (`str`): Description or details of the energy usage. -* **end_date** (`str`): The end date of the energy usage. -* **start_date** (`str`): The start date of the energy usage. -* **tax_rate** (`float`): The rate of tax applied to the total cost. -* **total** (`float`): The total cost of energy consumed. -* **unit** (`str`): The unit of measurement for energy consumption. - -#### Possible values include: - - kWh - - m3 - - L - -* **unit_price** (`float`): The price per unit of energy consumed. -Fields which are specific to this product; they are not used in any other product. - -### Meter Details Field -Information about the energy meter. - -An `EnergyBillV1MeterDetail` implements the following attributes: - -* **meter_number** (`str`): The unique identifier of the energy meter. -* **meter_type** (`str`): The type of energy meter. - -#### Possible values include: - - electricity - - gas - - water - - None - -* **unit** (`str`): The unit of power for energy consumption. -Fields which are specific to this product; they are not used in any other product. - -### Subscription Field -The subscription details fee for the energy service. - -An `EnergyBillV1Subscription` implements the following attributes: - -* **description** (`str`): Description or details of the subscription. -* **end_date** (`str`): The end date of the subscription. -* **start_date** (`str`): The start date of the subscription. -* **tax_rate** (`float`): The rate of tax applied to the total cost. -* **total** (`float`): The total cost of subscription. -* **unit_price** (`float`): The price per unit of subscription. 
-Fields which are specific to this product; they are not used in any other product. - -### Taxes and Contributions Field -Details of Taxes and Contributions. - -A `EnergyBillV1TaxesAndContribution` implements the following attributes: - -* **description** (`str`): Description or details of the Taxes and Contributions. -* **end_date** (`str`): The end date of the Taxes and Contributions. -* **start_date** (`str`): The start date of the Taxes and Contributions. -* **tax_rate** (`float`): The rate of tax applied to the total cost. -* **total** (`float`): The total cost of Taxes and Contributions. -* **unit_price** (`float`): The price per unit of Taxes and Contributions. - -# Attributes -The following fields are extracted for Energy Bill V1: - -## Contract ID -**contract_id** ([StringField](#stringfield)): The unique identifier associated with a specific contract. - -```py -print(result.document.inference.prediction.contract_id.value) -``` - -## Delivery Point -**delivery_point** ([StringField](#stringfield)): The unique identifier assigned to each electricity or gas consumption point. It specifies the exact location where the energy is delivered. - -```py -print(result.document.inference.prediction.delivery_point.value) -``` - -## Due Date -**due_date** ([DateField](#datefield)): The date by which the payment for the energy invoice is due. - -```py -print(result.document.inference.prediction.due_date.value) -``` - -## Energy Consumer -**energy_consumer** ([EnergyBillV1EnergyConsumer](#energy-consumer-field)): The entity that consumes the energy. - -```py -print(result.document.inference.prediction.energy_consumer.value) -``` - -## Energy Supplier -**energy_supplier** ([EnergyBillV1EnergySupplier](#energy-supplier-field)): The company that supplies the energy. - -```py -print(result.document.inference.prediction.energy_supplier.value) -``` - -## Energy Usage -**energy_usage** (List[[EnergyBillV1EnergyUsage](#energy-usage-field)]): Details of energy consumption. 
- -```py -for energy_usage_elem in result.document.inference.prediction.energy_usage: - print(energy_usage_elem.value) -``` - -## Invoice Date -**invoice_date** ([DateField](#datefield)): The date when the energy invoice was issued. - -```py -print(result.document.inference.prediction.invoice_date.value) -``` - -## Invoice Number -**invoice_number** ([StringField](#stringfield)): The unique identifier of the energy invoice. - -```py -print(result.document.inference.prediction.invoice_number.value) -``` - -## Meter Details -**meter_details** ([EnergyBillV1MeterDetail](#meter-details-field)): Information about the energy meter. - -```py -print(result.document.inference.prediction.meter_details.value) -``` - -## Subscription -**subscription** (List[[EnergyBillV1Subscription](#subscription-field)]): The subscription details fee for the energy service. - -```py -for subscription_elem in result.document.inference.prediction.subscription: - print(subscription_elem.value) -``` - -## Taxes and Contributions -**taxes_and_contributions** (List[[EnergyBillV1TaxesAndContribution](#taxes-and-contributions-field)]): Details of Taxes and Contributions. - -```py -for taxes_and_contributions_elem in result.document.inference.prediction.taxes_and_contributions: - print(taxes_and_contributions_elem.value) -``` - -## Total Amount -**total_amount** ([AmountField](#amountfield)): The total amount to be paid for the energy invoice. - -```py -print(result.document.inference.prediction.total_amount.value) -``` - -## Total Before Taxes -**total_before_taxes** ([AmountField](#amountfield)): The total amount to be paid for the energy invoice before taxes. - -```py -print(result.document.inference.prediction.total_before_taxes.value) -``` - -## Total Taxes -**total_taxes** ([AmountField](#amountfield)): Total of taxes applied to the invoice. - -```py -print(result.document.inference.prediction.total_taxes.value) -``` - -# Questions? 
-[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/extras/guide/expense_receipts_v5.md b/docs/extras/guide/expense_receipts_v5.md deleted file mode 100644 index 61cb79be..00000000 --- a/docs/extras/guide/expense_receipts_v5.md +++ /dev/null @@ -1,385 +0,0 @@ ---- -title: Receipt OCR Python -category: 622b805aaec68102ea7fcbc2 -slug: python-receipt-ocr -parentDoc: 609808f773b0b90051d839de ---- -The Python OCR SDK supports the [Receipt API](https://platform.mindee.com/mindee/expense_receipts). - -Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/expense_receipts/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK. -![Receipt sample](https://github.com/mindee/client-lib-test-data/blob/main/products/expense_receipts/default_sample.jpg?raw=true) - -# Quick-Start -```py -# -# Install the Python client library by running: -# pip install mindee -# - -from mindee import Client, PredictResponse, product - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and parse it. -result: PredictResponse = mindee_client.parse( - product.ReceiptV5, - input_doc, -) - -# Print a summary of the API result -print(result.document) - -# Print the document-level summary -# print(result.document.inference.prediction) - -``` - -You can also call this product asynchronously: - -```py -# -# Install the Python client library by running: -# pip install mindee -# - -from mindee import Client, product, AsyncPredictResponse - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and enqueue it. 
-result: AsyncPredictResponse = mindee_client.enqueue_and_parse( - product.ReceiptV5, - input_doc, -) - -# Print a brief summary of the parsed data -print(result.document) - -``` - -**Output (RST):** -```rst -######## -Document -######## -:Mindee ID: d96fb043-8fb8-4adc-820c-387aae83376d -:Filename: default_sample.jpg - -Inference -######### -:Product: mindee/expense_receipts v5.3 -:Rotation applied: Yes - -Prediction -========== -:Expense Locale: en-GB; en; GB; GBP; -:Purchase Category: food -:Purchase Subcategory: restaurant -:Document Type: EXPENSE RECEIPT -:Purchase Date: 2016-02-26 -:Purchase Time: 15:20 -:Total Amount: 10.20 -:Total Net: 8.50 -:Total Tax: 1.70 -:Tip and Gratuity: -:Taxes: - +---------------+--------+----------+---------------+ - | Base | Code | Rate (%) | Amount | - +===============+========+==========+===============+ - | 8.50 | VAT | 20.00 | 1.70 | - +---------------+--------+----------+---------------+ -:Supplier Name: Clachan -:Supplier Company Registrations: Type: VAT NUMBER, Value: 232153895 - Type: VAT NUMBER, Value: 232153895 -:Supplier Address: 34 Kingley Street W1B 50H -:Supplier Phone Number: 02074940834 -:Receipt Number: 54/7500 -:Line Items: - +--------------------------------------+----------+--------------+------------+ - | Description | Quantity | Total Amount | Unit Price | - +======================================+==========+==============+============+ - | Meantime Pale | 2.00 | 10.20 | | - +--------------------------------------+----------+--------------+------------+ - -Page Predictions -================ - -Page 0 ------- -:Expense Locale: en-GB; en; GB; GBP; -:Purchase Category: food -:Purchase Subcategory: restaurant -:Document Type: EXPENSE RECEIPT -:Purchase Date: 2016-02-26 -:Purchase Time: 15:20 -:Total Amount: 10.20 -:Total Net: 8.50 -:Total Tax: 1.70 -:Tip and Gratuity: -:Taxes: - +---------------+--------+----------+---------------+ - | Base | Code | Rate (%) | Amount | - 
+===============+========+==========+===============+ - | 8.50 | VAT | 20.00 | 1.70 | - +---------------+--------+----------+---------------+ -:Supplier Name: Clachan -:Supplier Company Registrations: Type: VAT NUMBER, Value: 232153895 - Type: VAT NUMBER, Value: 232153895 -:Supplier Address: 34 Kingley Street W1B 50H -:Supplier Phone Number: 02074940834 -:Receipt Number: 54/7500 -:Line Items: - +--------------------------------------+----------+--------------+------------+ - | Description | Quantity | Total Amount | Unit Price | - +======================================+==========+==============+============+ - | Meantime Pale | 2.00 | 10.20 | | - +--------------------------------------+----------+--------------+------------+ -``` - -# Field Types -## Standard Fields -These fields are generic and used in several products. - -### BaseField -Each prediction object contains a set of fields that inherit from the generic `BaseField` class. -A typical `BaseField` object will have the following attributes: - -* **value** (`Union[float, str]`): corresponds to the field value. Can be `None` if no value was extracted. -* **confidence** (`float`): the confidence score of the field prediction. -* **bounding_box** (`[Point, Point, Point, Point]`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document. -* **polygon** (`List[Point]`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image. -* **page_id** (`int`): the ID of the page, always `None` when at document-level. -* **reconstructed** (`bool`): indicates whether an object was reconstructed (not extracted as the API gave it). - -> **Note:** A `Point` simply refers to a List of two numbers (`[float, float]`). - - -Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. 
- - -### AmountField -The amount field `AmountField` only has one constraint: its **value** is an `Optional[float]`. - - -### ClassificationField -The classification field `ClassificationField` does not implement all the basic `BaseField` attributes. It only implements **value**, **confidence** and **page_id**. - -> Note: a classification field's `value` is always a `str`. - - -### CompanyRegistrationField -Aside from the basic `BaseField` attributes, the company registration field `CompanyRegistrationField` also implements the following: - -* **type** (`str`): the type of company registration (e.g.: `VAT NUMBER`). - -### DateField -Aside from the basic `BaseField` attributes, the date field `DateField` also implements the following: - -* **date_object** (`Date`): an accessible representation of the value as a python object. Can be `None`. - -### LocaleField -The locale field `LocaleField` only implements the **value**, **confidence** and **page_id** base `BaseField` attributes, but it comes with its own: - -* **language** (`str`): ISO 639-1 language code (e.g.: `en` for English). Can be `None`. -* **country** (`str`): ISO 3166-1 alpha-2 or ISO 3166-1 alpha-3 code for countries (e.g.: `GBR` or `GB` for "Great Britain"). Can be `None`. -* **currency** (`str`): ISO 4217 code for currencies (e.g.: `USD` for "US Dollars"). Can be `None`. - -### StringField -The text field `StringField` only has one constraint: its **value** is an `Optional[str]`. - -### TaxesField -#### Tax -Aside from the basic `BaseField` attributes, the tax field `TaxField` also implements the following: - -* **rate** (`float`): the tax rate applied to an item expressed as a percentage. Can be `None`. -* **code** (`str`): tax code (or equivalent, depending on the origin of the document). Can be `None`. -* **basis** (`float`): base amount used for the tax. Can be `None`. -* **value** (`float`): the value of the tax. Can be `None`. 
- -> Note: currently `TaxField` is not used on its own, and is accessed through a parent `Taxes` object, a list-like structure. - -#### Taxes (Array) -The `Taxes` field represents a list-like collection of `TaxField` objects. As it is the representation of several objects, it has access to a custom `__str__` method that can render a `TaxField` object as a table line. - -## Specific Fields -Fields which are specific to this product; they are not used in any other product. - -### Line Items Field -List of all line items on the receipt. - -A `ReceiptV5LineItem` implements the following attributes: - -* **description** (`str`): The item description. -* **quantity** (`float`): The item quantity. -* **total_amount** (`float`): The item total amount. -* **unit_price** (`float`): The item unit price. - -# Attributes -The following fields are extracted for Receipt V5: - -## Purchase Category -**category** ([ClassificationField](#classificationfield)): The purchase category of the receipt. - -#### Possible values include: - - 'toll' - - 'food' - - 'parking' - - 'transport' - - 'accommodation' - - 'gasoline' - - 'telecom' - - 'miscellaneous' - - 'software' - - 'shopping' - - 'energy' - -```py -print(result.document.inference.prediction.category.value) -``` - -## Purchase Date -**date** ([DateField](#datefield)): The date the purchase was made. - -```py -print(result.document.inference.prediction.date.value) -``` - -## Document Type -**document_type** ([ClassificationField](#classificationfield)): The type of receipt: EXPENSE RECEIPT or CREDIT CARD RECEIPT. - -#### Possible values include: - - 'EXPENSE RECEIPT' - - 'CREDIT CARD RECEIPT' - -```py -print(result.document.inference.prediction.document_type.value) -``` - -## Line Items -**line_items** (List[[ReceiptV5LineItem](#line-items-field)]): List of all line items on the receipt. 
- -```py -for line_items_elem in result.document.inference.prediction.line_items: - print(line_items_elem) -``` - -## Expense Locale -**locale** ([LocaleField](#localefield)): The locale of the document. - -```py -print(result.document.inference.prediction.locale.value) -``` - -## Receipt Number -**receipt_number** ([StringField](#stringfield)): The receipt number or identifier. - -```py -print(result.document.inference.prediction.receipt_number.value) -``` - -## Purchase Subcategory -**subcategory** ([ClassificationField](#classificationfield)): The purchase subcategory of the receipt for transport and food. - -#### Possible values include: - - 'plane' - - 'taxi' - - 'train' - - 'restaurant' - - 'shopping' - - 'other' - - 'groceries' - - 'cultural' - - 'electronics' - - 'office_supplies' - - 'micromobility' - - 'car_rental' - - 'public' - - 'delivery' - - None - -```py -print(result.document.inference.prediction.subcategory.value) -``` - -## Supplier Address -**supplier_address** ([StringField](#stringfield)): The address of the supplier or merchant. - -```py -print(result.document.inference.prediction.supplier_address.value) -``` - -## Supplier Company Registrations -**supplier_company_registrations** (List[[CompanyRegistrationField](#companyregistrationfield)]): List of company registration numbers associated to the supplier. - -```py -for supplier_company_registrations_elem in result.document.inference.prediction.supplier_company_registrations: - print(supplier_company_registrations_elem.value) -``` - -## Supplier Name -**supplier_name** ([StringField](#stringfield)): The name of the supplier or merchant. - -```py -print(result.document.inference.prediction.supplier_name.value) -``` - -## Supplier Phone Number -**supplier_phone_number** ([StringField](#stringfield)): The phone number of the supplier or merchant. 
- -```py -print(result.document.inference.prediction.supplier_phone_number.value) -``` - -## Taxes -**taxes** (List[[TaxField](#taxes)]): The list of taxes present on the receipt. - -```py -for taxes_elem in result.document.inference.prediction.taxes: - print(taxes_elem.polygon) -``` - -## Purchase Time -**time** ([StringField](#stringfield)): The time the purchase was made. - -```py -print(result.document.inference.prediction.time.value) -``` - -## Tip and Gratuity -**tip** ([AmountField](#amountfield)): The total amount of tip and gratuity. - -```py -print(result.document.inference.prediction.tip.value) -``` - -## Total Amount -**total_amount** ([AmountField](#amountfield)): The total amount paid: includes taxes, discounts, fees, tips, and gratuity. - -```py -print(result.document.inference.prediction.total_amount.value) -``` - -## Total Net -**total_net** ([AmountField](#amountfield)): The net amount paid: does not include taxes, fees, and discounts. - -```py -print(result.document.inference.prediction.total_net.value) -``` - -## Total Tax -**total_tax** ([AmountField](#amountfield)): The sum of all taxes. - -```py -print(result.document.inference.prediction.total_tax.value) -``` - -# Questions? -[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/extras/guide/financial_document_v1.md b/docs/extras/guide/financial_document_v1.md deleted file mode 100644 index 062684cf..00000000 --- a/docs/extras/guide/financial_document_v1.md +++ /dev/null @@ -1,578 +0,0 @@ ---- -title: Financial Document OCR Python -category: 622b805aaec68102ea7fcbc2 -slug: python-financial-document-ocr -parentDoc: 609808f773b0b90051d839de ---- -The Python OCR SDK supports the [Financial Document API](https://platform.mindee.com/mindee/financial_document). 
- -Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/financial_document/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK. -![Financial Document sample](https://github.com/mindee/client-lib-test-data/blob/main/products/financial_document/default_sample.jpg?raw=true) - -# Quick-Start -```py -# -# Install the Python client library by running: -# pip install mindee -# - -from mindee import Client, PredictResponse, product - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and parse it. -result: PredictResponse = mindee_client.parse( - product.FinancialDocumentV1, - input_doc, -) - -# Print a summary of the API result -print(result.document) - -# Print the document-level summary -# print(result.document.inference.prediction) - -``` - -You can also call this product asynchronously: - -```py -# -# Install the Python client library by running: -# pip install mindee -# - -from mindee import Client, product, AsyncPredictResponse - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and enqueue it. 
-result: AsyncPredictResponse = mindee_client.enqueue_and_parse( - product.FinancialDocumentV1, - input_doc, -) - -# Print a brief summary of the parsed data -print(result.document) - -``` - -**Output (RST):** -```rst -######## -Document -######## -:Mindee ID: 6dd26385-719b-4527-bf6f-87d9da619de5 -:Filename: default_sample.jpg - -Inference -######### -:Product: mindee/financial_document v1.14 -:Rotation applied: Yes - -Prediction -========== -:Locale: en-US; en; US; USD; -:Invoice Number: INT-001 -:Purchase Order Number: 2412/2019 -:Receipt Number: -:Document Number: INT-001 -:Reference Numbers: 2412/2019 -:Purchase Date: 2019-11-02 -:Due Date: 2019-11-17 -:Payment Date: 2019-11-17 -:Total Net: 195.00 -:Total Amount: 204.75 -:Taxes: - +---------------+--------+----------+---------------+ - | Base | Code | Rate (%) | Amount | - +===============+========+==========+===============+ - | 195.00 | | 5.00 | 9.75 | - +---------------+--------+----------+---------------+ -:Supplier Payment Details: -:Supplier Name: JOHN SMITH -:Supplier Company Registrations: -:Supplier Address: 4490 Oak Drive Albany, NY 12210 -:Supplier Phone Number: -:Customer Name: JESSIE M HORNE -:Supplier Website: -:Supplier Email: -:Customer Company Registrations: -:Customer Address: 2019 Redbud Drive New York, NY 10011 -:Customer ID: 1234567890 -:Shipping Address: 2019 Redbud Drive New York, NY 10011 -:Billing Address: 4312 Wood Road New York, NY 10031 -:Document Type: INVOICE -:Document Type Extended: INVOICE -:Purchase Subcategory: -:Purchase Category: miscellaneous -:Total Tax: 9.75 -:Tip and Gratuity: -:Purchase Time: -:Line Items: - +--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+ - | Description | Product code | Quantity | Tax Amount | Tax Rate (%) | Total Amount | Unit of measure | Unit Price | - 
+======================================+==============+==========+============+==============+==============+=================+============+ - | Front and rear brake cables | | 1.00 | | | 100.00 | | 100.00 | - +--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+ - | New set of pedal arms | | 2.00 | | | 50.00 | | 25.00 | - +--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+ - | Labor 3hrs | | 3.00 | | | 45.00 | | 15.00 | - +--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+ - -Page Predictions -================ - -Page 0 ------- -:Locale: en-US; en; US; USD; -:Invoice Number: INT-001 -:Purchase Order Number: 2412/2019 -:Receipt Number: -:Document Number: INT-001 -:Reference Numbers: 2412/2019 -:Purchase Date: 2019-11-02 -:Due Date: 2019-11-17 -:Payment Date: 2019-11-17 -:Total Net: 195.00 -:Total Amount: 204.75 -:Taxes: - +---------------+--------+----------+---------------+ - | Base | Code | Rate (%) | Amount | - +===============+========+==========+===============+ - | 195.00 | | 5.00 | 9.75 | - +---------------+--------+----------+---------------+ -:Supplier Payment Details: -:Supplier Name: JOHN SMITH -:Supplier Company Registrations: -:Supplier Address: 4490 Oak Drive Albany, NY 12210 -:Supplier Phone Number: -:Customer Name: JESSIE M HORNE -:Supplier Website: -:Supplier Email: -:Customer Company Registrations: -:Customer Address: 2019 Redbud Drive New York, NY 10011 -:Customer ID: 1234567890 -:Shipping Address: 2019 Redbud Drive New York, NY 10011 -:Billing Address: 4312 Wood Road New York, NY 10031 -:Document Type: INVOICE -:Document Type Extended: INVOICE -:Purchase Subcategory: -:Purchase Category: miscellaneous -:Total Tax: 9.75 -:Tip and Gratuity: -:Purchase Time: -:Line Items: - 
+--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+ - | Description | Product code | Quantity | Tax Amount | Tax Rate (%) | Total Amount | Unit of measure | Unit Price | - +======================================+==============+==========+============+==============+==============+=================+============+ - | Front and rear brake cables | | 1.00 | | | 100.00 | | 100.00 | - +--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+ - | New set of pedal arms | | 2.00 | | | 50.00 | | 25.00 | - +--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+ - | Labor 3hrs | | 3.00 | | | 45.00 | | 15.00 | - +--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+ -``` - -# Field Types -## Standard Fields -These fields are generic and used in several products. - -### BaseField -Each prediction object contains a set of fields that inherit from the generic `BaseField` class. -A typical `BaseField` object will have the following attributes: - -* **value** (`Union[float, str]`): corresponds to the field value. Can be `None` if no value was extracted. -* **confidence** (`float`): the confidence score of the field prediction. -* **bounding_box** (`[Point, Point, Point, Point]`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document. -* **polygon** (`List[Point]`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image. -* **page_id** (`int`): the ID of the page, always `None` when at document-level. -* **reconstructed** (`bool`): indicates whether an object was reconstructed (not extracted as the API gave it). 
- -> **Note:** A `Point` simply refers to a List of two numbers (`[float, float]`). - - -Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. - - -### AddressField -Aside from the basic `BaseField` attributes, the address field `AddressField` also implements the following: - -* **street_number** (`str`): String representation of the street number. Can be `None`. -* **street_name** (`str`): Name of the street. Can be `None`. -* **po_box** (`str`): String representation of the PO Box number. Can be `None`. -* **address_complement** (`str`): Address complement. Can be `None`. -* **city** (`str`): City name. Can be `None`. -* **postal_code** (`str`): String representation of the postal code. Can be `None`. -* **state** (`str`): State name. Can be `None`. -* **country** (`str`): Country name. Can be `None`. - -Note: The `value` field of an AddressField should be a concatenation of the rest of the values. - - -### AmountField -The amount field `AmountField` only has one constraint: its **value** is an `Optional[float]`. - - -### ClassificationField -The classification field `ClassificationField` does not implement all the basic `BaseField` attributes. It only implements **value**, **confidence** and **page_id**. - -> Note: a classification field's `value` is always a `str`. - - -### CompanyRegistrationField -Aside from the basic `BaseField` attributes, the company registration field `CompanyRegistrationField` also implements the following: - -* **type** (`str`): the type of company. - -### DateField -Aside from the basic `BaseField` attributes, the date field `DateField` also implements the following: - -* **date_object** (`Date`): an accessible representation of the value as a python object. Can be `None`.
- -### LocaleField -The locale field `LocaleField` only implements the **value**, **confidence** and **page_id** base `BaseField` attributes, but it comes with its own: - -* **language** (`str`): ISO 639-1 language code (e.g.: `en` for English). Can be `None`. -* **country** (`str`): ISO 3166-1 alpha-2 or ISO 3166-1 alpha-3 code for countries (e.g.: `GBR` or `GB` for "Great Britain"). Can be `None`. -* **currency** (`str`): ISO 4217 code for currencies (e.g.: `USD` for "US Dollars"). Can be `None`. - -### PaymentDetailsField -Aside from the basic `BaseField` attributes, the payment details field `PaymentDetailsField` also implements the following: - -* **account_number** (`str`): number of an account, expressed as a string. Can be `None`. -* **iban** (`str`): International Bank Account Number. Can be `None`. -* **routing_number** (`str`): routing number of an account. Can be `None`. -* **swift** (`str`): the account holder's bank's SWIFT Business Identifier Code (BIC). Can be `None`. - -### StringField -The text field `StringField` only has one constraint: its **value** is an `Optional[str]`. - -### TaxesField -#### Tax -Aside from the basic `BaseField` attributes, the tax field `TaxField` also implements the following: - -* **rate** (`float`): the tax rate applied to an item expressed as a percentage. Can be `None`. -* **code** (`str`): tax code (or equivalent, depending on the origin of the document). Can be `None`. -* **basis** (`float`): base amount used for the tax. Can be `None`. -* **value** (`float`): the value of the tax. Can be `None`. - -> Note: currently `TaxField` is not used on its own, and is accessed through a parent `Taxes` object, a list-like structure. - -#### Taxes (Array) -The `Taxes` field represents a list-like collection of `TaxField` objects. As it is the representation of several objects, it has access to a custom `__str__` method that can render a `TaxField` object as a table line.
- -## Specific Fields -Fields which are specific to this product; they are not used in any other product. - -### Line Items Field -List of line items present on the document. - -A `FinancialDocumentV1LineItem` implements the following attributes: - -* **description** (`str`): The item description. -* **product_code** (`str`): The product code referring to the item. -* **quantity** (`float`): The item quantity. -* **tax_amount** (`float`): The item tax amount. -* **tax_rate** (`float`): The item tax rate in percentage. -* **total_amount** (`float`): The item total amount. -* **unit_measure** (`str`): The item unit of measure. -* **unit_price** (`float`): The item unit price. - -# Attributes -The following fields are extracted for Financial Document V1: - -## Billing Address -**billing_address** ([AddressField](#addressfield)): The customer's address used for billing. - -```py -print(result.document.inference.prediction.billing_address.value) -``` - -## Purchase Category -**category** ([ClassificationField](#classificationfield)): The purchase category. - -#### Possible values include: - - 'toll' - - 'food' - - 'parking' - - 'transport' - - 'accommodation' - - 'gasoline' - - 'telecom' - - 'miscellaneous' - - 'software' - - 'shopping' - - 'energy' - -```py -print(result.document.inference.prediction.category.value) -``` - -## Customer Address -**customer_address** ([AddressField](#addressfield)): The address of the customer. - -```py -print(result.document.inference.prediction.customer_address.value) -``` - -## Customer Company Registrations -**customer_company_registrations** (List[[CompanyRegistrationField](#companyregistrationfield)]): List of company registration numbers associated to the customer.
- -```py -for customer_company_registrations_elem in result.document.inference.prediction.customer_company_registrations: - print(customer_company_registrations_elem.value) -``` - -## Customer ID -**customer_id** ([StringField](#stringfield)): The customer account number or identifier from the supplier. - -```py -print(result.document.inference.prediction.customer_id.value) -``` - -## Customer Name -**customer_name** ([StringField](#stringfield)): The name of the customer. - -```py -print(result.document.inference.prediction.customer_name.value) -``` - -## Purchase Date -**date** ([DateField](#datefield)): The date the purchase was made. - -```py -print(result.document.inference.prediction.date.value) -``` - -## Document Number -**document_number** ([StringField](#stringfield)): The document number or identifier (invoice number or receipt number). - -```py -print(result.document.inference.prediction.document_number.value) -``` - -## Document Type -**document_type** ([ClassificationField](#classificationfield)): The type of the document: INVOICE or CREDIT NOTE if it is an invoice, CREDIT CARD RECEIPT or EXPENSE RECEIPT if it is a receipt. - -#### Possible values include: - - 'INVOICE' - - 'CREDIT NOTE' - - 'CREDIT CARD RECEIPT' - - 'EXPENSE RECEIPT' - -```py -print(result.document.inference.prediction.document_type.value) -``` - -## Document Type Extended -**document_type_extended** ([ClassificationField](#classificationfield)): Document type extended. - -#### Possible values include: - - 'CREDIT NOTE' - - 'INVOICE' - - 'OTHER' - - 'OTHER_FINANCIAL' - - 'PAYSLIP' - - 'PURCHASE ORDER' - - 'QUOTE' - - 'RECEIPT' - - 'STATEMENT' - -```py -print(result.document.inference.prediction.document_type_extended.value) -``` - -## Due Date -**due_date** ([DateField](#datefield)): The date on which the payment is due. 
- -```py -print(result.document.inference.prediction.due_date.value) -``` - -## Invoice Number -**invoice_number** ([StringField](#stringfield)): The invoice number or identifier only if document is an invoice. - -```py -print(result.document.inference.prediction.invoice_number.value) -``` - -## Line Items -**line_items** (List[[FinancialDocumentV1LineItem](#line-items-field)]): List of line items present on the document. - -```py -for line_items_elem in result.document.inference.prediction.line_items: - print(line_items_elem) -``` - -## Locale -**locale** ([LocaleField](#localefield)): The locale of the document. - -```py -print(result.document.inference.prediction.locale.value) -``` - -## Payment Date -**payment_date** ([DateField](#datefield)): The date on which the payment is due / fulfilled. - -```py -print(result.document.inference.prediction.payment_date.value) -``` - -## Purchase Order Number -**po_number** ([StringField](#stringfield)): The purchase order number, only if the document is an invoice. - -```py -print(result.document.inference.prediction.po_number.value) -``` - -## Receipt Number -**receipt_number** ([StringField](#stringfield)): The receipt number or identifier only if document is a receipt. - -```py -print(result.document.inference.prediction.receipt_number.value) -``` - -## Reference Numbers -**reference_numbers** (List[[StringField](#stringfield)]): List of reference numbers, including PO number, only if the document is an invoice. - -```py -for reference_numbers_elem in result.document.inference.prediction.reference_numbers: - print(reference_numbers_elem.value) -``` - -## Shipping Address -**shipping_address** ([AddressField](#addressfield)): The customer's address used for shipping. - -```py -print(result.document.inference.prediction.shipping_address.value) -``` - -## Purchase Subcategory -**subcategory** ([ClassificationField](#classificationfield)): The purchase subcategory for transport, food and shopping.
- -#### Possible values include: - - 'plane' - - 'taxi' - - 'train' - - 'restaurant' - - 'shopping' - - 'other' - - 'groceries' - - 'cultural' - - 'electronics' - - 'office_supplies' - - 'micromobility' - - 'car_rental' - - 'public' - - 'delivery' - - None - -```py -print(result.document.inference.prediction.subcategory.value) -``` - -## Supplier Address -**supplier_address** ([AddressField](#addressfield)): The address of the supplier or merchant. - -```py -print(result.document.inference.prediction.supplier_address.value) -``` - -## Supplier Company Registrations -**supplier_company_registrations** (List[[CompanyRegistrationField](#companyregistrationfield)]): List of company registration numbers associated to the supplier. - -```py -for supplier_company_registrations_elem in result.document.inference.prediction.supplier_company_registrations: - print(supplier_company_registrations_elem.value) -``` - -## Supplier Email -**supplier_email** ([StringField](#stringfield)): The email of the supplier or merchant. - -```py -print(result.document.inference.prediction.supplier_email.value) -``` - -## Supplier Name -**supplier_name** ([StringField](#stringfield)): The name of the supplier or merchant. - -```py -print(result.document.inference.prediction.supplier_name.value) -``` - -## Supplier Payment Details -**supplier_payment_details** (List[[PaymentDetailsField](#paymentdetailsfield)]): List of payment details associated to the supplier (only for invoices). - -```py -for supplier_payment_details_elem in result.document.inference.prediction.supplier_payment_details: - print(supplier_payment_details_elem.value) -``` - -## Supplier Phone Number -**supplier_phone_number** ([StringField](#stringfield)): The phone number of the supplier or merchant. - -```py -print(result.document.inference.prediction.supplier_phone_number.value) -``` - -## Supplier Website -**supplier_website** ([StringField](#stringfield)): The website URL of the supplier or merchant. 
- -```py -print(result.document.inference.prediction.supplier_website.value) -``` - -## Taxes -**taxes** (List[[TaxField](#taxes)]): List of all taxes on the document. - -```py -for taxes_elem in result.document.inference.prediction.taxes: - print(taxes_elem.polygon) -``` - -## Purchase Time -**time** ([StringField](#stringfield)): The time the purchase was made (only for receipts). - -```py -print(result.document.inference.prediction.time.value) -``` - -## Tip and Gratuity -**tip** ([AmountField](#amountfield)): The total amount of tip and gratuity - -```py -print(result.document.inference.prediction.tip.value) -``` - -## Total Amount -**total_amount** ([AmountField](#amountfield)): The total amount paid: includes taxes, tips, fees, and other charges. - -```py -print(result.document.inference.prediction.total_amount.value) -``` - -## Total Net -**total_net** ([AmountField](#amountfield)): The net amount paid: does not include taxes, fees, and discounts. - -```py -print(result.document.inference.prediction.total_net.value) -``` - -## Total Tax -**total_tax** ([AmountField](#amountfield)): The sum of all taxes present on the document. - -```py -print(result.document.inference.prediction.total_tax.value) -``` - -# Questions? -[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/extras/guide/french_healthcard_v1.md b/docs/extras/guide/french_healthcard_v1.md deleted file mode 100644 index 58109873..00000000 --- a/docs/extras/guide/french_healthcard_v1.md +++ /dev/null @@ -1,120 +0,0 @@ ---- -title: FR Health Card OCR Python -category: 622b805aaec68102ea7fcbc2 -slug: python-fr-health-card-ocr -parentDoc: 609808f773b0b90051d839de ---- -The Python OCR SDK supports the [Health Card API](https://platform.mindee.com/mindee/french_healthcard). 
- -Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/french_healthcard/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK. -![Health Card sample](https://github.com/mindee/client-lib-test-data/blob/main/products/french_healthcard/default_sample.jpg?raw=true) - -# Quick-Start -```py -# -# Install the Python client library by running: -# pip install mindee -# - -from mindee import Client, product, AsyncPredictResponse - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and enqueue it. -result: AsyncPredictResponse = mindee_client.enqueue_and_parse( - product.fr.HealthCardV1, - input_doc, -) - -# Print a brief summary of the parsed data -print(result.document) - -``` - -**Output (RST):** -```rst -######## -Document -######## -:Mindee ID: 9ee2733d-933a-4dcd-a73a-a31395e3b288 -:Filename: default_sample.jpg - -Inference -######### -:Product: mindee/french_healthcard v1.0 -:Rotation applied: Yes - -Prediction -========== -:Given Name(s): NATHALIE -:Surname: DURAND -:Social Security Number: 2 69 05 49 588 157 80 -:Issuance Date: 2007-01-01 -``` - -# Field Types -## Standard Fields -These fields are generic and used in several products. - -### BaseField -Each prediction object contains a set of fields that inherit from the generic `BaseField` class. -A typical `BaseField` object will have the following attributes: - -* **value** (`Union[float, str]`): corresponds to the field value. Can be `None` if no value was extracted. -* **confidence** (`float`): the confidence score of the field prediction. -* **bounding_box** (`[Point, Point, Point, Point]`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document. 
-* **polygon** (`List[Point]`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image. -* **page_id** (`int`): the ID of the page, always `None` when at document-level. -* **reconstructed** (`bool`): indicates whether an object was reconstructed (not extracted as the API gave it). - -> **Note:** A `Point` simply refers to a List of two numbers (`[float, float]`). - - -Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. - -### DateField -Aside from the basic `BaseField` attributes, the date field `DateField` also implements the following: - -* **date_object** (`Date`): an accessible representation of the value as a python object. Can be `None`. - -### StringField -The text field `StringField` only has one constraint: its **value** is an `Optional[str]`. - -# Attributes -The following fields are extracted for Health Card V1: - -## Given Name(s) -**given_names** (List[[StringField](#stringfield)]): The given names of the card holder. - -```py -for given_names_elem in result.document.inference.prediction.given_names: - print(given_names_elem.value) -``` - -## Issuance Date -**issuance_date** ([DateField](#datefield)): The date when the carte vitale document was issued. - -```py -print(result.document.inference.prediction.issuance_date.value) -``` - -## Social Security Number -**social_security** ([StringField](#stringfield)): The social security number of the card holder. - -```py -print(result.document.inference.prediction.social_security.value) -``` - -## Surname -**surname** ([StringField](#stringfield)): The surname of the card holder. - -```py -print(result.document.inference.prediction.surname.value) -``` - -# Questions? 
-[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/extras/guide/generated_v1.md b/docs/extras/guide/generated_v1.md deleted file mode 100644 index 47d53095..00000000 --- a/docs/extras/guide/generated_v1.md +++ /dev/null @@ -1,106 +0,0 @@ ---- -title: Generated OCR Python -category: 622b805aaec68102ea7fcbc2 -slug: generated-api-python -parentDoc: 609808f773b0b90051d839de ---- -The Python OCR SDK supports generated APIs. -Generated APIs can theoretically support all APIs in a catch-all generic format. - -# Quick-Start - -```python -from mindee import Client, PredictResponse, product - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Add the corresponding endpoint (document). Set the account_name to "mindee" if you are using OTS. -my_endpoint = mindee_client.create_endpoint( - account_name="my-account", - endpoint_name="my-endpoint", - version="my-version" -) - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Parse the file. -# The endpoint must be specified since it cannot be determined from the class. -result: PredictResponse = mindee_client.enqueue_and_parse( - product.GeneratedV1, - input_doc, - endpoint=my_endpoint -) - -# Print a brief summary of the parsed data -print(result.document) - -# # Iterate over all the fields in the document -# for field_name, field_values in result.document.inference.prediction.fields.items(): -# print(field_name, "=", field_values) -``` - -# Generated Endpoints - -You may have noticed in the previous step that in order to access a custom build, you will need to provide an account and an endpoint name at the very least. - -Although it is optional, the version number should match the latest version of your build in most use-cases. -If it is not set, it will default to "1". 
- -# Field Types - -## Generated Fields - -### Generated List Field - -A `GeneratedListField` is a special type of custom list that implements the following: - -- **values** (`List[Union[StringField, `[GeneratedObjectField](#generated-object-field)`]]`): the list of values contained in the field. -- **page_id** (`int`): only available for some documents at the moment. - -Since the inner contents can vary, the value isn't accessed through a property, but rather through the following functions: - -- **contents_list()** (`-> List[Union[str, float]]`): returns a list of values for each element. -- **contents_string(separator=" ")** (`-> str`): returns a string of concatenated values, with an optional **separator** `str` between them. -> **Note:** the `str()` method returns a string representation of all values of this object, with an empty space between each of them. - -### Generated Object Field - -Unrecognized structures and sometimes values of `ListField`s are stored in a `GeneratedObjectField` structure, which is implemented dynamically depending on the object's structure. - -- **page_id** (`Optional[int]`): the ID of the page, is `None` when at document-level. -- **raw_value** (`Optional[str]`): an optional field for when some post-processing has been done on fields (e.g. amounts). `None` in most instances. -- **confidence** (`Optional[float]`): the confidence score of the field prediction. Warning: support isn't guaranteed on all APIs. - - -> **Other fields**: No matter what, other fields will be stored in a dictionary-like structure with a `key: value` pair where `key` is a string and `value` is a nullable string. They can be accessed like any other regular value, but won't be suggested by your IDE. - - -### StringField -The text field `StringField` only has one constraint: its **value** is an `Optional[str]`.
- - -# Attributes - -Generated builds always have access to at least two attributes: - -## Fields - -**fields** (`Dict[str`: `List[Union[`[GeneratedListField](#generated-list-field), [GeneratedObjectField](#generated-object-field), [StringField](#stringfield)`]]]`): every extracted field, accessible by its name: - -```python -print(str(result.document.inference.prediction.fields["my-field"])) -``` - -### Nested fields - -If your field `my-field` is a `GeneratedObjectField`, you can access its individual properties using the following syntax: - -```python -print(str(result.document.inference.prediction.fields["my-field"].my_attribute)) -``` - -# Questions? - -[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/extras/guide/getting_started.md b/docs/extras/guide/getting_started.md deleted file mode 100644 index e93ef9b2..00000000 --- a/docs/extras/guide/getting_started.md +++ /dev/null @@ -1,270 +0,0 @@ ---- -title: Getting Started -category: 622b805aaec68102ea7fcbc2 -slug: python-getting-started -parentDoc: 609808f773b0b90051d839de ---- -This guide will help you get started with the Mindee Python OCR SDK to easily extract data from your documents. - -The Python OCR SDK supports [invoice](https://developers.mindee.com/docs/python-invoice-ocr), [passport](https://developers.mindee.com/docs/python-passport-ocr), [receipt](https://developers.mindee.com/docs/python-receipt-ocr) OCR APIs and [custom-built API](https://developers.mindee.com/docs/python-api-builder) from the API Builder. - -You can view the source code on [GitHub](https://github.com/mindee/mindee-api-python), and the package on [PyPI](https://pypi.org/project/mindee/). - -## Prerequisite - -- Download and install [Python](https://www.python.org/downloads/). This library is officially supported on Python `3.7` to `3.11`. Note: support for `3.12` is on its way, but currently untested. -- Download and install [pip package manager](https://pip.pypa.io/en/stable/installation/).
- -## Installation - -To quickly get started with the Python OCR SDK anywhere, the preferred installation method is via `pip`. - -```shell -pip install mindee -``` - -### Development Installation - -If you'll be modifying the source code, you'll need to install the development requirements to get started. - -1. First clone the repo. - -```shell -git clone git@github.com:mindee/mindee-api-python.git -``` - -2. Then navigate to the cloned directory and install all development requirements. - -```shell -cd mindee-api-python -pip install -e ".[dev,test]" -``` - -## Updating the Version - -It is important to always check the version of the Mindee OCR SDK you are using, as new and updated features won’t work on old versions. - -To check the installed version: - -```shell -pip show mindee -``` - -To get the latest version: - -```shell -pip install mindee --upgrade -``` - -To install a specific version: - -```shell -pip install mindee== -``` - -## Usage - -To get started with Mindee's APIs, you need to create a `Client` and you're ready to go. - -Let's take a deep dive into how this works. - -## Initializing the Client - -The `Client` centralizes document configurations in a single object. - -The `Client` requires your [API key](https://developers.mindee.com/docs/make-your-first-request#create-an-api-key). - -You can either pass these directly to the constructor or through environment variables. - -### Pass the API key directly - -```python -from mindee import Client -# Init with your API key -mindee_client = Client(api_key="my-api-key") -``` - -### Set the API key in the environment - -API keys should be set as environment variables, especially for any production deployment. 
- -The following environment variable will set the global API key: - -```shell -MINDEE_API_KEY="my-api-key" -``` - -Then in your code: - -```python -from mindee import Client -# Init without an API key -mindee_client = Client() -``` - -### Setting the Request Timeout - -The request timeout can be set using an environment variable: - -```shell -MINDEE_REQUEST_TIMEOUT=200 -``` - -## Loading a Document File - -Before being able to send a document to the API, it must first be loaded. - -You don't need to worry about different MIME types, the library will take care of handling -all supported types automatically. - -Once a document is loaded, interacting with it is done in exactly the same way, regardless -of how it was loaded. - -There are a few different ways of loading a document file, depending on your use case: - -- [Path](#path) -- [File Object](#file-object) -- [Base64](#base64) -- [Bytes](#bytes) -- [URL](#url) - -### Path - -Load from a file directly from disk. Requires an absolute path, as a string. - -```python -input_doc = mindee_client.source_from_path("/path/to/the/invoice.pdf") -``` - -### File Object - -A normal Python file object with a path. **Must be in binary mode**. - -```python -with open("/path/to/the/receipt.jpg", 'rb') as fo: - input_doc = mindee_client.source_from_file(fo) -``` - -### Base64 - -Requires a base64 encoded string. - -**Note**: The original filename is required when calling the method. - -```python -b64_string = "/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLD...." -input_doc = mindee_client.source_from_b64string(b64_string, "receipt.jpg") -``` - -### Bytes - -Requires raw bytes. - -**Note**: The original filename is required when calling the method. - -```python -raw_bytes = b"%PDF-1.3\n%\xbf\xf7\xa2\xfe\n1 0 ob..." -input_doc = mindee_client.source_from_bytes(raw_bytes, "invoice.pdf") -``` - -Loading from bytes is useful when using FastAPI `UploadFile` objects. 
- -```python -@app.post("/process-file") -async def upload(upload: UploadFile): - input_doc = mindee_client.source_from_bytes( - upload.file.read(), - filename=upload.filename - ) -``` - -### URL - -Allows sending a URL directly. - -**Note**: No local operations can be performed on the input (such as removing pages from a PDF). - -```python -input_doc = mindee_client.source_from_url(url="https://www.example.com/invoice.pdf") -``` - -## Sending a File - -To send a file to the API, we need to specify how to process the document. -This will determine which API endpoint is used and how the API return will be handled internally by the library. - -More specifically, we need to set a `mindee.product` class as the first parameter of the `parse` method. - -This is because the `parse` method's return type depends on its first argument. - -Product classes inherit from the base `mindee.parsing.common.inference` class. - -More information is available in each document-specific guide. - -### Off-the-Shelf Documents - -Simply setting the correct class and passing the input document is enough: - -```python -result = mindee_client.parse(product.InvoiceV4, input_doc) -``` - -### Custom Documents (docTI & Custom APIs) - -The endpoint to use must be created beforehand and subsequently passed to the `endpoint` argument of the `enqueue_and_parse` method: - -```python -custom_endpoint = mindee_client.create_endpoint( - "my-endpoint-url", - "my-account-name", - # "my-version" # optional -) -result = mindee_client.enqueue_and_parse(product.GeneratedV1, input_doc, endpoint=custom_endpoint) -``` - -This is because the `GeneratedV1` class is enough to handle the return processing, but the actual endpoint needs to be specified.
- - -## Processing the Response - -Results of a prediction can be retrieved in two different places: - -- [Document level predictions](#document-level-prediction) -- [Page level predictions](#page-level-prediction) - -### Document Level Prediction - -The `document` attribute is an object specific to the type of document being processed. -It is an instance of the `Document` class, to which a generic type is given. - -It contains the data extracted from the entire document, all pages combined. -It's possible to have the same field in various pages, but at the document level only the highest confidence field data will be shown (this is all done automatically at the API level). - -Usage: -```py -print(resp.document) -``` - -A `document`'s fields (attributes) can be accessed through its `prediction` attribute; their types can vary from one product to another. -These attributes are detailed in each product's respective guide. - -### Page Level Prediction - -The `pages` attribute is a list of `Page` objects. `Page` is a wrapper around elements that extend the [`Document` class](#document-level-prediction). -The `prediction` of a `Page` inherits from the product's own `Document`, and adds all page-specific fields to it. - -The order of the elements in the list matches the order of the pages in the document. - -All response objects have a `pages` property, regardless of the number of pages. -Single-page documents will have a single entry. - -Iteration over `pages` is done like with any list, for example: - -```py -for page in resp.pages: - print(page) -``` - -## Questions?
- -[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/extras/guide/idcard_fr_v2.md b/docs/extras/guide/idcard_fr_v2.md deleted file mode 100644 index e8bb49ff..00000000 --- a/docs/extras/guide/idcard_fr_v2.md +++ /dev/null @@ -1,270 +0,0 @@ ---- -title: FR Carte Nationale d'Identité OCR Python -category: 622b805aaec68102ea7fcbc2 -slug: python-fr-carte-nationale-didentite-ocr -parentDoc: 609808f773b0b90051d839de ---- -The Python OCR SDK supports the [Carte Nationale d'Identité API](https://platform.mindee.com/mindee/idcard_fr). - -Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/idcard_fr/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK. -![Carte Nationale d'Identité sample](https://github.com/mindee/client-lib-test-data/blob/main/products/idcard_fr/default_sample.jpg?raw=true) - -# Quick-Start -```py -# -# Install the Python client library by running: -# pip install mindee -# - -from mindee import Client, PredictResponse, product - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and parse it. 
-result: PredictResponse = mindee_client.parse( - product.fr.IdCardV2, - input_doc, -) - -# Print a summary of the API result -print(result.document) - -# Print the document-level summary -# print(result.document.inference.prediction) - -``` - -**Output (RST):** -```rst -######## -Document -######## -:Mindee ID: d33828f1-ef7e-4984-b9df-a2bfaa38a78d -:Filename: default_sample.jpg - -Inference -######### -:Product: mindee/idcard_fr v2.0 -:Rotation applied: Yes - -Prediction -========== -:Nationality: -:Card Access Number: 175775H55790 -:Document Number: -:Given Name(s): Victor - Marie -:Surname: DAMBARD -:Alternate Name: -:Date of Birth: 1994-04-24 -:Place of Birth: LYON 4E ARRONDISSEM -:Gender: M -:Expiry Date: 2030-04-02 -:Mrz Line 1: IDFRADAMBARD<<<<<<<<<<<<<<<<<<075025 -:Mrz Line 2: 170775H557903VICTOR< **Note:** A `Point` simply refers to a List of two numbers (`[float, float]`). - - -Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. - - -### ClassificationField -The classification field `ClassificationField` does not implement all the basic `BaseField` attributes. It only implements **value**, **confidence** and **page_id**. - -> Note: a classification field's `value is always a `str`. - -### DateField -Aside from the basic `BaseField` attributes, the date field `DateField` also implements the following: - -* **date_object** (`Date`): an accessible representation of the value as a python object. Can be `None`. - -### StringField -The text field `StringField` only has one constraint: its **value** is an `Optional[str]`. - -## Page-Level Fields -Some fields are constrained to the page level, and so will not be retrievable at document level. - -# Attributes -The following fields are extracted for Carte Nationale d'Identité V2: - -## Alternate Name -**alternate_name** ([StringField](#stringfield)): The alternate name of the card holder. 
- -```py -print(result.document.inference.prediction.alternate_name.value) -``` - -## Issuing Authority -**authority** ([StringField](#stringfield)): The name of the issuing authority. - -```py -print(result.document.inference.prediction.authority.value) -``` - -## Date of Birth -**birth_date** ([DateField](#datefield)): The date of birth of the card holder. - -```py -print(result.document.inference.prediction.birth_date.value) -``` - -## Place of Birth -**birth_place** ([StringField](#stringfield)): The place of birth of the card holder. - -```py -print(result.document.inference.prediction.birth_place.value) -``` - -## Card Access Number -**card_access_number** ([StringField](#stringfield)): The card access number (CAN). - -```py -print(result.document.inference.prediction.card_access_number.value) -``` - -## Document Number -**document_number** ([StringField](#stringfield)): The document number. - -```py -print(result.document.inference.prediction.document_number.value) -``` - -## Document Sides -[📄](#page-level-fields "This field is only present on individual pages.")**document_side** ([ClassificationField](#classificationfield)): The sides of the document which are visible. - -#### Possible values include: - - 'RECTO' - - 'VERSO' - - 'RECTO & VERSO' - -```py -for document_side_elem in result.document.document_side: - print(document_side_elem.value) -``` - -## Document Type -[📄](#page-level-fields "This field is only present on individual pages.")**document_type** ([ClassificationField](#classificationfield)): The document type or format. - -#### Possible values include: - - 'NEW' - - 'OLD' - -```py -for document_type_elem in result.document.document_type: - print(document_type_elem.value) -``` - -## Expiry Date -**expiry_date** ([DateField](#datefield)): The expiry date of the identification card. - -```py -print(result.document.inference.prediction.expiry_date.value) -``` - -## Gender -**gender** ([StringField](#stringfield)): The gender of the card holder. 
- -```py -print(result.document.inference.prediction.gender.value) -``` - -## Given Name(s) -**given_names** (List[[StringField](#stringfield)]): The given name(s) of the card holder. - -```py -for given_names_elem in result.document.inference.prediction.given_names: - print(given_names_elem.value) -``` - -## Date of Issue -**issue_date** ([DateField](#datefield)): The date of issue of the identification card. - -```py -print(result.document.inference.prediction.issue_date.value) -``` - -## Mrz Line 1 -**mrz1** ([StringField](#stringfield)): The Machine Readable Zone, first line. - -```py -print(result.document.inference.prediction.mrz1.value) -``` - -## Mrz Line 2 -**mrz2** ([StringField](#stringfield)): The Machine Readable Zone, second line. - -```py -print(result.document.inference.prediction.mrz2.value) -``` - -## Mrz Line 3 -**mrz3** ([StringField](#stringfield)): The Machine Readable Zone, third line. - -```py -print(result.document.inference.prediction.mrz3.value) -``` - -## Nationality -**nationality** ([StringField](#stringfield)): The nationality of the card holder. - -```py -print(result.document.inference.prediction.nationality.value) -``` - -## Surname -**surname** ([StringField](#stringfield)): The surname of the card holder. - -```py -print(result.document.inference.prediction.surname.value) -``` - -# Questions? -[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/extras/guide/ind_passport_v1.md b/docs/extras/guide/ind_passport_v1.md deleted file mode 100644 index 7db4503d..00000000 --- a/docs/extras/guide/ind_passport_v1.md +++ /dev/null @@ -1,285 +0,0 @@ ---- -title: IND Passport - India OCR Python -category: 622b805aaec68102ea7fcbc2 -slug: python-ind-passport---india-ocr -parentDoc: 609808f773b0b90051d839de ---- -The Python OCR SDK supports the [Passport - India API](https://platform.mindee.com/mindee/ind_passport). 
- -Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/ind_passport/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK. -![Passport - India sample](https://github.com/mindee/client-lib-test-data/blob/main/products/ind_passport/default_sample.jpg?raw=true) - -# Quick-Start -```py -# -# Install the Python client library by running: -# pip install mindee -# - -from mindee import Client, product, AsyncPredictResponse - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and enqueue it. -result: AsyncPredictResponse = mindee_client.enqueue_and_parse( - product.ind.IndianPassportV1, - input_doc, -) - -# Print a brief summary of the parsed data -print(result.document) - -``` - -**Output (RST):** -```rst -######## -Document -######## -:Mindee ID: cf88fd43-eaa1-497a-ba29-a9569a4edaa7 -:Filename: default_sample.jpg - -Inference -######### -:Product: mindee/ind_passport v1.2 -:Rotation applied: Yes - -Prediction -========== -:Page Number: 1 -:Country: IND -:ID Number: J8369854 -:Given Names: JOCELYN MICHELLE -:Surname: DOE -:Birth Date: 1959-09-23 -:Birth Place: GUNDUGOLANU -:Issuance Place: HYDERABAD -:Gender: F -:Issuance Date: 2011-10-11 -:Expiry Date: 2021-10-10 -:MRZ Line 1: P **Note:** A `Point` simply refers to a List of two numbers (`[float, float]`). - - -Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. - - -### ClassificationField -The classification field `ClassificationField` does not implement all the basic `BaseField` attributes. It only implements **value**, **confidence** and **page_id**. - -> Note: a classification field's `value is always a `str`. 
- -### DateField -Aside from the basic `BaseField` attributes, the date field `DateField` also implements the following: - -* **date_object** (`Date`): an accessible representation of the value as a python object. Can be `None`. - -### StringField -The text field `StringField` only has one constraint: its **value** is an `Optional[str]`. - -# Attributes -The following fields are extracted for Passport - India V1: - -## Address Line 1 -**address1** ([StringField](#stringfield)): The first line of the address of the passport holder. - -```py -print(result.document.inference.prediction.address1.value) -``` - -## Address Line 2 -**address2** ([StringField](#stringfield)): The second line of the address of the passport holder. - -```py -print(result.document.inference.prediction.address2.value) -``` - -## Address Line 3 -**address3** ([StringField](#stringfield)): The third line of the address of the passport holder. - -```py -print(result.document.inference.prediction.address3.value) -``` - -## Birth Date -**birth_date** ([DateField](#datefield)): The birth date of the passport holder, ISO format: YYYY-MM-DD. - -```py -print(result.document.inference.prediction.birth_date.value) -``` - -## Birth Place -**birth_place** ([StringField](#stringfield)): The birth place of the passport holder. - -```py -print(result.document.inference.prediction.birth_place.value) -``` - -## Country -**country** ([StringField](#stringfield)): ISO 3166-1 alpha-3 country code (3 letters format). - -```py -print(result.document.inference.prediction.country.value) -``` - -## Expiry Date -**expiry_date** ([DateField](#datefield)): The date when the passport will expire, ISO format: YYYY-MM-DD. - -```py -print(result.document.inference.prediction.expiry_date.value) -``` - -## File Number -**file_number** ([StringField](#stringfield)): The file number of the passport document. 
- -```py -print(result.document.inference.prediction.file_number.value) -``` - -## Gender -**gender** ([ClassificationField](#classificationfield)): The gender of the passport holder. - -#### Possible values include: - - 'M' - - 'F' - -```py -print(result.document.inference.prediction.gender.value) -``` - -## Given Names -**given_names** ([StringField](#stringfield)): The given names of the passport holder. - -```py -print(result.document.inference.prediction.given_names.value) -``` - -## ID Number -**id_number** ([StringField](#stringfield)): The identification number of the passport document. - -```py -print(result.document.inference.prediction.id_number.value) -``` - -## Issuance Date -**issuance_date** ([DateField](#datefield)): The date when the passport was issued, ISO format: YYYY-MM-DD. - -```py -print(result.document.inference.prediction.issuance_date.value) -``` - -## Issuance Place -**issuance_place** ([StringField](#stringfield)): The place where the passport was issued. - -```py -print(result.document.inference.prediction.issuance_place.value) -``` - -## Legal Guardian -**legal_guardian** ([StringField](#stringfield)): The name of the legal guardian of the passport holder (if applicable). - -```py -print(result.document.inference.prediction.legal_guardian.value) -``` - -## MRZ Line 1 -**mrz1** ([StringField](#stringfield)): The first line of the machine-readable zone (MRZ) of the passport document. - -```py -print(result.document.inference.prediction.mrz1.value) -``` - -## MRZ Line 2 -**mrz2** ([StringField](#stringfield)): The second line of the machine-readable zone (MRZ) of the passport document. - -```py -print(result.document.inference.prediction.mrz2.value) -``` - -## Name of Mother -**name_of_mother** ([StringField](#stringfield)): The name of the mother of the passport holder. 
- -```py -print(result.document.inference.prediction.name_of_mother.value) -``` - -## Name of Spouse -**name_of_spouse** ([StringField](#stringfield)): The name of the spouse of the passport holder (if applicable). - -```py -print(result.document.inference.prediction.name_of_spouse.value) -``` - -## Old Passport Date of Issue -**old_passport_date_of_issue** ([DateField](#datefield)): The date of issue of the old passport (if applicable), ISO format: YYYY-MM-DD. - -```py -print(result.document.inference.prediction.old_passport_date_of_issue.value) -``` - -## Old Passport Number -**old_passport_number** ([StringField](#stringfield)): The number of the old passport (if applicable). - -```py -print(result.document.inference.prediction.old_passport_number.value) -``` - -## Old Passport Place of Issue -**old_passport_place_of_issue** ([StringField](#stringfield)): The place of issue of the old passport (if applicable). - -```py -print(result.document.inference.prediction.old_passport_place_of_issue.value) -``` - -## Page Number -**page_number** ([ClassificationField](#classificationfield)): The page number of the passport document. - -#### Possible values include: - - '1' - - '2' - -```py -print(result.document.inference.prediction.page_number.value) -``` - -## Surname -**surname** ([StringField](#stringfield)): The surname of the passport holder. - -```py -print(result.document.inference.prediction.surname.value) -``` - -# Questions? 
-[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/extras/guide/international_id_v2.md b/docs/extras/guide/international_id_v2.md deleted file mode 100644 index 443ef460..00000000 --- a/docs/extras/guide/international_id_v2.md +++ /dev/null @@ -1,240 +0,0 @@ ---- -title: International ID OCR Python -category: 622b805aaec68102ea7fcbc2 -slug: python-international-id-ocr -parentDoc: 609808f773b0b90051d839de ---- -The Python OCR SDK supports the [International ID API](https://platform.mindee.com/mindee/international_id). - -Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/international_id/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK. -![International ID sample](https://github.com/mindee/client-lib-test-data/blob/main/products/international_id/default_sample.jpg?raw=true) - -# Quick-Start -```py -# -# Install the Python client library by running: -# pip install mindee -# - -from mindee import Client, product, AsyncPredictResponse - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and enqueue it. 
-result: AsyncPredictResponse = mindee_client.enqueue_and_parse( - product.InternationalIdV2, - input_doc, -) - -# Print a brief summary of the parsed data -print(result.document) - -``` - -**Output (RST):** -```rst -######## -Document -######## -:Mindee ID: cfa20a58-20cf-43b6-8cec-9505fa69d1c2 -:Filename: default_sample.jpg - -Inference -######### -:Product: mindee/international_id v2.0 -:Rotation applied: No - -Prediction -========== -:Document Type: IDENTIFICATION_CARD -:Document Number: 12345678A -:Surnames: MUESTRA - MUESTRA -:Given Names: CARMEN -:Sex: F -:Birth Date: 1980-01-01 -:Birth Place: CAMPO DE CRIPTANA CIUDAD REAL ESPANA -:Nationality: ESP -:Personal Number: BAB1834284<44282767Q0 -:Country of Issue: ESP -:State of Issue: MADRID -:Issue Date: -:Expiration Date: 2030-01-01 -:Address: C/REAL N13, 1 DCHA COLLADO VILLALBA MADRID MADRID MADRID -:MRZ Line 1: IDESPBAB1834284<44282767Q0<<<< -:MRZ Line 2: 8001010F1301017ESP<<<<<<<<<<<3 -:MRZ Line 3: MUESTRA **Note:** A `Point` simply refers to a List of two numbers (`[float, float]`). - - -Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. - - -### ClassificationField -The classification field `ClassificationField` does not implement all the basic `BaseField` attributes. It only implements **value**, **confidence** and **page_id**. - -> Note: a classification field's `value is always a `str`. - -### DateField -Aside from the basic `BaseField` attributes, the date field `DateField` also implements the following: - -* **date_object** (`Date`): an accessible representation of the value as a python object. Can be `None`. - -### StringField -The text field `StringField` only has one constraint: its **value** is an `Optional[str]`. - -# Attributes -The following fields are extracted for International ID V2: - -## Address -**address** ([StringField](#stringfield)): The physical address of the document holder. 
- -```py -print(result.document.inference.prediction.address.value) -``` - -## Birth Date -**birth_date** ([DateField](#datefield)): The date of birth of the document holder. - -```py -print(result.document.inference.prediction.birth_date.value) -``` - -## Birth Place -**birth_place** ([StringField](#stringfield)): The place of birth of the document holder. - -```py -print(result.document.inference.prediction.birth_place.value) -``` - -## Country of Issue -**country_of_issue** ([StringField](#stringfield)): The country where the document was issued. - -```py -print(result.document.inference.prediction.country_of_issue.value) -``` - -## Document Number -**document_number** ([StringField](#stringfield)): The unique identifier assigned to the document. - -```py -print(result.document.inference.prediction.document_number.value) -``` - -## Document Type -**document_type** ([ClassificationField](#classificationfield)): The type of personal identification document. - -#### Possible values include: - - 'IDENTIFICATION_CARD' - - 'PASSPORT' - - 'DRIVER_LICENSE' - - 'VISA' - - 'RESIDENCY_CARD' - - 'VOTER_REGISTRATION' - -```py -print(result.document.inference.prediction.document_type.value) -``` - -## Expiration Date -**expiry_date** ([DateField](#datefield)): The date when the document becomes invalid. - -```py -print(result.document.inference.prediction.expiry_date.value) -``` - -## Given Names -**given_names** (List[[StringField](#stringfield)]): The list of the document holder's given names. - -```py -for given_names_elem in result.document.inference.prediction.given_names: - print(given_names_elem.value) -``` - -## Issue Date -**issue_date** ([DateField](#datefield)): The date when the document was issued. - -```py -print(result.document.inference.prediction.issue_date.value) -``` - -## MRZ Line 1 -**mrz_line1** ([StringField](#stringfield)): The Machine Readable Zone, first line. 
- -```py -print(result.document.inference.prediction.mrz_line1.value) -``` - -## MRZ Line 2 -**mrz_line2** ([StringField](#stringfield)): The Machine Readable Zone, second line. - -```py -print(result.document.inference.prediction.mrz_line2.value) -``` - -## MRZ Line 3 -**mrz_line3** ([StringField](#stringfield)): The Machine Readable Zone, third line. - -```py -print(result.document.inference.prediction.mrz_line3.value) -``` - -## Nationality -**nationality** ([StringField](#stringfield)): The country of citizenship of the document holder. - -```py -print(result.document.inference.prediction.nationality.value) -``` - -## Personal Number -**personal_number** ([StringField](#stringfield)): The unique identifier assigned to the document holder. - -```py -print(result.document.inference.prediction.personal_number.value) -``` - -## Sex -**sex** ([StringField](#stringfield)): The biological sex of the document holder. - -```py -print(result.document.inference.prediction.sex.value) -``` - -## State of Issue -**state_of_issue** ([StringField](#stringfield)): The state or territory where the document was issued. - -```py -print(result.document.inference.prediction.state_of_issue.value) -``` - -## Surnames -**surnames** (List[[StringField](#stringfield)]): The list of the document holder's family names. - -```py -for surnames_elem in result.document.inference.prediction.surnames: - print(surnames_elem.value) -``` - -# Questions? 
-[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/extras/guide/invoice_splitter_v1.md b/docs/extras/guide/invoice_splitter_v1.md deleted file mode 100644 index e9d9d7dd..00000000 --- a/docs/extras/guide/invoice_splitter_v1.md +++ /dev/null @@ -1,105 +0,0 @@ ---- -title: Invoice Splitter OCR Python -category: 622b805aaec68102ea7fcbc2 -slug: python-invoice-splitter-ocr -parentDoc: 609808f773b0b90051d839de ---- -The Python OCR SDK supports the [Invoice Splitter API](https://platform.mindee.com/mindee/invoice_splitter). - -Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/invoice_splitter/default_sample.pdf), we are going to illustrate how to extract the data that we want using the OCR SDK. -![Invoice Splitter sample](https://github.com/mindee/client-lib-test-data/blob/main/products/invoice_splitter/default_sample.pdf?raw=true) - -# Quick-Start -```py -# -# Install the Python client library by running: -# pip install mindee -# - -from mindee import Client, product, AsyncPredictResponse - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and enqueue it. 
-result: AsyncPredictResponse = mindee_client.enqueue_and_parse( - product.InvoiceSplitterV1, - input_doc, -) - -# Print a brief summary of the parsed data -print(result.document) - -``` - -**Output (RST):** -```rst -######## -Document -######## -:Mindee ID: 15ad7a19-7b75-43d0-b0c6-9a641a12b49b -:Filename: default_sample.pdf - -Inference -######### -:Product: mindee/invoice_splitter v1.2 -:Rotation applied: No - -Prediction -========== -:Invoice Page Groups: - +--------------------------------------------------------------------------+ - | Page Indexes | - +==========================================================================+ - | 0 | - +--------------------------------------------------------------------------+ - | 1 | - +--------------------------------------------------------------------------+ -``` - -# Field Types -## Standard Fields -These fields are generic and used in several products. - -### BaseField -Each prediction object contains a set of fields that inherit from the generic `BaseField` class. -A typical `BaseField` object will have the following attributes: - -* **value** (`Union[float, str]`): corresponds to the field value. Can be `None` if no value was extracted. -* **confidence** (`float`): the confidence score of the field prediction. -* **bounding_box** (`[Point, Point, Point, Point]`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document. -* **polygon** (`List[Point]`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image. -* **page_id** (`int`): the ID of the page, always `None` when at document-level. -* **reconstructed** (`bool`): indicates whether an object was reconstructed (not extracted as the API gave it). - -> **Note:** A `Point` simply refers to a List of two numbers (`[float, float]`). 
- - -Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. - -## Specific Fields -Fields which are specific to this product; they are not used in any other product. - -### Invoice Page Groups Field -List of page groups. Each group represents a single invoice within a multi-invoice document. - -A `InvoiceSplitterV1InvoicePageGroup` implements the following attributes: - -* **page_indexes** (`List[int]`): List of page indexes that belong to the same invoice (group). - -# Attributes -The following fields are extracted for Invoice Splitter V1: - -## Invoice Page Groups -**invoice_page_groups** (List[[InvoiceSplitterV1InvoicePageGroup](#invoice-page-groups-field)]): List of page groups. Each group represents a single invoice within a multi-invoice document. - -```py -for invoice_page_groups_elem in result.document.inference.prediction.invoice_page_groups: - print(invoice_page_groups_elem.value) -``` - -# Questions? -[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/extras/guide/invoices_v4.md b/docs/extras/guide/invoices_v4.md deleted file mode 100644 index 85d87520..00000000 --- a/docs/extras/guide/invoices_v4.md +++ /dev/null @@ -1,539 +0,0 @@ ---- -title: Invoice OCR Python -category: 622b805aaec68102ea7fcbc2 -slug: python-invoice-ocr -parentDoc: 609808f773b0b90051d839de ---- -The Python OCR SDK supports the [Invoice API](https://platform.mindee.com/mindee/invoices). - -Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/invoices/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK. 
-![Invoice sample](https://github.com/mindee/client-lib-test-data/blob/main/products/invoices/default_sample.jpg?raw=true) - -# Quick-Start -```py -# -# Install the Python client library by running: -# pip install mindee -# - -from mindee import Client, PredictResponse, product - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and parse it. -result: PredictResponse = mindee_client.parse( - product.InvoiceV4, - input_doc, -) - -# Print a summary of the API result -print(result.document) - -# Print the document-level summary -# print(result.document.inference.prediction) - -``` - -You can also call this product asynchronously: - -```py -# -# Install the Python client library by running: -# pip install mindee -# - -from mindee import Client, product, AsyncPredictResponse - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and enqueue it. 
-result: AsyncPredictResponse = mindee_client.enqueue_and_parse( - product.InvoiceV4, - input_doc, -) - -# Print a brief summary of the parsed data -print(result.document) - -``` - -**Output (RST):** -```rst -######## -Document -######## -:Mindee ID: 744748d5-9051-461c-b70c-bbf81f5ff943 -:Filename: default_sample.jpg - -Inference -######### -:Product: mindee/invoices v4.11 -:Rotation applied: Yes - -Prediction -========== -:Locale: en-CA; en; CA; CAD; -:Invoice Number: 14 -:Purchase Order Number: AD29094 -:Reference Numbers: AD29094 -:Purchase Date: 2018-09-25 -:Due Date: -:Payment Date: -:Total Net: 2145.00 -:Total Amount: 2608.20 -:Total Tax: 193.20 -:Taxes: - +---------------+--------+----------+---------------+ - | Base | Code | Rate (%) | Amount | - +===============+========+==========+===============+ - | 2145.00 | | 8.00 | 193.20 | - +---------------+--------+----------+---------------+ -:Supplier Payment Details: -:Supplier Name: TURNPIKE DESIGNS -:Supplier Company Registrations: -:Supplier Address: 156 University Ave, Toronto ON, Canada, M5H 2H7 -:Supplier Phone Number: 4165551212 -:Supplier Website: -:Supplier Email: j_coi@example.com -:Customer Name: JIRO DOI -:Customer Company Registrations: -:Customer Address: 1954 Bloor Street West Toronto, ON, M6P 3K9 Canada -:Customer ID: -:Shipping Address: -:Billing Address: 1954 Bloor Street West Toronto, ON, M6P 3K9 Canada -:Document Type: INVOICE -:Document Type Extended: INVOICE -:Purchase Subcategory: -:Purchase Category: miscellaneous -:Line Items: - +--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+ - | Description | Product code | Quantity | Tax Amount | Tax Rate (%) | Total Amount | Unit of measure | Unit Price | - +======================================+==============+==========+============+==============+==============+=================+============+ - | Platinum web hosting package Down... 
| | 1.00 | | | 65.00 | | 65.00 | - +--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+ - | 2 page website design Includes ba... | | 3.00 | | | 2100.00 | | 2100.00 | - +--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+ - | Mobile designs Includes responsiv... | | 1.00 | | | 250.00 | 1 | 250.00 | - +--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+ - -Page Predictions -================ - -Page 0 ------- -:Locale: en-CA; en; CA; CAD; -:Invoice Number: 14 -:Purchase Order Number: AD29094 -:Reference Numbers: AD29094 -:Purchase Date: 2018-09-25 -:Due Date: -:Payment Date: -:Total Net: 2145.00 -:Total Amount: 2608.20 -:Total Tax: 193.20 -:Taxes: - +---------------+--------+----------+---------------+ - | Base | Code | Rate (%) | Amount | - +===============+========+==========+===============+ - | 2145.00 | | 8.00 | 193.20 | - +---------------+--------+----------+---------------+ -:Supplier Payment Details: -:Supplier Name: TURNPIKE DESIGNS -:Supplier Company Registrations: -:Supplier Address: 156 University Ave, Toronto ON, Canada, M5H 2H7 -:Supplier Phone Number: 4165551212 -:Supplier Website: -:Supplier Email: j_coi@example.com -:Customer Name: JIRO DOI -:Customer Company Registrations: -:Customer Address: 1954 Bloor Street West Toronto, ON, M6P 3K9 Canada -:Customer ID: -:Shipping Address: -:Billing Address: 1954 Bloor Street West Toronto, ON, M6P 3K9 Canada -:Document Type: INVOICE -:Document Type Extended: INVOICE -:Purchase Subcategory: -:Purchase Category: miscellaneous -:Line Items: - +--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+ - | Description | Product code | Quantity | Tax Amount | Tax Rate (%) | Total 
Amount | Unit of measure | Unit Price | - +======================================+==============+==========+============+==============+==============+=================+============+ - | Platinum web hosting package Down... | | 1.00 | | | 65.00 | | 65.00 | - +--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+ - | 2 page website design Includes ba... | | 3.00 | | | 2100.00 | | 2100.00 | - +--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+ - | Mobile designs Includes responsiv... | | 1.00 | | | 250.00 | 1 | 250.00 | - +--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+ -``` - -# Field Types -## Standard Fields -These fields are generic and used in several products. - -### BaseField -Each prediction object contains a set of fields that inherit from the generic `BaseField` class. -A typical `BaseField` object will have the following attributes: - -* **value** (`Union[float, str]`): corresponds to the field value. Can be `None` if no value was extracted. -* **confidence** (`float`): the confidence score of the field prediction. -* **bounding_box** (`[Point, Point, Point, Point]`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document. -* **polygon** (`List[Point]`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image. -* **page_id** (`int`): the ID of the page, always `None` when at document-level. -* **reconstructed** (`bool`): indicates whether an object was reconstructed (not extracted as the API gave it). - -> **Note:** A `Point` simply refers to a List of two numbers (`[float, float]`). 
- - -Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. - - -### AddressField -Aside from the basic `BaseField` attributes, the address field `AddressField` also implements the following: - -* **street_number** (`str`): String representation of the street number. Can be `None`. -* **street_name** (`str`): Name of the street. Can be `None`. -* **po_box** (`str`): String representation of the PO Box number. Can be `None`. -* **address_complement** (`str`): Address complement. Can be `None`. -* **city** (`str`): City name. Can be `None`. -* **postal_code** (`str`): String representation of the postal code. Can be `None`. -* **state** (`str`): State name. Can be `None`. -* **country** (`str`): Country name. Can be `None`. - -Note: The `value` field of an AddressField should be a concatenation of the rest of the values. - - -### AmountField -The amount field `AmountField` only has one constraint: its **value** is an `Optional[float]`. - - -### ClassificationField -The classification field `ClassificationField` does not implement all the basic `BaseField` attributes. It only implements **value**, **confidence** and **page_id**. - -> Note: a classification field's `value is always a `str`. - - -### CompanyRegistrationField -Aside from the basic `BaseField` attributes, the company registration field `CompanyRegistrationField` also implements the following: - -* **type** (`str`): the type of company. - -### DateField -Aside from the basic `BaseField` attributes, the date field `DateField` also implements the following: - -* **date_object** (`Date`): an accessible representation of the value as a python object. Can be `None`. - -### LocaleField -The locale field `LocaleField` only implements the **value**, **confidence** and **page_id** base `BaseField` attributes, but it comes with its own: - -* **language** (`str`): ISO 639-1 language code (e.g.: `en` for English). Can be `None`. 
-* **country** (`str`): ISO 3166-1 alpha-2 or ISO 3166-1 alpha-3 code for countries (e.g.: `GRB` or `GB` for "Great Britain"). Can be `None`. -* **currency** (`str`): ISO 4217 code for currencies (e.g.: `USD` for "US Dollars"). Can be `None`. - -### PaymentDetailsField -Aside from the basic `BaseField` attributes, the payment details field `PaymentDetailsField` also implements the following: - -* **account_number** (`str`): number of an account, expressed as a string. Can be `None`. -* **iban** (`str`): International Bank Account Number. Can be `None`. -* **routing_number** (`str`): routing number of an account. Can be `None`. -* **swift** (`str`): the account holder's bank's SWIFT Business Identifier Code (BIC). Can be `None`. - -### StringField -The text field `StringField` only has one constraint: its **value** is an `Optional[str]`. - -### TaxesField -#### Tax -Aside from the basic `BaseField` attributes, the tax field `TaxField` also implements the following: - -* **rate** (`float`): the tax rate applied to an item expressed as a percentage. Can be `None`. -* **code** (`str`): tax code (or equivalent, depending on the origin of the document). Can be `None`. -* **basis** (`float`): base amount used for the tax. Can be `None`. -* **value** (`float`): the value of the tax. Can be `None`. - -> Note: currently `TaxField` is not used on its own, and is accessed through a parent `Taxes` object, a list-like structure. - -#### Taxes (Array) -The `Taxes` field represents a list-like collection of `TaxField` objects. As it is the representation of several objects, it has access to a custom `__str__` method that can render a `TaxField` object as a table line. - -## Specific Fields -Fields which are specific to this product; they are not used in any other product. - -### Line Items Field -List of all the line items present on the invoice. - -A `InvoiceV4LineItem` implements the following attributes: - -* **description** (`str`): The item description. 
-* **product_code** (`str`): The product code of the item. -* **quantity** (`float`): The item quantity -* **tax_amount** (`float`): The item tax amount. -* **tax_rate** (`float`): The item tax rate in percentage. -* **total_amount** (`float`): The item total amount. -* **unit_measure** (`str`): The item unit of measure. -* **unit_price** (`float`): The item unit price. - -# Attributes -The following fields are extracted for Invoice V4: - -## Billing Address -**billing_address** ([AddressField](#addressfield)): The customer billing address. - -```py -print(result.document.inference.prediction.billing_address.value) -``` - -## Purchase Category -**category** ([ClassificationField](#classificationfield)): The purchase category. - -#### Possible values include: - - 'toll' - - 'food' - - 'parking' - - 'transport' - - 'accommodation' - - 'telecom' - - 'miscellaneous' - - 'software' - - 'shopping' - - 'energy' - -```py -print(result.document.inference.prediction.category.value) -``` - -## Customer Address -**customer_address** ([AddressField](#addressfield)): The address of the customer. - -```py -print(result.document.inference.prediction.customer_address.value) -``` - -## Customer Company Registrations -**customer_company_registrations** (List[[CompanyRegistrationField](#companyregistrationfield)]): List of company registration numbers associated to the customer. - -```py -for customer_company_registrations_elem in result.document.inference.prediction.customer_company_registrations: - print(customer_company_registrations_elem.value) -``` - -## Customer ID -**customer_id** ([StringField](#stringfield)): The customer account number or identifier from the supplier. - -```py -print(result.document.inference.prediction.customer_id.value) -``` - -## Customer Name -**customer_name** ([StringField](#stringfield)): The name of the customer or client. 
- -```py -print(result.document.inference.prediction.customer_name.value) -``` - -## Purchase Date -**date** ([DateField](#datefield)): The date the purchase was made. - -```py -print(result.document.inference.prediction.date.value) -``` - -## Document Type -**document_type** ([ClassificationField](#classificationfield)): Document type: INVOICE or CREDIT NOTE. - -#### Possible values include: - - 'INVOICE' - - 'CREDIT NOTE' - -```py -print(result.document.inference.prediction.document_type.value) -``` - -## Document Type Extended -**document_type_extended** ([ClassificationField](#classificationfield)): Document type extended. - -#### Possible values include: - - 'CREDIT NOTE' - - 'INVOICE' - - 'OTHER' - - 'OTHER_FINANCIAL' - - 'PAYSLIP' - - 'PURCHASE ORDER' - - 'QUOTE' - - 'RECEIPT' - - 'STATEMENT' - -```py -print(result.document.inference.prediction.document_type_extended.value) -``` - -## Due Date -**due_date** ([DateField](#datefield)): The date on which the payment is due. - -```py -print(result.document.inference.prediction.due_date.value) -``` - -## Invoice Number -**invoice_number** ([StringField](#stringfield)): The invoice number or identifier. - -```py -print(result.document.inference.prediction.invoice_number.value) -``` - -## Line Items -**line_items** (List[[InvoiceV4LineItem](#line-items-field)]): List of all the line items present on the invoice. - -```py -for line_items_elem in result.document.inference.prediction.line_items: - print(line_items_elem) -``` - -## Locale -**locale** ([LocaleField](#localefield)): The locale of the document. - -```py -print(result.document.inference.prediction.locale.value) -``` - -## Payment Date -**payment_date** ([DateField](#datefield)): The date on which the payment is due / was full-filled. - -```py -print(result.document.inference.prediction.payment_date.value) -``` - -## Purchase Order Number -**po_number** ([StringField](#stringfield)): The purchase order number. 
- -```py -print(result.document.inference.prediction.po_number.value) -``` - -## Reference Numbers -**reference_numbers** (List[[StringField](#stringfield)]): List of all reference numbers on the invoice, including the purchase order number. - -```py -for reference_numbers_elem in result.document.inference.prediction.reference_numbers: - print(reference_numbers_elem.value) -``` - -## Shipping Address -**shipping_address** ([AddressField](#addressfield)): Customer's delivery address. - -```py -print(result.document.inference.prediction.shipping_address.value) -``` - -## Purchase Subcategory -**subcategory** ([ClassificationField](#classificationfield)): The purchase subcategory for transport, food and shopping. - -#### Possible values include: - - 'plane' - - 'taxi' - - 'train' - - 'restaurant' - - 'shopping' - - 'other' - - 'groceries' - - 'cultural' - - 'electronics' - - 'office_supplies' - - 'micromobility' - - 'car_rental' - - 'public' - - 'delivery' - - None - -```py -print(result.document.inference.prediction.subcategory.value) -``` - -## Supplier Address -**supplier_address** ([AddressField](#addressfield)): The address of the supplier or merchant. - -```py -print(result.document.inference.prediction.supplier_address.value) -``` - -## Supplier Company Registrations -**supplier_company_registrations** (List[[CompanyRegistrationField](#companyregistrationfield)]): List of company registration numbers associated to the supplier. - -```py -for supplier_company_registrations_elem in result.document.inference.prediction.supplier_company_registrations: - print(supplier_company_registrations_elem.value) -``` - -## Supplier Email -**supplier_email** ([StringField](#stringfield)): The email address of the supplier or merchant. - -```py -print(result.document.inference.prediction.supplier_email.value) -``` - -## Supplier Name -**supplier_name** ([StringField](#stringfield)): The name of the supplier or merchant. 
- -```py -print(result.document.inference.prediction.supplier_name.value) -``` - -## Supplier Payment Details -**supplier_payment_details** (List[[PaymentDetailsField](#paymentdetailsfield)]): List of payment details associated to the supplier of the invoice. - -```py -for supplier_payment_details_elem in result.document.inference.prediction.supplier_payment_details: - print(supplier_payment_details_elem.value) -``` - -## Supplier Phone Number -**supplier_phone_number** ([StringField](#stringfield)): The phone number of the supplier or merchant. - -```py -print(result.document.inference.prediction.supplier_phone_number.value) -``` - -## Supplier Website -**supplier_website** ([StringField](#stringfield)): The website URL of the supplier or merchant. - -```py -print(result.document.inference.prediction.supplier_website.value) -``` - -## Taxes -**taxes** (List[[TaxField](#taxes)]): List of taxes. Each item contains the detail of the tax. - -```py -for taxes_elem in result.document.inference.prediction.taxes: - print(taxes_elem.polygon) -``` - -## Total Amount -**total_amount** ([AmountField](#amountfield)): The total amount of the invoice: includes taxes, tips, fees, and other charges. - -```py -print(result.document.inference.prediction.total_amount.value) -``` - -## Total Net -**total_net** ([AmountField](#amountfield)): The net amount of the invoice: does not include taxes, fees, and discounts. - -```py -print(result.document.inference.prediction.total_net.value) -``` - -## Total Tax -**total_tax** ([AmountField](#amountfield)): The total tax: the sum of all the taxes for this invoice. - -```py -print(result.document.inference.prediction.total_tax.value) -``` - -# Questions? 
-[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/extras/guide/material_certificate_v1.md b/docs/extras/guide/material_certificate_v1.md deleted file mode 100644 index c26c2d25..00000000 --- a/docs/extras/guide/material_certificate_v1.md +++ /dev/null @@ -1,85 +0,0 @@ ---- -title: Material Certificate OCR Python -category: 622b805aaec68102ea7fcbc2 -slug: python-material-certificate-ocr -parentDoc: 609808f773b0b90051d839de ---- -The Python OCR SDK supports the [Material Certificate API](https://platform.mindee.com/mindee/material_certificate). - -The [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/material_certificate/default_sample.jpg) can be used for testing purposes. -![Material Certificate sample](https://github.com/mindee/client-lib-test-data/blob/main/products/material_certificate/default_sample.jpg?raw=true) - -# Quick-Start -```py -# -# Install the Python client library by running: -# pip install mindee -# - -from mindee import Client, product, AsyncPredictResponse - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and enqueue it. -result: AsyncPredictResponse = mindee_client.enqueue_and_parse( - product.MaterialCertificateV1, - input_doc, -) - -# Print a brief summary of the parsed data -print(result.document) - -``` -# Field Types -## Standard Fields -These fields are generic and used in several products. - -### BaseField -Each prediction object contains a set of fields that inherit from the generic `BaseField` class. -A typical `BaseField` object will have the following attributes: - -* **value** (`Union[float, str]`): corresponds to the field value. Can be `None` if no value was extracted. -* **confidence** (`float`): the confidence score of the field prediction. 
-* **bounding_box** (`[Point, Point, Point, Point]`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document. -* **polygon** (`List[Point]`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image. -* **page_id** (`int`): the ID of the page, always `None` when at document-level. -* **reconstructed** (`bool`): indicates whether an object was reconstructed (not extracted as the API gave it). - -> **Note:** A `Point` simply refers to a List of two numbers (`[float, float]`). - - -Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. - -### StringField -The text field `StringField` only has one constraint: its **value** is an `Optional[str]`. - -# Attributes -The following fields are extracted for Material Certificate V1: - -## Certificate Type -**certificate_type** ([StringField](#stringfield)): The type of certification. - -```py -print(result.document.inference.prediction.certificate_type.value) -``` - -## Heat Number -**heat_number** ([StringField](#stringfield)): Heat Number is a unique identifier assigned to a batch of material produced in a manufacturing process. - -```py -print(result.document.inference.prediction.heat_number.value) -``` - -## Norm -**norm** ([StringField](#stringfield)): The international standard used for certification. - -```py -print(result.document.inference.prediction.norm.value) -``` - -# Questions? 
-[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/extras/guide/multi_receipts_detector_v1.md b/docs/extras/guide/multi_receipts_detector_v1.md deleted file mode 100644 index dbd1c993..00000000 --- a/docs/extras/guide/multi_receipts_detector_v1.md +++ /dev/null @@ -1,115 +0,0 @@ ---- -title: Multi Receipts Detector OCR Python -category: 622b805aaec68102ea7fcbc2 -slug: python-multi-receipts-detector-ocr -parentDoc: 609808f773b0b90051d839de ---- -The Python OCR SDK supports the [Multi Receipts Detector API](https://platform.mindee.com/mindee/multi_receipts_detector). - -Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/multi_receipts_detector/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK. -![Multi Receipts Detector sample](https://github.com/mindee/client-lib-test-data/blob/main/products/multi_receipts_detector/default_sample.jpg?raw=true) - -# Quick-Start -```py -# -# Install the Python client library by running: -# pip install mindee -# - -from mindee import Client, PredictResponse, product - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and parse it. -result: PredictResponse = mindee_client.parse( - product.MultiReceiptsDetectorV1, - input_doc, -) - -# Print a summary of the API result -print(result.document) - -# Print the document-level summary -# print(result.document.inference.prediction) - -``` - -**Output (RST):** -```rst -######## -Document -######## -:Mindee ID: d7c5b25f-e0d3-4491-af54-6183afa1aaab -:Filename: default_sample.jpg - -Inference -######### -:Product: mindee/multi_receipts_detector v1.0 -:Rotation applied: Yes - -Prediction -========== -:List of Receipts: Polygon with 4 points. - Polygon with 4 points. - Polygon with 4 points. 
- Polygon with 4 points. - Polygon with 4 points. - Polygon with 4 points. - -Page Predictions -================ - -Page 0 ------- -:List of Receipts: Polygon with 4 points. - Polygon with 4 points. - Polygon with 4 points. - Polygon with 4 points. - Polygon with 4 points. - Polygon with 4 points. -``` - -# Field Types -## Standard Fields -These fields are generic and used in several products. - -### BaseField -Each prediction object contains a set of fields that inherit from the generic `BaseField` class. -A typical `BaseField` object will have the following attributes: - -* **value** (`Union[float, str]`): corresponds to the field value. Can be `None` if no value was extracted. -* **confidence** (`float`): the confidence score of the field prediction. -* **bounding_box** (`[Point, Point, Point, Point]`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document. -* **polygon** (`List[Point]`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image. -* **page_id** (`int`): the ID of the page, always `None` when at document-level. -* **reconstructed** (`bool`): indicates whether an object was reconstructed (not extracted as the API gave it). - -> **Note:** A `Point` simply refers to a List of two numbers (`[float, float]`). - - -Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. - - -### PositionField -The position field `PositionField` does not implement all the basic `BaseField` attributes, only **bounding_box**, **polygon** and **page_id**. On top of these, it has access to: - -* **rectangle** (`[Point, Point, Point, Point]`): a Polygon with four points that may be oriented (even beyond canvas). -* **quadrangle** (`[Point, Point, Point, Point]`): a free polygon made up of four points. 
- -# Attributes -The following fields are extracted for Multi Receipts Detector V1: - -## List of Receipts -**receipts** (List[[PositionField](#positionfield)]): Positions of the receipts on the document. - -```py -for receipts_elem in result.document.inference.prediction.receipts: - print(receipts_elem.polygon) -``` - -# Questions? -[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/extras/guide/nutrition_facts_v1.md b/docs/extras/guide/nutrition_facts_v1.md deleted file mode 100644 index a09eed95..00000000 --- a/docs/extras/guide/nutrition_facts_v1.md +++ /dev/null @@ -1,377 +0,0 @@ ---- -title: Nutrition Facts Label OCR Python -category: 622b805aaec68102ea7fcbc2 -slug: python-nutrition-facts-label-ocr -parentDoc: 609808f773b0b90051d839de ---- -The Python OCR SDK supports the [Nutrition Facts Label API](https://platform.mindee.com/mindee/nutrition_facts). - -Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/nutrition_facts/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK. -![Nutrition Facts Label sample](https://github.com/mindee/client-lib-test-data/blob/main/products/nutrition_facts/default_sample.jpg?raw=true) - -# Quick-Start -```py -# -# Install the Python client library by running: -# pip install mindee -# - -from mindee import Client, product, AsyncPredictResponse - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and enqueue it. 
-result: AsyncPredictResponse = mindee_client.enqueue_and_parse( - product.NutritionFactsLabelV1, - input_doc, -) - -# Print a brief summary of the parsed data -print(result.document) - -``` - -**Output (RST):** -```rst -######## -Document -######## -:Mindee ID: 38a12fe0-5d69-4ca4-9b30-12f1b659311c -:Filename: default_sample.jpg - -Inference -######### -:Product: mindee/nutrition_facts v1.0 -:Rotation applied: No - -Prediction -========== -:Serving per Box: 2.00 -:Serving Size: - :Amount: 228.00 - :Unit: g -:Calories: - :Daily Value: - :Per 100g: - :Per Serving: 250.00 -:Total Fat: - :Daily Value: - :Per 100g: - :Per Serving: 12.00 -:Saturated Fat: - :Daily Value: 15.00 - :Per 100g: - :Per Serving: 3.00 -:Trans Fat: - :Daily Value: - :Per 100g: - :Per Serving: 3.00 -:Cholesterol: - :Daily Value: 10.00 - :Per 100g: - :Per Serving: 30.00 -:Total Carbohydrate: - :Daily Value: 10.00 - :Per 100g: - :Per Serving: 31.00 -:Dietary Fiber: - :Daily Value: 0.00 - :Per 100g: - :Per Serving: 0.00 -:Total Sugars: - :Daily Value: - :Per 100g: - :Per Serving: 5.00 -:Added Sugars: - :Daily Value: - :Per 100g: - :Per Serving: -:Protein: - :Daily Value: - :Per 100g: - :Per Serving: 5.00 -:sodium: - :Daily Value: 20.00 - :Per 100g: - :Per Serving: 470.00 - :Unit: mg -:nutrients: - +-------------+----------------------+----------+-------------+------+ - | Daily Value | Name | Per 100g | Per Serving | Unit | - +=============+======================+==========+=============+======+ - | 12.00 | Vitamin A | | 4.00 | mcg | - +-------------+----------------------+----------+-------------+------+ - | 12.00 | Vitamin C | | 2.00 | mg | - +-------------+----------------------+----------+-------------+------+ - | 12.00 | Calcium | | 45.60 | mg | - +-------------+----------------------+----------+-------------+------+ - | 12.00 | Iron | | 0.90 | mg | - +-------------+----------------------+----------+-------------+------+ -``` - -# Field Types -## Standard Fields -These fields are generic and used 
in several products. - -### BaseField -Each prediction object contains a set of fields that inherit from the generic `BaseField` class. -A typical `BaseField` object will have the following attributes: - -* **value** (`Union[float, str]`): corresponds to the field value. Can be `None` if no value was extracted. -* **confidence** (`float`): the confidence score of the field prediction. -* **bounding_box** (`[Point, Point, Point, Point]`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document. -* **polygon** (`List[Point]`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image. -* **page_id** (`int`): the ID of the page, always `None` when at document-level. -* **reconstructed** (`bool`): indicates whether an object was reconstructed (not extracted as the API gave it). - -> **Note:** A `Point` simply refers to a List of two numbers (`[float, float]`). - - -Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. - - -### AmountField -The amount field `AmountField` only has one constraint: its **value** is an `Optional[float]`. - -## Specific Fields -Fields which are specific to this product; they are not used in any other product. - -### Added Sugars Field -The amount of added sugars in the product. - -A `NutritionFactsLabelV1AddedSugar` implements the following attributes: - -* **daily_value** (`float`): DVs are the recommended amounts of added sugars to consume or not to exceed each day. -* **per_100g** (`float`): The amount of added sugars per 100g of the product. -* **per_serving** (`float`): The amount of added sugars per serving of the product. -Fields which are specific to this product; they are not used in any other product. - -### Calories Field -The amount of calories in the product. 
- -A `NutritionFactsLabelV1Calorie` implements the following attributes: - -* **daily_value** (`float`): DVs are the recommended amounts of calories to consume or not to exceed each day. -* **per_100g** (`float`): The amount of calories per 100g of the product. -* **per_serving** (`float`): The amount of calories per serving of the product. -Fields which are specific to this product; they are not used in any other product. - -### Cholesterol Field -The amount of cholesterol in the product. - -A `NutritionFactsLabelV1Cholesterol` implements the following attributes: - -* **daily_value** (`float`): DVs are the recommended amounts of cholesterol to consume or not to exceed each day. -* **per_100g** (`float`): The amount of cholesterol per 100g of the product. -* **per_serving** (`float`): The amount of cholesterol per serving of the product. -Fields which are specific to this product; they are not used in any other product. - -### Dietary Fiber Field -The amount of dietary fiber in the product. - -A `NutritionFactsLabelV1DietaryFiber` implements the following attributes: - -* **daily_value** (`float`): DVs are the recommended amounts of dietary fiber to consume or not to exceed each day. -* **per_100g** (`float`): The amount of dietary fiber per 100g of the product. -* **per_serving** (`float`): The amount of dietary fiber per serving of the product. -Fields which are specific to this product; they are not used in any other product. - -### nutrients Field -The amount of nutrients in the product. - -A `NutritionFactsLabelV1Nutrient` implements the following attributes: - -* **daily_value** (`float`): DVs are the recommended amounts of nutrients to consume or not to exceed each day. -* **name** (`str`): The name of nutrients of the product. -* **per_100g** (`float`): The amount of nutrients per 100g of the product. -* **per_serving** (`float`): The amount of nutrients per serving of the product. -* **unit** (`str`): The unit of measurement for the amount of nutrients. 
-Fields which are specific to this product; they are not used in any other product. - -### Protein Field -The amount of protein in the product. - -A `NutritionFactsLabelV1Protein` implements the following attributes: - -* **daily_value** (`float`): DVs are the recommended amounts of protein to consume or not to exceed each day. -* **per_100g** (`float`): The amount of protein per 100g of the product. -* **per_serving** (`float`): The amount of protein per serving of the product. -Fields which are specific to this product; they are not used in any other product. - -### Saturated Fat Field -The amount of saturated fat in the product. - -A `NutritionFactsLabelV1SaturatedFat` implements the following attributes: - -* **daily_value** (`float`): DVs are the recommended amounts of saturated fat to consume or not to exceed each day. -* **per_100g** (`float`): The amount of saturated fat per 100g of the product. -* **per_serving** (`float`): The amount of saturated fat per serving of the product. -Fields which are specific to this product; they are not used in any other product. - -### Serving Size Field -The size of a single serving of the product. - -A `NutritionFactsLabelV1ServingSize` implements the following attributes: - -* **amount** (`float`): The amount of a single serving. -* **unit** (`str`): The unit for the amount of a single serving. -Fields which are specific to this product; they are not used in any other product. - -### sodium Field -The amount of sodium in the product. - -A `NutritionFactsLabelV1Sodium` implements the following attributes: - -* **daily_value** (`float`): DVs are the recommended amounts of sodium to consume or not to exceed each day. -* **per_100g** (`float`): The amount of sodium per 100g of the product. -* **per_serving** (`float`): The amount of sodium per serving of the product. -* **unit** (`str`): The unit of measurement for the amount of sodium. -Fields which are specific to this product; they are not used in any other product. 
- -### Total Carbohydrate Field -The total amount of carbohydrates in the product. - -A `NutritionFactsLabelV1TotalCarbohydrate` implements the following attributes: - -* **daily_value** (`float`): DVs are the recommended amounts of total carbohydrates to consume or not to exceed each day. -* **per_100g** (`float`): The amount of total carbohydrates per 100g of the product. -* **per_serving** (`float`): The amount of total carbohydrates per serving of the product. -Fields which are specific to this product; they are not used in any other product. - -### Total Fat Field -The total amount of fat in the product. - -A `NutritionFactsLabelV1TotalFat` implements the following attributes: - -* **daily_value** (`float`): DVs are the recommended amounts of total fat to consume or not to exceed each day. -* **per_100g** (`float`): The amount of total fat per 100g of the product. -* **per_serving** (`float`): The amount of total fat per serving of the product. -Fields which are specific to this product; they are not used in any other product. - -### Total Sugars Field -The total amount of sugars in the product. - -A `NutritionFactsLabelV1TotalSugar` implements the following attributes: - -* **daily_value** (`float`): DVs are the recommended amounts of total sugars to consume or not to exceed each day. -* **per_100g** (`float`): The amount of total sugars per 100g of the product. -* **per_serving** (`float`): The amount of total sugars per serving of the product. -Fields which are specific to this product; they are not used in any other product. - -### Trans Fat Field -The amount of trans fat in the product. - -A `NutritionFactsLabelV1TransFat` implements the following attributes: - -* **daily_value** (`float`): DVs are the recommended amounts of trans fat to consume or not to exceed each day. -* **per_100g** (`float`): The amount of trans fat per 100g of the product. -* **per_serving** (`float`): The amount of trans fat per serving of the product. 
- -# Attributes -The following fields are extracted for Nutrition Facts Label V1: - -## Added Sugars -**added_sugars** ([NutritionFactsLabelV1AddedSugar](#added-sugars-field)): The amount of added sugars in the product. - -```py -print(result.document.inference.prediction.added_sugars.value) -``` - -## Calories -**calories** ([NutritionFactsLabelV1Calorie](#calories-field)): The amount of calories in the product. - -```py -print(result.document.inference.prediction.calories.value) -``` - -## Cholesterol -**cholesterol** ([NutritionFactsLabelV1Cholesterol](#cholesterol-field)): The amount of cholesterol in the product. - -```py -print(result.document.inference.prediction.cholesterol.value) -``` - -## Dietary Fiber -**dietary_fiber** ([NutritionFactsLabelV1DietaryFiber](#dietary-fiber-field)): The amount of dietary fiber in the product. - -```py -print(result.document.inference.prediction.dietary_fiber.value) -``` - -## nutrients -**nutrients** (List[[NutritionFactsLabelV1Nutrient](#nutrients-field)]): The amount of nutrients in the product. - -```py -for nutrients_elem in result.document.inference.prediction.nutrients: - print(nutrients_elem.value) -``` - -## Protein -**protein** ([NutritionFactsLabelV1Protein](#protein-field)): The amount of protein in the product. - -```py -print(result.document.inference.prediction.protein.value) -``` - -## Saturated Fat -**saturated_fat** ([NutritionFactsLabelV1SaturatedFat](#saturated-fat-field)): The amount of saturated fat in the product. - -```py -print(result.document.inference.prediction.saturated_fat.value) -``` - -## Serving per Box -**serving_per_box** ([AmountField](#amountfield)): The number of servings in each box of the product. - -```py -print(result.document.inference.prediction.serving_per_box.value) -``` - -## Serving Size -**serving_size** ([NutritionFactsLabelV1ServingSize](#serving-size-field)): The size of a single serving of the product. 
- -```py -print(result.document.inference.prediction.serving_size.value) -``` - -## sodium -**sodium** ([NutritionFactsLabelV1Sodium](#sodium-field)): The amount of sodium in the product. - -```py -print(result.document.inference.prediction.sodium.value) -``` - -## Total Carbohydrate -**total_carbohydrate** ([NutritionFactsLabelV1TotalCarbohydrate](#total-carbohydrate-field)): The total amount of carbohydrates in the product. - -```py -print(result.document.inference.prediction.total_carbohydrate.value) -``` - -## Total Fat -**total_fat** ([NutritionFactsLabelV1TotalFat](#total-fat-field)): The total amount of fat in the product. - -```py -print(result.document.inference.prediction.total_fat.value) -``` - -## Total Sugars -**total_sugars** ([NutritionFactsLabelV1TotalSugar](#total-sugars-field)): The total amount of sugars in the product. - -```py -print(result.document.inference.prediction.total_sugars.value) -``` - -## Trans Fat -**trans_fat** ([NutritionFactsLabelV1TransFat](#trans-fat-field)): The amount of trans fat in the product. - -```py -print(result.document.inference.prediction.trans_fat.value) -``` - -# Questions? -[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/extras/guide/passport_v1.md b/docs/extras/guide/passport_v1.md deleted file mode 100644 index 6f8dfc5e..00000000 --- a/docs/extras/guide/passport_v1.md +++ /dev/null @@ -1,196 +0,0 @@ ---- -title: Passport OCR Python -category: 622b805aaec68102ea7fcbc2 -slug: python-passport-ocr -parentDoc: 609808f773b0b90051d839de ---- -The Python OCR SDK supports the [Passport API](https://platform.mindee.com/mindee/passport). - -Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/passport/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK. 
-![Passport sample](https://github.com/mindee/client-lib-test-data/blob/main/products/passport/default_sample.jpg?raw=true) - -# Quick-Start -```py -# -# Install the Python client library by running: -# pip install mindee -# - -from mindee import Client, PredictResponse, product - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and parse it. -result: PredictResponse = mindee_client.parse( - product.PassportV1, - input_doc, -) - -# Print a summary of the API result -print(result.document) - -# Print the document-level summary -# print(result.document.inference.prediction) - -``` - -**Output (RST):** -```rst -######## -Document -######## -:Mindee ID: 18e41f6c-16cd-4f8e-8cd2-00ca02a35764 -:Filename: default_sample.jpg - -Inference -######### -:Product: mindee/passport v1.0 -:Rotation applied: Yes - -Prediction -========== -:Country Code: GBR -:ID Number: 707797979 -:Given Name(s): HENERT -:Surname: PUDARSAN -:Date of Birth: 1995-05-20 -:Place of Birth: CAMTETH -:Gender: M -:Date of Issue: 2012-04-22 -:Expiry Date: 2017-04-22 -:MRZ Line 1: P **Note:** A `Point` simply refers to a List of two numbers (`[float, float]`). - - -Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. - -### DateField -Aside from the basic `BaseField` attributes, the date field `DateField` also implements the following: - -* **date_object** (`Date`): an accessible representation of the value as a python object. Can be `None`. - -### StringField -The text field `StringField` only has one constraint: its **value** is an `Optional[str]`. - -# Attributes -The following fields are extracted for Passport V1: - -## Date of Birth -**birth_date** ([DateField](#datefield)): The date of birth of the passport holder. 
- -```py -print(result.document.inference.prediction.birth_date.value) -``` - -## Place of Birth -**birth_place** ([StringField](#stringfield)): The place of birth of the passport holder. - -```py -print(result.document.inference.prediction.birth_place.value) -``` - -## Country Code -**country** ([StringField](#stringfield)): The country's 3 letter code (ISO 3166-1 alpha-3). - -```py -print(result.document.inference.prediction.country.value) -``` - -## Expiry Date -**expiry_date** ([DateField](#datefield)): The expiry date of the passport. - -```py -print(result.document.inference.prediction.expiry_date.value) -``` - -## Gender -**gender** ([StringField](#stringfield)): The gender of the passport holder. - -```py -print(result.document.inference.prediction.gender.value) -``` - -## Given Name(s) -**given_names** (List[[StringField](#stringfield)]): The given name(s) of the passport holder. - -```py -for given_names_elem in result.document.inference.prediction.given_names: - print(given_names_elem.value) -``` - -## ID Number -**id_number** ([StringField](#stringfield)): The passport's identification number. - -```py -print(result.document.inference.prediction.id_number.value) -``` - -## Date of Issue -**issuance_date** ([DateField](#datefield)): The date the passport was issued. - -```py -print(result.document.inference.prediction.issuance_date.value) -``` - -## MRZ Line 1 -**mrz1** ([StringField](#stringfield)): Machine Readable Zone, first line - -```py -print(result.document.inference.prediction.mrz1.value) -``` - -## MRZ Line 2 -**mrz2** ([StringField](#stringfield)): Machine Readable Zone, second line - -```py -print(result.document.inference.prediction.mrz2.value) -``` - -## Surname -**surname** ([StringField](#stringfield)): The surname of the passport holder. - -```py -print(result.document.inference.prediction.surname.value) -``` - -# Questions? 
-[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/extras/guide/payslip_fra_v3.md b/docs/extras/guide/payslip_fra_v3.md deleted file mode 100644 index 4b8da321..00000000 --- a/docs/extras/guide/payslip_fra_v3.md +++ /dev/null @@ -1,321 +0,0 @@ ---- -title: FR Payslip OCR Python -category: 622b805aaec68102ea7fcbc2 -slug: python-fr-payslip-ocr -parentDoc: 609808f773b0b90051d839de ---- -The Python OCR SDK supports the [Payslip API](https://platform.mindee.com/mindee/payslip_fra). - -Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/payslip_fra/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK. -![Payslip sample](https://github.com/mindee/client-lib-test-data/blob/main/products/payslip_fra/default_sample.jpg?raw=true) - -# Quick-Start -```py -# -# Install the Python client library by running: -# pip install mindee -# - -from mindee import Client, product, AsyncPredictResponse - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and enqueue it. 
-result: AsyncPredictResponse = mindee_client.enqueue_and_parse( - product.fr.PayslipV3, - input_doc, -) - -# Print a brief summary of the parsed data -print(result.document) - -``` - -**Output (RST):** -```rst -######## -Document -######## -:Mindee ID: a479e3e7-6838-4e82-9a7d-99289f34ec7f -:Filename: default_sample.jpg - -Inference -######### -:Product: mindee/payslip_fra v3.0 -:Rotation applied: Yes - -Prediction -========== -:Pay Period: - :End Date: 2023-03-31 - :Month: 03 - :Payment Date: 2023-03-29 - :Start Date: 2023-03-01 - :Year: 2023 -:Employee: - :Address: 52 RUE DES FLEURS 33500 LIBOURNE FRANCE - :Date of Birth: - :First Name: Jean Luc - :Last Name: Picard - :Phone Number: - :Registration Number: - :Social Security Number: 123456789012345 -:Employer: - :Address: 1 RUE DU TONNOT 25210 DOUBS - :Company ID: 12345678901234 - :Company Site: - :NAF Code: 1234A - :Name: DEMO COMPANY - :Phone Number: - :URSSAF Number: -:Bank Account Details: - :Bank Name: - :IBAN: - :SWIFT: -:Employment: - :Category: Cadre - :Coefficient: 600,000 - :Collective Agreement: Construction -- Promotion - :Job Title: Directeur Régional du Développement - :Position Level: Niveau 5 Echelon 3 - :Seniority Date: - :Start Date: 2022-05-01 -:Salary Details: - +--------------+-----------+--------------------------------------+--------+-----------+ - | Amount | Base | Description | Number | Rate | - +==============+===========+======================================+========+===========+ - | 6666.67 | | Salaire de base | | | - +--------------+-----------+--------------------------------------+--------+-----------+ - | 9.30 | | Part patronale Mutuelle NR | | | - +--------------+-----------+--------------------------------------+--------+-----------+ - | 508.30 | | Avantages en nature voiture | | | - +--------------+-----------+--------------------------------------+--------+-----------+ -:Pay Detail: - :Gross Salary: 7184.27 - :Gross Salary YTD: 18074.81 - :Income Tax Rate: 17.60 - :Income Tax 
Withheld: 1030.99 - :Net Paid: 3868.32 - :Net Paid Before Tax: 4899.31 - :Net Taxable: 5857.90 - :Net Taxable YTD: 14752.73 - :Total Cost Employer: 10486.94 - :Total Taxes and Deductions: 1650.36 -:Paid Time Off: - +-----------+--------+-------------+-----------+-----------+ - | Accrued | Period | Type | Remaining | Used | - +===========+========+=============+===========+===========+ - | | N-1 | VACATION | | | - +-----------+--------+-------------+-----------+-----------+ - | 6.17 | N | VACATION | 6.17 | | - +-----------+--------+-------------+-----------+-----------+ - | 2.01 | N | RTT | 2.01 | | - +-----------+--------+-------------+-----------+-----------+ -``` - -# Field Types -## Standard Fields -These fields are generic and used in several products. - -### BaseField -Each prediction object contains a set of fields that inherit from the generic `BaseField` class. -A typical `BaseField` object will have the following attributes: - -* **value** (`Union[float, str]`): corresponds to the field value. Can be `None` if no value was extracted. -* **confidence** (`float`): the confidence score of the field prediction. -* **bounding_box** (`[Point, Point, Point, Point]`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document. -* **polygon** (`List[Point]`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image. -* **page_id** (`int`): the ID of the page, always `None` when at document-level. -* **reconstructed** (`bool`): indicates whether an object was reconstructed (not extracted as the API gave it). - -> **Note:** A `Point` simply refers to a List of two numbers (`[float, float]`). - - -Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. - -## Specific Fields -Fields which are specific to this product; they are not used in any other product. 
- -### Bank Account Details Field -Information about the employee's bank account. - -A `PayslipV3BankAccountDetail` implements the following attributes: - -* **bank_name** (`str`): The name of the bank. -* **iban** (`str`): The IBAN of the bank account. -* **swift** (`str`): The SWIFT code of the bank. -Fields which are specific to this product; they are not used in any other product. - -### Employee Field -Information about the employee. - -A `PayslipV3Employee` implements the following attributes: - -* **address** (`str`): The address of the employee. -* **date_of_birth** (`str`): The date of birth of the employee. -* **first_name** (`str`): The first name of the employee. -* **last_name** (`str`): The last name of the employee. -* **phone_number** (`str`): The phone number of the employee. -* **registration_number** (`str`): The registration number of the employee. -* **social_security_number** (`str`): The social security number of the employee. -Fields which are specific to this product; they are not used in any other product. - -### Employer Field -Information about the employer. - -A `PayslipV3Employer` implements the following attributes: - -* **address** (`str`): The address of the employer. -* **company_id** (`str`): The company ID of the employer. -* **company_site** (`str`): The site of the company. -* **naf_code** (`str`): The NAF code of the employer. -* **name** (`str`): The name of the employer. -* **phone_number** (`str`): The phone number of the employer. -* **urssaf_number** (`str`): The URSSAF number of the employer. -Fields which are specific to this product; they are not used in any other product. - -### Employment Field -Information about the employment. - -A `PayslipV3Employment` implements the following attributes: - -* **category** (`str`): The category of the employment. -* **coefficient** (`str`): The coefficient of the employment. -* **collective_agreement** (`str`): The collective agreement of the employment. 
-* **job_title** (`str`): The job title of the employee. -* **position_level** (`str`): The position level of the employment. -* **seniority_date** (`str`): The seniority date of the employment. -* **start_date** (`str`): The start date of the employment. -Fields which are specific to this product; they are not used in any other product. - -### Paid Time Off Field -Information about paid time off. - -A `PayslipV3PaidTimeOff` implements the following attributes: - -* **accrued** (`float`): The amount of paid time off accrued in the period. -* **period** (`str`): The paid time off period. - -#### Possible values include: - - N - - N-1 - - N-2 - -* **pto_type** (`str`): The type of paid time off. - -#### Possible values include: - - VACATION - - RTT - - COMPENSATORY - -* **remaining** (`float`): The remaining amount of paid time off at the end of the period. -* **used** (`float`): The amount of paid time off used in the period. -Fields which are specific to this product; they are not used in any other product. - -### Pay Detail Field -Detailed information about the pay. - -A `PayslipV3PayDetail` implements the following attributes: - -* **gross_salary** (`float`): The gross salary of the employee. -* **gross_salary_ytd** (`float`): The year-to-date gross salary of the employee. -* **income_tax_rate** (`float`): The income tax rate of the employee. -* **income_tax_withheld** (`float`): The income tax withheld from the employee's pay. -* **net_paid** (`float`): The net paid amount of the employee. -* **net_paid_before_tax** (`float`): The net paid amount before tax of the employee. -* **net_taxable** (`float`): The net taxable amount of the employee. -* **net_taxable_ytd** (`float`): The year-to-date net taxable amount of the employee. -* **total_cost_employer** (`float`): The total cost to the employer. -* **total_taxes_and_deductions** (`float`): The total taxes and deductions of the employee. 
-Fields which are specific to this product; they are not used in any other product. - -### Pay Period Field -Information about the pay period. - -A `PayslipV3PayPeriod` implements the following attributes: - -* **end_date** (`str`): The end date of the pay period. -* **month** (`str`): The month of the pay period. -* **payment_date** (`str`): The date of payment for the pay period. -* **start_date** (`str`): The start date of the pay period. -* **year** (`str`): The year of the pay period. -Fields which are specific to this product; they are not used in any other product. - -### Salary Details Field -Detailed information about the earnings. - -A `PayslipV3SalaryDetail` implements the following attributes: - -* **amount** (`float`): The amount of the earning. -* **base** (`float`): The base rate value of the earning. -* **description** (`str`): The description of the earnings. -* **number** (`float`): The number of units in the earning. -* **rate** (`float`): The rate of the earning. - -# Attributes -The following fields are extracted for Payslip V3: - -## Bank Account Details -**bank_account_details** ([PayslipV3BankAccountDetail](#bank-account-details-field)): Information about the employee's bank account. - -```py -print(result.document.inference.prediction.bank_account_details.value) -``` - -## Employee -**employee** ([PayslipV3Employee](#employee-field)): Information about the employee. - -```py -print(result.document.inference.prediction.employee.value) -``` - -## Employer -**employer** ([PayslipV3Employer](#employer-field)): Information about the employer. - -```py -print(result.document.inference.prediction.employer.value) -``` - -## Employment -**employment** ([PayslipV3Employment](#employment-field)): Information about the employment. - -```py -print(result.document.inference.prediction.employment.value) -``` - -## Paid Time Off -**paid_time_off** (List[[PayslipV3PaidTimeOff](#paid-time-off-field)]): Information about paid time off. 
- -```py -for paid_time_off_elem in result.document.inference.prediction.paid_time_off: - print(paid_time_off_elem.value) -``` - -## Pay Detail -**pay_detail** ([PayslipV3PayDetail](#pay-detail-field)): Detailed information about the pay. - -```py -print(result.document.inference.prediction.pay_detail.value) -``` - -## Pay Period -**pay_period** ([PayslipV3PayPeriod](#pay-period-field)): Information about the pay period. - -```py -print(result.document.inference.prediction.pay_period.value) -``` - -## Salary Details -**salary_details** (List[[PayslipV3SalaryDetail](#salary-details-field)]): Detailed information about the earnings. - -```py -for salary_details_elem in result.document.inference.prediction.salary_details: - print(salary_details_elem.value) -``` - -# Questions? -[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/extras/guide/resume_v1.md b/docs/extras/guide/resume_v1.md deleted file mode 100644 index 1c629a0f..00000000 --- a/docs/extras/guide/resume_v1.md +++ /dev/null @@ -1,353 +0,0 @@ ---- -title: Resume OCR Python -category: 622b805aaec68102ea7fcbc2 -slug: python-resume-ocr -parentDoc: 609808f773b0b90051d839de ---- -The Python OCR SDK supports the [Resume API](https://platform.mindee.com/mindee/resume). - -Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/resume/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK. 
-![Resume sample](https://github.com/mindee/client-lib-test-data/blob/main/products/resume/default_sample.jpg?raw=true) - -# Quick-Start -```py -# -# Install the Python client library by running: -# pip install mindee -# - -from mindee import Client, product, AsyncPredictResponse - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and enqueue it. -result: AsyncPredictResponse = mindee_client.enqueue_and_parse( - product.ResumeV1, - input_doc, -) - -# Print a brief summary of the parsed data -print(result.document) - -``` - -**Output (RST):** -```rst -######## -Document -######## -:Mindee ID: 9daa3085-152c-454e-9245-636f13fc9dc3 -:Filename: default_sample.jpg - -Inference -######### -:Product: mindee/resume v1.1 -:Rotation applied: Yes - -Prediction -========== -:Document Language: ENG -:Document Type: RESUME -:Given Names: Christopher -:Surnames: Morgan -:Nationality: -:Email Address: christoper.m@gmail.com -:Phone Number: +44 (0)20 7666 8555 -:Address: 177 Great Portland Street, London, W5W 6PQ -:Social Networks: - +----------------------+----------------------------------------------------+ - | Name | URL | - +======================+====================================================+ - | LinkedIn | linkedin.com/christopher.morgan | - +----------------------+----------------------------------------------------+ -:Profession: Senior Web Developer -:Job Applied: -:Languages: - +----------+----------------------+ - | Language | Level | - +==========+======================+ - | SPA | Fluent | - +----------+----------------------+ - | ZHO | Beginner | - +----------+----------------------+ - | DEU | Beginner | - +----------+----------------------+ -:Hard Skills: HTML5 - PHP OOP - JavaScript - CSS - MySQL - SQL -:Soft Skills: Project management - Creative design - Strong decision maker - Innovative - Complex problem solver - 
Service-focused -:Education: - +-----------------+---------------------------+-----------+----------+---------------------------+-------------+------------+ - | Domain | Degree | End Month | End Year | School | Start Month | Start Year | - +=================+===========================+===========+==========+===========================+=============+============+ - | Computer Inf... | Bachelor | | 2014 | Columbia University, NY | | | - +-----------------+---------------------------+-----------+----------+---------------------------+-------------+------------+ -:Professional Experiences: - +-----------------+------------+--------------------------------------+---------------------------+-----------+----------+----------------------+-------------+------------+ - | Contract Type | Department | Description | Employer | End Month | End Year | Role | Start Month | Start Year | - +=================+============+======================================+===========================+===========+==========+======================+=============+============+ - | | | Cooperate with designers to creat... | Luna Web Design, New York | 05 | 2019 | Web Developer | 09 | 2015 | - +-----------------+------------+--------------------------------------+---------------------------+-----------+----------+----------------------+-------------+------------+ -:Certificates: - +------------+--------------------------------+---------------------------+------+ - | Grade | Name | Provider | Year | - +============+================================+===========================+======+ - | | PHP Framework (certificate)... | | | - +------------+--------------------------------+---------------------------+------+ -``` - -# Field Types -## Standard Fields -These fields are generic and used in several products. - -### BaseField -Each prediction object contains a set of fields that inherit from the generic `BaseField` class. 
-A typical `BaseField` object will have the following attributes: - -* **value** (`Union[float, str]`): corresponds to the field value. Can be `None` if no value was extracted. -* **confidence** (`float`): the confidence score of the field prediction. -* **bounding_box** (`[Point, Point, Point, Point]`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document. -* **polygon** (`List[Point]`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image. -* **page_id** (`int`): the ID of the page, always `None` when at document-level. -* **reconstructed** (`bool`): indicates whether an object was reconstructed (not extracted as the API gave it). - -> **Note:** A `Point` simply refers to a List of two numbers (`[float, float]`). - - -Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. - - -### ClassificationField -The classification field `ClassificationField` does not implement all the basic `BaseField` attributes. It only implements **value**, **confidence** and **page_id**. - -> Note: a classification field's `value is always a `str`. - -### StringField -The text field `StringField` only has one constraint: its **value** is an `Optional[str]`. - -## Specific Fields -Fields which are specific to this product; they are not used in any other product. - -### Certificates Field -The list of certificates obtained by the candidate. - -A `ResumeV1Certificate` implements the following attributes: - -* **grade** (`str`): The grade obtained for the certificate. -* **name** (`str`): The name of certification. -* **provider** (`str`): The organization or institution that issued the certificate. -* **year** (`str`): The year when a certificate was issued or received. -Fields which are specific to this product; they are not used in any other product. 
- -### Education Field -The list of the candidate's educational background. - -A `ResumeV1Education` implements the following attributes: - -* **degree_domain** (`str`): The area of study or specialization. -* **degree_type** (`str`): The type of degree obtained, such as Bachelor's, Master's, or Doctorate. -* **end_month** (`str`): The month when the education program or course was completed. -* **end_year** (`str`): The year when the education program or course was completed. -* **school** (`str`): The name of the school. -* **start_month** (`str`): The month when the education program or course began. -* **start_year** (`str`): The year when the education program or course began. -Fields which are specific to this product; they are not used in any other product. - -### Languages Field -The list of languages that the candidate is proficient in. - -A `ResumeV1Language` implements the following attributes: - -* **language** (`str`): The language's ISO 639 code. -* **level** (`str`): The candidate's level for the language. - -#### Possible values include: - - Native - - Fluent - - Proficient - - Intermediate - - Beginner - -Fields which are specific to this product; they are not used in any other product. - -### Professional Experiences Field -The list of the candidate's professional experiences. - -A `ResumeV1ProfessionalExperience` implements the following attributes: - -* **contract_type** (`str`): The type of contract for the professional experience. - -#### Possible values include: - - Full-Time - - Part-Time - - Internship - - Freelance - -* **department** (`str`): The specific department or division within the company. -* **description** (`str`): The description of the professional experience as written in the document. -* **employer** (`str`): The name of the company or organization. -* **end_month** (`str`): The month when the professional experience ended. -* **end_year** (`str`): The year when the professional experience ended. 
-* **role** (`str`): The position or job title held by the candidate. -* **start_month** (`str`): The month when the professional experience began. -* **start_year** (`str`): The year when the professional experience began. -Fields which are specific to this product; they are not used in any other product. - -### Social Networks Field -The list of social network profiles of the candidate. - -A `ResumeV1SocialNetworksUrl` implements the following attributes: - -* **name** (`str`): The name of the social network. -* **url** (`str`): The URL of the social network. - -# Attributes -The following fields are extracted for Resume V1: - -## Address -**address** ([StringField](#stringfield)): The location information of the candidate, including city, state, and country. - -```py -print(result.document.inference.prediction.address.value) -``` - -## Certificates -**certificates** (List[[ResumeV1Certificate](#certificates-field)]): The list of certificates obtained by the candidate. - -```py -for certificates_elem in result.document.inference.prediction.certificates: - print(certificates_elem.value) -``` - -## Document Language -**document_language** ([StringField](#stringfield)): The ISO 639 code of the language in which the document is written. - -```py -print(result.document.inference.prediction.document_language.value) -``` - -## Document Type -**document_type** ([ClassificationField](#classificationfield)): The type of the document sent. - -#### Possible values include: - - 'RESUME' - - 'MOTIVATION_LETTER' - - 'RECOMMENDATION_LETTER' - -```py -print(result.document.inference.prediction.document_type.value) -``` - -## Education -**education** (List[[ResumeV1Education](#education-field)]): The list of the candidate's educational background. - -```py -for education_elem in result.document.inference.prediction.education: - print(education_elem.value) -``` - -## Email Address -**email_address** ([StringField](#stringfield)): The email address of the candidate. 
- -```py -print(result.document.inference.prediction.email_address.value) -``` - -## Given Names -**given_names** (List[[StringField](#stringfield)]): The candidate's first or given names. - -```py -for given_names_elem in result.document.inference.prediction.given_names: - print(given_names_elem.value) -``` - -## Hard Skills -**hard_skills** (List[[StringField](#stringfield)]): The list of the candidate's technical abilities and knowledge. - -```py -for hard_skills_elem in result.document.inference.prediction.hard_skills: - print(hard_skills_elem.value) -``` - -## Job Applied -**job_applied** ([StringField](#stringfield)): The position that the candidate is applying for. - -```py -print(result.document.inference.prediction.job_applied.value) -``` - -## Languages -**languages** (List[[ResumeV1Language](#languages-field)]): The list of languages that the candidate is proficient in. - -```py -for languages_elem in result.document.inference.prediction.languages: - print(languages_elem.value) -``` - -## Nationality -**nationality** ([StringField](#stringfield)): The ISO 3166 code for the country of citizenship of the candidate. - -```py -print(result.document.inference.prediction.nationality.value) -``` - -## Phone Number -**phone_number** ([StringField](#stringfield)): The phone number of the candidate. - -```py -print(result.document.inference.prediction.phone_number.value) -``` - -## Profession -**profession** ([StringField](#stringfield)): The candidate's current profession. - -```py -print(result.document.inference.prediction.profession.value) -``` - -## Professional Experiences -**professional_experiences** (List[[ResumeV1ProfessionalExperience](#professional-experiences-field)]): The list of the candidate's professional experiences. 
- -```py -for professional_experiences_elem in result.document.inference.prediction.professional_experiences: - print(professional_experiences_elem.value) -``` - -## Social Networks -**social_networks_urls** (List[[ResumeV1SocialNetworksUrl](#social-networks-field)]): The list of social network profiles of the candidate. - -```py -for social_networks_urls_elem in result.document.inference.prediction.social_networks_urls: - print(social_networks_urls_elem.value) -``` - -## Soft Skills -**soft_skills** (List[[StringField](#stringfield)]): The list of the candidate's interpersonal and communication abilities. - -```py -for soft_skills_elem in result.document.inference.prediction.soft_skills: - print(soft_skills_elem.value) -``` - -## Surnames -**surnames** (List[[StringField](#stringfield)]): The candidate's last names. - -```py -for surnames_elem in result.document.inference.prediction.surnames: - print(surnames_elem.value) -``` - -# Questions? -[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/extras/guide/us_healthcare_cards_v1.md b/docs/extras/guide/us_healthcare_cards_v1.md deleted file mode 100644 index 2cac10b2..00000000 --- a/docs/extras/guide/us_healthcare_cards_v1.md +++ /dev/null @@ -1,235 +0,0 @@ ---- -title: US Healthcare Card OCR Python -category: 622b805aaec68102ea7fcbc2 -slug: python-us-healthcare-card-ocr -parentDoc: 609808f773b0b90051d839de ---- -The Python OCR SDK supports the [Healthcare Card API](https://platform.mindee.com/mindee/us_healthcare_cards). - -Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/us_healthcare_cards/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK. 
-![Healthcare Card sample](https://github.com/mindee/client-lib-test-data/blob/main/products/us_healthcare_cards/default_sample.jpg?raw=true) - -# Quick-Start -```py -# -# Install the Python client library by running: -# pip install mindee -# - -from mindee import Client, product, AsyncPredictResponse - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and enqueue it. -result: AsyncPredictResponse = mindee_client.enqueue_and_parse( - product.us.HealthcareCardV1, - input_doc, -) - -# Print a brief summary of the parsed data -print(result.document) - -``` - -**Output (RST):** -```rst -######## -Document -######## -:Mindee ID: 5e917fc8-5c13-42b2-967f-954f4eed9959 -:Filename: default_sample.jpg - -Inference -######### -:Product: mindee/us_healthcare_cards v1.3 -:Rotation applied: Yes - -Prediction -========== -:Company Name: UnitedHealthcare -:Plan Name: Choice Plus -:Member Name: SUBSCRIBER SMITH -:Member ID: 123456789 -:Issuer 80840: -:Dependents: SPOUSE SMITH - CHILD1 SMITH - CHILD2 SMITH - CHILD3 SMITH -:Group Number: 98765 -:Payer ID: 87726 -:RX BIN: 610279 -:RX ID: -:RX GRP: UHEALTH -:RX PCN: 9999 -:Copays: - +--------------+----------------------+ - | Service Fees | Service Name | - +==============+======================+ - | 20.00 | office_visit | - +--------------+----------------------+ - | 300.00 | emergency_room | - +--------------+----------------------+ - | 75.00 | urgent_care | - +--------------+----------------------+ - | 30.00 | specialist | - +--------------+----------------------+ -:Enrollment Date: -``` - -# Field Types -## Standard Fields -These fields are generic and used in several products. - -### BaseField -Each prediction object contains a set of fields that inherit from the generic `BaseField` class. 
-A typical `BaseField` object will have the following attributes: - -* **value** (`Union[float, str]`): corresponds to the field value. Can be `None` if no value was extracted. -* **confidence** (`float`): the confidence score of the field prediction. -* **bounding_box** (`[Point, Point, Point, Point]`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document. -* **polygon** (`List[Point]`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image. -* **page_id** (`int`): the ID of the page, always `None` when at document-level. -* **reconstructed** (`bool`): indicates whether an object was reconstructed (not extracted as the API gave it). - -> **Note:** A `Point` simply refers to a List of two numbers (`[float, float]`). - - -Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. - -### DateField -Aside from the basic `BaseField` attributes, the date field `DateField` also implements the following: - -* **date_object** (`Date`): an accessible representation of the value as a python object. Can be `None`. - -### StringField -The text field `StringField` only has one constraint: its **value** is an `Optional[str]`. - -## Specific Fields -Fields which are specific to this product; they are not used in any other product. - -### Copays Field -Copayments for covered services. - -A `HealthcareCardV1Copay` implements the following attributes: - -* **service_fees** (`float`): The price of the service. -* **service_name** (`str`): The name of the service. 
- -#### Possible values include: - - primary_care - - emergency_room - - urgent_care - - specialist - - office_visit - - prescription - - -# Attributes -The following fields are extracted for Healthcare Card V1: - -## Company Name -**company_name** ([StringField](#stringfield)): The name of the company that provides the healthcare plan. - -```py -print(result.document.inference.prediction.company_name.value) -``` - -## Copays -**copays** (List[[HealthcareCardV1Copay](#copays-field)]): Copayments for covered services. - -```py -for copays_elem in result.document.inference.prediction.copays: - print(copays_elem.value) -``` - -## Dependents -**dependents** (List[[StringField](#stringfield)]): The list of dependents covered by the healthcare plan. - -```py -for dependents_elem in result.document.inference.prediction.dependents: - print(dependents_elem.value) -``` - -## Enrollment Date -**enrollment_date** ([DateField](#datefield)): The date when the member enrolled in the healthcare plan. - -```py -print(result.document.inference.prediction.enrollment_date.value) -``` - -## Group Number -**group_number** ([StringField](#stringfield)): The group number associated with the healthcare plan. - -```py -print(result.document.inference.prediction.group_number.value) -``` - -## Issuer 80840 -**issuer_80840** ([StringField](#stringfield)): The organization that issued the healthcare plan. - -```py -print(result.document.inference.prediction.issuer_80840.value) -``` - -## Member ID -**member_id** ([StringField](#stringfield)): The unique identifier for the member in the healthcare system. - -```py -print(result.document.inference.prediction.member_id.value) -``` - -## Member Name -**member_name** ([StringField](#stringfield)): The name of the member covered by the healthcare plan. - -```py -print(result.document.inference.prediction.member_name.value) -``` - -## Payer ID -**payer_id** ([StringField](#stringfield)): The unique identifier for the payer in the healthcare system. 
- -```py -print(result.document.inference.prediction.payer_id.value) -``` - -## Plan Name -**plan_name** ([StringField](#stringfield)): The name of the healthcare plan. - -```py -print(result.document.inference.prediction.plan_name.value) -``` - -## RX BIN -**rx_bin** ([StringField](#stringfield)): The BIN number for prescription drug coverage. - -```py -print(result.document.inference.prediction.rx_bin.value) -``` - -## RX GRP -**rx_grp** ([StringField](#stringfield)): The group number for prescription drug coverage. - -```py -print(result.document.inference.prediction.rx_grp.value) -``` - -## RX ID -**rx_id** ([StringField](#stringfield)): The ID number for prescription drug coverage. - -```py -print(result.document.inference.prediction.rx_id.value) -``` - -## RX PCN -**rx_pcn** ([StringField](#stringfield)): The PCN number for prescription drug coverage. - -```py -print(result.document.inference.prediction.rx_pcn.value) -``` - -# Questions? -[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/extras/guide/us_mail_v3.md b/docs/extras/guide/us_mail_v3.md deleted file mode 100644 index e0485a7b..00000000 --- a/docs/extras/guide/us_mail_v3.md +++ /dev/null @@ -1,130 +0,0 @@ ---- -title: US US Mail OCR Python -category: 622b805aaec68102ea7fcbc2 -slug: python-us-us-mail-ocr -parentDoc: 609808f773b0b90051d839de ---- -The Python OCR SDK supports the [US Mail API](https://platform.mindee.com/mindee/us_mail). - -The [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/us_mail/default_sample.jpg) can be used for testing purposes. 
-![US Mail sample](https://github.com/mindee/client-lib-test-data/blob/main/products/us_mail/default_sample.jpg?raw=true) - -# Quick-Start -```py -# -# Install the Python client library by running: -# pip install mindee -# - -from mindee import Client, product, AsyncPredictResponse - -# Init a new client -mindee_client = Client(api_key="my-api-key") - -# Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") - -# Load a file from disk and enqueue it. -result: AsyncPredictResponse = mindee_client.enqueue_and_parse( - product.us.UsMailV3, - input_doc, -) - -# Print a brief summary of the parsed data -print(result.document) - -``` -# Field Types -## Standard Fields -These fields are generic and used in several products. - -### BaseField -Each prediction object contains a set of fields that inherit from the generic `BaseField` class. -A typical `BaseField` object will have the following attributes: - -* **value** (`Union[float, str]`): corresponds to the field value. Can be `None` if no value was extracted. -* **confidence** (`float`): the confidence score of the field prediction. -* **bounding_box** (`[Point, Point, Point, Point]`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document. -* **polygon** (`List[Point]`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image. -* **page_id** (`int`): the ID of the page, always `None` when at document-level. -* **reconstructed** (`bool`): indicates whether an object was reconstructed (not extracted as the API gave it). - -> **Note:** A `Point` simply refers to a List of two numbers (`[float, float]`). - - -Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. - -### StringField -The text field `StringField` only has one constraint: its **value** is an `Optional[str]`. 
- -## Specific Fields -Fields which are specific to this product; they are not used in any other product. - -### Recipient Addresses Field -The addresses of the recipients. - -A `UsMailV3RecipientAddress` implements the following attributes: - -* **city** (`str`): The city of the recipient's address. -* **complete** (`str`): The complete address of the recipient. -* **is_address_change** (`bool`): Indicates if the recipient's address is a change of address. -* **postal_code** (`str`): The postal code of the recipient's address. -* **private_mailbox_number** (`str`): The private mailbox number of the recipient's address. -* **state** (`str`): Second part of the ISO 3166-2 code, consisting of two letters indicating the US State. -* **street** (`str`): The street of the recipient's address. -* **unit** (`str`): The unit number of the recipient's address. -Fields which are specific to this product; they are not used in any other product. - -### Sender Address Field -The address of the sender. - -A `UsMailV3SenderAddress` implements the following attributes: - -* **city** (`str`): The city of the sender's address. -* **complete** (`str`): The complete address of the sender. -* **postal_code** (`str`): The postal code of the sender's address. -* **state** (`str`): Second part of the ISO 3166-2 code, consisting of two letters indicating the US State. -* **street** (`str`): The street of the sender's address. - -# Attributes -The following fields are extracted for US Mail V3: - -## Return to Sender -**is_return_to_sender** ([BooleanField](#booleanfield)): Whether the mailing is marked as return to sender. - -```py -print(result.document.inference.prediction.is_return_to_sender.value) -``` - -## Recipient Addresses -**recipient_addresses** (List[[UsMailV3RecipientAddress](#recipient-addresses-field)]): The addresses of the recipients. 
- -```py -for recipient_addresses_elem in result.document.inference.prediction.recipient_addresses: - print(recipient_addresses_elem.value) -``` - -## Recipient Names -**recipient_names** (List[[StringField](#stringfield)]): The names of the recipients. - -```py -for recipient_names_elem in result.document.inference.prediction.recipient_names: - print(recipient_names_elem.value) -``` - -## Sender Address -**sender_address** ([UsMailV3SenderAddress](#sender-address-field)): The address of the sender. - -```py -print(result.document.inference.prediction.sender_address.value) -``` - -## Sender Name -**sender_name** ([StringField](#stringfield)): The name of the sender. - -```py -print(result.document.inference.prediction.sender_name.value) -``` - -# Questions? -[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/tests/data b/tests/data index bc8356c1..7d843db0 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit bc8356c1ce52d60351ed3430d336f33366025012 +Subproject commit 7d843db01df952740d0f2d39f62fc3efb86f92bb diff --git a/tests/api/__init__.py b/tests/v1/__init__.py similarity index 100% rename from tests/api/__init__.py rename to tests/v1/__init__.py diff --git a/tests/extraction/__init__.py b/tests/v1/api/__init__.py similarity index 100% rename from tests/extraction/__init__.py rename to tests/v1/api/__init__.py diff --git a/tests/api/test_async_response.py b/tests/v1/api/test_async_response.py similarity index 100% rename from tests/api/test_async_response.py rename to tests/v1/api/test_async_response.py diff --git a/tests/api/test_feedback_response.py b/tests/v1/api/test_feedback_response.py similarity index 100% rename from tests/api/test_feedback_response.py rename to tests/v1/api/test_feedback_response.py diff --git a/tests/api/test_response.py b/tests/v1/api/test_response.py similarity index 100% rename from tests/api/test_response.py rename to 
tests/v1/api/test_response.py diff --git a/tests/extras/__init__.py b/tests/v1/extraction/__init__.py similarity index 100% rename from tests/extras/__init__.py rename to tests/v1/extraction/__init__.py diff --git a/tests/extraction/test_image_extractor.py b/tests/v1/extraction/test_image_extractor.py similarity index 100% rename from tests/extraction/test_image_extractor.py rename to tests/v1/extraction/test_image_extractor.py diff --git a/tests/extraction/test_invoice_splitter_auto_extraction.py b/tests/v1/extraction/test_invoice_splitter_auto_extraction.py similarity index 97% rename from tests/extraction/test_invoice_splitter_auto_extraction.py rename to tests/v1/extraction/test_invoice_splitter_auto_extraction.py index ed3bb3a4..e8b537e4 100644 --- a/tests/extraction/test_invoice_splitter_auto_extraction.py +++ b/tests/v1/extraction/test_invoice_splitter_auto_extraction.py @@ -8,8 +8,8 @@ from mindee.parsing.common.document import Document from mindee.product.invoice.invoice_v4 import InvoiceV4 from mindee.product.invoice_splitter.invoice_splitter_v1 import InvoiceSplitterV1 -from tests.product import get_id, get_version from tests.utils import PRODUCT_DATA_DIR, levenshtein_ratio +from tests.v1.product import get_id, get_version @pytest.fixture diff --git a/tests/extraction/test_multi_receipts_extractor.py b/tests/v1/extraction/test_multi_receipts_extractor.py similarity index 100% rename from tests/extraction/test_multi_receipts_extractor.py rename to tests/v1/extraction/test_multi_receipts_extractor.py diff --git a/tests/extraction/test_pdf_extractor.py b/tests/v1/extraction/test_pdf_extractor.py similarity index 100% rename from tests/extraction/test_pdf_extractor.py rename to tests/v1/extraction/test_pdf_extractor.py diff --git a/tests/fields/__init__.py b/tests/v1/extras/__init__.py similarity index 100% rename from tests/fields/__init__.py rename to tests/v1/extras/__init__.py diff --git a/tests/extras/test_extras_integration.py 
b/tests/v1/extras/test_extras_integration.py similarity index 100% rename from tests/extras/test_extras_integration.py rename to tests/v1/extras/test_extras_integration.py diff --git a/tests/extras/test_full_text_ocr.py b/tests/v1/extras/test_full_text_ocr.py similarity index 100% rename from tests/extras/test_full_text_ocr.py rename to tests/v1/extras/test_full_text_ocr.py diff --git a/tests/input/__init__.py b/tests/v1/input/__init__.py similarity index 100% rename from tests/input/__init__.py rename to tests/v1/input/__init__.py diff --git a/tests/input/test_apply_page_options.py b/tests/v1/input/test_apply_page_options.py similarity index 100% rename from tests/input/test_apply_page_options.py rename to tests/v1/input/test_apply_page_options.py diff --git a/tests/input/test_compression.py b/tests/v1/input/test_compression.py similarity index 100% rename from tests/input/test_compression.py rename to tests/v1/input/test_compression.py diff --git a/tests/input/test_fix_pdf.py b/tests/v1/input/test_fix_pdf.py similarity index 100% rename from tests/input/test_fix_pdf.py rename to tests/v1/input/test_fix_pdf.py diff --git a/tests/input/test_inputs.py b/tests/v1/input/test_inputs.py similarity index 100% rename from tests/input/test_inputs.py rename to tests/v1/input/test_inputs.py diff --git a/tests/input/test_local_response.py b/tests/v1/input/test_local_response.py similarity index 97% rename from tests/input/test_local_response.py rename to tests/v1/input/test_local_response.py index 5858e2d6..30430980 100644 --- a/tests/input/test_local_response.py +++ b/tests/v1/input/test_local_response.py @@ -3,7 +3,7 @@ import pytest from mindee.input import LocalResponse -from tests.api.test_async_response import ASYNC_DIR +from tests.v1.api.test_async_response import ASYNC_DIR @pytest.fixture diff --git a/tests/input/test_url_input_source_integration.py b/tests/v1/input/test_url_input_source_integration.py similarity index 100% rename from 
tests/input/test_url_input_source_integration.py rename to tests/v1/input/test_url_input_source_integration.py diff --git a/tests/v1/mindee_http/__init__.py b/tests/v1/mindee_http/__init__.py new file mode 100644 index 00000000..7210b673 --- /dev/null +++ b/tests/v1/mindee_http/__init__.py @@ -0,0 +1 @@ +from tests.v1.mindee_http.test_error import ERROR_DATA_DIR diff --git a/tests/mindee_http/test_error.py b/tests/v1/mindee_http/test_error.py similarity index 98% rename from tests/mindee_http/test_error.py rename to tests/v1/mindee_http/test_error.py index 7ad3c3b7..8dd4f1e5 100644 --- a/tests/mindee_http/test_error.py +++ b/tests/v1/mindee_http/test_error.py @@ -10,8 +10,8 @@ handle_error, ) from mindee.input.sources.path_input import PathInput -from tests.input.test_inputs import FILE_TYPES_DIR from tests.utils import clear_envvars, dummy_envvars +from tests.v1.input.test_inputs import FILE_TYPES_DIR ERROR_DATA_DIR = Path("./tests/data/errors") diff --git a/tests/mindee_http/__init__.py b/tests/v1/parsing/__init__.py similarity index 100% rename from tests/mindee_http/__init__.py rename to tests/v1/parsing/__init__.py diff --git a/tests/product/barcode_reader/__init__.py b/tests/v1/parsing/common/__init__.py similarity index 100% rename from tests/product/barcode_reader/__init__.py rename to tests/v1/parsing/common/__init__.py diff --git a/tests/fields/test_ocr.py b/tests/v1/parsing/common/test_ocr.py similarity index 100% rename from tests/fields/test_ocr.py rename to tests/v1/parsing/common/test_ocr.py diff --git a/tests/fields/test_orientation.py b/tests/v1/parsing/common/test_orientation.py similarity index 100% rename from tests/fields/test_orientation.py rename to tests/v1/parsing/common/test_orientation.py diff --git a/tests/product/bill_of_lading/__init__.py b/tests/v1/parsing/standard/__init__.py similarity index 100% rename from tests/product/bill_of_lading/__init__.py rename to tests/v1/parsing/standard/__init__.py diff --git 
a/tests/fields/test_amount.py b/tests/v1/parsing/standard/test_amount.py similarity index 100% rename from tests/fields/test_amount.py rename to tests/v1/parsing/standard/test_amount.py diff --git a/tests/fields/test_date.py b/tests/v1/parsing/standard/test_date.py similarity index 100% rename from tests/fields/test_date.py rename to tests/v1/parsing/standard/test_date.py diff --git a/tests/fields/test_field.py b/tests/v1/parsing/standard/test_field.py similarity index 100% rename from tests/fields/test_field.py rename to tests/v1/parsing/standard/test_field.py diff --git a/tests/fields/test_locale.py b/tests/v1/parsing/standard/test_locale.py similarity index 100% rename from tests/fields/test_locale.py rename to tests/v1/parsing/standard/test_locale.py diff --git a/tests/fields/test_payment_details.py b/tests/v1/parsing/standard/test_payment_details.py similarity index 100% rename from tests/fields/test_payment_details.py rename to tests/v1/parsing/standard/test_payment_details.py diff --git a/tests/fields/test_position.py b/tests/v1/parsing/standard/test_position.py similarity index 100% rename from tests/fields/test_position.py rename to tests/v1/parsing/standard/test_position.py diff --git a/tests/fields/test_text.py b/tests/v1/parsing/standard/test_string.py similarity index 100% rename from tests/fields/test_text.py rename to tests/v1/parsing/standard/test_string.py diff --git a/tests/fields/test_tax.py b/tests/v1/parsing/standard/test_tax.py similarity index 100% rename from tests/fields/test_tax.py rename to tests/v1/parsing/standard/test_tax.py diff --git a/tests/product/__init__.py b/tests/v1/product/__init__.py similarity index 100% rename from tests/product/__init__.py rename to tests/v1/product/__init__.py diff --git a/tests/product/business_card/__init__.py b/tests/v1/product/barcode_reader/__init__.py similarity index 100% rename from tests/product/business_card/__init__.py rename to tests/v1/product/barcode_reader/__init__.py diff --git 
a/tests/product/barcode_reader/test_barcode_reader_v1.py b/tests/v1/product/barcode_reader/test_barcode_reader_v1.py similarity index 100% rename from tests/product/barcode_reader/test_barcode_reader_v1.py rename to tests/v1/product/barcode_reader/test_barcode_reader_v1.py diff --git a/tests/product/barcode_reader/test_barcode_reader_v1_regression.py b/tests/v1/product/barcode_reader/test_barcode_reader_v1_regression.py similarity index 93% rename from tests/product/barcode_reader/test_barcode_reader_v1_regression.py rename to tests/v1/product/barcode_reader/test_barcode_reader_v1_regression.py index dc162613..af32bcf9 100644 --- a/tests/product/barcode_reader/test_barcode_reader_v1_regression.py +++ b/tests/v1/product/barcode_reader/test_barcode_reader_v1_regression.py @@ -2,8 +2,8 @@ from mindee.client import Client from mindee.product.barcode_reader.barcode_reader_v1 import BarcodeReaderV1 -from tests.product import get_id, get_version from tests.utils import PRODUCT_DATA_DIR +from tests.v1.product import get_id, get_version @pytest.mark.regression diff --git a/tests/product/cropper/__init__.py b/tests/v1/product/bill_of_lading/__init__.py similarity index 100% rename from tests/product/cropper/__init__.py rename to tests/v1/product/bill_of_lading/__init__.py diff --git a/tests/product/bill_of_lading/test_bill_of_lading_v1.py b/tests/v1/product/bill_of_lading/test_bill_of_lading_v1.py similarity index 100% rename from tests/product/bill_of_lading/test_bill_of_lading_v1.py rename to tests/v1/product/bill_of_lading/test_bill_of_lading_v1.py diff --git a/tests/product/custom/__init__.py b/tests/v1/product/business_card/__init__.py similarity index 100% rename from tests/product/custom/__init__.py rename to tests/v1/product/business_card/__init__.py diff --git a/tests/product/business_card/test_business_card_v1.py b/tests/v1/product/business_card/test_business_card_v1.py similarity index 100% rename from tests/product/business_card/test_business_card_v1.py rename to 
tests/v1/product/business_card/test_business_card_v1.py diff --git a/tests/product/delivery_note/__init__.py b/tests/v1/product/cropper/__init__.py similarity index 100% rename from tests/product/delivery_note/__init__.py rename to tests/v1/product/cropper/__init__.py diff --git a/tests/product/cropper/test_cropper_v1.py b/tests/v1/product/cropper/test_cropper_v1.py similarity index 100% rename from tests/product/cropper/test_cropper_v1.py rename to tests/v1/product/cropper/test_cropper_v1.py diff --git a/tests/product/cropper/test_cropper_v1_regression.py b/tests/v1/product/cropper/test_cropper_v1_regression.py similarity index 93% rename from tests/product/cropper/test_cropper_v1_regression.py rename to tests/v1/product/cropper/test_cropper_v1_regression.py index 7c37795c..aca34886 100644 --- a/tests/product/cropper/test_cropper_v1_regression.py +++ b/tests/v1/product/cropper/test_cropper_v1_regression.py @@ -2,8 +2,8 @@ from mindee.client import Client from mindee.product.cropper.cropper_v1 import CropperV1 -from tests.product import get_id, get_version from tests.utils import PRODUCT_DATA_DIR +from tests.v1.product import get_id, get_version @pytest.mark.regression diff --git a/tests/product/driver_license/__init__.py b/tests/v1/product/custom/__init__.py similarity index 100% rename from tests/product/driver_license/__init__.py rename to tests/v1/product/custom/__init__.py diff --git a/tests/product/custom/test_custom_v1.py b/tests/v1/product/custom/test_custom_v1.py similarity index 100% rename from tests/product/custom/test_custom_v1.py rename to tests/v1/product/custom/test_custom_v1.py diff --git a/tests/product/custom/test_custom_v1_line_items.py b/tests/v1/product/custom/test_custom_v1_line_items.py similarity index 100% rename from tests/product/custom/test_custom_v1_line_items.py rename to tests/v1/product/custom/test_custom_v1_line_items.py diff --git a/tests/product/custom/test_custom_v1_v2.py b/tests/v1/product/custom/test_custom_v1_v2.py similarity 
index 100% rename from tests/product/custom/test_custom_v1_v2.py rename to tests/v1/product/custom/test_custom_v1_v2.py diff --git a/tests/product/financial_document/__init__.py b/tests/v1/product/delivery_note/__init__.py similarity index 100% rename from tests/product/financial_document/__init__.py rename to tests/v1/product/delivery_note/__init__.py diff --git a/tests/product/delivery_note/test_delivery_note_v1.py b/tests/v1/product/delivery_note/test_delivery_note_v1.py similarity index 100% rename from tests/product/delivery_note/test_delivery_note_v1.py rename to tests/v1/product/delivery_note/test_delivery_note_v1.py diff --git a/tests/product/fr/__init__.py b/tests/v1/product/driver_license/__init__.py similarity index 100% rename from tests/product/fr/__init__.py rename to tests/v1/product/driver_license/__init__.py diff --git a/tests/product/driver_license/test_driver_license_v1.py b/tests/v1/product/driver_license/test_driver_license_v1.py similarity index 100% rename from tests/product/driver_license/test_driver_license_v1.py rename to tests/v1/product/driver_license/test_driver_license_v1.py diff --git a/tests/product/fr/bank_account_details/__init__.py b/tests/v1/product/financial_document/__init__.py similarity index 100% rename from tests/product/fr/bank_account_details/__init__.py rename to tests/v1/product/financial_document/__init__.py diff --git a/tests/product/financial_document/test_financial_document_v1.py b/tests/v1/product/financial_document/test_financial_document_v1.py similarity index 100% rename from tests/product/financial_document/test_financial_document_v1.py rename to tests/v1/product/financial_document/test_financial_document_v1.py diff --git a/tests/product/financial_document/test_financial_document_v1_regression.py b/tests/v1/product/financial_document/test_financial_document_v1_regression.py similarity index 94% rename from tests/product/financial_document/test_financial_document_v1_regression.py rename to 
tests/v1/product/financial_document/test_financial_document_v1_regression.py index ededb12a..da3195a9 100644 --- a/tests/product/financial_document/test_financial_document_v1_regression.py +++ b/tests/v1/product/financial_document/test_financial_document_v1_regression.py @@ -2,8 +2,8 @@ from mindee.client import Client from mindee.product.financial_document.financial_document_v1 import FinancialDocumentV1 -from tests.product import get_id, get_version from tests.utils import PRODUCT_DATA_DIR +from tests.v1.product import get_id, get_version @pytest.mark.regression diff --git a/tests/product/fr/carte_grise/__init__.py b/tests/v1/product/fr/__init__.py similarity index 100% rename from tests/product/fr/carte_grise/__init__.py rename to tests/v1/product/fr/__init__.py diff --git a/tests/product/fr/energy_bill/__init__.py b/tests/v1/product/fr/bank_account_details/__init__.py similarity index 100% rename from tests/product/fr/energy_bill/__init__.py rename to tests/v1/product/fr/bank_account_details/__init__.py diff --git a/tests/product/fr/bank_account_details/test_bank_account_details_v1.py b/tests/v1/product/fr/bank_account_details/test_bank_account_details_v1.py similarity index 100% rename from tests/product/fr/bank_account_details/test_bank_account_details_v1.py rename to tests/v1/product/fr/bank_account_details/test_bank_account_details_v1.py diff --git a/tests/product/fr/bank_account_details/test_bank_account_details_v1_regression.py b/tests/v1/product/fr/bank_account_details/test_bank_account_details_v1_regression.py similarity index 94% rename from tests/product/fr/bank_account_details/test_bank_account_details_v1_regression.py rename to tests/v1/product/fr/bank_account_details/test_bank_account_details_v1_regression.py index ee9505c5..8f1462b8 100644 --- a/tests/product/fr/bank_account_details/test_bank_account_details_v1_regression.py +++ b/tests/v1/product/fr/bank_account_details/test_bank_account_details_v1_regression.py @@ -4,8 +4,8 @@ from 
mindee.product.fr.bank_account_details.bank_account_details_v1 import ( BankAccountDetailsV1, ) -from tests.product import get_id, get_version from tests.utils import PRODUCT_DATA_DIR +from tests.v1.product import get_id, get_version @pytest.mark.regression diff --git a/tests/product/fr/bank_account_details/test_bank_account_details_v2.py b/tests/v1/product/fr/bank_account_details/test_bank_account_details_v2.py similarity index 100% rename from tests/product/fr/bank_account_details/test_bank_account_details_v2.py rename to tests/v1/product/fr/bank_account_details/test_bank_account_details_v2.py diff --git a/tests/product/fr/bank_account_details/test_bank_account_details_v2_regression.py b/tests/v1/product/fr/bank_account_details/test_bank_account_details_v2_regression.py similarity index 94% rename from tests/product/fr/bank_account_details/test_bank_account_details_v2_regression.py rename to tests/v1/product/fr/bank_account_details/test_bank_account_details_v2_regression.py index a62d0729..3decfeeb 100644 --- a/tests/product/fr/bank_account_details/test_bank_account_details_v2_regression.py +++ b/tests/v1/product/fr/bank_account_details/test_bank_account_details_v2_regression.py @@ -4,8 +4,8 @@ from mindee.product.fr.bank_account_details.bank_account_details_v2 import ( BankAccountDetailsV2, ) -from tests.product import get_id, get_version from tests.utils import PRODUCT_DATA_DIR +from tests.v1.product import get_id, get_version @pytest.mark.regression diff --git a/tests/product/fr/health_card/__init__.py b/tests/v1/product/fr/carte_grise/__init__.py similarity index 100% rename from tests/product/fr/health_card/__init__.py rename to tests/v1/product/fr/carte_grise/__init__.py diff --git a/tests/product/fr/carte_grise/test_carte_grise_v1.py b/tests/v1/product/fr/carte_grise/test_carte_grise_v1.py similarity index 100% rename from tests/product/fr/carte_grise/test_carte_grise_v1.py rename to tests/v1/product/fr/carte_grise/test_carte_grise_v1.py diff --git 
a/tests/product/fr/carte_grise/test_carte_grise_v1_regression.py b/tests/v1/product/fr/carte_grise/test_carte_grise_v1_regression.py similarity index 93% rename from tests/product/fr/carte_grise/test_carte_grise_v1_regression.py rename to tests/v1/product/fr/carte_grise/test_carte_grise_v1_regression.py index 27c721a0..369f6aa5 100644 --- a/tests/product/fr/carte_grise/test_carte_grise_v1_regression.py +++ b/tests/v1/product/fr/carte_grise/test_carte_grise_v1_regression.py @@ -2,8 +2,8 @@ from mindee.client import Client from mindee.product.fr.carte_grise.carte_grise_v1 import CarteGriseV1 -from tests.product import get_id, get_version from tests.utils import PRODUCT_DATA_DIR +from tests.v1.product import get_id, get_version @pytest.mark.regression diff --git a/tests/product/fr/id_card/__init__.py b/tests/v1/product/fr/energy_bill/__init__.py similarity index 100% rename from tests/product/fr/id_card/__init__.py rename to tests/v1/product/fr/energy_bill/__init__.py diff --git a/tests/product/fr/energy_bill/test_energy_bill_v1.py b/tests/v1/product/fr/energy_bill/test_energy_bill_v1.py similarity index 100% rename from tests/product/fr/energy_bill/test_energy_bill_v1.py rename to tests/v1/product/fr/energy_bill/test_energy_bill_v1.py diff --git a/tests/product/fr/payslip/__init__.py b/tests/v1/product/fr/health_card/__init__.py similarity index 100% rename from tests/product/fr/payslip/__init__.py rename to tests/v1/product/fr/health_card/__init__.py diff --git a/tests/product/fr/health_card/test_health_card_v1.py b/tests/v1/product/fr/health_card/test_health_card_v1.py similarity index 100% rename from tests/product/fr/health_card/test_health_card_v1.py rename to tests/v1/product/fr/health_card/test_health_card_v1.py diff --git a/tests/product/generated/__init__.py b/tests/v1/product/fr/id_card/__init__.py similarity index 100% rename from tests/product/generated/__init__.py rename to tests/v1/product/fr/id_card/__init__.py diff --git 
a/tests/product/fr/id_card/test_id_card_v1.py b/tests/v1/product/fr/id_card/test_id_card_v1.py similarity index 100% rename from tests/product/fr/id_card/test_id_card_v1.py rename to tests/v1/product/fr/id_card/test_id_card_v1.py diff --git a/tests/product/fr/id_card/test_id_card_v1_regression.py b/tests/v1/product/fr/id_card/test_id_card_v1_regression.py similarity index 93% rename from tests/product/fr/id_card/test_id_card_v1_regression.py rename to tests/v1/product/fr/id_card/test_id_card_v1_regression.py index a75d4ea1..000aeebf 100644 --- a/tests/product/fr/id_card/test_id_card_v1_regression.py +++ b/tests/v1/product/fr/id_card/test_id_card_v1_regression.py @@ -2,8 +2,8 @@ from mindee.client import Client from mindee.product.fr.id_card.id_card_v1 import IdCardV1 -from tests.product import get_id, get_version from tests.utils import PRODUCT_DATA_DIR +from tests.v1.product import get_id, get_version @pytest.mark.regression diff --git a/tests/product/fr/id_card/test_id_card_v2.py b/tests/v1/product/fr/id_card/test_id_card_v2.py similarity index 100% rename from tests/product/fr/id_card/test_id_card_v2.py rename to tests/v1/product/fr/id_card/test_id_card_v2.py diff --git a/tests/product/fr/id_card/test_id_card_v2_regression.py b/tests/v1/product/fr/id_card/test_id_card_v2_regression.py similarity index 93% rename from tests/product/fr/id_card/test_id_card_v2_regression.py rename to tests/v1/product/fr/id_card/test_id_card_v2_regression.py index f8835de8..b3631a53 100644 --- a/tests/product/fr/id_card/test_id_card_v2_regression.py +++ b/tests/v1/product/fr/id_card/test_id_card_v2_regression.py @@ -2,8 +2,8 @@ from mindee.client import Client from mindee.product.fr.id_card.id_card_v2 import IdCardV2 -from tests.product import get_id, get_version from tests.utils import PRODUCT_DATA_DIR +from tests.v1.product import get_id, get_version @pytest.mark.regression diff --git a/tests/product/ind/__init__.py b/tests/v1/product/fr/payslip/__init__.py similarity index 100% 
rename from tests/product/ind/__init__.py rename to tests/v1/product/fr/payslip/__init__.py diff --git a/tests/product/fr/payslip/test_payslip_v2.py b/tests/v1/product/fr/payslip/test_payslip_v2.py similarity index 100% rename from tests/product/fr/payslip/test_payslip_v2.py rename to tests/v1/product/fr/payslip/test_payslip_v2.py diff --git a/tests/product/fr/payslip/test_payslip_v3.py b/tests/v1/product/fr/payslip/test_payslip_v3.py similarity index 100% rename from tests/product/fr/payslip/test_payslip_v3.py rename to tests/v1/product/fr/payslip/test_payslip_v3.py diff --git a/tests/product/ind/indian_passport/__init__.py b/tests/v1/product/generated/__init__.py similarity index 100% rename from tests/product/ind/indian_passport/__init__.py rename to tests/v1/product/generated/__init__.py diff --git a/tests/product/generated/test_generated_v1.py b/tests/v1/product/generated/test_generated_v1.py similarity index 100% rename from tests/product/generated/test_generated_v1.py rename to tests/v1/product/generated/test_generated_v1.py diff --git a/tests/product/international_id/__init__.py b/tests/v1/product/ind/__init__.py similarity index 100% rename from tests/product/international_id/__init__.py rename to tests/v1/product/ind/__init__.py diff --git a/tests/product/invoice/__init__.py b/tests/v1/product/ind/indian_passport/__init__.py similarity index 100% rename from tests/product/invoice/__init__.py rename to tests/v1/product/ind/indian_passport/__init__.py diff --git a/tests/product/ind/indian_passport/test_indian_passport_v1.py b/tests/v1/product/ind/indian_passport/test_indian_passport_v1.py similarity index 100% rename from tests/product/ind/indian_passport/test_indian_passport_v1.py rename to tests/v1/product/ind/indian_passport/test_indian_passport_v1.py diff --git a/tests/product/invoice_splitter/__init__.py b/tests/v1/product/international_id/__init__.py similarity index 100% rename from tests/product/invoice_splitter/__init__.py rename to 
tests/v1/product/international_id/__init__.py diff --git a/tests/product/international_id/test_international_id_v2.py b/tests/v1/product/international_id/test_international_id_v2.py similarity index 100% rename from tests/product/international_id/test_international_id_v2.py rename to tests/v1/product/international_id/test_international_id_v2.py diff --git a/tests/product/material_certificate/__init__.py b/tests/v1/product/invoice/__init__.py similarity index 100% rename from tests/product/material_certificate/__init__.py rename to tests/v1/product/invoice/__init__.py diff --git a/tests/product/invoice/test_invoice_v4.py b/tests/v1/product/invoice/test_invoice_v4.py similarity index 100% rename from tests/product/invoice/test_invoice_v4.py rename to tests/v1/product/invoice/test_invoice_v4.py diff --git a/tests/product/invoice/test_invoice_v4_regression.py b/tests/v1/product/invoice/test_invoice_v4_regression.py similarity index 93% rename from tests/product/invoice/test_invoice_v4_regression.py rename to tests/v1/product/invoice/test_invoice_v4_regression.py index cc24a98d..67516fd3 100644 --- a/tests/product/invoice/test_invoice_v4_regression.py +++ b/tests/v1/product/invoice/test_invoice_v4_regression.py @@ -2,8 +2,8 @@ from mindee.client import Client from mindee.product.invoice.invoice_v4 import InvoiceV4 -from tests.product import get_id, get_version from tests.utils import PRODUCT_DATA_DIR +from tests.v1.product import get_id, get_version @pytest.mark.regression diff --git a/tests/product/multi_receipts_detector/__init__.py b/tests/v1/product/invoice_splitter/__init__.py similarity index 100% rename from tests/product/multi_receipts_detector/__init__.py rename to tests/v1/product/invoice_splitter/__init__.py diff --git a/tests/product/invoice_splitter/test_invoice_splitter_v1.py b/tests/v1/product/invoice_splitter/test_invoice_splitter_v1.py similarity index 100% rename from tests/product/invoice_splitter/test_invoice_splitter_v1.py rename to 
tests/v1/product/invoice_splitter/test_invoice_splitter_v1.py diff --git a/tests/product/invoice_splitter/test_invoice_splitter_v1_regression.py b/tests/v1/product/invoice_splitter/test_invoice_splitter_v1_regression.py similarity index 94% rename from tests/product/invoice_splitter/test_invoice_splitter_v1_regression.py rename to tests/v1/product/invoice_splitter/test_invoice_splitter_v1_regression.py index 5782f13d..dc00837b 100644 --- a/tests/product/invoice_splitter/test_invoice_splitter_v1_regression.py +++ b/tests/v1/product/invoice_splitter/test_invoice_splitter_v1_regression.py @@ -2,8 +2,8 @@ from mindee.client import Client from mindee.product.invoice_splitter.invoice_splitter_v1 import InvoiceSplitterV1 -from tests.product import get_id, get_version from tests.utils import PRODUCT_DATA_DIR +from tests.v1.product import get_id, get_version @pytest.mark.regression diff --git a/tests/product/nutrition_facts_label/__init__.py b/tests/v1/product/material_certificate/__init__.py similarity index 100% rename from tests/product/nutrition_facts_label/__init__.py rename to tests/v1/product/material_certificate/__init__.py diff --git a/tests/product/material_certificate/test_material_certificate_v1.py b/tests/v1/product/material_certificate/test_material_certificate_v1.py similarity index 100% rename from tests/product/material_certificate/test_material_certificate_v1.py rename to tests/v1/product/material_certificate/test_material_certificate_v1.py diff --git a/tests/product/passport/__init__.py b/tests/v1/product/multi_receipts_detector/__init__.py similarity index 100% rename from tests/product/passport/__init__.py rename to tests/v1/product/multi_receipts_detector/__init__.py diff --git a/tests/product/multi_receipts_detector/test_multi_receipts_detector_v1.py b/tests/v1/product/multi_receipts_detector/test_multi_receipts_detector_v1.py similarity index 100% rename from tests/product/multi_receipts_detector/test_multi_receipts_detector_v1.py rename to 
tests/v1/product/multi_receipts_detector/test_multi_receipts_detector_v1.py diff --git a/tests/product/multi_receipts_detector/test_multi_receipts_detector_v1_regression.py b/tests/v1/product/multi_receipts_detector/test_multi_receipts_detector_v1_regression.py similarity index 94% rename from tests/product/multi_receipts_detector/test_multi_receipts_detector_v1_regression.py rename to tests/v1/product/multi_receipts_detector/test_multi_receipts_detector_v1_regression.py index 87941071..6226a4db 100644 --- a/tests/product/multi_receipts_detector/test_multi_receipts_detector_v1_regression.py +++ b/tests/v1/product/multi_receipts_detector/test_multi_receipts_detector_v1_regression.py @@ -4,8 +4,8 @@ from mindee.product.multi_receipts_detector.multi_receipts_detector_v1 import ( MultiReceiptsDetectorV1, ) -from tests.product import get_id, get_version from tests.utils import PRODUCT_DATA_DIR +from tests.v1.product import get_id, get_version @pytest.mark.regression diff --git a/tests/product/receipt/__init__.py b/tests/v1/product/nutrition_facts_label/__init__.py similarity index 100% rename from tests/product/receipt/__init__.py rename to tests/v1/product/nutrition_facts_label/__init__.py diff --git a/tests/product/nutrition_facts_label/test_nutrition_facts_label_v1.py b/tests/v1/product/nutrition_facts_label/test_nutrition_facts_label_v1.py similarity index 100% rename from tests/product/nutrition_facts_label/test_nutrition_facts_label_v1.py rename to tests/v1/product/nutrition_facts_label/test_nutrition_facts_label_v1.py diff --git a/tests/product/resume/__init__.py b/tests/v1/product/passport/__init__.py similarity index 100% rename from tests/product/resume/__init__.py rename to tests/v1/product/passport/__init__.py diff --git a/tests/product/passport/test_passport_v1.py b/tests/v1/product/passport/test_passport_v1.py similarity index 100% rename from tests/product/passport/test_passport_v1.py rename to tests/v1/product/passport/test_passport_v1.py diff --git 
a/tests/product/passport/test_passport_v1_regression.py b/tests/v1/product/passport/test_passport_v1_regression.py similarity index 93% rename from tests/product/passport/test_passport_v1_regression.py rename to tests/v1/product/passport/test_passport_v1_regression.py index 98c7e708..515762b7 100644 --- a/tests/product/passport/test_passport_v1_regression.py +++ b/tests/v1/product/passport/test_passport_v1_regression.py @@ -2,8 +2,8 @@ from mindee.client import Client from mindee.product.passport.passport_v1 import PassportV1 -from tests.product import get_id, get_version from tests.utils import PRODUCT_DATA_DIR +from tests.v1.product import get_id, get_version @pytest.mark.regression diff --git a/tests/product/us/__init__.py b/tests/v1/product/receipt/__init__.py similarity index 100% rename from tests/product/us/__init__.py rename to tests/v1/product/receipt/__init__.py diff --git a/tests/product/receipt/test_receipt_v5.py b/tests/v1/product/receipt/test_receipt_v5.py similarity index 100% rename from tests/product/receipt/test_receipt_v5.py rename to tests/v1/product/receipt/test_receipt_v5.py diff --git a/tests/product/receipt/test_receipt_v5_regression.py b/tests/v1/product/receipt/test_receipt_v5_regression.py similarity index 93% rename from tests/product/receipt/test_receipt_v5_regression.py rename to tests/v1/product/receipt/test_receipt_v5_regression.py index c10049e3..c0109a0f 100644 --- a/tests/product/receipt/test_receipt_v5_regression.py +++ b/tests/v1/product/receipt/test_receipt_v5_regression.py @@ -2,8 +2,8 @@ from mindee.client import Client from mindee.product.receipt.receipt_v5 import ReceiptV5 -from tests.product import get_id, get_version from tests.utils import PRODUCT_DATA_DIR +from tests.v1.product import get_id, get_version @pytest.mark.regression diff --git a/tests/product/us/bank_check/__init__.py b/tests/v1/product/resume/__init__.py similarity index 100% rename from tests/product/us/bank_check/__init__.py rename to 
tests/v1/product/resume/__init__.py diff --git a/tests/product/resume/test_resume_v1.py b/tests/v1/product/resume/test_resume_v1.py similarity index 100% rename from tests/product/resume/test_resume_v1.py rename to tests/v1/product/resume/test_resume_v1.py diff --git a/tests/product/us/healthcare_card/__init__.py b/tests/v1/product/us/__init__.py similarity index 100% rename from tests/product/us/healthcare_card/__init__.py rename to tests/v1/product/us/__init__.py diff --git a/tests/product/us/us_mail/__init__.py b/tests/v1/product/us/bank_check/__init__.py similarity index 100% rename from tests/product/us/us_mail/__init__.py rename to tests/v1/product/us/bank_check/__init__.py diff --git a/tests/product/us/bank_check/test_bank_check_v1.py b/tests/v1/product/us/bank_check/test_bank_check_v1.py similarity index 100% rename from tests/product/us/bank_check/test_bank_check_v1.py rename to tests/v1/product/us/bank_check/test_bank_check_v1.py diff --git a/tests/product/us/bank_check/test_bank_check_v1_regression.py b/tests/v1/product/us/bank_check/test_bank_check_v1_regression.py similarity index 93% rename from tests/product/us/bank_check/test_bank_check_v1_regression.py rename to tests/v1/product/us/bank_check/test_bank_check_v1_regression.py index 2f232b2c..42f12731 100644 --- a/tests/product/us/bank_check/test_bank_check_v1_regression.py +++ b/tests/v1/product/us/bank_check/test_bank_check_v1_regression.py @@ -2,8 +2,8 @@ from mindee.client import Client from mindee.product.us.bank_check.bank_check_v1 import BankCheckV1 -from tests.product import get_id, get_version from tests.utils import PRODUCT_DATA_DIR +from tests.v1.product import get_id, get_version @pytest.mark.regression diff --git a/tests/workflows/__init__.py b/tests/v1/product/us/healthcare_card/__init__.py similarity index 100% rename from tests/workflows/__init__.py rename to tests/v1/product/us/healthcare_card/__init__.py diff --git a/tests/product/us/healthcare_card/test_healthcare_card_v1.py 
b/tests/v1/product/us/healthcare_card/test_healthcare_card_v1.py similarity index 100% rename from tests/product/us/healthcare_card/test_healthcare_card_v1.py rename to tests/v1/product/us/healthcare_card/test_healthcare_card_v1.py diff --git a/tests/v1/product/us/us_mail/__init__.py b/tests/v1/product/us/us_mail/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/product/us/us_mail/test_us_mail_v3.py b/tests/v1/product/us/us_mail/test_us_mail_v3.py similarity index 100% rename from tests/product/us/us_mail/test_us_mail_v3.py rename to tests/v1/product/us/us_mail/test_us_mail_v3.py diff --git a/tests/test_cli.py b/tests/v1/test_cli.py similarity index 100% rename from tests/test_cli.py rename to tests/v1/test_cli.py diff --git a/tests/test_client.py b/tests/v1/test_client.py similarity index 99% rename from tests/test_client.py rename to tests/v1/test_client.py index 3dd5f7b7..bf5eac0a 100644 --- a/tests/test_client.py +++ b/tests/v1/test_client.py @@ -12,8 +12,8 @@ from mindee.product.invoice.invoice_v4 import InvoiceV4 from mindee.product.invoice_splitter.invoice_splitter_v1 import InvoiceSplitterV1 from mindee.product.receipt.receipt_v5 import ReceiptV5 -from tests.mindee_http.test_error import ERROR_DATA_DIR from tests.utils import FILE_TYPES_DIR, PRODUCT_DATA_DIR, clear_envvars, dummy_envvars +from tests.v1.mindee_http import ERROR_DATA_DIR @pytest.fixture diff --git a/tests/v1/workflows/__init__.py b/tests/v1/workflows/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/workflows/test_workflow.py b/tests/v1/workflows/test_workflow.py similarity index 100% rename from tests/workflows/test_workflow.py rename to tests/v1/workflows/test_workflow.py diff --git a/tests/workflows/test_workflow_integration.py b/tests/v1/workflows/test_workflow_integration.py similarity index 100% rename from tests/workflows/test_workflow_integration.py rename to tests/v1/workflows/test_workflow_integration.py diff --git 
a/tests/test_client_v2.py b/tests/v2/test_client_v2.py similarity index 100% rename from tests/test_client_v2.py rename to tests/v2/test_client_v2.py diff --git a/tests/test_client_v2_integration.py b/tests/v2/test_client_v2_integration.py similarity index 100% rename from tests/test_client_v2_integration.py rename to tests/v2/test_client_v2_integration.py From 26ca48eb429d98cddd7452c1311a9f03e47c5a87 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Thu, 30 Oct 2025 16:49:33 +0100 Subject: [PATCH 2/3] re-organize tests --- tests/utils.py | 12 +++--- tests/v1/api/test_async_response.py | 7 ++- tests/v1/api/test_feedback_response.py | 3 +- tests/v1/api/test_response.py | 14 +++--- tests/v1/extraction/test_image_extractor.py | 6 +-- .../test_invoice_splitter_auto_extraction.py | 11 +++-- .../test_multi_receipts_extractor.py | 13 +++--- tests/v1/extraction/test_pdf_extractor.py | 8 ++-- tests/v1/extras/test_extras_integration.py | 6 +-- tests/v1/input/test_apply_page_options.py | 6 +-- tests/v1/input/test_compression.py | 43 +++++++++---------- tests/v1/mindee_http/__init__.py | 2 +- tests/v1/mindee_http/test_error.py | 15 +++---- tests/v1/parsing/common/test_ocr.py | 5 ++- .../barcode_reader/test_barcode_reader_v1.py | 4 +- .../test_barcode_reader_v1_regression.py | 6 +-- .../bill_of_lading/test_bill_of_lading_v1.py | 4 +- .../business_card/test_business_card_v1.py | 4 +- tests/v1/product/cropper/test_cropper_v1.py | 4 +- .../cropper/test_cropper_v1_regression.py | 6 +-- tests/v1/product/custom/test_custom_v1.py | 16 +++---- .../custom/test_custom_v1_line_items.py | 13 +++++- tests/v1/product/custom/test_custom_v1_v2.py | 16 +++---- .../delivery_note/test_delivery_note_v1.py | 4 +- .../driver_license/test_driver_license_v1.py | 4 +- .../test_financial_document_v1.py | 4 +- .../test_financial_document_v1_regression.py | 9 ++-- .../test_bank_account_details_v1.py | 4 +- 
...test_bank_account_details_v1_regression.py | 6 +-- .../test_bank_account_details_v2.py | 4 +- ...test_bank_account_details_v2_regression.py | 6 +-- .../fr/carte_grise/test_carte_grise_v1.py | 4 +- .../test_carte_grise_v1_regression.py | 6 +-- .../fr/energy_bill/test_energy_bill_v1.py | 4 +- .../fr/health_card/test_health_card_v1.py | 4 +- .../v1/product/fr/id_card/test_id_card_v1.py | 4 +- .../fr/id_card/test_id_card_v1_regression.py | 6 +-- .../v1/product/fr/id_card/test_id_card_v2.py | 4 +- .../fr/id_card/test_id_card_v2_regression.py | 6 +-- .../v1/product/fr/payslip/test_payslip_v2.py | 4 +- .../v1/product/fr/payslip/test_payslip_v3.py | 4 +- .../v1/product/generated/test_generated_v1.py | 37 +++++++++++----- .../test_indian_passport_v1.py | 4 +- .../test_international_id_v2.py | 4 +- tests/v1/product/invoice/test_invoice_v4.py | 4 +- .../invoice/test_invoice_v4_regression.py | 6 +-- .../test_invoice_splitter_v1.py | 4 +- .../test_invoice_splitter_v1_regression.py | 6 +-- .../test_material_certificate_v1.py | 4 +- .../test_multi_receipts_detector_v1.py | 4 +- ...t_multi_receipts_detector_v1_regression.py | 6 +-- .../test_nutrition_facts_label_v1.py | 4 +- tests/v1/product/passport/test_passport_v1.py | 4 +- .../passport/test_passport_v1_regression.py | 6 +-- tests/v1/product/receipt/test_receipt_v5.py | 4 +- .../receipt/test_receipt_v5_regression.py | 6 +-- tests/v1/product/resume/test_resume_v1.py | 4 +- .../us/bank_check/test_bank_check_v1.py | 4 +- .../test_bank_check_v1_regression.py | 6 +-- .../test_healthcare_card_v1.py | 4 +- .../v1/product/us/us_mail/test_us_mail_v3.py | 4 +- tests/v1/test_cli.py | 14 +++--- tests/v1/test_client.py | 24 +++++++---- tests/v1/workflows/test_workflow.py | 3 +- .../v1/workflows/test_workflow_integration.py | 4 +- tests/v2/test_client_v2_integration.py | 4 +- 66 files changed, 267 insertions(+), 218 deletions(-) diff --git a/tests/utils.py b/tests/utils.py index b550e1ee..bcf0e780 100644 --- a/tests/utils.py +++ 
b/tests/utils.py @@ -7,12 +7,12 @@ REQUEST_TIMEOUT_ENV_NAME, ) -DATA_DIR = Path("./tests/data/") - -EXTRAS_DIR = DATA_DIR / "extras" -FILE_TYPES_DIR = DATA_DIR / "file_types" -V2_DATA_DIR = DATA_DIR / "v2" -PRODUCT_DATA_DIR = DATA_DIR / "products" +ROOT_DATA_DIR = Path("./tests/data/") +V1_DATA_DIR = ROOT_DATA_DIR / "v1" +V2_DATA_DIR = ROOT_DATA_DIR / "v2" +EXTRAS_DIR = V1_DATA_DIR / "extras" +FILE_TYPES_DIR = ROOT_DATA_DIR / "file_types" +V1_PRODUCT_DATA_DIR = V1_DATA_DIR / "products" def clear_envvars(monkeypatch) -> None: diff --git a/tests/v1/api/test_async_response.py b/tests/v1/api/test_async_response.py index e8163d0c..f1493535 100644 --- a/tests/v1/api/test_async_response.py +++ b/tests/v1/api/test_async_response.py @@ -10,8 +10,9 @@ from mindee.parsing.common.api_request import RequestStatus from mindee.parsing.common.async_predict_response import AsyncPredictResponse from mindee.product.invoice_splitter.invoice_splitter_v1 import InvoiceSplitterV1 +from tests.utils import V1_DATA_DIR, V1_PRODUCT_DATA_DIR -ASYNC_DIR = Path("./tests/data/async") +ASYNC_DIR = V1_DATA_DIR / "async" FILE_PATH_POST_SUCCESS = ASYNC_DIR / "post_success.json" FILE_PATH_POST_FAIL = ASYNC_DIR / "post_fail_forbidden.json" @@ -41,7 +42,9 @@ def content(self) -> str: @pytest.fixture def dummy_file_input() -> PathInput: - file_input = PathInput("./tests/data/products/invoice_splitter/default_sample.pdf") + file_input = PathInput( + V1_PRODUCT_DATA_DIR / "invoice_splitter" / "default_sample.pdf" + ) return file_input diff --git a/tests/v1/api/test_feedback_response.py b/tests/v1/api/test_feedback_response.py index cbcef53b..a6c3def2 100644 --- a/tests/v1/api/test_feedback_response.py +++ b/tests/v1/api/test_feedback_response.py @@ -1,11 +1,12 @@ import json from mindee.parsing.common.feedback_response import FeedbackResponse +from tests.utils import V1_PRODUCT_DATA_DIR def test_empty_feedback_response(): response = json.load( - 
open("./tests/data/products/invoices/feedback_response/empty.json") + open(V1_PRODUCT_DATA_DIR / "invoices" / "feedback_response" / "empty.json") ) feedback_response = FeedbackResponse(response) assert feedback_response is not None diff --git a/tests/v1/api/test_response.py b/tests/v1/api/test_response.py index 4e9b0a3e..2782d955 100644 --- a/tests/v1/api/test_response.py +++ b/tests/v1/api/test_response.py @@ -14,11 +14,12 @@ from mindee.product.passport.passport_v1_document import PassportV1Document from mindee.product.receipt.receipt_v5 import ReceiptV5 from mindee.product.receipt.receipt_v5_document import ReceiptV5Document +from tests.utils import V1_PRODUCT_DATA_DIR def test_invoice_receipt_v5(): response = json.load( - open("./tests/data/products/invoices/response_v4/complete.json") + open(V1_PRODUCT_DATA_DIR / "invoices" / "response_v4" / "complete.json") ) parsed_response = PredictResponse(InvoiceV4, response) assert isinstance(parsed_response.document.inference, InvoiceV4) @@ -29,7 +30,7 @@ def test_invoice_receipt_v5(): def test_response_receipt_v5(): response = json.load( - open("./tests/data/products/expense_receipts/response_v5/complete.json") + open(V1_PRODUCT_DATA_DIR / "expense_receipts" / "response_v5" / "complete.json") ) parsed_response = PredictResponse(ReceiptV5, response) assert isinstance(parsed_response.document.inference, ReceiptV5) @@ -41,7 +42,10 @@ def test_response_receipt_v5(): def test_response_financial_doc_with_receipt(): response = json.load( open( - "./tests/data/products/financial_document/response_v1/complete_receipt.json" + V1_PRODUCT_DATA_DIR + / "financial_document" + / "response_v1" + / "complete_receipt.json" ) ) parsed_response = PredictResponse(FinancialDocumentV1, response) @@ -55,7 +59,7 @@ def test_response_financial_doc_with_receipt(): def test_response_passport_v1(): response = json.load( - open("./tests/data/products/passport/response_v1/complete.json") + open(V1_PRODUCT_DATA_DIR / "passport" / "response_v1" / 
"complete.json") ) parsed_response = PredictResponse(PassportV1, response) assert isinstance(parsed_response.document.inference, PassportV1) @@ -67,7 +71,7 @@ def test_response_passport_v1(): def test_response_fr_idcard_v2(): response = json.load( - open("./tests/data/products/idcard_fr/response_v2/complete.json") + open(V1_PRODUCT_DATA_DIR / "idcard_fr" / "response_v2" / "complete.json") ) parsed_response = PredictResponse(IdCardV2, response) assert isinstance(parsed_response.document.inference, IdCardV2) diff --git a/tests/v1/extraction/test_image_extractor.py b/tests/v1/extraction/test_image_extractor.py index 05c95cfb..6416802c 100644 --- a/tests/v1/extraction/test_image_extractor.py +++ b/tests/v1/extraction/test_image_extractor.py @@ -6,17 +6,17 @@ from mindee.extraction.common.image_extractor import extract_multiple_images_from_source from mindee.input.sources.path_input import PathInput from mindee.product.barcode_reader.barcode_reader_v1 import BarcodeReaderV1 -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR @pytest.fixture def barcode_path(): - return PRODUCT_DATA_DIR / "barcode_reader" / "default_sample.jpg" + return V1_PRODUCT_DATA_DIR / "barcode_reader" / "default_sample.jpg" @pytest.fixture def barcode_json_path(): - return PRODUCT_DATA_DIR / "barcode_reader" / "response_v1" / "complete.json" + return V1_PRODUCT_DATA_DIR / "barcode_reader" / "response_v1" / "complete.json" def test_barcode_image_extraction(barcode_path, barcode_json_path): diff --git a/tests/v1/extraction/test_invoice_splitter_auto_extraction.py b/tests/v1/extraction/test_invoice_splitter_auto_extraction.py index e8b537e4..6ecf7693 100644 --- a/tests/v1/extraction/test_invoice_splitter_auto_extraction.py +++ b/tests/v1/extraction/test_invoice_splitter_auto_extraction.py @@ -8,13 +8,13 @@ from mindee.parsing.common.document import Document from mindee.product.invoice.invoice_v4 import InvoiceV4 from 
mindee.product.invoice_splitter.invoice_splitter_v1 import InvoiceSplitterV1 -from tests.utils import PRODUCT_DATA_DIR, levenshtein_ratio +from tests.utils import V1_PRODUCT_DATA_DIR, levenshtein_ratio from tests.v1.product import get_id, get_version @pytest.fixture def invoice_splitter_5p_path(): - return PRODUCT_DATA_DIR / "invoice_splitter" / "invoice_5p.pdf" + return V1_PRODUCT_DATA_DIR / "invoice_splitter" / "invoice_5p.pdf" def prepare_invoice_return(rst_file_path: Path, invoice_prediction: Document): @@ -31,7 +31,7 @@ def prepare_invoice_return(rst_file_path: Path, invoice_prediction: Document): def test_pdf_should_extract_invoices_strict(): client = Client() invoice_splitter_input = PathInput( - PRODUCT_DATA_DIR / "invoice_splitter" / "default_sample.pdf" + V1_PRODUCT_DATA_DIR / "invoice_splitter" / "default_sample.pdf" ) response = client.enqueue_and_parse( InvoiceSplitterV1, invoice_splitter_input, close_file=False @@ -50,7 +50,10 @@ def test_pdf_should_extract_invoices_strict(): invoice_0 = client.parse(InvoiceV4, extracted_pdfs_strict[0].as_input_source()) test_string_rst_invoice_0 = prepare_invoice_return( - PRODUCT_DATA_DIR / "invoices" / "response_v4" / "summary_full_invoice_p1.rst", + V1_PRODUCT_DATA_DIR + / "invoices" + / "response_v4" + / "summary_full_invoice_p1.rst", invoice_0.document, ) assert levenshtein_ratio(test_string_rst_invoice_0, str(invoice_0.document)) >= 0.97 diff --git a/tests/v1/extraction/test_multi_receipts_extractor.py b/tests/v1/extraction/test_multi_receipts_extractor.py index 502d2cd1..f1c36451 100644 --- a/tests/v1/extraction/test_multi_receipts_extractor.py +++ b/tests/v1/extraction/test_multi_receipts_extractor.py @@ -10,30 +10,33 @@ from mindee.product.multi_receipts_detector.multi_receipts_detector_v1 import ( MultiReceiptsDetectorV1, ) -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR @pytest.fixture def multi_receipts_single_page_path(): - return PRODUCT_DATA_DIR / 
"multi_receipts_detector" / "default_sample.jpg" + return V1_PRODUCT_DATA_DIR / "multi_receipts_detector" / "default_sample.jpg" @pytest.fixture def multi_receipts_single_page_json_path(): return ( - PRODUCT_DATA_DIR / "multi_receipts_detector" / "response_v1" / "complete.json" + V1_PRODUCT_DATA_DIR + / "multi_receipts_detector" + / "response_v1" + / "complete.json" ) @pytest.fixture def multi_receipts_multi_page_path(): - return PRODUCT_DATA_DIR / "multi_receipts_detector" / "multipage_sample.pdf" + return V1_PRODUCT_DATA_DIR / "multi_receipts_detector" / "multipage_sample.pdf" @pytest.fixture def multi_receipts_multi_page_json_path(): return ( - PRODUCT_DATA_DIR + V1_PRODUCT_DATA_DIR / "multi_receipts_detector" / "response_v1" / "multipage_sample.json" diff --git a/tests/v1/extraction/test_pdf_extractor.py b/tests/v1/extraction/test_pdf_extractor.py index 3d76aba1..1359e1eb 100644 --- a/tests/v1/extraction/test_pdf_extractor.py +++ b/tests/v1/extraction/test_pdf_extractor.py @@ -8,24 +8,24 @@ from mindee.product.invoice_splitter.invoice_splitter_v1_document import ( InvoiceSplitterV1Document, ) -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR @pytest.fixture def invoice_default_sample_path(): - return PRODUCT_DATA_DIR / "invoices" / "default_sample.jpg" + return V1_PRODUCT_DATA_DIR / "invoices" / "default_sample.jpg" @pytest.fixture def invoice_splitter_5p_path(): - return PRODUCT_DATA_DIR / "invoice_splitter" / "invoice_5p.pdf" + return V1_PRODUCT_DATA_DIR / "invoice_splitter" / "invoice_5p.pdf" @pytest.fixture def loaded_prediction(): dummy_client = Client("dummy_key") loaded_prediction_path = ( - PRODUCT_DATA_DIR / "invoice_splitter" / "response_v1" / "complete.json" + V1_PRODUCT_DATA_DIR / "invoice_splitter" / "response_v1" / "complete.json" ) input_response = LocalResponse(loaded_prediction_path) response = dummy_client.load_prediction(InvoiceSplitterV1, input_response) diff --git 
a/tests/v1/extras/test_extras_integration.py b/tests/v1/extras/test_extras_integration.py index 5235fd16..9c669cc3 100644 --- a/tests/v1/extras/test_extras_integration.py +++ b/tests/v1/extras/test_extras_integration.py @@ -3,7 +3,7 @@ from mindee import Client from mindee.product.international_id.international_id_v2 import InternationalIdV2 from mindee.product.invoice.invoice_v4 import InvoiceV4 -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR @pytest.fixture @@ -15,7 +15,7 @@ def client(): @pytest.mark.integration def test_send_cropper_extra(client): sample = client.source_from_path( - PRODUCT_DATA_DIR / "invoices" / "default_sample.jpg", + V1_PRODUCT_DATA_DIR / "invoices" / "default_sample.jpg", ) response = client.parse(InvoiceV4, sample, cropper=True) assert response.document.inference.pages[0].extras.cropper @@ -24,7 +24,7 @@ def test_send_cropper_extra(client): @pytest.mark.integration def test_send_full_text_ocr_extra(client): sample = client.source_from_path( - PRODUCT_DATA_DIR / "international_id" / "default_sample.jpg", + V1_PRODUCT_DATA_DIR / "international_id" / "default_sample.jpg", ) response = client.enqueue_and_parse(InternationalIdV2, sample, full_text=True) assert response.document.extras.full_text_ocr diff --git a/tests/v1/input/test_apply_page_options.py b/tests/v1/input/test_apply_page_options.py index 6e70224b..238c49e0 100644 --- a/tests/v1/input/test_apply_page_options.py +++ b/tests/v1/input/test_apply_page_options.py @@ -12,7 +12,7 @@ LocalInputSource, PathInput, ) -from tests.utils import FILE_TYPES_DIR, PRODUCT_DATA_DIR +from tests.utils import FILE_TYPES_DIR, V1_PRODUCT_DATA_DIR def _assert_page_options(input_source: LocalInputSource, numb_pages: int): @@ -135,7 +135,7 @@ def test_pdf_input_from_file(): def test_pdf_input_from_base64(): - with open(PRODUCT_DATA_DIR / "invoices" / "invoice_10p.txt", "rt") as fp: + with open(V1_PRODUCT_DATA_DIR / "invoices" / "invoice_10p.txt", "rt") as fp: 
input_source = Base64Input(fp.read(), filename="invoice_10p.pdf") assert input_source.is_pdf() is True input_source.process_pdf(behavior=KEEP_ONLY, on_min_pages=2, page_indexes=[0]) @@ -143,7 +143,7 @@ def test_pdf_input_from_base64(): def test_pdf_input_from_bytes(): - with open(PRODUCT_DATA_DIR / "invoices" / "invoice_10p.pdf", "rb") as fp: + with open(V1_PRODUCT_DATA_DIR / "invoices" / "invoice_10p.pdf", "rb") as fp: input_source = BytesInput(fp.read(), filename="invoice_10p.pdf") assert input_source.is_pdf() is True input_source.process_pdf(behavior=KEEP_ONLY, on_min_pages=2, page_indexes=[0]) diff --git a/tests/v1/input/test_compression.py b/tests/v1/input/test_compression.py index dfba9ec4..8b2235c8 100644 --- a/tests/v1/input/test_compression.py +++ b/tests/v1/input/test_compression.py @@ -10,26 +10,27 @@ from mindee.input.sources import PathInput from mindee.pdf.pdf_compressor import compress_pdf from mindee.pdf.pdf_utils import extract_text_from_pdf +from tests.utils import FILE_TYPES_DIR, ROOT_DATA_DIR, V1_DATA_DIR, V1_PRODUCT_DATA_DIR -DATA_DIR = Path("./tests/data") -OUTPUT_DIR = DATA_DIR / "output" +OUTPUT_DIR = ROOT_DATA_DIR / "output" +RECEIPT_PATH = FILE_TYPES_DIR / "receipt.jpg" def test_image_quality_compress_from_input_source(): - receipt_input = PathInput(DATA_DIR / "file_types/receipt.jpg") + receipt_input = PathInput(RECEIPT_PATH) receipt_input.compress(40) with open(OUTPUT_DIR / "compress_indirect.jpg", "wb") as f: f.write(receipt_input.file_object.read()) receipt_input.file_object.seek(0) - initial_file_stats = os.stat(DATA_DIR / "file_types/receipt.jpg") + initial_file_stats = os.stat(RECEIPT_PATH) rendered_file_stats = os.stat(OUTPUT_DIR / "compress_indirect.jpg") assert rendered_file_stats.st_size < initial_file_stats.st_size def test_image_quality_compresses_from_compressor(): - receipt_input = PathInput(DATA_DIR / "file_types/receipt.jpg") + receipt_input = PathInput(RECEIPT_PATH) compresses = [ compress_image(receipt_input.file_object, 
100), compress_image(receipt_input.file_object), @@ -49,7 +50,7 @@ def test_image_quality_compresses_from_compressor(): with open(OUTPUT_DIR / file_names[i], "wb") as f: f.write(compressed) - initial_file_stats = os.stat(DATA_DIR / "file_types/receipt.jpg") + initial_file_stats = os.stat(RECEIPT_PATH) rendered_file_stats = [os.stat(OUTPUT_DIR / file_name) for file_name in file_names] assert initial_file_stats.st_size < rendered_file_stats[0].st_size @@ -60,14 +61,14 @@ def test_image_quality_compresses_from_compressor(): def test_image_resize_from_input_source(): - image_resize_input = PathInput(DATA_DIR / "file_types/receipt.jpg") + image_resize_input = PathInput(RECEIPT_PATH) image_resize_input.compress(75, 250, 1000) with open(OUTPUT_DIR / "resize_indirect.jpg", "wb") as f: f.write(image_resize_input.file_object.read()) image_resize_input.file_object.seek(0) - initial_file_stats = os.stat(DATA_DIR / "file_types/receipt.jpg") + initial_file_stats = os.stat(RECEIPT_PATH) rendered_file_stats = os.stat(OUTPUT_DIR / "resize_indirect.jpg") assert rendered_file_stats.st_size < initial_file_stats.st_size @@ -77,7 +78,7 @@ def test_image_resize_from_input_source(): def test_image_resize_from_compressor(): - image_resize_input = PathInput(DATA_DIR / "file_types/receipt.jpg") + image_resize_input = PathInput(RECEIPT_PATH) resizes = [ compress_image(image_resize_input.file_object, 75, 500), @@ -96,7 +97,7 @@ def test_image_resize_from_compressor(): with open(OUTPUT_DIR / file_names[i], "wb") as f: f.write(resized) - initial_file_stats = os.stat(DATA_DIR / "file_types/receipt.jpg") + initial_file_stats = os.stat(RECEIPT_PATH) rendered_file_stats = [os.stat(OUTPUT_DIR / file_name) for file_name in file_names] assert initial_file_stats.st_size > rendered_file_stats[0].st_size @@ -106,11 +107,9 @@ def test_image_resize_from_compressor(): def test_pdf_input_has_text(): - has_source_text_path = DATA_DIR / "file_types/pdf/multipage.pdf" - has_no_source_text_path = DATA_DIR / 
"file_types/pdf/blank_1.pdf" - has_no_source_text_since_its_image_path = os.path.join( - DATA_DIR, "file_types/receipt.jpg" - ) + has_source_text_path = FILE_TYPES_DIR / "pdf" / "multipage.pdf" + has_no_source_text_path = FILE_TYPES_DIR / "pdf" / "blank_1.pdf" + has_no_source_text_since_its_image_path = RECEIPT_PATH has_source_text_input = PathInput(has_source_text_path) has_no_source_text_input = PathInput(has_no_source_text_path) @@ -125,7 +124,7 @@ def test_pdf_input_has_text(): def test_pdf_compress_from_input_source(): pdf_resize_input = PathInput( - DATA_DIR / "products/invoice_splitter/default_sample.pdf" + V1_DATA_DIR / "products" / "invoice_splitter" / "default_sample.pdf" ) compressed_pdf = compress_pdf(pdf_resize_input.file_object, 75, True) @@ -133,7 +132,7 @@ def test_pdf_compress_from_input_source(): f.write(compressed_pdf) initial_file_stats = os.stat( - DATA_DIR / "products/invoice_splitter/default_sample.pdf" + V1_DATA_DIR / "products/invoice_splitter/default_sample.pdf" ) rendered_file_stats = os.stat(OUTPUT_DIR / "resize_indirect.pdf") @@ -142,7 +141,7 @@ def test_pdf_compress_from_input_source(): def test_pdf_compress_from_compressor(): pdf_resize_input = PathInput( - DATA_DIR / "products/invoice_splitter/default_sample.pdf" + V1_DATA_DIR / "products" / "invoice_splitter" / "default_sample.pdf" ) resizes = [] qualities = [85, 75, 50, 10] @@ -161,7 +160,7 @@ def test_pdf_compress_from_compressor(): f.write(resized) initial_file_stats = os.stat( - DATA_DIR / "products/invoice_splitter/default_sample.pdf" + V1_PRODUCT_DATA_DIR / "invoice_splitter" / "default_sample.pdf" ) rendered_file_stats = [os.stat(OUTPUT_DIR / file_name) for file_name in file_names] @@ -172,7 +171,7 @@ def test_pdf_compress_from_compressor(): def test_pdf_compress_with_text_keeps_text(): - initial_with_text = PathInput(DATA_DIR / "file_types/pdf/multipage.pdf") + initial_with_text = PathInput(FILE_TYPES_DIR / "pdf" / "multipage.pdf") compressed_with_text = 
compress_pdf(initial_with_text.file_object, 100, True, False) @@ -194,7 +193,7 @@ def test_pdf_compress_with_text_keeps_text(): def test_pdf_compress_with_text_does_not_compress(): - initial_with_text = PathInput(DATA_DIR / "file_types/pdf/multipage.pdf") + initial_with_text = PathInput(FILE_TYPES_DIR / "pdf" / "multipage.pdf") compressed_with_text = compress_pdf(initial_with_text.file_object, 50) @@ -224,6 +223,6 @@ def cleanup(): ] for file_path in created_files: - full_path = DATA_DIR / "output" / file_path + full_path = OUTPUT_DIR / file_path if full_path.exists(): os.remove(full_path) diff --git a/tests/v1/mindee_http/__init__.py b/tests/v1/mindee_http/__init__.py index 7210b673..2101a6f9 100644 --- a/tests/v1/mindee_http/__init__.py +++ b/tests/v1/mindee_http/__init__.py @@ -1 +1 @@ -from tests.v1.mindee_http.test_error import ERROR_DATA_DIR +from tests.v1.mindee_http.test_error import V1_ERROR_DATA_DIR diff --git a/tests/v1/mindee_http/test_error.py b/tests/v1/mindee_http/test_error.py index 8dd4f1e5..19349d84 100644 --- a/tests/v1/mindee_http/test_error.py +++ b/tests/v1/mindee_http/test_error.py @@ -1,5 +1,4 @@ import json -from pathlib import Path import pytest @@ -10,10 +9,10 @@ handle_error, ) from mindee.input.sources.path_input import PathInput -from tests.utils import clear_envvars, dummy_envvars +from tests.utils import V1_DATA_DIR, V1_PRODUCT_DATA_DIR, clear_envvars, dummy_envvars from tests.v1.input.test_inputs import FILE_TYPES_DIR -ERROR_DATA_DIR = Path("./tests/data/errors") +V1_ERROR_DATA_DIR = V1_DATA_DIR / "errors" @pytest.fixture @@ -58,7 +57,7 @@ def test_http_enqueue_and_parse_client_error( def test_http_400_error(): - error_ref = open(ERROR_DATA_DIR / "error_400_no_details.json") + error_ref = open(V1_ERROR_DATA_DIR / "error_400_no_details.json") error_obj = json.load(error_ref) error_obj["status_code"] = 400 error_400 = handle_error("dummy-url", error_obj) @@ -71,7 +70,7 @@ def test_http_400_error(): def test_http_401_error(): - 
error_ref = open(ERROR_DATA_DIR / "error_401_invalid_token.json") + error_ref = open(V1_ERROR_DATA_DIR / "error_401_invalid_token.json") error_obj = json.load(error_ref) error_obj["status_code"] = 401 error_401 = handle_error("dummy-url", error_obj) @@ -84,7 +83,7 @@ def test_http_401_error(): def test_http_429_error(): - error_ref = open(ERROR_DATA_DIR / "error_429_too_many_requests.json") + error_ref = open(V1_ERROR_DATA_DIR / "error_429_too_many_requests.json") error_obj = json.load(error_ref) error_obj["status_code"] = 429 error_429 = handle_error("dummy-url", error_obj) @@ -97,7 +96,7 @@ def test_http_429_error(): def test_http_500_error(): - error_ref = open(ERROR_DATA_DIR / "error_500_inference_fail.json") + error_ref = open(V1_ERROR_DATA_DIR / "error_500_inference_fail.json") error_obj = json.load(error_ref) error_obj["status_code"] = 500 error_500 = handle_error("dummy-url", error_obj) @@ -110,7 +109,7 @@ def test_http_500_error(): def test_http_500_html_error(): - error_ref_contents = open(ERROR_DATA_DIR / "error_50x.html").read() + error_ref_contents = open(V1_ERROR_DATA_DIR / "error_50x.html").read() error_500 = handle_error("dummy-url", error_ref_contents) with pytest.raises(MindeeHTTPServerError): raise error_500 diff --git a/tests/v1/parsing/common/test_ocr.py b/tests/v1/parsing/common/test_ocr.py index f4d3e821..56c9a75c 100644 --- a/tests/v1/parsing/common/test_ocr.py +++ b/tests/v1/parsing/common/test_ocr.py @@ -1,11 +1,12 @@ import json from mindee.parsing.common.ocr.ocr import Ocr +from tests.utils import V1_DATA_DIR def test_response(): - json_data = json.load(open("./tests/data/extras/ocr/complete.json")) - with open("./tests/data/extras/ocr/ocr.txt") as file_handle: + json_data = json.load(open(V1_DATA_DIR / "extras" / "ocr" / "complete.json")) + with open(V1_DATA_DIR / "extras" / "ocr" / "ocr.txt") as file_handle: expected_text = file_handle.read() ocr = Ocr(json_data["document"]["ocr"]) assert str(ocr) == expected_text diff --git 
a/tests/v1/product/barcode_reader/test_barcode_reader_v1.py b/tests/v1/product/barcode_reader/test_barcode_reader_v1.py index 6751dbaa..c79a97de 100644 --- a/tests/v1/product/barcode_reader/test_barcode_reader_v1.py +++ b/tests/v1/product/barcode_reader/test_barcode_reader_v1.py @@ -8,9 +8,9 @@ from mindee.product.barcode_reader.barcode_reader_v1_document import ( BarcodeReaderV1Document, ) -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR -RESPONSE_DIR = PRODUCT_DATA_DIR / "barcode_reader" / "response_v1" +RESPONSE_DIR = V1_PRODUCT_DATA_DIR / "barcode_reader" / "response_v1" BarcodeReaderV1DocumentType = Document[ BarcodeReaderV1Document, diff --git a/tests/v1/product/barcode_reader/test_barcode_reader_v1_regression.py b/tests/v1/product/barcode_reader/test_barcode_reader_v1_regression.py index af32bcf9..18683dfc 100644 --- a/tests/v1/product/barcode_reader/test_barcode_reader_v1_regression.py +++ b/tests/v1/product/barcode_reader/test_barcode_reader_v1_regression.py @@ -2,7 +2,7 @@ from mindee.client import Client from mindee.product.barcode_reader.barcode_reader_v1 import BarcodeReaderV1 -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR from tests.v1.product import get_id, get_version @@ -10,13 +10,13 @@ def test_default_sample(): client = Client() with open( - PRODUCT_DATA_DIR / "barcode_reader" / "response_v1" / "default_sample.rst", + V1_PRODUCT_DATA_DIR / "barcode_reader" / "response_v1" / "default_sample.rst", encoding="utf-8", ) as rst_file: rst_ref = rst_file.read() sample = client.source_from_path( - PRODUCT_DATA_DIR / "barcode_reader" / "default_sample.jpg", + V1_PRODUCT_DATA_DIR / "barcode_reader" / "default_sample.jpg", ) response = client.parse(BarcodeReaderV1, sample) doc_response = response.document diff --git a/tests/v1/product/bill_of_lading/test_bill_of_lading_v1.py b/tests/v1/product/bill_of_lading/test_bill_of_lading_v1.py index b76d9c36..204d5092 100644 --- 
a/tests/v1/product/bill_of_lading/test_bill_of_lading_v1.py +++ b/tests/v1/product/bill_of_lading/test_bill_of_lading_v1.py @@ -8,9 +8,9 @@ from mindee.product.bill_of_lading.bill_of_lading_v1_document import ( BillOfLadingV1Document, ) -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR -RESPONSE_DIR = PRODUCT_DATA_DIR / "bill_of_lading" / "response_v1" +RESPONSE_DIR = V1_PRODUCT_DATA_DIR / "bill_of_lading" / "response_v1" BillOfLadingV1DocumentType = Document[ BillOfLadingV1Document, diff --git a/tests/v1/product/business_card/test_business_card_v1.py b/tests/v1/product/business_card/test_business_card_v1.py index 2b8d79ec..7111220a 100644 --- a/tests/v1/product/business_card/test_business_card_v1.py +++ b/tests/v1/product/business_card/test_business_card_v1.py @@ -8,9 +8,9 @@ from mindee.product.business_card.business_card_v1_document import ( BusinessCardV1Document, ) -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR -RESPONSE_DIR = PRODUCT_DATA_DIR / "business_card" / "response_v1" +RESPONSE_DIR = V1_PRODUCT_DATA_DIR / "business_card" / "response_v1" BusinessCardV1DocumentType = Document[ BusinessCardV1Document, diff --git a/tests/v1/product/cropper/test_cropper_v1.py b/tests/v1/product/cropper/test_cropper_v1.py index c6e27ece..84fc9986 100644 --- a/tests/v1/product/cropper/test_cropper_v1.py +++ b/tests/v1/product/cropper/test_cropper_v1.py @@ -11,9 +11,9 @@ from mindee.product.cropper.cropper_v1_page import ( CropperV1Page, ) -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR -RESPONSE_DIR = PRODUCT_DATA_DIR / "cropper" / "response_v1" +RESPONSE_DIR = V1_PRODUCT_DATA_DIR / "cropper" / "response_v1" CropperV1DocumentType = Document[ CropperV1Document, diff --git a/tests/v1/product/cropper/test_cropper_v1_regression.py b/tests/v1/product/cropper/test_cropper_v1_regression.py index aca34886..5b1f0c2f 100644 --- 
a/tests/v1/product/cropper/test_cropper_v1_regression.py +++ b/tests/v1/product/cropper/test_cropper_v1_regression.py @@ -2,7 +2,7 @@ from mindee.client import Client from mindee.product.cropper.cropper_v1 import CropperV1 -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR from tests.v1.product import get_id, get_version @@ -10,13 +10,13 @@ def test_default_sample(): client = Client() with open( - PRODUCT_DATA_DIR / "cropper" / "response_v1" / "default_sample.rst", + V1_PRODUCT_DATA_DIR / "cropper" / "response_v1" / "default_sample.rst", encoding="utf-8", ) as rst_file: rst_ref = rst_file.read() sample = client.source_from_path( - PRODUCT_DATA_DIR / "cropper" / "default_sample.jpg", + V1_PRODUCT_DATA_DIR / "cropper" / "default_sample.jpg", ) response = client.parse(CropperV1, sample) doc_response = response.document diff --git a/tests/v1/product/custom/test_custom_v1.py b/tests/v1/product/custom/test_custom_v1.py index bfb4f04a..5f9d6bac 100644 --- a/tests/v1/product/custom/test_custom_v1.py +++ b/tests/v1/product/custom/test_custom_v1.py @@ -10,13 +10,13 @@ from mindee.product.custom.custom_v1 import CustomV1 from mindee.product.custom.custom_v1_document import CustomV1Document from mindee.product.custom.custom_v1_page import CustomV1Page -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR @pytest.fixture def custom_v1_complete_doc() -> Document[CustomV1Document, Page[CustomV1Page]]: json_data = json.load( - open(PRODUCT_DATA_DIR / "custom" / "response_v1" / "complete.json") + open(V1_PRODUCT_DATA_DIR / "custom" / "response_v1" / "complete.json") ) return Document(CustomV1, json_data["document"]) @@ -24,7 +24,7 @@ def custom_v1_complete_doc() -> Document[CustomV1Document, Page[CustomV1Page]]: @pytest.fixture def custom_v1_empty_doc() -> Document[CustomV1Document, Page[CustomV1Page]]: json_data = json.load( - open(PRODUCT_DATA_DIR / "custom" / "response_v1" / "empty.json") + 
open(V1_PRODUCT_DATA_DIR / "custom" / "response_v1" / "empty.json") ) return Document(CustomV1, json_data["document"]) @@ -33,7 +33,7 @@ def custom_v1_empty_doc() -> Document[CustomV1Document, Page[CustomV1Page]]: @pytest.fixture def custom_v1_complete_page_0() -> Page[CustomV1Page]: json_data = json.load( - open(PRODUCT_DATA_DIR / "custom" / "response_v1" / "complete.json") + open(V1_PRODUCT_DATA_DIR / "custom" / "response_v1" / "complete.json") ) return Page(CustomV1Page, json_data["document"]["inference"]["pages"][0]) @@ -42,7 +42,7 @@ def custom_v1_complete_page_0() -> Page[CustomV1Page]: @pytest.fixture def custom_v1_complete_page_1() -> Page[CustomV1Page]: json_data = json.load( - open(PRODUCT_DATA_DIR / "custom" / "response_v1" / "complete.json") + open(V1_PRODUCT_DATA_DIR / "custom" / "response_v1" / "complete.json") ) return Page(CustomV1Page, json_data["document"]["inference"]["pages"][1]) @@ -66,7 +66,7 @@ def test_empty_doc(custom_v1_empty_doc) -> None: def test_complete_doc(custom_v1_complete_doc) -> None: document_prediction: CustomV1Document = custom_v1_complete_doc.inference.prediction doc_str = open( - PRODUCT_DATA_DIR / "custom" / "response_v1" / "summary_full.rst" + V1_PRODUCT_DATA_DIR / "custom" / "response_v1" / "summary_full.rst" ).read() for field_name, field in document_prediction.fields.items(): assert len(field_name) > 0 @@ -92,7 +92,7 @@ def test_complete_doc(custom_v1_complete_doc) -> None: def test_complete_page_0(custom_v1_complete_page_0): page_0_prediction = custom_v1_complete_page_0.prediction page_0_str = open( - PRODUCT_DATA_DIR / "custom" / "response_v1" / "summary_page0.rst" + V1_PRODUCT_DATA_DIR / "custom" / "response_v1" / "summary_page0.rst" ).read() assert custom_v1_complete_page_0.orientation.value == 0 assert len(custom_v1_complete_page_0.extras.cropper.cropping) == 1 @@ -107,7 +107,7 @@ def test_complete_page_0(custom_v1_complete_page_0): def test_complete_page_1(custom_v1_complete_page_1): page_1_prediction = 
custom_v1_complete_page_1.prediction page_1_str = open( - PRODUCT_DATA_DIR / "custom" / "response_v1" / "summary_page1.rst" + V1_PRODUCT_DATA_DIR / "custom" / "response_v1" / "summary_page1.rst" ).read() assert custom_v1_complete_page_1.orientation.value == 0 for field in page_1_prediction.fields.values(): diff --git a/tests/v1/product/custom/test_custom_v1_line_items.py b/tests/v1/product/custom/test_custom_v1_line_items.py index e6af9f7d..435546b6 100644 --- a/tests/v1/product/custom/test_custom_v1_line_items.py +++ b/tests/v1/product/custom/test_custom_v1_line_items.py @@ -6,6 +6,7 @@ from mindee.parsing.common.page import Page from mindee.product.custom.custom_v1 import CustomV1 from mindee.product.custom.custom_v1_page import CustomV1Page +from tests.utils import V1_PRODUCT_DATA_DIR @pytest.mark.lineitems @@ -25,7 +26,11 @@ def do_tests(line_items): @pytest.mark.lineitems def test_single_table_01(): json_data_path = ( - "./tests/data/products/custom/response_v1/line_items/single_table_01.json" + V1_PRODUCT_DATA_DIR + / "custom" + / "response_v1" + / "line_items" + / "single_table_01.json" ) json_data = json.load(open(json_data_path, "r")) doc = Document(CustomV1, json_data["document"]).inference.prediction @@ -46,7 +51,11 @@ def test_single_table_01(): @pytest.mark.lineitems def test_single_table_02(): json_data_path = ( - "./tests/data/products/custom/response_v2/line_items/single_table_01.json" + V1_PRODUCT_DATA_DIR + / "custom" + / "response_v2" + / "line_items" + / "single_table_01.json" ) json_data = json.load(open(json_data_path, "r")) doc = Document(CustomV1, json_data["document"]).inference.prediction diff --git a/tests/v1/product/custom/test_custom_v1_v2.py b/tests/v1/product/custom/test_custom_v1_v2.py index 2db301e8..3b7fa0f5 100644 --- a/tests/v1/product/custom/test_custom_v1_v2.py +++ b/tests/v1/product/custom/test_custom_v1_v2.py @@ -10,13 +10,13 @@ from mindee.product.custom.custom_v1 import CustomV1 from mindee.product.custom.custom_v1_document 
import CustomV1Document from mindee.product.custom.custom_v1_page import CustomV1Page -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR @pytest.fixture def custom_v1_complete_doc() -> Document[CustomV1Document, Page[CustomV1Page]]: json_data = json.load( - open(PRODUCT_DATA_DIR / "custom" / "response_v2" / "complete.json") + open(V1_PRODUCT_DATA_DIR / "custom" / "response_v2" / "complete.json") ) return Document(CustomV1, json_data["document"]) @@ -24,7 +24,7 @@ def custom_v1_complete_doc() -> Document[CustomV1Document, Page[CustomV1Page]]: @pytest.fixture def custom_v1_empty_doc() -> Document[CustomV1Document, Page[CustomV1Page]]: json_data = json.load( - open(PRODUCT_DATA_DIR / "custom" / "response_v2" / "empty.json") + open(V1_PRODUCT_DATA_DIR / "custom" / "response_v2" / "empty.json") ) return Document(CustomV1, json_data["document"]) @@ -33,7 +33,7 @@ def custom_v1_empty_doc() -> Document[CustomV1Document, Page[CustomV1Page]]: @pytest.fixture def custom_v1_complete_page_0() -> Page[CustomV1Page]: json_data = json.load( - open(PRODUCT_DATA_DIR / "custom" / "response_v2" / "complete.json") + open(V1_PRODUCT_DATA_DIR / "custom" / "response_v2" / "complete.json") ) return Page(CustomV1Page, json_data["document"]["inference"]["pages"][0]) @@ -42,7 +42,7 @@ def custom_v1_complete_page_0() -> Page[CustomV1Page]: @pytest.fixture def custom_v1_complete_page_1() -> Page[CustomV1Page]: json_data = json.load( - open(PRODUCT_DATA_DIR / "custom" / "response_v2" / "complete.json") + open(V1_PRODUCT_DATA_DIR / "custom" / "response_v2" / "complete.json") ) return Page(CustomV1Page, json_data["document"]["inference"]["pages"][1]) @@ -66,7 +66,7 @@ def test_empty_doc(custom_v1_empty_doc) -> None: def test_complete_doc(custom_v1_complete_doc) -> None: document_prediction: CustomV1Document = custom_v1_complete_doc.inference.prediction doc_str = open( - PRODUCT_DATA_DIR / "custom" / "response_v2" / "summary_full.rst" + V1_PRODUCT_DATA_DIR / 
"custom" / "response_v2" / "summary_full.rst" ).read() for field_name, field in document_prediction.fields.items(): assert len(field_name) > 0 @@ -92,7 +92,7 @@ def test_complete_doc(custom_v1_complete_doc) -> None: def test_complete_page_0(custom_v1_complete_page_0): page_0_prediction = custom_v1_complete_page_0.prediction page_0_str = open( - PRODUCT_DATA_DIR / "custom" / "response_v2" / "summary_page0.rst" + V1_PRODUCT_DATA_DIR / "custom" / "response_v2" / "summary_page0.rst" ).read() assert custom_v1_complete_page_0.orientation.value == 0 assert len(custom_v1_complete_page_0.extras.cropper.cropping) == 1 @@ -107,7 +107,7 @@ def test_complete_page_0(custom_v1_complete_page_0): def test_complete_page_1(custom_v1_complete_page_1): page_1_prediction = custom_v1_complete_page_1.prediction page_1_str = open( - PRODUCT_DATA_DIR / "custom" / "response_v2" / "summary_page1.rst" + V1_PRODUCT_DATA_DIR / "custom" / "response_v2" / "summary_page1.rst" ).read() assert custom_v1_complete_page_1.orientation.value == 0 for field in page_1_prediction.fields.values(): diff --git a/tests/v1/product/delivery_note/test_delivery_note_v1.py b/tests/v1/product/delivery_note/test_delivery_note_v1.py index cff46a05..17145446 100644 --- a/tests/v1/product/delivery_note/test_delivery_note_v1.py +++ b/tests/v1/product/delivery_note/test_delivery_note_v1.py @@ -8,9 +8,9 @@ from mindee.product.delivery_note.delivery_note_v1_document import ( DeliveryNoteV1Document, ) -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR -RESPONSE_DIR = PRODUCT_DATA_DIR / "delivery_notes" / "response_v1" +RESPONSE_DIR = V1_PRODUCT_DATA_DIR / "delivery_notes" / "response_v1" DeliveryNoteV1DocumentType = Document[ DeliveryNoteV1Document, diff --git a/tests/v1/product/driver_license/test_driver_license_v1.py b/tests/v1/product/driver_license/test_driver_license_v1.py index f7583eaa..93df626b 100644 --- a/tests/v1/product/driver_license/test_driver_license_v1.py +++ 
b/tests/v1/product/driver_license/test_driver_license_v1.py @@ -8,9 +8,9 @@ from mindee.product.driver_license.driver_license_v1_document import ( DriverLicenseV1Document, ) -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR -RESPONSE_DIR = PRODUCT_DATA_DIR / "driver_license" / "response_v1" +RESPONSE_DIR = V1_PRODUCT_DATA_DIR / "driver_license" / "response_v1" DriverLicenseV1DocumentType = Document[ DriverLicenseV1Document, diff --git a/tests/v1/product/financial_document/test_financial_document_v1.py b/tests/v1/product/financial_document/test_financial_document_v1.py index 07cc01e4..5fa5b4b1 100644 --- a/tests/v1/product/financial_document/test_financial_document_v1.py +++ b/tests/v1/product/financial_document/test_financial_document_v1.py @@ -8,9 +8,9 @@ from mindee.product.financial_document.financial_document_v1_document import ( FinancialDocumentV1Document, ) -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR -RESPONSE_DIR = PRODUCT_DATA_DIR / "financial_document" / "response_v1" +RESPONSE_DIR = V1_PRODUCT_DATA_DIR / "financial_document" / "response_v1" FinancialDocumentV1DocumentType = Document[ FinancialDocumentV1Document, diff --git a/tests/v1/product/financial_document/test_financial_document_v1_regression.py b/tests/v1/product/financial_document/test_financial_document_v1_regression.py index da3195a9..59396583 100644 --- a/tests/v1/product/financial_document/test_financial_document_v1_regression.py +++ b/tests/v1/product/financial_document/test_financial_document_v1_regression.py @@ -2,7 +2,7 @@ from mindee.client import Client from mindee.product.financial_document.financial_document_v1 import FinancialDocumentV1 -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR from tests.v1.product import get_id, get_version @@ -10,13 +10,16 @@ def test_default_sample(): client = Client() with open( - PRODUCT_DATA_DIR / "financial_document" / "response_v1" / 
"default_sample.rst", + V1_PRODUCT_DATA_DIR + / "financial_document" + / "response_v1" + / "default_sample.rst", encoding="utf-8", ) as rst_file: rst_ref = rst_file.read() sample = client.source_from_path( - PRODUCT_DATA_DIR / "financial_document" / "default_sample.jpg", + V1_PRODUCT_DATA_DIR / "financial_document" / "default_sample.jpg", ) response = client.parse(FinancialDocumentV1, sample) doc_response = response.document diff --git a/tests/v1/product/fr/bank_account_details/test_bank_account_details_v1.py b/tests/v1/product/fr/bank_account_details/test_bank_account_details_v1.py index b1062e5f..d6649163 100644 --- a/tests/v1/product/fr/bank_account_details/test_bank_account_details_v1.py +++ b/tests/v1/product/fr/bank_account_details/test_bank_account_details_v1.py @@ -10,9 +10,9 @@ from mindee.product.fr.bank_account_details.bank_account_details_v1_document import ( BankAccountDetailsV1Document, ) -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR -RESPONSE_DIR = PRODUCT_DATA_DIR / "bank_account_details" / "response_v1" +RESPONSE_DIR = V1_PRODUCT_DATA_DIR / "bank_account_details" / "response_v1" BankAccountDetailsV1DocumentType = Document[ BankAccountDetailsV1Document, diff --git a/tests/v1/product/fr/bank_account_details/test_bank_account_details_v1_regression.py b/tests/v1/product/fr/bank_account_details/test_bank_account_details_v1_regression.py index 8f1462b8..8a452d7f 100644 --- a/tests/v1/product/fr/bank_account_details/test_bank_account_details_v1_regression.py +++ b/tests/v1/product/fr/bank_account_details/test_bank_account_details_v1_regression.py @@ -4,7 +4,7 @@ from mindee.product.fr.bank_account_details.bank_account_details_v1 import ( BankAccountDetailsV1, ) -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR from tests.v1.product import get_id, get_version @@ -12,7 +12,7 @@ def test_default_sample(): client = Client() with open( - PRODUCT_DATA_DIR + V1_PRODUCT_DATA_DIR / 
"bank_account_details" / "response_v1" / "default_sample.rst", @@ -21,7 +21,7 @@ def test_default_sample(): rst_ref = rst_file.read() sample = client.source_from_path( - PRODUCT_DATA_DIR / "bank_account_details" / "default_sample.jpg", + V1_PRODUCT_DATA_DIR / "bank_account_details" / "default_sample.jpg", ) response = client.parse(BankAccountDetailsV1, sample) doc_response = response.document diff --git a/tests/v1/product/fr/bank_account_details/test_bank_account_details_v2.py b/tests/v1/product/fr/bank_account_details/test_bank_account_details_v2.py index c3bfde0b..56e29397 100644 --- a/tests/v1/product/fr/bank_account_details/test_bank_account_details_v2.py +++ b/tests/v1/product/fr/bank_account_details/test_bank_account_details_v2.py @@ -10,9 +10,9 @@ from mindee.product.fr.bank_account_details.bank_account_details_v2_document import ( BankAccountDetailsV2Document, ) -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR -RESPONSE_DIR = PRODUCT_DATA_DIR / "bank_account_details" / "response_v2" +RESPONSE_DIR = V1_PRODUCT_DATA_DIR / "bank_account_details" / "response_v2" BankAccountDetailsV2DocumentType = Document[ BankAccountDetailsV2Document, diff --git a/tests/v1/product/fr/bank_account_details/test_bank_account_details_v2_regression.py b/tests/v1/product/fr/bank_account_details/test_bank_account_details_v2_regression.py index 3decfeeb..66ded383 100644 --- a/tests/v1/product/fr/bank_account_details/test_bank_account_details_v2_regression.py +++ b/tests/v1/product/fr/bank_account_details/test_bank_account_details_v2_regression.py @@ -4,7 +4,7 @@ from mindee.product.fr.bank_account_details.bank_account_details_v2 import ( BankAccountDetailsV2, ) -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR from tests.v1.product import get_id, get_version @@ -12,7 +12,7 @@ def test_default_sample(): client = Client() with open( - PRODUCT_DATA_DIR + V1_PRODUCT_DATA_DIR / "bank_account_details" / "response_v2" 
/ "default_sample.rst", @@ -21,7 +21,7 @@ def test_default_sample(): rst_ref = rst_file.read() sample = client.source_from_path( - PRODUCT_DATA_DIR / "bank_account_details" / "default_sample.jpg", + V1_PRODUCT_DATA_DIR / "bank_account_details" / "default_sample.jpg", ) response = client.parse(BankAccountDetailsV2, sample) doc_response = response.document diff --git a/tests/v1/product/fr/carte_grise/test_carte_grise_v1.py b/tests/v1/product/fr/carte_grise/test_carte_grise_v1.py index a3f5eb1a..f643a8f9 100644 --- a/tests/v1/product/fr/carte_grise/test_carte_grise_v1.py +++ b/tests/v1/product/fr/carte_grise/test_carte_grise_v1.py @@ -8,9 +8,9 @@ from mindee.product.fr.carte_grise.carte_grise_v1_document import ( CarteGriseV1Document, ) -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR -RESPONSE_DIR = PRODUCT_DATA_DIR / "carte_grise" / "response_v1" +RESPONSE_DIR = V1_PRODUCT_DATA_DIR / "carte_grise" / "response_v1" CarteGriseV1DocumentType = Document[ CarteGriseV1Document, diff --git a/tests/v1/product/fr/carte_grise/test_carte_grise_v1_regression.py b/tests/v1/product/fr/carte_grise/test_carte_grise_v1_regression.py index 369f6aa5..b4b3380f 100644 --- a/tests/v1/product/fr/carte_grise/test_carte_grise_v1_regression.py +++ b/tests/v1/product/fr/carte_grise/test_carte_grise_v1_regression.py @@ -2,7 +2,7 @@ from mindee.client import Client from mindee.product.fr.carte_grise.carte_grise_v1 import CarteGriseV1 -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR from tests.v1.product import get_id, get_version @@ -10,13 +10,13 @@ def test_default_sample(): client = Client() with open( - PRODUCT_DATA_DIR / "carte_grise" / "response_v1" / "default_sample.rst", + V1_PRODUCT_DATA_DIR / "carte_grise" / "response_v1" / "default_sample.rst", encoding="utf-8", ) as rst_file: rst_ref = rst_file.read() sample = client.source_from_path( - PRODUCT_DATA_DIR / "carte_grise" / "default_sample.jpg", + 
V1_PRODUCT_DATA_DIR / "carte_grise" / "default_sample.jpg", ) response = client.parse(CarteGriseV1, sample) doc_response = response.document diff --git a/tests/v1/product/fr/energy_bill/test_energy_bill_v1.py b/tests/v1/product/fr/energy_bill/test_energy_bill_v1.py index 5f36ff12..383d7dc5 100644 --- a/tests/v1/product/fr/energy_bill/test_energy_bill_v1.py +++ b/tests/v1/product/fr/energy_bill/test_energy_bill_v1.py @@ -8,9 +8,9 @@ from mindee.product.fr.energy_bill.energy_bill_v1_document import ( EnergyBillV1Document, ) -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR -RESPONSE_DIR = PRODUCT_DATA_DIR / "energy_bill_fra" / "response_v1" +RESPONSE_DIR = V1_PRODUCT_DATA_DIR / "energy_bill_fra" / "response_v1" EnergyBillV1DocumentType = Document[ EnergyBillV1Document, diff --git a/tests/v1/product/fr/health_card/test_health_card_v1.py b/tests/v1/product/fr/health_card/test_health_card_v1.py index 4f434ca4..6c5f2b50 100644 --- a/tests/v1/product/fr/health_card/test_health_card_v1.py +++ b/tests/v1/product/fr/health_card/test_health_card_v1.py @@ -8,9 +8,9 @@ from mindee.product.fr.health_card.health_card_v1_document import ( HealthCardV1Document, ) -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR -RESPONSE_DIR = PRODUCT_DATA_DIR / "french_healthcard" / "response_v1" +RESPONSE_DIR = V1_PRODUCT_DATA_DIR / "french_healthcard" / "response_v1" HealthCardV1DocumentType = Document[ HealthCardV1Document, diff --git a/tests/v1/product/fr/id_card/test_id_card_v1.py b/tests/v1/product/fr/id_card/test_id_card_v1.py index 5306d996..84507a3d 100644 --- a/tests/v1/product/fr/id_card/test_id_card_v1.py +++ b/tests/v1/product/fr/id_card/test_id_card_v1.py @@ -11,9 +11,9 @@ from mindee.product.fr.id_card.id_card_v1_page import ( IdCardV1Page, ) -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR -RESPONSE_DIR = PRODUCT_DATA_DIR / "idcard_fr" / "response_v1" +RESPONSE_DIR = 
V1_PRODUCT_DATA_DIR / "idcard_fr" / "response_v1" IdCardV1DocumentType = Document[ IdCardV1Document, diff --git a/tests/v1/product/fr/id_card/test_id_card_v1_regression.py b/tests/v1/product/fr/id_card/test_id_card_v1_regression.py index 000aeebf..00bb5588 100644 --- a/tests/v1/product/fr/id_card/test_id_card_v1_regression.py +++ b/tests/v1/product/fr/id_card/test_id_card_v1_regression.py @@ -2,7 +2,7 @@ from mindee.client import Client from mindee.product.fr.id_card.id_card_v1 import IdCardV1 -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR from tests.v1.product import get_id, get_version @@ -10,13 +10,13 @@ def test_default_sample(): client = Client() with open( - PRODUCT_DATA_DIR / "idcard_fr" / "response_v1" / "default_sample.rst", + V1_PRODUCT_DATA_DIR / "idcard_fr" / "response_v1" / "default_sample.rst", encoding="utf-8", ) as rst_file: rst_ref = rst_file.read() sample = client.source_from_path( - PRODUCT_DATA_DIR / "idcard_fr" / "default_sample.jpg", + V1_PRODUCT_DATA_DIR / "idcard_fr" / "default_sample.jpg", ) response = client.parse(IdCardV1, sample) doc_response = response.document diff --git a/tests/v1/product/fr/id_card/test_id_card_v2.py b/tests/v1/product/fr/id_card/test_id_card_v2.py index 0ff30079..a603e275 100644 --- a/tests/v1/product/fr/id_card/test_id_card_v2.py +++ b/tests/v1/product/fr/id_card/test_id_card_v2.py @@ -11,9 +11,9 @@ from mindee.product.fr.id_card.id_card_v2_page import ( IdCardV2Page, ) -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR -RESPONSE_DIR = PRODUCT_DATA_DIR / "idcard_fr" / "response_v2" +RESPONSE_DIR = V1_PRODUCT_DATA_DIR / "idcard_fr" / "response_v2" IdCardV2DocumentType = Document[ IdCardV2Document, diff --git a/tests/v1/product/fr/id_card/test_id_card_v2_regression.py b/tests/v1/product/fr/id_card/test_id_card_v2_regression.py index b3631a53..b44b14b8 100644 --- a/tests/v1/product/fr/id_card/test_id_card_v2_regression.py +++ 
b/tests/v1/product/fr/id_card/test_id_card_v2_regression.py @@ -2,7 +2,7 @@ from mindee.client import Client from mindee.product.fr.id_card.id_card_v2 import IdCardV2 -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR from tests.v1.product import get_id, get_version @@ -10,13 +10,13 @@ def test_default_sample(): client = Client() with open( - PRODUCT_DATA_DIR / "idcard_fr" / "response_v2" / "default_sample.rst", + V1_PRODUCT_DATA_DIR / "idcard_fr" / "response_v2" / "default_sample.rst", encoding="utf-8", ) as rst_file: rst_ref = rst_file.read() sample = client.source_from_path( - PRODUCT_DATA_DIR / "idcard_fr" / "default_sample.jpg", + V1_PRODUCT_DATA_DIR / "idcard_fr" / "default_sample.jpg", ) response = client.parse(IdCardV2, sample) doc_response = response.document diff --git a/tests/v1/product/fr/payslip/test_payslip_v2.py b/tests/v1/product/fr/payslip/test_payslip_v2.py index 4429f41a..0ece5278 100644 --- a/tests/v1/product/fr/payslip/test_payslip_v2.py +++ b/tests/v1/product/fr/payslip/test_payslip_v2.py @@ -8,9 +8,9 @@ from mindee.product.fr.payslip.payslip_v2_document import ( PayslipV2Document, ) -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR -RESPONSE_DIR = PRODUCT_DATA_DIR / "payslip_fra" / "response_v2" +RESPONSE_DIR = V1_PRODUCT_DATA_DIR / "payslip_fra" / "response_v2" PayslipV2DocumentType = Document[ PayslipV2Document, diff --git a/tests/v1/product/fr/payslip/test_payslip_v3.py b/tests/v1/product/fr/payslip/test_payslip_v3.py index e3f7a723..44f356ef 100644 --- a/tests/v1/product/fr/payslip/test_payslip_v3.py +++ b/tests/v1/product/fr/payslip/test_payslip_v3.py @@ -8,9 +8,9 @@ from mindee.product.fr.payslip.payslip_v3_document import ( PayslipV3Document, ) -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR -RESPONSE_DIR = PRODUCT_DATA_DIR / "payslip_fra" / "response_v3" +RESPONSE_DIR = V1_PRODUCT_DATA_DIR / "payslip_fra" / "response_v3" 
PayslipV3DocumentType = Document[ PayslipV3Document, diff --git a/tests/v1/product/generated/test_generated_v1.py b/tests/v1/product/generated/test_generated_v1.py index fee21b7f..c414eca8 100644 --- a/tests/v1/product/generated/test_generated_v1.py +++ b/tests/v1/product/generated/test_generated_v1.py @@ -11,7 +11,7 @@ from mindee.product.generated.generated_v1 import GeneratedV1 from mindee.product.generated.generated_v1_document import GeneratedV1Document from mindee.product.generated.generated_v1_page import GeneratedV1Page -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR @pytest.fixture @@ -20,7 +20,7 @@ def international_id_v1_complete_doc() -> ( ): json_data = json.load( open( - PRODUCT_DATA_DIR + V1_PRODUCT_DATA_DIR / "generated" / "response_v1" / "complete_international_id_v1.json", @@ -36,7 +36,7 @@ def international_id_v1_empty_doc() -> ( ): json_data = json.load( open( - PRODUCT_DATA_DIR + V1_PRODUCT_DATA_DIR / "generated" / "response_v1" / "empty_international_id_v1.json", @@ -51,7 +51,7 @@ def international_id_v1_empty_doc() -> ( def invoice_v4_empty_doc() -> Document[GeneratedV1Document, Page[GeneratedV1Page]]: json_data = json.load( open( - PRODUCT_DATA_DIR / "generated" / "response_v1" / "empty_invoice_v4.json", + V1_PRODUCT_DATA_DIR / "generated" / "response_v1" / "empty_invoice_v4.json", encoding="utf-8", ) ) @@ -62,7 +62,10 @@ def invoice_v4_empty_doc() -> Document[GeneratedV1Document, Page[GeneratedV1Page def invoice_v4_complete_doc() -> Document[GeneratedV1Document, Page[GeneratedV1Page]]: json_data = json.load( open( - PRODUCT_DATA_DIR / "generated" / "response_v1" / "complete_invoice_v4.json", + V1_PRODUCT_DATA_DIR + / "generated" + / "response_v1" + / "complete_invoice_v4.json", encoding="utf-8", ) ) @@ -73,7 +76,10 @@ def invoice_v4_complete_doc() -> Document[GeneratedV1Document, Page[GeneratedV1P def invoice_v4_page_0() -> Document[GeneratedV1Document, Page[GeneratedV1Page]]: json_data = json.load( 
open( - PRODUCT_DATA_DIR / "generated" / "response_v1" / "complete_invoice_v4.json", + V1_PRODUCT_DATA_DIR + / "generated" + / "response_v1" + / "complete_invoice_v4.json", encoding="utf-8", ) ) @@ -82,7 +88,7 @@ def invoice_v4_page_0() -> Document[GeneratedV1Document, Page[GeneratedV1Page]]: def test_international_id_v1_empty_doc(international_id_v1_empty_doc) -> None: doc_str = open( - PRODUCT_DATA_DIR + V1_PRODUCT_DATA_DIR / "generated" / "response_v1" / "summary_empty_international_id_v1.rst", @@ -231,7 +237,7 @@ def test_international_id_v1_empty_doc(international_id_v1_empty_doc) -> None: def test_international_id_v1_complete_doc(international_id_v1_complete_doc) -> None: doc_str = open( - PRODUCT_DATA_DIR + V1_PRODUCT_DATA_DIR / "generated" / "response_v1" / "summary_full_international_id_v1.rst", @@ -398,7 +404,10 @@ def test_international_id_v1_complete_doc(international_id_v1_complete_doc) -> N def test_invoice_v4_complete_doc(invoice_v4_complete_doc) -> None: doc_str = open( - PRODUCT_DATA_DIR / "generated" / "response_v1" / "summary_full_invoice_v4.rst", + V1_PRODUCT_DATA_DIR + / "generated" + / "response_v1" + / "summary_full_invoice_v4.rst", encoding="utf-8", ).read() assert isinstance( @@ -640,7 +649,10 @@ def test_invoice_v4_complete_doc(invoice_v4_complete_doc) -> None: def test_invoice_v4_page0(invoice_v4_page_0) -> None: doc_str = open( - PRODUCT_DATA_DIR / "generated" / "response_v1" / "summary_page0_invoice_v4.rst", + V1_PRODUCT_DATA_DIR + / "generated" + / "response_v1" + / "summary_page0_invoice_v4.rst", encoding="utf-8", ).read() @@ -788,7 +800,10 @@ def test_invoice_v4_page0(invoice_v4_page_0) -> None: def test_invoice_v4_empty_doc(invoice_v4_empty_doc) -> None: doc_str = open( - PRODUCT_DATA_DIR / "generated" / "response_v1" / "summary_empty_invoice_v4.rst", + V1_PRODUCT_DATA_DIR + / "generated" + / "response_v1" + / "summary_empty_invoice_v4.rst", encoding="utf-8", ).read() diff --git 
a/tests/v1/product/ind/indian_passport/test_indian_passport_v1.py b/tests/v1/product/ind/indian_passport/test_indian_passport_v1.py index 4f2ebd69..3aca0562 100644 --- a/tests/v1/product/ind/indian_passport/test_indian_passport_v1.py +++ b/tests/v1/product/ind/indian_passport/test_indian_passport_v1.py @@ -8,9 +8,9 @@ from mindee.product.ind.indian_passport.indian_passport_v1_document import ( IndianPassportV1Document, ) -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR -RESPONSE_DIR = PRODUCT_DATA_DIR / "ind_passport" / "response_v1" +RESPONSE_DIR = V1_PRODUCT_DATA_DIR / "ind_passport" / "response_v1" IndianPassportV1DocumentType = Document[ IndianPassportV1Document, diff --git a/tests/v1/product/international_id/test_international_id_v2.py b/tests/v1/product/international_id/test_international_id_v2.py index 5756360a..cd09a400 100644 --- a/tests/v1/product/international_id/test_international_id_v2.py +++ b/tests/v1/product/international_id/test_international_id_v2.py @@ -8,9 +8,9 @@ from mindee.product.international_id.international_id_v2_document import ( InternationalIdV2Document, ) -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR -RESPONSE_DIR = PRODUCT_DATA_DIR / "international_id" / "response_v2" +RESPONSE_DIR = V1_PRODUCT_DATA_DIR / "international_id" / "response_v2" InternationalIdV2DocumentType = Document[ InternationalIdV2Document, diff --git a/tests/v1/product/invoice/test_invoice_v4.py b/tests/v1/product/invoice/test_invoice_v4.py index 95bac0dd..d63614e3 100644 --- a/tests/v1/product/invoice/test_invoice_v4.py +++ b/tests/v1/product/invoice/test_invoice_v4.py @@ -8,9 +8,9 @@ from mindee.product.invoice.invoice_v4_document import ( InvoiceV4Document, ) -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR -RESPONSE_DIR = PRODUCT_DATA_DIR / "invoices" / "response_v4" +RESPONSE_DIR = V1_PRODUCT_DATA_DIR / "invoices" / "response_v4" 
InvoiceV4DocumentType = Document[ InvoiceV4Document, diff --git a/tests/v1/product/invoice/test_invoice_v4_regression.py b/tests/v1/product/invoice/test_invoice_v4_regression.py index 67516fd3..7dd012ca 100644 --- a/tests/v1/product/invoice/test_invoice_v4_regression.py +++ b/tests/v1/product/invoice/test_invoice_v4_regression.py @@ -2,7 +2,7 @@ from mindee.client import Client from mindee.product.invoice.invoice_v4 import InvoiceV4 -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR from tests.v1.product import get_id, get_version @@ -10,13 +10,13 @@ def test_default_sample(): client = Client() with open( - PRODUCT_DATA_DIR / "invoices" / "response_v4" / "default_sample.rst", + V1_PRODUCT_DATA_DIR / "invoices" / "response_v4" / "default_sample.rst", encoding="utf-8", ) as rst_file: rst_ref = rst_file.read() sample = client.source_from_path( - PRODUCT_DATA_DIR / "invoices" / "default_sample.jpg", + V1_PRODUCT_DATA_DIR / "invoices" / "default_sample.jpg", ) response = client.parse(InvoiceV4, sample) doc_response = response.document diff --git a/tests/v1/product/invoice_splitter/test_invoice_splitter_v1.py b/tests/v1/product/invoice_splitter/test_invoice_splitter_v1.py index 03bd023b..aeb62c65 100644 --- a/tests/v1/product/invoice_splitter/test_invoice_splitter_v1.py +++ b/tests/v1/product/invoice_splitter/test_invoice_splitter_v1.py @@ -8,9 +8,9 @@ from mindee.product.invoice_splitter.invoice_splitter_v1_document import ( InvoiceSplitterV1Document, ) -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR -RESPONSE_DIR = PRODUCT_DATA_DIR / "invoice_splitter" / "response_v1" +RESPONSE_DIR = V1_PRODUCT_DATA_DIR / "invoice_splitter" / "response_v1" InvoiceSplitterV1DocumentType = Document[ InvoiceSplitterV1Document, diff --git a/tests/v1/product/invoice_splitter/test_invoice_splitter_v1_regression.py b/tests/v1/product/invoice_splitter/test_invoice_splitter_v1_regression.py index dc00837b..7908fa7f 
100644 --- a/tests/v1/product/invoice_splitter/test_invoice_splitter_v1_regression.py +++ b/tests/v1/product/invoice_splitter/test_invoice_splitter_v1_regression.py @@ -2,7 +2,7 @@ from mindee.client import Client from mindee.product.invoice_splitter.invoice_splitter_v1 import InvoiceSplitterV1 -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR from tests.v1.product import get_id, get_version @@ -10,13 +10,13 @@ def test_default_sample(): client = Client() with open( - PRODUCT_DATA_DIR / "invoice_splitter" / "response_v1" / "default_sample.rst", + V1_PRODUCT_DATA_DIR / "invoice_splitter" / "response_v1" / "default_sample.rst", encoding="utf-8", ) as rst_file: rst_ref = rst_file.read() sample = client.source_from_path( - PRODUCT_DATA_DIR / "invoice_splitter" / "default_sample.pdf" + V1_PRODUCT_DATA_DIR / "invoice_splitter" / "default_sample.pdf" ) response = client.enqueue_and_parse(InvoiceSplitterV1, sample) diff --git a/tests/v1/product/material_certificate/test_material_certificate_v1.py b/tests/v1/product/material_certificate/test_material_certificate_v1.py index b3c1c440..0ef20dda 100644 --- a/tests/v1/product/material_certificate/test_material_certificate_v1.py +++ b/tests/v1/product/material_certificate/test_material_certificate_v1.py @@ -10,9 +10,9 @@ from mindee.product.material_certificate.material_certificate_v1_document import ( MaterialCertificateV1Document, ) -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR -RESPONSE_DIR = PRODUCT_DATA_DIR / "material_certificate" / "response_v1" +RESPONSE_DIR = V1_PRODUCT_DATA_DIR / "material_certificate" / "response_v1" MaterialCertificateV1DocumentType = Document[ MaterialCertificateV1Document, diff --git a/tests/v1/product/multi_receipts_detector/test_multi_receipts_detector_v1.py b/tests/v1/product/multi_receipts_detector/test_multi_receipts_detector_v1.py index fde399a6..0a6ed38e 100644 --- 
a/tests/v1/product/multi_receipts_detector/test_multi_receipts_detector_v1.py +++ b/tests/v1/product/multi_receipts_detector/test_multi_receipts_detector_v1.py @@ -10,9 +10,9 @@ from mindee.product.multi_receipts_detector.multi_receipts_detector_v1_document import ( MultiReceiptsDetectorV1Document, ) -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR -RESPONSE_DIR = PRODUCT_DATA_DIR / "multi_receipts_detector" / "response_v1" +RESPONSE_DIR = V1_PRODUCT_DATA_DIR / "multi_receipts_detector" / "response_v1" MultiReceiptsDetectorV1DocumentType = Document[ MultiReceiptsDetectorV1Document, diff --git a/tests/v1/product/multi_receipts_detector/test_multi_receipts_detector_v1_regression.py b/tests/v1/product/multi_receipts_detector/test_multi_receipts_detector_v1_regression.py index 6226a4db..5c94f1bc 100644 --- a/tests/v1/product/multi_receipts_detector/test_multi_receipts_detector_v1_regression.py +++ b/tests/v1/product/multi_receipts_detector/test_multi_receipts_detector_v1_regression.py @@ -4,7 +4,7 @@ from mindee.product.multi_receipts_detector.multi_receipts_detector_v1 import ( MultiReceiptsDetectorV1, ) -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR from tests.v1.product import get_id, get_version @@ -12,7 +12,7 @@ def test_default_sample(): client = Client() with open( - PRODUCT_DATA_DIR + V1_PRODUCT_DATA_DIR / "multi_receipts_detector" / "response_v1" / "default_sample.rst", @@ -21,7 +21,7 @@ def test_default_sample(): rst_ref = rst_file.read() sample = client.source_from_path( - PRODUCT_DATA_DIR / "multi_receipts_detector" / "default_sample.jpg", + V1_PRODUCT_DATA_DIR / "multi_receipts_detector" / "default_sample.jpg", ) response = client.parse(MultiReceiptsDetectorV1, sample) doc_response = response.document diff --git a/tests/v1/product/nutrition_facts_label/test_nutrition_facts_label_v1.py b/tests/v1/product/nutrition_facts_label/test_nutrition_facts_label_v1.py index 
63deca5c..a9ed7519 100644 --- a/tests/v1/product/nutrition_facts_label/test_nutrition_facts_label_v1.py +++ b/tests/v1/product/nutrition_facts_label/test_nutrition_facts_label_v1.py @@ -10,9 +10,9 @@ from mindee.product.nutrition_facts_label.nutrition_facts_label_v1_document import ( NutritionFactsLabelV1Document, ) -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR -RESPONSE_DIR = PRODUCT_DATA_DIR / "nutrition_facts" / "response_v1" +RESPONSE_DIR = V1_PRODUCT_DATA_DIR / "nutrition_facts" / "response_v1" NutritionFactsLabelV1DocumentType = Document[ NutritionFactsLabelV1Document, diff --git a/tests/v1/product/passport/test_passport_v1.py b/tests/v1/product/passport/test_passport_v1.py index ede170e1..690fefed 100644 --- a/tests/v1/product/passport/test_passport_v1.py +++ b/tests/v1/product/passport/test_passport_v1.py @@ -8,9 +8,9 @@ from mindee.product.passport.passport_v1_document import ( PassportV1Document, ) -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR -RESPONSE_DIR = PRODUCT_DATA_DIR / "passport" / "response_v1" +RESPONSE_DIR = V1_PRODUCT_DATA_DIR / "passport" / "response_v1" PassportV1DocumentType = Document[ PassportV1Document, diff --git a/tests/v1/product/passport/test_passport_v1_regression.py b/tests/v1/product/passport/test_passport_v1_regression.py index 515762b7..836a639b 100644 --- a/tests/v1/product/passport/test_passport_v1_regression.py +++ b/tests/v1/product/passport/test_passport_v1_regression.py @@ -2,7 +2,7 @@ from mindee.client import Client from mindee.product.passport.passport_v1 import PassportV1 -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR from tests.v1.product import get_id, get_version @@ -10,13 +10,13 @@ def test_default_sample(): client = Client() with open( - PRODUCT_DATA_DIR / "passport" / "response_v1" / "default_sample.rst", + V1_PRODUCT_DATA_DIR / "passport" / "response_v1" / "default_sample.rst", 
encoding="utf-8", ) as rst_file: rst_ref = rst_file.read() sample = client.source_from_path( - PRODUCT_DATA_DIR / "passport" / "default_sample.jpg", + V1_PRODUCT_DATA_DIR / "passport" / "default_sample.jpg", ) response = client.parse(PassportV1, sample) doc_response = response.document diff --git a/tests/v1/product/receipt/test_receipt_v5.py b/tests/v1/product/receipt/test_receipt_v5.py index 2dd98942..807d32e3 100644 --- a/tests/v1/product/receipt/test_receipt_v5.py +++ b/tests/v1/product/receipt/test_receipt_v5.py @@ -8,9 +8,9 @@ from mindee.product.receipt.receipt_v5_document import ( ReceiptV5Document, ) -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR -RESPONSE_DIR = PRODUCT_DATA_DIR / "expense_receipts" / "response_v5" +RESPONSE_DIR = V1_PRODUCT_DATA_DIR / "expense_receipts" / "response_v5" ReceiptV5DocumentType = Document[ ReceiptV5Document, diff --git a/tests/v1/product/receipt/test_receipt_v5_regression.py b/tests/v1/product/receipt/test_receipt_v5_regression.py index c0109a0f..7279c997 100644 --- a/tests/v1/product/receipt/test_receipt_v5_regression.py +++ b/tests/v1/product/receipt/test_receipt_v5_regression.py @@ -2,7 +2,7 @@ from mindee.client import Client from mindee.product.receipt.receipt_v5 import ReceiptV5 -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR from tests.v1.product import get_id, get_version @@ -10,13 +10,13 @@ def test_default_sample(): client = Client() with open( - PRODUCT_DATA_DIR / "expense_receipts" / "response_v5" / "default_sample.rst", + V1_PRODUCT_DATA_DIR / "expense_receipts" / "response_v5" / "default_sample.rst", encoding="utf-8", ) as rst_file: rst_ref = rst_file.read() sample = client.source_from_path( - PRODUCT_DATA_DIR / "expense_receipts" / "default_sample.jpg", + V1_PRODUCT_DATA_DIR / "expense_receipts" / "default_sample.jpg", ) response = client.parse(ReceiptV5, sample) doc_response = response.document diff --git 
a/tests/v1/product/resume/test_resume_v1.py b/tests/v1/product/resume/test_resume_v1.py index 2b141f70..104df9ca 100644 --- a/tests/v1/product/resume/test_resume_v1.py +++ b/tests/v1/product/resume/test_resume_v1.py @@ -8,9 +8,9 @@ from mindee.product.resume.resume_v1_document import ( ResumeV1Document, ) -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR -RESPONSE_DIR = PRODUCT_DATA_DIR / "resume" / "response_v1" +RESPONSE_DIR = V1_PRODUCT_DATA_DIR / "resume" / "response_v1" ResumeV1DocumentType = Document[ ResumeV1Document, diff --git a/tests/v1/product/us/bank_check/test_bank_check_v1.py b/tests/v1/product/us/bank_check/test_bank_check_v1.py index b4e64764..087f2d82 100644 --- a/tests/v1/product/us/bank_check/test_bank_check_v1.py +++ b/tests/v1/product/us/bank_check/test_bank_check_v1.py @@ -11,9 +11,9 @@ from mindee.product.us.bank_check.bank_check_v1_page import ( BankCheckV1Page, ) -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR -RESPONSE_DIR = PRODUCT_DATA_DIR / "bank_check" / "response_v1" +RESPONSE_DIR = V1_PRODUCT_DATA_DIR / "bank_check" / "response_v1" BankCheckV1DocumentType = Document[ BankCheckV1Document, diff --git a/tests/v1/product/us/bank_check/test_bank_check_v1_regression.py b/tests/v1/product/us/bank_check/test_bank_check_v1_regression.py index 42f12731..58deddb0 100644 --- a/tests/v1/product/us/bank_check/test_bank_check_v1_regression.py +++ b/tests/v1/product/us/bank_check/test_bank_check_v1_regression.py @@ -2,7 +2,7 @@ from mindee.client import Client from mindee.product.us.bank_check.bank_check_v1 import BankCheckV1 -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR from tests.v1.product import get_id, get_version @@ -10,13 +10,13 @@ def test_default_sample(): client = Client() with open( - PRODUCT_DATA_DIR / "bank_check" / "response_v1" / "default_sample.rst", + V1_PRODUCT_DATA_DIR / "bank_check" / "response_v1" / 
"default_sample.rst", encoding="utf-8", ) as rst_file: rst_ref = rst_file.read() sample = client.source_from_path( - PRODUCT_DATA_DIR / "bank_check" / "default_sample.jpg", + V1_PRODUCT_DATA_DIR / "bank_check" / "default_sample.jpg", ) response = client.parse(BankCheckV1, sample) doc_response = response.document diff --git a/tests/v1/product/us/healthcare_card/test_healthcare_card_v1.py b/tests/v1/product/us/healthcare_card/test_healthcare_card_v1.py index 0ba6752c..26629bea 100644 --- a/tests/v1/product/us/healthcare_card/test_healthcare_card_v1.py +++ b/tests/v1/product/us/healthcare_card/test_healthcare_card_v1.py @@ -8,9 +8,9 @@ from mindee.product.us.healthcare_card.healthcare_card_v1_document import ( HealthcareCardV1Document, ) -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR -RESPONSE_DIR = PRODUCT_DATA_DIR / "us_healthcare_cards" / "response_v1" +RESPONSE_DIR = V1_PRODUCT_DATA_DIR / "us_healthcare_cards" / "response_v1" HealthcareCardV1DocumentType = Document[ HealthcareCardV1Document, diff --git a/tests/v1/product/us/us_mail/test_us_mail_v3.py b/tests/v1/product/us/us_mail/test_us_mail_v3.py index e6e2366e..ea3b4862 100644 --- a/tests/v1/product/us/us_mail/test_us_mail_v3.py +++ b/tests/v1/product/us/us_mail/test_us_mail_v3.py @@ -8,9 +8,9 @@ from mindee.product.us.us_mail.us_mail_v3_document import ( UsMailV3Document, ) -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR -RESPONSE_DIR = PRODUCT_DATA_DIR / "us_mail" / "response_v3" +RESPONSE_DIR = V1_PRODUCT_DATA_DIR / "us_mail" / "response_v3" UsMailV3DocumentType = Document[ UsMailV3Document, diff --git a/tests/v1/test_cli.py b/tests/v1/test_cli.py index e3fca975..d7ea0379 100644 --- a/tests/v1/test_cli.py +++ b/tests/v1/test_cli.py @@ -5,7 +5,7 @@ from mindee.commands.cli_parser import MindeeParser from mindee.error.mindee_http_error import MindeeHTTPClientError, MindeeHTTPError -from tests.utils import clear_envvars +from 
tests.utils import FILE_TYPES_DIR, V1_PRODUCT_DATA_DIR, clear_envvars @pytest.fixture @@ -22,7 +22,7 @@ def custom_doc(monkeypatch): input_type="path", output_type="summary", include_words=False, - path="./tests/data/file_types/pdf/blank.pdf", + path=FILE_TYPES_DIR / "pdf" / "blank.pdf", parse_type="parse", async_parse=False, ) @@ -42,7 +42,7 @@ def generated_doc_sync(monkeypatch): input_type="path", output_type="summary", include_words=False, - path="./tests/data/file_types/pdf/blank.pdf", + path=FILE_TYPES_DIR / "pdf" / "blank.pdf", parse_type="parse", async_parse=False, ) @@ -62,7 +62,7 @@ def generated_doc_async(monkeypatch): input_type="path", output_type="summary", include_words=False, - path="./tests/data/file_types/pdf/blank.pdf", + path=FILE_TYPES_DIR / "pdf" / "blank.pdf", parse_type="parse", async_parse=True, ) @@ -79,7 +79,7 @@ def ots_doc(monkeypatch): input_type="path", output_type="summary", include_words=False, - path="./tests/data/products/invoices/invoice.pdf", + path=V1_PRODUCT_DATA_DIR / "invoices" / "invoice.pdf", parse_type="parse", async_parse=False, ) @@ -95,7 +95,7 @@ def ots_doc_enqueue_and_parse(monkeypatch): doc_pages=3, input_type="path", include_words=False, - path="./tests/data/products/invoice_splitter/default_sample.pdf", + path=V1_PRODUCT_DATA_DIR / "invoice_splitter" / "default_sample.pdf", parse_type="parse", async_parse=True, ) @@ -115,7 +115,7 @@ def ots_doc_feedback(monkeypatch): queue_id="dummy-queue-id", call_method="parse-queued", input_type="path", - path="./tests/data/file_types/pdf/blank.pdf", + path=FILE_TYPES_DIR / "pdf" / "blank.pdf", parse_type="feedback", feedback=json.loads(dummy_feedback), ) diff --git a/tests/v1/test_client.py b/tests/v1/test_client.py index bf5eac0a..da6e44c2 100644 --- a/tests/v1/test_client.py +++ b/tests/v1/test_client.py @@ -12,8 +12,13 @@ from mindee.product.invoice.invoice_v4 import InvoiceV4 from mindee.product.invoice_splitter.invoice_splitter_v1 import InvoiceSplitterV1 from 
mindee.product.receipt.receipt_v5 import ReceiptV5 -from tests.utils import FILE_TYPES_DIR, PRODUCT_DATA_DIR, clear_envvars, dummy_envvars -from tests.v1.mindee_http import ERROR_DATA_DIR +from tests.utils import ( + FILE_TYPES_DIR, + V1_PRODUCT_DATA_DIR, + clear_envvars, + dummy_envvars, +) +from tests.v1.mindee_http import V1_ERROR_DATA_DIR @pytest.fixture @@ -121,10 +126,13 @@ def test_async_wrong_polling_delay(dummy_client: Client): def test_local_response_from_sync_json(dummy_client: Client): input_file = LocalResponse( - PRODUCT_DATA_DIR / "multi_receipts_detector" / "response_v1" / "complete.json" + V1_PRODUCT_DATA_DIR + / "multi_receipts_detector" + / "response_v1" + / "complete.json" ) with open( - PRODUCT_DATA_DIR + V1_PRODUCT_DATA_DIR / "multi_receipts_detector" / "response_v1" / "summary_full.rst" @@ -137,10 +145,10 @@ def test_local_response_from_sync_json(dummy_client: Client): def test_local_response_from_async_json(dummy_client: Client): input_file = LocalResponse( - PRODUCT_DATA_DIR / "international_id" / "response_v2" / "complete.json" + V1_PRODUCT_DATA_DIR / "international_id" / "response_v2" / "complete.json" ) with open( - PRODUCT_DATA_DIR / "international_id" / "response_v2" / "summary_full.rst" + V1_PRODUCT_DATA_DIR / "international_id" / "response_v2" / "summary_full.rst" ) as f: reference_doc = f.read() result = dummy_client.load_prediction(InternationalIdV2, input_file) @@ -150,13 +158,13 @@ def test_local_response_from_async_json(dummy_client: Client): def test_local_response_from_invalid_file(dummy_client: Client): local_response = LocalResponse( - PRODUCT_DATA_DIR / "invoices" / "response_v4" / "summary_full.rst" + V1_PRODUCT_DATA_DIR / "invoices" / "response_v4" / "summary_full.rst" ) with pytest.raises(MindeeError): print(local_response.as_dict) def test_local_response_from_invalid_dict(dummy_client: Client): - input_file = LocalResponse(ERROR_DATA_DIR / "error_400_no_details.json") + input_file = LocalResponse(V1_ERROR_DATA_DIR / 
"error_400_no_details.json") with pytest.raises(MindeeError): dummy_client.load_prediction(InvoiceV4, input_file) diff --git a/tests/v1/workflows/test_workflow.py b/tests/v1/workflows/test_workflow.py index 526eeee9..bf0b7e71 100644 --- a/tests/v1/workflows/test_workflow.py +++ b/tests/v1/workflows/test_workflow.py @@ -5,8 +5,9 @@ from mindee.parsing.common.workflow_response import WorkflowResponse from mindee.product.generated.generated_v1 import GeneratedV1 +from tests.utils import V1_DATA_DIR -WORKFLOW_DIR = Path("./tests/data") / "workflows" +WORKFLOW_DIR = V1_DATA_DIR / "workflows" @pytest.fixture diff --git a/tests/v1/workflows/test_workflow_integration.py b/tests/v1/workflows/test_workflow_integration.py index beecb8e7..0fdac38b 100644 --- a/tests/v1/workflows/test_workflow_integration.py +++ b/tests/v1/workflows/test_workflow_integration.py @@ -7,7 +7,7 @@ from mindee.input import WorkflowOptions from mindee.parsing.common.execution_priority import ExecutionPriority from mindee.product import FinancialDocumentV1, GeneratedV1 -from tests.utils import PRODUCT_DATA_DIR +from tests.utils import V1_PRODUCT_DATA_DIR @pytest.fixture @@ -22,7 +22,7 @@ def workflow_id(): @pytest.fixture def input_path(): - return PRODUCT_DATA_DIR / "financial_document" / "default_sample.jpg" + return V1_PRODUCT_DATA_DIR / "financial_document" / "default_sample.jpg" @pytest.mark.integration diff --git a/tests/v2/test_client_v2_integration.py b/tests/v2/test_client_v2_integration.py index d8b40ffb..a7f03381 100644 --- a/tests/v2/test_client_v2_integration.py +++ b/tests/v2/test_client_v2_integration.py @@ -6,7 +6,7 @@ from mindee import ClientV2, InferenceParameters, PathInput, UrlInputSource from mindee.error.mindee_http_error_v2 import MindeeHTTPErrorV2 from mindee.parsing.v2.inference_response import InferenceResponse -from tests.utils import FILE_TYPES_DIR, PRODUCT_DATA_DIR +from tests.utils import FILE_TYPES_DIR, V1_PRODUCT_DATA_DIR @pytest.fixture(scope="session") @@ -110,7 
+110,7 @@ def test_parse_file_filled_single_page_must_succeed( """ Upload a filled single-page JPEG and verify that common fields are present. """ - input_path: Path = PRODUCT_DATA_DIR / "financial_document" / "default_sample.jpg" + input_path: Path = V1_PRODUCT_DATA_DIR / "financial_document" / "default_sample.jpg" input_source = PathInput(input_path) params = InferenceParameters( From 2ce094cf3791c1ea406fffc7e7041aa3e1c92dfe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ianar=C3=A9=20S=C3=A9vi?= Date: Mon, 3 Nov 2025 11:22:09 +0100 Subject: [PATCH 3/3] version is in folder --- .github/workflows/_test-integrations.yml | 1 - tests/utils.py | 3 +- .../v2/{test_client_v2.py => test_client.py} | 0 ...egration.py => test_client_integration.py} | 32 +++++++++++++++---- 4 files changed, 28 insertions(+), 8 deletions(-) rename tests/v2/{test_client_v2.py => test_client.py} (100%) rename tests/v2/{test_client_v2_integration.py => test_client_integration.py} (89%) diff --git a/.github/workflows/_test-integrations.yml b/.github/workflows/_test-integrations.yml index 2beeacff..c388b1f0 100644 --- a/.github/workflows/_test-integrations.yml +++ b/.github/workflows/_test-integrations.yml @@ -53,7 +53,6 @@ jobs: run: | pytest --cov mindee -m integration - - name: Notify Slack Action on Failure uses: ravsamhq/notify-slack-action@2.3.0 if: ${{ always() && github.ref_name == 'main' }} diff --git a/tests/utils.py b/tests/utils.py index bcf0e780..e67b57a0 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -7,12 +7,13 @@ REQUEST_TIMEOUT_ENV_NAME, ) -ROOT_DATA_DIR = Path("./tests/data/") +ROOT_DATA_DIR = Path(__file__).parent / "data" V1_DATA_DIR = ROOT_DATA_DIR / "v1" V2_DATA_DIR = ROOT_DATA_DIR / "v2" EXTRAS_DIR = V1_DATA_DIR / "extras" FILE_TYPES_DIR = ROOT_DATA_DIR / "file_types" V1_PRODUCT_DATA_DIR = V1_DATA_DIR / "products" +V2_PRODUCT_DATA_DIR = V2_DATA_DIR / "products" def clear_envvars(monkeypatch) -> None: diff --git a/tests/v2/test_client_v2.py b/tests/v2/test_client.py 
similarity index 100% rename from tests/v2/test_client_v2.py rename to tests/v2/test_client.py diff --git a/tests/v2/test_client_v2_integration.py b/tests/v2/test_client_integration.py similarity index 89% rename from tests/v2/test_client_v2_integration.py rename to tests/v2/test_client_integration.py index a7f03381..cabce8db 100644 --- a/tests/v2/test_client_v2_integration.py +++ b/tests/v2/test_client_integration.py @@ -6,7 +6,7 @@ from mindee import ClientV2, InferenceParameters, PathInput, UrlInputSource from mindee.error.mindee_http_error_v2 import MindeeHTTPErrorV2 from mindee.parsing.v2.inference_response import InferenceResponse -from tests.utils import FILE_TYPES_DIR, V1_PRODUCT_DATA_DIR +from tests.utils import FILE_TYPES_DIR, V2_PRODUCT_DATA_DIR @pytest.fixture(scope="session") @@ -43,14 +43,12 @@ def test_parse_file_empty_multiple_pages_must_succeed( raw_text=True, polygon=False, confidence=False, - webhook_ids=[], alias="py_integration_empty_multiple", ) response: InferenceResponse = v2_client.enqueue_and_get_inference( input_source, params ) - assert response is not None assert response.inference is not None @@ -67,6 +65,8 @@ def test_parse_file_empty_multiple_pages_must_succeed( assert response.inference.active_options.polygon is False assert response.inference.active_options.confidence is False + assert response.inference.result is not None + assert response.inference.result.raw_text is not None assert len(response.inference.result.raw_text.pages) == 2 @@ -88,12 +88,20 @@ def test_parse_file_empty_single_page_options_must_succeed( raw_text=True, polygon=True, confidence=True, - webhook_ids=[], alias="py_integration_empty_page_options", ) response: InferenceResponse = v2_client.enqueue_and_get_inference( input_source, params ) + assert response is not None + assert response.inference is not None + + assert response.inference.model is not None + assert response.inference.model.id == findoc_model_id + + assert response.inference.file is not None + 
assert response.inference.file.name == "blank_1.pdf" + assert response.inference.file.page_count == 1 assert response.inference.active_options is not None assert response.inference.active_options.rag is True @@ -101,6 +109,8 @@ def test_parse_file_empty_single_page_options_must_succeed( assert response.inference.active_options.polygon is True assert response.inference.active_options.confidence is True + assert response.inference.result is not None + @pytest.mark.integration @pytest.mark.v2 @@ -110,7 +120,7 @@ def test_parse_file_filled_single_page_must_succeed( """ Upload a filled single-page JPEG and verify that common fields are present. """ - input_path: Path = V1_PRODUCT_DATA_DIR / "financial_document" / "default_sample.jpg" + input_path: Path = V2_PRODUCT_DATA_DIR / "financial_document" / "default_sample.jpg" input_source = PathInput(input_path) params = InferenceParameters( @@ -222,7 +232,7 @@ def test_unknown_webhook_ids_must_throw_error( @pytest.mark.integration @pytest.mark.v2 -def test_url_input_source_must_not_raise_errors( +def test_blank_url_input_source_must_succeed( v2_client: ClientV2, findoc_model_id: str, ) -> None: @@ -246,3 +256,13 @@ def test_url_input_source_must_not_raise_errors( ) assert response is not None assert response.inference is not None + + assert response.inference.file is not None + assert response.inference.file.page_count == 1 + + assert response.inference.model is not None + assert response.inference.model.id == findoc_model_id + + assert response.inference.result is not None + + assert response.inference.active_options is not None