From 2e1af91bf4182ee1d890522f089fd844ed80a720 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ianar=C3=A9=20S=C3=A9vi?= Date: Wed, 18 Jan 2023 18:11:05 +0100 Subject: [PATCH] :memo: Add guides documentation --- README.md | 35 +-- docs/guide/python-api-builder.md | 134 +++++++++++ docs/guide/python-cli.md | 48 ++++ docs/guide/python-getting-started.md | 183 +++++++++++++++ docs/guide/python-invoice-ocr.md | 319 +++++++++++++++++++++++++++ docs/guide/python-passport-ocr.md | 212 ++++++++++++++++++ docs/guide/python-receipt-ocr.md | 240 ++++++++++++++++++++ tests/data | 2 +- 8 files changed, 1157 insertions(+), 16 deletions(-) create mode 100644 docs/guide/python-api-builder.md create mode 100644 docs/guide/python-cli.md create mode 100644 docs/guide/python-getting-started.md create mode 100644 docs/guide/python-invoice-ocr.md create mode 100644 docs/guide/python-passport-ocr.md create mode 100644 docs/guide/python-receipt-ocr.md diff --git a/README.md b/README.md index e1696b25..b5fbdb11 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,4 @@ -[![License: MIT](https://img.shields.io/github/license/mindee/mindee-api-python)](https://opensource.org/licenses/MIT) -[![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/mindee/mindee-api-python/test.yml)](https://github.com/mindee/mindee-api-python) -[![PyPI Version](https://img.shields.io/pypi/v/mindee)](https://pypi.org/project/mindee/) -[![Downloads](https://img.shields.io/pypi/dm/mindee)](https://pypi.org/project/mindee/) +[![License: MIT](https://img.shields.io/github/license/mindee/mindee-api-python)](https://opensource.org/licenses/MIT) [![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/mindee/mindee-api-python/test.yml)](https://github.com/mindee/mindee-api-python) [![PyPI Version](https://img.shields.io/pypi/v/mindee)](https://pypi.org/project/mindee/) [![Downloads](https://img.shields.io/pypi/dm/mindee)](https://pypi.org/project/mindee/) # Mindee API Helper Library 
for Python Quickly and easily connect to Mindee's API services using Python. @@ -31,10 +28,10 @@ mindee_client = Client(api_key="my-api-key") input_doc = mindee_client.doc_from_path("/path/to/the/file.ext") # Parse the document as an invoice by passing the appropriate type -api_response = input_doc.parse(documents.TypeInvoiceV4) +result = input_doc.parse(documents.TypeInvoiceV4) # Print a brief summary of the parsed data -print(api_response.document) +print(result.document) ``` #### Region-Specific Documents @@ -48,10 +45,10 @@ mindee_client = Client(api_key="my-api-key") input_doc = mindee_client.doc_from_path("/path/to/the/file.ext") # Parse the document as a USA bank check by passing the appropriate type -api_response = input_doc.parse(documents.us.TypeBankCheckV1) +result = input_doc.parse(documents.us.TypeBankCheckV1) # Print a brief summary of the parsed data -print(api_response.document) +print(result.document) ``` #### Custom Document (API Builder) @@ -67,24 +64,29 @@ mindee_client = Client(api_key="my-api-key").add_endpoint( # Load a file from disk and parse it. # The endpoint name must be specified since it can't be determined from the class. -api_response = mindee_client.doc_from_path( +result = mindee_client.doc_from_path( "/path/to/the/file.ext" ).parse(documents.TypeCustomV1, endpoint_name="wnine") # Print a brief summary of the parsed data -print(api_response.document) +print(result.document) # Iterate over all the fields in the document -for field_name, field_values in api_response.document.fields.items(): +for field_name, field_values in result.document.fields.items(): print(field_name, "=", field_values) ``` ## Further Reading -There's more to it than that for those that need more features, or want to -customize the experience. +Complete details on the working of the library are available in the following guides: -All the juicy details are described in the -**[Official Guide](https://developers.mindee.com/docs/python-sdk)**. 
+* [Getting started](https://developers.mindee.com/docs/python-getting-started) +* [Command Line Interface (CLI)](https://developers.mindee.com/docs/python-cli) +* [Custom APIs (API Builder)](https://developers.mindee.com/docs/python-api-builder) +* [Invoice API](https://developers.mindee.com/docs/python-invoice-ocr) +* [Passport API](https://developers.mindee.com/docs/python-passport-ocr) +* [Receipt API](https://developers.mindee.com/docs/python-receipt-ocr) + +You can view the source code on [GitHub](https://github.com/mindee/mindee-api-python). You can also take a look at the **[Reference Documentation](https://mindee.github.io/mindee-api-python/)**. @@ -93,3 +95,6 @@ You can also take a look at the Copyright © Mindee Available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT). + +## Questions? +[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-1jv6nawjq-FDgFcF2T5CmMmRpl9LLptw) diff --git a/docs/guide/python-api-builder.md b/docs/guide/python-api-builder.md new file mode 100644 index 00000000..66bc6c0a --- /dev/null +++ b/docs/guide/python-api-builder.md @@ -0,0 +1,134 @@ +The Python OCR SDK supports [custom-built API](https://developers.mindee.com/docs/build-your-first-document-parsing-api) from the API Builder. If your document isn't covered by one of Mindee's Off-the-Shelf APIs, you can create your own API using the [API Builder](https://developers.mindee.com/docs/overview). + +If your document isn't covered by one of Mindee's Off-the-Shelf APIs, you can create your own API using the +[API Builder](https://developers.mindee.com/docs/overview). + +For the following examples, we are using our own [W9s custom API](https://developers.mindee.com/docs/w9-forms-ocr), +created with the [API Builder](https://developers.mindee.com/docs/overview). 
+ +```python +from mindee import Client, documents + +# Init a new client and add your custom endpoint (document) +mindee_client = Client(api_key="my-api-key").add_endpoint( + account_name="john", + endpoint_name="wnine", + # version="1.2", # optional, see configuring client section below +) + +# Load a file from disk and parse it. +# The endpoint name must be specified since it can't be determined from the class. +result = mindee_client.doc_from_path( + "/path/to/the/w9.jpg" +).parse(documents.TypeCustomV1, endpoint_name="wnine") + +# Print a brief summary of the parsed data +print(result.document) +``` + +## Adding the Endpoint +Below are the arguments for adding a custom endpoint using the `add_endpoint` method. + +**`endpoint_name`**: The endpoint name is the API name from [Settings](https://developers.mindee.com/docs/build-your-first-document-parsing-api#settings-api-keys-and-documentation) page + +**`account_name`**: Your organization's or user's name in the API Builder. + +**`version`**: If set, locks the version of the model to use, you'll be required to update your code every time a new model is trained. + This is probably not needed for development but essential for production use. + If not set, uses the latest version of the model. + +## Parsing Documents +The client calls the `parse` method when parsing your custom document, which will return an object containing the prediction results of the sent file. +The `endpoint_name` must be specified when calling the `parse` method for a custom endpoint. 
+ +```python +result = mindee_client.doc_from_path("/path/to/receipt.jpg").parse( + documents.TypeCustomV1, endpoint_name="wnine" +) + +print(result.document) +``` + +> 📘 **Info** +> +> If your custom document has the same name as an [off-the-shelf API](https://developers.mindee.com/docs/what-is-off-the-shelf-api) document, +> you **must** specify your account name when calling the `parse` method: + +```python +from mindee import Client, documents + +mindee_client = Client(api_key="johndoe-receipt-api-key").add_endpoint( + endpoint_name="receipt", + account_name="JohnDoe", +) + +result = mindee_client.doc_from_path("/path/to/receipt.jpg").parse( + documents.TypeCustomV1, + endpoint_name="receipt", + account_name="JohnDoe", +) +``` + +## Document Fields +All the fields defined in the API Builder when creating your custom document are available. + +In custom documents, each field will hold an array of all the words in the document which are related to that field. +Each word is an object that has the text content, geometry information, and confidence score. + +Value fields can be accessed via the `fields` attribute. + +Classification fields can be accessed via the `classifications` attribute. + +> 📘 **Info** +> +> Both document level and page level objects work in the same way. + +### Fields Attribute +The `fields` attribute is a dictionary with the following structure: + +* key: the API name of the field, as a `str` +* value: a `ListField` object which has a `values` attribute, containing a list of all values found for the field. + +Individual field values can be accessed by using the field's API name, in the examples below we'll use the `address` field. 
+ +```python +# raw data, list of each word object +print(result.document.fields["address"].values) + +# list of all values +print(result.document.fields["address"].contents_list) + +# default string representation +print(str(result.document.fields["address"])) + +# custom string representation +print(result.document.fields["address"].contents_string(separator="_")) +``` + +To iterate over all the fields: +```python +for name, info in result.document.fields.items(): + print(name) + print(info.values) +``` + +### Classifications Attribute +The `classifications` attribute is a dictionary with the following structure: + +* key: the API name of the field, as a `str` +* value: a `ClassificationField` object which has a `value` attribute, containing a string representation of the detected classification. + +```python +# string value of the detected classification +print(result.document.classifications["doc_type"].value) +``` + +To iterate over all the classifications: +```python +for name, info in result.document.classifications.items(): + print(name) + print(info.value) +``` + +## Questions? +[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-1jv6nawjq-FDgFcF2T5CmMmRpl9LLptw) diff --git a/docs/guide/python-cli.md b/docs/guide/python-cli.md new file mode 100644 index 00000000..4a9bfe88 --- /dev/null +++ b/docs/guide/python-cli.md @@ -0,0 +1,48 @@ +## Command Line Usage +The CLI tool is provided mainly for quick tests and debugging. 
+ +### General help + +```shell +python3 -m mindee --help +``` + +### Example command help + +```shell +python3 -m mindee invoice --help +``` + +### Example parse command for Off-the-Shelf document + +```shell +python3 -m mindee invoice --invoice-key xxxxxxx /path/to/invoice.pdf +``` + +### Works with environment variables + +```shell +export MINDEE_API_KEY=xxxxxx +python3 -m mindee invoice /path/to/invoice.pdf +``` + +### Example parse command for a custom document + +```shell +python3 -m mindee custom -u pikachu -k xxxxxxx pokemon_card /path/to/card.jpg +``` + +### You can get the full parsed output as well + +```shell +python3 -m mindee invoice -o parsed /path/to/invoice.pdf +``` + +### In the Git repo, there's a helper script for it + +```shell +./mindee-cli.sh -h +``` + +## Questions? +[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-1jv6nawjq-FDgFcF2T5CmMmRpl9LLptw) diff --git a/docs/guide/python-getting-started.md b/docs/guide/python-getting-started.md new file mode 100644 index 00000000..ac04e9c1 --- /dev/null +++ b/docs/guide/python-getting-started.md @@ -0,0 +1,183 @@ +This guide will help you get started with the Mindee Python OCR SDK to easily extract data from your documents. + +The Python OCR SDK supports [invoice](https://developers.mindee.com/docs/python-invoice-ocr), [passport](https://developers.mindee.com/docs/python-passport-ocr), [receipt](https://developers.mindee.com/docs/python-receipt-ocr) OCR APIs and [custom-built API](https://developers.mindee.com/docs/python-api-builder) from the API Builder. + +You can view the source code on [GitHub](https://github.com/mindee/mindee-api-python), and the package on [PyPI](https://pypi.org/project/mindee/). + +## Prerequisite + +- Download and install [Python](https://www.python.org/downloads/). This library is officially supported on Python `3.7` to `3.11`. +- Download and install [pip package manager](https://pip.pypa.io/en/stable/installation/). 
+ +## Installation +To quickly get started with the Python OCR SDK anywhere, the preferred installation method is via `pip`. + +```shell +pip install mindee +``` + +### Development Installation +If you'll be modifying the source code, you'll need to install the development requirements to get started. + +1. First clone the repo. + +```shell +git clone git@github.com:mindee/mindee-api-python.git +``` + +2. Then navigate to the cloned directory and install all development requirements. + +```shell +cd mindee-api-python +pip install -e ".[dev,test]" +``` + +## Updating the Version +It is important to always check the version of the Mindee OCR SDK you are using, as new and updated features won’t work on old versions. + +To check the installed version: +```shell +pip show mindee +``` + +To get the latest version: +```shell +pip install mindee --upgrade +``` + +To install a specific version: +```shell +pip install mindee==<your_version> +``` + +## Usage +To get started with Mindee's APIs, you need to create a `Client` and you're ready to go. + +Let's take a deep dive into how this works. + +## Initializing the Client +The `Client` centralizes document configurations in a single object. + +The `Client` requires your [API key](https://developers.mindee.com/docs/make-your-first-request#create-an-api-key). + +You can either pass it directly to the constructor or through environment variables. + + +### Pass the API key directly +```python +from mindee import Client +# Init with your API key +mindee_client = Client(api_key="my-api-key") +``` + +### Set the API key in the environment +API keys should be set as environment variables, especially for any production deployment. 
+ +The following environment variable will set the global API key: +```shell +MINDEE_API_KEY="my-api-key" +``` + +Then in your code: +```python +from mindee import Client +# Init without an API key +mindee_client = Client() +``` + +### Setting the Request Timeout +The request timeout can be set using an environment variable: +```shell +MINDEE_REQUEST_TIMEOUT=200 +``` + +## Loading a Document File +Before being able to send a document to the API, it must first be loaded. + +You don't need to worry about different MIME types, the library will take care of handling +all supported types automatically. + +Once a document is loaded, interacting with it is done in exactly the same way, regardless +of how it was loaded. + +There are a few different ways of loading a document file, depending on your use case: + +* [Path](#path) +* [File Object](#file-object) +* [Base64](#base64) +* [Bytes](#bytes) + +### Path +Load from a file directly from disk. Requires an absolute path, as a string. + +```python +input_doc = mindee_client.doc_from_path("/path/to/the/invoice.pdf") +``` + +### File Object +A normal Python file object with a path. **Must be in binary mode**. + +```python +with open("/path/to/the/receipt.jpg", 'rb') as fo: + input_doc = mindee_client.doc_from_file(fo) +``` + +### Base64 +Requires a base64 encoded string. + +**Note**: The original filename is required when calling the method. + +```python +b64_string = "/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLD...." +input_doc = mindee_client.doc_from_b64string(b64_string, "receipt.jpg") +``` + +### Bytes +Requires raw bytes. + +**Note**: The original filename is required when calling the method. + +```python +raw_bytes = b"%PDF-1.3\n%\xbf\xf7\xa2\xfe\n1 0 ob..." +input_doc = mindee_client.doc_from_bytes(raw_bytes, "invoice.pdf") +``` + +Loading from bytes is useful when using FastAPI `UploadFile` objects. 
+ +```python +@app.post("/process-file") +async def upload(upload: UploadFile): + input_doc = mindee_client.doc_from_bytes( + upload.file.read(), + filename=upload.filename + ) +``` + +## Sending a File +To send a file to the API, we need to specify how to process the document. +This will determine which API endpoint is used and how the API return will be handled internally by the library. + +More specifically, we need to set a `mindee.documents.TypeDocument` type as the first parameter of the `parse` method. + +This is because the `parse` method's return type depends on its first argument. + +Each document type available in the library has its corresponding class, which inherits from the base `mindee.document.Document` class. +This is detailed in each document-specific guide. + +### Off-the-Shelf Documents +Simply setting the correct class is enough: +```python +result = input_doc.parse(documents.TypeInvoiceV4) +``` + +### Custom Documents +The endpoint to use must also be set, this is done in the `endpoint_name` argument of the `parse` method: +```python +result = input_doc.parse(documents.TypeCustomV1, endpoint_name="wnine") +``` + +This is because the `CustomV1` class is enough to handle the return processing, but the actual endpoint needs to be specified. + + +## Questions? +[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-1jv6nawjq-FDgFcF2T5CmMmRpl9LLptw) diff --git a/docs/guide/python-invoice-ocr.md b/docs/guide/python-invoice-ocr.md new file mode 100644 index 00000000..27b0faa9 --- /dev/null +++ b/docs/guide/python-invoice-ocr.md @@ -0,0 +1,319 @@ +The Python OCR SDK supports the [invoice API](https://developers.mindee.com/docs/invoice-ocr) for extracting data from invoices. + +Using this sample below, we are going to illustrate how to extract the data that we want using the OCR SDK. 
+ +![sample invoice](https://raw.githubusercontent.com/mindee/client-lib-test-data/main/invoice/invoice_1p.jpg) + +## Quick Start +```python +from mindee import Client, documents + +# Init a new client +mindee_client = Client(api_key="my-api-key") + +# Load a file from disk +input_doc = mindee_client.doc_from_path("/path/to/the/file.ext") + +# Parse the document as an Invoice by passing the appropriate type +api_response = input_doc.parse(documents.TypeInvoiceV4) + +print(api_response.document) +``` + +Output: +``` +----- Invoice V4 ----- +Filename: a74eaa5-c8e283b-sample_invoice.jpeg +Locale: en; en; CAD; +Invoice number: 14 +Reference numbers: AD29094 +Invoice date: 2018-09-25 +Invoice due date: 2018-09-25 +Supplier name: TURNPIKE DESIGNS CO. +Supplier address: 156 University Ave, Toronto ON, Canada M5H 2H7 +Supplier company registrations: +Supplier payment details: +Customer name: JIRO DOI +Customer company registrations: +Customer address: 1954 Bloon Street West Toronto, ON, M6P 3K9 Canada +Line Items: + Code | QTY | Price | Amount | Tax (Rate) | Description + | 1.00 | 65.00 | 65.00 | | Platinum web hosting package Dow... + | 3.00 | 2100.00 | 2100.00 | | 2 page website design Includes b... + | 1.00 | 250.00 | 250.00 | | Mobile designs Includes responsi... +Taxes: 193.20 8.00% +Total taxes: 193.20 +Total amount excluding taxes: 2415.00 +Total amount including taxes: 2608.20 +---------------------- +``` + +## Invoice Data Structure +The invoice object JSON data structure consists of: + +- [Document level prediction](#document-level-prediction) +- [Page level prediction](#page-level-prediction) +- [Raw HTTP response](#raw-http-response) + +### Document Level Prediction +For document level prediction, we construct the document class by using the different pages put in a single document. +The method used for creating a single invoice object with multiple pages relies on field confidence scores. 
+ +Basically, we iterate over each page, and for each field, we keep the one that has the highest probability. + +For example, if you send a three-page invoice, the document level will provide you with one tax, one total, and so on. + +```python +print(api_response.document) +``` + +### Page Level Prediction +For page level prediction, in a multi-page pdf we construct the document class by using a unique page of the pdf. + +```python +# [InvoiceV4, InvoiceV4 ...] +api_response.pages +``` + +### Raw HTTP Response +This contains the full Mindee API HTTP response object in JSON format. + +```python +# full HTTP response object +api_response.http_response +``` + +## Extracted Fields +Each invoice object contains a set of different fields. Each field contains the four following attributes: + +- **value** (Str or Float depending on the field type): corresponds to the field value. Set to None if the `field` was not extracted. +- **confidence** (Float): the confidence score of the field prediction. +- **bounding_box** (Array[Float]): contains the relative vertices coordinates of the bounding box containing the `field` in the image. + If the field is not written, the bbox is an empty array. +- **reconstructed** (Bool): True if the field was reconstructed using other fields. + +### Additional Attributes +Depending on the field type, there might be additional attributes that will be extracted. 
+ +- [Customer Information](#customer-information) +- [Dates](#dates) +- [Locale and Currency](#locale-and-currency) +- [Payment Information](#payment-information) +- [Reference Numbers](#reference-numbers) +- [Supplier Information](#supplier-information) +- [Taxes](#taxes) +- [Total Amounts](#total-amounts) +- [Line Items](#line-items) + + +### Customer Information + +- **customer_name**: Customer name + +```python +# To get the customer name (string) +customer_name = api_response.document.customer_name.value +``` + +- **customer_address**: Customer's address + +```python +# To get the customer address (string) +customer_address = api_response.document.customer_address.value +``` + +- **customer_company_registrations**: Customer Company Registration + +```python +# To get the list of customer company registrations +customer_company_registrations = api_response.document.customer_company_registrations + +for customer_company_registration in customer_company_registrations: + # To get the type of number + customer_company_registration_number_type = customer_company_registration.type + + # To get the company number + customer_company_registration_number_value = customer_company_registration.value +``` + +### Dates +**date_object**: Contains the date of issuance of the invoice. Each date field comes with extra attributes: + +- **invoice_date**: Datetime object from python datetime date library. + +```python +# To get the invoice date of issuance (string) +invoice_date = api_response.document.invoice_date.value +``` + +- **due_date**: Payment due date of the invoice. + +```python +# To get the invoice due date (string) +due_date = api_response.document.due_date.value +``` + +### Locale and Currency + +- **locale**: Language ISO code. + +```python +# To get the language code +language = api_response.document.locale.value +``` + +- **currency** (String): ISO currency code. 
+ +```python +# To get the invoice currency code +currency = api_response.document.locale.currency +``` + +### Payment Information +**payment_details**: List of invoice's supplier payment details. Each object in the list contains extra attributes: + +- **iban**: (String) +- **swift**: (String) +- **routing_number**: (String) +- **account_number**: (String) + +```python +# To get the list of payment details +payment_details = api_response.document.payment_details + +# Loop on each object +for payment_detail in payment_details: + # To get the IBAN + iban = payment_detail.iban + + # To get the swift + swift = payment_detail.swift + + # To get the routing number + routing_number = payment_detail.routing_number + + # To get the account_number + account_number = payment_detail.account_number +``` + +### Reference numbers +**reference_numbers**: List of Reference numbers including PO number: + +- **iban**: (String) +- **swift**: (String) +- **routing_number**: (String) +- **account_number**: (String) + +```python +# To get the list of reference numbers +reference_numbers = api_response.document.reference_numbers + +# Loop on each object +for reference_number in reference_numbers: + print(reference_number.value) +``` + +### Supplier Information + +**supplier_company_registrations**: List of detected supplier's company registration number. Each object in the list contains extra attributes: + +- **type** (String Generic): Type of company registration number among predefined categories. 
+- **value** (String): Value of the company identifier + +```python +# To get the list of company numbers +supplier_company_registrations = api_response.document.supplier_company_registrations + +# Loop on each object +for company_registration in supplier_company_registrations: + # To get the type of number + company_registration_type = company_registration.type + + # To get the company number + company_registration_value = company_registration.value +``` + +- **supplier_name**: Supplier name as written in the invoice (logo or supplier Infos). + +```python +# To get the supplier name +supplier_name = api_response.document.supplier_name.value +``` + +- **supplier_address**: Supplier address as written in the invoice. + +```python +# To get the supplier address +supplier_address = api_response.document.supplier_address.value +``` + +### Taxes +**taxes**: Contains array of tax fields. Each of the tax fields has two extra attributes: + +- **code** (String): Optional tax code (HST, GST... for Canadian; City Tax, State tax for US, etc..). +- **rate** (Float): Optional tax rate. + +```python +# To get the list of taxes +taxes = api_response.document.taxes + +# Loop on each Tax field +for tax in taxes: + # To get the tax amount + tax_amount = tax.value + + # To get the tax code from a tax object + tax_code = tax.code + + # To get the tax rate + tax_rate = tax.rate +``` + +### Total Amounts + +- **total_amount**: Total amount including taxes. + +```python +# To get the total amount including taxes value (float), ex: 14.24 +total_amount = api_response.document.total_amount.value +``` + +- **total_net**: Total amount excluding taxes. + +```python +# To get the total amount excluding taxes value (float), ex: 10.21 +total_net = api_response.document.total_net.value +``` + +- **total_tax**: Total tax value from tax lines. 
+ +```python +# To get the total tax amount value (float), ex: 8.42 +total_tax = api_response.document.total_tax.value +``` + +### Line Items +**line_items**: List containing the details of line items. Each object in the list contains extra attributes: + +- **product_code**: (String) +- **description**: (String) +- **quantity**: (float) +- **total_amount**: (float) +- **tax_rate**: (float) +- **tax_amount**: (float) +- **unit_price**: (float) +- **confidence**: (float) + + +```python +# Loop on line items +for line_item in api_response.document.line_items: + # Show just the description + print(line_item.description) + + # Show a summary of the entire line, each field is divided by the `|` character + print(line_item) +``` + +## Questions? +[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-1jv6nawjq-FDgFcF2T5CmMmRpl9LLptw) diff --git a/docs/guide/python-passport-ocr.md b/docs/guide/python-passport-ocr.md new file mode 100644 index 00000000..d9a2715e --- /dev/null +++ b/docs/guide/python-passport-ocr.md @@ -0,0 +1,212 @@ +The Python OCR SDK supports the [passport API](https://developers.mindee.com/docs/passport-ocr) for extracting data from passports. + +```python +from mindee import Client, documents + +# Init a new client +mindee_client = Client(api_key="my-api-key") + +# Load a file from disk +input_doc = mindee_client.doc_from_path("/path/to/the/file.ext") + +# Parse the document as a Passport by passing the appropriate type +api_response = input_doc.parse(documents.TypePassportV1) + +print(api_response.document) +``` + +Using this sample fake passport below, we are going to illustrate how to extract the data that we want using the SDK. 
+![fake passport](https://files.readme.io/4a16b1d-passport_pic.jpg) + +## Passport Data Structure +The passport object JSON data structure consists of: + +- [Document level prediction](#document-level-prediction) +- [Page level prediction](#page-level-prediction) +- [Raw HTTP response](#raw-http-response) + +### Document Level Prediction +For document level prediction, we construct the document class by combining the different pages in a single document. +This method used for creating a single passport object from multiple pages relies on **field confidence scores**. + +Basically, we iterate over each page, and for each field, we keep the one that has the highest probability. + +For example, if you send a three-page passport, the document level will provide you with one name, one country code, and so on. + +```python +print(api_response.document) +``` + +Output: +``` +-----Passport data----- +Filename: passport.jpeg +Full name: HENERT PUDARSAN +Given names: HENERT +Surname: PUDARSAN +Country: GBR +ID Number: 707797979 +Issuance date: 2012-04-22 +Birth date: 1995-05-20 +Expiry date: 2017-04-22 +MRZ 1: Pfield` was not extracted. +- **probability** (Float): the confidence score of the field prediction. +- **bounding_box** (Array[Float]): contains the relative vertices coordinates of the bounding box containing the `>field` in the image. + If the field is not written, the bbox is an empty array. +- **reconstructed** (Bool): True if the field was reconstructed using other fields. + + +### Additional Attributes +Depending on the field type specified, additional attributes can be extracted in the receipt object. + +Using the above [receipt example](https://files.readme.io/6882f91-receipt23.png), the following are the basic fields that can be extracted. 
+- [Document Type](#document-type) +- [Categories](#categories) +- [Date](#date) +- [Locale](#locale) +- [Orientation](#orientation) +- [Supplier Information](#supplier-information) +- [Taxes](#taxes) +- [Time](#time) +- [Total Amounts](#total-amounts) +- [Tip](#tip) + +### Document Type +- **document_type** (string): Whether the document is an expense receipt or a credit card receipt. + +```python +document_type = api_response.document.document_type.value +print("document type: ", document_type) +``` + +### Categories +- **category** (string): Receipt category among predefined classes, as seen on the receipt. + +```python +category = api_response.document.category.value +print("purchase category: ", category) +``` + +- **subcategory** (string): The receipt sub-category among predefined classes, as seen on the receipt. + +```python +subcategory = api_response.document.subcategory.value +print("purchase subcategory: ", subcategory) +``` + +### Date +- **date** (string): Payment date as seen on the receipt. + - **value** (string): [ISO 8601 date](https://en.wikipedia.org/wiki/ISO_8601) format (yyyy-mm-dd). European and imperial dates are both supported. + - **raw** (string): In any format as seen on the receipt. + +```python +receipt_date = api_response.document.date.value +print("Date on receipt: ", receipt_date) +``` + +### Locale +- **locale** (string): Concatenation of language and country codes. + +```python +locale = api_response.document.locale.value +print("Locale code: ", locale) +``` + +- **locale.language** (string): Language code in [ISO 639-1](https://en.wikipedia.org/wiki/ISO_639-1) format as seen on the receipt. + +```python +language = api_response.document.locale.language +print("Language code: ", language) +``` + +- **locale.currency** (string): Currency code in [ISO 4217](https://en.wikipedia.org/wiki/ISO_4217) format as seen on the receipt. 
+ +```python +currency = api_response.document.locale.currency +print("Currency code: ", currency) +``` + +- **locale.country** (string): Country code in [ISO 3166-1](https://en.wikipedia.org/wiki/ISO_3166-1) alpha-2 format as seen on the receipt. + +```python +country = api_response.document.locale.country +print("Country code: ", country) +``` + +### Orientation +- **orientation** (number): The orientation field is only available at the page level as it describes whether the page image should be rotated to be upright. + The rotation value is also conveniently available in the JSON response at: `document > inference > pages [ ] > orientation > value`. + If the page requires rotation for correct display, the orientation field gives a prediction among these 3 possible outputs: + - 0 degree: the page is already upright + - 90 degrees: the page must be rotated clockwise to be upright + - 270 degrees: the page must be rotated counterclockwise to be upright + +```python +orientation = api_response.document.orientation +print("Degree: ", orientation) +``` + +### Supplier Information +- **supplier** (string): Supplier name as written in the receipt. + +```python +supplier_name = api_response.document.supplier.value +print("Supplier Name: ", supplier_name) +``` + +### Taxes +- **taxes** (string): Contains tax fields as seen on the receipt. + - **value** (float): The tax amount. + - **code** (string): The tax code (HST, GST... for Canadian; City Tax, State tax for US, etc..). + - **rate** (float): The tax rate. + - **basis** (float): The amount used to calculate the tax. + +```python +taxes = api_response.document.taxes + +# Loop on each Tax field +for tax in taxes: + print(f" tax amount: {tax.value}, tax_code: {tax.code}, tax_rate: {tax.rate}") +``` + +### Time +- **time** (string): Time of purchase as seen on the receipt + - **value** (string): Time of purchase with 24 hours formatting (hh:mm). + - **raw** (string): In any format as seen on the receipt. 
+ +```python +time = api_response.document.time.value +print("Time: ", time) +``` + +### Total Amounts +- **total_amount** (number): Total amount including taxes + +```python +total_amount = api_response.document.total_amount.value +print("total with tax", total_amount) +``` + +- **total_net** (number): Total amount paid excluding taxes + +```python +total_net = api_response.document.total_net.value +print("total without tax", total_net) +``` + +- **total_tax** (number): Total tax value from tax lines + +```python +total_tax = api_response.document.total_tax.value +print("total tax", total_tax) +``` + +### Tip +- **tip** (number): Total amount of tip and gratuity. + +```python +tip = api_response.document.tip.value +print("Tip: ", tip) +``` + +## Questions? +[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-1jv6nawjq-FDgFcF2T5CmMmRpl9LLptw) diff --git a/tests/data b/tests/data index 1ebd19bb..9e358354 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit 1ebd19bb2ba2cc0822c6538d47c85640fa451cfb +Subproject commit 9e35835467bbffebab0c2920f06052dd0c935376