diff --git a/mindee/documents/base.py b/mindee/documents/base.py index ed83e00f..e4bae71e 100644 --- a/mindee/documents/base.py +++ b/mindee/documents/base.py @@ -34,7 +34,7 @@ def __init__( self, input_file: InputDocument, document_type: str, - api_prediction: dict, + api_prediction: TypeApiPrediction, page_n: Optional[int] = None, ): if input_file: diff --git a/mindee/documents/custom_document.py b/mindee/documents/custom_document.py index 4b4eae7b..04b39619 100644 --- a/mindee/documents/custom_document.py +++ b/mindee/documents/custom_document.py @@ -10,7 +10,7 @@ class CustomDocument(Document): def __init__( self, document_type: str, - api_prediction=None, + api_prediction: TypeApiPrediction, input_file=None, page_n: Optional[int] = None, ): @@ -51,7 +51,7 @@ def __str__(self) -> str: name, " ".join([value["content"] for value in info["values"]]), ) - custom_doc_str += "-----------------\n" + custom_doc_str += "----------------------" return custom_doc_str def _checklist(self) -> None: diff --git a/tests/__init__.py b/tests/__init__.py index e69de29b..76e943e7 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -0,0 +1,4 @@ +RECEIPT_DATA_DIR = "./tests/data/receipt" +INVOICE_DATA_DIR = "./tests/data/invoice" +PASSPORT_DATA_DIR = "./tests/data/passport" +CUSTOM_DATA_DIR = "./tests/data/custom" diff --git a/tests/data/custom/response/complete.json b/tests/data/custom/response/complete.json new file mode 100644 index 00000000..f1bd0196 --- /dev/null +++ b/tests/data/custom/response/complete.json @@ -0,0 +1,921 @@ +{ + "api_request": { + "error": {}, + "resources": [ + "document" + ], + "status": "success", + "status_code": 201, + "url": "http://api.mindee.net/v1/products/ianare/field_test/v1/predict?global_orientation=true" + }, + "document": { + "id": "72021a9a-bfa0-49ce-b24a-ce9a3584ac53", + "inference": { + "extras": {}, + "finished_at": "2022-05-19T08:55:35+00:00", + "is_rotation_applied": true, + "pages": [ + { + "extras": {}, + "id": 0, + "orientation": { + "value": 0 + }, + "prediction": { + "date_normal": { + "confidence": 0.99, + "values": [ + { + "confidence": 0.99, + "content": "2020-12-17", + "polygon": [ + [ + 0.834, + 0.177 + ], + [ + 0.927, + 0.177 + ], + [ + 0.927, + 0.186 + ], + [ + 0.835, + 0.187 + ] + ] + } + ] + }, + "date_us": { + "confidence": 0.97, + "values": [ + { + "confidence": 0.97, + "content": "2020-12-17", + "polygon": [ + [ + 0.834, + 0.177 + ], + [ + 0.927, + 0.177 + ], + [ + 0.927, + 0.186 + ], + [ + 0.835, + 0.187 + ] + ] + } + ] + }, + "email": { + "confidence": 1.0, + "values": [ + { + "confidence": 0.9, + "content": "jenny@example.com", + "polygon": [ + [ + 0.522, + 0.917 + ], + [ + 0.623, + 0.916 + ], + [ + 0.623, + 0.923 + ], + [ + 0.523, + 0.924 + ] + ] + } + ] + }, + "integer": { + "confidence": 0.76, + "values": [ + { + "confidence": 0.76, + "content": "2100505343", + "polygon": [ + [ + 0.559, + 0.179 + ], + [ + 0.652, + 0.178 + ], + [ + 0.652, + 0.187 + ], + [ + 0.56, + 0.188 + ] + ] + } + ] + }, + "number": { + "confidence": 0.9, + "values": [ + { + "confidence": 0.9, + "content": "2100505343", + "polygon": [ + [ + 0.559, + 0.179 + ], + [ + 0.652, + 0.178 + ], + [ + 0.652, + 0.187 + ], + [ + 0.56, + 0.188 + ] + ] + } + ] + }, + "phone": { + "confidence": 0.69, + "values": [ + { + "confidence": 0.69, + "content": "8675309", + "polygon": [ + [ + 0.66, + 0.253 + ], + [ + 0.764, + 0.253 + ], + [ + 0.764, + 0.262 + ], + [ + 0.66, + 0.263 + ] + ] + } + ] + }, + "string_09": { + "confidence": 0.98, + "values": [ + { + "confidence": 0.98, + "content": "10119", + "polygon": [ + [ + 0.084, + 0.202 + ], + [ + 0.129, + 0.202 + ], + [ + 0.129, + 0.21 + ], + [ + 0.085, + 0.211 + ] + ] + } + ] + }, + "string_all": { + "confidence": 1.0, + "values": [ + { + "confidence": 1.0, + "content": "Jenny", + "polygon": [ + [ + 0.713, + 0.013 + ], + [ + 0.956, + 0.013 + ], + [ + 0.956, + 0.054 + ], + [ + 0.713, + 0.055 + ] + ] + }, + { + "confidence": 1.0, + "content": "is", + "polygon": [ + [ + 0.713, + 0.013 + ], + [ + 0.956, + 0.013 + ], + [ + 0.956, + 0.054 + ], + [ + 0.713, + 0.055 + ] + ] + }, + { + "confidence": 1.0, + "content": "great", + "polygon": [ + [ + 0.713, + 0.013 + ], + [ + 0.956, + 0.013 + ], + [ + 0.956, + 0.054 + ], + [ + 0.713, + 0.055 + ] + ] + } + ] + }, + "string_az": { + "confidence": 0.61, + "values": [ + { + "confidence": 0.61, + "content": "Jenny", + "polygon": [ + [ + 0.713, + 0.013 + ], + [ + 0.956, + 0.013 + ], + [ + 0.956, + 0.054 + ], + [ + 0.713, + 0.055 + ] + ] + } + ] + }, + "url": { + "confidence": 0.0, + "values": [] + } + } + }, + { + "extras": {}, + "id": 1, + "orientation": { + "value": 0 + }, + "prediction": { + "date_normal": { + "confidence": 0.0, + "values": [] + }, + "date_us": { + "confidence": 0.0, + "values": [] + }, + "email": { + "confidence": 0.0, + "values": [] + }, + "integer": { + "confidence": 0.67, + "values": [ + { + "confidence": 0.67, + "content": "2100505343", + "polygon": [ + [ + 0.56, + 0.181 + ], + [ + 0.652, + 0.181 + ], + [ + 0.652, + 0.191 + ], + [ + 0.56, + 0.191 + ] + ] + } + ] + }, + "number": { + "confidence": 0.86, + "values": [ + { + "confidence": 0.86, + "content": "2100505343", + "polygon": [ + [ + 0.56, + 0.181 + ], + [ + 0.652, + 0.181 + ], + [ + 0.652, + 0.191 + ], + [ + 0.56, + 0.191 + ] + ] + } + ] + }, + "phone": { + "confidence": 0.64, + "values": [ + { + "confidence": 0.64, + "content": "1231234", + "polygon": [ + [ + 0.701, + 0.181 + ], + [ + 0.774, + 0.181 + ], + [ + 0.774, + 0.189 + ], + [ + 0.702, + 0.191 + ] + ] + } + ] + }, + "string_09": { + "confidence": 0.98, + "values": [ + { + "confidence": 0.98, + "content": "10119", + "polygon": [ + [ + 0.113, + 0.149 + ], + [ + 0.158, + 0.149 + ], + [ + 0.158, + 0.158 + ], + [ + 0.113, + 0.159 + ] + ] + } + ] + }, + "string_all": { + "confidence": 0.99, + "values": [ + { + "confidence": 0.99, + "content": "Mindee", + "polygon": [ + [ + 0.715, + 0.018 + ], + [ + 0.956, + 0.015 + ], + [ + 0.956, + 0.053 + ], + [ + 0.716, + 0.057 + ] + ] + }, + { + "confidence": 0.94, + "content": "is", + "polygon": [ + [ + 0.715, + 0.018 + ], + [ + 0.956, + 0.015 + ], + [ + 0.956, + 0.053 + ], + [ + 0.716, + 0.057 + ] + ] + }, + { + "confidence": 0.91, + "content": "awesome", + "polygon": [ + [ + 0.715, + 0.018 + ], + [ + 0.956, + 0.015 + ], + [ + 0.956, + 0.053 + ], + [ + 0.716, + 0.057 + ] + ] + } + ] + }, + "string_az": { + "confidence": 0.67, + "values": [ + { + "confidence": 0.67, + "content": "Mindee", + "polygon": [ + [ + 0.112, + 0.125 + ], + [ + 0.186, + 0.124 + ], + [ + 0.186, + 0.133 + ], + [ + 0.113, + 0.134 + ] + ] + } + ] + }, + "url": { + "confidence": 1.0, + "values": [ + { + "confidence": 1.0, + "content": "developers.mindee.com/docs", + "polygon": [ + [ + 0.576, + 0.366 + ], + [ + 0.831, + 0.366 + ], + [ + 0.831, + 0.383 + ], + [ + 0.576, + 0.383 + ] + ] + } + ] + } + } + } + ], + "prediction": { + "date_normal": { + "confidence": 0.99, + "page_id": 0, + "values": [ + { + "confidence": 0.99, + "content": "2020-12-17", + "polygon": [ + [ + 0.834, + 0.177 + ], + [ + 0.927, + 0.177 + ], + [ + 0.927, + 0.186 + ], + [ + 0.835, + 0.187 + ] + ] + } + ] + }, + "date_us": { + "confidence": 0.97, + "page_id": 0, + "values": [ + { + "confidence": 0.97, + "content": "2020-12-17", + "polygon": [ + [ + 0.834, + 0.177 + ], + [ + 0.927, + 0.177 + ], + [ + 0.927, + 0.186 + ], + [ + 0.835, + 0.187 + ] + ] + } + ] + }, + "email": { + "confidence": 1.0, + "page_id": 0, + "values": [ + { + "confidence": 1.0, + "content": "contact@mindee.com", + "polygon": [ + [ + 0.522, + 0.917 + ], + [ + 0.623, + 0.916 + ], + [ + 0.623, + 0.923 + ], + [ + 0.523, + 0.924 + ] + ] + } + ] + }, + "integer": { + "confidence": 0.76, + "page_id": 0, + "values": [ + { + "confidence": 0.76, + "content": "2100505343", + "polygon": [ + [ + 0.559, + 0.179 + ], + [ + 0.652, + 0.178 + ], + [ + 0.652, + 0.187 + ], + [ + 0.56, + 0.188 + ] + ] + } + ] + }, + "number": { + "confidence": 0.9, + "page_id": 0, + "values": [ + { + "confidence": 0.9, + "content": "2100505343", + "polygon": [ + [ + 0.559, + 0.179 + ], + [ + 0.652, + 0.178 + ], + [ + 0.652, + 0.187 + ], + [ + 0.56, + 0.188 + ] + ] + } + ] + }, + "phone": { + "confidence": 0.69, + "page_id": 0, + "values": [ + { + "confidence": 0.69, + "content": "8675309", + "polygon": [ + [ + 0.66, + 0.253 + ], + [ + 0.764, + 0.253 + ], + [ + 0.764, + 0.262 + ], + [ + 0.66, + 0.263 + ] + ] + } + ] + }, + "string_09": { + "confidence": 0.98, + "page_id": 1, + "values": [ + { + "confidence": 0.98, + "content": "10119", + "polygon": [ + [ + 0.113, + 0.149 + ], + [ + 0.158, + 0.149 + ], + [ + 0.158, + 0.158 + ], + [ + 0.113, + 0.159 + ] + ] + } + ] + }, + "string_all": { + "confidence": 0.99, + "values": [ + { + "confidence": 0.99, + "content": "Mindee", + "polygon": [ + [ + 0.715, + 0.018 + ], + [ + 0.956, + 0.015 + ], + [ + 0.956, + 0.053 + ], + [ + 0.716, + 0.057 + ] + ] + }, + { + "confidence": 0.94, + "content": "is", + "polygon": [ + [ + 0.715, + 0.018 + ], + [ + 0.956, + 0.015 + ], + [ + 0.956, + 0.053 + ], + [ + 0.716, + 0.057 + ] + ] + }, + { + "confidence": 0.91, + "content": "awesome", + "polygon": [ + [ + 0.715, + 0.018 + ], + [ + 0.956, + 0.015 + ], + [ + 0.956, + 0.053 + ], + [ + 0.716, + 0.057 + ] + ] + } + ] + }, + "string_az": { + "confidence": 0.67, + "page_id": 1, + "values": [ + { + "confidence": 0.67, + "content": "Mindee", + "polygon": [ + [ + 0.112, + 0.125 + ], + [ + 0.186, + 0.124 + ], + [ + 0.186, + 0.133 + ], + [ + 0.113, + 0.134 + ] + ] + } + ] + }, + "url": { + "confidence": 1.0, + "page_id": 1, + "values": [ + { + "confidence": 1.0, + "content": "developers.mindee.com/docs", + "polygon": [ + [ + 0.576, + 0.366 + ], + [ + 0.831, + 0.366 + ], + [ + 0.831, + 0.383 + ], + [ + 0.576, + 0.383 + ] + ] + } + ] + } + }, + "processing_time": 8.079, + "product": { + "features": [ + "string_all", + "string_az", + "string_09", + "number", + "integer", + "date_normal", + "date_us", + "phone", + "email", + "url" + ], + "name": "ianare/field_test", + "type": "constructed", + "version": "1.1" + }, + "started_at": "2022-05-19T08:55:26+00:00" + }, + "n_pages": 2, + "name": "0a8c49a9-b40a-4c62-98fa-d9412d9a3795.pdf" + } +} \ No newline at end of file diff --git a/tests/data/custom/response/doc_to_string.txt b/tests/data/custom/response/doc_to_string.txt new file mode 100644 index 00000000..7a8d3a7f --- /dev/null +++ b/tests/data/custom/response/doc_to_string.txt @@ -0,0 +1,12 @@ +----- field_test ----- +date_normal: 2020-12-17 +date_us: 2020-12-17 +email: contact@mindee.com +integer: 2100505343 +number: 2100505343 +phone: 8675309 +string_09: 10119 +string_all: Mindee is awesome +string_az: Mindee +url: developers.mindee.com/docs +---------------------- diff --git a/tests/data/custom/response/empty.json b/tests/data/custom/response/empty.json new file mode 100644 index 00000000..7c0d349a --- /dev/null +++ b/tests/data/custom/response/empty.json @@ -0,0 +1,143 @@ +{ + "api_request": { + "error": {}, + "resources": [ + "document" + ], + "status": "success", + "status_code": 201, + "url": "http://api.mindee.net/v1/products/ianare/field_test/v1/predict?global_orientation=true" + }, + "document": { + "id": "37067c22-4cea-40d3-9809-b9ca1bfedd33", + "inference": { + "extras": {}, + "finished_at": "2022-05-19T08:57:40+00:00", + "is_rotation_applied": true, + "pages": [ + { + "extras": {}, + "id": 0, + "orientation": { + "value": 0 + }, + "prediction": { + "date_normal": { + "confidence": 0.0, + "values": [] + }, + "date_us": { + "confidence": 0.0, + "values": [] + }, + "email": { + "confidence": 0.0, + "values": [] + }, + "integer": { + "confidence": 0.0, + "values": [] + }, + "number": { + "confidence": 0.0, + "values": [] + }, + "phone": { + "confidence": 0.0, + "values": [] + }, + "string_09": { + "confidence": 0.0, + "values": [] + }, + "string_all": { + "confidence": 0.0, + "values": [] + }, + "string_az": { + "confidence": 0.0, + "values": [] + }, + "url": { + "confidence": 0.0, + "values": [] + } + } + } + ], + "prediction": { + "date_normal": { + "confidence": 0.0, + "page_id": 0, + "values": [] + }, + "date_us": { + "confidence": 0.0, + "page_id": 0, + "values": [] + }, + "email": { + "confidence": 0.0, + "page_id": 0, + "values": [] + }, + "integer": { + "confidence": 0.0, + "page_id": 0, + "values": [] + }, + "number": { + "confidence": 0.0, + "page_id": 0, + "values": [] + }, + "phone": { + "confidence": 0.0, + "page_id": 0, + "values": [] + }, + "string_09": { + "confidence": 0.0, + "page_id": 0, + "values": [] + }, + "string_all": { + "confidence": 0.0, + "page_id": 0, + "values": [] + }, + "string_az": { + "confidence": 0.0, + "page_id": 0, + "values": [] + }, + "url": { + "confidence": 0.0, + "page_id": 0, + "values": [] + } + }, + "processing_time": 0.593, + "product": { + "features": [ + "string_all", + "string_az", + "string_09", + "number", + "integer", + "date_normal", + "date_us", + "phone", + "email", + "url" + ], + "name": "ianare/field_test", + "type": "constructed", + "version": "1.1" + }, + "started_at": "2022-05-19T08:57:40+00:00" + }, + "n_pages": 1, + "name": "hello.pdf" + } +} \ No newline at end of file diff --git a/tests/data/custom/response/page0_to_string.txt b/tests/data/custom/response/page0_to_string.txt new file mode 100644 index 00000000..40c5d031 --- /dev/null +++ b/tests/data/custom/response/page0_to_string.txt @@ -0,0 +1,12 @@ +----- field_test ----- +date_normal: 2020-12-17 +date_us: 2020-12-17 +email: jenny@example.com +integer: 2100505343 +number: 2100505343 +phone: 8675309 +string_09: 10119 +string_all: Jenny is great +string_az: Jenny +url: +---------------------- diff --git a/tests/data/custom/response/page1_to_string.txt b/tests/data/custom/response/page1_to_string.txt new file mode 100644 index 00000000..7179e39e --- /dev/null +++ b/tests/data/custom/response/page1_to_string.txt @@ -0,0 +1,12 @@ +----- field_test ----- +date_normal: +date_us: +email: +integer: 2100505343 +number: 2100505343 +phone: 1231234 +string_09: 10119 +string_all: Mindee is awesome +string_az: Mindee +url: developers.mindee.com/docs +---------------------- diff --git a/tests/data/invoices/invoice.pdf b/tests/data/invoice/invoice.pdf similarity index 100% rename from tests/data/invoices/invoice.pdf rename to tests/data/invoice/invoice.pdf diff --git a/tests/data/invoices/invoice_10p.pdf b/tests/data/invoice/invoice_10p.pdf similarity index 100% rename from tests/data/invoices/invoice_10p.pdf rename to tests/data/invoice/invoice_10p.pdf diff --git a/tests/data/invoices/invoice_10p.txt b/tests/data/invoice/invoice_10p.txt similarity index 100% rename from tests/data/invoices/invoice_10p.txt rename to tests/data/invoice/invoice_10p.txt diff --git a/tests/data/invoices/v3/invoice.json b/tests/data/invoice/response/complete.json similarity index 100% rename from tests/data/invoices/v3/invoice.json rename to tests/data/invoice/response/complete.json diff --git a/tests/data/invoice/response/doc_to_string.txt b/tests/data/invoice/response/doc_to_string.txt new file mode 100644 index 00000000..e4d5b50d --- /dev/null +++ b/tests/data/invoice/response/doc_to_string.txt @@ -0,0 +1,18 @@ +-----Invoice data----- +Filename: None +Invoice number: 0042004801351 +Total amount including taxes: 587.95 +Total amount excluding taxes: 489.97 +Invoice date: 2020-02-17 +Invoice due date: 2020-02-17 +Supplier name: TURNPIKE DESIGNS CO. +Supplier address: 156 University Ave, Toronto ON, Canada M5H 2H7 +Customer name: JIRO DOI +Customer company registration: FR00000000000; 111222333 +Customer address: 1954 Bloon Street West Toronto, ON, M6P 3K9 Canada +Payment details: FR7640254025476501124705368; +Company numbers: 501124705; FR33501124705 +Taxes: 97.98 20.0% +Total taxes: 97.98 +Locale: fr; fr; EUR; +---------------------- diff --git a/tests/data/invoices/v3/invoice_all_na.json b/tests/data/invoice/response/empty.json similarity index 100% rename from tests/data/invoices/v3/invoice_all_na.json rename to tests/data/invoice/response/empty.json diff --git a/tests/data/invoice/response/page0_to_string.txt b/tests/data/invoice/response/page0_to_string.txt new file mode 100644 index 00000000..43f75827 --- /dev/null +++ b/tests/data/invoice/response/page0_to_string.txt @@ -0,0 +1,18 @@ +-----Invoice data----- +Filename: None +Invoice number: 0042004801351 +Total amount including taxes: 587.95 +Total amount excluding taxes: 489.97 +Invoice date: 2020-02-17 +Invoice due date: 2020-02-17 +Supplier name: +Supplier address: +Customer name: +Customer company registration: +Customer address: +Payment details: FR7640254025476501124705368; +Company numbers: 501124705; FR33501124705 +Taxes: 97.98 20.0% +Total taxes: 97.98 +Locale: fr; fr; EUR; +---------------------- diff --git a/tests/data/passport/v1/passport.json b/tests/data/passport/response/complete.json similarity index 97% rename from tests/data/passport/v1/passport.json rename to tests/data/passport/response/complete.json index ac13692e..492ec016 100644 --- a/tests/data/passport/v1/passport.json +++ b/tests/data/passport/response/complete.json @@ -20,7 +20,7 @@ "id": 0, "prediction": { "birth_date": { - "confidence": 1.0, + "confidence": 0.98, "polygon": [ [ 0.341, @@ -86,7 +86,7 @@ "value": "GBR" }, "expiry_date": { - "confidence": 1.0, + "confidence": 0.98, "polygon": [ [ 0.34, @@ -105,7 +105,7 @@ 0.82 ] ], - "value": "2017-04-22" + "value": "2057-04-22" }, "gender": { "confidence": 1.0, @@ -154,7 +154,7 @@ } ], "id_number": { - "confidence": 1.0, + "confidence": 0.98, "polygon": [ [ 0.723, @@ -272,7 +272,7 @@ ], "prediction": { "birth_date": { - "confidence": 1.0, + "confidence": 0.98, "page_id": 0, "polygon": [ [ @@ -341,7 +341,7 @@ "value": "GBR" }, "expiry_date": { - "confidence": 1.0, + "confidence": 0.98, "page_id": 0, "polygon": [ [ @@ -361,7 +361,7 @@ 0.82 ] ], - "value": "2017-04-22" + "value": "2057-04-22" }, "gender": { "confidence": 1.0, @@ -412,7 +412,7 @@ } ], "id_number": { - "confidence": 1.0, + "confidence": 0.98, "page_id": 0, "polygon": [ [ diff --git a/tests/data/passport/response/doc_to_string.txt b/tests/data/passport/response/doc_to_string.txt new file mode 100644 index 00000000..0b7982bc --- /dev/null +++ b/tests/data/passport/response/doc_to_string.txt @@ -0,0 +1,14 @@ +-----Passport data----- +Filename: None +Full name: HENERT PUDARSAN +Given names: HENERT +Surname: PUDARSAN +Country: GBR +ID Number: 707797979 +Issuance date: 2012-04-22 +Birth date: 1995-05-20 +Expiry date: 2057-04-22 +MRZ 1: P