Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,5 +84,5 @@ with open('/path/to/file', 'rb') as fp:

From a base64
```python
receipt_data = mindee_client.parse_receipt(base64_string, input_type="base64")
receipt_data = mindee_client.parse_receipt(base64_string, input_type="base64", filename="receipt.jpg")
```
52 changes: 46 additions & 6 deletions mindee/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def parse_receipt(
cut_pdf=True,
include_words=False,
cut_pdf_mode=3,
filename=None,
):
"""
:param cut_pdf_mode: Number (between 1 and 3 incl.) of pages to reconstruct a pdf with.
Expand All @@ -60,6 +61,7 @@ def parse_receipt(
:param cut_pdf: Automatically reconstruct pdf with more than 4 pages
:param input_type: String in {'path', 'stream', 'base64'}
:param file: Receipt filepath (allowed jpg, png, tiff, pdf)
:param filename: the name of the file (without the path)
:param version: expense_receipt api version
:return: Wrapped response with Receipts objects parsed
"""
Expand All @@ -68,7 +70,13 @@ def parse_receipt(
"Missing 'expense_receipt_token' arg in parse_receipt() function."
)

input_file = Inputs(file, input_type, cut_pdf=cut_pdf, n_pdf_pages=cut_pdf_mode)
input_file = Inputs(
file,
input_type,
filename=filename,
cut_pdf=cut_pdf,
n_pdf_pages=cut_pdf_mode,
)

response = Receipt.request(
input_file,
Expand Down Expand Up @@ -109,6 +117,7 @@ def parse_passport(
version="1",
cut_pdf=True,
cut_pdf_mode=3,
filename=None,
):
"""
:param cut_pdf_mode: Number (between 1 and 3 incl.) of pages to reconstruct a pdf with.
Expand All @@ -118,6 +127,7 @@ def parse_passport(
:param cut_pdf: Automatically reconstruct pdf with more than 4 pages
:param input_type: String in {'path', 'stream', 'base64'}
:param file: Passport filepath (allowed jpg, png, pdf)
:param filename: the name of the file (without the path)
:param version: passport api version
:return: Wrapped response with passports objects parsed
"""
Expand All @@ -126,7 +136,13 @@ def parse_passport(
"Missing 'passport_token' arg in parse_passport() function."
)

input_file = Inputs(file, input_type, cut_pdf=cut_pdf, n_pdf_pages=cut_pdf_mode)
input_file = Inputs(
file,
input_type,
filename=filename,
cut_pdf=cut_pdf,
n_pdf_pages=cut_pdf_mode,
)

response = Passport.request(input_file, self.passport_token, version)

Expand All @@ -139,6 +155,7 @@ def parse_license_plate(
version="1",
cut_pdf=True,
cut_pdf_mode=3,
filename=None,
):
"""
:param cut_pdf_mode: Number (between 1 and 3 incl.) of pages to reconstruct a pdf with.
Expand All @@ -148,6 +165,7 @@ def parse_license_plate(
:param cut_pdf: Automatically reconstruct pdf with more than 4 pages
:param input_type: String in {'path', 'stream', 'base64'}
:param file: CarPlate filepath (allowed jpg, png, pdf)
:param filename: the name of the file (without the path)
:param version: license_plates api version
:return: Wrapped response with CarPlates objects parsed
"""
Expand All @@ -156,7 +174,13 @@ def parse_license_plate(
"Missing 'license_plate_token' arg in license_plate_token() function."
)

input_file = Inputs(file, input_type, cut_pdf=cut_pdf, n_pdf_pages=cut_pdf_mode)
input_file = Inputs(
file,
input_type,
filename=filename,
cut_pdf=cut_pdf,
n_pdf_pages=cut_pdf_mode,
)

response = CarPlate.request(input_file, self.license_plate_token, version)

Expand All @@ -170,6 +194,7 @@ def parse_invoice(
cut_pdf=True,
include_words=False,
cut_pdf_mode=3,
filename=None,
):
"""
:param cut_pdf_mode: Number (between 1 and 3 incl.) of pages to reconstruct a pdf with.
Expand All @@ -179,14 +204,21 @@ def parse_invoice(
:param include_words: Bool, extract all words into http_response
:param cut_pdf: Automatically reconstruct pdf with more than 4 pages
:param input_type: String in {'path', 'stream', 'base64'}
:param file: Invoice filepath (allowed jpg, png, pdf)
:param file: Invoice full path (allowed jpg, png, pdf)
:param filename: the name of the file (without the path)
:param version: invoices api version
:return: Wrapped response with Invoices objects parsed
"""
if not self.invoice_token:
raise Exception("Missing 'invoice_token' arg in parse_invoice() function.")

input_file = Inputs(file, input_type, cut_pdf=cut_pdf, n_pdf_pages=cut_pdf_mode)
input_file = Inputs(
file,
input_type,
filename=filename,
cut_pdf=cut_pdf,
n_pdf_pages=cut_pdf_mode,
)

response = Invoice.request(
input_file, self.invoice_token, version, include_words
Expand All @@ -201,6 +233,7 @@ def parse_financial_document(
cut_pdf=True,
include_words=False,
cut_pdf_mode=3,
filename=None,
):
"""
:param cut_pdf_mode: Number (between 1 and 3 incl.) of pages to reconstruct a pdf with.
Expand All @@ -211,14 +244,21 @@ def parse_financial_document(
:param cut_pdf: Automatically reconstruct pdf with more than 4 pages
:param input_type: String in {'path', 'stream', 'base64'}
:param file: Invoice or Receipt filepath (allowed jpg, png, pdf)
:param filename: the name of the file (without the path)
:return: Wrapped response with FinancialDocument objects parsed
"""
if not self.invoice_token or not self.expense_receipt_token:
raise Exception(
"parse_invoice() function must include 'invoice_token' and 'expense_receipt_token' args."
)

input_file = Inputs(file, input_type, cut_pdf=cut_pdf, n_pdf_pages=cut_pdf_mode)
input_file = Inputs(
file,
input_type,
filename=filename,
cut_pdf=cut_pdf,
n_pdf_pages=cut_pdf_mode,
)

response = FinancialDocument.request(
input_file,
Expand Down
29 changes: 15 additions & 14 deletions mindee/inputs.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,17 @@
import io
import os
from base64 import decodebytes
import base64
from mimetypes import guess_type
import pikepdf

ALLOWED_EXTENSIONS = [
"image/png",
"image/jpg",
"image/jpeg",
"image/webp",
"application/pdf",
]


class Inputs:
def __init__(
Expand All @@ -15,17 +23,11 @@ def __init__(
:param filename: File name of the input
:param cut_pdf: Automatically reconstruct pdf with more than 4 pages
"""
self.allowed_extensions = [
"image/png",
"image/jpg",
"image/jpeg",
"image/webp",
"application/pdf",
]
assert input_type in ["base64", "path", "stream", "dummy"]
assert 0 < n_pdf_pages <= 3

if input_type == "base64":
assert filename, "filename must be set"
# Only for images
self.file_object = Inputs.b64_to_stream(file)
self.input_type = input_type
Expand Down Expand Up @@ -53,10 +55,9 @@ def __init__(
self.filename = ""
self.filepath = ""
self.file_extension = ""
elif self.file_extension not in self.allowed_extensions:
raise Exception(
"File type not allowed, must be in {%s}"
% ", ".join(self.allowed_extensions)
elif self.file_extension not in ALLOWED_EXTENSIONS:
raise AssertionError(
"File type not allowed, must be in {%s}" % ", ".join(ALLOWED_EXTENSIONS)
)

if self.file_extension == "application/pdf":
Expand Down Expand Up @@ -88,12 +89,12 @@ def load(input_type, filename, filepath, file_extension):
return file_input

@staticmethod
def b64_to_stream(b64_string):
def b64_to_stream(b64_string: str):
"""
:param b64_string: image base 64 string
:return: stream from base64
"""
bytes_object = decodebytes(b64_string.encode("utf-8"))
bytes_object = base64.standard_b64decode(b64_string)
return io.BytesIO(bytes_object)

def count_pdf_pages(self):
Expand Down
1,005 changes: 1,004 additions & 1 deletion tests/data/expense_receipts/receipt.txt

Large diffs are not rendered by default.

50 changes: 29 additions & 21 deletions tests/test_client.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pytest
from mindee import Client, Response, Receipt, Passport
from mindee.http import HTTPException


@pytest.fixture
Expand Down Expand Up @@ -61,68 +62,68 @@ def test_parse_license_plate_without_token(empty_client):


def test_parse_receipt_with_wrong_filetype(dummy_client):
with pytest.raises(Exception):
with pytest.raises(AssertionError):
dummy_client.parse_receipt("./tests/data/expense_receipts/receipt.jpga")


def test_parse_invoice_with_wrong_filetype(dummy_client):
with pytest.raises(Exception):
with pytest.raises(AssertionError):
dummy_client.parse_invoice("./tests/data/expense_receipts/receipt.jpga")


def test_parse_financial_doc_with_wrong_filetype(dummy_client):
with pytest.raises(Exception):
with pytest.raises(AssertionError):
dummy_client.parse_financial_document(
"./tests/data/expense_receipts/receipt.jpga"
)


def test_parse_passport_with_wrong_filetype(dummy_client):
with pytest.raises(Exception):
with pytest.raises(AssertionError):
dummy_client.parse_passport("./tests/data/expense_receipts/receipt.jpga")


def test_parse_plate_with_wrong_filetype(dummy_client):
with pytest.raises(Exception):
with pytest.raises(AssertionError):
dummy_client.parse_license_plate("./tests/data/expense_receipts/receipt.jpga")


def test_parse_receipt_with_wrong_token(dummy_client):
with pytest.raises(Exception):
with pytest.raises(HTTPException):
dummy_client.parse_receipt("./tests/data/expense_receipts/receipt.jpg")


def test_parse_receipt_with_wrong_version(dummy_client):
with pytest.raises(Exception):
with pytest.raises(HTTPException):
dummy_client.parse_receipt(
"./tests/data/expense_receipts/receipt.jpg", version="4000"
)


def test_parse_invoice_with_wrong_token(dummy_client):
with pytest.raises(Exception):
with pytest.raises(HTTPException):
dummy_client.parse_invoice("./tests/data/expense_receipts/receipt.jpg")


def test_parse_financial_doc_with_wrong_token_jpg(dummy_client):
with pytest.raises(Exception):
with pytest.raises(HTTPException):
dummy_client.parse_financial_document(
"./tests/data/expense_receipts/receipt.jpg"
)


def test_parse_financial_doc_with_wrong_token_pdf(dummy_client):
with pytest.raises(Exception):
with pytest.raises(HTTPException):
dummy_client.parse_financial_document("./tests/data/invoices/invoice.pdf")


def test_parse_passport_with_wrong_token(dummy_client):
with pytest.raises(Exception):
with pytest.raises(HTTPException):
dummy_client.parse_passport("./tests/data/expense_receipts/receipt.jpg")


def test_parse_license_plate_with_wrong_token(dummy_client):
with pytest.raises(Exception):
with pytest.raises(HTTPException):
dummy_client.parse_license_plate("./tests/data/license_plates/plate.png")


Expand All @@ -147,26 +148,33 @@ def test_response_with_passport_type():


def test_request_with_filepath(dummy_client):
with pytest.raises(Exception):
with pytest.raises(HTTPException):
dummy_client.parse_receipt(
"./tests/data/expense_receipts/receipt.jpg", input_type="path"
)


def test_request_with_file(dummy_client):
with pytest.raises(Exception):
with pytest.raises(HTTPException):
dummy_client.parse_receipt(
open("./tests/data/expense_receipts/receipt.jpg"), input_type="file"
open("./tests/data/expense_receipts/receipt.jpg", "rb"), input_type="stream"
)


def test_request_with_base64(dummy_client):
def test_request_with_base64_no_filename(dummy_client):
with open("./tests/data/expense_receipts/receipt.txt", "r") as fh:
b64 = fh.read()
with pytest.raises(Exception):
with pytest.raises(AssertionError):
dummy_client.parse_receipt(b64, input_type="base64")


def test_request_with_base64(dummy_client):
with open("./tests/data/expense_receipts/receipt.txt", "r") as fh:
b64 = fh.read()
with pytest.raises(HTTPException):
dummy_client.parse_receipt(b64, input_type="base64", filename="receipt.txt")


def test_request_without_raise_on_error(dummy_client_dont_raise):
result = dummy_client_dont_raise.parse_receipt(
"./tests/data/expense_receipts/receipt.jpg", input_type="path"
Expand All @@ -186,13 +194,13 @@ def test_request_without_raise_on_error_include_words(dummy_client_dont_raise):


def test_request_with_file_wrong_type(dummy_client):
with pytest.raises(Exception):
with pytest.raises(AssertionError):
dummy_client.parse_receipt(open("./tests/data/test.txt"), input_type="file")

with pytest.raises(Exception):
with pytest.raises(AssertionError):
dummy_client.parse_receipt("./tests/data/test.txt", input_type="path")


def test_mpdf_reconstruct(dummy_client):
with pytest.raises(Exception):
def test_pdf_reconstruct(dummy_client):
with pytest.raises(HTTPException):
dummy_client.parse_invoice("./tests/data/invoices/invoice_6p.pdf")