Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
133 changes: 43 additions & 90 deletions mindee/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,6 @@
from mindee.documents.invoice import Invoice
from mindee.documents.passport import Passport

# Lookup table from a document type name to the class that models it.
# The keys are the same strings the Client passes as ``document_type``
# when it wraps an API response.
DOCUMENT_CLASSES = dict(
    (
        ("receipt", Receipt),
        ("invoice", Invoice),
        ("financial_document", FinancialDocument),
        ("passport", Passport),
        ("license_plate", CarPlate),
    )
)


class Client(object):
def __init__(
Expand All @@ -36,20 +28,13 @@ def __init__(
"""
assert type(raise_on_error) == bool
self.raise_on_error = raise_on_error
self.base_url = "https://api.mindee.net/v1/products/mindee/"
self.expense_receipt_token = expense_receipt_token
self.invoice_token = invoice_token
self.passport_token = passport_token
self.license_plate_token = license_plate_token

def parse_receipt(
self,
file,
input_type="path",
version="3",
cut_pdf=True,
include_words=False,
cut_pdf_mode=3,
self, file, input_type="path", cut_pdf=True, include_words=False, cut_pdf_mode=3
):
"""
:param cut_pdf_mode: Number (between 1 and 3 incl.) of pages to reconstruct a pdf with.
Expand All @@ -60,7 +45,6 @@ def parse_receipt(
:param cut_pdf: Automatically reconstruct pdf with more than 4 pages
:param input_type: String in {'path', 'stream', 'base64'}
:param file: Receipt filepath (allowed jpg, png, tiff, pdf)
:param version: expense_receipt api version
:return: Wrapped response with Receipts objects parsed
"""
if not self.expense_receipt_token:
Expand All @@ -71,47 +55,12 @@ def parse_receipt(
input_file = Inputs(file, input_type, cut_pdf=cut_pdf, n_pdf_pages=cut_pdf_mode)

response = Receipt.request(
input_file,
self.base_url,
self.expense_receipt_token,
version,
include_words,
input_file, self.expense_receipt_token, include_words,
)

return self._wrap_response(input_file, response, "receipt")

def _wrap_response(self, input_file, response, document_type):
    """
    Convert a raw HTTP response into a Response object.

    Any status code above 201 is treated as a failure: it either raises
    (when ``self.raise_on_error`` is set) or yields a Response with no
    pages and no document.

    :param input_file: Input object
    :param response: HTTP response
    :param document_type: Document class in {"receipt", "invoice", "financial_document", "passport", "license_plate"}
    :return: Full response object
    :raises HTTPException: on a status code above 201 when raise_on_error is True
    """
    dict_response = response.json()

    if response.status_code > 201:
        if self.raise_on_error:
            # Name the document type that was actually requested; the message
            # previously said "Receipt API" for every endpoint, which was
            # misleading for invoices, passports and license plates.
            raise HTTPException(
                "%s API %s HTTP error: %s"
                % (document_type, response.status_code, json.dumps(dict_response))
            )
        # Errors are tolerated: hand back the raw payload without parsed data.
        return Response(
            http_response=dict_response,
            pages=[],
            document=None,
            document_type=document_type,
        )

    return Response.format_response(dict_response, document_type, input_file)

def parse_passport(
self,
file,
input_type="path",
version="1",
cut_pdf=True,
cut_pdf_mode=3,
):
def parse_passport(self, file, input_type="path", cut_pdf=True, cut_pdf_mode=3):
"""
:param cut_pdf_mode: Number (between 1 and 3 incl.) of pages to reconstruct a pdf with.
if 1: pages [0]
Expand All @@ -120,7 +69,6 @@ def parse_passport(
:param cut_pdf: Automatically reconstruct pdf with more than 4 pages
:param input_type: String in {'path', 'stream', 'base64'}
:param file: Passport filepath (allowed jpg, png, pdf)
:param version: passport api version
:return: Wrapped response with passports objects parsed
"""
if not self.passport_token:
Expand All @@ -130,19 +78,12 @@ def parse_passport(

input_file = Inputs(file, input_type, cut_pdf=cut_pdf, n_pdf_pages=cut_pdf_mode)

response = Passport.request(
input_file, self.base_url, self.passport_token, version
)
response = Passport.request(input_file, self.passport_token)

return self._wrap_response(input_file, response, "passport")

def parse_license_plate(
self,
file,
input_type="path",
version="1",
cut_pdf=True,
cut_pdf_mode=3,
self, file, input_type="path", cut_pdf=True, cut_pdf_mode=3
):
"""
:param cut_pdf_mode: Number (between 1 and 3 incl.) of pages to reconstruct a pdf with.
Expand All @@ -152,7 +93,6 @@ def parse_license_plate(
:param cut_pdf: Automatically reconstruct pdf with more than 4 pages
:param input_type: String in {'path', 'stream', 'base64'}
:param file: CarPlate filepath (allowed jpg, png, pdf)
:param version: license_plates api version
:return: Wrapped response with CarPlates objects parsed
"""
if not self.license_plate_token:
Expand All @@ -162,20 +102,12 @@ def parse_license_plate(

input_file = Inputs(file, input_type, cut_pdf=cut_pdf, n_pdf_pages=cut_pdf_mode)

response = CarPlate.request(
input_file, self.base_url, self.license_plate_token, version
)
response = CarPlate.request(input_file, self.license_plate_token)

return self._wrap_response(input_file, response, "license_plate")

def parse_invoice(
self,
file,
input_type="path",
version="2",
cut_pdf=True,
include_words=False,
cut_pdf_mode=3,
self, file, input_type="path", cut_pdf=True, include_words=False, cut_pdf_mode=3
):
"""
:param cut_pdf_mode: Number (between 1 and 3 incl.) of pages to reconstruct a pdf with.
Expand All @@ -186,27 +118,19 @@ def parse_invoice(
:param cut_pdf: Automatically reconstruct pdf with more than 4 pages
:param input_type: String in {'path', 'stream', 'base64'}
:param file: Invoice filepath (allowed jpg, png, pdf)
:param version: invoices api version
:return: Wrapped response with Invoices objects parsed
"""
if not self.invoice_token:
raise Exception("Missing 'invoice_token' arg in parse_invoice() function.")

input_file = Inputs(file, input_type, cut_pdf=cut_pdf, n_pdf_pages=cut_pdf_mode)

response = Invoice.request(
input_file, self.base_url, self.invoice_token, version, include_words
)
response = Invoice.request(input_file, self.invoice_token, include_words)

return self._wrap_response(input_file, response, "invoice")

def parse_financial_document(
self,
file,
input_type="path",
cut_pdf=True,
include_words=False,
cut_pdf_mode=3,
self, file, input_type="path", cut_pdf=True, include_words=False, cut_pdf_mode=3
):
"""
:param cut_pdf_mode: Number (between 1 and 3 incl.) of pages to reconstruct a pdf with.
Expand All @@ -227,15 +151,44 @@ def parse_financial_document(
input_file = Inputs(file, input_type, cut_pdf=cut_pdf, n_pdf_pages=cut_pdf_mode)

response = FinancialDocument.request(
input_file,
self.base_url,
self.expense_receipt_token,
self.invoice_token,
include_words,
input_file, self.expense_receipt_token, self.invoice_token, include_words,
)

return self._wrap_response(input_file, response, "financial_document")

def _wrap_response(self, input_file, response, document_type):
    """
    Convert a raw HTTP response into a Response object.

    Any status code above 201 is treated as a failure: it either raises
    (when ``self.raise_on_error`` is set) or yields a Response with no
    pages and no document.

    :param input_file: Input object
    :param response: HTTP response
    :param document_type: Document class in {"receipt", "invoice", "financial_document", "passport", "license_plate"}
    :return: Full response object
    :raises HTTPException: on a status code above 201 when raise_on_error is True
    """
    dict_response = response.json()

    if response.status_code > 201:
        if self.raise_on_error:
            # Name the document type that was actually requested; the message
            # previously said "Receipt API" for every endpoint, which was
            # misleading for invoices, passports and license plates.
            raise HTTPException(
                "%s API %s HTTP error: %s"
                % (document_type, response.status_code, json.dumps(dict_response))
            )
        # Errors are tolerated: hand back the raw payload without parsed data.
        return Response(
            http_response=dict_response,
            pages=[],
            document=None,
            document_type=document_type,
        )

    return Response.format_response(dict_response, document_type, input_file)


# Lookup table from a document type name to the class that models it.
# The keys are the same strings the Client passes as ``document_type``
# when it wraps an API response.
DOCUMENT_CLASSES = dict(
    (
        ("receipt", Receipt),
        ("invoice", Invoice),
        ("financial_document", FinancialDocument),
        ("passport", Passport),
        ("license_plate", CarPlate),
    )
)


class Response(object):
def __init__(
Expand Down
20 changes: 8 additions & 12 deletions mindee/documents/car_plate.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
from mindee.documents import Document
from mindee.fields import Field
from mindee.http import request
import os
from mindee.http import make_api_url, make_api_request


class CarPlate(Document):

ENDPOINT = "license_plates"
VERSION = "1"

def __init__(
self, api_prediction=None, input_file=None, license_plates=None, page_n=0
):
Expand Down Expand Up @@ -78,16 +81,9 @@ def compare(license_plate=None, ground_truth=None):
return metrics

@staticmethod
def request(input_file, base_url, license_plates_token=None, version="1"):
"""
Make request to license_plates endpoint
:param input_file: Input object
:param base_url: API base URL
:param license_plates_token: License plate API token
:param version: API version
"""
url = os.path.join(base_url, "license_plates", "v" + version, "predict")
return request(url, input_file, license_plates_token)
def request(input_file, token=None):
url = make_api_url(CarPlate.ENDPOINT, CarPlate.VERSION)
return make_api_request(url, input_file, token)

def _checklist(self):
"""
Expand Down
24 changes: 11 additions & 13 deletions mindee/documents/financial_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,9 @@
from mindee.fields.tax import Tax
from mindee.documents import Document
from mindee.fields import Field
from mindee.http import request
from mindee.http import make_api_url, make_api_request
from mindee.documents.invoice import Invoice
from mindee.documents.receipt import Receipt
import os


class FinancialDocument(Document):
Expand Down Expand Up @@ -202,32 +201,31 @@ def compare(financial_document=None, ground_truth=None):
# Compute Accuracy metrics
metrics.update(
FinancialDocument.compute_accuracy(financial_document, ground_truth)

)


return metrics

@staticmethod
def request(
input_file,
base_url,
expense_receipt_token=None,
invoice_token=None,
include_words=False,
input_file, expense_receipt_token=None, invoice_token=None, include_words=False,
):
"""
Make request to invoices endpoint if .pdf, expense_receipts otherwise
:param include_words: Bool, extract all words into http_response
:param input_file: Input object
:param base_url: API base URL
:param expense_receipt_token: Expense receipts API token
:param invoice_token: Invoices API token
:param include_words: Bool, extract all words into http_response
"""
if "pdf" in input_file.file_extension:
url = os.path.join(base_url, "invoices", "v2", "predict")
return request(url, input_file, invoice_token, include_words)
url = make_api_url(Invoice.ENDPOINT, Invoice.VERSION)
return make_api_request(url, input_file, invoice_token, include_words)
else:
url = os.path.join(base_url, "expense_receipts", "v3", "predict")
return request(url, input_file, expense_receipt_token, include_words)
url = make_api_url(Receipt.ENDPOINT, Receipt.VERSION)
return make_api_request(
url, input_file, expense_receipt_token, include_words
)

def _checklist(self):
"""
Expand Down
20 changes: 9 additions & 11 deletions mindee/documents/invoice.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,14 @@
from mindee.fields.orientation import Orientation
from mindee.fields.payment_details import PaymentDetails
from mindee.fields.tax import Tax
from mindee.http import request
import os
from mindee.http import make_api_request, make_api_url


class Invoice(Document):

ENDPOINT = "invoices"
VERSION = "2"

def __init__(
self,
api_prediction=None,
Expand Down Expand Up @@ -42,7 +45,6 @@ def __init__(
:param supplier: supplier value for creating Invoice object from scratch
:param payment_details: payment_details value for creating Invoice object from scratch
:param company_number: company_number value for creating Invoice object from scratch
:param vat_number: vat_number value for creating Invoice object from scratch
:param orientation: orientation value for creating Invoice object from scratch
:param total_tax: total_tax value for creating Invoice object from scratch
:param page_n: Page number for multi pages pdf input
Expand Down Expand Up @@ -199,19 +201,15 @@ def compare(invoice=None, ground_truth=None):
return metrics

@staticmethod
def request(
input_file, base_url, invoice_token=None, version="2", include_words=False
):
def request(input_file, token=None, include_words=False):
"""
Make request to invoices endpoint
:param input_file: Input object
:param base_url: API base URL
:param invoice_token: Invoices API token
:param token: API token
:param include_words: Include Mindee vision words in http_response
:param version: API version
"""
url = os.path.join(base_url, "invoices", "v" + version, "predict")
return request(url, input_file, invoice_token, include_words)
url = make_api_url(Invoice.ENDPOINT, Invoice.VERSION)
return make_api_request(url, input_file, token, include_words)

def _reconstruct(self):
"""
Expand Down
Loading