Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -128,3 +128,6 @@ dmypy.json
# Pyre type checker
.pyre/
/data/

# Mac OS
.DS_Store
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
# Mindee python SDK

## v1.2.0 (2020-08-25)

### Chg

* :sparkles: Adapted SDK to the new Mindee API endpoint
* :zap: Single page object reconstruction is now server-side
* :heavy_minus_sign: Removed Numpy dependency
* :white_check_mark: Updated tests with new data

## v1.1.3 (2020-02-21)

### Fix
Expand Down
78 changes: 31 additions & 47 deletions mindee/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,14 @@
from mindee.documents.passport import Passport
from mindee.benchmark import Benchmark

# Registry mapping each supported document_type string to the document class
# that format_response instantiates for page-level and document-level
# predictions; a document_type missing from this mapping is rejected there.
DOCUMENT_CLASSES = {
"receipt": Receipt,
"invoice": Invoice,
"financial_document": FinancialDocument,
"passport": Passport,
"license_plate": CarPlate
}


class Client(object):
def __init__(
Expand All @@ -29,7 +37,7 @@ def __init__(
"""
assert type(raise_on_error) == bool
self.raise_on_error = raise_on_error
self.base_url = "https://api.mindee.net/products/"
self.base_url = "https://api.mindee.net/v1/products/mindee/"
self.expense_receipt_token = expense_receipt_token
self.invoice_token = invoice_token
self.passport_token = passport_token
Expand Down Expand Up @@ -79,10 +87,11 @@ def _wrap_response(
:return: Full response object
"""
dict_response = response.json()
if response.status_code != 200 and self.raise_on_error:

if response.status_code > 201 and self.raise_on_error:
raise HTTPException(
"Receipt API %s HTTP error: %s" % (response.status_code, json.dumps(dict_response)))
elif response.status_code != 200:
elif response.status_code > 201:
return Response(
http_response=dict_response,
pages=[],
Expand Down Expand Up @@ -288,55 +297,30 @@ def format_response(json_response, document_type, input_file):
json_response["filepath"] = input_file.filepath
json_response["file_extension"] = input_file.file_extension
pages = []
for page_n, page_prediction in enumerate(json_response["predictions"]):
if document_type == "receipt":
pages.append(
Receipt(
api_prediction=page_prediction,
input_file=input_file,
page_n=page_n
)
)
elif document_type == "invoice":
pages.append(
Invoice(
api_prediction=page_prediction,
input_file=input_file,
page_n=page_n
)
)
elif document_type == "financial_document":
pages.append(
FinancialDocument(
api_prediction=page_prediction,
input_file=input_file,
page_n=page_n
)
)
elif document_type == "passport":
pages.append(
Passport(
api_prediction=page_prediction,
input_file=input_file,
page_n=page_n
)
)
elif document_type == "license_plate":
pages.append(
CarPlate(
api_prediction=page_prediction,
input_file=input_file,
page_n=page_n
)

if document_type not in DOCUMENT_CLASSES.keys():
raise Exception("Document type not supported.")

# Create page level objects
for page_n, page_prediction in enumerate(json_response["document"]["inference"]["pages"]):
pages.append(
DOCUMENT_CLASSES[document_type](
api_prediction=page_prediction["prediction"],
input_file=input_file,
page_n=page_prediction["id"]
)
else:
raise Exception("Document type not supported.")
)

document = Document.merge_pages(pages)
# Create the document level object
document_level = DOCUMENT_CLASSES[document_type](
api_prediction=json_response["document"]["inference"]["prediction"],
input_file=input_file,
page_n="-1"
)

return Response(
http_response=json_response,
pages=pages,
document=document,
document=document_level,
document_type=document_type
)
21 changes: 0 additions & 21 deletions mindee/documents/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
import copy


class Document(object):
def __init__(self, input_file=None):
self.filepath = None
Expand All @@ -24,21 +21,3 @@ def _reconstruct(self, *args):

def all_checks(self):
return all(self.checklist)

@staticmethod
def merge_pages(page_documents):
"""
:param page_documents: Document object list
:return: A single Document in which each field is taken from the page where that field has the highest probability
"""
document = copy.deepcopy(page_documents[0])
attributes = [a for a in dir(document)]
for doc in page_documents:
for attribute in attributes:
if not hasattr(getattr(doc, attribute), "probability"):
continue

if getattr(doc, attribute).probability > getattr(document, attribute).probability:
setattr(document, attribute, getattr(doc, attribute))

return document
14 changes: 11 additions & 3 deletions mindee/documents/financial_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,18 +138,26 @@ def build_from_api_prediction(self, api_prediction, input_file, page_n=0):
self.company_number = []

def __str__(self):
return "-----Financial document-----\n" \
return "-----Financial Document data-----\n" \
"Filename: %s \n" \
"Total amount: %s \n" \
"Invoice number: %s \n" \
"Total amount including taxes: %s \n" \
"Total amount excluding taxes: %s \n" \
"Date: %s\n" \
"Merchant name: %s\n" \
"Invoice due date: %s\n" \
"Supplier name: %s\n" \
"Taxes: %s\n" \
"Total taxes: %s\n" \
"----------------------" % \
(
self.filename,
self.invoice_number.value,
self.total_incl.value,
self.total_excl.value,
self.date.value,
self.due_date.value,
self.merchant_name.value,
",".join([str(t.value) + " " + str(t.rate) + "%" for t in self.taxes]),
self.total_tax.value
)

Expand Down
6 changes: 4 additions & 2 deletions mindee/documents/invoice.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ def __init__(
supplier=None,
payment_details=None,
company_number=None,
vat_number=None,
orientation=None,
total_tax=None,
page_n=0
Expand Down Expand Up @@ -106,7 +105,8 @@ def build_from_api_prediction(self, api_prediction, page_n=0):
self.due_date = Date(api_prediction["due_date"], value_key="value", page_n=page_n)
self.invoice_number = Field(api_prediction["invoice_number"], page_n=page_n)
self.locale = Locale(api_prediction["locale"], value_key="language", page_n=page_n)
self.orientation = Orientation(api_prediction["orientation"], page_n=page_n)
if str(page_n) != "-1":
self.orientation = Orientation(api_prediction["orientation"], page_n=page_n)
self.supplier = Field(api_prediction["supplier"], page_n=page_n)
self.taxes = [
Tax(tax_prediction, page_n=page_n, value_key="value") for tax_prediction in api_prediction["taxes"]
Expand All @@ -128,6 +128,7 @@ def __str__(self):
"Total amount including taxes: %s \n" \
"Total amount excluding taxes: %s \n" \
"Invoice date: %s\n" \
"Invoice due date: %s\n" \
"Supplier name: %s\n" \
"Taxes: %s\n" \
"Total taxes: %s\n" \
Expand All @@ -138,6 +139,7 @@ def __str__(self):
self.total_incl.value,
self.total_excl.value,
self.invoice_date.value,
self.due_date.value,
self.supplier.value,
",".join([str(t.value) + " " + str(t.rate) + "%" for t in self.taxes]),
self.total_tax.value
Expand Down
3 changes: 2 additions & 1 deletion mindee/documents/receipt.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,8 @@ def build_from_api_prediction(self, api_prediction, page_n=0):
self.taxes = [
Tax(tax_prediction, page_n=page_n, value_key="value", rate_key="rate", code_key="code")
for tax_prediction in api_prediction["taxes"]]
self.orientation = Orientation(api_prediction["orientation"], page_n=page_n)
if str(page_n) != "-1":
self.orientation = Orientation(api_prediction["orientation"], page_n=page_n)
self.total_tax = Amount({"value": None, "probability": 0.}, value_key="value", page_n=page_n)
self.total_excl = Amount({"value": None, "probability": 0.}, value_key="value", page_n=page_n)

Expand Down
4 changes: 2 additions & 2 deletions mindee/fields/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ def __init__(
else:
self.probability = 0.

if "segmentation" in abstract_prediction:
self.bbox = abstract_prediction["segmentation"]["bounding_box"]
if "polygon" in abstract_prediction:
self.bbox = abstract_prediction["polygon"]
else:
self.bbox = []

Expand Down
4 changes: 2 additions & 2 deletions mindee/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def request(url, input_file, token, include_words=False):
"""
input_file.file_object.seek(0)

files = {"file": input_file.file_object.read()}
files = {"document": input_file.file_object.read()}

headers = {"X-Inferuser-Token": token}

Expand All @@ -20,7 +20,7 @@ def request(url, input_file, token, include_words=False):
params["include_mvision"] = "true"

response = requests.post(
url+"?include_mvision=True",
url,
files=files,
headers=headers,
data=params
Expand Down
20 changes: 12 additions & 8 deletions mindee/plots.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import numpy as np
import matplotlib.pyplot as plt


Expand All @@ -17,30 +16,35 @@ def autolabel(ax, rects):
ha='center', va='bottom', rotation=90)


def plot_metrics(metrics, accuracies, precisions, save_path):
def plot_metrics(metrics, accuracies, precisions, save_path, savefig=True):
"""
:param savefig: Boolean specifying whether to save the plot as a PNG file
:param metrics: List of metrics names
:param accuracies: List of accuracy values
:param precisions: List of precision values
:param save_path: Path to save the figure
:return: (void) plot the precision and accuracy bar charts and save the figure in save_path
:return: the plt object
"""
x = np.arange(len(metrics)) # the label locations
x_range = [float(k) for k in range(len(metrics))] # the label locations
width = 0.4 # the width of the bars

fig, ax = plt.subplots()
fig.subplots_adjust(bottom=0.15)
rects1 = ax.bar(x - width / 2, accuracies, width, color='#fd3246', label='Accuracy')
rects2 = ax.bar(x + width / 2, precisions, width, color='#007af9', label='Precision')
rects1 = ax.bar([x - width / 2 for x in x_range], accuracies, width, color='#fd3246', label='Accuracy')
rects2 = ax.bar([x + width / 2 for x in x_range], precisions, width, color='#007af9', label='Precision')

autolabel(ax, rects1)
autolabel(ax, rects2)

# Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_ylabel('%')
ax.set_title('Metrics')
ax.set_xticks(x)
ax.set_xticks(x_range)
ax.set_xticklabels(metrics, rotation=45, fontsize=6)
ax.legend(loc='lower left')
plt.grid(True, linestyle='--', color='#e1e1e1', alpha=0.4)

plt.savefig(save_path, dpi=300)
if savefig:
plt.savefig(save_path, dpi=300)

return plt
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,4 @@ requests~=2.23.0
pytz~=2021.1
setuptools~=49.2.0
matplotlib~=3.1.2
numpy~=1.18.5
PyMuPDF~=1.18.6
Loading