Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions mindee/documents/financial_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def build_from_api_prediction(self, api_prediction, input_file, page_n=0):
self.company_number = invoice.company_number
self.orientation = invoice.orientation
self.total_tax = invoice.total_tax
self.time = Field({"value": None, "probability": 0.0})
self.time = Field({"value": None, "confidence": 0.0})
else:
receipt = Receipt(api_prediction, input_file, page_n=page_n)
self.orientation = receipt.orientation
Expand All @@ -154,7 +154,7 @@ def build_from_api_prediction(self, api_prediction, input_file, page_n=0):
self.merchant_name = receipt.merchant_name
self.time = receipt.time
self.total_tax = receipt.total_tax
self.invoice_number = Field({"value": None, "probability": 0.0})
self.invoice_number = Field({"value": None, "confidence": 0.0})
self.payment_details = []
self.company_number = []

Expand Down
14 changes: 6 additions & 8 deletions mindee/documents/invoice.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ def __init__(
self.total_incl = Amount(
{"value": total_incl}, value_key="value", page_n=page_n
)
self.date = Date({"value": invoice_date}, value_key="value", page_n=page_n)
self.invoice_date = Date(
{"value": invoice_date}, value_key="value", page_n=page_n
)
Expand Down Expand Up @@ -111,7 +110,7 @@ def __init__(
# Reconstruct extra fields
self._reconstruct()

def build_from_api_prediction(self, api_prediction, page_n=0):
def build_from_api_prediction(self, api_prediction: dict, page_n=0):
"""
:param api_prediction: Raw prediction from HTTP response
:param page_n: Page number for multi pages pdf input
Expand Down Expand Up @@ -149,7 +148,7 @@ def build_from_api_prediction(self, api_prediction, page_n=0):
api_prediction["total_excl"], value_key="value", page_n=page_n
)
self.total_tax = Amount(
{"value": None, "probability": 0.0}, value_key="value", page_n=page_n
{"value": None, "confidence": 0.0}, value_key="value", page_n=page_n
)

def __str__(self) -> str:
Expand Down Expand Up @@ -364,7 +363,7 @@ def __reconstruct_total_incl_from_taxes_plus_excl(self):
[tax.value if tax.value is not None else 0 for tax in self.taxes]
)
+ self.total_excl.value,
"probability": Field.array_probability(self.taxes)
"confidence": Field.array_probability(self.taxes)
* self.total_excl.probability,
}
self.total_incl = Amount(total_incl, value_key="value", reconstructed=True)
Expand All @@ -388,7 +387,7 @@ def __reconstruct_total_excl_from_tcc_and_taxes(self):
- sum(
[tax.value if tax.value is not None else 0 for tax in self.taxes]
),
"probability": Field.array_probability(self.taxes)
"confidence": Field.array_probability(self.taxes)
* self.total_incl.probability,
}
self.total_excl = Amount(total_excl, value_key="value", reconstructed=True)
Expand All @@ -404,7 +403,7 @@ def __reconstruct_total_tax_from_tax_lines(self):
"value": sum(
[tax.value if tax.value is not None else 0 for tax in self.taxes]
),
"probability": Field.array_probability(self.taxes),
"confidence": Field.array_probability(self.taxes),
}
if total_tax["value"] > 0:
self.total_tax = Amount(
Expand All @@ -427,8 +426,7 @@ def __reconstruct_total_tax_from_incl_and_excl(self):

total_tax = {
"value": self.total_incl.value - self.total_excl.value,
"probability": self.total_incl.probability
* self.total_excl.probability,
"confidence": self.total_incl.probability * self.total_excl.probability,
}
if total_tax["value"] >= 0:
self.total_tax = Amount(
Expand Down
8 changes: 4 additions & 4 deletions mindee/documents/passport.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,8 @@ def build_from_api_prediction(self, api_prediction, page_n=0):
Field(given_name, page_n=page_n)
for given_name in api_prediction["given_names"]
]
self.mrz = Field({"value": None, "probability": 0.0}, page_n=page_n)
self.full_name = Field({"value": None, "probability": 0.0}, page_n=page_n)
self.mrz = Field({"value": None, "confidence": 0.0}, page_n=page_n)
self.full_name = Field({"value": None, "confidence": 0.0}, page_n=page_n)

def __str__(self) -> str:
return (
Expand Down Expand Up @@ -322,7 +322,7 @@ def __reconstruct_mrz(self):
):
mrz = {
"value": self.mrz1.value + self.mrz2.value,
"probability": Field.array_probability(
"confidence": Field.array_probability(
[self.mrz1.probability, self.mrz2.probability]
),
}
Expand All @@ -342,7 +342,7 @@ def __reconstruct_full_name(self):
):
full_name = {
"value": self.given_names[0].value + " " + self.surname.value,
"probability": Field.array_probability(
"confidence": Field.array_probability(
[self.surname.probability, self.given_names[0].probability]
),
}
Expand Down
8 changes: 4 additions & 4 deletions mindee/documents/receipt.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,10 +147,10 @@ def build_from_api_prediction(self, api_prediction, page_n=0):
if str(page_n) != "-1":
self.orientation = Orientation(api_prediction["orientation"], page_n=page_n)
self.total_tax = Amount(
{"value": None, "probability": 0.0}, value_key="value", page_n=page_n
{"value": None, "confidence": 0.0}, value_key="value", page_n=page_n
)
self.total_excl = Amount(
{"value": None, "probability": 0.0}, value_key="value", page_n=page_n
{"value": None, "confidence": 0.0}, value_key="value", page_n=page_n
)

@staticmethod
Expand Down Expand Up @@ -251,7 +251,7 @@ def __reconstruct_total_excl_from_tcc_and_taxes(self):
if len(self.taxes) and self.total_incl.value is not None:
total_excl = {
"value": self.total_incl.value - Field.array_sum(self.taxes),
"probability": Field.array_probability(self.taxes)
"confidence": Field.array_probability(self.taxes)
* self.total_incl.probability,
}
self.total_excl = Amount(total_excl, value_key="value", reconstructed=True)
Expand All @@ -267,7 +267,7 @@ def __reconstruct_total_tax(self):
"value": sum(
[tax.value if tax.value is not None else 0 for tax in self.taxes]
),
"probability": Field.array_probability(self.taxes),
"confidence": Field.array_probability(self.taxes),
}
if total_tax["value"] > 0:
self.total_tax = Amount(
Expand Down
8 changes: 5 additions & 3 deletions mindee/fields/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
class Field:
probability: float

def __init__(
self,
abstract_prediction,
Expand All @@ -25,9 +27,9 @@ def __init__(
else:
self.value = abstract_prediction[value_key]

if "probability" in abstract_prediction:
self.probability = abstract_prediction["probability"]
else:
try:
self.probability = float(abstract_prediction["confidence"])
except (KeyError, TypeError):
self.probability = 0.0

if "polygon" in abstract_prediction:
Expand Down
68 changes: 34 additions & 34 deletions tests/documents/test_financial_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,26 +111,26 @@ def test__str__receipt(financial_doc_from_receipt_object):
# Business tests from receipt
def test__receipt_reconstruct_total_excl_from_total_and_taxes_1(receipt_pred):
# no incl implies no reconstruct for total excl
receipt_pred["total_incl"] = {"value": "N/A", "probability": 0.0}
receipt_pred["taxes"] = [{"rate": 20, "value": 9.5, "probability": 0.9}]
receipt_pred["total_incl"] = {"value": "N/A", "confidence": 0.0}
receipt_pred["taxes"] = [{"rate": 20, "value": 9.5, "confidence": 0.9}]
financial_doc = FinancialDocument(receipt_pred)
assert financial_doc.total_excl.value is None


def test__receipt_reconstruct_total_excl_from_total_and_taxes_2(receipt_pred):
# no taxes implies no reconstruct for total excl
receipt_pred["total_incl"] = {"value": 12.54, "probability": 0.0}
receipt_pred["total_incl"] = {"value": 12.54, "confidence": 0.0}
receipt_pred["taxes"] = []
financial_doc = FinancialDocument(receipt_pred)
assert financial_doc.total_excl.value is None


def test__receipt_reconstruct_total_excl_from_total_and_taxes_3(receipt_pred):
# working example
receipt_pred["total_incl"] = {"value": 12.54, "probability": 0.5}
receipt_pred["total_incl"] = {"value": 12.54, "confidence": 0.5}
receipt_pred["taxes"] = [
{"rate": 20, "value": 0.5, "probability": 0.1},
{"rate": 10, "value": 4.25, "probability": 0.6},
{"rate": 20, "value": 0.5, "confidence": 0.1},
{"rate": 10, "value": 4.25, "confidence": 0.6},
]
financial_doc = FinancialDocument(receipt_pred)
assert financial_doc.total_excl.probability == 0.03
Expand All @@ -147,8 +147,8 @@ def test__receipt_reconstruct_total_tax_1(receipt_pred):
def test__receipt_reconstruct_total_tax_2(receipt_pred):
# working example
receipt_pred["taxes"] = [
{"rate": 20, "value": 10.2, "probability": 0.5},
{"rate": 10, "value": 40.0, "probability": 0.1},
{"rate": 20, "value": 10.2, "confidence": 0.5},
{"rate": 10, "value": 40.0, "confidence": 0.1},
]
financial_doc = FinancialDocument(receipt_pred)
assert financial_doc.total_tax.value == 50.2
Expand All @@ -157,10 +157,10 @@ def test__receipt_reconstruct_total_tax_2(receipt_pred):

def test__receipt_taxes_match_total_incl_1(receipt_pred):
# matching example
receipt_pred["total_incl"] = {"value": 507.25, "probability": 0.6}
receipt_pred["total_incl"] = {"value": 507.25, "confidence": 0.6}
receipt_pred["taxes"] = [
{"rate": 20, "value": 10.99, "probability": 0.5},
{"rate": 10, "value": 40.12, "probability": 0.1},
{"rate": 20, "value": 10.99, "confidence": 0.5},
{"rate": 10, "value": 40.12, "confidence": 0.1},
]
financial_doc = FinancialDocument(receipt_pred)
assert financial_doc.checklist["taxes_match_total_incl"] is True
Expand All @@ -171,46 +171,46 @@ def test__receipt_taxes_match_total_incl_1(receipt_pred):

def test__receipt_taxes_match_total_incl_2(receipt_pred):
# not matching example with close error
receipt_pred["total_incl"] = {"value": 507.25, "probability": 0.6}
receipt_pred["total_incl"] = {"value": 507.25, "confidence": 0.6}
receipt_pred["taxes"] = [
{"rate": 20, "value": 10.9, "probability": 0.5},
{"rate": 10, "value": 40.12, "probability": 0.1},
{"rate": 20, "value": 10.9, "confidence": 0.5},
{"rate": 10, "value": 40.12, "confidence": 0.1},
]
financial_doc = FinancialDocument(receipt_pred)
assert financial_doc.checklist["taxes_match_total_incl"] is False


def test__receipt_taxes_match_total_incl_3(receipt_pred):
# sanity check with null tax
receipt_pred["total_incl"] = {"value": 507.25, "probability": 0.6}
receipt_pred["taxes"] = [{"rate": 20, "value": 0.0, "probability": 0.5}]
receipt_pred["total_incl"] = {"value": 507.25, "confidence": 0.6}
receipt_pred["taxes"] = [{"rate": 20, "value": 0.0, "confidence": 0.5}]
financial_doc = FinancialDocument(receipt_pred)
assert financial_doc.checklist["taxes_match_total_incl"] is False


# Business tests from invoice
def test__invoice_reconstruct_total_excl_from_total_and_taxes_1(invoice_pred):
# no incl implies no reconstruct for total excl
invoice_pred["total_incl"] = {"amount": "N/A", "probability": 0.0}
invoice_pred["taxes"] = [{"rate": 20, "amount": 9.5, "probability": 0.9}]
invoice_pred["total_incl"] = {"amount": "N/A", "confidence": 0.0}
invoice_pred["taxes"] = [{"rate": 20, "amount": 9.5, "confidence": 0.9}]
financial_doc = FinancialDocument(invoice_pred)
assert financial_doc.total_excl.value is None


def test__invoice_reconstruct_total_excl_from_total_and_taxes_2(invoice_pred):
# no taxes implies no reconstruct for total excl
invoice_pred["total_incl"] = {"amount": 12.54, "probability": 0.0}
invoice_pred["total_incl"] = {"amount": 12.54, "confidence": 0.0}
invoice_pred["taxes"] = []
financial_doc = FinancialDocument(invoice_pred)
assert financial_doc.total_excl.value is None


def test__invoice_reconstruct_total_excl_from_total_and_taxes_3(invoice_pred):
# working example
invoice_pred["total_incl"] = {"value": 12.54, "probability": 0.5}
invoice_pred["total_incl"] = {"value": 12.54, "confidence": 0.5}
invoice_pred["taxes"] = [
{"rate": 20, "value": 0.5, "probability": 0.1},
{"rate": 10, "value": 4.25, "probability": 0.6},
{"rate": 20, "value": 0.5, "confidence": 0.1},
{"rate": 10, "value": 4.25, "confidence": 0.6},
]
financial_doc = FinancialDocument(invoice_pred)
assert financial_doc.total_excl.probability == 0.03
Expand All @@ -227,8 +227,8 @@ def test__invoice_reconstruct_total_tax_1(invoice_pred):
def test__invoice_reconstruct_total_tax_2(invoice_pred):
# working example
invoice_pred["taxes"] = [
{"rate": 20, "value": 10.2, "probability": 0.5},
{"rate": 10, "value": 40.0, "probability": 0.1},
{"rate": 20, "value": 10.2, "confidence": 0.5},
{"rate": 10, "value": 40.0, "confidence": 0.1},
]
financial_doc = FinancialDocument(invoice_pred)
assert financial_doc.total_tax.value == 50.2
Expand All @@ -237,10 +237,10 @@ def test__invoice_reconstruct_total_tax_2(invoice_pred):

def test__invoice_taxes_match_total_incl_1(invoice_pred):
# matching example
invoice_pred["total_incl"] = {"value": 507.25, "probability": 0.6}
invoice_pred["total_incl"] = {"value": 507.25, "confidence": 0.6}
invoice_pred["taxes"] = [
{"rate": 20, "value": 10.99, "probability": 0.5},
{"rate": 10, "value": 40.12, "probability": 0.1},
{"rate": 20, "value": 10.99, "confidence": 0.5},
{"rate": 10, "value": 40.12, "confidence": 0.1},
]
financial_doc = FinancialDocument(invoice_pred)
assert financial_doc.checklist["taxes_match_total_incl"] is True
Expand All @@ -251,27 +251,27 @@ def test__invoice_taxes_match_total_incl_1(invoice_pred):

def test__invoice_taxes_match_total_incl_2(invoice_pred):
# not matching example with close error
invoice_pred["total_incl"] = {"value": 507.25, "probability": 0.6}
invoice_pred["total_incl"] = {"value": 507.25, "confidence": 0.6}
invoice_pred["taxes"] = [
{"rate": 20, "value": 10.9, "probability": 0.5},
{"rate": 10, "value": 40.12, "probability": 0.1},
{"rate": 20, "value": 10.9, "confidence": 0.5},
{"rate": 10, "value": 40.12, "confidence": 0.1},
]
financial_doc = FinancialDocument(invoice_pred)
assert financial_doc.checklist["taxes_match_total_incl"] is False


def test__invoice_taxes_match_total_incl_3(invoice_pred):
# sanity check with null tax
invoice_pred["total_incl"] = {"value": 507.25, "probability": 0.6}
invoice_pred["taxes"] = [{"rate": 20, "value": 0.0, "probability": 0.5}]
invoice_pred["total_incl"] = {"value": 507.25, "confidence": 0.6}
invoice_pred["taxes"] = [{"rate": 20, "value": 0.0, "confidence": 0.5}]
financial_doc = FinancialDocument(invoice_pred)
assert financial_doc.checklist["taxes_match_total_incl"] is False


def test__shouldnt_raise_when_tax_rate_none(invoice_pred):
# sanity check with null tax
invoice_pred["total_incl"] = {"value": 507.25, "probability": 0.6}
invoice_pred["taxes"] = [{"rate": "N/A", "value": 0.0, "probability": 0.5}]
invoice_pred["total_incl"] = {"value": 507.25, "confidence": 0.6}
invoice_pred["taxes"] = [{"rate": "N/A", "value": 0.0, "confidence": 0.5}]
financial_doc = FinancialDocument(invoice_pred)
assert financial_doc.checklist["taxes_match_total_incl"] is False

Expand Down
Loading