diff --git a/docs/extras/code_samples/bank_check_v1.txt b/docs/extras/code_samples/bank_check_v1.txt index 092b4cee..402bcbd8 100644 --- a/docs/extras/code_samples/bank_check_v1.txt +++ b/docs/extras/code_samples/bank_check_v1.txt @@ -6,7 +6,7 @@ mindee_client = Client(api_key="my-api-key") # Load a file from disk input_doc = mindee_client.doc_from_path("/path/to/the/file.ext") -# Parse the US Bank Check Details by passing the appropriate type +# Parse the Bank Check by passing the appropriate type result = input_doc.parse(documents.us.TypeBankCheckV1) # Print a brief summary of the parsed data diff --git a/docs/predictions/standard/documents/us/bank_check_v1.rst b/docs/predictions/standard/documents/us/bank_check_v1.rst index a297b8b9..acbc8b5e 100644 --- a/docs/predictions/standard/documents/us/bank_check_v1.rst +++ b/docs/predictions/standard/documents/us/bank_check_v1.rst @@ -8,4 +8,3 @@ Bank Check V1 .. autoclass:: mindee.documents.us.BankCheckV1 :members: - :undoc-members: diff --git a/mindee/documents/cropper/cropper_v1.py b/mindee/documents/cropper/cropper_v1.py index 8a89daa3..0bbf9aad 100644 --- a/mindee/documents/cropper/cropper_v1.py +++ b/mindee/documents/cropper/cropper_v1.py @@ -7,19 +7,18 @@ class CropperV1(Document): """Cropper v1 prediction results.""" - cropping: List[PositionField] - """List of all detected cropped elements in the image""" + cropping: List[PositionField] = [] + """List of documents found in the image.""" def __init__( self, - api_prediction: TypeApiPrediction, + api_prediction=None, input_source=None, page_n: Optional[int] = None, ): """ - Custom document object. + Cropper v1 prediction results. - :param document_type: Document type :param api_prediction: Raw prediction from HTTP response :param input_source: Input object :param page_n: Page number for multi pages pdf input @@ -35,27 +34,33 @@ def __init__( def _build_from_api_prediction( self, api_prediction: TypeApiPrediction, page_n: Optional[int] = None ) -> None: - """Build the document from an API response JSON.""" - self.cropping = [] + """ + Build the object from the prediction API JSON. - # cropping is only present on pages + :param api_prediction: Raw prediction from HTTP response + :param page_n: Page number + """ if page_n is None: return - for crop in api_prediction["cropping"]: - self.cropping.append(PositionField(prediction=crop)) + self.cropping = [ + PositionField(prediction, page_id=page_n) + for prediction in api_prediction["cropping"] + ] - def _checklist(self) -> None: - pass - - def __str__(self): - cropping = "\n ".join([str(crop) for crop in self.cropping]) + def __str__(self) -> str: + cropping = f"\n { ' ' * 18 }".join( + [str(item) for item in self.cropping], + ) return clean_out_string( - "----- Cropper Data -----\n" - f"Filename: {self.filename or ''}\n" - f"Cropping: {cropping}\n" - "------------------------" + "Cropper V1 Prediction\n" + "=====================\n" + f":Filename: {self.filename or ''}\n" + f":Document Cropper: {cropping}\n" ) -TypeCropperV1 = TypeVar("TypeCropperV1", bound=CropperV1) +TypeCropperV1 = TypeVar( + "TypeCropperV1", + bound=CropperV1, +) diff --git a/mindee/documents/custom/custom_v1.py b/mindee/documents/custom/custom_v1.py index 60211322..95688ba0 100644 --- a/mindee/documents/custom/custom_v1.py +++ b/mindee/documents/custom/custom_v1.py @@ -57,12 +57,14 @@ def _build_from_api_prediction( self.fields[field_name] = ListField(prediction=field, page_n=page_n) def __str__(self) -> str: - custom_doc_str = f"----- {self.type} -----\nFilename: {self.filename or ''}\n" + custom_doc_str = f"{self.type} V1 Prediction" + custom_doc_str += "\n" + "=" * len(custom_doc_str) + custom_doc_str += f"\n:Filename: {self.filename or ''}\n" + for class_name, class_info in self.classifications.items(): - custom_doc_str += f"{class_name}: {class_info}\n" + custom_doc_str += f":{class_name}: {class_info}\n" for field_name, field_info in self.fields.items(): - custom_doc_str += f"{field_name}: {field_info}\n" - custom_doc_str += "----------------------" + custom_doc_str += f":{field_name}: {field_info}\n" return clean_out_string(custom_doc_str) diff --git a/mindee/documents/eu/license_plate/license_plate_v1.py b/mindee/documents/eu/license_plate/license_plate_v1.py index 855f4fea..567951fb 100644 --- a/mindee/documents/eu/license_plate/license_plate_v1.py +++ b/mindee/documents/eu/license_plate/license_plate_v1.py @@ -46,15 +46,18 @@ def _build_from_api_prediction( ] def __str__(self) -> str: - license_plates = f"\n { ' ' * 15 }".join( - [str(item) for item in self.license_plates] + license_plates = f"\n { ' ' * 16 }".join( + [str(item) for item in self.license_plates], ) return clean_out_string( - "----- EU License Plate V1 -----\n" - f"Filename: {self.filename or ''}\n" - f"License Plates: { license_plates }\n" - "----------------------" + "EU License Plate V1 Prediction\n" + "==============================\n" + f":Filename: {self.filename or ''}\n" + f":License Plates: {license_plates}\n" ) -TypeLicensePlateV1 = TypeVar("TypeLicensePlateV1", bound=LicensePlateV1) +TypeLicensePlateV1 = TypeVar( + "TypeLicensePlateV1", + bound=LicensePlateV1, +) diff --git a/mindee/documents/financial_document/financial_document_v1.py b/mindee/documents/financial_document/financial_document_v1.py index 847b99b2..87f3aeb1 100644 --- a/mindee/documents/financial_document/financial_document_v1.py +++ b/mindee/documents/financial_document/financial_document_v1.py @@ -217,16 +217,16 @@ def _line_items_to_str(self) -> str: return out_str def __str__(self) -> str: - customer_company_registrations = f"\n { ' ' * 31 }".join( + customer_company_registrations = f"\n { ' ' * 32 }".join( [str(item) for item in self.customer_company_registrations], ) - reference_numbers = f"\n { ' ' * 18 }".join( + reference_numbers = f"\n { ' ' * 19 }".join( [str(item) for item in self.reference_numbers], ) - supplier_company_registrations = f"\n { ' ' * 31 }".join( + supplier_company_registrations = f"\n { ' ' * 32 }".join( [str(item) for item in self.supplier_company_registrations], ) - supplier_payment_details = f"\n { ' ' * 25 }".join( + supplier_payment_details = f"\n { ' ' * 26 }".join( [str(item) for item in self.supplier_payment_details], ) return clean_out_string( @@ -259,4 +259,7 @@ def __str__(self) -> str: ) -TypeFinancialDocumentV1 = TypeVar("TypeFinancialDocumentV1", bound=FinancialDocumentV1) +TypeFinancialDocumentV1 = TypeVar( + "TypeFinancialDocumentV1", + bound=FinancialDocumentV1, +) diff --git a/mindee/documents/fr/bank_account_details/bank_account_details_v1.py b/mindee/documents/fr/bank_account_details/bank_account_details_v1.py index 9597e794..aa23475d 100644 --- a/mindee/documents/fr/bank_account_details/bank_account_details_v1.py +++ b/mindee/documents/fr/bank_account_details/bank_account_details_v1.py @@ -7,10 +7,10 @@ class BankAccountDetailsV1(Document): """Bank Account Details v1 prediction results.""" - iban: TextField - """The International Bank Account Number (IBAN).""" account_holder_name: TextField """The name of the account holder as seen on the document.""" + iban: TextField + """The International Bank Account Number (IBAN).""" swift: TextField """The bank's SWIFT Business Identifier Code (BIC).""" @@ -44,14 +44,14 @@ def _build_from_api_prediction( :param api_prediction: Raw prediction from HTTP response :param page_n: Page number """ - self.iban = TextField( - api_prediction["iban"], - page_id=page_n, - ) self.account_holder_name = TextField( api_prediction["account_holder_name"], page_id=page_n, ) + self.iban = TextField( + api_prediction["iban"], + page_id=page_n, + ) self.swift = TextField( api_prediction["swift"], page_id=page_n, @@ -59,15 +59,16 @@ def _build_from_api_prediction( def __str__(self) -> str: return clean_out_string( - "----- FR Bank Account Details V1 -----\n" - f"Filename: {self.filename or ''}\n" - f"IBAN: { self.iban }\n" - f"Account Holder's Name: { self.account_holder_name }\n" - f"SWIFT Code: { self.swift }\n" - "----------------------" + "FR Bank Account Details V1 Prediction\n" + "=====================================\n" + f":Filename: {self.filename or ''}\n" + f":IBAN: {self.iban}\n" + f":Account Holder's Name: {self.account_holder_name}\n" + f":SWIFT Code: {self.swift}\n" ) TypeBankAccountDetailsV1 = TypeVar( - "TypeBankAccountDetailsV1", bound=BankAccountDetailsV1 + "TypeBankAccountDetailsV1", + bound=BankAccountDetailsV1, ) diff --git a/mindee/documents/fr/bank_account_details/bank_account_details_v2.py b/mindee/documents/fr/bank_account_details/bank_account_details_v2.py index 0b557b2b..0847d852 100644 --- a/mindee/documents/fr/bank_account_details/bank_account_details_v2.py +++ b/mindee/documents/fr/bank_account_details/bank_account_details_v2.py @@ -78,5 +78,6 @@ def __str__(self) -> str: TypeBankAccountDetailsV2 = TypeVar( - "TypeBankAccountDetailsV2", bound=BankAccountDetailsV2 + "TypeBankAccountDetailsV2", + bound=BankAccountDetailsV2, ) diff --git a/mindee/documents/fr/carte_vitale/carte_vitale_v1.py b/mindee/documents/fr/carte_vitale/carte_vitale_v1.py index 4837d59c..d3f3f7b5 100644 --- a/mindee/documents/fr/carte_vitale/carte_vitale_v1.py +++ b/mindee/documents/fr/carte_vitale/carte_vitale_v1.py @@ -10,12 +10,12 @@ class CarteVitaleV1(Document): given_names: List[TextField] """The given name(s) of the card holder.""" - surname: TextField - """The surname of the card holder.""" - social_security: TextField - """The Social Security Number (Numéro de Sécurité Sociale) of the card holder""" issuance_date: DateField """The date the card was issued.""" + social_security: TextField + """The Social Security Number (Numéro de Sécurité Sociale) of the card holder""" + surname: TextField + """The surname of the card holder.""" def __init__( self, @@ -51,30 +51,35 @@ def _build_from_api_prediction( TextField(prediction, page_id=page_n) for prediction in api_prediction["given_names"] ] - self.surname = TextField( - api_prediction["surname"], + self.issuance_date = DateField( + api_prediction["issuance_date"], page_id=page_n, ) self.social_security = TextField( api_prediction["social_security"], page_id=page_n, ) - self.issuance_date = DateField( - api_prediction["issuance_date"], + self.surname = TextField( + api_prediction["surname"], page_id=page_n, ) def __str__(self) -> str: - given_names = "\n".join([str(item) for item in self.given_names]) + given_names = f"\n { ' ' * 15 }".join( + [str(item) for item in self.given_names], + ) return clean_out_string( - "----- FR Carte Vitale V1 -----\n" - f"Filename: {self.filename or ''}\n" - f"Given Name(s): { given_names }\n" - f"Surname: { self.surname }\n" - f"Social Security Number: { self.social_security }\n" - f"Issuance Date: { self.issuance_date }\n" - "----------------------" + "FR Carte Vitale V1 Prediction\n" + "=============================\n" + f":Filename: {self.filename or ''}\n" + f":Given Name(s): {given_names}\n" + f":Surname: {self.surname}\n" + f":Social Security Number: {self.social_security}\n" + f":Issuance Date: {self.issuance_date}\n" ) -TypeCarteVitaleV1 = TypeVar("TypeCarteVitaleV1", bound=CarteVitaleV1) +TypeCarteVitaleV1 = TypeVar( + "TypeCarteVitaleV1", + bound=CarteVitaleV1, +) diff --git a/mindee/documents/fr/id_card/id_card_v1.py b/mindee/documents/fr/id_card/id_card_v1.py index 8c5ccff6..995f7872 100644 --- a/mindee/documents/fr/id_card/id_card_v1.py +++ b/mindee/documents/fr/id_card/id_card_v1.py @@ -108,23 +108,28 @@ def _build_from_api_prediction( ) def __str__(self) -> str: - given_names = "\n".join([str(item) for item in self.given_names]) + given_names = f"\n { ' ' * 15 }".join( + [str(item) for item in self.given_names], + ) return clean_out_string( - "----- FR Carte Nationale d'Identité V1 -----\n" - f"Filename: {self.filename or ''}\n" - f"Document Side: { self.document_side }\n" - f"Identity Number: { self.id_number }\n" - f"Given Name(s): { given_names }\n" - f"Surname: { self.surname }\n" - f"Date of Birth: { self.birth_date }\n" - f"Place of Birth: { self.birth_place }\n" - f"Expiry Date: { self.expiry_date }\n" - f"Issuing Authority: { self.authority }\n" - f"Gender: { self.gender }\n" - f"MRZ Line 1: { self.mrz1 }\n" - f"MRZ Line 2: { self.mrz2 }\n" - "----------------------" + "FR Carte Nationale d'Identité V1 Prediction\n" + "===========================================\n" + f":Filename: {self.filename or ''}\n" + f":Document Side: {self.document_side}\n" + f":Identity Number: {self.id_number}\n" + f":Given Name(s): {given_names}\n" + f":Surname: {self.surname}\n" + f":Date of Birth: {self.birth_date}\n" + f":Place of Birth: {self.birth_place}\n" + f":Expiry Date: {self.expiry_date}\n" + f":Issuing Authority: {self.authority}\n" + f":Gender: {self.gender}\n" + f":MRZ Line 1: {self.mrz1}\n" + f":MRZ Line 2: {self.mrz2}\n" ) -TypeIdCardV1 = TypeVar("TypeIdCardV1", bound=IdCardV1) +TypeIdCardV1 = TypeVar( + "TypeIdCardV1", + bound=IdCardV1, +) diff --git a/mindee/documents/invoice_splitter/invoice_splitter_v1.py b/mindee/documents/invoice_splitter/invoice_splitter_v1.py index 2279d729..1e2610d5 100644 --- a/mindee/documents/invoice_splitter/invoice_splitter_v1.py +++ b/mindee/documents/invoice_splitter/invoice_splitter_v1.py @@ -19,7 +19,7 @@ def __init__(self, prediction: Dict[str, Any]): pass def __str__(self) -> str: - return f"page indexes: {', '.join([str(page_index) for page_index in self.page_indexes])}" + return f":Page indexes: {', '.join([str(page_index) for page_index in self.page_indexes])}" class InvoiceSplitterV1(Document): @@ -61,6 +61,7 @@ def _build_from_api_prediction( ] def __str__(self) -> str: + invoice_page_groups = "" if len(self.invoice_page_groups) > 0: invoice_page_groups = "\n " invoice_page_groups += f"\n{ ' ' * 2 }".join( @@ -68,10 +69,10 @@ def __str__(self) -> str: ) return clean_out_string( - f"----- Invoice Splitter V1 -----\n" - f"Filename: {self.filename or ''}\n" - f"Invoice Page Groups: {invoice_page_groups}\n" - f"----------------------" + "Invoice Splitter V1 Prediction\n" + "==============================\n" + f":Filename: {self.filename or ''}\n" + f":Invoice Page Groups: {invoice_page_groups}\n" ) diff --git a/mindee/documents/passport/passport_v1.py b/mindee/documents/passport/passport_v1.py index ed81d67e..6a1b3f18 100644 --- a/mindee/documents/passport/passport_v1.py +++ b/mindee/documents/passport/passport_v1.py @@ -120,27 +120,24 @@ def _build_from_api_prediction( self.full_name = TextField({"value": None, "confidence": 0.0}, page_id=page_n) def __str__(self) -> str: - given_names = " ".join( - [ - given_name.value if given_name.value is not None else "" - for given_name in self.given_names - ] + given_names = f"\n { ' ' * 15 }".join( + [str(item) for item in self.given_names], ) return clean_out_string( - "----- Passport V1 -----\n" - f"Filename: {self.filename or ''}\n" - f"Full name: {self.full_name}\n" - f"Given names: {given_names}\n" - f"Surname: {self.surname}\n" - f"Country: {self.country}\n" - f"ID Number: {self.id_number}\n" - f"Issuance date: {self.issuance_date}\n" - f"Birth date: {self.birth_date}\n" - f"Expiry date: {self.expiry_date}\n" - f"MRZ 1: {self.mrz1}\n" - f"MRZ 2: {self.mrz2}\n" - f"MRZ: {self.mrz}\n" - "----------------------" + "Passport V1 Prediction\n" + "======================\n" + f":Filename: {self.filename or ''}\n" + f":Country Code: {self.country}\n" + f":ID Number: {self.id_number}\n" + f":Given Name(s): {given_names}\n" + f":Surname: {self.surname}\n" + f":Date of Birth: {self.birth_date}\n" + f":Place of Birth: {self.birth_place}\n" + f":Gender: {self.gender}\n" + f":Date of Issue: {self.issuance_date}\n" + f":Expiry Date: {self.expiry_date}\n" + f":MRZ Line 1: {self.mrz1}\n" + f":MRZ Line 2: {self.mrz2}\n" ) def is_expired(self) -> bool: @@ -312,4 +309,7 @@ def __reconstruct_full_name(self) -> None: self.full_name = TextField(full_name, reconstructed=True) -TypePassportV1 = TypeVar("TypePassportV1", bound=PassportV1) +TypePassportV1 = TypeVar( + "TypePassportV1", + bound=PassportV1, +) diff --git a/mindee/documents/proof_of_address/proof_of_address_v1.py b/mindee/documents/proof_of_address/proof_of_address_v1.py index 7965a590..debd7cc7 100644 --- a/mindee/documents/proof_of_address/proof_of_address_v1.py +++ b/mindee/documents/proof_of_address/proof_of_address_v1.py @@ -10,24 +10,24 @@ class ProofOfAddressV1(Document): """Proof of Address v1 prediction results.""" - locale: LocaleField - """The locale detected on the document.""" - issuer_name: TextField - """The name of the person or company issuing the document.""" - issuer_company_registration: List[CompanyRegistrationField] - """List of company registrations found for the issuer.""" + date: DateField + """The date the document was issued.""" + dates: List[DateField] + """List of dates found on the document.""" issuer_address: TextField """The address of the document's issuer.""" - recipient_name: TextField - """The name of the person or company receiving the document.""" - recipient_company_registration: List[CompanyRegistrationField] - """List of company registrations found for the recipient.""" + issuer_company_registration: List[CompanyRegistrationField] + """List of company registrations found for the issuer.""" + issuer_name: TextField + """The name of the person or company issuing the document.""" + locale: LocaleField + """The locale detected on the document.""" recipient_address: TextField """The address of the recipient.""" - dates: List[DateField] - """List of dates found on the document.""" - date: DateField - """The date the document was issued.""" + recipient_company_registration: List[CompanyRegistrationField] + """List of company registrations found for the recipient.""" + recipient_name: TextField + """The name of the person or company receiving the document.""" def __init__( self, @@ -59,67 +59,70 @@ def _build_from_api_prediction( :param api_prediction: Raw prediction from HTTP response :param page_n: Page number """ - self.locale = LocaleField( - api_prediction["locale"], + self.date = DateField( + api_prediction["date"], page_id=page_n, ) - self.issuer_name = TextField( - api_prediction["issuer_name"], + self.dates = [ + DateField(prediction, page_id=page_n) + for prediction in api_prediction["dates"] + ] + self.issuer_address = TextField( + api_prediction["issuer_address"], page_id=page_n, ) self.issuer_company_registration = [ CompanyRegistrationField(prediction, page_id=page_n) for prediction in api_prediction["issuer_company_registration"] ] - self.issuer_address = TextField( - api_prediction["issuer_address"], + self.issuer_name = TextField( + api_prediction["issuer_name"], page_id=page_n, ) - self.recipient_name = TextField( - api_prediction["recipient_name"], + self.locale = LocaleField( + api_prediction["locale"], page_id=page_n, ) - self.recipient_company_registration = [ - CompanyRegistrationField(prediction, page_id=page_n) - for prediction in api_prediction["recipient_company_registration"] - ] self.recipient_address = TextField( api_prediction["recipient_address"], page_id=page_n, ) - self.dates = [ - DateField(prediction, page_id=page_n) - for prediction in api_prediction["dates"] + self.recipient_company_registration = [ + CompanyRegistrationField(prediction, page_id=page_n) + for prediction in api_prediction["recipient_company_registration"] ] - self.date = DateField( - api_prediction["date"], + self.recipient_name = TextField( + api_prediction["recipient_name"], page_id=page_n, ) def __str__(self) -> str: - issuer_company_registration = f"\n { ' ' * 28 }".join( + dates = f"\n { ' ' * 7 }".join( + [str(item) for item in self.dates], + ) + issuer_company_registration = f"\n { ' ' * 30 }".join( [str(item) for item in self.issuer_company_registration], ) - recipient_company_registration = f"\n { ' ' * 31 }".join( + recipient_company_registration = f"\n { ' ' * 33 }".join( [str(item) for item in self.recipient_company_registration], ) - dates = f"\n { ' ' * 6 }".join( - [str(item) for item in self.dates], - ) return clean_out_string( - "----- Proof of Address V1 -----\n" - f"Filename: {self.filename or ''}\n" - f"Locale: { self.locale }\n" - f"Issuer Name: { self.issuer_name }\n" - f"Issuer Company Registrations: { issuer_company_registration }\n" - f"Issuer Address: { self.issuer_address }\n" - f"Recipient Name: { self.recipient_name }\n" - f"Recipient Company Registrations: { recipient_company_registration }\n" - f"Recipient Address: { self.recipient_address }\n" - f"Dates: { dates }\n" - f"Date of Issue: { self.date }\n" - "----------------------" + "Proof of Address V1 Prediction\n" + "==============================\n" + f":Filename: {self.filename or ''}\n" + f":Locale: {self.locale}\n" + f":Issuer Name: {self.issuer_name}\n" + f":Issuer Company Registrations: {issuer_company_registration}\n" + f":Issuer Address: {self.issuer_address}\n" + f":Recipient Name: {self.recipient_name}\n" + f":Recipient Company Registrations: {recipient_company_registration}\n" + f":Recipient Address: {self.recipient_address}\n" + f":Dates: {dates}\n" + f":Date of Issue: {self.date}\n" ) -TypeProofOfAddressV1 = TypeVar("TypeProofOfAddressV1", bound=ProofOfAddressV1) +TypeProofOfAddressV1 = TypeVar( + "TypeProofOfAddressV1", + bound=ProofOfAddressV1, +) diff --git a/mindee/documents/receipt/receipt_v5.py b/mindee/documents/receipt/receipt_v5.py index 914c7bf9..274e4a72 100644 --- a/mindee/documents/receipt/receipt_v5.py +++ b/mindee/documents/receipt/receipt_v5.py @@ -168,8 +168,8 @@ def _line_items_to_str(self) -> str: return out_str def __str__(self) -> str: - supplier_company_registrations = "; ".join( - [str(n.value) for n in self.supplier_company_registrations] + supplier_company_registrations = f"\n { ' ' * 32 }".join( + [str(item) for item in self.supplier_company_registrations], ) return clean_out_string( "Receipt V5 Prediction\n" @@ -194,4 +194,7 @@ def __str__(self) -> str: ) -TypeReceiptV5 = TypeVar("TypeReceiptV5", bound=ReceiptV5) +TypeReceiptV5 = TypeVar( + "TypeReceiptV5", + bound=ReceiptV5, +) diff --git a/mindee/documents/us/bank_check/bank_check_v1.py b/mindee/documents/us/bank_check/bank_check_v1.py index f1aa4937..cba7d0f6 100644 --- a/mindee/documents/us/bank_check/bank_check_v1.py +++ b/mindee/documents/us/bank_check/bank_check_v1.py @@ -10,22 +10,22 @@ class BankCheckV1(Document): """Bank Check v1 prediction results.""" - date: DateField - """Date the check was issued""" + account_number: TextField + """The check payer's account number.""" amount: AmountField - """Amount to be paid""" - payees: List[TextField] - """List of payees (full name or company name)""" + """The amount of the check.""" check_number: TextField - """Check number""" - routing_number: TextField - """Payer's bank account routing number""" - account_number: TextField - """Payer's bank account number""" + """The issuer's check number.""" check_position: PositionField - """Check's position in the image""" + """The position of the check on the document.""" + date: DateField + """The date the check was issued.""" + payees: List[TextField] + """List of the check's payees (recipients).""" + routing_number: TextField + """The check issuer's routing number.""" signatures_positions: List[PositionField] - """The positions of the signatures on the image.""" + """List of signature positions""" def __init__( self, @@ -34,7 +34,7 @@ def __init__( page_n: Optional[int] = None, ): """ - Bank check document. + Bank Check v1 prediction results. :param api_prediction: Raw prediction from HTTP response :param input_source: Input object @@ -52,47 +52,67 @@ def _build_from_api_prediction( self, api_prediction: TypeApiPrediction, page_n: Optional[int] = None ) -> None: """ - Build the document from an API response JSON. + Build the object from the prediction API JSON. :param api_prediction: Raw prediction from HTTP response - :param page_n: Page number for multi pages pdf input + :param page_n: Page number """ - self.routing_number = TextField( - api_prediction["routing_number"], page_id=page_n - ) self.account_number = TextField( - api_prediction["account_number"], page_id=page_n + api_prediction["account_number"], + page_id=page_n, + ) + self.amount = AmountField( + api_prediction["amount"], + page_id=page_n, + ) + self.check_number = TextField( + api_prediction["check_number"], + page_id=page_n, + ) + self.check_position = PositionField( + api_prediction.get("check_position", {}), + page_id=page_n, + ) + self.date = DateField( + api_prediction["date"], + page_id=page_n, ) - self.check_number = TextField(api_prediction["check_number"], page_id=page_n) - self.date = DateField(api_prediction["date"], page_id=page_n) - self.amount = AmountField(api_prediction["amount"], page_id=page_n) self.payees = [ - TextField(payee, page_id=page_n) for payee in api_prediction["payees"] + TextField(prediction, page_id=page_n) + for prediction in api_prediction["payees"] ] - self.check_position = PositionField( - api_prediction["check_position"], page_n=page_n + self.routing_number = TextField( + api_prediction["routing_number"], + page_id=page_n, ) self.signatures_positions = [ - PositionField(signature_position, page_n=page_n) - for signature_position in api_prediction["signatures_positions"] + PositionField(prediction, page_id=page_n) + for prediction in api_prediction["signatures_positions"] ] def __str__(self) -> str: - payees = ", ".join([str(payee) for payee in self.payees]) + payees = f"\n { ' ' * 8 }".join( + [str(item) for item in self.payees], + ) + signatures_positions = f"\n { ' ' * 21 }".join( + [str(item) for item in self.signatures_positions], + ) return clean_out_string( - "----- US Bank Check V1 -----\n" - f"Filename: {self.filename or ''}\n" - f"Routing number: {self.routing_number}\n" - f"Account number: {self.account_number}\n" - f"Check number: {self.check_number}\n" - f"Date: {self.date}\n" - f"Amount: {self.amount}\n" - f"Payees: {payees}\n" - "----------------------" + "US Bank Check V1 Prediction\n" + "===========================\n" + f":Filename: {self.filename or ''}\n" + f":Check Position: {self.check_position}\n" + f":Signature Positions: {signatures_positions}\n" + f":Check Issue Date: {self.date}\n" + f":Amount: {self.amount}\n" + f":Payees: {payees}\n" + f":Routing Number: {self.routing_number}\n" + f":Account Number: {self.account_number}\n" + f":Check Number: {self.check_number}\n" ) - def _checklist(self) -> None: - pass - -TypeBankCheckV1 = TypeVar("TypeBankCheckV1", bound=BankCheckV1) +TypeBankCheckV1 = TypeVar( + "TypeBankCheckV1", + bound=BankCheckV1, +) diff --git a/mindee/fields/position.py b/mindee/fields/position.py index ec155260..3a85253b 100644 --- a/mindee/fields/position.py +++ b/mindee/fields/position.py @@ -29,7 +29,7 @@ def __init__( prediction: TypePrediction, value_key: str = "polygon", reconstructed: bool = False, - page_n: Optional[int] = None, + page_id: Optional[int] = None, ): """ Position field object. @@ -37,13 +37,13 @@ def __init__( :param prediction: Position prediction object from HTTP response :param value_key: Key to use in the position_prediction dict :param reconstructed: Bool for reconstructed object (not extracted in the API) - :param page_n: Page number for multi-page document + :param page_id: Page number for multi-page document """ super().__init__( prediction, value_key=value_key, reconstructed=reconstructed, - page_n=page_n, + page_n=page_id, ) def get_quadrilateral(key: str) -> Optional[Quadrilateral]: @@ -54,8 +54,14 @@ def get_quadrilateral(key: str) -> Optional[Quadrilateral]: def get_polygon(key: str) -> Optional[Polygon]: try: - return polygon_from_prediction(prediction[key]) - except (KeyError, GeometryError): + polygon = prediction[key] + except KeyError: + return None + if not polygon: + return None + try: + return polygon_from_prediction(polygon) + except GeometryError: return None self.bounding_box = get_quadrilateral("bounding_box") diff --git a/mindee/input/sources.py b/mindee/input/sources.py index db98ca06..1ee5368f 100644 --- a/mindee/input/sources.py +++ b/mindee/input/sources.py @@ -142,13 +142,15 @@ def is_pdf_empty(self) -> bool: self.file_object.seek(0) with pikepdf.open(self.file_object) as pdf: for page in pdf.pages: + # mypy incorrectly identifies the "/Length" key's value as + # an object rather than an int. try: total_size = page["/Contents"]["/Length"] except ValueError: - total_size = 0 - for content in page["/Contents"]: + total_size = 0 # type: ignore + for content in page["/Contents"]: # type: ignore total_size += content["/Length"] - has_data = total_size > 1000 + has_data = total_size > 1000 # type: ignore has_font = "/Font" in page["/Resources"].keys() has_xobj = "/XObject" in page["/Resources"].keys() diff --git a/tests/__init__.py b/tests/__init__.py index f49bf70c..fe198dce 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,8 +1,2 @@ -RECEIPT_DATA_DIR = "./tests/data/receipt" -INVOICE_DATA_DIR = "./tests/data/invoice" -US_BANK_CHECK_DATA_DIR = "./tests/data/us/bank_check" -PASSPORT_DATA_DIR = "./tests/data/passport" -FR_CARTE_GRISE_DATA_DIR = "./tests/data/fr/carte_grise" -CUSTOM_DATA_DIR = "./tests/data/custom" -CROPPER_DATA_DIR = "./tests/data/cropper" -PDF_DATA_DIR = "./tests/data/pdf" +RECEIPT_DATA_DIR = "./tests/data/products/expense_receipts" +INVOICE_DATA_DIR = "./tests/data/products/invoices" diff --git a/tests/api/test_async_response.py b/tests/api/test_async_response.py index b67310c2..11f38e3e 100644 --- a/tests/api/test_async_response.py +++ b/tests/api/test_async_response.py @@ -19,7 +19,7 @@ @pytest.fixture def dummy_file_input(): - file_input = PathInput("./tests/data/invoice_splitter/2_invoices.pdf") + file_input = PathInput("./tests/data/products/invoice_splitter/default_sample.pdf") return file_input diff --git a/tests/api/test_response.py b/tests/api/test_response.py index 03871190..eaf2734f 100644 --- a/tests/api/test_response.py +++ b/tests/api/test_response.py @@ -25,7 +25,7 @@ @pytest.fixture def dummy_file_input(): - file_input = PathInput("./tests/data/receipt/receipt.jpg") + file_input = PathInput("./tests/data/file_types/receipt.jpg") return file_input diff --git a/tests/data b/tests/data index 0a50dfd0..d7c235ef 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit 0a50dfd0d9f45bbde57451ddea2047efbd529d52 +Subproject commit d7c235ef536607d8103e21000716c831b0615ff7 diff --git a/tests/documents/test_custom_v1.py b/tests/documents/custom/test_custom_v1.py similarity index 97% rename from tests/documents/test_custom_v1.py rename to tests/documents/custom/test_custom_v1.py index e672ee2c..e5dc9296 100644 --- a/tests/documents/test_custom_v1.py +++ b/tests/documents/custom/test_custom_v1.py @@ -8,8 +8,8 @@ ListField, ListFieldValue, ) -from tests import CUSTOM_DATA_DIR +CUSTOM_DATA_DIR = "./tests/data/products/custom" FILE_PATH_CUSTOM_V1_COMPLETE = f"{CUSTOM_DATA_DIR}/response_v1/complete.json" FILE_PATH_CUSTOM_V1_EMPTY = f"{CUSTOM_DATA_DIR}/response_v1/empty.json" @@ -50,7 +50,7 @@ def test_empty(custom_v1_doc_object_empty): def test_complete(custom_v1_doc_object): - doc_str = open(f"{CUSTOM_DATA_DIR}/response_v1/doc_to_string.txt").read().strip() + doc_str = open(f"{CUSTOM_DATA_DIR}/response_v1/doc_to_string.rst").read() for field_name, field in custom_v1_doc_object.fields.items(): assert len(field_name) > 0 assert isinstance(field, ListField) diff --git a/tests/documents/test_custom_v1_line_items.py b/tests/documents/custom/test_custom_v1_line_items.py similarity index 95% rename from tests/documents/test_custom_v1_line_items.py rename to tests/documents/custom/test_custom_v1_line_items.py index d592ffe8..cac036a3 100644 --- a/tests/documents/test_custom_v1_line_items.py +++ b/tests/documents/custom/test_custom_v1_line_items.py @@ -2,7 +2,7 @@ from mindee.documents import CustomV1 from mindee.documents.custom.line_items import get_line_items -from tests import CUSTOM_DATA_DIR +from tests.documents.custom.test_custom_v1 import CUSTOM_DATA_DIR def test_single_table_01(): diff --git a/tests/documents/eu/test_license_plate_v1.py b/tests/documents/eu/test_license_plate_v1.py index 9330fc39..a617e620 100644 --- a/tests/documents/eu/test_license_plate_v1.py +++ b/tests/documents/eu/test_license_plate_v1.py @@ -4,8 +4,7 @@ from mindee.documents.eu import LicensePlateV1 -EU_LICENSE_PLATE_DATA_DIR = "./tests/data/eu/license_plate" - +EU_LICENSE_PLATE_DATA_DIR = "./tests/data/products/license_plates" FILE_PATH_EU_LICENSE_PLATE_V1_COMPLETE = ( f"{ EU_LICENSE_PLATE_DATA_DIR }/response_v1/complete.json" ) @@ -32,13 +31,17 @@ def license_plate_v1_page0(): return LicensePlateV1(json_data["document"]["inference"]["pages"][0], page_n=0) +def test_empty_doc_constructor(license_plate_v1_doc_empty): + assert len(license_plate_v1_doc_empty.license_plates) == 0 + + def test_doc_constructor(license_plate_v1_doc): - file_path = f"{ EU_LICENSE_PLATE_DATA_DIR }/response_v1/doc_to_string.txt" - reference_str = open(file_path, "r", encoding="utf-8").read().strip() + file_path = f"{ EU_LICENSE_PLATE_DATA_DIR }/response_v1/doc_to_string.rst" + reference_str = open(file_path, "r", encoding="utf-8").read() assert str(license_plate_v1_doc) == reference_str def test_page0_constructor(license_plate_v1_page0): - file_path = f"{ EU_LICENSE_PLATE_DATA_DIR }/response_v1/page0_to_string.txt" - reference_str = open(file_path, "r", encoding="utf-8").read().strip() + file_path = f"{ EU_LICENSE_PLATE_DATA_DIR }/response_v1/page0_to_string.rst" + reference_str = open(file_path, "r", encoding="utf-8").read() assert str(license_plate_v1_page0) == reference_str diff --git a/tests/documents/fr/test_bank_account_details_v1.py b/tests/documents/fr/test_bank_account_details_v1.py index bd26da3f..7c79c877 100644 --- a/tests/documents/fr/test_bank_account_details_v1.py +++ b/tests/documents/fr/test_bank_account_details_v1.py @@ -4,8 +4,7 @@ from mindee.documents.fr import BankAccountDetailsV1 -FR_BANK_ACCOUNT_DETAILS_DATA_DIR = "./tests/data/fr/bank_account_details" - +FR_BANK_ACCOUNT_DETAILS_DATA_DIR = "./tests/data/products/bank_account_details" FILE_PATH_FR_BANK_ACCOUNT_DETAILS_V1_COMPLETE = ( f"{ FR_BANK_ACCOUNT_DETAILS_DATA_DIR }/response_v1/complete.json" ) @@ -34,13 +33,19 @@ def bank_account_details_v1_page0(): ) +def test_empty_doc_constructor(bank_account_details_v1_doc_empty): + assert bank_account_details_v1_doc_empty.iban.value is None + assert bank_account_details_v1_doc_empty.account_holder_name.value is None + assert bank_account_details_v1_doc_empty.swift.value is None + + def test_doc_constructor(bank_account_details_v1_doc): - file_path = f"{ FR_BANK_ACCOUNT_DETAILS_DATA_DIR }/response_v1/doc_to_string.txt" - reference_str = open(file_path, "r", encoding="utf-8").read().strip() + file_path = f"{ FR_BANK_ACCOUNT_DETAILS_DATA_DIR }/response_v1/doc_to_string.rst" + reference_str = open(file_path, "r", encoding="utf-8").read() assert str(bank_account_details_v1_doc) == reference_str def test_page0_constructor(bank_account_details_v1_page0): - file_path = f"{ FR_BANK_ACCOUNT_DETAILS_DATA_DIR }/response_v1/page0_to_string.txt" - reference_str = open(file_path, "r", encoding="utf-8").read().strip() + file_path = f"{ FR_BANK_ACCOUNT_DETAILS_DATA_DIR }/response_v1/page0_to_string.rst" + reference_str = open(file_path, "r", encoding="utf-8").read() assert str(bank_account_details_v1_page0) == reference_str diff --git a/tests/documents/fr/test_bank_account_details_v2.py b/tests/documents/fr/test_bank_account_details_v2.py index 32bea257..ce9220f4 100644 --- a/tests/documents/fr/test_bank_account_details_v2.py +++ b/tests/documents/fr/test_bank_account_details_v2.py @@ -4,7 +4,7 @@ from mindee.documents.fr import BankAccountDetailsV2 -FR_BANK_ACCOUNT_DETAILS_DATA_DIR = "./tests/data/fr/bank_account_details" +FR_BANK_ACCOUNT_DETAILS_DATA_DIR = "./tests/data/products/bank_account_details" FILE_PATH_FR_BANK_ACCOUNT_DETAILS_V2_COMPLETE = ( f"{ FR_BANK_ACCOUNT_DETAILS_DATA_DIR }/response_v2/complete.json" ) diff --git a/tests/documents/fr/test_carte_grise_v1.py b/tests/documents/fr/test_carte_grise_v1.py index c1ddf817..5fdb48bd 100644 --- a/tests/documents/fr/test_carte_grise_v1.py +++ b/tests/documents/fr/test_carte_grise_v1.py @@ -3,8 +3,8 @@ import pytest from mindee.documents.fr import CarteGriseV1 -from tests import FR_CARTE_GRISE_DATA_DIR +FR_CARTE_GRISE_DATA_DIR = "./tests/data/products/carte_grise" FILE_PATH_FR_CARTE_GRISE_V1_COMPLETE = ( f"{FR_CARTE_GRISE_DATA_DIR}/response_v1/complete.json" ) diff --git a/tests/documents/fr/test_carte_vitale_v1.py b/tests/documents/fr/test_carte_vitale_v1.py index 958e9d93..87cb38bd 100644 --- a/tests/documents/fr/test_carte_vitale_v1.py +++ b/tests/documents/fr/test_carte_vitale_v1.py @@ -4,8 +4,7 @@ from mindee.documents.fr import CarteVitaleV1 -FR_CARTE_VITALE_DATA_DIR = "./tests/data/fr/carte_vitale" - +FR_CARTE_VITALE_DATA_DIR = "./tests/data/products/carte_vitale" FILE_PATH_FR_CARTE_VITALE_V1_COMPLETE = ( f"{ FR_CARTE_VITALE_DATA_DIR }/response_v1/complete.json" ) @@ -32,13 +31,20 @@ def carte_vitale_v1_page0(): return CarteVitaleV1(json_data["document"]["inference"]["pages"][0], page_n=0) +def test_empty_doc_constructor(carte_vitale_v1_doc_empty): + assert len(carte_vitale_v1_doc_empty.given_names) == 0 + assert carte_vitale_v1_doc_empty.surname.value is None + assert carte_vitale_v1_doc_empty.social_security.value is None + assert carte_vitale_v1_doc_empty.issuance_date.value is None + + def test_doc_constructor(carte_vitale_v1_doc): - file_path = f"{ FR_CARTE_VITALE_DATA_DIR }/response_v1/doc_to_string.txt" - reference_str = open(file_path, "r", encoding="utf-8").read().strip() + file_path = f"{ FR_CARTE_VITALE_DATA_DIR }/response_v1/doc_to_string.rst" + reference_str = open(file_path, "r", encoding="utf-8").read() assert str(carte_vitale_v1_doc) == reference_str def test_page0_constructor(carte_vitale_v1_page0): - file_path = f"{ FR_CARTE_VITALE_DATA_DIR }/response_v1/page0_to_string.txt" - reference_str = open(file_path, "r", encoding="utf-8").read().strip() + file_path = f"{ FR_CARTE_VITALE_DATA_DIR }/response_v1/page0_to_string.rst" + reference_str = open(file_path, "r", encoding="utf-8").read() assert str(carte_vitale_v1_page0) == reference_str diff --git a/tests/documents/fr/test_id_card_v1.py b/tests/documents/fr/test_id_card_v1.py index 815cbfdc..140700b6 100644 --- a/tests/documents/fr/test_id_card_v1.py +++ b/tests/documents/fr/test_id_card_v1.py @@ -4,7 +4,7 @@ from mindee.documents.fr import IdCardV1 -FR_ID_CARD_DATA_DIR = "./tests/data/fr/id_card" +FR_ID_CARD_DATA_DIR = "./tests/data/products/idcard_fr" FILE_PATH_FR_ID_CARD_V1_COMPLETE = f"{ FR_ID_CARD_DATA_DIR }/response_v1/complete.json" FILE_PATH_FR_ID_CARD_V1_EMPTY = f"{ FR_ID_CARD_DATA_DIR }/response_v1/empty.json" @@ -41,12 +41,12 @@ def test_empty_doc_constructor(id_card_v1_doc_empty): def test_doc_constructor(id_card_v1_doc): - file_path = f"{ FR_ID_CARD_DATA_DIR }/response_v1/doc_to_string.txt" - reference_str = open(file_path, "r", encoding="utf-8").read().strip() + file_path = f"{ FR_ID_CARD_DATA_DIR }/response_v1/doc_to_string.rst" + reference_str = open(file_path, "r", encoding="utf-8").read() assert str(id_card_v1_doc) == reference_str def test_page0_constructor(id_card_v1_page0): - file_path = f"{ FR_ID_CARD_DATA_DIR }/response_v1/page0_to_string.txt" - reference_str = open(file_path, "r", encoding="utf-8").read().strip() + file_path = f"{ FR_ID_CARD_DATA_DIR }/response_v1/page0_to_string.rst" + reference_str = open(file_path, "r", encoding="utf-8").read() assert str(id_card_v1_page0) == reference_str diff --git a/tests/documents/test_cropper_v1.py b/tests/documents/test_cropper_v1.py index 667eb2fb..ba6337ea 100644 --- a/tests/documents/test_cropper_v1.py +++ b/tests/documents/test_cropper_v1.py @@ -3,37 +3,41 @@ import pytest from mindee.documents import CropperV1 -from tests import CROPPER_DATA_DIR -FILE_PATH_CROPPER_V1_COMPLETE = f"{CROPPER_DATA_DIR}/response_v1/complete.json" -FILE_PATH_CROPPER_V1_EMPTY = f"{CROPPER_DATA_DIR}/response_v1/empty.json" +CROPPER_DATA_DIR = "./tests/data/products/cropper" +FILE_PATH_CROPPER_V1_COMPLETE = f"{ CROPPER_DATA_DIR }/response_v1/complete.json" +FILE_PATH_CROPPER_V1_EMPTY = f"{ CROPPER_DATA_DIR }/response_v1/empty.json" @pytest.fixture -def cropper_v1_doc_object(): +def cropper_v1_doc() -> CropperV1: json_data = json.load(open(FILE_PATH_CROPPER_V1_COMPLETE)) - return CropperV1(api_prediction=json_data["document"]["inference"], page_n=None) + return CropperV1(json_data["document"]["inference"], page_n=None) @pytest.fixture -def cropper_v1_doc_object_empty(): +def cropper_v1_doc_empty() -> CropperV1: json_data = json.load(open(FILE_PATH_CROPPER_V1_EMPTY)) - return CropperV1(api_prediction=json_data["document"]["inference"], page_n=None) + return CropperV1(json_data["document"]["inference"], page_n=None) @pytest.fixture -def cropper_v1_page_object(): +def cropper_v1_page0(): json_data = json.load(open(FILE_PATH_CROPPER_V1_COMPLETE)) return CropperV1(json_data["document"]["inference"]["pages"][0], page_n=0) -def test_doc_constructor(cropper_v1_doc_object): - doc_str = open(f"{CROPPER_DATA_DIR}/response_v1/doc_to_string.txt").read().strip() - assert cropper_v1_doc_object.orientation is None - assert str(cropper_v1_doc_object) == doc_str +def test_empty_doc_constructor(cropper_v1_doc_empty): + assert len(cropper_v1_doc_empty.cropping) == 0 -def test_page_constructor(cropper_v1_page_object): - doc_str = open(f"{CROPPER_DATA_DIR}/response_v1/page0_to_string.txt").read().strip() - assert cropper_v1_page_object.orientation.value == 0 - assert str(cropper_v1_page_object) == doc_str +def test_doc_constructor(cropper_v1_doc): + file_path = f"{ CROPPER_DATA_DIR }/response_v1/doc_to_string.rst" + reference_str = open(file_path, "r", encoding="utf-8").read() + assert str(cropper_v1_doc) == reference_str + + +def test_page0_constructor(cropper_v1_page0): + file_path = f"{ CROPPER_DATA_DIR }/response_v1/page0_to_string.rst" + reference_str = open(file_path, "r", encoding="utf-8").read() + assert str(cropper_v1_page0) == reference_str diff --git a/tests/documents/test_financial_document_v1.py b/tests/documents/test_financial_document_v1.py index 9c1ea983..0fefa2d9 100644 --- a/tests/documents/test_financial_document_v1.py +++ b/tests/documents/test_financial_document_v1.py @@ -4,7 +4,7 @@ from mindee.documents import FinancialDocumentV1 -FINANCIAL_DOC_DATA_DIR = "./tests/data/financial_document" +FINANCIAL_DOC_DATA_DIR = "./tests/data/products/financial_document" FILE_PATH_FINANCIAL_DOC_V1_INVOICE = ( f"{FINANCIAL_DOC_DATA_DIR}/response_v1/complete_invoice.json" ) diff --git a/tests/documents/test_invoice_splitter_v1.py b/tests/documents/test_invoice_splitter_v1.py index 31f3de09..418bd4fb 100644 --- a/tests/documents/test_invoice_splitter_v1.py +++ b/tests/documents/test_invoice_splitter_v1.py @@ -4,7 +4,7 @@ from mindee.documents import InvoiceSplitterV1 -INVOICE_SPLITTER_DATA_DIR = "./tests/data/invoice_splitter" +INVOICE_SPLITTER_DATA_DIR = "./tests/data/products/invoice_splitter" FILE_PATH_INVOICE_SPLITTER_V1_COMPLETE = ( f"{ INVOICE_SPLITTER_DATA_DIR }/response_v1/complete.json" ) @@ -38,6 +38,6 @@ def invoice_splitter_v1_doc_object(): def test_doc_constructor(invoice_splitter_v1_doc): - file_path = f"{ INVOICE_SPLITTER_DATA_DIR }/response_v1/doc_to_string.txt" - reference_str = open(file_path, "r", encoding="utf-8").read().strip() + file_path = f"{ INVOICE_SPLITTER_DATA_DIR }/response_v1/doc_to_string.rst" + reference_str = open(file_path, "r", encoding="utf-8").read() assert str(invoice_splitter_v1_doc) == reference_str diff --git a/tests/documents/test_material_certificate_v1.py b/tests/documents/test_material_certificate_v1.py index 74e8a86d..dad4088b 100644 --- a/tests/documents/test_material_certificate_v1.py +++ b/tests/documents/test_material_certificate_v1.py @@ -4,7 +4,7 @@ from mindee.documents import MaterialCertificateV1 -MATERIAL_CERTIFICATE_DATA_DIR = "./tests/data/material_certificate" +MATERIAL_CERTIFICATE_DATA_DIR = "./tests/data/products/material_certificate" FILE_PATH_MATERIAL_CERTIFICATE_V1_COMPLETE = ( f"{ MATERIAL_CERTIFICATE_DATA_DIR }/response_v1/complete.json" ) diff --git a/tests/documents/test_passport_v1.py b/tests/documents/test_passport_v1.py index e566e5ca..44ace5f2 100644 --- a/tests/documents/test_passport_v1.py +++ b/tests/documents/test_passport_v1.py @@ -4,7 +4,7 @@ from mindee.documents import PassportV1 -PASSPORT_DATA_DIR = "./tests/data/passport" +PASSPORT_DATA_DIR = "./tests/data/products/passport" FILE_PATH_PASSPORT_V1_COMPLETE = f"{ PASSPORT_DATA_DIR }/response_v1/complete.json" FILE_PATH_PASSPORT_V1_EMPTY = f"{ PASSPORT_DATA_DIR }/response_v1/empty.json" @@ -42,36 +42,12 @@ def test_empty_doc_constructor(passport_v1_doc_empty): def test_doc_constructor(passport_v1_doc): - file_path = f"{ PASSPORT_DATA_DIR }/response_v1/doc_to_string.txt" - reference_str = open(file_path, "r", encoding="utf-8").read().strip() + file_path = f"{ PASSPORT_DATA_DIR }/response_v1/doc_to_string.rst" + reference_str = open(file_path, "r", encoding="utf-8").read() assert str(passport_v1_doc) == reference_str def test_page0_constructor(passport_v1_page0): - file_path = f"{ PASSPORT_DATA_DIR }/response_v1/page0_to_string.txt" - reference_str = open(file_path, "r", encoding="utf-8").read().strip() + file_path = f"{ PASSPORT_DATA_DIR }/response_v1/page0_to_string.rst" + reference_str = open(file_path, "r", encoding="utf-8").read() assert str(passport_v1_page0) == reference_str - - -def test_checklist_all_na(passport_v1_doc_empty): - for check in passport_v1_doc_empty.checklist.values(): - assert check is False - - -def test_checksum(): - mrz = "7077979792GBR9505209M1704224<<<<<<<<<<<<<<00" - assert PassportV1.check_sum(mrz[0:10] + mrz[13:20] + mrz[21:43]) == mrz[43] - - -def test_wrong_checksum(): - mrz = "7077974792GBR9505209M1704224<<<<<<<<<<<<<<00" - assert PassportV1.check_sum(mrz[0:10] + mrz[13:20] + mrz[21:43]) != mrz[43] - mrz = "7077974792GBR9505209M1404224<<<<<<<<<<<<<<00" - assert PassportV1.check_sum(mrz[0:10] + mrz[13:20] + mrz[21:43]) != mrz[43] - mrz = "7077974792GBR9505209M1404224<<<<<<<<<<<<<<08" - assert PassportV1.check_sum(mrz[0:10] + mrz[13:20] + mrz[21:43]) != mrz[43] - - -def test_checksum_with_personal_number_alpha(): - mrz = "XDB0661884ESP9502138F1808122RE20050024133894" - assert PassportV1.check_sum(mrz[28:42]) == mrz[42] diff --git a/tests/documents/test_proof_of_address_v1.py b/tests/documents/test_proof_of_address_v1.py index bbfabdfd..644efb4c 100644 --- a/tests/documents/test_proof_of_address_v1.py +++ b/tests/documents/test_proof_of_address_v1.py @@ -4,7 +4,7 @@ from mindee.documents import ProofOfAddressV1 -PROOF_OF_ADDRESS_DATA_DIR = "./tests/data/proof_of_address" +PROOF_OF_ADDRESS_DATA_DIR = "./tests/data/products/proof_of_address" FILE_PATH_PROOF_OF_ADDRESS_V1_COMPLETE = ( f"{ PROOF_OF_ADDRESS_DATA_DIR }/response_v1/complete.json" ) @@ -31,13 +31,25 @@ def proof_of_address_v1_page0(): return ProofOfAddressV1(json_data["document"]["inference"]["pages"][0], page_n=0) +def test_empty_doc_constructor(proof_of_address_v1_doc_empty): + assert proof_of_address_v1_doc_empty.locale.value is None + assert proof_of_address_v1_doc_empty.issuer_name.value is None + assert len(proof_of_address_v1_doc_empty.issuer_company_registration) == 0 + assert proof_of_address_v1_doc_empty.issuer_address.value is None + assert proof_of_address_v1_doc_empty.recipient_name.value is None + assert len(proof_of_address_v1_doc_empty.recipient_company_registration) == 0 + assert proof_of_address_v1_doc_empty.recipient_address.value is None + assert len(proof_of_address_v1_doc_empty.dates) == 0 + assert proof_of_address_v1_doc_empty.date.value is None + + def test_doc_constructor(proof_of_address_v1_doc): - file_path = f"{ PROOF_OF_ADDRESS_DATA_DIR }/response_v1/doc_to_string.txt" - reference_str = open(file_path, "r", encoding="utf-8").read().strip() + file_path = f"{ PROOF_OF_ADDRESS_DATA_DIR }/response_v1/doc_to_string.rst" + reference_str = open(file_path, "r", encoding="utf-8").read() assert str(proof_of_address_v1_doc) == reference_str def test_page0_constructor(proof_of_address_v1_page0): - file_path = f"{ PROOF_OF_ADDRESS_DATA_DIR }/response_v1/page0_to_string.txt" - reference_str = open(file_path, "r", encoding="utf-8").read().strip() + file_path = f"{ PROOF_OF_ADDRESS_DATA_DIR }/response_v1/page0_to_string.rst" + reference_str = open(file_path, "r", encoding="utf-8").read() assert str(proof_of_address_v1_page0) == reference_str diff --git a/tests/documents/test_receipt_v4.py b/tests/documents/test_receipt_v4.py index a4f24c1d..067a6bc7 100644 --- a/tests/documents/test_receipt_v4.py +++ b/tests/documents/test_receipt_v4.py @@ -49,9 +49,7 @@ def test_page_constructor(receipt_v4_page_object): def test_cropper(): - json_data = json.load( - open(f"{RECEIPT_DATA_DIR}/response_v4/complete_with_cropper.json") - ) + json_data = json.load(open("./tests/data/extras/cropper/complete.json")) receipt_v4_page_object = ReceiptV4( api_prediction=json_data["document"]["inference"]["pages"][0], page_n=0 ) diff --git a/tests/documents/test_receipt_v5.py b/tests/documents/test_receipt_v5.py index 57fe501d..ace58e6a 100644 --- a/tests/documents/test_receipt_v5.py +++ b/tests/documents/test_receipt_v5.py @@ -4,7 +4,7 @@ from mindee.documents import ReceiptV5 -RECEIPT_DATA_DIR = "./tests/data/receipt" +RECEIPT_DATA_DIR = "./tests/data/products/expense_receipts" FILE_PATH_RECEIPT_V5_COMPLETE = f"{ RECEIPT_DATA_DIR }/response_v5/complete.json" FILE_PATH_RECEIPT_V5_EMPTY = f"{ RECEIPT_DATA_DIR }/response_v5/empty.json" diff --git a/tests/documents/us/test_bank_check_v1.py b/tests/documents/us/test_bank_check_v1.py index f5831b98..83eeda82 100644 --- a/tests/documents/us/test_bank_check_v1.py +++ b/tests/documents/us/test_bank_check_v1.py @@ -3,55 +3,50 @@ import pytest from mindee.documents.us import BankCheckV1 -from tests import US_BANK_CHECK_DATA_DIR +US_BANK_CHECK_DATA_DIR = "./tests/data/products/bank_check" FILE_PATH_US_BANK_CHECK_V1_COMPLETE = ( - f"{US_BANK_CHECK_DATA_DIR}/response_v1/complete.json" + f"{ US_BANK_CHECK_DATA_DIR }/response_v1/complete.json" ) -FILE_PATH_US_BANK_CHECK_V1_EMPTY = f"{US_BANK_CHECK_DATA_DIR}/response_v1/empty.json" +FILE_PATH_US_BANK_CHECK_V1_EMPTY = f"{ US_BANK_CHECK_DATA_DIR }/response_v1/empty.json" @pytest.fixture -def bank_check_v1_doc_object() -> BankCheckV1: +def bank_check_v1_doc() -> BankCheckV1: json_data = json.load(open(FILE_PATH_US_BANK_CHECK_V1_COMPLETE)) return BankCheckV1(json_data["document"]["inference"], page_n=None) @pytest.fixture -def bank_check_v1_doc_object_empty() -> BankCheckV1: +def bank_check_v1_doc_empty() -> BankCheckV1: json_data = json.load(open(FILE_PATH_US_BANK_CHECK_V1_EMPTY)) return BankCheckV1(json_data["document"]["inference"], page_n=None) @pytest.fixture -def bank_check_pred(): - json_data = json.load(open(FILE_PATH_US_BANK_CHECK_V1_EMPTY)) - return json_data["document"]["inference"]["pages"][0] - - -# Technical tests -def test_constructor(bank_check_v1_doc_object): - assert bank_check_v1_doc_object.date.value == "2022-04-26" - assert bank_check_v1_doc_object.amount.value == 6496.58 - assert bank_check_v1_doc_object.routing_number.value == "012345678" - assert bank_check_v1_doc_object.account_number.value == "12345678910" - assert bank_check_v1_doc_object.check_number.value == "8620001342" - doc_str = ( - open(f"{US_BANK_CHECK_DATA_DIR}/response_v1/doc_to_string.txt").read().strip() - ) - assert str(bank_check_v1_doc_object) == doc_str - - -def test_all_na(bank_check_v1_doc_object_empty): - assert bank_check_v1_doc_object_empty.amount.value is None - assert bank_check_v1_doc_object_empty.date.value is None - assert bank_check_v1_doc_object_empty.check_number.value is None - assert bank_check_v1_doc_object_empty.routing_number.value is None - assert bank_check_v1_doc_object_empty.account_number.value is None - assert len(bank_check_v1_doc_object_empty.signatures_positions) == 0 - assert len(bank_check_v1_doc_object_empty.check_position.polygon) == 0 - assert len(bank_check_v1_doc_object_empty.check_position.value) == 0 - assert bank_check_v1_doc_object_empty.check_position.bounding_box is None - assert bank_check_v1_doc_object_empty.check_position.rectangle is None - assert bank_check_v1_doc_object_empty.check_position.quadrangle is None - assert len(bank_check_v1_doc_object_empty.payees) == 0 +def bank_check_v1_page0(): + json_data = json.load(open(FILE_PATH_US_BANK_CHECK_V1_COMPLETE)) + return BankCheckV1(json_data["document"]["inference"]["pages"][0], page_n=0) + + +def test_empty_doc_constructor(bank_check_v1_doc_empty): + assert bank_check_v1_doc_empty.check_position.value is None + assert len(bank_check_v1_doc_empty.signatures_positions) == 0 + assert bank_check_v1_doc_empty.date.value is None + assert bank_check_v1_doc_empty.amount.value is None + assert len(bank_check_v1_doc_empty.payees) == 0 + assert bank_check_v1_doc_empty.routing_number.value is None + assert bank_check_v1_doc_empty.account_number.value is None + assert bank_check_v1_doc_empty.check_number.value is None + + +def test_doc_constructor(bank_check_v1_doc): + file_path = f"{ US_BANK_CHECK_DATA_DIR }/response_v1/doc_to_string.rst" + reference_str = open(file_path, "r", encoding="utf-8").read() + assert str(bank_check_v1_doc) == reference_str + + +def test_page0_constructor(bank_check_v1_page0): + file_path = f"{ US_BANK_CHECK_DATA_DIR }/response_v1/page0_to_string.rst" + reference_str = open(file_path, "r", encoding="utf-8").read() + assert str(bank_check_v1_page0) == reference_str diff --git a/tests/fields/test_ocr.py b/tests/fields/test_ocr.py index 0d6649a0..2b364c7e 100644 --- a/tests/fields/test_ocr.py +++ b/tests/fields/test_ocr.py @@ -4,8 +4,8 @@ def test_response(): - json_data = json.load(open("./tests/data/ocr/complete_with_ocr.json")) - with open("./tests/data/ocr/ocr.txt") as file_handle: + json_data = json.load(open("./tests/data/extras/ocr/complete.json")) + with open("./tests/data/extras/ocr/ocr.txt") as file_handle: expected_text = file_handle.read() ocr = Ocr(json_data["document"]["ocr"]) assert str(ocr) == expected_text diff --git a/tests/test_cli.py b/tests/test_cli.py index e9d9aad5..1c4c79fb 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -22,7 +22,7 @@ def custom_doc(monkeypatch): input_type="path", output_type="summary", include_words=False, - path="./tests/data/pdf/blank.pdf", + path="./tests/data/file_types/pdf/blank.pdf", call_method="parse", ) @@ -38,7 +38,7 @@ def ots_doc(monkeypatch): input_type="path", output_type="summary", include_words=False, - path="./tests/data/invoice/invoice.pdf", + path="./tests/data/products/invoices/invoice.pdf", call_method="parse", ) @@ -53,7 +53,7 @@ def ots_doc_enqueue(monkeypatch): doc_pages=3, input_type="path", include_words=False, - path="./tests/data/invoice_splitter/2_invoices.pdf", + path="./tests/data/products/invoice_splitter/default_sample.pdf", call_method="enqueue", ) diff --git a/tests/test_client.py b/tests/test_client.py index a59cfc74..e598f840 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -2,7 +2,9 @@ from mindee import Client, PageOptions, documents from mindee.endpoints import HTTPException -from tests import INVOICE_DATA_DIR, PASSPORT_DATA_DIR, RECEIPT_DATA_DIR +from tests import INVOICE_DATA_DIR, RECEIPT_DATA_DIR +from tests.documents.test_passport_v1 import PASSPORT_DATA_DIR +from tests.test_inputs import FILE_TYPES_DIR from tests.utils import clear_envvars, dummy_envvars @@ -39,42 +41,42 @@ def dummy_client_no_raise(): def test_parse_path_without_token(empty_client): with pytest.raises(RuntimeError): - empty_client.doc_from_path(f"{RECEIPT_DATA_DIR}/receipt.jpg").parse( - documents.TypeReceiptV3 + empty_client.doc_from_path(f"{FILE_TYPES_DIR}/receipt.jpg").parse( + documents.TypeReceiptV5 ) with pytest.raises(RuntimeError): - empty_client.doc_from_path(f"{INVOICE_DATA_DIR}/invoice.pdf").parse( - documents.TypeInvoiceV3 + empty_client.doc_from_path(f"{FILE_TYPES_DIR}/receipt.jpg").parse( + documents.TypeInvoiceV4 ) with pytest.raises(RuntimeError): - empty_client.doc_from_path(f"{RECEIPT_DATA_DIR}/receipt.jpg").parse( - documents.TypeFinancialV1 + empty_client.doc_from_path(f"{FILE_TYPES_DIR}/receipt.jpg").parse( + documents.TypeFinancialDocumentV1 ) with pytest.raises(RuntimeError): - empty_client.doc_from_path(f"{PASSPORT_DATA_DIR}/passport.jpeg").parse( + empty_client.doc_from_path(f"{FILE_TYPES_DIR}/receipt.jpg").parse( documents.TypePassportV1 ) def test_parse_path_with_env_token(env_client): with pytest.raises(HTTPException): - env_client.doc_from_path(f"{RECEIPT_DATA_DIR}/receipt.jpg").parse( - documents.TypeReceiptV3 + env_client.doc_from_path(f"{FILE_TYPES_DIR}/receipt.jpg").parse( + documents.TypeReceiptV5 ) with pytest.raises(HTTPException): - env_client.doc_from_path(f"{INVOICE_DATA_DIR}/invoice.pdf").parse( - documents.TypeInvoiceV3 + env_client.doc_from_path(f"{FILE_TYPES_DIR}/receipt.jpg").parse( + documents.TypeInvoiceV4 ) with pytest.raises(HTTPException): env_client.doc_from_path( - f"{RECEIPT_DATA_DIR}/receipt.jpg", - ).parse(documents.TypeFinancialV1) + f"{FILE_TYPES_DIR}/receipt.jpg", + ).parse(documents.TypeFinancialDocumentV1) with pytest.raises(HTTPException): - env_client.doc_from_path(f"{PASSPORT_DATA_DIR}/passport.jpeg").parse( + env_client.doc_from_path(f"{FILE_TYPES_DIR}/receipt.jpg").parse( documents.TypePassportV1 ) with pytest.raises(HTTPException): - env_client.doc_from_path(f"{PASSPORT_DATA_DIR}/passport.jpeg").parse( + env_client.doc_from_path(f"{FILE_TYPES_DIR}/receipt.jpg").parse( documents.TypeCustomV1, "dummy" ) @@ -86,76 +88,76 @@ def test_duplicate_configs(dummy_client): ) assert isinstance(client, Client) with pytest.raises(RuntimeError): - client.doc_from_path(f"{RECEIPT_DATA_DIR}/receipt.jpg").parse( + client.doc_from_path(f"{FILE_TYPES_DIR}/receipt.jpg").parse( documents.TypeReceiptV3 ) with pytest.raises(HTTPException): - client.doc_from_path(f"{RECEIPT_DATA_DIR}/receipt.jpg").parse( + client.doc_from_path(f"{FILE_TYPES_DIR}/receipt.jpg").parse( documents.TypeCustomV1, endpoint_name="Receipt", account_name="dummy" ) def test_parse_path_with_wrong_filetype(dummy_client): with pytest.raises(AssertionError): - dummy_client.doc_from_path(f"{RECEIPT_DATA_DIR}/receipt.jpga").parse( + dummy_client.doc_from_path(f"{FILE_TYPES_DIR}/receipt.jpga").parse( documents.TypeReceiptV3 ) with pytest.raises(AssertionError): - dummy_client.doc_from_path(f"{RECEIPT_DATA_DIR}/receipt.jpga").parse( + dummy_client.doc_from_path(f"{FILE_TYPES_DIR}/receipt.jpga").parse( documents.TypeInvoiceV3 ) with pytest.raises(AssertionError): dummy_client.doc_from_path( - f"{RECEIPT_DATA_DIR}/receipt.jpga", + f"{FILE_TYPES_DIR}/receipt.jpga", ).parse(documents.TypeFinancialV1) with pytest.raises(AssertionError): - dummy_client.doc_from_path(f"{RECEIPT_DATA_DIR}/receipt.jpga").parse( + dummy_client.doc_from_path(f"{FILE_TYPES_DIR}/receipt.jpga").parse( documents.TypePassportV1 ) def test_parse_path_with_wrong_token(dummy_client): with pytest.raises(HTTPException): - dummy_client.doc_from_path(f"{RECEIPT_DATA_DIR}/receipt.jpg").parse( + dummy_client.doc_from_path(f"{FILE_TYPES_DIR}/receipt.jpg").parse( documents.TypeReceiptV3 ) with pytest.raises(HTTPException): - dummy_client.doc_from_path(f"{RECEIPT_DATA_DIR}/receipt.jpg").parse( + dummy_client.doc_from_path(f"{FILE_TYPES_DIR}/receipt.jpg").parse( documents.TypeInvoiceV3 ) with pytest.raises(HTTPException): dummy_client.doc_from_path( - f"{RECEIPT_DATA_DIR}/receipt.jpg", + f"{FILE_TYPES_DIR}/receipt.jpg", ).parse(documents.TypeFinancialV1) with pytest.raises(HTTPException): dummy_client.doc_from_path(f"{INVOICE_DATA_DIR}/invoice.pdf").parse( documents.TypeFinancialV1 ) with pytest.raises(HTTPException): - dummy_client.doc_from_path(f"{RECEIPT_DATA_DIR}/receipt.jpg").parse( + dummy_client.doc_from_path(f"{FILE_TYPES_DIR}/receipt.jpg").parse( documents.TypePassportV1 ) def test_request_with_filepath(dummy_client): with pytest.raises(HTTPException): - dummy_client.doc_from_path(f"{RECEIPT_DATA_DIR}/receipt.jpg").parse( + dummy_client.doc_from_path(f"{FILE_TYPES_DIR}/receipt.jpg").parse( documents.TypeReceiptV3 ) def test_request_without_raise_on_error(dummy_client_no_raise): - result = dummy_client_no_raise.doc_from_path( - f"{RECEIPT_DATA_DIR}/receipt.jpg" - ).parse(documents.TypeReceiptV3) + result = dummy_client_no_raise.doc_from_path(f"{FILE_TYPES_DIR}/receipt.jpg").parse( + documents.TypeReceiptV3 + ) assert result.document is None assert len(result.pages) == 0 def test_request_without_raise_on_error_include_words(dummy_client_no_raise): - result = dummy_client_no_raise.doc_from_path( - f"{RECEIPT_DATA_DIR}/receipt.jpg" - ).parse(documents.TypeReceiptV3, include_words=True) + result = dummy_client_no_raise.doc_from_path(f"{FILE_TYPES_DIR}/receipt.jpg").parse( + documents.TypeReceiptV3, include_words=True + ) assert result.document is None assert len(result.pages) == 0 @@ -176,13 +178,13 @@ def test_interface_version(): version="1.1", ) with pytest.raises(HTTPException): - fixed_client.doc_from_path(f"{RECEIPT_DATA_DIR}/receipt.jpg").parse( + fixed_client.doc_from_path(f"{FILE_TYPES_DIR}/receipt.jpg").parse( documents.TypeCustomV1, "dummy" ) def test_keep_file_open(dummy_client): - doc = dummy_client.doc_from_path(f"{RECEIPT_DATA_DIR}/receipt.jpg") + doc = dummy_client.doc_from_path(f"{FILE_TYPES_DIR}/receipt.jpg") try: doc.parse(documents.TypeReceiptV3, close_file=False) except HTTPException: diff --git a/tests/test_code_samples.sh b/tests/test_code_samples.sh index caecc986..ab0504e6 100755 --- a/tests/test_code_samples.sh +++ b/tests/test_code_samples.sh @@ -15,7 +15,7 @@ do echo sed "s/my-api-key/${API_KEY}/" "${f}" > $OUTPUT_FILE - sed -i 's/\/path\/to\/the\/file.ext/.\/tests\/data\/pdf\/blank_1.pdf/' $OUTPUT_FILE + sed -i 's/\/path\/to\/the\/file.ext/.\/tests\/data\/file_types\/pdf\/blank_1.pdf/' $OUTPUT_FILE if echo "$f" | grep -q "custom_v1.txt" then diff --git a/tests/test_inputs.py b/tests/test_inputs.py index 7b8a9058..5a9ec222 100644 --- a/tests/test_inputs.py +++ b/tests/test_inputs.py @@ -12,7 +12,10 @@ PathInput, UrlInputSource, ) -from tests import INVOICE_DATA_DIR, PDF_DATA_DIR, RECEIPT_DATA_DIR +from tests import INVOICE_DATA_DIR, RECEIPT_DATA_DIR + +FILE_TYPES_DIR = "./tests/data/file_types" +PDF_DATA_DIR = "./tests/data/file_types/pdf" # # PDF @@ -170,18 +173,18 @@ def test_pdf_blank_check(): def test_tif_input_from_path(): - input_obj_1 = PathInput(f"{RECEIPT_DATA_DIR}/receipt.tif") + input_obj_1 = PathInput(f"{FILE_TYPES_DIR}/receipt.tif") assert input_obj_1.file_mimetype == "image/tiff" - input_obj_2 = PathInput(f"{RECEIPT_DATA_DIR}/receipt.tiff") + input_obj_2 = PathInput(f"{FILE_TYPES_DIR}/receipt.tiff") assert input_obj_2.file_mimetype == "image/tiff" def test_heic_input_from_path(): - input_obj_1 = PathInput(f"{RECEIPT_DATA_DIR}/receipt.heic") + input_obj_1 = PathInput(f"{FILE_TYPES_DIR}/receipt.heic") assert input_obj_1.file_mimetype == "image/heic" def test_txt_input_from_path(): with pytest.raises(MimeTypeError): - PathInput(f"{RECEIPT_DATA_DIR}/receipt.txt") + PathInput(f"{FILE_TYPES_DIR}/receipt.txt")