diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e204edc0..fbe4db9f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,7 +27,7 @@ repos: - id: gitleaks - repo: https://github.com/PyCQA/pylint - rev: v3.3.1 + rev: v3.3.9 hooks: - id: pylint name: pylint diff --git a/examples/auto_invoice_splitter_extraction_example.py b/examples/auto_invoice_splitter_extraction_example.py index a9a2bb5a..902e648e 100644 --- a/examples/auto_invoice_splitter_extraction_example.py +++ b/examples/auto_invoice_splitter_extraction_example.py @@ -11,7 +11,7 @@ def parse_invoice(file_path): input_source = PathInput(file_path) - if input_source.is_pdf() and input_source.count_doc_pages() > 1: + if input_source.is_pdf() and input_source.page_count > 1: parse_multi_page(input_source) else: parse_single_page(input_source) diff --git a/mindee/extraction/multi_receipts_extractor/multi_receipts_extractor.py b/mindee/extraction/multi_receipts_extractor/multi_receipts_extractor.py index 7c31ca93..5d42b7c7 100644 --- a/mindee/extraction/multi_receipts_extractor/multi_receipts_extractor.py +++ b/mindee/extraction/multi_receipts_extractor/multi_receipts_extractor.py @@ -24,7 +24,7 @@ def extract_receipts( raise MindeeError( "No possible receipts candidates found for MultiReceipts extraction." ) - for page_id in range(input_source.count_doc_pages()): + for page_id in range(input_source.page_count): receipt_positions = [ receipt.bounding_box for receipt in inference.pages[page_id].prediction.receipts diff --git a/mindee/input/sources/local_input_source.py b/mindee/input/sources/local_input_source.py index bef4b366..c015239b 100644 --- a/mindee/input/sources/local_input_source.py +++ b/mindee/input/sources/local_input_source.py @@ -36,6 +36,7 @@ class LocalInputSource: file_mimetype: str input_type: InputType filepath: Optional[str] + _page_count: Optional[int] = None def __init__(self, input_type: InputType): self.input_type = input_type @@ -100,17 +101,25 @@ def is_pdf(self) -> bool: """:return: True if the file is a PDF.""" return self.file_mimetype == "application/pdf" - def count_doc_pages(self) -> int: + @property + def page_count(self) -> int: """ - Count the pages in the PDF. + Count the pages in the document. - :return: the number of pages. + :return: The number of pages. """ - if self.is_pdf(): - self.file_object.seek(0) - pdf = pdfium.PdfDocument(self.file_object) - return len(pdf) - return 1 + if self._page_count is None: + if self.is_pdf(): + self.file_object.seek(0) + pdf = pdfium.PdfDocument(self.file_object) + self._page_count = len(pdf) + else: + self._page_count = 1 + return self._page_count + + def count_doc_pages(self) -> int: + """Deprecated. Use ``page_count`` instead.""" + return self.page_count def apply_page_options(self, page_options: PageOptions) -> None: """Apply cut and merge options on multipage documents.""" @@ -131,10 +140,10 @@ def process_pdf( """Run any required processing on a PDF file.""" if self.is_pdf_empty(): raise MindeeSourceError(f"PDF pages are empty in: {self.filename}") - pages_count = self.count_doc_pages() - if on_min_pages > pages_count: + page_count = self.page_count + if on_min_pages > page_count: return - all_pages = list(range(pages_count)) + all_pages = list(range(page_count)) if behavior == KEEP_ONLY: pages_to_keep = set() for page_id in page_indexes: @@ -161,7 +170,7 @@ def merge_pdf_pages(self, page_numbers: set) -> None: """ Create a new PDF from pages and set it to ``file_object``. - :param page_numbers: List of pages number to use for merging in the original PDF. + :param page_numbers: List of page numbers to use for merging in the original PDF. :return: None """ self.file_object.seek(0) @@ -172,6 +181,7 @@ def merge_pdf_pages(self, page_numbers: set) -> None: bytes_io = io.BytesIO() new_pdf.save(bytes_io) self.file_object = bytes_io + self._page_count = len(new_pdf) def is_pdf_empty(self) -> bool: """ diff --git a/pyproject.toml b/pyproject.toml index 159b0072..fb003d2c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,15 +44,15 @@ Changelog = "https://github.com/mindee/mindee-api-python/blob/main/CHANGELOG.md" [project.optional-dependencies] lint = [ - "pylint==3.3.1", - "pre-commit~=3.2.2", - "types-pytz>=2023.3", + "pylint==3.3.9", + "pre-commit~=3.6.0", + "types-pytz>=2024.2", "types-requests>=2.31", ] test = [ "toml~=0.10.2", "pytest~=7.4", - "pytest-cov~=4.1", + "pytest-cov~=5.0", ] docs = [ "sphinx~=5.3", diff --git a/tests/extraction/test_image_extractor.py b/tests/extraction/test_image_extractor.py index 7f6d5db2..05c95cfb 100644 --- a/tests/extraction/test_image_extractor.py +++ b/tests/extraction/test_image_extractor.py @@ -6,7 +6,7 @@ from mindee.extraction.common.image_extractor import extract_multiple_images_from_source from mindee.input.sources.path_input import PathInput from mindee.product.barcode_reader.barcode_reader_v1 import BarcodeReaderV1 -from tests.test_inputs import PRODUCT_DATA_DIR +from tests.utils import PRODUCT_DATA_DIR @pytest.fixture diff --git a/tests/extraction/test_invoice_splitter_auto_extraction.py b/tests/extraction/test_invoice_splitter_auto_extraction.py index 3abc2d2a..ed3bb3a4 100644 --- a/tests/extraction/test_invoice_splitter_auto_extraction.py +++ b/tests/extraction/test_invoice_splitter_auto_extraction.py @@ -9,8 +9,7 @@ from mindee.product.invoice.invoice_v4 import InvoiceV4 from mindee.product.invoice_splitter.invoice_splitter_v1 import InvoiceSplitterV1 from tests.product import get_id, get_version -from tests.test_inputs import PRODUCT_DATA_DIR -from tests.utils import levenshtein_ratio +from tests.utils import PRODUCT_DATA_DIR, levenshtein_ratio @pytest.fixture diff --git a/tests/extraction/test_multi_receipts_extractor.py b/tests/extraction/test_multi_receipts_extractor.py index 00e22f12..502d2cd1 100644 --- a/tests/extraction/test_multi_receipts_extractor.py +++ b/tests/extraction/test_multi_receipts_extractor.py @@ -10,7 +10,7 @@ from mindee.product.multi_receipts_detector.multi_receipts_detector_v1 import ( MultiReceiptsDetectorV1, ) -from tests.test_inputs import PRODUCT_DATA_DIR +from tests.utils import PRODUCT_DATA_DIR @pytest.fixture diff --git a/tests/extraction/test_pdf_extractor.py b/tests/extraction/test_pdf_extractor.py index a236d9c2..3d76aba1 100644 --- a/tests/extraction/test_pdf_extractor.py +++ b/tests/extraction/test_pdf_extractor.py @@ -8,7 +8,7 @@ from mindee.product.invoice_splitter.invoice_splitter_v1_document import ( InvoiceSplitterV1Document, ) -from tests.test_inputs import PRODUCT_DATA_DIR +from tests.utils import PRODUCT_DATA_DIR @pytest.fixture diff --git a/tests/extras/test_extras_integration.py b/tests/extras/test_extras_integration.py index ab6681d8..5235fd16 100644 --- a/tests/extras/test_extras_integration.py +++ b/tests/extras/test_extras_integration.py @@ -3,7 +3,7 @@ from mindee import Client from mindee.product.international_id.international_id_v2 import InternationalIdV2 from mindee.product.invoice.invoice_v4 import InvoiceV4 -from tests.product import PRODUCT_DATA_DIR +from tests.utils import PRODUCT_DATA_DIR @pytest.fixture diff --git a/tests/input/test_apply_page_options.py b/tests/input/test_apply_page_options.py new file mode 100644 index 00000000..6e70224b --- /dev/null +++ b/tests/input/test_apply_page_options.py @@ -0,0 +1,163 @@ +import io + +import pypdfium2 as pdfium +import pytest + +from mindee.error import MindeeError +from mindee.input.page_options import KEEP_ONLY, REMOVE, PageOptions +from mindee.input.sources import ( + Base64Input, + BytesInput, + FileInput, + LocalInputSource, + PathInput, +) +from tests.utils import FILE_TYPES_DIR, PRODUCT_DATA_DIR + + +def _assert_page_options(input_source: LocalInputSource, numb_pages: int): + assert input_source.is_pdf() is True + # Currently the least verbose way of comparing pages with pypdfium2 + # I.e., each page is read and rendered as a rasterized image. + # These images are then compared as raw byte sequences. + cut_pdf = pdfium.PdfDocument(input_source.file_object) + pdf = pdfium.PdfDocument(FILE_TYPES_DIR / "pdf" / f"multipage_cut-{numb_pages}.pdf") + for idx in range(len(pdf)): + pdf_page = pdf.get_page(idx) + pdf_page_render = pdfium.PdfPage.render(pdf_page) + cut_pdf_page = cut_pdf.get_page(idx) + cut_pdf_page_render = pdfium.PdfPage.render(cut_pdf_page) + + assert bytes(pdf_page_render.buffer) == bytes(cut_pdf_page_render.buffer) + cut_pdf.close() + pdf.close() + + +def test_pdf_reconstruct_ok(): + input_source = PathInput(FILE_TYPES_DIR / "pdf" / "multipage.pdf") + input_source.process_pdf(behavior=KEEP_ONLY, on_min_pages=2, page_indexes=range(5)) + assert isinstance(input_source.file_object, io.BytesIO) + + +@pytest.mark.parametrize("numb_pages", [1, 2, 3]) +def test_process_pdf_cut_n_pages(numb_pages: int): + input_source = PathInput(FILE_TYPES_DIR / "pdf" / "multipage.pdf") + assert input_source.page_count == 12 + input_source.process_pdf( + behavior=KEEP_ONLY, on_min_pages=2, page_indexes=[0, -2, -1][:numb_pages] + ) + assert input_source.page_count == numb_pages + _assert_page_options(input_source, numb_pages) + + +@pytest.mark.parametrize("numb_pages", [1, 2, 3]) +def test_apply_pages_pdf_cut_n_pages(numb_pages: int): + input_source = PathInput(FILE_TYPES_DIR / "pdf" / "multipage.pdf") + assert input_source.page_count == 12 + input_source.apply_page_options( + PageOptions(on_min_pages=2, page_indexes=[0, -2, -1][:numb_pages]) + ) + assert input_source.page_count == numb_pages + _assert_page_options(input_source, numb_pages) + + +def test_pdf_keep_5_first_pages(): + input_source = PathInput(FILE_TYPES_DIR / "pdf" / "multipage.pdf") + assert input_source.page_count == 12 + input_source.process_pdf( + behavior=KEEP_ONLY, on_min_pages=2, page_indexes=[0, 1, 2, 3, 4] + ) + assert input_source.page_count == 5 + + +def test_pdf_keep_invalid_pages(): + input_source = PathInput(FILE_TYPES_DIR / "pdf" / "multipage.pdf") + assert input_source.page_count == 12 + input_source.process_pdf( + behavior=KEEP_ONLY, on_min_pages=2, page_indexes=[0, 1, 17] + ) + assert input_source.page_count == 2 + + +def test_pdf_remove_5_last_pages(): + input_source = PathInput(FILE_TYPES_DIR / "pdf" / "multipage.pdf") + assert input_source.is_pdf() is True + input_source.process_pdf( + behavior=REMOVE, on_min_pages=2, page_indexes=[-5, -4, -3, -2, -1] + ) + assert input_source.page_count == 7 + + +def test_pdf_remove_5_first_pages(): + input_source = PathInput(FILE_TYPES_DIR / "pdf" / "multipage.pdf") + assert input_source.is_pdf() is True + input_source.process_pdf( + behavior=REMOVE, on_min_pages=2, page_indexes=list(range(5)) + ) + assert input_source.page_count == 7 + + +def test_pdf_remove_invalid_pages(): + input_source = PathInput(FILE_TYPES_DIR / "pdf" / "multipage.pdf") + assert input_source.is_pdf() is True + input_source.process_pdf(behavior=REMOVE, on_min_pages=2, page_indexes=[16]) + assert input_source.page_count == 12 + + +def test_pdf_keep_no_pages(): + input_source = PathInput(FILE_TYPES_DIR / "pdf" / "multipage.pdf") + assert input_source.is_pdf() is True + # empty page indexes + with pytest.raises(RuntimeError): + input_source.process_pdf(behavior=KEEP_ONLY, on_min_pages=2, page_indexes=[]) + # all invalid pages + with pytest.raises(RuntimeError): + input_source.process_pdf( + behavior=KEEP_ONLY, on_min_pages=2, page_indexes=[16, 17] + ) + + +def test_pdf_remove_all_pages(): + input_source = PathInput(FILE_TYPES_DIR / "pdf" / "multipage.pdf") + assert input_source.is_pdf() is True + with pytest.raises(RuntimeError): + input_source.process_pdf( + behavior=REMOVE, on_min_pages=2, page_indexes=list(range(15)) + ) + + +def test_pdf_input_from_file(): + with open(FILE_TYPES_DIR / "pdf" / "multipage.pdf", "rb") as fp: + input_source = FileInput(fp) + assert input_source.is_pdf() is True + input_source.process_pdf(behavior=KEEP_ONLY, on_min_pages=2, page_indexes=[0]) + assert input_source.page_count == 1 + + +def test_pdf_input_from_base64(): + with open(PRODUCT_DATA_DIR / "invoices" / "invoice_10p.txt", "rt") as fp: + input_source = Base64Input(fp.read(), filename="invoice_10p.pdf") + assert input_source.is_pdf() is True + input_source.process_pdf(behavior=KEEP_ONLY, on_min_pages=2, page_indexes=[0]) + assert input_source.page_count == 1 + + +def test_pdf_input_from_bytes(): + with open(PRODUCT_DATA_DIR / "invoices" / "invoice_10p.pdf", "rb") as fp: + input_source = BytesInput(fp.read(), filename="invoice_10p.pdf") + assert input_source.is_pdf() is True + input_source.process_pdf(behavior=KEEP_ONLY, on_min_pages=2, page_indexes=[0]) + assert input_source.page_count == 1 + + +def test_pdf_blank_check(): + with pytest.raises(MindeeError): + input_source = PathInput(FILE_TYPES_DIR / "pdf" / "blank.pdf") + input_source.process_pdf(behavior=KEEP_ONLY, on_min_pages=2, page_indexes=[0]) + + with pytest.raises(MindeeError): + input_source = PathInput(FILE_TYPES_DIR / "pdf" / "blank_1.pdf") + input_source.process_pdf(behavior=KEEP_ONLY, on_min_pages=2, page_indexes=[0]) + + input_not_blank = PathInput(FILE_TYPES_DIR / "pdf" / "not_blank_image_only.pdf") + assert input_not_blank.page_count == 1 diff --git a/tests/input/test_fix_pdf.py b/tests/input/test_fix_pdf.py new file mode 100644 index 00000000..3b87d561 --- /dev/null +++ b/tests/input/test_fix_pdf.py @@ -0,0 +1,22 @@ +import pytest + +from mindee import PathInput +from mindee.error import MimeTypeError +from tests.utils import FILE_TYPES_DIR + + +def test_broken_unfixable_pdf(): + with pytest.raises(MimeTypeError): + input_source = PathInput(FILE_TYPES_DIR / "pdf" / "broken_unfixable.pdf") + input_source.fix_pdf() + + +def test_broken_fixable_pdf(): + input_source = PathInput(FILE_TYPES_DIR / "pdf" / "broken_fixable.pdf") + input_source.fix_pdf() + assert input_source.page_count == 1 + + +def test_broken_fixable_invoice_pdf(): + input_source = PathInput(FILE_TYPES_DIR / "pdf" / "broken_invoice.pdf") + input_source.fix_pdf() diff --git a/tests/input/test_inputs.py b/tests/input/test_inputs.py new file mode 100644 index 00000000..b9e38e1a --- /dev/null +++ b/tests/input/test_inputs.py @@ -0,0 +1,96 @@ +import io + +import pytest + +from mindee.error.mimetype_error import MimeTypeError +from mindee.error.mindee_error import MindeeSourceError +from mindee.input.sources import ( + Base64Input, + BytesInput, + FileInput, + LocalInputSource, + PathInput, + UrlInputSource, +) +from tests.utils import FILE_TYPES_DIR + + +def test_pdf_read_contents(): + input_source = PathInput(FILE_TYPES_DIR / "pdf" / "multipage.pdf") + contents = input_source.read_contents(close_file=False) + assert contents[0] == "multipage.pdf" + assert isinstance(contents[1], bytes) + assert not input_source.file_object.closed + + input_source.read_contents(close_file=True) + assert input_source.file_object.closed + + +@pytest.mark.parametrize( + ("filename", "page_count"), + ( + ("multipage_cut-1.pdf", 1), + ("multipage_cut-3.pdf", 3), + ("multipage.pdf", 12), + ), +) +def test_pdf_input_from_path(filename, page_count): + input_source = PathInput(FILE_TYPES_DIR / "pdf" / filename) + assert input_source.file_mimetype == "application/pdf" + assert input_source.is_pdf() is True + assert input_source.page_count == page_count + assert isinstance(input_source.file_object, io.BufferedReader) + + +def test_pdf_input_from_url(): + with pytest.raises(MindeeSourceError): + UrlInputSource(url="http://example.com/invoice.pdf") + + +TEST_IMAGES = ( + ("receipt.tif", "image/tiff"), + ("receipt.tiff", "image/tiff"), + ("receipt.jpg", "image/jpeg"), + # invalid extensions won't be detected properly + # ("receipt.jpga", "image/jpeg"), + ("receipt.png", "image/png"), + ("receipt.heic", "image/heic"), +) + + +def _assert_image(input_source: LocalInputSource, mimetype: str) -> None: + assert input_source.file_mimetype == mimetype + assert input_source.is_pdf() is False + assert input_source.page_count == 1 + assert isinstance(input_source.file_object.read(15), bytes) + + +@pytest.mark.parametrize(("filename", "mimetype"), TEST_IMAGES) +def test_image_input_from_path(filename, mimetype): + input_source = PathInput(FILE_TYPES_DIR / filename) + _assert_image(input_source, mimetype) + + +@pytest.mark.parametrize(("filename", "mimetype"), TEST_IMAGES) +def test_image_input_from_file(filename, mimetype): + with open(FILE_TYPES_DIR / filename, "rb") as fp: + input_source = FileInput(fp) + _assert_image(input_source, mimetype) + + +@pytest.mark.parametrize(("filename", "mimetype"), TEST_IMAGES) +def test_image_input_from_bytes(filename, mimetype): + file_bytes = open(FILE_TYPES_DIR / filename, "rb").read() + input_source = BytesInput(file_bytes, filename=filename) + _assert_image(input_source, mimetype) + + +def test_image_input_from_base64(): + base64_input = open(FILE_TYPES_DIR / "receipt.txt", "r").read() + input_source = Base64Input(base64_input, filename="receipt.jpg") + _assert_image(input_source, mimetype="image/jpeg") + + +def test_txt_input_from_path(): + with pytest.raises(MimeTypeError): + PathInput(FILE_TYPES_DIR / "receipt.txt") diff --git a/tests/mindee_http/test_error.py b/tests/mindee_http/test_error.py index 5e2f879e..7ad3c3b7 100644 --- a/tests/mindee_http/test_error.py +++ b/tests/mindee_http/test_error.py @@ -10,7 +10,7 @@ handle_error, ) from mindee.input.sources.path_input import PathInput -from tests.test_inputs import FILE_TYPES_DIR +from tests.input.test_inputs import FILE_TYPES_DIR from tests.utils import clear_envvars, dummy_envvars ERROR_DATA_DIR = Path("./tests/data/errors") diff --git a/tests/product/__init__.py b/tests/product/__init__.py index 3b1a5405..5119c0da 100644 --- a/tests/product/__init__.py +++ b/tests/product/__init__.py @@ -1,8 +1,3 @@ -from pathlib import Path - -PRODUCT_DATA_DIR = Path("./tests/data/products/") - - def get_version(rst_str: str) -> str: """Replaces the version of a created object to avoid errors during tests.""" diff --git a/tests/product/barcode_reader/test_barcode_reader_v1.py b/tests/product/barcode_reader/test_barcode_reader_v1.py index 06195188..6751dbaa 100644 --- a/tests/product/barcode_reader/test_barcode_reader_v1.py +++ b/tests/product/barcode_reader/test_barcode_reader_v1.py @@ -8,7 +8,7 @@ from mindee.product.barcode_reader.barcode_reader_v1_document import ( BarcodeReaderV1Document, ) -from tests.product import PRODUCT_DATA_DIR +from tests.utils import PRODUCT_DATA_DIR RESPONSE_DIR = PRODUCT_DATA_DIR / "barcode_reader" / "response_v1" diff --git a/tests/product/barcode_reader/test_barcode_reader_v1_regression.py b/tests/product/barcode_reader/test_barcode_reader_v1_regression.py index 17339f04..dc162613 100644 --- a/tests/product/barcode_reader/test_barcode_reader_v1_regression.py +++ b/tests/product/barcode_reader/test_barcode_reader_v1_regression.py @@ -2,7 +2,8 @@ from mindee.client import Client from mindee.product.barcode_reader.barcode_reader_v1 import BarcodeReaderV1 -from tests.product import PRODUCT_DATA_DIR, get_id, get_version +from tests.product import get_id, get_version +from tests.utils import PRODUCT_DATA_DIR @pytest.mark.regression diff --git a/tests/product/bill_of_lading/test_bill_of_lading_v1.py b/tests/product/bill_of_lading/test_bill_of_lading_v1.py index 3283824c..b76d9c36 100644 --- a/tests/product/bill_of_lading/test_bill_of_lading_v1.py +++ b/tests/product/bill_of_lading/test_bill_of_lading_v1.py @@ -8,7 +8,7 @@ from mindee.product.bill_of_lading.bill_of_lading_v1_document import ( BillOfLadingV1Document, ) -from tests.product import PRODUCT_DATA_DIR +from tests.utils import PRODUCT_DATA_DIR RESPONSE_DIR = PRODUCT_DATA_DIR / "bill_of_lading" / "response_v1" diff --git a/tests/product/business_card/test_business_card_v1.py b/tests/product/business_card/test_business_card_v1.py index a844d450..2b8d79ec 100644 --- a/tests/product/business_card/test_business_card_v1.py +++ b/tests/product/business_card/test_business_card_v1.py @@ -8,7 +8,7 @@ from mindee.product.business_card.business_card_v1_document import ( BusinessCardV1Document, ) -from tests.product import PRODUCT_DATA_DIR +from tests.utils import PRODUCT_DATA_DIR RESPONSE_DIR = PRODUCT_DATA_DIR / "business_card" / "response_v1" diff --git a/tests/product/cropper/test_cropper_v1.py b/tests/product/cropper/test_cropper_v1.py index 510c757c..c6e27ece 100644 --- a/tests/product/cropper/test_cropper_v1.py +++ b/tests/product/cropper/test_cropper_v1.py @@ -11,7 +11,7 @@ from mindee.product.cropper.cropper_v1_page import ( CropperV1Page, ) -from tests.product import PRODUCT_DATA_DIR +from tests.utils import PRODUCT_DATA_DIR RESPONSE_DIR = PRODUCT_DATA_DIR / "cropper" / "response_v1" diff --git a/tests/product/cropper/test_cropper_v1_regression.py b/tests/product/cropper/test_cropper_v1_regression.py index f68152d2..7c37795c 100644 --- a/tests/product/cropper/test_cropper_v1_regression.py +++ b/tests/product/cropper/test_cropper_v1_regression.py @@ -2,7 +2,8 @@ from mindee.client import Client from mindee.product.cropper.cropper_v1 import CropperV1 -from tests.product import PRODUCT_DATA_DIR, get_id, get_version +from tests.product import get_id, get_version +from tests.utils import PRODUCT_DATA_DIR @pytest.mark.regression diff --git a/tests/product/custom/test_custom_v1.py b/tests/product/custom/test_custom_v1.py index 28ce8673..bfb4f04a 100644 --- a/tests/product/custom/test_custom_v1.py +++ b/tests/product/custom/test_custom_v1.py @@ -10,7 +10,7 @@ from mindee.product.custom.custom_v1 import CustomV1 from mindee.product.custom.custom_v1_document import CustomV1Document from mindee.product.custom.custom_v1_page import CustomV1Page -from tests.product import PRODUCT_DATA_DIR +from tests.utils import PRODUCT_DATA_DIR @pytest.fixture diff --git a/tests/product/custom/test_custom_v1_v2.py b/tests/product/custom/test_custom_v1_v2.py index f640be81..2db301e8 100644 --- a/tests/product/custom/test_custom_v1_v2.py +++ b/tests/product/custom/test_custom_v1_v2.py @@ -10,7 +10,7 @@ from mindee.product.custom.custom_v1 import CustomV1 from mindee.product.custom.custom_v1_document import CustomV1Document from mindee.product.custom.custom_v1_page import CustomV1Page -from tests.product import PRODUCT_DATA_DIR +from tests.utils import PRODUCT_DATA_DIR @pytest.fixture diff --git a/tests/product/delivery_note/test_delivery_note_v1.py b/tests/product/delivery_note/test_delivery_note_v1.py index 0c4fc6ed..cff46a05 100644 --- a/tests/product/delivery_note/test_delivery_note_v1.py +++ b/tests/product/delivery_note/test_delivery_note_v1.py @@ -8,7 +8,7 @@ from mindee.product.delivery_note.delivery_note_v1_document import ( DeliveryNoteV1Document, ) -from tests.product import PRODUCT_DATA_DIR +from tests.utils import PRODUCT_DATA_DIR RESPONSE_DIR = PRODUCT_DATA_DIR / "delivery_notes" / "response_v1" diff --git a/tests/product/driver_license/test_driver_license_v1.py b/tests/product/driver_license/test_driver_license_v1.py index 94747360..f7583eaa 100644 --- a/tests/product/driver_license/test_driver_license_v1.py +++ b/tests/product/driver_license/test_driver_license_v1.py @@ -8,7 +8,7 @@ from mindee.product.driver_license.driver_license_v1_document import ( DriverLicenseV1Document, ) -from tests.product import PRODUCT_DATA_DIR +from tests.utils import PRODUCT_DATA_DIR RESPONSE_DIR = PRODUCT_DATA_DIR / "driver_license" / "response_v1" diff --git a/tests/product/financial_document/test_financial_document_v1.py b/tests/product/financial_document/test_financial_document_v1.py index 351c633f..07cc01e4 100644 --- a/tests/product/financial_document/test_financial_document_v1.py +++ b/tests/product/financial_document/test_financial_document_v1.py @@ -8,7 +8,7 @@ from mindee.product.financial_document.financial_document_v1_document import ( FinancialDocumentV1Document, ) -from tests.product import PRODUCT_DATA_DIR +from tests.utils import PRODUCT_DATA_DIR RESPONSE_DIR = PRODUCT_DATA_DIR / "financial_document" / "response_v1" diff --git a/tests/product/financial_document/test_financial_document_v1_regression.py b/tests/product/financial_document/test_financial_document_v1_regression.py index 36805af4..ededb12a 100644 --- a/tests/product/financial_document/test_financial_document_v1_regression.py +++ b/tests/product/financial_document/test_financial_document_v1_regression.py @@ -2,7 +2,8 @@ from mindee.client import Client from mindee.product.financial_document.financial_document_v1 import FinancialDocumentV1 -from tests.product import PRODUCT_DATA_DIR, get_id, get_version +from tests.product import get_id, get_version +from tests.utils import PRODUCT_DATA_DIR @pytest.mark.regression diff --git a/tests/product/fr/bank_account_details/test_bank_account_details_v1.py b/tests/product/fr/bank_account_details/test_bank_account_details_v1.py index 758a0b5d..b1062e5f 100644 --- a/tests/product/fr/bank_account_details/test_bank_account_details_v1.py +++ b/tests/product/fr/bank_account_details/test_bank_account_details_v1.py @@ -10,7 +10,7 @@ from mindee.product.fr.bank_account_details.bank_account_details_v1_document import ( BankAccountDetailsV1Document, ) -from tests.product import PRODUCT_DATA_DIR +from tests.utils import PRODUCT_DATA_DIR RESPONSE_DIR = PRODUCT_DATA_DIR / "bank_account_details" / "response_v1" diff --git a/tests/product/fr/bank_account_details/test_bank_account_details_v1_regression.py b/tests/product/fr/bank_account_details/test_bank_account_details_v1_regression.py index 8ab84133..ee9505c5 100644 --- a/tests/product/fr/bank_account_details/test_bank_account_details_v1_regression.py +++ b/tests/product/fr/bank_account_details/test_bank_account_details_v1_regression.py @@ -4,7 +4,8 @@ from mindee.product.fr.bank_account_details.bank_account_details_v1 import ( BankAccountDetailsV1, ) -from tests.product import PRODUCT_DATA_DIR, get_id, get_version +from tests.product import get_id, get_version +from tests.utils import PRODUCT_DATA_DIR @pytest.mark.regression diff --git a/tests/product/fr/bank_account_details/test_bank_account_details_v2.py b/tests/product/fr/bank_account_details/test_bank_account_details_v2.py index e9e86692..c3bfde0b 100644 --- a/tests/product/fr/bank_account_details/test_bank_account_details_v2.py +++ b/tests/product/fr/bank_account_details/test_bank_account_details_v2.py @@ -10,7 +10,7 @@ from mindee.product.fr.bank_account_details.bank_account_details_v2_document import ( BankAccountDetailsV2Document, ) -from tests.product import PRODUCT_DATA_DIR +from tests.utils import PRODUCT_DATA_DIR RESPONSE_DIR = PRODUCT_DATA_DIR / "bank_account_details" / "response_v2" diff --git a/tests/product/fr/bank_account_details/test_bank_account_details_v2_regression.py b/tests/product/fr/bank_account_details/test_bank_account_details_v2_regression.py index 4f0aee5a..a62d0729 100644 --- a/tests/product/fr/bank_account_details/test_bank_account_details_v2_regression.py +++ b/tests/product/fr/bank_account_details/test_bank_account_details_v2_regression.py @@ -4,7 +4,8 @@ from mindee.product.fr.bank_account_details.bank_account_details_v2 import ( BankAccountDetailsV2, ) -from tests.product import PRODUCT_DATA_DIR, get_id, get_version +from tests.product import get_id, get_version +from tests.utils import PRODUCT_DATA_DIR @pytest.mark.regression diff --git a/tests/product/fr/carte_grise/test_carte_grise_v1.py b/tests/product/fr/carte_grise/test_carte_grise_v1.py index 009b701e..a3f5eb1a 100644 --- a/tests/product/fr/carte_grise/test_carte_grise_v1.py +++ b/tests/product/fr/carte_grise/test_carte_grise_v1.py @@ -8,7 +8,7 @@ from mindee.product.fr.carte_grise.carte_grise_v1_document import ( CarteGriseV1Document, ) -from tests.product import PRODUCT_DATA_DIR +from tests.utils import PRODUCT_DATA_DIR RESPONSE_DIR = PRODUCT_DATA_DIR / "carte_grise" / "response_v1" diff --git a/tests/product/fr/carte_grise/test_carte_grise_v1_regression.py b/tests/product/fr/carte_grise/test_carte_grise_v1_regression.py index 3cf794a0..27c721a0 100644 --- a/tests/product/fr/carte_grise/test_carte_grise_v1_regression.py +++ b/tests/product/fr/carte_grise/test_carte_grise_v1_regression.py @@ -2,7 +2,8 @@ from mindee.client import Client from mindee.product.fr.carte_grise.carte_grise_v1 import CarteGriseV1 -from tests.product import PRODUCT_DATA_DIR, get_id, get_version +from tests.product import get_id, get_version +from tests.utils import PRODUCT_DATA_DIR @pytest.mark.regression diff --git a/tests/product/fr/energy_bill/test_energy_bill_v1.py b/tests/product/fr/energy_bill/test_energy_bill_v1.py index 259d8d59..5f36ff12 100644 --- a/tests/product/fr/energy_bill/test_energy_bill_v1.py +++ b/tests/product/fr/energy_bill/test_energy_bill_v1.py @@ -8,7 +8,7 @@ from mindee.product.fr.energy_bill.energy_bill_v1_document import ( EnergyBillV1Document, ) -from tests.product import PRODUCT_DATA_DIR +from tests.utils import PRODUCT_DATA_DIR RESPONSE_DIR = PRODUCT_DATA_DIR / "energy_bill_fra" / "response_v1" diff --git a/tests/product/fr/health_card/test_health_card_v1.py b/tests/product/fr/health_card/test_health_card_v1.py index e1dc3302..4f434ca4 100644 --- a/tests/product/fr/health_card/test_health_card_v1.py +++ b/tests/product/fr/health_card/test_health_card_v1.py @@ -8,7 +8,7 @@ from mindee.product.fr.health_card.health_card_v1_document import ( HealthCardV1Document, ) -from tests.product import PRODUCT_DATA_DIR +from tests.utils import PRODUCT_DATA_DIR RESPONSE_DIR = PRODUCT_DATA_DIR / "french_healthcard" / "response_v1" diff --git a/tests/product/fr/id_card/test_id_card_v1.py b/tests/product/fr/id_card/test_id_card_v1.py index fc363b3e..5306d996 100644 --- a/tests/product/fr/id_card/test_id_card_v1.py +++ b/tests/product/fr/id_card/test_id_card_v1.py @@ -11,7 +11,7 @@ from mindee.product.fr.id_card.id_card_v1_page import ( IdCardV1Page, ) -from tests.product import PRODUCT_DATA_DIR +from tests.utils import PRODUCT_DATA_DIR RESPONSE_DIR = PRODUCT_DATA_DIR / "idcard_fr" / "response_v1" diff --git a/tests/product/fr/id_card/test_id_card_v1_regression.py b/tests/product/fr/id_card/test_id_card_v1_regression.py index ac33934c..a75d4ea1 100644 --- a/tests/product/fr/id_card/test_id_card_v1_regression.py +++ b/tests/product/fr/id_card/test_id_card_v1_regression.py @@ -2,7 +2,8 @@ from mindee.client import Client from mindee.product.fr.id_card.id_card_v1 import IdCardV1 -from tests.product import PRODUCT_DATA_DIR, get_id, get_version +from tests.product import get_id, get_version +from tests.utils import PRODUCT_DATA_DIR @pytest.mark.regression diff --git a/tests/product/fr/id_card/test_id_card_v2.py b/tests/product/fr/id_card/test_id_card_v2.py index 8d456a3b..0ff30079 100644 --- a/tests/product/fr/id_card/test_id_card_v2.py +++ b/tests/product/fr/id_card/test_id_card_v2.py @@ -11,7 +11,7 @@ from mindee.product.fr.id_card.id_card_v2_page import ( IdCardV2Page, ) -from tests.product import PRODUCT_DATA_DIR +from tests.utils import PRODUCT_DATA_DIR RESPONSE_DIR = PRODUCT_DATA_DIR / "idcard_fr" / "response_v2" diff --git a/tests/product/fr/id_card/test_id_card_v2_regression.py b/tests/product/fr/id_card/test_id_card_v2_regression.py index 2c11ebaf..f8835de8 100644 --- a/tests/product/fr/id_card/test_id_card_v2_regression.py +++ b/tests/product/fr/id_card/test_id_card_v2_regression.py @@ -2,7 +2,8 @@ from mindee.client import Client from mindee.product.fr.id_card.id_card_v2 import IdCardV2 -from tests.product import PRODUCT_DATA_DIR, get_id, get_version +from tests.product import get_id, get_version +from tests.utils import PRODUCT_DATA_DIR @pytest.mark.regression diff --git a/tests/product/fr/payslip/test_payslip_v2.py b/tests/product/fr/payslip/test_payslip_v2.py index 77f4580f..4429f41a 100644 --- a/tests/product/fr/payslip/test_payslip_v2.py +++ b/tests/product/fr/payslip/test_payslip_v2.py @@ -8,7 +8,7 @@ from mindee.product.fr.payslip.payslip_v2_document import ( PayslipV2Document, ) -from tests.product import PRODUCT_DATA_DIR +from tests.utils import PRODUCT_DATA_DIR RESPONSE_DIR = PRODUCT_DATA_DIR / "payslip_fra" / "response_v2" diff --git a/tests/product/fr/payslip/test_payslip_v3.py b/tests/product/fr/payslip/test_payslip_v3.py index cdc426d8..e3f7a723 100644 --- a/tests/product/fr/payslip/test_payslip_v3.py +++ b/tests/product/fr/payslip/test_payslip_v3.py @@ -8,7 +8,7 @@ from mindee.product.fr.payslip.payslip_v3_document import ( PayslipV3Document, ) -from tests.product import PRODUCT_DATA_DIR +from tests.utils import PRODUCT_DATA_DIR RESPONSE_DIR = PRODUCT_DATA_DIR / "payslip_fra" / "response_v3" diff --git a/tests/product/generated/test_generated_v1.py b/tests/product/generated/test_generated_v1.py index 5caadbab..fee21b7f 100644 --- a/tests/product/generated/test_generated_v1.py +++ b/tests/product/generated/test_generated_v1.py @@ -11,7 +11,7 @@ from mindee.product.generated.generated_v1 import GeneratedV1 from mindee.product.generated.generated_v1_document import GeneratedV1Document from mindee.product.generated.generated_v1_page import GeneratedV1Page -from tests.product import PRODUCT_DATA_DIR +from tests.utils import PRODUCT_DATA_DIR @pytest.fixture diff --git a/tests/product/ind/indian_passport/test_indian_passport_v1.py b/tests/product/ind/indian_passport/test_indian_passport_v1.py index c87fc176..4f2ebd69 100644 --- a/tests/product/ind/indian_passport/test_indian_passport_v1.py +++ b/tests/product/ind/indian_passport/test_indian_passport_v1.py @@ -8,7 +8,7 @@ from mindee.product.ind.indian_passport.indian_passport_v1_document import ( IndianPassportV1Document, ) -from tests.product import PRODUCT_DATA_DIR +from tests.utils import PRODUCT_DATA_DIR RESPONSE_DIR = PRODUCT_DATA_DIR / "ind_passport" / "response_v1" diff --git a/tests/product/international_id/test_international_id_v2.py b/tests/product/international_id/test_international_id_v2.py index 9b4b839e..5756360a 100644 --- a/tests/product/international_id/test_international_id_v2.py +++ b/tests/product/international_id/test_international_id_v2.py @@ -8,7 +8,7 @@ from mindee.product.international_id.international_id_v2_document import ( InternationalIdV2Document, ) -from tests.product import PRODUCT_DATA_DIR +from tests.utils import PRODUCT_DATA_DIR RESPONSE_DIR = PRODUCT_DATA_DIR / "international_id" / "response_v2" diff --git a/tests/product/invoice/test_invoice_v4.py b/tests/product/invoice/test_invoice_v4.py index 3d373961..95bac0dd 100644 --- a/tests/product/invoice/test_invoice_v4.py +++ b/tests/product/invoice/test_invoice_v4.py @@ -8,7 +8,7 @@ from mindee.product.invoice.invoice_v4_document import ( InvoiceV4Document, ) -from tests.product import PRODUCT_DATA_DIR +from tests.utils import PRODUCT_DATA_DIR RESPONSE_DIR = PRODUCT_DATA_DIR / "invoices" / "response_v4" diff --git a/tests/product/invoice/test_invoice_v4_regression.py b/tests/product/invoice/test_invoice_v4_regression.py index 212fd901..cc24a98d 100644 --- a/tests/product/invoice/test_invoice_v4_regression.py +++ b/tests/product/invoice/test_invoice_v4_regression.py @@ -2,7 +2,8 @@ from mindee.client import Client from mindee.product.invoice.invoice_v4 import InvoiceV4 -from tests.product import PRODUCT_DATA_DIR, get_id, get_version +from tests.product import get_id, get_version +from tests.utils import PRODUCT_DATA_DIR @pytest.mark.regression diff --git a/tests/product/invoice_splitter/test_invoice_splitter_v1.py b/tests/product/invoice_splitter/test_invoice_splitter_v1.py index 78c63607..03bd023b 100644 --- a/tests/product/invoice_splitter/test_invoice_splitter_v1.py +++ b/tests/product/invoice_splitter/test_invoice_splitter_v1.py @@ -8,7 +8,7 @@ from mindee.product.invoice_splitter.invoice_splitter_v1_document import ( InvoiceSplitterV1Document, ) -from tests.product import PRODUCT_DATA_DIR +from tests.utils import PRODUCT_DATA_DIR RESPONSE_DIR = PRODUCT_DATA_DIR / "invoice_splitter" / "response_v1" diff --git a/tests/product/invoice_splitter/test_invoice_splitter_v1_regression.py b/tests/product/invoice_splitter/test_invoice_splitter_v1_regression.py index ae09c77f..5782f13d 100644 --- a/tests/product/invoice_splitter/test_invoice_splitter_v1_regression.py +++ b/tests/product/invoice_splitter/test_invoice_splitter_v1_regression.py @@ -2,7 +2,8 @@ from mindee.client import Client from mindee.product.invoice_splitter.invoice_splitter_v1 import InvoiceSplitterV1 -from tests.product import PRODUCT_DATA_DIR, get_id, get_version +from tests.product import get_id, get_version +from tests.utils import PRODUCT_DATA_DIR @pytest.mark.regression diff --git a/tests/product/material_certificate/test_material_certificate_v1.py b/tests/product/material_certificate/test_material_certificate_v1.py index 732fcb9b..b3c1c440 100644 --- a/tests/product/material_certificate/test_material_certificate_v1.py +++ b/tests/product/material_certificate/test_material_certificate_v1.py @@ -10,7 +10,7 @@ from mindee.product.material_certificate.material_certificate_v1_document import ( MaterialCertificateV1Document, ) -from tests.product import PRODUCT_DATA_DIR +from tests.utils import PRODUCT_DATA_DIR RESPONSE_DIR = PRODUCT_DATA_DIR / "material_certificate" / "response_v1" diff --git a/tests/product/multi_receipts_detector/test_multi_receipts_detector_v1.py b/tests/product/multi_receipts_detector/test_multi_receipts_detector_v1.py index 6f97b18b..fde399a6 100644 --- a/tests/product/multi_receipts_detector/test_multi_receipts_detector_v1.py +++ b/tests/product/multi_receipts_detector/test_multi_receipts_detector_v1.py @@ -10,7 +10,7 @@ from mindee.product.multi_receipts_detector.multi_receipts_detector_v1_document import ( MultiReceiptsDetectorV1Document, ) -from tests.product import PRODUCT_DATA_DIR +from tests.utils import PRODUCT_DATA_DIR RESPONSE_DIR = PRODUCT_DATA_DIR / "multi_receipts_detector" / "response_v1" diff --git a/tests/product/multi_receipts_detector/test_multi_receipts_detector_v1_regression.py b/tests/product/multi_receipts_detector/test_multi_receipts_detector_v1_regression.py index 7c6c3ff5..87941071 100644 --- a/tests/product/multi_receipts_detector/test_multi_receipts_detector_v1_regression.py +++ b/tests/product/multi_receipts_detector/test_multi_receipts_detector_v1_regression.py @@ -4,7 +4,8 @@ from mindee.product.multi_receipts_detector.multi_receipts_detector_v1 import ( MultiReceiptsDetectorV1, ) -from tests.product import PRODUCT_DATA_DIR, get_id, get_version +from tests.product import get_id, get_version +from tests.utils import PRODUCT_DATA_DIR @pytest.mark.regression diff --git a/tests/product/nutrition_facts_label/test_nutrition_facts_label_v1.py b/tests/product/nutrition_facts_label/test_nutrition_facts_label_v1.py index 16bb29f2..63deca5c 100644 --- a/tests/product/nutrition_facts_label/test_nutrition_facts_label_v1.py +++ b/tests/product/nutrition_facts_label/test_nutrition_facts_label_v1.py @@ -10,7 +10,7 @@ from mindee.product.nutrition_facts_label.nutrition_facts_label_v1_document import ( NutritionFactsLabelV1Document, ) -from tests.product import PRODUCT_DATA_DIR +from tests.utils import PRODUCT_DATA_DIR RESPONSE_DIR = PRODUCT_DATA_DIR / "nutrition_facts" / "response_v1" diff --git a/tests/product/passport/test_passport_v1.py b/tests/product/passport/test_passport_v1.py index 3bbb2ea3..ede170e1 100644 --- a/tests/product/passport/test_passport_v1.py +++ b/tests/product/passport/test_passport_v1.py @@ -8,7 +8,7 @@ from mindee.product.passport.passport_v1_document import ( PassportV1Document, ) -from tests.product import PRODUCT_DATA_DIR +from tests.utils import PRODUCT_DATA_DIR RESPONSE_DIR = PRODUCT_DATA_DIR / "passport" / "response_v1" diff --git a/tests/product/passport/test_passport_v1_regression.py b/tests/product/passport/test_passport_v1_regression.py index 13869838..98c7e708 100644 --- a/tests/product/passport/test_passport_v1_regression.py +++ b/tests/product/passport/test_passport_v1_regression.py @@ -2,7 +2,8 @@ from mindee.client import Client from mindee.product.passport.passport_v1 import PassportV1 -from tests.product import PRODUCT_DATA_DIR, get_id, get_version +from tests.product import get_id, get_version +from tests.utils import PRODUCT_DATA_DIR @pytest.mark.regression diff --git a/tests/product/receipt/test_receipt_v5.py b/tests/product/receipt/test_receipt_v5.py index 2c57d903..2dd98942 100644 --- a/tests/product/receipt/test_receipt_v5.py +++ b/tests/product/receipt/test_receipt_v5.py @@ -8,7 +8,7 @@ from mindee.product.receipt.receipt_v5_document import ( ReceiptV5Document, ) -from tests.product import PRODUCT_DATA_DIR +from tests.utils import PRODUCT_DATA_DIR RESPONSE_DIR = PRODUCT_DATA_DIR / "expense_receipts" / "response_v5" diff --git a/tests/product/receipt/test_receipt_v5_regression.py b/tests/product/receipt/test_receipt_v5_regression.py index 83444883..c10049e3 100644 --- a/tests/product/receipt/test_receipt_v5_regression.py +++ b/tests/product/receipt/test_receipt_v5_regression.py @@ -2,7 +2,8 @@ from mindee.client import Client from mindee.product.receipt.receipt_v5 import ReceiptV5 -from tests.product import PRODUCT_DATA_DIR, get_id, get_version +from tests.product import get_id, get_version +from tests.utils import PRODUCT_DATA_DIR @pytest.mark.regression diff --git a/tests/product/resume/test_resume_v1.py b/tests/product/resume/test_resume_v1.py index 66541882..2b141f70 100644 --- a/tests/product/resume/test_resume_v1.py +++ b/tests/product/resume/test_resume_v1.py @@ -8,7 +8,7 @@ from mindee.product.resume.resume_v1_document import ( ResumeV1Document, ) -from tests.product import PRODUCT_DATA_DIR +from tests.utils import PRODUCT_DATA_DIR RESPONSE_DIR = PRODUCT_DATA_DIR / "resume" / "response_v1" diff --git a/tests/product/us/bank_check/test_bank_check_v1.py b/tests/product/us/bank_check/test_bank_check_v1.py index 755fe6c8..b4e64764 100644 --- a/tests/product/us/bank_check/test_bank_check_v1.py +++ b/tests/product/us/bank_check/test_bank_check_v1.py @@ -11,7 +11,7 @@ from mindee.product.us.bank_check.bank_check_v1_page import ( BankCheckV1Page, ) -from tests.product import PRODUCT_DATA_DIR +from tests.utils import PRODUCT_DATA_DIR RESPONSE_DIR = PRODUCT_DATA_DIR / "bank_check" / "response_v1" diff --git a/tests/product/us/bank_check/test_bank_check_v1_regression.py b/tests/product/us/bank_check/test_bank_check_v1_regression.py index 57b89d9b..2f232b2c 100644 --- a/tests/product/us/bank_check/test_bank_check_v1_regression.py +++ b/tests/product/us/bank_check/test_bank_check_v1_regression.py @@ -2,7 +2,8 @@ from mindee.client import Client from mindee.product.us.bank_check.bank_check_v1 import BankCheckV1 -from tests.product import PRODUCT_DATA_DIR, get_id, get_version +from tests.product import get_id, get_version +from tests.utils import PRODUCT_DATA_DIR @pytest.mark.regression diff --git a/tests/product/us/healthcare_card/test_healthcare_card_v1.py b/tests/product/us/healthcare_card/test_healthcare_card_v1.py index 9b179007..0ba6752c 100644 --- a/tests/product/us/healthcare_card/test_healthcare_card_v1.py +++ b/tests/product/us/healthcare_card/test_healthcare_card_v1.py @@ -8,7 +8,7 @@ from mindee.product.us.healthcare_card.healthcare_card_v1_document import ( HealthcareCardV1Document, ) -from tests.product import PRODUCT_DATA_DIR +from tests.utils import PRODUCT_DATA_DIR RESPONSE_DIR = PRODUCT_DATA_DIR / "us_healthcare_cards" / "response_v1" diff --git a/tests/product/us/us_mail/test_us_mail_v3.py b/tests/product/us/us_mail/test_us_mail_v3.py index aefa85ff..e6e2366e 100644 --- a/tests/product/us/us_mail/test_us_mail_v3.py +++ b/tests/product/us/us_mail/test_us_mail_v3.py @@ -8,7 +8,7 @@ from mindee.product.us.us_mail.us_mail_v3_document import ( UsMailV3Document, ) -from tests.product import PRODUCT_DATA_DIR +from tests.utils import PRODUCT_DATA_DIR RESPONSE_DIR = PRODUCT_DATA_DIR / "us_mail" / "response_v3" diff --git a/tests/test_client.py b/tests/test_client.py index 1f02efd5..3dd5f7b7 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -13,8 +13,7 @@ from mindee.product.invoice_splitter.invoice_splitter_v1 import InvoiceSplitterV1 from mindee.product.receipt.receipt_v5 import ReceiptV5 from tests.mindee_http.test_error import ERROR_DATA_DIR -from tests.test_inputs import FILE_TYPES_DIR, PRODUCT_DATA_DIR -from tests.utils import clear_envvars, dummy_envvars +from tests.utils import FILE_TYPES_DIR, PRODUCT_DATA_DIR, clear_envvars, dummy_envvars @pytest.fixture @@ -102,7 +101,7 @@ def test_cut_options(dummy_client: Client): ) except MindeeHTTPError: pass - assert input_doc.count_doc_pages() == 5 + assert input_doc.page_count == 5 input_doc.close() diff --git a/tests/test_client_v2.py b/tests/test_client_v2.py index f70165bc..a9e69b27 100644 --- a/tests/test_client_v2.py +++ b/tests/test_client_v2.py @@ -10,8 +10,7 @@ from mindee.parsing.v2.inference import Inference from mindee.parsing.v2.job import Job from mindee.parsing.v2.job_response import JobResponse -from tests.test_inputs import FILE_TYPES_DIR, V2_DATA_DIR -from tests.utils import dummy_envvars +from tests.utils import FILE_TYPES_DIR, V2_DATA_DIR, dummy_envvars @pytest.fixture diff --git a/tests/test_client_v2_integration.py b/tests/test_client_v2_integration.py index b88c1d72..26d60ff3 100644 --- a/tests/test_client_v2_integration.py +++ b/tests/test_client_v2_integration.py @@ -8,7 +8,7 @@ from mindee import ClientV2, InferenceParameters, PathInput, UrlInputSource from mindee.error.mindee_http_error_v2 import MindeeHTTPErrorV2 from mindee.parsing.v2.inference_response import InferenceResponse -from tests.test_inputs import FILE_TYPES_DIR, PRODUCT_DATA_DIR +from tests.utils import FILE_TYPES_DIR, PRODUCT_DATA_DIR @pytest.fixture(scope="session") diff --git a/tests/test_inputs.py b/tests/test_inputs.py deleted file mode 100644 index 5f901bfc..00000000 --- a/tests/test_inputs.py +++ /dev/null @@ -1,229 +0,0 @@ -import io -from pathlib import Path - -import pypdfium2 as pdfium -import pytest - -from mindee.error.mimetype_error import MimeTypeError -from mindee.error.mindee_error import MindeeError, MindeeSourceError -from mindee.input.page_options import KEEP_ONLY, REMOVE, PageOptions -from mindee.input.sources import ( - Base64Input, - BytesInput, - FileInput, - PathInput, - UrlInputSource, -) -from tests.product import PRODUCT_DATA_DIR - -FILE_TYPES_DIR = Path("./tests/data/file_types") -V2_DATA_DIR = Path("./tests/data/v2") - -# -# PDF -# - - -def test_pdf_reconstruct_ok(): - input_obj = PathInput(FILE_TYPES_DIR / "pdf" / "multipage.pdf") - input_obj.process_pdf(behavior=KEEP_ONLY, on_min_pages=2, page_indexes=range(5)) - assert isinstance(input_obj.file_object, io.BytesIO) - - -def test_pdf_read_contents(): - input_doc = PathInput(FILE_TYPES_DIR / "pdf" / "multipage.pdf") - contents = input_doc.read_contents(close_file=False) - assert contents[0] == "multipage.pdf" - assert isinstance(contents[1], bytes) - assert not input_doc.file_object.closed - - input_doc.read_contents(close_file=True) - assert input_doc.file_object.closed - - -def test_pdf_reconstruct_no_cut(): - input_file = PathInput(FILE_TYPES_DIR / "pdf" / "multipage.pdf") - assert input_file.count_doc_pages() == 12 - assert isinstance(input_file.file_object, io.BufferedReader) - - -def _assert_pdf_options(input_obj, numb_pages): - assert input_obj.is_pdf() is True - # Currently the least verbose way of comparing pages with pypdfium2 - # I.e. each page is read & rendered as a rasterized image. These images are then compared as raw byte sequences. - cut_pdf = pdfium.PdfDocument(input_obj.file_object) - pdf = pdfium.PdfDocument(FILE_TYPES_DIR / "pdf" / f"multipage_cut-{numb_pages}.pdf") - for idx in range(len(pdf)): - pdf_page = pdf.get_page(idx) - pdf_page_render = pdfium.PdfPage.render(pdf_page) - cut_pdf_page = cut_pdf.get_page(idx) - cut_pdf_page_render = pdfium.PdfPage.render(cut_pdf_page) - - assert bytes(pdf_page_render.buffer) == bytes(cut_pdf_page_render.buffer) - cut_pdf.close() - pdf.close() - - -@pytest.mark.parametrize("numb_pages", [1, 2, 3]) -def test_process_pdf_cut_n_pages(numb_pages: int): - input_obj = PathInput(FILE_TYPES_DIR / "pdf" / "multipage.pdf") - input_obj.process_pdf( - behavior=KEEP_ONLY, on_min_pages=2, page_indexes=[0, -2, -1][:numb_pages] - ) - assert input_obj.count_doc_pages() == numb_pages - _assert_pdf_options(input_obj, numb_pages) - - -@pytest.mark.parametrize("numb_pages", [1, 2, 3]) -def test_apply_pages_pdf_cut_n_pages(numb_pages: int): - input_obj = PathInput(FILE_TYPES_DIR / "pdf" / "multipage.pdf") - input_obj.apply_page_options( - PageOptions(on_min_pages=2, page_indexes=[0, -2, -1][:numb_pages]) - ) - assert input_obj.count_doc_pages() == numb_pages - _assert_pdf_options(input_obj, numb_pages) - - -def test_pdf_keep_5_first_pages(): - input_obj = PathInput(FILE_TYPES_DIR / "pdf" / "multipage.pdf") - assert input_obj.is_pdf() is True - input_obj.process_pdf( - behavior=KEEP_ONLY, on_min_pages=2, page_indexes=[0, 1, 2, 3, 4] - ) - assert input_obj.count_doc_pages() == 5 - - -def test_pdf_keep_invalid_pages(): - input_obj = PathInput(FILE_TYPES_DIR / "pdf" / "multipage.pdf") - assert input_obj.is_pdf() is True - input_obj.process_pdf(behavior=KEEP_ONLY, on_min_pages=2, page_indexes=[0, 1, 17]) - assert input_obj.count_doc_pages() == 2 - - -def test_pdf_remove_5_last_pages(): - input_obj = PathInput(FILE_TYPES_DIR / "pdf" / "multipage.pdf") - assert input_obj.is_pdf() is True - input_obj.process_pdf( - behavior=REMOVE, on_min_pages=2, page_indexes=[-5, -4, -3, -2, -1] - ) - assert input_obj.count_doc_pages() == 7 - - -def test_pdf_remove_5_first_pages(): - input_obj = PathInput(FILE_TYPES_DIR / "pdf" / "multipage.pdf") - assert input_obj.is_pdf() is True - input_obj.process_pdf(behavior=REMOVE, on_min_pages=2, page_indexes=list(range(5))) - assert input_obj.count_doc_pages() == 7 - - -def test_pdf_remove_invalid_pages(): - input_obj = PathInput(FILE_TYPES_DIR / "pdf" / "multipage.pdf") - assert input_obj.is_pdf() is True - input_obj.process_pdf(behavior=REMOVE, on_min_pages=2, page_indexes=[16]) - assert input_obj.count_doc_pages() == 12 - - -def test_pdf_keep_no_pages(): - input_obj = PathInput(FILE_TYPES_DIR / "pdf" / "multipage.pdf") - assert input_obj.is_pdf() is True - # empty page indexes - with pytest.raises(RuntimeError): - input_obj.process_pdf(behavior=KEEP_ONLY, on_min_pages=2, page_indexes=[]) - # all invalid pages - with pytest.raises(RuntimeError): - input_obj.process_pdf(behavior=KEEP_ONLY, on_min_pages=2, page_indexes=[16, 17]) - - -def test_pdf_remove_all_pages(): - input_obj = PathInput(FILE_TYPES_DIR / "pdf" / "multipage.pdf") - assert input_obj.is_pdf() is True - with pytest.raises(RuntimeError): - input_obj.process_pdf( - behavior=REMOVE, on_min_pages=2, page_indexes=list(range(15)) - ) - - -def test_pdf_input_from_file(): - with open(FILE_TYPES_DIR / "pdf" / "multipage.pdf", "rb") as fp: - input_obj = FileInput(fp) - assert input_obj.is_pdf() is True - input_obj.process_pdf(behavior=KEEP_ONLY, on_min_pages=2, page_indexes=[0]) - assert input_obj.count_doc_pages() == 1 - - -def test_pdf_input_from_base64(): - with open(PRODUCT_DATA_DIR / "invoices" / "invoice_10p.txt", "rt") as fp: - input_obj = Base64Input(fp.read(), filename="invoice_10p.pdf") - assert input_obj.is_pdf() is True - input_obj.process_pdf(behavior=KEEP_ONLY, on_min_pages=2, page_indexes=[0]) - assert input_obj.count_doc_pages() == 1 - - -def test_pdf_input_from_bytes(): - with open(PRODUCT_DATA_DIR / "invoices" / "invoice_10p.pdf", "rb") as fp: - input_obj = BytesInput(fp.read(), filename="invoice_10p.pdf") - assert input_obj.is_pdf() is True - input_obj.process_pdf(behavior=KEEP_ONLY, on_min_pages=2, page_indexes=[0]) - assert input_obj.count_doc_pages() == 1 - - -def test_pdf_input_from_url(): - with pytest.raises(MindeeSourceError): - UrlInputSource(url="http://example.com/invoice.pdf") - - -def test_pdf_blank_check(): - with pytest.raises(MindeeError): - input_obj = PathInput(FILE_TYPES_DIR / "pdf" / "blank.pdf") - input_obj.process_pdf(behavior=KEEP_ONLY, on_min_pages=2, page_indexes=[0]) - - with pytest.raises(MindeeError): - input_obj = PathInput(FILE_TYPES_DIR / "pdf" / "blank_1.pdf") - input_obj.process_pdf(behavior=KEEP_ONLY, on_min_pages=2, page_indexes=[0]) - - input_not_blank = PathInput(FILE_TYPES_DIR / "pdf" / "not_blank_image_only.pdf") - assert input_not_blank.count_doc_pages() == 1 - - -# -# Broken PDFS fixing -# - - -def test_broken_unfixable_pdf(): - with pytest.raises(MimeTypeError): - input_doc = PathInput(FILE_TYPES_DIR / "pdf" / "broken_unfixable.pdf") - input_doc.fix_pdf() - - -def test_broken_fixable_pdf(): - input_doc = PathInput(FILE_TYPES_DIR / "pdf" / "broken_fixable.pdf") - input_doc.fix_pdf() - - -def test_broken_fixable_invoice_pdf(): - input_doc = PathInput(FILE_TYPES_DIR / "pdf" / "broken_invoice.pdf") - input_doc.fix_pdf() - - -# -# Images -# - - -def test_tif_input_from_path(): - input_obj_1 = PathInput(FILE_TYPES_DIR / "receipt.tif") - assert input_obj_1.file_mimetype == "image/tiff" - - input_obj_2 = PathInput(FILE_TYPES_DIR / "receipt.tiff") - assert input_obj_2.file_mimetype == "image/tiff" - - -def test_heic_input_from_path(): - input_obj_1 = PathInput(FILE_TYPES_DIR / "receipt.heic") - assert input_obj_1.file_mimetype == "image/heic" - - -def test_txt_input_from_path(): - with pytest.raises(MimeTypeError): - PathInput(FILE_TYPES_DIR / "receipt.txt") diff --git a/tests/utils.py b/tests/utils.py index 6e1af706..b550e1ee 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -7,6 +7,13 @@ REQUEST_TIMEOUT_ENV_NAME, ) +DATA_DIR = Path("./tests/data/") + +EXTRAS_DIR = DATA_DIR / "extras" +FILE_TYPES_DIR = DATA_DIR / "file_types" +V2_DATA_DIR = DATA_DIR / "v2" +PRODUCT_DATA_DIR = DATA_DIR / "products" + def clear_envvars(monkeypatch) -> None: """ @@ -25,9 +32,6 @@ def dummy_envvars(monkeypatch) -> None: monkeypatch.setenv(API_KEY_ENV_NAME, "dummy") -EXTRAS_DIR = Path("./tests/data/extras/") - - def levenshtein_ratio(ref_str: str, target_str: str) -> float: """ Calculates the Levenshtein ratio between two strings. diff --git a/tests/v2/test_inference_response.py b/tests/v2/test_inference_response.py index e305d9e6..a5b8bff7 100644 --- a/tests/v2/test_inference_response.py +++ b/tests/v2/test_inference_response.py @@ -10,7 +10,7 @@ from mindee.parsing.v2.inference import Inference from mindee.parsing.v2.inference_file import InferenceFile from mindee.parsing.v2.inference_model import InferenceModel -from tests.test_inputs import V2_DATA_DIR +from tests.utils import V2_DATA_DIR def _get_samples(json_path: Path, rst_path: Path) -> Tuple[dict, str]: diff --git a/tests/workflows/test_workflow_integration.py b/tests/workflows/test_workflow_integration.py index 4c4a2cdb..beecb8e7 100644 --- a/tests/workflows/test_workflow_integration.py +++ b/tests/workflows/test_workflow_integration.py @@ -7,7 +7,7 @@ from mindee.input import WorkflowOptions from mindee.parsing.common.execution_priority import ExecutionPriority from mindee.product import FinancialDocumentV1, GeneratedV1 -from tests.product import PRODUCT_DATA_DIR +from tests.utils import PRODUCT_DATA_DIR @pytest.fixture