diff --git a/pypdf/filters.py b/pypdf/filters.py
index 430bec220..fe3f3c71a 100644
--- a/pypdf/filters.py
+++ b/pypdf/filters.py
@@ -817,9 +817,16 @@ def _xobj_to_image(x_object_obj: Dict[str, Any]) -> Tuple[Optional[str], bytes,
             ".tiff",
             False,
         )
+    elif mode == "CMYK":
+        img, image_format, extension, invert_color = (
+            Image.frombytes(mode, size, data),
+            "TIFF",
+            ".tif",
+            False,
+        )
+    elif mode == "":
+        raise PdfReadError(f"ColorSpace field not found in {x_object_obj}")
     else:
-        if mode == "":
-            raise PdfReadError(f"ColorSpace field not found in {x_object_obj}")
         img, image_format, extension, invert_color = (
             Image.frombytes(mode, size, data),
             "PNG",
diff --git a/tests/test_images.py b/tests/test_images.py
index 7e9686fac..b1b907014 100644
--- a/tests/test_images.py
+++ b/tests/test_images.py
@@ -246,3 +246,12 @@ def test_bi_in_text():
     reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
     assert reader.pages[0].images.keys() == ["~0~"]
     assert reader.pages[0].images[0].name == "~0~.png"
+
+
+@pytest.mark.enable_socket()
+def test_cmyk_no_filter():
+    """Cf #2522"""
+    url = "https://github.com/py-pdf/pypdf/files/14614887/out3.pdf"
+    name = "iss2522.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    reader.pages[0].images[0].image