diff --git a/PyPDF2/filters.py b/PyPDF2/filters.py index 1748f063d..f7292b3f2 100644 --- a/PyPDF2/filters.py +++ b/PyPDF2/filters.py @@ -602,10 +602,12 @@ def _xobj_to_image(x_object_obj: Dict[str, Any]) -> Tuple[Optional[str], bytes]: from .generic import ByteStringObject if isinstance(lookup, ByteStringObject): + if base == ColorSpaces.DEVICE_GRAY and len(lookup) == hival + 1: + lookup = b"".join([lookup[i:i + 1] * 3 for i in range(len(lookup))]) img.putpalette(lookup) else: img.putpalette(lookup.get_data()) - img = img.convert("RGB") + img = img.convert("L" if base == ColorSpaces.DEVICE_GRAY else "RGB") if G.S_MASK in x_object_obj: # add alpha channel alpha = Image.frombytes("L", size, x_object_obj[G.S_MASK].get_data()) img.putalpha(alpha) diff --git a/tests/test_reader.py b/tests/test_reader.py index d883cb075..bd8c58335 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -192,6 +192,7 @@ def test_get_outline(src, outline_elements): marks=pytest.mark.xfail(reason="broken image extraction"), ), ("imagemagick-CCITTFaxDecode.pdf", ["Im0.tiff"]), + (SAMPLE_ROOT / "019-grayscale-image/grayscale-image.pdf", ["X0.png"]), ], ) def test_get_images(src, expected_images): @@ -211,7 +212,7 @@ def test_get_images(src, expected_images): for image, expected_image in zip(images_extracted, expected_images): assert image.name == expected_image try: - fn = f"test-out-{src}-{image.name}" + fn = f"{src}-test-out-{image.name}" with open(fn, "wb") as fp: fp.write(image.data) assert (