Skip to content

Commit

Permalink
cope with deflated images with CMYK Black Only
Browse files Browse the repository at this point in the history
  • Loading branch information
pubpub-zz committed Nov 29, 2023
1 parent 441bb59 commit ec4e1ca
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 7 deletions.
8 changes: 8 additions & 0 deletions pypdf/_xobj_image_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,18 @@ def _get_imagemode(
)
return mode2, True
elif color_space[0] == "/DeviceN":
original_color_space = color_space
color_components = len(color_space[1])
color_space = color_space[2]
if isinstance(color_space, IndirectObject): # pragma: no cover
color_space = color_space.get_object()
if color_space == "/DeviceCMYK" and color_components == 1:
if original_color_space[1][0] != "/Black":
logger_warning(
f"Color {original_color_space[1][0]} converted to Gray. Please share PDF with pypdf dev team",
__name__,
)
return "L", True
mode2, invert_color = _get_imagemode(
color_space, color_components, prev_mode, depth + 1
)
Expand Down
16 changes: 9 additions & 7 deletions pypdf/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,13 +558,16 @@ class CCITTFaxDecode:

@staticmethod
def _get_parameters(
parameters: Union[None, ArrayObject, DictionaryObject, IndirectObject], rows: int
parameters: Union[None, ArrayObject, DictionaryObject, IndirectObject],
rows: int,
) -> CCITParameters:
# TABLE 3.9 Optional parameters for the CCITTFaxDecode filter
k = 0
columns = 1728
if parameters:
parameters_unwrapped = cast(Union[ArrayObject, DictionaryObject], parameters.get_object())
parameters_unwrapped = cast(
Union[ArrayObject, DictionaryObject], parameters.get_object()
)
if isinstance(parameters_unwrapped, ArrayObject):
for decode_parm in parameters_unwrapped:
if CCITT.COLUMNS in decode_parm:
Expand Down Expand Up @@ -778,8 +781,8 @@ def _xobj_to_image(x_object_obj: Dict[str, Any]) -> Tuple[Optional[str], bytes,
alpha = None
filters = x_object_obj.get(SA.FILTER, [None])
lfilters = filters[-1] if isinstance(filters, list) else filters
if lfilters == FT.FLATE_DECODE:
img, image_format, extension, invert_color = _handle_flate(
if lfilters in (FT.FLATE_DECODE, FT.RUN_LENGTH_DECODE):
img, image_format, extension, _ = _handle_flate(
size,
data,
mode,
Expand Down Expand Up @@ -821,15 +824,14 @@ def _xobj_to_image(x_object_obj: Dict[str, Any]) -> Tuple[Optional[str], bytes,
".png",
False,
)

# CMYK images and other colorspaces without decode
# require reverting the scale (cf p243,2§ last sentence)
decode = x_object_obj.get(
IA.DECODE,
([1.0, 0.0] * len(img.getbands()))
if (
(img.mode == "CMYK" or (invert_color and img.mode == "L"))
and lfilters in (FT.DCT_DECODE, FT.JPX_DECODE)
(img.mode == "CMYK" and lfilters in (FT.DCT_DECODE, FT.JPX_DECODE))
or (invert_color and img.mode == "L")
)
else None,
)
Expand Down
16 changes: 16 additions & 0 deletions tests/test_images.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,3 +220,19 @@ def test_loop_in_image_keys():
reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
reader.pages[0]["/Resources"]["/XObject"][NameObject("/toto")] = NullObject()
reader.pages[0].images.keys()


@pytest.mark.enable_socket()
def test_devicen_cmyk_black_only():
    """
    Cf #2321

    Extract the first image of two pages of the sample document and
    require each to be at least 99% similar to its reference picture.
    """
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/13501846/Addressing_Adversarial_Attacks.pdf",
        name="iss2321.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))

    # One entry per checked image: (page index, reference image URL, cache name).
    references = (
        (
            5,
            "https://github.com/py-pdf/pypdf/assets/4083478/cc2dabc1-86e6-4179-a8a4-2b0efea124be",
            "iss2321_img0.pdf",
        ),
        (
            10,
            "https://github.com/py-pdf/pypdf/assets/4083478/6b64a949-42be-40d5-9eea-95707f350d89",
            "iss2321_img1.pdf",
        ),
    )
    for page_index, reference_url, cache_name in references:
        # Fetch the reference first, then extract, as each page is checked in turn.
        expected = Image.open(
            BytesIO(get_data_from_url(reference_url, name=cache_name))
        )
        extracted = reader.pages[page_index].images[0].image
        assert image_similarity(extracted, expected) >= 0.99

0 comments on commit ec4e1ca

Please sign in to comment.