Skip to content

Commit

Permalink
BUG: Incorrect number of inline images
Browse files: browse the repository at this point in the history
closes #2629
  • Loading branch information
pubpub-zz committed May 8, 2024
1 parent a584fb5 commit c5d62a3
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 18 deletions.
24 changes: 6 additions & 18 deletions pypdf/_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
# POSSIBILITY OF SUCH DAMAGE.

import math
import re
import sys
from decimal import Decimal
from pathlib import Path
Expand Down Expand Up @@ -58,7 +57,6 @@
mult,
)
from ._utils import (
WHITESPACES_AS_REGEXP,
CompressedTransformationMatrix,
File,
ImageFile,
Expand Down Expand Up @@ -335,7 +333,6 @@ def __init__(
self.pdf = pdf
self.inline_images: Optional[Dict[str, ImageFile]] = None
# below Union for mypy but actually Optional[List[str]]
self.inline_images_keys: Optional[List[Union[str, List[str]]]] = None
self.indirect_reference = indirect_reference

def hash_value_data(self) -> bytes:
Expand Down Expand Up @@ -439,19 +436,8 @@ def _get_ids_image(
return []
else:
call_stack.append(_i)
if self.inline_images_keys is None:
content = self._get_contents_as_bytes() or b""
nb_inlines = 0
for matching in re.finditer(
WHITESPACES_AS_REGEXP + b"BI" + WHITESPACES_AS_REGEXP,
content,
):
start_of_string = content[: matching.start()]
if len(re.findall(b"[^\\\\]\\(", start_of_string)) == len(
re.findall(b"[^\\\\]\\)", start_of_string)
):
nb_inlines += 1
self.inline_images_keys = [f"~{x}~" for x in range(nb_inlines)]
if self.inline_images is None:
self.inline_images = self._get_inline_images()
if obj is None:
obj = self
if ancest is None:
Expand All @@ -460,7 +446,7 @@ def _get_ids_image(
if PG.RESOURCES not in obj or RES.XOBJECT not in cast(
DictionaryObject, obj[PG.RESOURCES]
):
return self.inline_images_keys
return [] if self.inline_images is None else list(self.inline_images.keys())

x_object = obj[PG.RESOURCES][RES.XOBJECT].get_object() # type: ignore
for o in x_object:
Expand All @@ -470,7 +456,9 @@ def _get_ids_image(
lst.append(o if len(ancest) == 0 else ancest + [o])
else: # is a form with possible images inside
lst.extend(self._get_ids_image(x_object[o], ancest + [o], call_stack))
return lst + self.inline_images_keys
if self.inline_images is not None:
lst.extend(list(self.inline_images.keys()))
return lst

def _get_image(
self,
Expand Down
5 changes: 5 additions & 0 deletions tests/test_workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -1025,6 +1025,11 @@ def test_inline_images():
with pytest.raises(KeyError) as exc:
reader.pages[2]._get_image(("test",))

url = "https://github.com/py-pdf/pypdf/files/15233597/bug1065245.pdf"
name = "iss2598c.pdf" # test coming from another test in test_image.py
reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
assert len(reader.pages[0].images) == 3


@pytest.mark.enable_socket()
def test_iss():
Expand Down

0 comments on commit c5d62a3

Please sign in to comment.