diff --git a/PyPDF2/generic/_data_structures.py b/PyPDF2/generic/_data_structures.py index 283b33b22..f6630066c 100644 --- a/PyPDF2/generic/_data_structures.py +++ b/PyPDF2/generic/_data_structures.py @@ -759,17 +759,17 @@ def _read_inline_image(self, stream: StreamType) -> Dict[str, Any]: tok = stream.read(1) # Check for End Image tok2 = stream.read(1) - if tok2 == b"I": - # Data can contain EI, so check for the Q operator. + if tok2 == b"I" and buf[loc - 1 : loc] in WHITESPACES: + # Data can contain [\s]EI, so check for the separator \s; 4 chars suffice, the Q operator is not required. tok3 = stream.read(1) info = tok + tok2 - # We need to find whitespace between EI and Q. + # We need to find at least one whitespace character after EI. has_q_whitespace = False while tok3 in WHITESPACES: has_q_whitespace = True info += tok3 tok3 = stream.read(1) - if tok3 == b"Q" and has_q_whitespace: + if has_q_whitespace: stream.seek(-1, 1) break else: diff --git a/tests/test_workflows.py b/tests/test_workflows.py index cc194f435..57cab7018 100644 --- a/tests/test_workflows.py +++ b/tests/test_workflows.py @@ -17,7 +17,7 @@ from PyPDF2.constants import ImageAttributes as IA from PyPDF2.constants import PageAttributes as PG from PyPDF2.constants import Ressources as RES -from PyPDF2.errors import PdfReadError, PdfReadWarning +from PyPDF2.errors import PdfReadWarning from PyPDF2.filters import _xobj_to_image from . 
import get_pdf_from_url, normalize_warnings @@ -425,7 +425,7 @@ def test_get_metadata(url, name): "https://corpora.tika.apache.org/base/docs/govdocs1/938/938702.pdf", "tika-938702.pdf", False, - (PdfReadError, "Unexpected end of stream"), + None, # issue #1090 is now fixed ), ( "https://corpora.tika.apache.org/base/docs/govdocs1/942/942358.pdf", @@ -512,19 +512,16 @@ def test_extract_text(url, name, strict, exception): ), ( "https://corpora.tika.apache.org/base/docs/govdocs1/957/957304.pdf", - "tika-938702.pdf", + "tika-957304.pdf", ), ], ) def test_compress_raised(url, name): data = BytesIO(get_pdf_from_url(url, name=name)) reader = PdfReader(data) - # TODO: which page exactly? - # TODO: Is it reasonable to have an exception here? - with pytest.raises(PdfReadError) as exc: - for page in reader.pages: - page.compress_content_streams() - assert exc.value.args[0] == "Unexpected end of stream" + # no longer raises since the fix for issue #1090 + for page in reader.pages: + page.compress_content_streams() @pytest.mark.parametrize(