diff --git a/PyPDF2/_cmap.py b/PyPDF2/_cmap.py index 6b668d2e8..c3acb6564 100644 --- a/PyPDF2/_cmap.py +++ b/PyPDF2/_cmap.py @@ -182,7 +182,7 @@ def parse_to_unicode( cm = prepare_cm(ft) for l in cm.split(b"\n"): process_rg, process_char = process_cm_line( - l, process_rg, process_char, map_dict, int_entry + l.strip(b" "), process_rg, process_char, map_dict, int_entry ) for a, value in map_dict.items(): diff --git a/PyPDF2/_reader.py b/PyPDF2/_reader.py index 53b9e4392..1664bfe9d 100644 --- a/PyPDF2/_reader.py +++ b/PyPDF2/_reader.py @@ -1303,7 +1303,7 @@ def _basic_validation(self, stream: StreamType) -> None: stream.seek(0, os.SEEK_END) def _find_eof_marker(self, stream: StreamType) -> None: - last_mb = stream.tell() - 1024 * 1024 + 1 # offset of last MB of stream + last_mb = 8 # to parse whole file line = b"" while line[:5] != b"%%EOF": if stream.tell() < last_mb: diff --git a/tests/test_reader.py b/tests/test_reader.py index 97365da49..cb356094f 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -11,11 +11,7 @@ from PyPDF2.constants import ImageAttributes as IA from PyPDF2.constants import PageAttributes as PG from PyPDF2.constants import Ressources as RES -from PyPDF2.errors import ( - STREAM_TRUNCATED_PREMATURELY, - PdfReadError, - PdfReadWarning, -) +from PyPDF2.errors import PdfReadError, PdfReadWarning from PyPDF2.filters import _xobj_to_image from PyPDF2.generic import Destination @@ -396,7 +392,9 @@ def test_read_malformed_header(): def test_read_malformed_body(): with pytest.raises(PdfReadError) as exc: PdfReader(io.BytesIO(b"%PDF-"), strict=True) - assert exc.value.args[0] == STREAM_TRUNCATED_PREMATURELY + assert ( + exc.value.args[0] == "EOF marker not found" + ) # used to be:STREAM_TRUNCATED_PREMATURELY def test_read_prev_0_trailer():