Patch summary (free-form text before the first "diff --git" header).

pypdf/_reader.py, get_object: when the object header cannot be recovered
(the "else" branch), bind generation = -1 alongside idnum = -1. The new test
expects both values in the strict-mode error message, "(-1 -1)".
NOTE(review): presumably generation was left unbound on this path before;
confirm against the full get_object body, which this patch does not show.

tests/test_reader.py: add test_damaged_pdf, which loads a malformed PDF in
non-strict mode (page count must be readable) and in strict mode (must raise
PdfReadError with the expected message).

NOTE(review): line structure was restored from a whitespace-collapsed copy;
context-line indentation in pypdf/_reader.py should be verified against the
target tree before applying.

diff --git a/pypdf/_reader.py b/pypdf/_reader.py
index 034a0d091..809ccda83 100644
--- a/pypdf/_reader.py
+++ b/pypdf/_reader.py
@@ -398,7 +398,8 @@ def get_object(
                     self.stream.seek(m.start(0) + 1)
                     idnum, generation = self.read_object_header(self.stream)
                 else:
-                    idnum = -1  # exception will be raised below
+                    idnum = -1
+                    generation = -1  # exception will be raised below
             if idnum != indirect_reference.idnum and self.xref_index:
                 # Xref table probably had bad indexes due to not being zero-indexed
                 if self.strict:
diff --git a/tests/test_reader.py b/tests/test_reader.py
index a346ca76d..ff39189e0 100644
--- a/tests/test_reader.py
+++ b/tests/test_reader.py
@@ -1516,3 +1516,17 @@ def test_truncated_xref(caplog):
     name = "iss2575.pdf"
     PdfReader(BytesIO(get_data_from_url(url, name=name)))
     assert "Invalid/Truncated xref table. Rebuilding it." in caplog.text
+
+
+@pytest.mark.enable_socket()
+def test_damaged_pdf():
+    url = "https://github.com/py-pdf/pypdf/files/15186107/malformed_pdf.pdf"
+    name = "malformed_pdf.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)), strict=False)
+    len(reader.pages)
+    strict_reader = PdfReader(BytesIO(get_data_from_url(url, name=name)), strict=True)
+    with pytest.raises(PdfReadError) as exc:
+        len(strict_reader.pages)
+    assert (
+        exc.value.args[0] == "Expected object ID (21 0) does not match actual (-1 -1)."
+    )