From ee30daac9561f804b06cfcbd1c6523e7813ca38b Mon Sep 17 00:00:00 2001 From: Harry Karvonen Date: Mon, 4 Jul 2022 13:58:36 +0300 Subject: [PATCH] Resolve IndirectObject when it refers to a free entry. --- PyPDF2/_reader.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/PyPDF2/_reader.py b/PyPDF2/_reader.py index 3d5ac84e5..8156a8278 100644 --- a/PyPDF2/_reader.py +++ b/PyPDF2/_reader.py @@ -691,6 +691,9 @@ def _get_outlines( # so continue to load the file without the Bookmarks return outlines + if isinstance(lines, NullObject): + return outlines + # TABLE 8.3 Entries in the outline dictionary if "/First" in lines: node = cast(DictionaryObject, lines["/First"]) @@ -1052,6 +1055,10 @@ def get_object(self, indirect_reference: IndirectObject) -> Optional[PdfObject]: indirect_reference.generation in self.xref and indirect_reference.idnum in self.xref[indirect_reference.generation] ): + if self.xref_free_entry.get(indirect_reference.generation, {}).get( + indirect_reference.idnum, False + ): + return NullObject() start = self.xref[indirect_reference.generation][indirect_reference.idnum] self.stream.seek(start, 0) idnum, generation = self.read_object_header(self.stream) @@ -1225,6 +1232,7 @@ def read(self, stream: StreamType) -> None: # read all cross reference tables and their trailers self.xref: Dict[int, Dict[Any, Any]] = {} + self.xref_free_entry: Dict[int, Dict[Any, Any]] = {} self.xref_objStm: Dict[int, Tuple[Any, Any]] = {} self.trailer = DictionaryObject() while True: @@ -1379,10 +1387,11 @@ def _read_standard_xref_table(self, stream: StreamType) -> None: if line[-1] in b"0123456789t": stream.seek(-1, 1) - offset_b, generation_b = line[:16].split(b" ") + offset_b, generation_b, entry_type_b = line[:18].split(b" ") offset, generation = int(offset_b), int(generation_b) if generation not in self.xref: self.xref[generation] = {} + self.xref_free_entry[generation] = {} if num in self.xref[generation]: # It really seems like we should allow the last # xref table in the file to override previous @@ -1391,6 +1400,7 @@ def _read_standard_xref_table(self, stream: StreamType) -> None: pass else: self.xref[generation][num] = offset + self.xref_free_entry[generation][num] = entry_type_b == b"f" cnt += 1 num += 1 read_non_whitespace(stream)