diff --git a/pypdf/_page_labels.py b/pypdf/_page_labels.py index 000820fca..63016b616 100644 --- a/pypdf/_page_labels.py +++ b/pypdf/_page_labels.py @@ -11,12 +11,12 @@ Example 1 --------- ->>> reader.trailer["/Root"]["/PageLabels"]["/Nums"] +>>> reader.root_object["/PageLabels"]["/Nums"] [0, IndirectObject(18, 0, 139929798197504), 8, IndirectObject(19, 0, 139929798197504)] ->>> reader.get_object(reader.trailer["/Root"]["/PageLabels"]["/Nums"][1]) +>>> reader.get_object(reader.root_object["/PageLabels"]["/Nums"][1]) {'/S': '/r'} ->>> reader.get_object(reader.trailer["/Root"]["/PageLabels"]["/Nums"][3]) +>>> reader.get_object(reader.root_object["/PageLabels"]["/Nums"][3]) {'/S': '/D'} Example 2 @@ -57,7 +57,7 @@ aa to zz for the next 26, and so on) """ -from typing import Iterator, Optional, Tuple +from typing import Iterator, Optional, Tuple, cast from ._protocols import PdfReaderProtocol from ._utils import logger_warning @@ -127,10 +127,10 @@ def index2label(reader: PdfReaderProtocol, index: int) -> str: Returns: The label of the page, e.g. "iv" or "4". """ - root = reader.trailer["/Root"] + root = cast(DictionaryObject, reader.root_object) if "/PageLabels" not in root: return str(index + 1) # Fallback - number_tree = root["/PageLabels"] + number_tree = cast(DictionaryObject, root["/PageLabels"].get_object()) if "/Nums" in number_tree: # [Nums] shall be an array of the form # [ key 1 value 1 key 2 value 2 ... key n value n ] @@ -139,7 +139,7 @@ def index2label(reader: PdfReaderProtocol, index: int) -> str: # The keys shall be sorted in numerical order, # analogously to the arrangement of keys in a name tree # as described in 7.9.6, "Name Trees." - nums = number_tree["/Nums"] + nums = cast(ArrayObject, number_tree["/Nums"]) i = 0 value = None start_index = 0 diff --git a/pypdf/_protocols.py b/pypdf/_protocols.py index fdb8a01b5..e5ede7946 100644 --- a/pypdf/_protocols.py +++ b/pypdf/_protocols.py @@ -59,6 +59,10 @@ def pages(self) -> List[Any]: def trailer(self) -> Dict[str, Any]: ... + @property + def root_object(self) -> PdfObjectProtocol: + ... + def get_object(self, indirect_reference: Any) -> Optional[PdfObjectProtocol]: ... @@ -67,6 +71,10 @@ class PdfWriterProtocol(Protocol): # deprecated _objects: List[Any] _id_translated: Dict[int, Dict[int, int]] + @property + def root_object(self) -> PdfObjectProtocol: + ... + def get_object(self, indirect_reference: Any) -> Optional[PdfObjectProtocol]: ... diff --git a/pypdf/_reader.py b/pypdf/_reader.py index 230852653..01ad4b782 100644 --- a/pypdf/_reader.py +++ b/pypdf/_reader.py @@ -282,9 +282,7 @@ class PdfReader: @property def viewer_preferences(self) -> Optional[ViewerPreferences]: """Returns the existing ViewerPreferences as an overloaded dictionary.""" - o = cast(DictionaryObject, self.trailer["/Root"]).get( - CD.VIEWER_PREFERENCES, None - ) + o = self.root_object.get(CD.VIEWER_PREFERENCES, None) if o is None: return None o = o.get_object() @@ -344,6 +342,33 @@ def __init__( elif password is not None: raise PdfReadError("Not encrypted file") + @property + def root_object(self) -> DictionaryObject: + """Provide access to "/Root". standardized with PdfWriter.""" + return cast(DictionaryObject, self.trailer[TK.ROOT].get_object()) + + @property + def _info(self) -> Optional[DictionaryObject]: + """ + Provide access to "/Info". standardized with PdfWriter. + + Returns: + /Info Dictionary ; None if the entry does not exists + """ + info = self.trailer.get(TK.INFO, None) + return None if info is None else cast(DictionaryObject, info.get_object()) + + @property + def _ID(self) -> Optional[ArrayObject]: + """ + Provide access to "/ID". standardized with PdfWriter. + + Returns: + /ID array ; None if the entry does not exists + """ + id = self.trailer.get(TK.ID, None) + return None if id is None else cast(ArrayObject, id.get_object()) + def _repr_mimebundle_( self, include: Union[None, Iterable[str]] = None, @@ -400,13 +425,12 @@ def metadata(self) -> Optional[DocumentInformation]: """ if TK.INFO not in self.trailer: return None - obj = self.trailer[TK.INFO] retval = DocumentInformation() - if isinstance(obj, type(None)): + if isinstance(self._info, type(None)): raise PdfReadError( "trailer not found or does not point to document information directory" ) - retval.update(obj) # type: ignore + retval.update(self._info) # type: ignore return retval @property @@ -414,7 +438,7 @@ def xmp_metadata(self) -> Optional[XmpInformation]: """XMP (Extensible Metadata Platform) data.""" try: self._override_encryption = True - return self.trailer[TK.ROOT].xmp_metadata # type: ignore + return self.root_object.xmp_metadata # type: ignore finally: self._override_encryption = False @@ -433,7 +457,7 @@ def _get_num_pages(self) -> int: # the PDF file's page count is used in this case. Otherwise, # the original method (flattened page count) is used. if self.is_encrypted: - return self.trailer[TK.ROOT]["/Pages"]["/Count"] # type: ignore + return self.root_object["/Pages"]["/Count"] # type: ignore else: if self.flattened_pages is None: self._flatten() @@ -493,7 +517,7 @@ def get_fields( field_attributes.update(CheckboxRadioButtonAttributes.attributes_dict()) if retval is None: retval = {} - catalog = cast(DictionaryObject, self.trailer[TK.ROOT]) + catalog = self.root_object # get the AcroForm tree if CD.ACRO_FORM in catalog: tree = cast(Optional[TreeObject], catalog[CD.ACRO_FORM]) @@ -755,7 +779,7 @@ def _get_named_destinations( """ if retval is None: retval = {} - catalog = cast(DictionaryObject, self.trailer[TK.ROOT]) + catalog = self.root_object # get the name tree if CA.DESTS in catalog: @@ -822,7 +846,7 @@ def _get_outline( ) -> OutlineType: if outline is None: outline = [] - catalog = cast(DictionaryObject, self.trailer[TK.ROOT]) + catalog = self.root_object # get the outline dictionary and named destinations if CO.OUTLINES in catalog: @@ -868,7 +892,7 @@ def threads(self) -> Optional[ArrayObject]: It's an array of dictionaries with "/F" and "/I" properties or None if there are no articles. """ - catalog = cast(DictionaryObject, self.trailer[TK.ROOT]) + catalog = self.root_object if CO.THREADS in catalog: return cast("ArrayObject", catalog[CO.THREADS]) else: @@ -1071,9 +1095,8 @@ def page_layout(self) -> Optional[str]: * - /TwoPageRight - Show two pages at a time, odd-numbered pages on the right """ - trailer = cast(DictionaryObject, self.trailer[TK.ROOT]) - if CD.PAGE_LAYOUT in trailer: - return cast(NameObject, trailer[CD.PAGE_LAYOUT]) + if CD.PAGE_LAYOUT in self.root_object: + return cast(NameObject, self.root_object[CD.PAGE_LAYOUT]) return None @property @@ -1098,7 +1121,7 @@ def page_mode(self) -> Optional[PagemodeType]: - Show attachments panel """ try: - return self.trailer[TK.ROOT]["/PageMode"] # type: ignore + return self.root_object["/PageMode"] # type: ignore except KeyError: return None @@ -1119,12 +1142,12 @@ def _flatten( if pages is None: # Fix issue 327: set flattened_pages attribute only for # decrypted file - catalog = self.trailer[TK.ROOT].get_object() - pages = catalog["/Pages"].get_object() # type: ignore + catalog = self.root_object + pages = cast(DictionaryObject, catalog["/Pages"].get_object()) self.flattened_pages = [] if PA.TYPE in pages: - t = pages[PA.TYPE] + t = cast(str, pages[PA.TYPE]) # if pdf has no type, considered as a page if /Kids is missing elif PA.KIDS not in pages: t = "/Page" @@ -1925,7 +1948,7 @@ def is_encrypted(self) -> bool: def xfa(self) -> Optional[Dict[str, Any]]: tree: Optional[TreeObject] = None retval: Dict[str, Any] = {} - catalog = cast(DictionaryObject, self.trailer[TK.ROOT]) + catalog = self.root_object if "/AcroForm" not in catalog or not catalog["/AcroForm"]: return None @@ -1955,7 +1978,7 @@ def add_form_topname(self, name: str) -> Optional[DictionaryObject]: Returns: The created object. ``None`` means no object was created. """ - catalog = cast(DictionaryObject, self.trailer[TK.ROOT]) + catalog = self.root_object if "/AcroForm" not in catalog or not isinstance( catalog["/AcroForm"], DictionaryObject @@ -1997,7 +2020,7 @@ def rename_form_topname(self, name: str) -> Optional[DictionaryObject]: Returns: The modified object. ``None`` means no object was modified. """ - catalog = cast(DictionaryObject, self.trailer[TK.ROOT]) + catalog = self.root_object if "/AcroForm" not in catalog or not isinstance( catalog["/AcroForm"], DictionaryObject @@ -2030,7 +2053,7 @@ def _list_attachments(self) -> List[str]: Returns: list of filenames """ - catalog = cast(DictionaryObject, self.trailer["/Root"]) + catalog = self.root_object # From the catalog get the embedded file names try: filenames = cast( @@ -2068,7 +2091,7 @@ def _get_attachments( dictionary of filename -> Union[bytestring or List[ByteString]] if the filename exists multiple times a List of the different version will be provided """ - catalog = cast(DictionaryObject, self.trailer["/Root"]) + catalog = self.root_object # From the catalog get the embedded file names try: filenames = cast( diff --git a/pypdf/_writer.py b/pypdf/_writer.py index db529eb8c..949cf8f5a 100644 --- a/pypdf/_writer.py +++ b/pypdf/_writer.py @@ -211,6 +211,16 @@ def __init__( self._encrypt_entry: Optional[DictionaryObject] = None self._ID: Union[ArrayObject, None] = None + @property + def root_object(self) -> DictionaryObject: + """ + Provide direct access to Pdf Structure + + Note: + Recommended be used only for read access + """ + return self._root_object + def __enter__(self) -> "PdfWriter": """Store that writer is initialized by 'with'.""" self.with_as_usage = True @@ -1084,7 +1094,7 @@ def clone_reader_document_root(self, reader: PdfReader) -> None: reader: PdfReader from the document root should be copied. """ self._objects.clear() - self._root_object = cast(DictionaryObject, reader.trailer[TK.ROOT].clone(self)) + self._root_object = reader.root_object.clone(self) self._root = self._root_object.indirect_reference # type: ignore[assignment] self._pages = self._root_object.raw_get("/Pages") self._flatten() @@ -1165,10 +1175,10 @@ def clone_document_from_reader( """ self.clone_reader_document_root(reader) if TK.INFO in reader.trailer: - self._info = reader.trailer[TK.INFO].clone(self).indirect_reference # type: ignore + self._info = reader._info.clone(self).indirect_reference # type: ignore try: - self._ID = cast(ArrayObject, reader.trailer[TK.ID].clone(self)) - except KeyError: + self._ID = cast(ArrayObject, reader._ID).clone(self) + except AttributeError: pass if callable(after_page_append): for page in cast( @@ -2546,7 +2556,7 @@ def merge( else: outline_item_typ = self.get_outline_root() - _ro = cast("DictionaryObject", reader.trailer[TK.ROOT]) + _ro = reader.root_object if import_outline and CO.OUTLINES in _ro: outline = self._get_filtered_outline( _ro.get(CO.OUTLINES, None), srcpages, reader @@ -2569,7 +2579,7 @@ def merge( self._root_object[NameObject("/AcroForm")] = self._add_object( cast( DictionaryObject, - cast(DictionaryObject, reader.trailer["/Root"])["/AcroForm"], + reader.root_object["/AcroForm"], ).clone(self, False, ("/Fields",)) ) arr = ArrayObject() @@ -2580,7 +2590,7 @@ def merge( ) trslat = self._id_translated[id(reader)] try: - for f in reader.trailer["/Root"]["/AcroForm"]["/Fields"]: # type: ignore + for f in reader.root_object["/AcroForm"]["/Fields"]: # type: ignore try: ind = IndirectObject(trslat[f.idnum], 0, self) if ind not in arr: diff --git a/tests/test_writer.py b/tests/test_writer.py index 12110e766..9469092f5 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -998,7 +998,7 @@ def test_startup_dest(): assert pdf_file_writer.open_destination is None pdf_file_writer.open_destination = pdf_file_writer.pages[9] # checked also using Acrobrat to verify the good page is opened - op = pdf_file_writer._root_object["/OpenAction"] + op = pdf_file_writer.root_object["/OpenAction"] assert op[0] == pdf_file_writer.pages[9].indirect_reference assert op[1] == "/Fit" op = pdf_file_writer.open_destination @@ -1008,16 +1008,16 @@ def test_startup_dest(): assert pdf_file_writer.open_destination == op # irrelevant, just for coverage - pdf_file_writer._root_object[NameObject("/OpenAction")][0] = NumberObject(0) + pdf_file_writer.root_object[NameObject("/OpenAction")][0] = NumberObject(0) pdf_file_writer.open_destination with pytest.raises(Exception) as exc: - del pdf_file_writer._root_object[NameObject("/OpenAction")][0] + del pdf_file_writer.root_object[NameObject("/OpenAction")][0] pdf_file_writer.open_destination assert "Invalid Destination" in str(exc.value) pdf_file_writer.open_destination = "Test" # checked also using Acrobrat to verify open_destination - op = pdf_file_writer._root_object["/OpenAction"] + op = pdf_file_writer.root_object["/OpenAction"] assert isinstance(op, TextStringObject) assert op == "Test" op = pdf_file_writer.open_destination @@ -1025,10 +1025,10 @@ def test_startup_dest(): assert op == "Test" # irrelevant, this is just for coverage - pdf_file_writer._root_object[NameObject("/OpenAction")] = NumberObject(0) + pdf_file_writer.root_object[NameObject("/OpenAction")] = NumberObject(0) assert pdf_file_writer.open_destination is None pdf_file_writer.open_destination = None - assert "/OpenAction" not in pdf_file_writer._root_object + assert "/OpenAction" not in pdf_file_writer.root_object pdf_file_writer.open_destination = None @@ -1112,7 +1112,7 @@ def test_append_multiple(): reader, [0, 0, 0] ) # to demonstre multiple insertion of same page at once writer.append(reader, [0, 0, 0]) # second pack - pages = writer._root_object["/Pages"]["/Kids"] + pages = writer.root_object["/Pages"]["/Kids"] assert pages[0] not in pages[1:] # page not repeated assert pages[-1] not in pages[0:-1] # page not repeated @@ -1436,10 +1436,10 @@ def test_named_dest_page_number(): writer = PdfWriter() writer.add_blank_page(100, 100) writer.append(BytesIO(get_data_from_url(url, name=name)), pages=[0, 1, 2]) - assert len(writer._root_object["/Names"]["/Dests"]["/Names"]) == 2 - assert writer._root_object["/Names"]["/Dests"]["/Names"][-1][0] == (1 + 1) + assert len(writer.root_object["/Names"]["/Dests"]["/Names"]) == 2 + assert writer.root_object["/Names"]["/Dests"]["/Names"][-1][0] == (1 + 1) writer.append(BytesIO(get_data_from_url(url, name=name))) - assert len(writer._root_object["/Names"]["/Dests"]["/Names"]) == 6 + assert len(writer.root_object["/Names"]["/Dests"]["/Names"]) == 6 writer2 = PdfWriter() writer2.add_blank_page(100, 100) dest = writer2.add_named_destination("toto", 0) @@ -1448,7 +1448,7 @@ def test_named_dest_page_number(): writer2.write(b) b.seek(0) writer.append(b) - assert len(writer._root_object["/Names"]["/Dests"]["/Names"]) == 6 + assert len(writer.root_object["/Names"]["/Dests"]["/Names"]) == 6 def test_update_form_fields(tmp_path): @@ -1681,7 +1681,7 @@ def test_missing_fields(pdf_file_path): writer = PdfWriter() writer.append(reader, [0]) - del writer._root_object["/AcroForm"]["/Fields"] + del writer.root_object["/AcroForm"]["/Fields"] with pytest.raises(PyPdfError) as exc: writer.update_page_form_field_values( writer.pages[0], {"foo": "some filled in text"}, flags=1 @@ -1765,8 +1765,7 @@ def test_viewerpreferences(): assert v.center_window == True # noqa: E712 v.center_window = False assert ( - writer._root_object["/ViewerPreferences"]["/CenterWindow"] - == False # noqa: E712 + writer.root_object["/ViewerPreferences"]["/CenterWindow"] == False # noqa: E712 ) assert v.print_area == "/CropBox" with pytest.raises(ValueError): @@ -1775,7 +1774,7 @@ def test_viewerpreferences(): v.non_fullscreen_pagemode = "/toto" v.non_fullscreen_pagemode = "/UseOutlines" assert ( - writer._root_object["/ViewerPreferences"]["/NonFullScreenPageMode"] + writer.root_object["/ViewerPreferences"]["/NonFullScreenPageMode"] == "/UseOutlines" ) writer = PdfWriter(clone_from=reader) @@ -1783,19 +1782,17 @@ def test_viewerpreferences(): assert v.center_window == True # noqa: E712 v.center_window = False assert ( - writer._root_object["/ViewerPreferences"]["/CenterWindow"] - == False # noqa: E712 + writer.root_object["/ViewerPreferences"]["/CenterWindow"] == False # noqa: E712 ) writer = PdfWriter(clone_from=reader) - writer._root_object[NameObject("/ViewerPreferences")] = writer._add_object( - writer._root_object["/ViewerPreferences"] + writer.root_object[NameObject("/ViewerPreferences")] = writer._add_object( + writer.root_object["/ViewerPreferences"] ) v = writer.viewer_preferences v.center_window = False assert ( - writer._root_object["/ViewerPreferences"]["/CenterWindow"] - == False # noqa: E712 + writer.root_object["/ViewerPreferences"]["/CenterWindow"] == False # noqa: E712 ) v.num_copies = 1 assert v.num_copies == 1 @@ -1806,9 +1803,9 @@ def test_viewerpreferences(): assert len(v.print_pagerange) == 0 writer.create_viewer_preferences() - assert len(writer._root_object["/ViewerPreferences"]) == 0 + assert len(writer.root_object["/ViewerPreferences"]) == 0 writer.viewer_preferences.direction = "/R2L" - assert len(writer._root_object["/ViewerPreferences"]) == 1 + assert len(writer.root_object["/ViewerPreferences"]) == 1 del reader.trailer["/Root"]["/ViewerPreferences"] assert reader.viewer_preferences is None @@ -1994,11 +1991,11 @@ def test_reattach_fields(): writer.add_page(p) assert len(writer.reattach_fields()) == 15 assert len(writer.reattach_fields()) == 0 # nothing to append anymore - assert len(writer._root_object["/AcroForm"]["/Fields"]) == 15 + assert len(writer.root_object["/AcroForm"]["/Fields"]) == 15 writer = PdfWriter(clone_from=reader) assert len(writer.reattach_fields()) == 7 writer.reattach_fields() - assert len(writer._root_object["/AcroForm"]["/Fields"]) == 15 + assert len(writer.root_object["/AcroForm"]["/Fields"]) == 15 writer = PdfWriter() for p in reader.pages: