py-pdf · MartinThoma · Aug 31, 2022 · Aug 29, 2022 · Aug 29, 2022 · Aug 29, 2022
diff --git a/PyPDF2/_page.py b/PyPDF2/_page.py
@@ -71,6 +71,7 @@
     TextStringObject,
     encode_pdfdocencoding,
 )
+from .types import PdfReaderProtocol
 
 
 def _get_rectangle(self: Any, name: str, defaults: Iterable[str]) -> RectangleObject:
@@ -241,13 +242,19 @@ class PageObject(DictionaryObject):
 
     def __init__(
         self,
-        pdf: Optional[Any] = None,  # PdfReader
+        pdf: Optional[PdfReaderProtocol] = None,
         indirect_ref: Optional[IndirectObject] = None,
     ) -> None:
-        from ._reader import PdfReader
-
+        """
+        Initialize the underlying dictionary and store both arguments.
+
+        :param pdf: Object satisfying ``PdfReaderProtocol`` that is kept as
+            ``self.pdf``; may be ``None``.
+        :param indirect_ref: Indirect reference that is kept as
+            ``self.indirect_ref``; may be ``None``.
+        """
         DictionaryObject.__init__(self)
         self.pdf: Optional[PdfReaderProtocol] = pdf
         self.indirect_ref = indirect_ref
 
     def hash_value_data(self) -> bytes:

diff --git a/PyPDF2/_reader.py b/PyPDF2/_reader.py
@@ -908,9 +908,9 @@ def _build_outline_item(self, node: DictionaryObject) -> Optional[Destination]:
         return outline_item
 
     @property
-    def pages(self) -> _VirtualList:
+    def pages(self) -> List[PageObject]:
-        """Read-only property that emulates a list of :py:class:`Page<PyPDF2._page.Page>` objects."""
+        """Read-only property that emulates a list of :py:class:`PageObject<PyPDF2._page.PageObject>` objects."""
-        return _VirtualList(self._get_num_pages, self._get_page)
+        return _VirtualList(self._get_num_pages, self._get_page)  # type: ignore  # list emulation, not a real List
 
     @property
     def page_layout(self) -> Optional[str]:

diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py
@@ -1460,7 +1460,7 @@ def remove_text(self, ignore_byte_string_object: bool = False) -> None:
         pg_dict = cast(DictionaryObject, self.get_object(self._pages))
         pages = cast(List[IndirectObject], pg_dict[PA.KIDS])
         for page in pages:
-            page_ref = cast(Dict[str, Any], self.get_object(page))
+            page_ref = cast(PageObject, self.get_object(page))
             content = page_ref["/Contents"].get_object()
             if not isinstance(content, ContentStream):
                 content = ContentStream(content, page_ref)

diff --git a/PyPDF2/types.py b/PyPDF2/types.py
@@ -1,12 +1,12 @@
 """Helpers for working with PDF types."""
 
-from typing import List, Union
+from typing import Any, Dict, List, Optional, Union
 
 try:
     # Python 3.8+: https://peps.python.org/pep-0586
-    from typing import Literal  # type: ignore[attr-defined]
+    from typing import Literal, Protocol  # type: ignore[attr-defined]
 except ImportError:
-    from typing_extensions import Literal  # type: ignore[misc]
+    from typing_extensions import Literal, Protocol  # type: ignore[misc]
 
 try:
     # Python 3.10+: https://www.python.org/dev/peps/pep-0484/
@@ -54,3 +54,32 @@
     "/UseOC",
     "/UseAttachments",
 ]
+
+
+class PdfReaderProtocol(Protocol):  # pragma: no cover
+    """
+    Structural type of the reader object that ``PageObject`` stores as ``pdf``.
+
+    Matching is by shape, not inheritance: type checkers accept any object
+    offering these members. The attributes are declared as read-only
+    properties, so an implementation may expose them as properties or as
+    plain attributes.
+    """
+
+    @property
+    def pdf_header(self) -> str:
+        ...
+
+    @property
+    def strict(self) -> bool:
+        ...
+
+    @property
+    def xref(self) -> Dict[int, Dict[int, Any]]:
+        ...
+
+    @property
+    def pages(self) -> List[Any]:
+        ...
+
+    def get_object(self, indirect_reference: Any) -> Optional[Any]:
+        ...