Skip to content

Commit

Permalink
MAINT: Return None instead of -1 when page is not attached (#2376)
Browse files Browse the repository at this point in the history
If a page is not attached to a document, it does not have a page number. So we cannot return a "normal" number.
Before this PR, we returned -1.

Returning None compared to using `-1` has two advantages:

* It makes intuitive sense what it means
* It is part of the type annotation and mypy will complain about it if you don't handle that. If the callers (users) of pypdf are not careful, mypy might catch their error.

For this reason, we now return `None`.

See #2010

Closes #2371
  • Loading branch information
MartinThoma committed Jan 6, 2024
1 parent ef5bacb commit e411b76
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 19 deletions.
8 changes: 4 additions & 4 deletions pypdf/_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -1480,21 +1480,21 @@ def compress_content_streams(self, level: int = -1) -> None:
raise ValueError("Page must be part of a PdfWriter")

@property
def page_number(self) -> int:
def page_number(self) -> Optional[int]:
"""
Read-only property which return the page number with the pdf file.
Returns:
int : page number ; -1 if the page is not attached to a pdf
int : page number ; None if the page is not attached to a pdf
"""
if self.indirect_reference is None:
return -1
return None
else:
try:
lst = self.indirect_reference.pdf.pages
return lst.index(self)
except ValueError:
return -1
return None

def _debug_for_extract(self) -> str: # pragma: no cover
out = ""
Expand Down
16 changes: 8 additions & 8 deletions pypdf/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -805,32 +805,32 @@ def threads(self) -> Optional[ArrayObject]:

def _get_page_number_by_indirect(
self, indirect_reference: Union[None, int, NullObject, IndirectObject]
) -> int:
) -> Optional[int]:
"""
Generate _page_id2num.
Args:
indirect_reference:
Returns:
The page number.
The page number or None
"""
if self._page_id2num is None:
self._page_id2num = {
x.indirect_reference.idnum: i for i, x in enumerate(self.pages) # type: ignore
}

if indirect_reference is None or isinstance(indirect_reference, NullObject):
return -1
return None
if isinstance(indirect_reference, int):
idnum = indirect_reference
else:
idnum = indirect_reference.idnum
assert self._page_id2num is not None, "hint for mypy"
ret = self._page_id2num.get(idnum, -1)
ret = self._page_id2num.get(idnum, None)
return ret

def get_page_number(self, page: PageObject) -> int:
def get_page_number(self, page: PageObject) -> Optional[int]:
"""
Retrieve page number of a given PageObject.
Expand All @@ -839,19 +839,19 @@ def get_page_number(self, page: PageObject) -> int:
an instance of :class:`PageObject<pypdf._page.PageObject>`
Returns:
The page number or -1 if page is not found
The page number or None if page is not found
"""
return self._get_page_number_by_indirect(page.indirect_reference)

def get_destination_page_number(self, destination: Destination) -> int:
def get_destination_page_number(self, destination: Destination) -> Optional[int]:
"""
Retrieve page number of a given Destination object.
Args:
destination: The destination to get page number.
Returns:
The page number or -1 if page is not found
The page number or None if page is not found
"""
return self._get_page_number_by_indirect(destination.page)

Expand Down
6 changes: 3 additions & 3 deletions tests/test_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -1199,12 +1199,12 @@ def test_merge_transformed_page_into_blank():
True,
)
blank = PageObject.create_blank_page(width=100, height=100)
assert blank.page_number == -1
assert blank.page_number is None
inserted_blank = writer.add_page(blank)
assert blank.page_number == -1 # the inserted page is a clone
assert blank.page_number is None # the inserted page is a clone
assert inserted_blank.page_number == len(writer.pages) - 1
del writer._pages.get_object()["/Kids"][-1]
assert inserted_blank.page_number == -1
assert inserted_blank.page_number is None


def test_pages_printing():
Expand Down
14 changes: 10 additions & 4 deletions tests/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import time
from io import BytesIO
from pathlib import Path
from typing import List, Union

import pytest

Expand Down Expand Up @@ -36,6 +37,9 @@
SAMPLE_ROOT = PROJECT_ROOT / "sample-files"


NestedList = Union[int, None, List["NestedList"]]


@pytest.mark.parametrize(
("src", "num_pages"),
[("selenium-pypdf-issue-177.pdf", 1), ("pdflatex-outline.pdf", 4)],
Expand Down Expand Up @@ -695,12 +699,14 @@ def test_issue604(caplog, strict):
]
assert normalize_warnings(caplog.text) == msg

def get_dest_pages(x) -> int:
def get_dest_pages(x) -> NestedList:
if isinstance(x, list):
r = [get_dest_pages(y) for y in x]
return r
return [get_dest_pages(y) for y in x]
else:
return pdf.get_destination_page_number(x) + 1
destination_page_number = pdf.get_destination_page_number(x)
if destination_page_number is None:
return destination_page_number
return destination_page_number + 1

out = []

Expand Down

0 comments on commit e411b76

Please sign in to comment.