Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Add page label support to PdfWriter #1558

Merged
merged 23 commits into from
Jan 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
b110225
ENH: Add page label support to PdfWriter
lorenzomanini Jan 15, 2023
c5fa998
Merge branch 'main' of https://github.com/py-pdf/pypdf
lorenzomanini Jan 15, 2023
39c6423
fix import, fix overlapping ranges behaviour
lorenzomanini Jan 17, 2023
4a48c29
add tests
lorenzomanini Jan 17, 2023
474b211
Merge remote-tracking branch 'upstream/main'
lorenzomanini Jan 17, 2023
9c97450
Merge branch 'main' of https://github.com/lorenzomanini/pypdf
lorenzomanini Jan 17, 2023
55ed6c8
Apply suggestions from code review: fix mypy
lorenzomanini Jan 17, 2023
dbd6bce
Apply suggestions from code review: fix mypy
lorenzomanini Jan 17, 2023
717c882
Fix mypy
lorenzomanini Jan 17, 2023
e298494
fix flake
lorenzomanini Jan 17, 2023
6977af6
Moved nums functions to _page_labels.py
lorenzomanini Jan 18, 2023
251390f
Added tests
lorenzomanini Jan 18, 2023
d56a63a
Added Docs
lorenzomanini Jan 18, 2023
64beecb
add PageLabelStyle constants
lorenzomanini Jan 18, 2023
739d117
More explicit PageLabelStyle constants
lorenzomanini Jan 18, 2023
b24b2ce
Apply suggestions from code review: fix Doc
lorenzomanini Jan 18, 2023
75fa103
fix flake8
lorenzomanini Jan 18, 2023
8c0a82e
add lorenzomanini to CONTRIBUTORS.md
lorenzomanini Jan 18, 2023
679b8f8
Merge branch 'main' into main
lorenzomanini Jan 18, 2023
cd0b81e
change set_page_label page indexing to 0 based
lorenzomanini Jan 18, 2023
c1c1357
Apply suggestions from code review: fix Doc
lorenzomanini Jan 19, 2023
ef2a6e8
Merge remote-tracking branch 'upstream/main'
lorenzomanini Jan 19, 2023
daad866
fix test
lorenzomanini Jan 19, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CONTRIBUTORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ history and [GitHubs 'Contributors' feature](https://github.com/py-pdf/pypdf/gra
* [KourFrost](https://github.com/KourFrost)
* [Lightup1](https://github.com/Lightup1)
* [Majumder, Jonah](https://github.com/jonahmajumder)
* [Manini, Lorenzo](https://github.com/lorenzomanini)
* [maxbeer99](https://github.com/maxbeer99)
* [Mérino, Antoine](https://github.com/Merinorus)
* [Perrensen, Olsen](https://github.com/olsonperrensen)
Expand Down
90 changes: 89 additions & 1 deletion pypdf/_page_labels.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,21 @@
aa to zz for the next 26, and so on)
"""

from typing import Iterator
from typing import (
Iterator,
Optional,
Tuple,
)

from ._protocols import PdfReaderProtocol
from ._utils import logger_warning

from .generic import (
ArrayObject,
DictionaryObject,
NumberObject,
)


def number2uppercase_roman_numeral(num: int) -> str:
roman = [
Expand Down Expand Up @@ -176,3 +186,81 @@ def index2label(reader: PdfReaderProtocol, index: int) -> str:
)
# TODO: Implement /Kids and /Limits for number tree
return str(index + 1) # Fallback


def nums_insert(
    key: NumberObject,
    value: DictionaryObject,
    nums: ArrayObject,
) -> None:
    """
    Insert a key, value pair in a Nums array.

    See 7.9.7 "Number Trees".

    The array is treated as a flat sequence ``[key0, value0, key1, value1, ...]``.
    The insertion position is found by scanning the keys from the end, so the
    existing keys are assumed to already be in ascending order. If ``key`` is
    already present, its value is replaced instead of adding a second entry.

    Args:
        key: number key of the entry
        value: value of the entry
        nums: Nums array to modify (changed in place)

    Raises:
        ValueError: if ``nums`` has an odd number of elements.
    """
    if len(nums) % 2 != 0:
        raise ValueError("a nums like array must have an even number of elements")

    # Walk backwards over the keys (even positions) until the pair in front of
    # position ``i`` has a key strictly smaller than the one being inserted.
    i = len(nums)
    while i != 0 and key <= nums[i - 2]:
        i = i - 2

    if i < len(nums) and key == nums[i]:
        # Key already exists: overwrite its value only.
        nums[i + 1] = value
    else:
        nums.insert(i, key)
        nums.insert(i + 1, value)


def nums_clear_range(
    key: NumberObject,
    page_index_to: int,
    nums: ArrayObject,
) -> None:
    """
    Remove all entries in a number tree in a range after an entry.

    See 7.9.7 "Number Trees".

    The array is treated as a flat sequence ``[key0, value0, key1, value1, ...]``.
    Every entry that follows the entry ``key`` and whose own key is less than
    or equal to ``page_index_to`` is removed; the entry ``key`` itself is kept.

    Args:
        key: number key of the entry before the range
        page_index_to: The page index of the upper limit of the range
        nums: Nums array to modify (changed in place)

    Raises:
        ValueError: if ``nums`` has an odd number of elements, if
            ``page_index_to`` is smaller than ``key``, or if ``key`` is not a
            key of ``nums``.
    """
    if len(nums) % 2 != 0:
        raise ValueError("a nums like array must have an even number of elements")
    if page_index_to < key:
        raise ValueError("page_index_to must be greater or equal than key")

    # Look at key slots (even positions) only, so that a numeric *value* equal
    # to ``key`` can never be mistaken for the entry itself.
    for pos in range(0, len(nums), 2):
        if nums[pos] == key:
            break
    else:
        raise ValueError(f"{key} is not a key of the nums array")

    # Find the end of the run of entries to drop, then delete it in one slice.
    first = pos + 2
    last = first
    while last < len(nums) and nums[last] <= page_index_to:
        last += 2
    del nums[first:last]


def nums_next(
    key: NumberObject,
    nums: ArrayObject,
) -> Tuple[Optional[NumberObject], Optional[DictionaryObject]]:
    """
    Return the (key, value) pair of the entry after the given one.

    See 7.9.7 "Number Trees".

    The array is treated as a flat sequence ``[key0, value0, key1, value1, ...]``.

    Args:
        key: number key of the entry
        nums: Nums array

    Returns:
        The ``(key, value)`` pair that follows the entry ``key``, or
        ``(None, None)`` when ``key`` is the last entry.

    Raises:
        ValueError: if ``nums`` has an odd number of elements or ``key`` is
            not a key of ``nums``.
    """
    if len(nums) % 2 != 0:
        raise ValueError("a nums like array must have an even number of elements")

    # Look at key slots (even positions) only, so that a numeric *value* equal
    # to ``key`` can never be mistaken for the entry itself.
    for pos in range(0, len(nums), 2):
        if nums[pos] == key:
            break
    else:
        raise ValueError(f"{key} is not a key of the nums array")

    following = pos + 2
    if following < len(nums):
        return (nums[following], nums[following + 1])
    return (None, None)
105 changes: 105 additions & 0 deletions pypdf/_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@

from ._encryption import Encryption
from ._page import PageObject, _VirtualList
from ._page_labels import nums_insert, nums_clear_range, nums_next
from ._reader import PdfReader
from ._security import _alg33, _alg34, _alg35
from ._utils import (
Expand Down Expand Up @@ -88,6 +89,7 @@
from .constants import StreamAttributes as SA
from .constants import TrailerKeys as TK
from .constants import TypFitArguments, UserAccessPermissions
from .constants import PageLabelStyle
from .generic import (
PAGE_FIT,
AnnotationBuilder,
Expand Down Expand Up @@ -123,6 +125,7 @@
ZoomArgType,
)


logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -2874,6 +2877,108 @@ def reset_translation(
else:
raise Exception("invalid parameter {reader}")

def set_page_label(
    self,
    page_index_from: int,
    page_index_to: int,
    style: Optional[PageLabelStyle] = None,
    prefix: Optional[str] = None,
    start: Optional[int] = 0,
) -> None:
    """
    Set a page label to a range of pages.

    Page indexes must be given starting from 0.
    Labels must have a style, a prefix or both.
    If a range is not assigned any page label, a decimal label starting
    from 1 is applied.

    Args:
        page_index_from: page index of the beginning of the range starting from 0
        page_index_to: page index of the end of the range (inclusive) starting from 0
        style: The numbering style to be used for the numeric portion of each page label:
            '/D' Decimal arabic numerals
            '/R' Uppercase roman numerals
            '/r' Lowercase roman numerals
            '/A' Uppercase letters (A to Z for the first 26 pages, AA to ZZ for the next 26, and so on)
            '/a' Lowercase letters (a to z for the first 26 pages, aa to zz for the next 26, and so on)
        prefix: The label prefix for page labels in this range.
        start: The value of the numeric portion for the first page label in the range.
            Subsequent pages are numbered sequentially from this value, which
            must be greater than or equal to 1. The default of 0 (or None)
            means "not given": no explicit start value is stored in the label.

    Raises:
        ValueError: if neither ``style`` nor ``prefix`` is given, if the page
            range is invalid or exceeds the number of pages, or if ``start``
            is given but smaller than 1.
    """
    if style is None and prefix is None:
        raise ValueError("at least one between style and prefix must be given")
    if page_index_from < 0:
        raise ValueError("page_index_from must be equal or greater then 0")
    if page_index_to < page_index_from:
        raise ValueError(
            "page_index_to must be equal or greater then page_index_from"
        )
    if page_index_to >= len(self.pages):
        raise ValueError("page_index_to exceeds number of pages")
    # None is accepted by the signature; treat it like the 0 sentinel so that
    # it is never forwarded and turned into a number object downstream.
    if start is None:
        start = 0
    if start != 0 and start < 1:
        raise ValueError("if given, start must be equal or greater than one")

    self._set_page_label(page_index_from, page_index_to, style, prefix, start)

def _set_page_label(
    self,
    page_index_from: int,
    page_index_to: int,
    style: Optional[PageLabelStyle] = None,
    prefix: Optional[str] = None,
    start: Optional[int] = 0,
) -> None:
    """
    Set a page label to a range of pages.

    Page indexes must be given starting from 0.
    Labels must have a style, a prefix or both.
    If a range is not assigned any page label, a decimal label starting
    from 1 is applied.

    No argument validation is done here; ``set_page_label`` is the checked
    entry point.

    Args:
        page_index_from: page index of the beginning of the range starting from 0
        page_index_to: page index of the end of the range (inclusive) starting from 0
        style: The numbering style to be used for the numeric portion of each page label:
            /D Decimal arabic numerals
            /R Uppercase roman numerals
            /r Lowercase roman numerals
            /A Uppercase letters (A to Z for the first 26 pages, AA to ZZ for the next 26, and so on)
            /a Lowercase letters (a to z for the first 26 pages, aa to zz for the next 26, and so on)
        prefix: The label prefix for page labels in this range.
        start: The value of the numeric portion for the first page label in the range.
            Subsequent pages are numbered sequentially from this value, which
            must be greater than or equal to 1. When 0 or None, no /St entry
            is written to the label dictionary.
    """
    default_page_label = DictionaryObject()
    default_page_label[NameObject("/S")] = NameObject("/D")

    new_page_label = DictionaryObject()
    if style is not None:
        new_page_label[NameObject("/S")] = NameObject(style)
    if prefix is not None:
        new_page_label[NameObject("/P")] = TextStringObject(prefix)
    # Both None and 0 mean "no explicit start"; only a real value is stored.
    if start is not None and start != 0:
        new_page_label[NameObject("/St")] = NumberObject(start)

    if NameObject(CatalogDictionary.PAGE_LABELS) not in self._root_object:
        # First label in this document: create the tree with a decimal
        # default label covering everything from page 0.
        nums = ArrayObject()
        nums_insert(NumberObject(0), default_page_label, nums)
        page_labels = TreeObject()
        page_labels[NameObject("/Nums")] = nums
        self._root_object[NameObject(CatalogDictionary.PAGE_LABELS)] = page_labels

    page_labels = cast(
        TreeObject, self._root_object[NameObject(CatalogDictionary.PAGE_LABELS)]
    )
    nums = cast(ArrayObject, page_labels[NameObject("/Nums")])

    # Add the new label, then drop any older labels that start inside the
    # range it now covers.
    nums_insert(NumberObject(page_index_from), new_page_label, nums)
    nums_clear_range(NumberObject(page_index_from), page_index_to, nums)
    # If no label starts right after the range (and there are pages left),
    # fall back to the default decimal label from there on.
    next_label_pos, *_ = nums_next(NumberObject(page_index_from), nums)
    if next_label_pos != page_index_to + 1 and page_index_to + 1 < len(self.pages):
        nums_insert(NumberObject(page_index_to + 1), default_page_label, nums)

    page_labels[NameObject("/Nums")] = nums
    self._root_object[NameObject(CatalogDictionary.PAGE_LABELS)] = page_labels


def _pdf_objectify(obj: Union[Dict[str, Any], str, int, List[Any]]) -> PdfObject:
if isinstance(obj, PdfObject):
Expand Down
10 changes: 10 additions & 0 deletions pypdf/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,16 @@ class OutlineFontFlag(IntFlag):
bold = 2


class PageLabelStyle:
    """
    Numbering styles for the numeric portion of a page label.

    Table 8.10 in the 1.7 reference.
    """

    DECIMAL = "/D"  # Decimal arabic numerals
    LOWERCASE_ROMAN = "/r"  # Lowercase roman numerals
    UPPERCASE_ROMAN = "/R"  # Uppercase roman numerals
    LOWERCASE_LETTER = "/a"  # Lowercase letters
    UPPERCASE_LETTER = "/A"  # Uppercase letters


PDF_KEYS = (
AnnotationDictionaryAttributes,
CatalogAttributes,
Expand Down
Loading