Skip to content

Commit

Permalink
ENH: Add page label support to PdfWriter (#1558)
Browse files Browse the repository at this point in the history
Add PageLabelStyle constants
  • Loading branch information
lorenzomanini committed Jan 19, 2023
1 parent e6500f8 commit e711846
Show file tree
Hide file tree
Showing 5 changed files with 346 additions and 1 deletion.
1 change: 1 addition & 0 deletions CONTRIBUTORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ history and [GitHubs 'Contributors' feature](https://github.com/py-pdf/pypdf/gra
* [KourFrost](https://github.com/KourFrost)
* [Lightup1](https://github.com/Lightup1)
* [Majumder, Jonah](https://github.com/jonahmajumder)
* [Manini, Lorenzo](https://github.com/lorenzomanini)
* [maxbeer99](https://github.com/maxbeer99)
* [Mérino, Antoine](https://github.com/Merinorus)
* [Perrensen, Olsen](https://github.com/olsonperrensen)
Expand Down
90 changes: 89 additions & 1 deletion pypdf/_page_labels.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,21 @@
aa to zz for the next 26, and so on)
"""

from typing import Iterator
from typing import (
Iterator,
Optional,
Tuple,
)

from ._protocols import PdfReaderProtocol
from ._utils import logger_warning

from .generic import (
ArrayObject,
DictionaryObject,
NumberObject,
)


def number2uppercase_roman_numeral(num: int) -> str:
roman = [
Expand Down Expand Up @@ -176,3 +186,81 @@ def index2label(reader: PdfReaderProtocol, index: int) -> str:
)
# TODO: Implement /Kids and /Limits for number tree
return str(index + 1) # Fallback


def nums_insert(
    key: NumberObject,
    value: DictionaryObject,
    nums: ArrayObject,
) -> None:
    """
    Store an entry in a flat ``[key, value, key, value, ...]`` Nums array.

    If ``key`` is already present at the located position its value is
    overwritten; otherwise the pair is inserted in front of the first
    trailing entry whose key is not smaller than ``key``.
    See 7.9.7 "Number Trees".

    Args:
        key: number key of the entry
        value: value of the entry
        nums: Nums array, modified in place (assumed to be ordered by key)

    Raises:
        ValueError: if ``nums`` does not hold an even number of elements.
    """
    size = len(nums)
    if size % 2 != 0:
        raise ValueError("a nums like array must have an even number of elements")

    # Scan the keys (even slots) from the back; stop at the first key that is
    # strictly smaller than the new one. ``slot`` ends up on the insert point.
    slot = size
    for candidate in range(size - 2, -1, -2):
        if not key <= nums[candidate]:
            break
        slot = candidate

    if slot < size and key == nums[slot]:
        # Same key already stored: only its value slot changes.
        nums[slot + 1] = value
        return

    nums.insert(slot, key)
    nums.insert(slot + 1, value)


def nums_clear_range(
    key: NumberObject,
    page_index_to: int,
    nums: ArrayObject,
) -> None:
    """
    Drop the entries that directly follow ``key`` up to ``page_index_to``.

    The entry stored under ``key`` itself is kept; every following entry
    whose key is lower than or equal to ``page_index_to`` is removed.
    See 7.9.7 "Number Trees".

    Args:
        key: number key of the entry that precedes the range
        page_index_to: the page index of the upper limit of the range
        nums: Nums array, modified in place

    Raises:
        ValueError: if ``nums`` holds an odd number of elements, if
            ``page_index_to`` is lower than ``key``, or (raised by
            ``nums.index``) if ``key`` is not found in ``nums``.
    """
    if len(nums) % 2 != 0:
        raise ValueError("a nums like array must have an even number of elements")
    if page_index_to < key:
        raise ValueError("page_index_to must be greater or equal than key")

    # First key slot after the anchor entry.
    cursor = nums.index(key) + 2
    while cursor < len(nums):
        if not nums[cursor] <= page_index_to:
            break
        # Deleting twice at the same position removes the key, then the
        # value that slid into its place.
        del nums[cursor]
        del nums[cursor]


def nums_next(
    key: NumberObject,
    nums: ArrayObject,
) -> Tuple[Optional[NumberObject], Optional[DictionaryObject]]:
    """
    Look up the entry stored right after ``key`` in a Nums array.

    See 7.9.7 "Number Trees".

    Args:
        key: number key of the reference entry
        nums: Nums array

    Returns:
        The ``(key, value)`` pair that follows the reference entry, or
        ``(None, None)`` when the reference entry is the last one.

    Raises:
        ValueError: if ``nums`` holds an odd number of elements, or (raised
            by ``nums.index``) if ``key`` is not found in ``nums``.
    """
    if len(nums) % 2 != 0:
        raise ValueError("a nums like array must have an even number of elements")

    following = nums.index(key) + 2
    if following >= len(nums):
        # The reference entry closes the array: nothing comes after it.
        return (None, None)
    return (nums[following], nums[following + 1])
105 changes: 105 additions & 0 deletions pypdf/_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@

from ._encryption import Encryption
from ._page import PageObject, _VirtualList
from ._page_labels import nums_insert, nums_clear_range, nums_next
from ._reader import PdfReader
from ._security import _alg33, _alg34, _alg35
from ._utils import (
Expand Down Expand Up @@ -88,6 +89,7 @@
from .constants import StreamAttributes as SA
from .constants import TrailerKeys as TK
from .constants import TypFitArguments, UserAccessPermissions
from .constants import PageLabelStyle
from .generic import (
PAGE_FIT,
AnnotationBuilder,
Expand Down Expand Up @@ -123,6 +125,7 @@
ZoomArgType,
)


logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -2874,6 +2877,108 @@ def reset_translation(
else:
raise Exception("invalid parameter {reader}")

def set_page_label(
    self,
    page_index_from: int,
    page_index_to: int,
    style: Optional[PageLabelStyle] = None,
    prefix: Optional[str] = None,
    start: Optional[int] = 0,
) -> None:
    """
    Validate the arguments and assign a page label to a range of pages.

    Page indexes are zero-based and the range is inclusive on both ends.
    A label needs a style, a prefix, or both. The checked arguments are
    handed over unchanged to ``_set_page_label``.

    Args:
        page_index_from: page index of the first page of the range
        page_index_to: page index of the last page of the range
        style: The numbering style to be used for the numeric portion of
            each page label:
            '/D' Decimal arabic numerals
            '/R' Uppercase roman numerals
            '/r' Lowercase roman numerals
            '/A' Uppercase letters (A to Z for the first 26 pages,
                 AA to ZZ for the next 26, and so on)
            '/a' Lowercase letters (a to z for the first 26 pages,
                 aa to zz for the next 26, and so on)
        prefix: The label prefix for page labels in this range.
        start: The value of the numeric portion for the first page label
            in the range; following pages count up from it. The default
            of 0 means no explicit start value is written; any other
            value must be greater than or equal to 1.

    Raises:
        ValueError: if neither style nor prefix is given, if the page
            range is negative, reversed or runs past the last page, or if
            start is a non-zero value below 1.
    """
    # Guard clauses run in a fixed order so the first violated rule decides
    # which message the caller sees.
    nothing_to_label = style is None and prefix is None
    if nothing_to_label:
        raise ValueError("at least one between style and prefix must be given")

    if page_index_from < 0:
        raise ValueError("page_index_from must be equal or greater then 0")

    if page_index_to < page_index_from:
        raise ValueError(
            "page_index_to must be equal or greater then page_index_from"
        )

    if page_index_to >= len(self.pages):
        raise ValueError("page_index_to exceeds number of pages")

    # None and 0 are exempt from the lower bound check.
    if start not in (None, 0) and start < 1:
        raise ValueError("if given, start must be equal or greater than one")

    self._set_page_label(page_index_from, page_index_to, style, prefix, start)

def _set_page_label(
    self,
    page_index_from: int,
    page_index_to: int,
    style: Optional[PageLabelStyle] = None,
    prefix: Optional[str] = None,
    start: Optional[int] = 0,
) -> None:
    """
    Write a page label for a range of pages into the catalog's /PageLabels.

    No argument validation happens here; ``set_page_label`` is the checked
    entry point. Page indexes are zero-based and the range is inclusive.
    If the catalog has no page labels yet, a number tree is created whose
    first entry is a decimal label at page index 0. Existing entries that
    start inside the range are removed, and a decimal label is placed on
    the page following the range unless a label already starts there or the
    range reaches the last page.

    Args:
        page_index_from: page index of the first page of the range
        page_index_to: page index of the last page of the range
        style: The numbering style to be used for the numeric portion of
            each page label:
            /D Decimal arabic numerals
            /R Uppercase roman numerals
            /r Lowercase roman numerals
            /A Uppercase letters (A to Z for the first 26 pages,
               AA to ZZ for the next 26, and so on)
            /a Lowercase letters (a to z for the first 26 pages,
               aa to zz for the next 26, and so on)
        prefix: The label prefix for page labels in this range.
        start: The value of the numeric portion for the first page label
            in the range; following pages count up from it. ``None`` or 0
            (the default) means no /St entry is written.
    """
    default_page_label = DictionaryObject()
    default_page_label[NameObject("/S")] = NameObject("/D")

    new_page_label = DictionaryObject()
    if style is not None:
        new_page_label[NameObject("/S")] = NameObject(style)
    if prefix is not None:
        new_page_label[NameObject("/P")] = TextStringObject(prefix)
    # ``start`` is Optional and the public validator lets None through, so
    # None must be treated like 0 ("not given") instead of being converted
    # to a number.
    if start is not None and start != 0:
        new_page_label[NameObject("/St")] = NumberObject(start)

    if NameObject(CatalogDictionary.PAGE_LABELS) not in self._root_object:
        # First label in this document: seed the tree with a decimal label
        # at page index 0.
        nums = ArrayObject()
        nums_insert(NumberObject(0), default_page_label, nums)
        page_labels = TreeObject()
        page_labels[NameObject("/Nums")] = nums
        self._root_object[NameObject(CatalogDictionary.PAGE_LABELS)] = page_labels

    page_labels = cast(
        TreeObject, self._root_object[NameObject(CatalogDictionary.PAGE_LABELS)]
    )
    # NOTE(review): assumes the tree keeps its entries in a top-level /Nums
    # array; a tree that only has /Kids would fail on this lookup.
    nums = cast(ArrayObject, page_labels[NameObject("/Nums")])

    nums_insert(NumberObject(page_index_from), new_page_label, nums)
    nums_clear_range(NumberObject(page_index_from), page_index_to, nums)
    next_label_pos, *_ = nums_next(NumberObject(page_index_from), nums)
    # Close the range: pages after it fall back to the decimal label unless
    # another label already starts right there or there are no more pages.
    if next_label_pos != page_index_to + 1 and page_index_to + 1 < len(self.pages):
        nums_insert(NumberObject(page_index_to + 1), default_page_label, nums)

    page_labels[NameObject("/Nums")] = nums
    self._root_object[NameObject(CatalogDictionary.PAGE_LABELS)] = page_labels


def _pdf_objectify(obj: Union[Dict[str, Any], str, int, List[Any]]) -> PdfObject:
if isinstance(obj, PdfObject):
Expand Down
10 changes: 10 additions & 0 deletions pypdf/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,16 @@ class OutlineFontFlag(IntFlag):
bold = 2


class PageLabelStyle:
    """
    Names of the numbering styles a page label can use.

    Table 8.10 in the 1.7 reference.
    """

    # Arabic numerals
    DECIMAL = "/D"

    # Roman numerals
    LOWERCASE_ROMAN = "/r"
    UPPERCASE_ROMAN = "/R"

    # Letters
    LOWERCASE_LETTER = "/a"
    UPPERCASE_LETTER = "/A"


PDF_KEYS = (
AnnotationDictionaryAttributes,
CatalogAttributes,
Expand Down
Loading

0 comments on commit e711846

Please sign in to comment.