Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MAINT: Add List of pages to PageRangeSpec #1456

Merged
merged 1 commit into from
Dec 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
20 changes: 14 additions & 6 deletions PyPDF2/_merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ def merge(
or a ``(start, stop[, step])`` tuple
to merge only the specified range of pages from the source
document into the output document.
Can also be a list of zero-based page indices to merge.

:param bool import_outline: You may prevent the source document's
outline (collection of outline items, previously referred to as
Expand All @@ -174,6 +175,8 @@ def merge(
pages = (0, len(reader.pages))
elif isinstance(pages, PageRange):
pages = pages.indices(len(reader.pages))
elif isinstance(pages, list):
pass
elif not isinstance(pages, tuple):
raise TypeError('"pages" must be a tuple of (start, stop[, step])')

Expand Down Expand Up @@ -255,7 +258,9 @@ def append(
self,
fileobj: Union[StrByteType, PdfReader, Path],
outline_item: Optional[str] = None,
pages: Union[None, PageRange, Tuple[int, int], Tuple[int, int, int]] = None,
pages: Union[
None, PageRange, Tuple[int, int], Tuple[int, int, int], List[int]
] = None,
import_outline: bool = True,
) -> None:
"""
Expand All @@ -275,6 +280,7 @@ def append(
or a ``(start, stop[, step])`` tuple
to merge only the specified range of pages from the source
document into the output document.
Can also be a list of zero-based page indices to append.

:param bool import_outline: You may prevent the source document's
outline (collection of outline items, previously referred to as
Expand Down Expand Up @@ -420,12 +426,13 @@ def _trim_dests(
self,
pdf: PdfReader,
dests: Dict[str, Dict[str, Any]],
pages: Union[Tuple[int, int], Tuple[int, int, int]],
pages: Union[Tuple[int, int], Tuple[int, int, int], List[int]],
) -> List[Dict[str, Any]]:
"""Remove named destinations that are not a part of the specified page set."""
new_dests = []
lst = pages if isinstance(pages, list) else list(range(*pages))
for key, obj in dests.items():
for j in range(*pages):
for j in lst:
if pdf.pages[j].get_object() == obj["/Page"].get_object():
obj[NameObject("/Page")] = obj["/Page"].get_object()
assert str_(key) == str_(obj["/Title"])
Expand All @@ -437,21 +444,22 @@ def _trim_outline(
self,
pdf: PdfReader,
outline: OutlineType,
pages: Union[Tuple[int, int], Tuple[int, int, int]],
pages: Union[Tuple[int, int], Tuple[int, int, int], List[int]],
) -> OutlineType:
"""Remove outline item entries that are not a part of the specified page set."""
new_outline = []
prev_header_added = True
lst = pages if isinstance(pages, list) else list(range(*pages))
for i, outline_item in enumerate(outline):
if isinstance(outline_item, list):
sub = self._trim_outline(pdf, outline_item, pages) # type: ignore
sub = self._trim_outline(pdf, outline_item, lst) # type: ignore
if sub:
if not prev_header_added:
new_outline.append(outline[i - 1])
new_outline.append(sub) # type: ignore
else:
prev_header_added = False
for j in range(*pages):
for j in lst:
if outline_item["/Page"] is None:
continue
if pdf.pages[j].get_object() == outline_item["/Page"].get_object():
Expand Down
2 changes: 1 addition & 1 deletion PyPDF2/pagerange.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,4 +170,4 @@ def parse_filename_page_ranges(
return pairs


PageRangeSpec = Union[str, PageRange, Tuple[int, int], Tuple[int, int, int]]
PageRangeSpec = Union[str, PageRange, Tuple[int, int], Tuple[int, int, int], List[int]]
6 changes: 5 additions & 1 deletion tests/test_merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,11 @@ def merger_operate(merger):
merger.append(reader)

# PdfReader object:
merger.append(PyPDF2.PdfReader(pdf_path), outline_item="foo")
r = PyPDF2.PdfReader(pdf_path)
merger.append(r, outline_item="foo", pages=list(range(len(r.pages))))

# PdfReader object without an explicit page list (disabled):
# merger.append(PyPDF2.PdfReader(pdf_path), outline_item="foo")

# File handle
with open(pdf_path, "rb") as fh:
Expand Down