Skip to content

Commit

Permalink
Merge remote-tracking branch 'py-pdf/main' into cloning
Browse files: browse the repository at this point in the history
  • Loading branch information
pubpub-zz committed Dec 2, 2022
2 parents e8b4929 + 3e250c5 commit 4ccfbff
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 7 deletions.
20 changes: 14 additions & 6 deletions PyPDF2/_merger.py
Expand Up @@ -155,6 +155,7 @@ def merge(
or a ``(start, stop[, step])`` tuple
to merge only the specified range of pages from the source
document into the output document.
Can also be a list of pages to merge.
:param bool import_outline: You may prevent the source document's
outline (collection of outline items, previously referred to as
Expand All @@ -174,6 +175,8 @@ def merge(
pages = (0, len(reader.pages))
elif isinstance(pages, PageRange):
pages = pages.indices(len(reader.pages))
elif isinstance(pages, list):
pass
elif not isinstance(pages, tuple):
raise TypeError('"pages" must be a tuple of (start, stop[, step])')

Expand Down Expand Up @@ -255,7 +258,9 @@ def append(
self,
fileobj: Union[StrByteType, PdfReader, Path],
outline_item: Optional[str] = None,
pages: Union[None, PageRange, Tuple[int, int], Tuple[int, int, int]] = None,
pages: Union[
None, PageRange, Tuple[int, int], Tuple[int, int, int], List[int]
] = None,
import_outline: bool = True,
) -> None:
"""
Expand All @@ -275,6 +280,7 @@ def append(
or a ``(start, stop[, step])`` tuple
to merge only the specified range of pages from the source
document into the output document.
Can also be a list of pages to append.
:param bool import_outline: You may prevent the source document's
outline (collection of outline items, previously referred to as
Expand Down Expand Up @@ -420,12 +426,13 @@ def _trim_dests(
self,
pdf: PdfReader,
dests: Dict[str, Dict[str, Any]],
pages: Union[Tuple[int, int], Tuple[int, int, int]],
pages: Union[Tuple[int, int], Tuple[int, int, int], List[int]],
) -> List[Dict[str, Any]]:
"""Remove named destinations that are not a part of the specified page set."""
new_dests = []
lst = pages if isinstance(pages, list) else list(range(*pages))
for key, obj in dests.items():
for j in range(*pages):
for j in lst:
if pdf.pages[j].get_object() == obj["/Page"].get_object():
obj[NameObject("/Page")] = obj["/Page"].get_object()
assert str_(key) == str_(obj["/Title"])
Expand All @@ -437,21 +444,22 @@ def _trim_outline(
self,
pdf: PdfReader,
outline: OutlineType,
pages: Union[Tuple[int, int], Tuple[int, int, int]],
pages: Union[Tuple[int, int], Tuple[int, int, int], List[int]],
) -> OutlineType:
"""Remove outline item entries that are not a part of the specified page set."""
new_outline = []
prev_header_added = True
lst = pages if isinstance(pages, list) else list(range(*pages))
for i, outline_item in enumerate(outline):
if isinstance(outline_item, list):
sub = self._trim_outline(pdf, outline_item, pages) # type: ignore
sub = self._trim_outline(pdf, outline_item, lst) # type: ignore
if sub:
if not prev_header_added:
new_outline.append(outline[i - 1])
new_outline.append(sub) # type: ignore
else:
prev_header_added = False
for j in range(*pages):
for j in lst:
if outline_item["/Page"] is None:
continue
if pdf.pages[j].get_object() == outline_item["/Page"].get_object():
Expand Down
6 changes: 5 additions & 1 deletion tests/test_merger.py
Expand Up @@ -45,7 +45,11 @@ def merger_operate(merger):
merger.append(reader)

# PdfReader object:
merger.append(PyPDF2.PdfReader(pdf_path), outline_item="foo")
r = PyPDF2.PdfReader(pdf_path)
merger.append(r, outline_item="foo", pages=list(range(len(r.pages))))

# PdfReader object with List:
# merger.append(PyPDF2.PdfReader(pdf_path), outline_item="foo")

# File handle
with open(pdf_path, "rb") as fh:
Expand Down
13 changes: 13 additions & 0 deletions tests/test_reader.py
Expand Up @@ -18,6 +18,7 @@
WrongPasswordError,
)
from PyPDF2.generic import (
ArrayObject,
Destination,
DictionaryObject,
NameObject,
Expand Down Expand Up @@ -1192,6 +1193,18 @@ def test_zeroing_xref():
len(reader.pages)


def test_thread():
    """Check ``PdfReader.threads`` against two downloaded sample documents.

    The first document is expected to expose no threads at all (``None``);
    the second is expected to expose a non-empty ``ArrayObject``.
    """
    # First sample: ``threads`` must be None.
    osha_bytes = get_pdf_from_url(
        "https://github.com/py-pdf/PyPDF2/files/9066120/UTA_OSHA_3115_Fall_Protection_Training_09162021_.pdf",
        name="UTA_OSHA.pdf",
    )
    osha_reader = PdfReader(BytesIO(osha_bytes))
    assert osha_reader.threads is None

    # Second sample: ``threads`` must be an array holding at least one entry.
    tika_bytes = get_pdf_from_url(
        "https://corpora.tika.apache.org/base/docs/govdocs1/924/924666.pdf",
        name="tika-924666.pdf",
    )
    tika_reader = PdfReader(BytesIO(tika_bytes))
    assert isinstance(tika_reader.threads, ArrayObject)
    assert len(tika_reader.threads) >= 1


def test_build_outline_item(caplog):
url = "https://github.com/py-pdf/PyPDF2/files/9464742/shiv_resume.pdf"
name = "shiv_resume.pdf"
Expand Down

0 comments on commit 4ccfbff

Please sign in to comment.