Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Add support for pathlib in merger #1190

Merged
Merged 5 commits on Jul 31, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
9 changes: 5 additions & 4 deletions PyPDF2/_merger.py
Expand Up @@ -26,6 +26,7 @@
# POSSIBILITY OF SUCH DAMAGE.

from io import BytesIO, FileIO, IOBase
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union, cast

from ._encryption import Encryption
Expand Down Expand Up @@ -99,7 +100,7 @@ def __init__(self, strict: bool = False) -> None:
def merge(
self,
position: int,
fileobj: Union[StrByteType, PdfReader],
fileobj: Union[Path, StrByteType, PdfReader],
outline_item: Optional[str] = None,
pages: Optional[PageRangeSpec] = None,
import_outline: bool = True,
Expand Down Expand Up @@ -184,7 +185,7 @@ def merge(
self.pages[position:position] = srcpages

def _create_stream(
self, fileobj: Union[StrByteType, PdfReader]
self, fileobj: Union[Path, StrByteType, PdfReader]
) -> Tuple[IOBase, bool, Optional[Encryption]]:
# This parameter is passed to self.inputs.append and means
# that the stream used was created in this method.
Expand All @@ -198,7 +199,7 @@ def _create_stream(
# If fileobj is none of the above types, it is not modified
encryption_obj = None
stream: IOBase
if isinstance(fileobj, str):
if isinstance(fileobj, (str, Path)):
stream = FileIO(fileobj, "rb")
my_file = True
elif isinstance(fileobj, PdfReader):
Expand All @@ -224,7 +225,7 @@ def _create_stream(
@deprecate_bookmark(bookmark="outline_item", import_bookmarks="import_outline")
def append(
self,
fileobj: Union[StrByteType, PdfReader],
fileobj: Union[StrByteType, PdfReader, Path],
outline_item: Optional[str] = None,
pages: Union[None, PageRange, Tuple[int, int], Tuple[int, int, int]] = None,
import_outline: bool = True,
Expand Down
21 changes: 11 additions & 10 deletions tests/bench.py
@@ -1,17 +1,18 @@
import os
from pathlib import Path

import PyPDF2
from PyPDF2 import PdfReader, Transformation
from PyPDF2.generic import Destination

TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))
PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "resources")
SAMPLE_ROOT = os.path.join(PROJECT_ROOT, "sample-files")
TESTS_ROOT = Path(__file__).parent.resolve()
PROJECT_ROOT = TESTS_ROOT.parent
RESOURCE_ROOT = PROJECT_ROOT / "resources"
SAMPLE_ROOT = PROJECT_ROOT / "sample-files"


def page_ops(pdf_path, password):
pdf_path = os.path.join(RESOURCE_ROOT, pdf_path)
pdf_path = RESOURCE_ROOT / pdf_path

reader = PdfReader(pdf_path)

Expand Down Expand Up @@ -50,10 +51,10 @@ def test_page_operations(benchmark):


def merge():
pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf")
outline = os.path.join(RESOURCE_ROOT, "pdflatex-outline.pdf")
pdf_forms = os.path.join(RESOURCE_ROOT, "pdflatex-forms.pdf")
pdf_pw = os.path.join(RESOURCE_ROOT, "libreoffice-writer-password.pdf")
pdf_path = RESOURCE_ROOT / "crazyones.pdf"
outline = RESOURCE_ROOT / "pdflatex-outline.pdf"
pdf_forms = RESOURCE_ROOT / "pdflatex-forms.pdf"
pdf_pw = RESOURCE_ROOT / "libreoffice-writer-password.pdf"

file_merger = PyPDF2.PdfMerger()

Expand Down Expand Up @@ -126,5 +127,5 @@ def text_extraction(pdf_path):


def test_text_extraction(benchmark):
file_path = os.path.join(SAMPLE_ROOT, "009-pdflatex-geotopo/GeoTopo.pdf")
file_path = SAMPLE_ROOT / "009-pdflatex-geotopo/GeoTopo.pdf"
benchmark(text_extraction, file_path)
9 changes: 5 additions & 4 deletions tests/test_basic_features.py
@@ -1,14 +1,15 @@
import os
from pathlib import Path

from PyPDF2 import PdfReader, PdfWriter

TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))
PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "resources")
TESTS_ROOT = Path(__file__).parent.resolve()
PROJECT_ROOT = TESTS_ROOT.parent
RESOURCE_ROOT = PROJECT_ROOT / "resources"


def test_basic_features():
pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf")
pdf_path = RESOURCE_ROOT / "crazyones.pdf"
reader = PdfReader(pdf_path)
writer = PdfWriter()

Expand Down
20 changes: 10 additions & 10 deletions tests/test_encryption.py
@@ -1,4 +1,4 @@
import os
from pathlib import Path

import pytest

Expand All @@ -14,9 +14,9 @@
except ImportError:
HAS_PYCRYPTODOME = False

TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))
PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "resources")
TESTS_ROOT = Path(__file__).parent.resolve()
PROJECT_ROOT = TESTS_ROOT.parent
RESOURCE_ROOT = PROJECT_ROOT / "resources"


@pytest.mark.parametrize(
Expand Down Expand Up @@ -51,7 +51,7 @@
],
)
def test_encryption(name, requres_pycryptodome):
inputfile = os.path.join(RESOURCE_ROOT, "encryption", name)
inputfile = RESOURCE_ROOT / "encryption" / name
if requres_pycryptodome and not HAS_PYCRYPTODOME:
with pytest.raises(DependencyError) as exc:
ipdf = PyPDF2.PdfReader(inputfile)
Expand All @@ -61,7 +61,7 @@ def test_encryption(name, requres_pycryptodome):
return
else:
ipdf = PyPDF2.PdfReader(inputfile)
if inputfile.endswith("unencrypted.pdf"):
if str(inputfile).endswith("unencrypted.pdf"):
assert not ipdf.is_encrypted
else:
assert ipdf.is_encrypted
Expand Down Expand Up @@ -91,7 +91,7 @@ def test_encryption(name, requres_pycryptodome):
def test_both_password(name, user_passwd, owner_passwd):
from PyPDF2 import PasswordType

inputfile = os.path.join(RESOURCE_ROOT, "encryption", name)
inputfile = RESOURCE_ROOT / "encryption" / name
ipdf = PyPDF2.PdfReader(inputfile)
assert ipdf.is_encrypted
assert ipdf.decrypt(user_passwd) == PasswordType.USER_PASSWORD
Expand All @@ -113,7 +113,7 @@ def test_get_page_of_encrypted_file_new_algorithm(pdffile, password):
This is a regression test for issue 327:
IndexError for get_page() of decrypted file
"""
path = os.path.join(RESOURCE_ROOT, pdffile)
path = RESOURCE_ROOT / pdffile
PyPDF2.PdfReader(path, password=password).pages[0]


Expand All @@ -133,7 +133,7 @@ def test_get_page_of_encrypted_file_new_algorithm(pdffile, password):
@pytest.mark.skipif(not HAS_PYCRYPTODOME, reason="No pycryptodome")
def test_encryption_merge(names):
pdf_merger = PyPDF2.PdfMerger()
files = [os.path.join(RESOURCE_ROOT, "encryption", x) for x in names]
files = [RESOURCE_ROOT / "encryption" / x for x in names]
pdfs = [PyPDF2.PdfReader(x) for x in files]
for pdf in pdfs:
if pdf.is_encrypted:
Expand All @@ -157,7 +157,7 @@ def test_encrypt_decrypt_class(cryptcls):


def test_decrypt_not_decrypted_pdf():
path = os.path.join(RESOURCE_ROOT, "crazyones.pdf")
path = RESOURCE_ROOT / "crazyones.pdf"
with pytest.raises(PdfReadError) as exc:
PdfReader(path, password="nonexistant")
assert exc.value.args[0] == "Not encrypted file"
15 changes: 8 additions & 7 deletions tests/test_generic.py
@@ -1,5 +1,6 @@
import os
from io import BytesIO
from pathlib import Path
from unittest.mock import patch

import pytest
Expand Down Expand Up @@ -33,9 +34,9 @@

from . import get_pdf_from_url

TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))
PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "resources")
TESTS_ROOT = Path(__file__).parent.resolve()
PROJECT_ROOT = TESTS_ROOT.parent
RESOURCE_ROOT = PROJECT_ROOT / "resources"


def test_float_object_exception():
Expand Down Expand Up @@ -395,7 +396,7 @@ def test_remove_child_not_in_tree():


def test_remove_child_in_tree():
pdf = os.path.join(RESOURCE_ROOT, "form.pdf")
pdf = RESOURCE_ROOT / "form.pdf"

tree = TreeObject()
reader = PdfReader(pdf)
Expand Down Expand Up @@ -503,7 +504,7 @@ def test_issue_997(mock_logger_warning):

def test_annotation_builder_free_text():
# Arrange
pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf")
pdf_path = RESOURCE_ROOT / "crazyones.pdf"
reader = PdfReader(pdf_path)
page = reader.pages[0]
writer = PdfWriter()
Expand Down Expand Up @@ -533,7 +534,7 @@ def test_annotation_builder_free_text():

def test_annotation_builder_line():
# Arrange
pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf")
pdf_path = RESOURCE_ROOT / "crazyones.pdf"
reader = PdfReader(pdf_path)
page = reader.pages[0]
writer = PdfWriter()
Expand All @@ -558,7 +559,7 @@ def test_annotation_builder_line():

def test_annotation_builder_link():
# Arrange
pdf_path = os.path.join(RESOURCE_ROOT, "outline-without-title.pdf")
pdf_path = RESOURCE_ROOT / "outline-without-title.pdf"
reader = PdfReader(pdf_path)
page = reader.pages[0]
writer = PdfWriter()
Expand Down
10 changes: 5 additions & 5 deletions tests/test_javascript.py
@@ -1,18 +1,18 @@
import os
from pathlib import Path

import pytest

from PyPDF2 import PdfReader, PdfWriter

# Configure path environment
TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))
PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "resources")
TESTS_ROOT = Path(__file__).parent.resolve()
PROJECT_ROOT = TESTS_ROOT.parent
RESOURCE_ROOT = PROJECT_ROOT / "resources"


@pytest.fixture()
def pdf_file_writer():
reader = PdfReader(os.path.join(RESOURCE_ROOT, "crazyones.pdf"))
reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
writer = PdfWriter()
writer.append_pages_from_reader(reader)
return writer
Expand Down
31 changes: 14 additions & 17 deletions tests/test_merger.py
@@ -1,6 +1,7 @@
import os
import sys
from io import BytesIO
from pathlib import Path

import pytest

Expand All @@ -10,18 +11,18 @@

from . import get_pdf_from_url

TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))
PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "resources")
TESTS_ROOT = Path(__file__).parent.resolve()
PROJECT_ROOT = TESTS_ROOT.parent
RESOURCE_ROOT = PROJECT_ROOT / "resources"

sys.path.append(PROJECT_ROOT)
sys.path.append(str(PROJECT_ROOT))


def test_merge():
pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf")
outline = os.path.join(RESOURCE_ROOT, "pdflatex-outline.pdf")
pdf_forms = os.path.join(RESOURCE_ROOT, "pdflatex-forms.pdf")
pdf_pw = os.path.join(RESOURCE_ROOT, "libreoffice-writer-password.pdf")
pdf_path = RESOURCE_ROOT / "crazyones.pdf"
outline = RESOURCE_ROOT / "pdflatex-outline.pdf"
pdf_forms = RESOURCE_ROOT / "pdflatex-forms.pdf"
pdf_pw = RESOURCE_ROOT / "libreoffice-writer-password.pdf"

merger = PyPDF2.PdfMerger()

Expand Down Expand Up @@ -122,7 +123,7 @@ def test_merge():

def test_merge_page_exception():
merger = PyPDF2.PdfMerger()
pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf")
pdf_path = RESOURCE_ROOT / "crazyones.pdf"
with pytest.raises(TypeError) as exc:
merger.merge(0, pdf_path, pages="a:b")
assert exc.value.args[0] == '"pages" must be a tuple of (start, stop[, step])'
Expand All @@ -131,14 +132,14 @@ def test_merge_page_exception():

def test_merge_page_tuple():
merger = PyPDF2.PdfMerger()
pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf")
pdf_path = RESOURCE_ROOT / "crazyones.pdf"
merger.merge(0, pdf_path, pages=(0, 1))
merger.close()


def test_merge_write_closed_fh():
merger = PyPDF2.PdfMerger()
pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf")
pdf_path = RESOURCE_ROOT / "crazyones.pdf"
merger.append(pdf_path)

err_closed = "close() was called and thus the writer cannot be used anymore"
Expand Down Expand Up @@ -313,9 +314,7 @@ def test_iss1145():


def test_deprecate_bookmark_decorator_warning():
reader = PdfReader(
os.path.join(RESOURCE_ROOT, "outlines-with-invalid-destinations.pdf")
)
reader = PdfReader(RESOURCE_ROOT / "outlines-with-invalid-destinations.pdf")
merger = PdfMerger()
with pytest.warns(
UserWarning,
Expand All @@ -326,9 +325,7 @@ def test_deprecate_bookmark_decorator_warning():

@pytest.mark.filterwarnings("ignore::UserWarning")
def test_deprecate_bookmark_decorator_output():
reader = PdfReader(
os.path.join(RESOURCE_ROOT, "outlines-with-invalid-destinations.pdf")
)
reader = PdfReader(RESOURCE_ROOT / "outlines-with-invalid-destinations.pdf")
merger = PdfMerger()
merger.merge(0, reader, import_bookmarks=True)
first_oi_title = 'Valid Destination: Action /GoTo Named Destination "section.1"'
Expand Down