Skip to content

Commit

Permalink
TST: Add test for Tree and _security (#945)
Browse files Browse the repository at this point in the history
  • Loading branch information
MartinThoma committed Jun 4, 2022
1 parent 76e38ac commit 34919f9
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 13 deletions.
30 changes: 20 additions & 10 deletions PyPDF2/_security.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,6 @@
)


# Implementation of algorithm 3.2 of the PDF standard security handler,
# section 3.5.2 of the PDF 1.6 reference.
def _alg32(
password: Union[str, bytes],
rev: Any,
Expand All @@ -55,6 +53,10 @@ def _alg32(
id1_entry: ByteStringObject,
metadata_encrypt: bool = True,
) -> bytes:
"""
Implementation of algorithm 3.2 of the PDF standard security handler,
section 3.5.2 of the PDF 1.6 reference.
"""
# 1. Pad or truncate the password string to exactly 32 bytes. If the
# password string is more than 32 bytes long, use only its first 32 bytes;
# if it is less than 32 bytes long, pad it by appending the required number
Expand Down Expand Up @@ -95,9 +97,11 @@ def _alg32(
return md5_hash[:keylen]


# Implementation of algorithm 3.3 of the PDF standard security handler,
# section 3.5.2 of the PDF 1.6 reference.
def _alg33(owner_pwd: str, user_pwd: str, rev: int, keylen: int) -> bytes:
"""
Implementation of algorithm 3.3 of the PDF standard security handler,
section 3.5.2 of the PDF 1.6 reference.
"""
# steps 1 - 4
key = _alg33_1(owner_pwd, rev, keylen)
# 5. Pad or truncate the user password string as described in step 1 of
Expand All @@ -123,8 +127,8 @@ def _alg33(owner_pwd: str, user_pwd: str, rev: int, keylen: int) -> bytes:
return val


# Steps 1-4 of algorithm 3.3
def _alg33_1(password: Union[bytes, str], rev: int, keylen: int) -> bytes:
"""Steps 1-4 of algorithm 3.3"""
# 1. Pad or truncate the owner password string as described in step 1 of
# algorithm 3.2. If there is no owner password, use the user password
# instead.
Expand All @@ -148,17 +152,21 @@ def _alg33_1(password: Union[bytes, str], rev: int, keylen: int) -> bytes:
return key


# Implementation of algorithm 3.4 of the PDF standard security handler,
# section 3.5.2 of the PDF 1.6 reference.
def _alg34(
password: Union[str, bytes],
owner_entry: ByteStringObject,
p_entry: int,
id1_entry: ByteStringObject,
) -> Tuple[bytes, bytes]:
"""
Implementation of algorithm 3.4 of the PDF standard security handler,
section 3.5.2 of the PDF 1.6 reference.
"""
# 1. Create an encryption key based on the user password string, as
# described in algorithm 3.2.
key = _alg32(password, 2, 5, owner_entry, p_entry, id1_entry)
rev = 2
keylen = 5
key = _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry)
# 2. Encrypt the 32-byte padding string shown in step 1 of algorithm 3.2,
# using an RC4 encryption function with the encryption key from the
# preceding step.
Expand All @@ -168,8 +176,6 @@ def _alg34(
return U, key


# Implementation of algorithm 3.4 of the PDF standard security handler,
# section 3.5.2 of the PDF 1.6 reference.
def _alg35(
password: Union[str, bytes],
rev: int,
Expand All @@ -179,6 +185,10 @@ def _alg35(
id1_entry: ByteStringObject,
metadata_encrypt: bool,
) -> Tuple[bytes, bytes]:
"""
Implementation of algorithm 3.5 of the PDF standard security handler,
section 3.5.2 of the PDF 1.6 reference.
"""
# 1. Create an encryption key based on the user password string, as
# described in Algorithm 3.2.
key = _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry)
Expand Down
Binary file added resources/crazyones-encrypted-256.pdf
Binary file not shown.
34 changes: 34 additions & 0 deletions tests/test_generic.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import os
from io import BytesIO

import pytest

from PyPDF2 import PdfReader, PdfWriter
from PyPDF2.constants import TypFitArguments as TF
from PyPDF2.errors import PdfReadError, PdfStreamError
from PyPDF2.generic import (
Expand All @@ -18,13 +20,18 @@
NumberObject,
RectangleObject,
TextStringObject,
TreeObject,
createStringObject,
encode_pdfdocencoding,
read_object,
readHexStringFromStream,
readStringFromStream,
)

TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))
PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "resources")


def test_float_object_exception():
assert FloatObject("abc") == 0
Expand Down Expand Up @@ -376,3 +383,30 @@ def test_TextStringObject_autodetect_utf16():
tso = TextStringObject("foo")
tso.autodetect_utf16 = True
assert tso.get_original_bytes() == b"\xfe\xff\x00f\x00o\x00o"


def test_remove_child_not_in_tree():
    """Removing an object that is not part of the tree raises ValueError."""
    expected_message = "Removed child does not appear to be a tree item"
    empty_tree = TreeObject()
    with pytest.raises(ValueError) as excinfo:
        empty_tree.remove_child(NameObject("foo"))
    raised = excinfo.value
    assert raised.args[0] == expected_message


def test_remove_child_in_tree():
    """
    Smoke test: add, remove and re-add an object in a TreeObject, then
    empty the tree.

    There are no explicit assertions; the test passes as long as none of
    the tree operations raises.
    """
    pdf = os.path.join(RESOURCE_ROOT, "form.pdf")

    tree = TreeObject()
    reader = PdfReader(pdf)
    writer = PdfWriter()
    writer.add_page(reader.pages[0])
    writer.add_bookmark("foo", 0)
    # NOTE(review): presumably the last registered writer object is the
    # bookmark that was just added -- this relies on PdfWriter internals.
    obj = writer._objects[-1]
    tree.add_child(obj, writer)
    tree.remove_child(obj)
    tree.add_child(obj, writer)
    tree.emptyTree()
33 changes: 30 additions & 3 deletions tests/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,23 +255,50 @@ def test_issue297():


@pytest.mark.parametrize(
("password", "should_fail"), [("test", False), ("qwerty", True)]
("pdffile", "password", "should_fail"),
[
("encrypted-file.pdf", "test", False),
("encrypted-file.pdf", "qwerty", True),
("encrypted-file.pdf", b"qwerty", True),
],
)
def test_get_page_of_encrypted_file(password, should_fail):
def test_get_page_of_encrypted_file(pdffile, password, should_fail):
"""
Check if we can read a page of an encrypted file.
This is a regression test for issue 327:
IndexError for get_page() of decrypted file
"""
path = os.path.join(RESOURCE_ROOT, "encrypted-file.pdf")
path = os.path.join(RESOURCE_ROOT, pdffile)
if should_fail:
with pytest.raises(PdfReadError):
PdfReader(path, password=password)
else:
PdfReader(path, password=password).pages[0]


@pytest.mark.parametrize(
    ("pdffile", "password"),
    [
        ("crazyones-encrypted-256.pdf", "password"),
    ],
)
def test_get_page_of_encrypted_file_new_algorithm(pdffile, password):
    """
    Check that accessing a page of a file encrypted with an unsupported
    algorithm code raises NotImplementedError with an explanatory message.
    """
    path = os.path.join(RESOURCE_ROOT, pdffile)
    with pytest.raises(NotImplementedError) as exc:
        PdfReader(path, password=password).pages[0]
    # The message names both the supported codes and the code found in the file.
    assert (
        exc.value.args[0]
        == "only algorithm code 1 and 2 are supported. This PDF uses code 5"
    )


@pytest.mark.parametrize(
("src", "expected", "expected_get_fields"),
[
Expand Down
6 changes: 6 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,9 @@ def test_b():
assert PyPDF2._utils.b_("😀") == "😀".encode()
assert PyPDF2._utils.b_("‰") == "‰".encode()
assert PyPDF2._utils.b_("▷") == "▷".encode()


def test_deprecate_no_replacement():
    """deprecate_no_replacement("foo") raises a PendingDeprecationWarning naming "foo"."""
    expected_message = "foo is deprecated and will be removed in PyPDF2 3.0.0."
    with pytest.raises(PendingDeprecationWarning) as excinfo:
        PyPDF2._utils.deprecate_no_replacement("foo")
    raised = excinfo.value
    assert raised.args[0] == expected_message

0 comments on commit 34919f9

Please sign in to comment.