TST: Add test for Tree and _security (#945)

py-pdf · Jun 4, 2022 · 34919f9 · 34919f9
1 parent 76e38ac
commit 34919f9
Show file tree

Hide file tree

Showing 5 changed files with 90 additions and 13 deletions.
diff --git a/PyPDF2/_security.py b/PyPDF2/_security.py
@@ -44,8 +44,6 @@
 )
 
 
-# Implementation of algorithm 3.2 of the PDF standard security handler,
-# section 3.5.2 of the PDF 1.6 reference.
 def _alg32(
     password: Union[str, bytes],
     rev: Any,
@@ -55,6 +53,10 @@ def _alg32(
     id1_entry: ByteStringObject,
     metadata_encrypt: bool = True,
 ) -> bytes:
+    """
+    Implementation of algorithm 3.2 of the PDF standard security handler,
+    section 3.5.2 of the PDF 1.6 reference.
+    """
     # 1. Pad or truncate the password string to exactly 32 bytes.  If the
     # password string is more than 32 bytes long, use only its first 32 bytes;
     # if it is less than 32 bytes long, pad it by appending the required number
@@ -95,9 +97,11 @@ def _alg32(
     return md5_hash[:keylen]
 
 
-# Implementation of algorithm 3.3 of the PDF standard security handler,
-# section 3.5.2 of the PDF 1.6 reference.
 def _alg33(owner_pwd: str, user_pwd: str, rev: int, keylen: int) -> bytes:
+    """
+    Implementation of algorithm 3.3 of the PDF standard security handler,
+    section 3.5.2 of the PDF 1.6 reference.
+    """
     # steps 1 - 4
     key = _alg33_1(owner_pwd, rev, keylen)
     # 5. Pad or truncate the user password string as described in step 1 of
@@ -123,8 +127,8 @@ def _alg33(owner_pwd: str, user_pwd: str, rev: int, keylen: int) -> bytes:
     return val
 
 
-# Steps 1-4 of algorithm 3.3
 def _alg33_1(password: Union[bytes, str], rev: int, keylen: int) -> bytes:
+    """Steps 1-4 of algorithm 3.3"""
     # 1. Pad or truncate the owner password string as described in step 1 of
     # algorithm 3.2.  If there is no owner password, use the user password
     # instead.
@@ -148,17 +152,21 @@ def _alg33_1(password: Union[bytes, str], rev: int, keylen: int) -> bytes:
     return key
 
 
-# Implementation of algorithm 3.4 of the PDF standard security handler,
-# section 3.5.2 of the PDF 1.6 reference.
 def _alg34(
     password: Union[str, bytes],
     owner_entry: ByteStringObject,
     p_entry: int,
     id1_entry: ByteStringObject,
 ) -> Tuple[bytes, bytes]:
+    """
+    Implementation of algorithm 3.4 of the PDF standard security handler,
+    section 3.5.2 of the PDF 1.6 reference.
+    """
     # 1. Create an encryption key based on the user password string, as
     # described in algorithm 3.2.
-    key = _alg32(password, 2, 5, owner_entry, p_entry, id1_entry)
+    rev = 2
+    keylen = 5
+    key = _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry)
     # 2. Encrypt the 32-byte padding string shown in step 1 of algorithm 3.2,
     # using an RC4 encryption function with the encryption key from the
     # preceding step.
@@ -168,8 +176,6 @@ def _alg34(
     return U, key
 
 
-# Implementation of algorithm 3.4 of the PDF standard security handler,
-# section 3.5.2 of the PDF 1.6 reference.
 def _alg35(
     password: Union[str, bytes],
     rev: int,
@@ -179,6 +185,10 @@ def _alg35(
     id1_entry: ByteStringObject,
     metadata_encrypt: bool,
 ) -> Tuple[bytes, bytes]:
+    """
+    Implementation of algorithm 3.5 of the PDF standard security handler,
+    section 3.5.2 of the PDF 1.6 reference.
+    """
     # 1. Create an encryption key based on the user password string, as
     # described in Algorithm 3.2.
     key = _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry)

diff --git a/resources/crazyones-encrypted-256.pdf b/resources/crazyones-encrypted-256.pdf
diff --git a/tests/test_generic.py b/tests/test_generic.py
@@ -1,7 +1,9 @@
+import os
 from io import BytesIO
 
 import pytest
 
+from PyPDF2 import PdfReader, PdfWriter
 from PyPDF2.constants import TypFitArguments as TF
 from PyPDF2.errors import PdfReadError, PdfStreamError
 from PyPDF2.generic import (
@@ -18,13 +20,18 @@
     NumberObject,
     RectangleObject,
     TextStringObject,
+    TreeObject,
     createStringObject,
     encode_pdfdocencoding,
     read_object,
     readHexStringFromStream,
     readStringFromStream,
 )
 
+TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))
+PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
+RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "resources")
+
 
 def test_float_object_exception():
     assert FloatObject("abc") == 0
@@ -376,3 +383,30 @@ def test_TextStringObject_autodetect_utf16():
     tso = TextStringObject("foo")
     tso.autodetect_utf16 = True
     assert tso.get_original_bytes() == b"\xfe\xff\x00f\x00o\x00o"
+
+
+def test_remove_child_not_in_tree():
+    tree = TreeObject()
+    with pytest.raises(ValueError) as exc:
+        tree.remove_child(NameObject("foo"))
+    assert exc.value.args[0] == "Removed child does not appear to be a tree item"
+
+
+def test_remove_child_in_tree():
+    pdf = os.path.join(RESOURCE_ROOT, "form.pdf")
+
+    tree = TreeObject()
+    reader = PdfReader(pdf)
+    writer = PdfWriter()
+    writer.add_page(reader.pages[0])
+    writer.add_bookmark("foo", 0)
+    obj = writer._objects[-1]
+    # print(dict)
+    # print(type(dict))
+    # for obj in writer._objects:
+    #     print(obj)
+    #     print(type(obj))
+    tree.add_child(obj, writer)
+    tree.remove_child(obj)
+    tree.add_child(obj, writer)
+    tree.emptyTree()
diff --git a/tests/test_reader.py b/tests/test_reader.py
@@ -255,23 +255,50 @@ def test_issue297():
 
 
 @pytest.mark.parametrize(
-    ("password", "should_fail"), [("test", False), ("qwerty", True)]
+    ("pdffile", "password", "should_fail"),
+    [
+        ("encrypted-file.pdf", "test", False),
+        ("encrypted-file.pdf", "qwerty", True),
+        ("encrypted-file.pdf", b"qwerty", True),
+    ],
 )
-def test_get_page_of_encrypted_file(password, should_fail):
+def test_get_page_of_encrypted_file(pdffile, password, should_fail):
     """
     Check if we can read a page of an encrypted file.
 
     This is a regression test for issue 327:
     IndexError for get_page() of decrypted file
     """
-    path = os.path.join(RESOURCE_ROOT, "encrypted-file.pdf")
+    path = os.path.join(RESOURCE_ROOT, pdffile)
     if should_fail:
         with pytest.raises(PdfReadError):
             PdfReader(path, password=password)
     else:
         PdfReader(path, password=password).pages[0]
 
 
+@pytest.mark.parametrize(
+    ("pdffile", "password"),
+    [
+        ("crazyones-encrypted-256.pdf", "password"),
+    ],
+)
+def test_get_page_of_encrypted_file_new_algorithm(pdffile, password):
+    """
+    Check that an unsupported encryption algorithm is reported clearly.
+
+    The file uses algorithm code 5, so accessing a page is expected to
+    raise NotImplementedError instead of returning the page.
+    """
+    path = os.path.join(RESOURCE_ROOT, pdffile)
+    with pytest.raises(NotImplementedError) as exc:
+        PdfReader(path, password=password).pages[0]
+    assert (
+        exc.value.args[0]
+        == "only algorithm code 1 and 2 are supported. This PDF uses code 5"
+    )
+
+
 @pytest.mark.parametrize(
     ("src", "expected", "expected_get_fields"),
     [

diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -89,3 +89,9 @@ def test_b():
     assert PyPDF2._utils.b_("😀") == "😀".encode()
     assert PyPDF2._utils.b_("‰") == "‰".encode()
     assert PyPDF2._utils.b_("▷") == "▷".encode()
+
+
+def test_deprecate_no_replacement():
+    with pytest.raises(PendingDeprecationWarning) as exc:
+        PyPDF2._utils.deprecate_no_replacement("foo")
+    assert exc.value.args[0] == "foo is deprecated and will be removed in PyPDF2 3.0.0."