diff --git a/PyPDF2/__init__.py b/PyPDF2/__init__.py index f7fcd92b9..08b03d5ec 100644 --- a/PyPDF2/__init__.py +++ b/PyPDF2/__init__.py @@ -7,6 +7,7 @@ You can read the full docs at https://pypdf2.readthedocs.io/. """ +from ._encryption import PasswordType from ._merger import PdfFileMerger, PdfMerger from ._page import PageObject, Transformation from ._reader import DocumentInformation, PdfFileReader, PdfReader @@ -29,4 +30,5 @@ "PdfWriter", "Transformation", "PageObject", + "PasswordType", ] diff --git a/PyPDF2/_encryption.py b/PyPDF2/_encryption.py index bc6870170..e343556c8 100644 --- a/PyPDF2/_encryption.py +++ b/PyPDF2/_encryption.py @@ -25,10 +25,11 @@ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. +from enum import IntEnum import hashlib import random import struct -from typing import Dict, Optional, Tuple, Union, cast +from typing import Optional, Tuple, Union, cast from PyPDF2.errors import DependencyError from PyPDF2.generic import ( @@ -226,16 +227,7 @@ def _padding(data: bytes) -> bytes: return (data + _PADDING)[:32] -def _bytes(value: Union[bytes, str]) -> bytes: - if isinstance(value, bytes): - return value - try: - return value.encode("latin-1") - except Exception: # noqa - return value.encode("utf-8") - - -class AlgR4: +class AlgV4: @staticmethod def compute_key( password: bytes, @@ -397,10 +389,10 @@ def verify_user_password( encryption dictionary’s U (user password) value (Security handlers of revision 3 or greater)") shall be used to decrypt the document. """ - key = AlgR4.compute_key( + key = AlgV4.compute_key( user_pwd, rev, key_size, o_entry, P, id1_entry, metadata_encrypted ) - u_value = AlgR4.compute_U_value(key, rev, id1_entry) + u_value = AlgV4.compute_U_value(key, rev, id1_entry) if rev >= 3: u_value = u_value[:16] u_entry = u_entry[:16] @@ -434,7 +426,7 @@ def verify_owner_password( c) The result of step (b) purports to be the user password. 
Authenticate this user password using "Algorithm 6: Authenticating the user password". If it is correct, the password supplied is the correct owner password. """ - rc4_key = AlgR4.compute_O_value_key(owner_pwd, rev, key_size) + rc4_key = AlgV4.compute_O_value_key(owner_pwd, rev, key_size) if rev <= 2: u_pwd = RC4_decrypt(rc4_key, o_entry) @@ -443,15 +435,15 @@ def verify_owner_password( for i in range(19, -1, -1): key = bytes(bytearray(x ^ i for x in rc4_key)) u_pwd = RC4_decrypt(key, u_pwd) - return AlgR4.verify_user_password( + return AlgV4.verify_user_password( u_pwd, rev, key_size, o_entry, u_entry, P, id1_entry, metadata_encrypted ) -class AlgR5: +class AlgV5: @staticmethod def verify_owner_password( - password: bytes, o_value: bytes, oe_value: bytes, u_value: bytes + R: int, password: bytes, o_value: bytes, oe_value: bytes, u_value: bytes ) -> bytes: """ Algorithm 3.2a Computing an encryption key @@ -483,23 +475,44 @@ def verify_owner_password( should match the value in the P key. 
""" password = password[:127] - if hashlib.sha256(password + o_value[32:40] + u_value).digest() != o_value[:32]: + if AlgV5.calculate_hash(R, password, o_value[32:40], u_value) != o_value[:32]: return b"" iv = bytes(0 for _ in range(16)) - tmp_key = hashlib.sha256(password + o_value[40:] + u_value).digest() + tmp_key = AlgV5.calculate_hash(R, password, o_value[40:], u_value) key = AES_CBC_decrypt(tmp_key, iv, oe_value) return key @staticmethod - def verify_user_password(password: bytes, u_value: bytes, ue_value: bytes) -> bytes: + def verify_user_password(R: int, password: bytes, u_value: bytes, ue_value: bytes) -> bytes: """see :func:`verify_owner_password`""" password = password[:127] - if hashlib.sha256(password + u_value[32:40]).digest() != u_value[:32]: + if AlgV5.calculate_hash(R, password, u_value[32:40], b"") != u_value[:32]: return b"" iv = bytes(0 for _ in range(16)) - tmp_key = hashlib.sha256(password + u_value[40:]).digest() + tmp_key = AlgV5.calculate_hash(R, password, u_value[40:], b"") return AES_CBC_decrypt(tmp_key, iv, ue_value) + @staticmethod + def calculate_hash(R: int, password: bytes, salt: bytes, udata: bytes) -> bytes: + # from https://github.com/qpdf/qpdf/blob/main/libqpdf/QPDF_encryption.cc + K = hashlib.sha256(password + salt + udata).digest() + if R < 6: + return K + count = 0 + while True: + count += 1 + K1 = password + K + udata + E = AES_CBC_encrypt(K[:16], K[16:32], K1 * 64) + hash_fn = ( + hashlib.sha256, + hashlib.sha384, + hashlib.sha512, + )[sum(E[:16]) % 3] + K = hash_fn(E).digest() + if count >= 64 and E[-1] <= count - 32: + break + return K[:32] + @staticmethod def verify_perms( key: bytes, perms: bytes, p: int, metadata_encrypted: bool @@ -514,9 +527,9 @@ def verify_perms( def generate_values( user_pwd: bytes, owner_pwd: bytes, key: bytes, p: int, metadata_encrypted: bool ) -> dict: - u_value, ue_value = AlgR5.compute_U_value(user_pwd, key) - o_value, oe_value = AlgR5.compute_O_value(owner_pwd, key, u_value) - perms = 
AlgR5.compute_Perms_value(key, p, metadata_encrypted) + u_value, ue_value = AlgV5.compute_U_value(user_pwd, key) + o_value, oe_value = AlgV5.compute_O_value(owner_pwd, key, u_value) + perms = AlgV5.compute_Perms_value(key, p, metadata_encrypted) return { "/U": u_value, "/UE": ue_value, @@ -599,10 +612,17 @@ def compute_Perms_value(key: bytes, p: int, metadata_encrypted: bool) -> bytes: return perms +class PasswordType(IntEnum): + NOT_DECRYPTED = 0 + USER_PASSWORD = 1 + OWNER_PASSWORD = 2 + + class Encryption: def __init__( self, algV: int, + algR: int, entry: DictionaryObject, first_id_entry: bytes, StmF: str, @@ -611,6 +631,7 @@ def __init__( ) -> None: # See TABLE 3.18 Entries common to all encryption dictionaries self.algV = algV + self.algR = algR self.entry = entry self.key_size = entry.get("/Length", 40) self.id1_entry = first_id_entry @@ -618,10 +639,13 @@ def __init__( self.StrF = StrF self.EFF = EFF + # 1 => user password + # 2 => owner password + self._password_type = PasswordType.NOT_DECRYPTED self._key: Optional[bytes] = None - # keep key - self._user_keys: Dict = {} - self._owner_keys: Dict = {} + + def is_decrypted(self) -> bool: + return self._password_type != PasswordType.NOT_DECRYPTED def decrypt_object(self, obj: PdfObject, idnum: int, generation: int) -> PdfObject: """ @@ -666,7 +690,7 @@ def decrypt_object(self, obj: PdfObject, idnum: int, generation: int) -> PdfObje key_hash.update(b"sAlT") aes128_key = key_hash.digest()[: min(n + 5, 16)] - # for V=5 use AES-256 + # for AES-256 aes256_key = key stmCrypt = self._get_crypt(self.StmF, rc4_key, aes128_key, aes256_key) @@ -689,36 +713,22 @@ def _get_crypt( else: return CryptRC4(rc4_key) - def verify(self, user_pwd: Union[bytes, str], owner_pwd: Union[bytes, str]) -> int: - up_bytes = _bytes(user_pwd) - op_bytes = _bytes(owner_pwd) - - key = self._user_keys.get(up_bytes) - if key: - self._key = key - return 1 - - key = self._owner_keys.get(op_bytes) - if key: - self._key = key - return 2 - - rc = 0 - 
if self.algV <= 4: - key, rc = self.verify_r4(up_bytes, op_bytes) + def verify(self, password: Union[bytes, str]) -> PasswordType: + if isinstance(password, str): + try: + pwd = password.encode("latin-1") + except Exception: # noqa + pwd = password.encode("utf-8") else: - key, rc = self.verify_r5(up_bytes, op_bytes) + pwd = password - if rc == 1: - self._key = key - self._user_keys[up_bytes] = key - elif rc == 2: + key, rc = self.verify_v4(pwd) if self.algV <= 4 else self.verify_v5(pwd) + if rc != PasswordType.NOT_DECRYPTED: + self._password_type = rc self._key = key - self._owner_keys[op_bytes] = key - return rc - def verify_r4(self, user_pwd: bytes, owner_pwd: bytes) -> Tuple[bytes, int]: + def verify_v4(self, password: bytes) -> Tuple[bytes, PasswordType]: R = cast(int, self.entry["/R"]) P = cast(int, self.entry["/P"]) P = (P + 0x100000000) % 0x100000000 # maybe < 0 @@ -726,8 +736,9 @@ def verify_r4(self, user_pwd: bytes, owner_pwd: bytes) -> Tuple[bytes, int]: o_entry = cast(ByteStringObject, self.entry["/O"].get_object()).original_bytes u_entry = cast(ByteStringObject, self.entry["/U"].get_object()).original_bytes - key = AlgR4.verify_user_password( - user_pwd, + # verify owner password first + key = AlgV4.verify_owner_password( + password, R, self.key_size, o_entry, @@ -737,9 +748,9 @@ def verify_r4(self, user_pwd: bytes, owner_pwd: bytes) -> Tuple[bytes, int]: metadata_encrypted, ) if key: - return key, 1 - key = AlgR4.verify_owner_password( - owner_pwd, + return key, PasswordType.OWNER_PASSWORD + key = AlgV4.verify_user_password( + password, R, self.key_size, o_entry, @@ -749,33 +760,32 @@ def verify_r4(self, user_pwd: bytes, owner_pwd: bytes) -> Tuple[bytes, int]: metadata_encrypted, ) if key: - return key, 2 - return b"", 0 + return key, PasswordType.USER_PASSWORD + return b"", PasswordType.NOT_DECRYPTED - def verify_r5(self, user_pwd: bytes, owner_pwd: bytes) -> Tuple[bytes, int]: + def verify_v5(self, password: bytes) -> Tuple[bytes, PasswordType]: # 
TODO: use SASLprep process o_entry = cast(ByteStringObject, self.entry["/O"].get_object()).original_bytes u_entry = cast(ByteStringObject, self.entry["/U"].get_object()).original_bytes oe_entry = cast(ByteStringObject, self.entry["/OE"].get_object()).original_bytes ue_entry = cast(ByteStringObject, self.entry["/UE"].get_object()).original_bytes - rc = 0 - key = AlgR5.verify_user_password(user_pwd, u_entry, ue_entry) - if key: - rc = 1 - else: - key = AlgR5.verify_owner_password(owner_pwd, o_entry, oe_entry, u_entry) - if key: - rc = 2 - if rc == 0: - return b"", 0 + # verify owner password first + key = AlgV5.verify_owner_password(self.algR, password, o_entry, oe_entry, u_entry) + rc = PasswordType.OWNER_PASSWORD + if not key: + key = AlgV5.verify_user_password(self.algR, password, u_entry, ue_entry) + rc = PasswordType.USER_PASSWORD + if not key: + return b"", PasswordType.NOT_DECRYPTED + # verify Perms perms = cast(ByteStringObject, self.entry["/Perms"].get_object()).original_bytes P = cast(int, self.entry["/P"]) P = (P + 0x100000000) % 0x100000000 # maybe < 0 metadata_encrypted = self.entry.get("/EncryptMetadata", True) - if not AlgR5.verify_perms(key, perms, P, metadata_encrypted): - return b"", 0 + if not AlgV5.verify_perms(key, perms, P, metadata_encrypted): + return b"", PasswordType.NOT_DECRYPTED return key, rc @staticmethod @@ -818,7 +828,4 @@ def read(encryption_entry: DictionaryObject, first_id_entry: bytes) -> "Encrypti raise NotImplementedError(f"EFF Method {EFF} NOT supported!") R = cast(int, encryption_entry["/R"]) - if R > 5: - raise NotImplementedError(f"encryption R={R} NOT supported!") - - return Encryption(V, encryption_entry, first_id_entry, StmF, StrF, EFF) + return Encryption(V, R, encryption_entry, first_id_entry, StmF, StrF, EFF) diff --git a/PyPDF2/_merger.py b/PyPDF2/_merger.py index 7cb27e605..36b40ad22 100644 --- a/PyPDF2/_merger.py +++ b/PyPDF2/_merger.py @@ -202,7 +202,7 @@ def _create_stream( stream = FileIO(fileobj, "rb") my_file = 
True elif isinstance(fileobj, PdfReader): - if hasattr(fileobj, "_encryption"): + if fileobj._encryption: encryption_obj = fileobj._encryption orig_tell = fileobj.stream.tell() fileobj.stream.seek(0) diff --git a/PyPDF2/_reader.py b/PyPDF2/_reader.py index c9cc1101f..5f349be07 100644 --- a/PyPDF2/_reader.py +++ b/PyPDF2/_reader.py @@ -45,6 +45,7 @@ cast, ) +from ._encryption import Encryption, PasswordType from ._page import PageObject, _VirtualList from ._utils import ( StrByteType, @@ -66,7 +67,6 @@ from .constants import PagesAttributes as PA from .constants import TrailerKeys as TK from .errors import ( - DependencyError, PdfReadError, PdfReadWarning, PdfStreamError, @@ -254,8 +254,26 @@ def __init__( self.stream = stream self._override_encryption = False - if password is not None and self.decrypt(password) == 0: - raise PdfReadError("Wrong password") + self._encryption: Optional[Encryption] = None + if self.is_encrypted: + self._override_encryption = True + # Some documents may not have a /ID, use two empty + # byte strings instead. Solves + # https://github.com/mstamy2/PyPDF2/issues/608 + id_entry = self.trailer.get(TK.ID) + id1_entry = id_entry[0].get_object().original_bytes if id_entry else b"" + encrypt_entry = cast(DictionaryObject, self.trailer[TK.ENCRYPT].get_object()) + self._encryption = Encryption.read(encrypt_entry, id1_entry) + + # try empty password if no password provided + pwd = password if password is not None else b"" + if self._encryption.verify(pwd) == PasswordType.NOT_DECRYPTED and password is not None: + # raise if password provided + raise PdfReadError("Wrong password") + self._override_encryption = False + else: + if password is not None: + raise PdfReadError("Not encrypted file") @property def pdf_header(self) -> str: @@ -352,17 +370,7 @@ def _get_num_pages(self) -> int: # the PDF file's page count is used in this case. Otherwise, # the original method (flattened page count) is used. 
if self.is_encrypted: - try: - self._override_encryption = True - self.decrypt("") - return self.trailer[TK.ROOT]["/Pages"]["/Count"] # type: ignore - except DependencyError as e: - # make dependency error clear to users - raise e - except Exception: - raise PdfReadError("File has not been decrypted") - finally: - self._override_encryption = False + return self.trailer[TK.ROOT]["/Pages"]["/Count"] # type: ignore else: if self.flattened_pages is None: self._flatten() @@ -1063,10 +1071,10 @@ def get_object(self, indirect_reference: IndirectObject) -> Optional[PdfObject]: retval = read_object(self.stream, self) # type: ignore # override encryption is used for the /Encrypt dictionary - if not self._override_encryption and self.is_encrypted: + if not self._override_encryption and self._encryption is not None: # if we don't have the encryption key: - if not hasattr(self, "_encryption"): - raise PdfReadError("file has not been decrypted") + if not self._encryption.is_decrypted(): + raise PdfReadError("File has not been decrypted") # otherwise, decrypt here... retval = cast(PdfObject, retval) retval = self._encryption.decrypt_object( @@ -1562,7 +1570,7 @@ def readNextEndLine( deprecate_no_replacement("readNextEndLine") return self.read_next_end_line(stream, limit_offset) - def decrypt(self, password: Union[str, bytes]) -> int: + def decrypt(self, password: Union[str, bytes]) -> PasswordType: """ When using an encrypted / secured PDF file with the PDF Standard encryption handler, this function will allow the file to be decrypted. @@ -1575,18 +1583,14 @@ def decrypt(self, password: Union[str, bytes]) -> int: this library. :param str password: The password to match. - :return: ``0`` if the password failed, ``1`` if the password matched the user - password, and ``2`` if the password matched the owner password. + :return: `PasswordType`. :rtype: int - :raises NotImplementedError: if document uses an unsupported encryption method. 
""" - - self._override_encryption = True - try: - return self._decrypt(password) - finally: - self._override_encryption = False + if not self._encryption: + raise PdfReadError("Not encrypted file") + # TODO: raise Exception for wrong password + return self._encryption.verify(password) def decode_permissions(self, permissions_code: int) -> Dict[str, bool]: # Takes the permissions as an integer, returns the allowed access @@ -1603,26 +1607,6 @@ def decode_permissions(self, permissions_code: int) -> Dict[str, bool]: ) # bit 12 return permissions - def _decrypt(self, password: Union[str, bytes]) -> int: - # already got the KEY - if hasattr(self, "_encryption"): - return 3 - from PyPDF2._encryption import Encryption - - # Some documents may not have a /ID, use two empty - # byte strings instead. Solves - # https://github.com/mstamy2/PyPDF2/issues/608 - id_entry = self.trailer.get(TK.ID) - id1_entry = id_entry[0].get_object().original_bytes if id_entry else b"" - encryptEntry = cast(DictionaryObject, self.trailer[TK.ENCRYPT].get_object()) - encryption = Encryption.read(encryptEntry, id1_entry) - # maybe password is owner password - # TODO: add/modify api to set owner password - rr = encryption.verify(password, password) - if rr > 0: - self._encryption = encryption - return rr - @property def is_encrypted(self) -> bool: """ diff --git a/resources/encryption/enca.pdf b/resources/encryption/enca.pdf deleted file mode 100644 index 1a9d32570..000000000 Binary files a/resources/encryption/enca.pdf and /dev/null differ diff --git a/resources/encryption/encb.pdf b/resources/encryption/encb.pdf deleted file mode 100644 index 900c151d1..000000000 Binary files a/resources/encryption/encb.pdf and /dev/null differ diff --git a/resources/encryption/enc1.pdf b/resources/encryption/r2-empty-password.pdf similarity index 100% rename from resources/encryption/enc1.pdf rename to resources/encryption/r2-empty-password.pdf diff --git a/resources/encryption/enc3.pdf 
b/resources/encryption/r2-user-password.pdf similarity index 100% rename from resources/encryption/enc3.pdf rename to resources/encryption/r2-user-password.pdf diff --git a/resources/encryption/enc2.pdf b/resources/encryption/r3-empty-password.pdf similarity index 100% rename from resources/encryption/enc2.pdf rename to resources/encryption/r3-empty-password.pdf diff --git a/resources/encryption/enc4.pdf b/resources/encryption/r3-user-password.pdf similarity index 100% rename from resources/encryption/enc4.pdf rename to resources/encryption/r3-user-password.pdf diff --git a/resources/encryption/enc6.pdf b/resources/encryption/r4-aes-user-password.pdf similarity index 100% rename from resources/encryption/enc6.pdf rename to resources/encryption/r4-aes-user-password.pdf diff --git a/resources/encryption/enc5.pdf b/resources/encryption/r4-user-password.pdf similarity index 100% rename from resources/encryption/enc5.pdf rename to resources/encryption/r4-user-password.pdf diff --git a/resources/encryption/enc7.pdf b/resources/encryption/r5-empty-password.pdf similarity index 100% rename from resources/encryption/enc7.pdf rename to resources/encryption/r5-empty-password.pdf diff --git a/resources/encryption/enc9.pdf b/resources/encryption/r5-owner-password.pdf similarity index 100% rename from resources/encryption/enc9.pdf rename to resources/encryption/r5-owner-password.pdf diff --git a/resources/encryption/enc8.pdf b/resources/encryption/r5-user-password.pdf similarity index 100% rename from resources/encryption/enc8.pdf rename to resources/encryption/r5-user-password.pdf diff --git a/resources/encryption/r6-both-passwords.pdf b/resources/encryption/r6-both-passwords.pdf new file mode 100644 index 000000000..8bd720622 Binary files /dev/null and b/resources/encryption/r6-both-passwords.pdf differ diff --git a/resources/encryption/r6-empty-password.pdf b/resources/encryption/r6-empty-password.pdf new file mode 100644 index 000000000..7b22a0c26 Binary files /dev/null and 
b/resources/encryption/r6-empty-password.pdf differ diff --git a/resources/encryption/r6-owner-password.pdf b/resources/encryption/r6-owner-password.pdf new file mode 100644 index 000000000..3b36c563a Binary files /dev/null and b/resources/encryption/r6-owner-password.pdf differ diff --git a/resources/encryption/r6-user-password.pdf b/resources/encryption/r6-user-password.pdf new file mode 100644 index 000000000..432a50387 Binary files /dev/null and b/resources/encryption/r6-user-password.pdf differ diff --git a/resources/encryption/enc0.pdf b/resources/encryption/unencrypted.pdf similarity index 100% rename from resources/encryption/enc0.pdf rename to resources/encryption/unencrypted.pdf diff --git a/tests/test_encryption.py b/tests/test_encryption.py index 06545d129..711cd6e5a 100644 --- a/tests/test_encryption.py +++ b/tests/test_encryption.py @@ -10,63 +10,79 @@ @pytest.mark.parametrize( - "src", + "name", [ # unencrypted pdf - (os.path.join(RESOURCE_ROOT, "encryption", "enc0.pdf")), - # created by `qpdf --encrypt "" "" 40 -- enc0.pdf enc1.pdf` - (os.path.join(RESOURCE_ROOT, "encryption", "enc1.pdf")), - # created by `qpdf --encrypt "" "" 128 -- enc0.pdf enc2.pdf` - (os.path.join(RESOURCE_ROOT, "encryption", "enc2.pdf")), - # created by `qpdf --encrypt "asdfzxcv" "" 40 -- enc0.pdf enc3.pdf` - (os.path.join(RESOURCE_ROOT, "encryption", "enc3.pdf")), - # created by `qpdf --encrypt "asdfzxcv" "" 128 -- enc0.pdf enc4.pdf` - (os.path.join(RESOURCE_ROOT, "encryption", "enc4.pdf")), - # V=4 and AES128 - # created by `qpdf --encrypt "asdfzxcv" "" 128 --force-V4 -- enc0.pdf enc5.pdf` - (os.path.join(RESOURCE_ROOT, "encryption", "enc5.pdf")), - # created by `qpdf --encrypt "asdfzxcv" "" 128 --use-aes=y -- enc0.pdf enc6.pdf` - (os.path.join(RESOURCE_ROOT, "encryption", "enc6.pdf")), - # # V=5 and R=5 use AES-256 - # # created by `qpdf --encrypt "" "" 256 --force-R5 -- enc0.pdf enc7.pdf` - (os.path.join(RESOURCE_ROOT, "encryption", "enc7.pdf")), - # # created by `qpdf 
--encrypt "asdfzxcv" "" 256 --force-R5 -- enc0.pdf enc8.pdf` - (os.path.join(RESOURCE_ROOT, "encryption", "enc8.pdf")), - # # created by `qpdf --encrypt "" "asdfzxcv" 256 --force-R5 -- enc0.pdf enc9.pdf` - (os.path.join(RESOURCE_ROOT, "encryption", "enc9.pdf")), - # asdfzxcv is owner password - # created by `qpdf --encrypt "" "asdfzxcv" 128 --use-aes=y -- enc0.pdf enca.pdf` - (os.path.join(RESOURCE_ROOT, "encryption", "enca.pdf")), - # created by `qpdf --encrypt "1234" "asdfzxcv" 128 --use-aes=y -- enc0.pdf encb.pdf` - (os.path.join(RESOURCE_ROOT, "encryption", "encb.pdf")), + "unencrypted.pdf", + # created by `qpdf --encrypt "" "" 40 -- unencrypted.pdf r2-empty-password.pdf` + "r2-empty-password.pdf", + # created by `qpdf --encrypt "" "" 128 -- unencrypted.pdf r3-empty-password.pdf` + "r3-empty-password.pdf", + # created by `qpdf --encrypt "asdfzxcv" "" 40 -- unencrypted.pdf r2-user-password.pdf` + "r2-user-password.pdf", + # created by `qpdf --encrypt "asdfzxcv" "" 128 -- unencrypted.pdf r3-user-password.pdf` + "r3-user-password.pdf", + # created by `qpdf --encrypt "asdfzxcv" "" 128 --force-V4 -- unencrypted.pdf r4-user-password.pdf` + "r4-user-password.pdf", + # created by `qpdf --encrypt "asdfzxcv" "" 128 --use-aes=y -- unencrypted.pdf r4-aes-user-password.pdf` + "r4-aes-user-password.pdf", + # # created by `qpdf --encrypt "" "" 256 --force-R5 -- unencrypted.pdf r5-empty-password.pdf` + "r5-empty-password.pdf", + # # created by `qpdf --encrypt "asdfzxcv" "" 256 --force-R5 -- unencrypted.pdf r5-user-password.pdf` + "r5-user-password.pdf", + # # created by `qpdf --encrypt "" "asdfzxcv" 256 --force-R5 -- unencrypted.pdf r5-owner-password.pdf` + "r5-owner-password.pdf", + # created by `qpdf --encrypt "" "" 256 -- unencrypted.pdf r6-empty-password.pdf` + "r6-empty-password.pdf", + # created by `qpdf --encrypt "asdfzxcv" "" 256 -- unencrypted.pdf r6-user-password.pdf` + "r6-user-password.pdf", + # created by `qpdf --encrypt "" "asdfzxcv" 256 -- unencrypted.pdf 
r6-owner-password.pdf` + "r6-owner-password.pdf", ], ) -def test_encryption(src): - with open(src, "rb") as inputfile: - ipdf = PyPDF2.PdfReader(inputfile) - if src.endswith("enc0.pdf"): - assert not ipdf.is_encrypted - else: - assert ipdf.is_encrypted - ipdf.decrypt("asdfzxcv") - assert len(ipdf.pages) == 1 - dd = dict(ipdf.metadata) - # remove empty value entry - dd = {x[0]: x[1] for x in dd.items() if x[1]} - assert dd == { - "/Author": "cheng", - "/CreationDate": "D:20220414132421+05'24'", - "/Creator": "WPS Writer", - "/ModDate": "D:20220414132421+05'24'", - "/SourceModified": "D:20220414132421+05'24'", - "/Trapped": "/False", - } +def test_encryption(name): + inputfile = os.path.join(RESOURCE_ROOT, "encryption", name) + ipdf = PyPDF2.PdfReader(inputfile) + if inputfile.endswith("unencrypted.pdf"): + assert not ipdf.is_encrypted + else: + assert ipdf.is_encrypted + ipdf.decrypt("asdfzxcv") + assert len(ipdf.pages) == 1 + dd = dict(ipdf.metadata) + # remove empty value entry + dd = {x[0]: x[1] for x in dd.items() if x[1]} + assert dd == { + "/Author": "cheng", + "/CreationDate": "D:20220414132421+05'24'", + "/Creator": "WPS Writer", + "/ModDate": "D:20220414132421+05'24'", + "/SourceModified": "D:20220414132421+05'24'", + "/Trapped": "/False", + } + + +@pytest.mark.parametrize( + ("name", "user_passwd", "owner_passwd"), + [ + # created by `qpdf --encrypt "foo" "bar" 256 -- unencrypted.pdf r6-both-passwords.pdf` + ("r6-both-passwords.pdf", "foo", "bar"), + ], +) +def test_both_password(name, user_passwd, owner_passwd): + from PyPDF2 import PasswordType + inputfile = os.path.join(RESOURCE_ROOT, "encryption", name) + ipdf = PyPDF2.PdfReader(inputfile) + assert ipdf.is_encrypted + assert ipdf.decrypt(user_passwd) == PasswordType.USER_PASSWORD + assert ipdf.decrypt(owner_passwd) == PasswordType.OWNER_PASSWORD + assert len(ipdf.pages) == 1 @pytest.mark.parametrize( "names", [ - (["enc0.pdf", "enc4.pdf", "enc5.pdf", "enc6.pdf"]), + (["unencrypted.pdf", 
"r3-user-password.pdf", "r4-aes-user-password.pdf", "r5-user-password.pdf"]), ], ) def test_encryption_merge(names): diff --git a/tests/test_reader.py b/tests/test_reader.py index 68b16ae12..a38385d5d 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -294,9 +294,7 @@ def test_get_page_of_encrypted_file_new_algorithm(pdffile, password): IndexError for get_page() of decrypted file """ path = os.path.join(RESOURCE_ROOT, pdffile) - with pytest.raises(NotImplementedError) as exc: - PdfReader(path, password=password).pages[0] - assert exc.value.args[0] == "encryption R=6 NOT supported!" + PdfReader(path, password=password).pages[0] @pytest.mark.parametrize(