From 553ed6e51c0fafd1779b5c7a8e1948507df2653d Mon Sep 17 00:00:00 2001 From: stefan6419846 <96178532+stefan6419846@users.noreply.github.com> Date: Mon, 8 Jan 2024 12:26:16 +0100 Subject: [PATCH 1/3] ENH: Improve/rewrite PDF permission retrieval --- pypdf/_reader.py | 14 ++++++- pypdf/_writer.py | 2 +- pypdf/constants.py | 41 ++++++++++++++++++++ tests/test_constants.py | 84 ++++++++++++++++++++++++++++++++++++++++- tests/test_reader.py | 50 ++++++++++++++++++++++-- 5 files changed, 184 insertions(+), 7 deletions(-) diff --git a/pypdf/_reader.py b/pypdf/_reader.py index 70f074276..a2ec36288 100644 --- a/pypdf/_reader.py +++ b/pypdf/_reader.py @@ -55,6 +55,7 @@ StrByteType, StreamType, b_, + deprecate_with_replacement, logger_warning, parse_iso8824_date, read_non_whitespace, @@ -1811,12 +1812,16 @@ def decrypt(self, password: Union[str, bytes]) -> PasswordType: def decode_permissions(self, permissions_code: int) -> Dict[str, bool]: """Take the permissions as an integer, return the allowed access.""" + deprecate_with_replacement( + old_name="decode_permissions", new_name="user_access_permissions", removed_in="5.0.0" + ) + permissions_mapping = { "print": UserAccessPermissions.PRINT, "modify": UserAccessPermissions.MODIFY, "copy": UserAccessPermissions.EXTRACT, "annotations": UserAccessPermissions.ADD_OR_MODIFY, - "forms": UserAccessPermissions.R7, + "forms": UserAccessPermissions.FILL_FORM_FIELDS, "accessability": UserAccessPermissions.EXTRACT_TEXT_AND_GRAPHICS, "assemble": UserAccessPermissions.ASSEMBLE_DOC, "print_high_quality": UserAccessPermissions.PRINT_TO_REPRESENTATION, @@ -1827,6 +1832,13 @@ def decode_permissions(self, permissions_code: int) -> Dict[str, bool]: for key, flag in permissions_mapping.items() } + @property + def user_access_permissions(self) -> Optional[UserAccessPermissions]: + """Get the user access permissions for encrypted documents. Returns None if not encrypted.""" + if self._encryption is None: + return None + return UserAccessPermissions(self._encryption.P) + @property def is_encrypted(self) -> bool: """ diff --git a/pypdf/_writer.py b/pypdf/_writer.py index e12c2e8ab..d5562b7b4 100644 --- a/pypdf/_writer.py +++ b/pypdf/_writer.py @@ -122,7 +122,7 @@ ) OPTIONAL_READ_WRITE_FIELD = FieldFlag(0) -ALL_DOCUMENT_PERMISSIONS = UserAccessPermissions((2**31 - 1) - 3) +ALL_DOCUMENT_PERMISSIONS = UserAccessPermissions.all() class ObjectDeletionFlag(enum.IntFlag): diff --git a/pypdf/constants.py b/pypdf/constants.py index 56a24b183..49d897308 100644 --- a/pypdf/constants.py +++ b/pypdf/constants.py @@ -85,6 +85,47 @@ class UserAccessPermissions(IntFlag): R31 = 2**30 R32 = 2**31 + @classmethod + def _is_reserved(cls, name: str) -> bool: + """Check if the given name corresponds to a reserved flag entry.""" + return name.startswith("R") and name[1:].isdigit() + + @classmethod + def _defaults_to_one(cls, name: str) -> bool: + """Check if the given reserved name defaults to 1 = active.""" + return name not in {"R1", "R2"} + + def to_dict(self) -> Dict[str, bool]: + """Convert the given flag value to a corresponding verbose name mapping.""" + result: Dict[str, bool] = {} + for name, flag in UserAccessPermissions.__members__.items(): + if UserAccessPermissions._is_reserved(name): + continue + result[name.lower()] = (self & flag) == flag + return result + + @classmethod + def from_dict(cls, value: Dict[str, bool]) -> "UserAccessPermissions": + """Convert the verbose name mapping to the corresponding flag value.""" + value_copy = value.copy() + result = cls(0) + for name, flag in cls.__members__.items(): + if cls._is_reserved(name): + # Reserved names have a required value. Use it. + if cls._defaults_to_one(name): + result |= flag + continue + is_active = value_copy.pop(name.lower(), False) + if is_active: + result |= flag + if value_copy: + raise ValueError(f"Unknown dictionary keys: {value_copy!r}") + return result + + @classmethod + def all(cls) -> "UserAccessPermissions": + return cls((2**32 - 1) - cls.R1 - cls.R2) + class Ressources: """TABLE 3.30 Entries in a resource dictionary.""" diff --git a/tests/test_constants.py b/tests/test_constants.py index 62fbae743..140857847 100644 --- a/tests/test_constants.py +++ b/tests/test_constants.py @@ -2,7 +2,9 @@ import re from typing import Callable -from pypdf.constants import PDF_KEYS, GraphicsStateParameters +import pytest + +from pypdf.constants import PDF_KEYS, GraphicsStateParameters, UserAccessPermissions def test_slash_prefix(): @@ -10,7 +12,7 @@ def test_slash_prefix(): Naming conventions of PDF_KEYS (constant names) are followed. This test function validates if PDF key names follow the required pattern: - - Starts with a slash '/' + - Starts with a slash "/" - Followed by an uppercase letter - Contains alphanumeric characters (letters and digits) - The attribute name should be a case-insensitive match, with underscores removed @@ -34,3 +36,81 @@ def test_slash_prefix(): if cls == GraphicsStateParameters and attr in ["ca", "op"]: continue assert pattern.match(constant_value) + + +def test_user_access_permissions__dict_handling(): + # Value is mix of configurable and reserved bits. + # Reserved bits should not be part of the dictionary. + as_dict = UserAccessPermissions(512 + 64 + 8).to_dict() + assert as_dict == { + "add_or_modify": False, + "assemble_doc": False, + "extract": False, + "extract_text_and_graphics": True, + "fill_form_fields": False, + "modify": True, + "print": False, + "print_to_representation": False, + } + + # Convert the dictionary back to an integer. + # This should add the reserved bits automatically. + permissions = UserAccessPermissions.from_dict(as_dict) + assert permissions == 4294963912 + + # Roundtrip for valid dictionary. + data = { + "add_or_modify": True, + "assemble_doc": False, + "extract": False, + "extract_text_and_graphics": True, + "fill_form_fields": False, + "modify": True, + "print": False, + "print_to_representation": True, + } + assert UserAccessPermissions.from_dict(data).to_dict() == data + + # Empty inputs. + assert UserAccessPermissions.from_dict({}) == 4294963392 # Reserved bits. + assert UserAccessPermissions(0).to_dict() == { + "add_or_modify": False, + "assemble_doc": False, + "extract": False, + "extract_text_and_graphics": False, + "fill_form_fields": False, + "modify": False, + "print": False, + "print_to_representation": False, + } + + # Unknown dictionary keys. + data = { + "add_or_modify": True, + "key1": False, + "key2": True, + } + unknown = { + "key1": False, + "key2": True, + } + with pytest.raises( + ValueError, + match=f'Unknown dictionary keys: {unknown!r}' + ): + UserAccessPermissions.from_dict(data) + + +def test_user_access_permissions__all(): + all_permissions = UserAccessPermissions.all() + all_int = int(all_permissions) + all_string = bin(all_permissions) + + assert all_string.startswith("0b") + assert len(all_string[2:]) == 32 # 32-bit integer + + assert all_int & UserAccessPermissions.R1 == 0 + assert all_int & UserAccessPermissions.R2 == 0 + assert all_int & UserAccessPermissions.PRINT == UserAccessPermissions.PRINT + assert all_int & UserAccessPermissions.R7 == UserAccessPermissions.R7 + assert all_int & UserAccessPermissions.R31 == UserAccessPermissions.R31 diff --git a/tests/test_reader.py b/tests/test_reader.py index ab4a8d302..546b588fe 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -7,11 +7,12 @@ import pytest -from pypdf import PdfReader +from pypdf import PdfReader, PdfWriter from pypdf._crypt_providers import crypt_provider from pypdf._reader import convert_to_int from pypdf.constants import ImageAttributes as IA from pypdf.constants import PageAttributes as PG +from pypdf.constants import UserAccessPermissions as UAP from pypdf.errors import ( EmptyFileError, FileNotDecryptedError, @@ -730,11 +731,54 @@ def test_decode_permissions(): print_ = base.copy() print_["print"] = True - assert reader.decode_permissions(4) == print_ + with pytest.raises( + DeprecationWarning, + match="decode_permissions is deprecated and will be removed in pypdf 5.0.0. Use user_access_permissions instead", + ): + assert reader.decode_permissions(4) == print_ modify = base.copy() modify["modify"] = True - assert reader.decode_permissions(8) == modify + with pytest.raises( + DeprecationWarning, + match="decode_permissions is deprecated and will be removed in pypdf 5.0.0. Use user_access_permissions instead", + ): + assert reader.decode_permissions(8) == modify + + +@pytest.mark.skipif(not HAS_AES, reason="No AES implementation") +def test_user_access_permissions(): + # Not encrypted. + reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf") + assert reader.user_access_permissions is None + + # Encrypted. + reader = PdfReader(RESOURCE_ROOT / "encryption" / "r6-owner-password.pdf") + assert reader.user_access_permissions == UAP.all() + + # Custom writer permissions. + writer = PdfWriter(clone_from=RESOURCE_ROOT / "crazyones.pdf") + writer.encrypt( + user_password="", + owner_password="abc", + permissions_flag=UAP.PRINT | UAP.FILL_FORM_FIELDS, + ) + output = BytesIO() + writer.write(output) + reader = PdfReader(output) + assert reader.user_access_permissions == (UAP.PRINT | UAP.FILL_FORM_FIELDS) + + # All writer permissions. + writer = PdfWriter(clone_from=RESOURCE_ROOT / "crazyones.pdf") + writer.encrypt( + user_password="", + owner_password="abc", + permissions_flag=UAP.all(), + ) + output = BytesIO() + writer.write(output) + reader = PdfReader(output) + assert reader.user_access_permissions == UAP.all() def test_pages_attribute(): From 316471c03eb43c6e29de65bfc09e740b745a03a3 Mon Sep 17 00:00:00 2001 From: stefan6419846 <96178532+stefan6419846@users.noreply.github.com> Date: Mon, 8 Jan 2024 12:28:48 +0100 Subject: [PATCH 2/3] fix ruff violations --- tests/test_constants.py | 2 +- tests/test_reader.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_constants.py b/tests/test_constants.py index 140857847..d53ebed33 100644 --- a/tests/test_constants.py +++ b/tests/test_constants.py @@ -96,7 +96,7 @@ def test_user_access_permissions__dict_handling(): } with pytest.raises( ValueError, - match=f'Unknown dictionary keys: {unknown!r}' + match=f"Unknown dictionary keys: {unknown!r}" ): UserAccessPermissions.from_dict(data) diff --git a/tests/test_reader.py b/tests/test_reader.py index 546b588fe..98f73a01c 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -733,7 +733,7 @@ def test_decode_permissions(): print_["print"] = True with pytest.raises( DeprecationWarning, - match="decode_permissions is deprecated and will be removed in pypdf 5.0.0. Use user_access_permissions instead", + match="decode_permissions is deprecated and will be removed in pypdf 5.0.0. Use user_access_permissions instead", # noqa: E501 ): assert reader.decode_permissions(4) == print_ @@ -741,7 +741,7 @@ def test_decode_permissions(): modify["modify"] = True with pytest.raises( DeprecationWarning, - match="decode_permissions is deprecated and will be removed in pypdf 5.0.0. Use user_access_permissions instead", + match="decode_permissions is deprecated and will be removed in pypdf 5.0.0. Use user_access_permissions instead", # noqa: E501 ): assert reader.decode_permissions(8) == modify From 8ed9a9fedb306b8c057fee8a90c619b8f3e14b76 Mon Sep 17 00:00:00 2001 From: stefan6419846 <96178532+stefan6419846@users.noreply.github.com> Date: Tue, 9 Jan 2024 09:52:57 +0100 Subject: [PATCH 3/3] perform proposed renaming --- pypdf/constants.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pypdf/constants.py b/pypdf/constants.py index 49d897308..884dbaedf 100644 --- a/pypdf/constants.py +++ b/pypdf/constants.py @@ -91,7 +91,7 @@ def _is_reserved(cls, name: str) -> bool: return name.startswith("R") and name[1:].isdigit() @classmethod - def _defaults_to_one(cls, name: str) -> bool: + def _is_active(cls, name: str) -> bool: """Check if the given reserved name defaults to 1 = active.""" return name not in {"R1", "R2"} @@ -112,7 +112,7 @@ def from_dict(cls, value: Dict[str, bool]) -> "UserAccessPermissions": for name, flag in cls.__members__.items(): if cls._is_reserved(name): # Reserved names have a required value. Use it. - if cls._defaults_to_one(name): + if cls._is_active(name): result |= flag continue is_active = value_copy.pop(name.lower(), False)