Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Improve/rewrite PDF permission retrieval #2400

Merged
merged 4 commits into from
Jan 18, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
14 changes: 13 additions & 1 deletion pypdf/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
StrByteType,
StreamType,
b_,
deprecate_with_replacement,
logger_warning,
parse_iso8824_date,
read_non_whitespace,
Expand Down Expand Up @@ -1811,12 +1812,16 @@ def decrypt(self, password: Union[str, bytes]) -> PasswordType:

def decode_permissions(self, permissions_code: int) -> Dict[str, bool]:
"""Take the permissions as an integer, return the allowed access."""
deprecate_with_replacement(
old_name="decode_permissions", new_name="user_access_permissions", removed_in="5.0.0"
)

permissions_mapping = {
"print": UserAccessPermissions.PRINT,
"modify": UserAccessPermissions.MODIFY,
"copy": UserAccessPermissions.EXTRACT,
"annotations": UserAccessPermissions.ADD_OR_MODIFY,
"forms": UserAccessPermissions.R7,
"forms": UserAccessPermissions.FILL_FORM_FIELDS,
MartinThoma marked this conversation as resolved.
Show resolved Hide resolved
"accessability": UserAccessPermissions.EXTRACT_TEXT_AND_GRAPHICS,
"assemble": UserAccessPermissions.ASSEMBLE_DOC,
"print_high_quality": UserAccessPermissions.PRINT_TO_REPRESENTATION,
Expand All @@ -1827,6 +1832,13 @@ def decode_permissions(self, permissions_code: int) -> Dict[str, bool]:
for key, flag in permissions_mapping.items()
}

@property
def user_access_permissions(self) -> Optional[UserAccessPermissions]:
    """
    Get the user access permissions for encrypted documents.

    Returns:
        ``UserAccessPermissions`` built from ``self._encryption.P``, or
        ``None`` if the document is not encrypted (``self._encryption`` is
        ``None``).
    """
    encryption = self._encryption
    return None if encryption is None else UserAccessPermissions(encryption.P)

MartinThoma marked this conversation as resolved.
Show resolved Hide resolved
@property
def is_encrypted(self) -> bool:
"""
Expand Down
2 changes: 1 addition & 1 deletion pypdf/_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@
)

OPTIONAL_READ_WRITE_FIELD = FieldFlag(0)
ALL_DOCUMENT_PERMISSIONS = UserAccessPermissions((2**31 - 1) - 3)
ALL_DOCUMENT_PERMISSIONS = UserAccessPermissions.all()
MartinThoma marked this conversation as resolved.
Show resolved Hide resolved


class ObjectDeletionFlag(enum.IntFlag):
Expand Down
41 changes: 41 additions & 0 deletions pypdf/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,47 @@ class UserAccessPermissions(IntFlag):
R31 = 2**30
R32 = 2**31

@classmethod
def _is_reserved(cls, name: str) -> bool:
    """
    Tell whether ``name`` corresponds to a reserved flag entry.

    A name counts as reserved when it is the letter ``R`` followed by
    digits only (for example ``R7``).
    """
    prefix, suffix = name[:1], name[1:]
    return prefix == "R" and suffix.isdigit()

@classmethod
def _is_active(cls, name: str) -> bool:
    """
    Tell whether the given reserved name defaults to 1 (= active).

    Every name except ``R1`` and ``R2`` is reported as active.
    """
    inactive_by_default = {"R1", "R2"}
    return not (name in inactive_by_default)

def to_dict(self) -> Dict[str, bool]:
    """
    Convert this flag value to a mapping of verbose names to booleans.

    Reserved entries are left out. Each remaining member name is
    lower-cased and mapped to whether all bits of that member are set
    in ``self``.
    """
    return {
        name.lower(): (self & flag) == flag
        for name, flag in UserAccessPermissions.__members__.items()
        if not UserAccessPermissions._is_reserved(name)
    }

@classmethod
def from_dict(cls, value: Dict[str, bool]) -> "UserAccessPermissions":
    """
    Convert a verbose name mapping to the corresponding flag value.

    Args:
        value: Mapping of lower-cased member names to booleans. Names
            which are missing are treated as ``False``. The mapping
            itself is not modified.

    Returns:
        The combined flag value. Reserved entries are not read from
        ``value``; they are set according to ``_is_active``.

    Raises:
        ValueError: If ``value`` contains keys which do not match any
            non-reserved member name.
    """
    remaining = value.copy()
    permissions = cls(0)
    for name, flag in cls.__members__.items():
        if cls._is_reserved(name):
            # Reserved names have a required value, independent of the input.
            enabled = cls._is_active(name)
        else:
            enabled = remaining.pop(name.lower(), False)
        if enabled:
            permissions |= flag
    # Whatever has not been consumed above did not match a known name.
    if remaining:
        raise ValueError(f"Unknown dictionary keys: {remaining!r}")
    return permissions

@classmethod
def all(cls) -> "UserAccessPermissions":
    """
    Return the flag value with everything enabled.

    The result is the 32-bit all-ones value ``2**32 - 1`` with the values
    of ``R1`` and ``R2`` subtracted.
    """
    every_bit = 2**32 - 1
    return cls(every_bit - cls.R1 - cls.R2)


class Ressources:
"""TABLE 3.30 Entries in a resource dictionary."""
Expand Down
84 changes: 82 additions & 2 deletions tests/test_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,17 @@
import re
from typing import Callable

from pypdf.constants import PDF_KEYS, GraphicsStateParameters
import pytest

from pypdf.constants import PDF_KEYS, GraphicsStateParameters, UserAccessPermissions


def test_slash_prefix():
"""
Naming conventions of PDF_KEYS (constant names) are followed.

This test function validates if PDF key names follow the required pattern:
- Starts with a slash '/'
- Starts with a slash "/"
- Followed by an uppercase letter
- Contains alphanumeric characters (letters and digits)
- The attribute name should be a case-insensitive match, with underscores removed
Expand All @@ -34,3 +36,81 @@ def test_slash_prefix():
if cls == GraphicsStateParameters and attr in ["ca", "op"]:
continue
assert pattern.match(constant_value)


def test_user_access_permissions__dict_handling():
    """Check the ``to_dict``/``from_dict`` conversions of ``UserAccessPermissions``."""
    # Baseline: every configurable permission disabled.
    nothing_allowed = {
        "add_or_modify": False,
        "assemble_doc": False,
        "extract": False,
        "extract_text_and_graphics": False,
        "fill_form_fields": False,
        "modify": False,
        "print": False,
        "print_to_representation": False,
    }

    # Value is mix of configurable and reserved bits.
    # Reserved bits should not be part of the dictionary.
    as_dict = UserAccessPermissions(512 + 64 + 8).to_dict()
    assert as_dict == {
        **nothing_allowed,
        "extract_text_and_graphics": True,
        "modify": True,
    }

    # Convert the dictionary back to an integer.
    # This should add the reserved bits automatically.
    assert UserAccessPermissions.from_dict(as_dict) == 4294963912

    # Roundtrip for valid dictionary.
    roundtrip_data = {
        **nothing_allowed,
        "add_or_modify": True,
        "extract_text_and_graphics": True,
        "modify": True,
        "print_to_representation": True,
    }
    assert UserAccessPermissions.from_dict(roundtrip_data).to_dict() == roundtrip_data

    # Empty inputs.
    assert UserAccessPermissions.from_dict({}) == 4294963392  # Reserved bits.
    assert UserAccessPermissions(0).to_dict() == nothing_allowed

    # Unknown dictionary keys.
    unknown = {
        "key1": False,
        "key2": True,
    }
    data = {"add_or_modify": True, **unknown}
    with pytest.raises(
        ValueError,
        match=f"Unknown dictionary keys: {unknown!r}"
    ):
        UserAccessPermissions.from_dict(data)


def test_user_access_permissions__all():
    """Check the value returned by ``UserAccessPermissions.all()``."""
    everything = UserAccessPermissions.all()

    binary = bin(everything)
    assert binary.startswith("0b")
    assert len(binary[2:]) == 32  # 32-bit integer

    as_int = int(everything)

    # R1 and R2 must not be set.
    for cleared in (UserAccessPermissions.R1, UserAccessPermissions.R2):
        assert as_int & cleared == 0

    # A configurable flag and two other reserved flags must be set.
    for expected in (
        UserAccessPermissions.PRINT,
        UserAccessPermissions.R7,
        UserAccessPermissions.R31,
    ):
        assert as_int & expected == expected
50 changes: 47 additions & 3 deletions tests/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@

import pytest

from pypdf import PdfReader
from pypdf import PdfReader, PdfWriter
from pypdf._crypt_providers import crypt_provider
from pypdf._reader import convert_to_int
from pypdf.constants import ImageAttributes as IA
from pypdf.constants import PageAttributes as PG
from pypdf.constants import UserAccessPermissions as UAP
from pypdf.errors import (
EmptyFileError,
FileNotDecryptedError,
Expand Down Expand Up @@ -730,11 +731,54 @@ def test_decode_permissions():

print_ = base.copy()
print_["print"] = True
assert reader.decode_permissions(4) == print_
with pytest.raises(
DeprecationWarning,
match="decode_permissions is deprecated and will be removed in pypdf 5.0.0. Use user_access_permissions instead", # noqa: E501
):
assert reader.decode_permissions(4) == print_

modify = base.copy()
modify["modify"] = True
assert reader.decode_permissions(8) == modify
with pytest.raises(
DeprecationWarning,
match="decode_permissions is deprecated and will be removed in pypdf 5.0.0. Use user_access_permissions instead", # noqa: E501
):
assert reader.decode_permissions(8) == modify


@pytest.mark.skipif(not HAS_AES, reason="No AES implementation")
def test_user_access_permissions():
    """Check ``PdfReader.user_access_permissions`` for plain and encrypted files."""

    def permissions_after_roundtrip(permissions_flag):
        # Encrypt a copy of crazyones.pdf with the given permissions, write
        # it to memory and return what a fresh reader reports for it.
        writer = PdfWriter(clone_from=RESOURCE_ROOT / "crazyones.pdf")
        writer.encrypt(
            user_password="",
            owner_password="abc",
            permissions_flag=permissions_flag,
        )
        output = BytesIO()
        writer.write(output)
        return PdfReader(output).user_access_permissions

    # Not encrypted.
    plain_reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
    assert plain_reader.user_access_permissions is None

    # Encrypted.
    encrypted_reader = PdfReader(RESOURCE_ROOT / "encryption" / "r6-owner-password.pdf")
    assert encrypted_reader.user_access_permissions == UAP.all()

    # Custom writer permissions.
    custom = UAP.PRINT | UAP.FILL_FORM_FIELDS
    assert permissions_after_roundtrip(custom) == (UAP.PRINT | UAP.FILL_FORM_FIELDS)

    # All writer permissions.
    assert permissions_after_roundtrip(UAP.all()) == UAP.all()


def test_pages_attribute():
Expand Down