BUG: encode_pdfdocencoding() always returns bytes (#2440)

In the function encode_pdfdocencoding, convert the return value from bytearray to bytes so that it matches the function's declared return type. The conversion is necessary because mypy treats bytearray as duck-type compatible with bytes and therefore does not flag the mismatch, whereas this library's Encryption class expects only bytes. Fixes #2434.
py-pdf · Feb 20, 2024 · efe7790 · efe7790
1 parent c1258c3
commit efe7790
Show file tree

Hide file tree

Showing 2 changed files with 12 additions and 1 deletion.
diff --git a/pypdf/generic/_base.py b/pypdf/generic/_base.py
@@ -650,4 +650,4 @@ def encode_pdfdocencoding(unicode_string: str) -> bytes:
             raise UnicodeEncodeError(
                 "pdfdocencoding", c, -1, -1, "does not exist in translation table"
             )
-    return retval
+    return bytes(retval)
diff --git a/tests/test_generic.py b/tests/test_generic.py
@@ -1,4 +1,5 @@
 """Test the pypdf.generic module."""
+
 from io import BytesIO
 from pathlib import Path
 from unittest.mock import patch
@@ -273,6 +274,16 @@ def test_encode_pdfdocencoding_keyerror():
     assert exc.value.args[0] == "pdfdocencoding"
 
 
+@pytest.mark.parametrize("test_input", ["", "data"])
+def test_encode_pdfdocencoding_returns_bytes(test_input):
+    """
+    Test that encode_pdfdocencoding() always returns bytes because bytearray
+    is duck type compatible with bytes in mypy
+    """
+    out = encode_pdfdocencoding(test_input)
+    assert isinstance(out, bytes)
+
+
 def test_read_object_comment_exception():
     stream = BytesIO(b"% foobar")
     pdf = None