From efe779008594eb5c8e8ce8870b732602d54ccc75 Mon Sep 17 00:00:00 2001 From: Stefan Bourlon Date: Tue, 20 Feb 2024 11:30:29 -0800 Subject: [PATCH] BUG: encode_pdfdocencoding() always returns bytes (#2440) In the function encode_pdfdocencoding, cast the return value from bytearray to bytes to match the function signature. This cast is necessary because mypy treats bytearray as duck-type compatible with bytes; however, this library expects only bytes in its Encryption class. Fixes #2434. --- pypdf/generic/_base.py | 2 +- tests/test_generic.py | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/pypdf/generic/_base.py b/pypdf/generic/_base.py index 5a2757295..813b1df04 100644 --- a/pypdf/generic/_base.py +++ b/pypdf/generic/_base.py @@ -650,4 +650,4 @@ def encode_pdfdocencoding(unicode_string: str) -> bytes: raise UnicodeEncodeError( "pdfdocencoding", c, -1, -1, "does not exist in translation table" ) - return retval + return bytes(retval) diff --git a/tests/test_generic.py b/tests/test_generic.py index 0e0fff677..2a9c5ec00 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -1,4 +1,5 @@ """Test the pypdf.generic module.""" + from io import BytesIO from pathlib import Path from unittest.mock import patch @@ -273,6 +274,16 @@ def test_encode_pdfdocencoding_keyerror(): assert exc.value.args[0] == "pdfdocencoding" +@pytest.mark.parametrize("test_input", ["", "data"]) +def test_encode_pdfdocencoding_returns_bytes(test_input): + """ + Test that encode_pdfdocencoding() always returns bytes because bytearray + is duck type compatible with bytes in mypy + """ + out = encode_pdfdocencoding(test_input) + assert isinstance(out, bytes) + + def test_read_object_comment_exception(): stream = BytesIO(b"% foobar") pdf = None