From 591127b0e8aaaf35ac41c2c1fb3372fc91e7e327 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 6 Nov 2025 12:03:34 +0200 Subject: [PATCH 1/4] gh-141061: Fix decoding with non-standard Base64 alphabet The "+" and "/" characters are no longer recognized as part of the Base64 alphabet in base64.urlsafe_b64decode() and base64.b64decode() with the altchars argument that does not contain them. --- Lib/base64.py | 7 +++--- Lib/test/test_base64.py | 23 ++++++++++--------- ...-11-06-12-03-29.gh-issue-141061.7Gfpgw.rst | 4 ++++ 3 files changed, 20 insertions(+), 14 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2025-11-06-12-03-29.gh-issue-141061.7Gfpgw.rst diff --git a/Lib/base64.py b/Lib/base64.py index 5d78cc09f40cd3..530babdfae2a75 100644 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -80,8 +80,9 @@ def b64decode(s, altchars=None, validate=False): s = _bytes_from_decode_data(s) if altchars is not None: altchars = _bytes_from_decode_data(altchars) - assert len(altchars) == 2, repr(altchars) - s = s.translate(bytes.maketrans(altchars, b'+/')) + if len(altchars) != 2: + raise ValueError(f'invalid altchars: {altchars!r}') + s = s.translate(bytes.maketrans(b'+/' + altchars, altchars + b'+/')) return binascii.a2b_base64(s, strict_mode=validate) @@ -104,7 +105,7 @@ def standard_b64decode(s): _urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_') -_urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/') +_urlsafe_decode_translation = bytes.maketrans(b'+/-_', b'-_+/') def urlsafe_b64encode(s): """Encode bytes using the URL- and filesystem-safe Base64 alphabet. 
diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py index 65977ca8c9f2e0..efbd554b00f476 100644 --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -265,6 +265,11 @@ def test_b64decode_altchars(self): eq(base64.b64decode(data, altchars=altchars_str), res) eq(base64.b64decode(data_str, altchars=altchars_str), res) + self.assertRaises(ValueError, base64.b64decode, b'', altchars=b'+') + self.assertRaises(ValueError, base64.b64decode, b'', altchars=b'+/-') + self.assertRaises(ValueError, base64.b64decode, '', altchars='+') + self.assertRaises(ValueError, base64.b64decode, '', altchars='+/-') + def test_b64decode_padding_error(self): self.assertRaises(binascii.Error, base64.b64decode, b'abc') self.assertRaises(binascii.Error, base64.b64decode, 'abc') @@ -296,13 +301,13 @@ def test_b64decode_invalid_chars(self): with self.assertRaises(binascii.Error): base64.b64decode(bstr.decode('ascii'), validate=True) - # Normal alphabet characters not discarded when alternative given - res = b'\xfb\xef\xff' - self.assertEqual(base64.b64decode(b'++//', validate=True), res) - self.assertEqual(base64.b64decode(b'++//', '-_', validate=True), res) - self.assertEqual(base64.b64decode(b'--__', '-_', validate=True), res) - self.assertEqual(base64.urlsafe_b64decode(b'++//'), res) - self.assertEqual(base64.urlsafe_b64decode(b'--__'), res) + # Normal alphabet characters are discarded when alternative given + self.assertEqual(base64.b64decode(b'++//', altchars=b'-_'), b'') + self.assertEqual(base64.urlsafe_b64decode(b'++//'), b'') + with self.assertRaises(binascii.Error): + base64.b64decode(b'++++', altchars=b'-_', validate=True) + with self.assertRaises(binascii.Error): + base64.b64decode(b'////', altchars=b'-_', validate=True) def _altchars_strategy(): """Generate 'altchars' for base64 encoding.""" @@ -394,10 +399,6 @@ def test_b32decode_casefold(self): self.assertRaises(binascii.Error, base64.b32decode, b'me======') self.assertRaises(binascii.Error, base64.b32decode, 
'me======') - # Mapping zero and one - eq(base64.b32decode(b'MLO23456'), b'b\xdd\xad\xf3\xbe') - eq(base64.b32decode('MLO23456'), b'b\xdd\xad\xf3\xbe') - def test_b32decode_map01(self): # Mapping zero and one eq = self.assertEqual diff --git a/Misc/NEWS.d/next/Security/2025-11-06-12-03-29.gh-issue-141061.7Gfpgw.rst b/Misc/NEWS.d/next/Security/2025-11-06-12-03-29.gh-issue-141061.7Gfpgw.rst new file mode 100644 index 00000000000000..6fec18992622d5 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2025-11-06-12-03-29.gh-issue-141061.7Gfpgw.rst @@ -0,0 +1,4 @@ +The ``+`` and ``/`` characters are no longer recognized as the part of the +Base64 alphabet in :func:`base64.urlsafe_b64decode` and +:func:`base64.b64decode` with the *altchars* argument that does not contain +them. From 5c1e8d4011477be9ca4d4a4c96acd67a28b1843d Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 6 Nov 2025 17:36:50 +0200 Subject: [PATCH 2/4] Fix the issue number. --- ....7Gfpgw.rst => 2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename Misc/NEWS.d/next/Security/{2025-11-06-12-03-29.gh-issue-141061.7Gfpgw.rst => 2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst} (100%) diff --git a/Misc/NEWS.d/next/Security/2025-11-06-12-03-29.gh-issue-141061.7Gfpgw.rst b/Misc/NEWS.d/next/Security/2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst similarity index 100% rename from Misc/NEWS.d/next/Security/2025-11-06-12-03-29.gh-issue-141061.7Gfpgw.rst rename to Misc/NEWS.d/next/Security/2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst From b0d5877394d4c17996e9552709b9e2790780e580 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 6 Nov 2025 17:47:13 +0200 Subject: [PATCH 3/4] Remove unrelated changes. 
--- Lib/test/test_base64.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py index efbd554b00f476..5c797a9cc745fb 100644 --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -399,6 +399,10 @@ def test_b32decode_casefold(self): self.assertRaises(binascii.Error, base64.b32decode, b'me======') self.assertRaises(binascii.Error, base64.b32decode, 'me======') + # Mapping zero and one + eq(base64.b32decode(b'MLO23456'), b'b\xdd\xad\xf3\xbe') + eq(base64.b32decode('MLO23456'), b'b\xdd\xad\xf3\xbe') + def test_b32decode_map01(self): # Mapping zero and one eq = self.assertEqual From 414e4ac592ac247e2220ed38e34a1dae100e43c2 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 6 Nov 2025 23:02:01 +0200 Subject: [PATCH 4/4] Only emit a warning if validate=False. --- Lib/base64.py | 19 +++++++++++++++++-- Lib/test/test_base64.py | 14 +++++++++++--- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/Lib/base64.py b/Lib/base64.py index 530babdfae2a75..edac542a7dca4c 100644 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -78,12 +78,27 @@ def b64decode(s, altchars=None, validate=False): https://docs.python.org/3.11/library/binascii.html#binascii.a2b_base64 """ s = _bytes_from_decode_data(s) + badchar = None if altchars is not None: altchars = _bytes_from_decode_data(altchars) if len(altchars) != 2: raise ValueError(f'invalid altchars: {altchars!r}') - s = s.translate(bytes.maketrans(b'+/' + altchars, altchars + b'+/')) - return binascii.a2b_base64(s, strict_mode=validate) + if validate: + s = s.translate(bytes.maketrans(b'+/' + altchars, altchars + b'+/')) + else: + for b in set(b'+/') - set(altchars): + if b in s: + badchar = b + break + s = s.translate(bytes.maketrans(altchars, b'+/')) + result = binascii.a2b_base64(s, strict_mode=validate) + if badchar is not None: + import warnings + warnings.warn(f'invalid character {chr(badchar)!a} in base64 data ' + f'with altchars={altchars!r} will be discarded in 
' + f'future Python versions', + FutureWarning, stacklevel=2) + return result def standard_b64encode(s): diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py index efbd554b00f476..312db0f3a0f776 100644 --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -301,13 +301,21 @@ def test_b64decode_invalid_chars(self): with self.assertRaises(binascii.Error): base64.b64decode(bstr.decode('ascii'), validate=True) - # Normal alphabet characters are discarded when alternative given - self.assertEqual(base64.b64decode(b'++//', altchars=b'-_'), b'') - self.assertEqual(base64.urlsafe_b64decode(b'++//'), b'') + # Normal alphabet characters will be discarded when alternative given + with self.assertWarns(FutureWarning): + self.assertEqual(base64.b64decode(b'++++', altchars=b'-_'), + b'\xfb\xef\xbe') + with self.assertWarns(FutureWarning): + self.assertEqual(base64.b64decode(b'////', altchars=b'-_'), + b'\xff\xff\xff') + self.assertEqual(base64.urlsafe_b64decode(b'++++'), b'') + self.assertEqual(base64.urlsafe_b64decode(b'////'), b'') with self.assertRaises(binascii.Error): base64.b64decode(b'++++', altchars=b'-_', validate=True) with self.assertRaises(binascii.Error): base64.b64decode(b'////', altchars=b'-_', validate=True) + with self.assertRaises(binascii.Error): + base64.b64decode(b'+/!', altchars=b'-_') def _altchars_strategy(): """Generate 'altchars' for base64 encoding."""