diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst index 771628677c3d98..1a1785cb58772e 100644 --- a/Doc/library/base64.rst +++ b/Doc/library/base64.rst @@ -146,13 +146,20 @@ POST request. Accepting the ``+`` and ``/`` characters is now deprecated. -.. function:: b32encode(s) +.. function:: b32encode(s, *, wrapcol=0) Encode the :term:`bytes-like object` *s* using Base32 and return the encoded :class:`bytes`. + If *wrapcol* is non-zero, insert a newline (``b'\n'``) character + after at most every *wrapcol* characters. + If *wrapcol* is zero (default), do not add any newlines. + + .. versionchanged:: next + Added the *wrapcol* parameter. + -.. function:: b32decode(s, casefold=False, map01=None) +.. function:: b32decode(s, casefold=False, map01=None, *, ignorechars=b'') Decode the Base32 encoded :term:`bytes-like object` or ASCII string *s* and return the decoded :class:`bytes`. @@ -168,20 +175,29 @@ POST request. digit 0 is always mapped to the letter O). For security purposes the default is ``None``, so that 0 and 1 are not allowed in the input. + *ignorechars* should be a :term:`bytes-like object` containing characters + to ignore from the input. + A :exc:`binascii.Error` is raised if *s* is incorrectly padded or if there are non-alphabet characters present in the input. + .. versionchanged:: next + Added the *ignorechars* parameter. + -.. function:: b32hexencode(s) +.. function:: b32hexencode(s, *, wrapcol=0) Similar to :func:`b32encode` but uses the Extended Hex Alphabet, as defined in :rfc:`4648`. .. versionadded:: 3.10 + .. versionchanged:: next + Added the *wrapcol* parameter. + -.. function:: b32hexdecode(s, casefold=False) +.. function:: b32hexdecode(s, casefold=False, *, ignorechars=b'') Similar to :func:`b32decode` but uses the Extended Hex Alphabet, as defined in :rfc:`4648`. @@ -193,14 +209,24 @@ POST request. .. versionadded:: 3.10 + .. versionchanged:: next + Added the *ignorechars* parameter. + -.. function:: b16encode(s) +.. 
function:: b16encode(s, *, wrapcol=0) Encode the :term:`bytes-like object` *s* using Base16 and return the encoded :class:`bytes`. + If *wrapcol* is non-zero, insert a newline (``b'\n'``) character + after at most every *wrapcol* characters. + If *wrapcol* is zero (default), do not add any newlines. + + .. versionchanged:: next + Added the *wrapcol* parameter. -.. function:: b16decode(s, casefold=False) + +.. function:: b16decode(s, casefold=False, *, ignorechars=b'') Decode the Base16 encoded :term:`bytes-like object` or ASCII string *s* and return the decoded :class:`bytes`. @@ -209,10 +235,17 @@ POST request. lowercase alphabet is acceptable as input. For security purposes, the default is ``False``. + *ignorechars* should be a :term:`bytes-like object` containing characters + to ignore from the input. + A :exc:`binascii.Error` is raised if *s* is incorrectly padded or if there are non-alphabet characters present in the input. + .. versionchanged:: next + Added the *ignorechars* parameter. + + .. _base64-base-85: Base85 Encodings @@ -277,7 +310,7 @@ Refer to the documentation of the individual functions for more information. .. versionadded:: 3.4 -.. function:: b85encode(b, pad=False) +.. function:: b85encode(b, pad=False, *, wrapcol=0) Encode the :term:`bytes-like object` *b* using base85 (as used in e.g. git-style binary diffs) and return the encoded :class:`bytes`. @@ -285,19 +318,32 @@ Refer to the documentation of the individual functions for more information. If *pad* is true, the input is padded with ``b'\0'`` so its length is a multiple of 4 bytes before encoding. + If *wrapcol* is non-zero, insert a newline (``b'\n'``) character + after at most every *wrapcol* characters. + If *wrapcol* is zero (default), do not add any newlines. + .. versionadded:: 3.4 + .. versionchanged:: next + Added the *wrapcol* parameter. -.. function:: b85decode(b) + +.. 
function:: b85decode(b, *, ignorechars=b'') Decode the base85-encoded :term:`bytes-like object` or ASCII string *b* and return the decoded :class:`bytes`. Padding is implicitly removed, if necessary. + *ignorechars* should be a :term:`bytes-like object` containing characters + to ignore from the input. + .. versionadded:: 3.4 + .. versionchanged:: next + Added the *ignorechars* parameter. + -.. function:: z85encode(s, pad=False) +.. function:: z85encode(s, pad=False, *, wrapcol=0) Encode the :term:`bytes-like object` *s* using Z85 (as used in ZeroMQ) and return the encoded :class:`bytes`. See `Z85 specification @@ -306,20 +352,33 @@ Refer to the documentation of the individual functions for more information. If *pad* is true, the input is padded with ``b'\0'`` so its length is a multiple of 4 bytes before encoding. + If *wrapcol* is non-zero, insert a newline (``b'\n'``) character + after at most every *wrapcol* characters. + If *wrapcol* is zero (default), do not add any newlines. + .. versionadded:: 3.13 .. versionchanged:: 3.15 The *pad* parameter was added. + .. versionchanged:: next + Added the *wrapcol* parameter. + -.. function:: z85decode(s) +.. function:: z85decode(s, *, ignorechars=b'') Decode the Z85-encoded :term:`bytes-like object` or ASCII string *s* and return the decoded :class:`bytes`. See `Z85 specification `_ for more information. + *ignorechars* should be a :term:`bytes-like object` containing characters + to ignore from the input. + .. versionadded:: 3.13 + .. versionchanged:: next + Added the *ignorechars* parameter. + .. _base64-legacy: diff --git a/Doc/library/binascii.rst b/Doc/library/binascii.rst index 64c1ce948d2d32..4a82d0742ae9db 100644 --- a/Doc/library/binascii.rst +++ b/Doc/library/binascii.rst @@ -101,7 +101,7 @@ The :mod:`!binascii` module defines the following functions: Added the *alphabet* and *wrapcol* parameters. -.. function:: a2b_ascii85(string, /, *, foldspaces=False, adobe=False, ignorechars=b"") +.. 
function:: a2b_ascii85(string, /, *, foldspaces=False, adobe=False, ignorechars=b'') Convert Ascii85 data back to binary and return the binary data. @@ -151,7 +151,7 @@ The :mod:`!binascii` module defines the following functions: .. versionadded:: 3.15 -.. function:: a2b_base85(string, /, *, alphabet=BASE85_ALPHABET) +.. function:: a2b_base85(string, /, *, alphabet=BASE85_ALPHABET, ignorechars=b'') Convert Base85 data back to binary and return the binary data. More than one line may be passed at a time. @@ -164,12 +164,15 @@ The :mod:`!binascii` module defines the following functions: Optional *alphabet* must be a :class:`bytes` object of length 85 which specifies an alternative alphabet. + *ignorechars* should be a :term:`bytes-like object` containing characters + to ignore from the input. + Invalid Base85 data will raise :exc:`binascii.Error`. .. versionadded:: 3.15 -.. function:: b2a_base85(data, /, *, alphabet=BASE85_ALPHABET, pad=False) +.. function:: b2a_base85(data, /, *, alphabet=BASE85_ALPHABET, wrapcol=0, pad=False) Convert binary data to a line of ASCII characters in Base85 coding. The return value is the converted line. @@ -177,13 +180,17 @@ The :mod:`!binascii` module defines the following functions: Optional *alphabet* must be a :term:`bytes-like object` of length 85 which specifies an alternative alphabet. + If *wrapcol* is non-zero, insert a newline (``b'\n'``) character + after at most every *wrapcol* characters. + If *wrapcol* is zero (default), do not insert any newlines. + If *pad* is true, the input is padded with ``b'\0'`` so its length is a multiple of 4 bytes before encoding. .. versionadded:: 3.15 -.. function:: a2b_base32(string, /, *, alphabet=BASE32_ALPHABET) +.. function:: a2b_base32(string, /, *, alphabet=BASE32_ALPHABET, ignorechars=b'') Convert base32 data back to binary and return the binary data. 
@@ -201,11 +208,17 @@ The :mod:`!binascii` module defines the following functions: Optional *alphabet* must be a :class:`bytes` object of length 32 which specifies an alternative alphabet. + *ignorechars* should be a :term:`bytes-like object` containing characters + to ignore from the input. + If *ignorechars* contains the pad character ``'='``, the pad characters + presented before the end of the encoded data and the excess pad characters + will be ignored. + Invalid base32 data will raise :exc:`binascii.Error`. .. versionadded:: next -.. function:: b2a_base32(data, /, *, alphabet=BASE32_ALPHABET) +.. function:: b2a_base32(data, /, *, alphabet=BASE32_ALPHABET, wrapcol=0) Convert binary data to a line of ASCII characters in base32 coding, as specified in :rfc:`4648`. The return value is the converted line. @@ -213,6 +226,10 @@ The :mod:`!binascii` module defines the following functions: Optional *alphabet* must be a :term:`bytes-like object` of length 32 which specifies an alternative alphabet. + If *wrapcol* is non-zero, insert a newline (``b'\n'``) character + after at most every *wrapcol* characters. + If *wrapcol* is zero (default), do not insert any newlines. + .. versionadded:: next .. function:: a2b_qp(data, header=False) @@ -288,18 +305,25 @@ The :mod:`!binascii` module defines the following functions: .. versionchanged:: 3.8 The *sep* and *bytes_per_sep* parameters were added. -.. function:: a2b_hex(hexstr) - unhexlify(hexstr) +.. function:: a2b_hex(hexstr, *, ignorechars=b'') + unhexlify(hexstr, *, ignorechars=b'') Return the binary data represented by the hexadecimal string *hexstr*. This function is the inverse of :func:`b2a_hex`. *hexstr* must contain an even number of hexadecimal digits (which can be upper or lower case), otherwise an :exc:`Error` exception is raised. + *ignorechars* should be a :term:`bytes-like object` containing characters + to ignore from the input. 
+ Similar functionality (accepting only text string arguments, but more liberal towards whitespace) is also accessible using the :meth:`bytes.fromhex` class method. + .. versionchanged:: next + Added the *ignorechars* parameter. + + .. exception:: Error Exception raised on errors. These are usually programming errors. diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 0973c387a1e595..37d6304ae65a74 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -635,16 +635,28 @@ base64 * Added the *pad* parameter in :func:`~base64.z85encode`. (Contributed by Hauke Dämpfling in :gh:`143103`.) -* Added the *wrapcol* parameter in :func:`~base64.b64encode`. - (Contributed by Serhiy Storchaka in :gh:`143214`.) +* Added the *wrapcol* parameter in :func:`~base64.b16encode`, + :func:`~base64.b32encode`, :func:`~base64.b32hexencode`, + :func:`~base64.b64encode`, :func:`~base64.b85encode`, and + :func:`~base64.z85encode`. + (Contributed by Serhiy Storchaka in :gh:`143214` and :gh:`146431`.) -* Added the *ignorechars* parameter in :func:`~base64.b64decode`. - (Contributed by Serhiy Storchaka in :gh:`144001`.) +* Added the *ignorechars* parameter in :func:`~base64.b16decode`, + :func:`~base64.b32decode`, :func:`~base64.b32hexdecode`, + :func:`~base64.b64decode`, :func:`~base64.b85decode`, and + :func:`~base64.z85decode`. + (Contributed by Serhiy Storchaka in :gh:`144001` and :gh:`146431`.) binascii -------- +* Added functions for Base32 encoding: + + - :func:`~binascii.b2a_base32` and :func:`~binascii.a2b_base32` + + (Contributed by James Seo in :gh:`146192`.) + * Added functions for Ascii85, Base85, and Z85 encoding: - :func:`~binascii.b2a_ascii85` and :func:`~binascii.a2b_ascii85` @@ -659,14 +671,9 @@ binascii :func:`~binascii.a2b_base64`. (Contributed by Serhiy Storchaka in :gh:`145980`.) -* Added the *ignorechars* parameter in :func:`~binascii.a2b_base64`. - (Contributed by Serhiy Storchaka in :gh:`144001`.) 
- -* Added functions for Base32 encoding: - - - :func:`~binascii.b2a_base32` and :func:`~binascii.a2b_base32` - - (Contributed by James Seo in :gh:`146192`.) +* Added the *ignorechars* parameter in :func:`~binascii.a2b_hex`, + :func:`~binascii.unhexlify`, and :func:`~binascii.a2b_base64`. + (Contributed by Serhiy Storchaka in :gh:`144001` and :gh:`146431`.) calendar diff --git a/Lib/base64.py b/Lib/base64.py index 9b57cdfefce1e6..70f23d50292482 100644 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -186,12 +186,18 @@ def urlsafe_b64decode(s): # Base32 encoding/decoding must be done in Python _B32_ENCODE_DOCSTRING = ''' Encode the bytes-like objects using {encoding} and return a bytes object. + +If wrapcol is non-zero, insert a newline (b'\\n') character after at most +every wrapcol characters. ''' _B32_DECODE_DOCSTRING = ''' Decode the {encoding} encoded bytes-like object or ASCII string s. Optional casefold is a flag specifying whether a lowercase alphabet is acceptable as input. For security purposes, the default is False. + +ignorechars should be a byte string containing characters to ignore +from the input. {extra_args} The result is returned as a bytes object. A binascii.Error is raised if the input is incorrectly padded or if there are non-alphabet @@ -207,11 +213,11 @@ def urlsafe_b64decode(s): 0 and 1 are not allowed in the input. ''' -def b32encode(s): - return binascii.b2a_base32(s) +def b32encode(s, *, wrapcol=0): + return binascii.b2a_base32(s, wrapcol=wrapcol) b32encode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32') -def b32decode(s, casefold=False, map01=None): +def b32decode(s, casefold=False, map01=None, *, ignorechars=b''): s = _bytes_from_decode_data(s) # Handle section 2.4 zero and one mapping. The flag map01 will be either # False, or the character to map the digit 1 (one) to. 
It should be @@ -222,20 +228,22 @@ def b32decode(s, casefold=False, map01=None): s = s.translate(bytes.maketrans(b'01', b'O' + map01)) if casefold: s = s.upper() - return binascii.a2b_base32(s) + return binascii.a2b_base32(s, ignorechars=ignorechars) b32decode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32', extra_args=_B32_DECODE_MAP01_DOCSTRING) -def b32hexencode(s): - return binascii.b2a_base32(s, alphabet=binascii.BASE32HEX_ALPHABET) +def b32hexencode(s, *, wrapcol=0): + return binascii.b2a_base32(s, wrapcol=wrapcol, + alphabet=binascii.BASE32HEX_ALPHABET) b32hexencode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32hex') -def b32hexdecode(s, casefold=False): +def b32hexdecode(s, casefold=False, *, ignorechars=b''): s = _bytes_from_decode_data(s) # base32hex does not have the 01 mapping if casefold: s = s.upper() - return binascii.a2b_base32(s, alphabet=binascii.BASE32HEX_ALPHABET) + return binascii.a2b_base32(s, alphabet=binascii.BASE32HEX_ALPHABET, + ignorechars=ignorechars) b32hexdecode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32hex', extra_args='') @@ -243,28 +251,44 @@ def b32hexdecode(s, casefold=False): # RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns # lowercase. The RFC also recommends against accepting input case # insensitively. -def b16encode(s): +def b16encode(s, *, wrapcol=0): """Encode the bytes-like object s using Base16 and return a bytes object. + + If wrapcol is non-zero, insert a newline (b'\\n') character after at most + every wrapcol characters. 
""" - return binascii.hexlify(s).upper() + if not wrapcol: + return binascii.hexlify(s).upper() + if wrapcol < 0: + raise ValueError('Negative wrapcol') + if wrapcol < 2: + wrapcol = 2 + return binascii.hexlify(s, bytes_per_sep=-(wrapcol//2), sep=b'\n').upper() +_upper_hex_trans = bytes.maketrans(b'abcdef', b'ABCDEF') -def b16decode(s, casefold=False): +def b16decode(s, casefold=False, *, ignorechars=b''): """Decode the Base16 encoded bytes-like object or ASCII string s. Optional casefold is a flag specifying whether a lowercase alphabet is acceptable as input. For security purposes, the default is False. + ignorechars should be a byte string containing characters to ignore + from the input. + The result is returned as a bytes object. A binascii.Error is raised if s is incorrectly padded or if there are non-alphabet characters present in the input. """ - s = _bytes_from_decode_data(s) - if casefold: - s = s.upper() - if s.translate(None, delete=b'0123456789ABCDEF'): - raise binascii.Error('Non-base16 digit found') - return binascii.unhexlify(s) + if not casefold: + s = _bytes_from_decode_data(s) + if not isinstance(ignorechars, bytes): + ignorechars = bytes(memoryview(ignorechars)) + for b in b'abcdef': + if b in s and b not in ignorechars: + raise binascii.Error('Non-base16 digit found') + s = s.translate(None, delete=b'abcdef') + return binascii.unhexlify(s, ignorechars=ignorechars) # # Ascii85 encoding/decoding @@ -307,31 +331,42 @@ def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'): return binascii.a2b_ascii85(b, foldspaces=foldspaces, adobe=adobe, ignorechars=ignorechars) -def b85encode(b, pad=False): +def b85encode(b, pad=False, *, wrapcol=0): """Encode bytes-like object b in base85 format and return a bytes object. + If wrapcol is non-zero, insert a newline (b'\\n') character after at most + every wrapcol characters. + If pad is true, the input is padded with b'\\0' so its length is a multiple of 4 bytes before encoding. 
""" - return binascii.b2a_base85(b, pad=pad) + return binascii.b2a_base85(b, wrapcol=wrapcol, pad=pad) -def b85decode(b): +def b85decode(b, *, ignorechars=b''): """Decode the base85-encoded bytes-like object or ASCII string b The result is returned as a bytes object. """ - return binascii.a2b_base85(b) + return binascii.a2b_base85(b, ignorechars=ignorechars) + +def z85encode(s, pad=False, *, wrapcol=0): + """Encode bytes-like object s in z85 format and return a bytes object. -def z85encode(s, pad=False): - """Encode bytes-like object b in z85 format and return a bytes object.""" - return binascii.b2a_base85(s, pad=pad, alphabet=binascii.Z85_ALPHABET) + If wrapcol is non-zero, insert a newline (b'\\n') character after at most + every wrapcol characters. + + If pad is true, the input is padded with b'\\0' so its length is a multiple of + 4 bytes before encoding. + """ + return binascii.b2a_base85(s, wrapcol=wrapcol, pad=pad, + alphabet=binascii.Z85_ALPHABET) -def z85decode(s): +def z85decode(s, *, ignorechars=b''): """Decode the z85-encoded bytes-like object or ASCII string b The result is returned as a bytes object. """ - return binascii.a2b_base85(s, alphabet=binascii.Z85_ALPHABET) + return binascii.a2b_base85(s, alphabet=binascii.Z85_ALPHABET, ignorechars=ignorechars) # Legacy interface. This code could be cleaned up since I don't believe # binascii has any line length limitations.
It just doesn't seem worth it diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py index 9648624b267a54..bb3b3c1e2353c4 100644 --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -209,30 +209,37 @@ def test_b64encode(self): b'\xd3V\xbeo\xf7\x1d', b'01a-b_cd') self.check_encode_type_errors(base64.urlsafe_b64encode) - def test_b64encode_wrapcol(self): + def _common_test_wrapcol(self, func, data): eq = self.assertEqual - b = b'www.python.org' - eq(base64.b64encode(b, wrapcol=0), b'd3d3LnB5dGhvbi5vcmc=') - eq(base64.b64encode(b, wrapcol=8), b'd3d3LnB5\ndGhvbi5v\ncmc=') - eq(base64.b64encode(b, wrapcol=11), b'd3d3LnB5\ndGhvbi5v\ncmc=') - eq(base64.b64encode(b, wrapcol=76), b'd3d3LnB5dGhvbi5vcmc=') - eq(base64.b64encode(b, wrapcol=1), b'd3d3\nLnB5\ndGhv\nbi5v\ncmc=') - eq(base64.b64encode(b, wrapcol=sys.maxsize), b'd3d3LnB5dGhvbi5vcmc=') + expected = func(data) + eq(func(data, wrapcol=0), expected) + eq(func(data, wrapcol=80), expected) + eq(func(b'', wrapcol=0), func(b'')) + eq(func(b'', wrapcol=1), func(b'')) + if func is not base64.b16encode: + eq(func(data, wrapcol=sys.maxsize), expected) if check_impl_detail(): - eq(base64.b64encode(b, wrapcol=sys.maxsize*2), - b'd3d3LnB5dGhvbi5vcmc=') + if func is not base64.b16encode: + eq(func(data, wrapcol=sys.maxsize*2), expected) with self.assertRaises(OverflowError): - base64.b64encode(b, wrapcol=2**1000) + func(data, wrapcol=2**1000) with self.assertRaises(ValueError): - base64.b64encode(b, wrapcol=-8) - with self.assertRaises(TypeError): - base64.b64encode(b, wrapcol=8.0) + func(data, wrapcol=-80) with self.assertRaises(TypeError): - base64.b64encode(b, wrapcol='8') + func(data, wrapcol=80.0) with self.assertRaises(TypeError): - base64.b64encode(b, wrapcol=None) - eq(base64.b64encode(b'', wrapcol=0), b'') - eq(base64.b64encode(b'', wrapcol=8), b'') + func(data, wrapcol='80') + if func is not base64.b16encode: + with self.assertRaises(TypeError): + func(data, wrapcol=None) + + def 
test_b64encode_wrapcol(self): + eq = self.assertEqual + b = b'www.python.org' + self._common_test_wrapcol(base64.b64encode, b) + eq(base64.b64encode(b, wrapcol=8), b'd3d3LnB5\ndGhvbi5v\ncmc=') + eq(base64.b64encode(b, wrapcol=11), b'd3d3LnB5\ndGhvbi5v\ncmc=') + eq(base64.b64encode(b, wrapcol=1), b'd3d3\nLnB5\ndGhv\nbi5v\ncmc=') def test_b64decode(self): eq = self.assertEqual @@ -309,6 +316,24 @@ def test_b64decode_padding_error(self): self.assertRaises(binascii.Error, base64.b64decode, b'abc') self.assertRaises(binascii.Error, base64.b64decode, 'abc') + def _common_test_ignorechars(self, func): + eq = self.assertEqual + eq(func(b'', ignorechars=b' \n'), b'') + eq(func(b'', ignorechars=b''), b'') + eq(func(b' \n', ignorechars=b' \n'), b'') + with self.assertRaises(binascii.Error): + func(b' \n', ignorechars=b'') + with self.assertRaises(binascii.Error): + func(b' \n', ignorechars=b' ') + with self.assertRaises(binascii.Error): + func(b' \n', ignorechars=b'\n') + with self.assertRaises(TypeError): + func(b'', ignorechars='') + with self.assertRaises(TypeError): + func(b'', ignorechars=[]) + with self.assertRaises(TypeError): + func(b'', ignorechars=None) + def test_b64decode_invalid_chars(self): # issue 1466065: Test some invalid characters. 
tests = ((b'%3d==', b'\xdd', b'%$'), @@ -351,12 +376,7 @@ def test_b64decode_invalid_chars(self): r = base64.b64decode(bstr, ignorechars=ignorechars) self.assertEqual(r, res) - with self.assertRaises(TypeError): - base64.b64decode(b'', ignorechars='') - with self.assertRaises(TypeError): - base64.b64decode(b'', ignorechars=[]) - with self.assertRaises(TypeError): - base64.b64decode(b'', ignorechars=None) + self._common_test_ignorechars(base64.b64decode) # Normal alphabet characters will be discarded when alternative given discarded = ("invalid character %a in Base64 data with %s " @@ -469,6 +489,14 @@ def test_b32encode(self): self.check_other_types(base64.b32encode, b'abcd', b'MFRGGZA=') self.check_encode_type_errors(base64.b32encode) + def test_b32encode_wrapcol(self): + eq = self.assertEqual + b = b'www.python.org' + self._common_test_wrapcol(base64.b32encode, b) + eq(base64.b32encode(b, wrapcol=16), b'O53XOLTQPF2GQ33O\nFZXXEZY=') + eq(base64.b32encode(b, wrapcol=23), b'O53XOLTQPF2GQ33O\nFZXXEZY=') + eq(base64.b32encode(b, wrapcol=1), b'O53XOLTQ\nPF2GQ33O\nFZXXEZY=') + def test_b32decode(self): eq = self.assertEqual tests = {b'': b'', @@ -504,6 +532,7 @@ def test_b32decode_casefold(self): for data, res in tests.items(): eq(base64.b32decode(data, True), res) + eq(base64.b32decode(data, casefold=True), res) eq(base64.b32decode(data.decode('ascii'), True), res) self.assertRaises(binascii.Error, base64.b32decode, b'me======') @@ -537,6 +566,31 @@ def test_b32decode_map01(self): eq(base64.b32decode(b'M%c023456' % map01, map01=map01), res) eq(base64.b32decode(b'M%cO23456' % map01, map01=map01), res) + def test_b32decode_ignorechars(self): + self._common_test_ignorechars(base64.b32decode) + eq = self.assertEqual + eq(base64.b32decode(b'MFRG\n GZDF\n', ignorechars=b' \n'), b'abcde') + eq(base64.b32decode(b'MFRG\n GZDF\n', ignorechars=bytearray(b' \n')), b'abcde') + eq(base64.b32decode(b'M=======FRGGZDF', ignorechars=b'='), b'abcde') + 
eq(base64.b32decode(b'MF======RGGZDF', ignorechars=b'='), b'abcde') + eq(base64.b32decode(b'MFR=====GGZDF', ignorechars=b'='), b'abcde') + eq(base64.b32decode(b'MFRG====GZDF', ignorechars=b'='), b'abcde') + eq(base64.b32decode(b'MFRGG===ZDF', ignorechars=b'='), b'abcde') + eq(base64.b32decode(b'MFRGGZ==DF', ignorechars=b'='), b'abcde') + eq(base64.b32decode(b'MFRGGZD=F', ignorechars=b'='), b'abcde') + eq(base64.b32decode(b'MFRGGZDF=', ignorechars=b'='), b'abcde') + eq(base64.b32decode(b'MFRA======', ignorechars=b'='), b'ab') + + eq(base64.b32decode(b'mfRggzDfmzTQ====', ignorechars=b'mfgz'), + b'\x88\xe7') + eq(base64.b32decode(b'mfRggzDfmzTQ====', casefold=True, ignorechars=b'mfgz'), + b'abcdefg') + eq(base64.b32decode(b'M0F1R0G1G0Z1D0F1', ignorechars=b'01'), b'abcde') + eq(base64.b32decode(b'M0F1R0G1G0Z1D0F1', map01=b'L', ignorechars=b'01'), + b'c\x8a\xb8\xb8\xcb3\xb2\xb1\xb8\xab') + eq(base64.b32decode(b'M0F1R0G1G0Z1D0F1', map01=b'I', ignorechars=b'01'), + b'c\x8a\x88\xb8\xc83\xb2\x81\xb8\xa8') + def test_b32decode_error(self): tests = [b'abc', b'ABCDEF==', b'==ABCDEF'] prefixes = [b'M', b'ME', b'MFRA', b'MFRGG', b'MFRGGZA', b'MFRGGZDF'] @@ -585,6 +639,14 @@ def test_b32hexencode_other_types(self): self.check_other_types(base64.b32hexencode, b'abcd', b'C5H66P0=') self.check_encode_type_errors(base64.b32hexencode) + def test_b32hexencode_wrapcol(self): + eq = self.assertEqual + b = b'www.python.org' + self._common_test_wrapcol(base64.b32hexencode, b) + eq(base64.b32hexencode(b, wrapcol=16), b'ETRNEBJGF5Q6GRRE\n5PNN4PO=') + eq(base64.b32hexencode(b, wrapcol=23), b'ETRNEBJGF5Q6GRRE\n5PNN4PO=') + eq(base64.b32hexencode(b, wrapcol=1), b'ETRNEBJG\nF5Q6GRRE\n5PNN4PO=') + def test_b32hexdecode(self): test_cases = [ # to_decode, expected, casefold @@ -619,6 +681,28 @@ def test_b32hexdecode_other_types(self): self.check_other_types(base64.b32hexdecode, b'C5H66===', b'abc') self.check_decode_type_errors(base64.b32hexdecode) + def test_b32hexdecode_ignorechars(self): + 
self._common_test_ignorechars(base64.b32hexdecode) + eq = self.assertEqual + eq(base64.b32hexdecode(b'C5H6\n 6P35\n', ignorechars=b' \n'), b'abcde') + eq(base64.b32hexdecode(b'C5H6\n 6P35\n', ignorechars=bytearray(b' \n')), b'abcde') + eq(base64.b32hexdecode(b'========C5H66P35', ignorechars=b'='), b'abcde') + eq(base64.b32hexdecode(b'C=======5H66P35', ignorechars=b'='), b'abcde') + eq(base64.b32hexdecode(b'C5======H66P35', ignorechars=b'='), b'abcde') + eq(base64.b32hexdecode(b'C5H=====66P35', ignorechars=b'='), b'abcde') + eq(base64.b32hexdecode(b'C5H6====6P35', ignorechars=b'='), b'abcde') + eq(base64.b32hexdecode(b'C5H66===P35', ignorechars=b'='), b'abcde') + eq(base64.b32hexdecode(b'C5H66P==35', ignorechars=b'='), b'abcde') + eq(base64.b32hexdecode(b'C5H66P3=5', ignorechars=b'='), b'abcde') + eq(base64.b32hexdecode(b'C5H66P35=', ignorechars=b'='), b'abcde') + eq(base64.b32hexdecode(b'C5H0======', ignorechars=b'='), b'ab') + + eq(base64.b32hexdecode(b'c5h66p35cpjmg===', ignorechars=b'cghjmp'), + b')\x8c2') + eq(base64.b32hexdecode(b'c5h66p35cpjmg===', casefold=True, + ignorechars=b'cghjmp'), + b'abcdefgh') + def test_b32hexdecode_error(self): tests = [b'abc', b'ABCDEF==', b'==ABCDEF', b'c4======'] prefixes = [b'M', b'ME', b'MFRA', b'MFRGG', b'MFRGGZA', b'MFRGGZDF'] @@ -654,6 +738,14 @@ def test_b16encode(self): b'0102ABCDEF') self.check_encode_type_errors(base64.b16encode) + def test_b16encode_wrapcol(self): + eq = self.assertEqual + b = b'\x01\x02\xab\xcd\xef' + self._common_test_wrapcol(base64.b16encode, b) + eq(base64.b16encode(b, wrapcol=4), b'0102\nABCD\nEF') + eq(base64.b16encode(b, wrapcol=5), b'0102\nABCD\nEF') + eq(base64.b16encode(b, wrapcol=1), b'01\n02\nAB\nCD\nEF') + def test_b16decode(self): eq = self.assertEqual eq(base64.b16decode(b'0102ABCDEF'), b'\x01\x02\xab\xcd\xef') @@ -681,6 +773,14 @@ def test_b16decode(self): # Incorrect "padding" self.assertRaises(binascii.Error, base64.b16decode, '010') + def test_b16decode_ignorechars(self): + 
self._common_test_ignorechars(base64.b16decode) + eq = self.assertEqual + eq(base64.b16decode(b'A B\nC D\n', ignorechars=b' \n'), b'\xab\xcd') + eq(base64.b16decode(b'A B\nC D\n', ignorechars=bytearray(b' \n')), b'\xab\xcd') + eq(base64.b16decode(b'aBcD', ignorechars=b'ac'), b'\xbd') + eq(base64.b16decode(b'aBcD', casefold=True, ignorechars=b'ac'), b'\xab\xcd') + @hypothesis.given( payload=hypothesis.strategies.binary(), casefold=hypothesis.strategies.booleans()) @@ -737,7 +837,7 @@ def test_a85encode(self): def test_a85encode_wrapcol(self): eq = self.assertEqual b = b'www.python.org' - eq(base64.a85encode(b, wrapcol=0), b'GB\\6`E-ZP=Df.1GEb>') + self._common_test_wrapcol(base64.a85encode, b) eq(base64.a85encode(b, wrapcol=7), b'GB\\6`E-\nZP=Df.1\nGEb>') eq(base64.a85encode(b"\0\0\0\0www.python.org", wrapcol=7), b'zGB\\6`E\n-ZP=Df.\n1GEb>') @@ -750,24 +850,8 @@ def test_a85encode_wrapcol(self): b'G\nB\n\\\n6\n`\nE\n-\nZ\nP\n=\nD\nf\n.\n1\nG\nE\nb\n>') eq(base64.a85encode(b, wrapcol=1, adobe=True), b'<~\nGB\n\\6\n`E\n-Z\nP=\nDf\n.1\nGE\nb>\n~>') - eq(base64.a85encode(b, wrapcol=sys.maxsize), b'GB\\6`E-ZP=Df.1GEb>') - if check_impl_detail(): - eq(base64.a85encode(b, wrapcol=sys.maxsize*2), - b'GB\\6`E-ZP=Df.1GEb>') - with self.assertRaises(OverflowError): - base64.a85encode(b, wrapcol=2**1000) - with self.assertRaises(ValueError): - base64.a85encode(b, wrapcol=-7) with self.assertRaises(ValueError): base64.a85encode(b, wrapcol=-7, adobe=True) - with self.assertRaises(TypeError): - base64.a85encode(b, wrapcol=7.0) - with self.assertRaises(TypeError): - base64.a85encode(b, wrapcol='7') - with self.assertRaises(TypeError): - base64.a85encode(b, wrapcol=None) - eq(base64.a85encode(b'', wrapcol=0), b'') - eq(base64.a85encode(b'', wrapcol=7), b'') eq(base64.a85encode(b'', wrapcol=1, adobe=True), b'<~\n~>') eq(base64.a85encode(b'', wrapcol=3, adobe=True), b'<~\n~>') eq(base64.a85encode(b'', wrapcol=4, adobe=True), b'<~~>') @@ -806,6 +890,14 @@ def test_b85encode(self): 
self.check_other_types(base64.b85encode, b"www.python.org", b'cXxL#aCvlSZ*DGca%T') + def test_b85encode_wrapcol(self): + eq = self.assertEqual + b = b'www.python.org' + self._common_test_wrapcol(base64.b85encode, b) + eq(base64.b85encode(b, wrapcol=10), b'cXxL#aCvlS\nZ*DGca%T') + eq(base64.b85encode(b, wrapcol=14), b'cXxL#aCvlS\nZ*DGca%T') + eq(base64.b85encode(b, wrapcol=1), b'cXxL#\naCvlS\nZ*DGc\na%T') + def test_z85encode(self): eq = self.assertEqual @@ -841,6 +933,14 @@ def test_z85encode(self): self.check_other_types(base64.z85encode, b"www.python.org", b'CxXl-AcVLsz/dgCA+t') + def test_z85encode_wrapcol(self): + eq = self.assertEqual + b = b'www.python.org' + self._common_test_wrapcol(base64.z85encode, b) + eq(base64.z85encode(b, wrapcol=10), b'CxXl-AcVLs\nz/dgCA+t') + eq(base64.z85encode(b, wrapcol=14), b'CxXl-AcVLs\nz/dgCA+t') + eq(base64.z85encode(b, wrapcol=1), b'CxXl-\nAcVLs\nz/dgC\nA+t') + def test_a85decode(self): eq = self.assertEqual @@ -1047,24 +1147,20 @@ def test_a85decode_errors(self): self.assertEqual(base64.a85decode(b"a b\nc", ignorechars=b" \n"), b'\xc9\x89') - with self.assertRaises(ValueError): - base64.a85decode(b"a b\nc", ignorechars=b"") - with self.assertRaises(ValueError): - base64.a85decode(b"a b\nc", ignorechars=b" ") - with self.assertRaises(ValueError): - base64.a85decode(b"a b\nc", ignorechars=b"\n") - with self.assertRaises(TypeError): - base64.a85decode(b"a b\nc", ignorechars=" \n") - with self.assertRaises(TypeError): - base64.a85decode(b"a b\nc", ignorechars=None) + self._common_test_ignorechars(base64.a85decode) def test_b85decode_errors(self): illegal = list(range(33)) + \ list(b'"\',./:[\\]') + \ list(range(128, 256)) for c in illegal: - with self.assertRaises(ValueError, msg=bytes([c])): - base64.b85decode(b'0000' + bytes([c])) + b = bytes([c]) + with self.assertRaises(ValueError, msg=b): + base64.b85decode(b'0000' + b) + self.assertEqual(base64.b85decode(b'0000' + b, ignorechars=b), + b'\x00\x00\x00') + + 
self._common_test_ignorechars(base64.b85decode) self.assertRaises(ValueError, base64.b85decode, b'|') self.assertRaises(ValueError, base64.b85decode, b'|N') @@ -1077,8 +1173,13 @@ def test_z85decode_errors(self): list(b'"\',;_`|\\~') + \ list(range(128, 256)) for c in illegal: - with self.assertRaises(ValueError, msg=bytes([c])): - base64.z85decode(b'0000' + bytes([c])) + b = bytes([c]) + with self.assertRaises(ValueError, msg=b): + base64.z85decode(b'0000' + b) + self.assertEqual(base64.z85decode(b'0000' + b, ignorechars=b), + b'\x00\x00\x00') + + self._common_test_ignorechars(base64.z85decode) # b'\xff\xff\xff\xff' encodes to b'%nSc0', the following will overflow: self.assertRaises(ValueError, base64.z85decode, b'%') diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index 1dcd2b25c79087..959a61b530b1a5 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -226,8 +226,30 @@ def assertInvalidLength(*args): assertExcessPadding(b'abcd====efgh', b'i\xb7\x1dy\xf8!') assertExcessPadding(b'abcd=====efgh', b'i\xb7\x1dy\xf8!') + def _common_test_ignorechars(self, func): + eq = self.assertEqual + empty = self.type2test(b'') + data = self.type2test(b'\n \n') + ignorechars = self.type2test(b' \n') + eq(func(empty, ignorechars=ignorechars), b'') + eq(func(empty, ignorechars=empty), b'') + eq(func(data, ignorechars=ignorechars), b'') + with self.assertRaises(binascii.Error): + func(data, ignorechars=empty) + with self.assertRaises(binascii.Error): + func(data, ignorechars=ignorechars[1:]) + with self.assertRaises(binascii.Error): + func(data, ignorechars=ignorechars[:-1]) + with self.assertRaises(TypeError): + func(empty, ignorechars='') + with self.assertRaises(TypeError): + func(empty, ignorechars=[]) + with self.assertRaises(TypeError): + func(empty, ignorechars=None) + def test_base64_invalidchars(self): # Test non-base64 data exceptions + self._common_test_ignorechars(binascii.a2b_base64) def assertNonBase64Data(data, expected, 
ignorechars): data = self.type2test(data) assert_regex = r'(?i)Only base64 data' @@ -253,6 +275,12 @@ def assertNonBase64Data(data, expected, ignorechars): assertNonBase64Data(b'a\nb==', b'i', ignorechars=bytearray(b'\n')) assertNonBase64Data(b'a\nb==', b'i', ignorechars=memoryview(b'\n')) + self.assertEqual(binascii.a2b_base64(b'+A-/B_', ignorechars=b'+/-_'), + b'\xf8\x0f\xc1') + self.assertEqual(binascii.a2b_base64(b'+A-/B_', ignorechars=b'+/-_', + alphabet=binascii.URLSAFE_BASE64_ALPHABET), + b'\x03\xe0\x7f') + # Same cell in the cache: '\r' >> 3 == '\n' >> 3. data = self.type2test(b'\r\n') with self.assertRaises(binascii.Error): @@ -264,14 +292,6 @@ def assertNonBase64Data(data, expected, ignorechars): binascii.a2b_base64(data, ignorechars=b'*') self.assertEqual(binascii.a2b_base64(data, ignorechars=b'*\n'), b'') - data = self.type2test(b'a\nb==') - with self.assertRaises(TypeError): - binascii.a2b_base64(data, ignorechars='') - with self.assertRaises(TypeError): - binascii.a2b_base64(data, ignorechars=[]) - with self.assertRaises(TypeError): - binascii.a2b_base64(data, ignorechars=None) - def test_base64_excess_data(self): # Test excess data exceptions def assertExcessData(data, expected): @@ -495,8 +515,32 @@ def assertInvalidChar(data, **kwargs): assertInvalidChar(b"\tFCb", ignorechars=b"\n") assertInvalidChar(b"xxxB\nP\thU'D v/F+", ignorechars=b" \n\tv") + def _common_test_wrapcol(self, func): + eq = self.assertEqual + data = self.data + expected = func(data) + eq(func(data, wrapcol=0), expected) + eq(func(data, wrapcol=8000), expected) + eq(func(b'', wrapcol=0), func(b'')) + eq(func(b'', wrapcol=1), func(b'')) + eq(func(data, wrapcol=sys.maxsize), expected) + if check_impl_detail(): + eq(func(data, wrapcol=sys.maxsize*2), expected) + with self.assertRaises(OverflowError): + func(data, wrapcol=2**1000) + with self.assertRaises(ValueError): + func(data, wrapcol=-80) + with self.assertRaises(TypeError): + func(data, wrapcol=80.0) + with 
self.assertRaises(TypeError): + func(data, wrapcol='80') + with self.assertRaises(TypeError): + func(data, wrapcol=None) + def test_ascii85_wrapcol(self): # Test Ascii85 splitting lines + self._common_test_wrapcol(binascii.b2a_ascii85) + def assertEncode(a_expected, data, n, adobe=False): b = self.type2test(data) a = binascii.b2a_ascii85(b, adobe=adobe, wrapcol=n) @@ -635,6 +679,16 @@ def assertOverflow(data): assertOverflow(b"|NsC0~") assertOverflow(b"|NsC0|NsC0|NsD0") + def test_base85_wrapcol(self): + self._common_test_wrapcol(binascii.b2a_base85) + b = self.type2test(b'www.python.org') + self.assertEqual(binascii.b2a_base85(b, wrapcol=10), + b'cXxL#aCvlS\nZ*DGca%T') + self.assertEqual(binascii.b2a_base85(b, wrapcol=14), + b'cXxL#aCvlS\nZ*DGca%T') + self.assertEqual(binascii.b2a_base85(b, wrapcol=1), + b'cXxL#\naCvlS\nZ*DGc\na%T') + def test_base85_pad(self): # Test base85 with encode padding rawdata = b"n1n3Tee\n ch@rAc\te\r$" @@ -646,6 +700,17 @@ def test_base85_pad(self): b_pad_expected = b + b"\0" * padding self.assertEqual(b_pad, b_pad_expected) + def test_base85_ignorechars(self): + a2b_base85 = binascii.a2b_base85 + self._common_test_ignorechars(a2b_base85) + eq = self.assertEqual + eq(a2b_base85(b'VPa\n !s\n', ignorechars=b' \n'), b'abcd') + eq(a2b_base85(b'VPa\n !s\n', ignorechars=bytearray(b' \n')), b'abcd') + + eq(a2b_base85(b'A~[B];C', ignorechars=b';[]~'), b'"1\xa3\x15') + eq(a2b_base85(b'A~[B];C', ignorechars=b';[]~', + alphabet=binascii.Z85_ALPHABET), b'r\xd8dv') + def test_base85_alphabet(self): alphabet = (b'0123456789abcdefghijklmnopqrstuvwxyz' b'ABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#') @@ -824,6 +889,36 @@ def assertInvalidLength(*args): assertInvalidLength(b"BEEFCA=K", b"\t\x08Q\x01") assertInvalidLength(b"BEEFCA=====K", b"\t\x08Q\x01") + def test_base32_wrapcol(self): + self._common_test_wrapcol(binascii.b2a_base32) + b = self.type2test(b'www.python.org') + self.assertEqual(binascii.b2a_base32(b, wrapcol=16), + 
b'O53XOLTQPF2GQ33O\nFZXXEZY=') + self.assertEqual(binascii.b2a_base32(b, wrapcol=23), + b'O53XOLTQPF2GQ33O\nFZXXEZY=') + self.assertEqual(binascii.b2a_base32(b, wrapcol=1), + b'O53XOLTQ\nPF2GQ33O\nFZXXEZY=') + + def test_base32_ignorechars(self): + a2b_base32 = binascii.a2b_base32 + self._common_test_ignorechars(a2b_base32) + eq = self.assertEqual + eq(a2b_base32(b'MFRG\n GZDF\n', ignorechars=b' \n'), b'abcde') + eq(a2b_base32(b'MFRG\n GZDF\n', ignorechars=bytearray(b' \n')), b'abcde') + eq(a2b_base32(b'M=======FRGGZDF', ignorechars=b'='), b'abcde') + eq(a2b_base32(b'MF======RGGZDF', ignorechars=b'='), b'abcde') + eq(a2b_base32(b'MFR=====GGZDF', ignorechars=b'='), b'abcde') + eq(a2b_base32(b'MFRG====GZDF', ignorechars=b'='), b'abcde') + eq(a2b_base32(b'MFRGG===ZDF', ignorechars=b'='), b'abcde') + eq(a2b_base32(b'MFRGGZ==DF', ignorechars=b'='), b'abcde') + eq(a2b_base32(b'MFRGGZD=F', ignorechars=b'='), b'abcde') + eq(a2b_base32(b'MFRGGZDF=', ignorechars=b'='), b'abcde') + eq(a2b_base32(b'MFRA======', ignorechars=b'='), b'ab') + + eq(a2b_base32(b'A1B3C5W7Z9', ignorechars=b'19WZ'), b'\x00v.\xdb\xf9') + eq(a2b_base32(b'A1B3C5W7Z9', ignorechars=b'19WZ', + alphabet=binascii.BASE32HEX_ALPHABET), b'PV6\x14\xe9') + def test_base32_alphabet(self): alphabet = b'0Aa1Bb2Cc3Dd4Ee5Ff6Gg7Hh8Ii9JjKk' data = self.type2test(self.rawdata) @@ -951,6 +1046,15 @@ def test_hex_separator(self): expected1 = s.hex(':').encode('ascii') self.assertEqual(binascii.b2a_hex(self.type2test(s), ':'), expected1) + def test_hex_ignorechars(self): + a2b_hex = binascii.a2b_hex + self._common_test_ignorechars(a2b_hex) + self._common_test_ignorechars(binascii.unhexlify) + eq = self.assertEqual + eq(a2b_hex(b'A B\nC D\n', ignorechars=b' \n'), b'\xab\xcd') + eq(a2b_hex(b'A B\nC D\n', ignorechars=bytearray(b' \n')), b'\xab\xcd') + eq(a2b_hex(b'aBcD', ignorechars=b'ac'), b'\xab\xcd') + def test_qp(self): type2test = self.type2test a2b_qp = binascii.a2b_qp @@ -1128,38 +1232,17 @@ def 
test_b2a_base64_newline(self): self.assertEqual(binascii.b2a_base64(b, newline=False), b'') def test_b2a_base64_wrapcol(self): + self._common_test_wrapcol(binascii.b2a_base64) b = self.type2test(b'www.python.org') - self.assertEqual(binascii.b2a_base64(b), - b'd3d3LnB5dGhvbi5vcmc=\n') - self.assertEqual(binascii.b2a_base64(b, wrapcol=0), - b'd3d3LnB5dGhvbi5vcmc=\n') self.assertEqual(binascii.b2a_base64(b, wrapcol=8), b'd3d3LnB5\ndGhvbi5v\ncmc=\n') self.assertEqual(binascii.b2a_base64(b, wrapcol=11), b'd3d3LnB5\ndGhvbi5v\ncmc=\n') - self.assertEqual(binascii.b2a_base64(b, wrapcol=76), - b'd3d3LnB5dGhvbi5vcmc=\n') self.assertEqual(binascii.b2a_base64(b, wrapcol=8, newline=False), b'd3d3LnB5\ndGhvbi5v\ncmc=') self.assertEqual(binascii.b2a_base64(b, wrapcol=1), b'd3d3\nLnB5\ndGhv\nbi5v\ncmc=\n') - self.assertEqual(binascii.b2a_base64(b, wrapcol=sys.maxsize), - b'd3d3LnB5dGhvbi5vcmc=\n') - if check_impl_detail(): - self.assertEqual(binascii.b2a_base64(b, wrapcol=sys.maxsize*2), - b'd3d3LnB5dGhvbi5vcmc=\n') - with self.assertRaises(OverflowError): - binascii.b2a_base64(b, wrapcol=2**1000) - with self.assertRaises(ValueError): - binascii.b2a_base64(b, wrapcol=-8) - with self.assertRaises(TypeError): - binascii.b2a_base64(b, wrapcol=8.0) - with self.assertRaises(TypeError): - binascii.b2a_base64(b, wrapcol='8') b = self.type2test(b'') - self.assertEqual(binascii.b2a_base64(b), b'\n') - self.assertEqual(binascii.b2a_base64(b, wrapcol=0), b'\n') - self.assertEqual(binascii.b2a_base64(b, wrapcol=8), b'\n') self.assertEqual(binascii.b2a_base64(b, wrapcol=8, newline=False), b'') @hypothesis.given( diff --git a/Misc/NEWS.d/next/Library/2026-03-25-21-08-51.gh-issue-146431.zERPwe.rst b/Misc/NEWS.d/next/Library/2026-03-25-21-08-51.gh-issue-146431.zERPwe.rst new file mode 100644 index 00000000000000..6268a52926ffaa --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-03-25-21-08-51.gh-issue-146431.zERPwe.rst @@ -0,0 +1,11 @@ +Add the *wrapcol* parameter to :mod:`base64` functions 
+:func:`~base64.b16encode`, :func:`~base64.b32encode`, +:func:`~base64.b32hexencode`, :func:`~base64.b85encode` and +:func:`~base64.z85encode`, and :mod:`binascii` functions +:func:`~binascii.b2a_base32` and :func:`~binascii.b2a_base85`. Add the +*ignorechars* parameter to :mod:`base64` functions +:func:`~base64.b16decode`, :func:`~base64.b32decode`, +:func:`~base64.b32hexdecode`, :func:`~base64.b85decode` and +:func:`~base64.z85decode`, and :mod:`binascii` functions +:func:`~binascii.a2b_hex`, :func:`~binascii.unhexlify`, +:func:`~binascii.a2b_base32` and :func:`~binascii.a2b_base85`. diff --git a/Modules/binascii.c b/Modules/binascii.c index dbe77ff248d34e..6f29b2f98e922a 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -803,7 +803,7 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode, if (quad_pos >= 2 && quad_pos + pads <= 4) { continue; } - // See RFC 4648, section-3.3: "specifications MAY ignore the + // See RFC 4648, section 3.3: "specifications MAY ignore the // pad character, "=", treating it as non-alphabet data, if // it is present before the end of the encoded data" and // "the excess pad characters MAY also be ignored." @@ -826,7 +826,7 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode, unsigned char v = table_a2b[this_ch]; if (v >= 64) { - // See RFC 4648, section-3.3. + // See RFC 4648, section 3.3. if (strict_mode && !ignorechar(this_ch, ignorechars, ignorecache)) { state = get_binascii_state(module); if (state) { @@ -838,7 +838,7 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode, } // Characters that are not '=', in the middle of the padding, are - // not allowed (except when they are). See RFC 4648, section-3.3. + // not allowed (except when they are). See RFC 4648, section 3.3. 
if (pads && strict_mode && !ignorechar(BASE64_PAD, ignorechars, ignorecache)) { @@ -1294,14 +1294,16 @@ binascii.a2b_base85 / * alphabet: PyBytesObject(c_default="NULL") = BASE85_ALPHABET + ignorechars: Py_buffer = b'' + A byte string containing characters to ignore from the input. Decode a line of Base85 data. [clinic start generated code]*/ static PyObject * binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, - PyBytesObject *alphabet) -/*[clinic end generated code: output=3e114af53812e8ff input=0b6b83b38ad4497c]*/ + PyBytesObject *alphabet, Py_buffer *ignorechars) +/*[clinic end generated code: output=6a8d6eae798818d7 input=04d72a319712bdf3]*/ { const unsigned char *ascii_data = data->buf; Py_ssize_t ascii_len = data->len; @@ -1318,6 +1320,14 @@ binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, table_a2b = (const unsigned char *)PyBytes_AS_STRING(table_obj); } + if (ignorechars->len == 0) { + ignorechars = NULL; + } + ignorecache_t ignorecache; + if (ignorechars != NULL) { + memset(ignorecache, 0, sizeof(ignorecache)); + } + assert(ascii_len >= 0); /* Allocate output buffer. */ @@ -1333,9 +1343,10 @@ binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, int group_pos = 0; for (; ascii_len > 0 || group_pos != 0; ascii_len--, ascii_data++) { /* Shift (in radix-85) data or padding into our buffer. */ + unsigned char this_ch; unsigned char this_digit; if (ascii_len > 0) { - unsigned char this_ch = *ascii_data; + this_ch = *ascii_data; this_digit = table_a2b[this_ch]; } else { @@ -1358,7 +1369,7 @@ binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, leftchar = leftchar * 85 + this_digit; group_pos++; } - else { + else if (!ignorechar(this_ch, ignorechars, ignorecache)) { state = get_binascii_state(module); if (state != NULL) { PyErr_Format(state->Error, "bad Base85 character at position %d", @@ -1399,6 +1410,7 @@ binascii.b2a_base85 * pad: bool = False Pad input to a multiple of 4 before encoding. 
+ wrapcol: size_t = 0 alphabet: Py_buffer(c_default="{NULL, NULL}") = BASE85_ALPHABET Base85-code line of data. @@ -1406,8 +1418,8 @@ Base85-code line of data. static PyObject * binascii_b2a_base85_impl(PyObject *module, Py_buffer *data, int pad, - Py_buffer *alphabet) -/*[clinic end generated code: output=a59f4f2ff6f0e69f input=30f545c6ff554db7]*/ + size_t wrapcol, Py_buffer *alphabet) +/*[clinic end generated code: output=98b962ed52c776a4 input=1b20b0bd6572691b]*/ { const unsigned char *bin_data = data->buf; Py_ssize_t bin_len = data->len; @@ -1428,6 +1440,11 @@ binascii_b2a_base85_impl(PyObject *module, Py_buffer *data, int pad, if (!pad && (bin_len % 4)) { out_len -= 4 - (bin_len % 4); } + if (wrapcol && out_len) { + /* Each line should encode a whole number of bytes. */ + wrapcol = wrapcol < 5 ? 5 : wrapcol / 5 * 5; + out_len += (out_len - 1u) / wrapcol; + } if (out_len > PY_SSIZE_T_MAX) { binascii_state *state = get_binascii_state(module); if (state == NULL) { @@ -1480,6 +1497,11 @@ binascii_b2a_base85_impl(PyObject *module, Py_buffer *data, int pad, ascii_data += group_len; } + if (wrapcol && out_len) { + unsigned char *start = PyBytesWriter_GetData(writer); + ascii_data = start + wraplines(start, ascii_data - start, wrapcol); + } + return PyBytesWriter_FinishWithPointer(writer, ascii_data); } @@ -1490,14 +1512,16 @@ binascii.a2b_base32 / * alphabet: PyBytesObject(c_default="NULL") = BASE32_ALPHABET + ignorechars: Py_buffer = b'' + A byte string containing characters to ignore from the input. Decode a line of base32 data. 
[clinic start generated code]*/ static PyObject * binascii_a2b_base32_impl(PyObject *module, Py_buffer *data, - PyBytesObject *alphabet) -/*[clinic end generated code: output=12cb58bf547237e2 input=426055ea49ac147e]*/ + PyBytesObject *alphabet, Py_buffer *ignorechars) +/*[clinic end generated code: output=2cf7c8c9e6e98b88 input=b0333508aad1b3ac]*/ { const unsigned char *ascii_data = data->buf; Py_ssize_t ascii_len = data->len; @@ -1516,6 +1540,14 @@ binascii_a2b_base32_impl(PyObject *module, Py_buffer *data, table_a2b = (const unsigned char *)PyBytes_AS_STRING(table_obj); } + if (ignorechars->len == 0) { + ignorechars = NULL; + } + ignorecache_t ignorecache; + if (ignorechars != NULL) { + memset(ignorecache, 0, sizeof(ignorecache)); + } + /* Allocate output buffer. */ size_t bin_len = ((size_t)ascii_len + 7) / 8 * 5; PyBytesWriter *writer = PyBytesWriter_Create(bin_len); @@ -1525,6 +1557,7 @@ binascii_a2b_base32_impl(PyObject *module, Py_buffer *data, } unsigned char *bin_data = PyBytesWriter_GetData(writer); +fastpath: /* * Fast path: use optimized decoder for complete octas (groups of 8 bytes). * The fast path stops at padding, invalid chars, or incomplete octas. @@ -1555,6 +1588,13 @@ binascii_a2b_base32_impl(PyObject *module, Py_buffer *data, { continue; } + // See RFC 4648, section 3.3: "specifications MAY ignore the + // pad character, "=", treating it as non-alphabet data, if + // it is present before the end of the encoded data" and + // "the excess pad characters MAY also be ignored." + if (ignorechar(BASE32_PAD, ignorechars, ignorecache)) { + continue; + } if (octa_pos == 1 || octa_pos == 3 || octa_pos == 6) { /* Set an error below. */ break; @@ -1571,15 +1611,20 @@ binascii_a2b_base32_impl(PyObject *module, Py_buffer *data, unsigned char v = table_a2b[this_ch]; if (v >= 32) { - state = get_binascii_state(module); - if (state) { - PyErr_SetString(state->Error, "Only base32 data is allowed"); + // See RFC 4648, section 3.3. 
+ if (!ignorechar(this_ch, ignorechars, ignorecache)) { + state = get_binascii_state(module); + if (state) { + PyErr_SetString(state->Error, "Only base32 data is allowed"); + } + goto error; } - goto error; + continue; } - /* Data in the middle of/after the padding is not allowed. */ - if (pads) { + // Characters that are not '=', in the middle of the padding, are + // not allowed (except when they are). See RFC 4648, section 3.3. + if (pads && !ignorechar(BASE32_PAD, ignorechars, ignorecache)) { state = get_binascii_state(module); if (state) { PyErr_SetString(state->Error, (octa_pos + pads == 8) @@ -1626,6 +1671,9 @@ binascii_a2b_base32_impl(PyObject *module, Py_buffer *data, octa_pos = 0; *bin_data++ = (leftchar << 5) | v; leftchar = 0; + ascii_data++; + ascii_len--; + goto fastpath; } } @@ -1642,9 +1690,7 @@ binascii_a2b_base32_impl(PyObject *module, Py_buffer *data, goto error; } - if ((octa_pos != 0 && octa_pos + pads != 8) - || (octa_pos == 0 && pads != 0)) - { + if (octa_pos != 0 && octa_pos + pads < 8) { state = get_binascii_state(module); if (state) { PyErr_SetString(state->Error, "Incorrect padding"); @@ -1667,15 +1713,16 @@ binascii.b2a_base32 data: Py_buffer / * + wrapcol: size_t = 0 alphabet: Py_buffer(c_default="{NULL, NULL}") = BASE32_ALPHABET Base32-code line of data. [clinic start generated code]*/ static PyObject * -binascii_b2a_base32_impl(PyObject *module, Py_buffer *data, +binascii_b2a_base32_impl(PyObject *module, Py_buffer *data, size_t wrapcol, Py_buffer *alphabet) -/*[clinic end generated code: output=058d0d1aeb014d3b input=99cbe7194799d368]*/ +/*[clinic end generated code: output=d41fafbdaf29e280 input=a3d93b73836f2879]*/ { const unsigned char *table_b2a = table_b2a_base32; const unsigned char *bin_data = data->buf; @@ -1697,6 +1744,11 @@ binascii_b2a_base32_impl(PyObject *module, Py_buffer *data, * Use unsigned integer arithmetic to avoid signed integer overflow. 
*/ size_t ascii_len = ((size_t)bin_len + 4u) / 5u * 8u; + if (wrapcol && ascii_len) { + /* Each line should encode a whole number of bytes. */ + wrapcol = wrapcol < 8 ? 8 : wrapcol / 8 * 8; + ascii_len += (ascii_len - 1u) / wrapcol; + } if (ascii_len > PY_SSIZE_T_MAX) { state = get_binascii_state(module); if (state) { @@ -1772,6 +1824,11 @@ binascii_b2a_base32_impl(PyObject *module, Py_buffer *data, *ascii_data++ = BASE32_PAD; } + if (wrapcol && ascii_len) { + unsigned char *start = PyBytesWriter_GetData(writer); + ascii_data = start + wraplines(start, ascii_data - start, wrapcol); + } + return PyBytesWriter_FinishWithPointer(writer, ascii_data); } @@ -2060,6 +2117,9 @@ binascii.a2b_hex hexstr: ascii_buffer / + * + ignorechars: Py_buffer = b'' + A byte string containing characters to ignore from the input. Binary data of hexadecimal representation. @@ -2068,53 +2128,68 @@ This function is also available as "unhexlify()". [clinic start generated code]*/ static PyObject * -binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr) -/*[clinic end generated code: output=0cc1a139af0eeecb input=9e1e7f2f94db24fd]*/ +binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr, + Py_buffer *ignorechars) +/*[clinic end generated code: output=021a7ed5a742cb20 input=6154b3f4e6e2c0c3]*/ { - const char* argbuf; - Py_ssize_t arglen; - Py_ssize_t i, j; - binascii_state *state; - - argbuf = hexstr->buf; - arglen = hexstr->len; - - assert(arglen >= 0); + const unsigned char *ascii_data = hexstr->buf; + size_t ascii_len = hexstr->len; + binascii_state *state = NULL; - /* XXX What should we do about strings with an odd length? Should - * we add an implicit leading zero, or a trailing zero? For now, - * raise an exception. 
- */ - if (arglen % 2) { - state = get_binascii_state(module); - if (state == NULL) { - return NULL; - } - PyErr_SetString(state->Error, "Odd-length string"); - return NULL; + if (ignorechars->len == 0) { + ignorechars = NULL; + } + ignorecache_t ignorecache; + if (ignorechars != NULL) { + memset(ignorecache, 0, sizeof(ignorecache)); } - PyBytesWriter *writer = PyBytesWriter_Create(arglen/2); + /* Allocate the buffer */ + Py_ssize_t bin_len = ascii_len/2; + PyBytesWriter *writer = PyBytesWriter_Create(bin_len); if (writer == NULL) { return NULL; } - char *retbuf = PyBytesWriter_GetData(writer); + unsigned char *bin_data = PyBytesWriter_GetData(writer); - for (i=j=0; i < arglen; i += 2) { - unsigned int top = _PyLong_DigitValue[Py_CHARMASK(argbuf[i])]; - unsigned int bot = _PyLong_DigitValue[Py_CHARMASK(argbuf[i+1])]; - if (top >= 16 || bot >= 16) { - state = get_binascii_state(module); - if (state == NULL) { + int pair_pos = 0; + unsigned char leftchar = 0; + for (; ascii_len; ascii_data++, ascii_len--) { + unsigned char this_ch = *ascii_data; + + unsigned char this_digit = _PyLong_DigitValue[this_ch]; + if (this_digit >= 16) { + // See RFC 4648, section 3.3. 
+ if (!ignorechar(this_ch, ignorechars, ignorecache)) { + state = get_binascii_state(module); + if (state) { + PyErr_SetString(state->Error, + "Non-hexadecimal digit found"); + } goto error; } - PyErr_SetString(state->Error, - "Non-hexadecimal digit found"); - goto error; + continue; + } + + if (!pair_pos) { + pair_pos = 1; + leftchar = this_digit; + } + else { + pair_pos = 0; + *bin_data++ = (leftchar << 4) | this_digit; } - retbuf[j++] = (top << 4) + bot; } - return PyBytesWriter_Finish(writer); + + if (pair_pos) { + state = get_binascii_state(module); + if (state) { + PyErr_SetString(state->Error, "Odd number of hexadecimal digits"); + } + goto error; + } + + return PyBytesWriter_FinishWithPointer(writer, bin_data); error: PyBytesWriter_Discard(writer); @@ -2130,10 +2205,11 @@ hexstr must contain an even number of hex digits (upper or lower case). [clinic start generated code]*/ static PyObject * -binascii_unhexlify_impl(PyObject *module, Py_buffer *hexstr) -/*[clinic end generated code: output=51a64c06c79629e3 input=dd8c012725f462da]*/ +binascii_unhexlify_impl(PyObject *module, Py_buffer *hexstr, + Py_buffer *ignorechars) +/*[clinic end generated code: output=40e87f8a0ded5880 input=dd8c012725f462da]*/ { - return binascii_a2b_hex_impl(module, hexstr); + return binascii_a2b_hex_impl(module, hexstr, ignorechars); } #define MAXLINESIZE 76 diff --git a/Modules/clinic/binascii.c.h b/Modules/clinic/binascii.c.h index 7a411bfc829943..bbddd7121bf793 100644 --- a/Modules/clinic/binascii.c.h +++ b/Modules/clinic/binascii.c.h @@ -544,17 +544,21 @@ binascii_b2a_ascii85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, } PyDoc_STRVAR(binascii_a2b_base85__doc__, -"a2b_base85($module, data, /, *, alphabet=BASE85_ALPHABET)\n" +"a2b_base85($module, data, /, *, alphabet=BASE85_ALPHABET,\n" +" ignorechars=b\'\')\n" "--\n" "\n" -"Decode a line of Base85 data."); +"Decode a line of Base85 data.\n" +"\n" +" ignorechars\n" +" A byte string containing characters to ignore from 
the input."); #define BINASCII_A2B_BASE85_METHODDEF \ {"a2b_base85", _PyCFunction_CAST(binascii_a2b_base85), METH_FASTCALL|METH_KEYWORDS, binascii_a2b_base85__doc__}, static PyObject * binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, - PyBytesObject *alphabet); + PyBytesObject *alphabet, Py_buffer *ignorechars); static PyObject * binascii_a2b_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -562,7 +566,7 @@ binascii_a2b_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 1 + #define NUM_KEYWORDS 2 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -571,7 +575,7 @@ binascii_a2b_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(alphabet), }, + .ob_item = { &_Py_ID(alphabet), &_Py_ID(ignorechars), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -580,17 +584,18 @@ binascii_a2b_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"", "alphabet", NULL}; + static const char * const _keywords[] = {"", "alphabet", "ignorechars", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "a2b_base85", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; Py_buffer data = {NULL, NULL}; PyBytesObject *alphabet = NULL; + Py_buffer ignorechars = {.buf = "", .obj = NULL, .len = 0}; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -603,24 +608,37 @@ binascii_a2b_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P if (!noptargs) { goto skip_optional_kwonly; } - if (!PyBytes_Check(args[1])) { - _PyArg_BadArgument("a2b_base85", "argument 'alphabet'", "bytes", args[1]); + if (args[1]) { + if (!PyBytes_Check(args[1])) { + _PyArg_BadArgument("a2b_base85", "argument 'alphabet'", "bytes", args[1]); + goto exit; + } + alphabet = (PyBytesObject *)args[1]; + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (PyObject_GetBuffer(args[2], &ignorechars, PyBUF_SIMPLE) != 0) { goto exit; } - alphabet = (PyBytesObject *)args[1]; skip_optional_kwonly: - return_value = binascii_a2b_base85_impl(module, &data, alphabet); + return_value = binascii_a2b_base85_impl(module, &data, alphabet, &ignorechars); exit: /* Cleanup for data */ if (data.obj) PyBuffer_Release(&data); + /* Cleanup for ignorechars */ + if (ignorechars.obj) { + PyBuffer_Release(&ignorechars); + } return return_value; } PyDoc_STRVAR(binascii_b2a_base85__doc__, -"b2a_base85($module, data, /, *, pad=False, alphabet=BASE85_ALPHABET)\n" +"b2a_base85($module, data, /, *, pad=False, wrapcol=0,\n" +" alphabet=BASE85_ALPHABET)\n" "--\n" "\n" "Base85-code line of data.\n" @@ -633,7 +651,7 @@ PyDoc_STRVAR(binascii_b2a_base85__doc__, static PyObject * binascii_b2a_base85_impl(PyObject *module, Py_buffer *data, int pad, - Py_buffer *alphabet); + size_t wrapcol, Py_buffer *alphabet); static PyObject * binascii_b2a_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -641,7 +659,7 @@ binascii_b2a_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P PyObject *return_value = NULL; #if 
defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -650,7 +668,7 @@ binascii_b2a_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(pad), &_Py_ID(alphabet), }, + .ob_item = { &_Py_ID(pad), &_Py_ID(wrapcol), &_Py_ID(alphabet), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -659,17 +677,18 @@ binascii_b2a_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"", "pad", "alphabet", NULL}; + static const char * const _keywords[] = {"", "pad", "wrapcol", "alphabet", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "b2a_base85", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[3]; + PyObject *argsbuf[4]; Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; Py_buffer data = {NULL, NULL}; int pad = 0; + size_t wrapcol = 0; Py_buffer alphabet = {NULL, NULL}; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, @@ -692,11 +711,19 @@ binascii_b2a_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P goto skip_optional_kwonly; } } - if (PyObject_GetBuffer(args[2], &alphabet, PyBUF_SIMPLE) != 0) { + if (args[2]) { + if (!_PyLong_Size_t_Converter(args[2], &wrapcol)) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (PyObject_GetBuffer(args[3], &alphabet, PyBUF_SIMPLE) != 0) { goto exit; } skip_optional_kwonly: - return_value = binascii_b2a_base85_impl(module, &data, pad, &alphabet); + return_value = binascii_b2a_base85_impl(module, &data, pad, wrapcol, &alphabet); exit: /* Cleanup for data */ @@ -712,17 +739,21 @@ binascii_b2a_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P } PyDoc_STRVAR(binascii_a2b_base32__doc__, -"a2b_base32($module, data, /, *, alphabet=BASE32_ALPHABET)\n" +"a2b_base32($module, data, /, *, alphabet=BASE32_ALPHABET,\n" +" ignorechars=b\'\')\n" "--\n" "\n" -"Decode a line of base32 data."); +"Decode a line of base32 data.\n" +"\n" +" ignorechars\n" +" A byte string containing characters to ignore from the input."); #define BINASCII_A2B_BASE32_METHODDEF \ {"a2b_base32", _PyCFunction_CAST(binascii_a2b_base32), METH_FASTCALL|METH_KEYWORDS, binascii_a2b_base32__doc__}, static PyObject * binascii_a2b_base32_impl(PyObject *module, Py_buffer *data, - PyBytesObject *alphabet); + PyBytesObject *alphabet, Py_buffer *ignorechars); static PyObject * binascii_a2b_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -730,7 +761,7 @@ binascii_a2b_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 1 + #define NUM_KEYWORDS 2 static struct { 
PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -739,7 +770,7 @@ binascii_a2b_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(alphabet), }, + .ob_item = { &_Py_ID(alphabet), &_Py_ID(ignorechars), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -748,17 +779,18 @@ binascii_a2b_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"", "alphabet", NULL}; + static const char * const _keywords[] = {"", "alphabet", "ignorechars", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "a2b_base32", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; Py_buffer data = {NULL, NULL}; PyBytesObject *alphabet = NULL; + Py_buffer ignorechars = {.buf = "", .obj = NULL, .len = 0}; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -771,24 +803,36 @@ binascii_a2b_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P if (!noptargs) { goto skip_optional_kwonly; } - if (!PyBytes_Check(args[1])) { - _PyArg_BadArgument("a2b_base32", "argument 'alphabet'", "bytes", args[1]); + if (args[1]) { + if (!PyBytes_Check(args[1])) { + _PyArg_BadArgument("a2b_base32", "argument 'alphabet'", "bytes", args[1]); + goto exit; + } + alphabet = (PyBytesObject *)args[1]; + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (PyObject_GetBuffer(args[2], &ignorechars, PyBUF_SIMPLE) != 0) { goto exit; } - alphabet = (PyBytesObject *)args[1]; skip_optional_kwonly: - return_value = binascii_a2b_base32_impl(module, &data, alphabet); + return_value = binascii_a2b_base32_impl(module, &data, alphabet, &ignorechars); exit: 
/* Cleanup for data */ if (data.obj) PyBuffer_Release(&data); + /* Cleanup for ignorechars */ + if (ignorechars.obj) { + PyBuffer_Release(&ignorechars); + } return return_value; } PyDoc_STRVAR(binascii_b2a_base32__doc__, -"b2a_base32($module, data, /, *, alphabet=BASE32_ALPHABET)\n" +"b2a_base32($module, data, /, *, wrapcol=0, alphabet=BASE32_ALPHABET)\n" "--\n" "\n" "Base32-code line of data."); @@ -797,7 +841,7 @@ PyDoc_STRVAR(binascii_b2a_base32__doc__, {"b2a_base32", _PyCFunction_CAST(binascii_b2a_base32), METH_FASTCALL|METH_KEYWORDS, binascii_b2a_base32__doc__}, static PyObject * -binascii_b2a_base32_impl(PyObject *module, Py_buffer *data, +binascii_b2a_base32_impl(PyObject *module, Py_buffer *data, size_t wrapcol, Py_buffer *alphabet); static PyObject * @@ -806,7 +850,7 @@ binascii_b2a_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 1 + #define NUM_KEYWORDS 2 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -815,7 +859,7 @@ binascii_b2a_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(alphabet), }, + .ob_item = { &_Py_ID(wrapcol), &_Py_ID(alphabet), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -824,16 +868,17 @@ binascii_b2a_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"", "alphabet", NULL}; + static const char * const _keywords[] = {"", "wrapcol", "alphabet", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "b2a_base32", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; Py_buffer data = {NULL, NULL}; + size_t wrapcol = 0; Py_buffer alphabet = {NULL, NULL}; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, @@ -847,11 +892,19 @@ binascii_b2a_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P if (!noptargs) { goto skip_optional_kwonly; } - if (PyObject_GetBuffer(args[1], &alphabet, PyBUF_SIMPLE) != 0) { + if (args[1]) { + if (!_PyLong_Size_t_Converter(args[1], &wrapcol)) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (PyObject_GetBuffer(args[2], &alphabet, PyBUF_SIMPLE) != 0) { goto exit; } skip_optional_kwonly: - return_value = binascii_b2a_base32_impl(module, &data, &alphabet); + return_value = binascii_b2a_base32_impl(module, &data, wrapcol, &alphabet); exit: /* Cleanup for data */ @@ -1172,68 +1225,168 @@ binascii_hexlify(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyOb } PyDoc_STRVAR(binascii_a2b_hex__doc__, -"a2b_hex($module, hexstr, /)\n" +"a2b_hex($module, hexstr, /, *, ignorechars=b\'\')\n" "--\n" "\n" "Binary data of hexadecimal representation.\n" "\n" +" ignorechars\n" +" A byte string containing characters to ignore from the input.\n" +"\n" "hexstr must contain an even number of hex digits (upper or lower case).\n" "This function is also available as \"unhexlify()\"."); #define BINASCII_A2B_HEX_METHODDEF \ - {"a2b_hex", (PyCFunction)binascii_a2b_hex, METH_O, binascii_a2b_hex__doc__}, + {"a2b_hex", _PyCFunction_CAST(binascii_a2b_hex), METH_FASTCALL|METH_KEYWORDS, binascii_a2b_hex__doc__}, static PyObject * -binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr); +binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr, + Py_buffer *ignorechars); static PyObject * -binascii_a2b_hex(PyObject *module, PyObject *arg) +binascii_a2b_hex(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && 
!defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(ignorechars), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"", "ignorechars", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "a2b_hex", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; Py_buffer hexstr = {NULL, NULL}; + Py_buffer ignorechars = {.buf = "", .obj = NULL, .len = 0}; - if (!ascii_buffer_converter(arg, &hexstr)) { + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (!ascii_buffer_converter(args[0], &hexstr)) { + goto exit; + } + if (!noptargs) { + goto skip_optional_kwonly; + } + if (PyObject_GetBuffer(args[1], &ignorechars, PyBUF_SIMPLE) != 0) { goto exit; } - return_value = binascii_a2b_hex_impl(module, &hexstr); +skip_optional_kwonly: + return_value = binascii_a2b_hex_impl(module, &hexstr, &ignorechars); exit: /* Cleanup for hexstr */ if (hexstr.obj) PyBuffer_Release(&hexstr); + /* Cleanup for ignorechars */ + if (ignorechars.obj) { + PyBuffer_Release(&ignorechars); + } return return_value; } PyDoc_STRVAR(binascii_unhexlify__doc__, -"unhexlify($module, hexstr, /)\n" +"unhexlify($module, hexstr, /, *, ignorechars=b\'\')\n" "--\n" "\n" "Binary data of hexadecimal representation.\n" "\n" +" ignorechars\n" +" A byte string containing characters to ignore from the input.\n" +"\n" "hexstr must contain an even number of hex digits (upper or lower case)."); 
#define BINASCII_UNHEXLIFY_METHODDEF \ - {"unhexlify", (PyCFunction)binascii_unhexlify, METH_O, binascii_unhexlify__doc__}, + {"unhexlify", _PyCFunction_CAST(binascii_unhexlify), METH_FASTCALL|METH_KEYWORDS, binascii_unhexlify__doc__}, static PyObject * -binascii_unhexlify_impl(PyObject *module, Py_buffer *hexstr); +binascii_unhexlify_impl(PyObject *module, Py_buffer *hexstr, + Py_buffer *ignorechars); static PyObject * -binascii_unhexlify(PyObject *module, PyObject *arg) +binascii_unhexlify(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(ignorechars), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"", "ignorechars", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "unhexlify", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; Py_buffer hexstr = {NULL, NULL}; + Py_buffer ignorechars = {.buf = "", .obj = NULL, .len = 0}; - if (!ascii_buffer_converter(arg, &hexstr)) { + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { goto exit; } - return_value = binascii_unhexlify_impl(module, &hexstr); + if (!ascii_buffer_converter(args[0], &hexstr)) { + goto exit; + } + if (!noptargs) { + goto skip_optional_kwonly; + } + if (PyObject_GetBuffer(args[1], &ignorechars, PyBUF_SIMPLE) != 0) { + goto exit; + } +skip_optional_kwonly: + return_value = binascii_unhexlify_impl(module, &hexstr, &ignorechars); exit: /* Cleanup for hexstr */ if (hexstr.obj) PyBuffer_Release(&hexstr); + /* Cleanup for ignorechars */ + if (ignorechars.obj) { + PyBuffer_Release(&ignorechars); + } return return_value; } @@ -1411,4 +1564,4 @@ binascii_b2a_qp(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj return return_value; } -/*[clinic end generated code: output=242c0c56b918bd33 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=7afd570a9d5a3627 input=a9049054013a1b77]*/