Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions Doc/deprecations/pending-removal-in-3.17.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ Pending removal in Python 3.17
(Contributed by Shantanu Jain in :gh:`91896`.)


* :mod:`encodings`:

- Passing non-ASCII *encoding* names to :func:`encodings.normalize_encoding`
is deprecated and scheduled for removal in Python 3.17.
(Contributed by Stan Ulbrych in :gh:`136702`.)

* :mod:`typing`:

- Before Python 3.14, old-style unions were implemented using the private class
Expand Down
1 change: 1 addition & 0 deletions Lib/email/_header_value_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -796,6 +796,7 @@ def params(self):
value = urllib.parse.unquote(value, encoding='latin-1')
else:
try:
charset = utils._sanitize_charset_name(charset, 'us-ascii')
value = value.decode(charset, 'surrogateescape')
except (LookupError, UnicodeEncodeError):
# XXX: there should really be a custom defect for
Expand Down
10 changes: 10 additions & 0 deletions Lib/email/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,15 @@ def decode_params(params):
new_params.append((name, '"%s"' % value))
return new_params

def _sanitize_charset_name(charset, fallback_charset):
if not charset:
return charset
sanitized = ''.join(
c for c in charset
if (ord(c) < 0xDC80 or ord(c) > 0xDCFF) and c.isascii()
)
return sanitized if sanitized else fallback_charset

def collapse_rfc2231_value(value, errors='replace',
fallback_charset='us-ascii'):
if not isinstance(value, tuple) or len(value) != 3:
Expand All @@ -458,6 +467,7 @@ def collapse_rfc2231_value(value, errors='replace',
# Issue 17369: if charset/lang is None, decode_rfc2231 couldn't parse
# the value, so use the fallback_charset.
charset = fallback_charset
charset = _sanitize_charset_name(charset, fallback_charset)
rawbytes = bytes(text, 'raw-unicode-escape')
try:
return str(rawbytes, charset, errors)
Expand Down
8 changes: 7 additions & 1 deletion Lib/encodings/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""#"
"""

import codecs
import sys
Expand Down Expand Up @@ -55,6 +55,12 @@ def normalize_encoding(encoding):
if isinstance(encoding, bytes):
encoding = str(encoding, "ascii")

if not encoding.isascii():
import warnings
warnings.warn(
"Support for non-ascii encoding names will be removed in 3.17",
DeprecationWarning, stacklevel=2)

chars = []
punct = False
for c in encoding:
Expand Down
10 changes: 7 additions & 3 deletions Lib/test/test_codecs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3886,22 +3886,26 @@ def search_function(encoding):
self.assertEqual(codecs.lookup('TEST.AAA 8'), ('test.aaa-8', 2, 3, 4))
self.assertEqual(codecs.lookup('TEST.AAA---8'), ('test.aaa---8', 2, 3, 4))
self.assertEqual(codecs.lookup('TEST.AAA 8'), ('test.aaa---8', 2, 3, 4))
self.assertEqual(codecs.lookup('TEST.AAA\xe9\u20ac-8'), ('test.aaa\xe9\u20ac-8', 2, 3, 4))
self.assertEqual(codecs.lookup('TEST.AAA.8'), ('test.aaa.8', 2, 3, 4))
self.assertEqual(codecs.lookup('TEST.AAA...8'), ('test.aaa...8', 2, 3, 4))
with self.assertWarns(DeprecationWarning):
self.assertEqual(codecs.lookup('TEST.AAA\xe9\u20ac-8'), ('test.aaa\xe9\u20ac-8', 2, 3, 4))

def test_encodings_normalize_encoding(self):
# encodings.normalize_encoding() ignores non-ASCII characters.
normalize = encodings.normalize_encoding
self.assertEqual(normalize('utf_8'), 'utf_8')
self.assertEqual(normalize('utf\xE9\u20AC\U0010ffff-8'), 'utf_8')
self.assertEqual(normalize('utf 8'), 'utf_8')
# encodings.normalize_encoding() doesn't convert
# characters to lower case.
self.assertEqual(normalize('UTF 8'), 'UTF_8')
self.assertEqual(normalize('utf.8'), 'utf.8')
self.assertEqual(normalize('utf...8'), 'utf...8')

# Non-ASCII *encoding* is deprecated.
with self.assertWarnsRegex(DeprecationWarning,
"Support for non-ascii encoding names will be removed in 3.17"):
self.assertEqual(normalize('utf\xE9\u20AC\U0010ffff-8'), 'utf_8')


if __name__ == "__main__":
unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
:mod:`encodings`: Deprecate passing a non-ASCII *encoding* name to
:func:`encodings.normalize_encoding`; support is scheduled for removal in
Python 3.17.
Loading