From 92873d68937cb68077e01957611969fd26f0bad4 Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <stanulbrych@gmail.com>
Date: Mon, 14 Jul 2025 09:34:06 +0100
Subject: [PATCH 1/7] Implement encodings.normalize_encoding() in C

---
 Lib/encodings/__init__.py                     | 33 +++++++++-------
 Lib/test/test_codecs.py                       |  6 ++-
 ...5-07-14-09-33-17.gh-issue-55531.Gt2e12.rst |  4 ++
 Modules/_codecsmodule.c                       | 39 +++++++++++++++++++
 Modules/clinic/_codecsmodule.c.h              | 31 ++++++++++++++-
 Objects/unicodeobject.c                       | 15 +++----
 Python/codecs.c                               |  7 ++--
 Python/fileutils.c                            |  4 +-
 8 files changed, 110 insertions(+), 29 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Library/2025-07-14-09-33-17.gh-issue-55531.Gt2e12.rst

diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py
index 298177eb8003a7..31ab4147668f26 100644
--- a/Lib/encodings/__init__.py
+++ b/Lib/encodings/__init__.py
@@ -26,7 +26,7 @@
 
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 
-"""#"
+"""
 
 import codecs
 import sys
@@ -37,10 +37,23 @@
 _import_tail = ['*']
 _aliases = aliases.aliases
 
+
+_norm_encoding_map = (
+    #0123456789ABCDEF0123456789ABCDEF
+    '                                '
+    '              . 0123456789      '
+    ' ABCDEFGHIJKLMNOPQRSTUVWXYZ     '
+    ' abcdefghijklmnopqrstuvwxyz     '
+    '                                '
+    '                                '
+    '                                '
+    '                                ')
+
+
 class CodecRegistryError(LookupError, SystemError):
     pass
 
-def normalize_encoding(encoding):
+def normalize_encoding(encoding, /):
 
     """ Normalize an encoding name.
 
@@ -55,18 +68,10 @@ def normalize_encoding(encoding):
     if isinstance(encoding, bytes):
         encoding = str(encoding, "ascii")
 
-    chars = []
-    punct = False
-    for c in encoding:
-        if c.isalnum() or c == '.':
-            if punct and chars:
-                chars.append('_')
-            if c.isascii():
-                chars.append(c)
-            punct = False
-        else:
-            punct = True
-    return ''.join(chars)
+    s = encoding.translate(_norm_encoding_map)
+    return '_'.join(s.split())
+
+from _codecs import _normalize_encoding as normalize_encoding
 
 def search_function(encoding):
 
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index d8666f7290e72e..99ea833b60bce6 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -3895,11 +3895,13 @@ def search_function(encoding):
         self.assertEqual(NOT_FOUND, codecs.lookup('a\xe9\u20ac-8'))
 
     def test_encodings_normalize_encoding(self):
-        # encodings.normalize_encoding() ignores non-ASCII characters.
         normalize = encodings.normalize_encoding
         self.assertEqual(normalize('utf_8'), 'utf_8')
-        self.assertEqual(normalize('utf\xE9\u20AC\U0010ffff-8'), 'utf_8')
         self.assertEqual(normalize('utf   8'), 'utf_8')
+
+        # encodings.normalize_encoding() does not accept non-ASCII characters.
+        self.assertRaises(UnicodeEncodeError, normalize, 'utf\xE9\u20AC\U0010ffff-8')
+
         # encodings.normalize_encoding() doesn't convert
         # characters to lower case.
         self.assertEqual(normalize('UTF 8'), 'UTF_8')
diff --git a/Misc/NEWS.d/next/Library/2025-07-14-09-33-17.gh-issue-55531.Gt2e12.rst b/Misc/NEWS.d/next/Library/2025-07-14-09-33-17.gh-issue-55531.Gt2e12.rst
new file mode 100644
index 00000000000000..70e39a4f2c167c
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-07-14-09-33-17.gh-issue-55531.Gt2e12.rst
@@ -0,0 +1,4 @@
+:mod:`encodings`: Improve :func:`~encodings.normalize_encoding` performance
+by implementing the function in C using the private
+``_Py_normalize_encoding`` function, which has been modified to make
+lowercase conversion optional.
diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c
index 7cf3f152eeecc6..c8ce3738693bc0 100644
--- a/Modules/_codecsmodule.c
+++ b/Modules/_codecsmodule.c
@@ -1022,6 +1022,44 @@ _codecs_lookup_error_impl(PyObject *module, const char *name)
     return PyCodec_LookupError(name);
 }
 
+extern int _Py_normalize_encoding(const char *, char *, size_t, int);
+
+/*[clinic input]
+_codecs._normalize_encoding
+    encoding: str(encoding='ascii')
+    /
+
+Normalize an encoding name, while not converting to lower case (to_lower == 1).
+Used for encodings.normalize_encoding.
+[clinic start generated code]*/
+
+static PyObject *
+_codecs__normalize_encoding_impl(PyObject *module, char *encoding)
+/*[clinic end generated code: output=d5e3a4b5266fbe96 input=ca002bbc262228f1]*/
+{
+    size_t len = strlen(encoding);
+    if (len > PY_SSIZE_T_MAX) {
+        PyErr_SetString(PyExc_OverflowError, "encoding is too large");
+        return NULL;
+    }
+
+    char *normalized = PyMem_Malloc(len + 1);
+    if (normalized == NULL) {
+        return PyErr_NoMemory();
+    }
+
+    if (!_Py_normalize_encoding(encoding, normalized, len + 1, 0)) {
+        PyErr_SetString(PyExc_RuntimeError, "_Py_normalize_encoding() failed");
+        PyMem_Free(normalized);
+        return NULL;
+    }
+
+    PyObject *v = PyUnicode_FromString(normalized);
+    PyMem_Free(normalized);
+    return v;
+}
+
+
 /* --- Module API --------------------------------------------------------- */
 
 static PyMethodDef _codecs_functions[] = {
@@ -1071,6 +1109,7 @@ static PyMethodDef _codecs_functions[] = {
     _CODECS_REGISTER_ERROR_METHODDEF
     _CODECS__UNREGISTER_ERROR_METHODDEF
     _CODECS_LOOKUP_ERROR_METHODDEF
+    _CODECS__NORMALIZE_ENCODING_METHODDEF
     {NULL, NULL}                /* sentinel */
 };
 
diff --git a/Modules/clinic/_codecsmodule.c.h b/Modules/clinic/_codecsmodule.c.h
index b0310325759326..eb4f481f8118de 100644
--- a/Modules/clinic/_codecsmodule.c.h
+++ b/Modules/clinic/_codecsmodule.c.h
@@ -2779,6 +2779,35 @@ _codecs_lookup_error(PyObject *module, PyObject *arg)
     return return_value;
 }
 
+PyDoc_STRVAR(_codecs__normalize_encoding__doc__,
+"_normalize_encoding($module, encoding, /)\n"
+"--\n"
+"\n"
+"Normalize an encoding name. Used for encodings.normalize_encoding.");
+
+#define _CODECS__NORMALIZE_ENCODING_METHODDEF    \
+    {"_normalize_encoding", (PyCFunction)_codecs__normalize_encoding, METH_O, _codecs__normalize_encoding__doc__},
+
+static PyObject *
+_codecs__normalize_encoding_impl(PyObject *module, char *encoding);
+
+static PyObject *
+_codecs__normalize_encoding(PyObject *module, PyObject *arg)
+{
+    PyObject *return_value = NULL;
+    char *encoding = NULL;
+
+    if (!PyArg_Parse(arg, "es:_normalize_encoding", "ascii", &encoding)) {
+        goto exit;
+    }
+    return_value = _codecs__normalize_encoding_impl(module, encoding);
+    /* Post parse cleanup for encoding */
+    PyMem_FREE(encoding);
+
+exit:
+    return return_value;
+}
+
 #ifndef _CODECS_MBCS_DECODE_METHODDEF
     #define _CODECS_MBCS_DECODE_METHODDEF
 #endif /* !defined(_CODECS_MBCS_DECODE_METHODDEF) */
@@ -2802,4 +2831,4 @@ _codecs_lookup_error(PyObject *module, PyObject *arg)
 #ifndef _CODECS_CODE_PAGE_ENCODE_METHODDEF
     #define _CODECS_CODE_PAGE_ENCODE_METHODDEF
 #endif /* !defined(_CODECS_CODE_PAGE_ENCODE_METHODDEF) */
-/*[clinic end generated code: output=ed13f20dfb09e306 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=aa3636e281f5268f input=a9049054013a1b77]*/
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 5c2308a012142a..64d8cf4397237c 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -3587,13 +3587,14 @@ PyUnicode_FromEncodedObject(PyObject *obj,
     return v;
 }
 
-/* Normalize an encoding name: similar to encodings.normalize_encoding(), but
-   also convert to lowercase. Return 1 on success, or 0 on error (encoding is
-   longer than lower_len-1). */
+/* Normalize an encoding name like encodings.normalize_encoding()
+   Optionally covert convert to lowercase by setting *to_lower* to 1.
+   Return 1 on success, or 0 on error (encoding is longer than lower_len-1). */
 int
 _Py_normalize_encoding(const char *encoding,
                        char *lower,
-                       size_t lower_len)
+                       size_t lower_len,
+                       int to_lower)
 {
     const char *e;
     char *l;
@@ -3624,7 +3625,7 @@ _Py_normalize_encoding(const char *encoding,
             if (l == l_end) {
                 return 0;
             }
-            *l++ = Py_TOLOWER(c);
+            *l++ = to_lower ? Py_TOLOWER(c) : c;
         }
         else {
             punct = 1;
@@ -3659,7 +3660,7 @@ PyUnicode_Decode(const char *s,
     }
 
     /* Shortcuts for common default encodings */
-    if (_Py_normalize_encoding(encoding, buflower, sizeof(buflower))) {
+    if (_Py_normalize_encoding(encoding, buflower, sizeof(buflower), 1)) {
         char *lower = buflower;
 
         /* Fast paths */
@@ -3916,7 +3917,7 @@ PyUnicode_AsEncodedString(PyObject *unicode,
     }
 
     /* Shortcuts for common default encodings */
-    if (_Py_normalize_encoding(encoding, buflower, sizeof(buflower))) {
+    if (_Py_normalize_encoding(encoding, buflower, sizeof(buflower), 1)) {
         char *lower = buflower;
 
         /* Fast paths */
diff --git a/Python/codecs.c b/Python/codecs.c
index caf8d9d5f3c188..ffcb14928e0a82 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -90,7 +90,7 @@ PyCodec_Unregister(PyObject *search_function)
     return 0;
 }
 
-extern int _Py_normalize_encoding(const char *, char *, size_t);
+extern int _Py_normalize_encoding(const char *, char *, size_t, int);
 
 /* Convert a string to a normalized Python string(decoded from UTF-8): all characters are
    converted to lower case, spaces and hyphens are replaced with underscores. */
@@ -108,10 +108,11 @@ PyObject *normalizestring(const char *string)
     }
 
     encoding = PyMem_Malloc(len + 1);
-    if (encoding == NULL)
+    if (encoding == NULL) {
         return PyErr_NoMemory();
+    }
 
-    if (!_Py_normalize_encoding(string, encoding, len + 1))
+    if (!_Py_normalize_encoding(string, encoding, len + 1, 1))
     {
         PyErr_SetString(PyExc_RuntimeError, "_Py_normalize_encoding() failed");
         PyMem_Free(encoding);
diff --git a/Python/fileutils.c b/Python/fileutils.c
index 2a3f12d4e872f8..aedf8576c7a930 100644
--- a/Python/fileutils.c
+++ b/Python/fileutils.c
@@ -180,7 +180,7 @@ _Py_mbrtowc(wchar_t *pwc, const char *str, size_t len, mbstate_t *pmbs)
 
 #define USE_FORCE_ASCII
 
-extern int _Py_normalize_encoding(const char *, char *, size_t);
+extern int _Py_normalize_encoding(const char *, char *, size_t, int);
 
 /* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale
    and POSIX locale. nl_langinfo(CODESET) announces an alias of the
@@ -231,7 +231,7 @@ check_force_ascii(void)
     }
 
     char encoding[20];   /* longest name: "iso_646.irv_1991\0" */
-    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding))) {
+    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding), 1)) {
         goto error;
     }
 

From 4bae23a7353f8ec04631dd647a5c51d56baf86c6 Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <stanulbrych@gmail.com>
Date: Mon, 14 Jul 2025 09:43:10 +0100
Subject: [PATCH 2/7] Correct clinic note

---
 Modules/_codecsmodule.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c
index c8ce3738693bc0..d9441ee9b2a74e 100644
--- a/Modules/_codecsmodule.c
+++ b/Modules/_codecsmodule.c
@@ -1029,8 +1029,10 @@ _codecs._normalize_encoding
     encoding: str(encoding='ascii')
     /
 
-Normalize an encoding name, while not converting to lower case (to_lower == 1).
+Normalize an encoding name *encoding*.
+
 Used for encodings.normalize_encoding.
+Does not convert to lower case (to_lower == 1).
 [clinic start generated code]*/
 
 static PyObject *

From b5f3df3a44dff71ea9795e12e510fd0de9eafdcb Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <stanulbrych@gmail.com>
Date: Mon, 14 Jul 2025 09:54:58 +0100
Subject: [PATCH 3/7] Little fixes

---
 Lib/encodings/__init__.py        |  2 +-
 Modules/_codecsmodule.c          |  6 ++---
 Modules/clinic/_codecsmodule.c.h | 42 +++++++++++++++++++++++++++-----
 Objects/unicodeobject.c          |  2 +-
 4 files changed, 40 insertions(+), 12 deletions(-)

diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py
index 31ab4147668f26..ef15189b984f40 100644
--- a/Lib/encodings/__init__.py
+++ b/Lib/encodings/__init__.py
@@ -53,7 +53,7 @@
 class CodecRegistryError(LookupError, SystemError):
     pass
 
-def normalize_encoding(encoding, /):
+def normalize_encoding(encoding):
 
     """ Normalize an encoding name.
 
diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c
index d9441ee9b2a74e..36e58015e84286 100644
--- a/Modules/_codecsmodule.c
+++ b/Modules/_codecsmodule.c
@@ -1027,17 +1027,15 @@ extern int _Py_normalize_encoding(const char *, char *, size_t, int);
 /*[clinic input]
 _codecs._normalize_encoding
     encoding: str(encoding='ascii')
-    /
 
 Normalize an encoding name *encoding*.
 
-Used for encodings.normalize_encoding.
-Does not convert to lower case (to_lower == 1).
+Used for encodings.normalize_encoding. Does not convert to lower case.
 [clinic start generated code]*/
 
 static PyObject *
 _codecs__normalize_encoding_impl(PyObject *module, char *encoding)
-/*[clinic end generated code: output=d5e3a4b5266fbe96 input=ca002bbc262228f1]*/
+/*[clinic end generated code: output=d5e3a4b5266fbe96 input=cdb53c013b2400e3]*/
 {
     size_t len = strlen(encoding);
     if (len > PY_SSIZE_T_MAX) {
diff --git a/Modules/clinic/_codecsmodule.c.h b/Modules/clinic/_codecsmodule.c.h
index eb4f481f8118de..540c980216dcb6 100644
--- a/Modules/clinic/_codecsmodule.c.h
+++ b/Modules/clinic/_codecsmodule.c.h
@@ -2780,24 +2780,54 @@ _codecs_lookup_error(PyObject *module, PyObject *arg)
 }
 
 PyDoc_STRVAR(_codecs__normalize_encoding__doc__,
-"_normalize_encoding($module, encoding, /)\n"
+"_normalize_encoding($module, /, encoding)\n"
 "--\n"
 "\n"
-"Normalize an encoding name. Used for encodings.normalize_encoding.");
+"Normalize an encoding name *encoding*.\n"
+"\n"
+"Used for encodings.normalize_encoding. Does not convert to lower case.");
 
 #define _CODECS__NORMALIZE_ENCODING_METHODDEF    \
-    {"_normalize_encoding", (PyCFunction)_codecs__normalize_encoding, METH_O, _codecs__normalize_encoding__doc__},
+    {"_normalize_encoding", _PyCFunction_CAST(_codecs__normalize_encoding), METH_FASTCALL|METH_KEYWORDS, _codecs__normalize_encoding__doc__},
 
 static PyObject *
 _codecs__normalize_encoding_impl(PyObject *module, char *encoding);
 
 static PyObject *
-_codecs__normalize_encoding(PyObject *module, PyObject *arg)
+_codecs__normalize_encoding(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
 {
     PyObject *return_value = NULL;
+    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+    #define NUM_KEYWORDS 1
+    static struct {
+        PyGC_Head _this_is_not_used;
+        PyObject_VAR_HEAD
+        Py_hash_t ob_hash;
+        PyObject *ob_item[NUM_KEYWORDS];
+    } _kwtuple = {
+        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+        .ob_hash = -1,
+        .ob_item = { &_Py_ID(encoding), },
+    };
+    #undef NUM_KEYWORDS
+    #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+    #else  // !Py_BUILD_CORE
+    #  define KWTUPLE NULL
+    #endif  // !Py_BUILD_CORE
+
+    static const char * const _keywords[] = {"encoding", NULL};
+    static _PyArg_Parser _parser = {
+        .keywords = _keywords,
+        .format = "es:_normalize_encoding",
+        .kwtuple = KWTUPLE,
+    };
+    #undef KWTUPLE
     char *encoding = NULL;
 
-    if (!PyArg_Parse(arg, "es:_normalize_encoding", "ascii", &encoding)) {
+    if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser,
+        "ascii", &encoding)) {
         goto exit;
     }
     return_value = _codecs__normalize_encoding_impl(module, encoding);
@@ -2831,4 +2861,4 @@ _codecs__normalize_encoding(PyObject *module, PyObject *arg)
 #ifndef _CODECS_CODE_PAGE_ENCODE_METHODDEF
     #define _CODECS_CODE_PAGE_ENCODE_METHODDEF
 #endif /* !defined(_CODECS_CODE_PAGE_ENCODE_METHODDEF) */
-/*[clinic end generated code: output=aa3636e281f5268f input=a9049054013a1b77]*/
+/*[clinic end generated code: output=0859b218fa612efd input=a9049054013a1b77]*/
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 64d8cf4397237c..ba66e273a208be 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -3588,7 +3588,7 @@ PyUnicode_FromEncodedObject(PyObject *obj,
 }
 
 /* Normalize an encoding name like encodings.normalize_encoding()
-   Optionally covert convert to lowercase by setting *to_lower* to 1.
+   but also convert to lowercase if *to_lower* is true.
    Return 1 on success, or 0 on error (encoding is longer than lower_len-1). */
 int
 _Py_normalize_encoding(const char *encoding,

From 2ad72b20dfd463f2f17bea6d4c9284eff6cd39f6 Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <stanulbrych@gmail.com>
Date: Mon, 14 Jul 2025 10:17:22 +0100
Subject: [PATCH 4/7] Keep the messiness

---
 Lib/encodings/__init__.py        | 18 ++----------------
 Modules/_codecsmodule.c          | 15 ++++++++++-----
 Modules/clinic/_codecsmodule.c.h | 21 +++++++++++++--------
 3 files changed, 25 insertions(+), 29 deletions(-)

diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py
index ef15189b984f40..523b43e2f69cde 100644
--- a/Lib/encodings/__init__.py
+++ b/Lib/encodings/__init__.py
@@ -29,6 +29,7 @@
 """
 
 import codecs
+from _codecs import _normalize_encoding
 import sys
 from . import aliases
 
@@ -38,18 +39,6 @@
 _aliases = aliases.aliases
 
 
-_norm_encoding_map = (
-    #0123456789ABCDEF0123456789ABCDEF
-    '                                '
-    '              . 0123456789      '
-    ' ABCDEFGHIJKLMNOPQRSTUVWXYZ     '
-    ' abcdefghijklmnopqrstuvwxyz     '
-    '                                '
-    '                                '
-    '                                '
-    '                                ')
-
-
 class CodecRegistryError(LookupError, SystemError):
     pass
 
@@ -68,10 +57,7 @@ def normalize_encoding(encoding):
     if isinstance(encoding, bytes):
         encoding = str(encoding, "ascii")
 
-    s = encoding.translate(_norm_encoding_map)
-    return '_'.join(s.split())
-
-from _codecs import _normalize_encoding as normalize_encoding
+    return _normalize_encoding(encoding)
 
 def search_function(encoding):
 
diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c
index 36e58015e84286..f2ba4eb79650ab 100644
--- a/Modules/_codecsmodule.c
+++ b/Modules/_codecsmodule.c
@@ -1026,7 +1026,7 @@ extern int _Py_normalize_encoding(const char *, char *, size_t, int);
 
 /*[clinic input]
 _codecs._normalize_encoding
-    encoding: str(encoding='ascii')
+    encoding: unicode
 
 Normalize an encoding name *encoding*.
 
@@ -1034,10 +1034,15 @@ Used for encodings.normalize_encoding. Does not convert to lower case.
 [clinic start generated code]*/
 
 static PyObject *
-_codecs__normalize_encoding_impl(PyObject *module, char *encoding)
-/*[clinic end generated code: output=d5e3a4b5266fbe96 input=cdb53c013b2400e3]*/
+_codecs__normalize_encoding_impl(PyObject *module, PyObject *encoding)
+/*[clinic end generated code: output=d27465d81e361f8e input=3ff3f4d64995b988]*/
 {
-    size_t len = strlen(encoding);
+    const char *cstr = PyUnicode_AsUTF8(encoding);
+    if (cstr == NULL) {
+        return NULL;
+    }
+
+    size_t len = strlen(cstr);
     if (len > PY_SSIZE_T_MAX) {
         PyErr_SetString(PyExc_OverflowError, "encoding is too large");
         return NULL;
@@ -1048,7 +1053,7 @@ _codecs__normalize_encoding_impl(PyObject *module, char *encoding)
         return PyErr_NoMemory();
     }
 
-    if (!_Py_normalize_encoding(encoding, normalized, len + 1, 0)) {
+    if (!_Py_normalize_encoding(cstr, normalized, len + 1, 0)) {
         PyErr_SetString(PyExc_RuntimeError, "_Py_normalize_encoding() failed");
         PyMem_Free(normalized);
         return NULL;
diff --git a/Modules/clinic/_codecsmodule.c.h b/Modules/clinic/_codecsmodule.c.h
index 540c980216dcb6..9e2a7950ebde64 100644
--- a/Modules/clinic/_codecsmodule.c.h
+++ b/Modules/clinic/_codecsmodule.c.h
@@ -2791,7 +2791,7 @@ PyDoc_STRVAR(_codecs__normalize_encoding__doc__,
     {"_normalize_encoding", _PyCFunction_CAST(_codecs__normalize_encoding), METH_FASTCALL|METH_KEYWORDS, _codecs__normalize_encoding__doc__},
 
 static PyObject *
-_codecs__normalize_encoding_impl(PyObject *module, char *encoding);
+_codecs__normalize_encoding_impl(PyObject *module, PyObject *encoding);
 
 static PyObject *
 _codecs__normalize_encoding(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
@@ -2820,19 +2820,24 @@ _codecs__normalize_encoding(PyObject *module, PyObject *const *args, Py_ssize_t
     static const char * const _keywords[] = {"encoding", NULL};
     static _PyArg_Parser _parser = {
         .keywords = _keywords,
-        .format = "es:_normalize_encoding",
+        .fname = "_normalize_encoding",
         .kwtuple = KWTUPLE,
     };
     #undef KWTUPLE
-    char *encoding = NULL;
+    PyObject *argsbuf[1];
+    PyObject *encoding;
 
-    if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser,
-        "ascii", &encoding)) {
+    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
+            /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
+    if (!args) {
+        goto exit;
+    }
+    if (!PyUnicode_Check(args[0])) {
+        _PyArg_BadArgument("_normalize_encoding", "argument 'encoding'", "str", args[0]);
         goto exit;
     }
+    encoding = args[0];
     return_value = _codecs__normalize_encoding_impl(module, encoding);
-    /* Post parse cleanup for encoding */
-    PyMem_FREE(encoding);
 
 exit:
     return return_value;
@@ -2861,4 +2866,4 @@ _codecs__normalize_encoding(PyObject *module, PyObject *const *args, Py_ssize_t
 #ifndef _CODECS_CODE_PAGE_ENCODE_METHODDEF
     #define _CODECS_CODE_PAGE_ENCODE_METHODDEF
 #endif /* !defined(_CODECS_CODE_PAGE_ENCODE_METHODDEF) */
-/*[clinic end generated code: output=0859b218fa612efd input=a9049054013a1b77]*/
+/*[clinic end generated code: output=a968c493bb28be3e input=a9049054013a1b77]*/

From 3660160929bd17ad3afae8a00ad805dc1ff93ef6 Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <stanulbrych@gmail.com>
Date: Mon, 14 Jul 2025 10:29:49 +0100
Subject: [PATCH 5/7] Clean up tests

---
 Lib/test/test_codecs.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 99ea833b60bce6..348b450d1118d2 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -3895,13 +3895,12 @@ def search_function(encoding):
         self.assertEqual(NOT_FOUND, codecs.lookup('a\xe9\u20ac-8'))
 
     def test_encodings_normalize_encoding(self):
+        # encodings.normalize_encoding() ignores non-ASCII characters.
         normalize = encodings.normalize_encoding
         self.assertEqual(normalize('utf_8'), 'utf_8')
+        self.assertEqual(normalize('utf\xE9\u20AC\U0010ffff-8'), 'utf_8')
         self.assertEqual(normalize('utf   8'), 'utf_8')
 
-        # encodings.normalize_encoding() does not accept non-ASCII characters.
-        self.assertRaises(UnicodeEncodeError, normalize, 'utf\xE9\u20AC\U0010ffff-8')
-
         # encodings.normalize_encoding() doesn't convert
         # characters to lower case.
         self.assertEqual(normalize('UTF 8'), 'UTF_8')

From 4e12b9ec888d1d33c9d956e10318847e3a50b58b Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <stanulbrych@gmail.com>
Date: Mon, 14 Jul 2025 13:53:48 +0100
Subject: [PATCH 6/7] Remove unnecessary message

---
 Modules/_codecsmodule.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c
index f2ba4eb79650ab..1d3534ab98fc47 100644
--- a/Modules/_codecsmodule.c
+++ b/Modules/_codecsmodule.c
@@ -1054,7 +1054,6 @@ _codecs__normalize_encoding_impl(PyObject *module, PyObject *encoding)
     }
 
     if (!_Py_normalize_encoding(cstr, normalized, len + 1, 0)) {
-        PyErr_SetString(PyExc_RuntimeError, "_Py_normalize_encoding() failed");
         PyMem_Free(normalized);
         return NULL;
     }
@@ -1064,7 +1063,6 @@ _codecs__normalize_encoding_impl(PyObject *module, PyObject *encoding)
     return v;
 }
 
-
 /* --- Module API --------------------------------------------------------- */
 
 static PyMethodDef _codecs_functions[] = {

From 1c9e55ab8ffafd2bb0e68c688fadab90399cfc16 Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <stanulbrych@gmail.com>
Date: Tue, 15 Jul 2025 18:02:12 +0100
Subject: [PATCH 7/7] Review

---
 Lib/encodings/__init__.py |  5 ++---
 Lib/test/test_codecs.py   |  1 -
 Modules/_codecsmodule.c   | 19 +++++++++++++++----
 3 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py
index 523b43e2f69cde..e7e4ca3358e0f9 100644
--- a/Lib/encodings/__init__.py
+++ b/Lib/encodings/__init__.py
@@ -26,11 +26,11 @@
 
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 
-"""
+"""#"
 
 import codecs
-from _codecs import _normalize_encoding
 import sys
+from _codecs import _normalize_encoding
 from . import aliases
 
 _cache = {}
@@ -38,7 +38,6 @@
 _import_tail = ['*']
 _aliases = aliases.aliases
 
-
 class CodecRegistryError(LookupError, SystemError):
     pass
 
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 348b450d1118d2..d8666f7290e72e 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -3900,7 +3900,6 @@ def test_encodings_normalize_encoding(self):
         self.assertEqual(normalize('utf_8'), 'utf_8')
         self.assertEqual(normalize('utf\xE9\u20AC\U0010ffff-8'), 'utf_8')
         self.assertEqual(normalize('utf   8'), 'utf_8')
-
         # encodings.normalize_encoding() doesn't convert
         # characters to lower case.
         self.assertEqual(normalize('UTF 8'), 'UTF_8')
diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c
index 1d3534ab98fc47..853d461ef15950 100644
--- a/Modules/_codecsmodule.c
+++ b/Modules/_codecsmodule.c
@@ -1037,30 +1037,41 @@ static PyObject *
 _codecs__normalize_encoding_impl(PyObject *module, PyObject *encoding)
 /*[clinic end generated code: output=d27465d81e361f8e input=3ff3f4d64995b988]*/
 {
-    const char *cstr = PyUnicode_AsUTF8(encoding);
+    Py_ssize_t len;
+    const char *cstr = PyUnicode_AsUTF8AndSize(encoding, &len);
     if (cstr == NULL) {
         return NULL;
     }
 
-    size_t len = strlen(cstr);
     if (len > PY_SSIZE_T_MAX) {
         PyErr_SetString(PyExc_OverflowError, "encoding is too large");
         return NULL;
     }
 
+    PyUnicodeWriter *writer = PyUnicodeWriter_Create(len + 1);
+    if (writer == NULL) {
+        return NULL;
+    }
+
     char *normalized = PyMem_Malloc(len + 1);
     if (normalized == NULL) {
+        PyUnicodeWriter_Discard(writer);
         return PyErr_NoMemory();
     }
 
     if (!_Py_normalize_encoding(cstr, normalized, len + 1, 0)) {
         PyMem_Free(normalized);
+        PyUnicodeWriter_Discard(writer);
         return NULL;
     }
 
-    PyObject *v = PyUnicode_FromString(normalized);
+    if (PyUnicodeWriter_WriteUTF8(writer, normalized, (Py_ssize_t)strlen(normalized)) < 0) {
+        PyUnicodeWriter_Discard(writer);
+        PyMem_Free(normalized);
+        return NULL;
+    }
     PyMem_Free(normalized);
-    return v;
+    return PyUnicodeWriter_Finish(writer);
 }
 
 /* --- Module API --------------------------------------------------------- */