From 1e315c01167205f3f2f6b1aed94f818b5a094a94 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 17 Sep 2018 23:58:09 +0200 Subject: [PATCH 1/3] bpo-34589: Add -X coerce_c_locale command line option Add a new -X coerce_c_locale command line option to control C locale coercion (PEP 538). --- Doc/using/cmdline.rst | 18 ++- Doc/whatsnew/3.7.rst | 7 ++ Lib/test/test_c_locale_coercion.py | 55 +++++++-- Lib/test/test_cmd_line.py | 7 +- Lib/test/test_embed.py | 4 +- Lib/test/test_sys.py | 8 +- Lib/test/test_utf8_mode.py | 3 +- .../2018-09-18-01-41-33.bpo-34589.lLVTYc.rst | 2 + Python/coreconfig.c | 115 ++++++++++++------ 9 files changed, 163 insertions(+), 56 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2018-09-18-01-41-33.bpo-34589.lLVTYc.rst diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index b61df8a4b77dff..51eba2f3d16886 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -438,13 +438,22 @@ Miscellaneous options * Set the :attr:`~sys.flags.dev_mode` attribute of :attr:`sys.flags` to ``True`` - * ``-X utf8`` enables UTF-8 mode for operating system interfaces, overriding + * ``-X utf8`` enables UTF-8 mode (:pep:`540`) for operating system interfaces, overriding the default locale-aware mode. ``-X utf8=0`` explicitly disables UTF-8 mode (even when it would otherwise activate automatically). See :envvar:`PYTHONUTF8` for more details. * ``-X pycache_prefix=PATH`` enables writing ``.pyc`` files to a parallel tree rooted at the given directory instead of to the code tree. See also :envvar:`PYTHONPYCACHEPREFIX`. + * ``-X coerce_c_locale`` or ``-X coerce_c_locale=1`` tries to coerce the C + locale (:pep:`538`). + ``-X coerce_c_locale=0`` skips coercing the legacy ASCII-based C and POSIX + locales to a more capable UTF-8 based alternative. 
+ ``-X coerce_c_locale=warn`` will cause Python to emit warning messages on + ``stderr`` if either the locale coercion activates, or else if a locale + that *would* have triggered coercion is still active when the Python + runtime is initialized. + See :envvar:`PYTHONCOERCECLOCALE` for more details. It also allows passing arbitrary values and retrieving them through the :data:`sys._xoptions` dictionary. @@ -464,6 +473,9 @@ Miscellaneous options .. versionadded:: 3.7 The ``-X importtime``, ``-X dev`` and ``-X utf8`` options. + .. versionadded:: 3.7.1 + The ``-X coerce_c_locale`` option. + .. versionadded:: 3.8 The ``-X pycache_prefix`` option. @@ -810,7 +822,7 @@ conflict. to skip coercing the legacy ASCII-based C and POSIX locales to a more capable UTF-8 based alternative. - If this variable is *not* set (or is set to a value other than ``0``), the + If this variable is *not* set (or is set to a value other than ``0``), he ``LC_ALL`` locale override environment variable is also not set, and the current locale reported for the ``LC_CTYPE`` category is either the default ``C`` locale, or else the explicitly ASCII-based ``POSIX`` locale, then the @@ -850,6 +862,8 @@ conflict. order to force the interpreter to use ``ASCII`` instead of ``UTF-8`` for system interfaces. + Also available as the :option:`-X` ``coerce_c_locale`` option. + Availability: \*nix .. versionadded:: 3.7 diff --git a/Doc/whatsnew/3.7.rst b/Doc/whatsnew/3.7.rst index f53a0268738ad2..6cd9d46a42b0ab 100644 --- a/Doc/whatsnew/3.7.rst +++ b/Doc/whatsnew/3.7.rst @@ -2494,3 +2494,10 @@ versions, it respected an ill-defined subset of those environment variables, while in Python 3.7.0 it didn't read any of them due to :issue:`34247`). If this behavior is unwanted, set :c:data:`Py_IgnoreEnvironmentFlag` to 1 before calling :c:func:`Py_Initialize`. + +:c:func:`Py_Initialize` and :c:func:`Py_Main` cannot enable the C locale +coercion (:pep:`538`) anymore: it is always disabled. 
It can now only be +enabled by the Python program ("python3"). + +New :option:`-X` ``coerce_c_locale`` command line option to control C locale +coercion (:pep:`538`). diff --git a/Lib/test/test_c_locale_coercion.py b/Lib/test/test_c_locale_coercion.py index 1db293b9c37359..f62208ab2006f3 100644 --- a/Lib/test/test_c_locale_coercion.py +++ b/Lib/test/test_c_locale_coercion.py @@ -139,7 +139,7 @@ def _handle_output_variations(data): return data @classmethod - def get_child_details(cls, env_vars): + def get_child_details(cls, env_vars, xoption=None): """Retrieves fsencoding and standard stream details from a child process Returns (encoding_details, stderr_lines): @@ -150,10 +150,11 @@ def get_child_details(cls, env_vars): The child is run in isolated mode if the current interpreter supports that. """ - result, py_cmd = run_python_until_end( - "-X", "utf8=0", "-c", cls.CHILD_PROCESS_SCRIPT, - **env_vars - ) + args = [] + if xoption: + args.extend(("-X", f"coerce_c_locale={xoption}")) + args.extend(("-X", "utf8=0", "-c", cls.CHILD_PROCESS_SCRIPT)) + result, py_cmd = run_python_until_end(*args, **env_vars) if not result.rc == 0: result.fail(py_cmd) # All subprocess outputs in this test case should be pure ASCII @@ -212,7 +213,8 @@ def _check_child_encoding_details(self, expected_fs_encoding, expected_stream_encoding, expected_warnings, - coercion_expected): + coercion_expected, + xoption=None): """Check the C locale handling for the given process environment Parameters: @@ -220,7 +222,7 @@ def _check_child_encoding_details(self, expected_stream_encoding: expected encoding for standard streams expected_warning: stderr output to expect (if any) """ - result = EncodingDetails.get_child_details(env_vars) + result = EncodingDetails.get_child_details(env_vars, xoption) encoding_details, stderr_lines = result expected_details = EncodingDetails.get_expected_details( coercion_expected, @@ -290,6 +292,7 @@ def _check_c_locale_coercion(self, coerce_c_locale, expected_warnings=None, 
coercion_expected=True, + use_xoption=False, **extra_vars): """Check the C locale handling for various configurations @@ -319,8 +322,12 @@ def _check_c_locale_coercion(self, "PYTHONCOERCECLOCALE": "", } base_var_dict.update(extra_vars) + xoption = None if coerce_c_locale is not None: - base_var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale + if use_xoption: + xoption = coerce_c_locale + else: + base_var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale # Check behaviour for the default locale with self.subTest(default_locale=True, @@ -342,7 +349,8 @@ def _check_c_locale_coercion(self, fs_encoding, stream_encoding, _expected_warnings, - _coercion_expected) + _coercion_expected, + xoption=xoption) # Check behaviour for explicitly configured locales for locale_to_set in EXPECTED_C_LOCALE_EQUIVALENTS: @@ -357,7 +365,8 @@ def _check_c_locale_coercion(self, fs_encoding, stream_encoding, expected_warnings, - coercion_expected) + coercion_expected, + xoption=xoption) def test_PYTHONCOERCECLOCALE_not_set(self): # This should coerce to the first available target locale by default @@ -404,6 +413,32 @@ def test_LC_ALL_set_to_C(self): expected_warnings=[LEGACY_LOCALE_WARNING], coercion_expected=False) + def test_xoption_set_to_1(self): + self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale="1", + use_xoption=True) + + def test_xoption_set_to_zero(self): + # The setting "0" should result in the locale coercion being disabled + self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING, + EXPECTED_C_LOCALE_STREAM_ENCODING, + coerce_c_locale="0", + coercion_expected=False, + use_xoption=True) + # Setting LC_ALL=C shouldn't make any difference to the behaviour + self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING, + EXPECTED_C_LOCALE_STREAM_ENCODING, + coerce_c_locale="0", + LC_ALL="C", + coercion_expected=False, + use_xoption=True) + + def test_xoption_set_to_warn(self): + # -X coerce_c_locale=warn enables runtime warnings for legacy locales + 
self._check_c_locale_coercion("utf-8", "utf-8", + coerce_c_locale="warn", + expected_warnings=[CLI_COERCION_WARNING], + use_xoption=True) + def test_main(): test.support.run_unittest( LocaleConfigurationTests, diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py index 21511b896cad17..7e967b20ab88c0 100644 --- a/Lib/test/test_cmd_line.py +++ b/Lib/test/test_cmd_line.py @@ -159,13 +159,16 @@ def test_undecodable_code(self): env = os.environ.copy() # Use C locale to get ascii for the locale encoding env['LC_ALL'] = 'C' - env['PYTHONCOERCECLOCALE'] = '0' code = ( b'import locale; ' b'print(ascii("' + undecodable + b'"), ' b'locale.getpreferredencoding())') p = subprocess.Popen( - [sys.executable, "-c", code], + [sys.executable, + # Disable C locale coercion and UTF-8 Mode to not use UTF-8 + "-X", "coerce_c_locale=0", + "-X", "utf8=0", + "-c", code], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, env=env) stdout, stderr = p.communicate() diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index e531fd49d5a36b..6654122c5fb9e7 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -340,9 +340,7 @@ def check_config(self, testname, expected): for key in list(env): if key.startswith('PYTHON'): del env[key] - # Disable C locale coercion and UTF-8 mode to not depend - # on the current locale - env['PYTHONCOERCECLOCALE'] = '0' + # Disable UTF-8 mode to not depend on the current locale env['PYTHONUTF8'] = '0' if expected['stdio_encoding'] is None or expected['stdio_errors'] is None: diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index b90366d814452b..a7f29282713032 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -656,9 +656,8 @@ def test_getfilesystemencoding(self): def c_locale_get_error_handler(self, locale, isolated=False, encoding=None): # Force the POSIX locale - env = os.environ.copy() + env = dict(os.environ) env["LC_ALL"] = locale - env["PYTHONCOERCECLOCALE"] = "0" code = '\n'.join(( 'import sys', 'def 
dump(name):', @@ -668,7 +667,10 @@ def c_locale_get_error_handler(self, locale, isolated=False, encoding=None): 'dump("stdout")', 'dump("stderr")', )) - args = [sys.executable, "-X", "utf8=0", "-c", code] + args = [sys.executable, + "-X", "utf8=0", + "-X", "coerce_c_locale=0", + "-c", code] if isolated: args.append("-I") if encoding is not None: diff --git a/Lib/test/test_utf8_mode.py b/Lib/test/test_utf8_mode.py index 7280ce77ef8279..0902e6b88194d2 100644 --- a/Lib/test/test_utf8_mode.py +++ b/Lib/test/test_utf8_mode.py @@ -19,7 +19,6 @@ class UTF8ModeTests(unittest.TestCase): DEFAULT_ENV = { 'PYTHONUTF8': '', 'PYTHONLEGACYWINDOWSFSENCODING': '', - 'PYTHONCOERCECLOCALE': '0', } def posix_locale(self): @@ -27,6 +26,8 @@ def posix_locale(self): return (loc in POSIX_LOCALES) def get_output(self, *args, failure=False, **kw): + # Never enable the C locale coercion (PEP 538) + #args = ('-X', 'coerce_c_locale=0', *args) kw = dict(self.DEFAULT_ENV, **kw) if failure: out = assert_python_failure(*args, **kw) diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-09-18-01-41-33.bpo-34589.lLVTYc.rst b/Misc/NEWS.d/next/Core and Builtins/2018-09-18-01-41-33.bpo-34589.lLVTYc.rst new file mode 100644 index 00000000000000..618092d192c472 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2018-09-18-01-41-33.bpo-34589.lLVTYc.rst @@ -0,0 +1,2 @@ +Add a new :option:`-X` ``coerce_c_locale`` command line option to control C +locale coercion (:pep:`538`). 
diff --git a/Python/coreconfig.c b/Python/coreconfig.c index 131a043ff28066..28624ec35d6d27 100644 --- a/Python/coreconfig.c +++ b/Python/coreconfig.c @@ -705,6 +705,18 @@ config_init_utf8_mode(_PyCoreConfig *config) return _Py_INIT_OK(); } +#ifndef MS_WINDOWS + /* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */ + const char *ctype_loc = setlocale(LC_CTYPE, NULL); + if (ctype_loc != NULL + && (strcmp(ctype_loc, "C") == 0 + || strcmp(ctype_loc, "POSIX") == 0)) + { + config->utf8_mode = 1; + return _Py_INIT_OK(); + } +#endif + return _Py_INIT_OK(); } @@ -808,25 +820,6 @@ config_read_env_vars(_PyCoreConfig *config) config->malloc_stats = 1; } - const char *env = _PyCoreConfig_GetEnv(config, "PYTHONCOERCECLOCALE"); - if (env) { - if (strcmp(env, "0") == 0) { - if (config->_coerce_c_locale < 0) { - config->_coerce_c_locale = 0; - } - } - else if (strcmp(env, "warn") == 0) { - if (config->_coerce_c_locale_warn < 0) { - config->_coerce_c_locale_warn = 1; - } - } - else { - if (config->_coerce_c_locale < 0) { - config->_coerce_c_locale = 1; - } - } - } - wchar_t *path; int res = _PyCoreConfig_GetEnvDup(config, &path, L"PYTHONPATH", "PYTHONPATH"); @@ -966,28 +959,76 @@ config_read_complex_options(_PyCoreConfig *config) } -static void -config_init_locale(_PyCoreConfig *config) +static _PyInitError +config_init_coerce_c_locale(_PyCoreConfig *config) { + const wchar_t *xopt = config_get_xoption(config, L"coerce_c_locale"); + if (xopt) { + wchar_t *sep = wcschr(xopt, L'='); + if (sep) { + xopt = sep + 1; + if (wcscmp(xopt, L"1") == 0) { + if (config->_coerce_c_locale < 0) { + config->_coerce_c_locale = 1; + } + } + else if (wcscmp(xopt, L"0") == 0) { + if (config->_coerce_c_locale < 0) { + config->_coerce_c_locale = 0; + } + } + else if (wcscmp(xopt, L"warn") == 0) { + if (config->_coerce_c_locale_warn < 0) { + config->_coerce_c_locale_warn = 1; + } + } + else { + return _Py_INIT_USER_ERR("invalid -X coerce_c_locale option value"); + } + } + else { + if 
(config->_coerce_c_locale < 0) { + config->_coerce_c_locale = 1; + } + } + + if (config->_coerce_c_locale_warn < 0) { + config->_coerce_c_locale_warn = 0; + } + } + + const char *env = _PyCoreConfig_GetEnv(config, "PYTHONCOERCECLOCALE"); + if (env) { + if (strcmp(env, "0") == 0) { + if (config->_coerce_c_locale < 0) { + config->_coerce_c_locale = 0; + } + } + else if (strcmp(env, "warn") == 0) { + if (config->_coerce_c_locale_warn < 0) { + config->_coerce_c_locale_warn = 1; + } + } + else { + if (config->_coerce_c_locale < 0) { + config->_coerce_c_locale = 1; + } + } + + if (config->_coerce_c_locale_warn < 0) { + config->_coerce_c_locale_warn = 0; + } + } + if (config->_coerce_c_locale < 0) { /* The C locale enables the C locale coercion (PEP 538) */ if (_Py_LegacyLocaleDetected()) { config->_coerce_c_locale = 1; + return _Py_INIT_OK(); } } -#ifndef MS_WINDOWS - if (config->utf8_mode < 0) { - /* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */ - const char *ctype_loc = setlocale(LC_CTYPE, NULL); - if (ctype_loc != NULL - && (strcmp(ctype_loc, "C") == 0 - || strcmp(ctype_loc, "POSIX") == 0)) - { - config->utf8_mode = 1; - } - } -#endif + return _Py_INIT_OK(); } @@ -1293,8 +1334,11 @@ _PyCoreConfig_Read(_PyCoreConfig *config) } } - if (config->utf8_mode < 0 || config->_coerce_c_locale < 0) { - config_init_locale(config); + if (config->_coerce_c_locale < 0 || config->_coerce_c_locale_warn < 0) { + err = config_init_coerce_c_locale(config); + if (_Py_INIT_FAILED(err)) { + return err; + } } if (config->_install_importlib) { @@ -1349,6 +1393,7 @@ _PyCoreConfig_Read(_PyCoreConfig *config) } assert(config->_coerce_c_locale >= 0); + assert(config->_coerce_c_locale_warn >= 0); assert(config->use_environment >= 0); assert(config->filesystem_encoding != NULL); assert(config->filesystem_errors != NULL); From 0eb615cb83dc8207b6dc5c8b888e281c06fbbf5d Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 18 Sep 2018 01:49:43 +0200 Subject: [PATCH 2/3] fix 
typo --- Doc/using/cmdline.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index 51eba2f3d16886..cd3b2410c84d48 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -822,7 +822,7 @@ conflict. to skip coercing the legacy ASCII-based C and POSIX locales to a more capable UTF-8 based alternative. - If this variable is *not* set (or is set to a value other than ``0``), he + If this variable is *not* set (or is set to a value other than ``0``), the ``LC_ALL`` locale override environment variable is also not set, and the current locale reported for the ``LC_CTYPE`` category is either the default ``C`` locale, or else the explicitly ASCII-based ``POSIX`` locale, then the From 82b9f5196d4fe5458aa1eff29ecf7ad35278c396 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 18 Sep 2018 01:55:35 +0200 Subject: [PATCH 3/3] Fix tests --- Lib/test/test_embed.py | 4 +++- Lib/test/test_utf8_mode.py | 6 +++--- Python/coreconfig.c | 3 +-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index 6654122c5fb9e7..e531fd49d5a36b 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -340,7 +340,9 @@ def check_config(self, testname, expected): for key in list(env): if key.startswith('PYTHON'): del env[key] - # Disable UTF-8 mode to not depend on the current locale + # Disable C locale coercion and UTF-8 mode to not depend + # on the current locale + env['PYTHONCOERCECLOCALE'] = '0' env['PYTHONUTF8'] = '0' if expected['stdio_encoding'] is None or expected['stdio_errors'] is None: diff --git a/Lib/test/test_utf8_mode.py b/Lib/test/test_utf8_mode.py index 0902e6b88194d2..c3cbb49060e7e4 100644 --- a/Lib/test/test_utf8_mode.py +++ b/Lib/test/test_utf8_mode.py @@ -19,6 +19,7 @@ class UTF8ModeTests(unittest.TestCase): DEFAULT_ENV = { 'PYTHONUTF8': '', 'PYTHONLEGACYWINDOWSFSENCODING': '', + 'PYTHONCOERCECLOCALE': '0', } def posix_locale(self): 
@@ -26,8 +27,8 @@ def posix_locale(self): return (loc in POSIX_LOCALES) def get_output(self, *args, failure=False, **kw): - # Never enable the C locale coercion (PEP 538) - #args = ('-X', 'coerce_c_locale=0', *args) + # Always disable the C locale coercion (PEP 538) + args = ('-X', 'coerce_c_locale=0', *args) kw = dict(self.DEFAULT_ENV, **kw) if failure: out = assert_python_failure(*args, **kw) @@ -117,7 +118,6 @@ def test_filesystemencoding(self): # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode # and has the priority over -X utf8 and PYTHONUTF8 out = self.get_output('-X', 'utf8', '-c', code, - PYTHONUTF8='strict', PYTHONLEGACYWINDOWSFSENCODING='1') self.assertEqual(out, 'mbcs/replace') diff --git a/Python/coreconfig.c b/Python/coreconfig.c index 28624ec35d6d27..b2459dca57b0a2 100644 --- a/Python/coreconfig.c +++ b/Python/coreconfig.c @@ -709,8 +709,7 @@ config_init_utf8_mode(_PyCoreConfig *config) /* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */ const char *ctype_loc = setlocale(LC_CTYPE, NULL); if (ctype_loc != NULL - && (strcmp(ctype_loc, "C") == 0 - || strcmp(ctype_loc, "POSIX") == 0)) + && (strcmp(ctype_loc, "C") == 0 || strcmp(ctype_loc, "POSIX") == 0)) { config->utf8_mode = 1; return _Py_INIT_OK();