From 7f6ec16554c1e32f750b4fabd157b3f1eb39e48e Mon Sep 17 00:00:00 2001 From: Ronald Oussoren Date: Thu, 21 Dec 2023 21:02:20 +0100 Subject: [PATCH 1/3] gh-109989: Fix test_c_locale_coercion when PYTHONIOENCODING is set This fixes the existing tests when PYTHONIOENCODING is set by unsetting PYTHONIOENCODING. Also add a test that explicitly checks what happens when PYTHONIOENCODING is set. --- Lib/test/test_c_locale_coercion.py | 48 ++++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_c_locale_coercion.py b/Lib/test/test_c_locale_coercion.py index 71f934756e26a1..193667e281e5f9 100644 --- a/Lib/test/test_c_locale_coercion.py +++ b/Lib/test/test_c_locale_coercion.py @@ -114,10 +114,14 @@ class EncodingDetails(_EncodingDetails): @classmethod def get_expected_details(cls, coercion_expected, fs_encoding, stream_encoding, env_vars): """Returns expected child process details for a given encoding""" - _stream = stream_encoding + ":{}" - # stdin and stdout should use surrogateescape either because the - # coercion triggered, or because the C locale was detected - stream_info = 2*[_stream.format("surrogateescape")] + _stream = stream_encoding.split(":")[0] + ":{}" + if ":" in stream_encoding: + stream_info = 2*[stream_encoding] + else: + # stdin and stdout should use surrogateescape either because the + # coercion triggered, or because the C locale was detected + stream_info = 2*[_stream.format("surrogateescape")] + # stderr should always use backslashreplace stream_info.append(_stream.format("backslashreplace")) expected_lang = env_vars.get("LANG", "not set") @@ -257,6 +261,7 @@ def test_external_target_locale_configuration(self): "LC_CTYPE": "", "LC_ALL": "", "PYTHONCOERCECLOCALE": "", + "PYTHONIOENCODING": "", } for env_var in ("LANG", "LC_CTYPE"): for locale_to_set in AVAILABLE_TARGETS: @@ -276,7 +281,38 @@ def test_external_target_locale_configuration(self): expected_warnings=None, coercion_expected=False) + def 
test_with_ioencoding(self): + # When PYTHONIOENCODING is set alongside a target locale, stdin and + # stdout should use that encoding with the "strict" error handler + self.maxDiff = None + + expected_fs_encoding = "utf-8" + expected_stream_encoding = "utf-8:strict" + + base_var_dict = { + "LANG": "", + "LC_CTYPE": "", + "LC_ALL": "", + "PYTHONCOERCECLOCALE": "", + "PYTHONIOENCODING": "UTF-8", + } + for env_var in ("LANG", "LC_CTYPE"): + for locale_to_set in AVAILABLE_TARGETS: + # XXX (ncoghlan): LANG=UTF-8 doesn't appear to work as + # expected, so skip that combination for now + # See https://bugs.python.org/issue30672 for discussion + if env_var == "LANG" and locale_to_set == "UTF-8": + continue + with self.subTest(env_var=env_var, + configured_locale=locale_to_set): + var_dict = base_var_dict.copy() + var_dict[env_var] = locale_to_set + self._check_child_encoding_details(var_dict, + expected_fs_encoding, + expected_stream_encoding, + expected_warnings=None, + coercion_expected=False) @support.cpython_only @unittest.skipUnless(sysconfig.get_config_var("PY_COERCE_C_LOCALE"), @@ -316,6 +352,7 @@ def _check_c_locale_coercion(self, "LC_CTYPE": "", "LC_ALL": "", "PYTHONCOERCECLOCALE": "", + "PYTHONIOENCODING": "", } base_var_dict.update(extra_vars) if coerce_c_locale is not None: @@ -348,7 +385,8 @@ def _check_c_locale_coercion(self, for env_var in ("LANG", "LC_CTYPE"): with self.subTest(env_var=env_var, nominal_locale=locale_to_set, - PYTHONCOERCECLOCALE=coerce_c_locale): + PYTHONCOERCECLOCALE=coerce_c_locale, + PYTHONIOENCODING=""): var_dict = base_var_dict.copy() var_dict[env_var] = locale_to_set # Check behaviour on successful coercion From f3f3da7ed56628dae47822d160eed77f634e3839 Mon Sep 17 00:00:00 2001 From: Ronald Oussoren Date: Thu, 21 Dec 2023 22:05:53 +0100 Subject: [PATCH 2/3] Slightly cleaner test code --- Lib/test/test_c_locale_coercion.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git 
a/Lib/test/test_c_locale_coercion.py b/Lib/test/test_c_locale_coercion.py index 193667e281e5f9..1f86125f9723e4 100644 --- a/Lib/test/test_c_locale_coercion.py +++ b/Lib/test/test_c_locale_coercion.py @@ -112,15 +112,15 @@ class EncodingDetails(_EncodingDetails): ]) @classmethod - def get_expected_details(cls, coercion_expected, fs_encoding, stream_encoding, env_vars): + def get_expected_details(cls, coercion_expected, fs_encoding, stream_encoding, stream_errors, env_vars): """Returns expected child process details for a given encoding""" - _stream = stream_encoding.split(":")[0] + ":{}" - if ":" in stream_encoding: - stream_info = 2*[stream_encoding] - else: + _stream = stream_encoding + ":{}" + if stream_errors is None: # stdin and stdout should use surrogateescape either because the # coercion triggered, or because the C locale was detected - stream_info = 2*[_stream.format("surrogateescape")] + stream_errors = "surrogateescape" + + stream_info = 2*[_stream.format(stream_errors)] # stderr should always use backslashreplace stream_info.append(_stream.format("backslashreplace")) @@ -214,6 +214,7 @@ def _check_child_encoding_details(self, env_vars, expected_fs_encoding, expected_stream_encoding, + expected_stream_errors, expected_warnings, coercion_expected): """Check the C locale handling for the given process environment @@ -229,6 +230,7 @@ def _check_child_encoding_details(self, coercion_expected, expected_fs_encoding, expected_stream_encoding, + expected_stream_errors, env_vars ) self.assertEqual(encoding_details, expected_details) @@ -278,6 +280,7 @@ def test_external_target_locale_configuration(self): self._check_child_encoding_details(var_dict, expected_fs_encoding, expected_stream_encoding, + expected_stream_errors=None, expected_warnings=None, coercion_expected=False) @@ -287,7 +290,7 @@ def test_with_ioencoding(self): self.maxDiff = None expected_fs_encoding = "utf-8" - expected_stream_encoding = "utf-8:strict" + expected_stream_encoding = "utf-8" 
base_var_dict = { "LANG": "", @@ -311,6 +314,7 @@ def test_with_ioencoding(self): self._check_child_encoding_details(var_dict, expected_fs_encoding, expected_stream_encoding, + expected_stream_errors="strict", expected_warnings=None, coercion_expected=False) @@ -377,6 +381,7 @@ def _check_c_locale_coercion(self, self._check_child_encoding_details(base_var_dict, fs_encoding, stream_encoding, + None, _expected_warnings, _coercion_expected) @@ -393,6 +398,7 @@ def _check_c_locale_coercion(self, self._check_child_encoding_details(var_dict, fs_encoding, stream_encoding, + None, expected_warnings, coercion_expected) From 316c39bec92a14441d0de1a2411731687978aa75 Mon Sep 17 00:00:00 2001 From: Ronald Oussoren Date: Fri, 22 Dec 2023 10:06:49 +0100 Subject: [PATCH 3/3] Update Lib/test/test_c_locale_coercion.py Co-authored-by: Nikita Sobolev --- Lib/test/test_c_locale_coercion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_c_locale_coercion.py b/Lib/test/test_c_locale_coercion.py index 1f86125f9723e4..7334a325ba22f0 100644 --- a/Lib/test/test_c_locale_coercion.py +++ b/Lib/test/test_c_locale_coercion.py @@ -120,7 +120,7 @@ def get_expected_details(cls, coercion_expected, fs_encoding, stream_encoding, s # coercion triggered, or because the C locale was detected stream_errors = "surrogateescape" - stream_info = 2*[_stream.format(stream_errors)] + stream_info = [_stream.format(stream_errors)] * 2 # stderr should always use backslashreplace stream_info.append(_stream.format("backslashreplace"))