From 2413bdf027c77db66ed21c8177bb24cefda410e2 Mon Sep 17 00:00:00 2001 From: "R. David Murray" Date: Sat, 6 Sep 2025 15:01:11 -0400 Subject: [PATCH] gh-90548: Allow Alpine/MUSL to pass test_c_locale_coercion. (GH-134454) Like cygwin, MUSL defaults to utf-8 if no variables are set. I have no idea if the existing tests pass on cygwin, but I made the modifications such that I shouldn't break it if they do. The additional checks needed for MUSL are guarded by DEFAULT_LOCALE_IS_C being False. Based on this flag, we expect utf-8 for the encodings and no coercion message, as long as LC_ALL is not set to C. (That looks like a bit of an issue with the test structure, but I'm not going to attempt to "fix" that.) DEFAULT_ENCODING is intentionally not given a default since it is only used when DEFAULT_LOCALE_IS_C is False, and if you use the flag you'll need to set it. After reading through issue 30672, looking at the source, and running a test on Android, I *think* the current situation is that coercion will be done if the locale is set to POSIX regardless of platform. However, if the platform doesn't make POSIX equivalent to C, the encodings when coercion is disabled will not be the same as for C (it is utf-8 on Android, for example). This means the tests would fail if POSIX were added unconditionally to the EXPECTED_C_LOCALE_EQUIVALENTS as envisioned in the issue. This *could* be fixed with another flag, but I'm not sure it is worth the effort. I'm not even sure Python is behaving optimally in this case (assuming my analysis is correct). So I just altered the comment and added POSIX if and only if the platform is Linux. (cherry picked from commit bcdac03670eff192e670aef696cce47e69061be8) Co-authored-by: R. 
David Murray --- Lib/test/test_c_locale_coercion.py | 33 +++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/Lib/test/test_c_locale_coercion.py b/Lib/test/test_c_locale_coercion.py index e4b0b8c451fd45..10f8ba2255228b 100644 --- a/Lib/test/test_c_locale_coercion.py +++ b/Lib/test/test_c_locale_coercion.py @@ -12,7 +12,9 @@ from test.support.script_helper import run_python_until_end -# Set the list of ways we expect to be able to ask for the "C" locale +# Set the list of ways we expect to be able to ask for the "C" locale. +# 'invalid.ascii' is an invalid LOCALE name and so should get turned in to the +# default locale, which is traditionally C. EXPECTED_C_LOCALE_EQUIVALENTS = ["C", "invalid.ascii"] # Set our expectation for the default encoding used in the C locale @@ -21,6 +23,7 @@ EXPECTED_C_LOCALE_FS_ENCODING = "ascii" # Set our expectation for the default locale used when none is specified +DEFAULT_LOCALE_IS_C = True EXPECT_COERCION_IN_DEFAULT_LOCALE = True TARGET_LOCALES = ["C.UTF-8", "C.utf8", "UTF-8"] @@ -30,12 +33,12 @@ # Android defaults to using UTF-8 for all system interfaces EXPECTED_C_LOCALE_STREAM_ENCODING = "utf-8" EXPECTED_C_LOCALE_FS_ENCODING = "utf-8" -elif sys.platform.startswith("linux"): - # Linux distros typically alias the POSIX locale directly to the C - # locale. - # TODO: Once https://bugs.python.org/issue30672 is addressed, we'll be - # able to check this case unconditionally - EXPECTED_C_LOCALE_EQUIVALENTS.append("POSIX") +elif support.linked_to_musl(): + # MUSL defaults to utf-8 unless the C locale is set explicitly. 
+ EXPECTED_C_LOCALE_EQUIVALENTS = ["C"] + DEFAULT_LOCALE_IS_C = False + DEFAULT_ENCODING = 'utf-8' + EXPECT_COERCION_IN_DEFAULT_LOCALE = False elif sys.platform.startswith("aix"): # AIX uses iso8859-1 in the C locale, other *nix platforms use ASCII EXPECTED_C_LOCALE_STREAM_ENCODING = "iso8859-1" @@ -52,6 +55,11 @@ # VxWorks defaults to using UTF-8 for all system interfaces EXPECTED_C_LOCALE_STREAM_ENCODING = "utf-8" EXPECTED_C_LOCALE_FS_ENCODING = "utf-8" +if sys.platform.startswith("linux"): + # Linux recognizes POSIX as a synonym for C. Python will always coerce + # if the locale is set to POSIX, but not all platforms will use the + # C locale encodings if POSIX is set, so we'll only test it on linux. + EXPECTED_C_LOCALE_EQUIVALENTS.append("POSIX") # Note that the above expectations are still wrong in some cases, such as: # * Windows when PYTHONLEGACYWINDOWSFSENCODING is set @@ -362,9 +370,14 @@ def _check_c_locale_coercion(self, base_var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale # Check behaviour for the default locale + _fs_encoding = fs_encoding + _stream_encoding = stream_encoding + if not DEFAULT_LOCALE_IS_C and 'LC_ALL' not in extra_vars: + _fs_encoding = _stream_encoding = DEFAULT_ENCODING with self.subTest(default_locale=True, PYTHONCOERCECLOCALE=coerce_c_locale): - if EXPECT_COERCION_IN_DEFAULT_LOCALE: + if (EXPECT_COERCION_IN_DEFAULT_LOCALE + or (not DEFAULT_LOCALE_IS_C and 'LC_ALL' in extra_vars)): _expected_warnings = expected_warnings _coercion_expected = coercion_expected else: @@ -378,8 +391,8 @@ def _check_c_locale_coercion(self, _expected_warnings == [CLI_COERCION_WARNING]): _expected_warnings = None self._check_child_encoding_details(base_var_dict, - fs_encoding, - stream_encoding, + _fs_encoding, + _stream_encoding, None, _expected_warnings, _coercion_expected)