Skip to content

Commit 0c8fecc

Browse files
gh-137729: Fix support for locales with @-modifiers (GH-137253)
1 parent bc28724 commit 0c8fecc

File tree

6 files changed

+164
-16
lines changed

6 files changed

+164
-16
lines changed

Doc/library/locale.rst

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ The :mod:`locale` module defines the following exception and functions:
4242
If *locale* is a pair, it is converted to a locale name using
4343
the locale aliasing engine.
4444
The language code has the same format as a :ref:`locale name <locale_name>`,
45-
but without encoding and ``@``-modifier.
45+
but without encoding.
4646
The language code and encoding can be ``None``.
4747

4848
If *locale* is omitted or ``None``, the current setting for *category* is
@@ -58,6 +58,9 @@ The :mod:`locale` module defines the following exception and functions:
5858
specified in the :envvar:`LANG` environment variable). If the locale is not
5959
changed thereafter, using multithreading should not cause problems.
6060

61+
.. versionchanged:: next
62+
Support language codes with ``@``-modifiers.
63+
6164

6265
.. function:: localeconv()
6366

@@ -366,11 +369,15 @@ The :mod:`locale` module defines the following exception and functions:
366369
values except :const:`LC_ALL`. It defaults to :const:`LC_CTYPE`.
367370

368371
The language code has the same format as a :ref:`locale name <locale_name>`,
369-
but without encoding and ``@``-modifier.
372+
but without encoding.
370373
The language code and encoding may be ``None`` if their values cannot be
371374
determined.
372375
The "C" locale is represented as ``(None, None)``.
373376

377+
.. versionchanged:: next
378+
``@``-modifiers are no longer silently removed, but included in
379+
the language code.
380+
374381

375382
.. function:: getpreferredencoding(do_setlocale=True)
376383

Doc/whatsnew/3.15.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,15 @@ http.cookies
274274
(Contributed by Nick Burns and Senthil Kumaran in :gh:`92936`.)
275275

276276

277+
locale
278+
------
279+
280+
* :func:`~locale.setlocale` now supports language codes with ``@``-modifiers.
281+
``@``-modifiers are no longer silently removed in :func:`~locale.getlocale`,
282+
but included in the language code.
283+
(Contributed by Serhiy Storchaka in :gh:`137729`.)
284+
285+
277286
math
278287
----
279288

Lib/locale.py

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -375,12 +375,14 @@ def _replace_encoding(code, encoding):
375375
def _append_modifier(code, modifier):
376376
if modifier == 'euro':
377377
if '.' not in code:
378-
return code + '.ISO8859-15'
378+
# Linux appears to require keeping the "@euro" modifier in place,
379+
# even when using the ".ISO8859-15" encoding.
380+
return code + '.ISO8859-15@euro'
379381
_, _, encoding = code.partition('.')
380-
if encoding in ('ISO8859-15', 'UTF-8'):
382+
if encoding == 'UTF-8':
381383
return code
382384
if encoding == 'ISO8859-1':
383-
return _replace_encoding(code, 'ISO8859-15')
385+
code = _replace_encoding(code, 'ISO8859-15')
384386
return code + '@' + modifier
385387

386388
def normalize(localename):
@@ -485,13 +487,18 @@ def _parse_localename(localename):
485487
# Deal with locale modifiers
486488
code, modifier = code.split('@', 1)
487489
if modifier == 'euro' and '.' not in code:
488-
# Assume Latin-9 for @euro locales. This is bogus,
489-
# since some systems may use other encodings for these
490-
# locales. Also, we ignore other modifiers.
491-
return code, 'iso-8859-15'
490+
# Assume ISO8859-15 for @euro locales. Do note that some systems
491+
# may use other encodings for these locales, so this may not always
492+
# be correct.
493+
return code + '@euro', 'ISO8859-15'
494+
else:
495+
modifier = ''
492496

493497
if '.' in code:
494-
return tuple(code.split('.')[:2])
498+
code, encoding = code.split('.')[:2]
499+
if modifier:
500+
code += '@' + modifier
501+
return code, encoding
495502
elif code == 'C':
496503
return None, None
497504
elif code == 'UTF-8':
@@ -516,7 +523,14 @@ def _build_localename(localetuple):
516523
if encoding is None:
517524
return language
518525
else:
519-
return language + '.' + encoding
526+
if '@' in language:
527+
language, modifier = language.split('@', 1)
528+
else:
529+
modifier = ''
530+
localename = language + '.' + encoding
531+
if modifier:
532+
localename += '@' + modifier
533+
return localename
520534
except (TypeError, ValueError):
521535
raise TypeError('Locale must be None, a string, or an iterable of '
522536
'two strings -- language code, encoding.') from None
@@ -888,6 +902,12 @@ def getpreferredencoding(do_setlocale=True):
888902
# SS 2025-06-10:
889903
# Remove 'c.utf8' -> 'en_US.UTF-8' because 'en_US.UTF-8' does not exist
890904
# on all platforms.
905+
#
906+
# SS 2025-07-30:
907+
# Remove conflicts with GNU libc.
908+
#
909+
# removed 'el_gr@euro'
910+
# removed 'uz_uz@cyrillic'
891911

892912
locale_alias = {
893913
'a3': 'az_AZ.KOI8-C',
@@ -1021,7 +1041,6 @@ def getpreferredencoding(do_setlocale=True):
10211041
'el': 'el_GR.ISO8859-7',
10221042
'el_cy': 'el_CY.ISO8859-7',
10231043
'el_gr': 'el_GR.ISO8859-7',
1024-
'el_gr@euro': 'el_GR.ISO8859-15',
10251044
'en': 'en_US.ISO8859-1',
10261045
'en_ag': 'en_AG.UTF-8',
10271046
'en_au': 'en_AU.ISO8859-1',
@@ -1456,7 +1475,6 @@ def getpreferredencoding(do_setlocale=True):
14561475
'ur_pk': 'ur_PK.CP1256',
14571476
'uz': 'uz_UZ.UTF-8',
14581477
'uz_uz': 'uz_UZ.UTF-8',
1459-
'uz_uz@cyrillic': 'uz_UZ.UTF-8',
14601478
've': 've_ZA.UTF-8',
14611479
've_za': 've_ZA.UTF-8',
14621480
'vi': 'vi_VN.TCVN',

Lib/test/test_locale.py

Lines changed: 102 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from decimal import Decimal
2+
from test import support
23
from test.support import cpython_only, verbose, is_android, linked_to_musl, os_helper
34
from test.support.warnings_helper import check_warnings
45
from test.support.import_helper import ensure_lazy_imports, import_fresh_module
@@ -425,8 +426,8 @@ def test_hyphenated_encoding(self):
425426
self.check('cs_CZ.ISO8859-2', 'cs_CZ.ISO8859-2')
426427

427428
def test_euro_modifier(self):
428-
self.check('de_DE@euro', 'de_DE.ISO8859-15')
429-
self.check('en_US.ISO8859-15@euro', 'en_US.ISO8859-15')
429+
self.check('de_DE@euro', 'de_DE.ISO8859-15@euro')
430+
self.check('en_US.ISO8859-15@euro', 'en_US.ISO8859-15@euro')
430431
self.check('de_DE.utf8@euro', 'de_DE.UTF-8')
431432

432433
def test_latin_modifier(self):
@@ -534,6 +535,105 @@ def test_setlocale_long_encoding(self):
534535
with self.assertRaises(locale.Error):
535536
locale.setlocale(locale.LC_ALL, loc2)
536537

538+
@support.subTests('localename,localetuple', [
539+
('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'iso885915')),
540+
('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'iso88591')),
541+
('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'ISO8859-15')),
542+
('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'ISO8859-1')),
543+
('fr_FR.ISO8859-15@euro', ('fr_FR@euro', None)),
544+
('de_DE.ISO8859-15@euro', ('de_DE@euro', 'iso885915')),
545+
('de_DE.ISO8859-15@euro', ('de_DE@euro', 'iso88591')),
546+
('de_DE.ISO8859-15@euro', ('de_DE@euro', 'ISO8859-15')),
547+
('de_DE.ISO8859-15@euro', ('de_DE@euro', 'ISO8859-1')),
548+
('de_DE.ISO8859-15@euro', ('de_DE@euro', None)),
549+
('el_GR.ISO8859-7@euro', ('el_GR@euro', 'iso88597')),
550+
('el_GR.ISO8859-7@euro', ('el_GR@euro', 'ISO8859-7')),
551+
('el_GR.ISO8859-7@euro', ('el_GR@euro', None)),
552+
('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'iso885915')),
553+
('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'iso88591')),
554+
('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'ISO8859-15')),
555+
('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'ISO8859-1')),
556+
('ca_ES.ISO8859-15@euro', ('ca_ES@euro', None)),
557+
('ca_ES.UTF-8@valencia', ('ca_ES@valencia', 'utf8')),
558+
('ca_ES.UTF-8@valencia', ('ca_ES@valencia', 'UTF-8')),
559+
('ca_ES.UTF-8@valencia', ('ca_ES@valencia', None)),
560+
('ks_IN.UTF-8@devanagari', ('ks_IN@devanagari', 'utf8')),
561+
('ks_IN.UTF-8@devanagari', ('ks_IN@devanagari', 'UTF-8')),
562+
('ks_IN.UTF-8@devanagari', ('ks_IN@devanagari', None)),
563+
('sd_IN.UTF-8@devanagari', ('sd_IN@devanagari', 'utf8')),
564+
('sd_IN.UTF-8@devanagari', ('sd_IN@devanagari', 'UTF-8')),
565+
('sd_IN.UTF-8@devanagari', ('sd_IN@devanagari', None)),
566+
('be_BY.UTF-8@latin', ('be_BY@latin', 'utf8')),
567+
('be_BY.UTF-8@latin', ('be_BY@latin', 'UTF-8')),
568+
('be_BY.UTF-8@latin', ('be_BY@latin', None)),
569+
('sr_RS.UTF-8@latin', ('sr_RS@latin', 'utf8')),
570+
('sr_RS.UTF-8@latin', ('sr_RS@latin', 'UTF-8')),
571+
('sr_RS.UTF-8@latin', ('sr_RS@latin', None)),
572+
('ug_CN.UTF-8@latin', ('ug_CN@latin', 'utf8')),
573+
('ug_CN.UTF-8@latin', ('ug_CN@latin', 'UTF-8')),
574+
('ug_CN.UTF-8@latin', ('ug_CN@latin', None)),
575+
('uz_UZ.UTF-8@cyrillic', ('uz_UZ@cyrillic', 'utf8')),
576+
('uz_UZ.UTF-8@cyrillic', ('uz_UZ@cyrillic', 'UTF-8')),
577+
('uz_UZ.UTF-8@cyrillic', ('uz_UZ@cyrillic', None)),
578+
])
579+
def test_setlocale_with_modifier(self, localename, localetuple):
580+
try:
581+
locale.setlocale(locale.LC_CTYPE, localename)
582+
except locale.Error as exc:
583+
self.skipTest(str(exc))
584+
loc = locale.setlocale(locale.LC_CTYPE, localetuple)
585+
self.assertEqual(loc, localename)
586+
587+
loctuple = locale.getlocale(locale.LC_CTYPE)
588+
loc = locale.setlocale(locale.LC_CTYPE, loctuple)
589+
self.assertEqual(loc, localename)
590+
591+
@support.subTests('localename,localetuple', [
592+
('fr_FR.iso885915@euro', ('fr_FR@euro', 'ISO8859-15')),
593+
('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'ISO8859-15')),
594+
('fr_FR@euro', ('fr_FR@euro', 'ISO8859-15')),
595+
('de_DE.iso885915@euro', ('de_DE@euro', 'ISO8859-15')),
596+
('de_DE.ISO8859-15@euro', ('de_DE@euro', 'ISO8859-15')),
597+
('de_DE@euro', ('de_DE@euro', 'ISO8859-15')),
598+
('el_GR.iso88597@euro', ('el_GR@euro', 'ISO8859-7')),
599+
('el_GR.ISO8859-7@euro', ('el_GR@euro', 'ISO8859-7')),
600+
('el_GR@euro', ('el_GR@euro', 'ISO8859-7')),
601+
('ca_ES.iso885915@euro', ('ca_ES@euro', 'ISO8859-15')),
602+
('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'ISO8859-15')),
603+
('ca_ES@euro', ('ca_ES@euro', 'ISO8859-15')),
604+
('ca_ES.utf8@valencia', ('ca_ES@valencia', 'UTF-8')),
605+
('ca_ES.UTF-8@valencia', ('ca_ES@valencia', 'UTF-8')),
606+
('ca_ES@valencia', ('ca_ES@valencia', 'UTF-8')),
607+
('ks_IN.utf8@devanagari', ('ks_IN@devanagari', 'UTF-8')),
608+
('ks_IN.UTF-8@devanagari', ('ks_IN@devanagari', 'UTF-8')),
609+
('ks_IN@devanagari', ('ks_IN@devanagari', 'UTF-8')),
610+
('sd_IN.utf8@devanagari', ('sd_IN@devanagari', 'UTF-8')),
611+
('sd_IN.UTF-8@devanagari', ('sd_IN@devanagari', 'UTF-8')),
612+
('sd_IN@devanagari', ('sd_IN@devanagari', 'UTF-8')),
613+
('be_BY.utf8@latin', ('be_BY@latin', 'UTF-8')),
614+
('be_BY.UTF-8@latin', ('be_BY@latin', 'UTF-8')),
615+
('be_BY@latin', ('be_BY@latin', 'UTF-8')),
616+
('sr_RS.utf8@latin', ('sr_RS@latin', 'UTF-8')),
617+
('sr_RS.UTF-8@latin', ('sr_RS@latin', 'UTF-8')),
618+
('sr_RS@latin', ('sr_RS@latin', 'UTF-8')),
619+
('ug_CN.utf8@latin', ('ug_CN@latin', 'UTF-8')),
620+
('ug_CN.UTF-8@latin', ('ug_CN@latin', 'UTF-8')),
621+
('ug_CN@latin', ('ug_CN@latin', 'UTF-8')),
622+
('uz_UZ.utf8@cyrillic', ('uz_UZ@cyrillic', 'UTF-8')),
623+
('uz_UZ.UTF-8@cyrillic', ('uz_UZ@cyrillic', 'UTF-8')),
624+
('uz_UZ@cyrillic', ('uz_UZ@cyrillic', 'UTF-8')),
625+
])
626+
def test_getlocale_with_modifier(self, localename, localetuple):
627+
try:
628+
locale.setlocale(locale.LC_CTYPE, localename)
629+
except locale.Error as exc:
630+
self.skipTest(str(exc))
631+
loctuple = locale.getlocale(locale.LC_CTYPE)
632+
self.assertEqual(loctuple, localetuple)
633+
634+
locale.setlocale(locale.LC_CTYPE, loctuple)
635+
self.assertEqual(locale.getlocale(locale.LC_CTYPE), localetuple)
636+
537637

538638
class TestMiscellaneous(unittest.TestCase):
539639
def test_defaults_UTF8(self):
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
:func:`locale.setlocale` now supports language codes with ``@``-modifiers.
2+
``@``-modifiers are no longer silently removed in :func:`locale.getlocale`,
3+
but included in the language code.

Tools/i18n/makelocalealias.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,24 @@ def parse(filename):
4444
# Ignore one letter locale mappings (except for 'c')
4545
if len(locale) == 1 and locale != 'c':
4646
continue
47+
if '@' in locale and '@' not in alias:
48+
# Do not simply remove the "@euro" modifier.
49+
# Glibc generates separate locales with the "@euro" modifier, and
50+
# does not always generate a locale without it with the same encoding.
51+
# It can also affect collation.
52+
if locale.endswith('@euro') and not locale.endswith('.utf-8@euro'):
53+
alias += '@euro'
4754
# Normalize encoding, if given
4855
if '.' in locale:
4956
lang, encoding = locale.split('.')[:2]
5057
encoding = encoding.replace('-', '')
5158
encoding = encoding.replace('_', '')
5259
locale = lang + '.' + encoding
5360
data[locale] = alias
61+
# Conflict with glibc.
62+
data.pop('el_gr@euro', None)
63+
data.pop('uz_uz@cyrillic', None)
64+
data.pop('uz_uz.utf8@cyrillic', None)
5465
return data
5566

5667
def parse_glibc_supported(filename):
@@ -81,7 +92,7 @@ def parse_glibc_supported(filename):
8192
# Add an encoding to alias
8293
alias, _, modifier = alias.partition('@')
8394
alias = _locale._replace_encoding(alias, alias_encoding)
84-
if modifier and not (modifier == 'euro' and alias_encoding == 'ISO-8859-15'):
95+
if modifier:
8596
alias += '@' + modifier
8697
data[locale] = alias
8798
return data

0 commit comments

Comments
 (0)