Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions Lib/locale.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,23 @@ def setlocale(category, value=None):
if 'strcoll' not in globals():
strcoll = _strcoll

if sys.platform.startswith(('freebsd', 'dragonfly')):
    # On FreeBSD, wcsxfrm() fails with EINVAL for
    # 'Å' (U+00C5 LATIN CAPITAL LETTER A WITH RING ABOVE) and
    # 'Å' (U+212B ANGSTROM SIGN) on non-C locales.
    # As a workaround, replace them with
    # 'å' (U+00E5 LATIN SMALL LETTER A WITH RING ABOVE).
    # To preserve the relative order of these characters according to
    # wcscoll(), add a digit 0-2.
    #
    # The same workaround is applied on DragonFly BSD.  sys.platform there
    # is 'dragonfly<major>', so the prefix to test for is 'dragonfly'
    # (a 'dragonflybsd' prefix would never match).
    _strxfrm = strxfrm

    # Single-pass substitution table: each affected character becomes
    # U+00E5 followed by a distinct digit.
    _strxfrm_fixups = str.maketrans({
        '\xe5': '\xe50',
        '\xc5': '\xe51',
        '\u212b': '\xe52',
    })

    def strxfrm(string, /):
        """Return a collation key for *string* under the current LC_COLLATE.

        Wraps the platform strxfrm().  For non-ASCII input in a locale
        other than 'C', 'C.UTF-8' or 'POSIX', the characters U+00E5,
        U+00C5 and U+212B are first rewritten as U+00E5 plus a digit so
        that the underlying call does not fail on them.
        """
        # The isascii() test is the cheap fast path; the locale is only
        # queried for strings that could contain an affected character.
        if (not string.isascii() and
                _setlocale(LC_COLLATE) not in ('C', 'C.UTF-8', 'POSIX')):
            string = string.translate(_strxfrm_fixups)
        return _strxfrm(string)

_localeconv = localeconv

Expand Down
31 changes: 29 additions & 2 deletions Lib/test/test_locale.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,12 +332,14 @@ def test_strcoll(self):
self.assertLess(locale.strcoll('a', 'b'), 0)
self.assertEqual(locale.strcoll('a', 'a'), 0)
self.assertGreater(locale.strcoll('b', 'a'), 0)
self.assertLess(locale.strcoll('A', 'B'), 0)
# embedded null character
self.assertRaises(ValueError, locale.strcoll, 'a\0', 'a')
self.assertRaises(ValueError, locale.strcoll, 'a', 'a\0')

def test_strxfrm(self):
self.assertLess(locale.strxfrm('a'), locale.strxfrm('b'))
self.assertLess(locale.strxfrm('A'), locale.strxfrm('B'))
# embedded null character
self.assertRaises(ValueError, locale.strxfrm, 'a\0')

Expand All @@ -351,8 +353,7 @@ def setUp(self):
enc = codecs.lookup(locale.getencoding() or 'ascii').name
if enc not in ('utf-8', 'iso8859-1', 'cp1252'):
raise unittest.SkipTest('encoding not suitable')
if enc != 'iso8859-1' and (sys.platform == 'darwin' or is_android or
sys.platform.startswith('freebsd')):
if enc != 'iso8859-1' and (sys.platform == 'darwin' or is_android):
raise unittest.SkipTest('wcscoll/wcsxfrm have known bugs')
BaseLocalizedTest.setUp(self)

Expand All @@ -363,6 +364,10 @@ def setUp(self):
"gh-124108: NetBSD doesn't support UTF-8 for LC_COLLATE")
def test_strcoll_with_diacritic(self):
    # Every accented letter must collate before the following base letter.
    pairs = (
        ('à', 'b'),
        ('À', 'B'),
        ('å', 'b'),
        ('\xc5', 'B'),    # 'Å' U+00C5 LATIN CAPITAL LETTER A WITH RING ABOVE
        ('\u212b', 'B'),  # 'Å' U+212B ANGSTROM SIGN
    )
    for accented, base in pairs:
        self.assertLess(locale.strcoll(accented, base), 0)

@unittest.skipIf(sys.platform.startswith('aix'),
'bpo-29972: broken test on AIX')
Expand All @@ -371,6 +376,28 @@ def test_strcoll_with_diacritic(self):
"gh-124108: NetBSD doesn't support UTF-8 for LC_COLLATE")
def test_strxfrm_with_diacritic(self):
    # The key of every accented letter must sort before the key of the
    # following base letter.
    pairs = (
        ('à', 'b'),
        ('À', 'B'),
        ('å', 'b'),
        # gh-130567: Should not fail with OSError EINVAL.
        ('\xc5', 'B'),
        ('\u212b', 'B'),
    )
    for accented, base in pairs:
        self.assertLess(locale.strxfrm(accented), locale.strxfrm(base))

def test_strxfrm_strcoll_consistency(self):
    # Comparing strxfrm() keys must give the same ordering that strcoll()
    # reports for the original strings.
    encoding = codecs.lookup(locale.getencoding() or 'ascii').name
    if encoding != 'utf-8':
        self.skipTest('strcoll() and strxfrm() can be inconsistent on non-UTF-8 locale')

    def check(a, b):
        order = locale.strcoll(a, b)
        key_a = locale.strxfrm(a)
        key_b = locale.strxfrm(b)
        if order < 0:
            self.assertLess(key_a, key_b)
        elif order > 0:
            self.assertGreater(key_a, key_b)
        else:
            self.assertEqual(key_a, key_b)

    cases = (
        ('à', 'À'),
        ('å', '\xc5'),    # 'Å' U+00C5 LATIN CAPITAL LETTER A WITH RING ABOVE
        ('å', '\u212b'),  # 'Å' U+212B ANGSTROM SIGN
        ('\xc5', '\u212b'),
    )
    for left, right in cases:
        check(left, right)


class NormalizeTest(unittest.TestCase):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix :func:`locale.strxfrm` failure on FreeBSD and DragonFly BSD for strings
containing the characters 'Å' (U+00C5 LATIN CAPITAL LETTER A WITH RING ABOVE)
or 'Å' (U+212B ANGSTROM SIGN).
Loading