From 7c904e74317b5a6358da8e08dac3209715755410 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 7 Sep 2025 13:42:05 +0300 Subject: [PATCH 1/2] gh-130567: Fix crash in locale.strxfrm() on macOS --- Lib/test/test_locale.py | 18 ++++++++++++++++-- Modules/_localemodule.c | 2 +- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_locale.py b/Lib/test/test_locale.py index 01b1e754d04219..c16505534dd03f 100644 --- a/Lib/test/test_locale.py +++ b/Lib/test/test_locale.py @@ -351,8 +351,7 @@ def setUp(self): enc = codecs.lookup(locale.getencoding() or 'ascii').name if enc not in ('utf-8', 'iso8859-1', 'cp1252'): raise unittest.SkipTest('encoding not suitable') - if enc != 'iso8859-1' and (sys.platform == 'darwin' or is_android or - sys.platform.startswith('freebsd')): + if enc != 'iso8859-1' and is_android: raise unittest.SkipTest('wcscoll/wcsxfrm have known bugs') BaseLocalizedTest.setUp(self) @@ -372,6 +371,21 @@ def test_strcoll_with_diacritic(self): def test_strxfrm_with_diacritic(self): self.assertLess(locale.strxfrm('à'), locale.strxfrm('b')) + # @unittest.skipUnless(sys.platform == 'darwin', + # "only macOS") + def test_xxx(self): + bad = [] + for c in map(chr, range(1, 0x1000)): + if c.isprintable(): + for n in range(8): + x = 'a'*n + c + s = locale.strxfrm(x) + if '\1' in s and s.index('\1') < len(x): + bad += c + print(f'{x!r} {x!a} -> {s!a}') + break + self.fail(repr(''.join(bad))) + class NormalizeTest(unittest.TestCase): def check(self, localename, expected): diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c index e86d5b17d1759d..5f649122640bda 100644 --- a/Modules/_localemodule.c +++ b/Modules/_localemodule.c @@ -457,7 +457,7 @@ _locale_strxfrm_impl(PyObject *module, PyObject *str) /* assume no change in size, first */ n1 = n1 + 1; - buf = PyMem_New(wchar_t, n1); + buf = PyMem_New(wchar_t, n1+1); if (!buf) { PyErr_NoMemory(); goto exit; From c32fa1f5d02325277aa680d44731b1af8ec79cd7 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 7 Sep 2025 15:10:59 +0300 Subject: [PATCH 2/2] =?UTF-8?q?Use=20'Gwich=CA=BCin'=20in=20tests.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Lib/test/test_locale.py | 17 ++--------------- Modules/_localemodule.c | 2 +- 2 files changed, 3 insertions(+), 16 deletions(-) diff --git a/Lib/test/test_locale.py b/Lib/test/test_locale.py index c16505534dd03f..f3cb1fd323fd84 100644 --- a/Lib/test/test_locale.py +++ b/Lib/test/test_locale.py @@ -370,21 +370,8 @@ def test_strcoll_with_diacritic(self): "gh-124108: NetBSD doesn't support UTF-8 for LC_COLLATE") def test_strxfrm_with_diacritic(self): self.assertLess(locale.strxfrm('à'), locale.strxfrm('b')) - - # @unittest.skipUnless(sys.platform == 'darwin', - # "only macOS") - def test_xxx(self): - bad = [] - for c in map(chr, range(1, 0x1000)): - if c.isprintable(): - for n in range(8): - x = 'a'*n + c - s = locale.strxfrm(x) - if '\1' in s and s.index('\1') < len(x): - bad += c - print(f'{x!r} {x!a} -> {s!a}') - break - self.fail(repr(''.join(bad))) + # gh-130567: Should not crash on macOS. + locale.strxfrm('Gwich\u02bcin') class NormalizeTest(unittest.TestCase): diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c index 5f649122640bda..e86d5b17d1759d 100644 --- a/Modules/_localemodule.c +++ b/Modules/_localemodule.c @@ -457,7 +457,7 @@ _locale_strxfrm_impl(PyObject *module, PyObject *str) /* assume no change in size, first */ n1 = n1 + 1; - buf = PyMem_New(wchar_t, n1+1); + buf = PyMem_New(wchar_t, n1); if (!buf) { PyErr_NoMemory(); goto exit;