From b213b516677a1ef9a4b70a92773cba6e150f2d6a Mon Sep 17 00:00:00 2001
From: Ronan Amicel
Date: Thu, 9 Nov 2023 11:54:31 +0100
Subject: [PATCH] Be more specific about which alternative space characters should work as group symbols
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The `\s` character class (or the `string.isspace()` method) could match
characters like new lines that we probably don’t want to consider as
potential group symbols in numbers.
---
 babel/numbers.py | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/babel/numbers.py b/babel/numbers.py
index 32ea06780..6df1db8cb 100644
--- a/babel/numbers.py
+++ b/babel/numbers.py
@@ -998,6 +998,15 @@ def __init__(self, message: str, suggestions: list[str] | None = None) -> None:
         self.suggestions = suggestions
 
 
+SPACE_CHARS = {
+    ' ',  # space
+    '\xa0',  # no-break space
+    '\u202f',  # narrow no-break space
+}
+
+SPACE_CHARS_RE = re.compile('|'.join(SPACE_CHARS))
+
+
 def parse_number(
     string: str,
     locale: Locale | str | None = LC_NUMERIC,
@@ -1029,12 +1038,12 @@ def parse_number(
     group_symbol = get_group_symbol(locale, numbering_system=numbering_system)
 
     if (
-        re.match(r'\s', group_symbol) and  # if the grouping symbol is a kind of space,
+        group_symbol in SPACE_CHARS and  # if the grouping symbol is a kind of space,
         group_symbol not in string and  # and the string to be parsed does not contain it,
-        re.search(r'\s', string)  # but it does contain any other kind of space instead,
+        SPACE_CHARS_RE.search(string)  # but it does contain any other kind of space instead,
     ):
         # ... it's reasonable to assume it is taking the place of the grouping symbol.
-        string = re.sub(r'\s', group_symbol, string)
+        string = SPACE_CHARS_RE.sub(group_symbol, string)
 
     try:
         return int(string.replace(group_symbol, ''))
@@ -1095,12 +1104,12 @@ def parse_decimal(
     decimal_symbol = get_decimal_symbol(locale, numbering_system=numbering_system)
 
     if not strict and (
-        re.match(r'\s', group_symbol) and  # if the grouping symbol is a kind of space,
+        group_symbol in SPACE_CHARS and  # if the grouping symbol is a kind of space,
         group_symbol not in string and  # and the string to be parsed does not contain it,
-        re.search(r'\s', string)  # but it does contain any other kind of space instead,
+        SPACE_CHARS_RE.search(string)  # but it does contain any other kind of space instead,
     ):
         # ... it's reasonable to assume it is taking the place of the grouping symbol.
-        string = re.sub(r'\s', group_symbol, string)
+        string = SPACE_CHARS_RE.sub(group_symbol, string)
 
     try:
         parsed = decimal.Decimal(string.replace(group_symbol, '')