From 0509ae4165e190c2f65e8001578287f78313be30 Mon Sep 17 00:00:00 2001 From: kajte <33686434+kajte@users.noreply.github.com> Date: Mon, 27 Nov 2023 23:06:26 +0200 Subject: [PATCH 1/3] Add support for number symbols of different numbering systems - Import number symbols for available numbering systems from cldr data - Add default_numbering_system and other_numbering_systems properties for Locale - Add numbering_system argument to relevant number formatting functions and use number symbols based on the given numbering system Fixes partially issue https://github.com/python-babel/babel/issues/446 --- babel/core.py | 32 ++++- babel/numbers.py | 274 ++++++++++++++++++++++++++++++++++------- babel/support.py | 35 ++++-- babel/units.py | 48 +++++++- scripts/import_cldr.py | 24 +++- tests/test_core.py | 22 +++- tests/test_numbers.py | 77 ++++++++++++ tests/test_smoke.py | 5 + tests/test_support.py | 60 +++++++-- 9 files changed, 495 insertions(+), 82 deletions(-) diff --git a/babel/core.py b/babel/core.py index 782c8f26a..207c13b92 100644 --- a/babel/core.py +++ b/babel/core.py @@ -149,7 +149,7 @@ class Locale: `Locale` objects provide access to a collection of locale data, such as territory and language names, number and date format patterns, and more: - >>> locale.number_symbols['decimal'] + >>> locale.number_symbols['latn']['decimal'] u'.' If a locale is requested for which no locale data is available, an @@ -625,16 +625,42 @@ def currency_symbols(self) -> localedata.LocaleDataDict: @property def number_symbols(self) -> localedata.LocaleDataDict: - """Symbols used in number formatting. + """Symbols used in number formatting by number system. .. note:: The format of the value returned may change between Babel versions. - >>> Locale('fr', 'FR').number_symbols['decimal'] + >>> Locale('fr', 'FR').number_symbols["latn"]['decimal'] u',' + >>> Locale('fa', 'IR').number_symbols["arabext"]['decimal'] + u'٫' + >>> Locale('fa', 'IR').number_symbols["latn"]['decimal'] + u'.' 
""" return self._data['number_symbols'] + @property + def other_numbering_systems(self) -> localedata.LocaleDataDict: + """ + Mapping of other numbering systems available for the locale. + See: https://www.unicode.org/reports/tr35/tr35-numbers.html#otherNumberingSystems + + >>> Locale('el', 'GR').other_numbering_systems['traditional'] + u'grek' + + .. note:: The format of the value returned may change between + Babel versions. + """ + return self._data['numbering_systems'] + + @property + def default_numbering_system(self) -> str: + """The default numbering system used by the locale. + >>> Locale('el', 'GR').default_numbering_system + u'latn' + """ + return self._data['default_numbering_system'] + @property def decimal_formats(self) -> localedata.LocaleDataDict: """Locale patterns for decimal number formatting. diff --git a/babel/numbers.py b/babel/numbers.py index c6fcb9504..a9c19510d 100644 --- a/babel/numbers.py +++ b/babel/numbers.py @@ -317,70 +317,161 @@ def _is_active(start, end): return result -def get_decimal_symbol(locale: Locale | str | None = LC_NUMERIC) -> str: +def _get_numbering_system(locale: Locale, numbering_system: Literal["default"] | str = "latn") -> str: + if numbering_system == "default": + return locale.default_numbering_system + else: + return numbering_system + + +def _get_number_symbols( + locale: Locale | str, + *, + numbering_system: Literal["default"] | str = "latn" +) -> LocaleDataDict: + parsed_locale = Locale.parse(locale) + numbering_system = _get_numbering_system(parsed_locale, numbering_system) + try: + return parsed_locale.number_symbols[numbering_system] + except KeyError as error: + raise UnsupportedNumberingSystemError(f"Unknown numbering system {numbering_system} for Locale {parsed_locale}.") from error + + +class UnsupportedNumberingSystemError(Exception): + """Exception thrown when an unsupported numbering system is requested for the given Locale.""" + pass + + +def get_decimal_symbol( + locale: Locale | str | None = 
LC_NUMERIC, + *, + numbering_system: Literal["default"] | str = "latn", +) -> str: """Return the symbol used by the locale to separate decimal fractions. >>> get_decimal_symbol('en_US') u'.' + >>> get_decimal_symbol('ar_EG', numbering_system='default') + u'٫' + >>> get_decimal_symbol('ar_EG', numbering_system='latn') + u'.' :param locale: the `Locale` object or locale identifier + :param numbering_system: The numbering system used for fetching the symbol. Defaults to "latn". + The special value "default" will use the default numbering system of the locale. + :raise `UnsupportedNumberingSystemError`: If the numbering system is not supported by the locale. """ - return Locale.parse(locale).number_symbols.get('decimal', '.') + return _get_number_symbols(locale, numbering_system=numbering_system).get('decimal', '.') -def get_plus_sign_symbol(locale: Locale | str | None = LC_NUMERIC) -> str: +def get_plus_sign_symbol( + locale: Locale | str | None = LC_NUMERIC, + *, + numbering_system: Literal["default"] | str = "latn", +) -> str: """Return the plus sign symbol used by the current locale. >>> get_plus_sign_symbol('en_US') u'+' + >>> get_plus_sign_symbol('ar_EG', numbering_system='default') + u'\u061c+' + >>> get_plus_sign_symbol('ar_EG', numbering_system='latn') + u'\u200e+' :param locale: the `Locale` object or locale identifier + :param numbering_system: The numbering system used for fetching the symbol. Defaults to "latn". + The special value "default" will use the default numbering system of the locale. + :raise `UnsupportedNumberingSystemError`: if the numbering system is not supported by the locale. 
""" - return Locale.parse(locale).number_symbols.get('plusSign', '+') + return _get_number_symbols(locale, numbering_system=numbering_system).get('plusSign', '+') -def get_minus_sign_symbol(locale: Locale | str | None = LC_NUMERIC) -> str: +def get_minus_sign_symbol( + locale: Locale | str | None = LC_NUMERIC, + *, + numbering_system: Literal["default"] | str = "latn", +) -> str: """Return the plus sign symbol used by the current locale. >>> get_minus_sign_symbol('en_US') u'-' + >>> get_minus_sign_symbol('ar_EG', numbering_system='default') + u'\u061c-' + >>> get_minus_sign_symbol('ar_EG', numbering_system='latn') + u'\u200e-' :param locale: the `Locale` object or locale identifier + :param numbering_system: The numbering system used for fetching the symbol. Defaults to "latn". + The special value "default" will use the default numbering system of the locale. + :raise `UnsupportedNumberingSystemError`: if the numbering system is not supported by the locale. """ - return Locale.parse(locale).number_symbols.get('minusSign', '-') + return _get_number_symbols(locale, numbering_system=numbering_system).get('minusSign', '-') -def get_exponential_symbol(locale: Locale | str | None = LC_NUMERIC) -> str: +def get_exponential_symbol( + locale: Locale | str | None = LC_NUMERIC, + *, + numbering_system: Literal["default"] | str = "latn", +) -> str: """Return the symbol used by the locale to separate mantissa and exponent. >>> get_exponential_symbol('en_US') u'E' + >>> get_exponential_symbol('ar_EG', numbering_system='default') + u'اس' + >>> get_exponential_symbol('ar_EG', numbering_system='latn') + u'E' :param locale: the `Locale` object or locale identifier + :param numbering_system: The numbering system used for fetching the symbol. Defaults to "latn". + The special value "default" will use the default numbering system of the locale. + :raise `UnsupportedNumberingSystemError`: if the numbering system is not supported by the locale. 
""" - return Locale.parse(locale).number_symbols.get('exponential', 'E') + return _get_number_symbols(locale, numbering_system=numbering_system).get('exponential', 'E') -def get_group_symbol(locale: Locale | str | None = LC_NUMERIC) -> str: +def get_group_symbol( + locale: Locale | str | None = LC_NUMERIC, + *, + numbering_system: Literal["default"] | str = "latn", +) -> str: """Return the symbol used by the locale to separate groups of thousands. >>> get_group_symbol('en_US') u',' + >>> get_group_symbol('ar_EG', numbering_system='default') + u'٬' + >>> get_group_symbol('ar_EG', numbering_system='latn') + u',' :param locale: the `Locale` object or locale identifier + :param numbering_system: The numbering system used for fetching the symbol. Defaults to "latn". + The special value "default" will use the default numbering system of the locale. + :raise `UnsupportedNumberingSystemError`: if the numbering system is not supported by the locale. """ - return Locale.parse(locale).number_symbols.get('group', ',') + return _get_number_symbols(locale, numbering_system=numbering_system).get('group', ',') -def get_infinity_symbol(locale: Locale | str | None = LC_NUMERIC) -> str: +def get_infinity_symbol( + locale: Locale | str | None = LC_NUMERIC, + *, + numbering_system: Literal["default"] | str = "latn", +) -> str: """Return the symbol used by the locale to represent infinity. >>> get_infinity_symbol('en_US') u'∞' + >>> get_infinity_symbol('ar_EG', numbering_system='default') + u'∞' + >>> get_infinity_symbol('ar_EG', numbering_system='latn') + u'∞' :param locale: the `Locale` object or locale identifier + :param numbering_system: The numbering system used for fetching the symbol. Defaults to "latn". + The special value "default" will use the default numbering system of the locale. + :raise `UnsupportedNumberingSystemError`: if the numbering system is not supported by the locale. 
""" - return Locale.parse(locale).number_symbols.get('infinity', '∞') + return _get_number_symbols(locale, numbering_system=numbering_system).get('infinity', '∞') def format_number(number: float | decimal.Decimal | str, locale: Locale | str | None = LC_NUMERIC) -> str: @@ -430,6 +521,8 @@ def format_decimal( locale: Locale | str | None = LC_NUMERIC, decimal_quantization: bool = True, group_separator: bool = True, + *, + numbering_system: Literal["default"] | str = "latn", ) -> str: """Return the given decimal number formatted for a specific locale. @@ -443,6 +536,10 @@ def format_decimal( u'1,234' >>> format_decimal(1.2345, locale='de') u'1,234' + >>> format_decimal(1.2345, locale='ar_EG', numbering_system='default') + u'1٫234' + >>> format_decimal(1.2345, locale='ar_EG', numbering_system='latn') + u'1.234' The appropriate thousands grouping and the decimal separator are used for each locale: @@ -470,13 +567,16 @@ def format_decimal( the format pattern. Defaults to `True`. :param group_separator: Boolean to switch group separator on/off in a locale's number format. + :param numbering_system: The numbering system used for formatting number symbols. Defaults to "latn". + The special value "default" will use the default numbering system of the locale. + :raise `UnsupportedNumberingSystemError`: If the numbering system is not supported by the locale. 
""" locale = Locale.parse(locale) if format is None: format = locale.decimal_formats[format] pattern = parse_pattern(format) return pattern.apply( - number, locale, decimal_quantization=decimal_quantization, group_separator=group_separator) + number, locale, decimal_quantization=decimal_quantization, group_separator=group_separator, numbering_system=numbering_system) def format_compact_decimal( @@ -485,6 +585,7 @@ def format_compact_decimal( format_type: Literal["short", "long"] = "short", locale: Locale | str | None = LC_NUMERIC, fraction_digits: int = 0, + numbering_system: Literal["default"] | str = "latn", ) -> str: """Return the given decimal number formatted for a specific locale in compact form. @@ -500,11 +601,16 @@ def format_compact_decimal( u'2 милиони' >>> format_compact_decimal(21000000, format_type="long", locale="mk") u'21 милион' + >>> format_compact_decimal(12345, format_type="short", locale='ar_EG', fraction_digits=2, numbering_system='default') + u'12٫34\xa0ألف' :param number: the number to format :param format_type: Compact format to use ("short" or "long") :param locale: the `Locale` object or locale identifier :param fraction_digits: Number of digits after the decimal point to use. Defaults to `0`. + :param numbering_system: The numbering system used for formatting number symbols. Defaults to "latn". + The special value "default" will use the default numbering system of the locale. + :raise `UnsupportedNumberingSystemError`: If the numbering system is not supported by the locale. 
""" locale = Locale.parse(locale) compact_format = locale.compact_decimal_formats[format_type] @@ -513,7 +619,7 @@ def format_compact_decimal( if format is None: format = locale.decimal_formats[None] pattern = parse_pattern(format) - return pattern.apply(number, locale, decimal_quantization=False) + return pattern.apply(number, locale, decimal_quantization=False, numbering_system=numbering_system) def _get_compact_format( @@ -569,6 +675,8 @@ def format_currency( format_type: Literal["name", "standard", "accounting"] = "standard", decimal_quantization: bool = True, group_separator: bool = True, + *, + numbering_system: Literal["default"] | str = "latn", ) -> str: """Return formatted currency value. @@ -578,6 +686,8 @@ def format_currency( u'US$1.099,98' >>> format_currency(1099.98, 'EUR', locale='de_DE') u'1.099,98\\xa0\\u20ac' + >>> format_currency(1099.98, 'EGP', locale='ar_EG', numbering_system='default') + u'\u200f1٬099٫98\xa0ج.م.\u200f' The format can also be specified explicitly. The currency is placed with the '¤' sign. As the sign gets repeated the format @@ -655,12 +765,15 @@ def format_currency( the format pattern. Defaults to `True`. :param group_separator: Boolean to switch group separator on/off in a locale's number format. - + :param numbering_system: The numbering system used for formatting number symbols. Defaults to "latn". + The special value "default" will use the default numbering system of the locale. + :raise `UnsupportedNumberingSystemError`: If the numbering system is not supported by the locale. 
""" if format_type == 'name': return _format_currency_long_name(number, currency, format=format, locale=locale, currency_digits=currency_digits, - decimal_quantization=decimal_quantization, group_separator=group_separator) + decimal_quantization=decimal_quantization, group_separator=group_separator, + numbering_system=numbering_system) locale = Locale.parse(locale) if format: pattern = parse_pattern(format) @@ -672,7 +785,7 @@ def format_currency( return pattern.apply( number, locale, currency=currency, currency_digits=currency_digits, - decimal_quantization=decimal_quantization, group_separator=group_separator) + decimal_quantization=decimal_quantization, group_separator=group_separator, numbering_system=numbering_system) def _format_currency_long_name( @@ -684,6 +797,8 @@ def _format_currency_long_name( format_type: Literal["name", "standard", "accounting"] = "standard", decimal_quantization: bool = True, group_separator: bool = True, + *, + numbering_system: Literal["default"] | str = "latn" ) -> str: # Algorithm described here: # https://www.unicode.org/reports/tr35/tr35-numbers.html#Currencies @@ -710,7 +825,7 @@ def _format_currency_long_name( number_part = pattern.apply( number, locale, currency=currency, currency_digits=currency_digits, - decimal_quantization=decimal_quantization, group_separator=group_separator) + decimal_quantization=decimal_quantization, group_separator=group_separator, numbering_system=numbering_system) return unit_pattern.format(number_part, display_name) @@ -722,6 +837,7 @@ def format_compact_currency( format_type: Literal["short"] = "short", locale: Locale | str | None = LC_NUMERIC, fraction_digits: int = 0, + numbering_system: Literal["default"] | str = "latn", ) -> str: """Format a number as a currency value in compact form. @@ -737,6 +853,9 @@ def format_compact_currency( :param format_type: the compact format type to use. Defaults to "short". 
:param locale: the `Locale` object or locale identifier :param fraction_digits: Number of digits after the decimal point to use. Defaults to `0`. + :param numbering_system: The numbering system used for formatting number symbols. Defaults to "latn". + The special value "default" will use the default numbering system of the locale. + :raise `UnsupportedNumberingSystemError`: If the numbering system is not supported by the locale. """ locale = Locale.parse(locale) try: @@ -759,7 +878,8 @@ def format_compact_currency( if format is None: raise ValueError('No compact currency format found for the given number and locale.') pattern = parse_pattern(format) - return pattern.apply(number, locale, currency=currency, currency_digits=False, decimal_quantization=False) + return pattern.apply(number, locale, currency=currency, currency_digits=False, decimal_quantization=False, + numbering_system=numbering_system) def format_percent( @@ -768,6 +888,8 @@ def format_percent( locale: Locale | str | None = LC_NUMERIC, decimal_quantization: bool = True, group_separator: bool = True, + *, + numbering_system: Literal["default"] | str = "latn" ) -> str: """Return formatted percent value for a specific locale. @@ -777,6 +899,8 @@ def format_percent( u'2,512%' >>> format_percent(25.1234, locale='sv_SE') u'2\\xa0512\\xa0%' + >>> format_percent(25.1234, locale='ar_EG', numbering_system='default') + u'2٬512%' The format pattern can also be specified explicitly: @@ -805,13 +929,18 @@ def format_percent( the format pattern. Defaults to `True`. :param group_separator: Boolean to switch group separator on/off in a locale's number format. + :param numbering_system: The numbering system used for formatting number symbols. Defaults to "latn". + The special value "default" will use the default numbering system of the locale. + :raise `UnsupportedNumberingSystemError`: If the numbering system is not supported by the locale. 
""" locale = Locale.parse(locale) if not format: format = locale.percent_formats[None] pattern = parse_pattern(format) return pattern.apply( - number, locale, decimal_quantization=decimal_quantization, group_separator=group_separator) + number, locale, decimal_quantization=decimal_quantization, group_separator=group_separator, + numbering_system=numbering_system + ) def format_scientific( @@ -819,11 +948,15 @@ def format_scientific( format: str | NumberPattern | None = None, locale: Locale | str | None = LC_NUMERIC, decimal_quantization: bool = True, + *, + numbering_system: Literal["default"] | str = "latn" ) -> str: """Return value formatted in scientific notation for a specific locale. >>> format_scientific(10000, locale='en_US') u'1E4' + >>> format_scientific(10000, locale='ar_EG', numbering_system='default') + u'1اس4' The format pattern can also be specified explicitly: @@ -844,13 +977,16 @@ def format_scientific( :param locale: the `Locale` object or locale identifier :param decimal_quantization: Truncate and round high-precision numbers to the format pattern. Defaults to `True`. + :param numbering_system: The numbering system used for formatting number symbols. Defaults to "latn". + The special value "default" will use the default numbering system of the locale. + :raise `UnsupportedNumberingSystemError`: If the numbering system is not supported by the locale. 
""" locale = Locale.parse(locale) if not format: format = locale.scientific_formats[None] pattern = parse_pattern(format) return pattern.apply( - number, locale, decimal_quantization=decimal_quantization) + number, locale, decimal_quantization=decimal_quantization, numbering_system=numbering_system) class NumberFormatError(ValueError): @@ -862,7 +998,12 @@ def __init__(self, message: str, suggestions: list[str] | None = None) -> None: self.suggestions = suggestions -def parse_number(string: str, locale: Locale | str | None = LC_NUMERIC) -> int: +def parse_number( + string: str, + locale: Locale | str | None = LC_NUMERIC, + *, + numbering_system: Literal["default"] | str = "latn", +) -> int: """Parse localized number string into an integer. >>> parse_number('1,099', locale='en_US') @@ -879,16 +1020,25 @@ def parse_number(string: str, locale: Locale | str | None = LC_NUMERIC) -> int: :param string: the string to parse :param locale: the `Locale` object or locale identifier + :param numbering_system: The numbering system used for formatting number symbols. Defaults to "latn". + The special value "default" will use the default numbering system of the locale. :return: the parsed number :raise `NumberFormatError`: if the string can not be converted to a number + :raise `UnsupportedNumberingSystemError`: if the numbering system is not supported by the locale. """ try: - return int(string.replace(get_group_symbol(locale), '')) + return int(string.replace(get_group_symbol(locale, numbering_system=numbering_system), '')) except ValueError as ve: raise NumberFormatError(f"{string!r} is not a valid number") from ve -def parse_decimal(string: str, locale: Locale | str | None = LC_NUMERIC, strict: bool = False) -> decimal.Decimal: +def parse_decimal( + string: str, + locale: Locale | str | None = LC_NUMERIC, + strict: bool = False, + *, + numbering_system: Literal["default"] | str = "latn", +) -> decimal.Decimal: """Parse localized decimal string into a decimal. 
>>> parse_decimal('1,099.98', locale='en_US') @@ -897,6 +1047,8 @@ def parse_decimal(string: str, locale: Locale | str | None = LC_NUMERIC, strict: Decimal('1099.98') >>> parse_decimal('12 345,123', locale='ru') Decimal('12345.123') + >>> parse_decimal('1٬099٫98', locale='ar_EG', numbering_system='default') + Decimal('1099.98') When the given string cannot be parsed, an exception is raised: @@ -922,12 +1074,15 @@ def parse_decimal(string: str, locale: Locale | str | None = LC_NUMERIC, strict: :param locale: the `Locale` object or locale identifier :param strict: controls whether numbers formatted in a weird way are accepted or rejected + :param numbering_system: The numbering system used for formatting number symbols. Defaults to "latn". + The special value "default" will use the default numbering system of the locale. :raise NumberFormatError: if the string can not be converted to a decimal number + :raise UnsupportedNumberingSystemError: if the numbering system is not supported by the locale. 
""" locale = Locale.parse(locale) - group_symbol = get_group_symbol(locale) - decimal_symbol = get_decimal_symbol(locale) + group_symbol = get_group_symbol(locale, numbering_system=numbering_system) + decimal_symbol = get_decimal_symbol(locale, numbering_system=numbering_system) if not strict and ( group_symbol == '\xa0' and # if the grouping symbol is U+00A0 NO-BREAK SPACE, @@ -943,7 +1098,7 @@ def parse_decimal(string: str, locale: Locale | str | None = LC_NUMERIC, strict: except decimal.InvalidOperation as exc: raise NumberFormatError(f"{string!r} is not a valid decimal number") from exc if strict and group_symbol in string: - proper = format_decimal(parsed, locale=locale, decimal_quantization=False) + proper = format_decimal(parsed, locale=locale, decimal_quantization=False, numbering_system=numbering_system) if string != proper and string.rstrip('0') != (proper + decimal_symbol): try: parsed_alt = decimal.Decimal(string.replace(decimal_symbol, '') @@ -955,7 +1110,12 @@ def parse_decimal(string: str, locale: Locale | str | None = LC_NUMERIC, strict: suggestions=[proper], ) from exc else: - proper_alt = format_decimal(parsed_alt, locale=locale, decimal_quantization=False) + proper_alt = format_decimal( + parsed_alt, + locale=locale, + decimal_quantization=False, + numbering_system=numbering_system + ) if proper_alt == proper: raise NumberFormatError( f"{string!r} is not a properly formatted decimal number. " @@ -1111,7 +1271,12 @@ def compute_scale(self) -> Literal[0, 2, 3]: scale = 3 return scale - def scientific_notation_elements(self, value: decimal.Decimal, locale: Locale | str | None) -> tuple[decimal.Decimal, int, str]: + def scientific_notation_elements( + self, + value: decimal.Decimal, + locale: Locale | str | None, + numbering_system: str + ) -> tuple[decimal.Decimal, int, str]: """ Returns normalized scientific notation components of a value. """ # Normalize value to only have one lead digit. 
@@ -1129,9 +1294,9 @@ def scientific_notation_elements(self, value: decimal.Decimal, locale: Locale | # Get exponent sign symbol. exp_sign = '' if exp < 0: - exp_sign = get_minus_sign_symbol(locale) + exp_sign = get_minus_sign_symbol(locale, numbering_system=numbering_system) elif self.exp_plus: - exp_sign = get_plus_sign_symbol(locale) + exp_sign = get_plus_sign_symbol(locale, numbering_system=numbering_system) # Normalize exponent value now that we have the sign. exp = abs(exp) @@ -1147,6 +1312,8 @@ def apply( decimal_quantization: bool = True, force_frac: tuple[int, int] | None = None, group_separator: bool = True, + *, + numbering_system: Literal["default"] | str = "latn" ): """Renders into a string a number following the defined pattern. @@ -1170,8 +1337,11 @@ def apply( :type decimal_quantization: bool :param force_frac: DEPRECATED - a forced override for `self.frac_prec` for a single formatting invocation. + :param numbering_system: The numbering system used for formatting number symbols. Defaults to "latn", special + value "default" will use the default numbering system of the locale. :return: Formatted decimal string. :rtype: str + :raise UnsupportedNumberingSystemError: If the numbering system is not supported by the locale. """ if not isinstance(value, decimal.Decimal): value = decimal.Decimal(str(value)) @@ -1184,7 +1354,7 @@ def apply( # Prepare scientific notation metadata. if self.exp_prec: - value, exp, exp_sign = self.scientific_notation_elements(value, locale) + value, exp, exp_sign = self.scientific_notation_elements(value, locale, numbering_system=numbering_system) # Adjust the precision of the fractional part and force it to the # currency's if necessary. @@ -1213,10 +1383,10 @@ def apply( # Render scientific notation. 
if self.exp_prec: number = ''.join([ - self._quantize_value(value, locale, frac_prec, group_separator), - get_exponential_symbol(locale), + self._quantize_value(value, locale, frac_prec, group_separator, numbering_system=numbering_system), + get_exponential_symbol(locale, numbering_system=numbering_system), exp_sign, # type: ignore # exp_sign is always defined here - self._format_int(str(exp), self.exp_prec[0], self.exp_prec[1], locale), # type: ignore # exp is always defined here + self._format_int(str(exp), self.exp_prec[0], self.exp_prec[1], locale, numbering_system=numbering_system), # type: ignore # exp is always defined here ]) # Is it a significant digits pattern? @@ -1225,13 +1395,13 @@ def apply( self.int_prec[0], self.int_prec[1]) a, sep, b = text.partition(".") - number = self._format_int(a, 0, 1000, locale) + number = self._format_int(a, 0, 1000, locale, numbering_system=numbering_system) if sep: - number += get_decimal_symbol(locale) + b + number += get_decimal_symbol(locale, numbering_system=numbering_system) + b # A normal number pattern. 
else: - number = self._quantize_value(value, locale, frac_prec, group_separator) + number = self._quantize_value(value, locale, frac_prec, group_separator, numbering_system=numbering_system) retval = ''.join([ self.prefix[is_negative], @@ -1288,33 +1458,45 @@ def _format_significant(self, value: decimal.Decimal, minimum: int, maximum: int ).rstrip('.') return result - def _format_int(self, value: str, min: int, max: int, locale: Locale | str | None) -> str: + def _format_int(self, value: str, min: int, max: int, locale: Locale | str | None, numbering_system: str) -> str: width = len(value) if width < min: value = '0' * (min - width) + value gsize = self.grouping[0] ret = '' - symbol = get_group_symbol(locale) + symbol = get_group_symbol(locale, numbering_system=numbering_system) while len(value) > gsize: ret = symbol + value[-gsize:] + ret value = value[:-gsize] gsize = self.grouping[1] return value + ret - def _quantize_value(self, value: decimal.Decimal, locale: Locale | str | None, frac_prec: tuple[int, int], group_separator: bool) -> str: + def _quantize_value( + self, + value: decimal.Decimal, + locale: Locale | str | None, + frac_prec: tuple[int, int], group_separator: bool, + numbering_system: str, + ) -> str: # If the number is +/-Infinity, we can't quantize it if value.is_infinite(): - return get_infinity_symbol(locale) + return get_infinity_symbol(locale, numbering_system=numbering_system) quantum = get_decimal_quantum(frac_prec[1]) rounded = value.quantize(quantum) a, sep, b = f"{rounded:f}".partition(".") integer_part = a if group_separator: - integer_part = self._format_int(a, self.int_prec[0], self.int_prec[1], locale) - number = integer_part + self._format_frac(b or '0', locale, frac_prec) + integer_part = self._format_int(a, self.int_prec[0], self.int_prec[1], locale, numbering_system=numbering_system) + number = integer_part + self._format_frac(b or '0', locale=locale, force_frac=frac_prec, numbering_system=numbering_system) return number - def 
_format_frac(self, value: str, locale: Locale | str | None, force_frac: tuple[int, int] | None = None) -> str: + def _format_frac( + self, + value: str, + numbering_system: str, + locale: Locale | str | None, + force_frac: tuple[int, int] | None = None, + ) -> str: min, max = force_frac or self.frac_prec if len(value) < min: value += ('0' * (min - len(value))) @@ -1322,4 +1504,4 @@ def _format_frac(self, value: str, locale: Locale | str | None, force_frac: tupl return '' while len(value) > min and value[-1] == '0': value = value[:-1] - return get_decimal_symbol(locale) + value + return get_decimal_symbol(locale, numbering_system=numbering_system) + value diff --git a/babel/support.py b/babel/support.py index 8d03934a3..dd5c996f3 100644 --- a/babel/support.py +++ b/babel/support.py @@ -49,14 +49,23 @@ class Format: u'1.234' """ - def __init__(self, locale: Locale | str, tzinfo: datetime.tzinfo | None = None) -> None: + def __init__( + self, + locale: Locale | str, + tzinfo: datetime.tzinfo | None = None, + numbering_system: Literal["default"] | str = "latn", + ) -> None: """Initialize the formatter. :param locale: the locale identifier or `Locale` instance :param tzinfo: the time-zone info (a `tzinfo` instance or `None`) + :param numbering_system: The numbering system used for formatting number symbols. Defaults to "latn". + The special value "default" will use the default numbering system of the locale. + :raise `UnsupportedNumberingSystemError`: If the numbering system is not supported by the locale. 
""" self.locale = Locale.parse(locale) self.tzinfo = tzinfo + self.numbering_system = numbering_system def date( self, @@ -129,7 +138,7 @@ def number(self, number: float | decimal.Decimal | str) -> str: >>> fmt.number(1099) u'1,099' """ - return format_decimal(number, locale=self.locale) + return format_decimal(number, locale=self.locale, numbering_system=self.numbering_system) def decimal(self, number: float | decimal.Decimal | str, format: str | None = None) -> str: """Return a decimal number formatted for the locale. @@ -138,7 +147,7 @@ def decimal(self, number: float | decimal.Decimal | str, format: str | None = No >>> fmt.decimal(1.2345) u'1.234' """ - return format_decimal(number, format, locale=self.locale) + return format_decimal(number, format, locale=self.locale, numbering_system=self.numbering_system) def compact_decimal( self, @@ -154,14 +163,18 @@ def compact_decimal( >>> fmt.compact_decimal(1234567, format_type='long', fraction_digits=2) '1.23 million' """ - return format_compact_decimal(number, format_type=format_type, - fraction_digits=fraction_digits, - locale=self.locale) + return format_compact_decimal( + number, + format_type=format_type, + fraction_digits=fraction_digits, + locale=self.locale, + numbering_system=self.numbering_system, + ) def currency(self, number: float | decimal.Decimal | str, currency: str) -> str: """Return a number in the given currency formatted for the locale. 
""" - return format_currency(number, currency, locale=self.locale) + return format_currency(number, currency, locale=self.locale, numbering_system=self.numbering_system) def compact_currency( self, @@ -176,8 +189,8 @@ def compact_currency( >>> Format('en_US').compact_currency(1234567, "USD", format_type='short', fraction_digits=2) '$1.23M' """ - return format_compact_currency(number, currency, format_type=format_type, - fraction_digits=fraction_digits, locale=self.locale) + return format_compact_currency(number, currency, format_type=format_type, fraction_digits=fraction_digits, + locale=self.locale, numbering_system=self.numbering_system) def percent(self, number: float | decimal.Decimal | str, format: str | None = None) -> str: """Return a number formatted as percentage for the locale. @@ -186,12 +199,12 @@ def percent(self, number: float | decimal.Decimal | str, format: str | None = No >>> fmt.percent(0.34) u'34%' """ - return format_percent(number, format, locale=self.locale) + return format_percent(number, format, locale=self.locale, numbering_system=self.numbering_system) def scientific(self, number: float | decimal.Decimal | str) -> str: """Return a number formatted using scientific notation for the locale. """ - return format_scientific(number, locale=self.locale) + return format_scientific(number, locale=self.locale, numbering_system=self.numbering_system) class LazyProxy: diff --git a/babel/units.py b/babel/units.py index 465d2263d..67f09299a 100644 --- a/babel/units.py +++ b/babel/units.py @@ -81,6 +81,8 @@ def format_unit( length: Literal['short', 'long', 'narrow'] = 'long', format: str | None = None, locale: Locale | str | None = LC_NUMERIC, + *, + numbering_system: Literal["default"] | str = "latn" ) -> str: """Format a value of a given unit. 
@@ -95,6 +97,8 @@ def format_unit( u'1\\xa0200 millimeter kvikks\\xf8lv' >>> format_unit(270, 'ton', locale='en') u'270 tons' + >>> format_unit(1234.5, 'kilogram', locale='ar_EG', numbering_system='default') + u'1٬234٫5 كيلوغرام' Number formats may be overridden with the ``format`` parameter. @@ -127,6 +131,9 @@ def format_unit( :param length: "short", "long" or "narrow" :param format: An optional format, as accepted by `format_decimal`. :param locale: the `Locale` object or locale identifier + :param numbering_system: The numbering system used for formatting number symbols. Defaults to "latn". + The special value "default" will use the default numbering system of the locale. + :raise `UnsupportedNumberingSystemError`: If the numbering system is not supported by the locale. """ locale = Locale.parse(locale) @@ -139,7 +146,7 @@ def format_unit( formatted_value = value plural_form = "one" else: - formatted_value = format_decimal(value, format, locale) + formatted_value = format_decimal(value, format, locale, numbering_system=numbering_system) plural_form = locale.plural_form(value) if plural_form in unit_patterns: @@ -211,6 +218,8 @@ def format_compound_unit( length: Literal["short", "long", "narrow"] = "long", format: str | None = None, locale: Locale | str | None = LC_NUMERIC, + *, + numbering_system: Literal["default"] | str = "latn" ) -> str | None: """ Format a compound number value, i.e. "kilometers per hour" or similar. @@ -230,6 +239,9 @@ def format_compound_unit( >>> format_compound_unit(32.5, "ton", 15, denominator_unit="hour", locale="en") '32.5 tons per 15 hours' + >>> format_compound_unit(1234.5, "ton", 15, denominator_unit="hour", locale="ar_EG", numbering_system="default") + '1٬234٫5 طن لكل 15 ساعة' + >>> format_compound_unit(160, denominator_unit="square-meter", locale="fr") '160 par m\\xe8tre carr\\xe9' @@ -254,7 +266,10 @@ def format_compound_unit( :param length: The formatting length. 
"short", "long" or "narrow" :param format: An optional format, as accepted by `format_decimal`. :param locale: the `Locale` object or locale identifier + :param numbering_system: The numbering system used for formatting number symbols. Defaults to "latn". + The special value "default" will use the default numbering system of the locale. :return: A formatted compound value. + :raise `UnsupportedNumberingSystemError`: If the numbering system is not supported by the locale. """ locale = Locale.parse(locale) @@ -263,7 +278,14 @@ def format_compound_unit( if numerator_unit and denominator_unit and denominator_value == 1: compound_unit = _find_compound_unit(numerator_unit, denominator_unit, locale=locale) if compound_unit: - return format_unit(numerator_value, compound_unit, length=length, format=format, locale=locale) + return format_unit( + numerator_value, + compound_unit, + length=length, + format=format, + locale=locale, + numbering_system=numbering_system, + ) # ... failing that, construct one "by hand". 
@@ -271,10 +293,20 @@ def format_compound_unit( formatted_numerator = numerator_value elif numerator_unit: # Numerator has unit formatted_numerator = format_unit( - numerator_value, numerator_unit, length=length, format=format, locale=locale, + numerator_value, + numerator_unit, + length=length, + format=format, + locale=locale, + numbering_system=numbering_system, ) else: # Unitless numerator - formatted_numerator = format_decimal(numerator_value, format=format, locale=locale) + formatted_numerator = format_decimal( + numerator_value, + format=format, + locale=locale, + numbering_system=numbering_system, + ) if isinstance(denominator_value, str): # Denominator is preformatted formatted_denominator = denominator_value @@ -295,9 +327,15 @@ def format_compound_unit( length=length, format=format, locale=locale, + numbering_system=numbering_system, ).strip() else: # Bare denominator - formatted_denominator = format_decimal(denominator_value, format=format, locale=locale) + formatted_denominator = format_decimal( + denominator_value, + format=format, + locale=locale, + numbering_system=numbering_system + ) # TODO: this doesn't support "compound_variations" (or "prefix"), and will fall back to the "x/y" representation per_pattern = locale._data["compound_unit_patterns"].get("per", {}).get(length, {}).get("compound", "{0}/{1}") diff --git a/scripts/import_cldr.py b/scripts/import_cldr.py index 15d533980..1ff39a8d9 100755 --- a/scripts/import_cldr.py +++ b/scripts/import_cldr.py @@ -432,6 +432,7 @@ def _process_local_datas(sup, srcdir, destdir, force=False, dump_json=False): parse_interval_formats(data, calendar) parse_number_symbols(data, tree) + parse_numbering_systems(data, tree) parse_decimal_formats(data, tree) parse_scientific_formats(data, tree) parse_percent_formats(data, tree) @@ -751,14 +752,27 @@ def parse_calendar_datetime_skeletons(data, calendar): def parse_number_symbols(data, tree): number_symbols = data.setdefault('number_symbols', {}) - for symbol_elem in 
tree.findall('.//numbers/symbols'): - if _should_skip_number_elem(data, symbol_elem): # TODO: Support other number systems + for symbol_system_elem in tree.findall('.//numbers/symbols'): + number_system = symbol_system_elem.get('numberSystem') + if not number_system: continue - for elem in symbol_elem.findall('./*'): - if _should_skip_elem(elem): + for symbol_element in symbol_system_elem.findall('./*'): + if _should_skip_elem(symbol_element): continue - number_symbols[elem.tag] = str(elem.text) + + number_symbols.setdefault(number_system, {})[symbol_element.tag] = str(symbol_element.text) + + +def parse_numbering_systems(data, tree): + default_number_system_node = tree.find('.//numbers/defaultNumberingSystem') + if default_number_system_node is not None: + data['default_numbering_system'] = default_number_system_node.text + + numbering_systems = data.setdefault('numbering_systems', {}) + other_numbering_systems_node = tree.find('.//numbers/otherNumberingSystems') or [] + for system in other_numbering_systems_node: + numbering_systems[system.tag] = system.text def parse_decimal_formats(data, tree): diff --git a/tests/test_core.py b/tests/test_core.py index aa370131d..2e7846214 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -19,7 +19,7 @@ def test_locale_provides_access_to_cldr_locale_data(): locale = Locale('en', 'US') assert locale.display_name == 'English (United States)' - assert locale.number_symbols['decimal'] == '.' + assert locale.number_symbols["latn"]['decimal'] == '.' 
def test_locale_repr(): @@ -162,7 +162,25 @@ def test_currency_symbols_property(self): assert Locale('es', 'CO').currency_symbols['USD'] == 'US$' def test_number_symbols_property(self): - assert Locale('fr', 'FR').number_symbols['decimal'] == ',' + assert Locale('fr', 'FR').number_symbols["latn"]['decimal'] == ',' + assert Locale('ar', 'IL').number_symbols["arab"]['percentSign'] == '٪\u061c' + assert Locale('ar', 'IL').number_symbols["latn"]['percentSign'] == '\u200e%\u200e' + + def test_other_numbering_systems_property(self): + assert Locale('fr', 'FR').other_numbering_systems['native'] == 'latn' + assert 'traditional' not in Locale('fr', 'FR').other_numbering_systems + + assert Locale('el', 'GR').other_numbering_systems['native'] == 'latn' + assert Locale('el', 'GR').other_numbering_systems['traditional'] == 'grek' + + def test_default_numbering_systems_property(self): + assert Locale('en', 'GB').default_numbering_system == 'latn' + assert Locale('ar', 'EG').default_numbering_system == 'arab' + + @pytest.mark.all_locales + def test_all_locales_have_default_numbering_system(self, locale): + locale = Locale.parse(locale) + assert locale.default_numbering_system def test_decimal_formats(self): assert Locale('en', 'US').decimal_formats[None].pattern == '#,##0.###' diff --git a/tests/test_numbers.py b/tests/test_numbers.py index 036780412..1eb001bcc 100644 --- a/tests/test_numbers.py +++ b/tests/test_numbers.py @@ -176,6 +176,12 @@ def test_compact(self): assert numbers.format_compact_decimal(1234, locale='it', format_type='long') == '1 mila' assert numbers.format_compact_decimal(1000, locale='fr', format_type='long') == 'mille' assert numbers.format_compact_decimal(1234, locale='fr', format_type='long') == '1 millier' + assert numbers.format_compact_decimal( + 12345, format_type="short", locale='ar_EG', fraction_digits=2, numbering_system='default' + ) == '12٫34\xa0ألف' + assert numbers.format_compact_decimal( + 12345, format_type="short", locale='ar_EG', 
fraction_digits=2, numbering_system='latn' + ) == '12.34\xa0ألف' class NumberParsingTestCase(unittest.TestCase): @@ -183,8 +189,12 @@ class NumberParsingTestCase(unittest.TestCase): def test_can_parse_decimals(self): assert decimal.Decimal('1099.98') == numbers.parse_decimal('1,099.98', locale='en_US') assert decimal.Decimal('1099.98') == numbers.parse_decimal('1.099,98', locale='de') + assert decimal.Decimal('1099.98') == numbers.parse_decimal('1٬099٫98', locale='ar', numbering_system="default") with pytest.raises(numbers.NumberFormatError): numbers.parse_decimal('2,109,998', locale='de') + with pytest.raises(numbers.UnsupportedNumberingSystemError): + numbers.parse_decimal('2,109,998', locale='de', numbering_system="unknown") + def test_parse_decimal_strict_mode(self): # Numbers with a misplaced grouping symbol should be rejected @@ -317,23 +327,62 @@ def test_get_territory_currencies(): def test_get_decimal_symbol(): assert numbers.get_decimal_symbol('en_US') == '.' + assert numbers.get_decimal_symbol('en_US', numbering_system="default") == '.' + assert numbers.get_decimal_symbol('en_US', numbering_system="latn") == '.' + assert numbers.get_decimal_symbol('sv_SE') == ',' + assert numbers.get_decimal_symbol('ar_EG') == '.' + assert numbers.get_decimal_symbol('ar_EG', numbering_system="default") == '٫' + assert numbers.get_decimal_symbol('ar_EG', numbering_system="latn") == '.' 
+ assert numbers.get_decimal_symbol('ar_EG', numbering_system="arab") == '٫' def test_get_plus_sign_symbol(): assert numbers.get_plus_sign_symbol('en_US') == '+' + assert numbers.get_plus_sign_symbol('en_US', numbering_system="default") == '+' + assert numbers.get_plus_sign_symbol('en_US', numbering_system="latn") == '+' + assert numbers.get_plus_sign_symbol('ar_EG') == '\u200e+' + assert numbers.get_plus_sign_symbol('ar_EG', numbering_system="default") == '\u061c+' + assert numbers.get_plus_sign_symbol('ar_EG', numbering_system="arab") == '\u061c+' + assert numbers.get_plus_sign_symbol('ar_EG', numbering_system="latn") == '\u200e+' def test_get_minus_sign_symbol(): assert numbers.get_minus_sign_symbol('en_US') == '-' + assert numbers.get_minus_sign_symbol('en_US', numbering_system="default") == '-' + assert numbers.get_minus_sign_symbol('en_US', numbering_system="latn") == '-' assert numbers.get_minus_sign_symbol('nl_NL') == '-' + assert numbers.get_minus_sign_symbol('ar_EG') == '\u200e-' + assert numbers.get_minus_sign_symbol('ar_EG', numbering_system="default") == '\u061c-' + assert numbers.get_minus_sign_symbol('ar_EG', numbering_system="arab") == '\u061c-' + assert numbers.get_minus_sign_symbol('ar_EG', numbering_system="latn") == '\u200e-' def test_get_exponential_symbol(): assert numbers.get_exponential_symbol('en_US') == 'E' + assert numbers.get_exponential_symbol('en_US', numbering_system="latn") == 'E' + assert numbers.get_exponential_symbol('en_US', numbering_system="default") == 'E' + assert numbers.get_exponential_symbol('ja_JP') == 'E' + assert numbers.get_exponential_symbol('ar_EG') == 'E' + assert numbers.get_exponential_symbol('ar_EG', numbering_system="default") == 'اس' + assert numbers.get_exponential_symbol('ar_EG', numbering_system="arab") == 'اس' + assert numbers.get_exponential_symbol('ar_EG', numbering_system="latn") == 'E' def test_get_group_symbol(): assert numbers.get_group_symbol('en_US') == ',' + assert numbers.get_group_symbol('en_US', 
numbering_system="latn") == ',' + assert numbers.get_group_symbol('en_US', numbering_system="default") == ',' + assert numbers.get_group_symbol('ar_EG') == ',' + assert numbers.get_group_symbol('ar_EG', numbering_system="default") == '٬' + assert numbers.get_group_symbol('ar_EG', numbering_system="arab") == '٬' + assert numbers.get_group_symbol('ar_EG', numbering_system="latn") == ',' + + +def test_get_infinity_symbol(): + assert numbers.get_infinity_symbol('en_US') == '∞' + assert numbers.get_infinity_symbol('ar_EG', numbering_system="latn") == '∞' + assert numbers.get_infinity_symbol('ar_EG', numbering_system="default") == '∞' + assert numbers.get_infinity_symbol('ar_EG', numbering_system="arab") == '∞' def test_decimal_precision(): @@ -356,6 +405,15 @@ def test_format_decimal(): assert numbers.format_decimal(0000000.5, locale='en_US') == '0.5' assert numbers.format_decimal(000, locale='en_US') == '0' + assert numbers.format_decimal(12345.5, locale='ar_EG') == '12,345.5' + assert numbers.format_decimal(12345.5, locale='ar_EG', numbering_system="default") == '12٬345٫5' + assert numbers.format_decimal(12345.5, locale='ar_EG', numbering_system="arab") == '12٬345٫5' + + with pytest.raises(numbers.UnsupportedNumberingSystemError): + numbers.format_decimal(12345.5, locale='en_US', numbering_system="unknown") + + + @pytest.mark.parametrize('input_value, expected_value', [ ('10000', '10,000'), @@ -395,12 +453,16 @@ def test_format_decimal_quantization(): def test_format_currency(): assert (numbers.format_currency(1099.98, 'USD', locale='en_US') == '$1,099.98') + assert (numbers.format_currency(1099.98, 'USD', locale='en_US', numbering_system="default") + == '$1,099.98') assert (numbers.format_currency(0, 'USD', locale='en_US') == '$0.00') assert (numbers.format_currency(1099.98, 'USD', locale='es_CO') == 'US$1.099,98') assert (numbers.format_currency(1099.98, 'EUR', locale='de_DE') == '1.099,98\xa0\u20ac') + assert (numbers.format_currency(1099.98, 'USD', locale='ar_EG', 
numbering_system="default") + == '\u200f1٬099٫98\xa0US$') assert (numbers.format_currency(1099.98, 'EUR', '\xa4\xa4 #,##0.00', locale='en_US') == 'EUR 1,099.98') @@ -454,6 +516,7 @@ def test_format_compact_currency(): assert numbers.format_compact_currency(999, 'USD', locale='en_US', format_type="short") == '$999' assert numbers.format_compact_currency(123456789, 'USD', locale='en_US', format_type="short") == '$123M' assert numbers.format_compact_currency(123456789, 'USD', locale='en_US', fraction_digits=2, format_type="short") == '$123.46M' + assert numbers.format_compact_currency(123456789, 'USD', locale='en_US', fraction_digits=2, format_type="short", numbering_system="default") == '$123.46M' assert numbers.format_compact_currency(-123456789, 'USD', locale='en_US', fraction_digits=2, format_type="short") == '-$123.46M' assert numbers.format_compact_currency(1, 'JPY', locale='ja_JP', format_type="short") == '¥1' assert numbers.format_compact_currency(1234, 'JPY', locale='ja_JP', format_type="short") == '¥1234' @@ -462,6 +525,7 @@ def test_format_compact_currency(): assert numbers.format_compact_currency(123, 'EUR', locale='yav', format_type="short") == '€\xa0123' assert numbers.format_compact_currency(12345, 'EUR', locale='yav', format_type="short") == '€\xa012K' assert numbers.format_compact_currency(123456789, 'EUR', locale='de_DE', fraction_digits=1) == '123,5\xa0Mio.\xa0€' + assert numbers.format_compact_currency(123456789, 'USD', locale='ar_EG', fraction_digits=2, format_type="short", numbering_system="default") == '123٫46\xa0مليون\xa0US$' def test_format_compact_currency_invalid_format_type(): @@ -511,6 +575,10 @@ def test_format_currency_quantization(): def test_format_currency_long_display_name(): assert (numbers.format_currency(1099.98, 'USD', locale='en_US', format_type='name') == '1,099.98 US dollars') + assert (numbers.format_currency(1099.98, 'USD', locale='en_US', format_type='name', numbering_system="default") + == '1,099.98 US dollars') + assert 
(numbers.format_currency(1099.98, 'USD', locale='ar_EG', format_type='name', numbering_system="default") + == '1٬099٫98 دولار أمريكي') assert (numbers.format_currency(1.00, 'USD', locale='en_US', format_type='name') == '1.00 US dollar') assert (numbers.format_currency(1.00, 'EUR', locale='en_US', format_type='name') @@ -556,6 +624,7 @@ def test_format_currency_long_display_name_custom_format(): def test_format_percent(): assert numbers.format_percent(0.34, locale='en_US') == '34%' + assert numbers.format_percent(0.34, locale='en_US', numbering_system="default") == '34%' assert numbers.format_percent(0, locale='en_US') == '0%' assert numbers.format_percent(0.34, '##0%', locale='en_US') == '34%' assert numbers.format_percent(34, '##0', locale='en_US') == '34' @@ -564,6 +633,8 @@ def test_format_percent(): == '2\xa0512\xa0%') assert (numbers.format_percent(25.1234, '#,##0\u2030', locale='en_US') == '25,123\u2030') + assert numbers.format_percent(134.5, locale='ar_EG', numbering_system="default") == '13٬450%' + @pytest.mark.parametrize('input_value, expected_value', [ @@ -602,6 +673,7 @@ def test_format_percent_quantization(): def test_format_scientific(): assert numbers.format_scientific(10000, locale='en_US') == '1E4' + assert numbers.format_scientific(10000, locale='en_US', numbering_system="default") == '1E4' assert numbers.format_scientific(4234567, '#.#E0', locale='en_US') == '4.2E6' assert numbers.format_scientific(4234567, '0E0000', locale='en_US') == '4.234567E0006' assert numbers.format_scientific(4234567, '##0E00', locale='en_US') == '4.234567E06' @@ -610,6 +682,7 @@ def test_format_scientific(): assert numbers.format_scientific(4234567, '##0.#####E00', locale='en_US') == '4.23457E06' assert numbers.format_scientific(4234567, '##0.##E00', locale='en_US') == '4.23E06' assert numbers.format_scientific(42, '00000.000000E0000', locale='en_US') == '42000.000000E-0003' + assert numbers.format_scientific(0.2, locale="ar_EG", numbering_system="default") == 
'2اس\u061c-1' def test_default_scientific_format(): @@ -660,11 +733,15 @@ def test_format_scientific_quantization(): def test_parse_number(): assert numbers.parse_number('1,099', locale='en_US') == 1099 assert numbers.parse_number('1.099', locale='de_DE') == 1099 + assert numbers.parse_number('1٬099', locale='ar_EG', numbering_system="default") == 1099 with pytest.raises(numbers.NumberFormatError) as excinfo: numbers.parse_number('1.099,98', locale='de') assert excinfo.value.args[0] == "'1.099,98' is not a valid number" + with pytest.raises(numbers.UnsupportedNumberingSystemError): + numbers.parse_number('1.099,98', locale='en', numbering_system="unsupported") + def test_parse_decimal(): assert (numbers.parse_decimal('1,099.98', locale='en_US') diff --git a/tests/test_smoke.py b/tests/test_smoke.py index c91684534..c36151e7e 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -43,9 +43,13 @@ def test_smoke_numbers(locale): locale = Locale.parse(locale) for number in NUMBERS: assert numbers.format_decimal(number, locale=locale) + assert numbers.format_decimal(number, locale=locale, numbering_system="default") assert numbers.format_currency(number, "EUR", locale=locale) + assert numbers.format_currency(number, "EUR", locale=locale, numbering_system="default") assert numbers.format_scientific(number, locale=locale) + assert numbers.format_scientific(number, locale=locale, numbering_system="default") assert numbers.format_percent(number / 100, locale=locale) + assert numbers.format_percent(number / 100, locale=locale, numbering_system="default") @pytest.mark.all_locales @@ -54,3 +58,4 @@ def test_smoke_units(locale): for unit in ('length-meter', 'mass-kilogram', 'energy-calorie', 'volume-liter'): for number in NUMBERS: assert units.format_unit(number, measurement_unit=unit, locale=locale) + assert units.format_unit(number, measurement_unit=unit, locale=locale, numbering_system="default") diff --git a/tests/test_support.py b/tests/test_support.py index 
92188a4cb..d0d1ac223 100644 --- a/tests/test_support.py +++ b/tests/test_support.py @@ -17,6 +17,7 @@ import sys import tempfile import unittest +from decimal import Decimal from io import BytesIO import pytest @@ -296,16 +297,55 @@ def raise_attribute_error(): assert str(exception.value) == 'message' -WHEN = datetime.datetime(2007, 4, 1, 15, 30) - -def test_format_datetime(timezone_getter): - fmt = support.Format('en_US', tzinfo=timezone_getter('US/Eastern')) - assert fmt.datetime(WHEN) == 'Apr 1, 2007, 11:30:00\u202fAM' - - -def test_format_time(timezone_getter): - fmt = support.Format('en_US', tzinfo=timezone_getter('US/Eastern')) - assert fmt.time(WHEN) == '11:30:00\u202fAM' +class TestFormat: + def test_format_datetime(self, timezone_getter): + when = datetime.datetime(2007, 4, 1, 15, 30) + fmt = support.Format('en_US', tzinfo=timezone_getter('US/Eastern')) + assert fmt.datetime(when) == 'Apr 1, 2007, 11:30:00\u202fAM' + + def test_format_time(self, timezone_getter): + when = datetime.datetime(2007, 4, 1, 15, 30) + fmt = support.Format('en_US', tzinfo=timezone_getter('US/Eastern')) + assert fmt.time(when) == '11:30:00\u202fAM' + + def test_format_number(self): + assert support.Format('en_US').number(1234) == '1,234' + assert support.Format('ar_EG', numbering_system="default").number(1234) == '1٬234' + + def test_format_decimal(self): + assert support.Format('en_US').decimal(1234.5) == '1,234.5' + assert support.Format('en_US').decimal(Decimal("1234.5")) == '1,234.5' + assert support.Format('ar_EG', numbering_system="default").decimal(1234.5) == '1٬234٫5' + assert support.Format('ar_EG', numbering_system="default").decimal(Decimal("1234.5")) == '1٬234٫5' + + def test_format_compact_decimal(self): + assert support.Format('en_US').compact_decimal(1234) == '1K' + assert support.Format('ar_EG', numbering_system="default").compact_decimal( + 1234, fraction_digits=1) == '1٫2\xa0ألف' + assert support.Format('ar_EG', numbering_system="default").compact_decimal( + 
Decimal("1234"), fraction_digits=1) == '1٫2\xa0ألف' + + def test_format_currency(self): + assert support.Format('en_US').currency(1099.98, 'USD') == '$1,099.98' + assert support.Format('en_US').currency(Decimal("1099.98"), 'USD') == '$1,099.98' + assert support.Format('ar_EG', numbering_system="default").currency( + 1099.98, 'EGP') == '\u200f1٬099٫98\xa0ج.م.\u200f' + + def test_format_compact_currency(self): + assert support.Format('en_US').compact_currency(1099.98, 'USD') == '$1K' + assert support.Format('en_US').compact_currency(Decimal("1099.98"), 'USD') == '$1K' + assert support.Format('ar_EG', numbering_system="default").compact_currency( + 1099.98, 'EGP') == '1\xa0ألف\xa0ج.م.\u200f' + + def test_format_percent(self): + assert support.Format('en_US').percent(0.34) == '34%' + assert support.Format('en_US').percent(Decimal("0.34")) == '34%' + assert support.Format('ar_EG', numbering_system="default").percent(134.5) == '13٬450%' + + def test_format_scientific(self): + assert support.Format('en_US').scientific(10000) == '1E4' + assert support.Format('en_US').scientific(Decimal("10000")) == '1E4' + assert support.Format('ar_EG', numbering_system="default").scientific(10000) == '1اس4' def test_lazy_proxy(): From 648bddba350097348e9c739edcf07e4e2051bae9 Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Tue, 28 Nov 2023 10:38:12 +0200 Subject: [PATCH 2/3] Apply ruff autofixes --- babel/numbers.py | 16 ++++++++-------- babel/units.py | 6 +++--- tests/test_numbers.py | 4 ++-- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/babel/numbers.py b/babel/numbers.py index a9c19510d..89cf60e15 100644 --- a/babel/numbers.py +++ b/babel/numbers.py @@ -327,7 +327,7 @@ def _get_numbering_system(locale: Locale, numbering_system: Literal["default"] | def _get_number_symbols( locale: Locale | str, *, - numbering_system: Literal["default"] | str = "latn" + numbering_system: Literal["default"] | str = "latn", ) -> LocaleDataDict: parsed_locale = Locale.parse(locale) 
numbering_system = _get_numbering_system(parsed_locale, numbering_system) @@ -798,7 +798,7 @@ def _format_currency_long_name( decimal_quantization: bool = True, group_separator: bool = True, *, - numbering_system: Literal["default"] | str = "latn" + numbering_system: Literal["default"] | str = "latn", ) -> str: # Algorithm described here: # https://www.unicode.org/reports/tr35/tr35-numbers.html#Currencies @@ -889,7 +889,7 @@ def format_percent( decimal_quantization: bool = True, group_separator: bool = True, *, - numbering_system: Literal["default"] | str = "latn" + numbering_system: Literal["default"] | str = "latn", ) -> str: """Return formatted percent value for a specific locale. @@ -939,7 +939,7 @@ def format_percent( pattern = parse_pattern(format) return pattern.apply( number, locale, decimal_quantization=decimal_quantization, group_separator=group_separator, - numbering_system=numbering_system + numbering_system=numbering_system, ) @@ -949,7 +949,7 @@ def format_scientific( locale: Locale | str | None = LC_NUMERIC, decimal_quantization: bool = True, *, - numbering_system: Literal["default"] | str = "latn" + numbering_system: Literal["default"] | str = "latn", ) -> str: """Return value formatted in scientific notation for a specific locale. @@ -1114,7 +1114,7 @@ def parse_decimal( parsed_alt, locale=locale, decimal_quantization=False, - numbering_system=numbering_system + numbering_system=numbering_system, ) if proper_alt == proper: raise NumberFormatError( @@ -1275,7 +1275,7 @@ def scientific_notation_elements( self, value: decimal.Decimal, locale: Locale | str | None, - numbering_system: str + numbering_system: str, ) -> tuple[decimal.Decimal, int, str]: """ Returns normalized scientific notation components of a value. 
""" @@ -1313,7 +1313,7 @@ def apply( force_frac: tuple[int, int] | None = None, group_separator: bool = True, *, - numbering_system: Literal["default"] | str = "latn" + numbering_system: Literal["default"] | str = "latn", ): """Renders into a string a number following the defined pattern. diff --git a/babel/units.py b/babel/units.py index 67f09299a..f676bbdc2 100644 --- a/babel/units.py +++ b/babel/units.py @@ -82,7 +82,7 @@ def format_unit( format: str | None = None, locale: Locale | str | None = LC_NUMERIC, *, - numbering_system: Literal["default"] | str = "latn" + numbering_system: Literal["default"] | str = "latn", ) -> str: """Format a value of a given unit. @@ -219,7 +219,7 @@ def format_compound_unit( format: str | None = None, locale: Locale | str | None = LC_NUMERIC, *, - numbering_system: Literal["default"] | str = "latn" + numbering_system: Literal["default"] | str = "latn", ) -> str | None: """ Format a compound number value, i.e. "kilometers per hour" or similar. @@ -334,7 +334,7 @@ def format_compound_unit( denominator_value, format=format, locale=locale, - numbering_system=numbering_system + numbering_system=numbering_system, ) # TODO: this doesn't support "compound_variations" (or "prefix"), and will fall back to the "x/y" representation diff --git a/tests/test_numbers.py b/tests/test_numbers.py index 1eb001bcc..7e4df672c 100644 --- a/tests/test_numbers.py +++ b/tests/test_numbers.py @@ -177,10 +177,10 @@ def test_compact(self): assert numbers.format_compact_decimal(1000, locale='fr', format_type='long') == 'mille' assert numbers.format_compact_decimal(1234, locale='fr', format_type='long') == '1 millier' assert numbers.format_compact_decimal( - 12345, format_type="short", locale='ar_EG', fraction_digits=2, numbering_system='default' + 12345, format_type="short", locale='ar_EG', fraction_digits=2, numbering_system='default', ) == '12٫34\xa0ألف' assert numbers.format_compact_decimal( - 12345, format_type="short", locale='ar_EG', fraction_digits=2, 
numbering_system='latn' + 12345, format_type="short", locale='ar_EG', fraction_digits=2, numbering_system='latn', ) == '12.34\xa0ألف' From abe6bdafd48339f2770a3186723434b4afe77de8 Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Tue, 28 Nov 2023 11:08:34 +0200 Subject: [PATCH 3/3] Apply suggestions from code review --- babel/numbers.py | 28 ++++++++++++++++++++-------- babel/support.py | 2 +- babel/units.py | 2 +- tests/test_numbers.py | 4 ---- 4 files changed, 22 insertions(+), 14 deletions(-) diff --git a/babel/numbers.py b/babel/numbers.py index 89cf60e15..01f1ca70e 100644 --- a/babel/numbers.py +++ b/babel/numbers.py @@ -325,7 +325,7 @@ def _get_numbering_system(locale: Locale, numbering_system: Literal["default"] | def _get_number_symbols( - locale: Locale | str, + locale: Locale | str | None, *, numbering_system: Literal["default"] | str = "latn", ) -> LocaleDataDict: @@ -1275,7 +1275,8 @@ def scientific_notation_elements( self, value: decimal.Decimal, locale: Locale | str | None, - numbering_system: str, + *, + numbering_system: Literal["default"] | str = "latn", ) -> tuple[decimal.Decimal, int, str]: """ Returns normalized scientific notation components of a value. """ @@ -1337,8 +1338,8 @@ def apply( :type decimal_quantization: bool :param force_frac: DEPRECATED - a forced override for `self.frac_prec` for a single formatting invocation. - :param numbering_system: The numbering system used for formatting number symbols. Defaults to "latn", special - value "default" will use the default numbering system of the locale. + :param numbering_system: The numbering system used for formatting number symbols. Defaults to "latn". + The special value "default" will use the default numbering system of the locale. :return: Formatted decimal string. :rtype: str :raise UnsupportedNumberingSystemError: If the numbering system is not supported by the locale. 
@@ -1458,7 +1459,15 @@ def _format_significant(self, value: decimal.Decimal, minimum: int, maximum: int ).rstrip('.') return result - def _format_int(self, value: str, min: int, max: int, locale: Locale | str | None, numbering_system: str) -> str: + def _format_int( + self, + value: str, + min: int, + max: int, + locale: Locale | str | None, + *, + numbering_system: Literal["default"] | str, + ) -> str: width = len(value) if width < min: value = '0' * (min - width) + value @@ -1475,8 +1484,10 @@ def _quantize_value( self, value: decimal.Decimal, locale: Locale | str | None, - frac_prec: tuple[int, int], group_separator: bool, - numbering_system: str, + frac_prec: tuple[int, int], + group_separator: bool, + *, + numbering_system: Literal["default"] | str, ) -> str: # If the number is +/-Infinity, we can't quantize it if value.is_infinite(): @@ -1493,9 +1504,10 @@ def _quantize_value( def _format_frac( self, value: str, - numbering_system: str, locale: Locale | str | None, force_frac: tuple[int, int] | None = None, + *, + numbering_system: Literal["default"] | str, ) -> str: min, max = force_frac or self.frac_prec if len(value) < min: diff --git a/babel/support.py b/babel/support.py index dd5c996f3..1774d9d85 100644 --- a/babel/support.py +++ b/babel/support.py @@ -53,6 +53,7 @@ def __init__( self, locale: Locale | str, tzinfo: datetime.tzinfo | None = None, + *, numbering_system: Literal["default"] | str = "latn", ) -> None: """Initialize the formatter. @@ -61,7 +62,6 @@ def __init__( :param tzinfo: the time-zone info (a `tzinfo` instance or `None`) :param numbering_system: The numbering system used for formatting number symbols. Defaults to "latn". The special value "default" will use the default numbering system of the locale. - :raise `UnsupportedNumberingSystemError`: If the numbering system is not supported by the locale. 
""" self.locale = Locale.parse(locale) self.tzinfo = tzinfo diff --git a/babel/units.py b/babel/units.py index f676bbdc2..36206d0c8 100644 --- a/babel/units.py +++ b/babel/units.py @@ -239,7 +239,7 @@ def format_compound_unit( >>> format_compound_unit(32.5, "ton", 15, denominator_unit="hour", locale="en") '32.5 tons per 15 hours' - >>> format_compound_unit(1234.5, "ton", 15, denominator_unit="hour", locale="ar_EG", numbering_system="default") + >>> format_compound_unit(1234.5, "ton", 15, denominator_unit="hour", locale="ar_EG", numbering_system="arab") '1٬234٫5 طن لكل 15 ساعة' >>> format_compound_unit(160, denominator_unit="square-meter", locale="fr") diff --git a/tests/test_numbers.py b/tests/test_numbers.py index 7e4df672c..5d9830bd5 100644 --- a/tests/test_numbers.py +++ b/tests/test_numbers.py @@ -412,9 +412,6 @@ def test_format_decimal(): with pytest.raises(numbers.UnsupportedNumberingSystemError): numbers.format_decimal(12345.5, locale='en_US', numbering_system="unknown") - - - @pytest.mark.parametrize('input_value, expected_value', [ ('10000', '10,000'), ('1', '1'), @@ -742,7 +739,6 @@ def test_parse_number(): with pytest.raises(numbers.UnsupportedNumberingSystemError): numbers.parse_number('1.099,98', locale='en', numbering_system="unsupported") - def test_parse_decimal(): assert (numbers.parse_decimal('1,099.98', locale='en_US') == decimal.Decimal('1099.98'))