Skip to content

Commit

Permalink
import_cldr: ignore formatting rules for non-Latin numbering systems
Browse files Browse the repository at this point in the history
Previously the script could have inadvertently merged formatting rules between numbering systems due to the XML selectors used.  This makes sure only Latin rules are used for the time being.  When support for other numbering systems is properly added (see #470), these checks can be changed.
  • Loading branch information
akx committed May 28, 2018
1 parent a72cdf1 commit 77dc9d4
Showing 1 changed file with 101 additions and 56 deletions.
157 changes: 101 additions & 56 deletions scripts/import_cldr.py
Expand Up @@ -389,6 +389,8 @@ def _process_local_datas(sup, srcdir, destdir, force=False, dump_json=False):
territory != '001' and territory or None
]))

data['locale_id'] = locale_id

if locale_id in plural_rules:
data['plural_form'] = plural_rules[locale_id]
if locale_id in ordinal_rules:
Expand Down Expand Up @@ -430,6 +432,31 @@ def _process_local_datas(sup, srcdir, destdir, force=False, dump_json=False):
write_datafile(data_filename, data, dump_json=dump_json)


def _should_skip_number_elem(data, elem):
"""
Figure out whether the numbering-containing element `elem` is in a currently
non-supported (i.e. currently non-Latin) numbering system.
If it is, a warning is raised.
:param data: The root data element, for formatting the warning.
:param elem: Element with `numberSystem` key
:return: Boolean
"""
number_system = elem.get('numberSystem', 'latn')

if number_system != 'latn':
log('%s: Unsupported number system "%s" in <%s numberSystem="%s">' % (
data['locale_id'],
number_system,
elem.tag,
number_system,
))
return True

return False


def _should_skip_elem(elem, type=None, dest=None):
"""
Check whether the given element should be skipped.
Expand Down Expand Up @@ -701,59 +728,73 @@ def parse_calendar_datetime_skeletons(data, calendar):

def parse_number_symbols(data, tree):
number_symbols = data.setdefault('number_symbols', {})
for elem in tree.findall('.//numbers/symbols/*'):
if _should_skip_elem(elem):
for symbol_elem in tree.findall('.//numbers/symbols'):
if _should_skip_number_elem(data, symbol_elem): # TODO: Support other number systems
continue
number_symbols[elem.tag] = text_type(elem.text)

for elem in symbol_elem.findall('./*'):
if _should_skip_elem(elem):
continue
number_symbols[elem.tag] = text_type(elem.text)


def parse_decimal_formats(data, tree):
decimal_formats = data.setdefault('decimal_formats', {})
for elem in tree.findall('.//decimalFormats/decimalFormatLength'):
length_type = elem.attrib.get('type')
if _should_skip_elem(elem, length_type, decimal_formats):
for df_elem in tree.findall('.//decimalFormats'):
if _should_skip_number_elem(data, df_elem): # TODO: Support other number systems
continue
if elem.findall('./alias'):
# TODO map the alias to its target
continue
for pattern_el in elem.findall('./decimalFormat/pattern'):
pattern_type = pattern_el.attrib.get('type')
pattern = numbers.parse_pattern(text_type(pattern_el.text))
if pattern_type:
# This is a compact decimal format, see:
# http://www.unicode.org/reports/tr35/tr35-45/tr35-numbers.html#Compact_Number_Formats

# These are mapped into a `compact_decimal_formats` dictionary
# with the format {length: {count: {multiplier: pattern}}}.

# TODO: Add support for formatting them.
compact_decimal_formats = data.setdefault('compact_decimal_formats', {})
length_map = compact_decimal_formats.setdefault(length_type, {})
length_count_map = length_map.setdefault(pattern_el.attrib['count'], {})
length_count_map[pattern_type] = pattern
else:
# Regular decimal format.
decimal_formats[length_type] = pattern
for elem in df_elem.findall('./decimalFormatLength'):
length_type = elem.attrib.get('type')
if _should_skip_elem(elem, length_type, decimal_formats):
continue
if elem.findall('./alias'):
# TODO map the alias to its target
continue
for pattern_el in elem.findall('./decimalFormat/pattern'):
pattern_type = pattern_el.attrib.get('type')
pattern = numbers.parse_pattern(text_type(pattern_el.text))
if pattern_type:
# This is a compact decimal format, see:
# http://www.unicode.org/reports/tr35/tr35-45/tr35-numbers.html#Compact_Number_Formats

# These are mapped into a `compact_decimal_formats` dictionary
# with the format {length: {count: {multiplier: pattern}}}.

# TODO: Add support for formatting them.
compact_decimal_formats = data.setdefault('compact_decimal_formats', {})
length_map = compact_decimal_formats.setdefault(length_type, {})
length_count_map = length_map.setdefault(pattern_el.attrib['count'], {})
length_count_map[pattern_type] = pattern
else:
# Regular decimal format.
decimal_formats[length_type] = pattern


def parse_scientific_formats(data, tree):
scientific_formats = data.setdefault('scientific_formats', {})
for elem in tree.findall('.//scientificFormats/scientificFormatLength'):
type = elem.attrib.get('type')
if _should_skip_elem(elem, type, scientific_formats):
for sf_elem in tree.findall('.//scientificFormats'):
if _should_skip_number_elem(data, sf_elem): # TODO: Support other number systems
continue
pattern = text_type(elem.findtext('scientificFormat/pattern'))
scientific_formats[type] = numbers.parse_pattern(pattern)
for elem in sf_elem.findall('./scientificFormatLength'):
type = elem.attrib.get('type')
if _should_skip_elem(elem, type, scientific_formats):
continue
pattern = text_type(elem.findtext('scientificFormat/pattern'))
scientific_formats[type] = numbers.parse_pattern(pattern)


def parse_percent_formats(data, tree):
percent_formats = data.setdefault('percent_formats', {})
for elem in tree.findall('.//percentFormats/percentFormatLength'):
type = elem.attrib.get('type')
if _should_skip_elem(elem, type, percent_formats):

for pf_elem in tree.findall('.//percentFormats'):
if _should_skip_number_elem(data, pf_elem): # TODO: Support other number systems
continue
pattern = text_type(elem.findtext('percentFormat/pattern'))
percent_formats[type] = numbers.parse_pattern(pattern)
for elem in pf_elem.findall('.//percentFormatLength'):
type = elem.attrib.get('type')
if _should_skip_elem(elem, type, percent_formats):
continue
pattern = text_type(elem.findtext('percentFormat/pattern'))
percent_formats[type] = numbers.parse_pattern(pattern)


def parse_currency_names(data, tree):
Expand Down Expand Up @@ -837,25 +878,29 @@ def parse_interval_formats(data, tree):

def parse_currency_formats(data, tree):
currency_formats = data.setdefault('currency_formats', {})
for length_elem in tree.findall('.//currencyFormats/currencyFormatLength'):
curr_length_type = length_elem.attrib.get('type')
for elem in length_elem.findall('currencyFormat'):
type = elem.attrib.get('type')
if curr_length_type:
# Handle `<currencyFormatLength type="short">`, etc.
# TODO(3.x): use nested dicts instead of colon-separated madness
type = '%s:%s' % (type, curr_length_type)
if _should_skip_elem(elem, type, currency_formats):
continue
for child in elem.getiterator():
if child.tag == 'alias':
currency_formats[type] = Alias(
_translate_alias(['currency_formats', elem.attrib['type']],
child.attrib['path'])
)
elif child.tag == 'pattern':
pattern = text_type(child.text)
currency_formats[type] = numbers.parse_pattern(pattern)
for currency_format in tree.findall('.//currencyFormats'):
if _should_skip_number_elem(data, currency_format): # TODO: Support other number systems
continue

for length_elem in currency_format.findall('./currencyFormatLength'):
curr_length_type = length_elem.attrib.get('type')
for elem in length_elem.findall('currencyFormat'):
type = elem.attrib.get('type')
if curr_length_type:
# Handle `<currencyFormatLength type="short">`, etc.
# TODO(3.x): use nested dicts instead of colon-separated madness
type = '%s:%s' % (type, curr_length_type)
if _should_skip_elem(elem, type, currency_formats):
continue
for child in elem.getiterator():
if child.tag == 'alias':
currency_formats[type] = Alias(
_translate_alias(['currency_formats', elem.attrib['type']],
child.attrib['path'])
)
elif child.tag == 'pattern':
pattern = text_type(child.text)
currency_formats[type] = numbers.parse_pattern(pattern)


def parse_day_period_rules(tree):
Expand Down

0 comments on commit 77dc9d4

Please sign in to comment.