Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgrade to CLDR 45 #1077

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 26 additions & 2 deletions babel/localedata.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,27 @@ def locale_identifiers() -> list[str]:
]


def _is_non_likely_script(name: str) -> bool:
    """Return whether the locale is of the form ``lang_Script``,
    and the script is not the likely script for the language.

    This implements the behavior of the ``nonlikelyScript`` value of the
    ``localeRules`` attribute for parent locales added in CLDR 45.

    :param name: the locale identifier to inspect, e.g. ``"az_Arab"``
    :return: ``True`` only when `name` consists of exactly a language and a
             script, and that script differs from the script recorded for
             the language in the ``likely_subtags`` global data. ``False``
             for every other shape of identifier, for identifiers that
             cannot be parsed, and for languages that have no
             ``likely_subtags`` entry.
    """
    # Imported at call time, as in the original block
    # (presumably to avoid a circular import with babel.core -- confirm).
    from babel.core import get_global, parse_locale

    try:
        lang, territory, script, variant, *rest = parse_locale(name)
    except ValueError:
        # Not a parseable locale identifier, so the rule cannot apply.
        return False

    # The rule only concerns bare ``lang_Script`` identifiers.
    if not (lang and script) or territory or variant or rest:
        return False

    likely_subtag = get_global('likely_subtags').get(lang)
    if not likely_subtag:
        # No likely-subtags entry for this language: there is no likely
        # script to compare against. Passing ``None`` on to parse_locale()
        # would raise, so report "not a non-likely script" and let the
        # caller fall back to its regular parent resolution.
        return False

    _, _, likely_script, *_ = parse_locale(likely_subtag)
    return script != likely_script


def load(name: os.PathLike[str] | str, merge_inherited: bool = True) -> dict[str, Any]:
"""Load the locale data for the given locale.

Expand Down Expand Up @@ -132,8 +153,11 @@ def load(name: os.PathLike[str] | str, merge_inherited: bool = True) -> dict[str
from babel.core import get_global
parent = get_global('parent_exceptions').get(name)
if not parent:
parts = name.split('_')
parent = "root" if len(parts) == 1 else "_".join(parts[:-1])
if _is_non_likely_script(name):
parent = 'root'
Comment on lines +156 to +157
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we have some sort of test case for this code path? I'm not sure which locales might exhibit this, though...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is already covered by the second test case with az_Arab. Do you want me to add another test for _is_non_likely_script as well?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any news on this @akx ? Do you think it's ok like this?

else:
parts = name.split('_')
parent = "root" if len(parts) == 1 else "_".join(parts[:-1])
data = load(parent).copy()
filename = resolve_locale_filename(name)
with open(filename, 'rb') as fileobj:
Expand Down
8 changes: 4 additions & 4 deletions scripts/download_import_cldr.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
import zipfile
from urllib.request import urlretrieve

URL = 'https://unicode.org/Public/cldr/44/cldr-common-44.0.zip'
FILENAME = 'cldr-common-44.0.zip'
# Via https://unicode.org/Public/cldr/44/hashes/SHASUM512
FILESUM = 'f2cd8733948caf308d6e39eae21724da7f29f528f8969d456514e1e84ecd5f1e6936d0460414a968888bb1b597bc1ee723950ea47df5cba21a02bb14f96d18b6'
URL = 'https://unicode.org/Public/cldr/45/cldr-common-45.0.zip'
FILENAME = 'cldr-common-45.0.zip'
# Via https://unicode.org/Public/cldr/45/hashes/SHASUM512.txt
FILESUM = '638123882bd29911fc9492ec152926572fec48eb6c1f5dd706aee3e59cad8be4963a334bb7a09a645dbedc3356f60ef7ac2ef7ab4ccf2c8926b547782175603c'
BLKSIZE = 131072


Expand Down
5 changes: 5 additions & 0 deletions scripts/import_cldr.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,11 @@ def parse_global(srcdir, sup):

for paternity in parentBlock.findall('./parentLocale'):
parent = paternity.attrib['parent']
if parent == 'root':
# Since CLDR-45, the 'root' parent locale uses 'localeRules="nonlikelyScript"' instead of
# 'locales'. This special case is handled in babel when loading locale data
# (https://cldr.unicode.org/index/downloads/cldr-45#h.5rbkhkncdqi9)
continue
for child in paternity.attrib['locales'].split():
parent_exceptions[child] = parent

Expand Down
15 changes: 15 additions & 0 deletions tests/test_localedata.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,21 @@ def test_load():
assert localedata.load('en_US') is localedata.load('en_US')


def test_load_inheritance(monkeypatch):
    """Check which locales end up in the cache after loading a ``lang_Script`` locale."""
    from babel.localedata import _cache

    expected_cache_keys = {
        # Must not be ['root', 'hi_Latn'] even though 'hi_Latn' matches the
        # 'lang_Script' form used by 'nonlikelyScript'. This is because
        # 'hi_Latn' has an explicit parent locale 'en_IN'.
        'hi_Latn': ['root', 'en', 'en_001', 'en_IN', 'hi_Latn'],
        # Must not include 'az' as 'Arab' is not a likely script for 'az'.
        'az_Arab': ['root', 'az_Arab'],
    }

    for locale_id, inheritance_chain in expected_cache_keys.items():
        # Start from an empty cache so only this load's chain is recorded.
        _cache.clear()
        localedata.load(locale_id)
        assert list(_cache.keys()) == inheritance_chain


def test_merge():
d = {1: 'foo', 3: 'baz'}
localedata.merge(d, {1: 'Foo', 2: 'Bar'})
Expand Down
2 changes: 1 addition & 1 deletion tests/test_numbers.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ def test_list_currencies():

assert list_currencies(locale='pa_Arab') == {'PKR', 'INR', 'EUR'}

assert len(list_currencies()) == 305
assert len(list_currencies()) == 306


def test_validate_currency():
Expand Down