Skip to content

Commit

Permalink
fix: ensure values of en-US-POSIX canonicalize correctly
Browse files Browse the repository at this point in the history
  • Loading branch information
ramsey committed Sep 11, 2023
1 parent 5f4c7b7 commit 707d52a
Show file tree
Hide file tree
Showing 10 changed files with 108 additions and 36 deletions.
89 changes: 68 additions & 21 deletions src/ecma402/locale.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ namespace {
// Forward declarations of helpers that live in this unnamed namespace.
int getNumberingSystemsForLocale(char *localeId, const char **values);
int getTimeZonesForLocale(char *localeId, const char **values);
// Writes the language tag for localeId into languageTag; errors are reported
// through status.
int languageTagForLocaleId(const char *localeId, char *languageTag, ecma402_errorStatus *status);
// Same signature as languageTagForLocaleId; used by it as an alternate
// conversion path.
int languageTagForLocaleIdAlt(const char *localeId, char *languageTag, ecma402_errorStatus *status);

} // namespace

Expand Down Expand Up @@ -868,54 +869,100 @@ namespace {

/**
 * Converts a locale identifier into a BCP 47 language tag.
 *
 * On success the tag is copied into languageTag (length + 1 bytes) and the
 * tag length is returned. On failure an error is recorded in status through
 * ecma402_ecmaError() or ecma402_icuError() and -1 is returned.
 *
 * NOTE(review): this listing interleaves the previous and the updated lines
 * of the function (for example, both canonicalLocale and unicodeLocale are
 * declared and assigned from createCanonical()). Read it as a before/after
 * view rather than as a single compilable body.
 */
int languageTagForLocaleId(const char *localeId, char *languageTag, ecma402_errorStatus *status)
{
icu::Locale canonicalLocale;
UErrorCode icuStatus = U_ZERO_ERROR;
UBool const strict = 1;
char *unicodeLocaleId;
icu::Locale unicodeLocale;
std::string bcp47Locale;
char *resolvedLocale;
int resolvedLocaleLength;

canonicalLocale = icu::Locale::createCanonical(localeId);
if (canonicalLocale == nullptr) {
ecma402_ecmaError(status, CANNOT_CREATE_LOCALE_ID, "Invalid language tag \"%s\"", localeId);
return -1;
unicodeLocale = icu::Locale::createCanonical(localeId);
// NOTE(review): unicodeLocale is an icu::Locale object, not a pointer.
// Confirm what this comparison with nullptr actually tests.
if (unicodeLocale == nullptr) {
// If icu::Locale::createCanonical() fails to create a locale, then
// use an alternate form of creation.
// See: https://unicode-org.atlassian.net/browse/ICU-22486
return languageTagForLocaleIdAlt(localeId, languageTag, status);
}

std::string const locale = canonicalLocale.toLanguageTag<std::string>(icuStatus);
if (U_FAILURE(icuStatus) != U_ZERO_ERROR) {
bcp47Locale = unicodeLocale.toLanguageTag<std::string>(icuStatus);
if (icuStatus > U_ZERO_ERROR) {
ecma402_icuError(status, icuStatus, "Invalid language tag \"%s\"", localeId);
return -1;
}

// If the input localeId is not "und," but we got "und," then return 0.
if (strcasecmp(locale.c_str(), UNDETERMINED_LANGUAGE) == 0 &&
// If the input localeId is not "und," but we got "und," then return -1.
if (strcasecmp(bcp47Locale.c_str(), UNDETERMINED_LANGUAGE) == 0 &&
strcasecmp(localeId, UNDETERMINED_LANGUAGE) != 0) {
ecma402_ecmaError(status, UNDEFINED_LOCALE_ID, "Invalid language tag \"%s\"", localeId);
return -1;
}

// This additional conversion step forces tags like "en-latn-us-co-foo" and
// "de-de_euro" to result in failures, which is the desired result.
unicodeLocaleId = (char *)malloc(sizeof(char) * ULOC_FULLNAME_CAPACITY);
int const length =
uloc_toLanguageTag(locale.c_str(), unicodeLocaleId, ULOC_FULLNAME_CAPACITY, strict, &icuStatus);
resolvedLocale = (char *)malloc(sizeof(char) * ULOC_FULLNAME_CAPACITY);
resolvedLocaleLength =
uloc_toLanguageTag(bcp47Locale.c_str(), resolvedLocale, ULOC_FULLNAME_CAPACITY, 1, &icuStatus);

if (U_FAILURE(icuStatus) != U_ZERO_ERROR || strlen(unicodeLocaleId) == 0 || unicodeLocaleId == nullptr) {
if (U_FAILURE(icuStatus) != U_ZERO_ERROR) {
// NOTE(review): resolvedLocale is compared with nullptr only after strlen()
// has already read through it, and the malloc() result above is passed to
// uloc_toLanguageTag() without a check.
if (icuStatus > U_ZERO_ERROR || strlen(resolvedLocale) == 0 || resolvedLocale == nullptr) {
if (icuStatus > U_ZERO_ERROR) {
ecma402_icuError(status, icuStatus, "Invalid language tag \"%s\"", localeId);
} else {
ecma402_ecmaError(status, INVALID_LOCALE_ID, "Invalid language tag \"%s\"", localeId);
}

if (unicodeLocaleId != nullptr) {
free(unicodeLocaleId);
if (resolvedLocale != nullptr) {
free(resolvedLocale);
}

return -1;
}

memcpy(languageTag, unicodeLocaleId, length + 1);
free(unicodeLocaleId);
// Copy the tag plus one extra byte into the caller's buffer, then release
// the temporary buffer.
memcpy(languageTag, resolvedLocale, resolvedLocaleLength + 1);
free(resolvedLocale);

return resolvedLocaleLength;
}

int languageTagForLocaleIdAlt(const char *localeId, char *languageTag, ecma402_errorStatus *status)
{
UErrorCode icuStatus = U_ZERO_ERROR;
char *unicodeLocale, *bcp47Locale;
int bcp47LocaleLength;

unicodeLocale = (char *)malloc(sizeof(char) * ULOC_FULLNAME_CAPACITY);
uloc_canonicalize(localeId, unicodeLocale, ULOC_FULLNAME_CAPACITY, &icuStatus);

if (icuStatus > U_ZERO_ERROR) {
ecma402_icuError(status, icuStatus, "Invalid language tag \"%s\"", localeId);
free(unicodeLocale);
return -1;
}

bcp47Locale = (char *)malloc(sizeof(char) * ULOC_FULLNAME_CAPACITY);
bcp47LocaleLength = uloc_toLanguageTag(unicodeLocale, bcp47Locale, ULOC_FULLNAME_CAPACITY, 1, &icuStatus);
free(unicodeLocale);

if (icuStatus > U_ZERO_ERROR) {
ecma402_icuError(status, icuStatus, "Invalid language tag \"%s\"", localeId);
free(bcp47Locale);
return -1;
}

// If the input localeId is not "und," but we got "und," then return -1.
if (strcasecmp(bcp47Locale, UNDETERMINED_LANGUAGE) == 0 && strcasecmp(localeId, UNDETERMINED_LANGUAGE) != 0) {
ecma402_ecmaError(status, UNDEFINED_LOCALE_ID, "Invalid language tag \"%s\"", localeId);
return -1;
}

if (bcp47Locale == nullptr || strlen(bcp47Locale) == 0) {
ecma402_ecmaError(status, INVALID_LOCALE_ID, "Invalid language tag \"%s\"", localeId);
free(bcp47Locale);
return -1;
}

memcpy(languageTag, bcp47Locale, bcp47LocaleLength + 1);
free(bcp47Locale);

return length;
return bcp47LocaleLength;
}

} // namespace
7 changes: 6 additions & 1 deletion tests/criterion/ecma402/locale_canonicalization_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ Test(TEST_SUITE, canonicalizeLocaleIdHasErrorForStructurallyInvalidLocaleId)

ParameterizedTestParameters(TEST_SUITE, canonicalizeLocaleIdCanonicalizes)
{
START_STRING_TEST_PARAMS(24)
START_STRING_TEST_PARAMS(29)
STRING_TEST("de", "de")
STRING_TEST("DE-de", "de-DE")
STRING_TEST("de-DE", "de-DE")
Expand All @@ -102,6 +102,11 @@ ParameterizedTestParameters(TEST_SUITE, canonicalizeLocaleIdCanonicalizes)
STRING_TEST("zh-xiang-u-nu-thai-x-0", "hsn-u-nu-thai-x-0")
STRING_TEST("de-DE-u-cu-FOO", "de-DE-u-cu-foo")
STRING_TEST("de-DE-u-cu-FOOBAR", "de-DE-u-cu-foobar")
STRING_TEST("de-POSIX", "de-u-va-posix")
STRING_TEST("de-DE-POSIX", "de-DE-u-va-posix")
STRING_TEST("en-POSIX", "en-u-va-posix")
STRING_TEST("en-GB-POSIX", "en-GB-u-va-posix")
STRING_TEST("en-US-POSIX", "en-US-u-va-posix")
END_STRING_TEST_PARAMS;
}

Expand Down
2 changes: 2 additions & 0 deletions tests/phpt/Intl/Locale-001.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ $tests = [
'en-u-kn-false',
'en-u-nu-arab',
'en-latn-us-u-ca-gregory-kf-upper-co-emoji-hc-h23-nu-latn-kn-true-cu-USD',
'en-US-POSIX',
];

foreach ($tests as $test) {
Expand All @@ -38,3 +39,4 @@ en-u-hc-h12
en-u-kn-false
en-u-nu-arab
en-Latn-US-u-ca-gregory-co-emoji-cu-usd-hc-h23-kf-upper-kn-nu-latn
en-US-u-va-posix
2 changes: 2 additions & 0 deletions tests/phpt/Intl/Locale-008.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ $tests = [
)],
['en-latn-us-u-ca-gregory-kf-upper-co-emoji-hc-h23-nu-latn-kn-true', new Options()],
['en-US-u-cu-usd', new Options(currency: 'EUR')],
['en-US-POSIX', new Options(region: 'gb')],
];

foreach ($tests as $params) {
Expand All @@ -54,3 +55,4 @@ en-u-nu-latn
es-Some-MX-u-ca-buddhist-co-phonebk-cu-mxn-hc-h11-kf-false-kn-false-nu-arab
en-Latn-US-u-ca-gregory-co-emoji-hc-h23-kf-upper-kn-nu-latn
en-US-u-cu-eur
en-GB-u-va-posix
5 changes: 5 additions & 0 deletions tests/phpt/Intl/Locale-009.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ $tests = [

'hy-arevela',
'hy-arevmda',
'en-US-POSIX',
];

foreach ($tests as $test) {
Expand Down Expand Up @@ -109,3 +110,7 @@ minimized: hy
canonical: hyw
maximized: hyw
minimized: hyw

canonical: en-US-u-va-posix
maximized: en-Latn-US-u-va-posix
minimized: en-u-va-posix
5 changes: 5 additions & 0 deletions tests/phpt/Intl/Locale-010.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ $tests = [

'hy-arevela',
'hy-arevmda',
'en-US-POSIX',
];

foreach ($tests as $test) {
Expand Down Expand Up @@ -109,3 +110,7 @@ minimized: hy
canonical: hyw
maximized: hyw-Armn-AM
minimized: hyw

canonical: en-US-u-va-posix
maximized: en-Latn-US-u-va-posix
minimized: en-u-va-posix
16 changes: 9 additions & 7 deletions tests/phpt/Intl_getCanonicalLocales-006.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ $generate = function () use ($makeStringable, $makeImplicitStringable): iterable
'cmn-hans-cn-u-ca-t-ca-x-t-u',
$makeImplicitStringable('de-gregory-u-ca-gregory'),
'sgn-GR',
'en-US-POSIX',
'ji',
'de-DD',
$makeStringable('in'),
Expand Down Expand Up @@ -73,11 +74,12 @@ array (
7 => 'zh-Hans-CN-t-ca-u-ca-x-t-u',
8 => 'de-gregory-u-ca-gregory',
9 => 'gss',
10 => 'yi',
11 => 'id',
12 => 'sr-Cyrl-ekavsk',
13 => 'en-CA-newfound',
14 => 'sl-1994-biske-rozaj',
15 => 'da-u-attr',
16 => 'da-u-attr-co-search',
10 => 'en-US-u-va-posix',
11 => 'yi',
12 => 'id',
13 => 'sr-Cyrl-ekavsk',
14 => 'en-CA-newfound',
15 => 'sl-1994-biske-rozaj',
16 => 'da-u-attr',
17 => 'da-u-attr-co-search',
)
16 changes: 9 additions & 7 deletions tests/phpt/Intl_getCanonicalLocales-007.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ $tags = [
$makeStringable('cmn-hans-cn-u-ca-t-ca-x-t-u'),
$makeStringable('de-gregory-u-ca-gregory'),
'sgn-GR',
$makeStringable('en-US-POSIX'),
$makeStringable('ji'),
'de-DD',
'in',
Expand Down Expand Up @@ -66,11 +67,12 @@ array (
7 => 'zh-Hans-CN-t-ca-u-ca-x-t-u',
8 => 'de-gregory-u-ca-gregory',
9 => 'gss',
10 => 'yi',
11 => 'id',
12 => 'sr-Cyrl-ekavsk',
13 => 'en-CA-newfound',
14 => 'sl-1994-biske-rozaj',
15 => 'da-u-attr',
16 => 'da-u-attr-co-search',
10 => 'en-US-u-va-posix',
11 => 'yi',
12 => 'id',
13 => 'sr-Cyrl-ekavsk',
14 => 'en-CA-newfound',
15 => 'sl-1994-biske-rozaj',
16 => 'da-u-attr',
17 => 'da-u-attr-co-search',
)
1 change: 1 addition & 0 deletions tests/phpt/Intl_getCanonicalLocales-008.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ $generate = function () use ($makeStringable): iterable {
'cmn-hans-cn-u-ca-t-ca-x-t-u',
'de-gregory-u-ca-gregory',
'sgn-GR',
'en-US-POSIX',
'ji',
'de-DD',
'in',
Expand Down
1 change: 1 addition & 0 deletions tests/phpt/Intl_getCanonicalLocales-009.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ $tags = [
$makeStringable('cmn-hans-cn-u-ca-t-ca-x-t-u'),
$makeStringable('de-gregory-u-ca-gregory'),
$makeStringable('sgn-GR'),
'en-US-POSIX',
'ji',
'de-DD',
new stdClass(), // Here is the invalid value.
Expand Down

0 comments on commit 707d52a

Please sign in to comment.