Skip to content

Commit

Permalink
feat: add library method for validating default locale strings
Browse files Browse the repository at this point in the history
  • Loading branch information
ramsey committed Oct 11, 2023
1 parent 87f4685 commit 3508594
Show file tree
Hide file tree
Showing 9 changed files with 183 additions and 52 deletions.
1 change: 1 addition & 0 deletions config.m4
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ if test "$PHP_ECMA_INTL" != "no"; then
tests/criterion/ecma402/hour_cycle_test.c \
tests/criterion/ecma402/language_tag_test.cpp \
tests/criterion/ecma402/locale_canonicalization_test.c \
tests/criterion/ecma402/locale_default_validation_test.cpp \
tests/criterion/ecma402/locale_getBaseName_test.c \
tests/criterion/ecma402/locale_getCalendar_test.c \
tests/criterion/ecma402/locale_getCaseFirst_test.c \
Expand Down
7 changes: 7 additions & 0 deletions package.xml
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ Add Locale::$currency and Locale\Options::$currency properties. ECMA-402 does no
<file name="hour_cycle_test.c" role="test"/>
<file name="language_tag_test.cpp" role="test"/>
<file name="locale_canonicalization_test.c" role="test"/>
<file name="locale_default_validation_test.cpp" role="test"/>
<file name="locale_getBaseName_test.c" role="test"/>
<file name="locale_getCalendar_test.c" role="test"/>
<file name="locale_getCaseFirst_test.c" role="test"/>
Expand All @@ -161,6 +162,12 @@ Add Locale::$currency and Locale\Options::$currency properties. ECMA-402 does no
</dir>
<file name="Pest.php" role="test"/>
<dir name="phpt">
<file name="ini-default_locale-001.phpt" role="test"/>
<file name="ini-default_locale-002.phpt" role="test"/>
<file name="ini-default_locale-003.phpt" role="test"/>
<file name="ini-default_locale-004.phpt" role="test"/>
<file name="ini-default_locale-005.phpt" role="test"/>
<file name="ini-default_locale-006.phpt" role="test"/>
<dir name="Intl">
<file name="Category-001.phpt" role="test"/>
<file name="Collator_supportedLocalesOf-001.phpt" role="test"/>
Expand Down
30 changes: 30 additions & 0 deletions src/ecma402/locale.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -608,6 +608,36 @@ int ecma402_minimize(const char *localeId, char *minimized, ecma402_errorStatus
return getMaxOrMin(MINIMIZE, localeId, minimized, status, isCanonicalized);
}

int ecma402_validateAndCanonicalizeForDefaultLocaleId(const char *defaultLocaleId, char *result)
{
char **available, *bestAvailable, *canonicalized;
size_t total, length, resultLength = -1;
ecma402_errorStatus *status;

available = (char **)malloc(sizeof(char *) * uloc_countAvailable());
bestAvailable = (char *)malloc(sizeof(char) * ULOC_FULLNAME_CAPACITY);
total = ecma402_intlAvailableLocales(available);

if (ecma402_bestAvailableLocale(available, total, defaultLocaleId, bestAvailable, false) > 0) {
status = ecma402_initErrorStatus();
canonicalized = (char *)malloc(sizeof(char) * ULOC_FULLNAME_CAPACITY);
length = languageTagForLocaleId(defaultLocaleId, canonicalized, status);

if (!ecma402_hasError(status) && length > 0) {
strcpy(result, canonicalized);
resultLength = length;
}

free(canonicalized);
ecma402_freeErrorStatus(status);
}

free(bestAvailable);
free(available);

return resultLength;
}

namespace {

int getHourCyclesForLocale(char *localeId, const char **values)
Expand Down
19 changes: 19 additions & 0 deletions src/ecma402/locale.h
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,25 @@ int ecma402_maximize(const char *localeId, char *maximized, ecma402_errorStatus
*/
int ecma402_minimize(const char *localeId, char *minimized, ecma402_errorStatus *status, bool isCanonicalized);

/**
 * Validates that a locale ID is supported by this implementation (using
 * ecma402_bestAvailableLocale()) and, if so, stores its canonicalized form
 * in the result buffer.
 *
 * The caller must allocate the result buffer before calling this function,
 * with enough memory to store the canonicalized value. Typically, this
 * should use ULOC_FULLNAME_CAPACITY. For example:
 *
 *     malloc(sizeof(char) * ULOC_FULLNAME_CAPACITY)
 *
 * The caller retains ownership of the buffer and is responsible for
 * releasing it.
 *
 * @param defaultLocaleId The locale ID intended for use as the default locale.
 * @param result A buffer in which to store the validated and canonicalized
 *               default locale.
 *
 * @return The length of the string stored to the result buffer, or -1 if the
 *         default locale ID could not be validated or canonicalized.
 */
int ecma402_validateAndCanonicalizeForDefaultLocaleId(const char *defaultLocaleId, char *result);

#ifdef __cplusplus
}
#endif
Expand Down
105 changes: 56 additions & 49 deletions src/php/ecma_intl.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,55 +35,41 @@

ZEND_DECLARE_MODULE_GLOBALS(ecma_intl)

/**
* Validates whether the locale provided in the ecma_intl.default_locale INI
* setting is supported by this implementation. If so, it stores the canonicalized
* BCP 47 version of the language tag to a global setting.
*/
ZEND_INI_MH(onUpdateLocale)
const char *ecma_defaultLocale(void)
{
if (!new_value || (new_value && !ZSTR_VAL(new_value)[0])) {
return FAILURE;
}

char **p = (char **)ZEND_INI_GET_ADDR();
char **available, *bestAvailable, *canonicalized;
size_t total, length;
zend_result result = FAILURE;
ecma402_errorStatus *status;

available = (char **)emalloc(sizeof(char *) * uloc_countAvailable());
bestAvailable = (char *)emalloc(sizeof(char) * ULOC_FULLNAME_CAPACITY);
total = ecma402_intlAvailableLocales(available);

if (ecma402_bestAvailableLocale(available, total, ZSTR_VAL(new_value), bestAvailable, false) > 0) {
status = ecma402_initErrorStatus();
canonicalized = (char *)emalloc(sizeof(char) * ULOC_FULLNAME_CAPACITY);
length = ecma402_canonicalizeUnicodeLocaleId(ZSTR_VAL(new_value), canonicalized, status);

if (!ecma402_hasError(status) && length > 0) {
strcpy(*p, canonicalized);
result = SUCCESS;
if (ECMA_INTL_G(defaultLocale) == NULL || strcmp(ECMA_INTL_G(defaultLocale), "") == 0) {
char *ini = INI_STR(PHP_ECMA_INI_DEFAULT_LOCALE);
if (ini == NULL || strcmp(ini, "") == 0) {
// Fall back to ICU default, if we don't have a default locale set.
ini = (char *)uloc_getDefault();
}

efree(canonicalized);
ecma402_freeErrorStatus(status);
// Let's check again to make sure ICU gave us a value.
if (ini == NULL || strcmp(ini, "") == 0) {
// If all else fails, use "en" as the default locale. This isn't
// perfect, but it ensures we have at least something.
strcpy(ECMA_INTL_G(defaultLocale), "en");
} else {
char *defaultLocaleId = (char *)emalloc(sizeof(char) * ULOC_FULLNAME_CAPACITY);
if (ecma402_validateAndCanonicalizeForDefaultLocaleId(ini, defaultLocaleId) > 0) {
strcpy(ECMA_INTL_G(defaultLocale), defaultLocaleId);
}
efree(defaultLocaleId);
}
}

efree(bestAvailable);
efree(available);

return result;
return ECMA_INTL_G(defaultLocale);
}

static PHP_GINIT_FUNCTION(ecma_intl);
static PHP_GSHUTDOWN_FUNCTION(ecma_intl);
static ZEND_INI_MH(onUpdateLocale);

PHP_INI_BEGIN()
STD_PHP_INI_ENTRY("ecma_intl.default_locale", NULL, PHP_INI_ALL, onUpdateLocale, defaultLocale, zend_ecma_intl_globals,
STD_PHP_INI_ENTRY(PHP_ECMA_INI_DEFAULT_LOCALE, NULL, PHP_INI_ALL, onUpdateLocale, defaultLocale, zend_ecma_intl_globals,
ecma_intl_globals)
PHP_INI_END()

static PHP_GINIT_FUNCTION(ecma_intl);
static PHP_GSHUTDOWN_FUNCTION(ecma_intl);

zend_module_entry ecma_intl_module_entry = {STANDARD_MODULE_HEADER,
"ecma_intl",
NULL,
Expand All @@ -106,17 +92,6 @@ ZEND_TSRMLS_CACHE_DEFINE()
ZEND_GET_MODULE(ecma_intl)
#endif

/**
 * Module-globals constructor for ecma_intl: zeroes the globals struct, then
 * allocates a ULOC_FULLNAME_CAPACITY-byte buffer for the default locale.
 */
static PHP_GINIT_FUNCTION(ecma_intl)
{
ZEND_SECURE_ZERO(ecma_intl_globals, sizeof(zend_ecma_intl_globals));
// NOTE(review): nothing here writes to the new buffer, so its contents
// look undefined until an INI update stores a value -- confirm.
ecma_intl_globals->defaultLocale = (char *)emalloc(sizeof(char) * ULOC_FULLNAME_CAPACITY);
}

/**
 * Module-globals destructor for ecma_intl: releases the default locale
 * buffer held in the globals struct.
 */
static PHP_GSHUTDOWN_FUNCTION(ecma_intl)
{
efree(ecma_intl_globals->defaultLocale);
}

PHP_MINIT_FUNCTION(ecma_intl_all)
{
REGISTER_INI_ENTRIES();
Expand Down Expand Up @@ -172,3 +147,35 @@ PHP_MINFO_FUNCTION(ecma_intl)

DISPLAY_INI_ENTRIES();
}

static PHP_GINIT_FUNCTION(ecma_intl)
{
ZEND_SECURE_ZERO(ecma_intl_globals, sizeof(zend_ecma_intl_globals));
ecma_intl_globals->defaultLocale = (char *)emalloc(sizeof(char) * ULOC_FULLNAME_CAPACITY);
}

static PHP_GSHUTDOWN_FUNCTION(ecma_intl)
{
efree(ecma_intl_globals->defaultLocale);
}

/**
 * INI update handler for the default locale setting.
 *
 * Rejects a missing or empty value. Otherwise the value is passed to
 * ecma402_validateAndCanonicalizeForDefaultLocaleId(); when that reports a
 * positive length, the canonicalized string is copied into the storage the
 * INI entry points at and the update is accepted.
 *
 * @return SUCCESS when the value was validated and stored, FAILURE otherwise.
 */
static ZEND_INI_MH(onUpdateLocale)
{
    /* Nothing to validate when the setting is unset or empty. */
    if (new_value == NULL || ZSTR_VAL(new_value)[0] == '\0') {
        return FAILURE;
    }

    char **target = (char **)ZEND_INI_GET_ADDR();
    char *canonical = (char *)emalloc(sizeof(char) * ULOC_FULLNAME_CAPACITY);
    zend_result outcome;

    if (ecma402_validateAndCanonicalizeForDefaultLocaleId(ZSTR_VAL(new_value), canonical) > 0) {
        strcpy(*target, canonical);
        outcome = SUCCESS;
    } else {
        outcome = FAILURE;
    }

    efree(canonical);

    return outcome;
}
1 change: 1 addition & 0 deletions src/php/ecma_intl.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ extern zend_module_entry ecma_intl_module_entry;
#define phpext_ecma_intl_ptr &ecma_intl_module_entry

#define PHP_ECMA_INTL_VERSION "0.3.0-dev"
#define PHP_ECMA_INI_DEFAULT_LOCALE "ecma_intl.default_locale"

ZEND_BEGIN_MODULE_GLOBALS(ecma_intl)
char *defaultLocale;
Expand Down
62 changes: 62 additions & 0 deletions tests/criterion/ecma402/locale_default_validation_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#include "../test.h"

#include "src/ecma402/locale.h"

#include <unicode/uloc.h>

#define TEST_SUITE ecma402LocaleDefaultValidation

// NOLINTBEGIN(cert-err58-cpp, misc-const-correctness,
// misc-use-anonymous-namespace)

using string = std::basic_string<char, std::char_traits<char>, criterion::allocator<char>>;

struct defaultLocaleIdTest {
string defaultLocaleId;
string expected;
size_t expectedLength;

defaultLocaleIdTest(string defaultLocaleId, string expected, size_t expectedLength)
: defaultLocaleId(defaultLocaleId), expected(expected), expectedLength(expectedLength)
{}
};

// Builds the parameter list for validateAndCanonicalizeForDefaultLocaleId:
// each row is an input locale ID, the expected canonicalized value, and the
// expected length (-1 for inputs that must be rejected).
ParameterizedTestParameters(TEST_SUITE, validateAndCanonicalizeForDefaultLocaleId)
{
    struct row {
        const char *input;
        const char *canonical;
        int length;
    };

    static const row rows[] = {
        {"en-US", "en-US", 5},
        {"en_US", "en-US", 5},
        {"en-Latn-US", "en-Latn-US", 10},
        {"en-US-u-nu-latn-ca-gregory", "en-US-u-ca-gregory-nu-latn", 26},
        {"foobar", "", -1},
        {"zz-ZZ", "", -1},
        {"und-u-va-posix", "", -1},
        {"en-US-POSIX", "en-US-u-va-posix", 16},
        {"en_US_POSIX", "en-US-u-va-posix", 16},
    };

    static criterion::parameters<struct defaultLocaleIdTest> tests;

    for (const row &entry : rows) {
        tests.emplace_back(entry.input, entry.canonical, entry.length);
    }

    return tests;
}

ParameterizedTest(struct defaultLocaleIdTest *test, TEST_SUITE, validateAndCanonicalizeForDefaultLocaleId)
{
char *result;
size_t resultLength;

result = (char *)malloc(sizeof(char) * ULOC_FULLNAME_CAPACITY);

resultLength = ecma402_validateAndCanonicalizeForDefaultLocaleId(test->defaultLocaleId.c_str(), result);

cr_expect(eq(i8, resultLength, test->expectedLength));

if (test->expectedLength > 0) {
cr_expect(eq(str, result, test->expected.c_str()));
} else {
cr_expect(eq(ptr, result, nullptr));
}

free(result);
}

// NOLINTEND(cert-err58-cpp, misc-const-correctness,
// misc-use-anonymous-namespace)
4 changes: 2 additions & 2 deletions tests/phpt/ini-default_locale-005.phpt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
--TEST--
ecma_intl.default_locale cannot recognize underscores in locale IDs
ecma_intl.default_locale can recognize underscores in locale IDs
--EXTENSIONS--
ecma_intl
--INI--
Expand All @@ -11,4 +11,4 @@ declare(strict_types=1);
var_dump(ini_get('ecma_intl.default_locale'));

--EXPECT--
string(0) ""
string(11) "en_US_POSIX"
6 changes: 5 additions & 1 deletion tests/phpt/ini-default_locale-006.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,20 @@ ecma_intl.default_locale=
declare(strict_types=1);

var_dump(ini_set('ecma_intl.default_locale', 'en-US'));
var_dump(ini_set('ecma_intl.default_locale', 'en_US'));
var_dump(ini_set('ecma_intl.default_locale', 'foobar'));
var_dump(ini_set('ecma_intl.default_locale', 'en-Latn-US'));
var_dump(ini_set('ecma_intl.default_locale', 'en-US-POSIX'));
var_dump(ini_set('ecma_intl.default_locale', 'en_US_POSIX'));
var_dump(ini_set('ecma_intl.default_locale', 'de'));
var_dump(ini_get('ecma_intl.default_locale'));

--EXPECT--
string(0) ""
bool(false)
string(5) "en-US"
bool(false)
string(5) "en_US"
string(10) "en-Latn-US"
string(11) "en-US-POSIX"
string(11) "en_US_POSIX"
string(2) "de"

0 comments on commit 3508594

Please sign in to comment.