Skip to content

Commit

Permalink
feature #52198 [String] New locale aware casing methods (bram123)
Browse files Browse the repository at this point in the history
This PR was squashed before being merged into the 7.1 branch.

Discussion
----------

[String] New locale aware casing methods

| Q             | A
| ------------- | ---
| Branch?       | 7.1
| Bug fix?      | no
| New feature?  | yes<!-- please update src/**/CHANGELOG.md files -->
| Deprecations? | no
| Tickets       | Fix #52161
| License       | MIT

Adds new `localeUpper()`, `localeLower()` and `localeTitle()` methods to the `AbstractUnicodeString` class.
- They change the case of the string according to locale-specific case mappings.

Code examples:
```php
$string = new UnicodeString('άδικος');
echo $string->upper(), PHP_EOL; // ΆΔΙΚΟΣ
echo $string->localeUpper('el'), PHP_EOL; // ΑΔΙΚΟΣ

$string = new UnicodeString('ijssel');
echo $string->title(), PHP_EOL; // Ijssel
echo $string->localeTitle('nl'), PHP_EOL; // IJssel

$string = new UnicodeString('İSTANBUL');
echo $string->lower(), PHP_EOL; // i̇stanbul (LATIN SMALL LETTER I COMBINING DOT ABOVE)
echo $string->localeLower('tr'), PHP_EOL; // istanbul (LATIN SMALL LETTER I)
```

Commits
-------

7f4ed5c [String] New locale aware casing methods
  • Loading branch information
fabpot committed Nov 18, 2023
2 parents 34edf43 + 7f4ed5c commit 19f5240
Show file tree
Hide file tree
Showing 3 changed files with 193 additions and 0 deletions.
74 changes: 74 additions & 0 deletions src/Symfony/Component/String/AbstractUnicodeString.php
Expand Up @@ -220,6 +220,21 @@ public function lower(): static
return $str;
}

/**
 * Lowercases the string using the casing rules of the given locale.
 *
 * When no transliterator can be resolved for the locale, the result of the
 * locale-agnostic lower() is returned instead.
 *
 * @param string $locale A bare language or language_region identifier (e.g. tr or tr_TR)
 */
public function localeLower(string $locale): static
{
    $transliterator = $this->getLocaleTransliterator($locale, 'Lower');

    if (null === $transliterator) {
        return $this->lower();
    }

    // Work on a copy so the current instance keeps its value
    $lowered = clone $this;
    $lowered->string = $transliterator->transliterate($lowered->string);

    return $lowered;
}

public function match(string $regexp, int $flags = 0, int $offset = 0): array
{
$match = ((\PREG_PATTERN_ORDER | \PREG_SET_ORDER) & $flags) ? 'preg_match_all' : 'preg_match';
Expand Down Expand Up @@ -363,6 +378,21 @@ public function title(bool $allWords = false): static
return $str;
}

/**
 * Title-cases the string using the casing rules of the given locale.
 *
 * When no transliterator can be resolved for the locale, the result of the
 * locale-agnostic title() (called with its default arguments) is returned instead.
 *
 * @param string $locale A bare language or language_region identifier (e.g. nl or nl_BE)
 */
public function localeTitle(string $locale): static
{
    $transliterator = $this->getLocaleTransliterator($locale, 'Title');

    if (null === $transliterator) {
        return $this->title();
    }

    // Work on a copy so the current instance keeps its value
    $titled = clone $this;
    $titled->string = $transliterator->transliterate($titled->string);

    return $titled;
}

public function trim(string $chars = " \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}"): static
{
if (" \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}" !== $chars && !preg_match('//u', $chars)) {
Expand Down Expand Up @@ -450,6 +480,21 @@ public function upper(): static
return $str;
}

/**
 * Uppercases the string using the casing rules of the given locale.
 *
 * When no transliterator can be resolved for the locale, the result of the
 * locale-agnostic upper() is returned instead.
 *
 * @param string $locale A bare language or language_region identifier (e.g. el or el_GR)
 */
public function localeUpper(string $locale): static
{
    $transliterator = $this->getLocaleTransliterator($locale, 'Upper');

    if (null === $transliterator) {
        return $this->upper();
    }

    // Work on a copy so the current instance keeps its value
    $uppered = clone $this;
    $uppered->string = $transliterator->transliterate($uppered->string);

    return $uppered;
}

public function width(bool $ignoreAnsiDecoration = true): int
{
$width = 0;
Expand Down Expand Up @@ -587,4 +632,33 @@ private function wcswidth(string $string): int

return $width;
}

/**
 * Returns the transliterator registered for the rule "<locale>-<id>", or null when none can be created.
 *
 * When the locale contains an underscore (nl_BE), the part before it (nl) is tried as a parent locale.
 * Every outcome, including a failed lookup (null), is stored in self::$transliterators so that
 * a given rule is only resolved once.
 *
 * @param string $locale A bare language or language_region identifier (e.g. nl or nl_BE)
 * @param string $id     Casing transform appended to the locale to build the rule (e.g. Lower, Upper, Title)
 */
private function getLocaleTransliterator(string $locale, string $id): ?\Transliterator
{
    // Without the intl extension the Transliterator class does not exist; report "not available"
    // so callers can use their locale-agnostic fallback instead of failing with a "class not found" error
    if (!class_exists(\Transliterator::class)) {
        return null;
    }

    $rule = $locale.'-'.$id;

    // array_key_exists() rather than isset(): a cached null (known failure) must also short-circuit
    if (\array_key_exists($rule, self::$transliterators)) {
        return self::$transliterators[$rule];
    }

    if (null !== $transliterator = self::$transliterators[$rule] = \Transliterator::create($rule)) {
        return $transliterator;
    }

    // Try to find a parent locale (nl_BE -> nl)
    if (false === $i = strpos($locale, '_')) {
        return null;
    }

    // Replace everything from the first underscore on by "-<id>" (nl_BE + Lower -> nl-Lower)
    $parentRule = substr_replace($locale, '-'.$id, $i);

    // Parent locale was already cached, return and store as current locale
    if (\array_key_exists($parentRule, self::$transliterators)) {
        return self::$transliterators[$rule] = self::$transliterators[$parentRule];
    }

    // Create transliterator based on parent locale and cache the result on both initial and parent locale values
    $transliterator = \Transliterator::create($parentRule);

    return self::$transliterators[$rule] = self::$transliterators[$parentRule] = $transliterator;
}
}
5 changes: 5 additions & 0 deletions src/Symfony/Component/String/CHANGELOG.md
@@ -1,6 +1,11 @@
CHANGELOG
=========

7.1
---

* Add `localeLower()`, `localeUpper()`, `localeTitle()` methods to `AbstractUnicodeString`

6.2
---

Expand Down
114 changes: 114 additions & 0 deletions src/Symfony/Component/String/Tests/AbstractUnicodeTestCase.php
Expand Up @@ -50,6 +50,48 @@ public function testAsciiClosureRule()
$this->assertSame('Dieser Wert sollte grOEsser oder gleich', (string) $s->ascii([$rule]));
}

/**
 * Checks that localeLower() returns a new instance holding the expected locale-specific result
 * and leaves the instance it was called on untouched.
 *
 * @dataProvider provideLocaleLower
 *
 * @requires extension intl
 */
public function testLocaleLower(string $locale, string $expected, string $origin)
{
    $original = static::createFromString($origin);
    $before = (string) $original;

    $instance = $original->localeLower($locale);

    // Compare with the object the method was called on: a freshly created string
    // can never be identical to $instance, which would make this assertion vacuous
    $this->assertNotSame($original, $instance);
    $this->assertSame($before, (string) $original);
    $this->assertEquals(static::createFromString($expected), $instance);
    $this->assertSame($expected, (string) $instance);
}

/**
 * Checks that localeUpper() returns a new instance holding the expected locale-specific result
 * and leaves the instance it was called on untouched.
 *
 * @dataProvider provideLocaleUpper
 *
 * @requires extension intl
 */
public function testLocaleUpper(string $locale, string $expected, string $origin)
{
    $original = static::createFromString($origin);
    $before = (string) $original;

    $instance = $original->localeUpper($locale);

    // Compare with the object the method was called on: a freshly created string
    // can never be identical to $instance, which would make this assertion vacuous
    $this->assertNotSame($original, $instance);
    $this->assertSame($before, (string) $original);
    $this->assertEquals(static::createFromString($expected), $instance);
    $this->assertSame($expected, (string) $instance);
}

/**
 * Checks that localeTitle() returns a new instance holding the expected locale-specific result
 * and leaves the instance it was called on untouched.
 *
 * @dataProvider provideLocaleTitle
 *
 * @requires extension intl
 */
public function testLocaleTitle(string $locale, string $expected, string $origin)
{
    $original = static::createFromString($origin);
    $before = (string) $original;

    $instance = $original->localeTitle($locale);

    // Compare with the object the method was called on: a freshly created string
    // can never be identical to $instance, which would make this assertion vacuous
    $this->assertNotSame($original, $instance);
    $this->assertSame($before, (string) $original);
    $this->assertEquals(static::createFromString($expected), $instance);
    $this->assertSame($expected, (string) $instance);
}

public function provideCreateFromCodePoint(): array
{
return [
Expand Down Expand Up @@ -291,6 +333,78 @@ public static function provideLower(): array
);
}

/**
 * Data sets for testLocaleLower().
 *
 * Each entry is [locale, expected lowercased string, original string].
 */
public static function provideLocaleLower(): array
{
return [
// Lithuanian
// Introduce an explicit dot above when lowercasing capital I's and J's
// whenever there are more accents above.
// LATIN CAPITAL LETTER I WITH OGONEK -> LATIN SMALL LETTER I WITH OGONEK
['lt', 'į', 'Į'],
// LATIN CAPITAL LETTER I WITH GRAVE -> LATIN SMALL LETTER I COMBINING DOT ABOVE COMBINING GRAVE ACCENT
['lt', 'i̇̀', 'Ì'],
// LATIN CAPITAL LETTER I WITH ACUTE -> LATIN SMALL LETTER I COMBINING DOT ABOVE COMBINING ACUTE ACCENT
['lt', 'i̇́', 'Í'],
// LATIN CAPITAL LETTER I WITH TILDE -> LATIN SMALL LETTER I COMBINING DOT ABOVE COMBINING TILDE
['lt', 'i̇̃', 'Ĩ'],

// Turkish and Azeri
// When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into 'i'.
// LATIN CAPITAL LETTER I WITH DOT ABOVE -> LATIN SMALL LETTER I
['tr', 'i', 'İ'],
['tr_TR', 'i', 'İ'],
['az', 'i', 'İ'],

// Default casing rules
// LATIN CAPITAL LETTER I WITH DOT ABOVE -> LATIN SMALL LETTER I COMBINING DOT ABOVE
['en_US', 'i̇', 'İ'],
['en', 'i̇', 'İ'],
];
}

/**
 * Data sets for testLocaleUpper().
 *
 * Each entry is [locale, expected uppercased string, original string].
 */
public static function provideLocaleUpper(): array
{
return [
// Turkish and Azeri
// When uppercasing, i turns into a dotted capital I
// LATIN SMALL LETTER I -> LATIN CAPITAL LETTER I WITH DOT ABOVE
['tr', 'İ', 'i'],
['tr_TR', 'İ', 'i'],
['az', 'İ', 'i'],

// Greek
// Remove accents when uppercasing
// GREEK SMALL LETTER ALPHA WITH TONOS -> GREEK CAPITAL LETTER ALPHA
['el', 'Α', 'ά'],
['el_GR', 'Α', 'ά'],

// Default casing rules
// GREEK SMALL LETTER ALPHA WITH TONOS -> GREEK CAPITAL LETTER ALPHA WITH TONOS
['en_US', 'Ά', 'ά'],
['en', 'Ά', 'ά'],
];
}

/**
 * Data sets for testLocaleTitle().
 *
 * Each entry is [locale, expected title-cased string, original string].
 */
public static function provideLocaleTitle(): array
{
return [
// Greek
// Titlecasing words, should keep the accents on the first letter
// (the 'en' entry expects the same result as the Greek locales)
['el', 'Άδικος', 'άδικος'],
['el_GR', 'Άδικος', 'άδικος'],
['en', 'Άδικος', 'άδικος'],

// Dutch
// Title casing should treat 'ij' as one character
['nl_NL', 'IJssel', 'ijssel'],
['nl_BE', 'IJssel', 'ijssel'],
['nl', 'IJssel', 'ijssel'],

// Default casing rules
// Only the first letter is capitalized: 'ij' is not treated as one character
['en', 'Ijssel', 'ijssel'],
];
}

public static function provideUpper(): array
{
return array_merge(
Expand Down

0 comments on commit 19f5240

Please sign in to comment.