Skip to content

Commit

Permalink
Strings: throw exception on malformed UTF-8 in webalize() and toAscii() (#205)
Browse files Browse the repository at this point in the history
  • Loading branch information
VasekPurchart authored and dg committed Dec 3, 2019
1 parent 108730b commit aa00974
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/Utils/Strings.php
Expand Up @@ -142,7 +142,7 @@ public static function toAscii(string $s): string
$transliterator = \Transliterator::create('Any-Latin; Latin-ASCII');
}

$s = preg_replace('#[^\x09\x0A\x0D\x20-\x7E\xA0-\x{2FF}\x{370}-\x{10FFFF}]#u', '', $s);
$s = self::pcre('preg_replace', ['#[^\x09\x0A\x0D\x20-\x7E\xA0-\x{2FF}\x{370}-\x{10FFFF}]#u', '', $s]);
$s = strtr($s, '`\'"^~?', "\x01\x02\x03\x04\x05\x06");
$s = str_replace(
["\u{201E}", "\u{201C}", "\u{201D}", "\u{201A}", "\u{2018}", "\u{2019}", "\u{B0}"],
Expand Down
3 changes: 3 additions & 0 deletions tests/Utils/Strings.toAscii().phpt
Expand Up @@ -21,6 +21,9 @@ Assert::same('', Strings::toAscii("\u{10000}")); // U+10000
Assert::same('', Strings::toAscii("\u{A4}")); // non-ASCII char
Assert::same('a b', Strings::toAscii("a\u{A0}b")); // non-breaking space
Assert::same('Tarikh', Strings::toAscii("Ta\u{2BE}rikh")); // Taʾrikh
// Input ends with a lone 0xFF byte, i.e. it is not valid UTF-8: toAscii() is
// expected to throw RegexpException with code PREG_BAD_UTF8_ERROR.
Assert::exception(function () {
Strings::toAscii("0123456789\xFF");
}, Nette\Utils\RegexpException::class, null, PREG_BAD_UTF8_ERROR);

if (class_exists('Transliterator') && \Transliterator::create('Any-Latin; Latin-ASCII')) {
Assert::same('Athena->Moskva', Strings::toAscii("\u{391}\u{3B8}\u{3AE}\u{3BD}\u{3B1}\u{2192}\u{41C}\u{43E}\u{441}\u{43A}\u{432}\u{430}")); // Αθήνα→Москва
Expand Down
3 changes: 3 additions & 0 deletions tests/Utils/Strings.webalize().phpt
Expand Up @@ -17,3 +17,6 @@ Assert::same('zlutoucky-kun-oooo', Strings::webalize("&\u{17D}LU\u{164}OU\u{10C}
Assert::same('ZLUTOUCKY-KUN-oooo', Strings::webalize("&\u{17D}LU\u{164}OU\u{10C}K\u{DD} K\u{16E}\u{147} \u{F6}\u{151}\u{F4}o!", null, false)); // &ŽLUŤOUČKÝ KŮŇ öőôo!
Assert::same('1-4-!', Strings::webalize("\u{BC} !", '!'));
Assert::same('a-b', Strings::webalize("a\u{A0}b")); // non-breaking space
// Input ends with a lone 0xFF byte, i.e. it is not valid UTF-8: webalize() is
// expected to throw RegexpException with code PREG_BAD_UTF8_ERROR.
// This file tests webalize(), so the call goes through webalize() itself
// rather than calling toAscii() directly.
Assert::exception(function () {
	Strings::webalize("0123456789\xFF");
}, Nette\Utils\RegexpException::class, null, PREG_BAD_UTF8_ERROR);

0 comments on commit aa00974

Please sign in to comment.