From aa0097469f42866fbded7b748af580017768ffd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Va=C5=A1ek=20Purchart?= Date: Tue, 3 Dec 2019 21:40:43 +0100 Subject: [PATCH] =?UTF-8?q?Strings:=20throw=20exception=20on=20malformed?= =?UTF-8?q?=20UTF-8=20in=20webalize()=20and=20to=E2=80=A6=20(#205)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/Utils/Strings.php | 2 +- tests/Utils/Strings.toAscii().phpt | 3 +++ tests/Utils/Strings.webalize().phpt | 3 +++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Utils/Strings.php b/src/Utils/Strings.php index 195b3d715..c7de947b0 100644 --- a/src/Utils/Strings.php +++ b/src/Utils/Strings.php @@ -142,7 +142,7 @@ public static function toAscii(string $s): string $transliterator = \Transliterator::create('Any-Latin; Latin-ASCII'); } - $s = preg_replace('#[^\x09\x0A\x0D\x20-\x7E\xA0-\x{2FF}\x{370}-\x{10FFFF}]#u', '', $s); + $s = self::pcre('preg_replace', ['#[^\x09\x0A\x0D\x20-\x7E\xA0-\x{2FF}\x{370}-\x{10FFFF}]#u', '', $s]); $s = strtr($s, '`\'"^~?', "\x01\x02\x03\x04\x05\x06"); $s = str_replace( ["\u{201E}", "\u{201C}", "\u{201D}", "\u{201A}", "\u{2018}", "\u{2019}", "\u{B0}"], diff --git a/tests/Utils/Strings.toAscii().phpt b/tests/Utils/Strings.toAscii().phpt index 9bcaa7016..1ea1a51d6 100644 --- a/tests/Utils/Strings.toAscii().phpt +++ b/tests/Utils/Strings.toAscii().phpt @@ -21,6 +21,9 @@ Assert::same('', Strings::toAscii("\u{10000}")); // U+10000 Assert::same('', Strings::toAscii("\u{A4}")); // non-ASCII char Assert::same('a b', Strings::toAscii("a\u{A0}b")); // non-breaking space Assert::same('Tarikh', Strings::toAscii("Ta\u{2BE}rikh")); // Taʾrikh +Assert::exception(function () { + Strings::toAscii("0123456789\xFF"); +}, Nette\Utils\RegexpException::class, null, PREG_BAD_UTF8_ERROR); if (class_exists('Transliterator') && \Transliterator::create('Any-Latin; Latin-ASCII')) { Assert::same('Athena->Moskva', Strings::toAscii("\u{391}\u{3B8}\u{3AE}\u{3BD}\u{3B1}\u{2192}\u{41C}\u{43E}\u{441}\u{43A}\u{432}\u{430}")); // Αθήνα→Москва diff --git a/tests/Utils/Strings.webalize().phpt b/tests/Utils/Strings.webalize().phpt index c276ed288..4751ff3cf 100644 --- a/tests/Utils/Strings.webalize().phpt +++ b/tests/Utils/Strings.webalize().phpt @@ -17,3 +17,6 @@ Assert::same('zlutoucky-kun-oooo', Strings::webalize("&\u{17D}LU\u{164}OU\u{10C} Assert::same('ZLUTOUCKY-KUN-oooo', Strings::webalize("&\u{17D}LU\u{164}OU\u{10C}K\u{DD} K\u{16E}\u{147} \u{F6}\u{151}\u{F4}o!", null, false)); // &ŽLUŤOUČKÝ KŮŇ öőôo! Assert::same('1-4-!', Strings::webalize("\u{BC} !", '!')); Assert::same('a-b', Strings::webalize("a\u{A0}b")); // non-breaking space +Assert::exception(function () { + Strings::toAscii("0123456789\xFF"); +}, Nette\Utils\RegexpException::class, null, PREG_BAD_UTF8_ERROR);