From 9a05bd0f8f37e6c4ac126af5f6b822bc8907f9a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20S=CC=8Ckoda?= Date: Thu, 8 Nov 2012 08:49:50 +0100 Subject: [PATCH 1/2] MDL-36245 try to work around buggy iconv() when converting text from utf-8 to utf-8 --- lib/textlib.class.php | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/textlib.class.php b/lib/textlib.class.php index 77fafc1278e9c..ab0db3c4780fc 100644 --- a/lib/textlib.class.php +++ b/lib/textlib.class.php @@ -161,8 +161,7 @@ public static function parse_charset($charset) { /** * Converts the text between different encodings. It uses iconv extension with //TRANSLIT parameter, - * falls back to typo3. - * Returns false if fails. + * falls back to typo3. If both source and target are utf-8 it tries to fix invalid characters only. * * @param string $text * @param string $fromCS source encoding @@ -179,6 +178,10 @@ public static function convert($text, $fromCS, $toCS='utf-8') { return ''; } + if ($toCS === 'utf-8' and $fromCS === 'utf-8') { + return fix_utf8($text); + } + $result = iconv($fromCS, $toCS.'//TRANSLIT', $text); if ($result === false or $result === '') { From 15dbd27efa84a8557025e5d0ba17be9d9a8c2819 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20S=CC=8Ckoda?= Date: Thu, 8 Nov 2012 08:56:11 +0100 Subject: [PATCH 2/2] MDL-36245 add tests for the buggy utf-8 to utf-8 conversion --- lib/tests/textlib_test.php | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/tests/textlib_test.php b/lib/tests/textlib_test.php index 1760074b0e0b0..e45f17f8ade80 100644 --- a/lib/tests/textlib_test.php +++ b/lib/tests/textlib_test.php @@ -62,29 +62,35 @@ public function test_convert() { $this->assertSame(textlib::convert($iso2, 'iso-8859-2', 'win-1250'), $win); $this->assertSame(textlib::convert($iso2, 'iso-8859-2', 'iso-8859-2'), $iso2); $this->assertSame(textlib::convert($win, 'win-1250', 'cp1250'), $win); + $this->assertSame(textlib::convert($utf8, 'utf-8', 'utf-8'), $utf8); $utf8 = 
'言語設定'; $str = pack("H*", "b8c0b8ecc0dfc4ea"); //EUC-JP $this->assertSame(textlib::convert($utf8, 'utf-8', 'EUC-JP'), $str); $this->assertSame(textlib::convert($str, 'EUC-JP', 'utf-8'), $utf8); + $this->assertSame(textlib::convert($utf8, 'utf-8', 'utf-8'), $utf8); $str = pack("H*", "1b24423840386c405f446a1b2842"); //ISO-2022-JP $this->assertSame(textlib::convert($utf8, 'utf-8', 'ISO-2022-JP'), $str); $this->assertSame(textlib::convert($str, 'ISO-2022-JP', 'utf-8'), $utf8); + $this->assertSame(textlib::convert($utf8, 'utf-8', 'utf-8'), $utf8); $str = pack("H*", "8cbe8cea90dd92e8"); //SHIFT-JIS $this->assertSame(textlib::convert($utf8, 'utf-8', 'SHIFT-JIS'), $str); $this->assertSame(textlib::convert($str, 'SHIFT-JIS', 'utf-8'), $utf8); + $this->assertSame(textlib::convert($utf8, 'utf-8', 'utf-8'), $utf8); $utf8 = '简体中文'; $str = pack("H*", "bcf2cce5d6d0cec4"); //GB2312 $this->assertSame(textlib::convert($utf8, 'utf-8', 'GB2312'), $str); $this->assertSame(textlib::convert($str, 'GB2312', 'utf-8'), $utf8); + $this->assertSame(textlib::convert($utf8, 'utf-8', 'utf-8'), $utf8); $str = pack("H*", "bcf2cce5d6d0cec4"); //GB18030 $this->assertSame(textlib::convert($utf8, 'utf-8', 'GB18030'), $str); $this->assertSame(textlib::convert($str, 'GB18030', 'utf-8'), $utf8); + $this->assertSame(textlib::convert($utf8, 'utf-8', 'utf-8'), $utf8); } /**