Permalink
Browse files

MDL-36212 rework html entity conversions

This should resolve all html entity conversion problems in different PHP versions.
  • Loading branch information...
1 parent 322af44 commit eb2d6a23aba961f4f3eb39bc43393ee1de62971a @skodak skodak committed Dec 9, 2012
@@ -126,6 +126,7 @@ function &_getMatches($lang, $str) {
}
function _unhtmlentities($string) {
+ return textlib::entities_to_utf8($string); // Moodle hack
$string = preg_replace('~&#x([0-9a-f]+);~ei', 'chr(hexdec("\\1"))', $string);
$string = preg_replace('~&#([0-9]+);~e', 'chr(\\1)', $string);
@@ -7,6 +7,4 @@ List of changes:
* Modified config file to use moodle $CFG.
* Moved static files to /tinymce/ subfolder.
* MDL-25736 - French spellchecker fixes.
-
-Commits:
-https://github.com/moodle/custom-tinymce_spellchecker_php/commits/MOODLE_22_2.0.6b
+* Fix htmlentities conversion in GoogleSpell.php
@@ -293,19 +293,22 @@ public function test_encode_mimeheader() {
* @return void
*/
public function test_entities_to_utf8() {
- $str = "Žluťoučký koníček";
- $this->assertSame(textlib::entities_to_utf8($str), "Žluťoučký koníček");
+ $str = "&#x17d;lu&#x165;ou&#x10d;k&#xfd; kon&iacute;&#x10d;ek&copy;&quot;&amp;&lt;&gt;&sect;&laquo;"; // Entity-encoded input: hex numeric entities plus named entities.
+ $this->assertSame("Žluťoučký koníček©\"&<>§«", textlib::entities_to_utf8($str)); // Expected decoded text first, actual result second.
}
/**
* Tests the static utf8_to_entities method
* @return void
*/
public function test_utf8_to_entities() {
- $str = "Žluťoučký koníček";
- $this->assertSame(textlib::utf8_to_entities($str), "&#x17d;lu&#x165;ou&#x10d;k&#xfd; kon&#xed;&#x10d;ek");
- $this->assertSame(textlib::utf8_to_entities($str, true), "&#381;lu&#357;ou&#269;k&#253; kon&#237;&#269;ek");
+ $str = "&#x17d;luťoučký kon&iacute;ček&copy;&quot;&amp;&lt;&gt;&sect;&laquo;"; // Mixed input: raw UTF-8 characters, one hex numeric entity and several named entities.
+ $this->assertSame("&#x17d;lu&#x165;ou&#x10d;k&#xfd; kon&iacute;&#x10d;ek&copy;&quot;&amp;&lt;&gt;&sect;&laquo;", textlib::utf8_to_entities($str)); // Default arguments: expected value keeps the input's entities and shows the raw characters as hex entities.
+ $this->assertSame("&#381;lu&#357;ou&#269;k&#253; kon&iacute;&#269;ek&copy;&quot;&amp;&lt;&gt;&sect;&laquo;", textlib::utf8_to_entities($str, true)); // Second argument true: numeric entities appear in decimal form, named entities are kept.
+ $str = "&#381;luťoučký kon&iacute;ček&copy;&quot;&amp;&lt;&gt;&sect;&laquo;"; // Same text, but the leading entity is written in decimal form.
+ $this->assertSame("&#x17d;lu&#x165;ou&#x10d;k&#xfd; kon&#xed;&#x10d;ek&#xa9;\"&<>&#xa7;&#xab;", textlib::utf8_to_entities($str, false, true)); // Third argument true: expected value has only hex numeric entities, with " & < > as literal characters.
+ $this->assertSame("&#381;lu&#357;ou&#269;k&#253; kon&#237;&#269;ek&#169;\"&<>&#167;&#171;", textlib::utf8_to_entities($str, true, true)); // Second and third arguments true: decimal numeric entities, same literal characters.
}
/**
View
@@ -442,6 +442,34 @@ public static function encode_mimeheader($text, $charset='utf-8') {
}
/**
+ * Returns HTML entity transliteration table.
+ *
+ * The table is built on the first call and cached in a static variable,
+ * so later calls return the cached array. The way it is built depends on
+ * the running PHP version: each branch below passes different arguments
+ * to get_html_translation_table().
+ *
+ * @return array with (html entity => utf-8) elements
+ */
+ protected static function get_entities_table() {
+ static $trans_tbl = null;
+
+ // Build $trans_tbl only when it has not been cached yet.
+ if (!isset($trans_tbl)) {
+ if (version_compare(phpversion(), '5.3.4') < 0) { // PHP older than 5.3.4: no charset argument is passed, so values are converted by hand.
+ $trans_tbl = array();
+ foreach (get_html_translation_table(HTML_ENTITIES) as $val=>$key) { // The PHP table's keys are named $val here; the result is keyed by the table's values.
+ $trans_tbl[$key] = textlib::convert($val, 'ISO-8859-1', 'utf-8');
+ }
+
+ } else if (version_compare(phpversion(), '5.4.0') < 0) { // PHP 5.3.4 up to, but not including, 5.4.0.
+ $trans_tbl = get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'UTF-8'); // Request the table with the 'UTF-8' charset argument.
+ $trans_tbl = array_flip($trans_tbl); // Swap keys and values of the returned table.
+
+ } else { // PHP 5.4.0 and later.
+ $trans_tbl = get_html_translation_table(HTML_ENTITIES, ENT_COMPAT | ENT_HTML401, 'UTF-8'); // Same call, with the ENT_HTML401 flag added.
+ $trans_tbl = array_flip($trans_tbl); // Swap keys and values of the returned table.
+ }
+ }
+
+ return $trans_tbl;
+ }
+
+ /**
* Converts all the numeric entities &#nnnn; or &#xnnn; to UTF-8
* Original from laurynas dot butkus at gmail at:
* http://php.net/manual/en/function.html-entity-decode.php#75153
@@ -450,28 +478,24 @@ public static function encode_mimeheader($text, $charset='utf-8') {
* @param string $str input string
* @param boolean $htmlent convert also html entities (defaults to true)
* @return string encoded UTF-8 string
- *
- * NOTE: we could have used typo3 entities_to_utf8() here
- * but the direct alternative used runs 400% quicker
- * and uses 0.5Mb less memory, so, let's use it
- * (tested against 10^6 conversions)
*/
public static function entities_to_utf8($str, $htmlent=true) {
- static $trans_tbl; // Going to use static transliteration table
+ static $callback1 = null ;
+ static $callback2 = null ;
+
+ if (!$callback1 or !$callback2) {
+ $callback1 = create_function('$matches', 'return textlib::code2utf8(hexdec($matches[1]));');
+ $callback2 = create_function('$matches', 'return textlib::code2utf8($matches[1]);');
+ }
- // Replace numeric entities
- $result = preg_replace('~&#x([0-9a-f]+);~ei', 'textlib::code2utf8(hexdec("\\1"))', $str);
- $result = preg_replace('~&#([0-9]+);~e', 'textlib::code2utf8(\\1)', $result);
+ $result = (string)$str;
+ $result = preg_replace_callback('/&#x([0-9a-f]+);/i', $callback1, $result);
+ $result = preg_replace_callback('/&#([0-9]+);/', $callback2, $result);
// Replace literal entities (if desired)
if ($htmlent) {
- // Generate/create $trans_tbl
- if (!isset($trans_tbl)) {
- $trans_tbl = array();
- foreach (get_html_translation_table(HTML_ENTITIES) as $val=>$key) {
- $trans_tbl[$key] = utf8_encode($val);
- }
- }
+ $trans_tbl = self::get_entities_table();
+ // It should be safe to search for ascii strings and replace them with utf-8 here.
$result = strtr($result, $trans_tbl);
}
// Return utf8-ised string
@@ -487,17 +511,24 @@ public static function entities_to_utf8($str, $htmlent=true) {
* @return string converted string
*/
public static function utf8_to_entities($str, $dec=false, $nonnum=false) {
- // Avoid some notices from Typo3 code
- $oldlevel = error_reporting(E_PARSE);
+ static $callback = null ; // Cached across calls; created lazily inside the $dec branch below.
+
if ($nonnum) {
- $str = self::typo3()->entities_to_utf8((string)$str, true);
+ $str = self::entities_to_utf8($str, true); // Decode existing entities (named ones included) before the encoding step below.
}
+
+ // Avoid some notices from Typo3 code
+ $oldlevel = error_reporting(E_PARSE); // Keep the previous level so it can be restored right after the Typo3 call.
$result = self::typo3()->utf8_to_entities((string)$str);
+ error_reporting($oldlevel); // Restore the previous reporting level; only the Typo3 call runs under E_PARSE.
+
if ($dec) {
- $result = preg_replace('/&#x([0-9a-f]+);/ie', "'&#'.hexdec('$1').';'", $result);
+ if (!$callback) {
+ $callback = create_function('$matches', 'return \'&#\'.(hexdec($matches[1])).\';\';'); // Callback turns the captured hex digits into a decimal entity.
+ }
+ $result = preg_replace_callback('/&#x([0-9a-f]+);/i', $callback, $result); // Callback-based replacement for the removed /e pattern above.
}
- // Restore original debug level
- error_reporting($oldlevel);
+
return $result;
}
View
@@ -1384,7 +1384,7 @@ function format_text_email($text, $format) {
case FORMAT_WIKI:
// there should not be any of these any more!
$text = wikify_links($text);
- return strtr(strip_tags($text), array_flip(get_html_translation_table(HTML_ENTITIES)));
+ return textlib::entities_to_utf8(strip_tags($text), true);
break;
case FORMAT_HTML:
@@ -1395,7 +1395,7 @@ function format_text_email($text, $format) {
case FORMAT_MARKDOWN:
default:
$text = wikify_links($text);
- return strtr(strip_tags($text), array_flip(get_html_translation_table(HTML_ENTITIES)));
+ return textlib::entities_to_utf8(strip_tags($text), true);
break;
}
}

0 comments on commit eb2d6a2

Please sign in to comment.