Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

MDL-36212 rework html entity conversions

This should resolve all html entity conversion problems in different PHP versions.
  • Loading branch information...
commit ea0f9a277c1f67f6e4ed8f21487aacf115291651 1 parent 4a76321
@skodak skodak authored
View
1  lib/editor/tinymce/tiny_mce/3.5.1.1/plugins/spellchecker/classes/GoogleSpell.php
@@ -126,6 +126,7 @@ function &_getMatches($lang, $str) {
}
function _unhtmlentities($string) {
+ return textlib::entities_to_utf8($string); // Moodle hack
$string = preg_replace('~&#x([0-9a-f]+);~ei', 'chr(hexdec("\\1"))', $string);
$string = preg_replace('~&#([0-9]+);~e', 'chr(\\1)', $string);
View
13 lib/tests/textlib_test.php
@@ -293,8 +293,8 @@ public function test_encode_mimeheader() {
* @return void
*/
public function test_entities_to_utf8() {
- $str = "Žluťoučký koníček";
- $this->assertSame(textlib::entities_to_utf8($str), "Žluťoučký koníček");
+ $str = "&#x17d;lu&#x165;ou&#x10d;k&#xfd; kon&iacute;&#269;ek&copy;&quot;&amp;&lt;&gt;&sect;&laquo;";
+ $this->assertSame("Žluťoučký koníček©\"&<>§«", textlib::entities_to_utf8($str));
}
/**
@@ -302,10 +302,13 @@ public function test_entities_to_utf8() {
* @return void
*/
public function test_utf8_to_entities() {
- $str = "Žluťoučký koníček";
- $this->assertSame(textlib::utf8_to_entities($str), "&#x17d;lu&#x165;ou&#x10d;k&#xfd; kon&#xed;&#x10d;ek");
- $this->assertSame(textlib::utf8_to_entities($str, true), "&#381;lu&#357;ou&#269;k&#253; kon&#237;&#269;ek");
+ $str = "&#x17d;luťoučký kon&iacute;ček&copy;&quot;&amp;&lt;&gt;&sect;&laquo;";
+ $this->assertSame("&#x17d;lu&#x165;ou&#x10d;k&#xfd; kon&iacute;&#x10d;ek&copy;&quot;&amp;&lt;&gt;&sect;&laquo;", textlib::utf8_to_entities($str));
+ $this->assertSame("&#381;lu&#357;ou&#269;k&#253; kon&iacute;&#269;ek&copy;&quot;&amp;&lt;&gt;&sect;&laquo;", textlib::utf8_to_entities($str, true));
+ $str = "&#381;luťoučký kon&iacute;ček&copy;&quot;&amp;&lt;&gt;&sect;&laquo;";
+ $this->assertSame("&#x17d;lu&#x165;ou&#x10d;k&#xfd; kon&#xed;&#x10d;ek&#xa9;\"&<>&#xa7;&#xab;", textlib::utf8_to_entities($str, false, true));
+ $this->assertSame("&#381;lu&#357;ou&#269;k&#253; kon&#237;&#269;ek&#169;\"&<>&#167;&#171;", textlib::utf8_to_entities($str, true, true));
}
/**
View
75 lib/textlib.class.php
@@ -442,6 +442,34 @@ public static function encode_mimeheader($text, $charset='utf-8') {
}
/**
+ * Returns HTML entity transliteration table.
+ * @return array with (html entity => utf-8) elements
+ */
+ protected static function get_entities_table() {
+ static $trans_tbl = null;
+
+ // Generate/create $trans_tbl
+ if (!isset($trans_tbl)) {
+ if (version_compare(phpversion(), '5.3.4') < 0) {
+ $trans_tbl = array();
+ foreach (get_html_translation_table(HTML_ENTITIES) as $val=>$key) {
+ $trans_tbl[$key] = textlib::convert($val, 'ISO-8859-1', 'utf-8');
+ }
+
+ } else if (version_compare(phpversion(), '5.4.0') < 0) {
+ $trans_tbl = get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'UTF-8');
+ $trans_tbl = array_flip($trans_tbl);
+
+ } else {
+ $trans_tbl = get_html_translation_table(HTML_ENTITIES, ENT_COMPAT | ENT_HTML401, 'UTF-8');
+ $trans_tbl = array_flip($trans_tbl);
+ }
+ }
+
+ return $trans_tbl;
+ }
+
+ /**
* Converts all the numeric entities &#nnnn; or &#xnnn; to UTF-8
* Original from laurynas dot butkus at gmail at:
* http://php.net/manual/en/function.html-entity-decode.php#75153
@@ -450,28 +478,24 @@ public static function encode_mimeheader($text, $charset='utf-8') {
* @param string $str input string
* @param boolean $htmlent convert also html entities (defaults to true)
* @return string encoded UTF-8 string
- *
- * NOTE: we could have used typo3 entities_to_utf8() here
- * but the direct alternative used runs 400% quicker
- * and uses 0.5Mb less memory, so, let's use it
- * (tested against 10^6 conversions)
*/
public static function entities_to_utf8($str, $htmlent=true) {
- static $trans_tbl; // Going to use static transliteration table
+ static $callback1 = null ;
+ static $callback2 = null ;
+
+ if (!$callback1 or !$callback2) {
+ $callback1 = create_function('$matches', 'return textlib::code2utf8(hexdec($matches[1]));');
+ $callback2 = create_function('$matches', 'return textlib::code2utf8($matches[1]);');
+ }
- // Replace numeric entities
- $result = preg_replace('~&#x([0-9a-f]+);~ei', 'textlib::code2utf8(hexdec("\\1"))', $str);
- $result = preg_replace('~&#([0-9]+);~e', 'textlib::code2utf8(\\1)', $result);
+ $result = (string)$str;
+ $result = preg_replace_callback('/&#x([0-9a-f]+);/i', $callback1, $result);
+ $result = preg_replace_callback('/&#([0-9]+);/', $callback2, $result);
// Replace literal entities (if desired)
if ($htmlent) {
- // Generate/create $trans_tbl
- if (!isset($trans_tbl)) {
- $trans_tbl = array();
- foreach (get_html_translation_table(HTML_ENTITIES) as $val=>$key) {
- $trans_tbl[$key] = utf8_encode($val);
- }
- }
+ $trans_tbl = self::get_entities_table();
+ // It should be safe to search for ascii strings and replace them with utf-8 here.
$result = strtr($result, $trans_tbl);
}
// Return utf8-ised string
@@ -487,17 +511,24 @@ public static function entities_to_utf8($str, $htmlent=true) {
* @return string converted string
*/
public static function utf8_to_entities($str, $dec=false, $nonnum=false) {
- // Avoid some notices from Typo3 code
- $oldlevel = error_reporting(E_PARSE);
+ static $callback = null ;
+
if ($nonnum) {
- $str = self::typo3()->entities_to_utf8((string)$str, true);
+ $str = self::entities_to_utf8($str, true);
}
+
+ // Avoid some notices from Typo3 code
+ $oldlevel = error_reporting(E_PARSE);
$result = self::typo3()->utf8_to_entities((string)$str);
+ error_reporting($oldlevel);
+
if ($dec) {
- $result = preg_replace('/&#x([0-9a-f]+);/ie', "'&#'.hexdec('$1').';'", $result);
+ if (!$callback) {
+ $callback = create_function('$matches', 'return \'&#\'.(hexdec($matches[1])).\';\';');
+ }
+ $result = preg_replace_callback('/&#x([0-9a-f]+);/i', $callback, $result);
}
- // Restore original debug level
- error_reporting($oldlevel);
+
return $result;
}
View
4 lib/weblib.php
@@ -1382,7 +1382,7 @@ function format_text_email($text, $format) {
case FORMAT_WIKI:
// there should not be any of these any more!
$text = wikify_links($text);
- return strtr(strip_tags($text), array_flip(get_html_translation_table(HTML_ENTITIES)));
+ return textlib::entities_to_utf8(strip_tags($text), true);
break;
case FORMAT_HTML:
@@ -1393,7 +1393,7 @@ function format_text_email($text, $format) {
case FORMAT_MARKDOWN:
default:
$text = wikify_links($text);
- return strtr(strip_tags($text), array_flip(get_html_translation_table(HTML_ENTITIES)));
+ return textlib::entities_to_utf8(strip_tags($text), true);
break;
}
}
Please sign in to comment.
Something went wrong with that request. Please try again.