Permalink
Browse files

Merge branch 'w51_MDL-36212_m23_entities' of git://github.com/skodak/moodle into MOODLE_23_STABLE
  • Loading branch information...
2 parents 52817fd + ea0f9a2 commit 178a5b4102cba92eb3969b0fd2f102e75646a286 Sam Hemelryk committed Jan 7, 2013
@@ -128,6 +128,7 @@ function &_getMatches($lang, $str) {
}
function _unhtmlentities($string) {
+ return textlib::entities_to_utf8($string); // Moodle hack
$string = preg_replace('~&#x([0-9a-f]+);~ei', 'chr(hexdec("\\1"))', $string);
$string = preg_replace('~&#([0-9]+);~e', 'chr(\\1)', $string);
View
@@ -293,19 +293,22 @@ public function test_encode_mimeheader() {
* @return void
*/
public function test_entities_to_utf8() {
- $str = "Žluťoučký koníček";
- $this->assertSame(textlib::entities_to_utf8($str), "Žluťoučký koníček");
+ $str = "&#x17d;lu&#x165;ou&#x10d;k&#xfd; kon&iacute;&#269;ek&copy;&quot;&amp;&lt;&gt;&sect;&laquo;";
+ $this->assertSame("Žluťoučký koníček©\"&<>§«", textlib::entities_to_utf8($str));
}
/**
* Tests the static utf8_to_entities method
* @return void
*/
public function test_utf8_to_entities() {
- $str = "Žluťoučký koníček";
- $this->assertSame(textlib::utf8_to_entities($str), "&#x17d;lu&#x165;ou&#x10d;k&#xfd; kon&#xed;&#x10d;ek");
- $this->assertSame(textlib::utf8_to_entities($str, true), "&#381;lu&#357;ou&#269;k&#253; kon&#237;&#269;ek");
+ $str = "&#x17d;luťoučký kon&iacute;ček&copy;&quot;&amp;&lt;&gt;&sect;&laquo;";
+ $this->assertSame("&#x17d;lu&#x165;ou&#x10d;k&#xfd; kon&iacute;&#x10d;ek&copy;&quot;&amp;&lt;&gt;&sect;&laquo;", textlib::utf8_to_entities($str));
+ $this->assertSame("&#381;lu&#357;ou&#269;k&#253; kon&iacute;&#269;ek&copy;&quot;&amp;&lt;&gt;&sect;&laquo;", textlib::utf8_to_entities($str, true));
+ $str = "&#381;luťoučký kon&iacute;ček&copy;&quot;&amp;&lt;&gt;&sect;&laquo;";
+ $this->assertSame("&#x17d;lu&#x165;ou&#x10d;k&#xfd; kon&#xed;&#x10d;ek&#xa9;\"&<>&#xa7;&#xab;", textlib::utf8_to_entities($str, false, true));
+ $this->assertSame("&#381;lu&#357;ou&#269;k&#253; kon&#237;&#269;ek&#169;\"&<>&#167;&#171;", textlib::utf8_to_entities($str, true, true));
}
/**
View
@@ -442,6 +442,34 @@ public static function encode_mimeheader($text, $charset='utf-8') {
}
/**
+ * Returns HTML entity transliteration table.
+ * @return array with (html entity => utf-8) elements
+ */
+ protected static function get_entities_table() {
+ static $trans_tbl = null;
+
+ // Generate/create $trans_tbl
+ if (!isset($trans_tbl)) {
+ if (version_compare(phpversion(), '5.3.4') < 0) {
+ $trans_tbl = array();
+ foreach (get_html_translation_table(HTML_ENTITIES) as $val=>$key) {
+ $trans_tbl[$key] = textlib::convert($val, 'ISO-8859-1', 'utf-8');
+ }
+
+ } else if (version_compare(phpversion(), '5.4.0') < 0) {
+ $trans_tbl = get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'UTF-8');
+ $trans_tbl = array_flip($trans_tbl);
+
+ } else {
+ $trans_tbl = get_html_translation_table(HTML_ENTITIES, ENT_COMPAT | ENT_HTML401, 'UTF-8');
+ $trans_tbl = array_flip($trans_tbl);
+ }
+ }
+
+ return $trans_tbl;
+ }
+
+ /**
* Converts all the numeric entities &#nnnn; or &#xnnn; to UTF-8
* Original from laurynas dot butkus at gmail at:
* http://php.net/manual/en/function.html-entity-decode.php#75153
@@ -450,28 +478,24 @@ public static function encode_mimeheader($text, $charset='utf-8') {
* @param string $str input string
* @param boolean $htmlent convert also html entities (defaults to true)
* @return string encoded UTF-8 string
- *
- * NOTE: we could have used typo3 entities_to_utf8() here
- * but the direct alternative used runs 400% quicker
- * and uses 0.5Mb less memory, so, let's use it
- * (tested against 10^6 conversions)
*/
public static function entities_to_utf8($str, $htmlent=true) {
- static $trans_tbl; // Going to use static transliteration table
+ static $callback1 = null ;
+ static $callback2 = null ;
+
+ if (!$callback1 or !$callback2) {
+ $callback1 = create_function('$matches', 'return textlib::code2utf8(hexdec($matches[1]));');
+ $callback2 = create_function('$matches', 'return textlib::code2utf8($matches[1]);');
+ }
- // Replace numeric entities
- $result = preg_replace('~&#x([0-9a-f]+);~ei', 'textlib::code2utf8(hexdec("\\1"))', $str);
- $result = preg_replace('~&#([0-9]+);~e', 'textlib::code2utf8(\\1)', $result);
+ $result = (string)$str;
+ $result = preg_replace_callback('/&#x([0-9a-f]+);/i', $callback1, $result);
+ $result = preg_replace_callback('/&#([0-9]+);/', $callback2, $result);
// Replace literal entities (if desired)
if ($htmlent) {
- // Generate/create $trans_tbl
- if (!isset($trans_tbl)) {
- $trans_tbl = array();
- foreach (get_html_translation_table(HTML_ENTITIES) as $val=>$key) {
- $trans_tbl[$key] = utf8_encode($val);
- }
- }
+ $trans_tbl = self::get_entities_table();
+ // It should be safe to search for ascii strings and replace them with utf-8 here.
$result = strtr($result, $trans_tbl);
}
// Return utf8-ised string
@@ -487,17 +511,24 @@ public static function entities_to_utf8($str, $htmlent=true) {
* @return string converted string
*/
public static function utf8_to_entities($str, $dec=false, $nonnum=false) {
- // Avoid some notices from Typo3 code
- $oldlevel = error_reporting(E_PARSE);
+ static $callback = null ;
+
if ($nonnum) {
- $str = self::typo3()->entities_to_utf8((string)$str, true);
+ $str = self::entities_to_utf8($str, true);
}
+
+ // Avoid some notices from Typo3 code
+ $oldlevel = error_reporting(E_PARSE);
$result = self::typo3()->utf8_to_entities((string)$str);
+ error_reporting($oldlevel);
+
if ($dec) {
- $result = preg_replace('/&#x([0-9a-f]+);/ie', "'&#'.hexdec('$1').';'", $result);
+ if (!$callback) {
+ $callback = create_function('$matches', 'return \'&#\'.(hexdec($matches[1])).\';\';');
+ }
+ $result = preg_replace_callback('/&#x([0-9a-f]+);/i', $callback, $result);
}
- // Restore original debug level
- error_reporting($oldlevel);
+
return $result;
}
View
@@ -1382,7 +1382,7 @@ function format_text_email($text, $format) {
case FORMAT_WIKI:
// there should not be any of these any more!
$text = wikify_links($text);
- return strtr(strip_tags($text), array_flip(get_html_translation_table(HTML_ENTITIES)));
+ return textlib::entities_to_utf8(strip_tags($text), true);
break;
case FORMAT_HTML:
@@ -1393,7 +1393,7 @@ function format_text_email($text, $format) {
case FORMAT_MARKDOWN:
default:
$text = wikify_links($text);
- return strtr(strip_tags($text), array_flip(get_html_translation_table(HTML_ENTITIES)));
+ return textlib::entities_to_utf8(strip_tags($text), true);
break;
}
}

0 comments on commit 178a5b4

Please sign in to comment.