Permalink
Browse files

MDL-31142 fix textlib::substr() performance, invalid length in iconv_substr; use mbstring by default
  • Loading branch information...
1 parent f70f6e9 commit 76917f764fd0d985b0215e8ed62a2f339ec41693 @skodak skodak committed Jan 14, 2012
Showing with 46 additions and 11 deletions.
  1. +2 −0 lib/simpletest/testtextlib.php
  2. +44 −11 lib/textlib.class.php
@@ -83,6 +83,8 @@ public function test_convert() {
public function test_substr() {
$str = "Žluťoučký koníček";
+ $this->assertIdentical(textlib::substr($str, 0), $str);
+ $this->assertIdentical(textlib::substr($str, 1), 'luťoučký koníček');
$this->assertIdentical(textlib::substr($str, 1, 3), 'luť');
$this->assertIdentical(textlib::substr($str, 0, 100), $str);
$this->assertIdentical(textlib::substr($str, -3, 2), 'če');
View
@@ -182,7 +182,7 @@ public static function convert($text, $fromCS, $toCS='utf-8') {
}
/**
- * Multibyte safe substr() function, uses iconv for utf-8, falls back to typo3.
+ * Multibyte safe substr() function, uses mbstring or iconv for UTF-8, falls back to typo3.
*
* @param string $text
* @param int $start negative value means from end
@@ -194,18 +194,39 @@ public static function substr($text, $start, $len=null, $charset='utf-8') {
$charset = self::parse_charset($charset);
if ($charset === 'utf-8') {
- return iconv_substr($text, $start, $len, $charset);
+ if (function_exists('mb_substr')) {
+ // this is much faster than iconv - see MDL-31142
+ if ($len === null) {
+ $oldcharset = mb_internal_encoding();
+ mb_internal_encoding('UTF-8');
+ $result = mb_substr($text, $start);
+ mb_internal_encoding($oldcharset);
+ return $result;
+ } else {
+ return mb_substr($text, $start, $len, 'UTF-8');
+ }
+
+ } else {
+ if ($len === null) {
+ $len = iconv_strlen($text, 'UTF-8');
+ }
+ return iconv_substr($text, $start, $len, 'UTF-8');
+ }
}
$oldlevel = error_reporting(E_PARSE);
- $result = self::typo3()->substr($charset, $text, $start, $len);
+ if ($len === null) {
+ $result = self::typo3()->substr($charset, $text, $start);
+ } else {
+ $result = self::typo3()->substr($charset, $text, $start, $len);
+ }
error_reporting($oldlevel);
return $result;
}
/**
- * Multibyte safe strlen() function, uses iconv for utf-8, falls back to typo3.
+ * Multibyte safe strlen() function, uses mbstring or iconv for UTF-8, falls back to typo3.
*
* @param string $text
* @param string $charset encoding of the text
@@ -215,7 +236,11 @@ public static function strlen($text, $charset='utf-8') {
$charset = self::parse_charset($charset);
if ($charset === 'utf-8') {
- return iconv_strlen($text, $charset);
+ if (function_exists('mb_strlen')) {
+ return mb_strlen($text, 'UTF-8');
+ } else {
+ return iconv_strlen($text, 'UTF-8');
+ }
}
$oldlevel = error_reporting(E_PARSE);
@@ -236,7 +261,7 @@ public static function strtolower($text, $charset='utf-8') {
$charset = self::parse_charset($charset);
if ($charset === 'utf-8' and function_exists('mb_strtolower')) {
- return mb_strtolower($text, $charset);
+ return mb_strtolower($text, 'UTF-8');
}
$oldlevel = error_reporting(E_PARSE);
@@ -257,7 +282,7 @@ public static function strtoupper($text, $charset='utf-8') {
$charset = self::parse_charset($charset);
if ($charset === 'utf-8' and function_exists('mb_strtoupper')) {
- return mb_strtoupper($text, $charset);
+ return mb_strtoupper($text, 'UTF-8');
}
$oldlevel = error_reporting(E_PARSE);
@@ -268,26 +293,34 @@ public static function strtoupper($text, $charset='utf-8') {
}
/**
- * UTF-8 ONLY safe strpos(), uses iconv..
+ * UTF-8 ONLY safe strpos(), uses mbstring, falls back to iconv.
*
* @param string $haystack UTF-8 text to search in
* @param string $needle UTF-8 text to search for
* @param int $offset position at which the search starts, passed through unchanged to mb_strpos()/iconv_strpos()
* @return int|false position of the first occurrence of $needle as reported by mb_strpos()/iconv_strpos(), false if not found
*/
public static function strpos($haystack, $needle, $offset=0) {
- return iconv_strpos($haystack, $needle, $offset, 'utf-8');
+ if (function_exists('mb_strpos')) {
+ return mb_strpos($haystack, $needle, $offset, 'UTF-8');
+ } else {
+ return iconv_strpos($haystack, $needle, $offset, 'UTF-8');
+ }
}
/**
- * UTF-8 ONLY safe strrpos(), uses iconv.
+ * UTF-8 ONLY safe strrpos(), uses mbstring (guarded on mb_strrpos itself, explicit 0 offset), falls back to iconv.
*
* @param string $haystack UTF-8 text to search in
* @param string $needle UTF-8 text to search for
* @return int|false position of the last occurrence of $needle as reported by mb_strrpos()/iconv_strrpos(), false if not found
*/
public static function strrpos($haystack, $needle) {
- return iconv_strrpos($haystack, $needle, 'utf-8');
+ if (function_exists('mb_strrpos')) {
+ return mb_strrpos($haystack, $needle, 0, 'UTF-8');
+ } else {
+ return iconv_strrpos($haystack, $needle, 'UTF-8');
+ }
}
/**

0 comments on commit 76917f7

Please sign in to comment.