Skip to content
Browse files

intl string wrapper and some other small changes

  • Loading branch information...
1 parent 49d8ec4 commit 29e0da2a9b623bb55a81bd72749b6df29d0f7c3c @marc-mabe committed
View
7 library/Zend/Stdlib/Exception/ExtensionNotLoadedException.php
@@ -0,0 +1,7 @@
+<?php
+
+namespace Zend\Stdlib\Exception;
+
+/**
+ * Exception type for a required PHP extension that is not loaded.
+ *
+ * Declares no members of its own; it only specializes this namespace's
+ * RuntimeException.
+ */
+class ExtensionNotLoadedException extends RuntimeException
+{
+}
View
7 library/Zend/Stdlib/Exception/RuntimeException.php
@@ -0,0 +1,7 @@
+<?php
+
+namespace Zend\Stdlib\Exception;
+
+/**
+ * Runtime exception of the Zend\Stdlib component.
+ *
+ * Extends PHP's global \RuntimeException and implements this namespace's
+ * ExceptionInterface; declares no members of its own.
+ */
+class RuntimeException extends \RuntimeException implements ExceptionInterface
+{
+}
View
27 library/Zend/Stdlib/StringUtils.php
@@ -7,6 +7,7 @@
Zend\Stdlib\StringWrapper\StringWrapperInterface,
Zend\Stdlib\StringWrapper\MbString as MbStringWrapper,
Zend\Stdlib\StringWrapper\Iconv as IconvWrapper,
+ Zend\Stdlib\StringWrapper\Intl as IntlWrapper,
Zend\Stdlib\StringWrapper\Native as NativeWrapper;
class StringUtils
@@ -33,6 +34,10 @@ public static function getRegisteredWrappers()
if (static::$wrapperRegistry === null) {
static::$wrapperRegistry = array();
+ if (extension_loaded('intl')) {
+ static::$wrapperRegistry[] = new IntlWrapper();
+ }
+
if (extension_loaded('mbstring')) {
static::$wrapperRegistry[] = new MbStringWrapper();
}
@@ -64,17 +69,33 @@ public static function unregisterWrapper(StringWrapperInterface $wrapper)
+/**
+ * Get the first registered string wrapper supporting all given charsets
+ *
+ * Every argument passed is treated as a character set the wrapper must
+ * support. Without arguments the declared default ('UTF-8') is required.
+ *
+ * @param string $charset Character set to support; further charsets may
+ *                        be passed as additional arguments
+ * @return StringWrapperInterface
+ * @throws Exception\RuntimeException If no registered wrapper supports all requested charsets
+ */
public static function getWrapper($charset = 'UTF-8')
{
+    // func_get_args() reports only the arguments actually passed and never
+    // includes default values. Fall back to the declared default so that a
+    // call without arguments does not return the first wrapper unchecked.
+    $charsets = func_get_args();
+    if (!$charsets) {
+        $charsets = array($charset);
+    }
+
    foreach (static::getRegisteredWrappers() as $wrapper) {
-        if ($wrapper->isCharsetSupported($charset)) {
-            return $wrapper;
+        foreach ($charsets as $charset) {
+            if (!$wrapper->isCharsetSupported($charset)) {
+                // One requested charset is missing: try the next wrapper.
+                continue 2;
+            }
        }
+
+        return $wrapper;
    }
-    throw new Exception\RuntimeException("No wrapper found for charset '{$charset}'");
+    throw new Exception\RuntimeException('No wrapper found supporting charset(s) ' . implode(', ', $charsets));
+ }
+
+ /**
+ * Get the list of single-byte character sets
+ *
+ * Returns static::$singleByteCharsets unchanged; that property is declared
+ * outside the visible part of this class.
+ *
+ * @return array
+ */
+ public static function getSingleByteCharsets()
+ {
+ return static::$singleByteCharsets;
}
+ /**
+ * Check whether a character set is listed as single-byte
+ *
+ * The name is upper-cased before it is looked up in
+ * static::$singleByteCharsets.
+ *
+ * @param string $charset
+ * @return bool
+ */
public static function isSingleByteCharset($charset)
{
return in_array(strtoupper($charset), static::$singleByteCharsets);
}
+
+ /**
+ * Check whether a string is valid UTF-8
+ *
+ * The empty string is accepted directly. Any other input is matched
+ * against a pattern carrying the PCRE "u" (UTF-8) modifier and is valid
+ * only if preg_match() reports exactly one match.
+ *
+ * @param string $string
+ * @return bool
+ */
+ public static function isValidUtf8($string)
+ {
+ return ($string === '' || preg_match('/^./su', $string) == 1);
+ }
}
View
6 library/Zend/Stdlib/StringWrapper/AbstractStringWrapper.php
@@ -10,17 +10,17 @@
*
* @var string[]
*/
- protected static $charsets = array();
+ protected $charsets = array();
+ /**
+ * Check whether this wrapper lists the given character set as supported
+ *
+ * The name is upper-cased before the lookup, so the check is
+ * case-insensitive as long as the list holds upper-case names.
+ *
+ * @param string $charset
+ * @return bool
+ */
public function isCharsetSupported($charset)
{
$charset = strtoupper($charset);
- return in_array($charset, static::$charsets);
+ return in_array($charset, $this->charsets);
}
+/**
+ * Get the character sets supported by this wrapper
+ *
+ * @return string[]
+ */
public function getSupportedCharsets()
{
-    return static::$charsets;
+    // Plain property access. "$this->$charsets" would be a variable-property
+    // lookup through an undefined local variable and never return the list.
+    return $this->charsets;
}
/**
View
8 library/Zend/Stdlib/StringWrapper/Iconv.php
@@ -11,7 +11,11 @@ class Iconv extends AbstractStringWrapper
* @var string[]
* @link http://php.net/manual/mbstring.supported-encodings.php
*/
- protected static $charsets = array(
+ protected $charsets = array(
+ 'ASCII', '7BIT', '8BIT',
+ 'ISO-8859-1', 'ISO-8859-2', 'ISO-8859-3', 'ISO-8859-4', 'ISO-8859-5',
+ 'ISO-8859-6', 'ISO-8859-7', 'ISO-8859-8', 'ISO-8859-9', 'ISO-8859-10',
+ 'ISO-8859-11', 'ISO-8859-13', 'ISO-8859-14', 'ISO-8859-15', 'ISO-8859-16',
'UTF-8', // TODO
);
@@ -24,7 +28,7 @@ public function __construct()
{
if (!extension_loaded('iconv')) {
throw new Exception\ExtensionNotLoadedException(
- 'PHP extension "iconv" is required for this adapter'
+ 'PHP extension "iconv" is required for this wrapper'
);
}
}
View
68 library/Zend/Stdlib/StringWrapper/Intl.php
@@ -0,0 +1,68 @@
+<?php
+
+namespace Zend\Stdlib\StringWrapper;
+
+/**
+ * String wrapper backed by the grapheme_* functions of PHP's intl extension.
+ *
+ * Only UTF-8 is supported; lengths and offsets are handled by the
+ * grapheme_* functions.
+ */
+class Intl extends AbstractStringWrapper
+{
+
+    /**
+     * List of supported character sets (upper case)
+     *
+     * @var string[]
+     */
+    protected $charsets = array('UTF-8');
+
+    /**
+     * Constructor
+     *
+     * @throws \Zend\Stdlib\Exception\ExtensionNotLoadedException If ext/intl is not loaded
+     */
+    public function __construct()
+    {
+        if (!extension_loaded('intl')) {
+            // Fully qualified on purpose: this file has no "use" statement, so
+            // a relative "Exception\..." name would resolve below
+            // Zend\Stdlib\StringWrapper instead of Zend\Stdlib.
+            throw new \Zend\Stdlib\Exception\ExtensionNotLoadedException(
+                'PHP extension "intl" is required for this wrapper'
+            );
+        }
+    }
+
+    /**
+     * Get the length of a UTF-8 string via grapheme_strlen()
+     *
+     * @param string $str
+     * @param string $charset Must be UTF-8 (case-insensitive)
+     * @return mixed Result of grapheme_strlen(), or false for an unsupported charset
+     */
+    public function strlen($str, $charset = 'UTF-8')
+    {
+        if (!$this->checkCharset($charset)) {
+            return false;
+        }
+
+        return grapheme_strlen($str);
+    }
+
+    /**
+     * Get part of a UTF-8 string via grapheme_substr()
+     *
+     * @param string   $str
+     * @param int      $offset
+     * @param int|null $length  Null means "up to the end of the string"
+     * @param string   $charset Must be UTF-8 (case-insensitive)
+     * @return mixed Result of grapheme_substr(), or false for an unsupported charset
+     */
+    public function substr($str, $offset = 0, $length = null, $charset = 'UTF-8')
+    {
+        if (!$this->checkCharset($charset)) {
+            return false;
+        }
+
+        // Before PHP 8 a null length is coerced to 0 instead of meaning
+        // "until the end", so the argument is left out entirely in that case.
+        if ($length === null) {
+            return grapheme_substr($str, $offset);
+        }
+
+        return grapheme_substr($str, $offset, $length);
+    }
+
+    /**
+     * Find the first occurrence of a needle via grapheme_strpos()
+     *
+     * @param string $haystack
+     * @param string $needle
+     * @param int    $offset
+     * @param string $charset Must be UTF-8 (case-insensitive)
+     * @return mixed Result of grapheme_strpos(), or false for an unsupported charset
+     */
+    public function strpos($haystack, $needle, $offset = 0, $charset = 'UTF-8')
+    {
+        if (!$this->checkCharset($charset)) {
+            return false;
+        }
+
+        return grapheme_strpos($haystack, $needle, $offset);
+    }
+
+    /**
+     * Convert a string between character sets
+     *
+     * This wrapper cannot transcode. It only accepts the case where source
+     * and target charset are the same name and then hands the input back.
+     *
+     * @param string $str
+     * @param string $toCharset
+     * @param string $fromCharset
+     * @return string|false The unchanged string, or false if a real conversion was requested
+     */
+    public function convert($str, $toCharset, $fromCharset = 'UTF-8')
+    {
+        if (strcasecmp($toCharset, $fromCharset) != 0) {
+            // trigger_error() only accepts the E_USER_* family of levels.
+            trigger_error("Can't convert '{$fromCharset}' to '{$toCharset}' using intl", E_USER_WARNING);
+            return false;
+        }
+
+        // Nothing to convert: return the string itself, not a boolean flag.
+        return $str;
+    }
+
+    /**
+     * Report whether the charset is UTF-8, raising a user-level error if not
+     *
+     * @param string $charset
+     * @return bool
+     */
+    private function checkCharset($charset)
+    {
+        if (strcasecmp($charset, 'UTF-8') == 0) {
+            return true;
+        }
+
+        trigger_error("Character set '{$charset}' not supported by intl");
+        return false;
+    }
+}
View
8 library/Zend/Stdlib/StringWrapper/MbString.php
@@ -11,7 +11,11 @@ class MbString extends AbstractStringWrapper
* @var string[]
* @link http://php.net/manual/mbstring.supported-encodings.php
*/
- protected static $charsets = array(
+ protected $charsets = array(
+ 'ASCII', '7BIT', '8BIT',
+ 'ISO-8859-1', 'ISO-8859-2', 'ISO-8859-3', 'ISO-8859-4', 'ISO-8859-5',
+ 'ISO-8859-6', 'ISO-8859-7', 'ISO-8859-8', 'ISO-8859-9', 'ISO-8859-10',
+ 'ISO-8859-11', 'ISO-8859-13', 'ISO-8859-14', 'ISO-8859-15', 'ISO-8859-16',
'UCS-4', 'UCS-4BE', 'UCS-4LE',
'UCS-2', 'UCS-2BE', 'UCS-2LE',
'UTF-8', // TODO
@@ -26,7 +30,7 @@ public function __construct()
{
if (!extension_loaded('mbstring')) {
throw new Exception\ExtensionNotLoadedException(
- 'PHP extension "mbstring" is required for this adapter'
+ 'PHP extension "mbstring" is required for this wrapper'
);
}
}
View
141 library/Zend/Stdlib/StringWrapper/Native.php
@@ -6,155 +6,28 @@
class Native extends AbstractStringWrapper
{
-
- /**
- * List of supported character sets (upper case)
- *
- * @var string[]
- * @link http://php.net/manual/mbstring.supported-encodings.php
- */
- protected static $charsets = array(
- 'ASCII',
- 'UTF-7', 'UTF-8', 'UTF-16', 'UTF-32',
- 'UCS-2', 'UCS-2BE', 'UCS-2LE',
- 'UCS-4', 'UCS-4BE', 'UCS-4LE',
- );
+ /**
+ * Constructor
+ *
+ * Sets this wrapper's supported character sets to the list returned by
+ * StringUtils::getSingleByteCharsets().
+ */
+ public function __construct()
+ {
+ $this->charsets = StringUtils::getSingleByteCharsets();
+ }
+ /**
+ * Get the length of a string in bytes
+ *
+ * After this change the result is always PHP's strlen(); the $charset
+ * argument is no longer consulted.
+ *
+ * @param string $str
+ * @param string $charset Not used by this implementation
+ * @return int
+ */
public function strlen($str, $charset = 'UTF-8')
{
- if (StringUtils::isSingleByteCharset($charset)) {
- return strlen($str);
- }
-
- $charset = strtoupper($charset);
- if ($charset == 'UTF-8') {
- // replace multibyte characters with 1 byte and count bytes
- return strlen(preg_replace('/('
- . '[\xc0-\xdf][\x80-\xbf]' // 2 bytes (110xxxxx 10xxxxxx)
- . '|[\xe0-\xef][\x80-\xbf]{2}' // 3 bytes (1110xxxx [10xxxxxx, ...])
- . '|[\xf0-\xf7][\x80-\xbf]{3}' // 4 bytes (11110xxx [10xxxxxx, ...])
- . '|[\xf8-\xfb][\x80-\xbf]{4}' // 5 bytes (111110xx [10xxxxxx, ...])
- . '|[\xfd-\xfe][\x80-\xbf]{5}' // 6 bytes (1111110x [10xxxxxx, ...])
- . '|\xfe[\x80-\xbf]{6}' // 7 bytes (11111110 [10xxxxxx, ...])
- . ')/s', ' ', $str));
- } elseif ($charset == 'UTF-7') {
- // TODO
- } elseif ($charset == 'UTF-16' || $charset == 'UCS-2' || $charset == 'UCS-2BE' || $charset == 'UCS-2LE') {
- return ceil(strlen($str) / 2);
- } elseif ($charset == 'UTF-32' || $charset == 'UCS-4' || $charset == 'UCS-4BE' || $charset == 'UCS-4LE') {
- return ceil(strlen($str) / 4);
- }
-
- return false;
+ // Byte count; the constructor limits this wrapper to the single-byte
+ // charsets reported by StringUtils::getSingleByteCharsets().
+ return strlen($str);
}
+/**
+ * Get part of a string, counted in bytes
+ *
+ * The $charset argument is not consulted.
+ *
+ * @param string   $str
+ * @param int      $offset
+ * @param int|null $length  Null means "up to the end of the string"
+ * @param string   $charset Not used by this implementation
+ * @return string|false Result of PHP's substr()
+ */
public function substr($str, $offset = 0, $length = null, $charset = 'UTF-8')
{
-    if (StringUtils::isSingleByteCharset($charset)) {
-        return substr($str, $offset, $length);
-    }
-
-    $charset = strtoupper($charset);
-    if ($charset == 'UTF-8') {
-        // TODO
-    } elseif ($charset == 'UTF-7') {
-        // TODO
-    } elseif ($charset == 'UTF-16' || $charset == 'UCS-2') {
-        return substr($str, $offset * 2, $length * 2);
-    } elseif ($charset == 'UTF-32' || $charset == 'UCS-4') {
-        return substr($str, $offset * 4, $length * 4);
-    }
-
-    return false;
+    // Before PHP 8 a null length is coerced to 0 and yields an empty result
+    // instead of "until the end", so the argument is omitted in that case.
+    if ($length === null) {
+        return substr($str, $offset);
+    }
+
+    return substr($str, $offset, $length);
}
+ /**
+ * Find the byte position of the first occurrence of a needle
+ *
+ * After this change the call is passed straight to PHP's strpos(); the
+ * $charset argument is no longer consulted.
+ *
+ * @param string $haystack
+ * @param string $needle
+ * @param int    $offset
+ * @param string $charset Not used by this implementation
+ * @return int|false
+ */
public function strpos($haystack, $needle, $offset = 0, $charset = 'UTF-8')
{
- if (StringUtils::isSingleByteCharset($charset)) {
- return strpos($haystack, $needle, $offset);
- }
-
- $charset = strtoupper($charset);
- if ($charset == 'UTF-8') {
- // TODO
- } elseif ($charset == 'UTF-7') {
- // TODO
- } elseif ($charset == 'UTF-16' || $charset == 'UCS-2') {
- // TODO
- } elseif ($charset == 'UTF-32' || $charset == 'UCS-4') {
- // TODO
- }
-
- return false;
+ return strpos($haystack, $needle, $offset);
}
+ /**
+ * Convert a string between character sets
+ *
+ * After this change no conversion is attempted: the method returns false
+ * for every input. The removed lines dispatched to convertXToY() helpers.
+ *
+ * @param string $str
+ * @param string $toCharset
+ * @param string $fromCharset
+ * @return bool Always false
+ */
public function convert($str, $toCharset, $fromCharset = 'UTF-8')
{
- $fromName = str_replace('-', '', strtolower($fromCharset));
- $toName = str_replace('-', '', strtolower($toCharset));
- $method = 'convert' . $fromName . 'To' . $toName;
-
- if (method_exists($this, $method)) {
- return $this->$method($str);
- }
-
return false;
}
-
- public function convertAsciiToUtf8($str)
- {
- return $str;
- }
-
- public function convertAsciiToUtf16($str)
- {
- return preg_replace_callback("/./", function ($char) {
- return "\0" . $char;
- }, $str);
- }
-
- public function convertAsciiToUcs2($str)
- {
- return $this->convertAsciiToUtf16($str);
- }
-
- public function convertAsciiToUtf32($str)
- {
- return preg_replace_callback("/./", function ($char) {
- return "\0\0\0" . $char;
- }, $str);
- }
-
- public function convertAsciiToUcs4($str)
- {
- return $this->convertAsciiToUtf32($str);
- }
-
- public function convertUtf8ToAscii($str)
- {
- // TODO
- return $str;
- }
-
- public function convertUtf8ToUtf16($str)
- {
- // TODO
- return $str;
- }
-
- public function convertUtf8ToUcs2($str)
- {
- return $this->convertUtf8ToUtf16($str);
- }
-
- public function convertUtf8ToUtf32($str)
- {
- // TODO
- return $str;
- }
-
- public function convertUtf8ToUcs4($str)
- {
- return $this->convertUtf8ToUtf32($str);
- }
}
View
35 tests/Zend/Stdlib/StringUtilsTest.php
@@ -83,8 +83,7 @@ public function testIsSingleByteCharsetReturnsFalse($charset)
public function testGetWrapper()
{
- $wrapper = StringUtils::getWrapper('UTF-8');
-
+ $wrapper = StringUtils::getWrapper('ISO-8859-1');
if (extension_loaded('mbstring')) {
$this->assertInstanceOf('Zend\Stdlib\StringWrapper\MbString', $wrapper);
} elseif (extension_loaded('iconv')) {
@@ -92,5 +91,37 @@ public function testGetWrapper()
} else {
$this->assertInstanceOf('Zend\Stdlib\StringWrapper\Native', $wrapper);
}
+
+ try {
+ $wrapper = StringUtils::getWrapper('UTF-8');
+ if (extension_loaded('intl')) {
+ $this->assertInstanceOf('Zend\Stdlib\StringWrapper\Intl', $wrapper);
+ } elseif (extension_loaded('mbstring')) {
+ $this->assertInstanceOf('Zend\Stdlib\StringWrapper\MbString', $wrapper);
+ } elseif (extension_loaded('iconv')) {
+ $this->assertInstanceOf('Zend\Stdlib\StringWrapper\Iconv', $wrapper);
+ }
+ } catch (Exception $e) {
+ if (extension_loaded('intl')
+ || extension_loaded('mbstring')
+ || extension_loaded('iconv')
+ ) {
+ $this->fail("Failed to get intl, mbstring or iconv wrapper for UTF-8");
+ }
+ }
+
+ try {
+ $wrapper = StringUtils::getWrapper('UTF-8', 'ISO-8859-1');
+ if (extension_loaded('mbstring')) {
+ $this->assertInstanceOf('Zend\Stdlib\StringWrapper\MbString', $wrapper);
+ } elseif (extension_loaded('iconv')) {
+ $this->assertInstanceOf('Zend\Stdlib\StringWrapper\Iconv', $wrapper);
+ }
+ } catch (Exception $e) {
+ if (extension_loaded('mbstring') || extension_loaded('iconv')
+ ) {
+ $this->fail("Failed to get mbstring or iconv wrapper for UTF-8 and ISO-8859-1");
+ }
+ }
}
}

0 comments on commit 29e0da2

Please sign in to comment.
Something went wrong with that request. Please try again.