Permalink
Browse files

initial StringUtils

  • Loading branch information...
1 parent 8f943ea commit cd09a59af20dcd30c26d83c3d55113004e5bfbb7 @marc-mabe committed Jun 15, 2012
View
138 library/Zend/Stdlib/StringAdapter/AbstractStringAdapter.php
@@ -0,0 +1,138 @@
+<?php
+
+namespace Zend\Stdlib\StringAdapter;
+
+/**
+ * Base class for string adapters.
+ *
+ * Provides charset-aware wordWrap() and strPad() built purely on top of the
+ * strlen() and substr() primitives of the concrete adapter.
+ */
+abstract class AbstractStringAdapter implements StringAdapterInterface
+{
+
+    /**
+     * Wrap a string so that lines do not exceed a number of characters.
+     *
+     * Existing occurrences of the break string restart the line. Wrapping
+     * happens at space characters (the single character is compared against
+     * an ASCII space); words longer than the width are only split when $cut
+     * is enabled.
+     *
+     * @param  string  $string  Text to wrap
+     * @param  integer $width   Maximum line width in characters
+     * @param  string  $break   String inserted at every wrap position
+     * @param  boolean $cut     Whether over-long words are split
+     * @param  string  $charset Character set of $string and $break
+     * @return string
+     * @throws Exception\InvalidArgumentException If $break is empty, or if
+     *         $cut is requested together with a width of zero
+     */
+    public function wordWrap($string, $width = 75, $break = "\n", $cut = false, $charset = 'UTF-8')
+    {
+        $text = (string) $string;
+        if ($text === '') {
+            return '';
+        }
+
+        $delimiter = (string) $break;
+        if ($delimiter === '') {
+            throw new Exception\InvalidArgumentException('Break string cannot be empty');
+        }
+
+        $maxWidth = (int) $width;
+        $forceCut = (bool) $cut;
+        if ($forceCut && $maxWidth === 0) {
+            throw new Exception\InvalidArgumentException('Cannot force cut when width is zero');
+        }
+
+        $encoding        = strtoupper($charset);
+        $textLength      = $this->strlen($text, $encoding);
+        $delimiterLength = $this->strlen($delimiter, $encoding);
+
+        $wrapped   = '';
+        $lineStart = 0; // offset of the first character not yet copied to the result
+        $lastSpace = 0; // offset of the most recent wrap opportunity
+        $pos       = 0;
+
+        while ($pos < $textLength) {
+            $char = $this->substr($text, $pos, 1, $encoding);
+
+            // A second slice is only needed when the break string is not one character wide.
+            $candidate = ($delimiterLength !== 1)
+                ? $this->substr($text, $pos, $delimiterLength, $encoding)
+                : $char;
+
+            if ($candidate === $delimiter) {
+                // The text already contains a break here: copy the line including it
+                // and skip past the remaining characters of the break string.
+                $wrapped  .= $this->substr($text, $lineStart, $pos - $lineStart + $delimiterLength, $encoding);
+                $pos      += $delimiterLength - 1;
+                $lineStart = $lastSpace = $pos + 1;
+            } elseif ($char === ' ') {
+                // A space is a wrap opportunity; wrap right away if the line is already full.
+                if ($pos - $lineStart >= $maxWidth) {
+                    $wrapped  .= $this->substr($text, $lineStart, $pos - $lineStart, $encoding) . $delimiter;
+                    $lineStart = $pos + 1;
+                }
+                $lastSpace = $pos;
+            } elseif ($pos - $lineStart >= $maxWidth) {
+                if ($lineStart < $lastSpace) {
+                    // Wrap at the last space seen on the current line.
+                    $wrapped  .= $this->substr($text, $lineStart, $lastSpace - $lineStart, $encoding) . $delimiter;
+                    $lineStart = $lastSpace = $lastSpace + 1;
+                } elseif ($forceCut) {
+                    // No space on this line: split the word at the current position.
+                    $wrapped  .= $this->substr($text, $lineStart, $pos - $lineStart, $encoding) . $delimiter;
+                    $lineStart = $lastSpace = $pos;
+                }
+            }
+
+            $pos++;
+        }
+
+        // Append whatever is left after the last wrap position.
+        if ($lineStart !== $pos) {
+            $wrapped .= $this->substr($text, $lineStart, $pos - $lineStart, $encoding);
+        }
+
+        return $wrapped;
+    }
+
+    /**
+     * Pad a string to a given length (in characters) with another string.
+     *
+     * The input is returned untouched when the pad string is empty or the
+     * input already has at least $padLength characters.
+     *
+     * @param  string  $input     String to pad
+     * @param  integer $padLength Desired length in characters
+     * @param  string  $padString String used for padding
+     * @param  integer $padType   STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH
+     * @param  string  $charset   Character set of $input and $padString
+     * @return string
+     */
+    public function strPad($input, $padLength, $padString = ' ', $padType = \STR_PAD_RIGHT, $charset = 'UTF-8')
+    {
+        $encoding  = strtoupper($charset);
+        $missing   = $padLength - $this->strlen($input, $encoding);
+        $unitWidth = $this->strlen($padString, $encoding);
+
+        if ($unitWidth === 0 || $missing <= 0) {
+            return $input;
+        }
+
+        // Number of complete copies of the pad string that fit into the padding.
+        $fullRepeats = floor($missing / $unitWidth);
+
+        if ($padType === \STR_PAD_BOTH) {
+            // Each side gets the same number of complete copies; the remaining
+            // characters are split in half, the odd one going to the right.
+            $repeatsPerSide = ($fullRepeats - $fullRepeats % 2) / 2;
+            $leftover       = $missing - 2 * $repeatsPerSide * $unitWidth;
+            $leftTail       = floor($leftover / 2);
+            $rightTail      = $leftTail + $leftover % 2;
+
+            $side = str_repeat($padString, $repeatsPerSide);
+
+            return $side . $this->substr($padString, 0, $leftTail, $encoding)
+                . $input
+                . $side . $this->substr($padString, 0, $rightTail, $encoding);
+        }
+
+        // One-sided padding: complete copies followed by a partial copy.
+        $tail    = $this->substr($padString, 0, $missing % $unitWidth, $encoding);
+        $padding = str_repeat($padString, $fullRepeats) . $tail;
+
+        if ($padType === \STR_PAD_LEFT) {
+            return $padding . $input;
+        }
+
+        return $input . $padding;
+    }
+}
View
52 library/Zend/Stdlib/StringAdapter/Iconv.php
@@ -0,0 +1,52 @@
+<?php
+
+namespace Zend\Stdlib\StringAdapter;
+
+/**
+ * String adapter backed by the PHP "iconv" extension.
+ */
+class Iconv extends AbstractStringAdapter
+{
+
+    /**
+     * List of supported character sets (upper case)
+     *
+     * @var string[]
+     * @link http://php.net/manual/book.iconv.php
+     */
+    protected static $charsets = array(
+        'UTF-8', // TODO
+    );
+
+    /**
+     * Constructor
+     *
+     * @throws Exception\ExtensionNotLoadedException If the iconv extension is not loaded
+     */
+    public function __construct()
+    {
+        if (!extension_loaded('iconv')) {
+            throw new Exception\ExtensionNotLoadedException(
+                'PHP extension "iconv" is required for this adapter'
+            );
+        }
+    }
+
+    /**
+     * Check whether this adapter supports the given character set.
+     *
+     * @param  string $charset Charset name (case-insensitive)
+     * @return boolean
+     */
+    public function isCharsetSupported($charset)
+    {
+        $charset = strtoupper($charset);
+        return in_array($charset, static::$charsets, true);
+    }
+
+    /**
+     * Get the length of a string in characters.
+     *
+     * @param  string $str
+     * @param  string $charset
+     * @return integer|false Result of iconv_strlen()
+     */
+    public function strlen($str, $charset = 'UTF-8')
+    {
+        return iconv_strlen($str, $charset);
+    }
+
+    /**
+     * Get a part of a string, addressed in characters.
+     *
+     * @param  string       $str
+     * @param  integer      $offset
+     * @param  integer|null $length NULL extracts up to the end of the string
+     * @param  string       $charset
+     * @return string|false Result of iconv_substr()
+     */
+    public function substr($str, $offset = 0, $length = null, $charset = 'UTF-8')
+    {
+        if ($length === null) {
+            // PHP versions before 8.0 cast an explicit NULL length to 0, which
+            // yields an empty result instead of "up to the end of the string".
+            // The full character count is always a sufficient length.
+            $length = iconv_strlen($str, $charset);
+            if ($length === false) {
+                return false;
+            }
+        }
+
+        return iconv_substr($str, $offset, $length, $charset);
+    }
+
+    /**
+     * Find the position of the first occurrence of a needle in a haystack.
+     *
+     * @param  string  $haystack
+     * @param  string  $needle
+     * @param  integer $offset
+     * @param  string  $charset
+     * @return integer|false Result of iconv_strpos()
+     */
+    public function strpos($haystack, $needle, $offset = 0, $charset = 'UTF-8')
+    {
+        return iconv_strpos($haystack, $needle, $offset, $charset);
+    }
+
+    /**
+     * Convert a string from one character set to another.
+     *
+     * @param  string $str
+     * @param  string $toCharset
+     * @param  string $fromCharset
+     * @return string|false Result of iconv()
+     */
+    public function convert($str, $toCharset, $fromCharset = 'UTF-8')
+    {
+        return iconv($fromCharset, $toCharset, $str);
+    }
+}
View
54 library/Zend/Stdlib/StringAdapter/MbString.php
@@ -0,0 +1,54 @@
+<?php
+
+namespace Zend\Stdlib\StringAdapter;
+
+/**
+ * String adapter backed by the PHP "mbstring" extension.
+ */
+class MbString extends AbstractStringAdapter
+{
+
+    /**
+     * List of supported character sets (upper case)
+     *
+     * @var string[]
+     * @link http://php.net/manual/mbstring.supported-encodings.php
+     */
+    protected static $charsets = array(
+        'UCS-4', 'UCS-4BE', 'UCS-4LE',
+        'UCS-2', 'UCS-2BE', 'UCS-2LE',
+        'UTF-8', // TODO
+    );
+
+    /**
+     * Constructor
+     *
+     * @throws Exception\ExtensionNotLoadedException If the mbstring extension is not loaded
+     */
+    public function __construct()
+    {
+        if (!extension_loaded('mbstring')) {
+            throw new Exception\ExtensionNotLoadedException(
+                'PHP extension "mbstring" is required for this adapter'
+            );
+        }
+    }
+
+    /**
+     * Check whether this adapter supports the given character set.
+     *
+     * @param  string $charset Charset name (case-insensitive)
+     * @return boolean
+     */
+    public function isCharsetSupported($charset)
+    {
+        $charset = strtoupper($charset);
+        return in_array($charset, static::$charsets, true);
+    }
+
+    /**
+     * Get the length of a string in characters.
+     *
+     * @param  string $str
+     * @param  string $charset
+     * @return integer|false Result of mb_strlen()
+     */
+    public function strlen($str, $charset = 'UTF-8')
+    {
+        return mb_strlen($str, $charset);
+    }
+
+    /**
+     * Get a part of a string, addressed in characters.
+     *
+     * @param  string       $str
+     * @param  integer      $offset
+     * @param  integer|null $length NULL extracts up to the end of the string
+     * @param  string       $charset
+     * @return string Result of mb_substr()
+     */
+    public function substr($str, $offset = 0, $length = null, $charset = 'UTF-8')
+    {
+        if ($length === null) {
+            // Before PHP 5.4.8 mb_substr() treated a NULL length like 0 and
+            // returned an empty string. The full character count is always a
+            // sufficient length to reach the end of the string.
+            $length = mb_strlen($str, $charset);
+        }
+
+        return mb_substr($str, $offset, $length, $charset);
+    }
+
+    /**
+     * Find the position of the first occurrence of a needle in a haystack.
+     *
+     * @param  string  $haystack
+     * @param  string  $needle
+     * @param  integer $offset
+     * @param  string  $charset
+     * @return integer|false Result of mb_strpos()
+     */
+    public function strpos($haystack, $needle, $offset = 0, $charset = 'UTF-8')
+    {
+        return mb_strpos($haystack, $needle, $offset, $charset);
+    }
+
+    /**
+     * Convert a string from one character set to another.
+     *
+     * @param  string $str
+     * @param  string $toCharset
+     * @param  string $fromCharset
+     * @return string Result of mb_convert_encoding()
+     */
+    public function convert($str, $toCharset, $fromCharset = 'UTF-8')
+    {
+        return mb_convert_encoding($str, $toCharset, $fromCharset);
+    }
+}
View
175 library/Zend/Stdlib/StringAdapter/Native.php
@@ -0,0 +1,175 @@
+<?php
+
+namespace Zend\Stdlib\StringAdapter;
+
+use Zend\Stdlib\StringUtils;
+
+/**
+ * String adapter implemented with core PHP functions only.
+ *
+ * Several operations are still unimplemented (marked TODO) and return false
+ * or the unchanged input; see the individual methods.
+ */
+class Native extends AbstractStringAdapter
+{
+
+    /**
+     * List of supported character sets (upper case)
+     *
+     * @var string[]
+     */
+    protected static $charsets = array(
+        'ASCII',
+        'UTF-7', 'UTF-8', 'UTF-16', 'UTF-32',
+        'UCS-2', 'UCS-2BE', 'UCS-2LE',
+        'UCS-4', 'UCS-4BE', 'UCS-4LE',
+    );
+
+    /**
+     * Constructor
+     *
+     * This adapter only calls core string and PCRE functions, so there is no
+     * extension requirement to verify here.
+     */
+    public function __construct()
+    {
+    }
+
+    /**
+     * Check whether this adapter supports the given character set.
+     *
+     * @param  string $charset Charset name (case-insensitive)
+     * @return boolean
+     */
+    public function isCharsetSupported($charset)
+    {
+        $charset = strtoupper($charset);
+        return in_array($charset, static::$charsets, true);
+    }
+
+    /**
+     * Get the length of a string in characters.
+     *
+     * UTF-16/UCS-2 and UTF-32/UCS-4 (including BE/LE variants) are treated as
+     * fixed-width encodings of 2 and 4 bytes per character.
+     *
+     * @param  string $str
+     * @param  string $charset
+     * @return integer|false False if the charset is not implemented (e.g. UTF-7)
+     */
+    public function strlen($str, $charset = 'UTF-8')
+    {
+        if (StringUtils::isSingleByteCharset($charset)) {
+            return strlen($str);
+        }
+
+        $charset = strtoupper($charset);
+        if ($charset === 'UTF-8') {
+            // Replace every multi-byte sequence with a single byte and count
+            // bytes. Bytes that are not part of a matched sequence count as
+            // one character each.
+            return strlen(preg_replace('/('
+                . '[\xc0-\xdf][\x80-\xbf]'       // 2 bytes (110xxxxx 10xxxxxx)
+                . '|[\xe0-\xef][\x80-\xbf]{2}'   // 3 bytes (1110xxxx [10xxxxxx, ...])
+                . '|[\xf0-\xf7][\x80-\xbf]{3}'   // 4 bytes (11110xxx [10xxxxxx, ...])
+                . '|[\xf8-\xfb][\x80-\xbf]{4}'   // 5 bytes (111110xx [10xxxxxx, ...])
+                . '|[\xfc-\xfd][\x80-\xbf]{5}'   // 6 bytes (1111110x [10xxxxxx, ...])
+                . '|\xfe[\x80-\xbf]{6}'          // 7 bytes (11111110 [10xxxxxx, ...])
+                . ')/s', ' ', $str));
+        }
+
+        $charWidth = $this->getFixedCharWidth($charset);
+        if ($charWidth !== null) {
+            // A trailing partial unit is counted as one character.
+            return (int) ceil(strlen($str) / $charWidth);
+        }
+
+        // TODO: UTF-7
+        return false;
+    }
+
+    /**
+     * Get a part of a string, addressed in characters.
+     *
+     * @param  string       $str
+     * @param  integer      $offset
+     * @param  integer|null $length NULL extracts up to the end of the string
+     * @param  string       $charset
+     * @return string|false False if the charset is not implemented (UTF-8, UTF-7)
+     */
+    public function substr($str, $offset = 0, $length = null, $charset = 'UTF-8')
+    {
+        if (StringUtils::isSingleByteCharset($charset)) {
+            return $this->substrFixedWidth($str, $offset, $length, 1);
+        }
+
+        $charWidth = $this->getFixedCharWidth(strtoupper($charset));
+        if ($charWidth !== null) {
+            return $this->substrFixedWidth($str, $offset, $length, $charWidth);
+        }
+
+        // TODO: UTF-8, UTF-7
+        return false;
+    }
+
+    /**
+     * Find the position of the first occurrence of a needle in a haystack.
+     *
+     * Only single byte charsets are implemented so far.
+     *
+     * @param  string  $haystack
+     * @param  string  $needle
+     * @param  integer $offset
+     * @param  string  $charset
+     * @return integer|false False if not found or if the charset is not implemented
+     */
+    public function strpos($haystack, $needle, $offset = 0, $charset = 'UTF-8')
+    {
+        if (StringUtils::isSingleByteCharset($charset)) {
+            return strpos($haystack, $needle, $offset);
+        }
+
+        // TODO: UTF-8, UTF-7, UTF-16 / UCS-2, UTF-32 / UCS-4
+        return false;
+    }
+
+    /**
+     * Convert a string from one character set to another.
+     *
+     * Dispatches to a method named "convert<From>To<To>" built from the
+     * lower-cased charset names without dashes; PHP resolves method names
+     * case-insensitively, so e.g. "convertutf8Toutf16" finds convertUtf8ToUtf16().
+     *
+     * @param  string $str
+     * @param  string $toCharset
+     * @param  string $fromCharset
+     * @return string|false False if no conversion method exists for the pair
+     */
+    public function convert($str, $toCharset, $fromCharset = 'UTF-8')
+    {
+        $fromName = str_replace('-', '', strtolower($fromCharset));
+        $toName   = str_replace('-', '', strtolower($toCharset));
+        $method   = 'convert' . $fromName . 'To' . $toName;
+
+        if (method_exists($this, $method)) {
+            return $this->$method($str);
+        }
+
+        return false;
+    }
+
+    /**
+     * ASCII is a subset of UTF-8, so the string is returned unchanged.
+     *
+     * @param  string $str
+     * @return string
+     */
+    public function convertAsciiToUtf8($str)
+    {
+        return $str;
+    }
+
+    /**
+     * Prefix every byte with one NUL byte (two bytes per character).
+     *
+     * @param  string $str
+     * @return string
+     */
+    public function convertAsciiToUtf16($str)
+    {
+        // The "s" modifier lets "." match newlines as well; the callback
+        // receives the array of matches, the matched byte is element 0.
+        return preg_replace_callback('/./s', function ($match) {
+            return "\0" . $match[0];
+        }, $str);
+    }
+
+    /**
+     * @param  string $str
+     * @return string
+     * @see    convertAsciiToUtf16()
+     */
+    public function convertAsciiToUcs2($str)
+    {
+        return $this->convertAsciiToUtf16($str);
+    }
+
+    /**
+     * Prefix every byte with three NUL bytes (four bytes per character).
+     *
+     * @param  string $str
+     * @return string
+     */
+    public function convertAsciiToUtf32($str)
+    {
+        return preg_replace_callback('/./s', function ($match) {
+            return "\0\0\0" . $match[0];
+        }, $str);
+    }
+
+    /**
+     * @param  string $str
+     * @return string
+     * @see    convertAsciiToUtf32()
+     */
+    public function convertAsciiToUcs4($str)
+    {
+        return $this->convertAsciiToUtf32($str);
+    }
+
+    /**
+     * Not implemented yet: returns the input unchanged.
+     *
+     * @param  string $str
+     * @return string
+     */
+    public function convertUtf8ToAscii($str)
+    {
+        // TODO
+        return $str;
+    }
+
+    /**
+     * Not implemented yet: returns the input unchanged.
+     *
+     * @param  string $str
+     * @return string
+     */
+    public function convertUtf8ToUtf16($str)
+    {
+        // TODO
+        return $str;
+    }
+
+    /**
+     * @param  string $str
+     * @return string
+     * @see    convertUtf8ToUtf16()
+     */
+    public function convertUtf8ToUcs2($str)
+    {
+        return $this->convertUtf8ToUtf16($str);
+    }
+
+    /**
+     * Not implemented yet: returns the input unchanged.
+     *
+     * @param  string $str
+     * @return string
+     */
+    public function convertUtf8ToUtf32($str)
+    {
+        // TODO
+        return $str;
+    }
+
+    /**
+     * @param  string $str
+     * @return string
+     * @see    convertUtf8ToUtf32()
+     */
+    public function convertUtf8ToUcs4($str)
+    {
+        return $this->convertUtf8ToUtf32($str);
+    }
+
+    /**
+     * Bytes per character for the charsets this adapter treats as fixed width.
+     *
+     * @param  string $charset Upper-cased charset name
+     * @return integer|null 2, 4, or NULL if the charset is not fixed width here
+     */
+    protected function getFixedCharWidth($charset)
+    {
+        if (in_array($charset, array('UTF-16', 'UCS-2', 'UCS-2BE', 'UCS-2LE'), true)) {
+            return 2;
+        }
+
+        if (in_array($charset, array('UTF-32', 'UCS-4', 'UCS-4BE', 'UCS-4LE'), true)) {
+            return 4;
+        }
+
+        return null;
+    }
+
+    /**
+     * Byte-level substr() for a fixed number of bytes per character.
+     *
+     * @param  string       $str
+     * @param  integer      $offset    Offset in characters
+     * @param  integer|null $length    Length in characters, NULL for "to the end"
+     * @param  integer      $charWidth Bytes per character
+     * @return string|false Result of substr()
+     */
+    protected function substrFixedWidth($str, $offset, $length, $charWidth)
+    {
+        if ($length === null) {
+            // Omit the length instead of passing NULL * width (= 0),
+            // which would return an empty string.
+            return substr($str, $offset * $charWidth);
+        }
+
+        return substr($str, $offset * $charWidth, $length * $charWidth);
+    }
+}
View
41 library/Zend/Stdlib/StringAdapter/StringAdapterInterface.php
@@ -0,0 +1,41 @@
+<?php
+
+namespace Zend\Stdlib\StringAdapter;
+
+/**
+ * Contract for charset-aware string adapters.
+ */
+interface StringAdapterInterface
+{
+
+ /**
+ * Check whether the adapter supports the given character set
+ *
+ * @param string $charset
+ * @return boolean
+ */
+ public function isCharsetSupported($charset);
+
+ /**
+ * Get the length of a string in characters of the given character set
+ *
+ * @param string $str
+ * @param string $charset
+ * @return integer|false Implementations may return false on failure
+ */
+ public function strlen($str, $charset = 'UTF-8');
+
+ /**
+ * Get a part of a string, with offset and length counted in characters
+ *
+ * @param string $str
+ * @param integer $offset
+ * @param integer|null $length
+ * @param string $charset
+ * @return string|false Implementations may return false on failure
+ */
+ public function substr($str, $offset = 0, $length = null, $charset = 'UTF-8');
+
+ /**
+ * Find the position of the first occurrence of a needle in a haystack
+ *
+ * @param string $haystack
+ * @param string $needle
+ * @param integer $offset
+ * @param string $charset
+ * @return integer|false
+ */
+ public function strpos($haystack, $needle, $offset = 0, $charset = 'UTF-8');
+
+ /**
+ * Convert a string from one character set to another
+ *
+ * @param string $str
+ * @param string $toCharset
+ * @param string $fromCharset
+ * @return string|false Implementations may return false on failure
+ */
+ public function convert($str, $toCharset, $fromCharset = 'UTF-8');
+
+ /**
+ * Word wrap
+ *
+ * @param string $str String to wrap
+ * @param integer $width Maximum line width in characters
+ * @param string $break String inserted at wrap positions
+ * @param boolean $cut Whether words longer than the width are split
+ * @param string $charset Character set of the string
+ * @return string
+ */
+ public function wordWrap($str, $width = 75, $break = "\n", $cut = false, $charset = 'UTF-8');
+
+ /**
+ * String padding
+ *
+ * @param string $input String to pad
+ * @param integer $padLength Desired length in characters
+ * @param string $padString String used for padding
+ * @param integer $padType One of the STR_PAD_* constants
+ * @param string $charset Character set of the strings
+ * @return string
+ */
+ public function strPad($input, $padLength, $padString = ' ', $padType = \STR_PAD_RIGHT, $charset = 'UTF-8');
+}
View
76 library/Zend/Stdlib/StringUtils.php
@@ -0,0 +1,76 @@
+<?php
+
+namespace Zend\Stdlib;
+
+use Zend\Loader\Broker,
+ Zend\Loader\PluginBroker,
+ Zend\Stdlib\StringAdapter\MbString as MbStringAdapter,
+ Zend\Stdlib\StringAdapter\Iconv as IconvAdapter,
+ Zend\Stdlib\StringAdapter\Native as NativeAdapter;
+
+/**
+ * Static entry point for locating a string adapter for a character set.
+ */
+class StringUtils
+{
+
+    /**
+     * Broker holding the registered string adapters (created lazily)
+     *
+     * @var Broker|null
+     */
+    protected static $broker;
+
+    /**
+     * Character sets treated as single byte (upper case)
+     *
+     * @var string[]
+     */
+    protected static $singleByteCharsets = array(
+        'ASCII', '7BIT', '8BIT',
+        'ISO-8859-1', 'ISO-8859-2', 'ISO-8859-3', 'ISO-8859-4', 'ISO-8859-5',
+        'ISO-8859-6', 'ISO-8859-7', 'ISO-8859-8', 'ISO-8859-9', 'ISO-8859-10',
+        'ISO-8859-11', 'ISO-8859-13', 'ISO-8859-14', 'ISO-8859-15', 'ISO-8859-16',
+        'CP-1251', 'CP-1252'
+        // TODO
+
+    );
+
+    /**
+     * Get the adapter broker, building the default one on first use.
+     *
+     * The default broker registers the mbstring and iconv adapters when the
+     * respective extension is loaded, followed by the native adapter.
+     *
+     * @return Broker
+     */
+    public static function getBroker()
+    {
+        if (static::$broker !== null) {
+            return static::$broker;
+        }
+
+        $plugins = new PluginBroker();
+
+        if (extension_loaded('mbstring')) {
+            $plugins->register('mbstring', new MbStringAdapter());
+        }
+
+        if (extension_loaded('iconv')) {
+            $plugins->register('iconv', new IconvAdapter());
+        }
+
+        $plugins->register('native', new NativeAdapter());
+
+        static::setBroker($plugins);
+
+        return static::$broker;
+    }
+
+    /**
+     * Replace the adapter broker.
+     *
+     * @param  Broker $broker
+     * @return void
+     */
+    public static function setBroker(Broker $broker)
+    {
+        static::$broker = $broker;
+    }
+
+    /**
+     * Drop the current broker so the next getBroker() call builds a new one.
+     *
+     * @return void
+     */
+    public static function resetBroker()
+    {
+        static::$broker = null;
+    }
+
+    /**
+     * Get the first registered adapter that supports the given character set.
+     *
+     * @param  string $charset
+     * @return object Adapter whose isCharsetSupported() accepted the charset
+     * @throws Exception\RuntimeException If no registered adapter supports it
+     */
+    public static function getAdapterByCharset($charset = 'UTF-8')
+    {
+        $candidates = static::getBroker()->getPlugins();
+
+        foreach ($candidates as $candidate) {
+            if ($candidate->isCharsetSupported($charset)) {
+                return $candidate;
+            }
+        }
+
+        throw new Exception\RuntimeException("No string adapter found for charset '{$charset}'");
+    }
+
+    /**
+     * Check whether a character set is in the list of single byte charsets.
+     *
+     * @param  string $charset Charset name (case-insensitive)
+     * @return boolean
+     */
+    public static function isSingleByteCharset($charset)
+    {
+        $normalized = strtoupper($charset);
+
+        return in_array($normalized, static::$singleByteCharsets);
+    }
+}

0 comments on commit cd09a59

Please sign in to comment.