Permalink
Browse files

[Yaml] Improved support for double quoted values.

Added support for the full range of escape sequences allowed in
double-quoted strings, as defined in chapter 5 of the YAML 1.1 and 1.2
specs. The escaping and unescaping strategies were factored out into
separate classes (Escaper and Unescaper) to keep the logic isolated.

Added examples from the spec to the unit tests for all escaped values.
  • Loading branch information...
1 parent bfb8c07 commit 8865226de6191de6c477890a55251bcbadb31e4e @lewinski lewinski committed Mar 5, 2011
Showing with 237 additions and 8 deletions.
  1. +88 −0 Escaper.php
  2. +7 −8 Inline.php
  3. +142 −0 Unescaper.php
View
@@ -0,0 +1,88 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ * (c) Fabien Potencier <fabien.potencier@symfony-project.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace Symfony\Component\Yaml;
+
+/**
+ * Escaper encapsulates escaping rules for single and double-quoted
+ * YAML strings.
+ *
+ * @author Matthew Lewinski <matthew@lewinski.org>
+ */
+class Escaper
+{
+    // Characters that would cause a dumped string to require double quoting:
+    // the control characters 0x00-0x1f and the UTF-8 byte sequences for
+    // U+0085, U+00A0, U+2028 and U+2029.
+    const REGEX_CHARACTER_TO_ESCAPE = "[\\x00-\\x1f]|\xc2\x85|\xc2\xa0|\xe2\x80\xa8|\xe2\x80\xa9";
+
+    // Mapping arrays for escaping a double quoted string. Both arrays are
+    // index-aligned: $escapees[i] is replaced by $escaped[i].
+    //
+    // The backslash MUST come first: str_replace() processes the pairs one
+    // after the other on the running result, and every later replacement
+    // introduces a backslash that must not be escaped a second time. This
+    // ordering avoids the use of strtr, which performs more slowly.
+    private static $escapees = array('\\', '"',
+        "\x00", "\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07",
+        "\x08", "\x09", "\x0a", "\x0b", "\x0c", "\x0d", "\x0e", "\x0f",
+        "\x10", "\x11", "\x12", "\x13", "\x14", "\x15", "\x16", "\x17",
+        "\x18", "\x19", "\x1a", "\x1b", "\x1c", "\x1d", "\x1e", "\x1f",
+        "\xc2\x85", "\xc2\xa0", "\xe2\x80\xa8", "\xe2\x80\xa9");
+    private static $escaped = array('\\\\', '\\"',
+        "\\0", "\\x01", "\\x02", "\\x03", "\\x04", "\\x05", "\\x06", "\\a",
+        "\\b", "\\t", "\\n", "\\v", "\\f", "\\r", "\\x0e", "\\x0f",
+        "\\x10", "\\x11", "\\x12", "\\x13", "\\x14", "\\x15", "\\x16", "\\x17",
+        "\\x18", "\\x19", "\\x1a", "\\e", "\\x1c", "\\x1d", "\\x1e", "\\x1f",
+        "\\N", "\\_", "\\L", "\\P");
+
+    /**
+     * Determines if a PHP value would require double quoting in YAML.
+     *
+     * The value requires double quotes as soon as it contains one of the
+     * characters matched by REGEX_CHARACTER_TO_ESCAPE. The pattern is run
+     * with the "u" modifier, so a value for which preg_match() fails
+     * (returns false) is reported as not requiring double quotes.
+     *
+     * @param string $value A PHP value
+     *
+     * @return Boolean True if the value would require double quotes.
+     */
+    public static function requiresDoubleQuoting($value)
+    {
+        // preg_match() yields 1, 0 or false; normalize to the documented Boolean.
+        return 1 === preg_match('/'.self::REGEX_CHARACTER_TO_ESCAPE.'/u', $value);
+    }
+
+    /**
+     * Escapes and surrounds a PHP value with double quotes.
+     *
+     * Backslashes and double quotes are escaped in addition to the
+     * characters listed in REGEX_CHARACTER_TO_ESCAPE, so that the result is
+     * a well-formed double quoted scalar.
+     *
+     * @param string $value A PHP value
+     *
+     * @return string The quoted, escaped string
+     */
+    public static function escapeWithDoubleQuotes($value)
+    {
+        return sprintf('"%s"', str_replace(self::$escapees, self::$escaped, $value));
+    }
+
+    /**
+     * Determines if a PHP value would require single quoting in YAML.
+     *
+     * The value requires single quotes when it contains whitespace, a quote
+     * or one of the indicator characters of the first character class
+     * anywhere, or when it starts with one of the characters of the second
+     * character class.
+     *
+     * NOTE(review): the "x" modifier does not appear to strip whitespace
+     * inside a character class, so the " - " at the start of the second
+     * class looks like a space-to-space range rather than a literal hyphen.
+     * Confirm whether a leading "-" is meant to force quoting.
+     *
+     * @param string $value A PHP value
+     *
+     * @return Boolean True if the value would require single quotes.
+     */
+    public static function requiresSingleQuoting($value)
+    {
+        // preg_match() yields 1, 0 or false; normalize to the documented Boolean.
+        return 1 === preg_match('/[ \s \' " \: \{ \} \[ \] , & \* \# \?] | \A[ - ? | < > = ! % @ ` ]/x', $value);
+    }
+
+    /**
+     * Escapes and surrounds a PHP value with single quotes.
+     *
+     * The only escaping applied is the doubling of single quotes.
+     *
+     * @param string $value A PHP value
+     *
+     * @return string The quoted, escaped string
+     */
+    public static function escapeWithSingleQuotes($value)
+    {
+        return sprintf("'%s'", str_replace('\'', '\'\'', $value));
+    }
+}
View
@@ -88,10 +88,10 @@ static public function dump($value)
return is_string($value) ? "'$value'" : (int) $value;
case is_numeric($value):
return is_infinite($value) ? str_ireplace('INF', '.Inf', strval($value)) : (is_string($value) ? "'$value'" : $value);
- case false !== strpos($value, "\n") || false !== strpos($value, "\r"):
- return sprintf('"%s"', str_replace(array('"', "\n", "\r"), array('\\"', '\n', '\r'), $value));
- case preg_match('/[ \s \' " \: \{ \} \[ \] , & \* \# \?] | \A[ - ? | < > = ! % @ ` ]/x', $value):
- return sprintf("'%s'", str_replace('\'', '\'\'', $value));
+ case Escaper::requiresDoubleQuoting($value):
+ return Escaper::escapeWithDoubleQuotes($value);
+ case Escaper::requiresSingleQuoting($value):
+ return Escaper::escapeWithSingleQuotes($value);
case '' == $value:
return "''";
case preg_match(self::getTimestampRegex(), $value):
@@ -197,12 +197,11 @@ static protected function parseQuotedScalar($scalar, &$i)
$output = substr($match[0], 1, strlen($match[0]) - 2);
+ $unescaper = new Unescaper();
if ('"' == $scalar[$i]) {
- // evaluate the string
- $output = str_replace(array('\\"', '\\n', '\\r'), array('"', "\n", "\r"), $output);
+ $output = $unescaper->unescapeDoubleQuotedString($output);
} else {
- // unescape '
- $output = str_replace('\'\'', '\'', $output);
+ $output = $unescaper->unescapeSingleQuotedString($output);
}
$i += strlen($match[0]);
View
@@ -0,0 +1,142 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ * (c) Fabien Potencier <fabien.potencier@symfony-project.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace Symfony\Component\Yaml;
+
+/**
+ * Unescaper encapsulates unescaping rules for single and double-quoted
+ * YAML strings.
+ *
+ * @author Matthew Lewinski <matthew@lewinski.org>
+ */
+class Unescaper
+{
+    // Parser and Inline assume UTF-8 encoding, so escaped Unicode characters
+    // must be converted to that encoding.
+    const ENCODING = 'UTF-8';
+
+    // Regex fragment that matches an escaped character in a double quoted
+    // string: a backslash followed by either a single-character escape code
+    // or by x, u or U and 2, 4 or 8 hexadecimal digits respectively.
+    const REGEX_ESCAPED_CHARACTER = "\\\\([0abt\tnvfre \\\"\\/\\\\N_LP]|x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8})";
+
+    /**
+     * Unescapes a single quoted string.
+     *
+     * The only escape sequence handled is the doubled single quote.
+     *
+     * @param string $value A single quoted string.
+     *
+     * @return string The unescaped string.
+     */
+    public function unescapeSingleQuotedString($value)
+    {
+        return str_replace('\'\'', '\'', $value);
+    }
+
+    /**
+     * Unescapes a double quoted string.
+     *
+     * Every sequence matched by REGEX_ESCAPED_CHARACTER is replaced by the
+     * result of unescapeCharacter(); all other text is left untouched.
+     *
+     * @param string $value A double quoted string.
+     *
+     * @return string The unescaped string.
+     */
+    public function unescapeDoubleQuotedString($value)
+    {
+        // $this is copied into a local variable so that the closure can
+        // capture it through use().
+        $self = $this;
+        $callback = function($match) use($self) {
+            return $self->unescapeCharacter($match[0]);
+        };
+
+        // evaluate the string
+        return preg_replace_callback('/'.self::REGEX_ESCAPED_CHARACTER.'/u', $callback, $value);
+    }
+
+    /**
+     * Unescapes a character that was found in a double-quoted string
+     *
+     * @param string $value An escaped character: a backslash, the escape
+     *                      code and, for the x, u and U codes, the
+     *                      hexadecimal digits
+     *
+     * @return string|null The unescaped character, or null when the escape
+     *                     code is not one of the handled codes
+     */
+    public function unescapeCharacter($value)
+    {
+        // $value[0] is the backslash, $value[1] the escape code. Square
+        // brackets are used because the curly brace offset syntax is no
+        // longer accepted by current PHP versions.
+        switch ($value[1]) {
+            case '0':
+                return "\x0";
+            case 'a':
+                return "\x7";
+            case 'b':
+                return "\x8";
+            case 't':
+                return "\t";
+            case "\t":
+                return "\t";
+            case 'n':
+                return "\n";
+            case 'v':
+                return "\xb";
+            case 'f':
+                return "\xc";
+            case 'r':
+                return "\xd";
+            case 'e':
+                return "\x1b";
+            case ' ':
+                return ' ';
+            case '"':
+                return '"';
+            case '/':
+                return '/';
+            case '\\':
+                return '\\';
+            // The four escapes below denote fixed code points, so their
+            // UTF-8 byte sequences are returned directly.
+            case 'N':
+                // U+0085 NEXT LINE
+                return "\xc2\x85";
+            case '_':
+                // U+00A0 NO-BREAK SPACE
+                return "\xc2\xa0";
+            case 'L':
+                // U+2028 LINE SEPARATOR
+                return "\xe2\x80\xa8";
+            case 'P':
+                // U+2029 PARAGRAPH SEPARATOR
+                return "\xe2\x80\xa9";
+            case 'x':
+                // Two hex digits, packed as a 16-bit big endian code unit.
+                $char = pack('n', hexdec(substr($value, 2, 2)));
+                return $this->convertEncoding($char, self::ENCODING, 'UCS-2BE');
+            case 'u':
+                // Four hex digits, packed as a 16-bit big endian code unit.
+                $char = pack('n', hexdec(substr($value, 2, 4)));
+                return $this->convertEncoding($char, self::ENCODING, 'UCS-2BE');
+            case 'U':
+                // Eight hex digits, packed as a 32-bit big endian code unit.
+                $char = pack('N', hexdec(substr($value, 2, 8)));
+                return $this->convertEncoding($char, self::ENCODING, 'UCS-4BE');
+        }
+    }
+
+    /**
+     * Convert a string from one encoding to another.
+     *
+     * iconv is used when available, mbstring otherwise.
+     *
+     * @param string $value The string to convert
+     * @param string $to    The output encoding
+     * @param string $from  The input encoding
+     *
+     * @return string The string with the new encoding
+     *
+     * @throws \RuntimeException if no suitable encoding function is found (iconv or mbstring)
+     */
+    protected function convertEncoding($value, $to, $from)
+    {
+        if (function_exists('iconv')) {
+            return iconv($from, $to, $value);
+        } elseif (function_exists('mb_convert_encoding')) {
+            return mb_convert_encoding($value, $to, $from);
+        }
+
+        throw new \RuntimeException('No suitable convert encoding function (install the iconv or mbstring extension).');
+    }
+}

0 comments on commit 8865226

Please sign in to comment.