Skip to content

Commit

Permalink
Parser: decodeString() moved to StringNode, literalToValue() moved to LiteralNode
Browse files Browse the repository at this point in the history
  • Loading branch information
dg committed Nov 2, 2021
1 parent f7debca commit c7a810c
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 85 deletions.
43 changes: 43 additions & 0 deletions src/Neon/Node/LiteralNode.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,19 @@
/** @internal */
final class LiteralNode extends Node
{
/** Keyword literals and the PHP values they stand for; only these exact spellings are recognised. */
private const SIMPLE_TYPES = [
'true' => true, 'True' => true, 'TRUE' => true, 'yes' => true, 'Yes' => true, 'YES' => true, 'on' => true, 'On' => true, 'ON' => true,
'false' => false, 'False' => false, 'FALSE' => false, 'no' => false, 'No' => false, 'NO' => false, 'off' => false, 'Off' => false, 'OFF' => false,
'null' => null, 'Null' => null, 'NULL' => null,
];

/** Keywords from SIMPLE_TYPES that are still accepted but make parse() raise an E_USER_DEPRECATED notice. */
private const DEPRECATED_TYPES = ['on' => 1, 'On' => 1, 'ON' => 1, 'off' => 1, 'Off' => 1, 'OFF' => 1];

// In all patterns below the A modifier anchors the match at the start of the subject
// and D makes $ match only at its very end, so the whole literal has to fit the pattern.

/** Date (YYYY-M-D), optionally followed by a time after T/t or spaces, an optional fraction and an optional Z or numeric offset. */
private const PATTERN_DATETIME = '#\d\d\d\d-\d\d?-\d\d?(?:(?:[Tt]| ++)\d\d?:\d\d:\d\d(?:\.\d*+)? *+(?:Z|[-+]\d\d?(?::?\d\d)?)?)?$#DA';
/** Hexadecimal integer with the 0x prefix. */
private const PATTERN_HEX = '#0x[0-9a-fA-F]++$#DA';
/** Octal integer with the 0o prefix. */
private const PATTERN_OCTAL = '#0o[0-7]++$#DA';
/** Binary integer with the 0b prefix. */
private const PATTERN_BINARY = '#0b[0-1]++$#DA';

/** @var mixed */
public $value;

Expand All @@ -32,6 +45,36 @@ public function toValue(callable $evaluator = null)
}


/**
 * Converts the raw text of a literal token into the PHP value it denotes.
 *
 * Checked in this order: keywords (true/yes/on, false/no/off, null), decimal numbers,
 * integers with the 0x / 0o / 0b prefix, and date-times. Text that fits none of these
 * is returned unchanged as a string.
 *
 * @param  string  $value  literal exactly as written in the source
 * @param  bool  $isKey  when true, keywords and date-times are not interpreted and stay strings
 *                       (the parser passes true for a literal that is followed by ':' or '=')
 * @return mixed  bool, null, int, float, string or \DateTimeImmutable
 */
public static function parse(string $value, bool $isKey = false)
{
	if (!$isKey && array_key_exists($value, self::SIMPLE_TYPES)) {
		if (isset(self::DEPRECATED_TYPES[$value])) {
			trigger_error("Neon: keyword '$value' is deprecated, use true/yes or false/no.", E_USER_DEPRECATED);
		}
		return self::SIMPLE_TYPES[$value];
	}

	if (is_numeric($value)) {
		$number = $value * 1;
		// An integer literal beyond PHP_INT_MAX is converted to float by the multiplication
		// and loses digits. Floats are returned only when the literal was written as one
		// (contains '.', 'e' or 'E'); an overflowing integer keeps its exact text instead.
		return is_int($number) || preg_match('#[.eE]#', $value)
			? $number
			: $value;
	}

	if (preg_match(self::PATTERN_HEX, $value)) {
		return hexdec($value);
	}

	if (preg_match(self::PATTERN_OCTAL, $value)) {
		return octdec($value);
	}

	if (preg_match(self::PATTERN_BINARY, $value)) {
		return bindec($value);
	}

	if (!$isKey && preg_match(self::PATTERN_DATETIME, $value)) {
		// NOTE(review): the pattern checks only the shape; the constructor presumably still
		// throws for values it cannot parse (e.g. month 13) - confirm callers expect that.
		return new \DateTimeImmutable($value);
	}

	return $value;
}


public function toString(callable $serializer = null): string
{
if ($this->value instanceof \DateTimeInterface) {
Expand Down
42 changes: 42 additions & 0 deletions src/Neon/Node/StringNode.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@
/** @internal */
final class StringNode extends Node
{
/**
 * Single-character escapes: the character that follows the backslash => the text it produces.
 * Used by parse() for strings that are not delimited by apostrophes; \_ yields U+00A0.
 */
private const ESCAPE_SEQUENCES = [
't' => "\t", 'n' => "\n", 'r' => "\r", 'f' => "\x0C", 'b' => "\x08", '"' => '"', '\\' => '\\', '/' => '/', '_' => "\u{A0}",
];

/** @var string */
public $value;

Expand All @@ -33,6 +37,44 @@ public function toValue(callable $evaluator = null): string
}


/**
 * Decodes the raw text of a string token (delimiters included) into the string it represents.
 *
 * A multiline string (three delimiter characters followed by a line break) loses its
 * delimiters, the indentation found on its first line, the leading line break and the
 * trailing line break with its whitespace. Any other string loses its one-character quotes,
 * and when it is apostrophe-delimited a doubled apostrophe collapses to a single one.
 * Strings starting with an apostrophe are returned without further processing; in all
 * others backslash escape sequences are expanded.
 *
 * @param  string  $s  token text including its delimiters
 * @throws Nette\Neon\Exception  for an unknown escape sequence or a \u escape that cannot be decoded
 */
public static function parse(string $s): string
{
	if (!preg_match('#^...\n++([\t ]*+)#', $s, $indent)) {
		$text = substr($s, 1, -1);
		if ($s[0] === "'") {
			$text = str_replace("''", "'", $text);
		}
	} else { // multiline
		$text = str_replace("\n" . $indent[1], "\n", substr($s, 3, -3));
		$text = preg_replace('#^\n|\n[\t ]*+$#D', '', $text);
	}

	if ($s[0] === "'") {
		return $text;
	}

	// Expands one escape sequence matched by the pattern below.
	$expand = static function (array $match): string {
		$sequence = $match[0];
		$kind = $sequence[1];

		if (isset(self::ESCAPE_SEQUENCES[$kind])) {
			return self::ESCAPE_SEQUENCES[$kind];
		}

		if ($kind === 'u' && strlen($sequence) >= 6) {
			// \uXXXX, or a surrogate pair, is decoded by treating it as a JSON string
			$decoded = json_decode('"' . $sequence . '"');
			if ($decoded === null) {
				throw new Nette\Neon\Exception("Invalid UTF-8 sequence $sequence");
			}
			return $decoded;
		}

		if ($kind === 'x' && strlen($sequence) === 4) {
			trigger_error("Neon: '$sequence' is deprecated, use '\\uXXXX' instead.", E_USER_DEPRECATED);
			return chr(hexdec(substr($sequence, 2)));
		}

		throw new Nette\Neon\Exception("Invalid escaping sequence $sequence");
	};

	return preg_replace_callback(
		'#\\\\(?:ud[89ab][0-9a-f]{2}\\\\ud[c-f][0-9a-f]{2}|u[0-9a-f]{4}|x[0-9a-f]{2}|.)#i',
		$expand,
		$text
	);
}


public function toString(callable $serializer = null): string
{
$res = json_encode($this->value, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
Expand Down
92 changes: 7 additions & 85 deletions src/Neon/Parser.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,23 +13,6 @@
/** @internal */
final class Parser
{
// Patterns tested by literalToValue(). The A modifier anchors the match at the start of the
// subject and D makes $ match only at its very end, so the whole literal has to fit.
private const PATTERN_DATETIME = '#\d\d\d\d-\d\d?-\d\d?(?:(?:[Tt]| ++)\d\d?:\d\d:\d\d(?:\.\d*+)? *+(?:Z|[-+]\d\d?(?::?\d\d)?)?)?$#DA';
private const PATTERN_HEX = '#0x[0-9a-fA-F]++$#DA';
private const PATTERN_OCTAL = '#0o[0-7]++$#DA';
private const PATTERN_BINARY = '#0b[0-1]++$#DA';

/** Keyword literals and the PHP values they stand for; only these exact spellings are recognised. */
private const SIMPLE_TYPES = [
'true' => true, 'True' => true, 'TRUE' => true, 'yes' => true, 'Yes' => true, 'YES' => true, 'on' => true, 'On' => true, 'ON' => true,
'false' => false, 'False' => false, 'FALSE' => false, 'no' => false, 'No' => false, 'NO' => false, 'off' => false, 'Off' => false, 'OFF' => false,
'null' => null, 'Null' => null, 'NULL' => null,
];

/** Keywords from SIMPLE_TYPES that are still accepted but make literalToValue() raise an E_USER_DEPRECATED notice. */
private const DEPRECATED_TYPES = ['on' => 1, 'On' => 1, 'ON' => 1, 'off' => 1, 'Off' => 1, 'OFF' => 1];

/** Single-character escapes used by decodeString(): character after the backslash => text it produces (\_ yields U+00A0). */
private const ESCAPE_SEQUENCES = [
't' => "\t", 'n' => "\n", 'r' => "\r", 'f' => "\x0C", 'b' => "\x08", '"' => '"', '\\' => '\\', '/' => '/', '_' => "\u{A0}",
];

/** @var TokenStream */
private $tokens;

Expand Down Expand Up @@ -133,11 +116,15 @@ private function parseBlock(string $indent, bool $onlyBullets = false): Node
private function parseValue(): Node
{
if ($token = $this->tokens->consume(Token::STRING)) {
$node = new Node\StringNode($this->decodeString($token->value), $this->tokens->getPos() - 1);
try {
$node = new Node\StringNode(Node\StringNode::parse($token->value), $this->tokens->getPos() - 1);
} catch (Exception $e) {
$this->tokens->error($e->getMessage(), $this->tokens->getPos() - 1);
}

} elseif ($token = $this->tokens->consume(Token::LITERAL)) {
$pos = $this->tokens->getPos() - 1;
$node = new Node\LiteralNode($this->literalToValue($token->value, $this->tokens->isNext(':', '=')), $pos);
$node = new Node\LiteralNode(Node\LiteralNode::parse($token->value, $this->tokens->isNext(':', '=')), $pos);

} elseif ($this->tokens->isNext('[', '(', '{')) {
$node = $this->parseBraces();
Expand All @@ -159,7 +146,7 @@ private function parseEntity(Node $node): Node
$entities[] = new Node\EntityNode($node, $attributes->items, $node->startPos, $attributes->endPos);

while ($token = $this->tokens->consume(Token::LITERAL)) {
$valueNode = new Node\LiteralNode($this->literalToValue($token->value), $this->tokens->getPos() - 1);
$valueNode = new Node\LiteralNode(Node\LiteralNode::parse($token->value), $this->tokens->getPos() - 1);
if ($this->tokens->isNext('(')) {
$attributes = $this->parseBraces();
$entities[] = new Node\EntityNode($valueNode, $attributes->items, $valueNode->startPos, $attributes->endPos);
Expand Down Expand Up @@ -213,41 +200,6 @@ private function parseBraces(): Node\ArrayNode
}


/**
 * Decodes the raw text of a string token (delimiters included) into the string it represents.
 *
 * Multiline strings lose their three-character delimiters and the indentation found on
 * their first line; other strings lose their one-character quotes. Escape sequences are
 * expanded only when the token starts with a double quote.
 */
private function decodeString(string $s): string
{
if (preg_match('#^...\n++([\t ]*+)#', $s, $m)) { // multiline
// drop the delimiters, then remove the first line's indentation after every line break
$res = substr($s, 3, -3);
$res = str_replace("\n" . $m[1], "\n", $res);
// strip the leading line break and the trailing line break with its whitespace
$res = preg_replace('#^\n|\n[\t ]*+$#D', '', $res);
} else {
$res = substr($s, 1, -1);
if ($s[0] === "'") {
// inside apostrophes a doubled apostrophe stands for a single one
$res = str_replace("''", "'", $res);
}
}
if ($s[0] === '"') {
$res = preg_replace_callback(
'#\\\\(?:ud[89ab][0-9a-f]{2}\\\\ud[c-f][0-9a-f]{2}|u[0-9a-f]{4}|x[0-9a-f]{2}|.)#i',
function (array $m): string {
$sq = $m[0];
if (isset(self::ESCAPE_SEQUENCES[$sq[1]])) {
return self::ESCAPE_SEQUENCES[$sq[1]];
} elseif ($sq[1] === 'u' && strlen($sq) >= 6) {
// \uXXXX or a surrogate pair is decoded as a JSON string; a null result is reported as an error
return json_decode('"' . $sq . '"') ?? $this->tokens->error("Invalid UTF-8 sequence $sq", $this->tokens->getPos() - 1);
} elseif ($sq[1] === 'x' && strlen($sq) === 4) {
trigger_error("Neon: '$sq' is deprecated, use '\\uXXXX' instead.", E_USER_DEPRECATED);
return chr(hexdec(substr($sq, 2)));
} else {
// NOTE(review): no return here, so tokens->error() presumably always throws - confirm in TokenStream
$this->tokens->error("Invalid escaping sequence $sq", $this->tokens->getPos() - 1);
}
},
$res
);
}
return $res;
}


private function checkArrayKey(Node $key, array &$arr): void
{
if ((!$key instanceof Node\StringNode && !$key instanceof Node\LiteralNode) || !is_scalar($key->value)) {
Expand All @@ -259,34 +211,4 @@ private function checkArrayKey(Node $key, array &$arr): void
}
$arr[$k] = true;
}


/**
 * Converts the raw text of a literal token into the PHP value it denotes.
 *
 * Checked in this order: keywords, decimal numbers, 0x / 0o / 0b prefixed integers,
 * date-times; any other text is returned unchanged. With $isKey set, keywords and
 * date-times are not interpreted.
 *
 * @return mixed
 */
public function literalToValue(string $value, bool $isKey = false)
{
	$interpret = !$isKey;

	if ($interpret && array_key_exists($value, self::SIMPLE_TYPES)) {
		if (isset(self::DEPRECATED_TYPES[$value])) {
			trigger_error("Neon: keyword '$value' is deprecated, use true/yes or false/no.", E_USER_DEPRECATED);
		}
		return self::SIMPLE_TYPES[$value];
	}

	if (is_numeric($value)) {
		return $value * 1;
	}

	if (preg_match(self::PATTERN_HEX, $value)) {
		return hexdec($value);
	}

	if (preg_match(self::PATTERN_OCTAL, $value)) {
		return octdec($value);
	}

	if (preg_match(self::PATTERN_BINARY, $value)) {
		return bindec($value);
	}

	if ($interpret && preg_match(self::PATTERN_DATETIME, $value)) {
		return new \DateTimeImmutable($value);
	}

	return $value;
}
}

0 comments on commit c7a810c

Please sign in to comment.