Skip to content

Commit

Permalink
Flexible doc: Validate end label indentation
Browse files Browse the repository at this point in the history
Move doc string parsing logic from rebuildParsers.php and
String_::parseDocString() into ParserAbstract. This stuff is
going to get complicated now.

For now only implement the validation of the indentation on the
end label.
  • Loading branch information
nikic committed Sep 21, 2018
1 parent 146411b commit 5f73c4d
Show file tree
Hide file tree
Showing 8 changed files with 101 additions and 65 deletions.
9 changes: 3 additions & 6 deletions grammar/php5.y
Expand Up @@ -791,11 +791,9 @@ common_scalar:
| T_FUNC_C { $$ = Scalar\MagicConst\Function_[]; }
| T_NS_C { $$ = Scalar\MagicConst\Namespace_[]; }
| T_START_HEREDOC T_ENCAPSED_AND_WHITESPACE T_END_HEREDOC
{ $attrs = attributes(); setDocStringAttrs($attrs, $1);
$$ = new Scalar\String_(Scalar\String_::parseDocString($1, $2, false), $attrs); }
{ $$ = $this->parseDocString($1, $2, $3, attributes(), stackAttributes(#3), false); }
| T_START_HEREDOC T_END_HEREDOC
{ $attrs = attributes(); setDocStringAttrs($attrs, $1);
$$ = new Scalar\String_('', $attrs); }
{ $$ = $this->parseDocString($1, '', $2, attributes(), stackAttributes(#2), false); }
;

static_scalar:
Expand Down Expand Up @@ -856,8 +854,7 @@ scalar:
{ $attrs = attributes(); $attrs['kind'] = Scalar\String_::KIND_DOUBLE_QUOTED;
parseEncapsed($2, '"', true); $$ = new Scalar\Encapsed($2, $attrs); }
| T_START_HEREDOC encaps_list T_END_HEREDOC
{ $attrs = attributes(); setDocStringAttrs($attrs, $1);
parseEncapsedDoc($2, true); $$ = new Scalar\Encapsed($2, $attrs); }
{ $$ = $this->parseDocString($1, $2, $3, attributes(), stackAttributes(#3), true); }
;

static_array_pair_list:
Expand Down
9 changes: 3 additions & 6 deletions grammar/php7.y
Expand Up @@ -847,17 +847,14 @@ scalar:
| dereferencable_scalar { $$ = $1; }
| constant { $$ = $1; }
| T_START_HEREDOC T_ENCAPSED_AND_WHITESPACE T_END_HEREDOC
{ $attrs = attributes(); setDocStringAttrs($attrs, $1);
$$ = new Scalar\String_(Scalar\String_::parseDocString($1, $2), $attrs); }
{ $$ = $this->parseDocString($1, $2, $3, attributes(), stackAttributes(#3), true); }
| T_START_HEREDOC T_END_HEREDOC
{ $attrs = attributes(); setDocStringAttrs($attrs, $1);
$$ = new Scalar\String_('', $attrs); }
{ $$ = $this->parseDocString($1, '', $2, attributes(), stackAttributes(#2), true); }
| '"' encaps_list '"'
{ $attrs = attributes(); $attrs['kind'] = Scalar\String_::KIND_DOUBLE_QUOTED;
parseEncapsed($2, '"', true); $$ = new Scalar\Encapsed($2, $attrs); }
| T_START_HEREDOC encaps_list T_END_HEREDOC
{ $attrs = attributes(); setDocStringAttrs($attrs, $1);
parseEncapsedDoc($2, true); $$ = new Scalar\Encapsed($2, $attrs); }
{ $$ = $this->parseDocString($1, $2, $3, attributes(), stackAttributes(#3), true); }
;

optional_expr:
Expand Down
18 changes: 0 additions & 18 deletions grammar/rebuildParsers.php
Expand Up @@ -166,15 +166,6 @@ function($matches) {
. ' $s->value = Node\Scalar\String_::parseEscapeSequences($s->value, ' . $args[1] . ', ' . $args[2] . '); } }';
}

if ('parseEncapsedDoc' == $name) {
assertArgs(2, $args, $name);

return 'foreach (' . $args[0] . ' as $s) { if ($s instanceof Node\Scalar\EncapsedStringPart) {'
. ' $s->value = Node\Scalar\String_::parseEscapeSequences($s->value, null, ' . $args[1] . '); } }'
. ' $s->value = preg_replace(\'~(\r\n|\n|\r)\z~\', \'\', $s->value);'
. ' if (\'\' === $s->value) array_pop(' . $args[0] . ');';
}

if ('makeNop' == $name) {
assertArgs(3, $args, $name);

Expand All @@ -192,15 +183,6 @@ function($matches) {
. '? Scalar\String_::KIND_SINGLE_QUOTED : Scalar\String_::KIND_DOUBLE_QUOTED)';
}

if ('setDocStringAttrs' == $name) {
assertArgs(2, $args, $name);

return $args[0] . '[\'kind\'] = strpos(' . $args[1] . ', "\'") === false '
. '? Scalar\String_::KIND_HEREDOC : Scalar\String_::KIND_NOWDOC; '
. 'preg_match(\'/\A[bB]?<<<[ \t]*[\\\'"]?([a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*)[\\\'"]?(?:\r\n|\n|\r)\z/\', ' . $args[1] . ', $matches); '
. $args[0] . '[\'docLabel\'] = $matches[1];';
}

if ('prependLeadingComments' == $name) {
assertArgs(1, $args, $name);

Expand Down
23 changes: 0 additions & 23 deletions lib/PhpParser/Node/Scalar/String_.php
Expand Up @@ -134,29 +134,6 @@ private static function codePointToUtf8(int $num) : string {
}
throw new Error('Invalid UTF-8 codepoint escape sequence: Codepoint too large');
}

/**
* @internal
*
* Parses a constant doc string.
*
* @param string $startToken Doc string start token content (<<<SMTHG)
* @param string $str String token content
* @param bool $parseUnicodeEscape Whether to parse PHP 7 \u escapes
*
* @return string Parsed string
*/
public static function parseDocString(string $startToken, string $str, bool $parseUnicodeEscape = true) : string {
// strip last newline (thanks tokenizer for sticking it into the string!)
$str = preg_replace('~(\r\n|\n|\r)\z~', '', $str);

// nowdoc string
if (false !== strpos($startToken, '\'')) {
return $str;
}

return self::parseEscapeSequences($str, null, $parseUnicodeEscape);
}

public function getType() : string {
return 'Scalar_String';
Expand Down
9 changes: 3 additions & 6 deletions lib/PhpParser/Parser/Php5.php
Expand Up @@ -2264,12 +2264,10 @@ protected function initReduceCallbacks() {
$this->semValue = new Scalar\MagicConst\Namespace_($this->startAttributeStack[$stackPos-(1-1)] + $this->endAttributes);
},
436 => function ($stackPos) {
$attrs = $this->startAttributeStack[$stackPos-(3-1)] + $this->endAttributes; $attrs['kind'] = strpos($this->semStack[$stackPos-(3-1)], "'") === false ? Scalar\String_::KIND_HEREDOC : Scalar\String_::KIND_NOWDOC; preg_match('/\A[bB]?<<<[ \t]*[\'"]?([a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*)[\'"]?(?:\r\n|\n|\r)\z/', $this->semStack[$stackPos-(3-1)], $matches); $attrs['docLabel'] = $matches[1];;
$this->semValue = new Scalar\String_(Scalar\String_::parseDocString($this->semStack[$stackPos-(3-1)], $this->semStack[$stackPos-(3-2)], false), $attrs);
$this->semValue = $this->parseDocString($this->semStack[$stackPos-(3-1)], $this->semStack[$stackPos-(3-2)], $this->semStack[$stackPos-(3-3)], $this->startAttributeStack[$stackPos-(3-1)] + $this->endAttributes, $this->startAttributeStack[$stackPos-(3-3)] + $this->endAttributeStack[$stackPos-(3-3)], false);
},
437 => function ($stackPos) {
$attrs = $this->startAttributeStack[$stackPos-(2-1)] + $this->endAttributes; $attrs['kind'] = strpos($this->semStack[$stackPos-(2-1)], "'") === false ? Scalar\String_::KIND_HEREDOC : Scalar\String_::KIND_NOWDOC; preg_match('/\A[bB]?<<<[ \t]*[\'"]?([a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*)[\'"]?(?:\r\n|\n|\r)\z/', $this->semStack[$stackPos-(2-1)], $matches); $attrs['docLabel'] = $matches[1];;
$this->semValue = new Scalar\String_('', $attrs);
$this->semValue = $this->parseDocString($this->semStack[$stackPos-(2-1)], '', $this->semStack[$stackPos-(2-2)], $this->startAttributeStack[$stackPos-(2-1)] + $this->endAttributes, $this->startAttributeStack[$stackPos-(2-2)] + $this->endAttributeStack[$stackPos-(2-2)], false);
},
438 => function ($stackPos) {
$this->semValue = $this->semStack[$stackPos-(1-1)];
Expand Down Expand Up @@ -2405,8 +2403,7 @@ protected function initReduceCallbacks() {
foreach ($this->semStack[$stackPos-(3-2)] as $s) { if ($s instanceof Node\Scalar\EncapsedStringPart) { $s->value = Node\Scalar\String_::parseEscapeSequences($s->value, '"', true); } }; $this->semValue = new Scalar\Encapsed($this->semStack[$stackPos-(3-2)], $attrs);
},
482 => function ($stackPos) {
$attrs = $this->startAttributeStack[$stackPos-(3-1)] + $this->endAttributes; $attrs['kind'] = strpos($this->semStack[$stackPos-(3-1)], "'") === false ? Scalar\String_::KIND_HEREDOC : Scalar\String_::KIND_NOWDOC; preg_match('/\A[bB]?<<<[ \t]*[\'"]?([a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*)[\'"]?(?:\r\n|\n|\r)\z/', $this->semStack[$stackPos-(3-1)], $matches); $attrs['docLabel'] = $matches[1];;
foreach ($this->semStack[$stackPos-(3-2)] as $s) { if ($s instanceof Node\Scalar\EncapsedStringPart) { $s->value = Node\Scalar\String_::parseEscapeSequences($s->value, null, true); } } $s->value = preg_replace('~(\r\n|\n|\r)\z~', '', $s->value); if ('' === $s->value) array_pop($this->semStack[$stackPos-(3-2)]);; $this->semValue = new Scalar\Encapsed($this->semStack[$stackPos-(3-2)], $attrs);
$this->semValue = $this->parseDocString($this->semStack[$stackPos-(3-1)], $this->semStack[$stackPos-(3-2)], $this->semStack[$stackPos-(3-3)], $this->startAttributeStack[$stackPos-(3-1)] + $this->endAttributes, $this->startAttributeStack[$stackPos-(3-3)] + $this->endAttributeStack[$stackPos-(3-3)], true);
},
483 => function ($stackPos) {
$this->semValue = array();
Expand Down
9 changes: 3 additions & 6 deletions lib/PhpParser/Parser/Php7.php
Expand Up @@ -2201,20 +2201,17 @@ protected function initReduceCallbacks() {
$this->semValue = $this->semStack[$stackPos-(1-1)];
},
445 => function ($stackPos) {
$attrs = $this->startAttributeStack[$stackPos-(3-1)] + $this->endAttributes; $attrs['kind'] = strpos($this->semStack[$stackPos-(3-1)], "'") === false ? Scalar\String_::KIND_HEREDOC : Scalar\String_::KIND_NOWDOC; preg_match('/\A[bB]?<<<[ \t]*[\'"]?([a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*)[\'"]?(?:\r\n|\n|\r)\z/', $this->semStack[$stackPos-(3-1)], $matches); $attrs['docLabel'] = $matches[1];;
$this->semValue = new Scalar\String_(Scalar\String_::parseDocString($this->semStack[$stackPos-(3-1)], $this->semStack[$stackPos-(3-2)]), $attrs);
$this->semValue = $this->parseDocString($this->semStack[$stackPos-(3-1)], $this->semStack[$stackPos-(3-2)], $this->semStack[$stackPos-(3-3)], $this->startAttributeStack[$stackPos-(3-1)] + $this->endAttributes, $this->startAttributeStack[$stackPos-(3-3)] + $this->endAttributeStack[$stackPos-(3-3)], true);
},
446 => function ($stackPos) {
$attrs = $this->startAttributeStack[$stackPos-(2-1)] + $this->endAttributes; $attrs['kind'] = strpos($this->semStack[$stackPos-(2-1)], "'") === false ? Scalar\String_::KIND_HEREDOC : Scalar\String_::KIND_NOWDOC; preg_match('/\A[bB]?<<<[ \t]*[\'"]?([a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*)[\'"]?(?:\r\n|\n|\r)\z/', $this->semStack[$stackPos-(2-1)], $matches); $attrs['docLabel'] = $matches[1];;
$this->semValue = new Scalar\String_('', $attrs);
$this->semValue = $this->parseDocString($this->semStack[$stackPos-(2-1)], '', $this->semStack[$stackPos-(2-2)], $this->startAttributeStack[$stackPos-(2-1)] + $this->endAttributes, $this->startAttributeStack[$stackPos-(2-2)] + $this->endAttributeStack[$stackPos-(2-2)], true);
},
447 => function ($stackPos) {
$attrs = $this->startAttributeStack[$stackPos-(3-1)] + $this->endAttributes; $attrs['kind'] = Scalar\String_::KIND_DOUBLE_QUOTED;
foreach ($this->semStack[$stackPos-(3-2)] as $s) { if ($s instanceof Node\Scalar\EncapsedStringPart) { $s->value = Node\Scalar\String_::parseEscapeSequences($s->value, '"', true); } }; $this->semValue = new Scalar\Encapsed($this->semStack[$stackPos-(3-2)], $attrs);
},
448 => function ($stackPos) {
$attrs = $this->startAttributeStack[$stackPos-(3-1)] + $this->endAttributes; $attrs['kind'] = strpos($this->semStack[$stackPos-(3-1)], "'") === false ? Scalar\String_::KIND_HEREDOC : Scalar\String_::KIND_NOWDOC; preg_match('/\A[bB]?<<<[ \t]*[\'"]?([a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*)[\'"]?(?:\r\n|\n|\r)\z/', $this->semStack[$stackPos-(3-1)], $matches); $attrs['docLabel'] = $matches[1];;
foreach ($this->semStack[$stackPos-(3-2)] as $s) { if ($s instanceof Node\Scalar\EncapsedStringPart) { $s->value = Node\Scalar\String_::parseEscapeSequences($s->value, null, true); } } $s->value = preg_replace('~(\r\n|\n|\r)\z~', '', $s->value); if ('' === $s->value) array_pop($this->semStack[$stackPos-(3-2)]);; $this->semValue = new Scalar\Encapsed($this->semStack[$stackPos-(3-2)], $attrs);
$this->semValue = $this->parseDocString($this->semStack[$stackPos-(3-1)], $this->semStack[$stackPos-(3-2)], $this->semStack[$stackPos-(3-3)], $this->startAttributeStack[$stackPos-(3-1)] + $this->endAttributes, $this->startAttributeStack[$stackPos-(3-3)] + $this->endAttributeStack[$stackPos-(3-3)], true);
},
449 => function ($stackPos) {
$this->semValue = null;
Expand Down
64 changes: 64 additions & 0 deletions lib/PhpParser/ParserAbstract.php
Expand Up @@ -9,6 +9,7 @@
use PhpParser\Node\Expr;
use PhpParser\Node\Name;
use PhpParser\Node\Param;
use PhpParser\Node\Scalar\Encapsed;
use PhpParser\Node\Scalar\LNumber;
use PhpParser\Node\Scalar\String_;
use PhpParser\Node\Stmt\Class_;
Expand Down Expand Up @@ -710,6 +711,69 @@ protected function parseNumString(string $str, array $attributes) {
return new LNumber($num, $attributes);
}

protected function stripIndentation(string $string, string $indentation, bool $newlineBefore) {
// TODO
}

protected function parseDocString(
string $startToken, $contents, string $endToken,
array $attributes, array $endTokenAttributes, bool $parseUnicodeEscape
) {
$kind = strpos($startToken, "'") === false
? String_::KIND_HEREDOC : String_::KIND_NOWDOC;

$regex = '/\A[bB]?<<<[ \t]*[\'"]?([a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*)[\'"]?(?:\r\n|\n|\r)\z/';
$result = preg_match($regex, $startToken, $matches);
assert($result === 1);
$label = $matches[1];

$result = preg_match('/\A[ \t]*/', $endToken, $matches);
assert($result === 1);
$indentation = $matches[0];

$attributes['kind'] = $kind;
$attributes['docLabel'] = $label;
$attributes['docIndentation'] = $indentation;

$indentHasSpaces = false !== strpos($indentation, " ");
$indentHasTabs = false !== strpos($indentation, "\t");
if ($indentHasSpaces && $indentHasTabs) {
$this->emitError(new Error(
'Invalid indentation - tabs and spaces cannot be mixed',
$endTokenAttributes
));

// Proceed processing as if this doc string is not indented
$indentation = '';
}

if (\is_string($contents)) {
if ($contents === '') {
return new String_('', $attributes);
}

// strip last newline (thanks tokenizer for sticking it into the string!)
$string = preg_replace('~(\r\n|\n|\r)\z~', '', $contents);

if ($kind === String_::KIND_HEREDOC) {
$string = String_::parseEscapeSequences($string, null, $parseUnicodeEscape);
}

return new String_($string, $attributes);
} else {
foreach ($contents as $s) {
if ($s instanceof Node\Scalar\EncapsedStringPart) {
$s->value = String_::parseEscapeSequences($s->value, null, $parseUnicodeEscape);
}
}
$s->value = preg_replace('~(\r\n|\n|\r)\z~', '', $s->value);
if ('' === $s->value) {
array_pop($contents);
}
return new Encapsed($contents, $attributes);
}
}

protected function checkModifier($a, $b, $modifierPos) {
// Jumping through some hoops here because verifyModifier() is also used elsewhere
try {
Expand Down
25 changes: 25 additions & 0 deletions test/code/parser/scalar/flexibleDocStringErrors.test
@@ -0,0 +1,25 @@
Error conditions for flexible doc strings
-----
<?php

<<<A
@@{ "\t" }@@A;

<<<A
FooBar
@@{ "\t" }@@A;
-----
Invalid indentation - tabs and spaces cannot be mixed from 4:1 to 4:3
Invalid indentation - tabs and spaces cannot be mixed from 8:1 to 8:3
array(
0: Stmt_Expression(
expr: Scalar_String(
value:
)
)
1: Stmt_Expression(
expr: Scalar_String(
value: FooBar
)
)
)

0 comments on commit 5f73c4d

Please sign in to comment.