Skip to content

Commit

Permalink
Fix here-docs in nested quote-like operators
Browse files Browse the repository at this point in the history
When the lexer encounters a quote-like operator, it extracts the con-
tents of the quotes and starts an inner lexing scope.

To handle eval "s//<<FOO/e\n...", the here-doc parser peeks into the
outer lexing scope’s PL_linestr (current line buffer, which inside an
eval contains the entire string of code being parsed; for quote-like
operators, that is where the contents of the quote are stored).  It
only does this inside a string eval.  When parsing a file, the input
comes in one line at a time.  So the here-doc parser steals lines from
the input stream for s//<<FOO/e outside an eval.

This approach fails in this case, as the peekee is the linestr for
s///, not for the eval:

eval ' s//"${\<<END}"/e; print
Just another Perl hacker,
END
'or die $@
__END__
Can't find string terminator "END" anywhere before EOF at (eval 1) line 1.

We also need to do this peeking stuff outside of a string eval, to
solve this:

s//"${\<<END}"
Just another Perl hacker,
END
/e; print
__END__
Can't find string terminator "END" anywhere before EOF at - line 1.

In the first example above, we need to look not in the parent lexing
scope’s linestr, but in that of the grandparent.

To solve the second example, we need to check whether the outer lexing
scope is a quote-like operator when we are not in an eval.

For parsing here-docs in quotes in eval, we currently store two
things, the former buffer pointer and the former linestr, in
PL_sublex_info.super_{bufp,lines}tr.  The values for upper scopes are
stashed away on the savestack somewhere.

We need to be able to iterate through the outer lexer scopes till we
find one with multiple lines.  Retrieving the information from the
savestack would be too complex and error-prone.

Since PL_linestr is an SV, we can abuse a couple of fields in it.
Upgrading it to PVNV gives it both IVX and NVX fields, which are big
enough to store pointers.

IVX is already used to hold an op number.  So for the innermost quoted
scope we still need to use PL_sublex_info.super_bufptr.  When entering
a new lexing scope (in sublex_push), we can localise the IVX field of
the outer PL_linestr SV and set it to what PL_sublex_info.super_bufptr
was in that scope.  SvIVX(linestr) is only used for an op number when
that linestr’s lexing scope is the innermost one.

PL_sublex_info.super_linestr can be eliminated and replaced with
SvNVX(PL_linestr).
  • Loading branch information
Father Chrysostomos committed Aug 21, 2012
1 parent 62abd0d commit 99bd9d9
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 19 deletions.
1 change: 0 additions & 1 deletion perl.h
Original file line number Diff line number Diff line change
Expand Up @@ -3450,7 +3450,6 @@ struct _sublex_info {
U16 sub_inwhat; /* "lex_inwhat" to use */
OP *sub_op; /* "lex_op" to use */
char *super_bufptr; /* PL_parser->bufptr that was */
SV *super_linestr; /* PL_parser->linestr that was */
char *re_eval_start;/* start of "(?{..." text */
};

Expand Down
16 changes: 15 additions & 1 deletion t/base/lex.t
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!./perl

print "1..60\n";
print "1..62\n";

$x = 'x';

Expand Down Expand Up @@ -295,3 +295,17 @@ ok 60 - null on same line as heredoc in s/// in eval
foo
";
print $_ || "not ok 60\n";

$_ = "";
eval ' s/(?:)/"${\<<END}"/e;
ok 61 - heredoc in "" in single-line s///e in eval
END
';
print $_ || "not ok 61\n";

$_ = "";
s|(?:)|"${\<<END}"
ok 62 - heredoc in "" in multiline s///e outside eval
END
|e;
print $_ || "not ok 62\n";
81 changes: 64 additions & 17 deletions toke.c
Original file line number Diff line number Diff line change
Expand Up @@ -2461,7 +2461,6 @@ S_sublex_push(pTHX)
SAVEI8(PL_lex_state);
SAVEPPTR(PL_sublex_info.re_eval_start);
SAVEPPTR(PL_sublex_info.super_bufptr);
SAVEPPTR(PL_sublex_info.super_linestr);
SAVEVPTR(PL_lex_inpat);
SAVEI16(PL_lex_inwhat);
SAVECOPLINE(PL_curcop);
Expand All @@ -2476,11 +2475,29 @@ S_sublex_push(pTHX)
SAVEGENERICPV(PL_lex_brackstack);
SAVEGENERICPV(PL_lex_casestack);

PL_sublex_info.super_linestr = PL_linestr;
/* The here-doc parser needs to be able to peek into outer lexing
scopes to find the body of the here-doc. We use SvIVX(PL_linestr)
to store the outer PL_bufptr and SvNVX to store the outer
PL_linestr. Since SvIVX already means something else, we use
PL_sublex_info.super_bufptr for the innermost scope (the one we are
now entering), and a localised SvIVX for outer scopes.
*/
SvUPGRADE(PL_linestr, SVt_PVIV);
/* A null super_bufptr means the outer lexing scope is not peekable,
because it is a single line from an input stream. */
SAVEIV(SvIVX(PL_linestr));
SvIVX(PL_linestr) = PTR2IV(PL_sublex_info.super_bufptr);
PL_sublex_info.super_bufptr =
(SvTYPE(PL_linestr) < SVt_PVNV || !SvNVX(PL_linestr))
&& (PL_rsfp || PL_parser->filtered)
? NULL
: PL_bufptr;
SvUPGRADE(PL_lex_stuff, SVt_PVNV);
SvNVX(PL_lex_stuff) = PTR2NV(PL_linestr);

PL_linestr = PL_lex_stuff;
PL_lex_stuff = NULL;
PL_sublex_info.re_eval_start = NULL;
PL_sublex_info.super_bufptr = PL_bufptr;

PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
= SvPVX(PL_linestr);
Expand Down Expand Up @@ -2536,6 +2553,8 @@ S_sublex_done(pTHX)
/* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
assert(PL_lex_inwhat != OP_TRANSR);
if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
SvUPGRADE(PL_lex_repl, SVt_PVNV);
SvNVX(PL_lex_repl) = SvNVX(PL_linestr);
PL_linestr = PL_lex_repl;
PL_lex_inpat = 0;
PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
Expand Down Expand Up @@ -9452,17 +9471,21 @@ S_scan_trans(pTHX_ char *start)
The three methods are:
- Steal lines from the input stream (stream)
- Scan the heredoc in PL_linestr and remove it therefrom (linestr)
- Peek at the PL_linestr of the outer lexing scope (peek)
- Peek at the PL_linestr of outer lexing scopes (peek)
They are used in these cases:
file scope or filtered eval stream
string eval linestr
multiline quoted construct linestr
single-line quoted construct in file stream
single-line quoted construct in eval peek
file scope or filtered eval stream
string eval linestr
multiline quoted construct linestr
single-line quoted construct in file stream
single-line quoted construct in eval or quote peek
Single-line also applies to heredocs that begin on the last line of a
quote-like operator.
Peeking within a quote also involves falling back to the stream method,
if the outer quote-like operators are all on one line (or the heredoc
marker is on the last line).
*/

STATIC char *
Expand Down Expand Up @@ -9600,14 +9623,37 @@ S_scan_heredoc(pTHX_ register char *s)
CLINE;
PL_multi_start = CopLINE(PL_curcop);
PL_multi_open = PL_multi_close = '<';
if (!infile && PL_lex_inwhat && !found_newline) {
char * const bufptr = PL_sublex_info.super_bufptr;
char * const bufend = SvEND(PL_sublex_info.super_linestr);
if (PL_lex_inwhat && !found_newline) {
/* Peek into the line buffer of the parent lexing scope, going up
as many levels as necessary to find one with a newline after
bufptr. See the comments in sublex_push for how IVX and NVX
are abused.
*/
SV *linestr = NUM2PTR(SV *, SvNVX(PL_linestr));
char *bufptr = PL_sublex_info.super_bufptr;
char *bufend = SvEND(linestr);
char * const olds = s - SvCUR(herewas);
char * const real_olds = s;
if (!bufptr) {
s = real_olds;
goto streaming;
}
while (!(s = (char *)memchr((void *)bufptr, '\n', bufend-bufptr))){
if (SvIVX(linestr)) {
bufptr = INT2PTR(char *, SvIVX(linestr));
linestr = NUM2PTR(SV *, SvNVX(linestr));
bufend = SvEND(linestr);
}
else if (infile) {
s = real_olds;
goto streaming;
}
else {
s = bufend;
break;
}
}
term = *PL_tokenbuf;
s = (char *)memchr((void *)bufptr, '\n', bufend-bufptr);
if (!s)
s = bufend;
d = s;
while (s < bufend &&
(*s != term || memNE(s,PL_tokenbuf,len)) ) {
Expand All @@ -9623,8 +9669,8 @@ S_scan_heredoc(pTHX_ register char *s)
s += len - 1;
sv_catpvn(herewas,s,bufend-s);
Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
SvCUR_set(PL_sublex_info.super_linestr,
bufptr-SvPVX_const(PL_sublex_info.super_linestr)
SvCUR_set(linestr,
bufptr-SvPVX_const(linestr)
+ SvCUR(herewas));

s = olds;
Expand Down Expand Up @@ -9663,6 +9709,7 @@ S_scan_heredoc(pTHX_ register char *s)
}
else
sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
streaming:
term = PL_tokenbuf[1];
len--;
while (s >= PL_bufend) { /* multiple line string? */
Expand Down

0 comments on commit 99bd9d9

Please sign in to comment.