Permalink
Fetching contributors…
Cannot retrieve contributors at this time
307 lines (278 sloc) 8.46 KB
/*
* This code was copied from perl/toke.c and subsequently butchered
* by Lukas Mai (2012).
*/
/* vi: set ft=c: */
/* vvvvvvvvvvvvvvvvvvvvv I HAVE NO IDEA WHAT I'M DOING vvvvvvvvvvvvvvvvvvvv */
#define PL_linestr (PL_parser->linestr)
#define PL_copline (PL_parser->copline)
#define PL_bufptr (PL_parser->bufptr)
#define PL_bufend (PL_parser->bufend)
#define PL_multi_start (PL_parser->multi_start)
#define PL_multi_open (PL_parser->multi_open)
#define PL_multi_close (PL_parser->multi_close)
#define PL_multi_end (PL_parser->multi_end)
#define PL_rsfp (PL_parser->rsfp)
#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
#ifdef USE_UTF8_SCRIPTS
# define UTF (!IN_BYTES)
#else
# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
#endif
static STRLEN S_scan_word(pTHX_ const char *start, int allow_package) {
const char *s = start;
for (;;) {
if (isALNUM(*s) || (!UTF && isALNUMC_L1(*s))) { /* UTF handled below */
s++;
} else if (allow_package && s > start && *s == '\'' && isIDFIRST_lazy_if(s+1, UTF)) {
s++;
} else if (allow_package && s[0] == ':' && s[1] == ':' && isIDFIRST_lazy_if(s+2, UTF)) {
s += 2;
} else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
do {
s += UTF8SKIP(s);
} while (UTF8_IS_CONTINUED(*s) && is_utf8_mark((U8*)s));
} else {
return s - start;
}
}
}
static char *S_scan_str(pTHX_ SV *sv, int keep_quoted, int keep_delims) {
dVAR;
char *start = PL_bufptr;
const char *tmps; /* temp string, used for delimiter matching */
char *s = start; /* current position in the buffer */
char term; /* terminating character */
char *to; /* current position in the sv's data */
I32 brackets = 1; /* bracket nesting level */
bool has_utf8 = FALSE; /* is there any utf8 content? */
I32 termcode; /* terminating char. code */
U8 termstr[UTF8_MAXBYTES]; /* terminating string */
STRLEN termlen; /* length of terminating string */
int last_off = 0; /* last position for nesting bracket */
/* XXX ATTENTION: we don't skip whitespace! */
/* mark where we are, in case we need to report errors */
CLINE;
/* after skipping whitespace, the next character is the terminator */
term = *s;
if (!UTF) {
termcode = termstr[0] = term;
termlen = 1;
}
else {
#if HAVE_PERL_VERSION(5, 16, 0)
termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
#else
termcode = utf8_to_uvchr((U8*)s, &termlen);
#endif
Copy(s, termstr, termlen, U8);
if (!UTF8_IS_INVARIANT(term))
has_utf8 = TRUE;
}
/* mark where we are */
PL_multi_start = CopLINE(PL_curcop);
PL_multi_open = term;
/* find corresponding closing delimiter */
if (term && (tmps = strchr("([{< )]}> )]}>",term)))
termcode = termstr[0] = term = tmps[5];
PL_multi_close = term;
{
STRLEN dummy;
SvPV_force(sv, dummy);
sv_setpvs(sv, "");
SvGROW(sv, 80);
}
/* move past delimiter and try to read a complete string */
if (keep_delims)
sv_catpvn(sv, s, termlen);
s += termlen;
for (;;) {
if (PL_encoding && !UTF) {
bool cont = TRUE;
while (cont) {
int offset = s - SvPVX_const(PL_linestr);
const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
&offset, (char*)termstr, termlen);
const char * const ns = SvPVX_const(PL_linestr) + offset;
char * const svlast = SvEND(sv) - 1;
for (; s < ns; s++) {
if (*s == '\n' && !PL_rsfp
#if HAVE_PERL_VERSION(5, 16, 0)
&& !PL_parser->filtered
#endif
)
CopLINE_inc(PL_curcop);
}
if (!found)
goto read_more_line;
else {
/* handle quoted delimiters */
if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
const char *t;
for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
t--;
if ((svlast-1 - t) % 2) {
if (!keep_quoted) {
*(svlast-1) = term;
*svlast = '\0';
SvCUR_set(sv, SvCUR(sv) - 1);
}
continue;
}
}
if (PL_multi_open == PL_multi_close) {
cont = FALSE;
}
else {
const char *t;
char *w;
for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
/* At here, all closes are "was quoted" one,
so we don't check PL_multi_close. */
if (*t == '\\') {
if (!keep_quoted && *(t+1) == PL_multi_open)
t++;
else
*w++ = *t++;
}
else if (*t == PL_multi_open)
brackets++;
*w = *t;
}
if (w < t) {
*w++ = term;
*w = '\0';
SvCUR_set(sv, w - SvPVX_const(sv));
}
last_off = w - SvPVX(sv);
if (--brackets <= 0)
cont = FALSE;
}
}
}
if (!keep_delims) {
SvCUR_set(sv, SvCUR(sv) - 1);
*SvEND(sv) = '\0';
}
break;
}
/* extend sv if need be */
SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
/* set 'to' to the next character in the sv's string */
to = SvPVX(sv)+SvCUR(sv);
/* if open delimiter is the close delimiter read unbridle */
if (PL_multi_open == PL_multi_close) {
for (; s < PL_bufend; s++,to++) {
/* embedded newlines increment the current line number */
if (*s == '\n' && !PL_rsfp
#if HAVE_PERL_VERSION(5, 16, 0)
&& !PL_parser->filtered
#endif
)
CopLINE_inc(PL_curcop);
/* handle quoted delimiters */
if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
if (!keep_quoted && s[1] == term)
s++;
/* any other quotes are simply copied straight through */
else
*to++ = *s++;
}
/* terminate when run out of buffer (the for() condition), or
have found the terminator */
else if (*s == term) {
if (termlen == 1)
break;
if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
break;
}
else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
has_utf8 = TRUE;
*to = *s;
}
}
/* if the terminator isn't the same as the start character (e.g.,
matched brackets), we have to allow more in the quoting, and
be prepared for nested brackets.
*/
else {
/* read until we run out of string, or we find the terminator */
for (; s < PL_bufend; s++,to++) {
/* embedded newlines increment the line count */
if (*s == '\n' && !PL_rsfp
#if HAVE_PERL_VERSION(5, 16, 0)
&& !PL_parser->filtered
#endif
)
CopLINE_inc(PL_curcop);
/* backslashes can escape the open or closing characters */
if (*s == '\\' && s+1 < PL_bufend) {
if (!keep_quoted &&
((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
s++;
else
*to++ = *s++;
}
/* allow nested opens and closes */
else if (*s == PL_multi_close && --brackets <= 0)
break;
else if (*s == PL_multi_open)
brackets++;
else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
has_utf8 = TRUE;
*to = *s;
}
}
/* terminate the copied string and update the sv's end-of-string */
*to = '\0';
SvCUR_set(sv, to - SvPVX_const(sv));
/*
* this next chunk reads more into the buffer if we're not done yet
*/
if (s < PL_bufend)
break; /* handle case where we are done yet :-) */
#ifndef PERL_STRICT_CR
if (to - SvPVX_const(sv) >= 2) {
if ((to[-2] == '\r' && to[-1] == '\n') ||
(to[-2] == '\n' && to[-1] == '\r'))
{
to[-2] = '\n';
to--;
SvCUR_set(sv, to - SvPVX_const(sv));
}
else if (to[-1] == '\r')
to[-1] = '\n';
}
else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
to[-1] = '\n';
#endif
read_more_line:
/* if we're out of file, or a read fails, bail and reset the current
line marker so we can report where the unterminated string began
*/
CopLINE_inc(PL_curcop);
PL_bufptr = PL_bufend;
if (!lex_next_chunk(0)) {
CopLINE_set(PL_curcop, (line_t)PL_multi_start);
return NULL;
}
s = PL_bufptr;
}
/* at this point, we have successfully read the delimited string */
if (!PL_encoding || UTF) {
if (keep_delims)
sv_catpvn(sv, s, termlen);
s += termlen;
}
if (has_utf8 || PL_encoding)
SvUTF8_on(sv);
PL_multi_end = CopLINE(PL_curcop);
/* if we allocated too much space, give some back */
if (SvCUR(sv) + 5 < SvLEN(sv)) {
SvLEN_set(sv, SvCUR(sv) + 1);
SvPV_renew(sv, SvLEN(sv));
}
PL_bufptr = s;
return s;
}
/* ^^^^^^^^^^^^^^^^^^^^^ I HAVE NO IDEA WHAT I'M DOING ^^^^^^^^^^^^^^^^^^^^ */