From 95555ae95a9070a060ffba81b64c87dc1ebc7253 Mon Sep 17 00:00:00 2001 From: David Nichols Date: Fri, 14 Jan 2022 11:15:10 +0100 Subject: [PATCH] refs #2096 fixed a crash in the scanner handling EOF in regexes refs #4379 fixed an error in the last commit related to this fix --- doxygen/lang/900_release_notes.dox.tmpl | 2 + include/qore/intern/QoreLibIntern.h | 5 - lib/QoreLib.cpp | 1 - lib/QoreSquareBracketsOperatorNode.cpp | 2 +- lib/scanner.lpp | 119 ++++++++++++++---------- 5 files changed, 74 insertions(+), 55 deletions(-) diff --git a/doxygen/lang/900_release_notes.dox.tmpl b/doxygen/lang/900_release_notes.dox.tmpl index 600c9e9c27..c3ba991904 100644 --- a/doxygen/lang/900_release_notes.dox.tmpl +++ b/doxygen/lang/900_release_notes.dox.tmpl @@ -21,6 +21,8 @@ (issue 4379) - added a warning for constant operands with the square bracket operator that are not integers (issue 3409) + - fixed a bug handling EOF conditions while parsing the final part of regular expressions + (issue 2096) @section qore_1_0_13 Qore 1.0.13 diff --git a/include/qore/intern/QoreLibIntern.h b/include/qore/intern/QoreLibIntern.h index ef14d71313..db04b1157a 100644 --- a/include/qore/intern/QoreLibIntern.h +++ b/include/qore/intern/QoreLibIntern.h @@ -152,7 +152,6 @@ struct QoreParseContext { int pflag = 0; int lvids = 0; const QoreTypeInfo* typeInfo = nullptr; - qore_type_t value_type = -1; DLLLOCAL QoreParseContext(QoreProgram* pgm = getProgram()) : pgm(pgm) { } @@ -171,10 +170,6 @@ struct QoreParseContext { pflag |= flags; return rv; } - - DLLLOCAL bool isConstant() const { - return value_type >= NT_NOTHING && value_type <= NT_NUMBER; - } }; class QoreParseContextFlagHelper { diff --git a/lib/QoreLib.cpp b/lib/QoreLib.cpp index 3c2ce8a488..1ed0d79a11 100644 --- a/lib/QoreLib.cpp +++ b/lib/QoreLib.cpp @@ -518,7 +518,6 @@ bool qore_has_debug() { } int parse_init_value(QoreValue& val, QoreParseContext& parse_context) { - parse_context.value_type = val.getType(); if (val.hasNode()) { AbstractQoreNode* n = val.getInternalNode(); //printd(5, "parse_init_value() n: %p '%s'\n", n, get_type_name(n)); diff --git a/lib/QoreSquareBracketsOperatorNode.cpp b/lib/QoreSquareBracketsOperatorNode.cpp index 0d62e16104..27eb3bbb24 100644 --- a/lib/QoreSquareBracketsOperatorNode.cpp +++ b/lib/QoreSquareBracketsOperatorNode.cpp @@ -134,7 +134,7 @@ int QoreSquareBracketsOperatorNode::parseInitImpl(QoreValue& val, QoreParseConte edesc->concat(" and so will always evaluate to zero"); qore_program_private::makeParseWarning(getProgram(), *loc, QP_WARN_INVALID_OPERATION, "INVALID-OPERATION", edesc); - } else if (parse_context.isConstant() && parse_context.value_type != NT_INT) { + } else if (right.isConstant() && right.getType() != NT_INT) { // FIXME: raise exceptions with %strict-types QoreStringNode* edesc = new QoreStringNode("the offset operand expression with the '[]' operator is a " "constant of "); diff --git a/lib/scanner.lpp b/lib/scanner.lpp index 817014078f..07ae827333 100644 --- a/lib/scanner.lpp +++ b/lib/scanner.lpp @@ -570,16 +570,16 @@ static DateTimeNode* makeRelativeTime(char* str) { return DateTimeNode::makeRelative(0, 0, 0, hour, minute, second, us); } -static bool isRegexModifier(QoreRegex *qr, int c) { - if (c == 'i') +static bool isRegexModifier(QoreRegex* qr, int c) { + if (c == 'i') { qr->setCaseInsensitive(); - else if (c == 's') + } else if (c == 's') { qr->setDotAll(); - else if (c == 'x') + } else if (c == 'x') { qr->setExtended(); - else if (c == 'm') + } else if (c == 'm') { qr->setMultiline(); - else if (c == 'u') { + } else if (c == 'u') { qr->setUnicode(); } else { return false; @@ -587,28 +587,30 @@ static bool isRegexModifier(QoreRegex *qr, int c) { return true; } -static bool isRegexExtractModifier(QoreRegex *qr, int c) { - if (isRegexModifier(qr, c)) +static bool isRegexExtractModifier(QoreRegex* qr, int c) { + if (isRegexModifier(qr, c)) { return true; - if (c == 'g') + } + if (c == 'g') { qr->setGlobal(); - else + } else { return false; + } return true; } -static bool isRegexSubstModifier(QoreRegexSubst *qr, int c) { - if (c == 'g') +static bool isRegexSubstModifier(QoreRegexSubst* qr, int c) { + if (c == 'g') { qr->setGlobal(); - else if (c == 'i') + } else if (c == 'i') { qr->setCaseInsensitive(); - else if (c == 's') + } else if (c == 's') { qr->setDotAll(); - else if (c == 'x') + } else if (c == 'x') { qr->setExtended(); - else if (c == 'm') + } else if (c == 'm') { qr->setMultiline(); - else if (c == 'u') { + } else if (c == 'u') { qr->setUnicode(); } else { return false; @@ -1303,16 +1305,22 @@ RTIME PT(-?[0-9]+(\.[0-9]+)?[HMSu])+ } { \/ { - // get regex modifiers - int c; - do { - c = yyinput(yyscanner); - } while (isRegexSubstModifier(yylval->RegexSubst, c)); - unput(c); - yylloc->restoreFirst(); - BEGIN(INITIAL); - yylval->RegexSubst->parse(); - return REGEX_SUBST; + // get regex modifiers + int c; + do { + c = yyinput(yyscanner); + } while (isRegexSubstModifier(yylval->RegexSubst, c)); + // issue #2096: handle EOF while scanning chars manually + if (!c) { + yylval->RegexSubst->deref(); + QORE_FLEX_DO_EOF + } else { + unput(c); + yylloc->restoreFirst(); + BEGIN(INITIAL); + yylval->RegexSubst->parse(); + return REGEX_SUBST; + } } \n { yylval->RegexSubst->concatTarget('\n'); @@ -1396,16 +1404,22 @@ RTIME PT(-?[0-9]+(\.[0-9]+)?[HMSu])+ } { \/ { - // get regex modifiers - int c; - do { - c = yyinput(yyscanner); - } while (isRegexModifier(yylval->Regex, c)); - unput(c); - yylloc->restoreFirst(); - BEGIN(INITIAL); - yylval->Regex->parse([&] () { return get_loc(yylloc); }); - return REGEX; + // get regex modifiers + int c; + do { + c = yyinput(yyscanner); + } while (isRegexModifier(yylval->Regex, c)); + // issue #2096: handle EOF while scanning chars manually + if (!c) { + yylval->Regex->deref(); + QORE_FLEX_DO_EOF + } else { + unput(c); + yylloc->restoreFirst(); + BEGIN(INITIAL); + yylval->Regex->parse([&] () { return get_loc(yylloc); }); + return REGEX; + } } \n yylval->Regex->concat('\n'); \\\/ yylval->Regex->concat('/'); @@ -1422,20 +1436,29 @@ RTIME PT(-?[0-9]+(\.[0-9]+)?[HMSu])+ } { \/ { - // get regex modifiers - int c; - do { - c = yyinput(yyscanner); - } while (isRegexExtractModifier(yylval->Regex, c)); - unput(c); - yylloc->restoreFirst(); - BEGIN(INITIAL); - yylval->Regex->parse([&] () { return get_loc(yylloc); }); - return REGEX_EXTRACT; + // get regex modifiers + int c; + do { + c = yyinput(yyscanner); + } while (isRegexExtractModifier(yylval->Regex, c)); + // issue #2096: handle EOF while scanning chars manually + if (!c) { + yylval->Regex->deref(); + QORE_FLEX_DO_EOF + } else { + unput(c); + yylloc->restoreFirst(); + BEGIN(INITIAL); + yylval->Regex->parse([&] () { return get_loc(yylloc); }); + return REGEX_EXTRACT; + } } \n yylval->Regex->concat('\n'); \\\/ yylval->Regex->concat('/'); - \\. { yylval->Regex->concat('\\'); yylval->Regex->concat(yytext[1]); } + \\. { + yylval->Regex->concat('\\'); + yylval->Regex->concat(yytext[1]); + } [^\n\\/]+ { char* yptr = yytext; while (*yptr)