lexer: fix top-level probe ending in wildcard ambiguity handling

The lexer contains code to determine whether foo*bar at the top level of a script is a wildcarded probe name or a variable declaration (a declaration would also need a semicolon, an =, or something similar, but the lexer may not have got that far yet). Back in the Solaris days this was done by blindly looking up the text before the * as a type, and treating it as a type only if that lookup succeeded; but this forces loading of all CTF basically no matter what (even the hardwired definition of NULL forces it).

I rejigged this code way back in 5a7483d in 2012 to stop it looking up quite so many types unnecessarily, but in the process broke the code that puts the parts of the token after the * back on the parser stack. You need to be very unlucky for this to trigger anything, but more recent flex has started doing what it has long promised and made unput() *actually* corrupt yytext. Put the two together and bugs jump out: you start getting garbage or repeated junk after the * in cases like int*x (seen in test/unittest/options/err.pspec-default.d).

Getting it right is actually simpler than getting it wrong. If we use yyless(), which does not corrupt yytext, rather than unput(), we no longer need to take a temporary copy of yytext to protect against that corruption; this also simplifies the code that preserves the content beyond the *. (The rest of the lexer is still using unput() everywhere and needs fixing later.)

This does not fix err.pspec-default.d, but it is a prerequisite for a fix that works (so err.pspec-default.d will serve as a test for it).

Signed-off-by: Nick Alcock <nick.alcock@oracle.com>
Reviewed-by: Kris Van Hees <kris.van.hees@oracle.com>
oracle · Jan 26, 2024 · 03ac5b8 · 03ac5b8
1 parent b189765
commit 03ac5b8
Showing 1 changed file with 7 additions and 18 deletions.
diff --git a/libdtrace/dt_lex.l b/libdtrace/dt_lex.l
@@ -1,7 +1,7 @@
 %{
 /*
  * Oracle Linux DTrace.
- * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2024, Oracle and/or its affiliates. All rights reserved.
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
@@ -574,33 +574,25 @@ if (yypcb->pcb_token != 0) {
 			 * case of the tick and profile probes in the profile
 			 * provider and a bunch of other possibilities too.
 			 */
+			if ((yylval.l_str = strdup(yytext)) == NULL)
+				longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
+
 			if (!(yypcb->pcb_cflags & DTRACE_C_PSPEC) &&
 			    strpbrk(yytext, ":-.?!`") == NULL &&
 			    strcmp(yytext, "BEGIN") != 0 &&
 			    strcmp(yytext, "END") != 0 &&
 			    strcmp(yytext, "ERROR") != 0) {
 
 				char *p = strchr(yytext, '*');
-				char *q = yytext + yyleng - 1;
 
 				if (p != NULL && p > yytext)
 					*p = '\0'; /* prune yytext */
 
 				if (dt_type_lookup(yytext, NULL) == 0) {
-					yylval.l_str = strdup(yytext);
 
-					if (yylval.l_str == NULL) {
-						longjmp(yypcb->pcb_jmpbuf,
-						    EDT_NOMEM);
-					}
-
-					if (p != NULL)
-						p = yylval.l_str + (p - yytext);
-					q = yylval.l_str + (q - yytext);
-
-					if (p != NULL && p > yylval.l_str) {
-						for (*p = '*'; q >= p; q--)
-							unput(*q);
+					if (p != NULL && p > yytext) {
+						*p = '*';
+						yyless(p - yytext);
 					}
 
 					yybegin(YYS_EXPR);
@@ -611,9 +603,6 @@ if (yypcb->pcb_token != 0) {
 					*p = '*'; /* restore yytext */
 			}
 
-			if ((yylval.l_str = strdup(yytext)) == NULL)
-				longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
-
 			return DT_TOK_PSPEC;
 		}