From 61287be181899065fc700ea566525c93677bf848 Mon Sep 17 00:00:00 2001 From: Conrad Meyer Date: Wed, 20 Nov 2019 19:43:34 +0000 Subject: [PATCH] Re-apply fixed r354847 unifdef(1): Improve worst-case bound on symbol resolution Use RB_TREE to make some algorithms O(lg N) and O(N lg N) instead of O(N) and O(N^2). While here, remove arbitrary limit on number of macros understood. Reverts r354877 and r354878, which disabled the (correct) test. PR: 242095 Reported by: lwhsu --- .../netbsd-tests/usr.bin/unifdef/t_basic.sh | 3 - usr.bin/unifdef/unifdef.c | 142 +++++++++++------- 2 files changed, 86 insertions(+), 59 deletions(-) diff --git a/contrib/netbsd-tests/usr.bin/unifdef/t_basic.sh b/contrib/netbsd-tests/usr.bin/unifdef/t_basic.sh index 86acbe0db41136..751c303075b8f0 100755 --- a/contrib/netbsd-tests/usr.bin/unifdef/t_basic.sh +++ b/contrib/netbsd-tests/usr.bin/unifdef/t_basic.sh @@ -35,9 +35,6 @@ basic_head() { } basic_body() { - if [ "$(atf_config_get ci false)" = "true" ]; then - atf_skip "https://bugs.freebsd.org/242095" - fi atf_check -s ignore -o file:$(atf_get_srcdir)/d_basic.out \ -x "unifdef -U__FreeBSD__ $(atf_get_srcdir)/d_basic.in" diff --git a/usr.bin/unifdef/unifdef.c b/usr.bin/unifdef/unifdef.c index 616f93a621eb6b..50331d5ad9b2e3 100644 --- a/usr.bin/unifdef/unifdef.c +++ b/usr.bin/unifdef/unifdef.c @@ -45,8 +45,11 @@ * it possible to handle all "dodgy" directives correctly. */ +#include #include +#include +#include #include #include #include @@ -149,7 +152,6 @@ static char const * const linestate_name[] = { */ #define MAXDEPTH 64 /* maximum #if nesting */ #define MAXLINE 4096 /* maximum length of line */ -#define MAXSYMS 16384 /* maximum number of symbols */ /* * Sometimes when editing a keyword the replacement text is longer, so @@ -157,6 +159,26 @@ static char const * const linestate_name[] = { */ #define EDITSLOP 10 +/* + * C17/18 allow 63 characters per macro name, but up to 127 arbitrarily large + * parameters.
+ */ +struct macro { + RB_ENTRY(macro) entry; + const char *name; + const char *value; + bool ignore; /* -iDsym or -iUsym */ +}; + +static int +macro_cmp(struct macro *a, struct macro *b) +{ + return (strcmp(a->name, b->name)); +} + +static RB_HEAD(MACROMAP, macro) macro_tree = RB_INITIALIZER(&macro_tree); +RB_GENERATE_STATIC(MACROMAP, macro, entry, macro_cmp); + /* * Globals. */ @@ -174,11 +196,6 @@ static bool symlist; /* -s: output symbol list */ static bool symdepth; /* -S: output symbol depth */ static bool text; /* -t: this is a text file */ -static const char *symname[MAXSYMS]; /* symbol name */ -static const char *value[MAXSYMS]; /* -Dsym=value */ -static bool ignore[MAXSYMS]; /* -iDsym or -iUsym */ -static int nsyms; /* number of symbols */ - static FILE *input; /* input file pointer */ static const char *filename; /* input file name */ static int linenum; /* current line number */ @@ -227,12 +244,12 @@ static char *astrcat(const char *, const char *); static void cleantemp(void); static void closeio(void); static void debug(const char *, ...); -static void debugsym(const char *, int); +static void debugsym(const char *, const struct macro *); static bool defundef(void); static void defundefile(const char *); static void done(void); static void error(const char *); -static int findsym(const char **); +static struct macro *findsym(const char **); static void flushline(bool); static void hashline(void); static void help(void); @@ -807,7 +824,7 @@ static Linetype parseline(void) { const char *cp; - int cursym; + struct macro *cursym; Linetype retval; Comment_state wascomment; @@ -829,15 +846,15 @@ parseline(void) if ((cp = matchsym("ifdef", keyword)) != NULL || (cp = matchsym("ifndef", keyword)) != NULL) { cp = skipcomment(cp); - if ((cursym = findsym(&cp)) < 0) + if ((cursym = findsym(&cp)) == NULL) retval = LT_IF; else { retval = (keyword[2] == 'n') ? LT_FALSE : LT_TRUE; - if (value[cursym] == NULL) + if (cursym->value == NULL) retval = (retval == LT_TRUE) ? 
LT_FALSE : LT_TRUE; - if (ignore[cursym]) + if (cursym->ignore) retval = (retval == LT_TRUE) ? LT_TRUEI : LT_FALSEI; } @@ -1037,7 +1054,7 @@ eval_unary(const struct ops *ops, long *valp, const char **cpp) { const char *cp; char *ep; - int sym; + struct macro *sym; bool defparen; Linetype lt; @@ -1102,27 +1119,27 @@ eval_unary(const struct ops *ops, long *valp, const char **cpp) debug("eval%d defined missing ')'", prec(ops)); return (LT_ERROR); } - if (sym < 0) { + if (sym == NULL) { debug("eval%d defined unknown", prec(ops)); lt = LT_IF; } else { - debug("eval%d defined %s", prec(ops), symname[sym]); - *valp = (value[sym] != NULL); + debug("eval%d defined %s", prec(ops), sym->name); + *valp = (sym->value != NULL); lt = *valp ? LT_TRUE : LT_FALSE; } constexpr = false; } else if (!endsym(*cp)) { debug("eval%d symbol", prec(ops)); sym = findsym(&cp); - if (sym < 0) { + if (sym == NULL) { lt = LT_IF; cp = skipargs(cp); - } else if (value[sym] == NULL) { + } else if (sym->value == NULL) { *valp = 0; lt = LT_FALSE; } else { - *valp = strtol(value[sym], &ep, 0); - if (*ep != '\0' || ep == value[sym]) + *valp = strtol(sym->value, &ep, 0); + if (*ep != '\0' || ep == sym->value) return (LT_ERROR); lt = *valp ? LT_TRUE : LT_FALSE; cp = skipargs(cp); @@ -1439,17 +1456,18 @@ matchsym(const char *s, const char *t) * Look for the symbol in the symbol table. If it is found, we return * the symbol table index, else we return -1. */ -static int +static struct macro * findsym(const char **strp) { const char *str; - int symind; + char *strkey; + struct macro key, *res; str = *strp; *strp = skipsym(str); if (symlist) { if (*strp == str) - return (-1); + return (NULL); if (symdepth && firstsym) printf("%s%3d", zerosyms ? "" : "\n", depth); firstsym = zerosyms = false; @@ -1458,15 +1476,26 @@ findsym(const char **strp) (int)(*strp-str), str, symdepth ? 
"" : "\n"); /* we don't care about the value of the symbol */ - return (0); + return (NULL); } - for (symind = 0; symind < nsyms; ++symind) { - if (matchsym(symname[symind], str) != NULL) { - debugsym("findsym", symind); - return (symind); - } - } - return (-1); + + /* + * 'str' just points into the current mid-parse input and is not + * nul-terminated. We know the length of the symbol, *strp - str, but + * need to provide a nul-terminated lookup key for RB_FIND's comparison + * function. Create one here. + */ + strkey = malloc(*strp - str + 1); + memcpy(strkey, str, *strp - str); + strkey[*strp - str] = 0; + + key.name = strkey; + res = RB_FIND(MACROMAP, &macro_tree, &key); + if (res != NULL) + debugsym("findsym", res); + + free(strkey); + return (res); } /* @@ -1476,22 +1505,23 @@ static void indirectsym(void) { const char *cp; - int changed, sym, ind; + int changed; + struct macro *sym, *ind; do { changed = 0; - for (sym = 0; sym < nsyms; ++sym) { - if (value[sym] == NULL) + RB_FOREACH(sym, MACROMAP, &macro_tree) { + if (sym->value == NULL) continue; - cp = value[sym]; + cp = sym->value; ind = findsym(&cp); - if (ind == -1 || ind == sym || + if (ind == NULL || ind == sym || *cp != '\0' || - value[ind] == NULL || - value[ind] == value[sym]) + ind->value == NULL || + ind->value == sym->value) continue; debugsym("indir...", sym); - value[sym] = value[ind]; + sym->value = ind->value; debugsym("...ectsym", sym); changed++; } @@ -1523,29 +1553,29 @@ addsym1(bool ignorethis, bool definethis, char *symval) * Add a symbol to the symbol table. 
*/ static void -addsym2(bool ignorethis, const char *sym, const char *val) +addsym2(bool ignorethis, const char *symname, const char *val) { - const char *cp = sym; - int symind; - - symind = findsym(&cp); - if (symind < 0) { - if (nsyms >= MAXSYMS) - errx(2, "too many symbols"); - symind = nsyms++; + const char *cp = symname; + struct macro *sym, *r; + + sym = findsym(&cp); + if (sym == NULL) { + sym = calloc(1, sizeof(*sym)); + sym->ignore = ignorethis; + sym->name = symname; + sym->value = val; + r = RB_INSERT(MACROMAP, &macro_tree, sym); + assert(r == NULL); } - ignore[symind] = ignorethis; - symname[symind] = sym; - value[symind] = val; - debugsym("addsym", symind); + debugsym("addsym", sym); } static void -debugsym(const char *why, int symind) +debugsym(const char *why, const struct macro *sym) { - debug("%s %s%c%s", why, symname[symind], - value[symind] ? '=' : ' ', - value[symind] ? value[symind] : "undef"); + debug("%s %s%c%s", why, sym->name, + sym->value ? '=' : ' ', + sym->value ? sym->value : "undef"); } /*