From a71036671ab736c810f2428eec31698fa8816a6e Mon Sep 17 00:00:00 2001 From: Nicolas Cornu Date: Tue, 30 Jan 2024 10:30:52 +0100 Subject: [PATCH] Replace hoc_regexp_* by std::regex (#2694) --- cmake/NeuronFileLists.cmake | 1 - src/nrnoc/cabcode.cpp | 46 +++- src/oc/oc_ansi.h | 2 - src/oc/regexp.cpp | 466 ------------------------------------ 4 files changed, 39 insertions(+), 476 deletions(-) delete mode 100644 src/oc/regexp.cpp diff --git a/cmake/NeuronFileLists.cmake b/cmake/NeuronFileLists.cmake index e42d769355..fb6ca521e7 100644 --- a/cmake/NeuronFileLists.cmake +++ b/cmake/NeuronFileLists.cmake @@ -103,7 +103,6 @@ set(OC_FILE_LIST ocerf.cpp plot.cpp plt.cpp - regexp.cpp scoprand.cpp settext.cpp symbol.cpp diff --git a/src/nrnoc/cabcode.cpp b/src/nrnoc/cabcode.cpp index cfd0faa111..5fdc65fd56 100644 --- a/src/nrnoc/cabcode.cpp +++ b/src/nrnoc/cabcode.cpp @@ -1,10 +1,12 @@ #include <../../nrnconf.h> /* /local/src/master/nrn/src/nrnoc/cabcode.cpp,v 1.37 1999/07/08 14:24:59 hines Exp */ -#define HOC_L_LIST 1 +#include #include #include #include + +#define HOC_L_LIST 1 #include "section.h" #include "nrn_ansi.h" #include "nrniv_mf.h" @@ -13,6 +15,36 @@ #include "hocparse.h" #include "membdef.h" +static char* escape_bracket(const char* s) { + static char* b; + const char* p1; + char* p2; + if (!b) { + b = new char[256]; + } + for (p1 = s, p2 = b; *p1; ++p1, ++p2) { + switch (*p1) { + case '<': + *p2 = '['; + break; + case '>': + *p2 = ']'; + break; + case '[': + case ']': + *p2 = '\\'; + *(++p2) = *p1; + break; + default: + *p2 = *p1; + break; + } + } + *p2 = '\0'; + return b; +} + + extern int hoc_execerror_messages; #define symlist hoc_symlist @@ -1979,8 +2011,8 @@ void forall_section(void) { Section* sec = hocSEC(qsec); qsec = qsec->next; if (buf[0]) { - hoc_regexp_compile(buf); - if (!hoc_regexp_search(secname(sec))) { + std::regex pattern(escape_bracket(buf)); + if (!std::regex_match(secname(sec), pattern)) { continue; } } @@ -2011,8 +2043,8 @@ void hoc_ifsec(void) { s = hoc_strpop(); Sprintf(buf, ".*%s.*", *s); - hoc_regexp_compile(buf); - if (hoc_regexp_search(secname(chk_access()))) { + std::regex pattern(escape_bracket(buf)); + if (std::regex_match(secname(chk_access()), pattern)) { hoc_execute(relative(savepc)); } if (!hoc_returning) @@ -2020,8 +2052,8 @@ void hoc_ifsec(void) { } void issection(void) { /* returns true if string is the access section */ - hoc_regexp_compile(gargstr(1)); - if (hoc_regexp_search(secname(chk_access()))) { + std::regex pattern(escape_bracket(gargstr(1))); + if (std::regex_match(secname(chk_access()), pattern)) { hoc_retpushx(1.); } else { hoc_retpushx(0.); diff --git a/src/oc/oc_ansi.h b/src/oc/oc_ansi.h index 34cc37062f..c83f9300fd 100644 --- a/src/oc/oc_ansi.h +++ b/src/oc/oc_ansi.h @@ -358,8 +358,6 @@ int is_vector_arg(int); char* vector_get_label(IvocVect*); void vector_set_label(IvocVect*, char*); -void hoc_regexp_compile(const char*); -int hoc_regexp_search(const char*); Symbol* hoc_install_var(const char*, double*); void hoc_class_registration(); void hoc_spinit(); diff --git a/src/oc/regexp.cpp b/src/oc/regexp.cpp deleted file mode 100644 index 4387fa1613..0000000000 --- a/src/oc/regexp.cpp +++ /dev/null @@ -1,466 +0,0 @@ -#include <../../nrnconf.h> -/* /local/src/master/nrn/src/oc/regexp.cpp,v 1.1.1.1 1994/10/12 17:22:13 hines Exp */ -/* -regexp.cpp,v - * Revision 1.1.1.1 1994/10/12 17:22:13 hines - * NEURON 3.0 distribution - * - * Revision 2.19 93/02/02 10:34:37 hines - * static functions declared before used - * - * Revision 1.3 92/07/31 12:11:31 hines - * following merged from hoc - * The regular expression has been augmented with - * {istart-iend} where istart and iend are integers. The expression matches - * any integer that falls in this range. - * - * Revision 1.2 92/01/30 08:17:19 hines - * bug fixes found in hoc incorporated. if()return, no else, objectcenter - * warnings. - * - * Revision 1.1 91/10/11 11:12:16 hines - * Initial revision - * - * Revision 3.108 90/10/24 09:44:14 hines - * saber warnings gone - * - * Revision 3.58 90/05/17 16:30:52 jamie - * changed global functions to start with hoc_ - * moved regexp.cpp from project 'neuron' to 'hoc' - * - * Revision 1.25 89/08/31 10:28:46 mlh - * regular expressions for issection() - * differences between standard regular expressions are: - * allways match from beginning to end of target (implicit ^ and $) - * change [] to <> - * eliminate \( - * - * Revision 1.2 89/08/31 09:22:17 mlh - * works as in e.cpp and lint free - * - * Revision 1.1 89/08/31 08:24:59 mlh - * Initial revision - * -*/ - -/* regular expression match for section names - grabbed prototype from e.cpp - Use by first compiling the search string with hoc_regexp_compile(pattern) - Then checking target strings one at a time with hoc_regexp_search(target) -*/ - -#include -#include "hocdec.h" -#define CABLESECTION 1 -/* Always match from beginning of string (implicit ^), - Always match end of string (implicit $), - change [] to <>, - eliminate \( -*/ - -#define STAR 01 -#define SUFF '.' -#define TILDE '~' - -#define EREGEXP 24 -#define error(enum) hoc_execerror("search string format error", pattern) -#define CBRA 1 -#define CCHR 2 -#define CDOT 4 -#define CCL 6 -#define NCCL 8 -#define CDOL 10 -#define CEOF 11 -#define CKET 12 -#if CABLESECTION -#define INTRANGE 14 -#endif -#define NBRA 5 -#define ESIZE 256 -#define eof '\0' -static char expbuf[ESIZE + 4]; -static const char* pattern = ""; -static char* loc1; -static char* loc2; -static char* locs; -static char* braslist[NBRA]; -static char* braelist[NBRA]; -static int circfl; -#if CABLESECTION -static int int_range_start[NBRA]; -static int int_range_stop[NBRA]; -#endif - -static int advance(char* lp, char* ep); -static int hoc_cclass(char* set, char c, int af); - -void hoc_regexp_compile(const char* pat) { - char* cp = (char*) pat; - int c; - char* ep; - char* lastep = 0; -#if (!CABLESECTION) - char bracket[NBRA], *bracketp; - int nbra; -#else - int int_range_index = 0; -#endif - int cclcnt; - int tempc; - - - if (!cp) { - pattern = ""; - error(EREGEXP); - } - if (pattern == cp && strcmp(pattern, cp)) { - /* if previous pattern != cp then may have been freed */ - return; - } - pattern = cp; - ep = expbuf; -#if (!CABLESECTION) - bracketp = bracket; - nbra = 0; -#endif - if ((c = *cp++) == '\n') { - cp--; - c = eof; - } - if (c == eof) { - if (*ep == 0) - error(EREGEXP); - return; - } -#if CABLESECTION - circfl = 1; -#else - circfl = 0; - if (c == '^') { - c = *cp++; - circfl++; - } -#endif - if (c == '*') - goto cerror; - cp--; - for (;;) { - if (ep >= &expbuf[ESIZE]) - goto cerror; - c = *cp++; - if (c == '\n') { - cp--; - c = eof; - } - if (c == eof) { -#if CABLESECTION - *ep++ = CDOL; -#endif - *ep++ = CEOF; - return; - } - if (c != '*') - lastep = ep; - switch (c) { - case '\\': -#if (!CABLESECTION) - if ((c = *cp++) == '(') { - if (nbra >= NBRA) - goto cerror; - *bracketp++ = nbra; - *ep++ = CBRA; - *ep++ = nbra++; - continue; - } - if (c == ')') { - if (bracketp <= bracket) - goto cerror; - *ep++ = CKET; - *ep++ = *--bracketp; - continue; - } -#endif - *ep++ = CCHR; - if (c == '\n') - goto cerror; - *ep++ = c; - continue; - - case '.': - *ep++ = CDOT; - continue; - - case '\n': - goto cerror; - - case '*': - if (*lastep == CBRA || *lastep == CKET) - error(EREGEXP); - *lastep |= STAR; - continue; - -#if (!CABLESECTION) - case '$': - tempc = *cp; - if (tempc != eof && tempc != '\n') - goto defchar; - *ep++ = CDOL; - continue; -#endif - -#if CABLESECTION - case '{': { - char* cp1 = cp; - if (int_range_index >= NBRA) - goto cerror; - *ep++ = INTRANGE; - do { - if (!(*cp >= '0' && *cp <= '9') && *cp != '-') { - error(EREGEXP); - } - } while (*(++cp) != '}'); - cp++; - if (2 != sscanf(cp1, - "%d-%d", - int_range_start + int_range_index, - int_range_stop + int_range_index)) { - error(EREGEXP); - } - *ep++ = int_range_index++; - } - continue; -#endif -#if CABLESECTION - case '<': -#else - case '[': -#endif - *ep++ = CCL; - *ep++ = 0; - cclcnt = 1; - if ((c = *cp++) == '^') { - c = *cp++; - ep[-2] = NCCL; - } - do { - if (c == '\n') - goto cerror; - /* - * Handle the escaped '-' - */ - if (c == '-' && *(ep - 1) == '\\') - *(ep - 1) = '-'; - /* - * Handle ranges of characters (e.g. a-z) - */ - else if ((tempc = *cp++) != ']' && c == '-' && cclcnt > 1 && tempc != '\n' && - (c = *(ep - 1)) <= tempc) { - while (++c <= tempc) { - *ep++ = c; - cclcnt++; - if (ep >= &expbuf[ESIZE]) - goto cerror; - } - } - /* - * Normal case. Add character to buffer - */ - else { - cp--; - *ep++ = c; - cclcnt++; - if (ep >= &expbuf[ESIZE]) - goto cerror; - } -#if CABLESECTION - } while ((c = *cp++) != '>'); -#else - - } while ((c = *cp++) != ']'); -#endif - lastep[1] = cclcnt; - continue; - -#if (!CABLESECTION) - defchar: -#endif - default: - *ep++ = CCHR; - *ep++ = c; - } - } -cerror: - expbuf[0] = 0; - error(EREGEXP); -} - -int hoc_regexp_search(const char* tar) { /*return true if target matches pattern*/ - char* target = (char*) tar; - char *p1, *p2, c; - -#if 1 - if (target == (char*) 0) { - return (0); - } - p1 = target; - locs = (char*) 0; -#else /* in e, apparently for searches within or at begining of string */ - if (gf) { - if (circfl) - return (0); - p1 = linebuf; - p2 = genbuf; - while (*p1++ = *p2++) - ; - locs = p1 = loc2; - } else { - if (addr == zero) - return (0); - p1 = getline(*addr); - locs = NULL; - } -#endif - p2 = expbuf; - if (circfl) { - loc1 = p1; - return (advance(p1, p2)); - } - /* fast check for first character */ - if (*p2 == CCHR) { - c = p2[1]; - do { - if (*p1 != c) - continue; - if (advance(p1, p2)) { - loc1 = p1; - return (1); - } - } while (*p1++); - return (0); - } - /* regular algorithm */ - do { - if (advance(p1, p2)) { - loc1 = p1; - return (1); - } - } while (*p1++); - return (0); -} - -static int advance(char* lp, char* ep) { - char* curlp; - - for (;;) - switch (*ep++) { - case CCHR: - if (*ep++ == *lp++) - continue; - return (0); - - case CDOT: - if (*lp++) - continue; - return (0); - - case CDOL: - if (*lp == 0) - continue; - return (0); - - case CEOF: - loc2 = lp; - return (1); - -#if CABLESECTION - case INTRANGE: { - int start, stop, num; - start = int_range_start[*ep]; - stop = int_range_stop[*ep++]; - num = *lp++ - '0'; - if (num < 0 || num > 9) { - return (0); - } - while (*lp >= '0' && *lp <= '9') { - num = 10 * num + *lp - '0'; - ++lp; - } - if (num >= start && num <= stop) { - continue; - } - } - return (0); -#endif - - case CCL: - if (hoc_cclass(ep, *lp++, 1)) { - ep += *ep; - continue; - } - return (0); - - case NCCL: - if (hoc_cclass(ep, *lp++, 0)) { - ep += *ep; - continue; - } - return (0); - - case CBRA: - braslist[*ep++] = lp; - continue; - - case CKET: - braelist[*ep++] = lp; - continue; - - case CDOT | STAR: - curlp = lp; - /*EMPTY*/ - while (*lp++) - ; - goto star; - - case CCHR | STAR: - curlp = lp; - /*EMPTY*/ - while (*lp++ == *ep) - ; - ep++; - goto star; - - case CCL | STAR: - case NCCL | STAR: - curlp = lp; - /*EMPTY*/ - while (hoc_cclass(ep, *lp++, ep[-1] == (CCL | STAR))) - ; - ep += *ep; - goto star; - - star: - do { - lp--; - if (lp == locs) - break; - if (advance(lp, ep)) - return (1); - } while (lp > curlp); - return (0); - - default: - error(EREGEXP); - } -} - -static int hoc_cclass(char* set, char c, int af) { - int n; - - if (c == 0) - return (0); - n = *set++; - while (--n) - if (*set++ == c) - return (af); - return (!af); -}