|
|
@@ -167,7 +167,7 @@ def TranslateTree(re_tree, f, in_char_class=False): |
|
|
f.write('.')
|
|
|
|
|
|
else:
|
|
|
raise AssertionError(name)
|
|
|
raise RuntimeError("I don't understand regex construct: %r" % name)
|
|
|
|
|
|
# NOTE: negate and not_literal are sort of duplicated
|
|
|
|
|
|
@@ -178,7 +178,11 @@ def TranslateRegex(pat): |
|
|
#import pprint
|
|
|
#print(pprint.pformat(re_tree), file=sys.stderr)
|
|
|
f = cStringIO.StringIO()
|
|
|
TranslateTree(re_tree, f)
|
|
|
try:
|
|
|
TranslateTree(re_tree, f)
|
|
|
except RuntimeError:
|
|
|
print('Error translating %r' % pat, file=sys.stderr)
|
|
|
raise
|
|
|
return f.getvalue()
|
|
|
|
|
|
|
|
|
@@ -192,7 +196,37 @@ def TranslateRegex(pat): |
|
|
# http://re2c.org/examples/example_03.html
|
|
|
|
|
|
|
|
|
def TranslateLexer(lexer_def):
|
|
|
def TranslateEcholexer(echo_def):
|
|
|
print(r"""
|
|
|
static inline void MatchEchoToken(unsigned char* line, int line_len,
|
|
|
int start_pos, int* id, int* end_pos) {
|
|
|
assert(start_pos <= line_len); /* caller should have checked */
|
|
|
|
|
|
unsigned char* p = line + start_pos; /* modified by re2c */
|
|
|
|
|
|
unsigned char* YYMARKER; /* why do we need this? */
|
|
|
|
|
|
for (;;) {
|
|
|
/*!re2c
|
|
|
""")
|
|
|
|
|
|
for is_regex, pat, token_id in echo_def:
|
|
|
if is_regex:
|
|
|
re2c_pat = TranslateRegex(pat)
|
|
|
else:
|
|
|
re2c_pat = TranslateConstant(pat)
|
|
|
id_name = meta.IdName(token_id)
|
|
|
print(' %-30s { *id = id__%s; break; }' % (re2c_pat, id_name))
|
|
|
|
|
|
print("""
|
|
|
*/
|
|
|
}
|
|
|
*end_pos = p - line; /* relative */
|
|
|
}
|
|
|
""")
|
|
|
|
|
|
|
|
|
def TranslateOshLexer(lexer_def):
|
|
|
# https://stackoverflow.com/questions/12836171/difference-between-an-inline-function-and-static-inline-function
|
|
|
# Has to be 'static inline' rather than 'inline', otherwise the
|
|
|
# _bin/oil.ovm-dbg build fails (but the _bin/oil.ovm doesn't!).
|
|
|
@@ -208,7 +242,7 @@ def TranslateLexer(lexer_def): |
|
|
re2c:yyfill:enable = 0; // generated code doesn't ask for more input
|
|
|
*/
|
|
|
|
|
|
static inline void MatchToken(int lex_mode, unsigned char* line, int line_len,
|
|
|
static inline void MatchOshToken(int lex_mode, unsigned char* line, int line_len,
|
|
|
int start_pos, int* id, int* end_pos) {
|
|
|
assert(start_pos <= line_len); /* caller should have checked */
|
|
|
|
|
|
@@ -231,11 +265,11 @@ def TranslateLexer(lexer_def): |
|
|
|
|
|
for is_regex, pat, token_id in pat_list:
|
|
|
if is_regex:
|
|
|
re2_pat = TranslateRegex(pat)
|
|
|
re2c_pat = TranslateRegex(pat)
|
|
|
else:
|
|
|
re2_pat = TranslateConstant(pat)
|
|
|
re2c_pat = TranslateConstant(pat)
|
|
|
id_name = meta.IdName(token_id)
|
|
|
print(' %-30s { *id = id__%s; break; }' % (re2_pat, id_name))
|
|
|
print(' %-30s { *id = id__%s; break; }' % (re2c_pat, id_name))
|
|
|
|
|
|
# EARLY RETURN: Do NOT advance past the NUL terminator.
|
|
|
print(' %-30s { *id = id__Eol_Tok; *end_pos = start_pos; return; }' % \
|
|
|
@@ -293,8 +327,8 @@ def TranslateRegexToPredicate(py_regex, func_name): |
|
|
re2c_pat = TranslateRegex(py_regex)
|
|
|
print(r"""
|
|
|
static inline int %s(const char* s, int len) {
|
|
|
unsigned char* p = s; /* modified by re2c */
|
|
|
unsigned char* end = s + len;
|
|
|
const char* p = s; /* modified by re2c */
|
|
|
const char* end = s + len;
|
|
|
|
|
|
/*!re2c
|
|
|
re2c:define:YYCURSOR = p;
|
|
|
@@ -315,7 +349,8 @@ def main(argv): |
|
|
action = argv[1]
|
|
|
if action == 'c':
|
|
|
# Print code to stdout.
|
|
|
TranslateLexer(lex.LEXER_DEF)
|
|
|
TranslateOshLexer(lex.LEXER_DEF)
|
|
|
TranslateEcholexer(lex.ECHO_E_DEF)
|
|
|
TranslateRegexToPredicate(lex.VAR_NAME_RE, 'IsValidVarName')
|
|
|
TranslateRegexToPredicate(pretty.PLAIN_WORD_RE, 'IsPlainWord')
|
|
|
|
|
|
|
0 comments on commit
f7fde14