Permalink
Browse files

Enable the re2c-based lexer.

All unit tests and spec tests pass.

benchmarks/testdata/configure is parsed in 5.4 seconds on lisa, rather
than 8.5 seconds.  Full benchmark run forthcoming.

Also:

- Remove unused variable in generated code.
  • Loading branch information...
Andy Chu
Andy Chu committed Nov 25, 2017
1 parent 7ae49ef commit f2343a06da3c59da9f697040a543e52babd834b5
Showing with 17 additions and 5 deletions.
  1. +16 −4 core/lexer_gen.py
  2. +1 −1 osh/parse_lib.py
View
@@ -169,25 +169,37 @@ def TranslateRegex(pat):
return f.getvalue()
# This explains the sentinel method, which we will use.
# http://re2c.org/examples/example_01.html
#
# TODO: Change ParseTuple to use 's' rather than 's#' ?
# I don't think we need this YYFILL mechanism, because we lex a line at a
# time.
# http://re2c.org/examples/example_03.html
def TranslateLexer(lexer_def):
print r"""
/* Common stuff */
/*!re2c
re2c:define:YYCTYPE = "unsigned char";
re2c:yyfill:enable = 0;
re2c:define:YYCURSOR = p;
re2c:define:YYLIMIT = q;
re2c:yyfill:enable = 0; // generated code doesn't ask for more input
*/
inline void MatchToken(int lex_mode, unsigned char* line, int line_len,
int start_pos, int* id, int* end_pos) {
// bounds checking
assert(start_pos < line_len);
if (start_pos >= line_len) {
fprintf(stderr, "start_pos %d line_len %d\n", start_pos, line_len);
assert(0);
}
//assert(start_pos < line_len);
unsigned char* p = line + start_pos; /* modified by re2c */
unsigned char* q = line + line_len; /* yylimit */
//printf("p: %p q: %p\n", p, q);
unsigned char* YYMARKER; /* why do we need this? */
View
@@ -49,7 +49,7 @@ def MatchToken_Fast(lex_mode, line, start_pos):
def _MakeMatcher():
# NOTE: Could have an environment variable to control this for speed?
return MatchToken_Slow(lex.LEXER_DEF)
#return MatchToken_Slow(lex.LEXER_DEF)
if fastlex:
return MatchToken_Fast

0 comments on commit f2343a0

Please sign in to comment.