Permalink
Browse files

Hook up the echo -e lexer to fastlex.

- Exclude NUL from normal characters
- Handle NUL (end of string) as a special re2c clause generated
  by core/lexer_gen.py
- Split the lexer into "matcher" (fast/slow) and "lexer" components.

All unit tests and spec tests pass.  (Note that when fastlex is available,
the unit tests exercise only the fastlex path, not the pure-Python fallback.)
  • Loading branch information...
Andy Chu
Andy Chu committed Jun 1, 2018
1 parent 4fb0bac commit 9995c96ef7d2f250936e3c434ea2aaaa338d071b
Showing with 30 additions and 23 deletions.
  1. +4 −0 core/lexer_gen.py
  2. +1 −1 osh/lex.py
  3. +3 −0 osh/lex_test.py
  4. +22 −22 osh/match.py
View
@@ -218,6 +218,10 @@ def TranslateEcholexer(echo_def):
id_name = meta.IdName(token_id)
print(' %-30s { *id = id__%s; break; }' % (re2c_pat, id_name))
# EARLY RETURN: Do NOT advance past the NUL terminator.
print(' %-30s { *id = id__Eol_Tok; *end_pos = start_pos; return; }' % \
r'"\x00"')
print("""
*/
}
View
@@ -417,7 +417,7 @@ def IsKeyword(name):
C(r'\c', Id.Char_Stop),
# e.g. 'foo', anything that's not a backslash escape
R(r'[^\\]+', Id.Char_Literals),
R(r'[^\\\0]+', Id.Char_Literals),
]
# NOTE: Id.Ignored_LineCont is also not supported here, even though the whole
View
@@ -252,6 +252,9 @@ def testEchoLexer(self):
print(list(lex.Tokens(r'unicode \u0065 \U00000065')))
print(list(lex.Tokens(r'\d \e \f \g')))
# NOTE: We only test with one of these.
print(match.ECHO_MATCHER) # either fast or slow
# Run the test suite when this file is executed directly.
if __name__ == '__main__':
  unittest.main()
View
@@ -67,51 +67,53 @@ def __call__(self, lex_mode, line, start_pos):
def _MatchOshToken_Fast(lex_mode, line, start_pos):
  """Match the next OSH token using the native (re2c-generated) matcher.

  Args:
    lex_mode: lexer mode object; its enum_id selects the re2c sub-lexer.
    line: the string being lexed.
    start_pos: index into line where matching starts.

  Returns (Id, end_pos).
  """
  # NOTE: a merge artifact had left two docstring lines here (the second was a
  # no-op bare string statement); only the corrected one is kept.
  tok_type, end_pos = fastlex.MatchOshToken(lex_mode.enum_id, line, start_pos)
  # IMPORTANT: We're reusing Id instances here.  Ids are very common, so this
  # saves memory.
  return IdInstance(tok_type), end_pos
# TODO:
#
# MATCHER -> OSH_MATCHER
# add ECHO_MATCHER
# TODO: Make this the slow path!
class SimpleLexer(object):
  """Lexer for echo -e, which interprets C-escaped strings.

  Wraps a matcher function -- either the fast re2c-based matcher or the slow
  pure-Python regex matcher -- and yields tokens for a whole line.

  Based on osh/parse_lib.py MatchOshToken_Slow.
  """
  # NOTE: a merge artifact had left two __init__ definitions here, plus the
  # dead regex-loop body of the old pat_list implementation; only the
  # match_func version is live.

  def __init__(self, match_func):
    # match_func(line, pos) -> (token id, end_pos), left-anchored at pos.
    self.match_func = match_func

  def Tokens(self, line):
    """Yield (token id, token value) pairs until the line is exhausted."""
    pos = 0
    n = len(line)
    while pos != n:
      tok_type, end_pos = self.match_func(line, pos)
      yield tok_type, line[pos:end_pos]
      pos = end_pos
class _MatchEchoToken_Slow(object):
  """Pure-Python echo -e token matcher, used when fastlex is unavailable.

  Compiles the (pattern, id) definitions once, then matches with
  longest-match semantics on each call.
  """

  def __init__(self, pat_list):
    # Compile all patterns up front so __call__ stays cheap.
    self.pat_list = _CompileAll(pat_list)

  def __call__(self, line, start_pos):
    # Longest-match semantics, e.g. to prefer \377 over \3.
    result = _LongestMatch(self.pat_list, line, start_pos)
    return result
def _MatchEchoToken_Fast(line, start_pos):
  """Match an echo -e token with the native re2c matcher.

  Returns (id, end_pos).
  """
  tok_id, end = fastlex.MatchEchoToken(line, start_pos)
  # Reuse the shared Id instance rather than allocating one per token.
  return IdInstance(tok_id), end
# Select the fast (re2c/fastlex) or slow (pure-Python regex) implementations,
# depending on whether the fastlex extension module was importable.
if fastlex:
  MATCHER = _MatchOshToken_Fast
  ECHO_MATCHER = _MatchEchoToken_Fast
  IsValidVarName = fastlex.IsValidVarName
else:
  MATCHER = _MatchOshToken_Slow(lex.LEXER_DEF)
  ECHO_MATCHER = _MatchEchoToken_Slow(lex.ECHO_E_DEF)
# Used by osh/cmd_parse.py to validate for loop name. Note it must be
# anchored on the right.
@@ -121,6 +123,4 @@ def IsValidVarName(s):
return _VAR_NAME_RE.match(s)
# TODO: Conditionally create it
# NOTE: a merge artifact had left both the old SimpleLexer(lex.ECHO_E_DEF)
# and the new assignment here; only the matcher-based one is valid, since
# SimpleLexer now takes a match function rather than a pattern list.
ECHO_LEXER = SimpleLexer(ECHO_MATCHER)

0 comments on commit 9995c96

Please sign in to comment.