Permalink
Browse files

Change LexMode from a util.Enum to an ASDL type called lex_mode_e.

  • Loading branch information...
Andy Chu
Andy Chu committed Nov 20, 2017
1 parent 0c27fab commit 764dc485e865db2136838f6b5fc4728074595013
Showing with 171 additions and 179 deletions.
  1. +3 −3 core/id_kind.py
  2. +3 −3 core/lexer.py
  3. +2 −2 core/tdop.py
  4. +1 −1 osh/arith_parse_test.py
  5. +7 −8 osh/bool_parse.py
  6. +3 −3 osh/bool_parse_test.py
  7. +6 −5 osh/cmd_parse.py
  8. +17 −29 osh/lex.py
  9. +46 −46 osh/lex_test.py
  10. +4 −0 osh/osh.asdl
  11. +56 −56 osh/word_parse.py
  12. +23 −23 osh/word_parse_test.py
View
@@ -161,7 +161,7 @@ def _AddKinds(spec):
spec.AddKind('Ignored', ['LineCont', 'Space', 'Comment'])
# Id.WS_Space is for LexMode.OUTER; Id.Ignored_Space is for LexMode.ARITH
# Id.WS_Space is for lex_mode_e.OUTER; Id.Ignored_Space is for lex_mode_e.ARITH
spec.AddKind('WS', ['Space'])
spec.AddKind('Lit', [
@@ -172,8 +172,8 @@ def _AddKinds(spec):
'Tilde', # tilde expansion
'Pound', # for comment or VAROP state
'Slash', 'Percent', # / # % for patsub, NOT unary op
'Digits', # for LexMode.ARITH
'At', # for ${a[@]}, in LexMode.ARITH
'Digits', # for lex_mode_e.ARITH
'At', # for ${a[@]}, in lex_mode_e.ARITH
'ArithVarLike', # for $((var+1)). Distinct from Lit_VarLike 'var='
])
View
@@ -95,9 +95,9 @@ def LookAhead(self, lex_mode):
Does NOT advance self.line_pos.
Called with at least the following modes:
LexMode.ARITH -- for ${a[@]} vs ${a[1+2]}
LexMode.VS_1
LexMode.OUTER
lex_mode_e.ARITH -- for ${a[@]} vs ${a[1+2]}
lex_mode_e.VS_1
lex_mode_e.OUTER
"""
pos = self.line_pos
#print('Look ahead from pos %d, line %r' % (pos,self.line))
View
@@ -8,12 +8,12 @@
from core import util
from osh import ast_ as ast
from osh.lex import LexMode
p_die = util.p_die
arith_expr_e = ast.arith_expr_e
word_e = ast.word_e
lex_mode_e = ast.lex_mode_e
def Assert(s, expected, tree):
@@ -248,7 +248,7 @@ def Eat(self, token_type):
def Next(self):
"""Preferred over Eat()? """
self.cur_word = self.w_parser.ReadWord(LexMode.ARITH)
self.cur_word = self.w_parser.ReadWord(lex_mode_e.ARITH)
if self.cur_word is None:
error_stack = self.w_parser.Error()
self.error_stack.extend(error_stack)
View
@@ -27,7 +27,7 @@ class ExprSyntaxError(Exception):
def ParseAndEval(code_str):
w_parser, _ = parse_lib.MakeParserForCompletion(code_str)
#spec = arith_parse.MakeShellSpec()
#a_parser = tdop.TdopParser(spec, w_parser) # Calls ReadWord(LexMode.ARITH)
#a_parser = tdop.TdopParser(spec, w_parser) # Calls ReadWord(lex_mode_e.ARITH)
#anode = a_parser.Parse()
anode = w_parser._ReadArithExpr() # need the right lex state?
View
@@ -40,13 +40,12 @@
from core.id_kind import Id, Kind, LookupKind, IdName
from core import util
from osh.lex import LexMode
try:
import libc # for regex_parse
except ImportError:
from benchmarks import fake_libc as libc
lex_mode_e = ast.lex_mode_e
log = util.log
@@ -75,11 +74,11 @@ def AddErrorContext(self, msg, *args, **kwargs):
err = util.ParseError(msg, *args, **kwargs)
self.error_stack.append(err)
def _NextOne(self, lex_mode=LexMode.DBRACKET):
def _NextOne(self, lex_mode=lex_mode_e.DBRACKET):
#print('_Next', self.cur_word)
n = len(self.words)
if n == 2:
assert lex_mode == LexMode.DBRACKET
assert lex_mode == lex_mode_e.DBRACKET
self.words[0] = self.words[1]
self.cur_word = self.words[0]
del self.words[1]
@@ -100,7 +99,7 @@ def _NextOne(self, lex_mode=LexMode.DBRACKET):
#print('---- word', self.cur_word, 'op_id', self.op_id, self.b_kind, lex_mode)
return True
def _Next(self, lex_mode=LexMode.DBRACKET):
def _Next(self, lex_mode=lex_mode_e.DBRACKET):
"""Advance to the next token, skipping newlines.
We don't handle newlines in the lexer because we want the newline after ]]
@@ -120,7 +119,7 @@ def _LookAhead(self):
if n != 1:
raise AssertionError(self.words)
w = self.w_parser.ReadWord(LexMode.DBRACKET)
w = self.w_parser.ReadWord(lex_mode_e.DBRACKET)
self.words.append(w) # Save it for _Next()
return w
@@ -229,11 +228,11 @@ def ParseFactor(self):
if not self._Next(): return None
op = self.op_id
# TODO: Need to change to LexMode.BASH_REGEX.
# TODO: Need to change to lex_mode_e.BASH_REGEX.
# _Next(lex_mode) then?
is_regex = t2_op_id == Id.BoolBinary_EqualTilde
if is_regex:
if not self._Next(lex_mode=LexMode.BASH_REGEX): return None
if not self._Next(lex_mode=lex_mode_e.BASH_REGEX): return None
else:
if not self._Next(): return None
View
@@ -16,15 +16,15 @@
from osh import ast_ as ast
from osh import parse_lib
from osh import bool_parse # module under test
from osh.lex import LexMode
bool_expr_e = ast.bool_expr_e
lex_mode_e = ast.lex_mode_e
def _ReadWords(w_parser):
words = []
while True:
w = w_parser.ReadWord(LexMode.DBRACKET)
w = w_parser.ReadWord(lex_mode_e.DBRACKET)
if w.Type() == Id.Eof_Real:
break
words.append(w)
@@ -37,7 +37,7 @@ def _ReadWords(w_parser):
def _MakeParser(code_str):
# NOTE: We need the extra ]] token
w_parser, _ = parse_lib.MakeParserForCompletion(code_str + ' ]]')
w_parser._Next(LexMode.DBRACKET) # for tests only
w_parser._Next(lex_mode_e.DBRACKET) # for tests only
p = bool_parse.BoolParser(w_parser)
if not p._Next():
raise AssertionError
View
@@ -18,13 +18,14 @@
from core import util
from osh import ast_ as ast
from osh.lex import LexMode, VAR_NAME_RE
from osh.lex import VAR_NAME_RE
from osh.bool_parse import BoolParser
log = util.log
command_e = ast.command_e
word_e = ast.word_e
assign_op_e = ast.assign_op_e
lex_mode_e = ast.lex_mode_e
class CommandParser(object):
@@ -47,7 +48,7 @@ def Reset(self):
self.completion_stack = []
# Cursor state set by _Peek()
self.next_lex_mode = LexMode.OUTER
self.next_lex_mode = lex_mode_e.OUTER
self.cur_word = None # current word
self.c_kind = Kind.Undefined
self.c_id = Id.Undefined_Tok
@@ -132,7 +133,7 @@ def _MaybeReadHereDocs(self):
return True
def _Next(self, lex_mode=LexMode.OUTER):
def _Next(self, lex_mode=lex_mode_e.OUTER):
"""Helper method."""
self.next_lex_mode = lex_mode
@@ -148,7 +149,7 @@ def _Peek(self):
Returns True for success and False on error. Error examples: bad command
sub word, or unterminated quoted string, etc.
"""
if self.next_lex_mode != LexMode.NONE:
if self.next_lex_mode != lex_mode_e.NONE:
w = self.w_parser.ReadWord(self.next_lex_mode)
if w is None:
error_stack = self.w_parser.Error()
@@ -165,7 +166,7 @@ def _Peek(self):
self.c_kind = word.CommandKind(self.cur_word)
self.c_id = word.CommandId(self.cur_word)
self.next_lex_mode = LexMode.NONE
self.next_lex_mode = lex_mode_e.NONE
#print('_Peek', self.cur_word)
return True
View
@@ -27,27 +27,15 @@
Op_LBracket Op_RBracketEqual
"""
import re
from core.id_kind import Id, Kind, ID_SPEC
from core import util
from core.lexer import C, R
import re
from osh import ast_ as ast
lex_mode_e = ast.lex_mode_e
# Thirteen lexer modes for osh.
# Possible additional modes:
# - extended glob?
# - nested backticks: echo `echo \`echo foo\` bar`
LexMode = util.Enum('LexMode', """
NONE
COMMENT
OUTER
DBRACKET
SQ DQ DOLLAR_SQ
ARITH
EXTGLOB
VS_1 VS_2 VS_ARG_UNQ VS_ARG_DQ
BASH_REGEX BASH_REGEX_CHARS
""".split())
# In oil, I hope to have these lexer modes:
# COMMAND
@@ -128,7 +116,7 @@
LEXER_DEF = {} # TODO: Should be a list so we enforce order.
# Anything until the end of the line is a comment.
LEXER_DEF[LexMode.COMMENT] = [
LEXER_DEF[lex_mode_e.COMMENT] = [
R(r'.*', Id.Ignored_Comment) # does not match newline
]
@@ -242,14 +230,14 @@ def IsKeyword(name):
# [[.
# Keywords have to be checked before _UNQUOTED so we get <KW_If "if"> instead
# of <Lit_Chars "if">.
LEXER_DEF[LexMode.OUTER] = [
LEXER_DEF[lex_mode_e.OUTER] = [
C('((', Id.Op_DLeftParen), # not allowed within [[
] + _KEYWORDS + _MORE_KEYWORDS + _UNQUOTED + _EXTGLOB_BEGIN
# DBRACKET: can be like OUTER, except:
# - Don't really need redirects either... Redir_Less could be Op_Less
# - Id.Op_DLeftParen can't be nested inside.
LEXER_DEF[LexMode.DBRACKET] = [
LEXER_DEF[lex_mode_e.DBRACKET] = [
C(']]', Id.Lit_DRightBracket),
C('!', Id.KW_Bang),
] + ID_SPEC.LexerPairs(Kind.BoolUnary) + \
@@ -261,7 +249,7 @@ def IsKeyword(name):
# nested, so _EXTGLOB_BEGIN appears here.
#
# Example: echo @(<> <>|&&|'foo'|$bar)
LEXER_DEF[LexMode.EXTGLOB] = \
LEXER_DEF[lex_mode_e.EXTGLOB] = \
_BACKSLASH + _LEFT_SUBS + _VARS + _EXTGLOB_BEGIN + [
R(r'[^\\$`"\'|)@*+!?]+', Id.Lit_Chars),
C('|', Id.Op_Pipe),
@@ -270,7 +258,7 @@ def IsKeyword(name):
R('.', Id.Lit_Other), # everything else is literal
]
LEXER_DEF[LexMode.BASH_REGEX] = [
LEXER_DEF[lex_mode_e.BASH_REGEX] = [
# Match these literals first, and then the rest of the OUTER state I guess.
# That's how bash works.
#
@@ -282,7 +270,7 @@ def IsKeyword(name):
C('|', Id.Lit_Chars),
] + _UNQUOTED
LEXER_DEF[LexMode.DQ] = [
LEXER_DEF[lex_mode_e.DQ] = [
# Only 4 characters are backslash escaped inside "".
# https://www.gnu.org/software/bash/manual/bash.html#Double-Quotes
R(r'\\[$`"\\]', Id.Lit_EscapedChar),
@@ -303,7 +291,7 @@ def IsKeyword(name):
]
# Kind.{LIT,IGNORED,VS,LEFT,RIGHT,Eof}
LEXER_DEF[LexMode.VS_ARG_UNQ] = \
LEXER_DEF[lex_mode_e.VS_ARG_UNQ] = \
_VS_ARG_COMMON + _LEFT_SUBS + _LEFT_UNQUOTED + _VARS + [
# NOTE: added < and > so it doesn't eat <()
R(r'[^$`/}"\'\0\\#%<>]+', Id.Lit_Chars),
@@ -312,7 +300,7 @@ def IsKeyword(name):
]
# Kind.{LIT,IGNORED,VS,LEFT,RIGHT,Eof}
LEXER_DEF[LexMode.VS_ARG_DQ] = _VS_ARG_COMMON + _LEFT_SUBS + _VARS + [
LEXER_DEF[lex_mode_e.VS_ARG_DQ] = _VS_ARG_COMMON + _LEFT_SUBS + _VARS + [
R(r'[^$`/}"\0\\#%]+', Id.Lit_Chars), # matches a line at most
# Weird wart: even in double quoted state, double quotes are allowed
C('"', Id.Left_DoubleQuote),
@@ -322,22 +310,22 @@ def IsKeyword(name):
# NOTE: Id.Ignored_LineCont is NOT supported in SQ state, as opposed to DQ
# state.
LEXER_DEF[LexMode.SQ] = [
LEXER_DEF[lex_mode_e.SQ] = [
R(r"[^']+", Id.Lit_Chars), # matches a line at most
C("'", Id.Right_SingleQuote),
C('\0', Id.Eof_Real),
]
# NOTE: Id.Ignored_LineCont is also not supported here, even though the whole
# point of it is that it supports other backslash escapes like \n!
LEXER_DEF[LexMode.DOLLAR_SQ] = [
LEXER_DEF[lex_mode_e.DOLLAR_SQ] = [
R(r"[^'\\]+", Id.Lit_Chars),
R(r"\\.", Id.Lit_EscapedChar),
C("'", Id.Right_SingleQuote),
C('\0', Id.Eof_Real),
]
LEXER_DEF[LexMode.VS_1] = [
LEXER_DEF[lex_mode_e.VS_1] = [
R(_VAR_NAME_RE, Id.VSub_Name),
# ${11} is valid, compared to $11 which is $1 and then literal 1.
R(r'[0-9]+', Id.VSub_Number),
@@ -358,7 +346,7 @@ def IsKeyword(name):
R(r'.', Id.Unknown_Tok), # any char except newline
]
LEXER_DEF[LexMode.VS_2] = \
LEXER_DEF[lex_mode_e.VS_2] = \
ID_SPEC.LexerPairs(Kind.VTest) + \
ID_SPEC.LexerPairs(Kind.VOp1) + \
ID_SPEC.LexerPairs(Kind.VOp2) + [
@@ -370,7 +358,7 @@ def IsKeyword(name):
]
# https://www.gnu.org/software/bash/manual/html_node/Shell-Arithmetic.html#Shell-Arithmetic
LEXER_DEF[LexMode.ARITH] = \
LEXER_DEF[lex_mode_e.ARITH] = \
_LEFT_SUBS + _VARS + _LEFT_UNQUOTED + [
# newline is ignored space, unlike in OUTER
R(r'[ \t\r\n]+', Id.Ignored_Space),
Oops, something went wrong.

0 comments on commit 764dc48

Please sign in to comment.