Skip to content

Commit

Permalink
[ysh language] Parse most of YSH case statement (#1633)
Browse files Browse the repository at this point in the history
The pattern can be:

- a list of words
- a list of expressions
- an eggex
- `(else)`, the fallback arm taken when no other pattern matches

We have some tricky/hacky logic in _NewlineOkForYshCase() due to overly intimate CommandParser -> WordParser -> Lexer interaction.

---------

Co-authored-by: Andy Chu <andy@oilshell.org>
  • Loading branch information
PossiblyAShrub and Andy Chu committed Jun 3, 2023
1 parent e4387c6 commit c0e91d4
Show file tree
Hide file tree
Showing 12 changed files with 516 additions and 47 deletions.
6 changes: 5 additions & 1 deletion core/test_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,11 @@ def InitWordParser(word_str, oil_at=False, arena=None):

def InitCommandParser(code_str, arena=None):
    """Build a command parser over `code_str` for use in tests.

    Args:
      code_str: source text handed to InitLexer().
      arena: optional arena; when falsy, a new one is created with
        MakeArena('<test_lib>').

    Returns:
      The parser returned by parse_ctx.MakeOshParser(line_reader).
    """
    arena = arena or MakeArena('<test_lib>')

    # Load the grammar first so it can be handed to the parse context.
    loader = pyutil.GetResourceLoader()
    oil_grammar = pyutil.LoadOilGrammar(loader)

    # Build the parse context exactly once, with the grammar attached.  A
    # grammar-less context created beforehand would only be overwritten.
    parse_ctx = InitParseContext(arena=arena, oil_grammar=oil_grammar)
    line_reader, _ = InitLexer(code_str, arena)
    c_parser = parse_ctx.MakeOshParser(line_reader)
    return c_parser
Expand Down
39 changes: 37 additions & 2 deletions frontend/lexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,11 @@ def LookAheadOne(self, lex_mode):
tok_type, _ = match.OneToken(lex_mode, line_str, pos)
return tok_type

def AssertAtEndOfLine(self):
    # type: () -> None
    """Assert that the read position is at the end of the current line.

    Raises:
      AssertionError: if self.line_pos differs from the length of
        self.src_line.content.  The message contains both values.
    """
    assert self.line_pos == len(self.src_line.content), \
        '%d %s' % (self.line_pos, self.src_line.content)

def LookPastSpace(self, lex_mode):
# type: (lex_mode_t) -> Id_t
"""Look ahead in current line for non-space token, using given lexer mode.
Expand Down Expand Up @@ -148,8 +153,9 @@ def LookPastSpace(self, lex_mode):

# NOTE: Instead of hard-coding this token, we could pass it in.
# LookPastSpace(lex_mode, past_token_type)
# WS_Space only appears in the ShCommand state!
if tok_type != Id.WS_Space:
# - WS_Space only given in lex_mode_e.ShCommand
# - Id.Ignored_Space given in lex_mode_e.Expr
if tok_type != Id.WS_Space and tok_type != Id.Ignored_Space:
break
pos = end_pos

Expand Down Expand Up @@ -320,6 +326,35 @@ def PushHint(self, old_id, new_id):
#log(' PushHint %s ==> %s', Id_str(old_id), Id_str(new_id))
self.translation_stack.append((old_id, new_id))

def MoveToNextLine(self):
    # type: () -> None
    """For lookahead on the next line.

    This is required by `ParseYshCase` and is used in `_NewlineOkForYshCase`.

    We use this because otherwise calling `LookPastSpace` would return
    `Id.Unknown_Tok` when the lexer has reached the end of the line. For an
    example, take this case:

      case (x) {
               ^--- We are here

        (else) {
        ^--- We want lookahead to here

          echo test
        }
      }

    But, without `MoveToNextLine`, it is impossible to peek the '(' without
    consuming it. And consuming it would be a problem once we want to hand off
    pattern parsing to the expression parser.

    Raises:
      AssertionError: (via line_lexer.AssertAtEndOfLine) if the current line
        has not been fully consumed yet.
    """
    # Only call this when you've seen \n
    self.line_lexer.AssertAtEndOfLine()

    # Fill the line lexer with a new line from the reader.
    src_line, line_pos = self.line_reader.GetLine()
    self.line_lexer.Reset(src_line, line_pos)

def _Read(self, lex_mode):
# type: (lex_mode_t) -> Token
"""Read from the normal line buffer, not an alias."""
Expand Down
79 changes: 78 additions & 1 deletion frontend/lexer_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import unittest

from _devbuild.gen.id_kind_asdl import Id
from _devbuild.gen.id_kind_asdl import Id, Id_str
from _devbuild.gen.types_asdl import lex_mode_e
from core import test_lib
from core.test_lib import Tok
Expand All @@ -32,6 +32,19 @@ def _PrintfOuterTokens(fmt):
log('')


def _PrintToken(t):
    """Print one token as its Id name (right-aligned to 20 columns) and value.

    Args:
      t: a token with `id` and `tval` attributes.
    """
    print('%20s %r' % (Id_str(t.id), t.tval))


def _PrintAllTokens(lx, lex_mode):
    """Read tokens from `lx` in `lex_mode` and print each one.

    Stops after printing the first token whose id is Id.Eof_Real or
    Id.Eol_Tok, so that terminating token is printed too.
    """
    while True:
        t = lx.Read(lex_mode)
        _PrintToken(t)
        if t.id in (Id.Eof_Real, Id.Eol_Tok):
            break


class TokenTest(unittest.TestCase):

def testToken(self):
Expand All @@ -57,6 +70,70 @@ def testPrintStats(self):
print("Number of lex states: %d" % len(LEXER_DEF))
print("Number of token dispatches: %d" % total)

def testMoveToNextLine(self):
    """MoveToNextLine() must not mess up the lexer's invariants.

    Walks a two-line input and checks that MoveToNextLine() asserts when it
    is called before the newline has been read, and that lookahead on the
    second line works (repeatedly, without consuming) once it is called
    after the newline.
    """
    arena = test_lib.MakeArena('<lexer_test.py>')
    code_str = '''cd {
}'''

    print('=== Printing all tokens')
    _, lx = test_lib.InitLexer(code_str, arena)
    _PrintAllTokens(lx, lex_mode_e.ShCommand)

    print()
    print('=== MoveToNextLine() and LookAheadOne()')
    # Start over with a fresh lexer; the one above was already read through.
    _, lx = test_lib.InitLexer(code_str, arena)

    t = lx.Read(lex_mode_e.ShCommand)
    _PrintToken(t)
    self.assertEqual(Id.Lit_Chars, t.id)

    t = lx.Read(lex_mode_e.ShCommand)
    _PrintToken(t)
    self.assertEqual(Id.WS_Space, t.id)

    t = lx.Read(lex_mode_e.ShCommand)
    _PrintToken(t)
    self.assertEqual(Id.Lit_LBrace, t.id)

    # The newline has not been read yet, so we are not at the end of the
    # line and the method must assert.
    with self.assertRaises(AssertionError):
        lx.MoveToNextLine()

    t = lx.Read(lex_mode_e.ShCommand)
    _PrintToken(t)
    self.assertEqual(Id.Op_Newline, t.id)

    # At the end of the line there is nothing left to look ahead at.
    look_ahead_id = lx.LookAheadOne(lex_mode_e.ShCommand)
    self.assertEqual(Id.Unknown_Tok, look_ahead_id)

    # Method being tested
    lx.MoveToNextLine()

    # Lookahead
    print('Lookahead')
    look_ahead_id = lx.LookAheadOne(lex_mode_e.ShCommand)
    self.assertEqual(Id.Lit_RBrace, look_ahead_id)

    # Lookahead again: the first lookahead must not have consumed anything.
    print('Lookahead 2')
    look_ahead_id = lx.LookAheadOne(lex_mode_e.ShCommand)
    self.assertEqual(Id.Lit_RBrace, look_ahead_id)

    t = lx.Read(lex_mode_e.ShCommand)
    _PrintToken(t)
    self.assertEqual(Id.Lit_RBrace, t.id)

    t = lx.Read(lex_mode_e.ShCommand)
    _PrintToken(t)
    self.assertEqual(Id.Eof_Real, t.id)

def testMaybeUnreadOne(self):
arena = test_lib.MakeArena('<lexer_test.py>')
_, lx = test_lib.InitLexer('()', arena)
Expand Down
13 changes: 12 additions & 1 deletion frontend/parse_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from _devbuild.gen.id_kind_asdl import Id, Id_t
from _devbuild.gen.syntax_asdl import (
Token, CompoundWord, expr_t, word_t, Redir, ArgList, NameType, command
Token, CompoundWord, expr_t, word_t, Redir, ArgList, NameType, command, pat_t
)
from _devbuild.gen.types_asdl import lex_mode_e
from _devbuild.gen import grammar_nt
Expand Down Expand Up @@ -392,6 +392,17 @@ def ParseYshForExpr(self, lexer, start_symbol):

return lvalue, iterable, last_token

def ParseYshCasePattern(self, lexer):
# type: (Lexer) -> pat_t
""" (6) | (7), / dot* '.py' /, (else), etc

Parse one case pattern from `lexer`, using grammar_nt.case_pat as the start
symbol, and return the result of self.tr.YshCasePattern() on the parsed node.
"""
# NOTE(review): the indentation of this body is not visible here, so it is
# unclear whether the YshCasePattern() call below belongs inside the `with`
# block -- confirm against the real file before re-indenting.
e_parser = self._YshParser()
with ctx_PNodeAllocator(e_parser):
# The second result (the last token) is not used by this method.
pnode, _last_token = e_parser.Parse(lexer, grammar_nt.case_pat)

# Turn the parsed node into the pattern that is returned.
pattern = self.tr.YshCasePattern(pnode)

return pattern

def ParseProc(self, lexer, out):
# type: (Lexer, command.Proc) -> Token
""" proc f(x, y, @args) { """
Expand Down
8 changes: 7 additions & 1 deletion frontend/syntax.asdl
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ module syntax
# For YSH typed case, left can be ( and /
# And case_pat may contain more details
CaseArm = (
Token left, List[word] pat_list, Token middle, List[command] action,
Token left, pat pattern, Token middle, List[command] action,
Token? right
)

Expand All @@ -277,6 +277,12 @@ module syntax
Word(word w)
| YshExpr(expr e)

pat =
Else
| Words(List[word] words)
| YshExprs(List[expr] exprs)
| Eggex(re eggex)

# Each if arm starts with either an "if" or "elif" keyword
# In YSH, the then keyword is not used (replaced by braces {})
IfArm = (
Expand Down
9 changes: 8 additions & 1 deletion osh/cmd_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
proc_sig, proc_sig_e,
redir_param, redir_param_e,
for_iter, for_iter_e,
pat, pat_e,
)
from _devbuild.gen.runtime_asdl import (
lvalue, lvalue_e,
Expand Down Expand Up @@ -1419,7 +1420,13 @@ def _Dispatch(self, node, cmd_st):
done = False

for case_arm in node.arms:
for pat_word in case_arm.pat_list:
if case_arm.pattern.tag() != pat_e.Words:
# TODO: support more than pat.Words
raise NotImplementedError()

pat_words = cast(pat.Words, case_arm.pattern)

for pat_word in pat_words.words:
# NOTE: Is it OK that we're evaluating these as we go?
# TODO: test it out in a loop
pat_val = self.word_ev.EvalWordToString(pat_word,
Expand Down

0 comments on commit c0e91d4

Please sign in to comment.