Permalink
Browse files

Several changes to prepare to optimize here doc parsing.

- Make unterminated here docs a hard error instead of a warning.
  - This makes it easier for users to fix the example error in the unit
    test.
  - We don't check for 'EOF)', it's always 'EOF\n'.
  - Document this in the manual.

- Remove a misusage of Id.Eof_Real.
  - It was being used when lookahead failed.  In this case,
    Id.Unknown_Tok is more appropriate.  Id.Eof_Real should only be used
    at the actual end of a file!
  - (Or it could get its own Id.Unknown_Eol.)

- Implement LineLexer.GetSpanIdForEof to show the location of EOF.  This
  improves error messages.
  - It also caused a cascade of changes because we want to assume that
    arenas are always available.  The unit tests hadn't yet been updated
    with this assumption.

- Now that we show EOF location information, fix the error in the here
  doc test.

- Make gold tests out of some files.

- Move shell scripts that show syntax errors to their own directory.

- Fix unit test breakages caused by the last change.
  • Loading branch information...
Andy Chu
Andy Chu committed Nov 8, 2017
1 parent ee4d2b8 commit ba59db602cd8bb5281d25623f208a826ffd6d6d4
View
@@ -322,7 +322,7 @@ def OshMain(argv, login_shell):
if HAVE_READLINE:
ev = word_eval.CompletionWordEvaluator(mem, exec_opts)
status_out = completion.StatusOutput(status_lines, exec_opts)
completion.Init(builtin.BUILTIN_DEF, mem, funcs, comp_lookup,
completion.Init(pool, builtin.BUILTIN_DEF, mem, funcs, comp_lookup,
status_out, ev)
# TODO: Could instantiate "printer" instead of showing ops
View
@@ -109,6 +109,13 @@ def GetDebugInfo(self, line_id):
return path, line_num
def CompletionArena(pool):
"""A temporary arena that only exists for a function call?"""
arena = pool.NewArena()
arena.PushSource('<completion>')
return arena
# In C++, InteractiveLineReader and StringLineReader should use the same
# representation: std::string with internal NULs to terminate lines, and then
# std::vector<char*> that points into to it.
View
@@ -40,6 +40,7 @@
from osh import ast_ as ast
from osh import parse_lib
from core import alloc
from core import runtime
from core import state
from core import ui
@@ -583,8 +584,8 @@ class RootCompleter(object):
"""
Provide completion of a buffer according to the configured rules.
"""
def __init__(self, make_parser, ev, comp_lookup, var_comp):
self.make_parser = make_parser
def __init__(self, pool, ev, comp_lookup, var_comp):
self.pool = pool
self.ev = ev
self.comp_lookup = comp_lookup
# This can happen in any position, with any command
@@ -593,7 +594,8 @@ def __init__(self, make_parser, ev, comp_lookup, var_comp):
self.parser = DummyParser() # TODO: remove
def Matches(self, buf, status_out):
w_parser, c_parser = self.make_parser(buf)
arena = alloc.CompletionArena(self.pool)
w_parser, c_parser = parse_lib.MakeParserForCompletion(buf, arena=arena)
comp_type, prefix, comp_words = _GetCompletionType(
w_parser, c_parser, self.ev, status_out)
@@ -747,7 +749,7 @@ def Write(self, index, msg, *args):
self.status_lines[index].Write(msg, *args)
def Init(builtins, mem, funcs, comp_lookup, status_out, ev):
def Init(pool, builtins, mem, funcs, comp_lookup, status_out, ev):
aliases_action = WordsAction(['TODO:alias'])
commands_action = ExternalCommandAction(mem)
@@ -775,7 +777,7 @@ def Init(builtins, mem, funcs, comp_lookup, status_out, ev):
make_parser = parse_lib.MakeParserForCompletion
var_comp = VarAction(os.environ, mem)
root_comp = RootCompleter(make_parser, ev, comp_lookup, var_comp)
root_comp = RootCompleter(pool, ev, comp_lookup, var_comp)
complete_cb = ReadlineCompleter(root_comp, status_out)
InitReadline(complete_cb)
@@ -798,7 +800,8 @@ def Init(builtins, mem, funcs, comp_lookup, status_out, ev):
comp_lookup = CompletionLookup()
ev = None
Init(builtins, mem, funcs, comp_lookup, status_out, ev)
pool = None
Init(pool, builtins, mem, funcs, comp_lookup, status_out, ev)
# Disable it. OK so this is how you go back and forth? At least in Python.
# Enable and disable custom completer?
@@ -815,5 +818,5 @@ def Init(builtins, mem, funcs, comp_lookup, status_out, ev):
pass
while True:
s = input('! ')
s = raw_input('! ')
print(s)
View
@@ -11,11 +11,13 @@
import unittest
from core import alloc
from core import cmd_exec
from core import cmd_exec_test
from core import completion # module under test
from core import lexer
from core import state
from core import test_lib
from core import word_eval
from core import ui
from core.id_kind import Id
@@ -106,9 +108,9 @@ def testRootCompleter(self):
ev = _MakeTestEvaluator()
pool = alloc.Pool()
var_comp = V1
r = completion.RootCompleter(parse_lib.MakeParserForCompletion,
ev, comp_lookup, var_comp)
r = completion.RootCompleter(pool, ev, comp_lookup, var_comp)
m = list(r.Matches('grep f', STATUS))
self.assertEqual(['foo.py ', 'foo '], m)
@@ -147,7 +149,8 @@ def _MakeTestEvaluator():
def _TestGetCompletionType(buf):
ev = _MakeTestEvaluator()
w_parser, c_parser = parse_lib.MakeParserForCompletion(buf)
arena = test_lib.MakeArena('<completion_test.py>')
w_parser, c_parser = parse_lib.MakeParserForCompletion(buf, arena=arena)
print('---', buf)
return completion._GetCompletionType(w_parser, c_parser, ev, STATUS)
View
@@ -83,20 +83,38 @@ def MaybeUnreadOne(self):
self.arena_skip = True # don't add the next token to the arena
return True
def GetSpanIdForEof(self):
assert self.arena, self.arena # This is mandatory now?
# zero length is special!
line_span = ast.line_span(self.line_id, self.line_pos, 0)
return self.arena.AddLineSpan(line_span)
def LookAhead(self, lex_mode):
"""Look ahead for a non-space token, using the given lexical state."""
"""Look ahead for a non-space token, using the given lexer mode.
Does NOT advance self.line_pos.
Called with at least the following modes:
LexMode.ARITH -- for ${a[@]} vs ${a[1+2]}
LexMode.VS_1
LexMode.OUTER
"""
pos = self.line_pos
#print('Look ahead from pos %d, line %r' % (pos,self.line))
while True:
if pos == len(self.line):
t = ast.token(Id.Eof_Real, '') # no location
# We don't allow lookahead while already at end of line, because it
# would involve interacting with the line reader, and we never need
# it. In the OUTER mode, there is an explicit newline token, but
# ARITH doesn't have it.
t = ast.token(Id.Unknown_Tok, '', -1) # no span ID
return t
re_list = self.lexer_def[lex_mode]
end_index, tok_type, tok_val = FindLongestMatch(
re_list, self.line, pos)
# NOTE: Instead of hard-coding this token, we could pass it in. This one
# only appears in OUTER state! LookAhead(lex_mode, past_token_type)
# NOTE: Instead of hard-coding this token, we could pass it in. This
# one only appears in OUTER state! LookAhead(lex_mode, past_token_type)
if tok_type != Id.WS_Space:
break
pos = end_index
@@ -210,9 +228,8 @@ def _Read(self, lex_mode):
line_id, line = self.line_reader.GetLine()
if line is None: # no more lines
t = ast.token(Id.Eof_Real, '', -1)
# No line number. I guess we are showing the last line of the file.
# TODO: Could keep track of previous position for this case?
span_id = self.line_lexer.GetSpanIdForEof()
t = ast.token(Id.Eof_Real, '', span_id)
return t
self.line_lexer.Reset(line, line_id)
View
@@ -12,7 +12,9 @@
import unittest
import sys
from core import alloc
from core import reader
from core import test_lib
# These may move around
from osh import parse_lib
@@ -44,9 +46,11 @@ def testGetLine(self):
def ParseAndExecute(code_str):
line_reader, lexer = parse_lib.InitLexer(code_str)
arena = test_lib.MakeArena('<shell_test.py>')
line_reader, lexer = parse_lib.InitLexer(code_str, arena=arena)
w_parser = WordParser(lexer, line_reader)
c_parser = CommandParser(w_parser, lexer, line_reader)
c_parser = CommandParser(w_parser, lexer, line_reader, arena=arena)
node = c_parser.ParseWholeFile()
if not node:
View
@@ -11,6 +11,7 @@
# Hm this doesn't use scope_e or var_flags_e
scope = runtime.scope
value_e = runtime.value_e
var_flags = runtime.var_flags
@@ -76,7 +77,7 @@ def testSetVarClearFlag(self):
mem.SetVar(
runtime.LhsName('g2'), None, (var_flags.Exported,),
scope.Dynamic)
self.assertEqual('', mem.var_stack[0]['g2'].val.s)
self.assertEqual(value_e.Undef, mem.var_stack[0]['g2'].val.tag)
self.assertEqual(True, mem.var_stack[0]['g2'].exported)
# readonly myglobal
@@ -131,7 +132,7 @@ def testSetVarClearFlag(self):
mem.SetVar(
runtime.LhsName('r2'), None, (var_flags.ReadOnly,),
scope.Dynamic)
self.assertEqual('', mem.var_stack[0]['r2'].val.s)
self.assertEqual(value_e.Undef, mem.var_stack[0]['r2'].val.tag)
self.assertEqual(True, mem.var_stack[0]['r2'].readonly)
# export -n PYTHONPATH
View
@@ -9,6 +9,9 @@
test_lib.py - Functions for testing.
"""
from core import alloc
def TokensEqual(left, right):
# Ignoring location in CompoundObj.__eq__ now, but we might want this later.
return left.id == right.id and left.val == right.val
@@ -19,3 +22,10 @@ def TokenWordsEqual(left, right):
# Ignoring location in CompoundObj.__eq__ now, but we might want this later.
return TokensEqual(left.token, right.token)
#return left == right
def MakeArena(source_name):
pool = alloc.Pool()
arena = pool.NewArena()
arena.PushSource('<lex_test.py>')
return arena
View
@@ -86,6 +86,29 @@ hi;})` anyway.
extglob`. That flag is a no-op in OSH. OSH avoids dynamic parsing, while
bash does it in many places.
(6) **Here Doc Terminators Must Be On Their Own Line**
NO:
a=$(cat <<EOF
abc
EOF)
a=$(cat <<EOF
abc
EOF # not a comment, read as here doc delimiter
)
YES:
a=$(cat <<EOF
abc
EOF
) # newline
Just like `EOF]` will not end the here doc, `EOF)` doesn't end it either. It
must be on its own line.
## set builtin
### errexit
@@ -142,7 +142,8 @@ echo
for x in 1 2 $(cat <<EOF
THREE
EOF); do
EOF
); do
echo for word $x
done
echo
View
@@ -206,16 +206,20 @@ def _MaybeReadHereDocs(self, node):
#print('')
for h in here_docs:
lines = []
#print(h.here_end)
#log('HERE %r' % h.here_end)
while True:
# If op is <<-, strip off all leading tabs (NOT spaces).
# (in C++, just bump the start?)
line_id, line = self.line_reader.GetLine()
#print("LINE %r %r" % (line, h.here_end))
if not line: # EOF
print('WARNING: unterminated here doc', file=sys.stderr)
break
# An unterminated here doc is just a warning in bash. We make it
# fatal because we want to be strict, and because it causes problems
# reporting other errors.
# Attribute it to the << in <<EOF for now.
self.AddErrorContext('Unterminated here doc', span_id=h.spids[0])
return False
# NOTE: Could do this runtime to preserve LST.
if h.op_id == Id.Redir_DLessDash:
@@ -264,7 +268,8 @@ def _MaybeReadHereDocsAfterNewline(self, node):
return False
#print('_Maybe testing for newline', self.cur_word, node)
if self.c_id == Id.Op_Newline:
self._MaybeReadHereDocs(node)
if not self._MaybeReadHereDocs(node):
return False
#print('_Maybe read redirects', node)
self._Next()
if not self._Peek():
@@ -759,6 +764,7 @@ def ParseForWords(self):
self._Next()
break
if self.cur_word.tag != word_e.CompoundWord:
# TODO: Can we also show a pointer to the 'for' keyword?
self.AddErrorContext('Invalid word in for loop', word=self.cur_word)
return None
@@ -1399,7 +1405,7 @@ def ParsePipeline(self):
# If the pipeline ended in a newline, we need to read here docs.
if self.c_id == Id.Op_Newline:
for child in children:
self._MaybeReadHereDocs(child)
if not self._MaybeReadHereDocs(child): return None
node = ast.Pipeline(children, negated)
node.stderr_indices = stderr_indices
@@ -1487,13 +1493,13 @@ def ParseCommandLine(self):
if not self._Peek(): return None
if self.c_id == Id.Op_Newline:
self._MaybeReadHereDocs(child)
if not self._MaybeReadHereDocs(child): return None
done = True
elif self.c_id == Id.Eof_Real:
done = True
elif self.c_id == Id.Op_Newline:
self._MaybeReadHereDocs(child)
if not self._MaybeReadHereDocs(child): return None
done = True
elif self.c_id == Id.Eof_Real:
@@ -1558,8 +1564,9 @@ def ParseCommandTerm(self):
if self.c_id == Id.Op_Newline:
# Read ALL Here docs so far. cat <<EOF; echo hi <newline>
for c in children:
self._MaybeReadHereDocs(c)
self._MaybeReadHereDocs(child) # Read last child's here docs
if not self._MaybeReadHereDocs(c): return None
# Read last child's here docs
if not self._MaybeReadHereDocs(child): return None
self._Next()
if not self._Peek(): return None
@@ -1573,8 +1580,9 @@ def ParseCommandTerm(self):
if not self._Peek(): return None
if self.c_id == Id.Op_Newline:
for c in children:
self._MaybeReadHereDocs(c)
self._MaybeReadHereDocs(child) # Read last child's
if not self._MaybeReadHereDocs(c): return None
# Read last child's
if not self._MaybeReadHereDocs(child): return None
self._Next() # skip over newline
@@ -1598,7 +1606,7 @@ def ParseCommandTerm(self):
if not self._Peek(): return None
if self.c_id == Id.Op_Newline:
for c in children:
self._MaybeReadHereDocs(c)
if not self._MaybeReadHereDocs(c): return None
return ast.CommandList(children)
@@ -1614,6 +1622,8 @@ def ParseCommandList(self):
easier.
"""
if not self._NewlineOk(): return None
#if not self._MaybeReadHereDocsAfterNewline(node):
# return None
node = self.ParseCommandTerm()
if node is None: return None
Oops, something went wrong.

0 comments on commit ba59db6

Please sign in to comment.