Permalink
Browse files

Wow, greatly simplified here doc parsing code!

It should have sped it up too, but I still have to measure it.

The post-order traversal was the wrong idea.  All we need is a simple
deque().  (It's probably a linked list in C.)

Also, fix here strings/here words to evaluate to a string with an
additional newline.  gold/complex-here-docs.sh now passes!
  • Loading branch information...
Andy Chu
Andy Chu committed Nov 9, 2017
1 parent 593348f commit feec0de152a93ebec6eaff20630813db6050e2c5
Showing with 87 additions and 174 deletions.
  1. +2 −1 core/cmd_exec.py
  2. +71 −171 osh/cmd_parse.py
  3. +12 −0 spec/here-doc.test.sh
  4. +2 −2 test/spec.sh
View
@@ -393,7 +393,8 @@ def _EvalRedirect(self, n):
if val.tag != value_e.Str:
util.warn("Here word body should be a string, got %s", val)
return None
return runtime.HereRedirect(fd, val.s)
# NOTE: bash and mksh both add \n
return runtime.HereRedirect(fd, val.s + '\n')
else:
raise AssertionError('Unknown redirect op')
View
@@ -10,6 +10,7 @@
cmd_parse.py - Parse high level shell commands.
"""
import collections
import sys
from core import braces
@@ -27,76 +28,6 @@
assign_op = ast.assign_op
def _UnfilledHereDocs(redirects):
return [
r for r in redirects
if r.op_id in (Id.Redir_DLess, Id.Redir_DLessDash) and not r.was_filled
]
def _GetHereDocsToFill(node):
"""For CommandParser to fill here docs"""
# Has to be a POST ORDER TRAVERSAL of here docs, e.g.
#
# while read line; do cat <<EOF1; done <<EOF2
# body
# EOF1
# while
# EOF2
# Leaf nodes: no redirects and no children.
if node.tag in (command_e.NoOp, command_e.Assignment, command_e.ControlFlow):
return []
# Leaf nodes: these have redirects but not children.
if node.tag in (
command_e.SimpleCommand, command_e.DParen, command_e.DBracket):
return _UnfilledHereDocs(node.redirects)
# Interior nodes with children:
here_docs = []
if node.tag == command_e.If:
for arm in node.arms:
for child in arm.cond:
here_docs.extend(_GetHereDocsToFill(child))
for child in arm.action:
here_docs.extend(_GetHereDocsToFill(child))
for child in node.else_action:
here_docs.extend(_GetHereDocsToFill(child))
elif node.tag == command_e.Case:
for arm in node.arms:
for child in arm.action:
here_docs.extend(_GetHereDocsToFill(child))
elif node.tag in (command_e.ForEach, command_e.ForExpr, command_e.FuncDef):
here_docs.extend(_GetHereDocsToFill(node.body))
elif node.tag in (command_e.While, command_e.Until):
for child in node.cond:
here_docs.extend(_GetHereDocsToFill(child))
here_docs.extend(_GetHereDocsToFill(node.body))
elif node.tag in (command_e.Sentence, command_e.Subshell):
here_docs.extend(_GetHereDocsToFill(node.child))
elif node.tag == command_e.TimeBlock:
here_docs.extend(_GetHereDocsToFill(node.pipeline))
else:
for child in node.children:
here_docs.extend(_GetHereDocsToFill(child))
# && || and | don't have their own redirects, but have children that may.
if node.tag not in (
command_e.AndOr, command_e.Pipeline, command_e.CommandList,
command_e.Sentence, command_e.TimeBlock):
here_docs.extend(_UnfilledHereDocs(node.redirects)) # parent
return here_docs
class CommandParser(object):
"""
Args:
@@ -122,6 +53,8 @@ def Reset(self):
self.c_kind = Kind.Undefined
self.c_id = Id.Undefined_Tok
self.pending_here_docs = collections.deque()
def Error(self):
return self.error_stack
@@ -141,70 +74,13 @@ def AddErrorContext(self, msg, *args, **kwargs):
def GetCompletionState(self):
return self.completion_stack
def Peek(self):
"""Public method for REPL."""
if not self._Peek():
return None
return self.cur_word
def _Peek(self):
"""Helper method.
Returns True for success and False on error. Error examples: bad command
sub word, or unterminated quoted string, etc.
"""
if self.next_lex_mode != LexMode.NONE:
w = self.w_parser.ReadWord(self.next_lex_mode)
if w is None:
error_stack = self.w_parser.Error()
self.error_stack.extend(error_stack)
return False
self.cur_word = w
self.c_kind = word.CommandKind(self.cur_word)
self.c_id = word.CommandId(self.cur_word)
self.next_lex_mode = LexMode.NONE
#print('_Peek', self.cur_word)
return True
def _Next(self, lex_mode=LexMode.OUTER):
"""Helper method."""
self.next_lex_mode = lex_mode
def _Eat(self, c_id):
"""Consume a word of a type. If it doesn't match, return False.
Args:
c_id: either EKeyword.* or a token type like Id.Right_Subshell.
TODO: Rationalize / type check this.
"""
if not self._Peek():
return False
# TODO: It would be nicer to print the word type, right now we get a number
if self.c_id != c_id:
self.AddErrorContext(
"Expected word type %s, got %s", c_id, self.cur_word,
word=self.cur_word)
return False
self._Next()
return True
def _NewlineOk(self):
"""Check for optional newline and consume it."""
if not self._Peek():
return False
if self.c_id == Id.Op_Newline:
self._Next()
if not self._Peek():
return False
return True
def _MaybeReadHereDocs(self):
while True:
try:
h = self.pending_here_docs.popleft()
except IndexError:
break
def _MaybeReadHereDocs(self, node):
here_docs = _GetHereDocsToFill(node)
#print('')
#print('--> FILLING', here_docs)
#print('')
for h in here_docs:
lines = []
#log('HERE %r' % h.here_end)
while True:
@@ -262,15 +138,66 @@ def _MaybeReadHereDocs(self, node):
#print('')
return True
def _MaybeReadHereDocsAfterNewline(self, node):
"""Like _NewlineOk, but also reads here docs."""
def _Next(self, lex_mode=LexMode.OUTER):
"""Helper method."""
self.next_lex_mode = lex_mode
def Peek(self):
"""Public method for REPL."""
if not self._Peek():
return None
return self.cur_word
def _Peek(self):
"""Helper method.
Returns True for success and False on error. Error examples: bad command
sub word, or unterminated quoted string, etc.
"""
if self.next_lex_mode != LexMode.NONE:
w = self.w_parser.ReadWord(self.next_lex_mode)
if w is None:
error_stack = self.w_parser.Error()
self.error_stack.extend(error_stack)
return False
# Here docs only happen in command mode, so other kinds of newlines don't
# count.
if w.tag == word_e.TokenWord and w.token.id == Id.Op_Newline:
if not self._MaybeReadHereDocs():
return False
self.cur_word = w
self.c_kind = word.CommandKind(self.cur_word)
self.c_id = word.CommandId(self.cur_word)
self.next_lex_mode = LexMode.NONE
#print('_Peek', self.cur_word)
return True
def _Eat(self, c_id):
"""Consume a word of a type. If it doesn't match, return False.
Args:
c_id: either EKeyword.* or a token type like Id.Right_Subshell.
TODO: Rationalize / type check this.
"""
if not self._Peek():
return False
# TODO: It would be nicer to print the word type, right now we get a number
if self.c_id != c_id:
self.AddErrorContext(
"Expected word type %s, got %s", c_id, self.cur_word,
word=self.cur_word)
return False
self._Next()
return True
def _NewlineOk(self):
"""Check for optional newline and consume it."""
if not self._Peek():
return False
#print('_Maybe testing for newline', self.cur_word, node)
if self.c_id == Id.Op_Newline:
if not self._MaybeReadHereDocs(node):
return False
#print('_Maybe read redirects', node)
self._Next()
if not self._Peek():
return False
@@ -296,9 +223,6 @@ def ParseRedirect(self):
else:
fd = -1
# TODO: Set a flag here, and then _MaybeReadHereDocsAfterNewline can use
# it to short-circuit
if self.c_id in (Id.Redir_DLess, Id.Redir_DLessDash): # here doc
node = ast.HereDoc()
node.op_id = self.c_id
@@ -320,6 +244,8 @@ def ParseRedirect(self):
node.do_expansion = not quoted
self._Next()
self.pending_here_docs.append(node) # will be filled on next newline.
else:
node = ast.Redir()
node.op_id = self.c_id
@@ -1382,8 +1308,7 @@ def ParsePipeline(self):
while True:
self._Next() # skip past Id.Op_Pipe or Id.Op_PipeAmp
# cat <<EOF | <newline>
if not self._MaybeReadHereDocsAfterNewline(child):
if not self._NewlineOk():
return None
child = self.ParseCommand()
@@ -1402,11 +1327,6 @@ def ParsePipeline(self):
stderr_indices.append(pipe_index)
pipe_index += 1
# If the pipeline ended in a newline, we need to read here docs.
if self.c_id == Id.Op_Newline:
for child in children:
if not self._MaybeReadHereDocs(child): return None
node = ast.Pipeline(children, negated)
node.stderr_indices = stderr_indices
return node
@@ -1433,8 +1353,7 @@ def ParseAndOr(self):
op = self.c_id
self._Next() # Skip past operator
# cat <<EOF || <newline>
if not self._MaybeReadHereDocsAfterNewline(left): return None
if not self._NewlineOk(): return None
right = self.ParseAndOr()
if not right: return None
@@ -1492,14 +1411,10 @@ def ParseCommandLine(self):
self._Next()
if not self._Peek(): return None
if self.c_id == Id.Op_Newline:
if not self._MaybeReadHereDocs(child): return None
done = True
elif self.c_id == Id.Eof_Real:
if self.c_id in (Id.Op_Newline, Id.Eof_Real):
done = True
elif self.c_id == Id.Op_Newline:
if not self._MaybeReadHereDocs(child): return None
done = True
elif self.c_id == Id.Eof_Real:
@@ -1562,11 +1477,6 @@ def ParseCommandTerm(self):
if not self._Peek(): return None
if self.c_id == Id.Op_Newline:
# Read ALL Here docs so far. cat <<EOF; echo hi <newline>
for c in children:
if not self._MaybeReadHereDocs(c): return None
# Read last child's here docs
if not self._MaybeReadHereDocs(child): return None
self._Next()
if not self._Peek(): return None
@@ -1579,11 +1489,6 @@ def ParseCommandTerm(self):
if not self._Peek(): return None
if self.c_id == Id.Op_Newline:
for c in children:
if not self._MaybeReadHereDocs(c): return None
# Read last child's
if not self._MaybeReadHereDocs(child): return None
self._Next() # skip over newline
# Test if we should keep going. There might be another command after
@@ -1604,9 +1509,6 @@ def ParseCommandTerm(self):
children.append(child)
if not self._Peek(): return None
if self.c_id == Id.Op_Newline:
for c in children:
if not self._MaybeReadHereDocs(c): return None
return ast.CommandList(children)
@@ -1622,8 +1524,6 @@ def ParseCommandList(self):
easier.
"""
if not self._NewlineOk(): return None
#if not self._MaybeReadHereDocsAfterNewline(node):
# return None
node = self.ParseCommandTerm()
if node is None: return None
View
@@ -1,5 +1,17 @@
#!/usr/bin/env bash
### Here string
cat <<< 'hi'
# stdout-json: "hi\n"
# N-I dash stdout-json: ""
# N-I dash status: 2
### Here string with $
cat <<< $'one\ntwo\n'
# stdout-json: "one\ntwo\n\n"
# N-I dash stdout-json: ""
# N-I dash status: 2
### Here redirect with explicit descriptor
# A space between 0 and <<EOF causes it to pass '0' as an arg to cat.
cat 0<<EOF
View
@@ -331,13 +331,13 @@ parse-errors() {
}
here-doc() {
# NOTE: The last two tests, 28 and 29, have different behavior on my Ubuntu
# NOTE: The last two tests, 31 and 32, have different behavior on my Ubuntu
# and Debian machines.
# - On Ubuntu, read_from_fd.py fails with Errno 9 -- bad file descriptor.
# - On Debian, the whole process hangs.
# Is this due to Python 3.2 vs 3.4? Either way osh doesn't implement the
# functionality, so it's probably best to just implement it.
sh-spec spec/here-doc.test.sh --osh-failures-allowed 2 --range 0-28 \
sh-spec spec/here-doc.test.sh --osh-failures-allowed 3 --range 0-30 \
${REF_SHELLS[@]} $OSH "$@"
}

0 comments on commit feec0de

Please sign in to comment.