Permalink
Browse files

[osh2oil] Translate here docs to Oil.

- Now using word_part* stdin_parts rather than CompoundWord.
- osh/word_parse.py: Factor out _ReadLikeDQ()
- Got a couple here-doc cases passing in test/osh2oil.sh
  - Implemented word.RightMostSpanForWord() to aid the translation
- Add a more complicated test for the LST invariant in test/arena/

Other cleanups:

- Remove unused 'decay' param.
- core/cmd_exec.py: Remove error checks that never happen.
  • Loading branch information...
Andy Chu
Andy Chu committed Aug 28, 2018
1 parent efbf977 commit 9d37d76e81a263d19df8fe68e62503eebc3bd364
Showing with 165 additions and 160 deletions.
  1. +9 −12 core/cmd_exec.py
  2. +5 −13 core/word.py
  3. +22 −14 core/word_eval.py
  4. +17 −27 osh/cmd_parse.py
  5. +6 −13 osh/cmd_parse_test.py
  6. +2 −1 osh/osh.asdl
  7. +42 −57 osh/word_parse.py
  8. +1 −0 test/arena.sh
  9. +12 −0 test/arena/here-multiple.sh
  10. +12 −12 test/osh2oil.sh
  11. +19 −0 test/runtime-errors.sh
  12. +18 −11 tools/osh2oil.py
View
@@ -480,22 +480,18 @@ def _EvalRedirect(self, n):
return runtime.DescRedirect(n.op.id, fd, target_fd)
elif redir_type == redir_arg_type_e.Here: # here word
# TODO: decay should be controlled by an option
val = self.word_ev.EvalWordToString(n.arg_word, decay=True)
if val.tag != value_e.Str: # TODO: This error never fires
util.warn("Here word body should be a string, got %s", val)
return None
val = self.word_ev.EvalWordToString(n.arg_word)
assert val.tag == value_e.Str, val
# NOTE: bash and mksh both add \n
return runtime.HereRedirect(fd, val.s + '\n')
else:
raise AssertionError('Unknown redirect op')
elif n.tag == redir_e.HereDoc:
# TODO: decay should be controlled by an option
val = self.word_ev.EvalWordToString(n.body, decay=True)
if val.tag != value_e.Str: # TODO: This error never fires
util.warn("Here doc body should be a string, got %s", val)
return None
# HACK: Wrap it in a word to evaluate.
w = ast.CompoundWord(n.stdin_parts)
val = self.word_ev.EvalWordToString(w)
assert val.tag == value_e.Str, val
return runtime.HereRedirect(fd, val.s)
else:
@@ -1211,7 +1207,7 @@ def ExecuteAndCatch(self, node, fork_external=True):
raise # Invalid
except util.FatalRuntimeError as e:
ui.PrettyPrintError(e, self.arena)
print('osh failed: %s' % e.UserErrorString(), file=sys.stderr)
#print('osh failed: %s' % e.UserErrorString(), file=sys.stderr)
status = e.exit_status if e.exit_status is not None else 1
# TODO: dump self.mem if requested. Maybe specify with OIL_DUMP_PREFIX.
@@ -1490,8 +1486,9 @@ def _EvalPS4(self):
# allowed too. The OUTER mode would stop at spaces, and ReadWord
# doesn't allow lex_mode_e.DQ.
ok = True
ps4_word = ast.CompoundWord()
try:
ps4_word = w_parser.ReadHereDocBody()
w_parser.ReadHereDocBody(ps4_word.parts)
except util.ParseError as e:
ok = False
else:
View
@@ -179,17 +179,10 @@ def _RightMostSpanForPart(part):
return part.token.span_id
elif part.tag == word_part_e.SingleQuotedPart:
if part.tokens:
return part.tokens[-1].span_id
else:
return const.NO_INTEGER
return part.spids[1] # right '
elif part.tag == word_part_e.DoubleQuotedPart:
if part.parts:
return LeftMostSpanForPart(part.parts[-1])
else:
# We need the double quote location
return const.NO_INTEGER
return part.spids[1] # right "
elif part.tag == word_part_e.SimpleVarSub:
return part.token.span_id
@@ -258,13 +251,12 @@ def LeftMostSpanForWord(w):
# This is needed for DoWord I guess? It makes it easier to write the fixer.
def UNUSED_RightMostSpanForWord(w):
def RightMostSpanForWord(w):
# TODO: Really we should use par
if w.tag == word_e.CompoundWord:
if len(w.parts) == 0:
return const.NO_INTEGER
elif len(w.parts) == 1:
return _RightMostSpanForPart(w.parts[0])
# TODO: Use EmptyWord instead
raise AssertionError("CompoundWord shouldn't be empty")
else:
end = w.parts[-1]
return _RightMostSpanForPart(end)
View
@@ -46,7 +46,7 @@ def _BackslashEscape(s):
def _ValueToPartValue(val, quoted):
"""Helper for VarSub evaluation.
Called by _EvalBracedVarSub and __EvalWordPart for SimpleVarSub.
Called by _EvalBracedVarSub and _EvalWordPart for SimpleVarSub.
"""
assert isinstance(val, runtime.value), val
@@ -454,9 +454,9 @@ def _EvalBracedVarSub(self, part, part_vals, quoted):
val, maybe_decay_array = self._EvalSpecialVar(part.token.id, quoted)
# 2. Bracket: value -> (value v, bool maybe_decay_array)
# maybe_decay_array is for joining ${a[*]} and unquoted ${a[@]} AFTER suffix ops
# are applied. If we take the length with a prefix op, the distinction is
# ignored.
# maybe_decay_array is for joining ${a[*]} and unquoted ${a[@]} AFTER
# suffix ops are applied. If we take the length with a prefix op, the
# distinction is ignored.
if part.bracket_op:
if part.bracket_op.tag == bracket_op_e.WholeArray:
op_id = part.bracket_op.op_id
@@ -469,6 +469,7 @@ def _EvalBracedVarSub(self, part, part_vals, quoted):
elif val.tag == value_e.Str:
e_die("Can't index string with @: %r", val, part=part)
elif val.tag == value_e.StrArray:
# TODO: Is this a no-op? Just leave 'val' alone.
val = runtime.StrArray(val.strs)
elif op_id == Id.Arith_Star:
@@ -478,7 +479,8 @@ def _EvalBracedVarSub(self, part, part_vals, quoted):
elif val.tag == value_e.Str:
e_die("Can't index string with *: %r", val, part=part)
elif val.tag == value_e.StrArray:
# Always maybe_decay_array with ${a[*]} or "${a[*]}"
# TODO: Is this a no-op? Just leave 'val' alone.
# ${a[*]} or "${a[*]}" : maybe_decay_array is always true
val = runtime.StrArray(val.strs)
else:
@@ -777,27 +779,27 @@ def _EvalWordToParts(self, word, quoted, part_vals):
for p in word.parts:
self._EvalWordPart(p, part_vals, quoted=quoted)
def EvalWordToString(self, word, do_fnmatch=False, decay=False):
def EvalWordToString(self, word, do_fnmatch=False):
"""
Args:
word: CompoundWord
Used for redirect arg, ControlFlow arg, ArithWord, BoolWord, etc.
do_fnmatch is true for case $pat and RHS of [[ == ]].
pat="*.py"
case $x in
$pat) echo 'matches glob pattern' ;;
"$pat") echo 'equal to glob string' ;; // must be glob escaped
"$pat") echo 'equal to glob string' ;; # must be glob escaped
esac
"""
part_vals = []
for part in word.parts:
self._EvalWordPart(part, part_vals, quoted=False)
for p in word.parts:
self._EvalWordPart(p, part_vals, quoted=False)
strs = []
for part_val in part_vals:
# TODO: if decay, then allow string part. e.g. for here word or here
# doc with "$@".
if part_val.tag == part_value_e.StringPartValue:
# [[ foo == */"*".py ]] or case *.py) ... esac
if do_fnmatch and not part_val.do_split_glob:
@@ -807,11 +809,17 @@ def EvalWordToString(self, word, do_fnmatch=False, decay=False):
else:
if self.exec_opts.strict_array:
# Examples: echo f > "$@"; local foo="$@"
e_die("Expected string, got %s", part_val, word=word)
# TODO: This attributes too coarsely, to the word rather than the
# parts. Problem: the word is a TREE of parts, but we only have a
# flat list of part_vals. The only case where we really get arrays
# is "$@", "${a[@]}", "${a[@]//pat/replace}", etc.
e_die("This word should evaluate to a string, but part of it was an "
"array", word=word)
# TODO: Maybe add detail like this.
#e_die('RHS of assignment should only have strings. '
# 'To assign arrays, using b=( "${a[@]}" )')
# 'To assign arrays, use b=( "${a[@]}" )')
else:
# It appears to not respect IFS
s = ' '.join(s for s in part_val.strs if s is not None)
View
@@ -92,11 +92,16 @@ def _MaybeReadHereDocs(self):
# performing quote removal on word, and the here-document lines shall not
# be expanded. Otherwise, the delimiter shall be the word itself."
# NOTE: \EOF counts, or even E\OF
ok, delimiter, quoted = word.StaticEval(h.here_begin)
ok, delimiter, delim_quoted = word.StaticEval(h.here_begin)
if not ok:
p_die('Invalid here doc delimiter', word=h.here_begin)
do_expansion = not quoted
# NOTE: We read all lines at once, instead of parsing line-by-line,
# because of cases like this:
# cat <<EOF
# 1 $(echo 2
# echo 3) 4
# EOF
while True:
line_id, line = self.line_reader.GetLine()
@@ -120,28 +125,7 @@ def _MaybeReadHereDocs(self):
lines.append((line_id, line))
parts = []
if do_expansion:
# NOTE: We read all lines at once, instead of doing it line-by-line,
# because of cases like this:
# cat <<EOF
# 1 $(echo 2
# echo 3) 4
# EOF
# NOTE: How to assign spids for these lines?
# VirtualLineReader needs to pick up the tokens somehow?
# self.arena.AddLineSpan() is the thing that assigns IDs.
# So here you just need to call self.AddLineSpan()!
from osh import parse_lib # Avoid circular import
w_parser = parse_lib.MakeWordParserForHereDoc(lines, self.arena)
# NOTE: There can be different kinds of parse errors in here docs.
w = w_parser.ReadHereDocBody()
assert w is not None
h.body = w
else:
if delim_quoted: # << 'EOF'
# Create a line_span and a token for each line.
tokens = []
for line_id, line in lines:
@@ -150,12 +134,19 @@ def _MaybeReadHereDocs(self):
t = ast.token(Id.Lit_Chars, line, span_id)
tokens.append(t)
# LiteralPart for each line.
h.body = ast.CompoundWord([ast.LiteralPart(t) for t in tokens])
h.stdin_parts = [ast.LiteralPart(t) for t in tokens]
else:
from osh import parse_lib # Avoid circular import
w_parser = parse_lib.MakeWordParserForHereDoc(lines, self.arena)
# NOTE: There can be different kinds of parse errors in here docs.
w = w_parser.ReadHereDocBody(h.stdin_parts)
# Create a span with the end terminator. Maintains the invariant that
# the spans "add up".
line_span = ast.line_span(here_end_line_id, 0, len(here_end_line))
unused_spid = self.arena.AddLineSpan(line_span)
h.here_end_span_id = self.arena.AddLineSpan(line_span)
del self.pending_here_docs[:] # No .clear() until Python 3.3.
@@ -247,7 +238,6 @@ def ParseRedirect(self):
if op.id in (Id.Redir_DLess, Id.Redir_DLessDash): # here doc
node = ast.HereDoc()
node.op = op
node.body = None # not read yet
node.fd = fd
self._Next()
View
@@ -200,14 +200,10 @@ def testParseAdjacentDoubleQuotedWords(self):
def assertHereDocToken(test, expected_token_val, node):
#print(node)
"""A sanity check for some ad hoc tests."""
test.assertEqual(1, len(node.redirects))
h = node.redirects[0]
word_parts = h.body.parts
test.assertEqual(1, len(word_parts)) # 1 line, one literal part
part1 = word_parts[0]
test.assertGreater(len(part1.parts), 1, part1)
test.assertEqual(expected_token_val, part1.parts[0].token.val)
test.assertEqual(expected_token_val, h.stdin_parts[0].token.val)
class HereDocTest(unittest.TestCase):
@@ -223,11 +219,8 @@ def testUnquotedHereDoc(self):
""")
self.assertEqual(1, len(node.redirects))
h = node.redirects[0]
self.assertEqual(1, len(h.body.parts)) # 1 double quoted part
dq = h.body.parts[0]
self.assertTrue(isinstance(dq, ast.DoubleQuotedPart))
# 4 literal parts: VarSub, newline, right ", "two\n"
self.assertEqual(4, len(dq.parts))
self.assertEqual(4, len(h.stdin_parts))
def testQuotedHereDocs(self):
# Quoted here doc
@@ -239,7 +232,7 @@ def testQuotedHereDocs(self):
""")
self.assertEqual(1, len(node.redirects))
h = node.redirects[0]
self.assertEqual(2, len(h.body.parts)) # 2 literal parts
self.assertEqual(2, len(h.stdin_parts)) # 2 literal parts
node = assertParseCommandLine(self, """\
cat <<'EOF'
@@ -248,7 +241,7 @@ def testQuotedHereDocs(self):
""")
self.assertEqual(1, len(node.redirects))
h = node.redirects[0]
self.assertEqual(1, len(h.body.parts)) # 1 line, one literal part
self.assertEqual(1, len(h.stdin_parts)) # 1 line, one literal part
# \ escape
node = assertParseCommandLine(self, r"""\
@@ -258,7 +251,7 @@ def testQuotedHereDocs(self):
""")
self.assertEqual(1, len(node.redirects))
h = node.redirects[0]
self.assertEqual(1, len(h.body.parts)) # 1 line, one literal part
self.assertEqual(1, len(h.stdin_parts)) # 1 line, one literal part
def testLeadingTabs(self):
node = assertParseCommandLine(self, """\
View
@@ -142,7 +142,8 @@ module osh
| HereDoc(token op, int fd,
word here_begin, -- e.g. EOF or 'EOF'
int here_end_span_id, -- this span is an entire line
word? body)
word_part* stdin_parts -- one for each line
)
assign_op = Equal | PlusEqual
assign_pair = (lhs_expr lhs, assign_op op, word? rhs)
Oops, something went wrong.

0 comments on commit 9d37d76

Please sign in to comment.