Skip to content

Commit

Permalink
Working on translating here docs to Oil.
Browse files Browse the repository at this point in the history
- Test an invariant of the arena with here docs.
  - Make the here-dq.sh test pass by adding a line_span() instance for
    the ending delimiter.
  - here-sq.sh still doesn't pass.
- Statically evaluate the here doc delimiter at the end of the line,
  rather than when you encounter it.  This makes it so there are fewer things
  in the HereDoc node.
- Remove HereDoc.was_filled.  Add here_end_span_id.
- here-doc spec test: Clarify that multiple leading tabs are stripped.
- Move some parse error bugs from cmd_parse_test.py to
  test/parse-errors.sh.  This revealed a bug in osh -c 'cat <<EOF',
  which is now fixed.
  • Loading branch information
Andy Chu committed Aug 28, 2018
1 parent 92051ae commit 4cdba05
Show file tree
Hide file tree
Showing 12 changed files with 178 additions and 57 deletions.
7 changes: 5 additions & 2 deletions bin/oil.py
Expand Up @@ -184,8 +184,9 @@ def OshMain(argv0, argv, login_shell):
['text', 'abbrev-text', 'html', 'abbrev-html', 'oheap', 'none'],
default='abbrev-text')
spec.LongFlag('--show-ast') # execute and show
spec.LongFlag('--fix')
spec.LongFlag('--fix') # oshc translate
spec.LongFlag('--debug-spans') # For oshc translate
spec.LongFlag('--parse-and-print-arena') # Invariant for translation
spec.LongFlag('--print-status')
spec.LongFlag('--trace', ['cmd-parse', 'word-parse', 'lexer']) # NOTE: can only trace one now
spec.LongFlag('--hijack-shebang')
Expand Down Expand Up @@ -346,9 +347,11 @@ def OshMain(argv0, argv, login_shell):

do_exec = True
if opts.fix:
#log('SPANS: %s', arena.spans)
osh2oil.PrintAsOil(arena, node, opts.debug_spans)
do_exec = False
if opts.parse_and_print_arena:
osh2oil.PrintArena(arena)
do_exec = False
if exec_opts.noexec:
do_exec = False

Expand Down
4 changes: 4 additions & 0 deletions core/alloc.py
Expand Up @@ -107,6 +107,10 @@ def GetLineSpan(self, span_id):
util.log('Span ID out of range: %d', span_id)
raise

def LastSpanId(self):
"""Return one past the last span ID.

The value is len(self.spans), i.e. the number of spans recorded so far.
NOTE(review): "one past the last ID" presumes span IDs are 0-based indexes
into self.spans -- confirm against AddLineSpan().
"""
return len(self.spans)

def GetDebugInfo(self, line_id):
"""Get the path and physical line number, for parse errors."""
assert line_id != const.NO_INTEGER, line_id
Expand Down
57 changes: 38 additions & 19 deletions osh/cmd_parse.py
Expand Up @@ -82,14 +82,25 @@ def GetCompletionState(self):

def _MaybeReadHereDocs(self):
for h in self.pending_here_docs:
here_end_line = None
here_end_line_id = -1
lines = []

# "If any character in word is quoted, the delimiter shall be formed by
# performing quote removal on word, and the here-document lines shall not
# be expanded. Otherwise, the delimiter shall be the word itself."
# NOTE: \EOF counts, or even E\OF
ok, delimiter, quoted = word.StaticEval(h.here_begin)
if not ok:
p_die('Invalid here doc delimiter', word=h.here_begin)
do_expansion = not quoted

#log('HERE %r' % h.here_end)
while True:
# If op is <<-, strip off all leading tabs (NOT spaces).
# (in C++, just bump the start?)
line_id, line = self.line_reader.GetLine()

#print("LINE %r %r" % (line, h.here_end))
if not line: # EOF
# An unterminated here doc is just a warning in bash. We make it
# fatal because we want to be strict, and because it causes problems
Expand All @@ -100,40 +111,51 @@ def _MaybeReadHereDocs(self):

# NOTE: Could do this runtime to preserve LST.
if h.op.id == Id.Redir_DLessDash:
# NOTE: Stripping multiple leading tabs is correct!
line = line.lstrip('\t')
if line.rstrip() == h.here_end:
if line.rstrip() == delimiter:
here_end_line = line
here_end_line_id = line_id
break

lines.append((line_id, line))

parts = []
if h.do_expansion:
if do_expansion:
# NOTE: We read all lines at once, instead of doing it line-by-line,
# because of cases like this:
# cat <<EOF
# 1 $(echo 2
# echo 3) 4
# EOF

# NOTE: How to assign spids for these lines?
# VirtualLineReader needs to pick up the tokens somehow?

# self.arena.AddLineSpan() is the thing that assigns IDs.
# So here you just need to call self.AddLineSpan()!

from osh import parse_lib # Avoid circular import
w_parser = parse_lib.MakeWordParserForHereDoc(lines, self.arena)

# NOTE: There can be different kinds of parse errors in here docs.
word = w_parser.ReadHereDocBody()
assert word is not None
h.body = word
h.was_filled = True
w = w_parser.ReadHereDocBody()
assert w is not None
h.body = w
else:
# Each line is a single span. TODO: Add span_id to token.
tokens = [
ast.token(Id.Lit_Chars, line, const.NO_INTEGER)
for _, line in lines]
parts = [ast.LiteralPart(t) for t in tokens]
h.body = ast.CompoundWord(parts)
h.was_filled = True

# No .clear() until Python 3.3.
del self.pending_here_docs[:]
# Create a span with the end terminator. Maintains the invariant that
# the spans "add up".
line_span = ast.line_span(here_end_line_id, 0, len(here_end_line))
unused_spid = self.arena.AddLineSpan(line_span)

del self.pending_here_docs[:] # No .clear() until Python 3.3.

return True

Expand Down Expand Up @@ -225,19 +247,10 @@ def ParseRedirect(self):
node.op = op
node.body = None # not read yet
node.fd = fd
node.was_filled = False
self._Next()

if not self._Peek(): return None
node.here_begin = self.cur_word
# "If any character in word is quoted, the delimiter shall be formed by
# performing quote removal on word, and the here-document lines shall not
# be expanded. Otherwise, the delimiter shall be the word itself."
# NOTE: \EOF counts, or even E\OF
ok, node.here_end, quoted = word.StaticEval(node.here_begin)
if not ok:
p_die('Invalid here doc delimiter', word=node.here_begin)
node.do_expansion = not quoted
self._Next()

self.pending_here_docs.append(node) # will be filled on next newline.
Expand Down Expand Up @@ -1500,4 +1513,10 @@ def ParseWholeFile(self):
assert node is not None
assert node is not False

# NOTE: This happens when there is no newline at the end of a file, like
# osh -c 'cat <<EOF'
if self.pending_here_docs:
node = self.pending_here_docs[0] # Just show the first one?
p_die('Unterminated here doc began here', word=node.here_begin)

return node
19 changes: 0 additions & 19 deletions osh/cmd_parse_test.py
Expand Up @@ -228,7 +228,6 @@ def testUnquotedHereDoc(self):
self.assertTrue(isinstance(dq, ast.DoubleQuotedPart))
# 4 literal parts: VarSub, newline, right ", "two\n"
self.assertEqual(4, len(dq.parts))
self.assertEqual(True, h.do_expansion)

def testQuotedHereDocs(self):
# Quoted here doc
Expand All @@ -241,7 +240,6 @@ def testQuotedHereDocs(self):
self.assertEqual(1, len(node.redirects))
h = node.redirects[0]
self.assertEqual(2, len(h.body.parts)) # 2 literal parts
self.assertEqual(False, h.do_expansion)

node = assertParseCommandLine(self, """\
cat <<'EOF'
Expand All @@ -251,7 +249,6 @@ def testQuotedHereDocs(self):
self.assertEqual(1, len(node.redirects))
h = node.redirects[0]
self.assertEqual(1, len(h.body.parts)) # 1 line, one literal part
self.assertEqual(False, h.do_expansion)

# \ escape
node = assertParseCommandLine(self, r"""\
Expand All @@ -262,7 +259,6 @@ def testQuotedHereDocs(self):
self.assertEqual(1, len(node.redirects))
h = node.redirects[0]
self.assertEqual(1, len(h.body.parts)) # 1 line, one literal part
self.assertEqual(False, h.do_expansion)

def testLeadingTabs(self):
node = assertParseCommandLine(self, """\
Expand Down Expand Up @@ -1200,21 +1196,6 @@ def testCommand(self):

err = _assertParseCommandListError(self, 'ls < <')

# Invalid words as here docs
err = _assertParseCommandListError(self, 'cat << $(invalid here end)')

# TODO: Arith parser doesn't have location information
err = _assertParseCommandListError(self, 'cat << $((1+2))')
err = _assertParseCommandListError(self, 'cat << a=(1 2 3)')
err = _assertParseCommandListError(self, r'cat << \a$(invalid)')

# Actually the $invalid part should be highlighted... yeah an individual
# part is the problem.
err = _assertParseCommandListError(self, r"cat << 'single'$(invalid)")
err = _assertParseCommandListError(self, r'cat << "double"$(invalid)')
err = _assertParseCommandListError(self, r'cat << ~foo/$(invalid)')
err = _assertParseCommandListError(self, r'cat << $var/$(invalid)')

# Word parse error in command parser
err = _assertParseCommandListError(self, r'echo foo$(ls <)bar')

Expand Down
8 changes: 3 additions & 5 deletions osh/osh.asdl
Expand Up @@ -135,16 +135,14 @@ module osh
-- pass could StaticEval it to a string and set do_expansion.
-- * To reprint the here doc, we need the here_end delimiter, but it doesn't
-- matter at runtime. do_expansion is calculated from it.
-- * was_filled is only used during the parse and should be eliminated from
-- serialization format.
-- TODO : id -> token for translation?

redir =
Redir(token op, int fd, word arg_word)
| HereDoc(token op, int fd,
word here_begin, -- For translation
string here_end, bool do_expansion, -- Derived from here_begin
word? body, bool was_filled)
word here_begin, -- e.g. EOF or 'EOF'
int here_end_span_id, -- this span is an entire line
word? body)

assign_op = Equal | PlusEqual
assign_pair = (lhs_expr lhs, assign_op op, word? rhs)
Expand Down
14 changes: 10 additions & 4 deletions osh/word_parse.py
Expand Up @@ -642,6 +642,7 @@ def _ReadDoubleQuotedPart(self, eof_type=Id.Undefined_Tok, here_doc=False):
left_spid = const.NO_INTEGER # gets set later
right_spid = const.NO_INTEGER # gets set later

# TODO: Use here doc.
if self.cur_token is not None: # None in here doc case
left_token = self.cur_token
left_spid = left_token.span_id
Expand Down Expand Up @@ -682,7 +683,7 @@ def _ReadDoubleQuotedPart(self, eof_type=Id.Undefined_Tok, here_doc=False):
quoted_part.parts.append(ast.LiteralPart(self.cur_token))
else:
done = True # assume Id.Right_DoubleQuote
right_spid = self.cur_token.span_id
right_spid = self.cur_token.span_id

elif self.token_kind == Kind.Eof:
if here_doc: # here docs will have an EOF in their token stream
Expand Down Expand Up @@ -1203,8 +1204,13 @@ def ReadHereDocBody(self):
CompoundWord. NOTE: We could also just use a DoubleQuotedPart for both
cases?
"""
w = ast.CompoundWord()
dq = self._ReadDoubleQuotedPart(here_doc=True)
assert dq is not None
w.parts.append(dq)
return w
return ast.CompoundWord([dq])

# TODO: _ReadDQContext(parts) should be shared between
# _ReadDoubleQuotedPart() and ReadHereDocBody()
# Call with dq_part.parts
# and here_doc_node.body


6 changes: 4 additions & 2 deletions spec/here-doc.test.sh
Expand Up @@ -295,12 +295,14 @@ EOF
cat <<-EOF
1
2
3
3 # 2 tabs are both stripped
4 # spaces are preserved
EOF
## STDOUT:
1
2
3
3 # 2 tabs are both stripped
4 # spaces are preserved
## END

#### Here doc within subshell with boolean
Expand Down
42 changes: 42 additions & 0 deletions test/arena.sh
@@ -0,0 +1,42 @@
#!/bin/bash
#
# Usage:
# ./arena.sh <function name>

set -o nounset
set -o pipefail
set -o errexit

source test/common.sh

# Check the arena round-trip invariant for one file: the output of
# 'bin/osh --parse-and-print-arena' on the file must be identical to the file.
#
# Args:
#   $1: path of the shell script to parse
#
# Writes the printed arena to _tmp/arena/left.txt.  The function's status is
# that of diff, so it is nonzero when the output differs from the input.
compare() {
  local path=$1

  mkdir -p _tmp/arena
  # Quote "$path" so a path containing spaces or glob characters is passed
  # through as a single argument.
  bin/osh --parse-and-print-arena "$path" > _tmp/arena/left.txt
  diff -u "$path" _tmp/arena/left.txt
}

# Run the arena round-trip comparison on each here doc fixture, in order:
# test/arena/here-dq.sh, then test/arena/here-sq.sh.
here-doc() {
  local name
  for name in here-dq here-sq; do
    compare "test/arena/$name.sh"
  done
}

# Names of the test functions in this file that all-passing runs.
# NOTE(review): here-doc also compares test/arena/here-sq.sh; confirm that
# fixture really passes before relying on this list.
readonly -a PASSING=(
here-doc
)

# Run every function listed in PASSING.
# NOTE(review): run-all is not defined in this file; presumably it comes from
# test/common.sh, which is sourced near the top -- confirm.
all-passing() {
run-all "${PASSING[@]}"
}

# Run all passing arena tests, copying their output to a log file under
# _tmp/arena while still printing it to stdout.
run-for-release() {
  local log_dir=_tmp/arena
  local log_path=$log_dir/log.txt

  mkdir -p "$log_dir"
  all-passing | tee "$log_path"

  echo "Wrote $log_path"
}

"$@"
9 changes: 9 additions & 0 deletions test/arena/here-dq.sh
@@ -0,0 +1,9 @@
#!/bin/bash

echo "DQ"

cat <<EOF
here
doc $var
EOF
echo --
9 changes: 9 additions & 0 deletions test/arena/here-sq.sh
@@ -0,0 +1,9 @@
#!/bin/bash

echo 'SQ'

cat <<'EOF'
here
doc $var
EOF
echo --
20 changes: 20 additions & 0 deletions test/parse-errors.sh
Expand Up @@ -282,13 +282,33 @@ EOF
'
}

# Parse-error cases: invalid words used as here doc delimiters.
# Each case is passed as a code string to _error-case.
here-doc-delimiter() {
# NOTE(review): presumably errexit is turned off so that one failing case does
# not abort the remaining ones -- confirm against _error-case.
set +o errexit

# NOTE(review): the original note here was left unfinished ("This is more like
# the case where."); its intended meaning is unclear.
_error-case 'cat << $(invalid here end)'

# TODO: Arith parser doesn't have location information
_error-case 'cat << $((1+2))'
_error-case 'cat << a=(1 2 3)'
_error-case 'cat << \a$(invalid)'

# Actually the $invalid part should be highlighted... yeah an individual
# part is the problem.
# The next case is disabled.  NOTE(review): presumably because its single
# quotes cannot be embedded in the single-quoted argument form used by the
# other cases -- confirm.
#"cat << 'single'$(invalid)"
_error-case 'cat << "double"$(invalid)'
_error-case 'cat << ~foo/$(invalid)'
_error-case 'cat << $var/$(invalid)'
}

cases-in-strings() {
set +o errexit

cmd-parse
simple-command
redirect
here-doc
here-doc-delimiter

# Word
word-parse
Expand Down

0 comments on commit 4cdba05

Please sign in to comment.