diff --git a/bin/oil.py b/bin/oil.py index 341c147bf2..9cf9103efe 100755 --- a/bin/oil.py +++ b/bin/oil.py @@ -184,8 +184,9 @@ def OshMain(argv0, argv, login_shell): ['text', 'abbrev-text', 'html', 'abbrev-html', 'oheap', 'none'], default='abbrev-text') spec.LongFlag('--show-ast') # execute and show - spec.LongFlag('--fix') + spec.LongFlag('--fix') # oshc translate spec.LongFlag('--debug-spans') # For oshc translate + spec.LongFlag('--parse-and-print-arena') # Invariant for translation spec.LongFlag('--print-status') spec.LongFlag('--trace', ['cmd-parse', 'word-parse', 'lexer']) # NOTE: can only trace one now spec.LongFlag('--hijack-shebang') @@ -346,9 +347,11 @@ def OshMain(argv0, argv, login_shell): do_exec = True if opts.fix: - #log('SPANS: %s', arena.spans) osh2oil.PrintAsOil(arena, node, opts.debug_spans) do_exec = False + if opts.parse_and_print_arena: + osh2oil.PrintArena(arena) + do_exec = False if exec_opts.noexec: do_exec = False diff --git a/core/alloc.py b/core/alloc.py index a636b5f87f..bebc96111f 100644 --- a/core/alloc.py +++ b/core/alloc.py @@ -107,6 +107,10 @@ def GetLineSpan(self, span_id): util.log('Span ID out of range: %d', span_id) raise + def LastSpanId(self): + """Return one past the last span ID.""" + return len(self.spans) + def GetDebugInfo(self, line_id): """Get the path and physical line number, for parse errors.""" assert line_id != const.NO_INTEGER, line_id diff --git a/osh/cmd_parse.py b/osh/cmd_parse.py index b01fccbb46..471434d81b 100644 --- a/osh/cmd_parse.py +++ b/osh/cmd_parse.py @@ -82,14 +82,25 @@ def GetCompletionState(self): def _MaybeReadHereDocs(self): for h in self.pending_here_docs: + here_end_line = None + here_end_line_id = -1 lines = [] + + # "If any character in word is quoted, the delimiter shall be formed by + # performing quote removal on word, and the here-document lines shall not + # be expanded. Otherwise, the delimiter shall be the word itself." + # NOTE: \EOF counts, or even E\OF + ok, delimiter, quoted = word.StaticEval(h.here_begin) + if not ok: + p_die('Invalid here doc delimiter', word=h.here_begin) + do_expansion = not quoted + #log('HERE %r' % h.here_end) while True: # If op is <<-, strip off all leading tabs (NOT spaces). # (in C++, just bump the start?) line_id, line = self.line_reader.GetLine() - #print("LINE %r %r" % (line, h.here_end)) if not line: # EOF # An unterminated here doc is just a warning in bash. We make it # fatal because we want to be strict, and because it causes problems @@ -100,14 +111,17 @@ def _MaybeReadHereDocs(self): # NOTE: Could do this runtime to preserve LST. if h.op.id == Id.Redir_DLessDash: + # NOTE: Stripping multiple leading tabs is correct! line = line.lstrip('\t') - if line.rstrip() == h.here_end: + if line.rstrip() == delimiter: + here_end_line = line + here_end_line_id = line_id break lines.append((line_id, line)) parts = [] - if h.do_expansion: + if do_expansion: # NOTE: We read all lines at once, instead of doing it line-by-line, # because of cases like this: # cat < token for translation? redir = Redir(token op, int fd, word arg_word) | HereDoc(token op, int fd, - word here_begin, -- For translation - string here_end, bool do_expansion, -- Derived from here_begin - word? body, bool was_filled) + word here_begin, -- e.g. EOF or 'EOF' + int here_end_span_id, -- this span is an entire line + word? body) assign_op = Equal | PlusEqual assign_pair = (lhs_expr lhs, assign_op op, word? rhs) diff --git a/osh/word_parse.py b/osh/word_parse.py index 280c7aee64..d5a0ef0065 100644 --- a/osh/word_parse.py +++ b/osh/word_parse.py @@ -642,6 +642,7 @@ def _ReadDoubleQuotedPart(self, eof_type=Id.Undefined_Tok, here_doc=False): left_spid = const.NO_INTEGER # gets set later right_spid = const.NO_INTEGER # gets set later + # TODO: Use here doc. if self.cur_token is not None: # None in here doc case left_token = self.cur_token left_spid = left_token.span_id @@ -682,7 +683,7 @@ def _ReadDoubleQuotedPart(self, eof_type=Id.Undefined_Tok, here_doc=False): quoted_part.parts.append(ast.LiteralPart(self.cur_token)) else: done = True # assume Id.Right_DoubleQuote - right_spid = self.cur_token.span_id + right_spid = self.cur_token.span_id elif self.token_kind == Kind.Eof: if here_doc: # here docs will have an EOF in their token stream @@ -1203,8 +1204,13 @@ def ReadHereDocBody(self): CompoundWord. NOTE: We could also just use a DoubleQuotedPart for both cases? """ - w = ast.CompoundWord() dq = self._ReadDoubleQuotedPart(here_doc=True) assert dq is not None - w.parts.append(dq) - return w + return ast.CompoundWord([dq]) + + # TODO: _ReadDQContext(parts) should be shared between + # _ReadDoubleQuotedPart() and ReadHereDocBody() + # Call with dq_part.parts + # and here_doc_node.body + + diff --git a/spec/here-doc.test.sh b/spec/here-doc.test.sh index 1a67dc823f..739cdd596b 100644 --- a/spec/here-doc.test.sh +++ b/spec/here-doc.test.sh @@ -295,12 +295,14 @@ EOF cat <<-EOF 1 2 - 3 + 3 # 2 tabs are both stripped + 4 # spaces are preserved EOF ## STDOUT: 1 2 - 3 +3 # 2 tabs are both stripped + 4 # spaces are preserved ## END #### Here doc within subshell with boolean diff --git a/test/arena.sh b/test/arena.sh new file mode 100755 index 0000000000..f10bd358ee --- /dev/null +++ b/test/arena.sh @@ -0,0 +1,42 @@ +#!/bin/bash +# +# Usage: +# ./arena.sh + +set -o nounset +set -o pipefail +set -o errexit + +source test/common.sh + +compare() { + local path=$1 + + mkdir -p _tmp/arena + bin/osh --parse-and-print-arena $path > _tmp/arena/left.txt + diff -u $path _tmp/arena/left.txt +} + +here-doc() { + compare test/arena/here-dq.sh + compare test/arena/here-sq.sh +} + +readonly -a PASSING=( + here-doc +) + +all-passing() { + run-all "${PASSING[@]}" +} + +run-for-release() { + local out_dir=_tmp/arena + mkdir -p $out_dir + + all-passing | tee $out_dir/log.txt + + echo "Wrote $out_dir/log.txt" +} + +"$@" diff --git a/test/arena/here-dq.sh b/test/arena/here-dq.sh new file mode 100755 index 0000000000..2a0766780a --- /dev/null +++ b/test/arena/here-dq.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +echo "DQ" + +cat <