diff --git a/core/word.py b/core/word.py index 8bd205b196..9c7f692052 100644 --- a/core/word.py +++ b/core/word.py @@ -250,9 +250,8 @@ def LeftMostSpanForWord(w): return const.NO_INTEGER -# This is needed for DoWord I guess? IT makes it easier to write the fixer. def RightMostSpanForWord(w): - # TODO: Really we should use par + """Needed for here doc delimiters.""" if w.tag == word_e.CompoundWord: if len(w.parts) == 0: # TODO: Use EmptyWord instead @@ -265,6 +264,20 @@ def RightMostSpanForWord(w): return w.token.span_id +# From bash, general.c, unquoted_tilde_word(): +# POSIX.2, 3.6.1: A tilde-prefix consists of an unquoted tilde character at +# the beginning of the word, followed by all of the characters preceding the +# first unquoted slash in the word, or all the characters in the word if there +# is no slash...If none of the characters in the tilde-prefix are quoted, the +# characters in the tilde-prefix following the tilde shall be treated as a +# possible login name. +#define TILDE_END(c) ((c) == '\0' || (c) == '/' || (c) == ':') +# +# So an unquoted tilde can ALWAYS start a new lex mode? You respect quotes and +# substitutions. +# +# We only detect ~Lit_Chars and split. So we might as well just write a regex. + def TildeDetect(word): """Detect tilde expansion. @@ -397,8 +410,8 @@ def LooksLikeAssignment(w): """Tests whether a word looks like FOO=bar. Returns: - (string, CompoundWord) if it looks like FOO=bar - False if it doesn't + (string, op, CompoundWord) if it looks like FOO=bar + False if it doesn't s=1 s+=1 @@ -431,9 +444,9 @@ def LooksLikeAssignment(w): rhs = ast.CompoundWord() if len(w.parts) == 1: - # This fake SingleQuotedPart is necesssary so that EmptyUnquoted elision - # isn't applied. EMPTY= is like EMPTY=''. - # TODO: This part doesn't have spids, so it might break some invariants. + # This is necessary so that EmptyUnquoted elision isn't applied. EMPTY= is + # like EMPTY=''. 
+ # TODO: Change to EmptyWord rhs.parts.append(ast.EmptyPart()) else: for p in w.parts[1:]: diff --git a/osh/lex.py b/osh/lex.py index 2646e5cd30..33a02a9157 100644 --- a/osh/lex.py +++ b/osh/lex.py @@ -208,6 +208,7 @@ C('}', Id.Lit_RBrace), # Also for var sub ${a} C(',', Id.Lit_Comma), C('~', Id.Lit_Tilde), # For tilde expansion + # TODO: Add the rest of Lit_Chars minus / here. R(r'[ \t\r]+', Id.WS_Space), diff --git a/osh/osh.asdl b/osh/osh.asdl index daffb41687..b0b675cf4a 100644 --- a/osh/osh.asdl +++ b/osh/osh.asdl @@ -76,6 +76,7 @@ module osh id? prefix_op, -- prefix # or ! operators bracket_op? bracket_op suffix_op? suffix_op) + -- This should be token tilde, token rest | TildeSubPart(string prefix) -- For command sub and process sub: $(...) <(...) >(...) | CommandSubPart(command command_list, token left_token) diff --git a/osh/word_parse.py b/osh/word_parse.py index 3888ded772..828d6c679b 100644 --- a/osh/word_parse.py +++ b/osh/word_parse.py @@ -1111,11 +1111,7 @@ def _ReadWord(self, lex_mode): Kind.VSub, Kind.Lit, Kind.Left, Kind.KW, Kind.Assign, Kind.ControlFlow, Kind.BoolUnary, Kind.BoolBinary, Kind.ExtGlob): # We're beginning a word. If we see Id.Lit_Pound, change to - # lex_mode_e.COMMENT and read until end of line. (TODO: How to add - # comments to AST?) - - # TODO: Can we do the same thing for Tilde here? Enter a state where we - # look for / too. + # lex_mode_e.COMMENT and read until end of line. if self.token_type == Id.Lit_Pound: self._Next(lex_mode_e.COMMENT) self._Peek() diff --git a/spec/brace-expansion.test.sh b/spec/brace-expansion.test.sh index a0451cb0eb..4bdbcc14a2 100644 --- a/spec/brace-expansion.test.sh +++ b/spec/brace-expansion.test.sh @@ -149,20 +149,25 @@ HOME=/home/foo echo ~ HOME=/home/bar echo ~ -## stdout-json: "/home/foo\n/home/bar\n" +## STDOUT: +/home/foo +/home/bar +## END #### Tilde expansion with brace expansion +# NOTE: osh matches mksh. Is that OK? # The brace expansion happens FIRST. 
After that, the second token has tilde # FIRST, so it gets expanded. The first token has an unexpanded tilde, because # it's not in the leading position. # NOTE: mksh gives different behavior! So it probably doesn't matter that -# much... +# much HOME=/home/bob echo {foo~,~}/bar ## stdout: foo~/bar /home/bob/bar ## OK mksh stdout: foo~/bar ~/bar #### Two kinds of tilde expansion +# NOTE: osh matches mksh. Is that OK? # ~/foo and ~bar HOME=/home/bob echo ~{/src,root} @@ -177,7 +182,10 @@ foo='~' echo $foo # In the second instance, we expand into a literal ~, and since var expansion # comes after tilde expansion, it is NOT tried again. -## stdout-json: "/home/bob\n~\n" +## STDOUT: +/home/bob +~ +## END #### Number range expansion echo -{1..8..3}- diff --git a/test/arena.sh b/test/arena.sh index 9952bd6dbe..a868421621 100755 --- a/test/arena.sh +++ b/test/arena.sh @@ -23,8 +23,13 @@ here-doc() { compare test/arena/here-multiple.sh } +tilde() { + compare test/arena/tilde.sh +} + readonly -a PASSING=( here-doc + tilde ) all-passing() { diff --git a/test/arena/tilde.sh b/test/arena/tilde.sh new file mode 100755 index 0000000000..a683146891 --- /dev/null +++ b/test/arena/tilde.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +echo ~/src +echo ~andy/git +bin=~andy/bin +echo not~an~expansion~ +echo +echo ~{/src,root} +echo {foo~,~}/bar diff --git a/tools/osh2oil.py b/tools/osh2oil.py index 5eca66c57b..cac04727bb 100644 --- a/tools/osh2oil.py +++ b/tools/osh2oil.py @@ -1053,10 +1053,10 @@ def DoWordPart(self, node, local_symbols, quoted=False): # TODO: We might want to do it all on the word level though. For # example, foo"bar" becomes "foobar" in oil. spid = node.token.span_id - if spid is None: + if spid == const.NO_INTEGER: #raise RuntimeError('%s has no span_id' % node.token) # TODO: Fix word.TildeDetect to construct proper tokens. - print('WARNING:%s has no span_id' % node.token, file=sys.stderr) + log('WARNING: %s has no span_id' % node.token) else: self.cursor.PrintUntil(spid + 1)