Permalink
Browse files

Brainstorming about a rewrite of TildeSubPart.

We want to maintain the arena invariant: the spans "sum up" to the
original file.

- osh2oil: expose the fact that the "synthetic" LiteralPart after the tilde
  has no span_id
- Adjust spec tests for tilde.
- Add comments.
  • Loading branch information...
Andy Chu
Andy Chu committed Aug 28, 2018
1 parent 9d37d76 commit 58f2496ebfdd341dadd8869b7ef7f00a8300d52e
Showing with 50 additions and 17 deletions.
  1. +20 −7 core/word.py
  2. +1 −0 osh/lex.py
  3. +1 −0 osh/osh.asdl
  4. +1 −5 osh/word_parse.py
  5. +11 −3 spec/brace-expansion.test.sh
  6. +5 −0 test/arena.sh
  7. +9 −0 test/arena/tilde.sh
  8. +2 −2 tools/osh2oil.py
View
@@ -250,9 +250,8 @@ def LeftMostSpanForWord(w):
return const.NO_INTEGER
# This is needed for DoWord I guess? It makes it easier to write the fixer.
def RightMostSpanForWord(w):
# TODO: Really we should use par
"""Needed for here doc delimiters."""
if w.tag == word_e.CompoundWord:
if len(w.parts) == 0:
# TODO: Use EmptyWord instead
@@ -265,6 +264,20 @@ def RightMostSpanForWord(w):
return w.token.span_id
# From bash, general.c, unquoted_tilde_word():
# POSIX.2, 3.6.1: A tilde-prefix consists of an unquoted tilde character at
# the beginning of the word, followed by all of the characters preceding the
# first unquoted slash in the word, or all the characters in the word if there
# is no slash...If none of the characters in the tilde-prefix are quoted, the
# characters in the tilde-prefix following the tilde shall be treated as a
# possible login name.
#define TILDE_END(c) ((c) == '\0' || (c) == '/' || (c) == ':')
#
# So an unquoted tilde can ALWAYS start a new lex mode? You respect quotes and
# substitutions.
#
# We only detect ~Lit_Chars and split. So we might as well just write a regex.
def TildeDetect(word):
"""Detect tilde expansion.
@@ -397,8 +410,8 @@ def LooksLikeAssignment(w):
"""Tests whether a word looks like FOO=bar.
Returns:
(string, CompoundWord) if it looks like FOO=bar
False if it doesn't
(string, op, CompoundWord) if it looks like FOO=bar
False if it doesn't
s=1
s+=1
@@ -431,9 +444,9 @@ def LooksLikeAssignment(w):
rhs = ast.CompoundWord()
if len(w.parts) == 1:
# This fake SingleQuotedPart is necessary so that EmptyUnquoted elision
# isn't applied. EMPTY= is like EMPTY=''.
# TODO: This part doesn't have spids, so it might break some invariants.
# This is necessary so that EmptyUnquoted elision isn't applied. EMPTY= is
# like EMPTY=''.
# TODO: Change to EmptyWord
rhs.parts.append(ast.EmptyPart())
else:
for p in w.parts[1:]:
View
@@ -208,6 +208,7 @@
C('}', Id.Lit_RBrace), # Also for var sub ${a}
C(',', Id.Lit_Comma),
C('~', Id.Lit_Tilde), # For tilde expansion
# TODO: Add the rest of Lit_Chars minus / here.
R(r'[ \t\r]+', Id.WS_Space),
View
@@ -76,6 +76,7 @@ module osh
id? prefix_op, -- prefix # or ! operators
bracket_op? bracket_op
suffix_op? suffix_op)
-- This should be token tilde, token rest
| TildeSubPart(string prefix)
-- For command sub and process sub: $(...) <(...) >(...)
| CommandSubPart(command command_list, token left_token)
View
@@ -1111,11 +1111,7 @@ def _ReadWord(self, lex_mode):
Kind.VSub, Kind.Lit, Kind.Left, Kind.KW, Kind.Assign, Kind.ControlFlow,
Kind.BoolUnary, Kind.BoolBinary, Kind.ExtGlob):
# We're beginning a word. If we see Id.Lit_Pound, change to
# lex_mode_e.COMMENT and read until end of line. (TODO: How to add
# comments to AST?)
# TODO: Can we do the same thing for Tilde here? Enter a state where we
# look for / too.
# lex_mode_e.COMMENT and read until end of line.
if self.token_type == Id.Lit_Pound:
self._Next(lex_mode_e.COMMENT)
self._Peek()
@@ -149,20 +149,25 @@ HOME=/home/foo
echo ~
HOME=/home/bar
echo ~
## stdout-json: "/home/foo\n/home/bar\n"
## STDOUT:
/home/foo
/home/bar
## END
#### Tilde expansion with brace expansion
# NOTE: osh matches mksh. Is that OK?
# The brace expansion happens FIRST. After that, the second token has tilde
# FIRST, so it gets expanded. The first token has an unexpanded tilde, because
# it's not in the leading position.
# NOTE: mksh gives different behavior! So it probably doesn't matter that
# much...
# much
HOME=/home/bob
echo {foo~,~}/bar
## stdout: foo~/bar /home/bob/bar
## OK mksh stdout: foo~/bar ~/bar
#### Two kinds of tilde expansion
# NOTE: osh matches mksh. Is that OK?
# ~/foo and ~bar
HOME=/home/bob
echo ~{/src,root}
@@ -177,7 +182,10 @@ foo='~'
echo $foo
# In the second instance, we expand into a literal ~, and since var expansion
# comes after tilde expansion, it is NOT tried again.
## stdout-json: "/home/bob\n~\n"
## STDOUT:
/home/bob
~
## END
#### Number range expansion
echo -{1..8..3}-
View
@@ -23,8 +23,13 @@ here-doc() {
compare test/arena/here-multiple.sh
}
# Arena test case for tilde expansion: hands the fixture
# test/arena/tilde.sh to the `compare` helper.
# NOTE(review): `compare` is defined outside this hunk; presumably it checks
# that the arena spans reproduce the original file -- confirm.
tilde() {
compare test/arena/tilde.sh
}
readonly -a PASSING=(
here-doc
tilde
)
all-passing() {
View
@@ -0,0 +1,9 @@
#!/bin/bash
#
# Fixture for the arena test: words that contain tildes in various positions.

# Leading tilde followed by a slash.
echo ~/src
# Leading tilde followed by a name, then a slash.
echo ~andy/git
# Leading tilde on the right-hand side of an assignment.
bin=~andy/bin
# Tildes that are not in the leading position of the word.
echo not~an~expansion~
echo
# Tilde combined with brace expansion (same words as the spec tests).
echo ~{/src,root}
echo {foo~,~}/bar
View
@@ -1053,10 +1053,10 @@ def DoWordPart(self, node, local_symbols, quoted=False):
# TODO: We might want to do it all on the word level though. For
# example, foo"bar" becomes "foobar" in oil.
spid = node.token.span_id
if spid is None:
if spid == const.NO_INTEGER:
#raise RuntimeError('%s has no span_id' % node.token)
# TODO: Fix word.TildeDetect to construct proper tokens.
print('WARNING:%s has no span_id' % node.token, file=sys.stderr)
log('WARNING: %s has no span_id' % node.token)
else:
self.cursor.PrintUntil(spid + 1)

0 comments on commit 58f2496

Please sign in to comment.