Skip to content

Commit

Permalink
Brainstorming about a rewrite of TildeSubPart.
Browse files Browse the repository at this point in the history
We want to maintain the arena invariant: the spans "sum up" to the
original file.

- osh2oil: expose the fact the "synthetic" LiteralPart after the tilde
  has no span_id
- Adjust spec tests for tilde.
- Add comments.
  • Loading branch information
Andy Chu committed Aug 28, 2018
1 parent 9d37d76 commit 58f2496
Show file tree
Hide file tree
Showing 8 changed files with 50 additions and 17 deletions.
27 changes: 20 additions & 7 deletions core/word.py
Expand Up @@ -250,9 +250,8 @@ def LeftMostSpanForWord(w):
return const.NO_INTEGER


# This is needed for DoWord I guess? It makes it easier to write the fixer.
def RightMostSpanForWord(w):
# TODO: Really we should use par
"""Needed for here doc delimiters."""
if w.tag == word_e.CompoundWord:
if len(w.parts) == 0:
# TODO: Use EmptyWord instead
Expand All @@ -265,6 +264,20 @@ def RightMostSpanForWord(w):
return w.token.span_id


# From bash, general.c, unquoted_tilde_word():
# POSIX.2, 3.6.1: A tilde-prefix consists of an unquoted tilde character at
# the beginning of the word, followed by all of the characters preceding the
# first unquoted slash in the word, or all the characters in the word if there
# is no slash...If none of the characters in the tilde-prefix are quoted, the
# characters in the tilde-prefix following the tilde shall be treated as a
# possible login name.
#define TILDE_END(c) ((c) == '\0' || (c) == '/' || (c) == ':')
#
# So an unquoted tilde can ALWAYS start a new lex mode? You respect quotes and
# substitutions.
#
# We only detect ~Lit_Chars and split. So we might as well just write a regex.

def TildeDetect(word):
"""Detect tilde expansion.
Expand Down Expand Up @@ -397,8 +410,8 @@ def LooksLikeAssignment(w):
"""Tests whether a word looks like FOO=bar.
Returns:
(string, CompoundWord) if it looks like FOO=bar
False if it doesn't
(string, op, CompoundWord) if it looks like FOO=bar
False if it doesn't
s=1
s+=1
Expand Down Expand Up @@ -431,9 +444,9 @@ def LooksLikeAssignment(w):

rhs = ast.CompoundWord()
if len(w.parts) == 1:
# This fake SingleQuotedPart is necessary so that EmptyUnquoted elision
# isn't applied. EMPTY= is like EMPTY=''.
# TODO: This part doesn't have spids, so it might break some invariants.
# This is necessary so that EmptyUnquoted elision isn't applied. EMPTY= is
# like EMPTY=''.
# TODO: Change to EmptyWord
rhs.parts.append(ast.EmptyPart())
else:
for p in w.parts[1:]:
Expand Down
1 change: 1 addition & 0 deletions osh/lex.py
Expand Up @@ -208,6 +208,7 @@
C('}', Id.Lit_RBrace), # Also for var sub ${a}
C(',', Id.Lit_Comma),
C('~', Id.Lit_Tilde), # For tilde expansion
# TODO: Add the rest of Lit_Chars minus / here.

R(r'[ \t\r]+', Id.WS_Space),

Expand Down
1 change: 1 addition & 0 deletions osh/osh.asdl
Expand Up @@ -76,6 +76,7 @@ module osh
id? prefix_op, -- prefix # or ! operators
bracket_op? bracket_op
suffix_op? suffix_op)
-- This should be token tilde, token rest
| TildeSubPart(string prefix)
-- For command sub and process sub: $(...) <(...) >(...)
| CommandSubPart(command command_list, token left_token)
Expand Down
6 changes: 1 addition & 5 deletions osh/word_parse.py
Expand Up @@ -1111,11 +1111,7 @@ def _ReadWord(self, lex_mode):
Kind.VSub, Kind.Lit, Kind.Left, Kind.KW, Kind.Assign, Kind.ControlFlow,
Kind.BoolUnary, Kind.BoolBinary, Kind.ExtGlob):
# We're beginning a word. If we see Id.Lit_Pound, change to
# lex_mode_e.COMMENT and read until end of line. (TODO: How to add
# comments to AST?)

# TODO: Can we do the same thing for Tilde here? Enter a state where we
# look for / too.
# lex_mode_e.COMMENT and read until end of line.
if self.token_type == Id.Lit_Pound:
self._Next(lex_mode_e.COMMENT)
self._Peek()
Expand Down
14 changes: 11 additions & 3 deletions spec/brace-expansion.test.sh
Expand Up @@ -149,20 +149,25 @@ HOME=/home/foo
echo ~
HOME=/home/bar
echo ~
## stdout-json: "/home/foo\n/home/bar\n"
## STDOUT:
/home/foo
/home/bar
## END

#### Tilde expansion with brace expansion
# NOTE: osh matches mksh. Is that OK?
# The brace expansion happens FIRST. After that, the second token has tilde
# FIRST, so it gets expanded. The first token has an unexpanded tilde, because
# it's not in the leading position.
# NOTE: mksh gives different behavior! So it probably doesn't matter that
# much...
# much
HOME=/home/bob
echo {foo~,~}/bar
## stdout: foo~/bar /home/bob/bar
## OK mksh stdout: foo~/bar ~/bar

#### Two kinds of tilde expansion
# NOTE: osh matches mksh. Is that OK?
# ~/foo and ~bar
HOME=/home/bob
echo ~{/src,root}
Expand All @@ -177,7 +182,10 @@ foo='~'
echo $foo
# In the second instance, we expand into a literal ~, and since var expansion
# comes after tilde expansion, it is NOT tried again.
## stdout-json: "/home/bob\n~\n"
## STDOUT:
/home/bob
~
## END

#### Number range expansion
echo -{1..8..3}-
Expand Down
5 changes: 5 additions & 0 deletions test/arena.sh
Expand Up @@ -23,8 +23,13 @@ here-doc() {
compare test/arena/here-multiple.sh
}

# Run `compare` (defined elsewhere in this script) on the tilde-expansion
# fixture, test/arena/tilde.sh.
tilde() {
compare test/arena/tilde.sh
}

readonly -a PASSING=(
here-doc
tilde
)

all-passing() {
Expand Down
9 changes: 9 additions & 0 deletions test/arena/tilde.sh
@@ -0,0 +1,9 @@
#!/bin/bash

echo ~/src
echo ~andy/git
bin=~andy/bin
echo not~an~expansion~
echo
echo ~{/src,root}
echo {foo~,~}/bar
4 changes: 2 additions & 2 deletions tools/osh2oil.py
Expand Up @@ -1053,10 +1053,10 @@ def DoWordPart(self, node, local_symbols, quoted=False):
# TODO: We might want to do it all on the word level though. For
# example, foo"bar" becomes "foobar" in oil.
spid = node.token.span_id
if spid is None:
if spid == const.NO_INTEGER:
#raise RuntimeError('%s has no span_id' % node.token)
# TODO: Fix word.TildeDetect to construct proper tokens.
print('WARNING:%s has no span_id' % node.token, file=sys.stderr)
log('WARNING: %s has no span_id' % node.token)
else:
self.cursor.PrintUntil(spid + 1)

Expand Down

0 comments on commit 58f2496

Please sign in to comment.