Permalink
Browse files

Integrate brace detection and expansion.

Brace detection changes a CompoundWord node to a BracedWordTree node.
Brace expansion expands it using the "Cartesian product" algorithm back
into an *array* of CompoundWord.

This is relevant in three contexts:

- commands: echo -{a,b}- other
- for loops: for i in -{a,b}-  other; do echo $i; done
- arrays: array=({a,b} other)

So I inserted BraceDetectAll in osh/cmd_parse.py and osh/word_parse.py;
and BraceExpandWords in core/word_eval.py and core/cmd_exec.py.

I also fixed tilde expansion in all three contexts and added tests.

Changes to the algorithm in braces.py:

- Fixed bug where alternatives like {a,b}{c,d} were expanded in the
  wrong order.  The first one varies the slowest, and the last the
  fastest: ac ad bc bd.
- Fixed error cases like {foo} (no comma, so it is not a real alternative)
- Use _StackFrame to simplify the code

Testing:

- Add a comparison of count.sh to gold-test.sh, so we're testing the
  brace expansion in our own repo.
- Add date and oil version information to spec tests.

Other:

- Fixed a typo in evaluation of EscapedLiteralPart, which fixed a bunch
  of tests.  (--osh-allowed-failures bumped down in several places.)
- Rename AltPart to BracedAltPart, NumRangePart to BracedNumRangePart.
  • Loading branch information...
Andy Chu
Andy Chu committed Mar 13, 2017
1 parent 43581de commit 700c223847f001be55f572187feeb19e0bd83ded
Showing with 211 additions and 173 deletions.
  1. +1 −1 asdl/py_meta.py
  2. +81 −40 core/braces.py
  3. +13 −8 core/braces_test.py
  4. +7 −3 core/cmd_exec.py
  5. +11 −0 core/word.py
  6. +4 −23 core/word_eval.py
  7. +6 −0 gold-test.sh
  8. +13 −73 osh/cmd_parse.py
  9. +13 −4 osh/osh.asdl
  10. +8 −4 osh/word_parse.py
  11. +16 −8 spec.sh
  12. +11 −0 tests/array.test.sh
  13. +13 −9 tests/brace-expansion.test.sh
  14. +14 −0 tests/loop.test.sh
View
@@ -206,8 +206,8 @@ def __setattr__(self, name, value):
self.__dict__[name] = value
def __repr__(self):
#ast_f = fmt.AnsiOutput(io.StringIO()) # Color by default.
ast_f = fmt.TextOutput(io.StringIO()) # No color by default.
#ast_f = fmt.AnsiOutput(io.StringIO())
tree = fmt.MakeTree(self)
fmt.PrintTree(tree, ast_f)
s, _ = ast_f.GetRaw()
View
@@ -8,7 +8,7 @@
Possible optimization flags for CompoundWord:
- has Lit_LBrace, Lit_RBrace -- set during word_parse phase
- if it has both, then do _BraceDetect
- has AltPart -- set during _BraceDetect
- has BracedAltPart -- set during _BraceDetect
- if it does, then do the expansion
- has Lit_Star, ?, [ ] -- globbing?
- but after expansion do you still have those flags?
@@ -20,6 +20,14 @@
from osh import ast_ as ast
word_part_e = ast.word_part_e
word_e = ast.word_e
class _StackFrame:
  """State saved for one open '{' while _BraceDetect scans a word.

  Attributes are plain fields that _BraceDetect reads and mutates directly.
  """

  def __init__(self, cur_parts):
    """
    Args:
      cur_parts: the list of parts collected before the '{' was seen; it is
        restored as the current list when the matching '}' pops this frame.
    """
    # Set to True once a comma is seen at this brace level.
    self.saw_comma = False
    # Collects one word per alternative found inside the braces.
    self.alt_part = ast.BracedAltPart()
    # Parts that preceded the opening brace.
    self.cur_parts = cur_parts
def _BraceDetect(w):
@@ -68,28 +76,37 @@ def _BraceDetect(w):
# {a} - no comma, and also not a numeric range
cur_parts = []
parts_stack = [] # stack of parts in progress
alt_stack = [] # stack of alternatives
stack = []
found = False
for i, part in enumerate(w.parts):
append = True
if part.tag == word_part_e.LiteralPart:
id_ = part.token.id
if id_ == Id.Lit_LBrace:
# Save prefix parts. Start new parts list.
parts_stack.append(cur_parts)
new_frame = _StackFrame(cur_parts)
stack.append(new_frame)
cur_parts = []
alternatives = ast.AltPart()
alt_stack.append(alternatives)
append = False
found = True # assume found, but can early exit with None later
elif id_ == Id.Lit_Comma:
# Append a new alternative.
#print('*** Appending after COMMA', cur_parts)
alt_stack[-1].words.append(ast.CompoundWord(cur_parts))
cur_parts = [] # clear
append = False
# NOTE: Should we allow this:
# ,{a,b}
# or force this:
# \,{a,b}
# ? We're forcing braces right now but not commas.
if stack:
stack[-1].saw_comma = True
stack[-1].alt_part.words.append(ast.CompoundWord(cur_parts))
cur_parts = [] # clear
append = False
elif id_ == Id.Lit_RBrace:
# TODO:
@@ -100,25 +117,42 @@ def _BraceDetect(w):
# - digit+ '..' digit+ ( '..' digit+ )?
# - Char ranges are bash only!
#
# ast.NumRangePart()
# ast.BracedIntRangePart()
# ast.CharRangePart()
alt_stack[-1].words.append(ast.CompoundWord(cur_parts))
# TODO: catch errors here
cur_parts = parts_stack.pop()
alternatives = alt_stack.pop()
if not stack: # e.g. echo } -- unbalanced {
return None
if not stack[-1].saw_comma: # {foo} is not a real alternative
return None
stack[-1].alt_part.words.append(ast.CompoundWord(cur_parts))
cur_parts.append(alternatives) # TODO: Wrap in AltPart
frame = stack.pop()
cur_parts = frame.cur_parts
cur_parts.append(frame.alt_part)
append = False
if append:
cur_parts.append(part)
# TODO: Errors here
assert len(alt_stack) == 0
assert len(parts_stack) == 0
return ast.CompoundWord(cur_parts)
if len(stack) != 0:
return None
if found:
return ast.BracedWordTree(cur_parts)
else:
return None
def BraceDetectAll(words):
  """Run _BraceDetect over every word, keeping the original on a miss.

  Args:
    words: iterable of word nodes.

  Returns:
    A new list with one entry per input word, in order: the value returned
    by _BraceDetect when it is truthy, otherwise the word itself.
  """
  # `or` falls back to the untouched word whenever detection yields a falsy
  # result (e.g. None).
  return [_BraceDetect(word) or word for word in words]
# Possible optimization for later:
@@ -133,21 +167,21 @@ def _TreeCount(tree_word):
"""
# TODO: Copy the structure of _BraceExpand and _BraceExpandOne.
for part in tree_word.parts:
if part.tag == word_part_e.AltPart:
if part.tag == word_part_e.BracedAltPart:
for word in part.words:
pass
num_results = 2
max_parts = 5
return num_results , max_parts
def _BraceExpandOne(parts, first_alt_index, suffix):
def _BraceExpandOne(parts, first_alt_index, suffixes):
"""Helper for _BraceExpand.
Args:
parts: input parts
first_alt_index: index of the first AltPart
suffix: the suffix to append
first_alt_index: index of the first BracedAltPart
suffixes: List of suffixes to append.
"""
out = []
@@ -159,26 +193,25 @@ def _BraceExpandOne(parts, first_alt_index, suffix):
prefix = parts[ : first_alt_index]
for alt_parts in expanded_alts:
out_parts = []
out_parts.extend(prefix)
out_parts.extend(alt_parts)
out_parts.extend(suffix)
# TODO: Do we need to preserve flags?
out.append(out_parts)
for suffix in suffixes:
out_parts = []
out_parts.extend(prefix)
out_parts.extend(alt_parts)
out_parts.extend(suffix)
# TODO: Do we need to preserve flags?
out.append(out_parts)
return out
def _BraceExpand(parts):
num_alts = 0
first_alt_index = -1
second_alt_index = -1
for i, part in enumerate(parts):
if part.tag == word_part_e.AltPart:
if part.tag == word_part_e.BracedAltPart:
num_alts += 1
if num_alts == 1:
first_alt_index = i
elif num_alts == 2:
second_alt_index = i
break # don't need to count anymore
# NOTE: There are TWO recursive calls here, not just one -- one for
@@ -189,16 +222,24 @@ def _BraceExpand(parts):
elif num_alts == 1:
out = []
suffix = parts[first_alt_index+1 : ]
return _BraceExpandOne(parts, first_alt_index, suffix)
return _BraceExpandOne(parts, first_alt_index, [suffix])
else:
# Now call it on the tail
tail_parts = parts[second_alt_index : ]
tail_parts = parts[first_alt_index+1 : ]
suffixes = _BraceExpand(tail_parts) # recursive call
out = []
for suffix in suffixes:
out.extend(_BraceExpandOne(parts, first_alt_index, suffix))
return out
return _BraceExpandOne(parts, first_alt_index, suffixes)
def BraceExpandWords(words):
  """Replace each BracedWordTree with the CompoundWords it expands to.

  Args:
    words: iterable of word nodes.

  Returns:
    A new flat list.  Words whose tag is not BracedWordTree are passed
    through unchanged; each BracedWordTree contributes one CompoundWord per
    parts list returned by _BraceExpand, in that order.
  """
  expanded = []
  for word in words:
    if word.tag != word_e.BracedWordTree:
      expanded.append(word)
      continue
    # One braced word may expand to several plain words.
    for parts in _BraceExpand(word.parts):
      expanded.append(ast.CompoundWord(parts))
  return expanded
def _Cartesian(tuples):
View
@@ -29,6 +29,14 @@ def _ColorPrint(n):
class BracesTest(unittest.TestCase):
def testBraceDetect(self):
w = _assertReadWord(self, '}')
tree = braces._BraceDetect(w)
self.assertEqual(None, tree)
w = _assertReadWord(self, ',')
tree = braces._BraceDetect(w)
self.assertEqual(None, tree)
w = _assertReadWord(self, 'B-{a,b}-E')
tree = braces._BraceDetect(w)
self.assertEqual(3, len(tree.parts))
@@ -56,14 +64,14 @@ def testBraceDetect(self):
self.assertEqual(3, len(tree.parts)) # B- {} -E
middle_part = tree.parts[1]
self.assertEqual(word_part_e.AltPart, middle_part.tag)
self.assertEqual(word_part_e.BracedAltPart, middle_part.tag)
self.assertEqual(4, len(middle_part.words)) # a b c ={d,e}
last_alternative = middle_part.words[3]
self.assertEqual(2, len(last_alternative.parts)) # = {d,e}
second_part = last_alternative.parts[1]
self.assertEqual(word_part_e.AltPart, second_part.tag)
self.assertEqual(word_part_e.BracedAltPart, second_part.tag)
self.assertEqual(2, len(second_part.words)) # {d,e}
# Another nested expansion
@@ -73,7 +81,7 @@ def testBraceDetect(self):
self.assertEqual(3, len(tree.parts)) # B- {} -E
middle_part = tree.parts[1]
self.assertEqual(word_part_e.AltPart, middle_part.tag)
self.assertEqual(word_part_e.BracedAltPart, middle_part.tag)
self.assertEqual(3, len(middle_part.words)) # a ={b,c}= d
first_alternative = middle_part.words[0]
@@ -85,7 +93,7 @@ def testBraceDetect(self):
self.assertEqual(3, len(middle_alternative.parts)) # = {b,c} =
middle_part2 = middle_alternative.parts[1]
self.assertEqual(word_part_e.AltPart, middle_part2.tag)
self.assertEqual(word_part_e.BracedAltPart, middle_part2.tag)
self.assertEqual(2, len(middle_part2.words)) # b c
# Third alternative is a CompoundWord with zero parts
@@ -97,10 +105,7 @@ def testBraceDetect(self):
def testBraceExpand(self):
w = _assertReadWord(self, 'hi')
tree = braces._BraceDetect(w)
self.assertEqual(1, len(tree.parts))
pprint(tree)
results = braces._BraceExpand(tree.parts)
results = braces._BraceExpand(w.parts)
self.assertEqual(1, len(results))
for parts in results:
_ColorPrint(ast.CompoundWord(parts))
View
@@ -60,6 +60,7 @@
import stat
import sys
from core import braces
from core import completion
from core import expr_eval
from core import word_eval
@@ -518,7 +519,8 @@ def _GetProcessForNode(self, node):
Assume we will run the node in another process. Return a process.
"""
if node.tag == command_e.SimpleCommand:
argv = self.ev.EvalWords(node.words)
words = braces.BraceExpandWords(node.words)
argv = self.ev.EvalWords(words)
if argv is None:
err = self.ev.Error()
raise AssertionError("Error evaluating words: %s" % err)
@@ -649,7 +651,8 @@ def _Execute(self, node):
# TODO: Only eval argv[0] once. It can have side effects!
if node.tag == command_e.SimpleCommand:
argv = self.ev.EvalWords(node.words)
words = braces.BraceExpandWords(node.words)
argv = self.ev.EvalWords(words)
if argv is None:
self.error_stack.extend(self.ev.Error())
@@ -784,7 +787,8 @@ def _Execute(self, node):
if node.do_arg_iter:
iter_list = self.mem.GetArgv()
else:
iter_list = self.ev.EvalWords(node.iter_words)
words = braces.BraceExpandWords(node.iter_words)
iter_list = self.ev.EvalWords(words)
# We need word splitting and so forth
# NOTE: This expands globs too. TODO: We should pass in a Globber()
# object.
View
@@ -316,6 +316,17 @@ def TildeDetect(word):
return w
def TildeDetectAll(words):
  """Run TildeDetect over every word, keeping the original on a miss.

  Args:
    words: iterable of word nodes.

  Returns:
    A new list with one entry per input word, in order: the value returned
    by TildeDetect when it is truthy, otherwise the word itself.
  """
  # `or` falls back to the untouched word whenever detection yields a falsy
  # result.
  return [TildeDetect(word) or word for word in words]
def HasArrayPart(w):
"""Used in cmd_parse."""
assert w.tag == word_e.CompoundWord
View
@@ -7,6 +7,7 @@
import pwd
import re
from core import braces
from core import expr_eval # ArithEval
from core.glob_ import Globber, GlobEscape
from core.id_kind import Id, Kind, IdName, LookupKind
@@ -527,28 +528,8 @@ def _EvalTildeSub(self, prefix):
return Value.FromString(s)
def _EvalArrayLiteralPart(self, part):
# TODO: Also need globbing here. Call EvalWords?
#print(self.words, '!!!')
array = []
for w in part.words:
# - perform splitting when necessary?
# set IFS here?
val = self._EvalCompoundWord(w)
# NOTE: For now, we enforce homogeneous arrays of strings. This is for
# the shell / proc dialect. For func dialect, we can have heterogeneous
# arrays.
is_str, s = val.AsString()
if is_str:
array.append(s)
else:
# TODO:
# - interpolate array into array
self._AddErrorContext('Expected string in array')
raise _EvalError()
words = braces.BraceExpandWords(part.words)
array = self._EvalWords(words)
return Value.FromArray(array)
def _EvalVarNum(self, var_num):
@@ -662,7 +643,7 @@ def _EvalWordPart(self, part, quoted=False):
return Value.FromString(s)
elif part.tag == word_part_e.EscapedLiteralPart:
val = self.token.val
val = part.token.val
assert len(val) == 2, val # e.g. \*
assert val[0] == '\\'
s = val[1]
Oops, something went wrong.

0 comments on commit 700c223

Please sign in to comment.