Permalink
Browse files

Fix evaluation of x"y ".

It is translated to 'xy\ ' for splitting, and then we were trimming off
the last space, thinking that it is whitesapce, not escaped characters.

The spec test case now passes.

This fix is somewhat hacky, and the next commit will hopefully contain a
cleaner one.

- Also, fix incorrect call to parse_lib.MakeParser().
- Move ints to enums in runtime.asdl so that they can be pretty-printed
  for debugging.
  • Loading branch information...
Andy Chu
Andy Chu committed Sep 1, 2018
1 parent 46640af commit 3e2c6a10f8430e35f508358bf455ea447d93ebb7
Showing with 78 additions and 9 deletions.
  1. +2 −2 core/cmd_exec.py
  2. +48 −5 core/legacy.py
  3. +10 −1 core/legacy_test.py
  4. +5 −0 core/runtime.asdl
  5. +1 −1 core/word_eval.py
  6. +12 −0 spec/word-split.test.sh
View
@@ -211,7 +211,7 @@ def ParseTrapCode(self, code_str):
A node, or None if the code is invalid.
"""
line_reader = reader.StringLineReader(code_str, self.arena)
_, c_parser = parse_lib.MakeParser(line_reader, self.arena)
_, c_parser = parse_lib.MakeParser(line_reader, self.arena, self.aliases)
source_name = '<trap string>'
self.arena.PushSource(source_name)
@@ -246,7 +246,7 @@ def _Source(self, argv):
try:
line_reader = reader.FileLineReader(f, self.arena)
_, c_parser = parse_lib.MakeParser(line_reader, self.arena)
_, c_parser = parse_lib.MakeParser(line_reader, self.arena, self.aliases)
# A sourced module CAN have a new arguments array, but it always shares
# the same variable scope as the caller. The caller could be at either a
View
@@ -32,6 +32,9 @@
value_e = runtime.value_e
span_e = runtime.span_e
emit_e = runtime.emit_e
state_e = runtime.state_e
char_kind_e = runtime.char_kind_e
log = util.log
@@ -163,6 +166,9 @@ def SplitForWordEval(self, s):
"""
sp = self._GetSplitter()
spans = sp.Split(s, True)
if 0:
for span in spans:
log('SPAN %s', span)
return _SpansToParts(s, spans)
def SplitForRead(self, line, allow_escape):
@@ -227,17 +233,39 @@ def Split(self, s, allow_escape):
# SplitForRead() will check if the last two spans are a \ and \\n. Easy.
# TODO: Get rid of these redundant assignments.
# Edges are characters. CH_DE_ is the delimiter prefix. WHITE is for
# whitespace; GRAY is for other IFS chars; BLACK is for significant
# characters.
CH_DE_WHITE, CH_DE_GRAY, CH_BLACK, CH_BACKSLASH = range(4)
CH_DE_WHITE, CH_DE_GRAY, CH_BLACK, CH_BACKSLASH = (
char_kind_e.CH_DE_WHITE,
char_kind_e.CH_DE_GRAY,
char_kind_e.CH_BLACK,
char_kind_e.CH_BACKSLASH)
# Nodes are states
(ST_INVALID, ST_START, ST_DE_WHITE1, ST_DE_GRAY, ST_DE_WHITE2,
ST_BLACK, ST_BACKSLASH) = range(7)
ST_BLACK, ST_BACKSLASH) = (
state_e.ST_INVALID,
state_e.ST_START,
state_e.ST_DE_WHITE1,
state_e.ST_DE_GRAY,
state_e.ST_DE_WHITE2,
state_e.ST_BLACK,
state_e.ST_BACKSLASH)
# Actions control what spans to emit.
EMIT_PART, EMIT_DE, EMIT_EMPTY, EMIT_ESCAPE, NO_EMIT = range(5)
EMIT_PART, EMIT_DE, EMIT_EMPTY, EMIT_ESCAPE, NO_EMIT = (
emit_e.EMIT_PART,
emit_e.EMIT_DE,
emit_e.EMIT_EMPTY,
emit_e.EMIT_ESCAPE,
emit_e.NO_EMIT)
TRANSITIONS = {
# Whitespace should have been stripped
@@ -295,6 +323,9 @@ def Split(self, s, allow_escape):
s: string to split
allow_escape: False for read -r, this means \ doesn't do anything.
Returns:
List of (runtime.span, end_index) pairs
TODO: This should be (frag, do_split) pairs, to avoid IFS='\'
double-escaping issue.
"""
@@ -325,9 +356,21 @@ def Split(self, s, allow_escape):
return spans
# Ignore trailing IFS whitespace too. This is necessary for the case:
# IFS=':' ; read x y z <<< 'a : b : c :'. We don't want
# IFS=':' ; read x y z <<< 'a : b : c :'.
trimmed = False
while s[n-1] in self.ifs_whitespace:
n -= 1
trimmed = True
# BUG FIX: A hacky way to fix 'a\ ', but don't break 'a\'
# The alternative is to parse forward and collect into '\ ' pairs? This
# might be OK? It assumes that \ is not an IFS whitespace character, which
# is OK because other things break in that case.
#
# Another way to handle this is to POP the last span when it's all
# whitespace?
if trimmed and s[n-1] == '\\':
n += 1
state = ST_START
while i < n:
@@ -346,7 +389,7 @@ def Split(self, s, allow_escape):
raise AssertionError(
'Invalid transition from %r with %r' % (state, ch))
#log('i %d c %r ch %s state %s new_state %s action %s',
#log('i %d c %r ch %s current: %s next: %s %s',
# i, c, ch, state, new_state, action)
if action == EMIT_PART:
View
@@ -54,6 +54,16 @@ def testSpansToParts(self):
parts = legacy._SpansToParts(s, spans, max_results=1)
self.assertEqual(['one two'], parts)
def testTrailingWhitespaceBug(self):
# Bug: these differed
CASES = [
(['x y'], ' x\ y', True),
(['ab '], ' ab\ ', True),
(['ab '], ' ab\ ', True),
]
sp = legacy.IfsSplitter(legacy.DEFAULT_IFS, '')
_RunSplitCases(self, sp, CASES)
def testDefaultIfs(self):
CASES = [
([], '', True),
@@ -70,7 +80,6 @@ def testDefaultIfs(self):
(['Aa', 'b', ' a b'], 'Aa b \\ a\\ b', True),
]
#sp = legacy.WhitespaceSplitter(legacy.DEFAULT_IFS)
sp = legacy.IfsSplitter(legacy.DEFAULT_IFS, '')
_RunSplitCases(self, sp, CASES)
View
@@ -53,7 +53,12 @@ module runtime
| PipelineStatus(int* statuses)
-- Word splitting in legacy.py
-- TODO: Rename these
span = Black | Delim | Backslash
emit = EMIT_PART | EMIT_DE | EMIT_EMPTY | EMIT_ESCAPE | NO_EMIT
state = ST_INVALID | ST_START | ST_DE_WHITE1 | ST_DE_GRAY | ST_DE_WHITE2
| ST_BLACK | ST_BACKSLASH
char_kind = CH_DE_WHITE | CH_DE_GRAY | CH_BLACK | CH_BACKSLASH
builtin =
NONE | READ | ECHO | SHIFT
View
@@ -891,7 +891,7 @@ def _EvalWordFrame(self, frame, argv):
# Array of strings, some of which are BOTH IFS-escaped and GLOB escaped!
frags = []
for frag, do_split_glob in frame:
#log('frag %s do_split_glob %s', frag, do_split_glob)
#log('frag %r do_split_glob %s', frag, do_split_glob)
# If it was quoted, then
View
@@ -210,6 +210,17 @@ argv.py $s
['', 'x', 'y', 'z']
## END
#### Trailing space
argv.py 'Xec ho '
argv.py X'ec ho '
argv.py X"ec ho "
## STDOUT:
['Xec ho ']
['Xec ho ']
['Xec ho ']
## END
# TODO:
# - unquoted args of whitespace are not elided (when IFS = null)
# - empty quoted args are kept
@@ -231,3 +242,4 @@ AB="A B"
X="X"
Yspaces=" Y "

0 comments on commit 3e2c6a1

Please sign in to comment.