Skip to content

Commit

Permalink
Fix evaluation of x"y ".
Browse files Browse the repository at this point in the history
It is translated to 'xy\ ' for splitting, and then we were trimming off
the last space, thinking that it is whitesapce, not escaped characters.

The spec test case now passes.

This fix is somewhat hacky, and the next commit will hopefully contain a
cleaner one.

- Also, fix incorrect call to parse_lib.MakeParser().
- Move ints to enums in runtime.asdl so that they can be pretty-printed
  for debugging.
  • Loading branch information
Andy Chu committed Sep 1, 2018
1 parent 46640af commit 3e2c6a1
Show file tree
Hide file tree
Showing 6 changed files with 78 additions and 9 deletions.
4 changes: 2 additions & 2 deletions core/cmd_exec.py
Expand Up @@ -211,7 +211,7 @@ def ParseTrapCode(self, code_str):
A node, or None if the code is invalid.
"""
line_reader = reader.StringLineReader(code_str, self.arena)
_, c_parser = parse_lib.MakeParser(line_reader, self.arena)
_, c_parser = parse_lib.MakeParser(line_reader, self.arena, self.aliases)

source_name = '<trap string>'
self.arena.PushSource(source_name)
Expand Down Expand Up @@ -246,7 +246,7 @@ def _Source(self, argv):

try:
line_reader = reader.FileLineReader(f, self.arena)
_, c_parser = parse_lib.MakeParser(line_reader, self.arena)
_, c_parser = parse_lib.MakeParser(line_reader, self.arena, self.aliases)

# A sourced module CAN have a new arguments array, but it always shares
# the same variable scope as the caller. The caller could be at either a
Expand Down
53 changes: 48 additions & 5 deletions core/legacy.py
Expand Up @@ -32,6 +32,9 @@

value_e = runtime.value_e
span_e = runtime.span_e
emit_e = runtime.emit_e
state_e = runtime.state_e
char_kind_e = runtime.char_kind_e
log = util.log


Expand Down Expand Up @@ -163,6 +166,9 @@ def SplitForWordEval(self, s):
"""
sp = self._GetSplitter()
spans = sp.Split(s, True)
if 0:
for span in spans:
log('SPAN %s', span)
return _SpansToParts(s, spans)

def SplitForRead(self, line, allow_escape):
Expand Down Expand Up @@ -227,17 +233,39 @@ def Split(self, s, allow_escape):

# SplitForRead() will check if the last two spans are a \ and \\n. Easy.


# TODO: Get rid of these redundant assignments.

# Edges are characters. CH_DE_ is the delimiter prefix. WHITE is for
# whitespace; GRAY is for other IFS chars; BLACK is for significant
# characters.
CH_DE_WHITE, CH_DE_GRAY, CH_BLACK, CH_BACKSLASH = range(4)
CH_DE_WHITE, CH_DE_GRAY, CH_BLACK, CH_BACKSLASH = (
char_kind_e.CH_DE_WHITE,
char_kind_e.CH_DE_GRAY,
char_kind_e.CH_BLACK,
char_kind_e.CH_BACKSLASH)


# Nodes are states
(ST_INVALID, ST_START, ST_DE_WHITE1, ST_DE_GRAY, ST_DE_WHITE2,
ST_BLACK, ST_BACKSLASH) = range(7)
ST_BLACK, ST_BACKSLASH) = (
state_e.ST_INVALID,
state_e.ST_START,
state_e.ST_DE_WHITE1,
state_e.ST_DE_GRAY,
state_e.ST_DE_WHITE2,
state_e.ST_BLACK,
state_e.ST_BACKSLASH)


# Actions control what spans to emit.
EMIT_PART, EMIT_DE, EMIT_EMPTY, EMIT_ESCAPE, NO_EMIT = range(5)
EMIT_PART, EMIT_DE, EMIT_EMPTY, EMIT_ESCAPE, NO_EMIT = (
emit_e.EMIT_PART,
emit_e.EMIT_DE,
emit_e.EMIT_EMPTY,
emit_e.EMIT_ESCAPE,
emit_e.NO_EMIT)


TRANSITIONS = {
# Whitespace should have been stripped
Expand Down Expand Up @@ -295,6 +323,9 @@ def Split(self, s, allow_escape):
s: string to split
allow_escape: False for read -r, this means \ doesn't do anything.
Returns:
List of (runtime.span, end_index) pairs
TODO: This should be (frag, do_split) pairs, to avoid IFS='\'
double-escaping issue.
"""
Expand Down Expand Up @@ -325,9 +356,21 @@ def Split(self, s, allow_escape):
return spans

# Ignore trailing IFS whitespace too. This is necessary for the case:
# IFS=':' ; read x y z <<< 'a : b : c :'. We don't want
# IFS=':' ; read x y z <<< 'a : b : c :'.
trimmed = False
while s[n-1] in self.ifs_whitespace:
n -= 1
trimmed = True
# BUG FIX: A hacky way to fix 'a\ ', but don't break 'a\'
# The alternative is to parse forward and collect into '\ ' pairs? This
# might be OK? It assumes that \ is not an IFS whitespace character, which
# is OK because other things break in that case.
#
# Another way to handle this is to POP the last span when it's all
# whitespace?

if trimmed and s[n-1] == '\\':
n += 1

state = ST_START
while i < n:
Expand All @@ -346,7 +389,7 @@ def Split(self, s, allow_escape):
raise AssertionError(
'Invalid transition from %r with %r' % (state, ch))

#log('i %d c %r ch %s state %s new_state %s action %s',
#log('i %d c %r ch %s current: %s next: %s %s',
# i, c, ch, state, new_state, action)

if action == EMIT_PART:
Expand Down
11 changes: 10 additions & 1 deletion core/legacy_test.py
Expand Up @@ -54,6 +54,16 @@ def testSpansToParts(self):
parts = legacy._SpansToParts(s, spans, max_results=1)
self.assertEqual(['one two'], parts)

def testTrailingWhitespaceBug(self):
# Bug: these differed
CASES = [
(['x y'], ' x\ y', True),
(['ab '], ' ab\ ', True),
(['ab '], ' ab\ ', True),
]
sp = legacy.IfsSplitter(legacy.DEFAULT_IFS, '')
_RunSplitCases(self, sp, CASES)

def testDefaultIfs(self):
CASES = [
([], '', True),
Expand All @@ -70,7 +80,6 @@ def testDefaultIfs(self):
(['Aa', 'b', ' a b'], 'Aa b \\ a\\ b', True),
]

#sp = legacy.WhitespaceSplitter(legacy.DEFAULT_IFS)
sp = legacy.IfsSplitter(legacy.DEFAULT_IFS, '')
_RunSplitCases(self, sp, CASES)

Expand Down
5 changes: 5 additions & 0 deletions core/runtime.asdl
Expand Up @@ -53,7 +53,12 @@ module runtime
| PipelineStatus(int* statuses)

-- Word splitting in legacy.py
-- TODO: Rename these
span = Black | Delim | Backslash
emit = EMIT_PART | EMIT_DE | EMIT_EMPTY | EMIT_ESCAPE | NO_EMIT
state = ST_INVALID | ST_START | ST_DE_WHITE1 | ST_DE_GRAY | ST_DE_WHITE2
| ST_BLACK | ST_BACKSLASH
char_kind = CH_DE_WHITE | CH_DE_GRAY | CH_BLACK | CH_BACKSLASH

builtin =
NONE | READ | ECHO | SHIFT
Expand Down
2 changes: 1 addition & 1 deletion core/word_eval.py
Expand Up @@ -891,7 +891,7 @@ def _EvalWordFrame(self, frame, argv):
# Array of strings, some of which are BOTH IFS-escaped and GLOB escaped!
frags = []
for frag, do_split_glob in frame:
#log('frag %s do_split_glob %s', frag, do_split_glob)
#log('frag %r do_split_glob %s', frag, do_split_glob)

# If it was quoted, then

Expand Down
12 changes: 12 additions & 0 deletions spec/word-split.test.sh
Expand Up @@ -210,6 +210,17 @@ argv.py $s
['', 'x', 'y', 'z']
## END

#### Trailing space
argv.py 'Xec ho '
argv.py X'ec ho '
argv.py X"ec ho "
## STDOUT:
['Xec ho ']
['Xec ho ']
['Xec ho ']
## END


# TODO:
# - unquoted args of whitespace are not elided (when IFS = null)
# - empty quoted args are kept
Expand All @@ -231,3 +242,4 @@ AB="A B"
X="X"
Yspaces=" Y "


0 comments on commit 3e2c6a1

Please sign in to comment.