Fix evaluation of x"y ".

It is translated to 'xy\ ' for splitting, and then we were trimming off the last space, thinking that it is whitesapce, not escaped characters. The spec test case now passes. This fix is somewhat hacky, and the next commit will hopefully contain a cleaner one. - Also, fix incorrect call to parse_lib.MakeParser(). - Move ints to enums in runtime.asdl so that they can be pretty-printed for debugging.
oilshell · Sep 1, 2018 · 3e2c6a1 · 3e2c6a1
1 parent 46640af
commit 3e2c6a1
Show file tree

Hide file tree

Showing 6 changed files with 78 additions and 9 deletions.
diff --git a/core/cmd_exec.py b/core/cmd_exec.py
@@ -211,7 +211,7 @@ def ParseTrapCode(self, code_str):
       A node, or None if the code is invalid.
     """
     line_reader = reader.StringLineReader(code_str, self.arena)
-    _, c_parser = parse_lib.MakeParser(line_reader, self.arena)
+    _, c_parser = parse_lib.MakeParser(line_reader, self.arena, self.aliases)
 
     source_name = '<trap string>'
     self.arena.PushSource(source_name)
@@ -246,7 +246,7 @@ def _Source(self, argv):
 
     try:
       line_reader = reader.FileLineReader(f, self.arena)
-      _, c_parser = parse_lib.MakeParser(line_reader, self.arena)
+      _, c_parser = parse_lib.MakeParser(line_reader, self.arena, self.aliases)
 
       # A sourced module CAN have a new arguments array, but it always shares
       # the same variable scope as the caller.  The caller could be at either a

diff --git a/core/legacy.py b/core/legacy.py
@@ -32,6 +32,9 @@
 
 value_e = runtime.value_e
 span_e = runtime.span_e
+emit_e = runtime.emit_e
+state_e = runtime.state_e
+char_kind_e = runtime.char_kind_e
 log = util.log
 
 
@@ -163,6 +166,9 @@ def SplitForWordEval(self, s):
     """
     sp = self._GetSplitter()
     spans = sp.Split(s, True)
+    if 0:
+      for span in spans:
+        log('SPAN %s', span)
     return _SpansToParts(s, spans)
 
   def SplitForRead(self, line, allow_escape):
@@ -227,17 +233,39 @@ def Split(self, s, allow_escape):
 
 # SplitForRead() will check if the last two spans are a \ and \\n.  Easy.
 
+
+# TODO: Get rid of these redundant assignments.
+
 # Edges are characters.  CH_DE_ is the delimiter prefix.  WHITE is for
 # whitespace; GRAY is for other IFS chars; BLACK is for significant
 # characters.
-CH_DE_WHITE, CH_DE_GRAY, CH_BLACK, CH_BACKSLASH = range(4)
+CH_DE_WHITE, CH_DE_GRAY, CH_BLACK, CH_BACKSLASH = (
+  char_kind_e.CH_DE_WHITE,
+  char_kind_e.CH_DE_GRAY,
+  char_kind_e.CH_BLACK,
+  char_kind_e.CH_BACKSLASH)
+
 
 # Nodes are states
 (ST_INVALID, ST_START, ST_DE_WHITE1, ST_DE_GRAY, ST_DE_WHITE2,
- ST_BLACK, ST_BACKSLASH) = range(7)
+ ST_BLACK, ST_BACKSLASH) = (
+ state_e.ST_INVALID,
+ state_e.ST_START,
+ state_e.ST_DE_WHITE1, 
+ state_e.ST_DE_GRAY, 
+ state_e.ST_DE_WHITE2,
+ state_e.ST_BLACK, 
+ state_e.ST_BACKSLASH)
+
 
 # Actions control what spans to emit.
-EMIT_PART, EMIT_DE, EMIT_EMPTY, EMIT_ESCAPE, NO_EMIT = range(5)
+EMIT_PART, EMIT_DE, EMIT_EMPTY, EMIT_ESCAPE, NO_EMIT = (
+  emit_e.EMIT_PART,
+  emit_e.EMIT_DE,
+  emit_e.EMIT_EMPTY,
+  emit_e.EMIT_ESCAPE,
+  emit_e.NO_EMIT)
+
 
 TRANSITIONS = {
     # Whitespace should have been stripped
@@ -295,6 +323,9 @@ def Split(self, s, allow_escape):
       s: string to split
       allow_escape: False for read -r, this means \ doesn't do anything.
 
+    Returns:
+      List of (runtime.span, end_index) pairs
+
     TODO: This should be (frag, do_split) pairs, to avoid IFS='\'
     double-escaping issue.
     """
@@ -325,9 +356,21 @@ def Split(self, s, allow_escape):
       return spans
 
     # Ignore trailing IFS whitespace too.  This is necessary for the case:
-    # IFS=':' ; read x y z <<< 'a : b : c :'.  We don't want
+    # IFS=':' ; read x y z <<< 'a : b : c :'.
+    trimmed = False
     while s[n-1] in self.ifs_whitespace:
       n -= 1
+      trimmed = True
+    # BUG FIX: A hacky way to fix 'a\ ', but don't break 'a\'
+    # The alternative is to parse forward and collect into '\ ' pairs?  This
+    # might be OK?  It assumes that \ is not an IFS whitespace character, which
+    # is OK because other things break in that case.
+    #
+    # Another way to handle this is to POP the last span when it's all
+    # whitespace?
+
+    if trimmed and s[n-1] == '\\':
+      n += 1
 
     state = ST_START
     while i < n:
@@ -346,7 +389,7 @@ def Split(self, s, allow_escape):
         raise AssertionError(
             'Invalid transition from %r with %r' % (state, ch))
 
-      #log('i %d c %r ch %s state %s new_state %s action %s',
+      #log('i %d c %r ch %s current: %s next: %s %s',
       #     i, c, ch, state, new_state, action)
 
       if action == EMIT_PART:

diff --git a/core/legacy_test.py b/core/legacy_test.py
@@ -54,6 +54,16 @@ def testSpansToParts(self):
     parts = legacy._SpansToParts(s, spans, max_results=1)
     self.assertEqual(['one two'], parts)
 
+  def testTrailingWhitespaceBug(self):
+    # Bug: these differed
+    CASES = [
+        (['x y'], ' x\ y', True),
+        (['ab '], ' ab\ ', True),
+        (['ab '], ' ab\  ', True),
+    ]
+    sp = legacy.IfsSplitter(legacy.DEFAULT_IFS, '')
+    _RunSplitCases(self, sp, CASES)
+
   def testDefaultIfs(self):
     CASES = [
         ([], '', True),
@@ -70,7 +80,6 @@ def testDefaultIfs(self):
         (['Aa', 'b', ' a b'], 'Aa b \\ a\\ b', True),
     ]
 
-    #sp = legacy.WhitespaceSplitter(legacy.DEFAULT_IFS)
     sp = legacy.IfsSplitter(legacy.DEFAULT_IFS, '')
     _RunSplitCases(self, sp, CASES)
 

diff --git a/core/runtime.asdl b/core/runtime.asdl
@@ -53,7 +53,12 @@ module runtime
   | PipelineStatus(int* statuses)
 
   -- Word splitting in legacy.py
+  -- TODO: Rename these
   span = Black | Delim | Backslash
+  emit = EMIT_PART | EMIT_DE | EMIT_EMPTY | EMIT_ESCAPE | NO_EMIT
+  state = ST_INVALID | ST_START | ST_DE_WHITE1 | ST_DE_GRAY | ST_DE_WHITE2
+        | ST_BLACK | ST_BACKSLASH
+  char_kind = CH_DE_WHITE | CH_DE_GRAY | CH_BLACK | CH_BACKSLASH
 
   builtin = 
     NONE | READ | ECHO | SHIFT

diff --git a/core/word_eval.py b/core/word_eval.py
@@ -891,7 +891,7 @@ def _EvalWordFrame(self, frame, argv):
     # Array of strings, some of which are BOTH IFS-escaped and GLOB escaped!
     frags = []
     for frag, do_split_glob in frame:
-      #log('frag %s do_split_glob %s', frag, do_split_glob)
+      #log('frag %r do_split_glob %s', frag, do_split_glob)
 
       # If it was quoted, then
 

diff --git a/spec/word-split.test.sh b/spec/word-split.test.sh
@@ -210,6 +210,17 @@ argv.py $s
 ['', 'x', 'y', 'z']
 ## END
 
+#### Trailing space
+argv.py 'Xec  ho '
+argv.py X'ec  ho '
+argv.py X"ec  ho "
+## STDOUT:
+['Xec  ho ']
+['Xec  ho ']
+['Xec  ho ']
+## END
+
+
 # TODO:
 # - unquoted args of whitespace are not elided (when IFS = null)
 # - empty quoted args are kept
@@ -231,3 +242,4 @@ AB="A B"
 X="X"
 Yspaces=" Y "
 
+