New functions to detect and parse assignments.

Wrote tests for them, but haven't hooked them up to the main codebase yet. The new _MakeAssignPair function invokes the arithmetic parser where necessary. Wrote a new _AppendMoreEnv function that uses the 'preparsed' (detected) tuple. Also added notes on OSH architecture: 'alias' and array location assignment made things less clean.
oilshell · Sep 15, 2018 · ea80189 · ea80189
1 parent 3812c98
commit ea80189
Show file tree

Hide file tree

Showing 11 changed files with 323 additions and 43 deletions.
diff --git a/core/alloc.py b/core/alloc.py
@@ -104,7 +104,8 @@ def GetLineSpan(self, span_id):
     try:
       return self.spans[span_id]
     except IndexError:
-      util.log('Span ID out of range: %d', span_id)
+      util.log('Span ID out of range: %d is greater than %d', span_id,
+          len(self.spans))
       raise
 
   def LastSpanId(self):
@@ -123,16 +124,15 @@ def GetDebugInfo(self, line_id):
     return path, line_num
 
 
-def CompletionArena(pool):
-  """A temporary arena that only exists for a function call?"""
-  arena = pool.NewArena()
-  arena.PushSource('<temp completion buffer>')
-  return arena
+def SideArena(source_name):
+  """A new arena outside the main one.
+  
+  For completion, $PS1 and $PS4, a[x++]=1, etc.
 
-
-def PluginArena(source_name):
-  """For PS4, etc."""
-  # TODO: Should there only be one pool?  This isn't worked out yet.
+  Translation takes advantage of the fact that arenas have contiguous span_ids.
+  """
+  # TODO: Should there only be one pool?  This isn't worked out yet.  Or just
+  # get rid of the pool concept?
   pool = Pool()
   arena = pool.NewArena()
   arena.PushSource(source_name)

diff --git a/core/cmd_exec.py b/core/cmd_exec.py
@@ -1421,7 +1421,7 @@ def __init__(self, parse_ctx, exec_opts, mem, word_ev):
     self.mem = mem
     self.word_ev = word_ev
 
-    self.arena = alloc.PluginArena('<$PS4>')
+    self.arena = alloc.SideArena('<$PS4>')
     self.parse_cache = {}  # PS4 value -> CompoundWord.  PS4 is scoped.
 
   def _EvalPS4(self):

diff --git a/core/completion.py b/core/completion.py
@@ -610,7 +610,7 @@ def __init__(self, pool, ev, comp_lookup, var_comp, parse_ctx):
     self.parser = DummyParser()  # TODO: remove
 
   def Matches(self, buf, status_out):
-    arena = alloc.CompletionArena(self.pool)
+    arena = alloc.SideArena('<completion>')
     w_parser, c_parser = self.parse_ctx.MakeParserForCompletion(buf, arena)
     comp_type, prefix, comp_words = _GetCompletionType(
         w_parser, c_parser, self.ev, status_out)

diff --git a/core/word.py b/core/word.py
@@ -346,7 +346,7 @@ def AsFuncName(w):
 def AsArithVarName(w):
   """Returns a string if this word looks like an arith var; otherwise False.
 
-  NOTE: This can't be combined with LooksLikeAssignment because VarLike and
+  NOTE: This can't be combined with DetectAssignment because VarLike and
   ArithVarLike must be different tokens.  Otherwise _ReadCompoundWord will be
   confused between array assigments foo=(1 2) and function calls foo(1, 2).
   """
@@ -378,14 +378,18 @@ def IsVarLike(w):
   return _LiteralPartId(w.parts[0]) == Id.Lit_VarLike
 
 
-def LooksLikeAssignment(w):
+def DetectAssignment_OLD(w):
   """Tests whether a word looks like FOO=bar or FOO[x]=bar.
 
   Returns:
     (string, op, CompoundWord) if it looks like FOO=bar
     False                      if it doesn't
 
 
+  (token left,   # Lit_VarLike, Lit_ArrayLhsOpen, or Undefined_Tok
+   token? right, # Lit_ArrayLhsClose if it was detected
+   part_offset)  # where to start the token, 0
+
   TODO: could use assign_parse
   Or (spid, k, (spid1, spid2), op, v)
   spid1 and spid2 are [ and ]
@@ -440,23 +444,28 @@ def LooksLikeAssignment(w):
   return name, op, rhs
 
 
-# TODO:
-# - local/declare should use this.
-# - Doesn't work with 'readonly' or 'export'
-# - global is parsed at the top level with LhsIndexedLike.
-def LooksLikeLhsIndex(s):
-  """Tests if a STRING looks like a[x + 1]=b
-
-  # After EvalStatic, do another around of lexing at runtime.
-  # Use osh/lex.py.
-
-  Returns:
-    (string, arith_expr) if it looks like a[x + 1]=b
-    LhsIndexedName?
-
-    False                  if it doesn't
+def DetectAssignment(w):
   """
-  # PROBLEM: What arena tokens to use?
+  Returns (left token, close token, part offset), or (None, None, 0)."""
+  assert w.tag == word_e.CompoundWord
+  n = len(w.parts)
+  if n == 0:
+    return None, None, 0
+
+  part0 = w.parts[0]
+  id0 = _LiteralPartId(part0)
+  if id0 == Id.Lit_VarLike:
+    return part0.token, None, 1  # everything after first token is the value
+
+  if id0 == Id.Lit_ArrayLhsOpen:
+    if n < 3:  # a[]= can't be valid
+      return None, None, 0
+    for i in xrange(2, n):  # parts[1] is index content, so ] is at 2 or later
+      if _LiteralPartId(w.parts[i]) == Id.Lit_ArrayLhsClose:
+        return part0.token, w.parts[i].token, i+1
+
+  # Nothing detected.  Could be 'foobar' or 'a[x+1+2/' without the closing ].
+  return None, None, 0
 
 
 def KeywordToken(w):

diff --git a/core/word_test.py b/core/word_test.py
@@ -6,13 +6,97 @@
 
 import unittest
 
+from osh import word_parse_test
+from osh.meta import Id
+from core.util import log
+
 from core import word  # module under test
 
 
+def _Detect(test, word_str, expected):
+  # Parse word_str, then check word.DetectAssignment() against 'expected'.
+  # TODO: This function could be moved to test_lib.
+  log('-'*80)
+  arena, w = word_parse_test._assertReadWordWithArena(test, word_str)
+  actual = word.DetectAssignment(w)
+  left_token, close_token, part_offset = actual
+  # 'expected' is (left Id or None, close Id or None, part offset).
+  expected_left, expected_close, expected_part_offset = expected
+
+  print(left_token, close_token, part_offset)
+  print()
+
+  if expected_left is None:
+    test.assertEqual(None, left_token)
+  else:
+    test.assertEqual(expected_left, left_token.id)
+
+  if expected_close is None:
+    test.assertEqual(None, close_token)
+  else:
+    # Check the close token itself, not the left token a second time.
+    test.assertEqual(expected_close, close_token.id)
+  test.assertEqual(expected_part_offset, part_offset)
+
+  # Test that we can reparse input.  Failures here are only logged.
+  from osh import cmd_parse
+  from osh import parse_lib
+  from core import alloc
+
+  parse_ctx = parse_lib.ParseContext(arena, {})
+
+  if left_token and left_token.id in (Id.Lit_VarLike, Id.Lit_ArrayLhsOpen):
+    more_env = []
+    preparsed = (left_token, close_token, part_offset, w)
+    try:
+      cmd_parse._AppendMoreEnv([preparsed], more_env)
+    except Exception as e:
+      log('Error: %s', e)
+    else:
+      log('more_env: %s', more_env)
+
+    try:
+      assign_pair = cmd_parse._MakeAssignPair(parse_ctx, preparsed)
+    except Exception as e:
+      log('Error: %s', e)
+    else:
+      log('assign_pair: %s', assign_pair)
+
+
 class WordTest(unittest.TestCase):
 
-  def testFoo(self):
-    print(word)
+  def testDetectLocation(self):
+    CASES = [  # (word string, (left token Id, close token Id, part offset))
+        ('foobar', (None, None, 0)),
+        ('a[x', (None, None, 0)),
+
+        # Empty is not valid, there has to be at least one token.
+        ('a[]=$foo$bar', (None, None, 0)),
+        ('a[]+=$foo$bar', (None, None, 0)),
+
+        ('s=1', (Id.Lit_VarLike, None, 1)),
+        ('s+=1', (Id.Lit_VarLike, None, 1)),
+        ('a[x]=1', (Id.Lit_ArrayLhsOpen, Id.Lit_ArrayLhsClose, 3)),
+        ('a[x]+=1', (Id.Lit_ArrayLhsOpen, Id.Lit_ArrayLhsClose, 3)),
+        ('a[x++]+=1', (Id.Lit_ArrayLhsOpen, Id.Lit_ArrayLhsClose, 5)),
+
+        ('a=(1 2 3)', (Id.Lit_VarLike, None, 1)),
+        ('a+=(1 2 3)', (Id.Lit_VarLike, None, 1)),
+
+        # EmptyWord on RHS
+        ('s=', (Id.Lit_VarLike, None, 1)),
+        ('a[x]=', (Id.Lit_ArrayLhsOpen, Id.Lit_ArrayLhsClose, 3)),
+
+        # Tilde sub
+        ('s=~foo', (Id.Lit_VarLike, None, 1)),
+        ('a[x]=~', (Id.Lit_ArrayLhsOpen, Id.Lit_ArrayLhsClose, 3)),
+    ]
+    for word_str, expected in CASES:
+      _Detect(self, word_str, expected)
+
+    # These don't parse, as they shouldn't.  But not the best error message.
+    #w = assertReadWord(self, 'a[x]=(1 2 3)')
+    #w = assertReadWord(self, 'a[x]+=(1 2 3)')
 
 
 if __name__ == '__main__':

diff --git a/doc/architecture-notes.md b/doc/architecture-notes.md
@@ -0,0 +1,79 @@
+Notes on OSH Architecture
+-------------------------
+
+## Where we (unfortunately) must re-parse previously parsed text
+
+- alias expansion
+- Array assignment like `a[x+1]=foo` (because breaking word boundaries like
+  `a[x + 1]=foo` causes a lot of problems, and I don't see it used.)
+
+Each of these cases has implications for translation, because we break the
+"arena invariant".
+
+## Parser Lookahead
+
+- `func() { echo hi; }` vs.
+- `func=()  # an array`
+
+## Where the arena invariant is broken
+
+- Here docs with <<-.  The leading tab is lost, because we don't need it for
+  translation.
+
+## Where VirtualLineReader is used
+
+This isn't re-parsing, but it's re-reading.
+
+- alias expansion
+- HereDoc
+
+## Where parsers are instantiated
+
+- See `osh/parse_lib.py` and its callers.
+
+## Where code strings are evaluated
+
+- source and eval
+- trap
+- PS1 and PS4 (WordParser is used)
+- completion hooks registered by `complete -F ls_complete_func ls`
+
+## Parse errors at runtime (need line numbers)
+
+- [ -a -a -a ]
+- command line flag usage errors
+
+## Where unicode is respected
+
+- ${#s} -- length in code points
+- ${s:1:2} -- offsets in code points
+- ${x#?} and family (not yet implemented)
+
+## Parse-time and Runtime Pairs
+
+- echo -e '\x00\n' and echo $'\x00\n' (shared in OSH)
+- test / [ and [[ (shared in OSH)
+
+### Other Pairs
+
+- expr and $(( )) (expr not in shell)
+- later: find and our own language
+
+
+## Dependencies
+
+- Optional: readline
+
+## Borrowed Code
+
+- All of OPy:
+  - pgen2
+  - compiler2 from stdlib
+  - byterun
+- ASDL front end from CPython (heavily refactored)
+- core/tdop.py: Heavily adapted from tinypy
+
+## Generated Code
+
+- See `build/dev.sh`
+