First pass of test/[ builtin.

I found an elegant way to reuse the BoolParser (by creating the StringWord variant). However, [ and [[ differ slightly in behavior, so I added test cases to expose these differences. The differences themselves still need to be fixed. Unrelated: comments about ASDF.
oilshell · Aug 26, 2017 · 203a7ba · 203a7ba
1 parent bd5d5e6
commit 203a7ba
Show file tree

Hide file tree

Showing 12 changed files with 272 additions and 31 deletions.
diff --git a/core/builtin.py b/core/builtin.py
@@ -57,6 +57,7 @@
 COMPLETE COMPGEN DEBUG_LINE
 TRUE FALSE
 COLON
+TEST BRACKET
 HELP
 """.split())
 
@@ -191,6 +192,11 @@ def Resolve(argv0):
   elif argv0 == "false":
     return EBuiltin.FALSE
 
+  elif argv0 == "test":
+    return EBuiltin.TEST
+  elif argv0 == "[":
+    return EBuiltin.BRACKET
+
   elif argv0 == "help":
     return EBuiltin.HELP
 

diff --git a/core/cmd_exec.py b/core/cmd_exec.py
@@ -25,6 +25,7 @@
 from core import braces
 from core import expr_eval
 from core import reader
+from core import test_builtin
 from core import word_eval
 from core import ui
 from core import util
@@ -290,6 +291,12 @@ def _RunBuiltin(self, builtin_id, argv):
     elif builtin_id == EBuiltin.FALSE:
       status = 1
 
+    elif builtin_id == EBuiltin.TEST:
+      status = test_builtin.Test(argv, False)
+
+    elif builtin_id == EBuiltin.BRACKET:
+      status = test_builtin.Test(argv, True)  # need_right_bracket
+
     elif builtin_id == EBuiltin.HELP:
       loader = util.GetResourceLoader()
       status = builtin.Help(argv, loader)

diff --git a/core/id_kind.py b/core/id_kind.py
@@ -366,7 +366,13 @@ def _AddKinds(spec):
 # Id -> OperandType
 BOOL_OPS = {}  # type: dict
 
-UNARY_FILE_CHARS = tuple('abcdefghLprsStuwxOGN')
+# Shared between [[ and test/[.
+_UNARY_STR_CHARS = 'zn'  # -z -n
+_UNARY_OTHER_CHARS = 'ovR'
+_UNARY_PATH_CHARS = 'abcdefghLprsStuwxOGN'
+
+_BINARY_PATH = ['ef', 'nt', 'ot']
+_BINARY_INT = ['eq', 'ne', 'gt', 'ge', 'lt', 'le']
 
 OperandType = util.Enum('OperandType', 'Undefined Path Int Str Other'.split())
 
@@ -378,18 +384,18 @@ def _Dash(strs):
 
 def _AddBoolKinds(spec):
   spec.AddBoolKind('BoolUnary', {
-      OperandType.Str: _Dash(list('zn')),  # -z -n
-      OperandType.Other: _Dash(list('ovR')),
-      OperandType.Path: _Dash(UNARY_FILE_CHARS),
+      OperandType.Str: _Dash(list(_UNARY_STR_CHARS)),
+      OperandType.Other: _Dash(list(_UNARY_OTHER_CHARS)),
+      OperandType.Path: _Dash(list(_UNARY_PATH_CHARS)),
   })
 
   spec.AddBoolKind('BoolBinary', {
       OperandType.Str: [
           ('Equal', '='), ('DEqual', '=='), ('NEqual', '!='),
           ('EqualTilde', '=~'),
       ],
-      OperandType.Path: _Dash(['ef', 'nt', 'ot']),
-      OperandType.Int: _Dash(['eq', 'ne', 'gt', 'ge', 'lt', 'le']),
+      OperandType.Path: _Dash(_BINARY_PATH),
+      OperandType.Int: _Dash(_BINARY_INT),
   })
 
   # logical, arity, arg_type
@@ -401,6 +407,38 @@ def _AddBoolKinds(spec):
   spec.AddBoolOp(Id.Redir_Great, OperandType.Str)
 
 
+def SetupTestBuiltin(spec):
+  """Setup tokens for test/[.
+
+  Fills `spec` in place with entries mapping each operator string accepted by
+  test/[ (e.g. '-z', '-eq', '!=', '(') to the corresponding Id attribute.
+  Nothing is returned.
+
+  Similar to _AddBoolKinds above.  Differences:
+  - =~ doesn't exist
+  - && -> -a, || -> -o
+  - ( ) -> Op_LParen / Op_RParen (they don't appear above)
+
+  Args:
+    spec: mapping that supports item assignment (test_builtin passes a plain
+      dict); it is mutated.
+  """ 
+  # Unary operators: '-' + letter -> Id.BoolUnary_<letter>.  The letters come
+  # from the same module-level constants that _AddBoolKinds uses.
+  for letter in _UNARY_STR_CHARS + _UNARY_OTHER_CHARS + _UNARY_PATH_CHARS:
+    token_name = 'BoolUnary_%s' % letter
+    spec['-' + letter] = getattr(Id, token_name)
+
+  # Dashed binary operators: '-' + name -> Id.BoolBinary_<name>.
+  for s in _BINARY_PATH + _BINARY_INT:
+    token_name = 'BoolBinary_%s' % s
+    spec['-' + s] = getattr(Id, token_name)
+
+  # Like the above, but without =~.
+  spec['='] = Id.BoolBinary_Equal
+  spec['=='] = Id.BoolBinary_DEqual
+  spec['!='] = Id.BoolBinary_NEqual
+
+  # NOTE: 'a' is in _UNARY_PATH_CHARS and 'o' is in _UNARY_OTHER_CHARS, so the
+  # first loop already stored BoolUnary ids under '-a' and '-o'.  With a plain
+  # dict, the two assignments below replace those entries, so '-a' and '-o'
+  # always look up as the logical operators.
+  spec['-a'] = Id.Op_DAmp  # like [[ &&
+  spec['-o'] = Id.Op_DPipe # like [[ ||
+  spec['!'] = Id.KW_Bang  # like [[ !
+  spec['('] = Id.Op_LParen
+  spec[')'] = Id.Op_RParen
+
+  # Some of these names don't quite match, but it keeps the BoolParser simple.
+  spec['<'] = Id.Redir_Less
+  spec['>'] = Id.Redir_Great
+
+
 #
 # Instantiate the spec
 #

diff --git a/core/test_builtin.py b/core/test_builtin.py
@@ -0,0 +1,87 @@
+#!/usr/bin/python
+"""
+test_builtin.py
+"""
+
+import sys
+
+from core import id_kind
+from core import expr_eval
+from core import word
+from core import runtime
+from core.util import log, e_die
+
+from osh import bool_parse
+from osh import ast_ as ast
+
+Id = id_kind.Id
+
+
+_ID_LOOKUP = {}  # string -> Id
+
+id_kind.SetupTestBuiltin(_ID_LOOKUP)
+
+
+class _WordParser:
+  """For test/[, we need a word parser that returns StringWord.
+
+  The BoolParser calls word.BoolId(w), and deals with Kind.BoolUnary,
+  Kind.BoolBinary, etc.  This is instead of CompoundWord/TokenWord (as in the
+  [[ case).
+
+  Each word is one element of argv, handed out in order by ReadWord().
+  """
+  def __init__(self, argv):
+    """Remember argv and start reading at its first element."""
+    self.argv = argv
+    self.i = 0  # index of the next argv entry to return
+    self.n = len(argv)
+
+  def ReadWord(self, lex_mode):
+    """Return the next argument as an ast.StringWord.
+
+    Args:
+      lex_mode: accepted but not used by this method.
+
+    Returns:
+      StringWord(id, s), where id is looked up in _ID_LOOKUP and falls back to
+      Id.Word_Compound for strings that are not in the table.  Once argv is
+      exhausted, every call returns StringWord(Id.Eof_Real, '').
+    """
+    if self.i == self.n:
+      # NOTE: Could define something special
+      return ast.StringWord(Id.Eof_Real, '')
+
+    #log('ARGV %s i %d', self.argv, self.i)
+    s = self.argv[self.i]
+    self.i += 1
+
+    id_ = _ID_LOOKUP.get(s, Id.Word_Compound)  # default is an operand word
+    return ast.StringWord(id_, s)
+
+
+class _WordEvaluator:
+  """Word evaluator for test/[, handed to expr_eval.BoolEvaluator by Test().
+
+  It only wraps the string already stored on the word; no expansion is done
+  here.
+  """
+
+  def EvalWordToString(self, w, do_fnmatch=False):
+    """Return runtime.Str(w.s) for the given word.
+
+    Args:
+      w: word object with an `s` attribute (the StringWord built by
+        _WordParser).
+      do_fnmatch: accepted but ignored; see the comment below.
+    """
+    # do_fnmatch: for the [[ == ]] semantics which we don't have!
+    # I think I need another type of node
+    # Maybe it should be BuiltinEqual and BuiltinDEqual?  Parse it into a different tree.
+    return runtime.Str(w.s)
+
+
+def Test(argv, need_right_bracket):
+  """The test/[ builtin.
+
+  The only difference between test and [ is that [ needs a matching ].
+
+  Args:
+    argv: list of already-evaluated argument strings; each one becomes a
+      StringWord via _WordParser.
+    need_right_bracket: True when invoked as [, False when invoked as test.
+      It is forwarded to BoolParser.ParseForBuiltin().
+      NOTE(review): ParseForBuiltin as added in this change never reads the
+      flag, so a missing ] does not appear to be detected yet -- confirm.
+
+  Returns:
+    0 if the expression evaluates to a true value, otherwise 1.
+
+  If parsing yields None, each entry of b_parser.Error() is logged and
+  e_die() is called.
+  """
+  w_parser = _WordParser(argv)
+  b_parser = bool_parse.BoolParser(w_parser)
+  node = b_parser.ParseForBuiltin(need_right_bracket)
+  if node is None:
+    for e in b_parser.Error():
+      log("error %s", e)
+    e_die("Error parsing test/[ expression")
+
+  # Debug output: logs the parsed expression on every invocation.
+  log('Bool expr %s', node)
+
+  # def __init__(self, mem, exec_opts, word_ev):
+  # mem: Don't need it for BASH_REMATCH?  Or I guess you could support it
+  # exec_opts: don't need it
+  # word_ev: don't need it
+
+  # The evaluator is built without memory or exec options; only the word
+  # evaluator is a real object.
+  mem = None
+  exec_opts = None
+  word_ev = _WordEvaluator()
+
+  bool_ev = expr_eval.BoolEvaluator(mem, exec_opts, word_ev)
+  # TODO: Catch exceptions and turn into failure.  It can't have a fatal error, like [[ ${foo?error} ]].
+  result = bool_ev.Eval(node)
+  status = 0 if result else 1
+  return status
diff --git a/core/util.py b/core/util.py
@@ -43,6 +43,9 @@ def __init__(self, msg, *args, **kwargs):
     if kwargs:
       raise AssertionError('Invalid keyword args %s' % kwargs)
 
+  #def __repr__(self):
+  #  return '<%s %s %r %r %d>' % (self.msg, self.args, self.token, self.word, self.exit_status)
+
   def UserErrorString(self):
     return self.msg % self.args
 

diff --git a/core/word.py b/core/word.py
@@ -473,6 +473,9 @@ def ArithId(node):
 
 
 def BoolId(node):
+  if node.tag == word_e.StringWord:  # for test/[
+    return node.id
+
   if node.tag == word_e.TokenWord:
     return node.token.id
 

diff --git a/osh/bool_parse.py b/osh/bool_parse.py
@@ -44,6 +44,8 @@
 
 import libc  # for regex_parse
 
+log = util.log
+
 
 class BoolParser(object):
   """Parses [[ at compile time and [ at runtime."""
@@ -133,6 +135,14 @@ def Parse(self):
       return None
     return node
 
+  def ParseForBuiltin(self, need_right_bracket):
+    """For test/[.
+
+    Advances to the first word with self._Next() and then parses a single
+    expression with self.ParseExpr().
+
+    Args:
+      need_right_bracket: whether the caller was invoked as [.  Currently
+        unused in this method: nothing here checks for a closing ] or for
+        leftover words after the expression.
+
+    Returns:
+      None if self._Next() reports failure; otherwise whatever ParseExpr()
+      returns.
+    """
+    if not self._Next(): return None
+
+    node = self.ParseExpr()
+    #log('TRAILING op_id %s', self.op_id)
+    return node
+
   def ParseExpr(self):
     """
     Iterative:

diff --git a/osh/osh.asdl b/osh/osh.asdl
@@ -96,6 +96,8 @@ module osh
     -- A BracedWordTree is a word because it can appear in a command.  It can
     -- contains any type of word_part.
   | BracedWordTree(word_part* parts)
+    -- For dynamic parsing of test/[ -- the string is already evaluated.
+  | StringWord(id id, string s)
 
   -- TODO: Might want to preserve token here.
   lhs_expr =

diff --git a/spec/dbracket.test.sh b/spec/dbracket.test.sh
@@ -126,45 +126,25 @@ hex=0x0f    # = 15 (decimal)
 # stdout-json: "true\ntrue\n"
 # OK bash/mksh stdout-json: "true\ntrue\n"
 
-### [[ compare with literal -f
+### [[ compare with literal -f (compare with test-builtin.test.sh)
 var=-f
 [[ $var == -f ]] && echo true
 [[ '-f' == $var ]] && echo true
 # stdout-json: "true\ntrue\n"
 
-### [ compare with literal -f
-# Hm this is the same
-var=-f
-[ $var == -f ] && echo true
-[ '-f' == $var ] && echo true
-# stdout-json: "true\ntrue\n"
-
-### [[ with op variable
+### [[ with op variable (compare with test-builtin.test.sh)
 # Parse error -- parsed BEFORE evaluation of vars
 op='=='
 [[ a $op a ]] && echo true
 [[ a $op b ]] || echo false
 # status: 2
 # OK mksh status: 1
 
-### [ with op variable
-# OK -- parsed AFTER evaluation of vars
-op='=='
-[ a $op a ] && echo true
-[ a $op b ] || echo false
-# status: 0
-# stdout-json: "true\nfalse\n"
-
-### [[ with unquoted empty var
+### [[ with unquoted empty var (compare with test-builtin.test.sh)
 empty=''
 [[ $empty == '' ]] && echo true
 # stdout: true
 
-### [ with unquoted empty var
-empty=''
-[ $empty == '' ] && echo true
-# status: 2
-
 ### [[ at runtime doesn't work
 dbracket=[[
 $dbracket foo == foo ]]
@@ -222,3 +202,35 @@ expr='1+2'
 ### -eq coercion produces weird results
 [[ '' -eq 0 ]] && echo true
 # stdout: true
+
+### [[ '(' ]] is treated as literal
+[[ '(' ]]
+echo status=$?
+# stdout: status=0
+
+### [[ '(' foo ]] is syntax error
+[[ '(' foo ]]
+echo status=$?
+# status: 2
+# OK mksh status: 1
+
+### empty ! is treated as literal
+[[ '!' ]]
+echo status=$?
+# stdout: status=0
+
+### [[ -z ]] is syntax error
+[[ -z ]]
+echo status=$?
+# status: 2
+# OK mksh status: 1
+
+### [[ -z '>' ]]
+[[ -z '>' ]] || echo false  # -z is operator
+# stdout: false
+
+### [[ -z '>' -- ]] is syntax error
+[[ -z '>' -- ]]
+echo status=$?
+# status: 2
+# OK mksh status: 1