Permalink
Browse files

Fixes to the test / [ builtin.

I learned that the [ language isn't really specified by a grammar, as [[
is.  Instead we handle it like bash does: by splitting it into 6 cases:
expressions of length 0, 1, 2, 3, 4, and more.

The bash source implies that POSIX specifies this.  And it explains why
-a and -o are deprecated.

I'm not supporting -a as a unary operator, since it's an alias for -e.

I also added a couple test cases to dbracket.test.sh, and fixed some
syntax error handling bugs in [[.

NOTE: There's still a bug where [ abc = 'a*' ] should not support
globbing.

Addresses issue #19 and issue #26.
  • Loading branch information...
Andy Chu
Andy Chu committed Aug 26, 2017
1 parent afbfd5f commit 0f0fb35e306db12253379fb1b12a1d87c0f3d6a0
Showing with 272 additions and 69 deletions.
  1. +2 −0 core/expr_eval.py
  2. +21 −15 core/id_kind.py
  3. +122 −18 core/test_builtin.py
  4. +14 −11 osh/bool_parse.py
  5. +19 −0 spec/dbracket.test.sh
  6. +93 −24 spec/test-builtin.test.sh
  7. +1 −1 test/spec.sh
View
@@ -484,6 +484,8 @@ def Eval(self, node):
if op_id == Id.BoolUnary_f:
return stat.S_ISREG(mode)
raise NotImplementedError(op_id)
if arg_type == OperandType.Str:
if op_id == Id.BoolUnary_z:
return not bool(s)
View
@@ -369,7 +369,11 @@ def _AddKinds(spec):
# Shared between [[ and test/[.
_UNARY_STR_CHARS = 'zn' # -z -n
_UNARY_OTHER_CHARS = 'ovR'
_UNARY_PATH_CHARS = 'abcdefghLprsStuwxOGN'
# NOTE: In bash and mksh, -a is an alias for -e (exists). For example, see
# test.c in bash. That causes an ambiguity with -a as the binary "and"
# operator, so I just omit it. BoolParser can be changed to deal with the
# ambiguity later if necessary.
_UNARY_PATH_CHARS = 'bcdefghLprsStuwxOGN'
_BINARY_PATH = ['ef', 'nt', 'ot']
_BINARY_INT = ['eq', 'ne', 'gt', 'ge', 'lt', 'le']
@@ -407,7 +411,7 @@ def _AddBoolKinds(spec):
spec.AddBoolOp(Id.Redir_Great, OperandType.Str)
def SetupTestBuiltin(spec):
def SetupTestBuiltin(unary_lookup, binary_lookup, other_lookup):
"""Setup tokens for test/[.
Similar to _AddBoolKinds above. Differences:
@@ -417,26 +421,28 @@ def SetupTestBuiltin(spec):
"""
for letter in _UNARY_STR_CHARS + _UNARY_OTHER_CHARS + _UNARY_PATH_CHARS:
token_name = 'BoolUnary_%s' % letter
spec['-' + letter] = getattr(Id, token_name)
unary_lookup['-' + letter] = getattr(Id, token_name)
for s in _BINARY_PATH + _BINARY_INT:
token_name = 'BoolBinary_%s' % s
spec['-' + s] = getattr(Id, token_name)
binary_lookup['-' + s] = getattr(Id, token_name)
# Like the above, but without =~.
spec['='] = Id.BoolBinary_Equal
spec['=='] = Id.BoolBinary_DEqual
spec['!='] = Id.BoolBinary_NEqual
spec['-a'] = Id.Op_DAmp # like [[ &&
spec['-o'] = Id.Op_DPipe # like [[ ||
spec['!'] = Id.KW_Bang # like [[ !
spec['('] = Id.Op_LParen
spec[')'] = Id.Op_RParen
binary_lookup['='] = Id.BoolBinary_Equal
binary_lookup['=='] = Id.BoolBinary_DEqual
binary_lookup['!='] = Id.BoolBinary_NEqual
# Some of these names don't quite match, but it keeps the BoolParser simple.
spec['<'] = Id.Redir_Less
spec['>'] = Id.Redir_Great
binary_lookup['<'] = Id.Redir_Less
binary_lookup['>'] = Id.Redir_Great
other_lookup['-a'] = Id.Op_DAmp # like [[ &&
other_lookup['-o'] = Id.Op_DPipe # like [[ ||
other_lookup['!'] = Id.KW_Bang # like [[ !
other_lookup['('] = Id.Op_LParen
other_lookup[')'] = Id.Op_RParen
other_lookup[']'] = Id.Arith_RBracket # For closing ]
#
View
@@ -9,20 +9,23 @@
from core import expr_eval
from core import word
from core import runtime
from core.util import log, e_die
from core import util
from osh import bool_parse
from osh import ast_ as ast
Id = id_kind.Id
log = util.log
_ID_LOOKUP = {} # string -> Id
_UNARY_LOOKUP = {}
_BINARY_LOOKUP = {}
_OTHER_LOOKUP = {}
id_kind.SetupTestBuiltin(_ID_LOOKUP)
id_kind.SetupTestBuiltin(_UNARY_LOOKUP, _BINARY_LOOKUP, _OTHER_LOOKUP)
class _WordParser:
class _StringWordEmitter:
"""For test/[, we need a word parser that returns StringWord.
The BoolParser calls word.BoolId(w), and deals with Kind.BoolUnary,
@@ -43,7 +46,11 @@ def ReadWord(self, lex_mode):
s = self.argv[self.i]
self.i += 1
id_ = _ID_LOOKUP.get(s, Id.Word_Compound) # default is an operand word
# default is an operand word
id_ = (
_UNARY_LOOKUP.get(s) or _BINARY_LOOKUP.get(s) or _OTHER_LOOKUP.get(s)
or Id.Word_Compound)
return ast.StringWord(id_, s)
@@ -56,32 +63,129 @@ def EvalWordToString(self, w, do_fnmatch=False):
return runtime.Str(w.s)
def _StringWordTest(s):
# TODO: Could be Word_String
return ast.WordTest(ast.StringWord(Id.Word_Compound, s))
def _TwoArgs(argv):
"""Returns an expression tree to be evaluated."""
a0, a1 = argv
if a0 == '!':
return ast.LogicalNot(_StringWordTest(a1))
unary_id = _UNARY_LOOKUP.get(a0)
if unary_id is None:
# TODO:
# - syntax error
# - separate lookup by unary
util.p_die('Expected unary operator, got %r', a0)
child = ast.StringWord(Id.Word_Compound, a1)
return ast.BoolUnary(unary_id, child)
def _ThreeArgs(argv):
"""Returns an expression tree to be evaluated."""
a0, a1, a2 = argv
# NOTE: Order is important here.
binary_id = _BINARY_LOOKUP.get(a1)
if binary_id is not None:
left = ast.StringWord(Id.Word_Compound, a0)
right = ast.StringWord(Id.Word_Compound, a2)
return ast.BoolBinary(binary_id, left, right)
if a1 == '-a':
left = _StringWordTest(a0)
right = _StringWordTest(a2)
return ast.LogicalAnd(left, right)
if a1 == '-o':
left = _StringWordTest(a0)
right = _StringWordTest(a2)
return ast.LogicalOr(left, right)
if a0 == '!':
child = _TwoArgs(argv[1:])
return ast.LogicalNot(child)
if a0 == '(' and a2 == ')':
return _StringWordTest(a1)
util.p_die('Syntax error: binary operator expected')
def Test(argv, need_right_bracket):
"""The test/[ builtin.
The only difference between test and [ is that [ needs a matching ].
"""
w_parser = _WordParser(argv)
if need_right_bracket:
if argv[-1] != ']':
util.error('[: missing closing ]')
return 2
del argv[-1]
w_parser = _StringWordEmitter(argv)
b_parser = bool_parse.BoolParser(w_parser)
node = b_parser.ParseForBuiltin(need_right_bracket)
if node is None:
for e in b_parser.Error():
log("error %s", e)
e_die("Error parsing test/[ expression")
log('Bool expr %s', node)
# There is a fundamental ambiguity due to poor language design, in cases like:
# [ -z ]
# [ -z -a ]
# [ -z -a ] ]
#
# See posixtest() in bash's test.c:
# "This is an implementation of a Posix.2 proposal by David Korn."
# It dispatches on expressions of length 0, 1, 2, 3, 4, and N args. We do
# the same here.
#
# Another ambiguity:
# -a is both a unary prefix operator and an infix operator. How to fix this
# ambiguity?
bool_node = None
n = len(argv)
try:
if n == 0:
return 1 # [ ] is False
elif n == 1:
bool_node = _StringWordTest(argv[0])
elif n == 2:
bool_node = _TwoArgs(argv)
elif n == 3:
bool_node = _ThreeArgs(argv)
if n == 4:
a0 = argv[0]
if a0 == '!':
child = _ThreeArgs(argv[1:])
bool_node = ast.LogicalNot(child)
elif a0 == '(' and argv[3] == ')':
bool_node = _TwoArgs(argv[1:3])
else:
pass # fallthrough
if bool_node is None:
bool_node = b_parser.ParseForBuiltin()
#log('Bool expr %s', bool_node)
if bool_node is None:
for e in b_parser.Error():
log("error %s", e)
log("Error parsing test/[ expression")
return 2 # parse error is 2
except util.ParseError as e:
util.error(e.UserErrorString())
return 2
# def __init__(self, mem, exec_opts, word_ev):
# mem: Don't need it for BASH_REMATCH? Or I guess you could support it
# exec_opts: don't need it
# word_ev: don't need it
# exec_opts: don't need it, but might need it later
mem = None
exec_opts = None
word_ev = _WordEvaluator()
bool_ev = expr_eval.BoolEvaluator(mem, exec_opts, word_ev)
# TODO: Catch exceptions and turn into failure. It can't have a fatal error, like [[ ${foo?error} ]].
result = bool_ev.Eval(node)
status = 0 if result else 1
b = bool_ev.Eval(bool_node)
status = 0 if b else 1
return status
View
@@ -112,10 +112,6 @@ def _Next(self, lex_mode=LexMode.DBRACKET):
break
return True
def AtEnd(self):
#print('B_ID', IdName(self.op_id), self.cur_word)
return self.op_id == Id.Lit_DRightBracket
def _LookAhead(self):
n = len(self.words)
if n != 1:
@@ -129,18 +125,21 @@ def Parse(self):
if not self._Next(): return None
node = self.ParseExpr()
if not self.AtEnd():
if self.op_id != Id.Lit_DRightBracket:
self.AddErrorContext("Unexpected extra word %r", self.cur_word,
word=self.cur_word)
return None
return node
def ParseForBuiltin(self, need_right_bracket):
"""For test/[."""
def ParseForBuiltin(self):
"""For test builtin."""
if not self._Next(): return None
node = self.ParseExpr()
#log('TRAILING op_id %s', self.op_id)
if self.op_id != Id.Eof_Real:
self.AddErrorContext("Trailing words in test", self.cur_word)
return None
return node
def ParseExpr(self):
@@ -167,6 +166,8 @@ def ParseTerm(self):
Term : Negated (AND Term)?
"""
left = self.ParseNegatedFactor()
if not left:
return None # TODO: An exception should handle this case.
if self.op_id == Id.Op_DAmp:
if not self._Next(): return None
right = self.ParseTerm()
@@ -244,9 +245,11 @@ def ParseFactor(self):
if not self._Next(): return None
node = self.ParseExpr()
if self.op_id != Id.Op_RParen:
raise RuntimeError("Expected ), got %s", self.cur_word)
self.AddErrorContext("Expected ), got %s", self.cur_word, word=self.cur_word)
return None
if not self._Next(): return None
return node
# TODO: A proper error, e.g. for "&&"
raise AssertionError("Unexpected token: %s" % self.cur_word)
# TODO: A proper error, e.g. for [[ && ]] or [[ ]]
self.AddErrorContext("Unexpected token: %s" % self.cur_word, word=self.cur_word)
return None
View
@@ -234,3 +234,22 @@ echo status=$?
echo status=$?
# status: 2
# OK mksh status: 1
### test whether ']]' is empty
[[ ']]' ]]
echo status=$?
# status: 0
### [[ ]] is syntax error
[[ ]]
echo status=$?
# stdout-json: ""
# status: 2
# OK mksh status: 1
### [[ && ]] is syntax error
[[ && ]]
echo status=$?
# stdout-json: ""
# status: 2
# OK mksh status: 1
Oops, something went wrong.

0 comments on commit 0f0fb35

Please sign in to comment.