Permalink
Browse files

Rewrite of the entire word evaluation pipeline.

The old code was more monolithic, handling perhaps 80% of the cases
well.  Getting to 100% would be messy.

The new code is an explicit series of stages connected by ASDL data
structures, much like the parser.  It's more principled about glob
escaping.  See comments in core/runtime.asdl for a summary.

Make a distinction between EvalWordSequence and
EvalWordToString/EvalWordToAny.  The former does globbing.

Details:

- Split up WordEvaluator and WordPartEvaluator
- Get rid of value.py for the runtime.value ASDL sum type

Tests:

- Stats: 556 total / 329 osh pass / 169 osh fail
- new word-eval.test.sh, a smoke test for the whole pipeline.
- add some more tests to investigate shell behavior.
  • Loading branch information...
Andy Chu
Andy Chu committed Mar 18, 2017
1 parent fb3c648 commit c5af69702bb784953d12f7d08e647859cfbd6392
Showing with 854 additions and 697 deletions.
  1. +1 −0 asdl/py_meta.py
  2. +69 −36 core/cmd_exec.py
  3. +7 −7 core/cmd_exec_test.py
  4. +11 −18 core/completion.py
  5. +17 −40 core/expr_eval.py
  6. +32 −36 core/glob_.py
  7. +33 −18 core/runtime.asdl
  8. +0 −122 core/value.py
  9. +547 −406 core/word_eval.py
  10. +4 −4 core/word_eval_test.py
  11. +1 −0 sh_spec.py
  12. +7 −2 spec-runner.sh
  13. +11 −4 spec.sh
  14. +14 −3 tests/array.test.sh
  15. +13 −0 tests/var-op-test.test.sh
  16. +1 −1 tests/var-sub.test.sh
  17. +86 −0 tests/word-eval.test.sh
View
@@ -283,6 +283,7 @@ def MakeTypes(module, root, app_types=None):
tag_num = {}
# e.g. for arith_expr
# Should this be arith_expr_t? It is in C++.
base_class = type(defn.name, (CompoundObj, ), {})
setattr(root, defn.name, base_class)
View
@@ -72,11 +72,13 @@
FdState, Pipeline, Process,
HereDocRedirect, DescriptorRedirect, FilenameRedirect,
FuncThunk, ExternalThunk, SubProgramThunk, BuiltinThunk)
from core.value import Value
from core import runtime
from osh import ast_ as ast
command_e = ast.command_e
part_value_e = runtime.part_value_e
value_e = runtime.value_e
log = util.log
@@ -98,7 +100,7 @@ class Mem(object):
"""
def __init__(self, argv0, argv):
self.top = {} # string -> (flags, Value)
self.top = {} # string -> (flags, runtime.value)
self.var_stack = [self.top]
self.argv0 = argv0
self.argv_stack = [argv]
@@ -130,14 +132,14 @@ def SetArgv(self, argv):
def SetGlobalArray(self, name, a):
"""Helper for completion."""
assert isinstance(a, list)
val = Value.FromArray(a)
val = runtime.StrArray(a)
pairs = [(name, val)]
self.SetGlobal(pairs, 0)
def SetGlobalString(self, name, s):
"""Helper for completion."""
assert isinstance(s, str)
val = Value.FromString(s)
val = runtime.Str(s)
pairs = [(name, val)]
self.SetGlobal(pairs, 0)
@@ -151,25 +153,28 @@ def GetGlobal(self, name):
return False, None
def Get(self, name):
# TODO: Don't implement dynamic scope
for i in range(len(self.var_stack) - 1, -1, -1):
scope = self.var_stack[i]
if name in scope:
# Don't need to use flags
_, value = scope[name]
return True, value
return value
# Fall back on environment
v = os.getenv(name)
if v is not None:
return True, Value.FromString(v)
return runtime.Str(v)
return False, None
return runtime.Undef()
def SetGlobal(self, pairs, flags):
"""For completion."""
g = self.var_stack[0] # global scope
for lhs, value in pairs:
assert isinstance(value, Value), value
#log('SETTING %s -> %s', lhs, value)
assert value.tag in (value_e.Str, value_e.StrArray)
# Assuming LeftVar for now.
g[lhs.name] = flags, value
@@ -192,7 +197,7 @@ def SetLocal(self, pairs, flags):
# it to a list is an error. I guess you will have to turn this no for
# bash?
for lhs, value in pairs:
assert isinstance(value, Value), value
assert value.tag in (value_e.Str, value_e.StrArray)
# Assuming LeftVar for now.
self.top[lhs.name] = flags, value
@@ -269,7 +274,7 @@ def __init__(self, mem, builtins, funcs, comp_lookup, exec_opts,
self.exec_opts = exec_opts
self.make_parser = make_parser
self.ev = word_eval.NormalEvaluator(mem, exec_opts, self)
self.ev = word_eval.NormalWordEvaluator(mem, exec_opts, self)
self.mem.last_status = 0 # For $?
@@ -310,7 +315,7 @@ def _Read(self, argv):
if not line: # EOF
return 1
# TODO: split line and do that logic
val = Value.FromString(line.strip())
val = runtime.Str(line.strip())
pairs = [(ast.LeftVar(names[0]), val)]
self.mem.SetLocal(pairs, 0) # read always uses local variables?
return 0
@@ -545,11 +550,11 @@ def _GetProcessForNode(self, node):
"""
if node.tag == command_e.SimpleCommand:
words = braces.BraceExpandWords(node.words)
argv = self.ev.EvalWords(words)
argv = self.ev.EvalWordSequence(words)
if argv is None:
err = self.ev.Error()
raise AssertionError("Error evaluating words: %s" % err)
more_env = self.ev.EvalEnv(node.more_env)
more_env = self._EvalEnv(node.more_env)
if more_env is None:
# TODO: proper error
raise AssertionError()
@@ -595,28 +600,28 @@ def _EvalRedirects(self, node):
redir_type = REDIR_TYPE[n.op_id]
if redir_type == RedirType.Path:
# NOTE: no globbing. You can write to a file called '*.py'.
ok, val = self.ev.EvalCompoundWord(n.arg_word)
ok, val = self.ev.EvalWordToString(n.arg_word)
if not ok:
return False
is_str, filename = val.AsString()
if not is_str:
if val.tag != value_e.Str:
self._AddErrorContext("filename to redirect to should be a string")
return False
filename = val.s
if not filename:
self._AddErrorContext("filename can't be empty")
return False
redirects.append(FilenameRedirect(n.op_id, n.fd, filename))
elif redir_type == RedirType.Desc: # e.g. 1>&2
ok, val = self.ev.EvalCompoundWord(n.arg_word)
ok, val = self.ev.EvalWordToString(n.arg_word)
if not ok:
return False
is_str, t = val.AsString()
if not is_str:
if val.tag != value_e.Str:
self._AddErrorContext(
"descriptor to redirect to should be an integer, not list")
return False
t = val.s
if not t:
self._AddErrorContext("descriptor can't be empty")
return False
@@ -629,17 +634,47 @@ def _EvalRedirects(self, node):
redirects.append(DescriptorRedirect(n.op_id, n.fd, target_fd))
elif redir_type == RedirType.Str:
ok, val = self.ev.EvalCompoundWord(n.arg_word)
ok, val = self.ev.EvalWordToString(n.arg_word)
if not ok:
return False
is_str, body = val.AsString()
assert is_str, val # here doc body can only be parsed as a string!
redirects.append(HereDocRedirect(n.op_id, n.fd, body))
assert val.tag == value_e.Str, \
"descriptor to redirect to should be an integer, not list"
redirects.append(HereDocRedirect(n.op_id, n.fd, val.s))
else:
raise AssertionError
return redirects
def _EvalEnv(self, more_env):
    """Evaluate the environment bindings attached to a simple command.

    Args:
      more_env: list of ast.env_pair.  Each pair's .name and .val are read;
        .val is passed to self.ev.EvalWordToString.

    Returns:
      A dict mapping each binding name to the .s attribute of its evaluated
      value (presumably str -> str; confirm against EvalWordToString).

    Raises:
      AssertionError: if EvalWordToString reports failure for any binding.
        The message names the binding that failed.

    Side effect: each evaluated binding is also passed to
    self.mem.SetSimpleVar so that later bindings can reference earlier ones.
    NOTE(review): the original author asked whether this side effect is
    wrong; nothing here undoes it after the command runs.
    """
    result = {}
    for env_pair in more_env:
        name = env_pair.name
        ok, val = self.ev.EvalWordToString(env_pair.val)
        if not ok:
            # Say which binding failed; a bare AssertionError gave no clue.
            raise AssertionError(
                "Error evaluating value of env binding %r" % (name,))

        # Set each var so the next one can reference it.  Example:
        #   FOO=1 BAR=$FOO ls /
        self.mem.SetSimpleVar(name, val)

        # TODO: Need to pop bindings for simple commands.  Need a stack.
        result[name] = val.s
    return result
def _RunPipeline(self, node):
# TODO: Also check for "echo" and "read". Turn them into HereDocRedirect()
# and p.CaptureOutput()
@@ -678,12 +713,12 @@ def _Execute(self, node):
# TODO: Only eval argv[0] once. It can have side effects!
if node.tag == command_e.SimpleCommand:
words = braces.BraceExpandWords(node.words)
argv = self.ev.EvalWords(words)
argv = self.ev.EvalWordSequence(words)
if argv is None:
self.error_stack.extend(self.ev.Error())
raise _FatalError()
more_env = self.ev.EvalEnv(node.more_env)
more_env = self._EvalEnv(node.more_env)
if more_env is None:
print(self.error_stack)
# TODO: throw exception
@@ -754,11 +789,10 @@ def _Execute(self, node):
elif node.tag == command_e.Assignment:
pairs = []
for pair in node.pairs:
# NOTE: do_glob=False, because foo=*.a makes foo equal to '*.a',
# literally.
# TODO: Also have to evaluate the right hand side.
ok, val = self.ev.EvalCompoundWord(pair.rhs)
# RHS can be a string or array.
ok, val = self.ev.EvalWordToAny(pair.rhs)
assert isinstance(val, runtime.value), val
log('RHS %s -> %s', pair.rhs, val)
if not ok:
self.error_stack.extend(self.ev.Error())
raise _FatalError()
@@ -775,13 +809,12 @@ def _Execute(self, node):
elif node.tag == command_e.ControlFlow:
if node.arg_word: # Evaluate the argument
ok, val = self.ev.EvalCompoundWord(node.arg_word)
ok, val = self.ev.EvalWordToString(node.arg_word)
if not ok:
self.error_stack.extend(self.ev.Error())
raise _FatalError()
is_str, arg_str = val.AsString()
assert is_str
arg = int(arg_str) # They all take integers
assert val.tag == value_e.Str
arg = int(val.s) # They all take integers
else:
arg = 0 # return 0, break 0 levels, etc.
@@ -831,13 +864,13 @@ def _Execute(self, node):
iter_list = self.mem.GetArgv()
else:
words = braces.BraceExpandWords(node.iter_words)
iter_list = self.ev.EvalWords(words)
iter_list = self.ev.EvalWordSequence(words)
# We need word splitting and so forth
# NOTE: This expands globs too. TODO: We should pass in a Globber()
# object.
status = 0 # in case we don't loop
for x in iter_list:
self.mem.SetSimpleVar(iter_name, Value.FromString(x))
self.mem.SetSimpleVar(iter_name, runtime.Str(x))
try:
status = self._Execute(node.body) # last one wins
View
@@ -18,7 +18,7 @@
from core.id_kind import Id
from core import ui
from core import word_eval
from core.value import Value
from core import runtime
from osh import ast_ as ast
from osh import parse_lib
@@ -52,8 +52,8 @@ def InitExecutor():
def InitEvaluator():
mem = cmd_exec.Mem('', [])
val1 = Value.FromString('xxx')
val2 = Value.FromString('yyy')
val1 = runtime.Str('xxx')
val2 = runtime.Str('yyy')
pairs = [(ast.LeftVar('x'), val1), (ast.LeftVar('y'), val2)]
mem.SetLocal(pairs, 0)
@@ -241,10 +241,10 @@ class VarOpTest(unittest.TestCase):
def testVarOps(self):
ev = InitEvaluator() # initializes x=xxx and y=yyy
unset_sub = ast.BracedVarSub(ast.token(Id.VSub_Name, 'unset'))
print(ev._EvalWordPart(unset_sub))
print(ev.part_ev._EvalWordPart(unset_sub))
set_sub = ast.BracedVarSub(ast.token(Id.VSub_Name, 'x'))
print(ev._EvalWordPart(set_sub))
print(ev.part_ev._EvalWordPart(set_sub))
# Now add some ops
part = ast.LiteralPart(ast.token(Id.Lit_Chars, 'default'))
@@ -253,8 +253,8 @@ def testVarOps(self):
unset_sub.suffix_op = test_op
set_sub.suffix_op = test_op
print(ev._EvalWordPart(unset_sub))
print(ev._EvalWordPart(set_sub))
print(ev.part_ev._EvalWordPart(unset_sub))
print(ev.part_ev._EvalWordPart(set_sub))
if __name__ == '__main__':
View
@@ -39,11 +39,13 @@
from osh import ast_ as ast
from osh import parse_lib
from core import runtime
from core import ui
from core import util
from core.id_kind import Id
command_e = ast.command_e
value_e = runtime.value_e
class CompletionLookup(object):
@@ -212,10 +214,10 @@ def Matches(self, words, index, prefix):
print('COMP_REPLY not defined', file=sys.stderr)
return
is_array, reply = val.AsArray()
if not is_array:
if val.tag != value_e.StrArray:
print('ERROR: COMP_REPLY should be an array, got %s', file=sys.stderr)
return
reply = val.strs
print('REPLY', reply)
#reply = ['g1', 'g2', 'h1', 'i1']
@@ -282,15 +284,11 @@ def Matches(self, words, index, prefix):
- When we get a newer timestamp, we should clear the old one.
- When PATH is changed, we can remove old entries.
"""
defined, val = self.mem.Get('PATH')
if not defined:
# No matches if not defined
return
is_str, path = val.AsString()
if not is_str:
val = self.mem.Get('PATH')
if val.tag != value_e.Str:
# No matches if not a string
return
path_dirs = path.split(':')
path_dirs = val.s.split(':')
#print(path_dirs)
names = []
@@ -445,26 +443,21 @@ def _GetCompletionType(w_parser, c_parser, ev, status_lines):
# - EvalTildeSub needs to be somewhere else
# - EvalCommandSub needs to be
#
# maybe write a version of Executor._EvalWords that doesn't do
# maybe write a version of Executor._EvalWordSequence that doesn't do
# CommandSub. Or honestly you can just reuse it for now. Can you pass
# the same cmd_exec in? What about side effects? I guess it can't
# really have any. It can only have them on the file system. Hm.
# Defining functions? Yeah if you complete partial functions that could
# be bad. That is, you could change the name of the function.
ifs = ''
do_glob = True # this is from the cmd_exec. $foo/bar could be split up.
# Does that matter for us? Yes it can matter if $foo is
# '-a bar'! Because then flag parsing will be different.
argv = []
for w in node.words:
ok, val = ev.EvalCompoundWord(w, ifs, do_glob)
ok, val = ev.EvalWordToAny(w)
if not ok:
# Why would it fail?
continue
is_str, s = val.AsString()
if is_str:
argv.append(s)
if val.tag == value_e.Str:
argv.append(val.s)
else:
pass
# Oh I have to handle $@ on the command line?
Oops, something went wrong.

0 comments on commit c5af697

Please sign in to comment.