View
@@ -26,11 +26,11 @@
put: an op is Add() and not Add, an instance of a class, not an integer value.
"""
import io
import sys
import pprint
from asdl import asdl_parse
asdl = asdl_parse # ALIAS for nodes
from asdl import format as fmt
from asdl import asdl_parse as asdl # ALIAS for nodes
def _CheckType(value, expected_desc):
@@ -117,34 +117,46 @@ class CompoundObj(Obj):
def __init__(self, *args, **kwargs):
# The user must specify ALL required fields or NONE.
self._assigned = {f: False for f in self.FIELDS}
self._SetDefaults()
if args or kwargs:
self._Init(args, kwargs)
else:
# Set defaults here?
pass
def __eq__(self, other):
if self.tag != other.tag:
return False
for name in self.FIELDS:
left = getattr(self, name)
right = getattr(other, name)
if left != right:
return False
return True
def _SetDefaults(self):
for name in self.FIELDS:
#print("%r wasn't assigned" % name)
desc = self.DESCRIPTOR_LOOKUP[name]
# item_desc = desc.desc
if isinstance(desc, asdl.MaybeType):
self.__setattr__(name, None) # Maybe values can be None
elif isinstance(desc, asdl.ArrayType):
self.__setattr__(name, [])
def _Init(self, args, kwargs):
for i, val in enumerate(args):
name = self.FIELDS[i]
self._assigned[name] = True
self.__setattr__(name, val)
for name, val in kwargs.items():
if self._assigned[name]:
raise AssertionError('Duplicate assignment of field %r' % name)
self._assigned[name] = True
self.__setattr__(name, val)
for name in self.FIELDS:
if not self._assigned[name]:
#print("%r wasn't assigned" % name)
desc = self.DESCRIPTOR_LOOKUP[name]
if isinstance(desc, asdl.MaybeType):
# item_desc = desc.desc
self.__setattr__(name, None) # Maybe values can be None
else:
# If anything was set, then required fields raise an error.
raise ValueError("Field %r is required and wasn't initialized" % name)
# If anything was set, then required fields raise an error.
raise ValueError("Field %r is required and wasn't initialized" % name)
def CheckUnassigned(self):
"""See if there are unassigned fields, for later encoding."""
@@ -161,7 +173,11 @@ def __setattr__(self, name, value):
if name == '_assigned':
self.__dict__[name] = value
return
desc = self.DESCRIPTOR_LOOKUP[name]
try:
desc = self.DESCRIPTOR_LOOKUP[name]
except KeyError:
raise AttributeError('Object of type %r has no attribute %r' %
(self.__class__.__name__, name))
if False: # Disable type checking for now
#if not _CheckType(value, desc):
raise AssertionError("Field %r should be of type %s, got %r" %
@@ -171,7 +187,16 @@ def __setattr__(self, name, value):
self.__dict__[name] = value
def __repr__(self):
return '<%s %s>' % (self.__class__.__name__, pprint.pformat(self.__dict__))
#import pprint
#return '<%s %s>' % (self.__class__.__name__, pprint.pformat(self.__dict__))
f = io.StringIO()
tree = fmt.MakeTree(self)
fmt.PrintTree(tree, f)
return f.getvalue()
# For backward compatibility. TODO: Get rid of this.
def DebugString(self):
return self.__repr__()
def _MakeFieldDescriptors(module, fields):
@@ -229,21 +254,31 @@ def MakeTypes(module, root):
# Set a static attribute like op_id.Plus, op_id.Minus.
setattr(cls, name, val)
else:
tag_num = {}
# e.g. for arith_expr
base_class = type(defn.name, (CompoundObj, ), {})
setattr(root, defn.name, base_class)
# Make a type for each alternative.
# Make a type and a enum tag for each alternative.
for i, cons in enumerate(typ.types):
tag = i + 1 # zero reserved?
tag_num[cons.name] = tag # for enum
class_attr = _MakeFieldDescriptors(module, cons.fields)
class_attr['DESCRIPTOR'] = cons
# TODO: Allow setting these integers. We're reusing ID 0 for every
# sum type, but that's OK because fields are strongly typed.
class_attr['tag'] = i + 1 # zero reserved?
class_attr['tag'] = tag
cls = type(cons.name, (base_class, ), class_attr)
setattr(root, cons.name, cls)
# e.g. arith_expr_e.Const == 1
enum_name = defn.name + '_e'
tag_enum = type(enum_name, (), tag_num)
setattr(root, enum_name, tag_enum)
elif isinstance(typ, asdl.Product):
class_attr = _MakeFieldDescriptors(module, typ.fields)
class_attr['DESCRIPTOR'] = typ
View
@@ -34,6 +34,11 @@ asdl-arith-encode() {
hexdump $out
}
asdl-arith-format() {
local expr="$1"
asdl/asdl_demo.py arith-format "$expr"
}
asdl-py() {
local schema=$1
asdl/asdl_demo.py py $schema
@@ -91,9 +96,9 @@ gdb-trace() {
}
build-demo() {
local name=$1
local schema=$1
local schema=asdl/${name}.asdl
local name=$(basename $schema .asdl)
# Generate C++ code
asdl-cpp $schema _tmp/${name}.asdl.h
@@ -113,15 +118,15 @@ arith-demo() {
local bin=_tmp/${name}_demo
build-demo $name $bin
build-demo asdl/arith.asdl
set -x
gdb-trace $bin $data
#$bin $data
}
osh-demo() {
build-demo osh
build-demo osh/osh.asdl
}
a2() {
View
@@ -43,6 +43,7 @@
from core.pool import Pool
from core import reader
from core.id_kind import Id
from core import word
from core import word_eval
from core import ui
from core import util
@@ -62,7 +63,7 @@ def InteractiveLoop(opts, ex, c_parser, w_parser, line_reader):
if w is None:
raise RuntimeError('Failed parse: %s' % c_parser.Error())
c_id = w.CommandId()
c_id = word.CommandId(w)
if c_id == Id.Op_Newline:
print('nothing to execute')
elif c_id == Id.Eof_Real:
@@ -75,12 +76,11 @@ def InteractiveLoop(opts, ex, c_parser, w_parser, line_reader):
# could do that in the first position?
# ParseSimpleCommand fails with '\n' token?
if not node:
# TODO: PrintError here
raise RuntimeError('failed parse: %s' % c_parser.Error())
if opts.print_ast:
node.PrintTree(sys.stdout)
sys.stdout.write('\n\n')
sys.stdout.flush()
print(node)
status, cflow = ex.ExecuteTop(node)
@@ -104,8 +104,9 @@ def Options():
"""Returns an option parser instance."""
p = optparse.OptionParser()
# NOTE: default command is None because empty string is valid.
p.add_option(
'-c', dest='command', default='',
'-c', dest='command', default=None,
help='Shell command to run')
p.add_option(
'-i', dest='interactive', default=False, action='store_true',
@@ -204,7 +205,7 @@ def OshMain(argv):
if e.errno != errno.ENOENT:
raise
if opts.command:
if opts.command is not None:
pool.AddSourcePath('<-c arg>')
line_reader = reader.StringLineReader(opts.command, pool=pool)
interactive = False
@@ -259,9 +260,7 @@ def OshMain(argv):
return 2 # parse error is code 2
if opts.print_ast:
node.PrintTree(sys.stdout)
sys.stdout.write('\n\n')
sys.stdout.flush()
print(node)
if opts.do_exec:
status, cflow = ex.Execute(node)
View
@@ -76,50 +76,15 @@ def MakeError(msg, *args, token=None, word=None):
if token:
near_token = token
elif word:
begin, end = word.TokenPair()
near_token = begin # for now
from core.word import ParseErrorLocation
near_token = ParseErrorLocation(word)
#print('NEAR TOKEN', near_token)
# TODO: Change this to LocationPair()? It could be a single location or
# multiple locations? Put it in word.py? Or somewhere else? I think you
# implement runtime errors in addition to parse time errors first.
else:
near_token = None
return (near_token, msg)
class _Node(object):
def __init__(self, id):
self.id = id # type: Id
def PrintTree(self, f, indent=0):
"""Print to a tree.
Default is to print on a single line.
() : Node
{} : Word
[] : WordPart
<> : Token
_BNode is contained within (). So maybe the parens should be {}.
_ExprNode is contained within []. So maybe the parens should be <>.
There might be confusion between tokens?
Also anode has full on words, lke ${}. A WordPart contains _ExprNode which
contains full words.
"""
f.write(indent * ' ')
self.PrintLine(f)
def PrintLine(self, f):
"""Print to a string."""
raise NotImplementedError
def DebugString(self):
f = io.StringIO()
self.PrintTree(f)
return f.getvalue()
def __repr__(self):
# repr() always prints as a single line
f = io.StringIO()
self.PrintLine(f)
return f.getvalue()
View
@@ -66,15 +66,18 @@
from core import util
from core.builtin import EBuiltin
from core.cmd_node import ListNode
from core.id_kind import Id, RedirType, REDIR_TYPE
from core.process import (
FdState, Pipeline, Process,
HereDocRedirect, DescriptorRedirect, FilenameRedirect,
FuncThunk, ExternalThunk, SubProgramThunk, BuiltinThunk)
from core.word_node import EAssignScope
from core.value import Value
from osh import ast
assign_scope_e = ast.assign_scope
command_e = ast.command_e
log = util.log
@@ -503,7 +506,7 @@ def _GetProcessForNode(self, node):
"""
Assume we will run the node in another process. Return a process.
"""
if node.id == Id.Node_Command:
if node.tag == command_e.SimpleCommand:
argv = self.ev.EvalWords(node.words)
if argv is None:
err = self.ev.Error()
@@ -534,7 +537,7 @@ def _EvalRedirects(self, nodes):
"""
redirects = []
for n in nodes:
redir_type = REDIR_TYPE[n.id]
redir_type = REDIR_TYPE[n.op_id]
if redir_type == RedirType.Path:
# NOTE: no globbing. You can write to a file called '*.py'.
ok, val = self.ev.EvalCompoundWord(n.arg_word)
@@ -548,7 +551,7 @@ def _EvalRedirects(self, nodes):
self._AddErrorContext("filename can't be empty")
return False
redirects.append(FilenameRedirect(n.id, n.fd, filename))
redirects.append(FilenameRedirect(n.op_id, n.fd, filename))
elif redir_type == RedirType.Desc: # e.g. 1>&2
ok, val = self.ev.EvalCompoundWord(n.arg_word)
@@ -568,15 +571,15 @@ def _EvalRedirects(self, nodes):
self._AddErrorContext(
"descriptor to redirect to should be an integer, not string")
return False
redirects.append(DescriptorRedirect(n.id, n.fd, target_fd))
redirects.append(DescriptorRedirect(n.op_id, n.fd, target_fd))
elif redir_type == RedirType.Str:
ok, val = self.ev.EvalCompoundWord(n.arg_word)
if not ok:
return False
is_str, body = val.AsString()
assert is_str, val # here doc body can only be parsed as a string!
redirects.append(HereDocRedirect(n.id, n.fd, body))
redirects.append(HereDocRedirect(n.op_id, n.fd, body))
else:
raise AssertionError
@@ -628,7 +631,13 @@ def Execute(self, node):
Args:
node: of type AstNode
"""
redirects = self._EvalRedirects(node.redirects)
# No redirects
if node.tag in (
command_e.NoOp, command_e.Assignment, command_e.Pipeline,
command_e.AndOr, command_e.Fork, command_e.CommandList):
redirects = []
else:
redirects = self._EvalRedirects(node.redirects)
# TODO: Change this to its own enum?
# or add EBuiltin.THROW _throw? For testing.
@@ -640,8 +649,7 @@ def Execute(self, node):
cflow = EBuiltin.NONE
# TODO: Only eval argv[0] once. It can have side effects!
if node.id == Id.Node_Command:
if node.tag == command_e.SimpleCommand:
argv = self.ev.EvalWords(node.words)
if argv is None:
err = self.ev.Error()
@@ -684,23 +692,23 @@ def Execute(self, node):
else:
self.fd_state.ForgetAll()
elif node.id == Id.Op_Pipe:
elif node.tag == command_e.Pipeline:
status, cflow = self._RunPipeline(node)
elif node.id == Id.Node_Subshell:
elif node.tag == command_e.Subshell:
# This makes sure we don't waste a process if we'd launch one anyway.
p = self._GetProcessForNode(node.children[0])
status = p.Run()
elif node.id == Id.KW_DLeftBracket:
elif node.tag == command_e.DBracket:
bool_ev = expr_eval.BoolEvaluator(self.mem, self.ev)
ok = bool_ev.Eval(node.bnode)
ok = bool_ev.Eval(node.expr)
if ok:
status = 0 if bool_ev.Result() else 1
else:
raise AssertionError('Error evaluating boolean: %s' % bool_ev.Error())
elif node.id == Id.Op_DLeftParen:
elif node.tag == command_e.DParen:
arith_ev = expr_eval.ArithEvaluator(self.mem, self.ev)
ok = arith_ev.Eval(node.anode)
if ok:
@@ -711,7 +719,7 @@ def Execute(self, node):
else:
raise AssertionError('Error evaluating (( )): %s' % arith_ev.Error())
elif node.id == Id.Node_Assign:
elif node.tag == command_e.Assignment:
# TODO: Respect flags: readonly, export, sametype, etc.
# Just pass the Value
pairs = []
@@ -723,38 +731,38 @@ def Execute(self, node):
return None
pairs.append((name, val))
if node.scope == EAssignScope.LOCAL:
if node.scope == assign_scope_e.Local:
self.mem.SetLocal(pairs, node.flags)
elif node.scope == EAssignScope.GLOBAL:
elif node.scope == assign_scope_e.Global:
self.mem.SetGlobal(pairs, node.flags)
else:
raise AssertionError(node.scope)
# TODO: This should be eval of RHS, unlike bash!
status = 0
elif node.id == Id.Op_Semi:
elif node.tag == command_e.CommandList:
status = 0 # for empty list
for child in node.children:
status, cflow = self.Execute(child) # last status wins
if cflow in (EBuiltin.BREAK, EBuiltin.CONTINUE):
break
elif node.id == Id.Node_AndOr:
elif node.tag == command_e.AndOr:
#print(node.children)
left, right = node.children
status, cflow = self.Execute(left)
if node.op == Id.Op_DPipe:
if node.op_id == Id.Op_DPipe:
if status != 0:
status, cflow = self.Execute(right)
elif node.op == Id.Op_DAmp:
elif node.op_id == Id.Op_DAmp:
if status == 0:
status, cflow = self.Execute(right)
else:
raise AssertionError
elif node.id == Id.KW_While:
elif node.tag == command_e.While:
cond, action = node.children
while True:
@@ -768,7 +776,7 @@ def Execute(self, node):
if cflow == EBuiltin.CONTINUE:
cflow = EBuiltin.NONE # reset since we respected it
elif node.id == Id.Node_ForEach:
elif node.tag == command_e.ForEach:
iter_name = node.iter_name
if node.do_arg_iter:
iter_list = self.mem.GetArgv()
@@ -793,11 +801,11 @@ def Execute(self, node):
if cflow == EBuiltin.CONTINUE:
cflow = EBuiltin.NONE # reset since we respected it
elif node.id == Id.Node_FuncDef:
elif node.tag == command_e.FuncDef:
self.funcs[node.name] = node
status = 0
elif node.id == Id.KW_If:
elif node.tag == command_e.If:
i = 0
while i < len(node.children):
cond = node.children[i]
@@ -808,14 +816,14 @@ def Execute(self, node):
break
i += 2
elif node.id == Id.Node_NoOp:
elif node.tag == command_e.NoOp:
status = 0 # make it true
elif node.id == Id.KW_Case:
elif node.tag == command_e.Case:
raise NotImplementedError
else:
raise AssertionError(node.id)
raise AssertionError(node.tag)
if self.exec_opts.errexit:
if status != 0:
View
@@ -17,14 +17,12 @@
from core.cmd_exec import *
from core.id_kind import Id
from core import ui
from core.word_node import LiteralPart, CompoundWord, VarSubPart, VarOp1
from core import word_eval
from core.value import Value
from core.cmd_node import SimpleCommandNode
from core.lexer import Token
from osh import ast
from osh import parse_lib
from osh.word_parse import CompoundWord, LiteralPart
def banner(msg):
@@ -108,21 +106,21 @@ def testPipeline2(self):
ex = InitExecutor()
# Simulating subshell for each command
w1 = CompoundWord()
w1.parts.append(LiteralPart(Token(Id.Lit_Chars, 'ls')))
node1 = SimpleCommandNode()
w1 = ast.CompoundWord()
w1.parts.append(ast.LiteralPart(Token(Id.Lit_Chars, 'ls')))
node1 = ast.SimpleCommand()
node1.words = [w1]
w2 = CompoundWord()
w2.parts.append(LiteralPart(Token(Id.Lit_Chars, 'head')))
node2 = SimpleCommandNode()
w2 = ast.CompoundWord()
w2.parts.append(ast.LiteralPart(Token(Id.Lit_Chars, 'head')))
node2 = ast.SimpleCommand()
node2.words = [w2]
w3 = CompoundWord()
w3.parts.append(LiteralPart(Token(Id.Lit_Chars, 'sort')))
w4 = CompoundWord()
w4.parts.append(LiteralPart(Token(Id.Lit_Chars, '--reverse')))
node3 = SimpleCommandNode()
w3 = ast.CompoundWord()
w3.parts.append(ast.LiteralPart(Token(Id.Lit_Chars, 'sort')))
w4 = ast.CompoundWord()
w4.parts.append(ast.LiteralPart(Token(Id.Lit_Chars, '--reverse')))
node3 = ast.SimpleCommand()
node3.words = [w3, w4]
p = Pipeline()
@@ -244,17 +242,17 @@ class VarOpTest(unittest.TestCase):
def testVarOps(self):
ev = InitEvaluator() # initializes x=xxx and y=yyy
unset_sub = VarSubPart('unset')
unset_sub = ast.VarSubPart('unset')
print(ev.EvalVarSub(unset_sub))
set_sub = VarSubPart('x')
set_sub = ast.VarSubPart('x')
print(ev.EvalVarSub(set_sub))
part = LiteralPart(Token(Id.Lit_Chars, 'default'))
arg_word = CompoundWord(parts=[part])
test_op = VarOp1(Id.VTest_ColonHyphen, arg_word)
unset_sub.test_op = test_op
set_sub.test_op = test_op
part = ast.LiteralPart(Token(Id.Lit_Chars, 'default'))
arg_word = ast.CompoundWord([part])
test_op = ast.StringUnary(Id.VTest_ColonHyphen, arg_word)
unset_sub.suffix_op = test_op
set_sub.suffix_op = test_op
print(ev.EvalVarSub(unset_sub))
print(ev.EvalVarSub(set_sub))
View

This file was deleted.

Oops, something went wrong.
View
@@ -37,11 +37,14 @@
import time
import traceback
from osh import ast
from osh import parse_lib
from core import ui
from core import util
from core.id_kind import Id
command_e = ast.command_e
class CompletionLookup(object):
"""
@@ -373,7 +376,7 @@ def _FindLastSimpleCommand(node):
ls | wc -l
test -f foo && hello
"""
if node.id == Id.Node_Command:
if node.tag == command_e.SimpleCommand:
return node
assert hasattr(node, 'children'), node
@@ -434,7 +437,7 @@ def _GetCompletionType(w_parser, c_parser, ev, status_lines):
com_node = None
if node:
# These 4 should all parse
if node.id == Id.Node_Command:
if node.tag == command_e.SimpleCommand:
# NOTE: prev_token can be ;, then complete a new one
#print('WORDS', node.words)
# TODO:
@@ -469,11 +472,11 @@ def _GetCompletionType(w_parser, c_parser, ev, status_lines):
print(argv)
com_node = node
elif node.id == Id.Op_Semi: # echo a; echo b
elif node.tag == command_e.CommandList: # echo a; echo b
com_node = _FindLastSimpleCommand(node)
elif node.id == Id.Node_AndOr: # echo a && echo b
elif node.tag == command_e.AndOr: # echo a && echo b
com_node = _FindLastSimpleCommand(node)
elif node.id == Id.Op_Pipe: # echo a | wc -l
elif node.tag == command_e.Pipeline : # echo a | wc -l
com_node = _FindLastSimpleCommand(node)
else:
# Return NONE? Not handling it for now
@@ -493,7 +496,7 @@ def _GetCompletionType(w_parser, c_parser, ev, status_lines):
# This one can be multiple lines
s3.Write('node: %s %s',
node.DebugString() if node else '<Parse Error>',
node.id if node else '')
node.tag if node else '')
# This one can be multiple lines
s6.Write('com_node: %s', com_node.DebugString() if com_node else '<None>')
View
@@ -13,17 +13,17 @@
from core import cmd_exec
from core import lexer
from core import cmd_node
from core.word_node import CompoundWord, LiteralPart, ArrayLiteralPart
from core import word_eval
from core.word_node import EAssignScope
from core import ui
from core import cmd_exec_test
from core import completion # module under test
from core.id_kind import Id
from osh import ast
from osh import parse_lib
assign_scope_e = ast.assign_scope
A1 = completion.WordsAction(['foo.py', 'foo', 'bar.py'])
@@ -60,22 +60,22 @@ def testExternalCommandAction(self):
def testShellFuncExecution(self):
ex = cmd_exec_test.InitExecutor()
func_node = cmd_node.FunctionDefNode()
func_node = ast.FuncDef()
# Set global COMPREPLY=(f1 f2)
body_node = cmd_node.AssignmentNode(EAssignScope.GLOBAL, 0)
body_node = ast.Assignment(assign_scope_e.Global, 0)
c1 = CompoundWord()
c1 = ast.CompoundWord()
t1 = lexer.Token(Id.Lit_Chars, 'f1')
c1.parts.append(LiteralPart(t1))
c1.parts.append(ast.LiteralPart(t1))
c2 = CompoundWord()
c2 = ast.CompoundWord()
t2 = lexer.Token(Id.Lit_Chars, 'f2')
c2.parts.append(LiteralPart(t2))
c2.parts.append(ast.LiteralPart(t2))
a = ArrayLiteralPart()
a = ast.ArrayLiteralPart()
a.words = [c1, c2]
w = CompoundWord()
w = ast.CompoundWord()
w.parts.append(a)
body_node.bindings = [('COMPREPLY', w)]
View
@@ -16,12 +16,17 @@
except ImportError:
from core import fake_libc as libc
from core.expr_node import _ExprNode, TernaryExprNode
from core.id_kind import BOOL_OPS, OperandType, Id, IdName
from core.util import cast
from core.util import log
from core.value import TValue
from osh import ast
arith_expr_e = ast.arith_expr_e
bool_expr_e = ast.bool_expr_e # used for dispatch
word_e = ast.word_e
#from core import word_eval
@@ -54,7 +59,7 @@ def Error(self):
def Result(self):
return self.result
def Eval(self, node: _ExprNode):
def Eval(self, node):
try:
result = self._Eval(node)
except ExprEvalError as e:
@@ -139,7 +144,7 @@ def _ValToInteger(self, val):
return False, 0
return True, integer
def _Eval(self, node: _ExprNode):
def _Eval(self, node):
"""
Args:
node: _ExprNode
@@ -154,8 +159,9 @@ def _Eval(self, node: _ExprNode):
# NOTE: Variable NAMES cannot be formed dynamically; but INTEGERS can.
# ${foo:-3}4 is OK. $? will be a compound word too, so we don't have to
# handle that as a special case.
if node.id == Id.Node_ArithVar:
defined, val = self.mem.Get(node.var_name)
#if node.id == Id.Node_ArithVar:
if node.tag == arith_expr_e.RightVar:
defined, val = self.mem.Get(node.name)
# By default, undefined variables are the ZERO value. TODO: Respect
# nounset and raise an exception.
if not defined:
@@ -167,7 +173,7 @@ def _Eval(self, node: _ExprNode):
else:
raise ExprEvalError()
elif node.id == Id.Word_Compound: # constant string
elif node.tag == arith_expr_e.ArithWord: # constant string
ok, val = self.word_ev.EvalCompoundWord(node, elide_empty=False)
if not ok:
raise ExprEvalError(self.word_ev.Error())
@@ -178,7 +184,8 @@ def _Eval(self, node: _ExprNode):
else:
raise ExprEvalError()
elif node.id == Id.Node_UnaryExpr:
#elif node.id == Id.Node_UnaryExpr:
elif node.tag == arith_expr_e.ArithUnary:
atype = node.op_id
# TODO: Should we come up with a kind/arity??
@@ -188,20 +195,19 @@ def _Eval(self, node: _ExprNode):
elif atype == Id.Node_UnaryMinus:
return -self._Eval(node.child)
elif node.id == Id.Node_TernaryExpr:
if node.op_id == Id.Arith_QMark:
node = cast(TernaryExprNode, node)
#elif node.id == Id.Node_TernaryExpr:
elif node.tag == arith_expr_e.TernaryOp:
node = cast(ast.TernaryOp, node)
lhs = self._Eval(node.cond)
if lhs != 0:
ret = self._Eval(node.true_expr)
else:
ret = self._Eval(node.false_expr)
return ret
lhs = self._Eval(node.cond)
if lhs != 0:
ret = self._Eval(node.true_expr)
else:
raise ExprEvalError("%s not implemented" % IdName(node.op_id))
ret = self._Eval(node.false_expr)
return ret
elif node.id == Id.Node_BinaryExpr:
#elif node.id == Id.Node_BinaryExpr:
elif node.tag == arith_expr_e.ArithBinary:
# TODO: Do type check at PARSE TIME, where applicable
lhs = self._Eval(node.left)
rhs = self._Eval(node.right)
@@ -275,18 +281,33 @@ def _EvalCompoundWord(self, word, do_glob=False):
return s
def _Eval(self, node):
# TODO: Switch on node.tag.
if node.id == Id.Word_Compound:
#print('!!', node.tag)
# TODO: word_e.CompoundWord overlaps with other values! Make them all distin
# Use WordTest.
if isinstance(node, ast.CompoundWord) and node.tag == word_e.CompoundWord:
s = self._EvalCompoundWord(node)
return bool(s)
if node.id == Id.Node_UnaryExpr:
op_id = node.op_id
if op_id == Id.KW_Bang:
# child could either be a Word, or it could be a BNode
b = self._Eval(node.child)
return not b
if node.tag == bool_expr_e.LogicalNot:
b = self._Eval(node.child)
return not b
if node.tag == bool_expr_e.LogicalAnd:
# Short-circuit evaluation
if self._Eval(node.left):
return self._Eval(node.right)
else:
return False
if node.tag == bool_expr_e.LogicalOr:
if self._Eval(node.left):
return True
else:
return self._Eval(node.right)
if node.tag == bool_expr_e.BoolUnary:
op_id = node.op_id
s = self._EvalCompoundWord(node.child)
# Now dispatch on arg type
@@ -312,22 +333,10 @@ def _Eval(self, node):
raise NotImplementedError(arg_type)
if node.id == Id.Node_BinaryExpr:
#if node.id == Id.Node_BinaryExpr:
if node.tag == bool_expr_e.BoolBinary:
op_id = node.op_id
# Short-circuit evaluation
if op_id == Id.Op_DAmp:
if self._Eval(node.left):
return self._Eval(node.right)
else:
return False
if op_id == Id.Op_DPipe:
if self._Eval(node.left):
return True
else:
return self._Eval(node.right)
s1 = self._EvalCompoundWord(node.left)
# Whehter to glob escape
do_glob = op_id in (
View

This file was deleted.

Oops, something went wrong.
View
@@ -65,16 +65,7 @@ def MakeLookup(p):
lookup2 = MakeLookup(id_kind.ID_SPEC.LexerPairs(Kind.BoolBinary))
self.assertEqual(Id.BoolBinary_eq, lookup2['-eq'])
def PrintBoolTable():
for i, arg_type in id_kind.BOOL_OPS.items():
row = (id_kind.IdName(i), arg_type)
print('\t'.join(str(c) for c in row))
if __name__ == '__main__':
import sys
if len(sys.argv) > 1 and sys.argv[1] == 'stats':
def testPrintStats(self):
k = id_kind._kind_sizes
print('STATS: %d tokens in %d groups: %s' % (sum(k), len(k), k))
# Thinking about switching
@@ -83,5 +74,12 @@ def PrintBoolTable():
PrintBoolTable()
else:
unittest.main()
def PrintBoolTable():
for i, arg_type in id_kind.BOOL_OPS.items():
row = (id_kind.IdName(i), arg_type)
print('\t'.join(str(c) for c in row))
if __name__ == '__main__':
unittest.main()
View
@@ -189,6 +189,7 @@ def _Read(self, lex_mode):
if line is None: # no more lines
t = Token(Id.Eof_Real, '')
# No line number. I guess we are showing the last line of the file.
# TODO: Could keep track of previous position for this case?
t.pool_index = self.pool_index - 1
t.col = 0
t.length = 0
View
@@ -19,7 +19,6 @@
from osh.word_parse import *
from osh.cmd_parse import CommandParser
from core.cmd_node import *
from core import cmd_exec_test # for InitExecutor. TODO: testutil?
from core import util
@@ -32,56 +31,6 @@
#util.WrapMethods(Lexer, state)
class PrinterTest(unittest.TestCase):
def testWordParts(self):
# Tokens use <> ?
t1 = Token(Id.Lit_Chars, 'echo')
t2 = Token(Id.Op_Newline, '\n')
print(t1)
print(t2)
# Word parts use {}
l1 = LiteralPart(t1)
print(l1)
l2 = LiteralPart(t2)
print(l2)
l3 = LiteralPart(Token(Id.Lit_Chars, 'foo'))
print(l3)
l4 = LiteralPart(Token(Id.Lit_LBrace, '{'))
print(l4)
command_list = SimpleCommandNode()
command_list.words = [l1, l3]
t = Token(Id.Left_CommandSub, '$(')
cs_part = CommandSubPart(t, command_list)
print(cs_part)
vs_part = VarSubPart('foo')
print(vs_part)
# A part that contains other parts
dq = DoubleQuotedPart()
dq.parts.append(l1)
dq.parts.append(cs_part)
print(dq)
# Word
cw = CompoundWord()
cw.parts = [l1, dq]
print(cw)
tw = TokenWord(t2)
print(tw)
class LineReaderTest(unittest.TestCase):
def testGetLine(self):
View
@@ -4,13 +4,18 @@
"""
from core import base
from core.expr_node import UnaryExprNode, BinaryExprNode, VarExprNode
from core.id_kind import Id, IdName
from core.util import cast
from core import word
from osh import ast
from osh.lex import LexMode
arith_expr_e = ast.arith_expr_e
word_e = ast.word_e
class ParseError(Exception):
pass
@@ -32,7 +37,10 @@ def IsCallable(node):
# f(x), or f[1](x)
# I guess function calls can be callable? Return a function later. Not
# sure. Python allows f(3)(4).
return node.op_id in (Id.Node_ArithVar, Id.Arith_LBracket)
if node.tag == arith_expr_e.RightVar:
return True
if node.tag == arith_expr_e.ArithBinary:
return node.op_id == Id.Arith_LBracket
def IsIndexable(node):
@@ -42,17 +50,23 @@ def IsIndexable(node):
node: ExprNode
"""
# f[1], or f(x)[1], or f[1][1]
return node.op_id in (Id.Node_ArithVar, Id.Arith_LBracket, Id.Node_FuncCall)
if node.tag == arith_expr_e.RightVar:
return True
if node.tag == arith_expr_e.ArithBinary:
return node.op_id in (Id.Arith_LBracket, Id.Node_FuncCall)
def IsLValue(node):
"""Determine if a node is a valid L-value by whitelisting Ids.
"""Determine if a node is a valid L-value by whitelisting tags.
Args:
node: ExprNode (could be VarExprNode or BinaryExprNode)
"""
# foo = bar, foo[1] = bar
return node.op_id in (Id.Node_ArithVar, Id.Arith_LBracket)
if node.tag == arith_expr_e.RightVar:
return True
if node.tag == arith_expr_e.ArithBinary:
return node.op_id == Id.Arith_LBracket
#
@@ -65,13 +79,13 @@ def NullError(p, t, bp):
raise ParseError("Token %s can't be used in prefix position" % t)
def NullConstant(p, word, bp):
def NullConstant(p, w, bp):
# The word itself is a node
if word.id == Id.Word_Compound:
var_name = word.AsArithVarName()
if w.tag == word_e.CompoundWord:
var_name = word.AsArithVarName(w)
if var_name:
return VarExprNode(var_name)
return word
return ast.RightVar(var_name)
return w
def NullParen(p, t, bp):
@@ -81,7 +95,7 @@ def NullParen(p, t, bp):
return r
def NullPrefixOp(p, t, bp):
def NullPrefixOp(p, w, bp):
"""Prefix operator.
Low precedence: return, raise, etc.
@@ -91,7 +105,7 @@ def NullPrefixOp(p, t, bp):
!x && y is (!x) && y, not !(x && y)
"""
right = p.ParseUntil(bp)
return UnaryExprNode(t.ArithId(), right)
return ast.ArithUnary(word.ArithId(w), right)
#
@@ -103,18 +117,18 @@ def LeftError(p, t, left, rbp):
raise ParseError("Token %s can't be used in infix position" % t)
def LeftBinaryOp(p, t, left, rbp):
def LeftBinaryOp(p, w, left, rbp):
""" Normal binary operator like 1+2 or 2*3, etc. """
return BinaryExprNode(t.ArithId(), left, p.ParseUntil(rbp))
return ast.ArithBinary(word.ArithId(w), left, p.ParseUntil(rbp))
def LeftAssign(p, t, left, rbp):
def LeftAssign(p, w, left, rbp):
""" Normal binary operator like 1+2 or 2*3, etc. """
# x += 1, or a[i] += 1
if not IsLValue(left):
raise ParseError("Can't assign to %r (%s)" % (left, IdName(left.id)))
return BinaryExprNode(t.ArithId(), left, p.ParseUntil(rbp))
return ast.ArithAssign(word.ArithId(w), left, p.ParseUntil(rbp))
#
@@ -236,7 +250,7 @@ def Next(self):
word=self.cur_word)
#return False
raise ParseError() # use exceptions for now
self.op_id = self.cur_word.ArithId()
self.op_id = word.ArithId(self.cur_word)
return True
def ParseUntil(self, rbp):
@@ -251,13 +265,13 @@ def ParseUntil(self, rbp):
t = self.cur_word
self.Next() # skip over the token, e.g. ! ~ + -
null_info = self.spec.LookupNud(t.ArithId())
null_info = self.spec.LookupNud(word.ArithId(t))
node = null_info.nud(self, t, null_info.bp)
while True:
t = self.cur_word
try:
left_info = self._Led(t.ArithId())
left_info = self._Led(word.ArithId(t))
except KeyError:
raise ParseError('Invalid token %s' % t)
View
@@ -13,23 +13,11 @@
import re
from core import id_kind
from core.base import _Node
class Token(_Node):
# Should a token really be a PAIR of source locations? They must all be in
# the same file? The "len" is basically "Compression".
# Or do we need a flat list of tokens and use token IDs?
# But they might not be on the same line.
SCHEMA = """
record Token extends Node {
loc Ref<SourceLocation>
val Str
}
"""
class Token:
def __init__(self, id, val):
_Node.__init__(self, id)
self.id = id
self.val = val
# In C++, instead of val, it will be this triple. Change it to Val()
@@ -52,17 +40,8 @@ def __eq__(self, other): # for unit tests
return self.id == other.id and self.val == other.val
def __repr__(self):
return '<%s %s>' % (id_kind.IdName(self.id), EncodeTokenVal(self.val))
def Val(self, pool):
"""Given a pool of lines, return the value of this token.
NOTE: Not used right now because we haven't threaded 'pool' through
everything.
"""
line = pool.GetLine(self.pool_index)
c = self.col
return line[c : c+self.length]
#pos = '(%s %s %s)' % (self.pool_index, self.col, self.length)
return ('<%s %s>' % (id_kind.IdName(self.id), EncodeTokenVal(self.val))) #+pos
def Kind(self):
return id_kind.LookupKind(self.id)
@@ -78,3 +57,41 @@ def EncodeTokenVal(s):
return s
else:
return json.dumps(s)
class LineSpan:
"""A part of a line associated with a token.
Location information for parse errors.
TODO: Attach to VarSubPart for beginning. And maybe end?
# What about command sub part? and arith sub? Maybe just the beginning.
TokenWord only needs the ID. It doesn't need any value.
How to encode these as runtime errors?
- Instead of pool_index, you get a string. Or you can keep pool index if you
save the whole thing.
- And you add a string path somewhere.
Line span can be passed a pool to get the value. Combine with other line
spans to get a SourceRange? SourceRange is serialized for runtime errors?
Parse errors generally occur at one location.
What about type errors? Sometimes you have two locations.
"""
def __init__(self, pool_index=-1, col=-1, length=-1):
self.pool_index = pool_index
self.col = col # zero-indexed
self.length = length
def Val(self, pool):
"""Given a pool of lines, return the value of this token.
NOTE: Not used right now because we haven't threaded 'pool' through
everything.
"""
line = pool.GetLine(self.pool_index)
c = self.col
return line[c : c+self.length]
View
@@ -87,6 +87,22 @@ def MakeStatusLines():
def PrintError(error_stack, pool, f):
# TODO:
# - rename to PrintParseError()
# - although parse errors happen at runtime because of 'source'
# - should there be a distinction then?
# - Write unit tests
# - Change error stack to have LineSpan? No it should be kind of "raw"
# - maybe it should be:
# - MakeError
#
# ParseError(
# msg
# (args,)
# near_token
# near_word
# no near node for now...
for token, msg in error_stack:
if token:
#print(token)
View

Large diffs are not rendered by default.

Oops, something went wrong.
View
@@ -13,13 +13,12 @@
from core import libc
except ImportError:
from core import fake_libc as libc
from core.word_node import (
CompoundWord,
ArrayLiteralPart, LiteralPart, EscapedLiteralPart, SingleQuotedPart,
DoubleQuotedPart, CommandSubPart, VarSubPart, TildeSubPart, ArithSubPart)
from core.id_kind import Id, Kind, IdName, LookupKind
from core.value import Value
from core.util import cast
from osh import ast
bracket_op_e = ast.bracket_op_e
# Glob Helpers for WordParts.
@@ -168,6 +167,42 @@ def _AppendArray(strs, array, glob_escape=False):
return empty
word_part_e = ast.word_part_e
def _GlobsAreExpanded(p):
  """Return True if globs in this part are expanded at the top level.

  Yes for LiteralPart and the substitution parts (command sub, var sub).
  No for TildeSubPart, and all the quoted parts.

  Args:
    p: a word_part node; only its .tag is inspected.
  """
  # BUG FIX: the original had an unreachable duplicate of this expression
  # left behind as a dead triple-quoted string literal after the return;
  # removed.
  return p.tag in (
      word_part_e.LiteralPart, word_part_e.CommandSubPart,
      word_part_e.VarSubPart)
# TODO: Turn this into an ASDL predicate? Fast table lookup for C++.
def _IsSubst(p):
  """Return True if the part is a substitution (command/var/arith sub).

  This is used:
  1) To determine whether the result of evaluating the part should be split
     in an unquoted context.
  2) To determine whether an empty string can be elided.
  3) To do globbing.  If we are NOT in a substitution or literal.
  """
  subst_tags = (
      word_part_e.CommandSubPart,
      word_part_e.VarSubPart,
      word_part_e.ArithSubPart,
  )
  return p.tag in subst_tags
class _Evaluator(object):
"""Abstract base class."""
@@ -314,30 +349,34 @@ def EvalVarSub(self, part, quoted=False):
index_error = False # test_op can suppress this
if defined and part.bracket_op:
id = part.bracket_op.id
if part.bracket_op.tag == bracket_op_e.WholeArray:
op_id = part.bracket_op.op_id
# TODO: Change this to array_op instead of bracket_op?
if id == Id.Lit_At:
if val.IsArray():
array_ok = True
else:
self._AddErrorContext("Can't index non-array with @")
return False, None
# TODO: Change this to array_op instead of bracket_op?
if op_id == Id.Lit_At:
if val.IsArray():
array_ok = True
else:
self._AddErrorContext("Can't index non-array with @")
return False, None
elif op_id == Id.Arith_Star:
if val.IsArray():
array_ok = True
else:
self._AddErrorContext("Can't index non-array with *")
return False, None
elif id == Id.Arith_Star:
if val.IsArray():
array_ok = True
else:
self._AddErrorContext("Can't index non-array with *")
return False, None
raise AssertionError(op_id)
elif id == Id.VOp2_LBracket:
elif part.bracket_op.tag == bracket_op_e.ArrayIndex:
array_ok = True
is_array, a = val.AsArray()
if is_array:
anode = part.bracket_op.arg_word
anode = part.bracket_op.expr
# TODO: This should propagate errors
arith_ev = expr_eval.ArithEvaluator(self)
arith_ev = expr_eval.ArithEvaluator(self.mem, self)
ok = arith_ev.Eval(anode)
if not ok:
self._AddErrorContext(
@@ -355,8 +394,9 @@ def EvalVarSub(self, part, quoted=False):
else: # it's a string
raise NotImplementedError("String indexing not implemented")
else:
raise AssertionError(id)
raise AssertionError(part.bracket_op.tag)
if defined and val.IsArray():
if not array_ok:
@@ -368,11 +408,11 @@ def EvalVarSub(self, part, quoted=False):
# if the op does NOT have colon
#use_default = not defined
if part.suffix_op and LookupKind(part.suffix_op.id) == Kind.VTest:
if part.suffix_op and LookupKind(part.suffix_op.op_id) == Kind.VTest:
op = part.suffix_op
# TODO: Change this to a bit test.
if op.id in (
if op.op_id in (
Id.VTest_ColonHyphen, Id.VTest_ColonEquals, Id.VTest_ColonQMark,
Id.VTest_ColonPlus):
is_falsey = not defined or val.IsEmptyString()
@@ -381,7 +421,7 @@ def EvalVarSub(self, part, quoted=False):
#print('!!',id, is_falsey)
if op.id in (Id.VTest_ColonHyphen, Id.VTest_Hyphen):
if op.op_id in (Id.VTest_ColonHyphen, Id.VTest_Hyphen):
if is_falsey:
argv = []
ok, val2 = self.EvalCompoundWord(op.arg_word)
@@ -417,9 +457,9 @@ def EvalVarSub(self, part, quoted=False):
return True, Value.FromString('')
if part.prefix_op:
op = part.prefix_op
op_id = part.prefix_op
if op.id == Id.VSub_Pound:
if op_id == Id.VSub_Pound:
# LENGTH
if val.IsArray():
#print("ARRAY LENGTH", len(val.a))
@@ -430,7 +470,7 @@ def EvalVarSub(self, part, quoted=False):
val = Value.FromString(str(length))
# NOTE: You could have both prefix and suffix
if part.suffix_op and LookupKind(part.suffix_op.id) in (
if part.suffix_op and LookupKind(part.suffix_op.op_id) in (
Kind.VOp1, Kind.VOp2):
op = part.suffix_op
@@ -459,13 +499,13 @@ def EvalVarSub(self, part, quoted=False):
# And then pat_subst() does some special cases. Geez.
# prefix strip
if op.id == Id.VOp1_DPound:
if op.op_id == Id.VOp1_DPound:
pass
elif op.id == Id.VOp1_Pound:
elif op.op_id == Id.VOp1_Pound:
pass
# suffix strip
elif op.id == Id.VOp1_Percent:
elif op.op_id == Id.VOp1_Percent:
print(op.words)
argv = []
for w in op.words:
@@ -488,18 +528,18 @@ def EvalVarSub(self, part, quoted=False):
s = s[:-len(suffix)]
val = Value.FromString(s)
elif op.id == Id.VOp1_DPercent:
elif op.op_id == Id.VOp1_DPercent:
pass
# Patsub, vectorized
elif op.id == Id.VOp2_Slash:
elif op.op_id == Id.VOp2_Slash:
pass
# Either string slicing or array slicing. However string slicing has a
# unicode problem? TODO: Test bash out. We need utf-8 parsing in C++?
#
# Or maybe have a different operator for byte slice and char slice.
elif op.id == Id.VOp2_Colon:
elif op.op_id == Id.VOp2_Colon:
pass
else:
@@ -530,7 +570,7 @@ def EvalCompoundWord(self, word, ifs='', do_glob=False, elide_empty=True):
Returns:
Value -- empty unquoted, string, or array
"""
assert isinstance(word, CompoundWord), "Exected CompoundWord, got %s" % word
assert isinstance(word, ast.CompoundWord), "Expected CompoundWord, got %s" % word
# assume we elide, unless we get something "significant"
is_empty_unquoted = True
ev = self
@@ -545,10 +585,10 @@ def EvalCompoundWord(self, word, ifs='', do_glob=False, elide_empty=True):
is_str, s = val.AsString()
#print('-VAL', val, is_str)
glob_escape = do_glob and not p.GlobsAreExpanded()
glob_escape = do_glob and not _GlobsAreExpanded(p)
if is_str:
if p.IsSubst(): # Split substitutions
if _IsSubst(p): # Split substitutions
# NOTE: Splitting is the same whether we are glob escaping or not
split_parts = _IfsSplit(s, ifs)
empty = _AppendArray(strs, split_parts, glob_escape=glob_escape)
@@ -634,55 +674,46 @@ def EvalArrayLiteralPart(self, part):
return True, Value.FromArray(array)
def EvalWordPart(self, part, quoted=False):
if part.id == Id.Right_ArrayLiteral:
part = cast(ArrayLiteralPart, part)
if part.tag == word_part_e.ArrayLiteralPart:
return self.EvalArrayLiteralPart(part)
elif part.id == Id.Lit_Chars:
part = cast(LiteralPart, part)
elif part.tag == word_part_e.LiteralPart:
s = part.token.val
return True, Value.FromString(s)
elif part.id == Id.Lit_EscapedChar:
part = cast(EscapedLiteralPart, part)
elif part.tag == word_part_e.EscapedLiteralPart:
val = self.token.val
assert len(val) == 2, val # e.g. \*
assert val[0] == '\\'
s = val[1]
return True, Value.FromString(s)
elif part.id == Id.Left_SingleQuote:
part = cast(SingleQuotedPart, part)
s = part._Eval() # shared with EvalStatic. TODO: Consolidate
elif part.tag == word_part_e.SingleQuotedPart:
s = ''.join(t.val for t in part.tokens)
return True, Value.FromString(s)
elif part.id == Id.Left_DoubleQuote:
part = cast(DoubleQuotedPart, part)
elif part.tag == word_part_e.DoubleQuotedPart:
return self.EvalDoubleQuotedPart(part)
elif part.id == Id.Left_CommandSub:
elif part.tag == word_part_e.CommandSubPart:
# TODO: If token is Id.Left_ProcSubIn or Id.Left_ProcSubOut, we have to
# supply something like /dev/fd/63.
part = cast(CommandSubPart, part)
return self.EvalCommandSub(part.command_list)
elif part.id == Id.Left_VarSub:
part = cast(VarSubPart, part)
elif part.tag == word_part_e.VarSubPart:
# This is the only one that uses quoted?
return self.EvalVarSub(part, quoted=quoted)
elif part.id == Id.Lit_Tilde:
part = cast(TildeSubPart, part)
elif part.tag == word_part_e.TildeSubPart:
# We never parse a quoted string into a TildeSubPart.
assert not quoted
return self.EvalTildeSub(part.prefix)
elif part.id in (Id.Left_ArithSub, Id.Left_ArithSub2):
part = cast(ArithSubPart, part)
elif part.tag == word_part_e.ArithSubPart:
return self.EvalArithSub(part.anode)
else:
raise AssertionError(part.id)
raise AssertionError(part.tag)
def EvalDoubleQuotedPart(self, part):
# NOTE: quoted arg isn't used
@@ -829,11 +860,13 @@ class CompletionEvaluator(_Evaluator):
TODO: Also disable side effects! Like ${a:=b} rather than ${a:-b}
And also $(( a+=1 ))
TODO: Unify with EvalStatic() methods on Word and WordPart?
TODO: Unify with static_eval? Completion allows more stuff like var names,
and maybe words within arrays as well.
"""
def __init__(self, mem, exec_opts):
_Evaluator.__init__(self, mem, exec_opts)
def EvalCommandSub(self, node):
# Just return a dummy string?
return True, Value.FromString('__COMMAND_SUB_NOT_EXECUTED__')
View

This file was deleted.

Oops, something went wrong.
View
@@ -0,0 +1,18 @@
#!/usr/bin/env python3
"""
word_test.py: Tests for word.py
"""
import unittest
import word # module under test
class WordTest(unittest.TestCase):
  """Placeholder test case for word.py."""

  def testFoo(self):
    # Smoke test: proves the module under test imports and the suite runs.
    print('Hello from word_test.py')
if __name__ == '__main__':
unittest.main()
View
@@ -62,7 +62,7 @@ parser() {
echo
echo 'AST and IDs'
wc -l core/{*_node,tokens,id_kind}.py | sort -n
wc -l osh/osh.asdl core/{tokens,id_kind}.py | sort -n
echo
echo 'Common Algorithms'
View
@@ -10,62 +10,61 @@
from core import tdop
from core.id_kind import Id
from core.expr_node import (
UnaryExprNode, BinaryExprNode, TernaryExprNode, FuncCallNode)
from core import word
from osh import ast
def NullIncDec(p, t, bp):
def NullIncDec(p, w, bp):
""" ++x or ++x[1] """
right = p.ParseUntil(bp)
if not tdop.IsLValue(right):
raise tdop.ParseError("Can't assign to %r (%s)" % (right, right.token))
return UnaryExprNode(t.ArithId(), right)
return ast.ArithUnary(word.ArithId(w), right)
def NullUnaryPlus(p, t, bp):
""" +x, to distinguish from binary operator. """
right = p.ParseUntil(bp)
return UnaryExprNode(Id.Node_UnaryPlus, right)
return ast.ArithUnary(Id.Node_UnaryPlus, right)
def NullUnaryMinus(p, t, bp):
""" -1, to distinguish from binary operator. """
right = p.ParseUntil(bp)
return UnaryExprNode(Id.Node_UnaryMinus, right)
return ast.ArithUnary(Id.Node_UnaryMinus, right)
def LeftIncDec(p, t, left, rbp):
def LeftIncDec(p, w, left, rbp):
""" For i++ and i--
"""
if not tdop.IsLValue(left):
raise tdop.ParseError("Can't assign to %r (%s)" % (left, left.token))
if t.ArithId() == Id.Arith_DPlus:
if word.ArithId(w) == Id.Arith_DPlus:
op_id = Id.Node_PostDPlus
elif t.ArithId() == Id.Arith_DMinus:
elif word.ArithId(w) == Id.Arith_DMinus:
op_id = Id.Node_PostDMinus
else:
raise AssertionError
return UnaryExprNode(op_id, left)
return ast.ArithUnary(op_id, left)
def LeftIndex(p, t, left, unused_bp):
def LeftIndex(p, w, left, unused_bp):
""" index f[x+1] """
# f[x] or f[x][y]
if not tdop.IsIndexable(left):
raise tdop.ParseError("%s can't be indexed" % left)
index = p.ParseUntil(0)
p.Eat(Id.Arith_RBracket)
return BinaryExprNode(t.ArithId(), left, index)
return ast.ArithBinary(word.ArithId(w), left, index)
def LeftTernary(p, t, left, bp):
""" Function call f(a, b). """
true_expr = p.ParseUntil(bp)
p.Eat(Id.Arith_Colon)
false_expr = p.ParseUntil(bp)
children = [left, true_expr, false_expr]
return TernaryExprNode(t.ArithId(), left, true_expr, false_expr)
return ast.TernaryOp(left, true_expr, false_expr)
# For overloading of , inside function calls
@@ -84,7 +83,7 @@ def LeftFuncCall(p, t, left, unused_bp):
if p.AtToken(Id.Arith_Comma):
p.Next()
p.Eat(Id.Arith_RParen)
return FuncCallNode(left, children)
return ast.FuncCall(left, children)
def MakeShellSpec():
View
@@ -57,9 +57,9 @@
from osh import ast
from core import base
from core import word
from core.id_kind import Id, Kind, LookupKind, IdName
from core.expr_node import UnaryExprNode, BinaryExprNode
from osh.lex import LexMode
try:
from core import libc
@@ -112,7 +112,7 @@ def _NextOne(self, lex_mode=LexMode.DBRACKET):
self.words[0] = w
self.cur_word = w
self.op_id = self.cur_word.BoolId()
self.op_id = word.BoolId(self.cur_word)
self.b_kind = LookupKind(self.op_id)
#print('---- word', self.cur_word, 'op_id', self.op_id, self.b_kind, lex_mode)
return True
@@ -167,7 +167,7 @@ def ParseExpr(self):
if self.op_id == Id.Op_DPipe:
if not self._Next(): return None
right = self.ParseExpr()
return BinaryExprNode(Id.Op_DPipe, left, right)
return ast.LogicalOr(left, right)
else:
return left
@@ -182,7 +182,7 @@ def ParseTerm(self):
if self.op_id == Id.Op_DAmp:
if not self._Next(): return None
right = self.ParseTerm()
return BinaryExprNode(Id.Op_DAmp, left, right)
return ast.LogicalAnd(left, right)
else:
return left
@@ -193,8 +193,8 @@ def ParseNegatedFactor(self):
if self.op_id == Id.KW_Bang:
if not self._Next(): return None
child = self.ParseFactor()
return UnaryExprNode(Id.KW_Bang, child)
#return ast.LogicalNot(child)
#return UnaryExprNode(Id.KW_Bang, child)
return ast.LogicalNot(child)
else:
return self.ParseFactor()
@@ -210,15 +210,15 @@ def ParseFactor(self):
# Just save the type and not the token itself?
op = self.op_id
if not self._Next(): return None
word = self.cur_word
w = self.cur_word
if not self._Next(): return None
node = UnaryExprNode(op, word)
node = ast.BoolUnary(op, w)
return node
if self.b_kind == Kind.Word:
# Peek ahead another token.
t2 = self._LookAhead()
t2_op_id = t2.BoolId()
t2_op_id = word.BoolId(t2)
t2_b_kind = LookupKind(t2_op_id)
# Redir PUN for < and >
@@ -238,20 +238,21 @@ def ParseFactor(self):
right = self.cur_word
if is_regex:
ok, regex_str, unused_quoted = right.EvalStatic()
ok, regex_str, unused_quoted = word.StaticEval(right)
# doesn't contain $foo, etc.
if ok and not libc.regex_parse(regex_str):
self.AddErrorContext("Invalid regex: %r" % regex_str, word=right)
return None
if not self._Next(): return None
return BinaryExprNode(op, left, right)
return ast.BoolBinary(op, left, right)
else:
# [[ foo ]] is implicit Implicit [[ -n foo ]]
op = Id.BoolUnary_n
word = self.cur_word
#op = Id.BoolUnary_n
w = self.cur_word
if not self._Next(): return None
return UnaryExprNode(op, word)
#return UnaryExprNode(op, word)
return w
if self.op_id == Id.Op_LParen:
if not self._Next(): return None
View
@@ -12,12 +12,15 @@
import unittest
from core.id_kind import Id
from core.expr_node import *
from osh import ast
from osh import parse_lib
from osh import bool_parse # module under test
from osh.lex import LexMode
bool_expr_e = ast.bool_expr_e
word_e = ast.word_e
def _ReadWords(w_parser):
words = []
@@ -58,57 +61,49 @@ def testParseFactor(self):
node = p.ParseFactor()
print(node)
self.assertTrue(p.AtEnd())
self.assertEqual(Id.BoolUnary_z, node.op_id)
self.assertEqual(UnaryExprNode, node.__class__)
self.assertEqual(bool_expr_e.BoolUnary, node.tag)
p = _MakeParser('foo == bar')
node = p.ParseFactor()
print(node)
self.assertTrue(p.AtEnd())
self.assertEqual(Id.BoolBinary_DEqual, node.op_id)
self.assertEqual(BinaryExprNode, node.__class__)
self.assertEqual(bool_expr_e.BoolBinary, node.tag)
def testParseNegatedFactor(self):
p = _MakeParser('foo')
node = p.ParseNegatedFactor()
print(node)
self.assertTrue(p.AtEnd())
self.assertEqual(Id.BoolUnary_n, node.op_id)
self.assertEqual(UnaryExprNode, node.__class__)
self.assertEqual(word_e.CompoundWord, node.tag)
p = _MakeParser('! foo')
node = p.ParseNegatedFactor()
print(node)
self.assertTrue(p.AtEnd())
self.assertEqual(Id.KW_Bang, node.op_id)
self.assertEqual(UnaryExprNode, node.__class__)
self.assertEqual(bool_expr_e.LogicalNot, node.tag)
def testParseTerm(self):
p = _MakeParser('foo && ! bar')
node = p.ParseTerm()
print(node)
self.assertEqual(BinaryExprNode, node.__class__)
self.assertEqual(Id.Op_DAmp, node.op_id)
self.assertEqual(bool_expr_e.LogicalAnd, node.tag)
# TODO: This is an entire expression I guess
p = _MakeParser('foo && ! bar && baz')
node = p.ParseTerm()
print(node)
self.assertEqual(BinaryExprNode, node.__class__)
self.assertEqual(Id.Op_DAmp, node.op_id)
self.assertEqual(bool_expr_e.LogicalAnd, node.tag)
p = _MakeParser('-z foo && -z bar')
node = p.ParseTerm()
print(node)
self.assertEqual(BinaryExprNode, node.__class__)
self.assertEqual(Id.Op_DAmp, node.op_id)
self.assertEqual(bool_expr_e.LogicalAnd, node.tag)
def testParseExpr(self):
p = _MakeParser('foo || ! bar')
node = p.ParseExpr()
print(node)
self.assertEqual(BinaryExprNode, node.__class__)
self.assertEqual(Id.Op_DPipe, node.op_id)
self.assertEqual(bool_expr_e.LogicalOr, node.tag)
p = _MakeParser('a == b')
print(p.ParseExpr())
@@ -118,15 +113,13 @@ def testParseFactorInParens(self):
node = p.ParseFactor()
print(node)
self.assertTrue(p.AtEnd())
self.assertEqual(BinaryExprNode, node.__class__)
self.assertEqual(Id.BoolBinary_DEqual, node.op_id)
self.assertEqual(bool_expr_e.BoolBinary, node.tag)
def testParseParenthesized(self):
p = _MakeParser('zoo && ( foo == bar )')
node = p.ParseExpr()
print(node)
self.assertEqual(BinaryExprNode, node.__class__)
self.assertEqual(Id.Op_DAmp, node.op_id)
self.assertEqual(bool_expr_e.LogicalAnd, node.tag)
if __name__ == '__main__':
View

Large diffs are not rendered by default.

Oops, something went wrong.
View

Large diffs are not rendered by default.

Oops, something went wrong.
View
@@ -1,110 +1,179 @@
-- ASDL's six builtin types are identifier, int, string, bytes, object, singleton
-- NOTE we're not using identifier/object/singleton/bytes
-- Python only uses bytes/object/singleton once.
-- identifier is used all over. Why? I have different rules for
-- for functions and vars. case/for/assign have vars.
-- functions and vars. case/for/assign have vars.
-- TODO:
-- How to encode position information?
-- Add bool, for C++ encoding
-- FuncCall inside ${}, in addition to arithmetic context?
-- Well I guess you can do this: echo $(( f(x,y) ))a. It is a little more
-- annoying.
-- Do we need stricter ID types?
-- arith_op_id, bool_op_id, vpre_op_id, array_op_id, vpost_op_id
module osh
{
source_location = (string path, int line, int col, int length)
token = (string value, source_location loc)
id = Foo | Bar -- TODO: you need a placeholder to say this is generated
-- by another tool. Suppress the error.
-- TODO: generate the following:
-- arith_op_id, bool_op_id, vpre_op_id, array_op_id, vpost_op_id
source_location = (string path, int line, int col, int length)
-- parse errors don't need to be serialized! You just calculate them on the
-- fly from tokens? VarSubPart has an extra token. Maybe it needs a source
-- location.
-- these are for RUNTIME errors. do you always use either a src_point or
-- src_range? Or more likely they point to a word, which will have
src_point = (int line, int col)
src_range = (string path, src_point begin, src_point end)
-- Physical line number
debug_line = (int line_num, string line)
debug_info = (debug_line* lines)
-- A unit is a single file.
unit = (string path, command root, debug_info info)
-- The top level structure of multiple pre-compiled modules.
-- The standard library can use this I guess.
package = (unit* units)
-- Does the token hold the location, or do nodes? I think we need to
-- annotate most nodes with source_location, but token is only for TokenWord,
-- LiteralPart, etc.
token = (id id, string value, source_location loc)
bracket_op =
ArrayOp(id op_id) -- * or @
| ArrayIndex(arith_expr a)
WholeArray(id op_id) -- * or @
| ArrayIndex(arith_expr expr)
suffix_op =
VarUnary(id op_id, word arg)
| VarReplace(word pat, word? replace)
| VarSlice(arith_expr start, arith_expr? len)
StringUnary(id op_id, word arg_word)
| PatSub(word pat, word? replace, int do_all, int do_prefix, int do_suffix)
| Slice(arith_expr begin, arith_expr? length)
word_part =
ArrayLiteralPart(word* words)
| LiteralPart(token t)
| EscapedLiteralPart(token t)
| LiteralPart(token token)
| EscapedLiteralPart(token token)
| SingleQuotedPart(token* tokens)
| DoubleQuotedPart(word_part* parts)
| VarSubPart(string name,
id prefix_op,
bracket_op bracket_op
suffix_op suffix_op)
id? prefix_op, -- prefix # or ! operators
bracket_op? bracket_op
suffix_op? suffix_op)
| TildeSubPart(string prefix)
| CommandSubPart(command c)
| ArithSubPart(arith_expr a)
-- NOTE: Could put | Token(token t) as an optimization.
| CommandSubPart(command command_list)
| ArithSubPart(arith_expr anode)
word = Word(word_part* parts)
-- NOTE: Could put | Token(token t) as an optimization.
word =
TokenWord(token token)
| CompoundWord(word_part* parts)
lvalue =
LeftVar(string name)
| LeftIndex(string name, arith_expr index)
arith_expr =
ArithVar(string name) -- eval variable
| ArithWord(word w) -- word to be evaluated as a constant
| ArithUnary(id op_id, arith_expr a)
RightVar(string name) -- eval variable
| ArithWord(word w) -- a string expression
| ArithUnary(id op_id, arith_expr child)
| ArithBinary(id op_id, arith_expr left, arith_expr right)
| ArithAssign(id op_id, lvalue left, arith_expr right)
| TernaryOp(arith_expr cond, arith_expr true_expr, arith_expr false_expr)
| FuncCall(arith_expr func, arith_expr* args)
bool_expr =
WordTest(word w) -- e.g. [[ myword ]]
| BoolBinary(word left, word right)
| BoolUnary(word child)
| LogicalNot(bool_expr b)
| BoolBinary(id op_id, word left, word right)
| BoolUnary(id op_id, word child)
| LogicalNot(bool_expr child)
| LogicalAnd(bool_expr left, bool_expr right)
| LogicalOr(bool_expr left, bool_expr right)
-- NOTE: To reprint the here doc, I guess we need the whole delimiter? And
-- then do_expansion is calculated from that.
-- To reprint the here doc, we need the here_end delimiter. But it
-- doesn't matter at runtime. do_expansion is calculated from it.
-- TODO: was_filled should be ELIMINATED from serialization format. It's
-- only for use at parse time.
-- arg_word uses the same name for Here word and here doc.
redir =
HereDoc(id op_id, word arg_word, int fd, int do_expansion)
| Redirect(id op_id, word arg_word, int fd)
lvalue =
LeftVar(string name)
| LeftIndex(string name, arith_expr index)
Redirect(id op_id, word arg_word, int fd)
| HereDoc(id op_id, word arg_word, int fd, int do_expansion,
string here_end, int? was_filled) -- default was_filled None
scope = Global | Local
var_flags = Export | Readonly
assign_scope = Global | Local
assign_flags = Export | ReadOnly
binding = (lvalue lhs, word rhs)
and_or = DAmp | DPipe
-- For now, using stderr_indices representation because it's more compact.
-- |& in osh; |- in oil.
pipe_op = Pipe | PipeAndStderr
-- pipe_op = Pipe | PipeAndStderr
case_arm = (word* pat, command* action)
-- Almost all of these can have redirects, even function defs.
-- AndOr/Pipeline/Fork are exceptions.
-- Fork is in its own wrapper node because it's easy to translate.
-- Redirects are easier to translate as well if they are WITHIN each command
-- node.
command_hetero =
XNoOp
| XSimpleCommand(word* words, redir* redirects, binding* more_env)
| XAssignment(assign_scope scope,
assign_flags flags,
word* names, -- names mentioned without a binding
binding* bindings)
| XFork(command* commands) -- shell only allows one command
| XPipeline(command* commands, int negated, int* op)
| XAndOr(command* commands, and_or* ops)
| XBlock(command* commands, redir* redirects)
| XSubshell(command* commands, redir* redirects)
| XDParen(arith_expr child, redir* redirects)
| XDBracket(bool_expr child, redir* redirects)
| XForEach(string var, word* words, command* body, redir* redirects)
-- C-style for loop
| XForExpr(arith_expr init, arith_expr test, arith_expr update, command* body,
redir* redirects)
| XWhile(command cond, command* body, redir* redirects)
| XUntil(command cond, command* body, redir* redirects)
| XIf(command test, command* body, command* orelse, redir* redirects)
| XCase(string var_name, case_arm* cases, redir* redirects)
| XFuncDef(string name, command* body, redir* redirects)
-- Homogeneous version
command =
NoOp
| SimpleCommand(word* words, redir* redirects, binding* more_env)
| Assignment(scope scope,
var_flags flags,
word* names, -- names mentioned without a binding
binding* bindings)
| DParen(arith_expr a)
| DBracket(bool_expr b)
| Block(command* commands)
| Subshell(command* commands)
| Fork(command* commands) -- shell only allows one command
| Pipeline(command* commands, int negated, pipe_op* op)
| AndOr(command* commands, and_or ops)
-- NOTE: Can't have multiple var length? Maybe it's just a single command.
| ForEach(string var, word* words, command* body)
| ForExpr(arith_expr init, arith_expr test, arith_expr update, command* body)
-- NOTE: in oil, we will have expression variants?i
| While(command cond, command* body)
| Until(command cond, command* body)
| If(command test, command* body, command* orelse)
| Case(string var_name, case_arm* cases)
| FuncDef(string name, command* body)
| Assignment(assign_scope scope,
assign_flags flags,
word* words, -- names mentioned without a binding
binding* bindings) -- empty redirects for polymorphism?
| Fork(command* children) -- shell only allows one command
| Pipeline(command* children, int negated, int* stderr_indices)
-- | AndOr(command* children, and_or* ops)
| AndOr(command* children, int op_id)
-- A command list is used for for/if/while conditions and bodies. No redirects.
| CommandList(command* children)
-- A compound command, with redirects
| BraceGroup(command* children, redir* redirects)
| Subshell(command* children, redir* redirects)
| DParen(arith_expr child, redir* redirects)
| DBracket(bool_expr expr, redir* redirects)
-- do_arg_iter: whether to implicitly loop over "$@"
| ForEach(string iter_name, word* iter_words, int do_arg_iter,
command* children, redir* redirects)
-- C-style for loop
| ForExpr(arith_expr init, arith_expr cond, arith_expr update, command* children,
redir* redirects)
| While(command* children, redir* redirects)
| Until(command* children, redir* redirects)
| If(command* children, redir* redirects)
| Case(word to_match, word* pat_word_list, command* children, redir* redirects)
| FuncDef(string name, command* children, redir* redirects)
}
View
@@ -13,10 +13,10 @@
from osh import cmd_parse
def InitLexer(s):
def InitLexer(s, pool=None):
"""For tests only."""
line_lexer = lexer.LineLexer(lex.LEXER_DEF, '')
line_reader = reader.StringLineReader(s)
line_reader = reader.StringLineReader(s, pool=pool)
lx = lexer.Lexer(line_lexer, line_reader)
return line_reader, lx
View
@@ -10,19 +10,17 @@
"""
from core import base
from core.word_node import (
CompoundWord, TokenWord,
LiteralPart, EscapedLiteralPart, SingleQuotedPart, DoubleQuotedPart,
VarSubPart, CommandSubPart, ArithSubPart, ArrayLiteralPart,
VarOp0, VarOp1, SliceVarOp, PatSubVarOp)
from core.id_kind import Id, Kind, IdName
from core.tokens import Token
from core import word
from core import tdop
from core.cmd_node import ForExpressionNode
from osh import arith_parse
from osh.lex import LexMode
from osh import ast
word_part_e = ast.word_part_e
# Substitutions can be nested, but which inner subs are allowed depends on the
# outer sub. See _ReadLeftParts vs. _ReadDoubleQuotedLeftParts.
@@ -164,7 +162,7 @@ def _ReadSliceVarOp(self):
#print('BVS2', self.cur_token)
if self.token_type == Id.Arith_RBrace:
return SliceVarOp(begin, None) # No length specified
return ast.Slice(begin, None) # No length specified
# Id.Arith_Colon is a pun for Id.VOp2_Colon
elif self.token_type == Id.Arith_Colon:
@@ -173,7 +171,7 @@ def _ReadSliceVarOp(self):
if not length: return None
#print('after colon', self.cur_token)
return SliceVarOp(begin, length)
return ast.Slice(begin, length)
else:
self.AddErrorContext("Unexpected token in slice: %s", self.cur_token)
@@ -193,36 +191,38 @@ def _ReadPatSubVarOp(self, lex_mode):
if not pat: return None
if len(pat.parts) == 1:
ok, s, quoted = pat.EvalStatic()
ok, s, quoted = word.StaticEval(pat)
if ok and s == '/' and not quoted: # Looks like ${a////c}, read again
self._Next(lex_mode)
self._Peek()
p = LiteralPart(self.cur_token)
p = ast.LiteralPart(self.cur_token)
pat.parts.append(p)
# Check for other modifiers
lit_id = pat.parts[0].LiteralId()
if lit_id == Id.Lit_Slash:
do_all = True
pat.parts.pop(0)
elif lit_id == Id.Lit_Percent:
do_prefix = True
pat.parts.pop(0)
elif lit_id == Id.Lit_Pound:
do_suffix = True
pat.parts.pop(0)
first_part = pat.parts[0]
if first_part.tag == word_part_e.LiteralPart:
lit_id = first_part.token.id
if lit_id == Id.Lit_Slash:
do_all = True
pat.parts.pop(0)
elif lit_id == Id.Lit_Percent:
do_prefix = True
pat.parts.pop(0)
elif lit_id == Id.Lit_Pound:
do_suffix = True
pat.parts.pop(0)
#self._Peek()
if self.token_type == Id.Right_VarSub:
return PatSubVarOp(pat, None, do_all, do_prefix, do_suffix)
return ast.PatSub(pat, None, do_all, do_prefix, do_suffix)
elif self.token_type == Id.Lit_Slash:
replace = self._ReadVarOpArg(lex_mode) # do not stop at /
if not replace: return None
self._Peek()
if self.token_type == Id.Right_VarSub:
return PatSubVarOp(pat, replace, do_all, do_prefix, do_suffix)
return ast.PatSub(pat, replace, do_all, do_prefix, do_suffix)
else:
self._BadToken("Expected } after pat sub, got %s", self.cur_token)
@@ -239,7 +239,7 @@ def _ReadSubscript(self):
# expression.
t2 = self.lexer.LookAhead(LexMode.ARITH)
if t2.id in (Id.Lit_At, Id.Arith_Star):
op = VarOp0(t2.id)
op = ast.WholeArray(t2.id)
self._Next(LexMode.ARITH) # skip past [
self._Peek()
@@ -249,7 +249,7 @@ def _ReadSubscript(self):
anode = self._ReadArithExpr()
if not anode:
return None
op = VarOp1(Id.VOp2_LBracket, anode)
op = ast.ArrayIndex(anode)
#print('AFTER', IdName(self.token_type))
#self._Peek() # Can't do this here. Should the test go elsewhere?
@@ -284,7 +284,7 @@ def _ParseVarOf(self):
else:
bracket_op = None
part = VarSubPart(name, token=debug_token)
part = ast.VarSubPart(name) # TODO: add debug_token
part.bracket_op = bracket_op
return part
@@ -310,7 +310,7 @@ def _ParseVarExpr(self, arg_lex_mode):
self._BadToken('Unexpected token after test arg: %s', self.cur_token)
return None
part.suffix_op = VarOp1(id, arg_word)
part.suffix_op = ast.StringUnary(id, arg_word)
elif op_kind == Kind.VOp1:
id = self.token_type
@@ -319,7 +319,7 @@ def _ParseVarExpr(self, arg_lex_mode):
self._BadToken('Unexpected token after unary op: %s', self.cur_token)
return None
op = VarOp1(id, arg_word)
op = ast.StringUnary(id, arg_word)
part.suffix_op = op
elif op_kind == Kind.VOp2:
@@ -429,7 +429,7 @@ def _ReadBracedVarSubPart(self, d_quoted=False):
self.cur_token)
return None
part.prefix_op = VarOp0(Id.VSub_Pound) # length
part.prefix_op = Id.VSub_Pound # length
else: # not a prefix, '#' is the variable
part = self._ParseVarExpr(arg_lex_mode)
@@ -449,7 +449,7 @@ def _ReadBracedVarSubPart(self, d_quoted=False):
part = self._ParseVarExpr(arg_lex_mode)
if not part: return None
part.prefix_op = VarOp0(Id.VSub_Bang)
part.prefix_op = Id.VSub_Bang
else: # not a prefix, '!' is the variable
part = self._ParseVarExpr(arg_lex_mode)
@@ -467,7 +467,7 @@ def _ReadBracedVarSubPart(self, d_quoted=False):
def _ReadDollarSqPart(self):
# Do we need a flag to tell if it's $'' rather than ''?
quoted_part = SingleQuotedPart()
quoted_part = ast.SingleQuotedPart()
done = False
while not done:
@@ -490,7 +490,7 @@ def _ReadDollarSqPart(self):
return quoted_part
def _ReadSingleQuotedPart(self):
quoted_part = SingleQuotedPart()
quoted_part = ast.SingleQuotedPart()
done = False
while not done:
@@ -590,7 +590,7 @@ def _ReadDoubleQuotedPart(self, eof_type=Id.Undefined_Tok, here_doc=False):
Also ${foo%%a b c} # treat this as double quoted. until you hit
"""
quoted_part = DoubleQuotedPart()
quoted_part = ast.DoubleQuotedPart()
done = False
while not done:
@@ -604,9 +604,9 @@ def _ReadDoubleQuotedPart(self, eof_type=Id.Undefined_Tok, here_doc=False):
elif self.token_kind == Kind.Lit:
if self.token_type == Id.Lit_EscapedChar:
part = EscapedLiteralPart(self.cur_token)
part = ast.EscapedLiteralPart(self.cur_token)
else:
part = LiteralPart(self.cur_token)
part = ast.LiteralPart(self.cur_token)
quoted_part.parts.append(part)
elif self.token_kind == Kind.Left:
@@ -617,14 +617,14 @@ def _ReadDoubleQuotedPart(self, eof_type=Id.Undefined_Tok, here_doc=False):
elif self.token_kind == Kind.VSub:
# strip $ off of $name, $$, etc.
part = VarSubPart(self.cur_token.val[1:], token=self.cur_token)
part = ast.VarSubPart(self.cur_token.val[1:]) # TODO: Debug token
quoted_part.parts.append(part)
elif self.token_kind == Kind.Right:
assert self.token_type == Id.Right_DoubleQuote
if here_doc:
# Turn Id.Right_DoubleQuote into a literal part
quoted_part.parts.append(LiteralPart(self.cur_token))
quoted_part.parts.append(ast.LiteralPart(self.cur_token))
else:
done = True # assume Id.Right_DoubleQuote
@@ -675,7 +675,7 @@ def _ReadCommandSubPart(self, token_type):
self.AddErrorContext('Error parsing commmand list in command sub')
return None
cs_part = CommandSubPart(node_token, node)
cs_part = ast.CommandSubPart(node)
return cs_part
def _ReadArithExpr(self, do_next=True):
@@ -743,7 +743,7 @@ def _ReadArithSubPart(self):
self.cur_token)
return None
return ArithSubPart(anode)
return ast.ArithSubPart(anode)
def _ReadArithSub2Part(self):
"""Non-standard arith sub $[a + 1]."""
@@ -756,7 +756,7 @@ def _ReadArithSub2Part(self):
self.AddErrorContext("Expected ], got %s", self.cur_token)
return None
return ArithSubPart(anode)
return ast.ArithSubPart(anode)
def ReadDParen(self):
"""Read ((1+ 2)) -- command context.
@@ -845,10 +845,10 @@ def ReadForExpression(self):
return None
self._Next(LexMode.OUTER)
return ForExpressionNode(init_node, cond_node, update_node)
return ast.ForExpr(init_node, cond_node, update_node)
def _ReadArrayLiteralPart(self):
array_part = ArrayLiteralPart()
array_part = ast.ArrayLiteralPart()
self._Next(LexMode.OUTER) # advance past (
self._Peek()
@@ -857,10 +857,10 @@ def _ReadArrayLiteralPart(self):
# MUST use a new word parser (with same lexer).
w_parser = WordParser(self.lexer, self.line_reader)
while True:
word = w_parser.ReadWord(LexMode.OUTER)
if word.CommandId() == Id.Right_ArrayLiteral:
w = w_parser.ReadWord(LexMode.OUTER)
if word.CommandId(w) == Id.Right_ArrayLiteral:
break
array_part.words.append(word)
array_part.words.append(w)
return array_part
@@ -871,7 +871,7 @@ def _ReadCompoundWord(self, eof_type=Id.Undefined_Tok, lex_mode=LexMode.OUTER,
Postcondition: Looking at the token after, e.g. space or operator
"""
#print('_ReadCompoundWord', lex_mode)
word = CompoundWord()
word = ast.CompoundWord()
num_parts = 0
done = False
@@ -886,9 +886,9 @@ def _ReadCompoundWord(self, eof_type=Id.Undefined_Tok, lex_mode=LexMode.OUTER,
elif self.token_kind in (
Kind.Lit, Kind.KW, Kind.Assign, Kind.BoolUnary, Kind.BoolBinary):
if self.token_type == Id.Lit_EscapedChar:
part = EscapedLiteralPart(self.cur_token)
part = ast.EscapedLiteralPart(self.cur_token)
else:
part = LiteralPart(self.cur_token)
part = ast.LiteralPart(self.cur_token)
word.parts.append(part)
if self.token_type == Id.Lit_VarLike:
@@ -906,7 +906,7 @@ def _ReadCompoundWord(self, eof_type=Id.Undefined_Tok, lex_mode=LexMode.OUTER,
word.parts.append(part2)
elif self.token_kind == Kind.VSub:
part = VarSubPart(self.cur_token.val[1:]) # strip $
part = ast.VarSubPart(self.cur_token.val[1:]) # strip $
word.parts.append(part)
elif self.token_kind == Kind.Left:
@@ -972,7 +972,7 @@ def _ReadArithWord(self):
elif self.token_kind == Kind.Eof:
# Just return EOF token
w = TokenWord(self.cur_token)
w = ast.TokenWord(self.cur_token)
return w, False
#self.AddErrorContext("Unexpected EOF in arith context: %s",
# self.cur_token, token=self.cur_token)
@@ -987,7 +987,7 @@ def _ReadArithWord(self):
elif self.token_kind in (Kind.Arith, Kind.Right):
# Id.Right_ArithSub IS just a normal token, handled by ArithParser
self._Next(LexMode.ARITH)
w = TokenWord(self.cur_token)
w = ast.TokenWord(self.cur_token)
return w, False
elif self.token_kind in (Kind.Lit, Kind.Left):
@@ -999,9 +999,9 @@ def _ReadArithWord(self):
elif self.token_kind == Kind.VSub:
# strip $ off of $name, $$, etc.
# TODO: Maybe consolidate with _ReadDoubleQuotedPart
part = VarSubPart(self.cur_token.val[1:], token=self.cur_token)
part = ast.VarSubPart(self.cur_token.val[1:]) # TODO: debug token
self._Next(LexMode.ARITH)
w = CompoundWord(parts=[part])
w = ast.CompoundWord([part])
return w, False
else:
@@ -1023,7 +1023,7 @@ def _ReadWord(self, lex_mode):
if self.token_kind == Kind.Eof:
# No advance
return TokenWord(self.cur_token), False
return ast.TokenWord(self.cur_token), False
# Allow Arith for ) at end of for loop?
elif self.token_kind in (Kind.Op, Kind.Redir, Kind.Arith):
@@ -1033,7 +1033,7 @@ def _ReadWord(self, lex_mode):
#print('SKIP(nl)', self.cur_token)
return None, True
return TokenWord(self.cur_token), False
return ast.TokenWord(self.cur_token), False
elif self.token_kind == Kind.Right:
#print('WordParser.Read: Kind.Right', self.cur_token)
@@ -1043,7 +1043,7 @@ def _ReadWord(self, lex_mode):
raise AssertionError(self.cur_token)
self._Next(lex_mode)
return TokenWord(self.cur_token), False
return ast.TokenWord(self.cur_token), False
elif self.token_kind in (Kind.Ignored, Kind.WS):
self._Next(lex_mode)
@@ -1104,26 +1104,26 @@ def ReadWord(self, lex_mode):
while True:
if lex_mode == LexMode.ARITH:
# TODO: Can this be unified?
word, need_more = self._ReadArithWord()
w, need_more = self._ReadArithWord()
elif lex_mode in (LexMode.OUTER, LexMode.DBRACKET, LexMode.BASH_REGEX):
word, need_more = self._ReadWord(lex_mode)
w, need_more = self._ReadWord(lex_mode)
else:
raise AssertionError('Invalid lex state %s' % lex_mode)
if not need_more:
break
if not word:
if not w:
return None
if self.words_out is not None:
self.words_out.append(word)
self.cursor = word
self.words_out.append(w)
self.cursor = w
# TODO: Do consolidation of newlines in the lexer?
# Note that there can be an infinite (Id.Ignored_Comment Id.Op_Newline
# Id.Ignored_Comment Id.Op_Newline) sequence, so we have to keep track of
# the last non-ignored token.
self.cursor_was_newline = (self.cursor.CommandId() == Id.Op_Newline)
self.cursor_was_newline = (word.CommandId(self.cursor) == Id.Op_Newline)
return self.cursor
def ReadHereDocBody(self):
@@ -1135,7 +1135,7 @@ def ReadHereDocBody(self):
CompoundWord. NOTE: We could also just use a DoubleQuotedPart for both
cases?
"""
w = CompoundWord()
w = ast.CompoundWord()
dq = self._ReadDoubleQuotedPart(here_doc=True)
if not dq:
self.AddErrorContext('Error parsing here doc body')
View
@@ -11,14 +11,17 @@
import unittest
from core.word_node import LiteralPart, CompoundWord, TokenWord
from core.id_kind import Id, IdName
from core.tokens import Token
from core import word
from osh import ast
from osh import parse_lib
from osh.lex import LexMode
from osh.word_parse import WordParser # module under test
arith_expr_e = ast.arith_expr_e
def InitWordParser(s):
line_reader, lexer = parse_lib.InitLexer(s)
@@ -38,7 +41,7 @@ def _assertReadWord(test, word_str):
# Next word must be \n
w2 = w_parser.ReadWord(LexMode.OUTER)
test.assertEqual(TokenWord(Token(Id.Op_Newline, '\n')), w2)
test.assertEqual(ast.TokenWord(Token(Id.Op_Newline, '\n')), w2)
return w
@@ -74,12 +77,12 @@ def _GetVarSub(test, w):
class WordParserTest(unittest.TestCase):
def testEvalStatic(self):
def testStaticEvalWord(self):
expr = r'\EOF' # Quoted here doc delimiter
w_parser = InitWordParser(expr)
w = w_parser.ReadWord(LexMode.OUTER)
print(w)
ok, s, quoted = w.EvalStatic()
ok, s, quoted = word.StaticEval(w)
self.assertEqual(True, ok)
self.assertEqual('EOF', s)
self.assertEqual(True, quoted)
@@ -97,14 +100,14 @@ def testDisambiguatePrefix(self):
w = _assertReadWord(self, '${15}')
w = _assertReadWord(self, '${#var}')
self.assertEqual(Id.VSub_Pound, _GetPrefixOp(self, w).id)
self.assertEqual(Id.VSub_Pound, _GetPrefixOp(self, w))
w = _assertReadWord(self, '${!ref}')
self.assertEqual(Id.VSub_Bang, _GetPrefixOp(self, w).id)
self.assertEqual(Id.VSub_Bang, _GetPrefixOp(self, w))
# Length of length
w = _assertReadWord(self, '${##}')
self.assertEqual('#', _GetVarSub(self, w).name)
self.assertEqual(Id.VSub_Pound, _GetPrefixOp(self, w).id)
self.assertEqual(Id.VSub_Pound, _GetPrefixOp(self, w))
w = _assertReadWord(self, '${array[0]}')
self.assertEqual(1, len(w.parts))
@@ -114,20 +117,20 @@ def testDisambiguatePrefix(self):
# Length of element
w = _assertReadWord(self, '${#array[0]}')
self.assertEqual(1, len(w.parts))
self.assertEqual(Id.VSub_Pound, _GetPrefixOp(self, w).id)
self.assertEqual(Id.VSub_Pound, _GetPrefixOp(self, w))
# Ref for element
w = _assertReadWord(self, '${!array[0]}')
self.assertEqual(1, len(w.parts))
self.assertEqual(Id.VSub_Bang, _GetPrefixOp(self, w).id)
self.assertEqual(Id.VSub_Bang, _GetPrefixOp(self, w))
w = _assertReadWord(self, '${var#prefix}')
self.assertEqual(1, len(w.parts))
self.assertEqual(Id.VOp1_Pound, _GetSuffixOp(self, w).id)
self.assertEqual(Id.VOp1_Pound, _GetSuffixOp(self, w).op_id)
w = _assertReadWord(self, '${!var#prefix}')
self.assertEqual(1, len(w.parts))
self.assertEqual(Id.VSub_Bang, _GetPrefixOp(self, w).id)
self.assertEqual(Id.VOp1_Pound, _GetSuffixOp(self, w).id)
self.assertEqual(Id.VSub_Bang, _GetPrefixOp(self, w))
self.assertEqual(Id.VOp1_Pound, _GetSuffixOp(self, w).op_id)
_assertReadWordFailure(self, '${#var#prefix}')
@@ -144,8 +147,8 @@ def testVarOf(self):
# Should be DISALLOWED!
#w = _assertReadWord(self, '${11[@]}')
def assertUnquoted(self, expected, word):
ok, s, quoted = word.EvalStatic()
def assertUnquoted(self, expected, w):
ok, s, quoted = word.StaticEval(w)
self.assertTrue(ok)
self.assertEqual(expected, s)
self.assertFalse(quoted)
@@ -201,7 +204,7 @@ def testPatSub(self):
op = _GetSuffixOp(self, w)
self.assertUnquoted('pat', op.pat)
ok, s, quoted = op.replace.EvalStatic()
ok, s, quoted = word.StaticEval(op.replace)
self.assertTrue(ok)
self.assertEqual('//', s)
self.assertTrue(quoted)
@@ -214,7 +217,7 @@ def testPatSub(self):
self.assertUnquoted('/', op.pat)
ok, s, quoted = op.replace.EvalStatic()
ok, s, quoted = word.StaticEval(op.replace)
self.assertTrue(ok)
self.assertEqual(r'\/', s)
@@ -234,14 +237,14 @@ def testSlice(self):
def testLength(self):
# Synonym for $#, had a bug here
w = _assertReadWord(self, '${#@}')
self.assertTrue(Id.VSub_Pound, _GetPrefixOp(self, w).id)
self.assertTrue(Id.VSub_Pound, _GetPrefixOp(self, w))
# Length of arg 11
w = _assertReadWord(self, '${#11}')
self.assertTrue(Id.VSub_Pound, _GetPrefixOp(self, w).id)
self.assertTrue(Id.VSub_Pound, _GetPrefixOp(self, w))
w = _assertReadWord(self, '${#str}')
self.assertTrue(Id.VSub_Pound, _GetPrefixOp(self, w).id)
self.assertTrue(Id.VSub_Pound, _GetPrefixOp(self, w))
w = _assertReadWord(self, '${#array[0]}')
print(w)
@@ -254,14 +257,14 @@ def testLength(self):
def testUnary(self):
w = _assertReadWord(self, '${var#}')
self.assertTrue(Id.VOp1_Pound, _GetSuffixOp(self, w).id)
self.assertTrue(Id.VOp1_Pound, _GetSuffixOp(self, w).op_id)
w = _assertReadWord(self, '${var#prefix}')
self.assertTrue(Id.VOp1_Pound, _GetSuffixOp(self, w).id)
self.assertTrue(Id.VOp1_Pound, _GetSuffixOp(self, w).op_id)
w = _assertReadWord(self, '${var##}')
self.assertTrue(Id.VOp1_DPound, _GetSuffixOp(self, w).id)
self.assertTrue(Id.VOp1_DPound, _GetSuffixOp(self, w).op_id)
w = _assertReadWord(self, '${var##prefix}')
self.assertTrue(Id.VOp1_DPound, _GetSuffixOp(self, w).id)
self.assertTrue(Id.VOp1_DPound, _GetSuffixOp(self, w).op_id)
w = _assertReadWord(self, '${var%suffix}')
w = _assertReadWord(self, '${var%%suffix}')
@@ -318,7 +321,7 @@ def testRead(self):
print(w)
if w.CommandId() == Id.Eof_Real:
if word.CommandId(w) == Id.Eof_Real:
break
def testReadComment(self):
@@ -366,13 +369,13 @@ def testReadRegex(self):
def testReadArithWord(self):
w = _assertReadWord(self, '$(( f(x) ))')
anode = w.parts[0].anode
self.assertEqual(Id.Node_FuncCall, anode.id)
child = w.parts[0].anode
self.assertEqual(arith_expr_e.FuncCall, child.tag)
w = _assertReadWord(self, '$(( f(1, 2, 3, 4) ))')
anode = w.parts[0].anode
self.assertEqual(Id.Node_FuncCall, anode.id)
self.assertEqual(4, len(anode.args))
child = w.parts[0].anode
self.assertEqual(arith_expr_e.FuncCall, child.tag)
self.assertEqual(4, len(child.args))
def testReadArith(self):
CASES = [
@@ -419,7 +422,7 @@ def testReadArith(self):
self.fail(err)
break
print(w)
if w.CommandId() in (Id.Eof_Real, Id.Unknown_Tok):
if word.CommandId(w) in (Id.Eof_Real, Id.Unknown_Tok):
break
def testMultiLine(self):
@@ -433,32 +436,62 @@ def testMultiLine(self):
print('--MULTI')
w = w_parser.ReadWord(LexMode.OUTER)
parts = [LiteralPart(Token(Id.Lit_Chars, 'ls'))]
self.assertEqual(CompoundWord(parts=parts), w)
parts = [ast.LiteralPart(Token(Id.Lit_Chars, 'ls'))]
self.assertEqual(ast.CompoundWord(parts), w)
w = w_parser.ReadWord(LexMode.OUTER)
parts = [LiteralPart(Token(Id.Lit_Chars, 'foo'))]
self.assertEqual(CompoundWord(parts=parts), w)
parts = [ast.LiteralPart(Token(Id.Lit_Chars, 'foo'))]
self.assertEqual(ast.CompoundWord(parts), w)
w = w_parser.ReadWord(LexMode.OUTER)
t = Token(Id.Op_Newline, '\n')
self.assertEqual(TokenWord(t), w)
self.assertEqual(ast.TokenWord(t), w)
w = w_parser.ReadWord(LexMode.OUTER)
parts = [LiteralPart(Token(Id.Lit_Chars, 'ls'))]
self.assertEqual(CompoundWord(parts=parts), w)
parts = [ast.LiteralPart(Token(Id.Lit_Chars, 'ls'))]
self.assertEqual(ast.CompoundWord(parts), w)
w = w_parser.ReadWord(LexMode.OUTER)
parts = [LiteralPart(Token(Id.Lit_Chars, 'bar'))]
self.assertEqual(CompoundWord(parts=parts), w)
parts = [ast.LiteralPart(Token(Id.Lit_Chars, 'bar'))]
self.assertEqual(ast.CompoundWord(parts), w)
w = w_parser.ReadWord(LexMode.OUTER)
t = Token(Id.Op_Newline, '\n')
self.assertEqual(TokenWord(t), w)
self.assertEqual(ast.TokenWord(t), w)
w = w_parser.ReadWord(LexMode.OUTER)
t = Token(Id.Eof_Real, '')
self.assertEqual(TokenWord(t), w)
self.assertEqual(ast.TokenWord(t), w)
def testParseErrorLocation(self):
  """Smoke-test ParseErrorLocation on one word of each major part type.

  Only checks that each call completes; output is printed for inspection.
  """
  from core import word

  # One representative input per word-part kind: array literal, bare
  # literal, escaped char, single/double quotes, command sub, arith sub,
  # tilde, and a braced var sub with a suffix op.
  cases = [
      'a=(1 2 3)',
      'foo',
      '\\$',
      "''",
      '""',
      '$(echo command sub)',
      '$(( 1 + 2 ))',
      '~user',
      '${var#}',
  ]
  for case in cases:
    w = _assertReadWord(self, case)
    print(word.ParseErrorLocation(w))
if __name__ == '__main__':
View
@@ -81,4 +81,22 @@ osh-interactive() {
echo 'exit' | $OSH -i
}
# die MSG...: write the message to stderr, then abort with status 1.
die() {
  echo "$@" >&2
  exit 1
}
# assert ARGS...: run `test ARGS`; on failure, die with the failed expression.
assert() {
  if ! test "$@"; then
    die "$@ failed"
  fi
}
# Regression check: empty input via -c and via stdin. Had a bug with
# these two cases.
empty() {
  bin/osh -c ''; assert $? -eq 0
  echo -n '' | bin/osh; assert $? -eq 0
}
"$@"