From 451ed31026ec0c18fcb8516f7ab7c600655b141f Mon Sep 17 00:00:00 2001 From: Andy Chu Date: Mon, 11 Jan 2021 00:33:07 -0800 Subject: [PATCH] [oil-language] Make assignment keyword checks all static. They're done in the CommandParser now. We got rid of all dynamic checks! This is (good) fallout from the 'var in a loop' problem. - [spec/oil-assign] Fix setref test cases --- core/state.py | 26 +-------- osh/cmd_parse.py | 111 +++++++++++++++++++++++++-------------- osh/word_parse.py | 20 +++++-- spec/oil-assign.test.sh | 35 +++++------- spec/oil-options.test.sh | 1 + test/parse-errors.sh | 41 ++++++++++++++- 6 files changed, 142 insertions(+), 92 deletions(-) diff --git a/core/state.py b/core/state.py index c92ce90d78..7f94ef06cc 100644 --- a/core/state.py +++ b/core/state.py @@ -11,7 +11,7 @@ import cStringIO -from _devbuild.gen.id_kind_asdl import Id, Id_t +from _devbuild.gen.id_kind_asdl import Id from _devbuild.gen.option_asdl import option_i from _devbuild.gen.runtime_asdl import ( value, value_e, value_t, value__Str, value__MaybeStrArray, value__AssocArray, @@ -1268,27 +1268,6 @@ def IsAssocArray(self, name): return True return False - def _CheckOilKeyword(self, keyword_id, name, cell): - # type: (Id_t, str, Optional[cell]) -> None - """Check that 'var' and setvar/set are used correctly. - - NOTE: These are dynamic checks, but the syntactic difference between - definition and mutation will help translate the Oil subset of OSH to static - languages. - """ - # This dynamic check can prevent 'local' before 'var', or 'readonly before - # 'const', etc. But it also prevents 'var' in a loop, which we don't want. - # TODO: Possibly disable local/readonly/declare inside 'proc'. - - # if cell is not None and keyword_id in (Id.KW_Var, Id.KW_Const): - # e_die('%r has already been declared', name) - - # TODO: Also do this at parse time. We have some name resolution in - # ctx_Declarations. - if cell is None and keyword_id in (Id.KW_Set, Id.KW_SetLocal, - Id.KW_SetGlobal): - e_die("%r hasn't been declared", name) - def _DisallowNamerefCycle(self, name, which_scopes, ref_trail): # type: (str, scope_t, List[str]) -> None """Recursively resolve names until the trail ends or a cycle is detected. @@ -1367,7 +1346,6 @@ def SetValue(self, lval, val, which_scopes, flags=0): which_scopes, ref_required) - self._CheckOilKeyword(keyword_id, lval.name, cell) if cell: # Clear before checking readonly bit. # NOTE: Could be cell.flags &= flag_clear_mask @@ -1445,7 +1423,6 @@ def SetValue(self, lval, val, which_scopes, flags=0): # -o nounset fails.) cell, name_map, _ = self._ResolveNameOrRef(lval.name, which_scopes, ref_required) - self._CheckOilKeyword(keyword_id, lval.name, cell) if not cell: self._BindNewArrayWithEntry(name_map, lval, rval, flags) return @@ -1505,7 +1482,6 @@ def SetValue(self, lval, val, which_scopes, flags=0): cell, name_map, _ = self._ResolveNameOrRef(lval.name, which_scopes, ref_required) - self._CheckOilKeyword(keyword_id, lval.name, cell) if cell.readonly: e_die("Can't assign to readonly associative array", span_id=left_spid) diff --git a/osh/cmd_parse.py b/osh/cmd_parse.py index a2da6bcf8f..ad1c6bb53f 100644 --- a/osh/cmd_parse.py +++ b/osh/cmd_parse.py @@ -347,19 +347,8 @@ def _MakeSimpleCommand(preparsed_list, suffix_words, redirects, block): return node -class Declarations(object): - """Ensure that var and const can only appear once per function. - - Bash allows this: - - f() { - g() { - echo 'top level function defined in another one' - } - } - - So we have a stack... And then - """ +class VarChecker(object): + """Statically check for proc and variable usage errors.""" def __init__(self): # type: () -> None """ @@ -368,16 +357,28 @@ def __init__(self): """ # tokens has 'proc' or some other token self.tokens = [] # type: List[Token] - self.names = [{}] # type: List[Dict[str, bool]] + self.names = [{}] # type: List[Dict[str, Id_t]] def Push(self, blame_tok): # type: (Token) -> None + """ + Bash allows this, but it's confusing because it's the same as two functions + at the top level. + + f() { + g() { + echo 'top level function defined in another one' + } + } + + Oil disallows nested procs. + """ if len(self.tokens) != 0: if self.tokens[0].id == Id.KW_Proc or blame_tok.id == Id.KW_Proc: p_die("procs can't contain other procs or functions", token=blame_tok) self.tokens.append(blame_tok) - entry = {} # type: Dict[str, bool] + entry = {} # type: Dict[str, Id_t] self.names.append(entry) def Pop(self): @@ -385,24 +386,52 @@ def Pop(self): self.names.pop() self.tokens.pop() - def Register(self, name_tok): - # type: (Token) -> None - """ - Register a variable or param declaration. + def Check(self, keyword_id, name_tok): + # type: (Id_t, Token) -> None + """Check for errors in declaration and mutation errors. + + var x, const x: + x already declared + setlocal x: (must be var) + x is not declared + x is constant + setvar x: + x is constant (only for locals) + setglobal x: + I don't think any errors are possible + We would have to have many conditions to statically know the names: + - no 'source' + - shopt -u copy_env. + - AND use lib has to be static + + LATER: + setref x: + Should only mutate out params """ top = self.names[-1] name = name_tok.val - if name in top: - p_die('%r was already declared', name, token=name_tok) - else: - top[name] = True + if keyword_id in (Id.KW_Const, Id.KW_Var): + if name in top: + p_die('%r was already declared', name, token=name_tok) + else: + top[name] = keyword_id + + if keyword_id in (Id.KW_Set, Id.KW_SetLocal): + if name not in top: + p_die("%r hasn't been declared", name, token=name_tok) + + if keyword_id in (Id.KW_Set, Id.KW_SetLocal, Id.KW_SetVar): + if name in top and top[name] == Id.KW_Const: + p_die("Can't modify constant %r", name, token=name_tok) + + # TODO: setref. -class ctx_Declarations(object): - def __init__(self, declarations, blame_tok): - # type: (Declarations, Token) -> None - declarations.Push(blame_tok) - self.declarations = declarations +class ctx_VarChecker(object): + def __init__(self, var_checker, blame_tok): + # type: (VarChecker, Token) -> None + var_checker.Push(blame_tok) + self.var_checker = var_checker def __enter__(self): # type: () -> None @@ -410,7 +439,7 @@ def __enter__(self): def __exit__(self, type, value, traceback): # type: (Any, Any, Any) -> None - self.declarations.Pop() + self.var_checker.Pop() SECONDARY_KEYWORDS = [ @@ -440,7 +469,7 @@ def __init__(self, parse_ctx, w_parser, lexer, line_reader): # A hacky boolean to remove 'if cd / {' ambiguity. self.allow_block = True self.parse_opts = parse_ctx.parse_opts - self.declarations = Declarations() + self.var_checker = VarChecker() self.Reset() @@ -655,7 +684,7 @@ def _ScanSimpleCommand(self): if self.allow_block: # Disabled for if/while condition, etc. blame_tok = _KeywordToken(self.cur_word) # Our own scope for 'var' - with ctx_Declarations(self.declarations, blame_tok): + with ctx_VarChecker(self.var_checker, blame_tok): block = self.ParseBraceGroup() if 0: print('--') @@ -1597,23 +1626,26 @@ def ParseCompoundCommand(self): # Oil extensions if self.c_id in (Id.KW_Var, Id.KW_Const): + keyword_id = self.c_id kw_token = word_.LiteralToken(self.cur_word) self._Next() n8 = self.w_parser.ParseVarDecl(kw_token) for lhs in n8.lhs: - self.declarations.Register(lhs.name) + self.var_checker.Check(keyword_id, lhs.name) return n8 if self.c_id in (Id.KW_SetVar, Id.KW_SetRef, Id.KW_SetLocal, Id.KW_SetGlobal): kw_token = word_.LiteralToken(self.cur_word) self._Next() - return self.w_parser.ParsePlaceMutation(kw_token) + n9 = self.w_parser.ParsePlaceMutation(kw_token, self.var_checker) + return n9 if self.parse_opts.parse_set() and self.c_id == Id.KW_Set: kw_token = word_.LiteralToken(self.cur_word) self._Next() - return self.w_parser.ParsePlaceMutation(kw_token) + n10 = self.w_parser.ParsePlaceMutation(kw_token, self.var_checker) + return n10 # Happens in function body, e.g. myfunc() oops p_die('Unexpected word while parsing compound command', word=self.cur_word) @@ -1663,7 +1695,7 @@ def ParseFunctionDef(self): func = command.ShFunction() func.name = name - with ctx_Declarations(self.declarations, blame_tok): + with ctx_VarChecker(self.var_checker, blame_tok): func.body = self.ParseCompoundCommand() # matches ParseKshFunctionDef below @@ -1704,7 +1736,7 @@ def ParseKshFunctionDef(self): func = command.ShFunction() func.name = name - with ctx_Declarations(self.declarations, keyword_tok): + with ctx_VarChecker(self.var_checker, keyword_tok): func.body = self.ParseCompoundCommand() # matches ParseFunctionDef above @@ -1718,12 +1750,13 @@ def ParseOilProc(self): node = command.Proc() keyword_tok = _KeywordToken(self.cur_word) - with ctx_Declarations(self.declarations, keyword_tok): + with ctx_VarChecker(self.var_checker, keyword_tok): self.w_parser.ParseProc(node) if node.sig.tag_() == proc_sig_e.Closed: # Register params sig = cast(proc_sig__Closed, node.sig) for param in sig.params: - self.declarations.Register(param.name) + # Treat params as variables. TODO: Treat setref params differently? + self.var_checker.Check(Id.KW_Var, param.name) self._Next() node.body = self.ParseBraceGroup() @@ -1887,7 +1920,7 @@ def ParseCommand(self): # NOTE: tok.id should be Lit_Chars, but that check is redundant if (match.IsValidVarName(tok.val) and self.w_parser.LookAhead() == Id.Lit_Equals): - self.declarations.Register(tok) + self.var_checker.Check(Id.KW_Const, tok) enode = self.w_parser.ParseBareDecl() self._Next() # Somehow this is necessary diff --git a/osh/word_parse.py b/osh/word_parse.py index 877c41ed9a..d8fdeba88c 100644 --- a/osh/word_parse.py +++ b/osh/word_parse.py @@ -63,8 +63,10 @@ word_part, word_part_e, word_part_t, word_part__ArithSub, word_part__ExtGlob, word_part__ExprSub, - command, command_t, command__ForExpr, command__Proc, command__Import, - command__VarDecl, + command, command__ForExpr, command__Proc, command__Import, + command__PlaceMutation, command__VarDecl, + + place_expr_e, place_expr__Var, expr_t, source, arg_list, ) @@ -79,6 +81,7 @@ from osh import arith_parse from osh import braces from osh import word_ +from mycpp.mylib import tagswitch from typing import List, Optional, Tuple, cast from typing import TYPE_CHECKING @@ -86,6 +89,7 @@ from frontend.lexer import Lexer from frontend.parse_lib import ParseContext from frontend.reader import _Reader + from osh.cmd_parse import VarChecker _ = log @@ -925,8 +929,8 @@ def ParseVarDecl(self, kw_token): self._Next(lex_mode_e.ShCommand) # always back to this return enode - def ParsePlaceMutation(self, kw_token): - # type: (Token) -> command_t + def ParsePlaceMutation(self, kw_token, var_checker): + # type: (Token, VarChecker) -> command__PlaceMutation """ setvar a[i] = 1 setvar i += 1 @@ -939,6 +943,14 @@ def ParsePlaceMutation(self, kw_token): if last_token.id == Id.Op_RBrace: last_token.id = Id.Lit_RBrace + for place in enode.lhs: + UP_place = place + with tagswitch(place) as case: + if case(place_expr_e.Var): + place = cast(place_expr__Var, UP_place) + var_checker.Check(kw_token.id, place.name) + # TODO: Do indices as well + # Let the CommandParser see the Op_Semi or Op_Newline. self.buffered_word = last_token self._Next(lex_mode_e.ShCommand) # always back to this diff --git a/spec/oil-assign.test.sh b/spec/oil-assign.test.sh index 6ac1b475c3..c2b4b536fb 100644 --- a/spec/oil-assign.test.sh +++ b/spec/oil-assign.test.sh @@ -43,14 +43,8 @@ proc f { setvar x = 'mutated' echo x=$x } -var x = 'global' -echo x=$x -f -echo x=$x -## status: 1 +## status: 2 ## STDOUT: -x=global -x=local ## END #### const can't be redeclared @@ -107,11 +101,8 @@ f() { set z = 3 # NOT DECLARED echo z=$z } -f -## status: 1 +## status: 2 ## STDOUT: -x=XX -y=YY ## END #### setlocal works (with bin/osh, no shopt) @@ -129,10 +120,10 @@ p #### setlocal at top level var x = 1 -setlocal x = 42 +setlocal x = 42 # this is allowed echo $x setlocal y = 50 # error because it's not declared -## status: 1 +## status: 2 ## STDOUT: 42 ## END @@ -165,7 +156,7 @@ x=local x=mutated ## END -#### setglobal of undeclared var is an error +#### setglobal of undeclared var is allowed var x = 'XX' echo x=$x setglobal x = 'xx' @@ -174,15 +165,11 @@ echo x=$x # fatal error setglobal y = 'YY' -## status: 1 ## STDOUT: x=XX x=xx ## END - - - #### var/setvar x, y = 1, 2 # Python doesn't allow you to have annotation on each variable! @@ -317,20 +304,22 @@ f=local f=setvar ## END -#### setref (not implemented) - -# TODO: should be :out (rather than out Ref, because procs have no types?) -# or (out Ref, b Block) ? -proc p (s, out) { +#### setref +proc p (s, :out) { setref out = 'YY' } var x = 'XX' echo x=$x p abcd :x echo x=$x + +p zz :undefined_var +echo u=$undefined_var + ## STDOUT: x=XX x=YY +u=YY ## END #### circular dict diff --git a/spec/oil-options.test.sh b/spec/oil-options.test.sh index 6a0eaece82..788e0192fd 100644 --- a/spec/oil-options.test.sh +++ b/spec/oil-options.test.sh @@ -221,6 +221,7 @@ echo x=$x echo argv "$@" shopt -s parse_set +var x = 1 set x=42 builtin set -- echo x=$x diff --git a/test/parse-errors.sh b/test/parse-errors.sh index bf7e6f0bb0..f524f55a4a 100755 --- a/test/parse-errors.sh +++ b/test/parse-errors.sh @@ -898,7 +898,7 @@ oil_var_decl() { ' if is-oil-native; then - echo 'skipping oil_nested_proc' # TODO: re-enable with pgen2 + echo 'skipping oil_var_decl' # TODO: re-enable with pgen2 return fi @@ -915,6 +915,44 @@ oil_var_decl() { ' } +oil_place_mutation() { + set +o errexit + + _oil-parse-error ' + proc p(x) { + setvar g = "G" # This can be a global, no error + + setlocal L = "L" # ERROR: not declared + } + ' + + _oil-parse-error ' + proc p(x) { + const c = 123 + setvar c = 42 # ERROR: cannot modify constant + } + ' + + # can't modify constant + _oil-parse-error ' + proc p(x) { + c = 123 + set c = 42 # ERROR: cannot modify constant + } + ' + + if is-oil-native; then + echo 'skipping oil_place_mutation' # TODO: re-enable with pgen2 + return + fi + + _should-parse ' + proc p(x) { + setvar x = "X" # is mutating params allowed? I guess why not. + } + ' +} + cases-in-strings() { set +o errexit @@ -963,6 +1001,7 @@ cases-in-strings() { oil_to_make_nicer oil_nested_proc oil_var_decl + oil_place_mutation parse_at }