Skip to content

Commit

Permalink
[pgen2 refactor] Use NT_OFFSET more consistently
Browse files Browse the repository at this point in the history
The ISNONTERMINAL refactoring reminded me of this issue.

We have to shift Kind.Expr down below 256 to prevent a conflict from
occurring.

Or we can adjust NT_OFFSET everywhere.
  • Loading branch information
Andy Chu committed Apr 12, 2024
1 parent 64a054e commit 18484a2
Show file tree
Hide file tree
Showing 6 changed files with 36 additions and 33 deletions.
9 changes: 5 additions & 4 deletions pgen2/grammar.py
Expand Up @@ -16,6 +16,7 @@

from mycpp.mylib import log
from mycpp import mylib
from pgen2 import token

from typing import TYPE_CHECKING

Expand Down Expand Up @@ -47,9 +48,9 @@ class Grammar(object):
The instance variables are as follows:
symbol2number -- a dict mapping symbol names to numbers. Symbol
numbers are always 256 or higher, to distinguish
them from token numbers, which are between 0 and
255 (inclusive).
numbers are always NT_OFFSET or higher, to distinguish
them from token numbers, which are between 0 and 255
(inclusive).
number2symbol -- a dict mapping numbers to symbol names;
these two are each other's inverse.
Expand Down Expand Up @@ -105,7 +106,7 @@ def __init__(self):
self.keywords = {} # type: Dict[str, int]
self.tokens = {} # type: Dict[int, int]
self.symbol2label = {} # type: Dict[str, int]
self.start = 256
self.start = token.NT_OFFSET

if mylib.PYTHON:
def dump(self, f):
Expand Down
5 changes: 3 additions & 2 deletions pgen2/parse.py
Expand Up @@ -14,6 +14,7 @@

from typing import TYPE_CHECKING, Optional, Any, List
from pgen2.pnode import PNode, PNodeAllocator
from pgen2 import token

if TYPE_CHECKING:
from _devbuild.gen.syntax_asdl import Token
Expand Down Expand Up @@ -137,7 +138,7 @@ def addtoken(self, typ, opaque, ilabel):
t = self.grammar.labels[ilab]
if ilabel == ilab:
# Look it up in the list of labels
assert t < 256, t
assert t < token.NT_OFFSET, t
# Shift a token; we're done with it
self.shift(typ, opaque, newstate)
# Pop while we are in an accept-only state
Expand All @@ -162,7 +163,7 @@ def addtoken(self, typ, opaque, ilabel):

# Done with this token
return False
elif t >= 256:
elif t >= token.NT_OFFSET:
# See if it's a symbol and if we're in its first set
itsdfa = self.grammar.dfas[t]
_, itsfirst = itsdfa
Expand Down
4 changes: 2 additions & 2 deletions pgen2/pgen.py
Expand Up @@ -357,7 +357,7 @@ def make_label(tok_def, gr, label):
if value in gr.keywords:
return gr.keywords[value]
else:
gr.labels.append(token.NAME) # arbitrary number < 256
gr.labels.append(token.NAME) # arbitrary number < NT_OFFSET
gr.keywords[value] = ilabel
return ilabel

Expand Down Expand Up @@ -405,7 +405,7 @@ def MakeGrammar(f, tok_def=None):

gr = grammar.Grammar()
for name in names:
i = 256 + len(gr.symbol2number)
i = token.NT_OFFSET + len(gr.symbol2number)
gr.symbol2number[name] = i
gr.number2symbol[i] = name

Expand Down
20 changes: 15 additions & 5 deletions test/ysh-parse-errors.sh
Expand Up @@ -65,21 +65,31 @@ test-func-var-checker() {

test-arglist() {
_ysh-parse-error 'json write ()'

# named args allowed in first group
_ysh-should-parse 'json write (42, indent=1)'
_ysh-should-parse 'json write (42; indent=2)'

_ysh-should-parse 'p (; n=42)'
_ysh-should-parse '= f(; n=42)'
_ysh-should-parse '= toJson(42, indent=1)'
_ysh-should-parse '= toJson(42; indent=2)'

# Named group only
_ysh-should-parse 'p (; n=true)'
_ysh-should-parse '= f(; n=true)'

# Allowed because the named section can be empty
# Empty named group
_ysh-should-parse 'p (;)'
_ysh-should-parse '= f(;)'

_ysh-should-parse 'p (42;)'
_ysh-should-parse '= f(42;)'

# No block group in func arg lists
_ysh-parse-error '= f(42; n=true; block)'
_ysh-parse-error '= f(42; ; block)'

# TODO: blocks
#_ysh-should-parse 'p (42; n=42; block)'
# TODO: Block expressions in proc arg lists
#_ysh-should-parse 'p (42; n=true; block)'
#_ysh-should-parse 'p (42; ; block)'

#_ysh-parse-error 'p (42; n=42; bad=3)'
Expand Down
23 changes: 7 additions & 16 deletions ysh/expr_to_ast.py
Expand Up @@ -92,6 +92,8 @@

RANGE_POINT_TOO_LONG = "Range start/end shouldn't have more than one character"

POS_ARG_MISPLACED = "Positional arg can't appear in group of named args"

# Copied from pgen2/token.py to avoid dependency.
NT_OFFSET = 256

Expand All @@ -105,25 +107,16 @@ def MakeGrammarNames(ysh_grammar):

names = {}

#from _devbuild.gen.id_kind_asdl import _Id_str
# This is a dictionary

# _Id_str()

for id_name, k in lexer_def.ID_SPEC.id_str2int.items():
# Hm some are out of range
#assert k < 256, (k, id_name)

# HACK: Cut it off at 256 now! Expr/Arith/Op doesn't go higher than
# that. TODO: Change NT_OFFSET? That might affect C code though.
# Best to keep everything fed to pgen under 256. This only affects
# pretty printing.
if k < 256:
# TODO: Some tokens have values greater than NT_OFFSET
if k < NT_OFFSET:
names[k] = id_name

for k, v in ysh_grammar.number2symbol.items():
# eval_input == 256. Remove?
assert k >= 256, (k, v)
assert k >= NT_OFFSET, (k, v)
names[k] = v

return names
Expand Down Expand Up @@ -971,8 +964,7 @@ def _Argument(self, p_node, after_semi, arglist):
if n == 1:
child = p_node.GetChild(0)
if after_semi:
p_die('Positional args must come before the semi-colon',
child.tok)
p_die(POS_ARG_MISPLACED, child.tok)
arg = self.Expr(child)
pos_args.append(arg)
return
Expand All @@ -993,8 +985,7 @@ def _Argument(self, p_node, after_semi, arglist):
if p_node.GetChild(1).typ == grammar_nt.comp_for:
child = p_node.GetChild(0)
if after_semi:
p_die('Positional args must come before the semi-colon',
child.tok)
p_die(POS_ARG_MISPLACED, child.tok)

elt = self.Expr(child)
comp = self._CompFor(p_node.GetChild(1))
Expand Down
8 changes: 4 additions & 4 deletions ysh/grammar_gen.py
Expand Up @@ -16,7 +16,7 @@
from frontend import lexer
from frontend import lexer_def
from frontend import reader
from pgen2 import parse, pgen
from pgen2 import parse, pgen, token


class OilTokenDef(object):
Expand All @@ -30,7 +30,7 @@ def GetTerminalNum(self, label):
"""e.g. translate Expr_Name in the grammar to 178."""
id_ = getattr(Id, label)
#log('Id %s = %d', id_, id_)
assert id_ < 256, id_
assert id_ < token.NT_OFFSET, id_
return id_

def GetKeywordNum(self, s):
Expand All @@ -42,7 +42,7 @@ def GetKeywordNum(self, s):
id_ = self.keyword_ops.get(s)
if id_ is None:
return None
assert id_ < 256, id_
assert id_ < token.NT_OFFSET, id_
return id_

def GetOpNum(self, op_str):
Expand All @@ -55,7 +55,7 @@ def GetOpNum(self, op_str):
"""
# Fail if not there
id_ = self.ops.get(op_str) or self.more_ops[op_str]
assert id_ < 256, id_
assert id_ < token.NT_OFFSET, id_
return id_


Expand Down

0 comments on commit 18484a2

Please sign in to comment.