Skip to content

Commit

Permalink
Fix parser bug where "type" was misinterpreted as a keyword inside a …
Browse files Browse the repository at this point in the history
…match (#3950)

Fixes #3790

Slightly hacky, but I think this is correct and it should also improve performance somewhat.
  • Loading branch information
JelleZijlstra committed Oct 17, 2023
1 parent 722735d commit bb58807
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 1 deletion.
2 changes: 2 additions & 0 deletions CHANGES.md
Expand Up @@ -37,6 +37,8 @@

<!-- Changes to the parser or to version autodetection -->

- Fix bug where attributes named `type` were not accepted inside `match` statements
(#3950)
- Add support for PEP 695 type aliases containing lambdas and other unusual expressions
(#3949)

Expand Down
19 changes: 18 additions & 1 deletion src/blib2to3/pgen2/parse.py
Expand Up @@ -211,6 +211,7 @@ def __init__(self, grammar: Grammar, convert: Optional[Convert] = None) -> None:
# See note in docstring above. TL;DR this is ignored.
self.convert = convert or lam_sub
self.is_backtracking = False
self.last_token: Optional[int] = None

def setup(self, proxy: "TokenProxy", start: Optional[int] = None) -> None:
"""Prepare for parsing.
Expand All @@ -236,6 +237,7 @@ def setup(self, proxy: "TokenProxy", start: Optional[int] = None) -> None:
self.rootnode: Optional[NL] = None
self.used_names: Set[str] = set()
self.proxy = proxy
self.last_token = None

def addtoken(self, type: int, value: str, context: Context) -> bool:
"""Add a token; return True iff this is the end of the program."""
Expand Down Expand Up @@ -317,6 +319,7 @@ def _addtoken(self, ilabel: int, type: int, value: str, context: Context) -> boo
dfa, state, node = self.stack[-1]
states, first = dfa
# Done with this token
self.last_token = type
return False

else:
Expand All @@ -343,9 +346,23 @@ def classify(self, type: int, value: str, context: Context) -> List[int]:
return [self.grammar.keywords[value]]
elif value in self.grammar.soft_keywords:
assert type in self.grammar.tokens
# Current soft keywords (match, case, type) can only appear at the
# beginning of a statement. So as a shortcut, don't try to treat them
# like keywords in any other context.
# ('_' is also a soft keyword in the real grammar, but for our grammar
# it's just an expression, so we don't need to treat it specially.)
if self.last_token not in (
None,
token.INDENT,
token.DEDENT,
token.NEWLINE,
token.SEMI,
token.COLON,
):
return [self.grammar.tokens[type]]
return [
self.grammar.soft_keywords[value],
self.grammar.tokens[type],
self.grammar.soft_keywords[value],
]

ilabel = self.grammar.tokens.get(type)
Expand Down
4 changes: 4 additions & 0 deletions tests/data/cases/pattern_matching_complex.py
Expand Up @@ -143,3 +143,7 @@
y = 1
case []:
y = 2
# issue 3790
match (X.type, Y):
case _:
pass
9 changes: 9 additions & 0 deletions tests/data/cases/type_aliases.py
Expand Up @@ -5,6 +5,8 @@
type Alias[T]=lambda: T
type And[T]=T and T
type IfElse[T]=T if T else T
type One = int; type Another = str
class X: type InClass = int

type = aliased
print(type(42))
Expand All @@ -16,6 +18,13 @@
type Alias[T] = lambda: T
type And[T] = T and T
type IfElse[T] = T if T else T
type One = int
type Another = str


class X:
type InClass = int


type = aliased
print(type(42))

0 comments on commit bb58807

Please sign in to comment.