diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d101f5c37e60b1..26274e1c8a2207 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,6 +18,10 @@ repos: name: Run Ruff (lint) on Argument Clinic args: [--exit-non-zero-on-fix, --config=Tools/clinic/.ruff.toml] files: ^Tools/clinic/|Lib/test/test_clinic.py + - id: ruff + name: Run Ruff (lint) on Tools/peg_generator/ + args: [--exit-non-zero-on-fix, --config=Tools/peg_generator/.ruff.toml] + files: ^Tools/peg_generator/ - id: ruff-format name: Run Ruff (format) on Doc/ args: [--check] diff --git a/Tools/peg_generator/.ruff.toml b/Tools/peg_generator/.ruff.toml new file mode 100644 index 00000000000000..d778cf0df9b49f --- /dev/null +++ b/Tools/peg_generator/.ruff.toml @@ -0,0 +1,27 @@ +extend = "../../.ruff.toml" # Inherit the project-wide settings + +extend-exclude = [ + # Generated files: + "Tools/peg_generator/pegen/grammar_parser.py", +] + +[lint] +select = [ + "F", # pyflakes + "I", # isort + "UP", # pyupgrade + "RUF100", # Ban unused `# noqa` comments + "PGH004", # Ban blanket `# noqa` comments (only ignore specific error codes) +] +ignore = [ + # Use PEP-604 unions rather than tuples for isinstance() checks. + # Makes code slower and more verbose. https://github.com/astral-sh/ruff/issues/7871. + "UP038", +] +unfixable = [ + # The autofixes sometimes do the wrong things for these; + # it's better to have to manually look at the code and see how it needs fixing + "F841", # Detects unused variables + "F601", # Detects dictionaries that have duplicate keys + "F602", # Also detects dictionaries that have duplicate keys +] diff --git a/Tools/peg_generator/pegen/__main__.py b/Tools/peg_generator/pegen/__main__.py index 0b0b4b291c2b0e..a6bc627d47cbbb 100755 --- a/Tools/peg_generator/pegen/__main__.py +++ b/Tools/peg_generator/pegen/__main__.py @@ -10,7 +10,6 @@ import time import token import traceback -from typing import Tuple from pegen.grammar import Grammar from pegen.parser import Parser @@ -21,7 +20,7 @@ def generate_c_code( args: argparse.Namespace, -) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]: +) -> tuple[Grammar, Parser, Tokenizer, ParserGenerator]: from pegen.build import build_c_parser_and_generator verbose = args.verbose @@ -50,7 +49,7 @@ def generate_c_code( def generate_python_code( args: argparse.Namespace, -) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]: +) -> tuple[Grammar, Parser, Tokenizer, ParserGenerator]: from pegen.build import build_python_parser_and_generator verbose = args.verbose @@ -188,7 +187,7 @@ def main() -> None: if __name__ == "__main__": - if sys.version_info < (3, 8): + if sys.version_info < (3, 8): # noqa: UP036 print("ERROR: using pegen requires at least Python 3.8!", file=sys.stderr) sys.exit(1) main() diff --git a/Tools/peg_generator/pegen/ast_dump.py b/Tools/peg_generator/pegen/ast_dump.py index 07f8799c114f5d..60dc1b04672e2f 100644 --- a/Tools/peg_generator/pegen/ast_dump.py +++ b/Tools/peg_generator/pegen/ast_dump.py @@ -6,7 +6,7 @@ TODO: Remove the above-described hack. 
""" -from typing import Any, Optional, Tuple +from typing import Any def ast_dump( @@ -14,9 +14,9 @@ def ast_dump( annotate_fields: bool = True, include_attributes: bool = False, *, - indent: Optional[str] = None, + indent: str | None = None, ) -> str: - def _format(node: Any, level: int = 0) -> Tuple[str, bool]: + def _format(node: Any, level: int = 0) -> tuple[str, bool]: if indent is not None: level += 1 prefix = "\n" + indent * level @@ -41,7 +41,7 @@ def _format(node: Any, level: int = 0) -> Tuple[str, bool]: value, simple = _format(value, level) allsimple = allsimple and simple if keywords: - args.append("%s=%s" % (name, value)) + args.append(f"{name}={value}") else: args.append(value) if include_attributes and node._attributes: @@ -54,16 +54,16 @@ def _format(node: Any, level: int = 0) -> Tuple[str, bool]: continue value, simple = _format(value, level) allsimple = allsimple and simple - args.append("%s=%s" % (name, value)) + args.append(f"{name}={value}") if allsimple and len(args) <= 3: - return "%s(%s)" % (node.__class__.__name__, ", ".join(args)), not args - return "%s(%s%s)" % (node.__class__.__name__, prefix, sep.join(args)), False + return "{}({})".format(node.__class__.__name__, ", ".join(args)), not args + return f"{node.__class__.__name__}({prefix}{sep.join(args)})", False elif isinstance(node, list): if not node: return "[]", True - return "[%s%s]" % (prefix, sep.join(_format(x, level)[0] for x in node)), False + return f"[{prefix}{sep.join(_format(x, level)[0] for x in node)}]", False return repr(node), True if all(cls.__name__ != "AST" for cls in node.__class__.__mro__): - raise TypeError("expected AST, got %r" % node.__class__.__name__) + raise TypeError(f"expected AST, got {node.__class__.__name__!r}") return _format(node)[0] diff --git a/Tools/peg_generator/pegen/build.py b/Tools/peg_generator/pegen/build.py index be289c352de585..44a58581686a4f 100644 --- a/Tools/peg_generator/pegen/build.py +++ b/Tools/peg_generator/pegen/build.py @@ -6,7 +6,7 @@ import sysconfig import tempfile import tokenize -from typing import IO, Any, Dict, List, Optional, Set, Tuple +from typing import IO, Any from pegen.c_generator import CParserGenerator from pegen.grammar import Grammar @@ -18,11 +18,11 @@ MOD_DIR = pathlib.Path(__file__).resolve().parent -TokenDefinitions = Tuple[Dict[int, str], Dict[str, int], Set[str]] +TokenDefinitions = tuple[dict[int, str], dict[str, int], set[str]] Incomplete = Any # TODO: install `types-setuptools` and remove this alias -def get_extra_flags(compiler_flags: str, compiler_py_flags_nodist: str) -> List[str]: +def get_extra_flags(compiler_flags: str, compiler_py_flags_nodist: str) -> list[str]: flags = sysconfig.get_config_var(compiler_flags) py_flags_nodist = sysconfig.get_config_var(compiler_py_flags_nodist) if flags is None or py_flags_nodist is None: @@ -71,11 +71,11 @@ def fixup_build_ext(cmd: Incomplete) -> None: def compile_c_extension( generated_source_path: str, - build_dir: Optional[str] = None, + build_dir: str | None = None, verbose: bool = False, keep_asserts: bool = True, disable_optimization: bool = False, - library_dir: Optional[str] = None, + library_dir: str | None = None, ) -> pathlib.Path: """Compile the generated source for a parser generator into an extension module. 
@@ -93,11 +93,10 @@ def compile_c_extension( """ import setuptools.command.build_ext import setuptools.logging - - from setuptools import Extension, Distribution - from setuptools.modified import newer_group + from setuptools import Distribution, Extension from setuptools._distutils.ccompiler import new_compiler from setuptools._distutils.sysconfig import customize_compiler + from setuptools.modified import newer_group if verbose: setuptools.logging.set_threshold(logging.DEBUG) @@ -241,7 +240,7 @@ def compile_c_extension( def build_parser( grammar_file: str, verbose_tokenizer: bool = False, verbose_parser: bool = False -) -> Tuple[Grammar, Parser, Tokenizer]: +) -> tuple[Grammar, Parser, Tokenizer]: with open(grammar_file) as file: tokenizer = Tokenizer(tokenize.generate_tokens(file.readline), verbose=verbose_tokenizer) parser = GrammarParser(tokenizer, verbose=verbose_parser) @@ -292,7 +291,7 @@ def build_c_generator( keep_asserts_in_extension: bool = True, skip_actions: bool = False, ) -> ParserGenerator: - with open(tokens_file, "r") as tok_file: + with open(tokens_file) as tok_file: all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file) with open(output_file, "w") as file: gen: ParserGenerator = CParserGenerator( @@ -333,7 +332,7 @@ def build_c_parser_and_generator( verbose_c_extension: bool = False, keep_asserts_in_extension: bool = True, skip_actions: bool = False, -) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]: +) -> tuple[Grammar, Parser, Tokenizer, ParserGenerator]: """Generate rules, C parser, tokenizer, parser generator for a given grammar Args: @@ -373,7 +372,7 @@ def build_python_parser_and_generator( verbose_tokenizer: bool = False, verbose_parser: bool = False, skip_actions: bool = False, -) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]: +) -> tuple[Grammar, Parser, Tokenizer, ParserGenerator]: """Generate rules, python parser, tokenizer, parser generator for a given grammar Args: diff --git a/Tools/peg_generator/pegen/c_generator.py b/Tools/peg_generator/pegen/c_generator.py index 04f66eec1a0154..fa75174ea0d59d 100644 --- a/Tools/peg_generator/pegen/c_generator.py +++ b/Tools/peg_generator/pegen/c_generator.py @@ -1,9 +1,10 @@ import ast import os.path import re +from collections.abc import Callable from dataclasses import dataclass, field from enum import Enum -from typing import IO, Any, Callable, Dict, List, Optional, Set, Text, Tuple +from typing import IO, Any from pegen import grammar from pegen.grammar import ( @@ -86,13 +87,13 @@ class NodeTypes(Enum): @dataclass class FunctionCall: function: str - arguments: List[Any] = field(default_factory=list) - assigned_variable: Optional[str] = None - assigned_variable_type: Optional[str] = None - return_type: Optional[str] = None - nodetype: Optional[NodeTypes] = None + arguments: list[Any] = field(default_factory=list) + assigned_variable: str | None = None + assigned_variable_type: str | None = None + return_type: str | None = None + nodetype: NodeTypes | None = None force_true: bool = False - comment: Optional[str] = None + comment: str | None = None def __str__(self) -> str: parts = [] @@ -124,14 +125,14 @@ class CCallMakerVisitor(GrammarVisitor): def __init__( self, parser_generator: ParserGenerator, - exact_tokens: Dict[str, int], - non_exact_tokens: Set[str], + exact_tokens: dict[str, int], + non_exact_tokens: set[str], ): self.gen = parser_generator self.exact_tokens = exact_tokens self.non_exact_tokens = non_exact_tokens - self.cache: Dict[str, str] = {} - 
self.cleanup_statements: List[str] = [] + self.cache: dict[str, str] = {} + self.cleanup_statements: list[str] = [] def keyword_helper(self, keyword: str) -> FunctionCall: return FunctionCall( @@ -167,7 +168,7 @@ def visit_NameLeaf(self, node: NameLeaf) -> FunctionCall: ) return FunctionCall( assigned_variable=f"{name.lower()}_var", - function=f"_PyPegen_expect_token", + function="_PyPegen_expect_token", arguments=["p", name], nodetype=NodeTypes.GENERIC_TOKEN, return_type="Token *", @@ -199,7 +200,7 @@ def visit_StringLeaf(self, node: StringLeaf) -> FunctionCall: type = self.exact_tokens[val] return FunctionCall( assigned_variable="_literal", - function=f"_PyPegen_expect_token", + function="_PyPegen_expect_token", arguments=["p", type], nodetype=NodeTypes.GENERIC_TOKEN, return_type="Token *", @@ -271,7 +272,7 @@ def visit_Forced(self, node: Forced) -> FunctionCall: type = self.exact_tokens[val] return FunctionCall( assigned_variable="_literal", - function=f"_PyPegen_expect_forced_token", + function="_PyPegen_expect_forced_token", arguments=["p", type, f'"{val}"'], nodetype=NodeTypes.GENERIC_TOKEN, return_type="Token *", @@ -283,7 +284,7 @@ def visit_Forced(self, node: Forced) -> FunctionCall: call.comment = None return FunctionCall( assigned_variable="_literal", - function=f"_PyPegen_expect_forced_result", + function="_PyPegen_expect_forced_result", arguments=["p", str(call), f'"{node.node.rhs!s}"'], return_type="void *", comment=f"forced_token=({node.node.rhs!s})", @@ -306,7 +307,7 @@ def _generate_artificial_rule_call( node: Any, prefix: str, rule_generation_func: Callable[[], str], - return_type: Optional[str] = None, + return_type: str | None = None, ) -> FunctionCall: node_str = f"{node}" key = f"{prefix}_{node_str}" @@ -377,10 +378,10 @@ class CParserGenerator(ParserGenerator, GrammarVisitor): def __init__( self, grammar: grammar.Grammar, - tokens: Dict[int, str], - exact_tokens: Dict[str, int], - non_exact_tokens: Set[str], - file: Optional[IO[Text]], + tokens: dict[int, str], + exact_tokens: dict[str, int], + non_exact_tokens: set[str], + file: IO[str] | None, debug: bool = False, skip_actions: bool = False, ): @@ -391,7 +392,7 @@ def __init__( self._varname_counter = 0 self.debug = debug self.skip_actions = skip_actions - self.cleanup_statements: List[str] = [] + self.cleanup_statements: list[str] = [] def add_level(self) -> None: self.print("if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) {") @@ -427,12 +428,12 @@ def call_with_errorcheck_goto(self, call_text: str, goto_target: str) -> None: self.print(f"if ({error_var}) {{") with self.indent(): self.print(f"goto {goto_target};") - self.print(f"}}") + self.print("}") def out_of_memory_return( self, expr: str, - cleanup_code: Optional[str] = None, + cleanup_code: str | None = None, ) -> None: self.print(f"if ({expr}) {{") with self.indent(): @@ -441,14 +442,14 @@ def out_of_memory_return( self.print("p->error_indicator = 1;") self.print("PyErr_NoMemory();") self.add_return("NULL") - self.print(f"}}") + self.print("}") def out_of_memory_goto(self, expr: str, goto_target: str) -> None: self.print(f"if ({expr}) {{") with self.indent(): self.print("PyErr_NoMemory();") self.print(f"goto {goto_target};") - self.print(f"}}") + self.print("}") def generate(self, filename: str) -> None: self.collect_rules() @@ -491,8 +492,8 @@ def generate(self, filename: str) -> None: if trailer: self.print(trailer.rstrip("\n") % dict(mode=mode, modulename=modulename)) - def _group_keywords_by_length(self) -> 
Dict[int, List[Tuple[str, int]]]: - groups: Dict[int, List[Tuple[str, int]]] = {} + def _group_keywords_by_length(self) -> dict[int, list[tuple[str, int]]]: + groups: dict[int, list[tuple[str, int]]] = {} for keyword_str, keyword_type in self.keywords.items(): length = len(keyword_str) if length in groups: @@ -584,10 +585,10 @@ def _set_up_rule_memoization(self, node: Rule, result_type: str) -> None: self.print("if (_raw == NULL || p->mark <= _resmark)") with self.indent(): self.print("break;") - self.print(f"_resmark = p->mark;") + self.print("_resmark = p->mark;") self.print("_res = _raw;") self.print("}") - self.print(f"p->mark = _resmark;") + self.print("p->mark = _resmark;") self.add_return("_res") self.print("}") self.print(f"static {result_type}") @@ -643,7 +644,7 @@ def _handle_loop_rule_body(self, node: Rule, rhs: Rhs) -> None: if memoize: self.print("int _start_mark = p->mark;") self.print("void **_children = PyMem_Malloc(sizeof(void *));") - self.out_of_memory_return(f"!_children") + self.out_of_memory_return("!_children") self.print("Py_ssize_t _children_capacity = 1;") self.print("Py_ssize_t _n = 0;") if any(alt.action and "EXTRA" in alt.action for alt in rhs.alts): @@ -661,7 +662,7 @@ def _handle_loop_rule_body(self, node: Rule, rhs: Rhs) -> None: self.add_return("NULL") self.print("}") self.print("asdl_seq *_seq = (asdl_seq*)_Py_asdl_generic_seq_new(_n, p->arena);") - self.out_of_memory_return(f"!_seq", cleanup_code="PyMem_Free(_children);") + self.out_of_memory_return("!_seq", cleanup_code="PyMem_Free(_children);") self.print("for (Py_ssize_t i = 0; i < _n; i++) asdl_seq_SET_UNTYPED(_seq, i, _children[i]);") self.print("PyMem_Free(_children);") if memoize and node.name: @@ -715,7 +716,7 @@ def visit_NamedItem(self, node: NamedItem) -> None: self.print(call) def visit_Rhs( - self, node: Rhs, is_loop: bool, is_gather: bool, rulename: Optional[str] + self, node: Rhs, is_loop: bool, is_gather: bool, rulename: str | None ) -> None: if is_loop: assert len(node.alts) == 1 @@ -734,7 +735,7 @@ def join_conditions(self, keyword: str, node: Any) -> None: self.visit(item) self.print(")") - def emit_action(self, node: Alt, cleanup_code: Optional[str] = None) -> None: + def emit_action(self, node: Alt, cleanup_code: str | None = None) -> None: self.print(f"_res = {node.action};") self.print("if (_res == NULL && PyErr_Occurred()) {") @@ -776,7 +777,7 @@ def emit_default_action(self, is_gather: bool, node: Alt) -> None: def emit_dummy_action(self) -> None: self.print("_res = _PyPegen_dummy_name(p);") - def handle_alt_normal(self, node: Alt, is_gather: bool, rulename: Optional[str]) -> None: + def handle_alt_normal(self, node: Alt, is_gather: bool, rulename: str | None) -> None: self.join_conditions(keyword="if", node=node) self.print("{") # We have parsed successfully all the conditions for the option. @@ -796,10 +797,10 @@ def handle_alt_normal(self, node: Alt, is_gather: bool, rulename: Optional[str]) self.emit_default_action(is_gather, node) # As the current option has parsed correctly, do not continue with the rest. 
- self.print(f"goto done;") + self.print("goto done;") self.print("}") - def handle_alt_loop(self, node: Alt, is_gather: bool, rulename: Optional[str]) -> None: + def handle_alt_loop(self, node: Alt, is_gather: bool, rulename: str | None) -> None: # Condition of the main body of the alternative self.join_conditions(keyword="while", node=node) self.print("{") @@ -823,7 +824,7 @@ def handle_alt_loop(self, node: Alt, is_gather: bool, rulename: Optional[str]) - self.print( "void **_new_children = PyMem_Realloc(_children, _children_capacity*sizeof(void *));" ) - self.out_of_memory_return(f"!_new_children", cleanup_code="PyMem_Free(_children);") + self.out_of_memory_return("!_new_children", cleanup_code="PyMem_Free(_children);") self.print("_children = _new_children;") self.print("}") self.print("_children[_n++] = _res;") @@ -831,7 +832,7 @@ def handle_alt_loop(self, node: Alt, is_gather: bool, rulename: Optional[str]) - self.print("}") def visit_Alt( - self, node: Alt, is_loop: bool, is_gather: bool, rulename: Optional[str] + self, node: Alt, is_loop: bool, is_gather: bool, rulename: str | None ) -> None: if len(node.items) == 1 and str(node.items[0]).startswith("invalid_"): self.print(f"if (p->call_invalid_rules) {{ // {node}") @@ -875,7 +876,7 @@ def visit_Alt( self.print("}") self.print("}") - def collect_vars(self, node: Alt) -> Dict[Optional[str], Optional[str]]: + def collect_vars(self, node: Alt) -> dict[str | None, str | None]: types = {} with self.local_variable_context(): for item in node.items: @@ -883,7 +884,7 @@ def collect_vars(self, node: Alt) -> Dict[Optional[str], Optional[str]]: types[name] = type return types - def add_var(self, node: NamedItem) -> Tuple[Optional[str], Optional[str]]: + def add_var(self, node: NamedItem) -> tuple[str | None, str | None]: call = self.callmakervisitor.generate_call(node.item) name = node.name if node.name else call.assigned_variable if name is not None: diff --git a/Tools/peg_generator/pegen/first_sets.py b/Tools/peg_generator/pegen/first_sets.py index 6d794ffa4bfa8b..f6b83c0ca31e3f 100755 --- a/Tools/peg_generator/pegen/first_sets.py +++ b/Tools/peg_generator/pegen/first_sets.py @@ -3,7 +3,6 @@ import argparse import pprint import sys -from typing import Dict, Set from pegen.build import build_parser from pegen.grammar import ( @@ -33,20 +32,20 @@ class FirstSetCalculator(GrammarVisitor): - def __init__(self, rules: Dict[str, Rule]) -> None: + def __init__(self, rules: dict[str, Rule]) -> None: self.rules = rules self.nullables = compute_nullables(rules) - self.first_sets: Dict[str, Set[str]] = dict() - self.in_process: Set[str] = set() + self.first_sets: dict[str, set[str]] = dict() + self.in_process: set[str] = set() - def calculate(self) -> Dict[str, Set[str]]: + def calculate(self) -> dict[str, set[str]]: for name, rule in self.rules.items(): self.visit(rule) return self.first_sets - def visit_Alt(self, item: Alt) -> Set[str]: - result: Set[str] = set() - to_remove: Set[str] = set() + def visit_Alt(self, item: Alt) -> set[str]: + result: set[str] = set() + to_remove: set[str] = set() for other in item.items: new_terminals = self.visit(other) if isinstance(other.item, NegativeLookahead): @@ -71,34 +70,34 @@ def visit_Alt(self, item: Alt) -> Set[str]: return result - def visit_Cut(self, item: Cut) -> Set[str]: + def visit_Cut(self, item: Cut) -> set[str]: return set() - def visit_Group(self, item: Group) -> Set[str]: + def visit_Group(self, item: Group) -> set[str]: return self.visit(item.rhs) - def visit_PositiveLookahead(self, item: 
Lookahead) -> Set[str]: + def visit_PositiveLookahead(self, item: Lookahead) -> set[str]: return self.visit(item.node) - def visit_NegativeLookahead(self, item: NegativeLookahead) -> Set[str]: + def visit_NegativeLookahead(self, item: NegativeLookahead) -> set[str]: return self.visit(item.node) - def visit_NamedItem(self, item: NamedItem) -> Set[str]: + def visit_NamedItem(self, item: NamedItem) -> set[str]: return self.visit(item.item) - def visit_Opt(self, item: Opt) -> Set[str]: + def visit_Opt(self, item: Opt) -> set[str]: return self.visit(item.node) - def visit_Gather(self, item: Gather) -> Set[str]: + def visit_Gather(self, item: Gather) -> set[str]: return self.visit(item.node) - def visit_Repeat0(self, item: Repeat0) -> Set[str]: + def visit_Repeat0(self, item: Repeat0) -> set[str]: return self.visit(item.node) - def visit_Repeat1(self, item: Repeat1) -> Set[str]: + def visit_Repeat1(self, item: Repeat1) -> set[str]: return self.visit(item.node) - def visit_NameLeaf(self, item: NameLeaf) -> Set[str]: + def visit_NameLeaf(self, item: NameLeaf) -> set[str]: if item.value not in self.rules: return {item.value} @@ -110,16 +109,16 @@ def visit_NameLeaf(self, item: NameLeaf) -> Set[str]: return self.first_sets[item.value] - def visit_StringLeaf(self, item: StringLeaf) -> Set[str]: + def visit_StringLeaf(self, item: StringLeaf) -> set[str]: return {item.value} - def visit_Rhs(self, item: Rhs) -> Set[str]: - result: Set[str] = set() + def visit_Rhs(self, item: Rhs) -> set[str]: + result: set[str] = set() for alt in item.alts: result |= self.visit(alt) return result - def visit_Rule(self, item: Rule) -> Set[str]: + def visit_Rule(self, item: Rule) -> set[str]: if item.name in self.in_process: return set() elif item.name not in self.first_sets: @@ -138,7 +137,7 @@ def main() -> None: try: grammar, parser, tokenizer = build_parser(args.grammar_file) except Exception as err: - print("ERROR: Failed to parse grammar file", file=sys.stderr) + print("ERROR: Failed to parse grammar file", err, file=sys.stderr) sys.exit(1) firs_sets = FirstSetCalculator(grammar.rules).calculate() diff --git a/Tools/peg_generator/pegen/grammar.py b/Tools/peg_generator/pegen/grammar.py index 1ee9d100b61f49..cca8584a632071 100644 --- a/Tools/peg_generator/pegen/grammar.py +++ b/Tools/peg_generator/pegen/grammar.py @@ -1,15 +1,7 @@ from __future__ import annotations -from typing import ( - AbstractSet, - Any, - Iterable, - Iterator, - List, - Optional, - Tuple, - Union, -) +from collections.abc import Iterable, Iterator, Set +from typing import Any class GrammarError(Exception): @@ -34,7 +26,7 @@ def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> Any: class Grammar: - def __init__(self, rules: Iterable[Rule], metas: Iterable[Tuple[str, Optional[str]]]): + def __init__(self, rules: Iterable[Rule], metas: Iterable[tuple[str, str | None]]): # Check if there are repeated rules in "rules" all_rules = {} for rule in rules: @@ -66,7 +58,7 @@ def __iter__(self) -> Iterator[Rule]: class Rule: - def __init__(self, name: str, type: Optional[str], rhs: Rhs, memo: Optional[object] = None): + def __init__(self, name: str, type: str | None, rhs: Rhs, memo: object | None = None): self.name = name self.type = type self.rhs = rhs @@ -141,9 +133,9 @@ def __repr__(self) -> str: class Rhs: - def __init__(self, alts: List[Alt]): + def __init__(self, alts: list[Alt]): self.alts = alts - self.memo: Optional[Tuple[Optional[str], str]] = None + self.memo: tuple[str | None, str] | None = None def __str__(self) -> str: 
return " | ".join(str(alt) for alt in self.alts) @@ -151,7 +143,7 @@ def __str__(self) -> str: def __repr__(self) -> str: return f"Rhs({self.alts!r})" - def __iter__(self) -> Iterator[List[Alt]]: + def __iter__(self) -> Iterator[list[Alt]]: yield self.alts @property @@ -165,7 +157,7 @@ def can_be_inlined(self) -> bool: class Alt: - def __init__(self, items: List[NamedItem], *, icut: int = -1, action: Optional[str] = None): + def __init__(self, items: list[NamedItem], *, icut: int = -1, action: str | None = None): self.items = items self.icut = icut self.action = action @@ -185,12 +177,12 @@ def __repr__(self) -> str: args.append(f"action={self.action!r}") return f"Alt({', '.join(args)})" - def __iter__(self) -> Iterator[List[NamedItem]]: + def __iter__(self) -> Iterator[list[NamedItem]]: yield self.items class NamedItem: - def __init__(self, name: Optional[str], item: Item, type: Optional[str] = None): + def __init__(self, name: str | None, item: Item, type: str | None = None): self.name = name self.item = item self.type = type @@ -271,7 +263,7 @@ class Repeat: def __init__(self, node: Plain): self.node = node - self.memo: Optional[Tuple[Optional[str], str]] = None + self.memo: tuple[str | None, str] | None = None def __iter__(self) -> Iterator[Plain]: yield self.node @@ -334,12 +326,12 @@ def __init__(self) -> None: pass def __repr__(self) -> str: - return f"Cut()" + return "Cut()" def __str__(self) -> str: - return f"~" + return "~" - def __iter__(self) -> Iterator[Tuple[str, str]]: + def __iter__(self) -> Iterator[tuple[str, str]]: yield from () def __eq__(self, other: object) -> bool: @@ -347,15 +339,15 @@ def __eq__(self, other: object) -> bool: return NotImplemented return True - def initial_names(self) -> AbstractSet[str]: + def initial_names(self) -> Set[str]: return set() -Plain = Union[Leaf, Group] -Item = Union[Plain, Opt, Repeat, Forced, Lookahead, Rhs, Cut] -RuleName = Tuple[str, Optional[str]] -MetaTuple = Tuple[str, Optional[str]] -MetaList = List[MetaTuple] -RuleList = List[Rule] -NamedItemList = List[NamedItem] -LookaheadOrCut = Union[Lookahead, Cut] +Plain = Leaf | Group +Item = Plain | Opt | Repeat | Forced | Lookahead | Rhs | Cut +RuleName = tuple[str, str | None] +MetaTuple = tuple[str, str | None] +MetaList = list[MetaTuple] +RuleList = list[Rule] +NamedItemList = list[NamedItem] +LookaheadOrCut = Lookahead | Cut diff --git a/Tools/peg_generator/pegen/grammar_visualizer.py b/Tools/peg_generator/pegen/grammar_visualizer.py index 11f784f45b66b8..eadb6987389b88 100644 --- a/Tools/peg_generator/pegen/grammar_visualizer.py +++ b/Tools/peg_generator/pegen/grammar_visualizer.py @@ -1,6 +1,7 @@ import argparse import sys -from typing import Any, Callable, Iterator +from collections.abc import Callable, Iterator +from typing import Any from pegen.build import build_parser from pegen.grammar import Grammar, Rule @@ -52,7 +53,7 @@ def main() -> None: try: grammar, parser, tokenizer = build_parser(args.filename) except Exception as err: - print("ERROR: Failed to parse grammar file", file=sys.stderr) + print("ERROR: Failed to parse grammar file", err, file=sys.stderr) sys.exit(1) visitor = ASTGrammarPrinter() diff --git a/Tools/peg_generator/pegen/parser.py b/Tools/peg_generator/pegen/parser.py index a987d30a9d6438..806003c8350c03 100644 --- a/Tools/peg_generator/pegen/parser.py +++ b/Tools/peg_generator/pegen/parser.py @@ -5,7 +5,8 @@ import tokenize import traceback from abc import abstractmethod -from typing import Any, Callable, ClassVar, Dict, Optional, Tuple, Type, TypeVar, 
cast +from collections.abc import Callable +from typing import Any, ClassVar, TypeVar, cast from pegen.tokenizer import Mark, Tokenizer, exact_token_types @@ -74,12 +75,12 @@ def memoize_wrapper(self: "Parser", *args: object) -> Any: def memoize_left_rec( - method: Callable[["Parser"], Optional[T]] -) -> Callable[["Parser"], Optional[T]]: + method: Callable[["Parser"], T | None] +) -> Callable[["Parser"], T | None]: """Memoize a left-recursive symbol method.""" method_name = method.__name__ - def memoize_left_rec_wrapper(self: "Parser") -> Optional[T]: + def memoize_left_rec_wrapper(self: "Parser") -> T | None: mark = self._mark() key = mark, method_name, () # Fast path: cache hit, and not verbose. @@ -160,15 +161,15 @@ def memoize_left_rec_wrapper(self: "Parser") -> Optional[T]: class Parser: """Parsing base class.""" - KEYWORDS: ClassVar[Tuple[str, ...]] + KEYWORDS: ClassVar[tuple[str, ...]] - SOFT_KEYWORDS: ClassVar[Tuple[str, ...]] + SOFT_KEYWORDS: ClassVar[tuple[str, ...]] def __init__(self, tokenizer: Tokenizer, *, verbose: bool = False): self._tokenizer = tokenizer self._verbose = verbose self._level = 0 - self._cache: Dict[Tuple[Mark, str, Tuple[Any, ...]], Tuple[Any, Mark]] = {} + self._cache: dict[tuple[Mark, str, tuple[Any, ...]], tuple[Any, Mark]] = {} # Integer tracking whether we are in a left recursive rule or not. Can be useful # for error reporting. self.in_recursive_rule = 0 @@ -185,28 +186,28 @@ def showpeek(self) -> str: return f"{tok.start[0]}.{tok.start[1]}: {token.tok_name[tok.type]}:{tok.string!r}" @memoize - def name(self) -> Optional[tokenize.TokenInfo]: + def name(self) -> tokenize.TokenInfo | None: tok = self._tokenizer.peek() if tok.type == token.NAME and tok.string not in self.KEYWORDS: return self._tokenizer.getnext() return None @memoize - def number(self) -> Optional[tokenize.TokenInfo]: + def number(self) -> tokenize.TokenInfo | None: tok = self._tokenizer.peek() if tok.type == token.NUMBER: return self._tokenizer.getnext() return None @memoize - def string(self) -> Optional[tokenize.TokenInfo]: + def string(self) -> tokenize.TokenInfo | None: tok = self._tokenizer.peek() if tok.type == token.STRING: return self._tokenizer.getnext() return None @memoize - def fstring_start(self) -> Optional[tokenize.TokenInfo]: + def fstring_start(self) -> tokenize.TokenInfo | None: FSTRING_START = getattr(token, "FSTRING_START", None) if not FSTRING_START: return None @@ -216,7 +217,7 @@ def fstring_start(self) -> Optional[tokenize.TokenInfo]: return None @memoize - def fstring_middle(self) -> Optional[tokenize.TokenInfo]: + def fstring_middle(self) -> tokenize.TokenInfo | None: FSTRING_MIDDLE = getattr(token, "FSTRING_MIDDLE", None) if not FSTRING_MIDDLE: return None @@ -226,7 +227,7 @@ def fstring_middle(self) -> Optional[tokenize.TokenInfo]: return None @memoize - def fstring_end(self) -> Optional[tokenize.TokenInfo]: + def fstring_end(self) -> tokenize.TokenInfo | None: FSTRING_END = getattr(token, "FSTRING_END", None) if not FSTRING_END: return None @@ -236,28 +237,28 @@ def fstring_end(self) -> Optional[tokenize.TokenInfo]: return None @memoize - def op(self) -> Optional[tokenize.TokenInfo]: + def op(self) -> tokenize.TokenInfo | None: tok = self._tokenizer.peek() if tok.type == token.OP: return self._tokenizer.getnext() return None @memoize - def type_comment(self) -> Optional[tokenize.TokenInfo]: + def type_comment(self) -> tokenize.TokenInfo | None: tok = self._tokenizer.peek() if tok.type == token.TYPE_COMMENT: return self._tokenizer.getnext() return None 
@memoize - def soft_keyword(self) -> Optional[tokenize.TokenInfo]: + def soft_keyword(self) -> tokenize.TokenInfo | None: tok = self._tokenizer.peek() if tok.type == token.NAME and tok.string in self.SOFT_KEYWORDS: return self._tokenizer.getnext() return None @memoize - def expect(self, type: str) -> Optional[tokenize.TokenInfo]: + def expect(self, type: str) -> tokenize.TokenInfo | None: tok = self._tokenizer.peek() if tok.string == type: return self._tokenizer.getnext() @@ -271,7 +272,7 @@ def expect(self, type: str) -> Optional[tokenize.TokenInfo]: return self._tokenizer.getnext() return None - def expect_forced(self, res: Any, expectation: str) -> Optional[tokenize.TokenInfo]: + def expect_forced(self, res: Any, expectation: str) -> tokenize.TokenInfo | None: if res is None: raise self.make_syntax_error(f"expected {expectation}") return res @@ -293,7 +294,7 @@ def make_syntax_error(self, message: str, filename: str = "") -> Syntax return SyntaxError(message, (filename, tok.start[0], 1 + tok.start[1], tok.line)) -def simple_parser_main(parser_class: Type[Parser]) -> None: +def simple_parser_main(parser_class: type[Parser]) -> None: argparser = argparse.ArgumentParser() argparser.add_argument( "-v", @@ -330,7 +331,7 @@ def simple_parser_main(parser_class: Type[Parser]) -> None: endpos = 0 else: endpos = file.tell() - except IOError: + except OSError: endpos = 0 finally: if file is not sys.stdin: diff --git a/Tools/peg_generator/pegen/parser_generator.py b/Tools/peg_generator/pegen/parser_generator.py index 976f5e6e57d7c6..43054c258d2a05 100644 --- a/Tools/peg_generator/pegen/parser_generator.py +++ b/Tools/peg_generator/pegen/parser_generator.py @@ -1,22 +1,10 @@ -import sys import ast import contextlib import re +import sys from abc import abstractmethod -from typing import ( - IO, - AbstractSet, - Any, - Dict, - Iterable, - Iterator, - List, - Optional, - Set, - Text, - Tuple, - Union, -) +from collections.abc import Iterable, Iterator, Set +from typing import IO, Any from pegen import sccutils from pegen.grammar import ( @@ -44,7 +32,7 @@ class RuleCollectorVisitor(GrammarVisitor): """Visitor that invokes a provided callmaker visitor with just the NamedItem nodes""" - def __init__(self, rules: Dict[str, Rule], callmakervisitor: GrammarVisitor) -> None: + def __init__(self, rules: dict[str, Rule], callmakervisitor: GrammarVisitor) -> None: self.rules = rules self.callmaker = callmakervisitor @@ -58,7 +46,7 @@ def visit_NamedItem(self, item: NamedItem) -> None: class KeywordCollectorVisitor(GrammarVisitor): """Visitor that collects all the keywords and soft keywords in the Grammar""" - def __init__(self, gen: "ParserGenerator", keywords: Dict[str, int], soft_keywords: Set[str]): + def __init__(self, gen: "ParserGenerator", keywords: dict[str, int], soft_keywords: set[str]): self.generator = gen self.keywords = keywords self.soft_keywords = soft_keywords @@ -73,7 +61,7 @@ def visit_StringLeaf(self, node: StringLeaf) -> None: class RuleCheckingVisitor(GrammarVisitor): - def __init__(self, rules: Dict[str, Rule], tokens: Set[str]): + def __init__(self, rules: dict[str, Rule], tokens: set[str]): self.rules = rules self.tokens = tokens # If python < 3.12 add the virtual fstring tokens @@ -100,11 +88,11 @@ def visit_NamedItem(self, node: NamedItem) -> None: class ParserGenerator: callmakervisitor: GrammarVisitor - def __init__(self, grammar: Grammar, tokens: Set[str], file: Optional[IO[Text]]): + def __init__(self, grammar: Grammar, tokens: set[str], file: IO[str] | None): self.grammar = 
grammar self.tokens = tokens - self.keywords: Dict[str, int] = {} - self.soft_keywords: Set[str] = set() + self.keywords: dict[str, int] = {} + self.soft_keywords: set[str] = set() self.rules = grammar.rules self.validate_rule_names() if "trailer" not in grammar.metas and "start" not in self.rules: @@ -117,8 +105,8 @@ def __init__(self, grammar: Grammar, tokens: Set[str], file: Optional[IO[Text]]) self.first_graph, self.first_sccs = compute_left_recursives(self.rules) self.counter = 0 # For name_rule()/name_loop() self.keyword_counter = 499 # For keyword_type() - self.all_rules: Dict[str, Rule] = self.rules.copy() # Rules + temporal rules - self._local_variable_stack: List[List[str]] = [] + self.all_rules: dict[str, Rule] = self.rules.copy() # Rules + temporal rules + self._local_variable_stack: list[list[str]] = [] def validate_rule_names(self) -> None: for rule in self.rules: @@ -132,7 +120,7 @@ def local_variable_context(self) -> Iterator[None]: self._local_variable_stack.pop() @property - def local_variable_names(self) -> List[str]: + def local_variable_names(self) -> list[str]: return self._local_variable_stack[-1] @abstractmethod @@ -164,7 +152,7 @@ def collect_rules(self) -> None: keyword_collector.visit(rule) rule_collector = RuleCollectorVisitor(self.rules, self.callmakervisitor) - done: Set[str] = set() + done: set[str] = set() while True: computed_rules = list(self.all_rules) todo = [i for i in computed_rules if i not in done] @@ -229,10 +217,10 @@ def dedupe(self, name: str) -> str: class NullableVisitor(GrammarVisitor): - def __init__(self, rules: Dict[str, Rule]) -> None: + def __init__(self, rules: dict[str, Rule]) -> None: self.rules = rules - self.visited: Set[Any] = set() - self.nullables: Set[Union[Rule, NamedItem]] = set() + self.visited: set[Any] = set() + self.nullables: set[Rule | NamedItem] = set() def visit_Rule(self, rule: Rule) -> bool: if rule in self.visited: @@ -294,7 +282,7 @@ def visit_StringLeaf(self, node: StringLeaf) -> bool: return not node.value -def compute_nullables(rules: Dict[str, Rule]) -> Set[Any]: +def compute_nullables(rules: dict[str, Rule]) -> set[Any]: """Compute which rules in a grammar are nullable. Thanks to TatSu (tatsu/leftrec.py) for inspiration. 
@@ -306,12 +294,12 @@ def compute_nullables(rules: Dict[str, Rule]) -> Set[Any]: class InitialNamesVisitor(GrammarVisitor): - def __init__(self, rules: Dict[str, Rule]) -> None: + def __init__(self, rules: dict[str, Rule]) -> None: self.rules = rules self.nullables = compute_nullables(rules) - def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> Set[Any]: - names: Set[str] = set() + def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> set[Any]: + names: set[str] = set() for value in node: if isinstance(value, list): for item in value: @@ -320,33 +308,33 @@ def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> Set[A names |= self.visit(value, *args, **kwargs) return names - def visit_Alt(self, alt: Alt) -> Set[Any]: - names: Set[str] = set() + def visit_Alt(self, alt: Alt) -> set[Any]: + names: set[str] = set() for item in alt.items: names |= self.visit(item) if item not in self.nullables: break return names - def visit_Forced(self, force: Forced) -> Set[Any]: + def visit_Forced(self, force: Forced) -> set[Any]: return set() - def visit_LookAhead(self, lookahead: Lookahead) -> Set[Any]: + def visit_LookAhead(self, lookahead: Lookahead) -> set[Any]: return set() - def visit_Cut(self, cut: Cut) -> Set[Any]: + def visit_Cut(self, cut: Cut) -> set[Any]: return set() - def visit_NameLeaf(self, node: NameLeaf) -> Set[Any]: + def visit_NameLeaf(self, node: NameLeaf) -> set[Any]: return {node.value} - def visit_StringLeaf(self, node: StringLeaf) -> Set[Any]: + def visit_StringLeaf(self, node: StringLeaf) -> set[Any]: return set() def compute_left_recursives( - rules: Dict[str, Rule] -) -> Tuple[Dict[str, AbstractSet[str]], List[AbstractSet[str]]]: + rules: dict[str, Rule] +) -> tuple[dict[str, Set[str]], list[Set[str]]]: graph = make_first_graph(rules) sccs = list(sccutils.strongly_connected_components(graph.keys(), graph)) for scc in sccs: @@ -374,7 +362,7 @@ def compute_left_recursives( return graph, sccs -def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]: +def make_first_graph(rules: dict[str, Rule]) -> dict[str, Set[str]]: """Compute the graph of left-invocations. 
There's an edge from A to B if A may invoke B at its initial @@ -384,7 +372,7 @@ def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]: """ initial_name_visitor = InitialNamesVisitor(rules) graph = {} - vertices: Set[str] = set() + vertices: set[str] = set() for rulename, rhs in rules.items(): graph[rulename] = names = initial_name_visitor.visit(rhs) vertices |= names diff --git a/Tools/peg_generator/pegen/python_generator.py b/Tools/peg_generator/pegen/python_generator.py index 4bb26480ebc0af..e69ea4b5f4e14c 100644 --- a/Tools/peg_generator/pegen/python_generator.py +++ b/Tools/peg_generator/pegen/python_generator.py @@ -1,6 +1,7 @@ import os.path import token -from typing import IO, Any, Callable, Dict, Optional, Sequence, Set, Text, Tuple +from collections.abc import Callable, Sequence +from typing import IO, Any from pegen import grammar from pegen.grammar import ( @@ -74,10 +75,10 @@ def visit_NegativeLookahead(self, node: NegativeLookahead) -> bool: def visit_Opt(self, node: Opt) -> bool: return self.visit(node.node) - def visit_Repeat(self, node: Repeat0) -> Tuple[str, str]: + def visit_Repeat(self, node: Repeat0) -> tuple[str, str]: return self.visit(node.node) - def visit_Gather(self, node: Gather) -> Tuple[str, str]: + def visit_Gather(self, node: Gather) -> tuple[str, str]: return self.visit(node.node) def visit_Group(self, node: Group) -> bool: @@ -93,9 +94,9 @@ def visit_Forced(self, node: Forced) -> bool: class PythonCallMakerVisitor(GrammarVisitor): def __init__(self, parser_generator: ParserGenerator): self.gen = parser_generator - self.cache: Dict[str, Tuple[str, str]] = {} + self.cache: dict[str, tuple[str, str]] = {} - def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]: + def visit_NameLeaf(self, node: NameLeaf) -> tuple[str | None, str]: name = node.value if name == "SOFT_KEYWORD": return "soft_keyword", "self.soft_keyword()" @@ -108,31 +109,31 @@ def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]: return "_" + name.lower(), f"self.expect({name!r})" return name, f"self.{name}()" - def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]: + def visit_StringLeaf(self, node: StringLeaf) -> tuple[str, str]: return "literal", f"self.expect({node.value})" - def visit_NamedItem(self, node: NamedItem) -> Tuple[Optional[str], str]: + def visit_NamedItem(self, node: NamedItem) -> tuple[str | None, str]: name, call = self.visit(node.item) if node.name: name = node.name return name, call - def lookahead_call_helper(self, node: Lookahead) -> Tuple[str, str]: + def lookahead_call_helper(self, node: Lookahead) -> tuple[str, str]: name, call = self.visit(node.node) head, tail = call.split("(", 1) assert tail[-1] == ")" tail = tail[:-1] return head, tail - def visit_PositiveLookahead(self, node: PositiveLookahead) -> Tuple[None, str]: + def visit_PositiveLookahead(self, node: PositiveLookahead) -> tuple[None, str]: head, tail = self.lookahead_call_helper(node) return None, f"self.positive_lookahead({head}, {tail})" - def visit_NegativeLookahead(self, node: NegativeLookahead) -> Tuple[None, str]: + def visit_NegativeLookahead(self, node: NegativeLookahead) -> tuple[None, str]: head, tail = self.lookahead_call_helper(node) return None, f"self.negative_lookahead({head}, {tail})" - def visit_Opt(self, node: Opt) -> Tuple[str, str]: + def visit_Opt(self, node: Opt) -> tuple[str, str]: name, call = self.visit(node.node) # Note trailing comma (the call may already have one comma # at the end, for example when rules have both 
repeat0 and optional @@ -148,7 +149,7 @@ def _generate_artificial_rule_call( prefix: str, call_by_name_func: Callable[[str], str], rule_generation_func: Callable[[], str], - ) -> Tuple[str, str]: + ) -> tuple[str, str]: node_str = f"{node}" key = f"{prefix}_{node_str}" if key in self.cache: @@ -159,7 +160,7 @@ def _generate_artificial_rule_call( self.cache[key] = name, call return self.cache[key] - def visit_Rhs(self, node: Rhs) -> Tuple[str, str]: + def visit_Rhs(self, node: Rhs) -> tuple[str, str]: if len(node.alts) == 1 and len(node.alts[0].items) == 1: return self.visit(node.alts[0].items[0]) @@ -170,7 +171,7 @@ def visit_Rhs(self, node: Rhs) -> Tuple[str, str]: lambda: self.gen.artificial_rule_from_rhs(node), ) - def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]: + def visit_Repeat0(self, node: Repeat0) -> tuple[str, str]: return self._generate_artificial_rule_call( node, "repeat0", @@ -178,7 +179,7 @@ def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]: lambda: self.gen.artificial_rule_from_repeat(node.node, is_repeat1=False), ) - def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]: + def visit_Repeat1(self, node: Repeat1) -> tuple[str, str]: return self._generate_artificial_rule_call( node, "repeat1", @@ -186,7 +187,7 @@ def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]: lambda: self.gen.artificial_rule_from_repeat(node.node, is_repeat1=True), ) - def visit_Gather(self, node: Gather) -> Tuple[str, str]: + def visit_Gather(self, node: Gather) -> tuple[str, str]: return self._generate_artificial_rule_call( node, "gather", @@ -194,13 +195,13 @@ def visit_Gather(self, node: Gather) -> Tuple[str, str]: lambda: self.gen.artificial_rule_from_gather(node), ) - def visit_Group(self, node: Group) -> Tuple[Optional[str], str]: + def visit_Group(self, node: Group) -> tuple[str | None, str]: return self.visit(node.rhs) - def visit_Cut(self, node: Cut) -> Tuple[str, str]: + def visit_Cut(self, node: Cut) -> tuple[str, str]: return "cut", "True" - def visit_Forced(self, node: Forced) -> Tuple[str, str]: + def visit_Forced(self, node: Forced) -> tuple[str, str]: if isinstance(node.node, Group): _, val = self.visit(node.node.rhs) return "forced", f"self.expect_forced({val}, '''({node.node.rhs!s})''')" @@ -215,10 +216,10 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor): def __init__( self, grammar: grammar.Grammar, - file: Optional[IO[Text]], - tokens: Set[str] = set(token.tok_name.values()), - location_formatting: Optional[str] = None, - unreachable_formatting: Optional[str] = None, + file: IO[str] | None, + tokens: set[str] = set(token.tok_name.values()), + location_formatting: str | None = None, + unreachable_formatting: str | None = None, ): tokens.add("SOFT_KEYWORD") super().__init__(grammar, tokens, file) @@ -355,7 +356,7 @@ def visit_Alt(self, node: Alt, is_loop: bool, is_gather: bool) -> None: if is_loop: self.print(f"children.append({action})") - self.print(f"mark = self._mark()") + self.print("mark = self._mark()") else: if "UNREACHABLE" in action: action = action.replace("UNREACHABLE", self.unreachable_formatting) diff --git a/Tools/peg_generator/pegen/sccutils.py b/Tools/peg_generator/pegen/sccutils.py index da4c9331625dd9..51f618a14d936b 100644 --- a/Tools/peg_generator/pegen/sccutils.py +++ b/Tools/peg_generator/pegen/sccutils.py @@ -1,11 +1,11 @@ # Adapted from mypy (mypy/build.py) under the MIT license. 
-from typing import * +from collections.abc import Iterable, Iterator, Set def strongly_connected_components( - vertices: AbstractSet[str], edges: Dict[str, AbstractSet[str]] -) -> Iterator[AbstractSet[str]]: + vertices: Set[str], edges: dict[str, Set[str]] +) -> Iterator[Set[str]]: """Compute Strongly Connected Components of a directed graph. Args: @@ -20,12 +20,12 @@ def strongly_connected_components( From https://code.activestate.com/recipes/578507-strongly-connected-components-of-a-directed-graph/. """ - identified: Set[str] = set() - stack: List[str] = [] - index: Dict[str, int] = {} - boundaries: List[int] = [] + identified: set[str] = set() + stack: list[str] = [] + index: dict[str, int] = {} + boundaries: list[int] = [] - def dfs(v: str) -> Iterator[Set[str]]: + def dfs(v: str) -> Iterator[set[str]]: index[v] = len(stack) stack.append(v) boundaries.append(index[v]) @@ -50,8 +50,8 @@ def dfs(v: str) -> Iterator[Set[str]]: def topsort( - data: Dict[AbstractSet[str], Set[AbstractSet[str]]] -) -> Iterable[AbstractSet[AbstractSet[str]]]: + data: dict[Set[str], set[Set[str]]] +) -> Iterable[Set[Set[str]]]: """Topological sort. Args: @@ -94,12 +94,12 @@ def topsort( break yield ready data = {item: (dep - ready) for item, dep in data.items() if item not in ready} - assert not data, "A cyclic dependency exists amongst %r" % data + assert not data, f"A cyclic dependency exists amongst {data}" def find_cycles_in_scc( - graph: Dict[str, AbstractSet[str]], scc: AbstractSet[str], start: str -) -> Iterable[List[str]]: + graph: dict[str, Set[str]], scc: Set[str], start: str +) -> Iterable[list[str]]: """Find cycles in SCC emanating from start. Yields lists of the form ['A', 'B', 'C', 'A'], which means there's @@ -117,7 +117,7 @@ def find_cycles_in_scc( assert start in graph # Recursive helper that yields cycles. - def dfs(node: str, path: List[str]) -> Iterator[List[str]]: + def dfs(node: str, path: list[str]) -> Iterator[list[str]]: if node in path: yield path + [node] return diff --git a/Tools/peg_generator/pegen/testutil.py b/Tools/peg_generator/pegen/testutil.py index 0e85b844ef152b..46f6c7fec1bda0 100644 --- a/Tools/peg_generator/pegen/testutil.py +++ b/Tools/peg_generator/pegen/testutil.py @@ -6,7 +6,7 @@ import textwrap import token import tokenize -from typing import IO, Any, Dict, Final, Optional, Type, cast +from typing import IO, Any, Final, cast from pegen.build import compile_c_extension from pegen.c_generator import CParserGenerator @@ -23,19 +23,19 @@ } -def generate_parser(grammar: Grammar) -> Type[Parser]: +def generate_parser(grammar: Grammar) -> type[Parser]: # Generate a parser. out = io.StringIO() genr = PythonParserGenerator(grammar, out) genr.generate("") # Load the generated parser class. - ns: Dict[str, Any] = {} + ns: dict[str, Any] = {} exec(out.getvalue(), ns) return ns["GeneratedParser"] -def run_parser(file: IO[bytes], parser_class: Type[Parser], *, verbose: bool = False) -> Any: +def run_parser(file: IO[bytes], parser_class: type[Parser], *, verbose: bool = False) -> Any: # Run a parser on a file (stream). 
tokenizer = Tokenizer(tokenize.generate_tokens(file.readline)) # type: ignore[arg-type] # typeshed issue #3515 parser = parser_class(tokenizer, verbose=verbose) @@ -46,7 +46,7 @@ def run_parser(file: IO[bytes], parser_class: Type[Parser], *, verbose: bool = F def parse_string( - source: str, parser_class: Type[Parser], *, dedent: bool = True, verbose: bool = False + source: str, parser_class: type[Parser], *, dedent: bool = True, verbose: bool = False ) -> Any: # Run the parser on a string. if dedent: @@ -55,7 +55,7 @@ def parse_string( return run_parser(file, parser_class, verbose=verbose) # type: ignore[arg-type] # typeshed issue #3515 -def make_parser(source: str) -> Type[Parser]: +def make_parser(source: str) -> type[Parser]: # Combine parse_string() and generate_parser(). grammar = parse_string(source, GrammarParser) return generate_parser(grammar) @@ -86,7 +86,7 @@ def generate_parser_c_extension( grammar: Grammar, path: pathlib.PurePath, debug: bool = False, - library_dir: Optional[str] = None, + library_dir: str | None = None, ) -> Any: """Generate a parser c extension for the given grammar in the given path diff --git a/Tools/peg_generator/pegen/tokenizer.py b/Tools/peg_generator/pegen/tokenizer.py index 7ee49e1432b77a..fba5bac9517895 100644 --- a/Tools/peg_generator/pegen/tokenizer.py +++ b/Tools/peg_generator/pegen/tokenizer.py @@ -1,6 +1,6 @@ import token import tokenize -from typing import Dict, Iterator, List +from collections.abc import Iterator Mark = int # NewType('Mark', int) @@ -8,7 +8,11 @@ def shorttok(tok: tokenize.TokenInfo) -> str: - return "%-25.25s" % f"{tok.start[0]}.{tok.start[1]}: {token.tok_name[tok.type]}:{tok.string!r}" + formatted = ( + f"{tok.start[0]}.{tok.start[1]}: " + f"{token.tok_name[tok.type]}:{tok.string!r}" + ) + return f"{formatted:<25.25}" class Tokenizer: @@ -17,7 +21,7 @@ class Tokenizer: This is pretty tied to Python's syntax. """ - _tokens: List[tokenize.TokenInfo] + _tokens: list[tokenize.TokenInfo] def __init__( self, tokengen: Iterator[tokenize.TokenInfo], *, path: str = "", verbose: bool = False @@ -26,7 +30,7 @@ def __init__( self._tokens = [] self._index = 0 self._verbose = verbose - self._lines: Dict[int, str] = {} + self._lines: dict[int, str] = {} self._path = path if verbose: self.report(False, False) @@ -72,7 +76,7 @@ def get_last_non_whitespace_token(self) -> tokenize.TokenInfo: break return tok - def get_lines(self, line_numbers: List[int]) -> List[str]: + def get_lines(self, line_numbers: list[int]) -> list[str]: """Retrieve source lines corresponding to line numbers.""" if self._lines: lines = self._lines diff --git a/Tools/peg_generator/pegen/validator.py b/Tools/peg_generator/pegen/validator.py index 4699d5712d9522..635eb398b41808 100644 --- a/Tools/peg_generator/pegen/validator.py +++ b/Tools/peg_generator/pegen/validator.py @@ -1,5 +1,3 @@ -from typing import Optional - from pegen import grammar from pegen.grammar import Alt, GrammarVisitor, Rhs, Rule @@ -11,7 +9,7 @@ class ValidationError(Exception): class GrammarValidator(GrammarVisitor): def __init__(self, grammar: grammar.Grammar) -> None: self.grammar = grammar - self.rulename: Optional[str] = None + self.rulename: str | None = None def validate_rule(self, rulename: str, node: Rule) -> None: self.rulename = rulename
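Reviewer's note, appended after the diff for context (not part of the patch): the bulk of the churn above is mechanical, produced by the `UP` (pyupgrade) rules the new `.ruff.toml` turns on — PEP 585 builtin generics, PEP 604 unions, and f-strings in place of %-formatting. A minimal before/after sketch; the function and names here are hypothetical, not taken from pegen:

```python
from __future__ import annotations  # defers evaluation, so `X | Y` annotations work on Python < 3.10

# Before: typing-module generics, Optional, and %-formatting.
#   from typing import Dict, List, Optional
#   def first_alt(rules: Dict[str, List[str]], name: Optional[str]) -> Optional[str]:
#       return "rule %s" % name if name else None

# After: builtin generics (PEP 585), `|` unions (PEP 604), an f-string (UP031/UP032).
def first_alt(rules: dict[str, list[str]], name: str | None) -> str | None:
    return f"rule {name}" if name else None
```

One caveat worth noting: the future import only rescues annotations. Runtime expressions such as the new `Plain = Leaf | Group` aliases in `grammar.py` are evaluated eagerly and need Python 3.10+, where `types.UnionType` exists.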
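Similarly, the `unfixable` and `ignore` choices in the new config mirror the rationale already used for `Tools/clinic`: some diagnostics are kept report-only because the autofix needs human judgment. A short sketch of each case (hypothetical code, not from this repo):

```python
import ast

# F841: `tree` is unused. An autofix that deletes the assignment could also
# delete the side-effecting ast.parse() call, so review it by hand instead.
def check(source: str) -> None:
    tree = ast.parse(source)

# F601/F602: a repeated dictionary key. Only the last value survives at
# runtime, and an autofix would have to guess which one the author meant.
COLORS = {"red": "#f00", "red": "#ff0000"}

# UP038 (ignored): the rule would rewrite this tuple into
# `isinstance(x, int | float)`, which builds a types.UnionType and
# benchmarks slower than the tuple form (see the ruff issue linked
# in the config above).
def is_number(x: object) -> bool:
    return isinstance(x, (int, float))
```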