diff --git a/mypyc/annotate.py b/mypyc/annotate.py index 3368a68832bd..6736ca63c9e8 100644 --- a/mypyc/annotate.py +++ b/mypyc/annotate.py @@ -13,19 +13,31 @@ from mypy.build import BuildResult from mypy.nodes import ( + AssignmentStmt, CallExpr, + ClassDef, + Decorator, + DictionaryComprehension, Expression, ForStmt, FuncDef, + GeneratorExpr, + IndexExpr, LambdaExpr, MemberExpr, MypyFile, + NamedTupleExpr, NameExpr, + NewTypeExpr, Node, + OpExpr, RefExpr, TupleExpr, + TypedDictExpr, TypeInfo, + TypeVarExpr, Var, + WithStmt, ) from mypy.traverser import TraverserVisitor from mypy.types import AnyType, Instance, ProperType, Type, TypeOfAny, get_proper_type @@ -33,6 +45,7 @@ from mypyc.ir.func_ir import FuncIR from mypyc.ir.module_ir import ModuleIR from mypyc.ir.ops import CallC, LoadLiteral, LoadStatic, Value +from mypyc.irbuild.mapper import Mapper class Annotation: @@ -71,18 +84,21 @@ def __init__(self, message: str, priority: int = 1) -> None: stdlib_hints: Final = { "functools.partial": Annotation( - '"functools.partial" is inefficient in compiled code.', priority=2 + '"functools.partial" is inefficient in compiled code.', priority=3 ), "itertools.chain": Annotation( '"itertools.chain" is inefficient in compiled code (hint: replace with for loops).', - priority=2, + priority=3, ), "itertools.groupby": Annotation( - '"itertools.groupby" is inefficient in compiled code.', priority=2 + '"itertools.groupby" is inefficient in compiled code.', priority=3 ), "itertools.islice": Annotation( '"itertools.islice" is inefficient in compiled code (hint: replace with for loop over index range).', - priority=2, + priority=3, + ), + "copy.deepcopy": Annotation( + '"copy.deepcopy" tends to be slow. Make a shallow copy if possible.', priority=2 ), } @@ -127,14 +143,16 @@ def __init__(self, path: str, annotations: dict[int, list[Annotation]]) -> None: def generate_annotated_html( - html_fnam: str, result: BuildResult, modules: dict[str, ModuleIR] + html_fnam: str, result: BuildResult, modules: dict[str, ModuleIR], mapper: Mapper ) -> None: annotations = [] for mod, mod_ir in modules.items(): path = result.graph[mod].path tree = result.graph[mod].tree assert tree is not None - annotations.append(generate_annotations(path or "", tree, mod_ir, result.types)) + annotations.append( + generate_annotations(path or "", tree, mod_ir, result.types, mapper) + ) html = generate_html_report(annotations) with open(html_fnam, "w") as f: f.write(html) @@ -145,15 +163,18 @@ def generate_annotated_html( def generate_annotations( - path: str, tree: MypyFile, ir: ModuleIR, type_map: dict[Expression, Type] + path: str, tree: MypyFile, ir: ModuleIR, type_map: dict[Expression, Type], mapper: Mapper ) -> AnnotatedSource: anns = {} for func_ir in ir.functions: anns.update(function_annotations(func_ir, tree)) - visitor = ASTAnnotateVisitor(type_map) + visitor = ASTAnnotateVisitor(type_map, mapper) for defn in tree.defs: defn.accept(visitor) anns.update(visitor.anns) + for line in visitor.ignored_lines: + if line in anns: + del anns[line] return AnnotatedSource(path, anns) @@ -168,18 +189,28 @@ def function_annotations(func_ir: FuncIR, tree: MypyFile) -> dict[int, list[Anno ann: str | Annotation | None = None if name == "CPyObject_GetAttr": attr_name = get_str_literal(op.args[1]) - if attr_name == "__prepare__": - # These attributes are internal to mypyc/CPython, and the user has - # little control over them. + if attr_name in ("__prepare__", "GeneratorExit", "StopIteration"): + # These attributes are internal to mypyc/CPython, and/or accessed + # implicitly in generated code. The user has little control over + # them. ann = None elif attr_name: ann = f'Get non-native attribute "{attr_name}".' else: ann = "Dynamic attribute lookup." + elif name == "PyObject_SetAttr": + attr_name = get_str_literal(op.args[1]) + if attr_name == "__mypyc_attrs__": + # This is set implicitly and can't be avoided. + ann = None + elif attr_name: + ann = f'Set non-native attribute "{attr_name}".' + else: + ann = "Dynamic attribute set." elif name == "PyObject_VectorcallMethod": method_name = get_str_literal(op.args[0]) if method_name: - ann = f'Call non-native method "{method_name}".' + ann = f'Call non-native method "{method_name}" (it may be defined in a non-native class, or decorated).' else: ann = "Dynamic method call." elif name in op_hints: @@ -218,10 +249,12 @@ def function_annotations(func_ir: FuncIR, tree: MypyFile) -> dict[int, list[Anno class ASTAnnotateVisitor(TraverserVisitor): """Generate annotations from mypy AST and inferred types.""" - def __init__(self, type_map: dict[Expression, Type]) -> None: + def __init__(self, type_map: dict[Expression, Type], mapper: Mapper) -> None: self.anns: dict[int, list[Annotation]] = {} + self.ignored_lines: set[int] = set() self.func_depth = 0 self.type_map = type_map + self.mapper = mapper def visit_func_def(self, o: FuncDef, /) -> None: if self.func_depth > 0: @@ -235,21 +268,84 @@ def visit_func_def(self, o: FuncDef, /) -> None: self.func_depth -= 1 def visit_for_stmt(self, o: ForStmt, /) -> None: - typ = self.get_type(o.expr) - if isinstance(typ, AnyType): - self.annotate(o.expr, 'For loop uses generic operations (iterable has type "Any").') - elif isinstance(typ, Instance) and typ.type.fullname in ( - "typing.Iterable", - "typing.Iterator", - "typing.Sequence", - "typing.MutableSequence", - ): - self.annotate( - o.expr, - f'For loop uses generic operations (iterable has the abstract type "{typ.type.fullname}").', - ) + self.check_iteration([o.expr], "For loop") super().visit_for_stmt(o) + def visit_dictionary_comprehension(self, o: DictionaryComprehension, /) -> None: + self.check_iteration(o.sequences, "Comprehension") + super().visit_dictionary_comprehension(o) + + def visit_generator_expr(self, o: GeneratorExpr, /) -> None: + self.check_iteration(o.sequences, "Comprehension or generator") + super().visit_generator_expr(o) + + def check_iteration(self, expressions: list[Expression], kind: str) -> None: + for expr in expressions: + typ = self.get_type(expr) + if isinstance(typ, AnyType): + self.annotate(expr, f'{kind} uses generic operations (iterable has type "Any").') + elif isinstance(typ, Instance) and typ.type.fullname in ( + "typing.Iterable", + "typing.Iterator", + "typing.Sequence", + "typing.MutableSequence", + ): + self.annotate( + expr, + f'{kind} uses generic operations (iterable has the abstract type "{typ.type.fullname}").', + ) + + def visit_class_def(self, o: ClassDef, /) -> None: + super().visit_class_def(o) + if self.func_depth == 0: + # Don't complain about base classes at top level + for base in o.base_type_exprs: + self.ignored_lines.add(base.line) + + for s in o.defs.body: + if isinstance(s, AssignmentStmt): + # Don't complain about attribute initializers + self.ignored_lines.add(s.line) + elif isinstance(s, Decorator): + # Don't complain about decorator definitions that generate some + # dynamic operations. This is a bit heavy-handed. + self.ignored_lines.add(s.func.line) + + def visit_with_stmt(self, o: WithStmt, /) -> None: + for expr in o.expr: + if isinstance(expr, CallExpr) and isinstance(expr.callee, RefExpr): + node = expr.callee.node + if isinstance(node, Decorator): + if any( + isinstance(d, RefExpr) + and d.node + and d.node.fullname == "contextlib.contextmanager" + for d in node.decorators + ): + self.annotate( + expr, + f'"{node.name}" uses @contextmanager, which is slow ' + + "in compiled code. Use a native class with " + + '"__enter__" and "__exit__" methods instead.', + priority=3, + ) + super().visit_with_stmt(o) + + def visit_assignment_stmt(self, o: AssignmentStmt, /) -> None: + special_form = False + if self.func_depth == 0: + analyzed: Expression | None = o.rvalue + if isinstance(o.rvalue, (CallExpr, IndexExpr, OpExpr)): + analyzed = o.rvalue.analyzed + if o.is_alias_def or isinstance( + analyzed, (TypeVarExpr, NamedTupleExpr, TypedDictExpr, NewTypeExpr) + ): + special_form = True + if special_form: + # TODO: Ignore all lines if multi-line + self.ignored_lines.add(o.line) + super().visit_assignment_stmt(o) + def visit_name_expr(self, o: NameExpr, /) -> None: if ann := stdlib_hints.get(o.fullname): self.annotate(o, ann) @@ -268,6 +364,30 @@ def visit_call_expr(self, o: CallExpr, /) -> None: ): arg = o.args[1] self.check_isinstance_arg(arg) + elif isinstance(o.callee, RefExpr) and isinstance(o.callee.node, TypeInfo): + info = o.callee.node + class_ir = self.mapper.type_to_ir.get(info) + if (class_ir and not class_ir.is_ext_class) or ( + class_ir is None and not info.fullname.startswith("builtins.") + ): + self.annotate( + o, f'Creating an instance of non-native class "{info.name}" ' + "is slow.", 2 + ) + elif class_ir and class_ir.is_augmented: + self.annotate( + o, + f'Class "{info.name}" is only partially native, and ' + + "constructing an instance is slow.", + 2, + ) + elif isinstance(o.callee, RefExpr) and isinstance(o.callee.node, Decorator): + decorator = o.callee.node + if self.mapper.is_native_ref_expr(o.callee): + self.annotate( + o, + f'Calling a decorated function ("{decorator.name}") is inefficient, even if it\'s native.', + 2, + ) def check_isinstance_arg(self, arg: Expression) -> None: if isinstance(arg, RefExpr): @@ -287,9 +407,9 @@ def visit_lambda_expr(self, o: LambdaExpr, /) -> None: ) super().visit_lambda_expr(o) - def annotate(self, o: Node, ann: str | Annotation) -> None: + def annotate(self, o: Node, ann: str | Annotation, priority: int = 1) -> None: if isinstance(ann, str): - ann = Annotation(ann) + ann = Annotation(ann, priority=priority) self.anns.setdefault(o.line, []).append(ann) def get_type(self, e: Expression) -> ProperType: diff --git a/mypyc/build.py b/mypyc/build.py index cb05cda991d9..1a74d4692d17 100644 --- a/mypyc/build.py +++ b/mypyc/build.py @@ -242,7 +242,7 @@ def generate_c( print(f"Parsed and typechecked in {t1 - t0:.3f}s") errors = Errors(options) - modules, ctext = emitmodule.compile_modules_to_c( + modules, ctext, mapper = emitmodule.compile_modules_to_c( result, compiler_options=compiler_options, errors=errors, groups=groups ) t2 = time.time() @@ -255,7 +255,7 @@ def generate_c( print(f"Compiled to C in {t2 - t1:.3f}s") if options.mypyc_annotation_file: - generate_annotated_html(options.mypyc_annotation_file, result, modules) + generate_annotated_html(options.mypyc_annotation_file, result, modules, mapper) return ctext, "\n".join(format_modules(modules)) diff --git a/mypyc/codegen/emitmodule.py b/mypyc/codegen/emitmodule.py index 1ec3064eb5b9..713fa5c51fa1 100644 --- a/mypyc/codegen/emitmodule.py +++ b/mypyc/codegen/emitmodule.py @@ -397,7 +397,7 @@ def load_scc_from_cache( def compile_modules_to_c( result: BuildResult, compiler_options: CompilerOptions, errors: Errors, groups: Groups -) -> tuple[ModuleIRs, list[FileContents]]: +) -> tuple[ModuleIRs, list[FileContents], Mapper]: """Compile Python module(s) to the source of Python C extension modules. This generates the source code for the "shared library" module @@ -427,12 +427,12 @@ def compile_modules_to_c( modules = compile_modules_to_ir(result, mapper, compiler_options, errors) if errors.num_errors > 0: - return {}, [] + return {}, [], Mapper({}) ctext = compile_ir_to_c(groups, modules, result, mapper, compiler_options) write_cache(modules, result, group_map, ctext) - return modules, [ctext[name] for _, name in groups] + return modules, [ctext[name] for _, name in groups], mapper def generate_function_declaration(fn: FuncIR, emitter: Emitter) -> None: diff --git a/mypyc/test-data/annotate-basic.test b/mypyc/test-data/annotate-basic.test index 23e9ae8814ca..c9e1c4b64a32 100644 --- a/mypyc/test-data/annotate-basic.test +++ b/mypyc/test-data/annotate-basic.test @@ -7,12 +7,23 @@ def f1(x): def f2(x: Any) -> object: return x.foo # A: Get non-native attribute "foo". +def f3(x): + x.bar = 1 # A: Set non-native attribute "bar". + class C: foo: int -def f3(x: C) -> int: + def method(self) -> int: + return self.foo + +def good1(x: C) -> int: return x.foo +[case testAnnotateMethod] +class C: + def method(self, x): + return x + "y" # A: Generic "+" operation. + [case testAnnotateGenericBinaryOperations] def generic_add(x): return x + 1 # A: Generic "+" operation. @@ -98,11 +109,11 @@ def f(x): from typing import Any def f1(x): - return x.foo() # A: Call non-native method "foo". + return x.foo() # A: Call non-native method "foo" (it may be defined in a non-native class, or decorated). def f2(x: Any) -> None: - x.foo(1) # A: Call non-native method "foo". - x.foo(a=1) # A: Call non-native method "foo". + x.foo(1) # A: Call non-native method "foo" (it may be defined in a non-native class, or decorated). + x.foo(a=1) # A: Call non-native method "foo" (it may be defined in a non-native class, or decorated). t = (1, 'x') x.foo(*t) # A: Get non-native attribute "foo". Generic call operation. d = {"a": 1} @@ -175,7 +186,7 @@ def startswith(x: str) -> bool: return x.startswith('foo') def islower(x: str) -> bool: - return x.islower() # A: Call non-native method "islower". + return x.islower() # A: Call non-native method "islower" (it may be defined in a non-native class, or decorated). [case testAnnotateSpecificStdlibFeatures] import functools @@ -244,6 +255,24 @@ def good2(a: List[str]) -> None: for x in a: pass +[case testAnnotateGenericComprehensionOrGenerator] +from typing import List, Iterable + +def f1(a): + return [x for x in a] # A: Comprehension or generator uses generic operations (iterable has type "Any"). + +def f2(a: Iterable[int]): + return {x for x in a} # A: Comprehension or generator uses generic operations (iterable has the abstract type "typing.Iterable"). + +def f3(a): + return {x: 1 for x in a} # A: Comprehension uses generic operations (iterable has type "Any"). + +def f4(a): + return (x for x in a) # A: Comprehension or generator uses generic operations (iterable has type "Any"). + +def good1(a: List[int]) -> List[int]: + return [x + 1 for x in a] + [case testAnnotateIsinstance] from typing import Protocol, runtime_checkable, Union @@ -273,3 +302,176 @@ def good2(x: Union[int, str]) -> int: else: return int(x + "1") [typing fixtures/typing-full.pyi] + +[case testAnnotateDeepcopy] +from typing import Any +import copy + +def f(x: Any) -> Any: + return copy.deepcopy(x) # A: "copy.deepcopy" tends to be slow. Make a shallow copy if possible. + +[case testAnnotateContextManager] +from typing import Iterator +from contextlib import contextmanager + +@contextmanager +def slow_ctx_manager() -> Iterator[None]: + yield + +class FastCtxManager: + def __enter__(self) -> None: pass + def __exit__(self, a, b, c) -> None: pass + +def f1(x) -> None: + with slow_ctx_manager(): # A: "slow_ctx_manager" uses @contextmanager, which is slow in compiled code. Use a native class with "__enter__" and "__exit__" methods instead. + x.foo # A: Get non-native attribute "foo". + +def f2(x) -> None: + with FastCtxManager(): + x.foo # A: Get non-native attribute "foo". + +[case testAnnotateAvoidNoiseAtTopLevel] +from typing import Final + +class C(object): + x = "s" + y: Final = 1 + +x = "s" +y: Final = 1 + +def f1() -> None: + x = object # A: Get non-native attribute "object". + +[case testAnnotateCreateNonNativeInstance] +from typing import NamedTuple +from dataclasses import dataclass + +from nonnative import C + +def f1() -> None: + c = C() # A: Creating an instance of non-native class "C" is slow. + c.foo() # A: Call non-native method "foo" (it may be defined in a non-native class, or decorated). + +class NT(NamedTuple): + x: int + y: str + +def f2() -> int: + o = NT(1, "x") # A: Creating an instance of non-native class "NT" is slow. + return o.x + +def f3() -> int: + o = NT(x=1, y="x") # A: Creating an instance of non-native class "NT" is slow. + a, b = o + return a + +@dataclass +class D: + x: int + +def f4() -> int: + o = D(1) # A: Class "D" is only partially native, and constructing an instance is slow. + return o.x + +class Nat: + x: int + +class Deriv(Nat): + def __init__(self, y: int) -> None: + self.y = y + +def good1() -> int: + n = Nat() + d = Deriv(y=1) + return n.x + d.x + d.y + +[file nonnative.py] +class C: + def foo(self) -> None: pass + +[case testAnnotateGetAttrAndSetAttrBuiltins] +def f1(x, s: str): + return getattr("x", s) # A: Dynamic attribute lookup. + +def f2(x, s: str): + setattr(x, s, None) # A: Dynamic attribute set. + +[case testAnnotateSpecialAssignments] +from typing import TypeVar, NamedTuple, List, TypedDict, NewType + +# Even though these are slow, we don't complain about them since there is generally +# no better way (and at module top level these are very unlikely to be bottlenecks) +A = List[int] +T = TypeVar("T", bound=List[int]) +NT = NamedTuple("NT", [("x", List[int])]) +TD = TypedDict("TD", {"x": List[int]}) +New = NewType("New", List[int]) +[typing fixtures/typing-full.pyi] + +[case testAnnotateCallDecoratedNativeFunctionOrMethod] +from typing import TypeVar, Callable, Any + +F = TypeVar("F", bound=Callable[..., Any]) + +def mydeco(f: F) -> F: + return f + +@mydeco +def d(x: int) -> int: + return x + +def f1() -> int: + return d(1) # A: Calling a decorated function ("d") is inefficient, even if it's native. + +class C: + @mydeco + def d(self) -> None: + pass + + +def f2() -> None: + c = C() + c.d() # A: Call non-native method "d" (it may be defined in a non-native class, or decorated). + +[case testAnnotateCallDifferentKindsOfMethods] +from abc import ABC, abstractmethod + +class C: + @staticmethod + def s() -> None: ... + + @classmethod + def c(cls) -> None: ... + + @property + def p(self) -> int: + return 0 + + @property + def p2(self) -> int: + return 0 + + @p2.setter + def p2(self, x: int) -> None: + pass + +def f1() -> int: + c = C() + c.s() + c.c() + c.p2 = 1 + return c.p + c.p2 + +class A(ABC): + @abstractmethod + def m(self) -> int: + raise NotImplementedError # A: Get non-native attribute "NotImplementedError". + +class D(A): + def m(self) -> int: + return 1 + +def f2() -> int: + d = D() + return d.m() diff --git a/mypyc/test/test_annotate.py b/mypyc/test/test_annotate.py index bb4941064bdb..4a9a2c1a1b93 100644 --- a/mypyc/test/test_annotate.py +++ b/mypyc/test/test_annotate.py @@ -44,13 +44,15 @@ def run_case(self, testcase: DataDrivenTestCase) -> None: ir = None try: - ir, tree, type_map = build_ir_for_single_file2(testcase.input, options) + ir, tree, type_map, mapper = build_ir_for_single_file2(testcase.input, options) except CompileError as e: actual = e.messages else: - annotations = generate_annotations("native.py", tree, ir, type_map) + annotations = generate_annotations("native.py", tree, ir, type_map, mapper) actual = [] - for line_num, line_anns in annotations.annotations.items(): + for line_num, line_anns in sorted( + annotations.annotations.items(), key=lambda it: it[0] + ): anns = get_max_prio(line_anns) str_anns = [a.message for a in anns] s = " ".join(str_anns) diff --git a/mypyc/test/test_run.py b/mypyc/test/test_run.py index f4798660079f..e5b7e2421433 100644 --- a/mypyc/test/test_run.py +++ b/mypyc/test/test_run.py @@ -251,7 +251,7 @@ def run_case_step(self, testcase: DataDrivenTestCase, incremental_step: int) -> alt_lib_path=".", ) errors = Errors(options) - ir, cfiles = emitmodule.compile_modules_to_c( + ir, cfiles, _ = emitmodule.compile_modules_to_c( result, compiler_options=compiler_options, errors=errors, groups=groups ) if errors.num_errors: diff --git a/mypyc/test/testutil.py b/mypyc/test/testutil.py index 1961c47e85ee..7b56b8aa0dec 100644 --- a/mypyc/test/testutil.py +++ b/mypyc/test/testutil.py @@ -100,7 +100,7 @@ def build_ir_for_single_file( def build_ir_for_single_file2( input_lines: list[str], compiler_options: CompilerOptions | None = None -) -> tuple[ModuleIR, MypyFile, dict[Expression, Type]]: +) -> tuple[ModuleIR, MypyFile, dict[Expression, Type], Mapper]: program_text = "\n".join(input_lines) # By default generate IR compatible with the earliest supported Python C API. @@ -125,13 +125,9 @@ def build_ir_for_single_file2( raise CompileError(result.errors) errors = Errors(options) + mapper = Mapper({"__main__": None}) modules = build_ir( - [result.files["__main__"]], - result.graph, - result.types, - Mapper({"__main__": None}), - compiler_options, - errors, + [result.files["__main__"]], result.graph, result.types, mapper, compiler_options, errors ) if errors.num_errors: raise CompileError(errors.new_messages()) @@ -141,7 +137,7 @@ def build_ir_for_single_file2( assert_func_ir_valid(fn) tree = result.graph[module.fullname].tree assert tree is not None - return module, tree, result.types + return module, tree, result.types, mapper def update_testcase_output(testcase: DataDrivenTestCase, output: list[str]) -> None: