Skip to content
Merged
58 changes: 25 additions & 33 deletions mypy/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
from mypy.graph_utils import prepare_sccs, strongly_connected_components, topsort
from mypy.indirection import TypeIndirectionVisitor
from mypy.messages import MessageBuilder
from mypy.nodes import Import, ImportAll, ImportBase, ImportFrom, MypyFile, SymbolTable, TypeInfo
from mypy.nodes import Import, ImportAll, ImportBase, ImportFrom, MypyFile, SymbolTable
from mypy.partially_defined import PossiblyUndefinedVariableVisitor
from mypy.semanal import SemanticAnalyzer
from mypy.semanal_pass1 import SemanticAnalyzerPreAnalysis
Expand Down Expand Up @@ -1765,26 +1765,24 @@ def delete_cache(id: str, path: str, manager: BuildManager) -> None:

For single nodes, processing is simple. If the node was cached, we
deserialize the cache data and fix up cross-references. Otherwise, we
do semantic analysis followed by type checking. We also handle (c)
above; if a module has valid cache data *but* any of its
dependencies was processed from source, then the module should be
processed from source.

A relatively simple optimization (outside SCCs) we might do in the
future is as follows: if a node's cache data is valid, but one or more
of its dependencies are out of date so we have to re-parse the node
from source, once we have fully type-checked the node, we can decide
whether its symbol table actually changed compared to the cache data
(by reading the cache data and comparing it to the data we would be
writing). If there is no change we can declare the node up to date,
and any node that depends (and for which we have cached data, and
whose other dependencies are up to date) on it won't need to be
re-parsed from source.
do semantic analysis followed by type checking. Once we have (re-)processed
an SCC, we check whether its interface (symbol table) is still fresh
(i.e. matches the previously cached value). If it is not, we consider dependent
SCCs stale so that they need to be re-parsed as well.

Note on indirect dependencies: normally dependencies are determined from
imports, but since our interfaces are "opaque" (i.e. symbol tables can
contain cross-references as well as types identified by name), these are not
enough. We *must* also add "indirect" dependencies from symbols and types to
their definitions. For this purpose, we record all accessed symbols during
semantic analysis, and after we have finished processing a module, we traverse its
type map and, for each type, find (transitively) the named types on which it
depends.

Import cycles
-------------

Finally we have to decide how to handle (c), import cycles. Here
Finally we have to decide how to handle (b), import cycles. Here
we'll need a modified version of the original state machine
(build.py), but we only need to do this per SCC, and we won't have to
deal with changes to the list of nodes while we're processing it.
Expand Down Expand Up @@ -2409,21 +2407,15 @@ def finish_passes(self) -> None:

# We should always patch indirect dependencies, even in full (non-incremental) builds,
# because the cache still may be written, and it must be correct.
# TODO: find a more robust way to traverse *all* relevant types?
all_types = list(self.type_map().values())
for _, sym, _ in self.tree.local_definitions():
if sym.type is not None:
all_types.append(sym.type)
if isinstance(sym.node, TypeInfo):
# TypeInfo symbols have some extra relevant types.
all_types.extend(sym.node.bases)
if sym.node.metaclass_type:
all_types.append(sym.node.metaclass_type)
if sym.node.typeddict_type:
all_types.append(sym.node.typeddict_type)
if sym.node.tuple_type:
all_types.append(sym.node.tuple_type)
self._patch_indirect_dependencies(self.type_checker().module_refs, all_types)
self._patch_indirect_dependencies(
# Two possible sources of indirect dependencies:
# * Symbols not directly imported in this module but accessed via an attribute
# or via a re-export (the vast majority of these are recorded during semantic analysis).
# * For each expression type we need to record definitions of type components
# since the "meaning" of the type may be updated when definitions are updated.
self.tree.module_refs | self.type_checker().module_refs,
set(self.type_map().values()),
)

if self.options.dump_inference_stats:
dump_type_stats(
Expand Down Expand Up @@ -2452,7 +2444,7 @@ def free_state(self) -> None:
self._type_checker.reset()
self._type_checker = None

def _patch_indirect_dependencies(self, module_refs: set[str], types: list[Type]) -> None:
def _patch_indirect_dependencies(self, module_refs: set[str], types: set[Type]) -> None:
assert None not in types
valid = self.valid_references()

Expand Down
6 changes: 2 additions & 4 deletions mypy/checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,11 +378,9 @@ class TypeChecker(NodeVisitor[None], TypeCheckerSharedApi):
inferred_attribute_types: dict[Var, Type] | None = None
# Don't infer partial None types if we are processing assignment from Union
no_partial_types: bool = False

# The set of all dependencies (suppressed or not) that this module accesses, either
# directly or indirectly.
# Extra module references not detected during semantic analysis (these are rare cases,
# e.g. access to a class-level import via an instance).
module_refs: set[str]

# A map from variable nodes to a snapshot of the frame ids of the
# frames that were active when the variable was declared. This can
# be used to determine nearest common ancestor frame of a variable's
Expand Down
33 changes: 0 additions & 33 deletions mypy/checkexpr.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,6 @@
)
from mypy.typestate import type_state
from mypy.typevars import fill_typevars
from mypy.util import split_module_names
from mypy.visitor import ExpressionVisitor

# Type of callback used for checking individual function arguments. See
Expand Down Expand Up @@ -248,36 +247,6 @@ def allow_fast_container_literal(t: Type) -> bool:
)


def extract_refexpr_names(expr: RefExpr, output: set[str]) -> None:
"""Recursively extracts all module references from a reference expression.

Note that currently, the only two subclasses of RefExpr are NameExpr and
MemberExpr."""
while isinstance(expr.node, MypyFile) or expr.fullname:
if isinstance(expr.node, MypyFile) and expr.fullname:
# If it's None, something's wrong (perhaps due to an
# import cycle or a suppressed error). For now we just
# skip it.
output.add(expr.fullname)

if isinstance(expr, NameExpr):
is_suppressed_import = isinstance(expr.node, Var) and expr.node.is_suppressed_import
if isinstance(expr.node, TypeInfo):
# Reference to a class or a nested class
output.update(split_module_names(expr.node.module_name))
elif "." in expr.fullname and not is_suppressed_import:
# Everything else (that is not a silenced import within a class)
output.add(expr.fullname.rsplit(".", 1)[0])
break
elif isinstance(expr, MemberExpr):
if isinstance(expr.expr, RefExpr):
expr = expr.expr
else:
break
else:
raise AssertionError(f"Unknown RefExpr subclass: {type(expr)}")


class Finished(Exception):
"""Raised if we can terminate overload argument check early (no match)."""

Expand Down Expand Up @@ -370,7 +339,6 @@ def visit_name_expr(self, e: NameExpr) -> Type:

It can be of any kind: local, member or global.
"""
extract_refexpr_names(e, self.chk.module_refs)
result = self.analyze_ref_expr(e)
narrowed = self.narrow_type_from_binder(e, result)
self.chk.check_deprecated(e.node, e)
Expand Down Expand Up @@ -3344,7 +3312,6 @@ def check_union_call(

def visit_member_expr(self, e: MemberExpr, is_lvalue: bool = False) -> Type:
"""Visit member expression (of form e.id)."""
extract_refexpr_names(e, self.chk.module_refs)
result = self.analyze_ordinary_member_access(e, is_lvalue)
narrowed = self.narrow_type_from_binder(e, result)
self.chk.warn_deprecated(e.node, e)
Expand Down
2 changes: 2 additions & 0 deletions mypy/checkmember.py
Original file line number Diff line number Diff line change
Expand Up @@ -543,6 +543,8 @@ def analyze_member_var_access(
if isinstance(v, FuncDef):
assert False, "Did not expect a function"
if isinstance(v, MypyFile):
# Special case: accessing a module via an instance is allowed, but it will not
# be recorded by the semantic analyzer.
mx.chk.module_refs.add(v.fullname)

if isinstance(vv, (TypeInfo, TypeAlias, MypyFile, TypeVarLikeExpr)):
Expand Down
2 changes: 1 addition & 1 deletion mypy/fixup.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,4 +441,4 @@ def missing_info(modules: dict[str, MypyFile]) -> TypeInfo:

def missing_alias() -> TypeAlias:
suggestion = _SUGGESTION.format("alias")
return TypeAlias(AnyType(TypeOfAny.special_form), suggestion, line=-1, column=-1)
return TypeAlias(AnyType(TypeOfAny.special_form), suggestion, "<missing>", line=-1, column=-1)
101 changes: 56 additions & 45 deletions mypy/indirection.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,6 @@

import mypy.types as types
from mypy.types import TypeVisitor
from mypy.util import split_module_names


def extract_module_names(type_name: str | None) -> list[str]:
"""Returns the module names of a fully qualified type name."""
if type_name is not None:
# Discard the first one, which is just the qualified name of the type
possible_module_names = split_module_names(type_name)
return possible_module_names[1:]
else:
return []


class TypeIndirectionVisitor(TypeVisitor[None]):
Expand All @@ -23,49 +12,57 @@ class TypeIndirectionVisitor(TypeVisitor[None]):
def __init__(self) -> None:
# Module references are collected here
self.modules: set[str] = set()
# User to avoid infinite recursion with recursive type aliases
self.seen_aliases: set[types.TypeAliasType] = set()
# Used to avoid redundant work
self.seen_fullnames: set[str] = set()
# Used to avoid infinite recursion with recursive types
self.seen_types: set[types.TypeAliasType | types.Instance] = set()

def find_modules(self, typs: Iterable[types.Type]) -> set[str]:
self.modules = set()
self.seen_fullnames = set()
self.seen_aliases = set()
self.seen_types = set()
for typ in typs:
self._visit(typ)
return self.modules

def _visit(self, typ: types.Type) -> None:
if isinstance(typ, types.TypeAliasType):
# Avoid infinite recursion for recursive type aliases.
self.seen_aliases.add(typ)
# Note: instances are needed for `class str(Sequence[str]): ...`
if (
isinstance(typ, types.TypeAliasType)
or isinstance(typ, types.ProperType)
and isinstance(typ, types.Instance)
):
# Avoid infinite recursion for recursive types.
if typ in self.seen_types:
return
self.seen_types.add(typ)
typ.accept(self)

def _visit_type_tuple(self, typs: tuple[types.Type, ...]) -> None:
# Micro-optimization: Specialized version of _visit for tuples
for typ in typs:
if isinstance(typ, types.TypeAliasType):
# Avoid infinite recursion for recursive type aliases.
if typ in self.seen_aliases:
if (
isinstance(typ, types.TypeAliasType)
or isinstance(typ, types.ProperType)
and isinstance(typ, types.Instance)
):
# Avoid infinite recursion for recursive types.
if typ in self.seen_types:
continue
self.seen_aliases.add(typ)
self.seen_types.add(typ)
typ.accept(self)

def _visit_type_list(self, typs: list[types.Type]) -> None:
# Micro-optimization: Specialized version of _visit for lists
for typ in typs:
if isinstance(typ, types.TypeAliasType):
# Avoid infinite recursion for recursive type aliases.
if typ in self.seen_aliases:
if (
isinstance(typ, types.TypeAliasType)
or isinstance(typ, types.ProperType)
and isinstance(typ, types.Instance)
):
# Avoid infinite recursion for recursive types.
if typ in self.seen_types:
continue
self.seen_aliases.add(typ)
self.seen_types.add(typ)
typ.accept(self)

def _visit_module_name(self, module_name: str) -> None:
if module_name not in self.modules:
self.modules.update(split_module_names(module_name))

def visit_unbound_type(self, t: types.UnboundType) -> None:
self._visit_type_tuple(t.args)

Expand Down Expand Up @@ -105,27 +102,36 @@ def visit_parameters(self, t: types.Parameters) -> None:
self._visit_type_list(t.arg_types)

def visit_instance(self, t: types.Instance) -> None:
# Instance is named, record its definition and continue digging into
# components that constitute semantic meaning of this type: bases, metaclass,
# tuple type, and typeddict type.
# Note: we cannot simply record the MRO. If an intermediate base contains
# a reference to a type alias, this affects the meaning of map_instance_to_supertype(),
# see e.g. testDoubleReexportGenericUpdated.
self._visit_type_tuple(t.args)
if t.type:
# Uses of a class depend on everything in the MRO,
# as changes to classes in the MRO can add types to methods,
# change property types, change the MRO itself, etc.
# Important optimization: instead of simply recording the definition and
# recursing into bases, record the MRO and only traverse generic bases.
for s in t.type.mro:
self._visit_module_name(s.module_name)
if t.type.metaclass_type is not None:
self._visit_module_name(t.type.metaclass_type.type.module_name)
self.modules.add(s.module_name)
for base in s.bases:
if base.args:
self._visit_type_tuple(base.args)
if t.type.metaclass_type:
self._visit(t.type.metaclass_type)
if t.type.typeddict_type:
self._visit(t.type.typeddict_type)
if t.type.tuple_type:
self._visit(t.type.tuple_type)

def visit_callable_type(self, t: types.CallableType) -> None:
self._visit_type_list(t.arg_types)
self._visit(t.ret_type)
if t.definition is not None:
fullname = t.definition.fullname
if fullname not in self.seen_fullnames:
self.modules.update(extract_module_names(t.definition.fullname))
self.seen_fullnames.add(fullname)
self._visit_type_tuple(t.variables)

def visit_overloaded(self, t: types.Overloaded) -> None:
self._visit_type_list(list(t.items))
for item in t.items:
self._visit(item)
self._visit(t.fallback)

def visit_tuple_type(self, t: types.TupleType) -> None:
Expand All @@ -149,4 +155,9 @@ def visit_type_type(self, t: types.TypeType) -> None:
self._visit(t.item)

def visit_type_alias_type(self, t: types.TypeAliasType) -> None:
self._visit(types.get_proper_type(t))
# Type alias is named, record its definition and continue digging into
# components that constitute semantic meaning of this type: target and args.
if t.alias:
self.modules.add(t.alias.module)
self._visit(t.alias.target)
self._visit_type_list(t.args)
Loading
Loading