From 8853a1b134d53f8a2be4e4264b52d924c5a3ba19 Mon Sep 17 00:00:00 2001 From: Ivan Levkivskyi Date: Thu, 13 Nov 2025 23:59:31 +0000 Subject: [PATCH 01/21] Initial random experiments --- mypy/build.py | 249 +++++++++++++++++++++++++++++++--- mypy/build_worker/__init__.py | 0 mypy/build_worker/__main__.py | 6 + mypy/build_worker/worker.py | 122 +++++++++++++++++ mypy/cache.py | 5 +- mypy/dmypy/client.py | 30 +--- mypy/ipc.py | 41 +++++- mypy/main.py | 4 + mypy/options.py | 1 + pyproject.toml | 4 +- setup.py | 1 + 11 files changed, 409 insertions(+), 54 deletions(-) create mode 100644 mypy/build_worker/__init__.py create mode 100644 mypy/build_worker/__main__.py create mode 100644 mypy/build_worker/worker.py diff --git a/mypy/build.py b/mypy/build.py index 853e54e445ac..7e95b3e303b6 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -13,30 +13,37 @@ from __future__ import annotations +import base64 import collections import contextlib import gc import json import os +import pickle import platform +import psutil import re import stat +import subprocess import sys import time import types from collections.abc import Iterator, Mapping, Sequence, Set as AbstractSet +from select import select from typing import TYPE_CHECKING, Any, Callable, ClassVar, Final, NoReturn, TextIO, TypedDict from typing_extensions import TypeAlias as _TypeAlias +from heapq import heappush, heappop from librt.internal import cache_version import mypy.semanal_main -from mypy.cache import CACHE_VERSION, CacheMeta, ReadBuffer, WriteBuffer +from mypy.cache import CACHE_VERSION, CacheMeta, ReadBuffer, WriteBuffer, read_json, write_json from mypy.checker import TypeChecker from mypy.error_formatter import OUTPUT_CHOICES, ErrorFormatter from mypy.errors import CompileError, ErrorInfo, Errors, report_internal_error from mypy.graph_utils import prepare_sccs, strongly_connected_components, topsort from mypy.indirection import TypeIndirectionVisitor +from mypy.ipc import IPCClient, read_status, BadStatus, IPCBase from mypy.messages import MessageBuilder from mypy.nodes import Import, ImportAll, ImportBase, ImportFrom, MypyFile, SymbolTable from mypy.partially_defined import PossiblyUndefinedVariableVisitor @@ -114,19 +121,24 @@ Graph: _TypeAlias = dict[str, "State"] +t_import = time.time() + class SCC: """A simple class that represents a strongly connected component (import cycle).""" id_counter: ClassVar[int] = 0 - def __init__(self, ids: set[str]) -> None: - self.id = SCC.id_counter - SCC.id_counter += 1 + def __init__(self, ids: set[str], scc_id: int | None = None, deps: list[int] | None = None) -> None: + if scc_id is None: + self.id = SCC.id_counter + SCC.id_counter += 1 + else: + self.id = scc_id # Ids of modules in this cycle. self.mod_ids = ids # Direct dependencies, should be populated by the caller. - self.deps: set[int] = set() + self.deps: set[int] = set(deps) if deps is not None else set() # Direct dependencies that have not been processed yet. # Should be populated by the caller. This set may change during graph # processing, while the above stays constant. @@ -135,6 +147,7 @@ def __init__(self, ids: set[str]) -> None: # make processing order more predictable. Dependents will be notified # that they may be ready in the order in this list. self.direct_dependents: list[int] = [] + self.size_hint: int = 0 # TODO: Get rid of BuildResult. We might as well return a BuildManager. 
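The hunk above gives each SCC an explicit id, a dependency set, a not_ready_deps working set, a direct_dependents list, and a size_hint. Later hunks in this series use exactly those fields to hand ready SCCs to workers largest-first. The following is a standalone sketch of that scheduling idea under made-up names (SimpleSCC, schedule); it is an illustration, not part of the patch.

from heapq import heappop, heappush


class SimpleSCC:
    """Stand-in for the SCC fields this patch adds (illustrative only)."""

    def __init__(self, scc_id: int, size_hint: int, deps: set[int]) -> None:
        self.id = scc_id
        self.size_hint = size_hint
        self.deps = set(deps)
        self.not_ready_deps = set(deps)      # shrinks as dependencies finish
        self.direct_dependents: list[int] = []


def schedule(sccs: dict[int, SimpleSCC]) -> list[int]:
    # Fill reverse edges so a finished SCC can notify its dependents.
    for scc in sccs.values():
        for dep in scc.deps:
            sccs[dep].direct_dependents.append(scc.id)
    order = 0
    queue: list[tuple[int, int, int]] = []   # (-size_hint, submit order, id)
    for scc in sccs.values():
        if not scc.not_ready_deps:           # leaf SCCs are ready immediately
            heappush(queue, (-scc.size_hint, order, scc.id))
            order += 1
    processed: list[int] = []
    while queue:
        _, _, scc_id = heappop(queue)        # biggest ready SCC goes first
        processed.append(scc_id)
        for dep_id in sccs[scc_id].direct_dependents:
            dependent = sccs[dep_id]
            dependent.not_ready_deps.discard(scc_id)
            if not dependent.not_ready_deps:
                heappush(queue, (-dependent.size_hint, order, dep_id))
                order += 1
    return processed

For example, with sccs = {0: SimpleSCC(0, 100, set()), 1: SimpleSCC(1, 10, set()), 2: SimpleSCC(2, 50, {0, 1})}, schedule(sccs) returns [0, 1, 2]: both leaves are ready at once and the larger one is dispatched first.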
@@ -158,6 +171,89 @@ def __init__(self, manager: BuildManager, graph: Graph) -> None: self.errors: list[str] = [] # Filled in by build if desired +def receive(connection: IPCBase) -> dict[str, Any]: + """Receive single JSON data frame from a connection. + + Raise OSError if the data received is not valid JSON or if it is + not a dict. + """ + bdata = connection.read_bytes() + if not bdata: + raise OSError("No data received") + try: + buf = ReadBuffer(bdata) + data = read_json(buf) + except Exception as e: + raise OSError("Data received is not valid JSON dict") from e + return data + + +def send(connection: IPCBase, data: dict[str, Any]) -> None: + """Send data to a connection encoded and framed. + + The data must be JSON-serializable. We assume that a single send call is a + single frame to be sent on the connect. + """ + buf = WriteBuffer() + write_json(buf, data) + connection.write_bytes(buf.getvalue()) + + +class WorkerClient: + def __init__(self, idx: int, conn: IPCClient, worker_pid: int) -> None: + self.idx = idx + self.conn = conn + self.worker_pid = worker_pid + + +def wait_for_worker(status_file: str, timeout: float = 5.0) -> tuple[int, str]: + """Wait until the worker is up. + + Exit if it doesn't happen within the timeout. + """ + endtime = time.time() + timeout + while time.time() < endtime: + try: + data = read_status(status_file) + except BadStatus: + # If the file isn't there yet, retry later. + time.sleep(0.05) + continue + try: + pid = data["pid"] + connection_name = data["connection_name"] + assert isinstance(pid, int) and isinstance(connection_name, str) + return pid, connection_name + except Exception: + # If the file's content is bogus or the process is dead, fail. + pass + print("Worker process failed to start") + sys.exit(2) + + +def start_worker(options_data: str, idx: int) -> None: + status_file = f".mypy_worker.{idx}.json" + if os.path.isfile(status_file): + os.unlink(status_file) + command = [ + sys.executable, + "-m", + "mypy.build_worker", + f"--status-file={status_file}", + f'--options-data="{options_data}"' + ] + subprocess.Popen(command) + + +def get_worker(idx: int) -> WorkerClient: + status_file = f".mypy_worker.{idx}.json" + pid, connection_name = wait_for_worker(status_file) + proc = psutil.Process(pid=pid) + core = idx + 1 + proc.cpu_affinity([core * 3, core * 3 + 1, core * 3 + 2]) + return WorkerClient(idx, IPCClient(connection_name, 10), pid) + + def build_error(msg: str) -> NoReturn: raise CompileError([f"mypy: error: {msg}"]) @@ -211,9 +307,31 @@ def default_flush_errors( stderr = stderr or sys.stderr extra_plugins = extra_plugins or [] + print("Starting workers", time.time() - t_import) + + proc = psutil.Process() + proc.cpu_affinity([0, 1, 2]) + + workers = [] + if options.num_workers > 0: + pickled_options = pickle.dumps(options.snapshot()) + options_data = base64.b64encode(pickled_options).decode() + for i in range(options.num_workers): + start_worker(options_data, i) + for i in range(options.num_workers): + workers.append(get_worker(i)) + + for worker in workers: + source_tuples = [ + (s.path, s.module, s.text, s.base_dir, s.followed) for s in sources + ] + send(worker.conn, {"sources": source_tuples}) + + print("Sent sources to workers", time.time() - t_import) + try: - result = _build( - sources, options, alt_lib_path, flush_errors, fscache, stdout, stderr, extra_plugins + result = build_inner( + sources, options, alt_lib_path, flush_errors, fscache, stdout, stderr, extra_plugins, workers ) result.errors = messages return result @@ -228,7 
+346,7 @@ def default_flush_errors( raise -def _build( +def build_inner( sources: list[BuildSource], options: Options, alt_lib_path: str | None, @@ -237,6 +355,7 @@ def _build( stdout: TextIO, stderr: TextIO, extra_plugins: Sequence[Plugin], + workers: list[WorkerClient], ) -> BuildResult: if platform.python_implementation() == "CPython": # Run gc less frequently, as otherwise we can spent a large fraction of @@ -286,6 +405,7 @@ def _build( stdout=stdout, stderr=stderr, ) + manager.workers = workers if manager.verbosity() >= 2: manager.trace(repr(options)) @@ -300,6 +420,16 @@ def _build( dump_line_checking_stats(options.line_checking_stats, graph) return BuildResult(manager, graph) finally: + + for worker in workers: + send(worker.conn, {"final": True}) + time.sleep(0.005) + for worker in workers: + worker.conn.close() + status_file = f".mypy_worker.{worker.idx}.json" + if os.path.isfile(status_file): + os.unlink(status_file) + t0 = time.time() manager.metastore.commit() manager.add_stats(cache_commit_time=time.time() - t0) @@ -699,9 +829,12 @@ def __init__( # Stale SCCs that are queued for processing. Note that as of now we have just # one worker, that is the same process. In the future, we will support multiple # parallel worker processes. - self.scc_queue: list[SCC] = [] + self.scc_queue: list[tuple[int, int, SCC]] = [] # SCCs that have been fully processed. self.done_sccs: set[int] = set() + self.workers: list[WorkerClient] = [] + self.free_workers: set[int] = set() + self.queue_order: int = 0 def dump_stats(self) -> None: if self.options.dump_build_stats: @@ -903,7 +1036,19 @@ def stats_summary(self) -> Mapping[str, object]: def submit(self, sccs: list[SCC]) -> None: """Submit a stale SCC for processing in current process.""" - self.scc_queue.extend(sccs) + if self.workers: + for scc in sccs: + heappush(self.scc_queue, (-scc.size_hint, self.queue_order, scc)) + self.queue_order += 1 + else: + self.scc_queue.extend([(0, 0, scc) for scc in sccs]) + while self.scc_queue and self.free_workers: + worker = self.free_workers.pop() + if self.workers: + _, _, scc = heappop(self.scc_queue) + else: + _, _, scc = self.scc_queue.pop(0) + send(self.workers[worker].conn, {"scc_id": scc.id}) def wait_for_done(self, graph: Graph) -> tuple[list[SCC], bool]: """Wait for a stale SCC processing (in process) to finish. @@ -912,11 +1057,28 @@ def wait_for_done(self, graph: Graph) -> tuple[list[SCC], bool]: This emulates the API we will have for parallel processing in multiple worker processes. """ - if not self.scc_queue: + if not self.workers: + if not self.scc_queue: + return [], False + _, _, next_scc = self.scc_queue.pop(0) + process_stale_scc(graph, next_scc, self) + return [next_scc], bool(self.scc_queue) + + if not self.scc_queue and len(self.free_workers) == len(self.workers): return [], False - next_scc = self.scc_queue.pop(0) - process_stale_scc(graph, next_scc, self) - return [next_scc], bool(self.scc_queue) + + # TODO: don't select from free workers. + conns = [w.conn.connection for w in self.workers] + ready, _, _ = select(conns, [], [], 100) + done_sccs = [] + for r in ready: + idx = conns.index(r) + data = receive(self.workers[idx].conn) + self.free_workers.add(idx) + scc_id = data["scc_id"] + done_sccs.append(self.scc_by_id[scc_id]) + self.submit([]) # advance after some workers are free. 
+ return done_sccs, bool(self.scc_queue) or len(self.free_workers) < len(self.workers) def deps_to_json(x: dict[str, set[str]]) -> bytes: @@ -2008,6 +2170,7 @@ def __init__( if exist_added_packages(self.suppressed, manager, self.options): self.parse_file() # This is safe because the cache is anyway stale. self.compute_dependencies() + self.size_hint = self.meta.size else: # When doing a fine-grained cache load, pretend we only # know about modules that have cache information and defer @@ -2019,6 +2182,8 @@ def __init__( # Parse the file (and then some) to get the dependencies. self.parse_file(temporary=temporary) self.compute_dependencies() + if self.manager.workers: + self.tree = None def add_ancestors(self) -> None: if self.path is not None: @@ -2094,16 +2259,19 @@ def load_fine_grained_deps(self) -> dict[str, set[str]]: return self.manager.load_fine_grained_deps(self.id) def load_tree(self, temporary: bool = False) -> None: - assert ( - self.meta is not None - ), "Internal error: this method must be called only for cached modules" + # assert ( + # self.meta is not None + # ), "Internal error: this method must be called only for cached modules" + + assert self.path is not None + _, data_file, _ = get_cache_names(self.id, self.path, self.manager.options) data: bytes | dict[str, Any] | None if self.options.fixed_format_cache: - data = _load_ff_file(self.meta.data_file, self.manager, "Could not load tree: ") + data = _load_ff_file(data_file, self.manager, "Could not load tree: ") else: data = _load_json_file( - self.meta.data_file, self.manager, "Load tree ", "Could not load tree: " + data_file, self.manager, "Load tree ", "Could not load tree: " ) if data is None: return @@ -2188,6 +2356,7 @@ def parse_file(self, *, temporary: bool = False) -> None: self.source_hash = compute_hash(source) self.parse_inline_configuration(source) + self.size_hint = len(source) if not cached: self.tree = manager.parse_file( self.id, @@ -2888,6 +3057,8 @@ def dispatch(sources: list[BuildSource], manager: BuildManager, stdout: TextIO) t0 = time.time() graph = load_graph(sources, manager) + print("Coordinator loaded graph", time.time() - t_import) + # This is a kind of unfortunate hack to work around some of fine-grained's # fragility: if we have loaded less than 50% of the specified files from # cache in fine-grained cache mode, load the graph again honestly. @@ -3249,6 +3420,7 @@ def find_stale_sccs( * The interface hashes of direct dependents matches those recorded in the cache. The first and second conditions are verified by is_fresh(). """ + t0 = time.time() stale_sccs = [] fresh_sccs = [] for ascc in sccs: @@ -3297,6 +3469,7 @@ def find_stale_sccs( else: manager.log("Scheduling SCC of size %d (%s) as %s" % (size, scc_str, fresh_msg)) stale_sccs.append(ascc) + manager.add_stats(find_stale_time=time.time() - t0) return stale_sccs, fresh_sccs @@ -3311,6 +3484,19 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: manager.scc_by_id = scc_by_id manager.top_order = [scc.id for scc in sccs] + for worker in manager.workers: + data = receive(worker.conn) + print(worker.idx, data["status"]) + send(worker.conn, {"sccs": [(list(scc.mod_ids), scc.id, list(scc.deps)) for scc in sccs]}) + + for worker in manager.workers: + data = receive(worker.conn) + print(worker.idx, data["status"]) + + print("Workers loaded graph", time.time() - t_import) + + manager.free_workers = {w.idx for w in manager.workers} + # Prime the ready list with leaf SCCs (that have no dependencies). 
ready = [] not_ready = [] @@ -3335,12 +3521,14 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: else: done, still_working = manager.wait_for_done(graph) ready = [] + t0 = time.time() for done_scc in done: for dependent in done_scc.direct_dependents: scc_by_id[dependent].not_ready_deps.discard(done_scc.id) if not scc_by_id[dependent].not_ready_deps: not_ready.remove(scc_by_id[dependent]) ready.append(scc_by_id[dependent]) + manager.add_stats(notify_dependents_time=time.time() - t0) def order_ascc(graph: Graph, ascc: AbstractSet[str], pri_max: int = PRI_INDIRECT) -> list[str]: @@ -3409,6 +3597,7 @@ def process_fresh_modules(graph: Graph, modules: list[str], manager: BuildManage def process_stale_scc(graph: Graph, ascc: SCC, manager: BuildManager) -> None: """Process the modules in one SCC from source code.""" # First verify if all transitive dependencies are loaded in the current process. + t0 = time.time() missing_sccs = set() sccs_to_find = ascc.deps.copy() while sccs_to_find: @@ -3418,12 +3607,15 @@ def process_stale_scc(graph: Graph, ascc: SCC, manager: BuildManager) -> None: missing_sccs.add(dep_scc) sccs_to_find.update(manager.scc_by_id[dep_scc].deps) + t1 = time.time() if missing_sccs: # Load missing SCCs from cache. # TODO: speed-up ordering if this causes problems for large builds. + ts = time.time() fresh_sccs_to_load = [ manager.scc_by_id[sid] for sid in manager.top_order if sid in missing_sccs ] + manager.add_stats(fresh_order_time=time.time() - ts) manager.log(f"Processing {len(fresh_sccs_to_load)} fresh SCCs") if ( not manager.options.test_env @@ -3437,8 +3629,9 @@ def process_stale_scc(graph: Graph, ascc: SCC, manager: BuildManager) -> None: # generation with the freeze()/unfreeze() trick below. This is arguably # a hack, but it gives huge performance wins for large third-party # libraries, like torch. - gc.collect() + tc = time.time() gc.disable() + manager.add_stats(gc_pre_freeze_time=time.time() - tc) for prev_scc in fresh_sccs_to_load: manager.done_sccs.add(prev_scc.id) process_fresh_modules(graph, sorted(prev_scc.mod_ids), manager) @@ -3448,12 +3641,17 @@ def process_stale_scc(graph: Graph, ascc: SCC, manager: BuildManager) -> None: and manager.gc_freeze_cycles < MAX_GC_FREEZE_CYCLES ): manager.gc_freeze_cycles += 1 + tc = time.time() gc.freeze() gc.unfreeze() gc.enable() + manager.add_stats(gc_post_freeze_time=time.time() - tc) + t2 = time.time() # Process the SCC in stable order. scc = order_ascc_ex(graph, ascc) + + t3 = time.time() stale = scc for id in stale: # We may already have parsed the module, or not. @@ -3467,6 +3665,7 @@ def process_stale_scc(graph: Graph, ascc: SCC, manager: BuildManager) -> None: assert typing_mod, "The typing module was not parsed" mypy.semanal_main.semantic_analysis_for_scc(graph, scc, manager.errors) + t4 = time.time() # Track what modules aren't yet done, so we can finish them as soon # as possible, saving memory. unfinished_modules = set(stale) @@ -3489,6 +3688,7 @@ def process_stale_scc(graph: Graph, ascc: SCC, manager: BuildManager) -> None: graph[id].generate_unused_ignore_notes() graph[id].generate_ignore_without_code_notes() + t5 = time.time() # Flush errors, and write cache in two phases: first data files, then meta files. 
meta_tuples = {} errors_by_id = {} @@ -3510,6 +3710,14 @@ def process_stale_scc(graph: Graph, ascc: SCC, manager: BuildManager) -> None: meta.error_lines = errors_by_id.get(id, []) write_cache_meta(meta, manager, meta_file) manager.done_sccs.add(ascc.id) + manager.add_stats( + find_missing_time=t1 - t0, + load_missing_time=t2 - t1, + order_scc_time=t3 - t2, + semanal_time=t4 - t3, + type_check_time=t5 - t4, + flush_and_cache_time=time.time() - t5, + ) def prepare_sccs_full( @@ -3557,6 +3765,7 @@ def sorted_components(graph: Graph) -> list[SCC]: # and the result is [{c, d}, {a, b}]. sorted_ready = sorted(ready, key=lambda scc: -min(graph[id].order for id in scc.mod_ids)) for scc in sorted_ready: + scc.size_hint = sum(graph[mid].size_hint for mid in scc.mod_ids) for dep in scc_dep_map[scc]: dep.direct_dependents.append(scc.id) res.extend(sorted_ready) diff --git a/mypy/build_worker/__init__.py b/mypy/build_worker/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/mypy/build_worker/__main__.py b/mypy/build_worker/__main__.py new file mode 100644 index 000000000000..2156f44e461f --- /dev/null +++ b/mypy/build_worker/__main__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from mypy.build_worker.worker import console_entry + +if __name__ == "__main__": + console_entry() diff --git a/mypy/build_worker/worker.py b/mypy/build_worker/worker.py new file mode 100644 index 000000000000..0d3e476a9560 --- /dev/null +++ b/mypy/build_worker/worker.py @@ -0,0 +1,122 @@ +from __future__ import annotations + +import argparse +import base64 +import gc +import json +import os +import pickle +import platform +import sys +import time + +from mypy.build import receive, send, BuildManager, load_plugins, load_graph, dump_graph, SCC, process_stale_scc +from mypy.errors import Errors +from mypy.fscache import FileSystemCache +from mypy.ipc import IPCServer +from mypy.main import RECURSION_LIMIT +from mypy.modulefinder import BuildSource, compute_search_paths, BuildSourceSet +from mypy.options import Options +from mypy.util import read_py_file +from mypy.version import __version__ + +parser = argparse.ArgumentParser(prog="mypy_worker", description="Mypy build worker") +parser.add_argument("--status-file", help="status file to communicate worker details") +parser.add_argument("--options-data", help="serialized mypy options") + +CONNECTION_NAME = "build_worker" + + +def main(argv: list[str]) -> None: + # Set recursion limit consistent with mypy/main.py + sys.setrecursionlimit(RECURSION_LIMIT) + if platform.python_implementation() == "CPython": + gc.set_threshold(200 * 1000, 30, 30) + + args = parser.parse_args(argv) + + options_dict = pickle.loads(base64.b64decode(args.options_data)) + options_obj = Options() + options = options_obj.apply_changes(options_dict) + + status_file = args.status_file + server = IPCServer(CONNECTION_NAME, 10) + + with open(status_file, "w") as f: + json.dump({"pid": os.getpid(), "connection_name": server.connection_name}, f) + f.write("\n") + + with server: + data = receive(server) + sources = [BuildSource(*st) for st in data["sources"]] + + data_dir = os.path.dirname(os.path.dirname(__file__)) + fscache = FileSystemCache() + search_paths = compute_search_paths(sources, options, data_dir, None) + + source_set = BuildSourceSet(sources) + cached_read = fscache.read + errors = Errors(options, read_source=lambda path: read_py_file(path, cached_read)) + plugin, snapshot = load_plugins(options, errors, sys.stdout, []) + + messages = [] + + def flush_errors( + 
filename: str | None, new_messages: list[str], is_serious: bool + ) -> None: + messages.extend(new_messages) + + manager = BuildManager( + data_dir, + search_paths, + ignore_prefix=os.getcwd(), + source_set=source_set, + reports=None, + options=options, + version_id=__version__, + plugin=plugin, + plugins_snapshot=snapshot, + errors=errors, + error_formatter=None, + flush_errors=flush_errors, + fscache=fscache, + stdout=sys.stdout, + stderr=sys.stderr, + ) + + graph = load_graph(sources, manager) + + print(len(gc.get_objects())) + + for id in graph: + manager.import_map[id] = set(graph[id].dependencies + graph[id].suppressed) + send(server, {"status": "ok"}) + + data = receive(server) + sccs = [SCC(set(mod_ids), scc_id, deps) for (mod_ids, scc_id, deps) in data["sccs"]] + + manager.scc_by_id = {scc.id: scc for scc in sccs} + manager.top_order = [scc.id for scc in sccs] + + send(server, {"status": "ok"}) + + while True: + data = receive(server) + if "final" in data: + manager.dump_stats() + break + scc_id = data["scc_id"] + scc = manager.scc_by_id[scc_id] + t0 = time.time() + process_stale_scc(graph, scc, manager) + manager.add_stats( + total_process_stale_time=time.time() - t0, + stale_sccs_processed=1, + ) + send(server, {"scc_id": scc_id}) + + server.cleanup() + + +def console_entry() -> None: + main(sys.argv[1:]) diff --git a/mypy/cache.py b/mypy/cache.py index ad12fd96f1fa..de470b4a0f89 100644 --- a/mypy/cache.py +++ b/mypy/cache.py @@ -392,6 +392,9 @@ def write_str_opt_list(data: WriteBuffer, value: list[str | None]) -> None: JsonValue: _TypeAlias = Union[None, int, str, bool, list["JsonValue"], dict[str, "JsonValue"]] +JsonValueEx: _TypeAlias = Union[ + None, int, str, bool, list["JsonValueEx"], dict[str, "JsonValueEx"], tuple["JsonValueEx", ...] +] def read_json_value(data: ReadBuffer) -> JsonValue: @@ -417,7 +420,7 @@ def read_json_value(data: ReadBuffer) -> JsonValue: # Currently tuples are used by mypyc plugin. They will be normalized to # JSON lists after a roundtrip. -def write_json_value(data: WriteBuffer, value: JsonValue | tuple[JsonValue, ...]) -> None: +def write_json_value(data: WriteBuffer, value: JsonValueEx) -> None: if value is None: write_tag(data, LITERAL_NONE) elif isinstance(value, bool): diff --git a/mypy/dmypy/client.py b/mypy/dmypy/client.py index 3db47f80d01b..265db565476e 100644 --- a/mypy/dmypy/client.py +++ b/mypy/dmypy/client.py @@ -19,7 +19,7 @@ from mypy.dmypy_os import alive, kill from mypy.dmypy_util import DEFAULT_STATUS_FILE, receive, send -from mypy.ipc import IPCClient, IPCException +from mypy.ipc import IPCClient, IPCException, BadStatus, read_status from mypy.main import RECURSION_LIMIT from mypy.util import check_python_version, get_terminal_width, should_force_color from mypy.version import __version__ @@ -256,16 +256,6 @@ def __init__(self, prog: str, **kwargs: Any) -> None: del p -class BadStatus(Exception): - """Exception raised when there is something wrong with the status file. - - For example: - - No status file found - - Status file malformed - - Process whose pid is in the status file does not exist - """ - - def main(argv: list[str]) -> None: """The code is top-down.""" check_python_version("dmypy") @@ -727,24 +717,6 @@ def check_status(data: dict[str, Any]) -> tuple[int, str]: return pid, connection_name -def read_status(status_file: str) -> dict[str, object]: - """Read status file. - - Raise BadStatus if the status file doesn't exist or contains - invalid JSON or the JSON is not a dict. 
- """ - if not os.path.isfile(status_file): - raise BadStatus("No status file found") - with open(status_file) as f: - try: - data = json.load(f) - except Exception as e: - raise BadStatus("Malformed status file (not JSON)") from e - if not isinstance(data, dict): - raise BadStatus("Invalid status file (not a dict)") - return data - - def is_running(status_file: str) -> bool: """Check if the server is running cleanly""" try: diff --git a/mypy/ipc.py b/mypy/ipc.py index b2046a47ab15..57ab4665bc44 100644 --- a/mypy/ipc.py +++ b/mypy/ipc.py @@ -8,6 +8,7 @@ import base64 import codecs +import json import os import shutil import sys @@ -64,6 +65,9 @@ def frame_from_buffer(self) -> bytearray | None: return bdata def read(self, size: int = 100000) -> str: + return self.read_bytes(size).decode("utf-8") + + def read_bytes(self, size: int = 100000) -> bytes: """Read bytes from an IPC connection until we have a full frame.""" bdata: bytearray | None = bytearray() if sys.platform == "win32": @@ -118,14 +122,17 @@ def read(self, size: int = 100000) -> str: if not bdata: # Socket was empty and we didn't get any frame. # This should only happen if the socket was closed. - return "" - return codecs.decode(bdata, "base64").decode("utf8") + return b"" + return codecs.decode(bdata, "base64") def write(self, data: str) -> None: + self.write_bytes(data.encode("utf-8")) + + def write_bytes(self, data: bytes) -> None: """Write to an IPC connection.""" # Frame the data by urlencoding it and separating by space. - encoded_data = codecs.encode(data.encode("utf8"), "base64") + b" " + encoded_data = codecs.encode(data, "base64") + b" " if sys.platform == "win32": try: @@ -311,3 +318,31 @@ def connection_name(self) -> str: name = self.sock.getsockname() assert isinstance(name, str) return name + + +class BadStatus(Exception): + """Exception raised when there is something wrong with the status file. + + For example: + - No status file found + - Status file malformed + - Process whose pid is in the status file does not exist + """ + + +def read_status(status_file: str) -> dict[str, object]: + """Read status file. + + Raise BadStatus if the status file doesn't exist or contains + invalid JSON or the JSON is not a dict. + """ + if not os.path.isfile(status_file): + raise BadStatus("No status file found") + with open(status_file) as f: + try: + data = json.load(f) + except Exception as e: + raise BadStatus("Malformed status file (not JSON)") from e + if not isinstance(data, dict): + raise BadStatus("Invalid status file (not a dict)") + return data diff --git a/mypy/main.py b/mypy/main.py index 7d5721851c3d..58cdb5f76722 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -167,6 +167,7 @@ def main( # Exit without freeing objects -- it's faster. # # NOTE: We don't flush all open files on exit (or run other destructors)! + # TODO: use this for workers and join? util.hard_exit(code) elif code: sys.exit(code) @@ -1152,6 +1153,9 @@ def add_invertible_flag( # This undocumented feature exports limited line-level dependency information. internals_group.add_argument("--export-ref-info", action="store_true", help=argparse.SUPPRESS) + # Experimental parallel type-checking support. + internals_group.add_argument("--num-workers", type=int, default=0, help=argparse.SUPPRESS) + report_group = parser.add_argument_group( title="Report generation", description="Generate a report in the specified format." 
) diff --git a/mypy/options.py b/mypy/options.py index 39490c9f0bee..4294e880d93b 100644 --- a/mypy/options.py +++ b/mypy/options.py @@ -358,6 +358,7 @@ def __init__(self) -> None: self.test_env = False # -- experimental options -- + self.num_workers: int = 0 self.shadow_file: list[list[str]] | None = None self.show_column_numbers: bool = False self.show_error_end: bool = False diff --git a/pyproject.toml b/pyproject.toml index 42ff3a6ca019..087f78d9a7ab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,6 +13,7 @@ requires = [ # the following is from build-requirements.txt "types-psutil", "types-setuptools", + "psutil>=4.0", ] build-backend = "setuptools.build_meta" @@ -55,11 +56,12 @@ dependencies = [ "pathspec>=0.9.0", "tomli>=1.1.0; python_version<'3.11'", "librt>=0.5.0", + "psutil>=4.0", ] dynamic = ["version"] [project.optional-dependencies] -dmypy = ["psutil>=4.0"] +dmypy = [] mypyc = ["setuptools>=50"] python2 = [] reports = ["lxml"] diff --git a/setup.py b/setup.py index 0037624f9bbc..9df75c16889e 100644 --- a/setup.py +++ b/setup.py @@ -81,6 +81,7 @@ def run(self) -> None: "__main__.py", "pyinfo.py", os.path.join("dmypy", "__main__.py"), + os.path.join("build_worker", "__main__.py"), "exportjson.py", # Uses __getattr__/__setattr__ "split_namespace.py", From 37533da49269bd34ac08a9149da7e0d2c00b84e4 Mon Sep 17 00:00:00 2001 From: Ivan Levkivskyi Date: Fri, 14 Nov 2025 01:40:36 +0000 Subject: [PATCH 02/21] Play more with GC --- mypy/build.py | 5 ++++- mypy/build_worker/worker.py | 6 ++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 7e95b3e303b6..eb029d53faf5 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -3055,8 +3055,11 @@ def dispatch(sources: list[BuildSource], manager: BuildManager, stdout: TextIO) log_configuration(manager, sources) t0 = time.time() + gc.disable() graph = load_graph(sources, manager) - + gc.freeze() + gc.unfreeze() + gc.enable() print("Coordinator loaded graph", time.time() - t_import) # This is a kind of unfortunate hack to work around some of fine-grained's diff --git a/mypy/build_worker/worker.py b/mypy/build_worker/worker.py index 0d3e476a9560..b4985c83b584 100644 --- a/mypy/build_worker/worker.py +++ b/mypy/build_worker/worker.py @@ -84,9 +84,11 @@ def flush_errors( stderr=sys.stderr, ) + gc.disable() graph = load_graph(sources, manager) - - print(len(gc.get_objects())) + gc.freeze() + gc.unfreeze() + gc.enable() for id in graph: manager.import_map[id] = set(graph[id].dependencies + graph[id].suppressed) From 465d84baa0aafba12bfec9b2738f4daa6eb5dc9a Mon Sep 17 00:00:00 2001 From: Ivan Levkivskyi Date: Sat, 15 Nov 2025 20:16:52 +0000 Subject: [PATCH 03/21] Add some error handling logic --- mypy/build.py | 36 ++++++++++++++++++++++++++++-------- mypy/build_worker/worker.py | 8 +++----- 2 files changed, 31 insertions(+), 13 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index eb029d53faf5..f362f6ea3982 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -1050,7 +1050,7 @@ def submit(self, sccs: list[SCC]) -> None: _, _, scc = self.scc_queue.pop(0) send(self.workers[worker].conn, {"scc_id": scc.id}) - def wait_for_done(self, graph: Graph) -> tuple[list[SCC], bool]: + def wait_for_done(self, graph: Graph) -> tuple[list[SCC], bool, dict[str, tuple[str, list[str]]]]: """Wait for a stale SCC processing (in process) to finish. Return next processed SCC and whether we have more in the queue. 
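PATCH 02 above brackets load_graph() with gc.disable()/gc.freeze()/gc.unfreeze()/gc.enable(). Under the CPython gc API, the intent is to build the long-lived module graph without intermediate collections and then promote all of its objects to the oldest generation, so later young-generation collections stop re-scanning them. A minimal sketch of the same pattern, with build_large_long_lived_structure() standing in for load_graph():

import gc


def build_large_long_lived_structure() -> list[dict[str, int]]:
    # Stand-in for load_graph(): allocates many objects that survive the whole build.
    return [{"x": i} for i in range(100_000)]


gc.disable()    # no automatic collections while the graph is allocated
graph = build_large_long_lived_structure()
gc.freeze()     # move every tracked object into the permanent generation...
gc.unfreeze()   # ...and back into the oldest generation, so frequent
                # young-generation collections stop re-scanning the survivors
gc.enable()     # restore normal automatic collection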
@@ -1059,26 +1059,28 @@ def wait_for_done(self, graph: Graph) -> tuple[list[SCC], bool]: """ if not self.workers: if not self.scc_queue: - return [], False + return [], False, {} _, _, next_scc = self.scc_queue.pop(0) process_stale_scc(graph, next_scc, self) - return [next_scc], bool(self.scc_queue) + return [next_scc], bool(self.scc_queue), {} if not self.scc_queue and len(self.free_workers) == len(self.workers): - return [], False + return [], False, {} # TODO: don't select from free workers. conns = [w.conn.connection for w in self.workers] ready, _, _ = select(conns, [], [], 100) done_sccs = [] + results = {} for r in ready: idx = conns.index(r) data = receive(self.workers[idx].conn) self.free_workers.add(idx) scc_id = data["scc_id"] + results.update(data["result"]) done_sccs.append(self.scc_by_id[scc_id]) self.submit([]) # advance after some workers are free. - return done_sccs, bool(self.scc_queue) or len(self.free_workers) < len(self.workers) + return done_sccs, bool(self.scc_queue) or len(self.free_workers) < len(self.workers), results def deps_to_json(x: dict[str, set[str]]) -> bytes: @@ -1474,11 +1476,11 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> CacheMeta | No A CacheMeta instance if the cache data was found and appears valid; otherwise None. """ + t0 = time.time() # TODO: May need to take more build options into account meta_file, data_file, _ = get_cache_names(id, path, manager.options) manager.trace(f"Looking for {id} at {meta_file}") meta: bytes | dict[str, Any] | None - t0 = time.time() if manager.options.fixed_format_cache: meta = _load_ff_file(meta_file, manager, log_error=f"Could not load cache for {id}: ") if meta is None: @@ -2185,6 +2187,12 @@ def __init__( if self.manager.workers: self.tree = None + def reload_meta(self) -> None: + assert self.path is not None + self.meta = find_cache_meta(self.id, self.path, self.manager) + assert self.meta is not None + self.interface_hash = self.meta.interface_hash + def add_ancestors(self) -> None: if self.path is not None: _, name = os.path.split(self.path) @@ -3522,7 +3530,10 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: if fresh: done = fresh else: - done, still_working = manager.wait_for_done(graph) + done, still_working, results = manager.wait_for_done(graph) + for id, (interface_cache, errors) in results.items(): + graph[id].interface_hash = bytes.fromhex(interface_cache) + manager.flush_errors(manager.errors.simplify_path(graph[id].xpath), errors, False) ready = [] t0 = time.time() for done_scc in done: @@ -3597,7 +3608,7 @@ def process_fresh_modules(graph: Graph, modules: list[str], manager: BuildManage manager.add_stats(process_fresh_time=t2 - t0, load_tree_time=t1 - t0) -def process_stale_scc(graph: Graph, ascc: SCC, manager: BuildManager) -> None: +def process_stale_scc(graph: Graph, ascc: SCC, manager: BuildManager) -> dict[str, tuple[str, list[str]]]: """Process the modules in one SCC from source code.""" # First verify if all transitive dependencies are loaded in the current process. 
t0 = time.time() @@ -3619,6 +3630,11 @@ def process_stale_scc(graph: Graph, ascc: SCC, manager: BuildManager) -> None: manager.scc_by_id[sid] for sid in manager.top_order if sid in missing_sccs ] manager.add_stats(fresh_order_time=time.time() - ts) + + for prev_scc in fresh_sccs_to_load: + for mod_id in prev_scc.mod_ids: + graph[mod_id].reload_meta() + manager.log(f"Processing {len(fresh_sccs_to_load)} fresh SCCs") if ( not manager.options.test_env @@ -3721,6 +3737,10 @@ def process_stale_scc(graph: Graph, ascc: SCC, manager: BuildManager) -> None: type_check_time=t5 - t4, flush_and_cache_time=time.time() - t5, ) + scc_result = {} + for id in scc: + scc_result[id] = graph[id].interface_hash.hex(), errors_by_id.get(id, []) + return scc_result def prepare_sccs_full( diff --git a/mypy/build_worker/worker.py b/mypy/build_worker/worker.py index b4985c83b584..e52181d3cdab 100644 --- a/mypy/build_worker/worker.py +++ b/mypy/build_worker/worker.py @@ -59,12 +59,10 @@ def main(argv: list[str]) -> None: errors = Errors(options, read_source=lambda path: read_py_file(path, cached_read)) plugin, snapshot = load_plugins(options, errors, sys.stdout, []) - messages = [] - def flush_errors( filename: str | None, new_messages: list[str], is_serious: bool ) -> None: - messages.extend(new_messages) + pass manager = BuildManager( data_dir, @@ -110,12 +108,12 @@ def flush_errors( scc_id = data["scc_id"] scc = manager.scc_by_id[scc_id] t0 = time.time() - process_stale_scc(graph, scc, manager) + result = process_stale_scc(graph, scc, manager) manager.add_stats( total_process_stale_time=time.time() - t0, stale_sccs_processed=1, ) - send(server, {"scc_id": scc_id}) + send(server, {"scc_id": scc_id, "result": result}) server.cleanup() From 76e00cecaabe393ef9ef44746f1e85248aabbc10 Mon Sep 17 00:00:00 2001 From: Ivan Levkivskyi Date: Sun, 16 Nov 2025 17:49:35 +0000 Subject: [PATCH 04/21] Better handling of the GC trick --- mypy/build.py | 45 +++++++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index f362f6ea3982..d776afe6fda5 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -21,7 +21,6 @@ import os import pickle import platform -import psutil import re import stat import subprocess @@ -119,6 +118,8 @@ # We are careful now, we can increase this in future if safe/useful. 
MAX_GC_FREEZE_CYCLES = 1 +initial_gc_freeze_done = False + Graph: _TypeAlias = dict[str, "State"] t_import = time.time() @@ -248,9 +249,6 @@ def start_worker(options_data: str, idx: int) -> None: def get_worker(idx: int) -> WorkerClient: status_file = f".mypy_worker.{idx}.json" pid, connection_name = wait_for_worker(status_file) - proc = psutil.Process(pid=pid) - core = idx + 1 - proc.cpu_affinity([core * 3, core * 3 + 1, core * 3 + 2]) return WorkerClient(idx, IPCClient(connection_name, 10), pid) @@ -307,11 +305,6 @@ def default_flush_errors( stderr = stderr or sys.stderr extra_plugins = extra_plugins or [] - print("Starting workers", time.time() - t_import) - - proc = psutil.Process() - proc.cpu_affinity([0, 1, 2]) - workers = [] if options.num_workers > 0: pickled_options = pickle.dumps(options.snapshot()) @@ -327,8 +320,6 @@ def default_flush_errors( ] send(worker.conn, {"sources": source_tuples}) - print("Sent sources to workers", time.time() - t_import) - try: result = build_inner( sources, options, alt_lib_path, flush_errors, fscache, stdout, stderr, extra_plugins, workers @@ -2603,6 +2594,7 @@ def finish_passes(self) -> None: self.free_state() if not manager.options.fine_grained_incremental and not manager.options.preserve_asts: free_tree(self.tree) + self.tree.defs.clear() self.time_spent_us += time_spent_us(t0) def free_state(self) -> None: @@ -3063,12 +3055,15 @@ def dispatch(sources: list[BuildSource], manager: BuildManager, stdout: TextIO) log_configuration(manager, sources) t0 = time.time() - gc.disable() + + global initial_gc_freeze_done + if ( + not manager.options.test_env + and platform.python_implementation() == "CPython" + and not initial_gc_freeze_done + ): + gc.disable() graph = load_graph(sources, manager) - gc.freeze() - gc.unfreeze() - gc.enable() - print("Coordinator loaded graph", time.time() - t_import) # This is a kind of unfortunate hack to work around some of fine-grained's # fragility: if we have loaded less than 50% of the specified files from @@ -3081,6 +3076,16 @@ def dispatch(sources: list[BuildSource], manager: BuildManager, stdout: TextIO) manager.cache_enabled = False graph = load_graph(sources, manager) + if ( + not manager.options.test_env + and platform.python_implementation() == "CPython" + and not initial_gc_freeze_done + ): + gc.freeze() + gc.unfreeze() + gc.enable() + initial_gc_freeze_done = True + for id in graph: manager.import_map[id] = set(graph[id].dependencies + graph[id].suppressed) @@ -3497,14 +3502,12 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: for worker in manager.workers: data = receive(worker.conn) - print(worker.idx, data["status"]) + assert data["status"] == "ok" send(worker.conn, {"sccs": [(list(scc.mod_ids), scc.id, list(scc.deps)) for scc in sccs]}) for worker in manager.workers: data = receive(worker.conn) - print(worker.idx, data["status"]) - - print("Workers loaded graph", time.time() - t_import) + assert data["status"] == "ok" manager.free_workers = {w.idx for w in manager.workers} @@ -3649,6 +3652,8 @@ def process_stale_scc(graph: Graph, ascc: SCC, manager: BuildManager) -> dict[st # a hack, but it gives huge performance wins for large third-party # libraries, like torch. 
tc = time.time() + if manager.gc_freeze_cycles > 0: + gc.collect() gc.disable() manager.add_stats(gc_pre_freeze_time=time.time() - tc) for prev_scc in fresh_sccs_to_load: From 3a5bc3c6965161819f28c749b55f9db5917dc71f Mon Sep 17 00:00:00 2001 From: Ivan Levkivskyi Date: Sun, 16 Nov 2025 20:42:27 +0000 Subject: [PATCH 05/21] Basic handling for blockers --- mypy/build.py | 47 ++++++++++++++++++++++++++----------- mypy/build_worker/worker.py | 36 ++++++++++++++++++---------- mypy/dmypy/client.py | 2 +- 3 files changed, 58 insertions(+), 27 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index d776afe6fda5..19f5102a5454 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -28,10 +28,10 @@ import time import types from collections.abc import Iterator, Mapping, Sequence, Set as AbstractSet +from heapq import heappop, heappush from select import select from typing import TYPE_CHECKING, Any, Callable, ClassVar, Final, NoReturn, TextIO, TypedDict from typing_extensions import TypeAlias as _TypeAlias -from heapq import heappush, heappop from librt.internal import cache_version @@ -42,7 +42,7 @@ from mypy.errors import CompileError, ErrorInfo, Errors, report_internal_error from mypy.graph_utils import prepare_sccs, strongly_connected_components, topsort from mypy.indirection import TypeIndirectionVisitor -from mypy.ipc import IPCClient, read_status, BadStatus, IPCBase +from mypy.ipc import BadStatus, IPCBase, IPCClient, read_status from mypy.messages import MessageBuilder from mypy.nodes import Import, ImportAll, ImportBase, ImportFrom, MypyFile, SymbolTable from mypy.partially_defined import PossiblyUndefinedVariableVisitor @@ -130,7 +130,9 @@ class SCC: id_counter: ClassVar[int] = 0 - def __init__(self, ids: set[str], scc_id: int | None = None, deps: list[int] | None = None) -> None: + def __init__( + self, ids: set[str], scc_id: int | None = None, deps: list[int] | None = None + ) -> None: if scc_id is None: self.id = SCC.id_counter SCC.id_counter += 1 @@ -241,7 +243,7 @@ def start_worker(options_data: str, idx: int) -> None: "-m", "mypy.build_worker", f"--status-file={status_file}", - f'--options-data="{options_data}"' + f'--options-data="{options_data}"', ] subprocess.Popen(command) @@ -315,14 +317,20 @@ def default_flush_errors( workers.append(get_worker(i)) for worker in workers: - source_tuples = [ - (s.path, s.module, s.text, s.base_dir, s.followed) for s in sources - ] + source_tuples = [(s.path, s.module, s.text, s.base_dir, s.followed) for s in sources] send(worker.conn, {"sources": source_tuples}) try: result = build_inner( - sources, options, alt_lib_path, flush_errors, fscache, stdout, stderr, extra_plugins, workers + sources, + options, + alt_lib_path, + flush_errors, + fscache, + stdout, + stderr, + extra_plugins, + workers, ) result.errors = messages return result @@ -1041,7 +1049,9 @@ def submit(self, sccs: list[SCC]) -> None: _, _, scc = self.scc_queue.pop(0) send(self.workers[worker].conn, {"scc_id": scc.id}) - def wait_for_done(self, graph: Graph) -> tuple[list[SCC], bool, dict[str, tuple[str, list[str]]]]: + def wait_for_done( + self, graph: Graph + ) -> tuple[list[SCC], bool, dict[str, tuple[str, list[str]]]]: """Wait for a stale SCC processing (in process) to finish. Return next processed SCC and whether we have more in the queue. 
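For orientation, the coordinator/worker exchange that PATCH 05 extends with blocker reporting reduces to a handful of JSON frames. The literals below are reconstructed from the send()/receive() call sites in build.py and the loop in worker.py; the concrete paths, module names, and values are illustrative only.

# Frames travel as single JSON objects over the IPC connection (see send()/receive()).
handshake = {"sources": [("/src/pkg/mod.py", "pkg.mod", None, "/src", False)]}  # coordinator -> worker
graph_frame = {"sccs": [(["pkg.mod"], 7, []), (["pkg.other"], 8, [7])]}         # coordinator -> worker
ack = {"status": "ok"}                  # worker -> coordinator, after load_graph() and after the SCC graph
work_item = {"scc_id": 7}               # coordinator -> worker, one per scheduled SCC
result = {                              # worker -> coordinator on success
    "scc_id": 7,
    "result": {"pkg.mod": ("9f2c...", ["mod.py:3: error: ..."])},  # module -> (interface hash hex, error lines)
}
blocker = {                             # worker -> coordinator on a blocking error (added in this patch)
    "scc_id": 7,
    "blocker": {"messages": ["mod.py:1: error: invalid syntax"],
                "use_stdout": True,
                "module_with_blocker": "pkg.mod"},
}
shutdown = {"final": True}              # coordinator -> worker at the end of the build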
@@ -1068,10 +1078,19 @@ def wait_for_done(self, graph: Graph) -> tuple[list[SCC], bool, dict[str, tuple[ data = receive(self.workers[idx].conn) self.free_workers.add(idx) scc_id = data["scc_id"] + if "blocker" in data: + blocker = data["blocker"] + raise CompileError( + blocker["messages"], blocker["use_stdout"], blocker["module_with_blocker"] + ) results.update(data["result"]) done_sccs.append(self.scc_by_id[scc_id]) self.submit([]) # advance after some workers are free. - return done_sccs, bool(self.scc_queue) or len(self.free_workers) < len(self.workers), results + return ( + done_sccs, + bool(self.scc_queue) or len(self.free_workers) < len(self.workers), + results, + ) def deps_to_json(x: dict[str, set[str]]) -> bytes: @@ -2269,9 +2288,7 @@ def load_tree(self, temporary: bool = False) -> None: if self.options.fixed_format_cache: data = _load_ff_file(data_file, self.manager, "Could not load tree: ") else: - data = _load_json_file( - data_file, self.manager, "Load tree ", "Could not load tree: " - ) + data = _load_json_file(data_file, self.manager, "Load tree ", "Could not load tree: ") if data is None: return @@ -3611,7 +3628,9 @@ def process_fresh_modules(graph: Graph, modules: list[str], manager: BuildManage manager.add_stats(process_fresh_time=t2 - t0, load_tree_time=t1 - t0) -def process_stale_scc(graph: Graph, ascc: SCC, manager: BuildManager) -> dict[str, tuple[str, list[str]]]: +def process_stale_scc( + graph: Graph, ascc: SCC, manager: BuildManager +) -> dict[str, tuple[str, list[str]]]: """Process the modules in one SCC from source code.""" # First verify if all transitive dependencies are loaded in the current process. t0 = time.time() diff --git a/mypy/build_worker/worker.py b/mypy/build_worker/worker.py index e52181d3cdab..7bc0896b96fe 100644 --- a/mypy/build_worker/worker.py +++ b/mypy/build_worker/worker.py @@ -10,12 +10,20 @@ import sys import time -from mypy.build import receive, send, BuildManager, load_plugins, load_graph, dump_graph, SCC, process_stale_scc -from mypy.errors import Errors +from mypy.build import ( + SCC, + BuildManager, + load_graph, + load_plugins, + process_stale_scc, + receive, + send, +) +from mypy.errors import Errors, CompileError from mypy.fscache import FileSystemCache from mypy.ipc import IPCServer from mypy.main import RECURSION_LIMIT -from mypy.modulefinder import BuildSource, compute_search_paths, BuildSourceSet +from mypy.modulefinder import BuildSource, BuildSourceSet, compute_search_paths from mypy.options import Options from mypy.util import read_py_file from mypy.version import __version__ @@ -59,9 +67,7 @@ def main(argv: list[str]) -> None: errors = Errors(options, read_source=lambda path: read_py_file(path, cached_read)) plugin, snapshot = load_plugins(options, errors, sys.stdout, []) - def flush_errors( - filename: str | None, new_messages: list[str], is_serious: bool - ) -> None: + def flush_errors(filename: str | None, new_messages: list[str], is_serious: bool) -> None: pass manager = BuildManager( @@ -108,12 +114,18 @@ def flush_errors( scc_id = data["scc_id"] scc = manager.scc_by_id[scc_id] t0 = time.time() - result = process_stale_scc(graph, scc, manager) - manager.add_stats( - total_process_stale_time=time.time() - t0, - stale_sccs_processed=1, - ) - send(server, {"scc_id": scc_id, "result": result}) + try: + result = process_stale_scc(graph, scc, manager) + except CompileError as e: + blocker = { + "messages": e.messages, + "use_stdout": e.use_stdout, + "module_with_blocker": e.module_with_blocker, + } + send(server, 
{"scc_id": scc_id, "blocker": blocker}) + else: + send(server, {"scc_id": scc_id, "result": result}) + manager.add_stats(total_process_stale_time=time.time() - t0, stale_sccs_processed=1) server.cleanup() diff --git a/mypy/dmypy/client.py b/mypy/dmypy/client.py index 265db565476e..302c2484fd9e 100644 --- a/mypy/dmypy/client.py +++ b/mypy/dmypy/client.py @@ -19,7 +19,7 @@ from mypy.dmypy_os import alive, kill from mypy.dmypy_util import DEFAULT_STATUS_FILE, receive, send -from mypy.ipc import IPCClient, IPCException, BadStatus, read_status +from mypy.ipc import BadStatus, IPCClient, IPCException, read_status from mypy.main import RECURSION_LIMIT from mypy.util import check_python_version, get_terminal_width, should_force_color from mypy.version import __version__ From 25ca8d572682edb44671673f9ed42895d96eaa93 Mon Sep 17 00:00:00 2001 From: Ivan Levkivskyi Date: Mon, 17 Nov 2025 02:26:34 +0000 Subject: [PATCH 06/21] Basic crash handling --- mypy/build.py | 2 +- mypy/build_worker/worker.py | 158 +++++++++++++++++++----------------- 2 files changed, 85 insertions(+), 75 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 19f5102a5454..3018aa38f711 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -1083,7 +1083,7 @@ def wait_for_done( raise CompileError( blocker["messages"], blocker["use_stdout"], blocker["module_with_blocker"] ) - results.update(data["result"]) + results.update({k: tuple(v) for k, v in data["result"].items()}) done_sccs.append(self.scc_by_id[scc_id]) self.submit([]) # advance after some workers are free. return ( diff --git a/mypy/build_worker/worker.py b/mypy/build_worker/worker.py index 7bc0896b96fe..1db8f757f3ff 100644 --- a/mypy/build_worker/worker.py +++ b/mypy/build_worker/worker.py @@ -19,7 +19,7 @@ receive, send, ) -from mypy.errors import Errors, CompileError +from mypy.errors import CompileError, Errors, report_internal_error from mypy.fscache import FileSystemCache from mypy.ipc import IPCServer from mypy.main import RECURSION_LIMIT @@ -54,80 +54,90 @@ def main(argv: list[str]) -> None: json.dump({"pid": os.getpid(), "connection_name": server.connection_name}, f) f.write("\n") - with server: + fscache = FileSystemCache() + cached_read = fscache.read + errors = Errors(options, read_source=lambda path: read_py_file(path, cached_read)) + + try: + with server: + serve(server, options, errors, fscache) + except OSError: + pass + except Exception as exc: + report_internal_error(exc, errors.file, 0, errors, options) + finally: + server.cleanup() + + +def serve(server: IPCServer, options: Options, errors: Errors, fscache: FileSystemCache) -> None: + data = receive(server) + sources = [BuildSource(*st) for st in data["sources"]] + + data_dir = os.path.dirname(os.path.dirname(__file__)) + search_paths = compute_search_paths(sources, options, data_dir, None) + + source_set = BuildSourceSet(sources) + plugin, snapshot = load_plugins(options, errors, sys.stdout, []) + + def flush_errors(filename: str | None, new_messages: list[str], is_serious: bool) -> None: + pass + + manager = BuildManager( + data_dir, + search_paths, + ignore_prefix=os.getcwd(), + source_set=source_set, + reports=None, + options=options, + version_id=__version__, + plugin=plugin, + plugins_snapshot=snapshot, + errors=errors, + error_formatter=None, + flush_errors=flush_errors, + fscache=fscache, + stdout=sys.stdout, + stderr=sys.stderr, + ) + + gc.disable() + graph = load_graph(sources, manager) + gc.freeze() + gc.unfreeze() + gc.enable() + + for id in graph: + manager.import_map[id] = 
set(graph[id].dependencies + graph[id].suppressed) + send(server, {"status": "ok"}) + + data = receive(server) + sccs = [SCC(set(mod_ids), scc_id, deps) for (mod_ids, scc_id, deps) in data["sccs"]] + + manager.scc_by_id = {scc.id: scc for scc in sccs} + manager.top_order = [scc.id for scc in sccs] + + send(server, {"status": "ok"}) + + while True: data = receive(server) - sources = [BuildSource(*st) for st in data["sources"]] - - data_dir = os.path.dirname(os.path.dirname(__file__)) - fscache = FileSystemCache() - search_paths = compute_search_paths(sources, options, data_dir, None) - - source_set = BuildSourceSet(sources) - cached_read = fscache.read - errors = Errors(options, read_source=lambda path: read_py_file(path, cached_read)) - plugin, snapshot = load_plugins(options, errors, sys.stdout, []) - - def flush_errors(filename: str | None, new_messages: list[str], is_serious: bool) -> None: - pass - - manager = BuildManager( - data_dir, - search_paths, - ignore_prefix=os.getcwd(), - source_set=source_set, - reports=None, - options=options, - version_id=__version__, - plugin=plugin, - plugins_snapshot=snapshot, - errors=errors, - error_formatter=None, - flush_errors=flush_errors, - fscache=fscache, - stdout=sys.stdout, - stderr=sys.stderr, - ) - - gc.disable() - graph = load_graph(sources, manager) - gc.freeze() - gc.unfreeze() - gc.enable() - - for id in graph: - manager.import_map[id] = set(graph[id].dependencies + graph[id].suppressed) - send(server, {"status": "ok"}) - - data = receive(server) - sccs = [SCC(set(mod_ids), scc_id, deps) for (mod_ids, scc_id, deps) in data["sccs"]] - - manager.scc_by_id = {scc.id: scc for scc in sccs} - manager.top_order = [scc.id for scc in sccs] - - send(server, {"status": "ok"}) - - while True: - data = receive(server) - if "final" in data: - manager.dump_stats() - break - scc_id = data["scc_id"] - scc = manager.scc_by_id[scc_id] - t0 = time.time() - try: - result = process_stale_scc(graph, scc, manager) - except CompileError as e: - blocker = { - "messages": e.messages, - "use_stdout": e.use_stdout, - "module_with_blocker": e.module_with_blocker, - } - send(server, {"scc_id": scc_id, "blocker": blocker}) - else: - send(server, {"scc_id": scc_id, "result": result}) - manager.add_stats(total_process_stale_time=time.time() - t0, stale_sccs_processed=1) - - server.cleanup() + if "final" in data: + manager.dump_stats() + break + scc_id = data["scc_id"] + scc = manager.scc_by_id[scc_id] + t0 = time.time() + try: + result = process_stale_scc(graph, scc, manager) + except CompileError as e: + blocker = { + "messages": e.messages, + "use_stdout": e.use_stdout, + "module_with_blocker": e.module_with_blocker, + } + send(server, {"scc_id": scc_id, "blocker": blocker}) + else: + send(server, {"scc_id": scc_id, "result": result}) + manager.add_stats(total_process_stale_time=time.time() - t0, stale_sccs_processed=1) def console_entry() -> None: From 5775df583bd582b2e2424fafae22c108e70214f5 Mon Sep 17 00:00:00 2001 From: Ivan Levkivskyi Date: Tue, 18 Nov 2025 01:53:21 +0000 Subject: [PATCH 07/21] Better testing support --- mypy/build.py | 49 ++++++++++++++++++++++++++----------- mypy/build_worker/worker.py | 8 ++++-- mypy/test/testcheck.py | 2 +- 3 files changed, 42 insertions(+), 17 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 3018aa38f711..b34d99170140 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -203,10 +203,10 @@ def send(connection: IPCBase, data: dict[str, Any]) -> None: class WorkerClient: - def __init__(self, idx: int, conn: IPCClient, 
worker_pid: int) -> None: + def __init__(self, idx: int, conn: IPCClient, proc: subprocess.Popen[bytes]) -> None: self.idx = idx self.conn = conn - self.worker_pid = worker_pid + self.proc = proc def wait_for_worker(status_file: str, timeout: float = 5.0) -> tuple[int, str]: @@ -234,7 +234,7 @@ def wait_for_worker(status_file: str, timeout: float = 5.0) -> tuple[int, str]: sys.exit(2) -def start_worker(options_data: str, idx: int) -> None: +def start_worker(options_data: str, idx: int) -> subprocess.Popen[bytes]: status_file = f".mypy_worker.{idx}.json" if os.path.isfile(status_file): os.unlink(status_file) @@ -245,13 +245,14 @@ def start_worker(options_data: str, idx: int) -> None: f"--status-file={status_file}", f'--options-data="{options_data}"', ] - subprocess.Popen(command) + return subprocess.Popen(command) -def get_worker(idx: int) -> WorkerClient: +def get_worker(idx: int, proc: subprocess.Popen[bytes]) -> WorkerClient: status_file = f".mypy_worker.{idx}.json" pid, connection_name = wait_for_worker(status_file) - return WorkerClient(idx, IPCClient(connection_name, 10), pid) + assert pid == proc.pid + return WorkerClient(idx, IPCClient(connection_name, 10), proc) def build_error(msg: str) -> NoReturn: @@ -308,13 +309,19 @@ def default_flush_errors( extra_plugins = extra_plugins or [] workers = [] + procs = [] if options.num_workers > 0: + if options.use_builtins_fixtures: + os.environ["MYPY_TEST_PREFIX"] = os.path.dirname(os.path.dirname(__file__)) + if alt_lib_path: + os.environ["MYPY_ALT_LIB_PATH"] = alt_lib_path + pickled_options = pickle.dumps(options.snapshot()) options_data = base64.b64encode(pickled_options).decode() for i in range(options.num_workers): - start_worker(options_data, i) - for i in range(options.num_workers): - workers.append(get_worker(i)) + procs.append(start_worker(options_data, i)) + for i, proc in enumerate(procs): + workers.append(get_worker(i, proc)) for worker in workers: source_tuples = [(s.path, s.module, s.text, s.base_dir, s.followed) for s in sources] @@ -421,10 +428,13 @@ def build_inner( finally: for worker in workers: - send(worker.conn, {"final": True}) - time.sleep(0.005) + try: + send(worker.conn, {"final": True}) + except OSError: + pass for worker in workers: worker.conn.close() + worker.proc.wait() status_file = f".mypy_worker.{worker.idx}.json" if os.path.isfile(status_file): os.unlink(status_file) @@ -1474,13 +1484,16 @@ def options_snapshot(id: str, manager: BuildManager) -> dict[str, object]: return {"platform": platform_opt, "other_options": hash_digest(json_dumps(snapshot))} -def find_cache_meta(id: str, path: str, manager: BuildManager) -> CacheMeta | None: +def find_cache_meta( + id: str, path: str, manager: BuildManager, skip_validation: bool = False +) -> CacheMeta | None: """Find cache data for a module. Args: id: module ID path: module path manager: the build manager (for pyversion, log/trace, and build options) + skip_validation: if True skip any validation steps (used for parallel checking) Returns: A CacheMeta instance if the cache data was found and appears @@ -1528,6 +1541,8 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> CacheMeta | No manager.add_stats( load_meta_time=t2 - t0, load_meta_load_time=t1 - t0, load_meta_from_dict_time=t2 - t1 ) + if skip_validation: + return m # Ignore cache if generated by an older mypy version. 
if m.version_id != manager.version_id and not manager.options.skip_version_check: @@ -2199,7 +2214,7 @@ def __init__( def reload_meta(self) -> None: assert self.path is not None - self.meta = find_cache_meta(self.id, self.path, self.manager) + self.meta = find_cache_meta(self.id, self.path, self.manager, skip_validation=True) assert self.meta is not None self.interface_hash = self.meta.interface_hash @@ -3541,6 +3556,9 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: while ready or not_ready or still_working: stale, fresh = find_stale_sccs(ready, graph, manager) if stale: + for scc in stale: + for id in scc.mod_ids: + graph[id].mark_as_rechecked() manager.submit(stale) still_working = True # We eagerly walk over fresh SCCs to reach as many stale SCCs as soon @@ -3552,7 +3570,10 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: else: done, still_working, results = manager.wait_for_done(graph) for id, (interface_cache, errors) in results.items(): - graph[id].interface_hash = bytes.fromhex(interface_cache) + new_hash = bytes.fromhex(interface_cache) + if new_hash != graph[id].interface_hash: + graph[id].mark_interface_stale() + graph[id].interface_hash = new_hash manager.flush_errors(manager.errors.simplify_path(graph[id].xpath), errors, False) ready = [] t0 = time.time() diff --git a/mypy/build_worker/worker.py b/mypy/build_worker/worker.py index 1db8f757f3ff..318ed4cb2f47 100644 --- a/mypy/build_worker/worker.py +++ b/mypy/build_worker/worker.py @@ -74,7 +74,8 @@ def serve(server: IPCServer, options: Options, errors: Errors, fscache: FileSyst sources = [BuildSource(*st) for st in data["sources"]] data_dir = os.path.dirname(os.path.dirname(__file__)) - search_paths = compute_search_paths(sources, options, data_dir, None) + alt_lib_path = os.environ.get("MYPY_ALT_LIB_PATH") + search_paths = compute_search_paths(sources, options, data_dir, alt_lib_path) source_set = BuildSourceSet(sources) plugin, snapshot = load_plugins(options, errors, sys.stdout, []) @@ -101,7 +102,10 @@ def flush_errors(filename: str | None, new_messages: list[str], is_serious: bool ) gc.disable() - graph = load_graph(sources, manager) + try: + graph = load_graph(sources, manager) + except CompileError: + return gc.freeze() gc.unfreeze() gc.enable() diff --git a/mypy/test/testcheck.py b/mypy/test/testcheck.py index f2b7057d9f20..3406d591c93a 100644 --- a/mypy/test/testcheck.py +++ b/mypy/test/testcheck.py @@ -143,7 +143,7 @@ def run_case_once( if "union-error" not in testcase.file and "Pep604" not in testcase.name: options.force_union_syntax = True - if incremental_step and options.incremental: + if incremental_step and options.incremental or options.num_workers > 0: # Don't overwrite # flags: --no-incremental in incremental test cases options.incremental = True else: From 2cd69e4e677f791e3552134bedd17e3d2bc75306 Mon Sep 17 00:00:00 2001 From: Ivan Levkivskyi Date: Wed, 19 Nov 2025 15:10:23 +0000 Subject: [PATCH 08/21] Better testing support --- mypy/build.py | 37 ++++----- mypy/build_worker/worker.py | 80 +++++++++++-------- mypy/main.py | 1 - mypy/test/helpers.py | 4 + mypy/test/testcheck.py | 26 +++++- test-data/unit/check-ctypes.test | 4 +- test-data/unit/lib-stub/typing_extensions.pyi | 1 + 7 files changed, 94 insertions(+), 59 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index b34d99170140..fd209d5b47b4 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -234,7 +234,7 @@ def wait_for_worker(status_file: str, timeout: float = 5.0) -> tuple[int, str]: sys.exit(2) -def 
start_worker(options_data: str, idx: int) -> subprocess.Popen[bytes]: +def start_worker(options_data: str, idx: int, env: Mapping[str, str]) -> subprocess.Popen[bytes]: status_file = f".mypy_worker.{idx}.json" if os.path.isfile(status_file): os.unlink(status_file) @@ -245,7 +245,7 @@ def start_worker(options_data: str, idx: int) -> subprocess.Popen[bytes]: f"--status-file={status_file}", f'--options-data="{options_data}"', ] - return subprocess.Popen(command) + return subprocess.Popen(command, env=env) def get_worker(idx: int, proc: subprocess.Popen[bytes]) -> WorkerClient: @@ -268,6 +268,7 @@ def build( stdout: TextIO | None = None, stderr: TextIO | None = None, extra_plugins: Sequence[Plugin] | None = None, + worker_env: Mapping[str, str] | None = None, ) -> BuildResult: """Analyze a program. @@ -311,15 +312,10 @@ def default_flush_errors( workers = [] procs = [] if options.num_workers > 0: - if options.use_builtins_fixtures: - os.environ["MYPY_TEST_PREFIX"] = os.path.dirname(os.path.dirname(__file__)) - if alt_lib_path: - os.environ["MYPY_ALT_LIB_PATH"] = alt_lib_path - pickled_options = pickle.dumps(options.snapshot()) options_data = base64.b64encode(pickled_options).decode() for i in range(options.num_workers): - procs.append(start_worker(options_data, i)) + procs.append(start_worker(options_data, i, worker_env or os.environ)) for i, proc in enumerate(procs): workers.append(get_worker(i, proc)) @@ -350,6 +346,18 @@ def default_flush_errors( flush_errors(None, e.messages, serious) e.messages = messages raise + finally: + for worker in workers: + try: + send(worker.conn, {"final": True}) + except OSError: + pass + for worker in workers: + worker.conn.close() + worker.proc.wait() + status_file = f".mypy_worker.{worker.idx}.json" + if os.path.isfile(status_file): + os.unlink(status_file) def build_inner( @@ -426,19 +434,6 @@ def build_inner( dump_line_checking_stats(options.line_checking_stats, graph) return BuildResult(manager, graph) finally: - - for worker in workers: - try: - send(worker.conn, {"final": True}) - except OSError: - pass - for worker in workers: - worker.conn.close() - worker.proc.wait() - status_file = f".mypy_worker.{worker.idx}.json" - if os.path.isfile(status_file): - os.unlink(status_file) - t0 = time.time() manager.metastore.commit() manager.add_stats(cache_commit_time=time.time() - t0) diff --git a/mypy/build_worker/worker.py b/mypy/build_worker/worker.py index 318ed4cb2f47..ca7d99ab1032 100644 --- a/mypy/build_worker/worker.py +++ b/mypy/build_worker/worker.py @@ -10,6 +10,7 @@ import sys import time +from mypy import util from mypy.build import ( SCC, BuildManager, @@ -68,47 +69,27 @@ def main(argv: list[str]) -> None: finally: server.cleanup() + if options.fast_exit: + util.hard_exit(0) + def serve(server: IPCServer, options: Options, errors: Errors, fscache: FileSystemCache) -> None: data = receive(server) sources = [BuildSource(*st) for st in data["sources"]] + manager = setup_worker_manager(sources, options, errors, fscache) + if manager is None: + return - data_dir = os.path.dirname(os.path.dirname(__file__)) - alt_lib_path = os.environ.get("MYPY_ALT_LIB_PATH") - search_paths = compute_search_paths(sources, options, data_dir, alt_lib_path) - - source_set = BuildSourceSet(sources) - plugin, snapshot = load_plugins(options, errors, sys.stdout, []) - - def flush_errors(filename: str | None, new_messages: list[str], is_serious: bool) -> None: - pass - - manager = BuildManager( - data_dir, - search_paths, - ignore_prefix=os.getcwd(), - source_set=source_set, - 
reports=None, - options=options, - version_id=__version__, - plugin=plugin, - plugins_snapshot=snapshot, - errors=errors, - error_formatter=None, - flush_errors=flush_errors, - fscache=fscache, - stdout=sys.stdout, - stderr=sys.stderr, - ) - - gc.disable() + if platform.python_implementation() == "CPython": + gc.disable() try: graph = load_graph(sources, manager) except CompileError: return - gc.freeze() - gc.unfreeze() - gc.enable() + if platform.python_implementation() == "CPython": + gc.freeze() + gc.unfreeze() + gc.enable() for id in graph: manager.import_map[id] = set(graph[id].dependencies + graph[id].suppressed) @@ -144,5 +125,40 @@ def flush_errors(filename: str | None, new_messages: list[str], is_serious: bool manager.add_stats(total_process_stale_time=time.time() - t0, stale_sccs_processed=1) +def setup_worker_manager( + sources: list[BuildSource], options: Options, errors: Errors, fscache: FileSystemCache +) -> BuildManager | None: + data_dir = os.path.dirname(os.path.dirname(__file__)) + alt_lib_path = os.environ.get("MYPY_ALT_LIB_PATH") + search_paths = compute_search_paths(sources, options, data_dir, alt_lib_path) + + source_set = BuildSourceSet(sources) + try: + plugin, snapshot = load_plugins(options, errors, sys.stdout, []) + except CompileError: + return None + + def flush_errors(filename: str | None, new_messages: list[str], is_serious: bool) -> None: + pass + + return BuildManager( + data_dir, + search_paths, + ignore_prefix=os.getcwd(), + source_set=source_set, + reports=None, + options=options, + version_id=__version__, + plugin=plugin, + plugins_snapshot=snapshot, + errors=errors, + error_formatter=None, + flush_errors=flush_errors, + fscache=fscache, + stdout=sys.stdout, + stderr=sys.stderr, + ) + + def console_entry() -> None: main(sys.argv[1:]) diff --git a/mypy/main.py b/mypy/main.py index 58cdb5f76722..22bd4cea7d47 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -167,7 +167,6 @@ def main( # Exit without freeing objects -- it's faster. # # NOTE: We don't flush all open files on exit (or run other destructors)! - # TODO: use this for workers and join? util.hard_exit(code) elif code: sys.exit(code) diff --git a/mypy/test/helpers.py b/mypy/test/helpers.py index 8ff6874e746a..a0c93c05bb11 100644 --- a/mypy/test/helpers.py +++ b/mypy/test/helpers.py @@ -482,3 +482,7 @@ def find_test_files(pattern: str, exclude: list[str] | None = None) -> list[str] for path in (pathlib.Path(test_data_prefix).rglob(pattern)) if path.name not in (exclude or []) ] + + +def remove_typevar_ids(a: list[str]) -> list[str]: + return [re.sub(r"`-?\d+", "", line) for line in a] diff --git a/mypy/test/testcheck.py b/mypy/test/testcheck.py index 3406d591c93a..eefb7006448e 100644 --- a/mypy/test/testcheck.py +++ b/mypy/test/testcheck.py @@ -22,6 +22,7 @@ normalize_error_messages, parse_options, perform_file_operations, + remove_typevar_ids, ) from mypy.test.update_data import update_testcase_output @@ -133,6 +134,11 @@ def run_case_once( options.use_builtins_fixtures = True options.show_traceback = True + if options.num_workers: + options.fixed_format_cache = True + if testcase.output_files: + raise pytest.skip("Reports are not supported in parallel mode yet") + # Enable some options automatically based on test file name. 
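
The new remove_typevar_ids helper strips the numeric type variable ids that mypy embeds in revealed types (the `-1 / `2 style suffixes), presumably because those ids can differ when modules are analyzed in separate worker processes; it is applied to both expected and actual output, and only when num_workers > 0. A small illustration of what the regex does:

lines = ['Revealed type is "def [T] (x: T`-1) -> builtins.list[T`-1]"']
# The backtick-number suffixes are removed, the rest is left untouched.
assert remove_typevar_ids(lines) == [
    'Revealed type is "def [T] (x: T) -> builtins.list[T]"'
]
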
if "columns" in testcase.file: options.show_column_numbers = True @@ -160,12 +166,22 @@ def run_case_once( ) plugin_dir = os.path.join(test_data_prefix, "plugins") - sys.path.insert(0, plugin_dir) + worker_env = None + if options.num_workers > 0: + worker_env = os.environ.copy() + root_dir = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) + worker_env["PYTHONPATH"] = os.pathsep.join([root_dir, plugin_dir]) + worker_env["MYPY_TEST_PREFIX"] = root_dir + worker_env["MYPY_ALT_LIB_PATH"] = test_temp_dir + + sys.path.insert(0, plugin_dir) res = None blocker = False try: - res = build.build(sources=sources, options=options, alt_lib_path=test_temp_dir) + res = build.build( + sources=sources, options=options, alt_lib_path=test_temp_dir, worker_env=worker_env + ) a = res.errors except CompileError as e: a = e.messages @@ -196,6 +212,9 @@ def run_case_once( if output != a and testcase.config.getoption("--update-data", False): update_testcase_output(testcase, a, incremental_step=incremental_step) + if options.num_workers > 0: + a = remove_typevar_ids(a) + output = remove_typevar_ids(output) assert_string_arrays_equal(output, a, msg.format(testcase.file, testcase.line)) if res: @@ -211,7 +230,8 @@ def run_case_once( for module, target in res.manager.processed_targets if module in testcase.test_modules ] - if expected is not None: + # TODO: check targets in parallel mode (e.g. per SCC). + if options.num_workers == 0 and expected is not None: assert_target_equivalence(name, expected, actual) if incremental_step > 1: suffix = "" if incremental_step == 2 else str(incremental_step - 1) diff --git a/test-data/unit/check-ctypes.test b/test-data/unit/check-ctypes.test index a0a5c44b2ba5..88cb524035ba 100644 --- a/test-data/unit/check-ctypes.test +++ b/test-data/unit/check-ctypes.test @@ -166,11 +166,11 @@ reveal_type(intarr4(*int_values)) # N: Revealed type is "_ctypes.Array[ctypes.c reveal_type(intarr4(*c_int_values)) # N: Revealed type is "_ctypes.Array[ctypes.c_int]" reveal_type(intarr6(1, ctypes.c_int(2), *int_values)) # N: Revealed type is "_ctypes.Array[ctypes.c_int]" reveal_type(intarr6(1, ctypes.c_int(2), *c_int_values)) # N: Revealed type is "_ctypes.Array[ctypes.c_int]" -[typing fixtures/typing-medium.pyi] float_values = [1.0, 2.0, 3.0, 4.0] -intarr4(*float_values) # E: Array constructor argument 1 of type "List[float]" is not convertible to the array element type "Iterable[c_int]" +intarr4(*float_values) # E: Array constructor argument 1 of type "list[float]" is not convertible to the array element type "Iterable[c_int]" [builtins fixtures/floatdict.pyi] +[typing fixtures/typing-medium.pyi] [case testCtypesArrayConstructorKwargs] import ctypes diff --git a/test-data/unit/lib-stub/typing_extensions.pyi b/test-data/unit/lib-stub/typing_extensions.pyi index 71a17a939d41..6b529d8a7c41 100644 --- a/test-data/unit/lib-stub/typing_extensions.pyi +++ b/test-data/unit/lib-stub/typing_extensions.pyi @@ -1,3 +1,4 @@ +import collections import typing from typing import Any, Callable, Mapping, Iterable, Iterator, NoReturn as NoReturn, Dict, Tuple, Type, Union from typing import TYPE_CHECKING as TYPE_CHECKING From 1e5b1fe0831fc176159d0efebd6f489ebf627a07 Mon Sep 17 00:00:00 2001 From: Ivan Levkivskyi Date: Wed, 19 Nov 2025 17:06:50 +0000 Subject: [PATCH 09/21] Skip some tests --- mypy/test/data.py | 6 ++++ mypy/test/helpers.py | 2 ++ mypy/test/testcheck.py | 4 ++- test-data/unit/check-classes.test | 4 +-- test-data/unit/check-flags.test | 2 +- test-data/unit/check-functions.test | 38 
+++++++------------------- test-data/unit/check-ignore.test | 2 +- test-data/unit/check-incremental.test | 2 +- test-data/unit/check-inference.test | 9 ++---- test-data/unit/check-kwargs.test | 2 +- test-data/unit/check-modules.test | 38 ++++++++------------------ test-data/unit/check-namedtuple.test | 3 +- test-data/unit/check-newsemanal.test | 31 +++++++-------------- test-data/unit/check-plugin-attrs.test | 10 ++----- test-data/unit/check-protocols.test | 4 +-- test-data/unit/check-selftype.test | 2 +- test-data/unit/check-type-aliases.test | 2 +- 17 files changed, 60 insertions(+), 101 deletions(-) diff --git a/mypy/test/data.py b/mypy/test/data.py index 5b0ad84c0ba7..d9bfb98361d3 100644 --- a/mypy/test/data.py +++ b/mypy/test/data.py @@ -604,6 +604,12 @@ def pytest_addoption(parser: Any) -> None: default=False, help="Update test data to reflect actual output (supported only for certain tests)", ) + group.addoption( + "--mypy-num-workers", + type=int, + default=0, + help="Run tests using multiple worker processes for each test case", + ) group.addoption( "--save-failures-to", default=None, diff --git a/mypy/test/helpers.py b/mypy/test/helpers.py index a0c93c05bb11..3c7f7d191068 100644 --- a/mypy/test/helpers.py +++ b/mypy/test/helpers.py @@ -365,6 +365,8 @@ def parse_options( if testcase.config.getoption("--mypy-verbose"): options.verbosity = testcase.config.getoption("--mypy-verbose") + if testcase.config.getoption("--mypy-num-workers"): + options.num_workers = testcase.config.getoption("--mypy-num-workers") return options diff --git a/mypy/test/testcheck.py b/mypy/test/testcheck.py index eefb7006448e..24874ac74218 100644 --- a/mypy/test/testcheck.py +++ b/mypy/test/testcheck.py @@ -137,7 +137,9 @@ def run_case_once( if options.num_workers: options.fixed_format_cache = True if testcase.output_files: - raise pytest.skip("Reports are not supported in parallel mode yet") + raise pytest.skip("Reports are not supported in parallel mode") + if testcase.name.endswith("_no_parallel"): + raise pytest.skip("Test not supported in parallel mode yet") # Enable some options automatically based on test file name. 
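
With the new --mypy-num-workers pytest option in place, the type checking suite can be exercised in parallel mode locally in roughly the same way as the CI jobs added later in this series, which pass "-n 4 --mypy-num-workers=4 mypy/test/testcheck.py" through tox; an invocation like "pytest -n 4 --mypy-num-workers=4 mypy/test/testcheck.py" should behave the same. Most of the test-data churn below serves this mode: per-file [out] sections, whose line order depends on which module happens to be reported first, are rewritten as inline # E: / # N: annotations that do not depend on reporting order, and cases that cannot yet be expressed that way are renamed with a _no_parallel suffix so run_case_once skips them.
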
if "columns" in testcase.file: diff --git a/test-data/unit/check-classes.test b/test-data/unit/check-classes.test index 0e9d6357af1a..93fb47e546ed 100644 --- a/test-data/unit/check-classes.test +++ b/test-data/unit/check-classes.test @@ -7683,7 +7683,7 @@ class Child(metaclass=M, thing=0): pass [builtins fixtures/object_with_init_subclass.pyi] -[case testTooManyArgsForObject] +[case testTooManyArgsForObject_no_parallel] class A(thing=5): pass [out] @@ -8498,7 +8498,7 @@ def identity_wrapper(func: FuncT) -> FuncT: def foo(self: Any) -> str: return "" -[case testParentClassWithTypeAliasAndSubclassWithMethod] +[case testParentClassWithTypeAliasAndSubclassWithMethod_no_parallel] from typing import Any, Callable, TypeVar class Parent: diff --git a/test-data/unit/check-flags.test b/test-data/unit/check-flags.test index 8eec979029d0..a892aeba5a2c 100644 --- a/test-data/unit/check-flags.test +++ b/test-data/unit/check-flags.test @@ -2477,7 +2477,7 @@ cb(lambda x: a) # OK fn = lambda x: a cb(fn) -[case testShowErrorCodeLinks] +[case testShowErrorCodeLinks_no_parallel] # flags: --show-error-codes --show-error-code-links x: int = "" # E: Incompatible types in assignment (expression has type "str", variable has type "int") [assignment] diff --git a/test-data/unit/check-functions.test b/test-data/unit/check-functions.test index 1882f235f7e3..4bdb7e1f8173 100644 --- a/test-data/unit/check-functions.test +++ b/test-data/unit/check-functions.test @@ -1224,24 +1224,20 @@ import b from d import dec @dec def f(x: int) -> None: pass -b.g(1) # E +b.g(1) # E: Argument 1 to "g" has incompatible type "int"; expected "str" [file b.py] import a from d import dec @dec def g(x: str) -> None: pass -a.f('') +a.f('') # E: Argument 1 to "f" has incompatible type "str"; expected "int" [file d.py] from typing import TypeVar T = TypeVar('T') def dec(f: T) -> T: return f -[out] -tmp/b.py:5: error: Argument 1 to "f" has incompatible type "str"; expected "int" -tmp/a.py:5: error: Argument 1 to "g" has incompatible type "int"; expected "str" - [case testDecoratorWithNoAnnotationInImportCycle] import a @@ -1270,23 +1266,19 @@ import b from d import dec @dec def f(x: int) -> str: pass -b.g(1)() +b.g(1)() # E: "str" not callable [file b.py] import a from d import dec @dec def g(x: int) -> str: pass -a.f(1)() +a.f(1)() # E: "str" not callable [file d.py] from typing import Callable def dec(f: Callable[[int], str]) -> Callable[[int], str]: return f -[out] -tmp/b.py:5: error: "str" not callable -tmp/a.py:5: error: "str" not callable - [case testDecoratorWithCallAndFixedReturnTypeInImportCycle] import a @@ -1295,50 +1287,40 @@ import b from d import dec @dec() def f(x: int) -> str: pass -b.g(1)() +b.g(1)() # E: "str" not callable [file b.py] import a from d import dec @dec() def g(x: int) -> str: pass -a.f(1)() +a.f(1)() # E: "str" not callable [file d.py] from typing import Callable def dec() -> Callable[[Callable[[int], str]], Callable[[int], str]]: pass -[out] -tmp/b.py:5: error: "str" not callable -tmp/a.py:5: error: "str" not callable - [case testDecoratorWithCallAndFixedReturnTypeInImportCycleAndDecoratorArgs] import a [file a.py] import b from d import dec -@dec(1) +@dec(1) # E: Argument 1 to "dec" has incompatible type "int"; expected "str" def f(x: int) -> str: pass -b.g(1)() +b.g(1)() # E: "str" not callable [file b.py] import a from d import dec -@dec(1) +@dec(1) # E: Argument 1 to "dec" has incompatible type "int"; expected "str" def g(x: int) -> str: pass -a.f(1)() +a.f(1)() # E: "str" not callable [file d.py] from typing 
import Callable def dec(x: str) -> Callable[[Callable[[int], str]], Callable[[int], str]]: pass -[out] -tmp/b.py:3: error: Argument 1 to "dec" has incompatible type "int"; expected "str" -tmp/b.py:5: error: "str" not callable -tmp/a.py:3: error: Argument 1 to "dec" has incompatible type "int"; expected "str" -tmp/a.py:5: error: "str" not callable - [case testUndefinedDecoratorInImportCycle] # cmd: mypy -m foo.base [file foo/__init__.py] diff --git a/test-data/unit/check-ignore.test b/test-data/unit/check-ignore.test index d0f6bb6aeb60..0c373c0e2788 100644 --- a/test-data/unit/check-ignore.test +++ b/test-data/unit/check-ignore.test @@ -198,7 +198,7 @@ bar(Child()) [out] main:19: error: Argument 1 to "bar" has incompatible type "Child"; expected "Base[str, str]" -[case testTypeIgnoreLineNumberWithinFile] +[case testTypeIgnoreLineNumberWithinFile_no_parallel] import m pass # type: ignore m.f(kw=1) diff --git a/test-data/unit/check-incremental.test b/test-data/unit/check-incremental.test index 56c9cef80f34..78b9ad719fa7 100644 --- a/test-data/unit/check-incremental.test +++ b/test-data/unit/check-incremental.test @@ -6902,7 +6902,7 @@ class TheClass: tmp/a.py:3: note: Revealed type is "def (value: builtins.object) -> lib.TheClass.pyenum@6" -[case testIncrementalFunctoolsPartial] +[case testIncrementalFunctoolsPartial_no_parallel] import a [file a.py] diff --git a/test-data/unit/check-inference.test b/test-data/unit/check-inference.test index bc4b56e49622..d473c3d831e9 100644 --- a/test-data/unit/check-inference.test +++ b/test-data/unit/check-inference.test @@ -1747,7 +1747,7 @@ def f(blocks: Any): # E: Name "Any" is not defined \ to_process = list(blocks) [builtins fixtures/list.pyi] -[case testSpecialCaseEmptyListInitialization2] +[case testSpecialCaseEmptyListInitialization2_no_parallel] def f(blocks: object): to_process = [] to_process = list(blocks) # E: No overload variant of "list" matches argument type "object" \ @@ -3644,17 +3644,14 @@ class A: import a [file a.py] import b -reveal_type(b.B.x) +reveal_type(b.B.x) # N: Revealed type is "builtins.int" class A: x = 42 [file b.py] import a -reveal_type(a.A.x) +reveal_type(a.A.x) # N: Revealed type is "builtins.int" class B: x = 42 -[out] -tmp/b.py:2: note: Revealed type is "builtins.int" -tmp/a.py:2: note: Revealed type is "builtins.int" [case testUnionTypeCallableInference] from typing import Callable, Type, TypeVar, Union diff --git a/test-data/unit/check-kwargs.test b/test-data/unit/check-kwargs.test index 689553445e9d..708b6662611e 100644 --- a/test-data/unit/check-kwargs.test +++ b/test-data/unit/check-kwargs.test @@ -464,7 +464,7 @@ class A: pass A.B(x=1) # E: Unexpected keyword argument "x" for "B" -[case testUnexpectedMethodKwargFromOtherModule] +[case testUnexpectedMethodKwargFromOtherModule_no_parallel] import m m.A(x=1) [file m.py] diff --git a/test-data/unit/check-modules.test b/test-data/unit/check-modules.test index 862cd8ea3905..9345d4d13777 100644 --- a/test-data/unit/check-modules.test +++ b/test-data/unit/check-modules.test @@ -2496,54 +2496,43 @@ y = int() [case testImportFromReExportInCycleUsingRelativeImport1] from m import One -reveal_type(One) +reveal_type(One) # N: Revealed type is "def () -> m.one.One" [file m/__init__.py] from .one import One from .two import Two -reveal_type(One) +reveal_type(One) # N: Revealed type is "def () -> m.one.One" [file m/one.py] class One: pass [file m/two.py] from m import One -reveal_type(One) +reveal_type(One) # N: Revealed type is "def () -> m.one.One" x: One -reveal_type(x) 
+reveal_type(x) # N: Revealed type is "m.one.One" class Two(One): pass y: Two -y = x +y = x # E: Incompatible types in assignment (expression has type "One", variable has type "Two") x = y -[out] -tmp/m/two.py:2: note: Revealed type is "def () -> m.one.One" -tmp/m/two.py:4: note: Revealed type is "m.one.One" -tmp/m/two.py:9: error: Incompatible types in assignment (expression has type "One", variable has type "Two") -tmp/m/__init__.py:3: note: Revealed type is "def () -> m.one.One" -main:2: note: Revealed type is "def () -> m.one.One" [case testImportReExportInCycleUsingRelativeImport2] from m import One -reveal_type(One) +reveal_type(One) # N: Revealed type is "def () -> m.one.One" [file m/__init__.py] from .one import One from .two import Two -reveal_type(One) +reveal_type(One) # N: Revealed type is "def () -> m.one.One" [file m/one.py] class One: pass [file m/two.py] import m -reveal_type(m.One) +reveal_type(m.One) # N: Revealed type is "def () -> m.one.One" x: m.One -reveal_type(x) +reveal_type(x) # N: Revealed type is "m.one.One" class Two: pass -[out] -tmp/m/two.py:2: note: Revealed type is "def () -> m.one.One" -tmp/m/two.py:4: note: Revealed type is "m.one.One" -tmp/m/__init__.py:3: note: Revealed type is "def () -> m.one.One" -main:2: note: Revealed type is "def () -> m.one.One" [case testImportReExportedNamedTupleInCycle1] from m import One @@ -2604,7 +2593,7 @@ import p [file p/__init__.py] from . import a from . import b -reveal_type(a.foo()) +reveal_type(a.foo()) # N: Revealed type is "builtins.int" [file p/a.py] import p def foo() -> int: pass @@ -2612,11 +2601,8 @@ def foo() -> int: pass import p def run() -> None: - reveal_type(p.a.foo()) + reveal_type(p.a.foo()) # N: Revealed type is "builtins.int" [builtins fixtures/module.pyi] -[out] -tmp/p/b.py:4: note: Revealed type is "builtins.int" -tmp/p/__init__.py:3: note: Revealed type is "builtins.int" [case testMissingSubmoduleImportedWithIgnoreMissingImports] # flags: --ignore-missing-imports @@ -2960,7 +2946,7 @@ class Some: name = __name__ reveal_type(Some.name) # N: Revealed type is "builtins.str" -[case testReExportAllInStub] +[case testReExportAllInStub_no_parallel] from m1 import C from m1 import D # E: Module "m1" has no attribute "D" C() diff --git a/test-data/unit/check-namedtuple.test b/test-data/unit/check-namedtuple.test index 66eb555421f4..6be3f2b3e953 100644 --- a/test-data/unit/check-namedtuple.test +++ b/test-data/unit/check-namedtuple.test @@ -115,7 +115,7 @@ a, b = x a, b, c = x # E: Need more than 2 values to unpack (3 expected) [builtins fixtures/tuple.pyi] -[case testNamedTupleAdditionalArgs] +[case testNamedTupleAdditionalArgs_no_parallel] from collections import namedtuple A = namedtuple('A', 'a b') @@ -125,7 +125,6 @@ D = namedtuple('D', 'a b', unrecognized_arg=False) E = namedtuple('E', 'a b', 0) [builtins fixtures/bool.pyi] - [out] main:4: error: Boolean literal expected as the "rename" argument to namedtuple() main:5: error: Boolean literal expected as the "rename" argument to namedtuple() diff --git a/test-data/unit/check-newsemanal.test b/test-data/unit/check-newsemanal.test index 61bf08018722..00c2d899f231 100644 --- a/test-data/unit/check-newsemanal.test +++ b/test-data/unit/check-newsemanal.test @@ -54,15 +54,12 @@ import a from b import B class A: pass y: B -y() +y() # E: "B" not callable [file b.py] from a import A class B: pass x: A -x() -[out] -tmp/b.py:4: error: "A" not callable -tmp/a.py:4: error: "B" not callable +x() # E: "A" not callable [case testNewAnalyzerTypeAnnotationCycle3] import 
b @@ -2205,7 +2202,8 @@ from typing import TypeVar, Any from b import B, C T = TypeVar('T', bound=B[Any]) -T = TypeVar('T', bound=C) +T = TypeVar('T', bound=C) # E: Cannot redefine "T" as a type variable \ + # E: Invalid assignment target [file b.py] from typing import Generic, Any @@ -2215,16 +2213,11 @@ class B(Generic[T]): x: T class C: ... -x: B[int] +x: B[int] # E: Type argument "int" of "B" must be a subtype of "B[Any]" y: B[B[Any]] -reveal_type(y.x) +reveal_type(y.x) # N: Revealed type is "b.B[Any]" [builtins fixtures/tuple.pyi] [typing fixtures/typing-full.pyi] -[out] -tmp/b.py:8: error: Type argument "int" of "B" must be a subtype of "B[Any]" -tmp/b.py:10: note: Revealed type is "b.B[Any]" -tmp/a.py:5: error: Cannot redefine "T" as a type variable -tmp/a.py:5: error: Invalid assignment target [case testNewAnalyzerDuplicateTypeVarImportCycleWithAliases] # flags: --disable-error-code used-before-def @@ -2234,7 +2227,8 @@ from typing import TypeVar, Any from b import BA, C T = TypeVar('T', bound=BAA[Any]) -T = TypeVar('T', bound=C) +T = TypeVar('T', bound=C) # E: Cannot redefine "T" as a type variable \ + # E: Invalid assignment target BAA = BA [file b.py] @@ -2246,14 +2240,9 @@ class B(Generic[T]): x: T class C: ... -x: B[int] +x: B[int] # E: Type argument "int" of "B" must be a subtype of "B[Any]" y: B[B[Any]] -reveal_type(y.x) -[out] -tmp/b.py:9: error: Type argument "int" of "B" must be a subtype of "B[Any]" -tmp/b.py:11: note: Revealed type is "b.B[Any]" -tmp/a.py:5: error: Cannot redefine "T" as a type variable -tmp/a.py:5: error: Invalid assignment target +reveal_type(y.x) # N: Revealed type is "b.B[Any]" [case testNewAnalyzerTypeVarBoundInCycle] import factory, box diff --git a/test-data/unit/check-plugin-attrs.test b/test-data/unit/check-plugin-attrs.test index 42f21e945ef0..91c53f0125a3 100644 --- a/test-data/unit/check-plugin-attrs.test +++ b/test-data/unit/check-plugin-attrs.test @@ -1861,10 +1861,10 @@ from typing import Callable @attr.s class C: - a = attr.ib(type=Lst[int]) + a = attr.ib(type=Lst[int]) # E: Name "Lst" is not defined # Note that for this test, the 'Value of type "int" is not indexable' errors are silly, # and a consequence of Callable etc. being set to an int in the test stub. - b = attr.ib(type=Callable[[], C]) + b = attr.ib(type=Callable[[], C]) # E: Value of type "int" is not indexable [file b.py] import attr import a @@ -1873,12 +1873,8 @@ from typing import List as Lst, Optional @attr.s class D: a = attr.ib(type=Lst[int]) - b = attr.ib(type=Optional[int]) + b = attr.ib(type=Optional[int]) # E: Value of type "int" is not indexable [builtins fixtures/list.pyi] -[out] -tmp/b.py:8: error: Value of type "int" is not indexable -tmp/a.py:7: error: Name "Lst" is not defined -tmp/a.py:10: error: Value of type "int" is not indexable [case testAttrsGenericInheritanceSpecialCase1] import attr diff --git a/test-data/unit/check-protocols.test b/test-data/unit/check-protocols.test index fd7f0c3449da..99fd94a69e4d 100644 --- a/test-data/unit/check-protocols.test +++ b/test-data/unit/check-protocols.test @@ -3890,7 +3890,7 @@ other_flag = False def update() -> str: ... [builtins fixtures/module.pyi] -[case testModuleAsProtocolImplementationClassObject] +[case testModuleAsProtocolImplementationClassObject_no_parallel] import runner import bad_runner from typing import Callable, Protocol @@ -3922,7 +3922,7 @@ class Run: def __init__(self, arg: str) -> None: ... 
[builtins fixtures/module.pyi] -[case testModuleAsProtocolImplementationTypeAlias] +[case testModuleAsProtocolImplementationTypeAlias_no_parallel] import runner import bad_runner from typing import Callable, Protocol diff --git a/test-data/unit/check-selftype.test b/test-data/unit/check-selftype.test index 89603efafddd..53754bf3c217 100644 --- a/test-data/unit/check-selftype.test +++ b/test-data/unit/check-selftype.test @@ -749,7 +749,7 @@ def bad(x: str) -> str: ... reveal_type(ci.from_item(conv)) # N: Revealed type is "builtins.str" ci.from_item(bad) # E: Argument 1 to "from_item" of "C" has incompatible type "Callable[[str], str]"; expected "Callable[[int], str]" -[case testSelfTypeRestrictedMethodOverloadInit] +[case testSelfTypeRestrictedMethodOverloadInit_no_parallel] from typing import TypeVar from lib import P, C diff --git a/test-data/unit/check-type-aliases.test b/test-data/unit/check-type-aliases.test index 6923b0d8f006..bae1f8d08fb5 100644 --- a/test-data/unit/check-type-aliases.test +++ b/test-data/unit/check-type-aliases.test @@ -1339,7 +1339,7 @@ a: A reveal_type(a) # N: Revealed type is "builtins.list[builtins.str]" [builtins fixtures/tuple.pyi] -[case testTypeAliasDict] +[case testTypeAliasDict_no_parallel] D = dict[str, int] d = D() reveal_type(d) # N: Revealed type is "builtins.dict[builtins.str, builtins.int]" From 518218ee21a09a6f8c0c0b66c69e198589a28c54 Mon Sep 17 00:00:00 2001 From: Ivan Levkivskyi Date: Wed, 19 Nov 2025 17:41:45 +0000 Subject: [PATCH 10/21] Fix custom error codes --- mypy/build_worker/worker.py | 35 ++++++++++++++------ test-data/unit/check-plugin-error-codes.test | 4 +-- 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/mypy/build_worker/worker.py b/mypy/build_worker/worker.py index ca7d99ab1032..349c765117b2 100644 --- a/mypy/build_worker/worker.py +++ b/mypy/build_worker/worker.py @@ -9,6 +9,7 @@ import platform import sys import time +from typing import NamedTuple from mypy import util from mypy.build import ( @@ -36,6 +37,14 @@ CONNECTION_NAME = "build_worker" +class ServerContext(NamedTuple): + options: Options + disable_error_code: list[str] + enable_error_code: list[str] + errors: Errors + fscache: FileSystemCache + + def main(argv: list[str]) -> None: # Set recursion limit consistent with mypy/main.py sys.setrecursionlimit(RECURSION_LIMIT) @@ -46,6 +55,8 @@ def main(argv: list[str]) -> None: options_dict = pickle.loads(base64.b64decode(args.options_data)) options_obj = Options() + disable_error_code = options_dict.pop("disable_error_code", []) + enable_error_code = options_dict.pop("enable_error_code", []) options = options_obj.apply_changes(options_dict) status_file = args.status_file @@ -59,9 +70,10 @@ def main(argv: list[str]) -> None: cached_read = fscache.read errors = Errors(options, read_source=lambda path: read_py_file(path, cached_read)) + ctx = ServerContext(options, disable_error_code, enable_error_code, errors, fscache) try: with server: - serve(server, options, errors, fscache) + serve(server, ctx) except OSError: pass except Exception as exc: @@ -73,10 +85,10 @@ def main(argv: list[str]) -> None: util.hard_exit(0) -def serve(server: IPCServer, options: Options, errors: Errors, fscache: FileSystemCache) -> None: +def serve(server: IPCServer, ctx: ServerContext) -> None: data = receive(server) sources = [BuildSource(*st) for st in data["sources"]] - manager = setup_worker_manager(sources, options, errors, fscache) + manager = setup_worker_manager(sources, ctx) if manager is None: return @@ -125,19 +137,22 @@ def 
serve(server: IPCServer, options: Options, errors: Errors, fscache: FileSyst manager.add_stats(total_process_stale_time=time.time() - t0, stale_sccs_processed=1) -def setup_worker_manager( - sources: list[BuildSource], options: Options, errors: Errors, fscache: FileSystemCache -) -> BuildManager | None: +def setup_worker_manager(sources: list[BuildSource], ctx: ServerContext) -> BuildManager | None: data_dir = os.path.dirname(os.path.dirname(__file__)) alt_lib_path = os.environ.get("MYPY_ALT_LIB_PATH") - search_paths = compute_search_paths(sources, options, data_dir, alt_lib_path) + search_paths = compute_search_paths(sources, ctx.options, data_dir, alt_lib_path) source_set = BuildSourceSet(sources) try: - plugin, snapshot = load_plugins(options, errors, sys.stdout, []) + plugin, snapshot = load_plugins(ctx.options, ctx.errors, sys.stdout, []) except CompileError: return None + options = ctx.options + options.disable_error_code = ctx.disable_error_code + options.enable_error_code = ctx.enable_error_code + options.process_error_codes(error_callback=lambda msg: None) + def flush_errors(filename: str | None, new_messages: list[str], is_serious: bool) -> None: pass @@ -151,10 +166,10 @@ def flush_errors(filename: str | None, new_messages: list[str], is_serious: bool version_id=__version__, plugin=plugin, plugins_snapshot=snapshot, - errors=errors, + errors=ctx.errors, error_formatter=None, flush_errors=flush_errors, - fscache=fscache, + fscache=ctx.fscache, stdout=sys.stdout, stderr=sys.stderr, ) diff --git a/test-data/unit/check-plugin-error-codes.test b/test-data/unit/check-plugin-error-codes.test index 95789477977e..10f475b352e6 100644 --- a/test-data/unit/check-plugin-error-codes.test +++ b/test-data/unit/check-plugin-error-codes.test @@ -3,7 +3,7 @@ def main() -> None: return -main() # E: Custom error [custom] +main() # E: Custom error [custom] [file mypy.ini] \[mypy] @@ -25,7 +25,7 @@ plugins=/test-data/unit/plugins/custom_errorcode.py def main() -> None: return -main() # E: Custom error [custom] +main() # E: Custom error [custom] [file mypy.ini] \[mypy] From 2d70d59b2faf483937f4d0bcca208a33ee1a8b68 Mon Sep 17 00:00:00 2001 From: Ivan Levkivskyi Date: Wed, 19 Nov 2025 21:21:41 +0000 Subject: [PATCH 11/21] Skip couple more tests for now --- test-data/unit/check-modules.test | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/test-data/unit/check-modules.test b/test-data/unit/check-modules.test index 9345d4d13777..a9070247c97d 100644 --- a/test-data/unit/check-modules.test +++ b/test-data/unit/check-modules.test @@ -947,21 +947,16 @@ accept_float(a.b.c.value) [file a/__init__.py] value = 3 -b.value -a.b.value +b.value # E: Name "b" is not defined +a.b.value # E: Name "a" is not defined [file a/b/__init__.py] value = "a" -c.value -a.b.c.value +c.value # E: Name "c" is not defined +a.b.c.value # E: Name "a" is not defined [file a/b/c.py] value = 3.2 -[out] -tmp/a/__init__.py:2: error: Name "b" is not defined -tmp/a/__init__.py:3: error: Name "a" is not defined -tmp/a/b/__init__.py:2: error: Name "c" is not defined -tmp/a/b/__init__.py:3: error: Name "a" is not defined [case testSubmoduleMixingLocalAndQualifiedNames] from a.b import MyClass @@ -3170,7 +3165,7 @@ from b import no_such_export [file b.py] from a import no_such_export # E: Module "a" has no attribute "no_such_export" -[case testCyclicUndefinedImportWithStar1] +[case testCyclicUndefinedImportWithStar1_no_parallel] import a [file a.py] from b import no_such_export @@ -3180,7 +3175,7 @@ from a import 
* tmp/b.py:1: error: Cannot resolve name "no_such_export" (possible cyclic definition) tmp/a.py:1: error: Module "b" has no attribute "no_such_export" -[case testCyclicUndefinedImportWithStar2] +[case testCyclicUndefinedImportWithStar2_no_parallel] import a [file a.py] from b import no_such_export From fa50b916b7c4e409872f003d103989d90f2a46aa Mon Sep 17 00:00:00 2001 From: Ivan Levkivskyi Date: Thu, 20 Nov 2025 00:11:51 +0000 Subject: [PATCH 12/21] Add CI jobs --- .github/workflows/test.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6fe825748073..11e6e116460d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -101,6 +101,18 @@ jobs: # tox_extra_args: "-n 4 mypyc/test/test_run.py mypyc/test/test_external.py" # debug_build: true + - name: Parallel tests with py314-ubuntu, interpreted + python: '3.14' + os: ubuntu-24.04-arm + toxenv: py + tox_extra_args: "-n 4 --mypy-num-workers=4 mypy/test/testcheck.py" + - name: Parallel tests with py314-ubuntu, mypyc-compiled + python: '3.14' + os: ubuntu-24.04-arm + toxenv: py + tox_extra_args: "-n 4 --mypy-num-workers=4 mypy/test/testcheck.py" + test_mypyc: true + - name: Type check our own code (py39-ubuntu) python: '3.9' os: ubuntu-latest From f69b81b19238b4325306363fbb7026def5829d55 Mon Sep 17 00:00:00 2001 From: Ivan Levkivskyi Date: Thu, 20 Nov 2025 11:04:10 +0000 Subject: [PATCH 13/21] Undo unnecessary changes --- mypy/build.py | 2 -- pyproject.toml | 3 +-- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index fd209d5b47b4..3f2b05cbbf2e 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -122,8 +122,6 @@ Graph: _TypeAlias = dict[str, "State"] -t_import = time.time() - class SCC: """A simple class that represents a strongly connected component (import cycle).""" diff --git a/pyproject.toml b/pyproject.toml index 4f0d65fb1435..336a16c48979 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,6 @@ requires = [ # the following is from build-requirements.txt "types-psutil", "types-setuptools", - "psutil>=4.0", ] build-backend = "setuptools.build_meta" @@ -60,7 +59,7 @@ dependencies = [ dynamic = ["version"] [project.optional-dependencies] -dmypy = [] +dmypy = ["psutil>=4.0"] mypyc = ["setuptools>=50"] python2 = [] reports = ["lxml"] From 3e6ff9db2a52ad8461dee74108944b9b37812db0 Mon Sep 17 00:00:00 2001 From: Ivan Levkivskyi Date: Thu, 20 Nov 2025 14:17:16 +0000 Subject: [PATCH 14/21] Some refactoring --- mypy/build.py | 165 +++++++++++++++--------------------- mypy/build_worker/worker.py | 14 +-- mypy/defaults.py | 7 ++ mypy/dmypy/client.py | 2 +- mypy/ipc.py | 47 +++++++++- mypy/main.py | 2 +- 6 files changed, 127 insertions(+), 110 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 3f2b05cbbf2e..ca25ab32b121 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -29,20 +29,25 @@ import types from collections.abc import Iterator, Mapping, Sequence, Set as AbstractSet from heapq import heappop, heappush -from select import select from typing import TYPE_CHECKING, Any, Callable, ClassVar, Final, NoReturn, TextIO, TypedDict from typing_extensions import TypeAlias as _TypeAlias from librt.internal import cache_version import mypy.semanal_main -from mypy.cache import CACHE_VERSION, CacheMeta, ReadBuffer, WriteBuffer, read_json, write_json +from mypy.cache import CACHE_VERSION, CacheMeta, ReadBuffer, WriteBuffer from mypy.checker import TypeChecker +from mypy.defaults import ( + 
WORKER_CONNECTION_TIMEOUT, + WORKER_DONE_TIMEOUT, + WORKER_START_INTERVAL, + WORKER_START_TIMEOUT, +) from mypy.error_formatter import OUTPUT_CHOICES, ErrorFormatter from mypy.errors import CompileError, ErrorInfo, Errors, report_internal_error from mypy.graph_utils import prepare_sccs, strongly_connected_components, topsort from mypy.indirection import TypeIndirectionVisitor -from mypy.ipc import BadStatus, IPCBase, IPCClient, read_status +from mypy.ipc import BadStatus, IPCClient, read_status, ready_to_read, receive, send from mypy.messages import MessageBuilder from mypy.nodes import Import, ImportAll, ImportBase, ImportFrom, MypyFile, SymbolTable from mypy.partially_defined import PossiblyUndefinedVariableVisitor @@ -172,66 +177,13 @@ def __init__(self, manager: BuildManager, graph: Graph) -> None: self.errors: list[str] = [] # Filled in by build if desired -def receive(connection: IPCBase) -> dict[str, Any]: - """Receive single JSON data frame from a connection. - - Raise OSError if the data received is not valid JSON or if it is - not a dict. - """ - bdata = connection.read_bytes() - if not bdata: - raise OSError("No data received") - try: - buf = ReadBuffer(bdata) - data = read_json(buf) - except Exception as e: - raise OSError("Data received is not valid JSON dict") from e - return data - - -def send(connection: IPCBase, data: dict[str, Any]) -> None: - """Send data to a connection encoded and framed. - - The data must be JSON-serializable. We assume that a single send call is a - single frame to be sent on the connect. - """ - buf = WriteBuffer() - write_json(buf, data) - connection.write_bytes(buf.getvalue()) - - class WorkerClient: - def __init__(self, idx: int, conn: IPCClient, proc: subprocess.Popen[bytes]) -> None: - self.idx = idx + def __init__(self, status_file: str, conn: IPCClient, proc: subprocess.Popen[bytes]) -> None: + self.status_file = status_file self.conn = conn self.proc = proc -def wait_for_worker(status_file: str, timeout: float = 5.0) -> tuple[int, str]: - """Wait until the worker is up. - - Exit if it doesn't happen within the timeout. - """ - endtime = time.time() + timeout - while time.time() < endtime: - try: - data = read_status(status_file) - except BadStatus: - # If the file isn't there yet, retry later. - time.sleep(0.05) - continue - try: - pid = data["pid"] - connection_name = data["connection_name"] - assert isinstance(pid, int) and isinstance(connection_name, str) - return pid, connection_name - except Exception: - # If the file's content is bogus or the process is dead, fail. - pass - print("Worker process failed to start") - sys.exit(2) - - def start_worker(options_data: str, idx: int, env: Mapping[str, str]) -> subprocess.Popen[bytes]: status_file = f".mypy_worker.{idx}.json" if os.path.isfile(status_file): @@ -246,11 +198,31 @@ def start_worker(options_data: str, idx: int, env: Mapping[str, str]) -> subproc return subprocess.Popen(command, env=env) -def get_worker(idx: int, proc: subprocess.Popen[bytes]) -> WorkerClient: +def wait_for_worker(idx: int, proc: subprocess.Popen[bytes]) -> WorkerClient: + """Wait until the worker is up. + + Exit if it doesn't happen within the timeout. 
+ """ status_file = f".mypy_worker.{idx}.json" - pid, connection_name = wait_for_worker(status_file) - assert pid == proc.pid - return WorkerClient(idx, IPCClient(connection_name, 10), proc) + endtime = time.time() + WORKER_START_TIMEOUT + while time.time() < endtime: + try: + data = read_status(status_file) + except BadStatus: + # If the file isn't there yet, retry later. + time.sleep(WORKER_START_INTERVAL) + continue + try: + pid, connection_name = data["pid"], data["connection_name"] + assert isinstance(pid, int) and isinstance(connection_name, str) + assert pid == proc.pid + return WorkerClient( + status_file, IPCClient(connection_name, WORKER_CONNECTION_TIMEOUT), proc + ) + except Exception: + break + print("Worker process failed to start") + sys.exit(2) def build_error(msg: str) -> NoReturn: @@ -308,14 +280,15 @@ def default_flush_errors( extra_plugins = extra_plugins or [] workers = [] - procs = [] if options.num_workers > 0: pickled_options = pickle.dumps(options.snapshot()) options_data = base64.b64encode(pickled_options).decode() - for i in range(options.num_workers): - procs.append(start_worker(options_data, i, worker_env or os.environ)) - for i, proc in enumerate(procs): - workers.append(get_worker(i, proc)) + procs = [ + start_worker(options_data, idx, worker_env or os.environ) + for idx in range(options.num_workers) + ] + for idx, proc in enumerate(procs): + workers.append(wait_for_worker(idx, proc)) for worker in workers: source_tuples = [(s.path, s.module, s.text, s.base_dir, s.followed) for s in sources] @@ -353,9 +326,8 @@ def default_flush_errors( for worker in workers: worker.conn.close() worker.proc.wait() - status_file = f".mypy_worker.{worker.idx}.json" - if os.path.isfile(status_file): - os.unlink(status_file) + if os.path.isfile(worker.status_file): + os.unlink(worker.status_file) def build_inner( @@ -1037,47 +1009,48 @@ def stats_summary(self) -> Mapping[str, object]: return self.stats def submit(self, sccs: list[SCC]) -> None: - """Submit a stale SCC for processing in current process.""" + """Submit a stale SCC for processing in current process or parallel workers.""" if self.workers: + self.submit_to_workers(sccs) + else: + self.scc_queue.extend([(0, 0, scc) for scc in sccs]) + + def submit_to_workers(self, sccs: list[SCC] | None = None) -> None: + if sccs is not None: for scc in sccs: heappush(self.scc_queue, (-scc.size_hint, self.queue_order, scc)) self.queue_order += 1 - else: - self.scc_queue.extend([(0, 0, scc) for scc in sccs]) while self.scc_queue and self.free_workers: - worker = self.free_workers.pop() - if self.workers: - _, _, scc = heappop(self.scc_queue) - else: - _, _, scc = self.scc_queue.pop(0) - send(self.workers[worker].conn, {"scc_id": scc.id}) + idx = self.free_workers.pop() + _, _, scc = heappop(self.scc_queue) + send(self.workers[idx].conn, {"scc_id": scc.id}) def wait_for_done( self, graph: Graph ) -> tuple[list[SCC], bool, dict[str, tuple[str, list[str]]]]: - """Wait for a stale SCC processing (in process) to finish. + """Wait for a stale SCC processing to finish. - Return next processed SCC and whether we have more in the queue. - This emulates the API we will have for parallel processing - in multiple worker processes. + Return a tuple three items: + * processed SCCs + * whether we have more in the queue + * new interface hash and list of errors for each module + The last item is only used for parallel processing. 
""" - if not self.workers: - if not self.scc_queue: - return [], False, {} - _, _, next_scc = self.scc_queue.pop(0) - process_stale_scc(graph, next_scc, self) - return [next_scc], bool(self.scc_queue), {} + if self.workers: + return self.wait_for_done_workers() + if not self.scc_queue: + return [], False, {} + _, _, next_scc = self.scc_queue.pop(0) + process_stale_scc(graph, next_scc, self) + return [next_scc], bool(self.scc_queue), {} + def wait_for_done_workers(self) -> tuple[list[SCC], bool, dict[str, tuple[str, list[str]]]]: if not self.scc_queue and len(self.free_workers) == len(self.workers): return [], False, {} - # TODO: don't select from free workers. - conns = [w.conn.connection for w in self.workers] - ready, _, _ = select(conns, [], [], 100) done_sccs = [] results = {} - for r in ready: - idx = conns.index(r) + for idx in ready_to_read([w.conn for w in self.workers], WORKER_DONE_TIMEOUT): data = receive(self.workers[idx].conn) self.free_workers.add(idx) scc_id = data["scc_id"] @@ -1088,7 +1061,7 @@ def wait_for_done( ) results.update({k: tuple(v) for k, v in data["result"].items()}) done_sccs.append(self.scc_by_id[scc_id]) - self.submit([]) # advance after some workers are free. + self.submit_to_workers() # advance after some workers are free. return ( done_sccs, bool(self.scc_queue) or len(self.free_workers) < len(self.workers), @@ -3534,7 +3507,7 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: data = receive(worker.conn) assert data["status"] == "ok" - manager.free_workers = {w.idx for w in manager.workers} + manager.free_workers = set(range(manager.options.num_workers)) # Prime the ready list with leaf SCCs (that have no dependencies). ready = [] diff --git a/mypy/build_worker/worker.py b/mypy/build_worker/worker.py index 349c765117b2..1fd98fdd061c 100644 --- a/mypy/build_worker/worker.py +++ b/mypy/build_worker/worker.py @@ -12,19 +12,11 @@ from typing import NamedTuple from mypy import util -from mypy.build import ( - SCC, - BuildManager, - load_graph, - load_plugins, - process_stale_scc, - receive, - send, -) +from mypy.build import SCC, BuildManager, load_graph, load_plugins, process_stale_scc +from mypy.defaults import RECURSION_LIMIT from mypy.errors import CompileError, Errors, report_internal_error from mypy.fscache import FileSystemCache -from mypy.ipc import IPCServer -from mypy.main import RECURSION_LIMIT +from mypy.ipc import IPCServer, receive, send from mypy.modulefinder import BuildSource, BuildSourceSet, compute_search_paths from mypy.options import Options from mypy.util import read_py_file diff --git a/mypy/defaults.py b/mypy/defaults.py index 58a74a478b16..fe67c92da46b 100644 --- a/mypy/defaults.py +++ b/mypy/defaults.py @@ -42,3 +42,10 @@ # Threshold after which we sometimes filter out most errors to avoid very # verbose output. The default is to show all errors. 
MANY_ERRORS_THRESHOLD: Final = -1 + +RECURSION_LIMIT: Final = 2**14 + +WORKER_START_INTERVAL: Final = 0.03 +WORKER_START_TIMEOUT: Final = 3 +WORKER_CONNECTION_TIMEOUT: Final = 10 +WORKER_DONE_TIMEOUT: Final = 600 diff --git a/mypy/dmypy/client.py b/mypy/dmypy/client.py index 302c2484fd9e..4af39e7d27ad 100644 --- a/mypy/dmypy/client.py +++ b/mypy/dmypy/client.py @@ -17,10 +17,10 @@ from collections.abc import Mapping from typing import Any, Callable, NoReturn +from mypy.defaults import RECURSION_LIMIT from mypy.dmypy_os import alive, kill from mypy.dmypy_util import DEFAULT_STATUS_FILE, receive, send from mypy.ipc import BadStatus, IPCClient, IPCException, read_status -from mypy.main import RECURSION_LIMIT from mypy.util import check_python_version, get_terminal_width, should_force_color from mypy.version import __version__ diff --git a/mypy/ipc.py b/mypy/ipc.py index 57ab4665bc44..ee73dcdc8c07 100644 --- a/mypy/ipc.py +++ b/mypy/ipc.py @@ -13,8 +13,13 @@ import shutil import sys import tempfile +from select import select from types import TracebackType -from typing import Callable, Final +from typing import Any, Callable, Final + +from librt.internal import ReadBuffer, WriteBuffer + +from mypy.cache import read_json, write_json if sys.platform == "win32": # This may be private, but it is needed for IPC on Windows, and is basically stable @@ -346,3 +351,43 @@ def read_status(status_file: str) -> dict[str, object]: if not isinstance(data, dict): raise BadStatus("Invalid status file (not a dict)") return data + + +def ready_to_read(conns: list[IPCClient], timeout: float | None = None) -> list[int]: + """Wait until some connections are readable. + + Return index of each readable connection in the original list. + """ + # TODO: add Windows support for this. + assert sys.platform != "win32" + connections = [conn.connection for conn in conns] + ready, _, _ = select(connections, [], [], timeout) + return [connections.index(r) for r in ready] + + +def receive(connection: IPCBase) -> dict[str, Any]: + """Receive single JSON data frame from a connection. + + Raise OSError if the data received is not valid JSON or if it is + not a dict. + """ + bdata = connection.read_bytes() + if not bdata: + raise OSError("No data received") + try: + buf = ReadBuffer(bdata) + data = read_json(buf) + except Exception as e: + raise OSError("Data received is not valid JSON dict") from e + return data + + +def send(connection: IPCBase, data: dict[str, Any]) -> None: + """Send data to a connection encoded and framed. + + The data must be JSON-serializable. We assume that a single send call is a + single frame to be sent on the connect. 
+ """ + buf = WriteBuffer() + write_json(buf, data) + connection.write_bytes(buf.getvalue()) diff --git a/mypy/main.py b/mypy/main.py index 22bd4cea7d47..4077461802c7 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -20,6 +20,7 @@ parse_version, validate_package_allow_list, ) +from mypy.defaults import RECURSION_LIMIT from mypy.error_formatter import OUTPUT_CHOICES from mypy.errors import CompileError from mypy.find_sources import InvalidSourceList, create_source_list @@ -42,7 +43,6 @@ orig_stat: Final = os.stat MEM_PROFILE: Final = False # If True, dump memory profile -RECURSION_LIMIT: Final = 2**14 def stat_proxy(path: str) -> os.stat_result: From f70123a53fbcec0b164be524b8537ef93f2cf4c7 Mon Sep 17 00:00:00 2001 From: Ivan Levkivskyi Date: Thu, 20 Nov 2025 16:59:13 +0000 Subject: [PATCH 15/21] Add some docs/comments --- mypy/build.py | 201 ++++++++++++++++++++---------------- mypy/build_worker/worker.py | 40 +++++-- mypy/ipc.py | 26 ++--- mypy/test/testcheck.py | 3 + 4 files changed, 159 insertions(+), 111 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index ca25ab32b121..b90c1dea3531 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -123,6 +123,8 @@ # We are careful now, we can increase this in future if safe/useful. MAX_GC_FREEZE_CYCLES = 1 +# We store status of initial GC freeze as a global variable to avoid memory +# leaks in tests, where we keep creating new BuildManagers in the same process. initial_gc_freeze_done = False Graph: _TypeAlias = dict[str, "State"] @@ -153,6 +155,8 @@ def __init__( # make processing order more predictable. Dependents will be notified # that they may be ready in the order in this list. self.direct_dependents: list[int] = [] + # Rough estimate of how much time processing this SCC will take, this + # is used for more efficient scheduling across multiple build workers. self.size_hint: int = 0 @@ -178,51 +182,54 @@ def __init__(self, manager: BuildManager, graph: Graph) -> None: class WorkerClient: - def __init__(self, status_file: str, conn: IPCClient, proc: subprocess.Popen[bytes]) -> None: - self.status_file = status_file - self.conn = conn - self.proc = proc - - -def start_worker(options_data: str, idx: int, env: Mapping[str, str]) -> subprocess.Popen[bytes]: - status_file = f".mypy_worker.{idx}.json" - if os.path.isfile(status_file): - os.unlink(status_file) - command = [ - sys.executable, - "-m", - "mypy.build_worker", - f"--status-file={status_file}", - f'--options-data="{options_data}"', - ] - return subprocess.Popen(command, env=env) + """A simple class that represents a mypy build worker.""" + conn: IPCClient -def wait_for_worker(idx: int, proc: subprocess.Popen[bytes]) -> WorkerClient: - """Wait until the worker is up. + def __init__(self, status_file: str, options_data: str, env: Mapping[str, str]) -> None: + self.status_file = status_file + if os.path.isfile(status_file): + os.unlink(status_file) + + command = [ + sys.executable, + "-m", + "mypy.build_worker", + f"--status-file={status_file}", + f'--options-data="{options_data}"', + ] + # Return early without waiting, caller must call connect() before using the client. + self.proc = subprocess.Popen(command, env=env) - Exit if it doesn't happen within the timeout. - """ - status_file = f".mypy_worker.{idx}.json" - endtime = time.time() + WORKER_START_TIMEOUT - while time.time() < endtime: - try: - data = read_status(status_file) - except BadStatus: - # If the file isn't there yet, retry later. 
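
The status-file handshake and the per-message framing are small enough to show end to end. A hedged sketch of how the pieces moved into mypy/ipc.py and mypy/defaults.py fit together; the file names and payload values are illustrative only:

from mypy.defaults import WORKER_CONNECTION_TIMEOUT, WORKER_DONE_TIMEOUT
from mypy.ipc import IPCClient, read_status, ready_to_read, receive, send

# The worker drops a small JSON status file once its IPC server is listening;
# the coordinator polls it and then connects.  Illustrative contents:
#   {"pid": 12345, "connection_name": "/tmp/mypy-build-worker-xyz.sock"}
data = read_status(".mypy_worker.0.json")
conn = IPCClient(data["connection_name"], WORKER_CONNECTION_TIMEOUT)

# Each message is one framed JSON dict, serialized via the fixed-format cache
# buffers (ReadBuffer/WriteBuffer) rather than the json module.
send(conn, {"scc_id": 42})
reply = receive(conn)  # e.g. {"scc_id": 42, "result": {...}}

# ready_to_read() wraps select() and returns the indices of connections that
# have a frame waiting; this is how the coordinator multiplexes several
# workers.  It is POSIX-only for now (there is a TODO for Windows support).
for idx in ready_to_read([conn], timeout=WORKER_DONE_TIMEOUT):
    print("worker", idx, "has data")
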
- time.sleep(WORKER_START_INTERVAL) - continue + def connect(self) -> None: + end_time = time.time() + WORKER_START_TIMEOUT + while time.time() < end_time: + try: + data = read_status(self.status_file) + except BadStatus: + time.sleep(WORKER_START_INTERVAL) + continue + try: + pid, connection_name = data["pid"], data["connection_name"] + assert isinstance(pid, int) and isinstance(connection_name, str) + # Double-check this status file is created by us. + assert pid == self.proc.pid + self.conn = IPCClient(connection_name, WORKER_CONNECTION_TIMEOUT) + return + except Exception: + break + print("Failed to establish connection with worker") + sys.exit(2) + + def close(self) -> None: + self.conn.close() + # Technically we don't need to wait, but otherwise we will get ResourceWarnings. try: - pid, connection_name = data["pid"], data["connection_name"] - assert isinstance(pid, int) and isinstance(connection_name, str) - assert pid == proc.pid - return WorkerClient( - status_file, IPCClient(connection_name, WORKER_CONNECTION_TIMEOUT), proc - ) - except Exception: - break - print("Worker process failed to start") - sys.exit(2) + self.proc.wait(timeout=1) + except subprocess.TimeoutExpired: + pass + if os.path.isfile(self.status_file): + os.unlink(self.status_file) def build_error(msg: str) -> NoReturn: @@ -260,7 +267,7 @@ def build( (takes precedence over other directories) flush_errors: optional function to flush errors after a file is processed fscache: optionally a file-system cacher - + worker_env: An environment to start parallel build workers (used for tests) """ # If we were not given a flush_errors, we use one that will populate those # fields for callers that want the traditional API. @@ -283,16 +290,15 @@ def default_flush_errors( if options.num_workers > 0: pickled_options = pickle.dumps(options.snapshot()) options_data = base64.b64encode(pickled_options).decode() - procs = [ - start_worker(options_data, idx, worker_env or os.environ) + workers = [ + WorkerClient(f".mypy_worker.{idx}.json", options_data, worker_env or os.environ) for idx in range(options.num_workers) ] - for idx, proc in enumerate(procs): - workers.append(wait_for_worker(idx, proc)) - - for worker in workers: - source_tuples = [(s.path, s.module, s.text, s.base_dir, s.followed) for s in sources] - send(worker.conn, {"sources": source_tuples}) + for worker in workers: + # Start loading graph in each worker as soon as it is up. + worker.connect() + source_tuples = [(s.path, s.module, s.text, s.base_dir, s.followed) for s in sources] + send(worker.conn, {"sources": source_tuples}) try: result = build_inner( @@ -324,10 +330,7 @@ def default_flush_errors( except OSError: pass for worker in workers: - worker.conn.close() - worker.proc.wait() - if os.path.isfile(worker.status_file): - os.unlink(worker.status_file) + worker.close() def build_inner( @@ -800,15 +803,21 @@ def __init__( # Global topological order for SCCs. This exists to make order of processing # SCCs more predictable. self.top_order: list[int] = [] - # Stale SCCs that are queued for processing. Note that as of now we have just - # one worker, that is the same process. In the future, we will support multiple - # parallel worker processes. + # Stale SCCs that are queued for processing. Each tuple contains SCC size hint, + # SCC adding order (tie-breaker), and the SCC itself. self.scc_queue: list[tuple[int, int, SCC]] = [] # SCCs that have been fully processed. self.done_sccs: set[int] = set() + # Parallel build workers, list is empty for in-process type-checking. 
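
The queue ordering encoded in those tuples is easiest to see with a toy example: the negated size hint makes the heap pop the largest pending SCC first, and the monotonically increasing queue_order keeps equal-sized SCCs in submission order, presumably to avoid a long tail where one worker is left chewing on a huge SCC. A self-contained sketch, not part of the patch:

from heapq import heappush, heappop

queue: list[tuple[int, int, str]] = []
order = 0
for name, size_hint in [("small", 10), ("huge", 500), ("medium", 120)]:
    # Same shape as the real scc_queue entries: (-size hint, adding order, SCC).
    heappush(queue, (-size_hint, order, name))
    order += 1

# Largest SCCs are handed to free workers first; ties pop in FIFO order.
assert [heappop(queue)[2] for _ in range(3)] == ["huge", "medium", "small"]
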
self.workers: list[WorkerClient] = []
+        # We track which workers are currently free in the coordinator process.
+        # This is a tiny bit faster and conceptually simpler than checking which ones
+        # are writeable each time we want to submit an SCC for processing.
         self.free_workers: set[int] = set()
+        # A global adding order for SCC queue, see comment above.
         self.queue_order: int = 0
+        # Is this an instance used by a parallel worker?
+        self.parallel_worker = False
 
     def dump_stats(self) -> None:
         if self.options.dump_build_stats:
@@ -1465,11 +1474,11 @@ def find_cache_meta(
       A CacheMeta instance if the cache data was found and appears
       valid; otherwise None.
     """
-    t0 = time.time()
     # TODO: May need to take more build options into account
     meta_file, data_file, _ = get_cache_names(id, path, manager.options)
     manager.trace(f"Looking for {id} at {meta_file}")
     meta: bytes | dict[str, Any] | None
+    t0 = time.time()
     if manager.options.fixed_format_cache:
         meta = _load_ff_file(meta_file, manager, log_error=f"Could not load cache for {id}: ")
         if meta is None:
@@ -2061,6 +2070,10 @@ class State:
     # on a given source code line).
     per_line_checking_time_ns: dict[int, int]
 
+    # Rough estimate of how much time it would take to process this file. Currently,
+    # we use file size as a proxy for complexity.
+    size_hint: int
+
     def __init__(
         self,
         id: str | None,
@@ -2176,9 +2189,17 @@ def __init__(
             self.parse_file(temporary=temporary)
             self.compute_dependencies()
             if self.manager.workers:
+                # We don't need parsed trees in the coordinator process; we parse only to
+                # compute dependencies.
                 self.tree = None
 
     def reload_meta(self) -> None:
+        """Force reload of cache meta.
+
+        This is used by parallel checking workers to update shared information
+        that may ave changed after initial graph loading. Currently, this is only
+        the interface hash.
+        """
         assert self.path is not None
         self.meta = find_cache_meta(self.id, self.path, self.manager, skip_validation=True)
         assert self.meta is not None
@@ -2258,12 +2279,14 @@ def load_fine_grained_deps(self) -> dict[str, set[str]]:
         return self.manager.load_fine_grained_deps(self.id)
 
     def load_tree(self, temporary: bool = False) -> None:
-        # assert (
-        #     self.meta is not None
-        # ), "Internal error: this method must be called only for cached modules"
-
-        assert self.path is not None
-        _, data_file, _ = get_cache_names(self.id, self.path, self.manager.options)
+        if self.manager.parallel_worker:
+            assert self.path is not None
+            _, data_file, _ = get_cache_names(self.id, self.path, self.manager.options)
+        else:
+            assert (
+                self.meta is not None
+            ), "Internal error: this method must be called only for cached modules"
+            data_file = self.meta.data_file
 
         data: bytes | dict[str, Any] | None
         if self.options.fixed_format_cache:
@@ -3054,6 +3077,10 @@ def dispatch(sources: list[BuildSource], manager: BuildManager, stdout: TextIO)
 
     t0 = time.time()
 
+    # We disable GC while loading the graph as a performance optimization for
+    # cold-cache runs. The parsed ASTs are trees, and therefore should not have any
+    # reference cycles. This is an important optimization, since we create a lot of
+    # new objects while parsing files.
     global initial_gc_freeze_done
     if (
         not manager.options.test_env
@@ -3434,7 +3461,6 @@ def find_stale_sccs(
      * The interface hashes of direct dependents matches those recorded in the cache.
 
    The first and second conditions are verified by is_fresh().
""" - t0 = time.time() stale_sccs = [] fresh_sccs = [] for ascc in sccs: @@ -3483,7 +3509,6 @@ def find_stale_sccs( else: manager.log("Scheduling SCC of size %d (%s) as %s" % (size, scc_str, fresh_msg)) stale_sccs.append(ascc) - manager.add_stats(find_stale_time=time.time() - t0) return stale_sccs, fresh_sccs @@ -3498,11 +3523,11 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: manager.scc_by_id = scc_by_id manager.top_order = [scc.id for scc in sccs] + # Broadcast SCC structure to the parallel workers, since they don't compute it. for worker in manager.workers: data = receive(worker.conn) assert data["status"] == "ok" send(worker.conn, {"sccs": [(list(scc.mod_ids), scc.id, list(scc.deps)) for scc in sccs]}) - for worker in manager.workers: data = receive(worker.conn) assert data["status"] == "ok" @@ -3535,6 +3560,10 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: done = fresh else: done, still_working, results = manager.wait_for_done(graph) + # Expose the results of type-checking by workers. For in-process + # type-checking this is already done and results should be empty here. + if not manager.workers: + assert not results for id, (interface_cache, errors) in results.items(): new_hash = bytes.fromhex(interface_cache) if new_hash != graph[id].interface_hash: @@ -3542,14 +3571,12 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: graph[id].interface_hash = new_hash manager.flush_errors(manager.errors.simplify_path(graph[id].xpath), errors, False) ready = [] - t0 = time.time() for done_scc in done: for dependent in done_scc.direct_dependents: scc_by_id[dependent].not_ready_deps.discard(done_scc.id) if not scc_by_id[dependent].not_ready_deps: not_ready.remove(scc_by_id[dependent]) ready.append(scc_by_id[dependent]) - manager.add_stats(notify_dependents_time=time.time() - t0) def order_ascc(graph: Graph, ascc: AbstractSet[str], pri_max: int = PRI_INDIRECT) -> list[str]: @@ -3630,19 +3657,19 @@ def process_stale_scc( missing_sccs.add(dep_scc) sccs_to_find.update(manager.scc_by_id[dep_scc].deps) - t1 = time.time() if missing_sccs: # Load missing SCCs from cache. # TODO: speed-up ordering if this causes problems for large builds. - ts = time.time() fresh_sccs_to_load = [ manager.scc_by_id[sid] for sid in manager.top_order if sid in missing_sccs ] - manager.add_stats(fresh_order_time=time.time() - ts) - for prev_scc in fresh_sccs_to_load: - for mod_id in prev_scc.mod_ids: - graph[mod_id].reload_meta() + if manager.parallel_worker: + # Update cache metas as well, cache data is loaded below + # in process_fresh_modules(). + for prev_scc in fresh_sccs_to_load: + for mod_id in prev_scc.mod_ids: + graph[mod_id].reload_meta() manager.log(f"Processing {len(fresh_sccs_to_load)} fresh SCCs") if ( @@ -3657,11 +3684,9 @@ def process_stale_scc( # generation with the freeze()/unfreeze() trick below. This is arguably # a hack, but it gives huge performance wins for large third-party # libraries, like torch. 
-        tc = time.time()
         if manager.gc_freeze_cycles > 0:
             gc.collect()
             gc.disable()
-        manager.add_stats(gc_pre_freeze_time=time.time() - tc)
         for prev_scc in fresh_sccs_to_load:
             manager.done_sccs.add(prev_scc.id)
             process_fresh_modules(graph, sorted(prev_scc.mod_ids), manager)
@@ -3671,17 +3696,15 @@ def process_stale_scc(
             and manager.gc_freeze_cycles < MAX_GC_FREEZE_CYCLES
         ):
             manager.gc_freeze_cycles += 1
-            tc = time.time()
             gc.freeze()
             gc.unfreeze()
             gc.enable()
-            manager.add_stats(gc_post_freeze_time=time.time() - tc)
 
-    t2 = time.time()
+    t1 = time.time()
     # Process the SCC in stable order.
     scc = order_ascc_ex(graph, ascc)
 
-    t3 = time.time()
+    t2 = time.time()
     stale = scc
     for id in stale:
         # We may already have parsed the module, or not.
@@ -3695,7 +3718,7 @@ def process_stale_scc(
     assert typing_mod, "The typing module was not parsed"
 
     mypy.semanal_main.semantic_analysis_for_scc(graph, scc, manager.errors)
-    t4 = time.time()
+    t3 = time.time()
     # Track what modules aren't yet done, so we can finish them as soon
     # as possible, saving memory.
     unfinished_modules = set(stale)
@@ -3718,7 +3741,7 @@ def process_stale_scc(
             graph[id].generate_unused_ignore_notes()
             graph[id].generate_ignore_without_code_notes()
 
-    t5 = time.time()
+    t4 = time.time()
     # Flush errors, and write cache in two phases: first data files, then meta files.
     meta_tuples = {}
     errors_by_id = {}
@@ -3730,7 +3753,6 @@ def process_stale_scc(
         manager.flush_errors(manager.errors.simplify_path(graph[id].xpath), errors, False)
         errors_by_id[id] = errors
         meta_tuples[id] = graph[id].write_cache()
-        graph[id].mark_as_rechecked()
     for id in stale:
         meta_tuple = meta_tuples[id]
         if meta_tuple is None:
@@ -3741,12 +3763,11 @@ def process_stale_scc(
         write_cache_meta(meta, manager, meta_file)
     manager.done_sccs.add(ascc.id)
     manager.add_stats(
-        find_missing_time=t1 - t0,
-        load_missing_time=t2 - t1,
-        order_scc_time=t3 - t2,
-        semanal_time=t4 - t3,
-        type_check_time=t5 - t4,
-        flush_and_cache_time=time.time() - t5,
+        load_missing_time=t1 - t0,
+        order_scc_time=t2 - t1,
+        semanal_time=t3 - t2,
+        type_check_time=t4 - t3,
+        flush_and_cache_time=time.time() - t4,
     )
     scc_result = {}
     for id in scc:
diff --git a/mypy/build_worker/worker.py b/mypy/build_worker/worker.py
index 1fd98fdd061c..208b9a1f4704 100644
--- a/mypy/build_worker/worker.py
+++ b/mypy/build_worker/worker.py
@@ -1,3 +1,16 @@
+"""
+Mypy parallel build worker.
+
+The protocol of communication with the coordinator is as follows:
+* Read (pickled) build options from the command line.
+* Populate the status file with the pid and socket address.
+* Receive build sources from coordinator.
+* Load graph using the sources, and send "ok" to coordinator.
+* Receive SCC structure from coordinator, and ack it with an "ok".
+* Receive an SCC id from coordinator, process it, and send back the results.
+* When prompted by coordinator (with s "final" message), cleanup and shutdown.
+""" + from __future__ import annotations import argparse @@ -13,7 +26,7 @@ from mypy import util from mypy.build import SCC, BuildManager, load_graph, load_plugins, process_stale_scc -from mypy.defaults import RECURSION_LIMIT +from mypy.defaults import RECURSION_LIMIT, WORKER_CONNECTION_TIMEOUT from mypy.errors import CompileError, Errors, report_internal_error from mypy.fscache import FileSystemCache from mypy.ipc import IPCServer, receive, send @@ -38,13 +51,16 @@ class ServerContext(NamedTuple): def main(argv: list[str]) -> None: - # Set recursion limit consistent with mypy/main.py + # Set recursion limit and GC thresholds consistent with mypy/main.py sys.setrecursionlimit(RECURSION_LIMIT) if platform.python_implementation() == "CPython": gc.set_threshold(200 * 1000, 30, 30) args = parser.parse_args(argv) + # This mimics how daemon receives the options. Note we need to postpone + # processing error codes after plugins are loaded, because plugins can add + # custom error codes. options_dict = pickle.loads(base64.b64decode(args.options_data)) options_obj = Options() disable_error_code = options_dict.pop("disable_error_code", []) @@ -52,7 +68,7 @@ def main(argv: list[str]) -> None: options = options_obj.apply_changes(options_dict) status_file = args.status_file - server = IPCServer(CONNECTION_NAME, 10) + server = IPCServer(CONNECTION_NAME, WORKER_CONNECTION_TIMEOUT) with open(status_file, "w") as f: json.dump({"pid": os.getpid(), "connection_name": server.connection_name}, f) @@ -74,6 +90,7 @@ def main(argv: list[str]) -> None: server.cleanup() if options.fast_exit: + # Exit fast if allowed, since coordinator is waiting on us. util.hard_exit(0) @@ -84,29 +101,30 @@ def serve(server: IPCServer, ctx: ServerContext) -> None: if manager is None: return + # Mirror the GC freeze hack in the coordinator. if platform.python_implementation() == "CPython": gc.disable() try: graph = load_graph(sources, manager) except CompileError: + # CompileError during loading will be reported by the coordinator. return if platform.python_implementation() == "CPython": gc.freeze() gc.unfreeze() gc.enable() - for id in graph: manager.import_map[id] = set(graph[id].dependencies + graph[id].suppressed) - send(server, {"status": "ok"}) + # Notify worker we are done loading graph. + send(server, {"status": "ok"}) data = receive(server) sccs = [SCC(set(mod_ids), scc_id, deps) for (mod_ids, scc_id, deps) in data["sccs"]] - manager.scc_by_id = {scc.id: scc for scc in sccs} manager.top_order = [scc.id for scc in sccs] + # Notify coordinator we are ready to process SCCs. send(server, {"status": "ok"}) - while True: data = receive(server) if "final" in data: @@ -131,6 +149,7 @@ def serve(server: IPCServer, ctx: ServerContext) -> None: def setup_worker_manager(sources: list[BuildSource], ctx: ServerContext) -> BuildManager | None: data_dir = os.path.dirname(os.path.dirname(__file__)) + # This is used for testing only now. alt_lib_path = os.environ.get("MYPY_ALT_LIB_PATH") search_paths = compute_search_paths(sources, ctx.options, data_dir, alt_lib_path) @@ -138,17 +157,20 @@ def setup_worker_manager(sources: list[BuildSource], ctx: ServerContext) -> Buil try: plugin, snapshot = load_plugins(ctx.options, ctx.errors, sys.stdout, []) except CompileError: + # CompileError while importing plugins will be reported by the coordinator. return None + # Process the rest of the options when plugins are loaded. 
options = ctx.options options.disable_error_code = ctx.disable_error_code options.enable_error_code = ctx.enable_error_code options.process_error_codes(error_callback=lambda msg: None) def flush_errors(filename: str | None, new_messages: list[str], is_serious: bool) -> None: + # We never flush errors in the worker, we send them back to coordinator. pass - return BuildManager( + manager = BuildManager( data_dir, search_paths, ignore_prefix=os.getcwd(), @@ -165,6 +187,8 @@ def flush_errors(filename: str | None, new_messages: list[str], is_serious: bool stdout=sys.stdout, stderr=sys.stderr, ) + manager.parallel_worker = True + return manager def console_entry() -> None: diff --git a/mypy/ipc.py b/mypy/ipc.py index ee73dcdc8c07..e239a4dfe559 100644 --- a/mypy/ipc.py +++ b/mypy/ipc.py @@ -365,11 +365,22 @@ def ready_to_read(conns: list[IPCClient], timeout: float | None = None) -> list[ return [connections.index(r) for r in ready] +# TODO: switch send() and receive() to proper fixed binary format. +def send(connection: IPCBase, data: dict[str, Any]) -> None: + """Send data to a connection encoded and framed. + + The data must be a JSON object. We assume that a single send call is a + single frame to be sent. + """ + buf = WriteBuffer() + write_json(buf, data) + connection.write_bytes(buf.getvalue()) + + def receive(connection: IPCBase) -> dict[str, Any]: """Receive single JSON data frame from a connection. - Raise OSError if the data received is not valid JSON or if it is - not a dict. + Raise OSError if the data received is not valid. """ bdata = connection.read_bytes() if not bdata: @@ -380,14 +391,3 @@ def receive(connection: IPCBase) -> dict[str, Any]: except Exception as e: raise OSError("Data received is not valid JSON dict") from e return data - - -def send(connection: IPCBase, data: dict[str, Any]) -> None: - """Send data to a connection encoded and framed. - - The data must be JSON-serializable. We assume that a single send call is a - single frame to be sent on the connect. - """ - buf = WriteBuffer() - write_json(buf, data) - connection.write_bytes(buf.getvalue()) diff --git a/mypy/test/testcheck.py b/mypy/test/testcheck.py index 24874ac74218..37c348d044c0 100644 --- a/mypy/test/testcheck.py +++ b/mypy/test/testcheck.py @@ -172,6 +172,8 @@ def run_case_once( worker_env = None if options.num_workers > 0: worker_env = os.environ.copy() + # Make sure we are running tests with current worktree files, *not* with + # an installed version of mypy. root_dir = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) worker_env["PYTHONPATH"] = os.pathsep.join([root_dir, plugin_dir]) worker_env["MYPY_TEST_PREFIX"] = root_dir @@ -215,6 +217,7 @@ def run_case_once( update_testcase_output(testcase, a, incremental_step=incremental_step) if options.num_workers > 0: + # TypeVarIds are not stable in parallel checking, normalize. a = remove_typevar_ids(a) output = remove_typevar_ids(output) assert_string_arrays_equal(output, a, msg.format(testcase.file, testcase.line)) From 42a530deddf5297b703b2fe41263f0f80a753359 Mon Sep 17 00:00:00 2001 From: Ivan Levkivskyi Date: Thu, 20 Nov 2025 23:42:16 +0000 Subject: [PATCH 16/21] One more comment; fix a typo --- mypy/build.py | 2 +- test-data/unit/lib-stub/typing_extensions.pyi | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/mypy/build.py b/mypy/build.py index b90c1dea3531..648fcacc73a1 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -2197,7 +2197,7 @@ def reload_meta(self) -> None: """Force reload of cache meta. 
This is used by parallel checking workers to update shared information - that may ave changed after initial graph loading. Currently, this is only + that may have changed after initial graph loading. Currently, this is only the interface hash. """ assert self.path is not None diff --git a/test-data/unit/lib-stub/typing_extensions.pyi b/test-data/unit/lib-stub/typing_extensions.pyi index 6b529d8a7c41..43b5ef3a0950 100644 --- a/test-data/unit/lib-stub/typing_extensions.pyi +++ b/test-data/unit/lib-stub/typing_extensions.pyi @@ -1,3 +1,5 @@ +# Do not delete this import. Dependency structure of core modules for fixtures should match +# the real dependency structure, otherwise things like add_typing_extension_aliases() will break. import collections import typing from typing import Any, Callable, Mapping, Iterable, Iterator, NoReturn as NoReturn, Dict, Tuple, Type, Union From d63a2fcae851176310575f33ac328559481f82d1 Mon Sep 17 00:00:00 2001 From: Ivan Levkivskyi Date: Sat, 22 Nov 2025 19:35:02 +0000 Subject: [PATCH 17/21] Address CR --- mypy/build.py | 24 ++++++++++++++++++++---- mypy/build_worker/worker.py | 2 +- mypy/defaults.py | 2 +- mypy/main.py | 4 +++- 4 files changed, 25 insertions(+), 7 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 648fcacc73a1..f137d4897224 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -35,7 +35,7 @@ from librt.internal import cache_version import mypy.semanal_main -from mypy.cache import CACHE_VERSION, CacheMeta, ReadBuffer, WriteBuffer +from mypy.cache import CACHE_VERSION, CacheMeta, ReadBuffer, WriteBuffer, write_json from mypy.checker import TypeChecker from mypy.defaults import ( WORKER_CONNECTION_TIMEOUT, @@ -288,17 +288,18 @@ def default_flush_errors( workers = [] if options.num_workers > 0: + # TODO: switch to something more efficient than pickle (also in the daemon). pickled_options = pickle.dumps(options.snapshot()) options_data = base64.b64encode(pickled_options).decode() workers = [ WorkerClient(f".mypy_worker.{idx}.json", options_data, worker_env or os.environ) for idx in range(options.num_workers) ] + sources_data = sources_to_bytes(sources) for worker in workers: # Start loading graph in each worker as soon as it is up. worker.connect() - source_tuples = [(s.path, s.module, s.text, s.base_dir, s.followed) for s in sources] - send(worker.conn, {"sources": source_tuples}) + worker.conn.write_bytes(sources_data) try: result = build_inner( @@ -3524,10 +3525,11 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: manager.top_order = [scc.id for scc in sccs] # Broadcast SCC structure to the parallel workers, since they don't compute it. 
+ sccs_data = sccs_to_bytes(sccs) for worker in manager.workers: data = receive(worker.conn) assert data["status"] == "ok" - send(worker.conn, {"sccs": [(list(scc.mod_ids), scc.id, list(scc.deps)) for scc in sccs]}) + worker.conn.write_bytes(sccs_data) for worker in manager.workers: data = receive(worker.conn) assert data["status"] == "ok" @@ -3904,3 +3906,17 @@ def write_undocumented_ref_info( deps_json = get_undocumented_ref_info_json(state.tree, type_map) metastore.write(ref_info_file, json_dumps(deps_json)) + + +def sources_to_bytes(sources: list[BuildSource]) -> bytes: + source_tuples = [(s.path, s.module, s.text, s.base_dir, s.followed) for s in sources] + buf = WriteBuffer() + write_json(buf, {"sources": source_tuples}) + return buf.getvalue() + + +def sccs_to_bytes(sccs: list[SCC]) -> bytes: + scc_tuples = [(list(scc.mod_ids), scc.id, list(scc.deps)) for scc in sccs] + buf = WriteBuffer() + write_json(buf, {"sccs": scc_tuples}) + return buf.getvalue() diff --git a/mypy/build_worker/worker.py b/mypy/build_worker/worker.py index 208b9a1f4704..04311fc06324 100644 --- a/mypy/build_worker/worker.py +++ b/mypy/build_worker/worker.py @@ -8,7 +8,7 @@ * Load graph using the sources, and send "ok" to coordinator. * Receive SCC structure from coordinator, and ack it with an "ok". * Receive an SCC id from coordinator, process it, and send back the results. -* When prompted by coordinator (with s "final" message), cleanup and shutdown. +* When prompted by coordinator (with a "final" message), cleanup and shutdown. """ from __future__ import annotations diff --git a/mypy/defaults.py b/mypy/defaults.py index fe67c92da46b..2167449fa7d2 100644 --- a/mypy/defaults.py +++ b/mypy/defaults.py @@ -45,7 +45,7 @@ RECURSION_LIMIT: Final = 2**14 -WORKER_START_INTERVAL: Final = 0.03 +WORKER_START_INTERVAL: Final = 0.01 WORKER_START_TIMEOUT: Final = 3 WORKER_CONNECTION_TIMEOUT: Final = 10 WORKER_DONE_TIMEOUT: Final = 600 diff --git a/mypy/main.py b/mypy/main.py index 4077461802c7..0217867d7aa6 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -1153,7 +1153,9 @@ def add_invertible_flag( internals_group.add_argument("--export-ref-info", action="store_true", help=argparse.SUPPRESS) # Experimental parallel type-checking support. - internals_group.add_argument("--num-workers", type=int, default=0, help=argparse.SUPPRESS) + internals_group.add_argument( + "-n", "--num-workers", type=int, default=0, help=argparse.SUPPRESS + ) report_group = parser.add_argument_group( title="Report generation", description="Generate a report in the specified format." 
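A note on the coordinator/worker handshake used by the changes above: the coordinator now serializes the sources and the SCC structure once and writes the identical byte frames to every worker, while the acknowledgements come back as small JSON status messages. The following is only a sketch of the message order on the coordinator side, reusing sources_to_bytes(), sccs_to_bytes(), receive() and WorkerClient from this patch series; coordinator_handshake is an illustrative name, and the per-SCC dispatch plus the closing "final" message are elided.

def coordinator_handshake(workers, sources, sccs):
    # Encode once, then broadcast the identical frame to every worker.
    sources_data = sources_to_bytes(sources)
    for worker in workers:
        worker.connect()                       # wait for the status file, open the IPC connection
        worker.conn.write_bytes(sources_data)  # the worker starts load_graph() right away
    sccs_data = sccs_to_bytes(sccs)
    for worker in workers:
        assert receive(worker.conn)["status"] == "ok"  # worker finished loading the graph
        worker.conn.write_bytes(sccs_data)             # broadcast the SCC structure
    for worker in workers:
        assert receive(worker.conn)["status"] == "ok"  # worker is ready to process SCCs
    # Per-SCC scheduling and the final "final" message follow as in process_graph().
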
From 1cb50ddec97e6591907cfe2b3d241903d19dcfe3 Mon Sep 17 00:00:00 2001
From: Ivan Levkivskyi
Date: Thu, 27 Nov 2025 23:43:29 +0000
Subject: [PATCH 18/21] Fix --sqlite-cache

---
 mypy/build.py               | 12 ++++++++----
 mypy/build_worker/worker.py |  7 ++++---
 mypy/metastore.py           | 11 ++++++++---
 3 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/mypy/build.py b/mypy/build.py
index f137d4897224..bffcb6362bbf 100644
--- a/mypy/build.py
+++ b/mypy/build.py
@@ -696,6 +696,7 @@ def __init__(
         stdout: TextIO,
         stderr: TextIO,
         error_formatter: ErrorFormatter | None = None,
+        parallel_worker: bool = False,
     ) -> None:
         self.stats: dict[str, Any] = {}  # Values are ints or floats
         self.stdout = stdout
@@ -769,7 +770,7 @@ def __init__(
             ]
         )
 
-        self.metastore = create_metastore(options)
+        self.metastore = create_metastore(options, parallel_worker)
 
         # a mapping from source files to their corresponding shadow files
         # for efficient lookup
@@ -818,7 +819,7 @@ def __init__(
         # A global adding order for SCC queue, see comment above.
         self.queue_order: int = 0
         # Is this an instance used by a parallel worker?
-        self.parallel_worker = False
+        self.parallel_worker = parallel_worker
 
     def dump_stats(self) -> None:
         if self.options.dump_build_stats:
@@ -1397,10 +1398,13 @@ def exclude_from_backups(target_dir: str) -> None:
         pass
 
 
-def create_metastore(options: Options) -> MetadataStore:
+def create_metastore(options: Options, parallel_worker: bool = False) -> MetadataStore:
     """Create the appropriate metadata store."""
     if options.sqlite_cache:
-        mds: MetadataStore = SqliteMetadataStore(_cache_dir_prefix(options))
+        # We use this flag in both the coordinator and the workers to speed up commits,
+        # see mypy.metastore.connect_db() for details.
+        sync_off = options.num_workers > 0 or parallel_worker
+        mds: MetadataStore = SqliteMetadataStore(_cache_dir_prefix(options), sync_off=sync_off)
     else:
         mds = FilesystemMetadataStore(_cache_dir_prefix(options))
     return mds
diff --git a/mypy/build_worker/worker.py b/mypy/build_worker/worker.py
index 04311fc06324..3af34411b729 100644
--- a/mypy/build_worker/worker.py
+++ b/mypy/build_worker/worker.py
@@ -135,6 +135,8 @@ def serve(server: IPCServer, ctx: ServerContext) -> None:
             t0 = time.time()
             try:
                 result = process_stale_scc(graph, scc, manager)
+                # We must commit after each SCC, otherwise we break --sqlite-cache.
+                manager.metastore.commit()
             except CompileError as e:
                 blocker = {
                     "messages": e.messages,
@@ -170,7 +172,7 @@ def flush_errors(filename: str | None, new_messages: list[str], is_serious: bool
         # We never flush errors in the worker, we send them back to coordinator.
         pass
 
-    manager = BuildManager(
+    return BuildManager(
         data_dir,
         search_paths,
         ignore_prefix=os.getcwd(),
@@ -186,9 +188,8 @@ def flush_errors(filename: str | None, new_messages: list[str], is_serious: bool
         fscache=ctx.fscache,
         stdout=sys.stdout,
         stderr=sys.stderr,
+        parallel_worker=True,
     )
-    manager.parallel_worker = True
-    return manager
 
 
 def console_entry() -> None:
diff --git a/mypy/metastore.py b/mypy/metastore.py
index 442c7dc77461..4f204d4b8cb7 100644
--- a/mypy/metastore.py
+++ b/mypy/metastore.py
@@ -145,16 +145,21 @@ def list_all(self) -> Iterable[str]:
     """
 
 
-def connect_db(db_file: str) -> sqlite3.Connection:
+def connect_db(db_file: str, sync_off: bool = False) -> sqlite3.Connection:
     import sqlite3.dbapi2
 
     db = sqlite3.dbapi2.connect(db_file)
+    if sync_off:
+        # This is a bit unfortunate (as we may get a corrupt cache after e.g.
Ctrl + C), + # but without this flag, commits are *very* slow, especially when using HDDs, + # see https://www.sqlite.org/faq.html#q19 for details. + db.execute("PRAGMA synchronous=OFF") db.executescript(SCHEMA) return db class SqliteMetadataStore(MetadataStore): - def __init__(self, cache_dir_prefix: str) -> None: + def __init__(self, cache_dir_prefix: str, sync_off: bool = False) -> None: # We check startswith instead of equality because the version # will have already been appended by the time the cache dir is # passed here. @@ -163,7 +168,7 @@ def __init__(self, cache_dir_prefix: str) -> None: return os.makedirs(cache_dir_prefix, exist_ok=True) - self.db = connect_db(os.path.join(cache_dir_prefix, "cache.db")) + self.db = connect_db(os.path.join(cache_dir_prefix, "cache.db"), sync_off=sync_off) def _query(self, name: str, field: str) -> Any: # Raises FileNotFound for consistency with the file system version From 8126bc18bf4b1ce05517f1e8303b918f45ab6525 Mon Sep 17 00:00:00 2001 From: Ivan Levkivskyi Date: Sat, 29 Nov 2025 02:09:26 +0000 Subject: [PATCH 19/21] Apply black --- mypy/cache.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/mypy/cache.py b/mypy/cache.py index bec38b5abc53..e5fce2221634 100644 --- a/mypy/cache.py +++ b/mypy/cache.py @@ -392,7 +392,15 @@ def write_str_opt_list(data: WriteBuffer, value: list[str | None]) -> None: JsonValue: _TypeAlias = None | int | str | bool | list["JsonValue"] | dict[str, "JsonValue"] -JsonValueEx: _TypeAlias = None | int | str | bool | list["JsonValueEx"] | dict[str, "JsonValueEx"] | tuple["JsonValueEx", ...] +JsonValueEx: _TypeAlias = ( + None + | int + | str + | bool + | list["JsonValueEx"] + | dict[str, "JsonValueEx"] + | tuple["JsonValueEx", ...] +) def read_json_value(data: ReadBuffer) -> JsonValue: From df84adbeb3133c0bf783fa7c3d38f97043f98643 Mon Sep 17 00:00:00 2001 From: Ivan Levkivskyi Date: Sat, 29 Nov 2025 02:20:11 +0000 Subject: [PATCH 20/21] A tiny refactor --- mypy/cache.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/mypy/cache.py b/mypy/cache.py index e5fce2221634..67b0e0c20837 100644 --- a/mypy/cache.py +++ b/mypy/cache.py @@ -391,15 +391,13 @@ def write_str_opt_list(data: WriteBuffer, value: list[str | None]) -> None: write_str_opt(data, item) -JsonValue: _TypeAlias = None | int | str | bool | list["JsonValue"] | dict[str, "JsonValue"] +Value: _TypeAlias = None | int | str | bool +JsonValue: _TypeAlias = Value | list["JsonValue"] | dict[str, "JsonValue"] + +# Currently tuples are used by mypyc plugin. They will be normalized to +# JSON lists after a roundtrip. JsonValueEx: _TypeAlias = ( - None - | int - | str - | bool - | list["JsonValueEx"] - | dict[str, "JsonValueEx"] - | tuple["JsonValueEx", ...] + Value | list["JsonValueEx"] | dict[str, "JsonValueEx"] | tuple["JsonValueEx", ...] ) @@ -424,8 +422,6 @@ def read_json_value(data: ReadBuffer) -> JsonValue: assert False, f"Invalid JSON tag: {tag}" -# Currently tuples are used by mypyc plugin. They will be normalized to -# JSON lists after a roundtrip. 
def write_json_value(data: WriteBuffer, value: JsonValueEx) -> None: if value is None: write_tag(data, LITERAL_NONE) From 1b7ed16e63a7a3f7295ee4d4aed9a4ceea399fd3 Mon Sep 17 00:00:00 2001 From: Ivan Levkivskyi Date: Sat, 29 Nov 2025 13:10:47 +0000 Subject: [PATCH 21/21] Fix merge --- mypy/ipc.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mypy/ipc.py b/mypy/ipc.py index f34e57148ffc..61cc628d76f2 100644 --- a/mypy/ipc.py +++ b/mypy/ipc.py @@ -14,11 +14,13 @@ import sys import tempfile from collections.abc import Callable +from select import select from types import TracebackType from typing import Any, Final from librt.internal import ReadBuffer, WriteBuffer +from mypy.cache import read_json, write_json if sys.platform == "win32": # This may be private, but it is needed for IPC on Windows, and is basically stable
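A side note on the Value/JsonValue/JsonValueEx split in mypy/cache.py above: it documents that tuples (currently produced by the mypyc plugin) are accepted on the write side but come back as plain lists on the read side. The standard-library json module obeys the same normalization, so it can stand in as a self-contained illustration; the real cache uses the binary ReadBuffer/WriteBuffer format, not the json module.

import json

# Tuples are valid encoder input but are normalized to lists after a round-trip,
# mirroring the JsonValueEx (write side) vs. JsonValue (read side) contract.
payload = {"mod_ids": ("a", "b"), "deps": (1, 2, 3)}
restored = json.loads(json.dumps(payload))
assert restored == {"mod_ids": ["a", "b"], "deps": [1, 2, 3]}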