From 5fd24c2c040f141fc43b3a8e84efe8769053ebf4 Mon Sep 17 00:00:00 2001 From: "Michael J. Sullivan" Date: Thu, 24 Jan 2019 15:37:53 -0800 Subject: [PATCH 01/20] write out inverse deps --- mypy/build.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/mypy/build.py b/mypy/build.py index 17ff2a8687f2..8c3b4a66b0c4 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -754,6 +754,36 @@ def write_protocol_deps_cache(proto_deps: Dict[str, Set[str]], manager.errors.report(0, 0, "Error writing protocol dependencies cache", blocker=True) +def invert_deps(proto_deps: Dict[str, Set[str]], + manager: BuildManager, graph: Graph) -> None: + deps = {} # type: Dict[str, Set[str]] + things = [st.fine_grained_deps for st in graph.values()] + [proto_deps] + for st_deps in things: + for trigger, targets in st_deps.items(): + deps.setdefault(trigger, set()).update(targets) + + from mypy.server.target import module_prefix + rdeps = {id: {} for id in graph} # type: Dict[str, Dict[str, Set[str]]] + extra_deps = {} # type: Dict[str, Set[str]] + for trigger, targets in deps.items(): + assert trigger[0] == '<' + module = module_prefix(graph, trigger[1:-1]) + if module: + rdeps[module].setdefault(trigger, set()).update(targets) + else: + extra_deps.setdefault(trigger, set()).update(targets) + + def convert(x: Dict[str, Set[str]]) -> Dict[str, List[str]]: + return {k: list(v) for k, v in x.items()} + + # XXX: NOT THE PLACE FOR THIS YOU KNOW?? + for id in graph: + _, _, deps_json = get_cache_names(id, graph[id].xpath, manager) + assert deps_json + fname = deps_json.replace('.deps.json', '.rdeps.json') + manager.metastore.write(fname, json.dumps(convert(rdeps[id]))) + manager.metastore.write('@extra_deps.json', json.dumps(convert(extra_deps))) + PLUGIN_SNAPSHOT_FILE = '@plugins_snapshot.json' # type: Final @@ -2293,6 +2323,10 @@ def dispatch(sources: List[BuildSource], manager: BuildManager) -> Graph: if TypeState.proto_deps is not None and not manager.options.fine_grained_incremental: write_protocol_deps_cache(TypeState.proto_deps, manager, graph) + invert_deps(TypeState.proto_deps, manager, graph) + + + if manager.options.dump_deps: # This speeds up startup a little when not using the daemon mode. from mypy.server.deps import dump_all_dependencies From ae5d581e3b1373edc239125f427c3c542ca338e3 Mon Sep 17 00:00:00 2001 From: "Michael J. 
Sullivan" Date: Thu, 24 Jan 2019 16:17:47 -0800 Subject: [PATCH 02/20] Get it working on many tests --- mypy/build.py | 68 ++++++++++++------------------------------- mypy/server/update.py | 25 +++++++++------- 2 files changed, 34 insertions(+), 59 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 8c3b4a66b0c4..2b9d2abec76a 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -241,7 +241,6 @@ def default_data_dir() -> str: ('hash', str), ('dependencies', List[str]), # names of imported modules ('data_mtime', int), # mtime of data_json - ('deps_mtime', Optional[int]), # mtime of deps_json ('data_json', str), # path of .data.json # path of .deps.json, which we use to store fine-grained # dependency information for fine-grained mode @@ -281,7 +280,6 @@ def cache_meta_from_dict(meta: Dict[str, Any], meta.get('hash', sentinel), meta.get('dependencies', []), int(meta['data_mtime']) if 'data_mtime' in meta else sentinel, - int(meta['deps_mtime']) if meta.get('deps_mtime') is not None else None, data_json, deps_json, meta.get('suppressed', []), @@ -754,10 +752,11 @@ def write_protocol_deps_cache(proto_deps: Dict[str, Set[str]], manager.errors.report(0, 0, "Error writing protocol dependencies cache", blocker=True) + def invert_deps(proto_deps: Dict[str, Set[str]], manager: BuildManager, graph: Graph) -> None: deps = {} # type: Dict[str, Set[str]] - things = [st.fine_grained_deps for st in graph.values()] + [proto_deps] + things = [st.compute_fine_grained_deps() for st in graph.values()] + [proto_deps] for st_deps in things: for trigger, targets in st_deps.items(): deps.setdefault(trigger, set()).update(targets) @@ -780,9 +779,9 @@ def convert(x: Dict[str, Set[str]]) -> Dict[str, List[str]]: for id in graph: _, _, deps_json = get_cache_names(id, graph[id].xpath, manager) assert deps_json - fname = deps_json.replace('.deps.json', '.rdeps.json') - manager.metastore.write(fname, json.dumps(convert(rdeps[id]))) - manager.metastore.write('@extra_deps.json', json.dumps(convert(extra_deps))) + manager.metastore.write(deps_json, json.dumps(convert(rdeps[id]))) + _, proto_cache = get_protocol_deps_cache_name() + manager.metastore.write(proto_cache, json.dumps(convert(extra_deps))) PLUGIN_SNAPSHOT_FILE = '@plugins_snapshot.json' # type: Final @@ -948,8 +947,7 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[Cache # Don't check for path match, that is dealt with in validate_meta(). 
if (m.id != id or m.mtime is None or m.size is None or - m.dependencies is None or m.data_mtime is None or - (manager.options.cache_fine_grained and m.deps_mtime is None)): + m.dependencies is None or m.data_mtime is None): manager.log('Metadata abandoned for {}: attributes are missing'.format(id)) return None @@ -1019,13 +1017,6 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str], if data_mtime != meta.data_mtime: manager.log('Metadata abandoned for {}: data cache is modified'.format(id)) return None - deps_mtime = None - if manager.options.cache_fine_grained: - assert meta.deps_json - deps_mtime = manager.getmtime(meta.deps_json) - if deps_mtime != meta.deps_mtime: - manager.log('Metadata abandoned for {}: deps cache is modified'.format(id)) - return None path = manager.normpath(path) try: @@ -1083,7 +1074,6 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str], 'size': size, 'hash': source_hash, 'data_mtime': data_mtime, - 'deps_mtime': deps_mtime, 'dependencies': meta.dependencies, 'suppressed': meta.suppressed, 'child_modules': meta.child_modules, @@ -1125,7 +1115,6 @@ def json_dumps(obj: Any, debug_cache: bool) -> str: def write_cache(id: str, path: str, tree: MypyFile, - serialized_fine_grained_deps: Dict[str, List[str]], dependencies: List[str], suppressed: List[str], child_modules: List[str], dep_prios: List[int], dep_lines: List[int], old_interface_hash: str, source_hash: str, @@ -1214,14 +1203,6 @@ def write_cache(id: str, path: str, tree: MypyFile, return interface_hash, None data_mtime = manager.getmtime(data_json) - deps_mtime = None - if deps_json: - deps_str = json_dumps(serialized_fine_grained_deps, manager.options.debug_cache) - if not metastore.write(deps_json, deps_str): - manager.log("Error writing deps JSON file {}".format(deps_json)) - return interface_hash, None - deps_mtime = manager.getmtime(deps_json) - mtime = 0 if bazel else int(st.st_mtime) size = st.st_size options = manager.options.clone_for_module(id) @@ -1232,7 +1213,6 @@ def write_cache(id: str, path: str, tree: MypyFile, 'size': size, 'hash': source_hash, 'data_mtime': data_mtime, - 'deps_mtime': deps_mtime, 'dependencies': dependencies, 'suppressed': suppressed, 'child_modules': child_modules, @@ -1472,8 +1452,6 @@ class State: # Whether the module has an error or any of its dependencies have one. transitive_error = False - fine_grained_deps = None # type: Dict[str, Set[str]] - # Type checker used for checking this file. Use type_checker() for # access and to construct this on demand. _type_checker = None # type: Optional[TypeChecker] @@ -1507,7 +1485,6 @@ def __init__(self, self.id = id or '__main__' self.options = manager.options.clone_for_module(self.id) self._type_checker = None - self.fine_grained_deps = {} if not path and source is None: assert id is not None try: @@ -1638,14 +1615,17 @@ def wrap_context(self) -> Iterator[None]: self.manager.errors.set_import_context(save_import_context) self.check_blockers() - # Methods for processing cached modules. - def load_fine_grained_deps(self) -> None: + def load_fine_grained_deps(self) -> Dict[str, Set[str]]: + if self.meta is None: return {} + t0 = time.time() assert self.meta is not None, "Internal error: this method must be called only" \ " for cached modules" assert self.meta.deps_json deps = json.loads(self.manager.metastore.read(self.meta.deps_json)) # TODO: Assert deps file wasn't changed. 
- self.fine_grained_deps = {k: set(v) for k, v in deps.items()} + val = {k: set(v) for k, v in deps.items()} + self.manager.add_stats(load_fg_deps_time=time.time() - t0) + return val def load_tree(self, temporary: bool = False) -> None: assert self.meta is not None, "Internal error: this method must be called only" \ @@ -1925,7 +1905,7 @@ def _patch_indirect_dependencies(self, elif dep not in self.suppressed and dep in self.manager.missing_modules: self.suppressed.append(dep) - def compute_fine_grained_deps(self) -> None: + def compute_fine_grained_deps(self) -> Dict[str, Set[str]]: assert self.tree is not None if '/typeshed/' in self.xpath or self.xpath.startswith('typeshed/'): # We don't track changes to typeshed -- the assumption is that they are only changed @@ -1933,12 +1913,12 @@ def compute_fine_grained_deps(self) -> None: # # TODO: Not a reliable test, as we could have a package named typeshed. # TODO: Consider relaxing this -- maybe allow some typeshed changes to be tracked. - return + return {} from mypy.server.deps import get_dependencies # Lazy import to speed up startup - self.fine_grained_deps = get_dependencies(target=self.tree, - type_map=self.type_map(), - python_version=self.options.python_version, - options=self.manager.options) + return get_dependencies(target=self.tree, + type_map=self.type_map(), + python_version=self.options.python_version, + options=self.manager.options) def valid_references(self) -> Set[str]: assert self.ancestors is not None @@ -1968,7 +1948,6 @@ def write_cache(self) -> None: assert self.source_hash is not None new_interface_hash, self.meta = write_cache( self.id, self.path, self.tree, - {k: list(v) for k, v in self.fine_grained_deps.items()}, list(self.dependencies), list(self.suppressed), list(self.child_modules), dep_prios, dep_lines, self.interface_hash, self.source_hash, self.ignore_all, self.manager) @@ -2325,8 +2304,6 @@ def dispatch(sources: List[BuildSource], manager: BuildManager) -> Graph: invert_deps(TypeState.proto_deps, manager, graph) - - if manager.options.dump_deps: # This speeds up startup a little when not using the daemon mode. from mypy.server.deps import dump_all_dependencies @@ -2627,14 +2604,7 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: def process_fine_grained_cache_graph(graph: Graph, manager: BuildManager) -> None: """Finish loading everything for use in the fine-grained incremental cache""" - - # If we are running in fine-grained incremental mode with caching, - # we don't actually have much to do: just load the fine-grained - # deps. - t0 = time.time() - for id, state in graph.items(): - state.load_fine_grained_deps() - manager.add_stats(load_fg_deps_time=time.time() - t0) + pass def order_ascc(graph: Graph, ascc: AbstractSet[str], pri_max: int = PRI_ALL) -> List[str]: diff --git a/mypy/server/update.py b/mypy/server/update.py index 50d6157cdc49..b9901263b28a 100644 --- a/mypy/server/update.py +++ b/mypy/server/update.py @@ -165,6 +165,7 @@ def __init__(self, result: BuildResult) -> None: self.graph = result.graph self.previous_modules = get_module_to_path_map(self.graph) self.deps = get_all_dependencies(manager, self.graph) + self.unloaded_deps = set(self.graph) self.previous_targets_with_errors = manager.errors.targets() self.previous_messages = result.errors[:] # Module, if any, that had blocking errors in the last run as (id, path) tuple. 
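The fine-grained deps maps handled above all share one shape: a trigger string maps to the set of targets that must be reprocessed when it fires, and the .deps.json files store the same mapping with lists in place of sets. A minimal sketch of that round trip, with invented trigger and target names, illustrating what load_fine_grained_deps undoes:

import json
from typing import Dict, Set

# In-memory form used during the build: trigger -> set of targets.
deps = {'<a.f>': {'b', 'c.g'}}  # type: Dict[str, Set[str]]

# Serialized form written to a .deps.json file: sets become sorted lists.
blob = json.dumps({k: sorted(v) for k, v in deps.items()})

# Loading converts the lists back into sets, as load_fine_grained_deps does.
loaded = {k: set(v) for k, v in json.loads(blob).items()}
assert loaded == deps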
@@ -328,6 +329,11 @@ def update_module(self, previous_modules = self.previous_modules graph = self.graph + # XXX: COMMENT + if module in self.unloaded_deps and module in graph: + merge_dependencies(graph[module].load_fine_grained_deps(), self.deps) + self.unloaded_deps.remove(module) + # If this is an already existing module, make sure that we have # its tree loaded so that we can snapshot it for comparison. ensure_trees_loaded(manager, graph, [module]) @@ -358,7 +364,8 @@ def update_module(self, if not trigger.endswith('__>')] self.manager.log_fine_grained('triggered: %r' % sorted(filtered)) self.triggered.extend(triggered | self.previous_targets_with_errors) - collect_dependencies([module], self.deps, graph) + if module in graph: + merge_dependencies(graph[module].compute_fine_grained_deps(), self.deps) remaining += propagate_changes_using_dependencies( manager, graph, self.deps, triggered, {module}, @@ -417,7 +424,9 @@ def get_all_dependencies(manager: BuildManager, graph: Dict[str, State]) -> Dict """Return the fine-grained dependency map for an entire build.""" # Deps for each module were computed during build() or loaded from the cache. deps = {} # type: Dict[str, Set[str]] - collect_dependencies(graph, deps, graph) + for id in graph: + if graph[id].tree is not None: + merge_dependencies(graph[id].compute_fine_grained_deps(), deps) TypeState.add_all_protocol_deps(deps) return deps @@ -653,14 +662,10 @@ def get_sources(fscache: FileSystemCache, return sources -def collect_dependencies(new_modules: Iterable[str], - deps: Dict[str, Set[str]], - graph: Dict[str, State]) -> None: - for id in new_modules: - if id not in graph: - continue - for trigger, targets in graph[id].fine_grained_deps.items(): - deps.setdefault(trigger, set()).update(targets) +def merge_dependencies(new_deps: Dict[str, Set[str]], + deps: Dict[str, Set[str]]) -> None: + for trigger, targets in new_deps.items(): + deps.setdefault(trigger, set()).update(targets) # Merge also the newly added protocol deps. TypeState.update_protocol_deps(deps) From 4dab2914432f553bc2b3b49ce7792c3a00fe4f4e Mon Sep 17 00:00:00 2001 From: "Michael J. Sullivan" Date: Thu, 24 Jan 2019 16:50:15 -0800 Subject: [PATCH 03/20] Get it mostly working --- mypy/build.py | 5 +++++ mypy/server/update.py | 23 ++++++++++++++++++----- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 2b9d2abec76a..74ac6e7747e3 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -779,6 +779,7 @@ def convert(x: Dict[str, Set[str]]) -> Dict[str, List[str]]: for id in graph: _, _, deps_json = get_cache_names(id, graph[id].xpath, manager) assert deps_json + manager.log("Writing deps cache", deps_json) manager.metastore.write(deps_json, json.dumps(convert(rdeps[id]))) _, proto_cache = get_protocol_deps_cache_name() manager.metastore.write(proto_cache, json.dumps(convert(extra_deps))) @@ -1456,6 +1457,8 @@ class State: # access and to construct this on demand. _type_checker = None # type: Optional[TypeChecker] + fine_grained_deps_loaded = False + def __init__(self, id: Optional[str], path: Optional[str], @@ -2271,7 +2274,9 @@ def dispatch(sources: List[BuildSource], manager: BuildManager) -> Graph: # The `read_protocol_cache` will also validate # the protocol cache against the loaded individual cache files. 
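The merge_dependencies helper introduced above is just a per-trigger union into an accumulating map (plus a protocol-deps update that is omitted here). A small usage sketch with invented trigger names:

from typing import Dict, Set

def merge_into(new_deps, deps):
    # type: (Dict[str, Set[str]], Dict[str, Set[str]]) -> None
    # Same union-per-trigger logic as merge_dependencies, without the
    # trailing TypeState.update_protocol_deps() call.
    for trigger, targets in new_deps.items():
        deps.setdefault(trigger, set()).update(targets)

total = {'<a.f>': {'b'}}
merge_into({'<a.f>': {'c'}, '<a.C>': {'d'}}, total)
assert total == {'<a.f>': {'b', 'c'}, '<a.C>': {'d'}}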
if manager.options.cache_fine_grained or manager.use_fine_grained_cache(): + t2 = time.time() proto_deps = read_protocol_cache(manager, graph) + manager.add_stats(load_fg_deps_time=time.time() - t2) if proto_deps is not None: TypeState.proto_deps = proto_deps elif manager.stats.get('fresh_metas', 0) > 0: diff --git a/mypy/server/update.py b/mypy/server/update.py index b9901263b28a..05349bdacb7b 100644 --- a/mypy/server/update.py +++ b/mypy/server/update.py @@ -165,7 +165,6 @@ def __init__(self, result: BuildResult) -> None: self.graph = result.graph self.previous_modules = get_module_to_path_map(self.graph) self.deps = get_all_dependencies(manager, self.graph) - self.unloaded_deps = set(self.graph) self.previous_targets_with_errors = manager.errors.targets() self.previous_messages = result.errors[:] # Module, if any, that had blocking errors in the last run as (id, path) tuple. @@ -329,10 +328,7 @@ def update_module(self, previous_modules = self.previous_modules graph = self.graph - # XXX: COMMENT - if module in self.unloaded_deps and module in graph: - merge_dependencies(graph[module].load_fine_grained_deps(), self.deps) - self.unloaded_deps.remove(module) + ensure_deps_loaded(module, self.deps, graph) # If this is an already existing module, make sure that we have # its tree loaded so that we can snapshot it for comparison. @@ -409,6 +405,18 @@ def find_unloaded_deps(manager: BuildManager, graph: Dict[str, State], return unloaded +def ensure_deps_loaded(module: str, + deps: Dict[str, Set[str]], graph: Dict[str, State]) -> None: + # XXX: COMMENT + if module in graph and graph[module].fine_grained_deps_loaded: + return + parts = module.split('.') + for i in range(len(parts)): + base = '.'.join(parts[:i + 1]) + if base in graph and not graph[base].fine_grained_deps_loaded: + merge_dependencies(graph[base].load_fine_grained_deps(), deps) + graph[base].fine_grained_deps_loaded = True + def ensure_trees_loaded(manager: BuildManager, graph: Dict[str, State], initial: Sequence[str]) -> None: """Ensure that the modules in initial and their deps have loaded trees.""" @@ -825,6 +833,11 @@ def find_targets_recursive( worklist = set() for target in current: if target.startswith('<'): + # XXX: slow?? + module_id = module_prefix(graph, target[1:-1]) + if module_id: + ensure_deps_loaded(module_id, deps, graph) + worklist |= deps.get(target, set()) - processed else: module_id = module_prefix(graph, target) From 40cd25b0cf7523859035f1b6bd0ee674364d0856 Mon Sep 17 00:00:00 2001 From: "Michael J. 
Sullivan" Date: Thu, 24 Jan 2019 17:53:22 -0800 Subject: [PATCH 04/20] Incrementally update the extra dependencies --- mypy/build.py | 45 +++++++++++++------ mypy/test/testfinegrained.py | 1 + .../unit/fine-grained-cache-incremental.test | 28 ++++++++++++ 3 files changed, 61 insertions(+), 13 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 74ac6e7747e3..58f7247dbfec 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -753,30 +753,45 @@ def write_protocol_deps_cache(proto_deps: Dict[str, Set[str]], blocker=True) -def invert_deps(proto_deps: Dict[str, Set[str]], - manager: BuildManager, graph: Graph) -> None: - deps = {} # type: Dict[str, Set[str]] - things = [st.compute_fine_grained_deps() for st in graph.values()] + [proto_deps] - for st_deps in things: - for trigger, targets in st_deps.items(): - deps.setdefault(trigger, set()).update(targets) - +def invert_deps_inner( + deps: Dict[str, Set[str]], + graph: Graph) -> Tuple[Dict[str, Dict[str, Set[str]]], Dict[str, Set[str]]]: from mypy.server.target import module_prefix - rdeps = {id: {} for id in graph} # type: Dict[str, Dict[str, Set[str]]] + rdeps = {} # type: Dict[str, Dict[str, Set[str]]] extra_deps = {} # type: Dict[str, Set[str]] for trigger, targets in deps.items(): assert trigger[0] == '<' module = module_prefix(graph, trigger[1:-1]) - if module: - rdeps[module].setdefault(trigger, set()).update(targets) + if module and graph[module].tree: + mod_rdeps = rdeps.setdefault(module, {}) + mod_rdeps.setdefault(trigger, set()).update(targets) else: extra_deps.setdefault(trigger, set()).update(targets) + return (rdeps, extra_deps) + +def invert_deps(proto_deps: Dict[str, Set[str]], + manager: BuildManager, graph: Graph) -> None: + deps = {} # type: Dict[str, Set[str]] + things = [st.compute_fine_grained_deps() for st in graph.values() if st.tree] + [proto_deps] + for st_deps in things: + for trigger, targets in st_deps.items(): + deps.setdefault(trigger, set()).update(targets) + def convert(x: Dict[str, Set[str]]) -> Dict[str, List[str]]: return {k: list(v) for k, v in x.items()} + # XXX: can only split them up if there was no cache + # no_cache = all(st.meta is None for st in graph.values()) + # print(no_cache) + # print([(st.id, st.meta) for st in graph.values()]) + if not manager.options.incremental: + rdeps, extra_deps = invert_deps_inner(deps, graph) + else: + rdeps, extra_deps = {}, deps + # XXX: NOT THE PLACE FOR THIS YOU KNOW?? - for id in graph: + for id in rdeps: _, _, deps_json = get_cache_names(id, graph[id].xpath, manager) assert deps_json manager.log("Writing deps cache", deps_json) @@ -1624,7 +1639,11 @@ def load_fine_grained_deps(self) -> Dict[str, Set[str]]: assert self.meta is not None, "Internal error: this method must be called only" \ " for cached modules" assert self.meta.deps_json - deps = json.loads(self.manager.metastore.read(self.meta.deps_json)) + # XXX: Check this reasonably?? + try: + deps = json.loads(self.manager.metastore.read(self.meta.deps_json)) + except FileNotFoundError: + deps = {} # TODO: Assert deps file wasn't changed. 
val = {k: set(v) for k, v in deps.items()} self.manager.add_stats(load_fg_deps_time=time.time() - t0) diff --git a/mypy/test/testfinegrained.py b/mypy/test/testfinegrained.py index bda63493b09c..bec40ad0419e 100644 --- a/mypy/test/testfinegrained.py +++ b/mypy/test/testfinegrained.py @@ -86,6 +86,7 @@ def run_case(self, testcase: DataDrivenTestCase) -> None: step = 1 sources = self.parse_sources(main_src, step, options) if step <= num_regular_incremental_steps: + build_options.incremental = step > 1 messages = self.build(build_options, sources) else: messages = self.run_check(server, sources) diff --git a/test-data/unit/fine-grained-cache-incremental.test b/test-data/unit/fine-grained-cache-incremental.test index 9387d6d1c3d6..9faa28c82c0e 100644 --- a/test-data/unit/fine-grained-cache-incremental.test +++ b/test-data/unit/fine-grained-cache-incremental.test @@ -51,6 +51,34 @@ x = 'hi' == a.py:3: error: Unsupported operand types for + ("int" and "str") +[case testIncrCacheDoubleChange1] +# num_build_steps: 2 +import b +import c +[file a.py] +def f(x: int) -> None: + pass +[file b.py] +from a import f +f(10) +[file c.py] +from a import f +f(10) + +[file a.py.2] +def f(x: int) -> None: + pass +# nothing changed + +[file a.py.3] +def f(x: str) -> None: + pass +[out] +== +== +c.py:2: error: Argument 1 to "f" has incompatible type "int"; expected "str" +b.py:2: error: Argument 1 to "f" has incompatible type "int"; expected "str" + [case testIncrCacheProtocol1] # num_build_steps: 2 import a From f519a7f97ca1401ef17536499691d88f759cee02 Mon Sep 17 00:00:00 2001 From: "Michael J. Sullivan" Date: Fri, 25 Jan 2019 17:30:17 -0800 Subject: [PATCH 05/20] Repurpose the protocol cache stuff for the new deps cache --- mypy/build.py | 129 ++++++++---------- mypy/server/update.py | 1 + .../unit/fine-grained-cache-incremental.test | 2 +- 3 files changed, 62 insertions(+), 70 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 58f7247dbfec..5b9cdd43d4f3 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -473,7 +473,7 @@ class BuildManager(BuildManagerBase): determine whether we write cache files or not. stats: Dict with various instrumentation numbers, it is used not only for debugging, but also required for correctness, - in particular to check consistency of the protocol dependency cache. + in particular to check consistency of the fine-grained dependency cache. fscache: A file system cacher """ @@ -713,21 +713,23 @@ def stats_summary(self) -> Mapping[str, object]: return self.stats -def write_protocol_deps_cache(proto_deps: Dict[str, Set[str]], - manager: BuildManager, graph: Graph) -> None: - """Write cache files for protocol dependencies. +def deps_to_json(x: Dict[str, Set[str]]) -> str: + return json.dumps({k: list(v) for k, v in x.items()}) - Serialize protocol dependencies map for fine grained mode. Also take the snapshot - of current sources to later check consistency between protocol cache and individual + +def write_deps_cache(rdeps: Dict[str, Dict[str, Set[str]]], + extra_deps: Dict[str, Set[str]], + manager: BuildManager, graph: Graph) -> None: + """Write cache files for fine-grained dependencies. + + Serialize fine-grained dependencies map for fine grained mode. Also take the snapshot + of current sources to later check consistency between the extra cache and individual cache files. - Out of three kinds of protocol dependencies described in TypeState._snapshot_protocol_deps, - only the last two kinds are stored in global protocol caches, dependencies of the first kind - (i.e. 
, -> ) are written to the normal - per-file fine grained dependency caches. + XXX: DESCRIBE """ metastore = manager.metastore - proto_meta, proto_cache = get_protocol_deps_cache_name() + deps_meta, deps_cache = get_deps_cache_name() meta_snapshot = {} # type: Dict[str, str] error = False for id, st in graph.items(): @@ -740,16 +742,23 @@ def write_protocol_deps_cache(proto_deps: Dict[str, Set[str]], assert st.meta, "Module must be either parsed or cached" meta_snapshot[id] = st.meta.hash - if not metastore.write(proto_meta, json.dumps(meta_snapshot)): - manager.log("Error writing protocol meta JSON file {}".format(proto_cache)) + if not metastore.write(deps_meta, json.dumps(meta_snapshot)): + manager.log("Error writing fine-grained deps meta JSON file {}".format(deps_meta)) error = True - listed_proto_deps = {k: list(v) for (k, v) in proto_deps.items()} - if not metastore.write(proto_cache, json.dumps(listed_proto_deps)): - manager.log("Error writing protocol deps JSON file {}".format(proto_cache)) + if not metastore.write(deps_cache, deps_to_json(extra_deps)): + manager.log("Error writing fine-grained extra deps JSON file {}".format(deps_cache)) error = True + for id in rdeps: + _, _, deps_json = get_cache_names(id, graph[id].xpath, manager) + assert deps_json + manager.log("Writing deps cache", deps_json) + if not manager.metastore.write(deps_json, deps_to_json(rdeps[id])): + manager.log("Error writing fine-grained deps JSON file {}".format(deps_json)) + error = True + if error: manager.errors.set_file(_cache_dir_prefix(manager), None) - manager.errors.report(0, 0, "Error writing protocol dependencies cache", + manager.errors.report(0, 0, "Error writing fine-grained dependencies cache", blocker=True) @@ -770,17 +779,17 @@ def invert_deps_inner( return (rdeps, extra_deps) + def invert_deps(proto_deps: Dict[str, Set[str]], - manager: BuildManager, graph: Graph) -> None: + manager: BuildManager, + graph: Graph) -> Tuple[Dict[str, Dict[str, Set[str]]], + Dict[str, Set[str]]]: deps = {} # type: Dict[str, Set[str]] things = [st.compute_fine_grained_deps() for st in graph.values() if st.tree] + [proto_deps] for st_deps in things: for trigger, targets in st_deps.items(): deps.setdefault(trigger, set()).update(targets) - def convert(x: Dict[str, Set[str]]) -> Dict[str, List[str]]: - return {k: list(v) for k, v in x.items()} - # XXX: can only split them up if there was no cache # no_cache = all(st.meta is None for st in graph.values()) # print(no_cache) @@ -790,14 +799,7 @@ def convert(x: Dict[str, Set[str]]) -> Dict[str, List[str]]: else: rdeps, extra_deps = {}, deps - # XXX: NOT THE PLACE FOR THIS YOU KNOW?? - for id in rdeps: - _, _, deps_json = get_cache_names(id, graph[id].xpath, manager) - assert deps_json - manager.log("Writing deps cache", deps_json) - manager.metastore.write(deps_json, json.dumps(convert(rdeps[id]))) - _, proto_cache = get_protocol_deps_cache_name() - manager.metastore.write(proto_cache, json.dumps(convert(extra_deps))) + return rdeps, extra_deps PLUGIN_SNAPSHOT_FILE = '@plugins_snapshot.json' # type: Final @@ -825,17 +827,13 @@ def read_plugins_snapshot(manager: BuildManager) -> Optional[Dict[str, str]]: return snapshot -def read_protocol_cache(manager: BuildManager, - graph: Graph) -> Optional[Dict[str, Set[str]]]: - """Read and validate protocol dependencies cache. - - See docstring for write_protocol_cache for details about which kinds of - dependencies are read. 
- """ - proto_meta, proto_cache = get_protocol_deps_cache_name() - meta_snapshot = _load_json_file(proto_meta, manager, - log_sucess='Proto meta ', - log_error='Could not load protocol metadata: ') +def read_deps_cache(manager: BuildManager, + graph: Graph) -> Optional[Dict[str, Set[str]]]: + """Read and validate dependencies cache. """ + deps_meta, deps_extra_cache = get_deps_cache_name() + meta_snapshot = _load_json_file(deps_meta, manager, + log_sucess='Deps meta ', + log_error='Could not load fine-grained dependency metadata: ') if meta_snapshot is None: return None # Take a snapshot of the source hashes from all of the metas we found. @@ -847,15 +845,15 @@ def read_protocol_cache(manager: BuildManager, common = set(meta_snapshot.keys()) & set(current_meta_snapshot.keys()) if any(meta_snapshot[id] != current_meta_snapshot[id] for id in common): # TODO: invalidate also if options changed (like --strict-optional)? - manager.log('Protocol cache inconsistent, ignoring') + manager.log('Fine-grained dependencies cache inconsistent, ignoring') return None - deps = _load_json_file(proto_cache, manager, - log_sucess='Proto deps ', - log_error='Could not load protocol cache: ') + deps = _load_json_file(deps_extra_cache, manager, + log_sucess='Extra deps ', + log_error='Could not load fine-grained dependencies cache: ') if deps is None: return None if not isinstance(deps, dict): - manager.log('Could not load protocol cache: cache is not a dict: {}' + manager.log('Could not load fine-grained dependencies cache: cache is not a dict: {}' .format(type(deps))) return None return {k: set(v) for (k, v) in deps.items()} @@ -918,17 +916,9 @@ def get_cache_names(id: str, path: str, manager: BuildManager) -> Tuple[str, str return (prefix + '.meta.json', prefix + '.data.json', deps_json) -def get_protocol_deps_cache_name() -> Tuple[str, str]: - """Return file names for fine grained protocol dependencies cache. - - Since these dependencies represent a global state of the program, they - are serialized per program, not per module, and the corresponding files - live at the root of the cache folder for a given Python version. - Return a tuple ('meta file path', 'data file path'), where the meta file - contains hashes of all source files at the time the protocol dependencies - were written, and data file contains the protocol dependencies. - """ - name = '@proto_deps' +def get_deps_cache_name() -> Tuple[str, str]: + """Return file names for global fine grained dependencies cache.""" + name = '@deps' return name + '.meta.json', name + '.data.json' @@ -2286,23 +2276,24 @@ def dispatch(sources: List[BuildSource], manager: BuildManager) -> Graph: dump_graph(graph) return graph - # Fine grained protocol dependencies are serialized separately, so we read them - # after we load the cache for whole graph. + # Fine grained dependencies that didn't have an associated module in the build + # are serialized separately, so we read them after we load the graph. # We need to read them both for running in daemon mode and if we are generating # a fine-grained cache (so that we can properly update them incrementally). - # The `read_protocol_cache` will also validate - # the protocol cache against the loaded individual cache files. + # The `read_deps_cache` will also validate + # the deps cache against the loaded individual cache files. 
if manager.options.cache_fine_grained or manager.use_fine_grained_cache(): t2 = time.time() - proto_deps = read_protocol_cache(manager, graph) + extra_deps = read_deps_cache(manager, graph) manager.add_stats(load_fg_deps_time=time.time() - t2) - if proto_deps is not None: - TypeState.proto_deps = proto_deps + if extra_deps is not None: + # XXX: Is this where we want to put this? + TypeState.proto_deps = extra_deps elif manager.stats.get('fresh_metas', 0) > 0: # Clear the stats so we don't infinite loop because of positive fresh_metas manager.stats.clear() - # There were some cache files read, but no protocol dependencies loaded. - manager.log("Error reading protocol dependencies cache -- aborting cache load") + # There were some cache files read, but no fine-grained dependencies loaded. + manager.log("Error reading fine-grained dependencies cache -- aborting cache load") manager.cache_enabled = False manager.log("Falling back to full run -- reloading graph...") return dispatch(sources, manager) @@ -2323,10 +2314,10 @@ def dispatch(sources: List[BuildSource], manager: BuildManager) -> Graph: # Since these are a global property of the program, they are calculated after we # processed the whole graph. TypeState.update_protocol_deps() - if TypeState.proto_deps is not None and not manager.options.fine_grained_incremental: - write_protocol_deps_cache(TypeState.proto_deps, manager, graph) - - invert_deps(TypeState.proto_deps, manager, graph) + if not manager.options.fine_grained_incremental: + proto_deps = TypeState.proto_deps or {} + rdeps, extra_deps = invert_deps(proto_deps, manager, graph) + write_deps_cache(rdeps, extra_deps, manager, graph) if manager.options.dump_deps: # This speeds up startup a little when not using the daemon mode. diff --git a/mypy/server/update.py b/mypy/server/update.py index 05349bdacb7b..6c0c5c7958c9 100644 --- a/mypy/server/update.py +++ b/mypy/server/update.py @@ -417,6 +417,7 @@ def ensure_deps_loaded(module: str, merge_dependencies(graph[base].load_fine_grained_deps(), deps) graph[base].fine_grained_deps_loaded = True + def ensure_trees_loaded(manager: BuildManager, graph: Dict[str, State], initial: Sequence[str]) -> None: """Ensure that the modules in initial and their deps have loaded trees.""" diff --git a/test-data/unit/fine-grained-cache-incremental.test b/test-data/unit/fine-grained-cache-incremental.test index 9faa28c82c0e..a47986728e08 100644 --- a/test-data/unit/fine-grained-cache-incremental.test +++ b/test-data/unit/fine-grained-cache-incremental.test @@ -202,7 +202,7 @@ a.py:8: note: x: expected "int", got "str" [file b.py] -- This is a heinous hack, but we simulate having a invalid cache by clobbering -- the proto deps file with something with hash mismatches. -[file ../.mypy_cache/3.6/@proto_deps.meta.json.2] +[file ../.mypy_cache/3.6/@deps.meta.json.2] {"__main__": "00000000000000000000000000000000", "a": "d41d8cd98f00b204e9800998ecf8427e", "b": "d41d8cd98f00b204e9800998ecf8427e", "builtins": "00000000000000000000000000000000"} [file b.py.2] # uh From 9111c0e704cf790349d9483743a04b7a63682732 Mon Sep 17 00:00:00 2001 From: "Michael J. 
Sullivan" Date: Fri, 25 Jan 2019 17:33:29 -0800 Subject: [PATCH 06/20] Handle [wildcard], refactor some --- mypy/build.py | 5 +++-- mypy/server/target.py | 10 ++++++++++ mypy/server/update.py | 4 ++-- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 5b9cdd43d4f3..940717742cdd 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -765,12 +765,13 @@ def write_deps_cache(rdeps: Dict[str, Dict[str, Set[str]]], def invert_deps_inner( deps: Dict[str, Set[str]], graph: Graph) -> Tuple[Dict[str, Dict[str, Set[str]]], Dict[str, Set[str]]]: - from mypy.server.target import module_prefix + from mypy.server.target import module_prefix, trigger_to_target + rdeps = {} # type: Dict[str, Dict[str, Set[str]]] extra_deps = {} # type: Dict[str, Set[str]] for trigger, targets in deps.items(): assert trigger[0] == '<' - module = module_prefix(graph, trigger[1:-1]) + module = module_prefix(graph, trigger_to_target(trigger)) if module and graph[module].tree: mod_rdeps = rdeps.setdefault(module, {}) mod_rdeps.setdefault(trigger, set()).update(targets) diff --git a/mypy/server/target.py b/mypy/server/target.py index 78e28582f4b3..3bc5b748bf6e 100644 --- a/mypy/server/target.py +++ b/mypy/server/target.py @@ -1,6 +1,16 @@ from typing import Iterable, Tuple, List, Optional +def trigger_to_target(s: str) -> str: + assert s[0] == '<' + # Strip off the angle brackets + s = s[1:-1] + # If there is a [wildcard] or similar, strip that off too + if s[-1] == ']': + s = s.split('[')[0] + return s + + def module_prefix(modules: Iterable[str], target: str) -> Optional[str]: result = split_target(modules, target) if result is None: diff --git a/mypy/server/update.py b/mypy/server/update.py index 6c0c5c7958c9..736fd0a964fd 100644 --- a/mypy/server/update.py +++ b/mypy/server/update.py @@ -138,7 +138,7 @@ from mypy.server.astmerge import merge_asts from mypy.server.aststrip import strip_target from mypy.server.deps import get_dependencies_of_target -from mypy.server.target import module_prefix, split_target +from mypy.server.target import module_prefix, split_target, trigger_to_target from mypy.server.trigger import make_trigger, WILDCARD_TAG from mypy.typestate import TypeState @@ -835,7 +835,7 @@ def find_targets_recursive( for target in current: if target.startswith('<'): # XXX: slow?? - module_id = module_prefix(graph, target[1:-1]) + module_id = module_prefix(graph, trigger_to_target(target)) if module_id: ensure_deps_loaded(module_id, deps, graph) From 12b2b737b08a019296a7f649d90d7c4bd89796f3 Mon Sep 17 00:00:00 2001 From: "Michael J. 
Sullivan" Date: Mon, 28 Jan 2019 11:35:47 -0800 Subject: [PATCH 07/20] Some cleanup --- mypy/build.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 940717742cdd..d50c611592c0 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -762,7 +762,7 @@ def write_deps_cache(rdeps: Dict[str, Dict[str, Set[str]]], blocker=True) -def invert_deps_inner( +def invert_deps( deps: Dict[str, Set[str]], graph: Graph) -> Tuple[Dict[str, Dict[str, Set[str]]], Dict[str, Set[str]]]: from mypy.server.target import module_prefix, trigger_to_target @@ -781,22 +781,28 @@ def invert_deps_inner( return (rdeps, extra_deps) -def invert_deps(proto_deps: Dict[str, Set[str]], - manager: BuildManager, - graph: Graph) -> Tuple[Dict[str, Dict[str, Set[str]]], - Dict[str, Set[str]]]: +def process_deps(proto_deps: Dict[str, Set[str]], + manager: BuildManager, + graph: Graph) -> Tuple[Dict[str, Dict[str, Set[str]]], + Dict[str, Set[str]]]: + """Process fine-grained dependenecies into a form suitable for serializing. + + Returns an (rdeps, extra_deps) pair, where rdeps maps from module ids to + all dependencies on that module, and extra_deps contains dependencies that + weren't associated with any module. + """ deps = {} # type: Dict[str, Set[str]] things = [st.compute_fine_grained_deps() for st in graph.values() if st.tree] + [proto_deps] for st_deps in things: for trigger, targets in st_deps.items(): deps.setdefault(trigger, set()).update(targets) - # XXX: can only split them up if there was no cache - # no_cache = all(st.meta is None for st in graph.values()) - # print(no_cache) - # print([(st.id, st.meta) for st in graph.values()]) + # If we are operating in non-incremental mode, properly split up + # dependencies between all the files. If this is an incremental update, + # though, we write all the dependencies into the "extra_deps" file so we + # don't need to reload dependency files to update them. if not manager.options.incremental: - rdeps, extra_deps = invert_deps_inner(deps, graph) + rdeps, extra_deps = invert_deps(deps, graph) else: rdeps, extra_deps = {}, deps @@ -2317,7 +2323,7 @@ def dispatch(sources: List[BuildSource], manager: BuildManager) -> Graph: TypeState.update_protocol_deps() if not manager.options.fine_grained_incremental: proto_deps = TypeState.proto_deps or {} - rdeps, extra_deps = invert_deps(proto_deps, manager, graph) + rdeps, extra_deps = process_deps(proto_deps, manager, graph) write_deps_cache(rdeps, extra_deps, manager, graph) if manager.options.dump_deps: From 6be7c9134b70948779cebf43a67633102b3245ef Mon Sep 17 00:00:00 2001 From: "Michael J. Sullivan" Date: Mon, 28 Jan 2019 12:07:06 -0800 Subject: [PATCH 08/20] Fix two of the tests --- test-data/unit/check-incremental.test | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test-data/unit/check-incremental.test b/test-data/unit/check-incremental.test index 1818a0814a50..41c51f0de979 100644 --- a/test-data/unit/check-incremental.test +++ b/test-data/unit/check-incremental.test @@ -3638,7 +3638,7 @@ import b [file b.py] -- This is a heinous hack, but we simulate having a invalid cache by clobbering -- the proto deps file with something with hash mismatches. 
-[file ../.mypy_cache/3.6/@proto_deps.meta.json.2] +[file ../.mypy_cache/3.6/@deps.meta.json.2] {"__main__": "00000000000000000000000000000000", "a": "d41d8cd98f00b204e9800998ecf8427e", "b": "d41d8cd98f00b204e9800998ecf8427e", "builtins": "00000000000000000000000000000000"} [file b.py.2] # uh @@ -3666,7 +3666,7 @@ import b [file b.py] -- This is a heinous hack, but we simulate having a invalid cache by deleting -- the proto deps file. -[delete ../.mypy_cache/3.6/@proto_deps.meta.json.2] +[delete ../.mypy_cache/3.6/@deps.meta.json.2] [file b.py.2] # uh -- Every file should get reloaded, since the cache was invalidated From 94dea79d7471c5de691b69dda5772982eadf3183 Mon Sep 17 00:00:00 2001 From: "Michael J. Sullivan" Date: Mon, 28 Jan 2019 12:58:56 -0800 Subject: [PATCH 09/20] Redo how metadata for the deps files are handled --- mypy/build.py | 124 +++++++++++------- test-data/unit/check-incremental.test | 4 +- .../unit/fine-grained-cache-incremental.test | 5 +- 3 files changed, 81 insertions(+), 52 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index d50c611592c0..bf13c1911e85 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -30,6 +30,8 @@ from typing import ClassVar from typing_extensions import Final +from mypy_extensions import TypedDict + from mypy.nodes import (MypyFile, ImportBase, Import, ImportFrom, ImportAll) from mypy.semanal_pass1 import SemanticAnalyzerPass1 from mypy.newsemanal.semanal_pass1 import ReachabilityAnalyzer @@ -70,7 +72,6 @@ Graph = Dict[str, 'State'] - # TODO: Get rid of BuildResult. We might as well return a BuildManager. class BuildResult: """The result of a successful build. @@ -242,9 +243,6 @@ def default_data_dir() -> str: ('dependencies', List[str]), # names of imported modules ('data_mtime', int), # mtime of data_json ('data_json', str), # path of .data.json - # path of .deps.json, which we use to store fine-grained - # dependency information for fine-grained mode - ('deps_json', Optional[str]), ('suppressed', List[str]), # dependencies that weren't imported ('child_modules', List[str]), # all submodules of the given module ('options', Optional[Dict[str, object]]), # build options @@ -260,16 +258,14 @@ def default_data_dir() -> str: # suppressed contains those reachable imports that were prevented by # silent mode or simply not found. +FgDepMeta = TypedDict('FgDepMeta', {'path': str, 'mtime': int}) -def cache_meta_from_dict(meta: Dict[str, Any], - data_json: str, deps_json: Optional[str]) -> CacheMeta: +def cache_meta_from_dict(meta: Dict[str, Any], data_json: str) -> CacheMeta: """Build a CacheMeta object from a json metadata dictionary Args: meta: JSON metadata read from the metadata cache file data_json: Path to the .data.json file containing the AST trees - deps_json: Optionally, path to the .deps.json file containing - fine-grained dependency information. 
""" sentinel = None # type: Any # Values to be validated by the caller return CacheMeta( @@ -281,7 +277,6 @@ def cache_meta_from_dict(meta: Dict[str, Any], meta.get('dependencies', []), int(meta['data_mtime']) if 'data_mtime' in meta else sentinel, data_json, - deps_json, meta.get('suppressed', []), meta.get('child_modules', []), meta.get('options'), @@ -502,6 +497,7 @@ def __init__(self, data_dir: str, self.version_id = version_id self.modules = {} # type: Dict[str, MypyFile] self.missing_modules = set() # type: Set[str] + self.fg_deps_meta = {} # type: Dict[str, FgDepMeta] self.plugin = plugin if options.new_semantic_analyzer: # Set of namespaces (module or class) that are being populated during semantic @@ -729,25 +725,17 @@ def write_deps_cache(rdeps: Dict[str, Dict[str, Set[str]]], XXX: DESCRIBE """ metastore = manager.metastore - deps_meta, deps_cache = get_deps_cache_name() - meta_snapshot = {} # type: Dict[str, str] + meta_path, cache_path = get_deps_cache_name() + error = False - for id, st in graph.items(): - # If we didn't parse a file (so it doesn't have a - # source_hash), then it must be a module with a fresh cache, - # so use the hash from that. - if st.source_hash: - meta_snapshot[id] = st.source_hash - else: - assert st.meta, "Module must be either parsed or cached" - meta_snapshot[id] = st.meta.hash - if not metastore.write(deps_meta, json.dumps(meta_snapshot)): - manager.log("Error writing fine-grained deps meta JSON file {}".format(deps_meta)) - error = True - if not metastore.write(deps_cache, deps_to_json(extra_deps)): - manager.log("Error writing fine-grained extra deps JSON file {}".format(deps_cache)) + fg_deps_meta = manager.fg_deps_meta.copy() + if not metastore.write(cache_path, deps_to_json(extra_deps)): + manager.log("Error writing fine-grained extra deps JSON file {}".format(cache_path)) error = True + else: + fg_deps_meta['@extra'] = {'path': cache_path, 'mtime': manager.getmtime(cache_path)} + for id in rdeps: _, _, deps_json = get_cache_names(id, graph[id].xpath, manager) assert deps_json @@ -755,6 +743,26 @@ def write_deps_cache(rdeps: Dict[str, Dict[str, Set[str]]], if not manager.metastore.write(deps_json, deps_to_json(rdeps[id])): manager.log("Error writing fine-grained deps JSON file {}".format(deps_json)) error = True + else: + fg_deps_meta[id] = {'path': deps_json, 'mtime': manager.getmtime(deps_json)} + + meta_snapshot = {} # type: Dict[str, str] + for id, st in graph.items(): + # If we didn't parse a file (so it doesn't have a + # source_hash), then it must be a module with a fresh cache, + # so use the hash from that. 
+ if st.source_hash: + hash = st.source_hash + else: + assert st.meta, "Module must be either parsed or cached" + hash = st.meta.hash + meta_snapshot[id] = hash + + meta = {'snapshot': meta_snapshot, 'deps_meta': fg_deps_meta} + + if not metastore.write(meta_path, json.dumps(meta)): + manager.log("Error writing fine-grained deps meta JSON file {}".format(meta_path)) + error = True if error: manager.errors.set_file(_cache_dir_prefix(manager), None) @@ -767,7 +775,7 @@ def invert_deps( graph: Graph) -> Tuple[Dict[str, Dict[str, Set[str]]], Dict[str, Set[str]]]: from mypy.server.target import module_prefix, trigger_to_target - rdeps = {} # type: Dict[str, Dict[str, Set[str]]] + rdeps = {id: {} for id in graph} # type: Dict[str, Dict[str, Set[str]]] extra_deps = {} # type: Dict[str, Set[str]] for trigger, targets in deps.items(): assert trigger[0] == '<' @@ -835,14 +843,16 @@ def read_plugins_snapshot(manager: BuildManager) -> Optional[Dict[str, str]]: def read_deps_cache(manager: BuildManager, - graph: Graph) -> Optional[Dict[str, Set[str]]]: + graph: Graph) -> Optional[Tuple[Dict[str, Set[str]], + Dict[str, FgDepMeta]]]: """Read and validate dependencies cache. """ - deps_meta, deps_extra_cache = get_deps_cache_name() - meta_snapshot = _load_json_file(deps_meta, manager, - log_sucess='Deps meta ', - log_error='Could not load fine-grained dependency metadata: ') - if meta_snapshot is None: + deps_meta_path, deps_extra_cache = get_deps_cache_name() + deps_meta = _load_json_file(deps_meta_path, manager, + log_sucess='Deps meta ', + log_error='Could not load fine-grained dependency metadata: ') + if deps_meta is None: return None + meta_snapshot = deps_meta['snapshot'] # Take a snapshot of the source hashes from all of the metas we found. # (Including the ones we rejected because they were out of date.) # We use this to verify that they match up with the proto_deps. @@ -854,6 +864,18 @@ def read_deps_cache(manager: BuildManager, # TODO: invalidate also if options changed (like --strict-optional)? 
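Pulling the pieces of write_deps_cache together, the global @deps.meta.json file now carries two things: the source-hash snapshot and, per module (plus the special '@extra' entry), the path and mtime of the corresponding deps file. A toy version of the payload, with placeholder hashes, paths, and mtimes:

import json

meta = {
    # module id -> source hash, used for the consistency check above
    'snapshot': {'a': 'd41d8cd9', 'b': '9e107d9d'},
    # module id -> location and mtime of its fine-grained deps file
    'deps_meta': {
        '@extra': {'path': '@deps.data.json', 'mtime': 0},
        'a': {'path': 'a.deps.json', 'mtime': 0},
        'b': {'path': 'b.deps.json', 'mtime': 0},
    },
}
blob = json.dumps(meta)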
manager.log('Fine-grained dependencies cache inconsistent, ignoring') return None + + module_deps_metas = deps_meta['deps_meta'] + # XXX: check module metas + for id, meta in module_deps_metas.items(): + try: + matched = manager.getmtime(meta['path']) == meta['mtime'] + except FileNotFoundError: + matched = False + if not matched: + manager.log('Invalid or missing fine-grained deps cache: {}'.format(meta['path'])) + return None + deps = _load_json_file(deps_extra_cache, manager, log_sucess='Extra deps ', log_error='Could not load fine-grained dependencies cache: ') @@ -863,7 +885,7 @@ def read_deps_cache(manager: BuildManager, manager.log('Could not load fine-grained dependencies cache: cache is not a dict: {}' .format(type(deps))) return None - return {k: set(v) for (k, v) in deps.items()} + return {k: set(v) for (k, v) in deps.items()}, module_deps_metas def _load_json_file(file: str, manager: BuildManager, @@ -923,6 +945,13 @@ def get_cache_names(id: str, path: str, manager: BuildManager) -> Tuple[str, str return (prefix + '.meta.json', prefix + '.data.json', deps_json) +# def get_deps_cache_name(id: str, path: str, manager: BuildManager) -> str: +# assert manager.options.cache_fine_grained +# _, _, deps_json = get_deps_cache_name(id, path, manager) +# assert deps_json is not None +# return deps_json + + def get_deps_cache_name() -> Tuple[str, str]: """Return file names for global fine grained dependencies cache.""" name = '@deps' @@ -942,7 +971,7 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[Cache valid; otherwise None. """ # TODO: May need to take more build options into account - meta_json, data_json, deps_json = get_cache_names(id, path, manager) + meta_json, data_json, _ = get_cache_names(id, path, manager) manager.trace('Looking for {} at {}'.format(id, meta_json)) t0 = time.time() meta = _load_json_file(meta_json, manager, @@ -954,7 +983,7 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[Cache manager.log('Could not load cache for {}: meta cache is not a dict: {}' .format(id, repr(meta))) return None - m = cache_meta_from_dict(meta, data_json, deps_json) + m = cache_meta_from_dict(meta, data_json) manager.add_stats(load_meta_time=time.time() - t0) # Don't check for path match, that is dealt with in validate_meta(). @@ -1163,9 +1192,9 @@ def write_cache(id: str, path: str, tree: MypyFile, # Obtain file paths. path = manager.normpath(path) - meta_json, data_json, deps_json = get_cache_names(id, path, manager) - manager.log('Writing {} {} {} {} {}'.format( - id, path, meta_json, data_json, deps_json)) + meta_json, data_json, _ = get_cache_names(id, path, manager) + manager.log('Writing {} {} {} {}'.format( + id, path, meta_json, data_json)) # Update tree.path so that in bazel mode it's made relative (since # sometimes paths leak out). @@ -1245,7 +1274,7 @@ def write_cache(id: str, path: str, tree: MypyFile, # The next run will simply find the cache entry out of date. manager.log("Error writing meta JSON file {}".format(meta_json)) - return interface_hash, cache_meta_from_dict(meta, data_json, deps_json) + return interface_hash, cache_meta_from_dict(meta, data_json) def delete_cache(id: str, path: str, manager: BuildManager) -> None: @@ -1635,13 +1664,12 @@ def load_fine_grained_deps(self) -> Dict[str, Set[str]]: t0 = time.time() assert self.meta is not None, "Internal error: this method must be called only" \ " for cached modules" - assert self.meta.deps_json - # XXX: Check this reasonably?? 
- try: - deps = json.loads(self.manager.metastore.read(self.meta.deps_json)) - except FileNotFoundError: + if self.id in self.manager.fg_deps_meta: + # TODO: Assert deps file wasn't changed. + deps = json.loads( + self.manager.metastore.read(self.manager.fg_deps_meta[self.id]['path'])) + else: deps = {} - # TODO: Assert deps file wasn't changed. val = {k: set(v) for k, v in deps.items()} self.manager.add_stats(load_fg_deps_time=time.time() - t0) return val @@ -2291,11 +2319,11 @@ def dispatch(sources: List[BuildSource], manager: BuildManager) -> Graph: # the deps cache against the loaded individual cache files. if manager.options.cache_fine_grained or manager.use_fine_grained_cache(): t2 = time.time() - extra_deps = read_deps_cache(manager, graph) + deps_cache_info = read_deps_cache(manager, graph) manager.add_stats(load_fg_deps_time=time.time() - t2) - if extra_deps is not None: + if deps_cache_info is not None: # XXX: Is this where we want to put this? - TypeState.proto_deps = extra_deps + TypeState.proto_deps, manager.fg_deps_meta = deps_cache_info elif manager.stats.get('fresh_metas', 0) > 0: # Clear the stats so we don't infinite loop because of positive fresh_metas manager.stats.clear() diff --git a/test-data/unit/check-incremental.test b/test-data/unit/check-incremental.test index 41c51f0de979..92e83cc68bfc 100644 --- a/test-data/unit/check-incremental.test +++ b/test-data/unit/check-incremental.test @@ -3637,9 +3637,9 @@ import b [file a.py] [file b.py] -- This is a heinous hack, but we simulate having a invalid cache by clobbering --- the proto deps file with something with hash mismatches. +-- the proto deps file with something with mtime mismatches. [file ../.mypy_cache/3.6/@deps.meta.json.2] -{"__main__": "00000000000000000000000000000000", "a": "d41d8cd98f00b204e9800998ecf8427e", "b": "d41d8cd98f00b204e9800998ecf8427e", "builtins": "00000000000000000000000000000000"} +{"snapshot": {"__main__": "a7c958b001a45bd6a2a320f4e53c4c16", "a": "d41d8cd98f00b204e9800998ecf8427e", "b": "d41d8cd98f00b204e9800998ecf8427e", "builtins": "c532c89da517a4b779bcf7a964478d67"}, "deps_meta": {"@extra": {"path": "@deps.data.json", "mtime": 0}, "__main__": {"path": "__main__.deps.json", "mtime": 0}, "a": {"path": "a.deps.json", "mtime": 0}, "b": {"path": "b.deps.json", "mtime": 0}, "builtins": {"path": "builtins.deps.json", "mtime": 0}}} [file b.py.2] # uh -- Every file should get reloaded, since the cache was invalidated diff --git a/test-data/unit/fine-grained-cache-incremental.test b/test-data/unit/fine-grained-cache-incremental.test index a47986728e08..4513fcd894ab 100644 --- a/test-data/unit/fine-grained-cache-incremental.test +++ b/test-data/unit/fine-grained-cache-incremental.test @@ -201,9 +201,10 @@ a.py:8: note: x: expected "int", got "str" [file a.py] [file b.py] -- This is a heinous hack, but we simulate having a invalid cache by clobbering --- the proto deps file with something with hash mismatches. +-- the proto deps file with something with mtime mismatches. 
[file ../.mypy_cache/3.6/@deps.meta.json.2] -{"__main__": "00000000000000000000000000000000", "a": "d41d8cd98f00b204e9800998ecf8427e", "b": "d41d8cd98f00b204e9800998ecf8427e", "builtins": "00000000000000000000000000000000"} +{"snapshot": {"__main__": "a7c958b001a45bd6a2a320f4e53c4c16", "a": "d41d8cd98f00b204e9800998ecf8427e", "b": "d41d8cd98f00b204e9800998ecf8427e", "builtins": "c532c89da517a4b779bcf7a964478d67"}, "deps_meta": {"@extra": {"path": "@deps.data.json", "mtime": 0}, "__main__": {"path": "__main__.deps.json", "mtime": 0}, "a": {"path": "a.deps.json", "mtime": 0}, "b": {"path": "b.deps.json", "mtime": 0}, "builtins": {"path": "builtins.deps.json", "mtime": 0}}} + [file b.py.2] # uh -- A full reload shows up as nothing getting rechecked by fine-grained mode. From 846f835036df93c496937853d4456f4733b93dab Mon Sep 17 00:00:00 2001 From: "Michael J. Sullivan" Date: Mon, 28 Jan 2019 15:05:17 -0800 Subject: [PATCH 10/20] Some comment cleanup --- mypy/build.py | 25 ++++++++++++------------- mypy/server/update.py | 1 - 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index bf13c1911e85..3de3c20fee00 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -72,6 +72,7 @@ Graph = Dict[str, 'State'] + # TODO: Get rid of BuildResult. We might as well return a BuildManager. class BuildResult: """The result of a successful build. @@ -718,11 +719,18 @@ def write_deps_cache(rdeps: Dict[str, Dict[str, Set[str]]], manager: BuildManager, graph: Graph) -> None: """Write cache files for fine-grained dependencies. - Serialize fine-grained dependencies map for fine grained mode. Also take the snapshot - of current sources to later check consistency between the extra cache and individual - cache files. + Serialize fine-grained dependencies map for fine grained mode. + + Dependencies on some module 'm' is stored in the dependency cache + file m.deps.json. This entails some spooky action at a distance: + if module 'n' depends on 'm', that produces entries in m.deps.json. - XXX: DESCRIBE + This means that the validity of the fine-grained dependency caches + are a global property, so we store validity checking information + fine-grained dependencies in a global cache file. + To do this, we take a snapshot of current sources to later check + consistency between the extra cache and individual cache files as + well as storing the mtime for all of the dependency files. 
""" metastore = manager.metastore meta_path, cache_path = get_deps_cache_name() @@ -778,7 +786,6 @@ def invert_deps( rdeps = {id: {} for id in graph} # type: Dict[str, Dict[str, Set[str]]] extra_deps = {} # type: Dict[str, Set[str]] for trigger, targets in deps.items(): - assert trigger[0] == '<' module = module_prefix(graph, trigger_to_target(trigger)) if module and graph[module].tree: mod_rdeps = rdeps.setdefault(module, {}) @@ -866,7 +873,6 @@ def read_deps_cache(manager: BuildManager, return None module_deps_metas = deps_meta['deps_meta'] - # XXX: check module metas for id, meta in module_deps_metas.items(): try: matched = manager.getmtime(meta['path']) == meta['mtime'] @@ -945,13 +951,6 @@ def get_cache_names(id: str, path: str, manager: BuildManager) -> Tuple[str, str return (prefix + '.meta.json', prefix + '.data.json', deps_json) -# def get_deps_cache_name(id: str, path: str, manager: BuildManager) -> str: -# assert manager.options.cache_fine_grained -# _, _, deps_json = get_deps_cache_name(id, path, manager) -# assert deps_json is not None -# return deps_json - - def get_deps_cache_name() -> Tuple[str, str]: """Return file names for global fine grained dependencies cache.""" name = '@deps' diff --git a/mypy/server/update.py b/mypy/server/update.py index 736fd0a964fd..00034124c23d 100644 --- a/mypy/server/update.py +++ b/mypy/server/update.py @@ -834,7 +834,6 @@ def find_targets_recursive( worklist = set() for target in current: if target.startswith('<'): - # XXX: slow?? module_id = module_prefix(graph, trigger_to_target(target)) if module_id: ensure_deps_loaded(module_id, deps, graph) From 00e83c83517887f798bd8516e10789838cadcabf Mon Sep 17 00:00:00 2001 From: "Michael J. Sullivan" Date: Mon, 28 Jan 2019 16:30:15 -0800 Subject: [PATCH 11/20] Get rid of the dodgy use of TypeState --- mypy/build.py | 55 +++++++++++++++++++------------------------ mypy/server/update.py | 9 +++++-- 2 files changed, 31 insertions(+), 33 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 3de3c20fee00..0b9637f8bb39 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -699,6 +699,17 @@ def parse_file(self, id: str, path: str, source: str, ignore_errors: bool) -> My self.errors.set_file_ignored_lines(path, tree.ignored_lines, ignore_errors) return tree + def load_fine_grained_deps(self, id: str) -> Dict[str, Set[str]]: + t0 = time.time() + if id in self.fg_deps_meta: + # TODO: Assert deps file wasn't changed. + deps = json.loads(self.metastore.read(self.fg_deps_meta[id]['path'])) + else: + deps = {} + val = {k: set(v) for k, v in deps.items()} + self.add_stats(load_fg_deps_time=time.time() - t0) + return val + def report_file(self, file: MypyFile, type_map: Dict[Expression, Type], @@ -731,6 +742,10 @@ def write_deps_cache(rdeps: Dict[str, Dict[str, Set[str]]], To do this, we take a snapshot of current sources to later check consistency between the extra cache and individual cache files as well as storing the mtime for all of the dependency files. + + There is additionally some "extra" dependency information that + isn't stored in any module-specific files. It is treated as + belonging to the module '@extra'. """ metastore = manager.metastore meta_path, cache_path = get_deps_cache_name() @@ -806,7 +821,8 @@ def process_deps(proto_deps: Dict[str, Set[str]], all dependencies on that module, and extra_deps contains dependencies that weren't associated with any module. 
""" - deps = {} # type: Dict[str, Set[str]] + + deps = manager.load_fine_grained_deps('@extra') things = [st.compute_fine_grained_deps() for st in graph.values() if st.tree] + [proto_deps] for st_deps in things: for trigger, targets in st_deps.items(): @@ -850,10 +866,9 @@ def read_plugins_snapshot(manager: BuildManager) -> Optional[Dict[str, str]]: def read_deps_cache(manager: BuildManager, - graph: Graph) -> Optional[Tuple[Dict[str, Set[str]], - Dict[str, FgDepMeta]]]: + graph: Graph) -> Optional[Dict[str, FgDepMeta]]: """Read and validate dependencies cache. """ - deps_meta_path, deps_extra_cache = get_deps_cache_name() + deps_meta_path, _ = get_deps_cache_name() deps_meta = _load_json_file(deps_meta_path, manager, log_sucess='Deps meta ', log_error='Could not load fine-grained dependency metadata: ') @@ -882,16 +897,7 @@ def read_deps_cache(manager: BuildManager, manager.log('Invalid or missing fine-grained deps cache: {}'.format(meta['path'])) return None - deps = _load_json_file(deps_extra_cache, manager, - log_sucess='Extra deps ', - log_error='Could not load fine-grained dependencies cache: ') - if deps is None: - return None - if not isinstance(deps, dict): - manager.log('Could not load fine-grained dependencies cache: cache is not a dict: {}' - .format(type(deps))) - return None - return {k: set(v) for (k, v) in deps.items()}, module_deps_metas + return module_deps_metas def _load_json_file(file: str, manager: BuildManager, @@ -1659,19 +1665,7 @@ def wrap_context(self) -> Iterator[None]: self.check_blockers() def load_fine_grained_deps(self) -> Dict[str, Set[str]]: - if self.meta is None: return {} - t0 = time.time() - assert self.meta is not None, "Internal error: this method must be called only" \ - " for cached modules" - if self.id in self.manager.fg_deps_meta: - # TODO: Assert deps file wasn't changed. - deps = json.loads( - self.manager.metastore.read(self.manager.fg_deps_meta[self.id]['path'])) - else: - deps = {} - val = {k: set(v) for k, v in deps.items()} - self.manager.add_stats(load_fg_deps_time=time.time() - t0) - return val + return self.manager.load_fine_grained_deps(self.id) def load_tree(self, temporary: bool = False) -> None: assert self.meta is not None, "Internal error: this method must be called only" \ @@ -2318,11 +2312,10 @@ def dispatch(sources: List[BuildSource], manager: BuildManager) -> Graph: # the deps cache against the loaded individual cache files. if manager.options.cache_fine_grained or manager.use_fine_grained_cache(): t2 = time.time() - deps_cache_info = read_deps_cache(manager, graph) + fg_deps_meta = read_deps_cache(manager, graph) manager.add_stats(load_fg_deps_time=time.time() - t2) - if deps_cache_info is not None: - # XXX: Is this where we want to put this? - TypeState.proto_deps, manager.fg_deps_meta = deps_cache_info + if fg_deps_meta is not None: + manager.fg_deps_meta = fg_deps_meta elif manager.stats.get('fresh_metas', 0) > 0: # Clear the stats so we don't infinite loop because of positive fresh_metas manager.stats.clear() diff --git a/mypy/server/update.py b/mypy/server/update.py index 00034124c23d..ae28c79b8a1f 100644 --- a/mypy/server/update.py +++ b/mypy/server/update.py @@ -407,7 +407,12 @@ def find_unloaded_deps(manager: BuildManager, graph: Dict[str, State], def ensure_deps_loaded(module: str, deps: Dict[str, Set[str]], graph: Dict[str, State]) -> None: - # XXX: COMMENT + """Ensure that the dependencies on a module are loaded. 
+ + This also requires loading dependencies from any parent modules, + since dependencies will get stored with parent modules when a module + doesn't exist. + """ if module in graph and graph[module].fine_grained_deps_loaded: return parts = module.split('.') @@ -432,7 +437,7 @@ def ensure_trees_loaded(manager: BuildManager, graph: Dict[str, State], def get_all_dependencies(manager: BuildManager, graph: Dict[str, State]) -> Dict[str, Set[str]]: """Return the fine-grained dependency map for an entire build.""" # Deps for each module were computed during build() or loaded from the cache. - deps = {} # type: Dict[str, Set[str]] + deps = manager.load_fine_grained_deps('@extra') # type: Dict[str, Set[str]] for id in graph: if graph[id].tree is not None: merge_dependencies(graph[id].compute_fine_grained_deps(), deps) From 1f961adb63e44faa366582a0e02c07b060ad367b Mon Sep 17 00:00:00 2001 From: "Michael J. Sullivan" Date: Tue, 29 Jan 2019 11:04:52 -0800 Subject: [PATCH 12/20] lint --- mypy/build.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mypy/build.py b/mypy/build.py index 0b9637f8bb39..d60156fbb0b8 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -261,6 +261,7 @@ def default_data_dir() -> str: FgDepMeta = TypedDict('FgDepMeta', {'path': str, 'mtime': int}) + def cache_meta_from_dict(meta: Dict[str, Any], data_json: str) -> CacheMeta: """Build a CacheMeta object from a json metadata dictionary From c994bff728ca8b0cf13432abdc3c049c816ab357 Mon Sep 17 00:00:00 2001 From: "Michael J. Sullivan" Date: Tue, 29 Jan 2019 11:47:35 -0800 Subject: [PATCH 13/20] Some cleanups --- mypy/build.py | 30 ++++++++++++++---------------- mypy/server/update.py | 2 ++ 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index d60156fbb0b8..5909eec16804 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -738,11 +738,12 @@ def write_deps_cache(rdeps: Dict[str, Dict[str, Set[str]]], if module 'n' depends on 'm', that produces entries in m.deps.json. This means that the validity of the fine-grained dependency caches - are a global property, so we store validity checking information - fine-grained dependencies in a global cache file. - To do this, we take a snapshot of current sources to later check - consistency between the extra cache and individual cache files as - well as storing the mtime for all of the dependency files. + are a global property, so we store validity checking information for + fine-grained dependencies in a global cache file: + * We take a snapshot of current sources to later check consistency + between the fine-grained dependency cache and module cache metadata + * We store the mtime of all of the dependency files to verify they + haven't changed There is additionally some "extra" dependency information that isn't stored in any module-specific files. It is treated as @@ -868,7 +869,11 @@ def read_plugins_snapshot(manager: BuildManager) -> Optional[Dict[str, str]]: def read_deps_cache(manager: BuildManager, graph: Graph) -> Optional[Dict[str, FgDepMeta]]: - """Read and validate dependencies cache. """ + """Read and validate the fine-grained dependencies cache. + + See the write_deps_cache documentation for more information on + the details of the cache. 
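The validation keys off two things stored in the global meta file: a snapshot of the sources and, for each deps file, the mtime it had when written. A small freshness check in that spirit (a sketch only; per the docstring, the real read_deps_cache also cross-checks the source snapshot against the module cache metadata):

    from typing import Callable, Dict

    def deps_meta_is_fresh(deps_meta: Dict[str, Dict[str, object]],
                           getmtime: Callable[[str], int]) -> bool:
        # deps_meta has the shape {"a": {"path": "a.deps.json", "mtime": 1548700000}, ...}
        for meta in deps_meta.values():
            try:
                if getmtime(str(meta['path'])) != meta['mtime']:
                    return False   # deps file was rewritten since the meta was recorded
            except FileNotFoundError:
                return False       # deps file is missing entirely
        return True

    stored = {'a': {'path': 'a.deps.json', 'mtime': 0}}
    assert deps_meta_is_fresh(stored, getmtime=lambda path: 0)
    assert not deps_meta_is_fresh(stored, getmtime=lambda path: 42)

This is also why the test cases in this series invalidate the cache simply by clobbering the meta file with entries whose recorded mtimes are all 0.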
+ """ deps_meta_path, _ = get_deps_cache_name() deps_meta = _load_json_file(deps_meta_path, manager, log_sucess='Deps meta ', @@ -2327,11 +2332,9 @@ def dispatch(sources: List[BuildSource], manager: BuildManager) -> Graph: return dispatch(sources, manager) # If we are loading a fine-grained incremental mode cache, we - # don't want to do a real incremental reprocess of the graph---we - # just want to load in all of the cache information. - if manager.use_fine_grained_cache(): - process_fine_grained_cache_graph(graph, manager) - else: + # don't want to do a real incremental reprocess of the + # graph---we'll handle it all later. + if not manager.use_fine_grained_cache(): process_graph(graph, manager) # Update plugins snapshot. write_plugins_snapshot(manager) @@ -2645,11 +2648,6 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: manager.log("No fresh SCCs left in queue") -def process_fine_grained_cache_graph(graph: Graph, manager: BuildManager) -> None: - """Finish loading everything for use in the fine-grained incremental cache""" - pass - - def order_ascc(graph: Graph, ascc: AbstractSet[str], pri_max: int = PRI_ALL) -> List[str]: """Come up with the ideal processing order within an SCC. diff --git a/mypy/server/update.py b/mypy/server/update.py index ae28c79b8a1f..907b566ab36f 100644 --- a/mypy/server/update.py +++ b/mypy/server/update.py @@ -409,6 +409,8 @@ def ensure_deps_loaded(module: str, deps: Dict[str, Set[str]], graph: Dict[str, State]) -> None: """Ensure that the dependencies on a module are loaded. + Dependencies are loaded into the 'deps' dictionary. + This also requires loading dependencies from any parent modules, since dependencies will get stored with parent modules when a module doesn't exist. From d354b7b7d54ee483a573be418d8ade5de224722f Mon Sep 17 00:00:00 2001 From: "Michael J. Sullivan" Date: Tue, 29 Jan 2019 11:59:03 -0800 Subject: [PATCH 14/20] Reload modules to sort dependencies properly --- mypy/build.py | 27 +++++++++++++++------------ mypy/test/testfinegrained.py | 1 - 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 5909eec16804..6d82af4ab68b 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -800,7 +800,7 @@ def invert_deps( graph: Graph) -> Tuple[Dict[str, Dict[str, Set[str]]], Dict[str, Set[str]]]: from mypy.server.target import module_prefix, trigger_to_target - rdeps = {id: {} for id in graph} # type: Dict[str, Dict[str, Set[str]]] + rdeps = {id: {} for id, st in graph.items() if st.tree} # type: Dict[str, Dict[str, Set[str]]] extra_deps = {} # type: Dict[str, Set[str]] for trigger, targets in deps.items(): module = module_prefix(graph, trigger_to_target(trigger)) @@ -824,20 +824,23 @@ def process_deps(proto_deps: Dict[str, Set[str]], weren't associated with any module. """ + from mypy.server.update import merge_dependencies + deps = manager.load_fine_grained_deps('@extra') + # Compute the full set of dependencies from everything we've processed. things = [st.compute_fine_grained_deps() for st in graph.values() if st.tree] + [proto_deps] for st_deps in things: - for trigger, targets in st_deps.items(): - deps.setdefault(trigger, set()).update(targets) - - # If we are operating in non-incremental mode, properly split up - # dependencies between all the files. If this is an incremental update, - # though, we write all the dependencies into the "extra_deps" file so we - # don't need to reload dependency files to update them. 
- if not manager.options.incremental: - rdeps, extra_deps = invert_deps(deps, graph) - else: - rdeps, extra_deps = {}, deps + merge_dependencies(st_deps, deps) + + # Split the dependencies out into based on the module that is depended on. + rdeps, extra_deps = invert_deps(deps, graph) + + # We can't just clobber existing dependency information, so we + # load the deps for every module we've generated new dependencies + # to and merge the new deps into them. + for module, mdeps in rdeps.items(): + old_deps = manager.load_fine_grained_deps(module) + merge_dependencies(old_deps, mdeps) return rdeps, extra_deps diff --git a/mypy/test/testfinegrained.py b/mypy/test/testfinegrained.py index bec40ad0419e..bda63493b09c 100644 --- a/mypy/test/testfinegrained.py +++ b/mypy/test/testfinegrained.py @@ -86,7 +86,6 @@ def run_case(self, testcase: DataDrivenTestCase) -> None: step = 1 sources = self.parse_sources(main_src, step, options) if step <= num_regular_incremental_steps: - build_options.incremental = step > 1 messages = self.build(build_options, sources) else: messages = self.run_check(server, sources) From 389e779451f3b5cd55e90783cae978cf90341d94 Mon Sep 17 00:00:00 2001 From: "Michael J. Sullivan" Date: Tue, 29 Jan 2019 13:20:13 -0800 Subject: [PATCH 15/20] Simplify extra handling --- mypy/build.py | 70 ++++++++++++++++++++++----------------------------- 1 file changed, 30 insertions(+), 40 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 6d82af4ab68b..de1288c223aa 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -726,8 +726,11 @@ def deps_to_json(x: Dict[str, Set[str]]) -> str: return json.dumps({k: list(v) for k, v in x.items()}) +DEPS_META_FILE = '@deps.meta.json' # type: Final +DEPS_EXTRA_FILE = '@extra.meta.json' # type: Final + + def write_deps_cache(rdeps: Dict[str, Dict[str, Set[str]]], - extra_deps: Dict[str, Set[str]], manager: BuildManager, graph: Graph) -> None: """Write cache files for fine-grained dependencies. @@ -736,6 +739,9 @@ def write_deps_cache(rdeps: Dict[str, Dict[str, Set[str]]], Dependencies on some module 'm' is stored in the dependency cache file m.deps.json. This entails some spooky action at a distance: if module 'n' depends on 'm', that produces entries in m.deps.json. + When there is a dependency on a module that does not exist in the + build, it is stored with its first existing parent module. If no + such module exists, it is stored with the fake module '@extra'. This means that the validity of the fine-grained dependency caches are a global property, so we store validity checking information for @@ -744,25 +750,18 @@ def write_deps_cache(rdeps: Dict[str, Dict[str, Set[str]]], between the fine-grained dependency cache and module cache metadata * We store the mtime of all of the dependency files to verify they haven't changed - - There is additionally some "extra" dependency information that - isn't stored in any module-specific files. It is treated as - belonging to the module '@extra'. 
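A module's deps file can contain entries produced by modules that are not part of the current build, so newly computed dependencies are merged into whatever is already cached rather than overwriting it. merge_dependencies from mypy.server.update amounts to a per-trigger set union, roughly:

    from typing import Dict, Set

    def merge_dependencies(new_deps: Dict[str, Set[str]], deps: Dict[str, Set[str]]) -> None:
        # Union each trigger's targets into the accumulating map (second argument).
        for trigger, targets in new_deps.items():
            deps.setdefault(trigger, set()).update(targets)

    cached = {'<m.f>': {'n.g'}}                   # previously stored in m.deps.json
    fresh = {'<m.f>': {'p.q'}, '<m.C>': {'n.h'}}  # computed during this build
    merge_dependencies(cached, fresh)             # fold the old deps into the new ones
    assert fresh['<m.f>'] == {'n.g', 'p.q'}
    assert fresh['<m.C>'] == {'n.h'}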
""" metastore = manager.metastore - meta_path, cache_path = get_deps_cache_name() error = False fg_deps_meta = manager.fg_deps_meta.copy() - if not metastore.write(cache_path, deps_to_json(extra_deps)): - manager.log("Error writing fine-grained extra deps JSON file {}".format(cache_path)) - error = True - else: - fg_deps_meta['@extra'] = {'path': cache_path, 'mtime': manager.getmtime(cache_path)} for id in rdeps: - _, _, deps_json = get_cache_names(id, graph[id].xpath, manager) + if id != '@extra': + _, _, deps_json = get_cache_names(id, graph[id].xpath, manager) + else: + deps_json = DEPS_EXTRA_FILE assert deps_json manager.log("Writing deps cache", deps_json) if not manager.metastore.write(deps_json, deps_to_json(rdeps[id])): @@ -785,8 +784,8 @@ def write_deps_cache(rdeps: Dict[str, Dict[str, Set[str]]], meta = {'snapshot': meta_snapshot, 'deps_meta': fg_deps_meta} - if not metastore.write(meta_path, json.dumps(meta)): - manager.log("Error writing fine-grained deps meta JSON file {}".format(meta_path)) + if not metastore.write(DEPS_META_FILE, json.dumps(meta)): + manager.log("Error writing fine-grained deps meta JSON file {}".format(DEPS_META_FILE)) error = True if error: @@ -797,43 +796,41 @@ def write_deps_cache(rdeps: Dict[str, Dict[str, Set[str]]], def invert_deps( deps: Dict[str, Set[str]], - graph: Graph) -> Tuple[Dict[str, Dict[str, Set[str]]], Dict[str, Set[str]]]: + graph: Graph) -> Dict[str, Dict[str, Set[str]]]: from mypy.server.target import module_prefix, trigger_to_target rdeps = {id: {} for id, st in graph.items() if st.tree} # type: Dict[str, Dict[str, Set[str]]] - extra_deps = {} # type: Dict[str, Set[str]] for trigger, targets in deps.items(): module = module_prefix(graph, trigger_to_target(trigger)) - if module and graph[module].tree: - mod_rdeps = rdeps.setdefault(module, {}) - mod_rdeps.setdefault(trigger, set()).update(targets) - else: - extra_deps.setdefault(trigger, set()).update(targets) + if not module or not graph[module].tree: + module = '@extra' + + mod_rdeps = rdeps.setdefault(module, {}) + mod_rdeps.setdefault(trigger, set()).update(targets) - return (rdeps, extra_deps) + return rdeps def process_deps(proto_deps: Dict[str, Set[str]], manager: BuildManager, - graph: Graph) -> Tuple[Dict[str, Dict[str, Set[str]]], - Dict[str, Set[str]]]: + graph: Graph) -> Dict[str, Dict[str, Set[str]]]: """Process fine-grained dependenecies into a form suitable for serializing. - Returns an (rdeps, extra_deps) pair, where rdeps maps from module ids to - all dependencies on that module, and extra_deps contains dependencies that - weren't associated with any module. + Returns a dictionary from module ids to all dependencies on that + module. Dependencies not associated with a module in the build are + associated with the fake module '@extra'. """ from mypy.server.update import merge_dependencies - deps = manager.load_fine_grained_deps('@extra') # Compute the full set of dependencies from everything we've processed. + deps = {} # type: Dict[str, Set[str]] things = [st.compute_fine_grained_deps() for st in graph.values() if st.tree] + [proto_deps] for st_deps in things: merge_dependencies(st_deps, deps) # Split the dependencies out into based on the module that is depended on. 
- rdeps, extra_deps = invert_deps(deps, graph) + rdeps = invert_deps(deps, graph) # We can't just clobber existing dependency information, so we # load the deps for every module we've generated new dependencies @@ -842,7 +839,7 @@ def process_deps(proto_deps: Dict[str, Set[str]], old_deps = manager.load_fine_grained_deps(module) merge_dependencies(old_deps, mdeps) - return rdeps, extra_deps + return rdeps PLUGIN_SNAPSHOT_FILE = '@plugins_snapshot.json' # type: Final @@ -877,8 +874,7 @@ def read_deps_cache(manager: BuildManager, See the write_deps_cache documentation for more information on the details of the cache. """ - deps_meta_path, _ = get_deps_cache_name() - deps_meta = _load_json_file(deps_meta_path, manager, + deps_meta = _load_json_file(DEPS_META_FILE, manager, log_sucess='Deps meta ', log_error='Could not load fine-grained dependency metadata: ') if deps_meta is None: @@ -966,12 +962,6 @@ def get_cache_names(id: str, path: str, manager: BuildManager) -> Tuple[str, str return (prefix + '.meta.json', prefix + '.data.json', deps_json) -def get_deps_cache_name() -> Tuple[str, str]: - """Return file names for global fine grained dependencies cache.""" - name = '@deps' - return name + '.meta.json', name + '.data.json' - - def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[CacheMeta]: """Find cache data for a module. @@ -2350,8 +2340,8 @@ def dispatch(sources: List[BuildSource], manager: BuildManager) -> Graph: TypeState.update_protocol_deps() if not manager.options.fine_grained_incremental: proto_deps = TypeState.proto_deps or {} - rdeps, extra_deps = process_deps(proto_deps, manager, graph) - write_deps_cache(rdeps, extra_deps, manager, graph) + rdeps = process_deps(proto_deps, manager, graph) + write_deps_cache(rdeps, manager, graph) if manager.options.dump_deps: # This speeds up startup a little when not using the daemon mode. From 1b688c05cabc233ed66842efbe0760f96f2368d6 Mon Sep 17 00:00:00 2001 From: "Michael J. Sullivan" Date: Tue, 29 Jan 2019 13:20:49 -0800 Subject: [PATCH 16/20] Rename @extra to @root --- mypy/build.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index de1288c223aa..9c682f933404 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -727,7 +727,7 @@ def deps_to_json(x: Dict[str, Set[str]]) -> str: DEPS_META_FILE = '@deps.meta.json' # type: Final -DEPS_EXTRA_FILE = '@extra.meta.json' # type: Final +DEPS_ROOT_FILE = '@root.meta.json' # type: Final def write_deps_cache(rdeps: Dict[str, Dict[str, Set[str]]], @@ -741,7 +741,7 @@ def write_deps_cache(rdeps: Dict[str, Dict[str, Set[str]]], if module 'n' depends on 'm', that produces entries in m.deps.json. When there is a dependency on a module that does not exist in the build, it is stored with its first existing parent module. If no - such module exists, it is stored with the fake module '@extra'. + such module exists, it is stored with the fake module '@root'. 
This means that the validity of the fine-grained dependency caches are a global property, so we store validity checking information for @@ -758,10 +758,10 @@ def write_deps_cache(rdeps: Dict[str, Dict[str, Set[str]]], fg_deps_meta = manager.fg_deps_meta.copy() for id in rdeps: - if id != '@extra': + if id != '@root': _, _, deps_json = get_cache_names(id, graph[id].xpath, manager) else: - deps_json = DEPS_EXTRA_FILE + deps_json = DEPS_ROOT_FILE assert deps_json manager.log("Writing deps cache", deps_json) if not manager.metastore.write(deps_json, deps_to_json(rdeps[id])): @@ -803,7 +803,7 @@ def invert_deps( for trigger, targets in deps.items(): module = module_prefix(graph, trigger_to_target(trigger)) if not module or not graph[module].tree: - module = '@extra' + module = '@root' mod_rdeps = rdeps.setdefault(module, {}) mod_rdeps.setdefault(trigger, set()).update(targets) @@ -818,7 +818,7 @@ def process_deps(proto_deps: Dict[str, Set[str]], Returns a dictionary from module ids to all dependencies on that module. Dependencies not associated with a module in the build are - associated with the fake module '@extra'. + associated with the fake module '@root'. """ from mypy.server.update import merge_dependencies From 4e1c5fb1560326c1775df6fba0280edfc9f755ee Mon Sep 17 00:00:00 2001 From: "Michael J. Sullivan" Date: Tue, 29 Jan 2019 13:35:46 -0800 Subject: [PATCH 17/20] More docs and cleanup --- mypy/build.py | 36 +++++++++++++------ mypy/server/update.py | 2 +- test-data/unit/check-incremental.test | 2 +- .../unit/fine-grained-cache-incremental.test | 2 +- 4 files changed, 28 insertions(+), 14 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 9c682f933404..270a1a066578 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -727,7 +727,7 @@ def deps_to_json(x: Dict[str, Set[str]]) -> str: DEPS_META_FILE = '@deps.meta.json' # type: Final -DEPS_ROOT_FILE = '@root.meta.json' # type: Final +DEPS_ROOT_FILE = '@root.deps.json' # type: Final def write_deps_cache(rdeps: Dict[str, Dict[str, Set[str]]], @@ -794,11 +794,20 @@ def write_deps_cache(rdeps: Dict[str, Dict[str, Set[str]]], blocker=True) -def invert_deps( - deps: Dict[str, Set[str]], - graph: Graph) -> Dict[str, Dict[str, Set[str]]]: +def invert_deps(deps: Dict[str, Set[str]], + graph: Graph) -> Dict[str, Dict[str, Set[str]]]: + """Splits fine-grained dependencies based on the module of the trigger + + Returns a dictionary from module ids to all dependencies on that + module. Dependencies not associated with a module in the build are + associated with the fake module '@root'. + """ + # Lazy import to speed up startup from mypy.server.target import module_prefix, trigger_to_target + # Prepopulate the map for all the modules that have been processed, + # so that we always generate files for processed modules (even if + # there aren't any dependencies to them.) rdeps = {id: {} for id, st in graph.items() if st.tree} # type: Dict[str, Dict[str, Set[str]]] for trigger, targets in deps.items(): module = module_prefix(graph, trigger_to_target(trigger)) @@ -811,17 +820,22 @@ def invert_deps( return rdeps -def process_deps(proto_deps: Dict[str, Set[str]], - manager: BuildManager, - graph: Graph) -> Dict[str, Dict[str, Set[str]]]: - """Process fine-grained dependenecies into a form suitable for serializing. +def generate_deps_for_cache(proto_deps: Dict[str, Set[str]], + manager: BuildManager, + graph: Graph) -> Dict[str, Dict[str, Set[str]]]: + """Generate fine-grained dependenecies into a form suitable for serializing. 
+ + This does a few things: + 1. Computes all fine grained deps from modules that were processed + 2. Splits fine-grained deps based on the module of the trigger + 3. For each module we generated fine-grained deps for, load any previous + deps and merge them in. Returns a dictionary from module ids to all dependencies on that module. Dependencies not associated with a module in the build are associated with the fake module '@root'. """ - - from mypy.server.update import merge_dependencies + from mypy.server.update import merge_dependencies # Lazy import to speed up startup # Compute the full set of dependencies from everything we've processed. deps = {} # type: Dict[str, Set[str]] @@ -2340,7 +2354,7 @@ def dispatch(sources: List[BuildSource], manager: BuildManager) -> Graph: TypeState.update_protocol_deps() if not manager.options.fine_grained_incremental: proto_deps = TypeState.proto_deps or {} - rdeps = process_deps(proto_deps, manager, graph) + rdeps = generate_deps_for_cache(proto_deps, manager, graph) write_deps_cache(rdeps, manager, graph) if manager.options.dump_deps: diff --git a/mypy/server/update.py b/mypy/server/update.py index 907b566ab36f..7c1d5fc9db81 100644 --- a/mypy/server/update.py +++ b/mypy/server/update.py @@ -439,7 +439,7 @@ def ensure_trees_loaded(manager: BuildManager, graph: Dict[str, State], def get_all_dependencies(manager: BuildManager, graph: Dict[str, State]) -> Dict[str, Set[str]]: """Return the fine-grained dependency map for an entire build.""" # Deps for each module were computed during build() or loaded from the cache. - deps = manager.load_fine_grained_deps('@extra') # type: Dict[str, Set[str]] + deps = manager.load_fine_grained_deps('@root') # type: Dict[str, Set[str]] for id in graph: if graph[id].tree is not None: merge_dependencies(graph[id].compute_fine_grained_deps(), deps) diff --git a/test-data/unit/check-incremental.test b/test-data/unit/check-incremental.test index 92e83cc68bfc..a4010956d5d5 100644 --- a/test-data/unit/check-incremental.test +++ b/test-data/unit/check-incremental.test @@ -3639,7 +3639,7 @@ import b -- This is a heinous hack, but we simulate having a invalid cache by clobbering -- the proto deps file with something with mtime mismatches. 
[file ../.mypy_cache/3.6/@deps.meta.json.2] -{"snapshot": {"__main__": "a7c958b001a45bd6a2a320f4e53c4c16", "a": "d41d8cd98f00b204e9800998ecf8427e", "b": "d41d8cd98f00b204e9800998ecf8427e", "builtins": "c532c89da517a4b779bcf7a964478d67"}, "deps_meta": {"@extra": {"path": "@deps.data.json", "mtime": 0}, "__main__": {"path": "__main__.deps.json", "mtime": 0}, "a": {"path": "a.deps.json", "mtime": 0}, "b": {"path": "b.deps.json", "mtime": 0}, "builtins": {"path": "builtins.deps.json", "mtime": 0}}} +{"snapshot": {"__main__": "a7c958b001a45bd6a2a320f4e53c4c16", "a": "d41d8cd98f00b204e9800998ecf8427e", "b": "d41d8cd98f00b204e9800998ecf8427e", "builtins": "c532c89da517a4b779bcf7a964478d67"}, "deps_meta": {"@root": {"path": "@root.deps.json", "mtime": 0}, "__main__": {"path": "__main__.deps.json", "mtime": 0}, "a": {"path": "a.deps.json", "mtime": 0}, "b": {"path": "b.deps.json", "mtime": 0}, "builtins": {"path": "builtins.deps.json", "mtime": 0}}} [file b.py.2] # uh -- Every file should get reloaded, since the cache was invalidated diff --git a/test-data/unit/fine-grained-cache-incremental.test b/test-data/unit/fine-grained-cache-incremental.test index 4513fcd894ab..79e8abdb9776 100644 --- a/test-data/unit/fine-grained-cache-incremental.test +++ b/test-data/unit/fine-grained-cache-incremental.test @@ -203,7 +203,7 @@ a.py:8: note: x: expected "int", got "str" -- This is a heinous hack, but we simulate having a invalid cache by clobbering -- the proto deps file with something with mtime mismatches. [file ../.mypy_cache/3.6/@deps.meta.json.2] -{"snapshot": {"__main__": "a7c958b001a45bd6a2a320f4e53c4c16", "a": "d41d8cd98f00b204e9800998ecf8427e", "b": "d41d8cd98f00b204e9800998ecf8427e", "builtins": "c532c89da517a4b779bcf7a964478d67"}, "deps_meta": {"@extra": {"path": "@deps.data.json", "mtime": 0}, "__main__": {"path": "__main__.deps.json", "mtime": 0}, "a": {"path": "a.deps.json", "mtime": 0}, "b": {"path": "b.deps.json", "mtime": 0}, "builtins": {"path": "builtins.deps.json", "mtime": 0}}} +{"snapshot": {"__main__": "a7c958b001a45bd6a2a320f4e53c4c16", "a": "d41d8cd98f00b204e9800998ecf8427e", "b": "d41d8cd98f00b204e9800998ecf8427e", "builtins": "c532c89da517a4b779bcf7a964478d67"}, "deps_meta": {"@root": {"path": "@root.deps.json", "mtime": 0}, "__main__": {"path": "__main__.deps.json", "mtime": 0}, "a": {"path": "a.deps.json", "mtime": 0}, "b": {"path": "b.deps.json", "mtime": 0}, "builtins": {"path": "builtins.deps.json", "mtime": 0}}} [file b.py.2] # uh From 489e80068cb9f0be158e44395c8e9abc1016dfd7 Mon Sep 17 00:00:00 2001 From: "Michael J. Sullivan" Date: Wed, 30 Jan 2019 12:08:33 -0800 Subject: [PATCH 18/20] docs cleanup --- mypy/build.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index de31842f17ee..52e1bc8595d6 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -259,6 +259,7 @@ def default_data_dir() -> str: # suppressed contains those reachable imports that were prevented by # silent mode or simply not found. +# Metadata for the fine-grained dependencies file associated with a module. 
FgDepMeta = TypedDict('FgDepMeta', {'path': str, 'mtime': int}) @@ -455,6 +456,7 @@ class BuildManager(BuildManagerBase): options: Build options missing_modules: Set of modules that could not be imported encountered so far stale_modules: Set of modules that needed to be rechecked (only used by tests) + fg_deps_meta: Metadata for fine-grained dependencies caches associated with modules version_id: The current mypy version (based on commit id when possible) plugin: Active mypy plugin(s) plugins_snapshot: @@ -725,8 +727,9 @@ def stats_summary(self) -> Mapping[str, object]: def deps_to_json(x: Dict[str, Set[str]]) -> str: return json.dumps({k: list(v) for k, v in x.items()}) - +# File for storing metadata about all the fine-grained dependency caches DEPS_META_FILE = '@deps.meta.json' # type: Final +# File for storing fine-grained dependencies that didn't a parent in the build DEPS_ROOT_FILE = '@root.deps.json' # type: Final @@ -796,11 +799,12 @@ def write_deps_cache(rdeps: Dict[str, Dict[str, Set[str]]], def invert_deps(deps: Dict[str, Set[str]], graph: Graph) -> Dict[str, Dict[str, Set[str]]]: - """Splits fine-grained dependencies based on the module of the trigger + """Splits fine-grained dependencies based on the module of the trigger. Returns a dictionary from module ids to all dependencies on that - module. Dependencies not associated with a module in the build are - associated with the fake module '@root'. + module. Dependencies not associated with a module in the build will be + associated with the nearest parent module that is in the build, or the + fake module '@root' if none are. """ # Lazy import to speed up startup from mypy.server.target import module_prefix, trigger_to_target @@ -823,7 +827,7 @@ def invert_deps(deps: Dict[str, Set[str]], def generate_deps_for_cache(proto_deps: Dict[str, Set[str]], manager: BuildManager, graph: Graph) -> Dict[str, Dict[str, Set[str]]]: - """Generate fine-grained dependenecies into a form suitable for serializing. + """Generate fine-grained dependencies into a form suitable for serializing. This does a few things: 1. Computes all fine grained deps from modules that were processed @@ -832,8 +836,9 @@ def generate_deps_for_cache(proto_deps: Dict[str, Set[str]], deps and merge them in. Returns a dictionary from module ids to all dependencies on that - module. Dependencies not associated with a module in the build are - associated with the fake module '@root'. + module. Dependencies not associated with a module in the build will be + associated with the nearest parent module that is in the build, or the + fake module '@root' if none are. """ from mypy.server.update import merge_dependencies # Lazy import to speed up startup @@ -887,6 +892,8 @@ def read_deps_cache(manager: BuildManager, See the write_deps_cache documentation for more information on the details of the cache. + + Returns None if the cache was invalid in some way. """ deps_meta = _load_json_file(DEPS_META_FILE, manager, log_sucess='Deps meta ', From 45403b1e680160606768cf4a202c60760e0db913 Mon Sep 17 00:00:00 2001 From: "Michael J. 
Sullivan" Date: Wed, 30 Jan 2019 13:10:16 -0800 Subject: [PATCH 19/20] make @root a constant --- mypy/build.py | 14 +++++++++----- mypy/server/update.py | 3 ++- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 52e1bc8595d6..ac53f503c365 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -732,6 +732,10 @@ def deps_to_json(x: Dict[str, Set[str]]) -> str: # File for storing fine-grained dependencies that didn't a parent in the build DEPS_ROOT_FILE = '@root.deps.json' # type: Final +# The name of the fake module used to store fine-grained dependencies that +# have no other place to go. +FAKE_ROOT_MODULE = '@root' # type: Final + def write_deps_cache(rdeps: Dict[str, Dict[str, Set[str]]], manager: BuildManager, graph: Graph) -> None: @@ -744,7 +748,7 @@ def write_deps_cache(rdeps: Dict[str, Dict[str, Set[str]]], if module 'n' depends on 'm', that produces entries in m.deps.json. When there is a dependency on a module that does not exist in the build, it is stored with its first existing parent module. If no - such module exists, it is stored with the fake module '@root'. + such module exists, it is stored with the fake module FAKE_ROOT_MODULE. This means that the validity of the fine-grained dependency caches are a global property, so we store validity checking information for @@ -761,7 +765,7 @@ def write_deps_cache(rdeps: Dict[str, Dict[str, Set[str]]], fg_deps_meta = manager.fg_deps_meta.copy() for id in rdeps: - if id != '@root': + if id != FAKE_ROOT_MODULE: _, _, deps_json = get_cache_names(id, graph[id].xpath, manager) else: deps_json = DEPS_ROOT_FILE @@ -804,7 +808,7 @@ def invert_deps(deps: Dict[str, Set[str]], Returns a dictionary from module ids to all dependencies on that module. Dependencies not associated with a module in the build will be associated with the nearest parent module that is in the build, or the - fake module '@root' if none are. + fake module FAKE_ROOT_MODULE if none are. """ # Lazy import to speed up startup from mypy.server.target import module_prefix, trigger_to_target @@ -816,7 +820,7 @@ def invert_deps(deps: Dict[str, Set[str]], for trigger, targets in deps.items(): module = module_prefix(graph, trigger_to_target(trigger)) if not module or not graph[module].tree: - module = '@root' + module = FAKE_ROOT_MODULE mod_rdeps = rdeps.setdefault(module, {}) mod_rdeps.setdefault(trigger, set()).update(targets) @@ -838,7 +842,7 @@ def generate_deps_for_cache(proto_deps: Dict[str, Set[str]], Returns a dictionary from module ids to all dependencies on that module. Dependencies not associated with a module in the build will be associated with the nearest parent module that is in the build, or the - fake module '@root' if none are. + fake module FAKE_ROOT_MODULE if none are. 
""" from mypy.server.update import merge_dependencies # Lazy import to speed up startup diff --git a/mypy/server/update.py b/mypy/server/update.py index 7c1d5fc9db81..f7ec00a67da7 100644 --- a/mypy/server/update.py +++ b/mypy/server/update.py @@ -121,6 +121,7 @@ from mypy.build import ( BuildManager, State, BuildResult, Graph, load_graph, process_fresh_modules, DEBUG_FINE_GRAINED, + FAKE_ROOT_MODULE, ) from mypy.modulefinder import BuildSource from mypy.checker import FineGrainedDeferredNode @@ -439,7 +440,7 @@ def ensure_trees_loaded(manager: BuildManager, graph: Dict[str, State], def get_all_dependencies(manager: BuildManager, graph: Dict[str, State]) -> Dict[str, Set[str]]: """Return the fine-grained dependency map for an entire build.""" # Deps for each module were computed during build() or loaded from the cache. - deps = manager.load_fine_grained_deps('@root') # type: Dict[str, Set[str]] + deps = manager.load_fine_grained_deps(FAKE_ROOT_MODULE) # type: Dict[str, Set[str]] for id in graph: if graph[id].tree is not None: merge_dependencies(graph[id].compute_fine_grained_deps(), deps) From 454f5cc01e92d67c38f466f2781c118a484791a5 Mon Sep 17 00:00:00 2001 From: "Michael J. Sullivan" Date: Wed, 30 Jan 2019 17:37:17 -0800 Subject: [PATCH 20/20] linter --- mypy/build.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mypy/build.py b/mypy/build.py index ac53f503c365..97aa47f4f2b1 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -727,6 +727,7 @@ def stats_summary(self) -> Mapping[str, object]: def deps_to_json(x: Dict[str, Set[str]]) -> str: return json.dumps({k: list(v) for k, v in x.items()}) + # File for storing metadata about all the fine-grained dependency caches DEPS_META_FILE = '@deps.meta.json' # type: Final # File for storing fine-grained dependencies that didn't a parent in the build