New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support protocols in fine grained mode #4790
Changes from all commits
ca018df
f218a5a
8b7d40b
a7031e8
1230d91
63ad42d
11453c5
331985e
bccfbc5
a0e32ad
4ca0afe
bce5f5d
dd541b0
60433e0
9c3a46e
281659b
29ea7a8
156a477
081552a
df72a03
c20878a
f565c8f
2cd62af
3934a8e
276564a
2604067
531b75e
2d57038
5da0aeb
608e6e1
257f7ae
1e29ecc
17d7db2
15bd91f
ecaaeb1
544d135
3045ad3
39ffd04
ed1b8f6
95f13fa
10a9454
d136a27
74b3e7f
2fbb5f1
234ad3c
88799dd
7209bf1
cd58a4f
8ece521
c146bcb
a4e120c
c565769
fc680d9
10f6a88
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -58,7 +58,7 @@ | |
from mypy.defaults import PYTHON3_VERSION_MIN | ||
from mypy.server.deps import get_dependencies | ||
from mypy.fscache import FileSystemCache | ||
from mypy.typestate import TypeState | ||
from mypy.typestate import TypeState, reset_global_state | ||
|
||
|
||
# Switch to True to produce debug output related to fine-grained incremental | ||
|
@@ -280,7 +280,7 @@ def _build(sources: List[BuildSource], | |
flush_errors=flush_errors, | ||
fscache=fscache) | ||
|
||
TypeState.reset_all_subtype_caches() | ||
reset_global_state() | ||
try: | ||
graph = dispatch(sources, manager) | ||
if not options.fine_grained_incremental: | ||
|
@@ -609,7 +609,9 @@ class BuildManager: | |
but is disabled if fine-grained cache loading fails | ||
and after an initial fine-grained load. This doesn't | ||
determine whether we write cache files or not. | ||
stats: Dict with various instrumentation numbers | ||
stats: Dict with various instrumentation numbers, it is used | ||
not only for debugging, but also required for correctness, | ||
in particular to check consistency of the protocol dependency cache. | ||
fscache: A file system cacher | ||
""" | ||
|
||
|
@@ -962,6 +964,96 @@ def verify_module(fscache: FileSystemCache, id: str, path: str) -> bool: | |
return True | ||
|
||
|
||
def write_protocol_deps_cache(proto_deps: Dict[str, Set[str]], | ||
manager: BuildManager, graph: Graph) -> None: | ||
"""Write cache files for protocol dependencies. | ||
|
||
Serialize protocol dependencies map for fine grained mode. Also take the snapshot | ||
of current sources to later check consistency between protocol cache and individual | ||
cache files. | ||
|
||
Out of three kinds of protocol dependencies described in TypeState._snapshot_protocol_deps, | ||
only the last two kinds are stored in global protocol caches, dependencies of the first kind | ||
(i.e. <SuperProto[wildcard]>, <Proto[wildcard]> -> <Proto>) are written to the normal | ||
per-file fine grained dependency caches. | ||
""" | ||
proto_meta, proto_cache = get_protocol_deps_cache_name(manager) | ||
meta_snapshot = {} # type: Dict[str, str] | ||
error = False | ||
for id in graph: | ||
meta_snapshot[id] = graph[id].source_hash | ||
if not atomic_write(proto_meta, json.dumps(meta_snapshot), '\n'): | ||
manager.log("Error writing protocol meta JSON file {}".format(proto_cache)) | ||
error = True | ||
listed_proto_deps = {k: list(v) for (k, v) in proto_deps.items()} | ||
if not atomic_write(proto_cache, json.dumps(listed_proto_deps), '\n'): | ||
manager.log("Error writing protocol deps JSON file {}".format(proto_cache)) | ||
error = True | ||
if error: | ||
manager.errors.set_file(_cache_dir_prefix(manager), None) | ||
manager.errors.report(0, 0, "Error writing protocol dependencies cache", | ||
blocker=True) | ||
|
||
|
||
def read_protocol_cache(manager: BuildManager, | ||
graph: Graph) -> Optional[Dict[str, Set[str]]]: | ||
"""Read and validate protocol dependencies cache. | ||
|
||
See docstring for write_protocol_cache for details about which kinds of | ||
dependencies are read. | ||
""" | ||
proto_meta, proto_cache = get_protocol_deps_cache_name(manager) | ||
meta_snapshot = _load_json_file(proto_meta, manager, | ||
log_sucess='Proto meta ', | ||
log_error='Could not load protocol metadata: ') | ||
if meta_snapshot is None: | ||
return None | ||
current_meta_snapshot = {} # type: Dict[str, str] | ||
for id in graph: | ||
meta = graph[id].meta | ||
assert meta is not None, 'Protocol cache should be read after all other' | ||
current_meta_snapshot[id] = meta.hash | ||
common = set(meta_snapshot.keys()) & set(current_meta_snapshot.keys()) | ||
if any(meta_snapshot[id] != current_meta_snapshot[id] for id in common): | ||
# TODO: invalidate also if options changed (like --strict-optional)? | ||
manager.log('Protocol cache inconsistent, ignoring') | ||
return None | ||
deps = _load_json_file(proto_cache, manager, | ||
log_sucess='Proto deps ', | ||
log_error='Could not load protocol cache: ') | ||
if deps is None: | ||
return None | ||
if not isinstance(deps, dict): | ||
manager.log('Could not load protocol cache: cache is not a dict: {}' | ||
.format(type(deps))) | ||
return None | ||
return {k: set(v) for (k, v) in deps.items()} | ||
|
||
|
||
def _load_json_file(file: str, manager: BuildManager, | ||
log_sucess: str, log_error: str) -> Optional[Dict[str, Any]]: | ||
"""A simple helper to read a JSON file with logging.""" | ||
try: | ||
with open(file, 'r') as f: | ||
data = f.read() | ||
except IOError: | ||
manager.log(log_error + file) | ||
return None | ||
manager.trace(log_sucess + data.rstrip()) | ||
result = json.loads(data) # TODO: Errors | ||
return result | ||
|
||
|
||
def _cache_dir_prefix(manager: BuildManager, id: Optional[str] = None) -> str: | ||
"""Get current cache directory (or file if id is given).""" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hmm, the new name doesn't work with a non- |
||
cache_dir = manager.options.cache_dir | ||
pyversion = manager.options.python_version | ||
base = os.path.join(cache_dir, '%d.%d' % pyversion) | ||
if id is None: | ||
return base | ||
return os.path.join(base, *id.split('.')) | ||
|
||
|
||
def get_cache_names(id: str, path: str, manager: BuildManager) -> Tuple[str, str, Optional[str]]: | ||
"""Return the file names for the cache files. | ||
|
||
|
@@ -975,9 +1067,7 @@ def get_cache_names(id: str, path: str, manager: BuildManager) -> Tuple[str, str | |
A tuple with the file names to be used for the meta JSON, the | ||
data JSON, and the fine-grained deps JSON, respectively. | ||
""" | ||
cache_dir = manager.options.cache_dir | ||
pyversion = manager.options.python_version | ||
prefix = os.path.join(cache_dir, '%d.%d' % pyversion, *id.split('.')) | ||
prefix = _cache_dir_prefix(manager, id) | ||
is_package = os.path.basename(path).startswith('__init__.py') | ||
if is_package: | ||
prefix = os.path.join(prefix, '__init__') | ||
|
@@ -988,6 +1078,20 @@ def get_cache_names(id: str, path: str, manager: BuildManager) -> Tuple[str, str | |
return (prefix + '.meta.json', prefix + '.data.json', deps_json) | ||
|
||
|
||
def get_protocol_deps_cache_name(manager: BuildManager) -> Tuple[str, str]: | ||
"""Return file names for fine grained protocol dependencies cache. | ||
|
||
Since these dependencies represent a global state of the program, they | ||
are serialized per program, not per module, and the corresponding files | ||
live at the root of the cache folder for a given Python version. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe document that this returns a tuple (meta file path, data file path) and describe what's included in the meta file. |
||
Return a tuple ('meta file path', 'data file path'), where the meta file | ||
contains hashes of all source files at the time the protocol dependencies | ||
were written, and data file contains the protocol dependencies. | ||
""" | ||
name = os.path.join(_cache_dir_prefix(manager), 'proto_deps') | ||
return name + '.meta.json', name + '.data.json' | ||
|
||
|
||
def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[CacheMeta]: | ||
"""Find cache data for a module. | ||
|
||
|
@@ -1003,13 +1107,10 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[Cache | |
# TODO: May need to take more build options into account | ||
meta_json, data_json, deps_json = get_cache_names(id, path, manager) | ||
manager.trace('Looking for {} at {}'.format(id, meta_json)) | ||
try: | ||
with open(meta_json, 'r') as f: | ||
meta_str = f.read() | ||
manager.trace('Meta {} {}'.format(id, meta_str.rstrip())) | ||
meta = json.loads(meta_str) # TODO: Errors | ||
except IOError: | ||
manager.log('Could not load cache for {}: could not find {}'.format(id, meta_json)) | ||
meta = _load_json_file(meta_json, manager, | ||
log_sucess='Meta {} '.format(id), | ||
log_error='Could not load cache for {}: '.format(id)) | ||
if meta is None: | ||
return None | ||
if not isinstance(meta, dict): | ||
manager.log('Could not load cache for {}: meta cache is not a dict: {}' | ||
|
@@ -2237,8 +2338,25 @@ def dispatch(sources: List[BuildSource], manager: BuildManager) -> Graph: | |
# just want to load in all of the cache information. | ||
if manager.use_fine_grained_cache(): | ||
process_fine_grained_cache_graph(graph, manager) | ||
# Fine grained protocol dependencies are serialized separately, so we read them | ||
# after we loaded cache for whole graph. The `read_protocol_cache` will also validate | ||
# the protocol cache against the loaded individual cache files. | ||
TypeState.proto_deps = read_protocol_cache(manager, graph) | ||
if TypeState.proto_deps is None and manager.stats.get('fresh_trees', 0) > 0: | ||
# There were some cache files read, but no protocol dependencies loaded. | ||
manager.errors.set_file(_cache_dir_prefix(manager), None) | ||
manager.errors.report(0, 0, "Error reading protocol dependencies cache -- aborting", | ||
blocker=True) | ||
else: | ||
process_graph(graph, manager) | ||
if manager.options.cache_fine_grained or manager.options.fine_grained_incremental: | ||
# If we are running a daemon or are going to write cache for further fine grained use, | ||
# then we need to collect fine grained protocol dependencies. | ||
# Since these are a global property of the program, they are calculated after we | ||
# processed the whole graph. | ||
TypeState.update_protocol_deps() | ||
if TypeState.proto_deps is not None and not manager.options.fine_grained_incremental: | ||
write_protocol_deps_cache(TypeState.proto_deps, manager, graph) | ||
|
||
if manager.options.dump_deps: | ||
# This speeds up startup a little when not using the daemon mode. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2065,9 +2065,6 @@ class is generic then it will be a type constructor of higher kind. | |
is_protocol = False # Is this a protocol class? | ||
runtime_protocol = False # Does this protocol support isinstance checks? | ||
abstract_attributes = None # type: List[str] | ||
# Protocol members are names of all attributes/methods defined in a protocol | ||
# and in all its supertypes (except for 'object'). | ||
protocol_members = None # type: List[str] | ||
|
||
# The attributes 'assuming' and 'assuming_proper' represent structural subtype matrices. | ||
# | ||
|
@@ -2211,6 +2208,18 @@ def get_containing_type_info(self, name: str) -> 'Optional[TypeInfo]': | |
return cls | ||
return None | ||
|
||
@property | ||
def protocol_members(self) -> List[str]: | ||
# Protocol members are names of all attributes/methods defined in a protocol | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should this return an empty list if the class is not a protocol, similar to how this was defined earlier? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would say it is not necessary, but I see no particular danger in this. |
||
# and in all its supertypes (except for 'object'). | ||
members = set() # type: Set[str] | ||
assert self.mro, "This property can be only acessed after MRO is (re-)calculated" | ||
for base in self.mro[:-1]: # we skip "object" since everyone implements it | ||
if base.is_protocol: | ||
for name in base.names: | ||
members.add(name) | ||
return sorted(list(members)) | ||
|
||
def __getitem__(self, name: str) -> 'SymbolTableNode': | ||
n = self.get(name) | ||
if n: | ||
|
@@ -2331,7 +2340,6 @@ def serialize(self) -> JsonDict: | |
'names': self.names.serialize(self.fullname()), | ||
'defn': self.defn.serialize(), | ||
'abstract_attributes': self.abstract_attributes, | ||
'protocol_members': self.protocol_members, | ||
'type_vars': self.type_vars, | ||
'bases': [b.serialize() for b in self.bases], | ||
'mro': [c.fullname() for c in self.mro], | ||
|
@@ -2357,7 +2365,6 @@ def deserialize(cls, data: JsonDict) -> 'TypeInfo': | |
ti._fullname = data['fullname'] | ||
# TODO: Is there a reason to reconstruct ti.subtypes? | ||
ti.abstract_attributes = data['abstract_attributes'] | ||
ti.protocol_members = data['protocol_members'] | ||
ti.type_vars = data['type_vars'] | ||
ti.bases = [mypy.types.Instance.deserialize(b) for b in data['bases']] | ||
ti._promote = (None if data['_promote'] is None | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment.
The reason will be displayed to describe this comment to others. Learn more.
Document which dependencies are included in `proto_deps` (there are three different kinds which might plausibly be included).