Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Flush error messages incrementally after processing a file #4396

Merged
merged 15 commits into from
Jan 9, 2018
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 51 additions & 5 deletions mypy/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import time
from os.path import dirname, basename
import errno
from functools import wraps
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You don't need this any more.


from typing import (AbstractSet, Any, cast, Dict, Iterable, Iterator, List,
Mapping, NamedTuple, Optional, Set, Tuple, Union, Callable)
Expand Down Expand Up @@ -80,7 +81,7 @@ def __init__(self, manager: 'BuildManager', graph: Graph) -> None:
self.graph = graph
self.files = manager.modules
self.types = manager.all_types # Non-empty for tests only or if dumping deps
self.errors = manager.errors.messages()
self.errors = [] # type: List[str] # Filled in by build if desired


class BuildSource:
Expand Down Expand Up @@ -133,6 +134,7 @@ def build(sources: List[BuildSource],
alt_lib_path: Optional[str] = None,
bin_dir: Optional[str] = None,
saved_cache: Optional[SavedCache] = None,
flush_errors: Optional[Callable[[List[str], bool], None]] = None,
) -> BuildResult:
"""Analyze a program.

Expand All @@ -142,6 +144,11 @@ def build(sources: List[BuildSource],
Return BuildResult if successful or only non-blocking errors were found;
otherwise raise CompileError.

If a flush_errors callback is provided, all error messages will be
passed to it and the errors and messages fields of BuildResult and
CompileError (respectively) will be empty. Otherwise those fields will
report any error messages.

Args:
sources: list of sources to build
options: build options
Expand All @@ -150,7 +157,36 @@ def build(sources: List[BuildSource],
bin_dir: directory containing the mypy script, used for finding data
directories; if omitted, use '.' as the data directory
saved_cache: optional dict with saved cache state for dmypy (read-write!)
flush_errors: optional function to flush errors after a file is processed

"""
# If we were not given a flush_errors, we use one that will populate those
# fields for callers that want the traditional API.
messages = []

def default_flush_errors(new_messages: List[str], is_serious: bool) -> None:
messages.extend(new_messages)

flush_errors = flush_errors or default_flush_errors

try:
result = _build(sources, options, alt_lib_path, bin_dir, saved_cache, flush_errors)
result.errors = messages
return result
except CompileError as e:
serious = not e.use_stdout
flush_errors(e.messages, serious)
e.messages = messages
raise


def _build(sources: List[BuildSource],
options: Options,
alt_lib_path: Optional[str],
bin_dir: Optional[str],
saved_cache: Optional[SavedCache],
flush_errors: Callable[[List[str], bool], None],
) -> BuildResult:
# This seems the most reasonable place to tune garbage collection.
gc.set_threshold(50000)

Expand Down Expand Up @@ -212,7 +248,8 @@ def build(sources: List[BuildSource],
version_id=__version__,
plugin=plugin,
errors=errors,
saved_cache=saved_cache)
saved_cache=saved_cache,
flush_errors=flush_errors)

try:
graph = dispatch(sources, manager)
Expand Down Expand Up @@ -518,6 +555,7 @@ class BuildManager:
version_id: The current mypy version (based on commit id when possible)
plugin: Active mypy plugin(s)
errors: Used for reporting all errors
flush_errors: A function for processing errors after each SCC
saved_cache: Dict with saved cache state for dmypy and fine-grained incremental mode
(read-write!)
stats: Dict with various instrumentation numbers
Expand All @@ -532,6 +570,7 @@ def __init__(self, data_dir: str,
version_id: str,
plugin: Plugin,
errors: Errors,
flush_errors: Callable[[List[str], bool], None],
saved_cache: Optional[SavedCache] = None,
) -> None:
self.start_time = time.time()
Expand All @@ -555,6 +594,7 @@ def __init__(self, data_dir: str,
self.stale_modules = set() # type: Set[str]
self.rechecked_modules = set() # type: Set[str]
self.plugin = plugin
self.flush_errors = flush_errors
self.saved_cache = saved_cache if saved_cache is not None else {} # type: SavedCache
self.stats = {} # type: Dict[str, Any] # Values are ints or floats

Expand Down Expand Up @@ -703,6 +743,9 @@ def add_stats(self, **kwds: Any) -> None:
def stats_summary(self) -> Mapping[str, object]:
return self.stats

def error_flush(self, msgs: List[str], serious: bool=False) -> None:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider getting rid of this and just inlining the only call site to add serious=False?

self.flush_errors(msgs, serious)


def remove_cwd_prefix_from_path(p: str) -> str:
"""Remove current working directory prefix from p, if present.
Expand Down Expand Up @@ -1973,6 +2016,10 @@ def write_cache(self) -> None:
def dependency_priorities(self) -> List[int]:
return [self.priorities.get(dep, PRI_HIGH) for dep in self.dependencies]

def generate_unused_ignore_notes(self) -> None:
if self.options.warn_unused_ignores:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since you've made this effectively into a per-module option, please add it to the list of such in options.py.

self.manager.errors.generate_unused_ignore_notes(self.xpath)


def dispatch(sources: List[BuildSource], manager: BuildManager) -> Graph:
set_orig = set(manager.saved_cache)
Expand All @@ -1999,9 +2046,6 @@ def dispatch(sources: List[BuildSource], manager: BuildManager) -> Graph:
dump_graph(graph)
return graph
process_graph(graph, manager)
if manager.options.warn_unused_ignores:
# TODO: This could also be a per-module option.
manager.errors.generate_unused_ignore_notes()
updated = preserve_cache(graph)
set_updated = set(updated)
manager.saved_cache.clear()
Expand Down Expand Up @@ -2490,6 +2534,8 @@ def process_stale_scc(graph: Graph, scc: List[str], manager: BuildManager) -> No
graph[id].transitive_error = True
for id in stale:
graph[id].finish_passes()
graph[id].generate_unused_ignore_notes()
manager.error_flush(manager.errors.file_messages(graph[id].xpath))
graph[id].write_cache()
graph[id].mark_as_rechecked()

Expand Down
109 changes: 76 additions & 33 deletions mypy/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ class ErrorInfo:
# Only report this particular messages once per program.
only_once = False

# Actual origin of the error message
origin = None # type: Tuple[str, int]

# Fine-grained incremental target where this was reported
target = None # type: Optional[str]

Expand Down Expand Up @@ -90,15 +93,17 @@ class Errors:
current error context (nested imports).
"""

# List of generated error messages.
error_info = None # type: List[ErrorInfo]
# Map from files to generated error messages. Is an OrderedDict so
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if it would be safer to use the module ID rather than the file as the key? Because add_error_info() doesn't call remove_path_prefix(). And IIRC sometimes different passes have different ideas about the filename (normalized or not). However the old code makes the same assumption about error_files I suppose.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not all error infos have a module, unfortunately.

# that it can be used to order messages based on the order the
# files were processed.
error_info_map = None # type: Dict[str, List[ErrorInfo]]

# Files that we have reported the errors for
flushed_files = None # type: Set[str]

# Current error context: nested import context/stack, as a list of (path, line) pairs.
import_ctx = None # type: List[Tuple[str, int]]

# Set of files with errors.
error_files = None # type: Set[str]

# Path name prefix that is removed from all paths, if set.
ignore_prefix = None # type: str

Expand Down Expand Up @@ -141,9 +146,9 @@ def __init__(self, show_error_context: bool = False,
self.initialize()

def initialize(self) -> None:
self.error_info = []
self.error_info_map = OrderedDict()
self.flushed_files = set()
self.import_ctx = []
self.error_files = set()
self.type_name = [None]
self.function_or_member = [None]
self.ignored_lines = OrderedDict()
Expand Down Expand Up @@ -289,8 +294,13 @@ def report(self,
target=self.current_target())
self.add_error_info(info)

def _add_error_info(self, file: str, info: ErrorInfo) -> None:
if file not in self.error_info_map:
self.error_info_map[file] = []
self.error_info_map[file].append(info)

def add_error_info(self, info: ErrorInfo) -> None:
(file, line) = cast(Tuple[str, int], info.origin) # see issue 1855
file, line = info.origin
if not info.blocker: # Blockers cannot be ignored
if file in self.ignored_lines and line in self.ignored_lines[file]:
# Annotation requests us to ignore all errors on this line.
Expand All @@ -302,62 +312,64 @@ def add_error_info(self, info: ErrorInfo) -> None:
if info.message in self.only_once_messages:
return
self.only_once_messages.add(info.message)
self.error_info.append(info)
self.error_files.add(file)

def generate_unused_ignore_notes(self) -> None:
for file, ignored_lines in self.ignored_lines.items():
if not self.is_typeshed_file(file):
for line in ignored_lines - self.used_ignored_lines[file]:
# Don't use report since add_error_info will ignore the error!
info = ErrorInfo(self.import_context(), file, self.current_module(), None,
None, line, -1, 'note', "unused 'type: ignore' comment",
False, False)
self.error_info.append(info)
self._add_error_info(file, info)

def generate_unused_ignore_notes(self, file: str) -> None:
ignored_lines = self.ignored_lines[file]
if not self.is_typeshed_file(file):
for line in ignored_lines - self.used_ignored_lines[file]:
# Don't use report since add_error_info will ignore the error!
info = ErrorInfo(self.import_context(), file, self.current_module(), None,
None, line, -1, 'note', "unused 'type: ignore' comment",
False, False)
self._add_error_info(file, info)

def is_typeshed_file(self, file: str) -> bool:
# gross, but no other clear way to tell
return 'typeshed' in os.path.normpath(file).split(os.sep)

def num_messages(self) -> int:
"""Return the number of generated messages."""
return len(self.error_info)
return sum(len(x) for x in self.error_info_map.values())

def is_errors(self) -> bool:
"""Are there any generated errors?"""
return bool(self.error_info)
return bool(self.error_info_map)

def is_blockers(self) -> bool:
"""Are the any errors that are blockers?"""
return any(err for err in self.error_info if err.blocker)
return any(err for errs in self.error_info_map.values() for err in errs if err.blocker)

def blocker_module(self) -> Optional[str]:
"""Return the module with a blocking error, or None if not possible."""
for err in self.error_info:
if err.blocker:
return err.module
for errs in self.error_info_map.values():
for err in errs:
if err.blocker:
return err.module
return None

def is_errors_for_file(self, file: str) -> bool:
"""Are there any errors for the given file?"""
return file in self.error_files
return file in self.error_info_map

def raise_error(self) -> None:
"""Raise a CompileError with the generated messages.

Render the messages suitable for displaying.
"""
raise CompileError(self.messages(),
# self.new_messages() will format all messages that haven't already
# been returned from a new_module_messages() call.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

s/new_module_messages/???/

raise CompileError(self.new_messages(),
use_stdout=True,
module_with_blocker=self.blocker_module())

def messages(self) -> List[str]:
def format_messages(self, error_info: List[ErrorInfo]) -> List[str]:
"""Return a string list that represents the error messages.

Use a form suitable for displaying to the user.
"""
a = [] # type: List[str]
errors = self.render_messages(self.sort_messages(self.error_info))
errors = self.render_messages(self.sort_messages(error_info))
errors = self.remove_duplicates(errors)
for file, line, column, severity, message in errors:
s = ''
Expand All @@ -375,12 +387,36 @@ def messages(self) -> List[str]:
a.append(s)
return a

def file_messages(self, path: str) -> List[str]:
"""Return a string list of new error messages from a given file.

Use a form suitable for displaying to the user.
"""
if path not in self.error_info_map:
return []
self.flushed_files.add(path)
return self.format_messages(self.error_info_map[path])

def new_messages(self) -> List[str]:
"""Return a string list of new error messages.

Use a form suitable for displaying to the user.
Errors from different files are ordered based on the order in which
they first generated an error.
"""
msgs = []
for path in self.error_info_map.keys():
if path not in self.flushed_files:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The logic using flushed_files worries/confuses me. It seems that once file_messages() has been called for a given file we don't expect there ever to be more messages for that file? Because it seems to work that way. If so I would add an assert to add_error_info() that the file isn't in flushed_messages, since if it is, the error will be suppressed by this logic.

msgs.extend(self.file_messages(path))
return msgs

def targets(self) -> Set[str]:
"""Return a set of all targets that contain errors."""
# TODO: Make sure that either target is always defined or that not being defined
# is okay for fine-grained incremental checking.
return set(info.target
for info in self.error_info
for errs in self.error_info_map.values()
for info in errs
if info.target)

def render_messages(self, errors: List[ErrorInfo]) -> List[Tuple[Optional[str], int, int,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

GitHub won't let me place this comment on the line to which it applies -- the docstring for sort_messages() below: IIUC there's no longer any chance that messages in the same array have different files, so you can change "same file context" into "same context" in that docstring.

Expand Down Expand Up @@ -461,7 +497,7 @@ def render_messages(self, errors: List[ErrorInfo]) -> List[Tuple[Optional[str],
def sort_messages(self, errors: List[ErrorInfo]) -> List[ErrorInfo]:
"""Sort an array of error messages locally by line number.

I.e., sort a run of consecutive messages with the same file
I.e., sort a run of consecutive messages with the same
context by line number, but otherwise retain the general
ordering of the messages.
"""
Expand Down Expand Up @@ -511,6 +547,12 @@ class CompileError(Exception):

It can be a parse, semantic analysis, type check or other
compilation-related error.

CompileErrors raised from an errors object carry all of the
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment is very helpful. But perhaps a form of it would also be useful in the except clause in build.build, where the logic had me baffled for a bit.

messages that have not been reported out by error streaming.
This is patched up by build.build to contain either all error
messages (if errors were streamed) or none (if they were not).

"""

messages = None # type: List[str]
Expand Down Expand Up @@ -554,7 +596,8 @@ def report_internal_error(err: Exception, file: Optional[str], line: int,
# Dump out errors so far, they often provide a clue.
# But catch unexpected errors rendering them.
try:
for msg in errors.messages():
errors.flushed_files = set() # Print out already flushed messages too
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have you tried to provoke this? ISTM that in the only use case where it matters (real users running into a crash) this will just print the entire list of errors twice, potentially just confusing everyone. Or is there a unit test that needs this?

Also, flushed_files feels like an internal attribute of the Errors class -- if you really need this consider making it a flag to new_messages().

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My thought was that I wanted to always make sure that all of the messages printed, even in the cases where the messages were being buffered in build.build. But I think you are right and we would rather lose some messages while running tests than confuse matters by printing duplicate messages in actual use.

for msg in errors.new_messages():
print(msg)
except Exception as e:
print("Failed to dump errors:", repr(e), file=sys.stderr)
Expand Down
Loading