Merge pull request #394 from sirosen/plugin/inspect-asserts
Add "prettyassert" plugin that gives bare asserts better support
sirosen committed Dec 1, 2018
2 parents b769079 + 6b6eed0 commit 8949e7e
Showing 14 changed files with 613 additions and 0 deletions.
1 change: 1 addition & 0 deletions nose2/plugins/_constants.py
@@ -13,4 +13,5 @@
    'nose2.plugins.buffer',
    'nose2.plugins.failfast',
    'nose2.plugins.debugger',
    'nose2.plugins.prettyassert',
)
388 changes: 388 additions & 0 deletions nose2/plugins/prettyassert.py
@@ -0,0 +1,388 @@
"""
Make assert statements print pretty output, including source.

This makes ``assert x == y`` more usable, as an alternative to
``self.assertEqual(x, y)``.

This plugin implements :func:`outcomeDetail` and checks for event.exc_info.
If it finds that an AssertionError happened, it will inspect the traceback
and add additional detail to the error report.
"""

from __future__ import print_function

import collections
import inspect
import re
import six
import textwrap
import tokenize

from nose2 import events


__unittest = True
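
# Usage sketch: PrettyAssert (below) registers the --pretty-assert flag, so
# the plugin can be enabled per-run with ``nose2 --pretty-assert``.  Turning
# it on by default via its config section should also work; note that
# ``always-on`` is the generic nose2 plugin option, not something defined in
# this file:
#   [pretty-assert]
#   always-on = True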


class PrettyAssert(events.Plugin):
    """Add pretty output for "assert" statements"""
    configSection = 'pretty-assert'
    commandLineSwitch = (
        None, 'pretty-assert', 'Add pretty output for "assert" statements')

    def outcomeDetail(self, event):
        # skip if no exception or expected error
        if (not event.outcomeEvent.exc_info) or event.outcomeEvent.expected:
            return

        # unpack, but skip if it's not an AssertionError
        excty, exc, trace = event.outcomeEvent.exc_info
        if excty is not AssertionError:
            return

        self.addAssertDetail(event.extraDetail, exc, trace)

    @staticmethod
    def addAssertDetail(extraDetail, exc, trace):
        """
        Add details to the output regarding an AssertionError and its
        context.

        extraDetail: a list of lines which will be joined with newlines and
            added to the output for this test failure -- defined as part of
            the event format
        exc: the AssertionError exception which was raised
        trace: a traceback object for the exception
        """
        assert_statement, token_descriptions = _collect_assert_data(trace)

        # no message was given
        if len(exc.args) == 0:
            message = None
        else:
            message = exc.args[0]

        # if no assertion statement was found, do not add detail to the
        # output
        #
        # in cases like unittest assert*() methods, an assertion error is
        # raised, but it doesn't originate with an `assert` statement and
        # has an autogenerated message
        if not assert_statement:
            return

        #
        # actually add exception info to the detail
        #

        # add the assert statement to the output with a '>>> ' prefix
        extraDetail.append(
            re.sub(
                '^', '>>> ',
                assert_statement,
                flags=re.MULTILINE
            )
        )

        if message:
            extraDetail.append('\nmessage:')
            extraDetail.append(' {}'.format(message))

        if token_descriptions:
            extraDetail.append('\nvalues:')
            for k, v in token_descriptions.items():
                extraDetail.append(' {} = {}'.format(k, v))
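
# with a failing ``assert x == y`` where x = 1 and y = 2 and no message,
# the detail built above renders roughly as:
#
#   >>> assert x == y
#
#   values:
#    x = 1
#    y = 2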


def _collect_assert_data(trace):
    """
    Given a traceback, extract the assertion statement and get the set of
    bound variable names (i.e. tokens)
    """
    # inspect the trace, collecting various data and determining whether or
    # not it can be tokenized at all
    source_lines, frame_locals, frame_globals, can_tokenize = (
        _get_inspection_info(trace))

    # if things will tokenize cleanly, actually do it
    if can_tokenize:
        assert_startline, token_descriptions = _tokenize_assert(
            source_lines, frame_locals, frame_globals)
    # otherwise, indicate that we can't render detail by use of Nones
    else:
        assert_startline = None
        token_descriptions = None

    # if we found an "assert" (we might not, if someone raises
    # AssertionError themselves), grab the whole assertion statement
    #
    # as a fallback, stick with whatever we think the statement was
    # - this is easily deceived by multiline expressions
    if assert_startline is not None:
        statement = textwrap.dedent(
            ''.join(source_lines[assert_startline:]).rstrip('\n'))
    else:
        statement = None

    return statement, token_descriptions


def _get_inspection_info(trace):
    """
    Pick apart a traceback for the info we actually want to inspect from it
    - lines of source (truncated)
    - locals and globals from the execution frame
    - the statement which failed (which can be garbage -- don't trust it)
    - can_tokenize: a bool indicating that the lines of source can be parsed
    """
    (frame, fname, lineno, funcname, context, ctx_index) = (
        inspect.getinnerframes(trace)[-1])
    original_source_lines, firstlineno = inspect.getsourcelines(frame)

    # truncate to the code in this frame
    # - remove the test function definition line
    # - remove anything after the current assert statement
    last_index = (lineno - firstlineno + 1)
    source_lines = original_source_lines[1:last_index]

    # the current line may actually be an incomplete expression, as in
    #   assert x == (y
    #                ).z
    # in which case the current line is "assert x == (y", which is not a
    # complete expression
    #
    # try to append lines to complete the expression, retrying the parse
    # each time until it succeeds
    for line in original_source_lines[last_index:]:
        if _can_tokenize(source_lines):
            break
        else:
            source_lines.append(line)

    return (
        source_lines,
        frame.f_locals, frame.f_globals,
        _can_tokenize(source_lines)
    )
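
# worked example for the truncation above: if the test function's "def" is
# at line 10 of the file (firstlineno) and the failing assert is at line 12
# (lineno), then last_index is 3 and source_lines starts as the two lines
# after the "def" line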


def _can_tokenize(source_lines):
    """
    Check if a list of lines of source can successfully be tokenized
    """
    # tokenize.generate_tokens requires a file-like object, so we need to
    # convert source_lines to a StringIO to give it that interface
    filelike = six.StringIO(textwrap.dedent(''.join(source_lines)))

    try:
        for tokty, tok, start, end, tok_lineno in (
                tokenize.generate_tokens(filelike.readline)):
            pass
    except tokenize.TokenError:
        return False

    return True
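
# for example (a quick sketch): ["assert (x ==\n"] alone fails -- the
# unclosed paren makes tokenize raise TokenError at end of input -- while
# ["assert (x ==\n", "        1)\n"] tokenizes cleanly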


def _tokenize_assert(source_lines, frame_locals, frame_globals):
    """
    Given a set of lines of source ending in a failing assert, plus the
    frame locals and globals, tokenize the source.

    Only look at tokens in the final assert statement.
    Resolve all names to the repr() of their values.

    Return
        The line on which the assert starts (relative to the start of
        source_lines)
        A collection of token descriptions as a name=val ordered dict
    """
    # tokenize.generate_tokens requires a file-like object, so we need to
    # convert source_lines to a StringIO to give it that interface
    filelike_context = six.StringIO(textwrap.dedent(''.join(source_lines)))

    # track the first line of the assert statement
    # when the assert is on one line, we'll have it easily, but a multiline
    # statement like
    #   assert (x ==
    #           1)
    # will leave us holding the last line of the statement,
    # e.g. "        1)", which is not useful
    # so every time a new assert is found, we get a value back indicating
    # its start line
    #
    #   assert True
    #   assert False
    # works fine, because we'll just hold the last value
    #
    #   assert True
    #   assert False
    #   assert True
    # also works, because we truncated source_lines to remove the final
    # assert, which we didn't reach during execution
    assert_startline = None

    token_processor = TokenProcessor(frame_locals, frame_globals)

    # tokenize and process each token
    for tokty, tok, start, end, tok_lineno in (
            tokenize.generate_tokens(filelike_context.readline)):
        ret = token_processor.handle_token(tokty, tok, start, end, tok_lineno)
        if ret:
            assert_startline = ret

    # adjust assert_startline by 1 to become a valid index into
    # source_lines -- "line 1" means "index 0"
    if assert_startline:
        assert_startline -= 1

    token_descriptions = collections.OrderedDict()
    for (name, obj) in token_processor.get_token_collection().items():
        # use repr() to get a good string representation
        strvalue = repr(obj)
        # add it in the form we want to print
        token_descriptions[name] = strvalue

    return assert_startline, token_descriptions
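
# for example, tokenizing ["assert x == y\n"] with frame locals
# {'x': 1, 'y': 2} returns (0, OrderedDict([('x', '1'), ('y', '2')]))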


class TokenProcessor(object):
    def __init__(self, frame_locals, frame_globals):
        # local and global variables from the frame which we're inspecting
        self.frame_locals, self.frame_globals = frame_locals, frame_globals

        # None or a tuple of (object, name) where
        # - "object" is the object whose attributes we are currently
        #   resolving
        # - "name" is its name, as we would like to display it
        #
        # starts each time we see a sequence of NAME OP NAME OP NAME (etc.)
        # ends each time we see a token which is neither NAME nor OP
        self.doing_resolution = None

        # an index of known token names (including the long "x.y.z" names
        # we get from attribute resolution) to their values, in the order
        # in which they were encountered
        # track which tokens we've seen to avoid duplicates if a name
        # appears twice, as in `assert x != x`
        self.seen_tokens = collections.OrderedDict()

        # the previous token seen, as a tuple of (tok_type, token_name)
        # (or None when we start)
        self.last_tok = None

    def get_token_collection(self):
        return self.seen_tokens

    def handle_token(self, toktype, tok, start, end, line):
        """
        A tokenization processor for tokenize.generate_tokens

        Skips certain token types, class names, etc.
        When an identifiable/usable token is found, add it to the token
        collection (self.seen_tokens)
        When an "assert" statement is found, reset the token collection
        and return the start line (relative to the text being tokenized)
        """
        prior_tok = self.last_tok
        self.last_tok = (toktype, tok)

        # CASE 0: skip non "NAME" or "OP" tokens and clear the current
        # resolution
        #
        # NAME is most identifiers and keywords
        # OP is operators, including .
        #
        # special note: don't clear resolution for whitespace
        # (e.g. newline)
        if toktype not in (tokenize.NAME, tokenize.OP):
            # only newline for now, maybe we'll find others
            if toktype not in (tokenize.NEWLINE,):
                self.doing_resolution = None
            return

        # CASE 1: Operator token
        #
        # skip these tokens and either leave resolution in progress or
        # reset it, depending on the operator
        #
        # continue resolution for
        #   "."
        #     because that's what attribute resolution *is*
        #   ")"
        #     this is handy, as it means that "(x).y" works
        #
        # reset resolution for everything else, e.g. "[", "]", ":"
        # special note: reset resolution for "("
        #
        # failing to filter out "(" can result in badness in cases like
        # this:
        #   >>> def foo():
        #   >>>     return [1]
        #   >>> foo.pop = 2
        #   >>> ...
        #   >>> def test_foo():
        #   >>>     assert foo().pop() == 2
        #
        # if we stop resolution when we see an LPAREN, we resolve `foo`
        # successfully, fail on `pop`, and everything is OK; but if we try
        # to traverse the LPAREN, we get `foo.pop = 2` in our values, which
        # is wrong
        if toktype == tokenize.OP:
            if tok not in (".", ")"):
                self.doing_resolution = None
            return

        # CASE 2: "assert" statement
        # an assert statement was reached, so reset and
        # return the start line (start = (startrow, startcol))
        if tok == 'assert':
            self.seen_tokens.clear()
            self.doing_resolution = None
            return start[0]

        # handle NAME tokens

        # CASE 3: a name is being resolved,
        # there is a previous token,
        # and it's a "." operator
        if self.doing_resolution and prior_tok and (
                prior_tok[0] == tokenize.OP and prior_tok[1] == '.'):
            # unpack and look for the attribute
            obj, name = self.doing_resolution
            if hasattr(obj, tok):
                obj = getattr(obj, tok)
                name = name + '.' + tok
                self.doing_resolution = (obj, name)
                self.seen_tokens[name] = obj
            # if we couldn't find a relevant attribute, reset resolution so
            # that we can try afresh
            else:
                self.doing_resolution = None

        # CASE 4: a name is being resolved and there is no preceding "." or
        # resolution was explicitly stopped
        else:
            # skip tokens we've seen, but grab them as the current thing
            # under resolution
            if tok in self.seen_tokens:
                self.doing_resolution = (self.seen_tokens[tok], tok)
                return
            # we've never seen this token before
            else:
                # try to resolve it to a value
                try:
                    value = self.frame_locals[tok]
                except KeyError:
                    try:
                        value = self.frame_globals[tok]
                    except KeyError:
                        # unresolvable name -- short circuit
                        # shows up in some cases like `f().x` in which `x`
                        # might not be a name bound to a value
                        return

                # add it (so we don't try it again unless we hit a new
                # assert and reset)
                self.seen_tokens[tok] = value

                self.doing_resolution = (value, tok)
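
# rough walkthrough (illustrative only, with a hypothetical local ``foo``):
# feeding the tokens of ``assert foo.bar == 1`` through handle_token goes:
#   NAME "assert"  -> seen_tokens cleared; the start row is returned
#   NAME "foo"     -> looked up in the frame locals/globals and recorded
#   OP "."         -> resolution left open (CASE 1)
#   NAME "bar"     -> resolved via getattr(foo, 'bar'), recorded as "foo.bar"
#   OP "=="        -> neither "." nor ")", so resolution is reset
#   NUMBER "1"     -> not NAME or OP; skipped (CASE 0)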
