Merge branch 'add-statistics' into 'master'

Add the statistics module *Description of changes* Start adding support for `--statistics` and legacy `get_statistics` API. *Related to:* (Add bug number here) See merge request !73
PyCQA · Jul 13, 2016 · 4d6929c · 4d6929c
2 parents 58e6763 + 2ffcf96
commit 4d6929c
Show file tree

Hide file tree

Showing 4 changed files with 248 additions and 1 deletion.
diff --git a/src/flake8/api/legacy.py b/src/flake8/api/legacy.py
@@ -141,6 +141,8 @@ def __init__(self, application):
         .. warning:: This should not be instantiated by users.
         """
         self._application = application
+        self._style_guide = application.guide
+        self._stats = self._style_guide.stats
 
     @property
     def total_errors(self):
@@ -149,4 +151,7 @@ def total_errors(self):
 
     def get_statistics(self, violation):
         """Get the number of occurences of a violation."""
-        raise NotImplementedError('Statistics capturing needs to happen first')
+        return [
+            '{} {} {}'.format(s.count, s.error_code, s.message)
+            for s in self._stats.statistics_for(violation)
+        ]
diff --git a/src/flake8/statistics.py b/src/flake8/statistics.py
@@ -0,0 +1,118 @@
+"""Statistic collection logic for Flake8."""
+import collections
+
+
+class Statistics(object):
+    """Manager of aggregated statistics for a run of Flake8."""
+
+    def __init__(self):
+        """Initialize the underlying dictionary for our statistics."""
+        self._store = {}
+
+    def record(self, error):
+        """Add the fact that the error was seen in the file.
+
+        :param error:
+            The Error instance containing the information about the violation.
+        :type error:
+            flake8.style_guide.Error
+        """
+        key = Key.create_from(error)
+        if key not in self._store:
+            self._store[key] = Statistic.create_from(error)
+        self._store[key].increment()
+
+    def statistics_for(self, prefix, filename=None):
+        """Generate statistics for the prefix and filename.
+
+        If you have a :class:`Statistics` object that has recorded errors,
+        you can generate the statistics for a prefix (e.g., ``E``, ``E1``,
+        ``W50``, ``W503``) with the optional filter of a filename as well.
+
+        .. code-block:: python
+
+            >>> stats = Statistics()
+            >>> stats.statistics_for('E12',
+                                     filename='src/flake8/statistics.py')
+            <generator ...>
+            >>> stats.statistics_for('W')
+            <generator ...>
+
+        :param str prefix:
+            The error class or specific error code to find statistics for.
+        :param str filename:
+            (Optional) The filename to further filter results by.
+        :returns:
+            Generator of instances of :class:`Statistic`
+        """
+        matching_errors = sorted(key for key in self._store.keys()
+                                 if key.matches(prefix, filename))
+        for error_code in matching_errors:
+            yield self._store[error_code]
+
+
+class Key(collections.namedtuple('Key', ['filename', 'code'])):
+    """Simple key structure for the Statistics dictionary.
+
+    To make things clearer, easier to read, and more understandable, we use a
+    namedtuple here for all Keys in the underlying dictionary for the
+    Statistics object.
+    """
+
+    __slots__ = ()
+
+    @classmethod
+    def create_from(cls, error):
+        """Create a Key from :class:`flake8.style_guide.Error`."""
+        return cls(
+            filename=error.filename,
+            code=error.code,
+        )
+
+    def matches(self, prefix, filename):
+        """Determine if this key matches some constraints.
+
+        :param str prefix:
+            The error code prefix that this key's error code should start with.
+        :param str filename:
+            The filename that we potentially want to match on. This can be
+            None to only match on error prefix.
+        :returns:
+            True if the Key's code starts with the prefix and either filename
+            is None, or the Key's filename matches the value passed in.
+        :rtype:
+            bool
+        """
+        return (self.code.startswith(prefix) and
+                (filename is None or
+                    self.filename == filename))
+
+
+class Statistic(object):
+    """Simple wrapper around the logic of each statistic.
+
+    Instead of maintaining a simple but potentially hard to reason about
+    tuple, we create a small class which has named attributes and a couple
+    of convenience methods on it.
+    """
+
+    def __init__(self, error_code, filename, message, count):
+        """Initialize our Statistic."""
+        self.error_code = error_code
+        self.filename = filename
+        self.message = message
+        self.count = count
+
+    @classmethod
+    def create_from(cls, error):
+        """Create a Statistic from a :class:`flake8.style_guide.Error`."""
+        return cls(
+            error_code=error.code,
+            filename=error.filename,
+            message=error.text,
+            count=0,
+        )
+
+    def increment(self):
+        """Increment the number of times we've seen this error in this file."""
+        self.count += 1
diff --git a/src/flake8/style_guide.py b/src/flake8/style_guide.py
@@ -5,6 +5,7 @@
 import logging
 import re
 
+from flake8 import statistics
 from flake8 import utils
 
 __all__ = (
@@ -74,6 +75,7 @@ def __init__(self, options, listener_trie, formatter):
         self.options = options
         self.listener = listener_trie
         self.formatter = formatter
+        self.stats = statistics.Statistics()
         self._selected = tuple(options.select)
         self._ignored = tuple(options.ignore)
         self._decision_cache = {}
@@ -267,6 +269,7 @@ def handle_error(self, code, filename, line_number, column_number, text,
         if (error_is_selected and is_not_inline_ignored and
                 is_included_in_diff):
             self.formatter.handle(error)
+            self.stats.record(error)
             self.listener.notify(error.code, error)
             return 1
         return 0

diff --git a/tests/unit/test_statistics.py b/tests/unit/test_statistics.py
@@ -0,0 +1,121 @@
+"""Tests for the statistics module in Flake8."""
+import pytest
+
+from flake8 import statistics as stats
+from flake8 import style_guide
+
+DEFAULT_ERROR_CODE = 'E100'
+DEFAULT_FILENAME = 'file.py'
+DEFAULT_TEXT = 'Default text'
+
+
+def make_error(**kwargs):
+    """Create errors with a bunch of default values."""
+    return style_guide.Error(
+        code=kwargs.pop('code', DEFAULT_ERROR_CODE),
+        filename=kwargs.pop('filename', DEFAULT_FILENAME),
+        line_number=kwargs.pop('line_number', 1),
+        column_number=kwargs.pop('column_number', 1),
+        text=kwargs.pop('text', DEFAULT_TEXT),
+        physical_line=None,
+    )
+
+
+def test_key_creation():
+    """Verify how we create Keys from Errors."""
+    key = stats.Key.create_from(make_error())
+    assert key == (DEFAULT_FILENAME, DEFAULT_ERROR_CODE)
+    assert key.filename == DEFAULT_FILENAME
+    assert key.code == DEFAULT_ERROR_CODE
+
+
+@pytest.mark.parametrize('code, filename, args, expected_result', [
+    # Error prefix matches
+    ('E123', 'file000.py', ('E', None), True),
+    ('E123', 'file000.py', ('E1', None), True),
+    ('E123', 'file000.py', ('E12', None), True),
+    ('E123', 'file000.py', ('E123', None), True),
+    # Error prefix and filename match
+    ('E123', 'file000.py', ('E', 'file000.py'), True),
+    ('E123', 'file000.py', ('E1', 'file000.py'), True),
+    ('E123', 'file000.py', ('E12', 'file000.py'), True),
+    ('E123', 'file000.py', ('E123', 'file000.py'), True),
+    # Error prefix does not match
+    ('E123', 'file000.py', ('W', None), False),
+    # Error prefix matches but filename does not
+    ('E123', 'file000.py', ('E', 'file001.py'), False),
+    # Error prefix does not match but filename does
+    ('E123', 'file000.py', ('W', 'file000.py'), False),
+    # Neither error prefix match nor filename
+    ('E123', 'file000.py', ('W', 'file001.py'), False),
+])
+def test_key_matching(code, filename, args, expected_result):
+    """Verify Key#matches behaves as we expect with the above input."""
+    key = stats.Key.create_from(make_error(code=code, filename=filename))
+    assert key.matches(*args) is expected_result
+
+
+def test_statistic_creation():
+    """Verify how we create Statistic objects from Errors."""
+    stat = stats.Statistic.create_from(make_error())
+    assert stat.error_code == DEFAULT_ERROR_CODE
+    assert stat.message == DEFAULT_TEXT
+    assert stat.filename == DEFAULT_FILENAME
+    assert stat.count == 0
+
+
+def test_statistic_increment():
+    """Verify we update the count."""
+    stat = stats.Statistic.create_from(make_error())
+    assert stat.count == 0
+    stat.increment()
+    assert stat.count == 1
+
+
+def test_recording_statistics():
+    """Verify that we appropriately create a new Statistic and store it."""
+    aggregator = stats.Statistics()
+    assert list(aggregator.statistics_for('E')) == []
+    aggregator.record(make_error())
+    storage = aggregator._store
+    for key, value in storage.items():
+        assert isinstance(key, stats.Key)
+        assert isinstance(value, stats.Statistic)
+
+    assert storage[(DEFAULT_FILENAME, DEFAULT_ERROR_CODE)].count == 1
+
+
+def test_statistics_for_single_record():
+    """Show we can retrieve the only statistic recorded."""
+    aggregator = stats.Statistics()
+    assert list(aggregator.statistics_for('E')) == []
+    aggregator.record(make_error())
+    statistics = list(aggregator.statistics_for('E'))
+    assert len(statistics) == 1
+    assert isinstance(statistics[0], stats.Statistic)
+
+
+def test_statistics_for_filters_by_filename():
+    """Show we can filter the recorded statistics by filename."""
+    aggregator = stats.Statistics()
+    assert list(aggregator.statistics_for('E')) == []
+    aggregator.record(make_error())
+    aggregator.record(make_error(filename='example.py'))
+
+    statistics = list(aggregator.statistics_for('E', DEFAULT_FILENAME))
+    assert len(statistics) == 1
+    assert isinstance(statistics[0], stats.Statistic)
+
+
+def test_statistic_for_retrieves_more_than_one_value():
+    """Show this works for more than a couple statistic values."""
+    aggregator = stats.Statistics()
+    for i in range(50):
+        aggregator.record(make_error(code='E1{:02d}'.format(i)))
+        aggregator.record(make_error(code='W2{:02d}'.format(i)))
+
+    statistics = list(aggregator.statistics_for('E'))
+    assert len(statistics) == 50
+
+    statistics = list(aggregator.statistics_for('W22'))
+    assert len(statistics) == 10