Skip to content

Commit

Permalink
Merge branch 'add-statistics' into 'master'
Browse files Browse the repository at this point in the history
Add the statistics module

*Description of changes*

Start adding support for `--statistics` and legacy `get_statistics` API.

*Related to:*  (Add bug number here)

See merge request !73
  • Loading branch information
sigmavirus24 committed Jul 13, 2016
2 parents 58e6763 + 2ffcf96 commit 4d6929c
Show file tree
Hide file tree
Showing 4 changed files with 248 additions and 1 deletion.
7 changes: 6 additions & 1 deletion src/flake8/api/legacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@ def __init__(self, application):
.. warning:: This should not be instantiated by users.
"""
self._application = application
self._style_guide = application.guide
self._stats = self._style_guide.stats

@property
def total_errors(self):
Expand All @@ -149,4 +151,7 @@ def total_errors(self):

def get_statistics(self, violation):
    """Get the recorded occurrences of a violation as formatted strings.

    :param str violation:
        The error code (or code prefix) handed to
        ``self._stats.statistics_for`` to select statistics.
    :returns:
        One string per matching statistic, formatted as
        ``{count} {error_code} {message}``.
    :rtype:
        list
    """
    # The placeholder ``raise NotImplementedError`` that used to sit here
    # made the implementation below unreachable; statistics are now
    # captured, so return them.
    return [
        '{} {} {}'.format(s.count, s.error_code, s.message)
        for s in self._stats.statistics_for(violation)
    ]
118 changes: 118 additions & 0 deletions src/flake8/statistics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
"""Statistic collection logic for Flake8."""
import collections


class Statistics(object):
    """Collect per-file, per-code violation counts for one Flake8 run."""

    def __init__(self):
        """Start with an empty mapping of :class:`Key` to :class:`Statistic`."""
        self._store = {}

    def record(self, error):
        """Count one more occurrence of the given error.

        The first time a (filename, code) pair is seen a fresh
        :class:`Statistic` is stored for it; every call then bumps the
        stored statistic's counter.

        :param error:
            The Error instance describing the violation that was reported.
        :type error:
            flake8.style_guide.Error
        """
        key = Key.create_from(error)
        try:
            statistic = self._store[key]
        except KeyError:
            statistic = self._store[key] = Statistic.create_from(error)
        statistic.increment()

    def statistics_for(self, prefix, filename=None):
        """Yield the recorded statistics matching a prefix and filename.

        The prefix may be an error class or a full code (e.g., ``E``,
        ``E1``, ``W50``, ``W503``). Results are yielded in the sorted
        order of their keys.

        .. code-block:: python

            >>> stats = Statistics()
            >>> stats.statistics_for('E12',
                                     filename='src/flake8/statistics.py')
            <generator ...>
            >>> stats.statistics_for('W')
            <generator ...>

        :param str prefix:
            The error class or specific error code to look up.
        :param str filename:
            (Optional) Only yield statistics recorded for this file.
        :returns:
            Generator of instances of :class:`Statistic`
        """
        selected = [
            key for key in self._store if key.matches(prefix, filename)
        ]
        selected.sort()
        for key in selected:
            yield self._store[key]


class Key(collections.namedtuple('Key', ['filename', 'code'])):
    """Dictionary key used by :class:`Statistics`.

    A namedtuple of ``(filename, code)`` so that the keys of the
    underlying dictionary are hashable, sortable, and have readable
    attribute names.
    """

    __slots__ = ()

    @classmethod
    def create_from(cls, error):
        """Build a Key from a :class:`flake8.style_guide.Error`."""
        return cls(error.filename, error.code)

    def matches(self, prefix, filename):
        """Check this key against an error-code prefix and a filename.

        :param str prefix:
            The prefix that this key's error code must start with.
        :param str filename:
            The filename to compare against this key's filename, or None
            to match on the error prefix alone.
        :returns:
            True if the Key's code starts with the prefix and either
            filename is None, or the Key's filename equals it.
        :rtype:
            bool
        """
        if not self.code.startswith(prefix):
            return False
        return filename is None or self.filename == filename


class Statistic(object):
    """Mutable record of how often one error was seen in one file.

    A small class with named attributes is used instead of a bare tuple
    so that the counter can be updated in place and the fields are easy
    to read.
    """

    def __init__(self, error_code, filename, message, count):
        """Store the error code, filename, message, and starting count."""
        self.count = count
        self.message = message
        self.filename = filename
        self.error_code = error_code

    @classmethod
    def create_from(cls, error):
        """Build a zero-count Statistic from a :class:`flake8.style_guide.Error`."""
        fields = {
            'error_code': error.code,
            'filename': error.filename,
            'message': error.text,
            'count': 0,
        }
        return cls(**fields)

    def increment(self):
        """Record one more sighting of this error in this file."""
        self.count = self.count + 1
3 changes: 3 additions & 0 deletions src/flake8/style_guide.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import logging
import re

from flake8 import statistics
from flake8 import utils

__all__ = (
Expand Down Expand Up @@ -74,6 +75,7 @@ def __init__(self, options, listener_trie, formatter):
self.options = options
self.listener = listener_trie
self.formatter = formatter
self.stats = statistics.Statistics()
self._selected = tuple(options.select)
self._ignored = tuple(options.ignore)
self._decision_cache = {}
Expand Down Expand Up @@ -267,6 +269,7 @@ def handle_error(self, code, filename, line_number, column_number, text,
if (error_is_selected and is_not_inline_ignored and
is_included_in_diff):
self.formatter.handle(error)
self.stats.record(error)
self.listener.notify(error.code, error)
return 1
return 0
Expand Down
121 changes: 121 additions & 0 deletions tests/unit/test_statistics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
"""Tests for the statistics module in Flake8."""
import pytest

from flake8 import statistics as stats
from flake8 import style_guide

DEFAULT_ERROR_CODE = 'E100'
DEFAULT_FILENAME = 'file.py'
DEFAULT_TEXT = 'Default text'


def make_error(**kwargs):
    """Build a ``style_guide.Error``, defaulting any field not supplied.

    ``physical_line`` is always ``None``; keyword arguments other than
    ``code``, ``filename``, ``line_number``, ``column_number`` and
    ``text`` are ignored.
    """
    code = kwargs.get('code', DEFAULT_ERROR_CODE)
    filename = kwargs.get('filename', DEFAULT_FILENAME)
    line_number = kwargs.get('line_number', 1)
    column_number = kwargs.get('column_number', 1)
    text = kwargs.get('text', DEFAULT_TEXT)
    return style_guide.Error(
        code=code,
        filename=filename,
        line_number=line_number,
        column_number=column_number,
        text=text,
        physical_line=None,
    )


def test_key_creation():
    """Check that Key.create_from copies the filename and code of an Error."""
    error = make_error()
    key = stats.Key.create_from(error)
    # Attribute access and plain-tuple equality must both work.
    assert key.filename == DEFAULT_FILENAME
    assert key.code == DEFAULT_ERROR_CODE
    assert key == (DEFAULT_FILENAME, DEFAULT_ERROR_CODE)


@pytest.mark.parametrize('code, filename, args, expected_result', [
    # Prefix matches, no filename filter
    ('E123', 'file000.py', ('E', None), True),
    ('E123', 'file000.py', ('E1', None), True),
    ('E123', 'file000.py', ('E12', None), True),
    ('E123', 'file000.py', ('E123', None), True),
    # Prefix matches and so does the filename
    ('E123', 'file000.py', ('E', 'file000.py'), True),
    ('E123', 'file000.py', ('E1', 'file000.py'), True),
    ('E123', 'file000.py', ('E12', 'file000.py'), True),
    ('E123', 'file000.py', ('E123', 'file000.py'), True),
    # Prefix differs, no filename filter
    ('E123', 'file000.py', ('W', None), False),
    # Prefix matches, filename differs
    ('E123', 'file000.py', ('E', 'file001.py'), False),
    # Prefix differs, filename matches
    ('E123', 'file000.py', ('W', 'file000.py'), False),
    # Both prefix and filename differ
    ('E123', 'file000.py', ('W', 'file001.py'), False),
])
def test_key_matching(code, filename, args, expected_result):
    """Verify Key#matches behaves as we expect with the above input."""
    error = make_error(code=code, filename=filename)
    key = stats.Key.create_from(error)
    outcome = key.matches(*args)
    assert outcome is expected_result


def test_statistic_creation():
    """Check the fields of a Statistic freshly built from an Error."""
    error = make_error()
    stat = stats.Statistic.create_from(error)
    # A new statistic starts at zero; Statistics.record does the counting.
    assert stat.count == 0
    assert stat.filename == DEFAULT_FILENAME
    assert stat.message == DEFAULT_TEXT
    assert stat.error_code == DEFAULT_ERROR_CODE


def test_statistic_increment():
    """Check that increment() raises the count by exactly one."""
    stat = stats.Statistic.create_from(make_error())
    before = stat.count
    assert before == 0
    stat.increment()
    after = stat.count
    assert after == 1


def test_recording_statistics():
    """Check that record() stores a Key/Statistic pair and counts it once."""
    aggregator = stats.Statistics()
    assert list(aggregator.statistics_for('E')) == []
    aggregator.record(make_error())
    storage = aggregator._store
    assert all(isinstance(key, stats.Key) for key in storage)
    assert all(
        isinstance(value, stats.Statistic) for value in storage.values()
    )

    # A plain tuple works for the lookup because Key is a namedtuple.
    recorded = storage[(DEFAULT_FILENAME, DEFAULT_ERROR_CODE)]
    assert recorded.count == 1


def test_statistics_for_single_record():
    """Check that a single recorded error yields exactly one Statistic."""
    aggregator = stats.Statistics()
    assert list(aggregator.statistics_for('E')) == []
    aggregator.record(make_error())
    found = list(aggregator.statistics_for('E'))
    assert len(found) == 1
    only = found[0]
    assert isinstance(only, stats.Statistic)


def test_statistics_for_filters_by_filename():
    """Show that passing a filename excludes statistics from other files."""
    aggregator = stats.Statistics()
    assert list(aggregator.statistics_for('E')) == []
    aggregator.record(make_error())
    aggregator.record(make_error(filename='example.py'))

    # Without the filter both files contribute a statistic, which proves
    # the filtered query below is really excluding something.
    assert len(list(aggregator.statistics_for('E'))) == 2

    statistics = list(aggregator.statistics_for('E', DEFAULT_FILENAME))
    assert len(statistics) == 1
    assert isinstance(statistics[0], stats.Statistic)
    # The surviving statistic must belong to the file we asked for.
    assert statistics[0].filename == DEFAULT_FILENAME


def test_statistic_for_retrieves_more_than_one_value():
    """Check prefix lookups when many distinct codes have been recorded."""
    aggregator = stats.Statistics()
    # Record E100..E149 and W200..W249, interleaved.
    for index in range(50):
        for template in ('E1{:02d}', 'W2{:02d}'):
            aggregator.record(make_error(code=template.format(index)))

    all_e_codes = list(aggregator.statistics_for('E'))
    assert len(all_e_codes) == 50

    # Only W220..W229 start with "W22".
    w22_codes = list(aggregator.statistics_for('W22'))
    assert len(w22_codes) == 10

0 comments on commit 4d6929c

Please sign in to comment.