diff --git a/README.md b/README.md index 440d909..2a028a1 100644 --- a/README.md +++ b/README.md @@ -20,14 +20,15 @@ pip install flake8-koles ## Usage ``` -flake8 --ignore-shorties 4 --censor-msg --lang=english,polish +flake8 --ignore-shorties 4 --censor-msg --lang=english,polish --ignore-swears=very,bad,words ``` ##### Options |OPTION | DEFAULT|DESCRIPTION | | -------- |---|-------------| -|`--ignore-shorties`|0 |ignore bad words shorter or equal to the argument| -|`--censor-msg`|False |replace bad words not leading letters with `*` in error messages| -|`--lang`|english |use bad words from the selected languages| +|`--censor-msg`|False |replace all but the first letter of each swear with `*` in error messages| +|`--ignore-shorties`|0 |ignore swears whose length is less than or equal to the argument| +|`--ignore-swears`| |comma-separated list of swears to ignore| +|`--lang`|english |use swears from the selected languages| ## Development notes diff --git a/flake8_koles/__about__.py b/flake8_koles/__about__.py index 0eaa349..73e15f1 100644 --- a/flake8_koles/__about__.py +++ b/flake8_koles/__about__.py @@ -1,8 +1,7 @@ """Package information module.""" - __title__ = 'flake8-koles' __description__ = 'Watch your language young lad! Swears and curses linter.' -__version__ = 'v0.1.2' +__version__ = '0.1.2' __author__ = 'myslak71' __author_email__ = 'myslak@protonmail.com' __url__ = 'https://github.com/myslak71/flake8-koles' diff --git a/flake8_koles/checker.py b/flake8_koles/checker.py index 7ac107a..86c5fec 100644 --- a/flake8_koles/checker.py +++ b/flake8_koles/checker.py @@ -3,7 +3,8 @@ import optparse import os import re -from typing import Generator, List, Set, Tuple +from itertools import chain, permutations +from typing import Generator, List, Tuple import pkg_resources from flake8.options.manager import OptionManager @@ -24,7 +25,7 @@ class KolesChecker: def __init__(self, tree: ast.Module, filename: str) -> None: """Initialize class values. 
Parameter `tree` is required by flake8.""" self.filename = filename - self._pattern = '|'.join(self._get_bad_words()) + self._pattern = self._get_pattern() def run(self) -> Generator[Tuple[int, int, str, type], None, None]: """Run the linter and return a generator of errors.""" @@ -36,23 +37,23 @@ def run(self) -> Generator[Tuple[int, int, str, type], None, None]: def add_options(cls, parser: OptionManager) -> None: """Add koles linter options to the flake8 parser.""" parser.add_option( - '--ignore-shorties', - default=0, - type='int', - parse_from_config=True + '--censor-msg', default=0, parse_from_config=True, action='store_true' ) parser.add_option( - '--censor-msg', - default=0, + '--ignore-shorties', default=0, type='int', parse_from_config=True + ) + parser.add_option( + '--ignore-swears', + default='', parse_from_config=True, - action='store_true' + comma_separated_list=True, ) parser.add_option( '--lang', default='english', parse_from_config=True, comma_separated_list=True, - choices=cls._get_lang_choices() + choices=cls._get_lang_choices(), ) @classmethod @@ -60,27 +61,32 @@ def parse_options(cls, options: optparse.Values) -> None: """Get parser options from flake8.""" cls.options = options - def _get_bad_words(self) -> Set[str]: + def _get_pattern(self) -> str: """Get a set of bad words.""" data = self._get_swears_data() - - return { + swear_set = { word for word in data.decode().strip().split('\n') - if len(word) > self.options.ignore_shorties + if not self._is_swear_ignored(word) } + return '|'.join(swear_set) def _get_swears_data(self) -> bytes: """Get swears data from languages present in the options.""" data = b'' for lang in self.options.lang: file_path = f'{self.SWEAR_DATA_DIR}/{lang}.dat' - data += pkg_resources.resource_string( - __name__, file_path - ) + data += pkg_resources.resource_string(__name__, file_path) return data + def _is_swear_ignored(self, word: str) -> bool: + """Check if word is ignored in options.""" + is_short = len(word) <= 
self.options.ignore_shorties + is_ignored = word in self.options.ignore_swears + + return is_short or is_ignored + def _get_file_content(self) -> List[str]: """Return file content as a list of lines.""" if self.filename in ('stdin', '-', None): @@ -125,10 +131,7 @@ def _check_row(self, string: str) -> List[Tuple[int, str]]: regex = re.compile(f'(?=({self._pattern}))', flags=re.IGNORECASE) - return [ - (match.start(), match.group(1)) - for match in regex.finditer(string) - ] + return [(match.start(), match.group(1)) for match in regex.finditer(string)] def _censor_word(self, word: str) -> str: """Replace all letters but first with `*` if censor_msg option is True.""" @@ -138,9 +141,18 @@ def _censor_word(self, word: str) -> str: @classmethod def _get_lang_choices(cls) -> List[str]: - """Get language choices by removing .dat from language filenames.""" - return [ + """ + Get language choices. + + Remove .dat from language filenames and generate all language permutations. + """ + languages = [ lang_file.replace('.dat', '') - for lang_file in - pkg_resources.resource_listdir(__name__, cls.SWEAR_DATA_DIR) + for lang_file in pkg_resources.resource_listdir(__name__, cls.SWEAR_DATA_DIR) ] + lang_permutations = [ + permutations(languages, number) + for number, language in enumerate(languages, 1) + ] + + return [','.join(permutation) for permutation in chain(*lang_permutations)] diff --git a/tests/test_checker.py b/tests/test_checker.py index ae5c1cc..2794f7f 100644 --- a/tests/test_checker.py +++ b/tests/test_checker.py @@ -9,13 +9,10 @@ @mock.patch('flake8_koles.checker.readlines') -@mock.patch('flake8_koles.checker.KolesChecker._get_bad_words') -def test_run( - mock_get_bad_words, - mock_readlines -): +@mock.patch('flake8_koles.checker.KolesChecker._get_pattern') +def test_run(mock_get_pattern, mock_readlines): """Test that flake interface returns appropriate error messages.""" - mock_get_bad_words.return_value = ['very', 'bad', 'words'] + mock_get_pattern.return_value = 
'very|bad|words' mock_readlines.return_value = ['Test very', 'nice', 'and bad words'] koles_checker = KolesChecker(tree='test_tree', filename='test_filename') koles_checker.options = Mock(censor_msg=True) @@ -24,34 +21,40 @@ def test_run( assert result == [ (1, 5, 'KOL001 Bad language found: v***', KolesChecker), (3, 4, 'KOL001 Bad language found: b**', KolesChecker), - (3, 8, 'KOL001 Bad language found: w****', KolesChecker)] + (3, 8, 'KOL001 Bad language found: w****', KolesChecker), + ] def test_add_options(koles_checker): """Test that all options are added to the parser.""" option_manager = OptionManager() koles_checker.add_options(option_manager) - assert repr(option_manager.options) == repr( [ + Option( + long_option_name='--censor-msg', + default=0, + parse_from_config=True, + action='store_true', + ), Option( long_option_name='--ignore-shorties', default=0, type='int', - parse_from_config=True + parse_from_config=True, ), Option( - long_option_name='--censor-msg', - default=0, + long_option_name='--ignore-swears', + default='', parse_from_config=True, - action='store_true' + comma_separated_list=True, ), Option( long_option_name='--lang', default='english', parse_from_config=True, - comma_separated_list=True - ) + comma_separated_list=True, + ), ] ) @@ -65,24 +68,29 @@ def test_parse_options(koles_checker): @pytest.mark.parametrize( - "ignore_shorties, expected_result, get_swears_data_value", + "ignore_shorties, ignore_swears, expected_result, get_swears_data_value", ( - (0, {'Mike D', 'MCA', 'Ad-Rock'}, b'Mike D\nMCA\nAd-Rock\n'), - (1, {'Mike D', 'MCA', 'Ad-Rock'}, b'Mike D\nMCA\nAd-Rock\n'), - (3, {'Mike D', 'Ad-Rock'}, b'Mike D\nMCA\nAd-Rock\n'), - (6, {'Ad-Rock', }, b'Mike D\nMCA\nAd-Rock\n'), - (69, set(), b'Mike D\nMCA\nAd-Rock\n'), - # Case: repeated swears, distinct ones returned - (0, {'Mike D', 'MCA', 'Ad-Rock'}, b'Mike D\nMCA\nAd-Rock\nMike D\nMCA\n'), + (0, [], ['Mike D', 'MCA', 'Ad-Rock'], b'Mike D\nMCA\nAd-Rock\n'), + (1, [], ['Mike D', 
'MCA', 'Ad-Rock'], b'Mike D\nMCA\nAd-Rock\n'), + (3, [], ['Mike D', 'Ad-Rock'], b'Mike D\nMCA\nAd-Rock\n'), + (6, [], ['Ad-Rock'], b'Mike D\nMCA\nAd-Rock\n'), + (69, [], [], b'Mike D\nMCA\nAd-Rock\n'), + # Case: repeated swears, distinct ones returned + (0, [], ['Mike D', 'MCA', 'Ad-Rock'], b'Mike D\nMCA\nAd-Rock\nMike D\nMCA\n'), + # Case: ignore word is present + (0, ['MCA'], ['Mike D', 'Ad-Rock'], b'Mike D\nMCA\nAd-Rock\n'), + # Case: all swears are ignored + (0, ['Ad-Rock', 'MCA', 'Mike D'], [], b'Mike D\nMCA\nAd-Rock\n'), ), ) @mock.patch('flake8_koles.checker.KolesChecker._get_swears_data') -def test_get_bad_words( - mock_get_swears_data, - ignore_shorties, - expected_result, - get_swears_data_value, - koles_checker +def test_get_pattern( + mock_get_swears_data, + ignore_shorties, + ignore_swears, + expected_result, + get_swears_data_value, + koles_checker, ): """ Test _get_bad_words. @@ -91,36 +99,31 @@ def test_get_bad_words( depending on the ignore-shorties option. """ mock_get_swears_data.return_value = get_swears_data_value - koles_checker.options = Mock(ignore_shorties=ignore_shorties) - result = koles_checker._get_bad_words() + koles_checker.options = Mock( + ignore_shorties=ignore_shorties, ignore_swears=ignore_swears + ) + result = koles_checker._get_pattern() - assert result == expected_result + assert result.count('|') == max(len(expected_result) - 1, 0) + assert all((word in result for word in expected_result)) @pytest.mark.parametrize( "lang, resource_string_value, expected_result", ( - # Case 1: One language - ( - ['beastie_boys_lang'], - [b'Mike D\nMCA\nAd-Rock\n'], - b'Mike D\nMCA\nAd-Rock\n' - ), - # Case 2: Two languages - ( - ['beastie_boys_lang', 'snoop_dogg_lang'], - [b'Mike D\nMCA\nAd-Rock\n', b'Snoop Dogg\nDr. Dre\n'], - b'Mike D\nMCA\nAd-Rock\nSnoop Dogg\nDr. 
Dre\n' - ), + # Case 1: One language + (['beastie_boys_lang'], [b'Mike D\nMCA\nAd-Rock\n'], b'Mike D\nMCA\nAd-Rock\n'), + # Case 2: Two languages + ( + ['beastie_boys_lang', 'snoop_dogg_lang'], + [b'Mike D\nMCA\nAd-Rock\n', b'Snoop Dogg\nDr. Dre\n'], + b'Mike D\nMCA\nAd-Rock\nSnoop Dogg\nDr. Dre\n', + ), ), ) @mock.patch('flake8_koles.checker.pkg_resources.resource_string') def test_get_swears_data( - mock_resource_string, - lang, - resource_string_value, - expected_result, - koles_checker + mock_resource_string, lang, resource_string_value, expected_result, koles_checker ): """ Test that _get_swear_data merges swears from languages given in the options. @@ -132,20 +135,9 @@ def test_get_swears_data( assert result == expected_result -@pytest.mark.parametrize( - 'filename,', - ( - 'stdin', - '-', - None, - ), -) +@pytest.mark.parametrize('filename,', ('stdin', '-', None)) @mock.patch('flake8_koles.checker.stdin_get_value') -def test_get_file_content_stdin( - mock_stdin_get_value, - filename, - koles_checker -): +def test_get_file_content_stdin(mock_stdin_get_value, filename, koles_checker): """Test that flake8 stdin_get_value is called for appropriate filename.""" koles_checker.filename = filename koles_checker._get_file_content() @@ -154,9 +146,7 @@ def test_get_file_content_stdin( @mock.patch('flake8_koles.checker.readlines') -def test_get_file_content_regular_filename( - mock_readlines, koles_checker -): +def test_get_file_content_regular_filename(mock_readlines, koles_checker): """Test that flake8 pycodestyle readlines is called for appropriate filename.""" koles_checker.filename = 'test_filename' koles_checker._get_file_content() @@ -167,45 +157,34 @@ def test_get_file_content_regular_filename( @pytest.mark.parametrize( 'filename, check_row_value, censor_word_value, expected_result', ( - ( - 'test_filename', [], [], [] - ), - ( - 'ugly_name', - [(0, 'ugly')], - ['ugly', ], - [(0, 0, 'KOL002 Filename contains bad language: ugly', KolesChecker)] - ), - ( - 
'bad_ugly_name', - [(0, 'bad'), (4, 'ugly')], - ['bad', 'ugly'], - [ - ( - 0, 0, - 'KOL002 Filename contains bad language: bad', - KolesChecker - ), - ( - 0, 4, - 'KOL002 Filename contains bad language: ugly', - KolesChecker - ) - ] - ) - + ('test_filename', [], [], []), + ( + 'ugly_name', + [(0, 'ugly')], + ['ugly'], + [(0, 0, 'KOL002 Filename contains bad language: ugly', KolesChecker)], + ), + ( + 'bad_ugly_name', + [(0, 'bad'), (4, 'ugly')], + ['bad', 'ugly'], + [ + (0, 0, 'KOL002 Filename contains bad language: bad', KolesChecker), + (0, 4, 'KOL002 Filename contains bad language: ugly', KolesChecker), + ], + ), ), ) @mock.patch('flake8_koles.checker.KolesChecker._check_row') @mock.patch('flake8_koles.checker.KolesChecker._censor_word') def test_get_filename_errors( - mock_censor_word, - mock_check_row, - filename, - check_row_value, - censor_word_value, - expected_result, - koles_checker + mock_censor_word, + mock_check_row, + filename, + check_row_value, + censor_word_value, + expected_result, + koles_checker, ): """Test that appropriate error messages are returned.""" mock_check_row.return_value = check_row_value @@ -219,37 +198,34 @@ def test_get_filename_errors( @pytest.mark.parametrize( 'content, check_row_value, censor_word_value, expected_result', ( - ( - ['nice_content'], [[]], [], [] - ), - ( - ['ugly_content'], - [[(0, 'ugly')]], - ['ugly', ], - [(1, 0, 'KOL001 Bad language found: ugly', KolesChecker)] - ), - ( - ['ugly_content', 'very_bad_content'], - [[(0, 'ugly')], [(5, 'bad')]], - ['ugly', 'bad'], - [ - (1, 0, 'KOL001 Bad language found: ugly', KolesChecker), - (2, 5, 'KOL001 Bad language found: bad', KolesChecker) - ] - ), - + (['nice_content'], [[]], [], []), + ( + ['ugly_content'], + [[(0, 'ugly')]], + ['ugly'], + [(1, 0, 'KOL001 Bad language found: ugly', KolesChecker)], + ), + ( + ['ugly_content', 'very_bad_content'], + [[(0, 'ugly')], [(5, 'bad')]], + ['ugly', 'bad'], + [ + (1, 0, 'KOL001 Bad language found: ugly', KolesChecker), + (2, 5, 
'KOL001 Bad language found: bad', KolesChecker), + ], + ), ), ) @mock.patch('flake8_koles.checker.KolesChecker._check_row') @mock.patch('flake8_koles.checker.KolesChecker._censor_word') def test_get_content_errors( - mock_censor_word, - mock_check_row, - content, - check_row_value, - censor_word_value, - expected_result, - koles_checker + mock_censor_word, + mock_check_row, + content, + check_row_value, + censor_word_value, + expected_result, + koles_checker, ): """Test that appropriate error messages are returned.""" mock_check_row.side_effect = check_row_value @@ -262,26 +238,21 @@ def test_get_content_errors( @pytest.mark.parametrize( 'pattern, string, expected_result', ( - # Case 1: Multiple overlapping patterns - ('abcd|ab|abc|cd', 'abcdab', [(0, 'abcd'), (2, 'cd'), (4, 'ab')]), - # Case 2: Single non-overlapping pattern - ('ab', 'abcdab', [(0, 'ab'), (4, 'ab')]), - # Case 3: Empty string - ('(?=(ab))', '', []), - # Case 4: Empty pattern - ('', 'abcdab', []), - # Case 6: Empty string and pattern - ('', '', []), - # Case 7: Uppercase string - ('abcd|ab|abc|cd', 'ABCDAB', [(0, 'ABCD'), (2, 'CD'), (4, 'AB')]), + # Case 1: Multiple overlapping patterns + ('abcd|ab|abc|cd', 'abcdab', [(0, 'abcd'), (2, 'cd'), (4, 'ab')]), + # Case 2: Single non-overlapping pattern + ('ab', 'abcdab', [(0, 'ab'), (4, 'ab')]), + # Case 3: Empty string + ('(?=(ab))', '', []), + # Case 4: Empty pattern + ('', 'abcdab', []), + # Case 6: Empty string and pattern + ('', '', []), + # Case 7: Uppercase string + ('abcd|ab|abc|cd', 'ABCDAB', [(0, 'ABCD'), (2, 'CD'), (4, 'AB')]), ), ) -def test_check_row( - pattern, - string, - expected_result, - koles_checker -): +def test_check_row(pattern, string, expected_result, koles_checker): """Test that check_string returns appropriate value for given pattern and string.""" koles_checker._pattern = pattern result = koles_checker._check_row(string) @@ -292,20 +263,15 @@ def test_check_row( @pytest.mark.parametrize( 'word, censor_msg, expected_result', ( 
- ('Mike D', True, 'M*****'), - ('Mike D', False, 'Mike D'), - ('MCA', True, 'M**'), - ('MCA', False, 'MCA'), - ('Ad-Rock', True, 'A******'), - ('Ad-Rock', False, 'Ad-Rock'), + ('Mike D', True, 'M*****'), + ('Mike D', False, 'Mike D'), + ('MCA', True, 'M**'), + ('MCA', False, 'MCA'), + ('Ad-Rock', True, 'A******'), + ('Ad-Rock', False, 'Ad-Rock'), ), ) -def test_censor_word( - word, - censor_msg, - expected_result, - koles_checker -): +def test_censor_word(word, censor_msg, expected_result, koles_checker): """Test censor_word. Test that the function returns proper set of bad words @@ -320,18 +286,17 @@ def test_censor_word( @pytest.mark.parametrize( 'listdir_value, expected_result', ( - # Case: empty language dir - ([], []), - (['english.dat'], ['english']), - (['english.dat', 'polish.dat'], ['english', 'polish']), + # Case: empty language dir + ([], []), + (['english.dat'], ['english']), + ( + ['english.dat', 'polish.dat'], + ['english', 'polish', 'english,polish', 'polish,english'], + ), ), ) @mock.patch('flake8_koles.checker.pkg_resources.resource_listdir') -def test_get_lang_choices( - listdir_mock, - listdir_value, - expected_result, - koles_checker): +def test_get_lang_choices(listdir_mock, listdir_value, expected_result, koles_checker): """Test that proper language names are fetched from the resources.""" listdir_mock.return_value = listdir_value