From ba6f4688e847179b23644acb90568d56674ddb2d Mon Sep 17 00:00:00 2001 From: Patrick Mazulo Date: Thu, 13 Oct 2022 15:50:59 -0300 Subject: [PATCH 1/6] Rename assets dir name --- setup.py | 2 +- .../{assets => json_sources}/wikipedia.json | 0 src/misspelling_lib/misspelling_detector.py | 2 +- tests/assets/good_file_list | 1 - tests/assets/various_spellings.good_out | 21 --------------- .../{assets => test_assets}/broken_file_list | 0 tests/{assets => test_assets}/broken_msl.json | 0 tests/{assets => test_assets}/broken_msl.txt | 0 tests/test_assets/good_file_list | 1 + .../nine_misspellings.c | 0 .../nine_misspellings.json | 0 tests/{assets => test_assets}/small_msl.txt | 0 .../various_spellings.c | 0 tests/test_assets/various_spellings.good_out | 21 +++++++++++++++ tests/test_class.py | 10 +++---- tests/test_cli.py | 26 +++++++++---------- 16 files changed, 42 insertions(+), 42 deletions(-) rename src/misspelling_lib/{assets => json_sources}/wikipedia.json (100%) delete mode 100644 tests/assets/good_file_list delete mode 100644 tests/assets/various_spellings.good_out rename tests/{assets => test_assets}/broken_file_list (100%) rename tests/{assets => test_assets}/broken_msl.json (100%) rename tests/{assets => test_assets}/broken_msl.txt (100%) create mode 100644 tests/test_assets/good_file_list rename tests/{assets => test_assets}/nine_misspellings.c (100%) rename tests/{assets => test_assets}/nine_misspellings.json (100%) rename tests/{assets => test_assets}/small_msl.txt (100%) rename tests/{assets => test_assets}/various_spellings.c (100%) create mode 100644 tests/test_assets/various_spellings.good_out diff --git a/setup.py b/setup.py index 9144781..84b5d24 100755 --- a/setup.py +++ b/setup.py @@ -47,7 +47,7 @@ def run(self): long_description=long_description, long_description_content_type="text/markdown", package_dir={"": "src"}, - package_data={"misspelling_lib": ["assets/wikipedia.json"]}, + package_data={"misspelling_lib": ["json_sources/wikipedia.json"]}, entry_points={"console_scripts": ["misspellings = misspelling_lib.misspellings:main"]}, install_requires=["typed-argument-parser==1.7.2", "rich==12.6.0"], keywords="check, code, spelling, spellcheck", diff --git a/src/misspelling_lib/assets/wikipedia.json b/src/misspelling_lib/json_sources/wikipedia.json similarity index 100% rename from src/misspelling_lib/assets/wikipedia.json rename to src/misspelling_lib/json_sources/wikipedia.json diff --git a/src/misspelling_lib/misspelling_detector.py b/src/misspelling_lib/misspelling_detector.py index e98fc79..d3a65d4 100644 --- a/src/misspelling_lib/misspelling_detector.py +++ b/src/misspelling_lib/misspelling_detector.py @@ -23,7 +23,7 @@ def __init__(self) -> None: @staticmethod def _get_default_json_files() -> List[pathlib.Path]: - assets_dir = pathlib.Path(__file__).parents[0] / "assets" + assets_dir = pathlib.Path(__file__).parents[0] / "json_sources" file_paths = [ assets_dir.joinpath(file) for file in os.listdir(assets_dir.as_posix()) diff --git a/tests/assets/good_file_list b/tests/assets/good_file_list deleted file mode 100644 index d4af0c6..0000000 --- a/tests/assets/good_file_list +++ /dev/null @@ -1 +0,0 @@ -assets/nine_misspellings.c diff --git a/tests/assets/various_spellings.good_out b/tests/assets/various_spellings.good_out deleted file mode 100644 index 83d62eb..0000000 --- a/tests/assets/various_spellings.good_out +++ /dev/null @@ -1,21 +0,0 @@ -cp "assets/various_spellings.c" "assets/various_spellings.c," -sed "1s/Yuo/You/" "assets/various_spellings.c" > "assets/various_spellings.c," -mv "assets/various_spellings.c," "assets/various_spellings.c" -cp "assets/various_spellings.c" "assets/various_spellings.c," -sed "2s/Zeebra/Zebra/" "assets/various_spellings.c" > "assets/various_spellings.c," -mv "assets/various_spellings.c," "assets/various_spellings.c" -cp "assets/various_spellings.c" "assets/various_spellings.c," -sed "4s/teh/the/" "assets/various_spellings.c" > "assets/various_spellings.c," -mv "assets/various_spellings.c," "assets/various_spellings.c" -cp "assets/various_spellings.c" "assets/various_spellings.c," -sed "4s/zeebra/zebra/" "assets/various_spellings.c" > "assets/various_spellings.c," -mv "assets/various_spellings.c," "assets/various_spellings.c" -cp "assets/various_spellings.c" "assets/various_spellings.c," -sed "4s/Rockerfeller/Rockefeller/" "assets/various_spellings.c" > "assets/various_spellings.c," -mv "assets/various_spellings.c," "assets/various_spellings.c" -cp "assets/various_spellings.c" "assets/various_spellings.c," -sed "6s/withdrawl/withdraw/" "assets/various_spellings.c" > "assets/various_spellings.c," -mv "assets/various_spellings.c," "assets/various_spellings.c" -cp "assets/various_spellings.c" "assets/various_spellings.c," -sed "7s/gardai/gardaĆ­/" "assets/various_spellings.c" > "assets/various_spellings.c," -mv "assets/various_spellings.c," "assets/various_spellings.c" diff --git a/tests/assets/broken_file_list b/tests/test_assets/broken_file_list similarity index 100% rename from tests/assets/broken_file_list rename to tests/test_assets/broken_file_list diff --git a/tests/assets/broken_msl.json b/tests/test_assets/broken_msl.json similarity index 100% rename from tests/assets/broken_msl.json rename to tests/test_assets/broken_msl.json diff --git a/tests/assets/broken_msl.txt b/tests/test_assets/broken_msl.txt similarity index 100% rename from tests/assets/broken_msl.txt rename to tests/test_assets/broken_msl.txt diff --git a/tests/test_assets/good_file_list b/tests/test_assets/good_file_list new file mode 100644 index 0000000..e51aff2 --- /dev/null +++ b/tests/test_assets/good_file_list @@ -0,0 +1 @@ +test_assets/nine_misspellings.c diff --git a/tests/assets/nine_misspellings.c b/tests/test_assets/nine_misspellings.c similarity index 100% rename from tests/assets/nine_misspellings.c rename to tests/test_assets/nine_misspellings.c diff --git a/tests/assets/nine_misspellings.json b/tests/test_assets/nine_misspellings.json similarity index 100% rename from tests/assets/nine_misspellings.json rename to tests/test_assets/nine_misspellings.json diff --git a/tests/assets/small_msl.txt b/tests/test_assets/small_msl.txt similarity index 100% rename from tests/assets/small_msl.txt rename to tests/test_assets/small_msl.txt diff --git a/tests/assets/various_spellings.c b/tests/test_assets/various_spellings.c similarity index 100% rename from tests/assets/various_spellings.c rename to tests/test_assets/various_spellings.c diff --git a/tests/test_assets/various_spellings.good_out b/tests/test_assets/various_spellings.good_out new file mode 100644 index 0000000..c8f49db --- /dev/null +++ b/tests/test_assets/various_spellings.good_out @@ -0,0 +1,21 @@ +cp "test_assets/various_spellings.c" "test_assets/various_spellings.c," +sed "1s/Yuo/You/" "test_assets/various_spellings.c" > "test_assets/various_spellings.c," +mv "test_assets/various_spellings.c," "test_assets/various_spellings.c" +cp "test_assets/various_spellings.c" "test_assets/various_spellings.c," +sed "2s/Zeebra/Zebra/" "test_assets/various_spellings.c" > "test_assets/various_spellings.c," +mv "test_assets/various_spellings.c," "test_assets/various_spellings.c" +cp "test_assets/various_spellings.c" "test_assets/various_spellings.c," +sed "4s/teh/the/" "test_assets/various_spellings.c" > "test_assets/various_spellings.c," +mv "test_assets/various_spellings.c," "test_assets/various_spellings.c" +cp "test_assets/various_spellings.c" "test_assets/various_spellings.c," +sed "4s/zeebra/zebra/" "test_assets/various_spellings.c" > "test_assets/various_spellings.c," +mv "test_assets/various_spellings.c," "test_assets/various_spellings.c" +cp "test_assets/various_spellings.c" "test_assets/various_spellings.c," +sed "4s/Rockerfeller/Rockefeller/" "test_assets/various_spellings.c" > "test_assets/various_spellings.c," +mv "test_assets/various_spellings.c," "test_assets/various_spellings.c" +cp "test_assets/various_spellings.c" "test_assets/various_spellings.c," +sed "6s/withdrawl/withdraw/" "test_assets/various_spellings.c" > "test_assets/various_spellings.c," +mv "test_assets/various_spellings.c," "test_assets/various_spellings.c" +cp "test_assets/various_spellings.c" "test_assets/various_spellings.c," +sed "7s/gardai/gardaĆ­/" "test_assets/various_spellings.c" > "test_assets/various_spellings.c," +mv "test_assets/various_spellings.c," "test_assets/various_spellings.c" diff --git a/tests/test_class.py b/tests/test_class.py index 2bf180f..e93ff00 100755 --- a/tests/test_class.py +++ b/tests/test_class.py @@ -16,7 +16,7 @@ def test_missing_ms_list(self): def test_broken_ms_list(self): with pytest.raises(ValueError): - MisspellingFileDetector(os.path.join(BASE_PATH, "assets/broken_msl.txt")) + MisspellingFileDetector(os.path.join(BASE_PATH, "test_assets/broken_msl.txt")) def test_missing_ms_list_for_json_detector(self): with pytest.raises(IOError): @@ -24,22 +24,22 @@ def test_missing_ms_list_for_json_detector(self): def test_broken_ms_list_for_json_detector(self): with pytest.raises(ValueError): - MisspellingJSONDetector(os.path.join(BASE_PATH, "assets/broken_msl.json")) + MisspellingJSONDetector(os.path.join(BASE_PATH, "test_assets/broken_msl.json")) def test_missing_file(self): ms = MisspellingDetector() - errors, _ = ms.check(BASE_PATH / "assets/missing_source.c") + errors, _ = ms.check(BASE_PATH / "test_assets/missing_source.c") assert errors def test_good_file(self): ms = MisspellingDetector() - errors, results = ms.check(BASE_PATH / "assets/nine_misspellings.json") + errors, results = ms.check(BASE_PATH / "test_assets/nine_misspellings.json") assert len(errors) == 0 assert len(results) == 9 def test_more_complex_file(self): ms = MisspellingDetector() - errors, results = ms.check(BASE_PATH / "assets/various_spellings.c") + errors, results = ms.check(BASE_PATH / "test_assets/various_spellings.c") assert len(errors) == 0 assert len(results) == 7 diff --git a/tests/test_cli.py b/tests/test_cli.py index d793071..983dab0 100755 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -20,7 +20,7 @@ class TestCli: def test_good_file(self): p = subprocess.Popen( - [CLI, "assets/nine_misspellings.c"], + [CLI, "test_assets/nine_misspellings.c"], cwd=TEST_BASE_DIR, stderr=subprocess.PIPE, stdout=subprocess.PIPE, @@ -32,7 +32,7 @@ def test_good_file(self): def test_bad_file(self): p = subprocess.Popen( - [CLI, "assets/missing.c"], + [CLI, "test_assets/missing.c"], cwd=TEST_BASE_DIR, stderr=subprocess.PIPE, stdout=subprocess.PIPE, @@ -44,7 +44,7 @@ def test_bad_file(self): def test_good_flag_f(self): p = subprocess.Popen( - [CLI, "--file-list", "assets/good_file_list"], + [CLI, "--file-list", "test_assets/good_file_list"], cwd=TEST_BASE_DIR, stderr=subprocess.PIPE, stdout=subprocess.PIPE, @@ -56,7 +56,7 @@ def test_good_flag_f(self): def test_bad_flag_f(self): p = subprocess.Popen( - [CLI, "--file-list", "assets/broken_file_list"], + [CLI, "--file-list", "test_assets/broken_file_list"], cwd=TEST_BASE_DIR, stderr=subprocess.PIPE, stdout=subprocess.PIPE, @@ -72,7 +72,7 @@ def test_bad_flag_m(self): CLI, "--dump-misspelling", "--misspelling-file", - "assets/broken_msl.txt", + "test_assets/broken_msl.txt", ], cwd=TEST_BASE_DIR, stderr=subprocess.PIPE, @@ -89,7 +89,7 @@ def test_good_flag_m(self): CLI, "--dump-misspelling", "--misspelling-file", - "assets/small_msl.txt", + "test_assets/small_msl.txt", ], cwd=TEST_BASE_DIR, stderr=subprocess.PIPE, @@ -106,7 +106,7 @@ def test_passing_misspelling_json_file(self): CLI, "--dump-misspelling", "--json-file", - "assets/nine_misspellings.json", + "test_assets/nine_misspellings.json", ], cwd=TEST_BASE_DIR, stderr=subprocess.PIPE, @@ -119,7 +119,7 @@ def test_passing_misspelling_json_file(self): def test_bad_flag_s(self): p = subprocess.Popen( - [CLI, "--script-output", "assets/various_spellings.c"], + [CLI, "--script-output", "test_assets/various_spellings.c"], cwd=TEST_BASE_DIR, stderr=subprocess.PIPE, stdout=subprocess.PIPE, @@ -130,16 +130,16 @@ def test_bad_flag_s(self): assert p.returncode == 2 def test_good_flag_s(self): - test_out = TEST_BASE_DIR.joinpath("assets/various_spellings.test_out") - good_out = TEST_BASE_DIR.joinpath("assets/various_spellings.good_out") + test_out = TEST_BASE_DIR.joinpath("test_assets/various_spellings.test_out") + good_out = TEST_BASE_DIR.joinpath("test_assets/various_spellings.good_out") if os.path.exists(test_out): os.unlink(test_out) p = subprocess.Popen( [ CLI, "--script-output", - "assets/various_spellings.test_out", - "assets/various_spellings.c", + "test_assets/various_spellings.test_out", + "test_assets/various_spellings.c", ], cwd=TEST_BASE_DIR, stderr=subprocess.PIPE, @@ -165,7 +165,7 @@ def test_standard_in(self): stdout=subprocess.PIPE, stdin=subprocess.PIPE, ) - (output, error_output) = p.communicate(input="assets/nine_misspellings.c\n".encode("utf8")) + (output, error_output) = p.communicate(input="test_assets/nine_misspellings.c\n".encode("utf8")) assert error_output.decode() == "" assert len(output.decode().split("\n")) == 10 assert p.returncode == 2 From 8f41db59e455c98e58e76a25d6d1944a9983eec0 Mon Sep 17 00:00:00 2001 From: Patrick Mazulo Date: Thu, 13 Oct 2022 15:54:18 -0300 Subject: [PATCH 2/6] Speed up expand_directories by using iglob instead of os.walk --- src/misspelling_lib/misspelling_checker.py | 14 +++++------- src/misspelling_lib/utils/argument_parser.py | 5 ++--- src/misspelling_lib/utils/files.py | 23 +++++++++----------- src/misspelling_lib/utils/words.py | 4 +++- 4 files changed, 21 insertions(+), 25 deletions(-) diff --git a/src/misspelling_lib/misspelling_checker.py b/src/misspelling_lib/misspelling_checker.py index 288955a..0853033 100644 --- a/src/misspelling_lib/misspelling_checker.py +++ b/src/misspelling_lib/misspelling_checker.py @@ -2,7 +2,7 @@ import pathlib import sys from codecs import StreamWriter -from typing import List, TextIO, Tuple, Union +from typing import Iterator, List, TextIO, Tuple, Union from tap.tap import TapType @@ -11,7 +11,7 @@ class MisspellingChecker(IMisspellingChecker): - def check(self, filename: pathlib.Path) -> Tuple[List[Exception], List[List[Union[str, int, str]]]]: + def check(self, filename: pathlib.Path) -> Tuple[List[Exception], List[List[Union[pathlib.Path, int, str]]]]: """ Checks the files for misspellings. Returns: @@ -47,9 +47,7 @@ def get_suggestions(self, word: str) -> List[str]: List of zero or more suggested replacements for word. """ - suggestions = set(self._misspelling_dict.get(word, [])).union( - set(self._misspelling_dict.get(word.lower(), [])) - ) + suggestions = set(self._misspelling_dict.get(word, [])).union(set(self._misspelling_dict.get(word.lower(), []))) return sorted(same_case(source=word, destination=w) for w in suggestions) def dump_corrections(self) -> List[List[str]]: @@ -60,7 +58,7 @@ def dump_corrections(self) -> List[List[str]]: results.append([bad_word, correction]) return results - def print_result(self, filenames: List[pathlib.Path], output: StreamWriter) -> bool: + def print_result(self, filenames: Iterator[pathlib.Path], output: StreamWriter) -> bool: """ Print a list of misspelled words and their corrections. @@ -82,7 +80,7 @@ def print_result(self, filenames: List[pathlib.Path], output: StreamWriter) -> b return found - def export_result_to_file(self, filenames: List[pathlib.Path], output: TextIO) -> None: + def export_result_to_file(self, filenames: Iterator[pathlib.Path], output: TextIO) -> None: """ Save the list of misspelled words and their corrections into a file. """ @@ -99,7 +97,7 @@ def export_result_to_file(self, filenames: List[pathlib.Path], output: TextIO) - ) ) - def output_sed_commands(self, parser: TapType, args: TapType, filenames: List[pathlib.Path]) -> None: + def output_sed_commands(self, parser: TapType, args: TapType, filenames: Iterator[pathlib.Path]) -> None: """ Output a series of portable sed commands to change the file. """ diff --git a/src/misspelling_lib/utils/argument_parser.py b/src/misspelling_lib/utils/argument_parser.py index 4ef8f42..64db134 100644 --- a/src/misspelling_lib/utils/argument_parser.py +++ b/src/misspelling_lib/utils/argument_parser.py @@ -4,7 +4,7 @@ from tap import Tap -def to_path(files: Union[List[Path], List[str]]) -> List[str]: +def to_path(files: Union[List[Path], List[str]]) -> List[Path]: return files @@ -25,12 +25,11 @@ def __init__(self): export_file: Optional[Path] = None # Export the list of misspelled words into a file dump_misspelling: bool = False # Dump the list of misspelled words version: bool = False # Version of the misspellings package - files: Optional[Union[List[Path], List[str]]] = None # Files to check + files: Optional[List[Path]] = None # Files to check def configure(self) -> None: self.add_argument( "files", nargs="*", help="Files to check", - type=to_path, ) diff --git a/src/misspelling_lib/utils/files.py b/src/misspelling_lib/utils/files.py index 420e6f8..d2c9f42 100644 --- a/src/misspelling_lib/utils/files.py +++ b/src/misspelling_lib/utils/files.py @@ -1,13 +1,13 @@ -import os import re import sys +from glob import iglob from pathlib import Path -from typing import List, NoReturn, Union +from typing import Iterator, List, NoReturn, Union -EXCLUDED_RE = re.compile(r"\.(py[co]|s?o|a|sh|txt|pylintrc|coverage|gitignore|python-version)|LICENSE$") +EXCLUDED_FILES_RE = re.compile(r"\.(pyc|s?o|a|sh|txt|coverage|gitignore|python-version)|LICENSE$") EXCLUDED_DIRS_RE = re.compile( - r"^(.*\..*egg|.*\..*egg-info|\..git|\..github|\..*mypy_cache|CVS|" - r"\.pytest_cache|bin|\.idea|assets|tests/assets|)$" + r"\.(git|github|mypy_cache|pytest_cache|idea|vscode)|" + r"^(\*egg|\*egg-info|CVS|bin|node_modules|json_sources|tests/sources)$" ) @@ -22,19 +22,16 @@ def parse_file_list(filename: Path) -> Union[List[Path], NoReturn]: f.close() return file_list except IOError as err: - print(f"ERRO NO ARQUIVO: {err}") raise err -def expand_directories(path_list: List[Path]) -> List[Path]: +def expand_directories(path_list: List[Path]) -> Iterator[Path]: """Return list with directories replaced their contained files.""" for path in path_list: - if os.path.isdir(path): - for root, dirnames, filenames in os.walk(path): + if path.is_dir() and not EXCLUDED_DIRS_RE.match(path.as_posix()): + for filenames in iglob(path.as_posix()): for name in filenames: - if not EXCLUDED_RE.search(name): - yield os.path.join(root, name) - - dirnames[:] = [d for d in dirnames if not EXCLUDED_DIRS_RE.match(d)] + if not EXCLUDED_FILES_RE.search(name): + yield Path(name) else: yield path diff --git a/src/misspelling_lib/utils/words.py b/src/misspelling_lib/utils/words.py index d93cc94..e1aa0b9 100644 --- a/src/misspelling_lib/utils/words.py +++ b/src/misspelling_lib/utils/words.py @@ -1,5 +1,6 @@ import re import string +from pathlib import Path from typing import List _NORM_REGEX = re.compile(r"(?<=[a-z])(?=[A-Z])") @@ -38,6 +39,7 @@ def esc_sed(raw_text: str) -> str: return raw_text.replace('"', '\\"').replace("/", "\\/") -def esc_file(raw_text: str) -> str: +def esc_file(path_text: Path) -> str: """Escape chars for a file name on a shell command line.""" + raw_text = path_text.as_posix() return raw_text.replace("'", "'\"'\"'") From ce505f7eb5f4af966f8b4a12cc3457a553952a08 Mon Sep 17 00:00:00 2001 From: Patrick Mazulo Date: Thu, 13 Oct 2022 15:55:06 -0300 Subject: [PATCH 3/6] Bump version to 2.0.6 --- pyproject.toml | 2 +- src/misspelling_lib/utils/version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 64d1986..6ce1b89 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "misspelling" -version = "2.0.5" +version = "2.0.6" description = "This is a Python library and tool to check for misspelled words in source code." authors = ["Patrick Mazulo "] license = "GNU GENERAL PUBLIC LICENSE" diff --git a/src/misspelling_lib/utils/version.py b/src/misspelling_lib/utils/version.py index 60bda72..e143b2a 100644 --- a/src/misspelling_lib/utils/version.py +++ b/src/misspelling_lib/utils/version.py @@ -1,2 +1,2 @@ def get_version() -> str: - return "2.0.5" + return "2.0.6" From 866ecc1cf6cd7a4338d071ef8d27af96ff30c6fe Mon Sep 17 00:00:00 2001 From: Patrick Mazulo Date: Thu, 13 Oct 2022 21:12:20 -0300 Subject: [PATCH 4/6] Fix regex used in expand_directories --- src/misspelling_lib/utils/files.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/misspelling_lib/utils/files.py b/src/misspelling_lib/utils/files.py index d2c9f42..5bdd9ae 100644 --- a/src/misspelling_lib/utils/files.py +++ b/src/misspelling_lib/utils/files.py @@ -7,7 +7,7 @@ EXCLUDED_FILES_RE = re.compile(r"\.(pyc|s?o|a|sh|txt|coverage|gitignore|python-version)|LICENSE$") EXCLUDED_DIRS_RE = re.compile( r"\.(git|github|mypy_cache|pytest_cache|idea|vscode)|" - r"^(\*egg|\*egg-info|CVS|bin|node_modules|json_sources|tests/sources)$" + r"(\*egg|\*egg-info|CVS|bin|node_modules|__pycache__|json_sources|test_assets)" ) @@ -28,10 +28,9 @@ def parse_file_list(filename: Path) -> Union[List[Path], NoReturn]: def expand_directories(path_list: List[Path]) -> Iterator[Path]: """Return list with directories replaced their contained files.""" for path in path_list: - if path.is_dir() and not EXCLUDED_DIRS_RE.match(path.as_posix()): - for filenames in iglob(path.as_posix()): - for name in filenames: - if not EXCLUDED_FILES_RE.search(name): - yield Path(name) + if path.is_dir() and not EXCLUDED_DIRS_RE.search(path.as_posix()): + for filename in iglob(f"{path}/**/*.*", recursive=True): + if not EXCLUDED_DIRS_RE.search(filename) and not EXCLUDED_FILES_RE.search(filename): + yield Path(filename) else: yield path From d2bf6ca60c96e40f4e3076c7a22f3deb6c737a9b Mon Sep 17 00:00:00 2001 From: Patrick Mazulo Date: Fri, 14 Oct 2022 13:10:10 -0300 Subject: [PATCH 5/6] Remove unnecessary code from argument_parser.py --- src/misspelling_lib/utils/argument_parser.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/misspelling_lib/utils/argument_parser.py b/src/misspelling_lib/utils/argument_parser.py index 64db134..14fcf4c 100644 --- a/src/misspelling_lib/utils/argument_parser.py +++ b/src/misspelling_lib/utils/argument_parser.py @@ -1,13 +1,9 @@ from pathlib import Path -from typing import List, Optional, Union +from typing import List, Optional from tap import Tap -def to_path(files: Union[List[Path], List[str]]) -> List[Path]: - return files - - class MisspellingArgumentParser(Tap): """ Argument parser with the options for the misspellings command From 57bd1675f43f283b21f6672fe1b56c2c61390ffb Mon Sep 17 00:00:00 2001 From: Patrick Mazulo Date: Fri, 14 Oct 2022 13:10:37 -0300 Subject: [PATCH 6/6] Speed up expand_directories by using os.scandir --- src/misspelling_lib/utils/files.py | 33 +++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/src/misspelling_lib/utils/files.py b/src/misspelling_lib/utils/files.py index 5bdd9ae..cab4635 100644 --- a/src/misspelling_lib/utils/files.py +++ b/src/misspelling_lib/utils/files.py @@ -1,13 +1,13 @@ +import os import re import sys -from glob import iglob from pathlib import Path -from typing import Iterator, List, NoReturn, Union +from typing import Iterator, List, NoReturn, Optional, Union EXCLUDED_FILES_RE = re.compile(r"\.(pyc|s?o|a|sh|txt|coverage|gitignore|python-version)|LICENSE$") +DOT_EXCLUDED_DIRS_RE = re.compile(r"\.(git|github|mypy_cache|pytest_cache|idea|vscode|.local)") EXCLUDED_DIRS_RE = re.compile( - r"\.(git|github|mypy_cache|pytest_cache|idea|vscode)|" - r"(\*egg|\*egg-info|CVS|bin|node_modules|__pycache__|json_sources|test_assets)" + r"(\*egg|\*egg-info|CVS|bin|node_modules|__pycache__|json_sources|test_assets|_local|build|dist)" ) @@ -25,12 +25,25 @@ def parse_file_list(filename: Path) -> Union[List[Path], NoReturn]: raise err -def expand_directories(path_list: List[Path]) -> Iterator[Path]: +def expand_directories(path_list: List[Path], result_files: Optional[List[Path]] = None) -> Iterator[Path]: """Return list with directories replaced their contained files.""" + + if result_files is None: + result_files = [] + for path in path_list: - if path.is_dir() and not EXCLUDED_DIRS_RE.search(path.as_posix()): - for filename in iglob(f"{path}/**/*.*", recursive=True): - if not EXCLUDED_DIRS_RE.search(filename) and not EXCLUDED_FILES_RE.search(filename): - yield Path(filename) + if path.is_dir(): + for entry in os.scandir(path): + if ( + not entry.path.find(".local") > -1 + and not entry.path.find("_local") > -1 + and not DOT_EXCLUDED_DIRS_RE.search(entry.path) + and not EXCLUDED_DIRS_RE.search(entry.path) + and entry.is_dir() + ): + expand_directories(path_list=[Path(entry.path)], result_files=result_files) + if entry.is_file() and not EXCLUDED_FILES_RE.search(entry.name): + result_files.append(Path(entry.path)) else: - yield path + result_files.append(path) + return result_files