Skip to content

Commit

Permalink
Merge pull request #17 from mazulo/mazulo/speed-up-expand-directories…
Browse files Browse the repository at this point in the history
…-function

Speed up expand_directories function
  • Loading branch information
mazulo committed Oct 14, 2022
2 parents 24b337a + 57bd167 commit e108263
Show file tree
Hide file tree
Showing 22 changed files with 78 additions and 74 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "misspelling"
version = "2.0.5"
version = "2.0.6"
description = "This is a Python library and tool to check for misspelled words in source code."
authors = ["Patrick Mazulo <pmazulo@gmail.com>"]
license = "GNU GENERAL PUBLIC LICENSE"
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def run(self):
long_description=long_description,
long_description_content_type="text/markdown",
package_dir={"": "src"},
package_data={"misspelling_lib": ["assets/wikipedia.json"]},
package_data={"misspelling_lib": ["json_sources/wikipedia.json"]},
entry_points={"console_scripts": ["misspellings = misspelling_lib.misspellings:main"]},
install_requires=["typed-argument-parser==1.7.2", "rich==12.6.0"],
keywords="check, code, spelling, spellcheck",
Expand Down
File renamed without changes.
14 changes: 6 additions & 8 deletions src/misspelling_lib/misspelling_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import pathlib
import sys
from codecs import StreamWriter
from typing import List, TextIO, Tuple, Union
from typing import Iterator, List, TextIO, Tuple, Union

from tap.tap import TapType

Expand All @@ -11,7 +11,7 @@


class MisspellingChecker(IMisspellingChecker):
def check(self, filename: pathlib.Path) -> Tuple[List[Exception], List[List[Union[str, int, str]]]]:
def check(self, filename: pathlib.Path) -> Tuple[List[Exception], List[List[Union[pathlib.Path, int, str]]]]:
"""
Checks the files for misspellings.
Returns:
Expand Down Expand Up @@ -47,9 +47,7 @@ def get_suggestions(self, word: str) -> List[str]:
List of zero or more suggested replacements for word.
"""
suggestions = set(self._misspelling_dict.get(word, [])).union(
set(self._misspelling_dict.get(word.lower(), []))
)
suggestions = set(self._misspelling_dict.get(word, [])).union(set(self._misspelling_dict.get(word.lower(), [])))
return sorted(same_case(source=word, destination=w) for w in suggestions)

def dump_corrections(self) -> List[List[str]]:
Expand All @@ -60,7 +58,7 @@ def dump_corrections(self) -> List[List[str]]:
results.append([bad_word, correction])
return results

def print_result(self, filenames: List[pathlib.Path], output: StreamWriter) -> bool:
def print_result(self, filenames: Iterator[pathlib.Path], output: StreamWriter) -> bool:
"""
Print a list of misspelled words and their corrections.
Expand All @@ -82,7 +80,7 @@ def print_result(self, filenames: List[pathlib.Path], output: StreamWriter) -> b

return found

def export_result_to_file(self, filenames: List[pathlib.Path], output: TextIO) -> None:
def export_result_to_file(self, filenames: Iterator[pathlib.Path], output: TextIO) -> None:
"""
Save the list of misspelled words and their corrections into a file.
"""
Expand All @@ -99,7 +97,7 @@ def export_result_to_file(self, filenames: List[pathlib.Path], output: TextIO) -
)
)

def output_sed_commands(self, parser: TapType, args: TapType, filenames: List[pathlib.Path]) -> None:
def output_sed_commands(self, parser: TapType, args: TapType, filenames: Iterator[pathlib.Path]) -> None:
"""
Output a series of portable sed commands to change the file.
"""
Expand Down
2 changes: 1 addition & 1 deletion src/misspelling_lib/misspelling_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def __init__(self) -> None:

@staticmethod
def _get_default_json_files() -> List[pathlib.Path]:
assets_dir = pathlib.Path(__file__).parents[0] / "assets"
assets_dir = pathlib.Path(__file__).parents[0] / "json_sources"
file_paths = [
assets_dir.joinpath(file)
for file in os.listdir(assets_dir.as_posix())
Expand Down
9 changes: 2 additions & 7 deletions src/misspelling_lib/utils/argument_parser.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,9 @@
from pathlib import Path
from typing import List, Optional, Union
from typing import List, Optional

from tap import Tap


def to_path(files: Union[List[Path], List[str]]) -> List[str]:
return files


class MisspellingArgumentParser(Tap):
"""
Argument parser with the options for the misspellings command
Expand All @@ -25,12 +21,11 @@ def __init__(self):
export_file: Optional[Path] = None # Export the list of misspelled words into a file
dump_misspelling: bool = False # Dump the list of misspelled words
version: bool = False # Version of the misspellings package
files: Optional[Union[List[Path], List[str]]] = None # Files to check
files: Optional[List[Path]] = None # Files to check

def configure(self) -> None:
self.add_argument(
"files",
nargs="*",
help="Files to check",
type=to_path,
)
37 changes: 23 additions & 14 deletions src/misspelling_lib/utils/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
import re
import sys
from pathlib import Path
from typing import List, NoReturn, Union
from typing import Iterator, List, NoReturn, Optional, Union

EXCLUDED_RE = re.compile(r"\.(py[co]|s?o|a|sh|txt|pylintrc|coverage|gitignore|python-version)|LICENSE$")
EXCLUDED_FILES_RE = re.compile(r"\.(pyc|s?o|a|sh|txt|coverage|gitignore|python-version)|LICENSE$")
DOT_EXCLUDED_DIRS_RE = re.compile(r"\.(git|github|mypy_cache|pytest_cache|idea|vscode|.local)")
EXCLUDED_DIRS_RE = re.compile(
r"^(.*\..*egg|.*\..*egg-info|\..git|\..github|\..*mypy_cache|CVS|"
r"\.pytest_cache|bin|\.idea|assets|tests/assets|)$"
r"(\*egg|\*egg-info|CVS|bin|node_modules|__pycache__|json_sources|test_assets|_local|build|dist)"
)


Expand All @@ -22,19 +22,28 @@ def parse_file_list(filename: Path) -> Union[List[Path], NoReturn]:
f.close()
return file_list
except IOError as err:
print(f"ERRO NO ARQUIVO: {err}")
raise err


def expand_directories(path_list: List[Path]) -> List[Path]:
def expand_directories(path_list: List[Path], result_files: Optional[List[Path]] = None) -> Iterator[Path]:
"""Return list with directories replaced by their contained files."""
for path in path_list:
if os.path.isdir(path):
for root, dirnames, filenames in os.walk(path):
for name in filenames:
if not EXCLUDED_RE.search(name):
yield os.path.join(root, name)

dirnames[:] = [d for d in dirnames if not EXCLUDED_DIRS_RE.match(d)]
if result_files is None:
result_files = []

for path in path_list:
if path.is_dir():
for entry in os.scandir(path):
if (
not entry.path.find(".local") > -1
and not entry.path.find("_local") > -1
and not DOT_EXCLUDED_DIRS_RE.search(entry.path)
and not EXCLUDED_DIRS_RE.search(entry.path)
and entry.is_dir()
):
expand_directories(path_list=[Path(entry.path)], result_files=result_files)
if entry.is_file() and not EXCLUDED_FILES_RE.search(entry.name):
result_files.append(Path(entry.path))
else:
yield path
result_files.append(path)
return result_files
2 changes: 1 addition & 1 deletion src/misspelling_lib/utils/version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
def get_version() -> str:
return "2.0.5"
return "2.0.6"
4 changes: 3 additions & 1 deletion src/misspelling_lib/utils/words.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import re
import string
from pathlib import Path
from typing import List

_NORM_REGEX = re.compile(r"(?<=[a-z])(?=[A-Z])")
Expand Down Expand Up @@ -38,6 +39,7 @@ def esc_sed(raw_text: str) -> str:
return raw_text.replace('"', '\\"').replace("/", "\\/")


def esc_file(raw_text: str) -> str:
def esc_file(path_text: Path) -> str:
"""Escape chars for a file name on a shell command line."""
raw_text = path_text.as_posix()
return raw_text.replace("'", "'\"'\"'")
1 change: 0 additions & 1 deletion tests/assets/good_file_list

This file was deleted.

21 changes: 0 additions & 21 deletions tests/assets/various_spellings.good_out

This file was deleted.

File renamed without changes.
File renamed without changes.
File renamed without changes.
1 change: 1 addition & 0 deletions tests/test_assets/good_file_list
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
test_assets/nine_misspellings.c
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
21 changes: 21 additions & 0 deletions tests/test_assets/various_spellings.good_out
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
cp "test_assets/various_spellings.c" "test_assets/various_spellings.c,"
sed "1s/Yuo/You/" "test_assets/various_spellings.c" > "test_assets/various_spellings.c,"
mv "test_assets/various_spellings.c," "test_assets/various_spellings.c"
cp "test_assets/various_spellings.c" "test_assets/various_spellings.c,"
sed "2s/Zeebra/Zebra/" "test_assets/various_spellings.c" > "test_assets/various_spellings.c,"
mv "test_assets/various_spellings.c," "test_assets/various_spellings.c"
cp "test_assets/various_spellings.c" "test_assets/various_spellings.c,"
sed "4s/teh/the/" "test_assets/various_spellings.c" > "test_assets/various_spellings.c,"
mv "test_assets/various_spellings.c," "test_assets/various_spellings.c"
cp "test_assets/various_spellings.c" "test_assets/various_spellings.c,"
sed "4s/zeebra/zebra/" "test_assets/various_spellings.c" > "test_assets/various_spellings.c,"
mv "test_assets/various_spellings.c," "test_assets/various_spellings.c"
cp "test_assets/various_spellings.c" "test_assets/various_spellings.c,"
sed "4s/Rockerfeller/Rockefeller/" "test_assets/various_spellings.c" > "test_assets/various_spellings.c,"
mv "test_assets/various_spellings.c," "test_assets/various_spellings.c"
cp "test_assets/various_spellings.c" "test_assets/various_spellings.c,"
sed "6s/withdrawl/withdraw/" "test_assets/various_spellings.c" > "test_assets/various_spellings.c,"
mv "test_assets/various_spellings.c," "test_assets/various_spellings.c"
cp "test_assets/various_spellings.c" "test_assets/various_spellings.c,"
sed "7s/gardai/gardaí/" "test_assets/various_spellings.c" > "test_assets/various_spellings.c,"
mv "test_assets/various_spellings.c," "test_assets/various_spellings.c"
10 changes: 5 additions & 5 deletions tests/test_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,30 +16,30 @@ def test_missing_ms_list(self):

def test_broken_ms_list(self):
with pytest.raises(ValueError):
MisspellingFileDetector(os.path.join(BASE_PATH, "assets/broken_msl.txt"))
MisspellingFileDetector(os.path.join(BASE_PATH, "test_assets/broken_msl.txt"))

def test_missing_ms_list_for_json_detector(self):
with pytest.raises(IOError):
MisspellingJSONDetector(os.path.join(BASE_PATH, "missing_msl.json"))

def test_broken_ms_list_for_json_detector(self):
with pytest.raises(ValueError):
MisspellingJSONDetector(os.path.join(BASE_PATH, "assets/broken_msl.json"))
MisspellingJSONDetector(os.path.join(BASE_PATH, "test_assets/broken_msl.json"))

def test_missing_file(self):
ms = MisspellingDetector()
errors, _ = ms.check(BASE_PATH / "assets/missing_source.c")
errors, _ = ms.check(BASE_PATH / "test_assets/missing_source.c")
assert errors

def test_good_file(self):
ms = MisspellingDetector()
errors, results = ms.check(BASE_PATH / "assets/nine_misspellings.json")
errors, results = ms.check(BASE_PATH / "test_assets/nine_misspellings.json")
assert len(errors) == 0
assert len(results) == 9

def test_more_complex_file(self):
ms = MisspellingDetector()
errors, results = ms.check(BASE_PATH / "assets/various_spellings.c")
errors, results = ms.check(BASE_PATH / "test_assets/various_spellings.c")
assert len(errors) == 0
assert len(results) == 7

Expand Down
26 changes: 13 additions & 13 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class TestCli:

def test_good_file(self):
p = subprocess.Popen(
[CLI, "assets/nine_misspellings.c"],
[CLI, "test_assets/nine_misspellings.c"],
cwd=TEST_BASE_DIR,
stderr=subprocess.PIPE,
stdout=subprocess.PIPE,
Expand All @@ -32,7 +32,7 @@ def test_good_file(self):

def test_bad_file(self):
p = subprocess.Popen(
[CLI, "assets/missing.c"],
[CLI, "test_assets/missing.c"],
cwd=TEST_BASE_DIR,
stderr=subprocess.PIPE,
stdout=subprocess.PIPE,
Expand All @@ -44,7 +44,7 @@ def test_bad_file(self):

def test_good_flag_f(self):
p = subprocess.Popen(
[CLI, "--file-list", "assets/good_file_list"],
[CLI, "--file-list", "test_assets/good_file_list"],
cwd=TEST_BASE_DIR,
stderr=subprocess.PIPE,
stdout=subprocess.PIPE,
Expand All @@ -56,7 +56,7 @@ def test_good_flag_f(self):

def test_bad_flag_f(self):
p = subprocess.Popen(
[CLI, "--file-list", "assets/broken_file_list"],
[CLI, "--file-list", "test_assets/broken_file_list"],
cwd=TEST_BASE_DIR,
stderr=subprocess.PIPE,
stdout=subprocess.PIPE,
Expand All @@ -72,7 +72,7 @@ def test_bad_flag_m(self):
CLI,
"--dump-misspelling",
"--misspelling-file",
"assets/broken_msl.txt",
"test_assets/broken_msl.txt",
],
cwd=TEST_BASE_DIR,
stderr=subprocess.PIPE,
Expand All @@ -89,7 +89,7 @@ def test_good_flag_m(self):
CLI,
"--dump-misspelling",
"--misspelling-file",
"assets/small_msl.txt",
"test_assets/small_msl.txt",
],
cwd=TEST_BASE_DIR,
stderr=subprocess.PIPE,
Expand All @@ -106,7 +106,7 @@ def test_passing_misspelling_json_file(self):
CLI,
"--dump-misspelling",
"--json-file",
"assets/nine_misspellings.json",
"test_assets/nine_misspellings.json",
],
cwd=TEST_BASE_DIR,
stderr=subprocess.PIPE,
Expand All @@ -119,7 +119,7 @@ def test_passing_misspelling_json_file(self):

def test_bad_flag_s(self):
p = subprocess.Popen(
[CLI, "--script-output", "assets/various_spellings.c"],
[CLI, "--script-output", "test_assets/various_spellings.c"],
cwd=TEST_BASE_DIR,
stderr=subprocess.PIPE,
stdout=subprocess.PIPE,
Expand All @@ -130,16 +130,16 @@ def test_bad_flag_s(self):
assert p.returncode == 2

def test_good_flag_s(self):
test_out = TEST_BASE_DIR.joinpath("assets/various_spellings.test_out")
good_out = TEST_BASE_DIR.joinpath("assets/various_spellings.good_out")
test_out = TEST_BASE_DIR.joinpath("test_assets/various_spellings.test_out")
good_out = TEST_BASE_DIR.joinpath("test_assets/various_spellings.good_out")
if os.path.exists(test_out):
os.unlink(test_out)
p = subprocess.Popen(
[
CLI,
"--script-output",
"assets/various_spellings.test_out",
"assets/various_spellings.c",
"test_assets/various_spellings.test_out",
"test_assets/various_spellings.c",
],
cwd=TEST_BASE_DIR,
stderr=subprocess.PIPE,
Expand All @@ -165,7 +165,7 @@ def test_standard_in(self):
stdout=subprocess.PIPE,
stdin=subprocess.PIPE,
)
(output, error_output) = p.communicate(input="assets/nine_misspellings.c\n".encode("utf8"))
(output, error_output) = p.communicate(input="test_assets/nine_misspellings.c\n".encode("utf8"))
assert error_output.decode() == ""
assert len(output.decode().split("\n")) == 10
assert p.returncode == 2

0 comments on commit e108263

Please sign in to comment.