Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ Tests run in Docker because enchant requires native system libraries. Lint runs
Pure Python library for multi-language spell-checking backed by `pyenchant`.

**Public API** (`spellchecks/__init__.py` — 5 exports):
- `SpellChecker` — frozen dataclass (kw_only, slots); `SpellChecker(language=..., exclusion_words=(), exclude_urls=True, cache_size=10000, max_suggestions=0).check(text) -> list[OneCorrection]`
- `SpellChecker` — class with `__slots__` (kw_only); `SpellChecker(language=..., exclusion_words=(), exclude_urls=True, cache_size=10000, max_suggestions=0).check(text) -> list[OneCorrection]`
- `OneCorrection` — frozen dataclass: `first_position`, `last_position`, `word`, `suggestions: tuple[str, ...]`
- `FileProvider` — sync file-backed user word list; `__init__(base_path)`, `get_words/add_word/remove_word(user_name, ...)`
- `DummyProvider` — no-op implementation of the same interface
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ corrections = checker.check("Привет Превед")
- `language` — one of the supported language codes
- `exclusion_words` — iterable of words to ignore (case-insensitive)
- `exclude_urls` — automatically ignore URLs found in the text (default `True`)
- `cache_size` — LRU cache size for suggestions; `0` disables caching (default `10000`)
- `cache_size` — LRU cache size for suggestions; must be > 0 (default `10000`)
- `max_suggestions` — max suggestions per misspelled word; `0` = unlimited (default `0`)

`SpellChecker.check(text)`
Expand Down
235 changes: 0 additions & 235 deletions plan.md

This file was deleted.

6 changes: 3 additions & 3 deletions spellchecks/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from ._checker import SpellChecker
from ._dict import DummyProvider, FileProvider, UserDictProtocol
from ._models import OneCorrection
from spellchecks._checker import SpellChecker
from spellchecks._dict import DummyProvider, FileProvider, UserDictProtocol
from spellchecks._models import OneCorrection


__all__ = ["DummyProvider", "FileProvider", "OneCorrection", "SpellChecker", "UserDictProtocol"]
61 changes: 37 additions & 24 deletions spellchecks/_checker.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,48 @@
import re
import typing
from dataclasses import dataclass, field

import cachebox
import urlextract
from enchant.checker import SpellChecker as _EnchantChecker

from ._models import OneCorrection
from spellchecks._models import OneCorrection


_URL_SPLITTER: typing.Final[re.Pattern[str]] = re.compile(r"\.|\:|\/\/|\/|\?|\&|\=|\+|\#|\-")
_URL_EXTRACTOR: urlextract.URLExtract = urlextract.URLExtract()


@dataclass(kw_only=True, slots=True, frozen=True)
class SpellChecker:
language: str
exclusion_words: typing.Iterable[str] = field(default_factory=tuple)
exclude_urls: bool = True
cache_size: int = 10000
max_suggestions: int = 0
_cache: dict[str, list[str]] = field(init=False, repr=False, compare=False)

def __post_init__(self) -> None:
object.__setattr__(
self,
"_cache",
typing.cast("dict[str, list[str]]", cachebox.LRUCache[str, list[str]](self.cache_size))
if self.cache_size > 0
else {},
)
__slots__ = (
"_cache",
"_enchant_checker",
"_exclusions",
"cache_size",
"exclude_urls",
"language",
"max_suggestions",
)

def __init__(
    self,
    *,
    language: str,
    exclusion_words: typing.Iterable[str] = (),
    exclude_urls: bool = True,
    cache_size: int = 10000,
    max_suggestions: int = 0,
) -> None:
    """Build a spell checker bound to a single language.

    Args:
        language: Language code handed to the enchant checker.
        exclusion_words: Words to ignore; stored lower-cased so that
            matching is case-insensitive.
        exclude_urls: Whether URL fragments found in the text are ignored.
        cache_size: Capacity of the LRU suggestions cache; must be > 0.
        max_suggestions: Maximum suggestions per word; ``0`` means unlimited.

    Raises:
        ValueError: If ``cache_size`` is not greater than 0.
    """
    # Validate before touching any attribute so that an invalid argument
    # never leaves a partially initialised instance behind.
    if cache_size <= 0:
        msg = "cache_size must be greater than 0"
        raise ValueError(msg)

    self.language = language
    self.exclude_urls = exclude_urls
    self.cache_size = cache_size
    self.max_suggestions = max_suggestions

    self._cache: cachebox.LRUCache[str, list[str]] = cachebox.LRUCache(cache_size)
    # The enchant checker is created once here and reused by later calls.
    self._enchant_checker: _EnchantChecker = _EnchantChecker(language)
    # Normalised once up front; a frozenset gives O(1) membership tests.
    self._exclusions: frozenset[str] = frozenset(w.lower() for w in exclusion_words)

def _get_suggestions(self, checker: _EnchantChecker) -> tuple[str, ...]:
word: str = typing.cast(str, checker.word)
Expand All @@ -42,14 +55,14 @@ def _get_suggestions(self, checker: _EnchantChecker) -> tuple[str, ...]:
return tuple(suggestions[: self.max_suggestions] if self.max_suggestions > 0 else suggestions)

def check(self, text: str) -> list[OneCorrection]:
exclusions: list[str] = [w.lower() for w in self.exclusion_words]
exclusions: set[str] = set(self._exclusions)
if self.exclude_urls:
for url in _URL_EXTRACTOR.find_urls(text):
exclusions.extend({w.lower() for w in re.split(_URL_SPLITTER, str(url)) if w})
engine = _EnchantChecker(self.language)
engine.set_text(text)
exclusions.update(
w.lower() for url in _URL_EXTRACTOR.find_urls(text) for w in re.split(_URL_SPLITTER, str(url)) if w
)
self._enchant_checker.set_text(text)
result: list[OneCorrection] = []
for item in engine:
for item in self._enchant_checker:
if typing.cast(str, item.word).lower() in exclusions:
continue
result.append(
Expand Down
17 changes: 14 additions & 3 deletions tests/test_spell.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@
RU_LANG: typing.Final = "ru_RU"


@pytest.fixture(scope="session")
def spell_checker() -> SpellChecker:
    """Provide a single ``SpellChecker`` for ``RU_LANG`` shared by the whole test session."""
    checker = SpellChecker(language=RU_LANG)
    return checker


@pytest.mark.parametrize(
("text_input", "expected_corrections"),
[
Expand All @@ -28,10 +33,11 @@
],
)
def test_correct_spell(
spell_checker: SpellChecker,
text_input: str,
expected_corrections: list[tuple[str, int, int, str | None]],
) -> None:
corrections = SpellChecker(language=RU_LANG).check(text_input)
corrections = spell_checker.check(text_input)
assert len(corrections) == len(expected_corrections)
for one_correction, (word, first_position, last_position, suggestion) in zip(
corrections,
Expand Down Expand Up @@ -77,7 +83,12 @@ def test_exclusion_words() -> None:
assert corrections == []


def test_suggestions_cache_hit() -> None:
corrections = SpellChecker(language=RU_LANG).check("превет превет")
def test_suggestions_cache_hit(spell_checker: SpellChecker) -> None:
    """The same misspelling appearing twice must yield identical suggestions."""
    found = spell_checker.check("превет превет")
    assert len(found) == 2
    first, second = found
    assert first.suggestions == second.suggestions


def test_invalid_cache_size() -> None:
    """Constructing a checker with ``cache_size=0`` must raise ``ValueError``."""
    expected_message = "cache_size must be greater than 0"
    with pytest.raises(ValueError, match=expected_message):
        SpellChecker(language=RU_LANG, cache_size=0)
Loading