/
spelling.py
194 lines (147 loc) · 5.63 KB
/
spelling.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
"""Spell checking."""
import re
from typing import Dict
from typing import List
from typing import Optional
import pygls.uris as Uri
from docutils import nodes
from lsprotocol.types import CodeAction
from lsprotocol.types import CodeActionKind
from lsprotocol.types import CodeActionParams
from lsprotocol.types import Diagnostic
from lsprotocol.types import DiagnosticSeverity
from lsprotocol.types import DidSaveTextDocumentParams
from lsprotocol.types import Position
from lsprotocol.types import Range
from lsprotocol.types import TextEdit
from lsprotocol.types import WorkspaceEdit
from spellchecker import SpellChecker # type: ignore
from esbonio.lsp.rst import LanguageFeature
from esbonio.lsp.sphinx import SphinxLanguageServer
# Membership is tested against the exact ``type(parent)`` of each Text node, so
# subclasses of these node classes are not matched by this set.
IGNORED_NODES = {
    nodes.literal,
    nodes.literal_block,
    nodes.raw,
}
"""Don't spell check Text contained in any of these nodes."""
class Spelling(LanguageFeature):
    """Language feature that reports mis-spelled words and offers corrections.

    Mis-spellings are published as ``spellcheck[en]`` diagnostics each time a
    document is checked, and the most recent results are cached per document
    uri so that code actions can be built without checking the document again.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.lang = SpellChecker()
        # Most recent spell check results, keyed by document uri.
        self.errors: Dict[str, List[MisSpelling]] = {}

    def code_action(self, params: CodeActionParams) -> List[CodeAction]:
        """Build quick fixes for the mis-spellings in the requested document.

        Parameters
        ----------
        params
           The code action request. When its context carries diagnostics, only
           mis-spellings whose range equals one of them receive fixes; when it
           carries none, every known mis-spelling in the document does.

        Returns
        -------
        List[CodeAction]
           One ``QuickFix`` per candidate correction, each replacing the
           mis-spelled word with that candidate.
        """
        uri = params.text_document.uri

        # Kept as a list and compared with ``==`` so that the filtering below
        # does not depend on ``Range`` objects being hashable.
        ranges = [d.range for d in params.context.diagnostics]

        errors = self.errors.get(uri)
        if errors is None:
            # Nothing cached for this document yet, check it now.
            errors = self.find_errors_for_uri(uri)

        actions = []
        for error, diagnostic in zip(errors, errors_to_diagnostics(errors)):
            if ranges and diagnostic.range not in ranges:
                continue

            fixes = self.lang.candidates(error.text)
            if fixes is None:
                continue

            # The candidates come back as an unordered collection; sorting them
            # keeps the order of the offered fixes stable between requests.
            for fix in sorted(fixes):
                edit = TextEdit(range=diagnostic.range, new_text=fix)
                actions.append(
                    CodeAction(
                        title=f"Correct '{error.text}' -> '{fix}'",
                        kind=CodeActionKind.QuickFix,
                        diagnostics=[diagnostic],
                        edit=WorkspaceEdit(changes={uri: [edit]}),
                    )
                )

        return actions

    def save(self, params: DidSaveTextDocumentParams):
        """Re-run the spell check for the document that has just been saved."""
        self.find_errors_for_uri(params.text_document.uri)

    def find_errors_for_uri(self, uri: str) -> List["MisSpelling"]:
        """Find any mis-spellings in the given document.

        The results are stored in ``self.errors`` and published as
        ``spellcheck[en]`` diagnostics before being returned.

        Parameters
        ----------
        uri
           The uri of the document to check.

        Returns
        -------
        List[MisSpelling]
           The mis-spelled words that were found. Empty when no doctree is
           available for the document, in which case neither the cache nor the
           published diagnostics are touched.
        """
        doctree = self.rst.get_doctree(uri=uri)
        if doctree is None:
            return []

        # The same for every node, so resolve it once rather than per node.
        filepath = Uri.to_fs_path(uri)

        errors = []
        for text in doctree.traverse(condition=nodes.Text):
            parent = text.parent

            # Don't spell check code block, raw blocks etc.
            if type(parent) in IGNORED_NODES:
                continue

            # Don't spell check text we cannot tie back to the user's actual
            # source file
            self.logger.debug("%s: %s", type(parent), parent.source)
            if parent.source != filepath:
                continue

            # Without a line number the text cannot be mapped onto a position
            # in the document, and the line arithmetic done when locating words
            # and building diagnostics would fail on ``None``.
            if parent.line is None:
                continue

            for word in find_words(
                text.astext(), startline=parent.line, source=parent.source
            ):
                # Ignore short words or any "word" that contains digits or other
                # punctuation.
                if len(word) <= 1 or re.search("[-/\\_\\d.=']", str(word)):
                    continue

                if self.lang.unknown([str(word)]):
                    errors.append(word)

        self.errors[uri] = errors
        self.rst.set_diagnostics("spellcheck[en]", uri, errors_to_diagnostics(errors))
        self.rst.sync_diagnostics()

        return errors
class MisSpelling:
    """A word found in a document, together with where it was found.

    ``line`` and ``character`` locate the first character of the word, ``text``
    holds the word itself and ``source`` names the file it came from, if known.
    Instances are mutable: ``text`` may be extended after construction.
    """

    def __init__(self, line: int, character: int, text: str, source: Optional[str]):
        self.line, self.character = line, character
        self.text, self.source = text, source

    def __repr__(self):
        return f"MisSpelling<{self.line}:{self.character}, {self.text}>"

    def __str__(self):
        # The string form is just the word, location details live in ``repr``.
        return self.text

    def __len__(self):
        # Number of characters in the word.
        word = self.text
        return len(word)

    def __contains__(self, item):
        # Substring test against the word.
        word = self.text
        return item in word
def errors_to_diagnostics(errors: List[MisSpelling]) -> List[Diagnostic]:
    """Convert mis-spelled words into warning diagnostics.

    Each diagnostic sits on a single line: it starts at the word's
    ``character`` offset and extends for the length of the word. One is
    subtracted from the word's ``line`` to obtain the diagnostic's line number.

    Parameters
    ----------
    errors
       The mis-spelled words to report.

    Returns
    -------
    List[Diagnostic]
       One diagnostic per word, in the same order as ``errors``.
    """

    def as_diagnostic(error: MisSpelling) -> Diagnostic:
        first = Position(line=error.line - 1, character=error.character)
        last = Position(
            line=error.line - 1, character=error.character + len(error)
        )

        return Diagnostic(
            range=Range(start=first, end=last),
            message=f"Incorrect spelling: '{error}'",
            severity=DiagnosticSeverity.Warning,
            source="spellcheck[en]",
        )

    return [as_diagnostic(error) for error in errors]
def find_words(
    text: str, startline: int = 0, source: Optional[str] = None
) -> List[MisSpelling]:
    """Split ``text`` into words, recording where each one starts.

    Words are separated by spaces and newlines. The characters ``,'"()[]`` are
    stripped from the start and end of a word, but kept when they appear inside
    one (e.g. the apostrophe in ``don't``). This keeps the length of a word's
    text equal to the number of columns it occupies, so that a range built from
    its position and length covers the whole word.

    Parameters
    ----------
    text
       The text to split.
    startline
       The line number assigned to the first line of ``text``, it is
       incremented for each newline encountered.
    source
       Passed through unchanged to each word that is created.

    Returns
    -------
    List[MisSpelling]
       The words in the order they appear. ``character`` is the offset of the
       word's first character within its line of ``text``.
    """
    words = []
    delimiters = " \n"
    skip_characters = ",'\"()[]"

    current_word = None
    # Skipped characters seen since the last character kept in the current
    # word. They are only added to the word if more of the word follows.
    pending = ""
    line = startline

    col = -1
    for c in text:
        col += 1

        if c in delimiters:
            if current_word is not None:
                words.append(current_word)
                current_word = None

            # Anything still pending was trailing punctuation, drop it.
            pending = ""

            if c == "\n":
                line += 1
                col = -1

            continue

        if c in skip_characters:
            # Leading punctuation is ignored outright, the word starts at the
            # first character that follows it.
            if current_word is not None:
                pending += c

            continue

        if current_word is None:
            current_word = MisSpelling(line, col, c, source)
        else:
            # The pending characters turned out to be inside the word.
            current_word.text += pending + c

        pending = ""

    if current_word is not None:
        words.append(current_word)

    return words
def esbonio_setup(rst: SphinxLanguageServer):
    """Create the spell checking feature and add it to the language server."""
    spelling = Spelling(rst)
    rst.add_feature(spelling)