-
Notifications
You must be signed in to change notification settings - Fork 101
/
utils.py
128 lines (106 loc) · 4.53 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# pyenchant
#
# Copyright (C) 2004-2008 Ryan Kelly
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.
#
# In addition, as a special exception, you are
# given permission to link the code of this program with
# non-LGPL Spelling Provider libraries (eg: a MSFT Office
# spell checker backend) and distribute linked combinations including
# the two. You must obey the GNU Lesser General Public License in all
# respects for all of the code used other than said providers. If you modify
# this file, you may extend this exception to your version of the
# file, but you are not obligated to do so. If you do not wish to
# do so, delete this exception statement from your version.
#
"""
enchant.utils: Misc utilities for the enchant package
========================================================
This module provides miscellaneous utilities for use with the
enchant spellchecking package. Currently available functionality
includes:
* functions for dealing with locale/language settings
* functions for ranking and trimming lists of spelling suggestions
* ability to list supporting data files (win32 only)
* functions for bundling supporting data files from a build
"""
import locale
from typing import Callable, Iterable, List, Optional, Sequence # noqa F401
from enchant.errors import * # noqa F401,F403
from enchant.errors import Error
def levenshtein(s1: str, s2: str) -> int:
    """Return the Levenshtein (edit) distance between two strings.

    The distance is the minimum number of single-character insertions,
    deletions and substitutions needed to turn one string into the other.
    The algorithm follows the row-by-row formulation described on
    `Wikipedia <https://en.wikipedia.org/wiki/Levenshtein_distance>`_.
    """
    # Walk the longer string in the outer loop so that each row is only
    # as wide as the shorter string.
    if len(s1) >= len(s2):
        longer, shorter = s1, s2
    else:
        longer, shorter = s2, s1

    # If even the longer string is empty, both are empty.
    if not longer:
        return len(shorter)

    # Row 0: cost of building each prefix of `shorter` from nothing.
    prev_row = list(range(len(shorter) + 1))
    for row_no, ch_long in enumerate(longer, start=1):
        # First cell: cost of deleting the first `row_no` chars of `longer`.
        row = [row_no]
        for col, ch_short in enumerate(shorter):
            cost_insert = prev_row[col + 1] + 1
            cost_delete = row[col] + 1
            # The comparison yields a bool, which adds as 0 or 1.
            cost_subst = prev_row[col] + (ch_long != ch_short)
            row.append(min(cost_insert, cost_delete, cost_subst))
        prev_row = row

    return prev_row[-1]
def trim_suggestions(
    word: str,
    suggs: Iterable[str],
    maxlen: int,
    calcdist: Optional[Callable[[str, str], int]] = None,
) -> List[str]:
    """Trim a list of suggestions to a maximum length.

    If the list of suggested words is too long, you can use this function
    to trim it down to a maximum length. It tries to keep the "best"
    suggestions based on similarity to the original word.

    :param word: the word the suggestions were generated for
    :param suggs: the candidate suggestions; any iterable of strings
    :param maxlen: the maximum number of suggestions to return; zero or
        a negative value yields an empty list
    :param calcdist: optional callable taking two words and returning the
        distance between them. It is used to decide which words to retain.
        The default is a simple Levenshtein distance.
    :return: at most `maxlen` suggestions, closest first. Suggestions at
        the same distance are ordered by plain string comparison.
    """
    # A negative bound would otherwise be read by the slice below as
    # "drop that many items from the end", returning a non-empty list.
    if maxlen <= 0:
        return []
    if calcdist is None:
        calcdist = levenshtein
    # Sorting (distance, suggestion) pairs ranks by distance first and
    # falls back to the suggestion text to break ties.
    ranked = sorted((calcdist(word, s), s) for s in suggs)
    return [sugg for _dist, sugg in ranked[:maxlen]]
def get_default_language(default: Optional[str] = None) -> Optional[str]:
    """Determine the user's default language, if possible.

    This function uses the :py:mod:`locale` module to try to determine
    the user's preferred language. The return value is as
    follows:

        * if the current locale of the running program names a language,
          that language is used
        * otherwise, if a default locale is available, that language is used
        * otherwise, if the keyword argument `default` is given, it is used
        * if nothing else works, `None` is returned

    Any error raised while querying the locale is treated the same as
    "no language found", so this function never raises.

    Note that determining the user's language is in general only
    possible if they have set the necessary environment variables
    on their system.
    """
    try:
        # getlocale() is called without a category argument, so it reports
        # whichever category the locale module uses by default.
        tag = locale.getlocale()[0]
        if tag is None:
            # NOTE(review): getdefaultlocale() is documented as deprecated
            # since Python 3.11; if it is ever missing, the AttributeError
            # is absorbed by the handler below and `default` is returned.
            tag = locale.getdefaultlocale()[0]
    except Exception:
        # Deliberately broad: this is a best-effort probe, and a locale
        # that cannot be parsed must not break the caller.
        return default
    return default if tag is None else tag


# presumably lets a docstring spell check accept the token "LC" -- confirm
get_default_language._DOC_ERRORS = ["LC"]  # type: ignore