/
fuzzy_best_match.py
101 lines (84 loc) · 4.1 KB
/
fuzzy_best_match.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
from fuzzywuzzy import fuzz
from fuzzywuzzy import process as fuzz_process
import regex
from will import settings
from will.decorators import require_settings
from will.utils import Bunch
from .base import GenerationBackend, GeneratedOption
class FuzzyBestMatch(GenerationBackend):
    """Generation backend that proposes at most one option per message.

    The message text is fuzzy-compared against the ``regex_pattern`` string of
    every registered listener; only the single best-scoring listener is
    considered, and only if its score reaches
    ``settings.FUZZY_MINIMUM_MATCH_CONFIDENCE``.
    """

    def _generate_compiled_regex(self, method_meta):
        """Return a cached, error-tolerant compiled regex for a listener.

        Args:
            method_meta: Listener metadata mapping. Reads ``regex_pattern``,
                ``multiline`` and, when present, ``case_sensitive``.

        Returns:
            A pattern compiled with the ``regex`` module, carrying an
            ``{e<=N}`` fuzzy constraint where ``N`` is
            ``settings.FUZZY_REGEX_ALLOWABLE_ERRORS``.
        """
        if not hasattr(self, "cached_regex"):
            self.cached_regex = {}

        regex_string = method_meta["regex_pattern"]
        if "case_sensitive" in method_meta and not method_meta["case_sensitive"]:
            regex_string = "(?i)%s" % regex_string

        is_multiline = bool(method_meta["multiline"])

        # Key the cache on the actual compile inputs. Keying on the plugin's
        # parent path (as before) hands back the wrong regex whenever two
        # listeners share that path.
        cache_key = (regex_string, is_multiline)
        if cache_key in self.cached_regex:
            return self.cached_regex[cache_key]

        flags = regex.ENHANCEMATCH
        if is_multiline:
            flags = regex.MULTILINE | regex.DOTALL | flags

        allowable_errors = settings.FUZZY_REGEX_ALLOWABLE_ERRORS
        # NOTE(review): the fuzzy constraint is appended without grouping, so
        # it presumably binds only to the last item of the pattern - confirm
        # whether whole-pattern fuzziness was intended.
        try:
            compiled = regex.compile(
                "%s{e<=%s}" % (regex_string, allowable_errors), flags
            )
        except Exception:
            # Best-effort fallback: if the pattern does not compile once the
            # constraint is appended, match it as literal text instead.
            # ``Exception`` (not a bare except) so that KeyboardInterrupt and
            # SystemExit still propagate.
            compiled = regex.compile(
                "%s{e<=%s}" % (regex.escape(regex_string), allowable_errors), flags
            )

        self.cached_regex[cache_key] = compiled
        return compiled

    def do_generate(self, event):
        """Build the list of generated options for an incoming event.

        Args:
            event: Event whose ``data`` attribute is the message to match.

        Returns:
            A list holding zero or one ``GeneratedOption``. The option's
            context copies the best listener's metadata (minus ``fn``) and
            adds ``search_matches``: the named groups of the fuzzy regex
            match, or an empty dict when that regex does not match.
        """
        exclude_list = ["fn", ]
        matches = []
        message = event.data

        # TODO: add token_sort_ratio
        if not hasattr(self, "match_choices"):
            self.match_choices = []
            self.match_methods = {}

        if not message.content:
            return matches

        # Register any listener patterns not seen on earlier calls.
        for listener in self.bot.message_listeners.values():
            pattern = listener["regex_pattern"]
            if pattern not in self.match_methods:
                self.match_methods[pattern] = listener
                self.match_choices.append(pattern)

        best = fuzz_process.extractOne(message.content, self.match_choices)
        if best is None:
            # extractOne yields None when there is nothing to choose from
            # (e.g. no listeners registered); unpacking it would raise.
            return matches
        match_str, confidence = best
        best_listener = self.match_methods[match_str]

        if confidence < settings.FUZZY_MINIMUM_MATCH_CONFIDENCE:
            return matches

        # A strict match of the listener's own regex is not required here;
        # only the sender and addressing rules below gate the option.

        # It's not from me, or this listener includes me.
        if not (message.will_said_it is False or best_listener.get("include_me")):
            return matches

        # I'm mentioned, or this is an overheard, or we're in a 1-1.
        if not (
            message.is_private_chat
            or not best_listener.get("direct_mentions_only")
            or message.is_direct
        ):
            return matches

        fuzzy_regex = self._generate_compiled_regex(best_listener)
        regex_matches = fuzzy_regex.search(message.content)

        context = Bunch()
        for key, value in best_listener.items():
            if key not in exclude_list:
                context[key] = value

        if regex_matches and hasattr(regex_matches, "groupdict"):
            context.search_matches = regex_matches.groupdict()
        else:
            context.search_matches = {}

        matches.append(
            GeneratedOption(context=context, backend="regex", score=confidence)
        )
        return matches