-
Notifications
You must be signed in to change notification settings - Fork 759
/
formatters.py
364 lines (304 loc) · 12.4 KB
/
formatters.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
import logging
import re
from typing import Union
from talon import Context, Module, actions, app
from talon.grammar import Phrase
# Talon context; its lists are populated via `ctx.lists[...]` further down.
ctx = Context()
# Shorthand aliases for `actions.key` and `actions.edit`.
key = actions.key
edit = actions.edit
# Minor words that title casing leaves in lowercase unless they are the first
# or last word of the phrase or follow punctuation (see `title_case`).
words_to_keep_lowercase = (
    "a an and as at but by en for if in nor of on or per the to v via vs".split()
)
# The last phrase spoken, without & with formatting. Used for reformatting.
# Both are overwritten by every call to `format_phrase`.
last_phrase = ""
last_phrase_formatted = ""
# Internally, a formatter is a pair (sep, fn).
#
# - sep: a boolean, true iff the formatter should leave spaces between words.
#   We use SEP & NOSEP for this for clarity.
#
# - fn: a function (i, word, is_end) --> formatted_word, called on each `word`.
#   `i` is the word's index in the list, and `is_end` is True iff it's the
#   last word in the list.
SEP = True
NOSEP = False
def format_phrase(m: Union[str, Phrase], formatters: str):
    """Format `m` with the comma-separated `formatters` and record the result.

    A plain string is split on single spaces; anything else is run through
    `actions.dictate.parse_words` and `actions.dictate.replace_words`.

    Side effects: stores the raw input in the module-level `last_phrase`, the
    formatted output in `last_phrase_formatted`, and appends the output to the
    phrase history through `actions.user.add_phrase_to_history`.

    Returns the formatted string.
    """
    global last_phrase, last_phrase_formatted
    last_phrase = m

    if isinstance(m, str):
        word_list = m.split(" ")
    else:
        # A trailing "over" used to be stripped from m.words here. That was
        # disabled by rntz (2022-02-10 / 2022-06-21) as believed unnecessary.
        parsed = actions.dictate.parse_words(m)
        word_list = actions.dictate.replace_words(parsed)

    formatted = format_phrase_without_adding_to_history(word_list, formatters)
    last_phrase_formatted = formatted

    # Arguably, history should be handled somewhere later down the line, but
    # a bunch of code relies on it happening here. -rntz, 2020-11-04
    actions.user.add_phrase_to_history(formatted)
    return formatted
def format_phrase_without_adding_to_history(word_list, formatters: str):
    """Apply the comma-separated `formatters` to `word_list` and join the words.

    Each name is looked up in `all_formatters`, yielding a pair
    (keep_spaces, function). The words are joined with a space only if every
    selected formatter keeps spaces; otherwise they are joined with nothing.
    The functions are applied to each word in reverse order of how the
    formatters were listed.
    """
    selected = [all_formatters[name] for name in formatters.split(",")]
    keep_spaces = all(keeps for keeps, _ in selected)
    word_functions = [function for _, function in reversed(selected)]

    final_index = len(word_list) - 1
    formatted_words = []
    for index, word in enumerate(word_list):
        is_end = index == final_index
        for function in word_functions:
            word = function(index, word, is_end)
        formatted_words.append(word)

    joiner = " " if keep_spaces else ""
    return joiner.join(formatted_words)
# Formatter helpers
def surround(by):
    """Return a word function that wraps the whole phrase in `by`.

    `by` is prepended to the first word (index 0) and appended to the last
    word; every other word passes through unchanged.
    """

    def surround_word(i, word, last):
        prefix = by if i == 0 else ""
        suffix = by if last else ""
        return prefix + word + suffix

    return surround_word
def words_with_joiner(joiner):
    """Pass through words unchanged, but add a separator between them.

    Returns a complete (NOSEP, function) formatter pair; the function puts
    `joiner` in front of every word except the first.
    """

    def join_word(i, word, _):
        lead = "" if i == 0 else joiner
        return lead + word

    return (NOSEP, join_word)
def first_vs_rest(first_func, rest_func=lambda w: w):
    """Build a word function that treats the first word differently.

    `first_func` transforms the word at index 0 and `rest_func` transforms
    every later word.

    Omit `rest_func` to pass all words after the first through unchanged.
    Pass None (or any falsy value) as `first_func` to pass the first word
    through unchanged.
    """
    if not first_func:

        def first_func(w):
            return w

    def transform(i, word, _):
        if i == 0:
            return first_func(word)
        return rest_func(word)

    return transform
def title_case():
    """Return a stateful word function that applies title-case rules.

    The returned function has the formatter signature (i, word, is_end).
    Only words that are entirely lowercase are touched. Such a word is
    capitalized unless it is listed in `words_to_keep_lowercase`; a listed
    word is still capitalized when it is the first word (i == 0), the last
    word (is_end), or follows a word ending in a non-alphanumeric character.

    Hyphenated words are title-cased component by component, and leading
    non-word characters (e.g. an opening quote) are skipped when capitalizing.

    The function remembers the previous non-empty word between calls. That
    memory is reset whenever a phrase starts (i == 0), so one instance can be
    reused safely across phrases.
    """
    last_word = None  # most recent non-empty word of the current phrase

    def title_case_word(i, word, is_end):
        nonlocal last_word
        if i == 0:
            # Start of a new phrase: drop whatever the previous phrase left.
            last_word = None
        if word.islower() and (  # contains only lowercase letters
            word not in words_to_keep_lowercase
            or i == 0
            or is_end
            # Title case subsequent words if they follow punctuation. The
            # None check avoids indexing a missing previous word; empty
            # words (from doubled or leading spaces) are never stored, so
            # last_word[-1] is always valid here.
            or (last_word is not None and not last_word[-1].isalnum())
        ):
            if "-" in word:
                components = word.split("-")
                title_case_component = title_case()
                components = [
                    title_case_component(j, component, j == len(components) - 1)
                    for j, component in enumerate(components)
                ]
                word = "-".join(components)
            elif word_start := re.match(r"\W*", word).end():
                # word begins with non-alphanumeric characters
                word = word[:word_start] + word[word_start:].capitalize()
            else:
                word = word.capitalize()
        if word:
            # Skip empty words so punctuation detection looks at real text.
            last_word = word
        return word

    return title_case_word
def every_word(word_func):
    """Return a word function that applies `word_func` to every word.

    The index and end-of-phrase flag are accepted but ignored.
    """

    def apply_to_word(i, word, _):
        return word_func(word)

    return apply_to_word
# Registry of all formatters (code and prose), keyed by canonical name.
# Every value is a (SEP | NOSEP, word_function) pair.
formatters_dict = {
    "NOOP": (SEP, lambda _i, word, _is_end: word),
    "DOUBLE_UNDERSCORE": (NOSEP, first_vs_rest(lambda w: f"__{w}__")),
    "PRIVATE_CAMEL_CASE": (NOSEP, first_vs_rest(str.lower, str.capitalize)),
    "PROTECTED_CAMEL_CASE": (NOSEP, first_vs_rest(str.lower, str.capitalize)),
    "PUBLIC_CAMEL_CASE": (NOSEP, every_word(str.capitalize)),
    "SNAKE_CASE": (NOSEP, first_vs_rest(str.lower, lambda w: "_" + w.lower())),
    "NO_SPACES": (NOSEP, every_word(lambda w: w)),
    "DASH_SEPARATED": words_with_joiner("-"),
    "TERMINAL_DASH_SEPARATED": (
        NOSEP,
        first_vs_rest(lambda w: " --" + w.lower(), lambda w: "-" + w.lower()),
    ),
    "DOUBLE_COLON_SEPARATED": words_with_joiner("::"),
    "ALL_CAPS": (SEP, every_word(str.upper)),
    "ALL_LOWERCASE": (SEP, every_word(str.lower)),
    "DOUBLE_QUOTED_STRING": (SEP, surround('"')),
    "SINGLE_QUOTED_STRING": (SEP, surround("'")),
    "SPACE_SURROUNDED_STRING": (SEP, surround(" ")),
    "DOT_SEPARATED": words_with_joiner("."),
    # Leading dot on the first word, underscore before every later word.
    "DOT_SNAKE": (NOSEP, lambda i, word, _: ("." if i == 0 else "_") + word),
    "ALL_SLASHES": (NOSEP, every_word(lambda w: "/" + w)),
    "SLASH_SEPARATED": words_with_joiner("/"),
    # A fresh title_case() per call, invoked as if the word were both first
    # and last, so the first word is always considered for capitalization.
    "CAPITALIZE_FIRST_WORD": (
        SEP,
        first_vs_rest(lambda w: title_case()(0, w, True)),
    ),
    "CAPITALIZE_ALL_WORDS": (SEP, title_case()),
}
# Mapping from spoken phrases to formatter names, for the formatters
# typically applied to code. Keys are what the user says; values are keys
# of `formatters_dict`.
code_formatter_names = {
    "all cap": "ALL_CAPS",
    "all down": "ALL_LOWERCASE",
    "camel": "PRIVATE_CAMEL_CASE",
    "dotted": "DOT_SEPARATED",
    "dub string": "DOUBLE_QUOTED_STRING",
    "dunder": "DOUBLE_UNDERSCORE",
    "hammer": "PUBLIC_CAMEL_CASE",
    "kebab": "DASH_SEPARATED",
    "packed": "DOUBLE_COLON_SEPARATED",
    "padded": "SPACE_SURROUNDED_STRING",
    "slasher": "ALL_SLASHES",
    "conga": "SLASH_SEPARATED",
    "smash": "NO_SPACES",
    "snake": "SNAKE_CASE",
    "string": "SINGLE_QUOTED_STRING",
}
# Spoken phrases for the prose formatters, mapped to keys of
# `formatters_dict`. Text reformatted with exactly one of these is not passed
# through `unformat_text` first (see the `all_prose_formatters` checks).
prose_formatter_names = {
    "say": "NOOP",
    "speak": "NOOP",
    "sentence": "CAPITALIZE_FIRST_WORD",
    "title": "CAPITALIZE_ALL_WORDS",
}
# Spoken phrase -> (sep, fn) formatter pair, for code and prose alike.
formatter_words = {
    phrase: formatters_dict[name]
    for phrase, name in {**code_formatter_names, **prose_formatter_names}.items()
}
# Flat list holding every prose formatter's spoken phrase and canonical name,
# so a prose formatter is recognised under either spelling.
all_prose_formatters = [
    entry for pair in prose_formatter_names.items() for entry in pair
]
# Formatters may be referenced by either their names or their spoken forms.
all_formatters = {**formatters_dict, **formatter_words}
# Talon module on which this file declares its lists, captures and actions.
mod = Module()
# List declarations; their contents are assigned through `ctx.lists` further
# down in this file.
mod.list("formatters", desc="list of all formatters (code and prose)")
mod.list("code_formatter", desc="list of formatters typically applied to code")
mod.list(
    "prose_formatter", desc="list of prose formatters (words to start dictating prose)"
)
@mod.capture(rule="{self.formatters}+")
def formatters(m) -> str:
    """Return the matched `self.formatters` entries as one comma-separated string."""
    matched = m.formatters_list
    return ",".join(matched)
@mod.capture(rule="{self.code_formatter}+")
def code_formatters(m) -> str:
    """Return the matched `self.code_formatter` entries as one comma-separated string."""
    matched = m.code_formatter_list
    return ",".join(matched)
@mod.capture(
    # Note that if the user speaks something like "snake dot", it will
    # insert "dot" - otherwise, they wouldn't be able to insert punctuation
    # words directly.
    rule="<self.formatters> <user.text> (<user.text> | <user.formatter_immune>)*"
)
def format_text(m) -> str:
    """Format the captured text chunks and return them as a single string.

    The first capture element is the comma-separated formatter string. Each
    following chunk is formatted with `format_phrase`, except `ImmuneString`
    chunks, whose text is emitted verbatim. Results are concatenated with no
    separator.
    """
    formatters = m[0]
    pieces = [
        chunk.string
        if isinstance(chunk, ImmuneString)
        else format_phrase(chunk, formatters)
        for chunk in m[1:]
    ]
    return "".join(pieces)
@mod.capture(
    rule="<self.code_formatters> <user.text> (<user.text> | <user.formatter_immune>)*"
)
def format_code(m) -> str:
    """Format captured code text; delegates all the work to `format_text`."""
    formatted = format_text(m)
    return formatted
class ImmuneString:
    """Wrapper that makes a string immune from formatting.

    Instances are told apart from ordinary text with `isinstance`, and their
    `string` attribute is inserted as-is instead of being formatted.
    """

    def __init__(self, string: str) -> None:
        # The text to insert verbatim.
        self.string = string

    def __repr__(self) -> str:
        # Show the wrapped text so instances are recognisable in logs.
        return f"{type(self).__name__}({self.string!r})"
@mod.capture(
    # Add anything else into this that you want to be able to speak during a
    # formatter.
    rule="(<user.symbol_key> | (numb | numeral) <number>)"
)
def formatter_immune(m) -> ImmuneString:
    """Text that can be interspersed into a formatter, e.g. characters.

    It will be inserted directly, without being formatted. If the capture has
    a `number` attribute, that value is used; otherwise the first capture
    element is. Either way it is converted with `str` and wrapped.
    """
    value = m.number if hasattr(m, "number") else m[0]
    return ImmuneString(str(value))
@mod.action_class
class Actions:
    def formatted_text(phrase: Union[str, Phrase], formatters: str) -> str:
        """Formats a phrase according to formatters and returns it. formatters is a comma-separated string of formatters (e.g. 'CAPITALIZE_ALL_WORDS,DOUBLE_QUOTED_STRING')"""
        formatted = format_phrase(phrase, formatters)
        return formatted

    def insert_formatted(phrase: Union[str, Phrase], formatters: str):
        """Formats a phrase according to formatters and inserts it. formatters is a comma-separated string of formatters (e.g. 'CAPITALIZE_ALL_WORDS,DOUBLE_QUOTED_STRING')"""
        formatted = format_phrase(phrase, formatters)
        actions.insert(formatted)

    def insert_with_history(text: str):
        """Inserts some text and records it in the phrase history (deprecated)."""
        actions.user.deprecate_action("2022-12-11", "user.insert_with_history")
        actions.user.add_phrase_to_history(text)
        actions.insert(text)

    def formatters_reformat_last(formatters: str) -> str:
        """Clears the last formatted phrase and re-inserts it with new formatters"""
        if actions.user.get_last_phrase() == last_phrase_formatted:
            actions.user.clear_last_phrase()
            actions.user.insert_formatted(last_phrase, formatters)
            return
        # The last thing we inserted isn't the same as the last thing we
        # formatted, so there is nothing safe to replace.
        logging.warning("formatters_reformat_last(): Last phrase wasn't a formatter!")

    def formatters_reformat_selection(formatters: str) -> str:
        """Reformats the current selection and returns the inserted text."""
        selection = edit.selected_text()
        if not selection:
            app.notify("Asked to reformat selection, but nothing selected!")
            return
        # Prose formatters work on the text as written; everything else
        # starts from the unformatted words.
        source = (
            selection
            if formatters in all_prose_formatters
            else unformat_text(selection)
        )
        # Delete separately for compatibility with programs that don't overwrite
        # selected text (e.g. Emacs)
        edit.delete()
        text = actions.self.formatted_text(source, formatters)
        actions.insert(text)
        return text

    def get_formatters_words() -> dict:
        """Returns a dict mapping each spoken formatter word to a demonstration string; prose formatters are marked with ' *'"""
        sample_words = ["one", "two", "three"]
        return {
            (
                spoken + " *" if spoken in prose_formatter_names else spoken
            ): format_phrase_without_adding_to_history(sample_words, spoken)
            for spoken in sorted(formatter_words)
        }

    def reformat_text(text: str, formatters: str) -> str:
        """Reformats the given text with formatters and returns it."""
        source = text if formatters in all_prose_formatters else unformat_text(text)
        return actions.user.formatted_text(source, formatters)

    def insert_many(strings: list[str]) -> None:
        """Inserts each string of a list, one after another."""
        for item in strings:
            actions.insert(item)
def _split_word_boundaries(text: str) -> str:
    """Insert a space at every camelCase, acronym and letter/digit boundary."""
    pieces = []
    prev = ""
    for index, char in enumerate(text):
        following = text[index + 1 : index + 2]  # "" at the end of the text
        if prev and (
            # lower -> upper: "fooBar" -> "foo Bar"
            (prev.islower() and char.isupper())
            # end of an acronym: "HTTPServer" -> "HTTP Server"
            or (prev.isupper() and char.isupper() and following.islower())
            # letter -> digit and digit -> letter: "go2it" -> "go 2 it"
            or (prev.isalpha() and char.isdecimal())
            or (prev.isdecimal() and char.isalpha())
        ):
            pieces.append(" ")
        pieces.append(char)
        prev = char
    return "".join(pieces)


def unformat_text(text: str) -> str:
    """Remove format from text.

    Every run of non-word characters and underscores becomes a single space,
    a space is inserted at camelCase, acronym and letter/digit boundaries,
    and the result is lowercased. Leading and trailing separators are kept
    as spaces, not stripped.

    Boundaries are found with the Unicode-aware `str` predicates, so
    non-ASCII letters split the same way ASCII ones do; results for
    ASCII-only text match the previous regex-based splitting.
    """
    unformatted = re.sub(r"[\W_]+", " ", text)
    # Split on camelCase, including numbers
    unformatted = _split_word_boundaries(unformatted)
    # TODO: Separate out studleycase vars
    return unformatted.lower()
# Fill the lists declared on the module with the spoken forms of the
# formatters (the keys of the corresponding dicts).
ctx.lists["self.formatters"] = formatter_words.keys()
ctx.lists["self.code_formatter"] = code_formatter_names.keys()
ctx.lists["self.prose_formatter"] = prose_formatter_names.keys()