-
-
Notifications
You must be signed in to change notification settings - Fork 1.2k
/
readerdictionary.lua
479 lines (456 loc) · 18 KB
/
readerdictionary.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
local ConfirmBox = require("ui/widget/confirmbox")
local DataStorage = require("datastorage")
local Device = require("device")
local DictQuickLookup = require("ui/widget/dictquicklookup")
local InfoMessage = require("ui/widget/infomessage")
local InputContainer = require("ui/widget/container/inputcontainer")
local JSON = require("json")
local UIManager = require("ui/uimanager")
local logger = require("logger")
local util = require("util")
local _ = require("gettext")
local Screen = Device.screen
local T = require("ffi/util").template
-- Module-level cache of the installed dictionaries: one { file, name } entry
-- per usable .ifo file. It stays nil until the first ReaderDictionary:init()
-- has scanned the dictionary directories, so the scan is only done once.
local available_ifos
--- Recursively collects the paths of all .ifo files below a directory.
-- The same logic as sdcv is used to walk directories and .ifo files, so the
-- result lists them in the order sdcv queries them:
-- - no sorting: entries are processed in the order the directory iterator
--   returns them
-- - if an entry is a directory, it is walked (recursively) right away, before
--   the remaining entries of the current directory
-- @tparam string path directory to scan
-- @treturn table sequence of .ifo file paths (empty if path cannot be read)
local function getIfosInDir(path)
    local ifos = {}
    -- lfs.dir raises an error on an unreadable/missing directory: treat that
    -- as "no dictionaries here"
    local ok, iter, dir_obj = pcall(lfs.dir, path)
    if not ok then
        return ifos
    end
    for name in iter, dir_obj do
        if name ~= "." and name ~= ".." then
            local fullpath = path.."/"..name
            local attributes = lfs.attributes(fullpath)
            if attributes ~= nil then
                if attributes.mode == "directory" then
                    -- recurse; ipairs (not pairs) because the order of the
                    -- sub-results is part of this function's contract
                    for _, ifo in ipairs(getIfosInDir(fullpath)) do
                        table.insert(ifos, ifo)
                    end
                elseif fullpath:match("%.ifo$") then
                    table.insert(ifos, fullpath)
                end
            end
        end
    end
    return ifos
end
-- Prototype of the dictionary lookup module, built on InputContainer.
local ReaderDictionary = InputContainer:new{
    -- template of the progress message; %1 is replaced by the looked up word
    lookup_msg = _("Searching dictionary for:\n%1"),
    -- directory holding the dictionaries; filled in by init()
    data_dir = nil,
    -- lookup windows created so far (also handed to each new window)
    dict_window_list = {},
}
--- Registers the menu entries, resolves the dictionary directory and, the
-- first time it runs, builds the module-wide list of installed dictionaries.
function ReaderDictionary:init()
    self.ui.menu:registerToMainMenu(self)

    -- STARDICT_DATA_DIR, when set, overrides the default location
    local env_dir = os.getenv("STARDICT_DATA_DIR")
    if env_dir then
        self.data_dir = env_dir
    else
        self.data_dir = DataStorage:getDataDir() .. "/data/dict"
    end

    -- Gather info about available dictionaries (only once: the list is
    -- kept in a module local)
    if not available_ifos then
        available_ifos = {}
        logger.dbg("Getting list of dictionaries")

        -- .ifo files of the main directory first, then those of the
        -- optional "<data_dir>_ext" directory
        local ifo_paths = getIfosInDir(self.data_dir)
        local ext_dir = self.data_dir .. "_ext"
        if lfs.attributes(ext_dir, "mode") == "directory" then
            local ext_paths = getIfosInDir(ext_dir)
            for i = 1, #ext_paths do
                ifo_paths[#ifo_paths + 1] = ext_paths[i]
            end
        end

        for i = 1, #ifo_paths do
            local ifo_path = ifo_paths[i]
            local fh = io.open(ifo_path, "r")
            if fh then
                local content = fh:read("*all")
                fh:close()
                local bookname = content:match("\nbookname=(.-)\n")
                -- sdcv won't use a dictionary that has no bookname=
                if bookname then
                    available_ifos[#available_ifos + 1] = {
                        file = ifo_path,
                        name = bookname,
                    }
                end
            end
        end
        logger.dbg("found", #available_ifos, "dictionaries")

        if not G_reader_settings:readSetting("dicts_disabled") then
            -- Store an empty table for this setting, so that it can be
            -- accessed and updated directly through G_reader_settings
            -- and be saved automatically.
            G_reader_settings:saveSetting("dicts_disabled", {})
        end
    end

    -- Prepare the -u options to give to sdcv if some dictionaries are disabled
    self:updateSdcvDictNamesOptions()
end
--- Rebuilds the "-u <dictname>" options handed to sdcv.
-- sdcv cannot be told which dictionaries to ignore, only which ones to use
-- (with one -u option per dictionary). The order of the -u options does not
-- matter, so they cannot be used to order queries and results.
-- Sets self.sdcv_dictnames_options_raw (list of separate, unescaped
-- arguments, for the Android call) and self.sdcv_dictnames_options_escaped
-- (a single shell-ready string); both are nil when no dictionary is disabled.
function ReaderDictionary:updateSdcvDictNamesOptions()
    local dicts_disabled = G_reader_settings:readSetting("dicts_disabled")
    local raw_opts, escaped_opts
    if not next(dicts_disabled) then
        -- no dict disabled, no need to use any -u option
        raw_opts, escaped_opts = nil, nil
    else
        raw_opts = {}
        local escaped_list = {}
        for i = 1, #available_ifos do
            local ifo = available_ifos[i]
            if not dicts_disabled[ifo.file] then
                raw_opts[#raw_opts + 1] = "-u"
                raw_opts[#raw_opts + 1] = ifo.name
                -- Single-quote the name for the shell, each embedded '
                -- becoming '\'' (considered safer than lua's %q)
                local quoted_name = ifo.name:gsub("'", "'\\''")
                escaped_list[#escaped_list + 1] = "-u '" .. quoted_name .. "'"
            end
        end
        -- Note: if all dicts are disabled, there is no -u at all, and so
        -- all dicts will be queried.
        escaped_opts = table.concat(escaped_list, " ")
    end
    self.sdcv_dictnames_options_raw = raw_opts
    self.sdcv_dictnames_options_escaped = escaped_opts
end
--- Adds the "Dictionary lookup" and "Dictionary settings" entries.
-- @tparam table menu_items table the two entries are stored into, under the
-- keys dictionary_lookup and dictionary_settings
function ReaderDictionary:addToMainMenu(menu_items)
    -- Free text lookup
    menu_items.dictionary_lookup = {
        text = _("Dictionary lookup"),
        tap_input = {
            title = _("Enter a word to look up"),
            ok_text = _("Search dictionary"),
            type = "text",
            callback = function(input)
                self:onLookupWord(input)
            end,
        },
    }

    -- Submenu listing the dictionaries; its title shows "enabled/available"
    -- when some are disabled, and just the available count otherwise
    local installed_dicts_item = {
        text_func = function()
            local nb_available, nb_enabled, nb_disabled = self:getNumberOfDictionaries()
            local nb_str = nb_disabled > 0
                and (nb_enabled .. "/" .. nb_available)
                or nb_available
            return T(_("Installed dictionaries (%1)"), nb_str)
        end,
        enabled_func = function()
            local nb_available = self:getNumberOfDictionaries()
            return nb_available > 0
        end,
        sub_item_table = self:genDictionariesMenu(),
    }

    local order_info_item = {
        text = _("Info on dictionary order"),
        callback = function()
            local info = InfoMessage:new{
                text = T(_([[
If you'd like to change the order in which dictionaries are queried (and their results displayed), you can:
- move all dictionary directories out of %1.
- move them back there, one by one, in the order you want them to be used.]]), self.data_dir)
            }
            UIManager:show(info)
        end,
    }

    -- Tap toggles the flag on this instance; hold offers to make the
    -- current value the default
    local fuzzy_search_item = {
        text = _("Disable dictionary fuzzy search"),
        checked_func = function()
            return self.disable_fuzzy_search == true
        end,
        callback = function()
            self.disable_fuzzy_search = not self.disable_fuzzy_search
        end,
        hold_callback = function()
            self:makeDisableFuzzyDefault(self.disable_fuzzy_search)
        end,
    }

    -- setting used by dictquicklookup
    local justify_item = {
        text = _("Justify text"),
        checked_func = function()
            return G_reader_settings:nilOrTrue("dict_justify")
        end,
        callback = function()
            G_reader_settings:flipNilOrTrue("dict_justify")
        end,
    }

    menu_items.dictionary_settings = {
        text = _("Dictionary settings"),
        sub_item_table = {
            installed_dicts_item,
            order_info_item,
            fuzzy_search_item,
            justify_item,
        },
    }
end
--- Handles a request to look a word up in the dictionaries.
-- Remembers `highlight` in self.highlight (showDict later hands it to the
-- lookup window) and delegates the search itself to stardictLookup.
-- @param word text to look up
-- @param box passed through to stardictLookup
-- @param highlight stored in self.highlight
-- @param link passed through to stardictLookup
-- @treturn bool always true
function ReaderDictionary:onLookupWord(word, box, highlight, link)
self.highlight = highlight
self:stardictLookup(word, box, link)
return true
end
--- Gets number of available, enabled, and disabled dictionaries
-- Only installed dictionaries are counted: the "dicts_disabled" setting is
-- keyed by .ifo path and is persisted, so it may still hold entries for
-- dictionaries that are no longer installed. Counting its keys would
-- over-count the disabled ones (and could make nb_enabled negative).
-- @treturn int nb_available
-- @treturn int nb_enabled
-- @treturn int nb_disabled
function ReaderDictionary:getNumberOfDictionaries()
    -- tolerate a missing setting: nothing is disabled then
    local dicts_disabled = G_reader_settings:readSetting("dicts_disabled") or {}
    local nb_available = #available_ifos
    local nb_disabled = 0
    for _, ifo in ipairs(available_ifos) do
        if dicts_disabled[ifo.file] then
            nb_disabled = nb_disabled + 1
        end
    end
    local nb_enabled = nb_available - nb_disabled
    return nb_available, nb_enabled, nb_disabled
end
--- Builds one checkable menu entry per installed dictionary.
-- An entry is checked while its dictionary is not listed in the
-- "dicts_disabled" setting; selecting it toggles that state.
-- @treturn table list of menu entries
function ReaderDictionary:genDictionariesMenu()
    local items = {}
    for i = 1, #available_ifos do
        local ifo = available_ifos[i]
        items[i] = {
            text = ifo.name,
            checked_func = function()
                return not G_reader_settings:readSetting("dicts_disabled")[ifo.file]
            end,
            callback = function()
                local dicts_disabled = G_reader_settings:readSetting("dicts_disabled")
                if not dicts_disabled[ifo.file] then
                    dicts_disabled[ifo.file] = true
                else
                    -- re-enable by removing the entry
                    dicts_disabled[ifo.file] = nil
                end
                -- Update the -u options to give to sdcv
                self:updateSdcvDictNamesOptions()
            end,
        }
    end
    return items
end
--- Tells whether none of the given directories contains anything.
-- @tparam table dict_dirs list of directory paths
-- @treturn bool true if util.isEmptyDir holds for every directory (also
-- true for an empty list), false as soon as one is not empty
local function dictDirsEmpty(dict_dirs)
    for i = 1, #dict_dirs do
        if not util.isEmptyDir(dict_dirs[i]) then
            return false
        end
    end
    return true
end
--- Turns the markup of dictionary definitions into plain text, in place.
-- For each result: <br> tags become newlines, CDATA sections are unwrapped
-- (and, only when a CDATA section was present, "&x{...}" format escapes are
-- reduced to their content), all remaining tags are dropped and leading
-- whitespace is removed.
-- @tparam table results list of tables with a string `definition` field
-- @treturn table the same `results` table
local function tidyMarkup(results)
    local CDATA_PATTERN = "<!%[CDATA%[(.-)%]%]>"
    local FORMAT_ESCAPE_PATTERN = "&[29Ib%+]{(.-)}"
    for i = 1, #results do
        local entry = results[i]
        -- preserve the <br> tag for line break
        local text = entry.definition:gsub("<[bB][rR] ?/?>", "\n")
        -- parse CDATA text in XML
        if text:find(CDATA_PATTERN) then
            text = text:gsub(CDATA_PATTERN, "%1")
            -- ignore format strings; repeated until nothing is replaced,
            -- as unwrapping one escape can expose another one
            local replaced
            repeat
                text, replaced = text:gsub(FORMAT_ESCAPE_PATTERN, "%1")
            until replaced == 0
        end
        -- ignore all markup tags, then strip all leading empty lines/spaces
        -- (parentheses keep only the string result of the last gsub)
        entry.definition = (text:gsub("%b<>", ""):gsub("^%s+", ""))
    end
    return results
end
--- Cleans up a text selection before it is used as a lookup word.
-- (Will be used by ReaderWikipedia too.)
-- crengine does now a much better job at finding word boundaries, but
-- some cleanup is still needed for selections coming from other engines
-- (example: pdf selection "qu’autrefois," will be cleaned to "autrefois").
-- @param text selected text, may be nil
-- @treturn string cleaned text, "" when text is nil or false
function ReaderDictionary:cleanSelection(text)
    if not text then
        return ""
    end
    -- Replace extended quote (included in the general punctuation range)
    -- with plain ascii quote (for french words like "aujourd’hui")
    text = string.gsub(text, "\xE2\x80\x99", "'") -- U+2019 (right single quotation mark)
    -- Strip punctuation characters around selection
    text = util.stripePunctuations(text)
    -- Remaining substitutions, applied in this order
    local substitutions = {
        { "'s$", "" },               -- english possessive
        { "^[LSDMNTlsdmnt]'", "" },  -- french l' s' t'...
        { "^[Qq][Uu]'", "" },        -- french qu'
        { "\xC2\xA0", " " },         -- U+00A0 no-break space -> regular space
    }
    for i = 1, #substitutions do
        local pattern, replacement = substitutions[i][1], substitutions[i][2]
        text = string.gsub(text, pattern, replacement)
    end
    -- There may be a need to remove some (all?) diacritical marks
    -- https://en.wikipedia.org/wiki/Combining_character#Unicode_ranges
    -- see discussion at https://github.com/koreader/koreader/issues/1649
    -- Commented for now, will have to be checked by people who read
    -- languages and texts that use them.
    -- text = string.gsub(text, "\204[\128-\191]", '') -- U+0300 to U+033F
    -- text = string.gsub(text, "\205[\128-\175]", '') -- U+0340 to U+036F
    return text
end
--- Shows the "searching" message for a word and repaints right away.
-- The message widget is kept in self.lookup_progress_msg so that
-- dismissLookupInfo can close it.
-- @param word word inserted into the self.lookup_msg template
function ReaderDictionary:showLookupInfo(word)
    self.lookup_progress_msg = InfoMessage:new{
        text = T(self.lookup_msg, word),
    }
    UIManager:show(self.lookup_progress_msg)
    UIManager:forceRePaint()
end
--- Closes the "searching" message, if one is currently kept in
-- self.lookup_progress_msg, repaints, and forgets the message.
function ReaderDictionary:dismissLookupInfo()
    local progress_msg = self.lookup_progress_msg
    if progress_msg then
        UIManager:close(progress_msg)
        UIManager:forceRePaint()
    end
    self.lookup_progress_msg = nil
end
-- Quotes a string as a single word of a shell command line: it is wrapped in
-- single quotes and each embedded ' becomes '\''. Inside single quotes the
-- shell interprets nothing, whereas lua's %q yields a double-quoted string
-- in which $ and backticks are still expanded by the shell.
local function shellQuoteArg(arg)
    return "'" .. arg:gsub("'", "'\\''") .. "'"
end

--- Looks a word up with sdcv and shows the results in a lookup window.
-- The word is first cleaned with cleanSelection; nothing happens if it ends
-- up empty. sdcv is run once per dictionary directory (data_dir, and
-- data_dir.."_ext" if it exists), its JSON output is decoded, duplicate
-- results are dropped, and the merged list is given to showDict. A single
-- placeholder result is shown when no dictionary is installed or when
-- nothing was found.
-- @param word text to look up
-- @param box passed through to showDict
-- @param link passed through to showDict
function ReaderDictionary:stardictLookup(word, box, link)
    logger.dbg("lookup word:", word, box)
    -- escape quotes and other funny characters in word
    word = self:cleanSelection(word)
    logger.dbg("stripped word:", word)
    if word == "" then
        return
    end
    if not self.disable_fuzzy_search then
        self:showLookupInfo(word)
    end
    local final_results = {}
    local seen_results = {}
    -- Allow for two sdcv calls : one in the classic data/dict, and
    -- another one in data/dict_ext if it exists
    -- We could put in data/dict_ext dictionaries with a great number of words
    -- but poor definitions as a fall back. If these were in data/dict,
    -- they would prevent fuzzy searches in other dictories with better
    -- definitions, and masks such results. This way, we can get both.
    local dict_dirs = {self.data_dir}
    local dict_ext = self.data_dir.."_ext"
    if lfs.attributes(dict_ext, "mode") == "directory" then
        table.insert(dict_dirs, dict_ext)
    end
    -- early exit if no dictionaries
    if dictDirsEmpty(dict_dirs) then
        final_results = {
            {
                dict = "",
                word = word,
                definition = _([[No dictionaries installed. Please search for "Dictionary support" in the KOReader Wiki to get more information about installing new dictionaries.]]),
            }
        }
        -- link is forwarded here too, as in the regular path below
        self:showDict(word, final_results, box, link)
        return
    end
    local common_options = self.disable_fuzzy_search and "-njf" or "-nj"
    for _, dict_dir in ipairs(dict_dirs) do
        local results_str = nil
        if Device:isAndroid() then
            -- arguments are passed individually: no shell, no escaping
            local A = require("android")
            local args = {"./sdcv", "--utf8-input", "--utf8-output", common_options, word, "--data-dir", dict_dir}
            if self.sdcv_dictnames_options_raw then
                for _, opt in pairs(self.sdcv_dictnames_options_raw) do
                    table.insert(args, opt)
                end
            end
            results_str = A.stdout(unpack(args))
        else
            -- The command goes through a shell, and word comes from the
            -- document or from user input: every variable part is single
            -- quoted so that none of it gets interpreted by the shell.
            local cmd = ("./sdcv --utf8-input --utf8-output %s %s --data-dir %s"):format(
                shellQuoteArg(common_options), shellQuoteArg(word), shellQuoteArg(dict_dir))
            if self.sdcv_dictnames_options_escaped then
                cmd = cmd .. " " .. self.sdcv_dictnames_options_escaped
            end
            local std_out = io.popen(cmd, "r")
            if std_out then
                results_str = std_out:read("*all")
                std_out:close()
            end
        end
        -- pcall: decoding fails (and is only logged) when sdcv gave no or
        -- invalid output
        local ok, results = pcall(JSON.decode, results_str)
        if ok and results then
            -- we may get duplicates (sdcv may do multiple queries,
            -- in fixed mode then in fuzzy mode), we have to remove them
            local h
            for _,r in ipairs(results) do
                h = r.dict .. r.word .. r.definition
                if seen_results[h] == nil then
                    table.insert(final_results, r)
                    seen_results[h] = true
                end
            end
        else
            logger.warn("JSON data cannot be decoded", results)
        end
    end
    if #final_results == 0 then
        -- dummy results
        final_results = {
            {
                dict = "",
                word = word,
                definition = _("No definition found."),
            }
        }
    end
    self:showDict(word, tidyMarkup(final_results), box, link)
end
--- Opens a DictQuickLookup window for a list of results.
-- Any "searching" message is dismissed first. Nothing is shown when
-- results is missing or has no first element.
-- @param word original lookup word
-- @tparam table results list of results to display
-- @param box given to the window as word_box
-- @param link selected link, if any
function ReaderDictionary:showDict(word, results, box, link)
    self:dismissLookupInfo()
    if not (results and results[1]) then
        return
    end
    logger.dbg("showing quick lookup window", word, results)

    local function refreshFooter()
        if self.view then
            -- update info in footer (time, battery, etc)
            self.view.footer:updateFooter()
        end
    end

    local window = DictQuickLookup:new{
        window_list = self.dict_window_list,
        ui = self.ui,
        highlight = self.highlight,
        dialog = self.dialog,
        -- original lookup word
        word = word,
        -- selected link, if any
        selected_link = link,
        results = results,
        dictionary = self.default_dictionary,
        width = Screen:getWidth() - Screen:scaleBySize(80),
        word_box = box,
        -- differentiate between dict and wiki
        is_wiki = self.is_wiki,
        wiki_languages = self.wiki_languages,
        refresh_callback = refreshFooter,
    }
    self.dict_window = window
    table.insert(self.dict_window_list, window)
    UIManager:show(window)
end
--- Makes a dictionary the default one and tells the user about it.
-- @param dict stored in self.default_dictionary and shown in the message
-- @treturn bool always true
function ReaderDictionary:onUpdateDefaultDict(dict)
    logger.dbg("make default dictionary:", dict)
    self.default_dictionary = dict
    local notice = InfoMessage:new{
        timeout = 2,
        text = T(_("%1 is now the default dictionary for this document."), dict),
    }
    UIManager:show(notice)
    return true
end
--- Loads the default dictionary and the fuzzy search flag from `config`.
-- When `config` has no "disable_fuzzy_search" value, the flag is taken from
-- G_reader_settings instead.
-- @param config settings object providing readSetting
function ReaderDictionary:onReadSettings(config)
    self.default_dictionary = config:readSetting("default_dictionary")
    local disable_fuzzy = config:readSetting("disable_fuzzy_search")
    if disable_fuzzy == nil then
        disable_fuzzy = G_reader_settings:isTrue("disable_fuzzy_search")
    end
    self.disable_fuzzy_search = disable_fuzzy
end
--- Stores the default dictionary and the fuzzy search flag in
-- self.ui.doc_settings.
function ReaderDictionary:onSaveSettings()
    logger.dbg("save default dictionary", self.default_dictionary)
    local doc_settings = self.ui.doc_settings
    doc_settings:saveSetting("default_dictionary", self.default_dictionary)
    doc_settings:saveSetting("disable_fuzzy_search", self.disable_fuzzy_search)
end
--- Asks for confirmation, then saves the fuzzy search flag to
-- G_reader_settings under "disable_fuzzy_search".
-- @param disable_fuzzy_search value to save; it also selects the wording of
-- the question (disable when truthy, enable otherwise)
function ReaderDictionary:makeDisableFuzzyDefault(disable_fuzzy_search)
    logger.dbg("disable fuzzy search", self.disable_fuzzy_search)
    local question
    if disable_fuzzy_search then
        question = _("Disable fuzzy search by default?")
    else
        question = _("Enable fuzzy search by default?")
    end
    local confirm_box = ConfirmBox:new{
        text = T(question),
        ok_callback = function()
            G_reader_settings:saveSetting("disable_fuzzy_search", disable_fuzzy_search)
        end,
    }
    UIManager:show(confirm_box)
end
return ReaderDictionary