Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fulltext search: all entries in entire document #11313

Merged
merged 30 commits into from Jan 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
3a1a2bb
pdfdocument: findTextAll()
hius07 Jan 2, 2024
ecfbba3
koptinterface: findTextAll()
hius07 Jan 2, 2024
f8eba22
readersearch: search all in pdf
hius07 Jan 2, 2024
f08e673
pdfdocument: adjustable nb_context_words
hius07 Jan 3, 2024
d428cf0
koptinterface: update
hius07 Jan 3, 2024
ada6203
readersearch: update
hius07 Jan 3, 2024
f6c7125
koptinterface: optimize generating item text
hius07 Jan 3, 2024
169e904
koptinterface: no additional spaces in results list
hius07 Jan 4, 2024
3ad1c40
readersearch: "All" button - note to translators
hius07 Jan 4, 2024
0014463
koptinterface: add check for max_hits
hius07 Jan 5, 2024
46bfe89
pdfdocument: add check for max_hits
hius07 Jan 5, 2024
c9c7feb
credocument: add findTextAll()
hius07 Jan 5, 2024
0cd5698
readersearch: add search All for cre documents
hius07 Jan 5, 2024
76198c1
readersearch: fix misprint
hius07 Jan 5, 2024
940dfb3
credocument: naming
hius07 Jan 6, 2024
7e307a9
djvudocument: add findAllText()
hius07 Jan 6, 2024
a8f8cca
document: add findAllText()
hius07 Jan 6, 2024
5f450dd
koptinterface: naming
hius07 Jan 6, 2024
a5457e6
pdfdocument: naming
hius07 Jan 6, 2024
1fcac5d
readersearch: naming
hius07 Jan 6, 2024
b9183f7
readersearch: show target xpointer marker on results jump
hius07 Jan 6, 2024
0ce975e
koptinterface: do not return empty findAll results
hius07 Jan 7, 2024
4a3591a
readersearch: do not rebuild cached mandatory
hius07 Jan 7, 2024
3fa9b04
credocument: update findText(), findAllText()
hius07 Jan 12, 2024
c06f11e
reader_menu_order: add Fulltext search settings
hius07 Jan 12, 2024
eae57e1
readersearch: add Fulltext search settings
hius07 Jan 12, 2024
d85398e
credocument: temporary revert findText() until base is updated
hius07 Jan 12, 2024
a21cde1
Update readersearch.lua
hius07 Jan 13, 2024
283a002
credocument: new findText()
hius07 Jan 13, 2024
3786a14
Merge branch 'koreader:master' into pdf-text-search-all
hius07 Jan 13, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
165 changes: 160 additions & 5 deletions frontend/apps/reader/modules/readersearch.lua
Expand Up @@ -4,7 +4,9 @@ local CheckButton = require("ui/widget/checkbutton")
local Device = require("device")
local InfoMessage = require("ui/widget/infomessage")
local InputDialog = require("ui/widget/inputdialog")
local Menu = require("ui/widget/menu")
local Notification = require("ui/widget/notification")
local SpinWidget = require("ui/widget/spinwidget")
local UIManager = require("ui/uimanager")
local Utf8Proc = require("ffi/utf8proc")
local WidgetContainer = require("ui/widget/container/widgetcontainer")
Expand All @@ -27,7 +29,11 @@ local ReaderSearch = WidgetContainer:extend{
-- The speed of the search depends on the regexs. Complex ones might need some time, easy ones
-- go with the speed of light.
-- Setting max_hits higher, does not mean to require more memory. More hits means smaller single hits.
max_hits = 2048, -- maximum hits for search; timinges tested on a Tolino
max_hits = 2048, -- maximum hits for findText search; timinges tested on a Tolino
findall_max_hits = 5000, -- maximum hits for findAllText search
-- number of words before and after the search string in All search results
findall_nb_context_words = G_reader_settings:readSetting("fulltext_search_nb_context_words") or 3,
findall_results_per_page = G_reader_settings:readSetting("fulltext_search_results_per_page") or 10,

-- internal: whether we expect results on previous pages
-- (can be different from self.direction, if, from a page in the
Expand Down Expand Up @@ -72,6 +78,54 @@ SRELL_ERROR_CODES[111] = _("Expression too complex, some hits will not be shown.
SRELL_ERROR_CODES[666] = _("Expression may lead to an extremely long search time.")

function ReaderSearch:addToMainMenu(menu_items)
menu_items.fulltext_search_settings = {
text = _("Fulltext search settings"),
sub_item_table = {
{
text_func = function()
return T(_("Words in context: %1"), self.findall_nb_context_words)
end,
keep_menu_open = true,
callback = function(touchmenu_instance)
local widget = SpinWidget:new{
title_text = _("Words in context"),
value = self.findall_nb_context_words,
value_min = 1,
value_max = 20,
default_value = 3,
callback = function(spin)
self.last_search_hash = nil
self.findall_nb_context_words = spin.value
G_reader_settings:saveSetting("fulltext_search_nb_context_words", spin.value)
touchmenu_instance:updateItems()
end,
}
UIManager:show(widget)
end,
},
{
text_func = function()
return T(_("Results per page: %1"), self.findall_results_per_page)
end,
keep_menu_open = true,
callback = function(touchmenu_instance)
local widget = SpinWidget:new{
title_text = _("Results per page"),
value = self.findall_results_per_page,
value_min = 6,
value_max = 24,
default_value = 10,
callback = function(spin)
self.findall_results_per_page = spin.value
G_reader_settings:saveSetting("fulltext_search_results_per_page", spin.value)
touchmenu_instance:updateItems()
end,
}
UIManager:show(widget)
end,
},
},
}
menu_items.fulltext_search = {
text = _("Fulltext search"),
callback = function()
Expand Down Expand Up @@ -103,7 +157,14 @@ function ReaderSearch:searchCallback(reverse)
UIManager:show(InfoMessage:new{ text = error_message })
else
UIManager:close(self.input_dialog)
self:onShowSearchDialog(search_text, reverse, self.use_regex, self.case_insensitive)
if reverse then
self:onShowSearchDialog(search_text, reverse, self.use_regex, self.case_insensitive)
else
local Trapper = require("ui/trapper")
Trapper:wrap(function()
self:findAllText(search_text)
end)
end
end
end

Expand All @@ -126,6 +187,13 @@ function ReaderSearch:onShowFulltextSearchInput()
UIManager:close(self.input_dialog)
end,
},
{
-- @translators Search all entries in entire document
text = _("All"),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd probably add a context. Whether it's "all search results" or "all aliens from Mars" might make a difference in some languages.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@translators Find all entries in entire document, button displayed on the search bar, should be short. ?

callback = function()
self:searchCallback()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is reverse only nil when called from there? May be another paremeter then?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is reverse only nil when called from there?

Yes

end,
},
{
text = backward_text,
callback = function()
Expand Down Expand Up @@ -183,7 +251,7 @@ function ReaderSearch:onShowSearchDialog(text, direction, regex, case_insensitiv
local no_results = true -- for notification
local res = search_func(self, search_term, param, regex, case_insensitive)
if res then
if self.ui.document.info.has_pages then
if self.ui.paging then
no_results = false
self.ui.link:onGotoLink({page = res.page - 1}, neglect_current_location)
self.view.highlight.temp[res.page] = res
Expand Down Expand Up @@ -370,6 +438,13 @@ function ReaderSearch:search(pattern, origin, regex, case_insensitive)
Device:setIgnoreInput(true)
local retval, words_found = self.ui.document:findText(pattern, origin, direction, case_insensitive, page, regex, self.max_hits)
Device:setIgnoreInput(false)
self:showErrorNotification(words_found, regex, self.max_hits)
return retval
end

function ReaderSearch:showErrorNotification(words_found, regex, max_hits)
regex = regex or self.use_regex
max_hits = max_hits or self.findall_max_hits
local regex_retval = regex and self.ui.document:getAndClearRegexSearchError()
if regex and regex_retval ~= 0 then
local error_message
Expand All @@ -382,13 +457,12 @@ function ReaderSearch:search(pattern, origin, regex, case_insensitive)
text = error_message,
timeout = false,
})
elseif words_found and words_found > self.max_hits then
elseif words_found and words_found >= max_hits then
UIManager:show(Notification:new{
text =_("Too many hits"),
timeout = 4,
})
end
return retval
end

function ReaderSearch:searchFromStart(pattern, _, regex, case_insensitive)
Expand Down Expand Up @@ -416,4 +490,85 @@ function ReaderSearch:searchNext(pattern, direction, regex, case_insensitive)
return self:search(pattern, 1, regex, case_insensitive)
end

function ReaderSearch:findAllText(search_text)
local last_search_hash = self.last_search_text .. tostring(self.case_insensitive) .. tostring(self.use_regex)
local not_cached = self.last_search_hash ~= last_search_hash
if not_cached then
local Trapper = require("ui/trapper")
local info = InfoMessage:new{ text = _("Searching… (tap to cancel)") }
UIManager:show(info)
UIManager:forceRePaint()
local completed, res = Trapper:dismissableRunInSubprocess(function()
return self.ui.document:findAllText(search_text,
self.case_insensitive, self.findall_nb_context_words, self.findall_max_hits, self.use_regex)
end, info)
if not completed then return end
UIManager:close(info)
self.last_search_hash = last_search_hash
self.findall_results = res
end
if self.findall_results then
self:showFindAllResults(not_cached)
else
UIManager:show(InfoMessage:new{ text = _("No results in the document") })
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Dunno if results is the right word ? occurences or matches ? @Frenzie)
'Search results" for the widget title sounds ok - but "no results in the document" sounds odd to my non-english ears.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

notification_text = _("No results on preceding pages")
else
notification_text = _("No results on following pages")

end
end

function ReaderSearch:showFindAllResults(not_cached)
if self.ui.rolling and not_cached then -- for ui.paging: items are built in KoptInterface:findAllText()
for _, item in ipairs(self.findall_results) do
-- PDF/Kopt shows full words when only some part matches; let's do the same with CRE
local word = item.matched_text or ""
if item.matched_word_prefix then
word = item.matched_word_prefix .. word
end
if item.matched_word_suffix then
word = word .. item.matched_word_suffix
end
-- append context before and after the word
local text = "【" .. word .. "】"
if item.prev_text then
text = item.prev_text .. text
end
if item.next_text then
text = text .. item.next_text
end
item.text = text
item.mandatory = self.ui.bookmark:getBookmarkPageString(item.start)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(I didn't add the page number in the cre.cpp new function returned results, because I figured it would be slow having it done there - while in frontend, we have document:getPageFromXPointer() which will have them cached in cre call cache.
I also forgot we can have pagemaplabel and hidden flows :) so it would have been on no use to have it done by cre.cpp :)
So, I figure it it's fine getting it via your function. How much slowness it adds getting page number vs. not getting them?)

Copy link
Member Author

@hius07 hius07 Jan 6, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Getting pages for 8,000 entries takes 4 seconds (epub of 800 pages, without non-linear flows).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, getting page numbers is little (4s) vs. the time taken by the cre.cpp search itself (~45s), right?

Copy link
Member Author

@hius07 hius07 Jan 6, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, comparing to the search time it's not significant.

There is more complicated page string building in


and further (not very optimized at first glance). Should I use this way?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No real opinion :) Keep it simple (as it is in this PR), as there's no performance need.

end
end

local menu
menu = Menu:new{
title = T(_("Search results (%1)"), #self.findall_results),
subtitle = T(_("Query: %1"), self.last_search_text),
item_table = self.findall_results,
items_per_page = self.findall_results_per_page,
covers_fullscreen = true,
is_borderless = true,
is_popout = false,
title_bar_fm_style = true,
onMenuChoice = function(_, item)
if self.ui.rolling then
self.ui.link:addCurrentLocationToStack()
self.ui.rolling:onGotoXPointer(item.start, item.start) -- show target line marker
self.ui.document:getTextFromXPointers(item.start, item["end"], true) -- highlight
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(I had in mind you would need to launch an incremental search to get highlight on the destination pages. But doing it that way does not need that, and it may be better highlighting only the tapped occurences, and not have all other on that page highlighted and needing to find the right one among them visually.)

else
local page = item.mandatory
local boxes = {}
for i, box in ipairs(item.boxes) do
boxes[i] = self.ui.document:nativeToPageRectTransform(page, box)
end
self.ui.link:onGotoLink({ page = page - 1 })
self.view.highlight.temp[page] = boxes
end
end,
close_callback = function()
UIManager:close(menu)
end,
}
UIManager:show(menu)
self:showErrorNotification(#self.findall_results)
end

return ReaderSearch
12 changes: 8 additions & 4 deletions frontend/document/credocument.lua
Expand Up @@ -1383,10 +1383,14 @@ function CreDocument:getAndClearRegexSearchError()
return retval
end

function CreDocument:findText(pattern, origin, reverse, caseInsensitive, page, regex, max_hits)
logger.dbg("CreDocument: find text", pattern, origin, reverse, caseInsensitive, regex, max_hits)
return self._document:findText(
pattern, origin, reverse, caseInsensitive and 1 or 0, regex and 1 or 0, max_hits or 200)
function CreDocument:findText(pattern, origin, direction, case_insensitive, page, regex, max_hits)
logger.dbg("CreDocument: find text", pattern, origin, direction == 1, case_insensitive, regex, max_hits)
return self._document:findText(pattern, origin, direction == 1, case_insensitive, regex, max_hits)
end

function CreDocument:findAllText(pattern, case_insensitive, nb_context_words, max_hits, regex)
logger.dbg("CreDocument: find all text", pattern, case_insensitive, regex, max_hits, true, nb_context_words)
return self._document:findAllText(pattern, case_insensitive, regex, max_hits, true, nb_context_words)
end

function CreDocument:enableInternalHistory(toggle)
Expand Down
8 changes: 6 additions & 2 deletions frontend/document/djvudocument.lua
Expand Up @@ -123,8 +123,12 @@ function DjvuDocument:getCoverPageImage()
return self.koptinterface:getCoverPageImage(self)
end

function DjvuDocument:findText(pattern, origin, reverse, caseInsensitive, page)
return self.koptinterface:findText(self, pattern, origin, reverse, caseInsensitive, page)
function DjvuDocument:findText(pattern, origin, reverse, case_insensitive, page)
return self.koptinterface:findText(self, pattern, origin, reverse, case_insensitive, page)
end

function DjvuDocument:findAllText(pattern, case_insensitive, nb_context_words, max_hits)
return self.koptinterface:findAllText(self, pattern, case_insensitive, nb_context_words, max_hits)
end

function DjvuDocument:renderPage(pageno, rect, zoom, rotation, gamma, render_mode, hinting)
Expand Down
4 changes: 4 additions & 0 deletions frontend/document/document.lua
Expand Up @@ -373,6 +373,10 @@ function Document:findText()
return nil
end

function Document:findAllText()
return nil
end

function Document:updateColorRendering()
if self.is_color_capable and CanvasContext.is_color_rendering_enabled then
self.render_color = true
Expand Down