Skip to content

Commit

Permalink
Forgot to exclude ruby tags from searching in the native viewer
Browse files Browse the repository at this point in the history
Also fix ignore_text not being applied recursively to the children of an
ignored tag, and add some performance improvements to the Python function
for extracting searchable text
  • Loading branch information
kovidgoyal committed May 25, 2024
1 parent f3b35c3 commit 69cf7e6
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 10 deletions.
26 changes: 17 additions & 9 deletions src/calibre/gui2/viewer/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,21 +224,24 @@ def __str__(self):
@lru_cache(maxsize=None)
def searchable_text_for_name(name):
ans = []
add_text = ans.append
serialized_data = json.loads(get_data(name)[0])
stack = []
a = stack.append
removed_tails = []
no_visit = frozenset({'script', 'style', 'title', 'head'})
ignore_text = frozenset({'img', 'math', 'rt', 'rp', 'rtc'})
for child in serialized_data['tree']['c']:
if child.get('n') == 'body':
stack.append(child)
a((child, False))
# the JS code does not add the tail of body tags to flat text
removed_tails.append((child.pop('l', None), child))
ignore_text = {'script', 'style', 'title'}
text_pos = 0
anchor_offset_map = OrderedDict()
while stack:
node = stack.pop()
node, text_ignored_in_parent = stack.pop()
if isinstance(node, str):
ans.append(node)
add_text(node)
text_pos += len(node)
continue
g = node.get
Expand All @@ -253,13 +256,18 @@ def searchable_text_for_name(name):
aid = x[1]
if aid not in anchor_offset_map:
anchor_offset_map[aid] = text_pos
if name and text and name not in ignore_text:
ans.append(text)
if name in no_visit:
continue
ignore_text_in_node_and_children = text_ignored_in_parent or name in ignore_text

if text and not ignore_text_in_node_and_children:
add_text(text)
text_pos += len(text)
if tail:
stack.append(tail)
if tail and not text_ignored_in_parent:
a((tail, ignore_text_in_node_and_children))
if children:
stack.extend(reversed(children))
for child in reversed(children):
a((child, ignore_text_in_node_and_children))
for (tail, body) in removed_tails:
if tail is not None:
body['l'] = tail
Expand Down
2 changes: 1 addition & 1 deletion src/pyj/read_book/resources.pyj
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,7 @@ def text_from_serialized_html(data, get_anchor_offset_map):
anchor_offset_map[aid] = text_pos
if no_visit[src.n]:
continue
ignore_text_in_node_and_children = v'!!ignore_text[src.n]'
ignore_text_in_node_and_children = text_ignored_in_parent or v'!!ignore_text[src.n]'
if not ignore_text_in_node_and_children and src.x:
ans.push(src.x)
text_pos += src.x.length
Expand Down

0 comments on commit 69cf7e6

Please sign in to comment.