Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support searching for index entries #10819

Merged
merged 6 commits into from
Sep 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ Features added
* #10718: HTML Search: Save search result score to the HTML element for debugging
* #10673: Make toctree accept 'genindex', 'modindex' and 'search' docnames
* #6316, #10804: Add domain objects to the table of contents. Patch by Adam Turner
* #6692: HTML Search: Include explicit :rst:dir:`index` directive index entries
in the search index and search results. Patch by Adam Turner

Bugs fixed
----------
Expand Down
41 changes: 40 additions & 1 deletion sphinx/search/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from sphinx import addnodes, package_dir
from sphinx.deprecation import RemovedInSphinx70Warning
from sphinx.environment import BuildEnvironment
from sphinx.util import split_into


class SearchLanguage:
Expand Down Expand Up @@ -242,6 +243,7 @@ def __init__(self, env: BuildEnvironment, lang: str, options: Dict, scoring: str
# stemmed words in titles -> set(docname)
self._title_mapping: Dict[str, Set[str]] = {}
self._all_titles: Dict[str, List[Tuple[str, str]]] = {} # docname -> all titles
self._index_entries: Dict[str, List[Tuple[str, str, str]]] = {} # docname -> index entry
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel like this section is crying out for some type aliases to simplify the signatures here.

self._stem_cache: Dict[str, str] = {} # word -> stemmed word
self._objtypes: Dict[Tuple[str, str], int] = {} # objtype -> index
# objtype index -> (domain, type, objname (localized))
Expand Down Expand Up @@ -380,10 +382,15 @@ def freeze(self) -> Dict[str, Any]:
for title, titleid in titlelist:
alltitles.setdefault(title, []).append((fn2index[docname], titleid))

index_entries: Dict[str, List[Tuple[int, str]]] = {}
for docname, entries in self._index_entries.items():
for entry, entry_id, main_entry in entries:
index_entries.setdefault(entry.lower(), []).append((fn2index[docname], entry_id))

return dict(docnames=docnames, filenames=filenames, titles=titles, terms=terms,
objects=objects, objtypes=objtypes, objnames=objnames,
titleterms=title_terms, envversion=self.env.version,
alltitles=alltitles)
alltitles=alltitles, indexentries=index_entries)

def label(self) -> str:
return "%s (code: %s)" % (self.lang.language_name, self.lang.lang)
Expand Down Expand Up @@ -441,6 +448,38 @@ def stem(word: str) -> str:
if _filter(stemmed_word) and not already_indexed:
self._mapping.setdefault(stemmed_word, set()).add(docname)

# find explicit entries within index directives
_index_entries: Set[Tuple[str, str, str]] = set()
for node in doctree.findall(addnodes.index):
for entry_type, value, tid, main, *index_key in node['entries']:
tid = tid or ''
try:
if entry_type == 'single':
try:
entry, subentry = split_into(2, 'single', value)
except ValueError:
entry, = split_into(1, 'single', value)
subentry = ''
_index_entries.add((entry, tid, main))
if subentry:
_index_entries.add((subentry, tid, main))
elif entry_type == 'pair':
first, second = split_into(2, 'pair', value)
_index_entries.add((first, tid, main))
_index_entries.add((second, tid, main))
elif entry_type == 'triple':
first, second, third = split_into(3, 'triple', value)
_index_entries.add((first, tid, main))
_index_entries.add((second, tid, main))
_index_entries.add((third, tid, main))
elif entry_type in {'see', 'seealso'}:
first, second = split_into(2, 'see', value)
_index_entries.add((first, tid, main))
except ValueError:
pass

self._index_entries[docname] = sorted(_index_entries)

def context_for_searchtool(self) -> Dict[str, Any]:
if self.lang.js_splitter_code:
js_splitter_code = self.lang.js_splitter_code
Expand Down
18 changes: 18 additions & 0 deletions sphinx/themes/basic/static/searchtools.js
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,7 @@ const Search = {
const docNames = Search._index.docnames;
const titles = Search._index.titles;
const allTitles = Search._index.alltitles;
const indexEntries = Search._index.indexentries;

// stem the search terms and add them to the correct list
const stemmer = new Stemmer();
Expand Down Expand Up @@ -295,6 +296,23 @@ const Search = {
}
}

// search for explicit entries in index directives
for (const [entry, foundEntries] of Object.entries(indexEntries)) {
if (entry.includes(queryLower) && (queryLower.length >= entry.length/2)) {
for (const [file, id] of foundEntries) {
let score = Math.round(100 * queryLower.length / entry.length)
results.push([
docNames[file],
titles[file],
id ? "#" + id : "",
null,
score,
filenames[file],
]);
}
}
}

// lookup as object
objectTerms.forEach((term) =>
results.push(...Search.performObjectSearch(term, objectTerms))
Expand Down
6 changes: 4 additions & 2 deletions tests/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,8 @@ def test_IndexBuilder():
'test': [0, 1, 2, 3]},
'titles': ('title1_1', 'title1_2', 'title2_1', 'title2_2'),
'titleterms': {'section_titl': [0, 1, 2, 3]},
'alltitles': {'section_title': [(0, 'section-title'), (1, 'section-title'), (2, 'section-title'), (3, 'section-title')]}
'alltitles': {'section_title': [(0, 'section-title'), (1, 'section-title'), (2, 'section-title'), (3, 'section-title')]},
'indexentries': {},
}
assert index._objtypes == {('dummy1', 'objtype1'): 0, ('dummy2', 'objtype1'): 1}
assert index._objnames == {0: ('dummy1', 'objtype1', 'objtype1'),
Expand Down Expand Up @@ -236,7 +237,8 @@ def test_IndexBuilder():
'test': [0, 1]},
'titles': ('title1_2', 'title2_2'),
'titleterms': {'section_titl': [0, 1]},
'alltitles': {'section_title': [(0, 'section-title'), (1, 'section-title')]}
'alltitles': {'section_title': [(0, 'section-title'), (1, 'section-title')]},
'indexentries': {},
}
assert index._objtypes == {('dummy1', 'objtype1'): 0, ('dummy2', 'objtype1'): 1}
assert index._objnames == {0: ('dummy1', 'objtype1', 'objtype1'),
Expand Down