Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize search index #707

Merged
merged 8 commits into from
Jul 23, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions book-example/src/format/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ Available configuration options for the `[output.html.playpen]` table:

Available configuration options for the `[output.html.search]` table:

- **enable:** Enables the search feature. Defaults to `true`.
- **limit-results:** The maximum number of search results. Defaults to `30`.
- **teaser-word-count:** The number of words used for a search result teaser.
Defaults to `30`.
Expand Down Expand Up @@ -168,6 +169,7 @@ boost-hierarchy = 1
boost-paragraph = 1
expand = true
heading-split-level = 3
copy-js = true
```


Expand Down
5 changes: 4 additions & 1 deletion src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -463,9 +463,11 @@ impl Default for Playpen {
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(default, rename_all = "kebab-case")]
pub struct Search {
/// Enable the search feature. Default: `true`.
pub enable: bool,
/// Maximum number of visible results. Default: `30`.
pub limit_results: u32,
/// The number of words used for a search result teaser. Default: `30`,
/// The number of words used for a search result teaser. Default: `30`.
pub teaser_word_count: u32,
/// Define the logical link between multiple search words.
/// If true, all search words must appear in each result. Default: `true`.
Expand Down Expand Up @@ -494,6 +496,7 @@ impl Default for Search {
fn default() -> Search {
// Please update the documentation of `Search` when changing values!
Search {
enable: true,
limit_results: 30,
teaser_word_count: 30,
use_boolean_and: false,
Expand Down
13 changes: 7 additions & 6 deletions src/renderer/html_handlebars/hbs_renderer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -367,8 +367,10 @@ impl Renderer for HtmlHandlebars {
.chain_err(|| "Unable to copy across additional CSS and JS")?;

// Render search index
#[cfg(feature = "search")]
super::search::create_files(&html_config.search.unwrap_or_default(), &destination, &book)?;
let search = html_config.search.unwrap_or_default();
if cfg!(feature = "search") && search.enable {
super::search::create_files(&search, &destination, &book)?;
}

// Copy all remaining files
utils::fs::copy_files_except_ext(&src_dir, &destination, true, &["md"])?;
Expand Down Expand Up @@ -446,10 +448,9 @@ fn make_data(

let search = html_config.search.clone();
if cfg!(feature = "search") {
data.insert("search_enabled".to_owned(), json!(true));
if search.unwrap_or_default().copy_js {
data.insert("search_js".to_owned(), json!(true));
}
let search = search.unwrap_or_default();
data.insert("search_enabled".to_owned(), json!(search.enable));
data.insert("search_js".to_owned(), json!(search.enable && search.copy_js));
} else if search.is_some() {
warn!("mdBook compiled without search support, ignoring `output.html.search` table");
warn!(
Expand Down
52 changes: 32 additions & 20 deletions src/renderer/html_handlebars/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,21 @@ use theme::searcher;
/// Creates all files required for search.
pub fn create_files(search_config: &Search, destination: &Path, book: &Book) -> Result<()> {
let mut index = Index::new(&["title", "body", "breadcrumbs"]);
let mut doc_urls = Vec::with_capacity(book.sections.len());

for item in book.iter() {
render_item(&mut index, &search_config, item)?;
render_item(&mut index, &search_config, &mut doc_urls, item)?;
}

let index = write_to_js(index, &search_config)?;
let index = write_to_json(index, &search_config, doc_urls)?;
debug!("Writing search index ✓");
if index.len() > 10_000_000 {
warn!("searchindex.json is very large ({} bytes)", index.len());
}

if search_config.copy_js {
utils::fs::write_file(destination, "searchindex.js", index.as_bytes())?;
utils::fs::write_file(destination, "searchindex.json", index.as_bytes())?;
utils::fs::write_file(destination, "searchindex.js", format!("window.search = {};", index).as_bytes())?;
utils::fs::write_file(destination, "searcher.js", searcher::JS)?;
utils::fs::write_file(destination, "mark.min.js", searcher::MARK_JS)?;
utils::fs::write_file(destination, "elasticlunr.min.js", searcher::ELASTICLUNR_JS)?;
Expand All @@ -38,18 +43,22 @@ pub fn create_files(search_config: &Search, destination: &Path, book: &Book) ->
}

/// Uses the given arguments to construct a search document, then inserts it to the given index.
fn add_doc<'a>(
fn add_doc(
index: &mut Index,
anchor_base: &'a str,
doc_urls: &mut Vec<String>,
anchor_base: &str,
section_id: &Option<String>,
items: &[&str],
) {
let doc_ref: Cow<'a, str> = if let &Some(ref id) = section_id {
format!("{}#{}", anchor_base, id).into()
let url = if let &Some(ref id) = section_id {
Cow::Owned(format!("{}#{}", anchor_base, id))
} else {
anchor_base.into()
Cow::Borrowed(anchor_base)
};
let doc_ref = utils::collapse_whitespace(doc_ref.trim());
let url = utils::collapse_whitespace(url.trim());
let doc_ref = doc_urls.len().to_string();
doc_urls.push(url.into());

let items = items.iter().map(|&x| utils::collapse_whitespace(x.trim()));
index.add_doc(&doc_ref, items);
}
Expand All @@ -58,6 +67,7 @@ fn add_doc<'a>(
fn render_item(
index: &mut Index,
search_config: &Search,
doc_urls: &mut Vec<String>,
item: &BookItem,
) -> Result<()> {
let chapter = match item {
Expand Down Expand Up @@ -92,6 +102,7 @@ fn render_item(
// Write the data to the index, and clear it for the next section
add_doc(
index,
doc_urls,
&anchor_base,
&section_id,
&[&heading, &body, &breadcrumbs.join(" » ")],
Expand Down Expand Up @@ -144,6 +155,7 @@ fn render_item(
// Make sure the last section is added to the index
add_doc(
index,
doc_urls,
&anchor_base,
&section_id,
&[&heading, &body, &breadcrumbs.join(" » ")],
Expand All @@ -153,10 +165,7 @@ fn render_item(
Ok(())
}

/// Exports the index and search options to a JS script which stores the index in `window.search`.
/// Using a JS script is a workaround for CORS in `file://` URIs. It also removes the need for
/// downloading/parsing JSON in JS.
fn write_to_js(index: Index, search_config: &Search) -> Result<String> {
fn write_to_json(index: Index, search_config: &Search, doc_urls: Vec<String>) -> Result<String> {
use std::collections::BTreeMap;
use self::elasticlunr::config::{SearchBool, SearchOptions, SearchOptionsField};

Expand All @@ -169,9 +178,11 @@ fn write_to_js(index: Index, search_config: &Search) -> Result<String> {
#[derive(Serialize)]
struct SearchindexJson {
/// The options used for displaying search results
resultsoptions: ResultsOptions,
results_options: ResultsOptions,
/// The searchoptions for elasticlunr.js
searchoptions: SearchOptions,
search_options: SearchOptions,
/// Used to lookup a document's URL from an integer document ref.
doc_urls: Vec<String>,
/// The index for elasticlunr.js
index: elasticlunr::Index,
}
Expand All @@ -185,7 +196,7 @@ fn write_to_js(index: Index, search_config: &Search) -> Result<String> {
opt.boost = Some(search_config.boost_hierarchy);
fields.insert("breadcrumbs".into(), opt);

let searchoptions = SearchOptions {
let search_options = SearchOptions {
bool: if search_config.use_boolean_and {
SearchBool::And
} else {
Expand All @@ -195,14 +206,15 @@ fn write_to_js(index: Index, search_config: &Search) -> Result<String> {
fields,
};

let resultsoptions = ResultsOptions {
let results_options = ResultsOptions {
limit_results: search_config.limit_results,
teaser_word_count: search_config.teaser_word_count,
};

let json_contents = SearchindexJson {
resultsoptions,
searchoptions,
results_options,
search_options,
doc_urls,
index,
};

Expand All @@ -211,7 +223,7 @@ fn write_to_js(index: Index, search_config: &Search) -> Result<String> {
let json_contents = serde_json::to_value(&json_contents)?;
let json_contents = serde_json::to_string(&json_contents)?;

Ok(format!("window.search = {};", json_contents))
Ok(json_contents)
}

fn clean_html(html: &str) -> String {
Expand Down
35 changes: 23 additions & 12 deletions src/theme/searcher/searcher.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,12 @@ window.search = window.search || {};
content = document.getElementById('content'),

searchindex = null,
resultsoptions = {
doc_urls = [],
results_options = {
teaser_word_count: 30,
limit_results: 30,
},
searchoptions = {
search_options = {
bool: "AND",
expand: true,
fields: {
Expand Down Expand Up @@ -139,7 +140,7 @@ window.search = window.search || {};
teaser_count++;

// The ?URL_MARK_PARAM= parameter belongs in between the page and the #heading-anchor
var url = result.ref.split("#");
var url = doc_urls[result.ref].split("#");
if (url.length == 1) { // no anchor found
url.push("");
}
Expand Down Expand Up @@ -196,7 +197,7 @@ window.search = window.search || {};
}

var window_weight = [];
var window_size = Math.min(weighted.length, resultsoptions.teaser_word_count);
var window_size = Math.min(weighted.length, results_options.teaser_word_count);

var cur_sum = 0;
for (var wordindex = 0; wordindex < window_size; wordindex++) {
Expand Down Expand Up @@ -246,11 +247,12 @@ window.search = window.search || {};
return teaser_split.join('');
}

function init() {
resultsoptions = window.search.resultsoptions;
searchoptions = window.search.searchoptions;
searchbar_outer = window.search.searchbar_outer;
searchindex = elasticlunr.Index.load(window.search.index);
function init(config) {
results_options = config.results_options;
search_options = config.search_options;
searchbar_outer = config.searchbar_outer;
doc_urls = config.doc_urls;
searchindex = elasticlunr.Index.load(config.index);

// Set up events
searchicon.addEventListener('click', function(e) { searchIconClickHandler(); }, false);
Expand Down Expand Up @@ -441,8 +443,8 @@ window.search = window.search || {};
if (searchindex == null) { return; }

// Do the actual search
var results = searchindex.search(searchterm, searchoptions);
var resultcount = Math.min(results.length, resultsoptions.limit_results);
var results = searchindex.search(searchterm, search_options);
var resultcount = Math.min(results.length, results_options.limit_results);

// Display search metrics
searchresults_header.innerText = formatSearchMetric(resultcount, searchterm);
Expand All @@ -460,7 +462,16 @@ window.search = window.search || {};
showResults(true);
}

init();
fetch(path_to_root + 'searchindex.json')
.then(response => response.json())
.then(json => init(json))
.catch(error => { // Try to load searchindex.js if fetch failed
var script = document.createElement('script');
script.src = path_to_root + 'searchindex.js';
script.onload = () => init(window.search);
document.head.appendChild(script);
});

// Exported functions
search.hasFocus = hasFocus;
})(window.search);
32 changes: 23 additions & 9 deletions tests/rendered_output.rs
Original file line number Diff line number Diff line change
Expand Up @@ -426,33 +426,47 @@ mod search {

let index = read_book_index(temp.path());

let doc_urls = index["doc_urls"].as_array().unwrap();
let get_doc_ref = |url: &str| -> String {
doc_urls.iter()
.position(|s| s == url)
.unwrap()
.to_string()
};

let first_chapter = get_doc_ref("first/index.html#first-chapter");
let introduction = get_doc_ref("intro.html#introduction");
let some_section = get_doc_ref("first/index.html#some-section");
let summary = get_doc_ref("first/includes.html#summary");
let conclusion = get_doc_ref("conclusion.html#conclusion");

let bodyidx = &index["index"]["index"]["body"]["root"];
let textidx = &bodyidx["t"]["e"]["x"]["t"];
assert_eq!(textidx["df"], 2);
assert_eq!(textidx["docs"]["first/index.html#first-chapter"]["tf"], 1.0);
assert_eq!(textidx["docs"]["intro.html#introduction"]["tf"], 1.0);
assert_eq!(textidx["docs"][&first_chapter]["tf"], 1.0);
assert_eq!(textidx["docs"][&introduction]["tf"], 1.0);

let docs = &index["index"]["documentStore"]["docs"];
assert_eq!(docs["first/index.html#first-chapter"]["body"], "more text.");
assert_eq!(docs["first/index.html#some-section"]["body"], "");
assert_eq!(docs[&first_chapter]["body"], "more text.");
assert_eq!(docs[&some_section]["body"], "");
assert_eq!(
docs["first/includes.html#summary"]["body"],
docs[&summary]["body"],
"Introduction First Chapter Nested Chapter Includes Recursive Second Chapter Conclusion"
);
assert_eq!(
docs["first/includes.html#summary"]["breadcrumbs"],
docs[&summary]["breadcrumbs"],
"First Chapter » Summary"
);
assert_eq!(
docs["conclusion.html#conclusion"]["body"],
docs[&conclusion]["body"],
"I put &lt;HTML&gt; in here!"
);
}

// Setting this to `true` may cause issues with `cargo watch`,
// since it may not finish writing the fixture before the tests
// are run again.
const GENERATE_FIXTURE: bool = true;
const GENERATE_FIXTURE: bool = false;

fn get_fixture() -> serde_json::Value {
if GENERATE_FIXTURE {
Expand Down Expand Up @@ -481,7 +495,7 @@ mod search {
//
// If you're pretty sure you haven't broken anything, change `GENERATE_FIXTURE`
// above to `true`, and run `cargo test` to generate a new fixture. Then
// change it back to `false`. Include the changed `searchindex_fixture.json` in your commit.
// **change it back to `false`**. Include the changed `searchindex_fixture.json` in your commit.
#[test]
fn search_index_hasnt_changed_accidentally() {
let temp = DummyBook::new().build().unwrap();
Expand Down