Skip to content
This repository has been archived by the owner on Apr 4, 2023. It is now read-only.

Commit

Permalink
Introduce a cache on the docid_word_positions database method
Browse files Browse the repository at this point in the history
  • Loading branch information
Kerollmops committed Mar 8, 2021
1 parent 5fcaedb commit 82a0f67
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 23 deletions.
72 changes: 52 additions & 20 deletions milli/src/search/criteria/proximity.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::borrow::Cow;
use std::collections::{BTreeMap, HashMap, btree_map};
use std::collections::btree_map::{self, BTreeMap};
use std::collections::hash_map::{HashMap, Entry};
use std::mem::take;

use roaring::RoaringBitmap;
Expand Down Expand Up @@ -331,19 +332,21 @@ fn resolve_candidates<'t>(
Ok(candidates)
}

fn resolve_plane_sweep_candidates<'t>(
ctx: &'t dyn Context,
fn resolve_plane_sweep_candidates(
ctx: &dyn Context,
query_tree: &Operation,
allowed_candidates: &RoaringBitmap,
wdcache: &mut WordDerivationsCache,
) -> anyhow::Result<BTreeMap<u8, RoaringBitmap>>
{
/// FIXME may be buggy with query like "new new york"
fn plane_sweep<'t>(
ctx: &'t dyn Context,
operations: &[Operation],
fn plane_sweep<'a>(
ctx: &dyn Context,
operations: &'a [Operation],
docid: DocumentId,
consecutive: bool,
rocache: &mut HashMap<&'a Operation, Vec<(Position, u8, Position)>>,
dwpcache: &mut HashMap<String, Option<RoaringBitmap>>,
wdcache: &mut WordDerivationsCache,
) -> anyhow::Result<Vec<(Position, u8, Position)>>
{
Expand Down Expand Up @@ -385,7 +388,7 @@ fn resolve_plane_sweep_candidates<'t>(
let mut groups_positions = Vec::with_capacity(groups_len);

for operation in operations {
let positions = resolve_operation(ctx, operation, docid, wdcache)?;
let positions = resolve_operation(ctx, operation, docid, rocache, dwpcache, wdcache)?;
groups_positions.push(positions.into_iter());
}

Expand Down Expand Up @@ -456,25 +459,32 @@ fn resolve_plane_sweep_candidates<'t>(
Ok(output)
}

fn resolve_operation<'t>(
ctx: &'t dyn Context,
query_tree: &Operation,
fn resolve_operation<'a>(
ctx: &dyn Context,
query_tree: &'a Operation,
docid: DocumentId,
rocache: &mut HashMap<&'a Operation, Vec<(Position, u8, Position)>>,
dwpcache: &mut HashMap<String, Option<RoaringBitmap>>,
wdcache: &mut WordDerivationsCache,
) -> anyhow::Result<Vec<(Position, u8, Position)>> {
) -> anyhow::Result<Vec<(Position, u8, Position)>>
{
use Operation::{And, Consecutive, Or};

match query_tree {
And(ops) => plane_sweep(ctx, ops, docid, false, wdcache),
Consecutive(ops) => plane_sweep(ctx, ops, docid, true, wdcache),
if let Some(result) = rocache.get(query_tree) {
return Ok(result.clone());
}

let result = match query_tree {
And(ops) => plane_sweep(ctx, ops, docid, false, rocache, dwpcache, wdcache)?,
Consecutive(ops) => plane_sweep(ctx, ops, docid, true, rocache, dwpcache, wdcache)?,
Or(_, ops) => {
let mut result = Vec::new();
for op in ops {
result.extend(resolve_operation(ctx, op, docid, wdcache)?)
result.extend(resolve_operation(ctx, op, docid, rocache, dwpcache, wdcache)?)
}

result.sort_unstable();
Ok(result)
result
},
Operation::Query(Query {prefix, kind}) => {
let fst = ctx.words_fst();
Expand All @@ -493,21 +503,43 @@ fn resolve_plane_sweep_candidates<'t>(

let mut result = Vec::new();
for (word, _) in words.as_ref() {
if let Some(positions) = ctx.docid_word_positions(docid, word)? {
let positions = match dwpcache.entry(word.to_string()) {
Entry::Occupied(entry) => entry.into_mut(),
Entry::Vacant(entry) => {
let positions = ctx.docid_word_positions(docid, word)?;
entry.insert(positions)
}
};

if let Some(positions) = positions {
let iter = positions.iter().map(|p| (p, 0, p));
result.extend(iter);
}
}

result.sort_unstable();
Ok(result)
result
}
}
};

rocache.insert(query_tree, result.clone());
Ok(result)
}

let mut word_positions_cache = HashMap::new();
let mut resolve_operation_cache = HashMap::new();
let mut candidates = BTreeMap::new();
for docid in allowed_candidates {
let positions = resolve_operation(ctx, query_tree, docid, wdcache)?;
word_positions_cache.clear();
resolve_operation_cache.clear();
let positions = resolve_operation(
ctx,
query_tree,
docid,
&mut resolve_operation_cache,
&mut word_positions_cache,
wdcache,
)?;
let best_proximity = positions.into_iter().min_by_key(|(_, proximity, _)| *proximity);
let best_proximity = best_proximity.map(|(_, proximity, _)| proximity).unwrap_or(7);
candidates.entry(best_proximity).or_insert_with(RoaringBitmap::new).insert(docid);
Expand Down
6 changes: 3 additions & 3 deletions milli/src/search/criteria/typo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -379,7 +379,7 @@ mod test {

let facet_candidates = None;

let mut wdcache = WordDerivationsCache::new();
let mut wdcache = WordDerivationsCache::new();
let mut criteria = Typo::initial(&context, Some(query_tree), facet_candidates);

let candidates_1 = context.word_docids("split").unwrap().unwrap()
Expand Down Expand Up @@ -428,7 +428,7 @@ let mut wdcache = WordDerivationsCache::new();
let query_tree = None;
let facet_candidates = context.word_docids("earth").unwrap().unwrap();

let mut wdcache = WordDerivationsCache::new();
let mut wdcache = WordDerivationsCache::new();
let mut criteria = Typo::initial(&context, query_tree, Some(facet_candidates.clone()));

let expected = CriterionResult {
Expand Down Expand Up @@ -457,7 +457,7 @@ let mut wdcache = WordDerivationsCache::new();

let facet_candidates = context.word_docids("earth").unwrap().unwrap();

let mut wdcache = WordDerivationsCache::new();
let mut wdcache = WordDerivationsCache::new();
let mut criteria = Typo::initial(&context, Some(query_tree), Some(facet_candidates.clone()));

let candidates_1 = context.word_docids("split").unwrap().unwrap()
Expand Down

0 comments on commit 82a0f67

Please sign in to comment.