Skip to content
This repository has been archived by the owner on Apr 4, 2023. It is now read-only.

Commit

Permalink
Fix bug in prefix DB indexing
Browse files Browse the repository at this point in the history
The batch's information was not properly updated in cases
where only the proximity changed between two consecutive word pair
proximities.

Closes meilisearch/meilisearch#3043
  • Loading branch information
loiclec committed Nov 17, 2022
1 parent a651397 commit f7c8730
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 5 deletions.
47 changes: 47 additions & 0 deletions milli/src/update/prefix_word_pairs/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -238,4 +238,51 @@ mod tests {
db_snap!(index, word_prefix_pair_proximity_docids, "update");
db_snap!(index, prefix_word_pair_proximity_docids, "update");
}
#[test]
fn test_batch_bug_3034() {
    // Regression test for https://github.com/meilisearch/meilisearch/issues/3043
    // NOTE(review): the number in the test name (3034) differs from the issue number (3043).
    // The name is kept as is: snapshot locations presumably depend on it — confirm before renaming.
    let mut index = TempIndex::new();
    index.index_documents_config.words_prefix_threshold = Some(50);
    index.index_documents_config.autogenerate_docids = true;

    // Only the `text` field is searchable.
    index
        .update_settings(|settings| {
            settings.set_searchable_fields(vec!["text".to_owned()]);
        })
        .unwrap();

    // Serialises a collection of JSON objects into an in-memory documents batch
    // and returns a reader over that batch.
    let batch_reader_from_documents = |documents| {
        let mut batch_builder = DocumentsBatchBuilder::new(Vec::new());
        for object in documents {
            batch_builder.append_json_object(&object).unwrap();
        }
        let serialized = batch_builder.into_inner().unwrap();
        DocumentsBatchReader::from_reader(Cursor::new(serialized)).unwrap()
    };

    // Base documents produced by the helper for the prefix "y"
    // (presumably enough distinct words for "y" to be indexed as a prefix — see the helper).
    let mut documents = documents_with_enough_different_words_for_prefixes(&["y"]);

    // Then add documents whose text should populate the
    // word_prefix_pair_proximity_docids database: the same words `x` and `y`,
    // once adjacent and once separated by another word.
    for text in ["x y", "x a y"] {
        let document = serde_json::json!({ "text": text });
        documents.push(document.as_object().unwrap().clone());
    }

    let reader = batch_reader_from_documents(documents);
    index.add_documents(reader).unwrap();

    // Snapshot the three word-pair proximity databases.
    db_snap!(index, word_pair_proximity_docids);
    db_snap!(index, word_prefix_pair_proximity_docids);
    db_snap!(index, prefix_word_pair_proximity_docids);
}
}
9 changes: 4 additions & 5 deletions milli/src/update/prefix_word_pairs/word_prefix.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ word2 : doggo
2. **Inner loop:** Then, we iterate over all the prefixes of `word2` that are
in the list of sorted prefixes. And we insert the key `prefix`
and the value (`docids`) to a sorted map which we call the “batch”. For example,
at the end of the first inner loop, we may have:
at the end of the first outer loop, we may have:
```text
Outer loop 1:
------------------------------
Expand Down Expand Up @@ -85,7 +85,7 @@ end of the batch.
4. On the third iteration of the outer loop, we have:
```text
Outer loop 4:
Outer loop 3:
------------------------------
proximity: 1
word1 : good
Expand Down Expand Up @@ -340,17 +340,16 @@ fn execute_on_word_pairs_and_prefixes<I>(
if prox_different_than_prev || word1_different_than_prev || word2_start_different_than_prev
{
batch.flush(&mut merge_buffer, &mut insert)?;
batch.proximity = proximity;
// don't forget to reset the value of batch.word1 and prev_word2_start
if word1_different_than_prev {
prefix_search_start.0 = 0;
batch.word1.clear();
batch.word1.extend_from_slice(word1);
batch.proximity = proximity;
}
if word2_start_different_than_prev {
// word2_start_different_than_prev == true
prev_word2_start = word2[0];
}
prefix_search_start.0 = 0;
// Optimisation: find the search start in the prefix trie to iterate over the prefixes of word2
empty_prefixes = !prefixes.set_search_start(word2, &mut prefix_search_start);
}
Expand Down

0 comments on commit f7c8730

Please sign in to comment.