Skip to content

Commit

Permalink
Merge #3824
Browse files Browse the repository at this point in the history
3824: Changes the way words are counted in the word count DB r=ManyTheFish a=dureuill

# Pull Request

## Related issue

Fixes #3823

## What does this PR do?

- Apply offset when parsing query that is consistent with the indexing

### DB breaking changes

- Count the number of words in `field_id_word_count_docids`
- raise limit of word count for storing the entry in the DB from 10 to 30

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
  • Loading branch information
meili-bors[bot] and dureuill committed Jun 8, 2023
2 parents 9dcf1da + a2a3b8c commit 047d22f
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 6 deletions.
2 changes: 1 addition & 1 deletion milli/src/search/new/query_term/parse_query.rs
Expand Up @@ -79,7 +79,7 @@ pub fn located_query_terms_from_tokens(
TokenKind::Separator(separator_kind) => {
// add penalty for hard separators
if let SeparatorKind::Hard = separator_kind {
position = position.wrapping_add(1);
position = position.wrapping_add(7);
}

phrase = 'phrase: {
Expand Down
@@ -1,6 +1,6 @@
use std::collections::HashMap;
use std::fs::File;
use std::{cmp, io};
use std::io;

use grenad::Sorter;

Expand Down Expand Up @@ -54,11 +54,10 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
}

for position in read_u32_ne_bytes(value) {
let (field_id, position) = relative_from_absolute_position(position);
let word_count = position as u32 + 1;
let (field_id, _) = relative_from_absolute_position(position);

let value = document_fid_wordcount.entry(field_id as FieldId).or_insert(0);
*value = cmp::max(*value, word_count);
*value += 1;
}
}

Expand All @@ -83,7 +82,7 @@ fn drain_document_fid_wordcount_into_sorter(
let mut key_buffer = Vec::new();

for (fid, count) in document_fid_wordcount.drain() {
if count <= 10 {
if count <= 30 {
key_buffer.clear();
key_buffer.extend_from_slice(&fid.to_be_bytes());
key_buffer.push(count as u8);
Expand Down

0 comments on commit 047d22f

Please sign in to comment.