Skip to content

Commit

Permalink
Merge #4233
Browse files Browse the repository at this point in the history
4233: Add test reproducing #4232 r=dureuill a=ManyTheFish

- add a test reproducing the bug
- fix the bug by creating 2 different restricting lists of attributes, one for the exact attributes, and the other for the tolerant attributes

## Related issue
Fixes #4232


Co-authored-by: ManyTheFish <many@meilisearch.com>
  • Loading branch information
meili-bors[bot] and ManyTheFish committed Nov 29, 2023
2 parents b11f85a + 3b3fa38 commit c95d68e
Show file tree
Hide file tree
Showing 3 changed files with 104 additions and 20 deletions.
32 changes: 32 additions & 0 deletions meilisearch/tests/search/restrict_searchable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -335,3 +335,35 @@ async fn exactness_ranking_rule_order() {
})
.await;
}

#[actix_rt::test]
async fn search_on_exact_field() {
let server = Server::new().await;
let index = index_with_documents(
&server,
&json!([
{
"title": "Captain Marvel",
"exact": "Captain Marivel",
"id": "1",
},
{
"title": "Captain Marivel",
"exact": "Captain the Marvel",
"id": "2",
}]),
)
.await;

let (response, code) =
index.update_settings_typo_tolerance(json!({ "disableOnAttributes": ["exact"] })).await;
assert_eq!(202, code, "{:?}", response);
index.wait_task(1).await;
// Searching on an exact attribute should only return the document matching without typo.
index
.search(json!({"q": "Marvel", "attributesToSearchOn": ["exact"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"].as_array().unwrap().len(), @"1");
})
.await;
}
66 changes: 50 additions & 16 deletions milli/src/search/new/db_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,8 @@ impl<'ctx> SearchContext<'ctx> {
match &self.restricted_fids {
Some(restricted_fids) => {
let interned = self.word_interner.get(word).as_str();
let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect();
let keys: Vec<_> =
restricted_fids.tolerant.iter().map(|fid| (interned, *fid)).collect();

DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
self.txn,
Expand All @@ -182,13 +183,29 @@ impl<'ctx> SearchContext<'ctx> {
&mut self,
word: Interned<String>,
) -> Result<Option<RoaringBitmap>> {
DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
self.txn,
word,
self.word_interner.get(word).as_str(),
&mut self.db_cache.exact_word_docids,
self.index.exact_word_docids.remap_data_type::<ByteSlice>(),
)
match &self.restricted_fids {
Some(restricted_fids) => {
let interned = self.word_interner.get(word).as_str();
let keys: Vec<_> =
restricted_fids.exact.iter().map(|fid| (interned, *fid)).collect();

DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
self.txn,
word,
&keys[..],
&mut self.db_cache.exact_word_docids,
self.index.word_fid_docids.remap_data_type::<ByteSlice>(),
merge_cbo_roaring_bitmaps,
)
}
None => DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
self.txn,
word,
self.word_interner.get(word).as_str(),
&mut self.db_cache.exact_word_docids,
self.index.exact_word_docids.remap_data_type::<ByteSlice>(),
),
}
}

pub fn word_prefix_docids(&mut self, prefix: Word) -> Result<Option<RoaringBitmap>> {
Expand Down Expand Up @@ -219,7 +236,8 @@ impl<'ctx> SearchContext<'ctx> {
match &self.restricted_fids {
Some(restricted_fids) => {
let interned = self.word_interner.get(prefix).as_str();
let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect();
let keys: Vec<_> =
restricted_fids.tolerant.iter().map(|fid| (interned, *fid)).collect();

DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
self.txn,
Expand All @@ -244,13 +262,29 @@ impl<'ctx> SearchContext<'ctx> {
&mut self,
prefix: Interned<String>,
) -> Result<Option<RoaringBitmap>> {
DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
self.txn,
prefix,
self.word_interner.get(prefix).as_str(),
&mut self.db_cache.exact_word_prefix_docids,
self.index.exact_word_prefix_docids.remap_data_type::<ByteSlice>(),
)
match &self.restricted_fids {
Some(restricted_fids) => {
let interned = self.word_interner.get(prefix).as_str();
let keys: Vec<_> =
restricted_fids.exact.iter().map(|fid| (interned, *fid)).collect();

DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
self.txn,
prefix,
&keys[..],
&mut self.db_cache.exact_word_prefix_docids,
self.index.word_prefix_fid_docids.remap_data_type::<ByteSlice>(),
merge_cbo_roaring_bitmaps,
)
}
None => DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
self.txn,
prefix,
self.word_interner.get(prefix).as_str(),
&mut self.db_cache.exact_word_prefix_docids,
self.index.exact_word_prefix_docids.remap_data_type::<ByteSlice>(),
),
}
}

pub fn get_db_word_pair_proximity_docids(
Expand Down
26 changes: 22 additions & 4 deletions milli/src/search/new/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ use crate::error::FieldIdMapMissingEntry;
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::search::new::distinct::apply_distinct_rule;
use crate::{
AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError, BEU32,
AscDesc, DocumentId, FieldId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError,
BEU32,
};

/// A structure used throughout the execution of a search query.
Expand All @@ -63,7 +64,7 @@ pub struct SearchContext<'ctx> {
pub phrase_interner: DedupInterner<Phrase>,
pub term_interner: Interner<QueryTerm>,
pub phrase_docids: PhraseDocIdsCache,
pub restricted_fids: Option<Vec<u16>>,
pub restricted_fids: Option<RestrictedFids>,
}

impl<'ctx> SearchContext<'ctx> {
Expand All @@ -83,8 +84,9 @@ impl<'ctx> SearchContext<'ctx> {
pub fn searchable_attributes(&mut self, searchable_attributes: &'ctx [String]) -> Result<()> {
let fids_map = self.index.fields_ids_map(self.txn)?;
let searchable_names = self.index.searchable_fields(self.txn)?;
let exact_attributes_ids = self.index.exact_attributes_ids(self.txn)?;

let mut restricted_fids = Vec::new();
let mut restricted_fids = RestrictedFids::default();
let mut contains_wildcard = false;
for field_name in searchable_attributes {
if field_name == "*" {
Expand Down Expand Up @@ -123,7 +125,11 @@ impl<'ctx> SearchContext<'ctx> {
}
};

restricted_fids.push(fid);
if exact_attributes_ids.contains(&fid) {
restricted_fids.exact.push(fid);
} else {
restricted_fids.tolerant.push(fid);
};
}

self.restricted_fids = (!contains_wildcard).then_some(restricted_fids);
Expand All @@ -147,6 +153,18 @@ impl Word {
}
}

#[derive(Debug, Clone, Default)]
pub struct RestrictedFids {
pub tolerant: Vec<FieldId>,
pub exact: Vec<FieldId>,
}

impl RestrictedFids {
pub fn contains(&self, fid: &FieldId) -> bool {
self.tolerant.contains(fid) || self.exact.contains(fid)
}
}

/// Apply the [`TermsMatchingStrategy`] to the query graph and resolve it.
fn resolve_maximally_reduced_query_graph(
ctx: &mut SearchContext,
Expand Down

0 comments on commit c95d68e

Please sign in to comment.