Skip to content

Commit

Permalink
Merge #2468
Browse files Browse the repository at this point in the history
2468: Update milli 0.29 r=curquiza a=ManyTheFish

- [x] Update milli to 0.29
- [x] Integrate charabia
- [x] Set disabled_words to the default (an empty set) when Index::exact_words returns None
- [x] Fix ranking rules integration test

fixes #2375
fixes #2144
fixes #2417
fixes #2407

Co-authored-by: ManyTheFish <many@meilisearch.com>
  • Loading branch information
bors[bot] and ManyTheFish committed Jun 7, 2022
2 parents 1968950 + 56c44be commit 6a21b9d
Show file tree
Hide file tree
Showing 6 changed files with 64 additions and 79 deletions.
97 changes: 50 additions & 47 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion meilisearch-auth/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ edition = "2021"
base64 = "0.13.0"
enum-iterator = "0.7.0"
meilisearch-error = { path = "../meilisearch-error" }
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.28.0" }
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.29.0" }
rand = "0.8.4"
serde = { version = "1.0.136", features = ["derive"] }
serde_json = { version = "1.0.79", features = ["preserve_order"] }
Expand Down
2 changes: 1 addition & 1 deletion meilisearch-http/tests/settings/get_settings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ async fn error_set_invalid_ranking_rules() {
assert_eq!(response["status"], "failed");

let expected_error = json!({
"message": r#"`manyTheFish` ranking rule is invalid. Valid ranking rules are Words, Typo, Sort, Proximity, Attribute, Exactness and custom ranking rules."#,
"message": r#"`manyTheFish` ranking rule is invalid. Valid ranking rules are words, typo, sort, proximity, attribute, exactness and custom ranking rules."#,
"code": "invalid_ranking_rule",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_ranking_rule"
Expand Down
2 changes: 1 addition & 1 deletion meilisearch-lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ lazy_static = "1.4.0"
log = "0.4.14"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-error = { path = "../meilisearch-error" }
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.28.0" }
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.29.0" }
mime = "0.3.16"
num_cpus = "1.13.1"
obkv = "0.2.0"
Expand Down
10 changes: 4 additions & 6 deletions meilisearch-lib/src/index/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,12 +175,10 @@ impl Index {
two_typos: Setting::Set(self.min_word_len_two_typos(txn)?),
};

let disabled_words = self
.exact_words(txn)?
.into_stream()
.into_strs()?
.into_iter()
.collect();
let disabled_words = match self.exact_words(txn)? {
Some(fst) => fst.into_stream().into_strs()?.into_iter().collect(),
None => BTreeSet::new(),
};

let disabled_attributes = self
.exact_attributes(txn)?
Expand Down
30 changes: 7 additions & 23 deletions meilisearch-lib/src/index/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::str::FromStr;
use std::time::Instant;

use either::Either;
use milli::tokenizer::{Analyzer, AnalyzerConfig};
use milli::tokenizer::TokenizerBuilder;
use milli::{
AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, MatchBounds, MatcherBuilder, SortError,
};
Expand Down Expand Up @@ -175,12 +175,9 @@ impl Index {
&displayed_ids,
);

let stop_words = fst::Set::default();
let mut config = AnalyzerConfig::default();
config.stop_words(&stop_words);
let analyzer = Analyzer::new(config);
let tokenizer = TokenizerBuilder::default().build();

let mut formatter_builder = MatcherBuilder::from_matching_words(matching_words);
let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer);
formatter_builder.crop_marker(query.crop_marker);
formatter_builder.highlight_prefix(query.highlight_pre_tag);
formatter_builder.highlight_suffix(query.highlight_post_tag);
Expand All @@ -204,7 +201,6 @@ impl Index {
&displayed_document,
&fields_ids_map,
&formatter_builder,
&analyzer,
&formatted_options,
query.show_matches_position,
&displayed_ids,
Expand Down Expand Up @@ -414,8 +410,7 @@ fn make_document(
fn format_fields<'a, A: AsRef<[u8]>>(
document: &Document,
field_ids_map: &FieldsIdsMap,
builder: &MatcherBuilder,
analyzer: &'a Analyzer<'a, A>,
builder: &MatcherBuilder<'a, A>,
formatted_options: &BTreeMap<FieldId, FormatOptions>,
compute_matches: bool,
displayable_ids: &BTreeSet<FieldId>,
Expand Down Expand Up @@ -446,7 +441,6 @@ fn format_fields<'a, A: AsRef<[u8]>>(
std::mem::take(value),
builder,
format,
analyzer,
&mut infos,
compute_matches,
);
Expand All @@ -470,19 +464,14 @@ fn format_fields<'a, A: AsRef<[u8]>>(

fn format_value<'a, A: AsRef<[u8]>>(
value: Value,
builder: &MatcherBuilder,
builder: &MatcherBuilder<'a, A>,
format_options: Option<FormatOptions>,
analyzer: &'a Analyzer<'a, A>,
infos: &mut Vec<MatchBounds>,
compute_matches: bool,
) -> Value {
match value {
Value::String(old_string) => {
// this will be removed with charabia
let analyzed = analyzer.analyze(&old_string);
let tokens: Vec<_> = analyzed.tokens().collect();

let mut matcher = builder.build(&tokens[..], &old_string);
let mut matcher = builder.build(&old_string);
if compute_matches {
let matches = matcher.matches();
infos.extend_from_slice(&matches[..]);
Expand All @@ -507,7 +496,6 @@ fn format_value<'a, A: AsRef<[u8]>>(
highlight: format_options.highlight,
crop: None,
}),
analyzer,
infos,
compute_matches,
)
Expand All @@ -527,7 +515,6 @@ fn format_value<'a, A: AsRef<[u8]>>(
highlight: format_options.highlight,
crop: None,
}),
analyzer,
infos,
compute_matches,
),
Expand All @@ -536,12 +523,9 @@ fn format_value<'a, A: AsRef<[u8]>>(
.collect(),
),
Value::Number(number) => {
// this will be removed with charabia
let s = number.to_string();
let analyzed = analyzer.analyze(&s);
let tokens: Vec<_> = analyzed.tokens().collect();

let mut matcher = builder.build(&tokens[..], &s);
let mut matcher = builder.build(&s);
if compute_matches {
let matches = matcher.matches();
infos.extend_from_slice(&matches[..]);
Expand Down

0 comments on commit 6a21b9d

Please sign in to comment.