From a686cdf94af314652daef5235a36c676b526bea0 Mon Sep 17 00:00:00 2001
From: muji
Date: Sat, 31 Dec 2022 10:48:42 +0800
Subject: [PATCH] Support fields that may be collections.

---
 benches/test_benchmark.rs        |  8 ++++----
 src/index.rs                     | 24 ++++++++++++------------
 src/lib.rs                       | 10 +++++-----
 src/score/default/zero_to_one.rs | 16 ++++++++--------
 tests/integrations_tests.rs      |  8 ++++----
 5 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/benches/test_benchmark.rs b/benches/test_benchmark.rs
index ddf2e4d..9946496 100644
--- a/benches/test_benchmark.rs
+++ b/benches/test_benchmark.rs
@@ -30,8 +30,8 @@ pub fn test_speed(c: &mut Criterion) {
         }
         s
     }
-    fn title_extract_x(d: &DocX) -> Option<&str> {
-        Some(d.title.as_str())
+    fn title_extract_x(d: &DocX) -> Vec<&str> {
+        vec![d.title.as_str()]
     }
 
     c.bench_function("add_100k_docs", |b| {
@@ -43,14 +43,14 @@
             new_rand.push_str(&generate_string(0, 4));
             random_strings.push(new_rand);
         }
-        let extractor = [title_extract_x as fn(&_) -> Option<&str>];
+        let extractor = [title_extract_x as fn(&DocX) -> Vec<&str>];
         b.iter(|| add_all_documents(&mut index, &extractor, &random_strings));
     });
 }
 
 fn add_all_documents(
     index: &mut Index<usize>,
-    extractor: &[fn(&DocX) -> Option<&str>],
+    extractor: &[fn(&DocX) -> Vec<&str>],
     random_strings: &[String],
 ) {
     for (i, s) in random_strings.iter().enumerate() {
diff --git a/src/index.rs b/src/index.rs
index c235097..cea8ea0 100644
--- a/src/index.rs
+++ b/src/index.rs
@@ -89,12 +89,12 @@ impl Index {
         let mut all_terms: Vec<Cow<str>> = Vec::new();
 
         for i in 0..fields.len() {
-            if let Some(field_value) = field_accessors[i](doc) {
-                let fields_len = fields.len();
-                let mut field_details = fields.get_mut(i).unwrap();
-
+            let field_values = field_accessors[i](doc);
+            let fields_len = fields.len();
+            let mut field_details = fields.get_mut(i).unwrap();
+            for field_value in field_values {
                 // tokenize text
-                let terms = tokenizer(field_value);
+                let terms = tokenizer(&field_value);
 
                 // filter and count terms, ignore empty strings
                 let mut filtered_terms_count = 0;
@@ -486,8 +486,8 @@ mod tests {
         text: String,
     }
 
-    fn field_accessor(doc: &Doc) -> Option<&str> {
-        Some(doc.text.as_str())
+    fn field_accessor(doc: &Doc) -> Vec<&str> {
+        vec![doc.text.as_str()]
     }
 
     mod add {
@@ -497,7 +497,7 @@
         #[test]
         fn it_should_add_one_document_with_three_terms<'idn>() {
             let field_accessors: Vec<FieldAccessor<Doc>> =
-                vec![field_accessor as fn(doc: &Doc) -> Option<&str>];
+                vec![field_accessor];
             let mut index = Index::<usize>::new(1);
 
             let doc = Doc {
@@ -549,7 +549,7 @@
         #[test]
         fn it_should_add_shared_terms() {
             let field_accessors: Vec<FieldAccessor<Doc>> =
-                vec![field_accessor as fn(doc: &Doc) -> Option<&str>];
+                vec![field_accessor];
             let mut index = Index::<usize>::new(1);
 
             let doc_1 = Doc {
@@ -609,7 +609,7 @@
         #[test]
         fn it_should_ignore_empty_tokens() {
             let field_accessors: Vec<FieldAccessor<Doc>> =
-                vec![field_accessor as fn(doc: &Doc) -> Option<&str>];
+                vec![field_accessor];
             let mut index = Index::<usize>::new(1);
 
             let doc_1 = Doc {
@@ -742,7 +742,7 @@
         #[test]
         fn it_should_count_nodes() {
             let field_accessors: Vec<FieldAccessor<Doc>> =
-                vec![field_accessor as fn(doc: &Doc) -> Option<&str>];
+                vec![field_accessor as fn(doc: &Doc) -> Vec<&str>];
             let mut index = Index::<usize>::new(1);
 
             let doc = Doc {
@@ -762,7 +762,7 @@
         #[test]
         fn it_should_count_nodes_2() {
             let field_accessors: Vec<FieldAccessor<Doc>> =
-                vec![field_accessor as fn(doc: &Doc) -> Option<&str>];
+                vec![field_accessor];
 
             let mut index = Index::<usize>::new(1);
 
diff --git a/src/lib.rs b/src/lib.rs
index f0db0d3..14f26ef 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -8,7 +8,7 @@ pub use index::*;
 pub use query::QueryResult;
 
 /// Function that extracts a field value from a document.
-pub type FieldAccessor<D> = fn(&D) -> Option<&str>;
+pub type FieldAccessor<D> = fn(&D) -> Vec<&str>;
 
 /// Function used to tokenize a field.
 pub type Tokenizer = fn(&str) -> Vec<Cow<str>>;
@@ -31,12 +31,12 @@ pub mod test_util {
         pub text: String,
     }
 
-    pub fn title_extract(d: &Doc) -> Option<&str> {
-        Some(d.title.as_str())
+    pub fn title_extract(d: &Doc) -> Vec<&str> {
+        vec![d.title.as_str()]
     }
 
-    pub fn text_extract(d: &Doc) -> Option<&str> {
-        Some(d.text.as_str())
+    pub fn text_extract(d: &Doc) -> Vec<&str> {
+        vec![d.text.as_str()]
    }
 
     pub fn tokenizer(s: &str) -> Vec<Cow<str>> {
diff --git a/src/score/default/zero_to_one.rs b/src/score/default/zero_to_one.rs
index 4a70020..1234bac 100644
--- a/src/score/default/zero_to_one.rs
+++ b/src/score/default/zero_to_one.rs
@@ -315,11 +315,11 @@ mod tests {
             title: String,
             description: String,
         }
-        fn title_extract(doc: &DocTitleDescription) -> Option<&str> {
-            Some(doc.title.as_str())
+        fn title_extract(doc: &DocTitleDescription) -> Vec<&str> {
+            vec![doc.title.as_str()]
         }
-        fn description_extract(doc: &DocTitleDescription) -> Option<&str> {
-            Some(doc.description.as_str())
+        fn description_extract(doc: &DocTitleDescription) -> Vec<&str> {
+            vec![doc.description.as_str()]
         }
 
         for (i, (title, description)) in titles.iter().zip(descriptions.iter()).enumerate() {
@@ -364,11 +364,11 @@ mod tests {
             title: String,
             description: String,
         }
-        fn title_extract(doc: &DocTitleDescription) -> Option<&str> {
-            Some(doc.title.as_str())
+        fn title_extract(doc: &DocTitleDescription) -> Vec<&str> {
+            vec![doc.title.as_str()]
         }
-        fn description_extract(doc: &DocTitleDescription) -> Option<&str> {
-            Some(doc.description.as_str())
+        fn description_extract(doc: &DocTitleDescription) -> Vec<&str> {
+            vec![doc.description.as_str()]
         }
 
         for (i, (title, description)) in titles.iter().zip(descriptions.iter()).enumerate() {
diff --git a/tests/integrations_tests.rs b/tests/integrations_tests.rs
index 7068fd5..debefd7 100644
--- a/tests/integrations_tests.rs
+++ b/tests/integrations_tests.rs
@@ -16,12 +16,12 @@ fn tokenizer(s: &str) -> Vec<Cow<str>> {
     s.split(' ').map(Cow::from).collect::<Vec<_>>()
 }
 
-fn title_extract(d: &Doc) -> Option<&str> {
-    Some(d.title.as_str())
+fn title_extract(d: &Doc) -> Vec<&str> {
+    vec![d.title.as_str()]
 }
 
-fn description_extract(d: &Doc) -> Option<&str> {
-    Some(d.description.as_str())
+fn description_extract(d: &Doc) -> Vec<&str> {
+    vec![d.description.as_str()]
 }
 
 #[test]
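--
Note (reviewer sketch, not part of the patch): the point of returning
`Vec<&str>` instead of `Option<&str>` is that one field can now yield
several values, for example a tags list, while the old "no value" case
maps to an empty vector. A minimal sketch under assumptions: the `Post`
type, its `tags` field, and `main` below are hypothetical; only the
accessor and tokenizer signatures come from this diff.

    use std::borrow::Cow;

    struct Post {
        title: String,
        tags: Vec<String>, // collection-valued field (hypothetical example type)
    }

    // Single-valued field: wrap the one value, as the updated tests do.
    fn title_extract(p: &Post) -> Vec<&str> {
        vec![p.title.as_str()]
    }

    // Collection-valued field: one entry per element. An empty vector now
    // plays the role the old `None` return used to, so the rewritten
    // per-field loop in src/index.rs simply iterates zero times.
    fn tags_extract(p: &Post) -> Vec<&str> {
        p.tags.iter().map(String::as_str).collect()
    }

    // Same tokenizer shape as the crate's tests: split on spaces.
    fn tokenizer(s: &str) -> Vec<Cow<str>> {
        s.split(' ').map(Cow::from).collect::<Vec<_>>()
    }

    fn main() {
        let post = Post {
            title: "hello world".into(),
            tags: vec!["rust".into(), "search".into()],
        };
        assert_eq!(tags_extract(&post), vec!["rust", "search"]);
        assert_eq!(tokenizer(post.title.as_str()).len(), 2);
    }

Each accessor slots into a `Vec<FieldAccessor<Post>>` exactly like the
single-valued accessors in the tests above.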
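Note (reviewer sketch, not part of the patch): migrating an existing
`Option<&str>` accessor is mechanical, because `Option` is already a
zero-or-one element collection; for such fields the rewritten per-field
loop in src/index.rs then behaves exactly as the old `if let Some(...)`
guard did. The `maybe_text` helper below is hypothetical:

    // Old shape: fn(&D) -> Option<&str>. New shape: fn(&D) -> Vec<&str>.
    fn maybe_text(opt: Option<&str>) -> Vec<&str> {
        // None becomes an empty Vec, so `for field_value in field_values`
        // does nothing, matching the old early skip of the field.
        opt.into_iter().collect()
    }

    fn main() {
        assert_eq!(maybe_text(Some("a")), vec!["a"]);
        assert!(maybe_text(None).is_empty());
    }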