From f2a54c388c81c6da3235da480c82a107637c3d14 Mon Sep 17 00:00:00 2001
From: muji
Date: Thu, 11 Aug 2022 14:08:01 +0800
Subject: [PATCH 1/8] Pass document by reference.

---
 src/index.rs                           | 22 +++++++++++-----------
 src/lib.rs                             |  2 +-
 src/query.rs                           | 14 +++++++-------
 src/query/score/default/zero_to_one.rs |  4 ++--
 tests/integrations_tests.rs            | 10 +++++-----
 5 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/src/index.rs b/src/index.rs
index 7e47b70..11f72a8 100644
--- a/src/index.rs
+++ b/src/index.rs
@@ -290,7 +290,7 @@ pub fn add_document_to_index(
     tokenizer: Tokenizer,
     filter: Filter,
     key: T,
-    doc: D,
+    doc: &D,
 ) {
     let docs = &mut index.docs;
     let fields = &mut index.fields;
@@ -298,7 +298,7 @@ pub fn add_document_to_index(
     let mut term_counts: HashMap<String, Vec<usize>> = HashMap::new();
     let mut all_terms: Vec<String> = Vec::new();
     for i in 0..fields.len() {
-        if let Some(field_value) = field_accessors[i](&doc) {
+        if let Some(field_value) = field_accessors[i](doc) {
             let fields_len = fields.len();
             let mut field_details = fields.get_mut(i).unwrap();
@@ -593,7 +593,7 @@ mod tests {
             text: "a b c".to_string(),
         };

-        add_document_to_index(&mut index, &field_accessors, tokenizer, filter, doc.id, doc);
+        add_document_to_index(&mut index, &field_accessors, tokenizer, filter, doc.id, &doc);

         assert_eq!(index.docs.len(), 1);
         let (_, added_doc) = index.docs.iter().next().unwrap();
@@ -655,7 +655,7 @@ mod tests {
             tokenizer,
             filter,
             doc_1.id,
-            doc_1.clone(),
+            &doc_1,
         );

         add_document_to_index(
@@ -664,7 +664,7 @@ mod tests {
             tokenizer,
             filter,
             doc_2.id,
-            doc_2.clone(),
+            &doc_2,
         );

         assert_eq!(index.docs.len(), 2);
@@ -725,7 +725,7 @@ mod tests {
                 tokenizer,
                 filter,
                 doc_1.id,
-                doc_1,
+                &doc_1,
             );
         }
     }
@@ -751,7 +751,7 @@ mod tests {
             tokenizer,
             filter,
             doc.id,
-            doc,
+            &doc,
         )
     }
@@ -871,14 +871,14 @@ mod tests {
             text: "abe".to_string(),
         };

-        add_document_to_index(&mut index, &field_accessors, tokenizer, filter, doc.id, doc);
+        add_document_to_index(&mut index, &field_accessors, tokenizer, filter, doc.id, &doc);
         add_document_to_index(
             &mut index,
             &field_accessors,
             tokenizer,
             filter,
             doc_2.id,
-            doc_2,
+            &doc_2,
         );
         assert_eq!(count_nodes(&index), 5); //
     }
@@ -899,14 +899,14 @@ mod tests {
             text: "ab ef".to_string(),
         };

-        add_document_to_index(&mut index, &field_accessors, tokenizer, filter, doc.id, doc);
+        add_document_to_index(&mut index, &field_accessors, tokenizer, filter, doc.id, &doc);
         add_document_to_index(
             &mut index,
             &field_accessors,
             tokenizer,
             filter,
             doc_2.id,
-            doc_2,
+            &doc_2,
         );
         assert_eq!(count_nodes(&index), 7); //
     }
diff --git a/src/lib.rs b/src/lib.rs
index 93dfe92..d68768b 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -73,7 +73,7 @@ pub mod test_util {
                 id: i,
                 title: title.to_string(),
             };
-            add_document_to_index(&mut index, &[title_extract], tokenizer, filter, doc.id, doc);
+            add_document_to_index(&mut index, &[title_extract], tokenizer, filter, doc.id, &doc);
         }
         index
     }
diff --git a/src/query.rs b/src/query.rs
index 73539f0..acf5f7b 100644
--- a/src/query.rs
+++ b/src/query.rs
@@ -258,7 +258,7 @@ mod tests {
                 tokenizer,
                 filter,
                 doc.id,
-                doc,
+                &doc,
             );
         }
         let result = query(
@@ -301,7 +301,7 @@ mod tests {
                 tokenizer,
                 filter,
                 doc.id,
-                doc,
+                &doc,
             );
         }
@@ -357,7 +357,7 @@ mod tests {
                 tokenizer,
                 filter,
                 doc.id,
-                doc,
+                &doc,
             );
         }
@@ -401,7 +401,7 @@ mod tests {
                 tokenizer,
                 filter,
                 doc.id,
-                doc,
+                &doc,
             );
         }
@@ -446,7 +446,7 @@ mod tests {
                 tokenizer,
                 filter,
                 doc.id,
-                doc,
+                &doc,
             );
         }
@@ -505,7 +505,7 @@ mod tests {
                 tokenizer,
                 filter,
                 doc.id,
-                doc,
+                &doc,
             );
         }
         let exp = expand_term(&index, &"a".to_string(), &index.arena_index);
@@ -535,7 +535,7 @@ mod tests {
                 tokenizer,
                 filter,
                 doc.id,
-                doc,
+                &doc,
             );
         }
         let exp = expand_term(&index, &"x".to_string(), &index.arena_index);
diff --git a/src/query/score/default/zero_to_one.rs b/src/query/score/default/zero_to_one.rs
index ad8e062..85511a5 100644
--- a/src/query/score/default/zero_to_one.rs
+++ b/src/query/score/default/zero_to_one.rs
@@ -340,7 +340,7 @@ mod tests {
             tokenizer,
             filter,
             doc.id,
-            doc,
+            &doc,
         );
     }
@@ -391,7 +391,7 @@ mod tests {
             tokenizer,
             filter,
             doc.id,
-            doc,
+            &doc,
         );
     }
diff --git a/tests/integrations_tests.rs b/tests/integrations_tests.rs
index 83d7ce6..630de1e 100644
--- a/tests/integrations_tests.rs
+++ b/tests/integrations_tests.rs
@@ -57,7 +57,7 @@ pub fn test_add_query_delete_bm25() {
         tokenizer,
         filter,
         doc_1.id,
-        doc_1.clone(),
+        &doc_1,
     );

     add_document_to_index(
@@ -66,7 +66,7 @@ pub fn test_add_query_delete_bm25() {
         tokenizer,
         filter,
         doc_2.id,
-        doc_2,
+        &doc_2,
     );

     // Search, expected 2 results
@@ -144,7 +144,7 @@ pub fn test_add_query_delete_zero_to_one() {
         tokenizer,
         filter,
         doc_1.id,
-        doc_1.clone(),
+        &doc_1,
     );

     add_document_to_index(
@@ -153,7 +153,7 @@ pub fn test_add_query_delete_zero_to_one() {
         tokenizer,
         filter,
         doc_2.id,
-        doc_2,
+        &doc_2,
    );

     // Search, expected 2 results
@@ -215,6 +215,6 @@ pub fn it_is_thread_safe() {
         tokenizer,
         filter,
         doc_1.id,
-        doc_1.clone(),
+        &doc_1,
     );
 }
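Note on PATCH 1/8: after this change `add_document_to_index` borrows the document instead of taking it by value, so call sites drop their `.clone()` and the caller keeps ownership. A minimal sketch of the patched call; the `Doc` struct, `title_extract` accessor, and the `create_index` setup (and its module path) are assumed here for illustration, they are not part of this series:

    // Assumed import paths; only add_document_to_index appears in these diffs.
    use probly_search::index::{add_document_to_index, create_index};

    struct Doc {
        id: usize,
        title: String,
    }

    fn title_extract(d: &Doc) -> Option<&str> {
        Some(d.title.as_str())
    }

    // At this point in the series, Tokenizer and Filter still return owned
    // Strings; PATCH 8/8 later switches them to borrowed &str.
    fn tokenizer(s: &str) -> Vec<String> {
        s.split(' ').map(|t| t.to_owned()).collect()
    }

    fn filter(s: &str) -> String {
        s.to_owned()
    }

    fn main() {
        // One indexed field (the title), keyed by usize.
        let mut index = create_index::<usize>(1);
        let doc = Doc {
            id: 0,
            title: "a b c".to_string(),
        };
        // The document is only borrowed, so no clone is needed and
        // `doc` remains usable after indexing.
        add_document_to_index(&mut index, &[title_extract], tokenizer, filter, doc.id, &doc);
        assert_eq!(index.docs.len(), 1);
    }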
From 98f809f273428c0487dbd2af2815661a398d8dce Mon Sep 17 00:00:00 2001
From: muji
Date: Thu, 11 Aug 2022 15:36:28 +0800
Subject: [PATCH 2/8] Run cargo fmt.

---
 src/index.rs | 27 ++++++++++++++++++++++++---
 src/lib.rs   |  9 ++++++++-
 2 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/src/index.rs b/src/index.rs
index 11f72a8..58f8fb1 100644
--- a/src/index.rs
+++ b/src/index.rs
@@ -593,7 +593,14 @@ mod tests {
             text: "a b c".to_string(),
         };

-        add_document_to_index(&mut index, &field_accessors, tokenizer, filter, doc.id, &doc);
+        add_document_to_index(
+            &mut index,
+            &field_accessors,
+            tokenizer,
+            filter,
+            doc.id,
+            &doc,
+        );

         assert_eq!(index.docs.len(), 1);
         let (_, added_doc) = index.docs.iter().next().unwrap();
@@ -871,7 +878,14 @@ mod tests {
             text: "abe".to_string(),
         };

-        add_document_to_index(&mut index, &field_accessors, tokenizer, filter, doc.id, &doc);
+        add_document_to_index(
+            &mut index,
+            &field_accessors,
+            tokenizer,
+            filter,
+            doc.id,
+            &doc,
+        );
         add_document_to_index(
             &mut index,
             &field_accessors,
             tokenizer,
             filter,
             doc_2.id,
@@ -899,7 +913,14 @@ mod tests {
             text: "ab ef".to_string(),
         };

-        add_document_to_index(&mut index, &field_accessors, tokenizer, filter, doc.id, &doc);
+        add_document_to_index(
+            &mut index,
+            &field_accessors,
+            tokenizer,
+            filter,
+            doc.id,
+            &doc,
+        );
         add_document_to_index(
             &mut index,
             &field_accessors,
             tokenizer,
             filter,
             doc_2.id,
diff --git a/src/lib.rs b/src/lib.rs
index d68768b..167e70a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -73,7 +73,14 @@ pub mod test_util {
                 id: i,
                 title: title.to_string(),
             };
-            add_document_to_index(&mut index, &[title_extract], tokenizer, filter, doc.id, &doc);
+            add_document_to_index(
+                &mut index,
+                &[title_extract],
+                tokenizer,
+                filter,
+                doc.id,
+                &doc,
+            );
         }
         index
     }
From 391e17965e30f777ad153b3fe467ec6723197b5f Mon Sep 17 00:00:00 2001
From: muji
Date: Thu, 11 Aug 2022 15:37:45 +0800
Subject: [PATCH 3/8] Fix benchmark.

---
 benches/test_benchmark.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benches/test_benchmark.rs b/benches/test_benchmark.rs
index fdd64bc..66b8439 100644
--- a/benches/test_benchmark.rs
+++ b/benches/test_benchmark.rs
@@ -62,6 +62,6 @@ fn add_all_documents(
             id: i,
             title: s.to_owned(),
         };
-        add_document_to_index(&mut index, extractor, tokenizer, filter, d.id, d);
+        add_document_to_index(&mut index, extractor, tokenizer, filter, d.id, &d);
     }
 }

From 559ab7380776f4388081d841736c2866258c6a3c Mon Sep 17 00:00:00 2001
From: muji
Date: Thu, 11 Aug 2022 17:19:33 +0800
Subject: [PATCH 4/8] Fix clippy warning.

---
 src/query/score/default/zero_to_one.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/query/score/default/zero_to_one.rs b/src/query/score/default/zero_to_one.rs
index 85511a5..9b7141b 100644
--- a/src/query/score/default/zero_to_one.rs
+++ b/src/query/score/default/zero_to_one.rs
@@ -71,7 +71,7 @@ impl ScoreCalculator

From: muji
Date: Thu, 11 Aug 2022 17:21:45 +0800
Subject: [PATCH 5/8] Update README.

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 65396ef..7e2d697 100644
--- a/README.md
+++ b/README.md
@@ -95,7 +95,7 @@ add_document_to_index(
     tokenizer,
     filter,
     doc_1.id,
-    doc_1.clone(),
+    &doc_1,
 );

 add_document_to_index(
@@ -104,7 +104,7 @@ add_document_to_index(
     tokenizer,
     filter,
     doc_2.id,
-    doc_2,
+    &doc_2,
 );

 // Search, expected 2 results

From fd76dc0de1a1aeefddefa2918b7965ccd15b9d68 Mon Sep 17 00:00:00 2001
From: muji
Date: Thu, 11 Aug 2022 17:23:28 +0800
Subject: [PATCH 6/8] Bump version number.

---
 Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Cargo.toml b/Cargo.toml
index 41b50a8..ee371de 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "probly-search"
 description = "A lightweight full-text search engine with a fully customizable scoring function"
-version = "1.2.4"
+version = "2.0.0-alpha-1"
 authors = ["marcus-pousette"]
 edition = "2018"
 license = "MIT"

From 2207438ab05a6b75b6363363233ebf4b3d7c4486 Mon Sep 17 00:00:00 2001
From: muji
Date: Fri, 12 Aug 2022 12:06:33 +0800
Subject: [PATCH 7/8] Run cargo fmt.

---
 src/query/score/default/zero_to_one.rs | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/src/query/score/default/zero_to_one.rs b/src/query/score/default/zero_to_one.rs
index 9b7141b..b8d9b37 100644
--- a/src/query/score/default/zero_to_one.rs
+++ b/src/query/score/default/zero_to_one.rs
@@ -70,17 +70,14 @@ impl ScoreCalculator

From: muji
Date: Fri, 12 Aug 2022 12:22:00 +0800
Subject: [PATCH 8/8] Use &str in Tokenizer and Filter.
---
 benches/test_benchmark.rs     | 10 ++++------
 src/index.rs                  | 18 ++++++++----------
 src/lib.rs                    | 10 ++++------
 src/query.rs                  | 21 ++++++++++-----------
 src/query/score/calculator.rs |  2 +-
 src/utils.rs                  |  4 ++--
 tests/integrations_tests.rs   | 10 ++++------
 7 files changed, 33 insertions(+), 42 deletions(-)

diff --git a/benches/test_benchmark.rs b/benches/test_benchmark.rs
index 66b8439..ad2fe83 100644
--- a/benches/test_benchmark.rs
+++ b/benches/test_benchmark.rs
@@ -8,13 +8,11 @@ struct DocX {
     title: String,
 }

-fn filter(s: &str) -> String {
-    s.to_owned()
+fn filter(s: &str) -> &str {
+    s
 }
-fn tokenizer(s: &str) -> Vec<String> {
-    s.split(' ')
-        .map(|slice| slice.to_owned())
-        .collect::<Vec<String>>()
+fn tokenizer(s: &str) -> Vec<&str> {
+    s.split(' ').collect::<Vec<&str>>()
 }

 pub fn test_speed(c: &mut Criterion) {
diff --git a/src/index.rs b/src/index.rs
index 58f8fb1..f3f6685 100644
--- a/src/index.rs
+++ b/src/index.rs
@@ -81,7 +81,7 @@
 Document Details object stores additional information about documents.

 * typeparam `T` Document key.
 */
-#[derive(Debug, PartialEq)]
+#[derive(Debug, PartialEq, Eq)]
 pub struct DocumentDetails<T> {
     /**
     Document key. It can be a simple unique ID or a direct reference to original document.
     */
@@ -308,16 +308,16 @@ pub fn add_document_to_index(
     // filter and count terms, ignore empty strings
     let mut filtered_terms_count = 0;
     for mut term in terms {
-        term = filter(&term);
+        term = filter(term);
         if !term.is_empty() {
             all_terms.push(term.to_owned());
             filtered_terms_count += 1;
-            let counts = term_counts.get_mut(&term);
+            let counts = term_counts.get_mut(term);
             match counts {
                 None => {
                     let mut new_count = vec![0; fields_len];
                     new_count[i] += 1;
-                    term_counts.insert(term, new_count);
+                    term_counts.insert(term.to_owned(), new_count);
                 }
                 Some(c) => {
                     c[i] += 1;
@@ -565,14 +565,12 @@ mod tests {
         text: String,
     }

-    fn tokenizer(s: &str) -> Vec<String> {
-        s.split(' ')
-            .map(|slice| slice.to_owned())
-            .collect::<Vec<String>>()
+    fn tokenizer(s: &str) -> Vec<&str> {
+        s.split(' ').collect::<Vec<&str>>()
     }

-    fn filter(s: &str) -> String {
-        s.to_owned()
+    fn filter(s: &str) -> &str {
+        s
     }
     fn field_accessor(doc: &Doc) -> Option<&str> {
         Some(doc.text.as_str())
     }
diff --git a/src/lib.rs b/src/lib.rs
index 167e70a..c027d79 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -19,17 +19,15 @@ pub mod test_util {
         id: usize,
         title: String,
     }
-    fn tokenizer(s: &str) -> Vec<String> {
-        s.split(' ')
-            .map(|slice| slice.to_owned().to_lowercase())
-            .collect::<Vec<String>>()
+    fn tokenizer(s: &str) -> Vec<&str> {
+        s.split(' ').collect::<Vec<&str>>()
     }
     fn title_extract(d: &Doc) -> Option<&str> {
         Some(d.title.as_str())
     }

-    fn filter(s: &str) -> String {
-        s.to_owned()
+    fn filter(s: &str) -> &str {
+        s
     }

     pub fn test_score<'arena, M, S: ScoreCalculator>(
diff --git a/src/query.rs b/src/query.rs
index acf5f7b..c204797 100644
--- a/src/query.rs
+++ b/src/query.rs
@@ -79,7 +79,7 @@ pub fn query(
     for (query_term_index, query_term_pre_filter) in query_terms.iter().enumerate() {
         let query_term = filter(query_term_pre_filter);
         if !query_term.is_empty() {
-            let expanded_terms = expand_term(index, &query_term, &index.arena_index);
+            let expanded_terms = expand_term(index, query_term, &index.arena_index);
             let mut visited_documents_for_term: HashSet<T> = HashSet::new();
             for query_term_expanded in expanded_terms {
                 let term_node_option =

                     if document_frequency > 0 {
                         let term_expansion_data = TermData {
                             query_term_index,
-                            all_query_terms: &query_terms,
-                            query_term: &query_term,
+                            all_query_terms: query_terms.clone(),
+                            query_term,
                             query_term_expanded: &query_term_expanded,
                         };
                         let pre_calculations = &score_calculator.before_each(
@@ -224,13 +224,12 @@ mod tests {
         Some(d.text.as_str())
     }

-    pub fn tokenizer(s: &str) -> Vec<String> {
-        s.split(' ')
-            .map(|slice| slice.to_owned())
-            .collect::<Vec<String>>()
+    pub fn tokenizer(s: &str) -> Vec<&str> {
+        s.split(' ').collect::<Vec<&str>>()
     }
-    pub fn filter(s: &str) -> String {
-        s.to_owned()
+
+    pub fn filter(s: &str) -> &str {
+        s
     }

     pub mod query {
@@ -405,9 +404,9 @@ mod tests {
         );
     }

-    fn custom_filter(s: &str) -> String {
+    fn custom_filter(s: &str) -> &str {
         if s == "a" {
-            return "".to_string();
+            return "";
         }
         filter(s)
     }
diff --git a/src/query/score/calculator.rs b/src/query/score/calculator.rs
index e1738f1..697442c 100644
--- a/src/query/score/calculator.rs
+++ b/src/query/score/calculator.rs
@@ -15,7 +15,7 @@ pub struct TermData<'a> {
     pub query_term_expanded: &'a str,

     // All available query terms
-    pub all_query_terms: &'a Vec<String>,
+    pub all_query_terms: Vec<&'a str>,
 }

 pub struct FieldData<'a> {
diff --git a/src/utils.rs b/src/utils.rs
index 846ef0f..69280be 100644
--- a/src/utils.rs
+++ b/src/utils.rs
@@ -1,3 +1,3 @@
 pub type FieldAccessor<D> = fn(&D) -> Option<&str>;
-pub type Tokenizer = fn(&str) -> Vec<String>;
-pub type Filter = fn(&str) -> String;
+pub type Tokenizer = fn(&str) -> Vec<&str>;
+pub type Filter = fn(&str) -> &str;
diff --git a/tests/integrations_tests.rs b/tests/integrations_tests.rs
index 630de1e..06c143c 100644
--- a/tests/integrations_tests.rs
+++ b/tests/integrations_tests.rs
@@ -15,10 +15,8 @@ struct Doc {
     description: String,
 }

-fn tokenizer(s: &str) -> Vec<String> {
-    s.split(' ')
-        .map(|slice| slice.to_owned())
-        .collect::<Vec<String>>()
+fn tokenizer(s: &str) -> Vec<&str> {
+    s.split(' ').collect::<Vec<&str>>()
 }
 fn title_extract(d: &Doc) -> Option<&str> {
     Some(d.title.as_str())
 }
@@ -28,8 +26,8 @@ fn description_extract(d: &Doc) -> Option<&str> {
     Some(d.description.as_str())
 }

-fn filter(s: &str) -> String {
-    s.to_owned()
+fn filter(s: &str) -> &str {
+    s
 }

 #[test]
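Closing note on the series: PATCH 1/8 and PATCH 8/8 together rework the ingestion API so that nothing is copied until the index actually stores a term. Documents are passed as `&D`, tokens are returned as `Vec<&str>` borrowing from the field text, and `filter` maps `&str` to `&str`; the remaining allocation is the `term.to_owned()` the index performs when inserting a term. One visible consequence in the src/lib.rs diff: a borrowing tokenizer cannot return transformed tokens, which is why the `.to_lowercase()` call disappears from the test tokenizer. A minimal sketch of callbacks written against the new `Tokenizer` and `Filter` type aliases from src/utils.rs:

    // Zero-allocation callbacks: both borrow from the input text,
    // matching pub type Tokenizer = fn(&str) -> Vec<&str> and
    // pub type Filter = fn(&str) -> &str.
    fn tokenizer(s: &str) -> Vec<&str> {
        s.split(' ').collect()
    }

    fn filter(s: &str) -> &str {
        // Return "" to drop a token, as custom_filter in the
        // query.rs tests does for the term "a".
        s
    }

The `TermData` change in src/query/score/calculator.rs follows the same logic: `all_query_terms` becomes `Vec<&'a str>`, so the `query_terms.clone()` in query.rs copies only string slices rather than heap-allocated Strings.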