From 81fc5501d10bbbc7a77f5ed5d1b2d514206caf4a Mon Sep 17 00:00:00 2001
From: Marcus Pousette <marcus.pousette@quantleaf.com>
Date: Sat, 20 Aug 2022 23:25:57 +0200
Subject: [PATCH] refactor

---
 benches/test_benchmark.rs        |  2 +-
 src/index.rs                     | 59 +++++++++++----------------
 src/lib.rs                       | 19 ++++-----
 src/query.rs                     | 69 ++++++--------------------------
 src/score/calculator.rs          |  9 ++---
 src/score/default/bm25.rs        |  4 +-
 src/score/default/zero_to_one.rs |  1 -
 tests/integrations_tests.rs      | 26 ++----------
 8 files changed, 55 insertions(+), 134 deletions(-)
diff --git a/benches/test_benchmark.rs b/benches/test_benchmark.rs
index a09f955..ddf2e4d 100644
--- a/benches/test_benchmark.rs
+++ b/benches/test_benchmark.rs
@@ -58,6 +58,6 @@ fn add_all_documents(
             id: i,
             title: s.to_owned(),
         };
-        index.add_document(extractor, tokenizer,  d.id, &d);
+        index.add_document(extractor, tokenizer, d.id, &d);
     }
 }
diff --git a/src/index.rs b/src/index.rs
index 6097863..c235097 100644
--- a/src/index.rs
+++ b/src/index.rs
@@ -1,12 +1,12 @@
 use std::{
+    borrow::Cow,
     fmt::{Debug, Formatter},
     hash::Hash,
-    usize, borrow::Cow
-    
+    usize,
 };
 
-use hashbrown::{HashMap, HashSet};
 use crate::{FieldAccessor, Tokenizer};
+use hashbrown::{HashMap, HashSet};
 extern crate typed_generational_arena;
 use typed_generational_arena::StandardArena;
 use typed_generational_arena::StandardIndex as ArenaIndex;
@@ -74,21 +74,20 @@ impl<T: Eq + Hash + Copy + Debug> Index<T> {
         self.removed.as_ref()
     }
 
-
     /// Adds a document to the index.
     pub fn add_document<D>(
         &mut self,
         field_accessors: &[FieldAccessor<D>],
         tokenizer: Tokenizer,
         key: T,
-        doc: & D,
+        doc: &D,
     ) {
         let docs = &mut self.docs;
         let fields = &mut self.fields;
         let mut field_length = vec![0; fields.len()];
         let mut term_counts: HashMap<Cow<str>, Vec<usize>> = HashMap::new();
         let mut all_terms: Vec<Cow<str>> = Vec::new();
-        
+
         for i in 0..fields.len() {
             if let Some(field_value) = field_accessors[i](doc) {
                 let fields_len = fields.len();
@@ -100,25 +99,15 @@ impl<T: Eq + Hash + Copy + Debug> Index<T> {
                 // filter and count terms, ignore empty strings
                 let mut filtered_terms_count = 0;
                 for term in terms {
-                    if !term.is_empty() { 
+                    if !term.is_empty() {
                         filtered_terms_count += 1;
-                        /* let counts = term_counts
-                            .entry(term.clone())
-                            .or_insert_with(|| vec![0; fields_len]); */
-
-                        let counts = match term_counts.get_mut(&term)
-                        {
-                            Some(counts) => {
-                                counts},
-                            None => {
-                                term_counts.insert(term.clone(), vec![0; fields_len]);
-                                term_counts.get_mut(&term).unwrap()
-                            }
-                        };
-                        counts[i] += 1; 
-                        all_terms.push(term);
-
-                     } 
+                        all_terms.push(term.clone());
+                        let counts = term_counts
+                            .entry(term)
+                            .or_insert_with(|| vec![0; fields_len]);
+
+                        counts[i] += 1;
+                    }
                 }
 
                 field_details.sum += filtered_terms_count;
@@ -467,7 +456,7 @@ fn create_inverted_index_nodes<T: Clone>(
 mod tests {
     use super::*;
 
-    use crate::test_util::{ tokenizer};
+    use crate::test_util::tokenizer;
 
     /// Count the amount of nodes of the index.
     ///
@@ -516,7 +505,7 @@ mod tests {
                 text: "a b c".to_string(),
             };
 
-            index.add_document(&field_accessors, tokenizer,  doc.id, &doc);
+            index.add_document(&field_accessors, tokenizer, doc.id, &doc);
 
             assert_eq!(index.docs.len(), 1);
             let (_, added_doc) = index.docs.iter().next().unwrap();
@@ -572,9 +561,9 @@ mod tests {
                 text: "b c d".to_string(),
             };
 
-            index.add_document(&field_accessors, tokenizer,  doc_1.id, &doc_1);
+            index.add_document(&field_accessors, tokenizer, doc_1.id, &doc_1);
 
-            index.add_document(&field_accessors, tokenizer,  doc_2.id, &doc_2);
+            index.add_document(&field_accessors, tokenizer, doc_2.id, &doc_2);
 
             assert_eq!(index.docs.len(), 2);
             assert_eq!(
@@ -598,7 +587,7 @@ mod tests {
             assert_eq!(&root.char, &char::from_u32(0).unwrap());
             assert_eq!(&root.next.is_none(), &true);
             assert_eq!(&root.first_doc.is_none(), &true);
-            
+
             let first_child = index.arena_index.get(root.first_child.unwrap()).unwrap();
             assert_eq!(&first_child.char, &char::from_u32(100).unwrap());
             assert_eq!(&first_child.first_child.is_none(), &true);
@@ -628,7 +617,7 @@ mod tests {
                 text: "a  b".to_string(), // double space could introduce empty tokens
             };
 
-            index.add_document(&field_accessors, tokenizer,  doc_1.id, &doc_1);
+            index.add_document(&field_accessors, tokenizer, doc_1.id, &doc_1);
         }
     }
 
@@ -646,7 +635,7 @@ mod tests {
             }];
 
             for doc in docs {
-                index.add_document(&[field_accessor], tokenizer,  doc.id, &doc)
+                index.add_document(&[field_accessor], tokenizer, doc.id, &doc)
             }
 
             index.remove_document(1);
@@ -765,8 +754,8 @@ mod tests {
                     text: "abe".to_string(),
                 };
 
-                index.add_document(&field_accessors, tokenizer,  doc.id, &doc);
-                index.add_document(&field_accessors, tokenizer,  doc_2.id, &doc_2);
+                index.add_document(&field_accessors, tokenizer, doc.id, &doc);
+                index.add_document(&field_accessors, tokenizer, doc_2.id, &doc_2);
                 assert_eq!(count_nodes(&index), 5); //
             }
 
@@ -786,8 +775,8 @@ mod tests {
                     text: "ab ef".to_string(),
                 };
 
-                index.add_document(&field_accessors, tokenizer,  doc.id, &doc);
-                index.add_document(&field_accessors, tokenizer,  doc_2.id, &doc_2);
+                index.add_document(&field_accessors, tokenizer, doc.id, &doc);
+                index.add_document(&field_accessors, tokenizer, doc_2.id, &doc_2);
                 assert_eq!(count_nodes(&index), 7); //
             }
 
diff --git a/src/lib.rs b/src/lib.rs
index 5059c24..f0db0d3 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -8,10 +8,10 @@ pub use index::*;
 pub use query::QueryResult;
 
 /// Function that extracts a field value from a document.
-pub type FieldAccessor< D> = fn(& D) -> Option<&str>;
+pub type FieldAccessor<D> = fn(&D) -> Option<&str>;
 
 /// Function used to tokenize a field.
-pub type Tokenizer = fn( &str) -> Vec<Cow<'_, str>>;
+pub type Tokenizer = fn(&str) -> Vec<Cow<'_, str>>;
 
 #[cfg(test)]
 pub mod test_util {
@@ -31,15 +31,15 @@ pub mod test_util {
         pub text: String,
     }
 
-    pub fn title_extract<'a>(d: &'a Doc) -> Option<&'a str> {
+    pub fn title_extract(d: &Doc) -> Option<&str> {
         Some(d.title.as_str())
     }
 
-    pub fn text_extract<'a>(d: &'a Doc) -> Option<&'a str> {
+    pub fn text_extract(d: &Doc) -> Option<&str> {
         Some(d.text.as_str())
     }
 
-    pub fn tokenizer<'a>(s: &'a str) ->  Vec<Cow<'a, str>> {
+    pub fn tokenizer(s: &str) -> Vec<Cow<str>> {
         s.split(' ').map(Cow::from).collect::<Vec<_>>()
     }
 
@@ -50,12 +50,7 @@ pub mod test_util {
         expected: Vec<QueryResult<usize>>,
     ) {
         let fields_len = idx.fields.len();
-        let mut results = idx.query(
-            q,
-            score_calculator,
-            tokenizer,
-            &vec![1.; fields_len],
-        );
+        let mut results = idx.query(q, score_calculator, tokenizer, &vec![1.; fields_len]);
         results.sort_by(|a, b| {
             let mut sort = b.score.partial_cmp(&a.score).unwrap();
             sort = sort.then_with(|| a.key.partial_cmp(&b.key).unwrap());
@@ -82,7 +77,7 @@ pub mod test_util {
                 title: title.to_string(),
                 text: String::new(),
             };
-            index.add_document(&[title_extract], tokenizer,  doc.id, &doc);
+            index.add_document(&[title_extract], tokenizer, doc.id, &doc);
         }
         index
     }
diff --git a/src/query.rs b/src/query.rs
index f8a6ee9..254b87b 100644
--- a/src/query.rs
+++ b/src/query.rs
@@ -1,12 +1,9 @@
-use std::{
-    fmt::Debug,
-    hash::Hash,
-};
 use hashbrown::{HashMap, HashSet};
+use std::{fmt::Debug, hash::Hash};
 
 use typed_generational_arena::StandardArena;
 
-use crate::{score::*,  Index, InvertedIndexNode, Tokenizer};
+use crate::{score::*, Index, InvertedIndexNode, Tokenizer};
 
 /// Result type for querying an index.
 #[derive(Debug, PartialEq)]
@@ -23,17 +20,17 @@ impl<T: Eq + Hash + Copy + Debug> Index<T> {
     /// All token separators work as a disjunction operator.
     pub fn query<'a, M, S: ScoreCalculator<T, M>>(
         &self,
-        query:  &'a str,
+        query: &'a str,
         score_calculator: &mut S,
         tokenizer: Tokenizer,
         fields_boost: &[f64],
     ) -> Vec<QueryResult<T>> {
         let removed = self.removed_documents();
-         let query_terms = tokenizer(query);/* .iter().map(|term| term.to_string()).collect() */
-         
+        let query_terms = tokenizer(query); /* .iter().map(|term| term.to_string()).collect() */
+
         let mut scores = HashMap::new();
         let query_terms_len = query_terms.len();
-        
+
         for (query_term_index, query_term) in query_terms.iter().enumerate() {
             if !query_term.is_empty() {
                 let expanded_terms = self.expand_term(query_term.as_ref(), &self.arena_index);
@@ -94,7 +91,7 @@ impl<T: Eq + Hash + Copy + Debug> Index<T> {
                         }
                     }
                 }
-            } 
+            }
         }
 
         let mut result = Vec::new();
@@ -171,7 +168,6 @@ pub(crate) mod tests {
 
     use crate::test_util::*;
     use crate::Index;
-    use std::borrow::Cow;
 
     fn approx_equal(a: f64, b: f64, dp: u8) -> bool {
         let p: f64 = 10f64.powf(-(dp as f64));
@@ -198,18 +194,12 @@ pub(crate) mod tests {
                 },
             ];
             for doc in docs {
-                index.add_document(
-                    &[title_extract, text_extract],
-                    tokenizer,
-                    doc.id,
-                    &doc,
-                );
+                index.add_document(&[title_extract, text_extract], tokenizer, doc.id, &doc);
             }
             let result = index.query(
                 &"a".to_string(),
                 &mut crate::score::bm25::new(),
                 tokenizer,
-                
                 &[1., 1.],
             );
             assert_eq!(result.len(), 1);
@@ -237,20 +227,13 @@ pub(crate) mod tests {
             ];
 
             for doc in docs {
-                index.add_document(
-                    &[title_extract, text_extract],
-                    tokenizer,
-                    
-                    doc.id,
-                    &doc,
-                );
+                index.add_document(&[title_extract, text_extract], tokenizer, doc.id, &doc);
             }
 
             let result = index.query(
                 &"c".to_string(),
                 &mut crate::score::bm25::new(),
                 tokenizer,
-                
                 &[1., 1.],
             );
 
@@ -291,20 +274,13 @@ pub(crate) mod tests {
             ];
 
             for doc in docs {
-                index.add_document(
-                    &[title_extract, text_extract],
-                    tokenizer,
-                    
-                    doc.id,
-                    &doc,
-                );
+                index.add_document(&[title_extract, text_extract], tokenizer, doc.id, &doc);
             }
 
             let result = index.query(
                 &"h".to_string(),
                 &mut crate::score::bm25::new(),
                 tokenizer,
-                
                 &[1., 1.],
             );
             assert_eq!(result.len(), 1);
@@ -332,20 +308,13 @@ pub(crate) mod tests {
             ];
 
             for doc in docs {
-                index.add_document(
-                    &[title_extract, text_extract],
-                    tokenizer,
-                    
-                    doc.id,
-                    &doc,
-                );
+                index.add_document(&[title_extract, text_extract], tokenizer, doc.id, &doc);
             }
 
             let result = index.query(
                 &"a d".to_string(),
                 &mut crate::score::bm25::new(),
                 tokenizer,
-                
                 &[1., 1.],
             );
             assert_eq!(result.len(), 2);
@@ -388,13 +357,7 @@ pub(crate) mod tests {
             ];
 
             for doc in docs {
-                index.add_document(
-                    &[title_extract, text_extract],
-                    tokenizer,
-                    
-                    doc.id,
-                    &doc,
-                );
+                index.add_document(&[title_extract, text_extract], tokenizer, doc.id, &doc);
             }
             let exp = index.expand_term(&"a".to_string(), &index.arena_index);
             assert_eq!(exp, vec!["adef".to_string(), "abc".to_string()]);
@@ -417,13 +380,7 @@ pub(crate) mod tests {
             ];
 
             for doc in docs {
-                index.add_document(
-                    &[title_extract, text_extract],
-                    tokenizer,
-                    
-                    doc.id,
-                    &doc,
-                );
+                index.add_document(&[title_extract, text_extract], tokenizer, doc.id, &doc);
             }
             let exp = index.expand_term(&"x".to_string(), &index.arena_index);
             assert_eq!(exp, Vec::new() as Vec<String>);
diff --git a/src/score/calculator.rs b/src/score/calculator.rs
index c462d84..2183796 100644
--- a/src/score/calculator.rs
+++ b/src/score/calculator.rs
@@ -2,9 +2,9 @@ use crate::{
     index::{DocumentDetails, DocumentPointer, FieldDetails, InvertedIndexNode},
     QueryResult,
 };
-use std::{ fmt::Debug};
-use typed_generational_arena::StandardIndex as ArenaIndex;
 use hashbrown::HashMap;
+use std::fmt::Debug;
+use typed_generational_arena::StandardIndex as ArenaIndex;
 
 pub struct TermData<'a> {
     // Current query term index.
@@ -14,9 +14,8 @@ pub struct TermData<'a> {
     // Current expanded term from the expanded terms generated
     // from the current query term `query_term`
     pub query_term_expanded: &'a str,
-
-    // All available query terms
-    pub query_terms_len: usize
+    // Total number of query terms present in the query this TermData was created from
+    pub query_terms_len: usize,
 }
 
 pub struct FieldData<'a> {
diff --git a/src/score/default/bm25.rs b/src/score/default/bm25.rs
index 5008b50..4882af3 100644
--- a/src/score/default/bm25.rs
+++ b/src/score/default/bm25.rs
@@ -2,8 +2,8 @@
     https://en.wikipedia.org/wiki/Okapi_BM25
 */
 
-use std::{fmt::Debug};
-use hashbrown::{HashMap};
+use hashbrown::HashMap;
+use std::fmt::Debug;
 
 use crate::{
     index::{DocumentDetails, DocumentPointer, InvertedIndexNode},
diff --git a/src/score/default/zero_to_one.rs b/src/score/default/zero_to_one.rs
index a8e0084..4a70020 100644
--- a/src/score/default/zero_to_one.rs
+++ b/src/score/default/zero_to_one.rs
@@ -380,7 +380,6 @@ mod tests {
             x.add_document(
                 &[title_extract, description_extract],
                 tokenizer,
-                
                 doc.id,
                 &doc,
             );
diff --git a/tests/integrations_tests.rs b/tests/integrations_tests.rs
index 0270e67..7068fd5 100644
--- a/tests/integrations_tests.rs
+++ b/tests/integrations_tests.rs
@@ -24,7 +24,6 @@ fn description_extract(d: &Doc) -> Option<&str> {
     Some(d.description.as_str())
 }
 
-
 #[test]
 pub fn test_add_query_delete_bm25() {
     // Create index with 2 fields
@@ -46,7 +45,6 @@ pub fn test_add_query_delete_bm25() {
     index.add_document(
         &[title_extract, description_extract],
         tokenizer,
-        
         doc_1.id,
         &doc_1,
     );
@@ -54,13 +52,12 @@ pub fn test_add_query_delete_bm25() {
     index.add_document(
         &[title_extract, description_extract],
         tokenizer,
-        
         doc_2.id,
         &doc_2,
     );
 
     // Search, expected 2 results
-    let mut result = index.query("abc", &mut bm25::new(), tokenizer,  &[1., 1.]);
+    let mut result = index.query("abc", &mut bm25::new(), tokenizer, &[1., 1.]);
     assert_eq!(result.len(), 2);
     assert_eq!(
         result[0],
@@ -84,7 +81,7 @@ pub fn test_add_query_delete_bm25() {
     index.vacuum();
 
     // Search, expect 1 result
-    result = index.query("abc", &mut bm25::new(), tokenizer,  &[1., 1.]);
+    result = index.query("abc", &mut bm25::new(), tokenizer, &[1., 1.]);
     assert_eq!(result.len(), 1);
     assert_eq!(
         result[0],
@@ -114,7 +111,6 @@ pub fn test_add_query_delete_zero_to_one() {
     index.add_document(
         &[title_extract, description_extract],
         tokenizer,
-        
         doc_1.id,
         &doc_1,
     );
@@ -122,19 +118,12 @@ pub fn test_add_query_delete_zero_to_one() {
     index.add_document(
         &[title_extract, description_extract],
         tokenizer,
-        
         doc_2.id,
         &doc_2,
     );
 
     // Search, expected 2 results
-    let mut result = index.query(
-        "abc",
-        &mut zero_to_one::new(),
-        tokenizer,
-        
-        &[1., 1.],
-    );
+    let mut result = index.query("abc", &mut zero_to_one::new(), tokenizer, &[1., 1.]);
     assert_eq!(result.len(), 2);
     assert_eq!(result[0], QueryResult { key: 0, score: 1. });
     assert_eq!(
@@ -148,13 +137,7 @@ pub fn test_add_query_delete_zero_to_one() {
     index.remove_document(doc_1.id);
 
     // Search, expect 1 result
-    result = index.query(
-        "abc",
-        &mut zero_to_one::new(),
-        tokenizer,
-        
-        &[1., 1.],
-    );
+    result = index.query("abc", &mut zero_to_one::new(), tokenizer, &[1., 1.]);
     assert_eq!(result.len(), 1);
     assert_eq!(
         result[0],
@@ -179,7 +162,6 @@ pub fn it_is_thread_safe() {
     idx.add_document(
         &[title_extract, description_extract],
         tokenizer,
-        
         doc_1.id,
         &doc_1,
     );