Merge branch 'master' of https://github.com/quantleaf/probly-search

quantleaf · Aug 2, 2021 · 5b6b385 · 5b6b385
2 parents 2cce97b + 7e42387
commit 5b6b385
Show file tree

Hide file tree

Showing 11 changed files with 480 additions and 401 deletions.
diff --git a/Cargo.toml b/Cargo.toml
@@ -13,6 +13,9 @@ keywords = ["search", "query", "bm25","index"]
 categories = ["text-processing"]
 exclude = [".github/**", ".gitignore", ".rustfmt.toml"]
 
+[dependencies]
+typed-generational-arena = "0.2"
+
 [dev-dependencies]
 lazy_static = "1.4.0"
 rand = "0.8.3"
@@ -22,4 +25,9 @@ criterion = {version ="0.3" , features  = ["html_reports"]}
 
 [[bench]]
 name = "test_benchmark"
-harness = false
+harness = false
+
+
+[profile.dev]
+opt-level = 0
+debug = true
diff --git a/README.md b/README.md
@@ -3,7 +3,7 @@
 [Latest Version]: https://img.shields.io/crates/v/probly-search.svg
 [crates.io]: https://crates.io/crates/probly-search
 
-A lightweight and thread-safe, full-text search library that provides full control over the scoring calculations.
+A full-text search library, optimized for insertion speed, that provides full control over the scoring calculations.
 
 This started as a port of the Node library [NDX](https://github.com/ndx-search/ndx).
 
@@ -20,11 +20,11 @@ https://quantleaf.github.io/probly-search-demo/
 
 - [Trie](https://en.wikipedia.org/wiki/Trie) based dynamic
   [Inverted Index](https://en.wikipedia.org/wiki/Inverted_index).
-- Small memory footprint, optimized for mobile devices.
 - Multiple fields full-text indexing and searching.
 - Per-field score boosting.
 - Configurable tokenizer and term filter.
 - Free text queries with query expansion.
+- Fast allocation, but latent deletion.
 
 
 ## Documentation 

diff --git a/benches/test_benchmark.rs b/benches/test_benchmark.rs
@@ -1,5 +1,5 @@
 use criterion::{criterion_group, criterion_main, Criterion};
-use probly_search::index::{add_document_to_index, create_index, Index};
+use probly_search::index::{add_document_to_index, create_index_with_capacity, Index};
 
 criterion_group!(benches, test_speed);
 criterion_main!(benches);
@@ -8,7 +8,7 @@ struct DocX {
     title: String,
 }
 
-fn filter(s: &String) -> String {
+fn filter(s: &str) -> String {
     s.to_owned()
 }
 fn tokenizer(s: &str) -> Vec<String> {
@@ -39,30 +39,29 @@ pub fn test_speed(c: &mut Criterion) {
     }
 
     c.bench_function("add_100k_docs", |b| {
-        let mut idx: Index<usize> = create_index(1);
+        let mut index = create_index_with_capacity(1, 100000, 100000);
         let mut random_strings: Vec<String> = Vec::new();
         for _ in 1..100000 {
             let mut new_rand = generate_string(0, 4);
-            new_rand.push_str(" ");
+            new_rand.push(' ');
             new_rand.push_str(&generate_string(0, 4));
             random_strings.push(new_rand);
         }
-        // whatever you want to do
         let extractor = [title_extract_x as fn(&_) -> Option<&str>];
-        b.iter(|| add_all_documents(&mut idx, &extractor, &&random_strings));
+        b.iter(|| add_all_documents(&mut index, &extractor, &random_strings));
     });
 }
 
 fn add_all_documents(
-    mut idx: &mut Index<usize>,
+    mut index: &mut Index<usize>,
     extractor: &[fn(&DocX) -> Option<&str>],
-    random_strings: &Vec<String>,
+    random_strings: &[String],
 ) {
     for (i, s) in random_strings.iter().enumerate() {
         let d = DocX {
             id: i,
             title: s.to_owned(),
         };
-        add_document_to_index(&mut idx, &extractor, tokenizer, filter, d.id, d);
+        add_document_to_index(&mut index, extractor, tokenizer, filter, d.id, d);
     }
 }