Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
marcus-pousette committed Aug 2, 2021
2 parents 2cce97b + 7e42387 commit 5b6b385
Show file tree
Hide file tree
Showing 11 changed files with 480 additions and 401 deletions.
10 changes: 9 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ keywords = ["search", "query", "bm25","index"]
categories = ["text-processing"]
exclude = [".github/**", ".gitignore", ".rustfmt.toml"]

[dependencies]
typed-generational-arena = "0.2"

[dev-dependencies]
lazy_static = "1.4.0"
rand = "0.8.3"
Expand All @@ -22,4 +25,9 @@ criterion = {version ="0.3" , features = ["html_reports"]}

[[bench]]
name = "test_benchmark"
harness = false
harness = false


[profile.dev]
opt-level = 0
debug = true
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
[Latest Version]: https://img.shields.io/crates/v/probly-search.svg
[crates.io]: https://crates.io/crates/probly-search

A lightweight and thread-safe, full-text search library that provides full control over the scoring calculations.
A full-text search library, optimized for insertion speed, that provides full control over the scoring calculations.

This started as a port of the Node library [NDX](https://github.com/ndx-search/ndx).

Expand All @@ -20,11 +20,11 @@ https://quantleaf.github.io/probly-search-demo/

- [Trie](https://en.wikipedia.org/wiki/Trie) based dynamic
[Inverted Index](https://en.wikipedia.org/wiki/Inverted_index).
- Small memory footprint, optimized for mobile devices.
- Multiple fields full-text indexing and searching.
- Per-field score boosting.
- Configurable tokenizer and term filter.
- Free text queries with query expansion.
- Fast allocation, but latent deletion.


## Documentation
Expand Down
17 changes: 8 additions & 9 deletions benches/test_benchmark.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use criterion::{criterion_group, criterion_main, Criterion};
use probly_search::index::{add_document_to_index, create_index, Index};
use probly_search::index::{add_document_to_index, create_index_with_capacity, Index};

criterion_group!(benches, test_speed);
criterion_main!(benches);
Expand All @@ -8,7 +8,7 @@ struct DocX {
title: String,
}

fn filter(s: &String) -> String {
fn filter(s: &str) -> String {
s.to_owned()
}
fn tokenizer(s: &str) -> Vec<String> {
Expand Down Expand Up @@ -39,30 +39,29 @@ pub fn test_speed(c: &mut Criterion) {
}

c.bench_function("add_100k_docs", |b| {
let mut idx: Index<usize> = create_index(1);
let mut index = create_index_with_capacity(1, 100000, 100000);
let mut random_strings: Vec<String> = Vec::new();
for _ in 1..100000 {
let mut new_rand = generate_string(0, 4);
new_rand.push_str(" ");
new_rand.push(' ');
new_rand.push_str(&generate_string(0, 4));
random_strings.push(new_rand);
}
// whatever you want to do
let extractor = [title_extract_x as fn(&_) -> Option<&str>];
b.iter(|| add_all_documents(&mut idx, &extractor, &&random_strings));
b.iter(|| add_all_documents(&mut index, &extractor, &random_strings));
});
}

fn add_all_documents(
mut idx: &mut Index<usize>,
mut index: &mut Index<usize>,
extractor: &[fn(&DocX) -> Option<&str>],
random_strings: &Vec<String>,
random_strings: &[String],
) {
for (i, s) in random_strings.iter().enumerate() {
let d = DocX {
id: i,
title: s.to_owned(),
};
add_document_to_index(&mut idx, &extractor, tokenizer, filter, d.id, d);
add_document_to_index(&mut index, extractor, tokenizer, filter, d.id, d);
}
}
Loading

0 comments on commit 5b6b385

Please sign in to comment.