Skip to content

Commit

Permalink
create index with initial capacity
Browse files Browse the repository at this point in the history
  • Loading branch information
marcus-pousette committed Aug 1, 2021
1 parent 3f2732a commit c7170da
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 26 deletions.
4 changes: 2 additions & 2 deletions benches/test_benchmark.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use criterion::{criterion_group, criterion_main, Criterion};
use probly_search::index::{add_document_to_index, create_index, Index};
use probly_search::index::{add_document_to_index, create_index_with_capacity, Index};

criterion_group!(benches, test_speed);
criterion_main!(benches);
Expand Down Expand Up @@ -39,7 +39,7 @@ pub fn test_speed(c: &mut Criterion) {
}

c.bench_function("add_100k_docs", |b| {
let mut index = create_index(1);
let mut index = create_index_with_capacity(1, 100000, 100000);
let mut random_strings: Vec<String> = Vec::new();
for _ in 1..100000 {
let mut new_rand = generate_string(0, 4);
Expand Down
53 changes: 44 additions & 9 deletions src/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,31 @@ Creates an Index.
* returns `Index`
*/
pub fn create_index<T>(fields_num: usize) -> Index<T> {
    // Sizing hints applied when the caller does not provide any:
    // expected node count of the index tree, and expected number of documents.
    const DEFAULT_INDEX_SIZE: usize = 1000;
    const DEFAULT_DOCUMENTS_COUNT: usize = 10000;

    create_index_with_capacity(fields_num, DEFAULT_INDEX_SIZE, DEFAULT_DOCUMENTS_COUNT)
}
/**
Creates an Index whose two arenas have room reserved up front.
 * typeparam `T` Document key.
 * `fields_num` Number of fields.
 * `expected_index_size` Expected node count of index tree.
 * `expected_documents_count` Expected amount of documents added
 * returns `Index`
 */
pub fn create_index_with_capacity<T>(
    fields_num: usize,
    expected_index_size: usize,
    expected_documents_count: usize,
) -> Index<T> {
    // One zeroed statistics entry per field.
    let fields: Vec<FieldDetails> = vec![FieldDetails { sum: 0, avg: 0_f64 }; fields_num];

    // Reserve space in both arenas before anything is inserted.
    let mut arena_index = StandardArena::new();
    arena_index.reserve(expected_index_size);
    let mut arena_doc = StandardArena::new();
    arena_doc.reserve(expected_documents_count);

    Index {
        docs: HashMap::new(),
        // The root node is inserted first and carries the NUL character.
        root: arena_index.insert(create_inverted_index_node(&char::from_u32(0).unwrap())),
        fields,
        // Hand over the reserved arena; a second `arena_doc: StandardArena::new()`
        // initialiser would be a duplicate field and would drop the reservation.
        arena_doc,
        arena_index,
    }
}
Expand Down Expand Up @@ -447,7 +465,8 @@ fn vacuum_node<T: Hash + Eq>(
let node = index.arena_index.get_mut(node_index).unwrap();
let mut pointer_option = node.first_doc;
while let Some(pointer) = pointer_option {
if removed.contains(&index.arena_doc.get(pointer).unwrap().details_key) {
let is_removed = removed.contains(&index.arena_doc.get(pointer).unwrap().details_key);
if is_removed {
match &prev_pointer {
None => {
node.first_doc = index.arena_doc.get(pointer).unwrap().next;
Expand All @@ -461,34 +480,42 @@ fn vacuum_node<T: Hash + Eq>(
prev_pointer = Some(pointer);
}
pointer_option = index.arena_doc.get(pointer).unwrap().next;
if is_removed {
index.arena_doc.remove(pointer);
}
}

let mut prev_child: Option<ArenaIndex<InvertedIndexNode<T>>> = None;
let mut ret = 0;
if node.first_doc.is_some() {
ret = 1;
}
/*

let mut child_option = node.first_child;
while let Some(child_index) = child_option {
let r = vacuum_node(index, child_index, removed);
let child = index.arena_index.get(child_index).unwrap();
ret |= r;
if r == 0 {
// subtree doesn't have any documents, remove this node
match prev_child {
Some(prev) => {
index.arena_index.get_mut(prev).unwrap().next = child.next;
index.arena_index.get_mut(prev).unwrap().next =
index.arena_index.get(child_index).unwrap().next;
}
None => {
node.first_child = child.next;
index.arena_index.get_mut(node_index).unwrap().first_child =
index.arena_index.get(child_index).unwrap().next;
}
}
} else {
prev_child = Some(child_index);
}
child_option = child.next;
}*/
child_option = index.arena_index.get(child_index).unwrap().next;

if r == 0 {
index.arena_index.remove(child_index);
}
}
ret
}

Expand Down Expand Up @@ -696,6 +723,8 @@ mod tests {
#[test]
fn it_should_delete_1() {
let mut index = create_index::<usize>(1);
assert_eq!(index.arena_doc.is_empty(), true);

let mut removed = HashSet::new();
let docs = vec![Doc {
id: 1,
Expand All @@ -712,6 +741,7 @@ mod tests {
doc,
)
}

remove_document_from_index(&mut index, &mut removed, 1);
vacuum_index(&mut index, &mut removed);

Expand All @@ -728,6 +758,11 @@ mod tests {
next: None,
};
assert_eq!(x, y);

// Delete root from index and assert is empty
index.arena_index.remove(index.root);
assert_eq!(index.arena_doc.is_empty(), true);
assert_eq!(index.arena_index.is_empty(), true);
}
}
mod find {
Expand Down Expand Up @@ -865,7 +900,7 @@ mod tests {

#[test]
fn it_should_count_nodes_empty() {
    // The index is only read here, so a single immutable binding is enough.
    let index = create_index::<usize>(1);
    assert_eq!(count_nodes(&index), 1); // 1 for root
}
}
Expand Down
23 changes: 8 additions & 15 deletions src/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,39 +90,32 @@ pub fn query<T: Eq + Hash + Clone + Debug, M, S: ScoreCalculator<T, M>>(
find_inverted_index_node(index.root, &query_term_expanded, &index.arena_index);
if let Some(term_node_index) = term_node_option {
let term_node = index.arena_index.get_mut(term_node_index).unwrap();
let mut new_first_doc = None;
let mut assign_new_first_doc = false;
let mut document_frequency = 0;

if let Some(term_node_option_first_doc) = term_node.first_doc {
let mut prev_pointer: Option<ArenaIndex<DocumentPointer<T>>> = None;
let mut pointer_option = Some(term_node_option_first_doc);
while let Some(pointer) = pointer_option {
let pointer_value = index.arena_doc.get(pointer).unwrap();
if removed.is_some() // Cleanup old removed documents while searching. If vaccume after delete, this will have not effect
&& removed
.unwrap()
.contains(&pointer_value.details_key)
{
let is_removed = removed.is_some()
&& removed.unwrap().contains(&pointer_value.details_key);
if is_removed {
// Clean up previously removed documents while searching. If the index was vacuumed after the delete, this has no effect.
if let Some(pp) = prev_pointer {
index.arena_doc.get_mut(pp).unwrap().next = pointer_value.next;
} else {
new_first_doc = pointer_value.next;
assign_new_first_doc = true;
// term_node_borrowed.first_doc = (&pointer.get().next).clone();
term_node.first_doc = pointer_value.next;
}
} else {
prev_pointer = Some(pointer);
document_frequency += 1;
}
pointer_option = index.arena_doc.get(pointer).unwrap().next;
if is_removed {
index.arena_doc.remove(pointer);
}
}
}

if assign_new_first_doc {
term_node.first_doc = new_first_doc;
}

if let Some(term_node_option_first_doc) = term_node.first_doc {
if document_frequency > 0 {
let term_expansion_data = TermData {
Expand Down

0 comments on commit c7170da

Please sign in to comment.