Skip to content

Commit

Permalink
Refactor
Browse files: browse the repository at this point in the history
  • Loading branch information
marcus-pousette committed Aug 7, 2021
1 parent 419a06c commit 1f1c683
Show file tree
Hide file tree
Showing 4 changed files with 149 additions and 189 deletions.
68 changes: 29 additions & 39 deletions src/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,6 @@ fn add_inverted_index_doc<T: Clone>(
doc.next = Some(first);
}
let doc_index = arena_doc.insert(doc);

node_value.first_doc = Some(doc_index);
}

Expand All @@ -295,46 +294,41 @@ pub fn add_document_to_index<T: Eq + Hash + Copy, D>(
) {
let docs = &mut index.docs;
let fields = &mut index.fields;
let mut field_length = Vec::new();
let mut field_length = vec![0; fields.len()];
let mut term_counts: HashMap<String, Vec<usize>> = HashMap::new();
let mut all_terms: Vec<String> = Vec::new();
for i in 0..fields.len() {
match field_accessors[i](&doc) {
None => {
field_length.push(0);
}
Some(field_value) => {
let fields_len = fields.len();
let mut field_details = fields.get_mut(i).unwrap();

// tokenize text
let terms = tokenizer(field_value);

// filter and count terms, ignore empty strings
let mut filtered_terms_count = 0;
for mut term in terms {
term = filter(&term);
if !term.is_empty() {
all_terms.push(term.to_owned());
filtered_terms_count += 1;
let counts = term_counts.get_mut(&term);
match counts {
None => {
let mut new_count = vec![0; fields_len];
new_count[i] += 1;
term_counts.insert(term, new_count);
}
Some(c) => {
c[i] += 1;
}
if let Some(field_value) = field_accessors[i](&doc) {
let fields_len = fields.len();
let mut field_details = fields.get_mut(i).unwrap();

// tokenize text
let terms = tokenizer(field_value);

// filter and count terms, ignore empty strings
let mut filtered_terms_count = 0;
for mut term in terms {
term = filter(&term);
if !term.is_empty() {
all_terms.push(term.to_owned());
filtered_terms_count += 1;
let counts = term_counts.get_mut(&term);
match counts {
None => {
let mut new_count = vec![0; fields_len];
new_count[i] += 1;
term_counts.insert(term, new_count);
}
Some(c) => {
c[i] += 1;
}
}
}

field_details.sum += filtered_terms_count;
field_details.avg = field_details.sum as f64 / (docs.len() as f64 + 1_f64);
field_length.push(filtered_terms_count);
}

field_details.sum += filtered_terms_count;
field_details.avg = field_details.sum as f64 / (docs.len() as f64 + 1_f64);
field_length[i] = filtered_terms_count;
}
}

Expand Down Expand Up @@ -387,10 +381,7 @@ fn create_inverted_index_nodes<T: Clone>(
term: &str,
start: &usize,
) -> ArenaIndex<InvertedIndexNode<T>> {
- for (i, char) in term.chars().enumerate() {
- if &i < start {
- continue;
- }
+ for char in term.chars().skip(start.to_owned()) {
let new_node = arena_index.insert(create_inverted_index_node(&char));
let new_parent = {
add_inverted_index_child_node(parent, new_node, arena_index); // unsafe { .get().as_mut().unwrap() }
Expand All @@ -414,7 +405,6 @@ pub fn remove_document_from_index<T: Hash + Eq + Copy>(
removed: &mut HashSet<T>,
key: T,
) {
- //
let fields = &mut index.fields;
let doc_details_option = index.docs.get(&key);
let mut remove_key = false;
Expand Down
2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ pub mod test_util {
sort
});

+ assert_eq!(expected.len(), results.len());
+
for (index, result) in results.iter().enumerate() {
assert_eq!(expected[index], *result);
assert_eq!(approx_equal(expected[index].score, result.score, 8), true)
Expand Down
8 changes: 1 addition & 7 deletions src/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@ use std::{
hash::Hash,
};

- use typed_generational_arena::StandardIndex as ArenaIndex;
-
use self::score::calculator::{FieldData, TermData};
extern crate typed_generational_arena;
/**
Expand Down Expand Up @@ -78,8 +76,6 @@ pub fn query<T: Eq + Hash + Clone + Debug, M, S: ScoreCalculator<T, M>>(
) -> Vec<QueryResult<T>> {
let query_terms = tokenizer(query);
let mut scores: HashMap<T, f64> = HashMap::new();

- let mut visited_nodes_score: HashMap<usize, f64> = HashMap::new();
for (query_term_index, query_term_pre_filter) in query_terms.iter().enumerate() {
let query_term = filter(query_term_pre_filter);
if !query_term.is_empty() {
Expand All @@ -95,7 +91,7 @@ pub fn query<T: Eq + Hash + Clone + Debug, M, S: ScoreCalculator<T, M>>(
if let Some(term_node_option_first_doc) = term_node.first_doc {
if document_frequency > 0 {
let term_expansion_data = TermData {
- query_term_index: query_term_index,
+ query_term_index,
all_query_terms: &query_terms,
query_term: &query_term,
query_term_expanded: &query_term_expanded,
Expand Down Expand Up @@ -130,8 +126,6 @@ pub fn query<T: Eq + Hash + Clone + Debug, M, S: ScoreCalculator<T, M>>(
visited_documents_for_term.contains(key),
);
scores.insert(key.to_owned(), new_score);
- visited_nodes_score
- .insert(term_node_index.to_idx(), new_score);
}
}
visited_documents_for_term.insert(key.to_owned());
Expand Down
Loading

0 comments on commit 1f1c683

Please sign in to comment.