Skip to content

Commit

Permalink
Restore document frequency count in query.
Browse files Browse the repository at this point in the history
  • Loading branch information
tmpfs committed Aug 20, 2022
1 parent 9e6d176 commit 0a8037a
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 21 deletions.
12 changes: 12 additions & 0 deletions src/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,18 @@ impl<T: Eq + Hash + Copy + Debug> Index<T> {
document_frequency
}

/// Count the number of documents attached to the inverted-index node at
/// `node_index` by walking its linked document list.
pub(crate) fn count_documents(&self, node_index: ArenaIndex<InvertedIndexNode<T>>) -> usize {
    // Resolve the node, then follow the `next` links from `first_doc`,
    // counting each entry; `successors` ends the chain at the first `None`.
    let start = self.arena_index.get(node_index).unwrap().first_doc;
    std::iter::successors(start, |&doc_index| {
        self.arena_doc.get(doc_index).unwrap().next
    })
    .count()
}

/// Finds inverted index node that matches the `term`.
pub(crate) fn find_inverted_index_node(
node: ArenaIndex<InvertedIndexNode<T>>,
Expand Down
25 changes: 6 additions & 19 deletions src/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,29 +17,16 @@ pub struct QueryResult<T> {
}

impl<T: Eq + Hash + Copy + Debug> Index<T> {
/**
Performs a search with a simple free text query.
All token separators work as a disjunction operator.
Arguments
* typeparam `T` Document key.
* `index`.
* `query` Query string.
* `score_calculator` A struct that implements the ScoreCalculator trait to provide score calculations.
* `tokenizer` Tokenizer is a function that breaks a text into words, phrases, symbols, or other meaningful elements called tokens.
* `filter` Filter is a function that processes tokens and returns terms, terms are used in Inverted Index to index documents.
* `fields_boost` Fields boost factors.
* `removed` Set of removed document keys.
returns Array of QueryResult structs
*/
/// Performs a search with a simple free text query.
///
/// All token separators work as a disjunction operator.
pub fn query<M, S: ScoreCalculator<T, M>>(
&self,
query: &str,
score_calculator: &mut S,
tokenizer: Tokenizer,
filter: Filter,
fields_boost: &[f64],
//removed: Option<&HashSet<T>>,
) -> Vec<QueryResult<T>> {
let removed = self.removed_documents();
let query_terms = tokenizer(query);
Expand All @@ -56,7 +43,7 @@ impl<T: Eq + Hash + Copy + Debug> Index<T> {
&self.arena_index,
);
if let Some(term_node_index) = term_node_option {
let document_frequency = 1;
let document_frequency = self.count_documents(term_node_index);
let term_node = self.arena_index.get(term_node_index).unwrap();
if let Some(term_node_option_first_doc) = term_node.first_doc {
let term_expansion_data = TermData {
Expand Down Expand Up @@ -265,15 +252,15 @@ pub(crate) mod tests {

assert_eq!(result.len(), 2);
assert_eq!(
approx_equal(result.get(0).unwrap().score, 0.6931471805599453, 8),
approx_equal(result.get(0).unwrap().score, 0.1823215567939546, 8),
true
);
assert_eq!(
result.get(0).unwrap().key == 1 || result.get(0).unwrap().key == 2,
true
);
assert_eq!(
approx_equal(result.get(1).unwrap().score, 0.6931471805599453, 8),
approx_equal(result.get(1).unwrap().score, 0.1823215567939546, 8),
true
);
assert_eq!(
Expand Down
4 changes: 2 additions & 2 deletions src/score/default/bm25.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,11 +127,11 @@ mod tests {
vec![
QueryResult {
key: 0,
score: 0.6931471805599453,
score: 0.1823215567939546,
},
QueryResult {
key: 1,
score: 0.6931471805599453,
score: 0.1823215567939546,
},
],
);
Expand Down

0 comments on commit 0a8037a

Please sign in to comment.