Skip to content

Commit

Permalink
Tidy query module.
Browse files Browse the repository at this point in the history
Now that query() is a method of Index it makes more sense to have the
query helper functions in the index module.

Keep the query module just for tests.

Also improve the handling of test utility functions and flatten
the score module hierarchy for better documentation output.
  • Loading branch information
tmpfs committed Aug 19, 2022
1 parent e39f508 commit 8fd30ac
Show file tree
Hide file tree
Showing 12 changed files with 145 additions and 155 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
/target
Cargo.lock
Cargo.lock
*.bak
88 changes: 84 additions & 4 deletions src/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,7 @@ use std::{
usize,
};

use crate::{
query::{score::calculator::*, *},
utils::{FieldAccessor, Filter, Tokenizer},
};
use crate::{score::*, FieldAccessor, Filter, Tokenizer};
extern crate typed_generational_arena;
use typed_generational_arena::StandardArena;
use typed_generational_arena::StandardIndex as ArenaIndex;
Expand Down Expand Up @@ -642,6 +639,89 @@ fn create_inverted_index_nodes<T: Clone>(
parent
}

/// Result type for querying an index: a document key paired with its score.
#[derive(Debug, PartialEq)]
pub struct QueryResult<T> {
    /// Document key.
    pub key: T,
    /// Result score.
    pub score: f64,
}

/// Merges a newly computed score with the score already recorded for a document.
///
/// * `score` - score that was just computed.
/// * `previous_score` - score recorded so far, if any.
/// * `document_visited_for_term` - selects how an existing score is combined.
///
/// Returns `score` unchanged when there is no previous score. Otherwise returns
/// the larger of the two scores when `document_visited_for_term` is `true`, and
/// their sum when it is `false`.
pub(crate) fn max_score_merger(
    score: &f64,
    previous_score: Option<&f64>,
    document_visited_for_term: bool,
) -> f64 {
    match previous_score {
        None => *score,
        Some(previous) if document_visited_for_term => f64::max(*previous, *score),
        Some(previous) => *previous + *score,
    }
}

/// Expands `term` into the indexed terms that start with it.
///
/// * `index` - index whose root and arena are used to locate the node for `term`.
/// * `term` - term to expand.
/// * `arena_index` - arena used to resolve child nodes while walking below that node.
///
/// Returns all terms that start with `term`; the vector is empty when
/// `find_inverted_index_node` yields no node for `term`.
pub(crate) fn expand_term<I: Debug>(
    index: &Index<I>,
    term: &str,
    arena_index: &StandardArena<InvertedIndexNode<I>>,
) -> Vec<String> {
    let mut expanded = Vec::new();

    if let Some(start) = find_inverted_index_node(index.root, term, &index.arena_index) {
        let start_node = index.arena_index.get(start).unwrap();
        expand_term_from_node(start_node, &mut expanded, term, arena_index);
    }

    expanded
}

/// Recursively walks the inverted index nodes below `node` and collects the expanded terms.
///
/// * typeparam `I` - Document ID type.
/// * `node` - node to expand from.
/// * `results` - accumulator that receives the expanded terms.
/// * `term` - term corresponding to `node`.
/// * `arena_index` - arena used to look up child nodes.
fn expand_term_from_node<I: Debug>(
    node: &InvertedIndexNode<I>,
    results: &mut Vec<String>,
    term: &str,
    arena_index: &StandardArena<InvertedIndexNode<I>>,
) {
    // A node that has a first document is recorded as a term.
    if node.first_doc.is_some() {
        results.push(String::from(term));
    }

    // Visit each child in sibling order, extending the term by that child's character.
    let mut cursor = node.first_child;
    while let Some(child_index) = cursor {
        let child_node = arena_index.get(child_index).unwrap();
        let mut extended = String::from(term);
        extended.push(child_node.char);
        expand_term_from_node(child_node, results, &extended, arena_index);
        cursor = child_node.next;
    }
}

#[cfg(test)]
mod tests {

Expand Down
45 changes: 32 additions & 13 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,35 +1,53 @@
mod index;
mod query;
mod utils;
pub mod score;

pub use index::*;
pub use query::*;

/// Type for functions that extract a field value from a document.
///
/// Receives a reference to a document of type `D` and returns the field's
/// text as `Some(&str)`, or `None` (presumably when the document has no value
/// for that field; confirm against the indexing code).
pub type FieldAccessor<D> = fn(&D) -> Option<&str>;

/// Type used to tokenize a field.
///
/// Receives a string slice and returns the tokens as a vector of string slices.
pub type Tokenizer = fn(&str) -> Vec<&str>;

/// Type used to filter fields.
///
/// Receives a string slice and returns a string slice.
/// NOTE(review): presumably applied to each token before it is indexed or
/// queried; confirm against `Index`.
pub type Filter = fn(&str) -> &str;

#[cfg(test)]
mod query;

#[cfg(test)]
pub mod test_util {

use crate::{
index::Index,
query::{score::calculator::ScoreCalculator, QueryResult},
index::{Index, QueryResult},
score::ScoreCalculator,
};

/// Reports whether `a` and `b` differ by less than `10^-dp`.
fn approx_equal(a: f64, b: f64, dp: u8) -> bool {
    let tolerance = 10f64.powf(-f64::from(dp));
    let distance = (a - b).abs();
    distance < tolerance
}

struct Doc {
id: usize,
title: String,
/// Document type used by the test utilities.
pub struct Doc {
    /// Identifier of the document.
    pub id: usize,
    /// Title field.
    pub title: String,
    /// Text field.
    pub text: String,
}
/// Splits `s` on single space characters; consecutive spaces yield empty tokens.
fn tokenizer(s: &str) -> Vec<&str> {
    let tokens: Vec<&str> = s.split(' ').collect();
    tokens
}
fn title_extract(d: &Doc) -> Option<&str> {

/// Field accessor that always yields the document's `title`.
pub fn title_extract(d: &Doc) -> Option<&str> {
    let title: &str = &d.title;
    Some(title)
}

fn filter(s: &str) -> &str {
/// Field accessor that always yields the document's `text`.
pub fn text_extract(d: &Doc) -> Option<&str> {
    let text: &str = &d.text;
    Some(text)
}

/// Splits `s` on single space characters; consecutive spaces yield empty tokens.
pub fn tokenizer(s: &str) -> Vec<&str> {
    let tokens: Vec<&str> = s.split(' ').collect();
    tokens
}

/// Pass-through filter: returns its input unchanged.
pub fn filter(s: &str) -> &str {
    s
}

Expand Down Expand Up @@ -72,6 +90,7 @@ pub mod test_util {
let doc = Doc {
id: i,
title: title.to_string(),
text: String::new(),
};
index.add_document(&[title_extract], tokenizer, filter, doc.id, &doc);
}
Expand Down
133 changes: 14 additions & 119 deletions src/query.rs
Original file line number Diff line number Diff line change
@@ -1,125 +1,20 @@
pub mod score;

use typed_generational_arena::StandardArena;

use crate::index::*;
use std::fmt::Debug;

extern crate typed_generational_arena;

/// Result type for querying an index: a document key paired with its score.
#[derive(Debug, PartialEq)]
pub struct QueryResult<T> {
    /// Document key.
    pub key: T,
    /// Result score.
    pub score: f64,
}

/// Merges a newly computed score with the score already recorded for a document.
///
/// * `score` - score that was just computed.
/// * `previous_score` - score recorded so far, if any.
/// * `document_visited_for_term` - selects how an existing score is combined.
///
/// Returns `score` unchanged when there is no previous score. Otherwise returns
/// the larger of the two scores when `document_visited_for_term` is `true`, and
/// their sum when it is `false`.
pub(crate) fn max_score_merger(
    score: &f64,
    previous_score: Option<&f64>,
    document_visited_for_term: bool,
) -> f64 {
    match previous_score {
        None => *score,
        Some(previous) if document_visited_for_term => f64::max(*previous, *score),
        Some(previous) => *previous + *score,
    }
}

/// Expands `term` into the indexed terms that start with it.
///
/// * `index` - index whose root and arena are used to locate the node for `term`.
/// * `term` - term to expand.
/// * `arena_index` - arena used to resolve child nodes while walking below that node.
///
/// Returns all terms that start with `term`; the vector is empty when
/// `find_inverted_index_node` yields no node for `term`.
pub(crate) fn expand_term<I: Debug>(
    index: &Index<I>,
    term: &str,
    arena_index: &StandardArena<InvertedIndexNode<I>>,
) -> Vec<String> {
    let mut expanded = Vec::new();

    if let Some(start) = find_inverted_index_node(index.root, term, &index.arena_index) {
        let start_node = index.arena_index.get(start).unwrap();
        expand_term_from_node(start_node, &mut expanded, term, arena_index);
    }

    expanded
}

/// Recursively walks the inverted index nodes below `node` and collects the expanded terms.
///
/// * typeparam `I` - Document ID type.
/// * `node` - node to expand from.
/// * `results` - accumulator that receives the expanded terms.
/// * `term` - term corresponding to `node`.
/// * `arena_index` - arena used to look up child nodes.
fn expand_term_from_node<I: Debug>(
    node: &InvertedIndexNode<I>,
    results: &mut Vec<String>,
    term: &str,
    arena_index: &StandardArena<InvertedIndexNode<I>>,
) {
    // A node that has a first document is recorded as a term.
    if node.first_doc.is_some() {
        results.push(String::from(term));
    }

    // Visit each child in sibling order, extending the term by that child's character.
    let mut cursor = node.first_child;
    while let Some(child_index) = cursor {
        let child_node = arena_index.get(child_index).unwrap();
        let mut extended = String::from(term);
        extended.push(child_node.char);
        expand_term_from_node(child_node, results, &extended, arena_index);
        cursor = child_node.next;
    }
}

#[cfg(test)]
mod tests {
pub(crate) mod tests {
use crate::{index::expand_term, Index};

use crate::test_util::*;

/// Reports whether `a` and `b` differ by less than `10^-dp`.
fn approx_equal(a: f64, b: f64, dp: u8) -> bool {
    let tolerance = 10f64.powf(-f64::from(dp));
    let distance = (a - b).abs();
    distance < tolerance
}

use super::*;
/// Document type used by the query tests.
struct Doc {
    /// Identifier of the document.
    id: usize,
    /// Title field.
    title: String,
    /// Text field.
    text: String,
}

/// Field accessor that always yields the document's `title`.
fn title_extract(d: &Doc) -> Option<&str> {
    let title: &str = &d.title;
    Some(title)
}
/// Field accessor that always yields the document's `text`.
fn text_extract(d: &Doc) -> Option<&str> {
    let text: &str = &d.text;
    Some(text)
}

/// Splits `s` on single space characters; consecutive spaces yield empty tokens.
pub fn tokenizer(s: &str) -> Vec<&str> {
    let tokens: Vec<&str> = s.split(' ').collect();
    tokens
}

/// Pass-through filter: returns its input unchanged.
pub fn filter(s: &str) -> &str {
    s
}
//struct Doc {
//id: usize,
//title: String,
//text: String,
//}

pub mod query {
use super::*;
Expand Down Expand Up @@ -150,7 +45,7 @@ mod tests {
}
let result = index.query(
&"a".to_string(),
&mut crate::query::score::default::bm25::new(),
&mut crate::score::bm25::new(),
tokenizer,
filter,
&[1., 1.],
Expand Down Expand Up @@ -192,7 +87,7 @@ mod tests {

let result = index.query(
&"c".to_string(),
&mut crate::query::score::default::bm25::new(),
&mut crate::score::bm25::new(),
tokenizer,
filter,
&[1., 1.],
Expand Down Expand Up @@ -246,7 +141,7 @@ mod tests {

let result = index.query(
&"h".to_string(),
&mut crate::query::score::default::bm25::new(),
&mut crate::score::bm25::new(),
tokenizer,
filter,
&[1., 1.],
Expand Down Expand Up @@ -294,7 +189,7 @@ mod tests {
}
let result = index.query(
&"a".to_string(),
&mut crate::query::score::default::bm25::new(),
&mut crate::score::bm25::new(),
tokenizer,
custom_filter,
&[1., 1.],
Expand Down Expand Up @@ -331,7 +226,7 @@ mod tests {

let result = index.query(
&"a d".to_string(),
&mut crate::query::score::default::bm25::new(),
&mut crate::score::bm25::new(),
tokenizer,
filter,
&[1., 1.],
Expand Down
2 changes: 0 additions & 2 deletions src/query/score/mod.rs

This file was deleted.

5 changes: 2 additions & 3 deletions src/query/score/calculator.rs → src/score/calculator.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use crate::{
index::{DocumentDetails, DocumentPointer, FieldDetails, InvertedIndexNode},
query::QueryResult,
use crate::index::{
DocumentDetails, DocumentPointer, FieldDetails, InvertedIndexNode, QueryResult,
};
use std::{collections::HashMap, fmt::Debug};
use typed_generational_arena::StandardIndex as ArenaIndex;
Expand Down
4 changes: 2 additions & 2 deletions src/query/score/default/bm25.rs → src/score/default/bm25.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use std::{collections::HashMap, fmt::Debug};

use crate::{
index::{DocumentDetails, DocumentPointer, InvertedIndexNode},
query::score::calculator::{FieldData, ScoreCalculator, TermData},
score::calculator::{FieldData, ScoreCalculator, TermData},
};
use typed_generational_arena::StandardIndex as ArenaIndex;

Expand Down Expand Up @@ -100,8 +100,8 @@ mod tests {

use super::*;
use crate::{
query::QueryResult,
test_util::{build_test_index, test_score},
QueryResult,
};
#[test]
fn it_should_return_doc_1() {
Expand Down
File renamed without changes.
Loading

0 comments on commit 8fd30ac

Please sign in to comment.