Skip to content

Commit

Permalink
issue/50 PhraseQuery working
Browse files Browse the repository at this point in the history
  • Loading branch information
fulmicoton committed Nov 3, 2016
1 parent 627e4f1 commit 9d3c999
Show file tree
Hide file tree
Showing 9 changed files with 69 additions and 36 deletions.
1 change: 0 additions & 1 deletion src/common/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ pub use self::timer::OpenTimer;
pub use self::vint::VInt;
use std::io;


/// Wraps a plain message into an `io::Error`.
///
/// The returned error always has kind `io::ErrorKind::Other`, and `msg`
/// becomes its payload (it is what the error prints when displayed).
pub fn make_io_err(msg: String) -> io::Error {
    let kind = io::ErrorKind::Other;
    io::Error::new(kind, msg)
}
Expand Down
1 change: 0 additions & 1 deletion src/postings/docset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ pub trait DocSet {
}
}


impl<TDocSet: DocSet + ?Sized> DocSet for Box<TDocSet> {

fn advance(&mut self,) -> bool {
Expand Down
1 change: 0 additions & 1 deletion src/postings/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ pub use self::postings::Postings;

#[cfg(test)]
pub use self::vec_postings::VecPostings;

pub use self::chained_postings::ChainedPostings;
pub use self::segment_postings::SegmentPostings;
pub use self::intersection::IntersectionDocSet;
Expand Down
1 change: 1 addition & 0 deletions src/postings/postings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,4 @@ impl<'a, TPostings: Postings> Postings for &'a mut TPostings {
}



1 change: 1 addition & 0 deletions src/query/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ pub use self::phrase_query::PhraseQuery;
pub use self::multi_term_query::MultiTermQuery;
pub use self::multi_term_query::MultiTermWeight;
pub use self::scorer::Scorer;
pub use self::scorer::EmptyScorer;
pub use self::query_parser::QueryParser;
pub use self::explanation::Explanation;
pub use self::query_parser::ParsingError;
Expand Down
19 changes: 12 additions & 7 deletions src/query/phrase_query/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,18 +52,23 @@ mod tests {
}
assert!(index_writer.commit().is_ok());
}
let mut test_collector = TestCollector::default();
let build_query = |texts: Vec<&str>| {

let searcher = index.searcher();
let test_query = |texts: Vec<&str>| {
let mut test_collector = TestCollector::default();
let terms: Vec<Term> = texts
.iter()
.map(|text| Term::from_field_text(text_field, text))
.collect();
PhraseQuery::from(terms)
let phrase_query = PhraseQuery::from(terms);
phrase_query.search(&*searcher, &mut test_collector).expect("search should succeed");
test_collector.docs()
};
let phrase_query = build_query(vec!("a", "b", "c"));
let searcher = index.searcher();
phrase_query.search(&*searcher, &mut test_collector).expect("search should succeed");
assert_eq!(test_collector.docs(), vec!(1, 2, 4));
assert_eq!(test_query(vec!("a", "b", "c")), vec!(2, 4));
assert_eq!(test_query(vec!("a", "b")), vec!(1, 2, 3, 4));
assert_eq!(test_query(vec!("b", "b")), vec!(0, 1));
assert_eq!(test_query(vec!("g", "ewrwer")), vec!());
assert_eq!(test_query(vec!("g", "a")), vec!());
}

}
48 changes: 27 additions & 21 deletions src/query/phrase_query/phrase_scorer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ use DocId;

pub struct PhraseScorer<'a> {
pub intersection_docset: IntersectionDocSet<SegmentPostings<'a>>,
pub positions_offsets: Vec<u32>,
}


impl<'a> PhraseScorer<'a> {
fn phrase_match(&self) -> bool {
let mut positions_arr: Vec<&[u32]> = self.intersection_docset
Expand All @@ -19,42 +19,48 @@ impl<'a> PhraseScorer<'a> {
posting.positions()
})
.collect();
println!("positions arr {:?}", positions_arr);

let num_postings = positions_arr.len() as u32;

let mut ord = 1u32;
let mut pos_candidate = positions_arr[0][0];
positions_arr[0] = &(positions_arr[0])[1..];
let mut count_matching = 1;

let mut cur = 0;
'outer: loop {
for i in 0..positions_arr.len() {
println!("i {}", i);
let positions: &mut &[u32] = &mut positions_arr[i];
if positions.len() == 0 {
println!("NOPE");
return false;
let target = pos_candidate + ord;
let positions = positions_arr[ord as usize];
for i in 0..positions.len() {
let pos_i = positions[i];
if pos_i < target {
continue;
}
let head_position = positions[0] + self.positions_offsets[i];
println!("cur: {}, head_position {}", cur, head_position);
while head_position < cur {
if positions.len() == 1 {
return false;
if pos_i == target {
count_matching += 1;
if count_matching == num_postings {
return true;
}
*positions = &(*positions)[1..];
}
if head_position != cur {
cur = head_position;
continue 'outer;
else if pos_i > target {
count_matching = 1;
pos_candidate = positions[i] - ord;
positions_arr[ord as usize] = &(positions_arr[ord as usize])[(i+1)..];
}
ord += 1;
if ord == num_postings {
ord = 0;
}
continue 'outer;
}
return true;
return false;
}
}
}

impl<'a> DocSet for PhraseScorer<'a> {
fn advance(&mut self,) -> bool {
while self.intersection_docset.advance() {
println!("doc {}", self.intersection_docset.doc());
if self.phrase_match() {
println!("return {}", self.intersection_docset.doc());
return true;
}
}
Expand Down
6 changes: 4 additions & 2 deletions src/query/phrase_query/phrase_weight.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use postings::SegmentPostingsOption;
use core::SegmentReader;
use super::PhraseScorer;
use postings::IntersectionDocSet;
use query::EmptyScorer;
use Result;

pub struct PhraseWeight {
Expand All @@ -27,11 +28,12 @@ impl Weight for PhraseWeight {
if let Some(term_postings) = term_postings_option {
term_postings_list.push(term_postings);
}
else {
return Ok(box EmptyScorer);
}
}
let positions_offsets: Vec<u32> = (0u32..self.phrase_terms.len() as u32).collect();
Ok(box PhraseScorer {
intersection_docset: IntersectionDocSet::from(term_postings_list),
positions_offsets: positions_offsets,
})
}
}
27 changes: 24 additions & 3 deletions src/query/scorer.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use DocSet;
use DocId;
use Score;
use collector::Collector;
use std::ops::{Deref, DerefMut};

Expand All @@ -10,7 +12,7 @@ pub trait Scorer: DocSet {
/// Returns the score.
///
/// This method will perform a bit of computation and is not cached.
fn score(&self,) -> f32;
fn score(&self,) -> Score;

/// Consumes the complete `DocSet` and
/// push the scored documents to the collector.
Expand All @@ -23,7 +25,7 @@ pub trait Scorer: DocSet {


impl<'a> Scorer for Box<Scorer + 'a> {
fn score(&self,) -> f32 {
fn score(&self,) -> Score {
self.deref().score()
}

Expand All @@ -33,4 +35,23 @@ impl<'a> Scorer for Box<Scorer + 'a> {
collector.collect(scorer.doc(), scorer.score());
}
}
}
}


/// A scorer over an empty set of documents.
///
/// Its `DocSet::advance` always returns `false`, so it never exposes a
/// document. Useful as a placeholder result when a query cannot match
/// anything (for instance when one of the queried terms has no postings).
pub struct EmptyScorer;

// `DocSet` implementation for a set that contains no documents at all.
impl DocSet for EmptyScorer {
// There is never a next document: unconditionally reports exhaustion.
fn advance(&mut self,) -> bool {
false
}

// No document is ever current, so this returns the largest `DocId` value.
// NOTE(review): presumably serves as an "exhausted" sentinel — confirm
// against the convention used by the other `DocSet` implementations.
fn doc(&self,) -> DocId {
DocId::max_value()
}
}

// `Scorer` implementation for the empty scorer.
impl Scorer for EmptyScorer {
// Always returns a score of zero.
// NOTE(review): since `advance` never succeeds, callers that only score
// after a successful `advance` should never observe this value — confirm.
fn score(&self,) -> Score {
0f32
}
}

0 comments on commit 9d3c999

Please sign in to comment.