Skip to content

Commit

Permalink
Checking the type of range queries
Browse files Browse the repository at this point in the history
  • Loading branch information
pmasurel committed Apr 16, 2018
1 parent 8083bc6 commit e65f716
Show file tree
Hide file tree
Showing 8 changed files with 84 additions and 24 deletions.
22 changes: 18 additions & 4 deletions src/compression/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -271,10 +271,24 @@ mod bench {
use test::Bencher;
use tests;


/// Builds an increasing vector of at most `n` distinct `u32` values.
///
/// Candidates `0, 1, 2, ...` are visited in order; a candidate is kept
/// when the next `f32` drawn from the generator is strictly below `ratio`.
/// The generator is seeded with `[1, 2, 3, seed_val]`.
fn generate_array_with_seed(n: usize, ratio: f32, seed_val: u32) -> Vec<u32> {
    let mut rng: XorShiftRng = XorShiftRng::from_seed([1, 2, 3, seed_val]);
    let mut picked: Vec<u32> = Vec::new();
    for candidate in 0..u32::max_value() {
        // Stop before drawing again once enough values were collected.
        if picked.len() == n {
            break;
        }
        if rng.next_f32() < ratio {
            picked.push(candidate);
        }
    }
    picked
}

/// Shortcut for `generate_array_with_seed` with the seed value fixed to `4`.
pub fn generate_array(n: usize, ratio: f32) -> Vec<u32> {
generate_array_with_seed(n, ratio, 4)
}

#[bench]
fn bench_compress(b: &mut Bencher) {
let mut encoder = BlockEncoder::new();
let data = tests::generate_array(COMPRESSION_BLOCK_SIZE, 0.1);
let data = generate_array(COMPRESSION_BLOCK_SIZE, 0.1);
b.iter(|| {
encoder.compress_block_sorted(&data, 0u32);
});
Expand All @@ -283,7 +297,7 @@ mod bench {
#[bench]
fn bench_uncompress(b: &mut Bencher) {
let mut encoder = BlockEncoder::new();
let data = tests::generate_array(COMPRESSION_BLOCK_SIZE, 0.1);
let data = generate_array(COMPRESSION_BLOCK_SIZE, 0.1);
let compressed = encoder.compress_block_sorted(&data, 0u32);
let mut decoder = BlockDecoder::new();
b.iter(|| {
Expand All @@ -310,7 +324,7 @@ mod bench {
#[bench]
fn bench_compress_vint(b: &mut Bencher) {
let mut encoder = BlockEncoder::new();
let data = tests::generate_array(NUM_INTS_BENCH_VINT, 0.001);
let data = generate_array(NUM_INTS_BENCH_VINT, 0.001);
b.iter(|| {
encoder.compress_vint_sorted(&data, 0u32);
});
Expand All @@ -319,7 +333,7 @@ mod bench {
#[bench]
fn bench_uncompress_vint(b: &mut Bencher) {
let mut encoder = BlockEncoder::new();
let data = tests::generate_array(NUM_INTS_BENCH_VINT, 0.001);
let data = generate_array(NUM_INTS_BENCH_VINT, 0.001);
let compressed = encoder.compress_vint_sorted(&data, 0u32);
let mut decoder = BlockDecoder::new();
b.iter(|| {
Expand Down
5 changes: 5 additions & 0 deletions src/core/segment_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,11 @@ impl SegmentReader {
self.segment_meta.num_docs()
}

/// Returns the schema of the index this segment belongs to.
///
/// The schema is borrowed from the reader; no copy is made.
pub fn schema(&self) -> &Schema {
&self.schema
}

/// Return the number of documents that have been
/// deleted in the segment.
pub fn num_deleted_docs(&self) -> DocId {
Expand Down
5 changes: 3 additions & 2 deletions src/indexer/segment_updater.rs
Original file line number Diff line number Diff line change
Expand Up @@ -361,8 +361,9 @@ impl SegmentUpdater {
let committed_merge_candidates = merge_policy.compute_merge_candidates(&committed_segments);
merge_candidates.extend_from_slice(&committed_merge_candidates[..]);
for MergeCandidate(segment_metas) in merge_candidates {
// TODO what do we do with the future here
self.start_merge(&segment_metas);
if let Err(e) = self.start_merge(&segment_metas).fuse().poll() {
error!("The merge task failed quickly after starting: {:?}", e);
}
}
}

Expand Down
2 changes: 0 additions & 2 deletions src/indexer/stamper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@ mod archicture_impl {
}




#[cfg(not(target="x86_64"))]
mod archicture_impl {

Expand Down
13 changes: 0 additions & 13 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -293,15 +293,6 @@ mod tests {
/// Returns `true` if `a` and `b` are equal up to a relative tolerance.
///
/// Two values are nearly equal when they are exactly equal, or when their
/// absolute difference is strictly smaller than `0.0005` times the
/// magnitude of their sum.
///
/// The explicit equality check matters when the relative bound degenerates:
/// for `a == b == 0.0` the bound is `0 < 0`, and for equal infinities the
/// difference is `NaN`, so identical inputs would otherwise be rejected.
pub fn nearly_equals(a: f32, b: f32) -> bool {
    a == b || (a - b).abs() < 0.0005 * (a + b).abs()
}

/// Builds an increasing vector of at most `n` distinct `u32` values.
///
/// Candidates `0, 1, 2, ...` are visited in order; a candidate is kept
/// when the next `f32` drawn from the generator is strictly below `ratio`.
/// The generator is seeded with `[1, 2, 3, seed_val]`.
fn generate_array_with_seed(n: usize, ratio: f32, seed_val: u32) -> Vec<u32> {
    let mut rng: XorShiftRng = XorShiftRng::from_seed([1, 2, 3, seed_val]);
    let mut picked: Vec<u32> = Vec::new();
    for candidate in 0..u32::max_value() {
        // Stop before drawing again once enough values were collected.
        if picked.len() == n {
            break;
        }
        if rng.next_f32() < ratio {
            picked.push(candidate);
        }
    }
    picked
}

pub fn generate_nonunique_unsorted(max_value: u32, n_elems: usize) -> Vec<u32> {
let seed: &[u32; 4] = &[1, 2, 3, 4];
Expand All @@ -312,10 +303,6 @@ mod tests {
.collect::<Vec<u32>>()
}

/// Shortcut for `generate_array_with_seed` with the seed value fixed to `4`.
pub fn generate_array(n: usize, ratio: f32) -> Vec<u32> {
generate_array_with_seed(n, ratio, 4)
}

pub fn sample_with_seed(n: u32, ratio: f32, seed_val: u32) -> Vec<u32> {
let seed: &[u32; 4] = &[1, 2, 3, seed_val];
let mut rng: XorShiftRng = XorShiftRng::from_seed(*seed);
Expand Down
32 changes: 30 additions & 2 deletions src/query/range_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use core::Searcher;
use query::BitSetDocSet;
use query::ConstScorer;
use std::ops::Range;
use schema::Type;
use std::collections::Bound;

fn map_bound<TFrom, Transform: Fn(TFrom) -> Vec<u8>>(
Expand Down Expand Up @@ -81,13 +82,17 @@ fn map_bound<TFrom, Transform: Fn(TFrom) -> Vec<u8>>(
#[derive(Debug)]
pub struct RangeQuery {
// Field the range is evaluated on.
field: Field,
// Value type the query was built for; set by the constructor
// (`Type::I64`, `Type::U64` or `Type::Str`) and compared against the
// field's schema type in `weight`.
value_type: Type,
// Left bound of the range, stored as the bytes of the term value.
left_bound: Bound<Vec<u8>>,
// Right bound of the range, stored as the bytes of the term value.
right_bound: Bound<Vec<u8>>,
}

impl RangeQuery {


/// Creates a new `RangeQuery` over an `i64` field.
///
/// If the field is not of the type `i64`, tantivy
/// will panic when the `Weight` object is created.
pub fn new_i64(
field: Field,
range: Range<i64>
Expand All @@ -99,6 +104,9 @@ impl RangeQuery {
///
/// The two `Bound` arguments make it possible to create more complex
/// ranges than semi-inclusive range.
///
/// If the field is not of the type `i64`, tantivy
/// will panic when the `Weight` object is created.
pub fn new_i64_bounds(
field: Field,
left_bound: Bound<i64>,
Expand All @@ -107,6 +115,7 @@ impl RangeQuery {
let make_term_val = |val: i64| Term::from_field_i64(field, val).value_bytes().to_owned();
RangeQuery {
field,
value_type: Type::I64,
left_bound: map_bound(left_bound, &make_term_val),
right_bound: map_bound(right_bound, &make_term_val),
}
Expand All @@ -116,6 +125,9 @@ impl RangeQuery {
///
/// The two `Bound` arguments make it possible to create more complex
/// ranges than semi-inclusive range.
///
/// If the field is not of the type `u64`, tantivy
/// will panic when the `Weight` object is created.
pub fn new_u64_bounds(
field: Field,
left_bound: Bound<u64>,
Expand All @@ -124,12 +136,16 @@ impl RangeQuery {
let make_term_val = |val: u64| Term::from_field_u64(field, val).value_bytes().to_owned();
RangeQuery {
field,
value_type: Type::U64,
left_bound: map_bound(left_bound, &make_term_val),
right_bound: map_bound(right_bound, &make_term_val),
}
}

/// Create a new `RangeQuery` over a `u64` field.
///
/// If the field is not of the type `u64`, tantivy
/// will panic when the `Weight` object is created.
pub fn new_u64(
field: Field,
range: Range<u64>
Expand All @@ -141,6 +157,9 @@ impl RangeQuery {
///
/// The two `Bound` arguments make it possible to create more complex
/// ranges than semi-inclusive range.
///
/// If the field is not of the type `Str`, tantivy
/// will panic when the `Weight` object is created.
pub fn new_str_bounds<'b>(
field: Field,
left: Bound<&'b str>,
Expand All @@ -149,12 +168,16 @@ impl RangeQuery {
let make_term_val = |val: &str| val.as_bytes().to_vec();
RangeQuery {
field,
value_type: Type::Str,
left_bound: map_bound(left, &make_term_val),
right_bound: map_bound(right, &make_term_val),
}
}

/// Create a new `RangeQuery` over a `Str` field.
///
/// If the field is not of the type `Str`, tantivy
/// will panic when the `Weight` object is created.
pub fn new_str<'b>(
field: Field,
range: Range<&'b str>
Expand All @@ -164,7 +187,12 @@ impl RangeQuery {
}

impl Query for RangeQuery {
fn weight(&self, _searcher: &Searcher, _scoring_enabled: bool) -> Result<Box<Weight>> {
fn weight(&self, searcher: &Searcher, _scoring_enabled: bool) -> Result<Box<Weight>> {
if let Some(first_segment_reader) = searcher.segment_readers().iter().next() {
let value_type = first_segment_reader.schema().get_field_entry(self.field).field_type().value_type();
assert_eq!(value_type, self.value_type, "Create a range query of the type {:?}, when the field given was of \
type {:?}", self.value_type, value_type);
}
Ok(Box::new(RangeWeight {
field: self.field,
left_bound: self.left_bound.clone(),
Expand Down
27 changes: 27 additions & 0 deletions src/schema/field_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,18 @@ pub enum ValueParsingError {
TypeError(String),
}

/// Type of the value that a field can take.
///
/// Contrary to `FieldType`, this does
/// not include the way the field must be indexed.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum Type {
/// Value type of `FieldType::Str` fields.
Str,
/// Value type of `FieldType::U64` fields.
U64,
/// Value type of `FieldType::I64` fields.
I64,
/// Value type of `FieldType::HierarchicalFacet` fields.
HierarchicalFacet
}

/// A `FieldType` describes the type (text, u64) of a field as well as
/// how it should be handled by tantivy.
#[derive(Clone, Debug, Eq, PartialEq)]
Expand All @@ -32,6 +44,21 @@ pub enum FieldType {
}

impl FieldType {

/// Maps this field type to the `Type` of the values it holds.
///
/// Only the variant is inspected; any payload it carries is ignored.
pub fn value_type(&self) -> Type {
    match *self {
        FieldType::Str(_) => Type::Str,
        FieldType::U64(_) => Type::U64,
        FieldType::I64(_) => Type::I64,
        FieldType::HierarchicalFacet => Type::HierarchicalFacet,
    }
}

/// returns true iff the field is indexed.
pub fn is_indexed(&self) -> bool {
match *self {
Expand Down
2 changes: 1 addition & 1 deletion src/schema/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ pub use self::document::Document;
pub use self::field::Field;
pub use self::term::Term;

pub use self::field_type::FieldType;
pub use self::field_type::{Type, FieldType};
pub use self::field_entry::FieldEntry;
pub use self::field_value::FieldValue;

Expand Down

0 comments on commit e65f716

Please sign in to comment.