Checking the type of range queries

quickwit-oss · Apr 16, 2018 · e65f716 · e65f716
1 parent 8083bc6
commit e65f716
Show file tree

Hide file tree

Showing 8 changed files with 84 additions and 24 deletions.
diff --git a/src/compression/mod.rs b/src/compression/mod.rs
@@ -271,10 +271,24 @@ mod bench {
     use test::Bencher;
     use tests;
 
+
+    fn generate_array_with_seed(n: usize, ratio: f32, seed_val: u32) -> Vec<u32> {
+        let seed: &[u32; 4] = &[1, 2, 3, seed_val];
+        let mut rng: XorShiftRng = XorShiftRng::from_seed(*seed);
+        (0..u32::max_value())
+            .filter(|_| rng.next_f32() < ratio)
+            .take(n)
+            .collect()
+    }
+
+    pub fn generate_array(n: usize, ratio: f32) -> Vec<u32> {
+        generate_array_with_seed(n, ratio, 4)
+    }
+
     #[bench]
     fn bench_compress(b: &mut Bencher) {
         let mut encoder = BlockEncoder::new();
-        let data = tests::generate_array(COMPRESSION_BLOCK_SIZE, 0.1);
+        let data = generate_array(COMPRESSION_BLOCK_SIZE, 0.1);
         b.iter(|| {
             encoder.compress_block_sorted(&data, 0u32);
         });
@@ -283,7 +297,7 @@ mod bench {
     #[bench]
     fn bench_uncompress(b: &mut Bencher) {
         let mut encoder = BlockEncoder::new();
-        let data = tests::generate_array(COMPRESSION_BLOCK_SIZE, 0.1);
+        let data = generate_array(COMPRESSION_BLOCK_SIZE, 0.1);
         let compressed = encoder.compress_block_sorted(&data, 0u32);
         let mut decoder = BlockDecoder::new();
         b.iter(|| {
@@ -310,7 +324,7 @@ mod bench {
     #[bench]
     fn bench_compress_vint(b: &mut Bencher) {
         let mut encoder = BlockEncoder::new();
-        let data = tests::generate_array(NUM_INTS_BENCH_VINT, 0.001);
+        let data = generate_array(NUM_INTS_BENCH_VINT, 0.001);
         b.iter(|| {
             encoder.compress_vint_sorted(&data, 0u32);
         });
@@ -319,7 +333,7 @@ mod bench {
     #[bench]
     fn bench_uncompress_vint(b: &mut Bencher) {
         let mut encoder = BlockEncoder::new();
-        let data = tests::generate_array(NUM_INTS_BENCH_VINT, 0.001);
+        let data = generate_array(NUM_INTS_BENCH_VINT, 0.001);
         let compressed = encoder.compress_vint_sorted(&data, 0u32);
         let mut decoder = BlockDecoder::new();
         b.iter(|| {

diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs
@@ -76,6 +76,11 @@ impl SegmentReader {
         self.segment_meta.num_docs()
     }
 
+    /// Returns the schema of the index this segment belongs to.
+    pub fn schema(&self) -> &Schema {
+        &self.schema
+    }
+
     /// Return the number of documents that have been
     /// deleted in the segment.
     pub fn num_deleted_docs(&self) -> DocId {

diff --git a/src/indexer/segment_updater.rs b/src/indexer/segment_updater.rs
@@ -361,8 +361,9 @@ impl SegmentUpdater {
         let committed_merge_candidates = merge_policy.compute_merge_candidates(&committed_segments);
         merge_candidates.extend_from_slice(&committed_merge_candidates[..]);
         for MergeCandidate(segment_metas) in merge_candidates {
-            // TODO what do we do with the future here
-            self.start_merge(&segment_metas);
+            if let Err(e) = self.start_merge(&segment_metas).fuse().poll() {
+                error!("The merge task failed quickly after starting: {:?}", e);
+            }
         }
     }
 

diff --git a/src/indexer/stamper.rs b/src/indexer/stamper.rs
@@ -23,8 +23,6 @@ mod archicture_impl {
 }
 
 
-
-
 #[cfg(not(target="x86_64"))]
 mod archicture_impl {
 

diff --git a/src/lib.rs b/src/lib.rs
@@ -293,15 +293,6 @@ mod tests {
     pub fn nearly_equals(a: f32, b: f32) -> bool {
         (a - b).abs() < 0.0005 * (a + b).abs()
     }
-
-    fn generate_array_with_seed(n: usize, ratio: f32, seed_val: u32) -> Vec<u32> {
-        let seed: &[u32; 4] = &[1, 2, 3, seed_val];
-        let mut rng: XorShiftRng = XorShiftRng::from_seed(*seed);
-        (0..u32::max_value())
-            .filter(|_| rng.next_f32() < ratio)
-            .take(n)
-            .collect()
-    }
 
     pub fn generate_nonunique_unsorted(max_value: u32, n_elems: usize) -> Vec<u32> {
         let seed: &[u32; 4] = &[1, 2, 3, 4];
@@ -312,10 +303,6 @@ mod tests {
             .collect::<Vec<u32>>()
     }
 
-    pub fn generate_array(n: usize, ratio: f32) -> Vec<u32> {
-        generate_array_with_seed(n, ratio, 4)
-    }
-
     pub fn sample_with_seed(n: u32, ratio: f32, seed_val: u32) -> Vec<u32> {
         let seed: &[u32; 4] = &[1, 2, 3, seed_val];
         let mut rng: XorShiftRng = XorShiftRng::from_seed(*seed);

diff --git a/src/query/range_query.rs b/src/query/range_query.rs
@@ -8,6 +8,7 @@ use core::Searcher;
 use query::BitSetDocSet;
 use query::ConstScorer;
 use std::ops::Range;
+use schema::Type;
 use std::collections::Bound;
 
 fn map_bound<TFrom, Transform: Fn(TFrom) -> Vec<u8>>(
@@ -81,13 +82,17 @@ fn map_bound<TFrom, Transform: Fn(TFrom) -> Vec<u8>>(
 #[derive(Debug)]
 pub struct RangeQuery {
     field: Field,
+    value_type: Type,
     left_bound: Bound<Vec<u8>>,
     right_bound: Bound<Vec<u8>>,
 }
 
 impl RangeQuery {
 
-
+    /// Creates a new `RangeQuery` over an `i64` field.
+    ///
+    /// If the field is not of the type `i64`, tantivy
+    /// will panic when the `Weight` object is created.
     pub fn new_i64(
         field: Field,
         range: Range<i64>
@@ -99,6 +104,9 @@ impl RangeQuery {
     ///
     /// The two `Bound` arguments make it possible to create more complex
     /// ranges than semi-inclusive range.
+    ///
+    /// If the field is not of the type `i64`, tantivy
+    /// will panic when the `Weight` object is created.
     pub fn new_i64_bounds(
         field: Field,
         left_bound: Bound<i64>,
@@ -107,6 +115,7 @@ impl RangeQuery {
         let make_term_val = |val: i64| Term::from_field_i64(field, val).value_bytes().to_owned();
         RangeQuery {
             field,
+            value_type: Type::I64,
             left_bound: map_bound(left_bound, &make_term_val),
             right_bound: map_bound(right_bound, &make_term_val),
         }
@@ -116,6 +125,9 @@ impl RangeQuery {
     ///
     /// The two `Bound` arguments make it possible to create more complex
     /// ranges than semi-inclusive range.
+    ///
+    /// If the field is not of the type `u64`, tantivy
+    /// will panic when the `Weight` object is created.
     pub fn new_u64_bounds(
         field: Field,
         left_bound: Bound<u64>,
@@ -124,12 +136,16 @@ impl RangeQuery {
         let make_term_val = |val: u64| Term::from_field_u64(field, val).value_bytes().to_owned();
         RangeQuery {
             field,
+            value_type: Type::U64,
             left_bound: map_bound(left_bound, &make_term_val),
             right_bound: map_bound(right_bound, &make_term_val),
         }
     }
 
     /// Create a new `RangeQuery` over a `u64` field.
+    ///
+    /// If the field is not of the type `u64`, tantivy
+    /// will panic when the `Weight` object is created.
     pub fn new_u64(
         field: Field,
         range: Range<u64>
@@ -141,6 +157,9 @@ impl RangeQuery {
     ///
     /// The two `Bound` arguments make it possible to create more complex
     /// ranges than semi-inclusive range.
+    ///
+    /// If the field is not of the type `Str`, tantivy
+    /// will panic when the `Weight` object is created.
     pub fn new_str_bounds<'b>(
         field: Field,
         left: Bound<&'b str>,
@@ -149,12 +168,16 @@ impl RangeQuery {
         let make_term_val = |val: &str| val.as_bytes().to_vec();
         RangeQuery {
             field,
+            value_type: Type::Str,
             left_bound: map_bound(left, &make_term_val),
             right_bound: map_bound(right, &make_term_val),
         }
     }
 
     /// Create a new `RangeQuery` over a `Str` field.
+    ///
+    /// If the field is not of the type `Str`, tantivy
+    /// will panic when the `Weight` object is created.
     pub fn new_str<'b>(
         field: Field,
         range: Range<&'b str>
@@ -164,7 +187,12 @@ impl RangeQuery {
 }
 
 impl Query for RangeQuery {
-    fn weight(&self, _searcher: &Searcher, _scoring_enabled: bool) -> Result<Box<Weight>> {
+    fn weight(&self, searcher: &Searcher, _scoring_enabled: bool) -> Result<Box<Weight>> {
+        if let Some(first_segment_reader) = searcher.segment_readers().iter().next() {
+            let value_type = first_segment_reader.schema().get_field_entry(self.field).field_type().value_type();
+            assert_eq!(value_type, self.value_type, "Create a range query of the type {:?}, when the field given was of \
+            type {:?}", self.value_type, value_type);
+        }
         Ok(Box::new(RangeWeight {
             field: self.field,
             left_bound: self.left_bound.clone(),

diff --git a/src/schema/field_type.rs b/src/schema/field_type.rs
@@ -17,6 +17,18 @@ pub enum ValueParsingError {
     TypeError(String),
 }
 
+/// Type of the value that a field can take.
+///
+/// Unlike `FieldType`, this does
+/// not describe how the field must be indexed.
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+pub enum Type {
+    Str,
+    U64,
+    I64,
+    HierarchicalFacet
+}
+
 /// A `FieldType` describes the type (text, u64) of a field as well as
 /// how it should be handled by tantivy.
 #[derive(Clone, Debug, Eq, PartialEq)]
@@ -32,6 +44,21 @@ pub enum FieldType {
 }
 
 impl FieldType {
+
+    /// Returns the value type associated with this field.
+    pub fn value_type(&self) -> Type {
+        match *self {
+            FieldType::Str(_) =>
+                Type::Str,
+            FieldType::U64(_) =>
+                Type::U64,
+            FieldType::I64(_) =>
+                Type::I64,
+            FieldType::HierarchicalFacet =>
+                Type::HierarchicalFacet,
+        }
+    }
+
     /// returns true iff the field is indexed.
     pub fn is_indexed(&self) -> bool {
         match *self {

diff --git a/src/schema/mod.rs b/src/schema/mod.rs
@@ -128,7 +128,7 @@ pub use self::document::Document;
 pub use self::field::Field;
 pub use self::term::Term;
 
-pub use self::field_type::FieldType;
+pub use self::field_type::{Type, FieldType};
 pub use self::field_entry::FieldEntry;
 pub use self::field_value::FieldValue;