Skip to content

Commit

Permalink
prepare for multiple fastfield codecs (#1063)
Browse files Browse the repository at this point in the history
* prepare for multiple fastfield codecs

 prepare for multiple fastfield codecs by wrapping the codecs in an enum #1042

* add FastFieldSerializer trait, add DynamicFastFieldSerializer

add FastFieldSerializer trait
add DynamicFastFieldSerializer enum to wrap all implementors of the FastFieldSerializer trait

* add estimation for fastfield bitpacker
  • Loading branch information
PSeitz committed May 31, 2021
1 parent 8d32c3b commit dff0ffd
Show file tree
Hide file tree
Showing 19 changed files with 336 additions and 115 deletions.
4 changes: 2 additions & 2 deletions examples/custom_collector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
// ---
// Importing tantivy...
use tantivy::collector::{Collector, SegmentCollector};
use tantivy::fastfield::FastFieldReader;
use tantivy::fastfield::{DynamicFastFieldReader, FastFieldReader};
use tantivy::query::QueryParser;
use tantivy::schema::Field;
use tantivy::schema::{Schema, FAST, INDEXED, TEXT};
Expand Down Expand Up @@ -98,7 +98,7 @@ impl Collector for StatsCollector {
}

struct StatsSegmentCollector {
fast_field_reader: FastFieldReader<u64>,
fast_field_reader: DynamicFastFieldReader<u64>,
stats: Stats,
}

Expand Down
4 changes: 2 additions & 2 deletions src/collector/filter_collector_wrapper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
use std::marker::PhantomData;

use crate::collector::{Collector, SegmentCollector};
use crate::fastfield::{FastFieldReader, FastValue};
use crate::fastfield::{DynamicFastFieldReader, FastFieldReader, FastValue};
use crate::schema::Field;
use crate::{Score, SegmentReader, TantivyError};

Expand Down Expand Up @@ -155,7 +155,7 @@ where
TPredicate: 'static,
TPredicateValue: FastValue,
{
fast_field_reader: FastFieldReader<TPredicateValue>,
fast_field_reader: DynamicFastFieldReader<TPredicateValue>,
segment_collector: TSegmentCollector,
predicate: TPredicate,
t_predicate_value: PhantomData<TPredicateValue>,
Expand Down
4 changes: 2 additions & 2 deletions src/collector/histogram_collector.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::collector::{Collector, SegmentCollector};
use crate::fastfield::{FastFieldReader, FastValue};
use crate::fastfield::{DynamicFastFieldReader, FastFieldReader, FastValue};
use crate::schema::{Field, Type};
use crate::{DocId, Score};
use fastdivide::DividerU64;
Expand Down Expand Up @@ -84,7 +84,7 @@ impl HistogramComputer {
}
pub struct SegmentHistogramCollector {
histogram_computer: HistogramComputer,
ff_reader: FastFieldReader<u64>,
ff_reader: DynamicFastFieldReader<u64>,
}

impl SegmentCollector for SegmentHistogramCollector {
Expand Down
3 changes: 2 additions & 1 deletion src/collector/tests.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use super::*;
use crate::core::SegmentReader;
use crate::fastfield::BytesFastFieldReader;
use crate::fastfield::DynamicFastFieldReader;
use crate::fastfield::FastFieldReader;
use crate::schema::Field;
use crate::DocId;
Expand Down Expand Up @@ -162,7 +163,7 @@ pub struct FastFieldTestCollector {

pub struct FastFieldSegmentCollector {
vals: Vec<u64>,
reader: FastFieldReader<u64>,
reader: DynamicFastFieldReader<u64>,
}

impl FastFieldTestCollector {
Expand Down
8 changes: 5 additions & 3 deletions src/collector/top_score_collector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::collector::tweak_score_top_collector::TweakedScoreTopCollector;
use crate::collector::{
CustomScorer, CustomSegmentScorer, ScoreSegmentTweaker, ScoreTweaker, SegmentCollector,
};
use crate::fastfield::FastFieldReader;
use crate::fastfield::{DynamicFastFieldReader, FastFieldReader};
use crate::query::Weight;
use crate::schema::Field;
use crate::DocAddress;
Expand Down Expand Up @@ -129,7 +129,7 @@ impl fmt::Debug for TopDocs {
}

struct ScorerByFastFieldReader {
ff_reader: FastFieldReader<u64>,
ff_reader: DynamicFastFieldReader<u64>,
}

impl CustomSegmentScorer<u64> for ScorerByFastFieldReader {
Expand All @@ -151,7 +151,7 @@ impl CustomScorer<u64> for ScorerByField {
// mapping is monotonic, so it is sufficient to compute our top-K docs.
//
// The conversion will then happen only on the top-K docs.
let ff_reader: FastFieldReader<u64> = segment_reader
let ff_reader = segment_reader
.fast_fields()
.typed_fast_field_reader(self.field)?;
Ok(ScorerByFastFieldReader { ff_reader })
Expand Down Expand Up @@ -401,6 +401,7 @@ impl TopDocs {
/// # use tantivy::query::QueryParser;
/// use tantivy::SegmentReader;
/// use tantivy::collector::TopDocs;
/// use tantivy::fastfield::FastFieldReader;
/// use tantivy::schema::Field;
///
/// fn create_schema() -> Schema {
Expand Down Expand Up @@ -508,6 +509,7 @@ impl TopDocs {
/// use tantivy::SegmentReader;
/// use tantivy::collector::TopDocs;
/// use tantivy::schema::Field;
/// use tantivy::fastfield::FastFieldReader;
///
/// # fn create_schema() -> Schema {
/// # let mut schema_builder = Schema::builder();
Expand Down
8 changes: 4 additions & 4 deletions src/fastfield/bytes/reader.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::directory::FileSlice;
use crate::directory::OwnedBytes;
use crate::fastfield::FastFieldReader;
use crate::fastfield::{BitpackedFastFieldReader, FastFieldReader, MultiValueLength};
use crate::DocId;
use crate::{directory::FileSlice, fastfield::MultiValueLength};

/// Reader for byte array fast fields
///
Expand All @@ -15,13 +15,13 @@ use crate::{directory::FileSlice, fastfield::MultiValueLength};
/// and the start index for the next document, and keeping the bytes in between.
#[derive(Clone)]
pub struct BytesFastFieldReader {
idx_reader: FastFieldReader<u64>,
idx_reader: BitpackedFastFieldReader<u64>,
values: OwnedBytes,
}

impl BytesFastFieldReader {
pub(crate) fn open(
idx_reader: FastFieldReader<u64>,
idx_reader: BitpackedFastFieldReader<u64>,
values_file: FileSlice,
) -> crate::Result<BytesFastFieldReader> {
let values = values_file.read_bytes()?;
Expand Down
7 changes: 5 additions & 2 deletions src/fastfield/bytes/writer.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
use std::io;

use crate::fastfield::serializer::FastFieldSerializer;
use crate::schema::{Document, Field, Value};
use crate::DocId;
use crate::{fastfield::serializer::FastFieldSerializer, indexer::doc_id_mapping::DocIdMapping};
use crate::{
fastfield::serializer::CompositeFastFieldSerializer, indexer::doc_id_mapping::DocIdMapping,
};

/// Writer for byte array (as in, any number of bytes per document) fast fields
///
Expand Down Expand Up @@ -104,7 +107,7 @@ impl BytesFastFieldWriter {
/// Serializes the fast field values by pushing them to the `FastFieldSerializer`.
pub fn serialize(
&self,
serializer: &mut FastFieldSerializer,
serializer: &mut CompositeFastFieldSerializer,
doc_id_map: Option<&DocIdMapping>,
) -> io::Result<()> {
// writing the offset index
Expand Down
35 changes: 19 additions & 16 deletions src/fastfield/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,11 @@ pub use self::delete::DeleteBitSet;
pub use self::error::{FastFieldNotAvailableError, Result};
pub use self::facet_reader::FacetReader;
pub use self::multivalued::{MultiValuedFastFieldReader, MultiValuedFastFieldWriter};
pub use self::reader::BitpackedFastFieldReader;
pub use self::reader::DynamicFastFieldReader;
pub use self::reader::FastFieldReader;
pub use self::readers::FastFieldReaders;
pub use self::serializer::CompositeFastFieldSerializer;
pub use self::serializer::FastFieldSerializer;
pub use self::writer::{FastFieldsWriter, IntFastFieldWriter};
use crate::schema::Cardinality;
Expand Down Expand Up @@ -211,7 +214,7 @@ mod tests {
use super::*;
use crate::common::CompositeFile;
use crate::directory::{Directory, RamDirectory, WritePtr};
use crate::fastfield::FastFieldReader;
use crate::fastfield::BitpackedFastFieldReader;
use crate::merge_policy::NoMergePolicy;
use crate::schema::Field;
use crate::schema::Schema;
Expand All @@ -236,7 +239,7 @@ mod tests {

#[test]
pub fn test_fastfield() {
let test_fastfield = FastFieldReader::<u64>::from(vec![100, 200, 300]);
let test_fastfield = BitpackedFastFieldReader::<u64>::from(vec![100, 200, 300]);
assert_eq!(test_fastfield.get(0), 100);
assert_eq!(test_fastfield.get(1), 200);
assert_eq!(test_fastfield.get(2), 300);
Expand All @@ -254,7 +257,7 @@ mod tests {
let directory: RamDirectory = RamDirectory::create();
{
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
let mut serializer = FastFieldSerializer::from_write(write).unwrap();
let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap();
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
fast_field_writers.add_document(&doc!(*FIELD=>13u64));
fast_field_writers.add_document(&doc!(*FIELD=>14u64));
Expand All @@ -268,7 +271,7 @@ mod tests {
assert_eq!(file.len(), 36 as usize);
let composite_file = CompositeFile::open(&file)?;
let file = composite_file.open_read(*FIELD).unwrap();
let fast_field_reader = FastFieldReader::<u64>::open(file)?;
let fast_field_reader = BitpackedFastFieldReader::<u64>::open(file)?;
assert_eq!(fast_field_reader.get(0), 13u64);
assert_eq!(fast_field_reader.get(1), 14u64);
assert_eq!(fast_field_reader.get(2), 2u64);
Expand All @@ -281,7 +284,7 @@ mod tests {
let directory: RamDirectory = RamDirectory::create();
{
let write: WritePtr = directory.open_write(Path::new("test"))?;
let mut serializer = FastFieldSerializer::from_write(write)?;
let mut serializer = CompositeFastFieldSerializer::from_write(write)?;
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
fast_field_writers.add_document(&doc!(*FIELD=>4u64));
fast_field_writers.add_document(&doc!(*FIELD=>14_082_001u64));
Expand All @@ -300,7 +303,7 @@ mod tests {
{
let fast_fields_composite = CompositeFile::open(&file)?;
let data = fast_fields_composite.open_read(*FIELD).unwrap();
let fast_field_reader = FastFieldReader::<u64>::open(data)?;
let fast_field_reader = BitpackedFastFieldReader::<u64>::open(data)?;
assert_eq!(fast_field_reader.get(0), 4u64);
assert_eq!(fast_field_reader.get(1), 14_082_001u64);
assert_eq!(fast_field_reader.get(2), 3_052u64);
Expand All @@ -321,7 +324,7 @@ mod tests {

{
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
let mut serializer = FastFieldSerializer::from_write(write).unwrap();
let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap();
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
for _ in 0..10_000 {
fast_field_writers.add_document(&doc!(*FIELD=>100_000u64));
Expand All @@ -336,7 +339,7 @@ mod tests {
{
let fast_fields_composite = CompositeFile::open(&file).unwrap();
let data = fast_fields_composite.open_read(*FIELD).unwrap();
let fast_field_reader = FastFieldReader::<u64>::open(data)?;
let fast_field_reader = BitpackedFastFieldReader::<u64>::open(data)?;
for doc in 0..10_000 {
assert_eq!(fast_field_reader.get(doc), 100_000u64);
}
Expand All @@ -351,7 +354,7 @@ mod tests {

{
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
let mut serializer = FastFieldSerializer::from_write(write).unwrap();
let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap();
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
// forcing the amplitude to be high
fast_field_writers.add_document(&doc!(*FIELD=>0u64));
Expand All @@ -368,7 +371,7 @@ mod tests {
{
let fast_fields_composite = CompositeFile::open(&file)?;
let data = fast_fields_composite.open_read(*FIELD).unwrap();
let fast_field_reader = FastFieldReader::<u64>::open(data)?;
let fast_field_reader = BitpackedFastFieldReader::<u64>::open(data)?;
assert_eq!(fast_field_reader.get(0), 0u64);
for doc in 1..10_001 {
assert_eq!(
Expand All @@ -390,7 +393,7 @@ mod tests {
let schema = schema_builder.build();
{
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
let mut serializer = FastFieldSerializer::from_write(write).unwrap();
let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap();
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema);
for i in -100i64..10_000i64 {
let mut doc = Document::default();
Expand All @@ -407,7 +410,7 @@ mod tests {
{
let fast_fields_composite = CompositeFile::open(&file)?;
let data = fast_fields_composite.open_read(i64_field).unwrap();
let fast_field_reader = FastFieldReader::<i64>::open(data)?;
let fast_field_reader = BitpackedFastFieldReader::<i64>::open(data)?;

assert_eq!(fast_field_reader.min_value(), -100i64);
assert_eq!(fast_field_reader.max_value(), 9_999i64);
Expand All @@ -433,7 +436,7 @@ mod tests {

{
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
let mut serializer = FastFieldSerializer::from_write(write).unwrap();
let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap();
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema);
let doc = Document::default();
fast_field_writers.add_document(&doc);
Expand All @@ -447,7 +450,7 @@ mod tests {
{
let fast_fields_composite = CompositeFile::open(&file).unwrap();
let data = fast_fields_composite.open_read(i64_field).unwrap();
let fast_field_reader = FastFieldReader::<i64>::open(data)?;
let fast_field_reader = BitpackedFastFieldReader::<i64>::open(data)?;
assert_eq!(fast_field_reader.get(0u32), 0i64);
}
Ok(())
Expand All @@ -468,7 +471,7 @@ mod tests {
let directory = RamDirectory::create();
{
let write: WritePtr = directory.open_write(Path::new("test"))?;
let mut serializer = FastFieldSerializer::from_write(write)?;
let mut serializer = CompositeFastFieldSerializer::from_write(write)?;
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
for &x in &permutation {
fast_field_writers.add_document(&doc!(*FIELD=>x));
Expand All @@ -480,7 +483,7 @@ mod tests {
{
let fast_fields_composite = CompositeFile::open(&file)?;
let data = fast_fields_composite.open_read(*FIELD).unwrap();
let fast_field_reader = FastFieldReader::<u64>::open(data)?;
let fast_field_reader = BitpackedFastFieldReader::<u64>::open(data)?;

let mut a = 0u64;
for _ in 0..n {
Expand Down
10 changes: 5 additions & 5 deletions src/fastfield/multivalued/reader.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::ops::Range;

use crate::fastfield::{FastFieldReader, FastValue, MultiValueLength};
use crate::fastfield::{BitpackedFastFieldReader, FastFieldReader, FastValue, MultiValueLength};
use crate::DocId;

/// Reader for a multivalued `u64` fast field.
Expand All @@ -13,14 +13,14 @@ use crate::DocId;
///
#[derive(Clone)]
pub struct MultiValuedFastFieldReader<Item: FastValue> {
idx_reader: FastFieldReader<u64>,
vals_reader: FastFieldReader<Item>,
idx_reader: BitpackedFastFieldReader<u64>,
vals_reader: BitpackedFastFieldReader<Item>,
}

impl<Item: FastValue> MultiValuedFastFieldReader<Item> {
pub(crate) fn open(
idx_reader: FastFieldReader<u64>,
vals_reader: FastFieldReader<Item>,
idx_reader: BitpackedFastFieldReader<u64>,
vals_reader: BitpackedFastFieldReader<Item>,
) -> MultiValuedFastFieldReader<Item> {
MultiValuedFastFieldReader {
idx_reader,
Expand Down
9 changes: 5 additions & 4 deletions src/fastfield/multivalued/writer.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::fastfield::serializer::FastSingleFieldSerializer;
use crate::fastfield::FastFieldSerializer;
use crate::fastfield::serializer::DynamicFastFieldSerializer;
use crate::fastfield::serializer::FastFieldSerializer;
use crate::fastfield::CompositeFastFieldSerializer;
use crate::postings::UnorderedTermId;
use crate::schema::{Document, Field};
use crate::termdict::TermOrdinal;
Expand Down Expand Up @@ -134,7 +135,7 @@ impl MultiValuedFastFieldWriter {
///
pub fn serialize(
&self,
serializer: &mut FastFieldSerializer,
serializer: &mut CompositeFastFieldSerializer,
mapping_opt: Option<&FnvHashMap<UnorderedTermId, TermOrdinal>>,
doc_id_map: Option<&DocIdMapping>,
) -> io::Result<()> {
Expand All @@ -154,7 +155,7 @@ impl MultiValuedFastFieldWriter {
}
{
// writing the values themselves.
let mut value_serializer: FastSingleFieldSerializer<'_, _>;
let mut value_serializer: DynamicFastFieldSerializer<'_, _>;
match mapping_opt {
Some(mapping) => {
value_serializer = serializer.new_u64_fast_field_with_idx(
Expand Down

0 comments on commit dff0ffd

Please sign in to comment.