Skip to content

Commit

Permalink
return CompactDocValue instead of trait (#2410)
Browse files Browse the repository at this point in the history
The CompactDocValue is easier to handle than the trait in some cases like comparison
and conversion
  • Loading branch information
PSeitz committed May 27, 2024
1 parent b806122 commit 2e3641c
Show file tree
Hide file tree
Showing 10 changed files with 137 additions and 91 deletions.
3 changes: 2 additions & 1 deletion examples/date_time_field.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

use tantivy::collector::TopDocs;
use tantivy::query::QueryParser;
use tantivy::schema::{DateOptions, Document, Schema, INDEXED, STORED, STRING};
use tantivy::schema::{DateOptions, Document, Schema, Value, INDEXED, STORED, STRING};
use tantivy::{Index, IndexWriter, TantivyDocument};

fn main() -> tantivy::Result<()> {
Expand Down Expand Up @@ -64,6 +64,7 @@ fn main() -> tantivy::Result<()> {
assert!(retrieved_doc
.get_first(occurred_at)
.unwrap()
.as_value()
.as_datetime()
.is_some(),);
assert_eq!(
Expand Down
6 changes: 4 additions & 2 deletions src/fastfield/facet_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ impl FacetReader {

#[cfg(test)]
mod tests {
use crate::schema::{Facet, FacetOptions, SchemaBuilder, STORED};
use crate::schema::{Facet, FacetOptions, SchemaBuilder, Value, STORED};
use crate::{DocAddress, Index, IndexWriter, TantivyDocument};

#[test]
Expand All @@ -88,7 +88,9 @@ mod tests {
let doc = searcher
.doc::<TantivyDocument>(DocAddress::new(0u32, 0u32))
.unwrap();
let value = doc.get_first(facet_field).and_then(|v| v.as_facet());
let value = doc
.get_first(facet_field)
.and_then(|v| v.as_value().as_facet());
assert_eq!(value, None);
}

Expand Down
10 changes: 8 additions & 2 deletions src/indexer/index_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -816,7 +816,7 @@ mod tests {
use crate::query::{BooleanQuery, Occur, Query, QueryParser, TermQuery};
use crate::schema::{
self, Facet, FacetOptions, IndexRecordOption, IpAddrOptions, NumericOptions, Schema,
TextFieldIndexing, TextOptions, FAST, INDEXED, STORED, STRING, TEXT,
TextFieldIndexing, TextOptions, Value, FAST, INDEXED, STORED, STRING, TEXT,
};
use crate::store::DOCSTORE_CACHE_CAPACITY;
use crate::{
Expand Down Expand Up @@ -1979,7 +1979,13 @@ mod tests {
.unwrap();
// test store iterator
for doc in store_reader.iter::<TantivyDocument>(segment_reader.alive_bitset()) {
let id = doc.unwrap().get_first(id_field).unwrap().as_u64().unwrap();
let id = doc
.unwrap()
.get_first(id_field)
.unwrap()
.as_value()
.as_u64()
.unwrap();
assert!(expected_ids_and_num_occurrences.contains_key(&id));
}
// test store random access
Expand Down
17 changes: 13 additions & 4 deletions src/indexer/merger.rs
Original file line number Diff line number Diff line change
Expand Up @@ -797,7 +797,7 @@ mod tests {
use crate::query::{AllQuery, BooleanQuery, EnableScoring, Scorer, TermQuery};
use crate::schema::{
Facet, FacetOptions, IndexRecordOption, NumericOptions, TantivyDocument, Term,
TextFieldIndexing, INDEXED, TEXT,
TextFieldIndexing, Value, INDEXED, TEXT,
};
use crate::time::OffsetDateTime;
use crate::{
Expand Down Expand Up @@ -909,15 +909,24 @@ mod tests {
}
{
let doc = searcher.doc::<TantivyDocument>(DocAddress::new(0, 0))?;
assert_eq!(doc.get_first(text_field).unwrap().as_str(), Some("af b"));
assert_eq!(
doc.get_first(text_field).unwrap().as_value().as_str(),
Some("af b")
);
}
{
let doc = searcher.doc::<TantivyDocument>(DocAddress::new(0, 1))?;
assert_eq!(doc.get_first(text_field).unwrap().as_str(), Some("a b c"));
assert_eq!(
doc.get_first(text_field).unwrap().as_value().as_str(),
Some("a b c")
);
}
{
let doc = searcher.doc::<TantivyDocument>(DocAddress::new(0, 2))?;
assert_eq!(doc.get_first(text_field).unwrap().as_str(), Some("a b c d"));
assert_eq!(
doc.get_first(text_field).unwrap().as_value().as_str(),
Some("a b c d")
);
}
{
let doc = searcher.doc::<TantivyDocument>(DocAddress::new(0, 3))?;
Expand Down
9 changes: 6 additions & 3 deletions src/indexer/merger_sorted_index_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ mod tests {
use crate::query::QueryParser;
use crate::schema::{
self, BytesOptions, Facet, FacetOptions, IndexRecordOption, NumericOptions,
TextFieldIndexing, TextOptions,
TextFieldIndexing, TextOptions, Value,
};
use crate::{
DocAddress, DocSet, IndexSettings, IndexSortByField, IndexWriter, Order, TantivyDocument,
Expand Down Expand Up @@ -280,13 +280,16 @@ mod tests {
.doc::<TantivyDocument>(DocAddress::new(0, blubber_pos))
.unwrap();
assert_eq!(
doc.get_first(my_text_field).unwrap().as_str(),
doc.get_first(my_text_field).unwrap().as_value().as_str(),
Some("blubber")
);
let doc = searcher
.doc::<TantivyDocument>(DocAddress::new(0, 0))
.unwrap();
assert_eq!(doc.get_first(int_field).unwrap().as_u64(), Some(1000));
assert_eq!(
doc.get_first(int_field).unwrap().as_value().as_u64(),
Some(1000)
);
}
}

Expand Down
11 changes: 7 additions & 4 deletions src/indexer/segment_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -500,8 +500,8 @@ mod tests {
use crate::postings::{Postings, TermInfo};
use crate::query::{PhraseQuery, QueryParser};
use crate::schema::{
Document, IndexRecordOption, OwnedValue, Schema, TextFieldIndexing, TextOptions, STORED,
STRING, TEXT,
Document, IndexRecordOption, OwnedValue, Schema, TextFieldIndexing, TextOptions, Value,
STORED, STRING, TEXT,
};
use crate::store::{Compressor, StoreReader, StoreWriter};
use crate::time::format_description::well_known::Rfc3339;
Expand Down Expand Up @@ -555,9 +555,12 @@ mod tests {
let doc = reader.get::<TantivyDocument>(0).unwrap();

assert_eq!(doc.field_values().count(), 2);
assert_eq!(doc.get_all(text_field).next().unwrap().as_str(), Some("A"));
assert_eq!(
doc.get_all(text_field).nth(1).unwrap().as_str(),
doc.get_all(text_field).next().unwrap().as_value().as_str(),
Some("A")
);
assert_eq!(
doc.get_all(text_field).nth(1).unwrap().as_value().as_str(),
Some("title")
);
}
Expand Down
160 changes: 90 additions & 70 deletions src/schema/document/default_document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -157,29 +157,24 @@ impl CompactDoc {
}

/// field_values accessor
pub fn field_values(
&self,
) -> impl Iterator<Item = (Field, ReferenceValue<'_, CompactDocValue<'_>>)> {
pub fn field_values(&self) -> impl Iterator<Item = (Field, CompactDocValue<'_>)> {
self.field_values.iter().map(|field_val| {
let field = Field::from_field_id(field_val.field as u32);
let val = self.extract_value(field_val.value_addr).unwrap();
let val = self.get_compact_doc_value(field_val.value_addr);
(field, val)
})
}

/// Returns all of the `ReferenceValue`s associated the given field
pub fn get_all(
&self,
field: Field,
) -> impl Iterator<Item = ReferenceValue<'_, CompactDocValue<'_>>> + '_ {
pub fn get_all(&self, field: Field) -> impl Iterator<Item = CompactDocValue<'_>> + '_ {
self.field_values
.iter()
.filter(move |field_value| Field::from_field_id(field_value.field as u32) == field)
.map(|val| self.extract_value(val.value_addr).unwrap())
.map(|val| self.get_compact_doc_value(val.value_addr))
}

/// Returns the first `ReferenceValue` associated the given field
pub fn get_first(&self, field: Field) -> Option<ReferenceValue<'_, CompactDocValue<'_>>> {
pub fn get_first(&self, field: Field) -> Option<CompactDocValue<'_>> {
self.get_all(field).next()
}

Expand Down Expand Up @@ -299,58 +294,11 @@ impl CompactDoc {
}
}

fn extract_value(
&self,
ref_value: ValueAddr,
) -> io::Result<ReferenceValue<'_, CompactDocValue<'_>>> {
match ref_value.type_id {
ValueType::Null => Ok(ReferenceValueLeaf::Null.into()),
ValueType::Str => {
let str_ref = self.extract_str(ref_value.val_addr);
Ok(ReferenceValueLeaf::Str(str_ref).into())
}
ValueType::Facet => {
let str_ref = self.extract_str(ref_value.val_addr);
Ok(ReferenceValueLeaf::Facet(str_ref).into())
}
ValueType::Bytes => {
let data = self.extract_bytes(ref_value.val_addr);
Ok(ReferenceValueLeaf::Bytes(data).into())
}
ValueType::U64 => self
.read_from::<u64>(ref_value.val_addr)
.map(ReferenceValueLeaf::U64)
.map(Into::into),
ValueType::I64 => self
.read_from::<i64>(ref_value.val_addr)
.map(ReferenceValueLeaf::I64)
.map(Into::into),
ValueType::F64 => self
.read_from::<f64>(ref_value.val_addr)
.map(ReferenceValueLeaf::F64)
.map(Into::into),
ValueType::Bool => Ok(ReferenceValueLeaf::Bool(ref_value.val_addr != 0).into()),
ValueType::Date => self
.read_from::<i64>(ref_value.val_addr)
.map(|ts| ReferenceValueLeaf::Date(DateTime::from_timestamp_nanos(ts)))
.map(Into::into),
ValueType::IpAddr => self
.read_from::<u128>(ref_value.val_addr)
.map(|num| ReferenceValueLeaf::IpAddr(Ipv6Addr::from_u128(num)))
.map(Into::into),
ValueType::PreTokStr => self
.read_from::<PreTokenizedString>(ref_value.val_addr)
.map(Into::into)
.map(ReferenceValueLeaf::PreTokStr)
.map(Into::into),
ValueType::Object => Ok(ReferenceValue::Object(CompactDocObjectIter::new(
self,
ref_value.val_addr,
)?)),
ValueType::Array => Ok(ReferenceValue::Array(CompactDocArrayIter::new(
self,
ref_value.val_addr,
)?)),
/// Get CompactDocValue for address
fn get_compact_doc_value(&self, value_addr: ValueAddr) -> CompactDocValue<'_> {
CompactDocValue {
container: self,
value_addr,
}
}

Expand Down Expand Up @@ -410,7 +358,7 @@ impl PartialEq for CompactDoc {
let convert_to_comparable_map = |doc: &CompactDoc| {
let mut field_value_set: HashMap<Field, HashSet<String>> = Default::default();
for field_value in doc.field_values.iter() {
let value: OwnedValue = doc.extract_value(field_value.value_addr).unwrap().into();
let value: OwnedValue = doc.get_compact_doc_value(field_value.value_addr).into();
let value = serde_json::to_string(&value).unwrap();
field_value_set
.entry(Field::from_field_id(field_value.field as u32))
Expand Down Expand Up @@ -444,15 +392,87 @@ impl DocumentDeserialize for CompactDoc {
#[derive(Debug, Clone, Copy)]
pub struct CompactDocValue<'a> {
container: &'a CompactDoc,
value: ValueAddr,
value_addr: ValueAddr,
}
impl PartialEq for CompactDocValue<'_> {
fn eq(&self, other: &Self) -> bool {
let value1: OwnedValue = (*self).into();
let value2: OwnedValue = (*other).into();
value1 == value2
}
}
impl<'a> From<CompactDocValue<'a>> for OwnedValue {
fn from(value: CompactDocValue) -> Self {
value.as_value().into()
}
}
impl<'a> Value<'a> for CompactDocValue<'a> {
type ArrayIter = CompactDocArrayIter<'a>;

type ObjectIter = CompactDocObjectIter<'a>;

fn as_value(&self) -> ReferenceValue<'a, Self> {
self.container.extract_value(self.value).unwrap()
self.get_ref_value().unwrap()
}
}
impl<'a> CompactDocValue<'a> {
fn get_ref_value(&self) -> io::Result<ReferenceValue<'a, CompactDocValue<'a>>> {
let addr = self.value_addr.val_addr;
match self.value_addr.type_id {
ValueType::Null => Ok(ReferenceValueLeaf::Null.into()),
ValueType::Str => {
let str_ref = self.container.extract_str(addr);
Ok(ReferenceValueLeaf::Str(str_ref).into())
}
ValueType::Facet => {
let str_ref = self.container.extract_str(addr);
Ok(ReferenceValueLeaf::Facet(str_ref).into())
}
ValueType::Bytes => {
let data = self.container.extract_bytes(addr);
Ok(ReferenceValueLeaf::Bytes(data).into())
}
ValueType::U64 => self
.container
.read_from::<u64>(addr)
.map(ReferenceValueLeaf::U64)
.map(Into::into),
ValueType::I64 => self
.container
.read_from::<i64>(addr)
.map(ReferenceValueLeaf::I64)
.map(Into::into),
ValueType::F64 => self
.container
.read_from::<f64>(addr)
.map(ReferenceValueLeaf::F64)
.map(Into::into),
ValueType::Bool => Ok(ReferenceValueLeaf::Bool(addr != 0).into()),
ValueType::Date => self
.container
.read_from::<i64>(addr)
.map(|ts| ReferenceValueLeaf::Date(DateTime::from_timestamp_nanos(ts)))
.map(Into::into),
ValueType::IpAddr => self
.container
.read_from::<u128>(addr)
.map(|num| ReferenceValueLeaf::IpAddr(Ipv6Addr::from_u128(num)))
.map(Into::into),
ValueType::PreTokStr => self
.container
.read_from::<PreTokenizedString>(addr)
.map(Into::into)
.map(ReferenceValueLeaf::PreTokStr)
.map(Into::into),
ValueType::Object => Ok(ReferenceValue::Object(CompactDocObjectIter::new(
self.container,
addr,
)?)),
ValueType::Array => Ok(ReferenceValue::Array(CompactDocArrayIter::new(
self.container,
addr,
)?)),
}
}
}

Expand Down Expand Up @@ -601,9 +621,9 @@ impl<'a> Iterator for CompactDocObjectIter<'a> {
let value = ValueAddr::deserialize(&mut self.node_addresses_slice).ok()?;
let value = CompactDocValue {
container: self.container,
value,
value_addr: value,
};
return Some((key, value));
Some((key, value))
}
}

Expand Down Expand Up @@ -635,9 +655,9 @@ impl<'a> Iterator for CompactDocArrayIter<'a> {
let value = ValueAddr::deserialize(&mut self.node_addresses_slice).ok()?;
let value = CompactDocValue {
container: self.container,
value,
value_addr: value,
};
return Some(value);
Some(value)
}
}

Expand Down Expand Up @@ -668,7 +688,7 @@ impl<'a> Iterator for FieldValueIterRef<'a> {
Field::from_field_id(field_value.field as u32),
CompactDocValue::<'a> {
container: self.container,
value: field_value.value_addr,
value_addr: field_value.value_addr,
},
)
})
Expand Down
4 changes: 2 additions & 2 deletions src/schema/field_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -659,9 +659,9 @@ mod tests {
let schema = schema_builder.build();
let doc_json = r#"{"date": "2019-10-12T07:20:50.52+02:00"}"#;
let doc = TantivyDocument::parse_json(&schema, doc_json).unwrap();
let date = doc.get_first(date_field).unwrap();
let date = OwnedValue::from(doc.get_first(date_field).unwrap());
// Time zone is converted to UTC
assert_eq!("Leaf(Date(2019-10-12T05:20:50.52Z))", format!("{date:?}"));
assert_eq!("Date(2019-10-12T05:20:50.52Z)", format!("{date:?}"));
}

#[test]
Expand Down
Loading

0 comments on commit 2e3641c

Please sign in to comment.