Skip to content

Commit

Permalink
Merge pull request #1570 from quickwit-oss/no_sort_on_multi
Browse files Browse the repository at this point in the history
validate index settings on create
  • Loading branch information
PSeitz committed Sep 30, 2022
2 parents fadd784 + 0e94213 commit 2fe4271
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 5 deletions.
40 changes: 36 additions & 4 deletions src/core/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use crate::error::{DataCorruption, TantivyError};
use crate::indexer::index_writer::{MAX_NUM_THREAD, MEMORY_ARENA_NUM_BYTES_MIN};
use crate::indexer::segment_updater::save_metas;
use crate::reader::{IndexReader, IndexReaderBuilder};
use crate::schema::{Field, FieldType, Schema};
use crate::schema::{Cardinality, Field, FieldType, Schema};
use crate::tokenizer::{TextAnalyzer, TokenizerManager};
use crate::IndexWriter;

Expand Down Expand Up @@ -152,9 +152,7 @@ impl IndexBuilder {
/// This should only be used for unit tests.
pub fn create_in_ram(self) -> Result<Index, TantivyError> {
let ram_directory = RamDirectory::create();
Ok(self
.create(ram_directory)
.expect("Creating a RamDirectory should never fail"))
self.create(ram_directory)
}

/// Creates a new index in a given filepath.
Expand Down Expand Up @@ -228,10 +226,44 @@ impl IndexBuilder {
))
}
}

fn validate(&self) -> crate::Result<()> {
if let Some(schema) = self.schema.as_ref() {
if let Some(sort_by_field) = self.index_settings.sort_by_field.as_ref() {
let schema_field = schema.get_field(&sort_by_field.field).ok_or_else(|| {
TantivyError::InvalidArgument(format!(
"Field to sort index {} not found in schema",
sort_by_field.field
))
})?;
let entry = schema.get_field_entry(schema_field);
if !entry.is_fast() {
return Err(TantivyError::InvalidArgument(format!(
"Field {} is no fast field. Field needs to be a single value fast field \
to be used to sort an index",
sort_by_field.field
)));
}
if entry.field_type().fastfield_cardinality() != Some(Cardinality::SingleValue) {
return Err(TantivyError::InvalidArgument(format!(
"Only single value fast field Cardinality supported for sorting index {}",
sort_by_field.field
)));
}
}
Ok(())
} else {
Err(TantivyError::InvalidArgument(
"no schema passed".to_string(),
))
}
}

/// Creates a new index given an implementation of the trait `Directory`.
///
/// If a directory previously existed, it will be erased.
fn create<T: Into<Box<dyn Directory>>>(self, dir: T) -> crate::Result<Index> {
self.validate()?;
let dir = dir.into();
let directory = ManagedDirectory::wrap(dir)?;
save_new_metas(
Expand Down
29 changes: 29 additions & 0 deletions src/indexer/index_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1395,6 +1395,35 @@ mod tests {
assert!(commit_again.is_ok());
}

#[test]
fn test_sort_by_multivalue_field_error() -> crate::Result<()> {
let mut schema_builder = schema::Schema::builder();
let options = NumericOptions::default().set_fast(Cardinality::MultiValues);
schema_builder.add_u64_field("id", options);
let schema = schema_builder.build();

let settings = IndexSettings {
sort_by_field: Some(IndexSortByField {
field: "id".to_string(),
order: Order::Desc,
}),
..Default::default()
};

let err = Index::builder()
.schema(schema)
.settings(settings)
.create_in_ram()
.unwrap_err();
assert_eq!(
err.to_string(),
"An invalid argument was passed: 'Only single value fast field Cardinality supported \
for sorting index id'"
);

Ok(())
}

#[test]
fn test_delete_with_sort_by_field() -> crate::Result<()> {
let mut schema_builder = schema::Schema::builder();
Expand Down
1 change: 0 additions & 1 deletion src/query/set_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ impl TermSetQuery {
let error_msg = format!("Field {:?} is not indexed.", field_entry.name());
return Err(crate::TantivyError::SchemaError(error_msg));
}
let field_type = field_type.value_type();

// In practice this won't fail because:
// - we are writing to memory, so no IoError
Expand Down
21 changes: 21 additions & 0 deletions src/schema/field_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use serde::{Deserialize, Serialize};
use serde_json::Value as JsonValue;
use thiserror::Error;

use super::Cardinality;
use crate::schema::bytes_options::BytesOptions;
use crate::schema::facet_options::FacetOptions;
use crate::schema::{
Expand Down Expand Up @@ -214,6 +215,26 @@ impl FieldType {
}
}

/// returns true if the field is fast.
pub fn fastfield_cardinality(&self) -> Option<Cardinality> {
match *self {
FieldType::Bytes(ref bytes_options) if bytes_options.is_fast() => {
Some(Cardinality::SingleValue)
}
FieldType::Str(ref text_options) if text_options.is_fast() => {
Some(Cardinality::MultiValues)
}
FieldType::U64(ref int_options)
| FieldType::I64(ref int_options)
| FieldType::F64(ref int_options)
| FieldType::Bool(ref int_options) => int_options.get_fastfield_cardinality(),
FieldType::Date(ref date_options) => date_options.get_fastfield_cardinality(),
FieldType::Facet(_) => Some(Cardinality::MultiValues),
FieldType::JsonObject(_) => None,
_ => None,
}
}

/// returns true if the field is normed (see [fieldnorms](crate::fieldnorm)).
pub fn has_fieldnorms(&self) -> bool {
match *self {
Expand Down

0 comments on commit 2fe4271

Please sign in to comment.