Skip to content

Commit

Permalink
add missing Bytes validation to term_agg (#2077)
Browse files Browse the repository at this point in the history
returns empty for now instead of failing like before
  • Loading branch information
PSeitz committed Jun 12, 2023
1 parent 3a82ef2 commit 657f0cd
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 3 deletions.
6 changes: 3 additions & 3 deletions src/aggregation/agg_req_with_accessor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,14 +74,14 @@ impl AggregationWithAccessor {
ColumnType::I64,
ColumnType::U64,
ColumnType::F64,
ColumnType::Bytes,
ColumnType::Str,
// ColumnType::Bytes Unsupported
// ColumnType::Bool Unsupported
// ColumnType::IpAddr Unsupported
// ColumnType::DateTime Unsupported
];
let mut columns =
get_all_ff_reader(reader, field_name, Some(&allowed_column_types))?;
get_all_ff_reader_or_empty(reader, field_name, Some(&allowed_column_types))?;
let first = columns.pop().unwrap();
accessor2 = columns.pop();
first
Expand Down Expand Up @@ -177,7 +177,7 @@ fn get_ff_reader(
/// Get all fast field reader or empty as default.
///
/// Is guaranteed to return at least one column.
fn get_all_ff_reader(
fn get_all_ff_reader_or_empty(
reader: &SegmentReader,
field_name: &str,
allowed_column_types: Option<&[ColumnType]>,
Expand Down
43 changes: 43 additions & 0 deletions src/aggregation/bucket/term_agg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,12 @@ impl SegmentTermCollector {
field_type: ColumnType,
accessor_idx: usize,
) -> crate::Result<Self> {
if field_type == ColumnType::Bytes || field_type == ColumnType::Bool {
return Err(TantivyError::InvalidArgument(format!(
"terms aggregation is not supported for column type {:?}",
field_type
)));
}
let term_buckets = TermBuckets::default();

if let Some(custom_order) = req.order.as_ref() {
Expand Down Expand Up @@ -1500,4 +1506,41 @@ mod tests {

Ok(())
}

#[test]
fn terms_aggregation_bytes() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let bytes_field = schema_builder.add_bytes_field("bytes", FAST);
let index = Index::create_in_ram(schema_builder.build());
{
let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?;
index_writer.set_merge_policy(Box::new(NoMergePolicy));
index_writer.add_document(doc!(
bytes_field => vec![1,2,3],
))?;
index_writer.commit()?;
}

let agg_req: Aggregations = serde_json::from_value(json!({
"my_texts": {
"terms": {
"field": "bytes"
},
}
}))
.unwrap();

let res = exec_request_with_query(agg_req, &index, None)?;

// TODO: Returning an error would be better instead of an empty result, since this is not a
// JSON field
assert_eq!(
res["my_texts"]["buckets"][0]["key"],
serde_json::Value::Null
);
assert_eq!(res["my_texts"]["sum_other_doc_count"], 0);
assert_eq!(res["my_texts"]["doc_count_error_upper_bound"], 0);

Ok(())
}
}

0 comments on commit 657f0cd

Please sign in to comment.