diff --git a/LICENSE-3rdparty.csv b/LICENSE-3rdparty.csv index 706fbbef674..5456a7ec859 100644 --- a/LICENSE-3rdparty.csv +++ b/LICENSE-3rdparty.csv @@ -155,6 +155,7 @@ enum-iterator,https://github.com/stephaneyfx/enum-iterator,MIT,Stephane Raux env_logger,https://github.com/rust-cli/env_logger,MIT OR Apache-2.0,The env_logger Authors equivalent,https://github.com/indexmap-rs/equivalent,Apache-2.0 OR MIT,The equivalent Authors +erased-serde,https://github.com/dtolnay/erased-serde,MIT OR Apache-2.0,David Tolnay errno,https://github.com/lambda-fairy/rust-errno,MIT OR Apache-2.0,"Chris Wong , Dan Gohman " fail,https://github.com/tikv/fail-rs,Apache-2.0,The TiKV Project Developers fastdivide,https://github.com/fulmicoton/fastdivide,zlib-acknowledgement OR MIT,Paul Masurel @@ -229,6 +230,7 @@ indexmap,https://github.com/indexmap-rs/indexmap,Apache-2.0 OR MIT,The indexmap indicatif,https://github.com/console-rs/indicatif,MIT,The indicatif Authors inout,https://github.com/RustCrypto/utils,MIT OR Apache-2.0,RustCrypto Developers instant,https://github.com/sebcrozet/instant,BSD-3-Clause,sebcrozet +inventory,https://github.com/dtolnay/inventory,MIT OR Apache-2.0,David Tolnay io-uring,https://github.com/tokio-rs/io-uring,MIT OR Apache-2.0,quininer ipnet,https://github.com/krisprice/ipnet,MIT OR Apache-2.0,Kris Price ipnetwork,https://github.com/achanda/ipnetwork,MIT OR Apache-2.0,"Abhishek Chanda , Linus Färnstrand " @@ -512,7 +514,10 @@ tracing-serde,https://github.com/tokio-rs/tracing,MIT,Tokio Contributors , David Barsky , Tokio Contributors " try-lock,https://github.com/seanmonstar/try-lock,MIT,Sean McArthur ttl_cache,https://github.com/stusmall/ttl_cache,MIT OR Apache-2.0,Stu Small +typeid,https://github.com/dtolnay/typeid,MIT OR Apache-2.0,David Tolnay typenum,https://github.com/paholg/typenum,MIT OR Apache-2.0,"Paho Lurie-Gregg , Andre Bogus " +typetag,https://github.com/dtolnay/typetag,MIT OR Apache-2.0,David Tolnay +typetag-impl,https://github.com/dtolnay/typetag,MIT OR Apache-2.0,David Tolnay ulid,https://github.com/dylanhart/ulid-rs,MIT,dylanhart unarray,https://github.com/cameron1024/unarray,MIT OR Apache-2.0,The unarray Authors unicase,https://github.com/seanmonstar/unicase,MIT OR Apache-2.0,Sean McArthur diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock index f5cd19ec89e..2c33224b843 100644 --- a/quickwit/Cargo.lock +++ b/quickwit/Cargo.lock @@ -2818,6 +2818,16 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" +[[package]] +name = "erased-serde" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e004d887f51fcb9fef17317a2f3525c887d8aa3f4f50fed920816a688284a5b7" +dependencies = [ + "serde", + "typeid", +] + [[package]] name = "errno" version = "0.3.14" @@ -4176,6 +4186,15 @@ dependencies = [ "web-sys", ] +[[package]] +name = "inventory" +version = "0.3.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc61209c082fbeb19919bee74b176221b27223e27b65d781eb91af24eb1fb46e" +dependencies = [ + "rustversion", +] + [[package]] name = "io-uring" version = "0.7.10" @@ -5682,7 +5701,7 @@ checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" [[package]] name = "ownedbytes" version = "0.9.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=dabcaa5#dabcaa58093a3f7f10e98a5a3b06cfe2370482f9" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=70e591e#70e591e2306e8c3a87d65f0690030fed978efe3c" dependencies = [ "stable_deref_trait", ] @@ -9570,7 +9589,7 @@ checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" [[package]] name = "tantivy" version = "0.25.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=dabcaa5#dabcaa58093a3f7f10e98a5a3b06cfe2370482f9" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=70e591e#70e591e2306e8c3a87d65f0690030fed978efe3c" dependencies = [ "aho-corasick", "arc-swap", @@ -9617,6 +9636,7 @@ dependencies = [ "tempfile", "thiserror 2.0.16", "time", + "typetag", "uuid", "winapi 0.3.9", "zstd 0.13.3", @@ -9625,7 +9645,7 @@ dependencies = [ [[package]] name = "tantivy-bitpacker" version = "0.9.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=dabcaa5#dabcaa58093a3f7f10e98a5a3b06cfe2370482f9" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=70e591e#70e591e2306e8c3a87d65f0690030fed978efe3c" dependencies = [ "bitpacking", ] @@ -9633,7 +9653,7 @@ dependencies = [ [[package]] name = "tantivy-columnar" version = "0.6.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=dabcaa5#dabcaa58093a3f7f10e98a5a3b06cfe2370482f9" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=70e591e#70e591e2306e8c3a87d65f0690030fed978efe3c" dependencies = [ "downcast-rs", "fastdivide", @@ -9648,7 +9668,7 @@ dependencies = [ [[package]] name = "tantivy-common" version = "0.10.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=dabcaa5#dabcaa58093a3f7f10e98a5a3b06cfe2370482f9" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=70e591e#70e591e2306e8c3a87d65f0690030fed978efe3c" dependencies = [ "async-trait", "byteorder", @@ -9671,7 +9691,7 @@ dependencies = [ [[package]] name = "tantivy-query-grammar" version = "0.25.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=dabcaa5#dabcaa58093a3f7f10e98a5a3b06cfe2370482f9" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=70e591e#70e591e2306e8c3a87d65f0690030fed978efe3c" dependencies = [ "fnv", "nom 7.1.3", @@ -9683,7 +9703,7 @@ dependencies = [ [[package]] name = "tantivy-sstable" version = "0.6.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=dabcaa5#dabcaa58093a3f7f10e98a5a3b06cfe2370482f9" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=70e591e#70e591e2306e8c3a87d65f0690030fed978efe3c" dependencies = [ "futures-util", "itertools 0.14.0", @@ -9696,7 +9716,7 @@ dependencies = [ [[package]] name = "tantivy-stacker" version = "0.6.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=dabcaa5#dabcaa58093a3f7f10e98a5a3b06cfe2370482f9" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=70e591e#70e591e2306e8c3a87d65f0690030fed978efe3c" dependencies = [ "murmurhash32", "rand_distr", @@ -9706,7 +9726,7 @@ dependencies = [ [[package]] name = "tantivy-tokenizer-api" version = "0.6.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=dabcaa5#dabcaa58093a3f7f10e98a5a3b06cfe2370482f9" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=70e591e#70e591e2306e8c3a87d65f0690030fed978efe3c" dependencies = [ "serde", ] @@ -10415,12 +10435,42 @@ version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" +[[package]] +name = "typeid" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc7d623258602320d5c55d1bc22793b57daff0ec7efc270ea7d55ce1d5f5471c" + [[package]] name = "typenum" version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" +[[package]] +name = "typetag" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be2212c8a9b9bcfca32024de14998494cf9a5dfa59ea1b829de98bac374b86bf" +dependencies = [ + "erased-serde", + "inventory", + "once_cell", + "serde", + "typetag-impl", +] + +[[package]] +name = "typetag-impl" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27a7a9b72ba121f6f1f6c3632b85604cac41aedb5ddc70accbebb6cac83de846" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] + [[package]] name = "tz-rs" version = "0.6.14" diff --git a/quickwit/Cargo.toml b/quickwit/Cargo.toml index 89e1429d9ab..e623a628d52 100644 --- a/quickwit/Cargo.toml +++ b/quickwit/Cargo.toml @@ -346,7 +346,7 @@ quickwit-serve = { path = "quickwit-serve" } quickwit-storage = { path = "quickwit-storage" } quickwit-telemetry = { path = "quickwit-telemetry" } -tantivy = { git = "https://github.com/quickwit-oss/tantivy/", rev = "dabcaa5", default-features = false, features = [ +tantivy = { git = "https://github.com/quickwit-oss/tantivy/", rev = "70e591e", default-features = false, features = [ "lz4-compression", "mmap", "quickwit", diff --git a/quickwit/quickwit-query/src/aggregations.rs b/quickwit/quickwit-query/src/aggregations.rs index eca275ad265..6b7755c4fcc 100644 --- a/quickwit/quickwit-query/src/aggregations.rs +++ b/quickwit/quickwit-query/src/aggregations.rs @@ -189,6 +189,9 @@ impl From for BucketResult { sum_other_doc_count, doc_count_error_upper_bound, }, + TantivyBucketResult::Filter(_filter_bucket_result) => { + unimplemented!("filter aggregation is not yet supported in quickwit") + } } } } diff --git a/quickwit/quickwit-search/src/collector.rs b/quickwit/quickwit-search/src/collector.rs index 8e3b4925101..ed21fd968ba 100644 --- a/quickwit/quickwit-search/src/collector.rs +++ b/quickwit/quickwit-search/src/collector.rs @@ -27,10 +27,11 @@ use quickwit_proto::types::SplitId; use serde::Deserialize; use tantivy::aggregation::agg_req::{Aggregations, get_fast_field_names}; use tantivy::aggregation::intermediate_agg_result::IntermediateAggregationResults; -use tantivy::aggregation::{AggregationLimitsGuard, AggregationSegmentCollector}; +use tantivy::aggregation::{AggContextParams, AggregationLimitsGuard, AggregationSegmentCollector}; use tantivy::collector::{Collector, SegmentCollector}; use tantivy::columnar::{ColumnType, MonotonicallyMappableToU64}; use tantivy::fastfield::Column; +use tantivy::tokenizer::TokenizerManager; use tantivy::{DateTime, DocId, Score, SegmentOrdinal, SegmentReader, TantivyError}; use crate::find_trace_ids_collector::{FindTraceIdsCollector, FindTraceIdsSegmentCollector, Span}; @@ -715,7 +716,7 @@ pub(crate) struct QuickwitCollector { pub max_hits: usize, pub sort_by: SortByPair, pub aggregation: Option, - pub aggregation_limits: AggregationLimitsGuard, + pub agg_context_params: AggContextParams, search_after: Option, } @@ -785,7 +786,7 @@ impl Collector for QuickwitCollector { aggs, segment_reader, segment_ord, - &self.aggregation_limits, + &self.agg_context_params, )?, ), ), @@ -1033,7 +1034,7 @@ pub(crate) fn sort_by_from_request(search_request: &SearchRequest) -> SortByPair pub(crate) fn make_collector_for_split( split_id: SplitId, search_request: &SearchRequest, - aggregation_limits: AggregationLimitsGuard, + agg_context_params: AggContextParams, ) -> crate::Result { let aggregation = match &search_request.aggregation_request { Some(aggregation) => Some(serde_json::from_str(aggregation)?), @@ -1046,7 +1047,7 @@ pub(crate) fn make_collector_for_split( max_hits: search_request.max_hits as usize, sort_by, aggregation, - aggregation_limits, + agg_context_params, search_after: search_request.search_after.clone(), }) } @@ -1054,8 +1055,15 @@ pub(crate) fn make_collector_for_split( /// Builds a QuickwitCollector that's only useful for merging fruits. pub(crate) fn make_merge_collector( search_request: &SearchRequest, - aggregation_limits: &AggregationLimitsGuard, + agg_limits: AggregationLimitsGuard, ) -> crate::Result { + // Note: at this point the tokenizer manager is not used anymore by aggregations (filter query), + // so we can create an empty one. So if it will ever be used, it would panic. + let agg_context_params = AggContextParams { + limits: agg_limits, + tokenizers: TokenizerManager::new(), + }; + let aggregation = match &search_request.aggregation_request { Some(aggregation) => Some(serde_json::from_str(aggregation)?), None => None, @@ -1067,7 +1075,7 @@ pub(crate) fn make_merge_collector( max_hits: search_request.max_hits as usize, sort_by, aggregation, - aggregation_limits: aggregation_limits.clone(), + agg_context_params, search_after: search_request.search_after.clone(), }) } @@ -1748,7 +1756,7 @@ mod tests { request: &SearchRequest, results: Vec, ) -> LeafSearchResponse { - let collector = make_merge_collector(request, &Default::default()).unwrap(); + let collector = make_merge_collector(request, Default::default()).unwrap(); let mut incremental_collector = IncrementalCollector::new(collector.clone()); let result = collector diff --git a/quickwit/quickwit-search/src/leaf.rs b/quickwit/quickwit-search/src/leaf.rs index f5bfcc7d728..c1362d83600 100644 --- a/quickwit/quickwit-search/src/leaf.rs +++ b/quickwit/quickwit-search/src/leaf.rs @@ -35,8 +35,8 @@ use quickwit_storage::{ BundleStorage, ByteRangeCache, MemorySizedCache, OwnedBytes, SplitCache, Storage, StorageResolver, TimeoutAndRetryStorage, wrap_storage_with_cache, }; -use tantivy::aggregation::AggregationLimitsGuard; use tantivy::aggregation::agg_req::{AggregationVariants, Aggregations}; +use tantivy::aggregation::{AggContextParams, AggregationLimitsGuard}; use tantivy::directory::FileSlice; use tantivy::fastfield::FastFieldReaders; use tantivy::schema::Field; @@ -489,8 +489,12 @@ async fn leaf_search_single_split( .try_into()?; let searcher = reader.searcher(); + let agg_context_params = AggContextParams { + limits: aggregations_limits, + tokenizers: doc_mapper.tokenizer_manager().tantivy_manager().clone(), + }; let mut collector = - make_collector_for_split(split_id.clone(), &search_request, aggregations_limits)?; + make_collector_for_split(split_id.clone(), &search_request, agg_context_params)?; let split_schema = index.schema(); let (query, mut warmup_info) = doc_mapper.query(split_schema.clone(), &query_ast, false)?; @@ -1226,7 +1230,7 @@ pub async fn multi_index_leaf_search( try_join_all(leaf_request_tasks), ) .await??; - let merge_collector = make_merge_collector(&search_request, &aggregation_limits)?; + let merge_collector = make_merge_collector(&search_request, aggregation_limits)?; let mut incremental_merge_collector = IncrementalCollector::new(merge_collector); for result in leaf_responses { match result { @@ -1310,7 +1314,7 @@ pub async fn single_doc_mapping_leaf_search( let mut leaf_search_single_split_join_handles: Vec<(String, tokio::task::JoinHandle<()>)> = Vec::with_capacity(split_with_req.len()); - let merge_collector = make_merge_collector(&request, &aggregations_limits)?; + let merge_collector = make_merge_collector(&request, aggregations_limits.clone())?; let incremental_merge_collector = IncrementalCollector::new(merge_collector); let incremental_merge_collector = Arc::new(Mutex::new(incremental_merge_collector)); diff --git a/quickwit/quickwit-search/src/root.rs b/quickwit/quickwit-search/src/root.rs index 7d89bc1baa9..345cfdacd19 100644 --- a/quickwit/quickwit-search/src/root.rs +++ b/quickwit/quickwit-search/src/root.rs @@ -751,9 +751,8 @@ pub(crate) async fn search_partial_hits_phase( try_join_all(leaf_request_tasks).await? }; - // Creates a collector which merges responses into one let merge_collector = - make_merge_collector(search_request, &searcher_context.get_aggregation_limits())?; + make_merge_collector(search_request, searcher_context.get_aggregation_limits())?; // Merging is a cpu-bound task. // It should be executed by Tokio's blocking threads. @@ -1287,7 +1286,7 @@ pub async fn search_plan( &request_metadata.query_ast_resolved, true, )?; - let merge_collector = make_merge_collector(&search_request, &Default::default())?; + let merge_collector = make_merge_collector(&search_request, Default::default())?; warmup_info.merge(merge_collector.warmup_info()); warmup_info.simplify();