quickwit-oss · PSeitz-dd · Sep 18, 2025 · Sep 17, 2025 · Sep 17, 2025 · Sep 18, 2025
diff --git a/quickwit/quickwit-doc-mapper/src/query_builder.rs b/quickwit/quickwit-doc-mapper/src/query_builder.rs
@@ -45,12 +45,73 @@ impl<'a> QueryAstVisitor<'a> for RangeQueryFields {
     }
 }
 
-struct ExistsQueryFastFields {
-    fields: HashSet<FastFieldWarmupInfo>,
+/// Visitor recording warmup info for the fields targeted by term, term set, and full text
+/// queries when those fields are fast but not indexed.
+struct TermSearchOnColumnar<'f> {
+    fields: &'f mut HashSet<FastFieldWarmupInfo>,
     schema: Schema,
 }
+
+impl TermSearchOnColumnar<'_> {
+    /// Adds the warmup entry for `field_name` to `self.fields` when the field resolves in the
+    /// schema and is fast but not indexed. Any other field is ignored.
+    fn record_if_fast_only(&mut self, field_name: &str) {
+        let Some((_field, field_entry, path)) = find_field_or_hit_dynamic(field_name, &self.schema)
+        else {
+            return;
+        };
+        if !field_entry.is_fast() || field_entry.is_indexed() {
+            return;
+        }
+        // A non-empty `path` is appended to the field entry name, separated by a dot.
+        let name = if path.is_empty() {
+            field_entry.name().to_string()
+        } else {
+            format!("{}.{}", field_entry.name(), path)
+        };
+        self.fields.insert(FastFieldWarmupInfo {
+            name,
+            with_subfields: false,
+        });
+    }
+}
+
+impl<'a, 'f> QueryAstVisitor<'a> for TermSearchOnColumnar<'f> {
+    type Err = Infallible;
+
+    /// Records every field referenced by the term set query.
+    fn visit_term_set(&mut self, term_set_query: &'a TermSetQuery) -> Result<(), Infallible> {
+        for field in term_set_query.terms_per_field.keys() {
+            self.record_if_fast_only(field);
+        }
+        Ok(())
+    }
+
+    /// Records the field referenced by the term query.
+    fn visit_term(
+        &mut self,
+        term_query: &'a quickwit_query::query_ast::TermQuery,
+    ) -> Result<(), Infallible> {
+        self.record_if_fast_only(&term_query.field);
+        Ok(())
+    }
+
+    /// We also need to visit full text queries because they can be converted to term queries
+    /// on fast fields. The field is only recorded when the tokenizer is `raw` or unset.
+    fn visit_full_text(&mut self, full_text_query: &'a FullTextQuery) -> Result<(), Infallible> {
+        if matches!(full_text_query.params.tokenizer.as_deref(), None | Some("raw")) {
+            self.record_if_fast_only(&full_text_query.field);
+        }
+        Ok(())
+    }
+}
 
-impl<'a> QueryAstVisitor<'a> for ExistsQueryFastFields {
+struct ExistsQueryFastFields<'f> {
+    fields: &'f mut HashSet<FastFieldWarmupInfo>,
+    schema: Schema,
+}
+
+impl<'a, 'f> QueryAstVisitor<'a> for ExistsQueryFastFields<'f> {
     type Err = Infallible;
 
     fn visit_exists(&mut self, exists_query: &'a FieldPresenceQuery) -> Result<(), Infallible> {
@@ -88,18 +162,11 @@ pub(crate) fn build_query(
     search_fields: &[String],
     with_validation: bool,
 ) -> Result<(Box<dyn Query>, WarmupInfo), QueryParserError> {
-    let mut range_query_fields = RangeQueryFields::default();
-    // This cannot fail. The error type is Infallible.
-    let _: Result<(), Infallible> = range_query_fields.visit(query_ast);
+    let mut fast_fields: HashSet<FastFieldWarmupInfo> = HashSet::new();
 
-    let mut exists_query_fields = ExistsQueryFastFields {
-        fields: HashSet::new(),
-        schema: schema.clone(),
-    };
+    let mut range_query_fields = RangeQueryFields::default();
     // This cannot fail. The error type is Infallible.
-    let _: Result<(), Infallible> = exists_query_fields.visit(query_ast);
-
-    let mut fast_fields = HashSet::new();
+    let Ok(_) = range_query_fields.visit(query_ast);
     let range_query_fast_fields =
         range_query_fields
             .range_query_field_names
@@ -109,7 +176,18 @@ pub(crate) fn build_query(
                 with_subfields: false,
             });
     fast_fields.extend(range_query_fast_fields);
-    fast_fields.extend(exists_query_fields.fields);
+
+    let Ok(_) = TermSearchOnColumnar {
+        fields: &mut fast_fields,
+        schema: schema.clone(),
+    }
+    .visit(query_ast);
+
+    let Ok(_) = ExistsQueryFastFields {
+        fields: &mut fast_fields,
+        schema: schema.clone(),
+    }
+    .visit(query_ast);
 
     let query = query_ast.build_tantivy_query(
         &schema,
@@ -125,6 +203,9 @@ pub(crate) fn build_query(
     let mut terms_grouped_by_field: HashMap<Field, HashMap<_, bool>> = Default::default();
     query.query_terms(&mut |term, need_position| {
         let field = term.field();
+        if !schema.get_field_entry(field).is_indexed() {
+            return;
+        }
         *terms_grouped_by_field
             .entry(field)
             .or_default()

diff --git a/quickwit/quickwit-query/src/query_ast/utils.rs b/quickwit/quickwit-query/src/query_ast/utils.rs
@@ -17,15 +17,15 @@ use tantivy::json_utils::convert_to_fast_value_and_append_to_json_term;
 use tantivy::query::TermQuery as TantivyTermQuery;
 use tantivy::schema::{
     Field, FieldEntry, FieldType, IndexRecordOption, JsonObjectOptions, Schema as TantivySchema,
-    Type,
+    TextFieldIndexing, Type,
 };
 
 use crate::InvalidQuery;
 use crate::MatchAllOrNone::MatchNone as TantivyEmptyQuery;
 use crate::json_literal::InterpretUserInput;
 use crate::query_ast::full_text_query::FullTextParams;
 use crate::query_ast::tantivy_query_ast::{TantivyBoolQuery, TantivyQueryAst};
-use crate::tokenizers::TokenizerManager;
+use crate::tokenizers::{RAW_TOKENIZER_NAME, TokenizerManager};
 
 pub(crate) const DYNAMIC_FIELD_NAME: &str = "_dynamic";
 
@@ -147,12 +147,18 @@ fn compute_query_with_field(
             Ok(make_term_query(term))
         }
         FieldType::Str(text_options) => {
-            let text_field_indexing = text_options.get_indexing_options().ok_or_else(|| {
-                InvalidQuery::SchemaError(format!(
-                    "field {} is not full-text searchable",
-                    field_entry.name()
-                ))
-            })?;
+            let columnar_opt = TextFieldIndexing::default()
+                .set_fieldnorms(false)
+                .set_tokenizer(RAW_TOKENIZER_NAME);
+            let text_field_indexing = text_options
+                .get_indexing_options()
+                .or_else(|| text_options.is_fast().then_some(&columnar_opt))
+                .ok_or_else(|| {
+                    InvalidQuery::SchemaError(format!(
+                        "field {} is not full-text searchable",
+                        field_entry.name()
+                    ))
+                })?;
             let terms = full_text_params.tokenize_text_into_terms(
                 field,
                 value,

diff --git a/quickwit/quickwit-query/src/tokenizers/mod.rs b/quickwit/quickwit-query/src/tokenizers/mod.rs
@@ -28,7 +28,7 @@ use self::chinese_compatible::ChineseTokenizer;
 pub use self::code_tokenizer::CodeTokenizer;
 #[cfg(feature = "multilang")]
 pub use self::multilang::MultiLangTokenizer;
-pub use self::tokenizer_manager::TokenizerManager;
+pub use self::tokenizer_manager::{RAW_TOKENIZER_NAME, TokenizerManager};
 
 pub const DEFAULT_REMOVE_TOKEN_LENGTH: usize = 255;
 

diff --git a/quickwit/quickwit-query/src/tokenizers/tokenizer_manager.rs b/quickwit/quickwit-query/src/tokenizers/tokenizer_manager.rs
@@ -22,7 +22,7 @@ use tantivy::tokenizer::{
 
 use crate::DEFAULT_REMOVE_TOKEN_LENGTH;
 
-const RAW_TOKENIZER_NAME: &str = "raw";
+pub const RAW_TOKENIZER_NAME: &str = "raw";
 const LOWERCASE_TOKENIZER_NAME: &str = "lowercase";
 const RAW_LOWERCASE_TOKENIZER_NAME: &str = "raw_lowercase";
 

diff --git a/quickwit/rest-api-tests/scenarii/es_compatibility/0020-stats.yaml b/quickwit/rest-api-tests/scenarii/es_compatibility/0020-stats.yaml
@@ -64,12 +64,12 @@ expected:
   _all:
     primaries:
       docs:
-        count: 100
+        count: 102
     total:
       segments:
-        count: 1
+        count: 2
       docs:
-        count: 100
+        count: 102
   indices:
     gharchive:
       primaries:
@@ -80,6 +80,15 @@ expected:
           count: 1
         docs:
           count: 100
+    fast_only:
+      primaries:
+        docs:
+          count: 2
+      total:
+        segments:
+          count: 1
+        docs:
+          count: 2
     empty_index:
       primaries:
         docs:

diff --git a/quickwit/rest-api-tests/scenarii/es_compatibility/0021-cat-indices.yaml b/quickwit/rest-api-tests/scenarii/es_compatibility/0021-cat-indices.yaml
@@ -5,11 +5,13 @@ endpoint: "_cat/indices?format=json"
 expected:
 - index: empty_index
   docs.count: '0'
-- dataset.size: 222.8kb
+- index: fast_only
+  docs.count: '2'
+- index: gharchive
+  dataset.size: 222.8kb
   docs.count: '100'
   docs.deleted: '0'
   health: green
-  index: gharchive
   pri: '1'
   pri.store.size:
       $expect: 270 < float(val[:-2]) < 280

diff --git a/quickwit/rest-api-tests/scenarii/es_compatibility/0028-fast_only_field_query.yaml b/quickwit/rest-api-tests/scenarii/es_compatibility/0028-fast_only_field_query.yaml
@@ -0,0 +1,133 @@
+# Search for a term in a field that is not indexed but is a fast field
+engines:
+  - quickwit
+endpoint: "fast_only/_search"
+params:
+  size: 0
+json:
+  query:
+    term:
+      fast_text: "abc-123"
+expected:
+  hits:
+    total:
+      value: 1
+      relation: "eq"
+--- # term query with no matches
+engines:
+  - quickwit
+endpoint: "fast_only/_search"
+params:
+  size: 0
+json:
+  query:
+    term:
+      fast_text: "zzz"
+expected:
+  hits:
+    total:
+      value: 0
+      relation: "eq"
+
+--- # term set query with partial match
+engines:
+  - quickwit
+endpoint: "fast_only/_search"
+params:
+  size: 0
+json:
+  query:
+    terms:
+      fast_text:
+        - "abc-123"
+        - "zzz"
+expected:
+  hits:
+    total:
+      value: 1
+      relation: "eq"
+
+--- # term set query with multiple matches
+engines:
+  - quickwit
+endpoint: "fast_only/_search"
+params:
+  size: 0
+json:
+  query:
+    terms:
+      fast_text:
+        - "abc-123"
+        - "def-456"
+expected:
+  hits:
+    total:
+      value: 2
+      relation: "eq"
+
+--- # term query on nested JSON field
+engines:
+  - quickwit
+endpoint: "fast_only/_search"
+params:
+  size: 0
+json:
+  query:
+    term:
+      obj.nested_text: "abc-123"
+expected:
+  hits:
+    total:
+      value: 1
+      relation: "eq"
+
+--- # term query on nested JSON field with no matches
+engines:
+  - quickwit
+endpoint: "fast_only/_search"
+params:
+  size: 0
+json:
+  query:
+    term:
+      obj.nested_text: "zzz"
+expected:
+  hits:
+    total:
+      value: 0
+      relation: "eq"
+
+--- # term set query on nested JSON field with multiple matches
+engines:
+  - quickwit
+endpoint: "fast_only/_search"
+params:
+  size: 0
+json:
+  query:
+    terms:
+      obj.nested_text:
+        - "abc-123"
+        - "ghi-789"
+expected:
+  hits:
+    total:
+      value: 2
+      relation: "eq"
+
+--- # term set query on nested JSON field with no matches
+engines:
+  - quickwit
+endpoint: "fast_only/_search"
+params:
+  size: 0
+json:
+  query:
+    terms:
+      obj.nested_text:
+        - "zzz"
+expected:
+  hits:
+    total:
+      value: 0
+      relation: "eq"