risingwavelabs · WanYixian · Sep 29, 2025 · Sep 17, 2025 · Sep 17, 2025 · Sep 26, 2025
diff --git a/mint.json b/mint.json
@@ -419,6 +419,7 @@
             "processing/watermarks",
             "processing/emit-on-window-close",
             "processing/indexes",
+            "processing/vector-indexes",
             "processing/time-travel-queries"
           ]
         },
@@ -636,6 +637,7 @@
                 "sql/data-types/map-type",
                 "sql/data-types/jsonb",
                 "sql/data-types/rw-int256",
+                "sql/data-types/vector",
                 "sql/data-types/supported-protobuf-types"
               ]
             },

diff --git a/operate/view-configure-runtime-parameters.mdx b/operate/view-configure-runtime-parameters.mdx
@@ -29,66 +29,67 @@

 | Name                                         | Values or examples        | Description                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   |
 | :--- | :--- | :--- |
 | implicit_flush                          | true/false                      | If set to `true`, every INSERT/UPDATE/DELETE statement will block until the entire dataflow is refreshed. In other words, every related table & MV will be able to see the write.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            |
 | create_compaction_group_for_mv           | true/false                      | If set to `true`, RisingWave will create a dedicated compaction group for each materialized view that is created while the setting is in effect. |
 | query_mode                                  | auto                            | A temporary config variable that forces queries to run in either local or distributed mode. The default value is `auto`, which lets the system decide automatically whether to run batch queries in local or distributed mode. |
 | extra_float_digits                         | 1                               | Set the number of digits displayed for floating-point values. See [here](https://www.postgresql.org/docs/current/runtime-config-client.html#:~:text=for%20more%20information.-,extra%5Ffloat%5Fdigits,-%28integer%29) for details.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            |
 | application_name                            | psql                            | Set the application name to be reported in statistics and logs. See [here](https://www.postgresql.org/docs/14/runtime-config-logging.html#:~:text=What%20to%20Log-,application%5Fname,-%28string%29) for details.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             |
 | datestyle                                    | DMY                             | Set the display format for date and time values. It is typically set by an application upon connection to the server. See [here](https://www.postgresql.org/docs/current/runtime-config-client.html#GUC-DATESTYLE) for details. |
 | batch_enable_lookup_join              | true/false                      | Force the use of lookup join instead of hash join when possible for local batch execution.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    |
 | batch_enable_sort_agg                 | true/false                      | Enable usage of sortAgg instead of hash agg when order property is satisfied in batch execution.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              |
 | batch_enable_distributed_dml              | true/false                      | Enable distributed DML, allowing INSERT/UPDATE/DELETE statements to be executed in a distributed way, such as running on multiple Compute Nodes. Defaults to false.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           |
 | batch_expr_strict_mode | true/false | Control whether to let the entire query fail or fill `NULL` values for expression evaluation failures. |
+| batch_hnsw_ef_search | 40 | Set the `ef_search` parameter for [HNSW](/processing/vector-indexes#index-types) vector index queries. |
 | max_split_range_gap                       | 8                               | The max gap allowed to transform small range scan into multi point lookup.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    |
 | search_path                                 | "$user", public                 | Set the order in which schemas are searched when an object (table, data type, function, etc.) is referenced by a simple name with no schema specified. See [here](https://www.postgresql.org/docs/14/runtime-config-client.html#GUC-SEARCH-PATH) for details.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 |
 | visibility_mode                             | default                         | If `VISIBILITY_MODE` is `all`, queries can read the latest uncommitted data, and consistency between tables is not guaranteed. |
 | transaction_isolation                       | read committed                  | See [here](https://www.postgresql.org/docs/current/transaction-iso.html) for details.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         |
 | query_epoch                                 | 0                               | Query data as of a specific epoch. Sets the historical epoch for querying data. If set to 0, the latest data is queried. |
 | timezone                                     | UTC                             | Session timezone. Defaults to UTC.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            |
 | streaming_parallelism                       | ADAPTIVE/0,1,2,...              | If `STREAMING_PARALLELISM` is non-zero, CREATE MATERIALIZED VIEW/TABLE/INDEX will use it as streaming parallelism. The value will be bounded at `STREAMING_MAX_PARALLELISM`.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                |
 | streaming_parallelism_for_table      | default/1,2,3,... | Specific parallelism for tables. If set to `default`, it will fall back to the global `streaming_parallelism`. |
 | streaming_parallelism_for_materialized_view | default/1,2,3,... | Specific parallelism for materialized views. If set to `default`, it will fall back to the global `streaming_parallelism`. |
 | streaming_parallelism_for_index      | default/1,2,3,... | Specific parallelism for indexes. If set to `default`, it will fall back to the global `streaming_parallelism`. |
 | streaming_parallelism_for_sink       | default/1,2,3,... | Specific parallelism for sinks. If set to `default`, it will fall back to the global `streaming_parallelism`. |
 | streaming_parallelism_for_source     | default/1,2,3,... | Specific parallelism for sources. If set to `default`, it will fall back to the global `streaming_parallelism`. |
 | streaming_max_parallelism                  | 256                             | The maximum parallelism allowed for streaming queries. For more information, see [Configuring maximum parallelism](/deploy/k8s-cluster-scaling#configuring-maximum-parallelism).                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              |
 | streaming_enable_delta_join           | true/false                      | Enable delta join for streaming queries. Defaults to false.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   |
 | streaming_enable_bushy_join           | true/false                      | Enable bushy join for streaming queries. Defaults to true.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    |
 | streaming_enable_materialized_expressions           | true/false                      | Enable materialized expressions for impure functions (typically UDF). Defaults to true.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    |
 | streaming_enable_unaligned_join           | true/false                      | Control whether the streaming join should be unaligned or not. See [Isolating high-amplification joins](/processing/sql/joins#isolating-high-amplification-joins) for more details.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    |
 | streaming_force_filter_inside_join           | true/false                      | Force filter to be pushed down into inner join. Defaults to false. See [inner join](/processing/sql/joins#inner-joins) for more details.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 |
 | streaming_join_encoding | memory_optimized/cpu_optimized | Control which row encoding strategy is used in the join operator. <ul><li>`memory_optimized`: Uses more CPU but significantly reduces memory per cached row, allowing more rows to be stored. Recommended when state size exceeds memory or disk cache, often indicated by high cache misses or remote I/O in Grafana.</li><li>`cpu_optimized`: Uses less CPU but more memory per row. Recommended when state fits in memory or cache. Can improve performance by up to 50%, depending on query complexity.</li></ul>  |
 | streaming_separate_consecutive_join           | true/false                      |  Separate consecutive StreamHashJoin by no-shuffle StreamExchange. Defaults to false.                                               |
 | streaming_separate_sink         | true/false                      |  Separate StreamSink by no-shuffle StreamExchange. Defaults to false.                                               |
 | streaming_use_arrangement_backfill        | true/false                      | Enable arrangement backfill for streaming queries. Defaults to true.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |
 | streaming_use_snapshot_backfill           | true/false                      | Enable snapshot backfill for streaming queries. Defaults to false.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            |
 | enable_join_ordering                   | true/false                      | Enable join ordering for streaming and batch queries. Defaults to true.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       |
 | enable_two_phase_agg                  | true/false                      | Enable two phase agg optimization. Defaults to true. Setting this to true will always set `FORCE_TWO_PHASE_AGG` to false.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    |
 | force_two_phase_agg                   | true/false                      | Force two phase agg optimization whenever there's a choice between optimizations. Defaults to false. Setting this to true will always set `ENABLE_TWO_PHASE_AGG` to false.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   |
 | enable_share_plan                      | true/false                      | Enable sharing of common sub-plans. This means that DAG structured query plans can be constructed, rather than only tree structured query plans.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              |
 | force_split_distinct_agg              | true/false                      | Enable split distinct agg.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    |
 | intervalstyle                                | postgres                        | Set the display format for interval values. It is typically set by an application upon connection to the server. See [here](https://www.postgresql.org/docs/current/runtime-config-client.html#GUC-INTERVALSTYLE) for details.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                |
 | batch_parallelism                           | 0                               | If `BATCH_PARALLELISM` is non-zero, batch queries will use this parallelism.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   |
 | server_version                              | 9.5.0                           | The version of PostgreSQL that Risingwave claims to be.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       |
 | server_version_num                         | 90500                           | The version of PostgreSQL that Risingwave claims to be.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       |
 | client_min_messages                        | notice                          | See [here](https://www.postgresql.org/docs/15/runtime-config-client.html#GUC-CLIENT-MIN-MESSAGES) for details.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                |
 | client_encoding                             | UTF8                            | See [here](https://www.postgresql.org/docs/15/runtime-config-client.html#GUC-CLIENT-ENCODING) for details.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    |
 | sink_decouple                               | default                         | Enable decoupling sink and internal streaming graph or not.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   |
 | synchronize_seqscans                        | true/false                      | See [here](https://www.postgresql.org/docs/current/runtime-config-compatible.html#RUNTIME-CONFIG-COMPATIBLE-VERSION) for details. Unused in RisingWave, support for compatibility.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            |
 | statement_timeout                           | 3600                            | Abort query statement that takes more than the specified amount of time in sec. If `log_min_error_statement` is set to ERROR or lower, the statement that timed out will also be logged. The default value is 1 hour.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        |
 | lock_timeout                                | 0                               | See [here](https://www.postgresql.org/docs/current/runtime-config-client.html#GUC-LOCK-TIMEOUT) for details. Unused in RisingWave, support for compatibility.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 |
 | cdc_source_wait_streaming_start_timeout | 30                              | For limiting the startup time of a shareable CDC streaming source when the source is being created. Unit: seconds.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            |
 | row_security                                | true/false                      | See [here](https://www.postgresql.org/docs/current/runtime-config-client.html#GUC-ROW-SECURITY) for details. Unused in RisingWave, support for compatibility.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 |
 | standard_conforming_strings                | on                              | See [here](https://www.postgresql.org/docs/current/runtime-config-client.html#GUC-STANDARD-CONFORMING-STRINGS) for details.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   |
 | source_rate_limit                          | default/positive integer/0 | Set the maximum number of records per second per source, for each parallelism. This parameter is applied when creating new sources and tables with sources. <br/><br/> The value can be default, 0, or a positive integer. <ul><li>Set it to 0 will pause the source read for sources.</li><li>Set it to default will remove the rate limit.</li></ul>Setting this variable will only affect new DDLs within the session, but not change the rate limits of existing jobs. Use `ALTER` to change the rate limits in existing [sources](/sql/commands/sql-alter-source/#set-source-rate-limit) and [tables that have source](/sql/commands/sql-alter-table/#set-source-rate-limit). <br/><br/> Note that the total throughput of a streaming job is determined by multiplying the parallelism with the throttle rate. To obtain the parallelism value for a streaming job, you can refer to the `streaming_parallelism` runtime parameter in this table.  |
 | backfill_rate_limit                        | default/positive integer/0 | Set the maximum number of records per second per parallelism for the backfill process of materialized views, sinks, and indexes. This parameter is applied when creating new jobs, and throttles the backfill from upstream materialized views and sources. <br/><br/> The value can be default, 0, or a positive integer. <ul><li>Set it to 0 will pause the backfill.</li><li>Set it to default will remove the backfill rate limit.</li></ul>Setting this variable will only affect new DDLs within the session, but not change the rate limits of existing jobs. Use `ALTER` to change the backfill rate limits in existing [materialized views](/sql/commands/sql-alter-materialized-view#set-backfill-rate-limit) and [CDC tables](/sql/commands/sql-alter-table/#set-backfill%5Frate%5Flimit). <br/><br/> Note that the total throughput of a streaming job is determined by multiplying the parallelism with the throttle rate. To obtain the parallelism value for a streaming job, you can refer to the `streaming_parallelism` runtime parameter in this table.                                                   |
 | dml_rate_limit | positive integer/0| Set streaming rate limit (rows per second) for each parallelism for table DML. <ul><li>Set it to -1 will disable rate limit.</li><li>Set it to 0 will pause the DML.</li></ul>|
 | sink_rate_limit | positive integer/0 | Set sink rate limit (rows per second) for each parallelism for external sink. <ul><li>Set it to -1 will disable rate limit.</li><li>Set it to 0 will pause the sink.</li></ul> |
 | streaming_over_window_cache_policy   | full                            | Cache policy for partition cache in streaming over window. Can be `full`, `recent`, `recent_first_n` or `recent_last_n`.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  |
 | background_ddl                              | true/false                      | Run DDL statements in background.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             |
 | server_encoding                             | UTF8                            | Show the server-side character set encoding. At present, this parameter can be shown but not set, because the encoding is determined at database creation time.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               |
 | bytea_output                                | hex                             | Set the output format for values of type bytea. Valid values are hex (the default) and escape (the traditional PostgreSQL format). The bytea type always accepts both formats on input, regardless of this setting. |
 | iceberg_engine_connection | string | Specifies the connection information for the Iceberg engine tables. Create an Iceberg connection and set this variable before creating Iceberg engine tables. The format should be `schema_name.connection_name`.|

 If you just want to view a specific parameter's value, you can also use the `SHOW` command.


diff --git a/processing/vector-indexes.mdx b/processing/vector-indexes.mdx
@@ -0,0 +1,186 @@
+---
+title: "Vector indexes"
+description: "Create and use vector indexes for efficient similarity search operations in RisingWave."
+---
+
+RisingWave supports vector indexes to enable efficient similarity search operations. Vector indexes are specialized data structures that optimize queries involving vector distance calculations.
+
+## Creating vector indexes
+
+Use the `CREATE INDEX` command with vector-specific syntax to create vector indexes. For more details, see [CREATE INDEX](/sql/commands/sql-create-index).
+
+```sql Syntax
+CREATE INDEX index_name ON table_name 
+USING { FLAT | HNSW } (vector_column | expression) 
+[ INCLUDE ( include_column [, ...] ) ]
+[ WITH ( option = value [, ...] ) ];
+```
+
+## Index types
+
+Before creating a vector index, create a sample table `items` so that the examples below have a table name and column names to reference. Currently, RisingWave only supports creating vector indexes on append-only inputs, such as append-only tables or materialized views. Therefore, the table must be declared as append-only:
+
+```sql
+create table items (id int primary key, name string, embedding vector(128)) append only;
+```
+
+RisingWave supports two index methods when creating a vector index:
+
+- FLAT index: Provides exact results by comparing the query vector against all stored vectors.
+
+    ```sql
+    -- Create a FLAT vector index
+    CREATE INDEX idx_embedding ON items 
+    USING FLAT (embedding) 
+    INCLUDE (name) 
+    WITH (distance_type = 'l2');
+    ```
+
+- HNSW index: Hierarchical Navigable Small World (HNSW) index that provides approximate nearest neighbor search with better performance for large datasets.
+
+
+    ```sql
+    -- Create an HNSW vector index
+    CREATE INDEX idx_embedding_hnsw ON items 
+    USING HNSW (embedding) 
+    INCLUDE (name) 
+    WITH (
+        distance_type = 'inner_product', 
+        m = 32, 
+        ef_construction = 40, 
+        max_level = 5
+    );
+    ```
+
+For HNSW indexes, you can also control the query-time parameter `ef_search` by setting the session variable `batch_hnsw_ef_search` (the default value is 40).
+
+## Parameters
+
+| Parameter | Description | Valid for |
+| :--- | :--- | :--- |
+| `distance_type` | Distance metric to use: `l2`, `cosine`, `l1`, or `inner_product` | FLAT, HNSW |
+| `m` | Optional. Maximum number of connections per node | HNSW |
+| `ef_construction` | Optional. Size of dynamic candidate list during construction | HNSW |
+| `max_level` | Optional. Maximum level of the HNSW graph | HNSW |
+
+## Vector distance operators
+
+RisingWave provides specialized operators for calculating vector distances:
+
+| Operator | Function | Description |
+| :--- | :--- | :--- |
+| `<->` | `l2_distance()` | Euclidean (L2) distance |
+| `<=>` | `cosine_distance()` | Cosine distance |
+| `<+>` | `l1_distance()` | Manhattan (L1) distance |
+| `<#>` | Negative inner product | Negative inner product distance |
+
+## Vector similarity search
+
+Use vector distance operators with `ORDER BY` and `LIMIT` to perform similarity search:
+
+```sql
+-- Find the 5 most similar items using L2 distance
+SELECT * FROM items 
+ORDER BY embedding <-> '[3,1,2]' 
+LIMIT 5;
+
+-- Find similar items using cosine distance
+SELECT id, name FROM items 
+ORDER BY embedding <=> '[0.5, 0.3, 0.2]' 
+LIMIT 10;
+```
+
+## Vector indexes on function expressions
+
+You can create vector indexes on function expressions instead of raw columns. This allows you to avoid storing a separate vector column, saving storage and reducing maintenance costs.
+
+1. Create the table to include the input column
+
+```sql
+CREATE TABLE items (
+    id INT PRIMARY KEY,
+    description STRING
+    -- embedding column is optional if using function expression
+) APPEND ONLY;
+```
+
+An `embedding` column would normally store the embedding generated from the `description` column. If you create the vector index directly on the `description` column with a function expression, you don't have to store the raw embedding in the table.
+
+2. Define the user-defined function (UDF)
+
+```sql
+CREATE FUNCTION get_embedding(string) RETURNS VECTOR(128) LANGUAGE SQL AS $$
+SELECT openai_embedding('{"model": <EMBEDDING_MODEL_NAME>, "api_key": <API_KEY>}'::jsonb, $1)::vector(128);
+$$;
+```
+
+3. Create the vector index on the function expression
+
+```sql
+CREATE INDEX idx_embedding_func ON items
+USING FLAT (get_embedding(description))
+INCLUDE(description)
+WITH (distance_type = 'l2');
+```
+
+In this example, `get_embedding(description)` is used as the index expression.
+
+This approach avoids materializing a separate vector column in the table, which reduces storage costs and keeps the table schema simpler.
+
+## Examples
+
+### Basic vector similarity search
+
+```sql
+-- Create table with vector data
+CREATE TABLE products (
+    id INT PRIMARY KEY,
+    name STRING,
+    description STRING,
+    embedding vector(128)
+) APPEND ONLY;
+
+-- Create vector index
+CREATE INDEX idx_embedding ON products
+USING HNSW (embedding)
+WITH (distance_type = 'cosine');
+
+-- Insert sample data
+INSERT INTO products (id, name, description, embedding) VALUES
+(1, 'Product A', 'Description for Product A', '[0.1, 0.2, ...]'),
+(2, 'Product B', 'Description for Product B', '[0.3, 0.4, ...]');
+
+-- Find similar products
+SELECT id, name
+FROM products
+ORDER BY embedding <=> '[0.2, 0.3, ...]'
+LIMIT 5;
+```
+
+### Using cosine distance type
+
+The SQL query depends on the type of vector index you created:
+
+- If the vector index is built on a raw embedding column, use the raw column in your `ORDER BY` clause.
+
+```sql
+-- Query on the raw embedding column
+SELECT * FROM items
+ORDER BY embedding <=> '[0.5, 0.5, 0.0]' 
+LIMIT 3;
+```
+
+- If the vector index is built using a function expression, use the same function expression in your `ORDER BY` clause.
+
+```sql
+-- Query on a function expression
+SELECT * FROM items
+ORDER BY get_embedding(description) <=> '[1.0, 2.0, 3.0]'
+LIMIT 3;
+```
+
+## Related topics
+
+- [CREATE INDEX](/sql/commands/sql-create-index) command
+
+- [Vector data type, operators, functions](/sql/data-types/vector)