
Commit e829336

Merge branch 'main' into ia_pgwire_oidc

2 parents c4adab4 + 305a3a3

10 files changed: +554 −134 lines

documentation/configuration-utils/_cairo.config.json

Lines changed: 24 additions & 0 deletions

@@ -466,5 +466,29 @@
     "cairo.partition.encoder.parquet.raw.array.encoding.enabled": {
       "default": "false",
       "description": "Determines whether to export arrays in QuestDB-native binary format (true, less compatible) or Parquet-native format (false, more compatible)."
+    },
+    "cairo.partition.encoder.parquet.version": {
+      "default": "1",
+      "description": "Output Parquet version to use for parquet-encoded partitions. Can be 1 or 2."
+    },
+    "cairo.partition.encoder.parquet.statistics.enabled": {
+      "default": "true",
+      "description": "Controls whether statistics are included in parquet-encoded partitions."
+    },
+    "cairo.partition.encoder.parquet.compression.codec": {
+      "default": "ZSTD",
+      "description": "Sets the default compression codec for parquet-encoded partitions. Alternatives include `LZ4_RAW` and `SNAPPY`."
+    },
+    "cairo.partition.encoder.parquet.compression.level": {
+      "default": "9 (ZSTD), 0 (otherwise)",
+      "description": "Sets the default compression level for parquet-encoded partitions. Dependent on the underlying compression codec."
+    },
+    "cairo.partition.encoder.parquet.row.group.size": {
+      "default": "100000",
+      "description": "Sets the default row-group size for parquet-encoded partitions."
+    },
+    "cairo.partition.encoder.parquet.data.page.size": {
+      "default": "1048576",
+      "description": "Sets the default data-page size, in bytes, for parquet-encoded partitions."
     }
   }
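
These keys live in QuestDB's `server.conf`. As a minimal sketch of overriding the new defaults (the values below are illustrative assumptions, not recommendations; only the key names come from the diff above):

```shell
# Append illustrative overrides to server.conf (values are assumptions).
cat >> conf/server.conf <<'EOF'
# Trade compression ratio for export speed
cairo.partition.encoder.parquet.compression.codec=LZ4_RAW
cairo.partition.encoder.parquet.compression.level=0
# Larger row groups favour scan-heavy downstream readers
cairo.partition.encoder.parquet.row.group.size=1000000
EOF
```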
documentation/configuration-utils/_parquet-export.config.json

Lines changed: 6 additions & 0 deletions

@@ -0,0 +1,6 @@
+{
+  "cairo.sql.copy.export.root": {
+    "default": "export",
+    "description": "Root directory for parquet exports via `COPY-TO` SQL. This path must not overlap with other directories (e.g. db, conf) of the running instance; otherwise an export may delete or overwrite existing files. Relative paths are resolved against the server root directory."
+  }
+}
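
Since relative paths resolve against the server root, a safe setup points the export root at a dedicated directory; a sketch, where the directory path is a hypothetical example:

```shell
# Point COPY-TO exports at a dedicated directory that does not overlap
# with the instance's db/ or conf/ directories (hypothetical path).
mkdir -p /var/lib/questdb/parquet_exports
echo 'cairo.sql.copy.export.root=/var/lib/questdb/parquet_exports' >> conf/server.conf
```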

documentation/configuration.md

Lines changed: 29 additions & 4 deletions

@@ -10,6 +10,7 @@ import cairoConfig from "./configuration-utils/_cairo.config.json"
 import parallelSqlConfig from "./configuration-utils/_parallel-sql.config.json"
 import walConfig from "./configuration-utils/_wal.config.json"
 import csvImportConfig from "./configuration-utils/_csv-import.config.json"
+import parquetExportConfig from "./configuration-utils/_parquet-export.config.json"
 import postgresConfig from "./configuration-utils/_postgres.config.json"
 import tcpConfig from "./configuration-utils/_tcp.config.json"
 import udpConfig from "./configuration-utils/_udp.config.json"

@@ -168,12 +169,14 @@ applying WAL data to the table storage:
 
 <ConfigTable rows={walConfig} />
 
-### CSV import
+### COPY settings
+
+#### Import
 
 This section describes configuration settings for using `COPY` to import large
-CSV files.
+CSV files or to export parquet files.
 
-Settings for `COPY`:
+Settings for `COPY FROM` (import):
 
 <ConfigTable
   rows={csvImportConfig}

@@ -188,7 +191,7 @@ Settings for `COPY`:
 ]}
 />
 
-#### CSV import configuration for Docker
+**CSV import configuration for Docker**
 
 For QuestDB instances using Docker:

@@ -222,6 +225,28 @@ Where:
 It is important that the two paths are identical
 (`/var/lib/questdb/questdb_import` in the example).
 
+
+#### Export
+
+<ConfigTable rows={parquetExportConfig} />
+
+Parquet export is also affected by the general query execution and parquet conversion parameters.
+
+If not overridden, the following default settings will be used.
+
+<ConfigTable
+  rows={cairoConfig}
+  pick={[
+    "cairo.partition.encoder.parquet.raw.array.encoding.enabled",
+    "cairo.partition.encoder.parquet.version",
+    "cairo.partition.encoder.parquet.statistics.enabled",
+    "cairo.partition.encoder.parquet.compression.codec",
+    "cairo.partition.encoder.parquet.compression.level",
+    "cairo.partition.encoder.parquet.row.group.size",
+    "cairo.partition.encoder.parquet.data.page.size"
+  ]}
+/>
+
 ### Parallel SQL execution
 
 This section describes settings that can affect the level of parallelism during
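
For Docker deployments, the new export key can presumably be supplied the same way as other QuestDB settings, via `QDB_`-prefixed environment variables (dots become underscores, uppercased), mirroring the CSV-import Docker instructions in the diff above; a sketch, with illustrative host paths:

```shell
# Sketch: set the parquet export root via an environment variable in Docker,
# assuming the QDB_ prefix convention (dots -> underscores, uppercased).
docker run -p 9000:9000 \
  -e QDB_CAIRO_SQL_COPY_EXPORT_ROOT=/var/lib/questdb/questdb_export \
  -v "$(pwd)/questdb_export:/var/lib/questdb/questdb_export" \
  questdb/questdb
```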

documentation/guides/export-parquet.md

Lines changed: 16 additions & 14 deletions

@@ -33,20 +33,17 @@ You can override these defaults when [exporting via COPY](#export-query-as-files
 
 ## Export queries as files
 
-:::warning
-Exporting as files is right now available on a development branch: [https://github.com/questdb/questdb/pull/6008](https://github.com/questdb/questdb/pull/6008).
-If you want to test this feature, you need to clone and compile the branch.
-
-The code is functional, but it is just lacking fuzzy tests and documentation. We should be able to include this in a
-release soon enough, but for exporting it is safe to just checkout the development branch, compile, and start QuestDB
-pointing to the target jar.
-:::
-
 To export a query as a file, you can use either the `/exp` REST API endpoint or the `COPY` command.
 
 
 ### Export query as file via REST
 
+:::tip
+
+See also the [/exp documentation](/docs/reference/api/rest/#exp---export-data).
+
+:::
+
 You can use the same parameters as when doing a [CSV export](/docs/reference/api/rest/#exp---export-data), only passing `parquet` as the `fmt` parameter value.

@@ -67,12 +64,18 @@ to point DuckDB to the example file exported in the previous example, you could
 start DuckDB and execute:
 
 ```
-select * from read_parquet('~/tmp/exp.parquet');
+select * from read_parquet('~/tmp/exp.parquet');
 ```
 
-
 ### Export query as files via COPY
 
+
+:::tip
+
+See also the [COPY-TO documentation](/docs/reference/sql/copy).
+
+:::
+
 If you prefer to export data via SQL, or if you want to export asynchronously, you
 can use the `COPY` command from the web console, from any pgwire-compliant client,
 or using the [`exec` endpoint](/docs/reference/api/rest/#exec---execute-queries) of the REST API.

@@ -81,13 +84,13 @@ or using the [`exec` endpoint](/docs/reference/api/rest/#exec---execute-queries)
 
 You can export a query:
 
 ```
-COPY (select * from market_data limit 3) TO 'market_data_parquet_table' WITH FORMAT PARQUET;
+COPY (select * from market_data limit 3) TO 'market_data_parquet_table' WITH FORMAT PARQUET;
 ```
 
 Or you can export a whole table:
 
 ```
-COPY market_data TO 'market_data_parquet_table' WITH FORMAT PARQUET;
+COPY market_data TO 'market_data_parquet_table' WITH FORMAT PARQUET;
 ```

@@ -106,7 +109,6 @@ If you want to monitor the export process, you can issue a call like this:
 SELECT * FROM 'sys.copy_export_log' WHERE id = '45ba24e5ba338099';
 ```
 
-
 While it is running, the export can be cancelled with:
 
 ```
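
As this guide notes, `COPY` can also be driven over REST via the `/exec` endpoint, which suits asynchronous exports; a sketch, reusing the table name and log query from the diff above (the export id shown is the example value from the guide, not a real one):

```shell
# Kick off an async parquet export via the /exec endpoint (illustrative table name).
curl -G \
  --data-urlencode "query=COPY market_data TO 'market_data_parquet_table' WITH FORMAT PARQUET;" \
  http://localhost:9000/exec

# The response carries an export id; use it to poll the export log
# (the id below is the example value from the guide).
curl -G \
  --data-urlencode "query=SELECT * FROM 'sys.copy_export_log' WHERE id = '45ba24e5ba338099';" \
  http://localhost:9000/exec
```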

documentation/guides/import-csv.md

Lines changed: 1 addition & 1 deletion

@@ -127,7 +127,7 @@ csvstack *.csv > singleFile.csv
 
 #### Configure `COPY`
 
-- Enable `COPY` and [configure](/docs/configuration/#csv-import) the `COPY`
+- Enable `COPY` and [configure](/docs/configuration/#copy-settings) the `COPY`
   directories to suit your server.
 - `cairo.sql.copy.root` must be set for `COPY` to work.
documentation/reference/api/rest.md

Lines changed: 66 additions & 3 deletions

@@ -20,8 +20,7 @@ off-the-shelf HTTP clients. It provides a simple way to interact with QuestDB
 and is compatible with most programming languages. API functions are fully keyed
 on the URL and they use query parameters as their arguments.
 
-The Web Console[Web Console](/docs/web-console/) is the official Web client
-relying on the REST API.
+The [Web Console](/docs/web-console/) is the official Web client for QuestDB and relies on the REST API.
 
 **Available methods**

@@ -591,15 +590,41 @@ returned in a tabular form to be saved and reused as opposed to JSON.
 
 `/exp` is expecting an HTTP GET request with the following parameters:
 
 | Parameter | Required | Description |
 |:----------|:---------|:------------|
 | `query`   | Yes      | URL encoded query text. It can be multi-line. |
 | `limit`   | No       | Paging parameter. For example, `limit=10,20` will return row numbers 10 through 20 inclusive, and `limit=20` will return the first 20 rows, which is equivalent to `limit=0,20`. `limit=-20` will return the last 20 rows. |
 | `nm`      | No       | `true` or `false`. Skips the metadata section of the response when set to `true`. |
+| `fmt`     | No       | Export format. Valid values: `parquet`, `csv`. When set to `parquet`, exports data in Parquet format instead of CSV. |
+
+#### Parquet Export Parameters
+
+:::warning
+
+Parquet exports currently require writing interim data to disk, and therefore must be run on **read-write instances only**.
+
+This limitation will be removed in the future.
+
+:::
+
+When `fmt=parquet`, the following additional parameters are supported:
+
+| Parameter            | Required | Default   | Description |
+|:---------------------|:---------|:----------|:------------|
+| `partition_by`       | No       | `NONE`    | Partition unit: `NONE`, `HOUR`, `DAY`, `WEEK`, `MONTH`, or `YEAR`. |
+| `compression_codec`  | No       | `ZSTD`    | Compression algorithm: `UNCOMPRESSED`, `SNAPPY`, `GZIP`, `LZ4`, `ZSTD`, `LZ4_RAW`, `BROTLI`, `LZO`. |
+| `compression_level`  | No       | `9`       | Compression level (codec-specific). Higher values mean better compression but slower export. |
+| `row_group_size`     | No       | `100000`  | Number of rows per Parquet row group. |
+| `data_page_size`     | No       | `1048576` | Size of data pages in bytes (default 1 MB). |
+| `statistics_enabled` | No       | `true`    | Enable Parquet column statistics: `true` or `false`. |
+| `parquet_version`    | No       | `2`       | Parquet format version: `1` (v1.0) or `2` (v2.0). |
+| `raw_array_encoding` | No       | `false`   | Use raw encoding for arrays: `true` (lighter-weight, less compatible) or `false` (heavier-weight, more compatible). |
 
 The parameters must be URL encoded.
 
 ### Examples
 
+#### CSV Export (default)
+
 Considering the query:
 
 ```shell

@@ -620,6 +645,44 @@ An HTTP status code of `200` is returned with the following response body:
 200501BS00005,"2005-01-10T00:00:00.000Z",21:13
 ```
 
+#### Parquet Export
+
+Export query results to Parquet format:
+
+```shell
+curl -G \
+  --data-urlencode "query=SELECT * FROM trades WHERE timestamp IN today()" \
+  --data-urlencode "fmt=parquet" \
+  http://localhost:9000/exp > trades_today.parquet
+```
+
+#### Parquet Export with Custom Options
+
+Export with custom compression and partitioning:
+
+```shell
+curl -G \
+  --data-urlencode "query=SELECT * FROM trades" \
+  --data-urlencode "fmt=parquet" \
+  --data-urlencode "partition_by=DAY" \
+  --data-urlencode "compression_codec=ZSTD" \
+  --data-urlencode "compression_level=9" \
+  --data-urlencode "row_group_size=1000000" \
+  http://localhost:9000/exp > trades.parquet
+```
+
+#### Parquet Export with LZ4 Compression
+
+Export with `LZ4_RAW` compression for faster export:
+
+```shell
+curl -G \
+  --data-urlencode "query=SELECT symbol, price, amount FROM trades WHERE timestamp > dateadd('h', -1, now())" \
+  --data-urlencode "fmt=parquet" \
+  --data-urlencode "compression_codec=LZ4_RAW" \
+  http://localhost:9000/exp > recent_trades.parquet
+```
+
 ## Error responses
 
 ### Malformed queries
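
To sanity-check a `/exp` parquet export end to end, the file can be read back with DuckDB, as the export guide above suggests; a sketch, assuming the DuckDB CLI and its `-c` flag are available:

```shell
# Export one day of data, then verify the row count with DuckDB
# (assumes the DuckDB CLI is installed and supports -c).
curl -G \
  --data-urlencode "query=SELECT * FROM trades WHERE timestamp IN today()" \
  --data-urlencode "fmt=parquet" \
  http://localhost:9000/exp > trades_today.parquet

duckdb -c "SELECT count(*) FROM read_parquet('trades_today.parquet');"
```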

documentation/reference/function/aggregation.md

Lines changed: 6 additions & 4 deletions

@@ -22,17 +22,19 @@ Running it will result in the following error:
 
 You can work around this limitation by using CTEs or subqueries:
 
-```questdb-sql title="aggregates as function args workaround" demo
+```questdb-sql title="CTE workaround"
 -- CTE
 WITH minmax AS (
-  SELECT min(timestamp) as min_date, max(timestamp) as max_date FROM trades
+  SELECT min(timestamp) AS min_date, max(timestamp) AS max_date FROM trades
 )
 SELECT datediff('d', min_date, max_date) FROM minmax;
 
 -- Subquery
-SELECT datediff('d', min_date, max_date) FROM (
-  SELECT min(timestamp) as min_date, max(timestamp) as max_date FROM trades
+SELECT datediff('d', min_date, max_date)
+FROM (
+  SELECT min(timestamp) AS min_date, max(timestamp) AS max_date FROM trades
 );
+
 ```
 
 :::
