From dbb7c0afb02e3a0862a1311cde017e64ac104f91 Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Fri, 10 Apr 2026 08:20:41 -0700 Subject: [PATCH 1/4] Add parser_override to eliminate SEMANTIC prefix requirement Use DuckDB's parser_override API (v1.5+) to intercept queries before the native parser, allowing AGGREGATE() to work in all statement types (SELECT, CTAS, INSERT...SELECT) without the SEMANTIC prefix. The existing parse_function fallback and yardstick_bind operator extension are kept for backwards compatibility. Also filters AGGREGATE() argument extraction to simple identifiers so DuckDB's built-in list aggregate() function is not intercepted. --- src/include/yardstick_extension.hpp | 8 + src/yardstick_extension.cpp | 107 ++ test/sql/no_semantic_prefix.test | 1845 +++++++++++++++++++++++++++ yardstick-rs/src/sql/measures.rs | 6 +- 4 files changed, 1964 insertions(+), 2 deletions(-) create mode 100644 test/sql/no_semantic_prefix.test diff --git a/src/include/yardstick_extension.hpp b/src/include/yardstick_extension.hpp index 0cd7849..067b8fc 100644 --- a/src/include/yardstick_extension.hpp +++ b/src/include/yardstick_extension.hpp @@ -28,6 +28,10 @@ ParserExtensionParseResult yardstick_parse(ParserExtensionInfo *, ParserExtensionPlanResult yardstick_plan(ParserExtensionInfo *, ClientContext &, unique_ptr); +ParserOverrideResult yardstick_parser_override(ParserExtensionInfo *info, + const std::string &query, + ParserOptions &options); + // Operator extension: handles binding after parsing struct YardstickOperatorExtension : public OperatorExtension { YardstickOperatorExtension() : OperatorExtension() { Bind = yardstick_bind; } @@ -39,10 +43,14 @@ struct YardstickOperatorExtension : public OperatorExtension { }; // Parser extension: intercepts query strings +// parser_override runs BEFORE DuckDB's native parser, handling all statement types. +// parse_function/plan_function are kept as fallback for when the native parser fails +// (e.g., AT(...) 
syntax that is not valid SQL).
 struct YardstickParserExtension : public ParserExtension {
   YardstickParserExtension() : ParserExtension() {
     parse_function = yardstick_parse;
     plan_function = yardstick_plan;
+    parser_override = yardstick_parser_override;
   }
 };
 
diff --git a/src/yardstick_extension.cpp b/src/yardstick_extension.cpp
index 1f9ecb8..ee204c1 100644
--- a/src/yardstick_extension.cpp
+++ b/src/yardstick_extension.cpp
@@ -422,6 +422,137 @@ ParserExtensionParseResult yardstick_parse(ParserExtensionInfo *,
     return ParserExtensionParseResult();
 }
 
+//=============================================================================
+// PARSER OVERRIDE: intercepts ALL queries before DuckDB's native parser
+//=============================================================================
+
+// Rewrites yardstick-specific SQL before DuckDB's native parser sees it.
+//
+// Three kinds of statement are recognised, in this order:
+//   1. DROP VIEW on a measure view: yardstick's cleanup runs, then the
+//      statement is handed back to DuckDB unchanged.
+//   2. AGGREGATE(...): the query is expanded and wrapped in a call to the
+//      yardstick() table function.
+//   3. CREATE VIEW ... AS MEASURE: the measure definitions are processed and
+//      the cleaned-up CREATE VIEW is parsed instead.
+//
+// A leading SEMANTIC prefix is stripped first, so prefixed and unprefixed
+// queries take the same path.
+//
+// Returns a default-constructed ParserOverrideResult to hand the query to
+// DuckDB's native parser, a statement-carrying result when the query was
+// rewritten, or an error-carrying result when an AS MEASURE view is rejected.
+//
+// `options` is unused: the nested Parser instances are default-constructed.
+ParserOverrideResult yardstick_parser_override(ParserExtensionInfo *,
+                                               const std::string &query,
+                                               ParserOptions &options) {
+    // Strip SEMANTIC prefix if present (backwards compatibility)
+    std::string sql_to_check = query;
+    std::string semantic_stripped;
+    bool had_semantic_prefix = StartsWithSemantic(query, semantic_stripped);
+    if (had_semantic_prefix) {
+        sql_to_check = semantic_stripped;
+    }
+
+    // Check for DROP VIEW on measure views
+    if (yardstick_drop_measure_view_from_sql(sql_to_check.c_str())) {
+        // Catalog cleanup done; let DuckDB handle the actual DROP.
+        // NOTE(review): the cleanup happens at parse time, before DuckDB has
+        // executed the DROP -- confirm that is acceptable if the DROP fails.
+        return ParserOverrideResult();
+    }
+
+    // Check for AGGREGATE() function
+    if (yardstick_has_aggregate(sql_to_check.c_str())) {
+        YardstickAggregateResult result = yardstick_expand_aggregate(sql_to_check.c_str());
+
+        if (result.error) {
+            // Expansion failed: this might not be a yardstick AGGREGATE() call
+            // (e.g. DuckDB's built-in list aggregate function). Fall through to
+            // the native parser in case it can handle the query.
+            yardstick_free_aggregate_result(result);
+            return ParserOverrideResult();
+        }
+
+        if (result.had_aggregate) {
+            string expanded_sql(result.expanded_sql);
+            yardstick_free_aggregate_result(result);
+
+            // Validate the expanded SQL parses. If expansion produced garbage
+            // (e.g. because AGGREGATE() was actually DuckDB's list aggregate
+            // function, not a yardstick measure), fall through to the native parser.
+            try {
+                Parser validation_parser;
+                validation_parser.ParseQuery(expanded_sql);
+            } catch (...) {
+                return ParserOverrideResult();
+            }
+
+            // Escape single quotes for embedding in string literal
+            string escaped_sql;
+            for (char c : expanded_sql) {
+                if (c == '\'') {
+                    escaped_sql += "''";
+                } else {
+                    escaped_sql += c;
+                }
+            }
+
+            // Wrap in table function call and parse with DuckDB's native parser.
+            // Guarded like the validation parse: on failure, fall through to the
+            // native parser instead of letting the exception escape the override.
+            string wrapper_sql = "SELECT * FROM yardstick('" + escaped_sql + "')";
+
+            try {
+                Parser parser;
+                parser.ParseQuery(wrapper_sql);
+                return ParserOverrideResult(std::move(parser.statements));
+            } catch (...) {
+                return ParserOverrideResult();
+            }
+        }
+
+        yardstick_free_aggregate_result(result);
+    }
+
+    // Check for CREATE VIEW with AS MEASURE
+    if (yardstick_has_as_measure(sql_to_check.c_str())) {
+        // Process the same SEMANTIC-stripped text that was just checked.
+        std::string rewritten_query = RewritePercentileWithinGroup(sql_to_check);
+        YardstickCreateViewResult result = yardstick_process_create_view(rewritten_query.c_str());
+
+        if (result.error) {
+            string error_msg(result.error);
+            yardstick_free_create_view_result(result);
+            // Throw and immediately catch so the error can be returned as a result.
+            try {
+                throw ParserException(error_msg);
+            } catch (std::exception &e) {
+                return ParserOverrideResult(e);
+            }
+        }
+
+        if (result.is_measure_view) {
+            string clean_sql = RewritePercentileWithinGroup(result.clean_sql);
+            yardstick_free_create_view_result(result);
+
+            try {
+                Parser parser;
+                parser.ParseQuery(clean_sql);
+                return ParserOverrideResult(std::move(parser.statements));
+            } catch (std::exception &e) {
+                return ParserOverrideResult(e);
+            }
+        }
+
+        yardstick_free_create_view_result(result);
+    }
+
+    // Not a yardstick query; fall through to DuckDB's native parser
+    return ParserOverrideResult();
+}
+
 ParserExtensionPlanResult yardstick_plan(ParserExtensionInfo *,
                                          ClientContext &context,
                                          unique_ptr parse_data) {
@@ -531,6 +662,10 @@ static void LoadInternal(ExtensionLoader &loader) {
     auto &db = loader.GetDatabaseInstance();
     auto &config = DBConfig::GetConfig(db);
 
+    // Enable parser_override so yardstick intercepts queries before DuckDB's native parser.
+    // FALLBACK mode: if our override doesn't handle the query, DuckDB's parser takes over.
+    config.SetOptionByName("allow_parser_override_extension", Value("fallback"));
+
     // Register parser extension
     YardstickParserExtension parser;
 #if __has_include("duckdb/main/extension_callback_manager.hpp")
diff --git a/test/sql/no_semantic_prefix.test b/test/sql/no_semantic_prefix.test
new file mode 100644
index 0000000..69d078b
--- /dev/null
+++ b/test/sql/no_semantic_prefix.test
@@ -0,0 +1,1845 @@
+# name: test/sql/no_semantic_prefix.test
+# description: Test Julian Hyde "Measures in SQL" functionality
+# group: [yardstick]
+
+require yardstick
+
+# =============================================================================
+# Setup: Create test tables
+# =============================================================================
+
+statement ok
+CREATE TABLE sales (year INT, region TEXT, amount DOUBLE);
+
+statement ok
+INSERT INTO sales VALUES
+    (2022, 'US', 100), (2022, 'EU', 50),
+    (2023, 'US', 150), (2023, 'EU', 75);
+
+# =============================================================================
+# Test: AS MEASURE in CREATE VIEW
+# =============================================================================
+
+statement ok
+CREATE VIEW sales_v AS
+SELECT year, region, SUM(amount) AS MEASURE revenue
+FROM sales;
+
+# Basic query on view with measure
+query IIR rowsort
+SELECT year, region, AGGREGATE(revenue) FROM sales_v;
+----
+2022	EU	50.0
+2022	US	100.0
+2023	EU	75.0
+2023	US	150.0
+
+# =============================================================================
+# 
Test: CTE with AGGREGATE +# ============================================================================= + +query IIR rowsort +WITH a AS ( + SELECT year, region, AGGREGATE(revenue) AS revenue + FROM sales_v +) +SELECT * FROM a; +---- +2022 EU 50.0 +2022 US 100.0 +2023 EU 75.0 +2023 US 150.0 + +# ============================================================================= +# Test: GROUP BY with extra spaces +# ============================================================================= + +statement ok +CREATE VIEW sales_grouped AS +SELECT year, region, SUM(amount) AS MEASURE revenue +FROM sales +GROUP BY year, region; + +query IIR rowsort +SELECT year, region, AGGREGATE(revenue) AT (ALL region) AS year_total +FROM sales_grouped; +---- +2022 EU 150.0 +2022 US 150.0 +2023 EU 225.0 +2023 US 225.0 + +query IIR rowsort +SELECT year, region, AGGREGATE(revenue) AT (ALL region) AS year_total +FROM sales_v +GROUP BY year, region; +---- +2022 EU 150.0 +2022 US 150.0 +2023 EU 225.0 +2023 US 225.0 + +# Positional GROUP BY should resolve to SELECT dimensions (fallback from parser ordinals) +query IR rowsort +SELECT year, AGGREGATE(revenue) +FROM sales_v +GROUP BY 1 +ORDER BY 1; +---- +2022 150.0 +2023 225.0 + +query IIR rowsort +SELECT year, region, AGGREGATE(revenue) +FROM sales_v +GROUP BY 1, 2 +ORDER BY 1, 2; +---- +2022 EU 50.0 +2022 US 100.0 +2023 EU 75.0 +2023 US 150.0 + +# ============================================================================= +# Test: AT (ALL dimension) - remove dimension from context +# ============================================================================= + +# Revenue and total revenue for that year (across all regions) +query IIR rowsort +SELECT year, region, AGGREGATE(revenue) AT (ALL region) AS year_total +FROM sales_v; +---- +2022 EU 150.0 +2022 US 150.0 +2023 EU 225.0 +2023 US 225.0 + +# Lowercase from with line break +query IIR rowsort +SELECT year, region, AGGREGATE(revenue) AT (ALL region) AS year_total +from + sales_v; +---- 
+2022 EU 150.0 +2022 US 150.0 +2023 EU 225.0 +2023 US 225.0 + +# FROM inside a line comment should be ignored +query IIR rowsort +SELECT year, region, AGGREGATE(revenue) AT (ALL region) AS year_total +-- from sales_v +FROM sales_v; +---- +2022 EU 150.0 +2022 US 150.0 +2023 EU 225.0 +2023 US 225.0 + +# FROM inside a block comment should be ignored +query IIR rowsort +SELECT year, region, AGGREGATE(revenue) AT (ALL region) AS year_total +/* from sales_v */ +FROM sales_v; +---- +2022 EU 150.0 +2022 US 150.0 +2023 EU 225.0 +2023 US 225.0 + +# ============================================================================= +# Test: AT modifiers without SEMANTIC prefix +# ============================================================================= + +query IIR rowsort +SELECT year, region, AGGREGATE(revenue) AT (ALL region) AS year_total +FROM sales_v; +---- +2022 EU 150.0 +2022 US 150.0 +2023 EU 225.0 +2023 US 225.0 + +query IIR rowsort +SELECT year, region, AGGREGATE(revenue) AT (WHERE region = 'US') AS us_revenue +FROM sales_v; +---- +2022 EU 250.0 +2022 US 250.0 +2023 EU 250.0 +2023 US 250.0 + +# ============================================================================= +# Test: AT (ALL) - grand total (no dimensions) +# ============================================================================= + +query IIR rowsort +SELECT year, region, AGGREGATE(revenue) AT (ALL) AS grand_total +FROM sales_v; +---- +2022 EU 375.0 +2022 US 375.0 +2023 EU 375.0 +2023 US 375.0 + +# ============================================================================= +# Test: AT (WHERE condition) +# ============================================================================= + +# Revenue for US only +query IIR rowsort +SELECT year, region, AGGREGATE(revenue) AT (WHERE region = 'US') AS us_revenue +FROM sales_v; +---- +2022 EU 250.0 +2022 US 250.0 +2023 EU 250.0 +2023 US 250.0 + +# Qualified columns inside AT (WHERE) +query IIR rowsort +SELECT year, region, AGGREGATE(revenue) AT (WHERE 
sales_v.region = 'US') AS us_revenue +FROM sales_v; +---- +2022 EU 250.0 +2022 US 250.0 +2023 EU 250.0 +2023 US 250.0 + +# ============================================================================= +# Test: AT (SET dimension = expression) - Year over Year +# ============================================================================= + +# Create a simpler view for YoY test +statement ok +CREATE VIEW sales_yearly AS +SELECT year, SUM(amount) AS MEASURE revenue +FROM sales; + +# Prior year comparison +query IR rowsort +SELECT year, AGGREGATE(revenue) AT (SET year = year - 1) AS prior_year +FROM sales_yearly; +---- +2022 NULL +2023 150.0 + +# ============================================================================= +# Test: CURRENT keyword in SET +# ============================================================================= + +query IR rowsort +SELECT year, AGGREGATE(revenue) AT (SET year = CURRENT year - 1) AS prior_year +FROM sales_yearly; +---- +2022 NULL +2023 150.0 + +# ============================================================================= +# Test: AT (VISIBLE) - respects outer WHERE +# ============================================================================= + +query IIR rowsort +SELECT year, region, AGGREGATE(revenue) AT (VISIBLE) AS visible_rev +FROM sales_v +WHERE region = 'US'; +---- +2022 US 100.0 +2023 US 150.0 + +# ============================================================================= +# Test: Chaining AT modifiers +# ============================================================================= + +# Chaining multiple ALL should combine to grand total +query IR rowsort +SELECT year, AGGREGATE(revenue) AT (ALL year) AT (ALL region) AS grand_total +FROM sales_v; +---- +2022 375.0 +2023 375.0 + +# ============================================================================= +# Test: Percent of total calculation +# ============================================================================= + +query IIRR rowsort +SELECT + year, + 
region, + AGGREGATE(revenue) as revenue, + 100.0 * AGGREGATE(revenue) / AGGREGATE(revenue) AT (ALL) AS pct_of_total +FROM sales_v; +---- +2022 EU 50.0 13.333333333333334 +2022 US 100.0 26.666666666666668 +2023 EU 75.0 20.0 +2023 US 150.0 40.0 + +# ============================================================================= +# Test: Year over year with arithmetic +# ============================================================================= + +query IRR rowsort +SELECT + year, + AGGREGATE(revenue) as revenue, + AGGREGATE(revenue) - AGGREGATE(revenue) AT (SET year = year - 1) AS yoy_change +FROM sales_yearly; +---- +2022 150.0 NULL +2023 225.0 75.0 + +# ============================================================================= +# Test: Multiple measures in same view +# ============================================================================= + +statement ok +CREATE VIEW orders_v AS +SELECT + year, + SUM(amount) AS MEASURE total_revenue, + COUNT(*) AS MEASURE order_count, + AVG(amount) AS MEASURE avg_order +FROM sales; + +query IRR rowsort +SELECT year, AGGREGATE(total_revenue), AGGREGATE(avg_order) FROM orders_v; +---- +2022 150.0 75.0 +2023 225.0 112.5 + +# ============================================================================= +# Test: AT (ALL year) - remove year from context (keep region) +# ============================================================================= + +query IIR rowsort +SELECT year, region, AGGREGATE(revenue) AT (ALL year) AS region_total +FROM sales_v; +---- +2022 EU 125.0 +2022 US 250.0 +2023 EU 125.0 +2023 US 250.0 + +# ============================================================================= +# Test: AT (ALL dim) when dim is the only GROUP BY column +# ============================================================================= + +query IR rowsort +SELECT year, AGGREGATE(revenue) AT (ALL year) AS grand_total +FROM sales_yearly; +---- +2022 375.0 +2023 375.0 + +# 
============================================================================= +# Test: AT (VISIBLE) without WHERE clause - should be identity +# ============================================================================= + +query IIR rowsort +SELECT year, region, AGGREGATE(revenue) AT (VISIBLE) AS same_as_base +FROM sales_v; +---- +2022 EU 50.0 +2022 US 100.0 +2023 EU 75.0 +2023 US 150.0 + +# ============================================================================= +# Test: AT (VISIBLE) with year filter +# ============================================================================= + +query IIR rowsort +SELECT year, region, AGGREGATE(revenue) AT (VISIBLE) AS visible_2023 +FROM sales_v +WHERE year = 2023; +---- +2023 EU 75.0 +2023 US 150.0 + +# ============================================================================= +# Test: AT (WHERE) with complex condition (AND) +# ============================================================================= + +query IIR rowsort +SELECT year, region, AGGREGATE(revenue) AT (WHERE year = 2023 AND region = 'US') AS us_2023 +FROM sales_v; +---- +2022 EU 150.0 +2022 US 150.0 +2023 EU 150.0 +2023 US 150.0 + +# ============================================================================= +# Test: AT (WHERE) with OR condition +# ============================================================================= + +query IIR rowsort +SELECT year, region, AGGREGATE(revenue) AT (WHERE region = 'US' OR year = 2022) AS filtered +FROM sales_v; +---- +2022 EU 300.0 +2022 US 300.0 +2023 EU 300.0 +2023 US 300.0 + +# ============================================================================= +# Test: AT (SET) with future year +# ============================================================================= + +query IR rowsort +SELECT year, AGGREGATE(revenue) AT (SET year = year + 1) AS next_year +FROM sales_yearly; +---- +2022 225.0 +2023 NULL + +# ============================================================================= +# Test: Percent 
of year total +# ============================================================================= + +query IIRR rowsort +SELECT + year, + region, + AGGREGATE(revenue) as revenue, + 100.0 * AGGREGATE(revenue) / AGGREGATE(revenue) AT (ALL region) AS pct_of_year +FROM sales_v; +---- +2022 EU 50.0 33.33333333333333 +2022 US 100.0 66.66666666666667 +2023 EU 75.0 33.33333333333333 +2023 US 150.0 66.66666666666667 + +# ============================================================================= +# Test: Multiple measures with different AT modifiers +# ============================================================================= + +query IRRR rowsort +SELECT + year, + AGGREGATE(total_revenue) as revenue, + AGGREGATE(total_revenue) AT (ALL year) as grand_total, + AGGREGATE(avg_order) AT (SET year = year - 1) as prev_avg +FROM orders_v; +---- +2022 150.0 375.0 NULL +2023 225.0 375.0 75.0 + +# ============================================================================= +# Test: YoY growth percentage +# ============================================================================= + +query IRR rowsort +SELECT + year, + AGGREGATE(revenue) as revenue, + 100.0 * (AGGREGATE(revenue) - AGGREGATE(revenue) AT (SET year = year - 1)) / AGGREGATE(revenue) AT (SET year = year - 1) AS yoy_pct +FROM sales_yearly; +---- +2022 150.0 NULL +2023 225.0 50.0 + +# ============================================================================= +# Test: Three-dimensional data +# ============================================================================= + +statement ok +CREATE TABLE products (year INT, region TEXT, category TEXT, amount DOUBLE); + +statement ok +INSERT INTO products VALUES + (2022, 'US', 'A', 100), (2022, 'US', 'B', 50), + (2022, 'EU', 'A', 80), (2022, 'EU', 'B', 40), + (2023, 'US', 'A', 120), (2023, 'US', 'B', 60), + (2023, 'EU', 'A', 100), (2023, 'EU', 'B', 50); + +statement ok +CREATE VIEW products_v AS +SELECT year, region, category, SUM(amount) AS MEASURE revenue +FROM products; 
+ +# Grand total across all dimensions +query IIIR rowsort +SELECT year, region, category, AGGREGATE(revenue) AT (ALL) AS grand_total +FROM products_v +; +---- +2022 EU A 600.0 +2022 EU B 600.0 +2022 US A 600.0 +2022 US B 600.0 +2023 EU A 600.0 +2023 EU B 600.0 +2023 US A 600.0 +2023 US B 600.0 + +# Scalar aggregation without GROUP BY - should return single row +query R +SELECT AGGREGATE(revenue) FROM sales_v; +---- +375.0 + +# Total by year only - chaining AT (ALL region) AT (ALL category) correlates on year +query IIIR rowsort +SELECT year, region, category, AGGREGATE(revenue) AT (ALL region) AT (ALL category) AS year_total +FROM products_v +; +---- +2022 EU A 270.0 +2022 EU B 270.0 +2022 US A 270.0 +2022 US B 270.0 +2023 EU A 330.0 +2023 EU B 330.0 +2023 US A 330.0 +2023 US B 330.0 + +# Total by category only - chaining AT (ALL year) AT (ALL region) correlates on category +query IIIR rowsort +SELECT year, region, category, AGGREGATE(revenue) AT (ALL year) AT (ALL region) AS category_total +FROM products_v +; +---- +2022 EU A 400.0 +2022 EU B 200.0 +2022 US A 400.0 +2022 US B 200.0 +2023 EU A 400.0 +2023 EU B 200.0 +2023 US A 400.0 +2023 US B 200.0 + +# ============================================================================= +# Test: MIN and MAX measures +# ============================================================================= + +statement ok +CREATE VIEW sales_minmax AS +SELECT year, region, MIN(amount) AS MEASURE min_sale, MAX(amount) AS MEASURE max_sale +FROM sales; + +query IIRR rowsort +SELECT year, region, AGGREGATE(min_sale), AGGREGATE(max_sale) FROM sales_minmax; +---- +2022 EU 50.0 50.0 +2022 US 100.0 100.0 +2023 EU 75.0 75.0 +2023 US 150.0 150.0 + +# ============================================================================= +# Test: COUNT measure +# Note: COUNT(*) in a measure is evaluated against the base relation and re-aggregated +# ============================================================================= + +query II rowsort 
+SELECT year, AGGREGATE(order_count) FROM orders_v; +---- +2022 2 +2023 2 + +# Count with AT (ALL) +query II rowsort +SELECT year, AGGREGATE(order_count) AT (ALL) FROM orders_v; +---- +2022 4 +2023 4 + +# ============================================================================= +# Test: Combining base AGGREGATE with multiple AT variants +# ============================================================================= + +query IIRRRR rowsort +SELECT + year, + region, + AGGREGATE(revenue) as base, + AGGREGATE(revenue) AT (ALL region) as year_total, + AGGREGATE(revenue) AT (ALL year) as region_total, + AGGREGATE(revenue) AT (ALL) as grand_total +FROM sales_v; +---- +2022 EU 50.0 150.0 125.0 375.0 +2022 US 100.0 150.0 250.0 375.0 +2023 EU 75.0 225.0 125.0 375.0 +2023 US 150.0 225.0 250.0 375.0 + +# ============================================================================= +# Test: AT (SET) combined with AT (ALL) +# ============================================================================= + +query IRR rowsort +SELECT + year, + AGGREGATE(revenue) AT (SET year = year - 1) as prior_year, + AGGREGATE(revenue) AT (SET year = year - 1) AT (ALL year) as prior_grand +FROM sales_yearly; +---- +2022 NULL 375.0 +2023 150.0 375.0 + +# ============================================================================= +# Test: Negative and zero values +# ============================================================================= + +statement ok +CREATE TABLE adjustments (year INT, region TEXT, amount DOUBLE); + +statement ok +INSERT INTO adjustments VALUES + (2022, 'US', -20), (2022, 'EU', 10), + (2023, 'US', 0), (2023, 'EU', -5); + +statement ok +CREATE VIEW adj_v AS +SELECT year, region, SUM(amount) AS MEASURE adjustment +FROM adjustments; + +query IIR rowsort +SELECT year, region, AGGREGATE(adjustment) FROM adj_v; +---- +2022 EU 10.0 +2022 US -20.0 +2023 EU -5.0 +2023 US 0.0 + +query IIR rowsort +SELECT year, region, AGGREGATE(adjustment) AT (ALL) AS total_adj +FROM adj_v; 
+---- +2022 EU -15.0 +2022 US -15.0 +2023 EU -15.0 +2023 US -15.0 + +# ============================================================================= +# Test: Moving total (2-year window using SET) +# ============================================================================= + +statement ok +CREATE TABLE yearly_data (year INT, amount DOUBLE); + +statement ok +INSERT INTO yearly_data VALUES (2020, 100), (2021, 120), (2022, 150), (2023, 180); + +statement ok +CREATE VIEW yearly_v AS +SELECT year, SUM(amount) AS MEASURE revenue +FROM yearly_data; + +query IRR rowsort +SELECT + year, + AGGREGATE(revenue) as current, + AGGREGATE(revenue) + AGGREGATE(revenue) AT (SET year = year - 1) as two_year_total +FROM yearly_v; +---- +2020 100.0 NULL +2021 120.0 220.0 +2022 150.0 270.0 +2023 180.0 330.0 + +# ============================================================================= +# Test: Comparison operations with AGGREGATE +# NOTE: no comparison tests are defined here yet; CASE expressions in AS MEASURE are covered by the "CASE expression in measure" test below +# ============================================================================= + +# ============================================================================= +# Test: AT (WHERE) with numeric comparison on view column +# ============================================================================= + +query IIR rowsort +SELECT year, region, AGGREGATE(revenue) AT (WHERE year > 2022) AS recent_sales +FROM sales_v; +---- +2022 EU 225.0 +2022 US 225.0 +2023 EU 225.0 +2023 US 225.0 + +# ============================================================================= +# Test: Single row result with AT (ALL) - requires empty GROUP BY or no GROUP BY collapses to view rows +# Note: Without GROUP BY, returns one row per view row (all with grand total) +# ============================================================================= + +# ============================================================================= +# Test: AT modifiers with aliased table +# 
============================================================================= + +# Table alias without AS keyword +query IR rowsort +SELECT s.year, AGGREGATE(revenue) AT (SET year = year - 1) AS prior_year +FROM sales_yearly s +; +---- +2022 NULL +2023 150.0 + +# Table alias with AS keyword +query IR rowsort +SELECT s.year, AGGREGATE(revenue) AT (ALL year) AS grand_total +FROM sales_yearly AS s +; +---- +2022 375.0 +2023 375.0 + +# Table alias with VISIBLE modifier +query IIR rowsort +SELECT s.year, s.region, AGGREGATE(revenue) AT (VISIBLE) AS visible_rev +FROM sales_v AS s +WHERE s.region = 'US' +; +---- +2022 US 100.0 +2023 US 150.0 + +# ============================================================================= +# Test: Arithmetic applied to an AGGREGATE result (scalar multiple of a measure) +# ============================================================================= + +query IR rowsort +SELECT year, 2 * AGGREGATE(revenue) AS doubled +FROM sales_yearly; +---- +2022 300.0 +2023 450.0 + +# ============================================================================= +# Test: Ratio to parent (region share of year) +# ============================================================================= + +query IIRR rowsort +SELECT + year, + region, + AGGREGATE(revenue) as revenue, + AGGREGATE(revenue) / AGGREGATE(revenue) AT (ALL region) AS share_of_year +FROM sales_v; +---- +2022 EU 50.0 0.3333333333333333 +2022 US 100.0 0.6666666666666666 +2023 EU 75.0 0.3333333333333333 +2023 US 150.0 0.6666666666666666 + +# ============================================================================= +# Test: Index / ratio to base period +# ============================================================================= + +query IRR rowsort +SELECT + year, + AGGREGATE(revenue) as revenue, + AGGREGATE(revenue) / AGGREGATE(revenue) AT (SET year = 2022) AS index_to_2022 +FROM sales_yearly; +---- +2022 150.0 1.0 +2023 225.0 1.5 + +# =============================================================================
+# Test: Difference from average +# ============================================================================= + +statement ok +CREATE VIEW quarterly AS +SELECT year, quarter, SUM(amount) AS MEASURE revenue +FROM (VALUES (2022, 1, 100), (2022, 2, 120), (2022, 3, 90), (2022, 4, 140)) AS t(year, quarter, amount); + +query IIR rowsort +SELECT + year, + quarter, + AGGREGATE(revenue) - (AGGREGATE(revenue) AT (ALL quarter) / 4.0) AS diff_from_avg +FROM quarterly +; +---- +2022 1 -12.5 +2022 2 7.5 +2022 3 -22.5 +2022 4 27.5 + +# ============================================================================= +# Test: CASE expression in measure +# ============================================================================= + +statement ok +CREATE OR REPLACE VIEW case_measure AS +SELECT year, CASE WHEN SUM(amount) > 150 THEN 1 ELSE 0 END AS MEASURE high_value +FROM (VALUES (2022, 100), (2022, 50), (2023, 200), (2023, 100)) AS t(year, amount); + +query II rowsort +SELECT year, AGGREGATE(high_value) +FROM case_measure; +---- +2022 0 +2023 1 + +# ============================================================================= +# Test: Derived measures (measures referencing other measures) +# ============================================================================= + +statement ok +CREATE TABLE financials (year INT, revenue DOUBLE, cost DOUBLE); + +statement ok +INSERT INTO financials VALUES + (2022, 100, 60), (2022, 150, 80), + (2023, 200, 100), (2023, 250, 120); + +statement ok +CREATE VIEW financials_v AS +SELECT year, + SUM(revenue) AS MEASURE revenue, + SUM(cost) AS MEASURE cost, + revenue - cost AS MEASURE profit +FROM financials; + +# Query basic measures +query IRR rowsort +SELECT year, AGGREGATE(revenue), AGGREGATE(cost) +FROM financials_v; +---- +2022 250.0 140.0 +2023 450.0 220.0 + +# Query derived measure (profit = revenue - cost) +query IR rowsort +SELECT year, AGGREGATE(profit) +FROM financials_v; +---- +2022 110.0 +2023 230.0 + +# Derived measure with AT 
modifier +query IRR rowsort +SELECT year, AGGREGATE(profit), AGGREGATE(profit) AT (ALL) AS total_profit +FROM financials_v; +---- +2022 110.0 340.0 +2023 230.0 340.0 + +# ============================================================================= +# Test: Ad hoc dimensions (expressions in SET/ALL) +# ============================================================================= + +# Create table with dates for ad hoc dimension testing +statement ok +CREATE TABLE daily_orders (order_date DATE, amount DOUBLE); + +statement ok +INSERT INTO daily_orders VALUES + ('2023-01-15', 100), ('2023-01-20', 150), + ('2023-02-10', 200), ('2023-02-25', 120), + ('2023-03-05', 180), ('2023-03-15', 90); + +# View exposes order_date so we can use expressions on it +statement ok +CREATE VIEW daily_orders_v AS +SELECT order_date, SUM(amount) AS MEASURE revenue +FROM daily_orders; + +# SET with expression dimension: fix to a specific month +# This generates WHERE MONTH(_inner.order_date) = 2 +query IRR rowsort +SELECT MONTH(order_date), AGGREGATE(revenue), AGGREGATE(revenue) AT (SET MONTH(order_date) = 2) AS feb_revenue +FROM daily_orders_v +; +---- +1 250.0 320.0 +2 320.0 320.0 +3 270.0 320.0 + +# ALL with expression dimension: remove the month grouping, get total +query IRR rowsort +SELECT MONTH(order_date), AGGREGATE(revenue), AGGREGATE(revenue) AT (ALL MONTH(order_date)) AS total +FROM daily_orders_v +; +---- +1 250.0 840.0 +2 320.0 840.0 +3 270.0 840.0 + +# ============================================================================= +# Test: GROUP BY alias for expression dimension +# ============================================================================= + +statement ok +CREATE TABLE monthly_sales (order_date DATE, region TEXT, amount DOUBLE); + +statement ok +INSERT INTO monthly_sales VALUES + ('2023-01-05', 'US', 100), ('2023-01-12', 'EU', 50), + ('2023-02-03', 'US', 200), ('2023-02-20', 'EU', 20); + +statement ok +CREATE VIEW monthly_sales_v AS +SELECT DATE_TRUNC('month', 
order_date) AS month, region, SUM(amount) AS MEASURE revenue +FROM monthly_sales +GROUP BY DATE_TRUNC('month', order_date), region; + +query IIRR rowsort +SELECT month, region, AGGREGATE(revenue), AGGREGATE(revenue) AT (ALL region) AS month_total +FROM monthly_sales_v +; +---- +2023-01-01 00:00:00 EU 50.0 150.0 +2023-01-01 00:00:00 US 100.0 150.0 +2023-02-01 00:00:00 EU 20.0 220.0 +2023-02-01 00:00:00 US 200.0 220.0 + +# ============================================================================= +# Test: Multi-fact JOINs (wide tables) +# ============================================================================= + +# Create two fact tables +statement ok +CREATE TABLE fact_orders (year INT, region TEXT, amount DOUBLE); + +statement ok +INSERT INTO fact_orders VALUES (2022, 'US', 100), (2022, 'EU', 50), (2023, 'US', 150), (2023, 'EU', 75); + +statement ok +CREATE TABLE fact_returns (year INT, region TEXT, return_amount DOUBLE); + +statement ok +INSERT INTO fact_returns VALUES (2022, 'US', 10), (2022, 'EU', 5), (2023, 'US', 20), (2023, 'EU', 8); + +# Create two measure views +statement ok +CREATE VIEW fact_orders_v AS SELECT year, region, SUM(amount) AS MEASURE revenue FROM fact_orders; + +statement ok +CREATE VIEW fact_returns_v AS SELECT year, region, SUM(return_amount) AS MEASURE refunds FROM fact_returns; + +# Query measures from both fact tables in a JOIN +query IIRR rowsort +SELECT o.year, o.region, AGGREGATE(revenue), AGGREGATE(refunds) +FROM fact_orders_v o JOIN fact_returns_v r ON o.year = r.year AND o.region = r.region +; +---- +2022 EU 50.0 5.0 +2022 US 100.0 10.0 +2023 EU 75.0 8.0 +2023 US 150.0 20.0 + +# Compute derived metric across fact tables (net revenue = revenue - refunds) +query IIRRR rowsort +SELECT o.year, o.region, AGGREGATE(revenue), AGGREGATE(refunds), AGGREGATE(revenue) - AGGREGATE(refunds) AS net_revenue +FROM fact_orders_v o JOIN fact_returns_v r ON o.year = r.year AND o.region = r.region +; +---- +2022 EU 50.0 5.0 45.0 +2022 US 100.0 
10.0 90.0 +2023 EU 75.0 8.0 67.0 +2023 US 150.0 20.0 130.0 + +# AT modifiers work with multi-fact JOINs +query IIRR rowsort +SELECT o.year, o.region, AGGREGATE(revenue), AGGREGATE(revenue) AT (ALL region) AS year_total +FROM fact_orders_v o JOIN fact_returns_v r ON o.year = r.year AND o.region = r.region +; +---- +2022 EU 50.0 150.0 +2022 US 100.0 150.0 +2023 EU 75.0 225.0 +2023 US 150.0 225.0 + +# ============================================================================= +# Test: JOIN with extra dimension from second table +# ============================================================================= + +statement ok +CREATE TABLE salesdetails (year INT, region TEXT, product TEXT, amount DOUBLE); + +statement ok +INSERT INTO salesdetails VALUES + (2022, 'US', 'Shoes', 2), (2022, 'US', 'Cars', 1), + (2022, 'EU', 'Shoes', 3), + (2023, 'US', 'Shoes', 4), (2023, 'US', 'Cars', 2), + (2023, 'EU', 'Cars', 5); + +statement ok +CREATE VIEW salesdetails_v AS +SELECT year, region, product, SUM(amount) AS MEASURE quantity +FROM salesdetails; + +query IIIRRR rowsort +SELECT s.year, s.region, sd.product, + AGGREGATE(revenue) AS year_sales_revenue, + AGGREGATE(revenue) AT (ALL year) AS region_total, + AGGREGATE(quantity) AS product_qty +FROM sales_v s JOIN salesdetails_v sd ON s.year = sd.year AND s.region = sd.region +; +---- +2022 EU Shoes 50.0 125.0 3.0 +2022 US Cars 100.0 250.0 1.0 +2022 US Shoes 100.0 250.0 2.0 +2023 EU Cars 75.0 125.0 5.0 +2023 US Cars 150.0 250.0 2.0 +2023 US Shoes 150.0 250.0 4.0 + +# ============================================================================= +# Test: SET reaches beyond WHERE clause (paper semantics) +# Per paper: SET should evaluate over data removed by outer WHERE clause +# ============================================================================= + +# SET should reach 2022 data even when outer WHERE filters to 2023 +query IR rowsort +SELECT year, AGGREGATE(revenue) AT (SET year = year - 1) AS prior_year +FROM sales_yearly 
+WHERE year = 2023; +---- +2023 150.0 + +# ============================================================================= +# Test: SET correlates on remaining dimensions (paper semantics) +# Per paper: SET only removes terms for specified dimension, correlates on others +# ============================================================================= + +# SET year=2022 should return 2022's value per region (not grand total) +query IIR rowsort +SELECT year, region, AGGREGATE(revenue) AT (SET year = 2022) AS fixed_2022 +FROM sales_v; +---- +2022 EU 50.0 +2022 US 100.0 +2023 EU 50.0 +2023 US 100.0 + +# Verify: without SET, each row shows its own revenue +query IIR rowsort +SELECT year, region, AGGREGATE(revenue) AS own_revenue +FROM sales_v; +---- +2022 EU 50.0 +2022 US 100.0 +2023 EU 75.0 +2023 US 150.0 + +# ============================================================================= +# Test: DuckDB-specific scalar functions in measures +# ============================================================================= + +# Date functions: MONTH, YEAR, DATE_TRUNC +statement ok +CREATE TABLE dated_sales (sale_date DATE, amount DOUBLE); + +statement ok +INSERT INTO dated_sales VALUES + ('2023-01-15', 100), ('2023-01-20', 150), + ('2023-02-10', 200), ('2023-03-05', 80), + ('2024-01-08', 250), ('2024-02-14', 180); + +statement ok +CREATE VIEW dated_sales_v AS +SELECT sale_date, SUM(amount) AS MEASURE revenue +FROM dated_sales; + +# YEAR() and MONTH() in GROUP BY with AGGREGATE +query IIR rowsort +SELECT YEAR(sale_date), MONTH(sale_date), AGGREGATE(revenue) +FROM dated_sales_v +; +---- +2023 1 250.0 +2023 2 200.0 +2023 3 80.0 +2024 1 250.0 +2024 2 180.0 + +# DATE_TRUNC in GROUP BY +query IR rowsort +SELECT DATE_TRUNC('year', sale_date) AS yr, AGGREGATE(revenue) +FROM dated_sales_v +; +---- +2023-01-01 00:00:00 530.0 +2024-01-01 00:00:00 430.0 + +# ============================================================================= +# Test: DuckDB string functions in measures +# 
============================================================================= + +statement ok +CREATE TABLE products_str (category VARCHAR, subcategory VARCHAR, amount DOUBLE); + +statement ok +INSERT INTO products_str VALUES + ('Electronics', 'Phones', 500), + ('Electronics', 'Laptops', 800), + ('Clothing', 'Shirts', 100), + ('Clothing', 'Pants', 150); + +statement ok +CREATE VIEW products_str_v AS +SELECT category, subcategory, SUM(amount) AS MEASURE revenue +FROM products_str; + +# UPPER in SELECT with AGGREGATE +query IR rowsort +SELECT UPPER(category), AGGREGATE(revenue) +FROM products_str_v +; +---- +CLOTHING 250.0 +ELECTRONICS 1300.0 + +# LEFT function +query IR rowsort +SELECT LEFT(category, 4) AS cat_prefix, AGGREGATE(revenue) +FROM products_str_v +; +---- +Clot 250.0 +Elec 1300.0 + +# ============================================================================= +# Test: DuckDB COALESCE +# ============================================================================= + +statement ok +CREATE TABLE nullable_sales (region VARCHAR, amount DOUBLE); + +statement ok +INSERT INTO nullable_sales VALUES + ('US', 100), (NULL, 50), ('EU', 75), (NULL, 25); + +statement ok +CREATE VIEW nullable_sales_v AS +SELECT region, SUM(amount) AS MEASURE revenue +FROM nullable_sales; + +# COALESCE in GROUP BY +query IR rowsort +SELECT COALESCE(region, 'Unknown'), AGGREGATE(revenue) +FROM nullable_sales_v +; +---- +EU 75.0 +US 100.0 +Unknown 75.0 + +# ============================================================================= +# Test: DuckDB list/array functions +# ============================================================================= + +statement ok +CREATE TABLE tagged_items (tags VARCHAR[], amount DOUBLE); + +statement ok +INSERT INTO tagged_items VALUES + (['a', 'b'], 100), + (['b', 'c'], 150), + (['a'], 80); + +statement ok +CREATE VIEW tagged_items_v AS +SELECT tags, SUM(amount) AS MEASURE revenue +FROM tagged_items; + +# ARRAY_LENGTH in 
SELECT +query IR rowsort +SELECT ARRAY_LENGTH(tags), AGGREGATE(revenue) +FROM tagged_items_v +; +---- +1 80.0 +2 250.0 + +# ============================================================================= +# Test: DuckDB :: cast syntax +# ============================================================================= + +query IR rowsort +SELECT year, AGGREGATE(revenue)::INTEGER AS revenue_int +FROM sales_yearly; +---- +2022 150 +2023 225 + +# ============================================================================= +# Test: DuckDB FILTER clause on aggregates +# ============================================================================= + +statement ok +CREATE VIEW filtered_agg_v AS +SELECT year, + SUM(amount) AS MEASURE total_revenue, + SUM(amount) FILTER (WHERE region = 'US') AS MEASURE us_revenue +FROM sales; + +query IRR rowsort +SELECT year, AGGREGATE(total_revenue), AGGREGATE(us_revenue) +FROM filtered_agg_v; +---- +2022 150.0 100.0 +2023 225.0 150.0 + +# ============================================================================= +# Test: DuckDB DISTINCT in aggregate +# Note: COUNT(DISTINCT) is non-additive, so we recompute from base rows +# when the view is a simple single-table source. 
+# ============================================================================= + +statement ok +CREATE TABLE dup_regions (year INT, region VARCHAR, amount DOUBLE); + +statement ok +INSERT INTO dup_regions VALUES + (2023, 'US', 100), (2023, 'US', 100), (2023, 'EU', 50); + +statement ok +CREATE VIEW distinct_count_v AS +SELECT year, COUNT(DISTINCT region) AS MEASURE unique_regions +FROM dup_regions; + +# COUNT(DISTINCT) works when querying the view directly +query II rowsort +SELECT year, unique_regions FROM distinct_count_v; +---- +2023 2 + +# AGGREGATE() recomputes COUNT(DISTINCT) for simple base views +query II rowsort +SELECT year, AGGREGATE(unique_regions) FROM distinct_count_v; +---- +2023 2 + +# AT (ALL) also recomputes for simple base views +query II rowsort +SELECT year, AGGREGATE(unique_regions) AT (ALL) FROM distinct_count_v; +---- +2023 2 + +# COUNT(DISTINCT) with CTE-based view +statement ok +CREATE VIEW distinct_count_cte_v AS +WITH base AS (SELECT * FROM dup_regions) +SELECT year, COUNT(DISTINCT region) AS MEASURE unique_regions +FROM base; + +query II rowsort +SELECT year, AGGREGATE(unique_regions) FROM distinct_count_cte_v; +---- +2023 2 + +# ============================================================================= +# Test: MEDIAN (non-decomposable) is recomputed from base rows +# ============================================================================= + +statement ok +CREATE TABLE median_test (category VARCHAR, value DOUBLE); + +statement ok +INSERT INTO median_test VALUES ('A', 10), ('A', 20), ('A', 30), ('B', 100), ('B', 100); + +statement ok +CREATE VIEW median_v AS +SELECT category, MEDIAN(value) AS MEASURE med_value +FROM median_test; + +# Direct query works +query II rowsort +SELECT category, med_value FROM median_v; +---- +A 20.0 +B 100.0 + +# AGGREGATE() recomputes for non-decomposable measures +query II rowsort +SELECT category, AGGREGATE(med_value) FROM median_v; +---- +A 20.0 +B 100.0 + +# AT (ALL) recomputes from base rows +query R +SELECT 
AGGREGATE(med_value) AT (ALL) FROM median_v; +---- +30.0 + +# ============================================================================= +# Test: MODE / QUANTILE aggregates (non-decomposable) +# ============================================================================= + +statement ok +CREATE TABLE ordered_set_test (category VARCHAR, value INT); + +statement ok +INSERT INTO ordered_set_test VALUES + ('A', 1), ('A', 1), ('A', 2), ('A', 3), ('A', 4), + ('B', 10), ('B', 10), ('B', 20); + +statement ok +CREATE VIEW ordered_set_v AS +SELECT + category, + PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY value) AS MEASURE p50, + PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BY value) AS MEASURE p50d, + QUANTILE_CONT(value, 0.5) AS MEASURE q50, + QUANTILE_DISC(value, 0.5) AS MEASURE q50d, + MODE(value) AS MEASURE mode_value +FROM ordered_set_test; + +query II rowsort +SELECT category, AGGREGATE(p50) FROM ordered_set_v; +---- +A 2.0 +B 10.0 + +query II rowsort +SELECT category, AGGREGATE(p50d) FROM ordered_set_v; +---- +A 2 +B 10 + +query II rowsort +SELECT category, AGGREGATE(q50) FROM ordered_set_v; +---- +A 2.0 +B 10.0 + +query II rowsort +SELECT category, AGGREGATE(q50d) FROM ordered_set_v; +---- +A 2 +B 10 + +query II rowsort +SELECT category, AGGREGATE(mode_value) FROM ordered_set_v; +---- +A 1 +B 10 + +# ============================================================================= +# Test: DuckDB generate_series / UNNEST +# ============================================================================= + +# ============================================================================= +# Test: Window function in AS MEASURE +# ============================================================================= + +statement ok +CREATE TABLE window_measure_orders (year INT, revenue INT); + +statement ok +INSERT INTO window_measure_orders VALUES + (2021, 10), + (2022, 20), + (2023, 30); + +statement ok +CREATE VIEW window_measure_v AS +SELECT + year, + SUM(revenue) OVER (ORDER BY 
year) AS MEASURE running_total +FROM window_measure_orders; + +query IT rowsort +SELECT year, running_total::VARCHAR +FROM window_measure_v +ORDER BY year; +---- +2021 10 +2022 30 +2023 60 + +query IT rowsort +SELECT year, AGGREGATE(running_total)::VARCHAR +FROM window_measure_v +GROUP BY year +ORDER BY year; +---- +2021 10 +2022 30 +2023 60 + +query IT rowsort +SELECT year, AGGREGATE(running_total) AT (WHERE year = 2022)::VARCHAR +FROM window_measure_v; +---- +2021 20 +2022 20 +2023 20 + +statement error +SELECT year, AGGREGATE(running_total) AT (ALL) +FROM window_measure_v +GROUP BY year; +---- +Window measure running_total returned multiple values for the evaluation context + +statement ok +CREATE VIEW window_total_v AS +SELECT + year, + SUM(revenue) OVER () AS MEASURE global_total +FROM window_measure_orders; + +query IT rowsort +SELECT year, AGGREGATE(global_total) AT (ALL)::VARCHAR +FROM window_total_v +GROUP BY year +ORDER BY year; +---- +2021 60 +2022 60 +2023 60 + +statement ok +CREATE VIEW series_v AS +SELECT x, SUM(x) AS MEASURE total +FROM generate_series(1, 5) AS t(x); + +query II rowsort +SELECT x, AGGREGATE(total) FROM series_v ; +---- +1 1 +2 2 +3 3 +4 4 +5 5 + +# Grand total of series (with scalar aggregation) +query I +SELECT AGGREGATE(total) FROM series_v; +---- +15 + +# ============================================================================= +# Test: Literal constants in SELECT should not become GROUP BY terms (#10) +# ============================================================================= + +# Integer constant with AGGREGATE +query IR +SELECT 1000, AGGREGATE(revenue) FROM sales_v; +---- +1000 375.0 + +# String literal with AGGREGATE +query IR +SELECT 'hello', AGGREGATE(revenue) FROM sales_v; +---- +hello 375.0 + +# Dimension column alongside integer constant +query IIR rowsort +SELECT year, 1000, AGGREGATE(revenue) FROM sales_v; +---- +2022 1000 150.0 +2023 1000 225.0 + +# NULL constant with AGGREGATE +query IR +SELECT NULL, 
AGGREGATE(revenue) FROM sales_v; +---- +NULL 375.0 + +# ============================================================================= +# Test: NULL dimension values handled correctly (#6) +# ============================================================================= + +# Setup: table with NULL dimension values +statement ok +CREATE TABLE sales_nulls (year INT, region TEXT, amount DOUBLE); + +statement ok +INSERT INTO sales_nulls VALUES + (2022, 'US', 100), (2022, NULL, 50), + (2023, 'US', 150), (2023, NULL, 75), + (NULL, 'US', 10), (NULL, NULL, 5); + +statement ok +CREATE VIEW sales_nulls_v AS +SELECT year, region, SUM(amount) AS MEASURE revenue +FROM sales_nulls; + +# Basic: NULL region rows should appear with correct aggregation +query IIR rowsort +SELECT year, region, AGGREGATE(revenue) FROM sales_nulls_v; +---- +2022 NULL 50.0 +2022 US 100.0 +2023 NULL 75.0 +2023 US 150.0 +NULL NULL 5.0 +NULL US 10.0 + +# AT (ALL region): year totals should include NULL region rows +query IIR rowsort +SELECT year, region, AGGREGATE(revenue) AT (ALL region) AS year_total FROM sales_nulls_v; +---- +2022 NULL 150.0 +2022 US 150.0 +2023 NULL 225.0 +2023 US 225.0 +NULL NULL 15.0 +NULL US 15.0 + +# AT (ALL year): region totals should include NULL year rows +query IIR rowsort +SELECT year, region, AGGREGATE(revenue) AT (ALL year) AS region_total FROM sales_nulls_v; +---- +2022 NULL 130.0 +2022 US 260.0 +2023 NULL 130.0 +2023 US 260.0 +NULL NULL 130.0 +NULL US 260.0 + +# Grand total across all (including NULLs) +query I +SELECT AGGREGATE(revenue) FROM sales_nulls_v; +---- +390.0 + +# ============================================================================= +# Paper regression matrix (consolidated) +# ============================================================================= + +# Case 1: AS MEASURE view cardinality should match base relation +statement ok +CREATE TABLE orders_cardinality (order_date DATE, prod TEXT, revenue INT, cost INT); + +statement ok +INSERT INTO 
orders_cardinality VALUES + ('2024-01-01', 'A', 100, 40), + ('2024-01-01', 'A', 50, 10); + +statement ok +CREATE VIEW orders_cardinality_v AS +SELECT + order_date, + prod, + (SUM(revenue) - SUM(cost))::DOUBLE / SUM(revenue) AS MEASURE profit_margin +FROM orders_cardinality; + +query II +SELECT + (SELECT COUNT(*) FROM orders_cardinality) AS base_rows, + (SELECT COUNT(*) FROM orders_cardinality_v) AS view_rows; +---- +2 2 + +# Case 2: implicit measure reference in grouped SELECT +query IR rowsort +SELECT year, revenue +FROM sales_v +GROUP BY year +ORDER BY year; +---- +2022 150.0 +2023 225.0 + +# Case 3: AT (ALL d1 d2) in a single modifier clause +query IIRR rowsort +SELECT + year, + region, + AGGREGATE(revenue) AT (ALL year region) AS single_all, + AGGREGATE(revenue) AT (ALL year) AT (ALL region) AS chained_all +FROM sales_v +ORDER BY year, region; +---- +2022 EU 375.0 375.0 +2022 US 375.0 375.0 +2023 EU 375.0 375.0 +2023 US 375.0 375.0 + +# Case 4: qualified measure in join +statement ok +CREATE TABLE customers_qualified (cust_id INT, age INT); + +statement ok +INSERT INTO customers_qualified VALUES (1, 20), (2, 40); + +statement ok +CREATE TABLE orders_qualified (cust_id INT, product TEXT); + +statement ok +INSERT INTO orders_qualified VALUES (1, 'X'), (1, 'X'), (2, 'X'); + +statement ok +CREATE VIEW customers_qualified_v AS +SELECT cust_id, AVG(age) AS MEASURE avg_age +FROM customers_qualified; + +query TRR +SELECT + o.product, + AGGREGATE(c.avg_age) AS measure_avg, + AGGREGATE(c.avg_age) AT (VISIBLE) AS visible_avg +FROM orders_qualified o +JOIN customers_qualified_v c ON o.cust_id = c.cust_id +GROUP BY o.product; +---- +X 30.0 30.0 + +# Case 5: nested function parentheses in AT (WHERE ...) +query IR rowsort +SELECT + YEAR(sale_date) AS y, + AGGREGATE(revenue) AT (WHERE YEAR(sale_date) = 2023) AS y2023 +FROM dated_sales_v +GROUP BY YEAR(sale_date) +ORDER BY y; +---- +2023 530.0 +2024 530.0 + +# Case 6: derived measure with AT (SET ...) 
+query IRR rowsort +SELECT + year, + AGGREGATE(profit) AS current_profit, + AGGREGATE(profit) AT (SET year = year - 1) AS prior_profit +FROM financials_v +ORDER BY year; +---- +2022 110.0 NULL +2023 230.0 110.0 + +# ============================================================================= +# Paper parity: listings 8, 9, 12 and CURRENT null semantics +# ============================================================================= + +statement ok +CREATE TABLE paper_orders (prodName TEXT, custName TEXT, order_date DATE, revenue INT); + +statement ok +INSERT INTO paper_orders VALUES + ('Happy', 'Var Bob', '2024-01-01', 4), + ('Happy', 'Alice', '2024-01-02', 6), + ('Happy', 'Alice', '2024-01-03', 7), + ('Whizz', 'Alice', '2024-01-04', 3); + +statement ok +CREATE VIEW paper_orders_v AS +SELECT *, SUM(revenue) AS MEASURE sumRevenue +FROM paper_orders; + +# Listing 8 style: AGGREGATE() is visible, plain measure ref ignores WHERE, +# and measure AT(VISIBLE) is accepted without AGGREGATE wrapper. +query TIRRR rowsort +SELECT + o.prodName, + COUNT(*) AS c, + AGGREGATE(o.sumRevenue) AS rAgg, + o.sumRevenue AT (VISIBLE) AS rViz, + o.sumRevenue AS r +FROM paper_orders_v o +WHERE o.custName <> 'Var Bob' +GROUP BY ROLLUP(o.prodName) +ORDER BY o.prodName; +---- +Happy 2 13 13 17 +NULL 3 NULL NULL NULL +Whizz 1 3 3 3 + +# Measures/CSEs should be valid in HAVING, and respect context semantics. 
+query TI rowsort +SELECT + o.prodName, + AGGREGATE(o.sumRevenue) AT (VISIBLE) AS rViz +FROM paper_orders_v o +WHERE o.custName <> 'Var Bob' +GROUP BY o.prodName +HAVING AGGREGATE(o.sumRevenue) AT (VISIBLE) > 10 +ORDER BY o.prodName; +---- +Happy 13 + +statement ok +CREATE TABLE paper_customers (custName TEXT, custAge INT); + +statement ok +INSERT INTO paper_customers VALUES + ('Alice', 30), ('Var Bob', 16), ('Carol', 40); + +statement ok +CREATE TABLE paper_order_customers (prodName TEXT, custName TEXT); + +statement ok +INSERT INTO paper_order_customers VALUES + ('Happy', 'Alice'), + ('Happy', 'Var Bob'), + ('Whizz', 'Carol'); + +statement ok +CREATE VIEW enhanced_customers_paper AS +SELECT *, AVG(custAge) AS MEASURE avgAge +FROM paper_customers; + +# Listing 9 style: weighted average vs unweighted avgAge vs visibleAvgAge. +query TRRRR rowsort +SELECT + o.prodName, + COUNT(*) AS orderCount, + AVG(c.custAge) AS weightedAvgAge, + c.avgAge AS avgAge, + c.avgAge AT (VISIBLE) AS visibleAvgAge +FROM paper_order_customers o +JOIN enhanced_customers_paper c USING (custName) +WHERE c.custAge >= 18 +GROUP BY o.prodName +ORDER BY o.prodName; +---- +Happy 1 30.0 28.666666666666668 35.0 +Whizz 1 40.0 28.666666666666668 35.0 + +statement ok +CREATE TABLE paper_orders_l12 (prodName TEXT, orderDate DATE, revenue INT); + +statement ok +INSERT INTO paper_orders_l12 VALUES + ('Happy', '2024-01-01', 4), + ('Happy', '2024-01-02', 6), + ('Happy', '2024-01-03', 7), + ('Whizz', '2024-01-04', 3); + +statement ok +CREATE VIEW paper_orders_l12_v AS +SELECT prodName, orderDate, revenue, AVG(revenue) AS MEASURE avgRevenue +FROM paper_orders_l12; + +# Listing 12 style measure syntax without AGGREGATE wrapper. +query TT rowsort +SELECT o.prodName, o.orderDate +FROM paper_orders_l12_v o +WHERE o.revenue > o.avgRevenue AT (WHERE prodName = o.prodName) +ORDER BY o.prodName, o.orderDate; +---- +Happy 2024-01-02 +Happy 2024-01-03 + +# Listing 12 query 1: correlated subquery. 
+query TT rowsort +SELECT o.prodName, o.orderDate +FROM paper_orders_l12 o +WHERE o.revenue > + (SELECT AVG(revenue) + FROM paper_orders_l12 o1 + WHERE o1.prodName = o.prodName) +ORDER BY o.prodName, o.orderDate; +---- +Happy 2024-01-02 +Happy 2024-01-03 + +# Listing 12 query 2: self-join. +query TT rowsort +SELECT o.prodName, o.orderDate +FROM paper_orders_l12 o +LEFT JOIN + (SELECT prodName, AVG(revenue) AS avgRevenue + FROM paper_orders_l12 + GROUP BY prodName) o2 +ON o.prodName = o2.prodName +WHERE o.revenue > o2.avgRevenue +ORDER BY o.prodName, o.orderDate; +---- +Happy 2024-01-02 +Happy 2024-01-03 + +# Listing 12 query 3: window aggregate. +query TT rowsort +SELECT o.prodName, o.orderDate +FROM + (SELECT prodName, revenue, orderDate, + AVG(revenue) OVER (PARTITION BY prodName) AS avgRevenue + FROM paper_orders_l12) o +WHERE o.revenue > o.avgRevenue +ORDER BY o.prodName, o.orderDate; +---- +Happy 2024-01-02 +Happy 2024-01-03 + +# CURRENT should evaluate to NULL when dimension is not single-valued in context. +query TR rowsort +SELECT region, AGGREGATE(revenue) AT (SET year = CURRENT year - 1) AS prior_from_current +FROM sales_v +GROUP BY region +ORDER BY region; +---- +EU NULL +US NULL + +# CURRENT can resolve from a single-valued WHERE context. 
+query R +SELECT AGGREGATE(revenue) AT (SET year = CURRENT year - 1) +FROM sales_v +WHERE year = 2023; +---- +150.0 + +# ============================================================================= +# Test: CTAS with AGGREGATE (parser_override required; bind fallback can't handle this) +# ============================================================================= + +statement ok +CREATE TABLE ctas_result AS +SELECT year, region, AGGREGATE(revenue) AS rev FROM sales_v; + +query IIR rowsort +SELECT * FROM ctas_result; +---- +2022 EU 50.0 +2022 US 100.0 +2023 EU 75.0 +2023 US 150.0 + +statement ok +DROP TABLE ctas_result; + +# ============================================================================= +# Test: INSERT...SELECT with AGGREGATE (parser_override required) +# ============================================================================= + +statement ok +CREATE TABLE insert_target (year INT, region TEXT, rev DOUBLE); + +statement ok +INSERT INTO insert_target +SELECT year, region, AGGREGATE(revenue) FROM sales_v; + +query IIR rowsort +SELECT * FROM insert_target; +---- +2022 EU 50.0 +2022 US 100.0 +2023 EU 75.0 +2023 US 150.0 + +statement ok +DROP TABLE insert_target; + +# ============================================================================= +# Test: DuckDB's built-in list aggregate() function still works +# parser_override must not intercept non-yardstick uses of aggregate() +# ============================================================================= + +query I +SELECT aggregate([1, 2, 3], 'sum'); +---- +6 + +query I +SELECT aggregate([10, 20, 30], 'min'); +---- +10 + +query I +SELECT aggregate([4, 5, 6], 'max'); +---- +6 + +query I +SELECT list_aggregate([1, 2, 3, 4], 'avg'); +---- +2.5 + +# list aggregate with alias list_aggr also works +query I +SELECT list_aggr([100, 200], 'sum'); +---- +300 diff --git a/yardstick-rs/src/sql/measures.rs b/yardstick-rs/src/sql/measures.rs index 4328a54..01b90de 100644 --- 
a/yardstick-rs/src/sql/measures.rs +++ b/yardstick-rs/src/sql/measures.rs @@ -1205,8 +1205,10 @@ pub fn extract_all_aggregate_calls(sql: &str) -> Vec<(String, usize, usize)> { let start = search_pos + agg_offset; if let Ok((remaining, (measure, modifiers))) = aggregate_with_at(&sql[start..]) { - // Only include calls WITHOUT AT modifier - if modifiers.is_empty() { + // Only include calls WITHOUT AT modifier, and only when the argument + // is a simple identifier (measure name). This avoids intercepting + // DuckDB's built-in aggregate([list], 'fn') list function. + if modifiers.is_empty() && parse_simple_measure_ref(measure).is_some() { let end = sql.len() - remaining.len(); results.push((measure.to_string(), start, end)); } From cfd15a3a9f6b74bef2ccb20834b76983efe4f1e0 Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Fri, 10 Apr 2026 08:41:39 -0700 Subject: [PATCH 2/4] Handle quoted identifiers in AGGREGATE() argument extraction Fix parse_simple_measure_ref to allow any characters inside matched quotes (double-quotes, backticks, brackets) so AGGREGATE("col_name") is correctly recognized as a measure reference. Add test for quoted measure names. 
--- test/sql/no_semantic_prefix.test | 28 ++++++++++++++++++++ yardstick-rs/src/sql/measures.rs | 45 +++++++++++++++++++++++++++++--- 2 files changed, 70 insertions(+), 3 deletions(-) diff --git a/test/sql/no_semantic_prefix.test b/test/sql/no_semantic_prefix.test index 69d078b..d366444 100644 --- a/test/sql/no_semantic_prefix.test +++ b/test/sql/no_semantic_prefix.test @@ -1843,3 +1843,31 @@ query I SELECT list_aggr([100, 200], 'sum'); ---- 300 + +# ============================================================================= +# Test: Quoted measure names work with AGGREGATE +# ============================================================================= + +statement ok +CREATE TABLE quoted_sales (year INT, amount DOUBLE); + +statement ok +INSERT INTO quoted_sales VALUES (2022, 100), (2023, 200); + +statement ok +CREATE VIEW quoted_v AS +SELECT year, SUM(amount) AS MEASURE total_revenue +FROM quoted_sales; + +# Quoted identifier referencing a measure should be recognized +query IR rowsort +SELECT year, AGGREGATE("total_revenue") FROM quoted_v; +---- +2022 100.0 +2023 200.0 + +statement ok +DROP VIEW quoted_v; + +statement ok +DROP TABLE quoted_sales; diff --git a/yardstick-rs/src/sql/measures.rs b/yardstick-rs/src/sql/measures.rs index 01b90de..59b7f87 100644 --- a/yardstick-rs/src/sql/measures.rs +++ b/yardstick-rs/src/sql/measures.rs @@ -354,9 +354,48 @@ fn parse_simple_measure_ref(expr: &str) -> Option<(Option, String)> { return None; } - let allowed = |c: char| c.is_ascii_alphanumeric() || c == '_' || c == '.' || c == '"' || c == '`' || c == '[' || c == ']'; - if !trimmed.chars().all(allowed) { - return None; + // Validate that the expression is a simple identifier or qualifier.identifier. + // Allow any characters inside matched quotes (e.g. "total revenue"). 
+ let chars: Vec<char> = trimmed.chars().collect(); + let mut i = 0; + while i < chars.len() { + match chars[i] { + '"' => { + // Skip quoted identifier contents (any characters allowed inside) + i += 1; + while i < chars.len() && chars[i] != '"' { + i += 1; + } + if i >= chars.len() { + return None; // unmatched quote + } + i += 1; + } + '`' => { + i += 1; + while i < chars.len() && chars[i] != '`' { + i += 1; + } + if i >= chars.len() { + return None; + } + i += 1; + } + '[' => { + i += 1; + while i < chars.len() && chars[i] != ']' { + i += 1; + } + if i >= chars.len() { + return None; + } + i += 1; + } + c if c.is_ascii_alphanumeric() || c == '_' || c == '.' => { + i += 1; + } + _ => return None, // disallowed character outside quotes (commas, parens, etc.) + } + } let parts: Vec<&str> = trimmed.split('.').collect(); From bfafe8d1dc9342fd0c2a01a018201262e332d275 Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Fri, 10 Apr 2026 17:48:37 -0700 Subject: [PATCH 3/4] Return parsed statements directly for non-SELECT to preserve transaction context For CTAS and INSERT...SELECT, return the expanded SQL as parsed statements instead of wrapping in yardstick() table function. The table function executes via con.Query() on a fresh Connection, which runs outside the caller's transaction context. SELECT statements still use the table function wrapper for the second expansion pass. --- src/yardstick_extension.cpp | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/src/yardstick_extension.cpp b/src/yardstick_extension.cpp index ee204c1..bcd0bcc 100644 --- a/src/yardstick_extension.cpp +++ b/src/yardstick_extension.cpp @@ -462,29 +462,37 @@ ParserOverrideResult yardstick_parser_override(ParserExtensionInfo *, // Validate the expanded SQL parses. If expansion produced garbage // (e.g. because AGGREGATE() was actually DuckDB's list aggregate // function, not a yardstick measure), fall through to the native parser. 
+ Parser validation_parser; try { - Parser validation_parser; validation_parser.ParseQuery(expanded_sql); } catch (...) { return ParserOverrideResult(); } - // Escape single quotes for embedding in string literal - string escaped_sql; - for (char c : expanded_sql) { - if (c == '\'') { - escaped_sql += "''"; - } else { - escaped_sql += c; + // For SELECT statements, wrap in yardstick() table function so that + // any remaining AGGREGATE() calls get a second expansion pass. + // For non-SELECT (CTAS, INSERT...SELECT), return parsed statements + // directly to preserve the caller's transaction context. + bool is_select = !validation_parser.statements.empty() && + validation_parser.statements[0]->type == StatementType::SELECT_STATEMENT; + + if (is_select) { + string escaped_sql; + for (char c : expanded_sql) { + if (c == '\'') { + escaped_sql += "''"; + } else { + escaped_sql += c; + } } - } - // Wrap in table function call and parse with DuckDB's native parser - string wrapper_sql = "SELECT * FROM yardstick('" + escaped_sql + "')"; + string wrapper_sql = "SELECT * FROM yardstick('" + escaped_sql + "')"; + Parser parser; + parser.ParseQuery(wrapper_sql); + return ParserOverrideResult(std::move(parser.statements)); + } - Parser parser; - parser.ParseQuery(wrapper_sql); - return ParserOverrideResult(std::move(parser.statements)); + return ParserOverrideResult(std::move(validation_parser.statements)); } yardstick_free_aggregate_result(result); From 5ce29d612495f6999aaab7b3cf675e8f8bded255 Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Sat, 11 Apr 2026 06:50:17 -0700 Subject: [PATCH 4/4] Allow whitespace in qualified measure refs like AGGREGATE(s . revenue) parse_simple_measure_ref now accepts spaces around the dot in qualified identifiers (e.g. "s . revenue", "s. revenue") and trims parts before normalization. Add tests for spaced qualified refs. 
--- test/sql/no_semantic_prefix.test | 24 ++++++++++++++++++++++++ yardstick-rs/src/sql/measures.rs | 8 ++++---- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/test/sql/no_semantic_prefix.test b/test/sql/no_semantic_prefix.test index d366444..d62d3f4 100644 --- a/test/sql/no_semantic_prefix.test +++ b/test/sql/no_semantic_prefix.test @@ -1871,3 +1871,27 @@ DROP VIEW quoted_v; statement ok DROP TABLE quoted_sales; + +# ============================================================================= +# Test: Spaced qualified refs like AGGREGATE(s . revenue) +# ============================================================================= + +query IR rowsort +SELECT s.year, AGGREGATE(s.revenue) FROM sales_v s GROUP BY 1; +---- +2022 150.0 +2023 225.0 + +# Spaces around the dot +query IR rowsort +SELECT s . year, AGGREGATE(s . revenue) FROM sales_v s GROUP BY 1; +---- +2022 150.0 +2023 225.0 + +# Space only after dot +query IR rowsort +SELECT s.year, AGGREGATE(s. revenue) FROM sales_v s GROUP BY 1; +---- +2022 150.0 +2023 225.0 diff --git a/yardstick-rs/src/sql/measures.rs b/yardstick-rs/src/sql/measures.rs index 59b7f87..0bdc14c 100644 --- a/yardstick-rs/src/sql/measures.rs +++ b/yardstick-rs/src/sql/measures.rs @@ -391,7 +391,7 @@ fn parse_simple_measure_ref(expr: &str) -> Option<(Option, String)> { } i += 1; } - c if c.is_ascii_alphanumeric() || c == '_' || c == '.' => { + c if c.is_ascii_alphanumeric() || c == '_' || c == '.' || c.is_ascii_whitespace() => { i += 1; } _ => return None, // disallowed character outside quotes (commas, parens, etc.) 
@@ -400,10 +400,10 @@ fn parse_simple_measure_ref(expr: &str) -> Option<(Option, String)> { let parts: Vec<&str> = trimmed.split('.').collect(); match parts.as_slice() { - [measure] => Some((None, normalize_identifier_name(measure))), + [measure] => Some((None, normalize_identifier_name(measure.trim()))), [qualifier, measure] => Some(( - Some(normalize_identifier_name(qualifier)), - normalize_identifier_name(measure), + Some(normalize_identifier_name(qualifier.trim())), + normalize_identifier_name(measure.trim()), )), _ => None, }