Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/include/yardstick_extension.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ ParserExtensionParseResult yardstick_parse(ParserExtensionInfo *,
ParserExtensionPlanResult yardstick_plan(ParserExtensionInfo *, ClientContext &,
unique_ptr<ParserExtensionParseData>);

// Parser override hook; YardstickParserExtension assigns it to parser_override.
// Takes the raw query text and the parser options and returns a
// ParserOverrideResult (defined in src/yardstick_extension.cpp).
ParserOverrideResult yardstick_parser_override(ParserExtensionInfo *info,
const std::string &query,
ParserOptions &options);

// Operator extension: handles binding after parsing
struct YardstickOperatorExtension : public OperatorExtension {
YardstickOperatorExtension() : OperatorExtension() { Bind = yardstick_bind; }
Expand All @@ -39,10 +43,14 @@ struct YardstickOperatorExtension : public OperatorExtension {
};

// Parser extension: hooks yardstick into DuckDB's handling of query strings.
// Two mechanisms are registered:
//   - parser_override, which runs BEFORE DuckDB's native parser and therefore
//     sees every statement type;
//   - parse_function / plan_function, retained as the fallback used when the
//     native parser fails (e.g., AT(...) syntax that is not valid SQL).
struct YardstickParserExtension : public ParserExtension {
    YardstickParserExtension() : ParserExtension() {
        // Pre-parse interception of all queries.
        parser_override = yardstick_parser_override;

        // Fallback path for text the native parser rejects.
        parse_function = yardstick_parse;
        plan_function = yardstick_plan;
    }
};

Expand Down
115 changes: 115 additions & 0 deletions src/yardstick_extension.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,117 @@ ParserExtensionParseResult yardstick_parse(ParserExtensionInfo *,
return ParserExtensionParseResult();
}

//=============================================================================
// PARSER OVERRIDE: intercepts ALL queries before DuckDB's native parser
//=============================================================================

// Parser override entry point: inspects a query before DuckDB's native parser.
//
// The query (with an optional leading SEMANTIC prefix removed) is checked, in
// order, for:
//   1. DROP VIEW on a measure view  - yardstick bookkeeping is updated and the
//      statement is left for DuckDB to execute;
//   2. AGGREGATE() calls            - the query is expanded and returned as
//      parsed statements;
//   3. CREATE VIEW ... AS MEASURE   - the view definition is processed and the
//      cleaned SQL is returned as parsed statements.
//
// Parameters:
//   (unnamed) - extension info; not used.
//   query     - raw SQL text of the statement(s) being parsed.
//   options   - parser options; not consulted by this function.
//
// Returns:
//   - a default-constructed ParserOverrideResult when the query is not handled
//     here, so DuckDB's native parser takes over;
//   - a ParserOverrideResult holding parsed statements when yardstick rewrote
//     the query;
//   - a ParserOverrideResult holding an exception when processing or parsing
//     the rewritten SQL failed.
ParserOverrideResult yardstick_parser_override(ParserExtensionInfo *,
                                               const std::string &query,
                                               ParserOptions &options) {
    // Strip SEMANTIC prefix if present (backwards compatibility). Bind by
    // reference so the query is not copied when there is no prefix.
    std::string semantic_stripped;
    const bool had_semantic_prefix = StartsWithSemantic(query, semantic_stripped);
    const std::string &sql_to_check = had_semantic_prefix ? semantic_stripped : query;

    // Check for DROP VIEW on measure views.
    if (yardstick_drop_measure_view_from_sql(sql_to_check.c_str())) {
        // Catalog cleanup done; let DuckDB handle the actual DROP.
        return ParserOverrideResult();
    }

    // Check for AGGREGATE() function.
    if (yardstick_has_aggregate(sql_to_check.c_str())) {
        YardstickAggregateResult result = yardstick_expand_aggregate(sql_to_check.c_str());

        if (result.error) {
            // Expansion failed: this might not be a yardstick AGGREGATE() call
            // (e.g. DuckDB's built-in list aggregate function). Fall through to
            // the native parser in case it can handle the query.
            //
            // NOTE(review): a reviewer on this change flagged that this also
            // applies to CREATE TABLE ... AS SELECT and INSERT ... SELECT when
            // expansion reports an error for them (reportedly because
            // yardstick_parse_select rejects non-SELECT top-level statements).
            // Those queries would then reach DuckDB un-rewritten. Unresolved
            // here; confirm against the FFI parser.
            yardstick_free_aggregate_result(result);
            return ParserOverrideResult();
        }

        // Guard against a null buffer before building a string from it.
        if (result.had_aggregate && result.expanded_sql != nullptr) {
            const string expanded_sql(result.expanded_sql);
            yardstick_free_aggregate_result(result);

            // Validate the expanded SQL parses. If expansion produced garbage
            // (e.g. because AGGREGATE() was actually DuckDB's list aggregate
            // function, not a yardstick measure), fall through to the native
            // parser. Any failure here is deliberately treated as "not ours".
            Parser validation_parser;
            try {
                validation_parser.ParseQuery(expanded_sql);
            } catch (...) {
                return ParserOverrideResult();
            }

            const bool is_select =
                !validation_parser.statements.empty() &&
                validation_parser.statements[0]->type == StatementType::SELECT_STATEMENT;

            // For non-SELECT (CTAS, INSERT...SELECT), return parsed statements
            // directly to preserve the caller's transaction context.
            if (!is_select) {
                return ParserOverrideResult(std::move(validation_parser.statements));
            }

            // For SELECT statements, wrap in yardstick() table function so that
            // any remaining AGGREGATE() calls get a second expansion pass. The
            // expanded SQL is embedded as a string literal, so every single
            // quote is doubled.
            static const char WRAPPER_PREFIX[] = "SELECT * FROM yardstick('";
            static const char WRAPPER_SUFFIX[] = "')";
            string wrapper_sql;
            wrapper_sql.reserve(sizeof(WRAPPER_PREFIX) + sizeof(WRAPPER_SUFFIX) + expanded_sql.size());
            wrapper_sql += WRAPPER_PREFIX;
            for (const char c : expanded_sql) {
                if (c == '\'') {
                    wrapper_sql += "''";
                } else {
                    wrapper_sql += c;
                }
            }
            wrapper_sql += WRAPPER_SUFFIX;

            // Report a wrapper parse failure through the result object, the
            // same way the CREATE VIEW path below does, rather than letting an
            // exception escape this callback.
            try {
                Parser parser;
                parser.ParseQuery(wrapper_sql);
                return ParserOverrideResult(std::move(parser.statements));
            } catch (std::exception &e) {
                return ParserOverrideResult(e);
            }
        }

        yardstick_free_aggregate_result(result);
    }

    // Check for CREATE VIEW with AS MEASURE.
    if (yardstick_has_as_measure(sql_to_check.c_str())) {
        // NOTE(review): detection above uses the SEMANTIC-stripped text, but
        // the unstripped `query` is what gets processed here. Confirm that
        // yardstick_process_create_view tolerates a SEMANTIC prefix.
        const std::string rewritten_query = RewritePercentileWithinGroup(query);
        YardstickCreateViewResult result = yardstick_process_create_view(rewritten_query.c_str());

        if (result.error) {
            const string error_msg(result.error);
            yardstick_free_create_view_result(result);
            // The error form of ParserOverrideResult is built from a caught
            // exception, so raise the ParserException and catch it right away.
            try {
                throw ParserException(error_msg);
            } catch (std::exception &e) {
                return ParserOverrideResult(e);
            }
        }

        // Guard against a null buffer before building a string from it.
        if (result.is_measure_view && result.clean_sql != nullptr) {
            const string clean_sql = RewritePercentileWithinGroup(result.clean_sql);
            yardstick_free_create_view_result(result);

            try {
                Parser parser;
                parser.ParseQuery(clean_sql);
                return ParserOverrideResult(std::move(parser.statements));
            } catch (std::exception &e) {
                return ParserOverrideResult(e);
            }
        }

        yardstick_free_create_view_result(result);
    }

    // Not a yardstick query; fall through to DuckDB's native parser.
    return ParserOverrideResult();
}

ParserExtensionPlanResult yardstick_plan(ParserExtensionInfo *,
ClientContext &context,
unique_ptr<ParserExtensionParseData> parse_data) {
Expand Down Expand Up @@ -531,6 +642,10 @@ static void LoadInternal(ExtensionLoader &loader) {
auto &db = loader.GetDatabaseInstance();
auto &config = DBConfig::GetConfig(db);

// Enable parser_override so yardstick intercepts queries before DuckDB's native parser.
// FALLBACK mode: if our override doesn't handle the query, DuckDB's parser takes over.
config.SetOptionByName("allow_parser_override_extension", Value("fallback"));

// Register parser extension
YardstickParserExtension parser;
#if __has_include("duckdb/main/extension_callback_manager.hpp")
Expand Down
Loading
Loading