Skip to content

Commit

Permalink
fix(duckdb)!: Combine aggregate functions with orderby from WITHIN GR…
Browse files Browse the repository at this point in the history
…OUP (#3352)

* fix(duckdb): Combine aggregate functions with orderby from WITHIN GROUP

* Completely remove WITHIN GROUP generation

* Move PERCENTILES to expressions.py, reuse it in duckdb.py

* Update tests/dialects/test_snowflake.py

Co-authored-by: Jo <46752250+georgesittas@users.noreply.github.com>

---------

Co-authored-by: Jo <46752250+georgesittas@users.noreply.github.com>
  • Loading branch information
VaggelisD and georgesittas committed Apr 25, 2024
1 parent 071bd42 commit c5ce47b
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 21 deletions.
32 changes: 19 additions & 13 deletions sqlglot/dialects/duckdb.py
Expand Up @@ -28,7 +28,7 @@
timestrtotime_sql,
unit_to_var,
)
from sqlglot.helper import flatten, seq_get
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType


Expand Down Expand Up @@ -155,16 +155,6 @@ def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str
return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)))


def _rename_unless_within_group(
a: str, b: str
) -> t.Callable[[DuckDB.Generator, exp.Expression], str]:
return lambda self, expression: (
self.func(a, *flatten(expression.args.values()))
if isinstance(expression.find_ancestor(exp.Select, exp.WithinGroup), exp.WithinGroup)
else self.func(b, *flatten(expression.args.values()))
)


class DuckDB(Dialect):
NULL_ORDERING = "nulls_are_last"
SUPPORTS_USER_DEFINED_TYPES = False
Expand Down Expand Up @@ -425,8 +415,8 @@ class Generator(generator.Generator):
exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
),
exp.ParseJSON: rename_func("JSON"),
exp.PercentileCont: _rename_unless_within_group("PERCENTILE_CONT", "QUANTILE_CONT"),
exp.PercentileDisc: _rename_unless_within_group("PERCENTILE_DISC", "QUANTILE_DISC"),
exp.PercentileCont: rename_func("QUANTILE_CONT"),
exp.PercentileDisc: rename_func("QUANTILE_DISC"),
# DuckDB doesn't allow qualified columns inside of PIVOT expressions.
# See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
Expand Down Expand Up @@ -617,3 +607,19 @@ def bracket_sql(self, expression: exp.Bracket) -> str:
bracket = f"({bracket})[1]"

return bracket

def withingroup_sql(self, expression: exp.WithinGroup) -> str:
expression_sql = self.sql(expression, "expression")

func = expression.this
if isinstance(func, exp.PERCENTILES):
# Make the order key the first arg and slide the fraction to the right
# https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
order_col = expression.find(exp.Ordered)
if order_col:
func.set("expression", func.this)
func.set("this", order_col.this)

this = self.sql(expression, "this").rstrip(")")

return f"{this}{expression_sql})"
2 changes: 2 additions & 0 deletions sqlglot/expressions.py
Expand Up @@ -5880,6 +5880,8 @@ def _norm_arg(arg):

JSON_PATH_PARTS = subclasses(__name__, JSONPathPart, (JSONPathPart,))

PERCENTILES = (PercentileCont, PercentileDisc)


# Helpers
@t.overload
Expand Down
7 changes: 2 additions & 5 deletions sqlglot/transforms.py
Expand Up @@ -349,13 +349,10 @@ def new_name(names: t.Set[str], name: str) -> str:
return _explode_to_unnest


PERCENTILES = (exp.PercentileCont, exp.PercentileDisc)


def add_within_group_for_percentiles(expression: exp.Expression) -> exp.Expression:
"""Transforms percentiles by adding a WITHIN GROUP clause to them."""
if (
isinstance(expression, PERCENTILES)
isinstance(expression, exp.PERCENTILES)
and not isinstance(expression.parent, exp.WithinGroup)
and expression.expression
):
Expand All @@ -371,7 +368,7 @@ def remove_within_group_for_percentiles(expression: exp.Expression) -> exp.Expre
"""Transforms percentiles by getting rid of their corresponding WITHIN GROUP clause."""
if (
isinstance(expression, exp.WithinGroup)
and isinstance(expression.this, PERCENTILES)
and isinstance(expression.this, exp.PERCENTILES)
and isinstance(expression.expression, exp.Order)
):
quantile = expression.this.this
Expand Down
10 changes: 8 additions & 2 deletions tests/dialects/test_duckdb.py
Expand Up @@ -652,8 +652,14 @@ def test_duckdb(self):
"SELECT CAST('2020-05-06' AS DATE) + INTERVAL 5 DAY",
read={"bigquery": "SELECT DATE_ADD(CAST('2020-05-06' AS DATE), INTERVAL 5 DAY)"},
)
self.validate_identity("SELECT PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY y DESC) FROM t")
self.validate_identity("SELECT PERCENTILE_DISC(0.25) WITHIN GROUP (ORDER BY y DESC) FROM t")
self.validate_identity(
"SELECT PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY y DESC) FROM t",
"SELECT QUANTILE_CONT(y, 0.25 ORDER BY y DESC) FROM t",
)
self.validate_identity(
"SELECT PERCENTILE_DISC(0.25) WITHIN GROUP (ORDER BY y DESC) FROM t",
"SELECT QUANTILE_DISC(y, 0.25 ORDER BY y DESC) FROM t",
)
self.validate_all(
"SELECT QUANTILE_CONT(x, q) FROM t",
write={
Expand Down
9 changes: 8 additions & 1 deletion tests/dialects/test_snowflake.py
Expand Up @@ -456,7 +456,7 @@ def test_snowflake(self):
},
write={
"": f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x NULLS LAST){suffix}",
"duckdb": f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x){suffix}",
"duckdb": f"SELECT QUANTILE_CONT(x, 0.5 ORDER BY x){suffix}",
"postgres": f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x){suffix}",
"snowflake": f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x){suffix}",
},
Expand Down Expand Up @@ -821,6 +821,13 @@ def test_snowflake(self):
"snowflake": "CASE WHEN x = a OR (x IS NULL AND a IS NULL) THEN b WHEN x = c OR (x IS NULL AND c IS NULL) THEN d ELSE e END",
},
)
self.validate_all(
"SELECT LISTAGG(col1, ', ') WITHIN GROUP (ORDER BY col2) FROM t",
write={
"duckdb": "SELECT GROUP_CONCAT(col1, ', ' ORDER BY col2) FROM t",
"snowflake": "SELECT LISTAGG(col1, ', ') WITHIN GROUP (ORDER BY col2) FROM t",
},
)

def test_null_treatment(self):
self.validate_all(
Expand Down

0 comments on commit c5ce47b

Please sign in to comment.